% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/MetaDICT_transfer.R
\name{metadict_add_new_data}
\alias{metadict_add_new_data}
\title{Batch correction for new datasets using an existing dictionary}
\usage{
metadict_add_new_data(
  newdata,
  newmeta,
  integrated_result,
  customize_parameter = FALSE,
  beta = 0.01,
  normalization = "uq",
  max_iter = 10000,
  imputation = FALSE,
  verbose = TRUE,
  optim_trace = FALSE
)
}
\arguments{
\item{newdata}{The integrated count table of new studies.
Rows represent taxa, and columns represent samples.
Should be provided as either a \code{matrix} or a \code{data.frame}.}

\item{newmeta}{The integrated meta table (\code{meta}) for the new studies,
containing sample information and batch IDs.}

\item{integrated_result}{The output list from a previous MetaDICT integration task.}

\item{customize_parameter}{A logical variable.
Set to \code{TRUE} if the \code{beta} parameter is customized.
If \code{FALSE}, MetaDICT determines \code{beta} based on the number of covariates.
Default is \code{FALSE}.}

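\item{beta}{A parameter controlling the smoothness of the estimated measurement efficiency.
A larger \code{beta} results in more similar measurement efficiency across taxa.
Only used when \code{customize_parameter = TRUE}; otherwise MetaDICT determines \code{beta} itself.
Default is \code{0.01}.}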

\item{normalization}{The normalization method. Options are \code{"uq"} (upper quantile, the default), \code{"RSim"} or \code{"TSS"}.
Set to \code{NULL} if normalization is not needed.
This should be the same as in the previous integration task.}

\item{max_iter}{The maximum number of iterations for the optimization process.
Default is \code{10000}.}

\item{imputation}{A logical variable.
Whether to allow MetaDICT to perform imputation based on dictionary learning results.
Default is \code{FALSE}.}

\item{verbose}{A logical variable.
Whether to generate verbose output. Default is \code{TRUE}.}

\item{optim_trace}{A logical variable.
Whether to print optimization steps. Default is \code{FALSE}.}
}
\value{
A \code{list} with the following components:
\item{\code{count}}{ (\code{data.frame}) – The corrected count table.
Rows represent taxa, and columns represent samples.}
\item{\code{D}}{ (\code{matrix}) – The estimated shared dictionary.}
\item{\code{R}}{ (\code{matrix}) – The estimated sample representation.}
\item{\code{w}}{ (\code{matrix}) – The estimated measurement efficiency.
Rows represent datasets, and columns represent taxa.}
\item{\code{meta}}{ (\code{data.frame}) – The meta table used in the covariate balancing step.}
\item{\code{dist_mat}}{ (\code{matrix}) – The distance matrix measuring taxa dissimilarity.}
}
\description{
This function adds new studies to an integrated dataset using a pre-learned dictionary.
The corrected data can be directly used with machine learning models trained on the
previously integrated dataset, enabling seamless application without retraining.
}
\details{
This function estimates measurement efficiency and debiased representations for new studies
while keeping the dictionary unchanged.
}
\examples{
 data(exampleData)
 O = exampleData$O
 meta = exampleData$meta
 dist_mat = exampleData$dist_mat
 metadict_res = MetaDICT(O, meta, distance_matrix = dist_mat)
 data("exampleData_transfer")
 new_data = exampleData_transfer$new_data
 new_meta = exampleData_transfer$new_meta
 new_data_res = metadict_add_new_data(new_data, new_meta, metadict_res)

}
