% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/formalize.R
\name{formalize}
\alias{formalize}
\title{Use an LLM to match informal terms to terms in an ontology}
\usage{
formalize(
  informal_terms,
  ontology_terms,
  ontology_tags,
  ellmer_chatfun = ellmer::chat_openai,
  llm_model = "gpt-4.1-2025-04-14"
)
}
\arguments{
\item{informal_terms}{character() vector of terms not necessarily found in ontology}

\item{ontology_terms}{character() vector of ontology terms}

\item{ontology_tags}{character() vector of tags for the ontology terms; must be the same length as \code{ontology_terms}}

\item{ellmer_chatfun}{chat function from ellmer used to connect to the LLM provider; defaults to \code{ellmer::chat_openai}}

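\item{llm_model}{character(1) name of the model to use with \code{ellmer_chatfun}; defaults to
"gpt-4.1-2025-04-14", which is intended for \code{ellmer::chat_openai}. When a chat function for
another provider is supplied, give a model available from that provider through ellmer.}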
}
\value{
A data.frame with columns informal_term, formal_term, similarity_score, and tag.
The attributes chat_tokens, chat_cost, and chat_provider are also attached; they are not
shown when the data.frame is printed and can be retrieved with \code{attr()}.
}
\description{
Use an LLM to match informal terms to terms in an ontology.
}
\note{
Expects the environment variable OPENAI_API_KEY to be set if an OpenAI chat function is used,
or GOOGLE_API_KEY if, e.g., a Gemini chat function is used.
}
\examples{
# Needs an API key for the chosen chat provider and access to the ontology
# URL below, so the example only runs in interactive sessions.
if (interactive()) {
  # informal cell type labels, from Zilionis
  ctypes <- c(
    "tPlasma cells", "tMoMacDC", "tT cells",
    "tB cells", "tNK cells", "tNeutrophils", "Fibroblasts", "Type II cells",
    "tpDC", "Endothelial cells", "tMast cells", "Smooth muscle cells",
    "ND", "Club cells", "bNeutrophils", "bT cells", "bMonocytes",
    "bNK cells", "bRBC", "bpDC", "bB cells", "bPlasma cells", "bPlatelets",
    "tRBC", "Type I cells", "Ciliated cells", "bBasophils"
  )

  # ontology source: cl.owl, addressed by its OBO PURL
  cc <- owl2cache(url = "http://purl.obolibrary.org/obo/cl.owl")
  cloi <- setup_entities2(cc)

  # keep only the entries whose name matches CL_; the values become the
  # candidate formal terms and the names become their tags
  oname <- cloi$name
  actual <- grep("CL_", names(oname))
  oterms <- as.character(oname[actual])
  otags <- names(oname[actual])

  octy <- formalize(ctypes, oterms, otags)

  # explicit print() is required here: values are not auto-printed inside
  # a braced block, so a bare head(octy) would show nothing
  print(head(octy))
  print(attr(octy, "chat_tokens"))

  # plot the ontology restricted to the distinct tags that were matched
  onto_plot2(cloi, unique(na.omit(octy$tag)), cex = .55)
}
}
