/*
 * Decompiled with CFR 0.152.
 */
package edu.msu.cme.rdp.classifier.train;

import edu.msu.cme.rdp.classifier.train.LineageSequenceParser;
import edu.msu.cme.rdp.classifier.train.NameRankDupException;
import edu.msu.cme.rdp.classifier.train.TreeFactory;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.PosixParser;

/**
 * Command-line entry point that builds classifier training files from a
 * taxonomy file and a lineage-annotated sequence file.
 *
 * <p>All of the work is done in the constructor: it feeds the inputs to a
 * {@link TreeFactory} and then asks the factory to print its output files into
 * the requested output directory. {@link #main(String[])} parses the command
 * line options registered in the static initializer and invokes the
 * constructor.
 */
public class ClassifierTraineeMaker {
    /** Command line options understood by {@link #main(String[])}; populated in the static initializer. */
    private static final Options options = new Options();

    /**
     * Reads the training inputs and writes the training files into {@code outdir}.
     *
     * @param taxFile      path of the taxonomy file, opened with a {@link FileReader}
     * @param seqFile      path of the sequence file handed to {@link LineageSequenceParser}
     * @param cnFile       path of the copy number file, or {@code null} to skip that step
     * @param trainset_no  training set number passed through to {@link TreeFactory}
     * @param version      taxonomy version passed through to {@link TreeFactory}; may be {@code null}
     * @param modification modification information passed through to {@link TreeFactory}; may be {@code null}
     * @param outdir       output directory; created (including missing parents) when it does not exist
     * @throws FileNotFoundException if the taxonomy file cannot be opened
     * @throws IOException           if reading or writing fails, or the output directory cannot be created
     */
    public ClassifierTraineeMaker(String taxFile, String seqFile, String cnFile, int trainset_no, String version, String modification, String outdir) throws FileNotFoundException, IOException {
        FileReader tax = new FileReader(taxFile);
        try {
            TreeFactory factory = new TreeFactory(tax, trainset_no, version, modification);
            LineageSequenceParser parser = new LineageSequenceParser(new File(seqFile));
            factory.parseSequenceFile(parser);
            if (cnFile != null) {
                factory.parseCopyNumberFile(cnFile);
            }
            factory.createGenusWordConditionalProb();

            // mkdirs() also creates missing parent directories. The isDirectory()
            // re-check tolerates another process creating the directory concurrently.
            File outDirFile = new File(outdir);
            if (!outDirFile.exists() && !outDirFile.mkdirs() && !outDirFile.isDirectory()) {
                throw new IOException("Unable to create output directory: " + outdir);
            }

            // The print methods receive the directory with a trailing separator.
            String outPrefix = outdir + File.separator;
            factory.printTrainingFiles(outPrefix);
            factory.printWordPriors(outPrefix);
            factory.printWordConditionalProbIndexArr(outPrefix);
            factory.printGenusIndex_WordProbArr(outPrefix);
        }
        catch (NameRankDupException ex) {
            // Kept as best-effort reporting: the constructor's declared exceptions
            // do not include NameRankDupException, so the failure is only logged.
            // NOTE(review): any steps after the failing call are skipped, so the
            // output may be missing or incomplete in this case.
            ex.printStackTrace();
        }
        finally {
            // Release the taxonomy file handle on both success and failure.
            tax.close();
        }
    }

    /**
     * Prints the copyright and GPL license notice to standard error.
     */
    public static void printLicense() {
        String license = "Copyright 2006 Michigan State University Board of Trustees.\n\nThis program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version.\n\nThis program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.\n\nYou should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA\n\nAuthors's mailng address:\nCenter for Microbial Ecology\n2225A Biomedical Physical Science\nMichigan State University\nEast Lansing, Michigan USA 48824-4320\nE-mail: James R. Cole at colej@msu.edu\n\tQiong Wang at wangqion@msu.edu\n\tJames M. Tiedje at tiedjej@msu.edu\n\n";
        System.err.println(license);
    }

    /**
     * Parses the command line and runs the training file generation.
     *
     * <p>Options {@code t}, {@code s} and {@code o} are required; {@code c},
     * {@code n}, {@code v} and {@code m} are optional ({@code n} defaults to 1).
     * On any parsing or validation problem the error message and the usage
     * help are printed and the method returns without doing any work.
     *
     * @param args command line arguments
     * @throws FileNotFoundException if the taxonomy file cannot be opened
     * @throws IOException           if reading the inputs or writing the outputs fails
     */
    public static void main(String[] args) throws FileNotFoundException, IOException {
        String seqFile;
        String taxFile;
        String cnFile = null;
        int trainset_no = 1;
        String version = null;
        String modification = null;
        String outdir = null;
        try {
            CommandLine line = new PosixParser().parse(options, args);
            if (!line.hasOption("t")) {
                throw new Exception("taxon file must be specified");
            }
            taxFile = line.getOptionValue("t");
            if (line.hasOption("c")) {
                cnFile = line.getOptionValue("c");
            }
            if (!line.hasOption("s")) {
                throw new Exception("seq file must be specified");
            }
            seqFile = line.getOptionValue("s");
            if (line.hasOption("n")) {
                try {
                    trainset_no = Integer.parseInt(line.getOptionValue("n"));
                }
                catch (NumberFormatException ex) {
                    // Same message as before; the cause is kept for debugging.
                    throw new IllegalArgumentException("trainset_no needs to be an integer.", ex);
                }
            }
            if (!line.hasOption("o")) {
                throw new Exception("output directory must be specified");
            }
            outdir = line.getOptionValue("o");
            if (line.hasOption("v")) {
                version = line.getOptionValue("v");
            }
            if (line.hasOption("m")) {
                modification = line.getOptionValue("m");
            }
        }
        catch (Exception e) {
            // Covers both commons-cli parse failures and the validation errors thrown above.
            System.out.println("Command Error: " + e.getMessage());
            new HelpFormatter().printHelp(120, "train", "", options, "", true);
            return;
        }
        // The constructor performs all the work; the instance itself is not needed.
        new ClassifierTraineeMaker(taxFile, seqFile, cnFile, trainset_no, version, modification, outdir);
    }

    // Registers the supported command line options and their help text.
    static {
        options.addOption(new Option("t", "tax_file", true, "contains the hierarchical taxonomy information in the following format:\ntaxid*taxon name*parent taxid*depth*rank\nFields taxid, the parent taxid and depth should be in integer format\nThe taxid, or the combination of taxon name and rank is unique\ndepth indicates the depth from the root taxon.\n Note: the depth for the root is 0"));
        options.addOption(new Option("s", "seq", true, "training sequences in FASTA format with lineage in the header:\na list taxon names seperated by ';' with highest rank taxon first.\nThe lowest rank of the lineage have to be the same for all sequence.\nThe lowest rank is not limited to genus"));
        options.addOption(new Option("n", "version_no", true, "an integer used to refer to a training set"));
        options.addOption(new Option("v", "version", true, "the version of the hierarchical taxonomy"));
        options.addOption(new Option("m", "mod", true, "the modifcation information of the taxonomy"));
        options.addOption(new Option("o", "out_dir", true, "the output directory"));
        options.addOption(new Option("c", "copynumber_file", true, "contains at least name, rank and the mean copy number of taxa. A header line is required to find the corresponding columns\nOnly the copy number of the lowest rank taxa will be loaded and the copy number of the other taxa are derived from these."));
    }
}

