package de.datexis.ner.exec;

import de.datexis.common.CommandLineParser;
import de.datexis.common.Resource;
import de.datexis.common.WordHelpers;
import de.datexis.encoder.impl.PositionEncoder;
import de.datexis.encoder.impl.SurfaceEncoder;
import de.datexis.encoder.impl.TrigramEncoder;
import de.datexis.model.Annotation;
import de.datexis.model.Dataset;
import de.datexis.ner.MentionAnnotator;
import de.datexis.ner.reader.CoNLLDatasetReader;
import java.io.IOException;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:de/datexis/ner/exec/TrainMentionAnnotatorCoNLL.class */
/**
 * Command-line entry point that trains a {@link MentionAnnotator} on a CoNLL-formatted
 * dataset and writes the trained model to an output location.
 *
 * <p>Options are declared in {@link ExecParams#setUpCliOptions()}; the training steps are in
 * {@link #runTraining(ExecParams)}.
 */
public class TrainMentionAnnotatorCoNLL {
    protected static final Logger log = LoggerFactory.getLogger(TrainMentionAnnotatorCoNLL.class);

    /* JADX INFO: Access modifiers changed from: protected */
    /* loaded from: input_file:de/datexis/ner/exec/TrainMentionAnnotatorCoNLL$ExecParams.class */
    /**
     * Command-line parameters of the trainer. An instance both declares the accepted options
     * ({@link #setUpCliOptions()}) and receives their parsed values
     * ({@link #setParams(CommandLine)}).
     */
    public static class ExecParams implements CommandLineParser.Options {
        /** Value of {@code -i/--input} (required): path to the CoNLL training data. */
        protected String trainingPath;
        /** Value of {@code -v/--validation}; {@code null} when the option is absent. */
        protected String validationPath;
        /** Value of {@code -t/--test}; {@code null} when the option is absent. */
        protected String testPath;
        /** Value of {@code -o/--output} (required): where the model is written. */
        protected String outputPath;
        /** Value of {@code -l/--language}; set to {@code "en"} when the option is absent. */
        protected String language;
        /** {@code true} when the {@code -u/--ui} flag is present. */
        protected boolean trainingUI = false;

        protected ExecParams() {
        }

        /**
         * Copies the parsed option values into this object's fields.
         *
         * @param commandLine parsed command line to read option values from
         */
        public void setParams(CommandLine commandLine) {
            this.trainingPath = commandLine.getOptionValue("i");
            this.validationPath = commandLine.getOptionValue("v");
            this.testPath = commandLine.getOptionValue("t");
            this.outputPath = commandLine.getOptionValue("o");
            this.trainingUI = commandLine.hasOption("u");
            this.language = commandLine.getOptionValue("l", "en");
        }

        /**
         * No-op. Despite its name this is an ordinary {@code void} method, not a constructor
         * (it declares a return type and lives in {@code ExecParams}). It is kept only so that
         * any existing caller or subclass referencing it continues to compile.
         */
        protected void TrainMentionAnnotatorCoNLL() {
        }

        /**
         * Declares the options this tool accepts: required {@code -i} and {@code -o}, optional
         * {@code -v}, {@code -t}, {@code -l} (all taking an argument) and the flag {@code -u}.
         *
         * @return a freshly built option set
         */
        public Options setUpCliOptions() {
            Options options = new Options();
            options.addRequiredOption("i", "input", true, "path to input training data (CoNLL format)");
            options.addOption("v", "validation", true, "path to validation data (CoNLL format)");
            options.addOption("t", "test", true, "path to test data (CoNLL format)");
            options.addRequiredOption("o", "output", true, "path to create and store the model");
            options.addOption("l", "language", true, "language to use for sentence splitting and stopwords (EN or DE)");
            options.addOption("u", "ui", false, "enable training UI (http://127.0.0.1:9000)");
            return options;
        }
    }

    /**
     * Parses the arguments, runs the training and terminates the JVM: exit status 0 after a
     * completed run, exit status 1 (after printing the parse error and the usage text) when
     * the arguments cannot be parsed.
     *
     * @param args raw command-line arguments
     * @throws IOException propagated from {@link #runTraining(ExecParams)}
     */
    public static void main(String[] args) throws IOException {
        ExecParams params = new ExecParams();
        try {
            new CommandLineParser(params).parse(args);
            new TrainMentionAnnotatorCoNLL().runTraining(params);
            System.exit(0);
        } catch (ParseException e) {
            // Tell the user what was wrong with the arguments; the usage text alone does not
            // say which option was missing or malformed.
            if (e.getMessage() != null) {
                System.err.println("Error: " + e.getMessage());
            }
            new HelpFormatter().printHelp("texoo-train-ner", "TeXoo: train MentionAnnotator with CoNLL annotations", params.setUpCliOptions(), "", true);
            System.exit(1);
        }
    }

    /**
     * Reads the training dataset, builds and trains a {@link MentionAnnotator} on it and
     * writes the model to the configured output location.
     *
     * <p>Only {@code trainingPath}, {@code outputPath}, {@code language} and
     * {@code trainingUI} are consumed here. {@code validationPath} and {@code testPath} are
     * not used by this method; a warning is logged when either one is set so the caller is
     * not left assuming that validation or testing took place.
     *
     * @param execParams parsed parameters; {@code trainingPath} and {@code outputPath} are
     *                   expected to be set
     * @throws IOException declared for I/O failures in the read/train/write steps
     */
    protected void runTraining(ExecParams execParams) throws IOException {
        if (execParams.validationPath != null) {
            log.warn("Validation path '{}' was given but is not used by this trainer", execParams.validationPath);
        }
        if (execParams.testPath != null) {
            log.warn("Test path '{}' was given but is not used by this trainer", execParams.testPath);
        }

        Resource trainingResource = Resource.fromDirectory(execParams.trainingPath);
        Resource outputResource = Resource.fromDirectory(execParams.outputPath);
        WordHelpers.Language language = WordHelpers.getLanguage(execParams.language);

        // The dataset is named after the file name of the training resource.
        Dataset trainingData = CoNLLDatasetReader.readDataset(
                trainingResource, trainingResource.getFileName(), CoNLLDatasetReader.Charset.UTF_8);

        MentionAnnotator annotator = new MentionAnnotator.Builder()
                .withEncoders("tri", new PositionEncoder(), new SurfaceEncoder(), new TrigramEncoder())
                .enableTrainingUI(execParams.trainingUI)
                .pretrain(trainingData)
                .build();

        // NOTE(review): the meaning of the trailing arguments (-1, false, true) is defined by
        // MentionAnnotator.trainModel and is not visible from this file - confirm before changing.
        annotator.trainModel(trainingData, Annotation.Source.GOLD, language, -1, false, true);
        annotator.writeModel(outputResource);
    }
}
