package de.datexis.ner.exec;

import de.datexis.common.CommandLineParser;
import de.datexis.common.Resource;
import de.datexis.common.WordHelpers;
import de.datexis.encoder.impl.PositionEncoder;
import de.datexis.encoder.impl.SurfaceEncoder;
import de.datexis.encoder.impl.TrigramEncoder;
import de.datexis.model.Annotation;
import de.datexis.model.Dataset;
import de.datexis.ner.MatchingAnnotator;
import de.datexis.ner.MentionAnnotator;
import de.datexis.reader.RawTextDatasetReader;
import java.io.IOException;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:de/datexis/ner/exec/TrainMentionAnnotatorSeedList.class */
/**
 * Command line entry point that trains a {@link MentionAnnotator} on raw text,
 * given a seed list of terms, and writes the resulting model to an output path.
 *
 * <p>Options (see {@link ExecParams#setUpCliOptions()}): {@code -i} input,
 * {@code -s} seed list, {@code -o} output (all required), {@code -l} language
 * and {@code -u} training UI (both optional).
 */
public class TrainMentionAnnotatorSeedList {
    protected static final Logger log = LoggerFactory.getLogger(TrainMentionAnnotatorSeedList.class);

    /**
     * Holder for the command line parameters of this program. Instances are
     * populated by {@link CommandLineParser} through {@link #setParams(CommandLine)}.
     */
    public static class ExecParams implements CommandLineParser.Options {
        /** Value of {@code -i}: path or file name of the raw input text. */
        protected String inputFiles;
        /** Value of {@code -s}: path to the seed list text file. */
        protected String seedList;
        /** Value of {@code -l}; falls back to {@code "en"} when the option is absent. */
        protected String language;
        /** Value of {@code -o}: path where the model is created and stored. */
        protected String outputPath = null;
        /** {@code true} when the {@code -u} flag was given. */
        protected boolean trainingUI = false;

        protected ExecParams() {
        }

        /**
         * Copies the parsed option values into the fields of this object.
         *
         * @param commandLine the parsed command line
         */
        public void setParams(CommandLine commandLine) {
            this.inputFiles = commandLine.getOptionValue("i");
            this.seedList = commandLine.getOptionValue("s");
            this.outputPath = commandLine.getOptionValue("o");
            this.trainingUI = commandLine.hasOption("u");
            this.language = commandLine.getOptionValue("l", "en");
        }

        /**
         * Declares the options understood by this program.
         *
         * @return a fresh {@link Options} instance with three required options
         *         ({@code i}, {@code s}, {@code o}) and two optional ones ({@code l}, {@code u})
         */
        public Options setUpCliOptions() {
            Options options = new Options();
            options.addRequiredOption("i", "input", true, "path or file name for raw input text");
            options.addRequiredOption("s", "seed", true, "path to seed list text file");
            options.addRequiredOption("o", "output", true, "path to create and store the model");
            options.addOption("l", "language", true, "language to use for sentence splitting and stopwords (EN or DE)");
            options.addOption("u", "ui", false, "enable training UI (http://127.0.0.1:9000)");
            return options;
        }
    }

    /**
     * Parses the arguments, runs the training and terminates the JVM.
     *
     * <p>Exits with status 0 after a successful run. If the arguments cannot be
     * parsed, the parser's error message and the usage help are printed and the
     * JVM exits with status 1.
     *
     * @param strArr the raw command line arguments
     * @throws IOException propagated from {@link #runTraining(ExecParams)}
     */
    public static void main(String[] strArr) throws IOException {
        ExecParams execParams = new ExecParams();
        try {
            new CommandLineParser(execParams).parse(strArr);
            new TrainMentionAnnotatorSeedList().runTraining(execParams);
            System.exit(0);
        } catch (ParseException e) {
            // Tell the user WHY parsing failed (e.g. which required option is
            // missing) before showing the generic usage text.
            System.err.println(e.getMessage());
            new HelpFormatter().printHelp(
                    "texoo-train-ner-seed",
                    "TeXoo: train MentionAnnotator with seed list",
                    execParams.setUpCliOptions(),
                    "",
                    true);
            System.exit(1);
        }
    }

    /**
     * Reads the raw text dataset, builds and trains a {@link MentionAnnotator}
     * and writes the model to the configured output path.
     *
     * @param execParams the parsed program parameters
     * @throws IOException declared for I/O failures of the called reader/writer components
     */
    protected void runTraining(ExecParams execParams) throws IOException {
        // Resolve all configured locations up front.
        Resource inputPath = Resource.fromDirectory(execParams.inputFiles);
        Resource outputPath = Resource.fromDirectory(execParams.outputPath);
        Resource seedList = Resource.fromDirectory(execParams.seedList);
        WordHelpers.Language lang = WordHelpers.getLanguage(execParams.language);

        Dataset train = new RawTextDatasetReader().read(inputPath);

        // NOTE(review): this matcher is loaded with the seed terms but is never
        // applied to `train` and is discarded afterwards. If training on
        // Annotation.Source.SILVER relies on seed-list matches being written
        // into the dataset, an annotation step is missing here - confirm
        // against the MatchingAnnotator API before changing.
        MatchingAnnotator matcher = new MatchingAnnotator(MatchingAnnotator.MatchingStrategy.LOWERCASE);
        matcher.loadTermsToMatch(seedList);

        MentionAnnotator ner = new MentionAnnotator.Builder()
                .withEncoders("tri", new PositionEncoder(), new SurfaceEncoder(), new TrigramEncoder())
                .enableTrainingUI(execParams.trainingUI)
                .withTrainingParams(1.0E-4d, 16, 1)
                .withModelParams(512, 256)
                .withWorkspaceParams(1)
                .pretrain(train)
                .build();

        ner.trainModel(train, Annotation.Source.SILVER, lang, 5000, false, true);

        System.out.println("saving model to path: " + outputPath);
        ner.writeModel(outputPath);
    }
}
