package de.julielab.jtbd;

import cc.mallet.fst.CRF;
import cc.mallet.pipe.Pipe;
import cc.mallet.types.Instance;
import cc.mallet.types.InstanceList;
import cc.mallet.types.LabelSequence;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.ObjectInputStream;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Random;
import java.util.zip.GZIPInputStream;
import org.apache.commons.io.FileUtils;

/* loaded from: input_file:de/julielab/jtbd/TokenizerApplication.class */
public class TokenizerApplication {

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:de/julielab/jtbd/TokenizerApplication$EvalResult.class */
    /**
     * Plain holder for the figures of one evaluation run.
     *
     * <p>Precision, recall and F-score are derived from {@link #corrDecisions},
     * {@link #fp} and {@link #fn}. Whenever a denominator is zero (e.g. nothing
     * was evaluated) the derived value is reported as {@code 0.0} instead of
     * {@code NaN}, so that sums and averages over several results stay finite.
     */
    public static class EvalResult {
        /** Accuracy of the run; set by the code that fills this result. */
        double ACC;
        /** Number of false positive decisions. */
        double fp;
        /** Number of false negative decisions. */
        double fn;
        /** Number of correct decisions. */
        double corrDecisions;

        private EvalResult() {
        }

        /**
         * Package-visible constructor used by the enclosing class; the argument
         * is ignored and may be {@code null}.
         */
        /* synthetic */ EvalResult(EvalResult evalResult) {
            this();
        }

        /**
         * @return the harmonic mean of recall and precision, or {@code 0.0}
         *         when both are zero
         */
        double getF() {
            double recall = getR();
            double precision = getP();
            return ratio((2.0d * recall) * precision, recall + precision);
        }

        /**
         * @return {@code corrDecisions / (corrDecisions + fp)}, or {@code 0.0}
         *         when the denominator is zero
         */
        double getP() {
            return ratio(this.corrDecisions, this.corrDecisions + this.fp);
        }

        /**
         * @return {@code corrDecisions / (corrDecisions + fn)}, or {@code 0.0}
         *         when the denominator is zero
         */
        double getR() {
            return ratio(this.corrDecisions, this.corrDecisions + this.fn);
        }

        /** Divides, mapping a zero denominator to 0.0 rather than NaN/Infinity. */
        private static double ratio(double numerator, double denominator) {
            return denominator == 0.0d ? 0.0d : numerator / denominator;
        }
    }

    /**
     * Runs a single 90/10 split evaluation: both files are read, shuffled with
     * the same fixed seed, the first 90% are used for training and the last 10%
     * for testing.
     *
     * @param file       file with the original sentences, one per line
     * @param file2      file with the tokenized sentences, line-aligned with {@code file}
     * @param arrayList  receives the predicted tokenization of each test sentence
     * @param arrayList2 receives the error report lines
     * @return the evaluation result on the test part
     */
    private static EvalResult do9010Evaluation(File file, File file2, ArrayList<String> arrayList, ArrayList<String> arrayList2) {
        ArrayList<String> sentences = readFile(file);
        ArrayList<String> tokenized = readFile(file2);
        // Both lists are shuffled independently with the same seed; this only
        // keeps sentence/tokenization pairs together when the sizes are equal.
        if (sentences.size() != tokenized.size()) {
            System.err.println("Error: sentence file and tokenized file differ in their number of usable lines ("
                    + sentences.size() + " vs. " + tokenized.size() + ").");
            System.exit(-1);
        }
        Collections.shuffle(sentences, new Random(1L));
        Collections.shuffle(tokenized, new Random(1L));
        int total = sentences.size();
        int testSize = (int) (total * 0.1d);
        int trainSize = total - testSize;
        if (testSize == 0) {
            System.err.println("Error: no test files for this split.");
            System.exit(-1);
        }
        System.out.println("all: " + total + "\ttrain: " + trainSize + "\ttest: " + testSize);
        ArrayList<String> trainSentences = new ArrayList<>(sentences.subList(0, trainSize));
        ArrayList<String> trainTokenized = new ArrayList<>(tokenized.subList(0, trainSize));
        ArrayList<String> testSentences = new ArrayList<>(sentences.subList(trainSize, total));
        ArrayList<String> testTokenized = new ArrayList<>(tokenized.subList(trainSize, total));
        return doEvaluation(trainSentences, trainTokenized, testSentences, testTokenized, arrayList2, arrayList);
    }

    /**
     * Builds training data from the two given files and reports how many
     * features the resulting data alphabet contains.
     *
     * @param file  file with the original sentences
     * @param file2 file with the tokenized sentences
     */
    private static void doCheck(File file, File file2) {
        final Tokenizer checker = new Tokenizer();
        final String banner = "checking on files: \n * " + file.toString() + "\n * " + file2.toString() + "\n";
        System.out.println(banner);

        final ArrayList<String> sentences = readFile(file);
        final ArrayList<String> tokenized = readFile(file2);
        final InstanceList trainingData = checker.makeTrainingData(sentences, tokenized);
        final int featureCount = trainingData.getPipe().getDataAlphabet().size();

        System.out.println("\n\n\n# Features resulting from training data: " + featureCount);
        System.out.println("(critical sentences were omitted for feature generation)");
        System.out.println("Done.");
    }

    /**
     * Runs an n-fold cross validation. The data is shuffled with a fixed seed
     * and cut into {@code i} consecutive folds of equal size; the last fold
     * additionally receives the remainder. In every round one fold is used for
     * prediction and all other sentences for training.
     *
     * @param i          number of cross-validation rounds; must be at least 2 and
     *                   not larger than the number of sentences
     * @param file       file with the original sentences, one per line
     * @param file2      file with the tokenized sentences, line-aligned with {@code file}
     * @param arrayList  receives the predicted tokenization of every test sentence
     * @param arrayList2 receives the error report lines
     * @return the accuracy averaged over all rounds
     */
    private static double doCrossEvaluation(int i, File file, File file2, ArrayList<String> arrayList, ArrayList<String> arrayList2) {
        ArrayList<String> sentences = readFile(file);
        ArrayList<String> tokenized = readFile(file2);
        int total = sentences.size();
        // Both lists are shuffled independently with the same seed; this only
        // keeps sentence/tokenization pairs together when the sizes are equal.
        if (tokenized.size() != total) {
            System.err.println("Error: sentence file and tokenized file differ in their number of usable lines ("
                    + total + " vs. " + tokenized.size() + ").");
            System.exit(-1);
        }
        // Fewer than 2 rounds leaves no training data (or divides by zero);
        // more rounds than sentences produces empty test folds.
        if (i < 2 || i > total) {
            System.err.println("Error: the number of cross-validation rounds must be between 2 and the number of sentences ("
                    + total + "), but was " + i + ".");
            System.exit(-1);
        }
        Collections.shuffle(sentences, new Random(1L));
        Collections.shuffle(tokenized, new Random(1L));
        int foldSize = total / i;
        System.out.println("number of files in directory: " + total);
        System.out.println("size of each/last round: " + foldSize + "/" + (foldSize + (total % i)));
        System.out.println();
        EvalResult[] results = new EvalResult[i];
        for (int round = 0; round < i; round++) {
            int testStart = round * foldSize;
            // The last fold runs to the end so that the remainder is not lost.
            int testEnd = (round == i - 1) ? total : testStart + foldSize;
            ArrayList<String> testSentences = new ArrayList<>();
            ArrayList<String> testTokenized = new ArrayList<>();
            ArrayList<String> trainSentences = new ArrayList<>();
            ArrayList<String> trainTokenized = new ArrayList<>();
            for (int k = 0; k < total; k++) {
                if (k >= testStart && k < testEnd) {
                    testSentences.add(sentences.get(k));
                    testTokenized.add(tokenized.get(k));
                } else {
                    trainSentences.add(sentences.get(k));
                    trainTokenized.add(tokenized.get(k));
                }
            }
            System.out.println("training size: " + trainSentences.size());
            System.out.println("prediction size: " + testSentences.size());
            results[round] = doEvaluation(trainSentences, trainTokenized, testSentences, testTokenized, arrayList2, arrayList);
        }
        DecimalFormat decimalFormat = new DecimalFormat("0.000");
        double accSum = 0.0d;
        double fSum = 0.0d;
        for (int round = 0; round < results.length; round++) {
            accSum += results[round].ACC;
            fSum += results[round].getF();
            System.out.println("ACC in round " + round + ": " + decimalFormat.format(results[round].ACC));
        }
        double avgAcc = accSum / i;
        System.out.println("\n\n------------------------------------");
        System.out.println("avg accuracy: " + decimalFormat.format(avgAcc));
        System.out.println("avg F-score: " + decimalFormat.format(fSum / i));
        System.out.println("------------------------------------");
        return avgAcc;
    }

    /**
     * Trains a fresh model on the given training data and evaluates it on the
     * given prediction data.
     *
     * @param arrayList  training sentences
     * @param arrayList2 tokenized training sentences
     * @param arrayList3 sentences to predict on
     * @param arrayList4 tokenized (gold) version of the sentences to predict on
     * @param arrayList5 receives the error report lines
     * @param arrayList6 receives the predicted tokenization of each sentence
     * @return the evaluation result of the trained model
     */
    public static EvalResult doEvaluation(ArrayList<String> arrayList, ArrayList<String> arrayList2, ArrayList<String> arrayList3, ArrayList<String> arrayList4, ArrayList<String> arrayList5, ArrayList<String> arrayList6) {
        final Tokenizer trainer = new Tokenizer();
        final InstanceList trainingInstances = trainer.makeTrainingData(arrayList, arrayList2);
        final Pipe featurePipe = trainingInstances.getPipe();

        System.out.println("training model...");
        trainer.train(trainingInstances, featurePipe);

        final CRF trainedModel = trainer.getModel();
        return doEvaluation(trainedModel, arrayList3, arrayList4, arrayList5, arrayList6);
    }

    /**
     * Evaluates a trained model on sentences with a known tokenization.
     *
     * <p>Every unit that is not a whitespace unit ("WS") and not the last unit
     * of its sentence counts as one decision. A decision is correct when the
     * predicted label equals the gold label; gold "P" predicted as "N" counts
     * as a false negative, gold "N" predicted as "P" as a false positive.
     *
     * @param crf        the model to evaluate
     * @param arrayList  original sentences
     * @param arrayList2 tokenized (gold) sentences, aligned with {@code arrayList}
     * @param arrayList3 receives the error report lines
     * @param arrayList4 receives the predicted tokenization of each sentence
     * @return accuracy, false positives/negatives and correct decisions of this run
     */
    private static EvalResult doEvaluation(CRF crf, ArrayList<String> arrayList, ArrayList<String> arrayList2, ArrayList<String> arrayList3, ArrayList<String> arrayList4) {
        Tokenizer tokenizer = new Tokenizer();
        tokenizer.setModel(crf);
        InstanceList predictionData = tokenizer.makePredictionData(arrayList, arrayList2);
        int nrDecisions = 0;
        int corrDecisions = 0;
        int fp = 0;
        int fn = 0;
        for (int s = 0; s < predictionData.size(); s++) {
            String sentence = arrayList.get(s);
            String goldTokenization = arrayList2.get(s);
            // The last character of the sentence is appended to the prediction below.
            String lastChar = sentence.isEmpty() ? "" : sentence.substring(sentence.length() - 1);
            Instance instance = (Instance) predictionData.get(s);
            ArrayList<Unit> predictedUnits = tokenizer.predict(instance);
            ArrayList<String> goldLabels = tokenizer.getLabelsFromLabelSequence((LabelSequence) instance.getTarget());
            List<?> sourceUnits = (List<?>) instance.getSource();
            StringBuilder predicted = new StringBuilder();
            boolean hasError = false;
            int lastIndex = predictedUnits.size() - 1;
            for (int u = 0; u < predictedUnits.size(); u++) {
                Unit unit = predictedUnits.get(u);
                predicted.append(unit.rep);
                if ("P".equals(unit.label)) {
                    predicted.append(' ');
                }
                // Whitespace units and the final unit are not evaluated.
                if (u == lastIndex || "WS".equals(sourceUnits.get(u))) {
                    continue;
                }
                nrDecisions++;
                String goldLabel = goldLabels.get(u);
                if (goldLabel.equals(unit.label)) {
                    corrDecisions++;
                    continue;
                }
                hasError = true;
                if (goldLabel.equals("P") && "N".equals(unit.label)) {
                    fn++;
                } else if (goldLabel.equals("N") && "P".equals(unit.label)) {
                    fp++;
                }
                arrayList3.add("@" + goldLabel + "->" + unit.label);
                arrayList3.add(tokenizer.showErrorContext(u, predictedUnits, goldLabels));
            }
            // Separate the final character by a blank unless the prediction
            // already ends with one (an empty prediction does not).
            boolean endsWithBlank = predicted.length() > 0 && predicted.charAt(predicted.length() - 1) == ' ';
            String predictedSentence = predicted + (endsWithBlank ? lastChar : " " + lastChar);
            arrayList4.add(predictedSentence);
            if (hasError) {
                arrayList3.add(predictedSentence);
                arrayList3.add(goldTokenization);
                arrayList3.add("\n");
            }
        }
        // Floating-point division: dividing the two int counters directly would
        // truncate the accuracy to 0 or 1 and throw when there are no decisions.
        double acc = nrDecisions == 0 ? 0.0d : (double) corrDecisions / nrDecisions;
        EvalResult evalResult = new EvalResult(null);
        evalResult.ACC = acc;
        evalResult.fn = fn;
        evalResult.fp = fp;
        evalResult.corrDecisions = corrDecisions;
        System.out.println("\n* ------------------------------------");
        System.out.println("* critical decisions: " + nrDecisions);
        System.out.println("* correct decisions: " + corrDecisions);
        System.out.println("* fp: " + fp);
        System.out.println("* fn: " + fn);
        System.out.println("* R: " + evalResult.getR());
        System.out.println("* P: " + evalResult.getP());
        System.out.println("* F: " + evalResult.getF());
        System.out.println("* ACC = " + acc);
        System.out.println("* ------------------------------------\n");
        return evalResult;
    }

    /**
     * Tokenizes every regular file of an input directory with a previously
     * trained model and writes the result to a file of the same name in the
     * output directory.
     *
     * @param file  input directory; each regular file in it is read as UTF-8,
     *              one sentence per line
     * @param file2 output directory
     * @param str   path of the model file to load
     * @throws IOException if the model cannot be loaded, the input directory
     *                     cannot be listed or an input file cannot be read
     */
    public static void doPrediction(File file, File file2, String str) throws IOException {
        Tokenizer tokenizer = new Tokenizer();
        try {
            tokenizer.readModel(new File(str));
        } catch (Exception e) {
            // Without a model no prediction is possible, so fail right away
            // instead of continuing with an unusable tokenizer.
            throw new IOException("could not read model file: " + str, e);
        }
        File[] inputFiles = file.listFiles();
        if (inputFiles == null) {
            // listFiles() returns null when the path is no directory or cannot be read.
            throw new IOException("cannot list input directory: " + file);
        }
        for (File inputFile : inputFiles) {
            if (!inputFile.isFile()) {
                continue;
            }
            long start = System.currentTimeMillis();
            List<String> lines = FileUtils.readLines(inputFile, "utf-8");
            // No gold tokenization is available in prediction mode; pass one
            // empty string per input line instead.
            ArrayList<String> emptyGold = new ArrayList<>(Collections.nCopies(lines.size(), ""));
            ArrayList<String> tokenizedLines = new ArrayList<>();
            InstanceList predictionData = tokenizer.makePredictionData(lines, emptyGold);
            for (int l = 0; l < predictionData.size(); l++) {
                String line = lines.get(l);
                ArrayList<Unit> units = tokenizer.predict((Instance) predictionData.get(l));
                StringBuilder tokenized = new StringBuilder();
                for (Unit unit : units) {
                    tokenized.append(unit.rep);
                    if ("P".equals(unit.label)) {
                        tokenized.append(' ');
                    }
                }
                // An empty line has no last character to inspect.
                if (!line.isEmpty()) {
                    char lastChar = line.charAt(line.length() - 1);
                    if (EOSSymbols.contains(Character.valueOf(lastChar))) {
                        tokenized.append(' ').append(lastChar);
                    }
                }
                tokenizedLines.add(tokenized.toString().replaceAll(" +", " "));
            }
            // File.getName() works with any path separator, unlike cutting at "/".
            writeFile(tokenizedLines, new File(file2, inputFile.getName()));
            System.gc();
            System.out.println("took: " + (System.currentTimeMillis() - start));
        }
        System.out.println("Tokenized texts written to: " + file2.toString());
    }

    /**
     * Trains a tokenizer model on the given data and stores it.
     *
     * @param file  file with the original sentences
     * @param file2 file with the tokenized sentences
     * @param str   path the trained model is written to
     */
    public static void doTraining(File file, File file2, String str) {
        final Tokenizer trainer = new Tokenizer();

        final ArrayList<String> sentences = readFile(file);
        final ArrayList<String> tokenized = readFile(file2);
        final InstanceList trainingInstances = trainer.makeTrainingData(sentences, tokenized);
        final Pipe featurePipe = trainingInstances.getPipe();

        System.out.println("training model...");
        trainer.train(trainingInstances, featurePipe);

        trainer.writeModel(str);
        System.out.println("\nmodel written to: " + str);
    }

    /**
     * Command-line entry point. The first argument selects the mode, the
     * remaining arguments are handed to the handler of that mode.
     *
     * @param strArr command-line arguments
     * @throws IOException if the prediction mode fails with an I/O error
     */
    public static void main(String[] strArr) throws IOException {
        if (strArr.length < 1) {
            System.err.println("usage: JTBD <mode> <mode-specific-parameters>");
            showModes();
            System.exit(-1);
        }
        final String mode = strArr[0];
        switch (mode) {
            case "c":
                startCheckMode(strArr);
                break;
            case "s":
                start9010ValidationMode(strArr);
                break;
            case "x":
                startXValidationMode(strArr);
                break;
            case "t":
                startTrainingMode(strArr);
                break;
            case "p":
                startPredictionMode(strArr);
                break;
            case "e":
                startCompareValidationMode(strArr);
                break;
            default:
                System.err.println("unknown mode");
                showModes();
                break;
        }
    }

    /**
     * Reads a text file line by line. Runs of blanks are collapsed into a
     * single blank, each line is trimmed, and lines that end up with at most
     * one character are dropped.
     *
     * <p>The file is decoded as UTF-8, matching the encoding used when reading
     * input in prediction mode. On any read error a message is printed and the
     * JVM exits with status -1.
     *
     * @param file the file to read
     * @return the normalized, non-trivial lines in file order
     */
    static ArrayList<String> readFile(File file) {
        ArrayList<String> lines = new ArrayList<>();
        // try-with-resources closes the reader even when reading fails.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String normalized = line.replaceAll("[ ]+", " ").trim();
                // A trimmed string can never equal " ", so the length check suffices.
                if (normalized.length() > 1) {
                    lines.add(normalized);
                }
            }
        } catch (Exception e) {
            System.err.println("ERR: error reading file: " + file.toString());
            e.printStackTrace();
            System.exit(-1);
        }
        return lines;
    }

    /**
     * Prints the list of available modes to standard error and terminates the
     * JVM with exit status -1.
     */
    private static void showModes() {
        final String[] helpLines = {
            "\nAvailable modes:",
            "c: check data ",
            "s: 90-10 split evaluation",
            "x: cross validation ",
            "t: train a tokenizer ",
            "p: predict with tokenizer ",
            "e: evaluation on previously trained model",
        };
        for (String helpLine : helpLines) {
            System.err.println(helpLine);
        }
        System.exit(-1);
    }

    /**
     * Handler for mode "s": checks the argument count, runs the 90/10 split
     * evaluation and writes the predictions and the error report to the two
     * output files.
     *
     * @param strArr the complete command-line arguments, including the mode
     */
    private static void start9010ValidationMode(String[] strArr) {
        final boolean wrongArgCount = strArr.length != 5;
        if (wrongArgCount) {
            System.err.println("usage: JTBD s <sent-file> <tok-file> <predout-file> <errout-file>");
            System.exit(-1);
        }
        final File sentFile = new File(strArr[1]);
        final File tokFile = new File(strArr[2]);
        final File predOutFile = new File(strArr[3]);
        final File errOutFile = new File(strArr[4]);

        final ArrayList<String> errors = new ArrayList<String>();
        final ArrayList<String> predictions = new ArrayList<String>();
        do9010Evaluation(sentFile, tokFile, predictions, errors);

        writeFile(predictions, predOutFile);
        writeFile(errors, errOutFile);
    }

    /**
     * Handler for mode "c": checks the argument count and runs the data check
     * on the given sentence and token files.
     *
     * @param strArr the complete command-line arguments, including the mode
     */
    private static void startCheckMode(String[] strArr) {
        final int expectedArgs = 3;
        if (strArr.length != expectedArgs) {
            System.err.println("usage: JTBD c <sent-file> <tok-file>");
            System.exit(-1);
        }
        final File sentFile = new File(strArr[1]);
        final File tokFile = new File(strArr[2]);
        doCheck(sentFile, tokFile);
    }

    /**
     * Handler for mode "e": loads a previously trained model, evaluates it on
     * the given sentence/token files and writes the predictions and the error
     * report to the two output files.
     *
     * <p>If the model cannot be loaded, an error is printed and the JVM exits
     * with status -1.
     *
     * @param strArr the complete command-line arguments, including the mode
     */
    private static void startCompareValidationMode(String[] strArr) {
        if (strArr.length != 6) {
            System.err.println("usage: JTBD e <modelFile> <sent-file> <tok-file> <predout-file> <errout-file>");
            System.exit(-1);
        }
        CRF crf = null;
        // NOTE(review): this uses Java native deserialization; only load model
        // files from a trusted source.
        // Each stream is declared separately so that all of them are closed
        // even when a wrapping constructor or readObject() fails.
        try (FileInputStream fileIn = new FileInputStream(strArr[1]);
                GZIPInputStream gzipIn = new GZIPInputStream(fileIn);
                ObjectInputStream objectIn = new ObjectInputStream(gzipIn)) {
            crf = (CRF) objectIn.readObject();
        } catch (Exception e) {
            // Evaluating without a model cannot work; stop instead of going on with null.
            System.err.println("ERR: error reading model file: " + strArr[1]);
            e.printStackTrace();
            System.exit(-1);
        }
        ArrayList<String> sentences = readFile(new File(strArr[2]));
        ArrayList<String> tokenized = readFile(new File(strArr[3]));
        File predOutFile = new File(strArr[4]);
        File errOutFile = new File(strArr[5]);
        ArrayList<String> errors = new ArrayList<>();
        ArrayList<String> predictions = new ArrayList<>();
        doEvaluation(crf, sentences, tokenized, errors, predictions);
        writeFile(predictions, predOutFile);
        writeFile(errors, errOutFile);
    }

    /**
     * Handler for mode "p": checks the argument count and both directories,
     * then runs the prediction.
     *
     * @param strArr the complete command-line arguments, including the mode
     * @throws IOException if the prediction fails with an I/O error
     */
    private static void startPredictionMode(String[] strArr) throws IOException {
        if (strArr.length != 4) {
            System.err.println("usage: JTBD p <inDir> <outDir> <model-file>");
            System.exit(-1);
        }

        final File inDir = new File(strArr[1]);
        final boolean inputUsable = inDir.isDirectory();
        if (!inputUsable) {
            System.err.println("Error: the specified input directory does not exist.");
            System.exit(-1);
        }

        final File outDir = new File(strArr[2]);
        final boolean outputUsable = outDir.isDirectory() && outDir.canWrite();
        if (!outputUsable) {
            System.err.println("Error: the specified output directory does not exist or is not writable.");
            System.exit(-1);
        }

        final String modelPath = strArr[3];
        doPrediction(inDir, outDir, modelPath);
    }

    /**
     * Handler for mode "t": checks the argument count and trains a model from
     * the given sentence and token files.
     *
     * @param strArr the complete command-line arguments, including the mode
     */
    private static void startTrainingMode(String[] strArr) {
        final int expectedArgs = 4;
        if (strArr.length != expectedArgs) {
            System.err.println("usage: JTBD t <sent-file> <tok-file> <model-file>");
            System.exit(-1);
        }
        final File sentFile = new File(strArr[1]);
        final File tokFile = new File(strArr[2]);
        final String modelPath = strArr[3];
        doTraining(sentFile, tokFile, modelPath);
    }

    /**
     * Handler for mode "x": checks the arguments, runs the cross validation
     * and writes the predictions and the error report to the two output files.
     *
     * <p>If the number of rounds is not an integer, an error is printed and
     * the JVM exits with status -1.
     *
     * @param strArr the complete command-line arguments, including the mode
     */
    private static void startXValidationMode(String[] strArr) {
        if (strArr.length != 6) {
            System.err.println("usage: JTBD x <sent-file> <tok-file> <cross-val-rounds> <predout-file> <errout-file>");
            System.exit(-1);
        }
        File sentFile = new File(strArr[1]);
        File tokFile = new File(strArr[2]);
        int rounds = 0;
        try {
            // Integer.parseInt replaces the deprecated new Integer(String).
            rounds = Integer.parseInt(strArr[3]);
        } catch (NumberFormatException e) {
            System.err.println("Error: <cross-val-rounds> must be an integer, but was: " + strArr[3]);
            System.exit(-1);
        }
        File predOutFile = new File(strArr[4]);
        File errOutFile = new File(strArr[5]);
        ArrayList<String> errors = new ArrayList<>();
        ArrayList<String> predictions = new ArrayList<>();
        doCrossEvaluation(rounds, sentFile, tokFile, predictions, errors);
        writeFile(predictions, predOutFile);
        writeFile(errors, errOutFile);
    }

    /**
     * Writes the given lines to a file, each followed by a single "\n". An
     * existing file is overwritten.
     *
     * <p>The file is encoded as UTF-8, matching the encoding used when reading
     * input in prediction mode. On any write error a message is printed and
     * the JVM exits with status -1.
     *
     * @param arrayList the lines to write; a {@code null} element is written as "null"
     * @param file      the file to write to
     */
    static void writeFile(ArrayList<String> arrayList, File file) {
        // try-with-resources flushes and closes the writer even when writing fails.
        try (BufferedWriter writer = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(file), StandardCharsets.UTF_8))) {
            for (String line : arrayList) {
                writer.write(String.valueOf(line));
                writer.write("\n");
            }
        } catch (Exception e) {
            System.err.println("ERR: error writing file: " + file.toString());
            e.printStackTrace();
            System.exit(-1);
        }
    }
}
