package justhalf.nlp.reader.acereader;

import edu.stanford.nlp.ling.CoreLabel;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import justhalf.nlp.postagger.POSTagger;
import justhalf.nlp.reader.acereader.ACERelation;
import justhalf.nlp.sentencesplitter.SentenceSplitter;
import justhalf.nlp.tokenizer.Tokenizer;
import org.xml.sax.SAXException;

/* loaded from: input_file:justhalf/nlp/reader/acereader/ACEReader.class */
public class ACEReader {
    public static final List<String> ACE2004_DOMAINS = Arrays.asList("arabic_treebank", "bnews", "chinese_treebank", "fisher_transcripts", "nwire");
    public static final List<String> ACE2005_DOMAINS = Arrays.asList("bc", "bn", "cts", "nw", "un", "wl");

    /* JADX WARN: Can't fix incorrect switch cases order, some code will duplicate */
    /* JADX WARN: Code restructure failed: missing block: B:128:0x046b, code lost:
    
        switch(r43) {
            case 0: goto L107;
            case 1: goto L108;
            default: goto L109;
        };
     */
    /* JADX WARN: Code restructure failed: missing block: B:129:0x0484, code lost:
    
        r0 = new justhalf.nlp.tokenizer.StanfordTokenizer();
     */
    /* JADX WARN: Code restructure failed: missing block: B:130:0x04c8, code lost:
    
        r26 = r0;
        r36 = r36 + 2;
     */
    /* JADX WARN: Code restructure failed: missing block: B:132:0x0490, code lost:
    
        r0 = new justhalf.nlp.tokenizer.RegexTokenizer();
     */
    /* JADX WARN: Code restructure failed: missing block: B:133:0x049c, code lost:
    
        java.lang.System.out.println("Unrecognized tokenizer \"" + r14[r36 + 1] + "\", using stanford.");
        r0 = new justhalf.nlp.tokenizer.StanfordTokenizer();
     */
    /* JADX WARN: Removed duplicated region for block: B:112:0x0404 A[SYNTHETIC] */
    /* JADX WARN: Removed duplicated region for block: B:115:0x0412 A[SYNTHETIC] */
    /* JADX WARN: Removed duplicated region for block: B:118:0x0420 A[SYNTHETIC] */
    /* JADX WARN: Removed duplicated region for block: B:134:0x04ce A[SYNTHETIC] */
    /* JADX WARN: Removed duplicated region for block: B:146:0x0552 A[SYNTHETIC] */
    /* JADX WARN: Removed duplicated region for block: B:158:0x05d2 A[SYNTHETIC] */
    /* JADX WARN: Removed duplicated region for block: B:161:0x05db A[SYNTHETIC] */
    /* JADX WARN: Removed duplicated region for block: B:164:0x05e4 A[SYNTHETIC] */
    /* JADX WARN: Removed duplicated region for block: B:167:0x05ed A[SYNTHETIC] */
    /* JADX WARN: Removed duplicated region for block: B:170:0x05f6 A[SYNTHETIC] */
    /* JADX WARN: Removed duplicated region for block: B:173:0x05ff A[SYNTHETIC] */
    /* JADX WARN: Removed duplicated region for block: B:176:0x0610 A[SYNTHETIC] */
    /* JADX WARN: Removed duplicated region for block: B:179:0x061a A[SYNTHETIC] */
    /* JADX WARN: Removed duplicated region for block: B:182:0x02f4 A[SYNTHETIC] */
    /* JADX WARN: Removed duplicated region for block: B:74:0x0301 A[SYNTHETIC] */
    /* JADX WARN: Removed duplicated region for block: B:78:0x030e A[SYNTHETIC] */
    /* JADX WARN: Removed duplicated region for block: B:81:0x032b A[SYNTHETIC] */
    /* JADX WARN: Removed duplicated region for block: B:84:0x0344 A[SYNTHETIC] */
    /* JADX WARN: Removed duplicated region for block: B:87:0x0363 A[SYNTHETIC] */
    /* JADX WARN: Removed duplicated region for block: B:90:0x037d A[SYNTHETIC] */
    /* JADX WARN: Removed duplicated region for block: B:93:0x0386 A[SYNTHETIC] */
    /* JADX WARN: Removed duplicated region for block: B:96:0x0392 A[SYNTHETIC] */
    /*
        Code decompiled incorrectly, please refer to instructions dump.
        To view partially-correct add '--show-bad-code' argument
    */
    /**
     * Command-line entry point.
     *
     * <p>The decompiler could not reconstruct the original body of this method, so this
     * placeholder fails immediately instead of running the tool.
     *
     * @param r14 the command-line arguments; ignored by this placeholder
     * @throws java.io.FileNotFoundException declared by the original signature; never thrown here
     * @throws UnsupportedOperationException always
     */
    public static void main(java.lang.String[] r14) throws java.io.FileNotFoundException {
        final String reason = "Method not decompiled: justhalf.nlp.reader.acereader.ACEReader.main(java.lang.String[]):void";
        throw new UnsupportedOperationException(reason);
    }

    /* JADX WARN: Multi-variable type inference failed */
    /**
     * Splits a corpus into training, development and test portions and writes each portion to
     * {@code <str>/train.data}, {@code <str>/dev.data} and {@code <str>/test.data}.
     *
     * @param str base output path; the three file names are appended to it
     * @param list the documents to export
     * @param dArr the train/dev/test ratios handed to {@code splitData}
     * @param z not used by this method
     * @param tokenizer forwarded to {@code writeData}
     * @param pOSTagger forwarded to {@code writeData}
     * @param sentenceSplitter used by {@code getSentences} to cut documents into sentences
     * @param z2 forwarded to {@code writeData} (selects the one-token-per-line output)
     * @param z3 forwarded to {@code getSentences} (drops the shorter of two overlapping mentions)
     * @param z4 forwarded to {@code writeData} (selects the U/L label variants)
     * @param z5 when {@code true} the documents are split first and sentences are extracted per
     *     portion; when {@code false} all sentences are extracted first and then split
     * @param z6 whether {@code splitData} shuffles before splitting
     * @param i the shuffle seed handed to {@code splitData}
     * @throws FileNotFoundException if one of the output files cannot be opened
     */
    private static void printDataset(String str, List<ACEDocument> list, double[] dArr, boolean z, Tokenizer tokenizer, POSTagger pOSTagger, SentenceSplitter sentenceSplitter, boolean z2, boolean z3, boolean z4, boolean z5, boolean z6, int i) throws FileNotFoundException {
        List<ACESentence> trainSentences;
        List<ACESentence> devSentences;
        List<ACESentence> testSentences;
        if (z5) {
            // Document-level split: a document's sentences all land in the same portion.
            List<ACEDocument> trainDocs = new ArrayList<>();
            List<ACEDocument> devDocs = new ArrayList<>();
            List<ACEDocument> testDocs = new ArrayList<>();
            splitData(list, trainDocs, devDocs, testDocs, dArr, z6, i);
            trainSentences = getSentences(trainDocs, sentenceSplitter, z3);
            devSentences = getSentences(devDocs, sentenceSplitter, z3);
            testSentences = getSentences(testDocs, sentenceSplitter, z3);
        } else {
            // Sentence-level split: sentences of one document may end up in different portions.
            List<ACESentence> allSentences = getSentences(list, sentenceSplitter, z3);
            trainSentences = new ArrayList<>();
            devSentences = new ArrayList<>();
            testSentences = new ArrayList<>();
            splitData(allSentences, trainSentences, devSentences, testSentences, dArr, z6, i);
        }
        writeData(trainSentences, str, "/train.data", tokenizer, pOSTagger, z2, z4);
        writeData(devSentences, str, "/dev.data", tokenizer, pOSTagger, z2, z4);
        writeData(testSentences, str, "/test.data", tokenizer, pOSTagger, z2, z4);
    }

    /**
     * Cuts every document into sentences and attaches to each sentence the annotations whose span
     * lies inside it.
     *
     * <p>Entity mentions are copied and their span and head span are shifted so that they are
     * relative to the start of the sentence. Relation, event, timex and value mentions are attached
     * as they are, without copying or shifting.
     *
     * @param list the documents to process
     * @param sentenceSplitter produces the sentence boundaries, which are then passed through
     *     {@code fixSplit}
     * @param z when {@code true}, of two overlapping entity mentions only the longer one is kept;
     *     on equal length the one added first wins
     * @return the sentences of all documents, in document order
     */
    public static List<ACESentence> getSentences(List<ACEDocument> list, SentenceSplitter sentenceSplitter, boolean z) {
        List<ACESentence> sentences = new ArrayList<>();
        for (ACEDocument document : list) {
            for (CoreLabel boundary : fixSplit(sentenceSplitter.split(document.text))) {
                Span sentenceSpan = new Span(boundary.beginPosition(), boundary.endPosition());
                ACESentence sentence = new ACESentence(document, sentenceSpan, boundary.value());

                for (ACEEntityMention documentMention : document.entityMentions) {
                    if (!sentenceSpan.contains(documentMention.span)) {
                        continue;
                    }
                    // Work on a copy so the document-level offsets stay untouched.
                    ACEEntityMention localMention = new ACEEntityMention(documentMention);
                    localMention.span.start -= sentenceSpan.start;
                    localMention.span.end -= sentenceSpan.start;
                    localMention.headSpan.start -= sentenceSpan.start;
                    localMention.headSpan.end -= sentenceSpan.start;

                    boolean keep = true;
                    if (z) {
                        // Walk backwards so that removing by index does not disturb the scan.
                        for (int idx = sentence.entities.size() - 1; idx >= 0; idx--) {
                            ACEEntityMention existing = sentence.entities.get(idx);
                            if (!localMention.overlapsWith(existing)) {
                                continue;
                            }
                            if (localMention.span.length() <= existing.span.length()) {
                                keep = false;
                                break;
                            }
                            sentence.entities.remove(idx);
                        }
                    }
                    if (keep) {
                        sentence.addEntityMention(localMention);
                    }
                }

                for (ACERelationMention relationMention : document.relationMentions) {
                    if (sentenceSpan.contains(relationMention.span)) {
                        sentence.addRelationMention(relationMention);
                    }
                }
                for (ACEEventMention eventMention : document.eventMentions) {
                    if (sentenceSpan.contains(eventMention.span)) {
                        sentence.addEventMention(eventMention);
                    }
                }
                for (ACETimexMention timexMention : document.timexMentions) {
                    if (sentenceSpan.contains(timexMention.span)) {
                        sentence.addTimexMention(timexMention);
                    }
                }
                for (ACEValueMention valueMention : document.valueMentions) {
                    if (sentenceSpan.contains(valueMention.span)) {
                        sentence.addValueMention(valueMention);
                    }
                }
                sentences.add(sentence);
            }
        }
        return sentences;
    }

    /**
     * Post-processes sentence-level labels by splitting labels at newlines and merging labels that
     * directly touch each other.
     *
     * <p>Each label is examined repeatedly until it needs no further change:
     * <ul>
     *   <li>If its value contains a blank line ({@code "\n\n"}), or it ends at or before offset 70
     *       and contains a newline, it is cut at that newline. The first part is emitted and the
     *       remainder (after the whitespace run) is examined again.</li>
     *   <li>Otherwise, if the next input label begins exactly where this one ends, the next label is
     *       merged into this one and the merged label is examined again.</li>
     *   <li>Otherwise the label is emitted unchanged.</li>
     * </ul>
     *
     * <p>The labels in {@code list} are modified in place.
     *
     * @param list the labels produced by the sentence splitter
     * @return the corrected labels
     */
    private static List<CoreLabel> fixSplit(List<CoreLabel> list) {
        int indexOf;
        int indexOf2;
        ArrayList arrayList = new ArrayList();
        int i = 0;
        while (i < list.size()) {
            CoreLabel coreLabel = list.get(i);
            // coreLabel is the label currently being examined; null means it has been emitted.
            while (coreLabel != null) {
                // Look-ahead label, or null when coreLabel came from the last input element.
                CoreLabel coreLabel2 = i < list.size() - 1 ? list.get(i + 1) : null;
                // NOTE(review): the 70-character threshold presumably targets short header lines at
                // the start of a document — confirm against the original source.
                if (coreLabel.value().contains("\n\n") || (coreLabel.endPosition() <= 70 && coreLabel.value().contains("\n"))) {
                    // Locate the cut point separately in value() and originalText(), since the two
                    // strings may differ.
                    if (coreLabel.endPosition() > 70 || !coreLabel.value().contains("\n")) {
                        indexOf = coreLabel.value().indexOf("\n\n");
                        indexOf2 = coreLabel.originalText().indexOf("\n\n");
                    } else {
                        indexOf = coreLabel.value().indexOf("\n");
                        indexOf2 = coreLabel.originalText().indexOf("\n");
                    }
                    // i2 / i3: index just past the whitespace run that starts at the cut point, in
                    // value() and originalText() respectively.
                    int i2 = -1;
                    int i3 = -1;
                    Matcher matcher = Pattern.compile("[\n\t ]+").matcher(coreLabel.value().substring(indexOf));
                    if (matcher.find()) {
                        i2 = matcher.end() + indexOf;
                    }
                    Matcher matcher2 = Pattern.compile("[\n\t ]+").matcher(coreLabel.originalText().substring(indexOf2));
                    if (matcher2.find()) {
                        i3 = matcher2.end() + indexOf2;
                    }
                    // coreLabel3 is the remainder after the whitespace run. It must be built before
                    // coreLabel is truncated below, because it reads coreLabel's current fields.
                    CoreLabel coreLabel3 = new CoreLabel();
                    coreLabel3.setBeginPosition(i2 + coreLabel.beginPosition());
                    coreLabel3.setEndPosition(coreLabel.endPosition());
                    coreLabel3.setAfter(coreLabel.after());
                    coreLabel3.setBefore(coreLabel.value().substring(indexOf, i2));
                    coreLabel3.setOriginalText(coreLabel.originalText().substring(i3));
                    coreLabel3.setWord(coreLabel.word().substring(i2));
                    coreLabel3.setValue(coreLabel.value().substring(i2));
                    // Truncate coreLabel to the part before the cut point.
                    coreLabel.setEndPosition(indexOf + coreLabel.beginPosition());
                    coreLabel.setAfter(coreLabel3.before());
                    coreLabel.setOriginalText(coreLabel.originalText().substring(0, indexOf2));
                    coreLabel.setWord(coreLabel.word().substring(0, indexOf));
                    coreLabel.setValue(coreLabel.value().substring(0, indexOf));
                    arrayList.add(coreLabel);
                    // Re-examine the remainder: it may need to be split or merged again.
                    coreLabel = coreLabel3;
                } else if (coreLabel2 == null || coreLabel2.beginPosition() != coreLabel.endPosition()) {
                    // Nothing to split and the next label does not touch this one: emit as is.
                    arrayList.add(coreLabel);
                    coreLabel = null;
                } else {
                    // The next label starts exactly where this one ends: absorb it, skip it in the
                    // outer loop, and examine the merged label again.
                    coreLabel.setAfter(coreLabel2.after());
                    coreLabel.setEndPosition(coreLabel2.endPosition());
                    coreLabel.setOriginalText(coreLabel.originalText() + coreLabel2.before() + coreLabel2.originalText());
                    coreLabel.setWord(coreLabel.word() + coreLabel2.before() + coreLabel2.word());
                    coreLabel.setValue(coreLabel.value() + coreLabel2.before() + coreLabel2.value());
                    i++;
                }
            }
            i++;
        }
        return arrayList;
    }

    /**
     * Tokens matching this pattern are cut at their first hyphen: either the character right after
     * the first hyphen is an upper-case letter, or the token has exactly two hyphens with a
     * non-empty middle part and an upper-case letter before the first hyphen.
     */
    private static final Pattern HYPHENATED_TOKEN = Pattern.compile("([^-]*-[A-Z].*|[^-]*[A-Z][^-]*-[^-]+-[^-]*)");

    /**
     * Splits hyphenated tokens at their first hyphen.
     *
     * <p>The part before the hyphen is emitted as one token; the hyphen itself becomes the text
     * between the two tokens ({@code after} of the first, {@code before} of the second); the part
     * after the hyphen is examined again, so a token can be split more than once.
     *
     * <p>The tokens in {@code list} are modified in place.
     *
     * @param list the tokens produced by the tokenizer
     * @return the tokens with hyphenated tokens split up
     */
    private static List<CoreLabel> fixTokens(List<CoreLabel> list) {
        List<CoreLabel> result = new ArrayList<>();
        for (CoreLabel token : list) {
            CoreLabel current = token;
            // Terminates: every split removes one hyphen from the remaining piece, and a piece
            // that no longer matches is emitted and ends the loop.
            while (current != null) {
                String value = current.value();
                if (!HYPHENATED_TOKEN.matcher(value).matches()) {
                    result.add(current);
                    current = null;
                    continue;
                }

                // The pattern guarantees that value contains a hyphen.
                int hyphen = value.indexOf('-');
                int restStart = hyphen + 1;
                String word = current.word();
                String originalText = current.originalText();
                int originalHyphen = originalText.indexOf('-');
                // The original text may lack the hyphen (for example when the tokenizer did not
                // record it); fall back to the pieces of the value instead of failing.
                String headOriginal = originalHyphen >= 0 ? originalText.substring(0, originalHyphen) : value.substring(0, hyphen);
                String restOriginal = originalHyphen >= 0 ? originalText.substring(originalHyphen + 1) : value.substring(restStart);

                // Build the remainder from the current token's fields before truncating it.
                CoreLabel rest = new CoreLabel();
                rest.setBeginPosition(restStart + current.beginPosition());
                rest.setEndPosition(current.endPosition());
                rest.setAfter(current.after());
                rest.setBefore(value.substring(hyphen, restStart));
                rest.setOriginalText(restOriginal);
                rest.setWord(word.substring(restStart));
                rest.setValue(value.substring(restStart));

                current.setEndPosition(hyphen + current.beginPosition());
                current.setAfter(rest.before());
                current.setOriginalText(headOriginal);
                current.setWord(word.substring(0, hyphen));
                current.setValue(value.substring(0, hyphen));
                result.add(current);

                current = rest;
            }
        }
        return result;
    }

    /**
     * Distributes the elements of {@code list} over three target lists according to the given
     * ratios and prints the resulting sizes to standard output.
     *
     * <p>Each portion size is {@code ratio * size} truncated to an integer. Whatever is lost to
     * truncation is added to the first portion, and the third portion receives every element after
     * the first two, so all elements are always assigned. An empty input is accepted and leaves
     * the target lists unchanged.
     *
     * @param list the elements to distribute; not modified
     * @param list2 receives the first (training) portion
     * @param list3 receives the second (development) portion
     * @param list4 receives the third (test) portion
     * @param dArr three ratios, in the order training, development, test
     * @param z whether to shuffle a copy of the input before splitting
     * @param i the seed for the shuffle; only used when {@code z} is {@code true}
     */
    private static <T> void splitData(List<T> list, List<T> list2, List<T> list3, List<T> list4, double[] dArr, boolean z, int i) {
        int size = list.size();
        int trainCount = (int) (dArr[0] * size);
        int devCount = (int) (dArr[1] * size);
        int testCount = (int) (dArr[2] * size);
        // Give the rounding remainder to the training portion (subtracts zero when sizes add up).
        trainCount -= (trainCount + devCount + testCount) - size;

        List<T> pool = new ArrayList<>(list);
        if (z) {
            Collections.shuffle(pool, new Random(i));
        }
        list2.addAll(pool.subList(0, trainCount));
        list3.addAll(pool.subList(trainCount, trainCount + devCount));
        list4.addAll(pool.subList(trainCount + devCount, size));

        // The element type is only known at run time; an empty input has no element to inspect.
        String typeName = "none";
        if (!pool.isEmpty() && pool.get(0) != null) {
            String className = pool.get(0).getClass().getName();
            typeName = className.substring(className.lastIndexOf(".") + 1);
        }
        System.out.println("Number of objects (" + typeName + "):");
        System.out.println("Training: " + list2.size());
        System.out.println("Dev: " + list3.size());
        System.out.println("Test: " + list4.size());
    }

    /**
     * Prints, for every entity type, how many entity mentions of that type occur in the given
     * sentences. Types are listed in sorted order after a {@code Statistics:} header line.
     *
     * @param list the sentences whose entity mentions are counted
     */
    private static void printStatistics(List<ACESentence> list) {
        Map<String, Integer> mentionsPerType = new HashMap<>();
        for (ACESentence sentence : list) {
            for (ACEEntityMention mention : sentence.entities) {
                mentionsPerType.merge(mention.entity.type.name(), 1, Integer::sum);
            }
        }
        System.out.println("Statistics:");
        for (String typeName : sorted(mentionsPerType.keySet())) {
            System.out.println(typeName + ": " + mentionsPerType.get(typeName));
        }
    }

    /**
     * Returns the elements of a collection as a new list in natural order.
     *
     * @param collection the elements to sort; not modified
     * @return a new, sorted list holding the same elements
     */
    private static <T extends Comparable<T>> List<T> sorted(Collection<T> collection) {
        List<T> ordered = new ArrayList<>(collection);
        Collections.sort(ordered);
        return ordered;
    }

    /**
     * Writes the given sentences to the file {@code str + str2} and then prints entity statistics
     * for them.
     *
     * <p>Three layouts are produced, each sentence followed by an empty line:
     * <ul>
     *   <li>No tokenizer: the raw sentence text (newlines and tabs replaced by spaces) followed by
     *       a line of character-based mention spans.</li>
     *   <li>Tokenizer and {@code z}: one token per line with an optional POS tag column and the
     *       label computed by {@code spansToLabels}.</li>
     *   <li>Tokenizer without {@code z}: a line of tokens, an optional line of POS tags, and a line
     *       of token-based mention spans.</li>
     * </ul>
     *
     * <p>The writer is closed even when tokenizing, tagging or labelling fails.
     *
     * @param list the sentences to write
     * @param str the base output path
     * @param str2 the file name appended to {@code str}
     * @param tokenizer the tokenizer, or {@code null} to write untokenized text
     * @param pOSTagger the POS tagger, or {@code null} to omit POS tags
     * @param z whether to write one token per line with labels
     * @param z2 forwarded to {@code spansToLabels} (selects the U/L label variants)
     * @throws FileNotFoundException if the output file cannot be opened
     */
    private static void writeData(List<ACESentence> list, String str, String str2, Tokenizer tokenizer, POSTagger pOSTagger, boolean z, boolean z2) throws FileNotFoundException {
        try (PrintWriter out = new PrintWriter(new File(str + str2))) {
            for (ACESentence sentence : list) {
                if (tokenizer == null) {
                    out.println(sentence.text.replaceAll("[\n\t]", " "));
                    StringBuilder mentionLine = new StringBuilder();
                    for (ACEEntityMention mention : sentence.entities) {
                        appendMention(mentionLine, mention.span, mention.headSpan, mention.label.form);
                    }
                    out.println(mentionLine.toString());
                    out.println();
                    continue;
                }

                List<CoreLabel> tokens = fixTokens(tokenizer.tokenize(sentence.text));
                if (pOSTagger != null) {
                    pOSTagger.tagCoreLabels(tokens);
                }

                if (z) {
                    List<WordLabel> labels = spansToLabels(sentence.entities, tokens, z2);
                    for (int idx = 0; idx < tokens.size(); idx++) {
                        CoreLabel token = tokens.get(idx);
                        if (pOSTagger != null) {
                            out.println(String.format("%s\t%s\t%s", token.value(), token.tag(), labels.get(idx).form));
                        } else {
                            out.println(String.format("%s\t%s", token.value(), labels.get(idx).form));
                        }
                    }
                    out.println();
                    continue;
                }

                StringBuilder tokenLine = new StringBuilder();
                for (CoreLabel token : tokens) {
                    if (tokenLine.length() > 0) {
                        tokenLine.append(" ");
                    }
                    tokenLine.append(token.value());
                    // Only the word field is escaped; the printed text comes from value().
                    token.setWord(escapeBracket(token.word()));
                }
                out.println(tokenLine.toString());

                if (pOSTagger != null) {
                    StringBuilder tagLine = new StringBuilder();
                    for (CoreLabel token : tokens) {
                        if (tagLine.length() > 0) {
                            tagLine.append(" ");
                        }
                        tagLine.append(token.tag());
                    }
                    out.println(tagLine.toString());
                }

                StringBuilder mentionLine = new StringBuilder();
                for (ACEEntityMention mention : sentence.entities) {
                    Span tokenSpan = findWordSpan(mention.span, tokens);
                    Span tokenHeadSpan = findWordSpan(mention.headSpan, tokens);
                    appendMention(mentionLine, tokenSpan, tokenHeadSpan, mention.label.form);
                }
                out.println(mentionLine.toString());
                out.println();
            }
        }
        printStatistics(list);
    }

    /** Appends one {@code start,end,headStart,headEnd label} entry, separated from earlier entries by {@code |}. */
    private static void appendMention(StringBuilder line, Span span, Span headSpan, String label) {
        if (line.length() > 0) {
            line.append("|");
        }
        line.append(String.format("%s,%s,%s,%s %s", span.start, span.end, headSpan.start, headSpan.end, label));
    }

    /**
     * Converts entity mentions into one label per token.
     *
     * <p>Every token covered by a mention gets a label of the form
     * {@code <position><overlap>-<type>}, where {@code <type>} is the mention's label form and
     * {@code <overlap>} is {@code H} when the token already carried a label from an earlier mention
     * (empty otherwise). {@code <position>} is {@code B} for the first token and {@code I} for the
     * others; when {@code z} is set, the last token of a multi-token mention gets {@code L} and a
     * single-token mention gets {@code U}. A later mention overwrites the label of an earlier one.
     * Tokens not covered by any mention are labelled {@code O}.
     *
     * <p>Mentions that {@code findWordSpan} cannot locate (a negative start or end) are skipped;
     * {@code findWordSpan} has already reported them.
     *
     * @param list the entity mentions of the sentence, with character spans
     * @param list2 the tokens of the sentence
     * @param z whether to use the {@code U}/{@code L} position markers
     * @return a fixed-size list with one label per token
     */
    private static List<WordLabel> spansToLabels(List<ACEEntityMention> list, List<CoreLabel> list2, boolean z) {
        WordLabel[] labels = new WordLabel[list2.size()];
        for (ACEEntityMention mention : list) {
            Span tokenSpan = findWordSpan(mention.span, list2);
            if (tokenSpan.start < 0 || tokenSpan.end < 0) {
                // Unresolved span: indexing with it would fall outside the label array.
                continue;
            }
            String type = mention.label.form;
            int first = tokenSpan.start;
            int last = tokenSpan.end - 1;
            for (int idx = first; idx <= last; idx++) {
                String overlapMarker = labels[idx] != null ? "H" : "";
                String position;
                if (idx == last) {
                    if (idx == first) {
                        position = z ? "U" : "B";
                    } else {
                        position = z ? "L" : "I";
                    }
                } else if (idx == first) {
                    position = "B";
                } else {
                    position = "I";
                }
                labels[idx] = WordLabel.get(position + overlapMarker + "-" + type);
            }
        }
        for (int idx = 0; idx < labels.length; idx++) {
            if (labels[idx] == null) {
                labels[idx] = WordLabel.get("O");
            }
        }
        return Arrays.asList(labels);
    }

    /**
     * Replaces a string that contains a bracket character by the matching escape token.
     *
     * <p>The whole string is replaced, not just the bracket. When several kinds of bracket occur,
     * the first match in the order {@code ( ) [ ] { }} decides.
     *
     * @param str the text to check
     * @return the escape token, or {@code str} itself when it contains no bracket
     */
    private static String escapeBracket(String str) {
        if (str.contains("(")) {
            return "-LRB-";
        }
        if (str.contains(")")) {
            return "-RRB-";
        }
        if (str.contains("[")) {
            return "-LSB-";
        }
        if (str.contains("]")) {
            return "-RSB-";
        }
        if (str.contains("{")) {
            return "-LCB-";
        }
        if (str.contains("}")) {
            return "-RCB-";
        }
        return str;
    }

    /**
     * Converts a character span into a token span.
     *
     * <p>The start is the index of the first token that contains the span's start offset. The end
     * is one past the index of the last token that contains the span's final character. When either
     * cannot be found, a diagnostic listing all tokens is printed to standard output and the
     * missing index is returned as {@code -1}.
     *
     * @param span the character span to locate
     * @param list the tokens, with character positions
     * @return the token span; {@code start} and/or {@code end} is {@code -1} when not found
     */
    private static Span findWordSpan(Span span, List<CoreLabel> list) {
        int startToken = -1;
        int endToken = -1;
        for (int idx = 0; idx < list.size(); idx++) {
            CoreLabel token = list.get(idx);
            if (startToken == -1 && token.beginPosition() <= span.start && token.endPosition() > span.start) {
                startToken = idx;
            }
            if (token.beginPosition() < span.end && token.endPosition() >= span.end) {
                endToken = idx + 1;
            }
        }
        if (startToken == -1 || endToken == -1) {
            if (list.isEmpty()) {
                // No first or last token whose positions could be reported.
                System.out.println("Mention [" + span.start + "," + span.end + "] not found in an empty token list");
            } else {
                System.out.println("Mention [" + span.start + "," + span.end + "] not found in [" + list.get(0).beginPosition() + "," + list.get(list.size() - 1).endPosition() + "]");
                StringBuilder dump = new StringBuilder("[");
                for (CoreLabel token : list) {
                    dump.append(token.value()).append("(").append(token.beginPosition()).append(",").append(token.endPosition()).append(") ");
                }
                dump.append("]");
                System.out.println(dump.toString());
            }
        }
        return new Span(startToken, endToken);
    }

    /**
     * Reads the documents of all known domains of both corpora.
     *
     * @param str the first corpus directory, or {@code null} to skip it; searched with
     *     {@link #ACE2004_DOMAINS}
     * @param str2 the second corpus directory, or {@code null} to skip it; searched with
     *     {@link #ACE2005_DOMAINS}
     * @return the documents that were read
     * @throws IOException declared by the delegate overload
     * @throws SAXException declared by the delegate overload
     */
    public static List<ACEDocument> readDocuments(String str, String str2) throws IOException, SAXException {
        List<String> firstDomains = ACE2004_DOMAINS;
        List<String> secondDomains = ACE2005_DOMAINS;
        return readDocuments(str, str2, firstDomains, secondDomains);
    }

    /**
     * Array-based convenience overload; the domain arrays are wrapped as lists and passed on.
     *
     * @param str the first corpus directory, or {@code null} to skip it
     * @param str2 the second corpus directory, or {@code null} to skip it
     * @param strArr the domain directory names to read from {@code str}
     * @param strArr2 the domain directory names to read from {@code str2}
     * @return the documents that were read
     * @throws IOException declared by the delegate overload
     * @throws SAXException declared by the delegate overload
     */
    public static List<ACEDocument> readDocuments(String str, String str2, String[] strArr, String[] strArr2) throws IOException, SAXException {
        List<String> firstDomains = Arrays.asList(strArr);
        List<String> secondDomains = Arrays.asList(strArr2);
        return readDocuments(str, str2, firstDomains, secondDomains);
    }

    /**
     * Collects the {@code .sgm} files of the selected domains and builds one {@code ACEDocument}
     * per file.
     *
     * <p>Files under {@code str} are taken directly from each domain directory; files under
     * {@code str2} are taken from the {@code timex2norm} sub-directory of each domain directory.
     *
     * @param str the first corpus directory, or {@code null} to skip it
     * @param str2 the second corpus directory, or {@code null} to skip it
     * @param list the domain directory names to read from {@code str}
     * @param list2 the domain directory names to read from {@code str2}
     * @return the documents, files from {@code str} first
     * @throws IOException declared for the {@code ACEDocument} construction
     * @throws SAXException declared for the {@code ACEDocument} construction
     */
    public static List<ACEDocument> readDocuments(String str, String str2, List<String> list, List<String> list2) throws IOException, SAXException {
        List<File> sgmFiles = new ArrayList<>();
        if (str != null) {
            extractDocList(sgmFiles, str, list);
        }
        if (str2 != null) {
            extractDocList(sgmFiles, str2, list2, "/timex2norm");
        }
        List<ACEDocument> documents = new ArrayList<>();
        for (File sgmFile : sgmFiles) {
            documents.add(new ACEDocument(sgmFile.getAbsolutePath()));
        }
        return documents;
    }

    /**
     * Collects the {@code .sgm} files of the selected domain directories.
     *
     * <p>Every direct sub-directory of {@code str} whose name is in {@code collection} is searched.
     * When a suffix is given, the files are looked up in {@code <domain directory> + suffix}
     * instead of the domain directory itself. A directory that cannot be listed (missing, not a
     * directory, or unreadable) is reported on standard error and skipped.
     *
     * @param list receives the files that were found
     * @param str the corpus directory containing the domain directories
     * @param collection the names of the domain directories to search
     * @param strArr optional; only the first element is used, as a path suffix appended to each
     *     domain directory
     */
    private static void extractDocList(List<File> list, String str, Collection<String> collection, String... strArr) {
        // listFiles() returns null rather than an empty array when the path cannot be listed.
        File[] domainDirs = new File(str).listFiles();
        if (domainDirs == null) {
            System.err.println("Cannot list directory, skipping: " + str);
            return;
        }
        for (File domainDir : domainDirs) {
            if (!domainDir.isDirectory() || !collection.contains(domainDir.getName())) {
                continue;
            }
            File documentDir = strArr.length > 0 ? new File(domainDir.getAbsolutePath() + strArr[0]) : domainDir;
            File[] candidates = documentDir.listFiles();
            if (candidates == null) {
                System.err.println("Cannot list directory, skipping: " + documentDir.getPath());
                continue;
            }
            for (File candidate : candidates) {
                if (candidate.getName().endsWith(".sgm")) {
                    list.add(candidate);
                }
            }
        }
    }

    /**
     * Updates per-type counters for a list of annotated objects and checks that every mention's
     * stored text matches the text found in the document.
     *
     * <p>NOTE(review): the wildcard map types in this signature come from the decompiler and may
     * not match the original declaration — confirm against the original source.
     *
     * @param aCEDocument the document the objects belong to
     * @param list the objects to count
     * @param map per-type and per-subtype object counts; each object adds one to both entries
     * @param map2 per-type and per-subtype mention counts; each object adds its number of mentions,
     *     but at least one, to both entries
     * @param iArr single-element counter; element 0 is incremented once per object
     * @param iArr2 single-element counter; element 0 is increased by each object's number of
     *     mentions (an object without mentions adds nothing here)
     * @throws RuntimeException if a mention's stored text differs from the unescaped text taken
     *     from the document; diagnostics are printed to standard error first
     */
    private static void count(ACEDocument aCEDocument, List<? extends ACEObject> list, Map<? extends ACEEventArgumentType, Integer> map, Map<? extends ACEEventArgumentType, Integer> map2, int[] iArr, int[] iArr2) {
        for (ACEObject aCEObject : list) {
            // An object without mentions is only reported when it is not a METONYMY relation.
            if (aCEObject.mentions().isEmpty() && aCEObject.type() != ACERelation.ACERelationType.METONYMY) {
                System.out.println("Non-metonymy empty mention set at " + aCEDocument.uri + ": " + aCEObject.id);
            }
            // Object counts, keyed by type and by subtype.
            map.put(aCEObject.type(), Integer.valueOf(map.getOrDefault(aCEObject.type(), 0).intValue() + 1));
            map.put(aCEObject.subtype(), Integer.valueOf(map.getOrDefault(aCEObject.subtype(), 0).intValue() + 1));
            // Mention counts, keyed by type and by subtype; an object without mentions counts as one.
            map2.put(aCEObject.type(), Integer.valueOf(map2.getOrDefault(aCEObject.type(), 0).intValue() + Math.max(1, aCEObject.mentions().size())));
            map2.put(aCEObject.subtype(), Integer.valueOf(map2.getOrDefault(aCEObject.subtype(), 0).intValue() + Math.max(1, aCEObject.mentions().size())));
            // Running totals; unlike map2, the mention total uses the real mention count.
            iArr[0] = iArr[0] + 1;
            iArr2[0] = iArr2[0] + aCEObject.mentions().size();
            for (ACEObjectMention aCEObjectMention : aCEObject.mentions()) {
                // Consistency check: the stored mention text must equal the text at its span.
                if (!aCEObjectMention.text.equals(ACEDocument.unescape(aCEObjectMention.getText(aCEDocument.text)))) {
                    System.err.println("===TEXT===");
                    System.err.println(aCEDocument.text);
                    System.err.println("===FULL TEXT===");
                    System.err.println(aCEDocument.fullText);
                    System.err.println("===SGM===");
                    System.err.println(aCEDocument.uri);
                    System.err.println("===TEXT LENGTH===");
                    System.err.println(aCEDocument.text.length());
                    System.err.println("===OFFSET===");
                    System.err.println(aCEDocument.offset);
                    System.err.println("===MENTION===");
                    System.err.println(aCEObjectMention.text);
                    System.err.println(aCEObjectMention.span);
                    throw new RuntimeException(aCEObjectMention.text + " != " + ACEDocument.unescape(aCEObjectMention.getText(aCEDocument.text)));
                }
            }
        }
    }

    private static void printHelp() {
        printHelp(null);
    }

    private static void printHelp(String str) {
        if (str != null) {
            System.out.println(str);
            System.out.println();
        }
        System.out.println("Usage: java -jar acereader-0.1.jar -ace2004Dir <dirname> -ace2005Dir <dirname>\n\t[-ace2004IncludeDomains (arabic_treebank,bnews,chinese_treebank,fisher_transcripts,nwire)]\n\t[-ace2004ExcludeDomains (arabic_treebank,bnews,chinese_treebank,fisher_transcripts,nwire)]\n\t[-ace2005IncludeDomains (bc,bn,cts,nw,un,bl)]\n\t[-ace2005ExcludeDomains (bc,bn,cts,nw,un,bl)]\n\t[-convertEntitiesToInline]\n\t[-ace2004OutputBasePath]\n\t[-ace2005OutputBasePath]\n\t[-dataSplit <two_or_three_comma_separated_values>]\n\t[-tokenizer (stanford|regex)]\n\t[-posTagger (stanford)]\n\t[-splitter (stanford)]\n\t[-toCoNLLFormat]\n\t[-ignoreOverlaps]\n\t[-useBILOU]\n\t[-splitBySentences]\n\n-ace2004Dir <dirname>\n\tPath to ACE2004 directory containing the domain subdirectories.\n\n-ace2005Dir <dirname>\n\tPath to ACE2004 directory containing the domain subdirectories.\n\tOnly the data from timex2norm version will be used.\n\n-ace2004{Include,Exclude}Domains <domains>\n\tTo include/exclude certain domains from ACE2004.\n\tOnly one of -ace2004IncludeDomains -ace2004ExcludeDomains will take effect.\n\tIf -ace2004IncludeDomains is specified, only those domains will be included.\n\tIf -ace2004ExcludeDomains is specified, all except those domains will be included.\n\tPut a subset of these separated by comma:\n\t- arabic_treebank\n\t- bnews\n\t- chinese_treebank\n\t- fisher_transcripts\n\t- nwire\n\n-ace2005{Include,Exclude}Domains <domains>\n\tTo include/exclude certain domains from ACE2005.\n\tOnly one of -ace2005IncludeDomains -ace2005ExcludeDomains will take effect.\n\tIf -ace2005IncludeDomains is specified, only those domains will be included.\n\tIf -ace2005ExcludeDomains is specified, all except those domains will be included.\n\tPut a subset of these separated by comma:\n\t- bc\n\t- bn\n\t- cts\n\t- nw\n\t- un\n\t- bl\n\n-excludeMetadata\n\tExclude the text that comes before the <body> tag, which includes date and article ID.\n\n-convertEntitiesToInline\n\tPrint 
the entities into files.\n\tNeed -ace2004OutputBasePath, -ace2005OutputBasePath, and -dataSplit options.\n\n-ace{2004,2005}OutputDir <path>\n\tThe directory for ACE2004 and ACE2005 inline output.\n\n-dataSplit <two_or_three_comma_separated_values>\n\tSplit into multiple files according to the ratio given.\n\tYou can give two (train+test) or three (train+dev+test) values.\n\tExamples:\n\t-dataSplit 90,10 to split into 90% training and 10% test\n\t-dataSplit 0.8,0.1,0.1 to split into 80% training, 10% dev, and 10% test\n\n-tokenizer (stanford,regex)\n\tIf specified, the sentences will be tokenized, and the spans will be token-based.\n\tCurrently there are two tokenizers supported: Stanford and regex-based.\n\n-posTagger (stanford)\n\tIf specified, the output files will contain POS tags.\n\tCurrently only Stanford POS tagger is supported.\n\n-splitter (stanford)\n\tThe sentence splitter to split the data.\n\tCurrently only Stanford Splitter is supported.\n\n-toCoNLLFormat\n\tOutput conversion in CoNLL format.\n\n-ignoreOverlaps\n\tIgnore overlapping entities by removing the shorter entity in an overlap.\n\n-useBILOU\n\tTo use BILOU (Begin, Inside, Last, Outside, Unit) format instead of BIO.\n\tOnly applicable when -toCoNLLFormat is used.\n\n-splitBySentences\n\tSplit into training, development, and test based on sentences instead of documents.\n\n-shuffle\n\tWhen splitting dataset, shuffle the order.\n\n-seed <seed>\n\tThe seed used to initialize the Random object used to shuffle the dataset.\n");
        if (str != null) {
            System.out.println("===");
            System.out.println(str);
        }
    }
}
