package com.ibm.icu.dev.tool.charsetdet.sbcs;

import com.ibm.icu.dev.tool.charsetdet.sbcs.NGramList;
import com.ibm.icu.dev.tool.charsetdet.sbcs.NGramParser;
import com.ibm.icu.impl.Utility;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;

/* loaded from: input_file:com/ibm/icu/dev/tool/charsetdet/sbcs/StatisticsTool.class */
/**
 * Command-line tool that reads text files, counts the n-grams reported by
 * {@link NGramParser}, and writes the resulting statistics next to each input file.
 *
 * <p>For every input file it writes a tab-separated dump of all n-grams
 * ({@code <name>.raw<ext>}) and a {@code <name>-<encoding>.dat} file containing
 * Java array initializers for the most frequent n-grams and a byte map. Optionally
 * it also writes a {@code -visual.dat} variant, runs the {@link Checker} on every
 * file, and runs a detection test.
 *
 * <p>The tool acts as the parser's character source and n-gram sink
 * ({@link NGramParser.NGramParserClient}) and as an identity key mapper
 * ({@link NGramList.NGramKeyMapper}).
 */
public class StatisticsTool implements NGramParser.NGramParserClient, NGramList.NGramKeyMapper {
    private static final String usageString =
        "\nUsage: StatisticsTool [OPTIONS] [FILES]\n\n"
        + "This program will read in a Unicode text file of text in a particular language\n"
        + "and compute the statistics needed to detect that language and character set.\n"
        + " Options:\n"
        + "-e       specify the target encoding\n"
        + "-h or -? print this usage text.\n"
        + "-v       also generate statistics for visual order.\n"
        + "-l       only generate statistics for logical order (cancel -v).\n"
        + "-c       run the checker.\n"
        + "-t       run the encoding test.\n"
        + "example: com.ibm.icu.dev.tool.charsetdet.sbcs.StatisticsTool -e 8859-1 Spanish.txt";

    /** Size, in chars, of the read buffer used to feed the n-gram parser. */
    private static final int BUFFER_SIZE = 1024;

    /** Only this many of the most frequent n-grams are kept for the statistics tables. */
    private static final int MAX_NGRAMS = 64;

    /** Size, in chars, of each chunk handed to the checkers during the detection test. */
    private static final int TEST_BUFFER_SIZE = 128;

    private char[] buffer = new char[BUFFER_SIZE];
    private int bufIndex;
    private int bufMax;
    private InputFile inputFile;
    private NGramList ngrams;

    /** Every byte value from 0x00 through 0xFF, in order; used to build the byte map. */
    private static byte[] allBytes = makeAllBytes();

    /**
     * Creates a tool whose buffer initially holds a single space, so the first
     * character handed to the parser is a space before any file data is read.
     */
    public StatisticsTool() {
        this.buffer[0] = ' ';
        this.bufIndex = 0;
        this.bufMax = 1;
    }

    /** Builds the 256-element table holding each byte value at its own (unsigned) index. */
    private static byte[] makeAllBytes() {
        byte[] bytes = new byte[256];
        for (int i = 0; i < bytes.length; i++) {
            bytes[i] = (byte) i;
        }
        return bytes;
    }

    /** Prints the usage text to standard output. */
    private static void usage() {
        System.out.println(usageString);
    }

    /**
     * Returns the index of the last {@code '.'} in {@code path}, or the length of the
     * path when it contains no dot, so that {@code substring} calls built on the result
     * never receive a negative index.
     */
    private static int extensionIndex(String path) {
        int dot = path.lastIndexOf(".");
        return dot < 0 ? path.length() : dot;
    }

    /**
     * Refills the buffer from {@code inputFile} and rewinds the read position.
     *
     * @return whatever {@code InputFile.read} returns; callers treat a negative value
     *         as end of input
     */
    private int nextBuffer(InputFile inputFile) {
        this.bufIndex = 0;
        return inputFile.read(this.buffer);
    }

    /**
     * Supplies the next character to the parser, refilling the buffer when it is
     * exhausted.
     *
     * @return the next character, or {@code (char) 0} once a refill reports a negative count
     */
    @Override // com.ibm.icu.dev.tool.charsetdet.sbcs.NGramParser.NGramParserClient
    public char nextChar() {
        if (this.bufIndex >= this.bufMax) {
            this.bufMax = nextBuffer(this.inputFile);
        }
        if (this.bufMax < 0) {
            return (char) 0;
        }
        return this.buffer[this.bufIndex++];
    }

    /** Records one n-gram reported by the parser in the current n-gram list. */
    @Override // com.ibm.icu.dev.tool.charsetdet.sbcs.NGramParser.NGramParserClient
    public void handleNGram(String str) {
        this.ngrams.put(str);
    }

    /** Identity mapping: the n-gram string itself is used as its key. */
    @Override // com.ibm.icu.dev.tool.charsetdet.sbcs.NGramList.NGramKeyMapper
    public Object mapKey(String str) {
        return str;
    }

    /**
     * Writes every collected n-gram to {@code <name>.raw<ext>} (UTF-8), one per line, as
     * {@code value TAB refCount TAB percent% TAB cumulativePercent%}, in the order
     * produced by sorting the n-grams, and collects the first {@link #MAX_NGRAMS} of
     * them into a new list keyed by the current input file.
     *
     * @return the list of retained n-grams, or {@code null} if the output file could
     *         not be opened
     */
    private NGramList dumpNGrams() {
        String path = this.inputFile.getPath();
        int extension = extensionIndex(path);
        String outputFileName = path.substring(0, extension) + ".raw" + path.substring(extension);
        PrintStream output;
        try {
            output = new PrintStream((OutputStream) new FileOutputStream(outputFileName), true, "UTF8");
        } catch (IOException e) {
            System.out.println("? Could not open " + outputFileName + " for writing.");
            return null;
        }
        try {
            System.out.println(this.inputFile.getFilename() + ": " + this.ngrams.getUniqueNGrams()
                    + "/" + this.ngrams.getTotalNGrams());
            // Raw types are kept because the generic signatures of NGramList are not visible here.
            ArrayList sorted = new ArrayList(this.ngrams.values());
            Collections.sort(sorted);
            NGramList stats = new NGramList(this.inputFile);
            int totalNGrams = this.ngrams.getTotalNGrams();
            double cumulative = 0.0d;
            int count = 0;
            for (Iterator it = sorted.iterator(); it.hasNext(); count++) {
                NGramList.NGram ngram = (NGramList.NGram) it.next();
                String value = ngram.getValue();
                int refCount = ngram.getRefCount();
                // Promote to double before dividing; int / int would truncate the ratio to 0.
                double ratio = ((double) refCount / totalNGrams) * 100.0d;
                cumulative += ratio;
                if (count < MAX_NGRAMS) {
                    stats.put(value);
                }
                output.println(value + "\t" + refCount + "\t" + ratio + "%\t" + cumulative + "%");
            }
            return stats;
        } finally {
            output.close();
        }
    }

    /**
     * Writes {@code <name>-<encoding>.dat} (or {@code -visual.dat}) in ASCII. The file
     * contains two Java array initializers: {@code ngrams}, holding the given keys as
     * six-digit hex values, and {@code byteMap}, a 256-entry table produced by decoding
     * every byte value with the input file, normalizing each character by its
     * {@code NGramParser.getCharClass} result, and encoding the result again.
     *
     * @param arrayList the n-gram keys to write; each element is cast to {@code Integer}
     * @param z {@code true} to write the visual-order file name variant
     */
    private void writeStatistics(ArrayList arrayList, boolean z) {
        String path = this.inputFile.getPath();
        String outputFileName = path.substring(0, extensionIndex(path)) + "-"
                + this.inputFile.getEncoding() + (z ? "-visual.dat" : ".dat");
        PrintStream output;
        try {
            output = new PrintStream((OutputStream) new FileOutputStream(outputFileName), true, "ASCII");
        } catch (IOException e) {
            System.out.println("? Could not open " + outputFileName + " for writing.");
            return;
        }
        // The stream must stay open until everything is written; close it only in finally.
        try {
            int column = 0;
            output.print("    private static int[] ngrams = {");
            for (Iterator it = arrayList.iterator(); it.hasNext(); column++) {
                Integer key = (Integer) it.next();
                if (column % 16 == 0) {
                    output.print("\n        ");
                }
                output.print("0x" + Utility.hex(key.intValue(), 6) + ", ");
            }
            output.println("\n    };\n");

            char[] unicode = this.inputFile.decode(allBytes);
            for (int i = 0; i < 256; i++) {
                char ch = unicode[i];
                // NOTE(review): the meaning of the class codes is defined by NGramParser;
                // presumably 1 = letter and 2 = white space - confirm against that class.
                switch (NGramParser.getCharClass(ch)) {
                    case 1:
                        unicode[i] = Character.toLowerCase(ch);
                        break;
                    case 2:
                        unicode[i] = ' ';
                        break;
                    case 0:
                    default:
                        unicode[i] = 0;
                        break;
                }
            }
            byte[] byteMap = this.inputFile.encode(unicode);
            output.print("    private static byte[] byteMap = {");
            for (int i = 0; i < 256; i++) {
                if (i % 8 == 0) {
                    output.print("\n        ");
                }
                output.print("(byte) 0x" + Utility.hex(byteMap[i] & 255, 2) + ", ");
            }
            output.println("\n    };");
        } finally {
            output.close();
        }
    }

    /**
     * Parses {@code inputFile}, dumps its n-grams, and writes the statistics file(s).
     * When the file requests visual order, a second file is written whose keys have
     * their bytes reversed.
     *
     * @param inputFile the file to analyze
     * @return the retained n-grams, or {@code null} if the input file could not be
     *         opened or the raw dump file could not be written
     */
    public NGramList collectStatistics(InputFile inputFile) {
        if (!inputFile.open()) {
            return null;
        }
        this.inputFile = inputFile;
        NGramParser parser = new NGramParser(this);
        this.ngrams = new NGramList(this);
        parser.parse();
        inputFile.close();

        NGramList stats = dumpNGrams();
        if (stats == null) {
            // dumpNGrams has already reported the failure; there is nothing to write.
            return null;
        }
        ArrayList keys = new ArrayList(stats.keys());
        Collections.sort(keys);
        writeStatistics(keys, false);

        if (this.inputFile.getVisualOrder()) {
            ArrayList reversedKeys = new ArrayList(keys.size());
            for (Iterator it = keys.iterator(); it.hasNext();) {
                int reversed = 0;
                // Unsigned shift so the loop terminates even if a key has its top bit set.
                for (int key = ((Integer) it.next()).intValue(); key != 0; key >>>= 8) {
                    reversed = (reversed << 8) | (key & 255);
                }
                reversedKeys.add(Integer.valueOf(reversed));
            }
            Collections.sort(reversedKeys);
            writeStatistics(reversedKeys, true);
        }
        return stats;
    }

    /**
     * Entry point. Options apply to the files that follow them on the command line:
     * the encoding ({@code -e}) and visual-order flag ({@code -v}/{@code -l}) in effect
     * when a file name is seen are the ones passed to that file.
     *
     * @param strArr the command-line arguments
     */
    public static void main(String[] strArr) {
        List args = Arrays.asList(strArr);
        InputFile[] inputFiles = new InputFile[strArr.length];
        int fileCount = 0;
        String encoding = null;
        boolean runChecker = false;
        boolean runTest = false;
        boolean visualOrder = false;

        for (Iterator it = args.iterator(); it.hasNext();) {
            String arg = (String) it.next();
            if (arg.equals("-v")) {
                visualOrder = true;
            } else if (arg.equals("-l")) {
                visualOrder = false;
            } else if (arg.equals("-c")) {
                runChecker = true;
            } else if (arg.equals("-t")) {
                runTest = true;
            } else if (arg.equals("-e")) {
                if (it.hasNext()) {
                    encoding = (String) it.next();
                } else {
                    System.err.println("Error: missing encoding.");
                }
            } else if (arg.startsWith("-")) {
                if (!arg.equals("-h") && !arg.equals("-?")) {
                    System.err.println("Error: unknown option " + arg);
                }
                usage();
            } else {
                inputFiles[fileCount++] = new InputFile(arg, encoding, visualOrder);
            }
        }

        if (fileCount == 0) {
            System.err.println("Error: there are no files to process.");
            usage();
        }

        StatisticsTool tool = new StatisticsTool();
        Checker[] checkers = new Checker[fileCount];
        for (int f = 0; f < fileCount; f++) {
            InputFile file = inputFiles[f];
            checkers[f] = new Checker(tool.collectStatistics(file), file);
        }
        System.out.println();

        if (runChecker) {
            // Run every checker against every input file.
            for (int c = 0; c < fileCount; c++) {
                for (int f = 0; f < fileCount; f++) {
                    checkers[c].check(inputFiles[f]);
                }
            }
        }

        if (runTest) {
            runDetectionTest(inputFiles, checkers, fileCount);
        }
    }

    /**
     * Runs the detection test: each file is read in chunks of {@link #TEST_BUFFER_SIZE}
     * chars, every checker scores each chunk, and the chunk is credited to the checker
     * with the highest strictly positive score (the first one on ties). Chunks on which
     * no checker scores above zero are counted as {@code NONE}. The per-checker tallies
     * are printed for every file.
     *
     * @param inputFiles the input files; only the first {@code fileCount} entries are used
     * @param checkers one checker per input file
     * @param fileCount number of valid entries in both arrays
     */
    private static void runDetectionTest(InputFile[] inputFiles, Checker[] checkers, int fileCount) {
        char[] chunk = new char[TEST_BUFFER_SIZE];
        System.out.println("Detection test");
        for (int f = 0; f < fileCount; f++) {
            InputFile file = inputFiles[f];
            int[] wins = new int[fileCount];
            int unmatched = 0;
            System.out.println(file.getFilename() + "(" + file.getEncoding() + "):");
            if (!file.open()) {
                // Nothing can be read from a file that failed to open; skip it.
                System.out.println();
                continue;
            }
            try {
                for (int c = 0; c < fileCount; c++) {
                    checkers[c].setMapper(file);
                }
                int charCount;
                while ((charCount = file.read(chunk)) > 0) {
                    int best = -1;
                    int bestScore = 0;
                    for (int c = 0; c < fileCount; c++) {
                        int score = checkers[c].checkBuffer(chunk, charCount);
                        if (score > bestScore) {
                            bestScore = score;
                            best = c;
                        }
                    }
                    if (best >= 0) {
                        wins[best]++;
                    } else {
                        unmatched++;
                    }
                }
            } finally {
                file.close();
            }
            for (int c = 0; c < fileCount; c++) {
                System.out.println("    " + checkers[c].getLanguage() + ": " + wins[c]);
            }
            if (unmatched > 0) {
                System.out.println("    NONE: " + unmatched);
            }
            System.out.println();
        }
    }
}
