package org.codelibs.elasticsearch.langfield.detect.util;

import java.lang.Character;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.codelibs.elasticsearch.langfield.jackson.core.util.MinimalPrettyPrinter;

/* loaded from: input_file:org/codelibs/elasticsearch/langfield/detect/util/NGram.class */
/**
 * Sliding window over normalized characters from which 1- to {@link #N_GRAM}-character
 * n-grams can be read.
 *
 * <p>Characters are fed one at a time through {@link #addChar(char)}; each one is first passed
 * through {@link #normalize(char)}. A space acts as a word boundary: the window always starts
 * with a single space and is reset to a single space once a boundary has been consumed.
 *
 * <p>Instances hold mutable state and perform no synchronization.
 */
public class NGram {
    /** Maximum n-gram length kept in the window. */
    public static final int N_GRAM = 3;

    /** Character that marks a word boundary inside the window. */
    private static final char WORD_BOUNDARY = ' ';

    /** Window of the most recent normalized characters; never empty, at most {@link #N_GRAM} long. */
    private final StringBuilder grams = new StringBuilder().append(WORD_BOUNDARY);

    /** Set once two consecutive upper-case characters were added; suppresses {@link #get(int)}. */
    private boolean capitalword = false;

    /** Latin-1 Supplement characters that {@link #normalize(char)} maps to a space. */
    private static final String LATIN1_EXCLUDED = Messages.getString("NGram.LATIN1_EXCLUDE");

    /**
     * Replacement tables for {@link #normalize_vi(String)}: one string per diacritical mark,
     * indexed by the mark's position in {@link #DMARK_CLASS}; within each string the character
     * at position {@code k} replaces the base letter at position {@code k} of
     * {@link #TO_NORMALIZE_VI_CHARS}.
     */
    private static final String[] NORMALIZED_VI_CHARS = {
        Messages.getString("NORMALIZED_VI_CHARS_0300"),
        Messages.getString("NORMALIZED_VI_CHARS_0301"),
        Messages.getString("NORMALIZED_VI_CHARS_0303"),
        Messages.getString("NORMALIZED_VI_CHARS_0309"),
        Messages.getString("NORMALIZED_VI_CHARS_0323")
    };

    /** Base letters that may be followed by a separate diacritical mark. */
    private static final String TO_NORMALIZE_VI_CHARS = Messages.getString("TO_NORMALIZE_VI_CHARS");

    /** Diacritical marks recognised by {@link #normalize_vi(String)}. */
    private static final String DMARK_CLASS = Messages.getString("DMARK_CLASS");

    /** Matches one base letter (group 1) immediately followed by one diacritical mark (group 2). */
    private static final Pattern ALPHABET_WITH_DMARK =
            Pattern.compile("([" + TO_NORMALIZE_VI_CHARS + "])([" + DMARK_CLASS + "])");

    /**
     * Groups of CJK ideographs. Every character of a group is mapped to the group's first
     * character in {@link #cjkMap}. The order of the entries is significant: if a character
     * occurs in more than one group, the later group wins.
     */
    static final String[] CJK_CLASS = {
        Messages.getString("NGram.KANJI_1_0"), Messages.getString("NGram.KANJI_1_2"),
        Messages.getString("NGram.KANJI_1_4"), Messages.getString("NGram.KANJI_1_8"),
        Messages.getString("NGram.KANJI_1_11"), Messages.getString("NGram.KANJI_1_12"),
        Messages.getString("NGram.KANJI_1_13"), Messages.getString("NGram.KANJI_1_14"),
        Messages.getString("NGram.KANJI_1_16"), Messages.getString("NGram.KANJI_1_18"),
        Messages.getString("NGram.KANJI_1_22"), Messages.getString("NGram.KANJI_1_27"),
        Messages.getString("NGram.KANJI_1_29"), Messages.getString("NGram.KANJI_1_31"),
        Messages.getString("NGram.KANJI_1_35"),
        Messages.getString("NGram.KANJI_2_0"), Messages.getString("NGram.KANJI_2_1"),
        Messages.getString("NGram.KANJI_2_4"), Messages.getString("NGram.KANJI_2_9"),
        Messages.getString("NGram.KANJI_2_10"), Messages.getString("NGram.KANJI_2_11"),
        Messages.getString("NGram.KANJI_2_12"), Messages.getString("NGram.KANJI_2_13"),
        Messages.getString("NGram.KANJI_2_15"), Messages.getString("NGram.KANJI_2_16"),
        Messages.getString("NGram.KANJI_2_18"), Messages.getString("NGram.KANJI_2_21"),
        Messages.getString("NGram.KANJI_2_22"), Messages.getString("NGram.KANJI_2_23"),
        Messages.getString("NGram.KANJI_2_28"), Messages.getString("NGram.KANJI_2_29"),
        Messages.getString("NGram.KANJI_2_30"), Messages.getString("NGram.KANJI_2_31"),
        Messages.getString("NGram.KANJI_2_32"), Messages.getString("NGram.KANJI_2_35"),
        Messages.getString("NGram.KANJI_2_36"), Messages.getString("NGram.KANJI_2_37"),
        Messages.getString("NGram.KANJI_2_38"),
        Messages.getString("NGram.KANJI_3_1"), Messages.getString("NGram.KANJI_3_2"),
        Messages.getString("NGram.KANJI_3_3"), Messages.getString("NGram.KANJI_3_4"),
        Messages.getString("NGram.KANJI_3_5"), Messages.getString("NGram.KANJI_3_8"),
        Messages.getString("NGram.KANJI_3_9"), Messages.getString("NGram.KANJI_3_11"),
        Messages.getString("NGram.KANJI_3_12"), Messages.getString("NGram.KANJI_3_13"),
        Messages.getString("NGram.KANJI_3_15"), Messages.getString("NGram.KANJI_3_16"),
        Messages.getString("NGram.KANJI_3_18"), Messages.getString("NGram.KANJI_3_19"),
        Messages.getString("NGram.KANJI_3_22"), Messages.getString("NGram.KANJI_3_23"),
        Messages.getString("NGram.KANJI_3_27"), Messages.getString("NGram.KANJI_3_29"),
        Messages.getString("NGram.KANJI_3_30"), Messages.getString("NGram.KANJI_3_31"),
        Messages.getString("NGram.KANJI_3_32"), Messages.getString("NGram.KANJI_3_35"),
        Messages.getString("NGram.KANJI_3_36"), Messages.getString("NGram.KANJI_3_37"),
        Messages.getString("NGram.KANJI_3_38"),
        Messages.getString("NGram.KANJI_4_0"), Messages.getString("NGram.KANJI_4_9"),
        Messages.getString("NGram.KANJI_4_10"), Messages.getString("NGram.KANJI_4_16"),
        Messages.getString("NGram.KANJI_4_17"), Messages.getString("NGram.KANJI_4_18"),
        Messages.getString("NGram.KANJI_4_22"), Messages.getString("NGram.KANJI_4_24"),
        Messages.getString("NGram.KANJI_4_28"), Messages.getString("NGram.KANJI_4_34"),
        Messages.getString("NGram.KANJI_4_39"),
        Messages.getString("NGram.KANJI_5_10"), Messages.getString("NGram.KANJI_5_11"),
        Messages.getString("NGram.KANJI_5_12"), Messages.getString("NGram.KANJI_5_13"),
        Messages.getString("NGram.KANJI_5_14"), Messages.getString("NGram.KANJI_5_18"),
        Messages.getString("NGram.KANJI_5_26"), Messages.getString("NGram.KANJI_5_29"),
        Messages.getString("NGram.KANJI_5_34"), Messages.getString("NGram.KANJI_5_39"),
        Messages.getString("NGram.KANJI_6_0"), Messages.getString("NGram.KANJI_6_3"),
        Messages.getString("NGram.KANJI_6_9"), Messages.getString("NGram.KANJI_6_10"),
        Messages.getString("NGram.KANJI_6_11"), Messages.getString("NGram.KANJI_6_12"),
        Messages.getString("NGram.KANJI_6_16"), Messages.getString("NGram.KANJI_6_18"),
        Messages.getString("NGram.KANJI_6_20"), Messages.getString("NGram.KANJI_6_21"),
        Messages.getString("NGram.KANJI_6_22"), Messages.getString("NGram.KANJI_6_23"),
        Messages.getString("NGram.KANJI_6_25"), Messages.getString("NGram.KANJI_6_28"),
        Messages.getString("NGram.KANJI_6_29"), Messages.getString("NGram.KANJI_6_30"),
        Messages.getString("NGram.KANJI_6_32"), Messages.getString("NGram.KANJI_6_34"),
        Messages.getString("NGram.KANJI_6_35"), Messages.getString("NGram.KANJI_6_37"),
        Messages.getString("NGram.KANJI_6_39"),
        Messages.getString("NGram.KANJI_7_0"), Messages.getString("NGram.KANJI_7_3"),
        Messages.getString("NGram.KANJI_7_6"), Messages.getString("NGram.KANJI_7_7"),
        Messages.getString("NGram.KANJI_7_9"), Messages.getString("NGram.KANJI_7_11"),
        Messages.getString("NGram.KANJI_7_12"), Messages.getString("NGram.KANJI_7_13"),
        Messages.getString("NGram.KANJI_7_16"), Messages.getString("NGram.KANJI_7_18"),
        Messages.getString("NGram.KANJI_7_19"), Messages.getString("NGram.KANJI_7_20"),
        Messages.getString("NGram.KANJI_7_21"), Messages.getString("NGram.KANJI_7_23"),
        Messages.getString("NGram.KANJI_7_25"), Messages.getString("NGram.KANJI_7_28"),
        Messages.getString("NGram.KANJI_7_29"), Messages.getString("NGram.KANJI_7_32"),
        Messages.getString("NGram.KANJI_7_33"), Messages.getString("NGram.KANJI_7_35"),
        Messages.getString("NGram.KANJI_7_37")
    };

    /**
     * Maps each ideograph listed in {@link #CJK_CLASS} to the first character of its group.
     * Populated once by the static initializer below and consulted by {@link #normalize(char)}.
     */
    public static Map<Character, Character> cjkMap = new HashMap<>();

    static {
        for (String group : CJK_CLASS) {
            // The first character of each group is the representative for the whole group.
            Character representative = Character.valueOf(group.charAt(0));
            for (int pos = 0; pos < group.length(); pos++) {
                cjkMap.put(Character.valueOf(group.charAt(pos)), representative);
            }
        }
    }

    /**
     * Normalizes {@code c} and appends it to the window.
     *
     * <p>If the previous character was a word boundary the window is reset first, and a second
     * consecutive boundary is dropped. Otherwise the oldest character is discarded once the
     * window already holds {@link #N_GRAM} characters.
     *
     * @param c the raw character to add
     */
    public void addChar(char c) {
        char normalized = normalize(c);
        char previous = this.grams.charAt(this.grams.length() - 1);

        if (previous == WORD_BOUNDARY) {
            // A word just ended: start over with a lone boundary marker.
            this.grams.setLength(0);
            this.grams.append(WORD_BOUNDARY);
            this.capitalword = false;
            if (normalized == WORD_BOUNDARY) {
                return; // collapse runs of boundaries
            }
        } else if (this.grams.length() >= N_GRAM) {
            this.grams.deleteCharAt(0);
        }
        this.grams.append(normalized);

        if (!Character.isUpperCase(normalized)) {
            this.capitalword = false;
        } else if (Character.isUpperCase(previous)) {
            // Two upper-case characters in a row: treat the word as all-caps.
            this.capitalword = true;
        }
    }

    /**
     * Returns the n-gram formed by the last {@code i} characters of the window.
     *
     * @param i requested n-gram length, expected to be between 1 and {@link #N_GRAM}
     * @return the n-gram, or {@code null} when {@code i} is out of range, the window holds
     *         fewer than {@code i} characters, the current word is flagged as capitalized,
     *         or a 1-gram is requested while the last character is a word boundary
     */
    public String get(int i) {
        if (this.capitalword) {
            return null;
        }
        int length = this.grams.length();
        if (i < 1 || i > N_GRAM || length < i) {
            return null;
        }
        if (i == 1) {
            char last = this.grams.charAt(length - 1);
            return last == WORD_BOUNDARY ? null : Character.toString(last);
        }
        return this.grams.substring(length - i, length);
    }

    /**
     * Maps a character to its normalized form depending on the Unicode block it belongs to.
     * Characters in blocks not handled below are returned unchanged.
     *
     * @param c the character to normalize
     * @return the normalized character
     */
    public static char normalize(char c) {
        Character.UnicodeBlock block = Character.UnicodeBlock.of(c);

        if (block == Character.UnicodeBlock.BASIC_LATIN) {
            // Only ASCII letters survive; everything else becomes a word boundary.
            boolean asciiLetter = (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
            if (!asciiLetter) {
                c = WORD_BOUNDARY;
            }
        } else if (block == Character.UnicodeBlock.LATIN_1_SUPPLEMENT) {
            if (LATIN1_EXCLUDED.indexOf(c) >= 0) {
                c = WORD_BOUNDARY;
            }
        } else if (block == Character.UnicodeBlock.LATIN_EXTENDED_B) {
            if (c == '\u0219') {
                c = '\u015f'; // small s with comma below (U+0219) -> small s with cedilla (U+015F)
            }
            if (c == '\u021b') {
                c = '\u0163'; // small t with comma below (U+021B) -> small t with cedilla (U+0163)
            }
        } else if (block == Character.UnicodeBlock.GENERAL_PUNCTUATION) {
            c = WORD_BOUNDARY;
        } else if (block == Character.UnicodeBlock.ARABIC) {
            if (c == '\u06cc') {
                c = '\u064a'; // Farsi yeh (U+06CC) -> Arabic yeh (U+064A)
            }
        } else if (block == Character.UnicodeBlock.LATIN_EXTENDED_ADDITIONAL) {
            if (c >= '\u1ea0') {
                c = '\u1ec3'; // everything from U+1EA0 upward collapses to U+1EC3
            }
        } else if (block == Character.UnicodeBlock.HIRAGANA) {
            c = '\u3042'; // Hiragana letter A stands in for the whole block
        } else if (block == Character.UnicodeBlock.KATAKANA) {
            c = '\u30a2'; // Katakana letter A stands in for the whole block
        } else if (block == Character.UnicodeBlock.BOPOMOFO
                || block == Character.UnicodeBlock.BOPOMOFO_EXTENDED) {
            c = '\u3105'; // Bopomofo letter B stands in for both blocks
        } else if (block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS) {
            // Single lookup; ideographs without a mapping are left untouched.
            Character representative = cjkMap.get(Character.valueOf(c));
            if (representative != null) {
                c = representative.charValue();
            }
        } else if (block == Character.UnicodeBlock.HANGUL_SYLLABLES) {
            c = '\uac00'; // Hangul syllable GA stands in for the whole block
        }
        return c;
    }

    /**
     * Replaces every "base letter + diacritical mark" pair matched by
     * {@link #ALPHABET_WITH_DMARK} with the single character taken from
     * {@link #NORMALIZED_VI_CHARS}.
     *
     * @param str the text to normalize; must not be {@code null}
     * @return the normalized text, or {@code str} itself when nothing was replaced
     */
    public static String normalize_vi(String str) {
        Matcher matcher = ALPHABET_WITH_DMARK.matcher(str);
        StringBuffer result = new StringBuffer();
        while (matcher.find()) {
            int letterIndex = TO_NORMALIZE_VI_CHARS.indexOf(matcher.group(1));
            int markIndex = DMARK_CLASS.indexOf(matcher.group(2));
            String composed = NORMALIZED_VI_CHARS[markIndex].substring(letterIndex, letterIndex + 1);
            // Quote so that '$' or '\\' in the table data can never be read as a group reference.
            matcher.appendReplacement(result, Matcher.quoteReplacement(composed));
        }
        if (result.length() == 0) {
            // No pair was found: hand back the caller's string unchanged.
            return str;
        }
        matcher.appendTail(result);
        return result.toString();
    }
}
