package com.knuddels.jtokkit;

import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.function.Predicate;

/* loaded from: input_file:BOOT-INF/lib/jtokkit-1.0.0.jar:com/knuddels/jtokkit/Cl100kParser.class */
/**
 * Hand-written splitter that cuts a string into consecutive fragments (contractions, letter runs,
 * short digit groups, punctuation runs and whitespace runs) and hands each fragment, encoded as
 * UTF-8 bytes, to a caller-supplied predicate.
 *
 * <p>NOTE(review): judging by the class name this presumably mirrors the {@code cl100k_base}
 * pre-tokenization pattern; confirm against the reference pattern before changing any rule.
 */
class Cl100kParser {
    /** Code points accepted after an apostrophe as a one-letter contraction (last one is U+017F). */
    private static final String SDTM = "sdtmSDTM\u017f";

    /** Whitespace code points in the range U+0009..U+000D. */
    private static final String SIMPLE_WHITESPACES = "\t\n\u000b\f\r";

    /**
     * Sorted whitespace code points in the range U+1680..U+3000, looked up via binary search.
     * Every entry is written as an escape so that invisible characters (notably U+2007 and
     * U+202F) cannot be silently altered by an editor or a re-encoding of this file.
     */
    private static final int[] REMAINING_WHITESPACES =
            ("\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a"
                    + "\u2028\u2029\u202f\u205f\u3000")
                    .codePoints()
                    .sorted()
                    .toArray();

    Cl100kParser() {
    }

    /**
     * Splits {@code str} into fragments and passes each one to {@code predicate} in order.
     *
     * <p>At every position the first matching rule wins:
     * <ol>
     *   <li>an apostrophe followed by a short ({@link #isShortContraction}) or long
     *       ({@link #isLongContraction}) contraction suffix;</li>
     *   <li>a run of letters, optionally preceded by one non-newline, non-letter, non-digit
     *       code point;</li>
     *   <li>one to three numeric code points;</li>
     *   <li>a run of punctuation/symbols, optionally preceded by one space and followed by any
     *       number of CR/LF characters;</li>
     *   <li>a whitespace run, split after its last newline, whose final whitespace character is
     *       left for the next fragment when a non-whitespace character follows.</li>
     * </ol>
     *
     * <p>The same {@link ByteArrayList} instance is cleared and refilled for every fragment, so a
     * predicate that wants to keep the bytes must copy them.
     *
     * @param str       text to split; with assertions enabled it must be encodable as UTF-8
     * @param predicate receives each fragment's UTF-8 bytes; returning {@code true} stops the
     *                  split immediately
     */
    public static void split(String str, Predicate<ByteArrayList> predicate) {
        assert isValidUTF8(str) : "Input is not UTF-8: " + str;

        final ByteArrayList utf8Bytes = new ByteArrayList();
        final int length = str.length();
        boolean finished = false;
        int endIndex = 0;

        while (endIndex < length && !finished) {
            int startIndex = endIndex;
            final int c0 = str.codePointAt(startIndex);
            final int cc0 = Character.charCount(c0);
            final int nextIndex = startIndex + cc0;
            // -1 marks "no following code point"; every classifier below rejects it.
            final int c1 = nextIndex < length ? str.codePointAt(nextIndex) : -1;

            if (c0 == '\'' && c1 > 0) {
                if (isShortContraction(c1)) {
                    // Apostrophe + one letter; all accepted letters are single UTF-16 units.
                    endIndex += 2;
                    finished = predicate.test(addUtf8Bytes(str, startIndex, endIndex, utf8Bytes));
                    // The fragment is complete: restart classification at the next position
                    // instead of falling through and emitting an overlapping fragment.
                    continue;
                }
                if (startIndex + 2 < length
                        && isLongContraction(c1, str.codePointAt(startIndex + 2))) {
                    // Apostrophe + two letters.
                    endIndex += 3;
                    finished = predicate.test(addUtf8Bytes(str, startIndex, endIndex, utf8Bytes));
                    continue;
                }
            }

            // Character.charCount(-1) is 1; the value is only used when c1 is a real code point.
            final int cc1 = Character.charCount(c1);

            if ((isNotNewlineOrLetterOrNumeric(c0) && isLetter(c1)) || isLetter(c0)) {
                // Letter run with an optional single leading non-letter (e.g. a space).
                endIndex += cc0;
                if (isLetter(c1)) {
                    endIndex += cc1;
                    while (endIndex < length) {
                        final int cp = str.codePointAt(endIndex);
                        if (!isLetter(cp)) {
                            break;
                        }
                        endIndex += Character.charCount(cp);
                    }
                }
                finished = predicate.test(addUtf8Bytes(str, startIndex, endIndex, utf8Bytes));
            } else if (isNumeric(c0)) {
                // At most three numeric code points per fragment.
                endIndex += cc0;
                if (isNumeric(c1)) {
                    endIndex += cc1;
                    if (endIndex < length) {
                        final int c2 = str.codePointAt(endIndex);
                        if (isNumeric(c2)) {
                            endIndex += Character.charCount(c2);
                        }
                    }
                }
                finished = predicate.test(addUtf8Bytes(str, startIndex, endIndex, utf8Bytes));
            } else if (isNotWhitespaceOrLetterOrNumeric(c0)
                    || (c0 == ' ' && isNotWhitespaceOrLetterOrNumeric(c1))) {
                // Punctuation/symbol run with an optional single leading space.
                endIndex += cc0;
                if (endIndex < length && isNotWhitespaceOrLetterOrNumeric(c1)) {
                    endIndex += cc1;
                    while (endIndex < length) {
                        final int cp = str.codePointAt(endIndex);
                        if (!isNotWhitespaceOrLetterOrNumeric(cp)) {
                            break;
                        }
                        endIndex += Character.charCount(cp);
                    }
                }
                // Trailing CR/LF characters belong to the same fragment.
                while (endIndex < length && isNewline(str.codePointAt(endIndex))) {
                    endIndex++;
                }
                finished = predicate.test(addUtf8Bytes(str, startIndex, endIndex, utf8Bytes));
            } else {
                // Only whitespace can reach this branch.
                assert isWhitespace(c0)
                        : "Invalid character: " + Arrays.toString(Character.toChars(c0));

                int lastNewlineIndex = isNewline(c0) ? endIndex : -1;
                // Last code point inspected while scanning: either the final whitespace of the
                // run or the first non-whitespace code point that terminated it.
                int lastSeen = c0;
                endIndex += cc0;
                if (isWhitespace(c1)) {
                    if (isNewline(c1)) {
                        lastNewlineIndex = endIndex;
                    }
                    endIndex += cc1;
                    while (endIndex < length) {
                        lastSeen = str.codePointAt(endIndex);
                        if (!isWhitespace(lastSeen)) {
                            break;
                        }
                        if (isNewline(lastSeen)) {
                            lastNewlineIndex = endIndex;
                        }
                        endIndex += Character.charCount(lastSeen);
                    }
                }

                if (lastNewlineIndex > -1) {
                    // Emit everything up to and including the last newline as its own fragment
                    // when more whitespace follows it.
                    final int runEnd = endIndex;
                    endIndex = lastNewlineIndex + 1;
                    if (endIndex < runEnd) {
                        assert startIndex < endIndex;
                        finished = predicate.test(
                                addUtf8Bytes(str, startIndex, endIndex, utf8Bytes));
                        startIndex = endIndex;
                        endIndex = runEnd;
                    }
                }

                if (!finished) {
                    // When the run is followed by a non-whitespace character, give back the last
                    // whitespace character so the next iteration can attach it to what follows.
                    if (lastNewlineIndex + 1 < endIndex && !isWhitespace(lastSeen)) {
                        endIndex--;
                    }
                    if (startIndex < endIndex) {
                        finished = predicate.test(
                                addUtf8Bytes(str, startIndex, endIndex, utf8Bytes));
                    }
                }
            }
        }
    }

    /**
     * Tells whether {@code codePoint} is one of the single-letter contraction suffixes listed in
     * {@link #SDTM}.
     *
     * @param codePoint code point following an apostrophe
     * @return {@code true} if it is contained in {@code SDTM}
     */
    static boolean isShortContraction(int codePoint) {
        return SDTM.indexOf(codePoint) >= 0;
    }

    /**
     * Tells whether the two code points spell {@code ll}, {@code ve} or {@code re}, ignoring case.
     *
     * @param first  first code point after the apostrophe
     * @param second second code point after the apostrophe
     * @return {@code true} for a two-letter contraction suffix
     */
    static boolean isLongContraction(int first, int second) {
        // Fast path for the common all-lowercase spelling.
        if ((first == 'l' && second == 'l')
                || (first == 'v' && second == 'e')
                || (first == 'r' && second == 'e')) {
            return true;
        }
        final int upperFirst = Character.toUpperCase(first);
        final int upperSecond = Character.toUpperCase(second);
        return (upperFirst == 'L' && upperSecond == 'L')
                || (upperFirst == 'V' && upperSecond == 'E')
                || (upperFirst == 'R' && upperSecond == 'E');
    }

    /**
     * Checks that {@code str} can be encoded as UTF-8 (i.e. contains no unpaired surrogates).
     *
     * @param str text to check
     * @return {@code true} if a fresh UTF-8 encoder accepts the whole string
     */
    static boolean isValidUTF8(String str) {
        return StandardCharsets.UTF_8.newEncoder().canEncode(str);
    }

    /**
     * Tells whether {@code codePoint} is a letter (general categories Lu, Ll, Lt, Lm, Lo).
     *
     * <p>Code points below U+00AA are decided by an ASCII-only range check and code points above
     * U+323AF are rejected without a category lookup. NOTE(review): these bounds presumably bracket
     * the first and last non-ASCII letters of the targeted Unicode version; verify before editing.
     *
     * @param codePoint code point to classify; negative values yield {@code false}
     * @return {@code true} if the code point is a letter
     */
    static boolean isLetter(int codePoint) {
        if (codePoint < 0xAA) {
            return (codePoint >= 'a' && codePoint <= 'z') || (codePoint >= 'A' && codePoint <= 'Z');
        }
        if (codePoint > 0x323AF) {
            return false;
        }
        switch (Character.getType(codePoint)) {
            case Character.UPPERCASE_LETTER:
            case Character.LOWERCASE_LETTER:
            case Character.TITLECASE_LETTER:
            case Character.MODIFIER_LETTER:
            case Character.OTHER_LETTER:
                return true;
            default:
                return false;
        }
    }

    /**
     * Tells whether {@code codePoint} is numeric (general categories Nd, Nl, No).
     *
     * <p>Code points below U+00B2 are decided by an ASCII digit check and code points above
     * U+1FBF9 are rejected without a category lookup. NOTE(review): bounds presumably bracket the
     * non-ASCII numeric range of the targeted Unicode version; verify before editing.
     *
     * @param codePoint code point to classify; negative values yield {@code false}
     * @return {@code true} if the code point is numeric
     */
    static boolean isNumeric(int codePoint) {
        if (codePoint < 0xB2) {
            return codePoint >= '0' && codePoint <= '9';
        }
        if (codePoint > 0x1FBF9) {
            return false;
        }
        switch (Character.getType(codePoint)) {
            case Character.DECIMAL_DIGIT_NUMBER:
            case Character.LETTER_NUMBER:
            case Character.OTHER_NUMBER:
                return true;
            default:
                return false;
        }
    }

    /**
     * Combined letter-or-numeric test using a single category lookup.
     *
     * <p>Uses the letter bounds (U+00AA and U+323AF) for its fast paths, so below U+00AA only ASCII
     * letters and digits are accepted.
     *
     * @param codePoint code point to classify; negative values yield {@code false}
     * @return {@code true} if the code point is in a letter or number category
     */
    static boolean isLetterOrNumeric(int codePoint) {
        if (codePoint < 0xAA) {
            return (codePoint >= 'a' && codePoint <= 'z')
                    || (codePoint >= 'A' && codePoint <= 'Z')
                    || (codePoint >= '0' && codePoint <= '9');
        }
        if (codePoint > 0x323AF) {
            return false;
        }
        switch (Character.getType(codePoint)) {
            case Character.UPPERCASE_LETTER:
            case Character.LOWERCASE_LETTER:
            case Character.TITLECASE_LETTER:
            case Character.MODIFIER_LETTER:
            case Character.OTHER_LETTER:
            case Character.DECIMAL_DIGIT_NUMBER:
            case Character.LETTER_NUMBER:
            case Character.OTHER_NUMBER:
                return true;
            default:
                // Includes the mark categories (Mn, Me, Mc), which are neither letters nor numbers.
                return false;
        }
    }

    /**
     * Tells whether {@code codePoint} counts as whitespace for splitting purposes.
     *
     * <p>Accepted: U+0009..U+000D, space, U+0085, U+00A0 and the entries of
     * {@link #REMAINING_WHITESPACES}. Unlike {@link Character#isWhitespace(int)} this includes the
     * non-breaking spaces.
     *
     * @param codePoint code point to classify; negative values yield {@code false}
     * @return {@code true} if the code point is whitespace
     */
    static boolean isWhitespace(int codePoint) {
        if (codePoint <= '\r') {
            return SIMPLE_WHITESPACES.indexOf(codePoint) >= 0;
        }
        if (codePoint < 0x85) {
            return codePoint == ' ';
        }
        return codePoint == 0x85
                || codePoint == 0xA0
                || (codePoint >= 0x1680
                        && codePoint <= 0x3000
                        && Arrays.binarySearch(REMAINING_WHITESPACES, codePoint) >= 0);
    }

    /**
     * Tells whether {@code codePoint} is a carriage return or a line feed.
     *
     * @param codePoint code point to classify
     * @return {@code true} for CR or LF
     */
    static boolean isNewline(int codePoint) {
        return codePoint == '\r' || codePoint == '\n';
    }

    /**
     * Tells whether {@code codePoint} is a real code point that is neither whitespace, a letter
     * nor numeric (i.e. punctuation, symbols, marks, controls and the like).
     *
     * @param codePoint code point to classify; negative values yield {@code false}
     * @return {@code true} if none of the three classes applies
     */
    static boolean isNotWhitespaceOrLetterOrNumeric(int codePoint) {
        if (codePoint < '0') {
            // No letters or digits live below '0', so only space and U+0009..U+000D (and the
            // negative "no code point" marker) have to be excluded here.
            return codePoint >= 0
                    && codePoint != ' '
                    && (codePoint > '\r' || codePoint < '\t');
        }
        return !isLetterOrNumeric(codePoint) && !isWhitespace(codePoint);
    }

    /**
     * Tells whether {@code codePoint} is a real code point that is neither CR/LF, a letter nor
     * numeric. Non-newline whitespace such as a space is accepted.
     *
     * @param codePoint code point to classify; negative values yield {@code false}
     * @return {@code true} if the code point may precede a letter run
     */
    static boolean isNotNewlineOrLetterOrNumeric(int codePoint) {
        if (codePoint < '0') {
            // No letters or digits live below '0'; only CR/LF and the negative marker are excluded.
            return codePoint >= 0 && !isNewline(codePoint);
        }
        return !isLetterOrNumeric(codePoint);
    }

    /**
     * Replaces the content of {@code dst} with the UTF-8 encoding of {@code input[start, end)}.
     *
     * <p>Indices are UTF-16 indices. A surrogate pair is read as one code point and encoded as
     * four bytes. An unpaired surrogate is not rejected here; it takes the three-byte branch.
     *
     * @param input text to read from
     * @param start inclusive start index
     * @param end   exclusive end index
     * @param dst   buffer that is cleared and then filled
     * @return {@code dst}, for call chaining
     */
    static ByteArrayList addUtf8Bytes(String input, int start, int end, ByteArrayList dst) {
        dst.clear();
        for (int index = start; index < end; index++) {
            final int cp = input.codePointAt(index);
            if (cp < 0x80) {
                dst.add((byte) cp);
            } else if (cp < 0x800) {
                dst.add((byte) (0xC0 | (cp >> 6)));
                dst.add((byte) (0x80 | (cp & 0x3F)));
            } else if (cp < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
                dst.add((byte) (0xE0 | (cp >> 12)));
                dst.add((byte) (0x80 | ((cp >> 6) & 0x3F)));
                dst.add((byte) (0x80 | (cp & 0x3F)));
            } else {
                assert cp <= Character.MAX_CODE_POINT : "Invalid code point: " + cp;
                dst.add((byte) (0xF0 | (cp >> 18)));
                dst.add((byte) (0x80 | ((cp >> 12) & 0x3F)));
                dst.add((byte) (0x80 | ((cp >> 6) & 0x3F)));
                dst.add((byte) (0x80 | (cp & 0x3F)));
                // A supplementary code point occupies two UTF-16 units; skip the low surrogate.
                index++;
            }
        }
        return dst;
    }
}
