package de.julielab.jcore.ae.lingpipegazetteer.utils;

import com.aliasi.tokenizer.Tokenizer;
import com.aliasi.tokenizer.TokenizerFactory;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.Stack;

/* loaded from: input_file:de/julielab/jcore/ae/lingpipegazetteer/utils/StringNormalizerForChunking.class */
/**
 * Static helpers that produce a normalized copy of a string together with a
 * map from character offsets in the normalized string back to offsets in the
 * original string.
 *
 * <p>Two normalizations are offered: {@link #normalizeString(String)} removes a
 * fixed set of punctuation characters, and
 * {@link #normalizeString(String, TokenizerFactory)} re-assembles the string
 * from tokenizer output while dropping an apostrophe + {@code s} token pair
 * that is directly followed by whitespace.
 */
public class StringNormalizerForChunking {
    /**
     * Characters removed by {@link #normalizeString(String)}. Populated once at
     * class initialization and never modified afterwards.
     */
    private static final Set<Character> CHARS_TO_DELETE = buildCharsToDelete();

    /**
     * Normalization modes. Not referenced by any code in this class.
     */
    public enum Mode {
        DELETE,
        REPLACE;

        /**
         * Returns all constants of this enum in declaration order.
         *
         * <p>Retained so existing callers keep working; it is equivalent to
         * {@link #values()}, which already hands out a fresh array on every call.
         *
         * @return a newly allocated array holding every {@code Mode} constant
         */
        public static Mode[] valuesCustom() {
            return values();
        }
    }

    /**
     * Result of a normalization: the normalized text plus the offset map.
     */
    public static class NormalizedString {
        /** The normalized text. */
        public String string;
        /** Maps offsets in {@link #string} to offsets in the original input. */
        public Map<Integer, Integer> offsetMap;

        /**
         * Looks up the original offset recorded for a normalized offset.
         *
         * @param i an offset into {@link #string}
         * @return the recorded original offset, or {@code null} if no mapping
         *         was recorded for {@code i}
         */
        public Integer getOriginalOffset(int i) {
            return this.offsetMap.get(i);
        }
    }

    /** Builds the set of characters that {@link #normalizeString(String)} drops. */
    private static Set<Character> buildCharsToDelete() {
        // U+00A7 is the section sign, U+00B4 the acute accent.
        String deletable = "-+,.:;?!*\u00A7$%&/\\()<>[]='`\u00B4\"#";
        Set<Character> chars = new HashSet<Character>();
        for (int idx = 0; idx < deletable.length(); idx++) {
            chars.add(deletable.charAt(idx));
        }
        return chars;
    }

    /**
     * Removes every character contained in the deletion set from {@code str}.
     *
     * <p>For each normalized offset, the offset map stores the first original
     * offset that was seen for it. For a kept character this is the character's
     * own original position.
     *
     * <p>NOTE(review): when the input starts with deleted characters, normalized
     * offset 0 is mapped to original offset 0 (the first deleted character)
     * rather than to the first kept character. No entry is recorded for the
     * offset equal to the normalized string's length.
     *
     * @param str the text to normalize
     * @return the normalized text and its offset map
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public static NormalizedString normalizeString(String str) {
        NormalizedString result = new NormalizedString();
        result.offsetMap = new HashMap<Integer, Integer>();
        StringBuilder kept = new StringBuilder(str.length());
        int deletedCount = 0;
        for (int pos = 0; pos < str.length(); pos++) {
            char current = str.charAt(pos);
            if (CHARS_TO_DELETE.contains(current)) {
                deletedCount++;
            } else {
                kept.append(current);
            }
            // Clamp to 0 so leading deleted characters do not yield a negative key.
            int normalizedPos = Math.max(0, pos - deletedCount);
            // First writer wins: later (deleted) characters never overwrite the entry.
            if (!result.offsetMap.containsKey(normalizedPos)) {
                result.offsetMap.put(normalizedPos, pos);
            }
        }
        result.string = kept.toString();
        return result;
    }

    /**
     * Rebuilds {@code str} from the tokens and whitespace delivered by a
     * tokenizer, dropping an {@code '} token and a directly following {@code s}
     * token when that {@code s} is followed by non-empty whitespace (presumably
     * a possessive such as {@code X's Y}).
     *
     * <p>For every token that ends up in the output, the offset map receives two
     * entries: the token's start and end offsets in the normalized string,
     * mapped to the tokenizer's last-token start and end positions. Offset 0 is
     * always mapped to 0.
     *
     * <p>NOTE(review): apostrophe / {@code s} tokens that are still held back
     * when the tokenizer runs out of tokens are not written to the output.
     *
     * @param str              the text to normalize
     * @param tokenizerFactory factory used to create the tokenizer over {@code str}
     * @return the normalized text and its offset map
     * @throws NullPointerException if {@code str} or {@code tokenizerFactory} is
     *                              {@code null}
     */
    public static NormalizedString normalizeString(String str, TokenizerFactory tokenizerFactory) {
        NormalizedString result = new NormalizedString();
        result.offsetMap = new HashMap<Integer, Integer>();
        char[] chars = str.toCharArray();
        Tokenizer tokenizer = tokenizerFactory.tokenizer(chars, 0, chars.length);
        StringBuilder out = new StringBuilder();

        // Tokens (and whitespace) held back until we know whether to keep them.
        StringBuilder pendingText = new StringBuilder();
        int pendingTokens = 0;
        // Offsets for the held-back tokens, published only if they are kept.
        Map<Integer, Integer> pendingOffsets = new HashMap<Integer, Integer>();

        out.append(tokenizer.nextWhitespace());
        result.offsetMap.put(0, 0);

        String token;
        while ((token = tokenizer.nextToken()) != null) {
            if (token.equals("'")) {
                // Hold the apostrophe back; the next token decides its fate.
                int start = out.length() + pendingText.length();
                pendingOffsets.put(start, tokenizer.lastTokenStartPosition());
                pendingOffsets.put(start + token.length(), tokenizer.lastTokenEndPosition());
                pendingText.append(token).append(tokenizer.nextWhitespace());
                pendingTokens++;
            } else if (token.equals("s") && pendingTokens == 1) {
                int start = out.length() + pendingText.length();
                pendingOffsets.put(start, tokenizer.lastTokenStartPosition());
                pendingOffsets.put(start + token.length(), tokenizer.lastTokenEndPosition());
                pendingText.append(token);
                pendingTokens++;
                String whitespace = tokenizer.nextWhitespace();
                if (whitespace.length() > 0) {
                    // "'s" followed by whitespace: discard it, keep the whitespace.
                    out.append(whitespace);
                    pendingText.setLength(0);
                    pendingTokens = 0;
                    pendingOffsets.clear();
                }
            } else {
                if (pendingTokens > 0) {
                    // The held-back tokens were not a droppable "'s": emit them.
                    out.append(pendingText);
                    pendingText.setLength(0);
                    pendingTokens = 0;
                    result.offsetMap.putAll(pendingOffsets);
                    pendingOffsets.clear();
                }
                out.append(token);
                int end = out.length();
                result.offsetMap.put(end - token.length(), tokenizer.lastTokenStartPosition());
                result.offsetMap.put(end, tokenizer.lastTokenEndPosition());
                out.append(tokenizer.nextWhitespace());
            }
        }
        result.string = out.toString();
        return result;
    }
}
