package justhalf.nlp.tokenizer;

import edu.stanford.nlp.ling.CoreLabel;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/* loaded from: input_file:justhalf/nlp/tokenizer/RegexTokenizer.class */
/**
 * A {@link Tokenizer} that splits text at every match of a delimiter regular expression.
 *
 * <p>The text lying between two consecutive delimiter matches becomes one token. Delimiter text
 * is never a token itself; it is stored as the {@code after} of the token that precedes it and as
 * the {@code before} of the token that follows it. Empty stretches of text (for example the
 * position in front of leading whitespace, or the gap between two adjacent delimiters) do not
 * produce tokens.
 */
public class RegexTokenizer implements Tokenizer {
    /**
     * Default delimiter expression. It matches either a run of space, tab, CR or LF characters,
     * or the zero-width position between a character in {@code [\w\p{IsL}]} and a character
     * outside that class (in either order).
     */
    public static final String DEFAULT_REGEX = "[ \\t\\r\\n]+|((?<=[\\w\\p{IsL}])(?=[^\\w\\p{IsL}]))|((?<=[^\\w\\p{IsL}])(?=[\\w\\p{IsL}]))";

    /**
     * Compiled delimiter pattern used by {@link #tokenize(String)}.
     * Left public and non-final so existing code that reads or assigns it keeps working.
     */
    public Pattern pattern;

    /** Creates a tokenizer that uses {@link #DEFAULT_REGEX} as its delimiter. */
    public RegexTokenizer() {
        this(DEFAULT_REGEX);
    }

    /**
     * Creates a tokenizer with a custom delimiter expression.
     *
     * @param str regular expression whose matches separate tokens
     * @throws java.util.regex.PatternSyntaxException if {@code str} is not a valid expression
     */
    public RegexTokenizer(String str) {
        this.pattern = Pattern.compile(str);
    }

    /**
     * Tokenizes {@code str} and returns only the token texts.
     *
     * @param str text to tokenize
     * @return the {@code word()} of every token produced by {@link #tokenize(String)}, in order
     */
    @Override
    public String[] tokenizeToString(String str) {
        List<CoreLabel> tokens = tokenize(str);
        String[] words = new String[tokens.size()];
        for (int i = 0; i < words.length; i++) {
            words[i] = tokens.get(i).word();
        }
        return words;
    }

    /**
     * Splits {@code str} into tokens at every match of {@link #pattern}.
     *
     * <p>Each token carries its begin/end character offsets in {@code str}, its text (as value,
     * word and original text) and the delimiter text immediately before and after it.
     *
     * @param str text to tokenize
     * @return the tokens in order of appearance; empty if {@code str} contains no token text
     */
    @Override
    public List<CoreLabel> tokenize(String str) {
        List<CoreLabel> tokens = new ArrayList<CoreLabel>();
        Matcher matcher = this.pattern.matcher(str);
        int tokenStart = 0; // offset just past the last consumed delimiter
        String gap = "";    // delimiter text between the last emitted token and tokenStart
        while (matcher.find()) {
            int start = matcher.start();
            int end = matcher.end();
            if (start == tokenStart) {
                // No text precedes this delimiter, so there is no token to emit.
                if (end > tokenStart) {
                    // A real (non-empty) delimiter: keep its text by folding it into the
                    // surrounding context instead of producing an empty token.
                    gap = gap + str.substring(start, end);
                    if (!tokens.isEmpty()) {
                        tokens.get(tokens.size() - 1).setAfter(gap);
                    }
                    tokenStart = end;
                }
                continue;
            }
            String delimiter = str.substring(start, end);
            tokens.add(newToken(str, tokenStart, start, gap, delimiter));
            tokenStart = end;
            gap = delimiter;
        }
        if (tokenStart != str.length()) {
            // Text after the final delimiter forms the last token; nothing follows it.
            tokens.add(newToken(str, tokenStart, str.length(), gap, ""));
        }
        return tokens;
    }

    /** Builds the token for {@code text[begin, end)} with the given surrounding delimiter text. */
    private static CoreLabel newToken(String text, int begin, int end, String before, String after) {
        String word = text.substring(begin, end);
        CoreLabel token = new CoreLabel();
        token.setBefore(before);
        token.setBeginPosition(begin);
        token.setEndPosition(end);
        token.setValue(word);
        token.setWord(word);
        token.setOriginalText(word);
        token.setAfter(after);
        return token;
    }

    /**
     * Reports this tokenizer as thread-safe. {@link #tokenize(String)} creates a fresh
     * {@link Matcher} per call and only reads {@link #pattern}; this holds as long as callers
     * do not reassign the public {@code pattern} field while tokenizing.
     *
     * @return always {@code true}
     */
    @Override
    public boolean isThreadSafe() {
        return true;
    }
}
