package org.bigml.mimir.nlp.tokenization;

import java.util.Deque;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.Map;

/* loaded from: input_file:org/bigml/mimir/nlp/tokenization/DictionaryTokenStream.class */
/**
 * A {@link TokenStream} that emits only terms present in a dictionary.
 *
 * <p>The dictionary is a character trie ({@link TrieNode}). The stream walks
 * the inherited {@code document} character array, and at each candidate start
 * position collects every dictionary entry that begins there and ends at an
 * acceptable boundary. Matches are queued and handed out one at a time by
 * {@link #nextTerm()}, in the order they were found (shorter matches from the
 * same start position come first).
 */
public class DictionaryTokenStream extends TokenStream {
    /**
     * Characters at or above this value (U+2E80, the first code point of the
     * Unicode "CJK Radicals Supplement" block) are handled as CJK text: a
     * dictionary match touching such a character does not need a
     * non-alphanumeric character after it to be accepted.
     */
    private static final char BEGIN_CJK = 0x2E80;

    /** Root of the dictionary trie; its children are the first characters of all entries. */
    private final TrieNode _root;

    /** Matches found but not yet returned by {@link #nextTerm()} (FIFO). */
    private final Deque<String> _found = new LinkedList<>();

    /**
     * One node of the dictionary trie. Each node maps a character to the node
     * reached by appending that character, and records whether the path from
     * the root to this node spells a complete dictionary entry.
     */
    public static class TrieNode {
        /** True when the characters leading to this node form a dictionary entry. */
        private boolean _validWord;

        /** Child nodes keyed by the next character. */
        private final Map<Character, TrieNode> _nextNodes = new HashMap<>();

        /** Creates an empty node that has no children and does not mark a word. */
        public TrieNode() {
            this._validWord = false;
        }

        /**
         * Inserts the suffix {@code cArr[i..]} below this node, creating any
         * missing child nodes, and marks the final node as a valid word. When
         * {@code i == cArr.length} this node itself is marked.
         *
         * @param cArr characters of the word being inserted
         * @param i    index in {@code cArr} of the first character to insert
         * @throws ArrayIndexOutOfBoundsException if {@code i} is negative or
         *         greater than {@code cArr.length}
         * @throws NullPointerException if {@code cArr} is null
         */
        public void insert(char[] cArr, int i) {
            // Reject a bad start index before touching the trie.
            if (i < 0 || i > cArr.length) {
                throw new ArrayIndexOutOfBoundsException(i);
            }
            // Iterative descent: no recursion, so depth is not bounded by the call stack.
            TrieNode node = this;
            for (int pos = i; pos < cArr.length; pos++) {
                Character key = Character.valueOf(cArr[pos]);
                TrieNode child = node._nextNodes.get(key);
                if (child == null) {
                    child = new TrieNode();
                    node._nextNodes.put(key, child);
                }
                node = child;
            }
            node._validWord = true;
        }
    }

    /**
     * Creates a stream over {@code str} that yields entries of the given trie.
     *
     * @param str      text handed to the {@link TokenStream} constructor
     * @param z        flag handed unchanged to the {@link TokenStream} constructor
     * @param trieNode root of the dictionary trie; it is used as-is (not copied)
     *                 and is dereferenced without a null check once scanning of
     *                 a non-empty document begins
     */
    public DictionaryTokenStream(String str, boolean z, TrieNode trieNode) {
        super(str, z);
        this._root = trieNode;
    }

    // Decompiler note on the original: access modifier was changed from protected.
    /**
     * Returns the next dictionary term found in the document.
     *
     * @return the next queued match, or {@code null} once the document has
     *         been fully scanned and no matches remain
     */
    @Override // org.bigml.mimir.nlp.tokenization.TokenStream
    public String nextTerm() {
        // Keep scanning until at least one match is queued or the input is exhausted.
        while (this._found.isEmpty() && this.begin < this.document.length) {
            _nextValidPositions();
        }
        // poll() yields the head of the queue, or null when nothing is queued.
        return this._found.poll();
    }

    /**
     * Advances {@code begin} to the next candidate start position, queues all
     * dictionary entries starting there, and then moves {@code begin} past the
     * text that was just examined. On return {@code end == begin}.
     */
    private void _nextValidPositions() {
        // Skip characters that can neither start a plain word (letter/digit)
        // nor start any dictionary entry.
        while (this.begin < this.document.length
                && !Character.isLetterOrDigit(this.document[this.begin])
                && !this._root._nextNodes.containsKey(Character.valueOf(this.document[this.begin]))) {
            this.begin++;
        }
        if (this.begin < this.document.length) {
            _pushWords();
            char consumed = this.document[this.begin];
            this.begin++;
            if (consumed < BEGIN_CJK) {
                // For non-CJK text, skip the remaining letters/digits of the
                // current word so the next scan does not start mid-word.
                while (this.begin < this.document.length
                        && Character.isLetterOrDigit(this.document[this.begin])
                        && this.document[this.begin] < BEGIN_CJK) {
                    this.begin++;
                }
            }
        }
        this.end = this.begin;
    }

    /**
     * Queues every dictionary entry that starts at {@code begin}. A trie match
     * ending just before index {@code end} is accepted when it reaches the end
     * of the document, or when the following character is not a letter/digit,
     * or when either the first character of the match or the following
     * character is at or above {@link #BEGIN_CJK}.
     */
    private void _pushWords() {
        TrieNode node = this._root._nextNodes.get(Character.valueOf(this.document[this.begin]));
        this.end = this.begin;
        while (node != null && this.end < this.document.length) {
            this.end++;
            boolean atEnd = this.end == this.document.length;
            if (node._validWord) {
                boolean boundary = atEnd
                        || !Character.isLetterOrDigit(this.document[this.end])
                        || this.document[this.begin] >= BEGIN_CJK
                        || this.document[this.end] >= BEGIN_CJK;
                if (boundary) {
                    this._found.add(new String(this.document, this.begin, this.end - this.begin));
                }
            }
            // Follow the trie with the next character; stop when the input runs out.
            node = atEnd ? null : node._nextNodes.get(Character.valueOf(this.document[this.end]));
        }
    }
}
