package org.bigml.mimir.nlp.tokenization;

import java.util.Set;

/* loaded from: input_file:org/bigml/mimir/nlp/tokenization/BigramTokenStream.class */
/**
 * Token stream that interleaves single terms with two-word terms (bigrams).
 *
 * <p>Each call to {@link #nextTerm()} returns either the next single term
 * obtained from {@code moveMarkers()}, or the space-joined pair formed by the
 * two most recently read single terms. A pair is only produced when both
 * terms are present and, if a stop-word set was supplied, neither of them is
 * contained in it.
 */
public class BigramTokenStream extends TokenStream {
    // Scratch storage for the two characters immediately preceding the
    // current term start (document[begin - 2] and document[begin - 1]).
    private char _beginM2;
    private char _beginM1;
    // Most recent single term returned by moveMarkers().
    private String _lastTerm;
    // The single term read before _lastTerm, or null when no bigram may be
    // built from it (already emitted, or separated by a disallowed character).
    private String _termBefore;
    // Terms that must not take part in a bigram; null disables the check.
    private Set<String> _stopWords;

    /**
     * Creates a bigram stream.
     *
     * @param str forwarded unchanged to the {@code TokenStream} constructor
     * @param z   forwarded unchanged to the {@code TokenStream} constructor
     * @param set stop words excluded from bigrams; may be {@code null}, in
     *            which case no stop-word filtering is applied
     */
    public BigramTokenStream(String str, boolean z, Set<String> set) {
        super(str, z);
        _stopWords = set;
    }

    /**
     * Returns the next term of the stream: a pending bigram if one can be
     * formed, otherwise the next single term from {@code moveMarkers()}.
     *
     * <p>Note: decompiler metadata indicates this method was originally
     * declared {@code protected}.
     *
     * @return the bigram {@code "<previous> <last>"}, or whatever
     *         {@code moveMarkers()} returned for the next single term
     */
    @Override // org.bigml.mimir.nlp.tokenization.TokenStream
    public String nextTerm() {
        // Phase 1: emit a pending bigram when both halves are available and
        // neither half is a stop word.
        if (_lastTerm != null && _termBefore != null) {
            boolean hitsStopWord = _stopWords != null
                    && (_stopWords.contains(_lastTerm) || _stopWords.contains(_termBefore));
            if (!hitsStopWord) {
                String bigram = _termBefore + " " + _lastTerm;
                // Clearing the previous term prevents emitting the same pair twice.
                _termBefore = null;
                return bigram;
            }
        }

        // Phase 2: shift the window by one term.
        _termBefore = _lastTerm;
        _lastTerm = moveMarkers();

        // The look-behind below needs two valid characters before `begin`
        // and `begin` itself inside the document; otherwise keep the pair as is.
        // NOTE(review): presumably `begin` is the start index of the term just
        // read by moveMarkers() — confirm against TokenStream.
        if (begin < 2 || begin >= docLength) {
            return _lastTerm;
        }

        _beginM1 = document[begin - 1];
        boolean precededByBlank = _beginM1 == ' ' || _beginM1 == '\n';
        if (!precededByBlank) {
            // Something other than a space/newline directly precedes the
            // term: the previous term may not be paired with it.
            _termBefore = null;
            return _lastTerm;
        }

        // A blank precedes the term; the pair survives only if the character
        // before that blank is a letter, a digit, or an apostrophe.
        _beginM2 = document[begin - 2];
        if (_beginM2 != '\'' && !Character.isLetterOrDigit(_beginM2)) {
            _termBefore = null;
        }
        return _lastTerm;
    }
}
