package org.bigml.mimir.nlp.tokenization;

import java.io.Closeable;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.NoSuchElementException;

/* loaded from: input_file:org/bigml/mimir/nlp/tokenization/TokenStream.class */
/**
 * Base class for tokenizers that walk over an in-memory character buffer and
 * hand out terms one at a time.
 *
 * <p>Subclasses implement {@link #nextTerm()}; {@link #moveMarkers()} offers a
 * ready-made scanner that splits on anything that is not a letter, a digit or
 * an apostrophe. The stream is stateful: {@link #iterator()} does not rewind
 * it, so every iterator obtained from the same instance draws from the same
 * cursor.
 */
public abstract class TokenStream implements Closeable, Iterable<String> {
    /** Maximum number of characters {@link #moveMarkers()} places in a single term. */
    protected static final int MAX_TERM_LENGTH = 32;

    /** Length of {@link #document}, cached by {@link #setDocument(String, boolean)}. */
    protected int docLength;

    /** Index of the first character of the term most recently found by {@link #moveMarkers()}. */
    protected int begin;

    /** Index one past the last character of the term most recently found by {@link #moveMarkers()}. */
    protected int end;

    /** Characters being tokenized; stays {@code null} until a document is set. */
    protected char[] document;

    /**
     * Iterator view over the enclosing stream.
     *
     * <p>One term is fetched ahead of time (in the constructor and after each
     * {@link #next()}), so merely creating an iterator already consumes a term
     * from the stream.
     *
     * <p>NOTE(review): the decompiler reports this class was originally private.
     */
    public class TokenIterator implements Iterator<String> {
        /** Look-ahead term; {@code null} once the stream has no more terms. */
        private String term;

        private TokenIterator() {
            this.term = TokenStream.this.nextTerm();
        }

        /** @return {@code true} while a look-ahead term is available */
        @Override
        public boolean hasNext() {
            return this.term != null;
        }

        /**
         * Returns the look-ahead term and fetches the following one.
         *
         * @return the next term, never {@code null}
         * @throws NoSuchElementException if the stream is exhausted
         */
        @Override
        public String next() {
            if (this.term == null) {
                // Honour the Iterator contract and avoid poking an exhausted stream again.
                throw new NoSuchElementException("token stream is exhausted");
            }
            String current = this.term;
            this.term = TokenStream.this.nextTerm();
            return current;
        }

        /**
         * Removal is not supported: terms are views onto the document buffer.
         *
         * @throws UnsupportedOperationException always
         */
        @Override
        public void remove() {
            throw new UnsupportedOperationException("remove");
        }
    }

    /**
     * Produces the next term of the stream.
     *
     * <p>NOTE(review): the decompiler reports this method was originally protected.
     *
     * @return the next term, or {@code null} when there are no more terms
     *         ({@link TokenIterator} treats {@code null} as end of stream)
     */
    public abstract String nextTerm();

    /** Creates a stream with no document; {@link #document} stays {@code null} until one is set. */
    public TokenStream() {
    }

    /**
     * Creates a stream over the given text.
     *
     * @param str text to tokenize; {@code null} is treated as empty
     * @param z {@code true} to keep the text as is, {@code false} to lower-case it first
     */
    public TokenStream(String str, boolean z) {
        // NOTE(review): this calls an overridable method from a constructor; kept
        // because subclasses may rely on their override being invoked here.
        setDocument(str, z);
    }

    /**
     * Replaces the document, keeping its case, and rewinds the markers.
     *
     * @param str text to tokenize; {@code null} is treated as empty
     */
    protected void setDocument(String str) {
        setDocument(str, true);
    }

    /**
     * Replaces the document and rewinds the markers to the start.
     *
     * @param str text to tokenize; {@code null} is treated as empty
     * @param z {@code true} to keep the text as is, {@code false} to lower-case it first
     */
    protected void setDocument(String str, boolean z) {
        if (str == null) {
            this.document = new char[0];
        } else if (z) {
            this.document = str.toCharArray();
        } else {
            // Locale.ROOT keeps tokens identical on every machine; the default
            // locale would, for instance, map 'I' to a dotless i under Turkish.
            this.document = str.toLowerCase(Locale.ROOT).toCharArray();
        }
        this.begin = 0;
        this.end = 0;
        this.docLength = this.document.length;
    }

    /**
     * Advances {@link #begin} and {@link #end} to the next term and returns it.
     *
     * <p>A term starts at the first letter or digit at or after the previous
     * {@code end} and continues over letters, digits and apostrophes, up to
     * {@link #MAX_TERM_LENGTH} characters. A longer run is cut there and the
     * remainder is returned by subsequent calls.
     *
     * <p>NOTE(review): the decompiler reports this method was originally protected.
     *
     * @return the next term, or {@code null} when the document is exhausted
     */
    public String moveMarkers() {
        // Skip separators between the previous term and the next one.
        this.begin = this.end;
        while (this.begin < this.docLength && !Character.isLetterOrDigit(this.document[this.begin])) {
            this.begin++;
        }
        if (this.begin >= this.docLength) {
            // Pin the markers instead of letting end creep further past the
            // buffer on every call made after exhaustion.
            this.end = this.begin;
            return null;
        }

        // The first character is already known to be a letter or digit.
        this.end = this.begin + 1;
        while (this.end < this.docLength
                && this.end - this.begin < MAX_TERM_LENGTH
                && isTermCharacter(this.document[this.end])) {
            this.end++;
        }
        return new String(this.document, this.begin, this.end - this.begin);
    }

    /** Tells whether {@code c} may appear after the first character of a term. */
    private static boolean isTermCharacter(char c) {
        return Character.isLetterOrDigit(c) || c == '\'';
    }

    /**
     * @return the internal character buffer itself (not a copy), or {@code null}
     *         if no document has been set
     */
    public char[] getDocument() {
        return this.document;
    }

    /** @return the document text, or an empty string if no document has been set */
    @Override
    public String toString() {
        return this.document == null ? "" : new String(this.document);
    }

    /**
     * Drains the remaining terms into a list and closes the stream, even when
     * tokenization fails part-way.
     *
     * @return the remaining terms, in stream order
     */
    public List<String> toList() {
        List<String> terms = new ArrayList<>();
        try {
            for (String term : this) {
                terms.add(term);
            }
        } finally {
            close();
        }
        return terms;
    }

    /**
     * @return an iterator over the remaining terms; it shares this stream's
     *         cursor and pre-fetches one term immediately
     */
    @Override
    public Iterator<String> iterator() {
        return new TokenIterator();
    }

    /** Does nothing here; subclasses holding resources override it. */
    @Override
    public void close() {
    }
}
