package org.tinygroup.mmseg4j;

import com.chenlb.mmseg4j.MMSeg;
import com.chenlb.mmseg4j.Seg;
import com.chenlb.mmseg4j.Word;
import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;

/* loaded from: input_file:WEB-INF/lib/org.tinygroup.mmseg4j-2.2.0.jar:org/tinygroup/mmseg4j/NewMMSegTokenizer.class */
/**
 * Lucene {@link Tokenizer} that delegates segmentation to an mmseg4j {@link MMSeg}
 * instance and exposes each produced {@link Word} through the term, offset and type
 * attributes.
 */
public class NewMMSegTokenizer extends Tokenizer {
    /** Segmenter that is asked for the next {@link Word} on every {@link #incrementToken()} call. */
    private final MMSeg mmSeg;
    /** Receives the characters of the current word. */
    private final CharTermAttribute termAtt;
    /** Receives the start/end offsets of the current word. */
    private final OffsetAttribute offsetAtt;
    /** Receives the type string reported by the current word. */
    private final TypeAttribute typeAtt;

    /**
     * Creates a tokenizer that segments {@code reader} using {@code seg}.
     *
     * @param seg    segmentation strategy handed to the underlying {@link MMSeg}
     * @param reader character source; passed both to the Lucene base class and to {@link MMSeg}
     */
    public NewMMSegTokenizer(Seg seg, Reader reader) {
        super(reader);
        this.mmSeg = new MMSeg(reader, seg);
        this.termAtt = addAttribute(CharTermAttribute.class);
        this.offsetAtt = addAttribute(OffsetAttribute.class);
        this.typeAtt = addAttribute(TypeAttribute.class);
    }

    /**
     * Resets the base tokenizer and then re-points the segmenter at the current
     * {@code input} reader of this tokenizer.
     *
     * @throws IOException if the base class reset fails
     */
    @Override // org.apache.lucene.analysis.Tokenizer, org.apache.lucene.analysis.TokenStream
    public void reset() throws IOException {
        super.reset();
        this.mmSeg.reset(this.input);
    }

    /**
     * Advances to the next word produced by the segmenter and copies its text,
     * offsets and type into this stream's attributes.
     *
     * @return {@code true} if a word was produced, {@code false} once the segmenter
     *         returns {@code null}
     * @throws IOException if reading from the underlying input fails
     */
    @Override // org.apache.lucene.analysis.TokenStream
    public final boolean incrementToken() throws IOException {
        clearAttributes();
        Word next = this.mmSeg.next();
        if (next == null) {
            // Kept from the original implementation.
            // NOTE(review): Lucene consumers normally call end() themselves after the
            // last token - confirm whether this extra call is still wanted.
            end();
            return false;
        }
        this.termAtt.copyBuffer(next.getSen(), next.getWordOffset(), next.getLength());
        // Route offsets through correctOffset() so they refer to the original text when
        // the input is wrapped in a CharFilter; without one the values pass through unchanged.
        this.offsetAtt.setOffset(
                correctOffset(next.getStartOffset()),
                correctOffset(next.getEndOffset()));
        this.typeAtt.setType(next.getType());
        return true;
    }
}
