package opennlp.tools.formats.conllu;

import java.io.IOException;
import opennlp.tools.tokenize.TokenSample;
import opennlp.tools.util.FilterObjectStream;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.StringUtil;

/* loaded from: input_file:WEB-INF/lib/opennlp-tools-1.9.3.jar:opennlp/tools/formats/conllu/ConlluTokenSampleStream.class */
/**
 * Adapts a stream of {@link ConlluSentence}s into a stream of {@link TokenSample}s.
 *
 * <p>For each sentence, the raw text taken from the sentence's text comment is
 * annotated with {@link TokenSample#DEFAULT_SEPARATOR_CHARS} after every token that
 * is not already followed by whitespace, and the annotated text is then handed to
 * {@link TokenSample#parse(String, String)}.
 */
public class ConlluTokenSampleStream extends FilterObjectStream<ConlluSentence, TokenSample> {

    /**
     * Creates a stream that converts the sentences read from the given stream.
     *
     * @param objectStream the underlying stream of sentences
     */
    public ConlluTokenSampleStream(ObjectStream<ConlluSentence> objectStream) {
        super(objectStream);
    }

    /**
     * Reads the next sentence from the underlying stream and converts it.
     *
     * @return the converted sample, or {@code null} when the underlying stream
     *         returns {@code null}
     * @throws IOException if the sentence has no text comment, or if the form of a
     *         word line cannot be found in the text at or after the end of the
     *         previously matched token
     */
    @Override // opennlp.tools.util.ObjectStream
    public TokenSample read() throws IOException {
        ConlluSentence sentence = (ConlluSentence) this.samples.read();
        if (sentence == null) {
            return null;
        }

        String rawText = sentence.getTextComment();
        if (rawText == null) {
            throw new IOException("Sentence is missing raw text sample!");
        }

        final String separator = TokenSample.DEFAULT_SEPARATOR_CHARS;
        StringBuilder text = new StringBuilder(rawText);
        // Position in `text` from which the next token is searched; always points
        // just past the previously matched token (and past any separator inserted
        // after it).
        int searchFrom = 0;

        for (ConlluWordLine wordLine : sentence.getWordLines()) {
            // Word lines whose id contains '.' are skipped and never matched against
            // the text (presumably CoNLL-U empty nodes, which have no surface form
            // in the raw sentence - confirm against the format specification).
            if (wordLine.getId().contains(".")) {
                continue;
            }

            String form = wordLine.getForm();
            int tokenStart = text.indexOf(form, searchFrom);
            if (tokenStart == -1) {
                throw new IOException(String.format("Failed to match token [%s] in sentence [%s] with text [%s]", form, sentence.getSentenceIdComment(), text));
            }

            searchFrom = tokenStart + form.length();
            // A separator is only needed when the token is directly followed by a
            // non-whitespace character; nothing is appended at the end of the text.
            if (searchFrom < text.length() && !StringUtil.isWhitespace(text.charAt(searchFrom))) {
                text.insert(searchFrom, separator);
                // Skip the separator that was just inserted so that the next token
                // cannot be matched inside the separator text itself.
                searchFrom += separator.length();
            }
        }

        return TokenSample.parse(text.toString(), separator);
    }
}
