package de.citec.scie.annotators.structure;

import de.citec.scie.descriptors.Sentence;
import de.citec.scie.descriptors.Token;
import de.citec.scie.util.ResourceFinder;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.tagger.maxent.MaxentTagger;
import java.io.StringReader;
import java.util.Iterator;
import java.util.List;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.jcas.JCas;

/* loaded from: input_file:de/citec/scie/annotators/structure/StanfordTokenizer.class */
public class StanfordTokenizer extends JCasAnnotator_ImplBase {
    private static final MaxentTagger tagger = new MaxentTagger(ResourceFinder.find("data/english-left3words-distsim.tagger").getPath());

    public void process(JCas jCas) throws AnalysisEngineProcessException {
        Iterator it = MaxentTagger.tokenizeText(new StringReader(jCas.getDocumentText())).iterator();
        while (it.hasNext()) {
            List<TaggedWord> tagSentence = tagger.tagSentence((List) it.next());
            if (!tagSentence.isEmpty()) {
                new Sentence(jCas, ((TaggedWord) tagSentence.get(0)).beginPosition(), ((TaggedWord) tagSentence.get(tagSentence.size() - 1)).endPosition()).addToIndexes();
                for (TaggedWord taggedWord : tagSentence) {
                    Token token = new Token(jCas, taggedWord.beginPosition(), taggedWord.endPosition());
                    token.setPOS(taggedWord.tag());
                    token.addToIndexes();
                }
            }
        }
    }
}
