package ai.idylnlp.nlp.tokenizers;

import ai.idylnlp.model.exceptions.ModelLoaderException;
import ai.idylnlp.model.nlp.Span;
import ai.idylnlp.model.nlp.Stemmer;
import ai.idylnlp.model.nlp.Tokenizer;
import ai.idylnlp.opennlp.custom.utils.SpansToSpans;
import com.neovisionaries.i18n.LanguageCode;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.List;
import opennlp.tools.tokenize.TokenizerME;
import opennlp.tools.tokenize.TokenizerModel;

/* loaded from: input_file:ai/idylnlp/nlp/tokenizers/ModelTokenizer.class */
/**
 * {@link Tokenizer} implementation that delegates to an OpenNLP {@link TokenizerME}
 * built from a {@link TokenizerModel}, tagged with the single language it was
 * constructed for.
 */
public class ModelTokenizer implements Tokenizer {
    /** OpenNLP tokenizer that performs the actual tokenization. */
    private final TokenizerME tokenizer;

    /** Language reported by {@link #getLanguageCodes()}. */
    private final LanguageCode languageCode;

    /**
     * Creates a tokenizer by reading a tokenizer model from a stream.
     *
     * <p>The stream is always closed by this constructor, whether or not the model
     * could be loaded.
     *
     * @param inputStream stream containing the serialized tokenizer model
     * @param languageCode language this tokenizer is reported to support
     * @throws ModelLoaderException if reading the model or closing the stream fails
     *     with an {@link IOException}
     */
    public ModelTokenizer(InputStream inputStream, LanguageCode languageCode) throws ModelLoaderException {
        this.languageCode = languageCode;
        // try-with-resources guarantees the stream is released even when the model
        // constructor throws; previously close() was skipped on that path.
        try (InputStream modelStream = inputStream) {
            this.tokenizer = new TokenizerME(new TokenizerModel(modelStream));
        } catch (IOException e) {
            throw new ModelLoaderException("Unable to load token model.", e);
        }
    }

    /**
     * Creates a tokenizer from an already loaded model.
     *
     * @param tokenizerModel model handed to the underlying {@link TokenizerME}
     * @param languageCode language this tokenizer is reported to support
     */
    public ModelTokenizer(TokenizerModel tokenizerModel, LanguageCode languageCode) {
        this.languageCode = languageCode;
        this.tokenizer = new TokenizerME(tokenizerModel);
    }

    /**
     * Returns the language this tokenizer was constructed with.
     *
     * @return a one-element, fixed-size list holding the string form of the
     *     language's alpha-3 code
     */
    public List<String> getLanguageCodes() {
        return Arrays.asList(this.languageCode.getAlpha3().toString());
    }

    /**
     * Splits the text into tokens using the underlying model.
     *
     * @param str text to tokenize
     * @return the tokens produced by the underlying {@link TokenizerME}
     */
    public String[] tokenize(String str) {
        return this.tokenizer.tokenize(str);
    }

    /**
     * Tokenizes the text and returns token positions rather than token strings.
     *
     * @param str text to tokenize
     * @return the spans reported by the underlying {@link TokenizerME}, passed
     *     through {@link SpansToSpans#toSpans} (presumably converting OpenNLP spans
     *     to this project's {@link Span} type; confirm against that helper)
     */
    public Span[] tokenizePos(String str) {
        return SpansToSpans.toSpans(this.tokenizer.tokenizePos(str));
    }

    /**
     * Tokenizes the text and replaces every token with its stemmed form.
     *
     * @param str text to tokenize
     * @param stemmer stemmer applied to each token; must not be {@code null} when
     *     at least one token is produced
     * @return the stemmed tokens, in the order the tokenizer emitted them
     */
    public String[] tokenize(String str, Stemmer stemmer) {
        String[] tokens = this.tokenizer.tokenize(str);
        // Stem in place: the array comes straight from the tokenizer call above and
        // is not referenced anywhere else in this class.
        for (int i = 0; i < tokens.length; i++) {
            tokens[i] = stemmer.stem(tokens[i]);
        }
        return tokens;
    }
}
