package ai.idylnlp.nlp.sentence;

import ai.idylnlp.model.nlp.SentenceDetector;
import ai.idylnlp.model.nlp.Span;
import com.neovisionaries.i18n.LanguageCode;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import net.loomchild.segment.srx.SrxDocument;
import net.loomchild.segment.srx.SrxTextIterator;
import net.loomchild.segment.srx.io.Srx2SaxParser;

/* loaded from: input_file:ai/idylnlp/nlp/sentence/SegmentedSentenceDetector.class */
/**
 * {@link SentenceDetector} that splits text into sentences using SRX
 * segmentation rules evaluated by the loomchild "segment" library.
 *
 * <p>The SRX rules are parsed once in the constructor; every detection call
 * then runs an {@link SrxTextIterator} over the input using the ISO 639-3
 * (alpha-3) code of the configured language.
 */
public class SegmentedSentenceDetector implements SentenceDetector {
    private final LanguageCode languageCode;
    private final SrxDocument srxDocument;

    /**
     * Creates a detector from SRX rules supplied as a string.
     *
     * @param str the SRX document content (XML text, not a file path)
     * @param languageCode the language whose alpha-3 code is passed to the
     *        SRX iterator when segmenting
     * @throws UnsupportedEncodingException never thrown by this implementation;
     *         kept in the signature so existing callers continue to compile
     */
    public SegmentedSentenceDetector(String str, LanguageCode languageCode) throws UnsupportedEncodingException {
        this.languageCode = languageCode;
        BufferedReader srxReader = new BufferedReader(
                new InputStreamReader(
                        new ByteArrayInputStream(str.getBytes(StandardCharsets.UTF_8)),
                        StandardCharsets.UTF_8));
        // NOTE(review): "validate" presumably turns on SRX schema validation in the parser - confirm.
        HashMap<String, Object> parserParameters = new HashMap<>();
        parserParameters.put("validate", true);
        this.srxDocument = new Srx2SaxParser(parserParameters).parse(srxReader);
    }

    /**
     * Returns the language handled by this detector.
     *
     * @return a single-element list holding the alpha-3 code of the configured language
     */
    public List<String> getLanguageCodes() {
        return Arrays.asList(this.languageCode.getAlpha3().toString());
    }

    /**
     * Splits the text into sentences.
     *
     * @param str the text to segment
     * @return the sentence strings obtained by applying the spans from
     *         {@link #sentPosDetect(String)} to {@code str}
     */
    public String[] sentDetect(String str) {
        return Span.spansToStrings(sentPosDetect(str), str);
    }

    /**
     * Locates each sentence within the text.
     *
     * <p>Sentences are searched for from the end of the previously located
     * sentence rather than from the start of the text, so a sentence whose
     * text also occurs earlier in the input is given its own position instead
     * of the position of the first occurrence.
     *
     * @param str the text to segment
     * @return one span per segment, in segment order, with start (inclusive)
     *         and end (exclusive) character offsets into {@code str}
     */
    public Span[] sentPosDetect(String str) {
        List<Span> spans = new ArrayList<>();
        int cursor = 0;
        for (String sentence : tokenize(str)) {
            int start = str.indexOf(sentence, cursor);
            if (start < 0) {
                // Not found after the cursor: fall back to a search over the whole text.
                start = str.indexOf(sentence);
            }
            int end = start + sentence.length();
            spans.add(new Span(start, end));
            if (start >= cursor) {
                // Only ever move forward so later sentences are searched for after this one.
                cursor = end;
            }
        }
        return spans.toArray(new Span[0]);
    }

    /**
     * Runs the SRX iterator over the text and collects its segments.
     *
     * @param str the text to segment
     * @return the segments in iteration order, each with leading and trailing whitespace trimmed
     */
    private List<String> tokenize(String str) {
        List<String> segments = new ArrayList<>();
        SrxTextIterator segmentIterator =
                new SrxTextIterator(this.srxDocument, this.languageCode.getAlpha3().toString(), str);
        while (segmentIterator.hasNext()) {
            segments.add(segmentIterator.next().trim());
        }
        return segments;
    }
}
