package de.datexis.cdv.encoder;

import de.datexis.common.Resource;
import de.datexis.common.WordHelpers;
import de.datexis.encoder.IEncoder;
import de.datexis.encoder.impl.BloomEncoder;
import de.datexis.preprocess.IdentityPreprocessor;
import de.datexis.retrieval.tagger.LSTMSentenceTaggerIterator;
import de.datexis.tagger.AbstractMultiDataSetIterator;
import java.util.Arrays;
import java.util.Set;
import java.util.TreeSet;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:de/datexis/cdv/encoder/EntityEncoder.class */
/**
 * A {@link BloomEncoder} specialisation that installs an {@link IdentityPreprocessor}
 * and can be trained from the labels found in a {@link Resource}.
 *
 * <p>Also exposes a static list of stop words for disease names via
 * {@link #getDiseaseStopWords()}.
 */
public class EntityEncoder extends BloomEncoder {
    /** Logger bound to the runtime class of this instance. */
    protected final Logger log;

    /**
     * Creates an encoder using the superclass defaults and an
     * {@link IdentityPreprocessor}.
     */
    public EntityEncoder() {
        this.log = LoggerFactory.getLogger(getClass());
        this.preprocessor = new IdentityPreprocessor();
    }

    /**
     * Creates an encoder for the given language using an {@link IdentityPreprocessor}.
     *
     * @param i        first argument forwarded to the {@link BloomEncoder} constructor
     *                 (presumably the bit-vector size; verify against BloomEncoder)
     * @param language language stored on this encoder and later passed to training
     */
    public EntityEncoder(int i, WordHelpers.Language language) {
        // The second superclass argument is fixed to 5 (presumably the number of hash
        // functions; TODO confirm against BloomEncoder).
        super(i, 5);
        this.log = LoggerFactory.getLogger(getClass());
        this.preprocessor = new IdentityPreprocessor();
        this.language = language;
    }

    /**
     * Trains this encoder on the labels read from the given resource.
     *
     * <p>The labels are obtained from an {@link LSTMSentenceTaggerIterator} created in
     * the {@code ENCODE} stage without any input or target encoder, and are then handed
     * to the superclass {@code trainModel} together with this encoder's language.
     *
     * @param resource the resource the label iterator reads from
     */
    public void trainModel(Resource resource) {
        // NOTE(review): the iterator is always built with Language.EN while the superclass
        // call receives this.language; confirm that this asymmetry is intended.
        super.trainModel(
                new LSTMSentenceTaggerIterator(
                        AbstractMultiDataSetIterator.Stage.ENCODE,
                        (IEncoder) null,
                        (IEncoder) null,
                        resource,
                        "utf-8",
                        WordHelpers.Language.EN,
                        true,
                        1).getLabels(),
                1,
                1,
                this.language);
    }

    /**
     * Encodes the given string by delegating to the superclass implementation.
     *
     * @param str the text to encode
     * @return the encoding produced by {@link BloomEncoder}
     */
    public INDArray encode(String str) {
        return super.encode(str);
    }

    /**
     * Returns the superclass encoding of the given string.
     *
     * @param str the text to process
     * @return the same result as {@link #encode(String)} on the superclass
     */
    public INDArray decode(String str) {
        // NOTE(review): this delegates to super.encode, not to a decode method; kept as is
        // because callers may rely on it. Confirm this is intentional.
        return super.encode(str);
    }

    /**
     * Returns the stop words (punctuation, function words and generic disease terms)
     * for disease names.
     *
     * @return a new, mutable, naturally ordered set; each call returns a fresh instance
     */
    public static Set<String> getDiseaseStopWords() {
        // Diamond operator instead of a raw TreeSet so the result is a properly typed Set<String>.
        return new TreeSet<>(Arrays.asList(
                ",", ".", "(", ")", "[", "]", ";", "'s", "-",
                "and", "or", "of", "in", "the", "with", "type",
                "(disorder)", "unspecified", "disorder", "disorders",
                "disease", "diseases", "syndrome",
                "condition", "conditions", "problem", "problems",
                "infection", "infections", "illness", "illnesses"));
    }
}
