package de.datexis.cdv.encoder;

import de.datexis.cdv.index.AspectIndex;
import de.datexis.cdv.preprocess.AspectPreprocessor;
import de.datexis.common.Resource;
import de.datexis.common.WordHelpers;
import de.datexis.encoder.IEncoder;
import de.datexis.encoder.impl.BloomEncoder;
import de.datexis.retrieval.tagger.LSTMSentenceTaggerIterator;
import de.datexis.tagger.AbstractMultiDataSetIterator;
import java.util.List;
import java.util.stream.Collectors;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:de/datexis/cdv/encoder/AspectEncoder.class */
/**
 * {@link BloomEncoder} specialisation for aspect headings.
 *
 * <p>The encoder installs an {@link AspectPreprocessor} and encodes a heading by splitting it
 * on {@link AspectIndex#HEADING_SEPARATOR_REGEX}, encoding each part with the parent encoder
 * and summing the resulting vectors.
 */
public class AspectEncoder extends BloomEncoder {
    /** Logger bound to the runtime class, so subclasses log under their own name. */
    protected final Logger log = LoggerFactory.getLogger(getClass());

    /** Minimum word frequency forwarded to the parent's training routine. */
    protected int minWordFreq;

    /** Minimum word length forwarded to the parent's training routine; defaults to 3. */
    protected int minWordLength = 3;

    /**
     * Creates an encoder using the parent's default configuration and an
     * {@link AspectPreprocessor}. {@code minWordFreq} keeps its default of 0.
     */
    public AspectEncoder() {
        this.preprocessor = new AspectPreprocessor();
    }

    /**
     * Creates an encoder with an explicit parent configuration.
     *
     * @param i first argument passed through to the {@link BloomEncoder} constructor
     *     (presumably the bit-vector size; confirm against BloomEncoder)
     * @param language language stored on this encoder and later passed to the parent's training
     * @param i2 minimum word frequency used by {@link #trainModel(Resource)}
     */
    public AspectEncoder(int i, WordHelpers.Language language, int i2) {
        // NOTE(review): the literal 5 is the second BloomEncoder constructor argument;
        // its meaning is not visible from this file.
        super(i, 5);
        this.preprocessor = new AspectPreprocessor();
        this.language = language;
        this.minWordFreq = i2;
    }

    /** @return the minimum word frequency used for training */
    public int getMinWordFreq() {
        return this.minWordFreq;
    }

    /** @param i the minimum word frequency to use for training */
    public void setMinWordFreq(int i) {
        this.minWordFreq = i;
    }

    /** @return the minimum word length used for training */
    public int getMinWordLength() {
        return this.minWordLength;
    }

    /** @param i the minimum word length to use for training */
    public void setMinWordLength(int i) {
        this.minWordLength = i;
    }

    /**
     * Trains the encoder on the labels read from the given resource.
     *
     * <p>Labels are obtained from an {@link LSTMSentenceTaggerIterator} in
     * {@code ENCODE} stage; every {@code '/'} in a label is replaced by a space before the
     * labels are handed to the parent's training routine together with the configured
     * minimum word frequency, minimum word length and language.
     *
     * @param resource the resource the label iterator reads from
     */
    public void trainModel(Resource resource) {
        // NOTE(review): the iterator is always created with Language.EN, while the parent
        // training below receives this.language - confirm the mismatch is intended.
        LSTMSentenceTaggerIterator labelSource = new LSTMSentenceTaggerIterator(
                AbstractMultiDataSetIterator.Stage.ENCODE,
                (IEncoder) null,
                (IEncoder) null,
                resource,
                "utf-8",
                WordHelpers.Language.EN,
                true,
                1);

        // Literal character replacement; no regex needed for a single '/'.
        List<String> headings = labelSource.getLabels().stream()
                .map(label -> label.replace('/', ' '))
                .collect(Collectors.toList());

        super.trainModel(headings, this.minWordFreq, this.minWordLength, this.language);
    }

    /**
     * Encodes a heading as the sum of the encodings of its parts.
     *
     * <p>The heading is split on {@link AspectIndex#HEADING_SEPARATOR_REGEX}; each part is
     * encoded by the parent encoder and added into a zero-initialised {@code FLOAT} array of
     * shape {@code [getEmbeddingVectorSize(), 1]}. Parts for which the parent returns
     * {@code null} are skipped.
     *
     * @param str the heading to encode; {@code null} yields the all-zero vector
     * @return the accumulated vector, never {@code null}
     */
    public INDArray encode(String str) {
        INDArray sum = Nd4j.zeros(DataType.FLOAT, new long[]{getEmbeddingVectorSize(), 1});
        if (str == null) {
            // A missing heading contributes nothing, just like a part that cannot be encoded.
            return sum;
        }
        for (String part : str.split(AspectIndex.HEADING_SEPARATOR_REGEX)) {
            INDArray partVector = super.encode(part);
            if (partVector != null) {
                sum.addi(partVector);
            }
        }
        return sum;
    }

    /**
     * Returns the same vector as {@link #encode(String)} for the given heading.
     *
     * @param str the heading to encode
     * @return the result of {@code encode(str)}
     */
    public INDArray decode(String str) {
        return encode(str);
    }
}
