package de.datexis.parvec.encoder;

import de.datexis.common.WordHelpers;
import de.datexis.model.Sentence;
import de.datexis.model.Span;
import de.datexis.model.Token;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:de/datexis/parvec/encoder/ParVecWordsEncoder.class */
/**
 * Encoder that derives span embeddings from the individual word vectors held by the
 * {@code model} of {@link ParVecEncoder}.
 *
 * <p>A {@link Sentence} is encoded as the mean of the vectors of its known words, a
 * {@link Token} as the vector of its single word, and any other {@link Span} is handed to
 * the {@code encode(String)} overload (not defined in this class).
 */
public class ParVecWordsEncoder extends ParVecEncoder {
    protected static final Logger log = LoggerFactory.getLogger(ParVecWordsEncoder.class);

    /**
     * Returns the dimensionality of the produced embeddings.
     *
     * @return the value of the {@code layerSize} field
     */
    @Override
    public long getEmbeddingVectorSize() {
        return layerSize;
    }

    /**
     * Encodes a span using the word vectors of the model.
     *
     * <ul>
     *   <li>{@link Sentence}: the tokenized text is trimmed, newlines become {@code *NL*} and
     *       tabs become {@code *t*}; the text is split on spaces, every non-blank word is
     *       preprocessed and looked up, and the vectors that were found are averaged. If no
     *       word is found, an all-zero array of shape {@code (size, 1)} is returned.</li>
     *   <li>{@link Token}: the preprocessed token text is looked up; the model's array is
     *       returned as-is, or an all-zero {@code (size, 1)} array if the lookup yields
     *       {@code null}.</li>
     *   <li>Any other span: delegates to {@code encode(span.getText())}.</li>
     * </ul>
     *
     * @param span the span to encode
     * @return the embedding for {@code span}
     */
    @Override
    public INDArray encode(Span span) {
        if (span instanceof Sentence) {
            String tokenized = ((Sentence) span).toTokenizedString().trim();
            // Make line breaks and tabs survive the whitespace split as explicit marker words.
            String text = tokenized.replaceAll("\n", " *NL* ").replaceAll("\t", " *t* ");

            INDArray sum = Nd4j.zeros(getEmbeddingVectorSize(), 1L);
            int knownWords = 0;
            for (String word : WordHelpers.splitSpaces(text)) {
                if (word.trim().isEmpty()) {
                    continue;
                }
                INDArray wordVector = model.getWordVectorMatrix(preprocessor.preProcess(word));
                if (wordVector == null) {
                    // Word is unknown to the model; it does not contribute to the mean.
                    continue;
                }
                // The lookup result is transposed before being accumulated into the (size, 1) sum.
                sum.addi(wordVector.transpose());
                knownWords++;
            }

            if (knownWords == 0) {
                // Nothing to average: hand back the zero array instead of dividing by zero.
                return sum;
            }
            return sum.div(knownWords);
        }

        if (span instanceof Token) {
            String tokenText = ((Token) span).getText();
            INDArray tokenVector = model.getWordVectorMatrix(preprocessor.preProcess(tokenText));
            if (tokenVector != null) {
                // NOTE(review): unlike the sentence branch, the lookup result is returned without
                // transpose(), while the fallback below is (size, 1). If the model yields
                // (1, size) arrays, found and missing tokens differ in shape. Confirm with callers.
                return tokenVector;
            }
            return Nd4j.zeros(getEmbeddingVectorSize(), 1L);
        }

        return encode(span.getText());
    }
}
