package de.datexis.index.impl;

import de.datexis.common.Resource;
import de.datexis.common.WordHelpers;
import de.datexis.index.ArticleRef;
import de.datexis.index.WikiDataArticle;
import de.datexis.index.encoder.EntityEncoder;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.apache.lucene.document.Document;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:de/datexis/index/impl/VectorArticleIndex.class */
/**
 * A {@link LuceneArticleIndex} that additionally stores an encoded vector for each
 * indexed article and uses those vectors to score name-query candidates by cosine
 * similarity against an encoded mention.
 */
public class VectorArticleIndex extends LuceneArticleIndex {
    protected static final Logger log = LoggerFactory.getLogger(VectorArticleIndex.class);

    /** Number of candidates requested from {@code queryNames} before vector scoring. */
    protected static final int NUM_PARVEC_CANDIDATES = 512;

    /**
     * NOTE(review): this constant is not referenced anywhere in this class, and the
     * constructor passes {@code Strategy.NAME} rather than this value. Confirm which
     * strategy is intended before relying on it.
     */
    protected static final EntityEncoder.Strategy strategy = EntityEncoder.Strategy.NAME_CONTEXT;

    /** Encoder used both for indexed articles and for query mentions. */
    EntityEncoder encoder;

    /**
     * Creates the index with an {@link EntityEncoder} built from the given resource
     * using {@code EntityEncoder.Strategy.NAME}.
     *
     * @param resource resource handed to the {@link EntityEncoder} constructor
     * @throws IOException declared by this constructor; presumably raised while the
     *         encoder or the parent index is set up (not visible from this class)
     */
    public VectorArticleIndex(Resource resource) throws IOException {
        this.encoder = new EntityEncoder(resource, EntityEncoder.Strategy.NAME);
    }

    /**
     * Builds the Lucene document via the parent implementation and, when the encoder
     * yields a vector for the article, attaches it under the field name {@code "vector"}.
     * If encoding yields {@code null} an error is logged and the document is returned
     * without a vector field.
     *
     * @param wikiDataArticle the article to index
     * @return the document produced by the parent class, with the vector field added when available
     */
    @Override
    protected Document createLuceneDocument(WikiDataArticle wikiDataArticle) {
        Document document = super.createLuceneDocument(wikiDataArticle);
        INDArray entityVector = this.encoder.encodeEntity(wikiDataArticle);
        if (entityVector != null) {
            addVectorField(document, "vector", entityVector);
        } else {
            // Parameterized logging formats the argument itself; no explicit toString() needed.
            log.error("Could not encode entity {}", wikiDataArticle);
        }
        return document;
    }

    /**
     * Retrieves name-query candidates for {@code str} and scores each of them by the
     * cosine similarity between its stored vector and the encoded mention.
     * Candidates without a stored vector, or whose similarity is not finite, get a
     * score of {@code 0.0}. The scored candidates are then sorted with
     * {@link ArticleRef.ScoreComparator}.
     *
     * <p>NOTE(review): {@code i} does not limit the size of the returned list; every
     * candidate returned by {@code queryNames(str, NUM_PARVEC_CANDIDATES)} is included.
     * Confirm with callers whether truncation to {@code i} is expected.
     *
     * @param str  text passed to {@code queryNames} and as first argument to {@code encodeMention}
     * @param str2 second argument to {@code encodeMention} (presumably the mention context - TODO confirm)
     * @param i    not used to bound the result; see the note above
     * @return all scored candidates, ordered by {@link ArticleRef.ScoreComparator}
     */
    public List<ArticleRef> querySimilarArticles(String str, String str2, int i) {
        List<ArticleRef> candidates = queryNames(str, NUM_PARVEC_CANDIDATES);
        // Every candidate ends up in the result, so size the list by the candidate count.
        List<ArticleRef> scored = new ArrayList<>(candidates.size());
        INDArray mentionVector = this.encoder.encodeMention(str, str2);
        for (ArticleRef candidate : candidates) {
            double score = 0.0d;
            INDArray vector = candidate.getVector();
            if (vector != null) {
                // The stored vector is concatenated with itself before comparison -
                // presumably so its width matches the mention encoding (TODO confirm).
                double similarity = WordHelpers.cosineSim(Nd4j.hstack(new INDArray[]{vector, vector}), mentionVector);
                if (Double.isFinite(similarity)) {
                    score = similarity;
                }
            }
            candidate.setScore(score);
            scored.add(candidate);
        }
        Collections.sort(scored, new ArticleRef.ScoreComparator());
        return scored;
    }
}
