package de.datexis.nel;

import com.google.common.collect.Lists;
import de.datexis.annotator.Annotator;
import de.datexis.common.Timer;
import de.datexis.encoder.Encoder;
import de.datexis.index.ArticleIndex;
import de.datexis.index.ArticleRef;
import de.datexis.index.impl.VectorArticleIndex;
import de.datexis.model.Annotation;
import de.datexis.model.Document;
import de.datexis.model.Sentence;
import de.datexis.ner.MentionAnnotation;
import de.datexis.ner.MentionAnnotator;
import de.datexis.preprocess.DocumentFactory;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.TreeMap;
import java.util.stream.Collectors;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:de/datexis/nel/NamedEntityAnnotator.class */
/**
 * Annotator that first recognizes mentions with a {@link MentionAnnotator} and then links every
 * recognized mention to at most one article of an {@link ArticleIndex}.
 *
 * <p>For each mention a {@link NamedEntityAnnotation} is attached to the document. When the index
 * returns a candidate, the annotation carries the title, id and URL of the top-ranked article;
 * otherwise the annotation is attached without a reference.
 */
public class NamedEntityAnnotator extends Annotator {
    protected static final Logger log = LoggerFactory.getLogger(NamedEntityAnnotator.class);

    /** Mention recognizer that is run before disambiguation. */
    protected final MentionAnnotator ner;

    /** Index that is queried for candidate articles. */
    protected final ArticleIndex index;

    /** Optional encoder; {@code null} when the two-argument constructor is used. Not read in this class. */
    protected final Encoder encoder;

    /**
     * Creates an annotator with an explicit encoder.
     *
     * @param mentionAnnotator recognizer used to detect mentions
     * @param articleIndex index used to look up candidate articles
     * @param encoder encoder stored in {@link #encoder}
     */
    public NamedEntityAnnotator(MentionAnnotator mentionAnnotator, ArticleIndex articleIndex, Encoder encoder) {
        this.ner = mentionAnnotator;
        this.index = articleIndex;
        this.encoder = encoder;
    }

    /**
     * Creates an annotator without an encoder ({@link #encoder} is {@code null}).
     *
     * @param mentionAnnotator recognizer used to detect mentions
     * @param articleIndex index used to look up candidate articles
     */
    public NamedEntityAnnotator(MentionAnnotator mentionAnnotator, ArticleIndex articleIndex) {
        this.ner = mentionAnnotator;
        this.index = articleIndex;
        this.encoder = null;
    }

    /**
     * Builds a document from raw text and annotates it.
     *
     * @param str text to annotate
     * @return the created document; returned without annotation when it contains no tokens
     */
    public Document annotate(String str) {
        log.trace("Annotating document: {}", str);
        Document doc = DocumentFactory.fromText(str);
        if (doc.countTokens() > 0) {
            annotate(doc);
        }
        return doc;
    }

    /**
     * Annotates a single document in place.
     *
     * @param document document to annotate
     * @return the same {@code document} instance
     */
    public Document annotate(Document document) {
        annotate(Lists.newArrayList(document));
        return document;
    }

    /**
     * Runs mention recognition on the whole collection and then disambiguates the predicted
     * mentions of each document. Timings of both phases are logged at debug level.
     *
     * @param collection documents to annotate in place
     */
    public void annotate(Collection<Document> collection) {
        Timer timer = new Timer();
        timer.start();
        timer.resetSplit();

        // Phase 1: mention recognition over the whole batch.
        this.ner.annotate(collection);
        timer.setSplit("NER");

        // Phase 2: per-document signature hook followed by entity disambiguation.
        for (Document document : collection) {
            createSignature(document);
            disambiguateMentions(document, Annotation.Source.PRED);
        }
        timer.setSplit("NED");
        timer.stop();

        log.debug("Annotated {} documents [{} NER, {} NED, {} total]",
                collection.size(), timer.get("NER"), timer.get("NED"), timer.get());
    }

    /**
     * @return the article index this annotator links mentions against
     */
    public ArticleIndex getKnowlegeBase() {
        return this.index;
    }

    /**
     * Runs only the mention recognizer on a single document.
     *
     * @param document document to annotate with mentions
     */
    protected void recognizeMentions(Document document) {
        this.ner.annotate(document);
    }

    /**
     * Hook invoked once per document before disambiguation. Does nothing in this class.
     *
     * @param document document about to be disambiguated
     */
    protected void createSignature(Document document) {
    }

    /**
     * Attaches a {@link NamedEntityAnnotation} for every {@link MentionAnnotation} of the given
     * source. The created annotations are always marked as {@link Annotation.Source#PRED},
     * independent of {@code source}.
     *
     * @param document document whose mentions are linked; receives the new annotations
     * @param source source of the mention annotations to read
     */
    public void disambiguateMentions(Document document, Annotation.Source source) {
        // Snapshot the mentions first: new annotations are added to the document inside the loop.
        List<MentionAnnotation> mentions =
                document.streamAnnotations(source, MentionAnnotation.class).collect(Collectors.toList());
        for (MentionAnnotation mention : mentions) {
            NamedEntityAnnotation entity = new NamedEntityAnnotation(mention, new ArrayList<>());
            List<ArticleRef> candidates = queryCandidates(document, mention);
            if (!candidates.isEmpty()) {
                // Only the top-ranked candidate is used as the reference.
                ArticleRef best = candidates.get(0);
                entity.setRefName(best.getTitle());
                entity.setRefId(best.getId());
                entity.setRefUrl(best.getUrl());
            }
            entity.setSource(Annotation.Source.PRED);
            document.addAnnotation(entity);
        }
    }

    /**
     * Looks up at most one candidate article for a mention. A {@link VectorArticleIndex} is
     * queried with the mention text plus the tokenized sentence around the mention; any other
     * index, or a mention for which no sentence is found, is queried by name only.
     */
    private List<ArticleRef> queryCandidates(Document document, MentionAnnotation mention) {
        String name = mention.getText();
        if (this.index instanceof VectorArticleIndex) {
            // The sentence lookup may come back empty; avoid an unchecked get() and fall back below.
            String context = document.getSentenceAtPosition(mention.getBegin())
                    .map(Sentence::toTokenizedString)
                    .orElse(null);
            if (context != null) {
                return ((VectorArticleIndex) this.index).querySimilarArticles(name, context, 1);
            }
        }
        return this.index.queryNames(name, 1);
    }
}
