package de.datexis.cdv.index;

import de.datexis.cdv.retrieval.EntityAspectQueryAnnotation;
import de.datexis.common.Resource;
import de.datexis.model.Annotation;
import de.datexis.model.Dataset;
import de.datexis.model.Document;
import de.datexis.model.Query;
import de.datexis.model.Result;
import de.datexis.model.impl.PassageAnnotation;
import de.datexis.retrieval.model.RelevanceResult;
import de.datexis.retrieval.model.ScoredResult;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.Random;
import java.util.TreeMap;
import java.util.stream.Collectors;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.custom.CustomAnalyzer;
import org.apache.lucene.analysis.icu.ICUFoldingFilterFactory;
import org.apache.lucene.analysis.icu.segmentation.ICUTokenizerFactory;
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.similarities.BM25Similarity;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.BaseDirectory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:de/datexis/cdv/index/PassageIndex.class */
/**
 * Lucene index over the GOLD {@link PassageAnnotation}s of a {@link Dataset}.
 *
 * <p>Each indexed passage becomes one Lucene document with a stored document id, a stored
 * passage id and an analyzed (not stored) text field. The index can be written to disk or
 * kept in memory, and is queried with the entity/aspect strings attached to the dataset's
 * queries.
 *
 * <p>{@link #search(String, int)} requires that an index was opened first via
 * {@link #loadModel(Resource)} or {@link #createInMemoryIndex(Dataset)}.
 */
public class PassageIndex {
    protected final Logger log = LoggerFactory.getLogger(getClass());
    protected static final String FIELD_DOCID = "docId";
    protected static final String FIELD_PARID = "parId";
    protected static final String FIELD_TEXT = "text";
    public static final int NUM_CANDIDATES = 64;

    /** Passages whose {@code getLength()} is below this value are not indexed or added as candidates. */
    private static final int MIN_PASSAGE_LENGTH = 10;

    /** A progress message is logged every time this many passages have been written. */
    private static final int PROGRESS_LOG_INTERVAL = 100000;

    protected IndexReader reader;
    protected IndexSearcher searcher;
    protected Analyzer analyzer;

    /**
     * A single search hit: the ids of the matching passage and its document, plus the score.
     *
     * <p>Equality and hash code are based on the two ids only; the score is ignored.
     */
    public class PassageResult {
        public String documentId;
        public String passageId;
        public double score;

        /**
         * @param str  id of the document that contains the passage
         * @param str2 id of the passage
         * @param f    score of the hit
         */
        public PassageResult(String str, String str2, float f) {
            this.documentId = str;
            this.passageId = str2;
            this.score = f;
        }

        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (obj == null || getClass() != obj.getClass()) {
                return false;
            }
            PassageResult other = (PassageResult) obj;
            // Objects.equals keeps equals() safe when an id is null (hashCode already tolerates that).
            return Objects.equals(this.documentId, other.documentId)
                    && Objects.equals(this.passageId, other.passageId);
        }

        @Override
        public int hashCode() {
            return Objects.hash(this.documentId, this.passageId);
        }
    }

    /**
     * Opens an existing on-disk index and prepares the searcher and analyzer.
     *
     * @param resource location of the index directory
     * @throws IOException if the index cannot be opened
     */
    public void loadModel(Resource resource) throws IOException {
        this.reader = DirectoryReader.open(FSDirectory.open(resource.getPath()));
        this.searcher = new IndexSearcher(this.reader);
        this.analyzer = buildAnalyzer();
    }

    /**
     * Builds the per-field analyzer: an ICU tokenizer with ICU folding for the text field and a
     * keyword analyzer for both id fields.
     *
     * <p>If the custom text analyzer cannot be created, the error is logged and the returned
     * wrapper falls back to its default {@link StandardAnalyzer} for the fields that were not
     * registered.
     *
     * @return the analyzer used for indexing and query parsing
     */
    protected Analyzer buildAnalyzer() {
        TreeMap<String, Analyzer> perField = new TreeMap<>();
        try {
            perField.put(FIELD_TEXT, CustomAnalyzer.builder()
                    .withTokenizer(ICUTokenizerFactory.class)
                    .addTokenFilter(ICUFoldingFilterFactory.class)
                    .build());
            KeywordAnalyzer keywordAnalyzer = new KeywordAnalyzer();
            perField.put(FIELD_DOCID, keywordAnalyzer);
            perField.put(FIELD_PARID, keywordAnalyzer);
        } catch (IOException e) {
            this.log.error("Could not create Lucene Analyzer: ", e);
        }
        return new PerFieldAnalyzerWrapper(new StandardAnalyzer(), perField);
    }

    /**
     * Indexes the dataset into a {@link RAMDirectory} and opens a reader and searcher on it.
     *
     * @param dataset dataset whose GOLD passages are indexed
     * @throws IOException if writing or opening the index fails
     */
    public void createInMemoryIndex(Dataset dataset) throws IOException {
        RAMDirectory directory = new RAMDirectory();
        createIndex(dataset, directory);
        this.reader = DirectoryReader.open(directory);
        this.searcher = new IndexSearcher(this.reader);
    }

    /**
     * Sets the similarity on the current searcher.
     *
     * <p>{@link #loadModel(Resource)} and {@link #createInMemoryIndex(Dataset)} create a new
     * searcher, so this must be called again after either of them.
     *
     * @param similarity similarity to use for scoring
     * @throws IllegalStateException if no index has been opened yet
     */
    public void setSimilarity(Similarity similarity) {
        requireSearcher().setSimilarity(similarity);
    }

    /**
     * Writes a new index for the dataset to disk. The index is not opened for searching.
     *
     * @param dataset  dataset whose GOLD passages are indexed
     * @param resource target location of the index directory
     * @throws IOException if the index cannot be written
     */
    public void createIndex(Dataset dataset, Resource resource) throws IOException {
        this.log.info("creating new passage index in path '{}'...", resource);
        createIndex(dataset, FSDirectory.open(resource.getPath()));
    }

    /**
     * Writes all sufficiently long GOLD passages of the dataset into the given directory,
     * replacing any existing index there.
     *
     * @param dataset       dataset whose GOLD passages are indexed
     * @param baseDirectory Lucene directory to write to
     * @throws IOException if the index cannot be written
     */
    protected void createIndex(Dataset dataset, BaseDirectory baseDirectory) throws IOException {
        this.analyzer = buildAnalyzer();
        IndexWriterConfig config = new IndexWriterConfig(this.analyzer);
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
        config.setCommitOnClose(true);
        config.setSimilarity(new BM25Similarity());
        int written = 0;
        // try-with-resources releases the writer (and its directory lock) even if indexing fails.
        try (IndexWriter writer = new IndexWriter(baseDirectory, config)) {
            this.log.info("writing passages...");
            for (Document document : dataset.getDocuments()) {
                List<PassageAnnotation> passages = document
                        .streamAnnotations(Annotation.Source.GOLD, PassageAnnotation.class, true)
                        .sorted()
                        .collect(Collectors.toList());
                for (PassageAnnotation passage : passages) {
                    if (passage.getLength() < MIN_PASSAGE_LENGTH) {
                        continue;
                    }
                    org.apache.lucene.document.Document luceneDoc = new org.apache.lucene.document.Document();
                    luceneDoc.add(new StringField(FIELD_DOCID, document.getId(), Field.Store.YES));
                    luceneDoc.add(new StringField(FIELD_PARID, passage.getId(), Field.Store.YES));
                    luceneDoc.add(new TextField(FIELD_TEXT, document.getText(passage), Field.Store.NO));
                    writer.addDocument(luceneDoc);
                    written++;
                    if (written % PROGRESS_LOG_INTERVAL == 0) {
                        this.log.info("wrote {} passages so far", written);
                    }
                }
            }
        }
        this.log.info("{} passages written to index", written);
    }

    /**
     * Attaches SILVER candidate passages (relevance 0) to every query of the dataset.
     *
     * <p>For each query the index is searched; all GOLD passages of the top hit's document are
     * inserted directly after the top hit, followed by the remaining hits. Candidates are then
     * added to the query in that order, skipping ids the query already has and passages that are
     * too short, until the query holds {@code i} results.
     *
     * <p>A query without any hit, or a hit whose document is missing from the dataset, is logged
     * and skipped instead of aborting the whole run.
     *
     * @param dataset dataset providing the queries and the documents referenced by the index
     * @param i       number of hits requested from the index and maximum number of results per query
     */
    public void retrievePassageCandidates(Dataset dataset, int i) {
        for (Query query : dataset.getQueries()) {
            List<PassageResult> hits = search(buildQueryString(query), i);
            if (hits.isEmpty()) {
                this.log.error("no match found");
                continue;
            }
            insertTopDocumentPassages(dataset, hits);
            for (PassageResult hit : hits) {
                if (query.getResults().size() >= i) {
                    break;
                }
                addCandidate(dataset, query, hit);
            }
        }
    }

    /**
     * Attaches PRED {@link ScoredResult}s (relevance 1, score taken from the index) to every
     * query of the dataset.
     *
     * <p>Hits whose document is missing from the dataset are logged and skipped.
     *
     * @param dataset dataset providing the queries and the documents referenced by the index
     * @param i       number of hits requested from the index per query
     * @param z       if {@code true}, only hits whose passage id matches one of the query's GOLD
     *                {@link RelevanceResult}s are added; if {@code false}, every hit is added
     */
    public void retrieveAllQueries(Dataset dataset, int i, boolean z) {
        for (Query query : dataset.getQueries()) {
            List<? extends Result> goldResults = query.getResults(Annotation.Source.GOLD, RelevanceResult.class);
            List<PassageResult> hits = search(buildQueryString(query), i);
            if (hits.isEmpty()) {
                this.log.error("no match found");
            }
            for (PassageResult hit : hits) {
                Optional<Document> document = dataset.getDocument(hit.documentId);
                if (!document.isPresent()) {
                    this.log.error("Document not found: {}", hit.documentId);
                    continue;
                }
                if (z && !containsResult(goldResults, hit.passageId)) {
                    continue;
                }
                Optional<PassageAnnotation> match = goldPassages(document.get()).stream()
                        .filter(passage -> passage.getId().equals(hit.passageId))
                        .findFirst();
                if (!match.isPresent()) {
                    this.log.error("candidate passage not found: {}", hit.passageId);
                    continue;
                }
                PassageAnnotation passage = match.get();
                ScoredResult scored = new ScoredResult(Annotation.Source.PRED, document.get(), passage.getBegin(), passage.getEnd());
                scored.setRelevance(1);
                scored.setScore(Double.valueOf(hit.score));
                scored.setId(passage.getId());
                scored.setDocumentRef(document.get());
                query.addResult(scored);
            }
        }
    }

    /**
     * Searches the text field of the index.
     *
     * <p>The string is parsed with Lucene's classic query parser, so query-syntax characters in
     * {@code str} are interpreted. Parse and I/O errors are logged and yield the hits collected
     * so far (usually none).
     *
     * @param str query string; {@code null} or blank yields an empty result
     * @param i   maximum number of hits to return
     * @return a new, mutable list of hits in the order returned by the searcher
     * @throws IllegalStateException if no index has been opened yet
     */
    public List<PassageResult> search(String str, int i) {
        List<PassageResult> hits = new ArrayList<>();
        if (str == null || str.trim().isEmpty()) {
            return hits;
        }
        IndexSearcher activeSearcher = requireSearcher();
        try {
            org.apache.lucene.search.Query parsed = new QueryParser(FIELD_TEXT, this.analyzer).parse(str);
            for (ScoreDoc scoreDoc : activeSearcher.search(parsed, i).scoreDocs) {
                org.apache.lucene.document.Document doc = activeSearcher.doc(scoreDoc.doc);
                hits.add(new PassageResult(doc.get(FIELD_DOCID), doc.get(FIELD_PARID), scoreDoc.score));
            }
        } catch (IOException e) {
            this.log.error("Could not search passage index", e);
        } catch (ParseException e) {
            this.log.error("Could not parse query '{}'", str, e);
        }
        return hits;
    }

    /** Returns the current searcher or fails with a clear message when no index is open. */
    private IndexSearcher requireSearcher() {
        if (this.searcher == null) {
            throw new IllegalStateException("No index opened; call loadModel() or createInMemoryIndex() first");
        }
        return this.searcher;
    }

    /**
     * Builds the query string "entity aspect" for a query, using the aspect heading and falling
     * back to the aspect when the heading is {@code null}. Both parts are sanitized.
     */
    private static String buildQueryString(Query query) {
        EntityAspectQueryAnnotation annotation = query.getAnnotation(EntityAspectQueryAnnotation.class);
        String aspect = annotation.getAspectHeading();
        if (aspect == null) {
            aspect = annotation.getAspect();
        }
        return sanitize(annotation.getEntity()) + " " + sanitize(aspect);
    }

    /** Replaces '/', '-' and ':' (query-parser syntax characters) with spaces; {@code null} becomes "". */
    private static String sanitize(String text) {
        if (text == null) {
            return "";
        }
        return text.replace("/", " ").replace("-", " ").replace(":", " ");
    }

    /** Collects the GOLD passage annotations of a document in stream order. */
    private static List<PassageAnnotation> goldPassages(Document document) {
        return document.streamAnnotations(Annotation.Source.GOLD, PassageAnnotation.class, true)
                .collect(Collectors.toList());
    }

    /** Tells whether any of the given results carries the given id. */
    private static boolean containsResult(Iterable<? extends Result> results, String id) {
        for (Result result : results) {
            if (Objects.equals(result.getId(), id)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Inserts all GOLD passages of the top hit's document (score 0) right after the top hit.
     * Does nothing but log when that document is not part of the dataset.
     */
    private void insertTopDocumentPassages(Dataset dataset, List<PassageResult> hits) {
        String topDocumentId = hits.get(0).documentId;
        Optional<Document> topDocument = dataset.getDocument(topDocumentId);
        if (!topDocument.isPresent()) {
            this.log.error("Document not found: {}", topDocumentId);
            return;
        }
        int insertAt = 1;
        for (PassageAnnotation passage : goldPassages(topDocument.get())) {
            hits.add(insertAt++, new PassageResult(topDocument.get().getId(), passage.getId(), 0.0f));
            if (passage.getLength() < MIN_PASSAGE_LENGTH) {
                this.log.warn("Found very short passage {} in document {}", passage.getId(), passage.getDocumentRef().getId());
            }
        }
    }

    /**
     * Adds the hit's passage to the query as a SILVER result with relevance 0, unless the query
     * already has a result with that id, the document is missing, or the passage is too short.
     */
    private void addCandidate(Dataset dataset, Query query, PassageResult hit) {
        Optional<Document> document = dataset.getDocument(hit.documentId);
        if (!document.isPresent()) {
            this.log.error("Document not found: {}", hit.documentId);
            return;
        }
        if (containsResult(query.getResults(), hit.passageId)) {
            return;
        }
        boolean found = false;
        for (PassageAnnotation passage : goldPassages(document.get())) {
            if (!passage.getId().equals(hit.passageId)) {
                continue;
            }
            found = true;
            if (passage.getLength() >= MIN_PASSAGE_LENGTH) {
                RelevanceResult candidate = new RelevanceResult(Annotation.Source.SILVER, document.get(), passage.getBegin(), passage.getEnd());
                candidate.setRelevance(0);
                candidate.setId(passage.getId());
                candidate.setDocumentRef(document.get());
                query.addResult(candidate);
                break;
            }
            // Too short to be a candidate: warn and keep looking for another passage with this id.
            this.log.warn("Found very short passage {} in document {}", passage.getId(), passage.getDocumentRef().getId());
        }
        if (!found) {
            this.log.error("passage not found: {}", hit.passageId);
        }
    }
}
