package de.datexis.cdv.reader;

import de.datexis.cdv.model.AspectAnnotation;
import de.datexis.cdv.model.EntityAnnotation;
import de.datexis.cdv.retrieval.EntityAspectQueryAnnotation;
import de.datexis.common.Resource;
import de.datexis.model.Annotation;
import de.datexis.model.Dataset;
import de.datexis.model.Document;
import de.datexis.model.Query;
import de.datexis.model.Result;
import de.datexis.model.impl.PassageAnnotation;
import de.datexis.retrieval.model.RelevanceResult;
import de.datexis.retrieval.preprocess.WikipediaUrlPreprocessor;
import de.datexis.sector.reader.WikiSectionReader;
import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Random;
import java.util.concurrent.ConcurrentHashMap;
import java.util.stream.Collectors;
import org.apache.commons.io.FileUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:de/datexis/cdv/reader/WikiSectionQAReader.class */
/**
 * Reader that converts documents carrying GOLD {@link AspectAnnotation}s into a
 * query/result dataset: every aspect annotation becomes one query of the form
 * {@code "<document title> ; <aspect label>"} with the annotated span as its single
 * relevant result. Optionally, each query is padded with randomly sampled
 * non-relevant passages.
 */
public class WikiSectionQAReader extends WikiSectionReader {
    /** Number of results (the positive one plus sampled negatives) a query is filled up to. */
    private static final int RESULTS_PER_QUERY = 10;

    /**
     * Maximum number of random draws per query while sampling negatives. Guards against
     * an endless loop when the dataset holds too few passages other than the positive one.
     */
    private static final int MAX_SAMPLING_ATTEMPTS = 1000;

    protected final Logger log = LoggerFactory.getLogger(getClass());

    /** Cleaned page title -> external ID; {@code null} until {@link #withIDMapping} is called. */
    Map<String, String> idMap = null;

    /**
     * Loads a tab-separated mapping file (UTF-8). The first column is passed through
     * {@link WikipediaUrlPreprocessor#cleanWikiPageTitle} and used as key, the second
     * column is the value. Blank lines are ignored; lines with fewer than two columns
     * are skipped with a warning instead of aborting the whole load.
     *
     * @param resource the mapping file to read
     * @return this reader, for call chaining
     * @throws IOException if the file cannot be read
     */
    public WikiSectionQAReader withIDMapping(Resource resource) throws IOException {
        List<String> lines = FileUtils.readLines(resource.toFile(), "UTF-8");
        // Fill a local map first so a failure while parsing never leaves a half-built mapping behind.
        Map<String, String> mapping = new ConcurrentHashMap<>(lines.size());
        for (String line : lines) {
            if (line.trim().isEmpty()) {
                continue;
            }
            String[] columns = line.split("\\t");
            if (columns.length < 2) {
                log.warn("Skipping malformed ID mapping line (expected two tab-separated columns): '{}'", line);
                continue;
            }
            mapping.put(WikipediaUrlPreprocessor.cleanWikiPageTitle(columns[0]), columns[1]);
        }
        this.idMap = mapping;
        return this;
    }

    /**
     * Builds a new dataset with one query per GOLD aspect annotation of every document.
     *
     * <p>The documents of {@code dataset} are modified in place (GOLD entity annotations
     * are removed, aspect annotations get IDs of the form {@code <entityId>-<index>}) and
     * the same document instances are added to the returned dataset.
     *
     * @param dataset the documents to convert
     * @param z       if {@code true}, every query is filled up to {@value #RESULTS_PER_QUERY}
     *                results with randomly sampled passages of relevance 0
     * @return a new dataset holding the documents and the generated queries
     * @throws IOException declared for API compatibility; not thrown by this implementation
     */
    public Dataset convertTrainingPassages(Dataset dataset, boolean z) throws IOException {
        Dataset converted = new Dataset();
        for (Document document : dataset.getDocuments()) {
            String entityId = resolveEntityId(document);

            // Snapshot the entity annotations before removing them so the removal
            // can never disturb the collection being iterated.
            List<EntityAnnotation> entities = document
                    .streamAnnotations(Annotation.Source.GOLD, EntityAnnotation.class)
                    .collect(Collectors.toList());
            for (EntityAnnotation entity : entities) {
                document.removeAnnotation(entity);
            }

            List<AspectAnnotation> aspects = document
                    .streamAnnotations(Annotation.Source.GOLD, AspectAnnotation.class)
                    .sorted()
                    .collect(Collectors.toList());
            int index = 0;
            for (AspectAnnotation aspect : aspects) {
                aspect.setId(entityId + "-" + index);
                index++;
                converted.addQuery(createQuery(document, aspect, entityId));
            }
            converted.addDocument(document);
        }
        if (z) {
            sampleNegatives(converted);
        }
        return converted;
    }

    /**
     * Returns the ID used for a document's entity: the mapped ID when a mapping is loaded
     * and contains the document, otherwise the document's own ID. Falling back avoids
     * {@code "null-<n>"} annotation IDs that would collide across unmapped documents.
     */
    private String resolveEntityId(Document document) {
        if (this.idMap == null) {
            return document.getId();
        }
        String mapped = this.idMap.get(WikipediaUrlPreprocessor.cleanWikiPageTitle(document.getId()));
        if (mapped == null) {
            log.warn("No ID mapping found for document '{}', falling back to the document ID", document.getId());
            return document.getId();
        }
        return mapped;
    }

    /** Creates the query for one aspect annotation, with the annotated span as its only (relevant) result. */
    private Query createQuery(Document document, AspectAnnotation aspect, String entityId) {
        // ';' separates title and heading in the query text below, so it must not occur in the heading.
        // Locale.ROOT keeps the lower-casing independent of the JVM's default locale.
        String heading = aspect.getLabel().replace(";", " ").toLowerCase(Locale.ROOT);
        if (heading.equals("abstract")) {
            heading = "information";
        }

        Query query = Query.create(document.getTitle() + " ; " + heading);
        EntityAspectQueryAnnotation queryAnnotation = new EntityAspectQueryAnnotation(document.getTitle(), heading);
        queryAnnotation.setEntityId(entityId);
        query.addAnnotation(queryAnnotation);

        RelevanceResult positive = new RelevanceResult(
                Annotation.Source.GOLD, document, aspect.getBegin(), aspect.getEnd());
        positive.setRelevance(1);
        positive.setId(aspect.getId());
        positive.setDocumentRef(document);
        query.addResult(positive);
        return query;
    }

    /**
     * Fills every query of the dataset up to {@value #RESULTS_PER_QUERY} results by drawing
     * random GOLD passages from random documents and adding them with relevance 0.
     * A passage whose ID equals the ID of the query's first result is never added.
     * The same passage may be drawn more than once.
     */
    private void sampleNegatives(Dataset dataset) {
        Random random = new Random();
        for (Query query : dataset.getQueries()) {
            Result positive = query.getResults().get(0);
            int attempts = 0;
            while (query.getResults().size() < RESULTS_PER_QUERY) {
                if (attempts >= MAX_SAMPLING_ATTEMPTS) {
                    log.warn("Giving up negative sampling after {} attempts; query keeps {} results",
                            MAX_SAMPLING_ATTEMPTS, query.getResults().size());
                    break;
                }
                attempts++;

                // The dataset cannot be empty here: every query was added together with its document.
                Document sampled = dataset.getRandomDocument().get();
                // NOTE(review): the third argument presumably includes subtypes of PassageAnnotation — confirm.
                List<PassageAnnotation> passages = sampled
                        .streamAnnotations(Annotation.Source.GOLD, PassageAnnotation.class, true)
                        .collect(Collectors.toList());
                if (passages.isEmpty()) {
                    continue; // nothing to draw from; Random.nextInt(0) would throw
                }
                PassageAnnotation candidate = passages.get(random.nextInt(passages.size()));
                // Compare from the positive side: its ID is always set, a sampled passage's ID may be null.
                if (positive.getId().equals(candidate.getId())) {
                    continue;
                }

                RelevanceResult negative = new RelevanceResult(
                        Annotation.Source.SAMPLED, sampled, candidate.getBegin(), candidate.getEnd());
                negative.setRelevance(0);
                negative.setId(candidate.getId());
                negative.setDocumentRef(sampled);
                query.addResult(negative);
            }
        }
    }
}
