package de.datexis.cdv.reader;

import de.datexis.cdv.model.EntityAspectAnnotation;
import de.datexis.cdv.retrieval.EntityAspectQueryAnnotation;
import de.datexis.common.Resource;
import de.datexis.model.Annotation;
import de.datexis.model.Dataset;
import de.datexis.model.Document;
import de.datexis.model.Query;
import de.datexis.model.impl.PassageAnnotation;
import de.datexis.preprocess.DocumentFactory;
import de.datexis.retrieval.model.RelevanceResult;
import de.datexis.retrieval.preprocess.WikipediaUrlPreprocessor;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.LineIterator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:de/datexis/cdv/reader/HealthQAReader.class */
/**
 * Reader that fills a {@link Dataset} named "HealthQA" from a tab-separated data file
 * plus an optional, separately loaded list of passage label lines.
 *
 * <p>Data file columns as read by {@link #addDocumentFromFile(Resource, Dataset)}:
 * column 0 is a flag (only rows whose trimmed value is {@code "1"} are processed),
 * column 1 is the query text and column 2 the passage text.
 *
 * <p>Label line columns as read by the same method: column 1 is {@code "1"} when the row
 * starts a new document, column 2 repeats the query text, column 3 is the entity ID,
 * column 4 is used as document title and as first argument of
 * {@link EntityAspectQueryAnnotation}, column 6 is its second argument.
 *
 * <p>Instances are configured fluently via the {@code with...} methods before reading.
 */
public class HealthQAReader extends MatchZooReader {

    protected final Logger log = LoggerFactory.getLogger(getClass());

    /** Raw label lines loaded by {@link #withPassageLabelsCSV(Resource)}; {@code null} until loaded. */
    protected List<String> labels;

    /** Lookup filled by {@link #withIDMapping(Resource)}: column 1 -> cleaned Wikipedia page title of column 0. */
    Map<String, String> idMap = null;

    /** Lookup filled by {@link #withNameMapping(Resource)}: lower-cased column 1 -> column 0. */
    Map<String, String> namesMap = null;

    /** Captures (group 3) the text between the last function word and the trailing " ?" of a question. */
    Pattern entityPattern = Pattern.compile("^.+? (are|is|for|of|in|the|a|an)(?!.* (are|is|for|of|in|the|a|an) ) (.+) \\?$");

    /** Annotation types requested via {@link #withAnnotations(Class)}. */
    protected Set<Class<? extends Annotation>> requestedAnnotations = new HashSet<>();

    /** Ordered (substring, label) pairs used by {@link #matchPassage(int, String)}. */
    List<Map.Entry<String, String>> questions = new ArrayList<>();

    /** Creates a reader and registers the built-in question substring to label pairs. */
    public HealthQAReader() {
        this.questions.add(new AbstractMap.SimpleEntry<>("What is ", "information"));
        this.questions.add(new AbstractMap.SimpleEntry<>("What are ", "information"));
        this.questions.add(new AbstractMap.SimpleEntry<>(" infect", "infection"));
        this.questions.add(new AbstractMap.SimpleEntry<>(" common ", "prevalence"));
        this.questions.add(new AbstractMap.SimpleEntry<>(" cause", "causes"));
        this.questions.add(new AbstractMap.SimpleEntry<>(" symptom", "symptoms"));
        this.questions.add(new AbstractMap.SimpleEntry<>(" complication", "complications"));
        this.questions.add(new AbstractMap.SimpleEntry<>(" test", "diagnosis"));
        this.questions.add(new AbstractMap.SimpleEntry<>(" treat", "treatment"));
        this.questions.add(new AbstractMap.SimpleEntry<>(" outlook", "prognosis"));
        this.questions.add(new AbstractMap.SimpleEntry<>(" prognosis", "prognosis"));
        this.questions.add(new AbstractMap.SimpleEntry<>(" diagnose", "diagnosis"));
        this.questions.add(new AbstractMap.SimpleEntry<>(" vaccin", "prevention"));
        this.questions.add(new AbstractMap.SimpleEntry<>(" prevent", "prevention"));
    }

    /**
     * Requests an additional annotation type. Requesting {@link EntityAspectAnnotation}
     * makes {@link #addDocumentFromFile(Resource, Dataset)} attach that type to passages
     * instead of a plain {@link PassageAnnotation}.
     *
     * @param cls the annotation class to request
     * @return this reader
     */
    public HealthQAReader withAnnotations(Class<? extends Annotation> cls) {
        this.requestedAnnotations.add(cls);
        return this;
    }

    /**
     * Loads a tab-separated mapping file and stores lower-cased column 1 -> column 0.
     * The first occurrence of a key wins. The mapping is replaced only after the whole
     * file was processed successfully.
     *
     * @param resource UTF-8 file with at least two tab-separated columns per line
     * @return this reader
     * @throws IOException if the file cannot be read
     */
    public HealthQAReader withNameMapping(Resource resource) throws IOException {
        List<String> lines = FileUtils.readLines(resource.toFile(), "UTF-8");
        Map<String, String> names = new ConcurrentHashMap<>(lines.size());
        for (String line : lines) {
            String[] columns = line.split("\\t");
            names.putIfAbsent(columns[1].toLowerCase(), columns[0]);
        }
        this.namesMap = names;
        return this;
    }

    /**
     * Loads a tab-separated mapping file and stores column 1 -> cleaned Wikipedia page
     * title of column 0. Later occurrences of a key overwrite earlier ones. The mapping is
     * replaced only after the whole file was processed successfully.
     *
     * @param resource UTF-8 file with at least two tab-separated columns per line
     * @return this reader
     * @throws IOException if the file cannot be read
     */
    public HealthQAReader withIDMapping(Resource resource) throws IOException {
        List<String> lines = FileUtils.readLines(resource.toFile(), "UTF-8");
        Map<String, String> ids = new ConcurrentHashMap<>(lines.size());
        for (String line : lines) {
            String[] columns = line.split("\\t");
            ids.put(columns[1], WikipediaUrlPreprocessor.cleanWikiPageTitle(columns[0]));
        }
        this.idMap = ids;
        return this;
    }

    /**
     * Loads all label lines from the given resource as UTF-8 text.
     *
     * @param resource the label file
     * @return this reader
     * @throws IOException if the resource cannot be read
     */
    public HealthQAReader withPassageLabelsCSV(Resource resource) throws IOException {
        // Close the stream once all lines are in memory; it was previously left open.
        try (InputStream in = resource.getInputStream()) {
            this.labels = IOUtils.readLines(in, StandardCharsets.UTF_8);
        }
        return this;
    }

    /**
     * Prints one tab-separated line per label line to standard output: column 0 of the
     * label line, the ';'-joined values found in the name mapping for the ';'-separated
     * entries of column 3, and the ID-mapping value of the first resolved entry with
     * underscores replaced by spaces (empty if nothing was resolved).
     *
     * @throws IllegalStateException if no labels were loaded
     */
    public void resolveEntityLabels() {
        if (this.labels == null) {
            throw new IllegalStateException("No passage labels loaded; call withPassageLabelsCSV() first");
        }
        for (String line : this.labels) {
            String[] columns = line.split("\t");
            List<String> resolved = new ArrayList<>();
            String title = "";
            if (columns.length > 3) {
                for (String name : columns[3].split(";")) {
                    if (this.namesMap != null) {
                        String mapped = this.namesMap.get(name.toLowerCase());
                        if (mapped != null) {
                            resolved.add(mapped);
                        }
                    }
                    // Only the first resolved entry is ever used for the title lookup.
                    if (this.idMap != null && !resolved.isEmpty()) {
                        String mappedTitle = this.idMap.get(resolved.get(0));
                        if (mappedTitle != null) {
                            title = mappedTitle;
                        }
                    }
                }
            }
            System.out.println(columns[0] + "\t" + String.join(";", resolved) + "\t" + title.replace("_", " "));
        }
    }

    /**
     * Reads the tab-separated data file and adds its documents and queries to the dataset.
     *
     * <p>Every row whose first column is {@code "1"} consumes the next label line (the very
     * first label line is skipped, presumably a header row). Consecutive passages are
     * appended to the current document until a label line marks the start of a new one.
     * Each processed row yields a gold passage annotation on the document and a gold
     * {@link RelevanceResult} with relevance 1 on its query. Queries are shared between rows
     * whose {@link EntityAspectQueryAnnotation} matches an already registered query.
     *
     * @param resource the data file; its name without a trailing ".txt" prefixes generated IDs
     * @param dataset  the dataset to fill; its name and language are overwritten
     * @throws IOException           if the resource cannot be read or closed
     * @throws IllegalStateException if a row needs a label but no labels were loaded
     */
    @Override // de.datexis.cdv.reader.MatchZooReader
    protected void addDocumentFromFile(Resource resource, Dataset dataset) throws IOException {
        dataset.setName("HealthQA");
        dataset.setLanguage("en");
        String filePrefix = resource.getFileName().replaceAll("\\.txt$", "");
        try (InputStream inputStream = resource.getInputStream()) {
            LineIterator lines = new LineIterator(new BufferedReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8.newDecoder())));
            Iterator<String> labelLines = this.labels != null ? this.labels.iterator() : null;
            if (labelLines != null && labelLines.hasNext()) {
                labelLines.next(); // discard the first label line
            }
            Document document = new Document();
            int lineIndex = -1;
            while (lines.hasNext()) {
                String[] columns = lines.nextLine().split("\\t");
                lineIndex++;
                if (!columns[0].trim().equals("1")) {
                    continue;
                }
                if (labelLines == null) {
                    throw new IllegalStateException("No passage labels loaded; call withPassageLabelsCSV() first");
                }
                String[] label = labelLines.next().split("\\t");
                String passageId = filePrefix + "-" + lineIndex;
                boolean startsNewDocument = label[1].equals("1");
                String queryText = columns[1];
                assert queryText.equals(label[2]);
                String passageText = columns[2] + "\n";
                String entityId = label[3];
                // NOTE(review): assumes the annotation constructor takes (entity, aspect) — confirm.
                String entity = label[4];
                String aspect = label[6];
                if (startsNewDocument) {
                    if (!document.isEmpty()) {
                        dataset.addDocument(document);
                    }
                    document = new Document();
                    document.setId(filePrefix + "-doc-" + lineIndex);
                    document.setTitle(entity);
                }
                // Drop a trailing comma together with the newline; safe for an empty passage column.
                if (passageText.endsWith(",\n")) {
                    passageText = passageText.substring(0, passageText.length() - 2);
                }
                Document passage = DocumentFactory.fromText(DocumentFactory.fromTokenizedText(passageText).getText() + "\n", DocumentFactory.Newlines.KEEP);
                document.append(passage);

                Query query = Query.create(queryText);
                EntityAspectQueryAnnotation queryAnnotation = new EntityAspectQueryAnnotation(entity, aspect);
                queryAnnotation.setEntityId(entityId);
                Query existingQuery = findMatchingQuery(dataset, queryAnnotation);
                boolean isNewQuery = existingQuery == null;
                if (isNewQuery) {
                    query.addAnnotation(queryAnnotation);
                } else {
                    query = existingQuery;
                }

                PassageAnnotation passageAnnotation;
                if (this.requestedAnnotations.contains(EntityAspectAnnotation.class)) {
                    EntityAspectAnnotation entityAspect = new EntityAspectAnnotation(Annotation.Source.GOLD);
                    entityAspect.setAspect(queryAnnotation.getAspect());
                    entityAspect.setEntity(queryAnnotation.getEntity());
                    entityAspect.setEntityId(queryAnnotation.getEntityId());
                    passageAnnotation = entityAspect;
                } else {
                    passageAnnotation = new PassageAnnotation(Annotation.Source.GOLD);
                    passageAnnotation.setLabel(queryText);
                }
                passageAnnotation.setId(passageId);
                passageAnnotation.setBegin(passage.getBegin());
                passageAnnotation.setEnd(passage.getEnd());
                if (passageAnnotation.getLength() > 1) {
                    document.addAnnotation(passageAnnotation);
                }

                RelevanceResult relevanceResult = new RelevanceResult(Annotation.Source.GOLD, document, passage.getBegin(), passage.getEnd());
                relevanceResult.setRelevance(1);
                relevanceResult.setId(passageId);
                relevanceResult.setDocumentRef(document);
                query.addResult(relevanceResult);
                if (isNewQuery && query.getResults().size() > 0) {
                    dataset.addQuery(query);
                }
            }
            if (!document.isEmpty()) {
                dataset.addDocument(document);
            }
            // Every label line is expected to be consumed by exactly one processed row.
            assert labelLines == null || !labelLines.hasNext();
        }
    }

    /**
     * Returns the first query of the dataset whose {@link EntityAspectQueryAnnotation}
     * matches the given one, or {@code null} if there is none. Queries without such an
     * annotation are skipped.
     */
    private static Query findMatchingQuery(Dataset dataset, EntityAspectQueryAnnotation wanted) {
        Iterator<?> queries = dataset.getQueries().iterator();
        while (queries.hasNext()) {
            Query candidate = (Query) queries.next();
            EntityAspectQueryAnnotation annotation = (EntityAspectQueryAnnotation) candidate.getAnnotation(EntityAspectQueryAnnotation.class);
            if (annotation != null && annotation.matches(wanted)) {
                return candidate;
            }
        }
        return null;
    }

    /**
     * Prints a tab-separated line to standard output with the given index, the question,
     * the text captured by {@link #entityPattern} (empty if the pattern does not match) and
     * the label of the last entry in {@link #questions} whose substring occurs in the
     * question (empty if none does).
     *
     * @param i   index printed in the first column
     * @param str the question text
     */
    protected void matchPassage(int i, String str) {
        Matcher matcher = this.entityPattern.matcher(str);
        String entity = matcher.matches() ? matcher.group(3) : "";
        String aspect = "";
        // No early exit: a later matching entry overrides an earlier one.
        for (Map.Entry<String, String> question : this.questions) {
            if (str.contains(question.getKey())) {
                aspect = question.getValue();
            }
        }
        System.out.println(i + "\t" + str + "\t" + entity + "\t" + aspect);
    }
}
