package de.unistuttgart.quadrama.io.html;

import de.unistuttgart.ims.drama.api.Act;
import de.unistuttgart.ims.drama.api.ActHeading;
import de.unistuttgart.ims.drama.api.Drama;
import de.unistuttgart.ims.drama.api.DramatisPersonae;
import de.unistuttgart.ims.drama.api.Figure;
import de.unistuttgart.ims.drama.api.Footnote;
import de.unistuttgart.ims.drama.api.FrontMatter;
import de.unistuttgart.ims.drama.api.MainMatter;
import de.unistuttgart.ims.drama.api.Scene;
import de.unistuttgart.ims.drama.api.SceneHeading;
import de.unistuttgart.ims.drama.api.Speaker;
import de.unistuttgart.ims.drama.api.Speech;
import de.unistuttgart.ims.drama.api.StageDirection;
import de.unistuttgart.ims.drama.api.Utterance;
import de.unistuttgart.ims.uima.io.xml.Visitor;
import de.unistuttgart.ims.uima.io.xml.type.XMLElement;
import de.unistuttgart.ims.uimautil.AnnotationUtil;
import de.unistuttgart.quadrama.io.core.AbstractDramaUrlReader;
import de.unistuttgart.quadrama.io.core.DramaIOUtil;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.TreeSet;
import org.apache.commons.io.IOUtils;
import org.apache.uima.collection.CollectionException;
import org.apache.uima.fit.factory.AnnotationFactory;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;

/* loaded from: input_file:de/unistuttgart/quadrama/io/html/GutenbergDEReader.class */
/**
 * Reader that turns an HTML drama document into drama annotations on a {@link JCas}.
 *
 * <p>The HTML is parsed with Jsoup and traversed with a {@link Visitor}; CSS selectors are then
 * mapped onto annotation types (front/main matter, speakers, stage directions, footnotes,
 * utterances), and act/scene segments are derived from {@code h1}/{@code h2} headings.
 */
public class GutenbergDEReader extends AbstractDramaUrlReader {

    /**
     * Orders annotations by begin offset and, for equal begins, by end offset. Two annotations are
     * only considered equal by this comparator when they cover exactly the same span.
     */
    private static final Comparator<Annotation> BY_SPAN = new Comparator<Annotation>() {
        @Override
        public int compare(Annotation first, Annotation second) {
            int byBegin = Integer.compare(first.getBegin(), second.getBegin());
            if (byBegin != 0) {
                return byBegin;
            }
            return Integer.compare(first.getEnd(), second.getEnd());
        }
    };

    /**
     * Parses the HTML read from {@code inputStream} (decoded as UTF-8) and creates the drama
     * annotations in the JCas provided by the {@link Visitor}.
     *
     * @param jCas        the CAS handed to the {@link Visitor} that receives the document
     * @param inputStream the HTML content to read
     * @param drama       not used by this implementation
     * @throws IOException         if the stream cannot be read
     * @throws CollectionException declared by the overridden method; not thrown directly here
     */
    @Override
    public void getNext(JCas jCas, InputStream inputStream, Drama drama) throws IOException, CollectionException {
        Document document = Jsoup.parseBodyFragment(IOUtils.toString(inputStream, "UTF-8"));
        Visitor visitor = new Visitor(jCas);
        document.traverse(visitor);
        JCas visitedJCas = visitor.getJCas();
        // Kept as a raw type: the generic signature of Visitor#getAnnotationMap is not visible
        // here and the map is passed on unchanged to DramaIOUtil. Keys are CSS selectors, values
        // are cast to XMLElement below.
        Map annotationMap = visitor.getAnnotationMap();

        // The first div.gutenb is the front matter, everything from the second one on is main matter.
        DramaIOUtil.select2Annotation(visitedJCas, document, annotationMap, "div.gutenb:eq(0)", FrontMatter.class, null);
        DramaIOUtil.selectRange2Annotation(visitedJCas, document, annotationMap, "div.gutenb:eq(1)", "div.gutenb:last-child", MainMatter.class);
        FrontMatter frontMatter = JCasUtil.selectSingle(visitedJCas, FrontMatter.class);
        MainMatter mainMatter = JCasUtil.selectSingle(visitedJCas, MainMatter.class);

        // span.speaker is a Speaker inside the main matter but a Figure inside the front matter.
        DramaIOUtil.select2Annotation(visitedJCas, document, annotationMap, "span.speaker", Speaker.class, mainMatter);
        DramaIOUtil.select2Annotation(visitedJCas, document, annotationMap, "span.speaker", Figure.class, frontMatter);
        DramaIOUtil.select2Annotation(visitedJCas, document, annotationMap, "span.regie", StageDirection.class, mainMatter);
        DramaIOUtil.select2Annotation(visitedJCas, document, annotationMap, "span.footnote", Footnote.class, mainMatter);
        DramaIOUtil.select2Annotation(visitedJCas, document, annotationMap, "h3 + p", DramatisPersonae.class, frontMatter);
        DramaIOUtil.select2Annotation(visitedJCas, document, annotationMap, "p:has(span.speaker)", Utterance.class, mainMatter);

        // A p.leftmarg paragraph extends the utterance immediately preceding it.
        for (Element paragraph : document.select("p.leftmarg")) {
            XMLElement continuation = (XMLElement) annotationMap.get(paragraph.cssSelector());
            if (continuation == null) {
                // No annotation was recorded for this element; nothing to extend.
                continue;
            }
            // At most one utterance is requested; an empty result (no preceding utterance) is skipped.
            for (Utterance preceding : JCasUtil.selectPreceding(Utterance.class, continuation, 1)) {
                // The end offset is an index key: take the annotation out of the indexes while
                // changing it so the annotation index is not corrupted.
                preceding.removeFromIndexes();
                preceding.setEnd(continuation.getEnd());
                preceding.addToIndexes();
            }
        }

        annotateSpeech(visitedJCas, mainMatter);

        // -1 means "no scene/act currently open".
        int sceneBegin = -1;
        int actBegin = -1;
        // Iterate over a snapshot: new annotations are added to the CAS inside the loop.
        for (XMLElement element : new ArrayList<XMLElement>(JCasUtil.select(visitedJCas, XMLElement.class))) {
            String tag = element.getTag();
            if ("h2".equals(tag) && !element.getCls().contains("author")) {
                // An h2 heading opens a new scene and closes the one that is currently open.
                AnnotationFactory.createAnnotation(visitedJCas, element.getBegin(), element.getEnd(), SceneHeading.class);
                if (sceneBegin >= 0) {
                    AnnotationFactory.createAnnotation(visitedJCas, sceneBegin, element.getBegin() - 1, Scene.class);
                }
                sceneBegin = element.getBegin();
            }
            if ("h1".equals(tag) && !element.getCls().contains("title")) {
                // An h1 heading opens a new act; if an act was open, it and its open scene are closed.
                AnnotationFactory.createAnnotation(visitedJCas, element.getBegin(), element.getEnd(), ActHeading.class);
                if (actBegin >= 0) {
                    AnnotationFactory.createAnnotation(visitedJCas, actBegin, element.getBegin() - 1, Act.class);
                    if (sceneBegin >= 0) {
                        AnnotationFactory.createAnnotation(visitedJCas, sceneBegin, element.getBegin() - 1, Scene.class);
                        sceneBegin = -1;
                    }
                }
                actBegin = element.getBegin();
            }
        }
        // Whatever is still open runs to the end of the main matter.
        if (actBegin >= 0) {
            AnnotationFactory.createAnnotation(visitedJCas, actBegin, mainMatter.getEnd(), Act.class);
        }
        if (sceneBegin >= 0) {
            AnnotationFactory.createAnnotation(visitedJCas, sceneBegin, mainMatter.getEnd(), Scene.class);
        }

        AnnotationUtil.trim(new ArrayList<Scene>(JCasUtil.select(visitedJCas, Scene.class)));
        AnnotationUtil.trim(new ArrayList<Act>(JCasUtil.select(visitedJCas, Act.class)));
        DramaIOUtil.cleanUp(visitedJCas);
    }

    /**
     * Creates {@link Speech} annotations for the parts of each utterance that are not covered by a
     * {@link StageDirection}, {@link Speaker} or {@link Footnote}.
     *
     * <p>Afterwards every {@link Speech} in the CAS that consists only of whitespace is removed,
     * and leading {@code '.'} and {@code ' '} characters are trimmed from the remaining ones.
     *
     * @param jCas       the CAS in which the annotations are created
     * @param annotation the annotation whose covered utterances are processed
     */
    protected void annotateSpeech(JCas jCas, Annotation annotation) {
        for (Utterance utterance : JCasUtil.selectCovered(Utterance.class, annotation)) {
            // Ordering by (begin, end) keeps annotations that start at the same offset but end at
            // different ones; only exact duplicates of a span collapse, which is harmless here.
            TreeSet<Annotation> nonSpeech = new TreeSet<Annotation>(BY_SPAN);
            nonSpeech.addAll(JCasUtil.selectCovered(StageDirection.class, utterance));
            nonSpeech.addAll(JCasUtil.selectCovered(Speaker.class, utterance));
            nonSpeech.addAll(JCasUtil.selectCovered(Footnote.class, utterance));

            int cursor = utterance.getBegin();
            for (Annotation gap : nonSpeech) {
                if (gap.getBegin() > cursor) {
                    AnnotationUtil.trim(AnnotationFactory.createAnnotation(jCas, cursor, gap.getBegin(), Speech.class));
                }
                // Never move backwards: a nested annotation (e.g. a footnote inside a stage
                // direction) ends before its enclosing annotation does.
                cursor = Math.max(cursor, gap.getEnd());
            }
            if (cursor < utterance.getEnd()) {
                AnnotationUtil.trim(AnnotationFactory.createAnnotation(jCas, cursor, utterance.getEnd(), Speech.class));
            }
        }

        // Iterate over a snapshot because annotations are removed from the indexes in the loop.
        for (Speech speech : new ArrayList<Speech>(JCasUtil.select(jCas, Speech.class))) {
            if (speech.getCoveredText().matches("^\\s*$")) {
                speech.removeFromIndexes();
            } else {
                try {
                    AnnotationUtil.trimBegin(speech, new char[]{'.', ' '});
                } catch (ArrayIndexOutOfBoundsException ignored) {
                    // Deliberately best-effort: the speech is left untrimmed.
                    // NOTE(review): presumably raised when the text consists solely of the
                    // trimmed characters; confirm against AnnotationUtil.trimBegin.
                }
            }
        }
    }

    /**
     * Assigns ids to {@link Speaker} annotations.
     *
     * <p>Speakers covered by the single {@link DramatisPersonae} annotation are numbered
     * consecutively starting at 1. Every other speaker whose id is still 0 receives the id of the
     * listed speaker with the same covered text; speakers without a match keep id 0.
     *
     * @param jCas the CAS containing the annotations
     */
    protected void assignSpeakerIds(JCas jCas) {
        DramatisPersonae dramatisPersonae = JCasUtil.selectSingle(jCas, DramatisPersonae.class);
        Map<String, Speaker> speakerByText = new HashMap<String, Speaker>();
        int nextId = 1;
        for (Speaker listed : JCasUtil.selectCovered(Speaker.class, dramatisPersonae)) {
            listed.setId(nextId++);
            speakerByText.put(listed.getCoveredText(), listed);
        }
        for (Speaker speaker : JCasUtil.select(jCas, Speaker.class)) {
            if (speaker.getId() != 0) {
                continue;
            }
            Speaker listed = speakerByText.get(speaker.getCoveredText());
            if (listed != null) {
                speaker.setId(listed.getId());
            }
        }
    }
}
