package justhalf.nlp.reader.acereader;

import edu.stanford.nlp.io.IOUtils;
import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import org.cyberneko.html.parsers.DOMParser;
import org.w3c.dom.DOMException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

/* loaded from: input_file:justhalf/nlp/reader/acereader/ACEDocument.class */
/**
 * One ACE document: the text of a source ({@code .sgm}) file together with the
 * entities, values, timexes, relations and events read from its annotation
 * ({@code .apf.xml}) file.
 *
 * <p>Both inputs are parsed with {@link DOMParser}. Mention spans are stored
 * relative to {@link #text} (annotated offsets minus {@link #offset}) with an
 * exclusive end. After each kind of mention is extracted, the mentions are
 * sorted and their spans are checked against the text; spans that do not match
 * are re-aligned, and mentions whose text cannot be located are dropped.
 */
public class ACEDocument implements Serializable {
    // Compile-time switches. None of them is referenced by the code in this class;
    // they are kept so the class retains its original set of declarations.
    private static final boolean CHECK_ESCAPED_ENTITIES = false;
    private static final boolean CHECK_OFFSET_TEXT = false;
    private static final boolean CHECK_OOB_MENTIONS = false;
    private static final boolean REMOVE_OOB_MENTIONS = true;
    private static final boolean TEST_STRICT_PARSING = false;
    private static final long serialVersionUID = -4698300709681532759L;

    /** Text that mention spans index into: the body text or, by default, the full text. */
    public String text;
    /** Unescaped text content of the whole source document. */
    public String fullText;
    /** Index of {@link #text} within {@link #fullText}; subtracted from annotated offsets. */
    public int offset;
    /** Value of the {@code URI} attribute of the annotation file's {@code SOURCE_FILE} element. */
    public String uri;
    /** True when the {@code SOURCE_FILE} element's {@code VERSION} attribute equals {@code "4.0"}. */
    public boolean versionIsACE2004;
    /** True when {@link #text} equals its own lower-cased form. */
    public boolean textInLowercase;
    public List<ACEEntity> entities;
    public List<ACEEntityMention> entityMentions;
    public List<ACEValue> values;
    public List<ACEValueMention> valueMentions;
    public List<ACETimex> timexes;
    public List<ACETimexMention> timexMentions;
    public List<ACERelation> relations;
    public List<ACERelationMention> relationMentions;
    public List<ACEEvent> events;
    public List<ACEEventMention> eventMentions;
    /** Entities, values and timexes keyed by their {@code ID} attribute. */
    public Map<String, ACEObject> objectsById;
    /** Entity, value and timex mentions keyed by {@code getFullID()}. */
    public Map<String, ACEObjectMention<? extends ACEObject>> objectMentionsById;

    /**
     * Reads the document at {@code str}, using the full text as {@link #text}.
     *
     * @param str location of the source file; the annotation file location is
     *            derived by replacing {@code ".sgm"} with {@code ".apf.xml"}
     * @throws IOException if reading either input fails
     * @throws SAXException if parsing either input fails
     */
    public ACEDocument(String str) throws IOException, SAXException {
        this(str, false);
    }

    /**
     * Reads the document at {@code str}.
     *
     * @param str location of the source file; the annotation file location is
     *            derived by replacing {@code ".sgm"} with {@code ".apf.xml"}
     * @param z   when true, {@link #text} is limited to the document body
     * @throws IOException if reading either input fails
     * @throws SAXException if parsing either input fails
     */
    public ACEDocument(String str, boolean z) throws IOException, SAXException {
        this(str, str.replace(".sgm", ".apf.xml"), z);
    }

    /**
     * Reads the document from explicit source and annotation locations.
     *
     * @param str  location of the source file
     * @param str2 location of the annotation file
     * @param z    when true, {@link #text} is limited to the document body
     * @throws IOException if opening or reading either input fails
     * @throws SAXException if parsing either input fails
     */
    public ACEDocument(String str, String str2, boolean z) throws IOException, SAXException {
        this(IOUtils.getInputStreamFromURLOrClasspathOrFileSystem(str), IOUtils.getInputStreamFromURLOrClasspathOrFileSystem(str2), z);
    }

    /**
     * Reads the document from two streams, both decoded as UTF-8.
     *
     * @param inputStream  stream of the source document
     * @param inputStream2 stream of the annotation document
     * @param z            when true, {@link #text} is the content of the first
     *                     body element; otherwise it is the full text
     * @throws IOException if reading either stream fails
     * @throws SAXException if parsing either input fails
     */
    public ACEDocument(InputStream inputStream, InputStream inputStream2, boolean z) throws IOException, SAXException {
        // BODY tags are renamed before parsing; presumably so the HTML parser does not
        // treat them as the HTML body element. NOTE(review): confirm against parser behaviour.
        String sourceMarkup = IOUtils.slurpInputStream(inputStream, "UTF-8").replaceAll("<(/)?BODY>", "<$1BODY_TEXT>");
        Document sourceDocument = parseMarkup(sourceMarkup);
        this.fullText = unescape(sourceDocument.getDocumentElement().getTextContent());
        if (z) {
            this.text = unescape(sourceDocument.getElementsByTagName("BODY_TEXT").item(0).getTextContent());
        } else {
            this.text = this.fullText;
        }
        this.textInLowercase = this.text.equals(this.text.toLowerCase());
        this.offset = this.fullText.indexOf(this.text);

        this.entities = new ArrayList<ACEEntity>();
        this.entityMentions = new ArrayList<ACEEntityMention>();
        this.values = new ArrayList<ACEValue>();
        this.valueMentions = new ArrayList<ACEValueMention>();
        this.timexes = new ArrayList<ACETimex>();
        this.timexMentions = new ArrayList<ACETimexMention>();
        this.relations = new ArrayList<ACERelation>();
        this.relationMentions = new ArrayList<ACERelationMention>();
        this.events = new ArrayList<ACEEvent>();
        this.eventMentions = new ArrayList<ACEEventMention>();
        this.objectsById = new HashMap<String, ACEObject>();
        this.objectMentionsById = new HashMap<String, ACEObjectMention<? extends ACEObject>>();

        // head tags are renamed for the same presumed reason as BODY above; getMention
        // later looks the element up as HEAD_EXTENT.
        String annotationMarkup = IOUtils.slurpInputStream(inputStream2, "UTF-8").replaceAll("<(/)?head>", "<$1head_extent>");
        Document annotationDocument = parseMarkup(annotationMarkup);
        setMetadata(annotationDocument);
        // Order matters: relations and events resolve their arguments through the
        // id maps filled by the entity, value and timex passes.
        extractEntities(annotationDocument);
        extractValues(annotationDocument);
        extractTimexes(annotationDocument);
        extractRelations(annotationDocument);
        extractEvents(annotationDocument);
    }

    /** Parses the given markup with a fresh {@link DOMParser} and returns the resulting DOM. */
    private static Document parseMarkup(String markup) throws IOException, SAXException {
        DOMParser parser = new DOMParser();
        parser.parse(new InputSource(new StringReader(markup)));
        return parser.getDocument();
    }

    /** Reads {@link #versionIsACE2004} and {@link #uri} from the first {@code SOURCE_FILE} element. */
    private void setMetadata(Document document) {
        NamedNodeMap attributes = document.getElementsByTagName("SOURCE_FILE").item(0).getAttributes();
        this.versionIsACE2004 = getAttribute(attributes, "VERSION").equals("4.0");
        this.uri = getAttribute(attributes, "URI");
    }

    /**
     * Builds a span from a node's {@code START} and {@code END} attributes,
     * shifted by {@link #offset}; the annotated end is inclusive, so 1 is added
     * to make the span end exclusive.
     */
    private Span getSpan(Node charseq) {
        NamedNodeMap attributes = charseq.getAttributes();
        int start = Integer.parseInt(getAttribute(attributes, "START"));
        int end = Integer.parseInt(getAttribute(attributes, "END"));
        return new Span(start - this.offset, end + 1 - this.offset);
    }

    /** Reads every {@code ENTITY} element and its mentions, then sorts and fixes the mentions. */
    private void extractEntities(Document document) {
        NodeList entityNodes = document.getElementsByTagName("ENTITY");
        for (int i = 0; i < entityNodes.getLength(); i++) {
            Node entityNode = entityNodes.item(i);
            NamedNodeMap attributes = entityNode.getAttributes();
            String id = getAttribute(attributes, "ID");
            ACEEntity entity = new ACEEntity(id, getAttribute(attributes, "TYPE"), getAttribute(attributes, "SUBTYPE"), getAttribute(attributes, "CLASS"));
            NodeList mentionNodes = ((Element) entityNode).getElementsByTagName("ENTITY_MENTION");
            for (int j = 0; j < mentionNodes.getLength(); j++) {
                ACEEntityMention mention = getMention(mentionNodes.item(j), entity);
                entity.addMention(mention);
                this.entityMentions.add(mention);
                this.objectMentionsById.put(mention.getFullID(), mention);
            }
            this.entities.add(entity);
            this.objectsById.put(id, entity);
        }
        Collections.sort(this.entityMentions);
        checkAndFixMentions(this.entityMentions);
    }

    /**
     * Verifies each mention's span against {@link #text}, re-aligning spans
     * where possible, and removes mentions whose text cannot be found.
     *
     * <p>Removed mentions are deleted from {@code list} and, for entity,
     * relation and timex mentions, from their parent's mention list. They are
     * not removed from {@link #objectMentionsById}.
     */
    private void checkAndFixMentions(List<? extends ACEObjectMention<?>> list) {
        int shift = 0;
        List<ACEObjectMention<?>> removed = new ArrayList<ACEObjectMention<?>>();
        for (ACEObjectMention<?> mention : list) {
            if (mention instanceof ACEEntityMention) {
                ACEEntityMention entityMention = (ACEEntityMention) mention;
                // A mention built without a head extent has a null head span; there is
                // nothing to align in that case.
                if (entityMention.headSpan != null) {
                    // The head fix deliberately leaves the running shift untouched.
                    fixSpan(shift, removed, mention, entityMention.headSpan, entityMention.headText);
                }
            }
            shift = fixSpan(shift, removed, mention, mention.span, mention.text);
        }
        for (ACEObjectMention<?> mention : removed) {
            list.remove(mention);
            if (mention instanceof ACEEntityMention) {
                ((ACEEntityMention) mention).entity.mentions.remove(mention);
            } else if (mention instanceof ACERelationMention) {
                ((ACERelationMention) mention).relation.mentions.remove(mention);
            } else if (mention instanceof ACETimexMention) {
                ((ACETimexMention) mention).timex.mentions.remove(mention);
            }
        }
        if (!removed.isEmpty()) {
            System.out.println("Removed " + removed.size() + " out-of-bounds mentions from " + this.uri);
        }
    }

    /**
     * Checks that {@code span} covers {@code str} (after unescaping) in
     * {@link #text} and, if not, moves the span to the last occurrence of that
     * text at or before {@code span.start - i}.
     *
     * @param i                the shift carried over from earlier fixes
     * @param list             receives {@code aCEObjectMention} when its text cannot be found
     * @param aCEObjectMention the mention that owns {@code span}
     * @param span             the span to verify; updated in place when re-aligned
     * @param str              the annotated mention text
     * @return the shift to use next: the distance the span start moved when the
     *         span was re-aligned, otherwise {@code i} unchanged
     */
    private int fixSpan(int i, List<ACEObjectMention<?>> list, ACEObjectMention<?> aCEObjectMention, Span span, String str) {
        String expected = unescape(str);
        String actual;
        try {
            actual = span.getText(this.text);
        } catch (StringIndexOutOfBoundsException e) {
            // The span points outside the text; treat it as a mismatch.
            actual = "";
        }
        if (actual.equals(expected)) {
            return i;
        }
        int found = this.text.lastIndexOf(expected, Math.min(this.text.length(), span.start - i));
        if (found == -1) {
            list.add(aCEObjectMention);
            return i;
        }
        int newShift = span.start - found;
        span.start = found;
        span.end = found + expected.length();
        return newShift;
    }

    /**
     * Builds an entity mention from an {@code ENTITY_MENTION} node. The head
     * span is null and the head text empty when the node has no head extent.
     */
    private ACEEntityMention getMention(Node node, ACEEntity aCEEntity) {
        NamedNodeMap attributes = node.getAttributes();
        String id = getAttribute(attributes, "ID");
        String type = getAttribute(attributes, "TYPE");
        String ldcType = getAttribute(attributes, "LDCTYPE");
        String ldcAtr = getAttribute(attributes, "LDCATR");
        Element mentionElement = (Element) node;
        Node extentCharseq = ((Element) mentionElement.getElementsByTagName("EXTENT").item(0)).getElementsByTagName("CHARSEQ").item(0);
        Span span = getSpan(extentCharseq);
        String text = extentCharseq.getTextContent();
        Node headExtent = mentionElement.getElementsByTagName("HEAD_EXTENT").item(0);
        Node headCharseq = headExtent == null ? null : ((Element) headExtent).getElementsByTagName("CHARSEQ").item(0);
        Span headSpan = headCharseq == null ? null : getSpan(headCharseq);
        String headText = headCharseq == null ? "" : headCharseq.getTextContent();
        return new ACEEntityMention(id, type, ldcType, ldcAtr, aCEEntity, span, headSpan, text, headText, SpanLabel.get(aCEEntity.type.name()));
    }

    /** Reads every {@code VALUE} element and its mentions, then sorts and fixes the mentions. */
    private void extractValues(Document document) {
        NodeList valueNodes = document.getElementsByTagName("VALUE");
        for (int i = 0; i < valueNodes.getLength(); i++) {
            Node valueNode = valueNodes.item(i);
            NamedNodeMap attributes = valueNode.getAttributes();
            String id = getAttribute(attributes, "ID");
            ACEValue value = new ACEValue(id, getAttribute(attributes, "TYPE"), getAttribute(attributes, "SUBTYPE"));
            NodeList mentionNodes = ((Element) valueNode).getElementsByTagName("VALUE_MENTION");
            for (int j = 0; j < mentionNodes.getLength(); j++) {
                ACEValueMention mention = getValueMention(mentionNodes.item(j), value);
                value.addMention(mention);
                this.valueMentions.add(mention);
                this.objectMentionsById.put(mention.getFullID(), mention);
            }
            this.values.add(value);
            this.objectsById.put(id, value);
        }
        Collections.sort(this.valueMentions);
        checkAndFixMentions(this.valueMentions);
    }

    /** Builds a value mention from a {@code VALUE_MENTION} node's id and extent. */
    private ACEValueMention getValueMention(Node node, ACEValue aCEValue) {
        String id = getAttribute(node.getAttributes(), "ID");
        Node charseq = getMentionCharseq(node, "EXTENT");
        return new ACEValueMention(id, getSpan(charseq), charseq.getTextContent(), aCEValue);
    }

    /** Reads every {@code TIMEX2} element and its mentions, then sorts and fixes the mentions. */
    private void extractTimexes(Document document) {
        NodeList timexNodes = document.getElementsByTagName("TIMEX2");
        for (int i = 0; i < timexNodes.getLength(); i++) {
            Node timexNode = timexNodes.item(i);
            NamedNodeMap attributes = timexNode.getAttributes();
            String id = getAttribute(attributes, "ID");
            ACETimex timex = new ACETimex(id, getAttribute(attributes, "VAL"), getAttribute(attributes, "MOD"), getAttribute(attributes, "ANCHOR_VAL"), getAttribute(attributes, "ANCHOR_DIR"), getAttribute(attributes, "SET"), getAttribute(attributes, "COMMENT"));
            NodeList mentionNodes = ((Element) timexNode).getElementsByTagName("TIMEX2_MENTION");
            for (int j = 0; j < mentionNodes.getLength(); j++) {
                ACETimexMention mention = getTimexMention(mentionNodes.item(j), timex);
                timex.addMention(mention);
                this.timexMentions.add(mention);
                this.objectMentionsById.put(mention.getFullID(), mention);
            }
            this.timexes.add(timex);
            this.objectsById.put(id, timex);
        }
        Collections.sort(this.timexMentions);
        checkAndFixMentions(this.timexMentions);
    }

    /** Builds a timex mention from a {@code TIMEX2_MENTION} node's id and extent. */
    private ACETimexMention getTimexMention(Node node, ACETimex aCETimex) {
        String id = getAttribute(node.getAttributes(), "ID");
        Node charseq = getMentionCharseq(node, "EXTENT");
        return new ACETimexMention(id, getSpan(charseq), charseq.getTextContent(), aCETimex);
    }

    /**
     * Reads every {@code RELATION} element and its mentions, then sorts and
     * fixes the mentions. Relations and their mentions are not added to the id maps.
     */
    private void extractRelations(Document document) {
        NodeList relationNodes = document.getElementsByTagName("RELATION");
        for (int i = 0; i < relationNodes.getLength(); i++) {
            Node relationNode = relationNodes.item(i);
            NamedNodeMap attributes = relationNode.getAttributes();
            String id = getAttribute(attributes, "ID");
            String type = getAttribute(attributes, "TYPE");
            String subtype = getAttribute(attributes, "SUBTYPE");
            String tense = getAttribute(attributes, "TENSE");
            String modality = getAttribute(attributes, "MODALITY");
            ACEEntity[] args = new ACEEntity[2];
            // One-element arrays act as out-parameters for the optional time argument.
            ACETimex[] timestamp = new ACETimex[1];
            String[] timestampType = new String[1];
            getRelationArguments(relationNode, args, timestamp, timestampType);
            ACERelation relation = new ACERelation(args, id, type, subtype, tense, modality, timestamp[0], timestampType[0]);
            NodeList mentionNodes = ((Element) relationNode).getElementsByTagName("RELATION_MENTION");
            for (int j = 0; j < mentionNodes.getLength(); j++) {
                ACERelationMention mention = getRelationMention(mentionNodes.item(j), relation);
                relation.addMention(mention);
                this.relationMentions.add(mention);
            }
            this.relations.add(relation);
        }
        Collections.sort(this.relationMentions);
        checkAndFixMentions(this.relationMentions);
    }

    /**
     * Builds a relation mention from a {@code RELATION_MENTION} node; the
     * attribute and extent element names depend on {@link #versionIsACE2004}.
     */
    private ACERelationMention getRelationMention(Node node, ACERelation aCERelation) {
        // One-element arrays act as out-parameters for the optional time argument.
        ACETimexMention[] timestamp = new ACETimexMention[1];
        String[] timestampType = new String[1];
        ACEEntityMention[] args = new ACEEntityMention[2];
        getRelationMentionArguments(node, aCERelation, args, timestamp, timestampType);
        NamedNodeMap attributes = node.getAttributes();
        String id = getAttribute(attributes, "ID");
        String lexicalCondition = getAttribute(attributes, this.versionIsACE2004 ? "LDCLEXICALCONDITION" : "LEXICALCONDITION");
        Node charseq = getMentionCharseq(node, this.versionIsACE2004 ? "LDC_EXTENT" : "EXTENT");
        return new ACERelationMention(args, id, lexicalCondition, getSpan(charseq), charseq.getTextContent(), timestamp[0], timestampType[0], aCERelation);
    }

    /**
     * Returns the first {@code CHARSEQ} node that is a direct child of a
     * descendant element named {@code str}.
     *
     * @throws RuntimeException if no such node exists
     */
    private Node getMentionCharseq(Node node, String str) {
        NodeList containers = ((Element) node).getElementsByTagName(str);
        for (int i = 0; i < containers.getLength(); i++) {
            NodeList children = containers.item(i).getChildNodes();
            for (int j = 0; j < children.getLength(); j++) {
                Node child = children.item(j);
                if (child.getNodeName().equals("CHARSEQ")) {
                    return child;
                }
            }
        }
        throw new RuntimeException("No <charseq> found in the mention: " + node.getTextContent());
    }

    /**
     * Fills the entity-mention arguments and the optional time argument of a
     * relation mention. An argument whose role/number does not parse as an
     * integer is stored as the time argument, with the raw role as its type.
     */
    private void getRelationMentionArguments(Node node, ACERelation aCERelation, ACEEntityMention[] aCEEntityMentionArr, ACETimexMention[] aCETimexMentionArr, String[] strArr) {
        NodeList argumentNodes = ((Element) node).getElementsByTagName(this.versionIsACE2004 ? "REL_MENTION_ARG" : "RELATION_MENTION_ARGUMENT");
        for (int i = 0; i < argumentNodes.getLength(); i++) {
            NamedNodeMap attributes = argumentNodes.item(i).getAttributes();
            String refId = getAttribute(attributes, this.versionIsACE2004 ? "ENTITYMENTIONID" : "REFID");
            String role = getAttribute(attributes, this.versionIsACE2004 ? "ARGNUM" : "ROLE");
            try {
                // In the non-2004 format the first four characters of the role are skipped
                // before the argument number is parsed.
                int argNum = Integer.parseInt(role.substring(this.versionIsACE2004 ? 0 : 4));
                String mentionId = refId;
                if (this.versionIsACE2004) {
                    // Rebuild the key from the argument entity's id plus the "-..." suffix of the reference.
                    mentionId = aCERelation.args[argNum - 1].id + refId.substring(refId.indexOf("-"));
                }
                aCEEntityMentionArr[argNum - 1] = (ACEEntityMention) this.objectMentionsById.get(mentionId);
            } catch (NumberFormatException notAnArgumentNumber) {
                aCETimexMentionArr[0] = (ACETimexMention) this.objectMentionsById.get(refId);
                strArr[0] = role;
            }
        }
    }

    /**
     * Fills the entity arguments and the optional time argument of a relation
     * from its direct child argument elements. An argument whose role/number
     * does not parse as an integer is stored as the time argument.
     */
    private void getRelationArguments(Node node, ACEEntity[] aCEEntityArr, ACETimex[] aCETimexArr, String[] strArr) {
        NodeList children = node.getChildNodes();
        for (int i = 0; i < children.getLength(); i++) {
            Node child = children.item(i);
            if (!child.getNodeName().equals(this.versionIsACE2004 ? "REL_ENTITY_ARG" : "RELATION_ARGUMENT")) {
                continue;
            }
            NamedNodeMap attributes = child.getAttributes();
            String refId = getAttribute(attributes, this.versionIsACE2004 ? "ENTITYID" : "REFID");
            String role = getAttribute(attributes, this.versionIsACE2004 ? "ARGNUM" : "ROLE");
            try {
                int argNum = Integer.parseInt(role.substring(this.versionIsACE2004 ? 0 : 4));
                aCEEntityArr[argNum - 1] = (ACEEntity) this.objectsById.get(refId);
            } catch (NumberFormatException notAnArgumentNumber) {
                aCETimexArr[0] = (ACETimex) this.objectsById.get(refId);
                strArr[0] = role;
            }
        }
    }

    /**
     * Reads every {@code EVENT} element, its arguments and its mentions, then
     * sorts and fixes the mentions. Events and their mentions are not added to the id maps.
     */
    private void extractEvents(Document document) {
        NodeList eventNodes = document.getElementsByTagName("EVENT");
        for (int i = 0; i < eventNodes.getLength(); i++) {
            Node eventNode = eventNodes.item(i);
            NamedNodeMap attributes = eventNode.getAttributes();
            ACEEvent event = new ACEEvent(getAttribute(attributes, "ID"), getAttribute(attributes, "TYPE"), getAttribute(attributes, "SUBTYPE"), getAttribute(attributes, "TENSE"), getAttribute(attributes, "GENERICITY"), getAttribute(attributes, "POLARITY"), getAttribute(attributes, "MODALITY"));
            getEventArguments(eventNode, event);
            NodeList mentionNodes = ((Element) eventNode).getElementsByTagName("EVENT_MENTION");
            for (int j = 0; j < mentionNodes.getLength(); j++) {
                ACEEventMention mention = getEventMention(mentionNodes.item(j), event);
                event.addMention(mention);
                this.eventMentions.add(mention);
            }
            this.events.add(event);
        }
        Collections.sort(this.eventMentions);
        checkAndFixMentions(this.eventMentions);
    }

    /**
     * Adds each {@code EVENT_ARGUMENT} to the event. Roles starting with
     * {@code "Time"} are added under the role {@code "Time"} and the full role
     * is recorded as the event's timestamp type.
     */
    private void getEventArguments(Node node, ACEEvent aCEEvent) {
        NodeList argumentNodes = ((Element) node).getElementsByTagName("EVENT_ARGUMENT");
        for (int i = 0; i < argumentNodes.getLength(); i++) {
            NamedNodeMap attributes = argumentNodes.item(i).getAttributes();
            String refId = getAttribute(attributes, "REFID");
            String role = getAttribute(attributes, "ROLE");
            if (role.startsWith("Time")) {
                aCEEvent.addArgument("Time", this.objectsById.get(refId));
                aCEEvent.setTimestampType(role);
            } else {
                aCEEvent.addArgument(role, this.objectsById.get(refId));
            }
        }
    }

    /**
     * Builds an event mention from an {@code EVENT_MENTION} node using its
     * {@code EXTENT}, {@code LDC_SCOPE} and {@code ANCHOR} character sequences.
     */
    private ACEEventMention getEventMention(Node node, ACEEvent aCEEvent) {
        ACEObjectMention<?>[] arguments = getEventMentionArguments(node, aCEEvent);
        String id = getAttribute(node.getAttributes(), "ID");
        Node extentCharseq = getMentionCharseq(node, "EXTENT");
        Span span = getSpan(extentCharseq);
        String text = extentCharseq.getTextContent();
        Node scopeCharseq = getMentionCharseq(node, "LDC_SCOPE");
        Span scopeSpan = getSpan(scopeCharseq);
        String scopeText = scopeCharseq.getTextContent();
        Node anchorCharseq = getMentionCharseq(node, "ANCHOR");
        return new ACEEventMention(id, span, text, aCEEvent, scopeSpan, scopeText, getSpan(anchorCharseq), anchorCharseq.getTextContent(), arguments);
    }

    /**
     * Resolves each {@code EVENT_MENTION_ARGUMENT}'s {@code REFID} through
     * {@link #objectMentionsById}; unresolved references yield null entries.
     */
    private ACEObjectMention<?>[] getEventMentionArguments(Node node, ACEEvent aCEEvent) {
        NodeList argumentNodes = ((Element) node).getElementsByTagName("EVENT_MENTION_ARGUMENT");
        ACEObjectMention<?>[] arguments = new ACEObjectMention<?>[argumentNodes.getLength()];
        for (int i = 0; i < argumentNodes.getLength(); i++) {
            arguments[i] = this.objectMentionsById.get(getAttribute(argumentNodes.item(i).getAttributes(), "REFID"));
        }
        return arguments;
    }

    /**
     * Returns the value of the attribute named {@code str}, looked up by its
     * lower-cased name, or the empty string when the map, the name or the
     * attribute is missing.
     */
    private static String getAttribute(NamedNodeMap namedNodeMap, String str) {
        if (namedNodeMap == null || str == null) {
            return "";
        }
        // Locale.ROOT keeps the lookup key independent of the default locale
        // (for example, Turkish casing rules would change "ID").
        Node attribute = namedNodeMap.getNamedItem(str.toLowerCase(Locale.ROOT));
        return attribute == null ? "" : attribute.getTextContent();
    }

    /**
     * Prints every mention in {@code list} against the document's text. If a
     * mention cannot be rendered, diagnostic details about the document and
     * the mention are printed and the exception is rethrown.
     *
     * @param aCEDocument the document the mentions belong to
     * @param list        the mentions to print
     */
    public static void printMentions(ACEDocument aCEDocument, List<? extends ACEObjectMention<?>> list) {
        for (ACEObjectMention<?> mention : list) {
            try {
                System.out.println(mention.toString(aCEDocument.text));
            } catch (RuntimeException e) {
                System.out.println("===TEXT===");
                System.out.println(aCEDocument.text);
                System.out.println("===FULL TEXT===");
                System.out.println(aCEDocument.fullText);
                System.out.println("===SGM===");
                System.out.println(aCEDocument.uri);
                System.out.println("===TEXT LENGTH===");
                System.out.println(aCEDocument.text.length());
                System.out.println("===OFFSET===");
                System.out.println(aCEDocument.offset);
                System.out.println("===MENTION===");
                System.out.println(mention.text);
                System.out.println(mention.span);
                throw e;
            }
        }
    }

    /**
     * Replaces the entities {@code &amp;}, {@code &lt;} and {@code &gt;}
     * (matched case-insensitively) with their characters, in that order.
     *
     * @param str the text to unescape; must not be null
     * @return the unescaped text
     */
    public static String unescape(String str) {
        return str.replaceAll("(?i)&amp;", "&").replaceAll("(?i)&lt;", "<").replaceAll("(?i)&gt;", ">");
    }
}
