package de.digitalcollections.solrocr.formats.alto;

import de.digitalcollections.solrocr.formats.OcrParser;
import de.digitalcollections.solrocr.model.OcrBox;
import de.digitalcollections.solrocr.model.OcrPage;
import de.digitalcollections.solrocr.util.CharBufUtils;
import java.awt.Dimension;
import java.io.Reader;
import java.lang.invoke.MethodHandles;
import java.util.Set;
import javax.xml.stream.XMLStreamException;
import org.codehaus.stax2.XMLStreamReader2;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:de/digitalcollections/solrocr/formats/alto/AltoParser.class */
/**
 * {@link OcrParser} implementation that turns the {@code String} elements of an ALTO document
 * into {@link OcrBox} instances.
 *
 * <p>Which attributes are read for each word is controlled by the {@link OcrParser.ParsingFeature}
 * set passed to {@link #readNext(XMLStreamReader2, Set)}. The parser keeps state between calls
 * (current page, a buffered second half of a hyphenated word, whether the document uses explicit
 * {@code SP} elements).
 */
public class AltoParser extends OcrParser {
    private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

    // StAX event codes, named so the comparisons below are readable.
    private static final int START_ELEMENT = javax.xml.stream.XMLStreamConstants.START_ELEMENT;
    private static final int END_ELEMENT = javax.xml.stream.XMLStreamConstants.END_ELEMENT;
    private static final int CHARACTERS = javax.xml.stream.XMLStreamConstants.CHARACTERS;

    /** Set by {@link #seekToNextWord} when the stream ended without another {@code String} element. */
    private boolean noMoreWords;
    /** Most recently seen {@code Page} element; only tracked when the PAGES feature is enabled. */
    private OcrPage currentPage;
    /** {@code null} until the first call to {@code readNext}; afterwards whether {@code SP} elements are used. */
    private Boolean hasExplicitSpaces;
    /** Second half of a hyphenated word, read ahead and handed out on the following call. */
    private OcrBox hyphenEnd;
    /** Guards the read-ahead in {@code readNext} against recursing more than one level. */
    private boolean inHyphenation;

    /**
     * Creates a parser over the given input.
     *
     * @param reader source of the ALTO markup, passed on to {@link OcrParser}
     * @param parsingFeatureArr features to enable, passed on to {@link OcrParser}
     * @throws XMLStreamException propagated from the superclass constructor
     */
    public AltoParser(Reader reader, OcrParser.ParsingFeature... parsingFeatureArr) throws XMLStreamException {
        super(reader, parsingFeatureArr);
        this.hasExplicitSpaces = null;
        this.hyphenEnd = null;
        this.inHyphenation = false;
    }

    /**
     * Reads the next word from the stream.
     *
     * @param xMLStreamReader2 the underlying StAX reader
     * @param set the features that decide which attributes are parsed
     * @return the next box, or {@code null} if there are no more {@code String} elements, if TEXT
     *     was requested and the box has no text, or if COORDINATES was requested and all four
     *     coordinates are negative
     * @throws XMLStreamException if the underlying reader fails
     * @throws IllegalStateException if an {@code ALTERNATIVE} element has anything other than a
     *     single text node as content
     */
    @Override // de.digitalcollections.solrocr.formats.OcrParser
    protected OcrBox readNext(XMLStreamReader2 xMLStreamReader2, Set<OcrParser.ParsingFeature> set) throws XMLStreamException {
        if (this.hasExplicitSpaces == null) {
            this.hasExplicitSpaces = Boolean.valueOf(this.input.peekBeginning().contains("<SP"));
        }
        // A previous call already read the second half of a hyphenated word; hand it out now.
        if (this.hyphenEnd != null) {
            OcrBox pending = this.hyphenEnd;
            this.hyphenEnd = null;
            return pending;
        }
        boolean trackPages = set.contains(OcrParser.ParsingFeature.PAGES);
        boolean onWord = xMLStreamReader2.getEventType() == START_ELEMENT
                && "String".equals(xMLStreamReader2.getLocalName());
        if (!onWord) {
            seekToNextWord(xMLStreamReader2, trackPages);
        }
        if (this.noMoreWords) {
            return null;
        }

        OcrBox box = new OcrBox();
        if (set.contains(OcrParser.ParsingFeature.TEXT)) {
            readText(xMLStreamReader2, set, box);
        }
        if (set.contains(OcrParser.ParsingFeature.COORDINATES)) {
            readCoordinates(xMLStreamReader2, box);
        }
        if (set.contains(OcrParser.ParsingFeature.CONFIDENCE)) {
            String wordConfidence = xMLStreamReader2.getAttributeValue("", "WC");
            if (wordConfidence != null && !wordConfidence.isEmpty()) {
                box.setConfidence(Double.valueOf(Double.parseDouble(wordConfidence)));
            }
        }
        if (set.contains(OcrParser.ParsingFeature.ALTERNATIVES)) {
            readAlternatives(xMLStreamReader2, set, box);
        }
        if (trackPages && this.currentPage != null) {
            box.setPage(this.currentPage);
        }

        // Advance to the following word now so we know what separates it from this one.
        int separators = seekToNextWord(xMLStreamReader2, trackPages);
        if (!this.hasExplicitSpaces.booleanValue() || separators > 0) {
            box.setTrailingChars(" ");
        }

        if (box.isHyphenStart().booleanValue()) {
            if (this.inHyphenation) {
                // We are the read-ahead of an outer call; it decides what to do with this box.
                return box;
            }
            joinHyphenatedParts(xMLStreamReader2, set, box);
        }

        return isUnusable(box, set) ? null : box;
    }

    /** Fills text, hyphenation info, highlight span and offsets of {@code box} from the current element. */
    private void readText(XMLStreamReader2 reader, Set<OcrParser.ParsingFeature> features, OcrBox box)
            throws XMLStreamException {
        String content = reader.getAttributeValue("", "CONTENT");
        String subsType = reader.getAttributeValue("", "SUBS_TYPE");
        // null: not part of a hyphenated word; TRUE: first part; FALSE: any other SUBS_TYPE value.
        Boolean hyphenStart = subsType == null ? null : Boolean.valueOf("HypPart1".equals(subsType));
        if (hyphenStart != null && hyphenStart.booleanValue()) {
            content = content + "-";
        }
        box.setText(content);
        if (hyphenStart != null) {
            box.setHyphenInfo(hyphenStart, reader.getAttributeValue("", "SUBS_CONTENT"));
        }
        if (features.contains(OcrParser.ParsingFeature.HIGHLIGHTS) && box.getHighlightSpan() == null) {
            box.setHighlightSpan(trackHighlightSpan(content, box));
        }
        if (features.contains(OcrParser.ParsingFeature.OFFSETS)) {
            box.setTextOffset(Math.toIntExact(getAttributeValueOffset("CONTENT", reader)));
            if (hyphenStart != null) {
                box.setDehyphenatedOffset(
                        Integer.valueOf(Math.toIntExact(getAttributeValueOffset("SUBS_CONTENT", reader))));
            }
        }
    }

    /**
     * Sets the corners of {@code box} from HPOS/VPOS/WIDTH/HEIGHT. Values are parsed as doubles and
     * truncated to int; a warning is logged when the lower-right corner ends up negative.
     */
    private void readCoordinates(XMLStreamReader2 reader, OcrBox box) {
        String hpos = reader.getAttributeValue("", "HPOS");
        String vpos = reader.getAttributeValue("", "VPOS");
        String width = reader.getAttributeValue("", "WIDTH");
        String height = reader.getAttributeValue("", "HEIGHT");
        if (hpos != null && !hpos.isEmpty()) {
            int left = (int) Double.parseDouble(hpos);
            box.setUlx(left);
            if (width != null && !width.isEmpty()) {
                box.setLrx(left + (int) Double.parseDouble(width));
            }
        }
        if (vpos != null && !vpos.isEmpty()) {
            int top = (int) Double.parseDouble(vpos);
            box.setUly(top);
            if (height != null && !height.isEmpty()) {
                box.setLry(top + (int) Double.parseDouble(height));
            }
        }
        if (box.getLrx() < 0.0f || box.getLry() < 0.0f) {
            log.warn("Incomplete coordinates encountered: 'HPOS={}, VPOS={}, WIDTH={}, HEIGHT={}", hpos, vpos, width, height);
        }
    }

    /**
     * Consumes events up to the next END_ELEMENT and records the text of every {@code ALTERNATIVE}
     * child on {@code box}.
     */
    private void readAlternatives(XMLStreamReader2 reader, Set<OcrParser.ParsingFeature> features, OcrBox box)
            throws XMLStreamException {
        while (reader.hasNext() && reader.next() != END_ELEMENT) {
            if (reader.getEventType() != START_ELEMENT || !"ALTERNATIVE".equals(reader.getLocalName())) {
                continue;
            }
            if (reader.next() != CHARACTERS) {
                throw new IllegalStateException("An ALTERNATIVE element can only have a text node as its sole child");
            }
            // Read the offset before the text, same order as the location is reported for this event.
            Long charOffset = features.contains(OcrParser.ParsingFeature.OFFSETS)
                    ? Long.valueOf(reader.getLocationInfo().getStartingCharOffset())
                    : null;
            String alternative = reader.getText();
            box.addAlternative(
                    alternative,
                    charOffset != null ? Integer.valueOf(Math.toIntExact(charOffset.longValue())) : null);
            if (features.contains(OcrParser.ParsingFeature.HIGHLIGHTS) && box.getHighlightSpan() == null) {
                box.setHighlightSpan(trackHighlightSpan(alternative, box));
            }
            if (reader.next() != END_ELEMENT) {
                throw new IllegalStateException("An ALTERNATIVE element can only have a text node as its sole child");
            }
        }
    }

    /**
     * Reads ahead one box to find the second half of the hyphenated word started by {@code box}.
     * If the next box is not a matching second half, the hyphenation info on {@code box} is
     * cleared; otherwise marker strings found in either half are copied into the dehyphenated form
     * and the trailing characters of {@code box} are emptied. The read-ahead box is kept in
     * {@link #hyphenEnd} and returned by the next call to {@code readNext}.
     */
    private void joinHyphenatedParts(XMLStreamReader2 reader, Set<OcrParser.ParsingFeature> features, OcrBox box)
            throws XMLStreamException {
        this.inHyphenation = true;
        this.hyphenEnd = readNext(reader, features);
        boolean hasMatchingEnd = this.hyphenEnd != null
                && this.hyphenEnd.isHyphenated()
                && !this.hyphenEnd.isHyphenStart().booleanValue();
        if (!hasMatchingEnd) {
            box.setHyphenInfo(null, null);
            box.setDehyphenatedOffset(null);
        } else {
            // NOTE(review): every marker literal below is the same string in this source and looks
            // like a mis-encoded character sequence, so the paired checks on each box currently
            // test the same thing. Presumably these were two distinct (highlight start / end)
            // markers - verify against the original constants before relying on this logic.
            boolean markersCopied = false;
            StringBuilder dehyphenated = new StringBuilder(this.hyphenEnd.getDehyphenatedForm());
            if (box.getText().contains("��")) {
                dehyphenated.insert(box.getText().indexOf("��"), "��");
                markersCopied = true;
            }
            if (box.getText().contains("��")) {
                dehyphenated.insert(box.getText().indexOf("��"), "��");
                markersCopied = true;
            }
            // Position of the (marker-free) second half inside the dehyphenated form.
            int endPartIdx = dehyphenated.indexOf(this.hyphenEnd.getText().replace("��", "").replace("��", ""));
            if (this.hyphenEnd.getText().contains("��") && endPartIdx >= 0) {
                dehyphenated.insert(endPartIdx + this.hyphenEnd.getText().indexOf("��"), "��");
                markersCopied = true;
            }
            if (this.hyphenEnd.getText().contains("��") && endPartIdx >= 0) {
                dehyphenated.insert(endPartIdx + this.hyphenEnd.getText().indexOf("��"), "��");
                markersCopied = true;
            }
            if (markersCopied) {
                box.setHyphenInfo(true, dehyphenated.toString());
                this.hyphenEnd.setHyphenInfo(false, dehyphenated.toString());
            }
            box.setTrailingChars("");
        }
        this.inHyphenation = false;
    }

    /**
     * Tells whether {@code box} lacks data that was requested: no text although TEXT is enabled,
     * or all four coordinates negative although COORDINATES is enabled.
     */
    private static boolean isUnusable(OcrBox box, Set<OcrParser.ParsingFeature> features) {
        if (features.contains(OcrParser.ParsingFeature.TEXT)
                && (box.getText() == null || box.getText().isEmpty())) {
            return true;
        }
        return features.contains(OcrParser.ParsingFeature.COORDINATES)
                && box.getLrx() < 0.0f
                && box.getLry() < 0.0f
                && box.getUlx() < 0.0f
                && box.getUly() < 0.0f;
    }

    /**
     * Advances the reader until it is positioned on the next {@code String} start element or the
     * stream is exhausted, updating {@link #noMoreWords} accordingly.
     *
     * @param reader the underlying StAX reader
     * @param trackPages whether {@code Page} elements encountered on the way update {@link #currentPage}
     * @return the number of {@code SP} and {@code TextLine} start elements that were skipped
     * @throws XMLStreamException if the underlying reader fails
     */
    private int seekToNextWord(XMLStreamReader2 reader, boolean trackPages) throws XMLStreamException {
        int separators = 0;
        boolean foundWord = false;
        while (reader.hasNext()) {
            if (reader.next() != START_ELEMENT) {
                continue;
            }
            String localName = reader.getLocalName();
            if ("String".equals(localName)) {
                foundWord = true;
                break;
            }
            if ("SP".equals(localName)) {
                this.hasExplicitSpaces = true;
                separators++;
            } else if ("TextLine".equals(localName)) {
                separators++;
            } else if ("Page".equals(localName) && trackPages) {
                this.currentPage = readPage(reader);
            }
        }
        this.noMoreWords = !foundWord;
        return separators;
    }

    /** Builds an {@link OcrPage} from the ID, WIDTH and HEIGHT attributes of the current element. */
    private static OcrPage readPage(XMLStreamReader2 reader) {
        String width = reader.getAttributeValue("", "WIDTH");
        String height = reader.getAttributeValue("", "HEIGHT");
        Dimension dimension = null;
        if (width != null && height != null) {
            try {
                dimension = new Dimension((int) Double.parseDouble(width), (int) Double.parseDouble(height));
            } catch (NumberFormatException ignored) {
                // Best effort: a page with non-numeric dimensions is still reported, just without a size.
            }
        }
        return new OcrPage(reader.getAttributeValue("", "ID"), dimension);
    }

    /**
     * Locates the value of an attribute of the current start element in the input's back-context
     * buffer.
     *
     * @param str name of the attribute to look for
     * @param xMLStreamReader2 reader positioned on a START_ELEMENT event
     * @return the offset just past {@code " name="} plus one further character, relative to the
     *     same origin as {@code getBackContextStartOffset()}, or {@code -1} if the pattern was not found
     * @throws IllegalStateException if the reader is not on a START_ELEMENT event
     */
    private long getAttributeValueOffset(String str, XMLStreamReader2 xMLStreamReader2) {
        if (xMLStreamReader2.getEventType() != START_ELEMENT) {
            throw new IllegalStateException("XMLStreamReader must be on a START_ELEMENT event.");
        }
        char[] needle = (" " + str + "=").toCharArray();
        char[] backContext = this.input.peekBackContextBuffer();
        int backContextSize = this.input.getBackContextSize();
        // Start searching at the beginning of the current element within the buffer.
        int elementStart = Math.toIntExact(
                xMLStreamReader2.getLocationInfo().getStartingCharOffset() - this.input.getBackContextStartOffset());
        int needleIdx = CharBufUtils.indexOf(backContext, elementStart, backContextSize, needle);
        if (needleIdx >= 0) {
            // +1 presumably skips the quote character that opens the attribute value.
            return this.input.getBackContextStartOffset() + needleIdx + needle.length + 1;
        }
        return -1L;
    }
}
