package de.digitalcollections.solrocr.formats.mini;

import de.digitalcollections.solrocr.util.Streams;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.List;
import java.util.stream.Collectors;
import net.byteseek.compiler.CompileException;
import net.byteseek.compiler.matcher.SequenceMatcherCompiler;
import net.byteseek.matcher.sequence.ByteSequenceMatcher;
import net.byteseek.matcher.sequence.SequenceMatcher;
import net.byteseek.searcher.ForwardSearchIterator;
import net.byteseek.searcher.SearchResult;
import net.byteseek.searcher.Searcher;
import net.byteseek.searcher.sequence.SequenceMatcherSearcher;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;

/* loaded from: input_file:de/digitalcollections/solrocr/formats/mini/MiniOcrByteOffsetsParser.class */
/**
 * Extracts the words of a MiniOCR document together with the byte offset at which each word's
 * text starts.
 *
 * <p>A word is everything between the {@code >} that closes a {@code <w x="...} start tag and
 * the following {@code </w>}. All searching is done directly on the raw bytes, so the reported
 * offsets are byte offsets into the input array, not character offsets.
 *
 * <p>The class only has static members and keeps no mutable state.
 */
public class MiniOcrByteOffsetsParser {
    /** Finds the start of every word element. */
    private static final Searcher<SequenceMatcher> BEGIN_WORD_SEARCHER =
            new SequenceMatcherSearcher(new ByteSequenceMatcher("<w x=\""));

    /** Finds the closing tag of every word element. */
    private static final Searcher<SequenceMatcher> END_WORD_SEARCHER =
            new SequenceMatcherSearcher(new ByteSequenceMatcher("</w>"));

    /**
     * Sentinel end id meaning "stop at the closing tag of the element the range starts at"
     * instead of looking up a second element by id.
     */
    private static final String END_OF_START_ELEMENT = "\uffff";

    /** Number of bytes decoded at an element's offset in order to read its tag name. */
    private static final int TAG_PEEK_LENGTH = 6;

    /**
     * Returns the offset of the first match of {@code matcher} at or after {@code fromOffset}.
     *
     * @return the match position, or {@code -1} if there is no match
     */
    private static int firstMatchOffset(SequenceMatcher matcher, byte[] ocrBytes, int fromOffset) {
        Searcher<SequenceMatcher> searcher = new SequenceMatcherSearcher(matcher);
        ForwardSearchIterator<SequenceMatcher> matches =
                new ForwardSearchIterator<>(searcher, ocrBytes, fromOffset);
        if (!matches.hasNext()) {
            return -1;
        }
        return (int) matches.next().get(0).getMatchPosition();
    }

    /**
     * Finds the first closing tag {@code </c>} at or after {@code i}.
     *
     * @param bArr the MiniOCR document
     * @param c the single-character tag name
     * @param i the offset to start searching from
     * @return the byte offset of the {@code <} of the closing tag
     * @throws IllegalArgumentException if no such closing tag exists
     */
    private static int getClosingOffsetFrom(byte[] bArr, char c, int i) {
        int closingOffset = firstMatchOffset(new ByteSequenceMatcher("</" + c + ">"), bArr, i);
        if (closingOffset < 0) {
            throw new IllegalArgumentException("Invalid MiniOCR, could not find closing tag for '" + c + "'");
        }
        return closingOffset;
    }

    /**
     * Finds the first element at or after {@code i} whose {@code xml:id} attribute equals
     * {@code str}.
     *
     * <p>The pattern is {@code <}, one arbitrary byte (the tag name), then
     * {@code  xml:id="<id>"}. The closing quote is part of the pattern so that an id such as
     * {@code 28} does not also match an element with id {@code 281}.
     *
     * <p>NOTE(review): the id is spliced into a byteseek expression inside single quotes; an id
     * that itself contains a single quote presumably fails to compile and surfaces as the
     * {@link RuntimeException} below - confirm against the byteseek syntax if such ids can occur.
     *
     * @param bArr the MiniOCR document
     * @param i the offset to start searching from
     * @param str the id to look for
     * @return the byte offset of the {@code <} of the matching start tag
     * @throws IllegalArgumentException if no element with that id is found
     * @throws RuntimeException if the search expression cannot be compiled
     */
    private static int getIdOffset(byte[] bArr, int i, String str) {
        final SequenceMatcher idMatcher;
        try {
            idMatcher = SequenceMatcherCompiler.compileFrom("'<' . ' xml:id=\"" + str + "\"'");
        } catch (CompileException e) {
            throw new RuntimeException(e);
        }
        int elementOffset = firstMatchOffset(idMatcher, bArr, i);
        if (elementOffset < 0) {
            throw new IllegalArgumentException("Could not find element with id '" + str + "'");
        }
        return elementOffset;
    }

    /**
     * Reads the tag name character of the element starting at {@code elementOffset}, i.e. the
     * character right after the byte at that offset.
     *
     * <p>Up to {@link #TAG_PEEK_LENGTH} bytes are decoded as UTF-8; fewer are used when the
     * element sits close to the end of the array, so a short tail does not cause an
     * out-of-bounds failure.
     *
     * @throws IllegalArgumentException if there are not enough bytes at the offset to hold a tag
     */
    private static char tagNameAt(byte[] ocrBytes, int elementOffset) {
        if (elementOffset >= 0) {
            int available = Math.min(TAG_PEEK_LENGTH, ocrBytes.length - elementOffset);
            if (available >= 2) {
                String head = new String(ocrBytes, elementOffset, available, StandardCharsets.UTF_8);
                if (head.length() >= 2) {
                    return head.charAt(1);
                }
            }
        }
        throw new IllegalArgumentException("Invalid MiniOCR, no element tag at offset " + elementOffset);
    }

    /**
     * Collects all words inside a range of the document.
     *
     * <p>The range starts at {@code i}, or at the element with id {@code str} (searched from
     * {@code i}) when {@code str} is not {@code null}. Where it ends depends on {@code str2}:
     * <ul>
     *   <li>{@code null}: at the last byte of the document;</li>
     *   <li>{@code "\uffff"}: at the closing tag of the element the range starts at;</li>
     *   <li>anything else: at the closing tag of the element with that id, searched from the
     *       start of the range.</li>
     * </ul>
     *
     * <p>Word start tags and word end tags are searched independently and paired up in order.
     *
     * @param bArr the MiniOCR document
     * @param i the offset to start at (or to start searching for {@code str} from)
     * @param str id of the element to start at, or {@code null}
     * @param str2 id of the element to end at, {@code "\uffff"}, or {@code null}
     * @return one pair per word: the word's text and the byte offset of its first byte
     * @throws IllegalArgumentException if an id or a required closing tag cannot be found
     * @throws IOException declared for API compatibility; nothing in this method performs I/O
     */
    public static List<Pair<String, Integer>> parse(byte[] bArr, int i, String str, String str2) throws IOException {
        int rangeStart = i;
        if (str != null) {
            rangeStart = getIdOffset(bArr, i, str);
        }

        int rangeEnd = bArr.length - 1;
        if (END_OF_START_ELEMENT.equals(str2)) {
            rangeEnd = getClosingOffsetFrom(bArr, tagNameAt(bArr, rangeStart), rangeStart);
        } else if (str2 != null) {
            int endElementOffset = getIdOffset(bArr, rangeStart, str2);
            rangeEnd = getClosingOffsetFrom(bArr, tagNameAt(bArr, endElementOffset), endElementOffset);
        }

        ForwardSearchIterator<SequenceMatcher> wordStarts =
                new ForwardSearchIterator<>(BEGIN_WORD_SEARCHER, rangeStart, rangeEnd, bArr);
        ForwardSearchIterator<SequenceMatcher> wordEnds =
                new ForwardSearchIterator<>(END_WORD_SEARCHER, rangeStart, rangeEnd, bArr);

        return Streams.zip(
                        Streams.stream(wordStarts).flatMap(List::stream).map(SearchResult::getMatchPosition),
                        Streams.stream(wordEnds).flatMap(List::stream).map(SearchResult::getMatchPosition),
                        (wordStart, wordEnd) -> ImmutablePair.of(wordStart, wordEnd))
                .map(offsets -> mapOffsetsToTerm(offsets, bArr))
                .collect(Collectors.toList());
    }

    /**
     * Turns the offsets of a word's start tag and end tag into the word's text and text offset.
     *
     * <p>The text begins right after the first {@code >} found at or after the start tag offset
     * and runs up to the end tag offset. With assertions enabled, an empty or inverted text
     * range fails with an {@link AssertionError}.
     *
     * @param pair offset of the word's start tag (left) and of its closing tag (right)
     * @param bArr the MiniOCR document
     * @return the word's text decoded as UTF-8 and the byte offset of its first byte
     */
    public static Pair<String, Integer> mapOffsetsToTerm(Pair<Long, Long> pair, byte[] bArr) {
        int startTagOffset = pair.getLeft().intValue();
        int endTagOffset = pair.getRight().intValue();
        int textOffset = ArrayUtils.indexOf(bArr, (byte) '>', startTagOffset) + 1;
        assert textOffset < endTagOffset;
        String text = new String(bArr, textOffset, endTagOffset - textOffset, StandardCharsets.UTF_8);
        return ImmutablePair.of(text, Integer.valueOf(textOffset));
    }

    /**
     * Writes every word of the whole document to {@code outputStream}.
     *
     * @see #parse(byte[], OutputStream, String, String)
     */
    public static void parse(byte[] bArr, OutputStream outputStream) throws IOException {
        parse(bArr, outputStream, null, null);
    }

    /**
     * Writes every word inside the element with id {@code str} to {@code outputStream}; the
     * range ends at that element's closing tag.
     *
     * @see #parse(byte[], OutputStream, String, String)
     */
    public static void parse(byte[] bArr, OutputStream outputStream, String str) throws IOException {
        parse(bArr, outputStream, str, END_OF_START_ELEMENT);
    }

    /**
     * Writes the words of the selected range to {@code outputStream}, each as
     * {@code <text>⚑<offset>} followed by a single space, encoded as UTF-8.
     *
     * <p>The offset is written with plain decimal digits regardless of the default locale.
     *
     * @param bArr the MiniOCR document
     * @param outputStream the stream to write to; it is neither flushed nor closed here
     * @param str id of the element to start at, or {@code null} to start at offset 0
     * @param str2 end of the range, as for {@link #parse(byte[], int, String, String)}
     * @throws IOException if writing to {@code outputStream} fails
     */
    public static void parse(byte[] bArr, OutputStream outputStream, String str, String str2) throws IOException {
        for (Pair<String, Integer> term : parse(bArr, 0, str, str2)) {
            outputStream.write(term.getLeft().getBytes(StandardCharsets.UTF_8));
            // Concatenation instead of String.format: "%d" is localized and could emit
            // non-ASCII digits under some default locales.
            outputStream.write(("⚑" + term.getRight() + " ").getBytes(StandardCharsets.UTF_8));
        }
    }

    /** Ad-hoc timing run against a test resource; prints the elapsed time and the output. */
    public static void main(String[] strArr) throws IOException {
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        long startNanos = System.nanoTime();
        parse(Files.readAllBytes(Paths.get("src/test/resources/data/31337_utf8ocr.xml")), buffer, "28");
        double elapsedMillis = (System.nanoTime() - startNanos) / 1000000.0d;
        System.out.println(String.format("Parsing took %.2fms", Double.valueOf(elapsedMillis)));
        System.out.println(buffer.toString(StandardCharsets.UTF_8.name()));
    }
}
