package de.digitalcollections.solrocr.lucene.filters;

import com.google.common.collect.ImmutableSet;
import de.digitalcollections.solrocr.formats.alto.AltoFormat;
import de.digitalcollections.solrocr.formats.hocr.HocrFormat;
import de.digitalcollections.solrocr.formats.miniocr.MiniOcrFormat;
import de.digitalcollections.solrocr.model.OcrFormat;
import de.digitalcollections.solrocr.reader.PeekingReader;
import java.io.Reader;
import java.util.Map;
import org.apache.lucene.analysis.util.CharFilterFactory;

/* loaded from: input_file:de/digitalcollections/solrocr/lucene/filters/OcrCharFilterFactory.class */
/**
 * Char filter factory that sniffs the OCR format of an incoming document and hands the
 * stream to the matching format's filter.
 *
 * <p>Two options are read from the factory arguments; each is enabled only when its value is
 * exactly the string {@code "true"}:
 * <ul>
 *   <li>{@code expandAlternatives} - forwarded to the detected format's {@code filter} call.</li>
 *   <li>{@code fixMarkup} - forwarded to the {@code SanitizingXmlFilter} wrapping the input.</li>
 * </ul>
 */
public class OcrCharFilterFactory extends CharFilterFactory {
    /** Two consecutive U+2060 (WORD JOINER) characters. NOTE(review): usage is outside this file - presumably separates OCR alternatives; confirm. */
    public static final String ALTERNATIVE_MARKER = "\u2060\u2060";

    /** Formats probed by {@link #create(Reader)}, in this order: hOCR, ALTO, MiniOCR. */
    private static final ImmutableSet<OcrFormat> FORMATS =
            ImmutableSet.of(new HocrFormat(), new AltoFormat(), new MiniOcrFormat());

    // Both sizes are passed straight to the PeekingReader constructor.
    // NOTE(review): presumably character counts for the "beginning" and "context" buffers - confirm.
    private static final int BEGIN_BUF_SIZE = 2048;
    private static final int CTX_BUF_SIZE = 16384;

    private final boolean expandAlternatives;
    private final boolean fixMarkup;

    /**
     * Creates the factory from its configuration arguments.
     *
     * @param map factory arguments; passed on to the superclass constructor and consulted for
     *            the {@code expandAlternatives} and {@code fixMarkup} options
     */
    public OcrCharFilterFactory(Map<String, String> map) {
        super(map);
        this.expandAlternatives = isEnabled(map, "expandAlternatives");
        this.fixMarkup = isEnabled(map, "fixMarkup");
    }

    /** Returns whether {@code key} is mapped to exactly {@code "true"} (case-sensitive). */
    private static boolean isEnabled(Map<String, String> options, String key) {
        return "true".equals(options.get(key));
    }

    /**
     * Wraps {@code reader} in the filter of the first OCR format that recognizes the start of
     * the stream.
     *
     * @param reader the raw input
     * @return the format-specific filter, or {@code OcrCharFilter.nopFilter()} when the peeked
     *         beginning is empty or the format's filter is {@code null}
     * @throws RuntimeException if the beginning is non-empty but no known format matches it
     */
    public Reader create(Reader reader) {
        PeekingReader peekingReader = new PeekingReader(
                new SanitizingXmlFilter(reader, this.fixMarkup), BEGIN_BUF_SIZE, CTX_BUF_SIZE);

        // Nothing to sniff: skip format detection entirely.
        if (peekingReader.peekBeginning().isEmpty()) {
            return OcrCharFilter.nopFilter();
        }

        OcrFormat detected = detectFormat(peekingReader);
        Reader filter = detected.filter(peekingReader, this.expandAlternatives);
        if (filter == null) {
            return OcrCharFilter.nopFilter();
        }
        return filter;
    }

    /**
     * Returns the first entry of {@link #FORMATS} whose {@code hasFormat} accepts the peeked
     * beginning of {@code peekingReader}.
     *
     * @throws RuntimeException if no format accepts it
     */
    private static OcrFormat detectFormat(PeekingReader peekingReader) {
        for (OcrFormat candidate : FORMATS) {
            if (candidate.hasFormat(peekingReader.peekBeginning())) {
                return candidate;
            }
        }
        throw new RuntimeException("Could not determine OCR format from chunk: " + peekingReader.peekBeginning());
    }
}
