package de.datexis.sector.reader;

import de.datexis.common.Resource;
import de.datexis.model.Annotation;
import de.datexis.model.Document;
import de.datexis.preprocess.DocumentFactory;
import de.datexis.reader.RawTextDatasetReader;
import de.datexis.sector.model.SectionAnnotation;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.io.LineIterator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:de/datexis/sector/reader/Wiki727Reader.class */
/**
 * Reads a plain-text file that is divided into sections by marker lines of the form
 * {@code ========,LEVEL,HEADING.} and turns it into a {@link Document} whose sections are
 * annotated with GOLD {@link SectionAnnotation}s.
 *
 * <p>All lines between two accepted markers form the text of one section. Nested headings are
 * joined with {@code " | "}.
 */
public class Wiki727Reader extends RawTextDatasetReader {
    protected static final Logger log = LoggerFactory.getLogger(Wiki727Reader.class);

    /** Separator placed between the parts of a nested section heading. */
    private static final String HEADING_SEPARATOR = " | ";

    /** Deepest marker level that still starts a new section; {@code 0} accepts every level. */
    protected int sectionLevel = 2;

    /** If set, sections whose top-level heading is "preface" are left out of the document. */
    protected boolean skipPrefaceText = false;

    /** Not read anywhere in this class; kept because subclasses may rely on it. */
    protected boolean skipPrefaceAnnotation = false;

    /** Matches a section marker line; group 1 is the numeric level, group 2 the heading. */
    protected Pattern SECTION_PATTERN = Pattern.compile("^========,(\\d+),(.+?)\\.$");

    /**
     * Sets the deepest marker level that starts a new section.
     *
     * @param i maximum level, or {@code 0} to accept markers of every level
     * @return this reader, for chaining
     */
    public Wiki727Reader withSectionLevel(int i) {
        this.sectionLevel = i;
        return this;
    }

    /**
     * Controls whether sections with the top-level heading "preface" are dropped.
     *
     * @param z {@code true} to skip preface text
     * @return this reader, for chaining
     */
    public Wiki727Reader withSkipPreface(boolean z) {
        this.skipPrefaceText = z;
        return this;
    }

    /**
     * Reads one file and converts it into a section-annotated document of type {@code "wiki"}.
     *
     * @param resource the file to read; its file name becomes the document id
     * @return the parsed document
     * @throws RuntimeException wrapping the {@link IOException} if the resource cannot be opened
     *     or closed
     */
    public Document readDocumentFromFile(Resource resource) {
        // try-with-resources guarantees the stream is closed even when parsing fails midway.
        try (InputStream inputStream = resource.getInputStream()) {
            // newDecoder() is kept on purpose: unlike passing the Charset, it reports malformed
            // input instead of silently replacing it.
            LineIterator lines = new LineIterator(new BufferedReader(
                    new InputStreamReader(inputStream, StandardCharsets.UTF_8.newDecoder())));
            Document document = new Document();
            document.setId(resource.getFileName());
            document.setSource(resource.toString());
            document.setType("wiki");

            StringBuilder sectionText = new StringBuilder();
            String sectionHeading = "";
            while (lines.hasNext()) {
                String line = (String) lines.next();
                // Only run the regex on lines that can possibly be a marker.
                Matcher marker = line.startsWith("=====") ? this.SECTION_PATTERN.matcher(line) : null;
                if (marker != null && marker.matches()) {
                    int level = Integer.parseInt(marker.group(1));
                    String heading = marker.group(2);
                    if (this.sectionLevel == 0 || level <= this.sectionLevel) {
                        // Flush the section collected so far, then start the next one.
                        addToDocument(sectionText.toString(), sectionHeading, document);
                        sectionHeading = nestHeading(sectionHeading, level, heading);
                        sectionText = new StringBuilder();
                    }
                    // Markers deeper than sectionLevel are dropped: the marker line itself is
                    // discarded and following text stays in the current section.
                } else {
                    appendTextLine(sectionText, line);
                }
            }
            // Flush the trailing section after the last marker.
            addToDocument(sectionText.toString(), sectionHeading, document);
            return document;
        } catch (IOException e) {
            log.error(e.toString(), e);
            // Pass the exception itself as cause so its type and stack trace are preserved.
            throw new RuntimeException(e.toString(), e);
        }
    }

    /**
     * Builds the heading path for a new section from the previous path.
     *
     * <p>Levels 1 and 2 replace the whole path. For deeper levels the previous path is cut at the
     * separator found after {@code level - 2} searches and the new heading is appended; if no
     * such separator exists, the heading is appended to the full previous path.
     */
    private static String nestHeading(String current, int level, String heading) {
        int cut = 0;
        for (int remaining = level - 1; remaining > 1; remaining--) {
            cut = current.indexOf(HEADING_SEPARATOR, cut + 1);
        }
        if (cut > 0) {
            return current.substring(0, cut) + HEADING_SEPARATOR + heading;
        }
        if (cut < 0) {
            return current + HEADING_SEPARATOR + heading;
        }
        return heading;
    }

    /**
     * Appends one text line to the section buffer after removing the placeholder tokens
     * {@code ***LIST***}, {@code ***formula***} and {@code ***codice***}. Lines that are blank
     * after cleaning contribute no text.
     */
    private static void appendTextLine(StringBuilder sectionText, String line) {
        if (sectionText.length() > 0) {
            sectionText.append(" ");
        }
        // Literal replacement; no regex needed for fixed tokens.
        String cleaned = line.replace("***LIST***", "")
                .replace("***formula***", "")
                .replace("***codice***", "");
        if (!cleaned.trim().isEmpty()) {
            sectionText.append(cleaned).append("\n");
        }
    }

    /**
     * Appends one section to the document and annotates its span with the section heading.
     *
     * <p>Nothing is added when the text is blank, the heading is {@code null}, or preface
     * skipping is enabled and the top-level heading is "preface".
     *
     * @param text section text, one sentence per {@code \n}-terminated line
     * @param heading full heading path used as annotation label
     * @param document target document that receives the sentences and the annotation
     */
    private void addToDocument(String text, String heading, Document document) {
        // Check all skip conditions first so no tokenization is done for discarded sections.
        if (text.trim().length() == 0 || heading == null) {
            return;
        }
        if (this.skipPrefaceText) {
            // Everything from the first '|' on is nested heading; compare the top level only.
            String topLevel = heading.replaceFirst("\\|.+$", "").trim().toLowerCase();
            if (topLevel.equals("preface")) {
                return;
            }
        }
        Document section = new Document();
        for (String sentenceText : text.split("\n")) {
            String trimmed = sentenceText.trim();
            if (!trimmed.isEmpty()) {
                section.addSentence(DocumentFactory.createSentenceFromTokens(
                        DocumentFactory.fromText(trimmed + "\n", DocumentFactory.Newlines.KEEP).getTokens()));
            }
        }
        document.append(section);
        SectionAnnotation annotation = new SectionAnnotation(Annotation.Source.GOLD, document.getType(), heading);
        annotation.setSectionLabel(heading);
        // Offsets are read after append(); presumably append() shifts them into the target
        // document's coordinates — confirm against Document.append.
        annotation.setBegin(section.getBegin());
        annotation.setEnd(section.getEnd());
        document.addAnnotation(annotation);
    }
}
