package de.datexis.sector.reader;

import de.datexis.common.Resource;
import de.datexis.model.Annotation;
import de.datexis.model.Dataset;
import de.datexis.model.Document;
import de.datexis.model.Sentence;
import de.datexis.preprocess.DocumentFactory;
import de.datexis.reader.RawTextDatasetReader;
import de.datexis.sector.model.SectionAnnotation;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.nio.file.FileVisitOption;
import java.nio.file.Files;
import java.nio.file.LinkOption;
import java.nio.file.Path;
import java.util.Iterator;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Stream;
import org.apache.commons.io.LineIterator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:de/datexis/sector/reader/ChoiDatasetReader.class */
/**
 * Reads segmented text files in which segments are separated by lines that
 * consist solely of {@link #SEGMENT_SPLIT}, and attaches one
 * {@link SectionAnnotation} per segment to the resulting {@link Document}.
 *
 * <p>Also provides {@link #readC99Result(Dataset, Resource)} to load predicted
 * segmentations from {@code *.pred} files into an existing {@link Dataset}.
 */
public class ChoiDatasetReader extends RawTextDatasetReader {
    protected static final Logger log = LoggerFactory.getLogger(ChoiDatasetReader.class);
    protected static final String SEGMENT_SPLIT = "==========";

    /**
     * File name of a prediction file: {@code <digits>.pred} or {@code <digits>.ref.pred}.
     * Group 1 holds the digits. Matched against the file name only, so the
     * platform's path separator does not matter.
     */
    private static final Pattern PREDICTION_FILE_NAME = Pattern.compile("(\\d+)(\\.ref)?\\.pred");

    /**
     * Reads a single segmented file into a {@link Document} of type {@code "seg"}.
     *
     * <p>Each non-blank line becomes one sentence; lines that do not end in
     * {@code .}, {@code !} or {@code ?} get a trailing {@code .} appended.
     * Every non-empty segment yields one GOLD {@link SectionAnnotation}.
     *
     * @param resource the file to read; its file name becomes the document id
     * @return the document with sentences and section annotations
     * @throws RuntimeException wrapping the {@link IOException} if reading fails
     */
    public Document readDocumentFromFile(Resource resource) {
        // try-with-resources guarantees the stream is closed on failure as well.
        try (InputStream in = resource.getInputStream()) {
            LineIterator lines = new LineIterator(
                    new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8.newDecoder())));
            Document document = new Document();
            document.setId(resource.getFileName());
            document.setSource(resource.toString());
            document.setType("seg");

            StringBuilder segment = new StringBuilder();
            while (lines.hasNext()) {
                String line = lines.nextLine();
                if (line.equals(SEGMENT_SPLIT)) {
                    // Delimiter: flush what was collected so far and start a new segment.
                    String text = segment.toString();
                    if (text.trim().length() > 0) {
                        addToDocument(text, document);
                    }
                    segment = new StringBuilder();
                    continue;
                }
                if (segment.length() > 0) {
                    segment.append(" ");
                }
                if (!line.trim().isEmpty()) {
                    boolean terminated = line.endsWith(".") || line.endsWith("!") || line.endsWith("?");
                    if (!terminated) {
                        line = line + ".";
                    }
                    // The newline marks the sentence boundary used by addToDocument.
                    segment.append(line).append("\n");
                }
            }

            // Flush a trailing segment that is not followed by a delimiter line.
            String rest = segment.toString();
            if (rest.trim().length() > 0) {
                addToDocument(rest, document);
            }
            return document;
        } catch (IOException e) {
            log.error(e.toString());
            // Keep the IOException itself as the cause so its stack trace is not lost.
            throw new RuntimeException(e.toString(), e);
        }
    }

    /**
     * Appends one segment to {@code document} and annotates it as a GOLD section.
     *
     * @param str      segment text with one sentence per {@code \n}-terminated line
     * @param document the document to extend
     */
    private void addToDocument(String str, Document document) {
        if (str.trim().length() == 0) {
            return;
        }
        Document segment = new Document();
        for (String line : str.split("\n")) {
            String sentenceText = line.trim();
            if (sentenceText.isEmpty()) {
                continue;
            }
            segment.addSentence(DocumentFactory.createSentenceFromTokens(
                    DocumentFactory.fromText(sentenceText + "\n", DocumentFactory.Newlines.KEEP).getTokens()));
        }
        // Offsets are read from the segment only after it has been appended to the document.
        document.append(segment);
        String label = Integer.toString(segment.getBegin());
        SectionAnnotation section = new SectionAnnotation(Annotation.Source.GOLD, document.getType(), label);
        section.setSectionLabel(label);
        section.setBegin(segment.getBegin());
        section.setEnd(segment.getEnd());
        document.addAnnotation(section);
    }

    /**
     * Loads predicted segmentations from all regular {@code *.pred} files below
     * {@code resource} (visited in sorted path order) and adds them as PRED
     * {@link SectionAnnotation}s to the matching documents of {@code dataset}.
     *
     * <p>The leading digits of each file name are passed to
     * {@code dataset.getDocument(int)} to select the target document.
     *
     * @param dataset  dataset whose documents receive the predicted sections
     * @param resource directory (or file) to search for prediction files
     * @throws IOException           if walking the directory or reading a file fails
     * @throws IllegalStateException if a {@code *.pred} file name does not start with digits
     */
    public static void readC99Result(Dataset dataset, Resource resource) throws IOException {
        // Files.walk holds open directory handles until the stream is closed.
        try (Stream<Path> walk = Files.walk(resource.getPath())) {
            Iterator<Path> files = walk
                    .filter(p -> Files.isRegularFile(p, LinkOption.NOFOLLOW_LINKS))
                    .filter(p -> p.getFileName().toString().endsWith(".pred"))
                    .sorted()
                    .iterator();
            while (files.hasNext()) {
                readPredictionFile(dataset, files.next());
            }
        }
    }

    /** Reads one prediction file and attaches its sections to the corresponding document. */
    private static void readPredictionFile(Dataset dataset, Path file) throws IOException {
        Matcher matcher = PREDICTION_FILE_NAME.matcher(file.getFileName().toString());
        if (!matcher.matches()) {
            throw new IllegalStateException("Cannot derive document id from prediction file " + file);
        }
        int docId = Integer.parseInt(matcher.group(1));
        String filePath = file.toString();
        log.info("reading doc id {} from file {}", docId, filePath);

        try (InputStream in = Resource.fromFile(filePath).getInputStream()) {
            LineIterator lines = new LineIterator(
                    new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8.newDecoder())));
            Document document = (Document) dataset.getDocument(docId).get();

            int sentenceIndex = 0;       // position in the document's sentence list
            int sectionCount = 0;        // number of delimiter lines seen; used as section label
            int sentencesInSection = 0;  // sentences collected for the current section
            SectionAnnotation section = new SectionAnnotation(Annotation.Source.PRED);

            while (lines.hasNext()) {
                String line = lines.nextLine();
                if (line.equals(SEGMENT_SPLIT)) {
                    sectionCount++;
                    if (sentencesInSection > 0) {
                        document.addAnnotation(section);
                    }
                    section = new SectionAnnotation(Annotation.Source.PRED);
                    section.setSectionLabel(Integer.toString(sectionCount));
                    sentencesInSection = 0;
                    continue;
                }

                Sentence sentence = document.getSentence(sentenceIndex);
                if (sentencesInSection == 0) {
                    section.setBegin(sentence.getBegin());
                }
                // Always move the end forward so single-sentence sections get an end as well.
                section.setEnd(sentence.getEnd());
                sentencesInSection++;
                sentenceIndex++;
                if (!sentence.getText().trim().equals(line.trim())) {
                    log.warn("docId {} k={} different sentences\n{}\n{}",
                            docId, sentenceIndex, line, sentence.getText());
                }
            }

            // Flush a last section that is not followed by a delimiter line.
            if (sentencesInSection > 0) {
                document.addAnnotation(section);
            }
        }
    }
}
