package de.datexis.reader;

import de.datexis.common.Resource;
import de.datexis.model.Document;
import de.datexis.preprocess.DocumentFactory;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.stream.Collectors;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:de/datexis/reader/RawTextDatasetReader.class */
/**
 * Dataset reader that turns a plain-text file into a single {@link Document}.
 *
 * <p>The file content is decoded as UTF-8 and its lines are joined with {@code "\n"}.
 * Depending on configuration, the text is either handed to
 * {@link DocumentFactory#fromText} (newlines kept) or treated as pre-tokenized input.
 * The options are set through the fluent {@code with...} methods, each of which returns
 * this reader.
 */
public class RawTextDatasetReader extends DirectoryDatasetReader<RawTextDatasetReader> {
    protected static final Logger log = LoggerFactory.getLogger(RawTextDatasetReader.class);

    /** When {@code true}, the first sentence of each document is also stored as its title. */
    protected boolean useFirstSentenceAsTitle = false;

    /** When {@code true}, the input text is treated as already tokenized. */
    protected boolean isTokenized = false;

    /**
     * Flag set via {@link #withGeneratedUIDs(boolean)}.
     * NOTE(review): not read anywhere in this class; presumably consumed by the superclass
     * or by subclasses - confirm.
     */
    protected boolean generateUIDs = false;

    /**
     * Configures whether the first sentence of a document is used as its title.
     *
     * @param z {@code true} to set the title from the first sentence
     * @return this reader, for chaining
     */
    public RawTextDatasetReader withFirstSentenceAsTitle(boolean z) {
        this.useFirstSentenceAsTitle = z;
        return this;
    }

    /**
     * Configures whether input files contain already tokenized text.
     *
     * @param z {@code true} if the input is tokenized
     * @return this reader, for chaining
     */
    public RawTextDatasetReader withTokenizedInput(boolean z) {
        this.isTokenized = z;
        return this;
    }

    /**
     * Sets the {@code generateUIDs} flag.
     *
     * @param z new value of the flag
     * @return this reader, for chaining
     */
    public RawTextDatasetReader withGeneratedUIDs(boolean z) {
        this.generateUIDs = z;
        return this;
    }

    /**
     * Reads the given resource as UTF-8 text and builds a {@link Document} from it.
     *
     * <p>The document id is set to the resource's file name and its source to the
     * resource's string representation. If {@code useFirstSentenceAsTitle} is enabled,
     * the title becomes the trimmed text of the first sentence, or the empty string when
     * the document has no sentences.
     *
     * @param resource the file to read
     * @return the document created from the file content
     * @throws IOException if the stream cannot be opened or closed
     */
    @Override // de.datexis.reader.DirectoryDatasetReader
    public Document readDocumentFromFile(Resource resource) throws IOException {
        // try-with-resources closes the stream on every path and, unlike a bare close() in a
        // finally block, keeps the original exception when close() fails as well.
        try (InputStream inputStream = resource.getInputStream()) {
            // newDecoder() reports malformed input instead of silently replacing it.
            BufferedReader reader = new BufferedReader(
                    new InputStreamReader(inputStream, StandardCharsets.UTF_8.newDecoder()));
            String text = reader.lines().collect(Collectors.joining("\n"));

            Document doc;
            if (this.isTokenized) {
                doc = DocumentFactory.fromTokens(DocumentFactory.createTokensFromTokenizedText(text));
            } else {
                doc = DocumentFactory.fromText(text, DocumentFactory.Newlines.KEEP);
            }
            doc.setId(resource.getFileName());
            doc.setSource(resource.toString());

            if (this.useFirstSentenceAsTitle) {
                if (doc.countSentences() > 0) {
                    doc.setTitle(doc.getSentence(0).getText().trim());
                } else {
                    doc.setTitle("");
                }
            }
            return doc;
        }
    }
}
