package de.datexis.cdv.reader;

import de.datexis.cdv.index.PassageIndex;
import de.datexis.common.InternalResource;
import de.datexis.common.Resource;
import de.datexis.model.Dataset;
import de.datexis.reader.DatasetReader;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.file.FileVisitOption;
import java.nio.file.Files;
import java.nio.file.LinkOption;
import java.nio.file.Path;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Pattern;
import java.util.stream.Stream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:de/datexis/cdv/reader/MatchZooReader.class */
/**
 * Base class for readers that build a {@link Dataset} from a single file or from
 * all matching files below a directory.
 *
 * <p>Subclasses supply the actual parsing by implementing
 * {@link #addDocumentFromFile(Resource, Dataset)}.
 */
public abstract class MatchZooReader implements DatasetReader {
    protected static final Logger log = LoggerFactory.getLogger(MatchZooReader.class);

    /** Number of bytes in one gibibyte (2^30), used for memory usage reporting. */
    private static final double BYTES_PER_GB = 1.073741824E9d;

    /** Progress is logged once per this many visited files. */
    private static final int PROGRESS_INTERVAL = 1000;

    /**
     * Reads a Dataset from the given resource.
     *
     * <p>An {@link InternalResource} or a plain file is read as a single source;
     * a directory is walked recursively and every file whose path contains a
     * match for {@code \.txt$} is read.
     *
     * @param resource file, internal resource or directory to read from
     * @return the Dataset populated by {@link #addDocumentFromFile(Resource, Dataset)}
     * @throws FileNotFoundException if the resource is neither a file nor a directory
     * @throws IOException if reading the single file or walking the directory fails
     */
    public Dataset read(Resource resource) throws IOException {
        if ((resource instanceof InternalResource) || resource.isFile()) {
            Dataset dataset = new Dataset(resource.getFileName());
            addDocumentFromFile(resource, dataset);
            return dataset;
        }
        if (resource.isDirectory()) {
            return readDatasetFromDirectory(resource, "\\.txt$");
        }
        throw new FileNotFoundException("cannot open path: " + resource.toString());
    }

    /**
     * Recursively reads all regular files below a directory into a new Dataset
     * named after the directory.
     *
     * <p>A file is included when the regular expression is found anywhere in its
     * path string, so an anchored suffix pattern such as {@code \.txt$} selects
     * files by extension. Files that fail with an {@link IOException} are logged
     * and skipped so one broken file does not abort the whole import.
     *
     * @param resource directory to walk
     * @param str regular expression searched for in each file's path string
     * @return the Dataset containing everything that could be read
     * @throws IOException if the directory itself cannot be walked
     */
    public Dataset readDatasetFromDirectory(Resource resource, String str) throws IOException {
        log.info("Reading Documents from {}", resource.toString());
        Dataset dataset = new Dataset(resource.getPath().getFileName().toString());
        // Compile once instead of re-compiling the regex for every visited path.
        Pattern filePattern = Pattern.compile(str);
        AtomicInteger visited = new AtomicInteger();
        // Files.walk keeps directory handles open until the stream is closed.
        try (Stream<Path> paths = Files.walk(resource.getPath())) {
            paths.filter(path -> Files.isRegularFile(path, LinkOption.NOFOLLOW_LINKS))
                 // find() rather than matches(): the pattern describes a part of the
                 // path (e.g. its extension), not the complete path string.
                 .filter(path -> filePattern.matcher(path.toString()).find())
                 .forEach(path -> {
                     try {
                         addDocumentFromFile(Resource.fromFile(path.toString()), dataset);
                     } catch (IOException e) {
                         // Best effort: report the broken file and continue with the rest.
                         log.error("Could not read document from {}", path, e);
                     }
                     logProgress(visited.incrementAndGet());
                 });
        }
        return dataset;
    }

    /**
     * Logs the number of visited files and the current heap usage every
     * {@link #PROGRESS_INTERVAL} files.
     */
    private static void logProgress(int count) {
        if (count % PROGRESS_INTERVAL != 0) {
            return;
        }
        Runtime runtime = Runtime.getRuntime();
        double freeGb = runtime.freeMemory() / BYTES_PER_GB;
        double totalGb = runtime.totalMemory() / BYTES_PER_GB;
        // Truncate to one decimal place.
        double usedGb = ((int) ((totalGb - freeGb) * 10.0d)) / 10.0d;
        log.debug("read {}k documents, memory usage {} GB", count / PROGRESS_INTERVAL, usedGb);
    }

    /**
     * Parses a single resource and adds its content to the given Dataset.
     *
     * @param resource the file or internal resource to parse
     * @param dataset the Dataset to add to
     * @throws IOException if the resource cannot be read
     */
    protected abstract void addDocumentFromFile(Resource resource, Dataset dataset) throws IOException;

    /**
     * Builds an in-memory {@link PassageIndex} over the Dataset and uses it to
     * retrieve passage candidates for the Dataset.
     *
     * @param dataset the Dataset to index and to retrieve candidates for
     * @param i the candidate count passed on to the index
     * @throws IOException declared for callers; propagated from the index if it fails
     */
    public static void addCandidateSamples(Dataset dataset, int i) throws IOException {
        log.info("Adding {} passage candidates to queries...", i);
        PassageIndex passageIndex = new PassageIndex();
        passageIndex.createInMemoryIndex(dataset);
        passageIndex.retrievePassageCandidates(dataset, i);
    }
}
