package de.uni_mannheim.informatik.dws.jrdf2vec.walk_generators.parsers;

import de.uni_mannheim.informatik.dws.jrdf2vec.walk_generators.base.IsearchCondition;
import de.uni_mannheim.informatik.dws.jrdf2vec.walk_generators.base.WalkGenerator;
import de.uni_mannheim.informatik.dws.jrdf2vec.walk_generators.data_structures.TripleDataSetMemory;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
import org.apache.jena.ontology.OntModel;
import org.apache.jena.riot.RDFLanguages;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:de/uni_mannheim/informatik/dws/jrdf2vec/walk_generators/parsers/NtMemoryParser.class */
/**
 * Memory parser that reads triples from N-Triples style text files (plain or gzipped) into the
 * in-memory triple store held in {@code data}.
 *
 * <p>Each non-skipped line is expected to consist of three space-separated tokens. Lines are
 * filtered through a replaceable {@link IsearchCondition}; by default comment lines, blank lines
 * and lines containing a quoted literal are skipped.
 *
 * <p>Optionally, the shortened triples can be written to gzipped files below {@code ./optimized}
 * (relative to the working directory); the multi-threaded directory reader looks these files up
 * by name and reads them instead of the original input.
 */
public class NtMemoryParser extends MemoryParser {
    private static final Logger LOGGER = LoggerFactory.getLogger(NtMemoryParser.class);

    /** Directory, relative to the working directory, that holds the optimized files. */
    private static final String OPTIMIZED_DIRECTORY = "./optimized";

    /** Matches a double-quoted literal anywhere in a line (used by the default skip condition). */
    private static final Pattern QUOTED_LITERAL = Pattern.compile("\".*\"");

    /** Matches everything from the first double quote onwards. */
    private static final Pattern LITERAL_TO_LINE_END = Pattern.compile("\".*");

    /** Matches the statement-terminating dot at the end of a line, including surrounding spaces. */
    private static final Pattern STATEMENT_TERMINATOR = Pattern.compile("[ ]*\\.[ ]*$");

    /** Decides which input lines are ignored by {@link #readNTriples(File, boolean)}. */
    private IsearchCondition skipCondition;

    /** If true, {@link #readNTriples(File, boolean)} additionally writes an optimized file. */
    boolean isWriteOptimizedFile;

    /**
     * Thread that reads one file into the given parser, either through
     * {@link NtMemoryParser#readNTriplesOptimized(File)} or through
     * {@link NtMemoryParser#readNTriples(File, boolean)}.
     */
    public class FileReaderThread extends Thread {
        private final boolean isOptimizedFile;
        private final NtMemoryParser parser;
        private final File fileToRead;
        private final boolean isGzipped;

        /**
         * @param ntMemoryParser parser that receives the triples
         * @param file file to read
         * @param z true if {@code file} is gzipped (only used for non-optimized files)
         * @param z2 true if {@code file} is an optimized file
         */
        public FileReaderThread(NtMemoryParser ntMemoryParser, File file, boolean z, boolean z2) {
            this.fileToRead = file;
            this.parser = ntMemoryParser;
            this.isGzipped = z;
            this.isOptimizedFile = z2;
        }

        @Override
        public void run() {
            if (this.isOptimizedFile) {
                LOGGER.info("STARTED (optimized) thread for file " + this.fileToRead.getName());
                this.parser.readNTriplesOptimized(this.fileToRead);
            } else {
                LOGGER.info("STARTED thread for file " + this.fileToRead.getName());
                this.parser.readNTriples(this.fileToRead, this.isGzipped);
            }
            LOGGER.info("Thread for file " + this.fileToRead.getName() + " completed.");
        }
    }

    /**
     * Creates an empty parser that skips comments, blank lines and lines with quoted literals.
     *
     * @param walkGenerator generator whose {@code shortenUri} is applied to every parsed token
     */
    public NtMemoryParser(WalkGenerator walkGenerator) {
        this.isWriteOptimizedFile = false;
        this.data = new TripleDataSetMemory();
        this.specificWalkGenerator = walkGenerator;
        this.skipCondition = newLiteralSkippingCondition();
    }

    /**
     * Creates a parser and immediately reads the given uncompressed file.
     *
     * @param str path of the file to read
     * @param walkGenerator generator whose {@code shortenUri} is applied to every parsed token
     */
    public NtMemoryParser(String str, WalkGenerator walkGenerator) {
        this(walkGenerator);
        readNTriples(str);
    }

    /**
     * Creates a parser and immediately reads the given uncompressed file.
     *
     * @param file file to read
     * @param walkGenerator generator whose {@code shortenUri} is applied to every parsed token
     */
    public NtMemoryParser(File file, WalkGenerator walkGenerator) {
        this(walkGenerator);
        readNTriples(file, false);
    }

    /**
     * Serializes the model as N-Triples into the file at the given path.
     *
     * @param ontModel model to write
     * @param str path of the target file
     */
    public static void saveAsNt(OntModel ontModel, String str) {
        saveAsNt(ontModel, new File(str));
    }

    /**
     * Serializes the model as N-Triples into the given file. I/O problems are logged, not thrown.
     *
     * @param ontModel model to write
     * @param file target file
     */
    public static void saveAsNt(OntModel ontModel, File file) {
        // UTF-8 is stated explicitly and the writer is closed so that buffered content is flushed.
        try (BufferedWriter writer = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(file), StandardCharsets.UTF_8))) {
            ontModel.write(writer, RDFLanguages.strLangNTriples);
        } catch (IOException e) {
            LOGGER.error("Could not write model to file " + file.getAbsolutePath(), e);
        }
    }

    /**
     * Reads an uncompressed file.
     *
     * @param str path of the file to read
     */
    public void readNTriples(String str) {
        readNTriples(str, false);
    }

    /**
     * Sequentially reads all {@code .gz}, {@code .nt} and {@code .ttl} files that are direct
     * children of the given directory. Other files are skipped.
     *
     * @param str path of the directory
     */
    public void readNTriplesFilesFromDirectory(String str) {
        File directory = new File(str);
        if (!directory.isDirectory()) {
            LOGGER.error("The given pathToDirectory is no directory, aborting. (given: " + str + ")");
            return;
        }
        File[] files = directory.listFiles();
        if (files == null) {
            // listFiles() returns null if an I/O error occurs.
            LOGGER.error("Could not list the files of directory " + str + ", aborting.");
            return;
        }
        for (File candidate : files) {
            String name = candidate.getName();
            LOGGER.info("Processing file " + name);
            if (name.endsWith(".gz")) {
                readNTriples(candidate, true);
            } else if (name.endsWith(".nt") || name.endsWith(".ttl")) {
                readNTriples(candidate, false);
            } else {
                LOGGER.info("Skipping file: " + name);
            }
        }
    }

    /**
     * Convenience overload of {@link #readNtTriplesFromDirectoryMultiThreaded(File, boolean)}.
     *
     * @param str path of the directory
     * @param z true if optimized files shall be written while reading
     */
    public void readNtTriplesFromDirectoryMultiThreaded(String str, boolean z) {
        readNtTriplesFromDirectoryMultiThreaded(new File(str), z);
    }

    /**
     * Reads all {@code .gz}, {@code .nt} and {@code .ttl} files that are direct children of the
     * given directory, using one thread per file, and waits for all threads to finish.
     *
     * <p>If {@code ./optimized} contains a file with the same name as an input file, that
     * optimized file is read instead of the input file.
     *
     * <p>NOTE(review): all threads add to the same {@code data} instance; this assumes that
     * {@code data.add} is safe for concurrent use - confirm.
     *
     * @param file directory to read from
     * @param z true if optimized files shall be written while reading; the flag is stored in
     *     {@link #isWriteOptimizedFile} and therefore also affects later reads
     */
    public void readNtTriplesFromDirectoryMultiThreaded(File file, boolean z) {
        String absolutePath = file.getAbsolutePath();
        this.isWriteOptimizedFile = z;
        if (!file.isDirectory()) {
            LOGGER.error("The given pathToDirectory is no directory, aborting. (given: " + absolutePath + ")");
            return;
        }
        File[] inputFiles = file.listFiles();
        if (inputFiles == null) {
            // listFiles() returns null if an I/O error occurs.
            LOGGER.error("Could not list the files of directory " + absolutePath + ", aborting.");
            return;
        }

        HashMap<String, File> optimizedFiles = new HashMap<>();
        File optimizedDirectory = new File(OPTIMIZED_DIRECTORY);
        if (optimizedDirectory.exists() && optimizedDirectory.isDirectory()) {
            LOGGER.info("Found optimized directory. Will use it for reading.");
            File[] optimizedCandidates = optimizedDirectory.listFiles();
            if (optimizedCandidates != null) {
                for (File optimizedFile : optimizedCandidates) {
                    optimizedFiles.put(optimizedFile.getName(), optimizedFile);
                }
            }
        }

        ArrayList<Thread> threads = new ArrayList<>();
        for (File inputFile : inputFiles) {
            String name = inputFile.getName();
            File optimizedFile = optimizedFiles.get(name);
            FileReaderThread thread;
            if (optimizedFile != null) {
                LOGGER.info("Found optimized file for " + name + ", will use that one.");
                thread = new FileReaderThread(this, optimizedFile, true, true);
            } else if (name.endsWith(".gz")) {
                thread = new FileReaderThread(this, inputFile, true, false);
            } else if (name.endsWith(".nt") || name.endsWith(".ttl")) {
                thread = new FileReaderThread(this, inputFile, false, false);
            } else {
                LOGGER.info("Skipping file: " + name);
                continue;
            }
            thread.start();
            threads.add(thread);
        }

        try {
            for (Thread thread : threads) {
                thread.join();
            }
        } catch (InterruptedException e) {
            LOGGER.error("Problem waiting for thread...", e);
            // Restore the flag so that callers can still observe the interruption.
            Thread.currentThread().interrupt();
        }
        LOGGER.info("Data read.");
    }

    /**
     * Reads the file at the given path.
     *
     * @param str path of the file to read
     * @param z true if the file is gzipped
     */
    public void readNTriples(String str, boolean z) {
        readNTriples(new File(str), z);
    }

    /**
     * Reads a gzipped optimized file in which every line holds exactly three space-separated
     * tokens that are added to {@code data} as they are. Lines with a different token count are
     * logged and ignored. I/O problems are logged, not thrown.
     *
     * @param file gzipped optimized file
     */
    public void readNTriplesOptimized(File file) {
        if (!file.exists()) {
            LOGGER.error("File does not exist. Cannot parse.");
            return;
        }
        try (BufferedReader reader = openReader(file, true)) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] tokens = line.split(" ");
                if (tokens.length != 3) {
                    LOGGER.error("Problem with line: \n" + line);
                } else {
                    this.data.add(tokens[0], tokens[1], tokens[2]);
                }
            }
        } catch (IOException e) {
            LOGGER.error("Could not initialize optimized reader for file " + file.getName(), e);
        }
    }

    /**
     * Reads the given file line by line and adds every parsable triple to {@code data}.
     *
     * <p>Lines for which the skip condition fires are ignored. A line that cannot be parsed is
     * logged and skipped; parsing continues with the next line. If {@link #isWriteOptimizedFile}
     * is set, every added triple is also written to a gzipped file of the same name below
     * {@code ./optimized}. Problems with the file as a whole are logged, not thrown.
     *
     * @param file file to read
     * @param z true if the file is gzipped
     */
    public void readNTriples(File file, boolean z) {
        if (!file.exists()) {
            LOGGER.error("File does not exist. Cannot parse.");
            return;
        }
        // The reader is opened first so that an unreadable input does not leave an empty optimized
        // file behind. A null writer is permitted by try-with-resources and is simply not closed.
        try (BufferedReader reader = openReader(file, z);
                BufferedWriter optimizedWriter = this.isWriteOptimizedFile ? openOptimizedWriter(file) : null) {
            long lineNumber = 0;
            String line;
            while ((line = reader.readLine()) != null) {
                lineNumber++;
                try {
                    parseLine(line, lineNumber, file, optimizedWriter);
                } catch (Exception e) {
                    // Deliberately broad: a single bad line must not abort the whole file.
                    LOGGER.error("A problem occurred while parsing line number " + lineNumber + " of file " + file.getName(), e);
                    LOGGER.error("The problem occurred in the following line:\n" + line);
                }
            }
            LOGGER.info("File " + file.getName() + " successfully read. " + this.data.getSize() + " subjects loaded.");
        } catch (Exception e) {
            LOGGER.error("Error while parsing file.", e);
        }
    }

    /** Parses one input line and, if it yields a triple, stores it and optionally writes it out. */
    private void parseLine(String line, long lineNumber, File file, BufferedWriter optimizedWriter) throws IOException {
        if (this.skipCondition.isHit(line)) {
            return;
        }
        String statement = STATEMENT_TERMINATOR.matcher(line).replaceAll("");
        if (this.isIncludeDatatypeProperties) {
            Matcher literalMatcher = LITERAL_TO_LINE_END.matcher(statement);
            if (literalMatcher.find()) {
                // Replace the spaces inside the literal so that it survives the split below as one token.
                String literal = literalMatcher.group(0);
                statement = statement.replace(literal, literal.replace(' ', '_'));
            }
        }
        String[] tokens = statement.split(" ");
        if (tokens.length != 3) {
            LOGGER.error("Error in file " + file.getName() + " in line " + lineNumber + " while parsing the following line:\n" + statement + "\n Required tokens: 3\nActual tokens: " + tokens.length);
            for (int i = 0; i < tokens.length; i++) {
                LOGGER.error("Token " + (i + 1) + ": " + tokens[i]);
            }
            LOGGER.error("Line is ignored. Parsing continues.");
            return;
        }
        String subject = shortenAndIntern(tokens[0]);
        String predicate = shortenAndIntern(tokens[1]);
        String object = shortenAndIntern(tokens[2]);
        this.data.add(subject, predicate, object);
        if (optimizedWriter != null) {
            optimizedWriter.write(subject + " " + predicate + " " + object + "\n");
        }
    }

    /** Strips the angle brackets from a token, shortens it and interns the resulting string. */
    private String shortenAndIntern(String token) {
        return this.specificWalkGenerator.shortenUri(removeTags(token)).intern();
    }

    /** Opens a buffered UTF-8 reader on the file, unzipping on the fly if requested. */
    private static BufferedReader openReader(File file, boolean isGzipped) throws IOException {
        FileInputStream fileStream = new FileInputStream(file);
        try {
            if (isGzipped) {
                return new BufferedReader(new InputStreamReader(new GZIPInputStream(fileStream), StandardCharsets.UTF_8));
            }
            return new BufferedReader(new InputStreamReader(fileStream, StandardCharsets.UTF_8));
        } catch (IOException e) {
            // The GZIP header could not be read: do not leak the underlying file handle.
            closeAfterFailure(fileStream, e);
            throw e;
        }
    }

    /**
     * Opens the gzipped UTF-8 writer for the optimized counterpart of the given file.
     * Returns null (after logging) if the writer cannot be created.
     */
    private static BufferedWriter openOptimizedWriter(File source) {
        File target = new File(OPTIMIZED_DIRECTORY, source.getName());
        try {
            target.getParentFile().mkdirs();
            FileOutputStream fileStream = new FileOutputStream(target);
            try {
                BufferedWriter writer = new BufferedWriter(
                        new OutputStreamWriter(new GZIPOutputStream(fileStream), StandardCharsets.UTF_8));
                LOGGER.info("Writer initialized.");
                return writer;
            } catch (IOException e) {
                closeAfterFailure(fileStream, e);
                throw e;
            }
        } catch (FileNotFoundException e) {
            LOGGER.error("Could not initialize gzip output stream.", e);
        } catch (IOException e) {
            LOGGER.error("Problem initializing gzip output stream.", e);
        }
        return null;
    }

    /** Closes the resource and records a failing close as suppressed on the original failure. */
    private static void closeAfterFailure(AutoCloseable resource, Exception failure) {
        try {
            resource.close();
        } catch (Exception closeFailure) {
            failure.addSuppressed(closeFailure);
        }
    }

    /** Creates the condition that skips comment lines, blank lines and lines with a quoted literal. */
    private static IsearchCondition newLiteralSkippingCondition() {
        return new IsearchCondition() {
            @Override
            public boolean isHit(String str) {
                String trimmed = str.trim();
                return trimmed.startsWith("#") || trimmed.isEmpty() || QUOTED_LITERAL.matcher(str).find();
            }
        };
    }

    /** Creates the condition that skips only comment lines and blank lines. */
    private static IsearchCondition newCommentSkippingCondition() {
        return new IsearchCondition() {
            @Override
            public boolean isHit(String str) {
                String trimmed = str.trim();
                return trimmed.startsWith("#") || trimmed.isEmpty();
            }
        };
    }

    /**
     * @return the condition that currently decides which lines are skipped
     */
    public IsearchCondition getSkipCondition() {
        return this.skipCondition;
    }

    /**
     * @param isearchCondition the condition that decides which lines are skipped
     */
    public void setSkipCondition(IsearchCondition isearchCondition) {
        this.skipCondition = isearchCondition;
    }

    /**
     * Removes one leading {@code <} and one trailing {@code >} from the given string, if present.
     *
     * @param str string to strip, must not be null
     * @return the string without the enclosing angle brackets
     */
    public static String removeTags(String str) {
        if (str.startsWith("<")) {
            str = str.substring(1);
        }
        if (str.endsWith(">")) {
            str = str.substring(0, str.length() - 1);
        }
        return str;
    }

    /**
     * Switches the handling of lines with literals on or off. In both cases the current skip
     * condition is replaced: when enabled, only comments and blank lines are skipped; when
     * disabled, lines containing a quoted literal are skipped as well.
     *
     * @param z true to include datatype properties
     */
    public void setIncludeDatatypeProperties(boolean z) {
        LOGGER.warn("Overwriting skip condition.");
        this.skipCondition = z ? newCommentSkippingCondition() : newLiteralSkippingCondition();
        this.isIncludeDatatypeProperties = z;
    }
}
