package de.uni_mannheim.informatik.dws.jrdf2vec.debugging;

import de.uni_mannheim.informatik.dws.jrdf2vec.training.Gensim;
import de.uni_mannheim.informatik.dws.jrdf2vec.walk_generation.data_structures.TripleDataSetMemory;
import de.uni_mannheim.informatik.dws.jrdf2vec.walk_generation.entity_selector.EntitySelector;
import de.uni_mannheim.informatik.dws.jrdf2vec.walk_generation.walk_generators.HdtWalkGenerator;
import de.uni_mannheim.informatik.dws.jrdf2vec.walk_generation.walk_generators.IWalkGenerator;
import de.uni_mannheim.informatik.dws.jrdf2vec.walk_generation.walk_generators.MemoryWalkGenerator;
import de.uni_mannheim.informatik.dws.jrdf2vec.walk_generation.walk_generators.WalkGeneratorManager;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.HashSet;
import java.util.Set;
import org.javatuples.Pair;
import org.javatuples.Triplet;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:de/uni_mannheim/informatik/dws/jrdf2vec/debugging/VocabularyAnalyzer.class */
/**
 * Debugging helper that compares the vocabulary of a trained model with a list of entities or
 * with the subjects, predicates and objects of a parsed data set.
 *
 * <p>A model is either a text vector file (path ending in {@code .txt}, one
 * {@code <term> <v1> <v2> ...} entry per line) or any other file, in which case the vocabulary is
 * requested from {@link Gensim}.
 */
public class VocabularyAnalyzer {

    private static final Logger LOGGER = LoggerFactory.getLogger(VocabularyAnalyzer.class);

    /**
     * Determines which entities listed in a file are absent from the model vocabulary.
     *
     * @param str  path to the model file
     * @param str2 path to a UTF-8 file with one entity per line
     * @return the (trimmed) entities of {@code str2} that are not part of the model vocabulary;
     *         if the model cannot be read, all entities are reported
     */
    public static Set<String> detectMissingEntities(String str, String str2) {
        Set<String> vocabulary = getModelVocabulary(str).getValue0();
        Set<String> missing = readSetFromFile(str2);
        missing.removeAll(vocabulary);
        return missing;
    }

    /**
     * Determines which model vocabulary terms are not listed in the entity file.
     *
     * @param str  path to the model file
     * @param str2 path to a UTF-8 file with one entity per line
     * @return the vocabulary terms that do not occur in {@code str2}; empty if the model cannot
     *         be read
     */
    public static Set<String> detectAdditionalEntities(String str, String str2) {
        // Work on a copy so that the set handed out by the model reader is never modified.
        Set<String> additional = new HashSet<>(getModelVocabulary(str).getValue0());
        additional.removeAll(readSetFromFile(str2));
        return additional;
    }

    /**
     * Convenience overload of {@link #readSetFromFile(File)}.
     *
     * @param str path to the file to read
     * @return the trimmed lines of the file as a set
     */
    static Set<String> readSetFromFile(String str) {
        return readSetFromFile(new File(str));
    }

    /**
     * Reads a UTF-8 text file into a set, one trimmed line per element.
     *
     * @param file the file to read
     * @return a mutable set of the trimmed lines; empty if the file does not exist, and possibly
     *         partial if an I/O error occurs while reading (the error is logged)
     */
    static Set<String> readSetFromFile(File file) {
        Set<String> entities = new HashSet<>();
        if (!file.exists()) {
            LOGGER.error("File does not exist.");
            return entities;
        }
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                entities.add(line.trim());
            }
        } catch (FileNotFoundException e) {
            LOGGER.error("File not found.", e);
        } catch (IOException e) {
            LOGGER.error("IOException occured.", e);
        }
        LOGGER.info("Entities read into cache.");
        return entities;
    }

    /**
     * Compares the model vocabulary with the subjects, predicates and objects of a data set.
     *
     * @param str  path to the model file
     * @param str2 path handed to {@link WalkGeneratorManager#parseSingleFile(String)} to load the
     *             data set
     * @return the analysis result; it is returned unpopulated if the file is handled by an
     *         {@link HdtWalkGenerator} or by any generator that is not a
     *         {@link MemoryWalkGenerator}
     */
    public static VocabularyAnalyzerResult analyze(String str, String str2) {
        VocabularyAnalyzerResult result = new VocabularyAnalyzerResult();
        Pair<IWalkGenerator, EntitySelector> parsed = WalkGeneratorManager.parseSingleFile(str2);
        IWalkGenerator generator = parsed.getValue0();
        if (generator.getClass() == HdtWalkGenerator.class) {
            LOGGER.error("Analysis is not implemented for HDT parser!");
            return result;
        }
        if (!(generator instanceof MemoryWalkGenerator)) {
            // Only in-memory generators expose the triple data set that is needed below.
            LOGGER.error("Analysis is not implemented for walk generator {}!", generator.getClass().getName());
            return result;
        }

        Triplet<Set<String>, Integer, Boolean> model = getModelVocabulary(str);
        result.setDimension(model.getValue1());
        result.setDimensionConsistent(model.getValue2());
        Set<String> vocabulary = model.getValue0();

        TripleDataSetMemory data = ((MemoryWalkGenerator) generator).getData();

        Set<String> subjectsNotFound = new HashSet<>(data.getUniqueObjectTripleSubjects());
        subjectsNotFound.removeAll(vocabulary);
        result.setSubjectsNotFound(subjectsNotFound);

        Set<String> predicatesNotFound = new HashSet<>(data.getUniqueObjectTriplePredicates());
        predicatesNotFound.removeAll(vocabulary);
        result.setPredicatesNotFound(predicatesNotFound);

        Set<String> objectsNotFound = new HashSet<>(data.getUniqueObjectTripleObjects());
        objectsNotFound.removeAll(vocabulary);
        result.setObjectsNotFound(objectsNotFound);

        // Everything in the vocabulary that is neither a subject, a predicate nor an object.
        Set<String> additional = new HashSet<>(vocabulary);
        additional.removeAll(data.getUniqueObjectTripleSubjects());
        additional.removeAll(data.getUniqueObjectTriplePredicates());
        additional.removeAll(data.getUniqueObjectTripleObjects());
        result.setAllAdditional(additional);
        return result;
    }

    /**
     * Convenience overload of {@link #getModelVocabulary(File)}.
     *
     * @param str path to the model file
     * @return vocabulary, dimension and dimension-consistency flag of the model
     */
    private static Triplet<Set<String>, Integer, Boolean> getModelVocabulary(String str) {
        return getModelVocabulary(new File(str));
    }

    /**
     * Loads the vocabulary of a model together with its vector dimension.
     *
     * @param file the model file; {@code .txt} files are parsed directly, everything else is
     *             delegated to {@link Gensim}
     * @return a triplet of (vocabulary, dimension, dimension consistent); the vocabulary is never
     *         {@code null}, and (empty set, -1, false) is returned when the file does not exist,
     *         is a directory, or no vocabulary could be obtained
     */
    private static Triplet<Set<String>, Integer, Boolean> getModelVocabulary(File file) {
        String modelPath = file.getAbsolutePath();
        if (!file.exists()) {
            LOGGER.error("The provided model file ({}) does not exist. ABORTING operation.", modelPath);
            return unknownModel();
        }
        if (file.isDirectory()) {
            LOGGER.error("The provided model file ({}) is a directory. ABORTING operation.", modelPath);
            return unknownModel();
        }
        if (modelPath.endsWith(".txt")) {
            return readTextVectorFile(file);
        }

        Set<String> vocabulary = Gensim.getInstance().getVocabularyTerms(modelPath);
        if (vocabulary == null) {
            return unknownModel();
        }
        LOGGER.debug("Model vocabulary contains {} terms.", vocabulary.size());

        // NOTE(review): the dimension is only probed for vocabularies with more than one term;
        // a single-term vocabulary reports -1. Possibly '> 0' was intended - confirm.
        if (vocabulary.size() > 1) {
            String probeTerm = vocabulary.iterator().next();
            int dimension = Gensim.getInstance().getVector(probeTerm, modelPath).length;
            // Only one vector is inspected, so consistency is assumed rather than verified here.
            return new Triplet<>(vocabulary, dimension, true);
        }
        return new Triplet<>(vocabulary, -1, false);
    }

    /** Creates the result used when no vocabulary is available: (empty set, -1, false). */
    private static Triplet<Set<String>, Integer, Boolean> unknownModel() {
        Set<String> empty = new HashSet<>();
        return new Triplet<>(empty, -1, false);
    }

    /**
     * Parses a text vector file in which every line has the form {@code <term> <v1> <v2> ...},
     * separated by single spaces.
     *
     * <p>Lines with fewer than two tokens are skipped with a warning. The dimension is taken from
     * the first valid line; the consistency flag becomes {@code false} as soon as another valid
     * line has a different number of vector components.
     *
     * @param file the UTF-8 encoded text vector file
     * @return a triplet of (terms, dimension or -1 if no valid line was read, dimension
     *         consistent); on an I/O error the error is logged and whatever was read so far is
     *         returned
     */
    static Triplet<Set<String>, Integer, Boolean> readTextVectorFile(File file) {
        int dimension = -1;
        boolean dimensionConsistent = true;
        Set<String> terms = new HashSet<>();
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] tokens = line.split(" ");
                if (tokens.length <= 1) {
                    LOGGER.warn("Problem while reading the following line: {}", line);
                    continue;
                }
                terms.add(tokens[0]);
                int lineDimension = tokens.length - 1;
                if (dimension == -1) {
                    dimension = lineDimension;
                } else if (dimension != lineDimension) {
                    dimensionConsistent = false;
                }
            }
        } catch (FileNotFoundException e) {
            LOGGER.error("Error while trying to read text model file: FileNotFoundException", e);
        } catch (IOException e) {
            LOGGER.error("Error while trying to read text model file.", e);
        }
        return new Triplet<>(terms, dimension, dimensionConsistent);
    }
}
