package de.l3s.icrawl.contentanalysis;

import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableMap;
import de.l3s.icrawl.contentanalysis.LanguageModel;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Pattern;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.de.GermanAnalyzer;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.analysis.fr.FrenchAnalyzer;
import org.apache.lucene.analysis.it.ItalianAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:de/l3s/icrawl/contentanalysis/LanguageModels.class */
/**
 * Holds one {@link LanguageModel} per {@link Locale} together with a default language
 * that is used as a fallback when no model is registered for a requested locale.
 *
 * <p>The model map passed to {@link #LanguageModels(Locale, Map)} is stored as-is (no
 * defensive copy), so this class is only as immutable as that map.
 */
public class LanguageModels {
    private static final Logger logger = LoggerFactory.getLogger(LanguageModels.class);

    /** Separates the term from its value in a dictionary line (first run of whitespace). */
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");

    /** Character buffer size used when reading dictionaries; value kept from the original code. */
    private static final int READ_BUFFER_SIZE = 8048;

    private final Locale defaultLanguage;
    private final Map<Locale, LanguageModel> models;

    /**
     * Creates a single-language instance: a model is built for {@code locale} from the given
     * dictionary, and {@code locale} also becomes the default language.
     *
     * @param locale language of the model and default language
     * @param map term-to-weight dictionary handed to the {@link LanguageModel} constructor
     * @param locale2 currently ignored; retained only so existing callers keep compiling
     * @throws IllegalArgumentException if no analyzer is available for {@code locale}
     */
    public LanguageModels(Locale locale, Map<String, Double> map, Locale locale2) {
        this(locale, ImmutableMap.of(locale, new LanguageModel(getAnalyzerForLanguage(locale), map)));
    }

    /**
     * Creates an instance backed by the given models.
     *
     * @param locale default language, used when a lookup for another locale finds no model
     * @param map models keyed by language; stored by reference, not copied
     */
    public LanguageModels(Locale locale, Map<Locale, LanguageModel> map) {
        this.models = map;
        this.defaultLanguage = locale;
    }

    /**
     * Returns the default language given at construction time.
     *
     * @return the fallback locale
     */
    public Locale getDefaultLanguage() {
        return this.defaultLanguage;
    }

    /**
     * Computes the cosine similarity between a reference vector and the vector built from
     * a document text using the model selected for {@code locale}.
     *
     * @param locale language of the document; falls back to the default language's model
     * @param str document text, must not be empty
     * @param documentVector reference vector to compare against
     * @param keywordMatcher matcher passed through to the model when building the vector
     * @return the similarity, or {@code 0.0} if the computed value is NaN or infinite
     * @throws IllegalArgumentException if {@code str} is empty or no model can be found
     */
    public double getSimilarity(Locale locale, String str, DocumentVector documentVector, LanguageModel.KeywordMatcher keywordMatcher) {
        Preconditions.checkArgument(!str.isEmpty(), "Document must have length > 0.");
        double similarity = documentVector.cosineSimilarity(getLanguageModel(locale).buildDocumentVector(str, keywordMatcher));
        logger.trace("result: {}", Double.valueOf(similarity));
        if (Double.isInfinite(similarity) || Double.isNaN(similarity)) {
            // A non-finite score is reported as "no similarity" instead of being propagated.
            logger.debug("Got NaN similarity for input '{}'@{}: {}", new Object[]{str, locale, Double.valueOf(similarity)});
            return 0.0d;
        }
        return similarity;
    }

    /**
     * Looks up the model for a locale, falling back to the default language's model.
     *
     * @param locale requested language
     * @return the matching model, or the default language's model if none is registered
     * @throws IllegalArgumentException if neither lookup yields a model
     */
    LanguageModel getLanguageModel(Locale locale) {
        LanguageModel model = this.models.get(locale);
        if (model == null) {
            model = this.models.get(this.defaultLanguage);
        }
        if (model == null) {
            throw new IllegalArgumentException("Could not find model for language " + locale);
        }
        return model;
    }

    /**
     * Creates a Lucene analyzer for one of the supported languages.
     *
     * <p>Only the exact constants {@link Locale#GERMAN}, {@link Locale#ENGLISH},
     * {@link Locale#ITALIAN} and {@link Locale#FRENCH} are recognised; the comparison uses
     * {@code equals}, so a locale that additionally carries a country does not match.
     *
     * @param locale requested language
     * @return a new analyzer instance for that language
     * @throws IllegalArgumentException if the locale is not one of the supported constants
     */
    static Analyzer getAnalyzerForLanguage(Locale locale) {
        if (Locale.GERMAN.equals(locale)) {
            // German is the only language constructed with an explicit (empty) set argument.
            // NOTE(review): presumably this disables stop-word removal for German - confirm intent.
            return new GermanAnalyzer(CharArraySet.EMPTY_SET);
        }
        if (Locale.ENGLISH.equals(locale)) {
            return new EnglishAnalyzer();
        }
        if (Locale.ITALIAN.equals(locale)) {
            return new ItalianAnalyzer();
        }
        if (Locale.FRENCH.equals(locale)) {
            return new FrenchAnalyzer();
        }
        throw new IllegalArgumentException("Could not find model for language " + locale);
    }

    /**
     * Builds a keyword matcher by delegating to the model selected for {@code locale}.
     *
     * @param locale language whose model should build the matcher
     * @param iterable keywords handed to the model
     * @param i integer argument passed through unchanged to {@code LanguageModel.buildMatcher}
     * @return the matcher produced by the model
     * @throws IllegalArgumentException if no model can be found
     */
    public LanguageModel.KeywordMatcher buildMatcher(Locale locale, Iterable<String> iterable, int i) {
        return getLanguageModel(locale).buildMatcher(iterable, i);
    }

    /**
     * Builds a document vector by delegating to the model selected for {@code locale}.
     *
     * @param locale language of the text
     * @param str text to vectorize
     * @param keywordMatcher matcher passed through to the model
     * @return the vector produced by the model
     * @throws IllegalArgumentException if no model can be found
     */
    public DocumentVector buildDocumentVector(Locale locale, String str, LanguageModel.KeywordMatcher keywordMatcher) {
        return getLanguageModel(locale).buildDocumentVector(str, keywordMatcher);
    }

    /**
     * Reads a term dictionary from a UTF-8 text stream.
     *
     * <p>Each non-blank line must contain a term, a run of whitespace, and a value parseable
     * by {@link Double#valueOf(String)}. Blank lines are skipped. The stream is always closed
     * before this method returns, whether it completes normally or throws.
     *
     * @param inputStream source of the dictionary; closed by this method
     * @return an immutable map from term to value
     * @throws IOException if reading from the stream fails
     * @throws IllegalArgumentException if a non-blank line has no value part
     * @throws NumberFormatException if a value cannot be parsed as a double
     */
    public static Map<String, Double> readIdfDictionary(InputStream inputStream) throws IOException {
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(inputStream, StandardCharsets.UTF_8), READ_BUFFER_SIZE)) {
            ImmutableMap.Builder<String, Double> builder = ImmutableMap.builder();
            int lineNumber = 0;
            String line;
            while ((line = reader.readLine()) != null) {
                lineNumber++;
                if (line.trim().isEmpty()) {
                    // Tolerate empty lines, e.g. a blank line at the end of the file.
                    continue;
                }
                // Limit 2: everything after the first whitespace run belongs to the value.
                String[] fields = WHITESPACE.split(line, 2);
                if (fields.length < 2) {
                    throw new IllegalArgumentException(
                            "Malformed dictionary entry at line " + lineNumber + ": '" + line + "'");
                }
                builder.put(fields[0], Double.valueOf(fields[1]));
            }
            return builder.build();
        }
    }

    /**
     * Reads a dictionary from the stream and wraps it in a model for the given language.
     *
     * @param locale language of the model; must be supported by {@link #getAnalyzerForLanguage(Locale)}
     * @param inputStream dictionary source in the format expected by {@link #readIdfDictionary(InputStream)}
     * @return the newly created model
     * @throws IOException if reading from the stream fails
     * @throws IllegalArgumentException if the locale is unsupported or the dictionary is malformed
     */
    public static LanguageModel readLanguageModel(Locale locale, InputStream inputStream) throws IOException {
        return new LanguageModel(getAnalyzerForLanguage(locale), readIdfDictionary(inputStream));
    }
}
