package org.deeplearning4j.spark.text;

import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.broadcast.Broadcast;
import org.deeplearning4j.berkeley.Pair;
import org.deeplearning4j.models.word2vec.VocabWord;
import org.deeplearning4j.models.word2vec.wordstore.VocabCache;
import org.deeplearning4j.text.movingwindow.Util;

/* loaded from: input_file:org/deeplearning4j/spark/text/VocabCacheFunction.class */
public class VocabCacheFunction implements Function<Pair<List<String>, Long>, Pair<VocabCache, Long>> {
    private int minWordFrequency;
    private VocabCache vocab;
    private Broadcast<List<String>> stopWords;

    public VocabCacheFunction(int i, VocabCache vocabCache, Broadcast<List<String>> broadcast) {
        this.minWordFrequency = 5;
        this.minWordFrequency = i;
        this.vocab = vocabCache;
        this.stopWords = broadcast;
    }

    public Pair<VocabCache, Long> call(Pair<List<String>, Long> pair) throws Exception {
        VocabWord vocabWord;
        HashSet hashSet = new HashSet();
        long longValue = ((Long) pair.getSecond()).longValue() + ((List) pair.getFirst()).size();
        for (String str : (List) pair.getFirst()) {
            if (((List) this.stopWords.getValue()).contains(str)) {
                str = "STOP";
            }
            if (!str.isEmpty()) {
                String str2 = str;
                if (str.isEmpty()) {
                    str = str2;
                }
                this.vocab.incrementWordCount(str);
                if (!hashSet.contains(str)) {
                    this.vocab.incrementDocCount(str, 1);
                    hashSet.add(str);
                }
                if (this.vocab.hasToken(str)) {
                    vocabWord = this.vocab.tokenFor(str);
                } else {
                    vocabWord = new VocabWord(1.0d, str);
                    this.vocab.addToken(vocabWord);
                }
                if (!Util.matchesAnyStopWord((List) this.stopWords.getValue(), str) && str != null && !str.isEmpty()) {
                    if (!this.vocab.containsWord(str) && this.vocab.wordFrequency(str) >= this.minWordFrequency) {
                        vocabWord.setIndex(this.vocab.numWords());
                        this.vocab.putVocabWord(str);
                    } else if (Util.matchesAnyStopWord((List) this.stopWords.getValue(), str) && str != null && !str.isEmpty() && !this.vocab.containsWord("STOP") && this.vocab.wordFrequency("STOP") >= this.minWordFrequency) {
                        vocabWord.setIndex(this.vocab.numWords());
                        this.vocab.putVocabWord("STOP");
                    }
                }
            }
        }
        return new Pair<>(this.vocab, Long.valueOf(longValue));
    }
}
