package org.scify.jedai.textmodels;

import gnu.trove.map.hash.TObjectIntHashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Pattern;
import org.scify.jedai.utilities.enumerations.RepresentationModel;
import org.scify.jedai.utilities.enumerations.SimilarityMetric;

/* loaded from: input_file:org/scify/jedai/textmodels/MinHashUnigrams.class */
public class MinHashUnigrams extends TokenNGramsWithGlobalWeights {
    protected final Set<String> termsList;

    public MinHashUnigrams(String str) {
        super(0, 1, RepresentationModel.TOKEN_UNIGRAMS, SimilarityMetric.JACCARD_SIMILARITY, str);
        this.termsList = new HashSet();
    }

    @Override // org.scify.jedai.textmodels.TokenNGramsWithGlobalWeights, org.scify.jedai.textmodels.BagModel, org.scify.jedai.textmodels.ITextModel
    public void finalizeModel() {
        if (DOC_FREQ[this.datasetId] == null) {
            DOC_FREQ[this.datasetId] = new TObjectIntHashMap();
        }
        this.termsList.forEach(str -> {
            DOC_FREQ[0].putIfAbsent(str, DOC_FREQ[0].size());
        });
    }

    public static int getCorpusDimensionality() {
        return DOC_FREQ[0].size();
    }

    public Set<Integer> getTermIds() {
        HashSet hashSet = new HashSet();
        this.termsList.forEach(str -> {
            hashSet.add(Integer.valueOf(DOC_FREQ[0].get(str)));
        });
        return hashSet;
    }

    @Override // org.scify.jedai.textmodels.TokenNGrams, org.scify.jedai.textmodels.ITextModel
    public void updateModel(String str) {
        String str2;
        for (String str3 : str.toLowerCase().split("[\\W_]")) {
            int i = 0;
            do {
                i++;
                str2 = str3 + "#" + i;
            } while (this.termsList.contains(str2));
            this.termsList.add(str2);
        }
    }
}
