package org.scify.jedai.blockbuilding;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.jena.atlas.json.JsonArray;
import org.apache.jena.atlas.json.JsonObject;
import org.scify.jedai.configuration.gridsearch.DblGridSearchConfiguration;
import org.scify.jedai.configuration.randomsearch.DblRandomSearchConfiguration;
import org.scify.jedai.utilities.IConstants;

/* loaded from: input_file:org/scify/jedai/blockbuilding/ExtendedQGramsBlocking.class */
/**
 * Q-grams based block building that uses combinations of q-grams as blocking keys.
 *
 * <p>Every token of an attribute value is split into q-grams (via the inherited
 * {@code getTokens} and {@code getNGrams} helpers, which are not defined in this class).
 * A token with a single q-gram contributes that q-gram as a key. Otherwise, every
 * combination of at least {@code max(1, floor(k * threshold))} of its {@code k} q-grams,
 * concatenated in their original order, becomes a blocking key.
 */
public class ExtendedQGramsBlocking extends QGramsBlocking {

    /**
     * Upper bound on the q-grams of a single token that take part in the combinations;
     * any q-grams past this count are ignored (presumably to keep the number of
     * combinations manageable).
     */
    private static final int MAX_Q_GRAMS = 15;

    /** Fraction of a token's q-grams that determines the minimum combination length. */
    private float threshold;

    private final DblGridSearchConfiguration gridThreshold;
    private final DblRandomSearchConfiguration randomThreshold;

    /** Creates an instance with threshold 0.95 and q-gram size 6. */
    public ExtendedQGramsBlocking() {
        this(0.95f, 6);
    }

    /**
     * Creates an instance with the given configuration.
     *
     * @param threshold the combination threshold
     * @param nGramSize the q-gram size, forwarded to the superclass constructor
     */
    public ExtendedQGramsBlocking(float threshold, int nGramSize) {
        super(nGramSize);
        this.threshold = threshold;
        this.randomThreshold = new DblRandomSearchConfiguration(0.99f, 0.8f);
        this.gridThreshold = new DblGridSearchConfiguration(0.95f, 0.8f, 0.05f);
    }

    /**
     * Extracts the blocking keys of the given attribute value.
     *
     * @param attributeValue the text to extract keys from
     * @return the set of distinct blocking keys; empty if no token yields a q-gram
     */
    @Override
    protected Set<String> getBlockingKeys(String attributeValue) {
        final Set<String> keys = new HashSet<>();
        for (String token : getTokens(attributeValue)) {
            List<String> nGrams = getNGrams(this.nGramSize, token);
            if (nGrams.size() == 1) {
                keys.add(nGrams.get(0));
                continue;
            }

            if (MAX_Q_GRAMS < nGrams.size()) {
                nGrams = nGrams.subList(0, MAX_Q_GRAMS);
            }

            // Combinations of every length from the minimum up to all q-grams are used as keys.
            final int minimumLength = (int) Math.max(1.0d, Math.floor(nGrams.size() * this.threshold));
            for (int length = minimumLength; length <= nGrams.size(); length++) {
                keys.addAll(getCombinationsFor(nGrams, length));
            }
        }
        return keys;
    }

    /**
     * Builds every concatenation of {@code sublistLength} elements of {@code sublist},
     * keeping the elements in their list order.
     *
     * <p>The input list is only read, never modified.
     *
     * @param sublist       the q-grams to combine
     * @param sublistLength the number of q-grams per combination
     * @return a new set with the concatenated combinations; empty if
     *         {@code sublistLength} is 0 or exceeds the size of {@code sublist}
     */
    protected Set<String> getCombinationsFor(List<String> sublist, int sublistLength) {
        if (sublistLength == 0 || sublist.size() < sublistLength) {
            return new HashSet<>();
        }

        final int lastIndex = sublist.size() - 1;
        final String lastNGram = sublist.get(lastIndex);
        // A view is enough here: the recursion never modifies the list, so no copy is needed.
        final List<String> remaining = sublist.subList(0, lastIndex);

        // Combinations that do not contain the last q-gram.
        final Set<String> combinations = new HashSet<>(getCombinationsFor(remaining, sublistLength));

        // Combinations that end with the last q-gram.
        final Set<String> shorterCombinations = getCombinationsFor(remaining, sublistLength - 1);
        if (shorterCombinations.isEmpty()) {
            // Reached when sublistLength is 1: the last q-gram is a combination on its own.
            combinations.add(lastNGram);
        } else {
            for (String prefix : shorterCombinations) {
                combinations.add(prefix + lastNGram);
            }
        }
        return combinations;
    }

    /** Returns the current q-gram size and threshold as a printable string. */
    @Override
    public String getMethodConfiguration() {
        return getParameterName(0) + "=" + this.nGramSize + ",\t" + getParameterName(1) + "=" + this.threshold;
    }

    /** Returns a short description of this method. */
    @Override
    public String getMethodInfo() {
        return getMethodName() + ": it creates one block for every combination of q-grams that represents at least two entities.\nThe q-grams are extracted from any token in the attribute values of any entity.";
    }

    /** Returns the display name of this method. */
    @Override
    public String getMethodName() {
        return "Extended Q-Grams Blocking";
    }

    /** Returns a description of the two parameters of this method. */
    @Override
    public String getMethodParameters() {
        return getMethodName() + " involves two parameters:\n1)" + getParameterDescription(0) + ".\n2)" + getParameterDescription(1) + ".";
    }

    /**
     * Returns the size of the grid: the number of q-gram size configurations
     * multiplied by the number of threshold configurations.
     */
    @Override
    public int getNumberOfGridConfigurations() {
        return this.gridNGSize.getNumberOfConfigurations() * this.gridThreshold.getNumberOfConfigurations();
    }

    /**
     * Describes both parameters as a JSON array with one object per parameter,
     * first the q-gram size and then the threshold.
     */
    @Override
    public JsonArray getParameterConfiguration() {
        final JsonObject sizeParameter = new JsonObject();
        sizeParameter.put("class", "java.lang.Integer");
        sizeParameter.put("name", getParameterName(0));
        sizeParameter.put("defaultValue", "6");
        sizeParameter.put("minValue", "2");
        sizeParameter.put("maxValue", "6");
        sizeParameter.put("stepValue", "1");
        sizeParameter.put("description", getParameterDescription(0));

        final JsonObject thresholdParameter = new JsonObject();
        thresholdParameter.put("class", "java.lang.Float");
        thresholdParameter.put("name", getParameterName(1));
        thresholdParameter.put("defaultValue", "0.95");
        thresholdParameter.put("minValue", "0.8");
        thresholdParameter.put("maxValue", "0.95");
        thresholdParameter.put("stepValue", "0.05");
        thresholdParameter.put("description", getParameterDescription(1));

        final JsonArray parameters = new JsonArray();
        parameters.add(sizeParameter);
        parameters.add(thresholdParameter);
        return parameters;
    }

    /**
     * Returns the description of a parameter.
     *
     * @param parameterId 0 for the q-gram size, 1 for the combination threshold
     * @return the description, or {@code "invalid parameter id"} for any other id
     */
    @Override
    public String getParameterDescription(int parameterId) {
        switch (parameterId) {
            case 0:
                return "The " + getParameterName(0) + " defines the number of characters that comprise every q-gram.";
            case 1:
                return "The " + getParameterName(1) + " (t) defines the number N of q-grams that are combined to form an individual blocking key.\nIn more detail, the minimum number l_{min} of q-grams per blocking key is defined as l_{min} = max (1, \\floor{k \\cdot t}),\nwhere k is the number of q-grams from the original blocking key (token).";
            default:
                return "invalid parameter id";
        }
    }

    /**
     * Returns the display name of a parameter.
     *
     * @param parameterId 0 for the q-gram size, 1 for the combination threshold
     * @return the name, or {@code "invalid parameter id"} for any other id
     */
    @Override
    public String getParameterName(int parameterId) {
        switch (parameterId) {
            case 0:
                return "Q-gram Size";
            case 1:
                return "Combination Threshold";
            default:
                return "invalid parameter id";
        }
    }

    /** Lets the superclass pick its random configuration, then draws a random threshold. */
    @Override
    public void setNextRandomConfiguration() {
        super.setNextRandomConfiguration();
        this.threshold = ((Float) this.randomThreshold.getNextRandomValue()).floatValue();
    }

    /**
     * Applies the grid configuration with the given index. The index is decomposed
     * so that the threshold varies fastest: the quotient by the number of threshold
     * configurations selects the q-gram size, the remainder selects the threshold.
     *
     * @param iterationNumber the index of the grid configuration
     */
    @Override
    public void setNumberedGridConfiguration(int iterationNumber) {
        final int thresholdConfigurations = this.gridThreshold.getNumberOfConfigurations();
        final int nGramSizeIndex = iterationNumber / thresholdConfigurations;
        final int thresholdIndex = iterationNumber % thresholdConfigurations;

        this.nGramSize = ((Integer) this.gridNGSize.getNumberedValue(nGramSizeIndex)).intValue();
        this.threshold = ((Float) this.gridThreshold.getNumberedValue(thresholdIndex)).floatValue();
    }

    /**
     * Applies the random configuration with the given index, both in the superclass
     * and for the threshold.
     *
     * @param iterationNumber the index of the random configuration
     */
    @Override
    public void setNumberedRandomConfiguration(int iterationNumber) {
        super.setNumberedRandomConfiguration(iterationNumber);
        this.threshold = ((Float) this.randomThreshold.getNumberedRandom(iterationNumber)).floatValue();
    }
}
