package org.scify.jedai.similarityjoins.tokenbased;

import gnu.trove.list.TIntList;
import gnu.trove.list.array.TIntArrayList;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;
import org.scify.jedai.datamodel.Comparison;
import org.scify.jedai.datamodel.EntityProfile;
import org.scify.jedai.datamodel.SimilarityPairs;
import org.scify.jedai.datamodel.joins.Category;

/* loaded from: input_file:org/scify/jedai/similarityjoins/tokenbased/PartEnumJoin.class */
/**
 * Token-based similarity join that assigns every record to a length category and only
 * verifies pairs of records that produce the same signature hash inside a category.
 *
 * <p>Each attribute value is split on single spaces, every token is hashed with
 * {@code djbHash} and the sorted hashes form the record. Candidate pairs are verified with
 * the Jaccard coefficient of their hashed token lists
 * ({@code overlap / (|a| + |b| - overlap)}).
 *
 * <p>NOTE(review): {@code categoryN} is never assigned in this class, so {@link Category}
 * always receives 0 for it — confirm this is intended.
 */
public class PartEnumJoin extends AbstractTokenBasedJoin {
    /** Categories stop being created once the next start length exceeds this value. */
    private static final int MAX_LEN = 3300;
    /** Upper bound on the number of length categories. */
    private static final int MAX_CATEGORY = 100;

    private int categoryN;
    private float categoryTHRESHOLD;
    /** Maps the position of a record after sorting to the id it received while loading. */
    private int[] originalId;
    /** Length categories; slots after the last created category stay {@code null}. */
    private Category[] helper;
    private List<Comparison> executedComparisons;
    private final List<String> attributeValues;
    /** Sorted token hashes of every record, indexed by sorted position. */
    private TIntList[] records;

    /**
     * @param f similarity threshold, forwarded to the superclass
     */
    public PartEnumJoin(float f) {
        super(f);
        this.attributeValues = new ArrayList<>();
    }

    /**
     * Loads the records, builds the length categories, runs the join and wraps the
     * collected comparisons.
     *
     * @return the similarity pairs built from all comparisons that reached the threshold
     */
    @Override // org.scify.jedai.similarityjoins.AbstractSimilarityJoin
    public SimilarityPairs applyJoin() {
        init();
        this.helper = new Category[MAX_CATEGORY];
        this.categoryTHRESHOLD = this.threshold;

        // Every category starts right after the end length of the previous one.
        int nextStart = 1;
        for (int k = 0; k < MAX_CATEGORY; k++) {
            this.helper[k] = new Category(nextStart, this.threshold, this.categoryN);
            nextStart = this.helper[k].e_len + 1;
            if (nextStart > MAX_LEN) {
                break;
            }
        }

        convert_to_signature();
        return getSimilarityPairs(performJoin());
    }

    @Override // org.scify.jedai.utilities.IDocumentation
    public String getMethodInfo() {
        return getMethodName() + ": TO BE ADDED!.";
    }

    @Override // org.scify.jedai.utilities.IDocumentation
    public String getMethodName() {
        return "Part Enum Join";
    }

    /**
     * Reads the attribute values of all profiles, orders them by their space-separated
     * token count and converts each value into a sorted list of token hashes.
     */
    private void init() {
        final List<Pair<String, Integer>> valuesWithIds = new ArrayList<>();
        int nextId = 0;
        for (EntityProfile profile : this.profilesD1) {
            final String value = getAttributeValue(this.attributeNameD1, profile);
            valuesWithIds.add(new ImmutablePair<>(value, nextId++));
        }
        if (this.isCleanCleanER) {
            // Ids of the second dataset continue after those of the first one.
            for (EntityProfile profile : this.profilesD2) {
                final String value = getAttributeValue(this.attributeNameD2, profile);
                valuesWithIds.add(new ImmutablePair<>(value, nextId++));
            }
        }
        valuesWithIds.sort(Comparator.comparingInt(pair -> pair.getKey().split(" ").length));

        this.attributeValues.clear();
        this.originalId = new int[this.noOfEntities];
        this.records = new TIntList[this.noOfEntities];
        for (int rid = 0; rid < this.noOfEntities; rid++) {
            final Pair<String, Integer> entry = valuesWithIds.get(rid);
            this.attributeValues.add(entry.getKey());
            this.originalId[rid] = entry.getValue();
            this.records[rid] = new TIntArrayList();

            final String trimmed = entry.getKey().trim();
            if (trimmed.isEmpty()) {
                continue; // a blank value yields a record without tokens
            }
            for (String token : trimmed.split(" ")) {
                this.records[rid].add(djbHash(token));
            }
            this.records[rid].sort();
        }
    }

    /**
     * @return the comparisons collected by {@link #convert_to_signature()}
     */
    private List<Comparison> performJoin() {
        return this.executedComparisons;
    }

    /**
     * Counts the values shared by two sorted lists with a merge-style scan.
     *
     * @param first      sorted token hashes of the first record
     * @param second     sorted token hashes of the second record
     * @param minOverlap overlap that must still be reachable for the scan to continue
     * @return the overlap found, or -1 as soon as {@code minOverlap} cannot be reached
     */
    private int checkOverlap(TIntList first, TIntList second, int minOverlap) {
        int posFirst = 0;
        int posSecond = 0;
        int overlap = 0;
        while (posFirst < first.size() && posSecond < second.size()) {
            final int remaining = Math.min(first.size() - posFirst, second.size() - posSecond);
            if (overlap + remaining < minOverlap) {
                return -1;
            }
            final int left = first.get(posFirst);
            final int right = second.get(posSecond);
            if (left == right) {
                overlap++;
                posFirst++;
                posSecond++;
            } else if (left < right) {
                posFirst++;
            } else {
                posSecond++;
            }
        }
        return overlap;
    }

    /**
     * Computes the Jaccard similarity of two records.
     *
     * @return the similarity, -1 when the overlap scan was cut short, or 0 when both
     *         records are empty
     */
    private float verify(TIntList tIntList, TIntList tIntList2) {
        final int totalSize = tIntList.size() + tIntList2.size();
        final int minOverlap = (int) Math.ceil(
                ((this.categoryTHRESHOLD / (1.0f + this.categoryTHRESHOLD)) * totalSize) - 1.0E-6d);
        final int overlap = checkOverlap(tIntList, tIntList2, minOverlap);
        if (overlap == -1) {
            return -1.0f;
        }
        final int union = totalSize - overlap;
        if (union == 0) {
            return 0.0f; // both records are empty: avoid dividing by zero
        }
        // Divide in floating point; int / int would truncate the similarity to 0 or 1.
        return (float) overlap / union;
    }

    /**
     * Returns the first position whose value is not smaller than {@code bound}, or the list
     * size when every value is smaller.
     */
    private static int lowerBound(TIntList sortedValues, int bound) {
        final int size = sortedValues.size();
        int index = 0;
        while (index < size && sortedValues.get(index) < bound) {
            index++;
        }
        return index;
    }

    /** Tells whether two original ids fall on the same side of the dataset delimiter. */
    private boolean inSameDataset(int firstId, int secondId) {
        return (firstId < this.datasetDelimiter) == (secondId < this.datasetDelimiter);
    }

    /** Tells whether a category object exists at the given index of {@code helper}. */
    private boolean hasCategory(int index) {
        return 0 <= index && index < this.helper.length && this.helper[index] != null;
    }

    /**
     * Builds the signatures of one record inside one category, verifies the record against
     * all earlier records stored under the same signature hash and then stores the record.
     *
     * @param i    index of the category in {@code helper}
     * @param i2   sorted position of the record being processed
     * @param zArr per-record flags; {@code true} means the record must not be verified
     *             (again) against record {@code i2}
     */
    void perform_join(int i, int i2, boolean[] zArr) {
        final Category category = this.helper[i];
        final TIntList record = this.records[i2];
        int slot = -1; // running index into sig_map across all (partition, subset) pairs
        for (int part = 0; part < category.N1; part++) {
            for (TIntList sub : category.subs) {
                slot++;

                // Gather the tokens of the record that fall into the ranges of this subset.
                final TIntArrayList signature = new TIntArrayList();
                for (int pos = 0; pos < sub.size(); pos++) {
                    final int rangeId = sub.get(pos);
                    final int from = lowerBound(record, category.range_start[part][rangeId]);
                    final int to = lowerBound(record, category.range_end[part][rangeId]);
                    for (int tokenPos = from; tokenPos < to; tokenPos++) {
                        signature.add(record.get(tokenPos));
                    }
                }

                final Integer signatureKey = Arrays.hashCode(signature.toArray());
                TIntList candidates = (TIntList) category.sig_map[slot].get(signatureKey);
                if (candidates == null) {
                    candidates = new TIntArrayList();
                }

                for (int pos = 0; pos < candidates.size(); pos++) {
                    // The list stores record positions; the loop index itself is not a record.
                    final int candidateId = candidates.get(pos);
                    if (zArr[candidateId]) {
                        continue;
                    }
                    if (this.isCleanCleanER
                            && inSameDataset(this.originalId[i2], this.originalId[candidateId])) {
                        continue;
                    }
                    zArr[candidateId] = true;
                    final float similarity = verify(this.records[candidateId], record);
                    if (similarity >= this.threshold) {
                        final Comparison comparison =
                                getComparison(this.originalId[candidateId], this.originalId[i2]);
                        comparison.setUtilityMeasure(similarity);
                        this.executedComparisons.add(comparison);
                    }
                }

                // Register the record once per signature bucket.
                if (candidates.size() == 0 || candidates.get(candidates.size() - 1) != i2) {
                    candidates.add(i2);
                }
                category.sig_map[slot].put(signatureKey, candidates);
            }
        }
    }

    /**
     * Processes every record in sorted order: finds the category whose length range contains
     * the record size and joins the record inside that category and the following one.
     *
     * @throws IllegalStateException if a non-empty record fits into no category
     */
    void convert_to_signature() {
        this.executedComparisons = new ArrayList<>();
        final boolean[] checked = new boolean[this.records.length];
        Arrays.fill(checked, true);

        for (int rid = 0; rid < this.records.length; rid++) {
            final int size = this.records[rid].size();
            if (size == 0) {
                // A record without tokens cannot share tokens with any other record.
                continue;
            }

            int k = 0;
            while (hasCategory(k) && (this.helper[k].s_len > size || this.helper[k].e_len < size)) {
                k++;
            }
            if (!hasCategory(k)) {
                throw new IllegalStateException(
                        "No length category covers a record with " + size + " tokens");
            }

            // Only records that were processed earlier may be verified against this one.
            Arrays.fill(checked, 0, rid, false);
            perform_join(k, rid, checked);
            if (hasCategory(k + 1)) {
                perform_join(k + 1, rid, checked);
            }
        }
    }
}
