package de.l3s.icrawl.contentanalysis;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.common.collect.HashMultiset;
import com.google.common.collect.Maps;
import com.google.common.collect.Multiset;
import com.google.common.collect.Ordering;
import com.google.common.collect.Sets;
import java.io.Serializable;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;

/**
 * Sparse vector of term weights, keyed by term.
 *
 * <p>The Euclidean norm of the weights is computed once in the constructor and cached. The map
 * handed to the constructor is stored by reference (it is not copied), so it must not be modified
 * afterwards; otherwise the cached norm no longer matches the weights.
 */
public class DocumentVector implements Serializable {
    /** Orders vector entries by ascending weight. */
    private static final Ordering<Map.Entry<String, Double>> ORDER_BY_WEIGHT =
            Ordering.<Double>natural().onResultOf((Map.Entry<String, Double> entry) -> entry.getValue());
    private static final long serialVersionUID = 1L;

    /** Term to weight mapping. */
    @JsonProperty
    private final Map<String, Double> elements;
    /** Cached Euclidean norm of {@link #elements}, computed at construction time. */
    private final double norm;

    /**
     * Creates a vector backed by the given term weights.
     *
     * @param elements term to weight mapping; stored by reference, must not be {@code null}
     * @throws NullPointerException if {@code elements} is {@code null} or contains a
     *         {@code null} weight
     */
    @JsonCreator
    public DocumentVector(@JsonProperty("elements") Map<String, Double> elements) {
        this.elements = Objects.requireNonNull(elements);
        this.norm = norm(elements);
    }

    /**
     * Creates a vector of relative term frequencies from a bag of terms.
     *
     * @param terms the terms of a document, duplicates included
     */
    public DocumentVector(Collection<String> terms) {
        this(toDocumentVector(terms));
    }

    /**
     * Counts each distinct term and divides the count by the total number of terms.
     *
     * @param terms the terms to count, duplicates included
     * @return map from each distinct term to {@code count / terms.size()}
     */
    private static Map<String, Double> toDocumentVector(Collection<String> terms) {
        Multiset<String> counts = HashMultiset.create(terms);
        Map<String, Double> weights = Maps.newHashMapWithExpectedSize(counts.elementSet().size());
        // Held as a double so the division below is floating-point.
        double total = terms.size();
        for (Multiset.Entry<String> entry : counts.entrySet()) {
            weights.put(entry.getElement(), entry.getCount() / total);
        }
        return weights;
    }

    /**
     * Computes the cosine of the angle between this vector and {@code other}.
     *
     * @param other the vector to compare with
     * @return the dot product divided by the product of both norms; {@code NaN} when either
     *         vector has a norm of zero (for example, an empty vector)
     */
    public double cosineSimilarity(DocumentVector other) {
        return dotProduct(other) / (this.norm * other.norm);
    }

    /**
     * Computes the dot product over the terms present in both vectors.
     *
     * @param other the vector to multiply with
     * @return the sum of the products of the weights of all shared terms; {@code 0.0} when no
     *         term is shared
     */
    public double dotProduct(DocumentVector other) {
        double sum = 0.0d;
        for (String term : Sets.intersection(this.elements.keySet(), other.elements.keySet())) {
            sum += this.elements.get(term) * other.elements.get(term);
        }
        return sum;
    }

    /**
     * Merges several vectors into one by summing the weights of each term.
     *
     * @param vectors the vectors to merge; must not be empty
     * @param weightByDocumentFrequency if {@code true}, each summed weight is additionally
     *        multiplied by the fraction of input vectors that contain the term
     * @return a new vector holding one entry per distinct term of the inputs
     * @throws IllegalArgumentException if {@code vectors} is empty
     */
    public static DocumentVector merge(Collection<DocumentVector> vectors, boolean weightByDocumentFrequency) {
        Preconditions.checkArgument(!vectors.isEmpty(), "Cannot merge zero vectors");
        int vectorCount = vectors.size();

        // Number of input vectors in which each term occurs.
        Multiset<String> documentFrequency =
                HashMultiset.create(vectorCount * vectors.iterator().next().elements.size());
        for (DocumentVector vector : vectors) {
            documentFrequency.addAll(vector.elements.keySet());
        }

        // Size by distinct terms; Multiset.size() would count every occurrence.
        Map<String, Double> merged =
                Maps.newHashMapWithExpectedSize(documentFrequency.elementSet().size());
        for (Multiset.Entry<String> entry : documentFrequency.entrySet()) {
            String term = entry.getElement();
            double sum = 0.0d;
            for (DocumentVector vector : vectors) {
                Double weight = vector.elements.get(term);
                if (weight != null) {
                    sum += weight;
                }
            }
            if (weightByDocumentFrequency) {
                // Cast is required: int / int would truncate the fraction to 0 or 1.
                sum *= (double) entry.getCount() / vectorCount;
            }
            merged.put(term, sum);
        }
        return new DocumentVector(merged);
    }

    /**
     * Computes the Euclidean norm of the map's values.
     *
     * @param weights the weights to measure
     * @return the square root of the sum of the squared values
     */
    private static <T> double norm(Map<T, Double> weights) {
        double sumOfSquares = 0.0d;
        for (Double weight : weights.values()) {
            sumOfSquares += weight * weight;
        }
        return Math.sqrt(sumOfSquares);
    }

    /**
     * Builds a new vector that keeps only the highest-weighted entries of this one.
     *
     * @param n the maximum number of entries to keep
     * @return a new vector containing the entries returned by {@link #topComponents(int)}
     */
    public DocumentVector topN(int n) {
        Map<String, Double> top = new HashMap<>();
        for (Map.Entry<String, Double> entry : topComponents(n)) {
            top.put(entry.getKey(), entry.getValue());
        }
        return new DocumentVector(top);
    }

    /**
     * Returns the highest-weighted entries of this vector.
     *
     * @param n the maximum number of entries to return
     * @return up to {@code n} entries as produced by {@code Ordering.greatestOf}, largest
     *         weight first
     */
    public List<Map.Entry<String, Double>> topComponents(int n) {
        return ORDER_BY_WEIGHT.greatestOf(this.elements.entrySet(), n);
    }

    @Override
    public int hashCode() {
        return Objects.hash(this.elements, this.norm);
    }

    /** Two vectors are equal when their norms are bitwise equal and their element maps are equal. */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null || getClass() != obj.getClass()) {
            return false;
        }
        DocumentVector other = (DocumentVector) obj;
        return Double.doubleToLongBits(this.norm) == Double.doubleToLongBits(other.norm)
                && this.elements.equals(other.elements);
    }

    /** Renders the entries as <code>{term: weight, ...}</code>, sorted by ascending weight. */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        sb.append("{");
        Joiner.on(", ").withKeyValueSeparator(": ").appendTo(sb, ORDER_BY_WEIGHT.sortedCopy(this.elements.entrySet()));
        return sb.append("}").toString();
    }
}
