package org.apache.flink.api.java.utils;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import org.apache.flink.annotation.PublicEvolving;
import org.apache.flink.api.common.distributions.DataDistribution;
import org.apache.flink.api.common.functions.BroadcastVariableInitializer;
import org.apache.flink.api.common.functions.MapPartitionFunction;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.common.functions.RichMapPartitionFunction;
import org.apache.flink.api.common.operators.Keys;
import org.apache.flink.api.common.operators.base.PartitionOperatorBase;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.Utils;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.functions.SampleInCoordinator;
import org.apache.flink.api.java.functions.SampleInPartition;
import org.apache.flink.api.java.functions.SampleWithFraction;
import org.apache.flink.api.java.operators.GroupReduceOperator;
import org.apache.flink.api.java.operators.MapPartitionOperator;
import org.apache.flink.api.java.operators.Operator;
import org.apache.flink.api.java.operators.PartitionOperator;
import org.apache.flink.api.java.summarize.aggregation.SummaryAggregatorFactory;
import org.apache.flink.api.java.summarize.aggregation.TupleSummaryAggregator;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.typeutils.TupleTypeInfoBase;
import org.apache.flink.api.java.typeutils.TypeExtractor;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.util.AbstractID;
import org.apache.flink.util.Collector;
import org.apache.hadoop.hbase.HConstants;

@PublicEvolving
/* loaded from: input_file:org/apache/flink/api/java/utils/DataSetUtils.class */
public final class DataSetUtils {
    /**
     * Counts the elements of every partition of the given data set.
     *
     * <p>Each parallel instance walks its partition once and emits a single
     * {@code (subtask index, element count)} tuple.
     *
     * @param dataSet the data set whose partitions are counted
     * @param <T> element type of the data set
     * @return a data set holding one {@code (subtask index, count)} tuple per partition
     */
    public static <T> DataSet<Tuple2<Integer, Long>> countElementsPerPartition(DataSet<T> dataSet) {
        return dataSet.mapPartition(new RichMapPartitionFunction<T, Tuple2<Integer, Long>>() {
            @Override
            public void mapPartition(Iterable<T> values, Collector<Tuple2<Integer, Long>> out) throws Exception {
                long counter = 0L;
                // The elements themselves are irrelevant; only how many there are matters.
                final Iterator<T> remaining = values.iterator();
                while (remaining.hasNext()) {
                    remaining.next();
                    counter++;
                }
                final int subtaskIndex = getRuntimeContext().getIndexOfThisSubtask();
                out.collect(new Tuple2<Integer, Long>(subtaskIndex, counter));
            }
        });
    }

    /**
     * Pairs every element of the data set with a consecutive {@code long} index.
     *
     * <p>The per-partition element counts produced by
     * {@link #countElementsPerPartition(DataSet)} are broadcast under the name
     * {@code "counts"}. Each parallel instance sorts them by subtask index and sums the
     * counts of all subtasks with a smaller index than its own; that sum is the first
     * index it hands out, and it increments the index by one for every element.
     *
     * @param dataSet the data set to index
     * @param <T> element type of the data set
     * @return a data set of {@code (index, element)} tuples
     */
    public static <T> DataSet<Tuple2<Long, T>> zipWithIndex(DataSet<T> dataSet) {
        return dataSet.mapPartition(new RichMapPartitionFunction<T, Tuple2<Long, T>>() {
            /** Next index to assign; initialised in {@code open} to this subtask's offset. */
            long start = 0;

            @Override
            public void open(Configuration configuration) throws Exception {
                super.open(configuration);

                List<Tuple2<Integer, Long>> counts = getRuntimeContext().getBroadcastVariableWithInitializer(
                        "counts",
                        new BroadcastVariableInitializer<Tuple2<Integer, Long>, List<Tuple2<Integer, Long>>>() {
                            @Override
                            public List<Tuple2<Integer, Long>> initializeBroadcastVariable(Iterable<Tuple2<Integer, Long>> data) {
                                // Order the counts by subtask index so that position i
                                // in the list belongs to subtask i.
                                List<Tuple2<Integer, Long>> sortedCounts = new ArrayList<>();
                                for (Tuple2<Integer, Long> count : data) {
                                    sortedCounts.add(count);
                                }
                                Collections.sort(sortedCounts, new Comparator<Tuple2<Integer, Long>>() {
                                    @Override
                                    public int compare(Tuple2<Integer, Long> left, Tuple2<Integer, Long> right) {
                                        return left.f0.compareTo(right.f0);
                                    }
                                });
                                return sortedCounts;
                            }
                        });

                // Offset of this subtask = number of elements held by all preceding subtasks.
                final int subtaskIndex = getRuntimeContext().getIndexOfThisSubtask();
                for (int i = 0; i < subtaskIndex; i++) {
                    start += counts.get(i).f1;
                }
            }

            @Override
            public void mapPartition(Iterable<T> values, Collector<Tuple2<Long, T>> out) throws Exception {
                for (T value : values) {
                    out.collect(new Tuple2<Long, T>(start++, value));
                }
            }
        }).withBroadcastSet(countElementsPerPartition(dataSet), "counts");
    }

    /**
     * Pairs every element of the data set with a unique {@code long} id.
     *
     * <p>Ids are built per parallel instance as {@code (counter << shifter) + taskId},
     * where {@code taskId} is the index of the subtask, {@code shifter} is the bit size
     * of {@code (number of parallel subtasks - 1)} and {@code counter} starts at 0 and
     * grows by one per element. The low bits therefore carry the subtask index and the
     * high bits the per-subtask counter. No broadcast of partition sizes is involved.
     *
     * <p>The function throws an {@link Exception} with the message
     * {@code "Exceeded Long value range while generating labels"} once the bit size of
     * the counter plus {@code shifter} reaches the bit size of {@link Long#MAX_VALUE}.
     *
     * @param dataSet the data set to label
     * @param <T> element type of the data set
     * @return a data set of {@code (unique id, element)} tuples
     */
    public static <T> DataSet<Tuple2<Long, T>> zipWithUniqueId(DataSet<T> dataSet) {
        return dataSet.mapPartition(new RichMapPartitionFunction<T, Tuple2<Long, T>>() {
            // Long.MAX_VALUE is used directly instead of an HBase constant with the same
            // value, so this utility has no dependency on HBase.
            long maxBitSize = DataSetUtils.getBitSize(Long.MAX_VALUE);
            long shifter = 0;
            long start = 0;
            long taskId = 0;

            @Override
            public void open(Configuration configuration) throws Exception {
                super.open(configuration);
                this.shifter = DataSetUtils.getBitSize(getRuntimeContext().getNumberOfParallelSubtasks() - 1);
                this.taskId = getRuntimeContext().getIndexOfThisSubtask();
            }

            @Override
            public void mapPartition(Iterable<T> values, Collector<Tuple2<Long, T>> out) throws Exception {
                for (T value : values) {
                    // Check the range before shifting so an overflowing label is never computed.
                    if (DataSetUtils.getBitSize(this.start) + this.shifter >= this.maxBitSize) {
                        throw new Exception("Exceeded Long value range while generating labels");
                    }
                    final long label = (this.start << ((int) this.shifter)) + this.taskId;
                    out.collect(new Tuple2<Long, T>(label, value));
                    this.start++;
                }
            }
        });
    }

    /**
     * Samples the data set with a seed drawn from {@code Utils.RNG}.
     *
     * <p>Delegates to {@link #sample(DataSet, boolean, double, long)}.
     *
     * @param dataSet the data set to sample
     * @param z forwarded unchanged to the seeded overload (presumably the
     *     with-replacement flag; confirm against {@code SampleWithFraction})
     * @param d forwarded unchanged to the seeded overload (presumably the sampling
     *     fraction; confirm against {@code SampleWithFraction})
     * @param <T> element type of the data set
     * @return the map-partition operator producing the sample
     */
    public static <T> MapPartitionOperator<T, T> sample(DataSet<T> dataSet, boolean z, double d) {
        final long seed = Utils.RNG.nextLong();
        return sample(dataSet, z, d, seed);
    }

    /**
     * Samples the data set by running a {@code SampleWithFraction} function over each
     * partition.
     *
     * @param dataSet the data set to sample
     * @param z first constructor argument of {@code SampleWithFraction} (presumably the
     *     with-replacement flag; confirm against that class)
     * @param d second constructor argument of {@code SampleWithFraction} (presumably the
     *     sampling fraction; confirm against that class)
     * @param j third constructor argument of {@code SampleWithFraction} (presumably the
     *     random seed; confirm against that class)
     * @param <T> element type of the data set
     * @return the map-partition operator producing the sample
     */
    public static <T> MapPartitionOperator<T, T> sample(DataSet<T> dataSet, boolean z, double d, long j) {
        final SampleWithFraction<T> sampler = new SampleWithFraction<T>(z, d, j);
        return dataSet.mapPartition(sampler);
    }

    /**
     * Draws a sample from the data set with a seed drawn from {@code Utils.RNG}.
     *
     * <p>Delegates to {@link #sampleWithSize(DataSet, boolean, int, long)}.
     *
     * @param dataSet the data set to sample
     * @param z forwarded unchanged to the seeded overload (presumably the
     *     with-replacement flag; confirm against {@code SampleInPartition})
     * @param i forwarded unchanged to the seeded overload (presumably the number of
     *     samples; confirm against {@code SampleInPartition})
     * @param <T> element type of the data set
     * @return the sampled data set
     */
    public static <T> DataSet<T> sampleWithSize(DataSet<T> dataSet, boolean z, int i) {
        final long seed = Utils.RNG.nextLong();
        return sampleWithSize(dataSet, z, i, seed);
    }

    /**
     * Draws a sample from the data set in two stages: a {@code SampleInPartition}
     * function runs over each partition, and a {@code SampleInCoordinator} group-reduce
     * is then applied to its output.
     *
     * @param dataSet the data set to sample
     * @param z first constructor argument of both sampling functions (presumably the
     *     with-replacement flag; confirm against those classes)
     * @param i second constructor argument of both sampling functions (presumably the
     *     number of samples; confirm against those classes)
     * @param j third constructor argument of both sampling functions (presumably the
     *     random seed; confirm against those classes)
     * @param <T> element type of the data set
     * @return the sampled data set
     */
    public static <T> DataSet<T> sampleWithSize(DataSet<T> dataSet, boolean z, int i, long j) {
        final SampleInPartition<T> partitionSampler = new SampleInPartition<>(z, i, j);
        // Left raw: the intermediate element type of the per-partition stage is not
        // named in this file.
        final MapPartitionOperator partialSamples = dataSet.mapPartition(partitionSampler);

        // NOTE(review): looked up directly in this method rather than in a helper, in
        // case Utils.getCallLocationName depends on the calling frame; confirm.
        final String callLocation = Utils.getCallLocationName();
        return new GroupReduceOperator<>(partialSamples, dataSet.getType(), new SampleInCoordinator<T>(z, i, j), callLocation);
    }

    /**
     * Range-partitions the data set on the given field positions using the supplied
     * data distribution.
     *
     * @param dataSet the data set to partition
     * @param dataDistribution the distribution handed to the partition operator
     * @param iArr positions of the key fields
     * @param <T> element type of the data set
     * @return the range partition operator
     */
    public static <T> PartitionOperator<T> partitionByRange(DataSet<T> dataSet, DataDistribution dataDistribution, int... iArr) {
        final Keys.ExpressionKeys<T> rangeKeys = new Keys.ExpressionKeys<>(iArr, dataSet.getType(), false);
        // NOTE(review): looked up directly in this method rather than in a helper, in
        // case Utils.getCallLocationName depends on the calling frame; confirm.
        final String callLocation = Utils.getCallLocationName();
        return new PartitionOperator<>(
                dataSet,
                PartitionOperatorBase.PartitionMethod.RANGE,
                rangeKeys,
                dataDistribution,
                callLocation);
    }

    /**
     * Range-partitions the data set on the given key expressions using the supplied
     * data distribution.
     *
     * @param dataSet the data set to partition
     * @param dataDistribution the distribution handed to the partition operator
     * @param strArr key expressions selecting the partitioning fields
     * @param <T> element type of the data set
     * @return the range partition operator
     */
    public static <T> PartitionOperator<T> partitionByRange(DataSet<T> dataSet, DataDistribution dataDistribution, String... strArr) {
        final Keys.ExpressionKeys<T> rangeKeys = new Keys.ExpressionKeys<>(strArr, dataSet.getType());
        // NOTE(review): looked up directly in this method rather than in a helper, in
        // case Utils.getCallLocationName depends on the calling frame; confirm.
        final String callLocation = Utils.getCallLocationName();
        return new PartitionOperator<>(
                dataSet,
                PartitionOperatorBase.PartitionMethod.RANGE,
                rangeKeys,
                dataDistribution,
                callLocation);
    }

    /**
     * Range-partitions the data set on the key produced by the given key selector,
     * using the supplied data distribution.
     *
     * <p>The selector is passed through {@code dataSet.clean(...)} before it is wrapped
     * into selector-function keys; the key type is derived with
     * {@code TypeExtractor.getKeySelectorTypes}.
     *
     * @param dataSet the data set to partition
     * @param dataDistribution the distribution handed to the partition operator
     * @param keySelector function extracting the partitioning key from an element
     * @param <T> element type of the data set
     * @param <K> key type
     * @return the range partition operator
     */
    public static <T, K extends Comparable<K>> PartitionOperator<T> partitionByRange(DataSet<T> dataSet, DataDistribution dataDistribution, KeySelector<T, K> keySelector) {
        final Keys.SelectorFunctionKeys<T, K> rangeKeys = new Keys.SelectorFunctionKeys<>(
                dataSet.clean(keySelector),
                dataSet.getType(),
                TypeExtractor.getKeySelectorTypes(keySelector, dataSet.getType()));
        // NOTE(review): looked up directly in this method rather than in a helper, in
        // case Utils.getCallLocationName depends on the calling frame; confirm.
        final String callLocation = Utils.getCallLocationName();
        return new PartitionOperator<>(
                dataSet,
                PartitionOperatorBase.PartitionMethod.RANGE,
                rangeKeys,
                dataDistribution,
                callLocation);
    }

    /**
     * Computes a summary tuple for a data set of tuples.
     *
     * <p>Every partition folds its tuples into a {@code TupleSummaryAggregator} created
     * by {@code SummaryAggregatorFactory} for the data set's tuple type. The
     * per-partition aggregators are then combined pairwise by a reduce, the single
     * remaining aggregator is collected, and its {@code result()} is returned.
     *
     * @param dataSet the tuple data set to summarize
     * @param <R> type of the resulting summary tuple
     * @param <T> tuple type of the data set elements
     * @return the result of the combined aggregator
     * @throws IllegalArgumentException if the data set's type is not a tuple type
     * @throws Exception if executing the program or collecting the result fails
     */
    public static <R extends Tuple, T extends Tuple> R summarize(DataSet<T> dataSet) throws Exception {
        if (!dataSet.getType().isTupleType()) {
            throw new IllegalArgumentException("summarize() is only implemented for DataSet's of Tuples");
        }
        // Safe downcast: the tuple-type check above has just passed.
        final TupleTypeInfoBase<?> inType = (TupleTypeInfoBase<?>) dataSet.getType();

        DataSet<TupleSummaryAggregator<R>> combined = dataSet.mapPartition(new MapPartitionFunction<T, TupleSummaryAggregator<R>>() {
            @Override
            public void mapPartition(Iterable<T> values, Collector<TupleSummaryAggregator<R>> out) throws Exception {
                TupleSummaryAggregator<R> aggregator = SummaryAggregatorFactory.create(inType);
                for (Tuple value : values) {
                    aggregator.aggregate(value);
                }
                // Emitted even when the partition held no elements.
                out.collect(aggregator);
            }
        }).reduce(new ReduceFunction<TupleSummaryAggregator<R>>() {
            @Override
            public TupleSummaryAggregator<R> reduce(TupleSummaryAggregator<R> left, TupleSummaryAggregator<R> right) throws Exception {
                left.combine(right);
                return left;
            }
        });

        return combined.collect().get(0).result();
    }

    /**
     * Computes a checksum/hash-code accumulator over the data set.
     *
     * <p>Attaches a {@code Utils.ChecksumHashCodeHelper} output named
     * {@code "ChecksumHashCode"} to the data set, executes the data set's execution
     * environment, and reads the accumulator registered under a freshly generated id
     * from the execution result.
     *
     * @param dataSet the data set to checksum
     * @param <T> element type of the data set
     * @return the accumulator result of the executed job
     * @throws Exception if executing the program fails
     */
    @Deprecated
    public static <T> Utils.ChecksumHashCode checksumHashCode(DataSet<T> dataSet) throws Exception {
        // A fresh id per call is used as the accumulator name.
        final String accumulatorName = new AbstractID().toString();

        dataSet.output(new Utils.ChecksumHashCodeHelper<T>(accumulatorName)).name("ChecksumHashCode");

        return dataSet.getExecutionEnvironment()
                .execute()
                .<Utils.ChecksumHashCode>getAccumulatorResult(accumulatorName);
    }

    /**
     * Returns the number of bits needed to represent the given value, i.e. the position
     * of its highest set bit counted from 1.
     *
     * <p>Examples: {@code 0 -> 0}, {@code 1 -> 1}, {@code 255 -> 8},
     * {@code Long.MAX_VALUE -> 63}. Negative values have the sign bit set and therefore
     * yield 64.
     *
     * @param j the value to measure
     * @return the bit size of {@code j}, between 0 and 64 inclusive
     */
    public static int getBitSize(long j) {
        // Working on the full 64-bit value handles inputs above Integer.MAX_VALUE and
        // negative inputs uniformly; splitting into int halves misreports negatives.
        return Long.SIZE - Long.numberOfLeadingZeros(j);
    }

    /**
     * Private constructor that always throws, so no instance of this class can be
     * created; every other member visible in this class is static.
     */
    private DataSetUtils() {
        throw new RuntimeException();
    }
}
