package com.databricks.labs.automl.model.tools.split;

import com.databricks.labs.automl.utils.SparkSessionWrapper;
import java.util.UUID;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.spark.SparkContext;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.expressions.Window$;
import org.apache.spark.sql.functions$;
import org.apache.spark.sql.types.StructType;
import org.apache.spark.storage.StorageLevel$;
import scala.Array$;
import scala.MatchError;
import scala.None$;
import scala.Option;
import scala.Predef$;
import scala.Some;
import scala.StringContext;
import scala.Tuple2;
import scala.collection.SeqLike;
import scala.collection.mutable.StringBuilder;
import scala.math.package$;
import scala.reflect.ClassTag$;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;
import scala.runtime.ObjectRef;
import scala.util.Random$;

/* compiled from: SplitOperators.scala */
/* loaded from: input_file:com/databricks/labs/automl/model/tools/split/SplitOperators$.class */
public final class SplitOperators$ implements SparkSessionWrapper {
    // Singleton instance. Declared as null here; the private constructor assigns `this` to it,
    // and the static initializer below is what invokes that constructor.
    public static final SplitOperators$ MODULE$ = null;
    // Logger obtained in the constructor via Logger.getLogger(getClass()).
    private final transient Logger com$databricks$labs$automl$model$tools$split$SplitOperators$$logger;
    // Cached session; written by spark$lzycompute() and read by spark().
    private final SparkSession spark;
    // Cached context; written by sc$lzycompute() and read by sc().
    private final SparkContext sc;
    // Initialization flags: mask 1 is set once `spark` has been computed, mask 2 once `sc` has been computed.
    private volatile byte bitmap$0;

    // Creates the single instance when the class is initialized; the constructor stores it in MODULE$.
    static {
        new SplitOperators$();
    }

    /* JADX WARN: Multi-variable type inference failed */
    /* JADX WARN: Type inference failed for: r0v0 */
    /* JADX WARN: Type inference failed for: r0v1, types: [java.lang.Throwable] */
    /* JADX WARN: Type inference failed for: r0v7 */
    private SparkSession spark$lzycompute() {
        ?? r0 = this;
        synchronized (r0) {
            if (((byte) (this.bitmap$0 & 1)) == 0) {
                this.spark = SparkSessionWrapper.Cclass.spark(this);
                this.bitmap$0 = (byte) (this.bitmap$0 | 1);
            }
            BoxedUnit boxedUnit = BoxedUnit.UNIT;
            r0 = r0;
            return this.spark;
        }
    }

    /**
     * Returns the session, delegating to {@code spark$lzycompute()} when the mask-1 flag in
     * {@code bitmap$0} shows it has not been computed yet.
     *
     * @return the cached session
     */
    @Override // com.databricks.labs.automl.utils.SparkSessionWrapper
    public SparkSession spark() {
        boolean initialized = ((byte) (this.bitmap$0 & 1)) != 0;
        if (initialized) {
            return this.spark;
        }
        return spark$lzycompute();
    }

    /* JADX WARN: Multi-variable type inference failed */
    /* JADX WARN: Type inference failed for: r0v0 */
    /* JADX WARN: Type inference failed for: r0v1, types: [java.lang.Throwable] */
    /* JADX WARN: Type inference failed for: r0v7 */
    private SparkContext sc$lzycompute() {
        ?? r0 = this;
        synchronized (r0) {
            if (((byte) (this.bitmap$0 & 2)) == 0) {
                this.sc = SparkSessionWrapper.Cclass.sc(this);
                this.bitmap$0 = (byte) (this.bitmap$0 | 2);
            }
            BoxedUnit boxedUnit = BoxedUnit.UNIT;
            r0 = r0;
            return this.sc;
        }
    }

    /**
     * Returns the context, delegating to {@code sc$lzycompute()} when the mask-2 flag in
     * {@code bitmap$0} shows it has not been computed yet.
     *
     * @return the cached context
     */
    @Override // com.databricks.labs.automl.utils.SparkSessionWrapper
    public SparkContext sc() {
        boolean initialized = ((byte) (this.bitmap$0 & 2)) != 0;
        if (initialized) {
            return this.sc;
        }
        return sc$lzycompute();
    }

    /**
     * Returns the logger stored in this object's logger field.
     *
     * @return the logger assigned in the constructor
     */
    public Logger com$databricks$labs$automl$model$tools$split$SplitOperators$$logger() {
        return this.com$databricks$labs$automl$model$tools$split$SplitOperators$$logger;
    }

    /**
     * Converts a boxed numeric value to an optional boxed double.
     *
     * @param obj the value to convert
     * @return {@code Some} holding the value as a double when {@code obj} is an {@code Integer}
     *     or a {@code Double}; {@code None} for any other input, including {@code null}
     */
    public Option<Object> com$databricks$labs$automl$model$tools$split$SplitOperators$$toDoubleType(Object obj) {
        if (obj instanceof Integer) {
            int asInt = BoxesRunTime.unboxToInt(obj);
            return new Some(BoxesRunTime.boxToDouble(asInt));
        }
        if (obj instanceof Double) {
            double asDouble = BoxesRunTime.unboxToDouble(obj);
            return new Some(BoxesRunTime.boxToDouble(asDouble));
        }
        return (Option) None$.MODULE$;
    }

    /**
     * Builds a pair of data frames, each created from an empty row RDD with the given schema.
     *
     * @param structType schema applied to both data frames
     * @return a tuple of two separately created data frames sharing {@code structType}
     */
    private Tuple2<Dataset<Row>, Dataset<Row>> generateEmptyTrainTest(StructType structType) {
        Dataset<Row> emptyFirst = spark().createDataFrame(sc().emptyRDD(ClassTag$.MODULE$.apply(Row.class)), structType);
        Dataset<Row> emptySecond = spark().createDataFrame(sc().emptyRDD(ClassTag$.MODULE$.apply(Row.class)), structType);
        return new Tuple2<>(emptyFirst, emptySecond);
    }

    /**
     * Produces a two-element array of data sets by running a per-row function over {@code rowArr}.
     *
     * <p>Two empty data frames with the schema of {@code dataset} are placed in mutable
     * references. Each element of {@code rowArr} is then handed to
     * {@code SplitOperators$$anonfun$stratifiedSplit$1}, which also receives the input data, seed,
     * column name, ratio and both references. That function is defined outside this view;
     * presumably it accumulates each class's split into the references.
     *
     * @param dataset input data
     * @param j value forwarded to the per-row function (named as the seed by callers in this file)
     * @param rowArr rows to iterate over
     * @param str column name forwarded to the per-row function
     * @param d ratio forwarded to the per-row function
     * @return array holding the final contents of the first and second reference, in that order
     * @throws MatchError if the empty train/test pair is {@code null}
     */
    public Dataset<Row>[] stratifiedSplit(Dataset<Row> dataset, long j, Row[] rowArr, String str, double d) {
        com$databricks$labs$automl$model$tools$split$SplitOperators$$logger().log(Level.DEBUG, "DEBUG: Generating empty train/test split sets");
        Tuple2<Dataset<Row>, Dataset<Row>> emptyPair = generateEmptyTrainTest(dataset.schema());
        if (emptyPair == null) {
            throw new MatchError(emptyPair);
        }
        // Mutable holders updated by the per-row function.
        ObjectRef trainRef = ObjectRef.create(emptyPair._1());
        ObjectRef testRef = ObjectRef.create(emptyPair._2());
        Predef$.MODULE$.refArrayOps(rowArr).foreach(
                new SplitOperators$$anonfun$stratifiedSplit$1(dataset, j, str, d, trainRef, testRef));
        Dataset trainResult = (Dataset) trainRef.elem;
        Dataset testResult = (Dataset) testRef.elem;
        return new Dataset[]{trainResult, testResult};
    }

    /**
     * Produces a two-element array of data sets driven by each label value's share of the data.
     *
     * <p>Rows are grouped by column {@code str} and counted; each count is divided by the total
     * row count to give a {@code skew} column. The collected (label, skew) rows are passed one by
     * one to {@code SplitOperators$$anonfun$underSampleSplit$1} together with the smallest skew
     * value. That function is defined outside this view; presumably it down-samples each class
     * towards the smallest one and accumulates the result in the two references.
     *
     * @param dataset input data
     * @param j value forwarded to the per-row function
     * @param str name of the column to group by
     * @param d ratio forwarded to the per-row function
     * @return array holding the final contents of the first and second reference, in that order
     * @throws MatchError if the empty train/test pair is {@code null}
     */
    public Dataset<Row>[] underSampleSplit(Dataset<Row> dataset, long j, String str, double d) {
        com$databricks$labs$automl$model$tools$split$SplitOperators$$logger().log(Level.DEBUG, "DEBUG: Generating empty train/test split sets");
        Tuple2<Dataset<Row>, Dataset<Row>> emptyPair = generateEmptyTrainTest(dataset.schema());
        if (emptyPair == null) {
            throw new MatchError(emptyPair);
        }
        ObjectRef trainRef = ObjectRef.create(emptyPair._1());
        ObjectRef testRef = ObjectRef.create(emptyPair._2());

        // Row count per label value.
        Dataset<Row> countsPerLabel = dataset
                .select(str, Predef$.MODULE$.wrapRefArray(new String[0]))
                .groupBy(str, Predef$.MODULE$.wrapRefArray(new String[0]))
                .agg(functions$.MODULE$.count("*").as("counts"), Predef$.MODULE$.wrapRefArray(new Column[0]));
        // Each label's share of the total row count.
        Column shareOfTotal = functions$.MODULE$.col("counts")
                .$div(functions$.MODULE$.lit(BoxesRunTime.boxToLong(dataset.count())));
        Dataset<Row> skewPerLabel = countsPerLabel
                .withColumn("skew", shareOfTotal)
                .select(str, Predef$.MODULE$.wrapRefArray(new String[]{"skew"}));

        Row[] skewRows = (Row[]) skewPerLabel.collect();
        // Smallest share: first row after sorting ascending by skew.
        double smallestSkew = ((Row) skewPerLabel
                .sort(Predef$.MODULE$.wrapRefArray(new Column[]{functions$.MODULE$.col("skew").asc()}))
                .select(Predef$.MODULE$.wrapRefArray(new Column[]{functions$.MODULE$.col("skew")}))
                .first()).getDouble(0);

        Predef$.MODULE$.refArrayOps(skewRows).foreach(
                new SplitOperators$$anonfun$underSampleSplit$1(dataset, j, str, d, trainRef, testRef, smallestSkew));
        com$databricks$labs$automl$model$tools$split$SplitOperators$$logger().log(Level.DEBUG, "DEBUG: returning train & test datasets");
        return new Dataset[]{(Dataset) trainRef.elem, (Dataset) testRef.elem};
    }

    /**
     * Produces a two-element array of data sets driven by each label value's row count.
     *
     * <p>Rows are grouped by column {@code str} and counted. The collected (label, counts) rows
     * are passed one by one to {@code SplitOperators$$anonfun$overSampleSplit$1} together with the
     * largest count. That function is defined outside this view; presumably it up-samples each
     * class towards the largest one and accumulates the result in the two references.
     *
     * @param dataset input data
     * @param j value forwarded to the per-row function
     * @param str name of the column to group by
     * @param d ratio forwarded to the per-row function
     * @return array holding the final contents of the first and second reference, in that order
     * @throws MatchError if the empty train/test pair is {@code null}
     */
    public Dataset<Row>[] overSampleSplit(Dataset<Row> dataset, long j, String str, double d) {
        com$databricks$labs$automl$model$tools$split$SplitOperators$$logger().log(Level.DEBUG, "DEBUG: Generating empty train/test split sets");
        Tuple2<Dataset<Row>, Dataset<Row>> emptyPair = generateEmptyTrainTest(dataset.schema());
        if (emptyPair == null) {
            throw new MatchError(emptyPair);
        }
        ObjectRef trainRef = ObjectRef.create(emptyPair._1());
        ObjectRef testRef = ObjectRef.create(emptyPair._2());

        // Row count per label value.
        Dataset<Row> countsPerLabel = dataset
                .select(str, Predef$.MODULE$.wrapRefArray(new String[0]))
                .groupBy(str, Predef$.MODULE$.wrapRefArray(new String[0]))
                .agg(functions$.MODULE$.count("*").as("counts"), Predef$.MODULE$.wrapRefArray(new Column[0]));

        Row[] countRows = (Row[]) countsPerLabel.collect();
        // Largest count: first row after sorting descending by counts.
        long largestCount = ((Row) countsPerLabel
                .sort(Predef$.MODULE$.wrapRefArray(new Column[]{functions$.MODULE$.col("counts").desc()}))
                .select(Predef$.MODULE$.wrapRefArray(new Column[]{functions$.MODULE$.col("counts")}))
                .first()).getLong(0);

        Predef$.MODULE$.refArrayOps(countRows).foreach(
                new SplitOperators$$anonfun$overSampleSplit$1(dataset, j, str, d, trainRef, testRef, largestCount));
        com$databricks$labs$automl$model$tools$split$SplitOperators$$logger().log(Level.DEBUG, "DEBUG: returning train & test datasets");
        return new Dataset[]{(Dataset) trainRef.elem, (Dataset) testRef.elem};
    }

    /**
     * Produces a two-element array of data sets by running a per-row function over {@code rowArr}.
     *
     * <p>Works like {@code stratifiedSplit} but hands the extra value {@code d} to
     * {@code SplitOperators$$anonfun$stratifyReduce$1}. That function is defined outside this
     * view; presumably {@code d} is a reduction factor applied before splitting.
     *
     * @param dataset input data
     * @param d extra value forwarded to the per-row function
     * @param j value forwarded to the per-row function
     * @param rowArr rows to iterate over
     * @param str column name forwarded to the per-row function
     * @param d2 ratio forwarded to the per-row function
     * @return array holding the final contents of the first and second reference, in that order
     * @throws MatchError if the empty train/test pair is {@code null}
     */
    public Dataset<Row>[] stratifyReduce(Dataset<Row> dataset, double d, long j, Row[] rowArr, String str, double d2) {
        com$databricks$labs$automl$model$tools$split$SplitOperators$$logger().log(Level.DEBUG, "DEBUG: Generating empty train/test split sets");
        Tuple2<Dataset<Row>, Dataset<Row>> emptyPair = generateEmptyTrainTest(dataset.schema());
        if (emptyPair == null) {
            throw new MatchError(emptyPair);
        }
        // Mutable holders updated by the per-row function.
        ObjectRef trainRef = ObjectRef.create(emptyPair._1());
        ObjectRef testRef = ObjectRef.create(emptyPair._2());
        Predef$.MODULE$.refArrayOps(rowArr).foreach(
                new SplitOperators$$anonfun$stratifyReduce$1(dataset, d, j, str, d2, trainRef, testRef));
        com$databricks$labs$automl$model$tools$split$SplitOperators$$logger().log(Level.DEBUG, "DEBUG: returning train & test datasets");
        Dataset trainResult = (Dataset) trainRef.elem;
        Dataset testResult = (Dataset) testRef.elem;
        return new Dataset[]{trainResult, testResult};
    }

    /**
     * Splits {@code dataset} into two parts by row position after ordering on column {@code str}.
     *
     * <p>The split row is {@code round(count * d2)}. When {@code d > 0}, the split row is moved
     * back by a random amount in {@code [0, round(count * (1 - d2) * d / 100))}. If that bound
     * rounds to zero or is negative (small data sets, small {@code d}, or {@code d2 >= 1}), no
     * random shift is applied; previously this case failed with an
     * {@code IllegalArgumentException} from {@code Random.nextInt}.
     *
     * <p>Rows are numbered with {@code row_number()} over a window ordered by {@code str}; the
     * first returned data set keeps rows numbered up to and including the split row, the second
     * keeps the rest. The temporary grouping and row-number columns (randomly named) are dropped.
     *
     * @param dataset input data; must contain column {@code str}
     * @param j not used by this method. NOTE(review): the random shift is therefore not seeded —
     *     confirm whether that is intended
     * @param str name of the column to order by
     * @param d size of the random shift, as a percentage; values {@code <= 0} disable the shift
     * @param d2 fraction of rows that fall before the split row
     * @return array of two data sets: rows up to the split row, then rows after it
     * @throws IllegalArgumentException if {@code str} is not a column of {@code dataset}, or if
     *     {@code d > 0} and {@code (1 - d2) * d / 100} is not below 0.5 (raised by {@code require})
     */
    public Dataset<Row>[] chronologicalSplit(Dataset<Row> dataset, long j, String str, double d, double d2) {
        Predef$.MODULE$.require(Predef$.MODULE$.refArrayOps(dataset.schema().fieldNames()).contains(str), new SplitOperators$$anonfun$chronologicalSplit$1(dataset, str));
        if (d > 0.0d) {
            Predef$.MODULE$.require(((((double) 1) - d2) * d) / ((double) 100) < 0.5d, new SplitOperators$$anonfun$chronologicalSplit$2(d, d2));
        }
        double count = dataset.count();
        int splitRow = (int) package$.MODULE$.round(count * d2);
        if (d > 0.0d) {
            int shiftBound = (int) package$.MODULE$.round(((count * (1 - d2)) * d) / 100);
            // nextInt rejects a non-positive bound, so skip the shift when the bound rounds to 0 or below.
            if (shiftBound > 0) {
                splitRow -= Random$.MODULE$.nextInt(shiftBound);
            }
        }
        // Random suffixes keep the temporary columns from colliding with existing column names.
        String groupColumn = new StringBuilder().append("chron_grp_autoML_").append(UUID.randomUUID().toString()).toString();
        String rowNumberColumn = new StringBuilder().append("row_").append(UUID.randomUUID().toString()).toString();
        // Every row gets the same group value, so the window numbers all rows in one sequence ordered by str.
        Dataset<Row> numbered = dataset
                .withColumn(groupColumn, functions$.MODULE$.lit("grp"))
                .withColumn(rowNumberColumn, functions$.MODULE$.row_number().over(
                        Window$.MODULE$.partitionBy(groupColumn, Predef$.MODULE$.wrapRefArray(new String[0]))
                                .orderBy(str, Predef$.MODULE$.wrapRefArray(new String[0]))))
                .drop(groupColumn);
        com$databricks$labs$automl$model$tools$split$SplitOperators$$logger().log(Level.DEBUG, "DEBUG: returning train & test datasets");
        Dataset<Row> upToSplit = numbered.filter(functions$.MODULE$.col(rowNumberColumn).$less$eq(BoxesRunTime.boxToInteger(splitRow))).drop(rowNumberColumn);
        Dataset<Row> afterSplit = numbered.filter(functions$.MODULE$.col(rowNumberColumn).$greater(BoxesRunTime.boxToInteger(splitRow))).drop(rowNumberColumn);
        return new Dataset[]{upToSplit, afterSplit};
    }

    /**
     * Splits the rows where column {@code str} is false and the rows where it is true separately,
     * then combines the results.
     *
     * <p>Both subsets go through {@code stratifiedSplit} with the same arguments. The returned
     * first element is the union of the first parts of both splits; the returned second element
     * is the second part of the false-subset split only. The second part of the true-subset split
     * is not used. The log messages suggest {@code str} flags synthetic rows — presumably so that
     * synthetic rows never reach the second (test) data set; confirm against callers.
     *
     * @param dataset input data
     * @param j seed value forwarded to {@code stratifiedSplit}
     * @param rowArr rows forwarded to {@code stratifiedSplit}
     * @param str name of the boolean column used to separate the two subsets
     * @param str2 column name forwarded to {@code stratifiedSplit}
     * @param d ratio forwarded to {@code stratifiedSplit}
     * @return array of two data sets as described above
     * @throws MatchError if either split result does not match a two-element array
     */
    public Dataset<Row>[] kSamplingSplit(Dataset<Row> dataset, long j, Row[] rowArr, String str, String str2, double d) {
        com$databricks$labs$automl$model$tools$split$SplitOperators$$logger().log(Level.DEBUG, "DEBUG: generating KSample data sets");
        Dataset<Row> unflaggedRows = dataset.filter(functions$.MODULE$.col(str).unary_$bang());
        Dataset<Row> flaggedRows = dataset.filter(functions$.MODULE$.col(str));

        // Split of the rows where the flag column is false.
        Dataset<Row>[] unflaggedSplit = stratifiedSplit(unflaggedRows, j, rowArr, str2, d);
        Option unflaggedMatch = Array$.MODULE$.unapplySeq(unflaggedSplit);
        boolean unflaggedIsPair = !unflaggedMatch.isEmpty()
                && unflaggedMatch.get() != null
                && ((SeqLike) unflaggedMatch.get()).lengthCompare(2) == 0;
        if (!unflaggedIsPair) {
            throw new MatchError(unflaggedSplit);
        }
        SeqLike unflaggedParts = (SeqLike) unflaggedMatch.get();
        Dataset unflaggedTrain = (Dataset) unflaggedParts.apply(0);
        Dataset<Row> unflaggedTest = (Dataset) unflaggedParts.apply(1);

        // Split of the rows where the flag column is true; only its first part is kept.
        Dataset<Row>[] flaggedSplit = stratifiedSplit(flaggedRows, j, rowArr, str2, d);
        Option flaggedMatch = Array$.MODULE$.unapplySeq(flaggedSplit);
        boolean flaggedIsPair = !flaggedMatch.isEmpty()
                && flaggedMatch.get() != null
                && ((SeqLike) flaggedMatch.get()).lengthCompare(2) == 0;
        if (!flaggedIsPair) {
            throw new MatchError(flaggedSplit);
        }
        Dataset flaggedTrain = (Dataset) ((SeqLike) flaggedMatch.get()).apply(0);

        com$databricks$labs$automl$model$tools$split$SplitOperators$$logger().log(Level.DEBUG, "DEBUG: returning data sets augmented with KSample synthetic data");
        return new Dataset[]{unflaggedTrain.union(flaggedTrain), unflaggedTest};
    }

    /**
     * Dispatches to the split implementation selected by {@code str}.
     *
     * <p>Supported values are {@code "random"}, {@code "chronological"}, {@code "stratified"},
     * {@code "overSample"}, {@code "underSample"}, {@code "stratifyReduce"} and
     * {@code "kSample"}. The chosen method name is logged at DEBUG level first.
     *
     * @param dataset input data
     * @param j seed; passed to {@code randomSplit} and to every other split method
     * @param rowArr rows forwarded to the stratified, stratifyReduce and kSample splits
     * @param str split method name
     * @param str2 column name forwarded to all splits except random and chronological
     * @param d ratio; for {@code "random"} the split weights are {@code {d, 1 - d}}
     * @param str3 column name used by the kSample split to separate its two subsets
     * @param str4 column name the chronological split orders by
     * @param d2 random-shift percentage forwarded to the chronological split
     * @param d3 extra value forwarded to the stratifyReduce split
     * @return the array of data sets returned by the selected split method
     * @throws IllegalArgumentException if {@code str} is not one of the supported values
     */
    public Dataset<Row>[] genTestTrain(Dataset<Row> dataset, long j, Row[] rowArr, String str, String str2, double d, String str3, String str4, double d2, double d3) {
        com$databricks$labs$automl$model$tools$split$SplitOperators$$logger().log(Level.DEBUG, new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"DEBUG: Split Method: ", ""})).s(Predef$.MODULE$.genericWrapArray(new Object[]{str})));
        if ("random".equals(str)) {
            double[] weights = {d, 1 - d};
            return dataset.randomSplit(weights, j);
        }
        if ("chronological".equals(str)) {
            return chronologicalSplit(dataset, j, str4, d2, d);
        }
        if ("stratified".equals(str)) {
            return stratifiedSplit(dataset, j, rowArr, str2, d);
        }
        if ("overSample".equals(str)) {
            return overSampleSplit(dataset, j, str2, d);
        }
        if ("underSample".equals(str)) {
            return underSampleSplit(dataset, j, str2, d);
        }
        if ("stratifyReduce".equals(str)) {
            return stratifyReduce(dataset, d3, j, rowArr, str2, d);
        }
        if ("kSample".equals(str)) {
            return kSamplingSplit(dataset, j, rowArr, str3, str2, d);
        }
        throw new IllegalArgumentException(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"Cannot conduct train test split in mode: '", "'"})).s(Predef$.MODULE$.genericWrapArray(new Object[]{str})));
    }

    /**
     * Returns the constant {@code "syntheticColumn"}.
     * Presumably the compiler-generated default for the 7th parameter of {@code genTestTrain}
     * (the column name passed on to {@code kSamplingSplit}) — inferred from the method name.
     */
    public String genTestTrain$default$7() {
        return "syntheticColumn";
    }

    /**
     * Returns the constant {@code "datetime"}.
     * Presumably the compiler-generated default for the 8th parameter of {@code genTestTrain}
     * (the column name passed on to {@code chronologicalSplit}) — inferred from the method name.
     */
    public String genTestTrain$default$8() {
        return "datetime";
    }

    /**
     * Returns the constant {@code 0.05}.
     * Presumably the compiler-generated default for the 9th parameter of {@code genTestTrain}
     * (the random-shift value passed on to {@code chronologicalSplit}) — inferred from the method name.
     */
    public double genTestTrain$default$9() {
        return 0.05d;
    }

    /**
     * Returns the constant {@code 0.5}.
     * Presumably the compiler-generated default for the 10th parameter of {@code genTestTrain}
     * (the extra value passed on to {@code stratifyReduce}) — inferred from the method name.
     */
    public double genTestTrain$default$10() {
        return 0.5d;
    }

    /**
     * Re-partitions both data sets to {@code i} partitions, persists them with the
     * {@code DISK_ONLY} storage level, and runs a {@code foreach} over each one.
     *
     * <p>When {@code z} is true the data sets are resized with {@code repartition}; otherwise
     * with {@code coalesce}. The {@code foreach} calls follow log messages saying they force the
     * persist; the functions they apply are defined outside this view.
     *
     * @param dataset first (train) data set
     * @param dataset2 second (test) data set
     * @param i target number of partitions
     * @param z whether to use {@code repartition} instead of {@code coalesce}
     * @return tuple of the persisted first and second data sets
     */
    public Tuple2<Dataset<Row>, Dataset<Row>> optimizeTestTrain(Dataset<Row> dataset, Dataset<Row> dataset2, int i, boolean z) {
        com$databricks$labs$automl$model$tools$split$SplitOperators$$logger().log(Level.DEBUG, new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"DEBUG: Train persist called. Shuffle = ", ". Optimal parts: ", ""})).s(Predef$.MODULE$.genericWrapArray(new Object[]{BoxesRunTime.boxToBoolean(z), BoxesRunTime.boxToInteger(i)})));
        Dataset persistedTrain;
        if (z) {
            persistedTrain = dataset.repartition(i).persist(StorageLevel$.MODULE$.DISK_ONLY());
        } else {
            persistedTrain = dataset.coalesce(i).persist(StorageLevel$.MODULE$.DISK_ONLY());
        }

        com$databricks$labs$automl$model$tools$split$SplitOperators$$logger().log(Level.DEBUG, new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"DEBUG: Test persist called. Shuffle = ", ". Optimal parts: ", ""})).s(Predef$.MODULE$.genericWrapArray(new Object[]{BoxesRunTime.boxToBoolean(z), BoxesRunTime.boxToInteger(i)})));
        Dataset persistedTest;
        if (z) {
            persistedTest = dataset2.repartition(i).persist(StorageLevel$.MODULE$.DISK_ONLY());
        } else {
            persistedTest = dataset2.coalesce(i).persist(StorageLevel$.MODULE$.DISK_ONLY());
        }

        // Run an action over each data set after marking it for persistence.
        com$databricks$labs$automl$model$tools$split$SplitOperators$$logger().log(Level.DEBUG, "DEBUG: Forcing the persist for Train");
        persistedTrain.foreach(new SplitOperators$$anonfun$optimizeTestTrain$1());
        com$databricks$labs$automl$model$tools$split$SplitOperators$$logger().log(Level.DEBUG, "DEBUG: Forcing the persist for Test");
        persistedTest.foreach(new SplitOperators$$anonfun$optimizeTestTrain$2());
        return new Tuple2<>(persistedTrain, persistedTest);
    }

    /**
     * Returns the constant {@code false}.
     * Presumably the compiler-generated default for the 4th parameter of {@code optimizeTestTrain}
     * (the repartition-versus-coalesce flag) — inferred from the method name.
     */
    public boolean optimizeTestTrain$default$4() {
        return false;
    }

    /**
     * Returns the singleton stored in {@code MODULE$}.
     * Presumably serves as the Java serialization {@code readResolve} hook so that a
     * deserialized copy is replaced by the singleton — confirm the class is serialized anywhere.
     */
    private Object readResolve() {
        return MODULE$;
    }

    /**
     * Private constructor, invoked once from the static initializer.
     * Publishes the instance through {@code MODULE$}, runs the {@code SparkSessionWrapper}
     * trait initializer, and creates the logger for this class.
     */
    private SplitOperators$() {
        // Publish the singleton before any further initialization runs.
        MODULE$ = this;
        SparkSessionWrapper.Cclass.$init$(this);
        this.com$databricks$labs$automl$model$tools$split$SplitOperators$$logger = Logger.getLogger(getClass());
    }
}
