package com.databricks.labs.automl.model.tools.split;

import com.databricks.labs.automl.model.tools.split.SplitUtilityTooling;
import com.databricks.labs.automl.model.tools.structures.TrainSplitReferences;
import com.databricks.labs.automl.model.tools.structures.TrainTestData;
import com.databricks.labs.automl.model.tools.structures.TrainTestPaths;
import com.databricks.labs.automl.utils.SparkSessionWrapper;
import org.apache.log4j.Logger;
import org.apache.spark.SparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import scala.Predef$;
import scala.StringContext;
import scala.collection.TraversableOnce;
import scala.collection.immutable.IndexedSeq$;
import scala.reflect.ClassTag$;
import scala.reflect.ScalaSignature;
import scala.runtime.BoxedUnit;
import scala.runtime.RichInt$;

/* compiled from: DataSplitUtility.scala */
@ScalaSignature(bytes = "\u0006\u0001\u0005ud\u0001B\u0001\u0003\u0001E\u0011\u0001\u0003R1uCN\u0003H.\u001b;Vi&d\u0017\u000e^=\u000b\u0005\r!\u0011!B:qY&$(BA\u0003\u0007\u0003\u0015!xn\u001c7t\u0015\t9\u0001\"A\u0003n_\u0012,GN\u0003\u0002\n\u0015\u00051\u0011-\u001e;p[2T!a\u0003\u0007\u0002\t1\f'm\u001d\u0006\u0003\u001b9\t!\u0002Z1uC\n\u0014\u0018nY6t\u0015\u0005y\u0011aA2p[\u000e\u00011c\u0001\u0001\u00131A\u00111CF\u0007\u0002))\tQ#A\u0003tG\u0006d\u0017-\u0003\u0002\u0018)\t1\u0011I\\=SK\u001a\u0004\"!\u0007\u000e\u000e\u0003\tI!a\u0007\u0002\u0003'M\u0003H.\u001b;Vi&d\u0017\u000e^=U_>d\u0017N\\4\t\u0011u\u0001!\u0011!Q\u0001\ny\t1\"\\1j]\u0012\u000bG/Y:fiB\u0011q$\u000e\b\u0003AIr!!I\u0018\u000f\u0005\tbcBA\u0012*\u001d\t!s%D\u0001&\u0015\t1\u0003#\u0001\u0004=e>|GOP\u0005\u0002Q\u0005\u0019qN]4\n\u0005)Z\u0013AB1qC\u000eDWMC\u0001)\u0013\tic&A\u0003ta\u0006\u00148N\u0003\u0002+W%\u0011\u0001'M\u0001\u0004gFd'BA\u0017/\u0013\t\u0019D'A\u0004qC\u000e\\\u0017mZ3\u000b\u0005A\n\u0014B\u0001\u001c8\u0005%!\u0015\r^1Ge\u0006lWM\u0003\u00024i!A\u0011\b\u0001B\u0001B\u0003%!(A\u0006l\u0013R,'/\u0019;j_:\u001c\bCA\n<\u0013\taDCA\u0002J]RD\u0001B\u0010\u0001\u0003\u0002\u0003\u0006IaP\u0001\fgBd\u0017\u000e^'fi\"|G\r\u0005\u0002A\u0007:\u00111#Q\u0005\u0003\u0005R\ta\u0001\u0015:fI\u00164\u0017B\u0001#F\u0005\u0019\u0019FO]5oO*\u0011!\t\u0006\u0005\t\u000f\u0002\u0011\t\u0011)A\u0005\u007f\u0005YA.\u00192fY\u000e{G.^7o\u0011!I\u0005A!A!\u0002\u0013y\u0014a\u0002:p_R$\u0015N\u001d\u0005\t\u0017\u0002\u0011\t\u0011)A\u0005\u007f\u0005Y\u0001/\u001a:tSN$Xj\u001c3f\u0011!i\u0005A!A!\u0002\u0013y\u0014aC7pI\u0016dg)Y7jYfD\u0001b\u0014\u0001\u0003\u0002\u0003\u0006IAO\u0001\fa\u0006\u0014\u0018\r\u001c7fY&\u001cX\u000e\u0003\u0005R\u0001\t\u0005\t\u0015!\u0003S\u00031!(/Y5o!>\u0014H/[8o!\t\u00192+\u0003\u0002U)\t1Ai\\;cY\u0016D\u0001B\u0016\u0001\u0003\u0002\u0003\u0006IaP\u0001\rgftG\u000f[3uS\u000e\u001cu\u000e\u001c\u0005\t1\u0002\u0011\t\u0011)A\u0005\u007f\u0005iBO]1j]
N\u0003H.\u001b;DQJ|gn\u001c7pO&\u001c\u0017\r\\\"pYVlg\u000e\u0003\u0005[\u0001\t\u0005\t\u0015!\u0003S\u0003\u001d\"(/Y5o'Bd\u0017\u000e^\"ie>tw\u000e\\8hS\u000e\fGNU1oI>l\u0007+\u001a:dK:$\u0018mZ3\t\u0011q\u0003!\u0011!Q\u0001\nI\u000bqB]3ek\u000e$\u0018n\u001c8GC\u000e$xN\u001d\u0005\u0006=\u0002!\taX\u0001\u0007y%t\u0017\u000e\u001e \u0015\u001d\u0001\f'm\u00193fM\u001eD\u0017N[6m[B\u0011\u0011\u0004\u0001\u0005\u0006;u\u0003\rA\b\u0005\u0006su\u0003\rA\u000f\u0005\u0006}u\u0003\ra\u0010\u0005\u0006\u000fv\u0003\ra\u0010\u0005\u0006\u0013v\u0003\ra\u0010\u0005\u0006\u0017v\u0003\ra\u0010\u0005\u0006\u001bv\u0003\ra\u0010\u0005\u0006\u001fv\u0003\rA\u000f\u0005\u0006#v\u0003\rA\u0015\u0005\u0006-v\u0003\ra\u0010\u0005\u00061v\u0003\ra\u0010\u0005\u00065v\u0003\rA\u0015\u0005\u00069v\u0003\rA\u0015\u0005\b_\u0002\u0011\r\u0011\"\u0003q\u0003\u0019awnZ4feV\t\u0011\u000f\u0005\u0002sk6\t1O\u0003\u0002u]\u0005)An\\45U&\u0011ao\u001d\u0002\u0007\u0019><w-\u001a:\t\ra\u0004\u0001\u0015!\u0003r\u0003\u001dawnZ4fe\u0002B#a\u001e>\u0011\u0005MY\u0018B\u0001?\u0015\u0005%!(/\u00198tS\u0016tG\u000fC\u0004\u007f\u0001\t\u0007IQA@\u0002\u0019Ut\u0017.];f\u0019\u0006\u0014W\r\\:\u0016\u0005\u0005\u0005\u0001#B\n\u0002\u0004\u0005\u001d\u0011bAA\u0003)\t)\u0011I\u001d:bsB!\u0011\u0011BA\u0006\u001b\u0005!\u0014bAA\u0007i\t\u0019!k\\<\t\u0011\u0005E\u0001\u0001)A\u0007\u0003\u0003\tQ\"\u001e8jcV,G*\u00192fYN\u0004\u0003bBA\u000b\u0001\u0011%\u0011qC\u0001\u0012iJ\f\u0017N\\*qY&$\b+\u001a:tSN$XCAA\r!\u0015\u0019\u00121AA\u000e!\u0011\ti\"a\t\u000e\u0005\u0005}!bAA\u0011\t\u0005Q1\u000f\u001e:vGR,(/Z:\n\t\u0005\u0015\u0012q\u0004\u0002\u0015)J\f\u0017N\\*qY&$(+\u001a4fe\u0016t7-Z:\t\u000f\u0005%\u0002\u0001\"\u0003\u0002\u0018\u0005yAO]1j]N\u0003H.\u001b;DC\u000eDW\rC\u0004\u0002.\u0001!I!a\u0006\u0002\u001fQ\u0014\u0018-\u001b8Ta2LG\u000fR3mi\u0006Dq!!\r\u0001\t\u0003\t9\"\u0001\u0007qKJ4wN]7Ta2LGoB\u0004\u00026\tA\t!a\u000e\u0002!\u0011\u000bG/Y*qY&$X\u000b^5mSRL\bcA\r\u0002:\
u00191\u0011A\u0001E\u0001\u0003w\u0019R!!\u000f\u0013\u0003{\u00012aEA \u0013\r\t\t\u0005\u0006\u0002\r'\u0016\u0014\u0018.\u00197ju\u0006\u0014G.\u001a\u0005\b=\u0006eB\u0011AA#)\t\t9\u0004C\u0004\u0004\u0003s!\t!!\u0013\u00159\u0005e\u00111JA(\u0003#\n\u0019&!\u0016\u0002X\u0005e\u00131LA/\u0003?\n\t'a\u0019\u0002f!9\u0011QJA$\u0001\u0004q\u0012aC7bS:$\u0015\r^1TKRDa!OA$\u0001\u0004Q\u0004B\u0002 \u0002H\u0001\u0007q\b\u0003\u0004H\u0003\u000f\u0002\ra\u0010\u0005\u0007\u0013\u0006\u001d\u0003\u0019A \t\r-\u000b9\u00051\u0001@\u0011\u0019i\u0015q\ta\u0001\u007f!1q*a\u0012A\u0002iBa!UA$\u0001\u0004\u0011\u0006B\u0002,\u0002H\u0001\u0007q\b\u0003\u0004Y\u0003\u000f\u0002\ra\u0010\u0005\u00075\u0006\u001d\u0003\u0019\u0001*\t\rq\u000b9\u00051\u0001S\u0011)\tI'!\u000f\u0002\u0002\u0013%\u00111N\u0001\fe\u0016\fGMU3t_24X\r\u0006\u0002\u0002nA!\u0011qNA=\u001b\t\t\tH\u0003\u0003\u0002t\u0005U\u0014\u0001\u00027b]\u001eT!!a\u001e\u0002\t)\fg/Y\u0005\u0005\u0003w\n\tH\u0001\u0004PE*,7\r\u001e")
/* loaded from: input_file:com/databricks/labs/automl/model/tools/split/DataSplitUtility.class */
public class DataSplitUtility implements SplitUtilityTooling {
    // ---- Constructor-supplied configuration ---------------------------------------------------
    // Each field from mainDataset down to reductionFactor is assigned exactly once in the
    // constructor, from the constructor argument in the same position. The long
    // "com$databricks$...$$name" identifiers are the Scala compiler's expanded names for members
    // of the original DataSplitUtility.scala class. They are public presumably so the generated
    // DataSplitUtility$$anonfun$* closure classes can read them -- TODO confirm against those classes.

    /** Dataset to split; also the source of {@link #uniqueLabels}. */
    public final Dataset<Row> com$databricks$labs$automl$model$tools$split$DataSplitUtility$$mainDataset;
    /** Upper bound of the index range iterated by the trainSplit* methods. */
    private final int kIterations;
    public final String com$databricks$labs$automl$model$tools$split$DataSplitUtility$$splitMethod;
    /** Column selected from the main dataset when computing {@link #uniqueLabels}. */
    public final String com$databricks$labs$automl$model$tools$split$DataSplitUtility$$labelColumn;
    public final String com$databricks$labs$automl$model$tools$split$DataSplitUtility$$rootDir;
    /** Strategy selector read by performSplit(): "persist", "delta" or "cache". */
    private final String persistMode;
    /** Compared against "XGBoost" / "RandomForest" when a partition count is chosen. */
    private final String modelFamily;
    /** Passed to PerformanceSettings$ to derive a partition count. */
    private final int parallelism;
    public final double com$databricks$labs$automl$model$tools$split$DataSplitUtility$$trainPortion;
    public final String com$databricks$labs$automl$model$tools$split$DataSplitUtility$$syntheticCol;
    public final String com$databricks$labs$automl$model$tools$split$DataSplitUtility$$trainSplitChronologicalColumn;
    public final double com$databricks$labs$automl$model$tools$split$DataSplitUtility$$trainSplitChronologicalRandomPercentage;
    public final double com$databricks$labs$automl$model$tools$split$DataSplitUtility$$reductionFactor;

    // ---- Derived state ------------------------------------------------------------------------

    /** log4j logger for this class; transient, so it is skipped by Java serialization. */
    private final transient Logger com$databricks$labs$automl$model$tools$split$DataSplitUtility$$logger;
    /** Distinct rows of the label column, collected once in the constructor. */
    private final Row[] uniqueLabels;

    // The next two fields are intentionally NOT final: they are never assigned in the
    // constructor, only on first use inside spark$lzycompute() and sc$lzycompute(). Declaring
    // them final (as the decompiler originally emitted) is rejected by the Java compiler.
    private SparkSession spark;
    private SparkContext sc;
    /** Initialization flags for the lazy fields: bit 0x1 is set once spark is assigned, bit 0x2 once sc is assigned. */
    private volatile byte bitmap$0;

    /**
     * Static forwarder to {@code split} on the Scala companion-object singleton
     * ({@code DataSplitUtility$.MODULE$}).
     *
     * <p>All thirteen arguments are passed through unchanged and in order. The parameter list
     * has the same types and order as this class's constructor; presumably the companion builds
     * an instance and runs the split, but that code is not visible here -- confirm in
     * {@code DataSplitUtility$}.
     *
     * @return whatever the companion's {@code split} returns
     */
    public static TrainSplitReferences[] split(Dataset<Row> dataset, int i, String str, String str2, String str3, String str4, String str5, int i2, double d, String str6, String str7, double d2, double d3) {
        final DataSplitUtility$ companion = DataSplitUtility$.MODULE$;
        return companion.split(dataset, i, str, str2, str3, str4, str5, i2, d, str6, str7, d2, d3);
    }

    /**
     * Delegates to the {@code SplitUtilityTooling} trait implementation of {@code formRootPath},
     * passing this instance as the receiver.
     *
     * @param str argument forwarded unchanged to the trait implementation
     * @return the string produced by the trait implementation
     */
    @Override // com.databricks.labs.automl.model.tools.split.SplitUtilityTooling
    public String formRootPath(String str) {
        final String rootPath = SplitUtilityTooling.Cclass.formRootPath(this, str);
        return rootPath;
    }

    /**
     * Delegates to the {@code SplitUtilityTooling} trait implementation of
     * {@code formTrainTestPaths}, passing this instance as the receiver.
     *
     * @param str argument forwarded unchanged to the trait implementation
     * @return the {@code TrainTestPaths} produced by the trait implementation
     */
    @Override // com.databricks.labs.automl.model.tools.split.SplitUtilityTooling
    public TrainTestPaths formTrainTestPaths(String str) {
        final TrainTestPaths paths = SplitUtilityTooling.Cclass.formTrainTestPaths(this, str);
        return paths;
    }

    /**
     * Delegates to the {@code SplitUtilityTooling} trait implementation of
     * {@code storeLoadDelta}, passing this instance as the receiver.
     *
     * @param dataset        first dataset, forwarded unchanged
     * @param dataset2       second dataset, forwarded unchanged
     * @param trainTestPaths paths object, forwarded unchanged
     * @return the {@code TrainTestData} produced by the trait implementation
     */
    @Override // com.databricks.labs.automl.model.tools.split.SplitUtilityTooling
    public TrainTestData storeLoadDelta(Dataset<Row> dataset, Dataset<Row> dataset2, TrainTestPaths trainTestPaths) {
        final TrainTestData stored = SplitUtilityTooling.Cclass.storeLoadDelta(this, dataset, dataset2, trainTestPaths);
        return stored;
    }

    /* JADX WARN: Multi-variable type inference failed */
    /* JADX WARN: Type inference failed for: r0v0 */
    /* JADX WARN: Type inference failed for: r0v1, types: [java.lang.Throwable] */
    /* JADX WARN: Type inference failed for: r0v7 */
    private SparkSession spark$lzycompute() {
        ?? r0 = this;
        synchronized (r0) {
            if (((byte) (this.bitmap$0 & 1)) == 0) {
                this.spark = SparkSessionWrapper.Cclass.spark(this);
                this.bitmap$0 = (byte) (this.bitmap$0 | 1);
            }
            BoxedUnit boxedUnit = BoxedUnit.UNIT;
            r0 = r0;
            return this.spark;
        }
    }

    /**
     * Returns the {@code spark} field, initializing it on first access.
     *
     * <p>Bit 0x1 of the volatile {@code bitmap$0} records whether the field has been assigned;
     * when it is clear the call is routed through {@code spark$lzycompute()}.
     *
     * @return the session held in the {@code spark} field
     */
    @Override // com.databricks.labs.automl.utils.SparkSessionWrapper
    public SparkSession spark() {
        final boolean alreadyInitialized = ((byte) (this.bitmap$0 & 1)) != 0;
        if (alreadyInitialized) {
            return this.spark;
        }
        return spark$lzycompute();
    }

    /* JADX WARN: Multi-variable type inference failed */
    /* JADX WARN: Type inference failed for: r0v0 */
    /* JADX WARN: Type inference failed for: r0v1, types: [java.lang.Throwable] */
    /* JADX WARN: Type inference failed for: r0v7 */
    private SparkContext sc$lzycompute() {
        ?? r0 = this;
        synchronized (r0) {
            if (((byte) (this.bitmap$0 & 2)) == 0) {
                this.sc = SparkSessionWrapper.Cclass.sc(this);
                this.bitmap$0 = (byte) (this.bitmap$0 | 2);
            }
            BoxedUnit boxedUnit = BoxedUnit.UNIT;
            r0 = r0;
            return this.sc;
        }
    }

    /**
     * Returns the {@code sc} field, initializing it on first access.
     *
     * <p>Bit 0x2 of the volatile {@code bitmap$0} records whether the field has been assigned;
     * when it is clear the call is routed through {@code sc$lzycompute()}.
     *
     * @return the context held in the {@code sc} field
     */
    @Override // com.databricks.labs.automl.utils.SparkSessionWrapper
    public SparkContext sc() {
        final boolean alreadyInitialized = ((byte) (this.bitmap$0 & 2)) != 0;
        if (alreadyInitialized) {
            return this.sc;
        }
        return sc$lzycompute();
    }

    /**
     * Accessor for this instance's log4j logger (created in the constructor via
     * {@code Logger.getLogger(getClass())}). The expanded name is generated by the Scala compiler.
     *
     * @return the logger stored on this instance
     */
    public Logger com$databricks$labs$automl$model$tools$split$DataSplitUtility$$logger() {
        final Logger log = this.com$databricks$labs$automl$model$tools$split$DataSplitUtility$$logger;
        return log;
    }

    /**
     * Returns the distinct label rows collected from the main dataset in the constructor.
     *
     * <p>The internal array itself is returned (no copy is made).
     *
     * @return the array stored in the {@code uniqueLabels} field
     */
    public final Row[] uniqueLabels() {
        final Row[] labels = this.uniqueLabels;
        return labels;
    }

    /**
     * Builds the split references for the "persist" mode.
     *
     * <p>Picks a partition count from {@code PerformanceSettings$} ({@code xgbWorkers} when the
     * model family is "XGBoost", {@code optimalJVMModelPartitions} otherwise), then maps every
     * index in the exclusive range {@code 0 until kIterations} through the compiler-generated
     * closure {@code DataSplitUtility$$anonfun$trainSplitPersist$1}. The closure body is not
     * visible in this file.
     *
     * @return one {@code TrainSplitReferences} per index, i.e. {@code kIterations} entries
     */
    private TrainSplitReferences[] trainSplitPersist() {
        final int partitionCount;
        if ("XGBoost".equals(this.modelFamily)) {
            partitionCount = PerformanceSettings$.MODULE$.xgbWorkers(this.parallelism);
        } else {
            partitionCount = PerformanceSettings$.MODULE$.optimalJVMModelPartitions(this.parallelism);
        }

        final DataSplitUtility$$anonfun$trainSplitPersist$1 buildSplit =
                new DataSplitUtility$$anonfun$trainSplitPersist$1(this, partitionCount);
        final Object splits = RichInt$.MODULE$
                .until$extension0(Predef$.MODULE$.intWrapper(0), this.kIterations)
                .map(buildSplit, IndexedSeq$.MODULE$.canBuildFrom());

        return (TrainSplitReferences[]) ((TraversableOnce) splits).toArray(ClassTag$.MODULE$.apply(TrainSplitReferences.class));
    }

    /**
     * Builds the split references for the "cache" mode.
     *
     * <p>Picks a partition count from {@code PerformanceSettings$}:
     * <ul>
     *   <li>"XGBoost": {@code xgbWorkers(parallelism)}</li>
     *   <li>"RandomForest": {@code optimalJVMModelPartitions(parallelism) * 4}</li>
     *   <li>anything else: {@code optimalJVMModelPartitions(parallelism)}</li>
     * </ul>
     * Every index in the range is then mapped through the compiler-generated closure
     * {@code DataSplitUtility$$anonfun$trainSplitCache$1} (body not visible in this file).
     *
     * <p>Fix: the range was the inclusive {@code 0 to kIterations}, which produced
     * {@code kIterations + 1} splits. It now uses the exclusive {@code 0 until kIterations},
     * matching {@code trainSplitPersist()}, so the array length no longer depends on the
     * persist mode.
     *
     * @return one {@code TrainSplitReferences} per index, i.e. {@code kIterations} entries
     */
    private TrainSplitReferences[] trainSplitCache() {
        final String family = this.modelFamily;
        final int partitionCount;
        if ("XGBoost".equals(family)) {
            partitionCount = PerformanceSettings$.MODULE$.xgbWorkers(this.parallelism);
        } else if ("RandomForest".equals(family)) {
            partitionCount = PerformanceSettings$.MODULE$.optimalJVMModelPartitions(this.parallelism) * 4;
        } else {
            partitionCount = PerformanceSettings$.MODULE$.optimalJVMModelPartitions(this.parallelism);
        }

        final DataSplitUtility$$anonfun$trainSplitCache$1 buildSplit =
                new DataSplitUtility$$anonfun$trainSplitCache$1(this, partitionCount);
        // Exclusive upper bound: exactly kIterations splits.
        final Object splits = RichInt$.MODULE$
                .until$extension0(Predef$.MODULE$.intWrapper(0), this.kIterations)
                .map(buildSplit, IndexedSeq$.MODULE$.canBuildFrom());

        return (TrainSplitReferences[]) ((TraversableOnce) splits).toArray(ClassTag$.MODULE$.apply(TrainSplitReferences.class));
    }

    /**
     * Builds the split references for the "delta" mode.
     *
     * <p>Every index in the range is mapped through the compiler-generated closure
     * {@code DataSplitUtility$$anonfun$trainSplitDelta$1} (body not visible in this file).
     *
     * <p>Fix: the range was the inclusive {@code 0 to kIterations}, which produced
     * {@code kIterations + 1} splits. It now uses the exclusive {@code 0 until kIterations},
     * matching {@code trainSplitPersist()}, so the array length no longer depends on the
     * persist mode.
     *
     * @return one {@code TrainSplitReferences} per index, i.e. {@code kIterations} entries
     */
    private TrainSplitReferences[] trainSplitDelta() {
        final DataSplitUtility$$anonfun$trainSplitDelta$1 buildSplit =
                new DataSplitUtility$$anonfun$trainSplitDelta$1(this);
        // Exclusive upper bound: exactly kIterations splits.
        final Object splits = RichInt$.MODULE$
                .until$extension0(Predef$.MODULE$.intWrapper(0), this.kIterations)
                .map(buildSplit, IndexedSeq$.MODULE$.canBuildFrom());

        return (TrainSplitReferences[]) ((TraversableOnce) splits).toArray(ClassTag$.MODULE$.apply(TrainSplitReferences.class));
    }

    /**
     * Runs the split strategy selected by {@code persistMode}.
     *
     * <ul>
     *   <li>"persist" - {@code trainSplitPersist()}</li>
     *   <li>"delta" - {@code trainSplitDelta()}</li>
     *   <li>"cache" - {@code trainSplitCache()}</li>
     * </ul>
     *
     * @return the split references produced by the selected strategy
     * @throws UnsupportedOperationException if {@code persistMode} is none of the three values
     *         above (including {@code null}); the message names the rejected mode
     */
    public TrainSplitReferences[] performSplit() {
        final String mode = this.persistMode;
        if ("persist".equals(mode)) {
            return trainSplitPersist();
        }
        if ("delta".equals(mode)) {
            return trainSplitDelta();
        }
        if ("cache".equals(mode)) {
            return trainSplitCache();
        }
        // Scala s-interpolator: "Train Split mode <persistMode> is not supported."
        final StringContext messageTemplate = new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"Train Split mode ", " is not supported."}));
        throw new UnsupportedOperationException(messageTemplate.s(Predef$.MODULE$.genericWrapArray(new Object[]{this.persistMode})));
    }

    /**
     * Stores the split configuration, runs the trait initializers and eagerly collects the
     * distinct label rows.
     *
     * <p>Note that construction is not cheap: the last statement executes
     * {@code select(labelColumn).distinct().collect()} on {@code dataset}.
     *
     * @param dataset stored as {@code mainDataset}; also queried for the distinct label rows
     * @param i       stored as {@code kIterations}
     * @param str     stored as {@code splitMethod}
     * @param str2    stored as {@code labelColumn}; the column selected for {@code uniqueLabels}
     * @param str3    stored as {@code rootDir}
     * @param str4    stored as {@code persistMode}
     * @param str5    stored as {@code modelFamily}
     * @param i2      stored as {@code parallelism}
     * @param d       stored as {@code trainPortion}
     * @param str6    stored as {@code syntheticCol}
     * @param str7    stored as {@code trainSplitChronologicalColumn}
     * @param d2      stored as {@code trainSplitChronologicalRandomPercentage}
     * @param d3      stored as {@code reductionFactor}
     */
    public DataSplitUtility(Dataset<Row> dataset, int i, String str, String str2, String str3, String str4, String str5, int i2, double d, String str6, String str7, double d2, double d3) {
        // Plain field stores; these happen before the trait initializers below are invoked.
        this.com$databricks$labs$automl$model$tools$split$DataSplitUtility$$mainDataset = dataset;
        this.kIterations = i;
        this.com$databricks$labs$automl$model$tools$split$DataSplitUtility$$splitMethod = str;
        this.com$databricks$labs$automl$model$tools$split$DataSplitUtility$$labelColumn = str2;
        this.com$databricks$labs$automl$model$tools$split$DataSplitUtility$$rootDir = str3;
        this.persistMode = str4;
        this.modelFamily = str5;
        this.parallelism = i2;
        this.com$databricks$labs$automl$model$tools$split$DataSplitUtility$$trainPortion = d;
        this.com$databricks$labs$automl$model$tools$split$DataSplitUtility$$syntheticCol = str6;
        this.com$databricks$labs$automl$model$tools$split$DataSplitUtility$$trainSplitChronologicalColumn = str7;
        this.com$databricks$labs$automl$model$tools$split$DataSplitUtility$$trainSplitChronologicalRandomPercentage = d2;
        this.com$databricks$labs$automl$model$tools$split$DataSplitUtility$$reductionFactor = d3;
        // Trait initializers for the two mixed-in Scala traits (bodies not visible in this file).
        SparkSessionWrapper.Cclass.$init$(this);
        SplitUtilityTooling.Cclass.$init$(this);
        this.com$databricks$labs$automl$model$tools$split$DataSplitUtility$$logger = Logger.getLogger(getClass());
        // Eager Spark action: selects only the label column (the empty String[] is the varargs
        // tail of select), de-duplicates, and collects the rows into an array.
        this.uniqueLabels = (Row[]) dataset.select(str2, Predef$.MODULE$.wrapRefArray(new String[0])).distinct().collect();
    }
}
