package de.unistuttgart.isa.liquidsvm.spark;

import de.unistuttgart.isa.liquidsvm.Config;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.spark.SparkContext;
import org.apache.spark.SparkContext$;
import org.apache.spark.broadcast.Broadcast;
import org.apache.spark.mllib.clustering.KMeans$;
import org.apache.spark.mllib.clustering.KMeansModel;
import org.apache.spark.mllib.feature.StandardScaler;
import org.apache.spark.mllib.feature.StandardScalerModel;
import org.apache.spark.mllib.linalg.Vector;
import org.apache.spark.mllib.regression.LabeledPoint;
import org.apache.spark.rdd.RDD;
import org.apache.spark.rdd.RDD$;
import org.apache.spark.storage.StorageLevel;
import org.apache.spark.storage.StorageLevel$;
import scala.Array$;
import scala.Function1;
import scala.Predef$;
import scala.StringContext;
import scala.Tuple2;
import scala.Tuple3;
import scala.Tuple4;
import scala.collection.Map;
import scala.collection.TraversableOnce;
import scala.collection.immutable.List$;
import scala.collection.mutable.StringBuilder;
import scala.math.Ordering$Int$;
import scala.reflect.ClassTag$;
import scala.runtime.BoxesRunTime;
import scala.runtime.ObjectRef;
import scala.runtime.ScalaRunTime$;

/* compiled from: spark.scala */
/* loaded from: input_file:de/unistuttgart/isa/liquidsvm/spark/MyUtil2$.class */
/**
 * Singleton holding helper routines for the liquidSVM Spark experiments:
 * loading delimited text files into {@link LabeledPoint} RDDs, writing and
 * reading per-cell train/test splits, scaling data, and driving
 * {@link DistributedSVM} runs.
 *
 * <p>The methods named {@code xxx$default$N} return the default value of the
 * N-th parameter of method {@code xxx}.
 */
public final class MyUtil2$ {
    /** The single instance of this class. */
    public static final MyUtil2$ MODULE$ = new MyUtil2$();

    /**
     * Loads a text file into an RDD of labeled points, optionally down-sampled.
     *
     * <p>Each line is converted to a {@code double[]} by {@code MyUtil2$$anonfun$23}
     * (constructed with {@code str2}) and then to a {@link LabeledPoint} by
     * {@code MyUtil2$$anonfun$24} (constructed with {@code function1}).
     *
     * @param str          path of the file to read; also used as the RDD name
     * @param i            requested number of rows; when positive and smaller than the
     *                     row count, the data is sampled without replacement with
     *                     fraction {@code i / rowCount}; otherwise all rows are kept
     * @param str2         passed to the line parser (presumably the field delimiter,
     *                     see {@link #loadData$default$3()} - confirm against the closure)
     * @param storageLevel storage level the resulting RDD is persisted with;
     *                     {@code null} falls back to {@code cache()}
     * @param function1    passed to the point builder (presumably a label transform -
     *                     confirm against the closure)
     * @return the named, persisted RDD
     */
    public RDD<LabeledPoint> loadData(String str, int i, String str2, StorageLevel storageLevel, Function1<Object, Object> function1) {
        SparkContext sc = SparkContext$.MODULE$.getOrCreate();
        // Raw RDD type kept on purpose: Scala's generic signatures are awkward to spell from Java.
        RDD points = sc.textFile(str, sc.defaultParallelism() * 10)
                .map(new MyUtil2$$anonfun$23(str2), ClassTag$.MODULE$.apply(ScalaRunTime$.MODULE$.arrayClass(Double.TYPE)))
                .map(new MyUtil2$$anonfun$24(function1), ClassTag$.MODULE$.apply(LabeledPoint.class));
        if (i > 0) {
            // Floating-point division: integer division would truncate every
            // fraction below 1 to 0 (sampling nothing) and throw on an empty file.
            double fraction = (double) i / points.count();
            if (fraction < 1) {
                points = points.sample(false, fraction, points.sample$default$3());
            }
        }
        RDD named = points.setName(str);
        // Honour the caller's storage level instead of always caching in memory.
        return storageLevel == null ? named.cache() : named.persist(storageLevel);
    }

    /** Default for parameter 2 of {@code loadData}: 0, which disables down-sampling. */
    public int loadData$default$2() {
        return 0;
    }

    /** Default for parameter 3 of {@code loadData}. */
    public String loadData$default$3() {
        return ", ";
    }

    /** Default for parameter 4 of {@code loadData}: {@code MEMORY_ONLY}. */
    public StorageLevel loadData$default$4() {
        return StorageLevel$.MODULE$.MEMORY_ONLY();
    }

    /** Default for parameter 5 of {@code loadData}. */
    public Function1<Object, Object> loadData$default$5() {
        return new MyUtil2$$anonfun$loadData$default$5$1();
    }

    /**
     * Writes the given train/test cells as text to {@code str + ".train.splits"}
     * and {@code str + ".test.splits"}.
     *
     * @param str  path prefix of the two output directories
     * @param rdd  cells to save; {@code MyUtil2$$anonfun$25} and {@code MyUtil2$$anonfun$27}
     *             each project an element to a {@code LabeledPoint[]} (presumably the
     *             train and test part respectively - confirm against the closures)
     * @param str2 not used by the code in this method
     */
    public void saveTrainTestP(String str, RDD<Tuple3<LabeledPoint[], LabeledPoint[], Object>> rdd, String str2) {
        RDD trainCells = rdd.map(new MyUtil2$$anonfun$25(), ClassTag$.MODULE$.apply(ScalaRunTime$.MODULE$.arrayClass(LabeledPoint.class)));
        trainCells.mapPartitionsWithIndex(new MyUtil2$$anonfun$26(), trainCells.mapPartitionsWithIndex$default$2(), ClassTag$.MODULE$.apply(String.class))
                .saveAsTextFile(str + ".train.splits");

        RDD testCells = rdd.map(new MyUtil2$$anonfun$27(), ClassTag$.MODULE$.apply(ScalaRunTime$.MODULE$.arrayClass(LabeledPoint.class)));
        testCells.mapPartitionsWithIndex(new MyUtil2$$anonfun$28(), testCells.mapPartitionsWithIndex$default$2(), ClassTag$.MODULE$.apply(String.class))
                .saveAsTextFile(str + ".test.splits");
    }

    /** Default for parameter 3 of {@code saveTrainTestP}. */
    public String saveTrainTestP$default$3() {
        return ", ";
    }

    /**
     * Reads splits stored under {@code str + ".train.splits"} and
     * {@code str + ".test.splits"} and co-groups them by key into one RDD.
     *
     * <p>The number of cells is the number of entries of the train directory that
     * survive {@code MyUtil2$$anonfun$29} / {@code MyUtil2$$anonfun$30}; it is printed
     * and used to size the {@link CellPartitioner} of the co-group.
     *
     * @param str          path prefix of the two input directories
     * @param str2         passed to both line parsers
     * @param function1    passed to both line parsers (train and test)
     * @param function12   not used by the code in this method
     *                     (NOTE(review): the test parser receives {@code function1} -
     *                     confirm whether it was meant to receive this argument)
     * @param storageLevel storage level of the returned RDD
     * @return the co-grouped, named and persisted RDD
     */
    public RDD<Tuple3<LabeledPoint[], LabeledPoint[], Object>> readTrainTestP(String str, String str2, Function1<Object, Object> function1, Function1<Object, Object> function12, StorageLevel storageLevel) {
        SparkContext sc = SparkContext$.MODULE$.getOrCreate();
        String trainPath = str + ".train.splits";
        String testPath = str + ".test.splits";

        int size = Predef$.MODULE$.refArrayOps(
                (Object[]) Predef$.MODULE$.refArrayOps(
                        (Object[]) Predef$.MODULE$.refArrayOps(
                                FileSystem.get(sc.hadoopConfiguration()).listStatus(new Path(str + ".train.splits/")))
                                .map(new MyUtil2$$anonfun$29(), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(String.class))))
                        .filter(new MyUtil2$$anonfun$30()))
                .size();
        Predef$.MODULE$.println("Reading " + size + " cells");

        RDD train = sc.textFile(trainPath, sc.textFile$default$2())
                .map(new MyUtil2$$anonfun$31(str2, function1), ClassTag$.MODULE$.apply(Tuple2.class))
                .setName(trainPath);
        // The RDD name now matches the path it was read from (the dot was missing before).
        RDD test = sc.textFile(testPath, sc.textFile$default$2())
                .map(new MyUtil2$$anonfun$33(str2, function1), ClassTag$.MODULE$.apply(Tuple2.class))
                .setName(testPath);

        return RDD$.MODULE$.rddToPairRDDFunctions(train, ClassTag$.MODULE$.Int(), ClassTag$.MODULE$.apply(LabeledPoint.class), Ordering$Int$.MODULE$)
                .cogroup(test, new CellPartitioner(size))
                .map(new MyUtil2$$anonfun$readTrainTestP$1(), ClassTag$.MODULE$.apply(Tuple3.class))
                .setName(str + "-trainTestP")
                .persist(storageLevel);
    }

    /** Default for parameter 2 of {@code readTrainTestP}. */
    public String readTrainTestP$default$2() {
        return ", ";
    }

    /** Default for parameter 3 of {@code readTrainTestP}. */
    public Function1<Object, Object> readTrainTestP$default$3() {
        return new MyUtil2$$anonfun$readTrainTestP$default$3$1();
    }

    /** Default for parameter 4 of {@code readTrainTestP}. */
    public Function1<Object, Object> readTrainTestP$default$4() {
        return new MyUtil2$$anonfun$readTrainTestP$default$4$1();
    }

    /** Default for parameter 5 of {@code readTrainTestP}: {@code MEMORY_ONLY}. */
    public StorageLevel readTrainTestP$default$5() {
        return StorageLevel$.MODULE$.MEMORY_ONLY();
    }

    /**
     * Evaluates binary predictions.
     *
     * <p>Builds one {@link BinaryEvaluator} per element via {@code MyUtil2$$anonfun$35}
     * (constructed with {@code i}) and reduces them with {@code MyUtil2$$anonfun$36}.
     * Additionally computes two ratios of filtered row counts
     * ({@code anonfun$39 / anonfun$37} and {@code anonfun$40 / anonfun$38}).
     * The ratios are computed in floating point; a zero denominator yields
     * {@code NaN} or an infinity rather than an exception.
     *
     * @param rdd (label, predictions) pairs to evaluate
     * @param i   passed to the per-element evaluator closure
     * @return (combined evaluator, product of both ratios, first ratio, second ratio)
     */
    public Tuple4<BinaryEvaluator, Object, Object, Object> evalBinary(RDD<Tuple2<Object, double[]>> rdd, int i) {
        BinaryEvaluator combined = (BinaryEvaluator) rdd
                .map(new MyUtil2$$anonfun$35(i), ClassTag$.MODULE$.apply(BinaryEvaluator.class))
                .reduce(new MyUtil2$$anonfun$36());
        long firstDenominator = rdd.filter(new MyUtil2$$anonfun$37()).count();
        // Cast before dividing: long / long would truncate each ratio to 0 or 1.
        double firstRatio = (double) rdd.filter(new MyUtil2$$anonfun$39()).count() / firstDenominator;
        long secondNumerator = rdd.filter(new MyUtil2$$anonfun$40()).count();
        double secondRatio = (double) secondNumerator / rdd.filter(new MyUtil2$$anonfun$38()).count();
        return new Tuple4<>(combined, BoxesRunTime.boxToDouble(firstRatio * secondRatio), BoxesRunTime.boxToDouble(firstRatio), BoxesRunTime.boxToDouble(secondRatio));
    }

    /** Default for parameter 2 of {@code evalBinary}. */
    public int evalBinary$default$2() {
        return 0;
    }

    /**
     * Loads {@code str2 + str + ".train.csv"} and {@code str2 + str + ".test.csv"},
     * lets a {@link DistributedSVM} build train/test cells from them and saves the
     * cells via {@link #saveTrainTestP} under the prefix {@code str}.
     *
     * @param str    data set name
     * @param i      row limit forwarded to {@link #loadData} for both files
     * @param i2     third constructor argument of {@link DistributedSVM}
     * @param i3     fourth constructor argument of {@link DistributedSVM}
     * @param str2   path prefix prepended to the data set name
     * @param str3   third argument forwarded to {@link #loadData}
     * @param config configuration handed to {@link DistributedSVM}
     * @return the created train/test cells
     */
    public RDD<Tuple3<LabeledPoint[], LabeledPoint[], Object>> doExperimentSplit(String str, int i, int i2, int i3, String str2, String str3, Config config) {
        RDD<LabeledPoint> train = loadData(str2 + str + ".train.csv", i, str3, loadData$default$4(), loadData$default$5());
        // Construct the SVM before loading the test data, as the original did.
        DistributedSVM svm = new DistributedSVM("MC", train, i2, i3, config);
        RDD<LabeledPoint> test = loadData(str2 + str + ".test.csv", i, str3, loadData$default$4(), loadData$default$5());
        RDD<Tuple3<LabeledPoint[], LabeledPoint[], Object>> cells = svm.createTrainAndTest(test);
        saveTrainTestP(str, cells, saveTrainTestP$default$3());
        return cells;
    }

    /** Default for parameter 2 of {@code doExperimentSplit}. */
    public int doExperimentSplit$default$2() {
        return 0;
    }

    /** Default for parameter 3 of {@code doExperimentSplit}. */
    public int doExperimentSplit$default$3() {
        return 50000;
    }

    /** Default for parameter 4 of {@code doExperimentSplit}. */
    public int doExperimentSplit$default$4() {
        return 2000;
    }

    /** Default for parameter 5 of {@code doExperimentSplit}: depends on whether the context is local. */
    public String doExperimentSplit$default$5() {
        return SparkContext$.MODULE$.getOrCreate().isLocal() ? "../../data/" : "/";
    }

    /** Default for parameter 6 of {@code doExperimentSplit}. */
    public String doExperimentSplit$default$6() {
        return ", ";
    }

    /** Default for parameter 7 of {@code doExperimentSplit}: a fresh {@link Config}. */
    public Config doExperimentSplit$default$7() {
        return new Config();
    }

    /**
     * Trains and predicts on pre-split cells with a freshly configured
     * {@link DistributedSVM} and reports a ratio of filtered predictions.
     *
     * @param str weights string stored under the {@code "WEIGHTS"} config key
     * @param rdd train/test cells
     * @param i   display level of the SVM configuration
     * @param i2  forwarded to {@code trainAndPredict}
     * @param i3  forwarded to {@code trainAndPredict}
     * @param i4  forwarded to {@code trainAndPredict}
     * @param i5  forwarded to {@code trainAndPredict}
     * @return (fraction of predictions accepted by {@code MyUtil2$$anonfun$41},
     *         the prediction RDD named {@code "res"}, the SVM used)
     */
    public Tuple3<Object, RDD<Tuple2<Object, double[]>>, DistributedSVM> doExperimentTrainTest(String str, RDD<Tuple3<LabeledPoint[], LabeledPoint[], Object>> rdd, int i, int i2, int i3, int i4, int i5) {
        DistributedSVM svm = new DistributedSVM("MC", null, DistributedSVM$.MODULE$.$lessinit$greater$default$3(), DistributedSVM$.MODULE$.$lessinit$greater$default$4(), DistributedSVM$.MODULE$.$lessinit$greater$default$5());
        svm.config().display(i).threads(1).set("VORONOI", "6 4000 1 100000").set("WEIGHTS", str.toString());
        // NOTE(review): the decompiled source referenced an undeclared local here;
        // it is taken to be the prediction RDD returned below - confirm against spark.scala.
        RDD<Tuple2<Object, double[]>> predictions = svm.trainAndPredict(svm.trainAndPredict$default$1(), rdd, i2, i3, i4, i5).setName("res");
        // Floating-point division so the fraction is not truncated to 0 or 1.
        double fraction = (double) predictions.filter(new MyUtil2$$anonfun$41()).count() / predictions.count();
        return new Tuple3<>(BoxesRunTime.boxToDouble(fraction), predictions, svm);
    }

    /** Default for parameter 1 of {@code doExperimentTrainTest}. */
    public String doExperimentTrainTest$default$1() {
        return "0.2 0.5 0.8";
    }

    /** Default for parameter 2 of {@code doExperimentTrainTest}: the stored {@code "covtype-full"} splits. */
    public RDD<Tuple3<LabeledPoint[], LabeledPoint[], Object>> doExperimentTrainTest$default$2() {
        return readTrainTestP("covtype-full", readTrainTestP$default$2(), new MyUtil2$$anonfun$1(), readTrainTestP$default$4(), StorageLevel$.MODULE$.MEMORY_ONLY());
    }

    /** Default for parameter 3 of {@code doExperimentTrainTest}. */
    public int doExperimentTrainTest$default$3() {
        return 3;
    }

    /** Default for parameter 4 of {@code doExperimentTrainTest}. */
    public int doExperimentTrainTest$default$4() {
        return 6;
    }

    /** Default for parameter 5 of {@code doExperimentTrainTest}. */
    public int doExperimentTrainTest$default$5() {
        return 11;
    }

    /** Default for parameter 6 of {@code doExperimentTrainTest}. */
    public int doExperimentTrainTest$default$6() {
        return 6;
    }

    /** Default for parameter 7 of {@code doExperimentTrainTest}. */
    public int doExperimentTrainTest$default$7() {
        return 12;
    }

    /**
     * Loads the train and test files of a data set and prepares a configured
     * {@link DistributedSVM} on the training data; the training RDD is counted
     * before returning.
     *
     * @param str    data set name
     * @param i      row limit forwarded to {@link #loadData}
     * @param i2     third constructor argument of {@link DistributedSVM}
     * @param i3     fourth constructor argument of {@link DistributedSVM}
     * @param str2   path prefix prepended to the data set name
     * @param str3   third argument forwarded to {@link #loadData}
     * @param config configuration handed to {@link DistributedSVM}
     * @return (the SVM, the training RDD)
     */
    public Tuple2<DistributedSVM, RDD<LabeledPoint>> doExperimentBdcomp(String str, int i, int i2, int i3, String str2, String str3, Config config) {
        RDD<LabeledPoint> train = loadData(str2 + str + ".train.csv", i, str3, loadData$default$4(), loadData$default$5());
        // NOTE(review): the test RDD is created but its result is discarded; kept
        // because loadData may run a count job when i > 0 - confirm it can be dropped.
        loadData(str2 + str + ".test.csv", i, str3, loadData$default$4(), loadData$default$5());
        DistributedSVM svm = new DistributedSVM("MC", train, i2, i3, config);
        svm.config().display(3).threads(-1).set("VORONOI", "6 4000 1 100000");
        train.count();
        return new Tuple2<>(svm, train);
    }

    /** Default for parameter 1 of {@code doExperimentBdcomp}. */
    public String doExperimentBdcomp$default$1() {
        return "BDCOMP-scaled";
    }

    /** Default for parameter 2 of {@code doExperimentBdcomp}. */
    public int doExperimentBdcomp$default$2() {
        return 0;
    }

    /** Default for parameter 3 of {@code doExperimentBdcomp}. */
    public int doExperimentBdcomp$default$3() {
        return 50000;
    }

    /** Default for parameter 4 of {@code doExperimentBdcomp}. */
    public int doExperimentBdcomp$default$4() {
        return 2000;
    }

    /** Default for parameter 5 of {@code doExperimentBdcomp}: depends on whether the context is local. */
    public String doExperimentBdcomp$default$5() {
        return SparkContext$.MODULE$.getOrCreate().isLocal() ? "../../data/" : "/";
    }

    /** Default for parameter 6 of {@code doExperimentBdcomp}. */
    public String doExperimentBdcomp$default$6() {
        return ",";
    }

    /** Default for parameter 7 of {@code doExperimentBdcomp}: a fresh {@link Config}. */
    public Config doExperimentBdcomp$default$7() {
        return new Config();
    }

    /**
     * Trains and predicts on pre-split cells with a freshly configured
     * {@link DistributedSVM}.
     *
     * @param str  weights string stored under the {@code "WEIGHTS"} config key
     * @param rdd  train/test cells
     * @param str2 value stored under the {@code "VORONOI"} config key
     * @param i    forwarded to {@code trainAndPredict}
     * @param i2   forwarded to {@code trainAndPredict}
     * @param i3   forwarded to {@code trainAndPredict}
     * @param i4   forwarded to {@code trainAndPredict}
     * @return (prediction RDD named {@code "res"}, the input cells, the SVM used)
     */
    public Tuple3<RDD<Tuple2<Object, double[]>>, RDD<Tuple3<LabeledPoint[], LabeledPoint[], Object>>, DistributedSVM> doExperimentBdcompTrainTest(String str, RDD<Tuple3<LabeledPoint[], LabeledPoint[], Object>> rdd, String str2, int i, int i2, int i3, int i4) {
        DistributedSVM svm = new DistributedSVM("MC", null, DistributedSVM$.MODULE$.$lessinit$greater$default$3(), DistributedSVM$.MODULE$.$lessinit$greater$default$4(), DistributedSVM$.MODULE$.$lessinit$greater$default$5());
        svm.config().display(3).threads(1).set("VORONOI", str2).set("WEIGHTS", str.toString()).gridChoice(2);
        return new Tuple3<>(svm.trainAndPredict(svm.trainAndPredict$default$1(), rdd, i, i2, i3, i4).setName("res"), rdd, svm);
    }

    /** Default for parameter 1 of {@code doExperimentBdcompTrainTest}. */
    public String doExperimentBdcompTrainTest$default$1() {
        return "0.975";
    }

    /** Default for parameter 2 of {@code doExperimentBdcompTrainTest}: the stored {@code "bdcomp-scaled"} splits. */
    public RDD<Tuple3<LabeledPoint[], LabeledPoint[], Object>> doExperimentBdcompTrainTest$default$2() {
        return readTrainTestP("bdcomp-scaled", readTrainTestP$default$2(), new MyUtil2$$anonfun$2(), readTrainTestP$default$4(), StorageLevel$.MODULE$.MEMORY_ONLY());
    }

    /** Default for parameter 3 of {@code doExperimentBdcompTrainTest}. */
    public String doExperimentBdcompTrainTest$default$3() {
        return "6 20000 1 200000";
    }

    /** Default for parameter 4 of {@code doExperimentBdcompTrainTest}. */
    public int doExperimentBdcompTrainTest$default$4() {
        return 6;
    }

    /** Default for parameter 5 of {@code doExperimentBdcompTrainTest}. */
    public int doExperimentBdcompTrainTest$default$5() {
        return 11;
    }

    /** Default for parameter 6 of {@code doExperimentBdcompTrainTest}. */
    public int doExperimentBdcompTrainTest$default$6() {
        return 6;
    }

    /** Default for parameter 7 of {@code doExperimentBdcompTrainTest}. */
    public int doExperimentBdcompTrainTest$default$7() {
        return 12;
    }

    /**
     * Fits a {@link StandardScaler} (constructed with both flags {@code true}) on
     * the training data and writes the transformed train and test data as text to
     * {@code str + "-scaled.train.csv"} and {@code str + "-scaled.test.csv"}.
     *
     * @param str  path prefix of the output directories
     * @param rdd  training data; the scaler is fitted on it
     * @param rdd2 test data; transformed with the scaler fitted on {@code rdd}
     */
    public void scaleAndSave(String str, RDD<LabeledPoint> rdd, RDD<LabeledPoint> rdd2) {
        StandardScalerModel scaler = new StandardScaler(true, true)
                .fit(rdd.map(new MyUtil2$$anonfun$42(), ClassTag$.MODULE$.apply(Vector.class)));
        rdd.map(new MyUtil2$$anonfun$scaleAndSave$1(scaler), ClassTag$.MODULE$.apply(String.class))
                .saveAsTextFile(str + "-scaled.train.csv");
        rdd2.map(new MyUtil2$$anonfun$scaleAndSave$2(scaler), ClassTag$.MODULE$.apply(String.class))
                .saveAsTextFile(str + "-scaled.test.csv");
    }

    /**
     * Replaces {@code NaN} by {@code -1.0}; every other value is returned unchanged.
     *
     * @param d value to check
     * @return {@code -1.0} if {@code d} is {@code NaN}, otherwise {@code d}
     */
    public double changeNN(double d) {
        return Double.isNaN(d) ? -1.0d : d;
    }

    /**
     * Scales a data set, computes cell centers with a {@link DistributedSVM},
     * keeps only the centers selected by the per-point index closure, partitions
     * train and test data into cells and saves centers and splits as text.
     *
     * <p>Outputs: {@code str2 + ".origcenters.csv"}, {@code str2 + ".centers.csv"},
     * {@code str2 + ".train.splits"} and {@code str2 + ".test.splits"}.
     *
     * @param str       input prefix; {@code ".train.csv"} / {@code ".test.csv"} are appended
     * @param str2      output prefix
     * @param i         not used by the code in this method
     * @param i2        fourth constructor argument of {@link DistributedSVM}
     * @param i3        third constructor argument of {@link DistributedSVM}
     * @param function1 fifth argument forwarded to {@link #loadData}
     * @param i4        row limit for the training file
     * @param i5        row limit for the test file
     * @return (train points keyed by cell, test points keyed by cell, retained centers)
     */
    public Tuple3<RDD<Tuple2<Object, LabeledPoint>>, RDD<Tuple2<Object, LabeledPoint>>, Vector[]> splitData(String str, String str2, int i, int i2, int i3, Function1<Object, Object> function1, int i4, int i5) {
        SparkContext sc = SparkContext$.MODULE$.getOrCreate();
        RDD<LabeledPoint> rawTrain = loadData(str + ".train.csv", i4, ",", StorageLevel$.MODULE$.MEMORY_ONLY(), function1);
        RDD<LabeledPoint> rawTest = loadData(str + ".test.csv", i5, ",", StorageLevel$.MODULE$.NONE(), function1);

        Predef$.MODULE$.println(":::::::Scaling data...");
        StandardScalerModel scaler = new StandardScaler(true, true)
                .fit(rawTrain.map(new MyUtil2$$anonfun$43(), ClassTag$.MODULE$.apply(Vector.class)));
        RDD<LabeledPoint> scaledTest = rawTest.map(new MyUtil2$$anonfun$44(scaler), ClassTag$.MODULE$.apply(LabeledPoint.class));
        RDD<LabeledPoint> scaledTrain = rawTrain.map(new MyUtil2$$anonfun$45(scaler), ClassTag$.MODULE$.apply(LabeledPoint.class))
                .setName(str + ".train.scaled")
                .cache();

        Predef$.MODULE$.println(":::::::Calculating first batch of centers...");
        DistributedSVM svm = new DistributedSVM("MC", scaledTrain, i3, i2, DistributedSVM$.MODULE$.$lessinit$greater$default$5());
        // Mutable holders: the closures below are constructed with these references.
        ObjectRef centersRef = ObjectRef.create(svm.calculateCenters(false));

        Predef$.MODULE$.println(":::::::Saving first batch of centers...");
        sc.parallelize(Predef$.MODULE$.wrapRefArray((Vector[]) centersRef.elem), sc.parallelize$default$2(), ClassTag$.MODULE$.apply(Vector.class))
                .zipWithIndex()
                .map(new MyUtil2$$anonfun$splitData$1(), ClassTag$.MODULE$.apply(LabeledPoint.class))
                .saveAsTextFile(str2 + ".origcenters.csv");

        Predef$.MODULE$.println(":::::::Calculating distances to centers...");
        ObjectRef broadcastRef = ObjectRef.create(sc.broadcast((Vector[]) centersRef.elem, ClassTag$.MODULE$.apply(ScalaRunTime$.MODULE$.arrayClass(Vector.class))));
        Map usedCenterCounts = scaledTrain
                .map(new MyUtil2$$anonfun$46(broadcastRef), ClassTag$.MODULE$.apply(ScalaRunTime$.MODULE$.arrayClass(Double.TYPE)))
                .setName(str + ".train.dists")
                .map(new MyUtil2$$anonfun$47(), ClassTag$.MODULE$.Int())
                .countByValue(Ordering$Int$.MODULE$);

        Predef$.MODULE$.println(":::::::Calculating reduced distances...");
        // Rebuild the center array from the keys that actually occurred, then re-broadcast it.
        centersRef.elem = (Vector[]) ((TraversableOnce) usedCenterCounts.keys().toList().map(new MyUtil2$$anonfun$splitData$2(centersRef), List$.MODULE$.canBuildFrom())).toArray(ClassTag$.MODULE$.apply(Vector.class));
        broadcastRef.elem = sc.broadcast((Vector[]) centersRef.elem, ClassTag$.MODULE$.apply(ScalaRunTime$.MODULE$.arrayClass(Vector.class)));

        Predef$.MODULE$.println(":::::::Saving centers...");
        sc.parallelize(Predef$.MODULE$.wrapRefArray((Vector[]) centersRef.elem), sc.parallelize$default$2(), ClassTag$.MODULE$.apply(Vector.class))
                .zipWithIndex()
                .map(new MyUtil2$$anonfun$splitData$3(), ClassTag$.MODULE$.apply(String.class))
                .saveAsTextFile(str2 + ".centers.csv");
        svm.centersB_$eq((Broadcast) broadcastRef.elem);

        Predef$.MODULE$.println(":::::::Splitting partitions...");
        RDD<Tuple2<Object, LabeledPoint>> trainCells = svm.partitionInCells(scaledTrain);
        RDD<Tuple2<Object, LabeledPoint>> testCells = svm.partitionInCells(scaledTest);

        Predef$.MODULE$.println(":::::::Saving splits...");
        trainCells.mapPartitionsWithIndex(new MyUtil2$$anonfun$49(), trainCells.mapPartitionsWithIndex$default$2(), ClassTag$.MODULE$.apply(String.class))
                .saveAsTextFile(str2 + ".train.splits");
        testCells.mapPartitionsWithIndex(new MyUtil2$$anonfun$50(), testCells.mapPartitionsWithIndex$default$2(), ClassTag$.MODULE$.apply(String.class))
                .saveAsTextFile(str2 + ".test.splits");
        return new Tuple3<>(trainCells, testCells, (Vector[]) centersRef.elem);
    }

    /** Default for parameter 1 of {@code splitData}. */
    public String splitData$default$1() {
        return "/BDCOMP-all";
    }

    /** Default for parameter 2 of {@code splitData}. */
    public String splitData$default$2() {
        return "BDCOMP-all-scaled";
    }

    /** Default for parameter 3 of {@code splitData}. */
    public int splitData$default$3() {
        return 5000;
    }

    /** Default for parameter 4 of {@code splitData}. */
    public int splitData$default$4() {
        return 100000;
    }

    /** Default for parameter 5 of {@code splitData}. */
    public int splitData$default$5() {
        return 100000;
    }

    /** Default for parameter 6 of {@code splitData}. */
    public Function1<Object, Object> splitData$default$6() {
        return new MyUtil2$$anonfun$splitData$default$6$1();
    }

    /** Default for parameter 7 of {@code splitData}. */
    public int splitData$default$7() {
        return 0;
    }

    /** Default for parameter 8 of {@code splitData}. */
    public int splitData$default$8() {
        return 0;
    }

    /**
     * Clusters the training data with k-means, uses the cluster centers as cell
     * centers of a {@link DistributedSVM}, partitions train and test data into
     * cells and saves model, centers and splits.
     *
     * <p>Outputs: {@code str + ".kmeansmodel"}, {@code str + ".centers.csv"},
     * {@code str + ".train.splits"} and {@code str + ".test.splits"}.
     *
     * <p>NOTE(review): {@code KMeans.train} is called with the literals 300 and 20;
     * of the parameters only {@code rdd}, {@code rdd2} and {@code str} are read by
     * this method - confirm whether {@code i} / {@code i2} were meant to replace
     * the literals.
     *
     * @param rdd       training data
     * @param rdd2      test data
     * @param i         not used by the code in this method
     * @param i2        not used by the code in this method
     * @param str       output prefix
     * @param i3        not used by the code in this method
     * @param i4        not used by the code in this method
     * @param i5        not used by the code in this method
     * @param function1 not used by the code in this method
     * @param i6        not used by the code in this method
     * @param i7        not used by the code in this method
     */
    public void splitDataKmeans(RDD<LabeledPoint> rdd, RDD<LabeledPoint> rdd2, int i, int i2, String str, int i3, int i4, int i5, Function1<Object, Object> function1, int i6, int i7) {
        SparkContext sc = SparkContext$.MODULE$.getOrCreate();
        KMeansModel model = KMeans$.MODULE$.train(rdd.map(new MyUtil2$$anonfun$51(), ClassTag$.MODULE$.apply(Vector.class)), 300, 20);
        model.save(sc, str + ".kmeansmodel");

        Vector[] centers = model.clusterCenters();
        sc.parallelize(Predef$.MODULE$.wrapRefArray(centers), sc.parallelize$default$2(), ClassTag$.MODULE$.apply(Vector.class))
                .zipWithIndex()
                .map(new MyUtil2$$anonfun$splitDataKmeans$1(), ClassTag$.MODULE$.apply(String.class))
                .saveAsTextFile(str + ".centers.csv");

        DistributedSVM svm = new DistributedSVM("MC", rdd, DistributedSVM$.MODULE$.$lessinit$greater$default$3(), DistributedSVM$.MODULE$.$lessinit$greater$default$4(), DistributedSVM$.MODULE$.$lessinit$greater$default$5());
        svm.centers_$eq(centers);
        svm.centersB_$eq(sc.broadcast(centers, ClassTag$.MODULE$.apply(ScalaRunTime$.MODULE$.arrayClass(Vector.class))));

        RDD<Tuple2<Object, LabeledPoint>> trainCells = svm.partitionInCells(rdd);
        RDD<Tuple2<Object, LabeledPoint>> testCells = svm.partitionInCells(rdd2);
        trainCells.mapPartitionsWithIndex(new MyUtil2$$anonfun$52(), trainCells.mapPartitionsWithIndex$default$2(), ClassTag$.MODULE$.apply(String.class))
                .saveAsTextFile(str + ".train.splits");
        testCells.mapPartitionsWithIndex(new MyUtil2$$anonfun$53(), testCells.mapPartitionsWithIndex$default$2(), ClassTag$.MODULE$.apply(String.class))
                .saveAsTextFile(str + ".test.splits");
    }

    /** Default for parameter 3 of {@code splitDataKmeans}. */
    public int splitDataKmeans$default$3() {
        return 300;
    }

    /** Default for parameter 4 of {@code splitDataKmeans}. */
    public int splitDataKmeans$default$4() {
        return 40;
    }

    /** Default for parameter 5 of {@code splitDataKmeans}. */
    public String splitDataKmeans$default$5() {
        return "BDCOMP-all-scaled.kmeans";
    }

    /** Default for parameter 6 of {@code splitDataKmeans}. */
    public int splitDataKmeans$default$6() {
        return 5000;
    }

    /** Default for parameter 7 of {@code splitDataKmeans}. */
    public int splitDataKmeans$default$7() {
        return 100000;
    }

    /** Default for parameter 8 of {@code splitDataKmeans}. */
    public int splitDataKmeans$default$8() {
        return 100000;
    }

    /** Default for parameter 9 of {@code splitDataKmeans}. */
    public Function1<Object, Object> splitDataKmeans$default$9() {
        return new MyUtil2$$anonfun$splitDataKmeans$default$9$1();
    }

    /** Default for parameter 10 of {@code splitDataKmeans}. */
    public int splitDataKmeans$default$10() {
        return 0;
    }

    /** Default for parameter 11 of {@code splitDataKmeans}. */
    public int splitDataKmeans$default$11() {
        return 0;
    }

    /** Not instantiable from outside; use {@link #MODULE$}. */
    private MyUtil2$() {
    }
}
