package com.databricks.labs.automl.pipeline;

import com.databricks.labs.automl.inference.NaFillConfig;
import com.databricks.labs.automl.pipeline.HasFeatureColumn;
import com.databricks.labs.automl.pipeline.HasLabelColumn;
import com.databricks.labs.automl.sanitize.DataSanitizer;
import com.databricks.labs.automl.utils.AutoMlPipelineMlFlowUtils$;
import com.databricks.labs.automl.utils.SchemaUtils$;
import java.io.IOException;
import org.apache.spark.ml.param.BooleanParam;
import org.apache.spark.ml.param.DoubleArrayParam;
import org.apache.spark.ml.param.DoubleParam;
import org.apache.spark.ml.param.IntParam;
import org.apache.spark.ml.param.Param;
import org.apache.spark.ml.param.ParamMap;
import org.apache.spark.ml.param.StringArrayParam;
import org.apache.spark.ml.util.DefaultParamsWritable;
import org.apache.spark.ml.util.Identifiable$;
import org.apache.spark.ml.util.MLReader;
import org.apache.spark.ml.util.MLWritable;
import org.apache.spark.ml.util.MLWriter;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.functions$;
import org.apache.spark.sql.types.StructType;
import scala.Array$;
import scala.MatchError;
import scala.None$;
import scala.Option;
import scala.Predef$;
import scala.Predef$ArrowAssoc$;
import scala.Some;
import scala.Tuple2;
import scala.Tuple3;
import scala.collection.immutable.Map;
import scala.reflect.ClassTag$;
import scala.reflect.ScalaSignature;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;

/* compiled from: DataSanitizerTransformer.scala */
@ScalaSignature(bytes = "\u0006\u0001\rMc\u0001B\u0001\u0003\u00015\u0011\u0001\u0004R1uCN\u000bg.\u001b;ju\u0016\u0014HK]1og\u001a|'/\\3s\u0015\t\u0019A!\u0001\u0005qSB,G.\u001b8f\u0015\t)a!\u0001\u0004bkR|W\u000e\u001c\u0006\u0003\u000f!\tA\u0001\\1cg*\u0011\u0011BC\u0001\u000bI\u0006$\u0018M\u0019:jG.\u001c(\"A\u0006\u0002\u0007\r|Wn\u0001\u0001\u0014\u000b\u0001q!\u0003I\u0012\u0011\u0005=\u0001R\"\u0001\u0002\n\u0005E\u0011!aE!cgR\u0014\u0018m\u0019;Ue\u0006t7OZ8s[\u0016\u0014\bCA\n\u001f\u001b\u0005!\"BA\u000b\u0017\u0003\u0011)H/\u001b7\u000b\u0005]A\u0012AA7m\u0015\tI\"$A\u0003ta\u0006\u00148N\u0003\u0002\u001c9\u00051\u0011\r]1dQ\u0016T\u0011!H\u0001\u0004_J<\u0017BA\u0010\u0015\u0005U!UMZ1vYR\u0004\u0016M]1ng^\u0013\u0018\u000e^1cY\u0016\u0004\"aD\u0011\n\u0005\t\u0012!A\u0004%bg2\u000b'-\u001a7D_2,XN\u001c\t\u0003\u001f\u0011J!!\n\u0002\u0003!!\u000b7OR3biV\u0014XmQ8mk6t\u0007\u0002C\u0014\u0001\u0005\u000b\u0007I\u0011\t\u0015\u0002\u0007ULG-F\u0001*!\tQ\u0003G\u0004\u0002,]5\tAFC\u0001.\u0003\u0015\u00198-\u00197b\u0013\tyC&\u0001\u0004Qe\u0016$WMZ\u0005\u0003cI\u0012aa\u0015;sS:<'BA\u0018-\u0011!!\u0004A!A!\u0002\u0013I\u0013\u0001B;jI\u0002BQA\u000e\u0001\u0005\u0002]\na\u0001P5oSRtDC\u0001\u001d:!\ty\u0001\u0001C\u0003(k\u0001\u0007\u0011\u0006C\u0004<\u0001\t\u0007IQ\u0001\u001f\u0002\u001f9,X.\u001a:jG\u001aKG\u000e\\*uCR,\u0012!\u0010\t\u0004}\u0005KS\"A 
\u000b\u0005\u00013\u0012!\u00029be\u0006l\u0017B\u0001\"@\u0005\u0015\u0001\u0016M]1n\u0011\u0019!\u0005\u0001)A\u0007{\u0005\u0001b.^7fe&\u001cg)\u001b7m'R\fG\u000f\t\u0005\b\r\u0002\u0011\r\u0011\"\u0002=\u0003E\u0019\u0007.\u0019:bGR,'OR5mYN#\u0018\r\u001e\u0005\u0007\u0011\u0002\u0001\u000bQB\u001f\u0002%\rD\u0017M]1di\u0016\u0014h)\u001b7m'R\fG\u000f\t\u0005\b\u0015\u0002\u0011\r\u0011\"\u0002L\u0003}iw\u000eZ3m'\u0016dWm\u0019;j_:$\u0015n\u001d;j]\u000e$H\u000b\u001b:fg\"|G\u000eZ\u000b\u0002\u0019B\u0011a(T\u0005\u0003\u001d~\u0012\u0001\"\u00138u!\u0006\u0014\u0018-\u001c\u0005\u0007!\u0002\u0001\u000bQ\u0002'\u0002A5|G-\u001a7TK2,7\r^5p]\u0012K7\u000f^5oGR$\u0006N]3tQ>dG\r\t\u0005\b%\u0002\u0011\r\u0011\"\u0002T\u0003=1\u0017\u000e\u001c;feB\u0013XmY5tS>tW#\u0001+\u0011\u0005y*\u0016B\u0001,@\u0005-!u.\u001e2mKB\u000b'/Y7\t\ra\u0003\u0001\u0015!\u0004U\u0003A1\u0017\u000e\u001c;feB\u0013XmY5tS>t\u0007\u0005C\u0004[\u0001\t\u0007IQA&\u0002\u0017A\f'/\u00197mK2L7/\u001c\u0005\u00079\u0002\u0001\u000bQ\u0002'\u0002\u0019A\f'/\u00197mK2L7/\u001c\u0011\t\u000fy\u0003!\u0019!C\u0003?\u0006Qa.\u0019$jY24E.Y4\u0016\u0003\u0001\u0004\"AP1\n\u0005\t|$\u0001\u0004\"p_2,\u0017M\u001c)be\u0006l\u0007B\u00023\u0001A\u00035\u0001-A\u0006oC\u001aKG\u000e\u001c$mC\u001e\u0004\u0003b\u00024\u0001\u0005\u0004%)aZ\u0001\u0017G\u0006$XmZ8sS\u000e\fGnQ8mk6tg*Y7fgV\t\u0001\u000e\u0005\u0002?S&\u0011!n\u0010\u0002\u0011'R\u0014\u0018N\\4BeJ\f\u0017\u0010U1sC6Da\u0001\u001c\u0001!\u0002\u001bA\u0017aF2bi\u0016<wN]5dC2\u001cu\u000e\\;n]:\u000bW.Z:!\u0011\u001dq\u0007A1A\u0005\u0006\u001d\fqcY1uK\u001e|'/[2bY\u000e{G.^7o-\u0006dW/Z:\t\rA\u0004\u0001\u0015!\u0004i\u0003a\u0019\u0017\r^3h_JL7-\u00197D_2,XN\u001c,bYV,7\u000f\t\u0005\be\u0002\u0011\r\u0011\"\u0002h\u0003IqW/\\3sS\u000e\u001cu\u000e\\;n]:\u000bW.Z:\t\rQ\u0004\u0001\u0015!\u0004i\u0003MqW/\\3sS\u000e\u001cu\u000e\\;n]:\u000bW.Z:!\u0011\u001d1\bA1A\u0005\u0006]\f1C\\;nKJL7mQ8mk6tg+\u00197vKN,\u0012\u0001\u001f\t\u0003}eL!A
_ \u0003!\u0011{WO\u00197f\u0003J\u0014\u0018-\u001f)be\u0006l\u0007B\u0002?\u0001A\u00035\u00010\u0001\u000bok6,'/[2D_2,XN\u001c,bYV,7\u000f\t\u0005\b}\u0002\u0011\r\u0011\"\u0002h\u0003I\u0011wn\u001c7fC:\u001cu\u000e\\;n]:\u000bW.Z:\t\u000f\u0005\u0005\u0001\u0001)A\u0007Q\u0006\u0019\"m\\8mK\u0006t7i\u001c7v[:t\u0015-\\3tA!A\u0011Q\u0001\u0001C\u0002\u0013\u0015q-A\nc_>dW-\u00198D_2,XN\u001c,bYV,7\u000fC\u0004\u0002\n\u0001\u0001\u000bQ\u00025\u0002)\t|w\u000e\\3b]\u000e{G.^7o-\u0006dW/Z:!\u0011!\ti\u0001\u0001b\u0001\n\u000ba\u0014a\u00033fG&$W-T8eK2Dq!!\u0005\u0001A\u00035Q(\u0001\u0007eK\u000eLG-Z'pI\u0016d\u0007\u0005\u0003\u0005\u0002\u0016\u0001\u0011\r\u0011\"\u0002=\u0003!1\u0017\u000e\u001c7N_\u0012,\u0007bBA\r\u0001\u0001\u0006i!P\u0001\nM&dG.T8eK\u0002B\u0001\"!\b\u0001\u0005\u0004%)\u0001P\u0001\u0017G\"\f'/Y2uKJt\u0015I\u00117b].,GOR5mY\"9\u0011\u0011\u0005\u0001!\u0002\u001bi\u0014aF2iCJ\f7\r^3s\u001d\u0006\u0013E.\u00198lKR4\u0015\u000e\u001c7!\u0011!\t)\u0003\u0001b\u0001\n\u000b\u0019\u0016\u0001\u00068v[\u0016\u0014\u0018n\u0019(B\u00052\fgn[3u\r&dG\u000eC\u0004\u0002*\u0001\u0001\u000bQ\u0002+\u0002+9,X.\u001a:jG:\u000b%\t\\1oW\u0016$h)\u001b7mA!A\u0011Q\u0006\u0001C\u0002\u0013\u0015q-\u0001\rdCR,wm\u001c:jG\u0006dg*\u0011$jY2l\u0015\r]&fsNDq!!\r\u0001A\u00035\u0001.A\rdCR,wm\u001c:jG\u0006dg*\u0011$jY2l\u0015\r]&fsN\u0004\u0003\u0002CA\u001b\u0001\t\u0007IQA4\u00025\r\fG/Z4pe&\u001c\u0017\r\u001c(B\r&dG.T1q-\u0006dW/Z:\t\u000f\u0005e\u0002\u0001)A\u0007Q\u0006Y2-\u0019;fO>\u0014\u0018nY1m\u001d\u00063\u0015\u000e\u001c7NCB4\u0016\r\\;fg\u0002B\u0001\"!\u0010\u0001\u0005\u0004%)aZ\u0001\u0015]VlWM]5d\u001d\u00063\u0015\u000e\u001c7NCB\\U-_:\t\u000f\u0005\u0005\u0003\u0001)A\u0007Q\u0006)b.^7fe&\u001cg*\u0011$jY2l\u0015\r]&fsN\u0004\u0003\u0002CA#\u0001\t\u0007IQA<\u0002-9,X.\u001a:jG:\u000be)\u001b7m\u001b\u0006\u0004h+\u00197vKNDq!!\u0013\u0001A\u00035\u00010A\fok6,'/[2O\u0003\u001aKG\u000e\\'baZ\u000bG.^3tA!9\u0011Q\n\u0001\u0005\u0002\u000
5=\u0013AE:fi:+X.\u001a:jG\u001aKG\u000e\\*uCR$B!!\u0015\u0002T5\t\u0001\u0001C\u0004\u0002V\u0005-\u0003\u0019A\u0015\u0002\u000bY\fG.^3\t\r\u0005e\u0003\u0001\"\u0001)\u0003I9W\r\u001e(v[\u0016\u0014\u0018n\u0019$jY2\u001cF/\u0019;\t\u000f\u0005u\u0003\u0001\"\u0001\u0002`\u0005!2/\u001a;DQ\u0006\u0014\u0018m\u0019;fe\u001aKG\u000e\\*uCR$B!!\u0015\u0002b!9\u0011QKA.\u0001\u0004I\u0003BBA3\u0001\u0011\u0005\u0001&\u0001\u000bhKR\u001c\u0005.\u0019:bGR,'OR5mYN#\u0018\r\u001e\u0005\b\u0003S\u0002A\u0011AA6\u0003\t\u001aX\r^'pI\u0016d7+\u001a7fGRLwN\u001c#jgRLgn\u0019;UQJ,7\u000f[8mIR!\u0011\u0011KA7\u0011!\t)&a\u001aA\u0002\u0005=\u0004cA\u0016\u0002r%\u0019\u00111\u000f\u0017\u0003\u0007%sG\u000fC\u0004\u0002x\u0001!\t!!\u001f\u0002E\u001d,G/T8eK2\u001cV\r\\3di&|g\u000eR5ti&t7\r\u001e+ie\u0016\u001c\bn\u001c7e+\t\ty\u0007C\u0004\u0002~\u0001!\t!a \u0002%M,GOR5mi\u0016\u0014\bK]3dSNLwN\u001c\u000b\u0005\u0003#\n\t\t\u0003\u0005\u0002V\u0005m\u0004\u0019AAB!\rY\u0013QQ\u0005\u0004\u0003\u000fc#A\u0002#pk\ndW\rC\u0004\u0002\f\u0002!\t!!$\u0002%\u001d,GOR5mi\u0016\u0014\bK]3dSNLwN\\\u000b\u0003\u0003\u0007Cq!!%\u0001\t\u0003\t\u0019*\u0001\btKR\u0004\u0016M]1mY\u0016d\u0017n]7\u0015\t\u0005E\u0013Q\u0013\u0005\t\u0003+\ny\t1\u0001\u0002p!9\u0011\u0011\u0014\u0001\u0005\u0002\u0005e\u0014AD4fiB\u000b'/\u00197mK2L7/\u001c\u0005\b\u0003;\u0003A\u0011AAP\u00035\u0019X\r\u001e(b\r&dGN\u00127bOR!\u0011\u0011KAQ\u0011!\t)&a'A\u0002\u0005\r\u0006cA\u0016\u0002&&\u0019\u0011q\u0015\u0017\u0003\u000f\t{w\u000e\\3b]\"9\u00111\u0016\u0001\u0005\u0002\u00055\u0016!D4fi:\u000bg)\u001b7m\r2\fw-\u0006\u0002\u0002$\"9\u0011\u0011\u0017\u0001\u0005\u0002\u0005M\u0016!G:fi\u000e\u000bG/Z4pe&\u001c\u0017\r\\\"pYVlgNT1nKN$B!!\u0015\u00026\"A\u0011QKAX\u0001\u0004\t9\f\u0005\u0003,\u0003sK\u0013bAA^Y\t)\u0011I\u001d:bs\"9\u0011q\u0018\u0001\u0005\u0002\u0005\u0005\u0017!G4fi\u000e\u000bG/Z4pe&\u001c\u0017\r\\\"pYVlgNT1nKN,\"!a.\t\u000f\u0005\u0015\u0007\u0001\"\u0001\u0002H\u0006Q2/\u001a;DC
R,wm\u001c:jG\u0006d7i\u001c7v[:4\u0016\r\\;fgR!\u0011\u0011KAe\u0011!\t)&a1A\u0002\u0005]\u0006bBAg\u0001\u0011\u0005\u0011\u0011Y\u0001\u001bO\u0016$8)\u0019;fO>\u0014\u0018nY1m\u0007>dW/\u001c8WC2,Xm\u001d\u0005\b\u0003#\u0004A\u0011AAj\u0003U\u0019X\r\u001e(v[\u0016\u0014\u0018nY\"pYVlgNT1nKN$B!!\u0015\u0002V\"A\u0011QKAh\u0001\u0004\t9\fC\u0004\u0002Z\u0002!\t!!1\u0002+\u001d,GOT;nKJL7mQ8mk6tg*Y7fg\"9\u0011Q\u001c\u0001\u0005\u0002\u0005}\u0017!F:fi\n{w\u000e\\3b]\u000e{G.^7o\u001d\u0006lWm\u001d\u000b\u0005\u0003#\n\t\u000f\u0003\u0005\u0002V\u0005m\u0007\u0019AA\\\u0011\u001d\t)\u000f\u0001C\u0001\u0003\u0003\fQcZ3u\u0005>|G.Z1o\u0007>dW/\u001c8OC6,7\u000fC\u0004\u0002j\u0002!\t!a;\u0002-M,GOQ8pY\u0016\fgnQ8mk6tg+\u00197vKN$B!!\u0015\u0002n\"A\u0011QKAt\u0001\u0004\ty\u000fE\u0003,\u0003s\u000b\u0019\u000bC\u0004\u0002t\u0002!\t!!>\u0002-\u001d,GOQ8pY\u0016\fgnQ8mk6tg+\u00197vKN,\"!a<\t\u000f\u0005e\b\u0001\"\u0001\u0002|\u000612/\u001a;Ok6,'/[2D_2,XN\u001c,bYV,7\u000f\u0006\u0003\u0002R\u0005u\b\u0002CA+\u0003o\u0004\r!a@\u0011\u000b-\nI,a!\t\u000f\t\r\u0001\u0001\"\u0001\u0003\u0006\u00051r-\u001a;Ok6,'/[2D_2,XN\u001c,bYV,7/\u0006\u0002\u0002��\"9!\u0011\u0002\u0001\u0005\u0002\t-\u0011AD:fi\u0012+7-\u001b3f\u001b>$W\r\u001c\u000b\u0005\u0003#\u0012i\u0001C\u0004\u0002V\t\u001d\u0001\u0019A\u0015\t\r\tE\u0001\u0001\"\u0001)\u000399W\r\u001e#fG&$W-T8eK2DqA!\u0006\u0001\t\u0003\u00119\"A\u0006tKR4\u0015\u000e\u001c7N_\u0012,G\u0003BA)\u00053Aq!!\u0016\u0003\u0014\u0001\u0007\u0011\u0006\u0003\u0004\u0003\u001e\u0001!\t\u0001K\u0001\fO\u0016$h)\u001b7m\u001b>$W\rC\u0004\u0003\"\u0001!\tAa\t\u00023M,Go\u00115be\u0006\u001cG/\u001a:O\u0003\nc\u0017M\\6fi\u001aKG\u000e\u001c\u000b\u0005\u0003#\u0012)\u0003C\u0004\u0002V\t}\u0001\u0019A\u0015\t\r\t%\u0002\u0001\"\u0001)\u0003e9W\r^\"iCJ\f7\r^3s\u001d\u0006\u0013E.\u00198lKR4\u0015\u000e\u001c7\t\u000f\t5\u0002\u0001\"\u0001\u00030\u000592/\u001a;Ok6,'/[2O\u0003\nc\u0017M\\6fi\u001aKG\u000e\u001c\u000b\u0005\u0003#
\u0012\t\u0004\u0003\u0005\u0002V\t-\u0002\u0019AAB\u0011\u001d\u0011)\u0004\u0001C\u0001\u0003\u001b\u000bqcZ3u\u001dVlWM]5d\u001d\u0006\u0013E.\u00198lKR4\u0015\u000e\u001c7\t\u000f\te\u0002\u0001\"\u0001\u0003<\u0005Y2/\u001a;DCR,wm\u001c:jG\u0006dg*\u0011$jY2l\u0015\r]&fsN$B!!\u0015\u0003>!A\u0011Q\u000bB\u001c\u0001\u0004\t9\fC\u0004\u0003B\u0001!\t!!1\u00027\u001d,GoQ1uK\u001e|'/[2bY:\u000be)\u001b7m\u001b\u0006\u00048*Z=t\u0011\u001d\u0011)\u0005\u0001C\u0001\u0005\u000f\nQd]3u\u0007\u0006$XmZ8sS\u000e\fGNT!GS2dW*\u00199WC2,Xm\u001d\u000b\u0005\u0003#\u0012I\u0005\u0003\u0005\u0002V\t\r\u0003\u0019AA\\\u0011\u001d\u0011i\u0005\u0001C\u0001\u0003\u0003\fQdZ3u\u0007\u0006$XmZ8sS\u000e\fGNT!GS2dW*\u00199WC2,Xm\u001d\u0005\b\u0005#\u0002A\u0011\u0001B*\u0003]\u0019X\r\u001e(v[\u0016\u0014\u0018n\u0019(B\r&dG.T1q\u0017\u0016L8\u000f\u0006\u0003\u0002R\tU\u0003\u0002CA+\u0005\u001f\u0002\r!a.\t\u000f\te\u0003\u0001\"\u0001\u0002B\u00069r-\u001a;Ok6,'/[2O\u0003\u001aKG\u000e\\'ba.+\u0017p\u001d\u0005\b\u0005;\u0002A\u0011\u0001B0\u0003e\u0019X\r\u001e(v[\u0016\u0014\u0018n\u0019(B\r&dG.T1q-\u0006dW/Z:\u0015\t\u0005E#\u0011\r\u0005\t\u0003+\u0012Y\u00061\u0001\u0002��\"9!Q\r\u0001\u0005\u0002\t\u0015\u0011!G4fi:+X.\u001a:jG:\u000be)\u001b7m\u001b\u0006\u0004h+\u00197vKNDqA!\u001b\u0001\t\u0003\u0011Y'A\ftKR\u001c\u0015\r^3h_JL7-\u00197O\u0003\u001aKG\u000e\\'baR!\u0011\u0011\u000bB7\u0011!\t)Fa\u001aA\u0002\t=\u0004#\u0002\u0016\u0003r%J\u0013b\u0001B:e\t\u0019Q*\u00199\t\u000f\t]\u0004\u0001\"\u0001\u0003z\u0005\u00192/\u001a;Ok6,'/[2O\u0003\u001aKG\u000e\\'baR!\u0011\u0011\u000bB>\u0011!\t)F!\u001eA\u0002\tu\u0004C\u0002\u0016\u0003r%\n\u0019\t\u0003\u00047\u0001\u0011\u0005!\u0011\u0011\u000b\u0002q!9!Q\u0011\u0001\u0005B\t\u001d\u0015!\u0005;sC:\u001chm\u001c:n\u0013:$XM\u001d8bYR!!\u0011\u0012BY!\u0011\u0011YIa+\u000f\t\t5%Q\u0015\b\u0005\u0005\u001f\u0013\tK\u0004\u0003\u0003\u0012\n}e\u0002\u0002BJ\u0005;sAA!&\u0003\u001c6\u0011!q\u0013\u0006\u0004\u00053c\u00
11A\u0002\u001fs_>$h(C\u0001\u001e\u0013\tYB$\u0003\u0002\u001a5%\u0019!1\u0015\r\u0002\u0007M\fH.\u0003\u0003\u0003(\n%\u0016a\u00029bG.\fw-\u001a\u0006\u0004\u0005GC\u0012\u0002\u0002BW\u0005_\u0013\u0011\u0002R1uC\u001a\u0013\u0018-\\3\u000b\t\t\u001d&\u0011\u0016\u0005\t\u0005g\u0013\u0019\t1\u0001\u00036\u00069A-\u0019;bg\u0016$\b\u0007\u0002B\\\u0005\u0007\u0004bA!/\u0003<\n}VB\u0001BU\u0013\u0011\u0011iL!+\u0003\u000f\u0011\u000bG/Y:fiB!!\u0011\u0019Bb\u0019\u0001!AB!2\u00032\u0006\u0005\t\u0011!B\u0001\u0005\u000f\u00141a\u0018\u00132#\u0011\u0011IMa4\u0011\u0007-\u0012Y-C\u0002\u0003N2\u0012qAT8uQ&tw\rE\u0002,\u0005#L1Aa5-\u0005\r\te.\u001f\u0005\b\u0005/\u0004A\u0011\u0002Bm\u00035\u0011W/\u001b7e\u001d\u0006\u001cuN\u001c4jOR\u0011!1\u001c\t\u0006W\tu'\u0011]\u0005\u0004\u0005?d#AB(qi&|g\u000e\u0005\u0003\u0003d\n%XB\u0001Bs\u0015\r\u00119\u000fB\u0001\nS:4WM]3oG\u0016LAAa;\u0003f\naa*\u0019$jY2\u001cuN\u001c4jO\"9!q\u001e\u0001\u0005B\tE\u0018a\u0006;sC:\u001chm\u001c:n'\u000eDW-\\1J]R,'O\\1m)\u0011\u0011\u0019Pa@\u0011\t\tU(1`\u0007\u0003\u0005oTAA!?\u0003*\u0006)A/\u001f9fg&!!Q 
B|\u0005)\u0019FO];diRK\b/\u001a\u0005\t\u0007\u0003\u0011i\u000f1\u0001\u0003t\u000611o\u00195f[\u0006Dqa!\u0002\u0001\t\u0003\u001a9!\u0001\u0003d_BLHc\u0001\u001d\u0004\n!A11BB\u0002\u0001\u0004\u0019i!A\u0003fqR\u0014\u0018\rE\u0002?\u0007\u001fI1a!\u0005@\u0005!\u0001\u0016M]1n\u001b\u0006\u0004xaBB\u000b\u0005!\u00051qC\u0001\u0019\t\u0006$\u0018mU1oSRL'0\u001a:Ue\u0006t7OZ8s[\u0016\u0014\bcA\b\u0004\u001a\u00191\u0011A\u0001E\u0001\u00077\u0019\u0002b!\u0007\u0004\u001e\r\r2\u0011\u0006\t\u0004W\r}\u0011bAB\u0011Y\t1\u0011I\\=SK\u001a\u0004BaEB\u0013q%\u00191q\u0005\u000b\u0003+\u0011+g-Y;miB\u000b'/Y7t%\u0016\fG-\u00192mKB\u00191fa\u000b\n\u0007\r5BF\u0001\u0007TKJL\u0017\r\\5{C\ndW\rC\u00047\u00073!\ta!\r\u0015\u0005\r]\u0001\u0002CB\u001b\u00073!\tea\u000e\u0002\t1|\u0017\r\u001a\u000b\u0004q\re\u0002bBB\u001e\u0007g\u0001\r!K\u0001\u0005a\u0006$\b\u000e\u0003\u0006\u0004@\re\u0011\u0011!C\u0005\u0007\u0003\n1B]3bIJ+7o\u001c7wKR\u001111\t\t\u0005\u0007\u000b\u001ay%\u0004\u0002\u0004H)!1\u0011JB&\u0003\u0011a\u0017M\\4\u000b\u0005\r5\u0013\u0001\u00026bm\u0006LAa!\u0015\u0004H\t1qJ\u00196fGR\u0004")
/* loaded from: input_file:com/databricks/labs/automl/pipeline/DataSanitizerTransformer.class */
/*
 * Spark ML pipeline stage that wraps DataSanitizer.
 *
 * All configuration is held in Spark ML Params so the stage can be written and
 * read through DefaultParamsWritable / the companion object's reader. Maps are
 * stored as two parallel array params (keys and values) and re-assembled with
 * SchemaUtils.generateMapFromKeysValues when needed.
 *
 * On transform the stage optionally runs the sanitizer's NA fill, records the
 * categorical/numeric fill values and the decided model type the first time
 * (i.e. while decideModel is still empty), and finally drops rows whose label
 * is null or NaN.
 *
 * NOTE: this file is decompiler output for Scala bytecode; constructs such as
 * "X.class.method(this)", "Cclass" and assignments to final fields inside the
 * trait setter methods are decompiler renderings and are intentionally left
 * untouched.
 */
public class DataSanitizerTransformer extends AbstractTransformer implements DefaultParamsWritable, HasLabelColumn, HasFeatureColumn {
    private final String uid;
    private final Param<String> numericFillStat;
    private final Param<String> characterFillStat;
    private final IntParam modelSelectionDistinctThreshold;
    private final DoubleParam filterPrecision;
    private final IntParam parallelism;
    private final BooleanParam naFillFlag;
    // Parallel key/value arrays describing the fill values recorded by transformInternal.
    private final StringArrayParam categoricalColumnNames;
    private final StringArrayParam categoricalColumnValues;
    private final StringArrayParam numericColumnNames;
    private final DoubleArrayParam numericColumnValues;
    private final StringArrayParam booleanColumnNames;
    // Boolean values are stored as strings (see setBooleanColumnValues / getBooleanColumnValues).
    private final StringArrayParam booleanColumnValues;
    private final Param<String> decideModel;
    private final Param<String> fillMode;
    private final Param<String> characterNABlanketFill;
    private final DoubleParam numericNABlanketFill;
    // Parallel key/value arrays for the user-supplied NA fill maps.
    private final StringArrayParam categoricalNAFillMapKeys;
    private final StringArrayParam categoricalNAFillMapValues;
    private final StringArrayParam numericNAFillMapKeys;
    private final DoubleArrayParam numericNAFillMapValues;
    private final Param<String> featureCol;
    private final Param<String> labelColumn;

    /** Returns the reader provided by the companion object. */
    public static MLReader<DataSanitizerTransformer> read() {
        return DataSanitizerTransformer$.MODULE$.read();
    }

    /**
     * Loads a transformer through the companion object.
     *
     * @param str path to load from
     */
    public static DataSanitizerTransformer load(String str) {
        return DataSanitizerTransformer$.MODULE$.m413load(str);
    }

    /** Param holding the feature column name. */
    @Override // com.databricks.labs.automl.pipeline.HasFeatureColumn
    public final Param<String> featureCol() {
        return this.featureCol;
    }

    /** Trait field initializer for featureCol; invoked from the constructor. */
    @Override // com.databricks.labs.automl.pipeline.HasFeatureColumn
    public final void com$databricks$labs$automl$pipeline$HasFeatureColumn$_setter_$featureCol_$eq(Param param) {
        this.featureCol = param;
    }

    /** Sets the feature column name; delegates to the HasFeatureColumn trait implementation. */
    @Override // com.databricks.labs.automl.pipeline.HasFeatureColumn
    public HasFeatureColumn setFeatureCol(String str) {
        return HasFeatureColumn.Cclass.setFeatureCol(this, str);
    }

    /** Returns the feature column name; delegates to the HasFeatureColumn trait implementation. */
    @Override // com.databricks.labs.automl.pipeline.HasFeatureColumn
    public String getFeatureCol() {
        return HasFeatureColumn.Cclass.getFeatureCol(this);
    }

    /** Param holding the label column name. */
    @Override // com.databricks.labs.automl.pipeline.HasLabelColumn
    public final Param<String> labelColumn() {
        return this.labelColumn;
    }

    /** Trait field initializer for labelColumn; invoked from the constructor. */
    @Override // com.databricks.labs.automl.pipeline.HasLabelColumn
    public final void com$databricks$labs$automl$pipeline$HasLabelColumn$_setter_$labelColumn_$eq(Param param) {
        this.labelColumn = param;
    }

    /** Sets the label column name; delegates to the HasLabelColumn trait implementation. */
    @Override // com.databricks.labs.automl.pipeline.HasLabelColumn
    public HasLabelColumn setLabelColumn(String str) {
        return HasLabelColumn.Cclass.setLabelColumn(this, str);
    }

    /** Returns the label column name; delegates to the HasLabelColumn trait implementation. */
    @Override // com.databricks.labs.automl.pipeline.HasLabelColumn
    public String getLabelColumn() {
        return HasLabelColumn.Cclass.getLabelColumn(this);
    }

    /** Returns the writer supplied by the DefaultParamsWritable trait implementation. */
    public MLWriter write() {
        return DefaultParamsWritable.class.write(this);
    }

    /**
     * Saves this stage via the MLWritable trait implementation.
     *
     * @param str destination path
     * @throws IOException declared by the trait's save contract
     */
    public void save(String str) throws IOException {
        MLWritable.class.save(this, str);
    }

    /** Unique identifier of this stage, as passed to the constructor. */
    public String uid() {
        return this.uid;
    }

    /** Param "numericFillStat". */
    public final Param<String> numericFillStat() {
        return this.numericFillStat;
    }

    /** Param "characterFillStat". */
    public final Param<String> characterFillStat() {
        return this.characterFillStat;
    }

    /** Param "modelSelectionDistinctThreshold". */
    public final IntParam modelSelectionDistinctThreshold() {
        return this.modelSelectionDistinctThreshold;
    }

    /** Param "filterPrecision". */
    public final DoubleParam filterPrecision() {
        return this.filterPrecision;
    }

    /** Param "parallelism". */
    public final IntParam parallelism() {
        return this.parallelism;
    }

    /** Param "naFillFlag"; when true, transformInternal runs the sanitizer's clean-data step. */
    public final BooleanParam naFillFlag() {
        return this.naFillFlag;
    }

    /** Param "categoricalColumnNames" (keys of the recorded categorical fill map). */
    public final StringArrayParam categoricalColumnNames() {
        return this.categoricalColumnNames;
    }

    /** Param "categoricalColumnValues" (values of the recorded categorical fill map). */
    public final StringArrayParam categoricalColumnValues() {
        return this.categoricalColumnValues;
    }

    /** Param "numericColumnNames" (keys of the recorded numeric fill map). */
    public final StringArrayParam numericColumnNames() {
        return this.numericColumnNames;
    }

    /** Param "numericColumnValues" (values of the recorded numeric fill map). */
    public final DoubleArrayParam numericColumnValues() {
        return this.numericColumnValues;
    }

    /** Param "booleanColumnNames" (keys of the boolean fill map). */
    public final StringArrayParam booleanColumnNames() {
        return this.booleanColumnNames;
    }

    /** Param "booleanColumnValues" (values of the boolean fill map, stored as strings). */
    public final StringArrayParam booleanColumnValues() {
        return this.booleanColumnValues;
    }

    /** Param "decideModel"; empty until transformInternal records the decided model type. */
    public final Param<String> decideModel() {
        return this.decideModel;
    }

    /** Param "fillMode". */
    public final Param<String> fillMode() {
        return this.fillMode;
    }

    /** Param "characterNABlanketFill". */
    public final Param<String> characterNABlanketFill() {
        return this.characterNABlanketFill;
    }

    /** Param "numericNABlanketFill". */
    public final DoubleParam numericNABlanketFill() {
        return this.numericNABlanketFill;
    }

    /** Param "categoricalNAFillMapKeys". */
    public final StringArrayParam categoricalNAFillMapKeys() {
        return this.categoricalNAFillMapKeys;
    }

    /** Param "categoricalNAFillMapValues". */
    public final StringArrayParam categoricalNAFillMapValues() {
        return this.categoricalNAFillMapValues;
    }

    /** Param "numericNAFillMapKeys". */
    public final StringArrayParam numericNAFillMapKeys() {
        return this.numericNAFillMapKeys;
    }

    /** Param "numericNAFillMapValues". */
    public final DoubleArrayParam numericNAFillMapValues() {
        return this.numericNAFillMapValues;
    }

    /** Sets numericFillStat. @return this transformer */
    public DataSanitizerTransformer setNumericFillStat(String str) {
        return (DataSanitizerTransformer) set(numericFillStat(), str);
    }

    /** Returns the current numericFillStat value. */
    public String getNumericFillStat() {
        return (String) $(numericFillStat());
    }

    /** Sets characterFillStat. @return this transformer */
    public DataSanitizerTransformer setCharacterFillStat(String str) {
        return (DataSanitizerTransformer) set(characterFillStat(), str);
    }

    /** Returns the current characterFillStat value. */
    public String getCharacterFillStat() {
        return (String) $(characterFillStat());
    }

    /** Sets modelSelectionDistinctThreshold. @return this transformer */
    public DataSanitizerTransformer setModelSelectionDistinctThreshold(int i) {
        return (DataSanitizerTransformer) set(modelSelectionDistinctThreshold(), BoxesRunTime.boxToInteger(i));
    }

    /** Returns the current modelSelectionDistinctThreshold value. */
    public int getModelSelectionDistinctThreshold() {
        return BoxesRunTime.unboxToInt($(modelSelectionDistinctThreshold()));
    }

    /** Sets filterPrecision. @return this transformer */
    public DataSanitizerTransformer setFilterPrecision(double d) {
        return (DataSanitizerTransformer) set(filterPrecision(), BoxesRunTime.boxToDouble(d));
    }

    /** Returns the current filterPrecision value. */
    public double getFilterPrecision() {
        return BoxesRunTime.unboxToDouble($(filterPrecision()));
    }

    /** Sets parallelism. @return this transformer */
    public DataSanitizerTransformer setParallelism(int i) {
        return (DataSanitizerTransformer) set(parallelism(), BoxesRunTime.boxToInteger(i));
    }

    /** Returns the current parallelism value. */
    public int getParallelism() {
        return BoxesRunTime.unboxToInt($(parallelism()));
    }

    /** Sets naFillFlag. @return this transformer */
    public DataSanitizerTransformer setNaFillFlag(boolean z) {
        return (DataSanitizerTransformer) set(naFillFlag(), BoxesRunTime.boxToBoolean(z));
    }

    /** Returns the current naFillFlag value. */
    public boolean getNaFillFlag() {
        return BoxesRunTime.unboxToBoolean($(naFillFlag()));
    }

    /** Sets categoricalColumnNames. @return this transformer */
    public DataSanitizerTransformer setCategoricalColumnNames(String[] strArr) {
        return (DataSanitizerTransformer) set(categoricalColumnNames(), strArr);
    }

    /** Returns the current categoricalColumnNames value. */
    public String[] getCategoricalColumnNames() {
        return (String[]) $(categoricalColumnNames());
    }

    /** Sets categoricalColumnValues. @return this transformer */
    public DataSanitizerTransformer setCategoricalColumnValues(String[] strArr) {
        return (DataSanitizerTransformer) set(categoricalColumnValues(), strArr);
    }

    /** Returns the current categoricalColumnValues value. */
    public String[] getCategoricalColumnValues() {
        return (String[]) $(categoricalColumnValues());
    }

    /** Sets numericColumnNames. @return this transformer */
    public DataSanitizerTransformer setNumericColumnNames(String[] strArr) {
        return (DataSanitizerTransformer) set(numericColumnNames(), strArr);
    }

    /** Returns the current numericColumnNames value. */
    public String[] getNumericColumnNames() {
        return (String[]) $(numericColumnNames());
    }

    /** Sets booleanColumnNames. @return this transformer */
    public DataSanitizerTransformer setBooleanColumnNames(String[] strArr) {
        return (DataSanitizerTransformer) set(booleanColumnNames(), strArr);
    }

    /** Returns the current booleanColumnNames value. */
    public String[] getBooleanColumnNames() {
        return (String[]) $(booleanColumnNames());
    }

    /**
     * Sets booleanColumnValues. Each boolean is mapped to a String by a compiler-generated
     * function before being stored (presumably its string form; the function body is not
     * visible in this file).
     *
     * @return this transformer
     */
    public DataSanitizerTransformer setBooleanColumnValues(boolean[] zArr) {
        return (DataSanitizerTransformer) set(booleanColumnValues(), Predef$.MODULE$.booleanArrayOps(zArr).map(new DataSanitizerTransformer$$anonfun$setBooleanColumnValues$1(this), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(String.class))));
    }

    /**
     * Returns booleanColumnValues, mapping each stored String back to a boolean through a
     * compiler-generated function (presumably parsing it; the function body is not visible
     * in this file).
     */
    public boolean[] getBooleanColumnValues() {
        return (boolean[]) Predef$.MODULE$.refArrayOps((Object[]) $(booleanColumnValues())).map(new DataSanitizerTransformer$$anonfun$getBooleanColumnValues$1(this), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.Boolean()));
    }

    /** Sets numericColumnValues. @return this transformer */
    public DataSanitizerTransformer setNumericColumnValues(double[] dArr) {
        return (DataSanitizerTransformer) set(numericColumnValues(), dArr);
    }

    /** Returns the current numericColumnValues value. */
    public double[] getNumericColumnValues() {
        return (double[]) $(numericColumnValues());
    }

    /** Sets decideModel. @return this transformer */
    public DataSanitizerTransformer setDecideModel(String str) {
        return (DataSanitizerTransformer) set(decideModel(), str);
    }

    /** Returns the current decideModel value. */
    public String getDecideModel() {
        return (String) $(decideModel());
    }

    /** Sets fillMode. @return this transformer */
    public DataSanitizerTransformer setFillMode(String str) {
        return (DataSanitizerTransformer) set(fillMode(), str);
    }

    /** Returns the current fillMode value. */
    public String getFillMode() {
        return (String) $(fillMode());
    }

    /** Sets characterNABlanketFill. @return this transformer */
    public DataSanitizerTransformer setCharacterNABlanketFill(String str) {
        return (DataSanitizerTransformer) set(characterNABlanketFill(), str);
    }

    /** Returns the current characterNABlanketFill value. */
    public String getCharacterNABlanketFill() {
        return (String) $(characterNABlanketFill());
    }

    /** Sets numericNABlanketFill. @return this transformer */
    public DataSanitizerTransformer setNumericNABlanketFill(double d) {
        return (DataSanitizerTransformer) set(numericNABlanketFill(), BoxesRunTime.boxToDouble(d));
    }

    /** Returns the current numericNABlanketFill value. */
    public double getNumericNABlanketFill() {
        return BoxesRunTime.unboxToDouble($(numericNABlanketFill()));
    }

    /** Sets categoricalNAFillMapKeys. @return this transformer */
    public DataSanitizerTransformer setCategoricalNAFillMapKeys(String[] strArr) {
        return (DataSanitizerTransformer) set(categoricalNAFillMapKeys(), strArr);
    }

    /** Returns the current categoricalNAFillMapKeys value. */
    public String[] getCategoricalNAFillMapKeys() {
        return (String[]) $(categoricalNAFillMapKeys());
    }

    /** Sets categoricalNAFillMapValues. @return this transformer */
    public DataSanitizerTransformer setCategoricalNAFillMapValues(String[] strArr) {
        return (DataSanitizerTransformer) set(categoricalNAFillMapValues(), strArr);
    }

    /** Returns the current categoricalNAFillMapValues value. */
    public String[] getCategoricalNAFillMapValues() {
        return (String[]) $(categoricalNAFillMapValues());
    }

    /** Sets numericNAFillMapKeys. @return this transformer */
    public DataSanitizerTransformer setNumericNAFillMapKeys(String[] strArr) {
        return (DataSanitizerTransformer) set(numericNAFillMapKeys(), strArr);
    }

    /** Returns the current numericNAFillMapKeys value. */
    public String[] getNumericNAFillMapKeys() {
        return (String[]) $(numericNAFillMapKeys());
    }

    /** Sets numericNAFillMapValues. @return this transformer */
    public DataSanitizerTransformer setNumericNAFillMapValues(double[] dArr) {
        return (DataSanitizerTransformer) set(numericNAFillMapValues(), dArr);
    }

    /** Returns the current numericNAFillMapValues value. */
    public double[] getNumericNAFillMapValues() {
        return (double[]) $(numericNAFillMapValues());
    }

    /**
     * Stores a categorical NA fill map by splitting it into the key and value array params.
     *
     * @param map column name to fill value
     * @return this transformer
     */
    public DataSanitizerTransformer setCategoricalNAFillMap(Map<String, String> map) {
        setCategoricalNAFillMapKeys((String[]) map.keys().toArray(ClassTag$.MODULE$.apply(String.class)));
        return setCategoricalNAFillMapValues((String[]) map.values().toArray(ClassTag$.MODULE$.apply(String.class)));
    }

    /**
     * Stores a numeric NA fill map by splitting it into the key and value array params.
     *
     * @param map column name to fill value (values are unboxed into a double array)
     * @return this transformer
     */
    public DataSanitizerTransformer setNumericNAFillMap(Map<String, Object> map) {
        setNumericNAFillMapKeys((String[]) map.keys().toArray(ClassTag$.MODULE$.apply(String.class)));
        return setNumericNAFillMapValues((double[]) map.values().toArray(ClassTag$.MODULE$.Double()));
    }

    /**
     * Configures a DataSanitizer from this stage's params, optionally runs its clean-data
     * step, records the resulting fill configuration on first use, and returns the data
     * with null/NaN-label rows removed.
     *
     * @param dataset input data
     * @return the (optionally NA-filled) data, filtered to rows whose label is neither null nor NaN
     * @throws MatchError if the sanitizer returns a null result tuple
     */
    @Override // com.databricks.labs.automl.pipeline.AbstractTransformer
    public Dataset<Row> transformInternal(Dataset<?> dataset) {
        DataSanitizer sanitizer = new DataSanitizer(dataset.toDF())
            .setLabelCol(getLabelColumn())
            .setFeatureCol(getFeatureCol())
            .setModelSelectionDistinctThreshold(getModelSelectionDistinctThreshold())
            .setNumericFillStat(getNumericFillStat())
            .setCharacterFillStat(getCharacterFillStat())
            .setParallelism(getParallelism())
            .setCategoricalNAFillMap(SchemaUtils$.MODULE$.generateMapFromKeysValues(getCategoricalNAFillMapKeys(), getCategoricalNAFillMapValues()))
            .setCharacterNABlanketFillValue(getCharacterNABlanketFill())
            .setNumericNABlanketFillValue(getNumericNABlanketFill())
            .setNumericNAFillMap(SchemaUtils$.MODULE$.generateMapFromKeysValues(getNumericNAFillMapKeys(), getNumericNAFillMapValues()))
            .setNAFillMode(getFillMode())
            .setFilterPrecision(getFilterPrecision())
            .setFieldsToIgnoreInVector(new String[]{getAutomlInternalId()});

        Tuple3<Dataset<Row>, NaFillConfig, String> cleanResult;
        if (getNaFillFlag()) {
            // Reuse a previously recorded fill configuration when one is available;
            // otherwise fall back to the sanitizer's default first argument.
            Option<NaFillConfig> recordedConfig = buildNaConfig();
            cleanResult = recordedConfig.isDefined()
                ? sanitizer.generateCleanData((NaFillConfig) recordedConfig.get(), false, getDecideModel())
                : sanitizer.generateCleanData(sanitizer.generateCleanData$default$1(), false, getDecideModel());
        } else {
            // NA fill disabled: pass the data through with a placeholder config whose maps
            // each contain a single ""-keyed entry, and let the sanitizer decide the model type.
            cleanResult = new Tuple3<>(
                dataset,
                new NaFillConfig(
                    Predef$.MODULE$.Map().apply(Predef$.MODULE$.wrapRefArray(new Tuple2[]{Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc(""), "")})),
                    Predef$.MODULE$.Map().apply(Predef$.MODULE$.wrapRefArray(new Tuple2[]{Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc(""), BoxesRunTime.boxToDouble(0.0d))})),
                    Predef$.MODULE$.Map().apply(Predef$.MODULE$.wrapRefArray(new Tuple2[]{Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc(""), BoxesRunTime.boxToBoolean(false))}))),
                sanitizer.decideModel());
        }
        if (cleanResult == null) {
            throw new MatchError(cleanResult);
        }
        Dataset cleanedData = (Dataset) cleanResult._1();
        NaFillConfig fillConfig = (NaFillConfig) cleanResult._2();
        String detectedModelType = (String) cleanResult._3();

        // Record the fill configuration only once, while no model type has been decided yet.
        // NOTE(review): boolean fill columns are not recorded here, so buildNaConfig later
        // sees whatever booleanColumnNames/Values were set elsewhere - confirm this is intended.
        if (getDecideModel() == null || getDecideModel().isEmpty()) {
            setCategoricalColumnNames((String[]) fillConfig.categoricalColumns().keys().toArray(ClassTag$.MODULE$.apply(String.class)));
            setCategoricalColumnValues((String[]) fillConfig.categoricalColumns().values().toArray(ClassTag$.MODULE$.apply(String.class)));
            setNumericColumnNames((String[]) fillConfig.numericColumns().keys().toArray(ClassTag$.MODULE$.apply(String.class)));
            setNumericColumnValues((double[]) fillConfig.numericColumns().values().toArray(ClassTag$.MODULE$.Double()));
            setDecideModel(detectedModelType);
        }

        // Drop rows whose label is null or NaN.
        return cleanedData.toDF()
            .filter(functions$.MODULE$.col((String) $(labelColumn())).isNotNull())
            .filter(functions$.MODULE$.col((String) $(labelColumn())).isNaN().unary_$bang());
    }

    /**
     * Rebuilds a NaFillConfig from the recorded column name/value params.
     *
     * @return Some(config) only when both the categorical and the numeric column-name arrays
     *         are non-empty according to SchemaUtils.isNotEmpty; None otherwise
     */
    private Option<NaFillConfig> buildNaConfig() {
        // NOTE(review): a recorded config with only categorical or only numeric columns
        // yields None here - confirm that requiring both is intended.
        return (SchemaUtils$.MODULE$.isNotEmpty(getCategoricalColumnNames()) && SchemaUtils$.MODULE$.isNotEmpty(getNumericColumnNames()))
            ? new Some(new NaFillConfig(
                SchemaUtils$.MODULE$.generateMapFromKeysValues(getCategoricalColumnNames(), getCategoricalColumnValues()),
                SchemaUtils$.MODULE$.generateMapFromKeysValues(getNumericColumnNames(), getNumericColumnValues()),
                SchemaUtils$.MODULE$.generateMapFromKeysValues(getBooleanColumnNames(), getBooleanColumnValues())))
            : None$.MODULE$;
    }

    /** Returns the schema unchanged; this stage does not add or remove columns. */
    @Override // com.databricks.labs.automl.pipeline.AbstractTransformer
    public StructType transformSchemaInternal(StructType structType) {
        return structType;
    }

    /**
     * Copies this stage with extra params applied, via defaultCopy.
     *
     * @param paramMap extra params to apply to the copy
     */
    /* renamed from: copy, reason: merged with bridge method [inline-methods] and merged with bridge method [inline-methods] and merged with bridge method [inline-methods] */
    public DataSanitizerTransformer m411copy(ParamMap paramMap) {
        return (DataSanitizerTransformer) defaultCopy(paramMap);
    }

    /**
     * Creates the stage with the given uid and declares every Param. No param values are
     * set by this constructor.
     *
     * @param str unique identifier for this stage
     */
    public DataSanitizerTransformer(String str) {
        this.uid = str;
        MLWritable.class.$init$(this);
        DefaultParamsWritable.class.$init$(this);
        com$databricks$labs$automl$pipeline$HasLabelColumn$_setter_$labelColumn_$eq(new Param(this, "labelColumn", "Label Column Name"));
        com$databricks$labs$automl$pipeline$HasFeatureColumn$_setter_$featureCol_$eq(new Param(this, "featureCol", "Feature Column Name"));
        this.numericFillStat = new Param<>(this, "numericFillStat", "Numeric fill stats");
        this.characterFillStat = new Param<>(this, "characterFillStat", "Character fill stat");
        this.modelSelectionDistinctThreshold = new IntParam(this, "modelSelectionDistinctThreshold", "model selection distinct threshold");
        this.filterPrecision = new DoubleParam(this, "filterPrecision", "Filter precision");
        this.parallelism = new IntParam(this, "parallelism", "filter parallelism");
        this.naFillFlag = new BooleanParam(this, "naFillFlag", "Na Fill flag");
        this.categoricalColumnNames = new StringArrayParam(this, "categoricalColumnNames", "Categorical Columns");
        this.categoricalColumnValues = new StringArrayParam(this, "categoricalColumnValues", "Categorical Columns' Values");
        this.numericColumnNames = new StringArrayParam(this, "numericColumnNames", "Numeric Columns");
        this.numericColumnValues = new DoubleArrayParam(this, "numericColumnValues", "Numeric Columns' Values");
        this.booleanColumnNames = new StringArrayParam(this, "booleanColumnNames", "Boolean Columns");
        this.booleanColumnValues = new StringArrayParam(this, "booleanColumnValues", "Boolean Columns' Values");
        this.decideModel = new Param<>(this, "decideModel", "Decided model");
        this.fillMode = new Param<>(this, "fillMode", "fillMode");
        this.characterNABlanketFill = new Param<>(this, "characterNABlanketFill", "characterNABlanketFill");
        this.numericNABlanketFill = new DoubleParam(this, "numericNABlanketFill", "numericNABlanketFill");
        this.categoricalNAFillMapKeys = new StringArrayParam(this, "categoricalNAFillMapKeys", "categoricalNAFillMapKeys");
        this.categoricalNAFillMapValues = new StringArrayParam(this, "categoricalNAFillMapValues", "categoricalNAFillMapValues");
        this.numericNAFillMapKeys = new StringArrayParam(this, "numericNAFillMapKeys", "numericNAFillMapKeys");
        this.numericNAFillMapValues = new DoubleArrayParam(this, "numericNAFillMapValues", "numericNAFillMapValues");
    }

    /**
     * Creates the stage with a random uid and initial values: featureCol "features",
     * numericFillStat "mean", characterFillStat "max", modelSelectionDistinctThreshold 10,
     * filterPrecision 0.01, parallelism 20, naFillFlag false, decideModel "", all
     * column/fill-map arrays empty, blanket fills "" and 0.0, fillMode "auto", debug disabled.
     * The label column is not given a value here.
     */
    public DataSanitizerTransformer() {
        this(Identifiable$.MODULE$.randomUID("DataSanitizerTransformer"));
        setAutomlInternalId(AutoMlPipelineMlFlowUtils$.MODULE$.AUTOML_INTERNAL_ID_COL());
        setFeatureCol("features");
        setNumericFillStat("mean");
        setCharacterFillStat("max");
        setModelSelectionDistinctThreshold(10);
        setFilterPrecision(0.01d);
        setParallelism(20);
        setNaFillFlag(false);
        setDecideModel("");
        setCategoricalColumnNames((String[]) Array$.MODULE$.empty(ClassTag$.MODULE$.apply(String.class)));
        // Previously numericColumnValues was initialised twice and categoricalColumnValues
        // never, leaving getCategoricalColumnValues() without a value on a fresh instance.
        setCategoricalColumnValues((String[]) Array$.MODULE$.empty(ClassTag$.MODULE$.apply(String.class)));
        setNumericColumnNames((String[]) Array$.MODULE$.empty(ClassTag$.MODULE$.apply(String.class)));
        setNumericColumnValues((double[]) Array$.MODULE$.empty(ClassTag$.MODULE$.Double()));
        setBooleanColumnNames((String[]) Array$.MODULE$.empty(ClassTag$.MODULE$.apply(String.class)));
        setBooleanColumnValues((boolean[]) Array$.MODULE$.empty(ClassTag$.MODULE$.Boolean()));
        setCategoricalNAFillMapKeys((String[]) Array$.MODULE$.empty(ClassTag$.MODULE$.apply(String.class)));
        setCategoricalNAFillMapValues((String[]) Array$.MODULE$.empty(ClassTag$.MODULE$.apply(String.class)));
        setNumericNAFillMapKeys((String[]) Array$.MODULE$.empty(ClassTag$.MODULE$.apply(String.class)));
        setNumericNAFillMapValues((double[]) Array$.MODULE$.empty(ClassTag$.MODULE$.Double()));
        setCharacterNABlanketFill("");
        setNumericNABlanketFill(0.0d);
        setFillMode("auto");
        setDebugEnabled(false);
    }
}
