package com.databricks.labs.automl.sanitize;

import com.databricks.labs.automl.params.PearsonPayload;
import com.databricks.labs.automl.sanitize.SanitizerDefaults;
import com.databricks.labs.automl.utils.DataValidation;
import com.databricks.labs.automl.utils.ValidatedCategoricalFields;
import org.apache.log4j.Logger;
import org.apache.spark.ml.feature.OneHotEncoderEstimator;
import org.apache.spark.ml.feature.StringIndexer;
import org.apache.spark.ml.feature.VectorAssembler;
import org.apache.spark.ml.linalg.Vector;
import org.apache.spark.ml.stat.ChiSquareTest$;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.functions$;
import org.apache.spark.sql.types.DoubleType$;
import org.apache.spark.sql.types.StructField;
import scala.Array$;
import scala.MatchError;
import scala.Predef$;
import scala.Predef$ArrowAssoc$;
import scala.StringContext;
import scala.Tuple2;
import scala.Tuple3;
import scala.collection.Seq;
import scala.collection.Seq$;
import scala.collection.immutable.List;
import scala.collection.immutable.List$;
import scala.collection.immutable.Map;
import scala.collection.mutable.ArrayBuffer;
import scala.collection.mutable.ListBuffer;
import scala.collection.mutable.StringBuilder;
import scala.collection.parallel.ForkJoinTaskSupport;
import scala.collection.parallel.mutable.ParArray;
import scala.concurrent.forkjoin.ForkJoinPool;
import scala.math.package$;
import scala.reflect.ClassTag$;
import scala.reflect.ScalaSignature;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;

/* compiled from: PearsonFiltering.scala */
@ScalaSignature(bytes = "\u0006\u0001\r\u001db\u0001B\u0001\u0003\u00015\u0011\u0001\u0003U3beN|gNR5mi\u0016\u0014\u0018N\\4\u000b\u0005\r!\u0011\u0001C:b]&$\u0018N_3\u000b\u0005\u00151\u0011AB1vi>lGN\u0003\u0002\b\u0011\u0005!A.\u00192t\u0015\tI!\"\u0001\u0006eCR\f'M]5dWNT\u0011aC\u0001\u0004G>l7\u0001A\n\u0005\u00019!\"\u0004\u0005\u0002\u0010%5\t\u0001CC\u0001\u0012\u0003\u0015\u00198-\u00197b\u0013\t\u0019\u0002C\u0001\u0004B]f\u0014VM\u001a\t\u0003+ai\u0011A\u0006\u0006\u0003/\u0011\tQ!\u001e;jYNL!!\u0007\f\u0003\u001d\u0011\u000bG/\u0019,bY&$\u0017\r^5p]B\u00111\u0004H\u0007\u0002\u0005%\u0011QD\u0001\u0002\u0012'\u0006t\u0017\u000e^5{KJ$UMZ1vYR\u001c\b\u0002C\u0010\u0001\u0005\u0003\u0005\u000b\u0011\u0002\u0011\u0002\u0005\u00114\u0007CA\u00118\u001d\t\u0011CG\u0004\u0002$c9\u0011AE\f\b\u0003K-r!AJ\u0015\u000e\u0003\u001dR!\u0001\u000b\u0007\u0002\rq\u0012xn\u001c;?\u0013\u0005Q\u0013aA8sO&\u0011A&L\u0001\u0007CB\f7\r[3\u000b\u0003)J!a\f\u0019\u0002\u000bM\u0004\u0018M]6\u000b\u00051j\u0013B\u0001\u001a4\u0003\r\u0019\u0018\u000f\u001c\u0006\u0003_AJ!!\u000e\u001c\u0002\u000fA\f7m[1hK*\u0011!gM\u0005\u0003qe\u0012\u0011\u0002R1uC\u001a\u0013\u0018-\\3\u000b\u0005U2\u0004\u0002C\u001e\u0001\u0005\u0003\u0005\u000b\u0011\u0002\u001f\u0002)\u0019,\u0017\r^;sK\u000e{G.^7o\u0019&\u001cH/\u001b8h!\ryQhP\u0005\u0003}A\u0011Q!\u0011:sCf\u0004\"\u0001Q\"\u000f\u0005=\t\u0015B\u0001\"\u0011\u0003\u0019\u0001&/\u001a3fM&\u0011A)\u0012\u0002\u0007'R\u0014\u0018N\\4\u000b\u0005\t\u0003\u0002\u0002C$\u0001\u0005\u0003\u0005\u000b\u0011B 
\u0002\u00135|G-\u001a7UsB,\u0007\"B%\u0001\t\u0003Q\u0015A\u0002\u001fj]&$h\b\u0006\u0003L\u00196s\u0005CA\u000e\u0001\u0011\u0015y\u0002\n1\u0001!\u0011\u0015Y\u0004\n1\u0001=\u0011\u00159\u0005\n1\u0001@\u0011\u001d\u0001\u0006A1A\u0005\u000eE\u000bq\u0001\u0015*P\tV\u001bE+F\u0001S\u001f\u0005\u0019\u0016%\u0001+\u0002\u000fA\u0014x\u000eZ;di\"1a\u000b\u0001Q\u0001\u000eI\u000b\u0001\u0002\u0015*P\tV\u001bE\u000b\t\u0005\b1\u0002\u0011\r\u0011\"\u0004Z\u0003%\u0019uJV0W\u00032+V)F\u0001[\u001f\u0005Y\u0016%\u0001/\u0002\u001f\r|goX2bY\u000e,H.\u0019;j_:DaA\u0018\u0001!\u0002\u001bQ\u0016AC\"P-~3\u0016\tT+FA!9\u0001\r\u0001b\u0001\n\u001b\t\u0017!\u0003#F-&\u000bE+S(O+\u0005\u0011w\"A2\"\u0003\u0011\f!b\u00183fm&\fG/[8o\u0011\u00191\u0007\u0001)A\u0007E\u0006QA)\u0012,J\u0003RKuJ\u0014\u0011\t\u000f!\u0004!\u0019!C\u0007S\u000691+U+B%\u0016#U#\u00016\u0010\u0003-\f\u0013\u0001\\\u0001\t?N\fX/\u0019:fI\"1a\u000e\u0001Q\u0001\u000e)\f\u0001bU)V\u0003J+E\t\t\u0005\ba\u0002\u0001\r\u0011\"\u0003r\u0003%yF.\u00192fY\u000e{G.F\u0001@\u0011\u001d\u0019\b\u00011A\u0005\nQ\fQb\u00187bE\u0016d7i\u001c7`I\u0015\fHCA;y!\tya/\u0003\u0002x!\t!QK\\5u\u0011\u001dI(/!AA\u0002}\n1\u0001\u001f\u00132\u0011\u0019Y\b\u0001)Q\u0005\u007f\u0005Qq\f\\1cK2\u001cu\u000e\u001c\u0011\t\u000fu\u0004\u0001\u0019!C\u0005c\u0006aqLZ3biV\u0014Xm]\"pY\"Aq\u0010\u0001a\u0001\n\u0013\t\t!\u0001\t`M\u0016\fG/\u001e:fg\u000e{Gn\u0018\u0013fcR\u0019Q/a\u0001\t\u000fet\u0018\u0011!a\u0001\u007f!9\u0011q\u0001\u0001!B\u0013y\u0014!D0gK\u0006$XO]3t\u0007>d\u0007\u0005\u0003\u0005\u0002\f\u0001\u0001\r\u0011\"\u0003r\u0003Ayf-\u001b7uKJ\u001cF/\u0019;jgRL7\rC\u0005\u0002\u0010\u0001\u0001\r\u0011\"\u0003\u0002\u0012\u0005!rLZ5mi\u0016\u00148\u000b^1uSN$\u0018nY0%KF$2!^A\n\u0011!I\u0018QBA\u0001\u0002\u0004y\u0004bBA\f\u0001\u0001\u0006KaP\u0001\u0012?\u001aLG\u000e^3s'R\fG/[:uS\u000e\u0004\u0003\u0002CA\u000e\u0001\u0001\u0007I\u0011B9\u0002!}3\u0017\u000e\u001c;fe\u0012K'/Z2uS>t\u0007\"CA\u0010\u0001\u00
01\u0007I\u0011BA\u0011\u0003Qyf-\u001b7uKJ$\u0015N]3di&|gn\u0018\u0013fcR\u0019Q/a\t\t\u0011e\fi\"!AA\u0002}Bq!a\n\u0001A\u0003&q(A\t`M&dG/\u001a:ESJ,7\r^5p]\u0002B\u0011\"a\u000b\u0001\u0001\u0004%I!!\f\u0002%}3\u0017\u000e\u001c;fe6\u000bg.^1m-\u0006dW/Z\u000b\u0003\u0003_\u00012aDA\u0019\u0013\r\t\u0019\u0004\u0005\u0002\u0007\t>,(\r\\3\t\u0013\u0005]\u0002\u00011A\u0005\n\u0005e\u0012AF0gS2$XM]'b]V\fGNV1mk\u0016|F%Z9\u0015\u0007U\fY\u0004C\u0005z\u0003k\t\t\u00111\u0001\u00020!A\u0011q\b\u0001!B\u0013\ty#A\n`M&dG/\u001a:NC:,\u0018\r\u001c,bYV,\u0007\u0005\u0003\u0005\u0002D\u0001\u0001\r\u0011\"\u0003r\u0003-yf-\u001b7uKJlu\u000eZ3\t\u0013\u0005\u001d\u0003\u00011A\u0005\n\u0005%\u0013aD0gS2$XM]'pI\u0016|F%Z9\u0015\u0007U\fY\u0005\u0003\u0005z\u0003\u000b\n\t\u00111\u0001@\u0011\u001d\ty\u0005\u0001Q!\n}\nAb\u00184jYR,'/T8eK\u0002B\u0011\"a\u0015\u0001\u0001\u0004%I!!\f\u0002!}\u000bW\u000f^8GS2$XM\u001d(US2,\u0007\"CA,\u0001\u0001\u0007I\u0011BA-\u0003Qy\u0016-\u001e;p\r&dG/\u001a:O)&dWm\u0018\u0013fcR\u0019Q/a\u0017\t\u0013e\f)&!AA\u0002\u0005=\u0002\u0002CA0\u0001\u0001\u0006K!a\f\u0002#}\u000bW\u000f^8GS2$XM\u001d(US2,\u0007\u0005C\u0005\u0002d\u0001\u0001\r\u0011\"\u0003\u0002f\u0005aq\f]1sC2dW\r\\5t[V\u0011\u0011q\r\t\u0004\u001f\u0005%\u0014bAA6!\t\u0019\u0011J\u001c;\t\u0013\u0005=\u0004\u00011A\u0005\n\u0005E\u0014\u0001E0qCJ\fG\u000e\\3mSNlw\fJ3r)\r)\u00181\u000f\u0005\ns\u00065\u0014\u0011!a\u0001\u0003OB\u0001\"a\u001e\u0001A\u0003&\u0011qM\u0001\u000e?B\f'/\u00197mK2L7/\u001c\u0011\t\u0013\u0005m\u0004A1A\u0005\u000e\u0005u\u0014aD0eCR\fg)[3mI:\u000bW.Z:\u0016\u0003qBq!!!\u0001A\u00035A(\u0001\t`I\u0006$\u0018MR5fY\u0012t\u0015-\\3tA!I\u0011Q\u0011\u0001C\u0002\u00135\u0011qQ\u0001\u0010?\u0012\fG/\u0019$jK2$G+\u001f9fgV\u0011\u0011\u0011\u0012\t\u0005\u001fu\nY\t\u0005\u0003\u0002\u000e\u0006MUBAAH\u0015\r\t\tJN\u0001\u0006if\u0004Xm]\u0005\u0005\u0003+\u000byIA\u0006TiJ,8\r\u001e$jK2$\u0007\u0002CAM\u0001\u0001\u0006i!!#\u0002!}#\u0017\r^1GS\u001
6dG\rV=qKN\u0004\u0003bBAO\u0001\u0011\u0005\u0011qT\u0001\fg\u0016$H*\u00192fY\u000e{G\u000e\u0006\u0003\u0002\"\u0006\rV\"\u0001\u0001\t\u000f\u0005\u0015\u00161\u0014a\u0001\u007f\u0005)a/\u00197vK\"9\u0011\u0011\u0016\u0001\u0005\u0002\u0005-\u0016AD:fi\u001a+\u0017\r^;sKN\u001cu\u000e\u001c\u000b\u0005\u0003C\u000bi\u000bC\u0004\u0002&\u0006\u001d\u0006\u0019A \t\u000f\u0005E\u0006\u0001\"\u0001\u00024\u0006\u00112/\u001a;GS2$XM]*uCRL7\u000f^5d)\u0011\t\t+!.\t\u000f\u0005\u0015\u0016q\u0016a\u0001\u007f!9\u0011\u0011\u0018\u0001\u0005\u0002\u0005m\u0016AE:fi\u001aKG\u000e^3s\t&\u0014Xm\u0019;j_:$B!!)\u0002>\"9\u0011QUA\\\u0001\u0004y\u0004bBAa\u0001\u0011\u0005\u00111Y\u0001\u0015g\u0016$h)\u001b7uKJl\u0015M\\;bYZ\u000bG.^3\u0015\t\u0005\u0005\u0016Q\u0019\u0005\t\u0003K\u000by\f1\u0001\u00020!9\u0011\u0011\u0019\u0001\u0005\u0002\u0005%G\u0003BAQ\u0003\u0017D\u0001\"!*\u0002H\u0002\u0007\u0011q\r\u0005\b\u0003\u001f\u0004A\u0011AAi\u00035\u0019X\r\u001e$jYR,'/T8eKR!\u0011\u0011UAj\u0011\u001d\t)+!4A\u0002}Bq!a6\u0001\t\u0003\tI.\u0001\ntKR\fU\u000f^8GS2$XM\u001d(US2,G\u0003BAQ\u00037D\u0001\"!*\u0002V\u0002\u0007\u0011q\u0006\u0005\b\u0003?\u0004A\u0011AAq\u00039\u0019X\r\u001e)be\u0006dG.\u001a7jg6$B!!)\u0002d\"A\u0011QUAo\u0001\u0004\t9\u0007\u0003\u0004\u0002h\u0002!\t!]\u0001\fO\u0016$H*\u00192fY\u000e{G\u000e\u0003\u0004\u0002l\u0002!\t!]\u0001\u000fO\u0016$h)Z1ukJ,7oQ8m\u0011\u0019\ty\u000f\u0001C\u0001c\u0006\u0011r-\u001a;GS2$XM]*uCRL7\u000f^5d\u0011\u0019\t\u0019\u0010\u0001C\u0001c\u0006\u0011r-\u001a;GS2$XM\u001d#je\u0016\u001cG/[8o\u0011\u001d\t9\u0010\u0001C\u0001\u0003[\tAcZ3u\r&dG/\u001a:NC:,\u0018\r\u001c,bYV,\u0007BBA~\u0001\u0011\u0005\u0011/A\u0007hKR4\u0015\u000e\u001c;fe6{G-\u001a\u0005\b\u0003\u007f\u0004A\u0011AA\u0017\u0003I9W\r^!vi>4\u0015\u000e\u001c;fe:#\u0016\u000e\\3\t\u000f\t\r\u0001\u0001\"\u0001\u0002f\u0005qq-\u001a;QCJ\fG\u000e\\3mSNl\u0007\"\u0003B\u0004\u0001\u0001\u0007I\u0011BA?\u0003Qy\u0006/Z1sg>tg+Z2u_J4\u0015.\u001a7eg
\"I!1\u0002\u0001A\u0002\u0013%!QB\u0001\u0019?B,\u0017M]:p]Z+7\r^8s\r&,G\u000eZ:`I\u0015\fHcA;\u0003\u0010!A\u0011P!\u0003\u0002\u0002\u0003\u0007A\bC\u0004\u0003\u0014\u0001\u0001\u000b\u0015\u0002\u001f\u0002+}\u0003X-\u0019:t_:4Vm\u0019;pe\u001aKW\r\u001c3tA!I!q\u0003\u0001A\u0002\u0013%\u0011QP\u0001\u001d?B,\u0017M]:p]:{gnQ1uK\u001e|'/[2bY\u001aKW\r\u001c3t\u0011%\u0011Y\u0002\u0001a\u0001\n\u0013\u0011i\"\u0001\u0011`a\u0016\f'o]8o\u001d>t7)\u0019;fO>\u0014\u0018nY1m\r&,G\u000eZ:`I\u0015\fHcA;\u0003 !A\u0011P!\u0007\u0002\u0002\u0003\u0007A\bC\u0004\u0003$\u0001\u0001\u000b\u0015\u0002\u001f\u0002;}\u0003X-\u0019:t_:tuN\\\"bi\u0016<wN]5dC24\u0015.\u001a7eg\u0002BqAa\n\u0001\t\u0013\u0011I#\u0001\u0010tKR\u0004V-\u0019:t_:tuN\\\"bi\u0016<wN]5dC24\u0015.\u001a7egR!\u0011\u0011\u0015B\u0016\u0011\u001d\t)K!\nA\u0002qBqAa\f\u0001\t\u0013\u0011\t$\u0001\ftKR\u0004V-\u0019:t_:4Vm\u0019;pe\u001aKW\r\u001c3t)\u0011\t\tKa\r\t\u000f\u0005\u0015&Q\u0006a\u0001y!9!q\u0007\u0001\u0005\n\te\u0012A\u00032vS2$7\t[5TcR1!1\bB,\u00057\u0002bA!\u0010\u0003F\t-c\u0002\u0002B \u0005\u0007r1A\nB!\u0013\u0005\t\u0012BA\u001b\u0011\u0013\u0011\u00119E!\u0013\u0003\t1K7\u000f\u001e\u0006\u0003kA\u0001BA!\u0014\u0003T5\u0011!q\n\u0006\u0004\u0005#\"\u0011A\u00029be\u0006l7/\u0003\u0003\u0003V\t=#A\u0004)fCJ\u001cxN\u001c)bs2|\u0017\r\u001a\u0005\b\u00053\u0012)\u00041\u0001!\u0003\u0011!\u0017\r^1\t\u000f\tu#Q\u0007a\u0001\u007f\u0005ia-Z1ukJ,7i\u001c7v[:DqA!\u0019\u0001\t\u0013\u0011\u0019'\u0001\nbGF,\u0018N]3DCJ$\u0017N\\1mSRLH\u0003\u0002B3\u0005W\u00022a\u0004B4\u0013\r\u0011I\u0007\u0005\u0002\u0005\u0019>tw\rC\u0004\u0003n\t}\u0003\u0019A 
\u0002\r\r|G.^7o\u0011\u001d\u0011\t\b\u0001C\u0005\u0005g\n1CZ3biV\u0014Xm]\"be\u0012Lg.\u00197jif$\"A!\u001e\u0011\t=i$q\u000f\t\u0007\u001f\tetH!\u001a\n\u0007\tm\u0004C\u0001\u0004UkBdWM\r\u0005\b\u0005\u007f\u0002A\u0011\u0002BA\u0003I\u0011Xm\u001d;sS\u000e$h)Z1ukJ,7+\u001a;\u0015\u0005\u0005\u0005\u0006b\u0002BC\u0001\u0011%!qQ\u0001\fe\u00164Vm\u0019;pe&TX\rF\u0001!\u0011\u001d\u0011Y\t\u0001C\u0005\u0005\u001b\u000b1BZ5mi\u0016\u00148\t[5TcR1!q\u0012BI\u0005+\u0003RA!\u0010\u0003F}B\u0001Ba%\u0003\n\u0002\u0007!1H\u0001\fgR\fG\u000fU1zY>\fG\r\u0003\u0005\u0003\u0018\n%\u0005\u0019AA\u0018\u0003-1\u0017\u000e\u001c;feZ\u000bG.^3\t\u000f\tm\u0005\u0001\"\u0003\u0003\u001e\u0006\t\u0012/^1oi&dWmR3oKJ\fGo\u001c:\u0015\t\u0005=\"q\u0014\u0005\t\u0005C\u0013I\n1\u0001\u0003<\u0005q\u0001/Z1sg>t'+Z:vYR\u001c\bb\u0002BS\u0001\u0011%!qU\u0001\u0011M&dG/\u001a:DY\u0006\u001c8/\u001b4jKJ$2\u0001\tBU\u0011%\u0011YKa)\u0011\u0002\u0003\u0007A(\u0001\u0007jO:|'/\u001a$jK2$7\u000fC\u0004\u00030\u0002!\tA!-\u0002\u0019\u0019LG\u000e^3s\r&,G\u000eZ:\u0015\u0007\u0001\u0012\u0019\fC\u0005\u0003,\n5\u0006\u0013!a\u0001y!9!q\u0017\u0001\u0005\n\te\u0016!\u0006:fOJ,7o]8s\u001b\u0006tW/\u00197GS2$XM\u001d\u000b\u0004y\tm\u0006\u0002\u0003B_\u0005k\u0003\rAa0\u0002\u001f\r|'O]3mCRLwN\u001c#bi\u0006\u0004b\u0001\u0011Ba\u007f\t\u0015\u0017b\u0001Bb\u000b\n\u0019Q*\u00199\u0011\u000f=\u0011I(a\f\u00020!9!\u0011\u001a\u0001\u0005\n\t-\u0017\u0001\u0006:fOJ,7o]5p]\u0006+Ho\u001c$jYR,'\u000fF\u0002=\u0005\u001bD\u0001B!0\u0003H\u0002\u0007!q\u0018\u0005\b\u0005#\u0004A\u0011\u0002Bj\u0003=1\u0017\u000e\u001c;feJ+wM]3tg>\u0014Hc\u0001\u0011\u0003V\"I!1\u0016Bh!\u0003\u0005\r\u0001\u0010\u0005\b\u00053\u0004A\u0011\u0002Bn\u0003u\u0019\u0017\r\\2vY\u0006$XMU3he\u0016\u001c8/[8o\u0007>4\u0018M]5b]\u000e,G\u0003\u0002Bo\u0005W\u0004rAa8\u0003j~\u0012)-\u0004\u0002\u0003b*!!1\u001dBs\u0003%IW.\\;uC\ndWMC\u0002\u0003hB\t!bY8mY\u0016\u001cG/[8o\u0013\u0011\u0011\u0019M!9\t\u0013\t-&q\u001bI\
u0001\u0002\u0004a\u0004b\u0002Bx\u0001\u0011%!\u0011_\u0001\u0016G>4\u0018M]5b]\u000e,7)\u00197dk2\fG/[8o)!\u0011yLa=\u0003x\nu\bb\u0002B{\u0005[\u0004\raP\u0001\u0006M&,G\u000e\u001a\u0005\t\u0005s\u0014i\u000f1\u0001\u0003|\u00061\u0011M^4NCB\u0004b\u0001\u0011Ba\u007f\u0005=\u0002\u0002\u0003B��\u0005[\u0004\r!a\f\u0002\u0011I|woQ8v]RD\u0011ba\u0001\u0001#\u0003%\ta!\u0002\u0002-\u0019LG\u000e^3s\r&,G\u000eZ:%I\u00164\u0017-\u001e7uIE*\"aa\u0002+\u0007q\u001aIa\u000b\u0002\u0004\fA!1QBB\f\u001b\t\u0019yA\u0003\u0003\u0004\u0012\rM\u0011!C;oG\",7m[3e\u0015\r\u0019)\u0002E\u0001\u000bC:tw\u000e^1uS>t\u0017\u0002BB\r\u0007\u001f\u0011\u0011#\u001e8dQ\u0016\u001c7.\u001a3WCJL\u0017M\\2f\u0011%\u0019i\u0002AI\u0001\n\u0013\u0019)!\u0001\u000egS2$XM]\"mCN\u001c\u0018NZ5fe\u0012\"WMZ1vYR$\u0013\u0007C\u0005\u0004\"\u0001\t\n\u0011\"\u0003\u0004\u0006\u0005Ib-\u001b7uKJ\u0014Vm\u001a:fgN|'\u000f\n3fM\u0006,H\u000e\u001e\u00132\u0011%\u0019)\u0003AI\u0001\n\u0013\u0019)!A\u0014dC2\u001cW\u000f\\1uKJ+wM]3tg&|gnQ8wCJL\u0017M\\2fI\u0011,g-Y;mi\u0012\n\u0004")
/* loaded from: input_file:com/databricks/labs/automl/sanitize/PearsonFiltering.class */
public class PearsonFiltering implements DataValidation, SanitizerDefaults {
    // Input data and configuration. No assignment to these fields is visible in this chunk;
    // presumably the constructor sets them (TODO confirm).
    private final Dataset<Row> df;
    private final String[] featureColumnListing;
    private final String modelType;
    // Name constants. The same-named accessor methods return string literals directly
    // instead of reading these fields.
    private final String PRODUCT;
    private final String COV_VALUE;
    private final String DEVIATION;
    private final String SQUARED;
    // Mutable configuration, changed through the fluent setXxx methods.
    private String com$databricks$labs$automl$sanitize$PearsonFiltering$$_labelCol;
    private String _featuresCol;
    private String com$databricks$labs$automl$sanitize$PearsonFiltering$$_filterStatistic;
    private String _filterDirection;
    private double com$databricks$labs$automl$sanitize$PearsonFiltering$$_filterManualValue;
    private String _filterMode;
    private double com$databricks$labs$automl$sanitize$PearsonFiltering$$_autoFilterNTile;
    private int _parallelism;
    private final String[] _dataFieldNames;
    private final StructField[] _dataFieldTypes;
    // Working sets written by restrictFeatureSet().
    private String[] com$databricks$labs$automl$sanitize$PearsonFiltering$$_pearsonVectorFields;
    private String[] _pearsonNonCategoricalFields;
    // Not final: these four are assigned by the SanitizerDefaults `_setter_$..._$eq` methods,
    // i.e. outside any constructor, which Java does not allow for final fields.
    private String[] _allowedStats;
    private String[] _allowedFilterDirections;
    private String[] _allowedFilterModes;
    private String[] allowableScalers;
    // Not final: assigned on first use by the logger$lzycompute method.
    private transient Logger com$databricks$labs$automl$utils$DataValidation$$logger;
    // Set to true once the logger field above has been initialized.
    private volatile transient boolean bitmap$trans$0;

    /**
     * Returns the array held in the {@code _allowedStats} field.
     *
     * @return the backing array itself; no copy is made
     */
    @Override // com.databricks.labs.automl.sanitize.SanitizerDefaults
    public final String[] _allowedStats() {
        final String[] allowed = this._allowedStats;
        return allowed;
    }

    /**
     * Returns the array held in the {@code _allowedFilterDirections} field.
     *
     * @return the backing array itself; no copy is made
     */
    @Override // com.databricks.labs.automl.sanitize.SanitizerDefaults
    public final String[] _allowedFilterDirections() {
        final String[] directions = this._allowedFilterDirections;
        return directions;
    }

    /**
     * Returns the array held in the {@code _allowedFilterModes} field.
     *
     * @return the backing array itself; no copy is made
     */
    @Override // com.databricks.labs.automl.sanitize.SanitizerDefaults
    public final String[] _allowedFilterModes() {
        final String[] modes = this._allowedFilterModes;
        return modes;
    }

    /**
     * Returns the array held in the {@code allowableScalers} field.
     *
     * @return the backing array itself; no copy is made
     */
    @Override // com.databricks.labs.automl.sanitize.SanitizerDefaults
    public final String[] allowableScalers() {
        final String[] scalers = this.allowableScalers;
        return scalers;
    }

    /**
     * Stores the given array in the {@code _allowedStats} field.
     *
     * @param strArr the array to store; the reference is kept as-is, not copied
     */
    @Override // com.databricks.labs.automl.sanitize.SanitizerDefaults
    public final void com$databricks$labs$automl$sanitize$SanitizerDefaults$_setter_$_allowedStats_$eq(String[] strArr) {
        final String[] incoming = strArr;
        this._allowedStats = incoming;
    }

    /**
     * Stores the given array in the {@code _allowedFilterDirections} field.
     *
     * @param strArr the array to store; the reference is kept as-is, not copied
     */
    @Override // com.databricks.labs.automl.sanitize.SanitizerDefaults
    public final void com$databricks$labs$automl$sanitize$SanitizerDefaults$_setter_$_allowedFilterDirections_$eq(String[] strArr) {
        final String[] incoming = strArr;
        this._allowedFilterDirections = incoming;
    }

    /**
     * Stores the given array in the {@code _allowedFilterModes} field.
     *
     * @param strArr the array to store; the reference is kept as-is, not copied
     */
    @Override // com.databricks.labs.automl.sanitize.SanitizerDefaults
    public final void com$databricks$labs$automl$sanitize$SanitizerDefaults$_setter_$_allowedFilterModes_$eq(String[] strArr) {
        final String[] incoming = strArr;
        this._allowedFilterModes = incoming;
    }

    /**
     * Stores the given array in the {@code allowableScalers} field.
     *
     * @param strArr the array to store; the reference is kept as-is, not copied
     */
    @Override // com.databricks.labs.automl.sanitize.SanitizerDefaults
    public final void com$databricks$labs$automl$sanitize$SanitizerDefaults$_setter_$allowableScalers_$eq(String[] strArr) {
        final String[] incoming = strArr;
        this.allowableScalers = incoming;
    }

    /**
     * Delegates to {@code SanitizerDefaults.Cclass.defaultLabelCol}, passing this instance.
     *
     * @return whatever the trait implementation returns
     */
    @Override // com.databricks.labs.automl.sanitize.SanitizerDefaults
    public String defaultLabelCol() {
        final String labelCol = SanitizerDefaults.Cclass.defaultLabelCol(this);
        return labelCol;
    }

    /**
     * Delegates to {@code SanitizerDefaults.Cclass.defaultFeaturesCol}, passing this instance.
     *
     * @return whatever the trait implementation returns
     */
    @Override // com.databricks.labs.automl.sanitize.SanitizerDefaults
    public String defaultFeaturesCol() {
        final String featuresCol = SanitizerDefaults.Cclass.defaultFeaturesCol(this);
        return featuresCol;
    }

    /**
     * Delegates to {@code SanitizerDefaults.Cclass.defaultPearsonFilterStatistic}, passing this instance.
     *
     * @return whatever the trait implementation returns
     */
    @Override // com.databricks.labs.automl.sanitize.SanitizerDefaults
    public String defaultPearsonFilterStatistic() {
        final String statistic = SanitizerDefaults.Cclass.defaultPearsonFilterStatistic(this);
        return statistic;
    }

    /**
     * Delegates to {@code SanitizerDefaults.Cclass.defaultPearsonFilterDirection}, passing this instance.
     *
     * @return whatever the trait implementation returns
     */
    @Override // com.databricks.labs.automl.sanitize.SanitizerDefaults
    public String defaultPearsonFilterDirection() {
        final String direction = SanitizerDefaults.Cclass.defaultPearsonFilterDirection(this);
        return direction;
    }

    /**
     * Delegates to {@code SanitizerDefaults.Cclass.defaultPearsonFilterManualValue}, passing this instance.
     *
     * @return whatever the trait implementation returns
     */
    @Override // com.databricks.labs.automl.sanitize.SanitizerDefaults
    public double defaultPearsonFilterManualValue() {
        final double manualValue = SanitizerDefaults.Cclass.defaultPearsonFilterManualValue(this);
        return manualValue;
    }

    /**
     * Delegates to {@code SanitizerDefaults.Cclass.defaultPearsonFilterMode}, passing this instance.
     *
     * @return whatever the trait implementation returns
     */
    @Override // com.databricks.labs.automl.sanitize.SanitizerDefaults
    public String defaultPearsonFilterMode() {
        final String mode = SanitizerDefaults.Cclass.defaultPearsonFilterMode(this);
        return mode;
    }

    /**
     * Delegates to {@code SanitizerDefaults.Cclass.defaultPearsonAutoFilterNTile}, passing this instance.
     *
     * @return whatever the trait implementation returns
     */
    @Override // com.databricks.labs.automl.sanitize.SanitizerDefaults
    public double defaultPearsonAutoFilterNTile() {
        final double nTile = SanitizerDefaults.Cclass.defaultPearsonAutoFilterNTile(this);
        return nTile;
    }

    /**
     * Delegates to {@code SanitizerDefaults.Cclass.defaultRenamedFeaturesCol}, passing this instance.
     *
     * @return whatever the trait implementation returns
     */
    @Override // com.databricks.labs.automl.sanitize.SanitizerDefaults
    public String defaultRenamedFeaturesCol() {
        final String renamedCol = SanitizerDefaults.Cclass.defaultRenamedFeaturesCol(this);
        return renamedCol;
    }

    /**
     * Delegates to {@code SanitizerDefaults.Cclass.defaultScalerType}, passing this instance.
     *
     * @return whatever the trait implementation returns
     */
    @Override // com.databricks.labs.automl.sanitize.SanitizerDefaults
    public String defaultScalerType() {
        final String scalerType = SanitizerDefaults.Cclass.defaultScalerType(this);
        return scalerType;
    }

    /**
     * Delegates to {@code SanitizerDefaults.Cclass.defaultScalerMin}, passing this instance.
     *
     * @return whatever the trait implementation returns
     */
    @Override // com.databricks.labs.automl.sanitize.SanitizerDefaults
    public double defaultScalerMin() {
        final double scalerMin = SanitizerDefaults.Cclass.defaultScalerMin(this);
        return scalerMin;
    }

    /**
     * Delegates to {@code SanitizerDefaults.Cclass.defaultScalerMax}, passing this instance.
     *
     * @return whatever the trait implementation returns
     */
    @Override // com.databricks.labs.automl.sanitize.SanitizerDefaults
    public double defaultScalerMax() {
        final double scalerMax = SanitizerDefaults.Cclass.defaultScalerMax(this);
        return scalerMax;
    }

    /**
     * Delegates to {@code SanitizerDefaults.Cclass.defaultStandardScalerMeanFlag}, passing this instance.
     *
     * @return whatever the trait implementation returns
     */
    @Override // com.databricks.labs.automl.sanitize.SanitizerDefaults
    public boolean defaultStandardScalerMeanFlag() {
        final boolean meanFlag = SanitizerDefaults.Cclass.defaultStandardScalerMeanFlag(this);
        return meanFlag;
    }

    /**
     * Delegates to {@code SanitizerDefaults.Cclass.defaultStandardScalerStdDevFlag}, passing this instance.
     *
     * @return whatever the trait implementation returns
     */
    @Override // com.databricks.labs.automl.sanitize.SanitizerDefaults
    public boolean defaultStandardScalerStdDevFlag() {
        final boolean stdDevFlag = SanitizerDefaults.Cclass.defaultStandardScalerStdDevFlag(this);
        return stdDevFlag;
    }

    /**
     * Delegates to {@code SanitizerDefaults.Cclass.defaultPNorm}, passing this instance.
     *
     * @return whatever the trait implementation returns
     */
    @Override // com.databricks.labs.automl.sanitize.SanitizerDefaults
    public double defaultPNorm() {
        final double pNorm = SanitizerDefaults.Cclass.defaultPNorm(this);
        return pNorm;
    }

    /* JADX WARN: Multi-variable type inference failed */
    /* JADX WARN: Type inference failed for: r0v0 */
    /* JADX WARN: Type inference failed for: r0v1, types: [java.lang.Throwable] */
    /* JADX WARN: Type inference failed for: r0v5 */
    private Logger com$databricks$labs$automl$utils$DataValidation$$logger$lzycompute() {
        Logger logger;
        ?? r0 = this;
        synchronized (r0) {
            if (!this.bitmap$trans$0) {
                logger = Logger.getLogger(getClass());
                this.com$databricks$labs$automl$utils$DataValidation$$logger = logger;
                this.bitmap$trans$0 = true;
            }
            BoxedUnit boxedUnit = BoxedUnit.UNIT;
            r0 = r0;
            return this.com$databricks$labs$automl$utils$DataValidation$$logger;
        }
    }

    /**
     * Returns the logger, initializing it on first access.
     *
     * @return the stored logger when {@code bitmap$trans$0} is already set, otherwise the
     *     result of the {@code logger$lzycompute} method
     */
    @Override // com.databricks.labs.automl.utils.DataValidation
    public Logger com$databricks$labs$automl$utils$DataValidation$$logger() {
        if (!this.bitmap$trans$0) {
            return com$databricks$labs$automl$utils$DataValidation$$logger$lzycompute();
        }
        return this.com$databricks$labs$automl$utils$DataValidation$$logger;
    }

    /**
     * Delegates to {@code DataValidation.Cclass._allowableDateTimeConversions}, passing this instance.
     *
     * @return whatever the trait implementation returns
     */
    @Override // com.databricks.labs.automl.utils.DataValidation
    public List<String> _allowableDateTimeConversions() {
        final List<String> conversions = DataValidation.Cclass._allowableDateTimeConversions(this);
        return conversions;
    }

    /**
     * Delegates to {@code DataValidation.Cclass._allowableCategoricalFilterModes}, passing this instance.
     *
     * @return whatever the trait implementation returns
     */
    @Override // com.databricks.labs.automl.utils.DataValidation
    public List<String> _allowableCategoricalFilterModes() {
        final List<String> modes = DataValidation.Cclass._allowableCategoricalFilterModes(this);
        return modes;
    }

    /**
     * Delegates to {@code DataValidation.Cclass._allowableCardinalilties}, passing this instance.
     *
     * @return whatever the trait implementation returns
     */
    @Override // com.databricks.labs.automl.utils.DataValidation
    public List<String> _allowableCardinalilties() {
        final List<String> cardinalities = DataValidation.Cclass._allowableCardinalilties(this);
        return cardinalities;
    }

    /**
     * Delegates to {@code DataValidation.Cclass.invalidateSelection} with this instance and
     * both arguments unchanged.
     *
     * @param str forwarded as the first argument
     * @param seq forwarded as the second argument
     * @return whatever the trait implementation returns
     */
    @Override // com.databricks.labs.automl.utils.DataValidation
    public String invalidateSelection(String str, Seq<String> seq) {
        final String message = DataValidation.Cclass.invalidateSelection(this, str, seq);
        return message;
    }

    /**
     * Delegates to {@code DataValidation.Cclass.oneHotEncodeStrings} with this instance and
     * the given list.
     *
     * @param list forwarded unchanged
     * @return the tuple produced by the trait implementation
     */
    @Override // com.databricks.labs.automl.utils.DataValidation
    public Tuple2<OneHotEncoderEstimator, String[]> oneHotEncodeStrings(List<String> list) {
        final Tuple2<OneHotEncoderEstimator, String[]> encoded = DataValidation.Cclass.oneHotEncodeStrings(this, list);
        return encoded;
    }

    /**
     * Delegates to {@code DataValidation.Cclass.indexStrings} with this instance and the
     * given list.
     *
     * @param list forwarded unchanged
     * @return the tuple produced by the trait implementation
     */
    @Override // com.databricks.labs.automl.utils.DataValidation
    public Tuple2<StringIndexer[], String[]> indexStrings(List<String> list) {
        final Tuple2<StringIndexer[], String[]> indexed = DataValidation.Cclass.indexStrings(this, list);
        return indexed;
    }

    /**
     * Delegates to {@code DataValidation.Cclass.convertDateAndTime} with this instance and
     * all four arguments in their original order.
     *
     * @param dataset forwarded unchanged
     * @param list forwarded unchanged
     * @param list2 forwarded unchanged
     * @param str forwarded unchanged
     * @return the tuple produced by the trait implementation
     */
    @Override // com.databricks.labs.automl.utils.DataValidation
    public Tuple2<Dataset<Row>, List<String>> convertDateAndTime(Dataset<Row> dataset, List<String> list, List<String> list2, String str) {
        final Tuple2<Dataset<Row>, List<String>> converted =
                DataValidation.Cclass.convertDateAndTime(this, dataset, list, list2, str);
        return converted;
    }

    /**
     * Delegates to {@code DataValidation.Cclass.generateAssembly} with this instance and all
     * three arguments in their original order.
     *
     * @param list forwarded unchanged
     * @param list2 forwarded unchanged
     * @param str forwarded unchanged
     * @return the triple produced by the trait implementation
     */
    @Override // com.databricks.labs.automl.utils.DataValidation
    public Tuple3<StringIndexer[], String[], VectorAssembler> generateAssembly(List<String> list, List<String> list2, String str) {
        final Tuple3<StringIndexer[], String[], VectorAssembler> assembly =
                DataValidation.Cclass.generateAssembly(this, list, list2, str);
        return assembly;
    }

    /**
     * Delegates to {@code DataValidation.Cclass.validateLabelAndFeatures} with this instance
     * and all three arguments in their original order.
     *
     * @param dataset forwarded unchanged
     * @param str forwarded unchanged
     * @param str2 forwarded unchanged
     */
    @Override // com.databricks.labs.automl.utils.DataValidation
    public void validateLabelAndFeatures(Dataset<Row> dataset, String str, String str2) {
        final PearsonFiltering self = this;
        DataValidation.Cclass.validateLabelAndFeatures(self, dataset, str, str2);
    }

    /**
     * Delegates to {@code DataValidation.Cclass.validateFieldPresence} with this instance and
     * both arguments unchanged.
     *
     * @param dataset forwarded unchanged
     * @param str forwarded unchanged
     */
    @Override // com.databricks.labs.automl.utils.DataValidation
    public void validateFieldPresence(Dataset<Row> dataset, String str) {
        final PearsonFiltering self = this;
        DataValidation.Cclass.validateFieldPresence(self, dataset, str);
    }

    /**
     * Delegates to {@code DataValidation.Cclass.validateInputDataframe} with this instance
     * and the given dataset.
     *
     * @param dataset forwarded unchanged
     */
    @Override // com.databricks.labs.automl.utils.DataValidation
    public void validateInputDataframe(Dataset<Row> dataset) {
        final PearsonFiltering self = this;
        DataValidation.Cclass.validateInputDataframe(self, dataset);
    }

    /**
     * Delegates to {@code DataValidation.Cclass.validateCardinality} with this instance and
     * all four arguments in their original order.
     *
     * @param dataset forwarded unchanged
     * @param list forwarded unchanged
     * @param i forwarded unchanged
     * @param i2 forwarded unchanged
     * @return the result produced by the trait implementation
     */
    @Override // com.databricks.labs.automl.utils.DataValidation
    public ValidatedCategoricalFields validateCardinality(Dataset<Row> dataset, List<String> list, int i, int i2) {
        final ValidatedCategoricalFields validated =
                DataValidation.Cclass.validateCardinality(this, dataset, list, i, i2);
        return validated;
    }

    /**
     * Delegates to {@code DataValidation.Cclass.validateCardinality$default$3}, passing this instance.
     *
     * @return whatever the trait implementation returns
     */
    @Override // com.databricks.labs.automl.utils.DataValidation
    public int validateCardinality$default$3() {
        final int defaultValue = DataValidation.Cclass.validateCardinality$default$3(this);
        return defaultValue;
    }

    /**
     * Delegates to {@code DataValidation.Cclass.validateCardinality$default$4}, passing this instance.
     *
     * @return whatever the trait implementation returns
     */
    @Override // com.databricks.labs.automl.utils.DataValidation
    public int validateCardinality$default$4() {
        final int defaultValue = DataValidation.Cclass.validateCardinality$default$4(this);
        return defaultValue;
    }

    /**
     * Returns the literal {@code "product"}.
     *
     * @return always {@code "product"}
     */
    private final String PRODUCT() {
        final String name = "product";
        return name;
    }

    /**
     * Returns the literal {@code "cov_calculation"}.
     *
     * @return always {@code "cov_calculation"}
     */
    private final String COV_VALUE() {
        final String name = "cov_calculation";
        return name;
    }

    /**
     * Returns the literal {@code "_deviation"}.
     *
     * @return always {@code "_deviation"}
     */
    private final String DEVIATION() {
        final String suffix = "_deviation";
        return suffix;
    }

    /**
     * Returns the literal {@code "_squared"}.
     *
     * @return always {@code "_squared"}
     */
    private final String SQUARED() {
        final String suffix = "_squared";
        return suffix;
    }

    /**
     * Reads the label column field.
     *
     * @return the current value of the {@code _labelCol} field
     */
    public String com$databricks$labs$automl$sanitize$PearsonFiltering$$_labelCol() {
        final String labelCol = this.com$databricks$labs$automl$sanitize$PearsonFiltering$$_labelCol;
        return labelCol;
    }

    /**
     * Writes the label column field. No validation happens here.
     *
     * @param str the new value of the {@code _labelCol} field
     */
    private void com$databricks$labs$automl$sanitize$PearsonFiltering$$_labelCol_$eq(String str) {
        final String updated = str;
        this.com$databricks$labs$automl$sanitize$PearsonFiltering$$_labelCol = updated;
    }

    /**
     * Reads the {@code _featuresCol} field.
     *
     * @return the current value of the field
     */
    private String _featuresCol() {
        final String featuresCol = this._featuresCol;
        return featuresCol;
    }

    /**
     * Writes the {@code _featuresCol} field. No validation happens here.
     *
     * @param str the new value of the field
     */
    private void _featuresCol_$eq(String str) {
        final String updated = str;
        this._featuresCol = updated;
    }

    /**
     * Reads the filter statistic field.
     *
     * @return the current value of the {@code _filterStatistic} field
     */
    public String com$databricks$labs$automl$sanitize$PearsonFiltering$$_filterStatistic() {
        final String statistic = this.com$databricks$labs$automl$sanitize$PearsonFiltering$$_filterStatistic;
        return statistic;
    }

    /**
     * Writes the filter statistic field. No validation happens here.
     *
     * @param str the new value of the {@code _filterStatistic} field
     */
    private void com$databricks$labs$automl$sanitize$PearsonFiltering$$_filterStatistic_$eq(String str) {
        final String updated = str;
        this.com$databricks$labs$automl$sanitize$PearsonFiltering$$_filterStatistic = updated;
    }

    /**
     * Reads the {@code _filterDirection} field.
     *
     * @return the current value of the field
     */
    private String _filterDirection() {
        final String direction = this._filterDirection;
        return direction;
    }

    /**
     * Writes the {@code _filterDirection} field. No validation happens here.
     *
     * @param str the new value of the field
     */
    private void _filterDirection_$eq(String str) {
        final String updated = str;
        this._filterDirection = updated;
    }

    /**
     * Reads the manual filter value field.
     *
     * @return the current value of the {@code _filterManualValue} field
     */
    public double com$databricks$labs$automl$sanitize$PearsonFiltering$$_filterManualValue() {
        final double manualValue = this.com$databricks$labs$automl$sanitize$PearsonFiltering$$_filterManualValue;
        return manualValue;
    }

    /**
     * Writes the manual filter value field. No validation happens here.
     *
     * @param d the new value of the {@code _filterManualValue} field
     */
    private void com$databricks$labs$automl$sanitize$PearsonFiltering$$_filterManualValue_$eq(double d) {
        final double updated = d;
        this.com$databricks$labs$automl$sanitize$PearsonFiltering$$_filterManualValue = updated;
    }

    /**
     * Reads the {@code _filterMode} field.
     *
     * @return the current value of the field
     */
    private String _filterMode() {
        final String mode = this._filterMode;
        return mode;
    }

    /**
     * Writes the {@code _filterMode} field. No validation happens here.
     *
     * @param str the new value of the field
     */
    private void _filterMode_$eq(String str) {
        final String updated = str;
        this._filterMode = updated;
    }

    /**
     * Reads the auto-filter n-tile field.
     *
     * @return the current value of the {@code _autoFilterNTile} field
     */
    public double com$databricks$labs$automl$sanitize$PearsonFiltering$$_autoFilterNTile() {
        final double nTile = this.com$databricks$labs$automl$sanitize$PearsonFiltering$$_autoFilterNTile;
        return nTile;
    }

    /**
     * Writes the auto-filter n-tile field. No validation happens here.
     *
     * @param d the new value of the {@code _autoFilterNTile} field
     */
    private void com$databricks$labs$automl$sanitize$PearsonFiltering$$_autoFilterNTile_$eq(double d) {
        final double updated = d;
        this.com$databricks$labs$automl$sanitize$PearsonFiltering$$_autoFilterNTile = updated;
    }

    /**
     * Reads the {@code _parallelism} field.
     *
     * @return the current value of the field
     */
    private int _parallelism() {
        final int parallelism = this._parallelism;
        return parallelism;
    }

    /**
     * Writes the {@code _parallelism} field. No validation happens here.
     *
     * @param i the new value of the field
     */
    private void _parallelism_$eq(int i) {
        final int updated = i;
        this._parallelism = updated;
    }

    /**
     * Reads the {@code _dataFieldNames} field.
     *
     * @return the backing array itself; no copy is made
     */
    private final String[] _dataFieldNames() {
        final String[] names = this._dataFieldNames;
        return names;
    }

    /**
     * Reads the {@code _dataFieldTypes} field.
     *
     * @return the backing array itself; no copy is made
     */
    private final StructField[] _dataFieldTypes() {
        final StructField[] fieldTypes = this._dataFieldTypes;
        return fieldTypes;
    }

    /**
     * Sets the label column after checking that it is one of the known data field names.
     *
     * @param str the label column name; must be contained in {@code _dataFieldNames()}
     * @return this instance, for chaining
     * @throws IllegalArgumentException via {@code Predef.require} when the name is not a
     *     known field
     */
    public PearsonFiltering setLabelCol(String str) {
        final boolean isKnownField = Predef$.MODULE$.refArrayOps(_dataFieldNames()).contains(str);
        Predef$.MODULE$.require(isKnownField, new PearsonFiltering$$anonfun$setLabelCol$1(this, str));
        com$databricks$labs$automl$sanitize$PearsonFiltering$$_labelCol_$eq(str);
        return this;
    }

    /**
     * Sets the features column after validating it.
     *
     * <p>Two checks are made, in order: the name must be one of {@code _dataFieldNames()},
     * and the first entry of {@code _dataFieldTypes()} accepted by the filter closure must
     * have a data type whose {@code typeName()} is {@code "vector"}.
     *
     * @param str the features column name
     * @return this instance, for chaining
     * @throws IllegalArgumentException via {@code Predef.require} when either check fails
     */
    public PearsonFiltering setFeaturesCol(String str) {
        Predef$.MODULE$.require(Predef$.MODULE$.refArrayOps(_dataFieldNames()).contains(str), new PearsonFiltering$$anonfun$setFeaturesCol$1(this, str));
        StructField[] matchingFields = (StructField[]) Predef$.MODULE$.refArrayOps(_dataFieldTypes()).filter(new PearsonFiltering$$anonfun$setFeaturesCol$3(this, str));
        String typeName = matchingFields[0].dataType().typeName();
        // The decompiler rendered Scala's null-safe `==` as `"vector" == 0`, which is not
        // valid Java. Calling equals on the literal is the same null-safe comparison.
        Predef$.MODULE$.require("vector".equals(typeName), new PearsonFiltering$$anonfun$setFeaturesCol$2(this, str));
        _featuresCol_$eq(str);
        return this;
    }

    /**
     * Sets the filter statistic after checking that it is an allowed value.
     *
     * @param str the statistic name; must be contained in {@code _allowedStats()}
     * @return this instance, for chaining
     * @throws IllegalArgumentException via {@code Predef.require} when the value is not allowed
     */
    public PearsonFiltering setFilterStatistic(String str) {
        final boolean isAllowed = Predef$.MODULE$.refArrayOps(_allowedStats()).contains(str);
        Predef$.MODULE$.require(isAllowed, new PearsonFiltering$$anonfun$setFilterStatistic$1(this, str));
        com$databricks$labs$automl$sanitize$PearsonFiltering$$_filterStatistic_$eq(str);
        return this;
    }

    /**
     * Sets the filter direction after checking that it is an allowed value.
     *
     * @param str the direction; must be contained in {@code _allowedFilterDirections()}
     * @return this instance, for chaining
     * @throws IllegalArgumentException via {@code Predef.require} when the value is not allowed
     */
    public PearsonFiltering setFilterDirection(String str) {
        final boolean isAllowed = Predef$.MODULE$.refArrayOps(_allowedFilterDirections()).contains(str);
        Predef$.MODULE$.require(isAllowed, new PearsonFiltering$$anonfun$setFilterDirection$1(this, str));
        _filterDirection_$eq(str);
        return this;
    }

    /**
     * Sets the manual filter value. The value is stored without validation.
     *
     * @param d the value to store
     * @return this instance, for chaining
     */
    public PearsonFiltering setFilterManualValue(double d) {
        final double manualValue = d;
        com$databricks$labs$automl$sanitize$PearsonFiltering$$_filterManualValue_$eq(manualValue);
        return this;
    }

    /**
     * Integer overload: widens the argument to {@code double} and stores it as the manual
     * filter value, without validation.
     *
     * @param i the value to store
     * @return this instance, for chaining
     */
    public PearsonFiltering setFilterManualValue(int i) {
        final double widened = i;
        com$databricks$labs$automl$sanitize$PearsonFiltering$$_filterManualValue_$eq(widened);
        return this;
    }

    /**
     * Sets the filter mode after checking that it is an allowed value.
     *
     * @param str the mode; must be contained in {@code _allowedFilterModes()}
     * @return this instance, for chaining
     * @throws IllegalArgumentException via {@code Predef.require} when the value is not allowed
     */
    public PearsonFiltering setFilterMode(String str) {
        final boolean isAllowed = Predef$.MODULE$.refArrayOps(_allowedFilterModes()).contains(str);
        Predef$.MODULE$.require(isAllowed, new PearsonFiltering$$anonfun$setFilterMode$1(this, str));
        _filterMode_$eq(str);
        return this;
    }

    /**
     * Sets the auto-filter n-tile after checking that it lies in the closed range [0.0, 1.0].
     *
     * @param d the n-tile value; NaN fails the range check
     * @return this instance, for chaining
     * @throws IllegalArgumentException via {@code Predef.require} when the value is out of range
     */
    public PearsonFiltering setAutoFilterNTile(double d) {
        final boolean withinUnitInterval = d >= 0.0d && d <= 1.0d;
        Predef$.MODULE$.require(withinUnitInterval, new PearsonFiltering$$anonfun$setAutoFilterNTile$1(this));
        com$databricks$labs$automl$sanitize$PearsonFiltering$$_autoFilterNTile_$eq(d);
        return this;
    }

    /**
     * Sets the parallelism level. The value is stored without validation;
     * {@code featuresCardinality()} later passes it to a {@code ForkJoinPool} constructor.
     *
     * @param i the parallelism level to store
     * @return this instance, for chaining
     */
    public PearsonFiltering setParallelism(int i) {
        final int requested = i;
        _parallelism_$eq(requested);
        return this;
    }

    /**
     * Returns the configured label column.
     *
     * @return the current value of the {@code _labelCol} field
     */
    public String getLabelCol() {
        final String labelCol = com$databricks$labs$automl$sanitize$PearsonFiltering$$_labelCol();
        return labelCol;
    }

    /**
     * Returns the configured features column.
     *
     * @return the current value of the {@code _featuresCol} field
     */
    public String getFeaturesCol() {
        final String featuresCol = _featuresCol();
        return featuresCol;
    }

    /**
     * Returns the configured filter statistic.
     *
     * @return the current value of the {@code _filterStatistic} field
     */
    public String getFilterStatistic() {
        final String statistic = com$databricks$labs$automl$sanitize$PearsonFiltering$$_filterStatistic();
        return statistic;
    }

    /**
     * Returns the configured filter direction.
     *
     * @return the current value of the {@code _filterDirection} field
     */
    public String getFilterDirection() {
        final String direction = _filterDirection();
        return direction;
    }

    /**
     * Returns the configured manual filter value.
     *
     * @return the current value of the {@code _filterManualValue} field
     */
    public double getFilterManualValue() {
        final double manualValue = com$databricks$labs$automl$sanitize$PearsonFiltering$$_filterManualValue();
        return manualValue;
    }

    /**
     * Returns the configured filter mode.
     *
     * @return the current value of the {@code _filterMode} field
     */
    public String getFilterMode() {
        final String mode = _filterMode();
        return mode;
    }

    /**
     * Returns the configured auto-filter n-tile.
     *
     * @return the current value of the {@code _autoFilterNTile} field
     */
    public double getAutoFilterNTile() {
        final double nTile = com$databricks$labs$automl$sanitize$PearsonFiltering$$_autoFilterNTile();
        return nTile;
    }

    /**
     * Returns the configured parallelism level.
     *
     * @return the current value of the {@code _parallelism} field
     */
    public int getParallelism() {
        final int parallelism = _parallelism();
        return parallelism;
    }

    /**
     * Reads the pearson vector fields array.
     *
     * @return the backing array of the {@code _pearsonVectorFields} field; no copy is made
     */
    public String[] com$databricks$labs$automl$sanitize$PearsonFiltering$$_pearsonVectorFields() {
        final String[] vectorFields = this.com$databricks$labs$automl$sanitize$PearsonFiltering$$_pearsonVectorFields;
        return vectorFields;
    }

    /**
     * Writes the pearson vector fields array.
     *
     * @param strArr the array to store; the reference is kept as-is, not copied
     */
    private void com$databricks$labs$automl$sanitize$PearsonFiltering$$_pearsonVectorFields_$eq(String[] strArr) {
        final String[] updated = strArr;
        this.com$databricks$labs$automl$sanitize$PearsonFiltering$$_pearsonVectorFields = updated;
    }

    /**
     * Reads the {@code _pearsonNonCategoricalFields} field.
     *
     * @return the backing array itself; no copy is made
     */
    private String[] _pearsonNonCategoricalFields() {
        final String[] nonCategorical = this._pearsonNonCategoricalFields;
        return nonCategorical;
    }

    /**
     * Writes the {@code _pearsonNonCategoricalFields} field.
     *
     * @param strArr the array to store; the reference is kept as-is, not copied
     */
    private void _pearsonNonCategoricalFields_$eq(String[] strArr) {
        final String[] updated = strArr;
        this._pearsonNonCategoricalFields = updated;
    }

    /**
     * Stores the non-categorical field names and returns this instance.
     *
     * @param strArr the array to store in {@code _pearsonNonCategoricalFields}
     * @return this instance, for chaining
     */
    private PearsonFiltering setPearsonNonCategoricalFields(String[] strArr) {
        final String[] nonCategorical = strArr;
        _pearsonNonCategoricalFields_$eq(nonCategorical);
        return this;
    }

    /**
     * Stores the pearson vector field names and returns this instance.
     *
     * @param strArr the array to store in {@code _pearsonVectorFields}
     * @return this instance, for chaining
     */
    private PearsonFiltering setPearsonVectorFields(String[] strArr) {
        final String[] vectorFields = strArr;
        com$databricks$labs$automl$sanitize$PearsonFiltering$$_pearsonVectorFields_$eq(vectorFields);
        return this;
    }

    /**
     * Runs Spark's {@code ChiSquareTest} on the given feature column against the configured
     * label column and collects one payload per pearson vector field.
     *
     * <p>Only the first row of the test output is read. Its columns 0 and 2 are read as
     * vectors and column 1 as an int sequence. The per-index closure receives those three
     * arrays together with the buffer it appends to.
     *
     * @param dataset the data to test
     * @param str the name of the vector feature column
     * @return the payloads accumulated by the closure, as an immutable list
     */
    private List<PearsonPayload> buildChiSq(Dataset<Row> dataset, String str) {
        ListBuffer payloads = new ListBuffer();
        Dataset<Row> testOutput = ChiSquareTest$.MODULE$.test(
                dataset, str, com$databricks$labs$automl$sanitize$PearsonFiltering$$_labelCol());
        Row firstRow = (Row) testOutput.head();
        // The index range is built before the closure arguments are extracted from the row,
        // which keeps the original evaluation order.
        Predef$.MODULE$
                .refArrayOps(com$databricks$labs$automl$sanitize$PearsonFiltering$$_pearsonVectorFields())
                .indices()
                .foreach(new PearsonFiltering$$anonfun$buildChiSq$1(
                        this,
                        payloads,
                        ((Vector) firstRow.getAs(0)).toArray(),
                        (int[]) firstRow.getSeq(1).toArray(ClassTag$.MODULE$.Int()),
                        ((Vector) firstRow.getAs(2)).toArray()));
        return payloads.result();
    }

    /**
     * Counts the groups produced by grouping {@code df} on the given column.
     *
     * <p>The column is selected, grouped by its own value and aggregated with a count. The
     * number of rows in that aggregate, one per group, is returned.
     *
     * @param str the column name
     * @return the number of groups for the column
     */
    public long com$databricks$labs$automl$sanitize$PearsonFiltering$$acquireCardinality(String str) {
        Dataset<Row> singleColumn =
                this.df.select(Predef$.MODULE$.wrapRefArray(new Column[]{functions$.MODULE$.col(str)}));
        Dataset<Row> countsPerValue = singleColumn
                .groupBy(Predef$.MODULE$.wrapRefArray(new Column[]{functions$.MODULE$.col(str)}))
                .agg(functions$.MODULE$.count(functions$.MODULE$.col(str)), Predef$.MODULE$.wrapRefArray(new Column[0]));
        return countsPerValue.count();
    }

    /**
     * Visits every entry of {@code featureColumnListing} in parallel and collects the tuples
     * the per-feature closure writes into a shared buffer.
     *
     * <p>The work runs on a dedicated fork-join pool sized by {@code _parallelism()}. The pool
     * is shut down once the traversal finishes (or fails); previously a fresh pool was created
     * on every call and never released, leaking its worker threads.
     *
     * @return the collected tuples as an array
     */
    private Tuple2<String, Object>[] featuresCardinality() {
        // NOTE(review): a plain ArrayBuffer is handed to a closure that runs on pool threads.
        // The closure body is not visible here -- confirm its appends are synchronized.
        ArrayBuffer collected = new ArrayBuffer();
        ParArray parallelFeatures = Predef$.MODULE$.refArrayOps(this.featureColumnListing).par();
        ForkJoinPool pool = new ForkJoinPool(_parallelism());
        try {
            parallelFeatures.tasksupport_$eq(new ForkJoinTaskSupport(pool));
            // foreach blocks until every element has been processed, so the pool is idle afterwards.
            parallelFeatures.foreach(new PearsonFiltering$$anonfun$featuresCardinality$1(this, collected));
        } finally {
            pool.shutdown();
        }
        return (Tuple2[]) collected.result().toArray(ClassTag$.MODULE$.apply(Tuple2.class));
    }

    /**
     * Splits the features into two groups and stores them on this instance: one group becomes
     * the vector fields, the other the non-categorical fields.
     *
     * <p>The split itself is done by a closure applied to each tuple from
     * {@code featuresCardinality()}; its criterion is not visible in this method.
     *
     * @return this instance, after both field arrays have been replaced
     */
    private PearsonFiltering restrictFeatureSet() {
        final ArrayBuffer vectorFieldNames = new ArrayBuffer();
        final ArrayBuffer nonCategoricalNames = new ArrayBuffer();

        final Tuple2<String, Object>[] cardinalities = featuresCardinality();
        Predef$.MODULE$.refArrayOps(cardinalities)
                .foreach(new PearsonFiltering$$anonfun$restrictFeatureSet$1(this, vectorFieldNames, nonCategoricalNames));

        final String[] nonCategorical = (String[]) nonCategoricalNames.result().toArray(ClassTag$.MODULE$.apply(String.class));
        final String[] vectorFields = (String[]) vectorFieldNames.result().toArray(ClassTag$.MODULE$.apply(String.class));
        return setPearsonNonCategoricalFields(nonCategorical).setPearsonVectorFields(vectorFields);
    }

    /**
     * Recomputes the feature split, then assembles the vector fields into a single column named
     * {@code pearsonVector} on the source data.
     *
     * @return the source data with the {@code pearsonVector} column added
     * @throws IllegalArgumentException via {@code require} when no vector fields remain
     */
    private Dataset<Row> reVectorize() {
        restrictFeatureSet();

        final boolean hasVectorFields =
                Predef$.MODULE$.refArrayOps(com$databricks$labs$automl$sanitize$PearsonFiltering$$_pearsonVectorFields()).nonEmpty();
        Predef$.MODULE$.require(hasVectorFields, new PearsonFiltering$$anonfun$reVectorize$1(this));

        final VectorAssembler assembler = new VectorAssembler()
                .setInputCols(com$databricks$labs$automl$sanitize$PearsonFiltering$$_pearsonVectorFields())
                .setOutputCol("pearsonVector");
        return assembler.transform(this.df);
    }

    /**
     * Selects field names from the chi-square payloads according to the configured filter
     * direction.
     *
     * <p>{@code "greater"} and {@code "lesser"} each dispatch to their own closure, which
     * receives the threshold and the output buffer; the comparison itself lives in those
     * closures and is not visible here.
     *
     * @param list chi-square payloads to examine
     * @param d    threshold passed to the direction-specific closure
     * @return the names written to the buffer by the closure
     * @throws UnsupportedOperationException if the filter direction is neither
     *         {@code "greater"} nor {@code "lesser"}
     */
    private List<String> filterChiSq(List<PearsonPayload> list, double d) {
        final String direction = _filterDirection();
        final boolean keepGreater = "greater".equals(direction);

        // Reject unknown directions up front.
        if (!keepGreater && !"lesser".equals(direction)) {
            throw new UnsupportedOperationException(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"", " is not supported for manualFilterChiSq"})).s(Predef$.MODULE$.genericWrapArray(new Object[]{_filterDirection()})));
        }

        final ListBuffer retained = new ListBuffer();
        if (keepGreater) {
            list.foreach(new PearsonFiltering$$anonfun$filterChiSq$1(this, d, retained));
        } else {
            list.foreach(new PearsonFiltering$$anonfun$filterChiSq$2(this, d, retained));
        }
        return retained.result();
    }

    /**
     * Derives an automatic threshold from the chi-square payloads.
     *
     * <p>A closure extracts one value per payload; the values are sorted with a comparator
     * closure (ordering not visible here). With an odd number of values the element at index
     * {@code (int) (count * nTile)} is returned. With an even number the list is split at
     * {@code max(1, floor(count * nTile))} and the mean of the two values adjacent to the split
     * is returned.
     *
     * @param list chi-square payloads to summarise
     * @return the threshold value
     */
    private double quantileGenerator(List<PearsonPayload> list) {
        final ListBuffer collected = new ListBuffer();
        list.foreach(new PearsonFiltering$$anonfun$quantileGenerator$1(this, collected));
        final List ordered = (List) collected.result().sortWith(new PearsonFiltering$$anonfun$1(this));

        final int count = ordered.size();
        final double scaledPosition = count * com$databricks$labs$automl$sanitize$PearsonFiltering$$_autoFilterNTile();

        // NOTE(review): neither index below is capped at count - 1, so an n-tile of 1.0 (or an
        // empty list) would fail here -- confirm the setter constrains the n-tile range.
        if (count % 2 != 0) {
            return BoxesRunTime.unboxToDouble(ordered.apply((int) scaledPosition));
        }

        final int splitIndex = Math.max(1, (int) package$.MODULE$.floor(scaledPosition));
        final Tuple2 halves = ordered.splitAt(splitIndex);
        if (halves == null) {
            throw new MatchError(halves);
        }
        final List lower = (List) halves._1();
        final List upper = (List) halves._2();
        final double belowSplit = BoxesRunTime.unboxToDouble(lower.last());
        final double aboveSplit = BoxesRunTime.unboxToDouble(upper.head());
        return (belowSplit + aboveSplit) / 2;
    }

    /**
     * Classifier path: runs the chi-square test on the re-assembled vector, picks the fields
     * that pass the threshold, and projects the source data down to the columns to keep.
     *
     * <p>In {@code "manual"} mode the threshold is the configured manual value; in any other
     * mode it comes from {@code quantileGenerator}. The projection lists, in order: the
     * surviving fields, the label column, the caller-supplied columns, and the
     * non-categorical fields.
     *
     * @param strArr additional column names to carry through to the result
     * @return the source data restricted to the retained columns
     * @throws IllegalArgumentException via {@code require} when no field survives the filter
     */
    private Dataset<Row> filterClassifier(String[] strArr) {
        final List<PearsonPayload> chiSqResults = buildChiSq(reVectorize(), "pearsonVector");

        final double threshold;
        if ("manual".equals(_filterMode())) {
            threshold = com$databricks$labs$automl$sanitize$PearsonFiltering$$_filterManualValue();
        } else {
            threshold = quantileGenerator(chiSqResults);
        }
        final List<String> retained = filterChiSq(chiSqResults, threshold);
        Predef$.MODULE$.require(retained.nonEmpty(), new PearsonFiltering$$anonfun$filterClassifier$1(this));

        // Each `:::` prepends its argument, so the list is built from the tail forwards.
        final List nonCategorical = Predef$.MODULE$.refArrayOps(_pearsonNonCategoricalFields()).toList();
        final List passThrough = Predef$.MODULE$.refArrayOps(strArr).toList();
        final List label = List$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new String[]{com$databricks$labs$automl$sanitize$PearsonFiltering$$_labelCol()}));
        final List ordered = nonCategorical
                .$colon$colon$colon(passThrough)
                .$colon$colon$colon(label)
                .$colon$colon$colon(retained);

        return this.df.select((Seq) ordered.map(new PearsonFiltering$$anonfun$filterClassifier$2(this), List$.MODULE$.canBuildFrom()));
    }

    /**
     * Compiler-generated default for the first parameter of {@code filterClassifier}.
     *
     * @return a new, empty {@code String} array
     */
    private String[] filterClassifier$default$1() {
        return new String[0];
    }

    /**
     * Entry point: filters the feature columns of the source data, choosing the strategy by
     * model type.
     *
     * <p>A model type of {@code "classifier"} uses the chi-square based classifier filter; any
     * other value uses the regression filter.
     *
     * <p>The previous version first ran a {@code count} aggregation over the label column and
     * threw the result away. That was a full pass over the data with no effect on the outcome,
     * so it has been removed.
     *
     * @param strArr additional column names to carry through to the result
     * @return the source data restricted to the retained columns
     */
    public Dataset<Row> filterFields(String[] strArr) {
        if ("classifier".equals(this.modelType)) {
            return filterClassifier(strArr);
        }
        return filterRegressor(strArr);
    }

    /**
     * Compiler-generated default for the first parameter of {@code filterFields}.
     *
     * @return a new, empty {@code String} array
     */
    public String[] filterFields$default$1() {
        return new String[0];
    }

    /**
     * Manual-mode selection for the regression path: applies a closure to every key of the
     * statistics map and returns the names it collects.
     *
     * @param map per-field pair of statistics, keyed by field name
     * @return the field names the closure added to the buffer
     */
    private String[] regressorManualFilter(Map<String, Tuple2<Object, Object>> map) {
        final ArrayBuffer selected = new ArrayBuffer();
        map.keys().foreach(new PearsonFiltering$$anonfun$regressorManualFilter$1(this, map, selected));
        final String[] names = (String[]) selected.toArray(ClassTag$.MODULE$.apply(String.class));
        return names;
    }

    /**
     * Automatic-mode selection for the regression path: applies a closure to every key of the
     * statistics map and returns the names it collects.
     *
     * @param map per-field pair of statistics, keyed by field name
     * @return the field names the closure added to the buffer
     */
    private String[] regressionAutoFilter(Map<String, Tuple2<Object, Object>> map) {
        final ArrayBuffer selected = new ArrayBuffer();
        map.keys().foreach(new PearsonFiltering$$anonfun$regressionAutoFilter$1(this, map, selected));
        final String[] names = (String[]) selected.toArray(ClassTag$.MODULE$.apply(String.class));
        return names;
    }

    /**
     * Regression path: computes the per-field statistics, selects fields with the manual or
     * automatic filter, and projects the source data down to the columns to keep.
     *
     * <p>The projection lists, in order: the selected fields, the label column, and the
     * caller-supplied columns.
     *
     * @param strArr additional column names to carry through to the result
     * @return the source data restricted to the retained columns
     * @throws IllegalArgumentException via {@code require} when no field is selected
     */
    private Dataset<Row> filterRegressor(String[] strArr) {
        final boolean manualMode = "manual".equals(_filterMode());
        final Map<String, Tuple2<Object, Object>> statistics = calculateRegressionCovariance(strArr);

        final String[] retained;
        if (manualMode) {
            retained = regressorManualFilter(statistics);
        } else {
            retained = regressionAutoFilter(statistics);
        }
        Predef$.MODULE$.require(Predef$.MODULE$.refArrayOps(retained).nonEmpty(), new PearsonFiltering$$anonfun$filterRegressor$1(this));

        // Each `:::` prepends its argument, so the list is built from the tail forwards.
        final List passThrough = Predef$.MODULE$.refArrayOps(strArr).toList();
        final List label = List$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new String[]{com$databricks$labs$automl$sanitize$PearsonFiltering$$_labelCol()}));
        final List kept = Predef$.MODULE$.refArrayOps(retained).toList();
        final List ordered = passThrough
                .$colon$colon$colon(label)
                .$colon$colon$colon(kept);

        return this.df.select((Seq) ordered.map(new PearsonFiltering$$anonfun$filterRegressor$2(this), List$.MODULE$.canBuildFrom()));
    }

    /**
     * Compiler-generated default for the first parameter of {@code filterRegressor}.
     *
     * @return a new, empty {@code String} array
     */
    private String[] filterRegressor$default$1() {
        return new String[0];
    }

    /**
     * Gathers the inputs for the per-field regression statistics and merges the per-field
     * results into one map.
     *
     * <p>Steps: take the {@code "mean"} summary row over the feature columns plus the label;
     * count the label column (cast to double); then run a closure once per summarised column,
     * passing it the count and the means. The closure's output buffer is flattened into the
     * returned map.
     *
     * @param strArr not read by this method
     * @return per-field pair of statistics, keyed by field name
     */
    private Map<String, Tuple2<Object, Object>> calculateRegressionCovariance(String[] strArr) {
        final String label = com$databricks$labs$automl$sanitize$PearsonFiltering$$_labelCol();

        // Feature names followed by the label, mapped to columns by the closure.
        final Object[] fieldNames = (Object[]) Predef$.MODULE$.refArrayOps(this.featureColumnListing)
                .$plus$plus(Predef$.MODULE$.refArrayOps(new String[]{label}), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(String.class)));
        final Object[] fieldColumns = (Object[]) Predef$.MODULE$.refArrayOps(fieldNames)
                .map(new PearsonFiltering$$anonfun$2(this), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(Column.class)));
        final Dataset summary = this.df
                .select(Predef$.MODULE$.wrapRefArray(fieldColumns))
                .summary(Predef$.MODULE$.wrapRefArray(new String[]{"mean"}));

        // Count of the label column, aliased back to the label name and cast to double.
        final Row countRow = (Row) this.df
                .select(Predef$.MODULE$.wrapRefArray(new Column[]{functions$.MODULE$.col(label)}))
                .agg(functions$.MODULE$.count(label).alias(label), Predef$.MODULE$.wrapRefArray(new Column[0]))
                .withColumn(label, functions$.MODULE$.col(label).cast(DoubleType$.MODULE$))
                .first();
        final double labelCount = BoxesRunTime.unboxToDouble(countRow.getAs(label));

        final Dataset meanRow = summary.filter(functions$.MODULE$.col("summary").$eq$eq$eq("mean")).drop("summary");
        final Map means = ((Row) meanRow.first()).getValuesMap(Predef$.MODULE$.wrapRefArray(meanRow.schema().fieldNames()));

        final ArrayBuffer perField = new ArrayBuffer();
        means.keys().foreach(new PearsonFiltering$$anonfun$calculateRegressionCovariance$1(this, labelCount, means, perField));
        return perField.result().flatten(Predef$.MODULE$.$conforms()).toMap(Predef$.MODULE$.$conforms());
    }

    /**
     * Compiler-generated default for the first parameter of
     * {@code calculateRegressionCovariance}.
     *
     * @return a new, empty {@code String} array
     */
    private String[] calculateRegressionCovariance$default$1() {
        return new String[0];
    }

    /**
     * Computes two statistics relating one field to the label column and returns them in a
     * single-entry map keyed by the field name.
     *
     * <p>The field is cast to double, then these helper columns are added: each column's
     * deviation from its mean (means looked up in {@code map}), each column's square, the
     * product of the two deviations ({@code cov_calculation}) and the plain product of field
     * and label ({@code product}). All of them are summed in one aggregation.
     *
     * <p>The returned pair is:
     * <ol>
     *   <li>{@code sum(cov_calculation) / d};</li>
     *   <li>{@code (sum(product) - sum(x) * sum(y) / d)
     *       / sqrt((sum(x^2) - sum(x)^2 / d) * (sum(y^2) - sum(y)^2 / d))}, which has the
     *       computational form of a Pearson correlation coefficient when {@code d} is the
     *       row count.</li>
     * </ol>
     *
     * @param str name of the field to compare against the label
     * @param map lookup of mean values; read for both {@code str} and the label column
     * @param d   divisor used in both statistics -- presumably the label count produced by
     *            {@code calculateRegressionCovariance}; confirm against the calling closure
     * @return a one-entry map from {@code str} to the pair described above
     */
    public Map<String, Tuple2<Object, Object>> com$databricks$labs$automl$sanitize$PearsonFiltering$$covarianceCalculation(String str, Map<String, Object> map, double d) {
        // Single aggregated row holding every sum needed below, read back as a name -> value map.
        Map valuesMap = ((Row) this.df.withColumn(str, functions$.MODULE$.col(str).cast(DoubleType$.MODULE$)).select(str, Predef$.MODULE$.wrapRefArray(new String[]{com$databricks$labs$automl$sanitize$PearsonFiltering$$_labelCol()})).withColumn(new StringBuilder().append(str).append("_deviation").toString(), functions$.MODULE$.col(str).$minus(map.apply(str))).withColumn(new StringBuilder().append(str).append("_squared").toString(), functions$.MODULE$.col(str).$times(functions$.MODULE$.col(str))).withColumn(new StringBuilder().append(com$databricks$labs$automl$sanitize$PearsonFiltering$$_labelCol()).append("_deviation").toString(), functions$.MODULE$.col(com$databricks$labs$automl$sanitize$PearsonFiltering$$_labelCol()).$minus(map.apply(com$databricks$labs$automl$sanitize$PearsonFiltering$$_labelCol()))).withColumn(new StringBuilder().append(com$databricks$labs$automl$sanitize$PearsonFiltering$$_labelCol()).append("_squared").toString(), functions$.MODULE$.col(com$databricks$labs$automl$sanitize$PearsonFiltering$$_labelCol()).$times(functions$.MODULE$.col(com$databricks$labs$automl$sanitize$PearsonFiltering$$_labelCol()))).withColumn("cov_calculation", functions$.MODULE$.col(new StringBuilder().append(str).append("_deviation").toString()).$times(functions$.MODULE$.col(new StringBuilder().append(com$databricks$labs$automl$sanitize$PearsonFiltering$$_labelCol()).append("_deviation").toString()))).withColumn("product", functions$.MODULE$.col(str).$times(functions$.MODULE$.col(com$databricks$labs$automl$sanitize$PearsonFiltering$$_labelCol()))).agg(functions$.MODULE$.sum(str).alias(str), Predef$.MODULE$.wrapRefArray(new Column[]{functions$.MODULE$.sum(com$databricks$labs$automl$sanitize$PearsonFiltering$$_labelCol()).alias(com$databricks$labs$automl$sanitize$PearsonFiltering$$_labelCol()), functions$.MODULE$.sum("product").alias("product"), functions$.MODULE$.sum("cov_calculation").alias("cov_calculation"), functions$.MODULE$.sum(new 
StringBuilder().append(str).append("_squared").toString()).alias(new StringBuilder().append(str).append("_squared").toString()), functions$.MODULE$.sum(new StringBuilder().append(com$databricks$labs$automl$sanitize$PearsonFiltering$$_labelCol()).append("_squared").toString()).alias(new StringBuilder().append(com$databricks$labs$automl$sanitize$PearsonFiltering$$_labelCol()).append("_squared").toString())})).first()).getValuesMap(Seq$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new String[]{"cov_calculation", str, com$databricks$labs$automl$sanitize$PearsonFiltering$$_labelCol(), "product", new StringBuilder().append(com$databricks$labs$automl$sanitize$PearsonFiltering$$_labelCol()).append("_squared").toString(), new StringBuilder().append(str).append("_squared").toString()})));
        // Pair = (sum of deviation products / d, correlation-style ratio built from the raw sums).
        return Predef$.MODULE$.Map().apply(Predef$.MODULE$.wrapRefArray(new Tuple2[]{Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc(str), new Tuple2.mcDD.sp(BoxesRunTime.unboxToDouble(valuesMap.apply("cov_calculation")) / d, (BoxesRunTime.unboxToDouble(valuesMap.apply("product")) - ((BoxesRunTime.unboxToDouble(valuesMap.apply(str)) * BoxesRunTime.unboxToDouble(valuesMap.apply(com$databricks$labs$automl$sanitize$PearsonFiltering$$_labelCol()))) / d)) / package$.MODULE$.sqrt((BoxesRunTime.unboxToDouble(valuesMap.apply(new StringBuilder().append(str).append("_squared").toString())) - (package$.MODULE$.pow(BoxesRunTime.unboxToDouble(valuesMap.apply(str)), 2.0d) / d)) * (BoxesRunTime.unboxToDouble(valuesMap.apply(new StringBuilder().append(com$databricks$labs$automl$sanitize$PearsonFiltering$$_labelCol()).append("_squared").toString())) - (package$.MODULE$.pow(BoxesRunTime.unboxToDouble(valuesMap.apply(com$databricks$labs$automl$sanitize$PearsonFiltering$$_labelCol())), 2.0d) / d)))))}));
    }

    /**
     * Creates a filter over the given data, with every tunable set to its default.
     *
     * @param dataset the source data; also used to capture the schema's field names and fields
     * @param strArr  names of the feature columns to evaluate
     * @param str     model type; {@code filterFields} treats {@code "classifier"} specially and
     *                sends every other value down the regression path
     */
    public PearsonFiltering(Dataset<Row> dataset, String[] strArr, String str) {
        this.df = dataset;
        this.featureColumnListing = strArr;
        this.modelType = str;
        // Trait initialisers run before any default* method is called below.
        DataValidation.Cclass.$init$(this);
        SanitizerDefaults.Cclass.$init$(this);
        // Tunables start from the default* methods.
        this.com$databricks$labs$automl$sanitize$PearsonFiltering$$_labelCol = defaultLabelCol();
        this._featuresCol = defaultFeaturesCol();
        this.com$databricks$labs$automl$sanitize$PearsonFiltering$$_filterStatistic = defaultPearsonFilterStatistic();
        this._filterDirection = defaultPearsonFilterDirection();
        this.com$databricks$labs$automl$sanitize$PearsonFiltering$$_filterManualValue = defaultPearsonFilterManualValue();
        this._filterMode = defaultPearsonFilterMode();
        this.com$databricks$labs$automl$sanitize$PearsonFiltering$$_autoFilterNTile = defaultPearsonAutoFilterNTile();
        // Hard-coded initial value; featuresCardinality() uses it as the fork-join pool size.
        this._parallelism = 20;
        // Snapshot of the input schema taken at construction time.
        this._dataFieldNames = dataset.schema().fieldNames();
        this._dataFieldTypes = dataset.schema().fields();
        // Both field lists start empty; restrictFeatureSet() replaces them.
        this.com$databricks$labs$automl$sanitize$PearsonFiltering$$_pearsonVectorFields = (String[]) Array$.MODULE$.empty(ClassTag$.MODULE$.apply(String.class));
        this._pearsonNonCategoricalFields = (String[]) Array$.MODULE$.empty(ClassTag$.MODULE$.apply(String.class));
    }
}
