package de.viadee.bpmnai.core.processing.steps.dataprocessing;

import de.viadee.bpmnai.core.annotation.PreprocessingStepDescription;
import de.viadee.bpmnai.core.configuration.Configuration;
import de.viadee.bpmnai.core.configuration.preprocessing.ColumnConfiguration;
import de.viadee.bpmnai.core.configuration.preprocessing.PreprocessingConfiguration;
import de.viadee.bpmnai.core.configuration.preprocessing.VariableConfiguration;
import de.viadee.bpmnai.core.configuration.util.ConfigurationUtils;
import de.viadee.bpmnai.core.processing.interfaces.PreprocessingStepInterface;
import de.viadee.bpmnai.core.runner.config.SparkRunnerConfig;
import de.viadee.bpmnai.core.util.BpmnaiUtils;
import de.viadee.bpmnai.core.util.BpmnaiVariables;
import de.viadee.bpmnai.core.util.helper.SparkBroadcastHelper;
import de.viadee.bpmnai.core.util.logging.BpmnaiLogger;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.functions;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructField;

@PreprocessingStepDescription(name = "Type cast", description = "In this step the columns are casted into the data type they have been defined in the configuration. If the cast could not be done by Spark the value is null afterwards.")
/* loaded from: input_file:de/viadee/bpmnai/core/processing/steps/dataprocessing/TypeCastStep.class */
/**
 * Preprocessing step that casts every non-revision column of the dataset to the data type
 * declared for it in the preprocessing configuration (variable configuration first, column
 * configuration second). Columns without a configured type keep their current type.
 *
 * <p>Columns whose name ends with {@code "_rev"} are not processed on their own; they are cast to
 * {@code integer} together with their base column when the data level is "process", revision
 * counting is enabled and the base column is treated as a variable column.
 */
public class TypeCastStep implements PreprocessingStepInterface {
    private static final String REVISION_SUFFIX = "_rev";
    private static final String CASTED_SUFFIX = "_casted";
    private static final String CAST_RESULT_SUFFIX = "_castresult";
    private static final String CAST_ERROR_MARKER = "CAST_ERROR?";
    private static final String UDF_IS_LONG = "isalong";
    private static final String UDF_TIMESTAMP_STRING_TO_LONG = "timestampstringtolong";

    /**
     * Casts the columns of {@code dataset} to their configured data types.
     *
     * @param dataset the dataset whose columns are cast
     * @param map step parameters; not read by this step
     * @param sparkRunnerConfig runner configuration supplying pipeline mode, data level, the
     *     dev type-cast check flag, the revision-count flag and the write-results flag
     * @return the dataset with the cast columns; when the dev type-cast check detects failed casts
     *     for a column, the additional {@code <column>_casted} and {@code <column>_castresult}
     *     columns are kept in the result for inspection
     */
    @Override // de.viadee.bpmnai.core.processing.interfaces.PreprocessingStepInterface
    public Dataset<Row> runPreprocessingStep(Dataset<Row> dataset, Map<String, Object> map, SparkRunnerConfig sparkRunnerConfig) {
        Map<?, ?> escalatedVariables = (Map<?, ?>) SparkBroadcastHelper.getInstance().getBroadcastVariable(SparkBroadcastHelper.BROADCAST_VARIABLE.PROCESS_VARIABLES_ESCALATED);
        // Snapshot of the schema before any cast; used to look up the current type of each column.
        List<StructField> originalFields = Arrays.asList(dataset.schema().fields());

        Map<String, ColumnConfiguration> columnConfigs = new HashMap<>();
        Map<String, VariableConfiguration> variableConfigs = new HashMap<>();
        Configuration configuration = ConfigurationUtils.getInstance().getConfiguration(sparkRunnerConfig);
        PreprocessingConfiguration preprocessingConfiguration = configuration == null ? null : configuration.getPreprocessingConfiguration();
        if (preprocessingConfiguration != null) {
            List<ColumnConfiguration> configuredColumns = preprocessingConfiguration.getColumnConfiguration();
            if (configuredColumns != null) {
                for (ColumnConfiguration columnConfiguration : configuredColumns) {
                    columnConfigs.put(columnConfiguration.getColumnName(), columnConfiguration);
                }
            }
            List<VariableConfiguration> configuredVariables = preprocessingConfiguration.getVariableConfiguration();
            if (configuredVariables != null) {
                for (VariableConfiguration variableConfiguration : configuredVariables) {
                    variableConfigs.put(variableConfiguration.getVariableName(), variableConfiguration);
                }
            }
        }

        // Loop-invariant settings; constant-first equals avoids an NPE on unset values.
        boolean learnMode = BpmnaiVariables.PIPELINE_MODE_LEARN.equals(sparkRunnerConfig.getPipelineMode());
        boolean revisionCastEnabled = BpmnaiVariables.DATA_LEVEL_PROCESS.equals(sparkRunnerConfig.getDataLevel()) && sparkRunnerConfig.isRevCountEnabled();
        boolean castCheckEnabled = sparkRunnerConfig.isDevTypeCastCheckEnabled();

        for (String column : dataset.columns()) {
            if (column.endsWith(REVISION_SUFFIX)) {
                // Revision columns are handled together with their base column below.
                continue;
            }

            boolean variableColumn = false;
            String configuredType = null;
            String parseFormat = null;
            VariableConfiguration variableConfig = variableConfigs.get(column);
            ColumnConfiguration columnConfig = columnConfigs.get(column);
            if (variableConfig != null) {
                configuredType = variableConfig.getVariableType();
                parseFormat = variableConfig.getParseFormat();
                // In learn mode only columns present in the escalated-variables broadcast count
                // as variable columns; a missing broadcast is treated as "none escalated".
                variableColumn = !learnMode || (escalatedVariables != null && escalatedVariables.containsKey(column));
            } else if (columnConfig != null) {
                configuredType = columnConfig.getColumnType();
                parseFormat = columnConfig.getParseFormat();
            }

            DataType targetType = mapDataType(originalFields, column, configuredType);
            if (!castCheckEnabled || targetType.equals(getCurrentDataType(originalFields, column))) {
                dataset = castColumn(dataset, column, column, targetType, parseFormat);
            } else {
                dataset = castColumnWithCheck(dataset, column, targetType, parseFormat, configuredType);
            }

            if (revisionCastEnabled && variableColumn) {
                String revisionColumn = column + REVISION_SUFFIX;
                dataset = dataset.withColumn(revisionColumn, dataset.col(revisionColumn).cast("integer"));
            }
        }

        if (sparkRunnerConfig.isWriteStepResultsIntoFile()) {
            BpmnaiUtils.getInstance().writeDatasetToCSV(dataset, "type_cast_columns", sparkRunnerConfig);
        }
        return dataset;
    }

    /**
     * Casts {@code column} into a side column and checks whether any non-empty source value became
     * {@code null} through the cast. If so a warning is logged and the helper columns are kept;
     * otherwise the original column is replaced by the cast one.
     */
    private Dataset<Row> castColumnWithCheck(Dataset<Row> dataset, String column, DataType targetType, String parseFormat, String configuredType) {
        String castedColumn = column + CASTED_SUFFIX;
        String resultColumn = column + CAST_RESULT_SUFFIX;

        Dataset<Row> casted = castColumn(dataset, column, castedColumn, targetType, parseFormat);
        Column hadValue = casted.col(column).isNotNull().and(casted.col(column).notEqual(functions.lit("")));
        Column castFailed = casted.col(castedColumn).isNull();
        Column castResult = functions.when(hadValue, functions.when(castFailed, functions.lit(CAST_ERROR_MARKER)).otherwise(functions.lit(""))).otherwise(functions.lit(""));

        Dataset<Row> checked = casted.withColumn(resultColumn, castResult);
        // Cached because the dataset is counted here and processed further afterwards.
        checked.cache();

        // Column API instead of a concatenated SQL expression, so the column name is not parsed as SQL.
        long castErrors = checked.filter(checked.col(resultColumn).equalTo(CAST_ERROR_MARKER)).count();
        if (castErrors > 0) {
            BpmnaiLogger.getInstance().writeWarn("Column '" + column + "' seems to have cast errors. Please check the data type (is defined as '" + configuredType + "')");
            return checked;
        }
        return checked.drop(column, resultColumn).withColumnRenamed(castedColumn, column);
    }

    /**
     * Adds (or replaces) {@code targetColumn} with the values of {@code sourceColumn} cast to
     * {@code targetType}. Date and timestamp targets get special handling for values the
     * {@code isalong} UDF recognises; all other types use a plain Spark cast.
     *
     * @param parseFormat optional date/timestamp pattern; ignored when {@code null} or empty
     */
    private Dataset<Row> castColumn(Dataset<Row> dataset, String sourceColumn, String targetColumn, DataType targetType, String parseFormat) {
        Column source = dataset.col(sourceColumn);
        boolean hasParseFormat = parseFormat != null && !parseFormat.equals("");

        Column converted;
        if (targetType.equals(DataTypes.DateType)) {
            // NOTE(review): with a parse format, the format is also applied to the from_unixtime
            // output of the epoch branch; confirm that this is intended.
            Column fromEpoch = epochAsDateTimeString(source);
            converted = hasParseFormat
                    ? functions.when(isLongValue(source), functions.to_date(fromEpoch, parseFormat)).otherwise(functions.to_date(source, parseFormat))
                    : functions.when(isLongValue(source), functions.to_date(fromEpoch)).otherwise(functions.to_date(source));
        } else if (targetType.equals(DataTypes.TimestampType)) {
            Column fromEpoch = epochAsDateTimeString(source);
            converted = hasParseFormat
                    ? functions.when(isLongValue(source), functions.to_timestamp(fromEpoch, parseFormat)).otherwise(functions.to_timestamp(source, parseFormat))
                    : functions.when(isLongValue(source), functions.to_timestamp(fromEpoch)).otherwise(functions.to_timestamp(source));
        } else {
            converted = source.cast(targetType);
        }
        return dataset.withColumn(targetColumn, converted);
    }

    /** Builds the condition that asks the {@code isalong} UDF whether the value is a long. */
    private Column isLongValue(Column source) {
        return functions.callUDF(UDF_IS_LONG, source);
    }

    /** Converts the value via the {@code timestampstringtolong} UDF and {@code from_unixtime}. */
    private Column epochAsDateTimeString(Column source) {
        return functions.from_unixtime(functions.callUDF(UDF_TIMESTAMP_STRING_TO_LONG, source));
    }

    /**
     * Looks up the data type of the field named {@code columnName}.
     *
     * @return the field's data type, or {@code null} if no field has that name
     */
    private DataType getCurrentDataType(List<StructField> fields, String columnName) {
        for (StructField field : fields) {
            if (field.name().equals(columnName)) {
                return field.dataType();
            }
        }
        return null;
    }

    /**
     * Maps a configured type name to a Spark data type.
     *
     * @param fields schema fields used to resolve the current type of the column
     * @param columnName column the type is resolved for
     * @param configuredType type name from the configuration, may be {@code null}
     * @return the column's current type when no type is configured; the matching Spark type for
     *     {@code integer}, {@code long}, {@code double}, {@code boolean}, {@code date} and
     *     {@code timestamp}; {@link DataTypes#StringType} for any other name
     */
    private DataType mapDataType(List<StructField> fields, String columnName, String configuredType) {
        if (configuredType == null) {
            return getCurrentDataType(fields, columnName);
        }
        switch (configuredType) {
            case "integer":
                return DataTypes.IntegerType;
            case "long":
                return DataTypes.LongType;
            case "double":
                return DataTypes.DoubleType;
            case "boolean":
                return DataTypes.BooleanType;
            case "date":
                return DataTypes.DateType;
            case "timestamp":
                return DataTypes.TimestampType;
            default:
                return DataTypes.StringType;
        }
    }
}
