package de.viadee.bpmnai.core.processing.steps.dataprocessing;

import de.viadee.bpmnai.core.annotation.PreprocessingStepDescription;
import de.viadee.bpmnai.core.configuration.Configuration;
import de.viadee.bpmnai.core.configuration.preprocessing.ColumnHashConfiguration;
import de.viadee.bpmnai.core.configuration.preprocessing.PreprocessingConfiguration;
import de.viadee.bpmnai.core.configuration.util.ConfigurationUtils;
import de.viadee.bpmnai.core.processing.interfaces.PreprocessingStepInterface;
import de.viadee.bpmnai.core.runner.config.SparkRunnerConfig;
import de.viadee.bpmnai.core.util.BpmnaiUtils;
import de.viadee.bpmnai.core.util.logging.BpmnaiLogger;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.functions;

@PreprocessingStepDescription(name = "Hash column", description = "In this step the columns that are configured to be hashed for anonymization are run through a SHA-1 hash operation.")
/* loaded from: input_file:de/viadee/bpmnai/core/processing/steps/dataprocessing/ColumnHashStep.class */
public class ColumnHashStep implements PreprocessingStepInterface {
    @Override // de.viadee.bpmnai.core.processing.interfaces.PreprocessingStepInterface
    public Dataset<Row> runPreprocessingStep(Dataset<Row> dataset, Map<String, Object> map, SparkRunnerConfig sparkRunnerConfig) {
        PreprocessingConfiguration preprocessingConfiguration;
        ArrayList arrayList = new ArrayList(Arrays.asList(dataset.columns()));
        Configuration configuration = ConfigurationUtils.getInstance().getConfiguration(sparkRunnerConfig);
        if (configuration != null && (preprocessingConfiguration = configuration.getPreprocessingConfiguration()) != null) {
            for (ColumnHashConfiguration columnHashConfiguration : preprocessingConfiguration.getColumnHashConfiguration()) {
                if (columnHashConfiguration.isHashColumn()) {
                    if (arrayList.contains(columnHashConfiguration.getColumnName())) {
                        dataset = dataset.withColumn(columnHashConfiguration.getColumnName(), functions.sha1(dataset.col(columnHashConfiguration.getColumnName())));
                        BpmnaiLogger.getInstance().writeInfo("The column '" + columnHashConfiguration.getColumnName() + "' is being hashed.");
                    } else {
                        BpmnaiLogger.getInstance().writeWarn("The column '" + columnHashConfiguration.getColumnName() + "' is configured to be hashed, but does not exist in the data.");
                    }
                }
            }
        }
        if (sparkRunnerConfig.isWriteStepResultsIntoFile()) {
            BpmnaiUtils.getInstance().writeDatasetToCSV(dataset, "column_hash_step", sparkRunnerConfig);
        }
        return dataset;
    }
}
