package org.canova.cli.vectorization;

import com.google.common.base.Strings;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Map;
import org.canova.api.conf.Configuration;
import org.canova.api.exceptions.CanovaException;
import org.canova.api.io.data.DoubleWritable;
import org.canova.api.io.data.Text;
import org.canova.api.records.writer.RecordWriter;
import org.canova.api.writable.Writable;
import org.canova.cli.csv.schema.CSVInputSchema;
import org.canova.cli.csv.schema.CSVSchemaColumn;
import org.canova.cli.shuffle.Shuffler;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/canova/cli/vectorization/CSVVectorizationEngine.class */
/**
 * Vectorization engine for delimited text (CSV) input.
 *
 * <p>{@link #execute()} makes two passes over the input split:
 * <ol>
 *   <li>every data line is fed to the {@link CSVInputSchema} so that it can
 *       compute dataset statistics;</li>
 *   <li>the input is re-read, each line is transformed according to the schema
 *       and the result is handed to the configured record writer (optionally
 *       going through a {@link Shuffler} first).</li>
 * </ol>
 *
 * <p>The reader, input/output formats, split, output file name and the
 * {@code shuffleOn}/{@code printStats} flags are inherited from
 * {@link VectorizationEngine}.
 */
public class CSVVectorizationEngine extends VectorizationEngine {
    private static final Logger log = LoggerFactory.getLogger(CSVVectorizationEngine.class);

    /** Configuration key; when its value is "true" (case-insensitive) the first input line is skipped. */
    public static final String SKIP_HEADER_KEY = "canova.input.header.skip";

    /** Configuration key holding the location of the CSV schema file. */
    private static final String INPUT_SCHEMA_KEY = "canova.input.vector.schema";

    /** Writer configuration key that receives the output file name. */
    private static final String OUTPUT_PATH_KEY = "org.nd4j.outputpath";

    private CSVInputSchema inputSchema = null;
    private boolean skipHeader = false;

    /**
     * Parses the schema file named by the {@code canova.input.vector.schema}
     * property and reads the optional {@link #SKIP_HEADER_KEY} flag.
     *
     * @throws Exception whatever the schema parser raises
     */
    private void loadInputSchemaFile() throws Exception {
        String schemaPath = (String) this.configProps.get(INPUT_SCHEMA_KEY);
        this.inputSchema = new CSVInputSchema();
        this.inputSchema.parseSchemaFile(schemaPath);

        Object skipHeaderValue = this.configProps.get(SKIP_HEADER_KEY);
        if (skipHeaderValue != null) {
            // equalsIgnoreCase avoids the default-locale dependence of toLowerCase().
            this.skipHeader = "true".equalsIgnoreCase(((String) skipHeaderValue).trim());
        }
    }

    /**
     * Runs the two-pass vectorization and prints the line/record counters to
     * standard output.
     *
     * @throws CanovaException wrapping any failure raised while loading the
     *         schema, reading the input or writing the output; the original
     *         exception is kept as the cause
     */
    @Override // org.canova.cli.vectorization.VectorizationEngine
    public void execute() throws CanovaException, IOException, InterruptedException {
        try {
            loadInputSchemaFile();
            long linesRead = collectStatistics();
            long recordsWritten = writeVectors();
            System.out.println("CSV Lines Read: " + linesRead);
            System.out.println("Vector Records Written: " + recordsWritten);
        } catch (Exception e) {
            if (e instanceof InterruptedException) {
                // Keep the interrupt visible to callers further up the stack.
                Thread.currentThread().interrupt();
            }
            // Same message as before, but the cause (and its stack trace) is preserved.
            throw new CanovaException(e.toString(), e);
        }
    }

    /**
     * First pass: feeds every data line to the schema, then asks the schema to
     * compute (and optionally print) the dataset statistics. The reader is
     * closed even when the pass fails.
     *
     * @return number of lines read from the input, header included
     */
    private long collectStatistics() throws Exception {
        long linesRead = 0;
        try {
            while (this.reader.hasNext()) {
                Collection<?> rawRecord = this.reader.next();
                if (this.skipHeader && linesRead == 0) {
                    System.out.println("Skipping Header: " + rawRecord.toArray()[0].toString());
                } else {
                    try {
                        this.inputSchema.evaluateInputRecord(rawRecord.toArray()[0].toString());
                    } catch (Exception e) {
                        // Best effort: a line the schema cannot evaluate must not abort the scan.
                        log.warn("Could not evaluate input record at line index {}", linesRead, e);
                    }
                }
                linesRead++;
            }
        } finally {
            this.reader.close();
        }

        this.inputSchema.computeDatasetStatistics();
        if (this.printStats) {
            this.inputSchema.debugPringDatasetStatistics();
        }
        return linesRead;
    }

    /**
     * Second pass: re-opens the input, vectorizes each non-empty data line and
     * writes it out. When shuffling is enabled the records are buffered in a
     * {@link Shuffler} and written once the whole input has been read. Reader
     * and writer are closed even when the pass fails.
     *
     * @return number of records actually handed to the writer (or shuffler)
     */
    private long writeVectors() throws Exception {
        this.reader = this.inputFormat.createReader(this.split);
        RecordWriter writer = null;
        long recordsWritten = 0;
        try {
            Configuration configuration = new Configuration();
            configuration.set(OUTPUT_PATH_KEY, this.outputFilename);
            writer = this.outputFormat.createWriter(configuration);

            Shuffler shuffler = this.shuffleOn ? new Shuffler() : null;
            boolean headerPending = this.skipHeader;
            while (this.reader.hasNext()) {
                Collection<?> rawRecord = this.reader.next();
                if (headerPending) {
                    // The first line is the header: consume it without vectorizing.
                    headerPending = false;
                    continue;
                }
                String line = rawRecord.toArray()[0].toString();
                if (Strings.isNullOrEmpty(line)) {
                    continue;
                }
                Collection<Writable> vector = vectorizeToWritable("", line, this.inputSchema);
                if (vector == null) {
                    // Blank leading field: vectorizeToWritable declines the line, so nothing to write.
                    continue;
                }
                if (shuffler != null) {
                    shuffler.addRecord(vector);
                } else {
                    writer.write(vector);
                }
                recordsWritten++;
            }

            if (shuffler != null) {
                while (shuffler.hasNext()) {
                    writer.write(shuffler.next());
                }
            }
        } finally {
            try {
                this.reader.close();
            } finally {
                if (writer != null) {
                    writer.close();
                }
            }
        }
        return recordsWritten;
    }

    /**
     * Transforms one CSV line into numeric writables: one {@link DoubleWritable}
     * per column that is neither skipped nor the label, followed by the label
     * value as the last element ({@code 0.0} when the schema has no label column).
     *
     * @param str unused
     * @param str2 the raw CSV line
     * @param cSVInputSchema schema supplying the delimiter and per-column transforms
     * @return the transformed values, or {@code null} when the first field of the line is blank
     * @throws ArrayIndexOutOfBoundsException if the line has fewer fields than a
     *         non-skipped schema column requires
     */
    public Collection<Writable> vectorize(String str, String str2, CSVInputSchema cSVInputSchema) {
        // Note: String.split treats the delimiter as a regular expression.
        String[] fields = str2.split(cSVInputSchema.delimiter);
        if (fields[0].trim().equals("")) {
            return null;
        }

        Collection<Writable> values = new ArrayList<>();
        double label = 0.0d;
        int column = 0;
        for (CSVSchemaColumn schemaColumn : cSVInputSchema.getColumnSchemas().values()) {
            switch (schemaColumn.transform) {
                case SKIP:
                    break;
                case LABEL:
                    label = schemaColumn.transformColumnValue(fieldAt(fields, column, str2));
                    break;
                default:
                    values.add(new DoubleWritable(
                            schemaColumn.transformColumnValue(fieldAt(fields, column, str2))));
                    break;
            }
            column++;
        }
        values.add(new DoubleWritable(label));
        return values;
    }

    /**
     * Transforms one CSV line into textual writables: one {@link Text} per
     * non-skipped schema column, in schema order, each holding the string form
     * of the transformed value.
     *
     * @param str unused
     * @param str2 the raw CSV line
     * @param cSVInputSchema schema supplying the delimiter and per-column transforms
     * @return the transformed values, or {@code null} when the first field of the line is blank
     * @throws ArrayIndexOutOfBoundsException if the line has fewer fields than a
     *         non-skipped schema column requires
     */
    public Collection<Writable> vectorizeToWritable(String str, String str2, CSVInputSchema cSVInputSchema) {
        // Note: String.split treats the delimiter as a regular expression.
        String[] fields = str2.split(cSVInputSchema.delimiter);
        if (fields[0].trim().equals("")) {
            return null;
        }

        Collection<Writable> values = new ArrayList<>();
        int column = 0;
        for (CSVSchemaColumn schemaColumn : cSVInputSchema.getColumnSchemas().values()) {
            switch (schemaColumn.transform) {
                case SKIP:
                    break;
                default:
                    values.add(new Text(String.valueOf(
                            schemaColumn.transformColumnValue(fieldAt(fields, column, str2)))));
                    break;
            }
            column++;
        }
        return values;
    }

    /**
     * Returns the trimmed field at {@code index}, failing with a descriptive
     * message (same exception type a raw array access would raise) when the
     * line is shorter than the schema.
     */
    private static String fieldAt(String[] fields, int index, String line) {
        if (index >= fields.length) {
            throw new ArrayIndexOutOfBoundsException("CSV record has " + fields.length
                    + " field(s) but the schema needs a value at column index " + index
                    + ": '" + line + "'");
        }
        return fields[index].trim();
    }
}
