package org.canova.cli.subcommands;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Enumeration;
import java.util.Properties;
import org.canova.api.conf.Configuration;
import org.canova.api.exceptions.CanovaException;
import org.canova.api.formats.input.InputFormat;
import org.canova.api.formats.output.OutputFormat;
import org.canova.api.records.reader.RecordReader;
import org.canova.api.records.writer.RecordWriter;
import org.canova.api.split.FileSplit;
import org.canova.cli.csv.schema.CSVInputSchema;
import org.canova.cli.csv.vectorization.CSVVectorizationEngine;
import org.kohsuke.args4j.CmdLineException;
import org.kohsuke.args4j.CmdLineParser;
import org.kohsuke.args4j.Option;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/canova/cli/subcommands/Vectorize.class */
/**
 * CLI sub-command that reads records from an input location, evaluates them against a CSV input
 * schema, and writes the vectorized records through the configured output format.
 *
 * <p>The run is driven by the properties file given with {@code -conf}. Keys read by this class:
 * <ul>
 *   <li>{@code input.directory} - location handed to the {@link FileSplit}</li>
 *   <li>{@code input.vector.schema} - path passed to {@code CSVInputSchema.parseSchemaFile}</li>
 *   <li>{@code input.format} / {@code output.format} - class names instantiated reflectively;
 *       the {@code DEFAULT_*_CLASSNAME} constants are used when a key is absent</li>
 *   <li>{@code output.directory} - output file or directory (see {@link #loadConfigFile()})</li>
 *   <li>{@code conf.print}, {@code input.statistics.debug.print} - set to {@code true} to print
 *       the loaded configuration / the dataset statistics</li>
 * </ul>
 */
public class Vectorize implements SubCommand {
    private static final Logger log = LoggerFactory.getLogger(Vectorize.class);
    public static final String OUTPUT_FILENAME_KEY = "output.directory";
    public static final String INPUT_FORMAT = "input.format";
    public static final String DEFAULT_INPUT_FORMAT_CLASSNAME = "org.canova.api.formats.input.impl.LineInputFormat";
    public static final String OUTPUT_FORMAT = "output.format";
    public static final String DEFAULT_OUTPUT_FORMAT_CLASSNAME = "org.canova.api.formats.output.impl.SVMLightOutputFormat";

    private static final String INPUT_DIRECTORY_KEY = "input.directory";
    private static final String INPUT_SCHEMA_KEY = "input.vector.schema";
    private static final String PRINT_CONF_KEY = "conf.print";
    private static final String PRINT_STATISTICS_KEY = "input.statistics.debug.print";
    private static final String WRITER_OUTPUT_PATH_KEY = "org.nd4j.outputpath";
    private static final String TIMESTAMP_PATTERN = "yyyy-MM-dd_HH-mm-ss";

    protected String[] args;

    /** Cleared by {@link #Vectorize(String[])} when the arguments cannot be parsed. */
    public boolean validCommandLineParameters = true;

    @Option(name = "-conf", usage = "Sets a configuration file to drive the vectorization process")
    public String configurationFile = "";

    /** Properties loaded by {@link #loadConfigFile()}; {@code null} until then. */
    public Properties configProps = null;

    /** Path the vectors are written to; resolved by {@link #loadConfigFile()}. */
    public String outputVectorFilename = "";

    private CSVInputSchema inputSchema = null;
    private CSVVectorizationEngine vectorizer = null;

    /** Creates an instance with default field values and no parsed arguments. */
    public Vectorize() {
    }

    /**
     * Creates an instance and binds the {@code @Option} fields from the given arguments.
     *
     * <p>A parse failure does not throw: usage is printed to {@code System.err}, the error is
     * logged, and {@link #validCommandLineParameters} is set to {@code false}.
     *
     * @param strArr raw command-line arguments for this sub-command
     */
    public Vectorize(String[] strArr) {
        this.args = strArr;
        CmdLineParser parser = new CmdLineParser(this);
        try {
            parser.parseArgument(strArr);
        } catch (CmdLineException e) {
            this.validCommandLineParameters = false;
            parser.printUsage(System.err);
            log.error("Unable to parse args", e);
        }
    }

    /**
     * Parses the schema file named by {@code input.vector.schema} and creates the vectorization
     * engine. The fields are assigned only after parsing succeeds, so a failure never leaves a
     * schema without a matching vectorizer.
     *
     * @throws Exception whatever {@code CSVInputSchema.parseSchemaFile} throws
     */
    private void loadInputSchemaFile() throws Exception {
        String schemaPath = this.configProps.getProperty(INPUT_SCHEMA_KEY);
        CSVInputSchema schema = new CSVInputSchema();
        schema.parseSchemaFile(schemaPath);
        this.inputSchema = schema;
        this.vectorizer = new CSVVectorizationEngine();
    }

    /**
     * Loads {@link #configurationFile} into {@link #configProps} and resolves
     * {@link #outputVectorFilename}:
     * <ul>
     *   <li>no {@code output.directory} key: a timestamped file under {@code /tmp}</li>
     *   <li>path does not exist: an empty file is created at that path</li>
     *   <li>path is an existing file: the file is deleted so it can be rewritten</li>
     *   <li>path is an existing directory: a timestamped file inside that directory</li>
     * </ul>
     *
     * @throws IOException if the configuration file is missing or unreadable, or the output file
     *     cannot be created
     */
    public void loadConfigFile() throws IOException {
        this.configProps = new Properties();
        // A missing or unreadable file is reported to the caller instead of being printed and
        // then surfacing later as an unrelated NullPointerException.
        FileInputStream in = new FileInputStream(this.configurationFile);
        try {
            this.configProps.load(in);
        } finally {
            in.close();
        }

        String configuredPath = this.configProps.getProperty(OUTPUT_FILENAME_KEY);
        if (configuredPath == null) {
            this.outputVectorFilename = "/tmp/" + timestampedFileName();
            return;
        }

        this.outputVectorFilename = configuredPath;
        File target = new File(configuredPath);
        if (!target.exists()) {
            target.createNewFile();
        } else if (target.isDirectory()) {
            this.outputVectorFilename = configuredPath + "/" + timestampedFileName();
        } else {
            if (!target.delete()) {
                log.warn("Could not delete existing output file {}", configuredPath);
            }
            System.out.println("File path already exists, deleting the old file before proceeding...");
        }
    }

    /** Returns {@code canova_vectors_<timestamp>.txt} using the current time. */
    private static String timestampedFileName() {
        // SimpleDateFormat is not thread-safe, so a fresh instance is created per call.
        return "canova_vectors_" + new SimpleDateFormat(TIMESTAMP_PATTERN).format(new Date()) + ".txt";
    }

    /** Prints every loaded configuration property to standard output. */
    public void debugLoadedConfProperties() {
        Properties properties = this.configProps;
        Enumeration<?> propertyNames = properties.propertyNames();
        System.out.println("\n--- Canova Configuration ---");
        while (propertyNames.hasMoreElements()) {
            String name = (String) propertyNames.nextElement();
            System.out.println(name + " -- " + properties.getProperty(name));
        }
        System.out.println("--- Canova Configuration ---\n");
    }

    /**
     * Runs the vectorization: loads the configuration and schema, makes one pass over the input
     * to collect dataset statistics, then a second pass that writes each non-blank record as a
     * vector.
     *
     * <p>The method returns without writing anything when the command-line arguments were
     * invalid, the schema cannot be loaded, or {@code input.directory} is not configured.
     *
     * @throws IOException if the configuration file, the input, or the output cannot be accessed
     */
    public void execute() throws CanovaException, IOException, InterruptedException {
        if (!this.validCommandLineParameters) {
            System.out.println("Vectorize function is not configured properly, stopping.");
            return;
        }

        loadConfigFile();
        if (isFlagEnabled(PRINT_CONF_KEY)) {
            debugLoadedConfProperties();
        }

        try {
            loadInputSchemaFile();
        } catch (Exception e) {
            // Without a schema there is no vectorizer either; continuing would only fail later
            // with a NullPointerException on the first record.
            log.error("Unable to load the input schema, stopping.", e);
            return;
        }

        String inputDirectory = this.configProps.getProperty(INPUT_DIRECTORY_KEY);
        if (inputDirectory == null) {
            log.error("Missing required configuration property '{}', stopping.", INPUT_DIRECTORY_KEY);
            return;
        }

        FileSplit split = new FileSplit(new File(inputDirectory));
        InputFormat inputFormat = createInputFormat();

        collectStatistics(inputFormat, split);
        if (isFlagEnabled(PRINT_STATISTICS_KEY)) {
            this.inputSchema.debugPringDatasetStatistics();
        }

        writeVectors(inputFormat, split);
        System.out.println("Output vectors written to: " + this.outputVectorFilename);
    }

    /** Returns whether the given configuration key is set to {@code true} (case-insensitive). */
    private boolean isFlagEnabled(String key) {
        String value = this.configProps.getProperty(key);
        return value != null && "true".equalsIgnoreCase(value.trim());
    }

    /**
     * First pass: feeds the first field of every record to the schema, then computes the dataset
     * statistics. A record that fails evaluation is logged and skipped.
     */
    private void collectStatistics(InputFormat inputFormat, FileSplit split)
            throws CanovaException, IOException, InterruptedException {
        RecordReader reader = inputFormat.createReader(split);
        try {
            while (reader.hasNext()) {
                try {
                    this.inputSchema.evaluateInputRecord(reader.next().toArray()[0].toString());
                } catch (Exception e) {
                    log.warn("Skipping a record that could not be evaluated against the schema", e);
                }
            }
        } finally {
            reader.close();
        }
        this.inputSchema.computeDatasetStatistics();
    }

    /**
     * Second pass: vectorizes the first field of every non-blank record and writes it to
     * {@link #outputVectorFilename}. Reader and writer are closed even when a record fails.
     */
    private void writeVectors(InputFormat inputFormat, FileSplit split)
            throws CanovaException, IOException, InterruptedException {
        RecordReader reader = inputFormat.createReader(split);
        try {
            Configuration writerConf = new Configuration();
            writerConf.set(WRITER_OUTPUT_PATH_KEY, this.outputVectorFilename);
            RecordWriter writer = createOutputFormat().createWriter(writerConf);
            try {
                while (reader.hasNext()) {
                    String line = reader.next().toArray()[0].toString();
                    if (!line.trim().isEmpty()) {
                        writer.write(this.vectorizer.vectorizeToWritable("", line, this.inputSchema));
                    }
                }
            } finally {
                writer.close();
            }
        } finally {
            reader.close();
        }
    }

    /**
     * Instantiates the class named by {@code input.format}, or
     * {@link #DEFAULT_INPUT_FORMAT_CLASSNAME} when the key is absent or no configuration is loaded.
     *
     * @return a new input format instance
     * @throws RuntimeException if the class cannot be loaded, instantiated, or is not an
     *     {@link InputFormat}
     */
    public InputFormat createInputFormat() {
        return instantiate(InputFormat.class, configuredClassName(INPUT_FORMAT, DEFAULT_INPUT_FORMAT_CLASSNAME));
    }

    /**
     * Instantiates the class named by {@code output.format}, or
     * {@link #DEFAULT_OUTPUT_FORMAT_CLASSNAME} when the key is absent or no configuration is loaded.
     *
     * @return a new output format instance
     * @throws RuntimeException if the class cannot be loaded, instantiated, or is not an
     *     {@link OutputFormat}
     */
    public OutputFormat createOutputFormat() {
        return instantiate(OutputFormat.class, configuredClassName(OUTPUT_FORMAT, DEFAULT_OUTPUT_FORMAT_CLASSNAME));
    }

    /** Looks up a class name in the configuration, falling back to the supplied default. */
    private String configuredClassName(String key, String defaultClassName) {
        String configured = this.configProps == null ? null : this.configProps.getProperty(key);
        return configured == null ? defaultClassName : configured;
    }

    /** Creates an instance of {@code className} through its no-argument constructor. */
    private static <T> T instantiate(Class<T> type, String className) {
        try {
            return type.cast(Class.forName(className).getDeclaredConstructor().newInstance());
        } catch (Exception e) {
            throw new RuntimeException("Unable to instantiate " + className + " as " + type.getName(), e);
        }
    }
}
