package org.apache.mahout.text;

import java.io.IOException;
import java.io.InputStream;
import java.util.Iterator;
import java.util.Map;
import java.util.regex.Pattern;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.ToolRunner;
import org.apache.mahout.common.iterator.FileLineIterable;
import org.apache.xerces.impl.xs.SchemaSymbols;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/mahout/text/SequenceFilesFromCsvFilter.class */
/**
 * Directory filter that converts delimited text files into key/value entries.
 *
 * <p>Each line of a regular file is split on the tab character (despite the "Csv" in the
 * class name); the field at {@code keyColumn}, prefixed with the current path prefix, is
 * written as the key and the field at {@code valueColumn} as the value. Directories are
 * descended into by listing them with a new filter instance whose prefix is extended by
 * the directory name.
 */
public final class SequenceFilesFromCsvFilter extends SequenceFilesFromDirectoryFilter {
    private static final Logger log = LoggerFactory.getLogger(SequenceFilesFromCsvFilter.class);

    /** Field separator applied to every input line. */
    private static final Pattern TAB = Pattern.compile("\\t");

    /** Long and short names of the option selecting the key column. */
    public static final String[] KEY_COLUMN_OPTION = {"keyColumn", "kcol"};

    /** Long and short names of the option selecting the value column. */
    public static final String[] VALUE_COLUMN_OPTION = {"valueColumn", "vcol"};

    /** Zero-based index of the field used as the entry key. */
    private volatile int keyColumn;

    /** Zero-based index of the field used as the entry value. */
    private volatile int valueColumn;

    /** Creates the bare instance handed to {@link ToolRunner} by {@link #main(String[])}. */
    private SequenceFilesFromCsvFilter() {
    }

    /**
     * Creates a filter whose column indices are read from the supplied options.
     *
     * @param configuration Hadoop configuration, passed through to the superclass
     * @param str           key prefix, passed through to the superclass
     * @param map           options; must hold integer strings under
     *                      {@code KEY_COLUMN_OPTION[0]} and {@code VALUE_COLUMN_OPTION[0]}
     * @param chunkedWriter destination writer, passed through to the superclass
     * @throws IOException           if the superclass constructor fails
     * @throws NumberFormatException if either column option is missing or not an integer
     */
    public SequenceFilesFromCsvFilter(Configuration configuration, String str, Map<String, String> map, ChunkedWriter chunkedWriter) throws IOException {
        super(configuration, str, map, chunkedWriter);
        this.keyColumn = Integer.parseInt(map.get(KEY_COLUMN_OPTION[0]));
        this.valueColumn = Integer.parseInt(map.get(VALUE_COLUMN_OPTION[0]));
    }

    /**
     * Command-line entry point; runs a new filter instance through {@link ToolRunner}.
     *
     * @param strArr command-line arguments
     * @throws Exception if the tool run fails
     */
    public static void main(String[] strArr) throws Exception {
        ToolRunner.run(new SequenceFilesFromCsvFilter(), strArr);
    }

    /** Registers the inherited options plus the key-column and value-column options. */
    @Override // org.apache.mahout.text.SequenceFilesFromDirectory
    public void addOptions() {
        super.addOptions();
        // Plain literals for the defaults: they match the help text and avoid borrowing
        // unrelated XML-schema constants for a column index.
        addOption(KEY_COLUMN_OPTION[0], KEY_COLUMN_OPTION[1], "The key column. Default to 0", "0");
        addOption(VALUE_COLUMN_OPTION[0], VALUE_COLUMN_OPTION[1], "The value column. Default to 1", "1");
    }

    /**
     * Extends the inherited option map with this class's name as the file filter class and
     * with the key/value column option values.
     *
     * @return the option map produced by the superclass, with the three entries added
     * @throws IOException if the superclass fails to parse the options
     */
    @Override // org.apache.mahout.text.SequenceFilesFromDirectory
    public Map<String, String> parseOptions() throws IOException {
        Map<String, String> parseOptions = super.parseOptions();
        parseOptions.put(SequenceFilesFromDirectory.FILE_FILTER_CLASS_OPTION[0], getClass().getName());
        parseOptions.put(KEY_COLUMN_OPTION[0], getOption(KEY_COLUMN_OPTION[0]));
        parseOptions.put(VALUE_COLUMN_OPTION[0], getOption(VALUE_COLUMN_OPTION[0]));
        return parseOptions;
    }

    /**
     * Handles one directory entry: descends into directories, converts regular files.
     *
     * @param fileStatus status of the entry to process
     * @param path       path whose name is appended to the prefix when descending
     * @throws IOException if listing, opening, reading, writing or closing fails
     */
    @Override // org.apache.mahout.text.SequenceFilesFromDirectoryFilter
    protected void process(FileStatus fileStatus, Path path) throws IOException {
        if (fileStatus.isDir()) {
            // The listing result is discarded; the nested filter is presumably invoked for
            // each child as a side effect of the listing -- confirm against the superclass.
            this.fs.listStatus(fileStatus.getPath(), new SequenceFilesFromCsvFilter(this.conf, this.prefix + "/" + path.getName(), this.options, this.writer));
            return;
        }
        InputStream in = this.fs.open(fileStatus.getPath());
        try {
            for (String line : new FileLineIterable(in, this.charset, false)) {
                String[] columns = TAB.split(line);
                // NOTE(review): a line with fewer fields than the configured columns
                // (e.g. a blank line) throws ArrayIndexOutOfBoundsException here, as it
                // did before; decide whether such lines should be skipped instead.
                String key = columns[this.keyColumn];
                String value = columns[this.valueColumn];
                log.info("key : {}, value : {}", key, value);
                this.writer.write(this.prefix + key, value);
            }
        } finally {
            // Release the file handle even when a row is malformed or the write fails.
            // Closing a stream that is already closed has no effect per the Closeable
            // contract, so this is safe even if the line iterator closed it.
            in.close();
        }
    }
}
