package org.apache.mahout.classifier.naivebayes.trainer;

import java.io.IOException;
import java.net.URI;
import java.util.Iterator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.math.VectorWritable;

/* loaded from: input_file:org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesTrainer.class */
/**
 * Static driver that trains a Naive Bayes model by chaining Hadoop MapReduce jobs.
 *
 * <p>The pipeline run by {@link #trainNaiveBayes} is:
 * <ol>
 *   <li>write a label-to-index map as a sequence file,</li>
 *   <li>run the "by label" summer job into the {@value #CLASS_VECTORS} directory,</li>
 *   <li>run the weight summer job into the {@value #SUM_VECTORS} directory,</li>
 *   <li>run either the standard or the complementary theta summer job into the
 *       {@value #THETA_SUM} directory.</li>
 * </ol>
 *
 * <p>This class is not instantiable.
 */
public final class NaiveBayesTrainer {
    /** Name of the output sub-directory written by the theta summer job. */
    public static final String THETA_SUM = "thetaSum";
    /** Name of the output sub-directory written by the weight summer job. */
    public static final String SUM_VECTORS = "sumVectors";
    /** Name of the output sub-directory written by the by-label summer job. */
    public static final String CLASS_VECTORS = "classVectors";
    /** Name used for the label map directory and for the label map file inside it. */
    public static final String LABEL_MAP = "labelMap";
    /** Configuration key under which the {@code f} argument of {@link #trainNaiveBayes} is stored. */
    public static final String ALPHA_I = "alphaI";

    /** Value assigned to {@code io.serializations} before each job is created. */
    private static final String SERIALIZATIONS =
        "org.apache.hadoop.io.serializer.JavaSerialization,org.apache.hadoop.io.serializer.WritableSerialization";

    private NaiveBayesTrainer() {
    }

    /**
     * Runs the complete training pipeline.
     *
     * <p>Note that the label map ends up at {@code <path2>/labelMap/labelMap}, because
     * {@link #createLabelMapFile} appends {@link #LABEL_MAP} to the path it is given.
     *
     * @param path          input directory read by the first job (sequence file input)
     * @param configuration Hadoop configuration; it is mutated ({@link #ALPHA_I},
     *                      {@code io.serializations} and the distributed cache files are set on it)
     * @param iterable      labels to write into the label map, in iteration order
     * @param path2         base output directory under which all sub-directories are created
     * @param i             number of reduce tasks used for every job
     * @param f             value stored under {@link #ALPHA_I}
     *                      (presumably the smoothing parameter read by the mappers; confirm there)
     * @param z             {@code true} to run the complementary theta job, {@code false} for the
     *                      standard theta job
     * @throws IOException            if writing the label map or submitting a job fails
     * @throws InterruptedException   if waiting for a job is interrupted
     * @throws ClassNotFoundException if a job class cannot be resolved
     * @throws IllegalStateException  if any job completes unsuccessfully
     */
    public static void trainNaiveBayes(Path path, Configuration configuration, Iterable<String> iterable, Path path2, int i, float f, boolean z) throws IOException, InterruptedException, ClassNotFoundException {
        configuration.setFloat(ALPHA_I, f);
        Path labelMapFile = createLabelMapFile(iterable, configuration, new Path(path2, LABEL_MAP));

        Path classVectors = new Path(path2, CLASS_VECTORS);
        runNaiveBayesByLabelSummer(path, configuration, labelMapFile, classVectors, i);

        Path sumVectors = new Path(path2, SUM_VECTORS);
        runNaiveBayesWeightSummer(classVectors, configuration, labelMapFile, sumVectors, i);

        // Both theta jobs read the class vectors and receive the sum vectors via the distributed cache.
        Path thetaSum = new Path(path2, THETA_SUM);
        if (z) {
            runNaiveBayesThetaComplementarySummer(classVectors, configuration, sumVectors, thetaSum, i);
        } else {
            runNaiveBayesThetaSummer(classVectors, configuration, sumVectors, thetaSum, i);
        }
    }

    /** Runs the by-label summer job (instance mapper, sum reducer also used as combiner). */
    private static void runNaiveBayesByLabelSummer(Path input, Configuration conf, Path cacheFile, Path output, int numReduceTasks) throws IOException, InterruptedException, ClassNotFoundException {
        String jobName = "Train Naive Bayes: input-folder: " + input + ", label-map-file: " + cacheFile.toString();
        Job job = newSummerJob(input, conf, cacheFile, output, numReduceTasks, jobName, IntWritable.class);
        job.setMapperClass(NaiveBayesInstanceMapper.class);
        job.setCombinerClass(NaiveBayesSumReducer.class);
        deleteOutputAndRun(job, conf, output);
    }

    /** Runs the weight summer job (weights mapper, Text output keys, no combiner). */
    private static void runNaiveBayesWeightSummer(Path input, Configuration conf, Path cacheFile, Path output, int numReduceTasks) throws IOException, InterruptedException, ClassNotFoundException {
        String jobName = "Train Naive Bayes: input-folder: " + input;
        Job job = newSummerJob(input, conf, cacheFile, output, numReduceTasks, jobName, Text.class);
        job.setMapperClass(NaiveBayesWeightsMapper.class);
        deleteOutputAndRun(job, conf, output);
    }

    /** Runs the standard theta summer job (theta mapper, no combiner). */
    private static void runNaiveBayesThetaSummer(Path input, Configuration conf, Path cacheFile, Path output, int numReduceTasks) throws IOException, InterruptedException, ClassNotFoundException {
        String jobName = "Train Naive Bayes: input-folder: " + input + ", label-map-file: " + cacheFile.toString();
        Job job = newSummerJob(input, conf, cacheFile, output, numReduceTasks, jobName, IntWritable.class);
        job.setMapperClass(NaiveBayesThetaMapper.class);
        deleteOutputAndRun(job, conf, output);
    }

    /** Runs the complementary theta summer job (complementary theta mapper, no combiner). */
    private static void runNaiveBayesThetaComplementarySummer(Path input, Configuration conf, Path cacheFile, Path output, int numReduceTasks) throws IOException, InterruptedException, ClassNotFoundException {
        String jobName = "Train Naive Bayes: input-folder: " + input + ", label-map-file: " + cacheFile.toString();
        Job job = newSummerJob(input, conf, cacheFile, output, numReduceTasks, jobName, IntWritable.class);
        job.setMapperClass(NaiveBayesThetaComplementaryMapper.class);
        deleteOutputAndRun(job, conf, output);
    }

    /**
     * Builds a job with the settings shared by every training step: serializations, a single
     * distributed cache file, sequence file input/output, {@code NaiveBayesSumReducer} as reducer
     * and {@code VectorWritable} output values. The caller still has to set the mapper (and the
     * combiner, if any).
     */
    private static Job newSummerJob(Path input, Configuration conf, Path cacheFile, Path output, int numReduceTasks, String jobName, Class<?> outputKeyClass) throws IOException {
        // These must be set on the configuration before the Job is constructed from it.
        conf.set("io.serializations", SERIALIZATIONS);
        DistributedCache.setCacheFiles(new URI[]{cacheFile.toUri()}, conf);

        Job job = new Job(conf);
        job.setJobName(jobName);
        job.setJarByClass(NaiveBayesTrainer.class);
        FileInputFormat.setInputPaths(job, new Path[]{input});
        FileOutputFormat.setOutputPath(job, output);
        job.setReducerClass(NaiveBayesSumReducer.class);
        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setOutputKeyClass(outputKeyClass);
        job.setOutputValueClass(VectorWritable.class);
        job.setNumReduceTasks(numReduceTasks);
        return job;
    }

    /**
     * Deletes any previous output, runs the job to completion and fails loudly if the job did not
     * succeed, so that later steps never continue after a failed job.
     */
    private static void deleteOutputAndRun(Job job, Configuration conf, Path output) throws IOException, InterruptedException, ClassNotFoundException {
        HadoopUtil.delete(conf, output);
        if (!job.waitForCompletion(true)) {
            throw new IllegalStateException("Job failed: " + job.getJobName());
        }
    }

    /**
     * Writes the given labels to a sequence file, mapping each label ({@code Text} key) to its
     * zero-based position in iteration order ({@code IntWritable} value).
     *
     * @param iterable      labels to write
     * @param configuration Hadoop configuration used to resolve the file system and create the writer
     * @param path          parent location; the file is created as the child named {@link #LABEL_MAP}
     * @return the path of the sequence file that was written ({@code path/labelMap})
     * @throws IOException if the file cannot be created, written or closed
     */
    public static Path createLabelMapFile(Iterable<String> iterable, Configuration configuration, Path path) throws IOException {
        FileSystem fileSystem = FileSystem.get(path.toUri(), configuration);
        Path labelMapFile = new Path(path, LABEL_MAP);
        SequenceFile.Writer writer = new SequenceFile.Writer(fileSystem, configuration, labelMapFile, Text.class, IntWritable.class);
        try {
            int index = 0;
            for (String label : iterable) {
                writer.append(new Text(label), new IntWritable(index));
                index++;
            }
        } finally {
            // Close even when append fails so the underlying stream is not leaked.
            writer.close();
        }
        return labelMapFile;
    }
}
