package io.druid.indexer;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Optional;
import com.google.common.base.Strings;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import com.google.common.hash.HashFunction;
import com.google.common.hash.Hashing;
import com.metamx.common.IAE;
import com.metamx.common.ISE;
import com.metamx.common.logger.Logger;
import io.druid.collections.StupidPool;
import io.druid.data.input.InputRow;
import io.druid.data.input.Row;
import io.druid.data.input.Rows;
import io.druid.indexer.HadoopDruidIndexerConfig;
import io.druid.indexer.hadoop.SegmentInputRow;
import io.druid.offheap.OffheapBufferPool;
import io.druid.query.aggregation.AggregatorFactory;
import io.druid.segment.IndexIO;
import io.druid.segment.IndexMaker;
import io.druid.segment.LoggingProgressIndicator;
import io.druid.segment.ProgressIndicator;
import io.druid.segment.QueryableIndex;
import io.druid.segment.incremental.IncrementalIndex;
import io.druid.segment.incremental.IncrementalIndexSchema;
import io.druid.segment.incremental.OffheapIncrementalIndex;
import io.druid.segment.incremental.OnheapIncrementalIndex;
import io.druid.timeline.DataSegment;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeSet;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.InvalidJobConfException;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.joda.time.DateTime;
import org.joda.time.Interval;

/* loaded from: input_file:io/druid/indexer/IndexGeneratorJob.class */
public class IndexGeneratorJob implements Jobby {
    private static final Logger log = new Logger(IndexGeneratorJob.class);
    private final HadoopDruidIndexerConfig config;
    private IndexGeneratorStats jobStats = new IndexGeneratorStats();

    /* loaded from: input_file:io/druid/indexer/IndexGeneratorJob$IndexGeneratorCombiner.class */
    /**
     * Combiner that pre-aggregates the serialized rows sharing one map output key.
     *
     * <p>Rows are deserialized with the job's aggregators, rolled up in an on-heap
     * {@link IncrementalIndex} built from the combining aggregators, and written back
     * out serialized with the combining aggregators.
     */
    public static class IndexGeneratorCombiner extends Reducer<BytesWritable, BytesWritable, BytesWritable, BytesWritable> {
        private HadoopDruidIndexerConfig config;
        private AggregatorFactory[] aggregators;
        private AggregatorFactory[] combiningAggs;

        /**
         * Loads the indexer config from the job configuration and derives the
         * combining factory for every configured aggregator.
         */
        @Override
        protected void setup(Reducer<BytesWritable, BytesWritable, BytesWritable, BytesWritable>.Context context) throws IOException, InterruptedException {
            this.config = HadoopDruidIndexerConfig.fromConfiguration(context.getConfiguration());
            this.aggregators = this.config.getSchema().getDataSchema().getAggregators();
            this.combiningAggs = new AggregatorFactory[this.aggregators.length];
            for (int i = 0; i < this.aggregators.length; i++) {
                this.combiningAggs[i] = this.aggregators[i].getCombiningFactory();
            }
        }

        /**
         * Combines all values of one key.
         *
         * <p>A key with a single value is forwarded untouched. Otherwise the rows are added
         * to an incremental index; whenever the index reports it cannot take another row it
         * is flushed to the context and replaced by a fresh one.
         *
         * @param bytesWritable map output key; its group key identifies the bucket
         * @param iterable      serialized input rows for that key
         * @param context       combiner context receiving the combined rows
         */
        @Override
        protected void reduce(BytesWritable bytesWritable, Iterable<BytesWritable> iterable, Reducer<BytesWritable, BytesWritable, BytesWritable, BytesWritable>.Context context) throws IOException, InterruptedException {
            Iterator<BytesWritable> values = iterable.iterator();
            BytesWritable first = values.next();
            if (!values.hasNext()) {
                // Only one row for this key: nothing to combine, pass it through as-is.
                context.write(bytesWritable, first);
                return;
            }
            Bucket bucket = (Bucket) Bucket.fromGroupKey(SortableBytes.fromBytesWritable(bytesWritable).getGroupKey()).lhs;
            IncrementalIndex index = IndexGeneratorJob.makeIncrementalIndex(bucket, this.combiningAggs, this.config, false, null);
            index.add(InputRowSerde.fromBytes(first.getBytes(), this.aggregators));
            while (values.hasNext()) {
                context.progress();
                InputRow row = InputRowSerde.fromBytes(values.next().getBytes(), this.aggregators);
                if (!index.canAppendRow()) {
                    IndexGeneratorJob.log.info("current index full due to [%s]. creating new index.", index.getOutOfRowsReason());
                    flushIndexToContextAndClose(bytesWritable, index, context);
                    index = IndexGeneratorJob.makeIncrementalIndex(bucket, this.combiningAggs, this.config, false, null);
                }
                index.add(row);
            }
            flushIndexToContextAndClose(bytesWritable, index, context);
        }

        /**
         * Writes every row of {@code incrementalIndex} to the context under the given key,
         * serialized with the combining aggregators, then closes the index.
         */
        private void flushIndexToContextAndClose(BytesWritable bytesWritable, IncrementalIndex incrementalIndex, Reducer<BytesWritable, BytesWritable, BytesWritable, BytesWritable>.Context context) throws IOException, InterruptedException {
            // IncrementalIndex is used as a raw type here, so the iterator is untyped and
            // each element has to be cast to Row.
            Iterator<?> rows = incrementalIndex.iterator();
            while (rows.hasNext()) {
                context.progress();
                InputRow inputRow = getInputRowFromRow((Row) rows.next(), incrementalIndex.getDimensions());
                context.write(bytesWritable, new BytesWritable(InputRowSerde.toBytes(inputRow, this.combiningAggs)));
            }
            incrementalIndex.close();
        }

        /**
         * Adapts a {@link Row} to the {@link InputRow} interface: the dimension list is the
         * supplied {@code list}, every other call is delegated to {@code row}.
         */
        private InputRow getInputRowFromRow(final Row row, final List<String> list) {
            return new InputRow() {
                @Override
                public List<String> getDimensions() {
                    return list;
                }

                @Override
                public long getTimestampFromEpoch() {
                    return row.getTimestampFromEpoch();
                }

                @Override
                public DateTime getTimestamp() {
                    return row.getTimestamp();
                }

                @Override
                public List<String> getDimension(String str) {
                    return row.getDimension(str);
                }

                @Override
                public Object getRaw(String str) {
                    return row.getRaw(str);
                }

                @Override
                public float getFloatMetric(String str) {
                    return row.getFloatMetric(str);
                }

                @Override
                public long getLongMetric(String str) {
                    return row.getLongMetric(str);
                }

                @Override
                public int compareTo(Row row2) {
                    return row.compareTo(row2);
                }
            };
        }
    }

    /* loaded from: input_file:io/druid/indexer/IndexGeneratorJob$IndexGeneratorMapper.class */
    public static class IndexGeneratorMapper extends HadoopDruidIndexerMapper<BytesWritable, BytesWritable> {
        private static final HashFunction hashFunction = Hashing.murmur3_128();
        private AggregatorFactory[] aggregators;
        private AggregatorFactory[] combiningAggs;

        /* JADX INFO: Access modifiers changed from: protected */
        @Override // io.druid.indexer.HadoopDruidIndexerMapper
        public void setup(Mapper<Object, Object, BytesWritable, BytesWritable>.Context context) throws IOException, InterruptedException {
            super.setup(context);
            this.aggregators = this.config.getSchema().getDataSchema().getAggregators();
            this.combiningAggs = new AggregatorFactory[this.aggregators.length];
            for (int i = 0; i < this.aggregators.length; i++) {
                this.combiningAggs[i] = this.aggregators[i].getCombiningFactory();
            }
        }

        /* JADX WARN: Multi-variable type inference failed */
        /* JADX WARN: Type inference failed for: r4v1, types: [byte[], byte[][]] */
        @Override // io.druid.indexer.HadoopDruidIndexerMapper
        protected void innerMap(InputRow inputRow, Object obj, Mapper<Object, Object, BytesWritable, BytesWritable>.Context context) throws IOException, InterruptedException {
            Optional<Bucket> bucket = getConfig().getBucket(inputRow);
            if (!bucket.isPresent()) {
                throw new ISE("WTF?! No bucket found for row: %s", new Object[]{inputRow});
            }
            long truncate = this.granularitySpec.getQueryGranularity().truncate(inputRow.getTimestampFromEpoch());
            byte[] asBytes = hashFunction.hashBytes(HadoopDruidIndexerConfig.jsonMapper.writeValueAsBytes(Rows.toGroupKey(truncate, inputRow))).asBytes();
            context.write(new SortableBytes(((Bucket) bucket.get()).toGroupKey(new byte[0]), ByteBuffer.allocate(8 + asBytes.length).putLong(truncate).put(asBytes).array()).toBytesWritable(), new BytesWritable(inputRow instanceof SegmentInputRow ? InputRowSerde.toBytes(inputRow, this.combiningAggs) : InputRowSerde.toBytes(inputRow, this.aggregators)));
        }
    }

    /* loaded from: input_file:io/druid/indexer/IndexGeneratorJob$IndexGeneratorOutputFormat.class */
    public static class IndexGeneratorOutputFormat extends TextOutputFormat {
        public void checkOutputSpecs(JobContext jobContext) throws IOException {
            if (getOutputPath(jobContext) == null) {
                throw new InvalidJobConfException("Output directory not set.");
            }
        }
    }

    /* loaded from: input_file:io/druid/indexer/IndexGeneratorJob$IndexGeneratorPartitioner.class */
    public static class IndexGeneratorPartitioner extends Partitioner<BytesWritable, Writable> implements Configurable {
        private Configuration config;

        public int getPartition(BytesWritable bytesWritable, Writable writable, int i) {
            ByteBuffer wrap = ByteBuffer.wrap(bytesWritable.getBytes());
            wrap.position(4);
            int i2 = wrap.getInt();
            if (this.config.get("mapred.job.tracker").equals("local")) {
                return i2 % i;
            }
            if (i2 >= i) {
                throw new ISE("Not enough partitions, shard[%,d] >= numPartitions[%,d]", new Object[]{Integer.valueOf(i2), Integer.valueOf(i)});
            }
            return i2;
        }

        public Configuration getConf() {
            return this.config;
        }

        public void setConf(Configuration configuration) {
            this.config = configuration;
        }
    }

    /* loaded from: input_file:io/druid/indexer/IndexGeneratorJob$IndexGeneratorReducer.class */
    /**
     * Reducer that builds one segment per bucket.
     *
     * <p>All rows of a bucket are added to an {@link IncrementalIndex}. Each time the index
     * is full it is persisted to a temporary directory and replaced. At the end the persisted
     * parts are merged (or the single index is persisted directly), the result is handed to
     * {@link JobHelper#serializeOutIndex}, and a segment descriptor is written.
     */
    public static class IndexGeneratorReducer extends Reducer<BytesWritable, BytesWritable, BytesWritable, Text> {
        protected HadoopDruidIndexerConfig config;
        private List<String> metricNames = Lists.newArrayList();
        private AggregatorFactory[] aggregators;
        private AggregatorFactory[] combiningAggs;

        /**
         * Creates a progress indicator that reports progress to the reducer context.
         */
        protected ProgressIndicator makeProgressIndicator(final Reducer<BytesWritable, BytesWritable, BytesWritable, Text>.Context context) {
            return new LoggingProgressIndicator("IndexGeneratorJob") {
                @Override
                public void progress() {
                    context.progress();
                }
            };
        }

        /**
         * Persists {@code incrementalIndex} into {@code file} using the configured index spec.
         */
        protected File persist(IncrementalIndex incrementalIndex, Interval interval, File file, ProgressIndicator progressIndicator) throws IOException {
            return IndexMaker.persist(incrementalIndex, interval, file, (Map) null, this.config.getIndexSpec(), progressIndicator);
        }

        /**
         * Merges the given indexes into {@code file} using the configured index spec.
         */
        protected File mergeQueryableIndex(List<QueryableIndex> list, AggregatorFactory[] aggregatorFactoryArr, File file, ProgressIndicator progressIndicator) throws IOException {
            return IndexMaker.mergeQueryableIndex(list, aggregatorFactoryArr, file, this.config.getIndexSpec(), progressIndicator);
        }

        /**
         * Loads the indexer config and records, for every aggregator, its name and its
         * combining factory.
         */
        @Override
        protected void setup(Reducer<BytesWritable, BytesWritable, BytesWritable, Text>.Context context) throws IOException, InterruptedException {
            this.config = HadoopDruidIndexerConfig.fromConfiguration(context.getConfiguration());
            this.aggregators = this.config.getSchema().getDataSchema().getAggregators();
            this.combiningAggs = new AggregatorFactory[this.aggregators.length];
            for (int i = 0; i < this.aggregators.length; i++) {
                this.metricNames.add(this.aggregators[i].getName());
                this.combiningAggs[i] = this.aggregators[i].getCombiningFactory();
            }
        }

        /**
         * Indexes all rows of one bucket and writes out the resulting segment and descriptor.
         *
         * @param bytesWritable key whose group key identifies the bucket
         * @param iterable      serialized rows belonging to the bucket
         * @param context       reducer context
         * @throws IAE if there is nothing to persist (no spilled parts and an empty index)
         */
        @Override
        protected void reduce(BytesWritable bytesWritable, Iterable<BytesWritable> iterable, Reducer<BytesWritable, BytesWritable, BytesWritable, Text>.Context context) throws IOException, InterruptedException {
            Bucket bucket = (Bucket) Bucket.fromGroupKey(SortableBytes.fromBytesWritable(bytesWritable).getGroupKey()).lhs;
            Interval interval = (Interval) this.config.getGranularitySpec().bucketInterval(bucket.time).get();
            HadoopTuningConfig tuningConfig = this.config.getSchema().m8getTuningConfig();
            OffheapBufferPool bufferPool = new OffheapBufferPool((int) (tuningConfig.getBufferSize() * tuningConfig.getAggregationBufferRatio()));
            IncrementalIndex index = IndexGeneratorJob.makeIncrementalIndex(bucket, this.combiningAggs, this.config, tuningConfig.isIngestOffheap(), bufferPool);
            try {
                // createTempFile only reserves a unique name; swap the file for a directory.
                File baseFlushFile = File.createTempFile("base", "flush");
                baseFlushFile.delete();
                baseFlushFile.mkdirs();

                TreeSet<File> toMerge = Sets.newTreeSet();
                int indexCount = 0;
                int lineCount = 0;
                int runningTotalLineCount = 0;
                long startTime = System.currentTimeMillis();
                HashSet<String> allDimensionNames = Sets.newHashSet();
                ProgressIndicator progressIndicator = makeProgressIndicator(context);

                for (BytesWritable serializedRow : iterable) {
                    context.progress();
                    InputRow inputRow = index.formatRow(InputRowSerde.fromBytes(serializedRow.getBytes(), this.aggregators));
                    allDimensionNames.addAll(inputRow.getDimensions());
                    int numRows = index.add(inputRow);
                    lineCount++;
                    if (!index.canAppendRow()) {
                        // Pass the reason as an argument, never as the format string itself.
                        IndexGeneratorJob.log.info("%s", index.getOutOfRowsReason());
                        IndexGeneratorJob.log.info("%,d lines to %,d rows in %,d millis", lineCount - runningTotalLineCount, numRows, System.currentTimeMillis() - startTime);
                        runningTotalLineCount = lineCount;

                        // Spill the full index to disk and continue with a fresh one.
                        File spillFile = new File(baseFlushFile, String.format("index%,05d", indexCount));
                        toMerge.add(spillFile);
                        context.progress();
                        persist(index, interval, spillFile, progressIndicator);
                        index.close();
                        index = IndexGeneratorJob.makeIncrementalIndex(bucket, this.combiningAggs, this.config, tuningConfig.isIngestOffheap(), bufferPool);
                        startTime = System.currentTimeMillis();
                        indexCount++;
                    }
                }
                IndexGeneratorJob.log.info("%,d lines completed.", lineCount);

                List<QueryableIndex> indexes = Lists.newArrayListWithCapacity(indexCount);
                File mergedBase;
                if (toMerge.size() != 0) {
                    // At least one spill happened: persist the remainder and merge all parts.
                    if (!index.isEmpty()) {
                        File finalFile = new File(baseFlushFile, "final");
                        persist(index, interval, finalFile, progressIndicator);
                        toMerge.add(finalFile);
                    }
                    for (File part : toMerge) {
                        indexes.add(IndexIO.loadIndex(part));
                    }
                    mergedBase = mergeQueryableIndex(indexes, this.aggregators, new File(baseFlushFile, "merged"), progressIndicator);
                } else {
                    if (index.isEmpty()) {
                        throw new IAE("If you try to persist empty indexes you are going to have a bad time");
                    }
                    mergedBase = new File(baseFlushFile, "merged");
                    persist(index, interval, mergedBase, progressIndicator);
                }

                DataSegment segmentTemplate = new DataSegment(
                        this.config.getDataSource(),
                        interval,
                        tuningConfig.getVersion(),
                        (Map) null,
                        ImmutableList.copyOf(allDimensionNames),
                        this.metricNames,
                        this.config.getShardSpec(bucket).getActualSpec(),
                        -1,
                        -1L
                );
                DataSegment segment = JobHelper.serializeOutIndex(
                        segmentTemplate,
                        context.getConfiguration(),
                        context,
                        context.getTaskAttemptID(),
                        mergedBase,
                        JobHelper.makeSegmentOutputPath(
                                new Path(this.config.getSchema().m9getIOConfig().getSegmentOutputPath()),
                                new Path(this.config.getSchema().m9getIOConfig().getSegmentOutputPath()).getFileSystem(context.getConfiguration()),
                                this.config.getSchema().getDataSchema().getDataSource(),
                                tuningConfig.getVersion(),
                                (Interval) this.config.getSchema().getDataSchema().getGranularitySpec().bucketInterval(bucket.time).get(),
                                bucket.partitionNum
                        )
                );

                Path descriptorPath = this.config.makeDescriptorInfoPath(segment);
                Path qualifiedDescriptorPath = JobHelper.prependFSIfNullScheme(FileSystem.get(descriptorPath.toUri(), context.getConfiguration()), descriptorPath);
                IndexGeneratorJob.log.info("Writing descriptor to path[%s]", qualifiedDescriptorPath);
                JobHelper.writeSegmentDescriptor(this.config.makeDescriptorInfoDir().getFileSystem(context.getConfiguration()), segment, qualifiedDescriptorPath, context);

                // Remove the intermediate spill directories now that the segment is written.
                for (File part : toMerge) {
                    FileUtils.deleteDirectory(part);
                }
            } finally {
                index.close();
            }
        }
    }

    /* loaded from: input_file:io/druid/indexer/IndexGeneratorJob$IndexGeneratorStats.class */
    /**
     * Mutable holder for statistics of an index generator run; currently only the
     * invalid row count.
     */
    public static class IndexGeneratorStats {
        /** Number of invalid rows; starts at zero. */
        private long invalidRowCount = 0L;

        /**
         * @return the stored invalid row count
         */
        public long getInvalidRowCount() {
            return invalidRowCount;
        }

        /**
         * @param j new invalid row count to store
         */
        public void setInvalidRowCount(long j) {
            invalidRowCount = j;
        }
    }

    /**
     * Reads every file in the config's descriptor-info directory as a JSON
     * {@link DataSegment} and returns them as an immutable list.
     *
     * @param hadoopDruidIndexerConfig config that supplies the descriptor-info directory
     * @return the segments described by the files in that directory
     * @throws RuntimeException wrapping the {@link IOException} when the directory cannot be
     *         listed or a descriptor cannot be read; a missing directory is additionally
     *         logged as an error first
     */
    public static List<DataSegment> getPublishedSegments(HadoopDruidIndexerConfig hadoopDruidIndexerConfig) {
        final Configuration conf = JobHelper.injectSystemProperties(new Configuration());
        final ObjectMapper jsonMapper = HadoopDruidIndexerConfig.jsonMapper;
        final ImmutableList.Builder<DataSegment> publishedSegments = ImmutableList.builder();
        final Path descriptorInfoDir = hadoopDruidIndexerConfig.makeDescriptorInfoDir();

        try {
            final FileSystem fs = descriptorInfoDir.getFileSystem(conf);
            final FileStatus[] descriptors = fs.listStatus(descriptorInfoDir);
            for (FileStatus descriptor : descriptors) {
                final DataSegment segment = jsonMapper.readValue(fs.open(descriptor.getPath()), DataSegment.class);
                publishedSegments.add(segment);
                log.info("Adding segment %s to the list of published segments", segment.getIdentifier());
            }
        } catch (FileNotFoundException e) {
            log.error("[%s] SegmentDescriptorInfo is not found usually when indexing process did not produce any segments meaning either there was no input data to process or all the input events were discarded due to some error", e.getMessage());
            // propagate() always throws; the explicit throw just makes that visible.
            throw Throwables.propagate(e);
        } catch (IOException e) {
            throw Throwables.propagate(e);
        }

        return publishedSegments.build();
    }

    /**
     * Creates a job bound to the given indexer configuration.
     *
     * @param hadoopDruidIndexerConfig configuration used by {@link #run()}
     */
    public IndexGeneratorJob(HadoopDruidIndexerConfig hadoopDruidIndexerConfig) {
        config = hadoopDruidIndexerConfig;
    }

    /**
     * Registers the reducer implementation on the job. Being {@code protected}, it can be
     * overridden by subclasses to install a different reducer.
     *
     * @param job job being configured
     */
    protected void setReducerClass(Job job) {
        final Class<IndexGeneratorReducer> reducerClass = IndexGeneratorReducer.class;
        job.setReducerClass(reducerClass);
    }

    /**
     * @return the statistics holder of this job; {@link #run()} stores the invalid row
     *         count in it once the Hadoop job has completed
     */
    public IndexGeneratorStats getJobStats() {
        return jobStats;
    }

    /**
     * Configures, submits and waits for the Hadoop index generator job.
     *
     * <p>One reduce task is requested per bucket. After completion the invalid-row counter
     * of the job is copied into {@link #getJobStats()}.
     *
     * @return the result of {@code Job.waitForCompletion(true)}
     * @throws RuntimeException wrapping any exception raised while setting up or running the
     *         job, including the case where the config reports no buckets
     */
    public boolean run() {
        try {
            final Configuration baseConf = new Configuration();
            final String jobName = String.format("%s-index-generator-%s", this.config.getDataSource(), this.config.getIntervals());
            final Job job = Job.getInstance(baseConf, jobName);

            job.getConfiguration().set("io.sort.record.percent", "0.23");
            JobHelper.injectSystemProperties(job);
            this.config.addJobProperties(job);

            // Map side.
            job.setMapperClass(IndexGeneratorMapper.class);
            job.setMapOutputValueClass(BytesWritable.class);
            SortableBytes.useSortableBytesAsMapOutputKey(job);

            final int numReducers = Iterables.size((Iterable<?>) this.config.getAllBuckets().get());
            if (numReducers == 0) {
                throw new RuntimeException("No buckets?? seems there is no data to index.");
            }

            final boolean useCombiner = this.config.getSchema().m8getTuningConfig().getUseCombiner();
            if (useCombiner) {
                job.setCombinerClass(IndexGeneratorCombiner.class);
                job.setCombinerKeyGroupingComparatorClass(BytesWritable.Comparator.class);
            }

            // Reduce side: one reducer per bucket.
            job.setNumReduceTasks(numReducers);
            job.setPartitionerClass(IndexGeneratorPartitioner.class);
            setReducerClass(job);
            job.setOutputKeyClass(BytesWritable.class);
            job.setOutputValueClass(Text.class);
            job.setOutputFormatClass(IndexGeneratorOutputFormat.class);
            FileOutputFormat.setOutputPath(job, this.config.makeIntermediatePath());

            this.config.addInputPaths(job);

            // Forward the bitmap type, when configured, to the map and reduce JVMs by
            // prepending a -D flag to their existing java opts.
            final String bitmapType = HadoopDruidIndexerConfig.properties.getProperty("druid.processing.bitmap.type");
            if (bitmapType != null) {
                final String[] javaOptsKeys = {"mapreduce.reduce.java.opts", "mapreduce.map.java.opts"};
                for (String optsKey : javaOptsKeys) {
                    final String existingOpts = Strings.nullToEmpty(job.getConfiguration().get(optsKey));
                    job.getConfiguration().set(optsKey, String.format("-D%s=%s %s", "druid.processing.bitmap.type", bitmapType, existingOpts));
                }
            }

            this.config.intoConfiguration(job);
            JobHelper.setupClasspath(JobHelper.distributedClassPath(this.config.getWorkingPath()), job);

            job.submit();
            log.info("Job %s submitted, status available at %s", job.getJobName(), job.getTrackingURL());

            final boolean success = job.waitForCompletion(true);
            final long invalidRows = job.getCounters().findCounter(HadoopDruidIndexerConfig.IndexJobCounters.INVALID_ROW_COUNTER).getValue();
            this.jobStats.setInvalidRowCount(invalidRows);
            return success;
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Builds an empty incremental index for the given bucket.
     *
     * <p>The schema takes its minimum timestamp from the bucket time, its dimensions spec
     * from the data schema's parser, its query granularity from the granularity spec, and
     * its metrics from {@code aggregatorFactoryArr}.
     *
     * @param bucket                   bucket whose time is used as the minimum timestamp
     * @param aggregatorFactoryArr     metrics of the index
     * @param hadoopDruidIndexerConfig source of schema and tuning settings
     * @param z                        {@code true} for an off-heap index, {@code false} for on-heap
     * @param stupidPool               buffer pool handed to the off-heap index; not used when
     *                                 {@code z} is {@code false}
     * @return a new {@link OffheapIncrementalIndex} or {@link OnheapIncrementalIndex}
     */
    public static IncrementalIndex makeIncrementalIndex(Bucket bucket, AggregatorFactory[] aggregatorFactoryArr, HadoopDruidIndexerConfig hadoopDruidIndexerConfig, boolean z, StupidPool stupidPool) {
        final HadoopTuningConfig tuningConfig = hadoopDruidIndexerConfig.getSchema().m8getTuningConfig();
        final IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder()
                .withMinTimestamp(bucket.time.getMillis())
                .withDimensionsSpec(hadoopDruidIndexerConfig.getSchema().getDataSchema().getParser())
                .withQueryGranularity(hadoopDruidIndexerConfig.getSchema().getDataSchema().getGranularitySpec().getQueryGranularity())
                .withMetrics(aggregatorFactoryArr)
                .build();
        if (z) {
            return new OffheapIncrementalIndex(indexSchema, stupidPool, true, tuningConfig.getBufferSize());
        }
        return new OnheapIncrementalIndex(indexSchema, tuningConfig.getRowFlushBoundary());
    }
}
