package com.mongodb.hadoop;

import com.mongodb.DBCollection;
import com.mongodb.DBObject;
import com.mongodb.MongoClientURI;
import com.mongodb.gridfs.GridFS;
import com.mongodb.gridfs.GridFSDBFile;
import com.mongodb.hadoop.input.GridFSSplit;
import com.mongodb.hadoop.util.MongoConfigUtil;
import java.io.BufferedReader;
import java.io.Closeable;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.LinkedList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.BinaryComparable;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.bson.types.ObjectId;

/* loaded from: input_file:com/mongodb/hadoop/GridFSInputFormat.class */
public class GridFSInputFormat extends InputFormat<NullWritable, BinaryComparable> {
    // Class-wide commons-logging logger; written to by getSplits and by the nested text record reader.
    private static final Log LOG = LogFactory.getLog(GridFSInputFormat.class);

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:com/mongodb/hadoop/GridFSInputFormat$ChunkReadingCharSequence.class */
    /**
     * A {@link CharSequence} backed by the data stream of a {@link GridFSSplit}.
     *
     * <p>Characters are decoded from the stream on demand and accumulated in an
     * internal {@link StringBuilder}, so any index already visited can be read
     * again without touching the stream.
     *
     * <p>{@link #length()} is derived from the split's byte length, not from the
     * number of decoded characters.
     * NOTE(review): for multi-byte encodings the stream will yield fewer chars
     * than {@code length()} reports; reads near the end then fail with an
     * {@link IndexOutOfBoundsException} — confirm whether input is assumed to be
     * single-byte text.
     */
    public static class ChunkReadingCharSequence implements CharSequence, Closeable {
        /** Capacity, in chars, of the scratch buffer used for each read from the stream. */
        private static final int READ_BUFFER_CHARS = 16777216;

        private final Reader reader;
        private final int chunkSize;
        private final int length;
        private final StringBuilder builder = new StringBuilder();
        private final char[] buff = new char[READ_BUFFER_CHARS];

        /**
         * Opens the split's data stream and computes how much data remains from
         * the split's chunk to the end of the file.
         *
         * @param gridFSSplit the split whose data is exposed as characters
         * @throws IOException if the split's data stream cannot be obtained
         */
        public ChunkReadingCharSequence(GridFSSplit gridFSSplit) throws IOException {
            // NOTE(review): no charset is passed, so the platform default decoding is used — confirm intended.
            this.reader = new BufferedReader(new InputStreamReader(gridFSSplit.getData()));
            this.chunkSize = gridFSSplit.getChunkSize();
            // Do the arithmetic in long: casting the file length to int first, or multiplying
            // chunk id by chunk size in int, overflows for files of 2 GiB or more.
            long remaining = gridFSSplit.getLength() - ((long) gridFSSplit.getChunkId() * this.chunkSize);
            // CharSequence lengths are ints and must not be negative, so clamp.
            this.length = (int) Math.max(0L, Math.min(remaining, (long) Integer.MAX_VALUE));
        }

        /**
         * @return the number of bytes from the start of this split's chunk to the
         *         end of the file, clamped to the {@code int} range
         */
        @Override // java.lang.CharSequence
        public int length() {
            return this.length;
        }

        /**
         * Reads from the stream until the builder holds a character at index
         * {@code i}, or the stream is exhausted.
         */
        private void advanceToIndex(int i) throws IOException {
            while (i >= this.builder.length()) {
                int read = this.reader.read(this.buff);
                if (read <= 0) {
                    // End of stream: the caller's lookup on the builder will report the bad index.
                    return;
                }
                this.builder.append(this.buff, 0, read);
            }
        }

        /**
         * @throws IndexOutOfBoundsException if {@code i} is out of range, or if
         *         reading the stream fails (the {@link IOException} is attached as
         *         the cause)
         */
        @Override // java.lang.CharSequence
        public char charAt(int i) {
            try {
                advanceToIndex(i);
            } catch (IOException e) {
                IndexOutOfBoundsException failure = new IndexOutOfBoundsException(
                        "Could not advance stream to index: " + i + "; reason: " + e.getMessage());
                // IndexOutOfBoundsException has no cause-taking constructor; attach it explicitly.
                failure.initCause(e);
                throw failure;
            }
            return this.builder.charAt(i);
        }

        /**
         * Returns the characters in {@code [i, i2)}, reading further into the
         * stream first if necessary.
         *
         * @throws RuntimeException wrapping the {@link IOException} if reading the stream fails
         */
        @Override // java.lang.CharSequence
        public CharSequence subSequence(int i, int i2) {
            try {
                advanceToIndex(i2);
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
            return this.builder.subSequence(i, i2);
        }

        /**
         * @return the characters of this chunk only: the first
         *         {@code min(chunkSize, length)} characters
         */
        public CharSequence chunkContents() {
            return subSequence(0, Math.min(this.chunkSize, this.length));
        }

        /**
         * @return every character from the start of this split through
         *         {@link #length()}
         */
        public CharSequence fileContents() {
            return subSequence(0, this.length);
        }

        /** Closes the underlying reader. */
        @Override // java.io.Closeable, java.lang.AutoCloseable
        public void close() throws IOException {
            this.reader.close();
        }
    }

    /* loaded from: input_file:com/mongodb/hadoop/GridFSInputFormat$GridFSBinaryRecordReader.class */
    static class GridFSBinaryRecordReader extends RecordReader<NullWritable, BinaryComparable> {
        private final BytesWritable bw = new BytesWritable();
        private GridFSSplit split;
        private InputStream stream;
        private boolean readLast;
        private byte[] buff;

        GridFSBinaryRecordReader() {
        }

        public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
            this.split = (GridFSSplit) inputSplit;
            this.readLast = false;
            this.buff = new byte[16777216];
            this.stream = this.split.getData();
        }

        public boolean nextKeyValue() throws IOException, InterruptedException {
            int read;
            if (this.readLast) {
                return false;
            }
            int i = 0;
            do {
                read = this.stream.read(this.buff, i, this.buff.length - i);
                if (read > 0) {
                    i += read;
                }
            } while (read > 0);
            this.bw.set(this.buff, 0, i);
            this.readLast = true;
            return true;
        }

        /* renamed from: getCurrentKey, reason: merged with bridge method [inline-methods] */
        public NullWritable m5getCurrentKey() throws IOException, InterruptedException {
            return NullWritable.get();
        }

        /* renamed from: getCurrentValue, reason: merged with bridge method [inline-methods] */
        public BytesWritable m4getCurrentValue() throws IOException, InterruptedException {
            return this.bw;
        }

        public float getProgress() throws IOException, InterruptedException {
            return this.readLast ? 1.0f : 0.0f;
        }

        public void close() throws IOException {
            this.stream.close();
        }
    }

    /* loaded from: input_file:com/mongodb/hadoop/GridFSInputFormat$GridFSTextRecordReader.class */
    static class GridFSTextRecordReader extends RecordReader<NullWritable, BinaryComparable> {
        private GridFSSplit split;
        private long chunkSize;
        private boolean readLast;
        private boolean readWholeFile;
        private Pattern delimiterPattern;
        private Matcher matcher;
        private ChunkReadingCharSequence chunkData;
        private final Text text = new Text();
        private int totalMatches = 0;
        private int previousMatchIndex = 0;

        GridFSTextRecordReader() {
        }

        public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
            this.split = (GridFSSplit) inputSplit;
            Configuration configuration = taskAttemptContext.getConfiguration();
            String gridFSDelimiterPattern = MongoConfigUtil.getGridFSDelimiterPattern(configuration);
            this.chunkSize = this.split.getChunkSize();
            this.chunkData = new ChunkReadingCharSequence(this.split);
            this.readLast = false;
            this.readWholeFile = MongoConfigUtil.isGridFSWholeFileSplit(configuration);
            if (null == gridFSDelimiterPattern || gridFSDelimiterPattern.isEmpty()) {
                return;
            }
            this.delimiterPattern = Pattern.compile(gridFSDelimiterPattern);
            this.matcher = this.delimiterPattern.matcher(this.chunkData);
            if (this.split.getChunkId() > 0) {
                nextToken();
            }
        }

        private CharSequence nextToken() {
            if (!this.matcher.find()) {
                this.readLast = true;
                return this.chunkData.subSequence(this.previousMatchIndex, this.chunkData.length());
            }
            CharSequence subSequence = this.chunkData.subSequence(this.previousMatchIndex, this.matcher.start());
            this.previousMatchIndex = this.matcher.end();
            return subSequence;
        }

        public boolean nextKeyValue() throws IOException, InterruptedException {
            if (this.readLast) {
                GridFSInputFormat.LOG.debug("skipping the rest of this chunk because we've read beyond the end: " + this.previousMatchIndex + "; read " + this.totalMatches + " matches here.");
                return false;
            }
            if (null == this.matcher) {
                if (this.readWholeFile) {
                    this.text.set(this.chunkData.fileContents().toString());
                } else {
                    this.text.set(this.chunkData.chunkContents().toString());
                }
                this.totalMatches++;
                this.readLast = true;
                return true;
            }
            CharSequence nextToken = nextToken();
            if (nextToken == null) {
                if (!GridFSInputFormat.LOG.isDebugEnabled()) {
                    return false;
                }
                GridFSInputFormat.LOG.debug("Read " + this.totalMatches + " segments.");
                return false;
            }
            if (!this.readWholeFile && this.previousMatchIndex >= this.chunkSize) {
                this.readLast = true;
            }
            this.text.set(nextToken.toString());
            this.totalMatches++;
            return true;
        }

        /* renamed from: getCurrentKey, reason: merged with bridge method [inline-methods] */
        public NullWritable m7getCurrentKey() throws IOException, InterruptedException {
            return NullWritable.get();
        }

        /* renamed from: getCurrentValue, reason: merged with bridge method [inline-methods] */
        public Text m6getCurrentValue() throws IOException, InterruptedException {
            return this.text;
        }

        public float getProgress() throws IOException, InterruptedException {
            return (float) Math.min(this.previousMatchIndex / ((float) this.chunkSize), 1.0d);
        }

        public void close() throws IOException {
            this.chunkData.close();
        }
    }

    /**
     * Builds the input splits for every GridFS file matching the configured query.
     *
     * <p>With whole-file splitting enabled, each file yields one split. Otherwise
     * each file yields one split per chunk, numbered from 0.
     *
     * @param jobContext supplies the job configuration (input URI, collection, query, split mode)
     * @return the splits, in the order the files were returned by the query
     */
    @Override
    public List<InputSplit> getSplits(JobContext jobContext) throws IOException, InterruptedException {
        Configuration configuration = jobContext.getConfiguration();
        DBCollection inputCollection = MongoConfigUtil.getInputCollection(configuration);
        MongoClientURI inputURI = MongoConfigUtil.getInputURI(configuration);
        GridFS gridFS = new GridFS(inputCollection.getDB(), inputCollection.getName());
        DBObject query = MongoConfigUtil.getQuery(configuration);
        // The split mode is a job-level setting; read it once, not once per file.
        boolean wholeFileSplits = MongoConfigUtil.isGridFSWholeFileSplit(configuration);

        List<InputSplit> splits = new LinkedList<InputSplit>();
        for (GridFSDBFile file : gridFS.find(query)) {
            ObjectId fileId = (ObjectId) file.getId();
            int chunkSize = (int) file.getChunkSize();
            long fileLength = file.getLength();
            if (wholeFileSplits) {
                splits.add(new GridFSSplit(inputURI, fileId, chunkSize, fileLength));
                continue;
            }
            int chunkCount = file.numChunks();
            for (int chunkId = 0; chunkId < chunkCount; chunkId++) {
                splits.add(new GridFSSplit(inputURI, fileId, chunkSize, fileLength, chunkId));
            }
        }
        // Stringifying the whole split list can be expensive; skip it unless it will be logged.
        if (LOG.isDebugEnabled()) {
            LOG.debug("Found GridFS splits: " + splits);
        }
        return splits;
    }

    /**
     * Picks the record reader for a split: the binary reader when the job is
     * configured to read GridFS data as binary, the text reader otherwise.
     *
     * @param inputSplit unused here; the returned reader is bound to a split later via its {@code initialize} method
     * @param taskAttemptContext supplies the job configuration
     * @return a new, uninitialized record reader
     */
    public RecordReader<NullWritable, BinaryComparable> createRecordReader(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
        Configuration jobConf = taskAttemptContext.getConfiguration();
        if (MongoConfigUtil.isGridFSReadBinary(jobConf)) {
            return new GridFSBinaryRecordReader();
        }
        return new GridFSTextRecordReader();
    }
}
