package com.mongodb.hadoop.splitter;

import com.mongodb.BasicDBObjectBuilder;
import com.mongodb.hadoop.input.BSONFileSplit;
import com.mongodb.hadoop.util.CompatUtils;
import com.mongodb.hadoop.util.MongoConfigUtil;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CodecPool;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.io.compress.Compressor;
import org.apache.hadoop.io.compress.DefaultCodec;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.bson.BSONObject;
import org.bson.BasicBSONCallback;
import org.bson.BasicBSONDecoder;
import org.bson.BasicBSONEncoder;
import org.bson.LazyBSONCallback;
import org.bson.LazyBSONDecoder;
import org.bson.LazyBSONObject;

/* loaded from: input_file:com/mongodb/hadoop/splitter/BSONSplitter.class */
/**
 * Computes, caches and loads splits of a BSON file that are aligned on
 * document boundaries, and doubles as a command-line {@link Tool} that writes
 * every split of a BSON file to its own compressed file.
 *
 * <p>Computed splits can be cached next to the input file (or in the directory
 * returned by {@code MongoConfigUtil.getBSONSplitsPath}) in a hidden file named
 * {@code .<inputName>.splits}. That file is a sequence of BSON documents, one
 * per split, holding the split start under {@code "s"} and its length under
 * {@code "l"}.
 *
 * <p>Thread-safety: only {@link #getStartingPositionForSplit(FileSplit)} is
 * synchronized. All methods share the same decoder, encoder and callback
 * instances, so an instance should not be used by several threads at once.
 */
public class BSONSplitter extends Configured implements Tool {
    private static final String CORE_JAR = "mongo-hadoop-core.jar";
    private static final Log LOG = LogFactory.getLog(BSONSplitter.class);

    /** Key of the split start offset inside a split descriptor document. */
    private static final String SPLIT_START_KEY = "s";
    /** Key of the split length inside a split descriptor document. */
    private static final String SPLIT_LENGTH_KEY = "l";
    /** Size in bytes of the buffer used by {@link #run(String[])} to copy split contents. */
    private static final int COPY_BUFFER_SIZE = 1024 * 1024;

    private ArrayList<BSONFileSplit> splitsList;
    private Path inputPath;
    private final BasicBSONCallback callback = new BasicBSONCallback();
    private final LazyBSONCallback lazyCallback = new LazyBSONCallback();
    private final LazyBSONDecoder lazyDec = new LazyBSONDecoder();
    private final BasicBSONDecoder bsonDec = new BasicBSONDecoder();
    private final BasicBSONEncoder bsonEnc = new BasicBSONEncoder();

    /**
     * Signals that splits could not be loaded from a splits file. Callers in
     * this class react by computing the splits from the BSON file itself.
     */
    public static class NoSplitFileException extends Exception {
    }

    /**
     * Sets the BSON file used by {@link #readSplits()} and {@link #writeSplits()}.
     *
     * @param path path of the BSON input file
     */
    public void setInputPath(Path path) {
        this.inputPath = path;
    }

    /**
     * Returns the splits loaded or computed most recently.
     *
     * @return the internal split list, or a new empty list when no splits have
     *         been loaded or computed yet; never {@code null}
     */
    public ArrayList<BSONFileSplit> getAllSplits() {
        if (this.splitsList == null) {
            return new ArrayList<>(0);
        }
        return this.splitsList;
    }

    /**
     * Builds a split from a split descriptor document.
     *
     * @param splitInfo  document holding the split start under {@code "s"} and
     *                   the split length under {@code "l"}
     * @param fileSystem file system that holds the BSON file
     * @param fileStatus status of the BSON file the split belongs to
     * @return the split described by {@code splitInfo}
     * @throws IOException declared for callers; not thrown by the current implementation
     * @throws IllegalArgumentException if a field is missing or is not a number
     */
    public BSONFileSplit createFileSplitFromBSON(BSONObject splitInfo, FileSystem fileSystem, FileStatus fileStatus) throws IOException {
        long splitStart = readLongField(splitInfo, SPLIT_START_KEY);
        long splitLength = readLongField(splitInfo, SPLIT_LENGTH_KEY);
        return createFileSplit(fileStatus, fileSystem, splitStart, splitLength);
    }

    /**
     * Creates a split of the given file, attaching the hosts of the block that
     * contains {@code splitStart} when block locations are available.
     *
     * <p>When block locations cannot be obtained (an {@link IOException}, or a
     * null/empty location array such as the one reported for an empty range),
     * the split is created without host information.
     *
     * @param fileStatus  status of the BSON file
     * @param fileSystem  file system that holds the file
     * @param splitStart  byte offset at which the split starts
     * @param splitLength length of the split in bytes
     * @return the new split, with its key field taken from the configuration
     * @throws IllegalArgumentException if block locations exist but none of
     *         them contains {@code splitStart}
     */
    public BSONFileSplit createFileSplit(FileStatus fileStatus, FileSystem fileSystem, long splitStart, long splitLength) {
        String[] hosts = null;
        try {
            BlockLocation[] blockLocations;
            // LocatedFileStatus is looked up by name and invoked through
            // CompatUtils rather than referenced directly.
            if (CompatUtils.isInstance(fileStatus, "org.apache.hadoop.fs.LocatedFileStatus", getConf(), FileStatus.class)) {
                blockLocations = (BlockLocation[]) CompatUtils.invokeMethod(
                    FileStatus.class, fileStatus, "getBlockLocations", new Object[0], new Class[0]);
            } else {
                blockLocations = fileSystem.getFileBlockLocations(fileStatus, splitStart, splitLength);
            }
            if (blockLocations == null || blockLocations.length == 0) {
                // Nothing to index into; indexing would otherwise fail with
                // ArrayIndexOutOfBoundsException.
                if (LOG.isDebugEnabled()) {
                    LOG.debug("No block locations reported for " + fileStatus.getPath()
                        + "; using non-block-aware input split.");
                }
            } else {
                hosts = blockLocations[getBlockIndex(blockLocations, splitStart)].getHosts();
            }
        } catch (IOException e) {
            LOG.warn("Couldn't find block locations when constructing input split from byte offset. Using non-block-aware input split; " + e.getMessage());
        }
        BSONFileSplit split = new BSONFileSplit(fileStatus.getPath(), splitStart, splitLength, hosts);
        split.setKeyField(MongoConfigUtil.getInputKey(getConf()));
        return split;
    }

    /**
     * Replaces the current splits with the ones stored in a splits file.
     *
     * <p>Any failure while locating, opening or decoding the splits file is
     * reported as {@link NoSplitFileException} (with the original failure as
     * its cause) so that callers can fall back to computing the splits.
     *
     * @param fileStatus status of the BSON file the splits refer to
     * @param path       location of the splits file
     * @throws NoSplitFileException if the splits file is missing or unreadable
     * @throws IOException if the file system cannot be obtained or the stream
     *         cannot be closed
     */
    public void loadSplitsFromSplitFile(FileStatus fileStatus, Path path) throws NoSplitFileException, IOException {
        ArrayList<BSONFileSplit> loadedSplits = new ArrayList<>();
        FileSystem fileSystem = path.getFileSystem(getConf());
        FSDataInputStream in = null;
        try {
            FileStatus splitFileStatus = fileSystem.getFileStatus(path);
            LOG.info("Found split file at : " + splitFileStatus);
            in = fileSystem.open(path);
            while (in.getPos() < splitFileStatus.getLen()) {
                this.callback.reset();
                this.bsonDec.decode(in, this.callback);
                BSONObject splitInfo = (BSONObject) this.callback.get();
                loadedSplits.add(createFileSplitFromBSON(splitInfo, fileSystem, fileStatus));
            }
        } catch (Exception e) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("Could not load splits from " + path, e);
            }
            NoSplitFileException failure = new NoSplitFileException();
            failure.initCause(e);
            throw failure;
        } finally {
            if (in != null) {
                in.close();
            }
        }
        this.splitsList = loadedSplits;
    }

    /**
     * Computes the target split size: the block size clamped between the
     * configured minimum and maximum split sizes.
     *
     * @param configuration job configuration; the {@code mapreduce.input.fileinputformat.split.*}
     *                      keys are read first, falling back to the {@code mapred.*.split.size} keys
     * @param fileStatus    file whose block size is used, or {@code null} to use
     *                      the {@code dfs.blockSize} setting (default 64 MiB)
     * @return the split size in bytes, at least 1
     */
    public static long getSplitSize(Configuration configuration, FileStatus fileStatus) {
        long maxSize = configuration.getLong(
            "mapreduce.input.fileinputformat.split.maxsize",
            configuration.getLong("mapred.max.split.size", Long.MAX_VALUE));
        long minSize = Math.max(1L, configuration.getLong(
            "mapreduce.input.fileinputformat.split.minsize",
            configuration.getLong("mapred.min.split.size", 1L)));
        long blockSize;
        if (fileStatus != null) {
            blockSize = fileStatus.getBlockSize();
        } else {
            blockSize = configuration.getLong("dfs.blockSize", 67108864L);
        }
        return Math.max(minSize, Math.min(maxSize, blockSize));
    }

    /**
     * Computes the splits of a file and stores them in this splitter.
     *
     * <p>When reading splits is disabled in the configuration, a single split
     * covering the whole file is created. Otherwise the file is scanned with
     * {@link #splitFile(FileStatus)} and the result is handed to
     * {@link #writeSplits()}; a zero-length file is skipped and leaves the
     * current splits untouched.
     *
     * @param fileStatus status of the BSON file to split
     * @throws IOException if the file cannot be read or the splits file cannot be written
     */
    public void readSplitsForFile(FileStatus fileStatus) throws IOException {
        long fileLength = fileStatus.getLen();
        if (!MongoConfigUtil.getBSONReadSplits(getConf())) {
            LOG.info("Reading splits is disabled - constructing single split for " + fileStatus);
            FileSystem fileSystem = fileStatus.getPath().getFileSystem(getConf());
            ArrayList<BSONFileSplit> singleSplit = new ArrayList<>(1);
            singleSplit.add(createFileSplit(fileStatus, fileSystem, 0L, fileLength));
            this.splitsList = singleSplit;
            return;
        }
        if (fileLength == 0) {
            LOG.warn("Zero-length file, skipping split calculation.");
            return;
        }
        this.splitsList = toArrayList(splitFile(fileStatus));
        writeSplits();
    }

    /**
     * Scans a BSON file document by document and groups consecutive documents
     * into splits of up to {@link #getSplitSize(Configuration, FileStatus)} bytes.
     *
     * <p>An {@link IOException} raised while scanning is logged and ends the
     * scan; the splits gathered up to that point are returned.
     *
     * @param fileStatus status of the BSON file to scan
     * @return the computed splits, in file order
     * @throws IOException if the file cannot be opened or the stream cannot be closed
     */
    protected List<BSONFileSplit> splitFile(FileStatus fileStatus) throws IOException {
        Path path = fileStatus.getPath();
        List<BSONFileSplit> splits = new ArrayList<>();
        FileSystem fileSystem = path.getFileSystem(getConf());
        long fileLength = fileStatus.getLen();
        long maxSplitSize = getSplitSize(getConf(), fileStatus);
        if (LOG.isDebugEnabled()) {
            LOG.debug("Generating splits for " + path + " of up to " + maxSplitSize + " bytes.");
        }

        int docsRead = 0;
        long currentSplitStart = 0L;
        long currentSplitLength = 0L;
        FSDataInputStream in = fileSystem.open(path);
        try {
            while (in.getPos() + 1 < fileLength) {
                this.lazyCallback.reset();
                this.lazyDec.decode(in, this.lazyCallback);
                int docSize = ((LazyBSONObject) this.lazyCallback.get()).getBSONSize();

                if (currentSplitLength + docSize >= maxSplitSize) {
                    // Close the current split before this document. Skip it when
                    // it is still empty (first document already exceeds the split
                    // size) so that no zero-length split is produced.
                    if (currentSplitLength > 0) {
                        BSONFileSplit split = createFileSplit(fileStatus, fileSystem, currentSplitStart, currentSplitLength);
                        splits.add(split);
                        if (LOG.isDebugEnabled()) {
                            LOG.debug(String.format("Creating new split (%d) %s", splits.size(), split));
                        }
                    }
                    // The document just decoded becomes the first one of the next split.
                    currentSplitStart = in.getPos() - docSize;
                    currentSplitLength = 0L;
                }
                currentSplitLength += docSize;
                docsRead++;

                // NOTE(review): the interval reuses DEFAULT_INPUT_SPLIT_MIN_DOCS, as in
                // the original code; confirm that constant is meant as a logging period.
                if (docsRead % MongoConfigUtil.DEFAULT_INPUT_SPLIT_MIN_DOCS == 0 && LOG.isDebugEnabled()) {
                    float percentDone = 100.0f * (((float) in.getPos()) / ((float) fileLength));
                    LOG.debug(String.format("Read %d docs calculating splits for %s; %3.3f%% complete.",
                        docsRead, fileStatus.getPath(), percentDone));
                }
            }
        } catch (IOException e) {
            // Best effort: keep what was decoded so far instead of continuing to
            // read from a stream that is in an unknown state.
            LOG.warn("IOException: " + e);
        } finally {
            in.close();
        }

        if (currentSplitLength > 0) {
            BSONFileSplit lastSplit = createFileSplit(fileStatus, fileSystem, currentSplitStart, currentSplitLength);
            splits.add(lastSplit);
            if (LOG.isDebugEnabled()) {
                LOG.debug(String.format("Final split (%d) %s", splits.size(), lastSplit.getPath()));
            }
        }
        if (LOG.isDebugEnabled()) {
            LOG.debug("Completed splits calculation for " + fileStatus.getPath());
        }
        return splits;
    }

    /**
     * Writes the current splits to the splits file of the input path, one BSON
     * document per split.
     *
     * <p>Nothing is written when {@code bson.split.write_splits} is {@code false}
     * or when no splits have been loaded or computed. The splits file is
     * created without overwriting an existing file.
     *
     * @throws IOException if the splits file cannot be created or written
     * @throws IllegalStateException if splits must be written but no input path has been set
     */
    public void writeSplits() throws IOException {
        if (!getConf().getBoolean("bson.split.write_splits", true)) {
            LOG.info("bson.split.write_splits is set to false - skipping writing splits to disk.");
            return;
        }
        LOG.info("Writing splits to disk.");
        if (this.splitsList == null) {
            LOG.info("No splits found, skipping write of splits file.");
            return;
        }
        if (this.inputPath == null) {
            throw new IllegalStateException("Input path has not been set.");
        }

        Path splitsFilePath = getSplitsFilePath(this.inputPath, getConf());
        FSDataOutputStream out = null;
        try {
            out = splitsFilePath.getFileSystem(getConf()).create(splitsFilePath, false);
            for (BSONFileSplit split : this.splitsList) {
                // Stored as Long values; createFileSplitFromBSON reads them back.
                BSONObject splitInfo = BasicDBObjectBuilder.start()
                    .add(SPLIT_START_KEY, Long.valueOf(split.getStart()))
                    .add(SPLIT_LENGTH_KEY, Long.valueOf(split.getLength()))
                    .get();
                byte[] encoded = this.bsonEnc.encode(splitInfo);
                out.write(encoded, 0, encoded.length);
            }
        } catch (IOException e) {
            LOG.error("Could not create splits file: " + e.getMessage());
            throw e;
        } finally {
            if (out != null) {
                out.close();
            }
        }
    }

    /**
     * Computes the splits of the configured input path, replacing any splits
     * held by this splitter.
     *
     * @throws IOException if the input file cannot be read
     * @throws IllegalStateException if no input path has been set
     */
    public void readSplits() throws IOException {
        this.splitsList = new ArrayList<>();
        if (this.inputPath == null) {
            throw new IllegalStateException("Input path has not been set.");
        }
        FileSystem fileSystem = this.inputPath.getFileSystem(getConf());
        readSplitsForFile(fileSystem.getFileStatus(this.inputPath));
    }

    /**
     * Finds the block that contains the given file offset.
     *
     * @param blockLocations block locations of the file
     * @param offset         byte offset to look up
     * @return index into {@code blockLocations} of the containing block
     * @throws IllegalArgumentException if there are no block locations or none contains {@code offset}
     */
    private static int getBlockIndex(BlockLocation[] blockLocations, long offset) {
        if (blockLocations == null || blockLocations.length == 0) {
            throw new IllegalArgumentException("No block locations available for offset " + offset + ".");
        }
        for (int i = 0; i < blockLocations.length; i++) {
            long blockStart = blockLocations[i].getOffset();
            long blockEnd = blockStart + blockLocations[i].getLength();
            if (blockStart <= offset && offset < blockEnd) {
                return i;
            }
        }
        BlockLocation lastBlock = blockLocations[blockLocations.length - 1];
        long lastByte = lastBlock.getOffset() + lastBlock.getLength() - 1;
        throw new IllegalArgumentException(String.format("Offset %d is outside the file [0..%d].", offset, lastByte));
    }

    /**
     * Returns the offset of the first BSON document that begins at or after the
     * start of the given split.
     *
     * <p>Document-aligned splits are loaded from the splits file when reading
     * splits is enabled (and computed if that file is unavailable), or computed
     * directly otherwise. Decoding then starts at the closest known boundary
     * before the split start and advances document by document.
     *
     * @param fileSplit split whose first document boundary is wanted
     * @return byte offset of the first document at or beyond {@code fileSplit.getStart()}
     * @throws IOException if the file or its splits cannot be read
     */
    public synchronized long getStartingPositionForSplit(FileSplit fileSplit) throws IOException {
        FileSystem fileSystem = fileSplit.getPath().getFileSystem(getConf());
        FileStatus fileStatus = fileSystem.getFileStatus(fileSplit.getPath());

        List<BSONFileSplit> alignedSplits;
        if (MongoConfigUtil.getBSONReadSplits(getConf())) {
            try {
                loadSplitsFromSplitFile(fileStatus, getSplitsFilePath(fileStatus.getPath(), getConf()));
            } catch (NoSplitFileException e) {
                readSplitsForFile(fileStatus);
            }
            alignedSplits = getAllSplits();
        } else {
            alignedSplits = splitFile(fileStatus);
        }

        long requestedStart = fileSplit.getStart();
        long scanFrom = findScanStart(alignedSplits, requestedStart);

        long position = scanFrom;
        FSDataInputStream in = null;
        try {
            in = fileSystem.open(fileSplit.getPath());
            in.seek(position);
            while (position < requestedStart) {
                this.callback.reset();
                this.bsonDec.decode(in, this.callback);
                position = in.getPos();
            }
        } finally {
            if (in != null) {
                in.close();
            }
        }
        return position;
    }

    /**
     * Returns the splits file location for a BSON file: {@code .<name>.splits}
     * inside the configured splits directory, or next to the file when no
     * splits directory is configured.
     *
     * @param path          path of the BSON file
     * @param configuration configuration consulted for the splits directory
     * @return path of the splits file
     */
    public static Path getSplitsFilePath(Path path, Configuration configuration) {
        String splitsDirectory = MongoConfigUtil.getBSONSplitsPath(configuration);
        String splitsFileName = "." + path.getName() + ".splits";
        if (splitsDirectory == null) {
            return new Path(path.getParent(), splitsFileName);
        }
        return new Path(splitsDirectory, splitsFileName);
    }

    /** Prints the command-line usage to standard error. */
    private void printUsage() {
        System.err.println("USAGE: hadoop jar " + CORE_JAR + " " + getClass().getName()
            + " <fileName> [-c compressionCodec] [-o outputDirectory]\n\n"
            + "Make sure to use the full path, including scheme, for input and output paths.");
    }

    /**
     * Splits a BSON file and writes each split to its own compressed file named
     * {@code <fileName>-<index><codecExtension>}.
     *
     * @param args {@code <fileName> [-c compressionCodec] [-o outputDirectory]};
     *             the codec defaults to {@link DefaultCodec} and the output
     *             directory to the parent of the input file
     * @return 0 on success, 1 when the arguments are invalid
     * @throws Exception if the codec class cannot be loaded or an I/O error occurs
     */
    public int run(String[] args) throws Exception {
        if (args.length < 1) {
            printUsage();
            return 1;
        }
        Path inputFile = new Path(args[0]);
        String codecClassName = null;
        String outputDirectoryName = null;
        for (int i = 1; i < args.length; i++) {
            String option = args[i];
            boolean known = "-c".equals(option) || "-o".equals(option);
            if (!known) {
                System.err.println("unrecognized option: " + option);
                printUsage();
                return 1;
            }
            if (i + 1 >= args.length) {
                // The option is the last argument, so there is no value to consume.
                System.err.println("missing value for option: " + option);
                printUsage();
                return 1;
            }
            String value = args[++i];
            if ("-c".equals(option)) {
                codecClassName = value;
            } else {
                outputDirectoryName = value;
            }
        }

        Path outputDirectory = outputDirectoryName == null ? inputFile.getParent() : new Path(outputDirectoryName);
        CompressionCodec codec;
        if (codecClassName == null) {
            codec = new DefaultCodec();
        } else {
            codec = (CompressionCodec) ReflectionUtils.newInstance(Class.forName(codecClassName), getConf());
        }
        if (codec instanceof Configurable) {
            ((Configurable) codec).setConf(getConf());
        }

        // Turn off split writing for this run.
        // NOTE(review): presumably this is the flag writeSplits() checks - confirm in MongoConfigUtil.
        MongoConfigUtil.setBSONWriteSplits(getConf(), false);

        FileSystem inputFileSystem = FileSystem.get(inputFile.toUri(), getConf());
        FileSystem outputFileSystem = FileSystem.get(outputDirectory.toUri(), getConf());
        FSDataInputStream in = inputFileSystem.open(inputFile);
        try {
            Path splitsFilePath = getSplitsFilePath(inputFile, getConf());
            try {
                loadSplitsFromSplitFile(inputFileSystem.getFileStatus(inputFile), splitsFilePath);
            } catch (NoSplitFileException e) {
                LOG.info("did not find .splits file in " + splitsFilePath.toUri());
                setInputPath(inputFile);
                readSplits();
            }

            List<BSONFileSplit> splits = getAllSplits();
            LOG.info("compressing " + splits.size() + " splits.");
            byte[] buffer = new byte[COPY_BUFFER_SIZE];
            for (int i = 0; i < splits.size(); i++) {
                Path target = new Path(outputDirectory, inputFile.getName() + "-" + i + codec.getDefaultExtension());
                compressSplit(in, splits.get(i), outputFileSystem, target, codec, buffer);
            }
        } finally {
            in.close();
        }
        LOG.info("done.");
        return 0;
    }

    /**
     * Command-line entry point; exits the JVM with the status returned by {@link #run(String[])}.
     *
     * @param args command-line arguments, see {@link #run(String[])}
     * @throws Exception if the tool fails
     */
    public static void main(String[] args) throws Exception {
        System.exit(ToolRunner.run(new BSONSplitter(), args));
    }

    /** Reads a numeric field of a split descriptor, naming the field when it is missing or not a number. */
    private static long readLongField(BSONObject splitInfo, String key) {
        Object value = splitInfo.get(key);
        if (!(value instanceof Number)) {
            throw new IllegalArgumentException(
                "Split descriptor field '" + key + "' is missing or not a number: " + value);
        }
        return ((Number) value).longValue();
    }

    /** Returns the given list as an {@code ArrayList}, copying it only when it is some other list type. */
    private static ArrayList<BSONFileSplit> toArrayList(List<BSONFileSplit> splits) {
        if (splits instanceof ArrayList) {
            return (ArrayList<BSONFileSplit>) splits;
        }
        return new ArrayList<BSONFileSplit>(splits);
    }

    /**
     * Picks the offset from which documents are decoded to reach {@code requestedStart}:
     * the start of the last known split that begins before it, {@code requestedStart}
     * itself when the first known split already begins at or after it, or 0 when no
     * splits are known.
     */
    private static long findScanStart(List<BSONFileSplit> alignedSplits, long requestedStart) {
        if (alignedSplits.isEmpty()) {
            return 0L;
        }
        long scanFrom = requestedStart;
        for (BSONFileSplit candidate : alignedSplits) {
            if (candidate.getStart() >= requestedStart) {
                break;
            }
            scanFrom = candidate.getStart();
        }
        return scanFrom;
    }

    /** Copies the bytes of one split from {@code in} into a new compressed file at {@code target}. */
    private static void compressSplit(FSDataInputStream in, BSONFileSplit split, FileSystem outputFileSystem,
                                      Path target, CompressionCodec codec, byte[] buffer) throws IOException {
        Compressor compressor = CodecPool.getCompressor(codec);
        try {
            CompressionOutputStream out = codec.createOutputStream(outputFileSystem.create(target), compressor);
            try {
                in.seek(split.getStart());
                LOG.info("writing " + target.toUri() + ".");
                // Tracked as a long so that splits larger than 2 GiB are copied completely.
                long remaining = split.getLength();
                while (remaining > 0) {
                    int bytesRead = in.read(buffer, 0, (int) Math.min(buffer.length, remaining));
                    if (bytesRead < 0) {
                        break; // end of input reached before the split was exhausted
                    }
                    if (bytesRead > 0) {
                        out.write(buffer, 0, bytesRead);
                        remaining -= bytesRead;
                    }
                }
            } finally {
                out.close();
            }
        } finally {
            // Always hand the compressor back, even when closing the stream fails.
            CodecPool.returnCompressor(compressor);
        }
    }
}
