package com.bigdata.btree.raba.codec;

import com.bigdata.util.BytesUtil;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StreamTokenizer;
import java.io.UnsupportedEncodingException;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
import org.apache.log4j.Logger;

/* loaded from: input_file:com/bigdata/btree/raba/codec/TokenizeKeysGenerator.class */
/**
 * {@link IRabaGenerator} whose keys/values are the distinct tokens read from
 * a text source.
 * <p>
 * The source is tokenized once, in the constructor, with a
 * {@link StreamTokenizer}. Each distinct token is encoded using
 * {@link #charset} and retained; {@link #generateKeys(int)} and
 * {@link #generateValues(int)} hand out copies of that token table.
 */
public class TokenizeKeysGenerator implements IRabaGenerator {
    protected static final Logger log = Logger.getLogger(TokenizeKeysGenerator.class);

    /** Name of the character set used to encode each token into a byte[]. */
    public static final transient String charset = "UTF-8";

    /** The encoded distinct tokens, in the iteration order of the token set. */
    final byte[][] data;

    /**
     * Tokenizes the named source and retains the encoded distinct tokens.
     *
     * @param str
     *            Path of an existing file, or otherwise the name of a resource
     *            resolved via {@link Class#getResourceAsStream(String)}.
     *
     * @throws RuntimeException
     *             if the source cannot be opened, if tokenizing or closing the
     *             source fails, or if a token cannot be encoded.
     */
    public TokenizeKeysGenerator(String str) {
        // Opened outside of the try block so that "could not open" failures
        // propagate as-is rather than being wrapped a second time.
        final BufferedReader source = openSource(str);
        Set<String> terms;
        try {
            try {
                terms = tokenize(str, source);
            } finally {
                // The reader is only needed while tokenizing; always release it.
                source.close();
            }
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
        this.data = encode(terms);
    }

    /**
     * Opens the source as a file when a file by that name exists, and as a
     * class path resource otherwise.
     * <p>
     * NOTE(review): both readers decode using the platform default charset,
     * while tokens are encoded using {@link #charset} - confirm this is
     * intended.
     */
    private BufferedReader openSource(String name) {
        if (new File(name).exists()) {
            try {
                return new BufferedReader(new FileReader(name));
            } catch (FileNotFoundException e) {
                // Keep the cause so the underlying reason is not lost.
                throw new RuntimeException("Could not open file: " + name, e);
            }
        }
        final InputStream is = getClass().getResourceAsStream(name);
        if (is == null) {
            throw new RuntimeException("No such resource: " + name);
        }
        return new BufferedReader(new InputStreamReader(is));
    }

    /** Encodes each term using {@link #charset}, one byte[] per term. */
    private static byte[][] encode(Set<String> terms) {
        // Only the outer dimension is sized; each row is assigned below.
        final byte[][] encoded = new byte[terms.size()][];
        int n = 0;
        for (String term : terms) {
            try {
                encoded[n++] = term.getBytes(charset);
            } catch (UnsupportedEncodingException e) {
                throw new RuntimeException("Could not encode: " + term + ", charset=" + charset + " : " + e, e);
            }
        }
        return encoded;
    }

    /**
     * Returns a copy of the token table sorted with
     * {@link BytesUtil.UnsignedByteArrayComparator}, in which every element at
     * index {@code i} or beyond has been set to <code>null</code>.
     * <p>
     * The returned array always has as many elements as there are distinct
     * tokens. The outer array is a copy, but the individual byte[]s are shared
     * with this generator.
     *
     * @param i
     *            The number of leading (sorted) elements to retain.
     */
    @Override // com.bigdata.btree.raba.codec.IRabaGenerator
    public byte[][] generateKeys(int i) {
        final byte[][] keys = this.data.clone();
        // Sort first so that the retained prefix holds the smallest keys.
        Arrays.sort(keys, BytesUtil.UnsignedByteArrayComparator.INSTANCE);
        return clearFrom(keys, i);
    }

    /**
     * Returns a copy of the token table, in its stored (unsorted) order, in
     * which every element at index {@code i} or beyond has been set to
     * <code>null</code>.
     * <p>
     * The returned array always has as many elements as there are distinct
     * tokens. The outer array is a copy, but the individual byte[]s are shared
     * with this generator.
     *
     * @param i
     *            The number of leading elements to retain.
     */
    @Override // com.bigdata.btree.raba.codec.IRabaGenerator
    public byte[][] generateValues(int i) {
        return clearFrom(this.data.clone(), i);
    }

    /** Sets every element at index {@code from} or beyond to null. */
    private static byte[][] clearFrom(byte[][] a, int from) {
        for (int j = from; j < a.length; j++) {
            a[j] = null;
        }
        return a;
    }

    @Override // com.bigdata.btree.raba.codec.IRabaGenerator
    public boolean isKeysGenerator() {
        return true;
    }

    @Override // com.bigdata.btree.raba.codec.IRabaGenerator
    public boolean isValuesGenerator() {
        return true;
    }

    /**
     * Reads tokens from the reader until end of stream and returns the
     * distinct ones.
     * <p>
     * Word tokens are added as-is. Number tokens are added in their
     * {@link Double#toString(double)} form. All other token types are ignored.
     * The reader is not closed by this method.
     *
     * @param str
     *            Label for the source; used only in the log message.
     * @param reader
     *            The characters to be tokenized.
     *
     * @return The distinct tokens.
     *
     * @throws Exception
     *             if reading from the reader fails.
     */
    public Set<String> tokenize(String str, Reader reader) throws Exception {
        final Set<String> terms = new HashSet<String>(10000);
        final StreamTokenizer tokenizer = new StreamTokenizer(reader);
        int ntokens = 0;
        int type;
        while ((type = tokenizer.nextToken()) != StreamTokenizer.TT_EOF) {
            switch (type) {
                case StreamTokenizer.TT_WORD:
                    terms.add(tokenizer.sval);
                    ntokens++;
                    break;
                case StreamTokenizer.TT_NUMBER:
                    terms.add(Double.toString(tokenizer.nval));
                    ntokens++;
                    break;
                default:
                    // Ordinary characters, quoted strings, etc. are skipped.
                    break;
            }
        }
        if (log.isInfoEnabled()) {
            log.info("Tokenized: " + ntokens + " tokens with " + terms.size() + " distinct terms : src=" + str);
        }
        return terms;
    }
}
