package org.apache.mahout.df.data;

import com.google.common.base.Preconditions;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;
import java.util.StringTokenizer;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.util.Strings;
import org.apache.log4j.spi.LocationInfo;
import org.apache.mahout.df.data.Dataset;
import org.apache.mahout.math.DenseVector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/mahout/df/data/DataLoader.class */
/**
 * Static helpers that turn textual records into {@link Data} / {@link Dataset} objects.
 *
 * <p>Each record is a single line (or array element) whose attribute values are split with a
 * {@link StringTokenizer}. A record whose non-ignored attributes contain the missing-value marker
 * is skipped. Files are decoded as UTF-8.
 *
 * <p>This class is not instantiable.
 */
public final class DataLoader {
    private static final Logger log = LoggerFactory.getLogger(DataLoader.class);

    /**
     * Delimiter characters handed to {@link StringTokenizer}; every character of the string is a delimiter.
     * NOTE(review): borrowing an HBase constant here looks like a decompiler constant-substitution
     * artifact (expected value ", ") — confirm and replace with a local literal.
     */
    private static final String SEPARATORS = Strings.DEFAULT_KEYVALUE_SEPARATOR;

    /**
     * Token that marks a missing attribute value.
     * NOTE(review): borrowing a log4j constant here looks like a decompiler constant-substitution
     * artifact (expected value "?") — confirm and replace with a local literal.
     */
    private static final String MISSING_VALUE = LocationInfo.NA;

    /** Charset used to decode input files, fixed so results do not depend on the platform default. */
    private static final String CHARSET_NAME = "UTF-8";

    private DataLoader() {
    }

    /**
     * Parses one record into an {@link Instance}, registering any new nominal value it encounters.
     *
     * @param i id given to the created instance
     * @param attributeArr attribute descriptors, one per token of the record
     * @param listArr per-attribute lists of the distinct values seen so far for non-numerical,
     *        non-ignored attributes; entries are created lazily and extended by this method
     * @param str the record to parse
     * @return the parsed instance, or {@code null} if a non-ignored attribute holds the missing-value marker
     * @throws IllegalArgumentException if the number of tokens differs from the number of attributes
     * @throws NumberFormatException if a numerical attribute cannot be parsed as a double
     * @throws IllegalStateException if no label attribute was found in the record
     */
    private static Instance parseString(int i, Dataset.Attribute[] attributeArr, List<String>[] listArr, String str) {
        StringTokenizer tokenizer = new StringTokenizer(str, SEPARATORS);
        Preconditions.checkArgument(tokenizer.countTokens() == attributeArr.length, "Wrong number of attributes in the string");

        // First pass: collect the tokens and bail out on a missing value BEFORE touching listArr,
        // so that skipped records never contribute nominal values.
        String[] tokens = new String[attributeArr.length];
        for (int attr = 0; attr < attributeArr.length; attr++) {
            String token = tokenizer.nextToken();
            if (attributeArr[attr].isIgnored()) {
                continue;
            }
            if (MISSING_VALUE.equals(token)) {
                return null;
            }
            tokens[attr] = token;
        }

        // Second pass: fill the vector (ignored attributes and the label take no slot).
        DenseVector vector = new DenseVector(Dataset.countAttributes(attributeArr));
        int position = 0;
        int label = -1;
        for (int attr = 0; attr < attributeArr.length; attr++) {
            if (attributeArr[attr].isIgnored()) {
                continue;
            }
            String token = tokens[attr];
            if (attributeArr[attr].isNumerical()) {
                vector.set(position++, Double.parseDouble(token));
                continue;
            }
            int valueIndex = indexOfOrAdd(listArr, attr, token);
            if (attributeArr[attr].isCategorical()) {
                vector.set(position++, valueIndex);
            } else {
                // Neither ignored, numerical nor categorical: treated as the label.
                label = valueIndex;
            }
        }
        if (label == -1) {
            throw new IllegalStateException("Label not found!");
        }
        return new Instance(i, vector, label);
    }

    /**
     * Returns the index of {@code value} in the value list of attribute {@code attr}, creating the
     * list and/or appending the value when it is not there yet.
     */
    private static int indexOfOrAdd(List<String>[] listArr, int attr, String value) {
        if (listArr[attr] == null) {
            listArr[attr] = new ArrayList<String>();
        }
        List<String> values = listArr[attr];
        int index = values.indexOf(value);
        if (index < 0) {
            index = values.size();
            values.add(value);
        }
        return index;
    }

    /** Creates the (initially empty) per-attribute value lists; isolates the unchecked generic array creation. */
    @SuppressWarnings("unchecked")
    private static List<String>[] newValueLists(int nbAttributes) {
        return new List[nbAttributes];
    }

    /**
     * Converts one record and appends the result to {@code instances}. Empty records and records
     * for which the converter returns {@code null} are logged and skipped. The id passed to the
     * converter is the number of instances accepted so far.
     */
    private static void addInstance(DataConverter converter, List<Instance> instances, String line) {
        if (line.isEmpty()) {
            log.warn("{}: empty string", Integer.valueOf(instances.size()));
            return;
        }
        Instance instance = converter.convert(instances.size(), line);
        if (instance == null) {
            log.warn("{}: missing values", Integer.valueOf(instances.size()));
            return;
        }
        instances.add(instance);
    }

    /**
     * Rethrows a read failure that the scanner swallowed. {@link Scanner} reports an I/O error as
     * plain end-of-input, which would otherwise silently truncate the loaded data.
     */
    private static void rethrowReadFailure(Scanner scanner) throws IOException {
        IOException failure = scanner.ioException();
        if (failure != null) {
            throw failure;
        }
    }

    /**
     * Loads the instances stored in a file, one record per line.
     *
     * @param dataset description of the data, used to build the record converter
     * @param fileSystem file system holding the file
     * @param path file to read
     * @return the loaded data; empty lines and records with missing values are skipped
     * @throws IOException if the file cannot be opened or read
     */
    public static Data loadData(Dataset dataset, FileSystem fileSystem, Path path) throws IOException {
        InputStream input = fileSystem.open(path);
        Scanner scanner = new Scanner(input, CHARSET_NAME);
        try {
            List<Instance> instances = new ArrayList<Instance>();
            DataConverter converter = new DataConverter(dataset);
            while (scanner.hasNextLine()) {
                addInstance(converter, instances, scanner.nextLine());
            }
            rethrowReadFailure(scanner);
            return new Data(dataset, instances);
        } finally {
            // Closing the scanner also closes the underlying stream, even when parsing failed.
            scanner.close();
        }
    }

    /**
     * Loads the instances held in an array of records.
     *
     * @param dataset description of the data, used to build the record converter
     * @param strArr records to convert, one instance per element
     * @return the loaded data; empty strings and records with missing values are skipped
     */
    public static Data loadData(Dataset dataset, String[] strArr) {
        List<Instance> instances = new ArrayList<Instance>();
        DataConverter converter = new DataConverter(dataset);
        for (String line : strArr) {
            addInstance(converter, instances, line);
        }
        return new Data(dataset, instances);
    }

    /**
     * Builds a {@link Dataset} by scanning a file, one record per line, collecting the distinct
     * values of its nominal attributes and counting the usable records.
     *
     * @param str attribute descriptor, parsed by {@code DescriptorUtils.parseDescriptor}
     * @param fileSystem file system holding the file
     * @param path file to read
     * @return the generated dataset; empty lines and records with missing values are not counted
     * @throws DescriptorException if the descriptor cannot be parsed
     * @throws IOException if the file cannot be opened or read
     */
    public static Dataset generateDataset(String str, FileSystem fileSystem, Path path) throws DescriptorException, IOException {
        Dataset.Attribute[] attributes = DescriptorUtils.parseDescriptor(str);
        InputStream input = fileSystem.open(path);
        Scanner scanner = new Scanner(input, CHARSET_NAME);
        try {
            List<String>[] values = newValueLists(attributes.length);
            int nbInstances = 0;
            while (scanner.hasNextLine()) {
                String line = scanner.nextLine();
                if (!line.isEmpty() && parseString(nbInstances, attributes, values, line) != null) {
                    nbInstances++;
                }
            }
            rethrowReadFailure(scanner);
            return new Dataset(attributes, values, nbInstances);
        } finally {
            // Closing the scanner also closes the underlying stream, even when parsing failed.
            scanner.close();
        }
    }

    /**
     * Builds a {@link Dataset} from an array of records, collecting the distinct values of its
     * nominal attributes and counting the usable records.
     *
     * @param str attribute descriptor, parsed by {@code DescriptorUtils.parseDescriptor}
     * @param strArr records to scan, one instance per element
     * @return the generated dataset; empty strings and records with missing values are not counted
     * @throws DescriptorException if the descriptor cannot be parsed
     */
    public static Dataset generateDataset(String str, String[] strArr) throws DescriptorException {
        Dataset.Attribute[] attributes = DescriptorUtils.parseDescriptor(str);
        List<String>[] values = newValueLists(attributes.length);
        int nbInstances = 0;
        for (String line : strArr) {
            if (!line.isEmpty() && parseString(nbInstances, attributes, values, line) != null) {
                nbInstances++;
            }
        }
        return new Dataset(attributes, values, nbInstances);
    }
}
