package de.lmu.ifi.dbs.elki.datasource.parser;

import de.lmu.ifi.dbs.elki.data.LabelList;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.SparseFloatVector;
import de.lmu.ifi.dbs.elki.data.SparseNumberVector;
import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
import de.lmu.ifi.dbs.elki.data.type.VectorTypeInformation;
import de.lmu.ifi.dbs.elki.datasource.parser.NumberVectorLabelParser;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
import de.lmu.ifi.dbs.elki.utilities.io.ParseUtil;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
import it.unimi.dsi.fastutil.ints.Int2DoubleOpenHashMap;
import java.util.ArrayList;
import java.util.regex.Pattern;
import org.apache.jena.atlas.lib.Chars;

/**
 * Parser for whitespace-separated sparse vectors with optional labels.
 *
 * <p>Each line starts with the number of non-zero attributes, followed by
 * {@code index value} pairs. A token that cannot be read as an integer index is
 * treated as a label. See the {@code @Description} annotation for the full
 * format description.
 *
 * <p>Instances keep per-line scratch buffers ({@link #values}, {@link #labels})
 * that are reused across lines; NOTE(review): this suggests a parser instance
 * is not meant to be shared between threads - confirm against the callers.
 *
 * @param <V> sparse vector type produced by this parser
 */
@Description("Parser for the following line format:\nA single line provides a single point. Entries are separated by whitespace. The values will be parsed as floats (resulting in a set of SparseFloatVectors).\nA line is expected in the following format:\nThe first entry of each line is the number of attributes with coordinate value not zero. Subsequent entries are of the form (index, value), where index is the number of the corresponding dimension, and value is the value of the corresponding attribute. Any pair of two subsequent substrings not containing whitespace is tried to be read as int and float. If this fails for the first of the pair (interpreted ans index), it will be appended to a label. (Thus, any label must not be parseable as Integer.) If the float component is not parseable, an exception will be thrown. Empty lines and lines beginning with \"#\" will be ignored.")
@Title("Sparse Vector Label Parser")
public class SparseNumberVectorLabelParser<V extends SparseNumberVector> extends NumberVectorLabelParser<V> {
    /** Class logger, exposed to the superclass through {@link #getLogger()}. */
    private static final Logging LOG = Logging.getLogger((Class<?>) SparseNumberVectorLabelParser.class);

    /** Factory used to build the sparse vector of each parsed line. */
    protected SparseNumberVector.Factory<V> sparsefactory;

    /** Scratch buffer: dimension index to value for the line being parsed. */
    Int2DoubleOpenHashMap values;

    /** Scratch buffer: labels collected from the line being parsed. */
    ArrayList<String> labels;

    /**
     * Parameterization class; swaps the vector factory option of the parent
     * parameterizer for one restricted to sparse vector factories.
     *
     * @param <V> sparse vector type produced by the configured parser
     */
    public static class Parameterizer<V extends SparseNumberVector> extends NumberVectorLabelParser.Parameterizer<V> {
        /**
         * Reads the vector factory option, restricted to
         * {@code SparseNumberVector.Factory} implementations and defaulting to
         * {@code SparseFloatVector.Factory}.
         *
         * @param parameterization parameter source to read the option from
         */
        @Override
        protected void getFactory(Parameterization parameterization) {
            ObjectParameter objectParameter = new ObjectParameter(VECTOR_TYPE_ID, (Class<?>) SparseNumberVector.Factory.class, (Class<?>) SparseFloatVector.Factory.class);
            if (parameterization.grab(objectParameter)) {
                this.factory = (NumberVector.Factory) objectParameter.instantiateClass(parameterization);
            }
        }

        /**
         * Builds the parser from the options collected by this parameterizer.
         *
         * @return a new parser instance
         */
        @Override
        public SparseNumberVectorLabelParser<V> makeInstance() {
            // The cast relies on getFactory() above only accepting sparse factories.
            return new SparseNumberVectorLabelParser<>(this.format, this.labelIndices, (SparseNumberVector.Factory) this.factory);
        }
    }

    /**
     * Constructor taking a prepared reader format.
     *
     * @param cSVReaderFormat input format, passed on to the superclass
     * @param jArr label column selection, passed on to the superclass
     * @param factory factory for the sparse vectors to produce
     */
    public SparseNumberVectorLabelParser(CSVReaderFormat cSVReaderFormat, long[] jArr, SparseNumberVector.Factory<V> factory) {
        super(cSVReaderFormat, jArr, factory);
        this.values = new Int2DoubleOpenHashMap();
        this.labels = new ArrayList<>();
        this.sparsefactory = factory;
    }

    /**
     * Constructor taking the individual format settings; the first four
     * arguments are passed on to the superclass unchanged.
     *
     * @param pattern first pattern argument of the superclass constructor
     * @param str string argument of the superclass constructor
     * @param pattern2 second pattern argument of the superclass constructor
     * @param jArr label column selection, passed on to the superclass
     * @param factory factory for the sparse vectors to produce
     */
    public SparseNumberVectorLabelParser(Pattern pattern, String str, Pattern pattern2, long[] jArr, SparseNumberVector.Factory<V> factory) {
        super(pattern, str, pattern2, jArr, factory);
        this.values = new Int2DoubleOpenHashMap();
        this.labels = new ArrayList<>();
        this.sparsefactory = factory;
    }

    /**
     * Parses the current line of the inherited tokenizer into {@code curvec}
     * and {@code curlbl}.
     *
     * <p>The first token is the announced number of (index, value) pairs. While
     * fewer pairs than announced have been stored, tokens are read as pairs; a
     * token that is not a base-10 integer where an index is expected, the value
     * belonging to a label-column index, and every token after the announced
     * number of pairs are collected as labels.
     *
     * @return always {@code true}
     * @throws NumberFormatException if the leading count or a value token cannot
     *         be parsed
     * @throws IllegalArgumentException if the line ends after an index, with no
     *         value following it
     */
    @Override
    protected boolean parseLineInternal() {
        // Only the leading count is guarded here, so that a malformed value
        // further down the line is not misreported as a malformed count.
        final int cardinality;
        try {
            cardinality = this.tokenizer.getIntBase10();
        } catch (NumberFormatException e) {
            NumberFormatException wrapped = new NumberFormatException("Expected the number of values at the beginning of line " + this.reader.getLineNumber() + ", read '" + this.tokenizer.getSubstring() + "'");
            wrapped.initCause(e);
            throw wrapped;
        }
        this.tokenizer.advance();

        try {
            // Smallest dimensionality that can hold every index seen so far.
            int maxDim = 0;
            // Index of a pair whose value has not been consumed yet, or -1.
            int pendingIndex = -1;
            while (this.tokenizer.valid()) {
                if (this.values.size() < cardinality) {
                    if (pendingIndex < 0) {
                        try {
                            pendingIndex = this.tokenizer.getIntBase10();
                            this.tokenizer.advance();
                            // The next token is the value of this index.
                            continue;
                        } catch (NumberFormatException e) {
                            // Not an index: warn once about overflowing numbers, then
                            // treat the token as a label below.
                            if (!this.warnedPrecision && (e == ParseUtil.PRECISION_OVERFLOW || e == ParseUtil.EXPONENT_OVERFLOW)) {
                                getLogger().warning("Too many digits in what looked like a double number - treating as string: " + this.tokenizer.getSubstring());
                                this.warnedPrecision = true;
                            }
                        }
                    } else if (!isLabelColumn(pendingIndex)) {
                        double value = this.tokenizer.getDouble();
                        maxDim = Math.max(maxDim, pendingIndex + 1);
                        this.values.put(pendingIndex, value);
                        this.tokenizer.advance();
                        pendingIndex = -1;
                        continue;
                    }
                }
                // Fallback: the current token is a label. This also covers the value
                // of a label-column index, so the pending index is finished here;
                // leaving it set would turn the rest of the line into labels and
                // trip the "ended too early" check below.
                this.haslabels = true;
                this.labels.add(this.tokenizer.getSubstring());
                this.tokenizer.advance();
                pendingIndex = -1;
            }
            if (pendingIndex >= 0) {
                throw new IllegalArgumentException("Parser expected double value, but line ended too early: " + this.reader.getLineNumber());
            }
            this.curvec = this.sparsefactory.newNumberVector(this.values, maxDim);
            this.curlbl = LabelList.make(this.labels);
            return true;
        } finally {
            // Reset the scratch buffers even when the line is rejected, so that a
            // failed line cannot leak entries into the next one.
            this.values.clear();
            this.labels.clear();
        }
    }

    /**
     * Chooses the type information for the parsed vectors.
     *
     * @param i lower dimensionality bound (presumably the minimum seen in the
     *        input - confirm against the superclass)
     * @param i2 upper dimensionality bound (presumably the maximum seen in the
     *        input - confirm against the superclass)
     * @return vector field type information if both bounds are equal, variable
     *         dimensionality vector type information if {@code i < i2}
     * @throws AbortException if {@code i > i2}
     */
    @Override
    protected SimpleTypeInformation<V> getTypeInformation(int i, int i2) {
        if (i == i2) {
            return new VectorFieldTypeInformation(this.factory, i);
        }
        if (i < i2) {
            return new VectorTypeInformation(this.factory, this.factory.getDefaultSerializer(), i, i2);
        }
        throw new AbortException("No vectors were read from the input file - cannot determine vector data type.");
    }

    /**
     * Returns the logger of this class.
     *
     * @return the class logger
     */
    @Override
    protected Logging getLogger() {
        return LOG;
    }
}
