package io.projectglow.gff;

import io.projectglow.common.FeatureSchemas$;
import java.util.Locale;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SQLContext;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.functions$;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.StringType$;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructField$;
import org.apache.spark.sql.types.StructType;
import scala.Array$;
import scala.Predef$;
import scala.Predef$ArrowAssoc$;
import scala.Some;
import scala.Tuple2;
import scala.collection.GenSeqLike;
import scala.collection.SeqLike;
import scala.collection.TraversableLike;
import scala.collection.immutable.Map;
import scala.collection.immutable.Seq;
import scala.collection.immutable.Seq$;
import scala.collection.immutable.StringOps;
import scala.collection.mutable.ArrayOps;
import scala.math.Ordering$;
import scala.math.Ordering$Int$;
import scala.math.Ordering$String$;
import scala.reflect.ClassTag$;
import scala.runtime.BoxesRunTime;

/* compiled from: GffDataSource.scala */
/* loaded from: input_file:io/projectglow/gff/GffDataSource$.class */
public final class GffDataSource$ {
    public static GffDataSource$ MODULE$;
    private final String attributesMapColumnName;
    private final String COLUMN_DELIMITER;
    private final String ATTRIBUTES_DELIMITER;
    private final String GFF3_TAG_VALUE_DELIMITER;
    private final String GTF_TAG_VALUE_DELIMITER;
    private final String COMMENT_IDENTIFIER;
    private final String NULL_IDENTIFIER;
    private final String ARRAY_DELIMITER;
    private final Map<String, String> csvReadOptions;
    private final String columnPruningConf;

    static {
        new GffDataSource$();
    }

    public String attributesMapColumnName() {
        return this.attributesMapColumnName;
    }

    public String COLUMN_DELIMITER() {
        return this.COLUMN_DELIMITER;
    }

    public String ATTRIBUTES_DELIMITER() {
        return this.ATTRIBUTES_DELIMITER;
    }

    public String GFF3_TAG_VALUE_DELIMITER() {
        return this.GFF3_TAG_VALUE_DELIMITER;
    }

    public String GTF_TAG_VALUE_DELIMITER() {
        return this.GTF_TAG_VALUE_DELIMITER;
    }

    public String COMMENT_IDENTIFIER() {
        return this.COMMENT_IDENTIFIER;
    }

    public String NULL_IDENTIFIER() {
        return this.NULL_IDENTIFIER;
    }

    public String ARRAY_DELIMITER() {
        return this.ARRAY_DELIMITER;
    }

    public Map<String, String> csvReadOptions() {
        return this.csvReadOptions;
    }

    public String columnPruningConf() {
        return this.columnPruningConf;
    }

    public String checkAndGetPath(Map<String, String> map) {
        Some some = map.get("path");
        if (some instanceof Some) {
            return (String) some.value();
        }
        throw new IllegalArgumentException("Path is required");
    }

    public StructType inferSchema(SQLContext sQLContext, String str) {
        SparkSession sparkSession = sQLContext.sparkSession();
        boolean z = new StringOps(Predef$.MODULE$.augmentString(sparkSession.conf().get(columnPruningConf()))).toBoolean();
        if (z) {
            sparkSession.conf().set(columnPruningConf(), false);
        }
        Seq seq = (Seq) ((TraversableLike) ((TraversableLike) ((Row[]) addAttributesMapColumn(sparkSession.read().options(csvReadOptions()).schema(FeatureSchemas$.MODULE$.gffBaseSchema()).csv(str)).withColumn(attributesMapColumnName(), functions$.MODULE$.explode(functions$.MODULE$.map_keys(functions$.MODULE$.col(attributesMapColumnName())))).agg(functions$.MODULE$.collect_set(attributesMapColumnName()), Predef$.MODULE$.wrapRefArray(new Column[0])).collect())[0].getAs(0)).filter(str2 -> {
            return BoxesRunTime.boxToBoolean($anonfun$inferSchema$1(str2));
        })).groupBy(str3 -> {
            return str3.toLowerCase();
        }).mapValues(seq2 -> {
            return (String) seq2.head();
        }).values().to(Predef$.MODULE$.fallbackStringCanBuildFrom());
        sparkSession.conf().set(columnPruningConf(), z);
        return new StructType((StructField[]) new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[]) new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps(FeatureSchemas$.MODULE$.gffBaseSchema().fields())).dropRight(1))).$plus$plus((Seq) ((SeqLike) seq.map(str4 -> {
            return new StructField(str4, (DataType) FeatureSchemas$.MODULE$.gffOfficialAttributeFields().find(structField -> {
                return BoxesRunTime.boxToBoolean($anonfun$inferSchema$5(str4, structField));
            }).map(structField2 -> {
                return structField2.dataType();
            }).getOrElse(() -> {
                return StringType$.MODULE$;
            }), StructField$.MODULE$.apply$default$3(), StructField$.MODULE$.apply$default$4());
        }, Seq$.MODULE$.canBuildFrom())).sortBy(structField -> {
            Integer boxToInteger;
            int indexOf = ((GenSeqLike) FeatureSchemas$.MODULE$.gffOfficialAttributeFields().map(structField -> {
                return structField.name();
            }, scala.collection.Seq$.MODULE$.canBuildFrom())).indexOf(MODULE$.normalizeString(structField.name()));
            switch (indexOf) {
                case -1:
                    boxToInteger = BoxesRunTime.boxToInteger(FeatureSchemas$.MODULE$.gffOfficialAttributeFields().length() + 1);
                    break;
                default:
                    boxToInteger = BoxesRunTime.boxToInteger(indexOf);
                    break;
            }
            return new Tuple2(boxToInteger, structField.name());
        }, Ordering$.MODULE$.Tuple2(Ordering$Int$.MODULE$, Ordering$String$.MODULE$)), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(StructField.class))));
    }

    public Dataset<Row> addAttributesMapColumn(Dataset<Row> dataset) {
        return dataset.withColumn(attributesMapColumnName(), functions$.MODULE$.expr(new StringOps(Predef$.MODULE$.augmentString(new StringBuilder(94).append("str_to_map(\n           |       ").append(FeatureSchemas$.MODULE$.attributesField().name()).append(",\n           |       \"").append(ATTRIBUTES_DELIMITER()).append("\",\n           |       \"").append(GFF3_TAG_VALUE_DELIMITER()).append("\"\n           |   )").toString())).stripMargin()));
    }

    public Dataset<Row> normalizeAttributesMapKeys(Dataset<Row> dataset) {
        return dataset.withColumn(attributesMapColumnName(), functions$.MODULE$.map_from_arrays(functions$.MODULE$.expr(new StringOps(Predef$.MODULE$.augmentString(new StringBuilder(119).append("transform(\n            |       map_keys(").append(attributesMapColumnName()).append("),\n            |       k -> regexp_replace(lower(k), \"_\", \"\")\n            |   )").toString())).stripMargin()), functions$.MODULE$.map_values(functions$.MODULE$.col(attributesMapColumnName()))));
    }

    public Dataset<Row> filterFastaLines(Dataset<Row> dataset) {
        return dataset.where(functions$.MODULE$.isnull(functions$.MODULE$.coalesce(Predef$.MODULE$.wrapRefArray((Object[]) new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[]) new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps(FeatureSchemas$.MODULE$.gffBaseSchema().fieldNames())).drop(1))).map(str -> {
            return functions$.MODULE$.col(str);
        }, Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(Column.class)))))).unary_$bang());
    }

    public String normalizeString(String str) {
        return str.toLowerCase().replaceAll("_", "");
    }

    public static final /* synthetic */ boolean $anonfun$inferSchema$1(String str) {
        return !str.isEmpty();
    }

    public static final /* synthetic */ boolean $anonfun$inferSchema$5(String str, StructField structField) {
        String name = structField.name();
        String normalizeString = MODULE$.normalizeString(str);
        return name != null ? name.equals(normalizeString) : normalizeString == null;
    }

    private GffDataSource$() {
        MODULE$ = this;
        this.attributesMapColumnName = "attributesMap";
        this.COLUMN_DELIMITER = "\t";
        this.ATTRIBUTES_DELIMITER = ";";
        this.GFF3_TAG_VALUE_DELIMITER = "=";
        this.GTF_TAG_VALUE_DELIMITER = " ";
        this.COMMENT_IDENTIFIER = "#";
        this.NULL_IDENTIFIER = ".";
        this.ARRAY_DELIMITER = ",";
        this.csvReadOptions = Predef$.MODULE$.Map().apply(Predef$.MODULE$.wrapRefArray(new Tuple2[]{Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc("sep"), COLUMN_DELIMITER()), Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc("comment"), COMMENT_IDENTIFIER()), Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc("mode"), "DROPMALFORMED"), Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc("nullValue"), NULL_IDENTIFIER())}));
        this.columnPruningConf = "spark.sql.csv.parser.columnPruning.enabled";
    }
}
