package org.neo4j.internal.batchimport.input.parquet;

import blue.strategic.parquet.ParquetReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.file.Files;
import java.nio.file.OpenOption;
import java.nio.file.Path;
import java.time.ZoneId;
import java.time.ZoneOffset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Supplier;
import org.apache.parquet.column.ColumnDescriptor;
import org.apache.parquet.hadoop.metadata.BlockMetaData;
import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData;
import org.apache.parquet.hadoop.metadata.ParquetMetadata;
import org.apache.parquet.io.DelegatingSeekableInputStream;
import org.apache.parquet.io.InputFile;
import org.apache.parquet.io.SeekableInputStream;
import org.neo4j.batchimport.api.InputIterable;
import org.neo4j.batchimport.api.input.Collector;
import org.neo4j.batchimport.api.input.IdType;
import org.neo4j.batchimport.api.input.Input;
import org.neo4j.batchimport.api.input.PropertySizeCalculator;
import org.neo4j.batchimport.api.input.ReadableGroups;
import org.neo4j.cloud.storage.io.ReadableChannel;
import org.neo4j.internal.batchimport.input.Groups;
import org.neo4j.internal.batchimport.input.HeaderException;
import org.neo4j.internal.batchimport.input.InputException;
import org.neo4j.internal.schema.LabelSchemaDescriptor;
import org.neo4j.internal.schema.SchemaCommand;
import org.neo4j.internal.schema.SchemaDescriptor;
import org.neo4j.internal.schema.SchemaDescriptors;
import org.neo4j.token.TokenHolders;
import org.neo4j.util.Preconditions;

/* loaded from: input_file:org/neo4j/internal/batchimport/input/parquet/ParquetInput.class */
public class ParquetInput implements Input {
    /** Time-zone supplier passed to every {@link ParquetData} created by this input; always yields UTC. */
    private static final Supplier<ZoneId> defaultTimezoneSupplier = () -> ZoneOffset.UTC;

    /** One {@link ParquetData} per node file, built in the constructor. */
    private final List<ParquetData> nodeDatas;

    /** One {@link ParquetData} per relationship file, built in the constructor. */
    private final List<ParquetData> relationshipDatas;

    /** Schema commands as handed to the constructor; returned unchanged by {@link #schemaCommands()}. */
    private final List<SchemaCommand> schemaCommands;

    private final IdType idType;

    /** ID groups; node ID columns register their group here while columns are verified. */
    private final Groups groups;

    /** Receives notifications about normalized types and missing labels/types. */
    private final ParquetMonitor monitor;

    /** Node files keyed by the label set they were registered with. */
    private final Map<Set<String>, List<Path[]>> nodeFiles;

    /** Relationship files keyed by the relationship type they were registered with. */
    private final Map<String, List<Path[]>> relationshipFiles;

    /** Non-ignored columns of every node and relationship file, keyed by file. */
    private final Map<Path, List<ParquetColumn>> verifiedColumns;

    /** String form of the array delimiter character given to the constructor. */
    private final String arrayDelimiter;

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:org/neo4j/internal/batchimport/input/parquet/ParquetInput$ParquetImportInputFile.class */
    /**
     * Parquet {@link InputFile} backed by a {@link Path}.
     *
     * <p>Instances are cached per path in a static map. The cache is a plain {@link HashMap} and no
     * synchronization is visible in this class.
     */
    public static class ParquetImportInputFile implements InputFile {
        static Map<Path, ParquetImportInputFile> importFileCache = new HashMap<>();
        private final Path lePath;

        /**
         * Returns the cached input file for {@code path}, creating and caching it on first use.
         *
         * @param path the Parquet file
         * @return the input file associated with {@code path}
         */
        public static ParquetImportInputFile of(Path path) {
            return importFileCache.computeIfAbsent(path, ParquetImportInputFile::new);
        }

        private ParquetImportInputFile(Path path) {
            this.lePath = path;
        }

        /**
         * @return the size of the file in bytes, as reported by {@link Files#size(Path)}
         * @throws IOException if the size cannot be determined
         */
        public long getLength() throws IOException {
            return Files.size(lePath);
        }

        /**
         * Opens a new seekable stream over the file.
         *
         * <p>If the stream handed out by the path's file system is a {@link ReadableChannel}, seeking
         * is delegated to that channel. Otherwise that stream is closed again and a
         * {@link FileInputStream} is opened, whose file channel is used for seeking.
         *
         * @return a new stream positioned at the start of the file
         * @throws IOException if the file cannot be opened
         */
        public SeekableInputStream newStream() throws IOException {
            var stream = Files.newInputStream(lePath);
            if (stream instanceof ReadableChannel channel) {
                return new DelegatingSeekableInputStream(stream) {
                    // Only updated by seek(); plain reads do not advance it.
                    private long position = 0;

                    public long getPos() {
                        return position;
                    }

                    public void seek(long newPosition) throws IOException {
                        channel.position(newPosition);
                        position = newPosition;
                    }
                };
            }

            // This stream cannot seek and is not used any further; close it so the handle is not
            // leaked once the FileInputStream below takes over.
            stream.close();

            final FileInputStream fileStream = new FileInputStream(lePath.toFile());
            return new DelegatingSeekableInputStream(fileStream) {
                // Only updated by seek(); plain reads do not advance it.
                private long position = 0;

                public long getPos() {
                    return position;
                }

                public void seek(long newPosition) throws IOException {
                    fileStream.getChannel().position(newPosition);
                    position = newPosition;
                }
            };
        }
    }

    /**
     * Creates an input without any schema commands; delegates to the seven-argument constructor
     * with an empty list.
     *
     * @param map node files keyed by label set
     * @param map2 relationship files keyed by relationship type
     * @param idType the ID type reported by {@link #idType()}
     * @param ch array delimiter; must not be {@code null}
     * @param groups ID groups used while verifying columns
     * @param parquetMonitor monitor notified while verifying columns
     */
    public ParquetInput(
            Map<Set<String>, List<Path[]>> map,
            Map<String, List<Path[]>> map2,
            IdType idType,
            Character ch,
            Groups groups,
            ParquetMonitor parquetMonitor) {
        this(map, map2, Collections.emptyList(), idType, ch, groups, parquetMonitor);
    }

    /**
     * Creates an input over the given node and relationship files.
     *
     * <p>The columns of every file are verified first (see {@code verifyColumns}); afterwards one
     * {@link ParquetData} is created per file, in the iteration order of the given maps.
     *
     * @param map node files keyed by label set
     * @param map2 relationship files keyed by relationship type
     * @param list schema commands returned by {@link #schemaCommands()}
     * @param idType the ID type reported by {@link #idType()}
     * @param ch array delimiter; must not be {@code null}
     * @param groups ID groups used while verifying columns
     * @param parquetMonitor monitor notified while verifying columns
     */
    public ParquetInput(
            Map<Set<String>, List<Path[]>> map,
            Map<String, List<Path[]>> map2,
            List<SchemaCommand> list,
            IdType idType,
            Character ch,
            Groups groups,
            ParquetMonitor parquetMonitor) {
        this.idType = idType;
        this.groups = groups;
        this.monitor = parquetMonitor;
        this.arrayDelimiter = ch.toString();
        this.nodeFiles = map;
        this.relationshipFiles = map2;
        this.schemaCommands = list;
        // Needs groups and monitor, which are assigned above.
        this.verifiedColumns = verifyColumns(map, map2);

        // labels -> file groups -> single files -> ParquetData
        this.nodeDatas = map.entrySet().stream()
                .flatMap(byLabels -> byLabels.getValue().stream()
                        .map(fileGroup -> Map.entry(byLabels.getKey(), fileGroup)))
                .flatMap(grouped -> Arrays.stream(grouped.getValue())
                        .map(file -> Map.entry(grouped.getKey(), file)))
                .map(labelled -> new ParquetData(
                        labelled.getKey(),
                        labelled.getValue(),
                        this.verifiedColumns.get(labelled.getValue()),
                        defaultTimezoneSupplier))
                .toList();

        // type -> file groups -> single files -> ParquetData (the type becomes a one-element set)
        this.relationshipDatas = map2.entrySet().stream()
                .flatMap(byType -> byType.getValue().stream()
                        .map(fileGroup -> Map.entry(byType.getKey(), fileGroup)))
                .flatMap(grouped -> Arrays.stream(grouped.getValue())
                        .map(file -> Map.entry(grouped.getKey(), file)))
                .map(typed -> new ParquetData(
                        Set.of(typed.getKey()),
                        typed.getValue(),
                        this.verifiedColumns.get(typed.getValue()),
                        defaultTimezoneSupplier))
                .toList();
    }

    /**
     * Returns an iterable that creates a fresh {@link ParquetGroupInputIterator} over the node
     * data on every call.
     *
     * @param collector not used by this implementation
     * @return iterable over the node input
     */
    public InputIterable nodes(Collector collector) {
        return () -> new ParquetGroupInputIterator(nodeDatas, groups, idType, arrayDelimiter);
    }

    /**
     * Returns an iterable that creates a fresh {@link ParquetGroupInputIterator} over the
     * relationship data on every call.
     *
     * @param collector not used by this implementation
     * @return iterable over the relationship input
     */
    public InputIterable relationships(Collector collector) {
        return () -> new ParquetGroupInputIterator(relationshipDatas, groups, idType, arrayDelimiter);
    }

    /** @return the ID type this input was constructed with */
    public IdType idType() {
        return idType;
    }

    /** @return the ID groups this input was constructed with */
    public ReadableGroups groups() {
        return groups;
    }

    /** @return the schema commands given at construction (the same list instance, not a copy) */
    public List<SchemaCommand> schemaCommands() {
        return schemaCommands;
    }

    /**
     * Reads the Parquet schema of every node and relationship file and converts the first path
     * element of each column into a {@link ParquetColumn}.
     *
     * <p>Ignored columns are dropped. Node ID columns register their group with {@code groups};
     * relationship start/end ID columns must refer to a group that is already known. The monitor
     * is told about columns whose type needs conversion and about files that carry neither
     * labels/type from the file registration nor a LABEL/TYPE column.
     *
     * @param nodeFilesByLabels node files keyed by label set
     * @param relationshipFilesByType relationship files keyed by relationship type
     * @return the non-ignored columns of every file, keyed by file
     * @throws RuntimeException wrapping any runtime failure for a file, with the file path in the message
     * @throws UncheckedIOException if reading file metadata fails with an {@link IOException}
     */
    private Map<Path, List<ParquetColumn>> verifyColumns(
            Map<Set<String>, List<Path[]>> nodeFilesByLabels, Map<String, List<Path[]>> relationshipFilesByType) {
        Map<Path, List<ParquetColumn>> columnsByFile = new HashMap<>();
        try {
            for (Map.Entry<Set<String>, List<Path[]>> entry : nodeFilesByLabels.entrySet()) {
                boolean labelsGiven = entry.getKey().stream().anyMatch(label -> !label.isBlank());
                for (Path file : entry.getValue().stream().flatMap(Arrays::stream).toList()) {
                    try {
                        columnsByFile.put(file, verifyNodeFile(file, labelsGiven));
                    } catch (RuntimeException e) {
                        throw new RuntimeException(
                                "Could not read parquet file %s".formatted(file.toAbsolutePath()), e);
                    }
                }
            }
            for (Map.Entry<String, List<Path[]>> entry : relationshipFilesByType.entrySet()) {
                String type = entry.getKey();
                boolean typeGiven = type != null && !type.isBlank();
                for (Path file : entry.getValue().stream().flatMap(Arrays::stream).toList()) {
                    try {
                        columnsByFile.put(file, verifyRelationshipFile(file, typeGiven));
                    } catch (RuntimeException e) {
                        throw new RuntimeException(
                                "Could not read parquet file %s".formatted(file.toAbsolutePath()), e);
                    }
                }
            }
            return columnsByFile;
        } catch (IOException e) {
            throw new UncheckedIOException(e);
        }
    }

    /** Verifies the columns of a single node file and returns its non-ignored columns. */
    private List<ParquetColumn> verifyNodeFile(Path file, boolean labelsGiven) throws IOException {
        ParquetMetadata metadata = ParquetReader.readMetadata(ParquetImportInputFile.of(file));
        List<ParquetColumn> columns = new ArrayList<>();
        Set<String> seenProperties = new HashSet<>();
        String idGroup = null;
        // Tracked per file: a LABEL column in one file must not silence the
        // "no labels" notification for other files registered under the same label set.
        boolean hasLabels = labelsGiven;
        List<ColumnDescriptor> descriptors = metadata.getFileMetaData().getSchema().getColumns();
        String fileName = file.getFileName().toString();

        for (ColumnDescriptor descriptor : descriptors) {
            String columnName = descriptor.getPath()[0];
            if (columnName.isBlank()) {
                throw new InputException("column name must not be blank");
            }
            try {
                ParquetColumn column = ParquetColumn.from(columnName, EntityType.NODE);
                if (column.isIgnoredColumn()) {
                    continue;
                }
                String property =
                        column.propertyName() != null ? column.propertyName() : column.logicalColumnType().name();
                if (column.isIdColumn() && column.groupName() != null) {
                    if (idGroup != null && !idGroup.equals(column.groupName())) {
                        throw new IllegalStateException(
                                "There are multiple :ID columns, but they are referring to different groups");
                    }
                    idGroup = column.groupName();
                }
                if (seenProperties.contains(property)) {
                    if (column.isIdColumn()) {
                        throw new DuplicatedColumnException(
                                "Cannot store composite IDs as properties, only individual part. Property %s / File: %s"
                                        .formatted(property, fileName));
                    }
                    throw new DuplicatedColumnException(
                            "Duplicated header property %s found in file %s.".formatted(property, fileName));
                }
                seenProperties.add(property);
                if (column.logicalColumnType() == ParquetLogicalColumnType.ID) {
                    this.groups.getOrCreate(column.groupName());
                }
                if (column.columnType().needsConversion()) {
                    this.monitor.typeNormalized(
                            fileName,
                            property,
                            column.columnType().name(),
                            column.columnType().convertedType().name());
                }
                if (column.logicalColumnType() == ParquetLogicalColumnType.LABEL) {
                    hasLabels = true;
                }
                columns.add(column);
            } catch (IllegalArgumentException e) {
                // Keep the original failure as cause so the offending value stays visible.
                throw new InputException(
                        "Column name " + columnName + " is used as a special type but is unknown. Allowed types are "
                                + ParquetColumn.getReservedColumns(EntityType.NODE),
                        e);
            }
        }
        if (!hasLabels) {
            this.monitor.noNodeLabelsSpecified(fileName);
        }
        return columns;
    }

    /** Verifies the columns of a single relationship file and returns its non-ignored columns. */
    private List<ParquetColumn> verifyRelationshipFile(Path file, boolean typeGiven) throws IOException {
        ParquetMetadata metadata = ParquetReader.readMetadata(ParquetImportInputFile.of(file));
        List<ParquetColumn> columns = new ArrayList<>();
        Set<String> seenProperties = new HashSet<>();
        List<ColumnDescriptor> descriptors = metadata.getFileMetaData().getSchema().getColumns();
        boolean hasType = typeGiven;
        String fileName = file.getFileName().toString();

        for (ColumnDescriptor descriptor : descriptors) {
            String columnName = descriptor.getPath()[0];
            try {
                ParquetColumn column = ParquetColumn.from(columnName, EntityType.RELATIONSHIP);
                if (column.isIgnoredColumn()) {
                    continue;
                }
                String property =
                        column.propertyName() != null ? column.propertyName() : column.logicalColumnType().name();
                if (!seenProperties.add(property)) {
                    throw new DuplicatedColumnException(
                            "Duplicated header property %s found in file %s.".formatted(property, fileName));
                }
                if (column.columnType().needsConversion()) {
                    this.monitor.typeNormalized(
                            fileName,
                            property,
                            column.columnType().name(),
                            column.columnType().convertedType().name());
                }
                if (column.logicalColumnType() == ParquetLogicalColumnType.START_ID
                        || column.logicalColumnType() == ParquetLogicalColumnType.END_ID) {
                    try {
                        // Lookup only: the referenced group must already be known.
                        this.groups.get(column.groupName());
                    } catch (HeaderException e) {
                        throw new InputException(e.getMessage(), e);
                    }
                }
                if (column.logicalColumnType() == ParquetLogicalColumnType.TYPE) {
                    hasType = true;
                }
                columns.add(column);
            } catch (IllegalArgumentException e) {
                // Keep the original failure as cause so the offending value stays visible.
                throw new InputException(
                        "Column name " + columnName + " is used as a special type but is unknown. Allowed types are "
                                + ParquetColumn.getReservedColumns(EntityType.RELATIONSHIP),
                        e);
            }
        }
        if (!hasType) {
            this.monitor.noRelationshipTypeSpecified(fileName);
        }
        return columns;
    }

    /**
     * Builds a schema descriptor per ID group from all verified ID columns.
     *
     * @param tokenHolders used to resolve label and property key names to token ids
     * @return schema descriptors keyed by the group name of the ID column
     * @throws IllegalStateException presumably, via {@code Preconditions.checkState}, when an ID
     *     column lacks a label or property key, a token does not exist, or a group maps to two
     *     different descriptors — confirm against {@code Preconditions}
     */
    public Map<String, SchemaDescriptor> referencedNodeSchema(TokenHolders tokenHolders) {
        List<ParquetColumn> idColumns = new ArrayList<>();
        for (List<ParquetColumn> fileColumns : verifiedColumns.values()) {
            for (ParquetColumn column : fileColumns) {
                if (column.isIdColumn()) {
                    idColumns.add(column);
                }
            }
        }
        Map<String, SchemaDescriptor> schemaByGroup = new HashMap<>();
        checkReferencedNodeSchema(idColumns, tokenHolders, schemaByGroup);
        return schemaByGroup;
    }

    /**
     * Resolves label and property key of every given ID column to a label schema descriptor and
     * stores it in {@code schemaByGroup} under the column's group name.
     *
     * <p>Each column must name a label and a property key that both exist as tokens, and all
     * columns of one group must resolve to the same descriptor. Note that the descriptor is put
     * into the map before the "same descriptor" check runs.
     *
     * @param idColumns the ID columns to inspect
     * @param tokenHolders source of label and property key token ids
     * @param schemaByGroup receives one descriptor per group name
     */
    private void checkReferencedNodeSchema(
            List<ParquetColumn> idColumns, TokenHolders tokenHolders, Map<String, SchemaDescriptor> schemaByGroup) {
        for (ParquetColumn column : idColumns) {
            String label = column.idLabel();
            Preconditions.checkState(label != null, "No label was specified for the node index in '%s'", column);
            String propertyKey = column.propertyName();
            Preconditions.checkState(
                    propertyKey != null, "No property key was specified for node index in '%s'", column);

            int labelId = tokenHolders.labelTokens().getIdByName(label);
            int propertyKeyId = tokenHolders.propertyKeyTokens().getIdByName(propertyKey);
            // -1 is treated as "no such token".
            Preconditions.checkState(
                    labelId != -1, "Label '%s' for node index specified in '%s' does not exist", label, column);
            Preconditions.checkState(
                    propertyKeyId != -1,
                    "Property key '%s' for node index specified in '%s' does not exist",
                    propertyKey,
                    column);

            LabelSchemaDescriptor descriptor = SchemaDescriptors.forLabel(labelId, new int[] {propertyKeyId});
            SchemaDescriptor previous = schemaByGroup.put(column.groupName(), descriptor);
            Preconditions.checkState(
                    previous == null || previous.equals(descriptor),
                    "Multiple different indexes for group " + column.groupName());
        }
    }

    /**
     * Derives import estimates from the Parquet metadata of all node and relationship files.
     *
     * <p>For every row group ("block") the row count is added to the node or relationship total,
     * the total uncompressed size of its column chunks is added to the node or relationship size,
     * and the largest column count seen in any block (node or relationship) is tracked. The label
     * count is the number of distinct labels across all node file registrations.
     *
     * <p>Relationship rows used to be added to the node counter while {@code 0} was reported as
     * the relationship count; they are now counted and reported separately.
     *
     * @param propertySizeCalculator not used by this implementation
     * @return the estimates
     * @throws IOException if the metadata of a file cannot be read
     */
    public Input.Estimates validateAndEstimate(PropertySizeCalculator propertySizeCalculator) throws IOException {
        long nodeRows = 0;
        long relationshipRows = 0;
        long maxColumnsPerBlock = 0;
        long nodeDataSize = 0;
        long relationshipDataSize = 0;
        Set<String> distinctLabels = new HashSet<>();

        for (Map.Entry<Set<String>, List<Path[]>> entry : this.nodeFiles.entrySet()) {
            distinctLabels.addAll(entry.getKey());
            for (Path[] fileGroup : entry.getValue()) {
                for (Path file : fileGroup) {
                    ParquetMetadata metadata = ParquetReader.readMetadata(ParquetImportInputFile.of(file));
                    for (BlockMetaData block : metadata.getBlocks()) {
                        nodeRows += block.getRowCount();
                        maxColumnsPerBlock = Math.max(maxColumnsPerBlock, block.getColumns().size());
                        for (ColumnChunkMetaData chunk : block.getColumns()) {
                            nodeDataSize += chunk.getTotalUncompressedSize();
                        }
                    }
                }
            }
        }

        for (List<Path[]> fileGroups : this.relationshipFiles.values()) {
            for (Path[] fileGroup : fileGroups) {
                for (Path file : fileGroup) {
                    ParquetMetadata metadata = ParquetReader.readMetadata(ParquetImportInputFile.of(file));
                    for (BlockMetaData block : metadata.getBlocks()) {
                        relationshipRows += block.getRowCount();
                        maxColumnsPerBlock = Math.max(maxColumnsPerBlock, block.getColumns().size());
                        for (ColumnChunkMetaData chunk : block.getColumns()) {
                            relationshipDataSize += chunk.getTotalUncompressedSize();
                        }
                    }
                }
            }
        }

        // NOTE(review): argument order assumed to be (nodes, relationships, node properties,
        // relationship properties, node property size, relationship property size, node labels)
        // — confirm against Input.Estimates.
        // NOTE(review): the third argument is still the largest column count of any block and the
        // fourth is still 0, exactly as before; whether those two are intended is unclear.
        return new Input.Estimates(
                nodeRows,
                relationshipRows,
                maxColumnsPerBlock,
                0L,
                nodeDataSize,
                relationshipDataSize,
                distinctLabels.size());
    }
}
