package de.gwdg.metadataqa.marc.analysis;

import de.gwdg.metadataqa.marc.MarcSubfield;
import de.gwdg.metadataqa.marc.Utils;
import de.gwdg.metadataqa.marc.cli.parameters.ClassificationParameters;
import de.gwdg.metadataqa.marc.cli.utils.Schema;
import de.gwdg.metadataqa.marc.dao.DataField;
import de.gwdg.metadataqa.marc.dao.record.BibliographicRecord;
import de.gwdg.metadataqa.marc.definition.bibliographic.SchemaType;
import de.gwdg.metadataqa.marc.definition.general.indexer.subject.ClassificationSchemes;
import de.gwdg.metadataqa.marc.utils.pica.PicaVocabularyManager;
import de.gwdg.metadataqa.marc.utils.pica.VocabularyEntry;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

/* loaded from: input_file:de/gwdg/metadataqa/marc/analysis/ClassificationAnalyzer.class */
public class ClassificationAnalyzer {
    // Accumulator that every process*/register*/update* method of this class writes into.
    private final ClassificationStatistics statistics;
    // Run parameters; assigned only by the three-argument constructor, otherwise null.
    private ClassificationParameters parameters;
    // The record under analysis.
    private BibliographicRecord marcRecord;
    // Schemas collected for the record; re-created by process() and filled by registerSchemas().
    private List<Schema> schemasInRecord;
    private static final Logger logger = Logger.getLogger(ClassificationAnalyzer.class.getCanonicalName());
    // Used through resolve() to turn a scheme name into the value handed to the Schema constructor.
    private static final ClassificationSchemes classificationSchemes = ClassificationSchemes.getInstance();
    // A digit at the start of the input; applied to subfield codes in orderSubfields().
    private static final Pattern NUMERIC = Pattern.compile("^\\d");
    // Assigned by the constructor the first time a PICA record is seen while this is still null.
    private static PicaVocabularyManager manager = null;
    // Tags handled by processFieldWithIndicator1AndSubfield2().
    private static final List<String> fieldsWithIndicator1AndSubfield2 = Arrays.asList("052", "086");
    // Tags handled by processFieldWithIndicator2AndSubfield2().
    private static final List<String> fieldsWithIndicator2AndSubfield2 = Arrays.asList("055", "072", "600", "610", "611", "630", "647", "648", "650", "651", "655", "656", "657");
    // Tags handled by processFieldWithSubfield2().
    private static final List<String> fieldsWithSubfield2 = Arrays.asList("084", "654", "658", "662");
    // Tags handled by processFieldWithoutSource(), which reports them as "uncontrolled/<ind2>".
    private static final List<String> fieldsWithoutSource = Arrays.asList("653");
    public static final String DEWEY_DECIMAL_CLASSIFICATION = "Dewey Decimal Classification";
    // Tag / fixed scheme name pairs handled by processFieldWithScheme() for MARC21 records.
    private static final List<FieldWithScheme> MARC21_FIELD_WITH_SCHEMES = Arrays.asList(new FieldWithScheme("080", "Universal Decimal Classification"), new FieldWithScheme("082", DEWEY_DECIMAL_CLASSIFICATION), new FieldWithScheme("083", DEWEY_DECIMAL_CLASSIFICATION), new FieldWithScheme("085", DEWEY_DECIMAL_CLASSIFICATION));
    private static final List<FieldWithScheme> PICA_FIELDS_WITH_SCHEME = Arrays.asList(new FieldWithScheme("041A", "Schlagwortfolgen (DNB und Verbünde)"), new FieldWithScheme("044K", "Schlagwortfolgen (GBV, SWB, K10plus)"), new FieldWithScheme("044L", "Einzelschlagwörter (Projekte)"), new FieldWithScheme("044N", "Schlagwörter aus einem Thesaurus und freie Schlagwörter"), new FieldWithScheme("044S", "Gattungsbegriffe bei Alten Drucken"), new FieldWithScheme("044Z", "Lokale Schlagwörter auf bibliografischer Ebene"), new FieldWithScheme("045A", "LCC-Notation"), new FieldWithScheme("045B/00", "Allgemeine Systematik für Bibliotheken (ASB)"), new FieldWithScheme("045B/01", "Systematik der Stadtbibliothek Duisburg (SSD)"), new FieldWithScheme("045B/02", "Systematik für Bibliotheken (SfB)"), new FieldWithScheme("045B/03", "Klassifikation für Allgemeinbibliotheken (KAB)"), new FieldWithScheme("045B/04", "Systematiken der ekz"), new FieldWithScheme("045B/05", "Gattungsbegriffe (DNB)"), new FieldWithScheme("045C", "Klassifikation der National Library of Medicine (NLM)"), new FieldWithScheme("045D/49", "ZBW-Schlagwörter - Veröffentlichungsart"), new FieldWithScheme("045D/50", "Vorläufige Schhlagwörter (STW)"), new FieldWithScheme("045D/60", "FIV-Schlagwörter (Themen)"), new FieldWithScheme("045D/70", "FIV-Schlagwörter (Aspekte)"), new FieldWithScheme("045E", "Sachgruppen der Deutschen Nationalbibliografie bis 2003"), new FieldWithScheme("045F", "DDC-Notation"), new FieldWithScheme("045G", "Sachgruppen der Deutschen Nationalbibliografie ab 2004"), new FieldWithScheme("045H", "DDC-Notation: Vollständige Notation"), new FieldWithScheme("045M", "Lokale Notationen auf bibliografischer Ebene"), new FieldWithScheme("045N", "FIV-Regionalklassifikation"), new FieldWithScheme("045Q/01", "Basisklassifikation"), new FieldWithScheme("045R", "Regensburger Verbundklassifikation (RVK)"), new FieldWithScheme("045S", "Deutsche Bibliotheksstatistik (DBS)"), new FieldWithScheme("045T", "Nicht 
mehr gültige Notationen der Regensburger Verbundklassifikation (RVK)"), new FieldWithScheme("045V", "SSG-Nummer/FID-Kennzeichen"), new FieldWithScheme("045W", "SSG-Angabe für thematische OLC-Ausschnitte"), new FieldWithScheme("045X", "Notation eines Klassifikationssystems"), new FieldWithScheme("045Y", "SSG-Angabe für Fachkataloge"));

    /**
     * Creates an analyzer for a single record without run parameters.
     *
     * <p>When the record's schema type is PICA and the shared vocabulary manager has not
     * been obtained yet, it is fetched here and kept in the static {@code manager} field.
     *
     * @param bibliographicRecord the record to analyse
     * @param classificationStatistics the accumulator that receives the results
     */
    public ClassificationAnalyzer(BibliographicRecord bibliographicRecord, ClassificationStatistics classificationStatistics) {
        this.parameters = null;
        this.marcRecord = bibliographicRecord;
        this.statistics = classificationStatistics;

        if (bibliographicRecord.getSchemaType().equals(SchemaType.PICA)) {
            if (manager == null) {
                manager = PicaVocabularyManager.getInstance();
            }
        }
    }

    /**
     * Creates an analyzer for a single record with run parameters.
     *
     * <p>Delegates to the two-argument constructor and then stores the parameters, which
     * {@code increaseCounters} consults to decide whether collocations are counted.
     *
     * @param bibliographicRecord the record to analyse
     * @param classificationStatistics the accumulator that receives the results
     * @param classificationParameters the run parameters
     */
    public ClassificationAnalyzer(BibliographicRecord bibliographicRecord,
                                  ClassificationStatistics classificationStatistics,
                                  ClassificationParameters classificationParameters) {
        this(bibliographicRecord, classificationStatistics);
        this.parameters = classificationParameters;
    }

    /**
     * Analyses the record and updates the statistics.
     *
     * <p>For MARC21 records the five field groups are processed one after another, each
     * step receiving the running total of the previous one. For PICA records the
     * vocabulary-driven processing is used. Any other schema type yields a total of 0.
     * The counters are updated with the final total in every case.
     *
     * @return the total reported by the processing steps
     */
    public int process() {
        int total = 0;
        schemasInRecord = new ArrayList<>();

        if (marcRecord.getSchemaType().equals(SchemaType.MARC21)) {
            total = processFieldsWithIndicator1AndSubfield2(total);
            total = processFieldsWithIndicator2AndSubfield2(total);
            total = processFieldsWithSubfield2(total);
            total = processFieldsWithoutSource(total);
            total = processFieldsWithScheme(total, MARC21_FIELD_WITH_SCHEMES);
        } else if (marcRecord.getSchemaType().equals(SchemaType.PICA)) {
            total = processFieldsWithSchemePica(total, PICA_FIELDS_WITH_SCHEME);
        }

        increaseCounters(total);
        return total;
    }

    /**
     * Records the per-record total in the statistics.
     *
     * <p>Updates the has-classification counter, the histogram of totals and, for a total
     * seen for the first time, stores the record id as an example. The collocation
     * histogram is updated only when parameters are present, they ask for collocations,
     * and the record produced at least one abbreviation.
     *
     * @param i the total computed for the record
     */
    private void increaseCounters(int i) {
        Utils.count(i > 0, statistics.getHasClassifications());
        Utils.count(i, statistics.getSchemaHistogram());
        statistics.getFrequencyExamples().computeIfAbsent(i, total -> marcRecord.getId(true));

        boolean collectCollocations = parameters != null && parameters.doCollectCollocations();
        if (collectCollocations) {
            List<String> collocation = getCollocationInRecord();
            if (!collocation.isEmpty()) {
                Utils.count(collocation, statistics.getCollocationHistogram());
            }
        }
    }

    /**
     * Runs {@code processFieldWithScheme} for every tag/scheme pair and adds the positive
     * results to the running total.
     *
     * @param i the running total so far
     * @param list the tag/scheme pairs to process
     * @return the updated running total
     */
    private int processFieldsWithScheme(int i, List<FieldWithScheme> list) {
        int total = i;
        for (FieldWithScheme fieldWithScheme : list) {
            int found = processFieldWithScheme(marcRecord, fieldWithScheme);
            if (found > 0) {
                total += found;
            }
        }
        return total;
    }

    /**
     * Processes a PICA record against every entry of the vocabulary manager.
     *
     * <p>For each vocabulary entry whose tag occurs in the record, every matching data
     * field is inspected: {@code $a} is used when the field has it, otherwise the first
     * subfield whose code is not {@code A}. A schema is created, counted and registered
     * for each field where such a subfield is found; fields without one are logged.
     *
     * @param i the running total so far
     * @param list not consulted by this method; the entries come from the vocabulary manager
     * @return the running total plus the number of fields that produced a schema
     */
    private int processFieldsWithSchemePica(int i, List<FieldWithScheme> list) {
        int total = i;
        for (VocabularyEntry entry : manager.getAll()) {
            if (!marcRecord.hasDatafield(entry.getPica())) {
                continue;
            }
            String label = entry.getLabel();
            List<Schema> schemas = new ArrayList<>();
            for (DataField field : marcRecord.getDatafield(entry.getPica())) {
                String firstSubfield = null;
                if (field.getSubfield("a") != null) {
                    firstSubfield = "$a";
                } else {
                    for (MarcSubfield subfield : field.getSubfields()) {
                        String code = subfield.getCode();
                        if (!code.equals("A")) {
                            firstSubfield = "$" + code;
                            break;
                        }
                    }
                }

                if (firstSubfield == null) {
                    logger.log(Level.SEVERE, "undetected subfield in record {0} {1}", new Object[]{marcRecord.getId(), field.toString()});
                    continue;
                }

                Schema schema = new Schema(field.getTagWithOccurrence(), firstSubfield, entry.getVoc(), label);
                schemas.add(schema);
                updateSchemaSubfieldStatistics(field, schema);
                total++;
            }
            registerSchemas(schemas);
        }
        return total;
    }

    /**
     * Runs {@code processFieldWithoutSource} for every tag in {@code fieldsWithoutSource}
     * and adds the positive results to the running total.
     *
     * @param i the running total so far
     * @return the updated running total
     */
    private int processFieldsWithoutSource(int i) {
        int total = i;
        for (String tag : fieldsWithoutSource) {
            int found = processFieldWithoutSource(marcRecord, tag);
            if (found > 0) {
                total += found;
            }
        }
        return total;
    }

    /**
     * Runs {@code processFieldWithSubfield2} for every tag in {@code fieldsWithSubfield2}
     * and adds the positive results to the running total.
     *
     * @param i the running total so far
     * @return the updated running total
     */
    private int processFieldsWithSubfield2(int i) {
        int total = i;
        for (String tag : fieldsWithSubfield2) {
            int found = processFieldWithSubfield2(marcRecord, tag);
            if (found > 0) {
                total += found;
            }
        }
        return total;
    }

    /**
     * Runs {@code processFieldWithIndicator2AndSubfield2} for every tag in
     * {@code fieldsWithIndicator2AndSubfield2} and adds the positive results to the
     * running total.
     *
     * @param i the running total so far
     * @return the updated running total
     */
    private int processFieldsWithIndicator2AndSubfield2(int i) {
        int total = i;
        for (String tag : fieldsWithIndicator2AndSubfield2) {
            int found = processFieldWithIndicator2AndSubfield2(marcRecord, tag);
            if (found > 0) {
                total += found;
            }
        }
        return total;
    }

    /**
     * Runs {@code processFieldWithIndicator1AndSubfield2} for every tag in
     * {@code fieldsWithIndicator1AndSubfield2} and adds the positive results to the
     * running total.
     *
     * @param i the running total so far
     * @return the updated running total
     */
    private int processFieldsWithIndicator1AndSubfield2(int i) {
        int total = i;
        for (String tag : fieldsWithIndicator1AndSubfield2) {
            int found = processFieldWithIndicator1AndSubfield2(marcRecord, tag);
            if (found > 0) {
                total += found;
            }
        }
        return total;
    }

    /**
     * Processes all occurrences of one tag whose classification scheme is fixed by the tag.
     *
     * <p>For each data field the first subfield whose code is none of {@code 1},
     * {@code 2}, {@code 6}, {@code 8} is taken as the location of the value. When such a
     * subfield exists, a schema is created from the tag, that subfield, the resolved
     * scheme and the scheme name, and the field's subfield pattern is counted. Fields
     * with no such subfield are logged and not counted.
     *
     * @param bibliographicRecord the record to read the fields from
     * @param fieldWithScheme the tag and the scheme name tied to it
     * @return the number of fields that produced a schema; 0 when the tag is absent
     */
    private int processFieldWithScheme(BibliographicRecord bibliographicRecord, FieldWithScheme fieldWithScheme) {
        String tag = fieldWithScheme.getTag();
        if (!bibliographicRecord.hasDatafield(tag)) {
            return 0;
        }

        int count = 0;
        List<Schema> schemas = new ArrayList<>();
        for (DataField dataField : bibliographicRecord.getDatafield(tag)) {
            String firstSubfield = null;
            for (MarcSubfield subfield : dataField.getSubfields()) {
                String code = subfield.getCode();
                boolean skipped = code.equals("1") || code.equals("2") || code.equals("6") || code.equals("8");
                if (!skipped) {
                    firstSubfield = "$" + code;
                    break;
                }
            }

            if (firstSubfield == null) {
                logger.log(Level.SEVERE, "undetected subfield in record {0} {1}", new Object[]{bibliographicRecord.getId(), dataField.toString()});
                continue;
            }

            String schemaName = fieldWithScheme.getSchemaName();
            Schema schema = new Schema(tag, firstSubfield, classificationSchemes.resolve(schemaName), schemaName);
            schemas.add(schema);
            updateSchemaSubfieldStatistics(dataField, schema);
            count++;
        }
        registerSchemas(schemas);
        return count;
    }

    /**
     * Books a batch of schemas into the statistics.
     *
     * <p>Every schema of the batch is counted in the instance statistics. The
     * deduplicated batch is counted in the record statistics and appended to the
     * schemas collected for this record.
     *
     * @param list the schemas found while processing one tag
     */
    private void registerSchemas(List<Schema> list) {
        addSchemasToStatistics(statistics.getInstances(), list);

        List<Schema> uniqueSchemas = deduplicateSchema(list);
        addSchemasToStatistics(statistics.getRecords(), uniqueSchemas);
        schemasInRecord.addAll(uniqueSchemas);
    }

    /**
     * Processes all occurrences of a tag whose source is named by indicator 1 or by {@code $2}.
     *
     * <p>Fields whose resolved indicator 1 is {@code "No information provided"} are
     * ignored. For the others, the schema comes from {@code $2} when the indicator refers
     * to it, otherwise from the resolved indicator value. If the scheme cannot be
     * resolved, the problem is logged and the raw indicator is used instead.
     *
     * @param bibliographicRecord the record to read the fields from
     * @param str the tag to process
     * @return the number of fields that were not ignored; 0 when the tag is absent
     */
    private int processFieldWithIndicator1AndSubfield2(BibliographicRecord bibliographicRecord, String str) {
        if (!bibliographicRecord.hasDatafield(str)) {
            return 0;
        }

        int count = 0;
        List<Schema> schemas = new ArrayList<>();
        for (DataField dataField : bibliographicRecord.getDatafield(str)) {
            String source = dataField.resolveInd1();
            if (source.equals("No information provided")) {
                continue;
            }
            count++;

            Schema schema;
            if (isaReferenceToSubfield2(str, source)) {
                // extractSchemaFromSubfield2 adds to the list itself
                schema = extractSchemaFromSubfield2(str, schemas, dataField);
            } else {
                try {
                    schema = new Schema(str, "ind1", classificationSchemes.resolve(source), source);
                } catch (IllegalArgumentException e) {
                    logger.log(Level.SEVERE, "Invalid scheme in ind1: {0}. {1}", new Object[]{e.getLocalizedMessage(), dataField});
                    schema = new Schema(str, "ind1", dataField.getInd1(), source);
                }
                schemas.add(schema);
            }
            updateSchemaSubfieldStatistics(dataField, schema);
        }
        registerSchemas(schemas);
        return count;
    }

    /**
     * Processes all occurrences of a tag whose source is named by indicator 2 or by {@code $2}.
     *
     * <p>The schema comes from {@code $2} when the resolved indicator refers to it,
     * otherwise from the resolved indicator value. If the scheme cannot be resolved, the
     * problem is logged and the raw indicator is used instead. Every field is counted.
     *
     * @param bibliographicRecord the record to read the fields from
     * @param str the tag to process
     * @return the number of fields with this tag; 0 when the tag is absent
     */
    private int processFieldWithIndicator2AndSubfield2(BibliographicRecord bibliographicRecord, String str) {
        if (!bibliographicRecord.hasDatafield(str)) {
            return 0;
        }

        int count = 0;
        List<Schema> schemas = new ArrayList<>();
        for (DataField dataField : bibliographicRecord.getDatafield(str)) {
            String source = dataField.resolveInd2();

            Schema schema;
            if (isaReferenceToSubfield2(str, source)) {
                // extractSchemaFromSubfield2 adds to the list itself
                schema = extractSchemaFromSubfield2(str, schemas, dataField);
            } else {
                try {
                    schema = new Schema(str, "ind2", classificationSchemes.resolve(source), source);
                } catch (IllegalArgumentException e) {
                    logger.log(Level.WARNING, "Invalid scheme in ind2: {0}. {1}", new Object[]{e.getLocalizedMessage(), dataField});
                    schema = new Schema(str, "ind2", dataField.getInd2(), source);
                }
                schemas.add(schema);
            }
            count++;
            updateSchemaSubfieldStatistics(dataField, schema);
        }
        registerSchemas(schemas);
        return count;
    }

    /**
     * Processes all occurrences of a tag whose source is always given in {@code $2}.
     *
     * @param bibliographicRecord the record to read the fields from
     * @param str the tag to process
     * @return the number of fields with this tag; 0 when the tag is absent
     */
    private int processFieldWithSubfield2(BibliographicRecord bibliographicRecord, String str) {
        if (!bibliographicRecord.hasDatafield(str)) {
            return 0;
        }

        int count = 0;
        List<Schema> schemas = new ArrayList<>();
        for (DataField dataField : bibliographicRecord.getDatafield(str)) {
            Schema schema = extractSchemaFromSubfield2(str, schemas, dataField);
            updateSchemaSubfieldStatistics(dataField, schema);
            count++;
        }
        registerSchemas(schemas);
        return count;
    }

    /**
     * Processes all occurrences of a tag that names no source.
     *
     * <p>Each field yields a schema whose value is {@code "uncontrolled/"} followed by
     * indicator 2, with a blank indicator written as {@code #}, and whose label is the
     * resolved indicator 2.
     *
     * @param bibliographicRecord the record to read the fields from
     * @param str the tag to process
     * @return the number of fields with this tag; 0 when the tag is absent
     */
    private int processFieldWithoutSource(BibliographicRecord bibliographicRecord, String str) {
        if (!bibliographicRecord.hasDatafield(str)) {
            return 0;
        }

        int count = 0;
        List<Schema> schemas = new ArrayList<>();
        for (DataField dataField : bibliographicRecord.getDatafield(str)) {
            String indicator;
            if (dataField.getInd2().equals(" ")) {
                indicator = "#";
            } else {
                indicator = dataField.getInd2();
            }
            Schema schema = new Schema(str, "ind2", "uncontrolled/" + indicator, dataField.resolveInd2());
            schemas.add(schema);
            updateSchemaSubfieldStatistics(dataField, schema);
            count++;
        }
        registerSchemas(schemas);
        return count;
    }

    /**
     * Builds schemas from the {@code $2} subfields of a field and appends them to the list.
     *
     * <p>Without any {@code $2} a single schema marked as undetectable is added. With
     * several {@code $2} subfields one schema per subfield is added and the last one is
     * returned.
     *
     * @param str the tag of the field
     * @param list the list that receives the created schemas
     * @param dataField the field to inspect
     * @return the last schema that was added
     */
    private Schema extractSchemaFromSubfield2(String str, List<Schema> list, DataField dataField) {
        List<MarcSubfield> sources = dataField.getSubfield("2");
        if (sources == null || sources.isEmpty()) {
            Schema undetectable = new Schema(str, "$2", AuthorithyAnalyzer.UNDETECTABLE, AuthorithyAnalyzer.UNDETECTABLE);
            list.add(undetectable);
            return undetectable;
        }

        Schema last = null;
        for (MarcSubfield source : sources) {
            last = new Schema(str, "$2", source.getValue(), source.resolve());
            list.add(last);
        }
        return last;
    }

    /**
     * Counts the subfield pattern of a field under the given schema.
     *
     * <p>The pattern is the ordered list of subfield codes produced by
     * {@code orderSubfields}. Nothing happens when the schema is {@code null}.
     *
     * @param dataField the field whose subfields form the pattern
     * @param schema the schema the pattern is counted under; may be {@code null}
     */
    private void updateSchemaSubfieldStatistics(DataField dataField, Schema schema) {
        if (schema == null) {
            return;
        }
        List<String> pattern = orderSubfields(dataField.getSubfields());
        Map<List<String>, Integer> patternCounts =
            statistics.getSubfields().computeIfAbsent(schema, key -> new HashMap<>());
        patternCounts.merge(pattern, 1, Integer::sum);
    }

    /**
     * Turns the subfields of a field into a canonical, ordered list of codes.
     *
     * <p>Each distinct code appears once; a code that occurs more than once gets a
     * {@code +} suffix. Codes that do not start with a digit come first in natural string
     * order, followed by the codes that start with a digit, also in natural order.
     *
     * <p>The digit test uses {@code find()} because the pattern only describes the first
     * character. A whole-input match would treat a repeated numeric code such as
     * {@code 2+} as non-numeric and sort it ahead of the letters, unlike a single
     * {@code 2}.
     *
     * @param list the subfields of one data field
     * @return a new mutable list with the ordered codes
     */
    private List<String> orderSubfields(List<MarcSubfield> list) {
        // Occurrences per code; iteration order is irrelevant because both groups are sorted.
        Map<String, Integer> occurrences = new HashMap<>();
        for (MarcSubfield subfield : list) {
            occurrences.merge(subfield.getCode(), 1, Integer::sum);
        }

        List<String> alphabetic = new ArrayList<>();
        List<String> numeric = new ArrayList<>();
        for (Map.Entry<String, Integer> entry : occurrences.entrySet()) {
            String code = entry.getValue() > 1 ? entry.getKey() + "+" : entry.getKey();
            if (NUMERIC.matcher(code).find()) {
                numeric.add(code);
            } else {
                alphabetic.add(code);
            }
        }

        Collections.sort(alphabetic);
        Collections.sort(numeric);
        alphabetic.addAll(numeric);
        return alphabetic;
    }

    /**
     * Returns the distinct schemas of the list, as decided by a {@code HashSet}.
     *
     * @param list the schemas, possibly with repetitions
     * @return a new list holding each distinct schema once, in no particular order
     */
    private List<Schema> deduplicateSchema(List<Schema> list) {
        return new ArrayList<>(new HashSet<>(list));
    }

    /**
     * Tells whether a resolved indicator value means that the source is in {@code $2}.
     *
     * @param str the tag of the field
     * @param str2 the resolved indicator value
     * @return {@code true} for tag 055 with one of the indicator texts accepted by
     *     {@code isAReferenceFrom055}, or for the text "Source specified in subfield $2"
     */
    private boolean isaReferenceToSubfield2(String str, String str2) {
        if (str.equals("055") && isAReferenceFrom055(str2)) {
            return true;
        }
        return str2.equals("Source specified in subfield $2");
    }

    /**
     * Tells whether a resolved indicator value is one of the four "Other call/class
     * number" texts checked for tag 055.
     *
     * @param str the resolved indicator value
     * @return {@code true} when the value equals one of the four texts
     */
    private boolean isAReferenceFrom055(String str) {
        switch (str) {
            case "Other call number assigned by LAC":
            case "Other class number assigned by LAC":
            case "Other call number assigned by the contributing library":
            case "Other class number assigned by the contributing library":
                return true;
            default:
                return false;
        }
    }

    /**
     * Counts every schema of the list in the given counter map.
     *
     * @param map the counter map to update
     * @param list the schemas to count; an empty list leaves the map untouched
     */
    private void addSchemasToStatistics(Map<Schema, Integer> map, List<Schema> list) {
        for (Schema schema : list) {
            Utils.count(schema, map);
        }
    }

    /**
     * Counts every scheme of the list in the given map. The first time a scheme is seen
     * in that map, its field-in-records counter in the statistics is also increased.
     *
     * <p>NOTE(review): the keys are arrays. If the maps are hash based, two arrays with
     * equal content are treated as different keys, so lookups only hit for the very same
     * array instance. Confirm whether that is intended.
     *
     * @param map the per-field counter map to update
     * @param list the schemes to count
     */
    private void addSchemesToStatistics(Map<String[], Integer> map, List<String[]> list) {
        for (String[] scheme : list) {
            if (!map.containsKey(scheme)) {
                statistics.getFieldInRecords().merge(scheme, 1, Integer::sum);
            }
            map.merge(scheme, 1, Integer::sum);
        }
    }

    /**
     * Returns the instance counter map kept for the given key, creating an empty one on
     * first use.
     *
     * @param str the key of the counter map
     * @return the counter map stored in the statistics under that key
     */
    private Map<String[], Integer> getFieldInstanceStatistics(String str) {
        return statistics.getFieldInstances().computeIfAbsent(str, key -> new HashMap<>());
    }

    /**
     * Returns the schemas collected for the record.
     *
     * @return the internal list, which is created by {@code process()}; {@code null}
     *     before {@code process()} has been called
     */
    public List<Schema> getSchemasInRecord() {
        return schemasInRecord;
    }

    /**
     * Returns the abbreviations of the schemas collected for the record, sorted and
     * without repetitions.
     *
     * @return a new list of distinct abbreviations in natural order
     */
    public List<String> getCollocationInRecord() {
        return schemasInRecord.stream()
            .map(Schema::getAbbreviation)
            .sorted()
            .distinct()
            .collect(Collectors.toList());
    }
}
