package de.uni_leipzig.dbs.pprl.primat.examples.dataowner;

import de.uni_leipzig.dbs.pprl.primat.common.csv.CSVWriter;
import de.uni_leipzig.dbs.pprl.primat.common.model.NamedRecordSchemaConfiguration;
import de.uni_leipzig.dbs.pprl.primat.common.model.attributes.NonQidAttributeType;
import de.uni_leipzig.dbs.pprl.primat.common.model.attributes.QidAttributeType;
import de.uni_leipzig.dbs.pprl.primat.common.utils.DatasetReader;
import de.uni_leipzig.dbs.pprl.primat.dataowner.preprocessing.FieldNormalizer;
import de.uni_leipzig.dbs.pprl.primat.dataowner.preprocessing.FieldSplitter;
import de.uni_leipzig.dbs.pprl.primat.dataowner.preprocessing.NormalizeDefinition;
import de.uni_leipzig.dbs.pprl.primat.dataowner.preprocessing.PartySupplier;
import de.uni_leipzig.dbs.pprl.primat.dataowner.preprocessing.SplitDefinition;
import de.uni_leipzig.dbs.pprl.primat.dataowner.preprocessing.normalizing.AccentRemover;
import de.uni_leipzig.dbs.pprl.primat.dataowner.preprocessing.normalizing.LetterLowerCaseToNumberNormalizer;
import de.uni_leipzig.dbs.pprl.primat.dataowner.preprocessing.normalizing.LetterUpperCaseToNumberNormalizer;
import de.uni_leipzig.dbs.pprl.primat.dataowner.preprocessing.normalizing.LowerCaseNormalizer;
import de.uni_leipzig.dbs.pprl.primat.dataowner.preprocessing.normalizing.Normalizer;
import de.uni_leipzig.dbs.pprl.primat.dataowner.preprocessing.normalizing.NormalizerChain;
import de.uni_leipzig.dbs.pprl.primat.dataowner.preprocessing.normalizing.SpecialCharacterRemover;
import de.uni_leipzig.dbs.pprl.primat.dataowner.preprocessing.normalizing.SubstringNormalizer;
import de.uni_leipzig.dbs.pprl.primat.dataowner.preprocessing.normalizing.TrimNormalizer;
import de.uni_leipzig.dbs.pprl.primat.dataowner.preprocessing.normalizing.UmlautNormalizer;
import de.uni_leipzig.dbs.pprl.primat.dataowner.preprocessing.splitting.BlankSplitter;
import de.uni_leipzig.dbs.pprl.primat.dataowner.preprocessing.splitting.DotSplitter;
import java.io.IOException;
import java.util.List;

/* loaded from: input_file:de/uni_leipzig/dbs/pprl/primat/examples/dataowner/Preprocessing.class */
/**
 * Example data-owner preprocessing run: reads a dataset, applies the party
 * supplier, field splitting and field normalization steps in that order, and
 * writes the resulting records with a {@link CSVWriter}.
 */
public class Preprocessing {
    /**
     * Runs the preprocessing example.
     *
     * @param strArr command-line arguments; {@code strArr[0]} is handed to the
     *               {@link DatasetReader} as the input location and
     *               {@code strArr[1]} to the {@link CSVWriter} as the output
     *               location. Any further arguments are ignored.
     * @throws IllegalArgumentException if fewer than two arguments are supplied
     * @throws IOException              declared for the read/write steps performed
     *                                  by the project reader and writer
     */
    public static void main(String[] strArr) throws IOException {
        // Fail fast with a usage hint instead of an ArrayIndexOutOfBoundsException
        // on the first array access.
        if (strArr == null || strArr.length < 2) {
            throw new IllegalArgumentException(
                "Usage: Preprocessing <input> <output> (got "
                    + (strArr == null ? 0 : strArr.length) + " argument(s))");
        }
        final String input = strArr[0];
        final String output = strArr[1];

        // Column layout of the input: column 0 is the record id, columns 1-4 are
        // string quasi-identifiers named FN, LN, MN and DOB.
        final NamedRecordSchemaConfiguration schema = new NamedRecordSchemaConfiguration.Builder()
            .add(0, NonQidAttributeType.ID)
            .add(1, QidAttributeType.STRING, "FN")
            .add(2, QidAttributeType.STRING, "LN")
            .add(3, QidAttributeType.STRING, "MN")
            .add(4, QidAttributeType.STRING, "DOB")
            .build();

        // Raw List kept on purpose: the record element type is not imported in this file.
        final List records = new DatasetReader(input, schema).read();

        // Step 1: party supplier.
        new PartySupplier().preprocess(records);

        // Step 2: field splitting.
        // NOTE(review): the schema above declares no attribute named "CITY_ZIP";
        // confirm whether this splitter is intentionally registered for it.
        final SplitDefinition splitDefinition = new SplitDefinition();
        splitDefinition.setSplitter("CITY_ZIP", new BlankSplitter(2));
        splitDefinition.setSplitter("DOB", new DotSplitter(3));
        new FieldSplitter(splitDefinition).preprocess(records);

        // Step 3: field normalization. Two chains are used: a text clean-up chain
        // and a letter-to-number chain.
        final NormalizerChain textChain = new NormalizerChain(List.of(
            new UmlautNormalizer(),
            new TrimNormalizer(),
            new LowerCaseNormalizer(),
            new AccentRemover(),
            new SpecialCharacterRemover(),
            new SubstringNormalizer(0, 12)));
        final NormalizerChain letterToNumberChain = new NormalizerChain(new Normalizer[]{
            new LetterLowerCaseToNumberNormalizer(),
            new LetterUpperCaseToNumberNormalizer()});

        // NOTE(review): the indices presumably address attribute positions after
        // splitting - verify against NormalizeDefinition.
        final NormalizeDefinition normalizeDefinition = new NormalizeDefinition();
        normalizeDefinition.setNormalizer(0, textChain);
        normalizeDefinition.setNormalizer(1, textChain);
        normalizeDefinition.setNormalizer(2, letterToNumberChain);
        normalizeDefinition.setNormalizer(3, textChain);
        normalizeDefinition.setNormalizer(4, letterToNumberChain);
        normalizeDefinition.setNormalizer(5, letterToNumberChain);
        normalizeDefinition.setNormalizer(6, letterToNumberChain);
        new FieldNormalizer(normalizeDefinition).preprocess(records);

        // Step 4: write the preprocessed records to the output location.
        new CSVWriter(output).writeRecords(records);
    }
}
