package it.uniroma1.lcl.jlt.util;

import com.fasterxml.jackson.databind.annotation.JsonPOJOBuilder;
import com.ibm.icu.impl.locale.LanguageTag;
import com.ibm.icu.text.DateFormat;
import edu.mit.jwi.item.POS;
import it.uniroma1.lcl.jlt.Configuration;
import it.uniroma1.lcl.jlt.wordnet.WordNet;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import org.apache.commons.lang3.BooleanUtils;
import org.apache.jena.sparql.sse.Tags;
import org.semanticweb.owlapi.rdf.rdfxml.parser.RDFConstants;

/* loaded from: input_file:it/uniroma1/lcl/jlt/util/EnglishLemmatizer.class */
/**
 * Rule-based English lemmatizer that combines a table of explicit
 * form-to-lemma entries (loaded once from the files listed by
 * {@link Configuration#getEnglishLemmaTable()}) with suffix-detachment rules
 * validated against WordNet.
 *
 * <p>Part-of-speech tags are single-letter strings: {@code "n"} (noun),
 * {@code "v"} (verb), {@code "a"} (adjective) and {@code "r"} (adverb).
 *
 * <p>The lemma table is a lazily initialised static field; initialisation is
 * not synchronised.
 */
public class EnglishLemmatizer extends Lemmatizer {
    /** Maps {@code form#pos} to the set of lemmas listed for that form; null until first construction. */
    private static HashMap<String, Set<String>> lemmaTable = null;

    // Suffix/ending arrays are parallel: suffix i is detached and ending i is attached in its place.
    static String[] nounSuffixes = {"s", "ses", "xes", "zes", "ches", "shes", "men", "ies"};
    static String[] verbSuffixes = {"s", "ies", "es", "es", "ed", "ed", "ing", "ing"};
    static String[] adjSuffixes = {"er", "est", "er", "est"};
    static String[] nounEndings = {"", "s", "x", "z", "ch", "sh", "man", "y"};
    static String[] verbEndings = {"", "y", "e", "", "e", "", "e", ""};
    // Must stay aligned with adjSuffixes: er->"", est->"", er->"e", est->"e".
    static String[] adjEndings = {"", "", "e", "e"};
    static Set<String> prepositions = new HashSet<>();
    static String[] prepositionsArray = {"to", "at", "of", "on", "off", "in", "out", "up", "down", "from", "with", "into", "for", "about", "between"};

    static {
        for (String preposition : prepositionsArray) {
            prepositions.add(preposition);
        }
    }

    /**
     * Creates a lemmatizer, loading the shared lemma table on first use.
     *
     * @throws IOException if one of the configured lemma table files cannot be read
     */
    public EnglishLemmatizer() throws IOException {
        if (lemmaTable == null) {
            // Publish only a fully loaded table so a failed load is retried by the next construction.
            lemmaTable = loadLemmaTable();
        }
    }

    /**
     * Reads every configured lemma table file into a fresh map.
     *
     * <p>Each line is split on single spaces into {@code form lemma [pos]}. When the
     * third field is absent the part of speech derived from the file name is used.
     * Lines with fewer than two fields are skipped.
     */
    private static HashMap<String, Set<String>> loadLemmaTable() throws IOException {
        HashMap<String, Set<String>> table = new HashMap<>();
        for (String path : Configuration.getInstance().getEnglishLemmaTable()) {
            String defaultPos = posFromFileName(path);
            try (BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(path), "ISO-8859-1"))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    String[] fields = line.split(" ");
                    if (fields.length < 2) {
                        continue;
                    }
                    // NOTE(review): the third column is treated as a POS tag, as the original code did — confirm the file format.
                    String pos = fields.length > 2 ? fields[2] : defaultPos;
                    // The same key is used for lookup and insertion so repeated forms accumulate lemmas.
                    String key = fields[0] + "#" + pos;
                    Set<String> lemmas = table.get(key);
                    if (lemmas == null) {
                        lemmas = new HashSet<>();
                        table.put(key, lemmas);
                    }
                    lemmas.add(fields[1]);
                }
            }
        }
        return table;
    }

    /**
     * Derives the default POS tag from a lemma table file path; falls back to {@code "n"}.
     */
    private static String posFromFileName(String path) {
        // NOTE(review): a name containing "adverb" matches "verb" first — confirm the configured file names.
        if (path.indexOf("noun") != -1) {
            return "n";
        }
        if (path.indexOf("verb") != -1) {
            return "v";
        }
        if (path.indexOf("adj") != -1) {
            return "a";
        }
        if (path.indexOf("adv") != -1) {
            return "r";
        }
        return "n";
    }

    /**
     * Returns the union of the lemmas found for {@code str} under each of the four POS tags.
     *
     * @param str the word or multi-word expression to lemmatize
     * @return a new set of lemmas, possibly empty
     */
    @Override // it.uniroma1.lcl.jlt.util.Lemmatizer
    public Set<String> getLemmas(String str) {
        Set<String> all = new HashSet<>();
        for (String pos : new String[]{"n", "v", "a", "r"}) {
            Set<String> lemmas = getLemmas(str, pos);
            if (lemmas != null) {
                all.addAll(lemmas);
            }
        }
        return all;
    }

    /**
     * Like {@link #getLemmas(String)}, but every lemma is suffixed with {@code #pos}.
     *
     * @param str the word or multi-word expression to lemmatize
     * @return a new set of {@code lemma#pos} strings, possibly empty
     */
    public Set<String> getLemmasWithPos(String str) {
        Set<String> tagged = new HashSet<>();
        for (String pos : new String[]{"n", "v", "a", "r"}) {
            Set<String> lemmas = getLemmas(str, pos);
            if (lemmas != null) {
                for (String lemma : lemmas) {
                    tagged.add(lemma + "#" + pos);
                }
            }
        }
        return tagged;
    }

    /**
     * Lemmatizes {@code str} for the given part of speech.
     *
     * <p>The input is lower-cased and spaces become underscores. The whole
     * expression is tried first; expressions of three or more tokens that
     * contain a preposition are handed to {@link #getVerbPrepLemma(String)};
     * otherwise tokens are lemmatized individually (every token for verbs,
     * only the last token for other parts of speech) and re-joined with the
     * original separators.
     *
     * @param str  the word or multi-word expression to lemmatize
     * @param str2 the POS tag ({@code "n"}, {@code "v"}, {@code "a"} or {@code "r"})
     * @return a set of lemmas; never contains {@code null} or the empty string
     *         produced by the verb-preposition path
     */
    @Override // it.uniroma1.lcl.jlt.util.Lemmatizer
    public Set<String> getLemmas(String str, String str2) {
        String normalized = str.toLowerCase().replace(' ', '_');
        // NOTE(review): table keys have the form "form#pos", so this lookup only hits when the
        // input already carries such a suffix. Kept for compatibility; a copy is returned so
        // callers cannot mutate the shared table.
        Set<String> direct = lemmaTable.get(normalized);
        if (direct != null) {
            return new HashSet<>(direct);
        }
        Set<String> result = new HashSet<>();
        int tokenCount = Strings.count(normalized, '_') + 1;
        boolean isVerb = str2.equals("v");
        String wholeLemma = getWordNetLemma(normalized, str2);
        if (wholeLemma != null) {
            result.add(wholeLemma);
            if (!wholeLemma.equals(normalized)) {
                return result;
            }
        }
        if (tokenCount > 2 && hasPrep(normalized)) {
            // getVerbPrepLemma signals "nothing found" with null or ""; neither is a lemma.
            String prepLemma = getVerbPrepLemma(normalized);
            if (prepLemma != null && !prepLemma.isEmpty()) {
                result.add(prepLemma);
            }
            return result;
        }
        StringBuilder rebuilt = new StringBuilder();
        int offset = 0;
        String[] tokens = normalized.split("[-_]");
        for (int t = 0; t < tokens.length; t++) {
            String token = tokens[t];
            boolean isLast = t == tokens.length - 1;
            String tokenLemma = (isVerb || isLast) ? getWordNetLemma(token, str2, true) : null;
            rebuilt.append(tokenLemma != null ? tokenLemma : token);
            offset += token.length();
            if (!isLast) {
                // Re-insert the separator ('-' or '_') that followed this token in the input.
                rebuilt.append(normalized.charAt(offset));
            }
            offset++;
        }
        String candidate = rebuilt.toString();
        if (candidate.equals(normalized) || !isDefined(candidate, str2)) {
            return Sets.varargsToHashSet(candidate);
        }
        result.add(candidate);
        return result;
    }

    /**
     * Attempts to lemmatize an underscore-separated expression whose first token is a verb.
     *
     * <p>If the last token has a noun lemma, that lemma is remembered and the token is
     * excluded from the joined tail. Candidates are built from the lemma-table entry of the
     * first token, then from the first token itself, then from each suffix-detachment
     * of the first token; the first candidate defined as a verb in WordNet wins.
     * When nothing is defined, the surface form joined with the tail (or with the noun
     * lemma) is returned if it differs from the input.
     *
     * @param str the underscore-separated, already normalized expression
     * @return the lemma; {@code null} if the first token contains a character that is
     *         neither a letter nor a digit; {@code ""} if no candidate differing from the
     *         input could be produced
     */
    public String getVerbPrepLemma(String str) {
        String[] tokens = str.split("_");
        int lastIndex = tokens.length - 1;
        String nounLemma = "";
        if (tokens.length >= 2) {
            String lastAsNoun = getWordNetLemma(tokens[tokens.length - 1], "n");
            if (lastAsNoun != null) {
                lastIndex--;
                nounLemma = lastAsNoun;
            }
        }
        String head = tokens[0];
        for (int i = 0; i < head.length(); i++) {
            if (!Character.isLetterOrDigit(head.charAt(i))) {
                return null;
            }
        }
        String tail = Strings.join(tokens, 1, lastIndex, "_");
        Set<String> tableLemmas = lemmaTable.get(head + "#v");
        if (tableLemmas != null) {
            // The set is unordered, so "first" is whichever lemma the iterator yields.
            String tableLemma = tableLemmas.iterator().next();
            if (!head.equals(tableLemma)) {
                // Prefer the table lemma of the verb, then fall back to the surface form.
                String fromTable = firstDefinedVerb(tableLemma, tail, nounLemma);
                if (fromTable != null) {
                    return fromTable;
                }
                String fromSurface = firstDefinedVerb(head, tail, nounLemma);
                if (fromSurface != null) {
                    return fromSurface;
                }
            }
        }
        for (int i = 0; i < verbSuffixes.length; i++) {
            if (!head.endsWith(verbSuffixes[i])) {
                continue;
            }
            String stem = head.substring(0, head.length() - verbSuffixes[i].length()) + verbEndings[i];
            if (stem.equals(head)) {
                continue;
            }
            String fromStem = firstDefinedVerb(stem, tail, nounLemma);
            if (fromStem != null) {
                return fromStem;
            }
        }
        String surface = head + "_" + tail;
        if (!surface.equals(str)) {
            return surface;
        }
        if (nounLemma.length() <= 0) {
            return "";
        }
        String surfaceWithNoun = head + "_" + nounLemma;
        return !surfaceWithNoun.equals(str) ? surfaceWithNoun : "";
    }

    /**
     * Returns {@code verb_tail} or, failing that, {@code verb_nounLemma} (when a noun lemma
     * is available) if WordNet defines it as a verb; otherwise {@code null}.
     */
    private String firstDefinedVerb(String verb, String tail, String nounLemma) {
        String withTail = verb + "_" + tail;
        if (isDefined(withTail, "v")) {
            return withTail;
        }
        if (nounLemma.length() > 0) {
            String withNoun = verb + "_" + nounLemma;
            if (isDefined(withNoun, "v")) {
                return withNoun;
            }
        }
        return null;
    }

    /**
     * Returns a lemma of {@code str} for the given POS, or {@code null} if none is found.
     * The word itself is returned when WordNet already defines it.
     *
     * @param str  the word to lemmatize
     * @param str2 the POS tag
     * @return the lemma or {@code null}
     */
    public String getWordNetLemma(String str, String str2) {
        return getWordNetLemma(str, str2, false);
    }

    /**
     * Core lemma lookup.
     *
     * @param str  the word to lemmatize
     * @param str2 the POS tag
     * @param z    when {@code true}, a lemma-table entry is returned immediately and the
     *             "word is already defined" shortcut is skipped
     * @return the lemma, or {@code null} for adverbs, for nouns ending in "ss" or of at most
     *         two characters (unless they end in "ful"), for unrecognised POS tags, and when
     *         no detachment rule yields a WordNet-defined word
     */
    private String getWordNetLemma(String str, String str2, boolean z) {
        Set<String> tableLemmas = lemmaTable.get(str + "#" + str2);
        if (z && tableLemmas != null) {
            return tableLemmas.iterator().next();
        }
        if (str2.equals("r")) {
            return null;
        }
        String stem = str;
        String reattached = "";
        String[] suffixes = null;
        String[] endings = null;
        if (str2.equals("n")) {
            if (str.endsWith("ful")) {
                // Lemmatize the part before "ful" and put the suffix back on the result.
                stem = str.substring(0, str.length() - 3);
                reattached = "ful";
            } else if (str.endsWith("ss") || str.length() <= 2) {
                return null;
            }
            suffixes = nounSuffixes;
            endings = nounEndings;
        } else if (str2.equals("a")) {
            suffixes = adjSuffixes;
            endings = adjEndings;
        } else if (str2.equals("v")) {
            suffixes = verbSuffixes;
            endings = verbEndings;
        }
        if (!z && isDefined(stem, str2)) {
            return stem + reattached;
        }
        if (suffixes == null) {
            // Unrecognised POS tag: there are no detachment rules to apply.
            return null;
        }
        for (int i = 0; i < suffixes.length; i++) {
            if (stem.endsWith(suffixes[i])) {
                String candidate = stem.substring(0, stem.length() - suffixes[i].length()) + endings[i];
                if (isDefined(candidate, str2)) {
                    return candidate + reattached;
                }
            }
        }
        return null;
    }

    /**
     * Tells whether WordNet has at least one sense for {@code str} with the given POS.
     * Unrecognised POS tags are treated as nouns.
     *
     * @param str  the word to look up
     * @param str2 the POS tag
     * @return {@code true} if at least one sense exists
     */
    public boolean isDefined(String str, String str2) {
        POS pos = POS.NOUN;
        if (str2.equals("a")) {
            pos = POS.ADJECTIVE;
        } else if (str2.equals("v")) {
            pos = POS.VERB;
        } else if (str2.equals("r")) {
            pos = POS.ADVERB;
        }
        return WordNet.getInstance().getSenses(str, pos).size() > 0;
    }

    /**
     * Tells whether any token after the first one of an underscore-separated
     * expression is one of the known prepositions.
     *
     * @param str the underscore-separated expression
     * @return {@code true} if a preposition is found at index 1 or later
     */
    public boolean hasPrep(String str) {
        String[] tokens = str.split("_");
        for (int i = 1; i < tokens.length; i++) {
            if (prepositions.contains(tokens[i])) {
                return true;
            }
        }
        return false;
    }

    /**
     * Prints the lemmas of a fixed list of sample inputs to standard output.
     */
    public static void main(String[] strArr) throws IOException {
        EnglishLemmatizer englishLemmatizer = new EnglishLemmatizer();
        System.out.println(englishLemmatizer.getLemmas("saw", "v"));
        System.out.println(englishLemmatizer.getLemmas("arms", "n"));
        System.out.println(englishLemmatizer.getLemmas("saw", "n"));
        System.out.println(englishLemmatizer.getLemmas("bus_arms", "n"));
        System.out.println(englishLemmatizer.getWordNetLemma("bus_driver", "n"));
        System.out.println(englishLemmatizer.getWordNetLemma("buses_driver", "n"));
        System.out.println(englishLemmatizer.getWordNetLemma("bus_drivers", "n"));
        System.out.println(englishLemmatizer.getWordNetLemma("buses_drivers", "n"));
        System.out.println(englishLemmatizer.getLemmas("doing", "v"));
        System.out.println(englishLemmatizer.getLemmas("doing", "v"));
        System.out.println(englishLemmatizer.getLemmas("done", "v"));
        System.out.println(englishLemmatizer.getLemmas("babies", "n"));
        System.out.println(englishLemmatizer.getLemmas("churches", "n"));
        System.out.println(englishLemmatizer.getLemmas("dizzier", "a"));
        System.out.println(englishLemmatizer.getLemmas("taxi driver", "n"));
        System.out.println(englishLemmatizer.getLemmas("went away", "v"));
        System.out.println(englishLemmatizer.getLemmas("go away", "v"));
        System.out.println(englishLemmatizer.getLemmas("apple companies", "n"));
        System.out.println(englishLemmatizer.getLemmas("apple company", "n"));
        System.out.println(englishLemmatizer.getLemmas("kilos kilos", "n"));
        System.out.println(englishLemmatizer.getLemmas("buses driver", "n"));
        System.out.println(englishLemmatizer.getLemmas("buses drivers", "n"));
        System.out.println(englishLemmatizer.getLemmas("bus drivers", "n"));
        System.out.println(englishLemmatizer.getLemmas("went", "v"));
        System.out.println(englishLemmatizer.getLemmas("did", "v"));
        System.out.println(englishLemmatizer.getLemmas("talked", "v"));
    }
}
