package GNormPluslib;

import com.pengyifan.bioc.BioCAnnotation;
import com.pengyifan.bioc.BioCCollection;
import com.pengyifan.bioc.BioCDocument;
import com.pengyifan.bioc.BioCLocation;
import com.pengyifan.bioc.BioCPassage;
import com.pengyifan.bioc.io.BioCCollectionReader;
import com.pengyifan.bioc.io.BioCCollectionWriter;
import com.pengyifan.bioc.io.BioCDocumentWriter;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.time.LocalDate;
import java.time.ZoneId;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.xml.stream.XMLStreamException;

/* loaded from: input_file:GNormPluslib/BioCDoc.class */
public class BioCDoc {
    /** Document IDs, one entry per document; the BioC reader methods of this class append {@code getID()} of each document. */
    public ArrayList<String> PMIDs = new ArrayList<>();
    /** Per document (outer list), the "type" infon of each passage in passage order. */
    public ArrayList<ArrayList<String>> PassageNames = new ArrayList<>();
    /** Per document (outer list), the offset reported by each passage. */
    public ArrayList<ArrayList<Integer>> PassageOffsets = new ArrayList<>();
    /** Per document (outer list), the character-normalized text of each passage; "-notext-" stands in for empty or space-only text. */
    public ArrayList<ArrayList<String>> PassageContexts = new ArrayList<>();
    /**
     * Per document and per passage, the annotation records as tab-separated strings
     * ({@code start \t end \t mention \t type [\t identifier]}). {@code BioCReader} stores an empty list
     * for every passage; {@code BioCReaderWithAnnotation} fills it with passage-relative offsets.
     */
    public ArrayList<ArrayList<ArrayList<String>>> Annotations = new ArrayList<>();

    /**
     * Detects whether a file is BioC XML or PubTator text.
     *
     * <p>The file is first parsed as a BioC collection. If that attempt fails with a
     * {@link FileNotFoundException}, {@link UnsupportedEncodingException} or {@link XMLStreamException},
     * the file is re-read line by line and checked against the PubTator layout:
     * <ul>
     *   <li>a passage line looks like {@code PMID|type|text};</li>
     *   <li>all passage lines of one record carry the same PMID;</li>
     *   <li>every record, including the last one, is terminated by exactly one empty line.</li>
     * </ul>
     * Lines that are neither passage lines nor empty (e.g. tab-separated annotation rows) are ignored.
     *
     * @param str path of the file to inspect
     * @return {@code "BioC"}, {@code "PubTator"}, or a human-readable message starting with {@code "[Error]: "}
     * @throws IOException if the file cannot be read during the PubTator check, or on another I/O failure
     */
    public String BioCFormatCheck(String str) throws IOException {
        try (InputStreamReader xmlInput = new InputStreamReader(new FileInputStream(str), "UTF-8");
                BioCCollectionReader collectionReader = new BioCCollectionReader(xmlInput)) {
            collectionReader.readCollection();
            return "BioC";
        } catch (FileNotFoundException | UnsupportedEncodingException | XMLStreamException notBioC) {
            // Deliberately not rethrown: failing to parse as BioC just means we try PubTator next.
            Pattern passageLine = Pattern.compile("^([^\\|\\t]+)\\|([^\\|\\t]+)\\|(.*)$");
            String currentPmid = "";   // PMID of the record being read; empty between records
            String previousPmid = "";  // PMID of the most recently closed record, for error reporting
            boolean sawAnyPassage = false;
            try (BufferedReader lines = new BufferedReader(new InputStreamReader(new FileInputStream(str), "UTF-8"))) {
                String line;
                while ((line = lines.readLine()) != null) {
                    Matcher matcher = passageLine.matcher(line);
                    if (matcher.find()) {
                        String pmid = matcher.group(1);
                        if (currentPmid.isEmpty()) {
                            currentPmid = pmid;
                        } else if (!currentPmid.equals(pmid)) {
                            return "[Error]: " + str + " - A blank is needed between " + currentPmid + " and " + pmid + ".";
                        }
                        sawAnyPassage = true;
                    } else if (line.isEmpty()) {
                        if (currentPmid.isEmpty()) {
                            // An empty line while no record is open: either the file starts with a blank
                            // line, or a record was followed by more than one blank line.
                            return previousPmid.isEmpty()
                                    ? "[Error]: " + str + " - It's neither BioC nor PubTator format. PMID is empty."
                                    : "[Error]: " + str + " - A redundant blank is after " + previousPmid + ".";
                        }
                        previousPmid = currentPmid;
                        currentPmid = "";
                    }
                }
            }
            if (!sawAnyPassage) {
                return "[Error]: " + str + " - It's neither BioC nor PubTator format.";
            }
            // A record still open at end of file means the terminating empty line is missing.
            return currentPmid.isEmpty() ? "PubTator" : "[Error]: " + str + " - The last column missed a blank.";
        }
    }

    /**
     * Converts a PubTator file into a BioC XML collection.
     *
     * <p>Input lines are interpreted as follows:
     * <ul>
     *   <li>a line containing {@code |} and no tab is a passage line {@code PMID|type|text}; types
     *       {@code t} and {@code a} are renamed to {@code title} and {@code abstract};</li>
     *   <li>a line containing a tab is an annotation row; only rows with 5 or 6 columns are kept
     *       (the leading PMID column is dropped);</li>
     *   <li>an empty line closes the current record and emits one BioC document.</li>
     * </ul>
     * A record that is not followed by an empty line is not emitted.
     *
     * <p>Passage text is reduced to a restricted ASCII character set: known non-ASCII characters are
     * replaced by a look-alike and every other character outside the allowed set becomes a space.
     * The passage text is split off with a limit of three fields, so text that itself contains
     * {@code |} is kept intact.
     *
     * @param str  path of the PubTator input file (read as UTF-8)
     * @param str2 path of the BioC XML file to write (UTF-8)
     * @throws IOException        if reading or writing fails
     * @throws XMLStreamException if the BioC writer fails
     */
    public void PubTator2BioC(String str, String str2) throws IOException, XMLStreamException {
        // Literal replacements, applied in this order. Each entry appears to map one character to one
        // character, which keeps annotation offsets valid.
        String[][] asciiFallbacks = {
            {"ω", "w"}, {"μ", "u"}, {"κ", "k"}, {"α", "a"}, {"γ", "g"}, {"ɣ", "g"}, {"β", "b"}, {"×", "x"},
            {"‑", "-"}, {"¹", "1"}, {"²", "2"}, {"°", "o"}, {"ö", "o"}, {"é", "e"}, {"à", "a"}, {"Á", "A"},
            {"ε", "e"}, {"θ", "O"}, {"•", "."}, {"µ", "u"}, {"λ", "r"}, {"⁺", "+"}, {"ν", "v"}, {"ï", "i"},
            {"ã", "a"}, {"≡", "="}, {"ó", "o"}, {"³", "3"}, {"〖", "["}, {"〗", "]"}, {"Å", "A"}, {"ρ", "p"},
            {"ü", "u"}, {"ɛ", "e"}, {"č", "c"}, {"š", "s"}, {"ß", "b"}, {"═", "="}, {"£", "L"}, {"Ł", "L"},
            {"ƒ", "f"}, {"ä", "a"}, {"–", "-"}, {"⁻", "-"}, {"〈", "<"}, {"〉", ">"}, {"χ", "X"}, {"Đ", "D"},
            {"‰", "%"}, {"·", "."}, {"→", ">"}, {"←", "<"}, {"ζ", "z"}, {"π", "p"}, {"τ", "t"}, {"ξ", "X"},
            {"η", "h"}, {"ø", "0"}, {"Δ", "D"}, {"∆", "D"}, {"∑", "S"}, {"Ω", "O"}, {"δ", "d"}, {"σ", "s"},
            {"Φ", "F"}
        };
        // Anything not in this allow-list is blanked out after the look-alike replacements.
        Pattern disallowedChars = Pattern.compile("[^\\~\\!\\@\\#\\$\\%\\^\\&\\*\\(\\)\\_\\+\\{\\}\\|\\:\"\\<\\>\\?\\`\\-\\=\\[\\]\\;\\'\\,\\.\\/\\r\\n0-9a-zA-Z ]");

        // The output file is opened before the input, as before, so it is created even if reading fails.
        try (BioCCollectionWriter collectionWriter = new BioCCollectionWriter(new OutputStreamWriter(new FileOutputStream(str2), "UTF-8"));
                BufferedReader pubtatorReader = new BufferedReader(new InputStreamReader(new FileInputStream(str), "UTF-8"))) {
            BioCCollection collection = new BioCCollection();
            collection.setDate(LocalDate.now(ZoneId.of("America/Montreal")).toString());
            collection.setKey("BioC.key");
            collection.setSource("GNormPlus");

            ArrayList<String> passageTypes = new ArrayList<>();
            ArrayList<String> passageTexts = new ArrayList<>();
            ArrayList<String> annotationRows = new ArrayList<>();
            String pmid = "";
            String line;
            while ((line = pubtatorReader.readLine()) != null) {
                boolean hasTab = line.contains("\t");
                if (line.contains("|") && !hasTab) {
                    // Limit 3: everything after the second '|' is passage text, even if it contains '|'.
                    String[] parts = line.split("\\|", 3);
                    pmid = parts[0];
                    String type = parts[1];
                    if (type.equals("t")) {
                        type = "title";
                    } else if (type.equals("a")) {
                        type = "abstract";
                    }
                    passageTypes.add(type);
                    if (parts.length == 3) {
                        String text = parts[2];
                        for (String[] fallback : asciiFallbacks) {
                            text = text.replace(fallback[0], fallback[1]);
                        }
                        passageTexts.add(disallowedChars.matcher(text).replaceAll(" "));
                    } else {
                        passageTexts.add("- No text -");
                    }
                } else if (hasTab) {
                    String[] columns = line.split("\t");
                    if (columns.length == 5 || columns.length == 6) {
                        // Drop the PMID column; keep start, end, mention, type and the optional identifier.
                        annotationRows.add(String.join("\t", Arrays.copyOfRange(columns, 1, columns.length)));
                    }
                } else if (line.isEmpty()) {
                    BioCDocument document = new BioCDocument();
                    document.setID(pmid);
                    int passageStart = 0;
                    for (int p = 0; p < passageTypes.size(); p++) {
                        String text = passageTexts.get(p);
                        BioCPassage passage = new BioCPassage();
                        HashMap<String, String> passageInfons = new HashMap<>();
                        passageInfons.put("type", passageTypes.get(p));
                        passage.setInfons(passageInfons);
                        passage.setText(text);
                        passage.setOffset(passageStart);
                        // Exclusive end; the extra 1 is presumably the line break between passages.
                        int passageEnd = passageStart + text.length() + 1;
                        for (String row : annotationRows) {
                            String[] fields = row.split("\t");
                            int start = Integer.parseInt(fields[0]);
                            if (start >= passageStart && start < passageEnd) {
                                int end = Integer.parseInt(fields[1]);
                                BioCAnnotation annotation = new BioCAnnotation();
                                HashMap<String, String> annotationInfons = new HashMap<>();
                                if (fields.length == 5) {
                                    annotationInfons.put("Identifier", fields[4]);
                                }
                                annotationInfons.put("type", fields[3]);
                                annotation.setInfons(annotationInfons);
                                annotation.setLocations(Set.of(new BioCLocation(start, end - start)));
                                annotation.setText(fields[2]);
                                passage.addAnnotation(annotation);
                            }
                        }
                        passageStart = passageEnd;
                        document.addPassage(passage);
                    }
                    collection.addDocument(document);
                    passageTypes.clear();
                    passageTexts.clear();
                    annotationRows.clear();
                }
            }
            collectionWriter.writeCollection(collection);
        }
    }

    /**
     * Converts a BioC XML collection into PubTator text.
     *
     * <p>For each document, every passage is written as {@code PMID|type|text} (with {@code title} and
     * {@code abstract} abbreviated to {@code t} and {@code a}), followed by all annotation rows of the
     * document ({@code PMID \t start \t end \t mention \t type \t identifier}) and an empty line.
     *
     * <p>Identifier selection per annotation type:
     * <ul>
     *   <li>{@code Gene}, {@code FamilyName}, {@code DomainMotif}: the {@code NCBI Gene} infon when present,
     *       split on {@code ;}, each ID decorated with {@code (UniProt:..;Homoid:..)} when
     *       {@code GNormPlus.Normalization2Protein_hash} / {@code GNormPlus.HomologeneID_hash} have an
     *       entry; otherwise the {@code Identifier} infon, written as the text {@code null} when absent;</li>
     *   <li>{@code Species}, {@code Genus}, {@code Strain}, {@code CellLine}: the {@code NCBI Taxonomy}
     *       infon when present, otherwise the {@code Identifier} infon;</li>
     *   <li>any other type: the {@code Identifier} infon.</li>
     * </ul>
     *
     * <p>Duplicate document IDs are reported on standard output; if any were seen, the JVM is
     * terminated with {@code System.exit(0)} after the output file has been written and closed.
     *
     * @param str  path of the BioC XML input file (read as UTF-8)
     * @param str2 path of the PubTator file to write (UTF-8)
     * @throws IOException        if reading or writing fails
     * @throws XMLStreamException if the BioC input cannot be parsed
     * @throws java.util.NoSuchElementException if a passage lacks a type or text, or an annotation
     *         lacks a type, a mention text, or a required identifier infon
     */
    public void BioC2PubTator(String str, String str2) throws IOException, XMLStreamException {
        HashMap<String, String> seenIds = new HashMap<>();
        boolean duplicateFound = false;
        // The output is opened first, as before, so it is created even if the input cannot be parsed.
        try (BufferedWriter pubtatorWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(str2), "UTF-8"));
                InputStreamReader xmlInput = new InputStreamReader(new FileInputStream(str), "UTF-8");
                BioCCollectionReader collectionReader = new BioCCollectionReader(xmlInput)) {
            Iterator<BioCDocument> documents = collectionReader.readCollection().documentIterator();
            while (documents.hasNext()) {
                BioCDocument document = documents.next();
                String id = document.getID();
                if (seenIds.containsKey(id)) {
                    System.out.println("\nError: duplicate pmid-" + id);
                    duplicateFound = true;
                } else {
                    seenIds.put(id, "");
                }
                StringBuilder annotationRows = new StringBuilder();
                for (BioCPassage passage : document.getPassages()) {
                    String passageType = passage.getInfon("type").get();
                    String marker = passageType;
                    if (passageType.equals("title")) {
                        marker = "t";
                    } else if (passageType.equals("abstract")) {
                        marker = "a";
                    }
                    pubtatorWriter.write(id + "|" + marker + "|" + passage.getText().get() + "\n");

                    for (BioCAnnotation annotation : passage.getAnnotations()) {
                        String type = annotation.getInfon("type").get();
                        String identifier;
                        if (type.matches("(Gene|FamilyName|DomainMotif)")) {
                            if (annotation.getInfons().containsKey("NCBI Gene")) {
                                StringBuilder geneIds = new StringBuilder();
                                for (String geneId : annotation.getInfon("NCBI Gene").get().split(";")) {
                                    String protein = GNormPlus.Normalization2Protein_hash.containsKey(geneId) ? GNormPlus.Normalization2Protein_hash.get(geneId) : "";
                                    String homolog = GNormPlus.HomologeneID_hash.containsKey(geneId) ? GNormPlus.HomologeneID_hash.get(geneId) : "";
                                    // The separator depends on whether anything was emitted so far, not on the index.
                                    if (geneIds.length() > 0) {
                                        geneIds.append(';');
                                    }
                                    geneIds.append(geneId);
                                    if (!protein.equals("") || !homolog.equals("")) {
                                        geneIds.append('(');
                                        if (!protein.equals("")) {
                                            geneIds.append("UniProt:").append(protein);
                                        }
                                        if (!homolog.equals("")) {
                                            if (!protein.equals("")) {
                                                geneIds.append(';');
                                            }
                                            geneIds.append("Homoid:").append(homolog);
                                        }
                                        geneIds.append(')');
                                    }
                                }
                                identifier = geneIds.toString();
                            } else {
                                // A missing identifier is written out as the text "null".
                                identifier = annotation.getInfon("Identifier").orElse(null);
                            }
                        } else {
                            boolean taxonomyTyped = type.equals("Species") || type.equals("Genus") || type.equals("Strain") || type.equals("CellLine");
                            if (taxonomyTyped && annotation.getInfons().containsKey("NCBI Taxonomy")) {
                                identifier = annotation.getInfon("NCBI Taxonomy").get();
                            } else {
                                identifier = annotation.getInfon("Identifier").get();
                            }
                        }
                        int start = annotation.getTotalLocation().getOffset();
                        int end = start + annotation.getTotalLocation().getLength();
                        annotationRows.append(id).append('\t').append(start).append('\t').append(end).append('\t')
                                .append(annotation.getText().get()).append('\t').append(type).append('\t')
                                .append(identifier).append('\n');
                    }
                }
                pubtatorWriter.write(annotationRows + "\n");
            }
        }
        if (duplicateFound) {
            System.exit(0);
        }
    }

    /**
     * Converts a BioC XML collection into PubTator text, taking title and abstract text from a
     * separate PubTator file instead of from the BioC passages.
     *
     * <p>The PubTator file {@code str} is scanned for passage lines ({@code PMID|type|text}, no tab);
     * their text is remembered by PMID and type. The text is split off with a limit of three fields,
     * so text containing {@code |} is kept intact; a passage line without a text field is stored as
     * an empty string. Then every document of the BioC file {@code str2} is written to {@code str3}:
     * <ul>
     *   <li>passages typed {@code title}/{@code t} or {@code abstract}/{@code a} use the remembered
     *       text for {@code PMID|t} or {@code PMID|a} (the text {@code null} if none was found);</li>
     *   <li>other passages use their own BioC text;</li>
     *   <li>annotation rows follow all passages of the document, then an empty line.</li>
     * </ul>
     *
     * <p>Identifier selection matches the two-argument overload, except that an annotation of any other
     * type without an {@code Identifier} infon gets an empty identifier, and a gene-like annotation
     * without {@code NCBI Gene} and without {@code Identifier} is written without the identifier column.
     *
     * <p>Duplicate document IDs are reported on standard output; if any were seen, the JVM is
     * terminated with {@code System.exit(0)} after the output file has been written and closed.
     *
     * @param str  path of the PubTator file supplying the original passage text (read as UTF-8)
     * @param str2 path of the BioC XML input file (read as UTF-8)
     * @param str3 path of the PubTator file to write (UTF-8)
     * @throws IOException        if reading or writing fails
     * @throws XMLStreamException if the BioC input cannot be parsed
     * @throws java.util.NoSuchElementException if a required passage or annotation value is absent
     */
    public void BioC2PubTator(String str, String str2, String str3) throws IOException, XMLStreamException {
        // Key: PMID + "\t" + passage type as written in the PubTator file; value: passage text.
        HashMap<String, String> originalTexts = new HashMap<>();
        try (BufferedReader pubtatorReader = new BufferedReader(new InputStreamReader(new FileInputStream(str), "UTF-8"))) {
            String line;
            while ((line = pubtatorReader.readLine()) != null) {
                if (line.contains("|") && !line.contains("\t")) {
                    // Limit 3: everything after the second '|' is passage text, even if it contains '|'.
                    String[] parts = line.split("\\|", 3);
                    originalTexts.put(parts[0] + "\t" + parts[1], parts.length > 2 ? parts[2] : "");
                }
            }
        }

        HashMap<String, String> seenIds = new HashMap<>();
        boolean duplicateFound = false;
        try (BufferedWriter pubtatorWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(str3), "UTF-8"));
                InputStreamReader xmlInput = new InputStreamReader(new FileInputStream(str2), "UTF-8");
                BioCCollectionReader collectionReader = new BioCCollectionReader(xmlInput)) {
            Iterator<BioCDocument> documents = collectionReader.readCollection().documentIterator();
            while (documents.hasNext()) {
                BioCDocument document = documents.next();
                String id = document.getID();
                if (seenIds.containsKey(id)) {
                    System.out.println("\nError: duplicate pmid-" + id);
                    duplicateFound = true;
                } else {
                    seenIds.put(id, "");
                }
                StringBuilder annotationRows = new StringBuilder();
                for (BioCPassage passage : document.getPassages()) {
                    String passageType = passage.getInfon("type").get();
                    if (passageType.equals("title") || passageType.equals("t")) {
                        pubtatorWriter.write(id + "|t|" + originalTexts.get(id + "\tt") + "\n");
                    } else if (passageType.equals("abstract") || passageType.equals("a")) {
                        pubtatorWriter.write(id + "|a|" + originalTexts.get(id + "\ta") + "\n");
                    } else {
                        pubtatorWriter.write(id + "|" + passageType + "|" + passage.getText().get() + "\n");
                    }

                    for (BioCAnnotation annotation : passage.getAnnotations()) {
                        String type = annotation.getInfon("type").get();
                        String identifier;
                        if (type.matches("(Gene|FamilyName|DomainMotif)")) {
                            if (annotation.getInfons().containsKey("NCBI Gene")) {
                                StringBuilder geneIds = new StringBuilder();
                                for (String geneId : annotation.getInfon("NCBI Gene").get().split(";")) {
                                    String protein = GNormPlus.Normalization2Protein_hash.containsKey(geneId) ? GNormPlus.Normalization2Protein_hash.get(geneId) : "";
                                    String homolog = GNormPlus.HomologeneID_hash.containsKey(geneId) ? GNormPlus.HomologeneID_hash.get(geneId) : "";
                                    // The separator depends on whether anything was emitted so far, not on the index.
                                    if (geneIds.length() > 0) {
                                        geneIds.append(';');
                                    }
                                    geneIds.append(geneId);
                                    if (!protein.equals("") || !homolog.equals("")) {
                                        geneIds.append('(');
                                        if (!protein.equals("")) {
                                            geneIds.append("UniProt:").append(protein);
                                        }
                                        if (!homolog.equals("")) {
                                            if (!protein.equals("")) {
                                                geneIds.append(';');
                                            }
                                            geneIds.append("Homoid:").append(homolog);
                                        }
                                        geneIds.append(')');
                                    }
                                }
                                identifier = geneIds.toString();
                            } else {
                                // null (rather than an exception) lets the row be written without an ID column.
                                identifier = annotation.getInfon("Identifier").orElse(null);
                            }
                        } else {
                            boolean taxonomyTyped = type.equals("Species") || type.equals("Genus") || type.equals("Strain") || type.equals("CellLine");
                            if (taxonomyTyped) {
                                identifier = annotation.getInfons().containsKey("NCBI Taxonomy")
                                        ? annotation.getInfon("NCBI Taxonomy").get()
                                        : annotation.getInfon("Identifier").get();
                            } else {
                                identifier = annotation.getInfons().containsKey("Identifier")
                                        ? annotation.getInfon("Identifier").get()
                                        : "";
                            }
                        }
                        int start = annotation.getTotalLocation().getOffset();
                        int end = start + annotation.getTotalLocation().getLength();
                        String mention = annotation.getText().get();
                        annotationRows.append(id).append('\t').append(start).append('\t').append(end).append('\t')
                                .append(mention).append('\t').append(type);
                        if (identifier != null) {
                            annotationRows.append('\t').append(identifier);
                        }
                        annotationRows.append('\n');
                    }
                }
                pubtatorWriter.write(annotationRows + "\n");
            }
        }
        if (duplicateFound) {
            System.exit(0);
        }
    }

    /**
     * Reads a BioC XML collection and appends its documents to the public lists of this object.
     *
     * <p>For every document the ID is appended to {@code PMIDs}, and one inner list per document is
     * appended to {@code PassageNames} (passage "type" infon), {@code PassageOffsets},
     * {@code PassageContexts} and {@code Annotations} (an empty list per passage; existing
     * annotations are not read here).
     *
     * <p>Passage text is stored as follows:
     * <ul>
     *   <li>empty or space-only text becomes {@code "-notext-"};</li>
     *   <li>text made only of tabs and spaces has every character replaced by {@code @};</li>
     *   <li>otherwise known non-ASCII characters are replaced by an ASCII look-alike.</li>
     * </ul>
     *
     * @param str path of the BioC XML file (read as UTF-8)
     * @throws IOException        if the file cannot be read
     * @throws XMLStreamException if the file cannot be parsed as BioC
     * @throws java.util.NoSuchElementException if a passage has no "type" infon or no text
     */
    public void BioCReader(String str) throws IOException, XMLStreamException {
        // Literal replacements, applied in this order. Each entry appears to map one character to one
        // character, which keeps passage offsets valid.
        String[][] asciiFallbacks = {
            {"ω", "w"}, {"μ", "u"}, {"κ", "k"}, {"α", "a"}, {"γ", "g"}, {"ɣ", "g"}, {"β", "b"}, {"×", "x"},
            {"‑", "-"}, {"¹", "1"}, {"²", "2"}, {"°", "o"}, {"ö", "o"}, {"é", "e"}, {"à", "a"}, {"Á", "A"},
            {"ε", "e"}, {"θ", "O"}, {"•", "."}, {"µ", "u"}, {"λ", "r"}, {"⁺", "+"}, {"ν", "v"}, {"ï", "i"},
            {"ã", "a"}, {"≡", "="}, {"ó", "o"}, {"³", "3"}, {"〖", "["}, {"〗", "]"}, {"Å", "A"}, {"ρ", "p"},
            {"ü", "u"}, {"ɛ", "e"}, {"č", "c"}, {"š", "s"}, {"ß", "b"}, {"═", "="}, {"£", "L"}, {"Ł", "L"},
            {"ƒ", "f"}, {"ä", "a"}, {"–", "-"}, {"⁻", "-"}, {"〈", "<"}, {"〉", ">"}, {"χ", "X"}, {"Đ", "D"},
            {"‰", "%"}, {"·", "."}, {"→", ">"}, {"←", "<"}, {"ζ", "z"}, {"π", "p"}, {"τ", "t"}, {"ξ", "X"},
            {"η", "h"}, {"ø", "0"}, {"Δ", "D"}, {"∆", "D"}, {"∑", "S"}, {"Ω", "O"}, {"δ", "d"}, {"σ", "s"},
            {"Φ", "F"}
        };
        try (InputStreamReader xmlInput = new InputStreamReader(new FileInputStream(str), "UTF-8");
                BioCCollectionReader collectionReader = new BioCCollectionReader(xmlInput)) {
            Iterator<BioCDocument> documents = collectionReader.readCollection().documentIterator();
            while (documents.hasNext()) {
                BioCDocument document = documents.next();
                this.PMIDs.add(document.getID());
                ArrayList<String> passageNames = new ArrayList<>();
                ArrayList<Integer> passageOffsets = new ArrayList<>();
                ArrayList<String> passageContexts = new ArrayList<>();
                ArrayList<ArrayList<String>> passageAnnotations = new ArrayList<>();
                for (BioCPassage passage : document.getPassages()) {
                    passageNames.add(passage.getInfon("type").get());
                    String text = passage.getText().get();
                    if (text.equals("") || text.matches("[ ]+")) {
                        passageContexts.add("-notext-");
                    } else if (text.matches("[\t ]+")) {
                        // Whitespace-only text that includes a tab: mask every character with '@'.
                        passageContexts.add(text.replaceAll(".", "@"));
                    } else {
                        String normalized = text;
                        for (String[] fallback : asciiFallbacks) {
                            normalized = normalized.replace(fallback[0], fallback[1]);
                        }
                        passageContexts.add(normalized);
                    }
                    passageOffsets.add(Integer.valueOf(passage.getOffset()));
                    passageAnnotations.add(new ArrayList<>());
                }
                this.PassageNames.add(passageNames);
                this.PassageContexts.add(passageContexts);
                this.PassageOffsets.add(passageOffsets);
                this.Annotations.add(passageAnnotations);
            }
        }
    }

    /**
     * Reads a BioC XML collection, including its annotations, and appends the documents to the
     * public lists of this object.
     *
     * <p>For every document the ID is appended to {@code PMIDs}, and one inner list per document is
     * appended to {@code PassageNames} (passage "type" infon), {@code PassageOffsets},
     * {@code PassageContexts} and {@code Annotations}.
     *
     * <p>Passage text is stored as follows:
     * <ul>
     *   <li>empty or space-only text becomes {@code "-notext-"};</li>
     *   <li>text made only of tabs and spaces has every character replaced by {@code @};</li>
     *   <li>otherwise known non-ASCII characters are replaced by an ASCII look-alike.</li>
     * </ul>
     *
     * <p>Each annotation becomes one tab-separated record
     * {@code start \t end \t mention \t type [\t identifier]}, where {@code start} and {@code end}
     * are relative to the passage offset. The identifier column is taken from the
     * {@code Identifier} infon and is omitted when that infon is absent or is the text {@code null}.
     *
     * @param str path of the BioC XML file (read as UTF-8)
     * @throws IOException        if the file cannot be read
     * @throws XMLStreamException if the file cannot be parsed as BioC
     * @throws java.util.NoSuchElementException if a passage has no "type" infon or no text, or an
     *         annotation has no "type" infon or no text
     */
    public void BioCReaderWithAnnotation(String str) throws IOException, XMLStreamException {
        // Literal replacements, applied in this order. Each entry appears to map one character to one
        // character, which keeps passage and annotation offsets valid.
        String[][] asciiFallbacks = {
            {"ω", "w"}, {"μ", "u"}, {"κ", "k"}, {"α", "a"}, {"γ", "g"}, {"ɣ", "g"}, {"β", "b"}, {"×", "x"},
            {"‑", "-"}, {"¹", "1"}, {"²", "2"}, {"°", "o"}, {"ö", "o"}, {"é", "e"}, {"à", "a"}, {"Á", "A"},
            {"ε", "e"}, {"θ", "O"}, {"•", "."}, {"µ", "u"}, {"λ", "r"}, {"⁺", "+"}, {"ν", "v"}, {"ï", "i"},
            {"ã", "a"}, {"≡", "="}, {"ó", "o"}, {"³", "3"}, {"〖", "["}, {"〗", "]"}, {"Å", "A"}, {"ρ", "p"},
            {"ü", "u"}, {"ɛ", "e"}, {"č", "c"}, {"š", "s"}, {"ß", "b"}, {"═", "="}, {"£", "L"}, {"Ł", "L"},
            {"ƒ", "f"}, {"ä", "a"}, {"–", "-"}, {"⁻", "-"}, {"〈", "<"}, {"〉", ">"}, {"χ", "X"}, {"Đ", "D"},
            {"‰", "%"}, {"·", "."}, {"→", ">"}, {"←", "<"}, {"ζ", "z"}, {"π", "p"}, {"τ", "t"}, {"ξ", "X"},
            {"η", "h"}, {"ø", "0"}, {"Δ", "D"}, {"∆", "D"}, {"∑", "S"}, {"Ω", "O"}, {"δ", "d"}, {"σ", "s"},
            {"Φ", "F"}
        };
        try (InputStreamReader xmlInput = new InputStreamReader(new FileInputStream(str), "UTF-8");
                BioCCollectionReader collectionReader = new BioCCollectionReader(xmlInput)) {
            Iterator<BioCDocument> documents = collectionReader.readCollection().documentIterator();
            while (documents.hasNext()) {
                BioCDocument document = documents.next();
                this.PMIDs.add(document.getID());
                ArrayList<String> passageNames = new ArrayList<>();
                ArrayList<Integer> passageOffsets = new ArrayList<>();
                ArrayList<String> passageContexts = new ArrayList<>();
                ArrayList<ArrayList<String>> passageAnnotations = new ArrayList<>();
                for (BioCPassage passage : document.getPassages()) {
                    passageNames.add(passage.getInfon("type").get());
                    String text = passage.getText().get();
                    if (text.equals("") || text.matches("[ ]+")) {
                        passageContexts.add("-notext-");
                    } else if (text.matches("[\t ]+")) {
                        // Whitespace-only text that includes a tab: mask every character with '@'.
                        passageContexts.add(text.replaceAll(".", "@"));
                    } else {
                        String normalized = text;
                        for (String[] fallback : asciiFallbacks) {
                            normalized = normalized.replace(fallback[0], fallback[1]);
                        }
                        passageContexts.add(normalized);
                    }
                    passageOffsets.add(Integer.valueOf(passage.getOffset()));

                    ArrayList<String> records = new ArrayList<>();
                    for (BioCAnnotation annotation : passage.getAnnotations()) {
                        int start = annotation.getTotalLocation().getOffset() - passage.getOffset();
                        int end = start + annotation.getTotalLocation().getLength();
                        String mention = annotation.getText().get();
                        String type = annotation.getInfon("type").get();
                        String identifier = annotation.getInfon("Identifier").orElse(null);
                        String record = start + "\t" + end + "\t" + mention + "\t" + type;
                        // The literal text "null" is treated the same as a missing identifier.
                        if (identifier != null && !identifier.equals("null")) {
                            record = record + "\t" + identifier;
                        }
                        records.add(record);
                    }
                    passageAnnotations.add(records);
                }
                this.PassageNames.add(passageNames);
                this.PassageContexts.add(passageContexts);
                this.PassageOffsets.add(passageOffsets);
                this.Annotations.add(passageAnnotations);
            }
        }
    }

    public void BioCOutput(String str, String str2, ArrayList<ArrayList<ArrayList<String>>> arrayList, boolean z, boolean z2) throws IOException, XMLStreamException {
        boolean z3 = false;
        if (GNormPlus.setup_hash.containsKey("ShowUnNormalizedMention") && GNormPlus.setup_hash.get("ShowUnNormalizedMention").equals("True")) {
            z3 = true;
        }
        BioCDocumentWriter bioCDocumentWriter = new BioCDocumentWriter(new OutputStreamWriter(new FileOutputStream(str2), "UTF-8"));
        new BioCCollection();
        BioCCollection bioCCollection = new BioCCollection();
        BioCCollection readCollection = new BioCCollectionReader(new InputStreamReader(new FileInputStream(str), "UTF-8")).readCollection();
        bioCDocumentWriter.writeBeginCollectionInfo(readCollection);
        int i = 0;
        Iterator documentIterator = readCollection.documentIterator();
        while (documentIterator.hasNext()) {
            BioCDocument bioCDocument = new BioCDocument();
            BioCDocument bioCDocument2 = (BioCDocument) documentIterator.next();
            String id = bioCDocument2.getID();
            bioCDocument.setID(id);
            int i2 = 0;
            int i3 = 0;
            for (BioCPassage bioCPassage : bioCDocument2.getPassages()) {
                if (z2) {
                    bioCPassage.clearAnnotations();
                } else {
                    Iterator it = bioCPassage.getAnnotations().iterator();
                    while (it.hasNext()) {
                        ((BioCAnnotation) it.next()).setID(i2);
                        i2++;
                    }
                }
                int offset = bioCPassage.getOffset();
                String str3 = (String) bioCPassage.getText().get();
                ArrayList arrayList2 = new ArrayList();
                if (arrayList.size() > i && arrayList.get(i).size() > i3) {
                    for (int i4 = 0; i4 < arrayList.get(i).get(i3).size(); i4++) {
                        String[] split = arrayList.get(i).get(i3).get(i4).split("\\t");
                        int parseInt = Integer.parseInt(split[0]);
                        int parseInt2 = Integer.parseInt(split[1]);
                        boolean z4 = false;
                        if (str3.length() > parseInt2) {
                            String str4 = split[2];
                            if (z && str3.length() >= parseInt2) {
                                str4 = str3.substring(parseInt, parseInt2);
                            }
                            if (str4.matches(".*\t.*")) {
                                split[3] = split[4];
                                if (split.length >= 6) {
                                    split[4] = split[5];
                                }
                            }
                            String str5 = split[3];
                            String str6 = split.length >= 5 ? split[4] : "";
                            if (z) {
                                int i5 = 0;
                                while (true) {
                                    if (i5 >= arrayList2.size()) {
                                        break;
                                    }
                                    String[] split2 = ((String) arrayList2.get(i5)).split("\\t");
                                    int parseInt3 = Integer.parseInt(split2[0]);
                                    int parseInt4 = Integer.parseInt(split2[1]);
                                    String str7 = split2[2];
                                    if (z && str3.length() >= parseInt4) {
                                        str7 = str3.substring(parseInt3, parseInt4);
                                    }
                                    if (str7.matches(".*\t.*")) {
                                        split2[3] = split2[4];
                                        if (split2.length >= 6) {
                                            split2[4] = split2[5];
                                        }
                                    }
                                    String str8 = split2[3];
                                    String str9 = split2.length >= 5 ? split2[4] : "";
                                    if (parseInt == parseInt3 && parseInt2 == parseInt4 && str5.equals(str8)) {
                                        z4 = true;
                                        if (!str6.matches("(Focus|Right|Left|Prefix|GeneID|Tax):[0-9]+") || str9.equals("")) {
                                            if (str9.matches("(Focus|Right|Left|Prefix|GeneID|Tax):[0-9]+") && !str6.matches("(Focus|Right|Left|Prefix|GeneID|Tax):[0-9]+") && !str6.equals("")) {
                                                arrayList2.set(i5, parseInt + "\t" + parseInt2 + "\t" + str4 + "\t" + str5 + "\t" + str6);
                                            } else if (!str6.equals("")) {
                                                arrayList2.set(i5, parseInt + "\t" + parseInt2 + "\t" + str4 + "\t" + str5 + "\t" + str9 + ";" + str6);
                                            }
                                        }
                                    } else {
                                        i5++;
                                    }
                                }
                            }
                        }
                        if (!z4) {
                            arrayList2.add(arrayList.get(i).get(i3).get(i4));
                        }
                    }
                }
                for (int i6 = 0; i6 < arrayList2.size(); i6++) {
                    String[] split3 = ((String) arrayList2.get(i6)).split("\\t");
                    HashMap hashMap = new HashMap();
                    if (split3.length >= 5) {
                        int parseInt5 = Integer.parseInt(split3[0]);
                        int parseInt6 = Integer.parseInt(split3[1]);
                        String str10 = split3[2];
                        if (z && str3.length() >= parseInt6) {
                            str10 = str3.substring(parseInt5, parseInt6);
                        }
                        if (str10.matches(".*\t.*")) {
                            split3[3] = split3[4];
                            if (split3.length >= 6) {
                                split3[4] = split3[5];
                            }
                        }
                        for (String str11 : split3[4].split(",")) {
                            hashMap.put(str11, "");
                        }
                        String str12 = "";
                        for (String str13 : hashMap.keySet()) {
                            str12 = str12.equals("") ? str13 : str12 + ";" + str13;
                        }
                        arrayList2.set(i6, split3[0] + "\t" + split3[1] + "\t" + split3[2] + "\t" + split3[3] + "\t" + str12);
                    }
                }
                for (int i7 = 0; i7 < arrayList2.size(); i7++) {
                    String[] split4 = ((String) arrayList2.get(i7)).split("\\t");
                    int parseInt7 = Integer.parseInt(split4[0]);
                    int parseInt8 = Integer.parseInt(split4[1]);
                    if (str3.length() > parseInt8) {
                        String str14 = split4[2];
                        if (z && str3.length() >= parseInt8) {
                            str14 = str3.substring(parseInt7, parseInt8);
                        }
                        if (str14.matches(".*\t.*")) {
                            split4[3] = split4[4];
                            if (split4.length >= 6) {
                                split4[4] = split4[5];
                            }
                        }
                        if (split4.length < 4) {
                            throw new IllegalStateException("Document with ID " + id + " has annotation \"" + Arrays.toString(split4) + "\" which is too short: A minimal length of 4 is expected.");
                        }
                        String str15 = split4[3];
                        if (str15.equals("GeneID")) {
                            str15 = "Gene";
                        }
                        BioCAnnotation bioCAnnotation = new BioCAnnotation();
                        HashMap hashMap2 = new HashMap();
                        hashMap2.put("type", str15);
                        if (split4.length >= 5) {
                            String str16 = split4[4];
                            if (!z || z3) {
                                hashMap2.put("Identifier", str16);
                            } else if (str15.matches("(FamilyName|Domain|Gene)")) {
                                Matcher matcher = Pattern.compile("^(Focus|Right|Left|Prefix|GeneID|Tax)\\:([0-9]+)\\|([0-9\\;]+)$").matcher(str16);
                                Matcher matcher2 = Pattern.compile("^(Focus|Right|Left|Prefix|GeneID|Tax)\\:([0-9]+)\\|([0-9]+)\\-([0-9]+)$").matcher(str16);
                                Matcher matcher3 = Pattern.compile("^(Focus|Right|Left|Prefix|GeneID|Tax)\\:([0-9]+)$").matcher(str16);
                                Matcher matcher4 = Pattern.compile("^Homo\\:([0-9]+)$").matcher(str16);
                                if (matcher.find()) {
                                    matcher.group(1);
                                    matcher.group(2);
                                    String group = matcher.group(3);
                                    if (GNormPlus.Normalization2Protein_hash.containsKey(group)) {
                                        hashMap2.put("UniProt", GNormPlus.Normalization2Protein_hash.get(group));
                                    }
                                    if (GNormPlus.HomologeneID_hash.containsKey(group)) {
                                        hashMap2.put("NCBI Homologene", GNormPlus.HomologeneID_hash.get(group));
                                    }
                                    hashMap2.put("NCBI Gene", group);
                                } else if (matcher2.find()) {
                                    matcher2.group(1);
                                    matcher2.group(2);
                                    String group2 = matcher2.group(3);
                                    matcher2.group(4);
                                    if (GNormPlus.Normalization2Protein_hash.containsKey(group2)) {
                                        hashMap2.put("UniProt", GNormPlus.Normalization2Protein_hash.get(group2));
                                    }
                                    if (GNormPlus.HomologeneID_hash.containsKey(group2)) {
                                        hashMap2.put("NCBI Homologene", GNormPlus.HomologeneID_hash.get(group2));
                                    }
                                    hashMap2.put("NCBI Gene", group2);
                                } else if (matcher3.find()) {
                                    matcher3.group(1);
                                    hashMap2.put("FocusSpecies", "NCBITaxonomyID:" + matcher3.group(2));
                                } else if (matcher4.find()) {
                                    matcher4.group(1);
                                    hashMap2.put("NCBI Homologene", matcher4.group(2));
                                } else {
                                    String[] split5 = str16.split(";");
                                    if (split5.length > 1) {
                                        ArrayList arrayList3 = new ArrayList();
                                        ArrayList arrayList4 = new ArrayList();
                                        ArrayList arrayList5 = new ArrayList();
                                        for (int i8 = 0; i8 < split5.length; i8++) {
                                            Matcher matcher5 = Pattern.compile("^(Focus|Right|Left|Prefix|GeneID|Tax)\\:([0-9]+)\\|([0-9]+)\\-([0-9]+)$").matcher(split5[i8]);
                                            Matcher matcher6 = Pattern.compile("^(Focus|Right|Left|Prefix|GeneID|Tax)\\:([0-9]+)\\|([0-9\\;]+)$").matcher(split5[i8]);
                                            if (matcher5.find()) {
                                                matcher5.group(1);
                                                matcher5.group(2);
                                                String group3 = matcher5.group(3);
                                                matcher5.group(4);
                                                if (!arrayList3.contains(group3)) {
                                                    arrayList3.add(group3);
                                                }
                                                if (GNormPlus.Normalization2Protein_hash.containsKey(group3) && !arrayList4.contains(Boolean.valueOf(GNormPlus.Normalization2Protein_hash.containsKey(group3)))) {
                                                    arrayList4.add(GNormPlus.Normalization2Protein_hash.get(group3));
                                                }
                                                if (GNormPlus.HomologeneID_hash.containsKey(group3) && !arrayList5.contains(Boolean.valueOf(GNormPlus.HomologeneID_hash.containsKey(group3)))) {
                                                    arrayList5.add(GNormPlus.HomologeneID_hash.get(group3));
                                                }
                                            } else if (matcher6.find()) {
                                                matcher6.group(1);
                                                matcher6.group(2);
                                                String group4 = matcher6.group(3);
                                                if (!arrayList3.contains(group4)) {
                                                    arrayList3.add(group4);
                                                }
                                            }
                                        }
                                        String str17 = "";
                                        for (int i9 = 0; i9 < arrayList3.size(); i9++) {
                                            str17 = str17.equals("") ? (String) arrayList3.get(i9) : str17 + ";" + ((String) arrayList3.get(i9));
                                        }
                                        hashMap2.put("NCBI Gene", str17);
                                        String str18 = "";
                                        for (int i10 = 0; i10 < arrayList4.size(); i10++) {
                                            str18 = str18.equals("") ? (String) arrayList4.get(i10) : str18 + ";" + ((String) arrayList4.get(i10));
                                        }
                                        if (!str18.equals("")) {
                                            hashMap2.put("UniProt", str18);
                                        }
                                        String str19 = "";
                                        for (int i11 = 0; i11 < arrayList5.size(); i11++) {
                                            str19 = str19.equals("") ? (String) arrayList5.get(i11) : str19 + ";" + ((String) arrayList5.get(i11));
                                        }
                                        if (!str19.equals("")) {
                                            hashMap2.put("NCBI Homologene", str19);
                                        }
                                    }
                                }
                            } else if (str15.matches("(Species|Genus|Strain)")) {
                                hashMap2.put("type", str15);
                                hashMap2.put("NCBI Taxonomy", str16);
                            } else if (str15.matches("Cell")) {
                                hashMap2.put("type", "CellLine");
                                hashMap2.put("NCBI Taxonomy", str16);
                            } else {
                                hashMap2.put("Identifier", str16);
                            }
                        }
                        bioCAnnotation.setInfons(hashMap2);
                        bioCAnnotation.setLocations(Set.of(new BioCLocation(parseInt7 + offset, parseInt8 - parseInt7)));
                        bioCAnnotation.setText(str14);
                        bioCAnnotation.setID(i2);
                        i2++;
                        if (!z) {
                            bioCPassage.addAnnotation(bioCAnnotation);
                        } else if (hashMap2.containsKey("Identifier") || hashMap2.containsKey("NCBI Homologene") || hashMap2.containsKey("NCBI Gene") || hashMap2.containsKey("NCBI Taxonomy") || str15.equals("FamilyName")) {
                            bioCPassage.addAnnotation(bioCAnnotation);
                        }
                    }
                }
                bioCDocument.addPassage(bioCPassage);
                i3++;
            }
            bioCCollection.addDocument(bioCDocument);
            bioCDocumentWriter.writeDocument(bioCDocument);
            i++;
        }
        bioCDocumentWriter.close();
    }
}
