package javapersianutils.core.normalizer;

import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;
import java.util.regex.Pattern;
import javapersianutils.core.validators.StringUtil;
import org.apache.commons.lang3.StringEscapeUtils;

/* loaded from: input_file:javapersianutils/core/normalizer/AggressiveEditing.class */
/**
 * Static, regex-based clean-up helpers for Persian text: kashida removal, collapsing of
 * repeated marks, spacing around brackets and punctuation, control-character stripping and
 * a Windows-1256 re-decoding helper.
 *
 * <p>Every method is null-tolerant and returns an empty string for {@code null} input.
 * The class only holds immutable, precompiled {@link Pattern} instances.
 */
public class AggressiveEditing {
    /** Flag shared by all patterns (previously written as the bare literal {@code 2}). */
    private static final int FLAGS = Pattern.CASE_INSENSITIVE;

    /**
     * Optional run of horizontal spacing: space, tab and zero-width non-joiner (U+200C).
     * Written with explicit regex escapes so that no invisible character hides in the source.
     */
    private static final String SPACING = "[ \\t\\u200c]*";

    /** Regex-ready opening/closing characters, in the order the passes are applied. */
    private static final String[][] BRACKET_PAIRS = {
        {"\\(", "\\)"},
        {"\\[", "\\]"},
        {"\\{", "\\}"},
        {"“", "”"},
        {"«", "»"},
    };

    /** Two or more consecutive exclamation marks. */
    private static final Pattern REPEATED_EXCLAMATION = Pattern.compile("(!){2,}", FLAGS);

    /** Two or more consecutive Persian question marks. */
    private static final Pattern REPEATED_PERSIAN_QUESTION = Pattern.compile("(؟){2,}", FLAGS);

    /** A run of one or more plain spaces. */
    private static final Pattern SPACE_RUN = Pattern.compile("[ ]+", FLAGS);

    /** A run of line feeds followed by optional horizontal spacing. */
    private static final Pattern LINE_BREAKS_WITH_TRAILING_SPACING =
            Pattern.compile("(\\n+)" + SPACING, FLAGS);

    /** A run of kashida (tatweel) characters. */
    private static final Pattern KASHIDA_RUN = Pattern.compile("ـ+", FLAGS);

    /** Bracketed spans including the spacing around them and just inside them. */
    private static final Pattern[] OUTSIDE_SPACING = compileBracketPatterns(true);

    /** Bracketed spans including only the whitespace just inside them. */
    private static final Pattern[] INSIDE_SPACING = compileBracketPatterns(false);

    /**
     * One punctuation mark with the spacing on both sides. Covers {@code : ; , . ? !} plus the
     * Persian semicolon (U+061B), comma (U+060C) and question mark (U+061F). The question mark
     * is punctuation only; it is not part of the spacing class, so it is never deleted.
     */
    private static final Pattern PUNCTUATION_SPACING =
            Pattern.compile(SPACING + "([:;,\\u061B\\u060C.\\u061F?!])" + SPACING, FLAGS);

    /** A colon followed by whitespace between two digit runs (ASCII or Persian digits). */
    private static final Pattern TIME_COLON_SPACING =
            Pattern.compile("([0-9\\u06F0-\\u06F9]+):\\s+([0-9\\u06F0-\\u06F9]+)", FLAGS);

    /** C0 control characters except tab (0x09), line feed (0x0A) and carriage return (0x0D). */
    private static final Pattern CONTROL_CHARACTERS =
            Pattern.compile("[\\x00-\\x08\\x0B\\x0C\\x0E-\\x1F]", FLAGS);

    private AggressiveEditing() {
    }

    /**
     * Builds one pattern per entry of {@link #BRACKET_PAIRS}. Each pattern captures the opening
     * character, the lazily matched content (which may not contain that pair's own closing
     * character) and the closing character as groups 1 to 3.
     *
     * @param includeOutsideSpacing whether the spacing before the opener and after the closer
     *                              is part of the match
     * @return the compiled patterns, in {@link #BRACKET_PAIRS} order
     */
    private static Pattern[] compileBracketPatterns(boolean includeOutsideSpacing) {
        String edge = includeOutsideSpacing ? SPACING : "";
        Pattern[] patterns = new Pattern[BRACKET_PAIRS.length];
        for (int i = 0; i < BRACKET_PAIRS.length; i++) {
            String open = BRACKET_PAIRS[i][0];
            String close = BRACKET_PAIRS[i][1];
            String regex = edge
                    + "(" + open + ")\\s*([^" + close + "]+?)\\s*?(" + close + ")"
                    + edge;
            patterns[i] = Pattern.compile(regex, FLAGS);
        }
        return patterns;
    }

    /**
     * Removes every kashida (tatweel) character.
     *
     * @param str text to clean; may be {@code null}
     * @return the text without kashida characters, or {@code ""} when {@code str} is {@code null}
     */
    public static String normalizeAllKashida(String str) {
        if (str == null) {
            return "";
        }
        return KASHIDA_RUN.matcher(str).replaceAll("");
    }

    /**
     * Collapses runs of {@code !} and runs of the Persian question mark into a single mark.
     *
     * @param str text to clean; may be {@code null}
     * @return the cleaned text, or {@code ""} when {@code str} is {@code null}
     */
    public static String normalizeExtraMarks(String str) {
        if (str == null) {
            return "";
        }
        String collapsed = REPEATED_EXCLAMATION.matcher(str).replaceAll("$1");
        return REPEATED_PERSIAN_QUESTION.matcher(collapsed).replaceAll("$1");
    }

    /**
     * Normalizes spacing around brackets and punctuation. The passes run in this order:
     * <ol>
     *   <li>each bracketed span gets exactly one space outside and none just inside;</li>
     *   <li>each punctuation mark gets no spacing before it and one space after it;</li>
     *   <li>the space that step 2 puts after a colon between two numbers is removed again;</li>
     *   <li>whitespace just inside brackets (possibly re-added by step 2) is removed;</li>
     *   <li>the result is trimmed.</li>
     * </ol>
     *
     * @param str text to clean; may be {@code null}
     * @return the cleaned text, or {@code ""} when {@code str} is {@code null}
     */
    public static String normalizeOutsideInsideSpacing(String str) {
        if (str == null) {
            return "";
        }
        String text = str;
        for (Pattern bracketed : OUTSIDE_SPACING) {
            text = bracketed.matcher(text).replaceAll(" $1$2$3 ");
        }
        text = PUNCTUATION_SPACING.matcher(text).replaceAll("$1 ");
        text = TIME_COLON_SPACING.matcher(text).replaceAll("$1:$2");
        for (Pattern bracketed : INSIDE_SPACING) {
            text = bracketed.matcher(text).replaceAll("$1$2$3");
        }
        return text.trim();
    }

    /**
     * Collapses runs of spaces into one space, removes horizontal spacing that directly
     * follows a run of line feeds, and trims the result.
     *
     * @param str text to clean; may be {@code null}
     * @return the cleaned text, or {@code ""} when {@code str} is {@code null}
     */
    public static String normalizeSpacingAndLineBreaks(String str) {
        if (str == null) {
            return "";
        }
        String singleSpaced = SPACE_RUN.matcher(str).replaceAll(" ");
        return LINE_BREAKS_WITH_TRAILING_SPACING.matcher(singleSpaced).replaceAll("$1").trim();
    }

    /**
     * Removes every kashida (tatweel) and every zero-width non-joiner (U+200C).
     *
     * @param str text to clean; may be {@code null}
     * @return the cleaned text, or {@code ""} when {@code StringUtil.isNullOrWhiteSpace}
     *         reports {@code true} for {@code str}
     */
    public static String normalizeUnderLines(String str) {
        if (StringUtil.isNullOrWhiteSpace(str)) {
            return "";
        }
        // Precompiled pattern plus a literal replace avoids recompiling a regex on every call.
        return KASHIDA_RUN.matcher(str).replaceAll("").replace("\u200c", "");
    }

    /**
     * Removes C0 control characters, keeping tab, line feed and carriage return.
     *
     * @param str text to clean; may be {@code null}
     * @return the cleaned text, or {@code ""} when {@code StringUtil.isNullOrWhiteSpace}
     *         reports {@code true} for {@code str}
     */
    public static String removeHexadecimalSymbols(String str) {
        if (StringUtil.isNullOrWhiteSpace(str)) {
            return "";
        }
        return CONTROL_CHARACTERS.matcher(str).replaceAll("");
    }

    /**
     * Unescapes HTML entities in {@code str}, encodes the result as ISO-8859-1 bytes and
     * decodes those bytes as Windows-1256.
     *
     * @param str text to convert; may be {@code null}
     * @return the re-decoded text, or {@code ""} when {@code str} is {@code null}
     * @throws UnsupportedEncodingException if the runtime does not provide Windows-1256
     */
    public static String convertArabic1256ToUtf8(String str) throws UnsupportedEncodingException {
        if (str == null) {
            return "";
        }
        String unescaped = StringEscapeUtils.unescapeHtml4(str);
        byte[] latin1Bytes = unescaped.getBytes(StandardCharsets.ISO_8859_1);
        return new String(latin1Bytes, "Windows-1256");
    }
}
