package org.apache.solr.update.processor;

import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.Locale;
import org.apache.hadoop.security.HttpCrossOriginFilterInitializer;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.update.AddUpdateCommand;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:WEB-INF/lib/solr-core-6.4.1.jar:org/apache/solr/update/processor/URLClassifyProcessor.class */
/**
 * Update processor that derives simple URL-based signals from a document's URL field and
 * stores them in additional fields before handing the document to the next processor.
 *
 * <p>For every added document that contains the configured input field, the URL is normalized
 * and these fields are set: the length of the normalized URL, the number of path levels, a
 * 1/0 "top-level page" flag and a 1/0 "landing page" flag. The host name and a canonical URL
 * are written as well, but only when output field names for them have been configured.
 *
 * <p>Documents whose URL cannot be normalized are passed on unchanged after a warning is logged.
 */
public class URLClassifyProcessor extends UpdateRequestProcessor {
    // Names of the configuration parameters.
    private static final String INPUT_FIELD_PARAM = "inputField";
    private static final String OUTPUT_LENGTH_FIELD_PARAM = "lengthOutputField";
    private static final String OUTPUT_LEVELS_FIELD_PARAM = "levelsOutputField";
    private static final String OUTPUT_TOPLEVEL_FIELD_PARAM = "toplevelOutputField";
    private static final String OUTPUT_LANDINGPAGE_FIELD_PARAM = "landingpageOutputField";
    private static final String OUTPUT_DOMAIN_FIELD_PARAM = "domainOutputField";
    private static final String OUTPUT_CANONICALURL_FIELD_PARAM = "canonicalUrlOutputField";

    // Field names used when the corresponding parameter is not supplied.
    private static final String DEFAULT_URL_FIELDNAME = "url";
    private static final String DEFAULT_LENGTH_FIELDNAME = "url_length";
    private static final String DEFAULT_LEVELS_FIELDNAME = "url_levels";
    private static final String DEFAULT_TOPLEVEL_FIELDNAME = "url_toplevel";
    private static final String DEFAULT_LANDINGPAGE_FIELDNAME = "url_landingpage";

    private boolean enabled;
    private String urlFieldname;
    private String lengthFieldname;
    private String levelsFieldname;
    private String toplevelpageFieldname;
    private String landingpageFieldname;
    /** Output field for the host name; {@code null} means the host is not written. */
    private String domainFieldname;
    /** Output field for the canonical URL; {@code null} means it is not written. */
    private String canonicalUrlFieldname;

    private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

    /**
     * Lower-case path endings that mark a landing page. They are checked in array order and the
     * first match wins, so a path ending in "/" always matches the first entry.
     */
    private static final String[] landingPageSuffixes = {"/", "index.html", "index.htm", "index.phtml", "index.shtml", "index.xml", "index.php", "index.asp", "index.aspx", "welcome.html", "welcome.htm", "welcome.phtml", "welcome.shtml", "welcome.xml", "welcome.php", "welcome.asp", "welcome.aspx"};

    /**
     * Creates the processor with default field names and then applies any overrides found in
     * {@code solrParams}.
     *
     * @param solrParams             configuration parameters; may be {@code null}, in which case
     *                               all defaults are kept
     * @param solrQueryRequest       not used by this constructor
     * @param solrQueryResponse      not used by this constructor
     * @param updateRequestProcessor the next processor, passed to the superclass constructor
     */
    public URLClassifyProcessor(SolrParams solrParams, SolrQueryRequest solrQueryRequest, SolrQueryResponse solrQueryResponse, UpdateRequestProcessor updateRequestProcessor) {
        super(updateRequestProcessor);
        this.enabled = true;
        this.urlFieldname = DEFAULT_URL_FIELDNAME;
        this.lengthFieldname = DEFAULT_LENGTH_FIELDNAME;
        this.levelsFieldname = DEFAULT_LEVELS_FIELDNAME;
        this.toplevelpageFieldname = DEFAULT_TOPLEVEL_FIELDNAME;
        this.landingpageFieldname = DEFAULT_LANDINGPAGE_FIELDNAME;
        this.domainFieldname = null;
        this.canonicalUrlFieldname = null;
        initParameters(solrParams);
    }

    /**
     * Overrides the defaults with the values present in {@code solrParams}; does nothing when
     * {@code solrParams} is {@code null}.
     */
    private void initParameters(SolrParams solrParams) {
        if (solrParams == null) {
            return;
        }
        // NOTE(review): the enable switch is read under the key held by a Hadoop constant; this
        // looks like a constant-inlining artifact rather than an intentional dependency —
        // confirm the constant's value before replacing it with a local literal.
        // The field is assigned directly (not via setEnabled) because this runs during
        // construction and setEnabled is public and overridable.
        this.enabled = solrParams.getBool(HttpCrossOriginFilterInitializer.ENABLED_SUFFIX, true);
        this.urlFieldname = solrParams.get(INPUT_FIELD_PARAM, DEFAULT_URL_FIELDNAME);
        this.lengthFieldname = solrParams.get(OUTPUT_LENGTH_FIELD_PARAM, DEFAULT_LENGTH_FIELDNAME);
        this.levelsFieldname = solrParams.get(OUTPUT_LEVELS_FIELD_PARAM, DEFAULT_LEVELS_FIELDNAME);
        this.toplevelpageFieldname = solrParams.get(OUTPUT_TOPLEVEL_FIELD_PARAM, DEFAULT_TOPLEVEL_FIELDNAME);
        this.landingpageFieldname = solrParams.get(OUTPUT_LANDINGPAGE_FIELD_PARAM, DEFAULT_LANDINGPAGE_FIELDNAME);
        this.domainFieldname = solrParams.get(OUTPUT_DOMAIN_FIELD_PARAM);
        this.canonicalUrlFieldname = solrParams.get(OUTPUT_CANONICALURL_FIELD_PARAM);
    }

    /**
     * Adds the URL classification fields to the document (when enabled and the input field is
     * present with a non-null value) and then delegates to the superclass.
     *
     * @param addUpdateCommand the add command whose document is enriched in place
     * @throws IOException if the superclass {@code processAdd} throws it
     */
    @Override // org.apache.solr.update.processor.UpdateRequestProcessor
    public void processAdd(AddUpdateCommand addUpdateCommand) throws IOException {
        if (isEnabled()) {
            SolrInputDocument document = addUpdateCommand.getSolrInputDocument();
            if (document.containsKey(this.urlFieldname)) {
                String rawUrl = (String) document.getFieldValue(this.urlFieldname);
                // A field that is present but has no value cannot be classified; new URI(null)
                // would throw a NullPointerException and abort the whole add.
                if (rawUrl != null) {
                    classify(document, rawUrl);
                }
            }
        }
        super.processAdd(addUpdateCommand);
    }

    /** Normalizes {@code rawUrl} and writes the derived fields; logs a warning if it is unusable. */
    private void classify(SolrInputDocument document, String rawUrl) {
        URL normalizedURL;
        try {
            normalizedURL = getNormalizedURL(rawUrl);
        } catch (MalformedURLException | URISyntaxException | IllegalArgumentException e) {
            // IllegalArgumentException is what URI.toURL() throws for a non-absolute URI
            // (e.g. "www.example.com/index.html"); treat it like any other unusable URL.
            log.warn("cannot get the normalized url for \"{}\" due to {}", rawUrl, e.getMessage());
            return;
        }
        document.setField(this.lengthFieldname, Integer.valueOf(length(normalizedURL)));
        document.setField(this.levelsFieldname, Integer.valueOf(levels(normalizedURL)));
        document.setField(this.toplevelpageFieldname, Integer.valueOf(isTopLevelPage(normalizedURL) ? 1 : 0));
        document.setField(this.landingpageFieldname, Integer.valueOf(isLandingPage(normalizedURL) ? 1 : 0));
        if (this.domainFieldname != null) {
            document.setField(this.domainFieldname, normalizedURL.getHost());
        }
        if (this.canonicalUrlFieldname != null) {
            document.setField(this.canonicalUrlFieldname, getCanonicalUrl(normalizedURL));
        }
        // Parameterized so the document is only rendered when debug logging is on.
        log.debug("{}", document);
    }

    /**
     * Returns the URL with a trailing "/&lt;landing page suffix&gt;" replaced by "/".
     *
     * <p>The replacement is applied to the full URL string in its original case, while the suffix
     * is detected on the lower-cased path. The URL therefore comes back unchanged (re-parsed)
     * when it does not end with "/" followed by the lower-case suffix, for example when a query
     * string follows the path or the suffix is written in mixed case.
     *
     * @param url the URL to canonicalize
     * @return the canonical URL, or {@code url} itself if the rewritten string cannot be parsed
     */
    public URL getCanonicalUrl(URL url) {
        String urlString = url.toString();
        try {
            return new URL(urlString.replaceFirst("/" + landingPageSuffix(url) + "$", "/"));
        } catch (MalformedURLException e) {
            log.warn("cannot build the canonical url for \"{}\"", urlString, e);
            return url;
        }
    }

    /**
     * @param url the URL to measure
     * @return the number of characters in the URL's string form
     */
    public int length(URL url) {
        return url.toString().length();
    }

    /**
     * Counts the '/' characters left in the lower-cased path once the landing page suffix and
     * any trailing slashes have been removed.
     *
     * @param url the URL to inspect
     * @return the number of path levels
     */
    public int levels(URL url) {
        String path = getTrimmedPath(url);
        int slashCount = 0;
        for (int i = 0; i < path.length(); i++) {
            if (path.charAt(i) == '/') {
                slashCount++;
            }
        }
        return slashCount;
    }

    /**
     * @param url the URL to inspect
     * @return {@code true} when nothing remains of the path after removing the landing page
     *         suffix and trailing slashes, and the URL has no query string
     */
    public boolean isTopLevelPage(URL url) {
        return getTrimmedPath(url).isEmpty() && url.getQuery() == null;
    }

    /**
     * @param url the URL to inspect
     * @return {@code true} when the URL has no query string and its path ends with one of the
     *         known landing page suffixes (a bare trailing "/" counts)
     */
    public boolean isLandingPage(URL url) {
        // Compare by content; the original identity check only worked through literal interning.
        return url.getQuery() == null && !landingPageSuffix(url).isEmpty();
    }

    /**
     * Parses {@code str} as a URI, normalizes it and converts it to a URL.
     *
     * @param str the URL text
     * @return the normalized URL
     * @throws MalformedURLException    if the normalized URI cannot be converted to a URL
     * @throws URISyntaxException       if {@code str} is not a valid URI
     * @throws IllegalArgumentException if the URI is not absolute (thrown by {@link URI#toURL()})
     */
    public URL getNormalizedURL(String str) throws MalformedURLException, URISyntaxException {
        return new URI(str).normalize().toURL();
    }

    /** @return whether this processor classifies URLs; when {@code false} documents pass through untouched */
    public boolean isEnabled() {
        return this.enabled;
    }

    /** @param z {@code true} to classify URLs, {@code false} to pass documents through untouched */
    public void setEnabled(boolean z) {
        this.enabled = z;
    }

    /**
     * Returns the first entry of {@link #landingPageSuffixes} that the lower-cased path ends
     * with, or the empty string when none matches.
     */
    private String landingPageSuffix(URL url) {
        String lowerCasePath = url.getPath().toLowerCase(Locale.ROOT);
        for (String suffix : landingPageSuffixes) {
            if (lowerCasePath.endsWith(suffix)) {
                return suffix;
            }
        }
        return "";
    }

    /** Returns the lower-cased path with the landing page suffix (if any) cut off the end. */
    private String getPathWithoutSuffix(URL url) {
        String lowerCasePath = url.getPath().toLowerCase(Locale.ROOT);
        // landingPageSuffix works on the same lower-cased path, so a non-empty suffix is
        // guaranteed to sit at the very end and can be removed by length.
        String suffix = landingPageSuffix(url);
        return lowerCasePath.substring(0, lowerCasePath.length() - suffix.length());
    }

    /** Returns {@link #getPathWithoutSuffix(URL)} with all trailing slashes removed. */
    private String getTrimmedPath(URL url) {
        return getPathWithoutSuffix(url).replaceAll("/+$", "");
    }
}
