package edu.uci.ics.crawler4j.examples.crawler;

import edu.uci.ics.crawler4j.crawler.Page;
import edu.uci.ics.crawler4j.crawler.WebCrawler;
import edu.uci.ics.crawler4j.examples.db.PostgresDBService;
import edu.uci.ics.crawler4j.parser.HtmlParseData;
import edu.uci.ics.crawler4j.url.WebURL;
import java.util.Set;
import java.util.regex.Pattern;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:edu/uci/ics/crawler4j/examples/crawler/PostgresWebCrawler.class */
/**
 * Crawler that logs basic statistics about every HTML page it visits and hands the page to a
 * {@link PostgresDBService} for storage.
 *
 * <p>URLs whose path ends in one of the file extensions listed in
 * {@link #FILE_ENDING_EXCLUSION_PATTERN} (stylesheets, scripts, images, audio/video, archives,
 * office documents, ...) are not visited.
 */
public class PostgresWebCrawler extends WebCrawler {
    private static final Logger logger = LoggerFactory.getLogger(PostgresWebCrawler.class);

    /**
     * Matches any URL that ends with a dot followed by one of the excluded file extensions.
     *
     * <p>Compiled case-insensitively so the URL does not have to be lower-cased first; a
     * locale-dependent {@code String.toLowerCase()} can mangle ASCII letters (for example
     * {@code I} under a Turkish default locale) and let {@code .GIF} slip through.
     */
    private static final Pattern FILE_ENDING_EXCLUSION_PATTERN = Pattern.compile(
            ".*(\\.(css|js|bmp|gif|jpe?g|png|tiff?|ico|nef|raw|mid|mp2|mp3|mp4|wav|wma|flv|mpe?g"
                    + "|avi|mov|ram|m4v|wmv|rm|smil|pdf|doc|docx|pub|xls|xlsx|vsd|ppt|pptx|swf"
                    + "|zip|rar|gz|bz2|7z|bin|xml|txt|java|c|cpp|exe))$",
            Pattern.CASE_INSENSITIVE);

    /** Storage backend that receives every visited HTML page; closed in {@link #onBeforeExit()}. */
    private final PostgresDBService postgresDBService;

    /**
     * Creates a crawler that stores visited pages through the given service.
     *
     * @param postgresDBService service used to persist pages; {@link #onBeforeExit()} tolerates
     *     {@code null}, but {@link #visit(Page)} does not guard against it
     */
    public PostgresWebCrawler(PostgresDBService postgresDBService) {
        this.postgresDBService = postgresDBService;
    }

    /**
     * Decides whether a discovered URL should be crawled.
     *
     * @param page the page on which the URL was found (not used by this implementation)
     * @param webURL the candidate URL
     * @return {@code false} if the URL ends with an excluded file extension, {@code true} otherwise
     */
    public boolean shouldVisit(Page page, WebURL webURL) {
        return !FILE_ENDING_EXCLUSION_PATTERN.matcher(webURL.getURL()).matches();
    }

    /**
     * Logs the URL of the page and, when its parse data is HTML, logs the text length, HTML length
     * and number of outgoing links before storing the page.
     *
     * <p>A {@link RuntimeException} thrown while storing is logged and swallowed so that one
     * failed write does not propagate out of this method.
     *
     * @param page the fetched page
     */
    public void visit(Page page) {
        logger.info("URL: {}", page.getWebURL().getURL());
        if (!(page.getParseData() instanceof HtmlParseData)) {
            // Only HTML pages are reported and stored.
            return;
        }

        // Explicit downcast: safe because of the instanceof check above.
        HtmlParseData parseData = (HtmlParseData) page.getParseData();
        logger.info("Text length: {}", parseData.getText().length());
        logger.info("Html length: {}", parseData.getHtml().length());
        logger.info("Number of outgoing links: {}", parseData.getOutgoingUrls().size());

        try {
            this.postgresDBService.store(page);
        } catch (RuntimeException e) {
            logger.error("Storing failed", e);
        }
    }

    /** Closes the storage service, if one was supplied, before the crawler exits. */
    public void onBeforeExit() {
        if (this.postgresDBService != null) {
            this.postgresDBService.close();
        }
    }
}
