package org.apache.tika.parser.html;

import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
import org.apache.commons.io.input.CloseShieldInputStream;
import org.apache.tika.config.Field;
import org.apache.tika.detect.AutoDetectReader;
import org.apache.tika.detect.EncodingDetector;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.TemporaryResources;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractEncodingDetectorParser;
import org.apache.tika.parser.ParseContext;
import org.ccil.cowan.tagsoup.HTMLSchema;
import org.ccil.cowan.tagsoup.Parser;
import org.ccil.cowan.tagsoup.Schema;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

/* loaded from: input_file:WEB-INF/lib/tika-parser-html-module-2.9.1.jar:org/apache/tika/parser/html/HtmlParser.class */
/**
 * HTML parser that feeds a TagSoup {@link Parser} with a charset-detected
 * reader and forwards the resulting SAX events, through an
 * {@link XHTMLDowngradeHandler} and an {@link HtmlHandler}, to the caller's
 * {@link ContentHandler}.
 *
 * <p>Element and attribute mapping is delegated to the {@link HtmlMapper}
 * found in the {@link ParseContext}; when none is present, a mapper backed by
 * this parser's own {@code mapSafeElement}, {@code isDiscardElement} and
 * {@code mapSafeAttribute} methods is used.
 */
public class HtmlParser extends AbstractEncodingDetectorParser {
    private static final long serialVersionUID = 7895315240498733128L;
    private static final Logger LOG = LoggerFactory.getLogger(HtmlParser.class);
    private static final MediaType XHTML = MediaType.application("xhtml+xml");
    private static final MediaType WAP_XHTML = MediaType.application("vnd.wap.xhtml+xml");
    private static final MediaType X_ASP = MediaType.application("x-asp");
    private static final Set<MediaType> SUPPORTED_TYPES =
            Collections.unmodifiableSet(new HashSet<>(Arrays.asList(MediaType.text("html"), XHTML, WAP_XHTML, X_ASP)));

    /** Default TagSoup schema, used when the parse context does not supply a {@link Schema}. */
    private static final Schema HTML_SCHEMA = new HTMLSchema();

    /** Flag handed to {@link HtmlHandler}; {@code false} unless changed via {@link #setExtractScripts(boolean)}. */
    @Field
    private boolean extractScripts;

    /**
     * Default {@link HtmlMapper} that delegates every call to the enclosing
     * parser, so that subclasses overriding the parser's mapping methods
     * still take effect when no mapper is supplied in the parse context.
     */
    public class HtmlParserMapper implements HtmlMapper {
        private HtmlParserMapper() {
        }

        @Override // org.apache.tika.parser.html.HtmlMapper
        public String mapSafeElement(String str) {
            return HtmlParser.this.mapSafeElement(str);
        }

        @Override // org.apache.tika.parser.html.HtmlMapper
        public boolean isDiscardElement(String str) {
            return HtmlParser.this.isDiscardElement(str);
        }

        @Override // org.apache.tika.parser.html.HtmlMapper
        public String mapSafeAttribute(String str, String str2) {
            return HtmlParser.this.mapSafeAttribute(str, str2);
        }
    }

    /** Creates a parser using the superclass's default encoding detector setup. */
    public HtmlParser() {
    }

    /**
     * Creates a parser that uses the given encoding detector.
     *
     * @param encodingDetector detector passed to the superclass constructor
     */
    public HtmlParser(EncodingDetector encodingDetector) {
        super(encodingDetector);
    }

    /**
     * Returns the media types handled by this parser: {@code text/html},
     * {@code application/xhtml+xml}, {@code application/vnd.wap.xhtml+xml}
     * and {@code application/x-asp}.
     *
     * @param parseContext ignored
     * @return an unmodifiable set of supported types
     */
    @Override // org.apache.tika.parser.Parser
    public Set<MediaType> getSupportedTypes(ParseContext parseContext) {
        return SUPPORTED_TYPES;
    }

    /**
     * Parses the given HTML stream.
     *
     * <p>If the stream is not already a {@link TikaInputStream} it is wrapped
     * in one, backed by a {@link TemporaryResources} instance that is always
     * closed before this method returns, whether parsing succeeds or fails.
     *
     * @param inputStream    the document stream
     * @param contentHandler receiver of the generated SAX events
     * @param metadata       document metadata; {@code Content-Type} and
     *                       {@code Content-Encoding} are updated
     * @param parseContext   source of optional {@link HtmlMapper} / {@link Schema} overrides
     * @throws IOException   if reading the stream or releasing temporary resources fails
     * @throws SAXException  if the SAX pipeline reports an error
     * @throws TikaException if the reader cannot be set up or parsing fails
     */
    @Override // org.apache.tika.parser.Parser
    public void parse(InputStream inputStream, ContentHandler contentHandler, Metadata metadata, ParseContext parseContext) throws IOException, SAXException, TikaException {
        TemporaryResources temporaryResources = null;
        try {
            InputStream source = inputStream;
            if (!TikaInputStream.isTikaInputStream(source)) {
                temporaryResources = new TemporaryResources();
                source = TikaInputStream.get(source, temporaryResources, metadata);
            }
            parseImpl(source, contentHandler, metadata, parseContext);
        } finally {
            // Only resources created here are released; a caller-supplied
            // TikaInputStream remains the caller's responsibility.
            if (temporaryResources != null) {
                temporaryResources.close();
            }
        }
    }

    /**
     * Detects the charset, records it in the metadata and runs the TagSoup
     * parser over the decoded content.
     */
    private void parseImpl(InputStream inputStream, ContentHandler contentHandler, Metadata metadata, ParseContext parseContext) throws IOException, SAXException, TikaException {
        // The close shield is there so that closing the reader leaves the
        // underlying stream open for whoever owns it.
        try (AutoDetectReader reader = new AutoDetectReader(new CloseShieldInputStream(inputStream), metadata, getEncodingDetector(parseContext))) {
            Charset charset = reader.getCharset();

            MediaType typeWithCharset = resolveMediaType(metadata.get("Content-Type"), charset);
            if (typeWithCharset != null) {
                metadata.set("Content-Type", typeWithCharset.toString());
            }
            metadata.set("Content-Encoding", charset.name());

            HtmlMapper htmlMapper = parseContext.get(HtmlMapper.class, new HtmlParserMapper());
            Schema schema = parseContext.get(Schema.class, HTML_SCHEMA);

            Parser parser = new Parser();
            parser.setProperty(Parser.schemaProperty, schema);
            parser.setFeature(Parser.ignoreBogonsFeature, true);
            parser.setContentHandler(new XHTMLDowngradeHandler(
                    new HtmlHandler(htmlMapper, contentHandler, metadata, parseContext, this.extractScripts)));
            parser.parse(reader.asInputSource());
        }
    }

    /**
     * Chooses the media type to report, given the previously recorded
     * {@code Content-Type} value, and attaches the detected charset to it.
     *
     * @param previous the existing {@code Content-Type} metadata value, may be {@code null}
     * @param charset  the detected charset
     * @return the media type with charset, or {@code null} when {@code previous}
     *         is some other type and should be left unchanged
     */
    private static MediaType resolveMediaType(String previous, Charset charset) {
        if (previous == null || previous.startsWith("text/html")) {
            return new MediaType(MediaType.TEXT_HTML, charset);
        }
        if (previous.startsWith("application/xhtml+xml")) {
            return new MediaType(XHTML, charset);
        }
        if (previous.startsWith("application/vnd.wap.xhtml+xml")) {
            return new MediaType(WAP_XHTML, charset);
        }
        if (previous.startsWith("application/x-asp")) {
            return new MediaType(X_ASP, charset);
        }
        return null;
    }

    /**
     * Maps an element name via {@link DefaultHtmlMapper}; subclasses may override.
     *
     * @param str the element name
     * @return the result of {@code DefaultHtmlMapper.INSTANCE.mapSafeElement(str)}
     */
    protected String mapSafeElement(String str) {
        return DefaultHtmlMapper.INSTANCE.mapSafeElement(str);
    }

    /**
     * Asks {@link DefaultHtmlMapper} whether an element is to be discarded;
     * subclasses may override.
     *
     * @param str the element name
     * @return the result of {@code DefaultHtmlMapper.INSTANCE.isDiscardElement(str)}
     */
    protected boolean isDiscardElement(String str) {
        return DefaultHtmlMapper.INSTANCE.isDiscardElement(str);
    }

    /**
     * Maps an attribute name via {@link DefaultHtmlMapper}; subclasses may override.
     *
     * @param str  first argument forwarded to the default mapper (presumably the element name)
     * @param str2 second argument forwarded to the default mapper (presumably the attribute name)
     * @return the result of {@code DefaultHtmlMapper.INSTANCE.mapSafeAttribute(str, str2)}
     */
    public String mapSafeAttribute(String str, String str2) {
        return DefaultHtmlMapper.INSTANCE.mapSafeAttribute(str, str2);
    }

    /**
     * @return the current value of the {@code extractScripts} flag
     */
    public boolean isExtractScripts() {
        return this.extractScripts;
    }

    /**
     * Sets the {@code extractScripts} flag that is passed to {@link HtmlHandler}
     * on each parse.
     *
     * @param z the new flag value
     */
    @Field
    public void setExtractScripts(boolean z) {
        this.extractScripts = z;
    }
}
