package net.sf.mmm.content.parser.impl.html;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.io.Writer;
import java.util.regex.Pattern;
import javax.inject.Named;
import javax.inject.Singleton;
import net.sf.mmm.content.parser.api.ContentParserOptions;
import net.sf.mmm.content.parser.impl.text.AbstractContentParserTextMarkupAware;
import net.sf.mmm.util.context.api.MutableGenericContext;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.tidy.Tidy;

@Singleton
@Named
/* loaded from: input_file:net/sf/mmm/content/parser/impl/html/ContentParserHtml.class */
public class ContentParserHtml extends AbstractContentParserTextMarkupAware {
    /** MIME type string returned by {@link #getMimetype()}. */
    public static final String KEY_MIMETYPE = "text/html";
    /** File extension string returned by {@link #getExtension()}. */
    public static final String KEY_EXTENSION = "html";
    /** Tag name of the element whose children are scanned for title and meta data in {@code parseJtidy}. */
    private static final String TAG_HEAD = "head";
    /**
     * Tag name of the title element. The same string is also used as the name of the
     * context variable that receives the title.
     */
    private static final String TAG_TITLE = "title";
    /** Tag name of meta elements inspected in {@code parseJtidy}. */
    private static final String TAG_META = "meta";
    /** Attribute of a meta element that is compared against {@code "keywords"} and {@code "author"}. */
    private static final String ATR_META_NAME = "name";
    /** Attribute of a meta element whose value is stored in the context. */
    private static final String ATR_META_CONTENT = "content";
    /** Tag name of the element whose text is stored as the {@code "text"} variable. */
    private static final String TAG_BODY = "body";
    /**
     * Line pattern used by {@code parseLine}: group 1 captures the text between a literal
     * {@code <title>} and {@code </title>}. No flags are set, so the tags are matched
     * case-sensitively and without attributes.
     * NOTE(review): unlike the two meta patterns there is no trailing {@code .*}; whether that
     * matters depends on how the inherited {@code parseProperty} applies the pattern - confirm.
     */
    private static final Pattern TITLE_PATTERN = Pattern.compile(".*<title>([^<]*)</title>");
    /**
     * Line pattern used by {@code parseLine}: group 1 captures the {@code content} value of a
     * meta tag named {@code author}. It only matches when {@code name} directly precedes
     * {@code content}, separated by a single space, and is case-sensitive.
     */
    private static final Pattern AUTHOR_PATTERN = Pattern.compile(".*<meta name=[\"']author[\"'] content=[\"']([^\"']*)[\"'].*");
    /**
     * Line pattern used by {@code parseLine}: group 1 captures the {@code content} value of a
     * meta tag named {@code keywords}. Same attribute-order and case restrictions as
     * {@link #AUTHOR_PATTERN}.
     */
    private static final Pattern KEYWORDS_PATTERN = Pattern.compile(".*<meta name=[\"']keywords[\"'] content=[\"']([^\"']*)[\"'].*");

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:net/sf/mmm/content/parser/impl/html/ContentParserHtml$NullWriter.class */
    /**
     * A {@link Writer} that discards everything written to it.
     * <p>
     * All three abstract operations of {@link Writer} are implemented as no-ops. The
     * enclosing class uses an instance as the target for JTidy's error output.
     */
    public static class NullWriter extends Writer {

        /** Private: instances are only created by the enclosing class. */
        private NullWriter() {
            super();
        }

        /**
         * Ignores the given characters.
         *
         * @param cArr the characters that would be written (ignored).
         * @param i the start offset in {@code cArr} (ignored).
         * @param i2 the number of characters (ignored).
         */
        @Override
        public void write(char[] cArr, int i, int i2) throws IOException {
            // intentionally empty: output is discarded
        }

        /** Does nothing, since nothing is ever buffered. */
        @Override
        public void flush() throws IOException {
            // intentionally empty
        }

        /** Does nothing, since there is no underlying resource to release. */
        @Override
        public void close() throws IOException {
            // intentionally empty
        }
    }

    /**
     * Returns the file extension key of this parser.
     *
     * @return the constant {@link #KEY_EXTENSION} ({@code "html"}).
     */
    public String getExtension() {
        return ContentParserHtml.KEY_EXTENSION;
    }

    /**
     * Returns the MIME type key of this parser.
     *
     * @return the constant {@link #KEY_MIMETYPE} ({@code "text/html"}).
     */
    public String getMimetype() {
        return ContentParserHtml.KEY_MIMETYPE;
    }

    /**
     * Returns the alternative keys of this parser.
     *
     * @return a newly allocated array containing the single entry {@code "htm"}.
     */
    public String[] getAlternativeKeyArray() {
        String[] alternativeKeys = {"htm"};
        return alternativeKeys;
    }

    /**
     * Returns the secondary keys of this parser.
     *
     * @return a newly allocated array containing {@code "php"}, {@code "jsp"} and
     *         {@code "hta"}, in that order.
     */
    public String[] getSecondaryKeyArray() {
        String[] secondaryKeys = {"php", "jsp", "hta"};
        return secondaryKeys;
    }

    /**
     * Parses the HTML read from the given stream into a DOM via JTidy and copies selected
     * data into the context.
     * <p>
     * From the direct children of the {@code head} element it takes the text of the
     * {@code title} element (the last one wins) and the {@code content} attribute of
     * {@code meta} elements named {@code keywords} or {@code author} (compared ignoring case).
     * These are stored as the variables {@code "title"}, {@code "keywords"} and
     * {@code "creator"}. The collected text of the {@code body} element is stored as
     * {@code "text"}. Variables whose source element is missing are not set.
     *
     * @param inputStream the stream to read the HTML from. It is not closed here.
     * @param j not used by this method. NOTE(review): presumably the content size, as passed
     *        through from {@code parse} - confirm.
     * @param mutableGenericContext the context receiving the extracted variables.
     * @throws Exception if parsing fails.
     */
    protected void parseJtidy(InputStream inputStream, long j, MutableGenericContext mutableGenericContext) throws Exception {
        Tidy htmlTidy = new Tidy();
        // JTidy's diagnostics are routed to a writer that discards them.
        PrintWriter discardedDiagnostics = new PrintWriter(new NullWriter());
        htmlTidy.setErrout(discardedDiagnostics);
        Element root = htmlTidy.parseDOM(inputStream, (OutputStream) null).getDocumentElement();

        String title = null;
        Element head = getFirstChildElement(root, TAG_HEAD);
        if (head != null) {
            NodeList headChildren = head.getChildNodes();
            for (int index = 0; index < headChildren.getLength(); index++) {
                Node child = headChildren.item(index);
                if (child.getNodeType() != Node.ELEMENT_NODE) {
                    continue;
                }
                Element headElement = (Element) child;
                String tag = headElement.getTagName();
                if (tag.equals(TAG_TITLE)) {
                    title = getTextContent(headElement);
                    continue;
                }
                if (!tag.equals(TAG_META)) {
                    continue;
                }
                String metaName = headElement.getAttribute(ATR_META_NAME);
                if (metaName.equalsIgnoreCase("keywords")) {
                    mutableGenericContext.setVariable("keywords", headElement.getAttribute(ATR_META_CONTENT));
                } else if (metaName.equalsIgnoreCase("author")) {
                    mutableGenericContext.setVariable("creator", headElement.getAttribute(ATR_META_CONTENT));
                }
            }
        }
        // The title is stored only after the whole head has been scanned.
        if (title != null) {
            mutableGenericContext.setVariable(TAG_TITLE, title);
        }

        Element body = getFirstChildElement(root, TAG_BODY);
        if (body != null) {
            mutableGenericContext.setVariable("text", getTextContent(body));
        }
    }

    /**
     * Parses the content of the given stream, choosing between DOM-based and inherited parsing.
     * <p>
     * {@link #parseJtidy} is used when {@code j} is positive and strictly below
     * {@code contentParserOptions.getMaximumBufferSize()}. In every other case (non-positive
     * {@code j}, or {@code j} at or above that limit) the call is delegated to the superclass
     * implementation.
     *
     * @param inputStream the stream to read from.
     * @param j the value compared against the maximum buffer size.
     *        NOTE(review): presumably the content size in bytes - confirm.
     * @param contentParserOptions supplies the maximum buffer size.
     * @param mutableGenericContext the context receiving the extracted variables.
     * @throws Exception if parsing fails.
     */
    public void parse(InputStream inputStream, long j, ContentParserOptions contentParserOptions, MutableGenericContext mutableGenericContext) throws Exception {
        boolean useDomParser = j > 0 && j < contentParserOptions.getMaximumBufferSize();
        if (useDomParser) {
            parseJtidy(inputStream, j, mutableGenericContext);
            return;
        }
        super.parse(inputStream, j, contentParserOptions, mutableGenericContext);
    }

    /**
     * Applies the title, author and keywords patterns to the given text, in that order, by
     * delegating to {@code parseProperty} with the variable names {@code "title"},
     * {@code "creator"} and {@code "keywords"} respectively.
     *
     * @param mutableGenericContext the context handed on to {@code parseProperty}.
     * @param str the text the patterns are applied to.
     */
    protected void parseLine(MutableGenericContext mutableGenericContext, String str) {
        // Every pattern declares exactly one capturing group; presumably this argument
        // selects that group in parseProperty - confirm against the superclass.
        final int firstGroup = 1;
        parseProperty(mutableGenericContext, str, TITLE_PATTERN, TAG_TITLE, firstGroup);
        parseProperty(mutableGenericContext, str, AUTHOR_PATTERN, "creator", firstGroup);
        parseProperty(mutableGenericContext, str, KEYWORDS_PATTERN, "keywords", firstGroup);
    }

    /**
     * Finds the first direct child element with the given tag name.
     *
     * @param element the parent whose direct children are searched.
     * @param str the tag name to look for (compared case-sensitively).
     * @return the first matching child element, or {@code null} if there is none.
     */
    private Element getFirstChildElement(Element element, String str) {
        NodeList children = element.getChildNodes();
        Element match = null;
        for (int index = 0; match == null && index < children.getLength(); index++) {
            Node child = children.item(index);
            if (child.getNodeType() != Node.ELEMENT_NODE) {
                // text, comments and other non-element nodes never match
                continue;
            }
            Element candidate = (Element) child;
            if (candidate.getTagName().equals(str)) {
                match = candidate;
            }
        }
        return match;
    }

    /**
     * Collects the text contained in the given element and all of its descendants.
     *
     * @param element the element to extract the text from.
     * @return the values of all text and CDATA nodes below {@code element}, concatenated in
     *         document order without any separator.
     */
    private String getTextContent(Element element) {
        // The buffer never leaves this call chain, so the unsynchronized StringBuilder suffices.
        StringBuilder text = new StringBuilder();
        collectTextContent(element, text);
        return text.toString();
    }

    /**
     * Recursively appends the values of all text and CDATA nodes below the given element.
     *
     * @param element the element whose children are visited.
     * @param stringBuffer the buffer the text is appended to.
     */
    private void collectTextContent(Element element, StringBuilder stringBuffer) {
        NodeList childNodes = element.getChildNodes();
        for (int i = 0; i < childNodes.getLength(); i++) {
            Node child = childNodes.item(i);
            switch (child.getNodeType()) {
                case Node.ELEMENT_NODE:
                    collectTextContent((Element) child, stringBuffer);
                    break;
                case Node.TEXT_NODE:
                case Node.CDATA_SECTION_NODE: {
                    String value = child.getNodeValue();
                    // Appending a null String would add the literal text "null" to the result.
                    if (value != null) {
                        stringBuffer.append(value);
                    }
                    break;
                }
                default:
                    // comments, processing instructions etc. contribute no text
                    break;
            }
        }
    }
}
