package org.apache.tika.parser.code;

import com.uwyn.jhighlight.renderer.Renderer;
import com.uwyn.jhighlight.renderer.XhtmlRendererFactory;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.nio.charset.Charset;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.io.input.CloseShieldInputStream;
import org.apache.tika.detect.AutoDetectReader;
import org.apache.tika.detect.EncodingDetector;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.metadata.TikaMetadataKeys;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractEncodingDetectorParser;
import org.apache.tika.parser.ParseContext;
import org.ccil.cowan.tagsoup.HTMLSchema;
import org.ccil.cowan.tagsoup.Parser;
import org.ccil.cowan.tagsoup.Schema;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

/* loaded from: input_file:WEB-INF/lib/tika-parsers-1.19.1.jar:org/apache/tika/parser/code/SourceCodeParser.class */
/**
 * Parser for source code files (C++, Java and Groovy media types).
 *
 * <p>The text is read line by line with the detected charset. Each line is scanned for an
 * {@code @author} tag (added to the metadata as a creator) and the number of lines read is
 * stored under the {@code "LoC"} metadata key. The full text is then rendered to markup by a
 * JHighlight {@link Renderer} and that markup is fed through a TagSoup {@link Parser} into the
 * caller's {@link ContentHandler}.
 */
public class SourceCodeParser extends AbstractEncodingDetectorParser {
    private static final long serialVersionUID = -4543476498190054160L;

    /** Captures the text following an {@code @author } tag up to the end of the line. */
    private static final Pattern AUTHOR_PATTERN = Pattern.compile("(?im)@author (.*) *$");

    /** Read-only mapping from each supported media type to its JHighlight renderer name. */
    private static final Map<MediaType, String> TYPES_TO_RENDERER = createRendererMap();

    /** Default TagSoup schema, used when the parse context does not supply one. */
    private static final Schema HTML_SCHEMA = new HTMLSchema();

    public SourceCodeParser() {
    }

    public SourceCodeParser(EncodingDetector encodingDetector) {
        super(encodingDetector);
    }

    /** Builds the media-type-to-renderer table once, at class initialisation. */
    private static Map<MediaType, String> createRendererMap() {
        Map<MediaType, String> renderers = new HashMap<>();
        renderers.put(MediaType.text("x-c++src"), XhtmlRendererFactory.CPP);
        renderers.put(MediaType.text("x-java-source"), "java");
        renderers.put(MediaType.text("x-groovy"), "groovy");
        return Collections.unmodifiableMap(renderers);
    }

    /**
     * Returns the media types this parser can handle.
     *
     * @param parseContext ignored
     * @return an unmodifiable set, so callers cannot alter the shared static renderer table
     */
    @Override
    public Set<MediaType> getSupportedTypes(ParseContext parseContext) {
        return Collections.unmodifiableSet(TYPES_TO_RENDERER.keySet());
    }

    /**
     * Parses a source code document and emits its highlighted form to {@code contentHandler}.
     *
     * <p>Nothing is emitted, and no metadata is changed, unless the metadata carries both a
     * {@code Content-Type} and a resource name. The reader opened over {@code inputStream} is
     * always closed before this method returns; the stream itself is wrapped in a
     * {@link CloseShieldInputStream} first.
     *
     * @param inputStream    the document to read
     * @param contentHandler receives the SAX events produced from the highlighted markup
     * @param metadata       read for content type and resource name; updated with the normalised
     *                       content type, the charset name, any authors found and {@code "LoC"}
     * @param parseContext   may supply a TagSoup {@link Schema} and an encoding detector
     * @throws IOException      if the document cannot be read
     * @throws SAXException     if the highlighted markup cannot be processed
     * @throws TikaException    if the {@code Content-Type} value is not a valid media type
     * @throws RuntimeException if the media type has no registered renderer
     */
    @Override
    public void parse(InputStream inputStream, ContentHandler contentHandler, Metadata metadata, ParseContext parseContext) throws IOException, SAXException, TikaException {
        // try-with-resources closes the reader on every path and records a failing close()
        // as a suppressed exception instead of letting it replace the primary failure.
        try (AutoDetectReader reader = new AutoDetectReader(
                new CloseShieldInputStream(inputStream), metadata, getEncodingDetector(parseContext))) {
            Charset charset = reader.getCharset();
            String contentType = metadata.get("Content-Type");
            String resourceName = metadata.get(TikaMetadataKeys.RESOURCE_NAME_KEY);
            if (contentType == null || resourceName == null) {
                return;
            }

            MediaType mediaType = MediaType.parse(contentType);
            if (mediaType == null) {
                // Fail with the declared exception type rather than an NPE further down.
                throw new TikaException("Invalid content type: " + contentType);
            }
            metadata.set("Content-Type", mediaType.toString());
            metadata.set("Content-Encoding", charset.name());

            String lineSeparator = System.lineSeparator();
            StringBuilder code = new StringBuilder();
            int lineCount = 0;
            String line;
            while ((line = reader.readLine()) != null) {
                code.append(line).append(lineSeparator);
                String author = parserAuthor(line);
                if (author != null) {
                    metadata.add(TikaCoreProperties.CREATOR, author);
                }
                lineCount++;
            }
            metadata.set("LoC", String.valueOf(lineCount));

            String highlighted = getRenderer(mediaType)
                    .highlight(resourceName, code.toString(), charset.name(), false);

            Schema schema = parseContext.get(Schema.class, HTML_SCHEMA);
            Parser htmlParser = new Parser();
            htmlParser.setProperty(Parser.schemaProperty, schema);
            htmlParser.setContentHandler(contentHandler);
            htmlParser.parse(new InputSource(new StringReader(highlighted)));
        }
    }

    /**
     * Looks up the JHighlight renderer registered for a media type.
     *
     * @param mediaType the media type of the document being parsed
     * @return the renderer obtained from {@link XhtmlRendererFactory}
     * @throws RuntimeException if no renderer name is registered for {@code mediaType}
     */
    private Renderer getRenderer(MediaType mediaType) {
        String rendererName = TYPES_TO_RENDERER.get(mediaType);
        if (rendererName == null) {
            throw new RuntimeException("unparseable content type " + mediaType);
        }
        return XhtmlRendererFactory.getRenderer(rendererName);
    }

    /**
     * Extracts the author name from a single line of source text.
     *
     * @param str one line of the document
     * @return the trimmed text following {@code @author }, or {@code null} if the line has no such tag
     */
    private String parserAuthor(String str) {
        Matcher matcher = AUTHOR_PATTERN.matcher(str);
        if (matcher.find()) {
            return matcher.group(1).trim();
        }
        return null;
    }
}
