package org.metafacture.html;

import java.io.IOException;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.UUID;
import org.apache.commons.io.IOUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Attribute;
import org.jsoup.nodes.Attributes;
import org.jsoup.nodes.Element;
import org.metafacture.framework.FluxCommand;
import org.metafacture.framework.StreamReceiver;
import org.metafacture.framework.annotations.Description;
import org.metafacture.framework.annotations.In;
import org.metafacture.framework.annotations.Out;
import org.metafacture.framework.helpers.DefaultObjectPipe;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Decodes an HTML document, read from a {@link Reader}, into stream events.
 *
 * <p>Each document becomes one record whose ID is a random UUID. Every HTML
 * element becomes an entity named after the element's node name, every
 * attribute becomes a literal inside that entity, and non-empty element
 * content is emitted as a literal named {@code value}. Child elements are
 * processed recursively as nested entities.
 *
 * <p>In addition, selected attribute values can be used as literal
 * <em>names</em> ("subfields"); which ones is configured with
 * {@link #setAttrValsAsSubfields(String)}.
 */
@In(Reader.class)
@Out(StreamReceiver.class)
@FluxCommand("decode-html")
@Description("Decode HTML to metadata events. The attrValsAsSubfields option can be used to override the default attribute values to be used as subfields (e.g. by default `link rel=\"canonical\" href=\"http://example.org\"` becomes `link.canonical`). It expects an HTTP-style query string specifying as key the attributes whose value should be used as a subfield, and as value the attribute whose value should be the subfield value, e.g. the default contains `link.rel=href`. To use the HTML element text as the value (instead of another attribute), omit the value of the query-string key-value pair, e.g. `title.lang`. To add to the defaults, instead of replacing them, start with an `&`, e.g. `&h3.class`")
public class HtmlDecoder extends DefaultObjectPipe<Reader, StreamReceiver> {
    private static final Logger LOG = LoggerFactory.getLogger(HtmlDecoder.class);
    private static final String DEFAULT_ATTR_VALS_AS_SUBFIELDS = "meta.name=content&meta.property=content&link.rel=href&a.rel=href";

    /**
     * Maps {@code element.attribute} keys to the name of the attribute whose
     * value is emitted under the subfield; an empty string means "use the
     * element text".
     */
    private Map<String, String> attrValsAsSubfields;

    /**
     * Creates a decoder configured with the default subfield mapping.
     */
    public HtmlDecoder() {
        // Parse directly instead of calling the public (overridable) setter from the constructor.
        this.attrValsAsSubfields = parseAttrValsAsSubfields(DEFAULT_ATTR_VALS_AS_SUBFIELDS);
    }

    /**
     * Reads the complete HTML document from the reader and emits it as a
     * single record. If reading fails, the error is logged and no events are
     * emitted at all.
     *
     * @param reader the source of the HTML document
     */
    public void process(final Reader reader) {
        final String html;
        try {
            // Read everything BEFORE starting the record, so that an I/O failure
            // cannot leave the receiver with a startRecord that is never ended.
            html = IOUtils.toString(reader);
        } catch (final IOException e) {
            LOG.error(e.getMessage(), e);
            return;
        }
        final StreamReceiver receiver = getReceiver();
        receiver.startRecord(UUID.randomUUID().toString());
        process(Jsoup.parse(html), receiver);
        receiver.endRecord();
    }

    /**
     * Emits one entity per child element of {@code parent}, recursing into
     * each child's own children.
     *
     * @param parent   the element whose children are emitted
     * @param receiver the receiver of the generated events
     */
    private void process(final Element parent, final StreamReceiver receiver) {
        for (final Element element : parent.children()) {
            receiver.startEntity(element.nodeName());
            final Attributes attributes = element.attributes();
            boolean textUsedAsSubfield = false;
            for (final Attribute attribute : attributes) {
                // Accumulate with |= so that a later, non-matching attribute does not
                // reset the flag set by an earlier attribute that consumed the text.
                textUsedAsSubfield |= handleAttributeValuesAsSubfields(receiver, element, attributes, attribute);
                receiver.literal(attribute.getKey(), attribute.getValue());
            }
            final String text = element.text().trim();
            // Fall back to data() when the element has no text
            // (presumably script/style content; see the jsoup documentation).
            final String value = text.isEmpty() ? element.data() : text;
            if (!value.isEmpty() && !textUsedAsSubfield) {
                receiver.literal("value", value);
            }
            process(element, receiver);
            receiver.endEntity();
        }
    }

    /**
     * Emits an extra literal named after the attribute's value if
     * {@code element.attribute} is configured as a subfield.
     *
     * @param receiver   the receiver of the generated events
     * @param element    the element owning the attribute
     * @param attributes all attributes of the element
     * @param attribute  the attribute currently being processed
     * @return {@code true} only if the element text was emitted as the
     *         subfield value; {@code false} if nothing was emitted or the
     *         value was taken from another attribute
     */
    private boolean handleAttributeValuesAsSubfields(final StreamReceiver receiver, final Element element, final Attributes attributes, final Attribute attribute) {
        final String configKey = element.nodeName() + "." + attribute.getKey();
        // The map never holds null values, so null means "not configured".
        final String valueAttribute = this.attrValsAsSubfields.get(configKey);
        if (valueAttribute == null) {
            return false;
        }
        if (valueAttribute.trim().isEmpty()) {
            receiver.literal(attribute.getValue(), element.text().trim());
            return true;
        }
        receiver.literal(attribute.getValue(), attributes.get(valueAttribute));
        return false;
    }

    /**
     * Sets which attribute values are used as subfields.
     *
     * <p>The argument is an HTTP-style query string. Each key has the form
     * {@code element.attribute} and names the attribute whose value becomes
     * the subfield name; the value names the attribute whose value becomes
     * the subfield value. A pair without a value uses the element text
     * instead. A leading {@code &} appends to the defaults instead of
     * replacing them. Keys and values are URL-decoded as UTF-8; pairs with an
     * empty key are ignored.
     *
     * @param str the query string describing the mapping
     */
    public void setAttrValsAsSubfields(final String str) {
        this.attrValsAsSubfields = parseAttrValsAsSubfields(str);
    }

    /**
     * Parses a subfield mapping given as an HTTP-style query string.
     *
     * @param str the query string; a leading {@code &} prepends the defaults
     * @return a new mutable map from {@code element.attribute} to the name of
     *         the value attribute (empty string for "use element text")
     */
    private static Map<String, String> parseAttrValsAsSubfields(final String str) {
        final String query = str.startsWith("&") ? DEFAULT_ATTR_VALS_AS_SUBFIELDS + str : str;
        final Map<String, String> result = new HashMap<>();
        final String encoding = StandardCharsets.UTF_8.name();
        for (final String pair : query.split("&")) {
            final String[] keyAndValue = pair.split("=");
            // "=".split("=") yields an empty array and "" yields [""]: neither has a usable
            // key, and an empty key could never match "element.attribute" anyway.
            if (keyAndValue.length == 0 || keyAndValue[0].isEmpty()) {
                continue;
            }
            try {
                final String key = URLDecoder.decode(keyAndValue[0], encoding);
                final String value = keyAndValue.length > 1 ? URLDecoder.decode(keyAndValue[1], encoding) : "";
                result.put(key, value);
            } catch (final UnsupportedEncodingException e) {
                LOG.error(e.getMessage(), e);
            }
        }
        return result;
    }
}
