package io.annot8.components.tika.processors;

import io.annot8.api.capabilities.Capabilities;
import io.annot8.api.components.annotations.ComponentDescription;
import io.annot8.api.components.annotations.ComponentName;
import io.annot8.api.components.annotations.SettingsClass;
import io.annot8.api.components.responses.ProcessorResponse;
import io.annot8.api.context.Context;
import io.annot8.api.data.Content;
import io.annot8.api.data.Item;
import io.annot8.api.settings.Description;
import io.annot8.common.components.AbstractProcessor;
import io.annot8.common.components.AbstractProcessorDescriptor;
import io.annot8.common.components.capabilities.SimpleCapabilities;
import io.annot8.common.data.content.FileContent;
import io.annot8.common.data.content.InputStreamContent;
import io.annot8.common.data.content.Text;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.sax.BodyContentHandler;
import org.xml.sax.SAXException;

@ComponentName("Tika Extractor")
@ComponentDescription("Extract text from files and streams using Apache Tika")
@SettingsClass(Settings.class)
/* loaded from: input_file:io/annot8/components/tika/processors/TikaExtractor.class */
public class TikaExtractor extends AbstractProcessorDescriptor<Processor, Settings> {

    /* loaded from: input_file:io/annot8/components/tika/processors/TikaExtractor$Processor.class */
    public static class Processor extends AbstractProcessor {
        private final boolean removeSourceContent;

        public Processor(boolean z) {
            this.removeSourceContent = z;
        }

        public ProcessorResponse process(Item item) {
            item.getContents(InputStreamContent.class).forEach(inputStreamContent -> {
                createText(item, inputStreamContent.getId(), (InputStream) inputStreamContent.getData());
                if (this.removeSourceContent) {
                    item.removeContent(inputStreamContent);
                }
            });
            item.getContents(FileContent.class).forEach(fileContent -> {
                try {
                    createText(item, fileContent.getId(), new FileInputStream((File) fileContent.getData()));
                    if (this.removeSourceContent) {
                        item.removeContent(fileContent);
                    }
                } catch (IOException e) {
                    log().error("Unable to read File Content {}", fileContent.getId(), e);
                }
            });
            return ProcessorResponse.ok();
        }

        private void createText(Item item, String str, InputStream inputStream) {
            try {
                BodyContentHandler bodyContentHandler = new BodyContentHandler(Integer.MAX_VALUE);
                Metadata metadata = new Metadata();
                new AutoDetectParser().parse(inputStream, bodyContentHandler, metadata, new ParseContext());
                Content.Builder withDescription = item.createContent(Text.class).withData(bodyContentHandler.toString()).withDescription("Tika'd output of " + str);
                for (String str2 : metadata.names()) {
                    withDescription = metadata.isMultiValued(str2) ? (Content.Builder) withDescription.withProperty(str2, metadata.getValues(str2)) : (Content.Builder) withDescription.withProperty(str2, metadata.get(str2));
                }
                withDescription.save();
            } catch (IOException | SAXException | TikaException e) {
                log().error("Unable to extract text from Content {}", str, e);
            }
        }
    }

    /* loaded from: input_file:io/annot8/components/tika/processors/TikaExtractor$Settings.class */
    /**
     * Configuration for the Tika extractor. Holds a single flag controlling whether source content
     * is removed once it has been processed; the flag defaults to {@code true}.
     */
    public static class Settings implements io.annot8.api.settings.Settings {
        /** Whether the source content should be removed after processing. */
        private boolean removeSourceContent = true;

        /**
         * @return the current value of the remove-source-content flag
         */
        @Description(defaultValue = "true", value = "Should the source Content be removed after successful processing?")
        public boolean isRemoveSourceContent() {
            return removeSourceContent;
        }

        /**
         * @param z the new value of the remove-source-content flag
         */
        public void setRemoveSourceContent(boolean z) {
            removeSourceContent = z;
        }

        /**
         * There is nothing to check on a single boolean, so the settings are always reported valid.
         *
         * @return always {@code true}
         */
        public boolean validate() {
            return true;
        }
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Builds the processor, configured from the given settings.
     *
     * @param context the component context (not used here)
     * @param settings supplies the remove-source-content flag passed to the processor
     * @return a new {@link Processor}
     */
    public Processor createComponent(Context context, Settings settings) {
        boolean removeSource = settings.isRemoveSourceContent();
        return new Processor(removeSource);
    }

    /**
     * Describes what this component does to an item: it processes {@link InputStreamContent} and
     * {@link FileContent}, creates {@link Text}, and, when the settings ask for source removal,
     * also deletes the two source content types.
     *
     * @return the capabilities matching the current settings
     */
    public Capabilities capabilities() {
        SimpleCapabilities.Builder builder =
                new SimpleCapabilities.Builder()
                        .withProcessesContent(InputStreamContent.class)
                        .withProcessesContent(FileContent.class)
                        .withCreatesContent(Text.class);

        boolean deletesSource = ((Settings) getSettings()).isRemoveSourceContent();
        if (!deletesSource) {
            return builder.build();
        }
        return builder
                .withDeletesContent(InputStreamContent.class)
                .withDeletesContent(FileContent.class)
                .build();
    }
}
