package de.l3s.icrawl.snapshots;

import com.google.common.io.ByteStreams;
import java.io.IOException;
import java.time.ZoneOffset;
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
import java.time.temporal.TemporalAccessor;
import java.util.Optional;
import org.apache.commons.httpclient.util.DateParseException;
import org.apache.commons.httpclient.util.DateUtil;
import org.archive.format.http.HttpHeaders;
import org.archive.format.http.HttpResponse;
import org.archive.format.http.HttpResponseParser;
import org.archive.format.text.charset.CharsetDetector;
import org.archive.io.ArchiveRecord;
import org.archive.io.ArchiveRecordHeader;
import org.archive.io.arc.ARCRecordMetaData;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:de/l3s/icrawl/snapshots/ArchiveRecordParser.class */
/**
 * Converts an {@link ArchiveRecord} holding an HTTP response into a {@link Snapshot}.
 */
public class ArchiveRecordParser {
    private static final Logger logger = LoggerFactory.getLogger(ArchiveRecordParser.class);
    private final HttpResponseParser responseParser = new HttpResponseParser();
    private final CharsetDetector charsetDetector = new OnlyHtmlCharsetDetector();

    /**
     * Parses the HTTP response stored in the given record and builds a snapshot from it.
     *
     * <p>The crawl time is taken from the record header; if the header yields none, the HTTP
     * {@code Date} response header is used instead (and the crawl time is {@code null} if that is
     * missing or unparseable too). The content type is taken from the HTTP {@code Content-Type}
     * header, falling back to the mimetype of the record header. Bodies whose content type starts
     * with {@code "text"} are decoded to a {@link String} using the detected charset; all other
     * bodies are passed on as the raw {@code byte[]}.
     *
     * @param archiveRecord the record to read; its HTTP response is consumed by this call
     * @return the snapshot built from the record
     * @throws IOException if the response cannot be parsed or its body cannot be read or decoded
     */
    public Snapshot readSnapshot(ArchiveRecord archiveRecord) throws IOException {
        ArchiveRecordHeader recordHeader = archiveRecord.getHeader();
        String url = recordHeader.getUrl();
        HttpResponse response = this.responseParser.parse(archiveRecord);
        HttpHeaders httpHeaders = response.getHeaders();
        ZonedDateTime crawlTime = getCrawlTime(recordHeader).orElseGet(() -> serverDate(httpHeaders));
        byte[] body = ByteStreams.toByteArray(response.getInner());

        // HTTP header names are case-insensitive, so look the header up accordingly
        // (consistent with how the Date header is read in serverDate).
        String contentType = httpHeaders.getValueCaseInsensitive("Content-Type");
        if (contentType == null) {
            contentType = recordHeader.getMimetype();
        }
        boolean isText = contentType != null && contentType.startsWith("text");

        return new Snapshot(
                url,
                crawlTime,
                response.getMessage().getStatus(),
                contentType,
                Utils.asMap(httpHeaders),
                isText
                        ? new String(body, this.charsetDetector.getCharset(body, body.length, httpHeaders))
                        : body);
    }

    /**
     * Reads the HTTP {@code Date} response header as a point in time.
     *
     * @param httpHeaders the HTTP response headers
     * @return the parsed date in UTC, or {@code null} if the header is missing or cannot be parsed
     */
    private ZonedDateTime serverDate(HttpHeaders httpHeaders) {
        String dateHeader = httpHeaders.getValueCaseInsensitive("Date");
        if (dateHeader == null) {
            // DateUtil.parseDate rejects null with an IllegalArgumentException; treat a missing
            // header the same way as an unparseable one.
            return null;
        }
        try {
            // An Instant carries no zone, so ZonedDateTime.from(instant) would always throw;
            // attach UTC explicitly instead.
            return DateUtil.parseDate(dateHeader).toInstant().atZone(ZoneOffset.UTC);
        } catch (DateParseException e) {
            logger.debug("Could not parse HTTP Date header '{}':", dateHeader, e);
            return null;
        }
    }

    /**
     * Extracts the crawl time from the record header.
     *
     * <p>For {@link ARCRecordMetaData} headers the record date is parsed with
     * {@code Utils.parseTimestamp}; for all other headers the {@code WARC-Date} header value is
     * parsed as an ISO date-time.
     *
     * @param archiveRecordHeader the header of the archive record
     * @return the crawl time, or an empty optional if the header does not provide one
     */
    Optional<ZonedDateTime> getCrawlTime(ArchiveRecordHeader archiveRecordHeader) {
        if (archiveRecordHeader instanceof ARCRecordMetaData) {
            return Utils.parseTimestamp(((ARCRecordMetaData) archiveRecordHeader).getDate());
        }
        Object warcDate = archiveRecordHeader.getHeaderValue("WARC-Date");
        if (warcDate == null) {
            // No WARC-Date header: report "absent" so the caller can fall back to the server date
            // instead of failing with a NullPointerException.
            return Optional.empty();
        }
        return Optional.of(ZonedDateTime.parse((String) warcDate, DateTimeFormatter.ISO_DATE_TIME));
    }
}
