package org.apache.tika.eval.app.io;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.LinkOption;
import java.nio.file.OpenOption;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.compress.archivers.ArchiveStreamFactory;
import org.apache.commons.compress.compressors.CompressorStreamFactory;
import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
import org.apache.commons.compress.compressors.z.ZCompressorInputStream;
import org.apache.commons.io.IOUtils;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.eval.app.io.ExtractReaderException;
import org.apache.tika.metadata.HttpHeaders;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
import org.apache.tika.sax.ToTextContentHandler;
import org.apache.tika.sax.ToXMLContentHandler;
import org.apache.tika.serialization.JsonMetadataList;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Loads an "extract" file from disk and turns it into a list of {@link Metadata} objects.
 *
 * <p>The extract's file name must end in {@code .json}, {@code .txt}, {@code .html} or
 * {@code .xhtml}, optionally followed by a compression suffix ({@code .bz2}, {@code .gz},
 * {@code .gzip} or {@code .zip}). JSON extracts are parsed with {@link JsonMetadataList};
 * text and (x)html extracts are read whole and stored as the content of a single
 * {@link Metadata} object.
 */
public class ExtractReader {
    /** Sentinel for "no limit"; a min/max extract length at or below this value is not enforced. */
    public static final long IGNORE_LENGTH = -1;

    private static final Logger LOG = LoggerFactory.getLogger(ExtractReader.class);

    /**
     * Splits an extract file name into: original name, format suffix, optional compression suffix.
     * Compiled once because {@link #parseSuffixes(String)} may be called for every extract.
     */
    private static final Pattern SUFFIX_PATTERN =
            Pattern.compile("(?i)^(.*?)\\.(json|txt|x?html)(?:\\.(bz2|gz(?:ip)?|zip))?$");

    private final ALTER_METADATA_LIST alterMetadataList;
    private final long minExtractLength;
    private final long maxExtractLength;
    private final TikaConfig tikaConfig;

    /** How the metadata list parsed from a JSON extract is post-processed. */
    public enum ALTER_METADATA_LIST {
        /** Return the list exactly as it was parsed. */
        AS_IS,
        /** Keep only the first entry of the list. */
        FIRST_ONLY,
        /**
         * Append the content of every entry (each followed by a single space) into the
         * first entry, then drop all other entries.
         */
        CONCATENATE_CONTENT_INTO_FIRST
    }

    /** The pieces of an extract file name as recognised by {@link #parseSuffixes(String)}. */
    public static class FileSuffixes {
        /** Compression suffix exactly as it appeared in the file name, or {@code null} if absent. */
        String compression;
        /** Extract format, or {@code null} if the file name did not match. */
        FORMAT format;
        /** File name with the format and compression suffixes stripped, or {@code null}. */
        String originalFileName;

        /** Supported extract formats. */
        public enum FORMAT {
            TXT,
            HTML,
            JSON
        }

        private FileSuffixes() {
        }

        /**
         * Sets {@link #format} from a file-name suffix (case-insensitive).
         *
         * @param str the suffix without the leading dot, e.g. {@code "json"}
         * @throws IllegalArgumentException if the suffix is not json, txt or something
         *         containing "html"
         */
        public void setFormat(String str) {
            String suffix = str.toLowerCase(Locale.ENGLISH);
            if (suffix.equals("json")) {
                this.format = FORMAT.JSON;
                return;
            }
            if (suffix.equals("txt")) {
                this.format = FORMAT.TXT;
                return;
            }
            if (suffix.contains("html")) {
                this.format = FORMAT.HTML;
                return;
            }
            throw new IllegalArgumentException("extract must end in .json, .txt or .xhtml");
        }
    }

    /** Creates a reader that leaves metadata lists as-is and enforces no length limits. */
    public ExtractReader() {
        this(ALTER_METADATA_LIST.AS_IS, IGNORE_LENGTH, IGNORE_LENGTH);
    }

    /**
     * Creates a reader that enforces no length limits.
     *
     * @param alter_metadata_list how to post-process metadata lists parsed from JSON extracts
     */
    public ExtractReader(ALTER_METADATA_LIST alter_metadata_list) {
        this(alter_metadata_list, IGNORE_LENGTH, IGNORE_LENGTH);
    }

    /**
     * Creates a reader with explicit file-size limits.
     *
     * @param alter_metadata_list how to post-process metadata lists parsed from JSON extracts
     * @param j minimum extract file size in bytes; values &lt;= {@link #IGNORE_LENGTH} disable
     *        the check
     * @param j2 maximum extract file size in bytes; values &lt;= {@link #IGNORE_LENGTH} disable
     *        the check
     * @throws IllegalArgumentException if a maximum is set and the minimum is not strictly
     *         smaller than it
     */
    public ExtractReader(ALTER_METADATA_LIST alter_metadata_list, long j, long j2) {
        // Validate before touching any state; the message reports both limits.
        if (j2 > IGNORE_LENGTH && j >= j2) {
            throw new IllegalArgumentException(
                    "minExtractLength(" + j + ") must be < maxExtractLength(" + j2 + ")");
        }
        this.tikaConfig = TikaConfig.getDefaultConfig();
        this.alterMetadataList = alter_metadata_list;
        this.minExtractLength = j;
        this.maxExtractLength = j2;
    }

    /**
     * Parses the format and compression suffixes out of an extract file name.
     *
     * @param str the file name (not the full path); may be {@code null}
     * @return a {@link FileSuffixes} whose fields are all {@code null} when {@code str} is
     *         {@code null} or does not end in a recognised suffix combination
     */
    protected static FileSuffixes parseSuffixes(String str) {
        FileSuffixes suffixes = new FileSuffixes();
        if (str == null) {
            return suffixes;
        }
        Matcher m = SUFFIX_PATTERN.matcher(str);
        if (m.find()) {
            suffixes.originalFileName = m.group(1);
            suffixes.setFormat(m.group(2));
            suffixes.compression = m.group(3);
        }
        return suffixes;
    }

    /**
     * Reads the extract at {@code path} and returns its metadata list.
     *
     * @param path the extract file
     * @return the metadata list, or {@code null} if the compression suffix is not one this
     *         reader can handle
     * @throws ExtractReaderException if the file is missing or not a regular file, has an
     *         unrecognised suffix, is empty, violates the configured min/max length, or
     *         cannot be read
     */
    public List<Metadata> loadExtract(Path path) throws ExtractReaderException {
        if (path == null || !Files.isRegularFile(path)) {
            throw new ExtractReaderException(ExtractReaderException.TYPE.NO_EXTRACT_FILE);
        }
        FileSuffixes suffixes = parseSuffixes(path.getFileName().toString());
        if (suffixes.format == null) {
            throw new ExtractReaderException(ExtractReaderException.TYPE.INCORRECT_EXTRACT_FILE_SUFFIX);
        }

        // Keep this try narrow: the typed exceptions thrown by the checks below must not be
        // intercepted and re-wrapped by an IOException handler (which would happen if
        // ExtractReaderException is an IOException subtype).
        long size;
        try {
            size = Files.size(path);
        } catch (IOException e) {
            throw new ExtractReaderException(ExtractReaderException.TYPE.IO_EXCEPTION, e);
        }
        if (size == 0) {
            throw new ExtractReaderException(ExtractReaderException.TYPE.ZERO_BYTE_EXTRACT_FILE);
        }
        if (this.minExtractLength > IGNORE_LENGTH && size < this.minExtractLength) {
            LOG.info("minExtractLength {} > IGNORE_LENGTH {} and length {} < minExtractLength {} for file '{}'",
                    this.minExtractLength, IGNORE_LENGTH, size, this.minExtractLength, path);
            throw new ExtractReaderException(ExtractReaderException.TYPE.EXTRACT_FILE_TOO_SHORT);
        }
        if (this.maxExtractLength > IGNORE_LENGTH && size > this.maxExtractLength) {
            LOG.info("maxExtractLength {} > IGNORE_LENGTH {} and length {} > maxExtractLength {} for file '{}'",
                    this.maxExtractLength, IGNORE_LENGTH, size, this.maxExtractLength, path);
            throw new ExtractReaderException(ExtractReaderException.TYPE.EXTRACT_FILE_TOO_LONG);
        }

        InputStream is = null;
        Reader reader = null;
        try {
            is = Files.newInputStream(path);
            if (suffixes.compression != null) {
                // The suffix regex is case-insensitive, so normalise before dispatching;
                // otherwise "x.json.GZ" would be recognised but then rejected here.
                switch (suffixes.compression.toLowerCase(Locale.ROOT)) {
                    case "bz2":
                        is = new BZip2CompressorInputStream(is);
                        break;
                    case "gz":
                    case "gzip":
                        is = new GzipCompressorInputStream(is);
                        break;
                    case "zip":
                        // NOTE(review): ZCompressorInputStream appears to target the Unix
                        // compress (.Z) format rather than ZIP archives -- confirm this
                        // mapping is intended. Behavior is left unchanged here.
                        is = new ZCompressorInputStream(is);
                        break;
                    default:
                        LOG.warn("Can't yet process compression of type: {}", suffixes.compression);
                        return null;
                }
            }
            reader = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8));

            if (suffixes.format == FileSuffixes.FORMAT.JSON) {
                List<Metadata> parsed = JsonMetadataList.fromJson(reader);
                applyListAlteration(parsed);
                return parsed;
            }
            return generateListFromTextFile(reader, suffixes);
        } catch (IOException e) {
            throw new ExtractReaderException(ExtractReaderException.TYPE.IO_EXCEPTION, e);
        } finally {
            // Runs on every exit path, including a failed compressor constructor (in which
            // case 'is' still refers to the raw file stream) and the unsupported-compression
            // early return. Close failures are deliberately ignored, as before.
            IOUtils.closeQuietly(reader);
            IOUtils.closeQuietly(is);
        }
    }

    /**
     * Applies the configured {@link ALTER_METADATA_LIST} policy in place. Lists with at most
     * one entry, and the {@code AS_IS} policy, are left untouched.
     */
    private void applyListAlteration(List<Metadata> metadataList) {
        if (metadataList.size() <= 1) {
            return;
        }
        if (this.alterMetadataList.equals(ALTER_METADATA_LIST.FIRST_ONLY)) {
            metadataList.subList(1, metadataList.size()).clear();
        } else if (this.alterMetadataList.equals(ALTER_METADATA_LIST.CONCATENATE_CONTENT_INTO_FIRST)) {
            StringBuilder content = new StringBuilder();
            for (Metadata m : metadataList) {
                String c = m.get(TikaCoreProperties.TIKA_CONTENT);
                if (c != null) {
                    content.append(c);
                    content.append(" ");
                }
            }
            metadataList.get(0).set(TikaCoreProperties.TIKA_CONTENT, content.toString());
            metadataList.subList(1, metadataList.size()).clear();
        }
    }

    /**
     * Builds a single-entry metadata list from a text or (x)html extract: the whole stream
     * becomes the content, the content handler name is recorded according to the format, the
     * original file name is stored as the resource name, and a content type is set if the
     * configured mime repository detects one from that metadata.
     *
     * @throws IOException if reading the extract or running detection fails
     */
    private List<Metadata> generateListFromTextFile(Reader reader, FileSuffixes fileSuffixes) throws IOException {
        String content = IOUtils.toString(reader);
        Metadata metadata = new Metadata();
        metadata.set(TikaCoreProperties.TIKA_CONTENT, content);
        if (fileSuffixes.format == FileSuffixes.FORMAT.HTML) {
            metadata.set(TikaCoreProperties.TIKA_CONTENT_HANDLER, ToXMLContentHandler.class.getSimpleName());
        } else if (fileSuffixes.format == FileSuffixes.FORMAT.TXT) {
            metadata.set(TikaCoreProperties.TIKA_CONTENT_HANDLER, ToTextContentHandler.class.getSimpleName());
        }
        metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, fileSuffixes.originalFileName);
        // No stream is passed: detection works from the metadata (resource name) alone.
        MediaType detected = this.tikaConfig.getMimeRepository().detect(null, metadata);
        if (detected != null) {
            metadata.set(HttpHeaders.CONTENT_TYPE, detected.toString());
        }
        List<Metadata> result = new ArrayList<>();
        result.add(metadata);
        return result;
    }
}
