/*
 * Decompiled with CFR 0.152.
 */
package net.sf.regain.crawler.preparator;

import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import net.sf.regain.RegainException;
import net.sf.regain.crawler.document.AbstractPreparator;
import net.sf.regain.crawler.document.RawDocument;
import org.apache.log4j.Logger;
import org.apache.poi.POITextExtractor;
import org.apache.poi.extractor.ExtractorFactory;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;

/**
 * Preparator that extracts text content and a subset of the document
 * properties from Microsoft Office documents using Apache POI's
 * {@link ExtractorFactory}.
 *
 * <p>The MIME types this preparator registers for are passed to the
 * super-constructor; see the constructor for the full list.
 */
public class PoiMsOfficePreparator
extends AbstractPreparator {
    private static final Logger mLog = Logger.getLogger(PoiMsOfficePreparator.class);

    /**
     * Names of the metadata entries that are concatenated (in this order)
     * into the cleaned meta data string. The names are looked up verbatim
     * in the map built from the metadata extractor's text.
     */
    private static final String[] META_DATA_FIELDS = {"Title", "Creator", "Company", "Keywords", "LastModifiedBy", "Description", "Subject", "PID_TITLE", "PID_AUTHOR", "PID_COMMENTS", "PID_KEYWORDS", "PID_SUBJECT", "PID_COMPANY"};

    /**
     * Creates the preparator and registers the MIME types it handles.
     *
     * @throws RegainException if the super-constructor rejects the MIME types.
     */
    public PoiMsOfficePreparator() throws RegainException {
        super(new String[]{"application/msexcel", "application/vnd.ms-excel", "application/vnd.openxmlformats-officedocument.spreadsheetml", "application/msword", "application/vnd.ms-word", "application/vnd.openxmlformats-officedocument.wordprocessingml", "application/msvisio", "application/vnd.visio", "application/mspowerpoint", "application/vnd.ms-powerpoint", "application/vnd.openxmlformats-officedocument.presentationml", "application/vnd.ms-office"});
    }

    /**
     * Extracts the text content, the selected metadata values and the title
     * of the given document.
     *
     * <p>The content stream obtained from the document is always closed
     * before this method returns.
     *
     * @param rawDocument the document to prepare.
     * @throws RegainException if creating the extractor or reading the
     *         document fails; the original exception is kept as the cause.
     */
    public void prepare(RawDocument rawDocument) throws RegainException {
        InputStream stream = null;
        try {
            stream = rawDocument.getContentAsStream();
            POITextExtractor contentExtractor = ExtractorFactory.createExtractor(stream);
            this.setCleanedContent(contentExtractor.getText());

            // Defensive: a missing metadata extractor must not discard the
            // content that was already extracted above. createMetaDataMap
            // copes with a null text and yields an empty map.
            POITextExtractor metadataExtractor = contentExtractor.getMetadataTextExtractor();
            String metaDataText = metadataExtractor == null ? null : metadataExtractor.getText();
            Map<String, String> metaDataMap = this.createMetaDataMap(metaDataText);

            // Values are separated (and surrounded) by single blanks.
            StringBuilder metaData = new StringBuilder(" ");
            for (String field : META_DATA_FIELDS) {
                String value = metaDataMap.get(field);
                if (value != null) {
                    metaData.append(value).append(" ");
                }
            }
            this.setCleanedMetaData(metaData.toString());
            if (mLog.isDebugEnabled()) {
                mLog.debug("Extracted meta data ::" + this.getCleanedMetaData() + ":: from " + rawDocument.getUrl());
            }

            // "Title" takes precedence over "PID_TITLE" when both are present.
            if (metaDataMap.containsKey("Title")) {
                this.setTitle(metaDataMap.get("Title"));
            } else if (metaDataMap.containsKey("PID_TITLE")) {
                this.setTitle(metaDataMap.get("PID_TITLE"));
            }
        }
        catch (InvalidFormatException invalidFormatEx) {
            throw new RegainException("Invalid format while reading MS* (OpenXML) document. URL: " + rawDocument.getUrl(), invalidFormatEx);
        }
        catch (Exception e) {
            throw new RegainException("Reading MS* (OpenXML) document failed : " + rawDocument.getUrl(), e);
        }
        finally {
            if (stream != null) {
                try {
                    stream.close();
                }
                catch (Exception ignored) {
                    // Best effort: a failure to close must not mask the
                    // result (or the exception) of the extraction itself.
                }
            }
        }
    }

    /**
     * Parses the text delivered by the metadata extractor into a key/value map.
     *
     * <p>The text is split into lines at {@code '\n'}. Each line is split at
     * its <em>first</em> {@code '='}, so values that themselves contain
     * {@code '='} are kept intact. Key and value are trimmed; lines without
     * {@code '='} or with an empty key or value are skipped. If a key occurs
     * more than once, the last occurrence wins.
     *
     * @param rawLine the complete metadata text; may be {@code null} or empty.
     * @return a map from trimmed key to trimmed value; never {@code null}.
     */
    private Map<String, String> createMetaDataMap(String rawLine) {
        Map<String, String> metaDataMap = new HashMap<String, String>();
        if (rawLine == null || rawLine.isEmpty()) {
            return metaDataMap;
        }
        for (String line : rawLine.split("\n")) {
            int separator = line.indexOf('=');
            if (separator < 0) {
                continue;
            }
            String key = line.substring(0, separator).trim();
            String value = line.substring(separator + 1).trim();
            if (key.isEmpty() || value.isEmpty()) {
                continue;
            }
            metaDataMap.put(key, value);
        }
        return metaDataMap;
    }
}

