/*
 * Decompiled with CFR 0.152.
 */
package org.semanticdesktop.aperture.extractor.openxml;

import java.io.BufferedInputStream;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.text.ParseException;
import java.util.Arrays;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.StringTokenizer;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.ontoware.rdf2go.exception.ModelException;
import org.ontoware.rdf2go.model.Model;
import org.ontoware.rdf2go.model.node.Resource;
import org.ontoware.rdf2go.model.node.URI;
import org.ontoware.rdf2go.vocabulary.RDF;
import org.semanticdesktop.aperture.extractor.Extractor;
import org.semanticdesktop.aperture.extractor.ExtractorException;
import org.semanticdesktop.aperture.rdf.RDFContainer;
import org.semanticdesktop.aperture.rdf.util.ModelUtil;
import org.semanticdesktop.aperture.util.DateUtil;
import org.semanticdesktop.aperture.util.IOUtil;
import org.semanticdesktop.aperture.util.SimpleSAXAdapter;
import org.semanticdesktop.aperture.util.SimpleSAXListener;
import org.semanticdesktop.aperture.util.SimpleSAXParser;
import org.semanticdesktop.aperture.vocabulary.NCO;
import org.semanticdesktop.aperture.vocabulary.NFO;
import org.semanticdesktop.aperture.vocabulary.NIE;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;
import org.xml.sax.SAXException;

/**
 * Extractor for zip-based Office Open XML packages.
 *
 * <p>The stream is read in two passes. The first pass locates and parses
 * {@code [Content_Types].xml}; the second pass walks every zip entry, resolves
 * its content type and either collects text from it or reads document
 * properties from it. Collected text is added to the result container as
 * {@code NIE.plainTextContent}.
 *
 * <p>Working state is kept in instance fields for the duration of one
 * {@link #extract} call, so a single instance must not run concurrent
 * extractions.
 */
public class OpenXmlExtractor
implements Extractor {
    /** Name of the zip entry that maps parts to content types. */
    private static final String CONTENT_TYPES_FILE = "[Content_Types].xml";

    /** Mark limit / buffer size (4 MiB) that allows the stream to be rewound between passes. */
    private static final int BUFFER_SIZE = 0x400000;

    /** Initial capacity, in chars, of the buffer that accumulates the extracted text. */
    private static final int INITIAL_TEXT_CAPACITY = 262144;

    /** Content type of the core properties part. */
    private static final String CORE_PROPERTIES_TYPE = "application/vnd.openxmlformats-package.core-properties+xml";

    /** Content type of the extended (application) properties part. */
    private static final String EXTENDED_PROPERTIES_TYPE = "application/vnd.openxmlformats-officedocument.extended-properties+xml";

    /** Parser feature that rejects any document containing a DOCTYPE declaration. */
    private static final String DISALLOW_DOCTYPE_FEATURE = "http://apache.org/xml/features/disallow-doctype-decl";

    private static final String END_OF_LINE = System.getProperty("line.separator", "\n");

    /**
     * Leading bytes that make {@link #extract} report the document as encrypted.
     * The first eight bytes are D0 CF 11 E0 A1 B1 1A E1 (presumably the OLE2
     * compound-file signature), followed by eight zero bytes.
     */
    private static final byte[] ENCRYPTED_MARKER = new byte[]{-48, -49, 17, -32, -95, -79, 26, -31, 0, 0, 0, 0, 0, 0, 0, 0};

    /** Content types whose text is collected from element content (see {@link TextCollector}). */
    private static final HashSet<String> TEXT_ELEMENT_TYPES = new HashSet<String>();

    /** Content types whose text is collected from the "name" attribute of the mapped tag. */
    private static final HashMap<String, String> TEXT_ATTRIBUTE_TYPES = new HashMap<String, String>();

    private final Logger logger = LoggerFactory.getLogger(this.getClass());

    /** Content type mapping of the package currently being extracted; null outside of extract(). */
    private ContentTypes contentTypes;

    /** Text accumulated for the package currently being extracted; null outside of extract(). */
    private StringBuilder fullText;

    /**
     * Extracts text and metadata from the given stream into {@code result}.
     *
     * <p>If the stream starts with {@link #ENCRYPTED_MARKER}, only the encryption
     * status is recorded and nothing else is extracted.
     *
     * @param id       identifier of the document (not used by this implementation)
     * @param stream   the document data; wrapped in a BufferedInputStream when it does not support mark/reset
     * @param charset  not used by this implementation
     * @param mimeType not used by this implementation
     * @param result   container receiving the extracted statements
     * @throws ExtractorException on I/O errors, when the stream cannot be rewound,
     *         when an XML part read through DOM is malformed, or when the package
     *         has no {@code [Content_Types].xml} entry
     */
    public void extract(URI id, InputStream stream, Charset charset, String mimeType, RDFContainer result) throws ExtractorException {
        if (!stream.markSupported()) {
            stream = new BufferedInputStream(stream, BUFFER_SIZE);
        }
        stream.mark(BUFFER_SIZE);

        // Start from a clean slate: a mapping left over from a previous call must
        // never be applied to a package that lacks its own [Content_Types].xml.
        this.contentTypes = null;
        this.fullText = new StringBuilder(INITIAL_TEXT_CAPACITY);
        try {
            if (this.isEncrypted(stream)) {
                result.add(NFO.encryptionStatus, NFO.encryptedStatus);
                return;
            }

            this.loadContentTypes(stream);
            if (this.contentTypes == null) {
                throw new ExtractorException("missing [Content_Types].xml file");
            }

            // Rewind to the marked position for the second pass over the archive.
            try {
                stream.reset();
            }
            catch (IOException e) {
                throw new ExtractorException("Unable to reset stream", e);
            }

            this.processParts(stream, result);

            String text = this.fullText.toString();
            if (text.length() > 0) {
                result.add(NIE.plainTextContent, text);
            }
        }
        finally {
            // Do not keep the (potentially large) text buffer or the mapping alive between calls.
            this.contentTypes = null;
            this.fullText = null;
        }
    }

    /**
     * Reads the first bytes of the stream, rewinds it to the mark and reports
     * whether those bytes equal {@link #ENCRYPTED_MARKER}.
     */
    private boolean isEncrypted(InputStream stream) throws ExtractorException {
        try {
            byte[] firstBytes = IOUtil.readBytes(stream, ENCRYPTED_MARKER.length);
            stream.reset();
            return Arrays.equals(firstBytes, ENCRYPTED_MARKER);
        }
        catch (IOException e) {
            throw new ExtractorException(e);
        }
    }

    /**
     * First pass: scans the zip entries until {@code [Content_Types].xml} is found
     * and parses it. Leaves {@link #contentTypes} null when no such entry exists.
     */
    private void loadContentTypes(InputStream stream) throws ExtractorException {
        try {
            // The zip stream is intentionally not closed: that would close the caller's stream.
            ZipInputStream zipStream = new ZipInputStream(stream);
            ZipEntry entry;
            while ((entry = zipStream.getNextEntry()) != null) {
                if (CONTENT_TYPES_FILE.equals(entry.getName())) {
                    this.parseContentTypes(zipStream);
                    return;
                }
                zipStream.closeEntry();
            }
        }
        catch (IOException e) {
            throw new ExtractorException(e);
        }
    }

    /**
     * Second pass: dispatches every zip entry according to its content type to
     * a text collector or to the metadata extraction.
     */
    private void processParts(InputStream stream, RDFContainer result) throws ExtractorException {
        try {
            // The zip stream is intentionally not closed: that would close the caller's stream.
            ZipInputStream zipStream = new ZipInputStream(stream);
            ZipEntry entry;
            while ((entry = zipStream.getNextEntry()) != null) {
                String partName = this.toAbsoluteName(entry.getName());
                String type = this.contentTypes.getType(partName);
                if (type != null) {
                    if (TEXT_ELEMENT_TYPES.contains(type)) {
                        this.process(zipStream, new TextCollector());
                    } else if (TEXT_ATTRIBUTE_TYPES.containsKey(type)) {
                        this.process(zipStream, new AttributeTextCollector(TEXT_ATTRIBUTE_TYPES.get(type)));
                    } else if (CORE_PROPERTIES_TYPE.equals(type) || EXTENDED_PROPERTIES_TYPE.equals(type)) {
                        this.extractMetadata(zipStream, result);
                    }
                }
                zipStream.closeEntry();
            }
        }
        catch (IOException e) {
            throw new ExtractorException(e);
        }
    }

    /**
     * Parses the content types part and stores the resulting mapping in
     * {@link #contentTypes}. Only direct {@code Default} and {@code Override}
     * children of the root element are considered; elements lacking one of
     * their required attributes are skipped.
     */
    private void parseContentTypes(InputStream stream) throws ExtractorException {
        Document doc = this.getDocument(stream, false);
        ContentTypes parsed = new ContentTypes();
        NodeList children = doc.getDocumentElement().getChildNodes();
        int nrChildren = children.getLength();
        for (int i = 0; i < nrChildren; ++i) {
            Node child = children.item(i);
            if (child.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }
            Element element = (Element)child;
            String name = element.getTagName();
            // Element.getAttribute returns "" (never null) for a missing attribute,
            // so absence has to be detected by an emptiness check.
            String contentType = element.getAttribute("ContentType");
            if (contentType == null || contentType.length() == 0) {
                continue;
            }
            if ("Default".equals(name)) {
                String extension = element.getAttribute("Extension");
                if (extension != null && extension.length() > 0) {
                    parsed.addDefault(extension, contentType);
                }
            } else if ("Override".equals(name)) {
                String partName = element.getAttribute("PartName");
                if (partName != null && partName.length() > 0) {
                    parsed.addOverride(partName, contentType);
                }
            }
        }
        this.contentTypes = parsed;
    }

    /**
     * Parses the stream into a DOM document without closing the stream.
     *
     * <p>The parser is non-validating and is hardened against XXE and entity
     * expansion attacks: secure processing is enabled and DOCTYPE declarations
     * are rejected, where the underlying parser supports these features.
     *
     * @throws ExtractorException when the XML cannot be read or parsed
     */
    private Document getDocument(InputStream stream, boolean namespaceAware) throws ExtractorException {
        DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory.newInstance();
        docBuilderFactory.setNamespaceAware(namespaceAware);
        docBuilderFactory.setValidating(false);
        docBuilderFactory.setExpandEntityReferences(false);
        this.enableFeature(docBuilderFactory, XMLConstants.FEATURE_SECURE_PROCESSING);
        this.enableFeature(docBuilderFactory, DISALLOW_DOCTYPE_FEATURE);

        DocumentBuilder docBuilder;
        try {
            docBuilder = docBuilderFactory.newDocumentBuilder();
        }
        catch (ParserConfigurationException e) {
            throw new RuntimeException("unable to instantiate DocumentBuilder", e);
        }
        try {
            // Shield the zip stream: the parser may close its input when done.
            return docBuilder.parse(new NonCloseableStream(stream));
        }
        catch (SAXException e) {
            throw new ExtractorException(e);
        }
        catch (IOException e) {
            throw new ExtractorException(e);
        }
    }

    /** Turns on a parser feature; an unsupported feature is logged and otherwise ignored. */
    private void enableFeature(DocumentBuilderFactory factory, String feature) {
        try {
            factory.setFeature(feature, true);
        }
        catch (ParserConfigurationException e) {
            this.logger.warn("XML parser does not support feature " + feature, e);
        }
    }

    /** Prefixes the zip entry name with "/" unless it already starts with one. */
    private String toAbsoluteName(String name) {
        return name.startsWith("/") ? name : "/" + name;
    }

    /**
     * Feeds the stream through a SimpleSAXParser that reports to the given
     * listener. Parse failures are logged and do not abort the extraction.
     */
    private void process(InputStream stream, SimpleSAXListener listener) {
        SimpleSAXParser parser;
        try {
            parser = new SimpleSAXParser();
        }
        catch (Exception e) {
            throw new RuntimeException("unable to instantiate SAXParser", e);
        }
        parser.setListener(listener);
        parser.setTrimWhiteSpace(false);
        try {
            parser.parse(new NonCloseableStream(stream));
        }
        catch (Exception e) {
            // Best effort: a broken part must not prevent the other parts from being processed.
            this.logger.warn("Exception while parsing XML", e);
        }
    }

    /**
     * Reads a properties part. For every child element of the root that starts
     * with a text node, a statement is added whose predicate is the element's
     * namespace URI (with a trailing "/") plus its local name, and the element is
     * additionally mapped to an Aperture property where one is known.
     */
    private void extractMetadata(InputStream stream, RDFContainer metadata) throws ExtractorException {
        Document document = this.getDocument(stream, true);
        Element root = document.getDocumentElement();
        metadata.add(RDF.type, NFO.Document);
        NodeList children = root.getChildNodes();
        int nrChildren = children.getLength();
        for (int i = 0; i < nrChildren; ++i) {
            Node childNode = children.item(i);
            if (childNode.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }
            Element childElement = (Element)childNode;
            String text = this.getText(childElement);
            if (text == null) {
                continue;
            }
            String nameSpace = childElement.getNamespaceURI();
            if (nameSpace != null) {
                if (!nameSpace.endsWith("/")) {
                    nameSpace = nameSpace + "/";
                }
                try {
                    String uriString = nameSpace + childElement.getLocalName();
                    URI predicate = metadata.getValueFactory().createURI(uriString);
                    metadata.add(predicate, text);
                }
                catch (ModelException e) {
                    this.logger.error("ModelException while adding statement, ignoring", e);
                }
            }
            this.mapToApertureProperty(childElement, text, metadata);
        }
    }

    /**
     * Maps a properties element to the corresponding Aperture property, based
     * on the element's local name. Elements with an unknown name are ignored.
     */
    private void mapToApertureProperty(Element element, String value, RDFContainer metadata) {
        String localName = element.getLocalName();
        if ("title".equals(localName)) {
            metadata.add(NIE.title, value);
        } else if ("subject".equals(localName)) {
            metadata.add(NIE.subject, value);
        } else if ("created".equals(localName)) {
            this.addDate(NIE.contentCreated, value, metadata);
        } else if ("creator".equals(localName)) {
            this.addContactStatement(NCO.creator, value, metadata);
        } else if ("description".equals(localName)) {
            metadata.add(NIE.description, value);
        } else if ("lastModifiedBy".equals(localName)) {
            this.addContactStatement(NCO.contributor, value, metadata);
        } else if ("modified".equals(localName)) {
            this.addDate(NIE.contentLastModified, value, metadata);
        } else if ("Application".equals(localName)) {
            metadata.add(NIE.generator, value);
        } else if ("Pages".equals(localName)) {
            try {
                metadata.add(RDF.type, NFO.PaginatedTextDocument);
                metadata.add(NFO.pageCount, Integer.parseInt(value));
            }
            catch (NumberFormatException ignored) {
                // The type statement above is kept; only the unparsable count is dropped.
                this.logger.debug("Ignoring non-numeric page count: {}", value);
            }
        } else if ("Manager".equals(localName)) {
            this.addContactStatement(NCO.contributor, value, metadata);
        } else if ("Company".equals(localName)) {
            metadata.add(NCO.org, value);
        } else if ("keywords".equals(localName)) {
            // Every token between the delimiters becomes a separate keyword statement.
            StringTokenizer tokenizer = new StringTokenizer(value, " \t.,;|/\\", false);
            while (tokenizer.hasMoreTokens()) {
                metadata.add(NIE.keyword, tokenizer.nextToken());
            }
        }
    }

    /** Adds a date statement, skipping values that cannot be parsed as a date. */
    private void addDate(URI property, String value, RDFContainer metadata) {
        Date date = this.convertStringToDate(value);
        if (date != null) {
            metadata.add(property, date);
        }
    }

    /**
     * Parses the value with DateUtil.string2DateTime.
     *
     * @return the parsed date, or null when the value cannot be parsed
     */
    private Date convertStringToDate(String value) {
        try {
            return DateUtil.string2DateTime(value);
        }
        catch (ParseException e) {
            this.logger.debug("Ignoring unparsable date: {}", value);
            return null;
        }
    }

    /**
     * Creates a new NCO.Contact resource carrying the given full name and links
     * it to the container's described resource through the given property.
     */
    private void addContactStatement(URI uri, String fullname, RDFContainer container) {
        Model model = container.getModel();
        Resource contactResource = ModelUtil.generateRandomResource(model);
        model.addStatement(contactResource, RDF.type, NCO.Contact);
        model.addStatement(contactResource, NCO.fullname, fullname);
        container.add(uri, contactResource);
    }

    /**
     * Returns the whole text of the element's first child when that child is a
     * text node, or null otherwise (including when the element has no children).
     */
    private String getText(Element element) {
        Node child = element.getFirstChild();
        if (child instanceof Text) {
            return ((Text)child).getWholeText();
        }
        return null;
    }

    static {
        TEXT_ELEMENT_TYPES.add("application/vnd.ms-word.document.macroEnabled.main+xml");
        TEXT_ELEMENT_TYPES.add("application/vnd.ms-word.template.macroEnabledTemplate.main+xml");
        TEXT_ELEMENT_TYPES.add("application/vnd.openxmlformats-officedocument.presentationml.comments+xml");
        TEXT_ELEMENT_TYPES.add("application/vnd.openxmlformats-officedocument.presentationml.slide+xml");
        TEXT_ELEMENT_TYPES.add("application/vnd.openxmlformats-officedocument.presentationml.notesSlide+xml");
        TEXT_ELEMENT_TYPES.add("application/vnd.openxmlformats-officedocument.presentationml.slideMaster+xml");
        TEXT_ELEMENT_TYPES.add("application/vnd.openxmlformats-officedocument.presentationml.notesMaster+xml");
        TEXT_ELEMENT_TYPES.add("application/vnd.openxmlformats-officedocument.presentationml.handoutMaster+xml");
        TEXT_ELEMENT_TYPES.add("application/vnd.openxmlformats-officedocument.spreadsheetml.comments+xml");
        TEXT_ELEMENT_TYPES.add("application/vnd.openxmlformats-officedocument.spreadsheetml.sharedStrings+xml");
        TEXT_ELEMENT_TYPES.add("application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml");
        TEXT_ELEMENT_TYPES.add("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml");
        TEXT_ELEMENT_TYPES.add("application/vnd.openxmlformats-officedocument.wordprocessingml.comments+xml");
        TEXT_ELEMENT_TYPES.add("application/vnd.openxmlformats-officedocument.wordprocessingml.endnotes+xml");
        TEXT_ELEMENT_TYPES.add("application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml");
        TEXT_ELEMENT_TYPES.add("application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml");
        TEXT_ELEMENT_TYPES.add("application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml");
        TEXT_ELEMENT_TYPES.add("application/vnd.openxmlformats-officedocument.wordprocessingml.main+xml");
        TEXT_ELEMENT_TYPES.add("application/vnd.openxmlformats-officedocument.wordprocessingml.document.glossary+xml");
        TEXT_ELEMENT_TYPES.add("application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml");
        TEXT_ELEMENT_TYPES.add("application/vnd.openxmlformats-officedocument.wordprocessingml.template.main+xml");
        TEXT_ATTRIBUTE_TYPES.put("application/vnd.openxmlformats-officedocument.presentationml.commentAuthors+xml", "p:cmAuthor");
        TEXT_ATTRIBUTE_TYPES.put("application/vnd.openxmlformats-officedocument.spreadsheetml.main+xml", "sheet");
        TEXT_ATTRIBUTE_TYPES.put("application/vnd.openxmlformats-officedocument.spreadsheetml.template.main+xml", "sheet");
        TEXT_ATTRIBUTE_TYPES.put("application/vnd.openxmlformats-officedocument.spreadsheetml.table+xml", "tableColumn");
        // NOTE(review): notesSlide is also registered in TEXT_ELEMENT_TYPES, which is
        // checked first, so this mapping is currently never consulted.
        TEXT_ATTRIBUTE_TYPES.put("application/vnd.openxmlformats-officedocument.presentationml.notesSlide+xml", "p:cSld");
    }

    /**
     * SAX listener that appends the "name" attribute of every occurrence of one
     * particular tag to the full text, each followed by a line separator.
     */
    private class AttributeTextCollector
    extends SimpleSAXAdapter {
        /** Tag whose "name" attribute is collected. */
        private final String fullTextTag;

        public AttributeTextCollector(String fullTextTag) {
            this.fullTextTag = fullTextTag;
        }

        public void startTag(String tagName, Map atts, String text) throws SAXException {
            if (!this.fullTextTag.equals(tagName)) {
                return;
            }
            Object value = atts.get("name");
            if (value instanceof String) {
                OpenXmlExtractor.this.fullText.append(value);
                OpenXmlExtractor.this.fullText.append(END_OF_LINE);
            }
        }
    }

    /**
     * SAX listener that appends the text content of the known text-carrying
     * tags to the full text.
     */
    private class TextCollector
    extends SimpleSAXAdapter {
        /** Length of the full text when this part started, used to detect whether the part added text. */
        private int initialLength;

        /** True while inside a w:tabs element, whose w:tab children must not produce tab characters. */
        private boolean insideTabs = false;

        private TextCollector() {
        }

        public void startDocument() throws SAXException {
            this.initialLength = OpenXmlExtractor.this.fullText.length();
        }

        public void startTag(String tagName, Map atts, String text) throws SAXException {
            StringBuilder out = OpenXmlExtractor.this.fullText;
            if ("w:t".equals(tagName)) {
                // w:t text is appended without any separator.
                out.append(text);
            } else if ("t".equals(tagName) || "p:text".equals(tagName) || "a:t".equals(tagName) || "st:t".equals(tagName) || "v".equals(tagName)) {
                // Text from these tags is separated by a single space.
                out.append(text);
                out.append(' ');
            } else if ("w:tab".equals(tagName) && !this.insideTabs) {
                out.append('\t');
            } else if ("w:tabs".equals(tagName)) {
                this.insideTabs = true;
            }
        }

        public void endTag(String tagName) throws SAXException {
            if ("w:p".equals(tagName)) {
                // The end of a w:p element ends the line.
                OpenXmlExtractor.this.fullText.append(END_OF_LINE);
            } else if ("w:tabs".equals(tagName)) {
                this.insideTabs = false;
            }
        }

        public void endDocument() throws SAXException {
            // Separate this part's text from the next part's, but only if it contributed any.
            if (OpenXmlExtractor.this.fullText.length() > this.initialLength) {
                OpenXmlExtractor.this.fullText.append(END_OF_LINE);
            }
        }
    }

    /**
     * Stream wrapper whose close() does nothing, so that an XML parser cannot
     * close the underlying zip stream after reading a single entry.
     */
    private static class NonCloseableStream
    extends FilterInputStream {
        public NonCloseableStream(InputStream in) {
            super(in);
        }

        @Override
        public void close() throws IOException {
            // Deliberately empty: the wrapped stream stays open.
        }
    }

    /**
     * Content type mapping of a package: per-extension defaults plus per-part
     * overrides. Extensions and part names are matched case-insensitively.
     */
    private static class ContentTypes {
        /** Lower-cased extension (without dot) to content type. */
        private final HashMap<String, String> defaults = new HashMap<String, String>();

        /** Lower-cased absolute part name to content type. */
        private final HashMap<String, String> overrides = new HashMap<String, String>();

        /** Lower-cases a lookup key in a locale-independent way; null stays null. */
        private static String normalize(String key) {
            return key == null ? null : key.toLowerCase(Locale.ENGLISH);
        }

        public void addDefault(String extension, String contentType) {
            this.defaults.put(ContentTypes.normalize(extension), contentType);
        }

        public void addOverride(String partName, String contentType) {
            this.overrides.put(ContentTypes.normalize(partName), contentType);
        }

        public String getDefault(String extension) {
            return this.defaults.get(ContentTypes.normalize(extension));
        }

        public String getOverride(String partName) {
            return this.overrides.get(ContentTypes.normalize(partName));
        }

        /**
         * Resolves the content type of a part: an override for the exact part
         * name wins, otherwise the default registered for the text after the
         * last '.' in the name is used.
         *
         * @return the content type, or null when neither mapping applies
         */
        public String getType(String partName) {
            String override = this.getOverride(partName);
            if (override != null) {
                return override;
            }
            int index = partName.lastIndexOf('.');
            // A trailing dot leaves no extension to look up.
            if (index >= 0 && index < partName.length() - 1) {
                return this.getDefault(partName.substring(index + 1));
            }
            return null;
        }

        @Override
        public String toString() {
            return "ContentTypes[default=" + this.defaults + ",overrides=" + this.overrides + "]";
        }
    }
}

