/*
 * Decompiled with CFR 0.152.
 */
package org.semanticdesktop.aperture.extractor.xml;

import java.io.BufferedInputStream;
import java.io.FileNotFoundException;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.UnknownHostException;
import java.nio.charset.Charset;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.ontoware.rdf2go.model.node.URI;
import org.ontoware.rdf2go.vocabulary.RDF;
import org.semanticdesktop.aperture.extractor.Extractor;
import org.semanticdesktop.aperture.extractor.ExtractorException;
import org.semanticdesktop.aperture.rdf.RDFContainer;
import org.semanticdesktop.aperture.vocabulary.NFO;
import org.semanticdesktop.aperture.vocabulary.NIE;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;

/**
 * {@link Extractor} that pulls the textual content (character data and attribute values) out of an
 * XML document and stores it in the supplied {@link RDFContainer}.
 *
 * <p>The document is parsed with a SAX parser. If the first parse fails because something the
 * document references cannot be loaded (see {@link #isFailingInclusionException(Exception)}), the
 * stream is rewound and parsed a second time with external entities disabled.
 */
public class XmlExtractor
implements Extractor {
    /** Size of the read buffer and of the mark window used to rewind the stream for the second parse. */
    private static final int BUFFER_SIZE = 8192;

    private static final String FEATURE_LOAD_EXTERNAL_DTD = "http://apache.org/xml/features/nonvalidating/load-external-dtd";
    private static final String FEATURE_EXTERNAL_GENERAL_ENTITIES = "http://xml.org/sax/features/external-general-entities";
    private static final String FEATURE_EXTERNAL_PARAMETER_ENTITIES = "http://xml.org/sax/features/external-parameter-entities";

    private static final Logger logger = LoggerFactory.getLogger(XmlExtractor.class);

    /**
     * Parses the XML document in {@code stream} and, when it yields any non-empty text, adds that text
     * as {@code NIE.plainTextContent} together with the type {@code NFO.PlainTextDocument} to
     * {@code result}.
     *
     * @param id       identifier of the document; its string form is used as the SAX system id and in log messages
     * @param stream   the XML document; it is wrapped so that the parser cannot close it
     * @param charset  not used by this implementation
     * @param mimeType not used by this implementation
     * @param result   container that receives the extracted statements
     * @throws ExtractorException if the parser cannot be configured, or if parsing fails with an I/O or
     *         SAX error that is not handled by the fallback parse
     */
    public void extract(URI id, InputStream stream, Charset charset, String mimeType, RDFContainer result) throws ExtractorException {
        try {
            // The fallback parse needs to rewind the stream, so make sure mark/reset is available.
            if (!stream.markSupported()) {
                stream = new BufferedInputStream(stream, BUFFER_SIZE);
            }
            // NOTE(review): reset() is only guaranteed while no more than BUFFER_SIZE bytes have been
            // read, so the fallback presumably only works for failures early in the document - confirm.
            stream.mark(BUFFER_SIZE);

            // Shield the stream from close() calls so it can still be reset for the second attempt.
            FilterInputStream unclosableStream = new FilterInputStream(stream){

                @Override
                public void close() {
                }
            };

            SAXParser parser = SAXParserFactory.newInstance().newSAXParser();
            // NOTE(review): external general/parameter entities stay enabled for this first parse; if the
            // documents are untrusted this is an XXE/SSRF exposure - consider disabling them up front.
            parser.getXMLReader().setFeature(FEATURE_LOAD_EXTERNAL_DTD, false);

            XmlTextExtractor listener = new XmlTextExtractor();
            InputSource source = new InputSource();
            source.setSystemId(id.toString());
            source.setByteStream(unclosableStream);

            try {
                parser.parse(source, (DefaultHandler)listener);
            }
            catch (Exception e) {
                if (!this.isFailingInclusionException(e)) {
                    throw new ExtractorException(e);
                }
                this.reparseWithoutExternalEntities(id, parser, source, unclosableStream, listener, e);
            }

            String text = listener.getText();
            if (!text.equals("")) {
                result.add(NIE.plainTextContent, text);
                result.add(RDF.type, NFO.PlainTextDocument);
            }
        }
        catch (ParserConfigurationException e) {
            throw new ExtractorException(e);
        }
        catch (SAXException e) {
            throw new ExtractorException(e);
        }
        catch (IOException e) {
            throw new ExtractorException(e);
        }
    }

    /**
     * Second parse attempt: discards the text gathered so far, rewinds the stream and parses again with
     * external DTDs and external general/parameter entities switched off.
     *
     * <p>A {@link SAXException} during this attempt is logged and otherwise ignored, so whatever text
     * was collected before the error is still returned by the listener (this looks like deliberate
     * best-effort behaviour). An {@link IOException} is logged and rethrown.
     *
     * @param id           identifier of the document, used in log messages
     * @param parser       the parser used for the first attempt
     * @param source       the input source wrapping {@code input}
     * @param input        the stream to rewind; it must have been marked before the first attempt
     * @param listener     handler that accumulates the text; it is cleared before reparsing
     * @param firstFailure the exception that made the first attempt fail, logged for context
     * @throws IOException if rewinding or reading the stream fails
     */
    private void reparseWithoutExternalEntities(URI id, SAXParser parser, InputSource source, InputStream input, XmlTextExtractor listener, Exception firstFailure) throws IOException {
        try {
            XMLReader reader = parser.getXMLReader();
            reader.setFeature(FEATURE_LOAD_EXTERNAL_DTD, false);
            reader.setFeature(FEATURE_EXTERNAL_GENERAL_ENTITIES, false);
            reader.setFeature(FEATURE_EXTERNAL_PARAMETER_ENTITIES, false);
            listener.clear();
            input.reset();
            parser.parse(source, (DefaultHandler)listener);
        }
        catch (IOException ioe) {
            logger.error("Exception while parsing document " + id.toString(), firstFailure);
            logger.error("While trying to reparse the file without loading external DTDs following error occured", ioe);
            throw ioe;
        }
        catch (SAXException se) {
            logger.error("Exception while parsing document " + id.toString(), firstFailure);
            logger.error("While trying to reparse the file without loading external DTDs following error occured", se);
        }
    }

    /**
     * Tells whether {@code e} is one of the failures that trigger the fallback parse: a
     * {@link FileNotFoundException} or an {@link UnknownHostException}.
     */
    private boolean isFailingInclusionException(Exception e) {
        return e instanceof FileNotFoundException || e instanceof UnknownHostException;
    }

    /**
     * SAX handler that accumulates character data and attribute values into one string.
     */
    private static class XmlTextExtractor
    extends DefaultHandler {
        private final StringBuilder buffer = new StringBuilder(65536);

        /** Returns everything collected so far, with leading and trailing whitespace trimmed. */
        public String getText() {
            return this.buffer.toString().trim();
        }

        /** Discards everything collected so far. */
        public void clear() {
            this.buffer.setLength(0);
        }

        /**
         * Separates the text of this element from what precedes it and appends every non-empty
         * attribute value that is not a boolean-like flag, each followed by a space.
         */
        @Override
        public void startElement(String namespaceURI, String localName, String qName, Attributes attributes) throws SAXException {
            this.appendSeparator();
            int nrAtts = attributes.getLength();
            for (int i = 0; i < nrAtts; ++i) {
                String value = attributes.getValue(i);
                if (value == null || value.length() <= 0 || this.isGarbage(value)) continue;
                this.buffer.append(value);
                this.buffer.append(' ');
            }
        }

        /** Separates the text of the element that just ended from whatever follows it. */
        @Override
        public void endElement(String namespaceURI, String localName, String qName) throws SAXException {
            this.appendSeparator();
        }

        /**
         * Appends the reported characters verbatim. No separator is added here: a parser may report one
         * run of text in several calls (for instance around entity references), and inserting a space
         * after every call would break words apart.
         */
        @Override
        public void characters(char[] ch, int start, int length) throws SAXException {
            this.buffer.append(ch, start, length);
        }

        /** Appends a single space unless the buffer is empty or already ends in whitespace. */
        private void appendSeparator() {
            int length = this.buffer.length();
            if (length > 0 && !Character.isWhitespace(this.buffer.charAt(length - 1))) {
                this.buffer.append(' ');
            }
        }

        /** Tells whether an attribute value is a bare flag ("true", "false", "yes" or "no", any case). */
        private boolean isGarbage(String attsValue) {
            return "true".equalsIgnoreCase(attsValue) || "false".equalsIgnoreCase(attsValue) || "yes".equalsIgnoreCase(attsValue) || "no".equalsIgnoreCase(attsValue);
        }
    }
}

