/*
 * Decompiled with CFR 0.152.
 */
package org.semanticdesktop.aperture.crawler.base;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import java.util.Set;
import org.ontoware.rdf2go.RDF2Go;
import org.ontoware.rdf2go.model.Model;
import org.ontoware.rdf2go.model.node.URI;
import org.semanticdesktop.aperture.accessor.DataObject;
import org.semanticdesktop.aperture.accessor.FileDataObject;
import org.semanticdesktop.aperture.accessor.RDFContainerFactory;
import org.semanticdesktop.aperture.crawler.Crawler;
import org.semanticdesktop.aperture.crawler.CrawlerHandler;
import org.semanticdesktop.aperture.crawler.ExitCode;
import org.semanticdesktop.aperture.extractor.Extractor;
import org.semanticdesktop.aperture.extractor.ExtractorException;
import org.semanticdesktop.aperture.extractor.ExtractorFactory;
import org.semanticdesktop.aperture.extractor.ExtractorRegistry;
import org.semanticdesktop.aperture.extractor.FileExtractor;
import org.semanticdesktop.aperture.extractor.FileExtractorFactory;
import org.semanticdesktop.aperture.mime.identifier.MimeTypeIdentifier;
import org.semanticdesktop.aperture.rdf.RDFContainer;
import org.semanticdesktop.aperture.rdf.impl.RDFContainerImpl;
import org.semanticdesktop.aperture.subcrawler.SubCrawler;
import org.semanticdesktop.aperture.subcrawler.SubCrawlerException;
import org.semanticdesktop.aperture.subcrawler.SubCrawlerFactory;
import org.semanticdesktop.aperture.subcrawler.SubCrawlerRegistry;
import org.semanticdesktop.aperture.util.IOUtil;
import org.semanticdesktop.aperture.vocabulary.NIE;

/**
 * Skeleton {@link CrawlerHandler} implementation.
 *
 * <p>Most callbacks are empty. {@link #objectNew} and {@link #objectChanged}
 * simply dispose the reported data object. Subclasses may call
 * {@link #processBinary} to run MIME type identification followed by the first
 * matching extractor, file extractor or sub-crawler on a
 * {@link FileDataObject}.
 */
public class CrawlerHandlerBase
implements CrawlerHandler {
    /** When {@code false}, {@link #processBinary} returns without doing anything. */
    protected boolean extractingContents = true;
    /** Identifier used by {@link #processBinary}; must be non-null when that method does work. */
    protected MimeTypeIdentifier mimeTypeIdentifier;
    /** Source of extractor and file extractor factories; may be {@code null}, in which case extraction is skipped. */
    protected ExtractorRegistry extractorRegistry;
    /** Source of sub-crawler factories; may be {@code null}, in which case sub-crawling is skipped. */
    protected SubCrawlerRegistry subCrawlerRegistry;

    /**
     * Creates a handler with no MIME type identifier and no registries set.
     */
    public CrawlerHandlerBase() {
    }

    /**
     * Creates a handler using the given collaborators.
     *
     * @param mimeTypeIdentifier identifier used to detect the MIME type of binary content
     * @param extractorRegistry registry queried for extractor and file extractor factories
     * @param subCrawlerRegistry registry queried for sub-crawler factories
     */
    public CrawlerHandlerBase(MimeTypeIdentifier mimeTypeIdentifier, ExtractorRegistry extractorRegistry, SubCrawlerRegistry subCrawlerRegistry) {
        this.mimeTypeIdentifier = mimeTypeIdentifier;
        this.extractorRegistry = extractorRegistry;
        this.subCrawlerRegistry = subCrawlerRegistry;
    }

    /**
     * Returns a factory that, for every requested URI, creates a fresh model via
     * the RDF2Go model factory, opens it and wraps it in an
     * {@link RDFContainerImpl}.
     *
     * @param crawler not used by this implementation
     * @param url not used by this implementation
     * @return a new factory instance
     */
    public RDFContainerFactory getRDFContainerFactory(Crawler crawler, String url) {
        return new RDFContainerFactory(){

            public RDFContainer getRDFContainer(URI uri) {
                Model model = RDF2Go.getModelFactory().createModel();
                model.open();
                return new RDFContainerImpl(model, uri);
            }
        };
    }

    /** Does nothing. */
    public void accessingObject(Crawler crawler, String url) {
    }

    /** Does nothing. */
    public void clearFinished(Crawler crawler, ExitCode exitCode) {
    }

    /** Does nothing. */
    public void clearingObject(Crawler crawler, String url) {
    }

    /** Does nothing. */
    public void clearStarted(Crawler crawler) {
    }

    /** Does nothing. */
    public void crawlStarted(Crawler crawler) {
    }

    /** Does nothing. */
    public void crawlStopped(Crawler crawler, ExitCode exitCode) {
    }

    /**
     * Disposes the given data object without further processing.
     *
     * @param crawler not used by this implementation
     * @param object the changed data object; disposed immediately
     */
    public void objectChanged(Crawler crawler, DataObject object) {
        object.dispose();
    }

    /**
     * Disposes the given data object without further processing.
     *
     * @param crawler not used by this implementation
     * @param object the new data object; disposed immediately
     */
    public void objectNew(Crawler crawler, DataObject object) {
        object.dispose();
    }

    /** Does nothing. */
    public void objectNotModified(Crawler crawler, String url) {
    }

    /** Does nothing. */
    public void objectRemoved(Crawler crawler, String url) {
    }

    /**
     * Identifies the MIME type of a {@link FileDataObject}'s content and hands
     * the content to the first applicable processor.
     *
     * <p>Returns immediately when {@link #extractingContents} is {@code false}.
     * Data objects that are not {@link FileDataObject}s are ignored. When a MIME
     * type is identified it is added to the object's metadata as
     * {@code NIE.mimeType}, and then exactly one of the following is run, in
     * this order of precedence:
     * <ol>
     * <li>the first extractor registered for the MIME type,</li>
     * <li>the first file extractor registered for the MIME type (on the
     *     object's own file, or on a downloaded temporary copy that is deleted
     *     afterwards),</li>
     * <li>the first sub-crawler registered for the MIME type.</li>
     * </ol>
     *
     * <p>The content stream is not closed by this method.
     * NOTE(review): presumably the stream is released when the data object is
     * disposed by the caller - confirm.
     *
     * @param crawler crawler used to run a sub-crawler, if one is selected
     * @param dataObject the object to process; must not be {@code null}
     * @throws RuntimeException if no MIME type identifier has been set
     * @throws NullPointerException if {@code dataObject} is {@code null}
     * @throws IOException declared for stream access and content download
     * @throws ExtractorException declared for the extractor invocations
     * @throws SubCrawlerException declared for the sub-crawler invocation
     */
    protected void processBinary(Crawler crawler, DataObject dataObject) throws IOException, ExtractorException, SubCrawlerException {
        if (!this.extractingContents) {
            return;
        }
        if (this.mimeTypeIdentifier == null) {
            throw new RuntimeException("MimeTypeIdentifier is not set. ");
        }
        if (dataObject == null) {
            throw new NullPointerException("dataObject is null. This parameter must be set.");
        }
        if (!(dataObject instanceof FileDataObject)) {
            return;
        }

        FileDataObject object = (FileDataObject)dataObject;
        URI id = object.getID();

        // Buffer at least as many bytes as the identifier asks for, so the
        // stream can be rewound after the MIME type has been sniffed.
        int minimumArrayLength = this.mimeTypeIdentifier.getMinArrayLength();
        int bufferSize = Math.max(minimumArrayLength, 8192);
        BufferedInputStream bufferedStream = new BufferedInputStream(object.getContent(), bufferSize);
        bufferedStream.mark(minimumArrayLength + 10);
        byte[] bytes = IOUtil.readBytes(bufferedStream, minimumArrayLength);

        String mimeType = this.mimeTypeIdentifier.identify(bytes, null, id);
        if (mimeType == null) {
            return;
        }

        RDFContainer metadata = object.getMetadata();
        metadata.add(NIE.mimeType, mimeType);
        // Rewind so the selected processor sees the content from the start.
        bufferedStream.reset();

        if (this.extractorRegistry != null) {
            Set<?> extractors = this.extractorRegistry.getExtractorFactories(mimeType);
            if (!extractors.isEmpty()) {
                ExtractorFactory factory = (ExtractorFactory)extractors.iterator().next();
                Extractor extractor = factory.get();
                extractor.extract(id, bufferedStream, null, mimeType, metadata);
                return;
            }

            Set<?> fileExtractors = this.extractorRegistry.getFileExtractorFactories(mimeType);
            if (!fileExtractors.isEmpty()) {
                FileExtractorFactory factory = (FileExtractorFactory)fileExtractors.iterator().next();
                FileExtractor extractor = factory.get();
                File originalFile = object.getFile();
                if (originalFile != null) {
                    extractor.extract(id, originalFile, null, mimeType, metadata);
                } else {
                    File tempFile = object.downloadContent();
                    try {
                        extractor.extract(id, tempFile, null, mimeType, metadata);
                    } finally {
                        // Remove the temporary copy even when extraction fails,
                        // otherwise every failed extraction leaks a file.
                        if (tempFile != null) {
                            tempFile.delete();
                        }
                    }
                }
                return;
            }
        }

        if (this.subCrawlerRegistry != null) {
            Iterator<?> factories = this.subCrawlerRegistry.get(mimeType).iterator();
            if (factories.hasNext()) {
                SubCrawlerFactory subCrawlerFactory = (SubCrawlerFactory)factories.next();
                SubCrawler subCrawler = subCrawlerFactory.get();
                crawler.runSubCrawler(subCrawler, dataObject, bufferedStream, null, mimeType);
            }
        }
    }

    /**
     * Tells whether {@link #processBinary} is enabled.
     *
     * @return the current value of the {@code extractingContents} flag
     */
    public boolean isExtractingContents() {
        return this.extractingContents;
    }

    /**
     * Enables or disables {@link #processBinary}.
     *
     * @param extractingContents {@code false} to make {@code processBinary} a no-op
     */
    public void setExtractingContents(boolean extractingContents) {
        this.extractingContents = extractingContents;
    }
}

