/*
 * Decompiled with CFR 0.152.
 */
package org.semanticdesktop.aperture.extractor.works;

import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import org.ontoware.rdf2go.model.node.URI;
import org.ontoware.rdf2go.vocabulary.RDF;
import org.semanticdesktop.aperture.extractor.Extractor;
import org.semanticdesktop.aperture.extractor.ExtractorException;
import org.semanticdesktop.aperture.rdf.RDFContainer;
import org.semanticdesktop.aperture.util.StringExtractor;
import org.semanticdesktop.aperture.vocabulary.NFO;
import org.semanticdesktop.aperture.vocabulary.NIE;

/**
 * {@link Extractor} that recovers plain text from a binary stream by running it
 * through a {@link StringExtractor} subclass with additional line filters.
 *
 * <p>If the extracted text is non-empty after trimming, it is stored under
 * {@code NIE.plainTextContent} and the resource is typed as
 * {@code NFO.PaginatedTextDocument}. If nothing is extracted, the container is
 * left untouched.
 *
 * <p>NOTE(review): judging by the class and constant names this targets
 * Microsoft Works files - confirm against the extractor registration.
 */
public class WorksExtractor
implements Extractor {
    /**
     * Reads {@code stream}, extracts its textual content and records it in {@code result}.
     *
     * @param id       identifier of the resource; not used by this implementation
     * @param stream   stream to extract text from; it is not closed here
     * @param charset  not used by this implementation
     * @param mimeType not used by this implementation
     * @param result   container that receives the text and the RDF type statement
     * @throws ExtractorException wrapping any {@link IOException} raised while reading the stream
     */
    public void extract(URI id, InputStream stream, Charset charset, String mimeType, RDFContainer result) throws ExtractorException {
        try {
            WorksStringExtractor extractor = new WorksStringExtractor();
            String text = extractor.extract(stream).trim();
            // Only emit statements when there is real content to report.
            if (text.length() > 0) {
                result.add(NIE.plainTextContent, text);
                result.add(RDF.type, NFO.PaginatedTextDocument);
            }
        }
        catch (IOException e) {
            // Keep the I/O failure as the cause so callers can diagnose it.
            throw new ExtractorException(e);
        }
    }

    /**
     * {@link StringExtractor} specialization that layers extra start-line,
     * valid-line and post-processing rules on top of the inherited ones.
     *
     * <p>Instances are stateful (line counter, detected type, end-of-document
     * flag), so a fresh instance is created for every extraction.
     */
    private static class WorksStringExtractor
    extends StringExtractor {
        /** Initial mode; lines containing a NUL character are rejected. */
        private static final int MSWORKS_DOCUMENT = 0;
        /** Mode entered once a spreadsheet marker line has been seen. */
        private static final int MSWORKS_SPREADSHEET = 1;

        /** Once set, every further line is discarded. */
        private boolean endOfDocumentReached = false;
        /** Number of lines accepted by {@link #postProcessLine(String)} so far. */
        private int okayTrimmedLineCount = 0;
        /** Either {@link #MSWORKS_DOCUMENT} or {@link #MSWORKS_SPREADSHEET}. */
        private int worksType = MSWORKS_DOCUMENT;

        private WorksStringExtractor() {
        }

        /**
         * Accepts everything the superclass accepts, plus character code 0.
         * NUL characters therefore reach {@link #postProcessLine(String)},
         * which uses them as a signal.
         */
        protected boolean isTextCharacter(int charNumber) {
            return super.isTextCharacter(charNumber) || charNumber == 0;
        }

        /**
         * Treats the line {@code "gtt"} as a start line in addition to
         * whatever the superclass recognizes.
         */
        protected boolean isStartLine(String lineLowerCase) {
            if ("gtt".equals(lineLowerCase)) {
                return true;
            }
            return super.isStartLine(lineLowerCase);
        }

        /**
         * Rejects the lines {@code "microsoft works"} and {@code "msworkswpdoc"};
         * every other line is judged by the superclass.
         */
        protected boolean isValidLine(String lineLowerCase) {
            if ("microsoft works".equals(lineLowerCase) || "msworkswpdoc".equals(lineLowerCase)) {
                return false;
            }
            return super.isValidLine(lineLowerCase);
        }

        /**
         * Filters and normalizes a single extracted line.
         *
         * @param line the raw line handed over by the superclass machinery
         * @return the line to keep, or {@code null} if it must be dropped
         */
        protected String postProcessLine(String line) {
            if (this.endOfDocumentReached) {
                return null;
            }

            // Marker lines switch to spreadsheet mode, but only while fewer than
            // four lines have been accepted.
            // NOTE(review): "&T" / "VT&" are presumably spreadsheet header
            // signatures - confirm against sample files.
            boolean markerLine = (line.length() == 3 && line.endsWith("&T")) || line.startsWith("VT&");
            if (this.okayTrimmedLineCount < 4 && markerLine) {
                this.worksType = MSWORKS_SPREADSHEET;
                return null;
            }

            line = super.postProcessLine(line);
            if (line == null) {
                return null;
            }

            if (this.worksType == MSWORKS_SPREADSHEET) {
                // Drop lines starting with '@' and short lines (under six
                // characters) that contain '@' or no lowercase letters.
                if (line.startsWith("@")) {
                    return null;
                }
                if (line.length() < 6 && (line.indexOf('@') >= 0 || this.isAllUppercase(line))) {
                    return null;
                }
            } else if (line.indexOf('\u0000') >= 0) {
                // In document mode a NUL-containing line is always dropped; after
                // more than five accepted lines it also ends the document.
                if (this.okayTrimmedLineCount > 5) {
                    this.endOfDocumentReached = true;
                }
                return null;
            }

            // Keep only lines longer than two characters whose second character
            // is neither whitespace nor NUL.
            boolean okay = line.length() > 2
                    && !Character.isWhitespace(line.charAt(1))
                    && line.charAt(1) != '\u0000';
            if (!okay) {
                return null;
            }

            ++this.okayTrimmedLineCount;
            if (this.worksType == MSWORKS_SPREADSHEET) {
                // Spreadsheet lines may still carry NULs; turn them into spaces.
                line = line.replace('\u0000', ' ');
            }
            return line;
        }

        /**
         * Tells whether every letter in {@code string} is uppercase.
         * Non-letter characters are ignored, so a string without any letters
         * (including the empty string) yields {@code true}.
         */
        private boolean isAllUppercase(String string) {
            for (int i = string.length() - 1; i >= 0; i--) {
                char c = string.charAt(i);
                if (Character.isLetter(c) && !Character.isUpperCase(c)) {
                    return false;
                }
            }
            return true;
        }
    }
}

