/*
 * Decompiled with CFR 0.152.
 */
package net.sf.regain.crawler.preparator;

import com.jacob.com.ComFailException;
import com.jacob.com.ComThread;
import com.jacob.com.Dispatch;
import com.jacob.com.Variant;
import de.filiadata.lucene.spider.generated.msoffice2000.word.Application;
import de.filiadata.lucene.spider.generated.msoffice2000.word.Document;
import de.filiadata.lucene.spider.generated.msoffice2000.word.Documents;
import de.filiadata.lucene.spider.generated.msoffice2000.word.GroupShapes;
import de.filiadata.lucene.spider.generated.msoffice2000.word.HeaderFooter;
import de.filiadata.lucene.spider.generated.msoffice2000.word.Paragraph;
import de.filiadata.lucene.spider.generated.msoffice2000.word.Paragraphs;
import de.filiadata.lucene.spider.generated.msoffice2000.word.Section;
import de.filiadata.lucene.spider.generated.msoffice2000.word.Sections;
import de.filiadata.lucene.spider.generated.msoffice2000.word.Selection;
import de.filiadata.lucene.spider.generated.msoffice2000.word.Shape;
import de.filiadata.lucene.spider.generated.msoffice2000.word.Shapes;
import java.util.HashSet;
import java.util.Map;
import net.sf.regain.RegainException;
import net.sf.regain.RegainToolkit;
import net.sf.regain.crawler.config.PreparatorConfig;
import net.sf.regain.crawler.document.RawDocument;
import net.sf.regain.crawler.preparator.AbstractJacobMsOfficePreparator;
import org.apache.log4j.Logger;

/**
 * Prepares Microsoft Word documents ({@code doc}, {@code dot}) for indexing by
 * remote-controlling an MS Word instance through the JACOB COM bridge.
 * <p>
 * The Word application is started lazily on the first call to
 * {@link #prepare(RawDocument)} and is reused for subsequent documents until
 * {@link #close()} is called.
 */
public class JacobMsWordPreparator
extends AbstractJacobMsOfficePreparator {
    private static final Logger mLog = Logger.getLogger(JacobMsWordPreparator.class);

    /** The running Word application, or {@code null} if Word has not been started (or was closed). */
    private Application mWordApplication;

    /**
     * Local names of the paragraph styles whose paragraphs are extracted as
     * headlines, or {@code null} if no {@code headlineStyles} were configured.
     */
    private HashSet<String> mHeadlineStyleNameSet;

    /**
     * Creates a preparator registered for the extensions {@code doc} and {@code dot}.
     *
     * @throws RegainException if the super constructor fails.
     */
    public JacobMsWordPreparator() throws RegainException {
        super(new String[]{"doc", "dot"});
    }

    /**
     * Reads the preparator configuration. If the section {@code main} contains a
     * {@code headlineStyles} entry, it is split at {@code ;} and the resulting
     * style names are remembered for headline extraction.
     *
     * @param config the configuration of this preparator.
     * @throws RegainException if the super class initialisation fails.
     */
    @Override
    public void init(PreparatorConfig config) throws RegainException {
        super.init(config);
        Map main = config.getSectionWithName("main");
        if (main == null) {
            return;
        }
        String headlineStyles = (String) main.get("headlineStyles");
        if (headlineStyles == null) {
            return;
        }
        String[] styleArr = RegainToolkit.splitString(headlineStyles, ";", true);
        HashSet<String> styleNames = new HashSet<String>();
        for (int i = 0; i < styleArr.length; ++i) {
            styleNames.add(styleArr[i]);
        }
        this.mHeadlineStyleNameSet = styleNames;
    }

    /**
     * Opens the document in Word, extracts title, content, headlines and
     * properties, and closes the document again.
     * <p>
     * The document is closed even when the extraction fails, so that a broken
     * file does not stay open in the shared Word instance.
     *
     * @param rawDocument the document to prepare.
     * @throws RegainException if a COM call fails or a helper of the super class fails.
     */
    public void prepare(RawDocument rawDocument) throws RegainException {
        if (this.mWordApplication == null) {
            ComThread.InitSTA();
            mLog.info("Starting MS Word");
            this.mWordApplication = new Application();
            // The explicit Object casts select the same Dispatch.put overload as before.
            Dispatch.put((Object)this.mWordApplication, "Visible", (Object)new Variant(false));
        }
        try {
            String fileName = rawDocument.getContentAsFile(true).getAbsolutePath();
            Documents docs = this.mWordApplication.getDocuments();
            // NOTE(review): per the Word object model the 2nd/3rd arguments of
            // Documents.Open are ConfirmConversions / ReadOnly - confirm against
            // the generated wrapper.
            Document doc = docs.open(new Variant(fileName), new Variant(false), new Variant(true));
            boolean extracted = false;
            try {
                String content = this.extractContent(doc);
                String headlines = this.extractHeadlines(doc);
                this.readProperties(doc);
                this.setCleanedContent(content);
                if (headlines != null) {
                    this.setHeadlines(headlines);
                }
                extracted = true;
            }
            finally {
                if (extracted) {
                    // Normal path: a failing close is reported to the caller as before.
                    doc.close(new Variant(false));
                } else {
                    // Error path: do not let a failing close hide the original exception.
                    this.closeQuietly(doc);
                }
            }
        }
        catch (ComFailException exc) {
            throw new RegainException("Using COM failed.", exc);
        }
    }

    /**
     * Collects the text of all sections and of all text box shapes of the
     * document. As a side effect the text of the first-page header of the first
     * section is set as title.
     *
     * @param doc the open Word document.
     * @return the collected text, one entry per line.
     * @throws RegainException if setting the title fails.
     */
    private String extractContent(Document doc) throws RegainException {
        StringBuffer content = new StringBuffer(16384);

        Sections sections = doc.getSections();
        // Every getCount() is a COM round trip, so read it once per loop.
        int sectionCount = sections.getCount();
        for (int i = 1; i <= sectionCount; ++i) {
            Section sec = sections.item(i);
            if (i == 1) {
                // Index 2 addresses the first-page header
                // (wdHeaderFooterFirstPage in the Word object model).
                int headerFirstPage = 2;
                HeaderFooter firstHeader = sec.getHeaders().item(headerFirstPage);
                this.setTitle(firstHeader.getRange().getText());
            }
            sec.getRange().select();
            content.append(this.getSelection(this.mWordApplication)).append("\n");
        }

        Shapes shapes = doc.getShapes();
        int shapeCount = shapes.getCount();
        for (int i = 1; i <= shapeCount; ++i) {
            this.appendShape(shapes.item(new Variant(i)), content);
        }
        return content.toString();
    }

    /**
     * Extracts the text of all paragraphs whose style name is one of the
     * configured headline styles.
     *
     * @param doc the open Word document.
     * @return the headlines, one per line, or {@code null} if no headline styles
     *         are configured or no paragraph matched.
     */
    private String extractHeadlines(Document doc) {
        if (this.mHeadlineStyleNameSet == null || this.mHeadlineStyleNameSet.isEmpty()) {
            return null;
        }
        StringBuffer headlines = null;
        Paragraphs paragraphs = doc.getParagraphs();
        int paragraphCount = paragraphs.getCount();
        for (int i = 1; i <= paragraphCount; ++i) {
            Paragraph paragraph = paragraphs.item(i);
            Object styleDispatch = paragraph.getFormat().getStyle().getDispatch();
            String formatName = Dispatch.get(styleDispatch, "NameLocal").toString();
            if (!this.mHeadlineStyleNameSet.contains(formatName)) {
                continue;
            }
            paragraph.getRange().select();
            String text = this.removeBinaryStuff(this.getSelection(this.mWordApplication));
            if (headlines == null) {
                headlines = new StringBuffer();
            }
            headlines.append(text).append("\n");
            if (mLog.isDebugEnabled()) {
                mLog.debug("Extracted headline: '" + text + "'");
            }
        }
        return headlines == null ? null : headlines.toString();
    }

    /**
     * Closes the document without saving and only logs a COM failure. Used on
     * the error path, where the original exception must reach the caller.
     *
     * @param doc the document to close.
     */
    private void closeQuietly(Document doc) {
        try {
            doc.close(new Variant(false));
        }
        catch (ComFailException closeExc) {
            mLog.warn("Closing MS Word document after a failure failed", closeExc);
        }
    }

    /**
     * Returns the text of the current selection of the Word application.
     *
     * @param wordAppl the Word application to read the selection from.
     * @return the text of the selection.
     */
    private String getSelection(Application wordAppl) {
        Selection sel = wordAppl.getSelection();
        sel.moveEnd();
        // NOTE(review): copy() is kept from the original; why the selection is
        // copied before reading its text is not visible here - confirm it is needed.
        sel.copy();
        return sel.getText();
    }

    /**
     * Appends the text of a shape to the buffer. Shapes whose name starts with
     * {@code "Text Box "} contribute their text; shapes whose name starts with
     * {@code "Group "} are searched recursively; all other shapes are ignored.
     *
     * @param shape the shape to read.
     * @param buffer the buffer to append the text to.
     */
    private void appendShape(Shape shape, StringBuffer buffer) {
        String shapeName = shape.getName();
        if (shapeName.startsWith("Text Box ")) {
            shape.getTextFrame().getTextRange().select();
            buffer.append(this.getSelection(this.mWordApplication)).append("\n");
        } else if (shapeName.startsWith("Group ")) {
            GroupShapes group = shape.getGroupItems();
            int childCount = group.getCount();
            for (int i = 1; i <= childCount; ++i) {
                this.appendShape(group.item(new Variant(i)), buffer);
            }
        }
    }

    /**
     * Removes all characters below the space character (control characters such
     * as line breaks and tabs) from the text.
     *
     * @param text the text to clean.
     * @return the text without control characters.
     */
    private String removeBinaryStuff(String text) {
        StringBuffer newText = new StringBuffer(text.length());
        for (int j = 0; j < text.length(); ++j) {
            char c = text.charAt(j);
            if (c >= ' ') {
                newText.append(c);
            }
        }
        return newText.toString();
    }

    /**
     * Quits the Word application, if it was started, and releases the COM thread.
     * <p>
     * The application reference is cleared first, so a repeated call is a no-op
     * and a later {@link #prepare(RawDocument)} starts a fresh Word instance
     * instead of talking to the one that was quit.
     *
     * @throws RegainException if quitting Word fails.
     */
    public void close() throws RegainException {
        if (this.mWordApplication == null) {
            return;
        }
        Application application = this.mWordApplication;
        this.mWordApplication = null;
        try {
            application.quit();
            mLog.info("Closed MS Word");
        }
        catch (Throwable thr) {
            throw new RegainException("Using COM failed.", thr);
        }
        finally {
            ComThread.Release();
        }
    }
}

