/*
 * Decompiled with CFR 0.152.
 */
package chemaxon.naming.document;

import chemaxon.naming.DocumentExtractor;
import chemaxon.naming.document.OCR;
import chemaxon.naming.document.TesseractProcessOCR;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.pdfbox.Version;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.graphics.xobject.PDXObject;
import org.apache.pdfbox.pdmodel.graphics.xobject.PDXObjectImage;
import org.apache.pdfbox.util.Matrix;
import org.apache.pdfbox.util.PDFOperator;
import org.apache.pdfbox.util.PDFTextStripper;

public class PDFBridge {
    private static final OCR ocr = new TesseractProcessOCR();
    private static final Logger logger = Logger.getLogger(PDFBridge.class.getName());

    public static DocumentExtractor readPDF(File pdf, InputStream pdfStream) throws IOException {
        if (logger.isLoggable(Level.FINE)) {
            logger.fine("Calling PDFBox " + Version.getVersion());
        }
        long startTime = System.currentTimeMillis();
        PDFParser pdfParser = new PDFParser(pdfStream);
        pdfParser.parse();
        PDDocument pdfDoc = pdfParser.getPDDocument();
        final boolean ocrAvailable = ocr.isAvailable();
        PDFTextStripper pdTextStripper = new PDFTextStripper(){

            protected void startPage(PDPage page) {
                if (logger.isLoggable(Level.FINE)) {
                    logger.log(Level.FINE, "Processing page " + this.getCurrentPageNo());
                }
                try {
                    this.writeString(PDFBridge.pageMarker(this.getCurrentPageNo()));
                }
                catch (IOException e) {
                    throw new RuntimeException("Should not happen, the output is a StringWriter", e);
                }
            }

            protected void processOperator(PDFOperator operator, List arguments) throws IOException {
                super.processOperator(operator, arguments);
                if (!ocrAvailable) {
                    return;
                }
                String operation = operator.getOperation();
                if (operation.equals("Do")) {
                    COSName objectName = (COSName)arguments.get(0);
                    Map xobjects = this.getResources().getXObjects();
                    PDXObject xobject = (PDXObject)xobjects.get(objectName.getName());
                    if (xobject instanceof PDXObjectImage) {
                        this.processImage((PDXObjectImage)xobject);
                    }
                }
            }

            private void processImage(PDXObjectImage image) throws IOException {
                block4: {
                    PDPage page = this.getCurrentPage();
                    Matrix ctm = this.getGraphicsState().getCurrentTransformationMatrix();
                    float width = ctm.getXScale();
                    float height = ctm.getYScale();
                    float imageSurface = width * height;
                    PDRectangle pageSize = page.findMediaBox();
                    float totalSurface = pageSize.getHeight() * pageSize.getWidth();
                    float ratio = imageSurface / totalSurface;
                    if (logger.isLoggable(Level.FINE)) {
                        logger.fine("Image takes " + ratio * 100.0f + "% of the page");
                    }
                    if (ratio > 0.8f) {
                        try {
                            String ocredText = ocr.process(image.getRGBImage());
                            this.writeString(ocredText);
                        }
                        catch (RuntimeException e) {
                            if (!logger.isLoggable(Level.WARNING)) break block4;
                            logger.log(Level.WARNING, "Failure during processing of page " + this.getCurrentPageNo(), e);
                        }
                    }
                }
            }
        };
        String text = pdTextStripper.getText(pdfDoc);
        pdfDoc.close();
        if (logger.isLoggable(Level.FINE)) {
            long time = System.currentTimeMillis() - startTime;
            logger.fine("PDFBox ran for " + time + " ms");
        }
        if (logger.isLoggable(Level.FINER)) {
            logger.finer("PDFBox extracted " + text.length() + " characters");
        }
        if (logger.isLoggable(Level.FINEST)) {
            logger.finest("PDFBox extracted text: >>" + text + "<<");
        }
        DocumentExtractor x = new DocumentExtractor(text);
        return x;
    }

    static String pageMarker(int page) {
        return " <<PAGE" + page + ">> ";
    }
}

