/*
 * Decompiled with CFR 0.152.
 */
package chemaxon.naming.document;

import chemaxon.marvin.util.Environment;
import chemaxon.marvin.util.InstalledComponent;
import chemaxon.naming.document.OCR;
import chemaxon.naming.document.PDF2XHTML;
import chemaxon.util.LoggingUtil;
import java.awt.image.RenderedImage;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.graphics.xobject.PDXObjectImage;

/**
 * {@link OCR} implementation that runs an external Tesseract executable as a
 * child process and reads back the text file it produces.
 *
 * <p>The executable and its language data are looked up through
 * {@link InstalledComponent}. Image files ({@code .png}, {@code .tiff}) are
 * handed to Tesseract directly; for {@code .pdf} files every image resource of
 * every page is extracted and recognised separately, and the results are
 * concatenated.
 */
public class TesseractProcessOCR
implements OCR {
    private static final String version = "3.01";
    private static final InstalledComponent installedExec = InstalledComponent.get("tesseract-" + TesseractProcessOCR.getArchitectureID(true) + "-" + version);
    private static final InstalledComponent installedData = InstalledComponent.get("tesseract-data-" + version);
    private static final String LANG_OPTION = "-l";
    private static final String PSM_OPTION = "-psm";
    private static final String EOL = "\n";
    /** Language code passed to Tesseract by the public entry points. */
    private static final String DEFAULT_LANGUAGE = "eng";
    /** Lazily resolved path of the Tesseract binary; see {@link #getExecutable()}. */
    private File tessExecutable;
    static final String OUTPUT_FILE_NAME = "TessOutput";
    static final String FILE_EXTENSION = ".txt";
    /** Value passed to Tesseract's {@code -psm} option. */
    private String psm = "3";
    static final Logger logger = LoggingUtil.getLogger(TesseractProcessOCR.class);

    /**
     * Reports whether both the Tesseract executable and its data are installed.
     *
     * @return {@code true} if both installed directories are non-null;
     *         {@code false} otherwise, including when the lookup fails with an
     *         {@link IOException} (which is logged)
     */
    @Override
    public boolean isAvailable() {
        try {
            return installedExec.getInstalledDir() != null && installedData.getInstalledDir() != null;
        }
        catch (IOException e) {
            logger.log(Level.WARNING, "Tesseract could not be installed, OCR is disabled", e);
            return false;
        }
    }

    /**
     * Runs OCR on a document file, dispatching on its file extension.
     *
     * @param document the file to recognise
     * @return the recognised text, or {@code null} if the extension is not one
     *         of {@code .png}, {@code .tiff} or {@code .pdf}
     * @throws IOException if the file cannot be read or Tesseract cannot be run
     */
    @Override
    public String process(File document) throws IOException {
        // Locale.ROOT: the default-locale lower-casing maps 'I' to a dotless
        // 'ı' under e.g. a Turkish locale, which would make "X.TIFF" unmatched.
        String name = document.getName().toLowerCase(Locale.ROOT);
        if (name.endsWith(".png") || name.endsWith(".tiff")) {
            return this.processImage(document, DEFAULT_LANGUAGE);
        }
        if (name.endsWith(".pdf")) {
            return this.processPDF(document, DEFAULT_LANGUAGE);
        }
        return null;
    }

    /**
     * Recognises the text of every image resource found on every page of a PDF.
     *
     * @param documentFile the PDF file
     * @param lang language code passed to Tesseract
     * @return the concatenated text of all images, in page order
     * @throws IOException if the PDF cannot be loaded or OCR fails
     */
    private String processPDF(File documentFile, String lang) throws IOException {
        StringBuilder completeText = new StringBuilder();
        PDDocument doc = PDDocument.load(documentFile);
        try {
            // The PDFBox accessors used here are not reliably generic, so the
            // elements are iterated as Object and cast explicitly.
            List<?> pages = doc.getDocumentCatalog().getAllPages();
            for (Object pageObject : pages) {
                PDPage page = (PDPage)pageObject;
                Map<?, ?> images = page.findResources().getImages();
                if (images == null) {
                    continue;
                }
                for (Object imageObject : images.values()) {
                    PDXObjectImage image = (PDXObjectImage)imageObject;
                    completeText.append(this.processRenderedImage(image.getRGBImage(), lang));
                }
            }
        }
        finally {
            // Always release the document, even when OCR of an image fails.
            doc.close();
        }
        return completeText.toString();
    }

    /**
     * Resolves (and caches) the Tesseract executable inside the installed
     * component directory, making it executable if necessary.
     *
     * @return the executable file
     * @throws IOException if the component lookup fails or the component is
     *         not installed
     */
    private File getExecutable() throws IOException {
        if (this.tessExecutable == null) {
            File lib = installedExec.getInstalledDir();
            if (lib == null) {
                throw new IOException("Tesseract executable is not installed");
            }
            File executable = new File(lib, "tesseract." + TesseractProcessOCR.getArchitectureID(false));
            logger.log(Level.FINE, "Tesseract executable: " + executable);
            if (!executable.canExecute() && !executable.setExecutable(true)) {
                logger.log(Level.WARNING, "Could not mark the Tesseract executable as executable: " + executable);
            }
            this.tessExecutable = executable;
        }
        return this.tessExecutable;
    }

    /**
     * Builds the platform identifier used in component and executable names.
     *
     * @param jar {@code true} for the identifier used in the installed
     *        component name, {@code false} for the executable file suffix;
     *        the two only differ on Windows
     * @return {@code "linux-x" + bits} on Linux, {@code "windows"} or
     *         {@code "win.exe"} on Windows, {@code "unknown"} otherwise
     */
    private static String getArchitectureID(boolean jar) {
        String bits = System.getProperty("sun.arch.data.model");
        if (Environment.LINUX) {
            return "linux-x" + bits;
        }
        if (Environment.MSWINDOWS) {
            if (jar) {
                return "windows";
            }
            return "win.exe";
        }
        return "unknown";
    }

    /**
     * Looks up the directory holding the Tesseract data.
     *
     * @return the data directory, or {@code null} (after logging a warning) if
     *         it is not installed or the lookup fails
     */
    private File getTessDir() {
        File res;
        try {
            res = installedData.getInstalledDir();
        }
        catch (IOException e) {
            logger.log(Level.WARNING, "OCR engine data not found", e);
            return null;
        }
        if (res == null) {
            logger.log(Level.WARNING, "OCR engine data not found");
            return null;
        }
        return res;
    }

    /**
     * Runs Tesseract on one image file and returns the recognised text.
     *
     * @param imageFile the image to recognise
     * @param lang language code passed with {@code -l}
     * @return the content of Tesseract's output file, each line terminated by
     *         {@code "\n"}
     * @throws IOException if Tesseract or its data is missing, the process
     *         cannot be started, or the output cannot be read
     * @throws RuntimeException if Tesseract exits with a non-zero code (the
     *         message is its console output) or the calling thread is
     *         interrupted while waiting
     */
    private String processImage(File imageFile, String lang) throws IOException {
        File tessDir = this.getTessDir();
        if (tessDir == null) {
            throw new IOException("OCR engine data not found");
        }
        File executable = this.getExecutable();
        File tempTessOutputFile = File.createTempFile(OUTPUT_FILE_NAME, FILE_EXTENSION);
        try {
            // Tesseract appends ".txt" itself, so pass the path without it.
            String tempPath = tempTessOutputFile.getPath();
            String outputFileName = tempPath.substring(0, tempPath.length() - FILE_EXTENSION.length());
            List<String> cmd = new ArrayList<String>();
            cmd.add(executable.getPath());
            cmd.add(imageFile.getPath());
            cmd.add(outputFileName);
            cmd.add(LANG_OPTION);
            cmd.add(lang);
            cmd.add(PSM_OPTION);
            cmd.add(this.psm);
            if (logger.isLoggable(Level.FINER)) {
                logger.log(Level.FINER, cmd.toString());
            }
            ProcessBuilder pb = new ProcessBuilder(cmd);
            pb.redirectErrorStream(true);
            pb.environment().put("TESSDATA_PREFIX", tessDir.getPath() + File.separatorChar);
            Process process = pb.start();
            StreamGobbler outputGobbler = new StreamGobbler(process.getInputStream());
            outputGobbler.start();
            int exitValue;
            try {
                exitValue = process.waitFor();
                // Wait for the reader thread so the captured console output is
                // complete and safely visible before it is read below.
                outputGobbler.join();
            }
            catch (InterruptedException e) {
                process.destroy();
                Thread.currentThread().interrupt();
                throw new RuntimeException(e);
            }
            if (logger.isLoggable(Level.FINER)) {
                logger.log(Level.FINER, "Exit value = " + exitValue);
            }
            if (exitValue != 0) {
                String msg = outputGobbler.getMessage();
                if (msg.trim().length() == 0) {
                    msg = "Errors occurred.";
                }
                throw new RuntimeException(msg);
            }
            return TesseractProcessOCR.readOutput(tempTessOutputFile);
        }
        finally {
            // Runs on success and on every failure path, so no temp file leaks.
            if (!tempTessOutputFile.delete() && logger.isLoggable(Level.FINE)) {
                logger.log(Level.FINE, "Could not delete temporary file " + tempTessOutputFile);
            }
        }
    }

    /** Reads a UTF-8 text file, terminating every line with {@code "\n"}. */
    private static String readOutput(File outputFile) throws IOException {
        StringBuilder result = new StringBuilder();
        BufferedReader in = new BufferedReader(new InputStreamReader((InputStream)new FileInputStream(outputFile), "UTF-8"));
        try {
            String line;
            while ((line = in.readLine()) != null) {
                result.append(line).append(EOL);
            }
        }
        finally {
            in.close();
        }
        return result.toString();
    }

    /**
     * Runs OCR on an in-memory image by writing it to a temporary file first.
     *
     * @param image the image to recognise
     * @return the recognised text
     * @throws IOException if the image cannot be written or OCR fails
     */
    @Override
    public String process(RenderedImage image) throws IOException {
        return this.processRenderedImage(image, DEFAULT_LANGUAGE);
    }

    /** Writes the image to a temporary file, recognises it and removes the file again. */
    private String processRenderedImage(RenderedImage image, String lang) throws IOException {
        File imageFile = PDF2XHTML.writeTemp(image);
        try {
            return this.processImage(imageFile, lang);
        }
        finally {
            imageFile.delete();
        }
    }

    /**
     * Thread that drains a stream line by line, so the child process cannot
     * block on a full output buffer, and keeps the text for error reporting.
     */
    class StreamGobbler
    extends Thread {
        InputStream is;
        StringBuilder outputMessage = new StringBuilder();

        StreamGobbler(InputStream is) {
            this.is = is;
        }

        /**
         * Returns the text read so far. Callers should {@code join()} this
         * thread first to be sure the output is complete.
         */
        String getMessage() {
            return this.outputMessage.toString();
        }

        @Override
        public void run() {
            BufferedReader br = new BufferedReader(new InputStreamReader(this.is));
            try {
                String line;
                while ((line = br.readLine()) != null) {
                    if (logger.isLoggable(Level.FINEST)) {
                        logger.log(Level.FINEST, line);
                    }
                    this.outputMessage.append(line).append("\n");
                }
            }
            catch (IOException ioe) {
                logger.log(Level.WARNING, "Exception while reading tesseract output", ioe);
            }
            finally {
                try {
                    br.close();
                }
                catch (IOException ignored) {
                    // Nothing useful can be done if closing the drained stream fails.
                }
            }
        }
    }
}

