/*
 * Decompiled with CFR 0.152.
 */
package chemaxon.naming.document;

import chemaxon.formats.MolFormatException;
import chemaxon.formats.MolImporter;
import chemaxon.marvin.io.formats.name.dictionaries.Blacklist;
import chemaxon.marvin.io.formats.name.nameexport.Util;
import chemaxon.marvin.io.formats.name.nameimport.UnknownTokenException;
import chemaxon.marvin.io.formats.name.util.TextUtils;
import chemaxon.naming.NameFormatException;
import chemaxon.naming.NamePrefixException;
import chemaxon.naming.document.D2S;
import chemaxon.naming.n2s.CASNumberConverter;
import chemaxon.naming.n2s.N2S;
import chemaxon.naming.n2s.OcrCorrector;
import chemaxon.naming.n2s.Options;
import chemaxon.naming.n2s.Standardize;
import chemaxon.struc.MolAtom;
import chemaxon.struc.Molecule;
import chemaxon.util.LoggingUtil;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.StringTokenizer;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Pattern;

public class Extractor {
    /** Incremented once per call to {@code importRawNameNoTimeoutNoCorrect}. */
    static int calls = 0;
    /** Timeout passed to the constructor; stored, but not enforced by the code visible in this file. */
    int timeout;
    /** Document-to-structure options; its {@code n2sOptions} are handed to the name parser. */
    public D2S.Options options;
    /** When true, words that are not recognised are recorded in {@link #hits} as plain tokens. */
    boolean wantNonHits;
    /** Set while {@code putWord} emits pieces of a word that was split on ';' or '/'. */
    boolean nonSeparatedToken = false;
    /** When true, names go through the {@code OcrCorrector} passes before and after parsing. */
    boolean correctOCRerrors;
    /** Number of lines consumed so far by {@code extract}. */
    int lineNumber = 0;
    /** Tokens produced so far, in the order they were emitted (hits and, optionally, non-hits). */
    public ArrayList<Token> hits = new ArrayList<Token>();
    /** Mirrors {@code N2S.getState().optimizingPrefixes} after each parse; selects the window length. */
    private boolean optimizePrefixes = true;
    /** Buffered words awaiting processing; kept parallel to {@link #values}. */
    private LinkedList<String> words = new LinkedList<String>();
    /** Caller-supplied value for each buffered word (an Integer offset, or -1 when no offset is known). */
    private LinkedList<Object> values = new LinkedList<Object>();
    /** Characters on which {@code putWord} splits a single whitespace-delimited word. */
    private static final Pattern wordSplitter = Pattern.compile("[;/]");
    /** When set, the next {@code process()} scan stops immediately and the flag is cleared. */
    private boolean ignoreNext = false;
    /** Longest parser-reported prefix seen during the current {@code process()} call. */
    String bestPrefixParsed = "";
    /** Longest parser-reported prefix seen during the most recent import attempt. */
    String lastPrefixParsed = "";
    /** Not referenced by the code visible in this file. */
    private static final Pattern separators = Pattern.compile("[-,]");
    /** Sentinel returned by the parser wrapper; always compared by identity, never used as a structure. */
    private static final Molecule splitName;
    /** Forwarded to {@code Blacklist.isCommonLanguageWord}. */
    private boolean acceptGenericNames = false;
    /** When true, a failed name ending in 's' is retried without the final 's'. */
    private boolean acceptPlurals = true;
    /** Accumulated milliseconds reported by {@code importRawNameTimed}. */
    static long totalTime;
    /** Initialised to true in the static initializer; not read by the code visible in this file. */
    static boolean cold;
    /** Enables debug output on {@code System.out}. */
    public boolean dbg = false;
    private static final Logger logger;

    /**
     * Creates an extractor with an empty word buffer and no hits.
     *
     * @param timeout          stored in {@code this.timeout}
     * @param wantNonHits      whether unrecognised words are recorded as plain tokens
     * @param correctOCRerrors whether OCR correction passes are applied to candidate names
     * @param options          document-to-structure options used for every conversion
     */
    public Extractor(int timeout, boolean wantNonHits, boolean correctOCRerrors, D2S.Options options) {
        this.options = options;
        this.correctOCRerrors = correctOCRerrors;
        this.wantNonHits = wantNonHits;
        this.timeout = timeout;
    }

    /**
     * Returns the number of buffered words that triggers processing:
     * 15 while prefix optimisation is on, 3 otherwise.
     */
    private int windowLength() {
        if (this.optimizePrefixes) {
            return 15;
        }
        return 3;
    }

    /**
     * Reads {@code r} line by line, feeds every line to {@link #putChunk(String)}
     * and finally flushes the word buffer.
     *
     * @param r source of the text; it is wrapped in a BufferedReader and not closed here
     * @throws IOException if reading a line fails
     */
    public void extract(Reader r) throws IOException {
        BufferedReader lines = new BufferedReader(r);
        for (String current = lines.readLine(); current != null; current = lines.readLine()) {
            this.lineNumber++;
            boolean milestone = this.lineNumber % 100 == 0;
            if (milestone) {
                this.dbg(this.lineNumber + " lines processed");
            }
            this.putChunk(current);
        }
        this.processAll();
    }

    /**
     * Splits {@code chunk} on the default StringTokenizer delimiters and submits
     * each word with the value -1 (no source offset).
     *
     * @param chunk text to tokenize
     */
    public void putChunk(String chunk) {
        for (StringTokenizer tokens = new StringTokenizer(chunk); tokens.hasMoreTokens(); ) {
            this.putWord(tokens.nextToken(), -1);
        }
    }

    /**
     * Tokenizes {@code chunk} on whitespace while tracking character offsets:
     * every word is submitted with the offset at which it starts.
     *
     * @param chunk    text to tokenize
     * @param position offset assigned to the first character of {@code chunk}
     */
    void putChunk(String chunk, int position) {
        final String whitespace = " \t\n\r\f";
        // Delimiters are returned as tokens so that each one can advance the offset by one.
        StringTokenizer tokens = new StringTokenizer(chunk, whitespace, true);
        int cursor = position;
        while (tokens.hasMoreTokens()) {
            String token = tokens.nextToken();
            boolean isDelimiter = token.length() == 1 && whitespace.indexOf(token.charAt(0)) != -1;
            if (isDelimiter) {
                cursor++;
            } else {
                this.putWord(token, cursor);
                cursor += token.length();
            }
        }
    }

    /**
     * Submits one whitespace-delimited word. The word is split on ';' and '/';
     * each piece and each separator character is buffered as its own word, and
     * the buffer is flushed before every separator.
     *
     * <p>Words containing '&amp;' or starting with {@code "InChI="} are buffered
     * unsplit.
     *
     * <p>When {@code value} is an Integer other than -1 it is treated as the
     * character offset of {@code word} and advanced for every piece. The value -1
     * means "no offset known" (that is what {@code process()} tests for) and is
     * passed through unchanged; previously it was incremented as well, which made
     * later words look positioned and caused {@code process()} to join their
     * source text without spaces.
     *
     * @param word  the word to submit
     * @param value caller-defined value attached to the word, usually its offset
     */
    public void putWord(String word, Object value) {
        if (word.indexOf('&') != -1 || word.startsWith("InChI=")) {
            this.putWordSplit(word, value);
            return;
        }
        boolean endsWithSemicolon = false;
        if (word.endsWith(";")) {
            endsWithSemicolon = true;
            // Pattern.split drops trailing empty strings; the extra "." keeps a final
            // piece so that the closing ';' is still emitted as a separator below.
            word = word + ".";
        }
        String[] pieces = wordSplitter.split(word);
        // Only real offsets are advanced; the -1 sentinel must stay -1.
        boolean positioned = value instanceof Integer && ((Integer) value).intValue() != -1;
        // Index in 'word' of the separator that precedes pieces[i].
        int posSeparator = -1;
        for (int i = 0; i < pieces.length; ++i) {
            if (i > 0) {
                this.processAll();
                this.nonSeparatedToken = true;
                if (positioned) {
                    value = (Integer) value + pieces[i - 1].length();
                }
                this.putWordSplit(word.charAt(posSeparator) + "", value);
                if (positioned) {
                    value = (Integer) value + 1;
                }
            }
            // The last piece is the artificial "." appended above: do not emit it.
            if (i == pieces.length - 1 && endsWithSemicolon) {
                break;
            }
            this.putWordSplit(pieces[i], value);
            posSeparator += pieces[i].length() + 1;
        }
        this.nonSeparatedToken = false;
    }

    /**
     * Appends one word (after stripping trailing control characters) to the
     * buffer and processes the buffer while it holds at least
     * {@code windowLength()} words. A lone ";" flushes the buffer both before and
     * after it is appended.
     *
     * <p>The buffer can exceed the window: {@code rewriteColon()} inserts two
     * words during processing, and the window length itself changes with
     * {@code optimizePrefixes}. The previous {@code size() == windowLength()}
     * test never fired again once that happened, so the buffer kept growing
     * until the next explicit flush. The check is therefore {@code >=} and
     * repeated until the buffer is back below the window.
     *
     * @param word  word to buffer; ignored if empty after trimming
     * @param value value associated with the word
     */
    void putWordSplit(String word, Object value) {
        word = this.trimControlChars(word);
        if (word.length() == 0) {
            return;
        }
        boolean terminator = ";".equals(word);
        if (terminator) {
            this.processAll();
        }
        this.words.addLast(word);
        this.values.addLast(value);
        // Every process() call removes at least one buffered word, so this loop terminates.
        while (this.words.size() >= this.windowLength()) {
            this.process();
        }
        if (terminator) {
            this.processAll();
        }
    }

    /**
     * Removes trailing characters whose Unicode general category is CONTROL.
     *
     * @param word text to trim
     * @return {@code word} without its trailing control characters (possibly empty)
     */
    private String trimControlChars(String word) {
        int end = word.length();
        while (end > 0 && Character.getType(word.charAt(end - 1)) == Character.CONTROL) {
            end--;
        }
        return word.substring(0, end);
    }

    /**
     * Flushes the word buffer: processes buffered words until none are left.
     */
    public void processAll() {
        this.process(0);
    }

    /**
     * Calls {@link #process()} repeatedly until at most {@code window} words
     * remain in the buffer.
     *
     * @param window number of words allowed to stay buffered
     */
    void process(int window) {
        for (; this.words.size() > window; ) {
            this.process();
        }
    }

    /**
     * Records {@code word} as a plain token, but only when non-hits were requested.
     *
     * @param word text that was not recognised
     */
    void addNonHit(String word) {
        if (this.wantNonHits) {
            Token plain = new Token(word, this.nonSeparatedToken);
            this.hits.add(plain);
        }
    }

    /**
     * Records a recognised name.
     *
     * @param word  source text of the hit
     * @param name  name stored on the hit
     * @param m     structure the name was converted to
     * @param value value associated with the first word of the hit
     */
    void addHit(String word, String name, Molecule m, Object value) {
        // Checked here as well so the SMILES string is only generated when debugging.
        if (this.dbg) {
            String smiles = Util.getDictionarySmiles(m);
            this.dbg("Hit: " + word + " @@ " + name + " -> " + smiles);
        }
        Hit hit = new Hit(word, this.nonSeparatedToken, name, m, value);
        this.hits.add(hit);
    }

    /**
     * Processes the head of the word buffer once. Successively longer prefixes
     * of the buffered words are handed to {@code importName}; the last prefix
     * that converts is emitted as a hit and its words are removed. If nothing
     * converts, the first word is either rewritten on ',' / '-' or removed as a
     * non-hit.
     */
    void process() {
        int i;
        // A leading run of more than three plain numbers is discarded up front.
        if (this.skipPathological()) {
            return;
        }
        // Best (i.e. most recent, hence longest) successful candidate so far.
        String bestWord = null;
        int bestI = -1;
        Molecule bestM = null;
        // sourceText: candidate as it appears in the input; word: HTML-decoded form handed to the parser.
        String sourceText = null;
        String word = null;
        // Set after a split-name result: the next word is appended without a separating space.
        boolean noSpace = false;
        this.bestPrefixParsed = "";
        for (i = 0; i < this.words.size(); ++i) {
            // Requested by the previous call (after the word "cyclic" was dropped): skip this scan once.
            if (this.ignoreNext) {
                this.ignoreNext = false;
                break;
            }
            String lastWord = this.words.get(i);
            // "all" ends the candidate; a word starting with "reductase" also discards what was found.
            if ("all".equals(lastWord)) break;
            if (lastWord.startsWith("reductase")) {
                bestWord = null;
                break;
            }
            String decodedLastWord = Standardize.decodeHTML(lastWord);
            if (word == null) {
                sourceText = lastWord;
                word = decodedLastWord;
            } else {
                // NOTE(review): assumes every buffered value is a Number; other values throw ClassCastException here.
                int startPos = ((Number)this.values.get(0)).intValue();
                int curPos = ((Number)this.values.get(i)).intValue();
                if (startPos == -1) {
                    // No offsets available: join with a single space.
                    if (!noSpace) {
                        sourceText = sourceText + " ";
                    }
                } else {
                    // Offsets available: pad with spaces up to the distance between the two offsets.
                    int desiredLength = curPos - startPos;
                    while (sourceText.length() < desiredLength) {
                        sourceText = sourceText + " ";
                    }
                }
                sourceText = sourceText + lastWord;
                word = word + (noSpace ? "" : " ") + decodedLastWord;
            }
            noSpace = false;
            String trimmed = Extractor.trimWord(word);
            if (trimmed.startsWith("- ")) break;
            // Reset so that lastPrefixParsed reflects this import attempt only.
            this.lastPrefixParsed = null;
            Molecule m = this.importName(trimmed);
            if (this.dbg) {
                this.debugResult(trimmed, m);
            }
            // The parser got further than on any earlier attempt: remember it and, on failure, keep extending.
            if (this.lastPrefixParsed != null && this.lastPrefixParsed.length() > this.bestPrefixParsed.length()) {
                this.bestPrefixParsed = this.lastPrefixParsed;
                if (this.failed(m)) continue;
            }
            if (m == null) {
                // Splitting the first word at ':' changes the buffer, so the scan restarts from index 0.
                if (this.rewriteColon()) {
                    sourceText = null;
                    word = null;
                    i = -1;
                    continue;
                }
                // Keep extending only when prefix optimisation is off or the word is a lone "l"; otherwise stop.
                if (!this.optimizePrefixes || lastWord.equals("l")) continue;
                break;
            }
            // Split-name sentinel: drop the last character of the parser text and glue the next word on directly.
            if (m == splitName) {
                word = word.substring(0, word.length() - 1);
                noSpace = true;
                continue;
            }
            bestWord = sourceText;
            bestI = i;
            bestM = m;
            // A candidate still ending in ';' after trimming ends the scan; the ';' is not part of the hit text.
            if (!trimmed.endsWith(";")) continue;
            if (!bestWord.endsWith(";")) break;
            bestWord = bestWord.substring(0, bestWord.length() - 1);
            break;
        }
        // Structures without atoms are not reported.
        if (bestM != null && this.reject(bestM)) {
            bestWord = null;
        }
        if (bestWord == null) {
            // Last resort: split the first word on ',' or '-' and look for non-systematic names in the parts.
            if (this.rewriteSeparators()) {
                return;
            }
            String previousWord = this.words.removeFirst();
            this.ignoreNext = previousWord.equals("cyclic");
            this.addNonHit(previousWord);
            this.values.removeFirst();
            return;
        }
        this.addHit(bestWord, Extractor.trimWord(bestWord), bestM, this.values.get(0));
        // Consume every word that contributed to the hit.
        for (i = 0; i <= bestI; ++i) {
            this.words.removeFirst();
            this.values.removeFirst();
        }
    }

    /**
     * Removes input from the head of the buffer that is not worth parsing;
     * currently this only covers runs of numbers.
     *
     * @return true if words were removed from the buffer
     */
    private boolean skipPathological() {
        return this.skipListOfNumbers();
    }

    /**
     * If the buffer starts with more than three consecutive all-digit words,
     * removes that whole run (without recording it as non-hits).
     *
     * @return true if a run was removed, false if the buffer was left untouched
     */
    private boolean skipListOfNumbers() {
        int leading = 0;
        while (leading < this.words.size() && this.isNumber(this.words.get(leading))) {
            leading++;
        }
        if (leading <= 3) {
            return false;
        }
        for (int remaining = leading; remaining > 0; remaining--) {
            this.words.removeFirst();
            this.values.removeFirst();
        }
        return true;
    }

    /**
     * Tells whether {@code string} consists solely of the ASCII digits 0-9.
     * The empty string counts as a number.
     */
    private boolean isNumber(String string) {
        for (int k = 0; k < string.length(); k++) {
            char ch = string.charAt(k);
            if (ch < '0' || ch > '9') {
                return false;
            }
        }
        return true;
    }

    /**
     * Splits the first buffered word at its first ':' into three buffered words:
     * the text before, the ":" itself and the text after. Nothing happens when
     * the word has no ':' or the ':' is its first or last character.
     *
     * <p>When the word carries a real Integer offset, the new words get the
     * matching offsets. Any other value, in particular the -1 "no offset"
     * sentinel, is copied unchanged. Previously -1 was shifted like a real
     * offset, which made the following words look positioned, and a non-Integer
     * value threw a ClassCastException after the word list had already been
     * modified.
     *
     * @return true if the buffer was rewritten
     */
    private boolean rewriteColon() {
        String word = this.words.get(0);
        int colon = word.indexOf(':');
        if (colon <= 0 || colon == word.length() - 1) {
            return false;
        }
        String first = word.substring(0, colon);
        String second = word.substring(colon + 1);
        this.words.set(0, first);
        this.words.add(1, ":");
        this.words.add(2, second);
        Object value = this.values.get(0);
        if (value instanceof Integer && ((Integer) value).intValue() != -1) {
            int pos = ((Integer) value).intValue();
            this.values.add(1, pos + first.length());
            this.values.add(2, pos + first.length() + 1);
        } else {
            // Keep words and values parallel without inventing an offset.
            this.values.add(1, value);
            this.values.add(2, value);
        }
        return true;
    }

    /**
     * Tries to break up the first buffered word on ',' and, if that yields no
     * hit, on '-'.
     *
     * @return true if one of the two attempts produced a hit
     */
    private boolean rewriteSeparators() {
        if (this.rewriteSeparator(',')) {
            return true;
        }
        return this.rewriteSeparator('-');
    }

    /**
     * Splits the first buffered word on {@code separator} and reports every part
     * that converts as a non-systematic name as its own hit. The split is only
     * attempted when each part is either such a name or consists of lowercase
     * ASCII letters only. If at least one hit is found, the word is removed from
     * the buffer; non-converting parts are then reported as non-hits, except
     * common-language words and parts that fail before the first hit.
     *
     * <p>Fixes over the previous version: the running offset is advanced for
     * every part, including parts skipped as common-language words (it used to
     * be advanced only after that check, so hits following a skipped part got a
     * wrong offset), and the -1 "no offset" sentinel is reported unchanged
     * rather than being shifted.
     *
     * @param separator the character to split on
     * @return true if at least one part produced a hit
     */
    private boolean rewriteSeparator(char separator) {
        String word = this.words.get(0);
        if (word.indexOf(separator) == -1) {
            return false;
        }
        String[] parts = TextUtils.split(word, separator);
        // NOTE(review): assumes the buffered value is an Integer, as the original code did.
        int pos = (Integer)this.values.get(0);
        boolean positioned = pos != -1;
        for (String candidate : parts) {
            if (this.isNonSystematicName(candidate) || this.lowercaseWord(candidate)) {
                continue;
            }
            return false;
        }
        boolean hasHit = false;
        for (int i = 0; i < parts.length; ++i) {
            String part = parts[i];
            // Advance past the previous part and one separator character, whether
            // or not the current part ends up being reported.
            if (positioned && i > 0) {
                pos += parts[i - 1].length() + 1;
            }
            if (this.isCommonLanguageWord(part, false)) {
                continue;
            }
            Molecule m = this.convertNonSystematicName(part);
            if (m == null) {
                if (hasHit) {
                    this.addNonHit(part);
                }
                continue;
            }
            if (!hasHit) {
                // First hit: the parts before it become non-hits so the token order is preserved.
                for (int j = 0; j < i; ++j) {
                    this.addNonHit(parts[j]);
                }
                hasHit = true;
            }
            m.setName(part);
            this.addHit(part, part, m, pos);
        }
        if (hasHit) {
            this.words.remove(0);
            this.values.remove(0);
        }
        return hasHit;
    }

    /**
     * Tells whether {@code name} converts with systematic parsing switched off.
     */
    private boolean isNonSystematicName(String name) {
        Molecule converted = this.convertNonSystematicName(name);
        return converted != null;
    }

    /**
     * Converts {@code name} using a copy of the configured name-to-structure
     * options with {@code systematic} set to {@code Boolean.FALSE}.
     *
     * @param name candidate name
     * @return the imported structure, or null if the name is empty or the import
     *         throws MolFormatException
     */
    private Molecule convertNonSystematicName(String name) {
        if (name.isEmpty()) {
            return null;
        }
        Options nonSystematic = this.options.n2sOptions.clone();
        nonSystematic.systematic = Boolean.FALSE;
        Molecule converted;
        try {
            converted = N2S.importName(name, nonSystematic, "");
        }
        catch (MolFormatException e) {
            // A name that does not parse is simply "not a name" for the caller.
            converted = null;
        }
        return converted;
    }

    /**
     * Tells whether {@code word} contains only the ASCII letters a-z.
     * The empty string qualifies.
     */
    private boolean lowercaseWord(String word) {
        for (char ch : word.toCharArray()) {
            boolean asciiLower = ch >= 'a' && ch <= 'z';
            if (!asciiLower) {
                return false;
            }
        }
        return true;
    }

    /**
     * Writes a debug line describing the outcome of one import attempt, followed
     * by the current best and last parsed prefixes.
     *
     * @param name the text that was imported
     * @param m    the result: null, the split-name sentinel, or a structure
     */
    void debugResult(String name, Molecule m) {
        String outcome;
        if (m == null) {
            outcome = "[NULL]";
        } else if (m == splitName) {
            outcome = "[SPLIT]";
        } else {
            outcome = Util.getDictionarySmiles(m);
        }
        StringBuilder line = new StringBuilder();
        line.append(name).append(" -> ").append(outcome);
        line.append(" Best prefix parsed: '").append(this.bestPrefixParsed);
        line.append("'\tLast Prefix parsed: '").append(this.lastPrefixParsed).append("'");
        this.dbg(line.toString());
    }

    /**
     * Tells whether a converted structure must be discarded; that is the case
     * when it contains no atoms.
     */
    boolean reject(Molecule m) {
        return m.getAtomCount() == 0;
    }

    /**
     * Tells whether {@code name} on its own would be reported as a hit: it must
     * import to a structure that is neither the split-name sentinel nor rejected.
     */
    public boolean wouldConvert(String name) {
        Molecule m = this.importName(name);
        if (m == null || m == splitName) {
            return false;
        }
        return !this.reject(m);
    }

    /**
     * Imports {@code name}; when that fails, plurals are accepted, the name ends
     * in 's' and it is not a common-language word, the import is retried without
     * the final 's'.
     *
     * @return the import result, possibly null or the split-name sentinel
     */
    Molecule importName(String name) {
        Molecule res = this.importRawName(name);
        if (res != null || !this.acceptPlurals) {
            return res;
        }
        if (!name.endsWith("s") || this.isCommonLanguageWord(name, false)) {
            return res;
        }
        String singular = name.substring(0, name.length() - 1);
        return this.importRawName(singular);
    }

    /**
     * Sets {@code convertElements} on the name-to-structure options.
     *
     * @param on the new value of the flag
     */
    public void acceptElements(boolean on) {
        this.options.n2sOptions.convertElements = on;
    }

    /**
     * Sets {@code convertIons} on the name-to-structure options.
     *
     * @param on the new value of the flag
     */
    public void acceptIons(boolean on) {
        this.options.n2sOptions.convertIons = on;
    }

    /**
     * Sets the flag that is passed to {@code Blacklist.isCommonLanguageWord}
     * when words are checked against the blacklist.
     *
     * @param on the new value of the flag
     */
    public void acceptGenericNames(boolean on) {
        this.acceptGenericNames = on;
    }

    /**
     * Enables or disables the retry of a failed name without its trailing 's'
     * in {@code importName}.
     *
     * @param on the new value of the flag
     */
    public void acceptPlurals(boolean on) {
        this.acceptPlurals = on;
    }

    /**
     * Converts {@code name} to a structure. Common-language words are refused.
     * InChI strings, SMILES-looking strings and CAS numbers (each only when
     * enabled in the options) are imported directly and tagged with a "type"
     * property. Anything else goes to the name parser, with up to three OCR
     * correction passes when OCR correction is on.
     *
     * @param name candidate text
     * @return the structure, the split-name sentinel, or null if nothing converts
     */
    private Molecule importRawNameNoTimeout(String name) {
        String standardized = Standardize.get(name, false);
        if (this.isCommonLanguageWord(standardized, true)) {
            return null;
        }
        try {
            Molecule direct = null;
            String kind = null;
            if (this.options.enableInChI && name.startsWith("InChI=")) {
                direct = MolImporter.importMol(name, "inchi");
                kind = "InChI";
            } else if (this.recognizeSmiles(name)) {
                direct = MolImporter.importMol(name, "smiles");
                kind = "smiles";
            } else if (this.options.isCASEnabled() && CASNumberConverter.isCASNumber(name)) {
                direct = CASNumberConverter.convertCasNumber(name);
                kind = "CAS#";
            }
            // kind is set exactly when one of the direct formats claimed the input.
            if (kind != null) {
                if (direct != null) {
                    direct.setName(name);
                    direct.properties().setString("type", kind);
                }
                return direct;
            }
        }
        catch (MolFormatException e) {
            return null;
        }
        if (this.correctOCRerrors) {
            String pass1 = OcrCorrector.correctOCRerrors(name);
            if (!name.equals(pass1)) {
                this.dbg("\tCorrected name pass 1: " + pass1);
            }
            name = pass1;
        }
        Molecule res = this.importRawNameNoTimeoutNoCorrect(name);
        // Stages 2 and 3 are only tried while the result is still a failure and
        // only when the correction actually changes the text.
        if (this.failed(res) && this.correctOCRerrors) {
            String pass2 = OcrCorrector.correctOCRerrorsStage2(name);
            if (!name.equals(pass2)) {
                this.dbg("\tCorrected name pass 2: " + pass2);
                name = pass2;
                res = this.improve(res, this.importRawNameNoTimeoutNoCorrect(name));
            }
        }
        if (this.failed(res) && this.correctOCRerrors) {
            String pass3 = OcrCorrector.correctOCRerrorsStage3(name);
            if (!name.equals(pass3)) {
                this.dbg("\tCorrected name pass 3: " + pass3);
                name = pass3;
                res = this.improve(res, this.importRawNameNoTimeoutNoCorrect(name));
            }
        }
        return res;
    }

    /**
     * Heuristic check whether {@code name} looks like a SMILES string. It is
     * always false when SMILES recognition is disabled or the text is shorter
     * than five characters.
     *
     * <p>Only ASCII letters, the digits 1-9 (not as first character) and the
     * characters {@code ( ) + - = @ [ ] * .} are accepted. At least half of the
     * characters must be C/N/O/H (either case) or one of
     * {@code ( ) + - = @ [ ]}. Text with a lowercase letter that is not directly
     * preceded by an uppercase letter is accepted only if it also contains a
     * digit.
     */
    private boolean recognizeSmiles(String name) {
        if (!this.options.enableSmiles) {
            return false;
        }
        final int len = name.length();
        if (len < 5) {
            return false;
        }
        boolean sawLowercase = false;
        boolean sawDigit = false;
        int typical = 0;
        int i = 0;
        while (i < len) {
            char c = name.charAt(i);
            if ("CNOH".indexOf(Character.toUpperCase(c)) >= 0) {
                typical++;
            }
            if (c >= 'a' && c <= 'z') {
                sawLowercase = true;
            } else if (c >= 'A' && c <= 'Z') {
                // A lowercase letter right after an uppercase one is consumed together with it
                // (presumably a two-letter symbol); it is neither counted nor treated as lowercase.
                if (i + 1 < len) {
                    char next = name.charAt(i + 1);
                    if (next >= 'a' && next <= 'z') {
                        i++;
                    }
                }
            } else if (c >= '1' && c <= '9') {
                if (i == 0) {
                    return false;
                }
                sawDigit = true;
            } else if ("()+-=@[]".indexOf(c) >= 0) {
                typical++;
            } else if (c != '*' && c != '.') {
                return false;
            }
            i++;
        }
        if (typical * 2 < len) {
            return false;
        }
        return sawDigit || !sawLowercase;
    }

    /**
     * Tells whether an import attempt produced no result, i.e. {@code m} is null.
     */
    private boolean failed(Molecule m) {
        return m == null;
    }

    /**
     * Chooses between the result of an earlier attempt and a newer one: the newer
     * result wins unless it is a failure and null, in which case the earlier
     * result is kept.
     *
     * @param previous result of the earlier attempt
     * @param current  result of the newer attempt
     * @return the result to continue with
     */
    private Molecule improve(Molecule previous, Molecule current) {
        if (this.failed(current) && current == null) {
            return previous;
        }
        return current;
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    /**
     * Runs the name parser on {@code name} and records, via
     * {@code prefixParsed}, how much of the text the parser understood.
     *
     * <p>Results flagged "Ambiguous" are suppressed when the name has a space as
     * its second or second-to-last character, or starts with a character
     * followed by "- ". The character tests are now guarded by length checks;
     * previously a one- or two-character ambiguous name threw
     * StringIndexOutOfBoundsException.
     *
     * <p>{@code optimizePrefixes} is refreshed from the parser state on every
     * exit path (written as {@code finally}; the decompiled form duplicated the
     * assignment and rethrew a caught Throwable to reach the handlers).
     *
     * @param name text to parse
     * @return the parsed structure; the split-name sentinel when an unknown
     *         token is followed by "-" at the end of the text; null on any other
     *         failure
     */
    private Molecule importRawNameNoTimeoutNoCorrect(String name) {
        ++calls;
        try {
            Molecule res = N2S.importName(name, this.options.n2sOptions, "");
            if (res != null && res.properties().get("Ambiguous") != null) {
                int len = name.length();
                boolean spaceAfterFirst = len > 1 && name.charAt(1) == ' ';
                boolean spaceBeforeLast = len > 1 && name.charAt(len - 2) == ' ';
                boolean dashSpaceAfterFirst = len > 2 && name.charAt(1) == '-' && name.charAt(2) == ' ';
                if (spaceAfterFirst || spaceBeforeLast || dashSpaceAfterFirst) {
                    if (logger.isLoggable(Level.FINE)) {
                        logger.fine("Hiding bogus hit: " + name);
                    }
                    res = null;
                    // The text still counts as parsed so the caller keeps extending the candidate.
                    if (spaceBeforeLast) {
                        this.prefixParsed(name);
                    }
                }
            }
            if (!this.failed(res)) {
                this.prefixParsed(name);
            }
            return res;
        }
        catch (NamePrefixException e) {
            this.prefixParsed(e.getParsed());
            return null;
        }
        catch (NameFormatException.FilteredCase e) {
            this.prefixParsed(name);
            return null;
        }
        catch (UnknownTokenException e) {
            this.prefixParsed(e.getPrefix());
            if (name.endsWith(e.getUnknownToken() + "-")) {
                return splitName;
            }
            return null;
        }
        catch (chemaxon.naming.n2s.UnknownTokenException e) {
            this.prefixParsed(e.getPrefix());
            if (name.endsWith(e.getUnknownToken() + "-")) {
                return splitName;
            }
            return null;
        }
        catch (MolFormatException e) {
            return null;
        }
        finally {
            this.optimizePrefixes = N2S.getState().optimizingPrefixes;
        }
    }

    /**
     * Records {@code prefix} as the last parsed prefix if it is longer than the
     * one recorded so far. Null and the words "a", "an", "in", "o", "one" and
     * "the" are ignored.
     *
     * @param prefix the part of the text the parser understood, may be null
     */
    private void prefixParsed(String prefix) {
        if (prefix == null) {
            return;
        }
        for (String ignored : new String[]{"a", "an", "in", "o", "one", "the"}) {
            if (ignored.equals(prefix)) {
                return;
            }
        }
        boolean longer = this.lastPrefixParsed == null || prefix.length() > this.lastPrefixParsed.length();
        if (!longer) {
            return;
        }
        this.lastPrefixParsed = prefix;
        this.dbg("\tPrefixParsed=" + prefix);
    }

    /**
     * Imports {@code name} like {@code importRawName}, measures the wall-clock
     * time of the call, adds it to {@code totalTime} and prints both figures to
     * {@code System.err}.
     *
     * @return the import result
     */
    Molecule importRawNameTimed(String name) {
        final long begin = System.currentTimeMillis();
        Molecule res = this.importRawName(name);
        final long elapsed = System.currentTimeMillis() - begin;
        totalTime += elapsed;
        System.err.println(elapsed + "ms\t importing: " + name + "\t total: " + totalTime + "ms");
        return res;
    }

    /**
     * Imports {@code name} without OCR or plural handling of its own.
     * The configured {@code timeout} has no effect here: the import always runs
     * without a time limit.
     */
    private Molecule importRawName(String name) {
        return this.importRawNameNoTimeout(name);
    }

    /**
     * Strips punctuation that surrounds a candidate name:
     * <ol>
     *   <li>trailing {@code , . ? ! :} and newline characters;</li>
     *   <li>for text longer than two characters, a matching pair of enclosing
     *       delimiters (see {@code pair}), after which the inner text is trimmed
     *       again from the start; otherwise a single unbalanced bracket at the
     *       end or at the start;</li>
     *   <li>one trailing ';' unless the text contains '&amp;'.</li>
     * </ol>
     *
     * @param word text to clean up
     * @return the cleaned text without leading or trailing whitespace
     */
    public static String trimWord(String word) {
        int cut = word.length();
        while (cut > 0 && ",.?!:\n".indexOf(word.charAt(cut - 1)) >= 0) {
            cut--;
        }
        String text = word.substring(0, cut);
        if (text.length() > 2) {
            char first = text.charAt(0);
            char last = text.charAt(text.length() - 1);
            if (Extractor.pair(first, last)) {
                String inner = text.substring(1, text.length() - 1).trim();
                return Extractor.trimWord(inner);
            }
            boolean danglingClose = (last == ')' && text.indexOf('(') == -1)
                    || (last == ']' && text.indexOf('[') == -1);
            boolean danglingOpen = (first == '(' && text.indexOf(')') == -1)
                    || (first == '[' && text.indexOf(']') == -1);
            if (danglingClose) {
                text = text.substring(0, text.length() - 1).trim();
            } else if (danglingOpen) {
                text = text.substring(1).trim();
            }
        }
        if (text.endsWith(";") && text.indexOf('&') == -1) {
            text = text.substring(0, text.length() - 1);
        }
        return text.trim();
    }

    /**
     * Tells whether {@code start} and {@code end} form a pair of enclosing
     * delimiters: round brackets, two identical straight quotes (single or
     * double), or a left/right curly double quote.
     */
    static boolean pair(char start, char end) {
        switch (start) {
            case '(':
                return end == ')';
            case '\'':
            case '\"':
                return end == start;
            case '\u201c':
                return end == '\u201d';
            default:
                return false;
        }
    }

    /**
     * Prints {@code message} to {@code System.out} with a "DBG: " prefix when
     * debugging is enabled; does nothing otherwise.
     */
    void dbg(String message) {
        if (!this.dbg) {
            return;
        }
        System.out.println("DBG: " + message);
    }

    /**
     * Ad-hoc debugging entry point: runs the extractor with debug output enabled
     * on one hard-coded input string. Command-line arguments are ignored.
     */
    public static void main(String[] args) throws IOException {
        Extractor demo = new Extractor(0, false, true, new D2S.Options());
        demo.dbg = true;
        demo.extract(new StringReader("C[C@H](N*)C(*)=O"));
    }

    /**
     * Checks {@code name} against the blacklist of common-language words.
     *
     * @param name                the word to check
     * @param alreadyStandardized true if {@code name} has already been through
     *                            {@code Standardize.get}; otherwise it is
     *                            standardized here first
     * @return the blacklist's verdict
     */
    public boolean isCommonLanguageWord(String name, boolean alreadyStandardized) {
        String standardized = alreadyStandardized ? name : Standardize.get(name);
        return Blacklist.isCommonLanguageWord(standardized, this.acceptGenericNames);
    }

    // Compiler-generated accessor that exposes the private importRawNameNoTimeout.
    // NOTE(review): presumably used by a nested or anonymous class that is not part of this file.
    static /* synthetic */ Molecule access$000(Extractor x0, String x1) {
        return x0.importRawNameNoTimeout(x1);
    }

    // Builds the split-name sentinel (a clone of a molecule holding three MolAtom(0)
    // atoms) and initialises the remaining static state.
    static {
        Molecule uniqueMolecule = new Molecule();
        for (int atom = 0; atom < 3; atom++) {
            uniqueMolecule.add(new MolAtom(0));
        }
        splitName = uniqueMolecule.cloneMolecule();
        totalTime = 0L;
        cold = true;
        logger = LoggingUtil.getLogger(Extractor.class);
    }

    public static class Hit
    extends Token {
        public final String name;
        public final Molecule molecule;
        public final Object value;

        Hit(String text, boolean nonSeparatedToken, String name, Molecule m, Object value) {
            super(text, nonSeparatedToken);
            this.name = name;
            this.molecule = m;
            this.value = value;
        }

        public String toString() {
            return this.text + ": " + this.molecule.toFormat("cxsmiles:u");
        }
    }

    /**
     * A piece of the input text as emitted by the extractor.
     */
    public static class Token {
        /** Source text of the token. */
        public final String text;
        /** Value of the extractor's {@code nonSeparatedToken} flag when the token was emitted. */
        public final boolean nonSeparatedToken;

        Token(String text, boolean nonSeparatedToken) {
            this.nonSeparatedToken = nonSeparatedToken;
            this.text = text;
        }
    }
}

