/*
 * Decompiled with CFR 0.152.
 */
package chemaxon.naming.document;

import chemaxon.naming.document.HTMLScanner;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.Charset;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.CloseShieldInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.html.DefaultHtmlMapper;
import org.apache.tika.parser.html.HtmlMapper;
import org.apache.tika.parser.txt.CharsetDetector;
import org.apache.tika.parser.txt.CharsetMatch;
import org.apache.tika.sax.ContentHandlerDecorator;
import org.apache.tika.sax.TextContentHandler;
import org.apache.tika.sax.XHTMLContentHandler;
import org.apache.tika.utils.CharsetUtils;
import org.ccil.cowan.tagsoup.HTMLSchema;
import org.ccil.cowan.tagsoup.Parser;
import org.ccil.cowan.tagsoup.Scanner;
import org.ccil.cowan.tagsoup.Schema;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.AttributesImpl;

public class HtmlParser
extends AbstractParser {
    /** Serialization version identifier of this parser class. */
    private static final long serialVersionUID = 7895315240498733128L;

    /** Unmodifiable set of media types returned by {@code getSupportedTypes}. */
    private static final Set<MediaType> SUPPORTED_TYPES = Collections.unmodifiableSet(
            new HashSet<MediaType>(Arrays.asList(
                    MediaType.text("html"),
                    MediaType.application("vnd.wap.xhtml+xml"),
                    MediaType.application("x-asp"))));

    /** Encoding used when neither a declaration nor detection yields a charset. */
    private static final String DEFAULT_CHARSET = "windows-1252";

    /** Size (8192) of the look-ahead window examined at the head of the stream for a charset declaration. */
    private static final int META_TAG_BUFFER_SIZE = 8192;

    /**
     * Matches a {@code <meta http-equiv="Content-Type" content="...">} tag
     * (case-insensitive, dot matches newlines); group 1 captures the value
     * of the {@code content} attribute.
     */
    private static final Pattern HTTP_EQUIV_PATTERN = Pattern.compile(
            "(?is)<meta\\s+http-equiv\\s*=\\s*['\\\"]\\s*Content-Type['\\\"]\\s+content\\s*=\\s*['\\\"]([^'\\\"]+)['\\\"]");

    /** TagSoup schema instance handed to every parser created by {@code parse}. */
    private static final Schema HTML_SCHEMA = new HTMLSchema();

    /** Scanner instance handed to every TagSoup parser created by {@code parse}. */
    private static final Scanner scanner = new HTMLScanner();

    /** Normalized attribute names whose values are resolved against the document base URL. */
    private static final Set<String> URI_ATTRIBUTES =
            new HashSet<String>(Arrays.asList("src", "href", "longdesc", "cite"));

    /**
     * Matches an ICBM coordinate pair: two decimal numbers separated by
     * commas and/or whitespace. Group 1 is the first number, group 2 the second.
     */
    private static final Pattern ICBM = Pattern.compile(
            "\\s*(-?\\d+\\.\\d+)[,\\s]+(-?\\d+\\.\\d+)\\s*");

    /**
     * Returns the media types this parser handles.
     *
     * @param context parse context; not consulted by this implementation
     * @return the shared {@code SUPPORTED_TYPES} set
     */
    public Set<MediaType> getSupportedTypes(ParseContext context) {
        return SUPPORTED_TYPES;
    }

    /**
     * Determines the character encoding of an HTML stream and records it
     * under the {@code Content-Encoding} metadata key.
     *
     * <p>Resolution order:
     * <ol>
     *   <li>a supported {@code charset} parameter inside a
     *       {@code <meta http-equiv="Content-Type" content="...">} tag found
     *       in the first {@code META_TAG_BUFFER_SIZE} characters read from
     *       the stream;</li>
     *   <li>the first supported match reported by {@link CharsetDetector},
     *       which is given the incoming {@code Content-Encoding} (or the
     *       {@code charset} parameter of the incoming {@code Content-Type})
     *       as its declared encoding when one is available;</li>
     *   <li>whatever {@code Content-Encoding} the metadata already held;</li>
     *   <li>{@code DEFAULT_CHARSET} if the JVM supports it, otherwise the
     *       platform default charset.</li>
     * </ol>
     *
     * @param stream   the document stream; must support mark/reset, and is
     *                 rewound after the head of the document has been peeked
     * @param metadata metadata consulted for hints and updated with the result
     * @return the name of the chosen encoding
     * @throws IOException if reading from or resetting the stream fails
     */
    private String getEncoding(InputStream stream, Metadata metadata) throws IOException {
        // Peek at the head of the document, then rewind so later consumers see every byte.
        stream.mark(META_TAG_BUFFER_SIZE);
        char[] buffer = new char[META_TAG_BUFFER_SIZE];
        // The reader is intentionally left open: closing it would close the underlying stream.
        InputStreamReader isr = new InputStreamReader(stream, "us-ascii");
        int bufferSize = isr.read(buffer);
        stream.reset();

        if (bufferSize != -1) {
            Matcher m = HTTP_EQUIV_PATTERN.matcher(new String(buffer, 0, bufferSize));
            if (m.find()) {
                // The content attribute looks like "text/html; charset=UTF-8".
                for (String attr : m.group(1).split(";")) {
                    String[] keyValue = attr.trim().split("=");
                    if (keyValue.length != 2 || !keyValue[0].equalsIgnoreCase("charset")) {
                        continue;
                    }
                    String charset = CharsetUtils.clean(keyValue[1]);
                    if (CharsetUtils.isSupported(charset)) {
                        metadata.set("Content-Encoding", charset);
                        return charset;
                    }
                }
            }
        }

        CharsetDetector detector = new CharsetDetector();
        String incomingCharset = metadata.get("Content-Encoding");
        String incomingType = metadata.get("Content-Type");
        if (incomingCharset == null && incomingType != null) {
            MediaType mt = MediaType.parse(incomingType);
            if (mt != null) {
                String declared = (String) mt.getParameters().get("charset");
                // The Content-Type value is external input; a malformed charset
                // name must not abort the parse.
                if (isSupportedCharsetName(declared)) {
                    incomingCharset = declared;
                }
            }
        }
        if (incomingCharset != null) {
            detector.setDeclaredEncoding(incomingCharset);
        }
        detector.enableInputFilter(true);
        detector.setText(stream);
        // detectAll() yields CharsetMatch objects; take the first one the JVM can decode.
        for (CharsetMatch match : detector.detectAll()) {
            String detected = match.getName();
            if (isSupportedCharsetName(detected)) {
                metadata.set("Content-Encoding", detected);
                break;
            }
        }

        String encoding = metadata.get("Content-Encoding");
        if (encoding == null) {
            encoding = Charset.isSupported(DEFAULT_CHARSET) ? DEFAULT_CHARSET : Charset.defaultCharset().name();
            metadata.set("Content-Encoding", encoding);
        }
        return encoding;
    }

    /**
     * Tells whether the JVM supports the named charset, treating a
     * {@code null} or syntactically illegal name as unsupported instead of
     * letting {@link Charset#isSupported(String)} throw.
     */
    private static boolean isSupportedCharsetName(String name) {
        try {
            return name != null && Charset.isSupported(name);
        } catch (IllegalArgumentException ignored) {
            // Includes IllegalCharsetNameException: the name is malformed.
            return false;
        }
    }

    /**
     * Parses an HTML document with TagSoup and forwards the result to the
     * given content handler.
     *
     * <p>The stream is made mark-capable if necessary (the encoding lookup
     * needs mark/reset), its encoding is determined by {@code getEncoding},
     * and the TagSoup events are passed through an
     * {@code XHTMLDowngradeHandler} into an {@code HtmlHandler} that writes
     * to {@code handler}.
     *
     * @param stream   the document to parse
     * @param handler  receiver of the resulting SAX events
     * @param metadata document metadata; read for hints and updated while parsing
     * @param context  parse context; may supply a custom {@link HtmlMapper}
     * @throws IOException   if the stream cannot be read
     * @throws SAXException  if the SAX pipeline reports an error
     * @throws TikaException declared by the parser contract
     */
    public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
        // getEncoding() relies on mark/reset to peek at the head of the document.
        if (!stream.markSupported()) {
            stream = new BufferedInputStream(stream);
        }
        // NOTE(review): presumably shields the caller's stream from being closed by the SAX parser.
        stream = new CloseShieldInputStream(stream);

        InputSource source = new InputSource(stream);
        source.setEncoding(this.getEncoding(stream, metadata));

        // Fall back to a mapper that delegates to this parser's own mapping methods.
        HtmlMapper defaultMapper = new HtmlParserMapper();
        HtmlMapper mapper = context.get(HtmlMapper.class, defaultMapper);

        Parser parser = new Parser();
        parser.setProperty("http://www.ccil.org/~cowan/tagsoup/properties/schema", HTML_SCHEMA);
        parser.setFeature("http://www.ccil.org/~cowan/tagsoup/features/ignore-bogons", true);
        parser.setContentHandler(new XHTMLDowngradeHandler(new HtmlHandler(mapper, handler, metadata)));
        parser.setProperty("http://www.ccil.org/~cowan/tagsoup/properties/scanner", scanner);
        parser.parse(source);
    }

    /**
     * Maps an element name by delegating to {@code DefaultHtmlMapper.INSTANCE}.
     * Invoked through {@code HtmlParserMapper}; being protected, it can be
     * overridden by subclasses.
     *
     * @param name the element name to map
     * @return whatever the default mapper returns for {@code name}
     */
    protected String mapSafeElement(String name) {
        return DefaultHtmlMapper.INSTANCE.mapSafeElement(name);
    }

    /**
     * Tells whether an element should be discarded by delegating to
     * {@code DefaultHtmlMapper.INSTANCE}. Invoked through
     * {@code HtmlParserMapper}; being protected, it can be overridden by
     * subclasses.
     *
     * @param name the element name to check
     * @return whatever the default mapper returns for {@code name}
     */
    protected boolean isDiscardElement(String name) {
        return DefaultHtmlMapper.INSTANCE.isDiscardElement(name);
    }

    /**
     * Maps an attribute name by delegating to
     * {@code DefaultHtmlMapper.INSTANCE}. Invoked through
     * {@code HtmlParserMapper}.
     *
     * @param elementName   name of the element carrying the attribute
     * @param attributeName the attribute name to map
     * @return whatever the default mapper returns for the pair
     */
    public String mapSafeAttribute(String elementName, String attributeName) {
        return DefaultHtmlMapper.INSTANCE.mapSafeAttribute(elementName, attributeName);
    }

    /**
     * Decorator that removes namespace information from the SAX events it
     * forwards: element names are upper-cased (English locale) and reported
     * with an empty namespace URI, namespaced attributes and {@code xmlns}
     * declarations are dropped, and prefix mappings are swallowed.
     */
    class XHTMLDowngradeHandler
    extends ContentHandlerDecorator {
        /**
         * @param handler the handler that receives the downgraded events
         */
        public XHTMLDowngradeHandler(ContentHandler handler) {
            super(handler);
        }

        /**
         * Forwards the element with an upper-cased local name used as both
         * local and qualified name, keeping only un-namespaced attributes
         * that are not namespace declarations.
         */
        public void startElement(String uri, String localName, String name, Attributes atts) throws SAXException {
            final String upperName = localName.toUpperCase(Locale.ENGLISH);
            final AttributesImpl kept = new AttributesImpl();
            int index = 0;
            while (index < atts.getLength()) {
                final String attUri = atts.getURI(index);
                final String attLocal = atts.getLocalName(index);
                final String attQName = atts.getQName(index);
                // Keep the attribute only if it has no namespace and is not an xmlns declaration.
                if ("".equals(attUri) && !attLocal.equals("xmlns") && !attQName.startsWith("xmlns:")) {
                    kept.addAttribute(attUri, attLocal, attQName, atts.getType(index), atts.getValue(index));
                }
                index++;
            }
            super.startElement("", upperName, upperName, kept);
        }

        /** Forwards the end tag using the upper-cased local name and no namespace. */
        public void endElement(String uri, String localName, String name) throws SAXException {
            final String upperName = localName.toUpperCase(Locale.ENGLISH);
            super.endElement("", upperName, upperName);
        }

        /** Intentionally empty: prefix mappings are not forwarded. */
        public void startPrefixMapping(String prefix, String uri) {
        }

        /** Intentionally empty: prefix mappings are not forwarded. */
        public void endPrefixMapping(String prefix) {
        }
    }

    class HtmlHandler
    extends TextContentHandler {
        // Decides which elements/attributes are kept, renamed or discarded.
        private final HtmlMapper mapper;
        // Destination for the XHTML events produced by this handler.
        private final XHTMLContentHandler xhtml;
        // Document metadata; updated from <meta>, <base> and <title> content.
        private final Metadata metadata;
        // Nesting depth inside BODY/FRAMESET; 0 means the body has not been entered.
        private int bodyLevel;
        // Nesting depth inside an element the mapper marked as discarded.
        private int discardLevel;
        // Nesting depth inside TITLE.
        private int titleLevel;
        // Accumulates title text; cleared at the end of every startElement call.
        private final StringBuilder title;

        /**
         * Passes the document locator on to the wrapped XHTML handler.
         *
         * @param locator the locator supplied by the SAX parser
         */
        public void setDocumentLocator(Locator locator) {
            this.xhtml.setDocumentLocator(locator);
        }

        /**
         * Creates a handler writing to an already constructed XHTML handler.
         *
         * <p>When the metadata has no {@code Content-Location} but its
         * {@code resourceName} (trimmed) parses as a URL, that name is stored
         * as {@code Content-Location}.
         *
         * @param mapper   element/attribute mapping policy
         * @param xhtml    destination of the generated XHTML events
         * @param metadata document metadata, read and possibly updated here
         */
        private HtmlHandler(HtmlMapper mapper, XHTMLContentHandler xhtml, Metadata metadata) {
            // The superclass constructor call has to be the first statement.
            super(xhtml);
            this.bodyLevel = 0;
            this.discardLevel = 0;
            this.titleLevel = 0;
            this.title = new StringBuilder();
            this.mapper = mapper;
            this.xhtml = xhtml;
            this.metadata = metadata;

            if (metadata.get("Content-Location") == null) {
                String name = metadata.get("resourceName");
                if (name != null) {
                    name = name.trim();
                    try {
                        // Constructed only to validate that the resource name is a URL.
                        new URL(name);
                        metadata.set("Content-Location", name);
                    } catch (MalformedURLException ignored) {
                        // Best effort: the resource name is not a URL, so
                        // Content-Location is simply left unset.
                    }
                }
            }
        }

        /**
         * Creates a handler that wraps {@code handler} in a new
         * {@code XHTMLContentHandler} built with the same metadata.
         *
         * @param mapper   element/attribute mapping policy
         * @param handler  receiver of the generated XHTML events
         * @param metadata document metadata
         */
        public HtmlHandler(HtmlMapper mapper, ContentHandler handler, Metadata metadata) {
            this(mapper, new XHTMLContentHandler(handler, metadata), metadata);
        }

        /**
         * Handles an opening tag.
         *
         * <p>First updates the title/body/discard nesting counters. Outside
         * any discarded element, head-level {@code META}, {@code BASE} and
         * {@code LINK} tags are processed while the body has not been entered,
         * and mapped ("safe") elements are emitted once inside the body.
         * The title buffer is cleared at the end of every call.
         */
        public void startElement(String uri, String local, String name, Attributes atts) throws SAXException {
            // Nesting bookkeeping: once inside a tracked region, every child deepens it.
            if ("TITLE".equals(name) || this.titleLevel > 0) {
                this.titleLevel++;
            }
            if ("BODY".equals(name) || "FRAMESET".equals(name) || this.bodyLevel > 0) {
                this.bodyLevel++;
            }
            if (this.mapper.isDiscardElement(name) || this.discardLevel > 0) {
                this.discardLevel++;
            }

            if (this.discardLevel == 0) {
                if (this.bodyLevel == 0) {
                    // Still in the document head.
                    if ("META".equals(name)) {
                        final String content = atts.getValue("content");
                        if (content != null) {
                            final String httpEquiv = atts.getValue("http-equiv");
                            if (httpEquiv != null) {
                                this.addHtmlMetadata(httpEquiv, content);
                            } else {
                                final String metaName = atts.getValue("name");
                                if (metaName != null) {
                                    this.addHtmlMetadata(metaName, content);
                                }
                            }
                        }
                    } else if ("BASE".equals(name)) {
                        final String href = atts.getValue("href");
                        if (href != null) {
                            this.startElementWithSafeAttributes("base", atts);
                            this.xhtml.endElement("base");
                            this.metadata.set("Content-Location", this.resolve(href));
                        }
                    } else if ("LINK".equals(name)) {
                        this.startElementWithSafeAttributes("link", atts);
                        this.xhtml.endElement("link");
                    }
                } else if (this.bodyLevel > 0) {
                    // Inside the body: emit the element only if the mapper knows a safe name for it.
                    final String mapped = this.mapper.mapSafeElement(name);
                    if (mapped != null) {
                        this.startElementWithSafeAttributes(mapped, atts);
                    }
                }
            }

            this.title.setLength(0);
        }

        /**
         * Stores one {@code <meta>} name/value pair in the metadata.
         *
         * <p>{@code ICBM} values matching the coordinate pattern are
         * normalised to {@code "lat, lon"} and also stored as latitude and
         * longitude; {@code Content-Type} values are stored in their parsed
         * media-type form when parseable; everything else is stored verbatim.
         * Nothing happens when either argument is {@code null}.
         *
         * @param name  the meta name or http-equiv key
         * @param value the meta content
         */
        private void addHtmlMetadata(String name, String value) {
            if (name == null || value == null) {
                return;
            }

            if (name.equalsIgnoreCase("ICBM")) {
                final Matcher coordinates = ICBM.matcher(value);
                if (!coordinates.matches()) {
                    this.metadata.set("ICBM", value);
                    return;
                }
                this.metadata.set("ICBM", coordinates.group(1) + ", " + coordinates.group(2));
                this.metadata.set(Metadata.LATITUDE, coordinates.group(1));
                this.metadata.set(Metadata.LONGITUDE, coordinates.group(2));
                return;
            }

            if (name.equalsIgnoreCase("Content-Type")) {
                final MediaType parsed = MediaType.parse(value);
                if (parsed == null) {
                    this.metadata.set("Content-Type", value);
                } else {
                    this.metadata.set("Content-Type", parsed.toString());
                }
                return;
            }

            this.metadata.set(name, value);
        }

        /**
         * Emits a start tag for {@code name} carrying only the attributes the
         * mapper accepts.
         *
         * <p>Accepted attributes get their mapped local name. Values of
         * attributes listed in {@code URI_ATTRIBUTES} are resolved against the
         * document location; for {@code object} elements the {@code codebase}
         * is resolved first and then used as base for {@code data} and
         * {@code classid}. An {@code img} element that ends up without
         * {@code alt} receives an empty one. Elements that arrive without any
         * attributes are emitted as-is.
         *
         * @param name the (already mapped) element name to emit
         * @param atts the attributes as received from the parser
         */
        private void startElementWithSafeAttributes(String name, Attributes atts) throws SAXException {
            if (atts.getLength() == 0) {
                this.xhtml.startElement(name);
                return;
            }

            final boolean objectElement = name.equals("object");
            String objectBase = null;
            if (objectElement) {
                objectBase = atts.getValue("", "codebase");
                if (objectBase != null) {
                    objectBase = this.resolve(objectBase);
                } else {
                    objectBase = this.metadata.get("Content-Location");
                }
            }

            final AttributesImpl filtered = new AttributesImpl(atts);
            int index = 0;
            while (index < filtered.getLength()) {
                final String mapped = this.mapper.mapSafeAttribute(name, filtered.getLocalName(index));
                if (mapped == null) {
                    // Removal shifts the next attribute into this slot, so do not advance.
                    filtered.removeAttribute(index);
                    continue;
                }
                filtered.setLocalName(index, mapped);
                if (URI_ATTRIBUTES.contains(mapped)) {
                    filtered.setValue(index, this.resolve(filtered.getValue(index)));
                } else if (objectElement && "codebase".equals(mapped)) {
                    filtered.setValue(index, objectBase);
                } else if (objectElement && ("data".equals(mapped) || "classid".equals(mapped))) {
                    filtered.setValue(index, this.resolve(objectBase, filtered.getValue(index)));
                }
                index++;
            }

            if ("img".equals(name) && filtered.getValue("", "alt") == null) {
                filtered.addAttribute("", "alt", "alt", "CDATA", "");
            }
            this.xhtml.startElement(name, filtered);
        }

        /**
         * Handles a closing tag.
         *
         * <p>Inside the body and outside discarded content, the mapped element
         * is closed, or a newline is emitted when the unmapped name (lower-cased)
         * is listed in {@code XHTMLContentHandler.ENDLINE}. Afterwards the
         * nesting counters are unwound: leaving the title stores the trimmed
         * title text in the metadata, and leaving the body ends the XHTML
         * document.
         */
        public void endElement(String uri, String local, String name) throws SAXException {
            final boolean emitting = this.bodyLevel > 0 && this.discardLevel == 0;
            if (emitting) {
                final String mapped = this.mapper.mapSafeElement(name);
                if (mapped != null) {
                    this.xhtml.endElement(mapped);
                } else {
                    final String lowered = name.toLowerCase(Locale.ENGLISH);
                    if (XHTMLContentHandler.ENDLINE.contains(lowered)) {
                        this.xhtml.newline();
                    }
                }
            }

            // Unwind the counters in the same order they are checked on the way in.
            if (this.titleLevel > 0 && --this.titleLevel == 0) {
                this.metadata.set("title", this.title.toString().trim());
            }
            if (this.bodyLevel > 0 && --this.bodyLevel == 0) {
                this.xhtml.endDocument();
            }
            if (this.discardLevel > 0) {
                this.discardLevel--;
            }
        }

        /**
         * Routes character data: text seen inside the title before the body
         * starts is appended to the title buffer; text inside the body and
         * outside discarded content is forwarded to the superclass.
         */
        public void characters(char[] ch, int start, int length) throws SAXException {
            final boolean inBody = this.bodyLevel > 0;
            if (!inBody && this.titleLevel > 0) {
                this.title.append(ch, start, length);
            }
            if (inBody && this.discardLevel == 0) {
                super.characters(ch, start, length);
            }
        }

        /**
         * Forwards ignorable whitespace to the superclass only when inside the
         * body and outside discarded content.
         */
        public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
            if (this.bodyLevel <= 0 || this.discardLevel != 0) {
                return;
            }
            super.ignorableWhitespace(ch, start, length);
        }

        /**
         * Resolves {@code url} using the current {@code Content-Location}
         * metadata value as the base.
         *
         * @param url the possibly relative URL
         * @return the result of {@code resolve(base, url)} with that base
         */
        private String resolve(String url) {
            return this.resolve(this.metadata.get("Content-Location"), url);
        }

        /**
         * Resolves a possibly relative URL against a base URL.
         *
         * <p>The trimmed {@code url} is returned unchanged when there is no
         * base, when it starts (case-insensitively) with one of the schemes
         * {@code urn:}, {@code mailto:}, {@code tel:}, {@code data:},
         * {@code javascript:} or {@code about:}, or when either URL is
         * malformed. A URL consisting of a query string only is appended to
         * the base path when that path is non-empty and does not end in a
         * slash.
         *
         * @param base the base URL, may be {@code null}
         * @param url  the URL to resolve
         * @return the resolved URL in external form, or the trimmed input
         */
        private String resolve(String base, String url) {
            final String target = url.trim();
            if (base == null) {
                return target;
            }

            final String lowered = target.toLowerCase(Locale.ENGLISH);
            final String[] passThroughSchemes = {"urn:", "mailto:", "tel:", "data:", "javascript:", "about:"};
            for (String scheme : passThroughSchemes) {
                if (lowered.startsWith(scheme)) {
                    return target;
                }
            }

            try {
                final URL baseUrl = new URL(base.trim());
                final String basePath = baseUrl.getPath();
                final boolean queryOnly = target.startsWith("?") && basePath.length() > 0 && !basePath.endsWith("/");
                final URL resolved;
                if (queryOnly) {
                    // Attach the query directly to the base path.
                    resolved = new URL(baseUrl.getProtocol(), baseUrl.getHost(), baseUrl.getPort(), basePath + target);
                } else {
                    resolved = new URL(baseUrl, target);
                }
                return resolved.toExternalForm();
            } catch (MalformedURLException e) {
                return target;
            }
        }
    }

    /**
     * {@link HtmlMapper} that forwards every call to the enclosing
     * {@code HtmlParser}'s own mapping methods. Used by {@code parse} as the
     * default when the parse context supplies no mapper.
     */
    private class HtmlParserMapper
    implements HtmlMapper {
        // Private: instances are created only by the enclosing parser.
        private HtmlParserMapper() {
        }

        /** Forwards to {@code HtmlParser.this.mapSafeElement(name)}. */
        public String mapSafeElement(String name) {
            return HtmlParser.this.mapSafeElement(name);
        }

        /** Forwards to {@code HtmlParser.this.isDiscardElement(name)}. */
        public boolean isDiscardElement(String name) {
            return HtmlParser.this.isDiscardElement(name);
        }

        /** Forwards to {@code HtmlParser.this.mapSafeAttribute(elementName, attributeName)}. */
        public String mapSafeAttribute(String elementName, String attributeName) {
            return HtmlParser.this.mapSafeAttribute(elementName, attributeName);
        }
    }
}

