%PDF- %PDF-
Mini Shell

Mini Shell

Direktori : /proc/309157/root/home/waritko/yacy/source/net/yacy/document/parser/images/
Upload File :
Create Path :
Current File : //proc/309157/root/home/waritko/yacy/source/net/yacy/document/parser/images/svgParser.java

/**
 * svgParser.java 
 * Copyright 2015 by Burkhard Buelte
 * First released 26.09.2015 at http://yacy.net
 *
 * This library is free software; you can redistribute it and/or modify it under
 * the terms of the GNU Lesser General Public License as published by the Free
 * Software Foundation; either version 2.1 of the License, or (at your option)
 * any later version.
 *
 * This library is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
 * details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program in the file lgpl21.txt If not, see
 * <http://www.gnu.org/licenses/>.
 */
package net.yacy.document.parser.images;

import java.io.EOFException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.LinkedHashMap;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.document.id.MultiProtocolURL;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.cora.util.NumberTools;
import net.yacy.document.AbstractParser;
import net.yacy.document.Document;
import net.yacy.document.Parser;
import net.yacy.document.VocabularyScraper;
import net.yacy.document.parser.html.ImageEntry;

import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * Metadata parser for svg image files (which are xml files) SVG 1.1 (Second Edition)
 * http://www.w3.org/TR/SVG/metadata.html#MetadataElement according to SVG 1.1
 * parser stops parsing after the first metadata elment has been read and
 * document level metadata are expected picture data (as proposed in spec) like
 * <svg>
 * <title></title>
 * <desc></desc>
 * <metadata></metadata>
 * <... other/>
 * </svg>
 */
public class svgParser extends AbstractParser implements Parser {

    public svgParser() {
        super("SVG Image Parser");
        this.SUPPORTED_EXTENSIONS.add("svg");
        this.SUPPORTED_MIME_TYPES.add("image/svg+xml");
    }

    private static final ThreadLocal<SAXParser> tlSax = new ThreadLocal<SAXParser>();

    private static SAXParser getParser() throws SAXException {
        SAXParser parser = tlSax.get();
        if (parser == null) {
            try {
                parser = SAXParserFactory.newInstance().newSAXParser();
            } catch (final ParserConfigurationException e) {
                throw new SAXException(e.getMessage(), e);
            }
            tlSax.set(parser);
        }
        return parser;
    }

    @Override
    public Document[] parse(
            final DigestURL location,
            final String mimeType,
            final String charset,
            final VocabularyScraper scraper,
            final int timezoneOffset,
            final InputStream source) throws Parser.Failure, InterruptedException {

        try {
            final SAXParser saxParser = getParser();
            final svgMetaDataHandler metaData = new svgMetaDataHandler();
            try {
                saxParser.parse(source, metaData);
            } catch (SAXException e) {
                // catch EOFException which is intentionally thrown after capturing metadata to skip further reading (not a error, just a way to get out of SAX)
                if (e.getException() == null || !(e.getException() instanceof EOFException)) {
                    throw new Parser.Failure("Unexpected error while parsing svg file. " + e.getMessage(), location);
                }
            }

            String docTitle = metaData.getTitle();
            if (docTitle == null) { // use filename like in genericParser
                docTitle = location.getFileName().isEmpty() ? location.toTokens() : MultiProtocolURL.unescape(location.getFileName()); //
            }
            String docDescription = metaData.getDescription();
            if (docDescription == null) { // use url token as in genericParser
                docDescription = location.toTokens();
            }

            LinkedHashMap<DigestURL, ImageEntry> images = null;
            // add this image to the map of images to register size (as in genericImageParser)
            if (metaData.getHeight() != null && metaData.getWidth() != null) {
                images = new LinkedHashMap<DigestURL, ImageEntry>();
                images.put(location, new ImageEntry(location, "", metaData.getWidth(), metaData.getHeight(), -1));
            }

            // create the parser document
            Document[] docs = new Document[]{new Document(
                location,
                mimeType,
                StandardCharsets.UTF_8.name(),
                this,
                null,
                null,
                AbstractParser.singleList(docTitle),
                null,
                "",
                null,
                null,
                0.0d, 0.0d,
                docDescription, // text - for this image description is best text we have
                null,
                null,
                images,
                false,
                null)};
            return docs;
        } catch (final Exception e) {
            if (e instanceof InterruptedException) {
                throw (InterruptedException) e;
            }
            if (e instanceof Parser.Failure) {
                throw (Parser.Failure) e;
            }

            ConcurrentLog.logException(e);
            throw new Parser.Failure("Unexpected error while parsing svg file. " + e.getMessage(), location);
        }
    }

    /**
     * SAX handler for svg metadata
     */
    public class svgMetaDataHandler extends DefaultHandler {

        private final StringBuilder buffer = new StringBuilder();
        private boolean scrapeMetaData = false; // true if within metadata tag
        private boolean svgStartTagFound = false; // switch to recognize start tag processing, to cancel parsing on wrong tag

        private String docTitle = null; // document level title
        private String docDescription = null; // document level description
        private String imgWidth = null; // size in pixel
        private String imgHeight = null;

        public svgMetaDataHandler() {
        }

        @Override
        public void characters(final char ch[], final int start, final int length) {
            buffer.append(ch, start, length);
        }

        @Override
        public void startElement(final String uri, final String name, final String tag, final Attributes atts) throws SAXException {
            if (scrapeMetaData) {
                // not implemented yet TODO: interprete RDF content
                // may contain RDF + DC, DC, CC ...
            } else {
                if (tag != null) {
                    switch (tag) {
                        case "svg":
                            svgStartTagFound = true;
                            imgHeight = atts.getValue("height");
                            imgWidth = atts.getValue("width");
                            break;
                        case "metadata":
                            scrapeMetaData = true;
                            break;
                        // some common graph elements as stop condition (skip reading remainder of input), metadata is expected before graphic content
                        case "g":
                        case "line":
                        case "path":
                        case "rect":
                            throw new SAXException("EOF svg Metadata", new EOFException());
                        default : { // K.O. criteria, start tag is not svg, fail parser on none svg
                            if (!svgStartTagFound) {
                                throw new SAXException("not a svg file, start tag "+tag, new Failure());
                            }
                        }
                    }
                }
            }
            buffer.delete(0, buffer.length());
        }

        @Override
        public void endElement(final String uri, final String name, final String tag) throws SAXException {
            if (scrapeMetaData) {
                // stop condition, scrape only first metadata element
                if ("metadata".equals(tag)) {
                    scrapeMetaData = false;
                    buffer.delete(0, buffer.length());
                    // we have read metadate, other data are not of interest here, end parsing
                    throw new SAXException("EOF svg Metadata", new EOFException());
                }
            } else if ("title".equals(tag)) {
                this.docTitle = buffer.toString();
            } else if ("desc".equals(tag)) {
                this.docDescription = buffer.toString();
            }
            buffer.delete(0, buffer.length());
        }

        /**
         * @return document level title or null
         */
        public String getTitle() {
            return docTitle;
        }

        /**
         * @return document level description or null
         */
        public String getDescription() {
            return docDescription;
        }

        /**
         * @return image width in pixel or null
         */
        public Integer getWidth() {
            if (imgWidth != null) {
                // return number if given in pixel or a number only, return nothing for size like "100%"
                if ((imgWidth.indexOf("px") > 0) || ((imgWidth.charAt(imgWidth.length() - 1) >= '0' && imgWidth.charAt(imgWidth.length() - 1) <= '9'))) {
                    return NumberTools.parseIntDecSubstring(imgWidth);
                }
            }
            return null;
        }

        /**
         * @return image height in pixel or null
         */
        public Integer getHeight() {
            if (imgHeight != null) {
                // return number if given in pixel or a number only, return nothing for size like "100%"
                if ((imgHeight.indexOf("px") > 0) || ((imgHeight.charAt(imgHeight.length() - 1) >= '0' && imgHeight.charAt(imgHeight.length() - 1) <= '9'))) {
                    return NumberTools.parseIntDecSubstring(imgHeight);
                }
            }
            return null;
        }
    }
}

Zerion Mini Shell 1.0