[XML-DEV Mailing List Archive Home] [By Thread] [By Date] [Recent Entries] [Reply To This Message]

sax and entities

  • To: <xml-dev@l...>
  • Subject: sax and entities
  • From: "Scott Purcell" <spurcell@v...>
  • Date: Fri, 20 Jun 2003 07:52:31 -0500
  • Thread-index: AcM3KtAPaa5szJq8Qjq/obq7LgL2sw==
  • Thread-topic: sax and entities

reg xml
Hello,
I am using the org.xml.sax classes and extending the DefaultHandler. See code below.

I am parsing the XML (below class file) which has entity references in the file: eg: &reg; .
The XML declares entities so that &reg; is turned into its equivalent character. But instead of parsing successfully, I get the following error:
exception: org.xml.sax.SAXParseException: invalid Character Entitiy

Has anyone had this issue with entities? Maybe I am handling this improperly?

Thanks,
Scott

// code
/**
 * Prints two progress markers, stores the file name, and parses the named
 * file with a validating SAX parser.
 *
 * The SAX callbacks are delivered to a second instance of this class that is
 * created from {@code db_id}. SAX, parser-configuration and I/O exceptions
 * raised while parsing are caught here and reported (message to standard
 * output for parse errors, stack trace otherwise); they are not rethrown.
 *
 * @param filename path of the XML file to parse
 * @param db_id    value handed to the one-argument constructor that builds the handler
 * @throws Exception declared by the signature; the exception types listed
 *                   above are handled inside this constructor
 */
public xmlProductUploadParser(String filename, String db_id) throws Exception
{
    System.out.println("FOO2");
    this.filename = filename;

    // The handler is a separate instance constructed from the database id.
    DefaultHandler saxHandler = new xmlProductUploadParser(db_id);

    // Ask the factory for validating parsers.
    SAXParserFactory parserFactory = SAXParserFactory.newInstance();
    parserFactory.setValidating(true);
    System.out.println("FOO3");

    try {
        // Writer wrapped around standard output.
        out = new OutputStreamWriter(System.out, "UTF8");

        // Obtain a parser and run it over the input file.
        parserFactory.newSAXParser().parse(new File(filename), saxHandler);

    } catch (SAXParseException spe) {
        // The parser itself reported a problem: show where, then what.
        String where = "\n** Parsing error" + ", line " + spe.getLineNumber() + ", uri " + spe.getSystemId();
        System.out.println(where);
        System.out.println(" " + spe.getMessage() );

        // Prefer the wrapped exception when one is present.
        Exception cause = (spe.getException() != null) ? spe.getException() : spe;
        cause.printStackTrace();

    } catch (SAXException sxe) {
        // Raised by the application code or during parser initialization.
        Exception cause = (sxe.getException() != null) ? sxe.getException() : sxe;
        cause.printStackTrace();

    } catch (ParserConfigurationException pce) {
        // No parser could be built with the requested options.
        pce.printStackTrace();

    } catch (IOException ioe) {
        // Reading the input failed.
        ioe.printStackTrace();
    }
}


// dtd (partial)
<?xml version='1.0' encoding='ISO-8859-1' standalone='no'?>
<!DOCTYPE XML_PRODUCT_GROUP [<!ELEMENT XML_PRODUCT_GROUP (LANGUAGE, TEXT_DATA*)>
<!ELEMENT LANGUAGE (ORIGINAL_LANGUAGE, TRANSLATION_LANGUAGE, VERSION_NO, ORIGINAL_LANGUAGE_ID, TRANSLATION_LANGUAGE_ID)>
<!ELEMENT ORIGINAL_LANGUAGE (#PCDATA)>
<!ELEMENT TRANSLATION_LANGUAGE (#PCDATA)>
<!ELEMENT VERSION_NO (#PCDATA)>
<!ELEMENT ORIGINAL_LANGUAGE_ID (#PCDATA)>
<!ELEMENT TRANSLATION_LANGUAGE_ID (#PCDATA)>
<!ELEMENT TEXT_DATA (PRODUCT_ID, TECH_SPEC_GRP_FOOTNOTE*, TECH_SPEC_GRP_INFO*, TECH_SPEC_INFO*, TECH_SPEC_FOOTNOTE*, TECH_SPEC_DATA_FOOTNOTE*, TECH_SPEC_DATA_INFO*, CAT_DATA*, DATA*, FOOTNOTE*, OPEN_META*)>
<!ELEMENT PRODUCT_ID (#PCDATA)>
<!ELEMENT TECH_SPEC_GRP_FOOTNOTE (TECH_SPEC_GRP_FNOTE_ID, TECH_SPEC_GRP_FNOTE)>
<!ELEMENT TECH_SPEC_GRP_FNOTE_ID (#PCDATA)>
<!ELEMENT TECH_SPEC_GRP_FNOTE (#PCDATA)>
<!ELEMENT TECH_SPEC_GRP_INFO (TECH_SPEC_GRP_ID, HEADING, LONG_DESCRIPTION)>
<!ELEMENT TECH_SPEC_GRP_ID (#PCDATA)>
<!ELEMENT HEADING (#PCDATA)>
<!ELEMENT LONG_DESCRIPTION (#PCDATA)>
<!ELEMENT TECH_SPEC_INFO (TECH_SPEC_ID, TECH_SPEC_NAME)>
<!ELEMENT TECH_SPEC_ID (#PCDATA)>
<!ELEMENT TECH_SPEC_NAME (#PCDATA)>
<!ELEMENT TECH_SPEC_FOOTNOTE (TECH_SPEC_FNOTEID, TECH_SPEC_FNOTE)>
<!ELEMENT TECH_SPEC_FNOTEID (#PCDATA)>
<!ELEMENT TECH_SPEC_FNOTE (#PCDATA)>
<!ELEMENT TECH_SPEC_DATA_FOOTNOTE (TECH_SPEC_DATA_FNOTE_ID, TECH_SPEC_DATA_FNOTE)>
<!ELEMENT TECH_SPEC_DATA_FNOTE_ID (#PCDATA)>
<!ELEMENT TECH_SPEC_DATA_FNOTE (#PCDATA)>
<!ELEMENT TECH_SPEC_DATA_INFO (TECH_SPEC_DATA_ID, TECH_SPEC_DATA_VALUETEXT)>
<!ELEMENT TECH_SPEC_DATA_ID (#PCDATA)>
<!ELEMENT TECH_SPEC_DATA_VALUETEXT (#PCDATA)>
<!ELEMENT CAT_DATA (CAT_ARG_ID, CAT_ARG_NAME, CAT_ARG_NOTE)>
<!ELEMENT CAT_ARG_ID (#PCDATA)>
<!ELEMENT CAT_ARG_NAME (#PCDATA)>
<!ELEMENT CAT_ARG_NOTE (#PCDATA)>
<!ELEMENT DATA (DATA_ID, ATTRIBUTE_ID, TEXT_VALUE, SORT_ORDER)>
<!ELEMENT DATA_ID (#PCDATA)>
<!ELEMENT ATTRIBUTE_ID (#PCDATA)>
<!ELEMENT TEXT_VALUE (#PCDATA)>
<!ELEMENT SORT_ORDER (#PCDATA)>
<!ELEMENT FOOTNOTE (FOOT_DATA_ID, FOOT_ATTRIBUTE_ID, FOOT_TEXT_VALUE, FOOT_SORT_ORDER)>
<!ELEMENT FOOT_DATA_ID (#PCDATA)>
<!ELEMENT FOOT_ATTRIBUTE_ID (#PCDATA)>
<!ELEMENT FOOT_TEXT_VALUE (#PCDATA)>
<!ELEMENT FOOT_SORT_ORDER (#PCDATA)>
<!ELEMENT OPEN_META (OPEN_META_ID, OPEN_META_VALUE)>
<!ELEMENT OPEN_META_ID (#PCDATA)>
<!ELEMENT OPEN_META_VALUE (#PCDATA)>
<!ENTITY nbsp "&#160;">
<!-- no-break space (U+00A0), written as a character reference so it cannot be confused with an ordinary space -->
<!ENTITY iexcl "¡">
<!-- inverted exclamation mark -->
<!ENTITY cent "¢">
<!-- cent sign -->
<!ENTITY pound "£">
<!-- pound sterling sign -->
<!ENTITY curren "¤">
<!-- general currency sign -->
<!ENTITY yen "¥">
<!-- yen sign -->
<!ENTITY brvbar "¦">
<!-- broken (vertical) bar -->
<!ENTITY sect "§">
<!-- section sign -->
<!ENTITY uml "¨">
<!-- umlaut (dieresis) -->
<!ENTITY copy "©">
<!-- copyright sign -->
<!ENTITY ordf "ª">
<!-- ordinal indicator, feminine -->
<!ENTITY laquo "«">
<!-- angle quotation mark, left -->
<!ENTITY not "¬">
<!-- not sign -->
<!ENTITY shy "&#173;">
<!-- soft hyphen (U+00AD), written as a character reference because the character itself is invisible -->
<!ENTITY reg "®">
<!-- registered sign -->
<!ENTITY macr "¯">
<!-- macron -->
<!ENTITY deg "°">
<!-- degree sign -->
<!ENTITY plusmn "±">
<!-- plus-or-minus sign -->
<!ENTITY sup2 "²">
<!-- superscript two -->
<!ENTITY sup3 "³">
<!-- superscript three -->
<!ENTITY acute "´">
<!-- acute accent -->
<!ENTITY micro "µ">
<!-- micro sign -->
<!ENTITY para "¶">
<!-- pilcrow (paragraph sign) -->
<!ENTITY middot "·">
<!-- middle dot -->
<!ENTITY cedil "¸">
<!-- cedilla -->
<!ENTITY sup1 "¹">
<!-- superscript one -->
<!ENTITY ordm "º">
<!-- ordinal indicator, masculine -->
<!ENTITY raquo "»">
<!-- angle quotation mark, right -->
<!ENTITY frac14 "¼">
<!-- fraction one-quarter -->
<!ENTITY frac12 "½">
<!-- fraction one-half -->
<!ENTITY frac34 "¾">
<!-- fraction three-quarters -->
<!ENTITY iquest "¿">
<!-- inverted question mark -->
<!ENTITY Agrave "À">
<!-- capital A, grave accent -->
<!ENTITY Aacute "Á">
<!-- capital A, acute accent -->
<!ENTITY Acirc "Â">
<!-- capital A, circumflex accent -->
<!ENTITY Atilde "Ã">
<!-- capital A, tilde -->
<!ENTITY Auml "Ä">
<!-- capital A, dieresis or umlaut mark -->
<!ENTITY Aring "Å">
<!-- capital A, ring -->
<!ENTITY AElig "Æ">
<!-- capital AE diphthong (ligature) -->
<!ENTITY Ccedil "Ç">
<!-- capital C, cedilla -->
<!ENTITY Egrave "È">
<!-- capital E, grave accent -->
<!ENTITY Eacute "É">
<!-- capital E, acute accent -->
<!ENTITY Ecirc "Ê">
<!-- capital E, circumflex accent -->
<!ENTITY Euml "Ë">
<!-- capital E, dieresis or umlaut mark -->
<!ENTITY Igrave "Ì">
<!-- capital I, grave accent -->
<!ENTITY Iacute "Í">
<!-- capital I, acute accent -->
<!ENTITY Icirc "Î">
<!-- capital I, circumflex accent -->
<!ENTITY Iuml "Ï">
<!-- capital I, dieresis or umlaut mark -->
<!ENTITY ETH "Ð">
<!-- capital Eth, Icelandic -->
<!ENTITY Ntilde "Ñ">
<!-- capital N, tilde -->
<!ENTITY Ograve "Ò">
<!-- capital O, grave accent -->
<!ENTITY Oacute "Ó">
<!-- capital O, acute accent -->
<!ENTITY Ocirc "Ô">
<!-- capital O, circumflex accent -->
<!ENTITY Otilde "Õ">
<!-- capital O, tilde -->
<!ENTITY Ouml "Ö">
<!-- capital O, dieresis or umlaut mark -->
<!ENTITY times "×">
<!-- multiply sign -->
<!ENTITY Oslash "Ø">
<!-- capital O, slash -->
<!ENTITY Ugrave "Ù">
<!-- capital U, grave accent -->
<!ENTITY Uacute "Ú">
<!-- capital U, acute accent -->
<!ENTITY Ucirc "Û">
<!-- capital U, circumflex accent -->
<!ENTITY Uuml "Ü">
<!-- capital U, dieresis or umlaut mark -->
<!ENTITY Yacute "Ý">
<!-- capital Y, acute accent -->
<!ENTITY THORN "Þ">
<!-- capital THORN, Icelandic -->
<!ENTITY szlig "ß">
<!-- small sharp s, German (sz ligature) -->
<!ENTITY agrave "à">
<!-- small a, grave accent -->
<!ENTITY aacute "á">
<!-- small a, acute accent -->
<!ENTITY acirc "â">
<!-- small a, circumflex accent -->
<!ENTITY atilde "ã">
<!-- small a, tilde -->
<!ENTITY auml "ä">
<!-- small a, dieresis or umlaut mark -->
<!ENTITY aring "å">
<!-- small a, ring -->
<!ENTITY aelig "æ">
<!-- small ae diphthong (ligature) -->
<!ENTITY ccedil "ç">
<!-- small c, cedilla -->
<!ENTITY egrave "è">
<!-- small e, grave accent -->
<!ENTITY eacute "é">
<!-- small e, acute accent -->
<!ENTITY ecirc "ê">
<!-- small e, circumflex accent -->
<!ENTITY euml "ë">
<!-- small e, dieresis or umlaut mark -->
<!ENTITY igrave "ì">
<!-- small i, grave accent -->
<!ENTITY iacute "í">
<!-- small i, acute accent -->
<!ENTITY icirc "î">
<!-- small i, circumflex accent -->
<!ENTITY iuml "ï">
<!-- small i, dieresis or umlaut mark -->
<!ENTITY eth "ð">
<!-- small eth, Icelandic -->
<!ENTITY ntilde "ñ">
<!-- small n, tilde -->
<!ENTITY ograve "ò">
<!-- small o, grave accent -->
<!ENTITY oacute "ó">
<!-- small o, acute accent -->
<!ENTITY ocirc "ô">
<!-- small o, circumflex accent -->
<!ENTITY otilde "õ">
<!-- small o, tilde -->
<!ENTITY ouml "ö">
<!-- small o, dieresis or umlaut mark -->
<!ENTITY divide "÷">
<!-- divide sign -->
<!ENTITY oslash "ø">
<!-- small o, slash -->
<!ENTITY ugrave "ù">
<!-- small u, grave accent -->
<!ENTITY uacute "ú">
<!-- small u, acute accent -->
<!ENTITY ucirc "û">
<!-- small u, circumflex accent -->
<!ENTITY uuml "ü">
<!-- small u, dieresis or umlaut mark -->
<!ENTITY yacute "ý">
<!-- small y, acute accent -->
<!ENTITY thorn "þ">
<!-- small thorn, Icelandic -->
<!ENTITY yuml "ÿ">
<!-- small y, dieresis or umlaut mark -->
<!ENTITY bull "·">
<!-- bullet -->
<!ENTITY trade "(tm)">
<!--trade mark-->
<!ENTITY ndash "-">
<!--En dash -->
<!ENTITY ldquo "&#34;">
<!-- left double quotation mark, approximated by the ASCII double quote; a character reference is used because a bare quote would end the literal -->
<!ENTITY rdquo "&#34;">
<!-- right double quotation mark, approximated by the ASCII double quote; a character reference is used because a bare quote would end the literal -->
<!ENTITY lsquo "'">
<!--EM left sing quote -->
<!ENTITY rsquo "'">
<!--EM right sing quote -->
<!ENTITY hellp "...">
<!-- horizontal ellipsis as three ASCII dots; this spelling is kept so existing references still resolve -->
<!ENTITY hellip "...">
<!-- horizontal ellipsis as three ASCII dots, under the conventional entity name -->
<!ENTITY mdash "-">
<!--EM dash -->
]>
<XML_PRODUCT_GROUP>
<LANGUAGE>
<ORIGINAL_LANGUAGE>English</ORIGINAL_LANGUAGE>
<TRANSLATION_LANGUAGE>Portugese</TRANSLATION_LANGUAGE>
<VERSION_NO>5</VERSION_NO>
<ORIGINAL_LANGUAGE_ID>eng</ORIGINAL_LANGUAGE_ID>
<TRANSLATION_LANGUAGE_ID>por</TRANSLATION_LANGUAGE_ID>
</LANGUAGE>






PURCHASE STYLUS STUDIO ONLINE TODAY!

Purchasing Stylus Studio from our online shop is Easy, Secure and Value Priced!

Buy Stylus Studio Now

Download The World's Best XML IDE!

Accelerate XML development with our award-winning XML IDE - Download a free trial today!

Don't miss another message! Subscribe to this list today.
Email
First Name
Last Name
Company
Subscribe in XML format
RSS 2.0
Atom 0.3
 

Stylus Studio has published XML-DEV in RSS and ATOM formats, enabling users to easily subscribe to the list from their preferred news reader application.


Stylus Studio Sponsored Links are added links designed to provide related and additional information to the visitors of this website. They were not included by the author in the initial post. To view the content without the Sponsor Links please click here.

Site Map | Privacy Policy | Terms of Use | Trademarks
Free Stylus Studio XML Training:
W3C Member
Stylus Studio® and DataDirect XQuery ™are products from DataDirect Technologies, is a registered trademark of Progress Software Corporation, in the U.S. and other countries. © 2004-2013 All Rights Reserved.