Index: doc/dom.xml ================================================================== --- doc/dom.xml +++ doc/dom.xml @@ -188,10 +188,28 @@ "notstandalone" means, that all external entities will be resolved and parsed, with the execption of documents, which explicitly states standalone="yes" in their XML declaration. + + + -ignorexmlns + It is recommended that you do not use this + option. If this option is given, no node within the + created DOM tree will be internally marked as placed + into an XML Namespace, even if there is a default + namespace in scope for un-prefixed elements or even if + the element has a defined namespace prefix. One + consequence of this is that XPath node expressions + on such a DOM tree don't work as expected. Prefixed + element nodes can't be selected and element nodes + without prefix will be seen by XPath expressions as if + they don't have any namespace (no matter if they are in fact + in a default namespace). + + +

Index: generic/dom.c ================================================================== --- generic/dom.c +++ generic/dom.c @@ -63,11 +63,10 @@ #ifdef DEBUG # define DBG(x) x #else # define DBG(x) #endif -#define TDOM_NS #define MutationEvent() #define MutationEvent2(type,node) #define MutationEvent3(type,node,relatioNode) @@ -154,10 +153,11 @@ int depth; int ignoreWhiteSpaces; Tcl_DString *cdata; TEncoding *encoding_8bit; int storeLineColumn; + int ignorexmlns; int feedbackAfter; Tcl_Obj *feedbackCmd; XML_Index nextFeedbackPosition; Tcl_Interp *interp; int activeNSsize; @@ -1192,104 +1192,107 @@ lastAttr = NULL; /*-------------------------------------------------------------- | process namespace declarations | \-------------------------------------------------------------*/ -#ifdef TDOM_NS - for (atPtr = atts; atPtr[0] && atPtr[1]; atPtr += 2) { - - if (strncmp(atPtr[0], "xmlns", 5) == 0) { - xmlns = atPtr[0]; - newNS = 1; - if (xmlns[5] == ':') { - if (domIsNamespaceInScope (info->activeNS, info->activeNSpos, - &(xmlns[6]), atPtr[1])) { - ns = domLookupPrefix (info->currentNode, &(xmlns[6])); - newNS = 0; - } - else { - ns = domNewNamespace(info->document, &xmlns[6], atPtr[1]); - } - } else { - ns = domNewNamespace(info->document, "", atPtr[1]); - } - if (newNS) { - /* push active namespace */ - info->activeNSpos++; - if (info->activeNSpos >= info->activeNSsize) { - info->activeNS = (domActiveNS*) REALLOC( - (char*)info->activeNS, - sizeof(domActiveNS) * 2 * info->activeNSsize); - info->activeNSsize = 2 * info->activeNSsize; - } - info->activeNS[info->activeNSpos].depth = info->depth; - info->activeNS[info->activeNSpos].namespace = ns; - } - - h = Tcl_CreateHashEntry(&HASHTAB(info->document, tdom_attrNames), - atPtr[0], &hnew); - attrnode = (domAttrNode*) domAlloc(sizeof(domAttrNode)); - memset(attrnode, 0, sizeof(domAttrNode)); - attrnode->nodeType = ATTRIBUTE_NODE; - attrnode->nodeFlags = IS_NS_NODE; - attrnode->namespace = ns->index; - attrnode->nodeName = 
(char *)&(h->key); - attrnode->parentNode = node; - len = strlen(atPtr[1]); - if (TclOnly8Bits && info->encoding_8bit) { - tdom_Utf8to8Bit(info->encoding_8bit, atPtr[1], &len); - } - attrnode->valueLength = len; - attrnode->nodeValue = (char*)MALLOC(len+1); - strcpy(attrnode->nodeValue, atPtr[1]); - if (node->firstAttr) { - lastAttr->nextSibling = attrnode; - } else { - node->firstAttr = attrnode; - } - lastAttr = attrnode; - } - - } - - /*---------------------------------------------------------- - | look for namespace of element - \---------------------------------------------------------*/ - domSplitQName (name, tagPrefix, &localname); - for (pos = info->activeNSpos; pos >= 0; pos--) { - if ( ((tagPrefix[0] == '\0') && (info->activeNS[pos].namespace->prefix[0] == '\0')) - || ((tagPrefix[0] != '\0') && (info->activeNS[pos].namespace->prefix[0] != '\0') - && (strcmp(tagPrefix, info->activeNS[pos].namespace->prefix) == 0)) - ) { - if (info->activeNS[pos].namespace->prefix[0] == '\0' - && info->activeNS[pos].namespace->uri[0] == '\0' - && tagPrefix[0] == '\0') { - /* xml-names rec. 5.2: "The default namespace can be - set to the empty string. This has the same effect, - within the scope of the declaration, of there being - no default namespace." */ - goto elemNSfound; - } - node->namespace = info->activeNS[pos].namespace->index; - DBG(fprintf(stderr, "tag='%s' uri='%s' \n", - node->nodeName, - info->activeNS[pos].namespace->uri); - ) - goto elemNSfound; - } - } - if (tagPrefix[0] != '\0') { - if (strcmp (tagPrefix, "xml")==0) { - node->namespace = info->document->rootNode->firstAttr->namespace; - } else { - /* Since where here, this means, the element has a - up to now not declared namespace prefix. 
We probably - should return this as an error, shouldn't we?*/ - } - } - elemNSfound: -#endif + if (!info->ignorexmlns) { + for (atPtr = atts; atPtr[0] && atPtr[1]; atPtr += 2) { + + if (strncmp(atPtr[0], "xmlns", 5) == 0) { + xmlns = atPtr[0]; + newNS = 1; + if (xmlns[5] == ':') { + if (domIsNamespaceInScope (info->activeNS, info->activeNSpos, + &(xmlns[6]), atPtr[1])) { + ns = domLookupPrefix (info->currentNode, &(xmlns[6])); + newNS = 0; + } + else { + ns = domNewNamespace(info->document, &xmlns[6], atPtr[1]); + } + } else { + ns = domNewNamespace(info->document, "", atPtr[1]); + } + if (newNS) { + /* push active namespace */ + info->activeNSpos++; + if (info->activeNSpos >= info->activeNSsize) { + info->activeNS = (domActiveNS*) REALLOC( + (char*)info->activeNS, + sizeof(domActiveNS) * 2 * info->activeNSsize); + info->activeNSsize = 2 * info->activeNSsize; + } + info->activeNS[info->activeNSpos].depth = info->depth; + info->activeNS[info->activeNSpos].namespace = ns; + } + + h = Tcl_CreateHashEntry(&HASHTAB(info->document, tdom_attrNames), + atPtr[0], &hnew); + attrnode = (domAttrNode*) domAlloc(sizeof(domAttrNode)); + memset(attrnode, 0, sizeof(domAttrNode)); + attrnode->nodeType = ATTRIBUTE_NODE; + attrnode->nodeFlags = IS_NS_NODE; + attrnode->namespace = ns->index; + attrnode->nodeName = (char *)&(h->key); + attrnode->parentNode = node; + len = strlen(atPtr[1]); + if (TclOnly8Bits && info->encoding_8bit) { + tdom_Utf8to8Bit(info->encoding_8bit, atPtr[1], &len); + } + attrnode->valueLength = len; + attrnode->nodeValue = (char*)MALLOC(len+1); + strcpy(attrnode->nodeValue, atPtr[1]); + if (node->firstAttr) { + lastAttr->nextSibling = attrnode; + } else { + node->firstAttr = attrnode; + } + lastAttr = attrnode; + } + + } + + /*---------------------------------------------------------- + | look for namespace of element + \---------------------------------------------------------*/ + domSplitQName (name, tagPrefix, &localname); + for (pos = info->activeNSpos; pos 
>= 0; pos--) { + if ( ((tagPrefix[0] == '\0') + && (info->activeNS[pos].namespace->prefix[0] == '\0')) + || ((tagPrefix[0] != '\0') + && (info->activeNS[pos].namespace->prefix[0] != '\0') + && (strcmp(tagPrefix, + info->activeNS[pos].namespace->prefix) == 0)) + ) { + if (info->activeNS[pos].namespace->prefix[0] == '\0' + && info->activeNS[pos].namespace->uri[0] == '\0' + && tagPrefix[0] == '\0') { + /* xml-names rec. 5.2: "The default namespace can be + set to the empty string. This has the same effect, + within the scope of the declaration, of there being + no default namespace." */ + goto elemNSfound; + } + node->namespace = info->activeNS[pos].namespace->index; + DBG(fprintf(stderr, "tag='%s' uri='%s' \n", + node->nodeName, + info->activeNS[pos].namespace->uri); + ) + goto elemNSfound; + } + } + if (tagPrefix[0] != '\0') { + if (strcmp (tagPrefix, "xml")==0) { + node->namespace = info->document->rootNode->firstAttr->namespace; + } else { + /* Since where here, this means, the element has a + up to now not declared namespace prefix. 
We probably + should return this as an error, shouldn't we?*/ + } + } + } +elemNSfound: /*-------------------------------------------------------------- | add the attribute nodes | \-------------------------------------------------------------*/ @@ -1316,16 +1319,15 @@ idAttPtr = NULL; } /* lastAttr already set right, either to NULL above, or to the last NS attribute */ for (atPtr = atts; atPtr[0] && atPtr[1]; atPtr += 2) { - -#ifdef TDOM_NS - if (strncmp(atPtr[0], "xmlns", 5) == 0) { - continue; + if (!info->ignorexmlns) { + if (strncmp(atPtr[0], "xmlns", 5) == 0) { + continue; + } } -#endif h = Tcl_CreateHashEntry(&HASHTAB(info->document, tdom_attrNames), atPtr[0], &hnew); attrnode = (domAttrNode*) domAlloc(sizeof(domAttrNode)); memset(attrnode, 0, sizeof(domAttrNode)); attrnode->nodeType = ATTRIBUTE_NODE; @@ -1350,41 +1352,43 @@ } else { node->firstAttr = attrnode; } lastAttr = attrnode; -#ifdef TDOM_NS - /*---------------------------------------------------------- - | look for attribute namespace - \---------------------------------------------------------*/ - domSplitQName (attrnode->nodeName, prefix, &localname); - if (prefix[0] != '\0') { - for (pos = info->activeNSpos; pos >= 0; pos--) { - if ( ((prefix[0] == '\0') && (info->activeNS[pos].namespace->prefix[0] == '\0')) - || ((prefix[0] != '\0') && (info->activeNS[pos].namespace->prefix[0] != '\0') - && (strcmp(prefix, info->activeNS[pos].namespace->prefix) == 0)) - ) { - attrnode->namespace = info->activeNS[pos].namespace->index; - DBG(fprintf(stderr, "attr='%s' uri='%s' \n", - attrnode->nodeName, - info->activeNS[pos].namespace->uri); - ) - goto attrNSfound; - } - } - if (strcmp (prefix, "xml")==0) { - attrnode->namespace = - info->document->rootNode->firstAttr->namespace; - } else { - /* Since where here, this means, the attribute has a - up to now not declared namespace prefix. 
We probably - should return this as an error, shouldn't we?*/ - } - attrNSfound: - ; - } -#endif + if (!info->ignorexmlns) { + /*---------------------------------------------------------- + | look for attribute namespace + \---------------------------------------------------------*/ + domSplitQName (attrnode->nodeName, prefix, &localname); + if (prefix[0] != '\0') { + for (pos = info->activeNSpos; pos >= 0; pos--) { + if ( ((prefix[0] == '\0') + && (info->activeNS[pos].namespace->prefix[0] == '\0')) + || ((prefix[0] != '\0') + && (info->activeNS[pos].namespace->prefix[0] != '\0') + && (strcmp(prefix, info->activeNS[pos].namespace->prefix) == 0)) + ) { + attrnode->namespace = info->activeNS[pos].namespace->index; + DBG(fprintf(stderr, "attr='%s' uri='%s' \n", + attrnode->nodeName, + info->activeNS[pos].namespace->uri); + ) + goto attrNSfound; + } + } + if (strcmp (prefix, "xml")==0) { + attrnode->namespace = + info->document->rootNode->firstAttr->namespace; + } else { + /* Since where here, this means, the attribute has a + up to now not declared namespace prefix. 
We probably + should return this as an error, shouldn't we?*/ + } + attrNSfound: + ; + } + } } info->depth++; } @@ -1401,18 +1405,18 @@ domReadInfo *info = userData; DispatchPCDATA (info); info->depth--; -#ifdef TDOM_NS - /* pop active namespaces */ - while ( (info->activeNSpos >= 0) && - (info->activeNS[info->activeNSpos].depth == info->depth) ) - { - info->activeNSpos--; - } -#endif + if (!info->ignorexmlns) { + /* pop active namespaces */ + while ( (info->activeNSpos >= 0) && + (info->activeNS[info->activeNSpos].depth == info->depth) ) + { + info->activeNSpos--; + } + } if (info->depth != -1) { info->currentNode = info->currentNode->parentNode; } else { info->currentNode = NULL; @@ -2076,10 +2080,11 @@ char *xml, int length, int ignoreWhiteSpaces, TEncoding *encoding_8bit, int storeLineColumn, + int ignorexmlns, int feedbackAfter, Tcl_Obj *feedbackCmd, Tcl_Channel channel, const char *baseurl, Tcl_Obj *extResolver, @@ -2103,10 +2108,13 @@ domDocument *doc = domCreateDoc(baseurl, storeLineColumn); if (extResolver) { doc->extResolver = tdomstrdup (Tcl_GetString (extResolver)); } + if (ignorexmlns) { + doc->nodeFlags |= IGNORE_XMLNS; + } info.parser = parser; info.document = doc; info.currentNode = NULL; info.depth = 0; @@ -2113,10 +2121,11 @@ info.ignoreWhiteSpaces = ignoreWhiteSpaces; info.cdata = (Tcl_DString*) MALLOC (sizeof (Tcl_DString)); Tcl_DStringInit (info.cdata); info.encoding_8bit = encoding_8bit; info.storeLineColumn = storeLineColumn; + info.ignorexmlns = ignorexmlns; info.feedbackAfter = feedbackAfter; info.feedbackCmd = feedbackCmd; info.nextFeedbackPosition = feedbackAfter; info.interp = interp; info.activeNSpos = -1; @@ -2342,11 +2351,10 @@ } else { return -1; } } -#ifdef TDOM_NS domAttrNode * domCreateXMLNamespaceNode ( domNode *parent ) { @@ -2367,11 +2375,10 @@ attr->parentNode = parent; attr->valueLength = strlen (XML_NAMESPACE); attr->nodeValue = tdomstrdup (XML_NAMESPACE); return attr; } -#endif /* TDOM_NS */ /* 
*---------------------------------------------------------------------- * @@ -2392,11 +2399,11 @@ domDocument * domCreateDoc ( const char * baseURI, int storeLineColumn - ) +) { Tcl_HashEntry *h; int hnew; domNode *rootNode; domDocument *doc; @@ -2440,13 +2447,11 @@ rootNode->nodeName = (char *)&(h->key); rootNode->nodeNumber = NODE_NO(doc); rootNode->ownerDocument = doc; rootNode->parentNode = NULL; rootNode->firstChild = rootNode->lastChild = NULL; -#ifdef TDOM_NS rootNode->firstAttr = domCreateXMLNamespaceNode (rootNode); -#endif if (storeLineColumn) { lc = (domLineColumn*) ( ((char*)rootNode) + sizeof(domNode)); rootNode->nodeFlags |= HAS_LINE_COLUMN; lc->line = 0; lc->column = 0; @@ -5160,10 +5165,11 @@ int depth; int ignoreWhiteSpaces; Tcl_DString *cdata; TEncoding *encoding_8bit; int storeLineColumn; + int ignorexmlns; int feedbackAfter; Tcl_Obj *feedbackCmd; int nextFeedbackPosition; Tcl_Interp *interp; int activeNSsize; @@ -5237,10 +5243,11 @@ info->document = NULL; info->currentNode = NULL; info->depth = 0; info->feedbackAfter = 0; + info->ignorexmlns = 0; Tcl_DStringSetLength (info->cdata, 0); info->nextFeedbackPosition = info->feedbackAfter; info->interp = interp; info->activeNSpos = -1; info->insideDTD = 0; @@ -5300,18 +5307,18 @@ static CONST84 char *tdomMethods[] = { "enable", "getdoc", "setResultEncoding", "setStoreLineColumn", "setExternalEntityResolver", "keepEmpties", - "remove", + "remove", "ignorexmlns", NULL }; enum tdomMethod { m_enable, m_getdoc, m_setResultEncoding, m_setStoreLineColumn, m_setExternalEntityResolver, m_keepEmpties, - m_remove + m_remove, m_ignorexmlns }; if (objc < 3 || objc > 4) { Tcl_WrongNumArgs (interp, 1, objv, tdom_usage); return TCL_ERROR; @@ -5367,10 +5374,11 @@ info->ignoreWhiteSpaces = 1; info->cdata = (Tcl_DString*) MALLOC (sizeof (Tcl_DString)); Tcl_DStringInit (info->cdata); info->encoding_8bit = 0; info->storeLineColumn = 0; + info->ignorexmlns = 0; info->feedbackAfter = 0; info->feedbackCmd = NULL; 
info->nextFeedbackPosition = 0; info->interp = interp; info->activeNSpos = -1; @@ -5502,9 +5510,26 @@ Tcl_GetBooleanFromObj (interp, objv[3], &bool); info->ignoreWhiteSpaces = !bool; handlerSet->ignoreWhiteCDATAs = !bool; info->tdomStatus = 1; break; + + case m_ignorexmlns: + /* Result is the ignorexmlns value before this call; an optional 4th argument then updates it. */ + info = CHandlerSetGetUserData (interp, objv[1], "tdom"); + if (!info) { + Tcl_SetResult (interp, "parser object isn't tdom enabled.", NULL); + return TCL_ERROR; + } + Tcl_SetIntObj (Tcl_GetObjResult (interp), info->ignorexmlns); + if (objc == 4) { + Tcl_GetBooleanFromObj (interp, objv[3], &bool); + info->ignorexmlns = bool; + } + info->tdomStatus = 1; + break; + + } return TCL_OK; } Index: generic/dom.h ================================================================== --- generic/dom.h +++ generic/dom.h @@ -437,10 +437,11 @@ typedef unsigned int domDocFlags; #define OUTPUT_DEFAULT_INDENT 1 #define NEEDS_RENUMBERING 2 #define DONT_FREE 4 +#define IGNORE_XMLNS 8 /*-------------------------------------------------------------------------- | a index to the namespace records | \-------------------------------------------------------------------------*/ @@ -735,10 +736,11 @@ char *xml, int length, int ignoreWhiteSpaces, TEncoding *encoding_8bit, int storeLineColumn, + int ignoreXMLNS, int feedbackAfter, Tcl_Obj *feedbackCmd, Tcl_Channel channel, const char *baseurl, Tcl_Obj *extResolver, Index: generic/domxslt.c ================================================================== --- generic/domxslt.c +++ generic/domxslt.c @@ -5981,11 +5981,11 @@ extResolver = Tcl_NewStringObj(xsltDoc->extResolver, -1); Tcl_IncrRefCount (extResolver); } /* keep white space, no fiddling with the encoding (is this a good idea?)
*/ - doc = domReadDocument (parser, xmlstring, len, 0, 0, storeLineColumn, 0, + doc = domReadDocument (parser, xmlstring, len, 0, 0, storeLineColumn, 0, 0, NULL, chan, extbase, extResolver, 0, (int) XML_PARAM_ENTITY_PARSING_ALWAYS, interp, &resultcode); if (xsltDoc->extResolver) { Tcl_DecrRefCount (extResolver); Index: generic/tcldom.c ================================================================== --- generic/tcldom.c +++ generic/tcldom.c @@ -1149,10 +1149,11 @@ { char *xml_string; Tcl_Obj *extResolver = NULL; int xml_string_len; int resultcode = 0; + int ignorexmlns = 0; domDocument *doc; domNode *nodeToAppend; XML_Parser parser; GetTcldomTSD() @@ -1167,17 +1168,21 @@ if (node->ownerDocument->extResolver) { extResolver = Tcl_NewStringObj(node->ownerDocument->extResolver, -1); Tcl_IncrRefCount (extResolver); } + if (node->ownerDocument->nodeFlags & IGNORE_XMLNS) { + ignorexmlns = 1; + } doc = domReadDocument(parser, xml_string, xml_string_len, 1, TSD(Encoding_to_8bit), TSD(storeLineColumn), + ignorexmlns, 0, NULL, NULL, NULL, extResolver, @@ -5491,10 +5496,11 @@ int optionIndex, value, xml_string_len, mode; int ignoreWhiteSpaces = 1; int takeSimpleParser = 0; int takeHTMLParser = 0; int setVariable = 0; + int ignorexmlns = 0; int feedbackAfter = 0; int useForeignDTD = 0; int paramEntityParsing = (int)XML_PARAM_ENTITY_PARSING_ALWAYS; int status = 0; domDocument *doc; @@ -5505,18 +5511,18 @@ static CONST84 char *parseOptions[] = { "-keepEmpties", "-simple", "-html", "-feedbackAfter", "-channel", "-baseurl", "-externalentitycommand", "-useForeignDTD", "-paramentityparsing", - "-feedbackcmd", + "-feedbackcmd", "-ignorexmlns", NULL }; enum parseOption { o_keepEmpties, o_simple, o_html, o_feedbackAfter, o_channel, o_baseurl, o_externalentitycommand, o_useForeignDTD, o_paramentityparsing, - o_feedbackcmd + o_feedbackcmd, o_ignorexmlns }; static CONST84 char *paramEntityParsingValues[] = { "always", "never", @@ -5672,10 +5678,14 @@ "requires a script as argument."); 
return TCL_ERROR; } objv++; objc--; continue; + + case o_ignorexmlns: + ignorexmlns = 1; + objv++; objc--; continue; } } if (feedbackAfter && !feedbackCmd) { @@ -5772,10 +5782,11 @@ doc = domReadDocument(parser, xml_string, xml_string_len, ignoreWhiteSpaces, TSD(Encoding_to_8bit), TSD(storeLineColumn), + ignorexmlns, feedbackAfter, feedbackCmd, chan, baseURI, extResolver, Index: tests/dom.test ================================================================== --- tests/dom.test +++ tests/dom.test @@ -457,10 +457,33 @@ catch {dom parse {}} } 1 +test dom-2.27 {parse -ignorexmlns} { + set result [list] + set doc [dom parse {}] + set root [$doc documentElement] + lappend result [$root localName] + lappend result [$root namespaceURI] + set child [$root firstChild] + lappend result [$child localName] + lappend result [$child namespaceURI] + lappend result [$doc selectNodes count(/doc/child)] + $doc delete + set doc [dom parse -ignorexmlns {}] + set root [$doc documentElement] + lappend result [$root nodeName] + lappend result [$root namespaceURI] + set child [$root firstChild] + lappend result [$child nodeName] + lappend result [$child namespaceURI] + lappend result [$doc selectNodes count(/doc/child)] + $doc delete + set result +} {doc foo.bar child foo.bar 0 doc {} child {} 1} + test dom-3.1 {isName} { dom isName ":foo" } {1} test dom-3.2 {isName} {