Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Added method clearString to the dom command.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA3-256: 5300f428a6194bd556bb0dce9d568c82dd46d3c425d1def59dd83f71e4279d3c
User & Date: rolf 2020-05-13 23:49:38
Context
2020-05-13
23:50
Merged from trunk. check-in: 1fc373ed25 user: rolf tags: schema
23:49
Added method clearString to the dom command. Leaf check-in: 5300f428a6 user: rolf tags: trunk
2020-04-08
23:54
There is no need that the dom parse -jsonroot option argument has to be an XML name. Documented the existence of the option, check-in: 67a97e3f0d user: rolf tags: trunk
Changes

Changes to CHANGES.





1
2
3
4
5
6
7




2019-12-31  Rolf Ade  <rolf@pointsman.de>

        Updated to expat 2.2.9.

2018-10-12  Rolf Ade  <rolf@pointsman.de>

        Updated to expat 2.2.6.
>
>
>
>







1
2
3
4
5
6
7
8
9
10
11
2020-05-14  Rolf Ade  <rolf@pointsman.de>

        Added method clearString to the dom command.

2019-12-31  Rolf Ade  <rolf@pointsman.de>

        Updated to expat 2.2.9.

2018-10-12  Rolf Ade  <rolf@pointsman.de>

        Updated to expat 2.2.6.

Changes to doc/dom.xml.

140
141
142
143
144
145
146
147






148
149
150
151
152
153
154
...
511
512
513
514
515
516
517






518
519
520
521
522
523
524
                document element node.</desc>
              </optdef>
              
              <optdef>
                <optname>-jsonmaxnesting</optname>
                <optarg>integer</optarg>
                <desc>This option only has effect if used together
                with the <m>-json</m> option. The current implementation uses recursive descent JSON parser. In order to avoid using excess stack space, any JSON input that has more than a certain levels of nesting is considered invalid. The default maximum nesting is 2000. The option -jsonmaxnesting allows the user to adjust that.</desc>






              </optdef>
              
              <optdef>
                <optname>--</optname> 
                <desc>The option <m>--</m> marks the end of options.
                While respected in general this option is only needed
                in case of parsing JSON data, which may start with a
................................................................................
          <command><cmd>dom</cmd> <method>isCharData</method>
<m>string</m></command>
          <desc>Returns 1 if every character in <m>string</m> is
a valid XML Char according to production 2 of the <ref href="http://www.w3.org/TR/2000/REC-xml-20001006.html">XML 1.0</ref>
recommendation. Otherwise it returns 0.</desc>
        </commanddef>







        <commanddef>
          <command><cmd>dom</cmd> <method>isBMPCharData</method>
<m>string</m></command>
          <desc>Returns 1 if every character in <m>string</m> is
a valid XML Char with a Unicode code point within the Basic
Multilingual Plane (that means, that every character within the string
is at most 3 bytes long). Otherwise it returns 0.</desc>







|
>
>
>
>
>
>







 







>
>
>
>
>
>







140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
...
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
                document element node.</desc>
              </optdef>
              
              <optdef>
                <optname>-jsonmaxnesting</optname>
                <optarg>integer</optarg>
                <desc>This option only has effect if used together
                with the <m>-json</m> option. The current
                implementation uses a recursive descent JSON parser.
                In order to avoid using excess stack space, any JSON
                input that has more than a certain level of nesting
                is considered invalid. The default maximum nesting is
                2000. The option -jsonmaxnesting allows the user to
                adjust that.</desc>
              </optdef>
              
              <optdef>
                <optname>--</optname> 
                <desc>The option <m>--</m> marks the end of options.
                While respected in general this option is only needed
                in case of parsing JSON data, which may start with a
................................................................................
          <command><cmd>dom</cmd> <method>isCharData</method>
<m>string</m></command>
          <desc>Returns 1 if every character in <m>string</m> is
a valid XML Char according to production 2 of the <ref href="http://www.w3.org/TR/2000/REC-xml-20001006.html">XML 1.0</ref>
recommendation. Otherwise it returns 0.</desc>
        </commanddef>

        <commanddef>
          <command><cmd>dom</cmd> <method>clearString</method> <m>string</m></command>
          <desc>Returns the string given as argument with every character
          removed that is not allowed as XML parsed character data.</desc>
        </commanddef>
        
        <commanddef>
          <command><cmd>dom</cmd> <method>isBMPCharData</method>
<m>string</m></command>
          <desc>Returns 1 if every character in <m>string</m> is
a valid XML Char with a Unicode code point within the Basic
Multilingual Plane (that means, that every character within the string
is at most 3 bytes long). Otherwise it returns 0.</desc>

Changes to generic/dom.c.

346
347
348
349
350
351
352





















































353
354
355
356
357
358
359
        if (clen > 4) return 0;
        if (UTF8_XMLCHAR((unsigned const char *)p,clen))
            p += clen;
        else return 0;
    }
    return 1;
}






















































/*---------------------------------------------------------------------------
|   domIsBMPChar 
|
\--------------------------------------------------------------------------*/
int
domIsBMPChar (







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
        if (clen > 4) return 0;
        if (UTF8_XMLCHAR((unsigned const char *)p,clen))
            p += clen;
        else return 0;
    }
    return 1;
}

/*---------------------------------------------------------------------------
|   clearStringScanChar
|
|   Helper for domClearString. Examines the byte sequence starting at p,
|   which must not point at the terminating NUL. Stores 1 in *keep if the
|   sequence is accepted by UTF8_XMLCHAR, otherwise 0. Returns the number
|   of bytes the caller has to consume; this is always at least 1 and
|   never reaches beyond the terminating NUL of the string.
|
|   NOTE(review): UTF8_CHAR_LEN and UTF8_XMLCHAR are defined outside of
|   this excerpt. A reported length below 1 is handled defensively, in
|   case the macro signals an invalid lead byte that way - confirm.
|
\--------------------------------------------------------------------------*/
static int
clearStringScanChar (
    const char *p,
    int        *keep
    )
{
    int clen, avail;

    clen = UTF8_CHAR_LEN(*p);
    if (clen < 1) {
        /* No usable length for this lead byte: drop just this one byte,
         * so that the caller always makes progress. */
        *keep = 0;
        return 1;
    }
    /* Count how many of the announced clen bytes are really present
     * in front of the terminating NUL. */
    for (avail = 1; avail < clen && p[avail] != '\0'; avail++) {
        /* empty */
    }
    if (avail < clen || clen > 4) {
        /* Truncated or over-long sequence: drop what is there, without
         * ever stepping over the end of the string. */
        *keep = 0;
        return avail;
    }
    if (UTF8_XMLCHAR((unsigned const char*)p,clen)) {
        *keep = 1;
    } else {
        *keep = 0;
    }
    return clen;
}

/*---------------------------------------------------------------------------
|   domClearString
|
|   Returns str with every byte sequence removed that is not accepted by
|   UTF8_XMLCHAR (or whose announced length is greater than 4, or which
|   is cut off by the end of the string).
|
|   If nothing has to be removed, str itself is returned and *haveToFree
|   is set to 0. Otherwise a new NUL-terminated buffer obtained from
|   MALLOC is returned and *haveToFree is set to 1; in that case the
|   caller owns the buffer and has to release it.
|
\--------------------------------------------------------------------------*/
char *
domClearString (
    char *str,
    int *haveToFree
    )
{
    const char *p, *s;
    char *p1, *clearedstr;
    int   len, i, keep = 1;

    /* First pass: look for the first sequence that has to go. As long
     * as none is found no memory is allocated at all. */
    p = str;
    while (*p) {
        len = clearStringScanChar (p, &keep);
        if (!keep) break;
        p += len;
    }
    if (keep) {
        *haveToFree = 0;
        return str;
    }
    s = p;
    /* Find the end of the input. At least one byte will be dropped, so
     * a buffer of the input length is big enough for the cleared
     * string including its terminating NUL. */
    while (*p) p++;
    clearedstr = MALLOC (sizeof(char) * (p-str));
    p1 = clearedstr;
    /* The part in front of the first offending sequence is already
     * known to be clean; copy it verbatim. */
    while (str < s) {
        *p1 = *str;
        p1++; str++;
    }
    /* Second pass: copy the accepted sequences, skip everything else.
     * This starts at the first offending sequence, which is skipped. */
    while (*str) {
        len = clearStringScanChar (str, &keep);
        if (keep) {
            for (i = 0; i < len; i++) {
                *p1 = *str;
                p1++; str++;
            }
        } else {
            str += len;
        }
    }
    *p1 = '\0';
    *haveToFree = 1;
    return clearedstr;
}

/*---------------------------------------------------------------------------
|   domIsBMPChar 
|
\--------------------------------------------------------------------------*/
int
domIsBMPChar (

Changes to generic/dom.h.

828
829
830
831
832
833
834

835
836
837
838
839
840
841

void           tcldom_tolower (const char *str, char *str_out, int  len);
int            domIsNAME (const char *name);
int            domIsPINAME (const char *name);
int            domIsQNAME (const char *name);
int            domIsNCNAME (const char *name);
int            domIsChar (const char *str);

int            domIsBMPChar (const char *str);
int            domIsComment (const char *str);
int            domIsCDATA (const char *str);
int            domIsPIValue (const char *str);
void           domCopyTo (domNode *node, domNode *parent, int copyNS);
void           domCopyNS (domNode *from, domNode *to);
domAttrNode *  domCreateXMLNamespaceNode (domNode *parent);







>







828
829
830
831
832
833
834
835
836
837
838
839
840
841
842

void           tcldom_tolower (const char *str, char *str_out, int  len);
int            domIsNAME (const char *name);
int            domIsPINAME (const char *name);
int            domIsQNAME (const char *name);
int            domIsNCNAME (const char *name);
int            domIsChar (const char *str);
/* Returns str itself and sets *haveToFree to 0 if nothing had to be
 * removed; otherwise returns a newly allocated, cleared copy and sets
 * *haveToFree to 1, in which case the caller has to free the result. */
char *         domClearString (char *str, int *haveToFree);
int            domIsBMPChar (const char *str);
int            domIsComment (const char *str);
int            domIsCDATA (const char *str);
int            domIsPIValue (const char *str);
void           domCopyTo (domNode *node, domNode *parent, int copyNS);
void           domCopyNS (domNode *from, domNode *to);
domAttrNode *  domCreateXMLNamespaceNode (domNode *parent);

Changes to generic/tcldom.c.

215
216
217
218
219
220
221


222
223
224
225
226
227
228
....
6767
6768
6769
6770
6771
6772
6773
6774
6775
6776
6777
6778
6779
6780
6781
6782
6783
6784
6785
6786
6787
6788
6789
6790
6791
6792
6793
6794
6795
6796
6797
6798
6799
6800
6801
6802
6803
6804
6805
6806
....
7022
7023
7024
7025
7026
7027
7028












7029
7030
7031
7032
7033
7034
7035
    )
    "    createNodeCmd ?-returnNodeCmd? ?-tagName name? ?-jsonType jsonType? ?-namespace URI? (element|comment|text|cdata|pi)Node cmdName \n"
    "    setStoreLineColumn ?boolean?                     \n"
    "    setNameCheck ?boolean?                           \n"
    "    setTextCheck ?boolean?                           \n"
    "    setObjectCommands ?(automatic|token|command)?    \n"
    "    isCharData string                                \n"


    "    isComment string                                 \n"
    "    isCDATA string                                   \n"
    "    isPIValue string                                 \n"
    "    isName string                                    \n"
    "    isQName string                                   \n"
    "    isNCName string                                  \n"
    "    isPIName string                                  \n"
................................................................................
    Tcl_Interp * interp,
    int          objc,
    Tcl_Obj    * const objv[]
)
{
    GetTcldomTSD()

    char        * method, tmp[300];
    int           methodIndex, result, i, bool;
    Tcl_CmdInfo   cmdInfo;
    Tcl_Obj     * mobjv[MAX_REWRITE_ARGS];

    static const char *domMethods[] = {
        "createDocument",  "createDocumentNS",   "createNodeCmd",
        "parse",                                 "setStoreLineColumn",
        "isCharData",      "isName",             "isPIName",
        "isQName",         "isComment",          "isCDATA",
        "isPIValue",       "isNCName",           "createDocumentNode",
        "setNameCheck",    "setTextCheck",       "setObjectCommands",
        "featureinfo",     "isBMPCharData",
#ifdef TCL_THREADS
        "attachDocument",  "detachDocument",
#endif
        NULL
    };
    enum domMethod {
        m_createDocument,    m_createDocumentNS,   m_createNodeCmd,
        m_parse,                                   m_setStoreLineColumn,
        m_isCharData,        m_isName,             m_isPIName,
        m_isQName,           m_isComment,          m_isCDATA,
        m_isPIValue,         m_isNCName,           m_createDocumentNode,
        m_setNameCheck,      m_setTextCheck,       m_setObjectCommands,
        m_featureinfo,       m_isBMPCharData
#ifdef TCL_THREADS
        ,m_attachDocument,   m_detachDocument
#endif
    };

    static const char *nodeModeValues[] = {
        "automatic", "command", "token", NULL
................................................................................
            CheckArgs(3,3,2,"feature")
            return tcldom_featureinfo(clientData, interp, --objc, objv+1);

        case m_isBMPCharData:
            CheckArgs(3,3,2,"string");
            SetBooleanResult(domIsBMPChar(Tcl_GetString(objv[2])));
            return TCL_OK;












                
    }
    SetResult( dom_usage);
    return TCL_ERROR;
}

#ifdef TCL_THREADS







>
>







 







|


|








|












|







 







>
>
>
>
>
>
>
>
>
>
>
>







215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
....
6769
6770
6771
6772
6773
6774
6775
6776
6777
6778
6779
6780
6781
6782
6783
6784
6785
6786
6787
6788
6789
6790
6791
6792
6793
6794
6795
6796
6797
6798
6799
6800
6801
6802
6803
6804
6805
6806
6807
6808
....
7024
7025
7026
7027
7028
7029
7030
7031
7032
7033
7034
7035
7036
7037
7038
7039
7040
7041
7042
7043
7044
7045
7046
7047
7048
7049
    )
    "    createNodeCmd ?-returnNodeCmd? ?-tagName name? ?-jsonType jsonType? ?-namespace URI? (element|comment|text|cdata|pi)Node cmdName \n"
    "    setStoreLineColumn ?boolean?                     \n"
    "    setNameCheck ?boolean?                           \n"
    "    setTextCheck ?boolean?                           \n"
    "    setObjectCommands ?(automatic|token|command)?    \n"
    "    isCharData string                                \n"
    "    clearString string                               \n"
    "    isBMPCharData string                             \n"
    "    isComment string                                 \n"
    "    isCDATA string                                   \n"
    "    isPIValue string                                 \n"
    "    isName string                                    \n"
    "    isQName string                                   \n"
    "    isNCName string                                  \n"
    "    isPIName string                                  \n"
................................................................................
    Tcl_Interp * interp,
    int          objc,
    Tcl_Obj    * const objv[]
)
{
    GetTcldomTSD()

    char        * method, tmp[300], *clearedStr;
    int           methodIndex, result, i, bool;
    Tcl_CmdInfo   cmdInfo;
    Tcl_Obj     * mobjv[MAX_REWRITE_ARGS], *newObj;

    static const char *domMethods[] = {
        "createDocument",  "createDocumentNS",   "createNodeCmd",
        "parse",                                 "setStoreLineColumn",
        "isCharData",      "isName",             "isPIName",
        "isQName",         "isComment",          "isCDATA",
        "isPIValue",       "isNCName",           "createDocumentNode",
        "setNameCheck",    "setTextCheck",       "setObjectCommands",
        "featureinfo",     "isBMPCharData",      "clearString",
#ifdef TCL_THREADS
        "attachDocument",  "detachDocument",
#endif
        NULL
    };
    enum domMethod {
        m_createDocument,    m_createDocumentNS,   m_createNodeCmd,
        m_parse,                                   m_setStoreLineColumn,
        m_isCharData,        m_isName,             m_isPIName,
        m_isQName,           m_isComment,          m_isCDATA,
        m_isPIValue,         m_isNCName,           m_createDocumentNode,
        m_setNameCheck,      m_setTextCheck,       m_setObjectCommands,
        m_featureinfo,       m_isBMPCharData,      m_clearString
#ifdef TCL_THREADS
        ,m_attachDocument,   m_detachDocument
#endif
    };

    static const char *nodeModeValues[] = {
        "automatic", "command", "token", NULL
................................................................................
            CheckArgs(3,3,2,"feature")
            return tcldom_featureinfo(clientData, interp, --objc, objv+1);

        case m_isBMPCharData:
            CheckArgs(3,3,2,"string");
            SetBooleanResult(domIsBMPChar(Tcl_GetString(objv[2])));
            return TCL_OK;

        case m_clearString:
            CheckArgs(3,3,2,"string");
            clearedStr = domClearString (Tcl_GetString (objv[2]), &bool);
            if (bool) {
                newObj = Tcl_NewStringObj (clearedStr, -1);
                FREE (clearedStr);
                Tcl_SetObjResult (interp, newObj);
            } else {
                Tcl_SetObjResult (interp, objv[2]);
            }
            return TCL_OK;
                
    }
    SetResult( dom_usage);
    return TCL_ERROR;
}

#ifdef TCL_THREADS

Changes to tests/dom.test.

1057
1058
1059
1060
1061
1062
1063





















1064
1065
1066
1067
1068
1069
1070
} {0}

test dom-3.41 {isPIValue} {
    dom isPIValue "some invalid processing instruction data?>"
} {0}























test dom-4.1 {-useForeignDTD 0} {
    set doc [dom parse -useForeignDTD 0 {<root/>}]
    $doc delete
} {}

test dom-4.2 {-useForeignDTD 1 with document with internal subset} {need_uri} {
    set baseURI [tdom::baseURL [file join [pwd] [file dir [info script]] dom.test]]







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
} {0}

test dom-3.41 {isPIValue} {
    dom isPIValue "some invalid processing instruction data?>"
} {0}


# Feeds dom clearString strings that carry the characters \u0001 to
# \u0006 and code points from the range \uD800 to \uDFFF at the start,
# in the middle and at the end of the input. All of them are expected
# to be removed, while the other characters (including \uE000) are
# expected to be kept. The last input, abc, is expected to come back
# unchanged.
test dom-3.43 {clearString} {
    set result [list]
    foreach str {
        \u0001
        a\u0002
        \u0003b
        a\u0004b
        a\u0004\u0005b
        a\u0004c\u0005b
        a\u0004d\u0005\u0006b
        a\u0004d\u0005\uD800\uD801\uD802_foo_bar
        \uD800\uD801\uD802_foo_bar_baz\uD802_didum\uDFFF
        \uD800\uD801\uD802_foo_bar_baz\uD802_didum\uE000
        \u0004\u0005\uDABC
        abc
    } {
        lappend result [dom clearString $str]
    }
    set result
} [list {} a b ab ab acb adb ad_foo_bar _foo_bar_baz_didum _foo_bar_baz_didum\uE000 {} abc]

test dom-4.1 {-useForeignDTD 0} {
    set doc [dom parse -useForeignDTD 0 {<root/>}]
    $doc delete
} {}

test dom-4.2 {-useForeignDTD 1 with document with internal subset} {need_uri} {
    set baseURI [tdom::baseURL [file join [pwd] [file dir [info script]] dom.test]]