CVSTrac Legacy Code

Check-in [14e235cdd5]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:(#177) makes HTML entities _work_ in wiki markup and (maybe) other places

In order to allow it to work in both normal web browsing and the encoding needed for RSS (where we _need_ to encode even entities), I modified htmlize (%h) to take an "allow entities" flag, and added a new %R formatting option which disables this flag. %R is used by the RSS formatter to do the final content generation. This means anywhere wiki content is allowed someone can write &eacute; (rendered as é) and, apparently, it works.

Keep in mind that I don't normally use anything except ASCII in my day-to-day environment, so someone who cares about this stuff should maybe test it out a bit more than myself.

Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1:14e235cdd581a3784fb2bef21469ff411f8665b2
User & Date: cpb 2009-03-06 03:06:31
Context
2009-03-07
00:42
(#231) ditch the opaque class names like "border2" and "bkgnd1" and replace with more meaningful class names, and related cleanups. check-in: 64a9390abe user: cpb tags: trunk
2009-03-06
03:06
(#177) makes HTML entities _work_ in wiki markup and (maybe) other places

In order to allow it to work in both normal web browsing and the encoding needed for RSS (where we _need_ to encode even entities), I modified htmlize (%h) to take an "allow entities" flag, and added a new %R formatting option which disables this flag. %R is used by the RSS formatter to do the final content generation. This means anywhere wiki content is allowed someone can write &eacute; (rendered as é) and, apparently, it works.

Keep in mind that I don't normally use anything except ASCII in my day-to-day environment, so someone who cares about this stuff should maybe test it out a bit more than myself. check-in: 14e235cdd5 user: cpb tags: trunk

2009-03-02
00:07
(#787) append/add remarks should be hidden for users without ticket write permissions check-in: 10e5c9bba7 user: cpb tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to cgi.c.

816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
....
1432
1433
1434
1435
1436
1437
1438
1439

1440
1441
1442
1443
1444
1445
1446
....
1458
1459
1460
1461
1462
1463
1464
1465

1466
1467
1468
1469
1470
1471
1472
....
1928
1929
1930
1931
1932
1933
1934
1935







1936
1937
1938
1939
1940
1941
1942
....
2197
2198
2199
2200
2201
2202
2203























































2204
2205
2206
2207
2208
2209
2210
2211


2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
....
2241
2242
2243
2244
2245
2246
2247






2248
2249
2250
2251
2252

2253
2254
2255
2256
2257
2258
2259
** parameters as HTML.  This is used for testing and debugging.
*/
void cgi_print_all(void){
  int i;
  /* Debugging aid: writes one "name = value <br />" line per entry of
  ** aParamQP[0..nUsedQP-1] through cgi_printf().
  */
  cgi_parameter("","");  /* Force the parameters into sorted order */
  for(i=0; i<nUsedQP; i++){
    /* Let the %h conversion do the HTML escaping.  Passing htmlize()
    ** results directly to %s, as was done before, dropped both
    ** malloc()ed copies on every iteration.  %h also prints a NULL
    ** pointer as the empty string instead of dereferencing it.
    */
    cgi_printf("%h = %h  <br />\n", aParamQP[i].zName, aParamQP[i].zValue);
  }
}

/*
** Write HTML text for an option menu to standard output.  zParam
** is the query parameter that the option menu sets.  zDflt is the
** initial value of the option menu.  Additional arguments are name/value
................................................................................
   etPERCENT,          /* Percent symbol. %% */
   etCHARX,            /* Characters. %c */
   etERROR,            /* Used to indicate no such conversion type */
/* The rest are extensions, not normally found in printf() */
   etCHARLIT,          /* Literal characters.  %' */
   etDYNAMIC,          /* Like %s but free() called on input */
   etORDINAL,          /* 1st, 2nd, 3rd and so forth */
   etHTMLIZE,          /* Make text safe for HTML */

   etHTTPIZE,          /* Make text safe for HTTP.  "/" encoded as %2f */
   etURLIZE            /* Make text safe for HTTP.  "/" not encoded */
};

/*
** Each builtin conversion character (ex: the 'd' in "%d") is described
** by an instance of the following structure
................................................................................
** The following table is searched linearly, so it is good to put the
** most frequently used conversion types first.
*/
static et_info fmtinfo[] = {
  { 'd',  10,  "0123456789",       1,    0, etRADIX,      },
  { 's',   0,  0,                  0,    0, etSTRING,     }, 
  { 'z',   0,  0,                  0,    0, etDYNAMIC,    }, 
  { 'h',   0,  0,                  0,    0, etHTMLIZE,    },

  { 't',   0,  0,                  0,    0, etHTTPIZE,    }, /* / -> %2F */
  { 'T',   0,  0,                  0,    0, etURLIZE,     }, /* / -> / */
  { 'c',   0,  0,                  0,    0, etCHARX,      },
  { 'o',   8,  "01234567",         0,  "0", etRADIX,      },
  { 'u',  10,  "0123456789",       0,    0, etRADIX,      },
  { 'x',  16,  "0123456789abcdef", 0, "x0", etRADIX,      },
  { 'X',  16,  "0123456789ABCDEF", 0, "X0", etRADIX,      },
................................................................................
        if( bufpt==0 ) bufpt = "";
        length = strlen(bufpt);
        if( precision>=0 && precision<length ) length = precision;
        break;
      case etHTMLIZE:
        zMem = va_arg(ap,char*);
        if( zMem==0 ) zMem = "";
        zExtra = bufpt = htmlize(zMem, -1);







        length = strlen(bufpt);
        if( precision>=0 && precision<length ) length = precision;
        break;
      case etHTTPIZE:
        zMem = va_arg(ap,char*);
        if( zMem==0 ) zMem = "";
        zExtra = bufpt = httpize(zMem, -1);
................................................................................
/*
** This routine works like "vprintf" except that it has the
** extra formatting capabilities such as %h (make the string argument
** safe for HTML) and %t (make it safe for HTTP, "/" encoded as %2F).
**
** zFormat is the format string and ap the matching argument list; both
** are passed unchanged to vxprintf() along with sout.
*/
void cgi_vprintf(const char *zFormat, va_list ap){
  vxprintf(sout,0,zFormat,ap);
}
























































/*
** Make the given string safe for HTML by converting every "<" into "&lt;",
** every ">" into "&gt;" and every "&" into "&amp;".  Return a pointer
** to a new string obtained from malloc().
**
** We also encode " as &quot; so that it can appear as an argument
** to markup.


*/
char *htmlize(const char *zIn, int n){
  int c;
  int i = 0;
  int count = 0;
  char *zOut;

  if( n<0 ) n = strlen(zIn);
  while( i<n && (c = zIn[i])!=0 ){
    switch( c ){
      case '<':   count += 4;       break;
................................................................................
      case '>':   
        zOut[i++] = '&';
        zOut[i++] = 'g';
        zOut[i++] = 't';
        zOut[i++] = ';';
        break;
      case '&':   






        zOut[i++] = '&';
        zOut[i++] = 'a';
        zOut[i++] = 'm';
        zOut[i++] = 'p';
        zOut[i++] = ';';

        break;
      case '"':   
        zOut[i++] = '&';
        zOut[i++] = 'q';
        zOut[i++] = 'u';
        zOut[i++] = 'o';
        zOut[i++] = 't';







|







 







|
>







 







|
>







 







|
>
>
>
>
>
>
>







 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>








>
>

|

|







 







>
>
>
>
>
>
|
|
|
|
|
>







816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
....
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
....
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
....
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
....
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
....
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
** parameters as HTML.  This is used for testing and debugging.
*/
void cgi_print_all(void){
  int i;
  /* Debugging aid: writes one "name = value <br />" line per entry of
  ** aParamQP[0..nUsedQP-1] through cgi_printf().
  */
  cgi_parameter("","");  /* Force the parameters into sorted order */
  for(i=0; i<nUsedQP; i++){
    /* Let the %h conversion do the HTML escaping (it calls htmlize()
    ** with entities allowed, exactly as the explicit calls here used
    ** to).  Passing htmlize() results directly to %s dropped both
    ** malloc()ed copies on every iteration.  %h also prints a NULL
    ** pointer as the empty string instead of dereferencing it.
    */
    cgi_printf("%h = %h  <br />\n", aParamQP[i].zName, aParamQP[i].zValue);
  }
}

/*
** Write HTML text for an option menu to standard output.  zParam
** is the query parameter that the option menu sets.  zDflt is the
** initial value of the option menu.  Additional arguments are name/value
................................................................................
   etPERCENT,          /* Percent symbol. %% */
   etCHARX,            /* Characters. %c */
   etERROR,            /* Used to indicate no such conversion type */
/* The rest are extensions, not normally found in printf() */
   etCHARLIT,          /* Literal characters.  %' */
   etDYNAMIC,          /* Like %s but free() called on input */
   etORDINAL,          /* 1st, 2nd, 3rd and so forth */
   etHTMLIZE,          /* Make text safe for HTML, do not convert entities */
   etRSSIZE,           /* Make text safe for RSS HTML content */
   etHTTPIZE,          /* Make text safe for HTTP.  "/" encoded as %2f */
   etURLIZE            /* Make text safe for HTTP.  "/" not encoded */
};

/*
** Each builtin conversion character (ex: the 'd' in "%d") is described
** by an instance of the following structure
................................................................................
** The following table is searched linearly, so it is good to put the
** most frequently used conversion types first.
*/
static et_info fmtinfo[] = {
  { 'd',  10,  "0123456789",       1,    0, etRADIX,      },
  { 's',   0,  0,                  0,    0, etSTRING,     }, 
  { 'z',   0,  0,                  0,    0, etDYNAMIC,    }, 
  { 'h',   0,  0,                  0,    0, etHTMLIZE,    }, /* not entities */
  { 'R',   0,  0,                  0,    0, etRSSIZE,     },
  { 't',   0,  0,                  0,    0, etHTTPIZE,    }, /* / -> %2F */
  { 'T',   0,  0,                  0,    0, etURLIZE,     }, /* / -> / */
  { 'c',   0,  0,                  0,    0, etCHARX,      },
  { 'o',   8,  "01234567",         0,  "0", etRADIX,      },
  { 'u',  10,  "0123456789",       0,    0, etRADIX,      },
  { 'x',  16,  "0123456789abcdef", 0, "x0", etRADIX,      },
  { 'X',  16,  "0123456789ABCDEF", 0, "X0", etRADIX,      },
................................................................................
        if( bufpt==0 ) bufpt = "";
        length = strlen(bufpt);
        if( precision>=0 && precision<length ) length = precision;
        break;
      case etHTMLIZE:
        zMem = va_arg(ap,char*);
        if( zMem==0 ) zMem = "";
        zExtra = bufpt = htmlize(zMem, -1, 1);
        length = strlen(bufpt);
        if( precision>=0 && precision<length ) length = precision;
        break;
      case etRSSIZE:
        zMem = va_arg(ap,char*);
        if( zMem==0 ) zMem = "";
        zExtra = bufpt = htmlize(zMem, -1, 0);
        length = strlen(bufpt);
        if( precision>=0 && precision<length ) length = precision;
        break;
      case etHTTPIZE:
        zMem = va_arg(ap,char*);
        if( zMem==0 ) zMem = "";
        zExtra = bufpt = httpize(zMem, -1);
................................................................................
/*
** This routine works like "vprintf" except that it has the
** extra formatting capabilities such as %h (make the string argument
** safe for HTML, letting HTML entities through), %R (the same escaping
** but with entities escaped as well, for RSS content) and %t (make it
** safe for HTTP, "/" encoded as %2F).
**
** zFormat is the format string and ap the matching argument list; both
** are passed unchanged to vxprintf() along with sout.
*/
void cgi_vprintf(const char *zFormat, va_list ap){
  vxprintf(sout,0,zFormat,ap);
}

/* Maximum number of letters in the name of a named HTML entity, not
** counting the leading '&' or the trailing ';' (ex: the "thetasym" in
** &thetasym;).
*/
#define MAX_ENTITY_LEN 8

/*
** Equivalent to strspn(), but instead of a set of characters the caller
** passes a ctype.h-style predicate (isalpha, isdigit, ...).
**
** Returns the number of leading characters of z that isctype accepts.
** At most m characters are examined.  A negative m means "no limit";
** the scan then ends at the first character the predicate rejects,
** which for the standard ctype.h predicates is the NUL terminator at
** the latest.
*/
static int strctype(const char* z, int m, int (*isctype)(int) ){
  int n = 0;
  /* The cast matters: passing a negative plain char (any byte >= 0x80
  ** on platforms where char is signed) to a ctype.h function is
  ** undefined behaviour.
  */
  while( (m<0 || n<m) && isctype((unsigned char)z[n]) ){ n++; }
  return n;
}

/*
** If z begins with text shaped like an HTML entity, return the length of
** that entity including the leading '&' and the trailing ';'.  Return 0
** otherwise.
**
** Accepted shapes:
**
**     &name;      3 to MAX_ENTITY_LEN letters
**     &#ddd;      1 to 6 decimal digits
**     &#xhhh;     1 to 5 hexadecimal digits
**
** Only the shape is checked; names are not looked up in any table of
** defined entities.
**
** m is the number of bytes of z that may be examined.  A negative m
** means z is NUL-terminated and may be scanned without limit.
*/
static int entity_length(const char* z, int m){
  int j, n = 0;

  /* The shortest accepted form, "&#0;", is 4 bytes.  m==0 has to be
  ** rejected here as well: it must not reach the decrements below,
  ** where it would turn into -1 and be taken for "no limit".
  */
  if( m>=0 && m<4 ) return 0;
  if( z[0]!='&' ) return 0;
  z++;
  n++;
  if(m>=0) m--;

  if( z[0]=='#' ){
    z++;
    n++;
    if(m>=0) m--;

    if( z[0]=='x' ){
      z++;
      n++;
      if(m>=0) m--;
      if( (j=strctype(z,m,isxdigit))>0 && j<=5 ){
        /* &#x00; */
      }else{
        return 0;
      }
    }else if( (j=strctype(z,m,isdigit))>0 && j<=6 ){
      /* &#00; */
    }else{
      return 0;
    }
  }else if( (j=strctype(z,m,isalpha))>2 && j<=MAX_ENTITY_LEN ){
    /* &eacute; */
  }else{
    return 0;
  }

  z += j;
  n += j;
  if(m>=0) m -= j;

  /* The body used up every permitted byte: no room left for the ';' */
  if( m==0 ) return 0;
  if( z[0]!=';' ) return 0;
  return n+1;
}

/*
** Make the given string safe for HTML by converting every "<" into "&lt;",
** every ">" into "&gt;" and every "&" into "&amp;".  Return a pointer
** to a new string obtained from malloc().
**
** We also encode " as &quot; so that it can appear as an argument
** to markup.
**
** If ae is non-zero, we allow HTML entities through unchanged.
*/
char *htmlize(const char *zIn, int n, int ae){
  int c;
  int i = 0, j;
  int count = 0;
  char *zOut;

  if( n<0 ) n = strlen(zIn);
  while( i<n && (c = zIn[i])!=0 ){
    switch( c ){
      case '<':   count += 4;       break;
................................................................................
      case '>':   
        zOut[i++] = '&';
        zOut[i++] = 'g';
        zOut[i++] = 't';
        zOut[i++] = ';';
        break;
      case '&':   
        if( ae && (j=entity_length(zIn,n+1))>0 ){
          strncpy( &zOut[i], zIn, j );
          i += j;
          n -= (j-1); /* n-- already */
          zIn += j-1; /* zIn++ below, always */
        }else{
          zOut[i++] = '&';
          zOut[i++] = 'a';
          zOut[i++] = 'm';
          zOut[i++] = 'p';
          zOut[i++] = ';';
        }
        break;
      case '"':   
        zOut[i++] = '&';
        zOut[i++] = 'q';
        zOut[i++] = 'u';
        zOut[i++] = 'o';
        zOut[i++] = 't';

Changes to format.c.

275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
....
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
....
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
....
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
}

/*
** Write the first N characters of *pzText to the CGI output, escaped
** for HTML, and advance *pzText by N.  When N is zero or negative
** nothing is written and *pzText is left alone.
*/
static void put_htmlized_text(const char **pzText, int N){
  char *zEscaped;

  if( N<=0 ) return;
  zEscaped = htmlize(*pzText, N);
  cgi_printf("%s", zEscaped);
  free(zEscaped);
  *pzText += N;
}

/*
................................................................................
      */
      if( (sMarkup.lenType==5 && strncmp(sMarkup.zType,"image",5)==0)
       || (sMarkup.lenType==9 && strncmp(sMarkup.zType,"leftimage",9)==0)
       || (sMarkup.lenType==10 && strncmp(sMarkup.zType,"rightimage",10)==0)
      ){
        char *zUrl = 0;
        const char *zAlign;
        char *zAlt = htmlize(sMarkup.zArgs, sMarkup.lenArgs);
        if( azAttach==0 && zPageId!=0 ){
          azAttach = (char **)
                     db_query("SELECT fname, atn FROM attachment "
                              "WHERE tn='%q'", zPageId);
        }
        if( azAttach ){
          int ix;
................................................................................
              zUrl = format_link("attach_get/%s/%h",
                                 azAttach[ix+1], azAttach[ix]);
              break;
            }
          }
        }
        if( zUrl==0 ){
          zUrl = htmlize(sMarkup.zKey, sMarkup.lenKey);
        }
        put_htmlized_text(&zText, i);
        switch( sMarkup.zType[0] ){
          case 'l': case 'L':   zAlign = " align=\"left\"";  break;
          case 'r': case 'R':   zAlign = " align=\"right\""; break;
          default:              zAlign = "";                 break;
        }
................................................................................


      /* Words that begin with "http:" or "https:" or "ftp:" or "mailto:"
      ** become hyperlinks.
      */
      if( (c=='h' || c=='f' || c=='m') && (j=is_url(&zText[i]))>0 ){
        put_htmlized_text(&zText, i);
        z = htmlize(zText, j);
        if( is_image(z, strlen(z)) ){
          cgi_printf("<img src=\"%s\" alt=\"%s\"%s>", z, z,
                     g.noFollow ? " rel=\"nofollow\"" : "");
        }else{
          cgi_printf("<a class=\"external\" href=\"%s\"%s>%s</a>",
                     z, g.noFollow ? " rel=\"nofollow\"" : "", z);
        }







|







 







|







 







|







 







|







275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
....
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
....
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
....
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
}

/*
** Write the first N characters of *pzText to the CGI output, escaped
** for HTML with entities allowed through, and advance *pzText by N.
** When N is zero or negative nothing is written and *pzText is left
** alone.
*/
static void put_htmlized_text(const char **pzText, int N){
  char *zEscaped;

  if( N<=0 ) return;
  zEscaped = htmlize(*pzText, N, 1);
  cgi_printf("%s", zEscaped);
  free(zEscaped);
  *pzText += N;
}

/*
................................................................................
      */
      if( (sMarkup.lenType==5 && strncmp(sMarkup.zType,"image",5)==0)
       || (sMarkup.lenType==9 && strncmp(sMarkup.zType,"leftimage",9)==0)
       || (sMarkup.lenType==10 && strncmp(sMarkup.zType,"rightimage",10)==0)
      ){
        char *zUrl = 0;
        const char *zAlign;
        char *zAlt = htmlize(sMarkup.zArgs, sMarkup.lenArgs,1);
        if( azAttach==0 && zPageId!=0 ){
          azAttach = (char **)
                     db_query("SELECT fname, atn FROM attachment "
                              "WHERE tn='%q'", zPageId);
        }
        if( azAttach ){
          int ix;
................................................................................
              zUrl = format_link("attach_get/%s/%h",
                                 azAttach[ix+1], azAttach[ix]);
              break;
            }
          }
        }
        if( zUrl==0 ){
          zUrl = htmlize(sMarkup.zKey, sMarkup.lenKey,1);
        }
        put_htmlized_text(&zText, i);
        switch( sMarkup.zType[0] ){
          case 'l': case 'L':   zAlign = " align=\"left\"";  break;
          case 'r': case 'R':   zAlign = " align=\"right\""; break;
          default:              zAlign = "";                 break;
        }
................................................................................


      /* Words that begin with "http:" or "https:" or "ftp:" or "mailto:"
      ** become hyperlinks.
      */
      if( (c=='h' || c=='f' || c=='m') && (j=is_url(&zText[i]))>0 ){
        put_htmlized_text(&zText, i);
        z = htmlize(zText, j,1);
        if( is_image(z, strlen(z)) ){
          cgi_printf("<img src=\"%s\" alt=\"%s\"%s>", z, z,
                     g.noFollow ? " rel=\"nofollow\"" : "");
        }else{
          cgi_printf("<a class=\"external\" href=\"%s\"%s>%s</a>",
                     z, g.noFollow ? " rel=\"nofollow\"" : "", z);
        }

Changes to rss.c.

620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
        /* Cannot happen */
        break;
    }
    @ <item>
    if( zLink[0] ){
      @ <link>%s(g.zBaseURL)/%s(zLink)</link>
    }
    @ <title>%h(zPrefix) %h(zSuffix)</title>

    @ <description>\

    if(zMsg){
      if( rssDetail<9 && output_trim_message(zMsg, MN_CKIN_MSG, MX_CKIN_MSG) ){
        @ %h(zMsg) [...]\
      }else{
        @ %h(zMsg)\
      }
      cgi_printf("%h","<br>");
      /* don't free this */
    }

    if( zWiki ){
      if( rssDetail<9 && output_trim_message(zWiki, MN_CKIN_MSG, MX_CKIN_MSG) ){
        zWiki = format_formatted(zWiki, zPageId);
        @ %h(zWiki) [...]%h("<br>")
      }else{
        zWiki = format_formatted(zWiki, zPageId);
        @ %h(zWiki)%h("<br>")
      }
      free(zWiki);  /* format_formatted(), no longer points to az[?] */
    }
    
    if(zTailMsg){
      if( rssDetail<9
          && output_trim_message(zTailMsg, MN_CKIN_MSG, MX_CKIN_MSG)
      ){
        @ %h(zTailMsg) [...]\
      }else{
        @ %h(zTailMsg)\
      }
      free(zTailMsg);
    }

    if( rssDetail>=5 && zWiki==0 && zMsg==0 && zTailMsg==0 ){
      @ %h(zPrefix) %h(zSuffix)\
    }
    @ </description>

    @ <pubDate>%h(cgi_rfc822_datestamp(thisDate))</pubDate>
    @ </item>
  }
  common_rss_footer();
}







|





|

|

|






|


|








|

|





|








620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
        /* Cannot happen */
        break;
    }
    @ <item>
    if( zLink[0] ){
      @ <link>%s(g.zBaseURL)/%s(zLink)</link>
    }
    @ <title>%R(zPrefix) %R(zSuffix)</title>

    @ <description>\

    if(zMsg){
      if( rssDetail<9 && output_trim_message(zMsg, MN_CKIN_MSG, MX_CKIN_MSG) ){
        @ %R(zMsg) [...]\
      }else{
        @ %R(zMsg)\
      }
      cgi_printf("%R","<br>");
      /* don't free this */
    }

    if( zWiki ){
      if( rssDetail<9 && output_trim_message(zWiki, MN_CKIN_MSG, MX_CKIN_MSG) ){
        zWiki = format_formatted(zWiki, zPageId);
        @ %R(zWiki) [...]%R("<br>")
      }else{
        zWiki = format_formatted(zWiki, zPageId);
        @ %R(zWiki)%R("<br>")
      }
      free(zWiki);  /* format_formatted(), no longer points to az[?] */
    }
    
    if(zTailMsg){
      if( rssDetail<9
          && output_trim_message(zTailMsg, MN_CKIN_MSG, MX_CKIN_MSG)
      ){
        @ %R(zTailMsg) [...]\
      }else{
        @ %R(zTailMsg)\
      }
      free(zTailMsg);
    }

    if( rssDetail>=5 && zWiki==0 && zMsg==0 && zTailMsg==0 ){
      @ %R(zPrefix) %R(zSuffix)\
    }
    @ </description>

    @ <pubDate>%h(cgi_rfc822_datestamp(thisDate))</pubDate>
    @ </item>
  }
  common_rss_footer();
}