1 /* 2 ** $Id: lstrlib.c,v 1.178.1.1 2013/04/12 18:48:47 roberto Exp $ 3 ** Standard library for string operations and pattern-matching 4 ** See Copyright Notice in lua.h 5 */ 6 7 8 #include <ctype.h> 9 #include <stddef.h> 10 #include <stdio.h> 11 #include <stdlib.h> 12 #include <string.h> 13 14 #define lstrlib_c 15 #define LUA_LIB 16 17 #include "lua.h" 18 19 #include "lauxlib.h" 20 #include "lualib.h" 21 22 23 /* 24 ** maximum number of captures that a pattern can do during 25 ** pattern-matching. This limit is arbitrary. 26 */ 27 #if !defined(LUA_MAXCAPTURES) 28 #define LUA_MAXCAPTURES 32 29 #endif 30 31 32 /* macro to `unsign' a character */ 33 #define uchar(c) ((unsigned char)(c)) 34 35 36 37 static int str_len (lua_State *L) { 38 size_t l; 39 luaL_checklstring(L, 1, &l); 40 lua_pushinteger(L, (lua_Integer)l); 41 return 1; 42 } 43 44 45 /* translate a relative string position: negative means back from end */ 46 static size_t posrelat (ptrdiff_t pos, size_t len) { 47 if (pos >= 0) return (size_t)pos; 48 else if (0u - (size_t)pos > len) return 0; 49 else return len - ((size_t)-pos) + 1; 50 } 51 52 53 static int str_sub (lua_State *L) { 54 size_t l; 55 const char *s = luaL_checklstring(L, 1, &l); 56 size_t start = posrelat(luaL_checkinteger(L, 2), l); 57 size_t end = posrelat(luaL_optinteger(L, 3, -1), l); 58 if (start < 1) start = 1; 59 if (end > l) end = l; 60 if (start <= end) 61 lua_pushlstring(L, s + start - 1, end - start + 1); 62 else lua_pushliteral(L, ""); 63 return 1; 64 } 65 66 67 static int str_reverse (lua_State *L) { 68 size_t l, i; 69 luaL_Buffer b; 70 const char *s = luaL_checklstring(L, 1, &l); 71 char *p = luaL_buffinitsize(L, &b, l); 72 for (i = 0; i < l; i++) 73 p[i] = s[l - i - 1]; 74 luaL_pushresultsize(&b, l); 75 return 1; 76 } 77 78 79 static int str_lower (lua_State *L) { 80 size_t l; 81 size_t i; 82 luaL_Buffer b; 83 const char *s = luaL_checklstring(L, 1, &l); 84 char *p = luaL_buffinitsize(L, &b, l); 85 for (i=0; i<l; i++) 86 p[i] = tolower(uchar(s[i])); 87 luaL_pushresultsize(&b, l); 88 return 1; 89 } 90 91 92 static int str_upper (lua_State *L) { 93 size_t l; 94 size_t i; 95 luaL_Buffer b; 96 const char *s = luaL_checklstring(L, 1, &l); 97 char *p = luaL_buffinitsize(L, &b, l); 98 for (i=0; i<l; i++) 99 p[i] = toupper(uchar(s[i])); 100 luaL_pushresultsize(&b, l); 101 return 1; 102 } 103 104 105 /* reasonable limit to avoid arithmetic overflow */ 106 #define MAXSIZE ((~(size_t)0) >> 1) 107 108 static int str_rep (lua_State *L) { 109 size_t l, lsep; 110 const char *s = luaL_checklstring(L, 1, &l); 111 int n = luaL_checkint(L, 2); 112 const char *sep = luaL_optlstring(L, 3, "", &lsep); 113 if (n <= 0) lua_pushliteral(L, ""); 114 else if (l + lsep < l || l + lsep >= MAXSIZE / n) /* may overflow? */ 115 return luaL_error(L, "resulting string too large"); 116 else { 117 size_t totallen = n * l + (n - 1) * lsep; 118 luaL_Buffer b; 119 char *p = luaL_buffinitsize(L, &b, totallen); 120 while (n-- > 1) { /* first n-1 copies (followed by separator) */ 121 memcpy(p, s, l * sizeof(char)); p += l; 122 if (lsep > 0) { /* avoid empty 'memcpy' (may be expensive) */ 123 memcpy(p, sep, lsep * sizeof(char)); p += lsep; 124 } 125 } 126 memcpy(p, s, l * sizeof(char)); /* last copy (not followed by separator) */ 127 luaL_pushresultsize(&b, totallen); 128 } 129 return 1; 130 } 131 132 133 static int str_byte (lua_State *L) { 134 size_t l; 135 const char *s = luaL_checklstring(L, 1, &l); 136 size_t posi = posrelat(luaL_optinteger(L, 2, 1), l); 137 size_t pose = posrelat(luaL_optinteger(L, 3, posi), l); 138 int n, i; 139 if (posi < 1) posi = 1; 140 if (pose > l) pose = l; 141 if (posi > pose) return 0; /* empty interval; return no values */ 142 n = (int)(pose - posi + 1); 143 if (posi + n <= pose) /* (size_t -> int) overflow? */ 144 return luaL_error(L, "string slice too long"); 145 luaL_checkstack(L, n, "string slice too long"); 146 for (i=0; i<n; i++) 147 lua_pushinteger(L, uchar(s[posi+i-1])); 148 return n; 149 } 150 151 152 static int str_char (lua_State *L) { 153 int n = lua_gettop(L); /* number of arguments */ 154 int i; 155 luaL_Buffer b; 156 char *p = luaL_buffinitsize(L, &b, n); 157 for (i=1; i<=n; i++) { 158 int c = luaL_checkint(L, i); 159 luaL_argcheck(L, uchar(c) == c, i, "value out of range"); 160 p[i - 1] = uchar(c); 161 } 162 luaL_pushresultsize(&b, n); 163 return 1; 164 } 165 166 167 static int writer (lua_State *L, const void* b, size_t size, void* B) { 168 (void)L; 169 luaL_addlstring((luaL_Buffer*) B, (const char *)b, size); 170 return 0; 171 } 172 173 174 static int str_dump (lua_State *L) { 175 luaL_Buffer b; 176 luaL_checktype(L, 1, LUA_TFUNCTION); 177 lua_settop(L, 1); 178 luaL_buffinit(L,&b); 179 if (lua_dump(L, writer, &b) != 0) 180 return luaL_error(L, "unable to dump given function"); 181 luaL_pushresult(&b); 182 return 1; 183 } 184 185 186 187 /* 188 ** {====================================================== 189 ** PATTERN MATCHING 190 ** ======================================================= 191 */ 192 193 194 #define CAP_UNFINISHED (-1) 195 #define CAP_POSITION (-2) 196 197 198 typedef struct MatchState { 199 int matchdepth; /* control for recursive depth (to avoid C stack overflow) */ 200 const char *src_init; /* init of source string */ 201 const char *src_end; /* end ('\0') of source string */ 202 const char *p_end; /* end ('\0') of pattern */ 203 lua_State *L; 204 int level; /* total number of captures (finished or unfinished) */ 205 struct { 206 const char *init; 207 ptrdiff_t len; 208 } capture[LUA_MAXCAPTURES]; 209 } MatchState; 210 211 212 /* recursive function */ 213 static const char *match (MatchState *ms, const char *s, const char *p); 214 215 216 /* maximum recursion depth for 'match' */ 217 #if !defined(MAXCCALLS) 218 #define MAXCCALLS 200 219 #endif 220 221 222 #define L_ESC '%' 223 #define SPECIALS "^$*+?.([%-" 224 225 226 static int check_capture (MatchState *ms, int l) { 227 l -= '1'; 228 if (l < 0 || l >= ms->level || ms->capture[l].len == CAP_UNFINISHED) 229 return luaL_error(ms->L, "invalid capture index %%%d", l + 1); 230 return l; 231 } 232 233 234 static int capture_to_close (MatchState *ms) { 235 int level = ms->level; 236 for (level--; level>=0; level--) 237 if (ms->capture[level].len == CAP_UNFINISHED) return level; 238 return luaL_error(ms->L, "invalid pattern capture"); 239 } 240 241 242 static const char *classend (MatchState *ms, const char *p) { 243 switch (*p++) { 244 case L_ESC: { 245 if (p == ms->p_end) 246 luaL_error(ms->L, "malformed pattern (ends with " LUA_QL("%%") ")"); 247 return p+1; 248 } 249 case '[': { 250 if (*p == '^') p++; 251 do { /* look for a `]' */ 252 if (p == ms->p_end) 253 luaL_error(ms->L, "malformed pattern (missing " LUA_QL("]") ")"); 254 if (*(p++) == L_ESC && p < ms->p_end) 255 p++; /* skip escapes (e.g. `%]') */ 256 } while (*p != ']'); 257 return p+1; 258 } 259 default: { 260 return p; 261 } 262 } 263 } 264 265 266 static int match_class (int c, int cl) { 267 int res; 268 switch (tolower(cl)) { 269 case 'a' : res = isalpha(c); break; 270 case 'c' : res = iscntrl(c); break; 271 case 'd' : res = isdigit(c); break; 272 case 'g' : res = isgraph(c); break; 273 case 'l' : res = islower(c); break; 274 case 'p' : res = ispunct(c); break; 275 case 's' : res = isspace(c); break; 276 case 'u' : res = isupper(c); break; 277 case 'w' : res = isalnum(c); break; 278 case 'x' : res = isxdigit(c); break; 279 case 'z' : res = (c == 0); break; /* deprecated option */ 280 default: return (cl == c); 281 } 282 return (islower(cl) ? res : !res); 283 } 284 285 286 static int matchbracketclass (int c, const char *p, const char *ec) { 287 int sig = 1; 288 if (*(p+1) == '^') { 289 sig = 0; 290 p++; /* skip the `^' */ 291 } 292 while (++p < ec) { 293 if (*p == L_ESC) { 294 p++; 295 if (match_class(c, uchar(*p))) 296 return sig; 297 } 298 else if ((*(p+1) == '-') && (p+2 < ec)) { 299 p+=2; 300 if (uchar(*(p-2)) <= c && c <= uchar(*p)) 301 return sig; 302 } 303 else if (uchar(*p) == c) return sig; 304 } 305 return !sig; 306 } 307 308 309 static int singlematch (MatchState *ms, const char *s, const char *p, 310 const char *ep) { 311 if (s >= ms->src_end) 312 return 0; 313 else { 314 int c = uchar(*s); 315 switch (*p) { 316 case '.': return 1; /* matches any char */ 317 case L_ESC: return match_class(c, uchar(*(p+1))); 318 case '[': return matchbracketclass(c, p, ep-1); 319 default: return (uchar(*p) == c); 320 } 321 } 322 } 323 324 325 static const char *matchbalance (MatchState *ms, const char *s, 326 const char *p) { 327 if (p >= ms->p_end - 1) 328 luaL_error(ms->L, "malformed pattern " 329 "(missing arguments to " LUA_QL("%%b") ")"); 330 if (*s != *p) return NULL; 331 else { 332 int b = *p; 333 int e = *(p+1); 334 int cont = 1; 335 while (++s < ms->src_end) { 336 if (*s == e) { 337 if (--cont == 0) return s+1; 338 } 339 else if (*s == b) cont++; 340 } 341 } 342 return NULL; /* string ends out of balance */ 343 } 344 345 346 static const char *max_expand (MatchState *ms, const char *s, 347 const char *p, const char *ep) { 348 ptrdiff_t i = 0; /* counts maximum expand for item */ 349 while (singlematch(ms, s + i, p, ep)) 350 i++; 351 /* keeps trying to match with the maximum repetitions */ 352 while (i>=0) { 353 const char *res = match(ms, (s+i), ep+1); 354 if (res) return res; 355 i--; /* else didn't match; reduce 1 repetition to try again */ 356 } 357 return NULL; 358 } 359 360 361 static const char *min_expand (MatchState *ms, const char *s, 362 const char *p, const char *ep) { 363 for (;;) { 364 const char *res = match(ms, s, ep+1); 365 if (res != NULL) 366 return res; 367 else if (singlematch(ms, s, p, ep)) 368 s++; /* try with one more repetition */ 369 else return NULL; 370 } 371 } 372 373 374 static const char *start_capture (MatchState *ms, const char *s, 375 const char *p, int what) { 376 const char *res; 377 int level = ms->level; 378 if (level >= LUA_MAXCAPTURES) luaL_error(ms->L, "too many captures"); 379 ms->capture[level].init = s; 380 ms->capture[level].len = what; 381 ms->level = level+1; 382 if ((res=match(ms, s, p)) == NULL) /* match failed? */ 383 ms->level--; /* undo capture */ 384 return res; 385 } 386 387 388 static const char *end_capture (MatchState *ms, const char *s, 389 const char *p) { 390 int l = capture_to_close(ms); 391 const char *res; 392 ms->capture[l].len = s - ms->capture[l].init; /* close capture */ 393 if ((res = match(ms, s, p)) == NULL) /* match failed? */ 394 ms->capture[l].len = CAP_UNFINISHED; /* undo capture */ 395 return res; 396 } 397 398 399 static const char *match_capture (MatchState *ms, const char *s, int l) { 400 size_t len; 401 l = check_capture(ms, l); 402 len = ms->capture[l].len; 403 if ((size_t)(ms->src_end-s) >= len && 404 memcmp(ms->capture[l].init, s, len) == 0) 405 return s+len; 406 else return NULL; 407 } 408 409 410 static const char *match (MatchState *ms, const char *s, const char *p) { 411 if (ms->matchdepth-- == 0) 412 luaL_error(ms->L, "pattern too complex"); 413 init: /* using goto's to optimize tail recursion */ 414 if (p != ms->p_end) { /* end of pattern? */ 415 switch (*p) { 416 case '(': { /* start capture */ 417 if (*(p + 1) == ')') /* position capture? */ 418 s = start_capture(ms, s, p + 2, CAP_POSITION); 419 else 420 s = start_capture(ms, s, p + 1, CAP_UNFINISHED); 421 break; 422 } 423 case ')': { /* end capture */ 424 s = end_capture(ms, s, p + 1); 425 break; 426 } 427 case '$': { 428 if ((p + 1) != ms->p_end) /* is the `$' the last char in pattern? */ 429 goto dflt; /* no; go to default */ 430 s = (s == ms->src_end) ? s : NULL; /* check end of string */ 431 break; 432 } 433 case L_ESC: { /* escaped sequences not in the format class[*+?-]? */ 434 switch (*(p + 1)) { 435 case 'b': { /* balanced string? */ 436 s = matchbalance(ms, s, p + 2); 437 if (s != NULL) { 438 p += 4; goto init; /* return match(ms, s, p + 4); */ 439 } /* else fail (s == NULL) */ 440 break; 441 } 442 case 'f': { /* frontier? */ 443 const char *ep; char previous; 444 p += 2; 445 if (*p != '[') 446 luaL_error(ms->L, "missing " LUA_QL("[") " after " 447 LUA_QL("%%f") " in pattern"); 448 ep = classend(ms, p); /* points to what is next */ 449 previous = (s == ms->src_init) ? '\0' : *(s - 1); 450 if (!matchbracketclass(uchar(previous), p, ep - 1) && 451 matchbracketclass(uchar(*s), p, ep - 1)) { 452 p = ep; goto init; /* return match(ms, s, ep); */ 453 } 454 s = NULL; /* match failed */ 455 break; 456 } 457 case '0': case '1': case '2': case '3': 458 case '4': case '5': case '6': case '7': 459 case '8': case '9': { /* capture results (%0-%9)? */ 460 s = match_capture(ms, s, uchar(*(p + 1))); 461 if (s != NULL) { 462 p += 2; goto init; /* return match(ms, s, p + 2) */ 463 } 464 break; 465 } 466 default: goto dflt; 467 } 468 break; 469 } 470 default: dflt: { /* pattern class plus optional suffix */ 471 const char *ep = classend(ms, p); /* points to optional suffix */ 472 /* does not match at least once? */ 473 if (!singlematch(ms, s, p, ep)) { 474 if (*ep == '*' || *ep == '?' || *ep == '-') { /* accept empty? */ 475 p = ep + 1; goto init; /* return match(ms, s, ep + 1); */ 476 } 477 else /* '+' or no suffix */ 478 s = NULL; /* fail */ 479 } 480 else { /* matched once */ 481 switch (*ep) { /* handle optional suffix */ 482 case '?': { /* optional */ 483 const char *res; 484 if ((res = match(ms, s + 1, ep + 1)) != NULL) 485 s = res; 486 else { 487 p = ep + 1; goto init; /* else return match(ms, s, ep + 1); */ 488 } 489 break; 490 } 491 case '+': /* 1 or more repetitions */ 492 s++; /* 1 match already done */ 493 /* go through */ 494 case '*': /* 0 or more repetitions */ 495 s = max_expand(ms, s, p, ep); 496 break; 497 case '-': /* 0 or more repetitions (minimum) */ 498 s = min_expand(ms, s, p, ep); 499 break; 500 default: /* no suffix */ 501 s++; p = ep; goto init; /* return match(ms, s + 1, ep); */ 502 } 503 } 504 break; 505 } 506 } 507 } 508 ms->matchdepth++; 509 return s; 510 } 511 512 513 514 static const char *lmemfind (const char *s1, size_t l1, 515 const char *s2, size_t l2) { 516 if (l2 == 0) return s1; /* empty strings are everywhere */ 517 else if (l2 > l1) return NULL; /* avoids a negative `l1' */ 518 else { 519 const char *init; /* to search for a `*s2' inside `s1' */ 520 l2--; /* 1st char will be checked by `memchr' */ 521 l1 = l1-l2; /* `s2' cannot be found after that */ 522 while (l1 > 0 && (init = (const char *)memchr(s1, *s2, l1)) != NULL) { 523 init++; /* 1st char is already checked */ 524 if (memcmp(init, s2+1, l2) == 0) 525 return init-1; 526 else { /* correct `l1' and `s1' to try again */ 527 l1 -= init-s1; 528 s1 = init; 529 } 530 } 531 return NULL; /* not found */ 532 } 533 } 534 535 536 static void push_onecapture (MatchState *ms, int i, const char *s, 537 const char *e) { 538 if (i >= ms->level) { 539 if (i == 0) /* ms->level == 0, too */ 540 lua_pushlstring(ms->L, s, e - s); /* add whole match */ 541 else 542 luaL_error(ms->L, "invalid capture index"); 543 } 544 else { 545 ptrdiff_t l = ms->capture[i].len; 546 if (l == CAP_UNFINISHED) luaL_error(ms->L, "unfinished capture"); 547 if (l == CAP_POSITION) 548 lua_pushinteger(ms->L, ms->capture[i].init - ms->src_init + 1); 549 else 550 lua_pushlstring(ms->L, ms->capture[i].init, l); 551 } 552 } 553 554 555 static int push_captures (MatchState *ms, const char *s, const char *e) { 556 int i; 557 int nlevels = (ms->level == 0 && s) ? 1 : ms->level; 558 luaL_checkstack(ms->L, nlevels, "too many captures"); 559 for (i = 0; i < nlevels; i++) 560 push_onecapture(ms, i, s, e); 561 return nlevels; /* number of strings pushed */ 562 } 563 564 565 /* check whether pattern has no special characters */ 566 static int nospecials (const char *p, size_t l) { 567 size_t upto = 0; 568 do { 569 if (strpbrk(p + upto, SPECIALS)) 570 return 0; /* pattern has a special character */ 571 upto += strlen(p + upto) + 1; /* may have more after \0 */ 572 } while (upto <= l); 573 return 1; /* no special chars found */ 574 } 575 576 577 static int str_find_aux (lua_State *L, int find) { 578 size_t ls, lp; 579 const char *s = luaL_checklstring(L, 1, &ls); 580 const char *p = luaL_checklstring(L, 2, &lp); 581 size_t init = posrelat(luaL_optinteger(L, 3, 1), ls); 582 if (init < 1) init = 1; 583 else if (init > ls + 1) { /* start after string's end? */ 584 lua_pushnil(L); /* cannot find anything */ 585 return 1; 586 } 587 /* explicit request or no special characters? */ 588 if (find && (lua_toboolean(L, 4) || nospecials(p, lp))) { 589 /* do a plain search */ 590 const char *s2 = lmemfind(s + init - 1, ls - init + 1, p, lp); 591 if (s2) { 592 lua_pushinteger(L, s2 - s + 1); 593 lua_pushinteger(L, s2 - s + lp); 594 return 2; 595 } 596 } 597 else { 598 MatchState ms; 599 const char *s1 = s + init - 1; 600 int anchor = (*p == '^'); 601 if (anchor) { 602 p++; lp--; /* skip anchor character */ 603 } 604 ms.L = L; 605 ms.matchdepth = MAXCCALLS; 606 ms.src_init = s; 607 ms.src_end = s + ls; 608 ms.p_end = p + lp; 609 do { 610 const char *res; 611 ms.level = 0; 612 lua_assert(ms.matchdepth == MAXCCALLS); 613 if ((res=match(&ms, s1, p)) != NULL) { 614 if (find) { 615 lua_pushinteger(L, s1 - s + 1); /* start */ 616 lua_pushinteger(L, res - s); /* end */ 617 return push_captures(&ms, NULL, 0) + 2; 618 } 619 else 620 return push_captures(&ms, s1, res); 621 } 622 } while (s1++ < ms.src_end && !anchor); 623 } 624 lua_pushnil(L); /* not found */ 625 return 1; 626 } 627 628 629 static int str_find (lua_State *L) { 630 return str_find_aux(L, 1); 631 } 632 633 634 static int str_match (lua_State *L) { 635 return str_find_aux(L, 0); 636 } 637 638 639 static int gmatch_aux (lua_State *L) { 640 MatchState ms; 641 size_t ls, lp; 642 const char *s = lua_tolstring(L, lua_upvalueindex(1), &ls); 643 const char *p = lua_tolstring(L, lua_upvalueindex(2), &lp); 644 const char *src; 645 ms.L = L; 646 ms.matchdepth = MAXCCALLS; 647 ms.src_init = s; 648 ms.src_end = s+ls; 649 ms.p_end = p + lp; 650 for (src = s + (size_t)lua_tointeger(L, lua_upvalueindex(3)); 651 src <= ms.src_end; 652 src++) { 653 const char *e; 654 ms.level = 0; 655 lua_assert(ms.matchdepth == MAXCCALLS); 656 if ((e = match(&ms, src, p)) != NULL) { 657 lua_Integer newstart = e-s; 658 if (e == src) newstart++; /* empty match? go at least one position */ 659 lua_pushinteger(L, newstart); 660 lua_replace(L, lua_upvalueindex(3)); 661 return push_captures(&ms, src, e); 662 } 663 } 664 return 0; /* not found */ 665 } 666 667 668 static int gmatch (lua_State *L) { 669 luaL_checkstring(L, 1); 670 luaL_checkstring(L, 2); 671 lua_settop(L, 2); 672 lua_pushinteger(L, 0); 673 lua_pushcclosure(L, gmatch_aux, 3); 674 return 1; 675 } 676 677 678 static void add_s (MatchState *ms, luaL_Buffer *b, const char *s, 679 const char *e) { 680 size_t l, i; 681 const char *news = lua_tolstring(ms->L, 3, &l); 682 for (i = 0; i < l; i++) { 683 if (news[i] != L_ESC) 684 luaL_addchar(b, news[i]); 685 else { 686 i++; /* skip ESC */ 687 if (!isdigit(uchar(news[i]))) { 688 if (news[i] != L_ESC) 689 luaL_error(ms->L, "invalid use of " LUA_QL("%c") 690 " in replacement string", L_ESC); 691 luaL_addchar(b, news[i]); 692 } 693 else if (news[i] == '0') 694 luaL_addlstring(b, s, e - s); 695 else { 696 push_onecapture(ms, news[i] - '1', s, e); 697 luaL_addvalue(b); /* add capture to accumulated result */ 698 } 699 } 700 } 701 } 702 703 704 static void add_value (MatchState *ms, luaL_Buffer *b, const char *s, 705 const char *e, int tr) { 706 lua_State *L = ms->L; 707 switch (tr) { 708 case LUA_TFUNCTION: { 709 int n; 710 lua_pushvalue(L, 3); 711 n = push_captures(ms, s, e); 712 lua_call(L, n, 1); 713 break; 714 } 715 case LUA_TTABLE: { 716 push_onecapture(ms, 0, s, e); 717 lua_gettable(L, 3); 718 break; 719 } 720 default: { /* LUA_TNUMBER or LUA_TSTRING */ 721 add_s(ms, b, s, e); 722 return; 723 } 724 } 725 if (!lua_toboolean(L, -1)) { /* nil or false? */ 726 lua_pop(L, 1); 727 lua_pushlstring(L, s, e - s); /* keep original text */ 728 } 729 else if (!lua_isstring(L, -1)) 730 luaL_error(L, "invalid replacement value (a %s)", luaL_typename(L, -1)); 731 luaL_addvalue(b); /* add result to accumulator */ 732 } 733 734 735 static int str_gsub (lua_State *L) { 736 size_t srcl, lp; 737 const char *src = luaL_checklstring(L, 1, &srcl); 738 const char *p = luaL_checklstring(L, 2, &lp); 739 int tr = lua_type(L, 3); 740 size_t max_s = luaL_optinteger(L, 4, srcl+1); 741 int anchor = (*p == '^'); 742 size_t n = 0; 743 MatchState ms; 744 luaL_Buffer b; 745 luaL_argcheck(L, tr == LUA_TNUMBER || tr == LUA_TSTRING || 746 tr == LUA_TFUNCTION || tr == LUA_TTABLE, 3, 747 "string/function/table expected"); 748 luaL_buffinit(L, &b); 749 if (anchor) { 750 p++; lp--; /* skip anchor character */ 751 } 752 ms.L = L; 753 ms.matchdepth = MAXCCALLS; 754 ms.src_init = src; 755 ms.src_end = src+srcl; 756 ms.p_end = p + lp; 757 while (n < max_s) { 758 const char *e; 759 ms.level = 0; 760 lua_assert(ms.matchdepth == MAXCCALLS); 761 e = match(&ms, src, p); 762 if (e) { 763 n++; 764 add_value(&ms, &b, src, e, tr); 765 } 766 if (e && e>src) /* non empty match? */ 767 src = e; /* skip it */ 768 else if (src < ms.src_end) 769 luaL_addchar(&b, *src++); 770 else break; 771 if (anchor) break; 772 } 773 luaL_addlstring(&b, src, ms.src_end-src); 774 luaL_pushresult(&b); 775 lua_pushinteger(L, n); /* number of substitutions */ 776 return 2; 777 } 778 779 /* }====================================================== */ 780 781 782 783 /* 784 ** {====================================================== 785 ** STRING FORMAT 786 ** ======================================================= 787 */ 788 789 /* 790 ** LUA_INTFRMLEN is the length modifier for integer conversions in 791 ** 'string.format'; LUA_INTFRM_T is the integer type corresponding to 792 ** the previous length 793 */ 794 #if !defined(LUA_INTFRMLEN) /* { */ 795 #if defined(LUA_USE_LONGLONG) 796 797 #define LUA_INTFRMLEN "ll" 798 #define LUA_INTFRM_T long long 799 800 #else 801 802 #define LUA_INTFRMLEN "l" 803 #define LUA_INTFRM_T long 804 805 #endif 806 #endif /* } */ 807 808 809 /* 810 ** LUA_FLTFRMLEN is the length modifier for float conversions in 811 ** 'string.format'; LUA_FLTFRM_T is the float type corresponding to 812 ** the previous length 813 */ 814 #if !defined(LUA_FLTFRMLEN) 815 816 #define LUA_FLTFRMLEN "" 817 #define LUA_FLTFRM_T double 818 819 #endif 820 821 822 /* maximum size of each formatted item (> len(format('%99.99f', -1e308))) */ 823 #define MAX_ITEM 512 824 /* valid flags in a format specification */ 825 #define FLAGS "-+ #0" 826 /* 827 ** maximum size of each format specification (such as '%-099.99d') 828 ** (+10 accounts for %99.99x plus margin of error) 829 */ 830 #define MAX_FORMAT (sizeof(FLAGS) + sizeof(LUA_INTFRMLEN) + 10) 831 832 833 static void addquoted (lua_State *L, luaL_Buffer *b, int arg) { 834 size_t l; 835 const char *s = luaL_checklstring(L, arg, &l); 836 luaL_addchar(b, '"'); 837 while (l--) { 838 if (*s == '"' || *s == '\\' || *s == '\n') { 839 luaL_addchar(b, '\\'); 840 luaL_addchar(b, *s); 841 } 842 else if (*s == '\0' || iscntrl(uchar(*s))) { 843 char buff[10]; 844 if (!isdigit(uchar(*(s+1)))) 845 sprintf(buff, "\\%d", (int)uchar(*s)); 846 else 847 sprintf(buff, "\\%03d", (int)uchar(*s)); 848 luaL_addstring(b, buff); 849 } 850 else 851 luaL_addchar(b, *s); 852 s++; 853 } 854 luaL_addchar(b, '"'); 855 } 856 857 static const char *scanformat (lua_State *L, const char *strfrmt, char *form) { 858 const char *p = strfrmt; 859 while (*p != '\0' && strchr(FLAGS, *p) != NULL) p++; /* skip flags */ 860 if ((size_t)(p - strfrmt) >= sizeof(FLAGS)/sizeof(char)) 861 luaL_error(L, "invalid format (repeated flags)"); 862 if (isdigit(uchar(*p))) p++; /* skip width */ 863 if (isdigit(uchar(*p))) p++; /* (2 digits at most) */ 864 if (*p == '.') { 865 p++; 866 if (isdigit(uchar(*p))) p++; /* skip precision */ 867 if (isdigit(uchar(*p))) p++; /* (2 digits at most) */ 868 } 869 if (isdigit(uchar(*p))) 870 luaL_error(L, "invalid format (width or precision too long)"); 871 *(form++) = '%'; 872 memcpy(form, strfrmt, (p - strfrmt + 1) * sizeof(char)); 873 form += p - strfrmt + 1; 874 *form = '\0'; 875 return p; 876 } 877 878 879 /* 880 ** add length modifier into formats 881 */ 882 static void addlenmod (char *form, const char *lenmod) { 883 size_t l = strlen(form); 884 size_t lm = strlen(lenmod); 885 char spec = form[l - 1]; 886 strcpy(form + l - 1, lenmod); 887 form[l + lm - 1] = spec; 888 form[l + lm] = '\0'; 889 } 890 891 892 static int str_format (lua_State *L) { 893 int top = lua_gettop(L); 894 int arg = 1; 895 size_t sfl; 896 const char *strfrmt = luaL_checklstring(L, arg, &sfl); 897 const char *strfrmt_end = strfrmt+sfl; 898 luaL_Buffer b; 899 luaL_buffinit(L, &b); 900 while (strfrmt < strfrmt_end) { 901 if (*strfrmt != L_ESC) 902 luaL_addchar(&b, *strfrmt++); 903 else if (*++strfrmt == L_ESC) 904 luaL_addchar(&b, *strfrmt++); /* %% */ 905 else { /* format item */ 906 char form[MAX_FORMAT]; /* to store the format (`%...') */ 907 char *buff = luaL_prepbuffsize(&b, MAX_ITEM); /* to put formatted item */ 908 int nb = 0; /* number of bytes in added item */ 909 if (++arg > top) 910 luaL_argerror(L, arg, "no value"); 911 strfrmt = scanformat(L, strfrmt, form); 912 switch (*strfrmt++) { 913 case 'c': { 914 nb = sprintf(buff, form, luaL_checkint(L, arg)); 915 break; 916 } 917 case 'd': case 'i': { 918 lua_Number n = luaL_checknumber(L, arg); 919 LUA_INTFRM_T ni = (LUA_INTFRM_T)n; 920 lua_Number diff = n - (lua_Number)ni; 921 luaL_argcheck(L, -1 < diff && diff < 1, arg, 922 "not a number in proper range"); 923 addlenmod(form, LUA_INTFRMLEN); 924 nb = sprintf(buff, form, ni); 925 break; 926 } 927 case 'o': case 'u': case 'x': case 'X': { 928 lua_Number n = luaL_checknumber(L, arg); 929 unsigned LUA_INTFRM_T ni = (unsigned LUA_INTFRM_T)n; 930 lua_Number diff = n - (lua_Number)ni; 931 luaL_argcheck(L, -1 < diff && diff < 1, arg, 932 "not a non-negative number in proper range"); 933 addlenmod(form, LUA_INTFRMLEN); 934 nb = sprintf(buff, form, ni); 935 break; 936 } 937 case 'e': case 'E': case 'f': 938 #if defined(LUA_USE_AFORMAT) 939 case 'a': case 'A': 940 #endif 941 case 'g': case 'G': { 942 addlenmod(form, LUA_FLTFRMLEN); 943 nb = sprintf(buff, form, (LUA_FLTFRM_T)luaL_checknumber(L, arg)); 944 break; 945 } 946 case 'q': { 947 addquoted(L, &b, arg); 948 break; 949 } 950 case 's': { 951 size_t l; 952 const char *s = luaL_tolstring(L, arg, &l); 953 if (!strchr(form, '.') && l >= 100) { 954 /* no precision and string is too long to be formatted; 955 keep original string */ 956 luaL_addvalue(&b); 957 break; 958 } 959 else { 960 nb = sprintf(buff, form, s); 961 lua_pop(L, 1); /* remove result from 'luaL_tolstring' */ 962 break; 963 } 964 } 965 default: { /* also treat cases `pnLlh' */ 966 return luaL_error(L, "invalid option " LUA_QL("%%%c") " to " 967 LUA_QL("format"), *(strfrmt - 1)); 968 } 969 } 970 luaL_addsize(&b, nb); 971 } 972 } 973 luaL_pushresult(&b); 974 return 1; 975 } 976 977 /* }====================================================== */ 978 979 980 static const luaL_Reg strlib[] = { 981 {"byte", str_byte}, 982 {"char", str_char}, 983 {"dump", str_dump}, 984 {"find", str_find}, 985 {"format", str_format}, 986 {"gmatch", gmatch}, 987 {"gsub", str_gsub}, 988 {"len", str_len}, 989 {"lower", str_lower}, 990 {"match", str_match}, 991 {"rep", str_rep}, 992 {"reverse", str_reverse}, 993 {"sub", str_sub}, 994 {"upper", str_upper}, 995 {NULL, NULL} 996 }; 997 998 999 static void createmetatable (lua_State *L) { 1000 lua_createtable(L, 0, 1); /* table to be metatable for strings */ 1001 lua_pushliteral(L, ""); /* dummy string */ 1002 lua_pushvalue(L, -2); /* copy table */ 1003 lua_setmetatable(L, -2); /* set table as metatable for strings */ 1004 lua_pop(L, 1); /* pop dummy string */ 1005 lua_pushvalue(L, -2); /* get string library */ 1006 lua_setfield(L, -2, "__index"); /* metatable.__index = string */ 1007 lua_pop(L, 1); /* pop metatable */ 1008 } 1009 1010 1011 /* 1012 ** Open string library 1013 */ 1014 LUAMOD_API int luaopen_string (lua_State *L) { 1015 luaL_newlib(L, strlib); 1016 createmetatable(L); 1017 return 1; 1018 } 1019 1020