1 /* $OpenBSD: expr.c,v 1.21 2009/06/01 19:00:57 deraadt Exp $ */ 2 3 /*- 4 * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 5 * Thorsten Glaser <tg (at) mirbsd.org> 6 * 7 * Provided that these terms and disclaimer and all copyright notices 8 * are retained or reproduced in an accompanying document, permission 9 * is granted to deal in this work without restriction, including un- 10 * limited rights to use, publicly perform, distribute, sell, modify, 11 * merge, give away, or sublicence. 12 * 13 * This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to 14 * the utmost extent permitted by applicable law, neither express nor 15 * implied; without malicious intent or gross negligence. In no event 16 * may a licensor, author or contributor be held liable for indirect, 17 * direct, other damage, loss, or other issues arising in any way out 18 * of dealing in the work, even if advised of the possibility of such 19 * damage or existence of a defect, except proven that it results out 20 * of said person's immediate fault when using the work as intended. 21 */ 22 23 #include "sh.h" 24 25 __RCSID("$MirOS: src/bin/mksh/expr.c,v 1.49 2011/09/07 15:24:14 tg Exp $"); 26 27 /* The order of these enums is constrained by the order of opinfo[] */ 28 enum token { 29 /* some (long) unary operators */ 30 O_PLUSPLUS = 0, O_MINUSMINUS, 31 /* binary operators */ 32 O_EQ, O_NE, 33 /* assignments are assumed to be in range O_ASN .. O_BORASN */ 34 O_ASN, O_TIMESASN, O_DIVASN, O_MODASN, O_PLUSASN, O_MINUSASN, 35 O_LSHIFTASN, O_RSHIFTASN, O_BANDASN, O_BXORASN, O_BORASN, 36 O_LSHIFT, O_RSHIFT, 37 O_LE, O_GE, O_LT, O_GT, 38 O_LAND, 39 O_LOR, 40 O_TIMES, O_DIV, O_MOD, 41 O_PLUS, O_MINUS, 42 O_BAND, 43 O_BXOR, 44 O_BOR, 45 O_TERN, 46 O_COMMA, 47 /* things after this aren't used as binary operators */ 48 /* unary that are not also binaries */ 49 O_BNOT, O_LNOT, 50 /* misc */ 51 OPEN_PAREN, CLOSE_PAREN, CTERN, 52 /* things that don't appear in the opinfo[] table */ 53 VAR, LIT, END, BAD 54 }; 55 #define IS_BINOP(op) (((int)op) >= (int)O_EQ && ((int)op) <= (int)O_COMMA) 56 #define IS_ASSIGNOP(op) ((int)(op) >= (int)O_ASN && (int)(op) <= (int)O_BORASN) 57 58 /* precisions; used to be enum prec but we do arithmetics on it */ 59 #define P_PRIMARY 0 /* VAR, LIT, (), ~ ! - + */ 60 #define P_MULT 1 /* * / % */ 61 #define P_ADD 2 /* + - */ 62 #define P_SHIFT 3 /* << >> */ 63 #define P_RELATION 4 /* < <= > >= */ 64 #define P_EQUALITY 5 /* == != */ 65 #define P_BAND 6 /* & */ 66 #define P_BXOR 7 /* ^ */ 67 #define P_BOR 8 /* | */ 68 #define P_LAND 9 /* && */ 69 #define P_LOR 10 /* || */ 70 #define P_TERN 11 /* ?: */ 71 #define P_ASSIGN 12 /* = *= /= %= += -= <<= >>= &= ^= |= */ 72 #define P_COMMA 13 /* , */ 73 #define MAX_PREC P_COMMA 74 75 struct opinfo { 76 char name[4]; 77 int len; /* name length */ 78 int prec; /* precedence: lower is higher */ 79 }; 80 81 /* Tokens in this table must be ordered so the longest are first 82 * (eg, += before +). If you change something, change the order 83 * of enum token too. 84 */ 85 static const struct opinfo opinfo[] = { 86 { "++", 2, P_PRIMARY }, /* before + */ 87 { "--", 2, P_PRIMARY }, /* before - */ 88 { "==", 2, P_EQUALITY }, /* before = */ 89 { "!=", 2, P_EQUALITY }, /* before ! */ 90 { "=", 1, P_ASSIGN }, /* keep assigns in a block */ 91 { "*=", 2, P_ASSIGN }, 92 { "/=", 2, P_ASSIGN }, 93 { "%=", 2, P_ASSIGN }, 94 { "+=", 2, P_ASSIGN }, 95 { "-=", 2, P_ASSIGN }, 96 { "<<=", 3, P_ASSIGN }, 97 { ">>=", 3, P_ASSIGN }, 98 { "&=", 2, P_ASSIGN }, 99 { "^=", 2, P_ASSIGN }, 100 { "|=", 2, P_ASSIGN }, 101 { "<<", 2, P_SHIFT }, 102 { ">>", 2, P_SHIFT }, 103 { "<=", 2, P_RELATION }, 104 { ">=", 2, P_RELATION }, 105 { "<", 1, P_RELATION }, 106 { ">", 1, P_RELATION }, 107 { "&&", 2, P_LAND }, 108 { "||", 2, P_LOR }, 109 { "*", 1, P_MULT }, 110 { "/", 1, P_MULT }, 111 { "%", 1, P_MULT }, 112 { "+", 1, P_ADD }, 113 { "-", 1, P_ADD }, 114 { "&", 1, P_BAND }, 115 { "^", 1, P_BXOR }, 116 { "|", 1, P_BOR }, 117 { "?", 1, P_TERN }, 118 { ",", 1, P_COMMA }, 119 { "~", 1, P_PRIMARY }, 120 { "!", 1, P_PRIMARY }, 121 { "(", 1, P_PRIMARY }, 122 { ")", 1, P_PRIMARY }, 123 { ":", 1, P_PRIMARY }, 124 { "", 0, P_PRIMARY } 125 }; 126 127 typedef struct expr_state Expr_state; 128 struct expr_state { 129 const char *expression; /* expression being evaluated */ 130 const char *tokp; /* lexical position */ 131 struct tbl *val; /* value from token() */ 132 struct tbl *evaling; /* variable that is being recursively 133 * expanded (EXPRINEVAL flag set) */ 134 int noassign; /* don't do assigns (for ?:,&&,||) */ 135 enum token tok; /* token from token() */ 136 bool arith; /* evaluating an $(()) expression? */ 137 bool natural; /* unsigned arithmetic calculation */ 138 }; 139 140 #define bivui(x, op, y) (es->natural ? \ 141 (mksh_ari_t)((x)->val.u op (y)->val.u) : \ 142 (mksh_ari_t)((x)->val.i op (y)->val.i) \ 143 ) 144 #define stvui(x, n) do { \ 145 if (es->natural) \ 146 (x)->val.u = (n); \ 147 else \ 148 (x)->val.i = (n); \ 149 } while (/* CONSTCOND */ 0) 150 151 enum error_type { 152 ET_UNEXPECTED, ET_BADLIT, ET_RECURSIVE, 153 ET_LVALUE, ET_RDONLY, ET_STR 154 }; 155 156 static void evalerr(Expr_state *, enum error_type, const char *) 157 MKSH_A_NORETURN; 158 static struct tbl *evalexpr(Expr_state *, int); 159 static void exprtoken(Expr_state *); 160 static struct tbl *do_ppmm(Expr_state *, enum token, struct tbl *, bool); 161 static void assign_check(Expr_state *, enum token, struct tbl *); 162 static struct tbl *tempvar(void); 163 static struct tbl *intvar(Expr_state *, struct tbl *); 164 165 /* 166 * parse and evaluate expression 167 */ 168 int 169 evaluate(const char *expr, mksh_ari_t *rval, int error_ok, bool arith) 170 { 171 struct tbl v; 172 int ret; 173 174 v.flag = DEFINED|INTEGER; 175 v.type = 0; 176 ret = v_evaluate(&v, expr, error_ok, arith); 177 *rval = v.val.i; 178 return (ret); 179 } 180 181 /* 182 * parse and evaluate expression, storing result in vp. 183 */ 184 int 185 v_evaluate(struct tbl *vp, const char *expr, volatile int error_ok, 186 bool arith) 187 { 188 struct tbl *v; 189 Expr_state curstate; 190 Expr_state * const es = &curstate; 191 int i; 192 193 /* save state to allow recursive calls */ 194 curstate.expression = curstate.tokp = expr; 195 curstate.noassign = 0; 196 curstate.arith = arith; 197 curstate.evaling = NULL; 198 curstate.natural = false; 199 200 newenv(E_ERRH); 201 i = sigsetjmp(e->jbuf, 0); 202 if (i) { 203 /* Clear EXPRINEVAL in of any variables we were playing with */ 204 if (curstate.evaling) 205 curstate.evaling->flag &= ~EXPRINEVAL; 206 quitenv(NULL); 207 if (i == LAEXPR) { 208 if (error_ok == KSH_RETURN_ERROR) 209 return (0); 210 errorfz(); 211 } 212 unwind(i); 213 /* NOTREACHED */ 214 } 215 216 exprtoken(es); 217 if (es->tok == END) { 218 es->tok = LIT; 219 es->val = tempvar(); 220 } 221 v = intvar(es, evalexpr(es, MAX_PREC)); 222 223 if (es->tok != END) 224 evalerr(es, ET_UNEXPECTED, NULL); 225 226 if (es->arith && es->natural) 227 vp->flag |= INT_U; 228 if (vp->flag & INTEGER) 229 setint_v(vp, v, es->arith); 230 else 231 /* can fail if readonly */ 232 setstr(vp, str_val(v), error_ok); 233 234 quitenv(NULL); 235 236 return (1); 237 } 238 239 static void 240 evalerr(Expr_state *es, enum error_type type, const char *str) 241 { 242 char tbuf[2]; 243 const char *s; 244 245 es->arith = false; 246 switch (type) { 247 case ET_UNEXPECTED: 248 switch (es->tok) { 249 case VAR: 250 s = es->val->name; 251 break; 252 case LIT: 253 s = str_val(es->val); 254 break; 255 case END: 256 s = "end of expression"; 257 break; 258 case BAD: 259 tbuf[0] = *es->tokp; 260 tbuf[1] = '\0'; 261 s = tbuf; 262 break; 263 default: 264 s = opinfo[(int)es->tok].name; 265 } 266 warningf(true, "%s: %s '%s'", es->expression, 267 "unexpected", s); 268 break; 269 270 case ET_BADLIT: 271 warningf(true, "%s: %s '%s'", es->expression, 272 "bad number", str); 273 break; 274 275 case ET_RECURSIVE: 276 warningf(true, "%s: %s '%s'", es->expression, 277 "expression recurses on parameter", str); 278 break; 279 280 case ET_LVALUE: 281 warningf(true, "%s: %s %s", 282 es->expression, str, "requires lvalue"); 283 break; 284 285 case ET_RDONLY: 286 warningf(true, "%s: %s %s", 287 es->expression, str, "applied to read only variable"); 288 break; 289 290 default: /* keep gcc happy */ 291 case ET_STR: 292 warningf(true, "%s: %s", es->expression, str); 293 break; 294 } 295 unwind(LAEXPR); 296 } 297 298 static struct tbl * 299 evalexpr(Expr_state *es, int prec) 300 { 301 struct tbl *vl, *vr = NULL, *vasn; 302 enum token op; 303 mksh_ari_t res = 0; 304 305 if (prec == P_PRIMARY) { 306 op = es->tok; 307 if (op == O_BNOT || op == O_LNOT || op == O_MINUS || 308 op == O_PLUS) { 309 exprtoken(es); 310 vl = intvar(es, evalexpr(es, P_PRIMARY)); 311 if (op == O_BNOT) 312 vl->val.i = ~vl->val.i; 313 else if (op == O_LNOT) 314 vl->val.i = !vl->val.i; 315 else if (op == O_MINUS) 316 vl->val.i = -vl->val.i; 317 /* op == O_PLUS is a no-op */ 318 } else if (op == OPEN_PAREN) { 319 exprtoken(es); 320 vl = evalexpr(es, MAX_PREC); 321 if (es->tok != CLOSE_PAREN) 322 evalerr(es, ET_STR, "missing )"); 323 exprtoken(es); 324 } else if (op == O_PLUSPLUS || op == O_MINUSMINUS) { 325 exprtoken(es); 326 vl = do_ppmm(es, op, es->val, true); 327 exprtoken(es); 328 } else if (op == VAR || op == LIT) { 329 vl = es->val; 330 exprtoken(es); 331 } else { 332 evalerr(es, ET_UNEXPECTED, NULL); 333 /* NOTREACHED */ 334 } 335 if (es->tok == O_PLUSPLUS || es->tok == O_MINUSMINUS) { 336 vl = do_ppmm(es, es->tok, vl, false); 337 exprtoken(es); 338 } 339 return (vl); 340 } 341 vl = evalexpr(es, prec - 1); 342 for (op = es->tok; IS_BINOP(op) && opinfo[(int)op].prec == prec; 343 op = es->tok) { 344 exprtoken(es); 345 vasn = vl; 346 if (op != O_ASN) /* vl may not have a value yet */ 347 vl = intvar(es, vl); 348 if (IS_ASSIGNOP(op)) { 349 assign_check(es, op, vasn); 350 vr = intvar(es, evalexpr(es, P_ASSIGN)); 351 } else if (op != O_TERN && op != O_LAND && op != O_LOR) 352 vr = intvar(es, evalexpr(es, prec - 1)); 353 if ((op == O_DIV || op == O_MOD || op == O_DIVASN || 354 op == O_MODASN) && vr->val.i == 0) { 355 if (es->noassign) 356 vr->val.i = 1; 357 else 358 evalerr(es, ET_STR, "zero divisor"); 359 } 360 switch ((int)op) { 361 case O_TIMES: 362 case O_TIMESASN: 363 res = bivui(vl, *, vr); 364 break; 365 case O_DIV: 366 case O_DIVASN: 367 res = bivui(vl, /, vr); 368 break; 369 case O_MOD: 370 case O_MODASN: 371 res = bivui(vl, %, vr); 372 break; 373 case O_PLUS: 374 case O_PLUSASN: 375 res = bivui(vl, +, vr); 376 break; 377 case O_MINUS: 378 case O_MINUSASN: 379 res = bivui(vl, -, vr); 380 break; 381 case O_LSHIFT: 382 case O_LSHIFTASN: 383 res = bivui(vl, <<, vr); 384 break; 385 case O_RSHIFT: 386 case O_RSHIFTASN: 387 res = bivui(vl, >>, vr); 388 break; 389 case O_LT: 390 res = bivui(vl, <, vr); 391 break; 392 case O_LE: 393 res = bivui(vl, <=, vr); 394 break; 395 case O_GT: 396 res = bivui(vl, >, vr); 397 break; 398 case O_GE: 399 res = bivui(vl, >=, vr); 400 break; 401 case O_EQ: 402 res = bivui(vl, ==, vr); 403 break; 404 case O_NE: 405 res = bivui(vl, !=, vr); 406 break; 407 case O_BAND: 408 case O_BANDASN: 409 res = bivui(vl, &, vr); 410 break; 411 case O_BXOR: 412 case O_BXORASN: 413 res = bivui(vl, ^, vr); 414 break; 415 case O_BOR: 416 case O_BORASN: 417 res = bivui(vl, |, vr); 418 break; 419 case O_LAND: 420 if (!vl->val.i) 421 es->noassign++; 422 vr = intvar(es, evalexpr(es, prec - 1)); 423 res = bivui(vl, &&, vr); 424 if (!vl->val.i) 425 es->noassign--; 426 break; 427 case O_LOR: 428 if (vl->val.i) 429 es->noassign++; 430 vr = intvar(es, evalexpr(es, prec - 1)); 431 res = bivui(vl, ||, vr); 432 if (vl->val.i) 433 es->noassign--; 434 break; 435 case O_TERN: 436 { 437 bool ev = vl->val.i != 0; 438 439 if (!ev) 440 es->noassign++; 441 vl = evalexpr(es, MAX_PREC); 442 if (!ev) 443 es->noassign--; 444 if (es->tok != CTERN) 445 evalerr(es, ET_STR, "missing :"); 446 exprtoken(es); 447 if (ev) 448 es->noassign++; 449 vr = evalexpr(es, P_TERN); 450 if (ev) 451 es->noassign--; 452 vl = ev ? vl : vr; 453 } 454 break; 455 case O_ASN: 456 res = vr->val.i; 457 break; 458 case O_COMMA: 459 res = vr->val.i; 460 break; 461 } 462 if (IS_ASSIGNOP(op)) { 463 stvui(vr, res); 464 if (!es->noassign) { 465 if (vasn->flag & INTEGER) 466 setint_v(vasn, vr, es->arith); 467 else 468 setint(vasn, res); 469 } 470 vl = vr; 471 } else if (op != O_TERN) 472 stvui(vl, res); 473 } 474 return (vl); 475 } 476 477 static void 478 exprtoken(Expr_state *es) 479 { 480 const char *cp = es->tokp; 481 int c; 482 char *tvar; 483 484 /* skip white space */ 485 skip_spaces: 486 while ((c = *cp), ksh_isspace(c)) 487 ++cp; 488 if (es->tokp == es->expression && c == '#') { 489 /* expression begins with # */ 490 es->natural = true; /* switch to unsigned */ 491 ++cp; 492 goto skip_spaces; 493 } 494 es->tokp = cp; 495 496 if (c == '\0') 497 es->tok = END; 498 else if (ksh_isalphx(c)) { 499 for (; ksh_isalnux(c); c = *cp) 500 cp++; 501 if (c == '[') { 502 size_t len; 503 504 len = array_ref_len(cp); 505 if (len == 0) 506 evalerr(es, ET_STR, "missing ]"); 507 cp += len; 508 } else if (c == '(' /*)*/ ) { 509 /* todo: add math functions (all take single argument): 510 * abs acos asin atan cos cosh exp int log sin sinh sqrt 511 * tan tanh 512 */ 513 ; 514 } 515 if (es->noassign) { 516 es->val = tempvar(); 517 es->val->flag |= EXPRLVALUE; 518 } else { 519 strndupx(tvar, es->tokp, cp - es->tokp, ATEMP); 520 es->val = global(tvar); 521 afree(tvar, ATEMP); 522 } 523 es->tok = VAR; 524 } else if (c == '1' && cp[1] == '#') { 525 cp += 2; 526 cp += utf_ptradj(cp); 527 strndupx(tvar, es->tokp, cp - es->tokp, ATEMP); 528 goto process_tvar; 529 #ifndef MKSH_SMALL 530 } else if (c == '\'') { 531 ++cp; 532 cp += utf_ptradj(cp); 533 if (*cp++ != '\'') 534 evalerr(es, ET_STR, 535 "multi-character character constant"); 536 /* 'x' -> 1#x (x = one multibyte character) */ 537 c = cp - es->tokp; 538 tvar = alloc(c + /* NUL */ 1, ATEMP); 539 tvar[0] = '1'; 540 tvar[1] = '#'; 541 memcpy(tvar + 2, es->tokp + 1, c - 2); 542 tvar[c] = '\0'; 543 goto process_tvar; 544 #endif 545 } else if (ksh_isdigit(c)) { 546 while (c != '_' && (ksh_isalnux(c) || c == '#')) 547 c = *cp++; 548 strndupx(tvar, es->tokp, --cp - es->tokp, ATEMP); 549 process_tvar: 550 es->val = tempvar(); 551 es->val->flag &= ~INTEGER; 552 es->val->type = 0; 553 es->val->val.s = tvar; 554 if (setint_v(es->val, es->val, es->arith) == NULL) 555 evalerr(es, ET_BADLIT, tvar); 556 afree(tvar, ATEMP); 557 es->tok = LIT; 558 } else { 559 int i, n0; 560 561 for (i = 0; (n0 = opinfo[i].name[0]); i++) 562 if (c == n0 && strncmp(cp, opinfo[i].name, 563 (size_t)opinfo[i].len) == 0) { 564 es->tok = (enum token)i; 565 cp += opinfo[i].len; 566 break; 567 } 568 if (!n0) 569 es->tok = BAD; 570 } 571 es->tokp = cp; 572 } 573 574 /* Do a ++ or -- operation */ 575 static struct tbl * 576 do_ppmm(Expr_state *es, enum token op, struct tbl *vasn, bool is_prefix) 577 { 578 struct tbl *vl; 579 mksh_ari_t oval; 580 581 assign_check(es, op, vasn); 582 583 vl = intvar(es, vasn); 584 oval = vl->val.i; 585 if (op == O_PLUSPLUS) { 586 if (es->natural) 587 ++vl->val.u; 588 else 589 ++vl->val.i; 590 } else { 591 if (es->natural) 592 --vl->val.u; 593 else 594 --vl->val.i; 595 } 596 if (vasn->flag & INTEGER) 597 setint_v(vasn, vl, es->arith); 598 else 599 setint(vasn, vl->val.i); 600 if (!is_prefix) /* undo the inc/dec */ 601 vl->val.i = oval; 602 603 return (vl); 604 } 605 606 static void 607 assign_check(Expr_state *es, enum token op, struct tbl *vasn) 608 { 609 if (es->tok == END || 610 (vasn->name[0] == '\0' && !(vasn->flag & EXPRLVALUE))) 611 evalerr(es, ET_LVALUE, opinfo[(int)op].name); 612 else if (vasn->flag & RDONLY) 613 evalerr(es, ET_RDONLY, opinfo[(int)op].name); 614 } 615 616 static struct tbl * 617 tempvar(void) 618 { 619 struct tbl *vp; 620 621 vp = alloc(sizeof(struct tbl), ATEMP); 622 vp->flag = ISSET|INTEGER; 623 vp->type = 0; 624 vp->areap = ATEMP; 625 vp->ua.hval = 0; 626 vp->val.i = 0; 627 vp->name[0] = '\0'; 628 return (vp); 629 } 630 631 /* cast (string) variable to temporary integer variable */ 632 static struct tbl * 633 intvar(Expr_state *es, struct tbl *vp) 634 { 635 struct tbl *vq; 636 637 /* try to avoid replacing a temp var with another temp var */ 638 if (vp->name[0] == '\0' && 639 (vp->flag & (ISSET|INTEGER|EXPRLVALUE)) == (ISSET|INTEGER)) 640 return (vp); 641 642 vq = tempvar(); 643 if (setint_v(vq, vp, es->arith) == NULL) { 644 if (vp->flag & EXPRINEVAL) 645 evalerr(es, ET_RECURSIVE, vp->name); 646 es->evaling = vp; 647 vp->flag |= EXPRINEVAL; 648 v_evaluate(vq, str_val(vp), KSH_UNWIND_ERROR, es->arith); 649 vp->flag &= ~EXPRINEVAL; 650 es->evaling = NULL; 651 } 652 return (vq); 653 } 654 655 656 /* 657 * UTF-8 support code: high-level functions 658 */ 659 660 int 661 utf_widthadj(const char *src, const char **dst) 662 { 663 size_t len; 664 unsigned int wc; 665 int width; 666 667 if (!UTFMODE || (len = utf_mbtowc(&wc, src)) == (size_t)-1 || 668 wc == 0) 669 len = width = 1; 670 else if ((width = utf_wcwidth(wc)) < 0) 671 /* XXX use 2 for x_zotc3 here? */ 672 width = 1; 673 674 if (dst) 675 *dst = src + len; 676 return (width); 677 } 678 679 size_t 680 utf_mbswidth(const char *s) 681 { 682 size_t len, width = 0; 683 unsigned int wc; 684 int cw; 685 686 if (!UTFMODE) 687 return (strlen(s)); 688 689 while (*s) 690 if (((len = utf_mbtowc(&wc, s)) == (size_t)-1) || 691 ((cw = utf_wcwidth(wc)) == -1)) { 692 s++; 693 width += 1; 694 } else { 695 s += len; 696 width += cw; 697 } 698 return (width); 699 } 700 701 const char * 702 utf_skipcols(const char *p, int cols) 703 { 704 int c = 0; 705 706 while (c < cols) { 707 if (!*p) 708 return (p + cols - c); 709 c += utf_widthadj(p, &p); 710 } 711 return (p); 712 } 713 714 size_t 715 utf_ptradj(const char *src) 716 { 717 register size_t n; 718 719 if (!UTFMODE || 720 *(const unsigned char *)(src) < 0xC2 || 721 (n = utf_mbtowc(NULL, src)) == (size_t)-1) 722 n = 1; 723 return (n); 724 } 725 726 /* 727 * UTF-8 support code: low-level functions 728 */ 729 730 /* CESU-8 multibyte and wide character conversion crafted for mksh */ 731 732 size_t 733 utf_mbtowc(unsigned int *dst, const char *src) 734 { 735 const unsigned char *s = (const unsigned char *)src; 736 unsigned int c, wc; 737 738 if ((wc = *s++) < 0x80) { 739 out: 740 if (dst != NULL) 741 *dst = wc; 742 return (wc ? ((const char *)s - src) : 0); 743 } 744 if (wc < 0xC2 || wc >= 0xF0) 745 /* < 0xC0: spurious second byte */ 746 /* < 0xC2: non-minimalistic mapping error in 2-byte seqs */ 747 /* > 0xEF: beyond BMP */ 748 goto ilseq; 749 750 if (wc < 0xE0) { 751 wc = (wc & 0x1F) << 6; 752 if (((c = *s++) & 0xC0) != 0x80) 753 goto ilseq; 754 wc |= c & 0x3F; 755 goto out; 756 } 757 758 wc = (wc & 0x0F) << 12; 759 760 if (((c = *s++) & 0xC0) != 0x80) 761 goto ilseq; 762 wc |= (c & 0x3F) << 6; 763 764 if (((c = *s++) & 0xC0) != 0x80) 765 goto ilseq; 766 wc |= c & 0x3F; 767 768 /* Check for non-minimalistic mapping error in 3-byte seqs */ 769 if (wc >= 0x0800 && wc <= 0xFFFD) 770 goto out; 771 ilseq: 772 return ((size_t)(-1)); 773 } 774 775 size_t 776 utf_wctomb(char *dst, unsigned int wc) 777 { 778 unsigned char *d; 779 780 if (wc < 0x80) { 781 *dst = wc; 782 return (1); 783 } 784 785 d = (unsigned char *)dst; 786 if (wc < 0x0800) 787 *d++ = (wc >> 6) | 0xC0; 788 else { 789 *d++ = ((wc = wc > 0xFFFD ? 0xFFFD : wc) >> 12) | 0xE0; 790 *d++ = ((wc >> 6) & 0x3F) | 0x80; 791 } 792 *d++ = (wc & 0x3F) | 0x80; 793 return ((char *)d - dst); 794 } 795 796 797 #ifndef MKSH_mirbsd_wcwidth 798 /* --- begin of wcwidth.c excerpt --- */ 799 /*- 800 * Markus Kuhn -- 2007-05-26 (Unicode 5.0) 801 * 802 * Permission to use, copy, modify, and distribute this software 803 * for any purpose and without fee is hereby granted. The author 804 * disclaims all warranties with regard to this software. 805 */ 806 807 __RCSID("$miros: src/lib/libc/i18n/wcwidth.c,v 1.10 2010/12/11 16:05:03 tg Exp $"); 808 809 int 810 utf_wcwidth(unsigned int c) 811 { 812 static const struct cbset { 813 unsigned short first; 814 unsigned short last; 815 } comb[] = { 816 /* Unicode 6.0.0 BMP */ 817 { 0x0300, 0x036F }, { 0x0483, 0x0489 }, { 0x0591, 0x05BD }, 818 { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 }, { 0x05C4, 0x05C5 }, 819 { 0x05C7, 0x05C7 }, { 0x0600, 0x0603 }, { 0x0610, 0x061A }, 820 { 0x064B, 0x065F }, { 0x0670, 0x0670 }, { 0x06D6, 0x06DD }, 821 { 0x06DF, 0x06E4 }, { 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED }, 822 { 0x070F, 0x070F }, { 0x0711, 0x0711 }, { 0x0730, 0x074A }, 823 { 0x07A6, 0x07B0 }, { 0x07EB, 0x07F3 }, { 0x0816, 0x0819 }, 824 { 0x081B, 0x0823 }, { 0x0825, 0x0827 }, { 0x0829, 0x082D }, 825 { 0x0859, 0x085B }, { 0x0900, 0x0902 }, { 0x093A, 0x093A }, 826 { 0x093C, 0x093C }, { 0x0941, 0x0948 }, { 0x094D, 0x094D }, 827 { 0x0951, 0x0957 }, { 0x0962, 0x0963 }, { 0x0981, 0x0981 }, 828 { 0x09BC, 0x09BC }, { 0x09C1, 0x09C4 }, { 0x09CD, 0x09CD }, 829 { 0x09E2, 0x09E3 }, { 0x0A01, 0x0A02 }, { 0x0A3C, 0x0A3C }, 830 { 0x0A41, 0x0A42 }, { 0x0A47, 0x0A48 }, { 0x0A4B, 0x0A4D }, 831 { 0x0A51, 0x0A51 }, { 0x0A70, 0x0A71 }, { 0x0A75, 0x0A75 }, 832 { 0x0A81, 0x0A82 }, { 0x0ABC, 0x0ABC }, { 0x0AC1, 0x0AC5 }, 833 { 0x0AC7, 0x0AC8 }, { 0x0ACD, 0x0ACD }, { 0x0AE2, 0x0AE3 }, 834 { 0x0B01, 0x0B01 }, { 0x0B3C, 0x0B3C }, { 0x0B3F, 0x0B3F }, 835 { 0x0B41, 0x0B44 }, { 0x0B4D, 0x0B4D }, { 0x0B56, 0x0B56 }, 836 { 0x0B62, 0x0B63 }, { 0x0B82, 0x0B82 }, { 0x0BC0, 0x0BC0 }, 837 { 0x0BCD, 0x0BCD }, { 0x0C3E, 0x0C40 }, { 0x0C46, 0x0C48 }, 838 { 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, { 0x0C62, 0x0C63 }, 839 { 0x0CBC, 0x0CBC }, { 0x0CBF, 0x0CBF }, { 0x0CC6, 0x0CC6 }, 840 { 0x0CCC, 0x0CCD }, { 0x0CE2, 0x0CE3 }, { 0x0D41, 0x0D44 }, 841 { 0x0D4D, 0x0D4D }, { 0x0D62, 0x0D63 }, { 0x0DCA, 0x0DCA }, 842 { 0x0DD2, 0x0DD4 }, { 0x0DD6, 0x0DD6 }, { 0x0E31, 0x0E31 }, 843 { 0x0E34, 0x0E3A }, { 0x0E47, 0x0E4E }, { 0x0EB1, 0x0EB1 }, 844 { 0x0EB4, 0x0EB9 }, { 0x0EBB, 0x0EBC }, { 0x0EC8, 0x0ECD }, 845 { 0x0F18, 0x0F19 }, { 0x0F35, 0x0F35 }, { 0x0F37, 0x0F37 }, 846 { 0x0F39, 0x0F39 }, { 0x0F71, 0x0F7E }, { 0x0F80, 0x0F84 }, 847 { 0x0F86, 0x0F87 }, { 0x0F8D, 0x0F97 }, { 0x0F99, 0x0FBC }, 848 { 0x0FC6, 0x0FC6 }, { 0x102D, 0x1030 }, { 0x1032, 0x1037 }, 849 { 0x1039, 0x103A }, { 0x103D, 0x103E }, { 0x1058, 0x1059 }, 850 { 0x105E, 0x1060 }, { 0x1071, 0x1074 }, { 0x1082, 0x1082 }, 851 { 0x1085, 0x1086 }, { 0x108D, 0x108D }, { 0x109D, 0x109D }, 852 { 0x1160, 0x11FF }, { 0x135D, 0x135F }, { 0x1712, 0x1714 }, 853 { 0x1732, 0x1734 }, { 0x1752, 0x1753 }, { 0x1772, 0x1773 }, 854 { 0x17B4, 0x17B5 }, { 0x17B7, 0x17BD }, { 0x17C6, 0x17C6 }, 855 { 0x17C9, 0x17D3 }, { 0x17DD, 0x17DD }, { 0x180B, 0x180D }, 856 { 0x18A9, 0x18A9 }, { 0x1920, 0x1922 }, { 0x1927, 0x1928 }, 857 { 0x1932, 0x1932 }, { 0x1939, 0x193B }, { 0x1A17, 0x1A18 }, 858 { 0x1A56, 0x1A56 }, { 0x1A58, 0x1A5E }, { 0x1A60, 0x1A60 }, 859 { 0x1A62, 0x1A62 }, { 0x1A65, 0x1A6C }, { 0x1A73, 0x1A7C }, 860 { 0x1A7F, 0x1A7F }, { 0x1B00, 0x1B03 }, { 0x1B34, 0x1B34 }, 861 { 0x1B36, 0x1B3A }, { 0x1B3C, 0x1B3C }, { 0x1B42, 0x1B42 }, 862 { 0x1B6B, 0x1B73 }, { 0x1B80, 0x1B81 }, { 0x1BA2, 0x1BA5 }, 863 { 0x1BA8, 0x1BA9 }, { 0x1BE6, 0x1BE6 }, { 0x1BE8, 0x1BE9 }, 864 { 0x1BED, 0x1BED }, { 0x1BEF, 0x1BF1 }, { 0x1C2C, 0x1C33 }, 865 { 0x1C36, 0x1C37 }, { 0x1CD0, 0x1CD2 }, { 0x1CD4, 0x1CE0 }, 866 { 0x1CE2, 0x1CE8 }, { 0x1CED, 0x1CED }, { 0x1DC0, 0x1DE6 }, 867 { 0x1DFC, 0x1DFF }, { 0x200B, 0x200F }, { 0x202A, 0x202E }, 868 { 0x2060, 0x2064 }, { 0x206A, 0x206F }, { 0x20D0, 0x20F0 }, 869 { 0x2CEF, 0x2CF1 }, { 0x2D7F, 0x2D7F }, { 0x2DE0, 0x2DFF }, 870 { 0x302A, 0x302F }, { 0x3099, 0x309A }, { 0xA66F, 0xA672 }, 871 { 0xA67C, 0xA67D }, { 0xA6F0, 0xA6F1 }, { 0xA802, 0xA802 }, 872 { 0xA806, 0xA806 }, { 0xA80B, 0xA80B }, { 0xA825, 0xA826 }, 873 { 0xA8C4, 0xA8C4 }, { 0xA8E0, 0xA8F1 }, { 0xA926, 0xA92D }, 874 { 0xA947, 0xA951 }, { 0xA980, 0xA982 }, { 0xA9B3, 0xA9B3 }, 875 { 0xA9B6, 0xA9B9 }, { 0xA9BC, 0xA9BC }, { 0xAA29, 0xAA2E }, 876 { 0xAA31, 0xAA32 }, { 0xAA35, 0xAA36 }, { 0xAA43, 0xAA43 }, 877 { 0xAA4C, 0xAA4C }, { 0xAAB0, 0xAAB0 }, { 0xAAB2, 0xAAB4 }, 878 { 0xAAB7, 0xAAB8 }, { 0xAABE, 0xAABF }, { 0xAAC1, 0xAAC1 }, 879 { 0xABE5, 0xABE5 }, { 0xABE8, 0xABE8 }, { 0xABED, 0xABED }, 880 { 0xFB1E, 0xFB1E }, { 0xFE00, 0xFE0F }, { 0xFE20, 0xFE26 }, 881 { 0xFEFF, 0xFEFF }, { 0xFFF9, 0xFFFB } 882 }; 883 size_t min = 0, mid, max = NELEM(comb) - 1; 884 885 /* test for 8-bit control characters */ 886 if (c < 32 || (c >= 0x7F && c < 0xA0)) 887 return (c ? -1 : 0); 888 889 /* binary search in table of non-spacing characters */ 890 if (c >= comb[0].first && c <= comb[max].last) 891 while (max >= min) { 892 mid = (min + max) / 2; 893 if (c > comb[mid].last) 894 min = mid + 1; 895 else if (c < comb[mid].first) 896 max = mid - 1; 897 else 898 return (0); 899 } 900 901 /* if we arrive here, c is not a combining or C0/C1 control char */ 902 903 return ((c >= 0x1100 && ( 904 c <= 0x115F || /* Hangul Jamo init. consonants */ 905 c == 0x2329 || c == 0x232A || 906 (c >= 0x2E80 && c <= 0xA4CF && c != 0x303F) || /* CJK ... Yi */ 907 (c >= 0xAC00 && c <= 0xD7A3) || /* Hangul Syllables */ 908 (c >= 0xF900 && c <= 0xFAFF) || /* CJK Compatibility Ideographs */ 909 (c >= 0xFE10 && c <= 0xFE19) || /* Vertical forms */ 910 (c >= 0xFE30 && c <= 0xFE6F) || /* CJK Compatibility Forms */ 911 (c >= 0xFF00 && c <= 0xFF60) || /* Fullwidth Forms */ 912 (c >= 0xFFE0 && c <= 0xFFE6))) ? 2 : 1); 913 } 914 /* --- end of wcwidth.c excerpt --- */ 915 #endif 916 917 /* 918 * Wrapper around access(2) because it says root can execute everything 919 * on some operating systems. Does not set errno, no user needs it. Use 920 * this iff mode can have the X_OK bit set, access otherwise. 921 */ 922 int 923 ksh_access(const char *fn, int mode) 924 { 925 int rv; 926 struct stat sb; 927 928 if ((rv = access(fn, mode)) == 0 && kshuid == 0 && (mode & X_OK) && 929 (rv = stat(fn, &sb)) == 0 && !S_ISDIR(sb.st_mode) && 930 (sb.st_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) == 0) 931 rv = -1; 932 933 return (rv); 934 } 935