1 /* $OpenBSD: expr.c,v 1.21 2009/06/01 19:00:57 deraadt Exp $ */ 2 3 /*- 4 * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 5 * Thorsten Glaser <tg (at) mirbsd.org> 6 * 7 * Provided that these terms and disclaimer and all copyright notices 8 * are retained or reproduced in an accompanying document, permission 9 * is granted to deal in this work without restriction, including un- 10 * limited rights to use, publicly perform, distribute, sell, modify, 11 * merge, give away, or sublicence. 12 * 13 * This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to 14 * the utmost extent permitted by applicable law, neither express nor 15 * implied; without malicious intent or gross negligence. In no event 16 * may a licensor, author or contributor be held liable for indirect, 17 * direct, other damage, loss, or other issues arising in any way out 18 * of dealing in the work, even if advised of the possibility of such 19 * damage or existence of a defect, except proven that it results out 20 * of said person's immediate fault when using the work as intended. 21 */ 22 23 #include "sh.h" 24 25 __RCSID("$MirOS: src/bin/mksh/expr.c,v 1.44 2010/08/14 21:35:13 tg Exp $"); 26 27 /* The order of these enums is constrained by the order of opinfo[] */ 28 enum token { 29 /* some (long) unary operators */ 30 O_PLUSPLUS = 0, O_MINUSMINUS, 31 /* binary operators */ 32 O_EQ, O_NE, 33 /* assignments are assumed to be in range O_ASN .. O_BORASN */ 34 O_ASN, O_TIMESASN, O_DIVASN, O_MODASN, O_PLUSASN, O_MINUSASN, 35 O_LSHIFTASN, O_RSHIFTASN, O_BANDASN, O_BXORASN, O_BORASN, 36 O_LSHIFT, O_RSHIFT, 37 O_LE, O_GE, O_LT, O_GT, 38 O_LAND, 39 O_LOR, 40 O_TIMES, O_DIV, O_MOD, 41 O_PLUS, O_MINUS, 42 O_BAND, 43 O_BXOR, 44 O_BOR, 45 O_TERN, 46 O_COMMA, 47 /* things after this aren't used as binary operators */ 48 /* unary that are not also binaries */ 49 O_BNOT, O_LNOT, 50 /* misc */ 51 OPEN_PAREN, CLOSE_PAREN, CTERN, 52 /* things that don't appear in the opinfo[] table */ 53 VAR, LIT, END, BAD 54 }; 55 #define IS_BINOP(op) (((int)op) >= (int)O_EQ && ((int)op) <= (int)O_COMMA) 56 #define IS_ASSIGNOP(op) ((int)(op) >= (int)O_ASN && (int)(op) <= (int)O_BORASN) 57 58 /* precisions; used to be enum prec but we do arithmetics on it */ 59 #define P_PRIMARY 0 /* VAR, LIT, (), ~ ! - + */ 60 #define P_MULT 1 /* * / % */ 61 #define P_ADD 2 /* + - */ 62 #define P_SHIFT 3 /* << >> */ 63 #define P_RELATION 4 /* < <= > >= */ 64 #define P_EQUALITY 5 /* == != */ 65 #define P_BAND 6 /* & */ 66 #define P_BXOR 7 /* ^ */ 67 #define P_BOR 8 /* | */ 68 #define P_LAND 9 /* && */ 69 #define P_LOR 10 /* || */ 70 #define P_TERN 11 /* ?: */ 71 #define P_ASSIGN 12 /* = *= /= %= += -= <<= >>= &= ^= |= */ 72 #define P_COMMA 13 /* , */ 73 #define MAX_PREC P_COMMA 74 75 struct opinfo { 76 char name[4]; 77 int len; /* name length */ 78 int prec; /* precedence: lower is higher */ 79 }; 80 81 /* Tokens in this table must be ordered so the longest are first 82 * (eg, += before +). If you change something, change the order 83 * of enum token too. 84 */ 85 static const struct opinfo opinfo[] = { 86 { "++", 2, P_PRIMARY }, /* before + */ 87 { "--", 2, P_PRIMARY }, /* before - */ 88 { "==", 2, P_EQUALITY }, /* before = */ 89 { "!=", 2, P_EQUALITY }, /* before ! */ 90 { "=", 1, P_ASSIGN }, /* keep assigns in a block */ 91 { "*=", 2, P_ASSIGN }, 92 { "/=", 2, P_ASSIGN }, 93 { "%=", 2, P_ASSIGN }, 94 { "+=", 2, P_ASSIGN }, 95 { "-=", 2, P_ASSIGN }, 96 { "<<=", 3, P_ASSIGN }, 97 { ">>=", 3, P_ASSIGN }, 98 { "&=", 2, P_ASSIGN }, 99 { "^=", 2, P_ASSIGN }, 100 { "|=", 2, P_ASSIGN }, 101 { "<<", 2, P_SHIFT }, 102 { ">>", 2, P_SHIFT }, 103 { "<=", 2, P_RELATION }, 104 { ">=", 2, P_RELATION }, 105 { "<", 1, P_RELATION }, 106 { ">", 1, P_RELATION }, 107 { "&&", 2, P_LAND }, 108 { "||", 2, P_LOR }, 109 { "*", 1, P_MULT }, 110 { "/", 1, P_MULT }, 111 { "%", 1, P_MULT }, 112 { "+", 1, P_ADD }, 113 { "-", 1, P_ADD }, 114 { "&", 1, P_BAND }, 115 { "^", 1, P_BXOR }, 116 { "|", 1, P_BOR }, 117 { "?", 1, P_TERN }, 118 { ",", 1, P_COMMA }, 119 { "~", 1, P_PRIMARY }, 120 { "!", 1, P_PRIMARY }, 121 { "(", 1, P_PRIMARY }, 122 { ")", 1, P_PRIMARY }, 123 { ":", 1, P_PRIMARY }, 124 { "", 0, P_PRIMARY } 125 }; 126 127 typedef struct expr_state Expr_state; 128 struct expr_state { 129 const char *expression; /* expression being evaluated */ 130 const char *tokp; /* lexical position */ 131 struct tbl *val; /* value from token() */ 132 struct tbl *evaling; /* variable that is being recursively 133 * expanded (EXPRINEVAL flag set) */ 134 int noassign; /* don't do assigns (for ?:,&&,||) */ 135 enum token tok; /* token from token() */ 136 bool arith; /* evaluating an $(()) expression? */ 137 bool natural; /* unsigned arithmetic calculation */ 138 }; 139 140 #define bivui(x, op, y) (es->natural ? \ 141 (mksh_ari_t)((x)->val.u op (y)->val.u) : \ 142 (mksh_ari_t)((x)->val.i op (y)->val.i) \ 143 ) 144 #define chvui(x, op) do { \ 145 if (es->natural) \ 146 (x)->val.u = op (x)->val.u; \ 147 else \ 148 (x)->val.i = op (x)->val.i; \ 149 } while (/* CONSTCOND */ 0) 150 #define stvui(x, n) do { \ 151 if (es->natural) \ 152 (x)->val.u = (n); \ 153 else \ 154 (x)->val.i = (n); \ 155 } while (/* CONSTCOND */ 0) 156 157 enum error_type { 158 ET_UNEXPECTED, ET_BADLIT, ET_RECURSIVE, 159 ET_LVALUE, ET_RDONLY, ET_STR 160 }; 161 162 static void evalerr(Expr_state *, enum error_type, const char *) 163 MKSH_A_NORETURN; 164 static struct tbl *evalexpr(Expr_state *, int); 165 static void exprtoken(Expr_state *); 166 static struct tbl *do_ppmm(Expr_state *, enum token, struct tbl *, bool); 167 static void assign_check(Expr_state *, enum token, struct tbl *); 168 static struct tbl *tempvar(void); 169 static struct tbl *intvar(Expr_state *, struct tbl *); 170 171 /* 172 * parse and evaluate expression 173 */ 174 int 175 evaluate(const char *expr, mksh_ari_t *rval, int error_ok, bool arith) 176 { 177 struct tbl v; 178 int ret; 179 180 v.flag = DEFINED|INTEGER; 181 v.type = 0; 182 ret = v_evaluate(&v, expr, error_ok, arith); 183 *rval = v.val.i; 184 return (ret); 185 } 186 187 /* 188 * parse and evaluate expression, storing result in vp. 189 */ 190 int 191 v_evaluate(struct tbl *vp, const char *expr, volatile int error_ok, 192 bool arith) 193 { 194 struct tbl *v; 195 Expr_state curstate; 196 Expr_state * const es = &curstate; 197 int i; 198 199 /* save state to allow recursive calls */ 200 curstate.expression = curstate.tokp = expr; 201 curstate.noassign = 0; 202 curstate.arith = arith; 203 curstate.evaling = NULL; 204 curstate.natural = false; 205 206 newenv(E_ERRH); 207 i = sigsetjmp(e->jbuf, 0); 208 if (i) { 209 /* Clear EXPRINEVAL in of any variables we were playing with */ 210 if (curstate.evaling) 211 curstate.evaling->flag &= ~EXPRINEVAL; 212 quitenv(NULL); 213 if (i == LAEXPR) { 214 if (error_ok == KSH_RETURN_ERROR) 215 return (0); 216 errorfz(); 217 } 218 unwind(i); 219 /* NOTREACHED */ 220 } 221 222 exprtoken(es); 223 if (es->tok == END) { 224 es->tok = LIT; 225 es->val = tempvar(); 226 } 227 v = intvar(es, evalexpr(es, MAX_PREC)); 228 229 if (es->tok != END) 230 evalerr(es, ET_UNEXPECTED, NULL); 231 232 if (es->arith && es->natural) 233 vp->flag |= INT_U; 234 if (vp->flag & INTEGER) 235 setint_v(vp, v, es->arith); 236 else 237 /* can fail if readonly */ 238 setstr(vp, str_val(v), error_ok); 239 240 quitenv(NULL); 241 242 return (1); 243 } 244 245 static void 246 evalerr(Expr_state *es, enum error_type type, const char *str) 247 { 248 char tbuf[2]; 249 const char *s; 250 251 es->arith = false; 252 switch (type) { 253 case ET_UNEXPECTED: 254 switch (es->tok) { 255 case VAR: 256 s = es->val->name; 257 break; 258 case LIT: 259 s = str_val(es->val); 260 break; 261 case END: 262 s = "end of expression"; 263 break; 264 case BAD: 265 tbuf[0] = *es->tokp; 266 tbuf[1] = '\0'; 267 s = tbuf; 268 break; 269 default: 270 s = opinfo[(int)es->tok].name; 271 } 272 warningf(true, "%s: unexpected '%s'", es->expression, s); 273 break; 274 275 case ET_BADLIT: 276 warningf(true, "%s: bad number '%s'", es->expression, str); 277 break; 278 279 case ET_RECURSIVE: 280 warningf(true, "%s: expression recurses on parameter '%s'", 281 es->expression, str); 282 break; 283 284 case ET_LVALUE: 285 warningf(true, "%s: %s requires lvalue", 286 es->expression, str); 287 break; 288 289 case ET_RDONLY: 290 warningf(true, "%s: %s applied to read only variable", 291 es->expression, str); 292 break; 293 294 default: /* keep gcc happy */ 295 case ET_STR: 296 warningf(true, "%s: %s", es->expression, str); 297 break; 298 } 299 unwind(LAEXPR); 300 } 301 302 static struct tbl * 303 evalexpr(Expr_state *es, int prec) 304 { 305 struct tbl *vl, *vr = NULL, *vasn; 306 enum token op; 307 mksh_ari_t res = 0; 308 309 if (prec == P_PRIMARY) { 310 op = es->tok; 311 if (op == O_BNOT || op == O_LNOT || op == O_MINUS || 312 op == O_PLUS) { 313 exprtoken(es); 314 vl = intvar(es, evalexpr(es, P_PRIMARY)); 315 if (op == O_BNOT) 316 chvui(vl, ~); 317 else if (op == O_LNOT) 318 chvui(vl, !); 319 else if (op == O_MINUS) 320 chvui(vl, -); 321 /* op == O_PLUS is a no-op */ 322 } else if (op == OPEN_PAREN) { 323 exprtoken(es); 324 vl = evalexpr(es, MAX_PREC); 325 if (es->tok != CLOSE_PAREN) 326 evalerr(es, ET_STR, "missing )"); 327 exprtoken(es); 328 } else if (op == O_PLUSPLUS || op == O_MINUSMINUS) { 329 exprtoken(es); 330 vl = do_ppmm(es, op, es->val, true); 331 exprtoken(es); 332 } else if (op == VAR || op == LIT) { 333 vl = es->val; 334 exprtoken(es); 335 } else { 336 evalerr(es, ET_UNEXPECTED, NULL); 337 /* NOTREACHED */ 338 } 339 if (es->tok == O_PLUSPLUS || es->tok == O_MINUSMINUS) { 340 vl = do_ppmm(es, es->tok, vl, false); 341 exprtoken(es); 342 } 343 return (vl); 344 } 345 vl = evalexpr(es, prec - 1); 346 for (op = es->tok; IS_BINOP(op) && opinfo[(int)op].prec == prec; 347 op = es->tok) { 348 exprtoken(es); 349 vasn = vl; 350 if (op != O_ASN) /* vl may not have a value yet */ 351 vl = intvar(es, vl); 352 if (IS_ASSIGNOP(op)) { 353 assign_check(es, op, vasn); 354 vr = intvar(es, evalexpr(es, P_ASSIGN)); 355 } else if (op != O_TERN && op != O_LAND && op != O_LOR) 356 vr = intvar(es, evalexpr(es, prec - 1)); 357 if ((op == O_DIV || op == O_MOD || op == O_DIVASN || 358 op == O_MODASN) && vr->val.i == 0) { 359 if (es->noassign) 360 vr->val.i = 1; 361 else 362 evalerr(es, ET_STR, "zero divisor"); 363 } 364 switch ((int)op) { 365 case O_TIMES: 366 case O_TIMESASN: 367 res = bivui(vl, *, vr); 368 break; 369 case O_DIV: 370 case O_DIVASN: 371 res = bivui(vl, /, vr); 372 break; 373 case O_MOD: 374 case O_MODASN: 375 res = bivui(vl, %, vr); 376 break; 377 case O_PLUS: 378 case O_PLUSASN: 379 res = bivui(vl, +, vr); 380 break; 381 case O_MINUS: 382 case O_MINUSASN: 383 res = bivui(vl, -, vr); 384 break; 385 case O_LSHIFT: 386 case O_LSHIFTASN: 387 res = bivui(vl, <<, vr); 388 break; 389 case O_RSHIFT: 390 case O_RSHIFTASN: 391 res = bivui(vl, >>, vr); 392 break; 393 case O_LT: 394 res = bivui(vl, <, vr); 395 break; 396 case O_LE: 397 res = bivui(vl, <=, vr); 398 break; 399 case O_GT: 400 res = bivui(vl, >, vr); 401 break; 402 case O_GE: 403 res = bivui(vl, >=, vr); 404 break; 405 case O_EQ: 406 res = bivui(vl, ==, vr); 407 break; 408 case O_NE: 409 res = bivui(vl, !=, vr); 410 break; 411 case O_BAND: 412 case O_BANDASN: 413 res = bivui(vl, &, vr); 414 break; 415 case O_BXOR: 416 case O_BXORASN: 417 res = bivui(vl, ^, vr); 418 break; 419 case O_BOR: 420 case O_BORASN: 421 res = bivui(vl, |, vr); 422 break; 423 case O_LAND: 424 if (!vl->val.i) 425 es->noassign++; 426 vr = intvar(es, evalexpr(es, prec - 1)); 427 res = bivui(vl, &&, vr); 428 if (!vl->val.i) 429 es->noassign--; 430 break; 431 case O_LOR: 432 if (vl->val.i) 433 es->noassign++; 434 vr = intvar(es, evalexpr(es, prec - 1)); 435 res = bivui(vl, ||, vr); 436 if (vl->val.i) 437 es->noassign--; 438 break; 439 case O_TERN: 440 { 441 bool ev = vl->val.i != 0; 442 443 if (!ev) 444 es->noassign++; 445 vl = evalexpr(es, MAX_PREC); 446 if (!ev) 447 es->noassign--; 448 if (es->tok != CTERN) 449 evalerr(es, ET_STR, "missing :"); 450 exprtoken(es); 451 if (ev) 452 es->noassign++; 453 vr = evalexpr(es, P_TERN); 454 if (ev) 455 es->noassign--; 456 vl = ev ? vl : vr; 457 } 458 break; 459 case O_ASN: 460 res = vr->val.i; 461 break; 462 case O_COMMA: 463 res = vr->val.i; 464 break; 465 } 466 if (IS_ASSIGNOP(op)) { 467 stvui(vr, res); 468 if (!es->noassign) { 469 if (vasn->flag & INTEGER) 470 setint_v(vasn, vr, es->arith); 471 else 472 setint(vasn, res); 473 } 474 vl = vr; 475 } else if (op != O_TERN) 476 stvui(vl, res); 477 } 478 return (vl); 479 } 480 481 static void 482 exprtoken(Expr_state *es) 483 { 484 const char *cp = es->tokp; 485 int c; 486 char *tvar; 487 488 /* skip white space */ 489 skip_spaces: 490 while ((c = *cp), ksh_isspace(c)) 491 ++cp; 492 if (es->tokp == es->expression && c == '#') { 493 /* expression begins with # */ 494 es->natural = true; /* switch to unsigned */ 495 ++cp; 496 goto skip_spaces; 497 } 498 es->tokp = cp; 499 500 if (c == '\0') 501 es->tok = END; 502 else if (ksh_isalphx(c)) { 503 for (; ksh_isalnux(c); c = *cp) 504 cp++; 505 if (c == '[') { 506 int len; 507 508 len = array_ref_len(cp); 509 if (len == 0) 510 evalerr(es, ET_STR, "missing ]"); 511 cp += len; 512 } else if (c == '(' /*)*/ ) { 513 /* todo: add math functions (all take single argument): 514 * abs acos asin atan cos cosh exp int log sin sinh sqrt 515 * tan tanh 516 */ 517 ; 518 } 519 if (es->noassign) { 520 es->val = tempvar(); 521 es->val->flag |= EXPRLVALUE; 522 } else { 523 strndupx(tvar, es->tokp, cp - es->tokp, ATEMP); 524 es->val = global(tvar); 525 afree(tvar, ATEMP); 526 } 527 es->tok = VAR; 528 } else if (c == '1' && cp[1] == '#') { 529 cp += 2; 530 cp += utf_ptradj(cp); 531 strndupx(tvar, es->tokp, cp - es->tokp, ATEMP); 532 goto process_tvar; 533 #ifndef MKSH_SMALL 534 } else if (c == '\'') { 535 ++cp; 536 cp += utf_ptradj(cp); 537 if (*cp++ != '\'') 538 evalerr(es, ET_STR, 539 "multi-character character constant"); 540 /* 'x' -> 1#x (x = one multibyte character) */ 541 c = cp - es->tokp; 542 tvar = alloc(c + /* NUL */ 1, ATEMP); 543 tvar[0] = '1'; 544 tvar[1] = '#'; 545 memcpy(tvar + 2, es->tokp + 1, c - 2); 546 tvar[c] = '\0'; 547 goto process_tvar; 548 #endif 549 } else if (ksh_isdigit(c)) { 550 while (c != '_' && (ksh_isalnux(c) || c == '#')) 551 c = *cp++; 552 strndupx(tvar, es->tokp, --cp - es->tokp, ATEMP); 553 process_tvar: 554 es->val = tempvar(); 555 es->val->flag &= ~INTEGER; 556 es->val->type = 0; 557 es->val->val.s = tvar; 558 if (setint_v(es->val, es->val, es->arith) == NULL) 559 evalerr(es, ET_BADLIT, tvar); 560 afree(tvar, ATEMP); 561 es->tok = LIT; 562 } else { 563 int i, n0; 564 565 for (i = 0; (n0 = opinfo[i].name[0]); i++) 566 if (c == n0 && strncmp(cp, opinfo[i].name, 567 (size_t)opinfo[i].len) == 0) { 568 es->tok = (enum token)i; 569 cp += opinfo[i].len; 570 break; 571 } 572 if (!n0) 573 es->tok = BAD; 574 } 575 es->tokp = cp; 576 } 577 578 /* Do a ++ or -- operation */ 579 static struct tbl * 580 do_ppmm(Expr_state *es, enum token op, struct tbl *vasn, bool is_prefix) 581 { 582 struct tbl *vl; 583 mksh_ari_t oval; 584 585 assign_check(es, op, vasn); 586 587 vl = intvar(es, vasn); 588 oval = vl->val.i; 589 if (op == O_PLUSPLUS) { 590 if (es->natural) 591 ++vl->val.u; 592 else 593 ++vl->val.i; 594 } else { 595 if (es->natural) 596 --vl->val.u; 597 else 598 --vl->val.i; 599 } 600 if (vasn->flag & INTEGER) 601 setint_v(vasn, vl, es->arith); 602 else 603 setint(vasn, vl->val.i); 604 if (!is_prefix) /* undo the inc/dec */ 605 vl->val.i = oval; 606 607 return (vl); 608 } 609 610 static void 611 assign_check(Expr_state *es, enum token op, struct tbl *vasn) 612 { 613 if (es->tok == END || 614 (vasn->name[0] == '\0' && !(vasn->flag & EXPRLVALUE))) 615 evalerr(es, ET_LVALUE, opinfo[(int)op].name); 616 else if (vasn->flag & RDONLY) 617 evalerr(es, ET_RDONLY, opinfo[(int)op].name); 618 } 619 620 static struct tbl * 621 tempvar(void) 622 { 623 struct tbl *vp; 624 625 vp = alloc(sizeof(struct tbl), ATEMP); 626 vp->flag = ISSET|INTEGER; 627 vp->type = 0; 628 vp->areap = ATEMP; 629 vp->ua.hval = 0; 630 vp->val.i = 0; 631 vp->name[0] = '\0'; 632 return (vp); 633 } 634 635 /* cast (string) variable to temporary integer variable */ 636 static struct tbl * 637 intvar(Expr_state *es, struct tbl *vp) 638 { 639 struct tbl *vq; 640 641 /* try to avoid replacing a temp var with another temp var */ 642 if (vp->name[0] == '\0' && 643 (vp->flag & (ISSET|INTEGER|EXPRLVALUE)) == (ISSET|INTEGER)) 644 return (vp); 645 646 vq = tempvar(); 647 if (setint_v(vq, vp, es->arith) == NULL) { 648 if (vp->flag & EXPRINEVAL) 649 evalerr(es, ET_RECURSIVE, vp->name); 650 es->evaling = vp; 651 vp->flag |= EXPRINEVAL; 652 v_evaluate(vq, str_val(vp), KSH_UNWIND_ERROR, es->arith); 653 vp->flag &= ~EXPRINEVAL; 654 es->evaling = NULL; 655 } 656 return (vq); 657 } 658 659 660 /* 661 * UTF-8 support code: high-level functions 662 */ 663 664 int 665 utf_widthadj(const char *src, const char **dst) 666 { 667 size_t len; 668 unsigned int wc; 669 int width; 670 671 if (!UTFMODE || (len = utf_mbtowc(&wc, src)) == (size_t)-1 || 672 wc == 0) 673 len = width = 1; 674 else if ((width = utf_wcwidth(wc)) < 0) 675 /* XXX use 2 for x_zotc3 here? */ 676 width = 1; 677 678 if (dst) 679 *dst = src + len; 680 return (width); 681 } 682 683 int 684 utf_mbswidth(const char *s) 685 { 686 size_t len; 687 unsigned int wc; 688 int width = 0, cw; 689 690 if (!UTFMODE) 691 return (strlen(s)); 692 693 while (*s) 694 if (((len = utf_mbtowc(&wc, s)) == (size_t)-1) || 695 ((cw = utf_wcwidth(wc)) == -1)) { 696 s++; 697 width += 1; 698 } else { 699 s += len; 700 width += cw; 701 } 702 return (width); 703 } 704 705 const char * 706 utf_skipcols(const char *p, int cols) 707 { 708 int c = 0; 709 710 while (c < cols) { 711 if (!*p) 712 return (p + cols - c); 713 c += utf_widthadj(p, &p); 714 } 715 return (p); 716 } 717 718 size_t 719 utf_ptradj(const char *src) 720 { 721 register size_t n; 722 723 if (!UTFMODE || 724 *(const unsigned char *)(src) < 0xC2 || 725 (n = utf_mbtowc(NULL, src)) == (size_t)-1) 726 n = 1; 727 return (n); 728 } 729 730 /* 731 * UTF-8 support code: low-level functions 732 */ 733 734 /* CESU-8 multibyte and wide character conversion crafted for mksh */ 735 736 size_t 737 utf_mbtowc(unsigned int *dst, const char *src) 738 { 739 const unsigned char *s = (const unsigned char *)src; 740 unsigned int c, wc; 741 742 if ((wc = *s++) < 0x80) { 743 out: 744 if (dst != NULL) 745 *dst = wc; 746 return (wc ? ((const char *)s - src) : 0); 747 } 748 if (wc < 0xC2 || wc >= 0xF0) 749 /* < 0xC0: spurious second byte */ 750 /* < 0xC2: non-minimalistic mapping error in 2-byte seqs */ 751 /* > 0xEF: beyond BMP */ 752 goto ilseq; 753 754 if (wc < 0xE0) { 755 wc = (wc & 0x1F) << 6; 756 if (((c = *s++) & 0xC0) != 0x80) 757 goto ilseq; 758 wc |= c & 0x3F; 759 goto out; 760 } 761 762 wc = (wc & 0x0F) << 12; 763 764 if (((c = *s++) & 0xC0) != 0x80) 765 goto ilseq; 766 wc |= (c & 0x3F) << 6; 767 768 if (((c = *s++) & 0xC0) != 0x80) 769 goto ilseq; 770 wc |= c & 0x3F; 771 772 /* Check for non-minimalistic mapping error in 3-byte seqs */ 773 if (wc >= 0x0800 && wc <= 0xFFFD) 774 goto out; 775 ilseq: 776 return ((size_t)(-1)); 777 } 778 779 size_t 780 utf_wctomb(char *dst, unsigned int wc) 781 { 782 unsigned char *d; 783 784 if (wc < 0x80) { 785 *dst = wc; 786 return (1); 787 } 788 789 d = (unsigned char *)dst; 790 if (wc < 0x0800) 791 *d++ = (wc >> 6) | 0xC0; 792 else { 793 *d++ = ((wc = wc > 0xFFFD ? 0xFFFD : wc) >> 12) | 0xE0; 794 *d++ = ((wc >> 6) & 0x3F) | 0x80; 795 } 796 *d++ = (wc & 0x3F) | 0x80; 797 return ((char *)d - dst); 798 } 799 800 801 #ifndef MKSH_mirbsd_wcwidth 802 /* --- begin of wcwidth.c excerpt --- */ 803 /*- 804 * Markus Kuhn -- 2007-05-26 (Unicode 5.0) 805 * 806 * Permission to use, copy, modify, and distribute this software 807 * for any purpose and without fee is hereby granted. The author 808 * disclaims all warranties with regard to this software. 809 */ 810 811 __RCSID("$miros: src/lib/libc/i18n/wcwidth.c,v 1.8 2008/09/20 12:01:18 tg Exp $"); 812 813 int 814 utf_wcwidth(unsigned int c) 815 { 816 static const struct cbset { 817 unsigned short first; 818 unsigned short last; 819 } comb[] = { 820 { 0x0300, 0x036F }, { 0x0483, 0x0486 }, { 0x0488, 0x0489 }, 821 { 0x0591, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 }, 822 { 0x05C4, 0x05C5 }, { 0x05C7, 0x05C7 }, { 0x0600, 0x0603 }, 823 { 0x0610, 0x0615 }, { 0x064B, 0x065E }, { 0x0670, 0x0670 }, 824 { 0x06D6, 0x06E4 }, { 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED }, 825 { 0x070F, 0x070F }, { 0x0711, 0x0711 }, { 0x0730, 0x074A }, 826 { 0x07A6, 0x07B0 }, { 0x07EB, 0x07F3 }, { 0x0901, 0x0902 }, 827 { 0x093C, 0x093C }, { 0x0941, 0x0948 }, { 0x094D, 0x094D }, 828 { 0x0951, 0x0954 }, { 0x0962, 0x0963 }, { 0x0981, 0x0981 }, 829 { 0x09BC, 0x09BC }, { 0x09C1, 0x09C4 }, { 0x09CD, 0x09CD }, 830 { 0x09E2, 0x09E3 }, { 0x0A01, 0x0A02 }, { 0x0A3C, 0x0A3C }, 831 { 0x0A41, 0x0A42 }, { 0x0A47, 0x0A48 }, { 0x0A4B, 0x0A4D }, 832 { 0x0A70, 0x0A71 }, { 0x0A81, 0x0A82 }, { 0x0ABC, 0x0ABC }, 833 { 0x0AC1, 0x0AC5 }, { 0x0AC7, 0x0AC8 }, { 0x0ACD, 0x0ACD }, 834 { 0x0AE2, 0x0AE3 }, { 0x0B01, 0x0B01 }, { 0x0B3C, 0x0B3C }, 835 { 0x0B3F, 0x0B3F }, { 0x0B41, 0x0B43 }, { 0x0B4D, 0x0B4D }, 836 { 0x0B56, 0x0B56 }, { 0x0B82, 0x0B82 }, { 0x0BC0, 0x0BC0 }, 837 { 0x0BCD, 0x0BCD }, { 0x0C3E, 0x0C40 }, { 0x0C46, 0x0C48 }, 838 { 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, { 0x0CBC, 0x0CBC }, 839 { 0x0CBF, 0x0CBF }, { 0x0CC6, 0x0CC6 }, { 0x0CCC, 0x0CCD }, 840 { 0x0CE2, 0x0CE3 }, { 0x0D41, 0x0D43 }, { 0x0D4D, 0x0D4D }, 841 { 0x0DCA, 0x0DCA }, { 0x0DD2, 0x0DD4 }, { 0x0DD6, 0x0DD6 }, 842 { 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A }, { 0x0E47, 0x0E4E }, 843 { 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 }, { 0x0EBB, 0x0EBC }, 844 { 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 }, { 0x0F35, 0x0F35 }, 845 { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 }, { 0x0F71, 0x0F7E }, 846 { 0x0F80, 0x0F84 }, { 0x0F86, 0x0F87 }, { 0x0F90, 0x0F97 }, 847 { 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 }, { 0x102D, 0x1030 }, 848 { 0x1032, 0x1032 }, { 0x1036, 0x1037 }, { 0x1039, 0x1039 }, 849 { 0x1058, 0x1059 }, { 0x1160, 0x11FF }, { 0x135F, 0x135F }, 850 { 0x1712, 0x1714 }, { 0x1732, 0x1734 }, { 0x1752, 0x1753 }, 851 { 0x1772, 0x1773 }, { 0x17B4, 0x17B5 }, { 0x17B7, 0x17BD }, 852 { 0x17C6, 0x17C6 }, { 0x17C9, 0x17D3 }, { 0x17DD, 0x17DD }, 853 { 0x180B, 0x180D }, { 0x18A9, 0x18A9 }, { 0x1920, 0x1922 }, 854 { 0x1927, 0x1928 }, { 0x1932, 0x1932 }, { 0x1939, 0x193B }, 855 { 0x1A17, 0x1A18 }, { 0x1B00, 0x1B03 }, { 0x1B34, 0x1B34 }, 856 { 0x1B36, 0x1B3A }, { 0x1B3C, 0x1B3C }, { 0x1B42, 0x1B42 }, 857 { 0x1B6B, 0x1B73 }, { 0x1DC0, 0x1DCA }, { 0x1DFE, 0x1DFF }, 858 { 0x200B, 0x200F }, { 0x202A, 0x202E }, { 0x2060, 0x2063 }, 859 { 0x206A, 0x206F }, { 0x20D0, 0x20EF }, { 0x302A, 0x302F }, 860 { 0x3099, 0x309A }, { 0xA806, 0xA806 }, { 0xA80B, 0xA80B }, 861 { 0xA825, 0xA826 }, { 0xFB1E, 0xFB1E }, { 0xFE00, 0xFE0F }, 862 { 0xFE20, 0xFE23 }, { 0xFEFF, 0xFEFF }, { 0xFFF9, 0xFFFB } 863 }; 864 size_t min = 0, mid, max = NELEM(comb) - 1; 865 866 /* test for 8-bit control characters */ 867 if (c < 32 || (c >= 0x7f && c < 0xa0)) 868 return (c ? -1 : 0); 869 870 /* binary search in table of non-spacing characters */ 871 if (c >= comb[0].first && c <= comb[max].last) 872 while (max >= min) { 873 mid = (min + max) / 2; 874 if (c > comb[mid].last) 875 min = mid + 1; 876 else if (c < comb[mid].first) 877 max = mid - 1; 878 else 879 return (0); 880 } 881 882 /* if we arrive here, c is not a combining or C0/C1 control char */ 883 return ((c >= 0x1100 && ( 884 c <= 0x115f || /* Hangul Jamo init. consonants */ 885 c == 0x2329 || c == 0x232a || 886 (c >= 0x2e80 && c <= 0xa4cf && c != 0x303f) || /* CJK ... Yi */ 887 (c >= 0xac00 && c <= 0xd7a3) || /* Hangul Syllables */ 888 (c >= 0xf900 && c <= 0xfaff) || /* CJK Compatibility Ideographs */ 889 (c >= 0xfe10 && c <= 0xfe19) || /* Vertical forms */ 890 (c >= 0xfe30 && c <= 0xfe6f) || /* CJK Compatibility Forms */ 891 (c >= 0xff00 && c <= 0xff60) || /* Fullwidth Forms */ 892 (c >= 0xffe0 && c <= 0xffe6))) ? 2 : 1); 893 } 894 /* --- end of wcwidth.c excerpt --- */ 895 #endif 896