Home | History | Annotate | Download | only in pending
      1 /* expr.c - evaluate expression
      2  *
      3  * Copyright 2016 Google Inc.
      4  * Copyright 2013 Daniel Verkamp <daniel (at) drv.nu>
      5  *
      6  * http://pubs.opengroup.org/onlinepubs/9699919799/utilities/expr.html
      7  *
      8  * The web standard is incomplete (precedence grouping missing), see:
      9  * http://permalink.gmane.org/gmane.comp.standards.posix.austin.general/10141
     10  *
     11  * eval_expr() uses the recursive "Precedence Climbing" algorithm:
     12  *
     13  * Clarke, Keith. "The top-down parsing of expressions." University of London.
     14  * Queen Mary College. Department of Computer Science and Statistics, 1986.
     15  *
     16  * http://www.antlr.org/papers/Clarke-expr-parsing-1986.pdf
     17  *
     18  * Nice explanation and Python implementation:
     19  * http://eli.thegreenplace.net/2012/08/02/parsing-expressions-by-precedence-climbing
     20 
     21 USE_EXPR(NEWTOY(expr, NULL, TOYFLAG_USR|TOYFLAG_BIN))
     22 
     23 config EXPR
     24   bool "expr"
     25   default n
     26   help
     27     usage: expr ARG1 OPERATOR ARG2...
     28 
     29     Evaluate expression and print result. For example, "expr 1 + 2".
     30 
     31     The supported operators are (grouped from highest to lowest priority):
     32 
     33       ( )    :    * / %    + -    != <= < >= > =    &    |
     34 
     35     Each constant and operator must be a separate command line argument.
     36     All operators are infix, meaning they expect a constant (or expression
     37     that resolves to a constant) on each side of the operator. Operators of
     38     the same priority (within each group above) are evaluated left to right.
     39     Parentheses may be used (as separate arguments) to elevate the priority
     40     of expressions.
     41 
     42     Calling expr from a command shell requires a lot of \( or '*' escaping
     43     to avoid interpreting shell control characters.
     44 
     45     The & and | operators are logical (not bitwise) and may operate on
     46     strings (a blank string is "false"). Comparison operators may also
     47     operate on strings (alphabetical sort).
     48 
     49     Constants may be strings or integers. Comparison, logical, and regex
     50     operators may operate on strings (a blank string is "false"), other
     51     operators require integers.
     52 */
     53 
     54 // TODO: int overflow checking
     55 
     56 #define FOR_expr
     57 #include "toys.h"
     58 
     59 GLOBALS(
     60   char **tok; // current token, not on the stack since recursive calls mutate it
     61 
     62   char *refree;
     63 )
     64 
     65 // Scalar value.  If s != NULL, it's a string, otherwise it's an int.
     66 struct value {
     67   char *s;
     68   long long i;
     69 };
     70 
     71 // Get the value as a string.
     72 char *get_str(struct value *v)
     73 {
     74   if (v->s) return v->s;
     75   else return xmprintf("%lld", v->i);
     76 }
     77 
     78 // Get the value as an integer and return 1, or return 0 on error.
     79 int get_int(struct value *v, long long *ret)
     80 {
     81   if (v->s) {
     82     char *endp;
     83 
     84     *ret = strtoll(v->s, &endp, 10);
     85 
     86     if (*endp) return 0; // If endp points to NUL, all chars were converted
     87   } else *ret = v->i;
     88 
     89   return 1;
     90 }
     91 
     92 // Preserve the invariant that v.s is NULL when the value is an integer.
     93 void assign_int(struct value *v, long long i)
     94 {
     95   v->i = i;
     96   v->s = NULL;
     97 }
     98 
     99 // Check if v is 0 or the empty string.
    100 static int is_false(struct value *v)
    101 {
    102   return get_int(v, &v->i) && !v->i;
    103 }
    104 
    105 // 'ret' is filled with a string capture or int match position.
    106 static void re(char *target, char *pattern, struct value *ret)
    107 {
    108   regex_t pat;
    109   regmatch_t m[2];
    110 
    111   xregcomp(&pat, pattern, 0);
    112   // must match at pos 0
    113   if (!regexec(&pat, target, 2, m, 0) && !m[0].rm_so) {
    114     // Return first parenthesized subexpression as string, or length of match
    115     if (pat.re_nsub>0) {
    116       ret->s = xmprintf("%.*s", (int)(m[1].rm_eo-m[1].rm_so),
    117           target+m[1].rm_so);
    118       if (TT.refree) free(TT.refree);
    119       TT.refree = ret->s;
    120     } else assign_int(ret, m[0].rm_eo);
    121   } else {
    122     if (pat.re_nsub>0) ret->s = "";
    123     else assign_int(ret, 0);
    124   }
    125   regfree(&pat);
    126 }
    127 
    128 // 4 different signatures of operators.  S = string, I = int, SI = string or
    129 // int.
    130 enum { SI_TO_SI = 1, SI_TO_I, I_TO_I, S_TO_SI };
    131 
    132 enum { OR = 1, AND, EQ, NE, GT, GTE, LT, LTE, ADD, SUB, MUL, DIVI, MOD, RE };
    133 
    134 // operators grouped by precedence
    135 static struct op_def {
    136   char *tok;
    137   char prec, sig, op; // precedence, signature for type coercion, operator ID
    138 } OPS[] = {
    139   // logical ops, precedence 1 and 2, signature SI_TO_SI
    140   {"|", 1, SI_TO_SI, OR  },
    141   {"&", 2, SI_TO_SI, AND },
    142   // comparison ops, precedence 3, signature SI_TO_I
    143   {"=", 3, SI_TO_I, EQ }, {"==", 3, SI_TO_I, EQ  }, {"!=", 3, SI_TO_I, NE },
    144   {">", 3, SI_TO_I, GT }, {">=", 3, SI_TO_I, GTE },
    145   {"<", 3, SI_TO_I, LT }, {"<=", 3, SI_TO_I, LTE },
    146   // arithmetic ops, precedence 4 and 5, signature I_TO_I
    147   {"+", 4, I_TO_I, ADD }, {"-",  4, I_TO_I, SUB },
    148   {"*", 5, I_TO_I, MUL }, {"/",  5, I_TO_I, DIVI }, {"%", 5, I_TO_I, MOD },
    149   // regex match, precedence 6, signature S_TO_SI
    150   {":", 6, S_TO_SI, RE },
    151   {NULL, 0, 0, 0}, // sentinel
    152 };
    153 
    154 void eval_op(struct op_def *o, struct value *ret, struct value *rhs)
    155 {
    156   long long a, b, x = 0; // x = a OP b for ints.
    157   char *s, *t; // string operands
    158   int cmp;
    159 
    160   switch (o->sig) {
    161 
    162   case SI_TO_SI:
    163     switch (o->op) {
    164     case OR:  if (is_false(ret)) *ret = *rhs; break;
    165     case AND: if (is_false(ret) || is_false(rhs)) assign_int(ret, 0); break;
    166     }
    167     break;
    168 
    169   case SI_TO_I:
    170     if (get_int(ret, &a) && get_int(rhs, &b)) { // both are ints
    171       cmp = a - b;
    172     } else { // otherwise compare both as strings
    173       cmp = strcmp(s = get_str(ret), t = get_str(rhs));
    174       if (ret->s != s) free(s);
    175       if (rhs->s != t) free(t);
    176     }
    177     switch (o->op) {
    178     case EQ:  x = cmp == 0; break;
    179     case NE:  x = cmp != 0; break;
    180     case GT:  x = cmp >  0; break;
    181     case GTE: x = cmp >= 0; break;
    182     case LT:  x = cmp <  0; break;
    183     case LTE: x = cmp <= 0; break;
    184     }
    185     assign_int(ret, x);
    186     break;
    187 
    188   case I_TO_I:
    189     if (!get_int(ret, &a) || !get_int(rhs, &b))
    190       error_exit("non-integer argument");
    191     switch (o->op) {
    192     case ADD: x = a + b; break;
    193     case SUB: x = a - b; break;
    194     case MUL: x = a * b; break;
    195     case DIVI: if (b == 0) error_exit("division by zero"); x = a / b; break;
    196     case MOD:  if (b == 0) error_exit("division by zero"); x = a % b; break;
    197     }
    198     assign_int(ret, x);
    199     break;
    200 
    201   case S_TO_SI: // op == RE
    202     s = get_str(ret);
    203     cmp = ret->s!=s; // ret overwritten by re so check now
    204     re(s, t = get_str(rhs), ret);
    205     if (cmp) free(s);
    206     if (rhs->s!=t) free(t);
    207     break;
    208   }
    209 }
    210 
    211 // Evalute a compound expression using recursive "Precedence Climbing"
    212 // algorithm, setting 'ret'.
    213 static void eval_expr(struct value *ret, int min_prec)
    214 {
    215   if (!*TT.tok) error_exit("Unexpected end of input");
    216 
    217   // Evaluate LHS atom, setting 'ret'.
    218   if (!strcmp(*TT.tok, "(")) { // parenthesized expression
    219     TT.tok++;  // consume (
    220 
    221     eval_expr(ret, 1);        // We're inside ( ), so min_prec = 1
    222     if (ret->s && !strcmp(ret->s, ")")) error_exit("empty ( )");
    223     if (!*TT.tok) error_exit("Expected )");
    224     if (strcmp(*TT.tok, ")")) error_exit("Expected ) but got %s", *TT.tok);
    225   } else ret->s = *TT.tok;  // simple literal, all values start as strings
    226   TT.tok++;
    227 
    228   // Evaluate RHS and apply operator until precedence is too low.
    229   struct value rhs;
    230   while (*TT.tok) {
    231     struct op_def *o = OPS;
    232 
    233     while (o->tok) { // Look up operator
    234       if (!strcmp(*TT.tok, o->tok)) break;
    235       o++;
    236     }
    237     if (!o->tok) break; // Not an operator (extra input will fail later)
    238     if (o->prec < min_prec) break; // Precedence too low, pop a stack frame
    239     TT.tok++;
    240 
    241     eval_expr(&rhs, o->prec + 1); // Evaluate RHS, with higher min precedence
    242     eval_op(o, ret, &rhs); // Apply operator, setting 'ret'
    243   }
    244 }
    245 
    246 void expr_main(void)
    247 {
    248   struct value ret = {0};
    249 
    250   toys.exitval = 2; // if exiting early, indicate error
    251   TT.tok = toys.optargs; // initialize global token
    252   eval_expr(&ret, 1);
    253   if (*TT.tok) error_exit("Unexpected extra input '%s'\n", *TT.tok);
    254 
    255   if (ret.s) printf("%s\n", ret.s);
    256   else printf("%lld\n", ret.i);
    257 
    258   toys.exitval = is_false(&ret);
    259 
    260   if (TT.refree) free(TT.refree);
    261 }
    262