Home | History | Annotate | Download | only in kati
      1 // Copyright 2015 Google Inc. All rights reserved
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //      http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
     15 // +build ignore
     16 
     17 #include "strutil.h"
     18 
     19 #include <ctype.h>
     20 #include <limits.h>
     21 #include <unistd.h>
     22 
     23 #include <algorithm>
     24 #include <functional>
     25 #include <stack>
     26 #include <utility>
     27 
     28 #ifdef __SSE4_2__
     29 #include <smmintrin.h>
     30 #endif
     31 
     32 #include "log.h"
     33 
     34 static bool isSpace(char c) {
     35   return (9 <= c && c <= 13) || c == 32;
     36 }
     37 
     38 #ifdef __SSE4_2__
     39 static int SkipUntilSSE42(const char* s,
     40                           int len,
     41                           const char* ranges,
     42                           int ranges_size) {
     43   __m128i ranges16 = _mm_loadu_si128((const __m128i*)ranges);
     44   len &= ~15;
     45   int i = 0;
     46   while (i < len) {
     47     __m128i b16 = _mm_loadu_si128((const __m128i*)(s + i));
     48     int r = _mm_cmpestri(
     49         ranges16, ranges_size, b16, len - i,
     50         _SIDD_LEAST_SIGNIFICANT | _SIDD_CMP_RANGES | _SIDD_UBYTE_OPS);
     51     if (r != 16) {
     52       return i + r;
     53     }
     54     i += 16;
     55   }
     56   return len;
     57 }
     58 #endif
     59 
     60 template <typename Cond>
     61 static int SkipUntil(const char* s,
     62                      int len,
     63                      const char* ranges UNUSED,
     64                      int ranges_size UNUSED,
     65                      Cond cond) {
     66   int i = 0;
     67 #ifdef __SSE4_2__
     68   i += SkipUntilSSE42(s, len, ranges, ranges_size);
     69 #endif
     70   for (; i < len; i++) {
     71     if (cond(s[i]))
     72       break;
     73   }
     74   return i;
     75 }
     76 
     77 WordScanner::Iterator& WordScanner::Iterator::operator++() {
     78   int len = static_cast<int>(in->size());
     79   for (s = i + 1; s < len; s++) {
     80     if (!isSpace((*in)[s]))
     81       break;
     82   }
     83   if (s >= len) {
     84     in = NULL;
     85     s = 0;
     86     i = 0;
     87     return *this;
     88   }
     89 
     90   static const char ranges[] = "\x09\x0d  ";
     91   // It's intentional we are not using isSpace here. It seems with
     92   // lambda the compiler generates better code.
     93   i = s + SkipUntil(in->data() + s, len - s, ranges, 4,
     94                     [](char c) { return (9 <= c && c <= 13) || c == 32; });
     95   return *this;
     96 }
     97 
     98 StringPiece WordScanner::Iterator::operator*() const {
     99   return in->substr(s, i - s);
    100 }
    101 
    102 WordScanner::WordScanner(StringPiece in) : in_(in) {}
    103 
    104 WordScanner::Iterator WordScanner::begin() const {
    105   Iterator iter;
    106   iter.in = &in_;
    107   iter.s = 0;
    108   iter.i = -1;
    109   ++iter;
    110   return iter;
    111 }
    112 
    113 WordScanner::Iterator WordScanner::end() const {
    114   Iterator iter;
    115   iter.in = NULL;
    116   iter.s = 0;
    117   iter.i = 0;
    118   return iter;
    119 }
    120 
    121 void WordScanner::Split(vector<StringPiece>* o) {
    122   for (StringPiece t : *this)
    123     o->push_back(t);
    124 }
    125 
    126 WordWriter::WordWriter(string* o) : out_(o), needs_space_(false) {}
    127 
    128 void WordWriter::MaybeAddWhitespace() {
    129   if (needs_space_) {
    130     out_->push_back(' ');
    131   } else {
    132     needs_space_ = true;
    133   }
    134 }
    135 
    136 void WordWriter::Write(StringPiece s) {
    137   MaybeAddWhitespace();
    138   AppendString(s, out_);
    139 }
    140 
    141 ScopedTerminator::ScopedTerminator(StringPiece s) : s_(s), c_(s[s.size()]) {
    142   const_cast<char*>(s_.data())[s_.size()] = '\0';
    143 }
    144 
    145 ScopedTerminator::~ScopedTerminator() {
    146   const_cast<char*>(s_.data())[s_.size()] = c_;
    147 }
    148 
    149 void AppendString(StringPiece str, string* out) {
    150   out->append(str.begin(), str.end());
    151 }
    152 
    153 bool HasPrefix(StringPiece str, StringPiece prefix) {
    154   ssize_t size_diff = str.size() - prefix.size();
    155   return size_diff >= 0 && str.substr(0, prefix.size()) == prefix;
    156 }
    157 
    158 bool HasSuffix(StringPiece str, StringPiece suffix) {
    159   ssize_t size_diff = str.size() - suffix.size();
    160   return size_diff >= 0 && str.substr(size_diff) == suffix;
    161 }
    162 
    163 bool HasWord(StringPiece str, StringPiece w) {
    164   size_t found = str.find(w);
    165   if (found == string::npos)
    166     return false;
    167   if (found != 0 && !isSpace(str[found - 1]))
    168     return false;
    169   size_t end = found + w.size();
    170   if (end != str.size() && !isSpace(str[end]))
    171     return false;
    172   return true;
    173 }
    174 
    175 StringPiece TrimPrefix(StringPiece str, StringPiece prefix) {
    176   ssize_t size_diff = str.size() - prefix.size();
    177   if (size_diff < 0 || str.substr(0, prefix.size()) != prefix)
    178     return str;
    179   return str.substr(prefix.size());
    180 }
    181 
    182 StringPiece TrimSuffix(StringPiece str, StringPiece suffix) {
    183   ssize_t size_diff = str.size() - suffix.size();
    184   if (size_diff < 0 || str.substr(size_diff) != suffix)
    185     return str;
    186   return str.substr(0, size_diff);
    187 }
    188 
    189 Pattern::Pattern(StringPiece pat) : pat_(pat), percent_index_(pat.find('%')) {}
    190 
    191 bool Pattern::Match(StringPiece str) const {
    192   if (percent_index_ == string::npos)
    193     return str == pat_;
    194   return MatchImpl(str);
    195 }
    196 
    197 bool Pattern::MatchImpl(StringPiece str) const {
    198   return (HasPrefix(str, pat_.substr(0, percent_index_)) &&
    199           HasSuffix(str, pat_.substr(percent_index_ + 1)));
    200 }
    201 
    202 StringPiece Pattern::Stem(StringPiece str) const {
    203   if (!Match(str))
    204     return "";
    205   return str.substr(percent_index_,
    206                     str.size() - (pat_.size() - percent_index_ - 1));
    207 }
    208 
    209 void Pattern::AppendSubst(StringPiece str,
    210                           StringPiece subst,
    211                           string* out) const {
    212   if (percent_index_ == string::npos) {
    213     if (str == pat_) {
    214       AppendString(subst, out);
    215       return;
    216     } else {
    217       AppendString(str, out);
    218       return;
    219     }
    220   }
    221 
    222   if (MatchImpl(str)) {
    223     size_t subst_percent_index = subst.find('%');
    224     if (subst_percent_index == string::npos) {
    225       AppendString(subst, out);
    226       return;
    227     } else {
    228       AppendString(subst.substr(0, subst_percent_index), out);
    229       AppendString(str.substr(percent_index_, str.size() - pat_.size() + 1),
    230                    out);
    231       AppendString(subst.substr(subst_percent_index + 1), out);
    232       return;
    233     }
    234   }
    235   AppendString(str, out);
    236 }
    237 
    238 void Pattern::AppendSubstRef(StringPiece str,
    239                              StringPiece subst,
    240                              string* out) const {
    241   if (percent_index_ != string::npos && subst.find('%') != string::npos) {
    242     AppendSubst(str, subst, out);
    243     return;
    244   }
    245   StringPiece s = TrimSuffix(str, pat_);
    246   out->append(s.begin(), s.end());
    247   out->append(subst.begin(), subst.end());
    248 }
    249 
    250 string NoLineBreak(const string& s) {
    251   size_t index = s.find('\n');
    252   if (index == string::npos)
    253     return s;
    254   string r = s;
    255   while (index != string::npos) {
    256     r = r.substr(0, index) + "\\n" + r.substr(index + 1);
    257     index = r.find('\n', index + 2);
    258   }
    259   return r;
    260 }
    261 
    262 StringPiece TrimLeftSpace(StringPiece s) {
    263   size_t i = 0;
    264   for (; i < s.size(); i++) {
    265     if (isSpace(s[i]))
    266       continue;
    267     char n = s.get(i + 1);
    268     if (s[i] == '\\' && (n == '\r' || n == '\n')) {
    269       i++;
    270       continue;
    271     }
    272     break;
    273   }
    274   return s.substr(i, s.size() - i);
    275 }
    276 
    277 StringPiece TrimRightSpace(StringPiece s) {
    278   size_t i = 0;
    279   for (; i < s.size(); i++) {
    280     char c = s[s.size() - 1 - i];
    281     if (isSpace(c)) {
    282       if ((c == '\r' || c == '\n') && s.get(s.size() - 2 - i) == '\\')
    283         i++;
    284       continue;
    285     }
    286     break;
    287   }
    288   return s.substr(0, s.size() - i);
    289 }
    290 
    291 StringPiece TrimSpace(StringPiece s) {
    292   return TrimRightSpace(TrimLeftSpace(s));
    293 }
    294 
    295 StringPiece Dirname(StringPiece s) {
    296   size_t found = s.rfind('/');
    297   if (found == string::npos)
    298     return StringPiece(".");
    299   if (found == 0)
    300     return StringPiece("");
    301   return s.substr(0, found);
    302 }
    303 
    304 StringPiece Basename(StringPiece s) {
    305   size_t found = s.rfind('/');
    306   if (found == string::npos || found == 0)
    307     return s;
    308   return s.substr(found + 1);
    309 }
    310 
    311 StringPiece GetExt(StringPiece s) {
    312   size_t found = s.rfind('.');
    313   if (found == string::npos)
    314     return StringPiece("");
    315   return s.substr(found);
    316 }
    317 
    318 StringPiece StripExt(StringPiece s) {
    319   size_t slash_index = s.rfind('/');
    320   size_t found = s.rfind('.');
    321   if (found == string::npos ||
    322       (slash_index != string::npos && found < slash_index))
    323     return s;
    324   return s.substr(0, found);
    325 }
    326 
    327 void NormalizePath(string* o) {
    328   if (o->empty())
    329     return;
    330   size_t start_index = 0;
    331   if ((*o)[0] == '/')
    332     start_index++;
    333   size_t j = start_index;
    334   size_t prev_start = start_index;
    335   for (size_t i = start_index; i <= o->size(); i++) {
    336     char c = (*o)[i];
    337     if (c != '/' && c != 0) {
    338       (*o)[j] = c;
    339       j++;
    340       continue;
    341     }
    342 
    343     StringPiece prev_dir = StringPiece(o->data() + prev_start, j - prev_start);
    344     if (prev_dir == ".") {
    345       j--;
    346     } else if (prev_dir == ".." && j != 2 /* .. */) {
    347       if (j == 3) {
    348         // /..
    349         j = start_index;
    350       } else {
    351         size_t orig_j = j;
    352         j -= 4;
    353         j = o->rfind('/', j);
    354         if (j == string::npos) {
    355           j = start_index;
    356         } else {
    357           j++;
    358         }
    359         if (StringPiece(o->data() + j, 3) == "../") {
    360           j = orig_j;
    361           (*o)[j] = c;
    362           j++;
    363         }
    364       }
    365     } else if (!prev_dir.empty()) {
    366       if (c) {
    367         (*o)[j] = c;
    368         j++;
    369       }
    370     }
    371     prev_start = j;
    372   }
    373   if (j > 1 && (*o)[j - 1] == '/')
    374     j--;
    375   o->resize(j);
    376 }
    377 
    378 void AbsPath(StringPiece s, string* o) {
    379   if (s.get(0) == '/') {
    380     o->clear();
    381   } else {
    382     char buf[PATH_MAX];
    383     if (!getcwd(buf, PATH_MAX)) {
    384       fprintf(stderr, "getcwd failed\n");
    385       CHECK(false);
    386     }
    387 
    388     CHECK(buf[0] == '/');
    389     *o = buf;
    390     *o += '/';
    391   }
    392   AppendString(s, o);
    393   NormalizePath(o);
    394 }
    395 
    396 template <typename Cond>
    397 size_t FindOutsideParenImpl(StringPiece s, Cond cond) {
    398   bool prev_backslash = false;
    399   stack<char> paren_stack;
    400   for (size_t i = 0; i < s.size(); i++) {
    401     char c = s[i];
    402     if (cond(c) && paren_stack.empty() && !prev_backslash) {
    403       return i;
    404     }
    405     switch (c) {
    406       case '(':
    407         paren_stack.push(')');
    408         break;
    409       case '{':
    410         paren_stack.push('}');
    411         break;
    412 
    413       case ')':
    414       case '}':
    415         if (!paren_stack.empty() && c == paren_stack.top()) {
    416           paren_stack.pop();
    417         }
    418         break;
    419     }
    420     prev_backslash = c == '\\' && !prev_backslash;
    421   }
    422   return string::npos;
    423 }
    424 
    425 size_t FindOutsideParen(StringPiece s, char c) {
    426   return FindOutsideParenImpl(s, [&c](char d) { return c == d; });
    427 }
    428 
    429 size_t FindTwoOutsideParen(StringPiece s, char c1, char c2) {
    430   return FindOutsideParenImpl(
    431       s, [&c1, &c2](char d) { return d == c1 || d == c2; });
    432 }
    433 
    434 size_t FindThreeOutsideParen(StringPiece s, char c1, char c2, char c3) {
    435   return FindOutsideParenImpl(
    436       s, [&c1, &c2, &c3](char d) { return d == c1 || d == c2 || d == c3; });
    437 }
    438 
    439 size_t FindEndOfLine(StringPiece s, size_t e, size_t* lf_cnt) {
    440   static const char ranges[] = "\0\0\n\n\\\\";
    441   while (e < s.size()) {
    442     e += SkipUntil(s.data() + e, s.size() - e, ranges, 6,
    443                    [](char c) { return c == 0 || c == '\n' || c == '\\'; });
    444     if (e >= s.size()) {
    445       CHECK(s.size() == e);
    446       break;
    447     }
    448     char c = s[e];
    449     if (c == '\0')
    450       break;
    451     if (c == '\\') {
    452       if (s[e + 1] == '\n') {
    453         e += 2;
    454         ++*lf_cnt;
    455       } else if (s[e + 1] == '\r' && s[e + 2] == '\n') {
    456         e += 3;
    457         ++*lf_cnt;
    458       } else if (s[e + 1] == '\\') {
    459         e += 2;
    460       } else {
    461         e++;
    462       }
    463     } else if (c == '\n') {
    464       ++*lf_cnt;
    465       return e;
    466     }
    467   }
    468   return e;
    469 }
    470 
    471 StringPiece TrimLeadingCurdir(StringPiece s) {
    472   while (s.substr(0, 2) == "./")
    473     s = s.substr(2);
    474   return s;
    475 }
    476 
    477 void FormatForCommandSubstitution(string* s) {
    478   while ((*s)[s->size() - 1] == '\n')
    479     s->pop_back();
    480   for (size_t i = 0; i < s->size(); i++) {
    481     if ((*s)[i] == '\n')
    482       (*s)[i] = ' ';
    483   }
    484 }
    485 
    486 string SortWordsInString(StringPiece s) {
    487   vector<string> toks;
    488   for (StringPiece tok : WordScanner(s)) {
    489     toks.push_back(tok.as_string());
    490   }
    491   sort(toks.begin(), toks.end());
    492   return JoinStrings(toks, " ");
    493 }
    494 
    495 string ConcatDir(StringPiece b, StringPiece n) {
    496   string r;
    497   if (!b.empty()) {
    498     b.AppendToString(&r);
    499     r += '/';
    500   }
    501   n.AppendToString(&r);
    502   NormalizePath(&r);
    503   return r;
    504 }
    505 
    506 string EchoEscape(const string& str) {
    507   const char* in = str.c_str();
    508   string buf;
    509   for (; *in; in++) {
    510     switch (*in) {
    511       case '\\':
    512         buf += "\\\\\\\\";
    513         break;
    514       case '\n':
    515         buf += "\\n";
    516         break;
    517       case '"':
    518         buf += "\\\"";
    519         break;
    520       default:
    521         buf += *in;
    522     }
    523   }
    524   return buf;
    525 }
    526 
    527 static bool NeedsShellEscape(char c) {
    528   return c == 0 || c == '"' || c == '$' || c == '\\' || c == '`';
    529 }
    530 
    531 void EscapeShell(string* s) {
    532   static const char ranges[] = "\0\0\"\"$$\\\\``";
    533   size_t prev = 0;
    534   size_t i = SkipUntil(s->c_str(), s->size(), ranges, 10, NeedsShellEscape);
    535   if (i == s->size())
    536     return;
    537 
    538   string r;
    539   for (; i < s->size();) {
    540     StringPiece(*s).substr(prev, i - prev).AppendToString(&r);
    541     char c = (*s)[i];
    542     r += '\\';
    543     if (c == '$') {
    544       if ((*s)[i + 1] == '$') {
    545         r += '$';
    546         i++;
    547       }
    548     }
    549     r += c;
    550     i++;
    551     prev = i;
    552     i += SkipUntil(s->c_str() + i, s->size() - i, ranges, 10, NeedsShellEscape);
    553   }
    554   StringPiece(*s).substr(prev).AppendToString(&r);
    555   s->swap(r);
    556 }
    557