Home | History | Annotate | Download | only in kati
      1 // Copyright 2015 Google Inc. All rights reserved
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //      http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
     15 // +build ignore
     16 
     17 #include "strutil.h"
     18 
     19 #include <ctype.h>
     20 #include <limits.h>
     21 #include <unistd.h>
     22 
     23 #include <algorithm>
     24 #include <functional>
     25 #include <stack>
     26 #include <utility>
     27 
     28 #ifdef __SSE4_2__
     29 #include <smmintrin.h>
     30 #endif
     31 
     32 #include "log.h"
     33 
     34 static bool isSpace(char c) {
     35   return (9 <= c && c <= 13) || c == 32;
     36 }
     37 
     38 #ifdef __SSE4_2__
     39 static int SkipUntilSSE42(const char* s, int len,
     40                           const char* ranges, int ranges_size) {
     41   __m128i ranges16 = _mm_loadu_si128((const __m128i*)ranges);
     42   len &= ~15;
     43   int i = 0;
     44   while (i < len) {
     45     __m128i b16 = _mm_loadu_si128((const __m128i*)(s + i));
     46     int r = _mm_cmpestri(
     47         ranges16, ranges_size, b16, len - i,
     48         _SIDD_LEAST_SIGNIFICANT | _SIDD_CMP_RANGES | _SIDD_UBYTE_OPS);
     49     if (r != 16) {
     50       return i + r;
     51     }
     52     i += 16;
     53   }
     54   return len;
     55 }
     56 #endif
     57 
     58 template <typename Cond>
     59 static int SkipUntil(const char* s, int len,
     60                      const char* ranges UNUSED, int ranges_size UNUSED,
     61                      Cond cond) {
     62   int i = 0;
     63 #ifdef __SSE4_2__
     64   i += SkipUntilSSE42(s, len, ranges, ranges_size);
     65 #endif
     66   for (; i < len; i++) {
     67     if (cond(s[i]))
     68       break;
     69   }
     70   return i;
     71 }
     72 
     73 WordScanner::Iterator& WordScanner::Iterator::operator++() {
     74   int len = static_cast<int>(in->size());
     75   for (s = i + 1; s < len; s++) {
     76     if (!isSpace((*in)[s]))
     77       break;
     78   }
     79   if (s >= len) {
     80     in = NULL;
     81     s = 0;
     82     i = 0;
     83     return *this;
     84   }
     85 
     86   static const char ranges[] = "\x09\x0d  ";
     87   // It's intentional we are not using isSpace here. It seems with
     88   // lambda the compiler generates better code.
     89   i = s + SkipUntil(in->data() + s, len - s, ranges, 4,
     90                     [](char c) { return (9 <= c && c <= 13) || c == 32; });
     91   return *this;
     92 }
     93 
     94 StringPiece WordScanner::Iterator::operator*() const {
     95   return in->substr(s, i - s);
     96 }
     97 
     98 WordScanner::WordScanner(StringPiece in)
     99     : in_(in) {
    100 }
    101 
    102 WordScanner::Iterator WordScanner::begin() const {
    103   Iterator iter;
    104   iter.in = &in_;
    105   iter.s = 0;
    106   iter.i = -1;
    107   ++iter;
    108   return iter;
    109 }
    110 
    111 WordScanner::Iterator WordScanner::end() const {
    112   Iterator iter;
    113   iter.in = NULL;
    114   iter.s = 0;
    115   iter.i = 0;
    116   return iter;
    117 }
    118 
    119 void WordScanner::Split(vector<StringPiece>* o) {
    120   for (StringPiece t : *this)
    121     o->push_back(t);
    122 }
    123 
    124 WordWriter::WordWriter(string* o)
    125     : out_(o),
    126       needs_space_(false) {
    127 }
    128 
    129 void WordWriter::MaybeAddWhitespace() {
    130   if (needs_space_) {
    131     out_->push_back(' ');
    132   } else {
    133     needs_space_ = true;
    134   }
    135 }
    136 
    137 void WordWriter::Write(StringPiece s) {
    138   MaybeAddWhitespace();
    139   AppendString(s, out_);
    140 }
    141 
    142 ScopedTerminator::ScopedTerminator(StringPiece s)
    143     : s_(s), c_(s[s.size()]) {
    144   const_cast<char*>(s_.data())[s_.size()] = '\0';
    145 }
    146 
    147 ScopedTerminator::~ScopedTerminator() {
    148   const_cast<char*>(s_.data())[s_.size()] = c_;
    149 }
    150 
    151 void AppendString(StringPiece str, string* out) {
    152   out->append(str.begin(), str.end());
    153 }
    154 
    155 bool HasPrefix(StringPiece str, StringPiece prefix) {
    156   ssize_t size_diff = str.size() - prefix.size();
    157   return size_diff >= 0 && str.substr(0, prefix.size()) == prefix;
    158 }
    159 
    160 bool HasSuffix(StringPiece str, StringPiece suffix) {
    161   ssize_t size_diff = str.size() - suffix.size();
    162   return size_diff >= 0 && str.substr(size_diff) == suffix;
    163 }
    164 
    165 bool HasWord(StringPiece str, StringPiece w) {
    166   size_t found = str.find(w);
    167   if (found == string::npos)
    168     return false;
    169   if (found != 0 && !isSpace(str[found-1]))
    170     return false;
    171   size_t end = found + w.size();
    172   if (end != str.size() && !isSpace(str[end]))
    173     return false;
    174   return true;
    175 }
    176 
    177 StringPiece TrimPrefix(StringPiece str, StringPiece prefix) {
    178   ssize_t size_diff = str.size() - prefix.size();
    179   if (size_diff < 0 || str.substr(0, prefix.size()) != prefix)
    180     return str;
    181   return str.substr(prefix.size());
    182 }
    183 
    184 StringPiece TrimSuffix(StringPiece str, StringPiece suffix) {
    185   ssize_t size_diff = str.size() - suffix.size();
    186   if (size_diff < 0 || str.substr(size_diff) != suffix)
    187     return str;
    188   return str.substr(0, size_diff);
    189 }
    190 
    191 Pattern::Pattern(StringPiece pat)
    192     : pat_(pat), percent_index_(pat.find('%')) {
    193 }
    194 
    195 bool Pattern::Match(StringPiece str) const {
    196   if (percent_index_ == string::npos)
    197     return str == pat_;
    198   return MatchImpl(str);
    199 }
    200 
    201 bool Pattern::MatchImpl(StringPiece str) const {
    202   return (HasPrefix(str, pat_.substr(0, percent_index_)) &&
    203           HasSuffix(str, pat_.substr(percent_index_ + 1)));
    204 }
    205 
    206 StringPiece Pattern::Stem(StringPiece str) const {
    207   if (!Match(str))
    208     return "";
    209   return str.substr(percent_index_,
    210                     str.size() - (pat_.size() - percent_index_ - 1));
    211 }
    212 
    213 void Pattern::AppendSubst(StringPiece str, StringPiece subst,
    214                           string* out) const {
    215   if (percent_index_ == string::npos) {
    216     if (str == pat_) {
    217       AppendString(subst, out);
    218       return;
    219     } else {
    220       AppendString(str, out);
    221       return;
    222     }
    223   }
    224 
    225   if (MatchImpl(str)) {
    226     size_t subst_percent_index = subst.find('%');
    227     if (subst_percent_index == string::npos) {
    228       AppendString(subst, out);
    229       return;
    230     } else {
    231       AppendString(subst.substr(0, subst_percent_index), out);
    232       AppendString(str.substr(percent_index_,
    233                               str.size() - pat_.size() + 1), out);
    234       AppendString(subst.substr(subst_percent_index + 1), out);
    235       return;
    236     }
    237   }
    238   AppendString(str, out);
    239 }
    240 
    241 void Pattern::AppendSubstRef(StringPiece str, StringPiece subst,
    242                              string* out) const {
    243   if (percent_index_ != string::npos && subst.find('%') != string::npos) {
    244     AppendSubst(str, subst, out);
    245     return;
    246   }
    247   StringPiece s = TrimSuffix(str, pat_);
    248   out->append(s.begin(), s.end());
    249   out->append(subst.begin(), subst.end());
    250 }
    251 
    252 string NoLineBreak(const string& s) {
    253   size_t index = s.find('\n');
    254   if (index == string::npos)
    255     return s;
    256   string r = s;
    257   while (index != string::npos) {
    258     r = r.substr(0, index) + "\\n" + r.substr(index + 1);
    259     index = r.find('\n', index + 2);
    260   }
    261   return r;
    262 }
    263 
    264 StringPiece TrimLeftSpace(StringPiece s) {
    265   size_t i = 0;
    266   for (; i < s.size(); i++) {
    267     if (isSpace(s[i]))
    268       continue;
    269     char n = s.get(i+1);
    270     if (s[i] == '\\' && (n == '\r' || n == '\n')) {
    271       i++;
    272       continue;
    273     }
    274     break;
    275   }
    276   return s.substr(i, s.size() - i);
    277 }
    278 
    279 StringPiece TrimRightSpace(StringPiece s) {
    280   size_t i = 0;
    281   for (; i < s.size(); i++) {
    282     char c = s[s.size() - 1 - i];
    283     if (isSpace(c)) {
    284       if ((c == '\r' || c == '\n') && s.get(s.size() - 2 - i) == '\\')
    285         i++;
    286       continue;
    287     }
    288     break;
    289   }
    290   return s.substr(0, s.size() - i);
    291 }
    292 
    293 StringPiece TrimSpace(StringPiece s) {
    294   return TrimRightSpace(TrimLeftSpace(s));
    295 }
    296 
    297 StringPiece Dirname(StringPiece s) {
    298   size_t found = s.rfind('/');
    299   if (found == string::npos)
    300     return StringPiece(".");
    301   if (found == 0)
    302     return StringPiece("");
    303   return s.substr(0, found);
    304 }
    305 
    306 StringPiece Basename(StringPiece s) {
    307   size_t found = s.rfind('/');
    308   if (found == string::npos || found == 0)
    309     return s;
    310   return s.substr(found + 1);
    311 }
    312 
    313 StringPiece GetExt(StringPiece s) {
    314   size_t found = s.rfind('.');
    315   if (found == string::npos)
    316     return StringPiece("");
    317   return s.substr(found);
    318 }
    319 
    320 StringPiece StripExt(StringPiece s) {
    321   size_t slash_index = s.rfind('/');
    322   size_t found = s.rfind('.');
    323   if (found == string::npos ||
    324       (slash_index != string::npos && found < slash_index))
    325     return s;
    326   return s.substr(0, found);
    327 }
    328 
    329 void NormalizePath(string* o) {
    330   if (o->empty())
    331     return;
    332   size_t start_index = 0;
    333   if ((*o)[0] == '/')
    334     start_index++;
    335   size_t j = start_index;
    336   size_t prev_start = start_index;
    337   for (size_t i = start_index; i <= o->size(); i++) {
    338     char c = (*o)[i];
    339     if (c != '/' && c != 0) {
    340       (*o)[j] = c;
    341       j++;
    342       continue;
    343     }
    344 
    345     StringPiece prev_dir = StringPiece(o->data() + prev_start, j - prev_start);
    346     if (prev_dir == ".") {
    347       j--;
    348     } else if (prev_dir == ".." && j != 2 /* .. */) {
    349       if (j == 3) {
    350         // /..
    351         j = start_index;
    352       } else {
    353         size_t orig_j = j;
    354         j -= 4;
    355         j = o->rfind('/', j);
    356         if (j == string::npos) {
    357           j = start_index;
    358         } else {
    359           j++;
    360         }
    361         if (StringPiece(o->data() + j, 3) == "../") {
    362           j = orig_j;
    363           (*o)[j] = c;
    364           j++;
    365         }
    366       }
    367     } else if (!prev_dir.empty()) {
    368       if (c) {
    369         (*o)[j] = c;
    370         j++;
    371       }
    372     }
    373     prev_start = j;
    374   }
    375   if (j > 1 && (*o)[j-1] == '/')
    376     j--;
    377   o->resize(j);
    378 }
    379 
    380 void AbsPath(StringPiece s, string* o) {
    381   if (s.get(0) == '/') {
    382     o->clear();
    383   } else {
    384     char buf[PATH_MAX];
    385     if (!getcwd(buf, PATH_MAX)) {
    386       fprintf(stderr, "getcwd failed\n");
    387       CHECK(false);
    388     }
    389 
    390     CHECK(buf[0] == '/');
    391     *o = buf;
    392     *o += '/';
    393   }
    394   AppendString(s, o);
    395   NormalizePath(o);
    396 }
    397 
    398 template<typename Cond>
    399 size_t FindOutsideParenImpl(StringPiece s, Cond cond) {
    400   bool prev_backslash = false;
    401   stack<char> paren_stack;
    402   for (size_t i = 0; i < s.size(); i++) {
    403     char c = s[i];
    404     if (cond(c) && paren_stack.empty() && !prev_backslash) {
    405       return i;
    406     }
    407     switch (c) {
    408       case '(':
    409         paren_stack.push(')');
    410         break;
    411       case '{':
    412         paren_stack.push('}');
    413         break;
    414 
    415       case ')':
    416       case '}':
    417         if (!paren_stack.empty() && c == paren_stack.top()) {
    418           paren_stack.pop();
    419         }
    420         break;
    421     }
    422     prev_backslash = c == '\\' && !prev_backslash;
    423   }
    424   return string::npos;
    425 }
    426 
    427 size_t FindOutsideParen(StringPiece s, char c) {
    428   return FindOutsideParenImpl(s, [&c](char d){return c == d;});
    429 }
    430 
    431 size_t FindTwoOutsideParen(StringPiece s, char c1, char c2) {
    432   return FindOutsideParenImpl(s, [&c1, &c2](char d){
    433       return d == c1 || d == c2;
    434     });
    435 }
    436 
    437 size_t FindThreeOutsideParen(StringPiece s, char c1, char c2, char c3) {
    438   return FindOutsideParenImpl(s, [&c1, &c2, &c3](char d){
    439       return d == c1 || d == c2 || d == c3;
    440     });
    441 }
    442 
    443 size_t FindEndOfLine(StringPiece s, size_t e, size_t* lf_cnt) {
    444   static const char ranges[] = "\0\0\n\n\\\\";
    445   while (e < s.size()) {
    446     e += SkipUntil(s.data() + e, s.size() - e, ranges, 6,
    447                    [](char c) { return c == 0 || c == '\n' || c == '\\'; });
    448     if (e >= s.size()) {
    449       CHECK(s.size() == e);
    450       break;
    451     }
    452     char c = s[e];
    453     if (c == '\0')
    454       break;
    455     if (c == '\\') {
    456       if (s[e+1] == '\n') {
    457         e += 2;
    458         ++*lf_cnt;
    459       } else if (s[e+1] == '\r' && s[e+2] == '\n') {
    460         e += 3;
    461         ++*lf_cnt;
    462       } else if (s[e+1] == '\\') {
    463         e += 2;
    464       } else {
    465         e++;
    466       }
    467     } else if (c == '\n') {
    468       ++*lf_cnt;
    469       return e;
    470     }
    471   }
    472   return e;
    473 }
    474 
    475 StringPiece TrimLeadingCurdir(StringPiece s) {
    476   while (s.substr(0, 2) == "./")
    477     s = s.substr(2);
    478   return s;
    479 }
    480 
    481 void FormatForCommandSubstitution(string* s) {
    482   while ((*s)[s->size()-1] == '\n')
    483     s->pop_back();
    484   for (size_t i = 0; i < s->size(); i++) {
    485     if ((*s)[i] == '\n')
    486       (*s)[i] = ' ';
    487   }
    488 }
    489 
    490 string SortWordsInString(StringPiece s) {
    491   vector<string> toks;
    492   for (StringPiece tok : WordScanner(s)) {
    493     toks.push_back(tok.as_string());
    494   }
    495   sort(toks.begin(), toks.end());
    496   return JoinStrings(toks, " ");
    497 }
    498 
    499 string ConcatDir(StringPiece b, StringPiece n) {
    500   string r;
    501   if (!b.empty()) {
    502     b.AppendToString(&r);
    503     r += '/';
    504   }
    505   n.AppendToString(&r);
    506   NormalizePath(&r);
    507   return r;
    508 }
    509 
    510 string EchoEscape(const string &str) {
    511   const char *in = str.c_str();
    512   string buf;
    513   for (; *in; in++) {
    514     switch(*in) {
    515       case '\\':
    516         buf += "\\\\\\\\";
    517         break;
    518       case '\n':
    519         buf += "\\n";
    520         break;
    521       case '"':
    522         buf += "\\\"";
    523         break;
    524       default:
    525         buf += *in;
    526     }
    527   }
    528   return buf;
    529 }
    530 
    531 static bool NeedsShellEscape(char c) {
    532   return c == 0 || c == '"' || c == '$' || c == '\\' || c == '`';
    533 }
    534 
    535 void EscapeShell(string* s) {
    536   static const char ranges[] = "\0\0\"\"$$\\\\``";
    537   size_t prev = 0;
    538   size_t i = SkipUntil(s->c_str(), s->size(), ranges, 10, NeedsShellEscape);
    539   if (i == s->size())
    540     return;
    541 
    542   string r;
    543   for (; i < s->size();) {
    544     StringPiece(*s).substr(prev, i - prev).AppendToString(&r);
    545     char c = (*s)[i];
    546     r += '\\';
    547     if (c == '$') {
    548       if ((*s)[i+1] == '$') {
    549         r += '$';
    550         i++;
    551       }
    552     }
    553     r += c;
    554     i++;
    555     prev = i;
    556     i += SkipUntil(s->c_str() + i, s->size() - i, ranges, 10, NeedsShellEscape);
    557   }
    558   StringPiece(*s).substr(prev).AppendToString(&r);
    559   s->swap(r);
    560 }
    561