Home | History | Annotate | Download | only in kati
      1 // Copyright 2015 Google Inc. All rights reserved
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //      http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
     15 // +build ignore
     16 
     17 #include "strutil.h"
     18 
     19 #include <ctype.h>
     20 #include <limits.h>
     21 #include <unistd.h>
     22 
     23 #include <algorithm>
     24 #include <stack>
     25 #include <utility>
     26 
     27 #ifdef __SSE4_2__
     28 #include <smmintrin.h>
     29 #endif
     30 
     31 #include "log.h"
     32 
     33 static bool isSpace(char c) {
     34   return (9 <= c && c <= 13) || c == 32;
     35 }
     36 
     37 #ifdef __SSE4_2__
     38 static int SkipUntilSSE42(const char* s, int len,
     39                           const char* ranges, int ranges_size) {
     40   __m128i ranges16 = _mm_loadu_si128((const __m128i*)ranges);
     41   int i = 0;
     42   do {
     43     __m128i b16 = _mm_loadu_si128((const __m128i*)(s + i));
     44     int r = _mm_cmpestri(
     45         ranges16, ranges_size, b16, len - i,
     46         _SIDD_LEAST_SIGNIFICANT | _SIDD_CMP_RANGES | _SIDD_UBYTE_OPS);
     47     if (r != 16) {
     48       return i + r;
     49     }
     50     i += 16;
     51   } while (i < len);
     52   return len;
     53 }
     54 #endif
     55 
     56 WordScanner::Iterator& WordScanner::Iterator::operator++() {
     57   int len = static_cast<int>(in->size());
     58   for (s = i + 1; s < len; s++) {
     59     if (!isSpace((*in)[s]))
     60       break;
     61   }
     62   if (s >= len) {
     63     in = NULL;
     64     s = 0;
     65     i = 0;
     66     return *this;
     67   }
     68 
     69 #ifdef __SSE4_2__
     70   static const char ranges[] = "\x09\x0d  ";
     71   i = s;
     72   i += SkipUntilSSE42(in->data() + s, len - s, ranges, 4);
     73 #else
     74   for (i = s; i < len; i++) {
     75     if (isSpace((*in)[i]))
     76       break;
     77   }
     78 #endif
     79 
     80   return *this;
     81 }
     82 
     83 StringPiece WordScanner::Iterator::operator*() const {
     84   return in->substr(s, i - s);
     85 }
     86 
     87 WordScanner::WordScanner(StringPiece in)
     88     : in_(in) {
     89 }
     90 
     91 WordScanner::Iterator WordScanner::begin() const {
     92   Iterator iter;
     93   iter.in = &in_;
     94   iter.s = 0;
     95   iter.i = -1;
     96   ++iter;
     97   return iter;
     98 }
     99 
    100 WordScanner::Iterator WordScanner::end() const {
    101   Iterator iter;
    102   iter.in = NULL;
    103   iter.s = 0;
    104   iter.i = 0;
    105   return iter;
    106 }
    107 
    108 void WordScanner::Split(vector<StringPiece>* o) {
    109   for (StringPiece t : *this)
    110     o->push_back(t);
    111 }
    112 
    113 WordWriter::WordWriter(string* o)
    114     : out_(o),
    115       needs_space_(false) {
    116 }
    117 
    118 void WordWriter::MaybeAddWhitespace() {
    119   if (needs_space_) {
    120     out_->push_back(' ');
    121   } else {
    122     needs_space_ = true;
    123   }
    124 }
    125 
    126 void WordWriter::Write(StringPiece s) {
    127   MaybeAddWhitespace();
    128   AppendString(s, out_);
    129 }
    130 
    131 ScopedTerminator::ScopedTerminator(StringPiece s)
    132     : s_(s), c_(s[s.size()]) {
    133   const_cast<char*>(s_.data())[s_.size()] = '\0';
    134 }
    135 
    136 ScopedTerminator::~ScopedTerminator() {
    137   const_cast<char*>(s_.data())[s_.size()] = c_;
    138 }
    139 
    140 void AppendString(StringPiece str, string* out) {
    141   out->append(str.begin(), str.end());
    142 }
    143 
    144 bool HasPrefix(StringPiece str, StringPiece prefix) {
    145   ssize_t size_diff = str.size() - prefix.size();
    146   return size_diff >= 0 && str.substr(0, prefix.size()) == prefix;
    147 }
    148 
    149 bool HasSuffix(StringPiece str, StringPiece suffix) {
    150   ssize_t size_diff = str.size() - suffix.size();
    151   return size_diff >= 0 && str.substr(size_diff) == suffix;
    152 }
    153 
    154 bool HasWord(StringPiece str, StringPiece w) {
    155   size_t found = str.find(w);
    156   if (found == string::npos)
    157     return false;
    158   if (found != 0 && !isSpace(str[found-1]))
    159     return false;
    160   size_t end = found + w.size();
    161   if (end != str.size() && !isSpace(str[end]))
    162     return false;
    163   return true;
    164 }
    165 
    166 StringPiece TrimSuffix(StringPiece str, StringPiece suffix) {
    167   ssize_t size_diff = str.size() - suffix.size();
    168   if (size_diff < 0 || str.substr(size_diff) != suffix)
    169     return str;
    170   return str.substr(0, size_diff);
    171 }
    172 
    173 Pattern::Pattern(StringPiece pat)
    174     : pat_(pat), percent_index_(pat.find('%')) {
    175 }
    176 
    177 bool Pattern::Match(StringPiece str) const {
    178   if (percent_index_ == string::npos)
    179     return str == pat_;
    180   return MatchImpl(str);
    181 }
    182 
    183 bool Pattern::MatchImpl(StringPiece str) const {
    184   return (HasPrefix(str, pat_.substr(0, percent_index_)) &&
    185           HasSuffix(str, pat_.substr(percent_index_ + 1)));
    186 }
    187 
    188 StringPiece Pattern::Stem(StringPiece str) const {
    189   if (!Match(str))
    190     return "";
    191   return str.substr(percent_index_,
    192                     str.size() - (pat_.size() - percent_index_ - 1));
    193 }
    194 
    195 void Pattern::AppendSubst(StringPiece str, StringPiece subst,
    196                           string* out) const {
    197   if (percent_index_ == string::npos) {
    198     if (str == pat_) {
    199       AppendString(subst, out);
    200       return;
    201     } else {
    202       AppendString(str, out);
    203       return;
    204     }
    205   }
    206 
    207   if (MatchImpl(str)) {
    208     size_t subst_percent_index = subst.find('%');
    209     if (subst_percent_index == string::npos) {
    210       AppendString(subst, out);
    211       return;
    212     } else {
    213       AppendString(subst.substr(0, subst_percent_index), out);
    214       AppendString(str.substr(percent_index_,
    215                               str.size() - pat_.size() + 1), out);
    216       AppendString(subst.substr(subst_percent_index + 1), out);
    217       return;
    218     }
    219   }
    220   AppendString(str, out);
    221 }
    222 
    223 void Pattern::AppendSubstRef(StringPiece str, StringPiece subst,
    224                              string* out) const {
    225   if (percent_index_ != string::npos && subst.find('%') != string::npos) {
    226     AppendSubst(str, subst, out);
    227     return;
    228   }
    229   StringPiece s = TrimSuffix(str, pat_);
    230   out->append(s.begin(), s.end());
    231   out->append(subst.begin(), subst.end());
    232 }
    233 
    234 string NoLineBreak(const string& s) {
    235   size_t index = s.find('\n');
    236   if (index == string::npos)
    237     return s;
    238   string r = s;
    239   while (index != string::npos) {
    240     r = r.substr(0, index) + "\\n" + r.substr(index + 1);
    241     index = r.find('\n', index + 2);
    242   }
    243   return r;
    244 }
    245 
    246 StringPiece TrimLeftSpace(StringPiece s) {
    247   size_t i = 0;
    248   for (; i < s.size(); i++) {
    249     if (isSpace(s[i]))
    250       continue;
    251     char n = s.get(i+1);
    252     if (s[i] == '\\' && (n == '\r' || n == '\n')) {
    253       i++;
    254       continue;
    255     }
    256     break;
    257   }
    258   return s.substr(i, s.size() - i);
    259 }
    260 
    261 StringPiece TrimRightSpace(StringPiece s) {
    262   size_t i = 0;
    263   for (; i < s.size(); i++) {
    264     char c = s[s.size() - 1 - i];
    265     if (isSpace(c)) {
    266       if ((c == '\r' || c == '\n') && s.get(s.size() - 2 - i) == '\\')
    267         i++;
    268       continue;
    269     }
    270     break;
    271   }
    272   return s.substr(0, s.size() - i);
    273 }
    274 
    275 StringPiece TrimSpace(StringPiece s) {
    276   return TrimRightSpace(TrimLeftSpace(s));
    277 }
    278 
    279 StringPiece Dirname(StringPiece s) {
    280   size_t found = s.rfind('/');
    281   if (found == string::npos)
    282     return StringPiece(".");
    283   if (found == 0)
    284     return StringPiece("");
    285   return s.substr(0, found);
    286 }
    287 
    288 StringPiece Basename(StringPiece s) {
    289   size_t found = s.rfind('/');
    290   if (found == string::npos || found == 0)
    291     return s;
    292   return s.substr(found + 1);
    293 }
    294 
    295 StringPiece GetExt(StringPiece s) {
    296   size_t found = s.rfind('.');
    297   if (found == string::npos)
    298     return StringPiece("");
    299   return s.substr(found);
    300 }
    301 
    302 StringPiece StripExt(StringPiece s) {
    303   size_t slash_index = s.rfind('/');
    304   size_t found = s.rfind('.');
    305   if (found == string::npos ||
    306       (slash_index != string::npos && found < slash_index))
    307     return s;
    308   return s.substr(0, found);
    309 }
    310 
    311 void NormalizePath(string* o) {
    312   if (o->empty())
    313     return;
    314   size_t start_index = 0;
    315   if ((*o)[0] == '/')
    316     start_index++;
    317   size_t j = start_index;
    318   size_t prev_start = start_index;
    319   for (size_t i = start_index; i <= o->size(); i++) {
    320     char c = (*o)[i];
    321     if (c != '/' && c != 0) {
    322       (*o)[j] = c;
    323       j++;
    324       continue;
    325     }
    326 
    327     StringPiece prev_dir = StringPiece(o->data() + prev_start, j - prev_start);
    328     if (prev_dir == ".") {
    329       j--;
    330     } else if (prev_dir == ".." && j != 2 /* .. */) {
    331       if (j == 3) {
    332         // /..
    333         j = start_index;
    334       } else {
    335         size_t orig_j = j;
    336         j -= 4;
    337         j = o->rfind('/', j);
    338         if (j == string::npos) {
    339           j = start_index;
    340         } else {
    341           j++;
    342         }
    343         if (StringPiece(o->data() + j, 3) == "../") {
    344           j = orig_j;
    345           (*o)[j] = c;
    346           j++;
    347         }
    348       }
    349     } else if (!prev_dir.empty()) {
    350       if (c) {
    351         (*o)[j] = c;
    352         j++;
    353       }
    354     }
    355     prev_start = j;
    356   }
    357   if (j > 1 && (*o)[j-1] == '/')
    358     j--;
    359   o->resize(j);
    360 }
    361 
    362 void AbsPath(StringPiece s, string* o) {
    363   if (s.get(0) == '/') {
    364     o->clear();
    365   } else {
    366     char buf[PATH_MAX];
    367     if (!getcwd(buf, PATH_MAX)) {
    368       fprintf(stderr, "getcwd failed\n");
    369       CHECK(false);
    370     }
    371 
    372     CHECK(buf[0] == '/');
    373     *o = buf;
    374     *o += '/';
    375   }
    376   AppendString(s, o);
    377   NormalizePath(o);
    378 }
    379 
    380 template<typename Cond>
    381 size_t FindOutsideParenImpl(StringPiece s, Cond cond) {
    382   bool prev_backslash = false;
    383   stack<char> paren_stack;
    384   for (size_t i = 0; i < s.size(); i++) {
    385     char c = s[i];
    386     if (cond(c) && paren_stack.empty() && !prev_backslash) {
    387       return i;
    388     }
    389     switch (c) {
    390       case '(':
    391         paren_stack.push(')');
    392         break;
    393       case '{':
    394         paren_stack.push('}');
    395         break;
    396 
    397       case ')':
    398       case '}':
    399         if (!paren_stack.empty() && c == paren_stack.top()) {
    400           paren_stack.pop();
    401         }
    402         break;
    403     }
    404     prev_backslash = c == '\\' && !prev_backslash;
    405   }
    406   return string::npos;
    407 }
    408 
    409 size_t FindOutsideParen(StringPiece s, char c) {
    410   return FindOutsideParenImpl(s, [&c](char d){return c == d;});
    411 }
    412 
    413 size_t FindTwoOutsideParen(StringPiece s, char c1, char c2) {
    414   return FindOutsideParenImpl(s, [&c1, &c2](char d){
    415       return d == c1 || d == c2;
    416     });
    417 }
    418 
    419 size_t FindThreeOutsideParen(StringPiece s, char c1, char c2, char c3) {
    420   return FindOutsideParenImpl(s, [&c1, &c2, &c3](char d){
    421       return d == c1 || d == c2 || d == c3;
    422     });
    423 }
    424 
    425 size_t FindEndOfLine(StringPiece s, size_t e, size_t* lf_cnt) {
    426 #ifdef __SSE4_2__
    427   static const char ranges[] = "\0\0\n\n\\\\";
    428   while (e < s.size()) {
    429     e += SkipUntilSSE42(s.data() + e, s.size() - e, ranges, 6);
    430     if (e >= s.size()) {
    431       CHECK(s.size() == e);
    432       break;
    433     }
    434     char c = s[e];
    435     if (c == '\0')
    436       break;
    437     if (c == '\\') {
    438       if (s[e+1] == '\n') {
    439         e += 2;
    440         ++*lf_cnt;
    441       } else if (s[e+1] == '\r' && s[e+2] == '\n') {
    442         e += 3;
    443         ++*lf_cnt;
    444       } else if (s[e+1] == '\\') {
    445         e += 2;
    446       } else {
    447         e++;
    448       }
    449     } else if (c == '\n') {
    450       ++*lf_cnt;
    451       return e;
    452     }
    453   }
    454   return e;
    455 #else
    456   bool prev_backslash = false;
    457   for (; e < s.size(); e++) {
    458     char c = s[e];
    459     if (c == '\\') {
    460       prev_backslash = !prev_backslash;
    461     } else if (c == '\n') {
    462       ++*lf_cnt;
    463       if (!prev_backslash) {
    464         return e;
    465       }
    466       prev_backslash = false;
    467     } else if (c != '\r') {
    468       prev_backslash = false;
    469     }
    470   }
    471   return e;
    472 #endif
    473 }
    474 
    475 StringPiece TrimLeadingCurdir(StringPiece s) {
    476   while (s.substr(0, 2) == "./")
    477     s = s.substr(2);
    478   return s;
    479 }
    480 
    481 void FormatForCommandSubstitution(string* s) {
    482   while ((*s)[s->size()-1] == '\n')
    483     s->pop_back();
    484   for (size_t i = 0; i < s->size(); i++) {
    485     if ((*s)[i] == '\n')
    486       (*s)[i] = ' ';
    487   }
    488 }
    489 
    490 string SortWordsInString(StringPiece s) {
    491   vector<string> toks;
    492   for (StringPiece tok : WordScanner(s)) {
    493     toks.push_back(tok.as_string());
    494   }
    495   sort(toks.begin(), toks.end());
    496   return JoinStrings(toks, " ");
    497 }
    498 
    499 string ConcatDir(StringPiece b, StringPiece n) {
    500   string r;
    501   if (!b.empty()) {
    502     b.AppendToString(&r);
    503     r += '/';
    504   }
    505   n.AppendToString(&r);
    506   NormalizePath(&r);
    507   return r;
    508 }
    509 
    510 string EchoEscape(const string str) {
    511   const char *in = str.c_str();
    512   string buf;
    513   for (; *in; in++) {
    514     switch(*in) {
    515       case '\\':
    516         buf += "\\\\\\\\";
    517         break;
    518       case '\n':
    519         buf += "\\n";
    520         break;
    521       case '"':
    522         buf += "\\\"";
    523         break;
    524       default:
    525         buf += *in;
    526     }
    527   }
    528   return buf;
    529 }
    530 
    531 void EscapeShell(string* s) {
    532 #ifdef __SSE4_2__
    533   static const char ranges[] = "\0\0\"\"$$\\\\``";
    534   size_t prev = 0;
    535   size_t i = SkipUntilSSE42(s->c_str(), s->size(), ranges, 10);
    536   if (i == s->size())
    537     return;
    538 
    539   string r;
    540   for (; i < s->size();) {
    541     StringPiece(*s).substr(prev, i - prev).AppendToString(&r);
    542     char c = (*s)[i];
    543     r += '\\';
    544     if (c == '$') {
    545       if ((*s)[i+1] == '$') {
    546         r += '$';
    547         i++;
    548       }
    549     }
    550     r += c;
    551     i++;
    552     prev = i;
    553     i += SkipUntilSSE42(s->c_str() + i, s->size() - i, ranges, 10);
    554   }
    555   StringPiece(*s).substr(prev).AppendToString(&r);
    556   s->swap(r);
    557 #else
    558   if (s->find_first_of("$`\\\"") == string::npos)
    559     return;
    560   string r;
    561   bool last_dollar = false;
    562   for (char c : *s) {
    563     switch (c) {
    564       case '$':
    565         if (last_dollar) {
    566           r += c;
    567           last_dollar = false;
    568         } else {
    569           r += '\\';
    570           r += c;
    571           last_dollar = true;
    572         }
    573         break;
    574       case '`':
    575       case '"':
    576       case '\\':
    577         r += '\\';
    578         // fall through.
    579       default:
    580         r += c;
    581         last_dollar = false;
    582     }
    583   }
    584   s->swap(r);
    585 #endif
    586 }
    587