1 // Copyright 2015 Google Inc. All rights reserved 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // +build ignore 16 17 #include "strutil.h" 18 19 #include <ctype.h> 20 #include <limits.h> 21 #include <unistd.h> 22 23 #include <algorithm> 24 #include <stack> 25 #include <utility> 26 27 #ifdef __SSE4_2__ 28 #include <smmintrin.h> 29 #endif 30 31 #include "log.h" 32 33 static bool isSpace(char c) { 34 return (9 <= c && c <= 13) || c == 32; 35 } 36 37 #ifdef __SSE4_2__ 38 static int SkipUntilSSE42(const char* s, int len, 39 const char* ranges, int ranges_size) { 40 __m128i ranges16 = _mm_loadu_si128((const __m128i*)ranges); 41 int i = 0; 42 do { 43 __m128i b16 = _mm_loadu_si128((const __m128i*)(s + i)); 44 int r = _mm_cmpestri( 45 ranges16, ranges_size, b16, len - i, 46 _SIDD_LEAST_SIGNIFICANT | _SIDD_CMP_RANGES | _SIDD_UBYTE_OPS); 47 if (r != 16) { 48 return i + r; 49 } 50 i += 16; 51 } while (i < len); 52 return len; 53 } 54 #endif 55 56 WordScanner::Iterator& WordScanner::Iterator::operator++() { 57 int len = static_cast<int>(in->size()); 58 for (s = i + 1; s < len; s++) { 59 if (!isSpace((*in)[s])) 60 break; 61 } 62 if (s >= len) { 63 in = NULL; 64 s = 0; 65 i = 0; 66 return *this; 67 } 68 69 #ifdef __SSE4_2__ 70 static const char ranges[] = "\x09\x0d "; 71 i = s; 72 i += SkipUntilSSE42(in->data() + s, len - s, ranges, 4); 73 #else 74 for (i = s; i < len; i++) { 75 if (isSpace((*in)[i])) 76 break; 77 } 78 #endif 79 80 return *this; 81 } 82 83 StringPiece WordScanner::Iterator::operator*() const { 84 return in->substr(s, i - s); 85 } 86 87 WordScanner::WordScanner(StringPiece in) 88 : in_(in) { 89 } 90 91 WordScanner::Iterator WordScanner::begin() const { 92 Iterator iter; 93 iter.in = &in_; 94 iter.s = 0; 95 iter.i = -1; 96 ++iter; 97 return iter; 98 } 99 100 WordScanner::Iterator WordScanner::end() const { 101 Iterator iter; 102 iter.in = NULL; 103 iter.s = 0; 104 iter.i = 0; 105 return iter; 106 } 107 108 void WordScanner::Split(vector<StringPiece>* o) { 109 for (StringPiece t : *this) 110 o->push_back(t); 111 } 112 113 WordWriter::WordWriter(string* o) 114 : out_(o), 115 needs_space_(false) { 116 } 117 118 void WordWriter::MaybeAddWhitespace() { 119 if (needs_space_) { 120 out_->push_back(' '); 121 } else { 122 needs_space_ = true; 123 } 124 } 125 126 void WordWriter::Write(StringPiece s) { 127 MaybeAddWhitespace(); 128 AppendString(s, out_); 129 } 130 131 ScopedTerminator::ScopedTerminator(StringPiece s) 132 : s_(s), c_(s[s.size()]) { 133 const_cast<char*>(s_.data())[s_.size()] = '\0'; 134 } 135 136 ScopedTerminator::~ScopedTerminator() { 137 const_cast<char*>(s_.data())[s_.size()] = c_; 138 } 139 140 void AppendString(StringPiece str, string* out) { 141 out->append(str.begin(), str.end()); 142 } 143 144 bool HasPrefix(StringPiece str, StringPiece prefix) { 145 ssize_t size_diff = str.size() - prefix.size(); 146 return size_diff >= 0 && str.substr(0, prefix.size()) == prefix; 147 } 148 149 bool HasSuffix(StringPiece str, StringPiece suffix) { 150 ssize_t size_diff = str.size() - suffix.size(); 151 return size_diff >= 0 && str.substr(size_diff) == suffix; 152 } 153 154 bool HasWord(StringPiece str, StringPiece w) { 155 size_t found = str.find(w); 156 if (found == string::npos) 157 return false; 158 if (found != 0 && !isSpace(str[found-1])) 159 return false; 160 size_t end = found + w.size(); 161 if (end != str.size() && !isSpace(str[end])) 162 return false; 163 return true; 164 } 165 166 StringPiece TrimSuffix(StringPiece str, StringPiece suffix) { 167 ssize_t size_diff = str.size() - suffix.size(); 168 if (size_diff < 0 || str.substr(size_diff) != suffix) 169 return str; 170 return str.substr(0, size_diff); 171 } 172 173 Pattern::Pattern(StringPiece pat) 174 : pat_(pat), percent_index_(pat.find('%')) { 175 } 176 177 bool Pattern::Match(StringPiece str) const { 178 if (percent_index_ == string::npos) 179 return str == pat_; 180 return MatchImpl(str); 181 } 182 183 bool Pattern::MatchImpl(StringPiece str) const { 184 return (HasPrefix(str, pat_.substr(0, percent_index_)) && 185 HasSuffix(str, pat_.substr(percent_index_ + 1))); 186 } 187 188 StringPiece Pattern::Stem(StringPiece str) const { 189 if (!Match(str)) 190 return ""; 191 return str.substr(percent_index_, 192 str.size() - (pat_.size() - percent_index_ - 1)); 193 } 194 195 void Pattern::AppendSubst(StringPiece str, StringPiece subst, 196 string* out) const { 197 if (percent_index_ == string::npos) { 198 if (str == pat_) { 199 AppendString(subst, out); 200 return; 201 } else { 202 AppendString(str, out); 203 return; 204 } 205 } 206 207 if (MatchImpl(str)) { 208 size_t subst_percent_index = subst.find('%'); 209 if (subst_percent_index == string::npos) { 210 AppendString(subst, out); 211 return; 212 } else { 213 AppendString(subst.substr(0, subst_percent_index), out); 214 AppendString(str.substr(percent_index_, 215 str.size() - pat_.size() + 1), out); 216 AppendString(subst.substr(subst_percent_index + 1), out); 217 return; 218 } 219 } 220 AppendString(str, out); 221 } 222 223 void Pattern::AppendSubstRef(StringPiece str, StringPiece subst, 224 string* out) const { 225 if (percent_index_ != string::npos && subst.find('%') != string::npos) { 226 AppendSubst(str, subst, out); 227 return; 228 } 229 StringPiece s = TrimSuffix(str, pat_); 230 out->append(s.begin(), s.end()); 231 out->append(subst.begin(), subst.end()); 232 } 233 234 string NoLineBreak(const string& s) { 235 size_t index = s.find('\n'); 236 if (index == string::npos) 237 return s; 238 string r = s; 239 while (index != string::npos) { 240 r = r.substr(0, index) + "\\n" + r.substr(index + 1); 241 index = r.find('\n', index + 2); 242 } 243 return r; 244 } 245 246 StringPiece TrimLeftSpace(StringPiece s) { 247 size_t i = 0; 248 for (; i < s.size(); i++) { 249 if (isSpace(s[i])) 250 continue; 251 char n = s.get(i+1); 252 if (s[i] == '\\' && (n == '\r' || n == '\n')) { 253 i++; 254 continue; 255 } 256 break; 257 } 258 return s.substr(i, s.size() - i); 259 } 260 261 StringPiece TrimRightSpace(StringPiece s) { 262 size_t i = 0; 263 for (; i < s.size(); i++) { 264 char c = s[s.size() - 1 - i]; 265 if (isSpace(c)) { 266 if ((c == '\r' || c == '\n') && s.get(s.size() - 2 - i) == '\\') 267 i++; 268 continue; 269 } 270 break; 271 } 272 return s.substr(0, s.size() - i); 273 } 274 275 StringPiece TrimSpace(StringPiece s) { 276 return TrimRightSpace(TrimLeftSpace(s)); 277 } 278 279 StringPiece Dirname(StringPiece s) { 280 size_t found = s.rfind('/'); 281 if (found == string::npos) 282 return StringPiece("."); 283 if (found == 0) 284 return StringPiece(""); 285 return s.substr(0, found); 286 } 287 288 StringPiece Basename(StringPiece s) { 289 size_t found = s.rfind('/'); 290 if (found == string::npos || found == 0) 291 return s; 292 return s.substr(found + 1); 293 } 294 295 StringPiece GetExt(StringPiece s) { 296 size_t found = s.rfind('.'); 297 if (found == string::npos) 298 return StringPiece(""); 299 return s.substr(found); 300 } 301 302 StringPiece StripExt(StringPiece s) { 303 size_t slash_index = s.rfind('/'); 304 size_t found = s.rfind('.'); 305 if (found == string::npos || 306 (slash_index != string::npos && found < slash_index)) 307 return s; 308 return s.substr(0, found); 309 } 310 311 void NormalizePath(string* o) { 312 if (o->empty()) 313 return; 314 size_t start_index = 0; 315 if ((*o)[0] == '/') 316 start_index++; 317 size_t j = start_index; 318 size_t prev_start = start_index; 319 for (size_t i = start_index; i <= o->size(); i++) { 320 char c = (*o)[i]; 321 if (c != '/' && c != 0) { 322 (*o)[j] = c; 323 j++; 324 continue; 325 } 326 327 StringPiece prev_dir = StringPiece(o->data() + prev_start, j - prev_start); 328 if (prev_dir == ".") { 329 j--; 330 } else if (prev_dir == ".." && j != 2 /* .. */) { 331 if (j == 3) { 332 // /.. 333 j = start_index; 334 } else { 335 size_t orig_j = j; 336 j -= 4; 337 j = o->rfind('/', j); 338 if (j == string::npos) { 339 j = start_index; 340 } else { 341 j++; 342 } 343 if (StringPiece(o->data() + j, 3) == "../") { 344 j = orig_j; 345 (*o)[j] = c; 346 j++; 347 } 348 } 349 } else if (!prev_dir.empty()) { 350 if (c) { 351 (*o)[j] = c; 352 j++; 353 } 354 } 355 prev_start = j; 356 } 357 if (j > 1 && (*o)[j-1] == '/') 358 j--; 359 o->resize(j); 360 } 361 362 void AbsPath(StringPiece s, string* o) { 363 if (s.get(0) == '/') { 364 o->clear(); 365 } else { 366 char buf[PATH_MAX]; 367 if (!getcwd(buf, PATH_MAX)) { 368 fprintf(stderr, "getcwd failed\n"); 369 CHECK(false); 370 } 371 372 CHECK(buf[0] == '/'); 373 *o = buf; 374 *o += '/'; 375 } 376 AppendString(s, o); 377 NormalizePath(o); 378 } 379 380 template<typename Cond> 381 size_t FindOutsideParenImpl(StringPiece s, Cond cond) { 382 bool prev_backslash = false; 383 stack<char> paren_stack; 384 for (size_t i = 0; i < s.size(); i++) { 385 char c = s[i]; 386 if (cond(c) && paren_stack.empty() && !prev_backslash) { 387 return i; 388 } 389 switch (c) { 390 case '(': 391 paren_stack.push(')'); 392 break; 393 case '{': 394 paren_stack.push('}'); 395 break; 396 397 case ')': 398 case '}': 399 if (!paren_stack.empty() && c == paren_stack.top()) { 400 paren_stack.pop(); 401 } 402 break; 403 } 404 prev_backslash = c == '\\' && !prev_backslash; 405 } 406 return string::npos; 407 } 408 409 size_t FindOutsideParen(StringPiece s, char c) { 410 return FindOutsideParenImpl(s, [&c](char d){return c == d;}); 411 } 412 413 size_t FindTwoOutsideParen(StringPiece s, char c1, char c2) { 414 return FindOutsideParenImpl(s, [&c1, &c2](char d){ 415 return d == c1 || d == c2; 416 }); 417 } 418 419 size_t FindThreeOutsideParen(StringPiece s, char c1, char c2, char c3) { 420 return FindOutsideParenImpl(s, [&c1, &c2, &c3](char d){ 421 return d == c1 || d == c2 || d == c3; 422 }); 423 } 424 425 size_t FindEndOfLine(StringPiece s, size_t e, size_t* lf_cnt) { 426 #ifdef __SSE4_2__ 427 static const char ranges[] = "\0\0\n\n\\\\"; 428 while (e < s.size()) { 429 e += SkipUntilSSE42(s.data() + e, s.size() - e, ranges, 6); 430 if (e >= s.size()) { 431 CHECK(s.size() == e); 432 break; 433 } 434 char c = s[e]; 435 if (c == '\0') 436 break; 437 if (c == '\\') { 438 if (s[e+1] == '\n') { 439 e += 2; 440 ++*lf_cnt; 441 } else if (s[e+1] == '\r' && s[e+2] == '\n') { 442 e += 3; 443 ++*lf_cnt; 444 } else if (s[e+1] == '\\') { 445 e += 2; 446 } else { 447 e++; 448 } 449 } else if (c == '\n') { 450 ++*lf_cnt; 451 return e; 452 } 453 } 454 return e; 455 #else 456 bool prev_backslash = false; 457 for (; e < s.size(); e++) { 458 char c = s[e]; 459 if (c == '\\') { 460 prev_backslash = !prev_backslash; 461 } else if (c == '\n') { 462 ++*lf_cnt; 463 if (!prev_backslash) { 464 return e; 465 } 466 prev_backslash = false; 467 } else if (c != '\r') { 468 prev_backslash = false; 469 } 470 } 471 return e; 472 #endif 473 } 474 475 StringPiece TrimLeadingCurdir(StringPiece s) { 476 while (s.substr(0, 2) == "./") 477 s = s.substr(2); 478 return s; 479 } 480 481 void FormatForCommandSubstitution(string* s) { 482 while ((*s)[s->size()-1] == '\n') 483 s->pop_back(); 484 for (size_t i = 0; i < s->size(); i++) { 485 if ((*s)[i] == '\n') 486 (*s)[i] = ' '; 487 } 488 } 489 490 string SortWordsInString(StringPiece s) { 491 vector<string> toks; 492 for (StringPiece tok : WordScanner(s)) { 493 toks.push_back(tok.as_string()); 494 } 495 sort(toks.begin(), toks.end()); 496 return JoinStrings(toks, " "); 497 } 498 499 string ConcatDir(StringPiece b, StringPiece n) { 500 string r; 501 if (!b.empty()) { 502 b.AppendToString(&r); 503 r += '/'; 504 } 505 n.AppendToString(&r); 506 NormalizePath(&r); 507 return r; 508 } 509 510 string EchoEscape(const string str) { 511 const char *in = str.c_str(); 512 string buf; 513 for (; *in; in++) { 514 switch(*in) { 515 case '\\': 516 buf += "\\\\\\\\"; 517 break; 518 case '\n': 519 buf += "\\n"; 520 break; 521 case '"': 522 buf += "\\\""; 523 break; 524 default: 525 buf += *in; 526 } 527 } 528 return buf; 529 } 530 531 void EscapeShell(string* s) { 532 #ifdef __SSE4_2__ 533 static const char ranges[] = "\0\0\"\"$$\\\\``"; 534 size_t prev = 0; 535 size_t i = SkipUntilSSE42(s->c_str(), s->size(), ranges, 10); 536 if (i == s->size()) 537 return; 538 539 string r; 540 for (; i < s->size();) { 541 StringPiece(*s).substr(prev, i - prev).AppendToString(&r); 542 char c = (*s)[i]; 543 r += '\\'; 544 if (c == '$') { 545 if ((*s)[i+1] == '$') { 546 r += '$'; 547 i++; 548 } 549 } 550 r += c; 551 i++; 552 prev = i; 553 i += SkipUntilSSE42(s->c_str() + i, s->size() - i, ranges, 10); 554 } 555 StringPiece(*s).substr(prev).AppendToString(&r); 556 s->swap(r); 557 #else 558 if (s->find_first_of("$`\\\"") == string::npos) 559 return; 560 string r; 561 bool last_dollar = false; 562 for (char c : *s) { 563 switch (c) { 564 case '$': 565 if (last_dollar) { 566 r += c; 567 last_dollar = false; 568 } else { 569 r += '\\'; 570 r += c; 571 last_dollar = true; 572 } 573 break; 574 case '`': 575 case '"': 576 case '\\': 577 r += '\\'; 578 // fall through. 579 default: 580 r += c; 581 last_dollar = false; 582 } 583 } 584 s->swap(r); 585 #endif 586 } 587