Home | History | Annotate | Download | only in utility
      1 /*
      2  * Copyright (c) 2015, Intel Corporation
      3  * All rights reserved.
      4  *
      5  * Redistribution and use in source and binary forms, with or without modification,
      6  * are permitted provided that the following conditions are met:
      7  *
      8  * 1. Redistributions of source code must retain the above copyright notice, this
      9  * list of conditions and the following disclaimer.
     10  *
     11  * 2. Redistributions in binary form must reproduce the above copyright notice,
     12  * this list of conditions and the following disclaimer in the documentation and/or
     13  * other materials provided with the distribution.
     14  *
     15  * 3. Neither the name of the copyright holder nor the names of its contributors
     16  * may be used to endorse or promote products derived from this software without
     17  * specific prior written permission.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
     20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     21  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     22  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
     23  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
     24  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     25  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
     26  * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     28  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29  */
     30 #include "Tokenizer.h"
     31 
     32 using std::string;
     33 using std::vector;
     34 
     35 const string Tokenizer::defaultDelimiters = " \n\r\t\v\f";
     36 
     37 Tokenizer::Tokenizer(const string &input, const string &delimiters, bool mergeDelimiters)
     38     : _input(input), _delimiters(delimiters), _mergeDelimiters(mergeDelimiters)
     39 {
     40 }
     41 
     42 vector<string> Tokenizer::split()
     43 {
     44     vector<string> result;
     45     string token;
     46     bool leftover = false;
     47 
     48     for (const auto character : _input) {
     49         if (_delimiters.find(character) != string::npos) {
     50             if (_mergeDelimiters) {
     51                 leftover = false;
     52                 if (token.empty()) {
     53                     // skip consecutive delimiters
     54                     continue;
     55                 }
     56             } else {
     57                 // We've encountered a delimiter, which means that there is a
     58                 // left-hand token and a right-side token. We are going to add
     59                 // the left-hand one but must not forget that there is a
     60                 // right-hand one (possibly empty)
     61                 leftover = true;
     62             }
     63 
     64             result.push_back(token);
     65             token.clear();
     66             continue;
     67         }
     68         token += character;
     69         leftover = true;
     70     }
     71 
     72     // push any leftover token:
     73     if (leftover) {
     74         result.push_back(token);
     75     }
     76 
     77     return result;
     78 }
     79