Home | History | Annotate | Download | only in service
      1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
      2 
      3 Licensed under the Apache License, Version 2.0 (the "License");
      4 you may not use this file except in compliance with the License.
      5 You may obtain a copy of the License at
      6 
      7     http://www.apache.org/licenses/LICENSE-2.0
      8 
      9 Unless required by applicable law or agreed to in writing, software
     10 distributed under the License is distributed on an "AS IS" BASIS,
     11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 See the License for the specific language governing permissions and
     13 limitations under the License.
     14 ==============================================================================*/
     15 
     16 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_HLO_LEXER_H_
     17 #define TENSORFLOW_COMPILER_XLA_SERVICE_HLO_LEXER_H_
     18 
     19 #include <string>
     20 
     21 #include "absl/strings/string_view.h"
     22 #include "tensorflow/compiler/xla/shape.h"
     23 #include "tensorflow/compiler/xla/types.h"
     24 #include "tensorflow/compiler/xla/xla_data.pb.h"
     25 #include "tensorflow/core/platform/logging.h"
     26 #include "tensorflow/core/platform/regexp.h"
     27 #include "tensorflow/core/platform/types.h"
     28 
     29 namespace xla {
     30 
     31 // Defines different kinds of tokens used by the HLO lexer.
     32 //
     33 // You shouldn't need to use this directly unless you're using HloLexer
     34 // directly, and you probably don't need to do that.  Use hlo_parser instead.
     35 enum class TokKind {
     36   // Markers
     37   kEof,
     38   kError,
     39 
     40   // Tokens with no info.
     41   kEqual,     // =
     42   kComma,     // ,
     43   kColon,     // :
     44   kAsterisk,  // *
     45   kLsquare,
     46   kRsquare,  // [  ]
     47   kLbrace,
     48   kRbrace,  // {  }
     49   kLparen,
     50   kRparen,  // (  )
     51   kDots,    // ...
     52 
     53   kArrow,  // ->
     54   kLeq,    // <=
     55 
     56   // Keywords
     57   kw_HloModule,
     58   kw_ENTRY,
     59   kw_ROOT,
     60   kw_true,
     61   kw_false,
     62   kw_maximal,
     63   kw_replicated,
     64   kw_nan,
     65   kw_inf,
     66   kw_sparse,
     67 
     68   kNegInf,  // -inf
     69 
     70   // Typed tokens.
     71   kPrimitiveType,  // F32, PRED, etc.
     72   kName,           // %foo
     73   kAttributeName,  // dimensions=
     74   kDimLabels,      // [0-9bf]{2,}_[0-9io]{2,}->[0-9bf]{2,}
     75   kDxD,            // [0-9]+(x[0-9]+)+
     76   kPad,            // [0-9]+_[0-9]+(_[0-9]+)?(x[0-9]+_[0-9]+(_[0-9]+)?)*
     77   kIdent,          // other identifiers
     78   kString,         // "abcd\"\n"
     79   kInt,            // 42
     80   kDecimal,        // 4.2
     81 };
     82 
     83 string TokKindToString(TokKind kind);
     84 
     85 // Lexer for the HloModule::ToString() format text.
     86 //
     87 // This class is meant to be used by hlo_parser.cc.  You shouldn't need to use
     88 // it directly.
     89 class HloLexer {
     90  public:
     91   explicit HloLexer(absl::string_view buf) : buf_(buf) {
     92     current_ptr_ = buf_.begin();
     93   }
     94 
     95   TokKind Lex() { return token_state_.current_kind = LexToken(); }
     96 
     97   TokKind GetKind() const { return token_state_.current_kind; }
     98   string GetStrVal() const {
     99     switch (GetKind()) {
    100       case TokKind::kName:
    101       case TokKind::kAttributeName:
    102       case TokKind::kDimLabels:
    103       case TokKind::kDxD:
    104       case TokKind::kPad:
    105       case TokKind::kString:
    106       case TokKind::kIdent:
    107         return token_state_.str_val;
    108       default:
    109         LOG(FATAL) << "This token does not have string value";
    110     }
    111   }
    112   int64 GetInt64Val() const {
    113     CHECK(GetKind() == TokKind::kInt);
    114     return token_state_.int64_val;
    115   }
    116   double GetDecimalVal() const {
    117     CHECK(GetKind() == TokKind::kDecimal);
    118     return token_state_.decimal_val;
    119   }
    120   PrimitiveType GetPrimitiveTypeVal() const {
    121     CHECK(GetKind() == TokKind::kPrimitiveType);
    122     return token_state_.primitive_type_val;
    123   }
    124 
    125   typedef const char* LocTy;
    126 
    127   // Returns the location of the current token.
    128   LocTy GetLoc() const { return token_state_.token_start; }
    129 
    130   // Returns the line and column of a location in the buffer.
    131   std::pair<unsigned, unsigned> GetLineAndColumn(LocTy location) const;
    132 
    133   // Returns the whole line given the location.
    134   absl::string_view GetLine(LocTy loc) const;
    135 
    136   // Looks ahead one token and returns it. Lexer state is unchanged.
    137   TokKind LookAhead();
    138 
    139  private:
    140   // Returns the current character. If it's neither the end of input buffer nor
    141   // an invalid character, moves the pointer forward.
    142   int GetNextChar();
    143 
    144   // Returns the current character.
    145   int PeekCurrentChar() const;
    146 
    147   // Creates StringPiece with the given begin and end. Exits if the begin > end,
    148   // or it's out of the range of the current buffer.
    149   absl::string_view StringPieceFromPointers(const char* begin,
    150                                             const char* end) const;
    151   tensorflow::RegexpStringPiece RegexpStringPieceFromPointers(
    152       const char* begin, const char* end) const;
    153 
    154   // Returns true if the given ptr is dereferenceable within the range of the
    155   // current buffer.
    156   bool CanDereference(const char* ptr) const;
    157 
    158   TokKind LexToken();
    159 
    160   TokKind LexIdentifier();
    161   TokKind LexPercent();
    162   TokKind LexShape();
    163   TokKind LexConstant();
    164   TokKind LexNumberOrPattern();
    165   TokKind LexString();
    166 
    167   const absl::string_view buf_;
    168   const char* current_ptr_;
    169 
    170   // Information about the current token.
    171   struct TokenState {
    172     const char* token_start = nullptr;
    173     TokKind current_kind;
    174     string str_val;
    175     int64 int64_val;
    176     double decimal_val;
    177     PrimitiveType primitive_type_val;
    178   };
    179   TokenState token_state_;
    180 
    181   struct LineNoCacheTy {
    182     const char* last_query;
    183     unsigned line_no_of_query;
    184   };
    185   // This caches the line number of the previous query.
    186   mutable LineNoCacheTy line_no_cache_{nullptr, 0};
    187 };
    188 
    189 }  // namespace xla
    190 
    191 #endif  // TENSORFLOW_COMPILER_XLA_SERVICE_HLO_LEXER_H_
    192