Home | History | Annotate | Download | only in Sema
      1 //===--- TypoCorrection.h - Class for typo correction results ---*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file defines the TypoCorrection class, which stores the results of
     11 // Sema's typo correction (Sema::CorrectTypo).
     12 //
     13 //===----------------------------------------------------------------------===//
     14 
     15 #ifndef LLVM_CLANG_SEMA_TYPOCORRECTION_H
     16 #define LLVM_CLANG_SEMA_TYPOCORRECTION_H
     17 
     18 #include "clang/AST/DeclCXX.h"
     19 #include "clang/Sema/DeclSpec.h"
     20 #include "llvm/ADT/SmallVector.h"
     21 
     22 namespace clang {
     23 
     24 /// @brief Simple class containing the result of Sema::CorrectTypo
     25 class TypoCorrection {
     26 public:
     27   // "Distance" for unusable corrections
     28   static const unsigned InvalidDistance = ~0U;
     29   // The largest distance still considered valid (larger edit distances are
     30   // mapped to InvalidDistance by getEditDistance).
     31   static const unsigned MaximumDistance = 10000U;
     32 
     33   // Relative weightings of the "edit distance" components. The higher the
     34   // weight, the more of a penalty to fitness the component will give (higher
     35   // weights mean greater contribution to the total edit distance, with the
     36   // best correction candidates having the lowest edit distance).
     37   static const unsigned CharDistanceWeight = 100U;
     38   static const unsigned QualifierDistanceWeight = 110U;
     39   static const unsigned CallbackDistanceWeight = 150U;
     40 
     41   TypoCorrection(const DeclarationName &Name, NamedDecl *NameDecl,
     42                  NestedNameSpecifier *NNS=0, unsigned CharDistance=0,
     43                  unsigned QualifierDistance=0)
     44       : CorrectionName(Name), CorrectionNameSpec(NNS),
     45       CharDistance(CharDistance), QualifierDistance(QualifierDistance),
     46       CallbackDistance(0) {
     47     if (NameDecl)
     48       CorrectionDecls.push_back(NameDecl);
     49   }
     50 
     51   TypoCorrection(NamedDecl *Name, NestedNameSpecifier *NNS=0,
     52                  unsigned CharDistance=0)
     53       : CorrectionName(Name->getDeclName()), CorrectionNameSpec(NNS),
     54       CharDistance(CharDistance), QualifierDistance(0), CallbackDistance(0) {
     55     if (Name)
     56       CorrectionDecls.push_back(Name);
     57   }
     58 
     59   TypoCorrection(DeclarationName Name, NestedNameSpecifier *NNS=0,
     60                  unsigned CharDistance=0)
     61       : CorrectionName(Name), CorrectionNameSpec(NNS),
     62       CharDistance(CharDistance), QualifierDistance(0), CallbackDistance(0) {}
     63 
     64   TypoCorrection()
     65       : CorrectionNameSpec(0), CharDistance(0), QualifierDistance(0),
     66       CallbackDistance(0) {}
     67 
     68   /// \brief Gets the DeclarationName of the typo correction
     69   DeclarationName getCorrection() const { return CorrectionName; }
     70   IdentifierInfo* getCorrectionAsIdentifierInfo() const {
     71     return CorrectionName.getAsIdentifierInfo();
     72   }
     73 
     74   /// \brief Gets the NestedNameSpecifier needed to use the typo correction
     75   NestedNameSpecifier* getCorrectionSpecifier() const {
     76     return CorrectionNameSpec;
     77   }
     78   void setCorrectionSpecifier(NestedNameSpecifier* NNS) {
     79     CorrectionNameSpec = NNS;
     80   }
     81 
     82   void setQualifierDistance(unsigned ED) {
     83     QualifierDistance = ED;
     84   }
     85 
     86   void setCallbackDistance(unsigned ED) {
     87     CallbackDistance = ED;
     88   }
     89 
     90   // Convert the given weighted edit distance to a roughly equivalent number of
     91   // single-character edits (typically for comparison to the length of the
     92   // string being edited).
     93   static unsigned NormalizeEditDistance(unsigned ED) {
     94     if (ED > MaximumDistance)
     95       return InvalidDistance;
     96     return (ED + CharDistanceWeight / 2) / CharDistanceWeight;
     97   }
     98 
     99   /// \brief Gets the "edit distance" of the typo correction from the typo.
    100   /// If Normalized is true, scale the distance down by the CharDistanceWeight
    101   /// to return the edit distance in terms of single-character edits.
    102   unsigned getEditDistance(bool Normalized = true) const {
    103     if (CharDistance > MaximumDistance || QualifierDistance > MaximumDistance ||
    104         CallbackDistance > MaximumDistance)
    105       return InvalidDistance;
    106     unsigned ED =
    107         CharDistance * CharDistanceWeight +
    108         QualifierDistance * QualifierDistanceWeight +
    109         CallbackDistance * CallbackDistanceWeight;
    110     if (ED > MaximumDistance)
    111       return InvalidDistance;
    112     // Half the CharDistanceWeight is added to ED to simulate rounding since
    113     // integer division truncates the value (i.e. round-to-nearest-int instead
    114     // of round-to-zero).
    115     return Normalized ? NormalizeEditDistance(ED) : ED;
    116   }
    117 
    118   /// \brief Gets the pointer to the declaration of the typo correction
    119   NamedDecl* getCorrectionDecl() const {
    120     return hasCorrectionDecl() ? *(CorrectionDecls.begin()) : 0;
    121   }
    122   template <class DeclClass>
    123   DeclClass *getCorrectionDeclAs() const {
    124     return dyn_cast_or_null<DeclClass>(getCorrectionDecl());
    125   }
    126 
    127   /// \brief Clears the list of NamedDecls before adding the new one.
    128   void setCorrectionDecl(NamedDecl *CDecl) {
    129     CorrectionDecls.clear();
    130     addCorrectionDecl(CDecl);
    131   }
    132 
    133   /// \brief Add the given NamedDecl to the list of NamedDecls that are the
    134   /// declarations associated with the DeclarationName of this TypoCorrection
    135   void addCorrectionDecl(NamedDecl *CDecl);
    136 
    137   std::string getAsString(const LangOptions &LO) const;
    138   std::string getQuoted(const LangOptions &LO) const {
    139     return "'" + getAsString(LO) + "'";
    140   }
    141 
    142   /// \brief Returns whether this TypoCorrection has a non-empty DeclarationName
    143   operator bool() const { return bool(CorrectionName); }
    144 
    145   /// \brief Mark this TypoCorrection as being a keyword.
    146   /// Since addCorrectionDeclsand setCorrectionDecl don't allow NULL to be
    147   /// added to the list of the correction's NamedDecl pointers, NULL is added
    148   /// as the only element in the list to mark this TypoCorrection as a keyword.
    149   void makeKeyword() {
    150     CorrectionDecls.clear();
    151     CorrectionDecls.push_back(0);
    152   }
    153 
    154   // Check if this TypoCorrection is a keyword by checking if the first
    155   // item in CorrectionDecls is NULL.
    156   bool isKeyword() const {
    157     return !CorrectionDecls.empty() &&
    158         CorrectionDecls.front() == 0;
    159   }
    160 
    161   // Check if this TypoCorrection is the given keyword.
    162   template<std::size_t StrLen>
    163   bool isKeyword(const char (&Str)[StrLen]) const {
    164     return isKeyword() && getCorrectionAsIdentifierInfo()->isStr(Str);
    165   }
    166 
    167   // Returns true if the correction either is a keyword or has a known decl.
    168   bool isResolved() const { return !CorrectionDecls.empty(); }
    169 
    170   bool isOverloaded() const {
    171     return CorrectionDecls.size() > 1;
    172   }
    173 
    174   void setCorrectionRange(CXXScopeSpec* SS,
    175                           const DeclarationNameInfo &TypoName) {
    176     CorrectionRange.setBegin(CorrectionNameSpec && SS ? SS->getBeginLoc()
    177                                                       : TypoName.getLoc());
    178     CorrectionRange.setEnd(TypoName.getLoc());
    179   }
    180 
    181   SourceRange getCorrectionRange() const {
    182     return CorrectionRange;
    183   }
    184 
    185   typedef SmallVector<NamedDecl *, 1>::iterator decl_iterator;
    186   decl_iterator begin() {
    187     return isKeyword() ? CorrectionDecls.end() : CorrectionDecls.begin();
    188   }
    189   decl_iterator end() { return CorrectionDecls.end(); }
    190   typedef SmallVector<NamedDecl *, 1>::const_iterator const_decl_iterator;
    191   const_decl_iterator begin() const {
    192     return isKeyword() ? CorrectionDecls.end() : CorrectionDecls.begin();
    193   }
    194   const_decl_iterator end() const { return CorrectionDecls.end(); }
    195 
    196 private:
    197   bool hasCorrectionDecl() const {
    198     return (!isKeyword() && !CorrectionDecls.empty());
    199   }
    200 
    201   // Results.
    202   DeclarationName CorrectionName;
    203   NestedNameSpecifier *CorrectionNameSpec;
    204   SmallVector<NamedDecl *, 1> CorrectionDecls;
    205   unsigned CharDistance;
    206   unsigned QualifierDistance;
    207   unsigned CallbackDistance;
    208   SourceRange CorrectionRange;
    209 };
    210 
    211 /// @brief Base class for callback objects used by Sema::CorrectTypo to check
    212 /// the validity of a potential typo correction.
    213 class CorrectionCandidateCallback {
    214  public:
    215   static const unsigned InvalidDistance = TypoCorrection::InvalidDistance;
    216 
    217   CorrectionCandidateCallback()
    218       : WantTypeSpecifiers(true), WantExpressionKeywords(true),
    219         WantCXXNamedCasts(true), WantRemainingKeywords(true),
    220         WantObjCSuper(false),
    221         IsObjCIvarLookup(false) {}
    222 
    223   virtual ~CorrectionCandidateCallback() {}
    224 
    225   /// \brief Simple predicate used by the default RankCandidate to
    226   /// determine whether to return an edit distance of 0 or InvalidDistance.
    227   /// This can be overrided by validators that only need to determine if a
    228   /// candidate is viable, without ranking potentially viable candidates.
    229   /// Only ValidateCandidate or RankCandidate need to be overriden by a
    230   /// callback wishing to check the viability of correction candidates.
    231   virtual bool ValidateCandidate(const TypoCorrection &candidate) {
    232     return true;
    233   }
    234 
    235   /// \brief Method used by Sema::CorrectTypo to assign an "edit distance" rank
    236   /// to a candidate (where a lower value represents a better candidate), or
    237   /// returning InvalidDistance if the candidate is not at all viable. For
    238   /// validation callbacks that only need to determine if a candidate is viable,
    239   /// the default RankCandidate returns either 0 or InvalidDistance depending
    240   /// whether ValidateCandidate returns true or false.
    241   virtual unsigned RankCandidate(const TypoCorrection &candidate) {
    242     return ValidateCandidate(candidate) ? 0 : InvalidDistance;
    243   }
    244 
    245   // Flags for context-dependent keywords.
    246   // TODO: Expand these to apply to non-keywords or possibly remove them.
    247   bool WantTypeSpecifiers;
    248   bool WantExpressionKeywords;
    249   bool WantCXXNamedCasts;
    250   bool WantRemainingKeywords;
    251   bool WantObjCSuper;
    252   // Temporary hack for the one case where a CorrectTypoContext enum is used
    253   // when looking up results.
    254   bool IsObjCIvarLookup;
    255 };
    256 
    257 /// @brief Simple template class for restricting typo correction candidates
    258 /// to ones having a single Decl* of the given type.
    259 template <class C>
    260 class DeclFilterCCC : public CorrectionCandidateCallback {
    261  public:
    262   virtual bool ValidateCandidate(const TypoCorrection &candidate) {
    263     return candidate.getCorrectionDeclAs<C>();
    264   }
    265 };
    266 
    267 }
    268 
    269 #endif
    270