Home | History | Annotate | Download | only in Sema
      1 //===--- TypoCorrection.h - Class for typo correction results ---*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file defines the TypoCorrection class, which stores the results of
     11 // Sema's typo correction (Sema::CorrectTypo).
     12 //
     13 //===----------------------------------------------------------------------===//
     14 
     15 #ifndef LLVM_CLANG_SEMA_TYPOCORRECTION_H
     16 #define LLVM_CLANG_SEMA_TYPOCORRECTION_H
     17 
     18 #include "clang/AST/DeclCXX.h"
     19 #include "llvm/ADT/SmallVector.h"
     20 
     21 namespace clang {
     22 
     23 /// @brief Simple class containing the result of Sema::CorrectTypo
     24 class TypoCorrection {
     25 public:
     26   // "Distance" for unusable corrections
     27   static const unsigned InvalidDistance = ~0U;
     28   // The largest distance still considered valid (larger edit distances are
     29   // mapped to InvalidDistance by getEditDistance).
     30   static const unsigned MaximumDistance = 10000U;
     31 
     32   // Relative weightings of the "edit distance" components. The higher the
     33   // weight, the more of a penalty to fitness the component will give (higher
     34   // weights mean greater contribution to the total edit distance, with the
     35   // best correction candidates having the lowest edit distance).
     36   static const unsigned CharDistanceWeight = 100U;
     37   static const unsigned QualifierDistanceWeight = 110U;
     38   static const unsigned CallbackDistanceWeight = 150U;
     39 
     40   TypoCorrection(const DeclarationName &Name, NamedDecl *NameDecl,
     41                  NestedNameSpecifier *NNS=0, unsigned CharDistance=0,
     42                  unsigned QualifierDistance=0)
     43       : CorrectionName(Name), CorrectionNameSpec(NNS),
     44       CharDistance(CharDistance), QualifierDistance(QualifierDistance),
     45       CallbackDistance(0) {
     46     if (NameDecl)
     47       CorrectionDecls.push_back(NameDecl);
     48   }
     49 
     50   TypoCorrection(NamedDecl *Name, NestedNameSpecifier *NNS=0,
     51                  unsigned CharDistance=0)
     52       : CorrectionName(Name->getDeclName()), CorrectionNameSpec(NNS),
     53       CharDistance(CharDistance), QualifierDistance(0), CallbackDistance(0) {
     54     if (Name)
     55       CorrectionDecls.push_back(Name);
     56   }
     57 
     58   TypoCorrection(DeclarationName Name, NestedNameSpecifier *NNS=0,
     59                  unsigned CharDistance=0)
     60       : CorrectionName(Name), CorrectionNameSpec(NNS),
     61       CharDistance(CharDistance), QualifierDistance(0), CallbackDistance(0) {}
     62 
     63   TypoCorrection()
     64       : CorrectionNameSpec(0), CharDistance(0), QualifierDistance(0),
     65       CallbackDistance(0) {}
     66 
     67   /// \brief Gets the DeclarationName of the typo correction
     68   DeclarationName getCorrection() const { return CorrectionName; }
     69   IdentifierInfo* getCorrectionAsIdentifierInfo() const {
     70     return CorrectionName.getAsIdentifierInfo();
     71   }
     72 
     73   /// \brief Gets the NestedNameSpecifier needed to use the typo correction
     74   NestedNameSpecifier* getCorrectionSpecifier() const {
     75     return CorrectionNameSpec;
     76   }
     77   void setCorrectionSpecifier(NestedNameSpecifier* NNS) {
     78     CorrectionNameSpec = NNS;
     79   }
     80 
     81   void setQualifierDistance(unsigned ED) {
     82     QualifierDistance = ED;
     83   }
     84 
     85   void setCallbackDistance(unsigned ED) {
     86     CallbackDistance = ED;
     87   }
     88 
     89   // Convert the given weighted edit distance to a roughly equivalent number of
     90   // single-character edits (typically for comparison to the length of the
     91   // string being edited).
     92   static unsigned NormalizeEditDistance(unsigned ED) {
     93     if (ED > MaximumDistance)
     94       return InvalidDistance;
     95     return (ED + CharDistanceWeight / 2) / CharDistanceWeight;
     96   }
     97 
     98   /// \brief Gets the "edit distance" of the typo correction from the typo.
     99   /// If Normalized is true, scale the distance down by the CharDistanceWeight
    100   /// to return the edit distance in terms of single-character edits.
    101   unsigned getEditDistance(bool Normalized = true) const {
    102     if (CharDistance > MaximumDistance || QualifierDistance > MaximumDistance ||
    103         CallbackDistance > MaximumDistance)
    104       return InvalidDistance;
    105     unsigned ED =
    106         CharDistance * CharDistanceWeight +
    107         QualifierDistance * QualifierDistanceWeight +
    108         CallbackDistance * CallbackDistanceWeight;
    109     if (ED > MaximumDistance)
    110       return InvalidDistance;
    111     // Half the CharDistanceWeight is added to ED to simulate rounding since
    112     // integer division truncates the value (i.e. round-to-nearest-int instead
    113     // of round-to-zero).
    114     return Normalized ? NormalizeEditDistance(ED) : ED;
    115   }
    116 
    117   /// \brief Gets the pointer to the declaration of the typo correction
    118   NamedDecl* getCorrectionDecl() const {
    119     return hasCorrectionDecl() ? *(CorrectionDecls.begin()) : 0;
    120   }
    121   template <class DeclClass>
    122   DeclClass *getCorrectionDeclAs() const {
    123     return dyn_cast_or_null<DeclClass>(getCorrectionDecl());
    124   }
    125 
    126   /// \brief Clears the list of NamedDecls before adding the new one.
    127   void setCorrectionDecl(NamedDecl *CDecl) {
    128     CorrectionDecls.clear();
    129     addCorrectionDecl(CDecl);
    130   }
    131 
    132   /// \brief Add the given NamedDecl to the list of NamedDecls that are the
    133   /// declarations associated with the DeclarationName of this TypoCorrection
    134   void addCorrectionDecl(NamedDecl *CDecl);
    135 
    136   std::string getAsString(const LangOptions &LO) const;
    137   std::string getQuoted(const LangOptions &LO) const {
    138     return "'" + getAsString(LO) + "'";
    139   }
    140 
    141   /// \brief Returns whether this TypoCorrection has a non-empty DeclarationName
    142   operator bool() const { return bool(CorrectionName); }
    143 
    144   /// \brief Mark this TypoCorrection as being a keyword.
    145   /// Since addCorrectionDeclsand setCorrectionDecl don't allow NULL to be
    146   /// added to the list of the correction's NamedDecl pointers, NULL is added
    147   /// as the only element in the list to mark this TypoCorrection as a keyword.
    148   void makeKeyword() {
    149     CorrectionDecls.clear();
    150     CorrectionDecls.push_back(0);
    151   }
    152 
    153   // Check if this TypoCorrection is a keyword by checking if the first
    154   // item in CorrectionDecls is NULL.
    155   bool isKeyword() const {
    156     return !CorrectionDecls.empty() &&
    157         CorrectionDecls.front() == 0;
    158   }
    159 
    160   // Check if this TypoCorrection is the given keyword.
    161   template<std::size_t StrLen>
    162   bool isKeyword(const char (&Str)[StrLen]) const {
    163     return isKeyword() && getCorrectionAsIdentifierInfo()->isStr(Str);
    164   }
    165 
    166   // Returns true if the correction either is a keyword or has a known decl.
    167   bool isResolved() const { return !CorrectionDecls.empty(); }
    168 
    169   bool isOverloaded() const {
    170     return CorrectionDecls.size() > 1;
    171   }
    172 
    173   typedef llvm::SmallVector<NamedDecl*, 1>::iterator decl_iterator;
    174   decl_iterator begin() {
    175     return isKeyword() ? CorrectionDecls.end() : CorrectionDecls.begin();
    176   }
    177   decl_iterator end() { return CorrectionDecls.end(); }
    178   typedef llvm::SmallVector<NamedDecl*, 1>::const_iterator const_decl_iterator;
    179   const_decl_iterator begin() const {
    180     return isKeyword() ? CorrectionDecls.end() : CorrectionDecls.begin();
    181   }
    182   const_decl_iterator end() const { return CorrectionDecls.end(); }
    183 
    184 private:
    185   bool hasCorrectionDecl() const {
    186     return (!isKeyword() && !CorrectionDecls.empty());
    187   }
    188 
    189   // Results.
    190   DeclarationName CorrectionName;
    191   NestedNameSpecifier *CorrectionNameSpec;
    192   llvm::SmallVector<NamedDecl*, 1> CorrectionDecls;
    193   unsigned CharDistance;
    194   unsigned QualifierDistance;
    195   unsigned CallbackDistance;
    196 };
    197 
    198 /// @brief Base class for callback objects used by Sema::CorrectTypo to check
    199 /// the validity of a potential typo correction.
    200 class CorrectionCandidateCallback {
    201  public:
    202   static const unsigned InvalidDistance = TypoCorrection::InvalidDistance;
    203 
    204   CorrectionCandidateCallback()
    205       : WantTypeSpecifiers(true), WantExpressionKeywords(true),
    206         WantCXXNamedCasts(true), WantRemainingKeywords(true),
    207         WantObjCSuper(false),
    208         IsObjCIvarLookup(false) {}
    209 
    210   virtual ~CorrectionCandidateCallback() {}
    211 
    212   /// \brief Simple predicate used by the default RankCandidate to
    213   /// determine whether to return an edit distance of 0 or InvalidDistance.
    214   /// This can be overrided by validators that only need to determine if a
    215   /// candidate is viable, without ranking potentially viable candidates.
    216   /// Only ValidateCandidate or RankCandidate need to be overriden by a
    217   /// callback wishing to check the viability of correction candidates.
    218   virtual bool ValidateCandidate(const TypoCorrection &candidate) {
    219     return true;
    220   }
    221 
    222   /// \brief Method used by Sema::CorrectTypo to assign an "edit distance" rank
    223   /// to a candidate (where a lower value represents a better candidate), or
    224   /// returning InvalidDistance if the candidate is not at all viable. For
    225   /// validation callbacks that only need to determine if a candidate is viable,
    226   /// the default RankCandidate returns either 0 or InvalidDistance depending
    227   /// whether ValidateCandidate returns true or false.
    228   virtual unsigned RankCandidate(const TypoCorrection &candidate) {
    229     return ValidateCandidate(candidate) ? 0 : InvalidDistance;
    230   }
    231 
    232   // Flags for context-dependent keywords.
    233   // TODO: Expand these to apply to non-keywords or possibly remove them.
    234   bool WantTypeSpecifiers;
    235   bool WantExpressionKeywords;
    236   bool WantCXXNamedCasts;
    237   bool WantRemainingKeywords;
    238   bool WantObjCSuper;
    239   // Temporary hack for the one case where a CorrectTypoContext enum is used
    240   // when looking up results.
    241   bool IsObjCIvarLookup;
    242 };
    243 
    244 /// @brief Simple template class for restricting typo correction candidates
    245 /// to ones having a single Decl* of the given type.
    246 template <class C>
    247 class DeclFilterCCC : public CorrectionCandidateCallback {
    248  public:
    249   virtual bool ValidateCandidate(const TypoCorrection &candidate) {
    250     return candidate.getCorrectionDeclAs<C>();
    251   }
    252 };
    253 
    254 }
    255 
    256 #endif
    257