Home | History | Annotate | Download | only in win
      1 // Copyright (c) 2006-2009 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #ifndef ENCODINGS_COMPACT_LANG_DET_WIN_NORMALIZEDUNICODETEXT_H_
      6 #define ENCODINGS_COMPACT_LANG_DET_WIN_NORMALIZEDUNICODETEXT_H_
      7 
      8 #include <tchar.h>
      9 #include <windows.h>
     10 
     11 #include "encodings/compact_lang_det/win/cld_scopedptr.h"
     12 
     13 
     14 #if (WINVER < 0x0600)
     15 // Copied from winnls.h because we're not using the latest SDK yet.
     16 typedef enum _NORM_FORM {
     17   NormalizationOther  = 0,
     18   NormalizationC = 0x1,
     19   NormalizationD = 0x2,
     20   NormalizationKC = 0x5,
     21   NormalizationKD = 0x6
     22 } NORM_FORM;
     23 #endif
     24 
     25 
     26 // Gives you back a normalized version of the input text.  Normalization is
     27 // performed to the form passed to Normalize().
     28 // The instance may point into 'text', so it must not outlive 'text'.
     29 class NormalizedUnicodeText {
     30  public:
     31   // Creates an empty instance of NormalizedUnicodeText.
     32   NormalizedUnicodeText();
     33 
     34   // Normalizes 'text'; the result is then available through get().
     35   // [in] normalization_form - normalization rule set (see MSDN for details).
     36   // [in] text - zero-terminated UTF-16 encoded string.
     37   // Returns 0 in case of success, Win32 error code in case of failure.
     38   //     In case of failure, get() returns the original text.
     39   DWORD Normalize(NORM_FORM normalization_form, const WCHAR* text);
     40 
     41   // Returns a pointer to the normalized text (see Normalize()).
     42   const WCHAR* get() const { return normalized_text_; }
     43 
     44  private:
     45   // Normalizes 'text' by the 'normalization_form' rules.
     46   // [in] normalization_form - normalization rule set (see MSDN for details).
     47   // [in] text - zero-terminated UTF-16 encoded string.
     48   // [out] error_code - Win32 error code.
     49   const WCHAR* TryToNormalizeText(NORM_FORM normalization_form,
     50                                   const WCHAR* text, DWORD *error_code);
     51 
     52   // Pointer to the normalized text: either the source text or text_ (below).
     53   const WCHAR* normalized_text_;
     54   // When the source text is already normalized by the requested normalization
     55   // form, text_ is not used and normalized_text_ just points to the source
     56   // text. When the source text requires normalization, text_ contains the
     57   // normalized version of the source text and normalized_text_ points to it.
     58   // Since CLD requires NormalizationC form and the overwhelming majority of all
     59   // texts on the Internet are already normalized to this form, it's expected
     60   // that this class will not introduce any runtime memory overhead.
     61   scoped_array<WCHAR> text_;
     62 
     63   DISALLOW_COPY_AND_ASSIGN(NormalizedUnicodeText);
     64 };
     65 
     66 
     67 #endif  // ENCODINGS_COMPACT_LANG_DET_WIN_NORMALIZEDUNICODETEXT_H_
     68