1 // Copyright (c) 2006-2009 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef ENCODINGS_COMPACT_LANG_DET_WIN_NORMALIZEDUNICODETEXT_H_ 6 #define ENCODINGS_COMPACT_LANG_DET_WIN_NORMALIZEDUNICODETEXT_H_ 7 8 #include <tchar.h> 9 #include <windows.h> 10 11 #include "encodings/compact_lang_det/win/cld_scopedptr.h" 12 13 14 #if (WINVER < 0x0600) 15 // Copied from winnls.h, we're not using the latest SDK yet. 16 typedef enum _NORM_FORM { 17 NormalizationOther = 0, 18 NormalizationC = 0x1, 19 NormalizationD = 0x2, 20 NormalizationKC = 0x5, 21 NormalizationKD = 0x6 22 } NORM_FORM; 23 #endif 24 25 26 // Gives you back a normalized version of the input text. Normalization is 27 // performed to the specified form. 28 // Instance lifetime should be within the lifetime span of the 'text'. 29 class NormalizedUnicodeText { 30 public: 31 // Creates an empty instance of NormalizedUnicodeText. 32 NormalizedUnicodeText(); 33 34 // Creates a fully initialized instance of NormalizedUnicodeText. 35 // [in] normalization_form - normalization rule set (see MSDN for details). 36 // [in] text - zero-terminated UTF-16 encoded string. 37 // Returns 0 in case of success, Win32 error code in case of failure. 38 // In case of failure, get() returns the original text. 39 DWORD Normalize(NORM_FORM normalization_form, const WCHAR* text); 40 41 // Returns pointer to the normalized text. 42 const WCHAR* get() const { return normalized_text_; } 43 44 private: 45 // Normalizes 'text' by the 'normalization_form' rules. 46 // [in] normalization_form - normalization rule set (see MSDN for details). 47 // [in] text - zero-terminated UTF-16 encoded string. 48 // [out] error_code - Win32 error code. 49 const WCHAR* TryToNormalizeText(NORM_FORM normalization_form, 50 const WCHAR* text, DWORD *error_code); 51 52 // Pointer to the normalized text. 53 const WCHAR* normalized_text_; 54 // When the source text is already normalized by the requested normalization 55 // form, text_ is not used and normalized_text_ just points to the source 56 // text. When the source text requres normalization, text_ contains normalized 57 // version of the source text and normalized_text_ points to this buffer. 58 // Since CLD requires NormalizationC form and the overwhelming majority of all 59 // texts in the Internet is already normalized to this form, it's expected 60 // that this class will not introduce any runtime memory overhead. 61 scoped_array<WCHAR> text_; 62 63 DISALLOW_COPY_AND_ASSIGN(NormalizedUnicodeText); 64 }; 65 66 67 #endif // ENCODINGS_COMPACT_LANG_DET_WIN_NORMALIZEDUNICODETEXT_H_ 68