1 /* 2 * Copyright (c) 2006, 2007, 2008, 2009, Google Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions are 6 * met: 7 * 8 * * Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * * Redistributions in binary form must reproduce the above 11 * copyright notice, this list of conditions and the following disclaimer 12 * in the documentation and/or other materials provided with the 13 * distribution. 14 * * Neither the name of Google Inc. nor the names of its 15 * contributors may be used to endorse or promote products derived from 16 * this software without specific prior written permission. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31 // A wrapper around Uniscribe that provides a reasonable API. 32 33 #ifndef UniscribeHelper_h 34 #define UniscribeHelper_h 35 36 #include <windows.h> 37 #include <usp10.h> 38 #include <map> 39 40 #include <unicode/uchar.h> 41 #include "wtf/Vector.h" 42 43 class UniscribeTest_TooBig_Test; // A gunit test for UniscribeHelper. 44 45 namespace WebCore { 46 47 class FloatRect; 48 class FontFeatureSettings; 49 class FontPlatformData; 50 class GraphicsContext; 51 52 const unsigned cUniscribeHelperStackRuns = 8; 53 const unsigned cUniscribeHelperStackChars = 32; 54 const unsigned cUniscribeHelperFeatures = 4; 55 56 // This object should be safe to create & destroy frequently, as long as the 57 // caller preserves the script_cache when possible (this data may be slow to 58 // compute). 59 // 60 // This object is "kind of large" (~1K) because it reserves a lot of space for 61 // working with to avoid expensive heap operations. Therefore, not only should 62 // you not worry about creating and destroying it, you should try to not keep 63 // them around. 64 class UniscribeHelper { 65 public: 66 // Initializes this Uniscribe run with the text pointed to by |run| with 67 // |length|. The input is NOT null terminated. 68 // 69 // The is_rtl flag should be set if the input script is RTL. It is assumed 70 // that the caller has already divided up the input text (using ICU, for 71 // example) into runs of the same direction of script. This avoids 72 // disagreements between the caller and Uniscribe later (see FillItems). 73 // 74 // A script cache should be provided by the caller that is initialized to 75 // NULL. When the caller is done with the cache (it may be stored between 76 // runs as long as it is used consistently with the same HFONT), it should 77 // call ScriptFreeCache(). 78 UniscribeHelper(const UChar* input, 79 int inputLength, 80 bool isRtl, 81 HFONT, 82 SCRIPT_CACHE*, 83 SCRIPT_FONTPROPERTIES*, 84 WORD); 85 86 virtual ~UniscribeHelper(); 87 88 // Sets Uniscribe's directional override flag. False by default. 89 bool directionalOverride() const 90 { 91 return m_directionalOverride; 92 } 93 void setDirectionalOverride(bool override) 94 { 95 m_directionalOverride = override; 96 } 97 98 // Set's Uniscribe's no-ligate override flag. False by default. 99 bool inhibitLigate() const 100 { 101 return m_inhibitLigate; 102 } 103 void setInhibitLigate(bool inhibit) 104 { 105 m_inhibitLigate = inhibit; 106 } 107 108 // Set letter spacing. We will try to insert this much space between 109 // graphemes (one or more glyphs perceived as a single unit by ordinary 110 // users of a script). Positive values increase letter spacing, negative 111 // values decrease it. 0 by default. 112 int letterSpacing() const 113 { 114 return m_letterSpacing; 115 } 116 void setLetterSpacing(int letterSpacing) 117 { 118 m_letterSpacing = letterSpacing; 119 } 120 121 // Set the width of a standard space character. We use this to normalize 122 // space widths. Windows will make spaces after Hindi characters larger than 123 // other spaces. A space_width of 0 means to use the default space width. 124 // 125 // Must be set before Init() is called. 126 int spaceWidth() const 127 { 128 return m_spaceWidth; 129 } 130 void setSpaceWidth(int spaceWidth) 131 { 132 m_spaceWidth = spaceWidth; 133 } 134 135 // Set word spacing. We will try to insert this much extra space between 136 // each word in the input (beyond whatever whitespace character separates 137 // words). Positive values lead to increased letter spacing, negative values 138 // decrease it. 0 by default. 139 // 140 // Must be set before Init() is called. 141 int wordSpacing() const 142 { 143 return m_wordSpacing; 144 } 145 void setWordSpacing(int wordSpacing) 146 { 147 m_wordSpacing = wordSpacing; 148 } 149 150 void setAscent(int ascent) 151 { 152 m_ascent = ascent; 153 } 154 155 // When set to true, this class is used only to look up glyph 156 // indices for a range of Unicode characters without glyph placement. 157 // By default, it's false. This should be set to true when this 158 // class is used for glyph index look-up for non-BMP characters 159 // in GlyphPageNodeChromiumWin.cpp. 160 void setDisableFontFallback(bool disableFontFallback) 161 { 162 m_disableFontFallback = true; 163 } 164 165 // Set TEXTRANGE_PROPERTIES structure which contains 166 // OpenType feature records generated from FontFeatureSettings. 167 void setRangeProperties(const FontFeatureSettings*); 168 169 // You must call this after setting any options but before doing any 170 // other calls like asking for widths or drawing. 171 void init() 172 { 173 initWithOptionalLengthProtection(true); 174 } 175 176 // Returns the total width in pixels of the text run. 177 int width() const; 178 179 // Call to justify the text, with the amount of space that should be ADDED 180 // to get the desired width that the column should be justified to. 181 // Normally, spaces are inserted, but for Arabic there will be kashidas 182 // (extra strokes) inserted instead. 183 // 184 // This function MUST be called AFTER Init(). 185 void justify(int additionalSpace); 186 187 // Computes the given character offset into a pixel offset of the beginning 188 // of that character. 189 int characterToX(int offset) const; 190 191 // Converts the given pixel X position into a logical character offset into 192 // the run. For positions appearing before the first character, this will 193 // return -1. 194 int xToCharacter(int x) const; 195 196 // Draws the given characters to (x, y) in the given DC. The font will be 197 // handled by this function, but the font color and other attributes should 198 // be pre-set. 199 // 200 // The y position is the upper left corner, NOT the baseline. 201 void draw(GraphicsContext*, const FontPlatformData&, HDC, 202 int x, int y, const FloatRect& textRect, 203 int from, int to); 204 205 // Returns the first glyph assigned to the character at the given offset. 206 // This function is used to retrieve glyph information when Uniscribe is 207 // being used to generate glyphs for non-complex, non-BMP (above U+FFFF) 208 // characters. These characters are not otherwise special and have no 209 // complex shaping rules, so we don't otherwise need Uniscribe, except 210 // Uniscribe is the only way to get glyphs for non-BMP characters. 211 // 212 // Returns 0 if there is no glyph for the given character. 213 WORD firstGlyphForCharacter(int charOffset) const; 214 215 protected: 216 // Backend for init. The flag allows the unit test to specify whether we 217 // should fail early for very long strings like normal, or try to pass the 218 // long string to Uniscribe. The latter provides a way to force failure of 219 // shaping. 220 void initWithOptionalLengthProtection(bool lengthProtection); 221 222 // Tries to preload the font when the it is not accessible. 223 // This is the default implementation and it does not do anything. 224 virtual void tryToPreloadFont(HFONT) {} 225 226 // Let our subclasses provide the input lazily in case they can't compute 227 // it in their constructors. Once we have input, however, we don't let 228 // our subclasses change it. 229 void setInput(const UChar* input) { ASSERT(!m_input); m_input = input; } 230 231 private: 232 friend class UniscribeTest_TooBig_Test; 233 234 // An array corresponding to each item in runs_ containing information 235 // on each of the glyphs that were generated. Like runs_, this is in 236 // reading order. However, for rtl text, the characters within each 237 // item will be reversed. 238 struct Shaping { 239 Shaping() 240 : m_prePadding(0) 241 , m_hfont(NULL) 242 , m_scriptCache(NULL) 243 , m_ascentOffset(0) 244 , m_spaceGlyph(0) 245 { 246 m_abc.abcA = 0; 247 m_abc.abcB = 0; 248 m_abc.abcC = 0; 249 } 250 251 // Returns the number of glyphs (which will be drawn to the screen) 252 // in this run. 253 int glyphLength() const 254 { 255 return static_cast<int>(m_glyphs.size()); 256 } 257 258 // Returns the number of characters (that we started with) in this run. 259 int charLength() const 260 { 261 return static_cast<int>(m_logs.size()); 262 } 263 264 // Returns the advance array that should be used when measuring glyphs. 265 // The returned pointer will indicate an array with glyph_length() 266 // elements and the advance that should be used for each one. This is 267 // either the real advance, or the justified advances if there is one, 268 // and is the array we want to use for measurement. 269 const int* effectiveAdvances() const 270 { 271 if (m_advance.size() == 0) 272 return 0; 273 if (m_justify.size() == 0) 274 return &m_advance[0]; 275 return &m_justify[0]; 276 } 277 278 // This is the advance amount of space that we have added to the 279 // beginning of the run. It is like the ABC's |A| advance but one that 280 // we create and must handle internally whenever computing with pixel 281 // offsets. 282 int m_prePadding; 283 284 // Glyph indices in the font used to display this item. These indices 285 // are in screen order. 286 Vector<WORD, cUniscribeHelperStackChars> m_glyphs; 287 288 // For each input character, this tells us the first glyph index it 289 // generated. This is the only array with size of the input chars. 290 // 291 // All offsets are from the beginning of this run. Multiple characters 292 // can generate one glyph, in which case there will be adjacent 293 // duplicates in this list. One character can also generate multiple 294 // glyphs, in which case there will be skipped indices in this list. 295 Vector<WORD, cUniscribeHelperStackChars> m_logs; 296 297 // Flags and such for each glyph. 298 Vector<SCRIPT_VISATTR, cUniscribeHelperStackChars> m_visualAttributes; 299 300 // Horizontal advances for each glyph listed above, this is basically 301 // how wide each glyph is. 302 Vector<int, cUniscribeHelperStackChars> m_advance; 303 304 // This contains glyph offsets, from the nominal position of a glyph. 305 // It is used to adjust the positions of multiple combining characters 306 // around/above/below base characters in a context-sensitive manner so 307 // that they don't bump against each other and the base character. 308 Vector<GOFFSET, cUniscribeHelperStackChars> m_offsets; 309 310 // Filled by a call to Justify, this is empty for nonjustified text. 311 // If nonempty, this contains the array of justify characters for each 312 // character as returned by ScriptJustify. 313 // 314 // This is the same as the advance array, but with extra space added 315 // for some characters. The difference between a glyph's |justify| 316 // width and it's |advance| width is the extra space added. 317 Vector<int, cUniscribeHelperStackChars> m_justify; 318 319 // Sizing information for this run. This treats the entire run as a 320 // character with a preceeding advance, width, and ending advance. The 321 // B width is the sum of the |advance| array, and the A and C widths 322 // are any extra spacing applied to each end. 323 // 324 // It is unclear from the documentation what this actually means. From 325 // experimentation, it seems that the sum of the character advances is 326 // always the sum of the ABC values, and I'm not sure what you're 327 // supposed to do with the ABC values. 328 ABC m_abc; 329 330 // Pointers to windows font data used to render this run. 331 HFONT m_hfont; 332 SCRIPT_CACHE* m_scriptCache; 333 334 // Ascent offset between the ascent of the primary font 335 // and that of the fallback font. The offset needs to be applied, 336 // when drawing a string, to align multiple runs rendered with 337 // different fonts. 338 int m_ascentOffset; 339 340 WORD m_spaceGlyph; 341 }; 342 343 // Computes the runs_ array from the text run. 344 void fillRuns(); 345 346 // Computes the shapes_ array given an runs_ array already filled in. 347 void fillShapes(); 348 349 // Fills in the screen_order_ array (see below). 350 void fillScreenOrder(); 351 352 // Called to update the glyph positions based on the current spacing 353 // options that are set. 354 void applySpacing(); 355 356 // Normalizes all advances for spaces to the same width. This keeps windows 357 // from making spaces after Hindi characters larger, which is then 358 // inconsistent with our meaure of the width since WebKit doesn't include 359 // spaces in text-runs sent to uniscribe unless white-space:pre. 360 void adjustSpaceAdvances(); 361 362 // Returns the total width of a single item. 363 int advanceForItem(int) const; 364 365 bool containsMissingGlyphs(const Shaping&, 366 const SCRIPT_ITEM&, 367 const SCRIPT_FONTPROPERTIES*) const; 368 369 // Shapes a run (pointed to by |input|) using |hfont| first. 370 // Tries a series of fonts specified retrieved with NextWinFontData 371 // and finally a font covering characters in |*input|. A string pointed 372 // by |input| comes from ScriptItemize and is supposed to contain 373 // characters belonging to a single script aside from characters common to 374 // all scripts (e.g. space). 375 bool shape(const UChar* input, int itemLength, int numGlyphs, SCRIPT_ITEM& run, OPENTYPE_TAG, Shaping&); 376 377 // Gets Windows font data for the next best font to try in the list 378 // of fonts. When there's no more font available, returns false 379 // without touching any of out params. Need to call ResetFontIndex 380 // to start scanning of the font list from the beginning. 381 virtual bool nextWinFontData(HFONT&, SCRIPT_CACHE*&, SCRIPT_FONTPROPERTIES*&, int&, WORD&) 382 { 383 return false; 384 } 385 386 // Resets the font index to the first in the list of fonts to try after the 387 // primaryFont turns out not to work. With fontIndex reset, 388 // NextWinFontData scans fallback fonts from the beginning. 389 virtual void resetFontIndex() {} 390 391 // If m_cachedDC is 0, creates one that is compatible with the screen DC. 392 void EnsureCachedDCCreated(); 393 394 // The input data for this run of Uniscribe. See the constructor. 395 const UChar* m_input; 396 const int m_inputLength; 397 const bool m_isRtl; 398 399 // Windows font data for the primary font. In a sense, m_logfont and m_style 400 // are redundant because m_hfont contains all the information. However, 401 // invoking GetObject, everytime we need the height and the style, is rather 402 // expensive so that we cache them. Would it be better to add getter and 403 // (virtual) setter for the height and the style of the primary font, 404 // instead of m_logfont? Then, a derived class ctor can set m_ascent, 405 // m_height and m_style if they're known. Getters for them would have to 406 // 'infer' their values from m_hfont ONLY when they're not set. 407 HFONT m_hfont; 408 // We cache the DC to use with ScriptShape/ScriptPlace. 409 static HDC m_cachedDC; 410 SCRIPT_CACHE* m_scriptCache; 411 SCRIPT_FONTPROPERTIES* m_fontProperties; 412 int m_ascent; 413 LOGFONT m_logfont; 414 int m_style; 415 WORD m_spaceGlyph; 416 417 // Options, see the getters/setters above. 418 bool m_directionalOverride; 419 bool m_inhibitLigate; 420 int m_letterSpacing; 421 int m_spaceWidth; 422 int m_wordSpacing; 423 bool m_disableFontFallback; 424 425 // Uniscribe breaks the text into Runs. These are one length of text that is 426 // in one script and one direction. This array is in reading order. 427 Vector<SCRIPT_ITEM, cUniscribeHelperStackRuns> m_runs; 428 429 Vector<Shaping, cUniscribeHelperStackRuns> m_shapes; 430 Vector<OPENTYPE_TAG, cUniscribeHelperStackRuns> m_scriptTags; 431 432 // This is a mapping between reading order and screen order for the items. 433 // Uniscribe's items array are in reading order. For right-to-left text, 434 // or mixed (although WebKit's |TextRun| should really be only one 435 // direction), this makes it very difficult to compute character offsets 436 // and positions. This list is in screen order from left to right, and 437 // gives the index into the |m_runs| and |m_shapes| arrays of each 438 // subsequent item. 439 Vector<int, cUniscribeHelperStackRuns> m_screenOrder; 440 441 // This contains Uniscribe's OpenType feature settings. This structure 442 // is filled by using WebKit's |FontFeatureSettings|. 443 TEXTRANGE_PROPERTIES m_rangeProperties; 444 Vector<OPENTYPE_FEATURE_RECORD, cUniscribeHelperFeatures> m_featureRecords; 445 }; 446 447 } // namespace WebCore 448 449 #endif // UniscribeHelper_h 450