Home | History | Annotate | Download | only in win
      1 /*
      2  * Copyright (c) 2006, 2007, 2008, 2009, Google Inc. All rights reserved.
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions are
      6  * met:
      7  *
      8  *     * Redistributions of source code must retain the above copyright
      9  * notice, this list of conditions and the following disclaimer.
     10  *     * Redistributions in binary form must reproduce the above
     11  * copyright notice, this list of conditions and the following disclaimer
     12  * in the documentation and/or other materials provided with the
     13  * distribution.
     14  *     * Neither the name of Google Inc. nor the names of its
     15  * contributors may be used to endorse or promote products derived from
     16  * this software without specific prior written permission.
     17  *
     18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29  */
     30 
     31 // A wrapper around Uniscribe that provides a reasonable API.
     32 
     33 #ifndef UniscribeHelper_h
     34 #define UniscribeHelper_h
     35 
     36 #include <windows.h>
     37 #include <usp10.h>
     38 #include <map>
     39 
     40 #include <unicode/uchar.h>
     41 #include "wtf/Vector.h"
     42 
     43 class UniscribeTest_TooBig_Test; // A gunit test for UniscribeHelper.
     44 
     45 namespace WebCore {
     46 
     47 class FloatRect; // Passed by const reference to UniscribeHelper::draw() as |textRect|.
     48 class FontFeatureSettings; // Consumed by UniscribeHelper::setRangeProperties().
     49 class FontPlatformData; // Passed by const reference to UniscribeHelper::draw().
     50 class GraphicsContext; // Passed by pointer to UniscribeHelper::draw().
     51 
     52 const unsigned cUniscribeHelperStackRuns = 8; // Inline capacity of the per-run Vectors (m_runs, m_shapes, m_scriptTags, m_screenOrder).
     53 const unsigned cUniscribeHelperStackChars = 32; // Inline capacity of the per-glyph/per-character Vectors inside Shaping.
     54 const unsigned cUniscribeHelperFeatures = 4; // Inline capacity of m_featureRecords (OpenType feature records).
     55 
     56 // This object should be safe to create & destroy frequently, as long as the
     57 // caller preserves the script cache when possible (this data may be slow to
     58 // compute).
     59 //
     60 // This object is "kind of large" (~1K) because it reserves a lot of inline
     61 // working space to avoid expensive heap operations. Therefore, not only
     62 // should you not worry about creating and destroying it, you should also try
     63 // not to keep instances around.
     64 class UniscribeHelper {
     65 public:
     66     // Initializes this Uniscribe run with the text pointed to by |run| with
     67     // |length|. The input is NOT null terminated.
     68     //
     69     // The is_rtl flag should be set if the input script is RTL. It is assumed
     70     // that the caller has already divided up the input text (using ICU, for
     71     // example) into runs of the same direction of script. This avoids
     72     // disagreements between the caller and Uniscribe later (see FillItems).
     73     //
     74     // A script cache should be provided by the caller that is initialized to
     75     // NULL. When the caller is done with the cache (it may be stored between
     76     // runs as long as it is used consistently with the same HFONT), it should
     77     // call ScriptFreeCache().
     78     UniscribeHelper(const UChar* input,
     79                     int inputLength,
     80                     bool isRtl,
     81                     HFONT,
     82                     SCRIPT_CACHE*,
     83                     SCRIPT_FONTPROPERTIES*,
     84                     WORD);
     85 
     86     virtual ~UniscribeHelper();
     87 
     88     // Sets Uniscribe's directional override flag. False by default.
     89     bool directionalOverride() const
     90     {
     91         return m_directionalOverride;
     92     }
     93     void setDirectionalOverride(bool override)
     94     {
     95         m_directionalOverride = override;
     96     }
     97 
     98     // Set's Uniscribe's no-ligate override flag. False by default.
     99     bool inhibitLigate() const
    100     {
    101         return m_inhibitLigate;
    102     }
    103     void setInhibitLigate(bool inhibit)
    104     {
    105         m_inhibitLigate = inhibit;
    106     }
    107 
    108     // Set letter spacing. We will try to insert this much space between
    109     // graphemes (one or more glyphs perceived as a single unit by ordinary
    110     // users of a script). Positive values increase letter spacing, negative
    111     // values decrease it. 0 by default.
    112     int letterSpacing() const
    113     {
    114         return m_letterSpacing;
    115     }
    116     void setLetterSpacing(int letterSpacing)
    117     {
    118         m_letterSpacing = letterSpacing;
    119     }
    120 
    121     // Set the width of a standard space character. We use this to normalize
    122     // space widths. Windows will make spaces after Hindi characters larger than
    123     // other spaces. A space_width of 0 means to use the default space width.
    124     //
    125     // Must be set before Init() is called.
    126     int spaceWidth() const
    127     {
    128         return m_spaceWidth;
    129     }
    130     void setSpaceWidth(int spaceWidth)
    131     {
    132         m_spaceWidth = spaceWidth;
    133     }
    134 
    135     // Set word spacing. We will try to insert this much extra space between
    136     // each word in the input (beyond whatever whitespace character separates
    137     // words). Positive values lead to increased letter spacing, negative values
    138     // decrease it. 0 by default.
    139     //
    140     // Must be set before Init() is called.
    141     int wordSpacing() const
    142     {
    143         return m_wordSpacing;
    144     }
    145     void setWordSpacing(int wordSpacing)
    146     {
    147         m_wordSpacing = wordSpacing;
    148     }
    149 
    150     void setAscent(int ascent)
    151     {
    152         m_ascent = ascent;
    153     }
    154 
    155     // When set to true, this class is used only to look up glyph
    156     // indices for a range of Unicode characters without glyph placement.
    157     // By default, it's false. This should be set to true when this
    158     // class is used for glyph index look-up for non-BMP characters
    159     // in GlyphPageNodeChromiumWin.cpp.
    160     void setDisableFontFallback(bool disableFontFallback)
    161     {
    162         m_disableFontFallback = true;
    163     }
    164 
    165     // Set TEXTRANGE_PROPERTIES structure which contains
    166     // OpenType feature records generated from FontFeatureSettings.
    167     void setRangeProperties(const FontFeatureSettings*);
    168 
    169     // You must call this after setting any options but before doing any
    170     // other calls like asking for widths or drawing.
    171     void init()
    172     {
    173         initWithOptionalLengthProtection(true);
    174     }
    175 
    176     // Returns the total width in pixels of the text run.
    177     int width() const;
    178 
    179     // Call to justify the text, with the amount of space that should be ADDED
    180     // to get the desired width that the column should be justified to.
    181     // Normally, spaces are inserted, but for Arabic there will be kashidas
    182     // (extra strokes) inserted instead.
    183     //
    184     // This function MUST be called AFTER Init().
    185     void justify(int additionalSpace);
    186 
    187     // Computes the given character offset into a pixel offset of the beginning
    188     // of that character.
    189     int characterToX(int offset) const;
    190 
    191     // Converts the given pixel X position into a logical character offset into
    192     // the run. For positions appearing before the first character, this will
    193     // return -1.
    194     int xToCharacter(int x) const;
    195 
    196     // Draws the given characters to (x, y) in the given DC. The font will be
    197     // handled by this function, but the font color and other attributes should
    198     // be pre-set.
    199     //
    200     // The y position is the upper left corner, NOT the baseline.
    201     void draw(GraphicsContext*, const FontPlatformData&, HDC,
    202         int x, int y, const FloatRect& textRect,
    203         int from, int to);
    204 
    205     // Returns the first glyph assigned to the character at the given offset.
    206     // This function is used to retrieve glyph information when Uniscribe is
    207     // being used to generate glyphs for non-complex, non-BMP (above U+FFFF)
    208     // characters. These characters are not otherwise special and have no
    209     // complex shaping rules, so we don't otherwise need Uniscribe, except
    210     // Uniscribe is the only way to get glyphs for non-BMP characters.
    211     //
    212     // Returns 0 if there is no glyph for the given character.
    213     WORD firstGlyphForCharacter(int charOffset) const;
    214 
    215 protected:
    216     // Backend for init. The flag allows the unit test to specify whether we
    217     // should fail early for very long strings like normal, or try to pass the
    218     // long string to Uniscribe. The latter provides a way to force failure of
    219     // shaping.
    220     void initWithOptionalLengthProtection(bool lengthProtection);
    221 
    222     // Tries to preload the font when the it is not accessible.
    223     // This is the default implementation and it does not do anything.
    224     virtual void tryToPreloadFont(HFONT) {}
    225 
    226     // Let our subclasses provide the input lazily in case they can't compute
    227     // it in their constructors. Once we have input, however, we don't let
    228     // our subclasses change it.
    229     void setInput(const UChar* input) { ASSERT(!m_input); m_input = input; }
    230 
    231 private:
    232     friend class UniscribeTest_TooBig_Test;
    233 
    234     // An array corresponding to each item in runs_ containing information
    235     // on each of the glyphs that were generated. Like runs_, this is in
    236     // reading order. However, for rtl text, the characters within each
    237     // item will be reversed.
    238     struct Shaping {
    239         Shaping()
    240             : m_prePadding(0)
    241             , m_hfont(NULL)
    242             , m_scriptCache(NULL)
    243             , m_ascentOffset(0)
    244             , m_spaceGlyph(0)
    245         {
    246             m_abc.abcA = 0;
    247             m_abc.abcB = 0;
    248             m_abc.abcC = 0;
    249         }
    250 
    251         // Returns the number of glyphs (which will be drawn to the screen)
    252         // in this run.
    253         int glyphLength() const
    254         {
    255             return static_cast<int>(m_glyphs.size());
    256         }
    257 
    258         // Returns the number of characters (that we started with) in this run.
    259         int charLength() const
    260         {
    261             return static_cast<int>(m_logs.size());
    262         }
    263 
    264         // Returns the advance array that should be used when measuring glyphs.
    265         // The returned pointer will indicate an array with glyph_length()
    266         // elements and the advance that should be used for each one. This is
    267         // either the real advance, or the justified advances if there is one,
    268         // and is the array we want to use for measurement.
    269         const int* effectiveAdvances() const
    270         {
    271             if (m_advance.size() == 0)
    272                 return 0;
    273             if (m_justify.size() == 0)
    274                 return &m_advance[0];
    275             return &m_justify[0];
    276         }
    277 
    278         // This is the advance amount of space that we have added to the
    279         // beginning of the run. It is like the ABC's |A| advance but one that
    280         // we create and must handle internally whenever computing with pixel
    281         // offsets.
    282         int m_prePadding;
    283 
    284         // Glyph indices in the font used to display this item. These indices
    285         // are in screen order.
    286         Vector<WORD, cUniscribeHelperStackChars> m_glyphs;
    287 
    288         // For each input character, this tells us the first glyph index it
    289         // generated. This is the only array with size of the input chars.
    290         //
    291         // All offsets are from the beginning of this run. Multiple characters
    292         // can generate one glyph, in which case there will be adjacent
    293         // duplicates in this list. One character can also generate multiple
    294         // glyphs, in which case there will be skipped indices in this list.
    295         Vector<WORD, cUniscribeHelperStackChars> m_logs;
    296 
    297         // Flags and such for each glyph.
    298         Vector<SCRIPT_VISATTR, cUniscribeHelperStackChars> m_visualAttributes;
    299 
    300         // Horizontal advances for each glyph listed above, this is basically
    301         // how wide each glyph is.
    302         Vector<int, cUniscribeHelperStackChars> m_advance;
    303 
    304         // This contains glyph offsets, from the nominal position of a glyph.
    305         // It is used to adjust the positions of multiple combining characters
    306         // around/above/below base characters in a context-sensitive manner so
    307         // that they don't bump against each other and the base character.
    308         Vector<GOFFSET, cUniscribeHelperStackChars> m_offsets;
    309 
    310         // Filled by a call to Justify, this is empty for nonjustified text.
    311         // If nonempty, this contains the array of justify characters for each
    312         // character as returned by ScriptJustify.
    313         //
    314         // This is the same as the advance array, but with extra space added
    315         // for some characters. The difference between a glyph's |justify|
    316         // width and it's |advance| width is the extra space added.
    317         Vector<int, cUniscribeHelperStackChars> m_justify;
    318 
    319         // Sizing information for this run. This treats the entire run as a
    320         // character with a preceeding advance, width, and ending advance.  The
    321         // B width is the sum of the |advance| array, and the A and C widths
    322         // are any extra spacing applied to each end.
    323         //
    324         // It is unclear from the documentation what this actually means. From
    325         // experimentation, it seems that the sum of the character advances is
    326         // always the sum of the ABC values, and I'm not sure what you're
    327         // supposed to do with the ABC values.
    328         ABC m_abc;
    329 
    330         // Pointers to windows font data used to render this run.
    331         HFONT m_hfont;
    332         SCRIPT_CACHE* m_scriptCache;
    333 
    334         // Ascent offset between the ascent of the primary font
    335         // and that of the fallback font. The offset needs to be applied,
    336         // when drawing a string, to align multiple runs rendered with
    337         // different fonts.
    338         int m_ascentOffset;
    339 
    340         WORD m_spaceGlyph;
    341     };
    342 
    343     // Computes the runs_ array from the text run.
    344     void fillRuns();
    345 
    346     // Computes the shapes_ array given an runs_ array already filled in.
    347     void fillShapes();
    348 
    349     // Fills in the screen_order_ array (see below).
    350     void fillScreenOrder();
    351 
    352     // Called to update the glyph positions based on the current spacing
    353     // options that are set.
    354     void applySpacing();
    355 
    356     // Normalizes all advances for spaces to the same width. This keeps windows
    357     // from making spaces after Hindi characters larger, which is then
    358     // inconsistent with our meaure of the width since WebKit doesn't include
    359     // spaces in text-runs sent to uniscribe unless white-space:pre.
    360     void adjustSpaceAdvances();
    361 
    362     // Returns the total width of a single item.
    363     int advanceForItem(int) const;
    364 
    365     bool containsMissingGlyphs(const Shaping&,
    366                                const SCRIPT_ITEM&,
    367                                const SCRIPT_FONTPROPERTIES*) const;
    368 
    369     // Shapes a run (pointed to by |input|) using |hfont| first.
    370     // Tries a series of fonts specified retrieved with NextWinFontData
    371     // and finally a font covering characters in |*input|. A string pointed
    372     // by |input| comes from ScriptItemize and is supposed to contain
    373     // characters belonging to a single script aside from characters common to
    374     // all scripts (e.g. space).
    375     bool shape(const UChar* input, int itemLength, int numGlyphs, SCRIPT_ITEM& run, OPENTYPE_TAG, Shaping&);
    376 
    377     // Gets Windows font data for the next best font to try in the list
    378     // of fonts. When there's no more font available, returns false
    379     // without touching any of out params. Need to call ResetFontIndex
    380     // to start scanning of the font list from the beginning.
    381     virtual bool nextWinFontData(HFONT&, SCRIPT_CACHE*&, SCRIPT_FONTPROPERTIES*&, int&, WORD&)
    382     {
    383         return false;
    384     }
    385 
    386     // Resets the font index to the first in the list of fonts to try after the
    387     // primaryFont turns out not to work. With fontIndex reset,
    388     // NextWinFontData scans fallback fonts from the beginning.
    389     virtual void resetFontIndex() {}
    390 
    391     // If m_cachedDC is 0, creates one that is compatible with the screen DC.
    392     void EnsureCachedDCCreated();
    393 
    394     // The input data for this run of Uniscribe. See the constructor.
    395     const UChar* m_input;
    396     const int m_inputLength;
    397     const bool m_isRtl;
    398 
    399     // Windows font data for the primary font. In a sense, m_logfont and m_style
    400     // are redundant because m_hfont contains all the information. However,
    401     // invoking GetObject, everytime we need the height and the style, is rather
    402     // expensive so that we cache them. Would it be better to add getter and
    403     // (virtual) setter for the height and the style of the primary font,
    404     // instead of m_logfont? Then, a derived class ctor can set m_ascent,
    405     // m_height and m_style if they're known. Getters for them would have to
    406     // 'infer' their values from m_hfont ONLY when they're not set.
    407     HFONT m_hfont;
    408     // We cache the DC to use with ScriptShape/ScriptPlace.
    409     static HDC m_cachedDC;
    410     SCRIPT_CACHE* m_scriptCache;
    411     SCRIPT_FONTPROPERTIES* m_fontProperties;
    412     int m_ascent;
    413     LOGFONT m_logfont;
    414     int m_style;
    415     WORD m_spaceGlyph;
    416 
    417     // Options, see the getters/setters above.
    418     bool m_directionalOverride;
    419     bool m_inhibitLigate;
    420     int m_letterSpacing;
    421     int m_spaceWidth;
    422     int m_wordSpacing;
    423     bool m_disableFontFallback;
    424 
    425     // Uniscribe breaks the text into Runs. These are one length of text that is
    426     // in one script and one direction. This array is in reading order.
    427     Vector<SCRIPT_ITEM, cUniscribeHelperStackRuns> m_runs;
    428 
    429     Vector<Shaping, cUniscribeHelperStackRuns> m_shapes;
    430     Vector<OPENTYPE_TAG, cUniscribeHelperStackRuns> m_scriptTags;
    431 
    432     // This is a mapping between reading order and screen order for the items.
    433     // Uniscribe's items array are in reading order. For right-to-left text,
    434     // or mixed (although WebKit's |TextRun| should really be only one
    435     // direction), this makes it very difficult to compute character offsets
    436     // and positions. This list is in screen order from left to right, and
    437     // gives the index into the |m_runs| and |m_shapes| arrays of each
    438     // subsequent item.
    439     Vector<int, cUniscribeHelperStackRuns> m_screenOrder;
    440 
    441     // This contains Uniscribe's OpenType feature settings. This structure
    442     // is filled by using WebKit's |FontFeatureSettings|.
    443     TEXTRANGE_PROPERTIES m_rangeProperties;
    444     Vector<OPENTYPE_FEATURE_RECORD, cUniscribeHelperFeatures> m_featureRecords;
    445 };
    446 
    447 }  // namespace WebCore
    448 
    449 #endif  // UniscribeHelper_h
    450