Home | History | Annotate | Download | only in rendering
      1 /*
      2  * Copyright (C) 2005, 2007, 2010 Apple Inc. All rights reserved.
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions
      6  * are met:
      7  * 1. Redistributions of source code must retain the above copyright
      8  *    notice, this list of conditions and the following disclaimer.
      9  * 2. Redistributions in binary form must reproduce the above copyright
     10  *    notice, this list of conditions and the following disclaimer in the
     11  *    documentation and/or other materials provided with the distribution.
     12  *
     13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
     14  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
     15  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     16  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
     17  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     18  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     19  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     20  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     21  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     22  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
     23  * THE POSSIBILITY OF SUCH DAMAGE.
     24  */
     25 
     26 #include "config.h"
     27 #include "break_lines.h"
     28 
     29 #include "CharacterNames.h"
     30 #include "TextBreakIterator.h"
     31 
     32 #if PLATFORM(MAC)
     33 #include <CoreServices/CoreServices.h>
     34 #endif
     35 
     36 namespace WebCore {
     37 
     38 static inline bool isBreakableSpace(UChar ch, bool treatNoBreakSpaceAsBreak)
     39 {
     40     switch (ch) {
     41         case ' ':
     42         case '\n':
     43         case '\t':
     44             return true;
     45         case noBreakSpace:
     46             return treatNoBreakSpaceAsBreak;
     47         default:
     48             return false;
     49     }
     50 }
     51 
     52 // This differs from the Unicode algorithm only in that Unicode does not break
     53 // between a question mark and a vertical line (U+007C).
     54 static const unsigned char internetExplorerLineBreaksAfterQuestionMarkTable[0x80] = {
     55     1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, // \t
     56     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
     57     1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, // ! " ' ) , . /
     58     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, // : ; ?
     59     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
     60     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, // ]
     61     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
     62     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1  // }
     63 };
     64 
     65 static const size_t internetExplorerLineBreaksAfterQuestionMarkTableSize = sizeof(internetExplorerLineBreaksAfterQuestionMarkTable) / sizeof(*internetExplorerLineBreaksAfterQuestionMarkTable);
     66 
     67 static inline bool shouldBreakAfter(UChar ch, UChar nextCh)
     68 {
     69     switch (ch) {
     70         // For a question mark preceding a non-ASCII characters, defer to the Unicode algorithm by returning false.
     71         // For ASCII characters, use a lookup table for enhanced speed and for compatibility with Internet Explorer.
     72         case '?':
     73             return nextCh < internetExplorerLineBreaksAfterQuestionMarkTableSize && internetExplorerLineBreaksAfterQuestionMarkTable[nextCh];
     74         // Internet Explorer always allows breaking after a hyphen.
     75         case '-':
     76         case softHyphen:
     77         // FIXME: cases for ideographicComma and ideographicFullStop are a workaround for an issue in Unicode 5.0
     78         // which is likely to be resolved in Unicode 5.1 <http://bugs.webkit.org/show_bug.cgi?id=17411>.
     79         // We may want to remove or conditionalize this workaround at some point.
     80         case ideographicComma:
     81         case ideographicFullStop:
     82 #ifdef ANDROID_LAYOUT
     83         // as '/' is used in uri which is always long, we would like to break it
     84         case '/':
     85 #endif
     86             return true;
     87         default:
     88             return false;
     89     }
     90 }
     91 
     92 static inline bool needsLineBreakIterator(UChar ch)
     93 {
     94     return ch > 0x7F && ch != noBreakSpace;
     95 }
     96 
     97 #if PLATFORM(MAC) && defined(BUILDING_ON_TIGER)
     98 static inline TextBreakLocatorRef lineBreakLocator()
     99 {
    100     TextBreakLocatorRef locator = 0;
    101     UCCreateTextBreakLocator(0, 0, kUCTextBreakLineMask, &locator);
    102     return locator;
    103 }
    104 #endif
    105 
    106 int nextBreakablePosition(const UChar* str, int pos, int len, bool treatNoBreakSpaceAsBreak)
    107 {
    108 #if !PLATFORM(MAC) || !defined(BUILDING_ON_TIGER)
    109     TextBreakIterator* breakIterator = 0;
    110 #endif
    111     int nextBreak = -1;
    112 
    113     UChar lastCh = pos > 0 ? str[pos - 1] : 0;
    114     for (int i = pos; i < len; i++) {
    115         UChar ch = str[i];
    116 
    117         if (isBreakableSpace(ch, treatNoBreakSpaceAsBreak) || shouldBreakAfter(lastCh, ch))
    118             return i;
    119 
    120         if (needsLineBreakIterator(ch) || needsLineBreakIterator(lastCh)) {
    121             if (nextBreak < i && i) {
    122 #if !PLATFORM(MAC) || !defined(BUILDING_ON_TIGER)
    123                 if (!breakIterator)
    124                     breakIterator = lineBreakIterator(str, len);
    125                 if (breakIterator)
    126                     nextBreak = textBreakFollowing(breakIterator, i - 1);
    127 #else
    128                 static TextBreakLocatorRef breakLocator = lineBreakLocator();
    129                 if (breakLocator) {
    130                     UniCharArrayOffset nextUCBreak;
    131                     if (UCFindTextBreak(breakLocator, kUCTextBreakLineMask, 0, str, len, i, &nextUCBreak) == 0)
    132                         nextBreak = nextUCBreak;
    133                 }
    134 #endif
    135             }
    136             if (i == nextBreak && !isBreakableSpace(lastCh, treatNoBreakSpaceAsBreak))
    137                 return i;
    138         }
    139 
    140         lastCh = ch;
    141     }
    142 
    143     return len;
    144 }
    145 
    146 } // namespace WebCore
    147