1 /* 2 * Copyright (C) 2005, 2007, 2010 Apple Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' 14 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 15 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS 17 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 18 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 19 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 20 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 21 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 22 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 23 * THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26 #include "config.h" 27 #include "break_lines.h" 28 29 #include "CharacterNames.h" 30 #include "TextBreakIterator.h" 31 32 #if PLATFORM(MAC) 33 #include <CoreServices/CoreServices.h> 34 #endif 35 36 namespace WebCore { 37 38 static inline bool isBreakableSpace(UChar ch, bool treatNoBreakSpaceAsBreak) 39 { 40 switch (ch) { 41 case ' ': 42 case '\n': 43 case '\t': 44 return true; 45 case noBreakSpace: 46 return treatNoBreakSpaceAsBreak; 47 default: 48 return false; 49 } 50 } 51 52 // This differs from the Unicode algorithm only in that Unicode does not break 53 // between a question mark and a vertical line (U+007C). 54 static const unsigned char internetExplorerLineBreaksAfterQuestionMarkTable[0x80] = { 55 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, // \t 56 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 57 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, // ! " ' ) , . / 58 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, // : ; ? 59 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 60 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, // ] 61 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 62 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1 // } 63 }; 64 65 static const size_t internetExplorerLineBreaksAfterQuestionMarkTableSize = sizeof(internetExplorerLineBreaksAfterQuestionMarkTable) / sizeof(*internetExplorerLineBreaksAfterQuestionMarkTable); 66 67 static inline bool shouldBreakAfter(UChar ch, UChar nextCh) 68 { 69 switch (ch) { 70 // For a question mark preceding a non-ASCII characters, defer to the Unicode algorithm by returning false. 71 // For ASCII characters, use a lookup table for enhanced speed and for compatibility with Internet Explorer. 72 case '?': 73 return nextCh < internetExplorerLineBreaksAfterQuestionMarkTableSize && internetExplorerLineBreaksAfterQuestionMarkTable[nextCh]; 74 // Internet Explorer always allows breaking after a hyphen. 75 case '-': 76 case softHyphen: 77 // FIXME: cases for ideographicComma and ideographicFullStop are a workaround for an issue in Unicode 5.0 78 // which is likely to be resolved in Unicode 5.1 <http://bugs.webkit.org/show_bug.cgi?id=17411>. 79 // We may want to remove or conditionalize this workaround at some point. 80 case ideographicComma: 81 case ideographicFullStop: 82 #ifdef ANDROID_LAYOUT 83 // as '/' is used in uri which is always long, we would like to break it 84 case '/': 85 #endif 86 return true; 87 default: 88 return false; 89 } 90 } 91 92 static inline bool needsLineBreakIterator(UChar ch) 93 { 94 return ch > 0x7F && ch != noBreakSpace; 95 } 96 97 #if PLATFORM(MAC) && defined(BUILDING_ON_TIGER) 98 static inline TextBreakLocatorRef lineBreakLocator() 99 { 100 TextBreakLocatorRef locator = 0; 101 UCCreateTextBreakLocator(0, 0, kUCTextBreakLineMask, &locator); 102 return locator; 103 } 104 #endif 105 106 int nextBreakablePosition(const UChar* str, int pos, int len, bool treatNoBreakSpaceAsBreak) 107 { 108 #if !PLATFORM(MAC) || !defined(BUILDING_ON_TIGER) 109 TextBreakIterator* breakIterator = 0; 110 #endif 111 int nextBreak = -1; 112 113 UChar lastCh = pos > 0 ? str[pos - 1] : 0; 114 for (int i = pos; i < len; i++) { 115 UChar ch = str[i]; 116 117 if (isBreakableSpace(ch, treatNoBreakSpaceAsBreak) || shouldBreakAfter(lastCh, ch)) 118 return i; 119 120 if (needsLineBreakIterator(ch) || needsLineBreakIterator(lastCh)) { 121 if (nextBreak < i && i) { 122 #if !PLATFORM(MAC) || !defined(BUILDING_ON_TIGER) 123 if (!breakIterator) 124 breakIterator = lineBreakIterator(str, len); 125 if (breakIterator) 126 nextBreak = textBreakFollowing(breakIterator, i - 1); 127 #else 128 static TextBreakLocatorRef breakLocator = lineBreakLocator(); 129 if (breakLocator) { 130 UniCharArrayOffset nextUCBreak; 131 if (UCFindTextBreak(breakLocator, kUCTextBreakLineMask, 0, str, len, i, &nextUCBreak) == 0) 132 nextBreak = nextUCBreak; 133 } 134 #endif 135 } 136 if (i == nextBreak && !isBreakableSpace(lastCh, treatNoBreakSpaceAsBreak)) 137 return i; 138 } 139 140 lastCh = ch; 141 } 142 143 return len; 144 } 145 146 } // namespace WebCore 147