1 // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. 2 3 // Copyright 2016 The Go Authors. All rights reserved. 4 // Use of this source code is governed by a BSD-style 5 // license that can be found in the LICENSE file. 6 7 package bidi 8 9 import "unicode/utf8" 10 11 // Properties provides access to BiDi properties of runes. 12 type Properties struct { 13 entry uint8 14 last uint8 15 } 16 17 var trie = newBidiTrie(0) 18 19 // TODO: using this for bidirule reduces the running time by about 5%. Consider 20 // if this is worth exposing or if we can find a way to speed up the Class 21 // method. 22 // 23 // // CompactClass is like Class, but maps all of the BiDi control classes 24 // // (LRO, RLO, LRE, RLE, PDF, LRI, RLI, FSI, PDI) to the class Control. 25 // func (p Properties) CompactClass() Class { 26 // return Class(p.entry & 0x0F) 27 // } 28 29 // Class returns the Bidi class for p. 30 func (p Properties) Class() Class { 31 c := Class(p.entry & 0x0F) 32 if c == Control { 33 c = controlByteToClass[p.last&0xF] 34 } 35 return c 36 } 37 38 // IsBracket reports whether the rune is a bracket. 39 func (p Properties) IsBracket() bool { return p.entry&0xF0 != 0 } 40 41 // IsOpeningBracket reports whether the rune is an opening bracket. 42 // IsBracket must return true. 43 func (p Properties) IsOpeningBracket() bool { return p.entry&openMask != 0 } 44 45 // TODO: find a better API and expose. 46 func (p Properties) reverseBracket(r rune) rune { 47 return xorMasks[p.entry>>xorMaskShift] ^ r 48 } 49 50 var controlByteToClass = [16]Class{ 51 0xD: LRO, // U+202D LeftToRightOverride, 52 0xE: RLO, // U+202E RightToLeftOverride, 53 0xA: LRE, // U+202A LeftToRightEmbedding, 54 0xB: RLE, // U+202B RightToLeftEmbedding, 55 0xC: PDF, // U+202C PopDirectionalFormat, 56 0x6: LRI, // U+2066 LeftToRightIsolate, 57 0x7: RLI, // U+2067 RightToLeftIsolate, 58 0x8: FSI, // U+2068 FirstStrongIsolate, 59 0x9: PDI, // U+2069 PopDirectionalIsolate, 60 } 61 62 // LookupRune returns properties for r. 63 func LookupRune(r rune) (p Properties, size int) { 64 var buf [4]byte 65 n := utf8.EncodeRune(buf[:], r) 66 return Lookup(buf[:n]) 67 } 68 69 // TODO: these lookup methods are based on the generated trie code. The returned 70 // sizes have slightly different semantics from the generated code, in that it 71 // always returns size==1 for an illegal UTF-8 byte (instead of the length 72 // of the maximum invalid subsequence). Most Transformers, like unicode/norm, 73 // leave invalid UTF-8 untouched, in which case it has performance benefits to 74 // do so (without changing the semantics). Bidi requires the semantics used here 75 // for the bidirule implementation to be compatible with the Go semantics. 76 // They ultimately should perhaps be adopted by all trie implementations, for 77 // convenience sake. 78 // This unrolled code also boosts performance of the secure/bidirule package by 79 // about 30%. 80 // So, to remove this code: 81 // - add option to trie generator to define return type. 82 // - always return 1 byte size for ill-formed UTF-8 runes. 83 84 // Lookup returns properties for the first rune in s and the width in bytes of 85 // its encoding. The size will be 0 if s does not hold enough bytes to complete 86 // the encoding. 87 func Lookup(s []byte) (p Properties, sz int) { 88 c0 := s[0] 89 switch { 90 case c0 < 0x80: // is ASCII 91 return Properties{entry: bidiValues[c0]}, 1 92 case c0 < 0xC2: 93 return Properties{}, 1 94 case c0 < 0xE0: // 2-byte UTF-8 95 if len(s) < 2 { 96 return Properties{}, 0 97 } 98 i := bidiIndex[c0] 99 c1 := s[1] 100 if c1 < 0x80 || 0xC0 <= c1 { 101 return Properties{}, 1 102 } 103 return Properties{entry: trie.lookupValue(uint32(i), c1)}, 2 104 case c0 < 0xF0: // 3-byte UTF-8 105 if len(s) < 3 { 106 return Properties{}, 0 107 } 108 i := bidiIndex[c0] 109 c1 := s[1] 110 if c1 < 0x80 || 0xC0 <= c1 { 111 return Properties{}, 1 112 } 113 o := uint32(i)<<6 + uint32(c1) 114 i = bidiIndex[o] 115 c2 := s[2] 116 if c2 < 0x80 || 0xC0 <= c2 { 117 return Properties{}, 1 118 } 119 return Properties{entry: trie.lookupValue(uint32(i), c2), last: c2}, 3 120 case c0 < 0xF8: // 4-byte UTF-8 121 if len(s) < 4 { 122 return Properties{}, 0 123 } 124 i := bidiIndex[c0] 125 c1 := s[1] 126 if c1 < 0x80 || 0xC0 <= c1 { 127 return Properties{}, 1 128 } 129 o := uint32(i)<<6 + uint32(c1) 130 i = bidiIndex[o] 131 c2 := s[2] 132 if c2 < 0x80 || 0xC0 <= c2 { 133 return Properties{}, 1 134 } 135 o = uint32(i)<<6 + uint32(c2) 136 i = bidiIndex[o] 137 c3 := s[3] 138 if c3 < 0x80 || 0xC0 <= c3 { 139 return Properties{}, 1 140 } 141 return Properties{entry: trie.lookupValue(uint32(i), c3)}, 4 142 } 143 // Illegal rune 144 return Properties{}, 1 145 } 146 147 // LookupString returns properties for the first rune in s and the width in 148 // bytes of its encoding. The size will be 0 if s does not hold enough bytes to 149 // complete the encoding. 150 func LookupString(s string) (p Properties, sz int) { 151 c0 := s[0] 152 switch { 153 case c0 < 0x80: // is ASCII 154 return Properties{entry: bidiValues[c0]}, 1 155 case c0 < 0xC2: 156 return Properties{}, 1 157 case c0 < 0xE0: // 2-byte UTF-8 158 if len(s) < 2 { 159 return Properties{}, 0 160 } 161 i := bidiIndex[c0] 162 c1 := s[1] 163 if c1 < 0x80 || 0xC0 <= c1 { 164 return Properties{}, 1 165 } 166 return Properties{entry: trie.lookupValue(uint32(i), c1)}, 2 167 case c0 < 0xF0: // 3-byte UTF-8 168 if len(s) < 3 { 169 return Properties{}, 0 170 } 171 i := bidiIndex[c0] 172 c1 := s[1] 173 if c1 < 0x80 || 0xC0 <= c1 { 174 return Properties{}, 1 175 } 176 o := uint32(i)<<6 + uint32(c1) 177 i = bidiIndex[o] 178 c2 := s[2] 179 if c2 < 0x80 || 0xC0 <= c2 { 180 return Properties{}, 1 181 } 182 return Properties{entry: trie.lookupValue(uint32(i), c2), last: c2}, 3 183 case c0 < 0xF8: // 4-byte UTF-8 184 if len(s) < 4 { 185 return Properties{}, 0 186 } 187 i := bidiIndex[c0] 188 c1 := s[1] 189 if c1 < 0x80 || 0xC0 <= c1 { 190 return Properties{}, 1 191 } 192 o := uint32(i)<<6 + uint32(c1) 193 i = bidiIndex[o] 194 c2 := s[2] 195 if c2 < 0x80 || 0xC0 <= c2 { 196 return Properties{}, 1 197 } 198 o = uint32(i)<<6 + uint32(c2) 199 i = bidiIndex[o] 200 c3 := s[3] 201 if c3 < 0x80 || 0xC0 <= c3 { 202 return Properties{}, 1 203 } 204 return Properties{entry: trie.lookupValue(uint32(i), c3)}, 4 205 } 206 // Illegal rune 207 return Properties{}, 1 208 } 209