Home | History | Annotate | Download | only in bidi
      1 // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
      2 
      3 // Copyright 2016 The Go Authors. All rights reserved.
      4 // Use of this source code is governed by a BSD-style
      5 // license that can be found in the LICENSE file.
      6 
      7 package bidi
      8 
      9 import "unicode/utf8"
     10 
     11 // Properties provides access to BiDi properties of runes.
     12 type Properties struct {
     13 	entry uint8
     14 	last  uint8
     15 }
     16 
// trie is the package-level lookup structure that Lookup and LookupString
// consult (via lookupValue) for multi-byte UTF-8 encodings.
var trie = newBidiTrie(0)
     18 
     19 // TODO: using this for bidirule reduces the running time by about 5%. Consider
     20 // if this is worth exposing or if we can find a way to speed up the Class
     21 // method.
     22 //
     23 // // CompactClass is like Class, but maps all of the BiDi control classes
     24 // // (LRO, RLO, LRE, RLE, PDF, LRI, RLI, FSI, PDI) to the class Control.
     25 // func (p Properties) CompactClass() Class {
     26 // 	return Class(p.entry & 0x0F)
     27 // }
     28 
     29 // Class returns the Bidi class for p.
     30 func (p Properties) Class() Class {
     31 	c := Class(p.entry & 0x0F)
     32 	if c == Control {
     33 		c = controlByteToClass[p.last&0xF]
     34 	}
     35 	return c
     36 }
     37 
     38 // IsBracket reports whether the rune is a bracket.
     39 func (p Properties) IsBracket() bool { return p.entry&0xF0 != 0 }
     40 
     41 // IsOpeningBracket reports whether the rune is an opening bracket.
     42 // IsBracket must return true.
     43 func (p Properties) IsOpeningBracket() bool { return p.entry&openMask != 0 }
     44 
     45 // TODO: find a better API and expose.
     46 func (p Properties) reverseBracket(r rune) rune {
     47 	return xorMasks[p.entry>>xorMaskShift] ^ r
     48 }
     49 
     50 var controlByteToClass = [16]Class{
     51 	0xD: LRO, // U+202D LeftToRightOverride,
     52 	0xE: RLO, // U+202E RightToLeftOverride,
     53 	0xA: LRE, // U+202A LeftToRightEmbedding,
     54 	0xB: RLE, // U+202B RightToLeftEmbedding,
     55 	0xC: PDF, // U+202C PopDirectionalFormat,
     56 	0x6: LRI, // U+2066 LeftToRightIsolate,
     57 	0x7: RLI, // U+2067 RightToLeftIsolate,
     58 	0x8: FSI, // U+2068 FirstStrongIsolate,
     59 	0x9: PDI, // U+2069 PopDirectionalIsolate,
     60 }
     61 
     62 // LookupRune returns properties for r.
     63 func LookupRune(r rune) (p Properties, size int) {
     64 	var buf [4]byte
     65 	n := utf8.EncodeRune(buf[:], r)
     66 	return Lookup(buf[:n])
     67 }
     68 
     69 // TODO: these lookup methods are based on the generated trie code. The returned
     70 // sizes have slightly different semantics from the generated code, in that it
     71 // always returns size==1 for an illegal UTF-8 byte (instead of the length
     72 // of the maximum invalid subsequence). Most Transformers, like unicode/norm,
     73 // leave invalid UTF-8 untouched, in which case it has performance benefits to
     74 // do so (without changing the semantics). Bidi requires the semantics used here
     75 // for the bidirule implementation to be compatible with the Go semantics.
// They ultimately should perhaps be adopted by all trie implementations, for
// convenience's sake.
     78 // This unrolled code also boosts performance of the secure/bidirule package by
     79 // about 30%.
     80 // So, to remove this code:
     81 //   - add option to trie generator to define return type.
     82 //   - always return 1 byte size for ill-formed UTF-8 runes.
     83 
     84 // Lookup returns properties for the first rune in s and the width in bytes of
     85 // its encoding. The size will be 0 if s does not hold enough bytes to complete
     86 // the encoding.
     87 func Lookup(s []byte) (p Properties, sz int) {
     88 	c0 := s[0]
     89 	switch {
     90 	case c0 < 0x80: // is ASCII
     91 		return Properties{entry: bidiValues[c0]}, 1
     92 	case c0 < 0xC2:
     93 		return Properties{}, 1
     94 	case c0 < 0xE0: // 2-byte UTF-8
     95 		if len(s) < 2 {
     96 			return Properties{}, 0
     97 		}
     98 		i := bidiIndex[c0]
     99 		c1 := s[1]
    100 		if c1 < 0x80 || 0xC0 <= c1 {
    101 			return Properties{}, 1
    102 		}
    103 		return Properties{entry: trie.lookupValue(uint32(i), c1)}, 2
    104 	case c0 < 0xF0: // 3-byte UTF-8
    105 		if len(s) < 3 {
    106 			return Properties{}, 0
    107 		}
    108 		i := bidiIndex[c0]
    109 		c1 := s[1]
    110 		if c1 < 0x80 || 0xC0 <= c1 {
    111 			return Properties{}, 1
    112 		}
    113 		o := uint32(i)<<6 + uint32(c1)
    114 		i = bidiIndex[o]
    115 		c2 := s[2]
    116 		if c2 < 0x80 || 0xC0 <= c2 {
    117 			return Properties{}, 1
    118 		}
    119 		return Properties{entry: trie.lookupValue(uint32(i), c2), last: c2}, 3
    120 	case c0 < 0xF8: // 4-byte UTF-8
    121 		if len(s) < 4 {
    122 			return Properties{}, 0
    123 		}
    124 		i := bidiIndex[c0]
    125 		c1 := s[1]
    126 		if c1 < 0x80 || 0xC0 <= c1 {
    127 			return Properties{}, 1
    128 		}
    129 		o := uint32(i)<<6 + uint32(c1)
    130 		i = bidiIndex[o]
    131 		c2 := s[2]
    132 		if c2 < 0x80 || 0xC0 <= c2 {
    133 			return Properties{}, 1
    134 		}
    135 		o = uint32(i)<<6 + uint32(c2)
    136 		i = bidiIndex[o]
    137 		c3 := s[3]
    138 		if c3 < 0x80 || 0xC0 <= c3 {
    139 			return Properties{}, 1
    140 		}
    141 		return Properties{entry: trie.lookupValue(uint32(i), c3)}, 4
    142 	}
    143 	// Illegal rune
    144 	return Properties{}, 1
    145 }
    146 
    147 // LookupString returns properties for the first rune in s and the width in
    148 // bytes of its encoding. The size will be 0 if s does not hold enough bytes to
    149 // complete the encoding.
    150 func LookupString(s string) (p Properties, sz int) {
    151 	c0 := s[0]
    152 	switch {
    153 	case c0 < 0x80: // is ASCII
    154 		return Properties{entry: bidiValues[c0]}, 1
    155 	case c0 < 0xC2:
    156 		return Properties{}, 1
    157 	case c0 < 0xE0: // 2-byte UTF-8
    158 		if len(s) < 2 {
    159 			return Properties{}, 0
    160 		}
    161 		i := bidiIndex[c0]
    162 		c1 := s[1]
    163 		if c1 < 0x80 || 0xC0 <= c1 {
    164 			return Properties{}, 1
    165 		}
    166 		return Properties{entry: trie.lookupValue(uint32(i), c1)}, 2
    167 	case c0 < 0xF0: // 3-byte UTF-8
    168 		if len(s) < 3 {
    169 			return Properties{}, 0
    170 		}
    171 		i := bidiIndex[c0]
    172 		c1 := s[1]
    173 		if c1 < 0x80 || 0xC0 <= c1 {
    174 			return Properties{}, 1
    175 		}
    176 		o := uint32(i)<<6 + uint32(c1)
    177 		i = bidiIndex[o]
    178 		c2 := s[2]
    179 		if c2 < 0x80 || 0xC0 <= c2 {
    180 			return Properties{}, 1
    181 		}
    182 		return Properties{entry: trie.lookupValue(uint32(i), c2), last: c2}, 3
    183 	case c0 < 0xF8: // 4-byte UTF-8
    184 		if len(s) < 4 {
    185 			return Properties{}, 0
    186 		}
    187 		i := bidiIndex[c0]
    188 		c1 := s[1]
    189 		if c1 < 0x80 || 0xC0 <= c1 {
    190 			return Properties{}, 1
    191 		}
    192 		o := uint32(i)<<6 + uint32(c1)
    193 		i = bidiIndex[o]
    194 		c2 := s[2]
    195 		if c2 < 0x80 || 0xC0 <= c2 {
    196 			return Properties{}, 1
    197 		}
    198 		o = uint32(i)<<6 + uint32(c2)
    199 		i = bidiIndex[o]
    200 		c3 := s[3]
    201 		if c3 < 0x80 || 0xC0 <= c3 {
    202 			return Properties{}, 1
    203 		}
    204 		return Properties{entry: trie.lookupValue(uint32(i), c3)}, 4
    205 	}
    206 	// Illegal rune
    207 	return Properties{}, 1
    208 }
    209