Home | History | Annotate | Download | only in width
      1 // Copyright 2015 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package width
      6 
      7 import (
      8 	"unicode/utf8"
      9 
     10 	"golang_org/x/text/transform"
     11 )
     12 
     13 type foldTransform struct {
     14 	transform.NopResetter
     15 }
     16 
     17 func (foldTransform) Span(src []byte, atEOF bool) (n int, err error) {
     18 	for n < len(src) {
     19 		if src[n] < utf8.RuneSelf {
     20 			// ASCII fast path.
     21 			for n++; n < len(src) && src[n] < utf8.RuneSelf; n++ {
     22 			}
     23 			continue
     24 		}
     25 		v, size := trie.lookup(src[n:])
     26 		if size == 0 { // incomplete UTF-8 encoding
     27 			if !atEOF {
     28 				err = transform.ErrShortSrc
     29 			} else {
     30 				n = len(src)
     31 			}
     32 			break
     33 		}
     34 		if elem(v)&tagNeedsFold != 0 {
     35 			err = transform.ErrEndOfSpan
     36 			break
     37 		}
     38 		n += size
     39 	}
     40 	return n, err
     41 }
     42 
     43 func (foldTransform) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
     44 	for nSrc < len(src) {
     45 		if src[nSrc] < utf8.RuneSelf {
     46 			// ASCII fast path.
     47 			start, end := nSrc, len(src)
     48 			if d := len(dst) - nDst; d < end-start {
     49 				end = nSrc + d
     50 			}
     51 			for nSrc++; nSrc < end && src[nSrc] < utf8.RuneSelf; nSrc++ {
     52 			}
     53 			n := copy(dst[nDst:], src[start:nSrc])
     54 			if nDst += n; nDst == len(dst) {
     55 				nSrc = start + n
     56 				if nSrc == len(src) {
     57 					return nDst, nSrc, nil
     58 				}
     59 				if src[nSrc] < utf8.RuneSelf {
     60 					return nDst, nSrc, transform.ErrShortDst
     61 				}
     62 			}
     63 			continue
     64 		}
     65 		v, size := trie.lookup(src[nSrc:])
     66 		if size == 0 { // incomplete UTF-8 encoding
     67 			if !atEOF {
     68 				return nDst, nSrc, transform.ErrShortSrc
     69 			}
     70 			size = 1 // gobble 1 byte
     71 		}
     72 		if elem(v)&tagNeedsFold == 0 {
     73 			if size != copy(dst[nDst:], src[nSrc:nSrc+size]) {
     74 				return nDst, nSrc, transform.ErrShortDst
     75 			}
     76 			nDst += size
     77 		} else {
     78 			data := inverseData[byte(v)]
     79 			if len(dst)-nDst < int(data[0]) {
     80 				return nDst, nSrc, transform.ErrShortDst
     81 			}
     82 			i := 1
     83 			for end := int(data[0]); i < end; i++ {
     84 				dst[nDst] = data[i]
     85 				nDst++
     86 			}
     87 			dst[nDst] = data[i] ^ src[nSrc+size-1]
     88 			nDst++
     89 		}
     90 		nSrc += size
     91 	}
     92 	return nDst, nSrc, nil
     93 }
     94 
     95 type narrowTransform struct {
     96 	transform.NopResetter
     97 }
     98 
     99 func (narrowTransform) Span(src []byte, atEOF bool) (n int, err error) {
    100 	for n < len(src) {
    101 		if src[n] < utf8.RuneSelf {
    102 			// ASCII fast path.
    103 			for n++; n < len(src) && src[n] < utf8.RuneSelf; n++ {
    104 			}
    105 			continue
    106 		}
    107 		v, size := trie.lookup(src[n:])
    108 		if size == 0 { // incomplete UTF-8 encoding
    109 			if !atEOF {
    110 				err = transform.ErrShortSrc
    111 			} else {
    112 				n = len(src)
    113 			}
    114 			break
    115 		}
    116 		if k := elem(v).kind(); byte(v) == 0 || k != EastAsianFullwidth && k != EastAsianWide && k != EastAsianAmbiguous {
    117 		} else {
    118 			err = transform.ErrEndOfSpan
    119 			break
    120 		}
    121 		n += size
    122 	}
    123 	return n, err
    124 }
    125 
    126 func (narrowTransform) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
    127 	for nSrc < len(src) {
    128 		if src[nSrc] < utf8.RuneSelf {
    129 			// ASCII fast path.
    130 			start, end := nSrc, len(src)
    131 			if d := len(dst) - nDst; d < end-start {
    132 				end = nSrc + d
    133 			}
    134 			for nSrc++; nSrc < end && src[nSrc] < utf8.RuneSelf; nSrc++ {
    135 			}
    136 			n := copy(dst[nDst:], src[start:nSrc])
    137 			if nDst += n; nDst == len(dst) {
    138 				nSrc = start + n
    139 				if nSrc == len(src) {
    140 					return nDst, nSrc, nil
    141 				}
    142 				if src[nSrc] < utf8.RuneSelf {
    143 					return nDst, nSrc, transform.ErrShortDst
    144 				}
    145 			}
    146 			continue
    147 		}
    148 		v, size := trie.lookup(src[nSrc:])
    149 		if size == 0 { // incomplete UTF-8 encoding
    150 			if !atEOF {
    151 				return nDst, nSrc, transform.ErrShortSrc
    152 			}
    153 			size = 1 // gobble 1 byte
    154 		}
    155 		if k := elem(v).kind(); byte(v) == 0 || k != EastAsianFullwidth && k != EastAsianWide && k != EastAsianAmbiguous {
    156 			if size != copy(dst[nDst:], src[nSrc:nSrc+size]) {
    157 				return nDst, nSrc, transform.ErrShortDst
    158 			}
    159 			nDst += size
    160 		} else {
    161 			data := inverseData[byte(v)]
    162 			if len(dst)-nDst < int(data[0]) {
    163 				return nDst, nSrc, transform.ErrShortDst
    164 			}
    165 			i := 1
    166 			for end := int(data[0]); i < end; i++ {
    167 				dst[nDst] = data[i]
    168 				nDst++
    169 			}
    170 			dst[nDst] = data[i] ^ src[nSrc+size-1]
    171 			nDst++
    172 		}
    173 		nSrc += size
    174 	}
    175 	return nDst, nSrc, nil
    176 }
    177 
    178 type wideTransform struct {
    179 	transform.NopResetter
    180 }
    181 
    182 func (wideTransform) Span(src []byte, atEOF bool) (n int, err error) {
    183 	for n < len(src) {
    184 		// TODO: Consider ASCII fast path. Special-casing ASCII handling can
    185 		// reduce the ns/op of BenchmarkWideASCII by about 30%. This is probably
    186 		// not enough to warrant the extra code and complexity.
    187 		v, size := trie.lookup(src[n:])
    188 		if size == 0 { // incomplete UTF-8 encoding
    189 			if !atEOF {
    190 				err = transform.ErrShortSrc
    191 			} else {
    192 				n = len(src)
    193 			}
    194 			break
    195 		}
    196 		if k := elem(v).kind(); byte(v) == 0 || k != EastAsianHalfwidth && k != EastAsianNarrow {
    197 		} else {
    198 			err = transform.ErrEndOfSpan
    199 			break
    200 		}
    201 		n += size
    202 	}
    203 	return n, err
    204 }
    205 
    206 func (wideTransform) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
    207 	for nSrc < len(src) {
    208 		// TODO: Consider ASCII fast path. Special-casing ASCII handling can
    209 		// reduce the ns/op of BenchmarkWideASCII by about 30%. This is probably
    210 		// not enough to warrant the extra code and complexity.
    211 		v, size := trie.lookup(src[nSrc:])
    212 		if size == 0 { // incomplete UTF-8 encoding
    213 			if !atEOF {
    214 				return nDst, nSrc, transform.ErrShortSrc
    215 			}
    216 			size = 1 // gobble 1 byte
    217 		}
    218 		if k := elem(v).kind(); byte(v) == 0 || k != EastAsianHalfwidth && k != EastAsianNarrow {
    219 			if size != copy(dst[nDst:], src[nSrc:nSrc+size]) {
    220 				return nDst, nSrc, transform.ErrShortDst
    221 			}
    222 			nDst += size
    223 		} else {
    224 			data := inverseData[byte(v)]
    225 			if len(dst)-nDst < int(data[0]) {
    226 				return nDst, nSrc, transform.ErrShortDst
    227 			}
    228 			i := 1
    229 			for end := int(data[0]); i < end; i++ {
    230 				dst[nDst] = data[i]
    231 				nDst++
    232 			}
    233 			dst[nDst] = data[i] ^ src[nSrc+size-1]
    234 			nDst++
    235 		}
    236 		nSrc += size
    237 	}
    238 	return nDst, nSrc, nil
    239 }
    240