Home | History | Annotate | Download | only in json
      1 // Copyright 2013 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package json
      6 
      7 import (
      8 	"bytes"
      9 	"unicode/utf8"
     10 )
     11 
     12 const (
     13 	caseMask     = ^byte(0x20) // Mask to ignore case in ASCII.
     14 	kelvin       = '\u212a'
     15 	smallLongEss = '\u017f'
     16 )
     17 
     18 // foldFunc returns one of four different case folding equivalence
     19 // functions, from most general (and slow) to fastest:
     20 //
     21 // 1) bytes.EqualFold, if the key s contains any non-ASCII UTF-8
     22 // 2) equalFoldRight, if s contains special folding ASCII ('k', 'K', 's', 'S')
     23 // 3) asciiEqualFold, no special, but includes non-letters (including _)
     24 // 4) simpleLetterEqualFold, no specials, no non-letters.
     25 //
     26 // The letters S and K are special because they map to 3 runes, not just 2:
     27 //  * S maps to s and to U+017F '' Latin small letter long s
     28 //  * k maps to K and to U+212A '' Kelvin sign
     29 // See https://play.golang.org/p/tTxjOc0OGo
     30 //
     31 // The returned function is specialized for matching against s and
     32 // should only be given s. It's not curried for performance reasons.
     33 func foldFunc(s []byte) func(s, t []byte) bool {
     34 	nonLetter := false
     35 	special := false // special letter
     36 	for _, b := range s {
     37 		if b >= utf8.RuneSelf {
     38 			return bytes.EqualFold
     39 		}
     40 		upper := b & caseMask
     41 		if upper < 'A' || upper > 'Z' {
     42 			nonLetter = true
     43 		} else if upper == 'K' || upper == 'S' {
     44 			// See above for why these letters are special.
     45 			special = true
     46 		}
     47 	}
     48 	if special {
     49 		return equalFoldRight
     50 	}
     51 	if nonLetter {
     52 		return asciiEqualFold
     53 	}
     54 	return simpleLetterEqualFold
     55 }
     56 
     57 // equalFoldRight is a specialization of bytes.EqualFold when s is
     58 // known to be all ASCII (including punctuation), but contains an 's',
     59 // 'S', 'k', or 'K', requiring a Unicode fold on the bytes in t.
     60 // See comments on foldFunc.
     61 func equalFoldRight(s, t []byte) bool {
     62 	for _, sb := range s {
     63 		if len(t) == 0 {
     64 			return false
     65 		}
     66 		tb := t[0]
     67 		if tb < utf8.RuneSelf {
     68 			if sb != tb {
     69 				sbUpper := sb & caseMask
     70 				if 'A' <= sbUpper && sbUpper <= 'Z' {
     71 					if sbUpper != tb&caseMask {
     72 						return false
     73 					}
     74 				} else {
     75 					return false
     76 				}
     77 			}
     78 			t = t[1:]
     79 			continue
     80 		}
     81 		// sb is ASCII and t is not. t must be either kelvin
     82 		// sign or long s; sb must be s, S, k, or K.
     83 		tr, size := utf8.DecodeRune(t)
     84 		switch sb {
     85 		case 's', 'S':
     86 			if tr != smallLongEss {
     87 				return false
     88 			}
     89 		case 'k', 'K':
     90 			if tr != kelvin {
     91 				return false
     92 			}
     93 		default:
     94 			return false
     95 		}
     96 		t = t[size:]
     97 
     98 	}
     99 	if len(t) > 0 {
    100 		return false
    101 	}
    102 	return true
    103 }
    104 
    105 // asciiEqualFold is a specialization of bytes.EqualFold for use when
    106 // s is all ASCII (but may contain non-letters) and contains no
    107 // special-folding letters.
    108 // See comments on foldFunc.
    109 func asciiEqualFold(s, t []byte) bool {
    110 	if len(s) != len(t) {
    111 		return false
    112 	}
    113 	for i, sb := range s {
    114 		tb := t[i]
    115 		if sb == tb {
    116 			continue
    117 		}
    118 		if ('a' <= sb && sb <= 'z') || ('A' <= sb && sb <= 'Z') {
    119 			if sb&caseMask != tb&caseMask {
    120 				return false
    121 			}
    122 		} else {
    123 			return false
    124 		}
    125 	}
    126 	return true
    127 }
    128 
    129 // simpleLetterEqualFold is a specialization of bytes.EqualFold for
    130 // use when s is all ASCII letters (no underscores, etc) and also
    131 // doesn't contain 'k', 'K', 's', or 'S'.
    132 // See comments on foldFunc.
    133 func simpleLetterEqualFold(s, t []byte) bool {
    134 	if len(s) != len(t) {
    135 		return false
    136 	}
    137 	for i, b := range s {
    138 		if b&caseMask != t[i]&caseMask {
    139 			return false
    140 		}
    141 	}
    142 	return true
    143 }
    144