Home | History | Annotate | Download | only in strings
      1 // Copyright 2011 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package strings
      6 
      7 import "io"
      8 
// Replacer replaces a list of strings with replacements.
// It is safe for concurrent use by multiple goroutines.
//
// A Replacer is a thin wrapper: its methods forward to the replacement
// algorithm stored in r.
type Replacer struct {
	// r is the concrete replacement algorithm that Replace and WriteString
	// delegate to.
	r replacer
}
     14 
// replacer is the interface that a replacement algorithm needs to implement.
type replacer interface {
	// Replace returns s with the algorithm's replacements applied.
	Replace(s string) string
	// WriteString writes s to w with the algorithm's replacements applied.
	// It returns the number of bytes written and any error encountered.
	WriteString(w io.Writer, s string) (n int, err error)
}
     20 
     21 // NewReplacer returns a new Replacer from a list of old, new string pairs.
     22 // Replacements are performed in order, without overlapping matches.
     23 func NewReplacer(oldnew ...string) *Replacer {
     24 	if len(oldnew)%2 == 1 {
     25 		panic("strings.NewReplacer: odd argument count")
     26 	}
     27 
     28 	if len(oldnew) == 2 && len(oldnew[0]) > 1 {
     29 		return &Replacer{r: makeSingleStringReplacer(oldnew[0], oldnew[1])}
     30 	}
     31 
     32 	allNewBytes := true
     33 	for i := 0; i < len(oldnew); i += 2 {
     34 		if len(oldnew[i]) != 1 {
     35 			return &Replacer{r: makeGenericReplacer(oldnew)}
     36 		}
     37 		if len(oldnew[i+1]) != 1 {
     38 			allNewBytes = false
     39 		}
     40 	}
     41 
     42 	if allNewBytes {
     43 		r := byteReplacer{}
     44 		for i := range r {
     45 			r[i] = byte(i)
     46 		}
     47 		// The first occurrence of old->new map takes precedence
     48 		// over the others with the same old string.
     49 		for i := len(oldnew) - 2; i >= 0; i -= 2 {
     50 			o := oldnew[i][0]
     51 			n := oldnew[i+1][0]
     52 			r[o] = n
     53 		}
     54 		return &Replacer{r: &r}
     55 	}
     56 
     57 	r := byteStringReplacer{}
     58 	// The first occurrence of old->new map takes precedence
     59 	// over the others with the same old string.
     60 	for i := len(oldnew) - 2; i >= 0; i -= 2 {
     61 		o := oldnew[i][0]
     62 		n := oldnew[i+1]
     63 		r[o] = []byte(n)
     64 	}
     65 	return &Replacer{r: &r}
     66 }
     67 
// Replace returns a copy of s with all replacements performed.
// The work is delegated to the replacement algorithm held in r.r.
func (r *Replacer) Replace(s string) string {
	return r.r.Replace(s)
}
     72 
// WriteString writes s to w with all replacements performed.
// It returns whatever byte count and error the underlying replacement
// algorithm held in r.r reports.
func (r *Replacer) WriteString(w io.Writer, s string) (n int, err error) {
	return r.r.WriteString(w, s)
}
     77 
// trieNode is a node in a lookup trie for prioritized key/value pairs. Keys
// and values may be empty. For example, the trie containing keys "ax", "ay",
// "bcbc", "x" and "xy" could have eight nodes:
//
//  n0  -
//  n1  a-
//  n2  .x+
//  n3  .y+
//  n4  b-
//  n5  .cbc+
//  n6  x+
//  n7  .y+
//
// n0 is the root node, and its children are n1, n4 and n6; n1's children are
// n2 and n3; n4's child is n5; n6's child is n7. Nodes n0, n1 and n4 (marked
// with a trailing "-") are partial keys, and nodes n2, n3, n5, n6 and n7
// (marked with a trailing "+") are complete keys.
type trieNode struct {
	// value is the value of the trie node's key/value pair. It is empty if
	// this node is not a complete key. Because values may themselves be
	// empty, priority (not value) is what identifies a complete key.
	value string
	// priority is the priority (higher is more important) of the trie node's
	// key/value pair; keys are not necessarily matched shortest- or longest-
	// first. Priority is positive if this node is a complete key, and zero
	// otherwise. In the example above, positive/zero priorities are marked
	// with a trailing "+" or "-".
	priority int

	// A trie node may have zero, one or more child nodes:
	//  * if the remaining fields are zero, there are no children.
	//  * if prefix and next are non-zero, there is one child in next.
	//  * if table is non-zero, it defines all the children.
	//
	// Prefixes are preferred over tables when there is one child, but the
	// root node always uses a table for lookup efficiency.

	// prefix is the difference in keys between this trie node and the next.
	// In the example above, node n4 has prefix "cbc" and n4's next node is n5.
	// Node n5 has no children and so has zero prefix, next and table fields.
	prefix string
	next   *trieNode

	// table is a lookup table indexed by the next byte in the key, after
	// remapping that byte through genericReplacer.mapping to create a dense
	// index. In the example above, the keys only use 'a', 'b', 'c', 'x' and
	// 'y', which remap to 0, 1, 2, 3 and 4. All other bytes remap to 5, and
	// genericReplacer.tableSize will be 5. Node n0's table will be
	// []*trieNode{ 0:n1, 1:n4, 3:n6 }, where the 0, 1 and 3 are the remapped
	// 'a', 'b' and 'x'.
	table []*trieNode
}
    129 
    130 func (t *trieNode) add(key, val string, priority int, r *genericReplacer) {
    131 	if key == "" {
    132 		if t.priority == 0 {
    133 			t.value = val
    134 			t.priority = priority
    135 		}
    136 		return
    137 	}
    138 
    139 	if t.prefix != "" {
    140 		// Need to split the prefix among multiple nodes.
    141 		var n int // length of the longest common prefix
    142 		for ; n < len(t.prefix) && n < len(key); n++ {
    143 			if t.prefix[n] != key[n] {
    144 				break
    145 			}
    146 		}
    147 		if n == len(t.prefix) {
    148 			t.next.add(key[n:], val, priority, r)
    149 		} else if n == 0 {
    150 			// First byte differs, start a new lookup table here. Looking up
    151 			// what is currently t.prefix[0] will lead to prefixNode, and
    152 			// looking up key[0] will lead to keyNode.
    153 			var prefixNode *trieNode
    154 			if len(t.prefix) == 1 {
    155 				prefixNode = t.next
    156 			} else {
    157 				prefixNode = &trieNode{
    158 					prefix: t.prefix[1:],
    159 					next:   t.next,
    160 				}
    161 			}
    162 			keyNode := new(trieNode)
    163 			t.table = make([]*trieNode, r.tableSize)
    164 			t.table[r.mapping[t.prefix[0]]] = prefixNode
    165 			t.table[r.mapping[key[0]]] = keyNode
    166 			t.prefix = ""
    167 			t.next = nil
    168 			keyNode.add(key[1:], val, priority, r)
    169 		} else {
    170 			// Insert new node after the common section of the prefix.
    171 			next := &trieNode{
    172 				prefix: t.prefix[n:],
    173 				next:   t.next,
    174 			}
    175 			t.prefix = t.prefix[:n]
    176 			t.next = next
    177 			next.add(key[n:], val, priority, r)
    178 		}
    179 	} else if t.table != nil {
    180 		// Insert into existing table.
    181 		m := r.mapping[key[0]]
    182 		if t.table[m] == nil {
    183 			t.table[m] = new(trieNode)
    184 		}
    185 		t.table[m].add(key[1:], val, priority, r)
    186 	} else {
    187 		t.prefix = key
    188 		t.next = new(trieNode)
    189 		t.next.add("", val, priority, r)
    190 	}
    191 }
    192 
    193 func (r *genericReplacer) lookup(s string, ignoreRoot bool) (val string, keylen int, found bool) {
    194 	// Iterate down the trie to the end, and grab the value and keylen with
    195 	// the highest priority.
    196 	bestPriority := 0
    197 	node := &r.root
    198 	n := 0
    199 	for node != nil {
    200 		if node.priority > bestPriority && !(ignoreRoot && node == &r.root) {
    201 			bestPriority = node.priority
    202 			val = node.value
    203 			keylen = n
    204 			found = true
    205 		}
    206 
    207 		if s == "" {
    208 			break
    209 		}
    210 		if node.table != nil {
    211 			index := r.mapping[s[0]]
    212 			if int(index) == r.tableSize {
    213 				break
    214 			}
    215 			node = node.table[index]
    216 			s = s[1:]
    217 			n++
    218 		} else if node.prefix != "" && HasPrefix(s, node.prefix) {
    219 			n += len(node.prefix)
    220 			s = s[len(node.prefix):]
    221 			node = node.next
    222 		} else {
    223 			break
    224 		}
    225 	}
    226 	return
    227 }
    228 
// genericReplacer is the fully generic algorithm.
// It's used as a fallback when nothing faster can be used.
type genericReplacer struct {
	// root is the root node of the trie holding every old/new pair.
	root trieNode
	// tableSize is the size of a trie node's lookup table. It is the number
	// of unique key bytes.
	tableSize int
	// mapping maps from key bytes to a dense index for trieNode.table.
	// Bytes that occur in no key map to tableSize.
	mapping [256]byte
}
    239 
    240 func makeGenericReplacer(oldnew []string) *genericReplacer {
    241 	r := new(genericReplacer)
    242 	// Find each byte used, then assign them each an index.
    243 	for i := 0; i < len(oldnew); i += 2 {
    244 		key := oldnew[i]
    245 		for j := 0; j < len(key); j++ {
    246 			r.mapping[key[j]] = 1
    247 		}
    248 	}
    249 
    250 	for _, b := range r.mapping {
    251 		r.tableSize += int(b)
    252 	}
    253 
    254 	var index byte
    255 	for i, b := range r.mapping {
    256 		if b == 0 {
    257 			r.mapping[i] = byte(r.tableSize)
    258 		} else {
    259 			r.mapping[i] = index
    260 			index++
    261 		}
    262 	}
    263 	// Ensure root node uses a lookup table (for performance).
    264 	r.root.table = make([]*trieNode, r.tableSize)
    265 
    266 	for i := 0; i < len(oldnew); i += 2 {
    267 		r.root.add(oldnew[i], oldnew[i+1], len(oldnew)-i, r)
    268 	}
    269 	return r
    270 }
    271 
    272 type appendSliceWriter []byte
    273 
    274 // Write writes to the buffer to satisfy io.Writer.
    275 func (w *appendSliceWriter) Write(p []byte) (int, error) {
    276 	*w = append(*w, p...)
    277 	return len(p), nil
    278 }
    279 
    280 // WriteString writes to the buffer without string->[]byte->string allocations.
    281 func (w *appendSliceWriter) WriteString(s string) (int, error) {
    282 	*w = append(*w, s...)
    283 	return len(s), nil
    284 }
    285 
// stringWriterIface is implemented by writers that can accept a string
// directly, returning the number of bytes written and any error.
type stringWriterIface interface {
	WriteString(string) (int, error)
}
    289 
    290 type stringWriter struct {
    291 	w io.Writer
    292 }
    293 
    294 func (w stringWriter) WriteString(s string) (int, error) {
    295 	return w.w.Write([]byte(s))
    296 }
    297 
    298 func getStringWriter(w io.Writer) stringWriterIface {
    299 	sw, ok := w.(stringWriterIface)
    300 	if !ok {
    301 		sw = stringWriter{w}
    302 	}
    303 	return sw
    304 }
    305 
// Replace returns s with all replacements applied. It runs WriteString
// against an in-memory appendSliceWriter whose capacity starts at len(s).
func (r *genericReplacer) Replace(s string) string {
	buf := make(appendSliceWriter, 0, len(s))
	// The returned count and error are deliberately discarded: the write
	// methods of appendSliceWriter always report a nil error.
	r.WriteString(&buf, s)
	return string(buf)
}
    311 
    312 func (r *genericReplacer) WriteString(w io.Writer, s string) (n int, err error) {
    313 	sw := getStringWriter(w)
    314 	var last, wn int
    315 	var prevMatchEmpty bool
    316 	for i := 0; i <= len(s); {
    317 		// Fast path: s[i] is not a prefix of any pattern.
    318 		if i != len(s) && r.root.priority == 0 {
    319 			index := int(r.mapping[s[i]])
    320 			if index == r.tableSize || r.root.table[index] == nil {
    321 				i++
    322 				continue
    323 			}
    324 		}
    325 
    326 		// Ignore the empty match iff the previous loop found the empty match.
    327 		val, keylen, match := r.lookup(s[i:], prevMatchEmpty)
    328 		prevMatchEmpty = match && keylen == 0
    329 		if match {
    330 			wn, err = sw.WriteString(s[last:i])
    331 			n += wn
    332 			if err != nil {
    333 				return
    334 			}
    335 			wn, err = sw.WriteString(val)
    336 			n += wn
    337 			if err != nil {
    338 				return
    339 			}
    340 			i += keylen
    341 			last = i
    342 			continue
    343 		}
    344 		i++
    345 	}
    346 	if last != len(s) {
    347 		wn, err = sw.WriteString(s[last:])
    348 		n += wn
    349 	}
    350 	return
    351 }
    352 
    353 // singleStringReplacer is the implementation that's used when there is only
    354 // one string to replace (and that string has more than one byte).
    355 type singleStringReplacer struct {
    356 	finder *stringFinder
    357 	// value is the new string that replaces that pattern when it's found.
    358 	value string
    359 }
    360 
    361 func makeSingleStringReplacer(pattern string, value string) *singleStringReplacer {
    362 	return &singleStringReplacer{finder: makeStringFinder(pattern), value: value}
    363 }
    364 
    365 func (r *singleStringReplacer) Replace(s string) string {
    366 	var buf []byte
    367 	i, matched := 0, false
    368 	for {
    369 		match := r.finder.next(s[i:])
    370 		if match == -1 {
    371 			break
    372 		}
    373 		matched = true
    374 		buf = append(buf, s[i:i+match]...)
    375 		buf = append(buf, r.value...)
    376 		i += match + len(r.finder.pattern)
    377 	}
    378 	if !matched {
    379 		return s
    380 	}
    381 	buf = append(buf, s[i:]...)
    382 	return string(buf)
    383 }
    384 
    385 func (r *singleStringReplacer) WriteString(w io.Writer, s string) (n int, err error) {
    386 	sw := getStringWriter(w)
    387 	var i, wn int
    388 	for {
    389 		match := r.finder.next(s[i:])
    390 		if match == -1 {
    391 			break
    392 		}
    393 		wn, err = sw.WriteString(s[i : i+match])
    394 		n += wn
    395 		if err != nil {
    396 			return
    397 		}
    398 		wn, err = sw.WriteString(r.value)
    399 		n += wn
    400 		if err != nil {
    401 			return
    402 		}
    403 		i += match + len(r.finder.pattern)
    404 	}
    405 	wn, err = sw.WriteString(s[i:])
    406 	n += wn
    407 	return
    408 }
    409 
    410 // byteReplacer is the implementation that's used when all the "old"
    411 // and "new" values are single ASCII bytes.
    412 // The array contains replacement bytes indexed by old byte.
    413 type byteReplacer [256]byte
    414 
    415 func (r *byteReplacer) Replace(s string) string {
    416 	var buf []byte // lazily allocated
    417 	for i := 0; i < len(s); i++ {
    418 		b := s[i]
    419 		if r[b] != b {
    420 			if buf == nil {
    421 				buf = []byte(s)
    422 			}
    423 			buf[i] = r[b]
    424 		}
    425 	}
    426 	if buf == nil {
    427 		return s
    428 	}
    429 	return string(buf)
    430 }
    431 
    432 func (r *byteReplacer) WriteString(w io.Writer, s string) (n int, err error) {
    433 	// TODO(bradfitz): use io.WriteString with slices of s, avoiding allocation.
    434 	bufsize := 32 << 10
    435 	if len(s) < bufsize {
    436 		bufsize = len(s)
    437 	}
    438 	buf := make([]byte, bufsize)
    439 
    440 	for len(s) > 0 {
    441 		ncopy := copy(buf, s[:])
    442 		s = s[ncopy:]
    443 		for i, b := range buf[:ncopy] {
    444 			buf[i] = r[b]
    445 		}
    446 		wn, err := w.Write(buf[:ncopy])
    447 		n += wn
    448 		if err != nil {
    449 			return n, err
    450 		}
    451 	}
    452 	return n, nil
    453 }
    454 
    455 // byteStringReplacer is the implementation that's used when all the
    456 // "old" values are single ASCII bytes but the "new" values vary in size.
    457 // The array contains replacement byte slices indexed by old byte.
    458 // A nil []byte means that the old byte should not be replaced.
    459 type byteStringReplacer [256][]byte
    460 
    461 func (r *byteStringReplacer) Replace(s string) string {
    462 	newSize := len(s)
    463 	anyChanges := false
    464 	for i := 0; i < len(s); i++ {
    465 		b := s[i]
    466 		if r[b] != nil {
    467 			anyChanges = true
    468 			// The -1 is because we are replacing 1 byte with len(r[b]) bytes.
    469 			newSize += len(r[b]) - 1
    470 		}
    471 	}
    472 	if !anyChanges {
    473 		return s
    474 	}
    475 	buf := make([]byte, newSize)
    476 	bi := buf
    477 	for i := 0; i < len(s); i++ {
    478 		b := s[i]
    479 		if r[b] != nil {
    480 			n := copy(bi, r[b])
    481 			bi = bi[n:]
    482 		} else {
    483 			bi[0] = b
    484 			bi = bi[1:]
    485 		}
    486 	}
    487 	return string(buf)
    488 }
    489 
    490 func (r *byteStringReplacer) WriteString(w io.Writer, s string) (n int, err error) {
    491 	sw := getStringWriter(w)
    492 	last := 0
    493 	for i := 0; i < len(s); i++ {
    494 		b := s[i]
    495 		if r[b] == nil {
    496 			continue
    497 		}
    498 		if last != i {
    499 			nw, err := sw.WriteString(s[last:i])
    500 			n += nw
    501 			if err != nil {
    502 				return n, err
    503 			}
    504 		}
    505 		last = i + 1
    506 		nw, err := w.Write(r[b])
    507 		n += nw
    508 		if err != nil {
    509 			return n, err
    510 		}
    511 	}
    512 	if last != len(s) {
    513 		var nw int
    514 		nw, err = sw.WriteString(s[last:])
    515 		n += nw
    516 	}
    517 	return
    518 }
    519