1 /* stringlib: fastsearch implementation */ 2 3 #ifndef STRINGLIB_FASTSEARCH_H 4 #define STRINGLIB_FASTSEARCH_H 5 6 /* fast search/count implementation, based on a mix between boyer- 7 moore and horspool, with a few more bells and whistles on the top. 8 for some more background, see: http://effbot.org/zone/stringlib.htm */ 9 10 /* note: fastsearch may access s[n], which isn't a problem when using 11 Python's ordinary string types, but may cause problems if you're 12 using this code in other contexts. also, the count mode returns -1 13 if there cannot possible be a match in the target string, and 0 if 14 it has actually checked for matches, but didn't find any. callers 15 beware! */ 16 17 #define FAST_COUNT 0 18 #define FAST_SEARCH 1 19 #define FAST_RSEARCH 2 20 21 #if LONG_BIT >= 128 22 #define STRINGLIB_BLOOM_WIDTH 128 23 #elif LONG_BIT >= 64 24 #define STRINGLIB_BLOOM_WIDTH 64 25 #elif LONG_BIT >= 32 26 #define STRINGLIB_BLOOM_WIDTH 32 27 #else 28 #error "LONG_BIT is smaller than 32" 29 #endif 30 31 #define STRINGLIB_BLOOM_ADD(mask, ch) \ 32 ((mask |= (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1))))) 33 #define STRINGLIB_BLOOM(mask, ch) \ 34 ((mask & (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1))))) 35 36 Py_LOCAL_INLINE(Py_ssize_t) 37 fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n, 38 const STRINGLIB_CHAR* p, Py_ssize_t m, 39 Py_ssize_t maxcount, int mode) 40 { 41 unsigned long mask; 42 Py_ssize_t skip, count = 0; 43 Py_ssize_t i, j, mlast, w; 44 45 w = n - m; 46 47 if (w < 0 || (mode == FAST_COUNT && maxcount == 0)) 48 return -1; 49 50 /* look for special cases */ 51 if (m <= 1) { 52 if (m <= 0) 53 return -1; 54 /* use special case for 1-character strings */ 55 if (mode == FAST_COUNT) { 56 for (i = 0; i < n; i++) 57 if (s[i] == p[0]) { 58 count++; 59 if (count == maxcount) 60 return maxcount; 61 } 62 return count; 63 } else if (mode == FAST_SEARCH) { 64 for (i = 0; i < n; i++) 65 if (s[i] == p[0]) 66 return i; 67 } else { /* FAST_RSEARCH */ 68 for (i = n - 1; i > -1; i--) 69 if (s[i] == p[0]) 70 return i; 71 } 72 return -1; 73 } 74 75 mlast = m - 1; 76 skip = mlast - 1; 77 mask = 0; 78 79 if (mode != FAST_RSEARCH) { 80 81 /* create compressed boyer-moore delta 1 table */ 82 83 /* process pattern[:-1] */ 84 for (i = 0; i < mlast; i++) { 85 STRINGLIB_BLOOM_ADD(mask, p[i]); 86 if (p[i] == p[mlast]) 87 skip = mlast - i - 1; 88 } 89 /* process pattern[-1] outside the loop */ 90 STRINGLIB_BLOOM_ADD(mask, p[mlast]); 91 92 for (i = 0; i <= w; i++) { 93 /* note: using mlast in the skip path slows things down on x86 */ 94 if (s[i+m-1] == p[m-1]) { 95 /* candidate match */ 96 for (j = 0; j < mlast; j++) 97 if (s[i+j] != p[j]) 98 break; 99 if (j == mlast) { 100 /* got a match! */ 101 if (mode != FAST_COUNT) 102 return i; 103 count++; 104 if (count == maxcount) 105 return maxcount; 106 i = i + mlast; 107 continue; 108 } 109 /* miss: check if next character is part of pattern */ 110 if (!STRINGLIB_BLOOM(mask, s[i+m])) 111 i = i + m; 112 else 113 i = i + skip; 114 } else { 115 /* skip: check if next character is part of pattern */ 116 if (!STRINGLIB_BLOOM(mask, s[i+m])) 117 i = i + m; 118 } 119 } 120 } else { /* FAST_RSEARCH */ 121 122 /* create compressed boyer-moore delta 1 table */ 123 124 /* process pattern[0] outside the loop */ 125 STRINGLIB_BLOOM_ADD(mask, p[0]); 126 /* process pattern[:0:-1] */ 127 for (i = mlast; i > 0; i--) { 128 STRINGLIB_BLOOM_ADD(mask, p[i]); 129 if (p[i] == p[0]) 130 skip = i - 1; 131 } 132 133 for (i = w; i >= 0; i--) { 134 if (s[i] == p[0]) { 135 /* candidate match */ 136 for (j = mlast; j > 0; j--) 137 if (s[i+j] != p[j]) 138 break; 139 if (j == 0) 140 /* got a match! */ 141 return i; 142 /* miss: check if previous character is part of pattern */ 143 if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) 144 i = i - m; 145 else 146 i = i - skip; 147 } else { 148 /* skip: check if previous character is part of pattern */ 149 if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) 150 i = i - m; 151 } 152 } 153 } 154 155 if (mode != FAST_COUNT) 156 return -1; 157 return count; 158 } 159 160 #endif 161