1 /* 2 * Copyright (c) 1995, 2001, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package sun.misc; 27 import java.io.*; 28 29 /** 30 * A class to represent a pool of regular expressions. A string 31 * can be matched against the whole pool all at once. It is much 32 * faster than doing individual regular expression matches one-by-one. 33 * 34 * @see java.misc.RegexpTarget 35 * @author James Gosling 36 */ 37 38 public class RegexpPool { 39 private RegexpNode prefixMachine = new RegexpNode(); 40 private RegexpNode suffixMachine = new RegexpNode(); 41 private static final int BIG = 0x7FFFFFFF; 42 private int lastDepth = BIG; 43 44 public RegexpPool () { 45 } 46 47 /** 48 * Add a regular expression to the pool of regular expressions. 49 * @param re The regular expression to add to the pool. 50 For now, only handles strings that either begin or end with 51 a '*'. 52 * @param ret The object to be returned when this regular expression is 53 matched. If ret is an instance of the RegexpTarget class, ret.found 54 is called with the string fragment that matched the '*' as its 55 parameter. 56 * @exception REException error 57 */ 58 public void add(String re, Object ret) throws REException { 59 add(re, ret, false); 60 } 61 62 /** 63 * Replace the target for the regular expression with a different 64 * target. 65 * 66 * @param re The regular expression to be replaced in the pool. 67 * For now, only handles strings that either begin or end with 68 * a '*'. 69 * @param ret The object to be returned when this regular expression is 70 * matched. If ret is an instance of the RegexpTarget class, ret.found 71 * is called with the string fragment that matched the '*' as its 72 * parameter. 73 */ 74 public void replace(String re, Object ret) { 75 try { 76 add(re, ret, true); 77 } catch(Exception e) { 78 // should never occur if replace is true 79 } 80 } 81 82 /** 83 * Delete the regular expression and its target. 84 * @param re The regular expression to be deleted from the pool. 85 * must begin or end with a '*' 86 * @return target - the old target. 87 */ 88 public Object delete(String re) { 89 Object o = null; 90 RegexpNode p = prefixMachine; 91 RegexpNode best = p; 92 int len = re.length() - 1; 93 int i; 94 boolean prefix = true; 95 96 if (!re.startsWith("*") || 97 !re.endsWith("*")) 98 len++; 99 100 if (len <= 0) 101 return null; 102 103 /* March forward through the prefix machine */ 104 for (i = 0; p != null; i++) { 105 if (p.result != null && p.depth < BIG 106 && (!p.exact || i == len)) { 107 best = p; 108 } 109 if (i >= len) 110 break; 111 p = p.find(re.charAt(i)); 112 } 113 114 /* march backward through the suffix machine */ 115 p = suffixMachine; 116 for (i = len; --i >= 0 && p != null;) { 117 if (p.result != null && p.depth < BIG) { 118 prefix = false; 119 best = p; 120 } 121 p = p.find(re.charAt(i)); 122 } 123 124 // delete only if there is an exact match 125 if (prefix) { 126 if (re.equals(best.re)) { 127 o = best.result; 128 best.result = null; 129 130 } 131 } else { 132 if (re.equals(best.re)) { 133 o = best.result; 134 best.result = null; 135 } 136 } 137 return o; 138 } 139 140 /** Search for a match to a string & return the object associated 141 with it with the match. When multiple regular expressions 142 would match the string, the best match is returned first. 143 The next best match is returned the next time matchNext is 144 called. 145 @param s The string to match against the regular expressions 146 in the pool. 147 @return null on failure, otherwise the object associated with 148 the regular expression when it was added to the pool. 149 If the object is an instance of RegexpTarget, then 150 the return value is the result from calling 151 return.found(string_that_matched_wildcard). 152 */ 153 public Object match(String s) { 154 return matchAfter(s, BIG); 155 } 156 157 /** Identical to match except that it will only find matches to 158 regular expressions that were added to the pool <i>after</i> 159 the last regular expression that matched in the last call 160 to match() or matchNext() */ 161 public Object matchNext(String s) { 162 return matchAfter(s, lastDepth); 163 } 164 165 private void add(String re, Object ret, boolean replace) throws REException { 166 int len = re.length(); 167 RegexpNode p; 168 if (re.charAt(0) == '*') { 169 p = suffixMachine; 170 while (len > 1) 171 p = p.add(re.charAt(--len)); 172 } else { 173 boolean exact = false; 174 if (re.charAt(len - 1) == '*') 175 len--; 176 else 177 exact = true; 178 p = prefixMachine; 179 for (int i = 0; i < len; i++) 180 p = p.add(re.charAt(i)); 181 p.exact = exact; 182 } 183 184 if (p.result != null && !replace) 185 throw new REException(re + " is a duplicate"); 186 187 p.re = re; 188 p.result = ret; 189 } 190 191 private Object matchAfter(String s, int lastMatchDepth) { 192 RegexpNode p = prefixMachine; 193 RegexpNode best = p; 194 int bst = 0; 195 int bend = 0; 196 int len = s.length(); 197 int i; 198 if (len <= 0) 199 return null; 200 /* March forward through the prefix machine */ 201 for (i = 0; p != null; i++) { 202 if (p.result != null && p.depth < lastMatchDepth 203 && (!p.exact || i == len)) { 204 lastDepth = p.depth; 205 best = p; 206 bst = i; 207 bend = len; 208 } 209 if (i >= len) 210 break; 211 p = p.find(s.charAt(i)); 212 } 213 /* march backward through the suffix machine */ 214 p = suffixMachine; 215 for (i = len; --i >= 0 && p != null;) { 216 if (p.result != null && p.depth < lastMatchDepth) { 217 lastDepth = p.depth; 218 best = p; 219 bst = 0; 220 bend = i+1; 221 } 222 p = p.find(s.charAt(i)); 223 } 224 Object o = best.result; 225 if (o != null && o instanceof RegexpTarget) 226 o = ((RegexpTarget) o).found(s.substring(bst, bend)); 227 return o; 228 } 229 230 /** Resets the pool so that the next call to matchNext looks 231 at all regular expressions in the pool. match(s); is equivalent 232 to reset(); matchNext(s); 233 <p><b>Multithreading note:</b> reset/nextMatch leave state in the 234 regular expression pool. If multiple threads could be using this 235 pool this way, they should be syncronized to avoid race hazards. 236 match() was done in such a way that there are no such race 237 hazards: multiple threads can be matching in the same pool 238 simultaneously. */ 239 public void reset() { 240 lastDepth = BIG; 241 } 242 243 /** Print this pool to standard output */ 244 public void print(PrintStream out) { 245 out.print("Regexp pool:\n"); 246 if (suffixMachine.firstchild != null) { 247 out.print(" Suffix machine: "); 248 suffixMachine.firstchild.print(out); 249 out.print("\n"); 250 } 251 if (prefixMachine.firstchild != null) { 252 out.print(" Prefix machine: "); 253 prefixMachine.firstchild.print(out); 254 out.print("\n"); 255 } 256 } 257 258 } 259 260 /* A node in a regular expression finite state machine. */ 261 class RegexpNode { 262 char c; 263 RegexpNode firstchild; 264 RegexpNode nextsibling; 265 int depth; 266 boolean exact; 267 Object result; 268 String re = null; 269 270 RegexpNode () { 271 c = '#'; 272 depth = 0; 273 } 274 RegexpNode (char C, int depth) { 275 c = C; 276 this.depth = depth; 277 } 278 RegexpNode add(char C) { 279 RegexpNode p = firstchild; 280 if (p == null) 281 p = new RegexpNode (C, depth+1); 282 else { 283 while (p != null) 284 if (p.c == C) 285 return p; 286 else 287 p = p.nextsibling; 288 p = new RegexpNode (C, depth+1); 289 p.nextsibling = firstchild; 290 } 291 firstchild = p; 292 return p; 293 } 294 RegexpNode find(char C) { 295 for (RegexpNode p = firstchild; 296 p != null; 297 p = p.nextsibling) 298 if (p.c == C) 299 return p; 300 return null; 301 } 302 void print(PrintStream out) { 303 if (nextsibling != null) { 304 RegexpNode p = this; 305 out.print("("); 306 while (p != null) { 307 out.write(p.c); 308 if (p.firstchild != null) 309 p.firstchild.print(out); 310 p = p.nextsibling; 311 out.write(p != null ? '|' : ')'); 312 } 313 } else { 314 out.write(c); 315 if (firstchild != null) 316 firstchild.print(out); 317 } 318 } 319 } 320