Home | History | Annotate | Download | only in misc
      1 /*
      2  * Copyright (c) 1995, 2001, Oracle and/or its affiliates. All rights reserved.
      3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
      4  *
      5  * This code is free software; you can redistribute it and/or modify it
      6  * under the terms of the GNU General Public License version 2 only, as
      7  * published by the Free Software Foundation.  Oracle designates this
      8  * particular file as subject to the "Classpath" exception as provided
      9  * by Oracle in the LICENSE file that accompanied this code.
     10  *
     11  * This code is distributed in the hope that it will be useful, but WITHOUT
     12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
     13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     14  * version 2 for more details (a copy is included in the LICENSE file that
     15  * accompanied this code).
     16  *
     17  * You should have received a copy of the GNU General Public License version
     18  * 2 along with this work; if not, write to the Free Software Foundation,
     19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
     20  *
     21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
     22  * or visit www.oracle.com if you need additional information or have any
     23  * questions.
     24  */
     25 
     26 package sun.misc;
     27 import java.io.*;
     28 
     29 /**
     30  * A class to represent a pool of regular expressions.  A string
     31  * can be matched against the whole pool all at once.  It is much
     32  * faster than doing individual regular expression matches one-by-one.
     33  *
     34  * @see java.misc.RegexpTarget
     35  * @author  James Gosling
     36  */
     37 
     38 public class RegexpPool {
     39     private RegexpNode prefixMachine = new RegexpNode();
     40     private RegexpNode suffixMachine = new RegexpNode();
     41     private static final int BIG = 0x7FFFFFFF;
     42     private int lastDepth = BIG;
     43 
     44     public RegexpPool () {
     45     }
     46 
     47     /**
     48      * Add a regular expression to the pool of regular expressions.
     49      * @param   re  The regular expression to add to the pool.
     50             For now, only handles strings that either begin or end with
     51             a '*'.
     52      * @param   ret The object to be returned when this regular expression is
     53             matched.  If ret is an instance of the RegexpTarget class, ret.found
     54             is called with the string fragment that matched the '*' as its
     55             parameter.
     56      * @exception REException error
     57      */
     58     public void add(String re, Object ret) throws REException {
     59         add(re, ret, false);
     60     }
     61 
     62     /**
     63      * Replace the target for the regular expression with a different
     64      * target.
     65      *
     66      * @param   re  The regular expression to be replaced in the pool.
     67      *      For now, only handles strings that either begin or end with
     68      *      a '*'.
     69      * @param   ret The object to be returned when this regular expression is
     70      *      matched.  If ret is an instance of the RegexpTarget class, ret.found
     71      *      is called with the string fragment that matched the '*' as its
     72      *      parameter.
     73      */
     74     public void replace(String re, Object ret) {
     75         try {
     76             add(re, ret, true);
     77         } catch(Exception e) {
     78             // should never occur if replace is true
     79         }
     80     }
     81 
     82     /**
     83      * Delete the regular expression and its target.
     84      * @param re The regular expression to be deleted from the pool.
     85      *           must begin or end with a '*'
     86      * @return target - the old target.
     87      */
     88     public Object delete(String re) {
     89         Object o = null;
     90         RegexpNode p = prefixMachine;
     91         RegexpNode best = p;
     92         int len = re.length() - 1;
     93         int i;
     94         boolean prefix = true;
     95 
     96         if (!re.startsWith("*") ||
     97             !re.endsWith("*"))
     98             len++;
     99 
    100         if (len <= 0)
    101             return null;
    102 
    103         /* March forward through the prefix machine */
    104         for (i = 0; p != null; i++) {
    105             if (p.result != null && p.depth < BIG
    106                 && (!p.exact || i == len)) {
    107                 best = p;
    108             }
    109             if (i >= len)
    110                 break;
    111             p = p.find(re.charAt(i));
    112         }
    113 
    114         /* march backward through the suffix machine */
    115         p = suffixMachine;
    116         for (i = len; --i >= 0 && p != null;) {
    117             if (p.result != null && p.depth < BIG) {
    118                 prefix = false;
    119                 best = p;
    120             }
    121             p = p.find(re.charAt(i));
    122         }
    123 
    124         // delete only if there is an exact match
    125         if (prefix) {
    126             if (re.equals(best.re)) {
    127                 o = best.result;
    128                 best.result = null;
    129 
    130             }
    131         } else {
    132             if (re.equals(best.re)) {
    133                 o = best.result;
    134                 best.result = null;
    135             }
    136         }
    137         return o;
    138     }
    139 
    140     /** Search for a match to a string & return the object associated
    141         with it with the match.  When multiple regular expressions
    142         would match the string, the best match is returned first.
    143         The next best match is returned the next time matchNext is
    144         called.
    145         @param s    The string to match against the regular expressions
    146                     in the pool.
    147         @return     null on failure, otherwise the object associated with
    148                     the regular expression when it was added to the pool.
    149                     If the object is an instance of RegexpTarget, then
    150                     the return value is the result from calling
    151                     return.found(string_that_matched_wildcard).
    152     */
    153     public Object match(String s) {
    154         return matchAfter(s, BIG);
    155     }
    156 
    157     /** Identical to match except that it will only find matches to
    158         regular expressions that were added to the pool <i>after</i>
    159         the last regular expression that matched in the last call
    160         to match() or matchNext() */
    161     public Object matchNext(String s) {
    162         return matchAfter(s, lastDepth);
    163     }
    164 
    165     private void add(String re, Object ret, boolean replace) throws REException {
    166         int len = re.length();
    167         RegexpNode p;
    168         if (re.charAt(0) == '*') {
    169             p = suffixMachine;
    170             while (len > 1)
    171                 p = p.add(re.charAt(--len));
    172         } else {
    173             boolean exact = false;
    174             if (re.charAt(len - 1) == '*')
    175                 len--;
    176             else
    177                 exact = true;
    178             p = prefixMachine;
    179             for (int i = 0; i < len; i++)
    180                 p = p.add(re.charAt(i));
    181             p.exact = exact;
    182         }
    183 
    184         if (p.result != null && !replace)
    185             throw new REException(re + " is a duplicate");
    186 
    187         p.re = re;
    188         p.result = ret;
    189     }
    190 
    191     private Object matchAfter(String s, int lastMatchDepth) {
    192         RegexpNode p = prefixMachine;
    193         RegexpNode best = p;
    194         int bst = 0;
    195         int bend = 0;
    196         int len = s.length();
    197         int i;
    198         if (len <= 0)
    199             return null;
    200         /* March forward through the prefix machine */
    201         for (i = 0; p != null; i++) {
    202             if (p.result != null && p.depth < lastMatchDepth
    203                 && (!p.exact || i == len)) {
    204                 lastDepth = p.depth;
    205                 best = p;
    206                 bst = i;
    207                 bend = len;
    208             }
    209             if (i >= len)
    210                 break;
    211             p = p.find(s.charAt(i));
    212         }
    213         /* march backward through the suffix machine */
    214         p = suffixMachine;
    215         for (i = len; --i >= 0 && p != null;) {
    216             if (p.result != null && p.depth < lastMatchDepth) {
    217                 lastDepth = p.depth;
    218                 best = p;
    219                 bst = 0;
    220                 bend = i+1;
    221             }
    222             p = p.find(s.charAt(i));
    223         }
    224         Object o = best.result;
    225         if (o != null && o instanceof RegexpTarget)
    226             o = ((RegexpTarget) o).found(s.substring(bst, bend));
    227         return o;
    228     }
    229 
    230     /** Resets the pool so that the next call to matchNext looks
    231         at all regular expressions in the pool.  match(s); is equivalent
    232         to reset(); matchNext(s);
    233         <p><b>Multithreading note:</b> reset/nextMatch leave state in the
    234         regular expression pool.  If multiple threads could be using this
    235         pool this way, they should be syncronized to avoid race hazards.
    236         match() was done in such a way that there are no such race
    237         hazards: multiple threads can be matching in the same pool
    238         simultaneously. */
    239     public void reset() {
    240         lastDepth = BIG;
    241     }
    242 
    243     /** Print this pool to standard output */
    244     public void print(PrintStream out) {
    245         out.print("Regexp pool:\n");
    246         if (suffixMachine.firstchild != null) {
    247             out.print(" Suffix machine: ");
    248             suffixMachine.firstchild.print(out);
    249             out.print("\n");
    250         }
    251         if (prefixMachine.firstchild != null) {
    252             out.print(" Prefix machine: ");
    253             prefixMachine.firstchild.print(out);
    254             out.print("\n");
    255         }
    256     }
    257 
    258 }
    259 
    260 /* A node in a regular expression finite state machine. */
    261 class RegexpNode {
    262     char c;
    263     RegexpNode firstchild;
    264     RegexpNode nextsibling;
    265     int depth;
    266     boolean exact;
    267     Object result;
    268     String re = null;
    269 
    270     RegexpNode () {
    271         c = '#';
    272         depth = 0;
    273     }
    274     RegexpNode (char C, int depth) {
    275         c = C;
    276         this.depth = depth;
    277     }
    278     RegexpNode add(char C) {
    279         RegexpNode p = firstchild;
    280         if (p == null)
    281             p = new RegexpNode (C, depth+1);
    282         else {
    283             while (p != null)
    284                 if (p.c == C)
    285                     return p;
    286                 else
    287                     p = p.nextsibling;
    288             p = new RegexpNode (C, depth+1);
    289             p.nextsibling = firstchild;
    290         }
    291         firstchild = p;
    292         return p;
    293     }
    294     RegexpNode find(char C) {
    295         for (RegexpNode p = firstchild;
    296                 p != null;
    297                 p = p.nextsibling)
    298             if (p.c == C)
    299                 return p;
    300         return null;
    301     }
    302     void print(PrintStream out) {
    303         if (nextsibling != null) {
    304             RegexpNode p = this;
    305             out.print("(");
    306             while (p != null) {
    307                 out.write(p.c);
    308                 if (p.firstchild != null)
    309                     p.firstchild.print(out);
    310                 p = p.nextsibling;
    311                 out.write(p != null ? '|' : ')');
    312             }
    313         } else {
    314             out.write(c);
    315             if (firstchild != null)
    316                 firstchild.print(out);
    317         }
    318     }
    319 }
    320