Home | History | Annotate | Download | only in ref
      1 /*
      2  * Licensed to the Apache Software Foundation (ASF) under one
      3  * or more contributor license agreements. See the NOTICE file
      4  * distributed with this work for additional information
      5  * regarding copyright ownership. The ASF licenses this file
      6  * to you under the Apache License, Version 2.0 (the  "License");
      7  * you may not use this file except in compliance with the License.
      8  * You may obtain a copy of the License at
      9  *
     10  *     http://www.apache.org/licenses/LICENSE-2.0
     11  *
     12  * Unless required by applicable law or agreed to in writing, software
     13  * distributed under the License is distributed on an "AS IS" BASIS,
     14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     15  * See the License for the specific language governing permissions and
     16  * limitations under the License.
     17  */
     18 /*
     19  * $Id: DTMStringPool.java 468653 2006-10-28 07:07:05Z minchau $
     20  */
     21 
     22 package org.apache.xml.dtm.ref;
     23 
     24 import java.util.Vector;
     25 
     26 import org.apache.xml.utils.IntVector;
     27 
     28 /** <p>DTMStringPool is an "interning" mechanism for strings. It will
     29  * create a stable 1:1 mapping between a set of string values and a set of
     30  * integer index values, so the integers can be used to reliably and
     31  * uniquely identify (and when necessary retrieve) the strings.</p>
     32  *
     33  * <p>Design Priorities:
     34  * <ul>
     35  * <li>String-to-index lookup speed is critical.</li>
     36  * <li>Index-to-String lookup speed is slightly less so.</li>
     37  * <li>Threadsafety is not guaranteed at this level.
     38  * Enforce that in the application if needed.</li>
     39  * <li>Storage efficiency is an issue but not a huge one.
     40  * It is expected that string pools won't exceed about 2000 entries.</li>
     41  * </ul>
     42  * </p>
     43  *
     44  * <p>Implementation detail: A standard Hashtable is relatively
     45  * inefficient when looking up primitive int values, especially when
     46  * we're already maintaining an int-to-string vector.  So I'm
     47  * maintaining a simple hash chain within this class.</p>
     48  *
     49  * <p>NOTE: There is nothing in the code that has a real dependency upon
     50  * String. It would work with any object type that implements reliable
     51  * .hashCode() and .equals() operations. The API enforces Strings because
     52  * it's safer that way, but this could trivially be turned into a general
     53  * ObjectPool if one was needed.</p>
     54  *
     55  * <p>Status: Passed basic test in main().</p>
     56  * */
     57 public class DTMStringPool
     58 {
     59   Vector m_intToString;
     60   static final int HASHPRIME=101;
     61   int[] m_hashStart=new int[HASHPRIME];
     62   IntVector m_hashChain;
     63   public static final int NULL=-1;
     64 
     65   /**
     66    * Create a DTMStringPool using the given chain size
     67    *
     68    * @param chainSize The size of the hash chain vector
     69    */
     70   public DTMStringPool(int chainSize)
     71     {
     72       m_intToString=new Vector();
     73       m_hashChain=new IntVector(chainSize);
     74       removeAllElements();
     75 
     76       // -sb Add this to force empty strings to be index 0.
     77       stringToIndex("");
     78     }
     79 
     80   public DTMStringPool()
     81     {
     82       this(512);
     83     }
     84 
     85   public void removeAllElements()
     86     {
     87       m_intToString.removeAllElements();
     88       for(int i=0;i<HASHPRIME;++i)
     89         m_hashStart[i]=NULL;
     90       m_hashChain.removeAllElements();
     91     }
     92 
     93   /** @return string whose value is uniquely identified by this integer index.
     94    * @throws java.lang.ArrayIndexOutOfBoundsException
     95    *  if index doesn't map to a string.
     96    * */
     97   public String indexToString(int i)
     98     throws java.lang.ArrayIndexOutOfBoundsException
     99     {
    100       if(i==NULL) return null;
    101       return (String) m_intToString.elementAt(i);
    102     }
    103 
    104   /** @return integer index uniquely identifying the value of this string. */
    105   public int stringToIndex(String s)
    106     {
    107       if(s==null) return NULL;
    108 
    109       int hashslot=s.hashCode()%HASHPRIME;
    110       if(hashslot<0) hashslot=-hashslot;
    111 
    112       // Is it one we already know?
    113       int hashlast=m_hashStart[hashslot];
    114       int hashcandidate=hashlast;
    115       while(hashcandidate!=NULL)
    116         {
    117           if(m_intToString.elementAt(hashcandidate).equals(s))
    118             return hashcandidate;
    119 
    120           hashlast=hashcandidate;
    121           hashcandidate=m_hashChain.elementAt(hashcandidate);
    122         }
    123 
    124       // New value. Add to tables.
    125       int newIndex=m_intToString.size();
    126       m_intToString.addElement(s);
    127 
    128       m_hashChain.addElement(NULL);	// Initialize to no-following-same-hash
    129       if(hashlast==NULL)  // First for this hash
    130         m_hashStart[hashslot]=newIndex;
    131       else // Link from previous with same hash
    132         m_hashChain.setElementAt(newIndex,hashlast);
    133 
    134       return newIndex;
    135     }
    136 
    137   /** Command-line unit test driver. This test relies on the fact that
    138    * this version of the pool assigns indices consecutively, starting
    139    * from zero, as new unique strings are encountered.
    140    */
    141   public static void main(String[] args)
    142   {
    143     String[] word={
    144       "Zero","One","Two","Three","Four","Five",
    145       "Six","Seven","Eight","Nine","Ten",
    146       "Eleven","Twelve","Thirteen","Fourteen","Fifteen",
    147       "Sixteen","Seventeen","Eighteen","Nineteen","Twenty",
    148       "Twenty-One","Twenty-Two","Twenty-Three","Twenty-Four",
    149       "Twenty-Five","Twenty-Six","Twenty-Seven","Twenty-Eight",
    150       "Twenty-Nine","Thirty","Thirty-One","Thirty-Two",
    151       "Thirty-Three","Thirty-Four","Thirty-Five","Thirty-Six",
    152       "Thirty-Seven","Thirty-Eight","Thirty-Nine"};
    153 
    154     DTMStringPool pool=new DTMStringPool();
    155 
    156     System.out.println("If no complaints are printed below, we passed initial test.");
    157 
    158     for(int pass=0;pass<=1;++pass)
    159       {
    160         int i;
    161 
    162         for(i=0;i<word.length;++i)
    163           {
    164             int j=pool.stringToIndex(word[i]);
    165             if(j!=i)
    166               System.out.println("\tMismatch populating pool: assigned "+
    167                                  j+" for create "+i);
    168           }
    169 
    170         for(i=0;i<word.length;++i)
    171           {
    172             int j=pool.stringToIndex(word[i]);
    173             if(j!=i)
    174               System.out.println("\tMismatch in stringToIndex: returned "+
    175                                  j+" for lookup "+i);
    176           }
    177 
    178         for(i=0;i<word.length;++i)
    179           {
    180             String w=pool.indexToString(i);
    181             if(!word[i].equals(w))
    182               System.out.println("\tMismatch in indexToString: returned"+
    183                                  w+" for lookup "+i);
    184           }
    185 
    186         pool.removeAllElements();
    187 
    188         System.out.println("\nPass "+pass+" complete\n");
    189       } // end pass loop
    190   }
    191 }
    192