1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one 3 * or more contributor license agreements. See the NOTICE file 4 * distributed with this work for additional information 5 * regarding copyright ownership. The ASF licenses this file 6 * to you under the Apache License, Version 2.0 (the "License"); 7 * you may not use this file except in compliance with the License. 8 * You may obtain a copy of the License at 9 * 10 * http://www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, software 13 * distributed under the License is distributed on an "AS IS" BASIS, 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 * See the License for the specific language governing permissions and 16 * limitations under the License. 17 */ 18 /* 19 * $Id: DTMStringPool.java 468653 2006-10-28 07:07:05Z minchau $ 20 */ 21 22 package org.apache.xml.dtm.ref; 23 24 import java.util.Vector; 25 26 import org.apache.xml.utils.IntVector; 27 28 /** <p>DTMStringPool is an "interning" mechanism for strings. It will 29 * create a stable 1:1 mapping between a set of string values and a set of 30 * integer index values, so the integers can be used to reliably and 31 * uniquely identify (and when necessary retrieve) the strings.</p> 32 * 33 * <p>Design Priorities: 34 * <ul> 35 * <li>String-to-index lookup speed is critical.</li> 36 * <li>Index-to-String lookup speed is slightly less so.</li> 37 * <li>Threadsafety is not guaranteed at this level. 38 * Enforce that in the application if needed.</li> 39 * <li>Storage efficiency is an issue but not a huge one. 40 * It is expected that string pools won't exceed about 2000 entries.</li> 41 * </ul> 42 * </p> 43 * 44 * <p>Implementation detail: A standard Hashtable is relatively 45 * inefficient when looking up primitive int values, especially when 46 * we're already maintaining an int-to-string vector. So I'm 47 * maintaining a simple hash chain within this class.</p> 48 * 49 * <p>NOTE: There is nothing in the code that has a real dependency upon 50 * String. It would work with any object type that implements reliable 51 * .hashCode() and .equals() operations. The API enforces Strings because 52 * it's safer that way, but this could trivially be turned into a general 53 * ObjectPool if one was needed.</p> 54 * 55 * <p>Status: Passed basic test in main().</p> 56 * */ 57 public class DTMStringPool 58 { 59 Vector m_intToString; 60 static final int HASHPRIME=101; 61 int[] m_hashStart=new int[HASHPRIME]; 62 IntVector m_hashChain; 63 public static final int NULL=-1; 64 65 /** 66 * Create a DTMStringPool using the given chain size 67 * 68 * @param chainSize The size of the hash chain vector 69 */ 70 public DTMStringPool(int chainSize) 71 { 72 m_intToString=new Vector(); 73 m_hashChain=new IntVector(chainSize); 74 removeAllElements(); 75 76 // -sb Add this to force empty strings to be index 0. 77 stringToIndex(""); 78 } 79 80 public DTMStringPool() 81 { 82 this(512); 83 } 84 85 public void removeAllElements() 86 { 87 m_intToString.removeAllElements(); 88 for(int i=0;i<HASHPRIME;++i) 89 m_hashStart[i]=NULL; 90 m_hashChain.removeAllElements(); 91 } 92 93 /** @return string whose value is uniquely identified by this integer index. 94 * @throws java.lang.ArrayIndexOutOfBoundsException 95 * if index doesn't map to a string. 96 * */ 97 public String indexToString(int i) 98 throws java.lang.ArrayIndexOutOfBoundsException 99 { 100 if(i==NULL) return null; 101 return (String) m_intToString.elementAt(i); 102 } 103 104 /** @return integer index uniquely identifying the value of this string. */ 105 public int stringToIndex(String s) 106 { 107 if(s==null) return NULL; 108 109 int hashslot=s.hashCode()%HASHPRIME; 110 if(hashslot<0) hashslot=-hashslot; 111 112 // Is it one we already know? 113 int hashlast=m_hashStart[hashslot]; 114 int hashcandidate=hashlast; 115 while(hashcandidate!=NULL) 116 { 117 if(m_intToString.elementAt(hashcandidate).equals(s)) 118 return hashcandidate; 119 120 hashlast=hashcandidate; 121 hashcandidate=m_hashChain.elementAt(hashcandidate); 122 } 123 124 // New value. Add to tables. 125 int newIndex=m_intToString.size(); 126 m_intToString.addElement(s); 127 128 m_hashChain.addElement(NULL); // Initialize to no-following-same-hash 129 if(hashlast==NULL) // First for this hash 130 m_hashStart[hashslot]=newIndex; 131 else // Link from previous with same hash 132 m_hashChain.setElementAt(newIndex,hashlast); 133 134 return newIndex; 135 } 136 137 /** Command-line unit test driver. This test relies on the fact that 138 * this version of the pool assigns indices consecutively, starting 139 * from zero, as new unique strings are encountered. 140 */ 141 public static void main(String[] args) 142 { 143 String[] word={ 144 "Zero","One","Two","Three","Four","Five", 145 "Six","Seven","Eight","Nine","Ten", 146 "Eleven","Twelve","Thirteen","Fourteen","Fifteen", 147 "Sixteen","Seventeen","Eighteen","Nineteen","Twenty", 148 "Twenty-One","Twenty-Two","Twenty-Three","Twenty-Four", 149 "Twenty-Five","Twenty-Six","Twenty-Seven","Twenty-Eight", 150 "Twenty-Nine","Thirty","Thirty-One","Thirty-Two", 151 "Thirty-Three","Thirty-Four","Thirty-Five","Thirty-Six", 152 "Thirty-Seven","Thirty-Eight","Thirty-Nine"}; 153 154 DTMStringPool pool=new DTMStringPool(); 155 156 System.out.println("If no complaints are printed below, we passed initial test."); 157 158 for(int pass=0;pass<=1;++pass) 159 { 160 int i; 161 162 for(i=0;i<word.length;++i) 163 { 164 int j=pool.stringToIndex(word[i]); 165 if(j!=i) 166 System.out.println("\tMismatch populating pool: assigned "+ 167 j+" for create "+i); 168 } 169 170 for(i=0;i<word.length;++i) 171 { 172 int j=pool.stringToIndex(word[i]); 173 if(j!=i) 174 System.out.println("\tMismatch in stringToIndex: returned "+ 175 j+" for lookup "+i); 176 } 177 178 for(i=0;i<word.length;++i) 179 { 180 String w=pool.indexToString(i); 181 if(!word[i].equals(w)) 182 System.out.println("\tMismatch in indexToString: returned"+ 183 w+" for lookup "+i); 184 } 185 186 pool.removeAllElements(); 187 188 System.out.println("\nPass "+pass+" complete\n"); 189 } // end pass loop 190 } 191 } 192