1 /* 2 ****************************************************************************** 3 * 4 * Copyright (C) 1999-2010, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ******************************************************************************/ 8 9 10 /*------------------------------------------------------------------------------ 11 * 12 * UCommonData An abstract interface for dealing with ICU Common Data Files. 13 * ICU Common Data Files are a grouping of a number of individual 14 * data items (resources, converters, tables, anything) into a 15 * single file or dll. The combined format includes a table of 16 * contents for locating the individual items by name. 17 * 18 * Two formats for the table of contents are supported, which is 19 * why there is an abstract inteface involved. 20 * 21 */ 22 23 #include "unicode/utypes.h" 24 #include "unicode/udata.h" 25 #include "cstring.h" 26 #include "ucmndata.h" 27 #include "udatamem.h" 28 29 #if defined(UDATA_DEBUG) || defined(UDATA_DEBUG_DUMP) 30 # include <stdio.h> 31 #endif 32 33 U_CFUNC uint16_t 34 udata_getHeaderSize(const DataHeader *udh) { 35 if(udh==NULL) { 36 return 0; 37 } else if(udh->info.isBigEndian==U_IS_BIG_ENDIAN) { 38 /* same endianness */ 39 return udh->dataHeader.headerSize; 40 } else { 41 /* opposite endianness */ 42 uint16_t x=udh->dataHeader.headerSize; 43 return (uint16_t)((x<<8)|(x>>8)); 44 } 45 } 46 47 U_CFUNC uint16_t 48 udata_getInfoSize(const UDataInfo *info) { 49 if(info==NULL) { 50 return 0; 51 } else if(info->isBigEndian==U_IS_BIG_ENDIAN) { 52 /* same endianness */ 53 return info->size; 54 } else { 55 /* opposite endianness */ 56 uint16_t x=info->size; 57 return (uint16_t)((x<<8)|(x>>8)); 58 } 59 } 60 61 /*-----------------------------------------------------------------------------* 62 * * 63 * Pointer TOCs. TODO: This form of table-of-contents should be removed * 64 * because DLLs must be relocated on loading to correct the * 65 * pointer values and this operation makes shared memory * 66 * mapping of the data much less likely to work. * 67 * * 68 *-----------------------------------------------------------------------------*/ 69 typedef struct { 70 const char *entryName; 71 const DataHeader *pHeader; 72 } PointerTOCEntry; 73 74 75 typedef struct { 76 uint32_t count; 77 uint32_t reserved; 78 PointerTOCEntry entry[2]; /* Actual size is from count. */ 79 } PointerTOC; 80 81 82 /* definition of OffsetTOC struct types moved to ucmndata.h */ 83 84 /*-----------------------------------------------------------------------------* 85 * * 86 * entry point lookup implementations * 87 * * 88 *-----------------------------------------------------------------------------*/ 89 static uint32_t offsetTOCEntryCount(const UDataMemory *pData) { 90 int32_t retVal=0; 91 const UDataOffsetTOC *toc = (UDataOffsetTOC *)pData->toc; 92 if (toc != NULL) { 93 retVal = toc->count; 94 } 95 return retVal; 96 } 97 98 99 static const DataHeader * 100 offsetTOCLookupFn(const UDataMemory *pData, 101 const char *tocEntryName, 102 int32_t *pLength, 103 UErrorCode *pErrorCode) { 104 const UDataOffsetTOC *toc = (UDataOffsetTOC *)pData->toc; 105 if(toc!=NULL) { 106 const char *base=(const char *)toc; 107 uint32_t start, limit, number, lastNumber; 108 int32_t strResult; 109 const UDataOffsetTOCEntry *entry; 110 111 /* perform a binary search for the data in the common data's table of contents */ 112 #if defined (UDATA_DEBUG_DUMP) 113 /* list the contents of the TOC each time .. not recommended */ 114 for(start=0;start<toc->count;start++) { 115 fprintf(stderr, "\tx%d: %s\n", start, &base[toc->entry[start].nameOffset]); 116 } 117 #endif 118 119 start=0; 120 limit=toc->count; /* number of names in this table of contents */ 121 lastNumber=limit; 122 entry=toc->entry; 123 for (;;) { 124 number = (start+limit)/2; 125 if (lastNumber == number) { /* Have we moved? */ 126 break; /* We haven't moved, and it wasn't found; */ 127 /* or the empty stub common data library was used during build. */ 128 } 129 lastNumber = number; 130 strResult = uprv_strcmp(tocEntryName, base+entry[number].nameOffset); 131 if(strResult<0) { 132 limit=number; 133 } else if (strResult>0) { 134 start=number; 135 } 136 else { 137 /* found it */ 138 #ifdef UDATA_DEBUG 139 fprintf(stderr, "%s: Found.\n", tocEntryName); 140 #endif 141 entry += number; /* Alias the entry to the current entry. */ 142 if((number+1) < toc->count) { 143 *pLength = (int32_t)(entry[1].dataOffset - entry->dataOffset); 144 } else { 145 *pLength = -1; 146 } 147 return (const DataHeader *)(base+entry->dataOffset); 148 } 149 } 150 #ifdef UDATA_DEBUG 151 fprintf(stderr, "%s: Not found.\n", tocEntryName); 152 #endif 153 return NULL; 154 } else { 155 #ifdef UDATA_DEBUG 156 fprintf(stderr, "returning header\n"); 157 #endif 158 159 return pData->pHeader; 160 } 161 } 162 163 164 static uint32_t pointerTOCEntryCount(const UDataMemory *pData) { 165 const PointerTOC *toc = (PointerTOC *)pData->toc; 166 return (uint32_t)((toc != NULL) ? (toc->count) : 0); 167 } 168 169 170 static const DataHeader *pointerTOCLookupFn(const UDataMemory *pData, 171 const char *name, 172 int32_t *pLength, 173 UErrorCode *pErrorCode) { 174 if(pData->toc!=NULL) { 175 const PointerTOC *toc = (PointerTOC *)pData->toc; 176 uint32_t start, limit, number, lastNumber; 177 int32_t strResult; 178 179 #if defined (UDATA_DEBUG_DUMP) 180 /* list the contents of the TOC each time .. not recommended */ 181 for(start=0;start<toc->count;start++) { 182 fprintf(stderr, "\tx%d: %s\n", start, toc->entry[start].entryName); 183 } 184 #endif 185 186 /* perform a binary search for the data in the common data's table of contents */ 187 start=0; 188 limit=toc->count; 189 lastNumber=limit; 190 191 for (;;) { 192 number = (start+limit)/2; 193 if (lastNumber == number) { /* Have we moved? */ 194 break; /* We haven't moved, and it wasn't found, */ 195 /* or the empty stub common data library was used during build. */ 196 } 197 lastNumber = number; 198 strResult = uprv_strcmp(name, toc->entry[number].entryName); 199 if(strResult<0) { 200 limit=number; 201 } else if (strResult>0) { 202 start=number; 203 } 204 else { 205 /* found it */ 206 #ifdef UDATA_DEBUG 207 fprintf(stderr, "%s: Found.\n", toc->entry[number].entryName); 208 #endif 209 *pLength=-1; 210 return UDataMemory_normalizeDataPointer(toc->entry[number].pHeader); 211 } 212 } 213 #ifdef UDATA_DEBUG 214 fprintf(stderr, "%s: Not found.\n", name); 215 #endif 216 return NULL; 217 } else { 218 return pData->pHeader; 219 } 220 } 221 222 static const commonDataFuncs CmnDFuncs = {offsetTOCLookupFn, offsetTOCEntryCount}; 223 static const commonDataFuncs ToCPFuncs = {pointerTOCLookupFn, pointerTOCEntryCount}; 224 225 226 227 /*----------------------------------------------------------------------* 228 * * 229 * checkCommonData Validate the format of a common data file. * 230 * Fill in the virtual function ptr based on TOC type * 231 * If the data is invalid, close the UDataMemory * 232 * and set the appropriate error code. * 233 * * 234 *----------------------------------------------------------------------*/ 235 U_CFUNC void udata_checkCommonData(UDataMemory *udm, UErrorCode *err) { 236 if (U_FAILURE(*err)) { 237 return; 238 } 239 240 if(!(udm->pHeader->dataHeader.magic1==0xda && 241 udm->pHeader->dataHeader.magic2==0x27 && 242 udm->pHeader->info.isBigEndian==U_IS_BIG_ENDIAN && 243 udm->pHeader->info.charsetFamily==U_CHARSET_FAMILY) 244 ) { 245 /* header not valid */ 246 *err=U_INVALID_FORMAT_ERROR; 247 } 248 else if (udm->pHeader->info.dataFormat[0]==0x43 && 249 udm->pHeader->info.dataFormat[1]==0x6d && 250 udm->pHeader->info.dataFormat[2]==0x6e && 251 udm->pHeader->info.dataFormat[3]==0x44 && 252 udm->pHeader->info.formatVersion[0]==1 253 ) { 254 /* dataFormat="CmnD" */ 255 udm->vFuncs = &CmnDFuncs; 256 udm->toc=(const char *)udm->pHeader+udata_getHeaderSize(udm->pHeader); 257 } 258 else if(udm->pHeader->info.dataFormat[0]==0x54 && 259 udm->pHeader->info.dataFormat[1]==0x6f && 260 udm->pHeader->info.dataFormat[2]==0x43 && 261 udm->pHeader->info.dataFormat[3]==0x50 && 262 udm->pHeader->info.formatVersion[0]==1 263 ) { 264 /* dataFormat="ToCP" */ 265 udm->vFuncs = &ToCPFuncs; 266 udm->toc=(const char *)udm->pHeader+udata_getHeaderSize(udm->pHeader); 267 } 268 else { 269 /* dataFormat not recognized */ 270 *err=U_INVALID_FORMAT_ERROR; 271 } 272 273 if (U_FAILURE(*err)) { 274 /* If the data is no good and we memory-mapped it ourselves, 275 * close the memory mapping so it doesn't leak. Note that this has 276 * no effect on non-memory mapped data, other than clearing fields in udm. 277 */ 278 udata_close(udm); 279 } 280 } 281 282 /* 283 * TODO: Add a udata_swapPackageHeader() function that swaps an ICU .dat package 284 * header but not its sub-items. 285 * This function will be needed for automatic runtime swapping. 286 * Sub-items should not be swapped to limit the swapping to the parts of the 287 * package that are actually used. 288 * 289 * Since lengths of items are implicit in the order and offsets of their 290 * ToC entries, and since offsets are relative to the start of the ToC, 291 * a swapped version may need to generate a different data structure 292 * with pointers to the original data items and with their lengths 293 * (-1 for the last one if it is not known), and maybe even pointers to the 294 * swapped versions of the items. 295 * These pointers to swapped versions would establish a cache; 296 * instead, each open data item could simply own the storage for its swapped 297 * data. This fits better with the current design. 298 * 299 * markus 2003sep18 Jitterbug 2235 300 */ 301