1 /* 2 ******************************************************************************* 3 * Copyright (C) 2010, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ******************************************************************************* 6 * file name: denseranges.cpp 7 * encoding: US-ASCII 8 * tab size: 8 (not used) 9 * indentation:4 10 * 11 * created on: 2010sep25 12 * created by: Markus W. Scherer 13 * 14 * Helper code for finding a small number of dense ranges. 15 */ 16 17 #include "unicode/utypes.h" 18 #include "denseranges.h" 19 20 // Definitions in the anonymous namespace are invisible outside this file. 21 namespace { 22 23 /** 24 * Collect up to 15 range gaps and sort them by ascending gap size. 25 */ 26 class LargestGaps { 27 public: 28 LargestGaps(int32_t max) : maxLength(max<=kCapacity ? max : kCapacity), length(0) {} 29 30 void add(int32_t gapStart, int64_t gapLength) { 31 int32_t i=length; 32 while(i>0 && gapLength>gapLengths[i-1]) { 33 --i; 34 } 35 if(i<maxLength) { 36 // The new gap is now one of the maxLength largest. 37 // Insert the new gap, moving up smaller ones of the previous 38 // length largest. 39 int32_t j= length<maxLength ? length++ : maxLength-1; 40 while(j>i) { 41 gapStarts[j]=gapStarts[j-1]; 42 gapLengths[j]=gapLengths[j-1]; 43 --j; 44 } 45 gapStarts[i]=gapStart; 46 gapLengths[i]=gapLength; 47 } 48 } 49 50 void truncate(int32_t newLength) { 51 if(newLength<length) { 52 length=newLength; 53 } 54 } 55 56 int32_t count() const { return length; } 57 int32_t gapStart(int32_t i) const { return gapStarts[i]; } 58 int64_t gapLength(int32_t i) const { return gapLengths[i]; } 59 60 int32_t firstAfter(int32_t value) const { 61 if(length==0) { 62 return -1; 63 } 64 int32_t minValue=0; 65 int32_t minIndex=-1; 66 for(int32_t i=0; i<length; ++i) { 67 if(value<gapStarts[i] && (minIndex<0 || gapStarts[i]<minValue)) { 68 minValue=gapStarts[i]; 69 minIndex=i; 70 } 71 } 72 return minIndex; 73 } 74 75 private: 76 static const int32_t kCapacity=15; 77 78 int32_t maxLength; 79 int32_t length; 80 int32_t gapStarts[kCapacity]; 81 int64_t gapLengths[kCapacity]; 82 }; 83 84 } // namespace 85 86 /** 87 * Does it make sense to write 1..capacity ranges? 88 * Returns 0 if not, otherwise the number of ranges. 89 * @param values Sorted array of signed-integer values. 90 * @param length Number of values. 91 * @param density Minimum average range density, in 256th. (0x100=100%=perfectly dense.) 92 * Should be 0x80..0x100, must be 1..0x100. 93 * @param ranges Output ranges array. 94 * @param capacity Maximum number of ranges. 95 * @return Minimum number of ranges (at most capacity) that have the desired density, 96 * or 0 if that density cannot be achieved. 97 */ 98 U_CAPI int32_t U_EXPORT2 99 uprv_makeDenseRanges(const int32_t values[], int32_t length, 100 int32_t density, 101 int32_t ranges[][2], int32_t capacity) { 102 if(length<=2) { 103 return 0; 104 } 105 int32_t minValue=values[0]; 106 int32_t maxValue=values[length-1]; // Assume minValue<=maxValue. 107 // Use int64_t variables for intermediate-value precision and to avoid 108 // signed-int32_t overflow of maxValue-minValue. 109 int64_t maxLength=(int64_t)maxValue-(int64_t)minValue+1; 110 if(length>=(density*maxLength)/0x100) { 111 // Use one range. 112 ranges[0][0]=minValue; 113 ranges[0][1]=maxValue; 114 return 1; 115 } 116 if(length<=4) { 117 return 0; 118 } 119 // See if we can split [minValue, maxValue] into 2..capacity ranges, 120 // divided by the 1..(capacity-1) largest gaps. 121 LargestGaps gaps(capacity-1); 122 int32_t i; 123 int32_t expectedValue=minValue; 124 for(i=1; i<length; ++i) { 125 ++expectedValue; 126 int32_t actualValue=values[i]; 127 if(expectedValue!=actualValue) { 128 gaps.add(expectedValue, (int64_t)actualValue-(int64_t)expectedValue); 129 expectedValue=actualValue; 130 } 131 } 132 // We know gaps.count()>=1 because we have fewer values (length) than 133 // the length of the [minValue..maxValue] range (maxLength). 134 // (Otherwise we would have returned with the one range above.) 135 int32_t num; 136 for(i=0, num=2;; ++i, ++num) { 137 if(i>=gaps.count()) { 138 // The values are too sparse for capacity or fewer ranges 139 // of the requested density. 140 return 0; 141 } 142 maxLength-=gaps.gapLength(i); 143 if(length>num*2 && length>=(density*maxLength)/0x100) { 144 break; 145 } 146 } 147 // Use the num ranges with the num-1 largest gaps. 148 gaps.truncate(num-1); 149 ranges[0][0]=minValue; 150 for(i=0; i<=num-2; ++i) { 151 int32_t gapIndex=gaps.firstAfter(minValue); 152 int32_t gapStart=gaps.gapStart(gapIndex); 153 ranges[i][1]=gapStart-1; 154 ranges[i+1][0]=minValue=(int32_t)(gapStart+gaps.gapLength(gapIndex)); 155 } 156 ranges[num-1][1]=maxValue; 157 return num; 158 } 159