Home | History | Annotate | Download | only in toolutil
      1 /*
      2 *******************************************************************************
      3 *   Copyright (C) 2010, International Business Machines
      4 *   Corporation and others.  All Rights Reserved.
      5 *******************************************************************************
      6 *   file name:  denseranges.cpp
      7 *   encoding:   US-ASCII
      8 *   tab size:   8 (not used)
      9 *   indentation:4
     10 *
     11 *   created on: 2010sep25
     12 *   created by: Markus W. Scherer
     13 *
     14 * Helper code for finding a small number of dense ranges.
     15 */
     16 
     17 #include "unicode/utypes.h"
     18 #include "denseranges.h"
     19 
     20 // Definitions in the anonymous namespace are invisible outside this file.
     21 namespace {
     22 
     23 /**
     24  * Collect up to 15 range gaps and sort them by ascending gap size.
     25  */
     26 class LargestGaps {
     27 public:
     28     LargestGaps(int32_t max) : maxLength(max<=kCapacity ? max : kCapacity), length(0) {}
     29 
     30     void add(int32_t gapStart, int64_t gapLength) {
     31         int32_t i=length;
     32         while(i>0 && gapLength>gapLengths[i-1]) {
     33             --i;
     34         }
     35         if(i<maxLength) {
     36             // The new gap is now one of the maxLength largest.
     37             // Insert the new gap, moving up smaller ones of the previous
     38             // length largest.
     39             int32_t j= length<maxLength ? length++ : maxLength-1;
     40             while(j>i) {
     41                 gapStarts[j]=gapStarts[j-1];
     42                 gapLengths[j]=gapLengths[j-1];
     43                 --j;
     44             }
     45             gapStarts[i]=gapStart;
     46             gapLengths[i]=gapLength;
     47         }
     48     }
     49 
     50     void truncate(int32_t newLength) {
     51         if(newLength<length) {
     52             length=newLength;
     53         }
     54     }
     55 
     56     int32_t count() const { return length; }
     57     int32_t gapStart(int32_t i) const { return gapStarts[i]; }
     58     int64_t gapLength(int32_t i) const { return gapLengths[i]; }
     59 
     60     int32_t firstAfter(int32_t value) const {
     61         if(length==0) {
     62             return -1;
     63         }
     64         int32_t minValue=0;
     65         int32_t minIndex=-1;
     66         for(int32_t i=0; i<length; ++i) {
     67             if(value<gapStarts[i] && (minIndex<0 || gapStarts[i]<minValue)) {
     68                 minValue=gapStarts[i];
     69                 minIndex=i;
     70             }
     71         }
     72         return minIndex;
     73     }
     74 
     75 private:
     76     static const int32_t kCapacity=15;
     77 
     78     int32_t maxLength;
     79     int32_t length;
     80     int32_t gapStarts[kCapacity];
     81     int64_t gapLengths[kCapacity];
     82 };
     83 
     84 }  // namespace
     85 
     86 /**
     87  * Does it make sense to write 1..capacity ranges?
     88  * Returns 0 if not, otherwise the number of ranges.
     89  * @param values Sorted array of signed-integer values.
     90  * @param length Number of values.
     91  * @param density Minimum average range density, in 256th. (0x100=100%=perfectly dense.)
     92  *                Should be 0x80..0x100, must be 1..0x100.
     93  * @param ranges Output ranges array.
     94  * @param capacity Maximum number of ranges.
     95  * @return Minimum number of ranges (at most capacity) that have the desired density,
     96  *         or 0 if that density cannot be achieved.
     97  */
     98 U_CAPI int32_t U_EXPORT2
     99 uprv_makeDenseRanges(const int32_t values[], int32_t length,
    100                      int32_t density,
    101                      int32_t ranges[][2], int32_t capacity) {
    102     if(length<=2) {
    103         return 0;
    104     }
    105     int32_t minValue=values[0];
    106     int32_t maxValue=values[length-1];  // Assume minValue<=maxValue.
    107     // Use int64_t variables for intermediate-value precision and to avoid
    108     // signed-int32_t overflow of maxValue-minValue.
    109     int64_t maxLength=(int64_t)maxValue-(int64_t)minValue+1;
    110     if(length>=(density*maxLength)/0x100) {
    111         // Use one range.
    112         ranges[0][0]=minValue;
    113         ranges[0][1]=maxValue;
    114         return 1;
    115     }
    116     if(length<=4) {
    117         return 0;
    118     }
    119     // See if we can split [minValue, maxValue] into 2..capacity ranges,
    120     // divided by the 1..(capacity-1) largest gaps.
    121     LargestGaps gaps(capacity-1);
    122     int32_t i;
    123     int32_t expectedValue=minValue;
    124     for(i=1; i<length; ++i) {
    125         ++expectedValue;
    126         int32_t actualValue=values[i];
    127         if(expectedValue!=actualValue) {
    128             gaps.add(expectedValue, (int64_t)actualValue-(int64_t)expectedValue);
    129             expectedValue=actualValue;
    130         }
    131     }
    132     // We know gaps.count()>=1 because we have fewer values (length) than
    133     // the length of the [minValue..maxValue] range (maxLength).
    134     // (Otherwise we would have returned with the one range above.)
    135     int32_t num;
    136     for(i=0, num=2;; ++i, ++num) {
    137         if(i>=gaps.count()) {
    138             // The values are too sparse for capacity or fewer ranges
    139             // of the requested density.
    140             return 0;
    141         }
    142         maxLength-=gaps.gapLength(i);
    143         if(length>num*2 && length>=(density*maxLength)/0x100) {
    144             break;
    145         }
    146     }
    147     // Use the num ranges with the num-1 largest gaps.
    148     gaps.truncate(num-1);
    149     ranges[0][0]=minValue;
    150     for(i=0; i<=num-2; ++i) {
    151         int32_t gapIndex=gaps.firstAfter(minValue);
    152         int32_t gapStart=gaps.gapStart(gapIndex);
    153         ranges[i][1]=gapStart-1;
    154         ranges[i+1][0]=minValue=(int32_t)(gapStart+gaps.gapLength(gapIndex));
    155     }
    156     ranges[num-1][1]=maxValue;
    157     return num;
    158 }
    159