Home | History | Annotate | Download | only in toolutil
      1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 *   Copyright (C) 2010, International Business Machines
      6 *   Corporation and others.  All Rights Reserved.
      7 *******************************************************************************
      8 *   file name:  denseranges.cpp
      9 *   encoding:   US-ASCII
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 2010sep25
     14 *   created by: Markus W. Scherer
     15 *
     16 * Helper code for finding a small number of dense ranges.
     17 */
     18 
     19 #include "unicode/utypes.h"
     20 #include "denseranges.h"
     21 
     22 // Definitions in the anonymous namespace are invisible outside this file.
     23 namespace {
     24 
     25 /**
     26  * Collect up to 15 range gaps and sort them by ascending gap size.
     27  */
     28 class LargestGaps {
     29 public:
     30     LargestGaps(int32_t max) : maxLength(max<=kCapacity ? max : kCapacity), length(0) {}
     31 
     32     void add(int32_t gapStart, int64_t gapLength) {
     33         int32_t i=length;
     34         while(i>0 && gapLength>gapLengths[i-1]) {
     35             --i;
     36         }
     37         if(i<maxLength) {
     38             // The new gap is now one of the maxLength largest.
     39             // Insert the new gap, moving up smaller ones of the previous
     40             // length largest.
     41             int32_t j= length<maxLength ? length++ : maxLength-1;
     42             while(j>i) {
     43                 gapStarts[j]=gapStarts[j-1];
     44                 gapLengths[j]=gapLengths[j-1];
     45                 --j;
     46             }
     47             gapStarts[i]=gapStart;
     48             gapLengths[i]=gapLength;
     49         }
     50     }
     51 
     52     void truncate(int32_t newLength) {
     53         if(newLength<length) {
     54             length=newLength;
     55         }
     56     }
     57 
     58     int32_t count() const { return length; }
     59     int32_t gapStart(int32_t i) const { return gapStarts[i]; }
     60     int64_t gapLength(int32_t i) const { return gapLengths[i]; }
     61 
     62     int32_t firstAfter(int32_t value) const {
     63         if(length==0) {
     64             return -1;
     65         }
     66         int32_t minValue=0;
     67         int32_t minIndex=-1;
     68         for(int32_t i=0; i<length; ++i) {
     69             if(value<gapStarts[i] && (minIndex<0 || gapStarts[i]<minValue)) {
     70                 minValue=gapStarts[i];
     71                 minIndex=i;
     72             }
     73         }
     74         return minIndex;
     75     }
     76 
     77 private:
     78     static const int32_t kCapacity=15;
     79 
     80     int32_t maxLength;
     81     int32_t length;
     82     int32_t gapStarts[kCapacity];
     83     int64_t gapLengths[kCapacity];
     84 };
     85 
     86 }  // namespace
     87 
     88 /**
     89  * Does it make sense to write 1..capacity ranges?
     90  * Returns 0 if not, otherwise the number of ranges.
     91  * @param values Sorted array of signed-integer values.
     92  * @param length Number of values.
     93  * @param density Minimum average range density, in 256th. (0x100=100%=perfectly dense.)
     94  *                Should be 0x80..0x100, must be 1..0x100.
     95  * @param ranges Output ranges array.
     96  * @param capacity Maximum number of ranges.
     97  * @return Minimum number of ranges (at most capacity) that have the desired density,
     98  *         or 0 if that density cannot be achieved.
     99  */
    100 U_CAPI int32_t U_EXPORT2
    101 uprv_makeDenseRanges(const int32_t values[], int32_t length,
    102                      int32_t density,
    103                      int32_t ranges[][2], int32_t capacity) {
    104     if(length<=2) {
    105         return 0;
    106     }
    107     int32_t minValue=values[0];
    108     int32_t maxValue=values[length-1];  // Assume minValue<=maxValue.
    109     // Use int64_t variables for intermediate-value precision and to avoid
    110     // signed-int32_t overflow of maxValue-minValue.
    111     int64_t maxLength=(int64_t)maxValue-(int64_t)minValue+1;
    112     if(length>=(density*maxLength)/0x100) {
    113         // Use one range.
    114         ranges[0][0]=minValue;
    115         ranges[0][1]=maxValue;
    116         return 1;
    117     }
    118     if(length<=4) {
    119         return 0;
    120     }
    121     // See if we can split [minValue, maxValue] into 2..capacity ranges,
    122     // divided by the 1..(capacity-1) largest gaps.
    123     LargestGaps gaps(capacity-1);
    124     int32_t i;
    125     int32_t expectedValue=minValue;
    126     for(i=1; i<length; ++i) {
    127         ++expectedValue;
    128         int32_t actualValue=values[i];
    129         if(expectedValue!=actualValue) {
    130             gaps.add(expectedValue, (int64_t)actualValue-(int64_t)expectedValue);
    131             expectedValue=actualValue;
    132         }
    133     }
    134     // We know gaps.count()>=1 because we have fewer values (length) than
    135     // the length of the [minValue..maxValue] range (maxLength).
    136     // (Otherwise we would have returned with the one range above.)
    137     int32_t num;
    138     for(i=0, num=2;; ++i, ++num) {
    139         if(i>=gaps.count()) {
    140             // The values are too sparse for capacity or fewer ranges
    141             // of the requested density.
    142             return 0;
    143         }
    144         maxLength-=gaps.gapLength(i);
    145         if(length>num*2 && length>=(density*maxLength)/0x100) {
    146             break;
    147         }
    148     }
    149     // Use the num ranges with the num-1 largest gaps.
    150     gaps.truncate(num-1);
    151     ranges[0][0]=minValue;
    152     for(i=0; i<=num-2; ++i) {
    153         int32_t gapIndex=gaps.firstAfter(minValue);
    154         int32_t gapStart=gaps.gapStart(gapIndex);
    155         ranges[i][1]=gapStart-1;
    156         ranges[i+1][0]=minValue=(int32_t)(gapStart+gaps.gapLength(gapIndex));
    157     }
    158     ranges[num-1][1]=maxValue;
    159     return num;
    160 }
    161