Home | History | Annotate | Download | only in research
      1 /* Copyright 2016 Google Inc. All Rights Reserved.
      2    Author: zip753 (at) gmail.com (Ivan Nikulin)
      3 
      4    Distributed under MIT license.
      5    See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
      6 */
      7 
      8 /* Backward reference visualization tool. Accepts file with backward references
      9    as an input and produces PGM image with histogram of those references. */
     10 
     11 #include <algorithm> /* min */
     12 #include <cassert>
     13 #include <cstring> /* memset */
     14 #include <cmath> /* log, round */
     15 #include <cstdio> /* fscanf, fprintf */
     16 #include <cstdint>
     17 
     18 #include <gflags/gflags.h>
     19 using gflags::ParseCommandLineFlags;
     20 
     21 #include "./read_dist.h"
     22 
     23 DEFINE_int32(height, 1000, "Height of the resulting histogam.");
     24 DEFINE_int32(width, 8000, "Width of the resulting histogam.");
     25 DEFINE_int32(size, 1e8, "Size of the compressed file.");
     26 DEFINE_int32(brotli_window, -1, "Size of brotli window in bits.");
     27 DEFINE_uint64(min_distance, 0, "Minimum distance.");
     28 DEFINE_uint64(max_distance, 1 << 30, "Maximum distance.");
     29 DEFINE_bool(with_copies, false, "True if input contains copy length.");
     30 DEFINE_bool(simple, false, "True if using only black and white pixels.");
     31 DEFINE_bool(linear, false, "True if using linear distance mapping.");
     32 DEFINE_uint64(skip, 0, "Number of bytes to skip.");
     33 
     34 inline double DistanceTransform(double x) {
     35   static bool linear = FLAGS_linear;
     36   if (linear) {
     37     return x;
     38   } else {
     39     /* Using log^2 scale because log scale produces big white gap at the bottom
     40        of image. */
     41     return log(x) * log(x);
     42   }
     43 }
     44 
     45 /* Mapping pixel density on arc function to increase contrast. */
     46 inline double DensityTransform(double x) {
     47   double z = 255 - x;
     48   return sqrt(255 * 255 - z * z);
     49 }
     50 
     51 inline int GetMaxDistance() {
     52   return FLAGS_max_distance;
     53 }
     54 
     55 void AdjustPosition(int* pos) {
     56   static uint32_t offset = 0;
     57   static int last = 0;
     58   static uint32_t window_size = (1 << FLAGS_brotli_window);
     59   assert(*pos >= 0 && *pos < window_size);
     60   if (*pos < last) {
     61     offset += window_size;
     62   }
     63   last = *pos;
     64   *pos += offset;
     65 }
     66 
     67 void BuildHistogram(FILE* fin, int** histo) {
     68   int height = FLAGS_height;
     69   int width = FLAGS_width;
     70   int skip = FLAGS_skip;
     71   size_t min_distance = FLAGS_min_distance;
     72 
     73   printf("height = %d, width = %d\n", height, width);
     74 
     75   for (int i = 0; i < height; i++) {
     76     for (int j = 0; j < width; j++) {
     77       histo[i][j] = 0;
     78     }
     79   }
     80 
     81   int max_pos = FLAGS_size - skip;
     82   double min_dist = min_distance > 0 ? DistanceTransform(min_distance) : 0;
     83   double max_dist = DistanceTransform(GetMaxDistance()) - min_dist;
     84   int copy, pos, distance, x, y;
     85   double dist;
     86   while (ReadBackwardReference(fin, &copy, &pos, &distance)) {
     87     if (pos == -1) continue;  // In case when only insert is present.
     88     if (distance < min_distance || distance >= GetMaxDistance()) continue;
     89     if (FLAGS_brotli_window != -1) {
     90       AdjustPosition(&pos);
     91     }
     92     if (pos >= skip && distance <= pos) {
     93       pos -= skip;
     94       if (pos >= max_pos) break;
     95       dist = DistanceTransform(static_cast<double>(distance)) - min_dist;
     96 
     97       x = std::min(static_cast<int>(round(dist / max_dist * height)),
     98                    height - 1);
     99       y = 1ul * pos * width / max_pos;
    100       if (!(y >= 0 && y < width)) {
    101         printf("pos = %d, max_pos = %d, y = %d\n", pos, max_pos, y);
    102         assert(y >= 0 && y < width);
    103       }
    104 
    105       if (FLAGS_with_copies) {
    106         int right = 1ul * (pos + copy - 1) * width / max_pos;
    107         if (right < 0) {
    108           printf("pos = %d, distance = %d, copy = %d, y = %d, right = %d\n",
    109                   pos, distance, copy, y, right);
    110           assert(right >= 0);
    111         }
    112         if (y == right) {
    113           histo[x][y] += copy;
    114         } else {
    115           int pos2 = static_cast<int>(ceil(1.0 * (y + 1) * max_pos / width));
    116           histo[x][y] += pos2 - pos;
    117           for (int i = y + 1; i < right && i < width; ++i) {
    118             histo[x][i] += max_pos / width;  // Sometimes 1 more, but who cares.
    119           }
    120           // Make sure the match doesn't go beyond the image.
    121           if (right < width) {
    122             pos2 = static_cast<int>(ceil(1.0 * right * max_pos / width));
    123             histo[x][right] += pos + copy - 1 - pos2 + 1;
    124           }
    125         }
    126       } else {
    127         histo[x][y]++;
    128       }
    129     }
    130   }
    131 }
    132 
    133 void ConvertToPixels(int** histo, uint8_t** pixel) {
    134   int height = FLAGS_height;
    135   int width = FLAGS_width;
    136 
    137   int maxs = 0;
    138   for (int i = 0; i < height; i++) {
    139     for (int j = 0; j < width; j++) {
    140       if (maxs < histo[i][j]) maxs = histo[i][j];
    141     }
    142   }
    143 
    144   bool simple = FLAGS_simple;
    145   double max_histo = static_cast<double>(maxs);
    146   for (int i = 0; i < height; i++) {
    147     for (int j = 0; j < width; j++) {
    148       if (simple) {
    149         pixel[i][j] = histo[i][j] > 0 ? 0 : 255;
    150       } else {
    151         pixel[i][j] = static_cast<uint8_t>(
    152             255 - DensityTransform(histo[i][j] / max_histo * 255));
    153       }
    154     }
    155   }
    156 }
    157 
    158 void DrawPixels(uint8_t** pixel, FILE* fout) {
    159   int height = FLAGS_height;
    160   int width = FLAGS_width;
    161 
    162   fprintf(fout, "P5\n%d %d\n255\n", width, height);
    163   for (int i = height - 1; i >= 0; i--) {
    164     fwrite(pixel[i], 1, width, fout);
    165   }
    166 }
    167 
    168 int main(int argc, char* argv[]) {
    169   ParseCommandLineFlags(&argc, &argv, true);
    170   if (argc != 3) {
    171     printf("usage: draw_histogram.cc data output_file\n");
    172     return 1;
    173   }
    174 
    175   int height = FLAGS_height;
    176   int width = FLAGS_width;
    177 
    178   FILE* fin = fopen(argv[1], "r");
    179   FILE* fout = fopen(argv[2], "wb");
    180 
    181   uint8_t** pixel = new uint8_t*[height];
    182   int** histo = new int*[height];
    183   for (int i = 0; i < height; i++) {
    184     pixel[i] = new uint8_t[width];
    185     histo[i] = new int[width];
    186   }
    187 
    188   BuildHistogram(fin, histo);
    189   fclose(fin);
    190 
    191   ConvertToPixels(histo, pixel);
    192 
    193   DrawPixels(pixel, fout);
    194   fclose(fout);
    195 
    196   return 0;
    197 }
    198