Home | History | Annotate | Download | only in vq
      1 /********************************************************************
      2  *                                                                  *
      3  * THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE.   *
      4  * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
      5  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
      6  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
      7  *                                                                  *
      8  * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2001             *
      9  * by the Xiph.Org Foundation http://www.xiph.org/                  *
     10  *                                                                  *
     11  ********************************************************************
     12 
     13  function: utility for finding the distribution in a data set
     14  last mod: $Id: distribution.c 16037 2009-05-26 21:10:58Z xiphmont $
     15 
     16  ********************************************************************/
     17 
     18 #include <stdlib.h>
     19 #include <stdio.h>
     20 #include <math.h>
     21 #include <string.h>
     22 #include <errno.h>
     23 #include "bookutil.h"
     24 
     25 /* command line:
     26    distribution file.vqd
     27 */
     28 
     29 int ascend(const void *a,const void *b){
     30   return(**((long **)a)-**((long **)b));
     31 }
     32 
     33 int main(int argc,char *argv[]){
     34   FILE *in;
     35   long lines=0;
     36   float min;
     37   float max;
     38   long bins=-1;
     39   int flag=0;
     40   long *countarray;
     41   long total=0;
     42   char *line;
     43 
     44   if(argv[1]==NULL){
     45     fprintf(stderr,"Usage: distribution {data.vqd [bins]| book.vqh} \n\n");
     46     exit(1);
     47   }
     48   if(argv[2]!=NULL)
     49     bins=atoi(argv[2])-1;
     50 
     51   in=fopen(argv[1],"r");
     52   if(!in){
     53     fprintf(stderr,"Could not open input file %s\n",argv[1]);
     54     exit(1);
     55   }
     56 
     57   if(strrchr(argv[1],'.') && strcmp(strrchr(argv[1],'.'),".vqh")==0){
     58     /* load/decode a book */
     59 
     60     codebook *b=codebook_load(argv[1]);
     61     static_codebook *c=(static_codebook *)(b->c);
     62     float delta;
     63     int i;
     64     fclose(in);
     65 
     66     switch(c->maptype){
     67     case 0:
     68       printf("entropy codebook only; no mappings\n");
     69       exit(0);
     70       break;
     71     case 1:
     72       bins=_book_maptype1_quantvals(c);
     73       break;
     74     case 2:
     75       bins=c->entries*c->dim;
     76       break;
     77     }
     78 
     79     max=min=_float32_unpack(c->q_min);
     80     delta=_float32_unpack(c->q_delta);
     81 
     82     for(i=0;i<bins;i++){
     83       float val=c->quantlist[i]*delta+min;
     84       if(val>max)max=val;
     85     }
     86 
     87     printf("Minimum scalar value: %f\n",min);
     88     printf("Maximum scalar value: %f\n",max);
     89 
     90     switch(c->maptype){
     91     case 1:
     92       {
     93         /* lattice codebook.  dump it. */
     94         int j,k;
     95         long maxcount=0;
     96         long **sort=calloc(bins,sizeof(long *));
     97         long base=c->lengthlist[0];
     98         countarray=calloc(bins,sizeof(long));
     99 
    100         for(i=0;i<bins;i++)sort[i]=c->quantlist+i;
    101         qsort(sort,bins,sizeof(long *),ascend);
    102 
    103         for(i=0;i<b->entries;i++)
    104           if(c->lengthlist[i]>base)base=c->lengthlist[i];
    105 
    106         /* dump a full, correlated count */
    107         for(j=0;j<b->entries;j++){
    108           if(c->lengthlist[j]){
    109             int indexdiv=1;
    110             printf("%4d: ",j);
    111             for(k=0;k<b->dim;k++){
    112               int index= (j/indexdiv)%bins;
    113               printf("%+3.1f,", c->quantlist[index]*_float32_unpack(c->q_delta)+
    114                      _float32_unpack(c->q_min));
    115               indexdiv*=bins;
    116             }
    117             printf("\t|");
    118             for(k=0;k<base-c->lengthlist[j];k++)printf("*");
    119             printf("\n");
    120           }
    121         }
    122 
    123         /* do a rough count */
    124         for(j=0;j<b->entries;j++){
    125           int indexdiv=1;
    126           for(k=0;k<b->dim;k++){
    127             if(c->lengthlist[j]){
    128               int index= (j/indexdiv)%bins;
    129               countarray[index]+=(1<<(base-c->lengthlist[j]));
    130               indexdiv*=bins;
    131             }
    132           }
    133         }
    134 
    135         /* dump the count */
    136 
    137         {
    138           long maxcount=0,i,j;
    139           for(i=0;i<bins;i++)
    140             if(countarray[i]>maxcount)maxcount=countarray[i];
    141 
    142           for(i=0;i<bins;i++){
    143             int ptr=sort[i]-c->quantlist;
    144             int stars=rint(50./maxcount*countarray[ptr]);
    145             printf("%+08f (%8ld) |",c->quantlist[ptr]*delta+min,countarray[ptr]);
    146             for(j=0;j<stars;j++)printf("*");
    147             printf("\n");
    148           }
    149         }
    150       }
    151       break;
    152     case 2:
    153       {
    154         /* trained, full mapping codebook. */
    155         printf("Can't do probability dump of a trained [type 2] codebook (yet)\n");
    156       }
    157       break;
    158     }
    159   }else{
    160     /* load/count a data file */
    161 
    162     /* do it the simple way; two pass. */
    163     line=setup_line(in);
    164     while(line){
    165       float code;
    166       char buf[80];
    167       lines++;
    168 
    169       sprintf(buf,"getting min/max (%.2f::%.2f). lines...",min,max);
    170       if(!(lines&0xff))spinnit(buf,lines);
    171 
    172       while(!flag && sscanf(line,"%f",&code)==1){
    173         line=strchr(line,',');
    174         min=max=code;
    175         flag=1;
    176       }
    177 
    178       while(line && sscanf(line,"%f",&code)==1){
    179         line=strchr(line,',');
    180         if(line)line++;
    181         if(code<min)min=code;
    182         if(code>max)max=code;
    183       }
    184 
    185       line=setup_line(in);
    186     }
    187 
    188     if(bins<1){
    189       if((int)(max-min)==min-max){
    190         bins=max-min;
    191       }else{
    192         bins=25;
    193       }
    194     }
    195 
    196     printf("\r                                                     \r");
    197     printf("Minimum scalar value: %f\n",min);
    198     printf("Maximum scalar value: %f\n",max);
    199 
    200     if(argv[2]){
    201 
    202       printf("\n counting hits into %ld bins...\n",bins+1);
    203       countarray=calloc(bins+1,sizeof(long));
    204 
    205       rewind(in);
    206       line=setup_line(in);
    207       while(line){
    208         float code;
    209         lines--;
    210         if(!(lines&0xff))spinnit("counting distribution. lines so far...",lines);
    211 
    212         while(line && sscanf(line,"%f",&code)==1){
    213           line=strchr(line,',');
    214           if(line)line++;
    215 
    216           code-=min;
    217           code/=(max-min);
    218           code*=bins;
    219           countarray[(int)rint(code)]++;
    220           total++;
    221         }
    222 
    223         line=setup_line(in);
    224       }
    225 
    226       /* make a pretty graph */
    227       {
    228         long maxcount=0,i,j;
    229         for(i=0;i<bins+1;i++)
    230           if(countarray[i]>maxcount)maxcount=countarray[i];
    231 
    232         printf("\r                                                     \r");
    233         printf("Total scalars: %ld\n",total);
    234         for(i=0;i<bins+1;i++){
    235           int stars=rint(50./maxcount*countarray[i]);
    236           printf("%08f (%8ld) |",(max-min)/bins*i+min,countarray[i]);
    237           for(j=0;j<stars;j++)printf("*");
    238           printf("\n");
    239         }
    240       }
    241     }
    242 
    243     fclose(in);
    244 
    245   }
    246   printf("\nDone.\n");
    247   exit(0);
    248 }
    249