1 /******************************************************************** 2 * * 3 * THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE. * 4 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * 5 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * 6 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * 7 * * 8 * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2001 * 9 * by the Xiph.Org Foundation http://www.xiph.org/ * 10 * * 11 ******************************************************************** 12 13 function: utility for finding the distribution in a data set 14 last mod: $Id: distribution.c 16037 2009-05-26 21:10:58Z xiphmont $ 15 16 ********************************************************************/ 17 18 #include <stdlib.h> 19 #include <stdio.h> 20 #include <math.h> 21 #include <string.h> 22 #include <errno.h> 23 #include "bookutil.h" 24 25 /* command line: 26 distribution file.vqd 27 */ 28 29 int ascend(const void *a,const void *b){ 30 return(**((long **)a)-**((long **)b)); 31 } 32 33 int main(int argc,char *argv[]){ 34 FILE *in; 35 long lines=0; 36 float min; 37 float max; 38 long bins=-1; 39 int flag=0; 40 long *countarray; 41 long total=0; 42 char *line; 43 44 if(argv[1]==NULL){ 45 fprintf(stderr,"Usage: distribution {data.vqd [bins]| book.vqh} \n\n"); 46 exit(1); 47 } 48 if(argv[2]!=NULL) 49 bins=atoi(argv[2])-1; 50 51 in=fopen(argv[1],"r"); 52 if(!in){ 53 fprintf(stderr,"Could not open input file %s\n",argv[1]); 54 exit(1); 55 } 56 57 if(strrchr(argv[1],'.') && strcmp(strrchr(argv[1],'.'),".vqh")==0){ 58 /* load/decode a book */ 59 60 codebook *b=codebook_load(argv[1]); 61 static_codebook *c=(static_codebook *)(b->c); 62 float delta; 63 int i; 64 fclose(in); 65 66 switch(c->maptype){ 67 case 0: 68 printf("entropy codebook only; no mappings\n"); 69 exit(0); 70 break; 71 case 1: 72 bins=_book_maptype1_quantvals(c); 73 break; 74 case 2: 75 bins=c->entries*c->dim; 76 break; 77 } 78 79 max=min=_float32_unpack(c->q_min); 80 delta=_float32_unpack(c->q_delta); 81 82 for(i=0;i<bins;i++){ 83 float val=c->quantlist[i]*delta+min; 84 if(val>max)max=val; 85 } 86 87 printf("Minimum scalar value: %f\n",min); 88 printf("Maximum scalar value: %f\n",max); 89 90 switch(c->maptype){ 91 case 1: 92 { 93 /* lattice codebook. dump it. */ 94 int j,k; 95 long maxcount=0; 96 long **sort=calloc(bins,sizeof(long *)); 97 long base=c->lengthlist[0]; 98 countarray=calloc(bins,sizeof(long)); 99 100 for(i=0;i<bins;i++)sort[i]=c->quantlist+i; 101 qsort(sort,bins,sizeof(long *),ascend); 102 103 for(i=0;i<b->entries;i++) 104 if(c->lengthlist[i]>base)base=c->lengthlist[i]; 105 106 /* dump a full, correlated count */ 107 for(j=0;j<b->entries;j++){ 108 if(c->lengthlist[j]){ 109 int indexdiv=1; 110 printf("%4d: ",j); 111 for(k=0;k<b->dim;k++){ 112 int index= (j/indexdiv)%bins; 113 printf("%+3.1f,", c->quantlist[index]*_float32_unpack(c->q_delta)+ 114 _float32_unpack(c->q_min)); 115 indexdiv*=bins; 116 } 117 printf("\t|"); 118 for(k=0;k<base-c->lengthlist[j];k++)printf("*"); 119 printf("\n"); 120 } 121 } 122 123 /* do a rough count */ 124 for(j=0;j<b->entries;j++){ 125 int indexdiv=1; 126 for(k=0;k<b->dim;k++){ 127 if(c->lengthlist[j]){ 128 int index= (j/indexdiv)%bins; 129 countarray[index]+=(1<<(base-c->lengthlist[j])); 130 indexdiv*=bins; 131 } 132 } 133 } 134 135 /* dump the count */ 136 137 { 138 long maxcount=0,i,j; 139 for(i=0;i<bins;i++) 140 if(countarray[i]>maxcount)maxcount=countarray[i]; 141 142 for(i=0;i<bins;i++){ 143 int ptr=sort[i]-c->quantlist; 144 int stars=rint(50./maxcount*countarray[ptr]); 145 printf("%+08f (%8ld) |",c->quantlist[ptr]*delta+min,countarray[ptr]); 146 for(j=0;j<stars;j++)printf("*"); 147 printf("\n"); 148 } 149 } 150 } 151 break; 152 case 2: 153 { 154 /* trained, full mapping codebook. */ 155 printf("Can't do probability dump of a trained [type 2] codebook (yet)\n"); 156 } 157 break; 158 } 159 }else{ 160 /* load/count a data file */ 161 162 /* do it the simple way; two pass. */ 163 line=setup_line(in); 164 while(line){ 165 float code; 166 char buf[80]; 167 lines++; 168 169 sprintf(buf,"getting min/max (%.2f::%.2f). lines...",min,max); 170 if(!(lines&0xff))spinnit(buf,lines); 171 172 while(!flag && sscanf(line,"%f",&code)==1){ 173 line=strchr(line,','); 174 min=max=code; 175 flag=1; 176 } 177 178 while(line && sscanf(line,"%f",&code)==1){ 179 line=strchr(line,','); 180 if(line)line++; 181 if(code<min)min=code; 182 if(code>max)max=code; 183 } 184 185 line=setup_line(in); 186 } 187 188 if(bins<1){ 189 if((int)(max-min)==min-max){ 190 bins=max-min; 191 }else{ 192 bins=25; 193 } 194 } 195 196 printf("\r \r"); 197 printf("Minimum scalar value: %f\n",min); 198 printf("Maximum scalar value: %f\n",max); 199 200 if(argv[2]){ 201 202 printf("\n counting hits into %ld bins...\n",bins+1); 203 countarray=calloc(bins+1,sizeof(long)); 204 205 rewind(in); 206 line=setup_line(in); 207 while(line){ 208 float code; 209 lines--; 210 if(!(lines&0xff))spinnit("counting distribution. lines so far...",lines); 211 212 while(line && sscanf(line,"%f",&code)==1){ 213 line=strchr(line,','); 214 if(line)line++; 215 216 code-=min; 217 code/=(max-min); 218 code*=bins; 219 countarray[(int)rint(code)]++; 220 total++; 221 } 222 223 line=setup_line(in); 224 } 225 226 /* make a pretty graph */ 227 { 228 long maxcount=0,i,j; 229 for(i=0;i<bins+1;i++) 230 if(countarray[i]>maxcount)maxcount=countarray[i]; 231 232 printf("\r \r"); 233 printf("Total scalars: %ld\n",total); 234 for(i=0;i<bins+1;i++){ 235 int stars=rint(50./maxcount*countarray[i]); 236 printf("%08f (%8ld) |",(max-min)/bins*i+min,countarray[i]); 237 for(j=0;j<stars;j++)printf("*"); 238 printf("\n"); 239 } 240 } 241 } 242 243 fclose(in); 244 245 } 246 printf("\nDone.\n"); 247 exit(0); 248 } 249