1 /************************************************************** 2 Form adapted from lzhuf.c 3 written by Haruyasu Yoshizaki 11/20/1988 4 some minor changes 4/6/1989 5 comments translated by Haruhiko Okumura 4/7/1989 6 7 minor beautifications and adjustments for compiling under Linux 8 by Markus Gutschke <gutschk (at) math.uni-muenster.de> 9 1997-01-27 10 11 Modifications to allow use as a filter by Ken Yap 12 <ken_yap (at) users.sourceforge.net>. 13 14 1997-07-01 15 16 Small mod to cope with running on big-endian machines 17 by Jim Hague <jim.hague (at) acm.org) 18 1998-02-06 19 20 Make compression statistics report shorter 21 by Ken Yap <ken_yap (at) users.sourceforge.net>. 22 2001-04-25 23 24 Replaced algorithm with nrv2b from ucl the compression 25 library from upx. That code is: 26 Copyright (C) 1996-2002 Markus Franz Xaver Johannes Oberhumer 27 And is distributed under the terms of the GPL. 28 The conversion was performed 29 by Eric Biederman <ebiederman (at) lnxi.com>. 30 20 August 2002 31 32 **************************************************************/ 33 #define UCLPACK_COMPAT 0 34 #define NDEBUG 1 35 #include <stdio.h> 36 #include <stdlib.h> 37 #include <string.h> 38 #include <ctype.h> 39 #include <errno.h> 40 #ifdef __FreeBSD__ 41 #include <inttypes.h> 42 #else 43 #include <stdint.h> 44 #endif 45 #include <limits.h> 46 #include <assert.h> 47 #if UCLPACK_COMPAT 48 #include <netinet/in.h> 49 #endif 50 51 #ifndef VERBOSE 52 #define Fprintf(x) 53 #define wterr 0 54 #else 55 #define Fprintf(x) fprintf x 56 #endif 57 58 #ifndef MAIN 59 extern 60 #endif 61 FILE *infile, *outfile; 62 63 #if defined(ENCODE) || defined(DECODE) 64 65 #ifndef ENDIAN 66 #define ENDIAN 0 67 #endif 68 #ifndef BITSIZE 69 #define BITSIZE 32 70 #endif 71 72 static __inline__ void Error(char *message) 73 { 74 Fprintf((stderr, "\n%s\n", message)); 75 exit(EXIT_FAILURE); 76 } 77 78 /* These will be a complete waste of time on a lo-endian */ 79 /* system, but it only gets done once so WTF. */ 80 static unsigned long i86ul_to_host(unsigned long ul) 81 { 82 unsigned long res = 0; 83 int i; 84 union 85 { 86 unsigned char c[4]; 87 unsigned long ul; 88 } u; 89 90 u.ul = ul; 91 for (i = 3; i >= 0; i--) 92 res = (res << 8) + u.c[i]; 93 return res; 94 } 95 96 static unsigned long host_to_i86ul(unsigned long ul) 97 { 98 int i; 99 union 100 { 101 unsigned char c[4]; 102 unsigned long ul; 103 } u; 104 105 for (i = 0; i < 4; i++) 106 { 107 u.c[i] = ul & 0xff; 108 ul >>= 8; 109 } 110 return u.ul; 111 } 112 #endif 113 114 115 116 #if UCLPACK_COMPAT 117 /* magic file header for compressed files */ 118 static const unsigned char magic[8] = 119 { 0x00, 0xe9, 0x55, 0x43, 0x4c, 0xff, 0x01, 0x1a }; 120 121 #endif 122 123 #ifdef ENCODE 124 /********** NRV2B_99 compression **********/ 125 126 /* Note by limiting the ring buffer I have limited the maximum 127 * offset to 64K. Since etherboot rarely gets that big it 128 * is not a problem and it gives me a firm guarantee 129 * that I will never get a 3 byte string match that is encodes 130 * to more than 9/8 it's original size. 131 * That guaranteee is important to for the inplace decompressor. 132 * There are better ways to do this if a larger offset and buffer 133 * would give better compression. 134 */ 135 #define N (65536ul) /* size of ring buffer */ 136 #define THRESHOLD 1 /* lower limit for match length */ 137 #define F 2048 /* upper limit for match length */ 138 #define M2_MAX_OFFSET 0xd00 139 140 /* note: to use default values pass -1, i.e. initialize 141 * this struct by a memset(x,0xff,sizeof(x)) */ 142 struct ucl_compress_config 143 { 144 int bb_endian; 145 int bb_size; 146 unsigned int max_offset; 147 unsigned int max_match; 148 int s_level; 149 int h_level; 150 int p_level; 151 int c_flags; 152 unsigned int m_size; 153 }; 154 155 struct ucl_compress 156 { 157 int init; 158 159 unsigned int look; /* bytes in lookahead buffer */ 160 161 unsigned int m_len; 162 unsigned int m_off; 163 164 unsigned int last_m_len; 165 unsigned int last_m_off; 166 167 const unsigned char *bp; 168 const unsigned char *ip; 169 const unsigned char *in; 170 const unsigned char *in_end; 171 unsigned char *out; 172 173 uint64_t bb_b; 174 unsigned bb_k; 175 unsigned bb_c_endian; 176 unsigned bb_c_s; 177 unsigned bb_c_s8; 178 unsigned char *bb_p; 179 unsigned char *bb_op; 180 181 struct ucl_compress_config conf; 182 unsigned int *result; 183 184 unsigned int textsize; /* text size counter */ 185 unsigned int codesize; /* code size counter */ 186 unsigned int printcount; /* counter for reporting progress every 1K 187 bytes */ 188 189 190 /* some stats */ 191 unsigned long lit_bytes; 192 unsigned long match_bytes; 193 unsigned long rep_bytes; 194 unsigned long lazy; 195 }; 196 197 198 199 #define getbyte(c) ((c).ip < (c).in_end ? *((c).ip)++ : (-1)) 200 201 #define UCL_E_OK 0 202 #define UCL_E_INVALID_ARGUMENT 1 203 #define UCL_E_OUT_OF_MEMORY 2 204 #define UCL_E_ERROR 3 205 206 /*********************************************************************** 207 // 208 ************************************************************************/ 209 210 #define SWD_HSIZE 16384 211 #define SWD_MAX_CHAIN 2048 212 #define SWD_BEST_OFF 1 213 214 #define HEAD3(b,p) \ 215 (((0x9f5f*(((((uint32_t)b[p]<<5)^b[p+1])<<5)^b[p+2]))>>5) & (SWD_HSIZE-1)) 216 217 #define HEAD2(b,p) (b[p] ^ ((unsigned)b[p+1]<<8)) 218 #define NIL2 UINT_MAX 219 220 struct ucl_swd 221 { 222 /* public - "built-in" */ 223 unsigned int n; 224 unsigned int f; 225 unsigned int threshold; 226 227 /* public - configuration */ 228 unsigned int max_chain; 229 unsigned int nice_length; 230 int use_best_off; 231 unsigned int lazy_insert; 232 233 /* public - output */ 234 unsigned int m_len; 235 unsigned int m_off; 236 unsigned int look; 237 int b_char; 238 #if defined(SWD_BEST_OFF) 239 unsigned int best_off[ SWD_BEST_OFF ]; 240 #endif 241 242 /* semi public */ 243 struct ucl_compress *c; 244 unsigned int m_pos; 245 #if defined(SWD_BEST_OFF) 246 unsigned int best_pos[ SWD_BEST_OFF ]; 247 #endif 248 249 /* private */ 250 const uint8_t *dict; 251 const uint8_t *dict_end; 252 unsigned int dict_len; 253 254 /* private */ 255 unsigned int ip; /* input pointer (lookahead) */ 256 unsigned int bp; /* buffer pointer */ 257 unsigned int rp; /* remove pointer */ 258 unsigned int b_size; 259 260 unsigned char *b_wrap; 261 262 unsigned int node_count; 263 unsigned int first_rp; 264 265 unsigned char b [ N + F + F ]; 266 unsigned int head3 [ SWD_HSIZE ]; 267 unsigned int succ3 [ N + F ]; 268 unsigned int best3 [ N + F ]; 269 unsigned int llen3 [ SWD_HSIZE ]; 270 unsigned int head2 [ 65536U ]; 271 }; 272 273 #define s_head3(s,key) s->head3[key] 274 275 276 #if !defined( NDEBUG) 277 static void assert_match(const struct ucl_swd * swd, unsigned int m_len, 278 unsigned int m_off ) 279 280 { 281 const struct ucl_compress *c = swd->c; 282 unsigned int d_off; 283 284 assert(m_len >= 2); 285 if (m_off <= (unsigned int) (c->bp - c->in)) 286 { 287 assert(c->bp - m_off + m_len < c->ip); 288 assert(memcmp(c->bp, c->bp - m_off, m_len) == 0); 289 } 290 else 291 { 292 assert(swd->dict != NULL); 293 d_off = m_off - (unsigned int) (c->bp - c->in); 294 assert(d_off <= swd->dict_len); 295 if (m_len > d_off) 296 { 297 assert(memcmp(c->bp, swd->dict_end - d_off, d_off) == 298 0); 299 300 assert(c->in + m_len - d_off < c->ip); 301 assert(memcmp(c->bp + d_off, c->in, m_len - d_off) == 302 0); 303 304 } 305 else 306 { 307 assert(memcmp(c->bp, swd->dict_end - d_off, m_len) == 308 0); 309 310 } 311 } 312 } 313 #else 314 # define assert_match(a,b,c) ((void)0) 315 #endif 316 317 /*********************************************************************** 318 // 319 ************************************************************************/ 320 321 322 static 323 void swd_initdict(struct ucl_swd *s, const uint8_t *dict, unsigned int dict_len) 324 325 { 326 s->dict = s->dict_end = NULL; 327 s->dict_len = 0; 328 329 if (!dict || dict_len <= 0) 330 return; 331 if (dict_len > s->n) 332 { 333 dict += dict_len - s->n; 334 dict_len = s->n; 335 } 336 337 s->dict = dict; 338 s->dict_len = dict_len; 339 s->dict_end = dict + dict_len; 340 memcpy(s->b,dict,dict_len); 341 s->ip = dict_len; 342 } 343 344 345 static 346 void swd_insertdict(struct ucl_swd *s, unsigned int node, unsigned int len) 347 { 348 unsigned int key; 349 350 s->node_count = s->n - len; 351 s->first_rp = node; 352 353 while (len-- > 0) 354 { 355 key = HEAD3(s->b,node); 356 s->succ3[node] = s_head3(s,key); 357 s->head3[key] = (unsigned int)(node); 358 s->best3[node] = (unsigned int)(s->f + 1); 359 s->llen3[key]++; 360 assert(s->llen3[key] <= s->n); 361 362 key = HEAD2(s->b,node); 363 s->head2[key] = (unsigned int)(node); 364 365 node++; 366 } 367 } 368 369 /*********************************************************************** 370 // 371 ************************************************************************/ 372 373 374 static 375 int swd_init(struct ucl_swd *s, const uint8_t *dict, unsigned int dict_len) 376 { 377 unsigned int i = 0; 378 int c = 0; 379 380 if (s->n == 0) 381 s->n = N; 382 if (s->f == 0) 383 s->f = F; 384 s->threshold = THRESHOLD; 385 if (s->n > N || s->f > F) 386 return UCL_E_INVALID_ARGUMENT; 387 388 /* defaults */ 389 s->max_chain = SWD_MAX_CHAIN; 390 s->nice_length = s->f; 391 s->use_best_off = 0; 392 s->lazy_insert = 0; 393 394 s->b_size = s->n + s->f; 395 if (s->b_size + s->f >= UINT_MAX) 396 return UCL_E_ERROR; 397 s->b_wrap = s->b + s->b_size; 398 s->node_count = s->n; 399 400 memset(s->llen3, 0, sizeof(s->llen3[0]) * SWD_HSIZE); 401 for (i = 0; i < 65536U; i++) 402 s->head2[i] = NIL2; 403 404 s->ip = 0; 405 swd_initdict(s,dict,dict_len); 406 s->bp = s->ip; 407 s->first_rp = s->ip; 408 409 assert(s->ip + s->f <= s->b_size); 410 411 s->look = (unsigned int) (s->c->in_end - s->c->ip); 412 if (s->look > 0) 413 { 414 if (s->look > s->f) 415 s->look = s->f; 416 memcpy(&s->b[s->ip],s->c->ip,s->look); 417 s->c->ip += s->look; 418 s->ip += s->look; 419 } 420 if (s->ip == s->b_size) 421 s->ip = 0; 422 423 if (s->look >= 2 && s->dict_len > 0) 424 swd_insertdict(s,0,s->dict_len); 425 426 s->rp = s->first_rp; 427 if (s->rp >= s->node_count) 428 s->rp -= s->node_count; 429 else 430 s->rp += s->b_size - s->node_count; 431 432 /* unused i */ 433 /* unused c */ 434 return UCL_E_OK; 435 } 436 437 438 static 439 void swd_exit(struct ucl_swd *s) 440 { 441 /* unused s */ 442 443 } 444 445 #define swd_pos2off(s,pos) \ 446 (s->bp > (pos) ? s->bp - (pos) : s->b_size - ((pos) - s->bp)) 447 448 /*********************************************************************** 449 // 450 ************************************************************************/ 451 452 static __inline__ 453 void swd_getbyte(struct ucl_swd *s) 454 { 455 int c; 456 457 if ((c = getbyte(*(s->c))) < 0) 458 { 459 if (s->look > 0) 460 --s->look; 461 } 462 else 463 { 464 s->b[s->ip] = (uint8_t)(c); 465 if (s->ip < s->f) 466 s->b_wrap[s->ip] = (uint8_t)(c); 467 } 468 if (++s->ip == s->b_size) 469 s->ip = 0; 470 if (++s->bp == s->b_size) 471 s->bp = 0; 472 if (++s->rp == s->b_size) 473 s->rp = 0; 474 } 475 /*********************************************************************** 476 // remove node from lists 477 ************************************************************************/ 478 479 static __inline__ 480 void swd_remove_node(struct ucl_swd *s, unsigned int node) 481 { 482 if (s->node_count == 0) 483 { 484 unsigned int key; 485 486 #ifdef UCL_DEBUG 487 if (s->first_rp != UINT_MAX) 488 { 489 if (node != s->first_rp) 490 printf("Remove %5d: %5d %5d %5d %5d %6d %6d\n", 491 492 node, s->rp, s->ip, s->bp, s->first_rp, 493 s->ip - node, s->ip - s->bp); 494 assert(node == s->first_rp); 495 s->first_rp = UINT_MAX; 496 } 497 #endif 498 499 key = HEAD3(s->b,node); 500 assert(s->llen3[key] > 0); 501 --s->llen3[key]; 502 503 key = HEAD2(s->b,node); 504 assert(s->head2[key] != NIL2); 505 if ((unsigned int) s->head2[key] == node) 506 s->head2[key] = NIL2; 507 } 508 else 509 --s->node_count; 510 } 511 512 513 /*********************************************************************** 514 // 515 ************************************************************************/ 516 517 518 static 519 void swd_accept(struct ucl_swd *s, unsigned int n) 520 { 521 assert(n <= s->look); 522 523 if (n > 0) do 524 { 525 unsigned int key; 526 527 swd_remove_node(s,s->rp); 528 529 /* add bp into HEAD3 */ 530 key = HEAD3(s->b,s->bp); 531 s->succ3[s->bp] = s_head3(s,key); 532 s->head3[key] = (unsigned int)(s->bp); 533 s->best3[s->bp] = (unsigned int)(s->f + 1); 534 s->llen3[key]++; 535 assert(s->llen3[key] <= s->n); 536 537 /* add bp into HEAD2 */ 538 key = HEAD2(s->b,s->bp); 539 s->head2[key] = (unsigned int)(s->bp); 540 541 swd_getbyte(s); 542 } while (--n > 0); 543 } 544 545 /*********************************************************************** 546 // 547 ************************************************************************/ 548 549 static 550 void swd_search(struct ucl_swd *s, unsigned int node, unsigned int cnt) 551 { 552 const unsigned char *p1; 553 const unsigned char *p2; 554 const unsigned char *px; 555 556 unsigned int m_len = s->m_len; 557 const unsigned char * b = s->b; 558 const unsigned char * bp = s->b + s->bp; 559 const unsigned char * bx = s->b + s->bp + s->look; 560 unsigned char scan_end1; 561 562 assert(s->m_len > 0); 563 564 scan_end1 = bp[m_len - 1]; 565 for ( ; cnt-- > 0; node = s->succ3[node]) 566 { 567 p1 = bp; 568 p2 = b + node; 569 px = bx; 570 571 assert(m_len < s->look); 572 573 if ( 574 p2[m_len - 1] == scan_end1 && 575 p2[m_len] == p1[m_len] && 576 p2[0] == p1[0] && 577 p2[1] == p1[1]) 578 { 579 unsigned int i; 580 assert(memcmp(bp,&b[node],3) == 0); 581 582 p1 += 2; p2 += 2; 583 do {} while (++p1 < px && *p1 == *++p2); 584 i = p1 - bp; 585 586 #ifdef UCL_DEBUG 587 if (memcmp(bp,&b[node],i) != 0) 588 printf("%5ld %5ld %02x%02x %02x%02x\n", 589 (long)s->bp, (long) node, 590 bp[0], bp[1], b[node], b[node+1]); 591 #endif 592 assert(memcmp(bp,&b[node],i) == 0); 593 594 #if defined(SWD_BEST_OFF) 595 if (i < SWD_BEST_OFF) 596 { 597 if (s->best_pos[i] == 0) 598 s->best_pos[i] = node + 1; 599 } 600 #endif 601 if (i > m_len) 602 { 603 s->m_len = m_len = i; 604 s->m_pos = node; 605 if (m_len == s->look) 606 return; 607 if (m_len >= s->nice_length) 608 return; 609 if (m_len > (unsigned int) s->best3[node]) 610 return; 611 scan_end1 = bp[m_len - 1]; 612 } 613 } 614 } 615 } 616 617 static int swd_search2(struct ucl_swd *s) 618 { 619 unsigned int key; 620 621 assert(s->look >= 2); 622 assert(s->m_len > 0); 623 624 key = s->head2[ HEAD2(s->b,s->bp) ]; 625 if (key == NIL2) 626 return 0; 627 #ifdef UCL_DEBUG 628 if (memcmp(&s->b[s->bp],&s->b[key],2) != 0) 629 printf("%5ld %5ld %02x%02x %02x%02x\n", (long)s->bp, (long)key, 630 s->b[s->bp], s->b[s->bp+1], s->b[key], s->b[key+1]); 631 #endif 632 assert(memcmp(&s->b[s->bp],&s->b[key],2) == 0); 633 #if defined(SWD_BEST_OFF) 634 if (s->best_pos[2] == 0) 635 s->best_pos[2] = key + 1; 636 #endif 637 638 if (s->m_len < 2) 639 { 640 s->m_len = 2; 641 s->m_pos = key; 642 } 643 return 1; 644 } 645 646 /*********************************************************************** 647 // 648 ************************************************************************/ 649 650 static 651 void swd_findbest(struct ucl_swd *s) 652 { 653 unsigned int key; 654 unsigned int cnt, node; 655 unsigned int len; 656 657 assert(s->m_len > 0); 658 659 /* get current head, add bp into HEAD3 */ 660 key = HEAD3(s->b,s->bp); 661 node = s->succ3[s->bp] = s_head3(s,key); 662 cnt = s->llen3[key]++; 663 assert(s->llen3[key] <= s->n + s->f); 664 if (cnt > s->max_chain && s->max_chain > 0) 665 cnt = s->max_chain; 666 s->head3[key] = (unsigned int)(s->bp); 667 668 s->b_char = s->b[s->bp]; 669 len = s->m_len; 670 if (s->m_len >= s->look) 671 { 672 if (s->look == 0) 673 s->b_char = -1; 674 s->m_off = 0; 675 s->best3[s->bp] = (unsigned int)(s->f + 1); 676 } 677 else 678 { 679 if (swd_search2(s)) 680 if (s->look >= 3) 681 swd_search(s,node,cnt); 682 if (s->m_len > len) 683 s->m_off = swd_pos2off(s,s->m_pos); 684 s->best3[s->bp] = (unsigned int)(s->m_len); 685 686 #if defined(SWD_BEST_OFF) 687 if (s->use_best_off) 688 { 689 int i; 690 for (i = 2; i < SWD_BEST_OFF; i++) 691 if (s->best_pos[i] > 0) 692 s->best_off[i] = 693 swd_pos2off(s,s->best_pos[i]-1); 694 695 else 696 s->best_off[i] = 0; 697 } 698 #endif 699 } 700 701 swd_remove_node(s,s->rp); 702 703 /* add bp into HEAD2 */ 704 key = HEAD2(s->b,s->bp); 705 s->head2[key] = (unsigned int)(s->bp); 706 } 707 708 709 /*********************************************************************** 710 // 711 ************************************************************************/ 712 713 static int 714 init_match ( struct ucl_compress *c, struct ucl_swd *s, 715 const uint8_t *dict, unsigned int dict_len, 716 uint32_t flags ) 717 { 718 int r; 719 720 assert(!c->init); 721 c->init = 1; 722 723 s->c = c; 724 725 c->last_m_len = c->last_m_off = 0; 726 727 c->textsize = c->codesize = c->printcount = 0; 728 c->lit_bytes = c->match_bytes = c->rep_bytes = 0; 729 c->lazy = 0; 730 731 r = swd_init(s,dict,dict_len); 732 if (r != UCL_E_OK) 733 { 734 swd_exit(s); 735 return r; 736 } 737 738 s->use_best_off = (flags & 1) ? 1 : 0; 739 return UCL_E_OK; 740 } 741 742 static int 743 find_match ( struct ucl_compress *c, struct ucl_swd *s, 744 unsigned int this_len, unsigned int skip ) 745 { 746 assert(c->init); 747 748 if (skip > 0) 749 { 750 assert(this_len >= skip); 751 swd_accept(s, this_len - skip); 752 c->textsize += this_len - skip + 1; 753 } 754 else 755 { 756 assert(this_len <= 1); 757 c->textsize += this_len - skip; 758 } 759 760 s->m_len = THRESHOLD; 761 #ifdef SWD_BEST_OFF 762 if (s->use_best_off) 763 memset(s->best_pos,0,sizeof(s->best_pos)); 764 #endif 765 swd_findbest(s); 766 c->m_len = s->m_len; 767 c->m_off = s->m_off; 768 769 swd_getbyte(s); 770 771 if (s->b_char < 0) 772 { 773 c->look = 0; 774 c->m_len = 0; 775 swd_exit(s); 776 } 777 else 778 { 779 c->look = s->look + 1; 780 } 781 c->bp = c->ip - c->look; 782 783 #if 0 784 /* brute force match search */ 785 if (c->m_len > THRESHOLD && c->m_len + 1 <= c->look) 786 { 787 const uint8_t *ip = c->bp; 788 const uint8_t *m = c->bp - c->m_off; 789 const uint8_t *in = c->in; 790 791 if (ip - in > N) 792 in = ip - N; 793 for (;;) 794 { 795 while (*in != *ip) 796 in++; 797 if (in == ip) 798 break; 799 if (in != m) 800 if (memcmp(in,ip,c->m_len+1) == 0) 801 printf("%p %p %p %5d\n",in,ip,m,c->m_len); 802 803 in++; 804 } 805 } 806 #endif 807 808 return UCL_E_OK; 809 } 810 811 812 static int bbConfig(struct ucl_compress *c, int endian, int bitsize) 813 { 814 if (endian != -1) 815 { 816 if (endian != 0) 817 return UCL_E_ERROR; 818 c->bb_c_endian = endian; 819 } 820 if (bitsize != -1) 821 { 822 if (bitsize != 8 && bitsize != 16 && bitsize != 32 && bitsize != 64) 823 return UCL_E_ERROR; 824 c->bb_c_s = bitsize; 825 c->bb_c_s8 = bitsize / 8; 826 } 827 c->bb_b = 0; c->bb_k = 0; 828 c->bb_p = NULL; 829 c->bb_op = NULL; 830 return UCL_E_OK; 831 } 832 833 static void bbWriteBits(struct ucl_compress *c) 834 { 835 uint8_t *p = c->bb_p; 836 uint64_t b = c->bb_b; 837 838 p[0] = (uint8_t)(b >> 0); 839 if (c->bb_c_s >= 16) 840 { 841 p[1] = (uint8_t)(b >> 8); 842 if (c->bb_c_s >= 32) 843 { 844 p[2] = (uint8_t)(b >> 16); 845 p[3] = (uint8_t)(b >> 24); 846 if (c->bb_c_s == 64) 847 { 848 p[4] = (uint8_t)(b >> 32); 849 p[5] = (uint8_t)(b >> 40); 850 p[6] = (uint8_t)(b >> 48); 851 p[7] = (uint8_t)(b >> 56); 852 } 853 } 854 } 855 } 856 857 858 static void bbPutBit(struct ucl_compress *c, unsigned bit) 859 { 860 assert(bit == 0 || bit == 1); 861 assert(c->bb_k <= c->bb_c_s); 862 863 if (c->bb_k < c->bb_c_s) 864 { 865 if (c->bb_k == 0) 866 { 867 assert(c->bb_p == NULL); 868 c->bb_p = c->bb_op; 869 c->bb_op += c->bb_c_s8; 870 } 871 assert(c->bb_p != NULL); 872 assert(c->bb_p + c->bb_c_s8 <= c->bb_op); 873 874 c->bb_b = (c->bb_b << 1) + bit; 875 c->bb_k++; 876 } 877 else 878 { 879 assert(c->bb_p != NULL); 880 assert(c->bb_p + c->bb_c_s8 <= c->bb_op); 881 882 bbWriteBits(c); 883 c->bb_p = c->bb_op; 884 c->bb_op += c->bb_c_s8; 885 c->bb_b = bit; 886 c->bb_k = 1; 887 } 888 } 889 890 891 static void bbPutByte(struct ucl_compress *c, unsigned b) 892 { 893 /**printf("putbyte %p %p %x (%d)\n", op, bb_p, x, bb_k);*/ 894 assert(c->bb_p == NULL || c->bb_p + c->bb_c_s8 <= c->bb_op); 895 *c->bb_op++ = (uint8_t)(b); 896 } 897 898 static void bbFlushBits(struct ucl_compress *c, unsigned filler_bit) 899 { 900 if (c->bb_k > 0) 901 { 902 assert(c->bb_k <= c->bb_c_s); 903 while (c->bb_k != c->bb_c_s) 904 bbPutBit(c, filler_bit); 905 bbWriteBits(c); 906 c->bb_k = 0; 907 } 908 c->bb_p = NULL; 909 } 910 911 912 913 /*********************************************************************** 914 // 915 ************************************************************************/ 916 917 918 static void code_prefix_ss11(struct ucl_compress *c, uint32_t i) 919 { 920 if (i >= 2) 921 { 922 uint32_t t = 4; 923 i += 2; 924 do { 925 t <<= 1; 926 } while (i >= t); 927 t >>= 1; 928 do { 929 t >>= 1; 930 bbPutBit(c, (i & t) ? 1 : 0); 931 bbPutBit(c, 0); 932 } while (t > 2); 933 } 934 bbPutBit(c, (unsigned)i & 1); 935 bbPutBit(c, 1); 936 } 937 938 static void 939 code_match(struct ucl_compress *c, unsigned int m_len, const unsigned int m_off) 940 941 { 942 while (m_len > c->conf.max_match) 943 { 944 code_match(c, c->conf.max_match - 3, m_off); 945 m_len -= c->conf.max_match - 3; 946 } 947 948 c->match_bytes += m_len; 949 if (m_len > c->result[3]) 950 c->result[3] = m_len; 951 if (m_off > c->result[1]) 952 c->result[1] = m_off; 953 954 bbPutBit(c, 0); 955 956 if (m_off == c->last_m_off) 957 { 958 bbPutBit(c, 0); 959 bbPutBit(c, 1); 960 } 961 else 962 { 963 code_prefix_ss11(c, 1 + ((m_off - 1) >> 8)); 964 bbPutByte(c, (unsigned)m_off - 1); 965 } 966 m_len = m_len - 1 - (m_off > M2_MAX_OFFSET); 967 if (m_len >= 4) 968 { 969 bbPutBit(c,0); 970 bbPutBit(c,0); 971 code_prefix_ss11(c, m_len - 4); 972 } 973 else 974 { 975 bbPutBit(c, m_len > 1); 976 bbPutBit(c, (unsigned)m_len & 1); 977 } 978 979 c->last_m_off = m_off; 980 } 981 982 static void 983 code_run(struct ucl_compress *c, const uint8_t *ii, unsigned int lit) 984 { 985 if (lit == 0) 986 return; 987 c->lit_bytes += lit; 988 if (lit > c->result[5]) 989 c->result[5] = lit; 990 do { 991 bbPutBit(c, 1); 992 bbPutByte(c, *ii++); 993 } while (--lit > 0); 994 } 995 996 /*********************************************************************** 997 // 998 ************************************************************************/ 999 1000 static int 1001 len_of_coded_match(struct ucl_compress *c, unsigned int m_len, unsigned int 1002 m_off) 1003 1004 { 1005 int b; 1006 if (m_len < 2 || (m_len == 2 && (m_off > M2_MAX_OFFSET)) 1007 || m_off > c->conf.max_offset) 1008 return -1; 1009 assert(m_off > 0); 1010 1011 m_len = m_len - 2 - (m_off > M2_MAX_OFFSET); 1012 1013 if (m_off == c->last_m_off) 1014 b = 1 + 2; 1015 else 1016 { 1017 b = 1 + 10; 1018 m_off = (m_off - 1) >> 8; 1019 while (m_off > 0) 1020 { 1021 b += 2; 1022 m_off >>= 1; 1023 } 1024 } 1025 1026 b += 2; 1027 if (m_len < 3) 1028 return b; 1029 m_len -= 3; 1030 1031 do { 1032 b += 2; 1033 m_len >>= 1; 1034 } while (m_len > 0); 1035 1036 return b; 1037 } 1038 1039 int ucl_nrv2b_99_compress( 1040 const uint8_t *in, unsigned long in_len, 1041 uint8_t *out, unsigned long *out_len, 1042 unsigned int *result) 1043 { 1044 const uint8_t *ii; 1045 unsigned int lit; 1046 unsigned int m_len, m_off; 1047 struct ucl_compress c_buffer; 1048 struct ucl_compress * const c = &c_buffer; 1049 struct ucl_swd *swd; 1050 unsigned int result_buffer[16]; 1051 int r; 1052 1053 /* max compression */ 1054 #define SC_TRY_LAZY 2 1055 #define SC_GOOD_LENGTH F 1056 #define SC_MAX_LAZY F 1057 #define SC_NICE_LENGTH F 1058 #define SC_MAX_CHAIN 4096 1059 #define SC_FLAGS 1 1060 #define SC_MAX_OFFSET N 1061 1062 memset(c, 0, sizeof(*c)); 1063 c->ip = c->in = in; 1064 c->in_end = in + in_len; 1065 c->out = out; 1066 c->result = result ? result : result_buffer; 1067 memset(c->result, 0, 16*sizeof(*c->result)); 1068 c->result[0] = c->result[2] = c->result[4] = UINT_MAX; 1069 result = NULL; 1070 memset(&c->conf, 0xff, sizeof(c->conf)); 1071 r = bbConfig(c, ENDIAN, BITSIZE); 1072 if (r == 0) 1073 r = bbConfig(c, c->conf.bb_endian, c->conf.bb_size); 1074 if (r != 0) 1075 return UCL_E_INVALID_ARGUMENT; 1076 c->bb_op = out; 1077 1078 ii = c->ip; /* point to start of literal run */ 1079 lit = 0; 1080 1081 1082 swd = (struct ucl_swd *) malloc(sizeof(*swd)); 1083 if (!swd) 1084 return UCL_E_OUT_OF_MEMORY; 1085 1086 swd->f = F; 1087 swd->n = N; 1088 if (in_len >= 256 && in_len < swd->n) 1089 swd->n = in_len; 1090 if (swd->f < 8 || swd->n < 256) 1091 return UCL_E_INVALID_ARGUMENT; 1092 1093 r = init_match(c,swd,NULL,0, SC_FLAGS); 1094 if (r != UCL_E_OK) 1095 { 1096 free(swd); 1097 return r; 1098 } 1099 if (SC_MAX_CHAIN > 0) 1100 swd->max_chain = SC_MAX_CHAIN; 1101 if (SC_NICE_LENGTH > 0) 1102 swd->nice_length = SC_NICE_LENGTH; 1103 if (c->conf.max_match < swd->nice_length) 1104 swd->nice_length = c->conf.max_match; 1105 1106 c->last_m_off = 1; 1107 r = find_match(c,swd,0,0); 1108 if (r != UCL_E_OK) 1109 return r; 1110 while (c->look > 0) 1111 { 1112 unsigned int ahead; 1113 unsigned int max_ahead; 1114 int l1, l2; 1115 1116 c->codesize = c->bb_op - out; 1117 1118 m_len = c->m_len; 1119 m_off = c->m_off; 1120 1121 assert(c->bp == c->ip - c->look); 1122 assert(c->bp >= in); 1123 if (lit == 0) 1124 ii = c->bp; 1125 assert(ii + lit == c->bp); 1126 assert(swd->b_char == *(c->bp)); 1127 1128 if (m_len < 2 || (m_len == 2 && (m_off > M2_MAX_OFFSET)) 1129 || m_off > c->conf.max_offset) 1130 { 1131 /* a literal */ 1132 lit++; 1133 swd->max_chain = SC_MAX_CHAIN; 1134 r = find_match(c,swd,1,0); 1135 assert(r == 0); 1136 continue; 1137 } 1138 1139 /* a match */ 1140 assert_match(swd,m_len,m_off); 1141 1142 /* shall we try a lazy match ? */ 1143 ahead = 0; 1144 if (SC_TRY_LAZY <= 0 || m_len >= SC_MAX_LAZY || m_off == 1145 c->last_m_off) 1146 1147 { 1148 /* no */ 1149 l1 = 0; 1150 max_ahead = 0; 1151 } 1152 else 1153 { 1154 /* yes, try a lazy match */ 1155 l1 = len_of_coded_match(c,m_len,m_off); 1156 assert(l1 > 0); 1157 max_ahead = SC_TRY_LAZY; 1158 if ((m_len - 1) < max_ahead) { 1159 max_ahead = m_len -1; 1160 } 1161 } 1162 1163 while (ahead < max_ahead && c->look > m_len) 1164 { 1165 if (m_len >= SC_GOOD_LENGTH) 1166 swd->max_chain = SC_MAX_CHAIN >> 2; 1167 else 1168 swd->max_chain = SC_MAX_CHAIN; 1169 r = find_match(c,swd,1,0); 1170 ahead++; 1171 1172 assert(r == 0); 1173 assert(c->look > 0); 1174 assert(ii + lit + ahead == c->bp); 1175 1176 if (c->m_len < 2) 1177 continue; 1178 l2 = len_of_coded_match(c,c->m_len,c->m_off); 1179 if (l2 < 0) 1180 continue; 1181 if (l1 + (int)(ahead + c->m_len - m_len) * 5 > l2 + 1182 (int)(ahead) * 9) 1183 { 1184 c->lazy++; 1185 assert_match(swd,c->m_len,c->m_off); 1186 lit += ahead; 1187 assert(ii + lit == c->bp); 1188 goto lazy_match_done; 1189 } 1190 } 1191 1192 assert(ii + lit + ahead == c->bp); 1193 1194 /* 1 - code run */ 1195 code_run(c,ii,lit); 1196 lit = 0; 1197 1198 /* 2 - code match */ 1199 code_match(c,m_len,m_off); 1200 swd->max_chain = SC_MAX_CHAIN; 1201 r = find_match(c,swd,m_len,1+ahead); 1202 assert(r == 0); 1203 1204 lazy_match_done: ; 1205 } 1206 1207 /* store final run */ 1208 code_run(c,ii,lit); 1209 1210 /* EOF */ 1211 bbPutBit(c, 0); 1212 code_prefix_ss11(c, 0x1000000U); 1213 bbPutByte(c, 0xff); 1214 1215 bbFlushBits(c, 0); 1216 1217 assert(c->textsize == in_len); 1218 c->codesize = c->bb_op - out; 1219 *out_len = c->bb_op - out; 1220 1221 #if 0 1222 printf("%7ld %7ld -> %7ld %7ld %7ld %ld (max: %d %d %d)\n", 1223 (long) c->textsize, (long) in_len, (long) c->codesize, 1224 c->match_bytes, c->lit_bytes, c->lazy, 1225 c->result[1], c->result[3], c->result[5]); 1226 #endif 1227 assert(c->lit_bytes + c->match_bytes == in_len); 1228 1229 swd_exit(swd); 1230 free(swd); 1231 1232 return UCL_E_OK; 1233 } 1234 1235 1236 void Encode(void) /* compression */ 1237 { 1238 uint8_t *in, *out; 1239 unsigned long in_len, out_len; 1240 uint32_t tw; 1241 int r; 1242 fseek(infile, 0, SEEK_END); 1243 in_len = ftell(infile); 1244 #ifdef VERBOSE 1245 if ((signed long)in_len < 0) 1246 Fprintf((stderr, "Errno: %d", errno)); 1247 #endif 1248 #if UCLPACK_COMPAT 1249 { 1250 uint8_t byte; 1251 if (fwrite(magic, sizeof(magic), 1, outfile) != 1) 1252 Error("Can't write."); 1253 tw = htonl(0); /* flags */ 1254 if (fwrite(&tw, sizeof(tw), 1, outfile) != 1) 1255 Error("Can't write."); 1256 byte = 0x2b; /* method */ 1257 if (fwrite(&byte, sizeof(byte), 1, outfile) != 1) 1258 Error("Can't write."); 1259 byte = 10; /* level */ 1260 if (fwrite(&byte, sizeof(byte), 1, outfile) != 1) 1261 Error("Can't write."); 1262 tw = htonl(256*1024); /* block_size */ 1263 if (fwrite(&tw, sizeof(tw), 1, outfile) != 1) 1264 Error("Can't write."); 1265 tw = htonl(in_len); 1266 if (fwrite(&tw, sizeof(tw), 1, outfile) != 1) 1267 Error("Can't write."); /* output size of text */ 1268 } 1269 #else 1270 tw = host_to_i86ul(in_len); 1271 if (fwrite(&tw, sizeof(tw), 1, outfile) != 1) 1272 Error("Can't write."); /* output size of text */ 1273 #endif 1274 if (in_len == 0) 1275 return; 1276 rewind(infile); 1277 1278 in = malloc(in_len); 1279 out_len = in_len + (in_len/8) + 256; 1280 out = malloc(out_len); 1281 if (!in || !out) { 1282 Error("Can't malloc"); 1283 } 1284 if (fread(in, in_len, 1, infile) != 1) { 1285 Error("Can't read"); 1286 } 1287 r = ucl_nrv2b_99_compress(in, in_len, out, &out_len, 0 ); 1288 if (r != UCL_E_OK) 1289 Error("Compression failure\n"); 1290 #if UCLPACK_COMPAT 1291 tw = htonl(out_len); 1292 if (fwrite(&tw, sizeof(tw), 1, outfile) != 1) 1293 Error("Can't write."); /* file size of text */ 1294 1295 #endif 1296 if (fwrite(out, out_len, 1, outfile) != 1) { 1297 Error("Write error\n"); 1298 } 1299 #if UCLPACK_COMPAT 1300 tw = htonl(0); /* EOF marker */ 1301 if (fwrite(&tw, sizeof(tw), 1, outfile) != 1) 1302 Error("Can't write."); 1303 1304 #endif 1305 1306 #ifdef LONG_REPORT 1307 Fprintf((stdout, "input size %ld bytes\n", in_len)); 1308 Fprintf((stdout, "output size %ld bytes\n", out_len)); 1309 Fprintf((stdout, "input/output %.3f\n", (double)in_len / out_len)); 1310 #else 1311 Fprintf((stdout, "input/output = %ld/%ld = %.3f\n", in_len, out_len, 1312 (double)in_len / out_len)); 1313 #endif 1314 1315 } 1316 1317 #endif 1318 1319 #ifdef DECODE 1320 1321 #define GETBIT_8(bb, src, ilen) \ 1322 (((bb = bb & 0x7f ? bb*2 : ((unsigned)src[ilen++]*2+1)) >> 8) & 1) 1323 1324 #define GETBIT_LE16(bb, src, ilen) \ 1325 (bb*=2,bb&0xffff ? (bb>>16)&1 : (ilen+=2,((bb=(src[ilen-2]+src[ilen-1]*256u)*2+1)>>16)&1)) 1326 1327 #define GETBIT_LE32(bb, src, ilen) \ 1328 (bc > 0 ? ((bb>>--bc)&1) : (bc=31,\ 1329 bb=*(const uint32_t *)((src)+ilen),ilen+=4,(bb>>31)&1)) 1330 1331 #define GETBIT_LE64(bb, src, ilen) \ 1332 (bc > 0 ? ((bb>>--bc)&1) : (bc=63, \ 1333 bb=*(const uint64_t *)((src)+ilen),ilen+=8,(bb>>63)&1)) 1334 1335 #if ENDIAN == 0 && BITSIZE == 8 1336 #define GETBIT(bb, src, ilen) GETBIT_8(bb, src, ilen) 1337 #endif 1338 #if ENDIAN == 0 && BITSIZE == 16 1339 #define GETBIT(bb, src, ilen) GETBIT_LE16(bb, src, ilen) 1340 #endif 1341 #if ENDIAN == 0 && BITSIZE == 32 1342 #define GETBIT(bb, src, ilen) GETBIT_LE32(bb, src, ilen) 1343 #endif 1344 #if ENDIAN == 0 && BITSIZE == 64 1345 #define GETBIT(bb, src, ilen) GETBIT_LE64(bb, src, ilen) 1346 #endif 1347 #ifndef GETBIT 1348 #error "Bad Combination of ENDIAN and BITSIZE values specified" 1349 #endif 1350 1351 #undef SAFE 1352 1353 #ifdef SAFE 1354 #define FAIL(x,r) if (x) { Error(r); } 1355 #else 1356 #define FAIL(x,r) 1357 #endif 1358 1359 void Decode(void) /* recover */ 1360 { 1361 uint32_t tw; 1362 uint8_t *src, *dst; 1363 unsigned long max_src_len, src_len, dst_len; 1364 unsigned long ilen = 0, olen = 0, last_m_off = 1; 1365 #if BITSIZE <= 32 1366 uint32_t bb = 0; 1367 #elif BITSIZE == 64 1368 uint64_t bb = 0; 1369 #endif 1370 unsigned bc = 0; 1371 #if UCLPACK_COMPAT 1372 if (fseek(infile, sizeof(magic) + sizeof(tw) + 1 + 1 + sizeof(tw), 1373 SEEK_SET) != 0) 1374 1375 Error("Seek Error"); 1376 if (fread(&tw, sizeof(tw), 1, infile) < 1) 1377 Error("Can't read"); /* read size of text */ 1378 dst_len = ntohl(tw); 1379 if (fread(&tw, sizeof(tw), 1, infile) < 1) 1380 Error("Can't read"); /* read size of file */ 1381 max_src_len = ntohl(tw); 1382 #else 1383 if (fread(&tw, sizeof(tw), 1, infile) < 1) 1384 Error("Can't read"); /* read size of text */ 1385 dst_len = i86ul_to_host(tw); 1386 max_src_len = dst_len + (dst_len/8) + 256; 1387 #endif 1388 if (dst_len == 0) 1389 return; 1390 dst = malloc(dst_len); 1391 if (!dst) 1392 Error("Can't malloc"); 1393 src = malloc(max_src_len); 1394 if (!src) 1395 Error("Can't malloc"); 1396 src_len = fread(src, 1, max_src_len, infile); 1397 if (src_len <= 0) 1398 Error("Can't read"); 1399 1400 for(;;) { 1401 unsigned int m_off, m_len; 1402 while(GETBIT(bb, src, ilen)) { 1403 FAIL(ilen >= src_len, "input overrun"); 1404 FAIL(olen >= dst_len, "output overrun"); 1405 dst[olen++] = src[ilen++]; 1406 } 1407 m_off = 1; 1408 do { 1409 m_off = m_off*2 + GETBIT(bb, src, ilen); 1410 FAIL(ilen >= src_len, "input overrun"); 1411 FAIL(m_off > 0xffffffU +3, "lookbehind overrun"); 1412 } while (!GETBIT(bb, src, ilen)); 1413 if (m_off == 2) 1414 { 1415 m_off = last_m_off; 1416 } 1417 else 1418 { 1419 FAIL(ilen >= src_len, "input overrun"); 1420 m_off = (m_off - 3)*256 + src[ilen++]; 1421 if (m_off == 0xffffffffU) 1422 break; 1423 last_m_off = ++m_off; 1424 } 1425 m_len = GETBIT(bb, src, ilen); 1426 m_len = m_len*2 + GETBIT(bb, src, ilen); 1427 if (m_len == 0) 1428 { 1429 m_len++; 1430 do { 1431 m_len = m_len*2 + GETBIT(bb, src, ilen); 1432 FAIL(ilen >= src_len, "input overrun"); 1433 FAIL(m_len >= dst_len, "output overrun"); 1434 } while(!GETBIT(bb, src, ilen)); 1435 m_len += 2; 1436 } 1437 m_len += (m_off > 0xd00); 1438 FAIL(olen + m_len > dst_len, "output overrun"); 1439 FAIL(m_off > olen, "lookbeind overrun"); 1440 { 1441 const uint8_t *m_pos; 1442 m_pos = dst + olen - m_off; 1443 dst[olen++] = *m_pos++; 1444 do { 1445 dst[olen++] = *m_pos++; 1446 } while(--m_len > 0); 1447 } 1448 } 1449 FAIL(ilen < src_len, "input not consumed"); 1450 FAIL(ilen > src_len, "input overrun"); 1451 assert(ilen == src_len); 1452 Fprintf((stderr, "%12ld\n", olen)); 1453 if (dst_len != olen) { 1454 fprintf(stderr, "length != expected length\n"); 1455 } 1456 if (fwrite(dst, olen, 1, outfile) != 1) 1457 Error("Write error\n"); 1458 free(src); 1459 free(dst); 1460 } 1461 #endif 1462 1463 #ifdef MAIN 1464 int main(int argc, char *argv[]) 1465 { 1466 char *s; 1467 FILE *f; 1468 int c; 1469 1470 if (argc == 2) { 1471 outfile = stdout; 1472 if ((f = tmpfile()) == NULL) { 1473 perror("tmpfile"); 1474 return EXIT_FAILURE; 1475 } 1476 while ((c = getchar()) != EOF) 1477 fputc(c, f); 1478 rewind(infile = f); 1479 } 1480 else if (argc != 4) { 1481 Fprintf((stderr, "'nrv2b e file1 file2' encodes file1 into file2.\n" 1482 "'nrv2b d file2 file1' decodes file2 into file1.\n")); 1483 return EXIT_FAILURE; 1484 } 1485 if (argc == 4) { 1486 if ((s = argv[1], s[1] || strpbrk(s, "DEde") == NULL) 1487 || (s = argv[2], (infile = fopen(s, "rb")) == NULL) 1488 || (s = argv[3], (outfile = fopen(s, "wb")) == NULL)) { 1489 Fprintf((stderr, "??? %s\n", s)); 1490 return EXIT_FAILURE; 1491 } 1492 } 1493 if (toupper(*argv[1]) == 'E') 1494 Encode(); 1495 else 1496 Decode(); 1497 fclose(infile); 1498 fclose(outfile); 1499 return EXIT_SUCCESS; 1500 } 1501 #endif 1502