1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Small in-memory unzip implementation. 6 // A simplified copy of the pre-Go 1 compress/flate/inflate.go 7 // and a modified copy of the zip reader in package time. 8 // (The one in package time does not support decompression; this one does.) 9 10 package syscall 11 12 const ( 13 maxCodeLen = 16 // max length of Huffman code 14 maxHist = 32768 // max history required 15 maxLit = 286 16 maxDist = 32 17 numCodes = 19 // number of codes in Huffman meta-code 18 ) 19 20 type decompressor struct { 21 in string // compressed input 22 out []byte // uncompressed output 23 b uint32 // input bits, at top of b 24 nb uint 25 err bool // invalid input 26 eof bool // reached EOF 27 28 h1, h2 huffmanDecoder // decoders for literal/length, distance 29 bits [maxLit + maxDist]int // lengths defining Huffman codes 30 codebits [numCodes]int 31 } 32 33 func (f *decompressor) nextBlock() { 34 for f.nb < 1+2 { 35 if f.moreBits(); f.err { 36 return 37 } 38 } 39 f.eof = f.b&1 == 1 40 f.b >>= 1 41 typ := f.b & 3 42 f.b >>= 2 43 f.nb -= 1 + 2 44 switch typ { 45 case 0: 46 f.dataBlock() 47 case 1: 48 // compressed, fixed Huffman tables 49 f.huffmanBlock(&fixedHuffmanDecoder, nil) 50 case 2: 51 // compressed, dynamic Huffman tables 52 if f.readHuffman(); f.err { 53 break 54 } 55 f.huffmanBlock(&f.h1, &f.h2) 56 default: 57 // 3 is reserved. 58 f.err = true 59 } 60 } 61 62 // RFC 1951 section 3.2.7. 63 // Compression with dynamic Huffman codes 64 65 var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15} 66 67 func (f *decompressor) readHuffman() { 68 // HLIT[5], HDIST[5], HCLEN[4]. 69 for f.nb < 5+5+4 { 70 if f.moreBits(); f.err { 71 return 72 } 73 } 74 nlit := int(f.b&0x1F) + 257 75 f.b >>= 5 76 ndist := int(f.b&0x1F) + 1 77 f.b >>= 5 78 nclen := int(f.b&0xF) + 4 79 f.b >>= 4 80 f.nb -= 5 + 5 + 4 81 82 // (HCLEN+4)*3 bits: code lengths in the magic codeOrder order. 83 for i := 0; i < nclen; i++ { 84 for f.nb < 3 { 85 if f.moreBits(); f.err { 86 return 87 } 88 } 89 f.codebits[codeOrder[i]] = int(f.b & 0x7) 90 f.b >>= 3 91 f.nb -= 3 92 } 93 for i := nclen; i < len(codeOrder); i++ { 94 f.codebits[codeOrder[i]] = 0 95 } 96 if !f.h1.init(f.codebits[0:]) { 97 f.err = true 98 return 99 } 100 101 // HLIT + 257 code lengths, HDIST + 1 code lengths, 102 // using the code length Huffman code. 103 for i, n := 0, nlit+ndist; i < n; { 104 x := f.huffSym(&f.h1) 105 if f.err { 106 return 107 } 108 if x < 16 { 109 // Actual length. 110 f.bits[i] = x 111 i++ 112 continue 113 } 114 // Repeat previous length or zero. 115 var rep int 116 var nb uint 117 var b int 118 switch x { 119 default: 120 f.err = true 121 return 122 case 16: 123 rep = 3 124 nb = 2 125 if i == 0 { 126 f.err = true 127 return 128 } 129 b = f.bits[i-1] 130 case 17: 131 rep = 3 132 nb = 3 133 b = 0 134 case 18: 135 rep = 11 136 nb = 7 137 b = 0 138 } 139 for f.nb < nb { 140 if f.moreBits(); f.err { 141 return 142 } 143 } 144 rep += int(f.b & uint32(1<<nb-1)) 145 f.b >>= nb 146 f.nb -= nb 147 if i+rep > n { 148 f.err = true 149 return 150 } 151 for j := 0; j < rep; j++ { 152 f.bits[i] = b 153 i++ 154 } 155 } 156 157 if !f.h1.init(f.bits[0:nlit]) || !f.h2.init(f.bits[nlit:nlit+ndist]) { 158 f.err = true 159 return 160 } 161 } 162 163 // Decode a single Huffman block from f. 164 // hl and hd are the Huffman states for the lit/length values 165 // and the distance values, respectively. If hd == nil, using the 166 // fixed distance encoding associated with fixed Huffman blocks. 167 func (f *decompressor) huffmanBlock(hl, hd *huffmanDecoder) { 168 for { 169 v := f.huffSym(hl) 170 if f.err { 171 return 172 } 173 var n uint // number of bits extra 174 var length int 175 switch { 176 case v < 256: 177 f.out = append(f.out, byte(v)) 178 continue 179 case v == 256: 180 // Done with huffman block; read next block. 181 return 182 // otherwise, reference to older data 183 case v < 265: 184 length = v - (257 - 3) 185 n = 0 186 case v < 269: 187 length = v*2 - (265*2 - 11) 188 n = 1 189 case v < 273: 190 length = v*4 - (269*4 - 19) 191 n = 2 192 case v < 277: 193 length = v*8 - (273*8 - 35) 194 n = 3 195 case v < 281: 196 length = v*16 - (277*16 - 67) 197 n = 4 198 case v < 285: 199 length = v*32 - (281*32 - 131) 200 n = 5 201 default: 202 length = 258 203 n = 0 204 } 205 if n > 0 { 206 for f.nb < n { 207 if f.moreBits(); f.err { 208 return 209 } 210 } 211 length += int(f.b & uint32(1<<n-1)) 212 f.b >>= n 213 f.nb -= n 214 } 215 216 var dist int 217 if hd == nil { 218 for f.nb < 5 { 219 if f.moreBits(); f.err { 220 return 221 } 222 } 223 dist = int(reverseByte[(f.b&0x1F)<<3]) 224 f.b >>= 5 225 f.nb -= 5 226 } else { 227 if dist = f.huffSym(hd); f.err { 228 return 229 } 230 } 231 232 switch { 233 case dist < 4: 234 dist++ 235 case dist >= 30: 236 f.err = true 237 return 238 default: 239 nb := uint(dist-2) >> 1 240 // have 1 bit in bottom of dist, need nb more. 241 extra := (dist & 1) << nb 242 for f.nb < nb { 243 if f.moreBits(); f.err { 244 return 245 } 246 } 247 extra |= int(f.b & uint32(1<<nb-1)) 248 f.b >>= nb 249 f.nb -= nb 250 dist = 1<<(nb+1) + 1 + extra 251 } 252 253 // Copy [-dist:-dist+length] into output. 254 // Encoding can be prescient, so no check on length. 255 if dist > len(f.out) { 256 f.err = true 257 return 258 } 259 260 p := len(f.out) - dist 261 for i := 0; i < length; i++ { 262 f.out = append(f.out, f.out[p]) 263 p++ 264 } 265 } 266 } 267 268 // Copy a single uncompressed data block from input to output. 269 func (f *decompressor) dataBlock() { 270 // Uncompressed. 271 // Discard current half-byte. 272 f.nb = 0 273 f.b = 0 274 275 if len(f.in) < 4 { 276 f.err = true 277 return 278 } 279 280 buf := f.in[:4] 281 f.in = f.in[4:] 282 n := int(buf[0]) | int(buf[1])<<8 283 nn := int(buf[2]) | int(buf[3])<<8 284 if uint16(nn) != uint16(^n) { 285 f.err = true 286 return 287 } 288 289 if len(f.in) < n { 290 f.err = true 291 return 292 } 293 f.out = append(f.out, f.in[:n]...) 294 f.in = f.in[n:] 295 } 296 297 func (f *decompressor) moreBits() { 298 if len(f.in) == 0 { 299 f.err = true 300 return 301 } 302 c := f.in[0] 303 f.in = f.in[1:] 304 f.b |= uint32(c) << f.nb 305 f.nb += 8 306 } 307 308 // Read the next Huffman-encoded symbol from f according to h. 309 func (f *decompressor) huffSym(h *huffmanDecoder) int { 310 for n := uint(h.min); n <= uint(h.max); n++ { 311 lim := h.limit[n] 312 if lim == -1 { 313 continue 314 } 315 for f.nb < n { 316 if f.moreBits(); f.err { 317 return 0 318 } 319 } 320 v := int(f.b & uint32(1<<n-1)) 321 v <<= 16 - n 322 v = int(reverseByte[v>>8]) | int(reverseByte[v&0xFF])<<8 // reverse bits 323 if v <= lim { 324 f.b >>= n 325 f.nb -= n 326 return h.codes[v-h.base[n]] 327 } 328 } 329 f.err = true 330 return 0 331 } 332 333 var reverseByte = [256]byte{ 334 0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0, 335 0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0, 336 0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8, 337 0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8, 338 0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4, 339 0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4, 340 0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec, 341 0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc, 342 0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2, 343 0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2, 344 0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea, 345 0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa, 346 0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6, 347 0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6, 348 0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee, 349 0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe, 350 0x01, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1, 351 0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1, 352 0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9, 353 0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9, 354 0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5, 355 0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5, 356 0x0d, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed, 357 0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd, 358 0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3, 359 0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3, 360 0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb, 361 0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb, 362 0x07, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7, 363 0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7, 364 0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef, 365 0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff, 366 } 367 368 // Hard-coded Huffman tables for DEFLATE algorithm. 369 // See RFC 1951, section 3.2.6. 370 var fixedHuffmanDecoder = huffmanDecoder{ 371 7, 9, 372 [maxCodeLen + 1]int{7: 23, 199, 511}, 373 [maxCodeLen + 1]int{7: 0, 24, 224}, 374 []int{ 375 // length 7: 256-279 376 256, 257, 258, 259, 260, 261, 262, 377 263, 264, 265, 266, 267, 268, 269, 378 270, 271, 272, 273, 274, 275, 276, 379 277, 278, 279, 380 381 // length 8: 0-143 382 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 383 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 384 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 385 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 386 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 387 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 388 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 389 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 390 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 391 92, 93, 94, 95, 96, 97, 98, 99, 100, 392 101, 102, 103, 104, 105, 106, 107, 108, 393 109, 110, 111, 112, 113, 114, 115, 116, 394 117, 118, 119, 120, 121, 122, 123, 124, 395 125, 126, 127, 128, 129, 130, 131, 132, 396 133, 134, 135, 136, 137, 138, 139, 140, 397 141, 142, 143, 398 399 // length 8: 280-287 400 280, 281, 282, 283, 284, 285, 286, 287, 401 402 // length 9: 144-255 403 144, 145, 146, 147, 148, 149, 150, 151, 404 152, 153, 154, 155, 156, 157, 158, 159, 405 160, 161, 162, 163, 164, 165, 166, 167, 406 168, 169, 170, 171, 172, 173, 174, 175, 407 176, 177, 178, 179, 180, 181, 182, 183, 408 184, 185, 186, 187, 188, 189, 190, 191, 409 192, 193, 194, 195, 196, 197, 198, 199, 410 200, 201, 202, 203, 204, 205, 206, 207, 411 208, 209, 210, 211, 212, 213, 214, 215, 412 216, 217, 218, 219, 220, 221, 222, 223, 413 224, 225, 226, 227, 228, 229, 230, 231, 414 232, 233, 234, 235, 236, 237, 238, 239, 415 240, 241, 242, 243, 244, 245, 246, 247, 416 248, 249, 250, 251, 252, 253, 254, 255, 417 }, 418 } 419 420 // Huffman decoder is based on 421 // J. Brian Connell, ``A Huffman-Shannon-Fano Code,'' 422 // Proceedings of the IEEE, 61(7) (July 1973), pp 1046-1047. 423 type huffmanDecoder struct { 424 // min, max code length 425 min, max int 426 427 // limit[i] = largest code word of length i 428 // Given code v of length n, 429 // need more bits if v > limit[n]. 430 limit [maxCodeLen + 1]int 431 432 // base[i] = smallest code word of length i - seq number 433 base [maxCodeLen + 1]int 434 435 // codes[seq number] = output code. 436 // Given code v of length n, value is 437 // codes[v - base[n]]. 438 codes []int 439 } 440 441 // Initialize Huffman decoding tables from array of code lengths. 442 func (h *huffmanDecoder) init(bits []int) bool { 443 // Count number of codes of each length, 444 // compute min and max length. 445 var count [maxCodeLen + 1]int 446 var min, max int 447 for _, n := range bits { 448 if n == 0 { 449 continue 450 } 451 if min == 0 || n < min { 452 min = n 453 } 454 if n > max { 455 max = n 456 } 457 count[n]++ 458 } 459 if max == 0 { 460 return false 461 } 462 463 h.min = min 464 h.max = max 465 466 // For each code range, compute 467 // nextcode (first code of that length), 468 // limit (last code of that length), and 469 // base (offset from first code to sequence number). 470 code := 0 471 seq := 0 472 var nextcode [maxCodeLen]int 473 for i := min; i <= max; i++ { 474 n := count[i] 475 nextcode[i] = code 476 h.base[i] = code - seq 477 code += n 478 seq += n 479 h.limit[i] = code - 1 480 code <<= 1 481 } 482 483 // Make array mapping sequence numbers to codes. 484 if len(h.codes) < len(bits) { 485 h.codes = make([]int, len(bits)) 486 } 487 for i, n := range bits { 488 if n == 0 { 489 continue 490 } 491 code := nextcode[n] 492 nextcode[n]++ 493 seq := code - h.base[n] 494 h.codes[seq] = i 495 } 496 return true 497 } 498 499 func inflate(in string) (out []byte) { 500 var d decompressor 501 d.in = in 502 for !d.err && !d.eof { 503 d.nextBlock() 504 } 505 if len(d.in) != 0 { 506 println("fs unzip: junk at end of compressed data") 507 return nil 508 } 509 return d.out 510 } 511 512 // get4 returns the little-endian 32-bit value in b. 513 func zget4(b string) int { 514 if len(b) < 4 { 515 return 0 516 } 517 return int(b[0]) | int(b[1])<<8 | int(b[2])<<16 | int(b[3])<<24 518 } 519 520 // get2 returns the little-endian 16-bit value in b. 521 func zget2(b string) int { 522 if len(b) < 2 { 523 return 0 524 } 525 return int(b[0]) | int(b[1])<<8 526 } 527 528 func unzip(data string) { 529 const ( 530 zecheader = 0x06054b50 531 zcheader = 0x02014b50 532 ztailsize = 22 533 zheadersize = 30 534 zheader = 0x04034b50 535 ) 536 537 buf := data[len(data)-ztailsize:] 538 n := zget2(buf[10:]) 539 size := zget4(buf[12:]) 540 off := zget4(buf[16:]) 541 542 hdr := data[off : off+size] 543 for i := 0; i < n; i++ { 544 // zip entry layout: 545 // 0 magic[4] 546 // 4 madevers[1] 547 // 5 madeos[1] 548 // 6 extvers[1] 549 // 7 extos[1] 550 // 8 flags[2] 551 // 10 meth[2] 552 // 12 modtime[2] 553 // 14 moddate[2] 554 // 16 crc[4] 555 // 20 csize[4] 556 // 24 uncsize[4] 557 // 28 namelen[2] 558 // 30 xlen[2] 559 // 32 fclen[2] 560 // 34 disknum[2] 561 // 36 iattr[2] 562 // 38 eattr[4] 563 // 42 off[4] 564 // 46 name[namelen] 565 // 46+namelen+xlen+fclen - next header 566 // 567 if zget4(hdr) != zcheader { 568 println("fs unzip: bad magic") 569 break 570 } 571 meth := zget2(hdr[10:]) 572 mtime := zget2(hdr[12:]) 573 mdate := zget2(hdr[14:]) 574 csize := zget4(hdr[20:]) 575 size := zget4(hdr[24:]) 576 namelen := zget2(hdr[28:]) 577 xlen := zget2(hdr[30:]) 578 fclen := zget2(hdr[32:]) 579 xattr := uint32(zget4(hdr[38:])) >> 16 580 off := zget4(hdr[42:]) 581 name := hdr[46 : 46+namelen] 582 hdr = hdr[46+namelen+xlen+fclen:] 583 584 // zip per-file header layout: 585 // 0 magic[4] 586 // 4 extvers[1] 587 // 5 extos[1] 588 // 6 flags[2] 589 // 8 meth[2] 590 // 10 modtime[2] 591 // 12 moddate[2] 592 // 14 crc[4] 593 // 18 csize[4] 594 // 22 uncsize[4] 595 // 26 namelen[2] 596 // 28 xlen[2] 597 // 30 name[namelen] 598 // 30+namelen+xlen - file data 599 // 600 buf := data[off : off+zheadersize+namelen] 601 if zget4(buf) != zheader || 602 zget2(buf[8:]) != meth || 603 zget2(buf[26:]) != namelen || 604 buf[30:30+namelen] != name { 605 println("fs unzip: inconsistent zip file") 606 return 607 } 608 xlen = zget2(buf[28:]) 609 610 off += zheadersize + namelen + xlen 611 612 var fdata []byte 613 switch meth { 614 case 0: 615 // buf is uncompressed 616 buf = data[off : off+size] 617 fdata = []byte(buf) 618 case 8: 619 // buf is deflate-compressed 620 buf = data[off : off+csize] 621 fdata = inflate(buf) 622 if len(fdata) != size { 623 println("fs unzip: inconsistent size in zip file") 624 return 625 } 626 } 627 628 if xattr&S_IFMT == 0 { 629 if xattr&0777 == 0 { 630 xattr |= 0666 631 } 632 if len(name) > 0 && name[len(name)-1] == '/' { 633 xattr |= S_IFDIR 634 xattr |= 0111 635 } else { 636 xattr |= S_IFREG 637 } 638 } 639 640 if err := create(name, xattr, zipToTime(mdate, mtime), fdata); err != nil { 641 print("fs unzip: create ", name, ": ", err.Error(), "\n") 642 } 643 } 644 645 chdirEnv() 646 } 647 648 func zipToTime(date, time int) int64 { 649 dd := date & 0x1f 650 mm := date >> 5 & 0xf 651 yy := date >> 9 // since 1980 652 653 sec := int64(315532800) // jan 1 1980 654 sec += int64(yy) * 365 * 86400 655 sec += int64(yy) / 4 * 86400 656 if yy%4 > 0 || mm >= 3 { 657 sec += 86400 658 } 659 sec += int64(daysBeforeMonth[mm]) * 86400 660 sec += int64(dd-1) * 86400 661 662 h := time >> 11 663 m := time >> 5 & 0x3F 664 s := time & 0x1f * 2 665 sec += int64(h*3600 + m*60 + s) 666 667 return sec 668 } 669 670 var daysBeforeMonth = [...]int32{ 671 0, 672 0, 673 31, 674 31 + 28, 675 31 + 28 + 31, 676 31 + 28 + 31 + 30, 677 31 + 28 + 31 + 30 + 31, 678 31 + 28 + 31 + 30 + 31 + 30, 679 31 + 28 + 31 + 30 + 31 + 30 + 31, 680 31 + 28 + 31 + 30 + 31 + 30 + 31 + 31, 681 31 + 28 + 31 + 30 + 31 + 30 + 31 + 31 + 30, 682 31 + 28 + 31 + 30 + 31 + 30 + 31 + 31 + 30 + 31, 683 31 + 28 + 31 + 30 + 31 + 30 + 31 + 31 + 30 + 31 + 30, 684 31 + 28 + 31 + 30 + 31 + 30 + 31 + 31 + 30 + 31 + 30 + 31, 685 } 686