1 /* 2 * Squashfs - a compressed read only filesystem for Linux 3 * 4 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 5 * Phillip Lougher <phillip (at) lougher.demon.co.uk> 6 * 7 * This program is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU General Public License 9 * as published by the Free Software Foundation; either version 2, 10 * or (at your option) any later version. 11 * 12 * This program is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 * GNU General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License 18 * along with this program; if not, write to the Free Software 19 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 20 * 21 * file.c 22 */ 23 24 /* 25 * This file contains code for handling regular files. A regular file 26 * consists of a sequence of contiguous compressed blocks, and/or a 27 * compressed fragment block (tail-end packed block). The compressed size 28 * of each datablock is stored in a block list contained within the 29 * file inode (itself stored in one or more compressed metadata blocks). 30 * 31 * To speed up access to datablocks when reading 'large' files (256 Mbytes or 32 * larger), the code implements an index cache that caches the mapping from 33 * block index to datablock location on disk. 34 * 35 * The index cache allows Squashfs to handle large files (up to 1.75 TiB) while 36 * retaining a simple and space-efficient block list on disk. The cache 37 * is split into slots, caching up to eight 224 GiB files (128 KiB blocks). 38 * Larger files use multiple slots, with 1.75 TiB files using all 8 slots. 39 * The index cache is designed to be memory efficient, and by default uses 40 * 16 KiB. 41 */ 42 43 #include <linux/fs.h> 44 #include <linux/vfs.h> 45 #include <linux/kernel.h> 46 #include <linux/slab.h> 47 #include <linux/string.h> 48 #include <linux/pagemap.h> 49 #include <linux/mutex.h> 50 #include <linux/zlib.h> 51 52 #include "squashfs_fs.h" 53 #include "squashfs_fs_sb.h" 54 #include "squashfs_fs_i.h" 55 #include "squashfs.h" 56 57 /* 58 * Locate cache slot in range [offset, index] for specified inode. If 59 * there's more than one return the slot closest to index. 60 */ 61 static struct meta_index *locate_meta_index(struct inode *inode, int offset, 62 int index) 63 { 64 struct meta_index *meta = NULL; 65 struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; 66 int i; 67 68 mutex_lock(&msblk->meta_index_mutex); 69 70 TRACE("locate_meta_index: index %d, offset %d\n", index, offset); 71 72 if (msblk->meta_index == NULL) 73 goto not_allocated; 74 75 for (i = 0; i < SQUASHFS_META_SLOTS; i++) { 76 if (msblk->meta_index[i].inode_number == inode->i_ino && 77 msblk->meta_index[i].offset >= offset && 78 msblk->meta_index[i].offset <= index && 79 msblk->meta_index[i].locked == 0) { 80 TRACE("locate_meta_index: entry %d, offset %d\n", i, 81 msblk->meta_index[i].offset); 82 meta = &msblk->meta_index[i]; 83 offset = meta->offset; 84 } 85 } 86 87 if (meta) 88 meta->locked = 1; 89 90 not_allocated: 91 mutex_unlock(&msblk->meta_index_mutex); 92 93 return meta; 94 } 95 96 97 /* 98 * Find and initialise an empty cache slot for index offset. 99 */ 100 static struct meta_index *empty_meta_index(struct inode *inode, int offset, 101 int skip) 102 { 103 struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; 104 struct meta_index *meta = NULL; 105 int i; 106 107 mutex_lock(&msblk->meta_index_mutex); 108 109 TRACE("empty_meta_index: offset %d, skip %d\n", offset, skip); 110 111 if (msblk->meta_index == NULL) { 112 /* 113 * First time cache index has been used, allocate and 114 * initialise. The cache index could be allocated at 115 * mount time but doing it here means it is allocated only 116 * if a 'large' file is read. 117 */ 118 msblk->meta_index = kcalloc(SQUASHFS_META_SLOTS, 119 sizeof(*(msblk->meta_index)), GFP_KERNEL); 120 if (msblk->meta_index == NULL) { 121 ERROR("Failed to allocate meta_index\n"); 122 goto failed; 123 } 124 for (i = 0; i < SQUASHFS_META_SLOTS; i++) { 125 msblk->meta_index[i].inode_number = 0; 126 msblk->meta_index[i].locked = 0; 127 } 128 msblk->next_meta_index = 0; 129 } 130 131 for (i = SQUASHFS_META_SLOTS; i && 132 msblk->meta_index[msblk->next_meta_index].locked; i--) 133 msblk->next_meta_index = (msblk->next_meta_index + 1) % 134 SQUASHFS_META_SLOTS; 135 136 if (i == 0) { 137 TRACE("empty_meta_index: failed!\n"); 138 goto failed; 139 } 140 141 TRACE("empty_meta_index: returned meta entry %d, %p\n", 142 msblk->next_meta_index, 143 &msblk->meta_index[msblk->next_meta_index]); 144 145 meta = &msblk->meta_index[msblk->next_meta_index]; 146 msblk->next_meta_index = (msblk->next_meta_index + 1) % 147 SQUASHFS_META_SLOTS; 148 149 meta->inode_number = inode->i_ino; 150 meta->offset = offset; 151 meta->skip = skip; 152 meta->entries = 0; 153 meta->locked = 1; 154 155 failed: 156 mutex_unlock(&msblk->meta_index_mutex); 157 return meta; 158 } 159 160 161 static void release_meta_index(struct inode *inode, struct meta_index *meta) 162 { 163 struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; 164 mutex_lock(&msblk->meta_index_mutex); 165 meta->locked = 0; 166 mutex_unlock(&msblk->meta_index_mutex); 167 } 168 169 170 /* 171 * Read the next n blocks from the block list, starting from 172 * metadata block <start_block, offset>. 173 */ 174 static long long read_indexes(struct super_block *sb, int n, 175 u64 *start_block, int *offset) 176 { 177 int err, i; 178 long long block = 0; 179 __le32 *blist = kmalloc(PAGE_CACHE_SIZE, GFP_KERNEL); 180 181 if (blist == NULL) { 182 ERROR("read_indexes: Failed to allocate block_list\n"); 183 return -ENOMEM; 184 } 185 186 while (n) { 187 int blocks = min_t(int, n, PAGE_CACHE_SIZE >> 2); 188 189 err = squashfs_read_metadata(sb, blist, start_block, 190 offset, blocks << 2); 191 if (err < 0) { 192 ERROR("read_indexes: reading block [%llx:%x]\n", 193 *start_block, *offset); 194 goto failure; 195 } 196 197 for (i = 0; i < blocks; i++) { 198 int size = le32_to_cpu(blist[i]); 199 block += SQUASHFS_COMPRESSED_SIZE_BLOCK(size); 200 } 201 n -= blocks; 202 } 203 204 kfree(blist); 205 return block; 206 207 failure: 208 kfree(blist); 209 return err; 210 } 211 212 213 /* 214 * Each cache index slot has SQUASHFS_META_ENTRIES, each of which 215 * can cache one index -> datablock/blocklist-block mapping. We wish 216 * to distribute these over the length of the file, entry[0] maps index x, 217 * entry[1] maps index x + skip, entry[2] maps index x + 2 * skip, and so on. 218 * The larger the file, the greater the skip factor. The skip factor is 219 * limited to the size of the metadata cache (SQUASHFS_CACHED_BLKS) to ensure 220 * the number of metadata blocks that need to be read fits into the cache. 221 * If the skip factor is limited in this way then the file will use multiple 222 * slots. 223 */ 224 static inline int calculate_skip(int blocks) 225 { 226 int skip = blocks / ((SQUASHFS_META_ENTRIES + 1) 227 * SQUASHFS_META_INDEXES); 228 return min(SQUASHFS_CACHED_BLKS - 1, skip + 1); 229 } 230 231 232 /* 233 * Search and grow the index cache for the specified inode, returning the 234 * on-disk locations of the datablock and block list metadata block 235 * <index_block, index_offset> for index (scaled to nearest cache index). 236 */ 237 static int fill_meta_index(struct inode *inode, int index, 238 u64 *index_block, int *index_offset, u64 *data_block) 239 { 240 struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; 241 int skip = calculate_skip(i_size_read(inode) >> msblk->block_log); 242 int offset = 0; 243 struct meta_index *meta; 244 struct meta_entry *meta_entry; 245 u64 cur_index_block = squashfs_i(inode)->block_list_start; 246 int cur_offset = squashfs_i(inode)->offset; 247 u64 cur_data_block = squashfs_i(inode)->start; 248 int err, i; 249 250 /* 251 * Scale index to cache index (cache slot entry) 252 */ 253 index /= SQUASHFS_META_INDEXES * skip; 254 255 while (offset < index) { 256 meta = locate_meta_index(inode, offset + 1, index); 257 258 if (meta == NULL) { 259 meta = empty_meta_index(inode, offset + 1, skip); 260 if (meta == NULL) 261 goto all_done; 262 } else { 263 offset = index < meta->offset + meta->entries ? index : 264 meta->offset + meta->entries - 1; 265 meta_entry = &meta->meta_entry[offset - meta->offset]; 266 cur_index_block = meta_entry->index_block + 267 msblk->inode_table; 268 cur_offset = meta_entry->offset; 269 cur_data_block = meta_entry->data_block; 270 TRACE("get_meta_index: offset %d, meta->offset %d, " 271 "meta->entries %d\n", offset, meta->offset, 272 meta->entries); 273 TRACE("get_meta_index: index_block 0x%llx, offset 0x%x" 274 " data_block 0x%llx\n", cur_index_block, 275 cur_offset, cur_data_block); 276 } 277 278 /* 279 * If necessary grow cache slot by reading block list. Cache 280 * slot is extended up to index or to the end of the slot, in 281 * which case further slots will be used. 282 */ 283 for (i = meta->offset + meta->entries; i <= index && 284 i < meta->offset + SQUASHFS_META_ENTRIES; i++) { 285 int blocks = skip * SQUASHFS_META_INDEXES; 286 long long res = read_indexes(inode->i_sb, blocks, 287 &cur_index_block, &cur_offset); 288 289 if (res < 0) { 290 if (meta->entries == 0) 291 /* 292 * Don't leave an empty slot on read 293 * error allocated to this inode... 294 */ 295 meta->inode_number = 0; 296 err = res; 297 goto failed; 298 } 299 300 cur_data_block += res; 301 meta_entry = &meta->meta_entry[i - meta->offset]; 302 meta_entry->index_block = cur_index_block - 303 msblk->inode_table; 304 meta_entry->offset = cur_offset; 305 meta_entry->data_block = cur_data_block; 306 meta->entries++; 307 offset++; 308 } 309 310 TRACE("get_meta_index: meta->offset %d, meta->entries %d\n", 311 meta->offset, meta->entries); 312 313 release_meta_index(inode, meta); 314 } 315 316 all_done: 317 *index_block = cur_index_block; 318 *index_offset = cur_offset; 319 *data_block = cur_data_block; 320 321 /* 322 * Scale cache index (cache slot entry) to index 323 */ 324 return offset * SQUASHFS_META_INDEXES * skip; 325 326 failed: 327 release_meta_index(inode, meta); 328 return err; 329 } 330 331 332 /* 333 * Get the on-disk location and compressed size of the datablock 334 * specified by index. Fill_meta_index() does most of the work. 335 */ 336 static int read_blocklist(struct inode *inode, int index, u64 *block) 337 { 338 u64 start; 339 long long blks; 340 int offset; 341 __le32 size; 342 int res = fill_meta_index(inode, index, &start, &offset, block); 343 344 TRACE("read_blocklist: res %d, index %d, start 0x%llx, offset" 345 " 0x%x, block 0x%llx\n", res, index, start, offset, 346 *block); 347 348 if (res < 0) 349 return res; 350 351 /* 352 * res contains the index of the mapping returned by fill_meta_index(), 353 * this will likely be less than the desired index (because the 354 * meta_index cache works at a higher granularity). Read any 355 * extra block indexes needed. 356 */ 357 if (res < index) { 358 blks = read_indexes(inode->i_sb, index - res, &start, &offset); 359 if (blks < 0) 360 return (int) blks; 361 *block += blks; 362 } 363 364 /* 365 * Read length of block specified by index. 366 */ 367 res = squashfs_read_metadata(inode->i_sb, &size, &start, &offset, 368 sizeof(size)); 369 if (res < 0) 370 return res; 371 return le32_to_cpu(size); 372 } 373 374 375 static int squashfs_readpage(struct file *file, struct page *page) 376 { 377 struct inode *inode = page->mapping->host; 378 struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; 379 int bytes, i, offset = 0, sparse = 0; 380 struct squashfs_cache_entry *buffer = NULL; 381 void *pageaddr; 382 383 int mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1; 384 int index = page->index >> (msblk->block_log - PAGE_CACHE_SHIFT); 385 int start_index = page->index & ~mask; 386 int end_index = start_index | mask; 387 int file_end = i_size_read(inode) >> msblk->block_log; 388 389 TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n", 390 page->index, squashfs_i(inode)->start); 391 392 if (page->index >= ((i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> 393 PAGE_CACHE_SHIFT)) 394 goto out; 395 396 if (index < file_end || squashfs_i(inode)->fragment_block == 397 SQUASHFS_INVALID_BLK) { 398 /* 399 * Reading a datablock from disk. Need to read block list 400 * to get location and block size. 401 */ 402 u64 block = 0; 403 int bsize = read_blocklist(inode, index, &block); 404 if (bsize < 0) 405 goto error_out; 406 407 if (bsize == 0) { /* hole */ 408 bytes = index == file_end ? 409 (i_size_read(inode) & (msblk->block_size - 1)) : 410 msblk->block_size; 411 sparse = 1; 412 } else { 413 /* 414 * Read and decompress datablock. 415 */ 416 buffer = squashfs_get_datablock(inode->i_sb, 417 block, bsize); 418 if (buffer->error) { 419 ERROR("Unable to read page, block %llx, size %x" 420 "\n", block, bsize); 421 squashfs_cache_put(buffer); 422 goto error_out; 423 } 424 bytes = buffer->length; 425 } 426 } else { 427 /* 428 * Datablock is stored inside a fragment (tail-end packed 429 * block). 430 */ 431 buffer = squashfs_get_fragment(inode->i_sb, 432 squashfs_i(inode)->fragment_block, 433 squashfs_i(inode)->fragment_size); 434 435 if (buffer->error) { 436 ERROR("Unable to read page, block %llx, size %x\n", 437 squashfs_i(inode)->fragment_block, 438 squashfs_i(inode)->fragment_size); 439 squashfs_cache_put(buffer); 440 goto error_out; 441 } 442 bytes = i_size_read(inode) & (msblk->block_size - 1); 443 offset = squashfs_i(inode)->fragment_offset; 444 } 445 446 /* 447 * Loop copying datablock into pages. As the datablock likely covers 448 * many PAGE_CACHE_SIZE pages (default block size is 128 KiB) explicitly 449 * grab the pages from the page cache, except for the page that we've 450 * been called to fill. 451 */ 452 for (i = start_index; i <= end_index && bytes > 0; i++, 453 bytes -= PAGE_CACHE_SIZE, offset += PAGE_CACHE_SIZE) { 454 struct page *push_page; 455 int avail = sparse ? 0 : min_t(int, bytes, PAGE_CACHE_SIZE); 456 457 TRACE("bytes %d, i %d, available_bytes %d\n", bytes, i, avail); 458 459 push_page = (i == page->index) ? page : 460 grab_cache_page_nowait(page->mapping, i); 461 462 if (!push_page) 463 continue; 464 465 if (PageUptodate(push_page)) 466 goto skip_page; 467 468 pageaddr = kmap_atomic(push_page, KM_USER0); 469 squashfs_copy_data(pageaddr, buffer, offset, avail); 470 memset(pageaddr + avail, 0, PAGE_CACHE_SIZE - avail); 471 kunmap_atomic(pageaddr, KM_USER0); 472 flush_dcache_page(push_page); 473 SetPageUptodate(push_page); 474 skip_page: 475 unlock_page(push_page); 476 if (i != page->index) 477 page_cache_release(push_page); 478 } 479 480 if (!sparse) 481 squashfs_cache_put(buffer); 482 483 return 0; 484 485 error_out: 486 SetPageError(page); 487 out: 488 pageaddr = kmap_atomic(page, KM_USER0); 489 memset(pageaddr, 0, PAGE_CACHE_SIZE); 490 kunmap_atomic(pageaddr, KM_USER0); 491 flush_dcache_page(page); 492 if (!PageError(page)) 493 SetPageUptodate(page); 494 unlock_page(page); 495 496 return 0; 497 } 498 499 500 const struct address_space_operations squashfs_aops = { 501 .readpage = squashfs_readpage 502 }; 503