1 /* fsys_reiserfs.c - an implementation for the ReiserFS filesystem */ 2 /* 3 * GRUB -- GRand Unified Bootloader 4 * Copyright (C) 2000, 2001 Free Software Foundation, Inc. 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published by 8 * the Free Software Foundation; either version 2 of the License, or 9 * (at your option) any later version. 10 * 11 * This program is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with this program; if not, write to the Free Software 18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 19 */ 20 21 #ifdef FSYS_REISERFS 22 #include "shared.h" 23 #include "filesys.h" 24 25 #undef REISERDEBUG 26 27 /* Some parts of this code (mainly the structures and defines) are 28 * from the original reiser fs code, as found in the linux kernel. 29 */ 30 31 /* include/asm-i386/types.h */ 32 typedef __signed__ char __s8; 33 typedef unsigned char __u8; 34 typedef __signed__ short __s16; 35 typedef unsigned short __u16; 36 typedef __signed__ int __s32; 37 typedef unsigned int __u32; 38 typedef unsigned long long __u64; 39 40 /* linux/posix_type.h */ 41 typedef long linux_off_t; 42 43 /* linux/little_endian.h */ 44 #define __cpu_to_le64(x) ((__u64) (x)) 45 #define __le64_to_cpu(x) ((__u64) (x)) 46 #define __cpu_to_le32(x) ((__u32) (x)) 47 #define __le32_to_cpu(x) ((__u32) (x)) 48 #define __cpu_to_le16(x) ((__u16) (x)) 49 #define __le16_to_cpu(x) ((__u16) (x)) 50 51 /* include/linux/reiser_fs.h */ 52 /* This is the new super block of a journaling reiserfs system */ 53 struct reiserfs_super_block 54 { 55 __u32 s_block_count; /* blocks count */ 56 __u32 s_free_blocks; /* free blocks count */ 57 __u32 s_root_block; /* root block number */ 58 __u32 s_journal_block; /* journal block number */ 59 __u32 s_journal_dev; /* journal device number */ 60 __u32 s_journal_size; /* size of the journal on FS creation. used to make sure they don't overflow it */ 61 __u32 s_journal_trans_max; /* max number of blocks in a transaction. */ 62 __u32 s_journal_magic; /* random value made on fs creation */ 63 __u32 s_journal_max_batch; /* max number of blocks to batch into a trans */ 64 __u32 s_journal_max_commit_age; /* in seconds, how old can an async commit be */ 65 __u32 s_journal_max_trans_age; /* in seconds, how old can a transaction be */ 66 __u16 s_blocksize; /* block size */ 67 __u16 s_oid_maxsize; /* max size of object id array */ 68 __u16 s_oid_cursize; /* current size of object id array */ 69 __u16 s_state; /* valid or error */ 70 char s_magic[16]; /* reiserfs magic string indicates that file system is reiserfs */ 71 __u16 s_tree_height; /* height of disk tree */ 72 __u16 s_bmap_nr; /* amount of bitmap blocks needed to address each block of file system */ 73 __u16 s_version; 74 char s_unused[128]; /* zero filled by mkreiserfs */ 75 }; 76 77 #define REISERFS_MAX_SUPPORTED_VERSION 2 78 #define REISERFS_SUPER_MAGIC_STRING "ReIsErFs" 79 #define REISER2FS_SUPER_MAGIC_STRING "ReIsEr2Fs" 80 #define REISER3FS_SUPER_MAGIC_STRING "ReIsEr3Fs" 81 82 #define MAX_HEIGHT 7 83 84 /* must be correct to keep the desc and commit structs at 4k */ 85 #define JOURNAL_TRANS_HALF 1018 86 87 /* first block written in a commit. */ 88 struct reiserfs_journal_desc { 89 __u32 j_trans_id; /* id of commit */ 90 __u32 j_len; /* length of commit. len +1 is the commit block */ 91 __u32 j_mount_id; /* mount id of this trans*/ 92 __u32 j_realblock[JOURNAL_TRANS_HALF]; /* real locations for the first blocks */ 93 char j_magic[12]; 94 }; 95 96 /* last block written in a commit */ 97 struct reiserfs_journal_commit { 98 __u32 j_trans_id; /* must match j_trans_id from the desc block */ 99 __u32 j_len; /* ditto */ 100 __u32 j_realblock[JOURNAL_TRANS_HALF]; /* real locations for the last blocks */ 101 char j_digest[16]; /* md5 sum of all the blocks involved, including desc and commit. not used, kill it */ 102 }; 103 104 /* this header block gets written whenever a transaction is considered 105 fully flushed, and is more recent than the last fully flushed 106 transaction. 107 fully flushed means all the log blocks and all the real blocks are 108 on disk, and this transaction does not need to be replayed. 109 */ 110 struct reiserfs_journal_header { 111 /* id of last fully flushed transaction */ 112 __u32 j_last_flush_trans_id; 113 /* offset in the log of where to start replay after a crash */ 114 __u32 j_first_unflushed_offset; 115 /* mount id to detect very old transactions */ 116 __u32 j_mount_id; 117 }; 118 119 /* magic string to find desc blocks in the journal */ 120 #define JOURNAL_DESC_MAGIC "ReIsErLB" 121 122 123 /* 124 * directories use this key as well as old files 125 */ 126 struct offset_v1 127 { 128 /* 129 * for regular files this is the offset to the first byte of the 130 * body, contained in the object-item, as measured from the start of 131 * the entire body of the object. 132 * 133 * for directory entries, k_offset consists of hash derived from 134 * hashing the name and using few bits (23 or more) of the resulting 135 * hash, and generation number that allows distinguishing names with 136 * hash collisions. If number of collisions overflows generation 137 * number, we return EEXIST. High order bit is 0 always 138 */ 139 __u32 k_offset; 140 __u32 k_uniqueness; 141 }; 142 143 struct offset_v2 144 { 145 /* 146 * for regular files this is the offset to the first byte of the 147 * body, contained in the object-item, as measured from the start of 148 * the entire body of the object. 149 * 150 * for directory entries, k_offset consists of hash derived from 151 * hashing the name and using few bits (23 or more) of the resulting 152 * hash, and generation number that allows distinguishing names with 153 * hash collisions. If number of collisions overflows generation 154 * number, we return EEXIST. High order bit is 0 always 155 */ 156 __u64 k_offset:60; 157 __u64 k_type: 4; 158 }; 159 160 161 struct key 162 { 163 /* packing locality: by default parent directory object id */ 164 __u32 k_dir_id; 165 /* object identifier */ 166 __u32 k_objectid; 167 /* the offset and node type (old and new form) */ 168 union 169 { 170 struct offset_v1 v1; 171 struct offset_v2 v2; 172 } 173 u; 174 }; 175 176 #define KEY_SIZE (sizeof (struct key)) 177 178 /* Header of a disk block. More precisely, header of a formatted leaf 179 or internal node, and not the header of an unformatted node. */ 180 struct block_head 181 { 182 __u16 blk_level; /* Level of a block in the tree. */ 183 __u16 blk_nr_item; /* Number of keys/items in a block. */ 184 __u16 blk_free_space; /* Block free space in bytes. */ 185 struct key blk_right_delim_key; /* Right delimiting key for this block (supported for leaf level nodes 186 only) */ 187 }; 188 #define BLKH_SIZE (sizeof (struct block_head)) 189 #define DISK_LEAF_NODE_LEVEL 1 /* Leaf node level. */ 190 191 struct item_head 192 { 193 struct key ih_key; /* Everything in the tree is found by searching for it based on its key.*/ 194 195 union 196 { 197 __u16 ih_free_space; /* The free space in the last unformatted node of an indirect item if this 198 is an indirect item. This equals 0xFFFF iff this is a direct item or 199 stat data item. Note that the key, not this field, is used to determine 200 the item type, and thus which field this union contains. */ 201 __u16 ih_entry_count; /* Iff this is a directory item, this field equals the number of directory 202 entries in the directory item. */ 203 } 204 u; 205 __u16 ih_item_len; /* total size of the item body */ 206 __u16 ih_item_location; /* an offset to the item body within the block */ 207 __u16 ih_version; /* ITEM_VERSION_1 for all old items, 208 ITEM_VERSION_2 for new ones. 209 Highest bit is set by fsck 210 temporary, cleaned after all done */ 211 }; 212 /* size of item header */ 213 #define IH_SIZE (sizeof (struct item_head)) 214 215 #define ITEM_VERSION_1 0 216 #define ITEM_VERSION_2 1 217 #define IH_KEY_OFFSET(ih) ((ih)->ih_version == ITEM_VERSION_1 \ 218 ? (ih)->ih_key.u.v1.k_offset \ 219 : (ih)->ih_key.u.v2.k_offset) 220 221 #define IH_KEY_ISTYPE(ih, type) ((ih)->ih_version == ITEM_VERSION_1 \ 222 ? (ih)->ih_key.u.v1.k_uniqueness == V1_##type \ 223 : (ih)->ih_key.u.v2.k_type == V2_##type) 224 225 struct disk_child 226 { 227 unsigned long dc_block_number; /* Disk child's block number. */ 228 unsigned short dc_size; /* Disk child's used space. */ 229 }; 230 231 #define DC_SIZE (sizeof (struct disk_child)) 232 233 /* Stat Data on disk. 234 * 235 * Note that reiserfs has two different forms of stat data. Luckily 236 * the fields needed by grub are at the same position. 237 */ 238 struct stat_data 239 { 240 __u16 sd_mode; /* file type, permissions */ 241 __u16 sd_notused1[3]; /* fields not needed by reiserfs */ 242 __u32 sd_size; /* file size */ 243 __u32 sd_size_hi; /* file size high 32 bits (since version 2) */ 244 }; 245 246 struct reiserfs_de_head 247 { 248 __u32 deh_offset; /* third component of the directory entry key */ 249 __u32 deh_dir_id; /* objectid of the parent directory of the 250 object, that is referenced by directory entry */ 251 __u32 deh_objectid;/* objectid of the object, that is referenced by 252 directory entry */ 253 __u16 deh_location;/* offset of name in the whole item */ 254 __u16 deh_state; /* whether 1) entry contains stat data (for 255 future), and 2) whether entry is hidden 256 (unlinked) */ 257 }; 258 259 #define DEH_SIZE (sizeof (struct reiserfs_de_head)) 260 261 #define DEH_Statdata (1 << 0) /* not used now */ 262 #define DEH_Visible (1 << 2) 263 264 #define SD_OFFSET 0 265 #define SD_UNIQUENESS 0 266 #define DOT_OFFSET 1 267 #define DOT_DOT_OFFSET 2 268 #define DIRENTRY_UNIQUENESS 500 269 270 #define V1_TYPE_STAT_DATA 0x0 271 #define V1_TYPE_DIRECT 0xffffffff 272 #define V1_TYPE_INDIRECT 0xfffffffe 273 #define V1_TYPE_DIRECTORY_MAX 0xfffffffd 274 #define V2_TYPE_STAT_DATA 0 275 #define V2_TYPE_INDIRECT 1 276 #define V2_TYPE_DIRECT 2 277 #define V2_TYPE_DIRENTRY 3 278 279 #define REISERFS_ROOT_OBJECTID 2 280 #define REISERFS_ROOT_PARENT_OBJECTID 1 281 #define REISERFS_DISK_OFFSET_IN_BYTES (64 * 1024) 282 /* the spot for the super in versions 3.5 - 3.5.11 (inclusive) */ 283 #define REISERFS_OLD_DISK_OFFSET_IN_BYTES (8 * 1024) 284 #define REISERFS_OLD_BLOCKSIZE 4096 285 286 #define S_ISREG(mode) (((mode) & 0170000) == 0100000) 287 #define S_ISDIR(mode) (((mode) & 0170000) == 0040000) 288 #define S_ISLNK(mode) (((mode) & 0170000) == 0120000) 289 290 #define PATH_MAX 1024 /* include/linux/limits.h */ 291 #define MAX_LINK_COUNT 5 /* number of symbolic links to follow */ 292 293 /* The size of the node cache */ 294 #define FSYSREISER_CACHE_SIZE 24*1024 295 #define FSYSREISER_MIN_BLOCKSIZE SECTOR_SIZE 296 #define FSYSREISER_MAX_BLOCKSIZE FSYSREISER_CACHE_SIZE / 3 297 298 /* Info about currently opened file */ 299 struct fsys_reiser_fileinfo 300 { 301 __u32 k_dir_id; 302 __u32 k_objectid; 303 }; 304 305 /* In memory info about the currently mounted filesystem */ 306 struct fsys_reiser_info 307 { 308 /* The last read item head */ 309 struct item_head *current_ih; 310 /* The last read item */ 311 char *current_item; 312 /* The information for the currently opened file */ 313 struct fsys_reiser_fileinfo fileinfo; 314 /* The start of the journal */ 315 __u32 journal_block; 316 /* The size of the journal */ 317 __u32 journal_block_count; 318 /* The first valid descriptor block in journal 319 (relative to journal_block) */ 320 __u32 journal_first_desc; 321 322 /* The ReiserFS version. */ 323 __u16 version; 324 /* The current depth of the reiser tree. */ 325 __u16 tree_depth; 326 /* SECTOR_SIZE << blocksize_shift == blocksize. */ 327 __u8 blocksize_shift; 328 /* 1 << full_blocksize_shift == blocksize. */ 329 __u8 fullblocksize_shift; 330 /* The reiserfs block size (must be a power of 2) */ 331 __u16 blocksize; 332 /* The number of cached tree nodes */ 333 __u16 cached_slots; 334 /* The number of valid transactions in journal */ 335 __u16 journal_transactions; 336 337 unsigned int blocks[MAX_HEIGHT]; 338 unsigned int next_key_nr[MAX_HEIGHT]; 339 }; 340 341 /* The cached s+tree blocks in FSYS_BUF, see below 342 * for a more detailed description. 343 */ 344 #define ROOT ((char *) ((int) FSYS_BUF)) 345 #define CACHE(i) (ROOT + ((i) << INFO->fullblocksize_shift)) 346 #define LEAF CACHE (DISK_LEAF_NODE_LEVEL) 347 348 #define BLOCKHEAD(cache) ((struct block_head *) cache) 349 #define ITEMHEAD ((struct item_head *) ((int) LEAF + BLKH_SIZE)) 350 #define KEY(cache) ((struct key *) ((int) cache + BLKH_SIZE)) 351 #define DC(cache) ((struct disk_child *) \ 352 ((int) cache + BLKH_SIZE + KEY_SIZE * nr_item)) 353 /* The fsys_reiser_info block. 354 */ 355 #define INFO \ 356 ((struct fsys_reiser_info *) ((int) FSYS_BUF + FSYSREISER_CACHE_SIZE)) 357 /* 358 * The journal cache. For each transaction it contains the number of 359 * blocks followed by the real block numbers of this transaction. 360 * 361 * If the block numbers of some transaction won't fit in this space, 362 * this list is stopped with a 0xffffffff marker and the remaining 363 * uncommitted transactions aren't cached. 364 */ 365 #define JOURNAL_START ((__u32 *) (INFO + 1)) 366 #define JOURNAL_END ((__u32 *) (FSYS_BUF + FSYS_BUFLEN)) 367 368 369 static __inline__ unsigned long 370 log2 (unsigned long word) 371 { 372 __asm__ ("bsfl %1,%0" 373 : "=r" (word) 374 : "r" (word)); 375 return word; 376 } 377 378 static __inline__ int 379 is_power_of_two (unsigned long word) 380 { 381 return (word & -word) == word; 382 } 383 384 static int 385 journal_read (int block, int len, char *buffer) 386 { 387 return devread ((INFO->journal_block + block) << INFO->blocksize_shift, 388 0, len, buffer); 389 } 390 391 /* Read a block from ReiserFS file system, taking the journal into 392 * account. If the block nr is in the journal, the block from the 393 * journal taken. 394 */ 395 static int 396 block_read (int blockNr, int start, int len, char *buffer) 397 { 398 int transactions = INFO->journal_transactions; 399 int desc_block = INFO->journal_first_desc; 400 int journal_mask = INFO->journal_block_count - 1; 401 int translatedNr = blockNr; 402 __u32 *journal_table = JOURNAL_START; 403 while (transactions-- > 0) 404 { 405 int i = 0; 406 int j_len; 407 if (*journal_table != 0xffffffff) 408 { 409 /* Search for the blockNr in cached journal */ 410 j_len = *journal_table++; 411 while (i++ < j_len) 412 { 413 if (*journal_table++ == blockNr) 414 { 415 journal_table += j_len - i; 416 goto found; 417 } 418 } 419 } 420 else 421 { 422 /* This is the end of cached journal marker. The remaining 423 * transactions are still on disk. 424 */ 425 struct reiserfs_journal_desc desc; 426 struct reiserfs_journal_commit commit; 427 428 if (! journal_read (desc_block, sizeof (desc), (char *) &desc)) 429 return 0; 430 431 j_len = desc.j_len; 432 while (i < j_len && i < JOURNAL_TRANS_HALF) 433 if (desc.j_realblock[i++] == blockNr) 434 goto found; 435 436 if (j_len >= JOURNAL_TRANS_HALF) 437 { 438 int commit_block = (desc_block + 1 + j_len) & journal_mask; 439 if (! journal_read (commit_block, 440 sizeof (commit), (char *) &commit)) 441 return 0; 442 while (i < j_len) 443 if (commit.j_realblock[i++ - JOURNAL_TRANS_HALF] == blockNr) 444 goto found; 445 } 446 } 447 goto not_found; 448 449 found: 450 translatedNr = INFO->journal_block + ((desc_block + i) & journal_mask); 451 #ifdef REISERDEBUG 452 printf ("block_read: block %d is mapped to journal block %d.\n", 453 blockNr, translatedNr - INFO->journal_block); 454 #endif 455 /* We must continue the search, as this block may be overwritten 456 * in later transactions. 457 */ 458 not_found: 459 desc_block = (desc_block + 2 + j_len) & journal_mask; 460 } 461 return devread (translatedNr << INFO->blocksize_shift, start, len, buffer); 462 } 463 464 /* Init the journal data structure. We try to cache as much as 465 * possible in the JOURNAL_START-JOURNAL_END space, but if it is full 466 * we can still read the rest from the disk on demand. 467 * 468 * The first number of valid transactions and the descriptor block of the 469 * first valid transaction are held in INFO. The transactions are all 470 * adjacent, but we must take care of the journal wrap around. 471 */ 472 static int 473 journal_init (void) 474 { 475 unsigned int block_count = INFO->journal_block_count; 476 unsigned int desc_block; 477 unsigned int commit_block; 478 unsigned int next_trans_id; 479 struct reiserfs_journal_header header; 480 struct reiserfs_journal_desc desc; 481 struct reiserfs_journal_commit commit; 482 __u32 *journal_table = JOURNAL_START; 483 484 journal_read (block_count, sizeof (header), (char *) &header); 485 desc_block = header.j_first_unflushed_offset; 486 if (desc_block >= block_count) 487 return 0; 488 489 INFO->journal_first_desc = desc_block; 490 next_trans_id = header.j_last_flush_trans_id + 1; 491 492 #ifdef REISERDEBUG 493 printf ("journal_init: last flushed %d\n", 494 header.j_last_flush_trans_id); 495 #endif 496 497 while (1) 498 { 499 journal_read (desc_block, sizeof (desc), (char *) &desc); 500 if (substring (JOURNAL_DESC_MAGIC, desc.j_magic) > 0 501 || desc.j_trans_id != next_trans_id 502 || desc.j_mount_id != header.j_mount_id) 503 /* no more valid transactions */ 504 break; 505 506 commit_block = (desc_block + desc.j_len + 1) & (block_count - 1); 507 journal_read (commit_block, sizeof (commit), (char *) &commit); 508 if (desc.j_trans_id != commit.j_trans_id 509 || desc.j_len != commit.j_len) 510 /* no more valid transactions */ 511 break; 512 513 #ifdef REISERDEBUG 514 printf ("Found valid transaction %d/%d at %d.\n", 515 desc.j_trans_id, desc.j_mount_id, desc_block); 516 #endif 517 518 next_trans_id++; 519 if (journal_table < JOURNAL_END) 520 { 521 if ((journal_table + 1 + desc.j_len) >= JOURNAL_END) 522 { 523 /* The table is almost full; mark the end of the cached 524 * journal.*/ 525 *journal_table = 0xffffffff; 526 journal_table = JOURNAL_END; 527 } 528 else 529 { 530 int i; 531 /* Cache the length and the realblock numbers in the table. 532 * The block number of descriptor can easily be computed. 533 * and need not to be stored here. 534 */ 535 *journal_table++ = desc.j_len; 536 for (i = 0; i < desc.j_len && i < JOURNAL_TRANS_HALF; i++) 537 { 538 *journal_table++ = desc.j_realblock[i]; 539 #ifdef REISERDEBUG 540 printf ("block %d is in journal %d.\n", 541 desc.j_realblock[i], desc_block); 542 #endif 543 } 544 for ( ; i < desc.j_len; i++) 545 { 546 *journal_table++ = commit.j_realblock[i-JOURNAL_TRANS_HALF]; 547 #ifdef REISERDEBUG 548 printf ("block %d is in journal %d.\n", 549 commit.j_realblock[i-JOURNAL_TRANS_HALF], 550 desc_block); 551 #endif 552 } 553 } 554 } 555 desc_block = (commit_block + 1) & (block_count - 1); 556 } 557 #ifdef REISERDEBUG 558 printf ("Transaction %d/%d at %d isn't valid.\n", 559 desc.j_trans_id, desc.j_mount_id, desc_block); 560 #endif 561 562 INFO->journal_transactions 563 = next_trans_id - header.j_last_flush_trans_id - 1; 564 return errnum == 0; 565 } 566 567 /* check filesystem types and read superblock into memory buffer */ 568 int 569 reiserfs_mount (void) 570 { 571 struct reiserfs_super_block super; 572 int superblock = REISERFS_DISK_OFFSET_IN_BYTES >> SECTOR_BITS; 573 574 if (part_length < superblock + (sizeof (super) >> SECTOR_BITS) 575 || ! devread (superblock, 0, sizeof (struct reiserfs_super_block), 576 (char *) &super) 577 || (substring (REISER3FS_SUPER_MAGIC_STRING, super.s_magic) > 0 578 && substring (REISER2FS_SUPER_MAGIC_STRING, super.s_magic) > 0 579 && substring (REISERFS_SUPER_MAGIC_STRING, super.s_magic) > 0) 580 || (/* check that this is not a copy inside the journal log */ 581 super.s_journal_block * super.s_blocksize 582 <= REISERFS_DISK_OFFSET_IN_BYTES)) 583 { 584 /* Try old super block position */ 585 superblock = REISERFS_OLD_DISK_OFFSET_IN_BYTES >> SECTOR_BITS; 586 if (part_length < superblock + (sizeof (super) >> SECTOR_BITS) 587 || ! devread (superblock, 0, sizeof (struct reiserfs_super_block), 588 (char *) &super)) 589 return 0; 590 591 if (substring (REISER3FS_SUPER_MAGIC_STRING, super.s_magic) > 0 592 && substring (REISER2FS_SUPER_MAGIC_STRING, super.s_magic) > 0 593 && substring (REISERFS_SUPER_MAGIC_STRING, super.s_magic) > 0) 594 { 595 /* pre journaling super block ? */ 596 if (substring (REISERFS_SUPER_MAGIC_STRING, 597 (char*) ((int) &super + 20)) > 0) 598 return 0; 599 600 super.s_blocksize = REISERFS_OLD_BLOCKSIZE; 601 super.s_journal_block = 0; 602 super.s_version = 0; 603 } 604 } 605 606 /* check the version number. */ 607 if (super.s_version > REISERFS_MAX_SUPPORTED_VERSION) 608 return 0; 609 610 INFO->version = super.s_version; 611 INFO->blocksize = super.s_blocksize; 612 INFO->fullblocksize_shift = log2 (super.s_blocksize); 613 INFO->blocksize_shift = INFO->fullblocksize_shift - SECTOR_BITS; 614 INFO->cached_slots = 615 (FSYSREISER_CACHE_SIZE >> INFO->fullblocksize_shift) - 1; 616 617 #ifdef REISERDEBUG 618 printf ("reiserfs_mount: version=%d, blocksize=%d\n", 619 INFO->version, INFO->blocksize); 620 #endif /* REISERDEBUG */ 621 622 /* Clear node cache. */ 623 memset (INFO->blocks, 0, sizeof (INFO->blocks)); 624 625 if (super.s_blocksize < FSYSREISER_MIN_BLOCKSIZE 626 || super.s_blocksize > FSYSREISER_MAX_BLOCKSIZE 627 || (SECTOR_SIZE << INFO->blocksize_shift) != super.s_blocksize) 628 return 0; 629 630 /* Initialize journal code. If something fails we end with zero 631 * journal_transactions, so we don't access the journal at all. 632 */ 633 INFO->journal_transactions = 0; 634 if (super.s_journal_block != 0 && super.s_journal_dev == 0) 635 { 636 INFO->journal_block = super.s_journal_block; 637 INFO->journal_block_count = super.s_journal_size; 638 if (is_power_of_two (INFO->journal_block_count)) 639 journal_init (); 640 641 /* Read in super block again, maybe it is in the journal */ 642 block_read (superblock >> INFO->blocksize_shift, 643 0, sizeof (struct reiserfs_super_block), (char *) &super); 644 } 645 646 if (! block_read (super.s_root_block, 0, INFO->blocksize, (char*) ROOT)) 647 return 0; 648 649 INFO->tree_depth = BLOCKHEAD (ROOT)->blk_level; 650 651 #ifdef REISERDEBUG 652 printf ("root read_in: block=%d, depth=%d\n", 653 super.s_root_block, INFO->tree_depth); 654 #endif /* REISERDEBUG */ 655 656 if (INFO->tree_depth >= MAX_HEIGHT) 657 return 0; 658 if (INFO->tree_depth == DISK_LEAF_NODE_LEVEL) 659 { 660 /* There is only one node in the whole filesystem, 661 * which is simultanously leaf and root */ 662 memcpy (LEAF, ROOT, INFO->blocksize); 663 } 664 return 1; 665 } 666 667 /***************** TREE ACCESSING METHODS *****************************/ 668 669 /* I assume you are familiar with the ReiserFS tree, if not go to 670 * http://www.namesys.com/content_table.html 671 * 672 * My tree node cache is organized as following 673 * 0 ROOT node 674 * 1 LEAF node (if the ROOT is also a LEAF it is copied here 675 * 2-n other nodes on current path from bottom to top. 676 * if there is not enough space in the cache, the top most are 677 * omitted. 678 * 679 * I have only two methods to find a key in the tree: 680 * search_stat(dir_id, objectid) searches for the stat entry (always 681 * the first entry) of an object. 682 * next_key() gets the next key in tree order. 683 * 684 * This means, that I can only sequential reads of files are 685 * efficient, but this really doesn't hurt for grub. 686 */ 687 688 /* Read in the node at the current path and depth into the node cache. 689 * You must set INFO->blocks[depth] before. 690 */ 691 static char * 692 read_tree_node (unsigned int blockNr, int depth) 693 { 694 char* cache = CACHE(depth); 695 int num_cached = INFO->cached_slots; 696 if (depth < num_cached) 697 { 698 /* This is the cached part of the path. Check if same block is 699 * needed. 700 */ 701 if (blockNr == INFO->blocks[depth]) 702 return cache; 703 } 704 else 705 cache = CACHE(num_cached); 706 707 #ifdef REISERDEBUG 708 printf (" next read_in: block=%d (depth=%d)\n", 709 blockNr, depth); 710 #endif /* REISERDEBUG */ 711 if (! block_read (blockNr, 0, INFO->blocksize, cache)) 712 return 0; 713 /* Make sure it has the right node level */ 714 if (BLOCKHEAD (cache)->blk_level != depth) 715 { 716 errnum = ERR_FSYS_CORRUPT; 717 return 0; 718 } 719 720 INFO->blocks[depth] = blockNr; 721 return cache; 722 } 723 724 /* Get the next key, i.e. the key following the last retrieved key in 725 * tree order. INFO->current_ih and 726 * INFO->current_info are adapted accordingly. */ 727 static int 728 next_key (void) 729 { 730 int depth; 731 struct item_head *ih = INFO->current_ih + 1; 732 char *cache; 733 734 #ifdef REISERDEBUG 735 printf ("next_key:\n old ih: key %d:%d:%d:%d version:%d\n", 736 INFO->current_ih->ih_key.k_dir_id, 737 INFO->current_ih->ih_key.k_objectid, 738 INFO->current_ih->ih_key.u.v1.k_offset, 739 INFO->current_ih->ih_key.u.v1.k_uniqueness, 740 INFO->current_ih->ih_version); 741 #endif /* REISERDEBUG */ 742 743 if (ih == &ITEMHEAD[BLOCKHEAD (LEAF)->blk_nr_item]) 744 { 745 depth = DISK_LEAF_NODE_LEVEL; 746 /* The last item, was the last in the leaf node. 747 * Read in the next block 748 */ 749 do 750 { 751 if (depth == INFO->tree_depth) 752 { 753 /* There are no more keys at all. 754 * Return a dummy item with MAX_KEY */ 755 ih = (struct item_head *) &BLOCKHEAD (LEAF)->blk_right_delim_key; 756 goto found; 757 } 758 depth++; 759 #ifdef REISERDEBUG 760 printf (" depth=%d, i=%d\n", depth, INFO->next_key_nr[depth]); 761 #endif /* REISERDEBUG */ 762 } 763 while (INFO->next_key_nr[depth] == 0); 764 765 if (depth == INFO->tree_depth) 766 cache = ROOT; 767 else if (depth <= INFO->cached_slots) 768 cache = CACHE (depth); 769 else 770 { 771 cache = read_tree_node (INFO->blocks[depth], depth); 772 if (! cache) 773 return 0; 774 } 775 776 do 777 { 778 int nr_item = BLOCKHEAD (cache)->blk_nr_item; 779 int key_nr = INFO->next_key_nr[depth]++; 780 #ifdef REISERDEBUG 781 printf (" depth=%d, i=%d/%d\n", depth, key_nr, nr_item); 782 #endif /* REISERDEBUG */ 783 if (key_nr == nr_item) 784 /* This is the last item in this block, set the next_key_nr to 0 */ 785 INFO->next_key_nr[depth] = 0; 786 787 cache = read_tree_node (DC (cache)[key_nr].dc_block_number, --depth); 788 if (! cache) 789 return 0; 790 } 791 while (depth > DISK_LEAF_NODE_LEVEL); 792 793 ih = ITEMHEAD; 794 } 795 found: 796 INFO->current_ih = ih; 797 INFO->current_item = &LEAF[ih->ih_item_location]; 798 #ifdef REISERDEBUG 799 printf (" new ih: key %d:%d:%d:%d version:%d\n", 800 INFO->current_ih->ih_key.k_dir_id, 801 INFO->current_ih->ih_key.k_objectid, 802 INFO->current_ih->ih_key.u.v1.k_offset, 803 INFO->current_ih->ih_key.u.v1.k_uniqueness, 804 INFO->current_ih->ih_version); 805 #endif /* REISERDEBUG */ 806 return 1; 807 } 808 809 /* preconditions: reiserfs_mount already executed, therefore 810 * INFO block is valid 811 * returns: 0 if error (errnum is set), 812 * nonzero iff we were able to find the key successfully. 813 * postconditions: on a nonzero return, the current_ih and 814 * current_item fields describe the key that equals the 815 * searched key. INFO->next_key contains the next key after 816 * the searched key. 817 * side effects: messes around with the cache. 818 */ 819 static int 820 search_stat (__u32 dir_id, __u32 objectid) 821 { 822 char *cache; 823 int depth; 824 int nr_item; 825 int i; 826 struct item_head *ih; 827 #ifdef REISERDEBUG 828 printf ("search_stat:\n key %d:%d:0:0\n", dir_id, objectid); 829 #endif /* REISERDEBUG */ 830 831 depth = INFO->tree_depth; 832 cache = ROOT; 833 834 while (depth > DISK_LEAF_NODE_LEVEL) 835 { 836 struct key *key; 837 nr_item = BLOCKHEAD (cache)->blk_nr_item; 838 839 key = KEY (cache); 840 841 for (i = 0; i < nr_item; i++) 842 { 843 if (key->k_dir_id > dir_id 844 || (key->k_dir_id == dir_id 845 && (key->k_objectid > objectid 846 || (key->k_objectid == objectid 847 && (key->u.v1.k_offset 848 | key->u.v1.k_uniqueness) > 0)))) 849 break; 850 key++; 851 } 852 853 #ifdef REISERDEBUG 854 printf (" depth=%d, i=%d/%d\n", depth, i, nr_item); 855 #endif /* REISERDEBUG */ 856 INFO->next_key_nr[depth] = (i == nr_item) ? 0 : i+1; 857 cache = read_tree_node (DC (cache)[i].dc_block_number, --depth); 858 if (! cache) 859 return 0; 860 } 861 862 /* cache == LEAF */ 863 nr_item = BLOCKHEAD (LEAF)->blk_nr_item; 864 ih = ITEMHEAD; 865 for (i = 0; i < nr_item; i++) 866 { 867 if (ih->ih_key.k_dir_id == dir_id 868 && ih->ih_key.k_objectid == objectid 869 && ih->ih_key.u.v1.k_offset == 0 870 && ih->ih_key.u.v1.k_uniqueness == 0) 871 { 872 #ifdef REISERDEBUG 873 printf (" depth=%d, i=%d/%d\n", depth, i, nr_item); 874 #endif /* REISERDEBUG */ 875 INFO->current_ih = ih; 876 INFO->current_item = &LEAF[ih->ih_item_location]; 877 return 1; 878 } 879 ih++; 880 } 881 errnum = ERR_FSYS_CORRUPT; 882 return 0; 883 } 884 885 int 886 reiserfs_read (char *buf, int len) 887 { 888 unsigned int blocksize; 889 unsigned int offset; 890 unsigned int to_read; 891 char *prev_buf = buf; 892 893 #ifdef REISERDEBUG 894 printf ("reiserfs_read: filepos=%d len=%d, offset=%x:%x\n", 895 filepos, len, (__u64) IH_KEY_OFFSET (INFO->current_ih) - 1); 896 #endif /* REISERDEBUG */ 897 898 if (INFO->current_ih->ih_key.k_objectid != INFO->fileinfo.k_objectid 899 || IH_KEY_OFFSET (INFO->current_ih) > filepos + 1) 900 { 901 search_stat (INFO->fileinfo.k_dir_id, INFO->fileinfo.k_objectid); 902 goto get_next_key; 903 } 904 905 while (! errnum) 906 { 907 if (INFO->current_ih->ih_key.k_objectid != INFO->fileinfo.k_objectid) 908 break; 909 910 offset = filepos - IH_KEY_OFFSET (INFO->current_ih) + 1; 911 blocksize = INFO->current_ih->ih_item_len; 912 913 #ifdef REISERDEBUG 914 printf (" loop: filepos=%d len=%d, offset=%d blocksize=%d\n", 915 filepos, len, offset, blocksize); 916 #endif /* REISERDEBUG */ 917 918 if (IH_KEY_ISTYPE(INFO->current_ih, TYPE_DIRECT) 919 && offset < blocksize) 920 { 921 #ifdef REISERDEBUG 922 printf ("direct_read: offset=%d, blocksize=%d\n", 923 offset, blocksize); 924 #endif /* REISERDEBUG */ 925 to_read = blocksize - offset; 926 if (to_read > len) 927 to_read = len; 928 929 if (disk_read_hook != NULL) 930 { 931 disk_read_func = disk_read_hook; 932 933 block_read (INFO->blocks[DISK_LEAF_NODE_LEVEL], 934 (INFO->current_item - LEAF + offset), to_read, buf); 935 936 disk_read_func = NULL; 937 } 938 else 939 memcpy (buf, INFO->current_item + offset, to_read); 940 goto update_buf_len; 941 } 942 else if (IH_KEY_ISTYPE(INFO->current_ih, TYPE_INDIRECT)) 943 { 944 blocksize = (blocksize >> 2) << INFO->fullblocksize_shift; 945 #ifdef REISERDEBUG 946 printf ("indirect_read: offset=%d, blocksize=%d\n", 947 offset, blocksize); 948 #endif /* REISERDEBUG */ 949 950 while (offset < blocksize) 951 { 952 __u32 blocknr = ((__u32 *) INFO->current_item) 953 [offset >> INFO->fullblocksize_shift]; 954 int blk_offset = offset & (INFO->blocksize-1); 955 956 to_read = INFO->blocksize - blk_offset; 957 if (to_read > len) 958 to_read = len; 959 960 disk_read_func = disk_read_hook; 961 962 /* Journal is only for meta data. Data blocks can be read 963 * directly without using block_read 964 */ 965 devread (blocknr << INFO->blocksize_shift, 966 blk_offset, to_read, buf); 967 968 disk_read_func = NULL; 969 update_buf_len: 970 len -= to_read; 971 buf += to_read; 972 offset += to_read; 973 filepos += to_read; 974 if (len == 0) 975 goto done; 976 } 977 } 978 get_next_key: 979 next_key (); 980 } 981 done: 982 return errnum ? 0 : buf - prev_buf; 983 } 984 985 986 /* preconditions: reiserfs_mount already executed, therefore 987 * INFO block is valid 988 * returns: 0 if error, nonzero iff we were able to find the file successfully 989 * postconditions: on a nonzero return, INFO->fileinfo contains the info 990 * of the file we were trying to look up, filepos is 0 and filemax is 991 * the size of the file. 992 */ 993 int 994 reiserfs_dir (char *dirname) 995 { 996 struct reiserfs_de_head *de_head; 997 char *rest, ch; 998 __u32 dir_id, objectid, parent_dir_id = 0, parent_objectid = 0; 999 #ifndef STAGE1_5 1000 int do_possibilities = 0; 1001 #endif /* ! STAGE1_5 */ 1002 char linkbuf[PATH_MAX]; /* buffer for following symbolic links */ 1003 int link_count = 0; 1004 int mode; 1005 1006 dir_id = REISERFS_ROOT_PARENT_OBJECTID; 1007 objectid = REISERFS_ROOT_OBJECTID; 1008 1009 while (1) 1010 { 1011 #ifdef REISERDEBUG 1012 printf ("dirname=%s\n", dirname); 1013 #endif /* REISERDEBUG */ 1014 1015 /* Search for the stat info first. */ 1016 if (! search_stat (dir_id, objectid)) 1017 return 0; 1018 1019 #ifdef REISERDEBUG 1020 printf ("sd_mode=%x sd_size=%d\n", 1021 ((struct stat_data *) INFO->current_item)->sd_mode, 1022 ((struct stat_data *) INFO->current_item)->sd_size); 1023 #endif /* REISERDEBUG */ 1024 1025 mode = ((struct stat_data *) INFO->current_item)->sd_mode; 1026 1027 /* If we've got a symbolic link, then chase it. */ 1028 if (S_ISLNK (mode)) 1029 { 1030 int len; 1031 if (++link_count > MAX_LINK_COUNT) 1032 { 1033 errnum = ERR_SYMLINK_LOOP; 1034 return 0; 1035 } 1036 1037 /* Get the symlink size. */ 1038 filemax = ((struct stat_data *) INFO->current_item)->sd_size; 1039 1040 /* Find out how long our remaining name is. */ 1041 len = 0; 1042 while (dirname[len] && !isspace (dirname[len])) 1043 len++; 1044 1045 if (filemax + len > sizeof (linkbuf) - 1) 1046 { 1047 errnum = ERR_FILELENGTH; 1048 return 0; 1049 } 1050 1051 /* Copy the remaining name to the end of the symlink data. 1052 Note that DIRNAME and LINKBUF may overlap! */ 1053 grub_memmove (linkbuf + filemax, dirname, len+1); 1054 1055 INFO->fileinfo.k_dir_id = dir_id; 1056 INFO->fileinfo.k_objectid = objectid; 1057 filepos = 0; 1058 if (! next_key () 1059 || reiserfs_read (linkbuf, filemax) != filemax) 1060 { 1061 if (! errnum) 1062 errnum = ERR_FSYS_CORRUPT; 1063 return 0; 1064 } 1065 1066 #ifdef REISERDEBUG 1067 printf ("symlink=%s\n", linkbuf); 1068 #endif /* REISERDEBUG */ 1069 1070 dirname = linkbuf; 1071 if (*dirname == '/') 1072 { 1073 /* It's an absolute link, so look it up in root. */ 1074 dir_id = REISERFS_ROOT_PARENT_OBJECTID; 1075 objectid = REISERFS_ROOT_OBJECTID; 1076 } 1077 else 1078 { 1079 /* Relative, so look it up in our parent directory. */ 1080 dir_id = parent_dir_id; 1081 objectid = parent_objectid; 1082 } 1083 1084 /* Now lookup the new name. */ 1085 continue; 1086 } 1087 1088 /* if we have a real file (and we're not just printing possibilities), 1089 then this is where we want to exit */ 1090 1091 if (! *dirname || isspace (*dirname)) 1092 { 1093 if (! S_ISREG (mode)) 1094 { 1095 errnum = ERR_BAD_FILETYPE; 1096 return 0; 1097 } 1098 1099 filepos = 0; 1100 filemax = ((struct stat_data *) INFO->current_item)->sd_size; 1101 1102 /* If this is a new stat data and size is > 4GB set filemax to 1103 * maximum 1104 */ 1105 if (INFO->current_ih->ih_version == ITEM_VERSION_2 1106 && ((struct stat_data *) INFO->current_item)->sd_size_hi > 0) 1107 filemax = 0xffffffff; 1108 1109 INFO->fileinfo.k_dir_id = dir_id; 1110 INFO->fileinfo.k_objectid = objectid; 1111 return next_key (); 1112 } 1113 1114 /* continue with the file/directory name interpretation */ 1115 while (*dirname == '/') 1116 dirname++; 1117 if (! S_ISDIR (mode)) 1118 { 1119 errnum = ERR_BAD_FILETYPE; 1120 return 0; 1121 } 1122 for (rest = dirname; (ch = *rest) && ! isspace (ch) && ch != '/'; rest++); 1123 *rest = 0; 1124 1125 # ifndef STAGE1_5 1126 if (print_possibilities && ch != '/') 1127 do_possibilities = 1; 1128 # endif /* ! STAGE1_5 */ 1129 1130 while (1) 1131 { 1132 char *name_end; 1133 int num_entries; 1134 1135 if (! next_key ()) 1136 return 0; 1137 #ifdef REISERDEBUG 1138 printf ("ih: key %d:%d:%d:%d version:%d\n", 1139 INFO->current_ih->ih_key.k_dir_id, 1140 INFO->current_ih->ih_key.k_objectid, 1141 INFO->current_ih->ih_key.u.v1.k_offset, 1142 INFO->current_ih->ih_key.u.v1.k_uniqueness, 1143 INFO->current_ih->ih_version); 1144 #endif /* REISERDEBUG */ 1145 1146 if (INFO->current_ih->ih_key.k_objectid != objectid) 1147 break; 1148 1149 name_end = INFO->current_item + INFO->current_ih->ih_item_len; 1150 de_head = (struct reiserfs_de_head *) INFO->current_item; 1151 num_entries = INFO->current_ih->u.ih_entry_count; 1152 while (num_entries > 0) 1153 { 1154 char *filename = INFO->current_item + de_head->deh_location; 1155 char tmp = *name_end; 1156 if ((de_head->deh_state & DEH_Visible)) 1157 { 1158 int cmp; 1159 /* Directory names in ReiserFS are not null 1160 * terminated. We write a temporary 0 behind it. 1161 * NOTE: that this may overwrite the first block in 1162 * the tree cache. That doesn't hurt as long as we 1163 * don't call next_key () in between. 1164 */ 1165 *name_end = 0; 1166 cmp = substring (dirname, filename); 1167 *name_end = tmp; 1168 # ifndef STAGE1_5 1169 if (do_possibilities) 1170 { 1171 if (cmp <= 0) 1172 { 1173 if (print_possibilities > 0) 1174 print_possibilities = -print_possibilities; 1175 *name_end = 0; 1176 print_a_completion (filename); 1177 *name_end = tmp; 1178 } 1179 } 1180 else 1181 # endif /* ! STAGE1_5 */ 1182 if (cmp == 0) 1183 goto found; 1184 } 1185 /* The beginning of this name marks the end of the next name. 1186 */ 1187 name_end = filename; 1188 de_head++; 1189 num_entries--; 1190 } 1191 } 1192 1193 # ifndef STAGE1_5 1194 if (print_possibilities < 0) 1195 return 1; 1196 # endif /* ! STAGE1_5 */ 1197 1198 errnum = ERR_FILE_NOT_FOUND; 1199 *rest = ch; 1200 return 0; 1201 1202 found: 1203 1204 *rest = ch; 1205 dirname = rest; 1206 1207 parent_dir_id = dir_id; 1208 parent_objectid = objectid; 1209 dir_id = de_head->deh_dir_id; 1210 objectid = de_head->deh_objectid; 1211 } 1212 } 1213 1214 int 1215 reiserfs_embed (int *start_sector, int needed_sectors) 1216 { 1217 struct reiserfs_super_block super; 1218 int num_sectors; 1219 1220 if (! devread (REISERFS_DISK_OFFSET_IN_BYTES >> SECTOR_BITS, 0, 1221 sizeof (struct reiserfs_super_block), (char *) &super)) 1222 return 0; 1223 1224 *start_sector = 1; /* reserve first sector for stage1 */ 1225 if ((substring (REISERFS_SUPER_MAGIC_STRING, super.s_magic) <= 0 1226 || substring (REISER2FS_SUPER_MAGIC_STRING, super.s_magic) <= 0 1227 || substring (REISER3FS_SUPER_MAGIC_STRING, super.s_magic) <= 0) 1228 && (/* check that this is not a super block copy inside 1229 * the journal log */ 1230 super.s_journal_block * super.s_blocksize 1231 > REISERFS_DISK_OFFSET_IN_BYTES)) 1232 num_sectors = (REISERFS_DISK_OFFSET_IN_BYTES >> SECTOR_BITS) - 1; 1233 else 1234 num_sectors = (REISERFS_OLD_DISK_OFFSET_IN_BYTES >> SECTOR_BITS) - 1; 1235 1236 return (needed_sectors <= num_sectors); 1237 } 1238 #endif /* FSYS_REISERFS */ 1239