1 /* 2 * readahead.c -- Prefetch filesystem metadata to speed up fsck. 3 * 4 * Copyright (C) 2014 Oracle. 5 * 6 * %Begin-Header% 7 * This file may be redistributed under the terms of the GNU Library 8 * General Public License, version 2. 9 * %End-Header% 10 */ 11 12 #include "config.h" 13 #include <string.h> 14 15 #include "e2fsck.h" 16 17 #undef DEBUG 18 19 #ifdef DEBUG 20 # define dbg_printf(f, a...) do {printf(f, ## a); fflush(stdout); } while (0) 21 #else 22 # define dbg_printf(f, a...) 23 #endif 24 25 struct read_dblist { 26 errcode_t err; 27 blk64_t run_start; 28 blk64_t run_len; 29 int flags; 30 }; 31 32 static int readahead_dir_block(ext2_filsys fs, struct ext2_db_entry2 *db, 33 void *priv_data) 34 { 35 struct read_dblist *pr = priv_data; 36 e2_blkcnt_t count = (pr->flags & E2FSCK_RA_DBLIST_IGNORE_BLOCKCNT ? 37 1 : db->blockcnt); 38 39 if (!pr->run_len || db->blk != pr->run_start + pr->run_len) { 40 if (pr->run_len) { 41 pr->err = io_channel_cache_readahead(fs->io, 42 pr->run_start, 43 pr->run_len); 44 dbg_printf("readahead start=%llu len=%llu err=%d\n", 45 pr->run_start, pr->run_len, 46 (int)pr->err); 47 } 48 pr->run_start = db->blk; 49 pr->run_len = 0; 50 } 51 pr->run_len += count; 52 53 return pr->err ? DBLIST_ABORT : 0; 54 } 55 56 errcode_t e2fsck_readahead_dblist(ext2_filsys fs, int flags, 57 ext2_dblist dblist, 58 unsigned long long start, 59 unsigned long long count) 60 { 61 errcode_t err; 62 struct read_dblist pr; 63 64 dbg_printf("%s: flags=0x%x\n", __func__, flags); 65 if (flags & ~E2FSCK_RA_DBLIST_ALL_FLAGS) 66 return EXT2_ET_INVALID_ARGUMENT; 67 68 memset(&pr, 0, sizeof(pr)); 69 pr.flags = flags; 70 err = ext2fs_dblist_iterate3(dblist, readahead_dir_block, start, 71 count, &pr); 72 if (pr.err) 73 return pr.err; 74 if (err) 75 return err; 76 77 if (pr.run_len) 78 err = io_channel_cache_readahead(fs->io, pr.run_start, 79 pr.run_len); 80 81 return err; 82 } 83 84 static errcode_t e2fsck_readahead_bitmap(ext2_filsys fs, 85 ext2fs_block_bitmap ra_map) 86 { 87 blk64_t start, end, out; 88 errcode_t err; 89 90 start = 1; 91 end = ext2fs_blocks_count(fs->super) - 1; 92 93 err = ext2fs_find_first_set_block_bitmap2(ra_map, start, end, &out); 94 while (err == 0) { 95 start = out; 96 err = ext2fs_find_first_zero_block_bitmap2(ra_map, start, end, 97 &out); 98 if (err == ENOENT) { 99 out = end; 100 err = 0; 101 if (out == start) 102 break; 103 } else if (err) 104 break; 105 106 err = io_channel_cache_readahead(fs->io, start, out - start); 107 if (err) 108 break; 109 start = out; 110 err = ext2fs_find_first_set_block_bitmap2(ra_map, start, end, 111 &out); 112 } 113 114 if (err == ENOENT) 115 err = 0; 116 117 return err; 118 } 119 120 /* Try not to spew bitmap range errors for readahead */ 121 static errcode_t mark_bmap_range(ext2fs_block_bitmap map, 122 blk64_t blk, unsigned int num) 123 { 124 if (blk >= ext2fs_get_generic_bmap_start(map) && 125 blk + num <= ext2fs_get_generic_bmap_end(map)) 126 ext2fs_mark_block_bitmap_range2(map, blk, num); 127 else 128 return EXT2_ET_INVALID_ARGUMENT; 129 return 0; 130 } 131 132 static errcode_t mark_bmap(ext2fs_block_bitmap map, blk64_t blk) 133 { 134 if (blk >= ext2fs_get_generic_bmap_start(map) && 135 blk <= ext2fs_get_generic_bmap_end(map)) 136 ext2fs_mark_block_bitmap2(map, blk); 137 else 138 return EXT2_ET_INVALID_ARGUMENT; 139 return 0; 140 } 141 142 errcode_t e2fsck_readahead(ext2_filsys fs, int flags, dgrp_t start, 143 dgrp_t ngroups) 144 { 145 blk64_t super, old_gdt, new_gdt; 146 blk_t blocks; 147 dgrp_t i; 148 ext2fs_block_bitmap ra_map = NULL; 149 dgrp_t end = start + ngroups; 150 errcode_t err = 0; 151 152 dbg_printf("%s: flags=0x%x start=%d groups=%d\n", __func__, flags, 153 start, ngroups); 154 if (flags & ~E2FSCK_READA_ALL_FLAGS) 155 return EXT2_ET_INVALID_ARGUMENT; 156 157 if (end > fs->group_desc_count) 158 end = fs->group_desc_count; 159 160 if (flags == 0) 161 return 0; 162 163 err = ext2fs_allocate_block_bitmap(fs, "readahead bitmap", 164 &ra_map); 165 if (err) 166 return err; 167 168 for (i = start; i < end; i++) { 169 err = ext2fs_super_and_bgd_loc2(fs, i, &super, &old_gdt, 170 &new_gdt, &blocks); 171 if (err) 172 break; 173 174 if (flags & E2FSCK_READA_SUPER) { 175 err = mark_bmap(ra_map, super); 176 if (err) 177 break; 178 } 179 180 if (flags & E2FSCK_READA_GDT) { 181 err = mark_bmap_range(ra_map, 182 old_gdt ? old_gdt : new_gdt, 183 blocks); 184 if (err) 185 break; 186 } 187 188 if ((flags & E2FSCK_READA_BBITMAP) && 189 !ext2fs_bg_flags_test(fs, i, EXT2_BG_BLOCK_UNINIT) && 190 ext2fs_bg_free_blocks_count(fs, i) < 191 fs->super->s_blocks_per_group) { 192 super = ext2fs_block_bitmap_loc(fs, i); 193 err = mark_bmap(ra_map, super); 194 if (err) 195 break; 196 } 197 198 if ((flags & E2FSCK_READA_IBITMAP) && 199 !ext2fs_bg_flags_test(fs, i, EXT2_BG_INODE_UNINIT) && 200 ext2fs_bg_free_inodes_count(fs, i) < 201 fs->super->s_inodes_per_group) { 202 super = ext2fs_inode_bitmap_loc(fs, i); 203 err = mark_bmap(ra_map, super); 204 if (err) 205 break; 206 } 207 208 if ((flags & E2FSCK_READA_ITABLE) && 209 ext2fs_bg_free_inodes_count(fs, i) < 210 fs->super->s_inodes_per_group) { 211 super = ext2fs_inode_table_loc(fs, i); 212 blocks = fs->inode_blocks_per_group - 213 (ext2fs_bg_itable_unused(fs, i) * 214 EXT2_INODE_SIZE(fs->super) / fs->blocksize); 215 err = mark_bmap_range(ra_map, super, blocks); 216 if (err) 217 break; 218 } 219 } 220 221 if (!err) 222 err = e2fsck_readahead_bitmap(fs, ra_map); 223 224 ext2fs_free_block_bitmap(ra_map); 225 return err; 226 } 227 228 int e2fsck_can_readahead(ext2_filsys fs) 229 { 230 errcode_t err; 231 232 err = io_channel_cache_readahead(fs->io, 0, 1); 233 dbg_printf("%s: supp=%d\n", __func__, err != EXT2_ET_OP_NOT_SUPPORTED); 234 return err != EXT2_ET_OP_NOT_SUPPORTED; 235 } 236 237 unsigned long long e2fsck_guess_readahead(ext2_filsys fs) 238 { 239 unsigned long long guess; 240 241 /* 242 * The optimal readahead sizes were experimentally determined by 243 * djwong in August 2014. Setting the RA size to two block groups' 244 * worth of inode table blocks seems to yield the largest reductions 245 * in e2fsck runtime. 246 */ 247 guess = 2ULL * fs->blocksize * fs->inode_blocks_per_group; 248 249 /* Disable RA if it'd use more 1/50th of RAM. */ 250 if (get_memory_size() > (guess * 50)) 251 return guess / 1024; 252 253 return 0; 254 } 255