/*
 * unix_io.c --- This is the Unix (well, really POSIX) implementation
 *      of the I/O manager.
 *
 * Implements a one-block write-through cache.
 *
 * Includes support for Windows NT support under Cygwin.
 *
 * Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
 *      2002 by Theodore Ts'o.
 *
 * %Begin-Header%
 * This file may be redistributed under the terms of the GNU Public
 * License.
 * %End-Header%
 */

#define _LARGEFILE_SOURCE
#define _LARGEFILE64_SOURCE

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#if HAVE_UNISTD_H
#include <unistd.h>
#endif
#if HAVE_ERRNO_H
#include <errno.h>
#endif
#include <fcntl.h>
#include <time.h>
#ifdef __linux__
#include <sys/utsname.h>
#endif
#if HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif
#if HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#if HAVE_SYS_RESOURCE_H
#include <sys/resource.h>
#endif

#include "ext2_fs.h"
#include "ext2fs.h"

/*
 * For checking structure magic numbers...
49 */ 50 51 #define EXT2_CHECK_MAGIC(struct, code) \ 52 if ((struct)->magic != (code)) return (code) 53 54 struct unix_cache { 55 char *buf; 56 unsigned long block; 57 int access_time; 58 unsigned dirty:1; 59 unsigned in_use:1; 60 }; 61 62 #define CACHE_SIZE 8 63 #define WRITE_DIRECT_SIZE 4 /* Must be smaller than CACHE_SIZE */ 64 #define READ_DIRECT_SIZE 4 /* Should be smaller than CACHE_SIZE */ 65 66 struct unix_private_data { 67 int magic; 68 int dev; 69 int flags; 70 int access_time; 71 ext2_loff_t offset; 72 struct unix_cache cache[CACHE_SIZE]; 73 }; 74 75 static errcode_t unix_open(const char *name, int flags, io_channel *channel); 76 static errcode_t unix_close(io_channel channel); 77 static errcode_t unix_set_blksize(io_channel channel, int blksize); 78 static errcode_t unix_read_blk(io_channel channel, unsigned long block, 79 int count, void *data); 80 static errcode_t unix_write_blk(io_channel channel, unsigned long block, 81 int count, const void *data); 82 static errcode_t unix_flush(io_channel channel); 83 static errcode_t unix_write_byte(io_channel channel, unsigned long offset, 84 int size, const void *data); 85 static errcode_t unix_set_option(io_channel channel, const char *option, 86 const char *arg); 87 88 static void reuse_cache(io_channel channel, struct unix_private_data *data, 89 struct unix_cache *cache, unsigned long block); 90 91 /* __FreeBSD_kernel__ is defined by GNU/kFreeBSD - the FreeBSD kernel 92 * does not know buffered block devices - everything is raw. 
*/ 93 #if defined(__CYGWIN__) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) 94 #define NEED_BOUNCE_BUFFER 95 #else 96 #undef NEED_BOUNCE_BUFFER 97 #endif 98 99 static struct struct_io_manager struct_unix_manager = { 100 EXT2_ET_MAGIC_IO_MANAGER, 101 "Unix I/O Manager", 102 unix_open, 103 unix_close, 104 unix_set_blksize, 105 unix_read_blk, 106 unix_write_blk, 107 unix_flush, 108 #ifdef NEED_BOUNCE_BUFFER 109 0, 110 #else 111 unix_write_byte, 112 #endif 113 unix_set_option 114 }; 115 116 io_manager unix_io_manager = &struct_unix_manager; 117 118 /* 119 * Here are the raw I/O functions 120 */ 121 #ifndef NEED_BOUNCE_BUFFER 122 static errcode_t raw_read_blk(io_channel channel, 123 struct unix_private_data *data, 124 unsigned long block, 125 int count, void *buf) 126 { 127 errcode_t retval; 128 ssize_t size; 129 ext2_loff_t location; 130 int actual = 0; 131 132 size = (count < 0) ? -count : count * channel->block_size; 133 location = ((ext2_loff_t) block * channel->block_size) + data->offset; 134 if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) { 135 retval = errno ? 
errno : EXT2_ET_LLSEEK_FAILED; 136 goto error_out; 137 } 138 actual = read(data->dev, buf, size); 139 if (actual != size) { 140 if (actual < 0) 141 actual = 0; 142 retval = EXT2_ET_SHORT_READ; 143 goto error_out; 144 } 145 return 0; 146 147 error_out: 148 memset((char *) buf+actual, 0, size-actual); 149 if (channel->read_error) 150 retval = (channel->read_error)(channel, block, count, buf, 151 size, actual, retval); 152 return retval; 153 } 154 #else /* NEED_BOUNCE_BUFFER */ 155 /* 156 * Windows and FreeBSD block devices only allow sector alignment IO in offset and size 157 */ 158 static errcode_t raw_read_blk(io_channel channel, 159 struct unix_private_data *data, 160 unsigned long block, 161 int count, void *buf) 162 { 163 errcode_t retval; 164 size_t size, alignsize, fragment; 165 ext2_loff_t location; 166 int total = 0, actual; 167 #define BLOCKALIGN 512 168 char sector[BLOCKALIGN]; 169 170 size = (count < 0) ? -count : count * channel->block_size; 171 location = ((ext2_loff_t) block * channel->block_size) + data->offset; 172 #ifdef DEBUG 173 printf("count=%d, size=%d, block=%lu, blk_size=%d, location=%llx\n", 174 count, size, block, channel->block_size, (long long)location); 175 #endif 176 if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) { 177 retval = errno ? 
errno : EXT2_ET_LLSEEK_FAILED; 178 goto error_out; 179 } 180 fragment = size % BLOCKALIGN; 181 alignsize = size - fragment; 182 if (alignsize) { 183 actual = read(data->dev, buf, alignsize); 184 if (actual != alignsize) 185 goto short_read; 186 } 187 if (fragment) { 188 actual = read(data->dev, sector, BLOCKALIGN); 189 if (actual != BLOCKALIGN) 190 goto short_read; 191 memcpy(buf+alignsize, sector, fragment); 192 } 193 return 0; 194 195 short_read: 196 if (actual>0) 197 total += actual; 198 retval = EXT2_ET_SHORT_READ; 199 200 error_out: 201 memset((char *) buf+total, 0, size-actual); 202 if (channel->read_error) 203 retval = (channel->read_error)(channel, block, count, buf, 204 size, actual, retval); 205 return retval; 206 } 207 #endif 208 209 static errcode_t raw_write_blk(io_channel channel, 210 struct unix_private_data *data, 211 unsigned long block, 212 int count, const void *buf) 213 { 214 ssize_t size; 215 ext2_loff_t location; 216 int actual = 0; 217 errcode_t retval; 218 219 if (count == 1) 220 size = channel->block_size; 221 else { 222 if (count < 0) 223 size = -count; 224 else 225 size = count * channel->block_size; 226 } 227 228 location = ((ext2_loff_t) block * channel->block_size) + data->offset; 229 if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) { 230 retval = errno ? 
errno : EXT2_ET_LLSEEK_FAILED; 231 goto error_out; 232 } 233 234 actual = write(data->dev, buf, size); 235 if (actual != size) { 236 retval = EXT2_ET_SHORT_WRITE; 237 goto error_out; 238 } 239 return 0; 240 241 error_out: 242 if (channel->write_error) 243 retval = (channel->write_error)(channel, block, count, buf, 244 size, actual, retval); 245 return retval; 246 } 247 248 249 /* 250 * Here we implement the cache functions 251 */ 252 253 /* Allocate the cache buffers */ 254 static errcode_t alloc_cache(io_channel channel, 255 struct unix_private_data *data) 256 { 257 errcode_t retval; 258 struct unix_cache *cache; 259 int i; 260 261 data->access_time = 0; 262 for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) { 263 cache->block = 0; 264 cache->access_time = 0; 265 cache->dirty = 0; 266 cache->in_use = 0; 267 if ((retval = ext2fs_get_mem(channel->block_size, 268 &cache->buf))) 269 return retval; 270 } 271 return 0; 272 } 273 274 /* Free the cache buffers */ 275 static void free_cache(struct unix_private_data *data) 276 { 277 struct unix_cache *cache; 278 int i; 279 280 data->access_time = 0; 281 for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) { 282 cache->block = 0; 283 cache->access_time = 0; 284 cache->dirty = 0; 285 cache->in_use = 0; 286 if (cache->buf) 287 ext2fs_free_mem(&cache->buf); 288 cache->buf = 0; 289 } 290 } 291 292 #ifndef NO_IO_CACHE 293 /* 294 * Try to find a block in the cache. If the block is not found, and 295 * eldest is a non-zero pointer, then fill in eldest with the cache 296 * entry to that should be reused. 
297 */ 298 static struct unix_cache *find_cached_block(struct unix_private_data *data, 299 unsigned long block, 300 struct unix_cache **eldest) 301 { 302 struct unix_cache *cache, *unused_cache, *oldest_cache; 303 int i; 304 305 unused_cache = oldest_cache = 0; 306 for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) { 307 if (!cache->in_use) { 308 if (!unused_cache) 309 unused_cache = cache; 310 continue; 311 } 312 if (cache->block == block) { 313 cache->access_time = ++data->access_time; 314 return cache; 315 } 316 if (!oldest_cache || 317 (cache->access_time < oldest_cache->access_time)) 318 oldest_cache = cache; 319 } 320 if (eldest) 321 *eldest = (unused_cache) ? unused_cache : oldest_cache; 322 return 0; 323 } 324 325 /* 326 * Reuse a particular cache entry for another block. 327 */ 328 static void reuse_cache(io_channel channel, struct unix_private_data *data, 329 struct unix_cache *cache, unsigned long block) 330 { 331 if (cache->dirty && cache->in_use) 332 raw_write_blk(channel, data, cache->block, 1, cache->buf); 333 334 cache->in_use = 1; 335 cache->dirty = 0; 336 cache->block = block; 337 cache->access_time = ++data->access_time; 338 } 339 340 /* 341 * Flush all of the blocks in the cache 342 */ 343 static errcode_t flush_cached_blocks(io_channel channel, 344 struct unix_private_data *data, 345 int invalidate) 346 347 { 348 struct unix_cache *cache; 349 errcode_t retval, retval2; 350 int i; 351 352 retval2 = 0; 353 for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) { 354 if (!cache->in_use) 355 continue; 356 357 if (invalidate) 358 cache->in_use = 0; 359 360 if (!cache->dirty) 361 continue; 362 363 retval = raw_write_blk(channel, data, 364 cache->block, 1, cache->buf); 365 if (retval) 366 retval2 = retval; 367 else 368 cache->dirty = 0; 369 } 370 return retval2; 371 } 372 #endif /* NO_IO_CACHE */ 373 374 static errcode_t unix_open(const char *name, int flags, io_channel *channel) 375 { 376 io_channel io = NULL; 377 struct 
unix_private_data *data = NULL; 378 errcode_t retval; 379 int open_flags; 380 struct stat st; 381 #ifdef __linux__ 382 struct utsname ut; 383 #endif 384 385 if (name == 0) 386 return EXT2_ET_BAD_DEVICE_NAME; 387 retval = ext2fs_get_mem(sizeof(struct struct_io_channel), &io); 388 if (retval) 389 return retval; 390 memset(io, 0, sizeof(struct struct_io_channel)); 391 io->magic = EXT2_ET_MAGIC_IO_CHANNEL; 392 retval = ext2fs_get_mem(sizeof(struct unix_private_data), &data); 393 if (retval) 394 goto cleanup; 395 396 io->manager = unix_io_manager; 397 retval = ext2fs_get_mem(strlen(name)+1, &io->name); 398 if (retval) 399 goto cleanup; 400 401 strcpy(io->name, name); 402 io->private_data = data; 403 io->block_size = 1024; 404 io->read_error = 0; 405 io->write_error = 0; 406 io->refcount = 1; 407 408 memset(data, 0, sizeof(struct unix_private_data)); 409 data->magic = EXT2_ET_MAGIC_UNIX_IO_CHANNEL; 410 411 if ((retval = alloc_cache(io, data))) 412 goto cleanup; 413 414 open_flags = (flags & IO_FLAG_RW) ? O_RDWR : O_RDONLY; 415 if (flags & IO_FLAG_EXCLUSIVE) 416 open_flags |= O_EXCL; 417 #ifdef HAVE_OPEN64 418 data->dev = open64(io->name, open_flags); 419 #else 420 data->dev = open(io->name, open_flags); 421 #endif 422 if (data->dev < 0) { 423 retval = errno; 424 goto cleanup; 425 } 426 427 #ifdef __linux__ 428 #undef RLIM_INFINITY 429 #if (defined(__alpha__) || ((defined(__sparc__) || defined(__mips__)) && (SIZEOF_LONG == 4))) 430 #define RLIM_INFINITY ((unsigned long)(~0UL>>1)) 431 #else 432 #define RLIM_INFINITY (~0UL) 433 #endif 434 /* 435 * Work around a bug in 2.4.10-2.4.18 kernels where writes to 436 * block devices are wrongly getting hit by the filesize 437 * limit. This workaround isn't perfect, since it won't work 438 * if glibc wasn't built against 2.2 header files. (Sigh.) 
439 * 440 */ 441 if ((flags & IO_FLAG_RW) && 442 (uname(&ut) == 0) && 443 ((ut.release[0] == '2') && (ut.release[1] == '.') && 444 (ut.release[2] == '4') && (ut.release[3] == '.') && 445 (ut.release[4] == '1') && (ut.release[5] >= '0') && 446 (ut.release[5] < '8')) && 447 (fstat(data->dev, &st) == 0) && 448 (S_ISBLK(st.st_mode))) { 449 struct rlimit rlim; 450 451 rlim.rlim_cur = rlim.rlim_max = (unsigned long) RLIM_INFINITY; 452 setrlimit(RLIMIT_FSIZE, &rlim); 453 getrlimit(RLIMIT_FSIZE, &rlim); 454 if (((unsigned long) rlim.rlim_cur) < 455 ((unsigned long) rlim.rlim_max)) { 456 rlim.rlim_cur = rlim.rlim_max; 457 setrlimit(RLIMIT_FSIZE, &rlim); 458 } 459 } 460 #endif 461 *channel = io; 462 return 0; 463 464 cleanup: 465 if (data) { 466 free_cache(data); 467 ext2fs_free_mem(&data); 468 } 469 if (io) 470 ext2fs_free_mem(&io); 471 return retval; 472 } 473 474 static errcode_t unix_close(io_channel channel) 475 { 476 struct unix_private_data *data; 477 errcode_t retval = 0; 478 479 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 480 data = (struct unix_private_data *) channel->private_data; 481 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 482 483 if (--channel->refcount > 0) 484 return 0; 485 486 #ifndef NO_IO_CACHE 487 retval = flush_cached_blocks(channel, data, 0); 488 #endif 489 490 if (close(data->dev) < 0) 491 retval = errno; 492 free_cache(data); 493 494 ext2fs_free_mem(&channel->private_data); 495 if (channel->name) 496 ext2fs_free_mem(&channel->name); 497 ext2fs_free_mem(&channel); 498 return retval; 499 } 500 501 static errcode_t unix_set_blksize(io_channel channel, int blksize) 502 { 503 struct unix_private_data *data; 504 errcode_t retval; 505 506 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 507 data = (struct unix_private_data *) channel->private_data; 508 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 509 510 if (channel->block_size != blksize) { 511 #ifndef NO_IO_CACHE 512 if ((retval = flush_cached_blocks(channel, data, 
0))) 513 return retval; 514 #endif 515 516 channel->block_size = blksize; 517 free_cache(data); 518 if ((retval = alloc_cache(channel, data))) 519 return retval; 520 } 521 return 0; 522 } 523 524 525 static errcode_t unix_read_blk(io_channel channel, unsigned long block, 526 int count, void *buf) 527 { 528 struct unix_private_data *data; 529 struct unix_cache *cache, *reuse[READ_DIRECT_SIZE]; 530 errcode_t retval; 531 char *cp; 532 int i, j; 533 534 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 535 data = (struct unix_private_data *) channel->private_data; 536 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 537 538 #ifdef NO_IO_CACHE 539 return raw_read_blk(channel, data, block, count, buf); 540 #else 541 /* 542 * If we're doing an odd-sized read or a very large read, 543 * flush out the cache and then do a direct read. 544 */ 545 if (count < 0 || count > WRITE_DIRECT_SIZE) { 546 if ((retval = flush_cached_blocks(channel, data, 0))) 547 return retval; 548 return raw_read_blk(channel, data, block, count, buf); 549 } 550 551 cp = buf; 552 while (count > 0) { 553 /* If it's in the cache, use it! 
*/ 554 if ((cache = find_cached_block(data, block, &reuse[0]))) { 555 #ifdef DEBUG 556 printf("Using cached block %lu\n", block); 557 #endif 558 memcpy(cp, cache->buf, channel->block_size); 559 count--; 560 block++; 561 cp += channel->block_size; 562 continue; 563 } 564 /* 565 * Find the number of uncached blocks so we can do a 566 * single read request 567 */ 568 for (i=1; i < count; i++) 569 if (find_cached_block(data, block+i, &reuse[i])) 570 break; 571 #ifdef DEBUG 572 printf("Reading %d blocks starting at %lu\n", i, block); 573 #endif 574 if ((retval = raw_read_blk(channel, data, block, i, cp))) 575 return retval; 576 577 /* Save the results in the cache */ 578 for (j=0; j < i; j++) { 579 count--; 580 cache = reuse[j]; 581 reuse_cache(channel, data, cache, block++); 582 memcpy(cache->buf, cp, channel->block_size); 583 cp += channel->block_size; 584 } 585 } 586 return 0; 587 #endif /* NO_IO_CACHE */ 588 } 589 590 static errcode_t unix_write_blk(io_channel channel, unsigned long block, 591 int count, const void *buf) 592 { 593 struct unix_private_data *data; 594 struct unix_cache *cache, *reuse; 595 errcode_t retval = 0; 596 const char *cp; 597 int writethrough; 598 599 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 600 data = (struct unix_private_data *) channel->private_data; 601 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 602 603 #ifdef NO_IO_CACHE 604 return raw_write_blk(channel, data, block, count, buf); 605 #else 606 /* 607 * If we're doing an odd-sized write or a very large write, 608 * flush out the cache completely and then do a direct write. 609 */ 610 if (count < 0 || count > WRITE_DIRECT_SIZE) { 611 if ((retval = flush_cached_blocks(channel, data, 1))) 612 return retval; 613 return raw_write_blk(channel, data, block, count, buf); 614 } 615 616 /* 617 * For a moderate-sized multi-block write, first force a write 618 * if we're in write-through cache mode, and then fill the 619 * cache with the blocks. 
620 */ 621 writethrough = channel->flags & CHANNEL_FLAGS_WRITETHROUGH; 622 if (writethrough) 623 retval = raw_write_blk(channel, data, block, count, buf); 624 625 cp = buf; 626 while (count > 0) { 627 cache = find_cached_block(data, block, &reuse); 628 if (!cache) { 629 cache = reuse; 630 reuse_cache(channel, data, cache, block); 631 } 632 memcpy(cache->buf, cp, channel->block_size); 633 cache->dirty = !writethrough; 634 count--; 635 block++; 636 cp += channel->block_size; 637 } 638 return retval; 639 #endif /* NO_IO_CACHE */ 640 } 641 642 static errcode_t unix_write_byte(io_channel channel, unsigned long offset, 643 int size, const void *buf) 644 { 645 struct unix_private_data *data; 646 errcode_t retval = 0; 647 ssize_t actual; 648 649 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 650 data = (struct unix_private_data *) channel->private_data; 651 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 652 653 #ifndef NO_IO_CACHE 654 /* 655 * Flush out the cache completely 656 */ 657 if ((retval = flush_cached_blocks(channel, data, 1))) 658 return retval; 659 #endif 660 661 if (lseek(data->dev, offset + data->offset, SEEK_SET) < 0) 662 return errno; 663 664 actual = write(data->dev, buf, size); 665 if (actual != size) 666 return EXT2_ET_SHORT_WRITE; 667 668 return 0; 669 } 670 671 /* 672 * Flush data buffers to disk. 
673 */ 674 static errcode_t unix_flush(io_channel channel) 675 { 676 struct unix_private_data *data; 677 errcode_t retval = 0; 678 679 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 680 data = (struct unix_private_data *) channel->private_data; 681 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 682 683 #ifndef NO_IO_CACHE 684 retval = flush_cached_blocks(channel, data, 0); 685 #endif 686 fsync(data->dev); 687 return retval; 688 } 689 690 static errcode_t unix_set_option(io_channel channel, const char *option, 691 const char *arg) 692 { 693 struct unix_private_data *data; 694 unsigned long long tmp; 695 char *end; 696 697 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 698 data = (struct unix_private_data *) channel->private_data; 699 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 700 701 if (!strcmp(option, "offset")) { 702 if (!arg) 703 return EXT2_ET_INVALID_ARGUMENT; 704 705 tmp = strtoull(arg, &end, 0); 706 if (*end) 707 return EXT2_ET_INVALID_ARGUMENT; 708 data->offset = tmp; 709 if (data->offset < 0) 710 return EXT2_ET_INVALID_ARGUMENT; 711 return 0; 712 } 713 return EXT2_ET_INVALID_ARGUMENT; 714 } 715