1 /* 2 * unix_io.c --- This is the Unix (well, really POSIX) implementation 3 * of the I/O manager. 4 * 5 * Implements a one-block write-through cache. 6 * 7 * Includes support for Windows NT support under Cygwin. 8 * 9 * Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 10 * 2002 by Theodore Ts'o. 11 * 12 * %Begin-Header% 13 * This file may be redistributed under the terms of the GNU Library 14 * General Public License, version 2. 15 * %End-Header% 16 */ 17 18 #define _LARGEFILE_SOURCE 19 #define _LARGEFILE64_SOURCE 20 #ifndef _GNU_SOURCE 21 #define _GNU_SOURCE 22 #endif 23 24 #include <stdio.h> 25 #include <string.h> 26 #if HAVE_UNISTD_H 27 #include <unistd.h> 28 #endif 29 #if HAVE_ERRNO_H 30 #include <errno.h> 31 #endif 32 #include <fcntl.h> 33 #include <time.h> 34 #ifdef __linux__ 35 #include <sys/utsname.h> 36 #endif 37 #ifdef HAVE_SYS_IOCTL_H 38 #include <sys/ioctl.h> 39 #endif 40 #ifdef HAVE_SYS_MOUNT_H 41 #include <sys/mount.h> 42 #endif 43 #if HAVE_SYS_STAT_H 44 #include <sys/stat.h> 45 #endif 46 #if HAVE_SYS_TYPES_H 47 #include <sys/types.h> 48 #endif 49 #if HAVE_SYS_RESOURCE_H 50 #include <sys/resource.h> 51 #endif 52 #if HAVE_LINUX_FALLOC_H 53 #include <linux/falloc.h> 54 #endif 55 56 #if defined(__linux__) && defined(_IO) && !defined(BLKROGET) 57 #define BLKROGET _IO(0x12, 94) /* Get read-only status (0 = read_write). */ 58 #endif 59 60 #undef ALIGN_DEBUG 61 62 #include "ext2_fs.h" 63 #include "ext2fs.h" 64 65 /* 66 * For checking structure magic numbers... 67 */ 68 69 #define EXT2_CHECK_MAGIC(struct, code) \ 70 if ((struct)->magic != (code)) return (code) 71 72 struct unix_cache { 73 char *buf; 74 unsigned long long block; 75 int access_time; 76 unsigned dirty:1; 77 unsigned in_use:1; 78 }; 79 80 #define CACHE_SIZE 8 81 #define WRITE_DIRECT_SIZE 4 /* Must be smaller than CACHE_SIZE */ 82 #define READ_DIRECT_SIZE 4 /* Should be smaller than CACHE_SIZE */ 83 84 struct unix_private_data { 85 int magic; 86 int dev; 87 int flags; 88 int align; 89 int access_time; 90 ext2_loff_t offset; 91 struct unix_cache cache[CACHE_SIZE]; 92 void *bounce; 93 struct struct_io_stats io_stats; 94 }; 95 96 #define IS_ALIGNED(n, align) ((((unsigned long) n) & \ 97 ((unsigned long) ((align)-1))) == 0) 98 99 static errcode_t unix_open(const char *name, int flags, io_channel *channel); 100 static errcode_t unix_close(io_channel channel); 101 static errcode_t unix_set_blksize(io_channel channel, int blksize); 102 static errcode_t unix_read_blk(io_channel channel, unsigned long block, 103 int count, void *data); 104 static errcode_t unix_write_blk(io_channel channel, unsigned long block, 105 int count, const void *data); 106 static errcode_t unix_flush(io_channel channel); 107 static errcode_t unix_write_byte(io_channel channel, unsigned long offset, 108 int size, const void *data); 109 static errcode_t unix_set_option(io_channel channel, const char *option, 110 const char *arg); 111 static errcode_t unix_get_stats(io_channel channel, io_stats *stats) 112 ; 113 static void reuse_cache(io_channel channel, struct unix_private_data *data, 114 struct unix_cache *cache, unsigned long long block); 115 static errcode_t unix_read_blk64(io_channel channel, unsigned long long block, 116 int count, void *data); 117 static errcode_t unix_write_blk64(io_channel channel, unsigned long long block, 118 int count, const void *data); 119 static errcode_t unix_discard(io_channel channel, unsigned long long block, 120 unsigned long long count); 121 122 static struct struct_io_manager struct_unix_manager = { 123 EXT2_ET_MAGIC_IO_MANAGER, 124 "Unix I/O Manager", 125 unix_open, 126 unix_close, 127 unix_set_blksize, 128 unix_read_blk, 129 unix_write_blk, 130 unix_flush, 131 unix_write_byte, 132 unix_set_option, 133 unix_get_stats, 134 unix_read_blk64, 135 unix_write_blk64, 136 unix_discard, 137 }; 138 139 io_manager unix_io_manager = &struct_unix_manager; 140 141 static errcode_t unix_get_stats(io_channel channel, io_stats *stats) 142 { 143 errcode_t retval = 0; 144 145 struct unix_private_data *data; 146 147 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 148 data = (struct unix_private_data *) channel->private_data; 149 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 150 151 if (stats) 152 *stats = &data->io_stats; 153 154 return retval; 155 } 156 157 /* 158 * Here are the raw I/O functions 159 */ 160 static errcode_t raw_read_blk(io_channel channel, 161 struct unix_private_data *data, 162 unsigned long long block, 163 int count, void *bufv) 164 { 165 errcode_t retval; 166 ssize_t size; 167 ext2_loff_t location; 168 int actual = 0; 169 unsigned char *buf = bufv; 170 171 size = (count < 0) ? -count : count * channel->block_size; 172 data->io_stats.bytes_read += size; 173 location = ((ext2_loff_t) block * channel->block_size) + data->offset; 174 if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) { 175 retval = errno ? errno : EXT2_ET_LLSEEK_FAILED; 176 goto error_out; 177 } 178 if ((channel->align == 0) || 179 (IS_ALIGNED(buf, channel->align) && 180 IS_ALIGNED(size, channel->align))) { 181 actual = read(data->dev, buf, size); 182 if (actual != size) { 183 short_read: 184 if (actual < 0) 185 actual = 0; 186 retval = EXT2_ET_SHORT_READ; 187 goto error_out; 188 } 189 return 0; 190 } 191 192 #ifdef ALIGN_DEBUG 193 printf("raw_read_blk: O_DIRECT fallback: %p %lu\n", buf, 194 (unsigned long) size); 195 #endif 196 197 /* 198 * The buffer or size which we're trying to read isn't aligned 199 * to the O_DIRECT rules, so we need to do this the hard way... 200 */ 201 while (size > 0) { 202 actual = read(data->dev, data->bounce, channel->block_size); 203 if (actual != channel->block_size) 204 goto short_read; 205 actual = size; 206 if (size > channel->block_size) 207 actual = channel->block_size; 208 memcpy(buf, data->bounce, actual); 209 size -= actual; 210 buf += actual; 211 } 212 return 0; 213 214 error_out: 215 memset((char *) buf+actual, 0, size-actual); 216 if (channel->read_error) 217 retval = (channel->read_error)(channel, block, count, buf, 218 size, actual, retval); 219 return retval; 220 } 221 222 static errcode_t raw_write_blk(io_channel channel, 223 struct unix_private_data *data, 224 unsigned long long block, 225 int count, const void *bufv) 226 { 227 ssize_t size; 228 ext2_loff_t location; 229 int actual = 0; 230 errcode_t retval; 231 const unsigned char *buf = bufv; 232 233 if (count == 1) 234 size = channel->block_size; 235 else { 236 if (count < 0) 237 size = -count; 238 else 239 size = count * channel->block_size; 240 } 241 data->io_stats.bytes_written += size; 242 243 location = ((ext2_loff_t) block * channel->block_size) + data->offset; 244 if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) { 245 retval = errno ? errno : EXT2_ET_LLSEEK_FAILED; 246 goto error_out; 247 } 248 249 if ((channel->align == 0) || 250 (IS_ALIGNED(buf, channel->align) && 251 IS_ALIGNED(size, channel->align))) { 252 actual = write(data->dev, buf, size); 253 if (actual != size) { 254 short_write: 255 retval = EXT2_ET_SHORT_WRITE; 256 goto error_out; 257 } 258 return 0; 259 } 260 261 #ifdef ALIGN_DEBUG 262 printf("raw_write_blk: O_DIRECT fallback: %p %lu\n", buf, 263 (unsigned long) size); 264 #endif 265 /* 266 * The buffer or size which we're trying to write isn't aligned 267 * to the O_DIRECT rules, so we need to do this the hard way... 268 */ 269 while (size > 0) { 270 if (size < channel->block_size) { 271 actual = read(data->dev, data->bounce, 272 channel->block_size); 273 if (actual != channel->block_size) { 274 retval = EXT2_ET_SHORT_READ; 275 goto error_out; 276 } 277 } 278 actual = size; 279 if (size > channel->block_size) 280 actual = channel->block_size; 281 memcpy(data->bounce, buf, actual); 282 actual = write(data->dev, data->bounce, channel->block_size); 283 if (actual != channel->block_size) 284 goto short_write; 285 size -= actual; 286 buf += actual; 287 } 288 return 0; 289 290 error_out: 291 if (channel->write_error) 292 retval = (channel->write_error)(channel, block, count, buf, 293 size, actual, retval); 294 return retval; 295 } 296 297 298 /* 299 * Here we implement the cache functions 300 */ 301 302 /* Allocate the cache buffers */ 303 static errcode_t alloc_cache(io_channel channel, 304 struct unix_private_data *data) 305 { 306 errcode_t retval; 307 struct unix_cache *cache; 308 int i; 309 310 data->access_time = 0; 311 for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) { 312 cache->block = 0; 313 cache->access_time = 0; 314 cache->dirty = 0; 315 cache->in_use = 0; 316 if (cache->buf) 317 ext2fs_free_mem(&cache->buf); 318 retval = io_channel_alloc_buf(channel, 0, &cache->buf); 319 if (retval) 320 return retval; 321 } 322 if (channel->align) { 323 if (data->bounce) 324 ext2fs_free_mem(&data->bounce); 325 retval = io_channel_alloc_buf(channel, 0, &data->bounce); 326 } 327 return retval; 328 } 329 330 /* Free the cache buffers */ 331 static void free_cache(struct unix_private_data *data) 332 { 333 struct unix_cache *cache; 334 int i; 335 336 data->access_time = 0; 337 for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) { 338 cache->block = 0; 339 cache->access_time = 0; 340 cache->dirty = 0; 341 cache->in_use = 0; 342 if (cache->buf) 343 ext2fs_free_mem(&cache->buf); 344 } 345 if (data->bounce) 346 ext2fs_free_mem(&data->bounce); 347 } 348 349 #ifndef NO_IO_CACHE 350 /* 351 * Try to find a block in the cache. If the block is not found, and 352 * eldest is a non-zero pointer, then fill in eldest with the cache 353 * entry to that should be reused. 354 */ 355 static struct unix_cache *find_cached_block(struct unix_private_data *data, 356 unsigned long long block, 357 struct unix_cache **eldest) 358 { 359 struct unix_cache *cache, *unused_cache, *oldest_cache; 360 int i; 361 362 unused_cache = oldest_cache = 0; 363 for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) { 364 if (!cache->in_use) { 365 if (!unused_cache) 366 unused_cache = cache; 367 continue; 368 } 369 if (cache->block == block) { 370 cache->access_time = ++data->access_time; 371 return cache; 372 } 373 if (!oldest_cache || 374 (cache->access_time < oldest_cache->access_time)) 375 oldest_cache = cache; 376 } 377 if (eldest) 378 *eldest = (unused_cache) ? unused_cache : oldest_cache; 379 return 0; 380 } 381 382 /* 383 * Reuse a particular cache entry for another block. 384 */ 385 static void reuse_cache(io_channel channel, struct unix_private_data *data, 386 struct unix_cache *cache, unsigned long long block) 387 { 388 if (cache->dirty && cache->in_use) 389 raw_write_blk(channel, data, cache->block, 1, cache->buf); 390 391 cache->in_use = 1; 392 cache->dirty = 0; 393 cache->block = block; 394 cache->access_time = ++data->access_time; 395 } 396 397 /* 398 * Flush all of the blocks in the cache 399 */ 400 static errcode_t flush_cached_blocks(io_channel channel, 401 struct unix_private_data *data, 402 int invalidate) 403 404 { 405 struct unix_cache *cache; 406 errcode_t retval, retval2; 407 int i; 408 409 retval2 = 0; 410 for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) { 411 if (!cache->in_use) 412 continue; 413 414 if (invalidate) 415 cache->in_use = 0; 416 417 if (!cache->dirty) 418 continue; 419 420 retval = raw_write_blk(channel, data, 421 cache->block, 1, cache->buf); 422 if (retval) 423 retval2 = retval; 424 else 425 cache->dirty = 0; 426 } 427 return retval2; 428 } 429 #endif /* NO_IO_CACHE */ 430 431 #ifdef __linux__ 432 #ifndef BLKDISCARDZEROES 433 #define BLKDISCARDZEROES _IO(0x12,124) 434 #endif 435 #endif 436 437 int ext2fs_open_file(const char *pathname, int flags, mode_t mode) 438 { 439 if (mode) 440 #if defined(HAVE_OPEN64) && !defined(__OSX_AVAILABLE_BUT_DEPRECATED) 441 return open64(pathname, flags, mode); 442 else 443 return open64(pathname, flags); 444 #else 445 return open(pathname, flags, mode); 446 else 447 return open(pathname, flags); 448 #endif 449 } 450 451 int ext2fs_stat(const char *path, ext2fs_struct_stat *buf) 452 { 453 #if defined(HAVE_FSTAT64) && !defined(__OSX_AVAILABLE_BUT_DEPRECATED) 454 return stat64(path, buf); 455 #else 456 return stat(path, buf); 457 #endif 458 } 459 460 int ext2fs_fstat(int fd, ext2fs_struct_stat *buf) 461 { 462 #if defined(HAVE_FSTAT64) && !defined(__OSX_AVAILABLE_BUT_DEPRECATED) 463 return fstat64(fd, buf); 464 #else 465 return fstat(fd, buf); 466 #endif 467 } 468 469 static errcode_t unix_open(const char *name, int flags, io_channel *channel) 470 { 471 io_channel io = NULL; 472 struct unix_private_data *data = NULL; 473 errcode_t retval; 474 int open_flags; 475 int f_nocache = 0; 476 ext2fs_struct_stat st; 477 #ifdef __linux__ 478 struct utsname ut; 479 #endif 480 481 if (name == 0) 482 return EXT2_ET_BAD_DEVICE_NAME; 483 retval = ext2fs_get_mem(sizeof(struct struct_io_channel), &io); 484 if (retval) 485 goto cleanup; 486 memset(io, 0, sizeof(struct struct_io_channel)); 487 io->magic = EXT2_ET_MAGIC_IO_CHANNEL; 488 retval = ext2fs_get_mem(sizeof(struct unix_private_data), &data); 489 if (retval) 490 goto cleanup; 491 492 io->manager = unix_io_manager; 493 retval = ext2fs_get_mem(strlen(name)+1, &io->name); 494 if (retval) 495 goto cleanup; 496 497 strcpy(io->name, name); 498 io->private_data = data; 499 io->block_size = 1024; 500 io->read_error = 0; 501 io->write_error = 0; 502 io->refcount = 1; 503 504 memset(data, 0, sizeof(struct unix_private_data)); 505 data->magic = EXT2_ET_MAGIC_UNIX_IO_CHANNEL; 506 data->io_stats.num_fields = 2; 507 data->dev = -1; 508 509 open_flags = (flags & IO_FLAG_RW) ? O_RDWR : O_RDONLY; 510 if (flags & IO_FLAG_EXCLUSIVE) 511 open_flags |= O_EXCL; 512 #if defined(O_DIRECT) 513 if (flags & IO_FLAG_DIRECT_IO) { 514 open_flags |= O_DIRECT; 515 io->align = ext2fs_get_dio_alignment(data->dev); 516 } 517 #elif defined(F_NOCACHE) 518 if (flags & IO_FLAG_DIRECT_IO) { 519 f_nocache = F_NOCACHE; 520 io->align = 4096; 521 } 522 #endif 523 data->flags = flags; 524 525 data->dev = ext2fs_open_file(io->name, open_flags, 0); 526 if (data->dev < 0) { 527 retval = errno; 528 goto cleanup; 529 } 530 if (f_nocache) { 531 if (fcntl(data->dev, f_nocache, 1) < 0) { 532 retval = errno; 533 goto cleanup; 534 } 535 } 536 537 /* 538 * If the device is really a block device, then set the 539 * appropriate flag, otherwise we can set DISCARD_ZEROES flag 540 * because we are going to use punch hole instead of discard 541 * and if it succeed, subsequent read from sparse area returns 542 * zero. 543 */ 544 if (ext2fs_stat(io->name, &st) == 0) { 545 if (S_ISBLK(st.st_mode)) 546 io->flags |= CHANNEL_FLAGS_BLOCK_DEVICE; 547 else 548 io->flags |= CHANNEL_FLAGS_DISCARD_ZEROES; 549 } 550 551 #ifdef BLKDISCARDZEROES 552 { 553 int zeroes = 0; 554 if (ioctl(data->dev, BLKDISCARDZEROES, &zeroes) == 0 && 555 zeroes) 556 io->flags |= CHANNEL_FLAGS_DISCARD_ZEROES; 557 } 558 #endif 559 560 #if defined(__CYGWIN__) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) 561 /* 562 * Some operating systems require that the buffers be aligned, 563 * regardless of O_DIRECT 564 */ 565 if (!io->align) 566 io->align = 512; 567 #endif 568 569 570 if ((retval = alloc_cache(io, data))) 571 goto cleanup; 572 573 #ifdef BLKROGET 574 if (flags & IO_FLAG_RW) { 575 int error; 576 int readonly = 0; 577 578 /* Is the block device actually writable? */ 579 error = ioctl(data->dev, BLKROGET, &readonly); 580 if (!error && readonly) { 581 retval = EPERM; 582 goto cleanup; 583 } 584 } 585 #endif 586 587 #ifdef __linux__ 588 #undef RLIM_INFINITY 589 #if (defined(__alpha__) || ((defined(__sparc__) || defined(__mips__)) && (SIZEOF_LONG == 4))) 590 #define RLIM_INFINITY ((unsigned long)(~0UL>>1)) 591 #else 592 #define RLIM_INFINITY (~0UL) 593 #endif 594 /* 595 * Work around a bug in 2.4.10-2.4.18 kernels where writes to 596 * block devices are wrongly getting hit by the filesize 597 * limit. This workaround isn't perfect, since it won't work 598 * if glibc wasn't built against 2.2 header files. (Sigh.) 599 * 600 */ 601 if ((flags & IO_FLAG_RW) && 602 (uname(&ut) == 0) && 603 ((ut.release[0] == '2') && (ut.release[1] == '.') && 604 (ut.release[2] == '4') && (ut.release[3] == '.') && 605 (ut.release[4] == '1') && (ut.release[5] >= '0') && 606 (ut.release[5] < '8')) && 607 (ext2fs_stat(io->name, &st) == 0) && 608 (S_ISBLK(st.st_mode))) { 609 struct rlimit rlim; 610 611 rlim.rlim_cur = rlim.rlim_max = (unsigned long) RLIM_INFINITY; 612 setrlimit(RLIMIT_FSIZE, &rlim); 613 getrlimit(RLIMIT_FSIZE, &rlim); 614 if (((unsigned long) rlim.rlim_cur) < 615 ((unsigned long) rlim.rlim_max)) { 616 rlim.rlim_cur = rlim.rlim_max; 617 setrlimit(RLIMIT_FSIZE, &rlim); 618 } 619 } 620 #endif 621 *channel = io; 622 return 0; 623 624 cleanup: 625 if (data) { 626 if (data->dev >= 0) 627 close(data->dev); 628 free_cache(data); 629 ext2fs_free_mem(&data); 630 } 631 if (io) { 632 if (io->name) { 633 ext2fs_free_mem(&io->name); 634 } 635 ext2fs_free_mem(&io); 636 } 637 return retval; 638 } 639 640 static errcode_t unix_close(io_channel channel) 641 { 642 struct unix_private_data *data; 643 errcode_t retval = 0; 644 645 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 646 data = (struct unix_private_data *) channel->private_data; 647 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 648 649 if (--channel->refcount > 0) 650 return 0; 651 652 #ifndef NO_IO_CACHE 653 retval = flush_cached_blocks(channel, data, 0); 654 #endif 655 656 if (close(data->dev) < 0) 657 retval = errno; 658 free_cache(data); 659 660 ext2fs_free_mem(&channel->private_data); 661 if (channel->name) 662 ext2fs_free_mem(&channel->name); 663 ext2fs_free_mem(&channel); 664 return retval; 665 } 666 667 static errcode_t unix_set_blksize(io_channel channel, int blksize) 668 { 669 struct unix_private_data *data; 670 errcode_t retval; 671 672 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 673 data = (struct unix_private_data *) channel->private_data; 674 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 675 676 if (channel->block_size != blksize) { 677 #ifndef NO_IO_CACHE 678 if ((retval = flush_cached_blocks(channel, data, 0))) 679 return retval; 680 #endif 681 682 channel->block_size = blksize; 683 free_cache(data); 684 if ((retval = alloc_cache(channel, data))) 685 return retval; 686 } 687 return 0; 688 } 689 690 691 static errcode_t unix_read_blk64(io_channel channel, unsigned long long block, 692 int count, void *buf) 693 { 694 struct unix_private_data *data; 695 struct unix_cache *cache, *reuse[READ_DIRECT_SIZE]; 696 errcode_t retval; 697 char *cp; 698 int i, j; 699 700 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 701 data = (struct unix_private_data *) channel->private_data; 702 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 703 704 #ifdef NO_IO_CACHE 705 return raw_read_blk(channel, data, block, count, buf); 706 #else 707 /* 708 * If we're doing an odd-sized read or a very large read, 709 * flush out the cache and then do a direct read. 710 */ 711 if (count < 0 || count > WRITE_DIRECT_SIZE) { 712 if ((retval = flush_cached_blocks(channel, data, 0))) 713 return retval; 714 return raw_read_blk(channel, data, block, count, buf); 715 } 716 717 cp = buf; 718 while (count > 0) { 719 /* If it's in the cache, use it! */ 720 if ((cache = find_cached_block(data, block, &reuse[0]))) { 721 #ifdef DEBUG 722 printf("Using cached block %lu\n", block); 723 #endif 724 memcpy(cp, cache->buf, channel->block_size); 725 count--; 726 block++; 727 cp += channel->block_size; 728 continue; 729 } 730 if (count == 1) { 731 /* 732 * Special case where we read directly into the 733 * cache buffer; important in the O_DIRECT case 734 */ 735 cache = reuse[0]; 736 reuse_cache(channel, data, cache, block); 737 if ((retval = raw_read_blk(channel, data, block, 1, 738 cache->buf))) { 739 cache->in_use = 0; 740 return retval; 741 } 742 memcpy(cp, cache->buf, channel->block_size); 743 return 0; 744 } 745 746 /* 747 * Find the number of uncached blocks so we can do a 748 * single read request 749 */ 750 for (i=1; i < count; i++) 751 if (find_cached_block(data, block+i, &reuse[i])) 752 break; 753 #ifdef DEBUG 754 printf("Reading %d blocks starting at %lu\n", i, block); 755 #endif 756 if ((retval = raw_read_blk(channel, data, block, i, cp))) 757 return retval; 758 759 /* Save the results in the cache */ 760 for (j=0; j < i; j++) { 761 count--; 762 cache = reuse[j]; 763 reuse_cache(channel, data, cache, block++); 764 memcpy(cache->buf, cp, channel->block_size); 765 cp += channel->block_size; 766 } 767 } 768 return 0; 769 #endif /* NO_IO_CACHE */ 770 } 771 772 static errcode_t unix_read_blk(io_channel channel, unsigned long block, 773 int count, void *buf) 774 { 775 return unix_read_blk64(channel, block, count, buf); 776 } 777 778 static errcode_t unix_write_blk64(io_channel channel, unsigned long long block, 779 int count, const void *buf) 780 { 781 struct unix_private_data *data; 782 struct unix_cache *cache, *reuse; 783 errcode_t retval = 0; 784 const char *cp; 785 int writethrough; 786 787 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 788 data = (struct unix_private_data *) channel->private_data; 789 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 790 791 #ifdef NO_IO_CACHE 792 return raw_write_blk(channel, data, block, count, buf); 793 #else 794 /* 795 * If we're doing an odd-sized write or a very large write, 796 * flush out the cache completely and then do a direct write. 797 */ 798 if (count < 0 || count > WRITE_DIRECT_SIZE) { 799 if ((retval = flush_cached_blocks(channel, data, 1))) 800 return retval; 801 return raw_write_blk(channel, data, block, count, buf); 802 } 803 804 /* 805 * For a moderate-sized multi-block write, first force a write 806 * if we're in write-through cache mode, and then fill the 807 * cache with the blocks. 808 */ 809 writethrough = channel->flags & CHANNEL_FLAGS_WRITETHROUGH; 810 if (writethrough) 811 retval = raw_write_blk(channel, data, block, count, buf); 812 813 cp = buf; 814 while (count > 0) { 815 cache = find_cached_block(data, block, &reuse); 816 if (!cache) { 817 cache = reuse; 818 reuse_cache(channel, data, cache, block); 819 } 820 memcpy(cache->buf, cp, channel->block_size); 821 cache->dirty = !writethrough; 822 count--; 823 block++; 824 cp += channel->block_size; 825 } 826 return retval; 827 #endif /* NO_IO_CACHE */ 828 } 829 830 static errcode_t unix_write_blk(io_channel channel, unsigned long block, 831 int count, const void *buf) 832 { 833 return unix_write_blk64(channel, block, count, buf); 834 } 835 836 static errcode_t unix_write_byte(io_channel channel, unsigned long offset, 837 int size, const void *buf) 838 { 839 struct unix_private_data *data; 840 errcode_t retval = 0; 841 ssize_t actual; 842 843 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 844 data = (struct unix_private_data *) channel->private_data; 845 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 846 847 if (channel->align != 0) { 848 #ifdef ALIGN_DEBUG 849 printf("unix_write_byte: O_DIRECT fallback\n"); 850 #endif 851 return EXT2_ET_UNIMPLEMENTED; 852 } 853 854 #ifndef NO_IO_CACHE 855 /* 856 * Flush out the cache completely 857 */ 858 if ((retval = flush_cached_blocks(channel, data, 1))) 859 return retval; 860 #endif 861 862 if (lseek(data->dev, offset + data->offset, SEEK_SET) < 0) 863 return errno; 864 865 actual = write(data->dev, buf, size); 866 if (actual != size) 867 return EXT2_ET_SHORT_WRITE; 868 869 return 0; 870 } 871 872 /* 873 * Flush data buffers to disk. 874 */ 875 static errcode_t unix_flush(io_channel channel) 876 { 877 struct unix_private_data *data; 878 errcode_t retval = 0; 879 880 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 881 data = (struct unix_private_data *) channel->private_data; 882 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 883 884 #ifndef NO_IO_CACHE 885 retval = flush_cached_blocks(channel, data, 0); 886 #endif 887 fsync(data->dev); 888 return retval; 889 } 890 891 static errcode_t unix_set_option(io_channel channel, const char *option, 892 const char *arg) 893 { 894 struct unix_private_data *data; 895 unsigned long long tmp; 896 char *end; 897 898 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 899 data = (struct unix_private_data *) channel->private_data; 900 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 901 902 if (!strcmp(option, "offset")) { 903 if (!arg) 904 return EXT2_ET_INVALID_ARGUMENT; 905 906 tmp = strtoull(arg, &end, 0); 907 if (*end) 908 return EXT2_ET_INVALID_ARGUMENT; 909 data->offset = tmp; 910 if (data->offset < 0) 911 return EXT2_ET_INVALID_ARGUMENT; 912 return 0; 913 } 914 return EXT2_ET_INVALID_ARGUMENT; 915 } 916 917 #if defined(__linux__) && !defined(BLKDISCARD) 918 #define BLKDISCARD _IO(0x12,119) 919 #endif 920 921 static errcode_t unix_discard(io_channel channel, unsigned long long block, 922 unsigned long long count) 923 { 924 struct unix_private_data *data; 925 int ret; 926 927 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 928 data = (struct unix_private_data *) channel->private_data; 929 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 930 931 if (channel->flags & CHANNEL_FLAGS_BLOCK_DEVICE) { 932 #ifdef BLKDISCARD 933 __uint64_t range[2]; 934 935 range[0] = (__uint64_t)(block) * channel->block_size; 936 range[1] = (__uint64_t)(count) * channel->block_size; 937 938 ret = ioctl(data->dev, BLKDISCARD, &range); 939 #else 940 goto unimplemented; 941 #endif 942 } else { 943 #if defined(HAVE_FALLOCATE) && defined(FALLOC_FL_PUNCH_HOLE) 944 /* 945 * If we are not on block device, try to use punch hole 946 * to reclaim free space. 947 */ 948 ret = fallocate(data->dev, 949 FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 950 (off_t)(block) * channel->block_size, 951 (off_t)(count) * channel->block_size); 952 #else 953 goto unimplemented; 954 #endif 955 } 956 if (ret < 0) { 957 if (errno == EOPNOTSUPP) 958 goto unimplemented; 959 return errno; 960 } 961 return 0; 962 unimplemented: 963 return EXT2_ET_UNIMPLEMENTED; 964 } 965