Home | History | Annotate | Download | only in ext2fs
      1 /*
      2  * unix_io.c --- This is the Unix (well, really POSIX) implementation
      3  * 	of the I/O manager.
      4  *
      5  * Implements a small (CACHE_SIZE-block) cache; write-through is optional.
      6  *
      7  * Includes support for Windows NT under Cygwin.
      8  *
      9  * Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
     10  * 	2002 by Theodore Ts'o.
     11  *
     12  * %Begin-Header%
     13  * This file may be redistributed under the terms of the GNU Library
     14  * General Public License, version 2.
     15  * %End-Header%
     16  */
     17 
     18 #define _LARGEFILE_SOURCE
     19 #define _LARGEFILE64_SOURCE
     20 #define _GNU_SOURCE
     21 
     22 #include <stdio.h>
     23 #include <string.h>
     24 #if HAVE_UNISTD_H
     25 #include <unistd.h>
     26 #endif
     27 #if HAVE_ERRNO_H
     28 #include <errno.h>
     29 #endif
     30 #include <fcntl.h>
     31 #include <time.h>
     32 #ifdef __linux__
     33 #include <sys/utsname.h>
     34 #endif
     35 #ifdef HAVE_SYS_IOCTL_H
     36 #include <sys/ioctl.h>
     37 #endif
     38 #ifdef HAVE_SYS_MOUNT_H
     39 #include <sys/mount.h>
     40 #endif
     41 #if HAVE_SYS_STAT_H
     42 #include <sys/stat.h>
     43 #endif
     44 #if HAVE_SYS_TYPES_H
     45 #include <sys/types.h>
     46 #endif
     47 #if HAVE_SYS_RESOURCE_H
     48 #include <sys/resource.h>
     49 #endif
     50 
     51 #if defined(__linux__) && defined(_IO) && !defined(BLKROGET)
     52 #define BLKROGET   _IO(0x12, 94) /* Get read-only status (0 = read_write).  */
     53 #endif
     54 
     55 #if defined(__linux__) && defined(_IO) && !defined(BLKSSZGET)
     56 #define BLKSSZGET  _IO(0x12,104)/* get block device sector size */
     57 #endif
     58 
     59 #undef ALIGN_DEBUG
     60 
     61 #include "ext2_fs.h"
     62 #include "ext2fs.h"
     63 
     64 /*
     65  * For checking structure magic numbers...
     66  */
     67 
     68 #define EXT2_CHECK_MAGIC(struct, code) \
     69 	  if ((struct)->magic != (code)) return (code)
     70 
     71 struct unix_cache {
     72 	char		*buf;
     73 	unsigned long	block;
     74 	int		access_time;
     75 	unsigned	dirty:1;
     76 	unsigned	in_use:1;
     77 };
     78 
     79 #define CACHE_SIZE 8
     80 #define WRITE_DIRECT_SIZE 4	/* Must be smaller than CACHE_SIZE */
     81 #define READ_DIRECT_SIZE 4	/* Should be smaller than CACHE_SIZE */
     82 
     83 struct unix_private_data {
     84 	int	magic;
     85 	int	dev;
     86 	int	flags;
     87 	int	align;
     88 	int	access_time;
     89 	ext2_loff_t offset;
     90 	struct unix_cache cache[CACHE_SIZE];
     91 	void	*bounce;
     92 	struct struct_io_stats io_stats;
     93 };
     94 
     95 #define IS_ALIGNED(n, align) ((((unsigned long) n) & \
     96 			       ((unsigned long) ((align)-1))) == 0)
     97 
     98 static errcode_t unix_open(const char *name, int flags, io_channel *channel);
     99 static errcode_t unix_close(io_channel channel);
    100 static errcode_t unix_set_blksize(io_channel channel, int blksize);
    101 static errcode_t unix_read_blk(io_channel channel, unsigned long block,
    102 			       int count, void *data);
    103 static errcode_t unix_write_blk(io_channel channel, unsigned long block,
    104 				int count, const void *data);
    105 static errcode_t unix_flush(io_channel channel);
    106 static errcode_t unix_write_byte(io_channel channel, unsigned long offset,
    107 				int size, const void *data);
    108 static errcode_t unix_set_option(io_channel channel, const char *option,
    109 				 const char *arg);
    110 static errcode_t unix_get_stats(io_channel channel, io_stats *stats)
    111 ;
    112 static void reuse_cache(io_channel channel, struct unix_private_data *data,
    113 		 struct unix_cache *cache, unsigned long long block);
    114 static errcode_t unix_read_blk64(io_channel channel, unsigned long long block,
    115 			       int count, void *data);
    116 static errcode_t unix_write_blk64(io_channel channel, unsigned long long block,
    117 				int count, const void *data);
    118 
    119 static struct struct_io_manager struct_unix_manager = {
    120 	EXT2_ET_MAGIC_IO_MANAGER,
    121 	"Unix I/O Manager",
    122 	unix_open,
    123 	unix_close,
    124 	unix_set_blksize,
    125 	unix_read_blk,
    126 	unix_write_blk,
    127 	unix_flush,
    128 	unix_write_byte,
    129 	unix_set_option,
    130 	unix_get_stats,
    131 	unix_read_blk64,
    132 	unix_write_blk64,
    133 };
    134 
    135 io_manager unix_io_manager = &struct_unix_manager;
    136 
    137 static errcode_t unix_get_stats(io_channel channel, io_stats *stats)
    138 {
    139 	errcode_t 	retval = 0;
    140 
    141 	struct unix_private_data *data;
    142 
    143 	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
    144 	data = (struct unix_private_data *) channel->private_data;
    145 	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
    146 
    147 	if (stats)
    148 		*stats = &data->io_stats;
    149 
    150 	return retval;
    151 }
    152 
    153 /*
    154  * Here are the raw I/O functions
    155  */
    156 static errcode_t raw_read_blk(io_channel channel,
    157 			      struct unix_private_data *data,
    158 			      unsigned long long block,
    159 			      int count, void *buf)
    160 {
    161 	errcode_t	retval;
    162 	ssize_t		size;
    163 	ext2_loff_t	location;
    164 	int		actual = 0;
    165 
    166 	size = (count < 0) ? -count : count * channel->block_size;
    167 	data->io_stats.bytes_read += size;
    168 	location = ((ext2_loff_t) block * channel->block_size) + data->offset;
    169 	if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) {
    170 		retval = errno ? errno : EXT2_ET_LLSEEK_FAILED;
    171 		goto error_out;
    172 	}
    173 	if ((data->align == 0) ||
    174 	    ((IS_ALIGNED(buf, data->align)) && IS_ALIGNED(size, data->align))) {
    175 		actual = read(data->dev, buf, size);
    176 		if (actual != size) {
    177 		short_read:
    178 			if (actual < 0)
    179 				actual = 0;
    180 			retval = EXT2_ET_SHORT_READ;
    181 			goto error_out;
    182 		}
    183 		return 0;
    184 	}
    185 
    186 #ifdef ALIGN_DEBUG
    187 	printf("raw_read_blk: O_DIRECT fallback: %p %lu\n", buf,
    188 	       (unsigned long) size);
    189 #endif
    190 
    191 	/*
    192 	 * The buffer or size which we're trying to read isn't aligned
    193 	 * to the O_DIRECT rules, so we need to do this the hard way...
    194 	 */
    195 	while (size > 0) {
    196 		actual = read(data->dev, data->bounce, channel->block_size);
    197 		if (actual != channel->block_size)
    198 			goto short_read;
    199 		actual = size;
    200 		if (size > channel->block_size)
    201 			actual = channel->block_size;
    202 		memcpy(buf, data->bounce, actual);
    203 		size -= actual;
    204 		buf += actual;
    205 	}
    206 	return 0;
    207 
    208 error_out:
    209 	memset((char *) buf+actual, 0, size-actual);
    210 	if (channel->read_error)
    211 		retval = (channel->read_error)(channel, block, count, buf,
    212 					       size, actual, retval);
    213 	return retval;
    214 }
    215 
    216 static errcode_t raw_write_blk(io_channel channel,
    217 			       struct unix_private_data *data,
    218 			       unsigned long long block,
    219 			       int count, const void *buf)
    220 {
    221 	ssize_t		size;
    222 	ext2_loff_t	location;
    223 	int		actual = 0;
    224 	errcode_t	retval;
    225 
    226 	if (count == 1)
    227 		size = channel->block_size;
    228 	else {
    229 		if (count < 0)
    230 			size = -count;
    231 		else
    232 			size = count * channel->block_size;
    233 	}
    234 	data->io_stats.bytes_written += size;
    235 
    236 	location = ((ext2_loff_t) block * channel->block_size) + data->offset;
    237 	if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) {
    238 		retval = errno ? errno : EXT2_ET_LLSEEK_FAILED;
    239 		goto error_out;
    240 	}
    241 
    242 	if ((data->align == 0) ||
    243 	    ((IS_ALIGNED(buf, data->align)) && IS_ALIGNED(size, data->align))) {
    244 		actual = write(data->dev, buf, size);
    245 		if (actual != size) {
    246 		short_write:
    247 			retval = EXT2_ET_SHORT_WRITE;
    248 			goto error_out;
    249 		}
    250 		return 0;
    251 	}
    252 
    253 #ifdef ALIGN_DEBUG
    254 	printf("raw_write_blk: O_DIRECT fallback: %p %lu\n", buf,
    255 	       (unsigned long) size);
    256 #endif
    257 	/*
    258 	 * The buffer or size which we're trying to write isn't aligned
    259 	 * to the O_DIRECT rules, so we need to do this the hard way...
    260 	 */
    261 	while (size > 0) {
    262 		if (size < channel->block_size) {
    263 			actual = read(data->dev, data->bounce,
    264 				      channel->block_size);
    265 			if (actual != channel->block_size) {
    266 				retval = EXT2_ET_SHORT_READ;
    267 				goto error_out;
    268 			}
    269 		}
    270 		actual = size;
    271 		if (size > channel->block_size)
    272 			actual = channel->block_size;
    273 		memcpy(data->bounce, buf, actual);
    274 		actual = write(data->dev, data->bounce, channel->block_size);
    275 		if (actual != channel->block_size)
    276 			goto short_write;
    277 		size -= actual;
    278 		buf += actual;
    279 	}
    280 	return 0;
    281 
    282 error_out:
    283 	if (channel->write_error)
    284 		retval = (channel->write_error)(channel, block, count, buf,
    285 						size, actual, retval);
    286 	return retval;
    287 }
    288 
    289 
    290 /*
    291  * Here we implement the cache functions
    292  */
    293 
    294 /* Allocate the cache buffers */
    295 static errcode_t alloc_cache(io_channel channel,
    296 			     struct unix_private_data *data)
    297 {
    298 	errcode_t		retval;
    299 	struct unix_cache	*cache;
    300 	int			i;
    301 
    302 	data->access_time = 0;
    303 	for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
    304 		cache->block = 0;
    305 		cache->access_time = 0;
    306 		cache->dirty = 0;
    307 		cache->in_use = 0;
    308 		if (cache->buf)
    309 			ext2fs_free_mem(&cache->buf);
    310 		retval = ext2fs_get_memalign(channel->block_size,
    311 					     data->align, &cache->buf);
    312 		if (retval)
    313 			return retval;
    314 	}
    315 	if (data->align) {
    316 		if (data->bounce)
    317 			ext2fs_free_mem(&data->bounce);
    318 		retval = ext2fs_get_memalign(channel->block_size, data->align,
    319 					     &data->bounce);
    320 	}
    321 	return retval;
    322 }
    323 
    324 /* Free the cache buffers */
    325 static void free_cache(struct unix_private_data *data)
    326 {
    327 	struct unix_cache	*cache;
    328 	int			i;
    329 
    330 	data->access_time = 0;
    331 	for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
    332 		cache->block = 0;
    333 		cache->access_time = 0;
    334 		cache->dirty = 0;
    335 		cache->in_use = 0;
    336 		if (cache->buf)
    337 			ext2fs_free_mem(&cache->buf);
    338 	}
    339 	if (data->bounce)
    340 		ext2fs_free_mem(&data->bounce);
    341 }
    342 
    343 #ifndef NO_IO_CACHE
    344 /*
    345  * Try to find a block in the cache.  If the block is not found, and
    346  * eldest is a non-zero pointer, then fill in eldest with the cache
    347  * entry to that should be reused.
    348  */
    349 static struct unix_cache *find_cached_block(struct unix_private_data *data,
    350 					    unsigned long long block,
    351 					    struct unix_cache **eldest)
    352 {
    353 	struct unix_cache	*cache, *unused_cache, *oldest_cache;
    354 	int			i;
    355 
    356 	unused_cache = oldest_cache = 0;
    357 	for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
    358 		if (!cache->in_use) {
    359 			if (!unused_cache)
    360 				unused_cache = cache;
    361 			continue;
    362 		}
    363 		if (cache->block == block) {
    364 			cache->access_time = ++data->access_time;
    365 			return cache;
    366 		}
    367 		if (!oldest_cache ||
    368 		    (cache->access_time < oldest_cache->access_time))
    369 			oldest_cache = cache;
    370 	}
    371 	if (eldest)
    372 		*eldest = (unused_cache) ? unused_cache : oldest_cache;
    373 	return 0;
    374 }
    375 
    376 /*
    377  * Reuse a particular cache entry for another block.
    378  */
    379 static void reuse_cache(io_channel channel, struct unix_private_data *data,
    380 		 struct unix_cache *cache, unsigned long long block)
    381 {
    382 	if (cache->dirty && cache->in_use)
    383 		raw_write_blk(channel, data, cache->block, 1, cache->buf);
    384 
    385 	cache->in_use = 1;
    386 	cache->dirty = 0;
    387 	cache->block = block;
    388 	cache->access_time = ++data->access_time;
    389 }
    390 
    391 /*
    392  * Flush all of the blocks in the cache
    393  */
    394 static errcode_t flush_cached_blocks(io_channel channel,
    395 				     struct unix_private_data *data,
    396 				     int invalidate)
    397 
    398 {
    399 	struct unix_cache	*cache;
    400 	errcode_t		retval, retval2;
    401 	int			i;
    402 
    403 	retval2 = 0;
    404 	for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
    405 		if (!cache->in_use)
    406 			continue;
    407 
    408 		if (invalidate)
    409 			cache->in_use = 0;
    410 
    411 		if (!cache->dirty)
    412 			continue;
    413 
    414 		retval = raw_write_blk(channel, data,
    415 				       cache->block, 1, cache->buf);
    416 		if (retval)
    417 			retval2 = retval;
    418 		else
    419 			cache->dirty = 0;
    420 	}
    421 	return retval2;
    422 }
    423 #endif /* NO_IO_CACHE */
    424 
    425 static errcode_t unix_open(const char *name, int flags, io_channel *channel)
    426 {
    427 	io_channel	io = NULL;
    428 	struct unix_private_data *data = NULL;
    429 	errcode_t	retval;
    430 	int		open_flags;
    431 	int		f_nocache = 0;
    432 	struct stat	st;
    433 #ifdef __linux__
    434 	struct 		utsname ut;
    435 #endif
    436 
    437 	if (name == 0)
    438 		return EXT2_ET_BAD_DEVICE_NAME;
    439 	retval = ext2fs_get_mem(sizeof(struct struct_io_channel), &io);
    440 	if (retval)
    441 		return retval;
    442 	memset(io, 0, sizeof(struct struct_io_channel));
    443 	io->magic = EXT2_ET_MAGIC_IO_CHANNEL;
    444 	retval = ext2fs_get_mem(sizeof(struct unix_private_data), &data);
    445 	if (retval)
    446 		goto cleanup;
    447 
    448 	io->manager = unix_io_manager;
    449 	retval = ext2fs_get_mem(strlen(name)+1, &io->name);
    450 	if (retval)
    451 		goto cleanup;
    452 
    453 	strcpy(io->name, name);
    454 	io->private_data = data;
    455 	io->block_size = 1024;
    456 	io->read_error = 0;
    457 	io->write_error = 0;
    458 	io->refcount = 1;
    459 
    460 	memset(data, 0, sizeof(struct unix_private_data));
    461 	data->magic = EXT2_ET_MAGIC_UNIX_IO_CHANNEL;
    462 	data->io_stats.num_fields = 2;
    463 
    464 	open_flags = (flags & IO_FLAG_RW) ? O_RDWR : O_RDONLY;
    465 	if (flags & IO_FLAG_EXCLUSIVE)
    466 		open_flags |= O_EXCL;
    467 	if (flags & IO_FLAG_DIRECT_IO)
    468 #if !defined(O_DIRECT) && defined(F_NOCACHE)
    469 		f_nocache = F_NOCACHE;
    470 #else
    471 		open_flags |= O_DIRECT;
    472 #endif
    473 	data->flags = flags;
    474 
    475 #ifdef HAVE_OPEN64
    476 	data->dev = open64(io->name, open_flags);
    477 #else
    478 	data->dev = open(io->name, open_flags);
    479 #endif
    480 	if (data->dev < 0) {
    481 		retval = errno;
    482 		goto cleanup;
    483 	}
    484 
    485 	if (f_nocache) {
    486 		if (fcntl(data->dev, f_nocache, 1) < 0) {
    487 			retval = errno;
    488 			goto cleanup;
    489 		}
    490 	}
    491 
    492 #ifdef BLKSSZGET
    493 	if (flags & IO_FLAG_DIRECT_IO) {
    494 		if (ioctl(data->dev, BLKSSZGET, &data->align) != 0)
    495 			data->align = io->block_size;
    496 	}
    497 #endif
    498 
    499 #if defined(__CYGWIN__) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
    500 	/*
    501 	 * Some operating systems require that the buffers be aligned,
    502 	 * regardless of O_DIRECT
    503 	 */
    504 	data->align = 512;
    505 #endif
    506 
    507 
    508 	if ((retval = alloc_cache(io, data)))
    509 		goto cleanup;
    510 
    511 #ifdef BLKROGET
    512 	if (flags & IO_FLAG_RW) {
    513 		int error;
    514 		int readonly = 0;
    515 
    516 		/* Is the block device actually writable? */
    517 		error = ioctl(data->dev, BLKROGET, &readonly);
    518 		if (!error && readonly) {
    519 			close(data->dev);
    520 			retval = EPERM;
    521 			goto cleanup;
    522 		}
    523 	}
    524 #endif
    525 
    526 #ifdef __linux__
    527 #undef RLIM_INFINITY
    528 #if (defined(__alpha__) || ((defined(__sparc__) || defined(__mips__)) && (SIZEOF_LONG == 4)))
    529 #define RLIM_INFINITY	((unsigned long)(~0UL>>1))
    530 #else
    531 #define RLIM_INFINITY  (~0UL)
    532 #endif
    533 	/*
    534 	 * Work around a bug in 2.4.10-2.4.18 kernels where writes to
    535 	 * block devices are wrongly getting hit by the filesize
    536 	 * limit.  This workaround isn't perfect, since it won't work
    537 	 * if glibc wasn't built against 2.2 header files.  (Sigh.)
    538 	 *
    539 	 */
    540 	if ((flags & IO_FLAG_RW) &&
    541 	    (uname(&ut) == 0) &&
    542 	    ((ut.release[0] == '2') && (ut.release[1] == '.') &&
    543 	     (ut.release[2] == '4') && (ut.release[3] == '.') &&
    544 	     (ut.release[4] == '1') && (ut.release[5] >= '0') &&
    545 	     (ut.release[5] < '8')) &&
    546 	    (fstat(data->dev, &st) == 0) &&
    547 	    (S_ISBLK(st.st_mode))) {
    548 		struct rlimit	rlim;
    549 
    550 		rlim.rlim_cur = rlim.rlim_max = (unsigned long) RLIM_INFINITY;
    551 		setrlimit(RLIMIT_FSIZE, &rlim);
    552 		getrlimit(RLIMIT_FSIZE, &rlim);
    553 		if (((unsigned long) rlim.rlim_cur) <
    554 		    ((unsigned long) rlim.rlim_max)) {
    555 			rlim.rlim_cur = rlim.rlim_max;
    556 			setrlimit(RLIMIT_FSIZE, &rlim);
    557 		}
    558 	}
    559 #endif
    560 	*channel = io;
    561 	return 0;
    562 
    563 cleanup:
    564 	if (data) {
    565 		free_cache(data);
    566 		ext2fs_free_mem(&data);
    567 	}
    568 	if (io)
    569 		ext2fs_free_mem(&io);
    570 	return retval;
    571 }
    572 
    573 static errcode_t unix_close(io_channel channel)
    574 {
    575 	struct unix_private_data *data;
    576 	errcode_t	retval = 0;
    577 
    578 	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
    579 	data = (struct unix_private_data *) channel->private_data;
    580 	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
    581 
    582 	if (--channel->refcount > 0)
    583 		return 0;
    584 
    585 #ifndef NO_IO_CACHE
    586 	retval = flush_cached_blocks(channel, data, 0);
    587 #endif
    588 
    589 	if (close(data->dev) < 0)
    590 		retval = errno;
    591 	free_cache(data);
    592 
    593 	ext2fs_free_mem(&channel->private_data);
    594 	if (channel->name)
    595 		ext2fs_free_mem(&channel->name);
    596 	ext2fs_free_mem(&channel);
    597 	return retval;
    598 }
    599 
    600 static errcode_t unix_set_blksize(io_channel channel, int blksize)
    601 {
    602 	struct unix_private_data *data;
    603 	errcode_t		retval;
    604 
    605 	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
    606 	data = (struct unix_private_data *) channel->private_data;
    607 	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
    608 
    609 	if (channel->block_size != blksize) {
    610 #ifndef NO_IO_CACHE
    611 		if ((retval = flush_cached_blocks(channel, data, 0)))
    612 			return retval;
    613 #endif
    614 
    615 		channel->block_size = blksize;
    616 		free_cache(data);
    617 		if ((retval = alloc_cache(channel, data)))
    618 			return retval;
    619 	}
    620 	return 0;
    621 }
    622 
    623 
    624 static errcode_t unix_read_blk64(io_channel channel, unsigned long long block,
    625 			       int count, void *buf)
    626 {
    627 	struct unix_private_data *data;
    628 	struct unix_cache *cache, *reuse[READ_DIRECT_SIZE];
    629 	errcode_t	retval;
    630 	char		*cp;
    631 	int		i, j;
    632 
    633 	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
    634 	data = (struct unix_private_data *) channel->private_data;
    635 	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
    636 
    637 #ifdef NO_IO_CACHE
    638 	return raw_read_blk(channel, data, block, count, buf);
    639 #else
    640 	/*
    641 	 * If we're doing an odd-sized read or a very large read,
    642 	 * flush out the cache and then do a direct read.
    643 	 */
    644 	if (count < 0 || count > WRITE_DIRECT_SIZE) {
    645 		if ((retval = flush_cached_blocks(channel, data, 0)))
    646 			return retval;
    647 		return raw_read_blk(channel, data, block, count, buf);
    648 	}
    649 
    650 	cp = buf;
    651 	while (count > 0) {
    652 		/* If it's in the cache, use it! */
    653 		if ((cache = find_cached_block(data, block, &reuse[0]))) {
    654 #ifdef DEBUG
    655 			printf("Using cached block %lu\n", block);
    656 #endif
    657 			memcpy(cp, cache->buf, channel->block_size);
    658 			count--;
    659 			block++;
    660 			cp += channel->block_size;
    661 			continue;
    662 		}
    663 		if (count == 1) {
    664 			/*
    665 			 * Special case where we read directly into the
    666 			 * cache buffer; important in the O_DIRECT case
    667 			 */
    668 			cache = reuse[0];
    669 			reuse_cache(channel, data, cache, block);
    670 			if ((retval = raw_read_blk(channel, data, block, 1,
    671 						   cache->buf))) {
    672 				cache->in_use = 0;
    673 				return retval;
    674 			}
    675 			memcpy(cp, cache->buf, channel->block_size);
    676 			return 0;
    677 		}
    678 
    679 		/*
    680 		 * Find the number of uncached blocks so we can do a
    681 		 * single read request
    682 		 */
    683 		for (i=1; i < count; i++)
    684 			if (find_cached_block(data, block+i, &reuse[i]))
    685 				break;
    686 #ifdef DEBUG
    687 		printf("Reading %d blocks starting at %lu\n", i, block);
    688 #endif
    689 		if ((retval = raw_read_blk(channel, data, block, i, cp)))
    690 			return retval;
    691 
    692 		/* Save the results in the cache */
    693 		for (j=0; j < i; j++) {
    694 			count--;
    695 			cache = reuse[j];
    696 			reuse_cache(channel, data, cache, block++);
    697 			memcpy(cache->buf, cp, channel->block_size);
    698 			cp += channel->block_size;
    699 		}
    700 	}
    701 	return 0;
    702 #endif /* NO_IO_CACHE */
    703 }
    704 
    705 static errcode_t unix_read_blk(io_channel channel, unsigned long block,
    706 			       int count, void *buf)
    707 {
    708 	return unix_read_blk64(channel, block, count, buf);
    709 }
    710 
    711 static errcode_t unix_write_blk64(io_channel channel, unsigned long long block,
    712 				int count, const void *buf)
    713 {
    714 	struct unix_private_data *data;
    715 	struct unix_cache *cache, *reuse;
    716 	errcode_t	retval = 0;
    717 	const char	*cp;
    718 	int		writethrough;
    719 
    720 	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
    721 	data = (struct unix_private_data *) channel->private_data;
    722 	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
    723 
    724 #ifdef NO_IO_CACHE
    725 	return raw_write_blk(channel, data, block, count, buf);
    726 #else
    727 	/*
    728 	 * If we're doing an odd-sized write or a very large write,
    729 	 * flush out the cache completely and then do a direct write.
    730 	 */
    731 	if (count < 0 || count > WRITE_DIRECT_SIZE) {
    732 		if ((retval = flush_cached_blocks(channel, data, 1)))
    733 			return retval;
    734 		return raw_write_blk(channel, data, block, count, buf);
    735 	}
    736 
    737 	/*
    738 	 * For a moderate-sized multi-block write, first force a write
    739 	 * if we're in write-through cache mode, and then fill the
    740 	 * cache with the blocks.
    741 	 */
    742 	writethrough = channel->flags & CHANNEL_FLAGS_WRITETHROUGH;
    743 	if (writethrough)
    744 		retval = raw_write_blk(channel, data, block, count, buf);
    745 
    746 	cp = buf;
    747 	while (count > 0) {
    748 		cache = find_cached_block(data, block, &reuse);
    749 		if (!cache) {
    750 			cache = reuse;
    751 			reuse_cache(channel, data, cache, block);
    752 		}
    753 		memcpy(cache->buf, cp, channel->block_size);
    754 		cache->dirty = !writethrough;
    755 		count--;
    756 		block++;
    757 		cp += channel->block_size;
    758 	}
    759 	return retval;
    760 #endif /* NO_IO_CACHE */
    761 }
    762 
    763 static errcode_t unix_write_blk(io_channel channel, unsigned long block,
    764 				int count, const void *buf)
    765 {
    766 	return unix_write_blk64(channel, block, count, buf);
    767 }
    768 
    769 static errcode_t unix_write_byte(io_channel channel, unsigned long offset,
    770 				 int size, const void *buf)
    771 {
    772 	struct unix_private_data *data;
    773 	errcode_t	retval = 0;
    774 	ssize_t		actual;
    775 
    776 	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
    777 	data = (struct unix_private_data *) channel->private_data;
    778 	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
    779 
    780 	if (data->align != 0) {
    781 #ifdef ALIGN_DEBUG
    782 		printf("unix_write_byte: O_DIRECT fallback\n");
    783 #endif
    784 		return EXT2_ET_UNIMPLEMENTED;
    785 	}
    786 
    787 #ifndef NO_IO_CACHE
    788 	/*
    789 	 * Flush out the cache completely
    790 	 */
    791 	if ((retval = flush_cached_blocks(channel, data, 1)))
    792 		return retval;
    793 #endif
    794 
    795 	if (lseek(data->dev, offset + data->offset, SEEK_SET) < 0)
    796 		return errno;
    797 
    798 	actual = write(data->dev, buf, size);
    799 	if (actual != size)
    800 		return EXT2_ET_SHORT_WRITE;
    801 
    802 	return 0;
    803 }
    804 
    805 /*
    806  * Flush data buffers to disk.
    807  */
    808 static errcode_t unix_flush(io_channel channel)
    809 {
    810 	struct unix_private_data *data;
    811 	errcode_t retval = 0;
    812 
    813 	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
    814 	data = (struct unix_private_data *) channel->private_data;
    815 	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
    816 
    817 #ifndef NO_IO_CACHE
    818 	retval = flush_cached_blocks(channel, data, 0);
    819 #endif
    820 	fsync(data->dev);
    821 	return retval;
    822 }
    823 
    824 static errcode_t unix_set_option(io_channel channel, const char *option,
    825 				 const char *arg)
    826 {
    827 	struct unix_private_data *data;
    828 	unsigned long long tmp;
    829 	char *end;
    830 
    831 	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
    832 	data = (struct unix_private_data *) channel->private_data;
    833 	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
    834 
    835 	if (!strcmp(option, "offset")) {
    836 		if (!arg)
    837 			return EXT2_ET_INVALID_ARGUMENT;
    838 
    839 		tmp = strtoull(arg, &end, 0);
    840 		if (*end)
    841 			return EXT2_ET_INVALID_ARGUMENT;
    842 		data->offset = tmp;
    843 		if (data->offset < 0)
    844 			return EXT2_ET_INVALID_ARGUMENT;
    845 		return 0;
    846 	}
    847 	return EXT2_ET_INVALID_ARGUMENT;
    848 }
    849