]> git.ipfire.org Git - thirdparty/e2fsprogs.git/blame - lib/ext2fs/unix_io.c
Update release notes, etc., for 1.43.3 release
[thirdparty/e2fsprogs.git] / lib / ext2fs / unix_io.c
CommitLineData
3839e657 1/*
fff45483 2 * unix_io.c --- This is the Unix (well, really POSIX) implementation
a4613d13 3 * of the I/O manager.
3839e657
TT
4 *
5 * Implements a one-block write-through cache.
6 *
efc6f628 7 * Includes support for Windows NT support under Cygwin.
fff45483 8 *
64e1b274 9 * Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
a4613d13 10 * 2002 by Theodore Ts'o.
19c78dc0
TT
11 *
12 * %Begin-Header%
543547a5
TT
13 * This file may be redistributed under the terms of the GNU Library
14 * General Public License, version 2.
19c78dc0 15 * %End-Header%
3839e657
TT
16 */
17
ca82d22b 18#if !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__)
ca209dc6
AD
19#define _XOPEN_SOURCE 600
20#define _DARWIN_C_SOURCE
21#define _FILE_OFFSET_BITS 64
f1644c32 22#ifndef _LARGEFILE_SOURCE
dc5f68ca 23#define _LARGEFILE_SOURCE
f1644c32
TT
24#endif
25#ifndef _LARGEFILE64_SOURCE
dc5f68ca 26#define _LARGEFILE64_SOURCE
f1644c32 27#endif
cf5301d7 28#ifndef _GNU_SOURCE
7f1a1fbf 29#define _GNU_SOURCE
cf5301d7 30#endif
ca82d22b 31#endif
dc5f68ca 32
d1154eb4 33#include "config.h"
3839e657
TT
34#include <stdio.h>
35#include <string.h>
4cbe8af4 36#if HAVE_UNISTD_H
3839e657 37#include <unistd.h>
4cbe8af4 38#endif
c4e749ab
TT
39#if HAVE_ERRNO_H
40#include <errno.h>
41#endif
3839e657
TT
42#include <fcntl.h>
43#include <time.h>
f154d2f6
TT
44#ifdef __linux__
45#include <sys/utsname.h>
46#endif
ca209dc6
AD
47#if HAVE_SYS_TYPES_H
48#include <sys/types.h>
49#endif
7ed7a4b6
ES
50#ifdef HAVE_SYS_IOCTL_H
51#include <sys/ioctl.h>
52#endif
53#ifdef HAVE_SYS_MOUNT_H
54#include <sys/mount.h>
55#endif
1d2ff46a 56#if HAVE_SYS_STAT_H
3839e657 57#include <sys/stat.h>
1d2ff46a 58#endif
fff45483 59#if HAVE_SYS_RESOURCE_H
8880e759 60#include <sys/resource.h>
fff45483 61#endif
d2bfdc7f
LC
62#if HAVE_LINUX_FALLOC_H
63#include <linux/falloc.h>
64#endif
3839e657 65
7f1a1fbf 66#if defined(__linux__) && defined(_IO) && !defined(BLKROGET)
7ed7a4b6
ES
67#define BLKROGET _IO(0x12, 94) /* Get read-only status (0 = read_write). */
68#endif
69
7f1a1fbf
TT
70#undef ALIGN_DEBUG
71
b5abe6fa 72#include "ext2_fs.h"
7b4e4534 73#include "ext2fs.h"
3839e657 74
f3db3566
TT
75/*
76 * For checking structure magic numbers...
77 */
78
/*
 * Return `code' from the enclosing function when the object's magic
 * field does not equal `code'.  The do/while(0) wrapper makes the macro
 * expand to exactly one statement, so it can be used as the body of an
 * unbraced `if' without swallowing a following `else'.
 */
#define EXT2_CHECK_MAGIC(struct, code) \
	do { if ((struct)->magic != (code)) return (code); } while (0)
adfc8c6c
TT
81
/*
 * One slot of the block cache: a data buffer plus the bookkeeping used
 * to find the block again and to pick a slot for recycling.
 */
struct unix_cache {
	char			*buf;		/* cached block contents */
	unsigned long long	block;		/* block number held in buf */
	int			access_time;	/* stamp of most recent use */
	unsigned		dirty:1;	/* contents not yet written out */
	unsigned		in_use:1;	/* slot currently holds a block */
};

/* Number of slots in the per-channel cache. */
#define CACHE_SIZE 8
/* Requests of more than this many blocks bypass the cache. */
#define WRITE_DIRECT_SIZE 4	/* Must be smaller than CACHE_SIZE */
#define READ_DIRECT_SIZE 4	/* Should be smaller than CACHE_SIZE */
adfc8c6c 93
3839e657 94struct unix_private_data {
f3db3566 95 int magic;
3839e657
TT
96 int dev;
97 int flags;
7f1a1fbf 98 int align;
adfc8c6c 99 int access_time;
2e8ca9a2 100 ext2_loff_t offset;
adfc8c6c 101 struct unix_cache cache[CACHE_SIZE];
7f1a1fbf 102 void *bounce;
6d96b00d 103 struct struct_io_stats io_stats;
3839e657
TT
104};
105
d4e5abfb
AS
/*
 * True when n (a pointer or an integer) is a multiple of align, which
 * must be a power of two.  Both arguments are fully parenthesized so
 * that expressions such as `ptr + 1' are evaluated before the cast.
 */
#define IS_ALIGNED(n, align) ((((uintptr_t) (n)) & \
			       ((uintptr_t) ((align)-1))) == 0)
7f1a1fbf 108
6d96b00d
TT
109static errcode_t unix_get_stats(io_channel channel, io_stats *stats)
110{
a4613d13 111 errcode_t retval = 0;
6d96b00d
TT
112
113 struct unix_private_data *data;
114
115 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
116 data = (struct unix_private_data *) channel->private_data;
117 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
118
119 if (stats)
120 *stats = &data->io_stats;
121
122 return retval;
123}
124
adfc8c6c
TT
125/*
126 * Here are the raw I/O functions
127 */
128static errcode_t raw_read_blk(io_channel channel,
129 struct unix_private_data *data,
59ecd32d 130 unsigned long long block,
d32c915a 131 int count, void *bufv)
adfc8c6c
TT
132{
133 errcode_t retval;
54434927 134 ssize_t size;
adfc8c6c
TT
135 ext2_loff_t location;
136 int actual = 0;
d32c915a 137 unsigned char *buf = bufv;
adfc8c6c
TT
138
139 size = (count < 0) ? -count : count * channel->block_size;
6d96b00d 140 data->io_stats.bytes_read += size;
2e8ca9a2 141 location = ((ext2_loff_t) block * channel->block_size) + data->offset;
baa35446 142
f00948ad 143#ifdef HAVE_PREAD64
baa35446
DW
144 /* Try an aligned pread */
145 if ((channel->align == 0) ||
146 (IS_ALIGNED(buf, channel->align) &&
147 IS_ALIGNED(size, channel->align))) {
f00948ad
TT
148 actual = pread64(data->dev, buf, size, location);
149 if (actual == size)
150 return 0;
151 }
152#elif HAVE_PREAD
153 /* Try an aligned pread */
154 if ((sizeof(off_t) >= sizeof(ext2_loff_t)) &&
155 ((channel->align == 0) ||
156 (IS_ALIGNED(buf, channel->align) &&
157 IS_ALIGNED(size, channel->align)))) {
baa35446
DW
158 actual = pread(data->dev, buf, size, location);
159 if (actual == size)
160 return 0;
161 }
162#endif /* HAVE_PREAD */
163
adfc8c6c
TT
164 if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) {
165 retval = errno ? errno : EXT2_ET_LLSEEK_FAILED;
166 goto error_out;
167 }
0a05b903
TT
168 if ((channel->align == 0) ||
169 (IS_ALIGNED(buf, channel->align) &&
170 IS_ALIGNED(size, channel->align))) {
7f1a1fbf
TT
171 actual = read(data->dev, buf, size);
172 if (actual != size) {
173 short_read:
174 if (actual < 0)
175 actual = 0;
176 retval = EXT2_ET_SHORT_READ;
177 goto error_out;
178 }
179 return 0;
adfc8c6c 180 }
fff45483 181
7f1a1fbf
TT
182#ifdef ALIGN_DEBUG
183 printf("raw_read_blk: O_DIRECT fallback: %p %lu\n", buf,
184 (unsigned long) size);
fff45483 185#endif
7f1a1fbf
TT
186
187 /*
188 * The buffer or size which we're trying to read isn't aligned
189 * to the O_DIRECT rules, so we need to do this the hard way...
190 */
191 while (size > 0) {
192 actual = read(data->dev, data->bounce, channel->block_size);
193 if (actual != channel->block_size)
fff45483 194 goto short_read;
7f1a1fbf
TT
195 actual = size;
196 if (size > channel->block_size)
197 actual = channel->block_size;
198 memcpy(buf, data->bounce, actual);
199 size -= actual;
200 buf += actual;
fff45483
TT
201 }
202 return 0;
203
fff45483 204error_out:
7f1a1fbf 205 memset((char *) buf+actual, 0, size-actual);
fff45483
TT
206 if (channel->read_error)
207 retval = (channel->read_error)(channel, block, count, buf,
208 size, actual, retval);
209 return retval;
210}
adfc8c6c
TT
211
212static errcode_t raw_write_blk(io_channel channel,
213 struct unix_private_data *data,
59ecd32d 214 unsigned long long block,
d32c915a 215 int count, const void *bufv)
adfc8c6c 216{
54434927 217 ssize_t size;
adfc8c6c
TT
218 ext2_loff_t location;
219 int actual = 0;
220 errcode_t retval;
d32c915a 221 const unsigned char *buf = bufv;
adfc8c6c
TT
222
223 if (count == 1)
224 size = channel->block_size;
225 else {
226 if (count < 0)
227 size = -count;
228 else
229 size = count * channel->block_size;
230 }
6d96b00d 231 data->io_stats.bytes_written += size;
adfc8c6c 232
2e8ca9a2 233 location = ((ext2_loff_t) block * channel->block_size) + data->offset;
baa35446 234
f00948ad 235#ifdef HAVE_PWRITE64
baa35446
DW
236 /* Try an aligned pwrite */
237 if ((channel->align == 0) ||
238 (IS_ALIGNED(buf, channel->align) &&
239 IS_ALIGNED(size, channel->align))) {
f00948ad
TT
240 actual = pwrite64(data->dev, buf, size, location);
241 if (actual == size)
242 return 0;
243 }
244#elif HAVE_PWRITE
245 /* Try an aligned pwrite */
246 if ((sizeof(off_t) >= sizeof(ext2_loff_t)) &&
247 ((channel->align == 0) ||
248 (IS_ALIGNED(buf, channel->align) &&
249 IS_ALIGNED(size, channel->align)))) {
baa35446
DW
250 actual = pwrite(data->dev, buf, size, location);
251 if (actual == size)
252 return 0;
253 }
254#endif /* HAVE_PWRITE */
255
adfc8c6c
TT
256 if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) {
257 retval = errno ? errno : EXT2_ET_LLSEEK_FAILED;
258 goto error_out;
259 }
efc6f628 260
0a05b903
TT
261 if ((channel->align == 0) ||
262 (IS_ALIGNED(buf, channel->align) &&
263 IS_ALIGNED(size, channel->align))) {
7f1a1fbf
TT
264 actual = write(data->dev, buf, size);
265 if (actual != size) {
266 short_write:
267 retval = EXT2_ET_SHORT_WRITE;
268 goto error_out;
269 }
270 return 0;
271 }
272
273#ifdef ALIGN_DEBUG
274 printf("raw_write_blk: O_DIRECT fallback: %p %lu\n", buf,
275 (unsigned long) size);
276#endif
277 /*
278 * The buffer or size which we're trying to write isn't aligned
279 * to the O_DIRECT rules, so we need to do this the hard way...
280 */
281 while (size > 0) {
282 if (size < channel->block_size) {
283 actual = read(data->dev, data->bounce,
284 channel->block_size);
285 if (actual != channel->block_size) {
286 retval = EXT2_ET_SHORT_READ;
287 goto error_out;
288 }
289 }
290 actual = size;
291 if (size > channel->block_size)
292 actual = channel->block_size;
293 memcpy(data->bounce, buf, actual);
127e2291
TT
294 if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) {
295 retval = errno ? errno : EXT2_ET_LLSEEK_FAILED;
296 goto error_out;
297 }
7f1a1fbf
TT
298 actual = write(data->dev, data->bounce, channel->block_size);
299 if (actual != channel->block_size)
300 goto short_write;
301 size -= actual;
302 buf += actual;
adfc8c6c
TT
303 }
304 return 0;
efc6f628 305
adfc8c6c
TT
306error_out:
307 if (channel->write_error)
308 retval = (channel->write_error)(channel, block, count, buf,
309 size, actual, retval);
310 return retval;
311}
312
313
314/*
315 * Here we implement the cache functions
316 */
317
318/* Allocate the cache buffers */
319static errcode_t alloc_cache(io_channel channel,
320 struct unix_private_data *data)
321{
322 errcode_t retval;
323 struct unix_cache *cache;
324 int i;
efc6f628 325
adfc8c6c
TT
326 data->access_time = 0;
327 for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
328 cache->block = 0;
329 cache->access_time = 0;
330 cache->dirty = 0;
331 cache->in_use = 0;
faafdb76
TT
332 if (cache->buf)
333 ext2fs_free_mem(&cache->buf);
fd1c5a06 334 retval = io_channel_alloc_buf(channel, 0, &cache->buf);
7f1a1fbf 335 if (retval)
adfc8c6c
TT
336 return retval;
337 }
0a05b903 338 if (channel->align) {
7f1a1fbf
TT
339 if (data->bounce)
340 ext2fs_free_mem(&data->bounce);
fd1c5a06 341 retval = io_channel_alloc_buf(channel, 0, &data->bounce);
7f1a1fbf
TT
342 }
343 return retval;
adfc8c6c
TT
344}
345
346/* Free the cache buffers */
54434927 347static void free_cache(struct unix_private_data *data)
adfc8c6c
TT
348{
349 struct unix_cache *cache;
350 int i;
efc6f628 351
adfc8c6c
TT
352 data->access_time = 0;
353 for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
354 cache->block = 0;
355 cache->access_time = 0;
356 cache->dirty = 0;
357 cache->in_use = 0;
358 if (cache->buf)
c4e3d3f3 359 ext2fs_free_mem(&cache->buf);
adfc8c6c 360 }
7f1a1fbf
TT
361 if (data->bounce)
362 ext2fs_free_mem(&data->bounce);
adfc8c6c
TT
363}
364
b8a95315 365#ifndef NO_IO_CACHE
adfc8c6c 366/*
82c4660c
TT
367 * Try to find a block in the cache. If the block is not found, and
368 * eldest is a non-zero pointer, then fill in eldest with the cache
369 * entry to that should be reused.
adfc8c6c 370 */
54434927 371static struct unix_cache *find_cached_block(struct unix_private_data *data,
59ecd32d 372 unsigned long long block,
82c4660c 373 struct unix_cache **eldest)
adfc8c6c 374{
31dbecd4 375 struct unix_cache *cache, *unused_cache, *oldest_cache;
adfc8c6c 376 int i;
efc6f628 377
31dbecd4 378 unused_cache = oldest_cache = 0;
adfc8c6c
TT
379 for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
380 if (!cache->in_use) {
82c4660c
TT
381 if (!unused_cache)
382 unused_cache = cache;
adfc8c6c
TT
383 continue;
384 }
385 if (cache->block == block) {
386 cache->access_time = ++data->access_time;
387 return cache;
388 }
389 if (!oldest_cache ||
390 (cache->access_time < oldest_cache->access_time))
391 oldest_cache = cache;
392 }
82c4660c
TT
393 if (eldest)
394 *eldest = (unused_cache) ? unused_cache : oldest_cache;
395 return 0;
396}
397
398/*
399 * Reuse a particular cache entry for another block.
400 */
23b7c8b8 401static void reuse_cache(io_channel channel, struct unix_private_data *data,
59ecd32d 402 struct unix_cache *cache, unsigned long long block)
82c4660c
TT
403{
404 if (cache->dirty && cache->in_use)
405 raw_write_blk(channel, data, cache->block, 1, cache->buf);
406
adfc8c6c 407 cache->in_use = 1;
1d47dfb9 408 cache->dirty = 0;
adfc8c6c
TT
409 cache->block = block;
410 cache->access_time = ++data->access_time;
adfc8c6c
TT
411}
412
413/*
414 * Flush all of the blocks in the cache
415 */
416static errcode_t flush_cached_blocks(io_channel channel,
417 struct unix_private_data *data,
418 int invalidate)
419
420{
421 struct unix_cache *cache;
422 errcode_t retval, retval2;
423 int i;
efc6f628 424
adfc8c6c
TT
425 retval2 = 0;
426 for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
427 if (!cache->in_use)
428 continue;
efc6f628 429
adfc8c6c
TT
430 if (invalidate)
431 cache->in_use = 0;
efc6f628 432
adfc8c6c
TT
433 if (!cache->dirty)
434 continue;
efc6f628 435
adfc8c6c
TT
436 retval = raw_write_blk(channel, data,
437 cache->block, 1, cache->buf);
438 if (retval)
439 retval2 = retval;
440 else
441 cache->dirty = 0;
442 }
443 return retval2;
444}
b8a95315 445#endif /* NO_IO_CACHE */
adfc8c6c 446
d866599a
LC
447#ifdef __linux__
448#ifndef BLKDISCARDZEROES
449#define BLKDISCARDZEROES _IO(0x12,124)
450#endif
451#endif
452
182acd17
AD
/*
 * Open a file using the large-file variant of open() when available.
 * The mode argument is only forwarded when it is non-zero.  Returns the
 * new descriptor, or whatever open()/open64() returns on failure.
 */
int ext2fs_open_file(const char *pathname, int flags, mode_t mode)
{
#if defined(HAVE_OPEN64) && !defined(__OSX_AVAILABLE_BUT_DEPRECATED)
	if (mode)
		return open64(pathname, flags, mode);
	return open64(pathname, flags);
#else
	if (mode)
		return open(pathname, flags, mode);
	return open(pathname, flags);
#endif
}
466
467int ext2fs_stat(const char *path, ext2fs_struct_stat *buf)
468{
469#if defined(HAVE_FSTAT64) && !defined(__OSX_AVAILABLE_BUT_DEPRECATED)
470 return stat64(path, buf);
471#else
472 return stat(path, buf);
473#endif
474}
475
476int ext2fs_fstat(int fd, ext2fs_struct_stat *buf)
477{
478#if defined(HAVE_FSTAT64) && !defined(__OSX_AVAILABLE_BUT_DEPRECATED)
479 return fstat64(fd, buf);
480#else
481 return fstat(fd, buf);
482#endif
483}
484
4ccf9e4f
AS
485
486static errcode_t unix_open_channel(const char *name, int fd,
487 int flags, io_channel *channel,
488 io_manager io_mgr)
3839e657
TT
489{
490 io_channel io = NULL;
491 struct unix_private_data *data = NULL;
492 errcode_t retval;
c859cb1d 493 ext2fs_struct_stat st;
f154d2f6 494#ifdef __linux__
a4613d13 495 struct utsname ut;
f154d2f6 496#endif
3839e657 497
c4e3d3f3 498 retval = ext2fs_get_mem(sizeof(struct struct_io_channel), &io);
7b4e4534 499 if (retval)
624e8ebe 500 goto cleanup;
f3db3566
TT
501 memset(io, 0, sizeof(struct struct_io_channel));
502 io->magic = EXT2_ET_MAGIC_IO_CHANNEL;
c4e3d3f3 503 retval = ext2fs_get_mem(sizeof(struct unix_private_data), &data);
7b4e4534 504 if (retval)
3839e657 505 goto cleanup;
7b4e4534 506
4ccf9e4f 507 io->manager = io_mgr;
c4e3d3f3 508 retval = ext2fs_get_mem(strlen(name)+1, &io->name);
7b4e4534 509 if (retval)
3839e657 510 goto cleanup;
7b4e4534 511
3839e657
TT
512 strcpy(io->name, name);
513 io->private_data = data;
f3db3566
TT
514 io->block_size = 1024;
515 io->read_error = 0;
516 io->write_error = 0;
a29f4d30 517 io->refcount = 1;
3839e657
TT
518
519 memset(data, 0, sizeof(struct unix_private_data));
f3db3566 520 data->magic = EXT2_ET_MAGIC_UNIX_IO_CHANNEL;
6d96b00d 521 data->io_stats.num_fields = 2;
4ccf9e4f
AS
522 data->flags = flags;
523 data->dev = fd;
7b4e4534 524
d9a5d375 525#if defined(O_DIRECT)
4ccf9e4f 526 if (flags & IO_FLAG_DIRECT_IO)
dd0a2679 527 io->align = ext2fs_get_dio_alignment(data->dev);
d9a5d375 528#elif defined(F_NOCACHE)
4ccf9e4f 529 if (flags & IO_FLAG_DIRECT_IO)
dd0a2679 530 io->align = 4096;
534a4c3d 531#endif
64e1b274 532
d2bfdc7f
LC
533 /*
534 * If the device is really a block device, then set the
535 * appropriate flag, otherwise we can set DISCARD_ZEROES flag
536 * because we are going to use punch hole instead of discard
537 * and if it succeed, subsequent read from sparse area returns
538 * zero.
539 */
4ccf9e4f 540 if (ext2fs_fstat(data->dev, &st) == 0) {
d2bfdc7f
LC
541 if (S_ISBLK(st.st_mode))
542 io->flags |= CHANNEL_FLAGS_BLOCK_DEVICE;
543 else
544 io->flags |= CHANNEL_FLAGS_DISCARD_ZEROES;
545 }
546
d866599a 547#ifdef BLKDISCARDZEROES
1d6fd6d0
AD
548 {
549 int zeroes = 0;
550 if (ioctl(data->dev, BLKDISCARDZEROES, &zeroes) == 0 &&
551 zeroes)
552 io->flags |= CHANNEL_FLAGS_DISCARD_ZEROES;
553 }
d866599a
LC
554#endif
555
7f1a1fbf
TT
556#if defined(__CYGWIN__) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
557 /*
558 * Some operating systems require that the buffers be aligned,
559 * regardless of O_DIRECT
560 */
dd0a2679
TT
561 if (!io->align)
562 io->align = 512;
7f1a1fbf
TT
563#endif
564
565
566 if ((retval = alloc_cache(io, data)))
567 goto cleanup;
568
7ed7a4b6
ES
569#ifdef BLKROGET
570 if (flags & IO_FLAG_RW) {
571 int error;
572 int readonly = 0;
573
574 /* Is the block device actually writable? */
575 error = ioctl(data->dev, BLKROGET, &readonly);
576 if (!error && readonly) {
7ed7a4b6
ES
577 retval = EPERM;
578 goto cleanup;
579 }
580 }
581#endif
582
64e1b274
TT
583#ifdef __linux__
584#undef RLIM_INFINITY
585#if (defined(__alpha__) || ((defined(__sparc__) || defined(__mips__)) && (SIZEOF_LONG == 4)))
586#define RLIM_INFINITY ((unsigned long)(~0UL>>1))
587#else
588#define RLIM_INFINITY (~0UL)
589#endif
8880e759 590 /*
f154d2f6
TT
591 * Work around a bug in 2.4.10-2.4.18 kernels where writes to
592 * block devices are wrongly getting hit by the filesize
593 * limit. This workaround isn't perfect, since it won't work
594 * if glibc wasn't built against 2.2 header files. (Sigh.)
efc6f628 595 *
8880e759 596 */
f154d2f6
TT
597 if ((flags & IO_FLAG_RW) &&
598 (uname(&ut) == 0) &&
599 ((ut.release[0] == '2') && (ut.release[1] == '.') &&
600 (ut.release[2] == '4') && (ut.release[3] == '.') &&
601 (ut.release[4] == '1') && (ut.release[5] >= '0') &&
602 (ut.release[5] < '8')) &&
4ccf9e4f 603 (ext2fs_fstat(data->dev, &st) == 0) &&
8880e759
TT
604 (S_ISBLK(st.st_mode))) {
605 struct rlimit rlim;
efc6f628 606
64e1b274 607 rlim.rlim_cur = rlim.rlim_max = (unsigned long) RLIM_INFINITY;
8880e759
TT
608 setrlimit(RLIMIT_FSIZE, &rlim);
609 getrlimit(RLIMIT_FSIZE, &rlim);
bd27880b
TT
610 if (((unsigned long) rlim.rlim_cur) <
611 ((unsigned long) rlim.rlim_max)) {
8880e759
TT
612 rlim.rlim_cur = rlim.rlim_max;
613 setrlimit(RLIMIT_FSIZE, &rlim);
614 }
615 }
64e1b274 616#endif
3839e657
TT
617 *channel = io;
618 return 0;
619
620cleanup:
3839e657 621 if (data) {
4e0bb5eb
TT
622 if (data->dev >= 0)
623 close(data->dev);
54434927 624 free_cache(data);
c4e3d3f3 625 ext2fs_free_mem(&data);
3839e657 626 }
4e0bb5eb
TT
627 if (io) {
628 if (io->name) {
629 ext2fs_free_mem(&io->name);
630 }
c4e3d3f3 631 ext2fs_free_mem(&io);
4e0bb5eb 632 }
3839e657
TT
633 return retval;
634}
635
4ccf9e4f
AS
636static errcode_t unixfd_open(const char *str_fd, int flags,
637 io_channel *channel)
638{
639 int fd;
640 int fd_flags;
641
642 fd = atoi(str_fd);
643 fd_flags = fcntl(fd, F_GETFD);
644 if (fd_flags == -1)
645 return -EBADF;
646
647 flags = 0;
648 if (fd_flags & O_RDWR)
649 flags |= IO_FLAG_RW;
650 if (fd_flags & O_EXCL)
651 flags |= IO_FLAG_EXCLUSIVE;
652#if defined(O_DIRECT)
653 if (fd_flags & O_DIRECT)
654 flags |= IO_FLAG_DIRECT_IO;
655#endif
656
657 return unix_open_channel(str_fd, fd, flags, channel, unixfd_io_manager);
658}
659
660static errcode_t unix_open(const char *name, int flags,
661 io_channel *channel)
662{
663 int fd = -1;
664 int open_flags;
665
666 if (name == 0)
667 return EXT2_ET_BAD_DEVICE_NAME;
668
669 open_flags = (flags & IO_FLAG_RW) ? O_RDWR : O_RDONLY;
670 if (flags & IO_FLAG_EXCLUSIVE)
671 open_flags |= O_EXCL;
672#if defined(O_DIRECT)
673 if (flags & IO_FLAG_DIRECT_IO)
674 open_flags |= O_DIRECT;
675#endif
676 fd = ext2fs_open_file(name, open_flags, 0);
677 if (fd < 0)
678 return errno;
679#if defined(F_NOCACHE) && !defined(IO_DIRECT)
680 if (flags & IO_FLAG_DIRECT_IO) {
681 if (fcntl(fd, F_NOCACHE, 1) < 0)
682 return errno;
683 }
684#endif
685 return unix_open_channel(name, fd, flags, channel, unix_io_manager);
686}
687
3839e657
TT
688static errcode_t unix_close(io_channel channel)
689{
690 struct unix_private_data *data;
691 errcode_t retval = 0;
692
f3db3566 693 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
3839e657 694 data = (struct unix_private_data *) channel->private_data;
f3db3566 695 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
a29f4d30
TT
696
697 if (--channel->refcount > 0)
698 return 0;
adfc8c6c 699
b8a95315 700#ifndef NO_IO_CACHE
adfc8c6c 701 retval = flush_cached_blocks(channel, data, 0);
b8a95315 702#endif
adfc8c6c 703
3839e657
TT
704 if (close(data->dev) < 0)
705 retval = errno;
54434927 706 free_cache(data);
f12e285f 707
c4e3d3f3 708 ext2fs_free_mem(&channel->private_data);
3839e657 709 if (channel->name)
c4e3d3f3
TT
710 ext2fs_free_mem(&channel->name);
711 ext2fs_free_mem(&channel);
3839e657
TT
712 return retval;
713}
714
715static errcode_t unix_set_blksize(io_channel channel, int blksize)
716{
717 struct unix_private_data *data;
7b4e4534 718 errcode_t retval;
3839e657 719
f3db3566 720 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
3839e657 721 data = (struct unix_private_data *) channel->private_data;
f3db3566
TT
722 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
723
3839e657 724 if (channel->block_size != blksize) {
b8a95315 725#ifndef NO_IO_CACHE
adfc8c6c
TT
726 if ((retval = flush_cached_blocks(channel, data, 0)))
727 return retval;
b8a95315 728#endif
efc6f628 729
3839e657 730 channel->block_size = blksize;
54434927 731 free_cache(data);
adfc8c6c 732 if ((retval = alloc_cache(channel, data)))
7b4e4534 733 return retval;
3839e657
TT
734 }
735 return 0;
736}
737
59ecd32d 738static errcode_t unix_read_blk64(io_channel channel, unsigned long long block,
3839e657
TT
739 int count, void *buf)
740{
741 struct unix_private_data *data;
82c4660c 742 struct unix_cache *cache, *reuse[READ_DIRECT_SIZE];
3839e657 743 errcode_t retval;
31dbecd4 744 char *cp;
adfc8c6c 745 int i, j;
3839e657 746
f3db3566 747 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
3839e657 748 data = (struct unix_private_data *) channel->private_data;
f3db3566 749 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
3839e657 750
b8a95315
TT
751#ifdef NO_IO_CACHE
752 return raw_read_blk(channel, data, block, count, buf);
753#else
3839e657 754 /*
82c4660c
TT
755 * If we're doing an odd-sized read or a very large read,
756 * flush out the cache and then do a direct read.
3839e657 757 */
82c4660c 758 if (count < 0 || count > WRITE_DIRECT_SIZE) {
adfc8c6c
TT
759 if ((retval = flush_cached_blocks(channel, data, 0)))
760 return retval;
761 return raw_read_blk(channel, data, block, count, buf);
3839e657 762 }
adfc8c6c 763
31dbecd4 764 cp = buf;
adfc8c6c
TT
765 while (count > 0) {
766 /* If it's in the cache, use it! */
54434927 767 if ((cache = find_cached_block(data, block, &reuse[0]))) {
adfc8c6c 768#ifdef DEBUG
d0ff90d5 769 printf("Using cached block %lu\n", block);
f3db3566 770#endif
31dbecd4 771 memcpy(cp, cache->buf, channel->block_size);
adfc8c6c
TT
772 count--;
773 block++;
31dbecd4 774 cp += channel->block_size;
adfc8c6c
TT
775 continue;
776 }
7f1a1fbf
TT
777 if (count == 1) {
778 /*
779 * Special case where we read directly into the
780 * cache buffer; important in the O_DIRECT case
781 */
782 cache = reuse[0];
783 reuse_cache(channel, data, cache, block);
784 if ((retval = raw_read_blk(channel, data, block, 1,
785 cache->buf))) {
786 cache->in_use = 0;
787 return retval;
788 }
789 memcpy(cp, cache->buf, channel->block_size);
790 return 0;
791 }
792
adfc8c6c
TT
793 /*
794 * Find the number of uncached blocks so we can do a
795 * single read request
796 */
797 for (i=1; i < count; i++)
54434927 798 if (find_cached_block(data, block+i, &reuse[i]))
adfc8c6c
TT
799 break;
800#ifdef DEBUG
d0ff90d5 801 printf("Reading %d blocks starting at %lu\n", i, block);
adfc8c6c 802#endif
31dbecd4 803 if ((retval = raw_read_blk(channel, data, block, i, cp)))
adfc8c6c 804 return retval;
efc6f628 805
adfc8c6c
TT
806 /* Save the results in the cache */
807 for (j=0; j < i; j++) {
808 count--;
82c4660c
TT
809 cache = reuse[j];
810 reuse_cache(channel, data, cache, block++);
811 memcpy(cache->buf, cp, channel->block_size);
31dbecd4 812 cp += channel->block_size;
adfc8c6c 813 }
3839e657
TT
814 }
815 return 0;
b8a95315 816#endif /* NO_IO_CACHE */
3839e657
TT
817}
818
59ecd32d
JS
819static errcode_t unix_read_blk(io_channel channel, unsigned long block,
820 int count, void *buf)
821{
822 return unix_read_blk64(channel, block, count, buf);
823}
824
825static errcode_t unix_write_blk64(io_channel channel, unsigned long long block,
3839e657
TT
826 int count, const void *buf)
827{
828 struct unix_private_data *data;
82c4660c 829 struct unix_cache *cache, *reuse;
23b7c8b8 830 errcode_t retval = 0;
31dbecd4
TT
831 const char *cp;
832 int writethrough;
3839e657 833
f3db3566 834 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
3839e657 835 data = (struct unix_private_data *) channel->private_data;
f3db3566 836 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
3839e657 837
b8a95315
TT
838#ifdef NO_IO_CACHE
839 return raw_write_blk(channel, data, block, count, buf);
efc6f628 840#else
adfc8c6c
TT
841 /*
842 * If we're doing an odd-sized write or a very large write,
843 * flush out the cache completely and then do a direct write.
844 */
82c4660c 845 if (count < 0 || count > WRITE_DIRECT_SIZE) {
adfc8c6c
TT
846 if ((retval = flush_cached_blocks(channel, data, 1)))
847 return retval;
848 return raw_write_blk(channel, data, block, count, buf);
3839e657
TT
849 }
850
adfc8c6c
TT
851 /*
852 * For a moderate-sized multi-block write, first force a write
853 * if we're in write-through cache mode, and then fill the
854 * cache with the blocks.
855 */
856 writethrough = channel->flags & CHANNEL_FLAGS_WRITETHROUGH;
857 if (writethrough)
858 retval = raw_write_blk(channel, data, block, count, buf);
efc6f628 859
31dbecd4 860 cp = buf;
adfc8c6c 861 while (count > 0) {
54434927 862 cache = find_cached_block(data, block, &reuse);
adfc8c6c 863 if (!cache) {
82c4660c
TT
864 cache = reuse;
865 reuse_cache(channel, data, cache, block);
adfc8c6c 866 }
8d5324c4
DW
867 if (cache->buf != cp)
868 memcpy(cache->buf, cp, channel->block_size);
82c4660c 869 cache->dirty = !writethrough;
adfc8c6c
TT
870 count--;
871 block++;
31dbecd4 872 cp += channel->block_size;
adfc8c6c 873 }
3839e657 874 return retval;
b8a95315 875#endif /* NO_IO_CACHE */
3839e657
TT
876}
877
ca209dc6
AD
878static errcode_t unix_cache_readahead(io_channel channel,
879 unsigned long long block,
880 unsigned long long count)
881{
882#ifdef POSIX_FADV_WILLNEED
883 struct unix_private_data *data;
884
885 data = (struct unix_private_data *)channel->private_data;
a7ac9c22 886 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
ca209dc6 887 return posix_fadvise(data->dev,
a7ac9c22 888 (ext2_loff_t)block * channel->block_size + data->offset,
ca209dc6
AD
889 (ext2_loff_t)count * channel->block_size,
890 POSIX_FADV_WILLNEED);
891#else
892 return EXT2_ET_OP_NOT_SUPPORTED;
893#endif
894}
895
59ecd32d
JS
896static errcode_t unix_write_blk(io_channel channel, unsigned long block,
897 int count, const void *buf)
898{
899 return unix_write_blk64(channel, block, count, buf);
900}
901
c180ac86
TT
902static errcode_t unix_write_byte(io_channel channel, unsigned long offset,
903 int size, const void *buf)
904{
905 struct unix_private_data *data;
31dbecd4 906 errcode_t retval = 0;
54434927 907 ssize_t actual;
c180ac86
TT
908
909 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
910 data = (struct unix_private_data *) channel->private_data;
911 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
912
0a05b903 913 if (channel->align != 0) {
7f1a1fbf
TT
914#ifdef ALIGN_DEBUG
915 printf("unix_write_byte: O_DIRECT fallback\n");
916#endif
917 return EXT2_ET_UNIMPLEMENTED;
918 }
919
b8a95315 920#ifndef NO_IO_CACHE
c180ac86
TT
921 /*
922 * Flush out the cache completely
923 */
924 if ((retval = flush_cached_blocks(channel, data, 1)))
925 return retval;
b8a95315 926#endif
c180ac86 927
2e8ca9a2 928 if (lseek(data->dev, offset + data->offset, SEEK_SET) < 0)
c180ac86 929 return errno;
efc6f628 930
c180ac86
TT
931 actual = write(data->dev, buf, size);
932 if (actual != size)
933 return EXT2_ET_SHORT_WRITE;
934
935 return 0;
936}
937
3839e657 938/*
efc6f628 939 * Flush data buffers to disk.
3839e657
TT
940 */
941static errcode_t unix_flush(io_channel channel)
942{
f3db3566 943 struct unix_private_data *data;
adfc8c6c 944 errcode_t retval = 0;
efc6f628 945
f3db3566
TT
946 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
947 data = (struct unix_private_data *) channel->private_data;
948 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
adfc8c6c 949
b8a95315 950#ifndef NO_IO_CACHE
adfc8c6c 951 retval = flush_cached_blocks(channel, data, 0);
b8a95315 952#endif
36f21439 953 fsync(data->dev);
adfc8c6c 954 return retval;
3839e657
TT
955}
956
efc6f628 957static errcode_t unix_set_option(io_channel channel, const char *option,
2e8ca9a2
TT
958 const char *arg)
959{
960 struct unix_private_data *data;
2aee23f3 961 unsigned long long tmp;
2e8ca9a2
TT
962 char *end;
963
964 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
965 data = (struct unix_private_data *) channel->private_data;
966 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
967
968 if (!strcmp(option, "offset")) {
969 if (!arg)
970 return EXT2_ET_INVALID_ARGUMENT;
971
2aee23f3 972 tmp = strtoull(arg, &end, 0);
2e8ca9a2
TT
973 if (*end)
974 return EXT2_ET_INVALID_ARGUMENT;
975 data->offset = tmp;
2aee23f3
TT
976 if (data->offset < 0)
977 return EXT2_ET_INVALID_ARGUMENT;
2e8ca9a2
TT
978 return 0;
979 }
980 return EXT2_ET_INVALID_ARGUMENT;
981}
e90a59ed
LC
982
983#if defined(__linux__) && !defined(BLKDISCARD)
d2bfdc7f 984#define BLKDISCARD _IO(0x12,119)
e90a59ed
LC
985#endif
986
987static errcode_t unix_discard(io_channel channel, unsigned long long block,
988 unsigned long long count)
989{
e90a59ed 990 struct unix_private_data *data;
e90a59ed
LC
991 int ret;
992
993 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
994 data = (struct unix_private_data *) channel->private_data;
995 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
996
d2bfdc7f
LC
997 if (channel->flags & CHANNEL_FLAGS_BLOCK_DEVICE) {
998#ifdef BLKDISCARD
795c02de 999 __u64 range[2];
1d6fd6d0 1000
a7ac9c22 1001 range[0] = (__u64)(block) * channel->block_size + data->offset;
795c02de 1002 range[1] = (__u64)(count) * channel->block_size;
e90a59ed 1003
d2bfdc7f
LC
1004 ret = ioctl(data->dev, BLKDISCARD, &range);
1005#else
1006 goto unimplemented;
1007#endif
1008 } else {
800766ee 1009#if defined(HAVE_FALLOCATE) && defined(FALLOC_FL_PUNCH_HOLE)
d2bfdc7f
LC
1010 /*
1011 * If we are not on block device, try to use punch hole
1012 * to reclaim free space.
1013 */
1014 ret = fallocate(data->dev,
1015 FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
a7ac9c22 1016 (off_t)(block) * channel->block_size + data->offset,
d2bfdc7f
LC
1017 (off_t)(count) * channel->block_size);
1018#else
1019 goto unimplemented;
1020#endif
1021 }
1022 if (ret < 0) {
1023 if (errno == EOPNOTSUPP)
1024 goto unimplemented;
e90a59ed 1025 return errno;
d2bfdc7f 1026 }
e90a59ed 1027 return 0;
d2bfdc7f 1028unimplemented:
e90a59ed 1029 return EXT2_ET_UNIMPLEMENTED;
e90a59ed 1030}
a4613d13 1031
25f291c9
TT
1032/* parameters might not be used if OS doesn't support zeroout */
1033#pragma GCC diagnostic push
1034#pragma GCC diagnostic ignored "-Wunused-parameter"
3d28f545
DW
1035static errcode_t unix_zeroout(io_channel channel, unsigned long long block,
1036 unsigned long long count)
1037{
1038 struct unix_private_data *data;
1039 int ret;
1040
1041 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
1042 data = (struct unix_private_data *) channel->private_data;
1043 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
1044
1045 if (getenv("UNIX_IO_NOZEROOUT"))
1046 goto unimplemented;
1047
1048 if (channel->flags & CHANNEL_FLAGS_BLOCK_DEVICE) {
1049 /* Not implemented until the BLKZEROOUT mess is fixed */
1050 goto unimplemented;
1051 } else {
1052 /* Regular file, try to use truncate/punch/zero. */
3d28f545
DW
1053 struct stat statbuf;
1054
1055 if (count == 0)
1056 return 0;
1057 /*
1058 * If we're trying to zero a range past the end of the file,
cf491d3a 1059 * extend the file size, then truncate everything.
3d28f545
DW
1060 */
1061 ret = fstat(data->dev, &statbuf);
1062 if (ret)
1063 goto err;
a7ac9c22
MH
1064 if ((unsigned long long) statbuf.st_size <
1065 (block + count) * channel->block_size + data->offset) {
3d28f545 1066 ret = ftruncate(data->dev,
a7ac9c22 1067 (block + count) * channel->block_size + data->offset);
3d28f545
DW
1068 if (ret)
1069 goto err;
1070 }
cf491d3a
TT
1071#if defined(HAVE_FALLOCATE) && (defined(FALLOC_FL_ZERO_RANGE) || \
1072 (defined(FALLOC_FL_PUNCH_HOLE) && defined(FALLOC_FL_KEEP_SIZE)))
3d28f545
DW
1073#if defined(FALLOC_FL_PUNCH_HOLE) && defined(FALLOC_FL_KEEP_SIZE)
1074 ret = fallocate(data->dev,
1075 FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
a7ac9c22 1076 (off_t)(block) * channel->block_size + data->offset,
3d28f545
DW
1077 (off_t)(count) * channel->block_size);
1078 if (ret == 0)
1079 goto err;
1080#endif
1081#ifdef FALLOC_FL_ZERO_RANGE
1082 ret = fallocate(data->dev,
1083 FALLOC_FL_ZERO_RANGE,
a7ac9c22 1084 (off_t)(block) * channel->block_size + data->offset,
3d28f545
DW
1085 (off_t)(count) * channel->block_size);
1086#endif
1087#else
1088 goto unimplemented;
1089#endif /* HAVE_FALLOCATE && (ZERO_RANGE || (PUNCH_HOLE && KEEP_SIZE)) */
1090 }
1091err:
1092 if (ret < 0) {
1093 if (errno == EOPNOTSUPP)
1094 goto unimplemented;
1095 return errno;
1096 }
1097 return 0;
1098unimplemented:
1099 return EXT2_ET_UNIMPLEMENTED;
1100}
25f291c9 1101#pragma GCC diagnostic pop
3d28f545 1102
a4613d13 1103static struct struct_io_manager struct_unix_manager = {
d4ecec45
TT
1104 .magic = EXT2_ET_MAGIC_IO_MANAGER,
1105 .name = "Unix I/O Manager",
1106 .open = unix_open,
1107 .close = unix_close,
1108 .set_blksize = unix_set_blksize,
1109 .read_blk = unix_read_blk,
1110 .write_blk = unix_write_blk,
1111 .flush = unix_flush,
1112 .write_byte = unix_write_byte,
1113 .set_option = unix_set_option,
1114 .get_stats = unix_get_stats,
1115 .read_blk64 = unix_read_blk64,
1116 .write_blk64 = unix_write_blk64,
1117 .discard = unix_discard,
ca209dc6 1118 .cache_readahead = unix_cache_readahead,
3d28f545 1119 .zeroout = unix_zeroout,
a4613d13
AD
1120};
1121
1122io_manager unix_io_manager = &struct_unix_manager;
4ccf9e4f
AS
1123
1124static struct struct_io_manager struct_unixfd_manager = {
1125 .magic = EXT2_ET_MAGIC_IO_MANAGER,
1126 .name = "Unix fd I/O Manager",
1127 .open = unixfd_open,
1128 .close = unix_close,
1129 .set_blksize = unix_set_blksize,
1130 .read_blk = unix_read_blk,
1131 .write_blk = unix_write_blk,
1132 .flush = unix_flush,
1133 .write_byte = unix_write_byte,
1134 .set_option = unix_set_option,
1135 .get_stats = unix_get_stats,
1136 .read_blk64 = unix_read_blk64,
1137 .write_blk64 = unix_write_blk64,
1138 .discard = unix_discard,
1139 .cache_readahead = unix_cache_readahead,
1140 .zeroout = unix_zeroout,
1141};
1142
1143io_manager unixfd_io_manager = &struct_unixfd_manager;