]> git.ipfire.org Git - thirdparty/e2fsprogs.git/blame - lib/ext2fs/unix_io.c
Add support for a password salt stored in the superblock
[thirdparty/e2fsprogs.git] / lib / ext2fs / unix_io.c
CommitLineData
3839e657 1/*
fff45483 2 * unix_io.c --- This is the Unix (well, really POSIX) implementation
a4613d13 3 * of the I/O manager.
3839e657
TT
4 *
5 * Implements a small block cache (write-through when the channel has CHANNEL_FLAGS_WRITETHROUGH set).
6 *
efc6f628 7 * Includes support for Windows NT under Cygwin.
fff45483 8 *
64e1b274 9 * Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
a4613d13 10 * 2002 by Theodore Ts'o.
19c78dc0
TT
11 *
12 * %Begin-Header%
543547a5
TT
13 * This file may be redistributed under the terms of the GNU Library
14 * General Public License, version 2.
19c78dc0 15 * %End-Header%
3839e657
TT
16 */
17
ca209dc6
AD
18#define _XOPEN_SOURCE 600
19#define _DARWIN_C_SOURCE
20#define _FILE_OFFSET_BITS 64
dc5f68ca
TT
21#define _LARGEFILE_SOURCE
22#define _LARGEFILE64_SOURCE
cf5301d7 23#ifndef _GNU_SOURCE
7f1a1fbf 24#define _GNU_SOURCE
cf5301d7 25#endif
dc5f68ca 26
d1154eb4 27#include "config.h"
3839e657
TT
28#include <stdio.h>
29#include <string.h>
4cbe8af4 30#if HAVE_UNISTD_H
3839e657 31#include <unistd.h>
4cbe8af4 32#endif
c4e749ab
TT
33#if HAVE_ERRNO_H
34#include <errno.h>
35#endif
3839e657
TT
36#include <fcntl.h>
37#include <time.h>
f154d2f6
TT
38#ifdef __linux__
39#include <sys/utsname.h>
40#endif
ca209dc6
AD
41#if HAVE_SYS_TYPES_H
42#include <sys/types.h>
43#endif
7ed7a4b6
ES
44#ifdef HAVE_SYS_IOCTL_H
45#include <sys/ioctl.h>
46#endif
47#ifdef HAVE_SYS_MOUNT_H
48#include <sys/mount.h>
49#endif
1d2ff46a 50#if HAVE_SYS_STAT_H
3839e657 51#include <sys/stat.h>
1d2ff46a 52#endif
fff45483 53#if HAVE_SYS_RESOURCE_H
8880e759 54#include <sys/resource.h>
fff45483 55#endif
d2bfdc7f
LC
56#if HAVE_LINUX_FALLOC_H
57#include <linux/falloc.h>
58#endif
3839e657 59
7f1a1fbf 60#if defined(__linux__) && defined(_IO) && !defined(BLKROGET)
7ed7a4b6
ES
61#define BLKROGET _IO(0x12, 94) /* Get read-only status (0 = read_write). */
62#endif
63
7f1a1fbf
TT
64#undef ALIGN_DEBUG
65
b5abe6fa 66#include "ext2_fs.h"
7b4e4534 67#include "ext2fs.h"
3839e657 68
f3db3566
TT
69/*
70 * For checking structure magic numbers...
71 */
72
73#define EXT2_CHECK_MAGIC(struct, code) \
74 if ((struct)->magic != (code)) return (code)
adfc8c6c
TT
75
/*
 * One slot of the block cache kept in struct unix_private_data.
 */
struct unix_cache {
	char			*buf;		/* data buffer for the cached block */
	unsigned long long	block;		/* block number currently held in buf */
	int			access_time;	/* stamp taken from the channel's counter on each use */
	unsigned		dirty:1;	/* buf holds data not yet written to the device */
	unsigned		in_use:1;	/* slot currently holds a block */
};
83
84#define CACHE_SIZE 8
82c4660c
TT
85#define WRITE_DIRECT_SIZE 4 /* Must be smaller than CACHE_SIZE */
86#define READ_DIRECT_SIZE 4 /* Should be smaller than CACHE_SIZE */
adfc8c6c 87
3839e657 88struct unix_private_data {
f3db3566 89 int magic;
3839e657
TT
90 int dev;
91 int flags;
7f1a1fbf 92 int align;
adfc8c6c 93 int access_time;
2e8ca9a2 94 ext2_loff_t offset;
adfc8c6c 95 struct unix_cache cache[CACHE_SIZE];
7f1a1fbf 96 void *bounce;
6d96b00d 97 struct struct_io_stats io_stats;
3839e657
TT
98};
99
7f1a1fbf
TT
100#define IS_ALIGNED(n, align) ((((unsigned long) n) & \
101 ((unsigned long) ((align)-1))) == 0)
102
6d96b00d
TT
103static errcode_t unix_get_stats(io_channel channel, io_stats *stats)
104{
a4613d13 105 errcode_t retval = 0;
6d96b00d
TT
106
107 struct unix_private_data *data;
108
109 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
110 data = (struct unix_private_data *) channel->private_data;
111 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
112
113 if (stats)
114 *stats = &data->io_stats;
115
116 return retval;
117}
118
adfc8c6c
TT
119/*
120 * Here are the raw I/O functions
121 */
122static errcode_t raw_read_blk(io_channel channel,
123 struct unix_private_data *data,
59ecd32d 124 unsigned long long block,
d32c915a 125 int count, void *bufv)
adfc8c6c
TT
126{
127 errcode_t retval;
54434927 128 ssize_t size;
adfc8c6c
TT
129 ext2_loff_t location;
130 int actual = 0;
d32c915a 131 unsigned char *buf = bufv;
adfc8c6c
TT
132
133 size = (count < 0) ? -count : count * channel->block_size;
6d96b00d 134 data->io_stats.bytes_read += size;
2e8ca9a2 135 location = ((ext2_loff_t) block * channel->block_size) + data->offset;
baa35446 136
f00948ad 137#ifdef HAVE_PREAD64
baa35446
DW
138 /* Try an aligned pread */
139 if ((channel->align == 0) ||
140 (IS_ALIGNED(buf, channel->align) &&
141 IS_ALIGNED(size, channel->align))) {
f00948ad
TT
142 actual = pread64(data->dev, buf, size, location);
143 if (actual == size)
144 return 0;
145 }
146#elif HAVE_PREAD
147 /* Try an aligned pread */
148 if ((sizeof(off_t) >= sizeof(ext2_loff_t)) &&
149 ((channel->align == 0) ||
150 (IS_ALIGNED(buf, channel->align) &&
151 IS_ALIGNED(size, channel->align)))) {
baa35446
DW
152 actual = pread(data->dev, buf, size, location);
153 if (actual == size)
154 return 0;
155 }
156#endif /* HAVE_PREAD */
157
adfc8c6c
TT
158 if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) {
159 retval = errno ? errno : EXT2_ET_LLSEEK_FAILED;
160 goto error_out;
161 }
0a05b903
TT
162 if ((channel->align == 0) ||
163 (IS_ALIGNED(buf, channel->align) &&
164 IS_ALIGNED(size, channel->align))) {
7f1a1fbf
TT
165 actual = read(data->dev, buf, size);
166 if (actual != size) {
167 short_read:
168 if (actual < 0)
169 actual = 0;
170 retval = EXT2_ET_SHORT_READ;
171 goto error_out;
172 }
173 return 0;
adfc8c6c 174 }
fff45483 175
7f1a1fbf
TT
176#ifdef ALIGN_DEBUG
177 printf("raw_read_blk: O_DIRECT fallback: %p %lu\n", buf,
178 (unsigned long) size);
fff45483 179#endif
7f1a1fbf
TT
180
181 /*
182 * The buffer or size which we're trying to read isn't aligned
183 * to the O_DIRECT rules, so we need to do this the hard way...
184 */
185 while (size > 0) {
186 actual = read(data->dev, data->bounce, channel->block_size);
187 if (actual != channel->block_size)
fff45483 188 goto short_read;
7f1a1fbf
TT
189 actual = size;
190 if (size > channel->block_size)
191 actual = channel->block_size;
192 memcpy(buf, data->bounce, actual);
193 size -= actual;
194 buf += actual;
fff45483
TT
195 }
196 return 0;
197
fff45483 198error_out:
7f1a1fbf 199 memset((char *) buf+actual, 0, size-actual);
fff45483
TT
200 if (channel->read_error)
201 retval = (channel->read_error)(channel, block, count, buf,
202 size, actual, retval);
203 return retval;
204}
adfc8c6c
TT
205
206static errcode_t raw_write_blk(io_channel channel,
207 struct unix_private_data *data,
59ecd32d 208 unsigned long long block,
d32c915a 209 int count, const void *bufv)
adfc8c6c 210{
54434927 211 ssize_t size;
adfc8c6c
TT
212 ext2_loff_t location;
213 int actual = 0;
214 errcode_t retval;
d32c915a 215 const unsigned char *buf = bufv;
adfc8c6c
TT
216
217 if (count == 1)
218 size = channel->block_size;
219 else {
220 if (count < 0)
221 size = -count;
222 else
223 size = count * channel->block_size;
224 }
6d96b00d 225 data->io_stats.bytes_written += size;
adfc8c6c 226
2e8ca9a2 227 location = ((ext2_loff_t) block * channel->block_size) + data->offset;
baa35446 228
f00948ad 229#ifdef HAVE_PWRITE64
baa35446
DW
230 /* Try an aligned pwrite */
231 if ((channel->align == 0) ||
232 (IS_ALIGNED(buf, channel->align) &&
233 IS_ALIGNED(size, channel->align))) {
f00948ad
TT
234 actual = pwrite64(data->dev, buf, size, location);
235 if (actual == size)
236 return 0;
237 }
238#elif HAVE_PWRITE
239 /* Try an aligned pwrite */
240 if ((sizeof(off_t) >= sizeof(ext2_loff_t)) &&
241 ((channel->align == 0) ||
242 (IS_ALIGNED(buf, channel->align) &&
243 IS_ALIGNED(size, channel->align)))) {
baa35446
DW
244 actual = pwrite(data->dev, buf, size, location);
245 if (actual == size)
246 return 0;
247 }
248#endif /* HAVE_PWRITE */
249
adfc8c6c
TT
250 if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) {
251 retval = errno ? errno : EXT2_ET_LLSEEK_FAILED;
252 goto error_out;
253 }
efc6f628 254
0a05b903
TT
255 if ((channel->align == 0) ||
256 (IS_ALIGNED(buf, channel->align) &&
257 IS_ALIGNED(size, channel->align))) {
7f1a1fbf
TT
258 actual = write(data->dev, buf, size);
259 if (actual != size) {
260 short_write:
261 retval = EXT2_ET_SHORT_WRITE;
262 goto error_out;
263 }
264 return 0;
265 }
266
267#ifdef ALIGN_DEBUG
268 printf("raw_write_blk: O_DIRECT fallback: %p %lu\n", buf,
269 (unsigned long) size);
270#endif
271 /*
272 * The buffer or size which we're trying to write isn't aligned
273 * to the O_DIRECT rules, so we need to do this the hard way...
274 */
275 while (size > 0) {
276 if (size < channel->block_size) {
277 actual = read(data->dev, data->bounce,
278 channel->block_size);
279 if (actual != channel->block_size) {
280 retval = EXT2_ET_SHORT_READ;
281 goto error_out;
282 }
283 }
284 actual = size;
285 if (size > channel->block_size)
286 actual = channel->block_size;
287 memcpy(data->bounce, buf, actual);
288 actual = write(data->dev, data->bounce, channel->block_size);
289 if (actual != channel->block_size)
290 goto short_write;
291 size -= actual;
292 buf += actual;
adfc8c6c
TT
293 }
294 return 0;
efc6f628 295
adfc8c6c
TT
296error_out:
297 if (channel->write_error)
298 retval = (channel->write_error)(channel, block, count, buf,
299 size, actual, retval);
300 return retval;
301}
302
303
304/*
305 * Here we implement the cache functions
306 */
307
308/* Allocate the cache buffers */
309static errcode_t alloc_cache(io_channel channel,
310 struct unix_private_data *data)
311{
312 errcode_t retval;
313 struct unix_cache *cache;
314 int i;
efc6f628 315
adfc8c6c
TT
316 data->access_time = 0;
317 for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
318 cache->block = 0;
319 cache->access_time = 0;
320 cache->dirty = 0;
321 cache->in_use = 0;
faafdb76
TT
322 if (cache->buf)
323 ext2fs_free_mem(&cache->buf);
fd1c5a06 324 retval = io_channel_alloc_buf(channel, 0, &cache->buf);
7f1a1fbf 325 if (retval)
adfc8c6c
TT
326 return retval;
327 }
0a05b903 328 if (channel->align) {
7f1a1fbf
TT
329 if (data->bounce)
330 ext2fs_free_mem(&data->bounce);
fd1c5a06 331 retval = io_channel_alloc_buf(channel, 0, &data->bounce);
7f1a1fbf
TT
332 }
333 return retval;
adfc8c6c
TT
334}
335
336/* Free the cache buffers */
54434927 337static void free_cache(struct unix_private_data *data)
adfc8c6c
TT
338{
339 struct unix_cache *cache;
340 int i;
efc6f628 341
adfc8c6c
TT
342 data->access_time = 0;
343 for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
344 cache->block = 0;
345 cache->access_time = 0;
346 cache->dirty = 0;
347 cache->in_use = 0;
348 if (cache->buf)
c4e3d3f3 349 ext2fs_free_mem(&cache->buf);
adfc8c6c 350 }
7f1a1fbf
TT
351 if (data->bounce)
352 ext2fs_free_mem(&data->bounce);
adfc8c6c
TT
353}
354
b8a95315 355#ifndef NO_IO_CACHE
adfc8c6c 356/*
82c4660c
TT
357 * Try to find a block in the cache. If the block is not found, and
358 * eldest is a non-zero pointer, then fill in eldest with the cache
359 * entry to that should be reused.
adfc8c6c 360 */
54434927 361static struct unix_cache *find_cached_block(struct unix_private_data *data,
59ecd32d 362 unsigned long long block,
82c4660c 363 struct unix_cache **eldest)
adfc8c6c 364{
31dbecd4 365 struct unix_cache *cache, *unused_cache, *oldest_cache;
adfc8c6c 366 int i;
efc6f628 367
31dbecd4 368 unused_cache = oldest_cache = 0;
adfc8c6c
TT
369 for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
370 if (!cache->in_use) {
82c4660c
TT
371 if (!unused_cache)
372 unused_cache = cache;
adfc8c6c
TT
373 continue;
374 }
375 if (cache->block == block) {
376 cache->access_time = ++data->access_time;
377 return cache;
378 }
379 if (!oldest_cache ||
380 (cache->access_time < oldest_cache->access_time))
381 oldest_cache = cache;
382 }
82c4660c
TT
383 if (eldest)
384 *eldest = (unused_cache) ? unused_cache : oldest_cache;
385 return 0;
386}
387
388/*
389 * Reuse a particular cache entry for another block.
390 */
23b7c8b8 391static void reuse_cache(io_channel channel, struct unix_private_data *data,
59ecd32d 392 struct unix_cache *cache, unsigned long long block)
82c4660c
TT
393{
394 if (cache->dirty && cache->in_use)
395 raw_write_blk(channel, data, cache->block, 1, cache->buf);
396
adfc8c6c 397 cache->in_use = 1;
1d47dfb9 398 cache->dirty = 0;
adfc8c6c
TT
399 cache->block = block;
400 cache->access_time = ++data->access_time;
adfc8c6c
TT
401}
402
403/*
404 * Flush all of the blocks in the cache
405 */
406static errcode_t flush_cached_blocks(io_channel channel,
407 struct unix_private_data *data,
408 int invalidate)
409
410{
411 struct unix_cache *cache;
412 errcode_t retval, retval2;
413 int i;
efc6f628 414
adfc8c6c
TT
415 retval2 = 0;
416 for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
417 if (!cache->in_use)
418 continue;
efc6f628 419
adfc8c6c
TT
420 if (invalidate)
421 cache->in_use = 0;
efc6f628 422
adfc8c6c
TT
423 if (!cache->dirty)
424 continue;
efc6f628 425
adfc8c6c
TT
426 retval = raw_write_blk(channel, data,
427 cache->block, 1, cache->buf);
428 if (retval)
429 retval2 = retval;
430 else
431 cache->dirty = 0;
432 }
433 return retval2;
434}
b8a95315 435#endif /* NO_IO_CACHE */
adfc8c6c 436
d866599a
LC
437#ifdef __linux__
438#ifndef BLKDISCARDZEROES
439#define BLKDISCARDZEROES _IO(0x12,124)
440#endif
441#endif
442
182acd17
AD
/*
 * Open pathname with the given flags, using the 64-bit open variant
 * when the build provides it.  The mode argument is forwarded to the
 * underlying call only when it is non-zero.  Returns whatever the
 * underlying open call returns.
 */
int ext2fs_open_file(const char *pathname, int flags, mode_t mode)
{
#if defined(HAVE_OPEN64) && !defined(__OSX_AVAILABLE_BUT_DEPRECATED)
	return mode ? open64(pathname, flags, mode)
		    : open64(pathname, flags);
#else
	return mode ? open(pathname, flags, mode)
		    : open(pathname, flags);
#endif
}
456
457int ext2fs_stat(const char *path, ext2fs_struct_stat *buf)
458{
459#if defined(HAVE_FSTAT64) && !defined(__OSX_AVAILABLE_BUT_DEPRECATED)
460 return stat64(path, buf);
461#else
462 return stat(path, buf);
463#endif
464}
465
466int ext2fs_fstat(int fd, ext2fs_struct_stat *buf)
467{
468#if defined(HAVE_FSTAT64) && !defined(__OSX_AVAILABLE_BUT_DEPRECATED)
469 return fstat64(fd, buf);
470#else
471 return fstat(fd, buf);
472#endif
473}
474
3839e657
TT
475static errcode_t unix_open(const char *name, int flags, io_channel *channel)
476{
477 io_channel io = NULL;
478 struct unix_private_data *data = NULL;
479 errcode_t retval;
1d6fd6d0 480 int open_flags;
d9a5d375 481 int f_nocache = 0;
c859cb1d 482 ext2fs_struct_stat st;
f154d2f6 483#ifdef __linux__
a4613d13 484 struct utsname ut;
f154d2f6 485#endif
3839e657 486
50e1e10f
TT
487 if (name == 0)
488 return EXT2_ET_BAD_DEVICE_NAME;
c4e3d3f3 489 retval = ext2fs_get_mem(sizeof(struct struct_io_channel), &io);
7b4e4534 490 if (retval)
624e8ebe 491 goto cleanup;
f3db3566
TT
492 memset(io, 0, sizeof(struct struct_io_channel));
493 io->magic = EXT2_ET_MAGIC_IO_CHANNEL;
c4e3d3f3 494 retval = ext2fs_get_mem(sizeof(struct unix_private_data), &data);
7b4e4534 495 if (retval)
3839e657 496 goto cleanup;
7b4e4534 497
3839e657 498 io->manager = unix_io_manager;
c4e3d3f3 499 retval = ext2fs_get_mem(strlen(name)+1, &io->name);
7b4e4534 500 if (retval)
3839e657 501 goto cleanup;
7b4e4534 502
3839e657
TT
503 strcpy(io->name, name);
504 io->private_data = data;
f3db3566
TT
505 io->block_size = 1024;
506 io->read_error = 0;
507 io->write_error = 0;
a29f4d30 508 io->refcount = 1;
3839e657
TT
509
510 memset(data, 0, sizeof(struct unix_private_data));
f3db3566 511 data->magic = EXT2_ET_MAGIC_UNIX_IO_CHANNEL;
6d96b00d 512 data->io_stats.num_fields = 2;
4e0bb5eb 513 data->dev = -1;
7b4e4534 514
dc5f68ca 515 open_flags = (flags & IO_FLAG_RW) ? O_RDWR : O_RDONLY;
fa6c653e
TT
516 if (flags & IO_FLAG_EXCLUSIVE)
517 open_flags |= O_EXCL;
d9a5d375 518#if defined(O_DIRECT)
dd0a2679 519 if (flags & IO_FLAG_DIRECT_IO) {
7f1a1fbf 520 open_flags |= O_DIRECT;
dd0a2679
TT
521 io->align = ext2fs_get_dio_alignment(data->dev);
522 }
d9a5d375 523#elif defined(F_NOCACHE)
dd0a2679 524 if (flags & IO_FLAG_DIRECT_IO) {
d9a5d375 525 f_nocache = F_NOCACHE;
dd0a2679
TT
526 io->align = 4096;
527 }
534a4c3d 528#endif
7f1a1fbf
TT
529 data->flags = flags;
530
c4012e5a 531 data->dev = ext2fs_open_file(io->name, open_flags, 0);
3839e657
TT
532 if (data->dev < 0) {
533 retval = errno;
534 goto cleanup;
535 }
d9a5d375
TT
536 if (f_nocache) {
537 if (fcntl(data->dev, f_nocache, 1) < 0) {
538 retval = errno;
539 goto cleanup;
540 }
541 }
64e1b274 542
d2bfdc7f
LC
543 /*
544 * If the device is really a block device, then set the
545 * appropriate flag, otherwise we can set DISCARD_ZEROES flag
546 * because we are going to use punch hole instead of discard
547 * and if it succeed, subsequent read from sparse area returns
548 * zero.
549 */
550 if (ext2fs_stat(io->name, &st) == 0) {
551 if (S_ISBLK(st.st_mode))
552 io->flags |= CHANNEL_FLAGS_BLOCK_DEVICE;
553 else
554 io->flags |= CHANNEL_FLAGS_DISCARD_ZEROES;
555 }
556
d866599a 557#ifdef BLKDISCARDZEROES
1d6fd6d0
AD
558 {
559 int zeroes = 0;
560 if (ioctl(data->dev, BLKDISCARDZEROES, &zeroes) == 0 &&
561 zeroes)
562 io->flags |= CHANNEL_FLAGS_DISCARD_ZEROES;
563 }
d866599a
LC
564#endif
565
7f1a1fbf
TT
566#if defined(__CYGWIN__) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
567 /*
568 * Some operating systems require that the buffers be aligned,
569 * regardless of O_DIRECT
570 */
dd0a2679
TT
571 if (!io->align)
572 io->align = 512;
7f1a1fbf
TT
573#endif
574
575
576 if ((retval = alloc_cache(io, data)))
577 goto cleanup;
578
7ed7a4b6
ES
579#ifdef BLKROGET
580 if (flags & IO_FLAG_RW) {
581 int error;
582 int readonly = 0;
583
584 /* Is the block device actually writable? */
585 error = ioctl(data->dev, BLKROGET, &readonly);
586 if (!error && readonly) {
7ed7a4b6
ES
587 retval = EPERM;
588 goto cleanup;
589 }
590 }
591#endif
592
64e1b274
TT
593#ifdef __linux__
594#undef RLIM_INFINITY
595#if (defined(__alpha__) || ((defined(__sparc__) || defined(__mips__)) && (SIZEOF_LONG == 4)))
596#define RLIM_INFINITY ((unsigned long)(~0UL>>1))
597#else
598#define RLIM_INFINITY (~0UL)
599#endif
8880e759 600 /*
f154d2f6
TT
601 * Work around a bug in 2.4.10-2.4.18 kernels where writes to
602 * block devices are wrongly getting hit by the filesize
603 * limit. This workaround isn't perfect, since it won't work
604 * if glibc wasn't built against 2.2 header files. (Sigh.)
efc6f628 605 *
8880e759 606 */
f154d2f6
TT
607 if ((flags & IO_FLAG_RW) &&
608 (uname(&ut) == 0) &&
609 ((ut.release[0] == '2') && (ut.release[1] == '.') &&
610 (ut.release[2] == '4') && (ut.release[3] == '.') &&
611 (ut.release[4] == '1') && (ut.release[5] >= '0') &&
612 (ut.release[5] < '8')) &&
c859cb1d 613 (ext2fs_stat(io->name, &st) == 0) &&
8880e759
TT
614 (S_ISBLK(st.st_mode))) {
615 struct rlimit rlim;
efc6f628 616
64e1b274 617 rlim.rlim_cur = rlim.rlim_max = (unsigned long) RLIM_INFINITY;
8880e759
TT
618 setrlimit(RLIMIT_FSIZE, &rlim);
619 getrlimit(RLIMIT_FSIZE, &rlim);
bd27880b
TT
620 if (((unsigned long) rlim.rlim_cur) <
621 ((unsigned long) rlim.rlim_max)) {
8880e759
TT
622 rlim.rlim_cur = rlim.rlim_max;
623 setrlimit(RLIMIT_FSIZE, &rlim);
624 }
625 }
64e1b274 626#endif
3839e657
TT
627 *channel = io;
628 return 0;
629
630cleanup:
3839e657 631 if (data) {
4e0bb5eb
TT
632 if (data->dev >= 0)
633 close(data->dev);
54434927 634 free_cache(data);
c4e3d3f3 635 ext2fs_free_mem(&data);
3839e657 636 }
4e0bb5eb
TT
637 if (io) {
638 if (io->name) {
639 ext2fs_free_mem(&io->name);
640 }
c4e3d3f3 641 ext2fs_free_mem(&io);
4e0bb5eb 642 }
3839e657
TT
643 return retval;
644}
645
646static errcode_t unix_close(io_channel channel)
647{
648 struct unix_private_data *data;
649 errcode_t retval = 0;
650
f3db3566 651 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
3839e657 652 data = (struct unix_private_data *) channel->private_data;
f3db3566 653 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
a29f4d30
TT
654
655 if (--channel->refcount > 0)
656 return 0;
adfc8c6c 657
b8a95315 658#ifndef NO_IO_CACHE
adfc8c6c 659 retval = flush_cached_blocks(channel, data, 0);
b8a95315 660#endif
adfc8c6c 661
3839e657
TT
662 if (close(data->dev) < 0)
663 retval = errno;
54434927 664 free_cache(data);
f12e285f 665
c4e3d3f3 666 ext2fs_free_mem(&channel->private_data);
3839e657 667 if (channel->name)
c4e3d3f3
TT
668 ext2fs_free_mem(&channel->name);
669 ext2fs_free_mem(&channel);
3839e657
TT
670 return retval;
671}
672
673static errcode_t unix_set_blksize(io_channel channel, int blksize)
674{
675 struct unix_private_data *data;
7b4e4534 676 errcode_t retval;
3839e657 677
f3db3566 678 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
3839e657 679 data = (struct unix_private_data *) channel->private_data;
f3db3566
TT
680 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
681
3839e657 682 if (channel->block_size != blksize) {
b8a95315 683#ifndef NO_IO_CACHE
adfc8c6c
TT
684 if ((retval = flush_cached_blocks(channel, data, 0)))
685 return retval;
b8a95315 686#endif
efc6f628 687
3839e657 688 channel->block_size = blksize;
54434927 689 free_cache(data);
adfc8c6c 690 if ((retval = alloc_cache(channel, data)))
7b4e4534 691 return retval;
3839e657
TT
692 }
693 return 0;
694}
695
696
59ecd32d 697static errcode_t unix_read_blk64(io_channel channel, unsigned long long block,
3839e657
TT
698 int count, void *buf)
699{
700 struct unix_private_data *data;
82c4660c 701 struct unix_cache *cache, *reuse[READ_DIRECT_SIZE];
3839e657 702 errcode_t retval;
31dbecd4 703 char *cp;
adfc8c6c 704 int i, j;
3839e657 705
f3db3566 706 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
3839e657 707 data = (struct unix_private_data *) channel->private_data;
f3db3566 708 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
3839e657 709
b8a95315
TT
710#ifdef NO_IO_CACHE
711 return raw_read_blk(channel, data, block, count, buf);
712#else
3839e657 713 /*
82c4660c
TT
714 * If we're doing an odd-sized read or a very large read,
715 * flush out the cache and then do a direct read.
3839e657 716 */
82c4660c 717 if (count < 0 || count > WRITE_DIRECT_SIZE) {
adfc8c6c
TT
718 if ((retval = flush_cached_blocks(channel, data, 0)))
719 return retval;
720 return raw_read_blk(channel, data, block, count, buf);
3839e657 721 }
adfc8c6c 722
31dbecd4 723 cp = buf;
adfc8c6c
TT
724 while (count > 0) {
725 /* If it's in the cache, use it! */
54434927 726 if ((cache = find_cached_block(data, block, &reuse[0]))) {
adfc8c6c 727#ifdef DEBUG
d0ff90d5 728 printf("Using cached block %lu\n", block);
f3db3566 729#endif
31dbecd4 730 memcpy(cp, cache->buf, channel->block_size);
adfc8c6c
TT
731 count--;
732 block++;
31dbecd4 733 cp += channel->block_size;
adfc8c6c
TT
734 continue;
735 }
7f1a1fbf
TT
736 if (count == 1) {
737 /*
738 * Special case where we read directly into the
739 * cache buffer; important in the O_DIRECT case
740 */
741 cache = reuse[0];
742 reuse_cache(channel, data, cache, block);
743 if ((retval = raw_read_blk(channel, data, block, 1,
744 cache->buf))) {
745 cache->in_use = 0;
746 return retval;
747 }
748 memcpy(cp, cache->buf, channel->block_size);
749 return 0;
750 }
751
adfc8c6c
TT
752 /*
753 * Find the number of uncached blocks so we can do a
754 * single read request
755 */
756 for (i=1; i < count; i++)
54434927 757 if (find_cached_block(data, block+i, &reuse[i]))
adfc8c6c
TT
758 break;
759#ifdef DEBUG
d0ff90d5 760 printf("Reading %d blocks starting at %lu\n", i, block);
adfc8c6c 761#endif
31dbecd4 762 if ((retval = raw_read_blk(channel, data, block, i, cp)))
adfc8c6c 763 return retval;
efc6f628 764
adfc8c6c
TT
765 /* Save the results in the cache */
766 for (j=0; j < i; j++) {
767 count--;
82c4660c
TT
768 cache = reuse[j];
769 reuse_cache(channel, data, cache, block++);
770 memcpy(cache->buf, cp, channel->block_size);
31dbecd4 771 cp += channel->block_size;
adfc8c6c 772 }
3839e657
TT
773 }
774 return 0;
b8a95315 775#endif /* NO_IO_CACHE */
3839e657
TT
776}
777
59ecd32d
JS
778static errcode_t unix_read_blk(io_channel channel, unsigned long block,
779 int count, void *buf)
780{
781 return unix_read_blk64(channel, block, count, buf);
782}
783
784static errcode_t unix_write_blk64(io_channel channel, unsigned long long block,
3839e657
TT
785 int count, const void *buf)
786{
787 struct unix_private_data *data;
82c4660c 788 struct unix_cache *cache, *reuse;
23b7c8b8 789 errcode_t retval = 0;
31dbecd4
TT
790 const char *cp;
791 int writethrough;
3839e657 792
f3db3566 793 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
3839e657 794 data = (struct unix_private_data *) channel->private_data;
f3db3566 795 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
3839e657 796
b8a95315
TT
797#ifdef NO_IO_CACHE
798 return raw_write_blk(channel, data, block, count, buf);
efc6f628 799#else
adfc8c6c
TT
800 /*
801 * If we're doing an odd-sized write or a very large write,
802 * flush out the cache completely and then do a direct write.
803 */
82c4660c 804 if (count < 0 || count > WRITE_DIRECT_SIZE) {
adfc8c6c
TT
805 if ((retval = flush_cached_blocks(channel, data, 1)))
806 return retval;
807 return raw_write_blk(channel, data, block, count, buf);
3839e657
TT
808 }
809
adfc8c6c
TT
810 /*
811 * For a moderate-sized multi-block write, first force a write
812 * if we're in write-through cache mode, and then fill the
813 * cache with the blocks.
814 */
815 writethrough = channel->flags & CHANNEL_FLAGS_WRITETHROUGH;
816 if (writethrough)
817 retval = raw_write_blk(channel, data, block, count, buf);
efc6f628 818
31dbecd4 819 cp = buf;
adfc8c6c 820 while (count > 0) {
54434927 821 cache = find_cached_block(data, block, &reuse);
adfc8c6c 822 if (!cache) {
82c4660c
TT
823 cache = reuse;
824 reuse_cache(channel, data, cache, block);
adfc8c6c 825 }
8d5324c4
DW
826 if (cache->buf != cp)
827 memcpy(cache->buf, cp, channel->block_size);
82c4660c 828 cache->dirty = !writethrough;
adfc8c6c
TT
829 count--;
830 block++;
31dbecd4 831 cp += channel->block_size;
adfc8c6c 832 }
3839e657 833 return retval;
b8a95315 834#endif /* NO_IO_CACHE */
3839e657
TT
835}
836
ca209dc6
AD
837static errcode_t unix_cache_readahead(io_channel channel,
838 unsigned long long block,
839 unsigned long long count)
840{
841#ifdef POSIX_FADV_WILLNEED
842 struct unix_private_data *data;
843
844 data = (struct unix_private_data *)channel->private_data;
845 return posix_fadvise(data->dev,
846 (ext2_loff_t)block * channel->block_size,
847 (ext2_loff_t)count * channel->block_size,
848 POSIX_FADV_WILLNEED);
849#else
850 return EXT2_ET_OP_NOT_SUPPORTED;
851#endif
852}
853
59ecd32d
JS
854static errcode_t unix_write_blk(io_channel channel, unsigned long block,
855 int count, const void *buf)
856{
857 return unix_write_blk64(channel, block, count, buf);
858}
859
c180ac86
TT
860static errcode_t unix_write_byte(io_channel channel, unsigned long offset,
861 int size, const void *buf)
862{
863 struct unix_private_data *data;
31dbecd4 864 errcode_t retval = 0;
54434927 865 ssize_t actual;
c180ac86
TT
866
867 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
868 data = (struct unix_private_data *) channel->private_data;
869 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
870
0a05b903 871 if (channel->align != 0) {
7f1a1fbf
TT
872#ifdef ALIGN_DEBUG
873 printf("unix_write_byte: O_DIRECT fallback\n");
874#endif
875 return EXT2_ET_UNIMPLEMENTED;
876 }
877
b8a95315 878#ifndef NO_IO_CACHE
c180ac86
TT
879 /*
880 * Flush out the cache completely
881 */
882 if ((retval = flush_cached_blocks(channel, data, 1)))
883 return retval;
b8a95315 884#endif
c180ac86 885
2e8ca9a2 886 if (lseek(data->dev, offset + data->offset, SEEK_SET) < 0)
c180ac86 887 return errno;
efc6f628 888
c180ac86
TT
889 actual = write(data->dev, buf, size);
890 if (actual != size)
891 return EXT2_ET_SHORT_WRITE;
892
893 return 0;
894}
895
3839e657 896/*
efc6f628 897 * Flush data buffers to disk.
3839e657
TT
898 */
899static errcode_t unix_flush(io_channel channel)
900{
f3db3566 901 struct unix_private_data *data;
adfc8c6c 902 errcode_t retval = 0;
efc6f628 903
f3db3566
TT
904 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
905 data = (struct unix_private_data *) channel->private_data;
906 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
adfc8c6c 907
b8a95315 908#ifndef NO_IO_CACHE
adfc8c6c 909 retval = flush_cached_blocks(channel, data, 0);
b8a95315 910#endif
36f21439 911 fsync(data->dev);
adfc8c6c 912 return retval;
3839e657
TT
913}
914
efc6f628 915static errcode_t unix_set_option(io_channel channel, const char *option,
2e8ca9a2
TT
916 const char *arg)
917{
918 struct unix_private_data *data;
2aee23f3 919 unsigned long long tmp;
2e8ca9a2
TT
920 char *end;
921
922 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
923 data = (struct unix_private_data *) channel->private_data;
924 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
925
926 if (!strcmp(option, "offset")) {
927 if (!arg)
928 return EXT2_ET_INVALID_ARGUMENT;
929
2aee23f3 930 tmp = strtoull(arg, &end, 0);
2e8ca9a2
TT
931 if (*end)
932 return EXT2_ET_INVALID_ARGUMENT;
933 data->offset = tmp;
2aee23f3
TT
934 if (data->offset < 0)
935 return EXT2_ET_INVALID_ARGUMENT;
2e8ca9a2
TT
936 return 0;
937 }
938 return EXT2_ET_INVALID_ARGUMENT;
939}
e90a59ed
LC
940
941#if defined(__linux__) && !defined(BLKDISCARD)
d2bfdc7f 942#define BLKDISCARD _IO(0x12,119)
e90a59ed
LC
943#endif
944
945static errcode_t unix_discard(io_channel channel, unsigned long long block,
946 unsigned long long count)
947{
e90a59ed 948 struct unix_private_data *data;
e90a59ed
LC
949 int ret;
950
951 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
952 data = (struct unix_private_data *) channel->private_data;
953 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
954
d2bfdc7f
LC
955 if (channel->flags & CHANNEL_FLAGS_BLOCK_DEVICE) {
956#ifdef BLKDISCARD
795c02de 957 __u64 range[2];
1d6fd6d0 958
795c02de
TT
959 range[0] = (__u64)(block) * channel->block_size;
960 range[1] = (__u64)(count) * channel->block_size;
e90a59ed 961
d2bfdc7f
LC
962 ret = ioctl(data->dev, BLKDISCARD, &range);
963#else
964 goto unimplemented;
965#endif
966 } else {
800766ee 967#if defined(HAVE_FALLOCATE) && defined(FALLOC_FL_PUNCH_HOLE)
d2bfdc7f
LC
968 /*
969 * If we are not on block device, try to use punch hole
970 * to reclaim free space.
971 */
972 ret = fallocate(data->dev,
973 FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
974 (off_t)(block) * channel->block_size,
975 (off_t)(count) * channel->block_size);
976#else
977 goto unimplemented;
978#endif
979 }
980 if (ret < 0) {
981 if (errno == EOPNOTSUPP)
982 goto unimplemented;
e90a59ed 983 return errno;
d2bfdc7f 984 }
e90a59ed 985 return 0;
d2bfdc7f 986unimplemented:
e90a59ed 987 return EXT2_ET_UNIMPLEMENTED;
e90a59ed 988}
a4613d13
AD
989
990static struct struct_io_manager struct_unix_manager = {
d4ecec45
TT
991 .magic = EXT2_ET_MAGIC_IO_MANAGER,
992 .name = "Unix I/O Manager",
993 .open = unix_open,
994 .close = unix_close,
995 .set_blksize = unix_set_blksize,
996 .read_blk = unix_read_blk,
997 .write_blk = unix_write_blk,
998 .flush = unix_flush,
999 .write_byte = unix_write_byte,
1000 .set_option = unix_set_option,
1001 .get_stats = unix_get_stats,
1002 .read_blk64 = unix_read_blk64,
1003 .write_blk64 = unix_write_blk64,
1004 .discard = unix_discard,
ca209dc6 1005 .cache_readahead = unix_cache_readahead,
a4613d13
AD
1006};
1007
1008io_manager unix_io_manager = &struct_unix_manager;