]>
Commit | Line | Data |
---|---|---|
3839e657 | 1 | /* |
fff45483 | 2 | * unix_io.c --- This is the Unix (well, really POSIX) implementation |
a4613d13 | 3 | * of the I/O manager. |
3839e657 TT |
4 | * |
5 | * Implements a one-block write-through cache. | |
6 | * | |
efc6f628 | 7 | * Includes support for Windows NT support under Cygwin. |
fff45483 | 8 | * |
64e1b274 | 9 | * Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, |
a4613d13 | 10 | * 2002 by Theodore Ts'o. |
19c78dc0 TT |
11 | * |
12 | * %Begin-Header% | |
543547a5 TT |
13 | * This file may be redistributed under the terms of the GNU Library |
14 | * General Public License, version 2. | |
19c78dc0 | 15 | * %End-Header% |
3839e657 TT |
16 | */ |
17 | ||
ca209dc6 AD |
18 | #define _XOPEN_SOURCE 600 |
19 | #define _DARWIN_C_SOURCE | |
20 | #define _FILE_OFFSET_BITS 64 | |
dc5f68ca TT |
21 | #define _LARGEFILE_SOURCE |
22 | #define _LARGEFILE64_SOURCE | |
cf5301d7 | 23 | #ifndef _GNU_SOURCE |
7f1a1fbf | 24 | #define _GNU_SOURCE |
cf5301d7 | 25 | #endif |
dc5f68ca | 26 | |
d1154eb4 | 27 | #include "config.h" |
3839e657 TT |
28 | #include <stdio.h> |
29 | #include <string.h> | |
4cbe8af4 | 30 | #if HAVE_UNISTD_H |
3839e657 | 31 | #include <unistd.h> |
4cbe8af4 | 32 | #endif |
c4e749ab TT |
33 | #if HAVE_ERRNO_H |
34 | #include <errno.h> | |
35 | #endif | |
3839e657 TT |
36 | #include <fcntl.h> |
37 | #include <time.h> | |
f154d2f6 TT |
38 | #ifdef __linux__ |
39 | #include <sys/utsname.h> | |
40 | #endif | |
ca209dc6 AD |
41 | #if HAVE_SYS_TYPES_H |
42 | #include <sys/types.h> | |
43 | #endif | |
7ed7a4b6 ES |
44 | #ifdef HAVE_SYS_IOCTL_H |
45 | #include <sys/ioctl.h> | |
46 | #endif | |
47 | #ifdef HAVE_SYS_MOUNT_H | |
48 | #include <sys/mount.h> | |
49 | #endif | |
1d2ff46a | 50 | #if HAVE_SYS_STAT_H |
3839e657 | 51 | #include <sys/stat.h> |
1d2ff46a | 52 | #endif |
fff45483 | 53 | #if HAVE_SYS_RESOURCE_H |
8880e759 | 54 | #include <sys/resource.h> |
fff45483 | 55 | #endif |
d2bfdc7f LC |
56 | #if HAVE_LINUX_FALLOC_H |
57 | #include <linux/falloc.h> | |
58 | #endif | |
3839e657 | 59 | |
7f1a1fbf | 60 | #if defined(__linux__) && defined(_IO) && !defined(BLKROGET) |
7ed7a4b6 ES |
61 | #define BLKROGET _IO(0x12, 94) /* Get read-only status (0 = read_write). */ |
62 | #endif | |
63 | ||
7f1a1fbf TT |
64 | #undef ALIGN_DEBUG |
65 | ||
b5abe6fa | 66 | #include "ext2_fs.h" |
7b4e4534 | 67 | #include "ext2fs.h" |
3839e657 | 68 | |
f3db3566 TT |
69 | /* |
70 | * For checking structure magic numbers... | |
71 | */ | |
72 | ||
/*
 * Expands to a bare `if` that returns `code` from the enclosing function
 * when (struct)->magic differs from `code`.  Because the expansion is not
 * wrapped in do/while(0), do not use it as the body of an if/else.
 */
#define EXT2_CHECK_MAGIC(struct, code) \
	  if ((struct)->magic != (code)) return (code)
adfc8c6c TT |
75 | |
/* One slot of the per-channel block cache. */
struct unix_cache {
	char			*buf;		/* slot buffer from io_channel_alloc_buf() */
	unsigned long long	block;		/* block number held in this slot */
	int			access_time;	/* stamp of last use; smallest is evicted */
	unsigned		dirty:1;	/* contents not yet written to the device */
	unsigned		in_use:1;	/* slot currently holds a block */
};
83 | ||
84 | #define CACHE_SIZE 8 | |
82c4660c TT |
85 | #define WRITE_DIRECT_SIZE 4 /* Must be smaller than CACHE_SIZE */ |
86 | #define READ_DIRECT_SIZE 4 /* Should be smaller than CACHE_SIZE */ | |
adfc8c6c | 87 | |
/* Per-channel state hung off io_channel->private_data by unix_open(). */
struct unix_private_data {
	int	magic;		/* EXT2_ET_MAGIC_UNIX_IO_CHANNEL */
	int	dev;		/* file descriptor, -1 until opened */
	int	flags;		/* IO_FLAG_* value passed to unix_open() */
	int	align;		/* NOTE(review): not read in the visible code, which uses channel->align */
	int	access_time;	/* counter bumped on every cache hit or reuse */
	ext2_loff_t offset;	/* byte offset added to every block location */
	struct unix_cache cache[CACHE_SIZE];
	void	*bounce;	/* buffer for I/O that fails the alignment checks */
	struct struct_io_stats io_stats;	/* bytes_read / bytes_written counters */
};
99 | ||
7f1a1fbf TT |
/*
 * True when the integer or pointer value `n` is a multiple of `align`
 * (which must be a power of two).  `n` is fully parenthesised so that
 * expressions such as `ptr + 1` are evaluated before the cast.
 */
#define IS_ALIGNED(n, align) ((((unsigned long) (n)) & \
			       ((unsigned long) ((align)-1))) == 0)
102 | ||
6d96b00d TT |
103 | static errcode_t unix_get_stats(io_channel channel, io_stats *stats) |
104 | { | |
a4613d13 | 105 | errcode_t retval = 0; |
6d96b00d TT |
106 | |
107 | struct unix_private_data *data; | |
108 | ||
109 | EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); | |
110 | data = (struct unix_private_data *) channel->private_data; | |
111 | EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); | |
112 | ||
113 | if (stats) | |
114 | *stats = &data->io_stats; | |
115 | ||
116 | return retval; | |
117 | } | |
118 | ||
adfc8c6c TT |
119 | /* |
120 | * Here are the raw I/O functions | |
121 | */ | |
122 | static errcode_t raw_read_blk(io_channel channel, | |
123 | struct unix_private_data *data, | |
59ecd32d | 124 | unsigned long long block, |
d32c915a | 125 | int count, void *bufv) |
adfc8c6c TT |
126 | { |
127 | errcode_t retval; | |
54434927 | 128 | ssize_t size; |
adfc8c6c TT |
129 | ext2_loff_t location; |
130 | int actual = 0; | |
d32c915a | 131 | unsigned char *buf = bufv; |
adfc8c6c TT |
132 | |
133 | size = (count < 0) ? -count : count * channel->block_size; | |
6d96b00d | 134 | data->io_stats.bytes_read += size; |
2e8ca9a2 | 135 | location = ((ext2_loff_t) block * channel->block_size) + data->offset; |
baa35446 | 136 | |
f00948ad | 137 | #ifdef HAVE_PREAD64 |
baa35446 DW |
138 | /* Try an aligned pread */ |
139 | if ((channel->align == 0) || | |
140 | (IS_ALIGNED(buf, channel->align) && | |
141 | IS_ALIGNED(size, channel->align))) { | |
f00948ad TT |
142 | actual = pread64(data->dev, buf, size, location); |
143 | if (actual == size) | |
144 | return 0; | |
145 | } | |
146 | #elif HAVE_PREAD | |
147 | /* Try an aligned pread */ | |
148 | if ((sizeof(off_t) >= sizeof(ext2_loff_t)) && | |
149 | ((channel->align == 0) || | |
150 | (IS_ALIGNED(buf, channel->align) && | |
151 | IS_ALIGNED(size, channel->align)))) { | |
baa35446 DW |
152 | actual = pread(data->dev, buf, size, location); |
153 | if (actual == size) | |
154 | return 0; | |
155 | } | |
156 | #endif /* HAVE_PREAD */ | |
157 | ||
adfc8c6c TT |
158 | if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) { |
159 | retval = errno ? errno : EXT2_ET_LLSEEK_FAILED; | |
160 | goto error_out; | |
161 | } | |
0a05b903 TT |
162 | if ((channel->align == 0) || |
163 | (IS_ALIGNED(buf, channel->align) && | |
164 | IS_ALIGNED(size, channel->align))) { | |
7f1a1fbf TT |
165 | actual = read(data->dev, buf, size); |
166 | if (actual != size) { | |
167 | short_read: | |
168 | if (actual < 0) | |
169 | actual = 0; | |
170 | retval = EXT2_ET_SHORT_READ; | |
171 | goto error_out; | |
172 | } | |
173 | return 0; | |
adfc8c6c | 174 | } |
fff45483 | 175 | |
7f1a1fbf TT |
176 | #ifdef ALIGN_DEBUG |
177 | printf("raw_read_blk: O_DIRECT fallback: %p %lu\n", buf, | |
178 | (unsigned long) size); | |
fff45483 | 179 | #endif |
7f1a1fbf TT |
180 | |
181 | /* | |
182 | * The buffer or size which we're trying to read isn't aligned | |
183 | * to the O_DIRECT rules, so we need to do this the hard way... | |
184 | */ | |
185 | while (size > 0) { | |
186 | actual = read(data->dev, data->bounce, channel->block_size); | |
187 | if (actual != channel->block_size) | |
fff45483 | 188 | goto short_read; |
7f1a1fbf TT |
189 | actual = size; |
190 | if (size > channel->block_size) | |
191 | actual = channel->block_size; | |
192 | memcpy(buf, data->bounce, actual); | |
193 | size -= actual; | |
194 | buf += actual; | |
fff45483 TT |
195 | } |
196 | return 0; | |
197 | ||
fff45483 | 198 | error_out: |
7f1a1fbf | 199 | memset((char *) buf+actual, 0, size-actual); |
fff45483 TT |
200 | if (channel->read_error) |
201 | retval = (channel->read_error)(channel, block, count, buf, | |
202 | size, actual, retval); | |
203 | return retval; | |
204 | } | |
adfc8c6c TT |
205 | |
206 | static errcode_t raw_write_blk(io_channel channel, | |
207 | struct unix_private_data *data, | |
59ecd32d | 208 | unsigned long long block, |
d32c915a | 209 | int count, const void *bufv) |
adfc8c6c | 210 | { |
54434927 | 211 | ssize_t size; |
adfc8c6c TT |
212 | ext2_loff_t location; |
213 | int actual = 0; | |
214 | errcode_t retval; | |
d32c915a | 215 | const unsigned char *buf = bufv; |
adfc8c6c TT |
216 | |
217 | if (count == 1) | |
218 | size = channel->block_size; | |
219 | else { | |
220 | if (count < 0) | |
221 | size = -count; | |
222 | else | |
223 | size = count * channel->block_size; | |
224 | } | |
6d96b00d | 225 | data->io_stats.bytes_written += size; |
adfc8c6c | 226 | |
2e8ca9a2 | 227 | location = ((ext2_loff_t) block * channel->block_size) + data->offset; |
baa35446 | 228 | |
f00948ad | 229 | #ifdef HAVE_PWRITE64 |
baa35446 DW |
230 | /* Try an aligned pwrite */ |
231 | if ((channel->align == 0) || | |
232 | (IS_ALIGNED(buf, channel->align) && | |
233 | IS_ALIGNED(size, channel->align))) { | |
f00948ad TT |
234 | actual = pwrite64(data->dev, buf, size, location); |
235 | if (actual == size) | |
236 | return 0; | |
237 | } | |
238 | #elif HAVE_PWRITE | |
239 | /* Try an aligned pwrite */ | |
240 | if ((sizeof(off_t) >= sizeof(ext2_loff_t)) && | |
241 | ((channel->align == 0) || | |
242 | (IS_ALIGNED(buf, channel->align) && | |
243 | IS_ALIGNED(size, channel->align)))) { | |
baa35446 DW |
244 | actual = pwrite(data->dev, buf, size, location); |
245 | if (actual == size) | |
246 | return 0; | |
247 | } | |
248 | #endif /* HAVE_PWRITE */ | |
249 | ||
adfc8c6c TT |
250 | if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) { |
251 | retval = errno ? errno : EXT2_ET_LLSEEK_FAILED; | |
252 | goto error_out; | |
253 | } | |
efc6f628 | 254 | |
0a05b903 TT |
255 | if ((channel->align == 0) || |
256 | (IS_ALIGNED(buf, channel->align) && | |
257 | IS_ALIGNED(size, channel->align))) { | |
7f1a1fbf TT |
258 | actual = write(data->dev, buf, size); |
259 | if (actual != size) { | |
260 | short_write: | |
261 | retval = EXT2_ET_SHORT_WRITE; | |
262 | goto error_out; | |
263 | } | |
264 | return 0; | |
265 | } | |
266 | ||
267 | #ifdef ALIGN_DEBUG | |
268 | printf("raw_write_blk: O_DIRECT fallback: %p %lu\n", buf, | |
269 | (unsigned long) size); | |
270 | #endif | |
271 | /* | |
272 | * The buffer or size which we're trying to write isn't aligned | |
273 | * to the O_DIRECT rules, so we need to do this the hard way... | |
274 | */ | |
275 | while (size > 0) { | |
276 | if (size < channel->block_size) { | |
277 | actual = read(data->dev, data->bounce, | |
278 | channel->block_size); | |
279 | if (actual != channel->block_size) { | |
280 | retval = EXT2_ET_SHORT_READ; | |
281 | goto error_out; | |
282 | } | |
283 | } | |
284 | actual = size; | |
285 | if (size > channel->block_size) | |
286 | actual = channel->block_size; | |
287 | memcpy(data->bounce, buf, actual); | |
288 | actual = write(data->dev, data->bounce, channel->block_size); | |
289 | if (actual != channel->block_size) | |
290 | goto short_write; | |
291 | size -= actual; | |
292 | buf += actual; | |
adfc8c6c TT |
293 | } |
294 | return 0; | |
efc6f628 | 295 | |
adfc8c6c TT |
296 | error_out: |
297 | if (channel->write_error) | |
298 | retval = (channel->write_error)(channel, block, count, buf, | |
299 | size, actual, retval); | |
300 | return retval; | |
301 | } | |
302 | ||
303 | ||
304 | /* | |
305 | * Here we implement the cache functions | |
306 | */ | |
307 | ||
308 | /* Allocate the cache buffers */ | |
309 | static errcode_t alloc_cache(io_channel channel, | |
310 | struct unix_private_data *data) | |
311 | { | |
312 | errcode_t retval; | |
313 | struct unix_cache *cache; | |
314 | int i; | |
efc6f628 | 315 | |
adfc8c6c TT |
316 | data->access_time = 0; |
317 | for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) { | |
318 | cache->block = 0; | |
319 | cache->access_time = 0; | |
320 | cache->dirty = 0; | |
321 | cache->in_use = 0; | |
faafdb76 TT |
322 | if (cache->buf) |
323 | ext2fs_free_mem(&cache->buf); | |
fd1c5a06 | 324 | retval = io_channel_alloc_buf(channel, 0, &cache->buf); |
7f1a1fbf | 325 | if (retval) |
adfc8c6c TT |
326 | return retval; |
327 | } | |
0a05b903 | 328 | if (channel->align) { |
7f1a1fbf TT |
329 | if (data->bounce) |
330 | ext2fs_free_mem(&data->bounce); | |
fd1c5a06 | 331 | retval = io_channel_alloc_buf(channel, 0, &data->bounce); |
7f1a1fbf TT |
332 | } |
333 | return retval; | |
adfc8c6c TT |
334 | } |
335 | ||
336 | /* Free the cache buffers */ | |
54434927 | 337 | static void free_cache(struct unix_private_data *data) |
adfc8c6c TT |
338 | { |
339 | struct unix_cache *cache; | |
340 | int i; | |
efc6f628 | 341 | |
adfc8c6c TT |
342 | data->access_time = 0; |
343 | for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) { | |
344 | cache->block = 0; | |
345 | cache->access_time = 0; | |
346 | cache->dirty = 0; | |
347 | cache->in_use = 0; | |
348 | if (cache->buf) | |
c4e3d3f3 | 349 | ext2fs_free_mem(&cache->buf); |
adfc8c6c | 350 | } |
7f1a1fbf TT |
351 | if (data->bounce) |
352 | ext2fs_free_mem(&data->bounce); | |
adfc8c6c TT |
353 | } |
354 | ||
b8a95315 | 355 | #ifndef NO_IO_CACHE |
adfc8c6c | 356 | /* |
82c4660c TT |
357 | * Try to find a block in the cache. If the block is not found, and |
358 | * eldest is a non-zero pointer, then fill in eldest with the cache | |
359 | * entry to that should be reused. | |
adfc8c6c | 360 | */ |
54434927 | 361 | static struct unix_cache *find_cached_block(struct unix_private_data *data, |
59ecd32d | 362 | unsigned long long block, |
82c4660c | 363 | struct unix_cache **eldest) |
adfc8c6c | 364 | { |
31dbecd4 | 365 | struct unix_cache *cache, *unused_cache, *oldest_cache; |
adfc8c6c | 366 | int i; |
efc6f628 | 367 | |
31dbecd4 | 368 | unused_cache = oldest_cache = 0; |
adfc8c6c TT |
369 | for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) { |
370 | if (!cache->in_use) { | |
82c4660c TT |
371 | if (!unused_cache) |
372 | unused_cache = cache; | |
adfc8c6c TT |
373 | continue; |
374 | } | |
375 | if (cache->block == block) { | |
376 | cache->access_time = ++data->access_time; | |
377 | return cache; | |
378 | } | |
379 | if (!oldest_cache || | |
380 | (cache->access_time < oldest_cache->access_time)) | |
381 | oldest_cache = cache; | |
382 | } | |
82c4660c TT |
383 | if (eldest) |
384 | *eldest = (unused_cache) ? unused_cache : oldest_cache; | |
385 | return 0; | |
386 | } | |
387 | ||
388 | /* | |
389 | * Reuse a particular cache entry for another block. | |
390 | */ | |
23b7c8b8 | 391 | static void reuse_cache(io_channel channel, struct unix_private_data *data, |
59ecd32d | 392 | struct unix_cache *cache, unsigned long long block) |
82c4660c TT |
393 | { |
394 | if (cache->dirty && cache->in_use) | |
395 | raw_write_blk(channel, data, cache->block, 1, cache->buf); | |
396 | ||
adfc8c6c | 397 | cache->in_use = 1; |
1d47dfb9 | 398 | cache->dirty = 0; |
adfc8c6c TT |
399 | cache->block = block; |
400 | cache->access_time = ++data->access_time; | |
adfc8c6c TT |
401 | } |
402 | ||
403 | /* | |
404 | * Flush all of the blocks in the cache | |
405 | */ | |
406 | static errcode_t flush_cached_blocks(io_channel channel, | |
407 | struct unix_private_data *data, | |
408 | int invalidate) | |
409 | ||
410 | { | |
411 | struct unix_cache *cache; | |
412 | errcode_t retval, retval2; | |
413 | int i; | |
efc6f628 | 414 | |
adfc8c6c TT |
415 | retval2 = 0; |
416 | for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) { | |
417 | if (!cache->in_use) | |
418 | continue; | |
efc6f628 | 419 | |
adfc8c6c TT |
420 | if (invalidate) |
421 | cache->in_use = 0; | |
efc6f628 | 422 | |
adfc8c6c TT |
423 | if (!cache->dirty) |
424 | continue; | |
efc6f628 | 425 | |
adfc8c6c TT |
426 | retval = raw_write_blk(channel, data, |
427 | cache->block, 1, cache->buf); | |
428 | if (retval) | |
429 | retval2 = retval; | |
430 | else | |
431 | cache->dirty = 0; | |
432 | } | |
433 | return retval2; | |
434 | } | |
b8a95315 | 435 | #endif /* NO_IO_CACHE */ |
adfc8c6c | 436 | |
d866599a LC |
437 | #ifdef __linux__ |
438 | #ifndef BLKDISCARDZEROES | |
439 | #define BLKDISCARDZEROES _IO(0x12,124) | |
440 | #endif | |
441 | #endif | |
442 | ||
182acd17 AD |
/*
 * Open `pathname`, using open64() where the build provides it.  The mode
 * argument is forwarded to the underlying call only when it is non-zero.
 * Returns the value of the underlying open call.
 */
int ext2fs_open_file(const char *pathname, int flags, mode_t mode)
{
#if defined(HAVE_OPEN64) && !defined(__OSX_AVAILABLE_BUT_DEPRECATED)
	if (mode)
		return open64(pathname, flags, mode);
	return open64(pathname, flags);
#else
	if (mode)
		return open(pathname, flags, mode);
	return open(pathname, flags);
#endif
}
456 | ||
457 | int ext2fs_stat(const char *path, ext2fs_struct_stat *buf) | |
458 | { | |
459 | #if defined(HAVE_FSTAT64) && !defined(__OSX_AVAILABLE_BUT_DEPRECATED) | |
460 | return stat64(path, buf); | |
461 | #else | |
462 | return stat(path, buf); | |
463 | #endif | |
464 | } | |
465 | ||
466 | int ext2fs_fstat(int fd, ext2fs_struct_stat *buf) | |
467 | { | |
468 | #if defined(HAVE_FSTAT64) && !defined(__OSX_AVAILABLE_BUT_DEPRECATED) | |
469 | return fstat64(fd, buf); | |
470 | #else | |
471 | return fstat(fd, buf); | |
472 | #endif | |
473 | } | |
474 | ||
3839e657 TT |
475 | static errcode_t unix_open(const char *name, int flags, io_channel *channel) |
476 | { | |
477 | io_channel io = NULL; | |
478 | struct unix_private_data *data = NULL; | |
479 | errcode_t retval; | |
1d6fd6d0 | 480 | int open_flags; |
d9a5d375 | 481 | int f_nocache = 0; |
c859cb1d | 482 | ext2fs_struct_stat st; |
f154d2f6 | 483 | #ifdef __linux__ |
a4613d13 | 484 | struct utsname ut; |
f154d2f6 | 485 | #endif |
3839e657 | 486 | |
50e1e10f TT |
487 | if (name == 0) |
488 | return EXT2_ET_BAD_DEVICE_NAME; | |
c4e3d3f3 | 489 | retval = ext2fs_get_mem(sizeof(struct struct_io_channel), &io); |
7b4e4534 | 490 | if (retval) |
624e8ebe | 491 | goto cleanup; |
f3db3566 TT |
492 | memset(io, 0, sizeof(struct struct_io_channel)); |
493 | io->magic = EXT2_ET_MAGIC_IO_CHANNEL; | |
c4e3d3f3 | 494 | retval = ext2fs_get_mem(sizeof(struct unix_private_data), &data); |
7b4e4534 | 495 | if (retval) |
3839e657 | 496 | goto cleanup; |
7b4e4534 | 497 | |
3839e657 | 498 | io->manager = unix_io_manager; |
c4e3d3f3 | 499 | retval = ext2fs_get_mem(strlen(name)+1, &io->name); |
7b4e4534 | 500 | if (retval) |
3839e657 | 501 | goto cleanup; |
7b4e4534 | 502 | |
3839e657 TT |
503 | strcpy(io->name, name); |
504 | io->private_data = data; | |
f3db3566 TT |
505 | io->block_size = 1024; |
506 | io->read_error = 0; | |
507 | io->write_error = 0; | |
a29f4d30 | 508 | io->refcount = 1; |
3839e657 TT |
509 | |
510 | memset(data, 0, sizeof(struct unix_private_data)); | |
f3db3566 | 511 | data->magic = EXT2_ET_MAGIC_UNIX_IO_CHANNEL; |
6d96b00d | 512 | data->io_stats.num_fields = 2; |
4e0bb5eb | 513 | data->dev = -1; |
7b4e4534 | 514 | |
dc5f68ca | 515 | open_flags = (flags & IO_FLAG_RW) ? O_RDWR : O_RDONLY; |
fa6c653e TT |
516 | if (flags & IO_FLAG_EXCLUSIVE) |
517 | open_flags |= O_EXCL; | |
d9a5d375 | 518 | #if defined(O_DIRECT) |
dd0a2679 | 519 | if (flags & IO_FLAG_DIRECT_IO) { |
7f1a1fbf | 520 | open_flags |= O_DIRECT; |
dd0a2679 TT |
521 | io->align = ext2fs_get_dio_alignment(data->dev); |
522 | } | |
d9a5d375 | 523 | #elif defined(F_NOCACHE) |
dd0a2679 | 524 | if (flags & IO_FLAG_DIRECT_IO) { |
d9a5d375 | 525 | f_nocache = F_NOCACHE; |
dd0a2679 TT |
526 | io->align = 4096; |
527 | } | |
534a4c3d | 528 | #endif |
7f1a1fbf TT |
529 | data->flags = flags; |
530 | ||
c4012e5a | 531 | data->dev = ext2fs_open_file(io->name, open_flags, 0); |
3839e657 TT |
532 | if (data->dev < 0) { |
533 | retval = errno; | |
534 | goto cleanup; | |
535 | } | |
d9a5d375 TT |
536 | if (f_nocache) { |
537 | if (fcntl(data->dev, f_nocache, 1) < 0) { | |
538 | retval = errno; | |
539 | goto cleanup; | |
540 | } | |
541 | } | |
64e1b274 | 542 | |
d2bfdc7f LC |
543 | /* |
544 | * If the device is really a block device, then set the | |
545 | * appropriate flag, otherwise we can set DISCARD_ZEROES flag | |
546 | * because we are going to use punch hole instead of discard | |
547 | * and if it succeed, subsequent read from sparse area returns | |
548 | * zero. | |
549 | */ | |
550 | if (ext2fs_stat(io->name, &st) == 0) { | |
551 | if (S_ISBLK(st.st_mode)) | |
552 | io->flags |= CHANNEL_FLAGS_BLOCK_DEVICE; | |
553 | else | |
554 | io->flags |= CHANNEL_FLAGS_DISCARD_ZEROES; | |
555 | } | |
556 | ||
d866599a | 557 | #ifdef BLKDISCARDZEROES |
1d6fd6d0 AD |
558 | { |
559 | int zeroes = 0; | |
560 | if (ioctl(data->dev, BLKDISCARDZEROES, &zeroes) == 0 && | |
561 | zeroes) | |
562 | io->flags |= CHANNEL_FLAGS_DISCARD_ZEROES; | |
563 | } | |
d866599a LC |
564 | #endif |
565 | ||
7f1a1fbf TT |
566 | #if defined(__CYGWIN__) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) |
567 | /* | |
568 | * Some operating systems require that the buffers be aligned, | |
569 | * regardless of O_DIRECT | |
570 | */ | |
dd0a2679 TT |
571 | if (!io->align) |
572 | io->align = 512; | |
7f1a1fbf TT |
573 | #endif |
574 | ||
575 | ||
576 | if ((retval = alloc_cache(io, data))) | |
577 | goto cleanup; | |
578 | ||
7ed7a4b6 ES |
579 | #ifdef BLKROGET |
580 | if (flags & IO_FLAG_RW) { | |
581 | int error; | |
582 | int readonly = 0; | |
583 | ||
584 | /* Is the block device actually writable? */ | |
585 | error = ioctl(data->dev, BLKROGET, &readonly); | |
586 | if (!error && readonly) { | |
7ed7a4b6 ES |
587 | retval = EPERM; |
588 | goto cleanup; | |
589 | } | |
590 | } | |
591 | #endif | |
592 | ||
64e1b274 TT |
593 | #ifdef __linux__ |
594 | #undef RLIM_INFINITY | |
595 | #if (defined(__alpha__) || ((defined(__sparc__) || defined(__mips__)) && (SIZEOF_LONG == 4))) | |
596 | #define RLIM_INFINITY ((unsigned long)(~0UL>>1)) | |
597 | #else | |
598 | #define RLIM_INFINITY (~0UL) | |
599 | #endif | |
8880e759 | 600 | /* |
f154d2f6 TT |
601 | * Work around a bug in 2.4.10-2.4.18 kernels where writes to |
602 | * block devices are wrongly getting hit by the filesize | |
603 | * limit. This workaround isn't perfect, since it won't work | |
604 | * if glibc wasn't built against 2.2 header files. (Sigh.) | |
efc6f628 | 605 | * |
8880e759 | 606 | */ |
f154d2f6 TT |
607 | if ((flags & IO_FLAG_RW) && |
608 | (uname(&ut) == 0) && | |
609 | ((ut.release[0] == '2') && (ut.release[1] == '.') && | |
610 | (ut.release[2] == '4') && (ut.release[3] == '.') && | |
611 | (ut.release[4] == '1') && (ut.release[5] >= '0') && | |
612 | (ut.release[5] < '8')) && | |
c859cb1d | 613 | (ext2fs_stat(io->name, &st) == 0) && |
8880e759 TT |
614 | (S_ISBLK(st.st_mode))) { |
615 | struct rlimit rlim; | |
efc6f628 | 616 | |
64e1b274 | 617 | rlim.rlim_cur = rlim.rlim_max = (unsigned long) RLIM_INFINITY; |
8880e759 TT |
618 | setrlimit(RLIMIT_FSIZE, &rlim); |
619 | getrlimit(RLIMIT_FSIZE, &rlim); | |
bd27880b TT |
620 | if (((unsigned long) rlim.rlim_cur) < |
621 | ((unsigned long) rlim.rlim_max)) { | |
8880e759 TT |
622 | rlim.rlim_cur = rlim.rlim_max; |
623 | setrlimit(RLIMIT_FSIZE, &rlim); | |
624 | } | |
625 | } | |
64e1b274 | 626 | #endif |
3839e657 TT |
627 | *channel = io; |
628 | return 0; | |
629 | ||
630 | cleanup: | |
3839e657 | 631 | if (data) { |
4e0bb5eb TT |
632 | if (data->dev >= 0) |
633 | close(data->dev); | |
54434927 | 634 | free_cache(data); |
c4e3d3f3 | 635 | ext2fs_free_mem(&data); |
3839e657 | 636 | } |
4e0bb5eb TT |
637 | if (io) { |
638 | if (io->name) { | |
639 | ext2fs_free_mem(&io->name); | |
640 | } | |
c4e3d3f3 | 641 | ext2fs_free_mem(&io); |
4e0bb5eb | 642 | } |
3839e657 TT |
643 | return retval; |
644 | } | |
645 | ||
646 | static errcode_t unix_close(io_channel channel) | |
647 | { | |
648 | struct unix_private_data *data; | |
649 | errcode_t retval = 0; | |
650 | ||
f3db3566 | 651 | EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); |
3839e657 | 652 | data = (struct unix_private_data *) channel->private_data; |
f3db3566 | 653 | EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); |
a29f4d30 TT |
654 | |
655 | if (--channel->refcount > 0) | |
656 | return 0; | |
adfc8c6c | 657 | |
b8a95315 | 658 | #ifndef NO_IO_CACHE |
adfc8c6c | 659 | retval = flush_cached_blocks(channel, data, 0); |
b8a95315 | 660 | #endif |
adfc8c6c | 661 | |
3839e657 TT |
662 | if (close(data->dev) < 0) |
663 | retval = errno; | |
54434927 | 664 | free_cache(data); |
f12e285f | 665 | |
c4e3d3f3 | 666 | ext2fs_free_mem(&channel->private_data); |
3839e657 | 667 | if (channel->name) |
c4e3d3f3 TT |
668 | ext2fs_free_mem(&channel->name); |
669 | ext2fs_free_mem(&channel); | |
3839e657 TT |
670 | return retval; |
671 | } | |
672 | ||
673 | static errcode_t unix_set_blksize(io_channel channel, int blksize) | |
674 | { | |
675 | struct unix_private_data *data; | |
7b4e4534 | 676 | errcode_t retval; |
3839e657 | 677 | |
f3db3566 | 678 | EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); |
3839e657 | 679 | data = (struct unix_private_data *) channel->private_data; |
f3db3566 TT |
680 | EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); |
681 | ||
3839e657 | 682 | if (channel->block_size != blksize) { |
b8a95315 | 683 | #ifndef NO_IO_CACHE |
adfc8c6c TT |
684 | if ((retval = flush_cached_blocks(channel, data, 0))) |
685 | return retval; | |
b8a95315 | 686 | #endif |
efc6f628 | 687 | |
3839e657 | 688 | channel->block_size = blksize; |
54434927 | 689 | free_cache(data); |
adfc8c6c | 690 | if ((retval = alloc_cache(channel, data))) |
7b4e4534 | 691 | return retval; |
3839e657 TT |
692 | } |
693 | return 0; | |
694 | } | |
695 | ||
696 | ||
59ecd32d | 697 | static errcode_t unix_read_blk64(io_channel channel, unsigned long long block, |
3839e657 TT |
698 | int count, void *buf) |
699 | { | |
700 | struct unix_private_data *data; | |
82c4660c | 701 | struct unix_cache *cache, *reuse[READ_DIRECT_SIZE]; |
3839e657 | 702 | errcode_t retval; |
31dbecd4 | 703 | char *cp; |
adfc8c6c | 704 | int i, j; |
3839e657 | 705 | |
f3db3566 | 706 | EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); |
3839e657 | 707 | data = (struct unix_private_data *) channel->private_data; |
f3db3566 | 708 | EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); |
3839e657 | 709 | |
b8a95315 TT |
710 | #ifdef NO_IO_CACHE |
711 | return raw_read_blk(channel, data, block, count, buf); | |
712 | #else | |
3839e657 | 713 | /* |
82c4660c TT |
714 | * If we're doing an odd-sized read or a very large read, |
715 | * flush out the cache and then do a direct read. | |
3839e657 | 716 | */ |
82c4660c | 717 | if (count < 0 || count > WRITE_DIRECT_SIZE) { |
adfc8c6c TT |
718 | if ((retval = flush_cached_blocks(channel, data, 0))) |
719 | return retval; | |
720 | return raw_read_blk(channel, data, block, count, buf); | |
3839e657 | 721 | } |
adfc8c6c | 722 | |
31dbecd4 | 723 | cp = buf; |
adfc8c6c TT |
724 | while (count > 0) { |
725 | /* If it's in the cache, use it! */ | |
54434927 | 726 | if ((cache = find_cached_block(data, block, &reuse[0]))) { |
adfc8c6c | 727 | #ifdef DEBUG |
d0ff90d5 | 728 | printf("Using cached block %lu\n", block); |
f3db3566 | 729 | #endif |
31dbecd4 | 730 | memcpy(cp, cache->buf, channel->block_size); |
adfc8c6c TT |
731 | count--; |
732 | block++; | |
31dbecd4 | 733 | cp += channel->block_size; |
adfc8c6c TT |
734 | continue; |
735 | } | |
7f1a1fbf TT |
736 | if (count == 1) { |
737 | /* | |
738 | * Special case where we read directly into the | |
739 | * cache buffer; important in the O_DIRECT case | |
740 | */ | |
741 | cache = reuse[0]; | |
742 | reuse_cache(channel, data, cache, block); | |
743 | if ((retval = raw_read_blk(channel, data, block, 1, | |
744 | cache->buf))) { | |
745 | cache->in_use = 0; | |
746 | return retval; | |
747 | } | |
748 | memcpy(cp, cache->buf, channel->block_size); | |
749 | return 0; | |
750 | } | |
751 | ||
adfc8c6c TT |
752 | /* |
753 | * Find the number of uncached blocks so we can do a | |
754 | * single read request | |
755 | */ | |
756 | for (i=1; i < count; i++) | |
54434927 | 757 | if (find_cached_block(data, block+i, &reuse[i])) |
adfc8c6c TT |
758 | break; |
759 | #ifdef DEBUG | |
d0ff90d5 | 760 | printf("Reading %d blocks starting at %lu\n", i, block); |
adfc8c6c | 761 | #endif |
31dbecd4 | 762 | if ((retval = raw_read_blk(channel, data, block, i, cp))) |
adfc8c6c | 763 | return retval; |
efc6f628 | 764 | |
adfc8c6c TT |
765 | /* Save the results in the cache */ |
766 | for (j=0; j < i; j++) { | |
767 | count--; | |
82c4660c TT |
768 | cache = reuse[j]; |
769 | reuse_cache(channel, data, cache, block++); | |
770 | memcpy(cache->buf, cp, channel->block_size); | |
31dbecd4 | 771 | cp += channel->block_size; |
adfc8c6c | 772 | } |
3839e657 TT |
773 | } |
774 | return 0; | |
b8a95315 | 775 | #endif /* NO_IO_CACHE */ |
3839e657 TT |
776 | } |
777 | ||
59ecd32d JS |
778 | static errcode_t unix_read_blk(io_channel channel, unsigned long block, |
779 | int count, void *buf) | |
780 | { | |
781 | return unix_read_blk64(channel, block, count, buf); | |
782 | } | |
783 | ||
784 | static errcode_t unix_write_blk64(io_channel channel, unsigned long long block, | |
3839e657 TT |
785 | int count, const void *buf) |
786 | { | |
787 | struct unix_private_data *data; | |
82c4660c | 788 | struct unix_cache *cache, *reuse; |
23b7c8b8 | 789 | errcode_t retval = 0; |
31dbecd4 TT |
790 | const char *cp; |
791 | int writethrough; | |
3839e657 | 792 | |
f3db3566 | 793 | EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); |
3839e657 | 794 | data = (struct unix_private_data *) channel->private_data; |
f3db3566 | 795 | EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); |
3839e657 | 796 | |
b8a95315 TT |
797 | #ifdef NO_IO_CACHE |
798 | return raw_write_blk(channel, data, block, count, buf); | |
efc6f628 | 799 | #else |
adfc8c6c TT |
800 | /* |
801 | * If we're doing an odd-sized write or a very large write, | |
802 | * flush out the cache completely and then do a direct write. | |
803 | */ | |
82c4660c | 804 | if (count < 0 || count > WRITE_DIRECT_SIZE) { |
adfc8c6c TT |
805 | if ((retval = flush_cached_blocks(channel, data, 1))) |
806 | return retval; | |
807 | return raw_write_blk(channel, data, block, count, buf); | |
3839e657 TT |
808 | } |
809 | ||
adfc8c6c TT |
810 | /* |
811 | * For a moderate-sized multi-block write, first force a write | |
812 | * if we're in write-through cache mode, and then fill the | |
813 | * cache with the blocks. | |
814 | */ | |
815 | writethrough = channel->flags & CHANNEL_FLAGS_WRITETHROUGH; | |
816 | if (writethrough) | |
817 | retval = raw_write_blk(channel, data, block, count, buf); | |
efc6f628 | 818 | |
31dbecd4 | 819 | cp = buf; |
adfc8c6c | 820 | while (count > 0) { |
54434927 | 821 | cache = find_cached_block(data, block, &reuse); |
adfc8c6c | 822 | if (!cache) { |
82c4660c TT |
823 | cache = reuse; |
824 | reuse_cache(channel, data, cache, block); | |
adfc8c6c | 825 | } |
8d5324c4 DW |
826 | if (cache->buf != cp) |
827 | memcpy(cache->buf, cp, channel->block_size); | |
82c4660c | 828 | cache->dirty = !writethrough; |
adfc8c6c TT |
829 | count--; |
830 | block++; | |
31dbecd4 | 831 | cp += channel->block_size; |
adfc8c6c | 832 | } |
3839e657 | 833 | return retval; |
b8a95315 | 834 | #endif /* NO_IO_CACHE */ |
3839e657 TT |
835 | } |
836 | ||
ca209dc6 AD |
837 | static errcode_t unix_cache_readahead(io_channel channel, |
838 | unsigned long long block, | |
839 | unsigned long long count) | |
840 | { | |
841 | #ifdef POSIX_FADV_WILLNEED | |
842 | struct unix_private_data *data; | |
843 | ||
844 | data = (struct unix_private_data *)channel->private_data; | |
845 | return posix_fadvise(data->dev, | |
846 | (ext2_loff_t)block * channel->block_size, | |
847 | (ext2_loff_t)count * channel->block_size, | |
848 | POSIX_FADV_WILLNEED); | |
849 | #else | |
850 | return EXT2_ET_OP_NOT_SUPPORTED; | |
851 | #endif | |
852 | } | |
853 | ||
59ecd32d JS |
854 | static errcode_t unix_write_blk(io_channel channel, unsigned long block, |
855 | int count, const void *buf) | |
856 | { | |
857 | return unix_write_blk64(channel, block, count, buf); | |
858 | } | |
859 | ||
c180ac86 TT |
860 | static errcode_t unix_write_byte(io_channel channel, unsigned long offset, |
861 | int size, const void *buf) | |
862 | { | |
863 | struct unix_private_data *data; | |
31dbecd4 | 864 | errcode_t retval = 0; |
54434927 | 865 | ssize_t actual; |
c180ac86 TT |
866 | |
867 | EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); | |
868 | data = (struct unix_private_data *) channel->private_data; | |
869 | EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); | |
870 | ||
0a05b903 | 871 | if (channel->align != 0) { |
7f1a1fbf TT |
872 | #ifdef ALIGN_DEBUG |
873 | printf("unix_write_byte: O_DIRECT fallback\n"); | |
874 | #endif | |
875 | return EXT2_ET_UNIMPLEMENTED; | |
876 | } | |
877 | ||
b8a95315 | 878 | #ifndef NO_IO_CACHE |
c180ac86 TT |
879 | /* |
880 | * Flush out the cache completely | |
881 | */ | |
882 | if ((retval = flush_cached_blocks(channel, data, 1))) | |
883 | return retval; | |
b8a95315 | 884 | #endif |
c180ac86 | 885 | |
2e8ca9a2 | 886 | if (lseek(data->dev, offset + data->offset, SEEK_SET) < 0) |
c180ac86 | 887 | return errno; |
efc6f628 | 888 | |
c180ac86 TT |
889 | actual = write(data->dev, buf, size); |
890 | if (actual != size) | |
891 | return EXT2_ET_SHORT_WRITE; | |
892 | ||
893 | return 0; | |
894 | } | |
895 | ||
3839e657 | 896 | /* |
efc6f628 | 897 | * Flush data buffers to disk. |
3839e657 TT |
898 | */ |
899 | static errcode_t unix_flush(io_channel channel) | |
900 | { | |
f3db3566 | 901 | struct unix_private_data *data; |
adfc8c6c | 902 | errcode_t retval = 0; |
efc6f628 | 903 | |
f3db3566 TT |
904 | EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); |
905 | data = (struct unix_private_data *) channel->private_data; | |
906 | EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); | |
adfc8c6c | 907 | |
b8a95315 | 908 | #ifndef NO_IO_CACHE |
adfc8c6c | 909 | retval = flush_cached_blocks(channel, data, 0); |
b8a95315 | 910 | #endif |
36f21439 | 911 | fsync(data->dev); |
adfc8c6c | 912 | return retval; |
3839e657 TT |
913 | } |
914 | ||
efc6f628 | 915 | static errcode_t unix_set_option(io_channel channel, const char *option, |
2e8ca9a2 TT |
916 | const char *arg) |
917 | { | |
918 | struct unix_private_data *data; | |
2aee23f3 | 919 | unsigned long long tmp; |
2e8ca9a2 TT |
920 | char *end; |
921 | ||
922 | EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); | |
923 | data = (struct unix_private_data *) channel->private_data; | |
924 | EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); | |
925 | ||
926 | if (!strcmp(option, "offset")) { | |
927 | if (!arg) | |
928 | return EXT2_ET_INVALID_ARGUMENT; | |
929 | ||
2aee23f3 | 930 | tmp = strtoull(arg, &end, 0); |
2e8ca9a2 TT |
931 | if (*end) |
932 | return EXT2_ET_INVALID_ARGUMENT; | |
933 | data->offset = tmp; | |
2aee23f3 TT |
934 | if (data->offset < 0) |
935 | return EXT2_ET_INVALID_ARGUMENT; | |
2e8ca9a2 TT |
936 | return 0; |
937 | } | |
938 | return EXT2_ET_INVALID_ARGUMENT; | |
939 | } | |
e90a59ed LC |
940 | |
941 | #if defined(__linux__) && !defined(BLKDISCARD) | |
d2bfdc7f | 942 | #define BLKDISCARD _IO(0x12,119) |
e90a59ed LC |
943 | #endif |
944 | ||
945 | static errcode_t unix_discard(io_channel channel, unsigned long long block, | |
946 | unsigned long long count) | |
947 | { | |
e90a59ed | 948 | struct unix_private_data *data; |
e90a59ed LC |
949 | int ret; |
950 | ||
951 | EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); | |
952 | data = (struct unix_private_data *) channel->private_data; | |
953 | EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); | |
954 | ||
d2bfdc7f LC |
955 | if (channel->flags & CHANNEL_FLAGS_BLOCK_DEVICE) { |
956 | #ifdef BLKDISCARD | |
795c02de | 957 | __u64 range[2]; |
1d6fd6d0 | 958 | |
795c02de TT |
959 | range[0] = (__u64)(block) * channel->block_size; |
960 | range[1] = (__u64)(count) * channel->block_size; | |
e90a59ed | 961 | |
d2bfdc7f LC |
962 | ret = ioctl(data->dev, BLKDISCARD, &range); |
963 | #else | |
964 | goto unimplemented; | |
965 | #endif | |
966 | } else { | |
800766ee | 967 | #if defined(HAVE_FALLOCATE) && defined(FALLOC_FL_PUNCH_HOLE) |
d2bfdc7f LC |
968 | /* |
969 | * If we are not on block device, try to use punch hole | |
970 | * to reclaim free space. | |
971 | */ | |
972 | ret = fallocate(data->dev, | |
973 | FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, | |
974 | (off_t)(block) * channel->block_size, | |
975 | (off_t)(count) * channel->block_size); | |
976 | #else | |
977 | goto unimplemented; | |
978 | #endif | |
979 | } | |
980 | if (ret < 0) { | |
981 | if (errno == EOPNOTSUPP) | |
982 | goto unimplemented; | |
e90a59ed | 983 | return errno; |
d2bfdc7f | 984 | } |
e90a59ed | 985 | return 0; |
d2bfdc7f | 986 | unimplemented: |
e90a59ed | 987 | return EXT2_ET_UNIMPLEMENTED; |
e90a59ed | 988 | } |
a4613d13 AD |
989 | |
990 | static struct struct_io_manager struct_unix_manager = { | |
d4ecec45 TT |
991 | .magic = EXT2_ET_MAGIC_IO_MANAGER, |
992 | .name = "Unix I/O Manager", | |
993 | .open = unix_open, | |
994 | .close = unix_close, | |
995 | .set_blksize = unix_set_blksize, | |
996 | .read_blk = unix_read_blk, | |
997 | .write_blk = unix_write_blk, | |
998 | .flush = unix_flush, | |
999 | .write_byte = unix_write_byte, | |
1000 | .set_option = unix_set_option, | |
1001 | .get_stats = unix_get_stats, | |
1002 | .read_blk64 = unix_read_blk64, | |
1003 | .write_blk64 = unix_write_blk64, | |
1004 | .discard = unix_discard, | |
ca209dc6 | 1005 | .cache_readahead = unix_cache_readahead, |
a4613d13 AD |
1006 | }; |
1007 | ||
1008 | io_manager unix_io_manager = &struct_unix_manager; |