/* db2/mp/mp_fget.c — Berkeley DB 2 memory pool: memp_fget() page fetch. */
1 /*-
2 * See the file LICENSE for redistribution information.
3 *
4 * Copyright (c) 1996, 1997
5 * Sleepycat Software. All rights reserved.
6 */
7 #include "config.h"
8
9 #ifndef lint
10 static const char sccsid[] = "@(#)mp_fget.c 10.33 (Sleepycat) 12/2/97";
11 #endif /* not lint */
12
13 #ifndef NO_SYSTEM_INCLUDES
14 #include <sys/types.h>
15 #include <sys/stat.h>
16
17 #include <errno.h>
18 #include <stdlib.h>
19 #include <string.h>
20 #endif
21
22 #include "db_int.h"
23 #include "shqueue.h"
24 #include "db_shash.h"
25 #include "mp.h"
26 #include "common_ext.h"
27
/*
 * XXX: thread debugging option.  When non-zero, memp_fget() sleeps on
 * every page get (see the DEBUG block below) to force thread switches
 * and shake out latent race conditions.
 */
int __sleep_on_every_page_get;	/* XXX: thread debugging option. */
29
30 /*
31 * memp_fget --
32 * Get a page from the file.
33 */
/*
 * memp_fget --
 *	Get a page from the file.
 *
 * dbmfp:    per-process memory pool file handle.
 * pgnoaddr: in/out -- the requested page number; overwritten with the
 *	     page actually returned when DB_MPOOL_LAST or DB_MPOOL_NEW
 *	     selects the page.
 * flags:    0, or exactly one of DB_MPOOL_CREATE, DB_MPOOL_LAST or
 *	     DB_MPOOL_NEW (combinations are rejected below).
 * addrp:    out -- set to the address of the page's data.
 *
 * Returns 0 on success, or an errno/DB error value.  On success the
 * buffer's reference count and the handle's pin count have both been
 * incremented; the caller must release the page with memp_fput().
 */
int
memp_fget(dbmfp, pgnoaddr, flags, addrp)
	DB_MPOOLFILE *dbmfp;
	db_pgno_t *pgnoaddr;
	int flags;
	void *addrp;
{
	BH *bhp;
	DB_MPOOL *dbmp;
	MPOOL *mp;
	MPOOLFILE *mfp;
	size_t bucket, mf_offset;
	u_long cnt;
	int b_incr, b_inserted, readonly_alloc, ret;
	void *addr;

	dbmp = dbmfp->dbmp;

	/*
	 * Validate arguments.
	 *
	 * !!!
	 * Don't test for DB_MPOOL_CREATE and DB_MPOOL_NEW flags for readonly
	 * files here, and create non-existent pages in readonly files if the
	 * flags are set, later.  The reason is that the hash access method
	 * wants to get empty pages that don't really exist in readonly files.
	 * The only alternative is for hash to write the last "bucket" all the
	 * time, which we don't want to do because one of our big goals in life
	 * is to keep database files small.  It's sleazy as hell, but we catch
	 * any attempt to actually write the file in memp_fput().
	 */
#define	OKFLAGS	(DB_MPOOL_CREATE | DB_MPOOL_LAST | DB_MPOOL_NEW)
	if (flags != 0) {
		if ((ret =
		    __db_fchk(dbmp->dbenv, "memp_fget", flags, OKFLAGS)) != 0)
			return (ret);

		/*
		 * The flags are mutually exclusive: the switch rejects any
		 * value that is not exactly one of the three flags.
		 */
		switch (flags) {
		case DB_MPOOL_CREATE:
		case DB_MPOOL_LAST:
		case DB_MPOOL_NEW:
		case 0:
			break;
		default:
			return (__db_ferr(dbmp->dbenv, "memp_fget", 1));
		}
	}

#ifdef DEBUG
	/*
	 * XXX
	 * We want to switch threads as often as possible.  Sleep every time
	 * we get a new page to make it more likely.
	 */
	if (__sleep_on_every_page_get &&
	    (__db_yield == NULL || __db_yield() != 0))
		__db_sleep(0, 1);
#endif

	mp = dbmp->mp;
	mfp = dbmfp->mfp;
	mf_offset = R_OFFSET(dbmp, mfp);
	addr = NULL;
	bhp = NULL;
	b_incr = b_inserted = ret = 0;

	/* All cache metadata below is protected by the region lock. */
	LOCKREGION(dbmp);

	/*
	 * If mmap'ing the file, just return a pointer.  However, if another
	 * process has opened the file for writing since we mmap'd it, start
	 * playing the game by their rules, i.e. everything goes through the
	 * cache.  All pages previously returned should be safe, as long as
	 * a locking protocol was observed.
	 *
	 * XXX
	 * We don't discard the map because we don't know when all of the
	 * pages will have been discarded from the process' address space.
	 * It would be possible to do so by reference counting the open
	 * pages from the mmap, but it's unclear to me that it's worth it.
	 */
	if (dbmfp->addr != NULL && F_ISSET(dbmfp->mfp, MP_CAN_MMAP)) {
		readonly_alloc = 0;
		if (LF_ISSET(DB_MPOOL_LAST))
			*pgnoaddr = mfp->last_pgno;
		else {
			/*
			 * !!!
			 * Allocate a page that can never really exist.  See
			 * the comment above about non-existent pages and the
			 * hash access method.
			 */
			if (LF_ISSET(DB_MPOOL_CREATE | DB_MPOOL_NEW))
				readonly_alloc = 1;
			else if (*pgnoaddr > mfp->last_pgno) {
				__db_err(dbmp->dbenv,
				    "%s: page %lu doesn't exist",
				    __memp_fn(dbmfp), (u_long)*pgnoaddr);
				ret = EINVAL;
				goto err;
			}
		}
		if (!readonly_alloc) {
			/* Page address is computed directly from the map. */
			addr = R_ADDR(dbmfp, *pgnoaddr * mfp->stat.st_pagesize);

			++mp->stat.st_map;
			++mfp->stat.st_map;

			goto mapret;
		}
		/*
		 * readonly_alloc set: fall through into the cache path and
		 * allocate a zeroed cache buffer for the fictitious page.
		 */
	}

	/* Check if requesting the last page or a new page. */
	if (LF_ISSET(DB_MPOOL_LAST))
		*pgnoaddr = mfp->last_pgno;

	/* A brand-new page can't be in the cache; skip the hash lookup. */
	if (LF_ISSET(DB_MPOOL_NEW)) {
		*pgnoaddr = mfp->last_pgno + 1;
		goto alloc;
	}

	/*
	 * Check the BH hash bucket queue.  Buffers are keyed by (page
	 * number, file region offset); cnt tracks chain length for the
	 * hash statistics.
	 */
	bucket = BUCKET(mp, mf_offset, *pgnoaddr);
	for (cnt = 0,
	    bhp = SH_TAILQ_FIRST(&dbmp->htab[bucket], __bh);
	    bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh)) {
		++cnt;
		if (bhp->pgno == *pgnoaddr && bhp->mf_offset == mf_offset) {
			addr = bhp->buf;
			++mp->stat.st_hash_searches;
			if (cnt > mp->stat.st_hash_longest)
				mp->stat.st_hash_longest = cnt;
			mp->stat.st_hash_examined += cnt;
			goto found;
		}
	}
	/* Cache miss: still record the (non-empty) chain walk in the stats. */
	if (cnt != 0) {
		++mp->stat.st_hash_searches;
		if (cnt > mp->stat.st_hash_longest)
			mp->stat.st_hash_longest = cnt;
		mp->stat.st_hash_examined += cnt;
	}

alloc:	/*
	 * Allocate a new buffer header and data space, and mark the contents
	 * as useless.  The BH's buf[] is its last member, so header and page
	 * data come from one contiguous region allocation.
	 */
	if ((ret = __memp_ralloc(dbmp, sizeof(BH) -
	    sizeof(u_int8_t) + mfp->stat.st_pagesize, NULL, &bhp)) != 0)
		goto err;
	addr = bhp->buf;
#ifdef DEBUG
	/* Page data must be size_t-aligned for the access methods. */
	if ((ALIGNTYPE)addr & (sizeof(size_t) - 1)) {
		__db_err(dbmp->dbenv,
		    "Internal error: BH data NOT size_t aligned.");
		abort();
	}
#endif
	/* Zero only the header; the page data is initialized below. */
	memset(bhp, 0, sizeof(BH));
	LOCKINIT(dbmp, &bhp->mutex);

	/*
	 * Prepend the bucket header to the head of the appropriate MPOOL
	 * bucket hash list.  Append the bucket header to the tail of the
	 * MPOOL LRU chain.
	 *
	 * We have to do this before we read in the page so we can discard
	 * our region lock without screwing up the world.
	 */
	bucket = BUCKET(mp, mf_offset, *pgnoaddr);
	SH_TAILQ_INSERT_HEAD(&dbmp->htab[bucket], bhp, hq, __bh);
	SH_TAILQ_INSERT_TAIL(&mp->bhq, bhp, q);
	++mp->stat.st_page_clean;
	b_inserted = 1;

	/* Set the page number, and associated MPOOLFILE. */
	bhp->mf_offset = mf_offset;
	bhp->pgno = *pgnoaddr;

	/*
	 * If we know we created the page, zero it out and continue.
	 *
	 * !!!
	 * Note: DB_MPOOL_NEW deliberately doesn't call the pgin function.
	 * If DB_MPOOL_CREATE is used, then the application's pgin function
	 * has to be able to handle pages of 0's -- if it uses DB_MPOOL_NEW,
	 * it can detect all of its page creates, and not bother.
	 *
	 * Otherwise, read the page into memory, optionally creating it if
	 * DB_MPOOL_CREATE is set.
	 *
	 * Increment the reference count for created buffers, but importantly,
	 * increment the reference count for buffers we're about to read so
	 * that the buffer can't move.
	 */
	++bhp->ref;
	b_incr = 1;

	if (LF_ISSET(DB_MPOOL_NEW))
		memset(addr, 0, mfp->stat.st_pagesize);
	else {
		/*
		 * It's possible for the read function to fail, which means
		 * that we fail as well.
		 */
reread:		if ((ret = __memp_pgread(dbmfp,
		    bhp, LF_ISSET(DB_MPOOL_CREATE | DB_MPOOL_NEW))) != 0)
			goto err;

		/*
		 * !!!
		 * The __memp_pgread call discarded and reacquired the region
		 * lock.  Because the buffer reference count was incremented
		 * before the region lock was discarded the buffer can't move
		 * and its contents can't change.
		 */
		++mp->stat.st_cache_miss;
		++mfp->stat.st_cache_miss;
	}

	/*
	 * The if (0) wrapper makes the found: code reachable only via the
	 * goto from the hash-lookup hit above; the allocation path falls
	 * through past it to the last-page update below.
	 */
	if (0) {
found:		/* Increment the reference count. */
		if (bhp->ref == UINT16_T_MAX) {
			__db_err(dbmp->dbenv,
			    "%s: too many references to page %lu",
			    __memp_fn(dbmfp), bhp->pgno);
			ret = EINVAL;
			goto err;
		}
		++bhp->ref;
		b_incr = 1;

		/*
		 * Any found buffer might be trouble.
		 *
		 * BH_LOCKED --
		 * I/O in progress, wait for it to finish.  Because the buffer
		 * reference count was incremented before the region lock was
		 * discarded we know the buffer can't move and its contents
		 * can't change.
		 */
		for (cnt = 0; F_ISSET(bhp, BH_LOCKED); ++cnt) {
			UNLOCKREGION(dbmp);

			/*
			 * Sleep so that we don't simply spin, switching locks.
			 * (See the comment in include/mp.h.)
			 */
			if (cnt != 0 &&
			    (__db_yield == NULL || __db_yield() != 0))
				__db_sleep(0, 1);

			/* Blocks until the I/O holder releases the buffer. */
			LOCKBUFFER(dbmp, bhp);
			/* Waiting for I/O to finish... */
			UNLOCKBUFFER(dbmp, bhp);
			LOCKREGION(dbmp);
		}

		/*
		 * BH_TRASH --
		 * The buffer is garbage.  Re-read the page from disk; the
		 * reread path rejoins the allocation path's read code above.
		 */
		if (F_ISSET(bhp, BH_TRASH))
			goto reread;

		/*
		 * BH_CALLPGIN --
		 * The buffer was written, and the contents need to be
		 * converted again.
		 */
		if (F_ISSET(bhp, BH_CALLPGIN)) {
			if ((ret = __memp_pg(dbmfp, bhp, 1)) != 0)
				goto err;
			F_CLR(bhp, BH_CALLPGIN);
		}

		++mp->stat.st_cache_hit;
		++mfp->stat.st_cache_hit;
	}

	/*
	 * If we're returning a page after our current notion of the last-page,
	 * update our information.  Note, there's no way to un-instantiate this
	 * page, it's going to exist whether it's returned to us dirty or not.
	 */
	if (bhp->pgno > mfp->last_pgno)
		mfp->last_pgno = bhp->pgno;

	/* Success paths (mmap and cache) converge here to pin the handle. */
mapret:	LOCKHANDLE(dbmp, dbmfp->mutexp);
	++dbmfp->pinref;
	UNLOCKHANDLE(dbmp, dbmfp->mutexp);

	if (0) {
err:		/*
		 * If no other process is already waiting on a created buffer,
		 * go ahead and discard it, it's not useful.
		 */
		if (b_incr)
			--bhp->ref;
		if (b_inserted && bhp->ref == 0)
			__memp_bhfree(dbmp, mfp, bhp, 1);
	}

	UNLOCKREGION(dbmp);

	/* On the error path addr is whatever was last set (possibly NULL). */
	*(void **)addrp = addr;
	return (ret);
}
341 }