]>
Commit | Line | Data |
---|---|---|
7a938933 ILT |
1 | // Copyright 2009 The Go Authors. All rights reserved. |
2 | // Use of this source code is governed by a BSD-style | |
3 | // license that can be found in the LICENSE file. | |
4 | ||
5 | // Malloc profiling. | |
6 | // Patterned after tcmalloc's algorithms; shorter code. | |
7 | ||
8 | package runtime | |
9 | #include "runtime.h" | |
10 | #include "malloc.h" | |
11 | #include "defs.h" | |
12 | #include "go-type.h" | |
13 | ||
14 | typedef struct __go_open_array Slice; | |
15 | ||
// NOTE(rsc): Everything here could use cas if contention became an issue.
static Lock proflock;	// protects buckhash, buckets, addrhash, addrfree and all Bucket data

// Per-call-stack allocation information.
// Lookup by hashing call stack into a linked-list hash table.
typedef struct Bucket Bucket;
struct Bucket
{
	Bucket *next;	// next in hash list
	Bucket *allnext;	// next in list of all buckets
	uintptr allocs;	// number of allocations charged to this stack
	uintptr frees;	// number of frees of blocks charged to this stack
	uintptr alloc_bytes;	// total bytes allocated by this stack
	uintptr free_bytes;	// total bytes freed from this stack
	uintptr hash;	// cached stack hash, checked before the full stack compare
	uintptr nstk;	// number of entries in stk
	uintptr stk[1];	// call stack PCs; really nstk entries (over-allocated)
};
enum {
	BuckHashSize = 179999,	// number of chains in buckhash
};
static Bucket **buckhash;	// hash table of buckets, keyed by call stack
static Bucket *buckets;	// list of all buckets, linked through allnext
static uintptr bucketmem;	// bytes of memory consumed by Bucket structures
40 | ||
// Return the bucket for stk[0:nstk], allocating new bucket if needed.
static Bucket*
stkbucket(uintptr *stk, int32 nstk)
{
	int32 i;
	uintptr h;
	Bucket *b;

	// Lazily allocate the hash table on first use.  It comes from
	// SysAlloc rather than mallocgc (see runtime_MProf_Mark: the
	// garbage collector does not scan buckhash).
	if(buckhash == nil) {
		buckhash = runtime_SysAlloc(BuckHashSize*sizeof buckhash[0]);
		mstats.buckhash_sys += BuckHashSize*sizeof buckhash[0];
	}

	// Hash stack: mix each PC into h, then apply a final avalanche
	// so nearby stacks land in different chains.
	h = 0;
	for(i=0; i<nstk; i++) {
		h += stk[i];
		h += h<<10;
		h ^= h>>6;
	}
	h += h<<3;
	h ^= h>>11;

	// Look for an existing bucket: cheap hash/length checks first,
	// full stack memcmp only on a hash match.
	i = h%BuckHashSize;
	for(b = buckhash[i]; b; b=b->next)
		if(b->hash == h && b->nstk == (uintptr)nstk &&
		   runtime_mcmp((byte*)b->stk, (byte*)stk, nstk*sizeof stk[0]) == 0)
			return b;

	// Not found: allocate a new bucket with room for the stack
	// (sizeof *b already includes one stk slot, so this allocates
	// one spare entry).  RefNoProfiling presumably keeps this
	// allocation itself out of the profile -- confirm against
	// malloc.h.
	b = runtime_mallocgc(sizeof *b + nstk*sizeof stk[0], RefNoProfiling, 0, 1);
	bucketmem += sizeof *b + nstk*sizeof stk[0];
	runtime_memmove(b->stk, stk, nstk*sizeof stk[0]);
	b->hash = h;
	b->nstk = nstk;
	// Link into both the hash chain and the list of all buckets.
	b->next = buckhash[i];
	buckhash[i] = b;
	b->allnext = buckets;
	buckets = b;
	return b;
}
81 | ||
// Map from pointer to Bucket* that allocated it.
// Three levels:
//	Linked-list hash table for top N-20 bits.
//	Array index for next 13 bits.
//	Linked list for next 7 bits.
// This is more efficient than using a general map,
// because of the typical clustering of the pointer keys.

typedef struct AddrHash AddrHash;
typedef struct AddrEntry AddrEntry;

// One AddrHash covers one 1MB-aligned region of address space.
struct AddrHash
{
	AddrHash *next;	// next in top-level hash table linked list
	uintptr addr;	// addr>>20
	AddrEntry *dense[1<<13];	// chain per 128-byte slot within the region
};

// One AddrEntry associates a single allocated block with its Bucket.
struct AddrEntry
{
	AddrEntry *next;	// next in bottom-level linked list
	uint32 addr;	// complement of the low 20 bits of the block address
	Bucket *b;	// bucket charged with this block's allocation
};

enum {
	AddrHashBits = 12	// 1MB per entry, so good for 4GB of used address space
};
static AddrHash *addrhash[1<<AddrHashBits];	// top-level hash table
static AddrEntry *addrfree;	// free list of recycled AddrEntries
static uintptr addrmem;	// bytes consumed by AddrHash/AddrEntry structures

// Multiplicative hash function:
// hashMultiplier is the bottom 32 bits of int((sqrt(5)-1)/2 * (1<<32)).
// This is a good multiplier as suggested in CLR, Knuth.  The hash
// value is taken to be the top AddrHashBits bits of the bottom 32 bits
// of the multiplied value.
enum {
	HashMultiplier = 2654435769U
};
122 | ||
// Set the bucket associated with addr to b.
// Caller must hold proflock (see runtime_MProf_Malloc).
static void
setaddrbucket(uintptr addr, Bucket *b)
{
	int32 i;
	uint32 h;
	AddrHash *ah;
	AddrEntry *e;

	// Top level: find the AddrHash for addr's megabyte (addr>>20)
	// via the multiplicative hash.
	h = (uint32)((addr>>20)*HashMultiplier) >> (32-AddrHashBits);
	for(ah=addrhash[h]; ah; ah=ah->next)
		if(ah->addr == (addr>>20))
			goto found;

	// No AddrHash for this megabyte yet; allocate one and link it
	// at the head of its chain.
	ah = runtime_mallocgc(sizeof *ah, RefNoProfiling, 0, 1);
	addrmem += sizeof *ah;
	ah->next = addrhash[h];
	ah->addr = addr>>20;
	addrhash[h] = ah;

found:
	// Take an AddrEntry from the free list, refilling it with a
	// batch of 64 to amortize allocation cost.
	if((e = addrfree) == nil) {
		e = runtime_mallocgc(64*sizeof *e, RefNoProfiling, 0, 0);
		addrmem += 64*sizeof *e;
		for(i=0; i+1<64; i++)
			e[i].next = &e[i+1];
		e[63].next = nil;
	}
	addrfree = e->next;
	// Key is the complement of the low 20 bits -- presumably so a
	// zeroed entry can never match a real address; getaddrbucket
	// compares against the same complemented form.
	e->addr = (uint32)~(addr & ((1<<20)-1));
	e->b = b;
	h = (addr>>7)&(nelem(ah->dense)-1);	// entry in dense is top 13 bits of low 20.
	e->next = ah->dense[h];
	ah->dense[h] = e;
}
158 | ||
// Get the bucket associated with addr and clear the association.
// Returns nil if addr was never recorded by setaddrbucket.
// Caller must hold proflock (see runtime_MProf_Free).
static Bucket*
getaddrbucket(uintptr addr)
{
	uint32 h;
	AddrHash *ah;
	AddrEntry *e, **l;
	Bucket *b;

	// Top level: find the AddrHash for addr's megabyte (addr>>20).
	h = (uint32)((addr>>20)*HashMultiplier) >> (32-AddrHashBits);
	for(ah=addrhash[h]; ah; ah=ah->next)
		if(ah->addr == (addr>>20))
			goto found;
	return nil;

found:
	// Bottom level: walk the chain for this 128-byte slot, unlink
	// the matching entry (comparing the complemented low 20 bits,
	// matching setaddrbucket), and recycle it onto the free list.
	h = (addr>>7)&(nelem(ah->dense)-1);	// entry in dense is top 13 bits of low 20.
	for(l=&ah->dense[h]; (e=*l) != nil; l=&e->next) {
		if(e->addr == (uint32)~(addr & ((1<<20)-1))) {
			*l = e->next;
			b = e->b;
			e->next = addrfree;
			addrfree = e;
			return b;
		}
	}
	return nil;
}
187 | ||
a4ad1c7a ILT |
// One-time initialization of the lock protecting the profiler's
// data structures.
void
runtime_Mprof_Init()
{
	runtime_initlock(&proflock);
}
193 | ||
7a938933 ILT |
// Called by malloc to record a profiled block.
void
runtime_MProf_Malloc(void *p, uintptr size)
{
	int32 nstk;
	uintptr stk[32];
	Bucket *b;

	// m->nomemprof guards against recursion: stkbucket and
	// setaddrbucket allocate via runtime_mallocgc, which would
	// re-enter the profiler.  If it is already set, skip recording
	// this allocation entirely.
	if(!__sync_bool_compare_and_swap(&m->nomemprof, 0, 1))
		return;
#if 0
	nstk = runtime_callers(1, stk, 32);
#else
	// Stack traces disabled: everything is charged to an empty stack.
	nstk = 0;
#endif
	// Charge the allocation to its call-stack bucket and remember
	// which bucket owns address p so MProf_Free can find it.
	runtime_lock(&proflock);
	b = stkbucket(stk, nstk);
	b->allocs++;
	b->alloc_bytes += size;
	setaddrbucket((uintptr)p, b);
	runtime_unlock(&proflock);
	__sync_bool_compare_and_swap(&m->nomemprof, 1, 0);

	// NOTE(review): gcing_for_prof appears to flag a GC that was
	// deferred while nomemprof was set; run it now.  The argument
	// (100) looks like a call-site identifier -- confirm.
	if(__sync_bool_compare_and_swap(&m->gcing_for_prof, 1, 0))
		__go_run_goroutine_gc(100);
}
220 | ||
// Called when freeing a profiled block.
void
runtime_MProf_Free(void *p, uintptr size)
{
	Bucket *b;

	// Same recursion guard as runtime_MProf_Malloc; if profiling
	// is already in progress on this m, skip this free.
	if(!__sync_bool_compare_and_swap(&m->nomemprof, 0, 1))
		return;

	// Look up (and remove) the bucket that allocated p; it is nil
	// when the allocation was never recorded.
	runtime_lock(&proflock);
	b = getaddrbucket((uintptr)p);
	if(b != nil) {
		b->frees++;
		b->free_bytes += size;
	}
	runtime_unlock(&proflock);
	__sync_bool_compare_and_swap(&m->nomemprof, 1, 0);

	// NOTE(review): run a GC deferred while nomemprof was set;
	// 101 looks like a call-site identifier -- confirm.
	if(__sync_bool_compare_and_swap(&m->gcing_for_prof, 1, 0))
		__go_run_goroutine_gc(101);
}
242 | ||
243 | ||
244 | // Go interface to profile data. (Declared in extern.go) | |
245 | // Assumes Go sizeof(int) == sizeof(int32) | |
246 | ||
// Must match MemProfileRecord in extern.go.
// Field order and widths are part of the Go-visible ABI; do not
// reorder or resize without changing extern.go to match.
typedef struct Record Record;
struct Record {
	int64 alloc_bytes, free_bytes;	// cumulative byte counts
	int64 alloc_objects, free_objects;	// cumulative object counts
	uintptr stk[32];	// call stack, zero-padded to 32 entries
};
254 | ||
255 | // Write b's data to r. | |
256 | static void | |
257 | record(Record *r, Bucket *b) | |
258 | { | |
259 | uint32 i; | |
260 | ||
261 | r->alloc_bytes = b->alloc_bytes; | |
262 | r->free_bytes = b->free_bytes; | |
263 | r->alloc_objects = b->allocs; | |
264 | r->free_objects = b->frees; | |
265 | for(i=0; i<b->nstk && i<nelem(r->stk); i++) | |
266 | r->stk[i] = b->stk[i]; | |
267 | for(; i<nelem(r->stk); i++) | |
268 | r->stk[i] = 0; | |
269 | } | |
270 | ||
// MemProfile copies up to len(p) per-stack profile records into p.
// n is the total number of records available; ok is false (and p is
// left unfilled) when p is too small to hold all of them.  Buckets
// with alloc_bytes == free_bytes are included only when
// include_inuse_zero is set.
func MemProfile(p Slice, include_inuse_zero bool) (n int32, ok bool) {
	Bucket *b;
	Record *r;

	// Block memory profiling on this m while we hold proflock.
	__sync_bool_compare_and_swap(&m->nomemprof, 0, 1);

	runtime_lock(&proflock);
	// First pass: count the buckets that qualify.
	n = 0;
	for(b=buckets; b; b=b->allnext)
		if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
			n++;
	ok = false;
	if(n <= p.__count) {
		// Second pass: copy the qualifying buckets into p.
		ok = true;
		r = (Record*)p.__values;
		for(b=buckets; b; b=b->allnext)
			if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
				record(r++, b);
	}
	runtime_unlock(&proflock);

	__sync_bool_compare_and_swap(&m->nomemprof, 1, 0);

	// NOTE(review): run a GC deferred while nomemprof was set;
	// 102 looks like a call-site identifier -- confirm.
	if(__sync_bool_compare_and_swap(&m->gcing_for_prof, 1, 0))
		__go_run_goroutine_gc(102);
}
297 | ||
// Report the profiler's GC roots so that Bucket, AddrHash and
// AddrEntry structures (allocated via runtime_mallocgc) are kept
// alive across collections.
void
runtime_MProf_Mark(void (*scan)(byte *, int64))
{
	// buckhash is not allocated via mallocgc, so it is
	// deliberately not scanned here.
	scan((byte*)&buckets, sizeof buckets);
	scan((byte*)&addrhash, sizeof addrhash);
	scan((byte*)&addrfree, sizeof addrfree);
}