// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Malloc profiling.
// Patterned after tcmalloc's algorithms; shorter code.

package runtime
#include "runtime.h"
#include "malloc.h"
#include "defs.h"
#include "go-type.h"

typedef struct __go_open_array Slice;

// NOTE(rsc): Everything here could use cas if contention became an issue.
static Lock proflock;

// Per-call-stack allocation information.
// Lookup by hashing call stack into a linked-list hash table.
typedef struct Bucket Bucket;
struct Bucket
{
	Bucket	*next;	// next in hash list
	Bucket	*allnext;	// next in list of all buckets
	uintptr	allocs;
	uintptr	frees;
	uintptr	alloc_bytes;
	uintptr	free_bytes;
	uintptr	hash;
	uintptr	nstk;
	uintptr	stk[1];
};
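// An editorial note: BuckHashSize is prime, presumably so that
// h%BuckHashSize below spreads the stack hashes evenly across the table.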
enum {
	BuckHashSize = 179999,
};
static Bucket **buckhash;
static Bucket *buckets;
static uintptr bucketmem;

// Return the bucket for stk[0:nstk], allocating new bucket if needed.
static Bucket*
stkbucket(uintptr *stk, int32 nstk)
{
	int32 i;
	uintptr h;
	Bucket *b;

	if(buckhash == nil) {
		buckhash = runtime_SysAlloc(BuckHashSize*sizeof buckhash[0]);
		mstats.buckhash_sys += BuckHashSize*sizeof buckhash[0];
	}

	// Hash stack.
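	// (This mixing is, in effect, Bob Jenkins' one-at-a-time hash,
	// applied per stack word rather than per byte.)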
	h = 0;
	for(i=0; i<nstk; i++) {
		h += stk[i];
		h += h<<10;
		h ^= h>>6;
	}
	h += h<<3;
	h ^= h>>11;

	i = h%BuckHashSize;
	for(b = buckhash[i]; b; b=b->next)
		if(b->hash == h && b->nstk == (uintptr)nstk &&
		   runtime_mcmp((byte*)b->stk, (byte*)stk, nstk*sizeof stk[0]) == 0)
			return b;

	b = runtime_mallocgc(sizeof *b + nstk*sizeof stk[0], RefNoProfiling, 0, 1);
	bucketmem += sizeof *b + nstk*sizeof stk[0];
	runtime_memmove(b->stk, stk, nstk*sizeof stk[0]);
	b->hash = h;
	b->nstk = nstk;
	b->next = buckhash[i];
	buckhash[i] = b;
	b->allnext = buckets;
	buckets = b;
	return b;
}

// Map from pointer to Bucket* that allocated it.
// Three levels:
//	Linked-list hash table for top N-20 bits.
//	Array index for next 13 bits.
//	Linked list for next 7 bits.
// This is more efficient than using a general map,
// because of the typical clustering of the pointer keys.
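// Illustrative decomposition of an address A (our summary of the code below):
//	A >> 20            selects the AddrHash chain (hashed into addrhash);
//	(A >> 7) & 0x1fff  indexes the 8192-entry dense array in that AddrHash;
//	A & 0x7f           is resolved by walking the short AddrEntry list.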

typedef struct AddrHash AddrHash;
typedef struct AddrEntry AddrEntry;

struct AddrHash
{
	AddrHash *next;	// next in top-level hash table linked list
	uintptr addr;	// addr>>20
	AddrEntry *dense[1<<13];
};

struct AddrEntry
{
	AddrEntry *next;	// next in bottom-level linked list
	uint32 addr;
	Bucket *b;
};

enum {
	AddrHashBits = 12	// 1MB per entry, so good for 4GB of used address space
};
static AddrHash *addrhash[1<<AddrHashBits];
static AddrEntry *addrfree;
static uintptr addrmem;

// Multiplicative hash function:
// HashMultiplier is the bottom 32 bits of int((sqrt(5)-1)/2 * (1<<32)).
// This is a good multiplier as suggested in CLR, Knuth. The hash
// value is taken to be the top AddrHashBits bits of the bottom 32 bits
// of the multiplied value.
enum {
	HashMultiplier = 2654435769U
};

// Set the bucket associated with addr to b.
static void
setaddrbucket(uintptr addr, Bucket *b)
{
	int32 i;
	uint32 h;
	AddrHash *ah;
	AddrEntry *e;

	h = (uint32)((addr>>20)*HashMultiplier) >> (32-AddrHashBits);
	for(ah=addrhash[h]; ah; ah=ah->next)
		if(ah->addr == (addr>>20))
			goto found;

	ah = runtime_mallocgc(sizeof *ah, RefNoProfiling, 0, 1);
	addrmem += sizeof *ah;
	ah->next = addrhash[h];
	ah->addr = addr>>20;
	addrhash[h] = ah;

found:
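	// Take an AddrEntry from the freelist, refilling it in
	// batches of 64 to amortize the allocation.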
	if((e = addrfree) == nil) {
		e = runtime_mallocgc(64*sizeof *e, RefNoProfiling, 0, 0);
		addrmem += 64*sizeof *e;
		for(i=0; i+1<64; i++)
			e[i].next = &e[i+1];
		e[63].next = nil;
	}
	addrfree = e->next;
	e->addr = (uint32)~(addr & ((1<<20)-1));
	e->b = b;
	h = (addr>>7)&(nelem(ah->dense)-1);	// entry in dense is top 13 bits of low 20.
	e->next = ah->dense[h];
	ah->dense[h] = e;
}

// Get the bucket associated with addr and clear the association.
static Bucket*
getaddrbucket(uintptr addr)
{
	uint32 h;
	AddrHash *ah;
	AddrEntry *e, **l;
	Bucket *b;

	h = (uint32)((addr>>20)*HashMultiplier) >> (32-AddrHashBits);
	for(ah=addrhash[h]; ah; ah=ah->next)
		if(ah->addr == (addr>>20))
			goto found;
	return nil;

found:
	h = (addr>>7)&(nelem(ah->dense)-1);	// entry in dense is top 13 bits of low 20.
	for(l=&ah->dense[h]; (e=*l) != nil; l=&e->next) {
		if(e->addr == (uint32)~(addr & ((1<<20)-1))) {
			*l = e->next;
			b = e->b;
			e->next = addrfree;
			addrfree = e;
			return b;
		}
	}
	return nil;
}

void
runtime_Mprof_Init()
{
	runtime_initlock(&proflock);
}

// Called by malloc to record a profiled block.
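// The CAS on m->nomemprof below appears to guard against re-entry:
// stkbucket and setaddrbucket allocate themselves, and those internal
// allocations must not be profiled in turn.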
void
runtime_MProf_Malloc(void *p, uintptr size)
{
	int32 nstk;
	uintptr stk[32];
	Bucket *b;

	if(!__sync_bool_compare_and_swap(&m->nomemprof, 0, 1))
		return;
#if 0
	nstk = runtime_callers(1, stk, 32);
#else
	nstk = 0;
#endif
	runtime_lock(&proflock);
	b = stkbucket(stk, nstk);
	b->allocs++;
	b->alloc_bytes += size;
	setaddrbucket((uintptr)p, b);
	runtime_unlock(&proflock);
	__sync_bool_compare_and_swap(&m->nomemprof, 1, 0);

	if(__sync_bool_compare_and_swap(&m->gcing_for_prof, 1, 0))
		__go_run_goroutine_gc(100);
}

// Called when freeing a profiled block.
void
runtime_MProf_Free(void *p, uintptr size)
{
	Bucket *b;

	if(!__sync_bool_compare_and_swap(&m->nomemprof, 0, 1))
		return;

	runtime_lock(&proflock);
	b = getaddrbucket((uintptr)p);
	if(b != nil) {
		b->frees++;
		b->free_bytes += size;
	}
	runtime_unlock(&proflock);
	__sync_bool_compare_and_swap(&m->nomemprof, 1, 0);

	if(__sync_bool_compare_and_swap(&m->gcing_for_prof, 1, 0))
		__go_run_goroutine_gc(101);
}


// Go interface to profile data. (Declared in extern.go)
// Assumes Go sizeof(int) == sizeof(int32)

// Must match MemProfileRecord in extern.go.
typedef struct Record Record;
struct Record {
	int64 alloc_bytes, free_bytes;
	int64 alloc_objects, free_objects;
	uintptr stk[32];
};

// Write b's data to r.
static void
record(Record *r, Bucket *b)
{
	uint32 i;

	r->alloc_bytes = b->alloc_bytes;
	r->free_bytes = b->free_bytes;
	r->alloc_objects = b->allocs;
	r->free_objects = b->frees;
	for(i=0; i<b->nstk && i<nelem(r->stk); i++)
		r->stk[i] = b->stk[i];
	for(; i<nelem(r->stk); i++)
		r->stk[i] = 0;
}

func MemProfile(p Slice, include_inuse_zero bool) (n int32, ok bool) {
	Bucket *b;
	Record *r;

	__sync_bool_compare_and_swap(&m->nomemprof, 0, 1);

	runtime_lock(&proflock);
	n = 0;
	for(b=buckets; b; b=b->allnext)
		if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
			n++;
	ok = false;
	if(n <= p.__count) {
		ok = true;
		r = (Record*)p.__values;
		for(b=buckets; b; b=b->allnext)
			if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
				record(r++, b);
	}
	runtime_unlock(&proflock);

	__sync_bool_compare_and_swap(&m->nomemprof, 1, 0);

	if(__sync_bool_compare_and_swap(&m->gcing_for_prof, 1, 0))
		__go_run_goroutine_gc(102);
}
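// Illustrative Go-side usage (our sketch, not part of this file): since
// ok is false when the slice is too small, callers size it in two passes:
//
//	n, _ := runtime.MemProfile(nil, true)       // pass 1: count records
//	p := make([]runtime.MemProfileRecord, n+50) // headroom for new buckets
//	n, ok := runtime.MemProfile(p, true)        // pass 2: fill p[0:n]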

void
runtime_MProf_Mark(void (*scan)(byte *, int64))
{
	// buckhash is not allocated via mallocgc.
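	// (It comes from SysAlloc, so the collector never scans it; the
	// Bucket objects themselves stay reachable through the buckets list
	// and the addrhash structures rooted below.)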
	scan((byte*)&buckets, sizeof buckets);
	scan((byte*)&addrhash, sizeof addrhash);
	scan((byte*)&addrfree, sizeof addrfree);
}