-- Create a new LRU with given value type
-- By default the LRU will have a capacity of 65536 elements
-- Note: At the point the parametrized type must be finalized
- __new = function (ct, max_slots)
+ __new = function (ct, max_slots, alignment)
-- {0} will make sure that the value is coercible to a number
- local o = ffi.new(ct, {0}, C.lru_create_impl(max_slots or 65536, nil, nil))
+ local o = ffi.new(ct, {0}, C.lru_create_impl(max_slots or 65536, alignment or 1, nil, nil))
if o.lru == nil then
return
end
rrset = knot_rrset_t,
packet = knot_pkt_t,
lru = function (max_size, value_type)
- local ct = ffi.typeof(typed_lru_t, value_type or ffi.typeof('uint64_t'))
- return ffi.metatype(ct, lru_metatype)(max_size)
+ value_type = value_type or ffi.typeof('uint64_t')
+ local ct = ffi.typeof(typed_lru_t, value_type)
+ return ffi.metatype(ct, lru_metatype)(max_size, ffi.alignof(value_type))
end,
-- Metatypes. Beware that any pointer will be cast silently...
* Any type can be accessed through char-pointer,
* so we can use a common struct definition
* for all types being held.
+ *
+ * The value address is restricted by val_alignment.
+ * Approach: require slightly larger sizes from the allocator
+ * and shift value on the closest address with val_alignment.
*/
};
-/** @internal Compute offset of value in struct lru_item. */
-static uint val_offset(uint key_len)
+/** @brief Round the value up to a multiple of mul (a power of two). */
+static inline size_t round_power(size_t size, size_t mult)
{
- uint key_end = offsetof(struct lru_item, data) + key_len;
- // align it to the closest multiple of four
- return round_power(key_end, 2);
+ assert(__builtin_popcount(mult) == 1);
+ size_t res = ((size - 1) & ~(mult - 1)) + mult;
+ assert(__builtin_ctz(res) >= __builtin_ctz(mult));
+ assert(size <= res && res < size + mult);
+ return res;
}
-/** @internal Return pointer to value in an item. */
-static void * item_val(struct lru_item *it)
+/** @internal Compute the allocation size for an lru_item. */
+static uint item_size(const struct lru *lru, uint key_len, uint val_len)
{
- return it->data + val_offset(it->key_len) - offsetof(struct lru_item, data);
+ uint key_end = offsetof(struct lru_item, data) + key_len;
+ return key_end + (lru->val_alignment - 1) + val_len;
+ /* ^^^ worst-case padding length
+ * Well, we might compute the bound a bit more precisely,
+ * as we know that lru_item will get alignment at least
+ * some sizeof(void*) and we know all the lengths,
+ * but let's not complicate it, as the gain would be small anyway. */
}
-/** @internal Compute the size of an item. ATM we don't align/pad the end of it. */
-static uint item_size(uint key_len, uint val_len)
+/** @internal Return pointer to value in an lru_item. */
+static void * item_val(const struct lru *lru, struct lru_item *it)
{
- return val_offset(key_len) + val_len;
+ size_t key_end = it->data + it->key_len - (char *)NULL;
+ size_t val_begin = round_power(key_end, lru->val_alignment);
+ return (char *)NULL + val_begin;
}
/** @internal Free each item. */
if (!it)
continue;
enum lru_apply_do ret =
- f(it->data, it->key_len, item_val(it), baton);
+ f(it->data, it->key_len, item_val(lru, it), baton);
switch(ret) {
case LRU_APPLY_DO_EVICT: // evict
mm_free(lru->mm, it);
}
/** @internal See lru_create. */
-KR_EXPORT struct lru * lru_create_impl(uint max_slots, knot_mm_t *mm_array, knot_mm_t *mm)
+KR_EXPORT struct lru * lru_create_impl(uint max_slots, uint val_alignment,
+ knot_mm_t *mm_array, knot_mm_t *mm)
{
- assert(max_slots);
+ assert(max_slots && __builtin_popcount(val_alignment) == 1);
if (!max_slots)
return NULL;
// let lru->log_groups = ceil(log2(max_slots / (float) assoc))
.mm = mm,
.mm_array = mm_array,
.log_groups = log_groups,
+ .val_alignment = val_alignment,
};
// zeros are a good init
memset(lru->groups, 0, size - offsetof(struct lru, groups));
assert(i < LRU_ASSOC);
g->hashes[i] = khash_top;
it = g->items[i];
- uint new_size = item_size(key_len, val_len);
- if (it == NULL || new_size != item_size(it->key_len, it->val_len)) {
+ uint new_size = item_size(lru, key_len, val_len);
+ if (it == NULL || new_size != item_size(lru, it->key_len, it->val_len)) {
// (re)allocate
mm_free(lru->mm, it);
it = g->items[i] = mm_alloc(lru->mm, new_size);
if (key_len > 0) {
memcpy(it->data, key, key_len);
}
- memset(item_val(it), 0, val_len); // clear the value
+ memset(item_val(lru, it), 0, val_len); // clear the value
is_new_entry = true;
found: // key and hash OK on g->items[i]; now update stamps
assert(i < LRU_ASSOC);
if (is_new) {
*is_new = is_new_entry;
}
- return item_val(g->items[i]);
+ return item_val(lru, g->items[i]);
}
#define lru_create(ptable, max_slots, mm_ctx_array, mm_ctx) do { \
(void)(((__typeof__((*(ptable))->pdata_t))0) == (void *)0); /* typecheck lru_t */ \
*(ptable) = (__typeof__(*(ptable))) \
- lru_create_impl((max_slots), (mm_ctx_array), (mm_ctx)); \
+ lru_create_impl((max_slots), __alignof(*( (*(ptable))->pdata_t )), \
+ (mm_ctx_array), (mm_ctx)); \
} while (false)
/** @brief Free an LRU created by lru_create (it can be NULL). */
#define lru_capacity(table) lru_capacity_impl(&(table)->lru)
-/** @brief Round the value up to a multiple of (1 << power). */
-static inline uint round_power(uint size, uint power)
-{
- uint res = ((size - 1) & ~((1 << power) - 1)) + (1 << power);
- assert(__builtin_ctz(res) >= power);
- assert(size <= res && res < size + (1 << power));
- return res;
-}
-
/* ======================== Inlined part of implementation ======================== */
/** @cond internal */
struct lru;
void lru_free_items_impl(struct lru *lru);
-struct lru * lru_create_impl(uint max_slots, knot_mm_t *mm_array, knot_mm_t *mm);
+struct lru * lru_create_impl(uint max_slots, uint val_alignment,
+ knot_mm_t *mm_array, knot_mm_t *mm);
void * lru_get_impl(struct lru *lru, const char *key, uint key_len,
uint val_len, bool do_insert, bool *is_new);
void lru_apply_impl(struct lru *lru, lru_apply_fun f, void *baton);
struct knot_mm *mm, /**< Memory context to use for keys. */
*mm_array; /**< Memory context to use for this structure itself. */
uint log_groups; /**< Logarithm of the number of LRU groups. */
+ uint val_alignment; /**< Alignment for the values. */
struct lru_group groups[] CACHE_ALIGNED; /**< The groups of items. */
};