From ea548e9c48c9484399add43f73d084d44f301479 Mon Sep 17 00:00:00 2001 From: Timo Sirainen Date: Mon, 1 Feb 2021 22:11:11 +0200 Subject: [PATCH] lib-index: Add comments to struct mail_cache and related Also reorder some fields so the grouping makes more sense. --- src/lib-index/mail-cache-fields.c | 9 +- src/lib-index/mail-cache-private.h | 129 +++++++++++++++++++++++++---- src/lib-index/mail-cache.h | 10 ++- 3 files changed, 128 insertions(+), 20 deletions(-) diff --git a/src/lib-index/mail-cache-fields.c b/src/lib-index/mail-cache-fields.c index 63d0714650..e929fb559d 100644 --- a/src/lib-index/mail-cache-fields.c +++ b/src/lib-index/mail-cache-fields.c @@ -437,7 +437,14 @@ int mail_cache_header_fields_read(struct mail_cache *cache) cache->field_file_map[fidx] = i; cache->file_field_map[i] = fidx; - /* update last_used if it's newer than ours */ + /* Update last_used if it's newer than ours. Note that the + last_used may have been overwritten while we were reading + this cache header. In theory this can mean that the + last_used field is only half-updated and contains garbage. + This practically won't matter, since the worst that can + happen is that we trigger a purge earlier than necessary. + The purging re-reads the last_used while cache is locked and + correctly figures out whether to drop the field. */ if ((time_t)last_used[i] > cache->fields[fidx].field.last_used) cache->fields[fidx].field.last_used = last_used[i]; diff --git a/src/lib-index/mail-cache-private.h b/src/lib-index/mail-cache-private.h index 562202e893..156a1a53b7 100644 --- a/src/lib-index/mail-cache-private.h +++ b/src/lib-index/mail-cache-private.h @@ -68,20 +68,25 @@ struct mail_cache_header_fields { uint32_t fields_count; #if 0 - /* last time the field was accessed. not updated more often than - once a day. */ + /* Last time the field was accessed. Not updated more often than + once a day. 
This field may be overwritten later on, which in theory + could cause reading to see a partially updated (corrupted) value. + Don't fully trust this field unless it was read while cache is + locked. */ uint32_t last_used[fields_count]; /* (uint32_t)-1 for variable sized fields */ uint32_t size[fields_count]; /* enum mail_cache_field_type */ uint8_t type[fields_count]; - /* enum mail_cache_decision_type */ + /* enum mail_cache_decision_type. This field can be overwritten + later on to update the caching decision. */ uint8_t decision[fields_count]; /* NUL-separated list of field names */ char name[fields_count][]; #endif }; +/* Macros to return offsets to the fields in mail_cache_header_fields. */ #define MAIL_CACHE_FIELD_LAST_USED() \ (sizeof(uint32_t) * 3) #define MAIL_CACHE_FIELD_SIZE(count) \ @@ -102,28 +107,40 @@ struct mail_cache_record { struct mail_cache_field_private { struct mail_cache_field field; + /* Highest message UID whose cache field of this type has been + accessed within this session. This is used to track whether messages + are accessed in non-ascending order, which indicates an IMAP client + that doesn't have a local cache. That will cause the caching + decision to change from TEMP to YES. */ uint32_t uid_highwater; /* Unused fields aren't written to cache file */ bool used:1; + /* field.decision is pending a write to cache file header. If the + cache header is read from disk, don't overwrite it. */ bool decision_dirty:1; }; struct mail_cache { struct mail_index *index; struct event *event; + /* Registered "cache" extension ID */ uint32_t ext_id; char *filepath; int fd; + struct dotlock_settings dotlock_settings; + struct file_lock *file_lock; + + /* Cache file's inode, device and size when it was last fstat()ed. */ ino_t st_ino; dev_t st_dev; + uoff_t last_stat_size; + /* Used to avoid logging mmap() errors too rapidly. 
*/ time_t last_mmap_error_time; - uoff_t last_stat_size; - size_t mmap_length; /* a) mmaping the whole file */ void *mmap_base; /* b) using file cache */ @@ -131,14 +148,14 @@ struct mail_cache { /* c) using small read() calls with MAIL_INDEX_OPEN_FLAG_SAVEONLY */ uoff_t read_offset; buffer_t *read_buf; - /* mail_cache_map() increases this always. */ + /* Size of the cache file as currently mapped to memory. Used for all + of a), b), and c). */ + size_t mmap_length; + /* mail_cache_map() increases this always. Used only for asserts. */ unsigned int remap_counter; - + /* Linked list of all cache views. */ struct mail_cache_view *views; - struct dotlock_settings dotlock_settings; - struct file_lock *file_lock; - /* mmap_disable=no: hdr points to data / NULL when cache is invalid. mmap_disable=yes: hdr points to hdr_ro_copy. this is needed because cache invalidation can zero the data any time */ @@ -147,28 +164,60 @@ struct mail_cache { /* hdr_copy gets updated when cache is locked and written when unlocking and hdr_modified=TRUE */ struct mail_cache_header hdr_copy; + /* If non-0, the offset for the last seen mail_cache_header_fields. + Used as a cache to avoid reading through multiple next_offset + pointers. */ + uint32_t last_field_header_offset; + /* Memory pool used for permanent field allocations. Currently this + means mail_cache_field.name and field_name_hash. */ pool_t field_pool; + /* Size of fields[] and field_file_map[] */ + unsigned int fields_count; + /* All the registered cache fields. */ struct mail_cache_field_private *fields; + /* mail_cache_field.idx -> file-specific header index. The reverse + of this is file_field_map[]. */ uint32_t *field_file_map; - unsigned int fields_count; + /* mail_cache_field.name -> mail_cache_field.idx */ HASH_TABLE(char *, void *) field_name_hash; /* name -> idx */ - uint32_t last_field_header_offset; - - /* 0 is no need for purging, otherwise the file sequence number - which we want purged. 
*/ - uint32_t need_purge_file_seq; - char *need_purge_reason; + /* file-specific header index -> mail_cache_field.idx. The reverse + of this is field_file_map[]. */ unsigned int *file_field_map; + /* Size of file_field_map[] */ unsigned int file_fields_count; + /* mail_cache_purge_later() sets these values to trigger purging on + the next index sync. need_purge_file_seq is set to the current + cache file_seq. If at sync time the file_seq differs, it means + the cache was already purged and another purge isn't necessary. */ + uint32_t need_purge_file_seq; + /* Human-readable reason for purging. Used for debugging and events. */ + char *need_purge_reason; + + /* Cache has been opened (or it doesn't exist). */ bool opened:1; + /* Cache has been locked with mail_cache_lock(). */ bool locked:1; + /* TRUE if the last lock attempt failed. The next locking attempt will + be non-blocking to avoid unnecessarily waiting on a cache that has + been locked for a long time. Since cache isn't strictly required, + this could avoid unnecessarily long waits with some edge cases. */ bool last_lock_failed:1; + /* cache->hdr_copy has been modified. This must be used only while + cache is locked. */ bool hdr_modified:1; + /* At least one of the cache fields' last_used or cache decision has + changed. mail_cache_header_fields_update() will be used to overwrite + these to the latest mail_cache_header_fields. */ bool field_header_write_pending:1; + /* Cache is currently being purged. */ bool purging:1; + /* Access the cache file by reading as little as possible from it + (as opposed to mmap()ing it or using file-cache.h API to cache + larger parts of it). This is used with MAIL_INDEX_OPEN_FLAG_SAVEONLY + to avoid unnecessary cache reads. 
*/ bool map_with_read:1; }; @@ -193,9 +242,19 @@ struct mail_cache_view { struct mail_index_view *view, *trans_view; struct mail_cache_transaction_ctx *transaction; + /* mail_cache_add() has been called for some of the messages between + trans_seq1..trans_seq2 in an uncommitted transaction. Check also + the transaction contents when looking up cache fields for these + mails. */ uint32_t trans_seq1, trans_seq2; + /* Used to avoid infinite loops in case cache records point to each + other, causing a loop. FIXME: New cache files no longer support + overwriting existing data, so this could be removed and replaced + with a simple check that prev_offset is always smaller than the + current record's offset. */ struct mail_cache_loop_track loop_track; + /* Used for optimizing mail_cache_get_missing_reason() */ struct mail_cache_missing_reason_cache reason_cache; /* if cached_exists_buf[field] == cached_exists_value, it's cached. @@ -205,31 +264,65 @@ struct mail_cache_view { uint8_t cached_exists_value; uint32_t cached_exists_seq; + /* mail_cache_view_update_cache_decisions() has been used to disable + updating cache decisions. */ bool no_decision_updates:1; }; +/* mail_cache_lookup_iter_next() returns the next found field. */ struct mail_cache_iterate_field { + /* mail_cache_field.idx */ unsigned int field_idx; + /* Size of data */ unsigned int size; + /* Cache field content in the field type-specific format */ const void *data; + /* Offset to data in cache file */ uoff_t offset; }; struct mail_cache_lookup_iterate_ctx { struct mail_cache_view *view; + /* This must match mail_cache.remap_counter or the iterator is + invalid. */ unsigned int remap_counter; + /* Message sequence as given to mail_cache_lookup_iter_init() */ uint32_t seq; + /* Pointer to current cache record being iterated. This may point + to the cache file or uncommitted transaction. */ const struct mail_cache_record *rec; - unsigned int pos, rec_size; + /* Iterator's current position in the cache record. 
Starts from + sizeof(mail_cache_record). */ + unsigned int pos; + /* Copy of rec->size */ + unsigned int rec_size; + /* Cache file offset to the beginning of rec, or 0 if it points to + an uncommitted transaction. */ uint32_t offset; + /* Used to loop through all changes in the uncommitted transaction, + in case there are multiple changes to the same message. */ unsigned int trans_next_idx; + /* Cache has become unusable. Stop the iteration. */ bool stop:1; + /* I/O error or lock timeout occurred during iteration. Normally there + is no locking during iteration, but it may happen while cache is + being purged to wait for the purging to finish before cache can be + accessed again. */ bool failed:1; + /* Iteration has finished returning changes from uncommitted + transaction's in-memory buffer. */ bool memory_appends_checked:1; + /* Iteration has finished returning changes from uncommitted + transaction that were already written to cache file, but not + to main index. */ bool disk_appends_checked:1; + /* TRUE if the field index numbers in rec are the internal + mail_cache_field.idx (instead of the file-specific indexes). + This indicates that the rec points to uncommitted transaction's + in-memory buffer. */ bool inmemory_field_idx:1; }; diff --git a/src/lib-index/mail-cache.h b/src/lib-index/mail-cache.h index 49ab57dd3b..a42baf8432 100644 --- a/src/lib-index/mail-cache.h +++ b/src/lib-index/mail-cache.h @@ -46,13 +46,21 @@ enum mail_cache_field_type { }; struct mail_cache_field { + /* Unique name for the cache field. The field name doesn't matter + internally. */ const char *name; + /* Field index number. Used to optimize accessing the cache field. */ unsigned int idx; + /* Type of the field */ enum mail_cache_field_type type; + /* Size of the field, if it's a fixed size type. 
*/ unsigned int field_size; + /* Current caching decision */ enum mail_cache_decision_type decision; - /* If higher than the current last_used field, update it */ + /* Timestamp when the cache field was last intentionally read (e.g. + by an IMAP client). Saving new mails doesn't update this field. + This is used to track when an unaccessed field should be dropped. */ time_t last_used; }; -- 2.47.3