From: Mike Stepanek (mstepane) Date: Tue, 27 Aug 2019 15:20:02 +0000 (-0400) Subject: Merge pull request #1699 in SNORT/snort3 from ~SMINUT/snort3:host_cache_derived_memca... X-Git-Tag: 3.0.0-260~2 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=bb70a3775b510b40cf1c8150363a476542609024;p=thirdparty%2Fsnort3.git Merge pull request #1699 in SNORT/snort3 from ~SMINUT/snort3:host_cache_derived_memcap to master Squashed commit of the following: commit 097b3573f23a1ddfc2176d7f2c68ad4fd613e818 Author: Silviu Minut Date: Thu Aug 1 17:01:39 2019 -0400 host_tracker: derive LruCacheSharedMemcap from the general LruCacheShared that trakcs size in bytes, rather than number of items and instantiate host_cache from LruCacheSharedMemcap. --- diff --git a/src/hash/lru_cache_shared.h b/src/hash/lru_cache_shared.h index 2ffd1a84c..16bc26cfb 100644 --- a/src/hash/lru_cache_shared.h +++ b/src/hash/lru_cache_shared.h @@ -24,6 +24,7 @@ // LruCacheShared -- Implements a thread-safe unordered map where the // least-recently-used (LRU) entries are removed once a fixed size is hit. +#include #include #include #include @@ -49,6 +50,7 @@ template class LruCacheShared { public: + // Do not allow default constructor, copy constructor or assignment // operator. Cannot safely copy the LruCacheShared due to the mutex // lock. @@ -60,6 +62,7 @@ public: max_size(initial_size), current_size(0) { } using Data = std::shared_ptr; + using ValueType = Value; // Return data entry associated with key. If doesn't exist, return nullptr. Data find(const Key& key); @@ -77,7 +80,13 @@ public: size_t size() { std::lock_guard cache_lock(cache_mutex); - return current_size; + return list.size(); + } + + virtual size_t mem_size() + { + std::lock_guard cache_lock(cache_mutex); + return list.size() * mem_chunk; } size_t get_max_size() @@ -119,17 +128,17 @@ public: cache_mutex.unlock(); } -private: +protected: using LruList = std::list >; using LruListIter = typename LruList::iterator; using LruMap = std::unordered_map; using LruMapIter = typename LruMap::iterator; + static constexpr size_t mem_chunk = sizeof(Data) + sizeof(Value); + size_t max_size; // Once max_size elements are in the cache, start to // remove the least-recently-used elements. - // NOTE: std::list::size() is O(n) (it recounts the list every time) - // so instead we keep track of the current size manually. size_t current_size; // Number of entries currently in the cache. std::mutex cache_mutex; @@ -138,30 +147,53 @@ private: LruMap map; // Maps key to list iterator for fast lookup. struct LruCacheSharedStats stats; + + // These get called only from within the LRU and assume the LRU is locked. + virtual void increase_size() + { + current_size++; + } + + virtual void decrease_size() + { + current_size--; + } + + // Caller must lock and unlock. + void prune(std::list& data) + { + LruListIter list_iter; + assert(data.empty()); + while (current_size > max_size && !list.empty()) + { + list_iter = --list.end(); + data.push_back(list_iter->second); // increase reference count + decrease_size(); + map.erase(list_iter->first); + list.erase(list_iter); + stats.prunes++; + } + } }; template bool LruCacheShared::set_max_size(size_t newsize) { - LruListIter list_iter; - if (newsize == 0) return false; // Not allowed to set size to zero. + // Like with remove(), we need local temporary references to data being + // deleted, to avoid race condition. This data list needs to self-destruct + // after the cache_lock does. + std::list data; + std::lock_guard cache_lock(cache_mutex); // Remove the oldest entries if we have to reduce cache size. - while (current_size > newsize) - { - list_iter = list.end(); - --list_iter; - current_size--; - map.erase(list_iter->first); - list.erase(list_iter); - stats.prunes++; - } - max_size = newsize; + + prune(data); + return true; } @@ -195,6 +227,15 @@ std::shared_ptr LruCacheShared:: find_else_create(const Key& key, bool* new_data) { LruMapIter map_iter; + + // As with remove and operator[], we need a temporary list of references + // to delay the destruction of the items being removed by prune(). + // This is one instance where we cannot get by with directly locking and + // unlocking the cache_mutex, because the cache must be locked when we + // return the data pointer (below), or else, some other thread might + // delete it before we got a chance to return it. + std::list tmp_data; + std::lock_guard cache_lock(cache_mutex); map_iter = map.find(key); @@ -213,24 +254,13 @@ find_else_create(const Key& key, bool* new_data) // Add key/data pair to front of list. list.emplace_front(std::make_pair(key, data)); + increase_size(); // Add list iterator for the new entry to map. map[key] = list.begin(); - // If we've exceeded the configured size, remove the oldest entry. - if (current_size >= max_size) - { - LruListIter list_iter; - list_iter = list.end(); - --list_iter; - map.erase(list_iter->first); - list.erase(list_iter); - stats.prunes++; - } - else - { - current_size++; - } + prune(tmp_data); + return data; } @@ -253,16 +283,40 @@ template bool LruCacheShared::remove(const Key& key) { LruMapIter map_iter; + + // There is a potential race condition here, when the destructor of + // the object being removed needs to call back into the cache and lock + // the cache (e.g. via an allocator) to update the size of the cache. + // + // The shared pointer below fixes this condition by increasing the + // reference count to the object being deleted, thus delaying the + // call to the destructor until after the cache is unlocked. + // + // In particular, since the cache must be unlocked when data self-destructs + // data must be defined before cache_lock. Do not change the order of + // data and cache_lock! + Data data; + std::lock_guard cache_lock(cache_mutex); map_iter = map.find(key); if (map_iter == map.end()) + { return false; // Key is not in LruCache. + } + + data = map_iter->second->second; - current_size--; + decrease_size(); list.erase(map_iter->second); map.erase(map_iter); stats.removes++; + + assert( data.use_count() > 0 ); + + // Now, data can go out of scope and if it needs to lock again while + // deleting the Value object, it can do so. + return true; } @@ -270,20 +324,25 @@ template bool LruCacheShared::remove(const Key& key, std::shared_ptr& data) { LruMapIter map_iter; + std::lock_guard cache_lock(cache_mutex); map_iter = map.find(key); if (map_iter == map.end()) + { return false; // Key is not in LruCache. + } data = map_iter->second->second; - current_size--; + decrease_size(); list.erase(map_iter->second); map.erase(map_iter); stats.removes++; + + assert( data.use_count() > 0 ); + return true; } - #endif diff --git a/src/hash/xhash.cc b/src/hash/xhash.cc index 0fcb2ce3f..6c001e593 100644 --- a/src/hash/xhash.cc +++ b/src/hash/xhash.cc @@ -278,7 +278,7 @@ int xhash_change_memcap(XHash *t, unsigned long new_memcap, unsigned *max_work) { if (t == nullptr or new_memcap < t->overhead_bytes) return XHASH_ERR; - + if (new_memcap == t->mc.memcap) return XHASH_OK; @@ -304,9 +304,9 @@ int xhash_change_memcap(XHash *t, unsigned long new_memcap, unsigned *max_work) //or (we have undefined behavior: t->usrfree is set and xhash_free_anr_lru is returning XHASH_ERR) if (new_memcap < t->mc.memused) return XHASH_NOMEM; - + t->mc.memcap = new_memcap; - return XHASH_OK; + return XHASH_OK; } @@ -1040,7 +1040,7 @@ int xhash_free_anr_lru(XHash *t) { if (t == nullptr) return XHASH_ERR; - + if (t->fhead) { if (xhash_delete_free_node(t) == XHASH_OK) @@ -1128,4 +1128,3 @@ void xhash_set_keyops(XHash* h, hash_func hash_fcn, keycmp_func keycmp_fcn) hashfcn_set_keyops(h->hashfcn, hash_fcn, keycmp_fcn); } } // namespace snort - diff --git a/src/host_tracker/CMakeLists.txt b/src/host_tracker/CMakeLists.txt index 917afdee5..94fe83580 100644 --- a/src/host_tracker/CMakeLists.txt +++ b/src/host_tracker/CMakeLists.txt @@ -1,11 +1,14 @@ set (HOST_TRACKER_INCLUDES host_cache.h + host_cache_allocator.h + host_cache_interface.h host_tracker.h ) add_library( host_tracker OBJECT ${HOST_TRACKER_INCLUDES} host_cache.cc + host_cache_allocator.cc host_cache_module.cc host_cache_module.h host_tracker_module.cc diff --git a/src/host_tracker/dev_notes.txt b/src/host_tracker/dev_notes.txt index faff25c54..3e438462b 100644 --- a/src/host_tracker/dev_notes.txt +++ b/src/host_tracker/dev_notes.txt @@ -7,7 +7,7 @@ manner. * The global host_cache is used to cache HostTracker objects so that they can be shared between threads. - The host_cache holds a shared_ptr to each HostTracker object. This - allows the HostTracker to be removed from the host cache without + allows the HostTracker to be removed from the host cache without invalidating the HostTracker held by other threads. * The HostTrackerModule is used to read in initial known information about @@ -22,3 +22,112 @@ about hosts. * The HostCacheModule is used to configure the HostCache's size. + +Memory Usage Issues + +* The host cache can grow fairly big, so we want to cap it at a certain size. +This size can be set in the lua configuration file as host_cache.memcap and +will be read in and honored by host_cache_module.cc. + +* The LruCacheShared class defined in hash/lru_cache_shared.h tracks its +memory usage in terms of number of items it contains. That is fine as long +as the items in the cache have constant size at run-time, since in that case +the memory in bytes is a constant multiple of the number of items. + +However, in the case of HostTracker items, the memory usage does not remain +constant at run-time. The HostTracker class contains a vector, +which can grow indefinitely as snort discovers more services on a given host. + +The LruCacheShared container and the items within know nothing about each other. +This independence is desirable and we want to maintain it. However, when an +item owned by the cache grows, it must - in good faith - update the cache size, +or the cache won't know that it now owns more memory. This breaks the +independence between the cache and its items. + +We address this problem by passing a custom allocator to the STL containers +that the HostTracker contains - for example, vector. The +allocator gets called by STL whenever the vector requests or releases memory. +In turn, the allocator calls back into the global host cache object, updating +it with the size that it just allocated or de-allocated. This way, although the +HostTracker communicates with the host cache (indirectly, through the +allocator), the memory accounting is done automatically by the allocator, +transparently to the HostTracker users. The alternative would be that each +time new information is added to the HostTracker, the user updates the cache +explicitly - which is prone to error. + +Every container HostTracker might contain must be instantiated with our +custom allocator as a parameter. + + +Memory Usage vs. Number of Items + +In some cases it is preferable that the size of the cache is measured in +number of items within the cache, whereas in other cases (HostTracker) we +measure the size of the cache in bytes. + +Upon careful analysis, the only difference between the two cases is that +we must increase/decrease the size by 1 when size is measured in number of +items, and by sizeof(Item) when the size is measured in bytes. The rest of +the code (insert, prune, remove, etc.) remains the same. + +Consequently, we can have the LruCacheShared class measure its size in +number of items, and derive from it another cache class - LruCacheSharedMemcap - +that measures size in bytes. All we need to do is provide virtual +increase_size() / decrease_size() functions in the base class, which will +update the size by the appropriate amount in each case. See host_cache.h and +hash/lru_cache_shared.h. + +The LruCacheShared need not know anything about the custom allocator described +above, since it operates under the assumption that its items do not grow at +run-time. All size accounting can be done at item insertion time. + +The derived LruCacheSharedMemcap, however, must contain an update() function +to be used solely by the allocator. The update() function must lock the cache. + +The prune(), increase_size() and decrease_size() functions do not lock the +cache. They must be called exclusively from functions like insert() or remove(), +that do lock. On the other hand, the update() function in the derived cache +class has to lock the cache, as it is called asynchronously from different +threads (via the allocator). + + +Allocator Implementation Issues + +There is a circular dependency between the HostCache, the HostTracker and the +allocator that needs to be broken. This is true in the HostTracker case, but +it will be true for any cache item that requires an allocator. + +The allocator is ephemeral, it comes into existence inside STL for a brief +period of time, allocates/deallocates memory and then it gets destroyed. +STL assumes the allocator constructor has no parameters, so we can't pass +a (pointer to a) host cache object to the allocator upon construction. +Therefore, the allocator must have an internal host cache pointer that gets set +in the constructor to the global host cache instance. Hence, the allocator +must know about the host cache. + +On the other hand, the host cache must know about its item (the HostTracker) +at instantiation time. + +Finally, the HostTracker must know about the allocator, so it can inform the +cache about memory changes. + +This is a circular dependency: + + + ------> Allocator ---- + | | + | | + | V +HostCache <----------- HostTracker + +It is common to implement a class template (like Allocator, in our case) in +a single .h file. However, to break this dependency, we have to split the +Allocator into a .h and a .cc file. We include the .h file in HostTracker +and declare HostCache extern only in the .cc file. Then, we have to include +the .cc file also in the HostTracker implementation file because Allocator +is templated. See host_cache.h, host_cache_allocator.h, host_cache_allocator.cc, +host_tracker.h and host_tracker.cc. + +Illustrative examples are test/host_cache_allocator_test.cc (standalone +host cache / allocator example) and test/host_cache_allocator_ht_test.cc +(host_cache / allocator with host tracker example). diff --git a/src/host_tracker/host_cache.cc b/src/host_tracker/host_cache.cc index 9a55ab808..07ad8c687 100644 --- a/src/host_tracker/host_cache.cc +++ b/src/host_tracker/host_cache.cc @@ -26,6 +26,8 @@ using namespace snort; -#define LRU_CACHE_INITIAL_SIZE 65535 +// Default host cache size in bytes. +// Must agree with default memcap in host_cache_module.cc. +#define LRU_CACHE_INITIAL_SIZE 16384 * 512 -LruCacheShared host_cache(LRU_CACHE_INITIAL_SIZE); +HostCacheIp host_cache(LRU_CACHE_INITIAL_SIZE); diff --git a/src/host_tracker/host_cache.h b/src/host_tracker/host_cache.h index ac3e9a095..9060ac55f 100644 --- a/src/host_tracker/host_cache.h +++ b/src/host_tracker/host_cache.h @@ -24,9 +24,16 @@ // The host cache is used to cache information about hosts so that it can // be shared among threads. +#include + #include "hash/lru_cache_shared.h" -#include "host_tracker/host_tracker.h" +#include "host_cache_interface.h" +#include "host_cache_allocator.h" +#include "host_tracker.h" +#include "log/messages.h" +#include "main/snort_config.h" #include "sfip/sf_ip.h" +#include "utils/stats.h" // Used to create hash of key for indexing into cache. struct HashIp @@ -39,7 +46,98 @@ struct HashIp } }; -extern SO_PUBLIC LruCacheShared host_cache; +template +class LruCacheSharedMemcap : public LruCacheShared, public HostCacheInterface +{ +public: + using LruBase = LruCacheShared; + + LruCacheSharedMemcap() = delete; + LruCacheSharedMemcap(const LruCacheSharedMemcap& arg) = delete; + LruCacheSharedMemcap& operator=(const LruCacheSharedMemcap& arg) = delete; -#endif + LruCacheSharedMemcap(const size_t initial_size) : LruCacheShared(initial_size) {} + + size_t mem_size() override + { + std::lock_guard cache_lock(cache_mutex); + return current_size; + } + + void print_config() + { + if ( snort::SnortConfig::log_verbose() ) + { + std::lock_guard cache_lock(cache_mutex); + + snort::LogLabel("host_cache"); + snort::LogMessage(" memcap: %zu bytes\n", max_size); + } + + } + + using Data = typename LruBase::Data; + using ValueType = typename LruBase::ValueType; + + using LruBase::current_size; + using LruBase::max_size; + using LruBase::mem_chunk; + using LruBase::cache_mutex; + + template + friend class HostCacheAllocIp; + +private: + // Only the allocator calls this. The allocator, in turn, is called e.g. + // from HostTracker::add_service(), which locks the host tracker + // but not the cache. Therefore, update() must lock the cache. + // + // Note that any cache item object that is not yet owned by the cache + // will increase / decrease the current_size of the cache any time it + // adds / removes something to itself. Case in point: HostTracker. + // + // Therefore, if the cache items are containers that can grow dynamically, + // then those items should be added to the cache first, and only accessed + // via the cache. Then, any size change of the item, will legitimately + // and correctly update the current_size of the cache. + // + // In concrete terms, never have a standalone HostTracker object outside + // the host cache add or remove stuff to itself, as that will incorrectly + // change the current_size of the cache. + void update(int size) override + { + // Same idea as in LruCacheShared::remove(), use shared pointers + // to hold the pruned data until after the cache is unlocked. + // Do not change the order of data and cache_lock, as the data must + // self destruct after cache_lock. + std::list data; + + std::lock_guard cache_lock(cache_mutex); + + if (size < 0) + assert( current_size >= (size_t) -size ); + current_size += size; + if (current_size > max_size) + LruBase::prune(data); + } + + // These get called only from within the LRU and assume the LRU is locked. + void increase_size() override + { + current_size += mem_chunk; + } + + void decrease_size() override + { + assert( current_size >= mem_chunk ); + current_size -= mem_chunk; + } + +}; + +typedef LruCacheSharedMemcap HostCacheIp; + +extern SO_PUBLIC HostCacheIp host_cache; + +#endif diff --git a/src/host_tracker/host_cache_allocator.cc b/src/host_tracker/host_cache_allocator.cc new file mode 100644 index 000000000..f06fd4ce0 --- /dev/null +++ b/src/host_tracker/host_cache_allocator.cc @@ -0,0 +1,49 @@ +//-------------------------------------------------------------------------- +// Copyright (C) 2016-2019 Cisco and/or its affiliates. All rights reserved. +// +// This program is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License Version 2 as published +// by the Free Software Foundation. You may not use, modify or distribute +// this program under any other version of the GNU General Public License. +// +// This program is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this program; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +//-------------------------------------------------------------------------- + +// host_cache_allocator.cc author Silviu Minut + +#ifndef HOST_CACHE_ALLOCATOR_CC +#define HOST_CACHE_ALLOCATOR_CC + +#include "host_cache.h" + +template +T* HostCacheAlloc::allocate(std::size_t n) +{ + size_t sz=n*sizeof(T); + T* out=std::allocator::allocate(n); + lru->update(sz); + return out; +} + +template +void HostCacheAlloc::deallocate(T* p, std::size_t n) noexcept +{ + size_t sz = n*sizeof(T); + std::allocator::deallocate(p, n); + lru->update( -(int) sz); +} + +template +HostCacheAllocIp::HostCacheAllocIp() +{ + lru = &host_cache; +} + +#endif diff --git a/src/host_tracker/host_cache_allocator.h b/src/host_tracker/host_cache_allocator.h new file mode 100644 index 000000000..4cfa1ab69 --- /dev/null +++ b/src/host_tracker/host_cache_allocator.h @@ -0,0 +1,72 @@ +//-------------------------------------------------------------------------- +// Copyright (C) 2016-2019 Cisco and/or its affiliates. All rights reserved. +// +// This program is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License Version 2 as published +// by the Free Software Foundation. You may not use, modify or distribute +// this program under any other version of the GNU General Public License. +// +// This program is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this program; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +//-------------------------------------------------------------------------- + +// host_cache_allocator.h author Silviu Minut + +#ifndef HOST_CACHE_ALLOCATOR_H +#define HOST_CACHE_ALLOCATOR_H + +#include + +#include "host_cache_interface.h" + +template +class HostCacheAlloc : public std::allocator +{ +public: + + template + struct rebind + { + typedef HostCacheAlloc other; + }; + + T* allocate(std::size_t n); + void deallocate(T* p, std::size_t n) noexcept; + +protected: + + HostCacheInterface* lru = 0; +}; + + +// Trivial derived allocator, pointing to their own host cache. +// HostCacheAllocIp has a HostCacheInterface* pointing to an lru cache +// instantiated using snort::SfIp as the key. See host_cache.h. +// We can create different cache types by instantiating the lru cache using +// different keys and derive here allocators with HostCacheInterface* +// pointing to the appropriate lru cache object. +template +class HostCacheAllocIp : public HostCacheAlloc +{ +public: + + // This needs to be in every derived class: + template + struct rebind + { + typedef HostCacheAllocIp other; + }; + + using HostCacheAlloc::lru; + + HostCacheAllocIp(); + +}; + +#endif diff --git a/src/host_tracker/host_cache_interface.h b/src/host_tracker/host_cache_interface.h new file mode 100644 index 000000000..a75502660 --- /dev/null +++ b/src/host_tracker/host_cache_interface.h @@ -0,0 +1,50 @@ +//-------------------------------------------------------------------------- +// Copyright (C) 2016-2019 Cisco and/or its affiliates. All rights reserved. +// +// This program is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License Version 2 as published +// by the Free Software Foundation. You may not use, modify or distribute +// this program under any other version of the GNU General Public License. +// +// This program is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this program; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +//-------------------------------------------------------------------------- + +// host_cache_interface.h author Silviu Minut + +#ifndef HOST_CACHE_INTERFACE_H +#define HOST_CACHE_INTERFACE_H + +class HostCacheInterface +{ +public: + + // This abstract class is the interface of the host cache for the allocator. + // + // Only the allocator calls update(), in the derived class. In turn, + // the allocator is called, for instance, from HostTracker::add_service(), + // which locks the host tracker but not the cache. Therefore, update() + // must lock the cache. + // + // Note that any cache item object that is not yet owned by the cache + // will increase / decrease the current_size of the cache any time it + // adds / removes something to itself. Case in point: HostTracker. + // + // Therefore, if the cache items are containers that can grow dynamically, + // then those items should be added to the cache first, and only accessed + // via the cache. Then, any size change of the item, will legitimately + // and correctly update the current_size of the cache. + // + // In concrete terms, never have a zombie HostTracker object outside + // the host cache add or remove stuff to itself, as that will incorrectly + // change the current_size of the cache. + virtual void update(int size) = 0; +}; + +#endif diff --git a/src/host_tracker/host_cache_module.cc b/src/host_tracker/host_cache_module.cc index f253644f5..6aca91ddd 100644 --- a/src/host_tracker/host_cache_module.cc +++ b/src/host_tracker/host_cache_module.cc @@ -75,8 +75,8 @@ static const Parameter host_cache_params[] = { "dump_file", Parameter::PT_STRING, nullptr, nullptr, "file name to dump host cache on shutdown; won't dump by default" }, - { "size", Parameter::PT_INT, "1:max32", nullptr, - "size of host cache" }, + { "memcap", Parameter::PT_INT, "512:max32", "8388608", + "maximum host cache size in bytes" }, { nullptr, Parameter::PT_MAX, nullptr, nullptr, nullptr } }; @@ -85,7 +85,7 @@ bool HostCacheModule::set(const char*, Value& v, SnortConfig*) { if ( v.is("dump_file") ) dump_file = snort_strdup(v.get_string()); - else if ( v.is("size") ) + else if ( v.is("memcap") ) host_cache_size = v.get_uint32(); else return false; diff --git a/src/host_tracker/host_tracker.cc b/src/host_tracker/host_tracker.cc index 179b62978..28f5c8fb7 100644 --- a/src/host_tracker/host_tracker.cc +++ b/src/host_tracker/host_tracker.cc @@ -23,6 +23,7 @@ #endif #include "host_tracker.h" +#include "host_cache_allocator.cc" #include "utils/util.h" diff --git a/src/host_tracker/host_tracker.h b/src/host_tracker/host_tracker.h index 9e032e6bf..ac2bce1ce 100644 --- a/src/host_tracker/host_tracker.h +++ b/src/host_tracker/host_tracker.h @@ -31,6 +31,7 @@ #include #include "framework/counts.h" +#include "host_cache_allocator.h" #include "main/snort_types.h" #include "main/thread.h" #include "network_inspectors/appid/application_ids.h" @@ -70,6 +71,9 @@ struct HostApplication bool inferred_appid; }; +typedef HostCacheAllocIp HostMacAllocator; +typedef HostCacheAllocIp HostAppAllocator; + class SO_PUBLIC HostTracker { public: @@ -98,8 +102,25 @@ private: std::mutex host_tracker_lock; // ensure that updates to a shared object are safe uint8_t hops; // hops from the snort inspector, e.g., zero for ARP uint32_t last_seen; // the last time this host was seen - std::list macs; - std::vector services; + std::list macs; + std::vector services; + + // Hide / delete the constructor from the outside world. We don't want to + // have zombie host trackers, i.e. host tracker objects that live outside + // the host cache. + HostTracker( const HostTracker& ) = delete; + HostTracker( const HostTracker&& ) = delete; + + HostTracker& operator=( const HostTracker& ) = delete; + HostTracker& operator=( const HostTracker&& ) = delete; + + // Only the host cache can create them ... + template + friend class LruCacheShared; + + // ... and some unit tests. See Utest.h and UtestMacros.h in cpputest. + friend class TEST_host_tracker_add_find_service_test_Test; + friend class TEST_host_tracker_stringify_Test; }; } // namespace snort #endif diff --git a/src/host_tracker/host_tracker_module.cc b/src/host_tracker/host_tracker_module.cc index f56c03ca4..1e06fbe42 100644 --- a/src/host_tracker/host_tracker_module.cc +++ b/src/host_tracker/host_tracker_module.cc @@ -23,6 +23,7 @@ #endif #include "host_tracker_module.h" +#include "host_cache_allocator.cc" #include "log/messages.h" #include "main/snort_config.h" @@ -108,4 +109,3 @@ const PegInfo* HostTrackerModule::get_pegs() const PegCount* HostTrackerModule::get_counts() const { return (PegCount*)&host_tracker_stats; } - diff --git a/src/host_tracker/test/CMakeLists.txt b/src/host_tracker/test/CMakeLists.txt index c7551d8ce..b22a7ba6e 100644 --- a/src/host_tracker/test/CMakeLists.txt +++ b/src/host_tracker/test/CMakeLists.txt @@ -38,3 +38,13 @@ add_cpputest( host_tracker_module_test ${DNET_LIBRARIES} ) +add_cpputest( host_cache_allocator_ht_test + SOURCES + ../host_tracker.cc + ../../sfip/sf_ip.cc +) + +add_cpputest( host_cache_allocator_test + SOURCES + ../host_tracker.cc +) diff --git a/src/host_tracker/test/host_cache_allocator_ht_test.cc b/src/host_tracker/test/host_cache_allocator_ht_test.cc new file mode 100644 index 000000000..013f7c1e0 --- /dev/null +++ b/src/host_tracker/test/host_cache_allocator_ht_test.cc @@ -0,0 +1,150 @@ +//-------------------------------------------------------------------------- +// Copyright (C) 2016-2019 Cisco and/or its affiliates. All rights reserved. +// +// This program is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License Version 2 as published +// by the Free Software Foundation. You may not use, modify or distribute +// this program under any other version of the GNU General Public License. +// +// This program is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this program; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +//-------------------------------------------------------------------------- + +// host_cache_test.cc author Steve Chew +// unit tests for the host cache APIs + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "host_tracker/host_cache.h" +#include "host_tracker/host_cache_allocator.cc" + +#include + +#include "main/snort_config.h" + +#include +#include + +using namespace std; +using namespace snort; + +namespace snort +{ +// Fake snort_strdup() because sfutil dependencies suck +char* snort_strdup(const char* str) +{ return strdup(str); } +time_t packet_time() { return 0; } +} + +HostCacheIp host_cache(100); + +TEST_GROUP(host_cache_allocator_ht) +{ +}; + +// Test allocation / deallocation, pruning and remove. +TEST(host_cache_allocator_ht, allocate) +{ + uint8_t hk[16]; + SfIp ip; + const size_t n = 5, m = 3; + const size_t hc_item_sz = sizeof(HostCacheIp::Data) + sizeof(HostTracker); + const size_t ht_item_sz = sizeof(HostApplication); + + // room for n host trackers in the cache and m host applications in ht + const size_t max_size = n * hc_item_sz + m * ht_item_sz; + + host_cache.set_max_size(max_size); + + // insert n empty host trackers: + for (size_t i=0; iadd_service(port, IpProtocol::TCP, 676, true)); + + // Insert a new host tracker item. The sequence of operations is this: + // - host tracker vector is holding 2 * ht_item_sz = 24 and wants to + // double in size. + // - the allocator honors the vector's request, allocating 48 bytes and + // informs the host_cache about the new size, which exceeds max_size now + // - host_cache prunes, removing the least recent host tracker. + // Since this ht is empty, precisely hc_item_sz = 88 bytes are freed. + // - the host tracker vector destructor frees up an additional 24 bytes + // that it reallocated. + // Hence, after the next insert, the math is this: + size_t sz = host_cache.mem_size() + 4*ht_item_sz - hc_item_sz - 2*ht_item_sz; + + CHECK(true == ht_ptr->add_service(m, IpProtocol::TCP, 676, true)); + CHECK(sz == host_cache.mem_size()); + } + + // Try a remove too, to catch any potential deadlocks. + // + // The host cache remove() function locks the cache, and then destroys + // a host tracker. The host tracker destructor locks the cache again + // via the allocator, thus leading to a race condition. This condition + // is addressed in the remove() function by setting a temporary shared + // pointer to the host tracker being deleted. This will postpone the + // call to the host tracker destructor until after remove() releases + // the lock. + // + // As a consequence, to test for this potential race condition, there + // must be no exogenous pointers to the host tracker being removed, as + // any such reference will inherently break the deadlock. We want to see + // that this mechanism is built-in into the remove function. + + bool res = host_cache.remove(ip); + CHECK(res == true); + + // And, finally, a remove with a reference too: + i--; + memset(hk, 0, 16); + hk[i] = (uint8_t) i; + ip.set(hk); + + HostCacheIp::Data ht_ptr; + res = host_cache.remove(ip, ht_ptr); + CHECK(res == true); + CHECK(ht_ptr != nullptr); + +} + +int main(int argc, char** argv) +{ + // Use this if you want to turn off memory checks entirely: + MemoryLeakWarningPlugin::turnOffNewDeleteOverloads(); + + return CommandLineTestRunner::RunAllTests(argc, argv); +} diff --git a/src/host_tracker/test/host_cache_allocator_test.cc b/src/host_tracker/test/host_cache_allocator_test.cc new file mode 100644 index 000000000..9609108a7 --- /dev/null +++ b/src/host_tracker/test/host_cache_allocator_test.cc @@ -0,0 +1,132 @@ +//-------------------------------------------------------------------------- +// Copyright (C) 2016-2019 Cisco and/or its affiliates. All rights reserved. +// +// This program is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License Version 2 as published +// by the Free Software Foundation. You may not use, modify or distribute +// this program under any other version of the GNU General Public License. +// +// This program is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this program; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +//-------------------------------------------------------------------------- + +// host_cache_allocator_test.cc author Silviu Minut + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "host_tracker/host_cache.h" +#include "host_tracker/host_cache_allocator.h" + +#include + +#include "main/snort_config.h" + +#include +#include + +using namespace std; +using namespace snort; + +// Derive an allocator from HostCacheAlloc: +template +class Allocator : public HostCacheAlloc +{ +public: + + // This needs to be in every derived class: + template + struct rebind + { + typedef Allocator other; + }; + + using HostCacheAlloc::lru; + + Allocator(); +}; + + +// Define an lru item: +class Item +{ +public: + typedef int ValueType; + vector> data; +}; + +// Instantiate a cache, as soon as we know the Item type: +typedef LruCacheSharedMemcap> CacheType; +CacheType cache(100); + +// Unorthodox, but necessary because we need the implementation of +// HostCacheAlloc in this file, which we inherit. +#include "host_tracker/host_cache_allocator.cc" + +// Implement the allocator constructor AFTER we have a cache object +// to point to and the implementation of our base HostCacheAlloc: +template +Allocator::Allocator() +{ + lru = &cache; +} + +namespace snort +{ +time_t packet_time() { return 0; } +} + +TEST_GROUP(cache_allocator) +{ +}; + +// Test allocation / deallocation and pruning. +TEST(cache_allocator, allocate) +{ + const size_t n = 5, m = 3; // do not change, these need to be just right + const size_t item_sz = sizeof(CacheType::Data) + sizeof(CacheType::ValueType); + const size_t item_data_sz = sizeof(Item::ValueType); + + // room for n items in the cache and m data in the Item. + const size_t max_size = n * item_sz + m * item_data_sz; + + cache.set_max_size(max_size); + + // insert n empty host trackers: + for (size_t i=0; idata.emplace_back(i); + + // the oldest (0) is no longer the oldest after the look-up above, + // so it should still be in the cache: + CHECK( cache.find(key) != nullptr ); + + // however, the second oldest should have become oldest and be pruned: + CHECK( cache.find(to_string(1)) == nullptr ); +} + +int main(int argc, char** argv) +{ + // Use this if you want to turn off memory checks entirely: + MemoryLeakWarningPlugin::turnOffNewDeleteOverloads(); + + return CommandLineTestRunner::RunAllTests(argc, argv); +} diff --git a/src/host_tracker/test/host_tracker_test.cc b/src/host_tracker/test/host_tracker_test.cc index dcc52ce89..5e4b4c25b 100644 --- a/src/host_tracker/test/host_tracker_test.cc +++ b/src/host_tracker/test/host_tracker_test.cc @@ -25,7 +25,8 @@ #include -#include "host_tracker/host_tracker.h" +#include "host_tracker/host_cache.h" +#include "host_tracker/host_cache_allocator.cc" #include #include @@ -42,6 +43,11 @@ char* snort_strdup(const char* str) time_t packet_time() { return test_time; } } +// There always needs to be a HostCacheIp associated with HostTracker, +// because any allocation / deallocation into the HostTracker will take up +// memory managed by the cache. +HostCacheIp host_cache(1024); + TEST_GROUP(host_tracker) { }; @@ -49,6 +55,8 @@ TEST_GROUP(host_tracker) // Test HostTracker find appid and add service functions. TEST(host_tracker, add_find_service_test) { + // Having standalone host tracker objects works, but it should be avoided + // because they take up memory from the host cache. OK for testing. HostTracker ht; // Try a find on an empty list. @@ -130,4 +138,3 @@ int main(int argc, char** argv) { return CommandLineTestRunner::RunAllTests(argc, argv); } - diff --git a/src/main/snort.cc b/src/main/snort.cc index a3f333e9b..d9a4aacaf 100644 --- a/src/main/snort.cc +++ b/src/main/snort.cc @@ -38,6 +38,7 @@ #include "flow/ha.h" #include "framework/mpse.h" #include "helpers/process.h" +#include "host_tracker/host_cache.h" #include "ips_options/ips_options.h" #include "log/log.h" #include "log/messages.h" @@ -397,6 +398,7 @@ void Snort::setup(int argc, char* argv[]) memory::MemoryCap::calculate(ThreadConfig::get_instance_max()); memory::MemoryCap::print(); + host_cache.print_config(); TimeStart(); } diff --git a/src/network_inspectors/appid/lua_detector_api.cc b/src/network_inspectors/appid/lua_detector_api.cc index c95d05e5f..f42e31b3d 100644 --- a/src/network_inspectors/appid/lua_detector_api.cc +++ b/src/network_inspectors/appid/lua_detector_api.cc @@ -1490,13 +1490,13 @@ static int detector_create_chp_multi_application(lua_State* L) continue; break; } - + if (!control) { lua_pushnumber(L, appIdInstance); return 1; } - + // We only want a maximum of these for each appId. if (instance == CHP_APPID_INSTANCE_MAX) { @@ -1923,7 +1923,7 @@ static int detector_add_sip_user_agent(lua_State* L) } static int create_custom_application(lua_State* L) -{ +{ auto& ud = *UserData::check(L, DETECTOR, 1); // Verify detector user data and that we are NOT in packet context ud->validate_lua_state(false); @@ -1948,9 +1948,9 @@ static int create_custom_application(lua_State* L) appId = entry->appId; AppIdPegCounts::add_app_peg_info(tmp_string, appId); } - else + else appId = AppInfoManager::get_instance().get_appid_by_name(tmp_string); - + lua_pushnumber(L, appId); return 1; /*number of results */ } @@ -2631,26 +2631,26 @@ LuaServiceObject::LuaServiceObject(AppIdDiscovery* sdm, const std::string& detec appid_detectors = ServiceDiscovery::get_instance().get_tcp_detectors(); auto detector = appid_detectors->find(detector_name); if (detector != appid_detectors->end()) - ad = detector->second; + ad = detector->second; } else if (protocol == IpProtocol::UDP) { appid_detectors = ServiceDiscovery::get_instance().get_udp_detectors(); auto detector = appid_detectors->find(detector_name); if (detector != appid_detectors->end()) - ad = detector->second; + ad = detector->second; } sd = (ServiceDetector*)ad; - } + } UserData::push(L, DETECTOR, this); lua_pushvalue(L, -1); - // FIXIT-M: RELOAD - go back to using lua reference + // FIXIT-M: RELOAD - go back to using lua reference // instead of using a string for lookups // lsd.detector_user_data_ref = luaL_ref(L, LUA_REGISTRYINDEX); - + // FIXIT-H: The control and thread states have the same initialization // sequence, the stack index shouldn't change between the states, maybe // use a common index for a detector between all the states @@ -2701,26 +2701,26 @@ LuaClientObject::LuaClientObject(AppIdDiscovery* cdm, const std::string& detecto appid_detectors = ClientDiscovery::get_instance().get_tcp_detectors(); auto detector = appid_detectors->find(detector_name); if (detector != appid_detectors->end()) - ad = detector->second; + ad = detector->second; } else if (protocol == IpProtocol::UDP) { appid_detectors = ClientDiscovery::get_instance().get_udp_detectors(); auto detector = appid_detectors->find(detector_name); if (detector != appid_detectors->end()) - ad = detector->second; + ad = detector->second; } cd = (ClientDetector*)ad; - } - + } + UserData::push(L, DETECTOR, this); lua_pushvalue(L, -1); - // FIXIT-M: RELOAD - go back to using lua reference + // FIXIT-M: RELOAD - go back to using lua reference // instead of using a string for lookups // lsd.detector_user_data_ref = luaL_ref(L, LUA_REGISTRYINDEX); - + // FIXIT-H: The control and thread states have the same initialization // sequence, the stack index shouldn't change between the states, maybe // use a common index for a detector between all the states diff --git a/src/network_inspectors/appid/test/appid_discovery_test.cc b/src/network_inspectors/appid/test/appid_discovery_test.cc index 756049974..e5da21705 100644 --- a/src/network_inspectors/appid/test/appid_discovery_test.cc +++ b/src/network_inspectors/appid/test/appid_discovery_test.cc @@ -219,7 +219,7 @@ ServiceDiscovery& ServiceDiscovery::get_instance() return *s_discovery_manager; } -LruCacheShared host_cache(50); +HostCacheIp host_cache(50); AppId HostTracker::get_appid(Port, IpProtocol, bool) { return APP_ID_NONE;