From: Oliver Kurth Date: Tue, 18 Dec 2018 21:19:46 +0000 (-0800) Subject: lib/file: Improve File_ListDirectory and File_WalkDirectory* X-Git-Tag: stable-11.0.0~294 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=74561dc19faf9a44fffc0676c27c16176b35d3af;p=thirdparty%2Fopen-vm-tools.git lib/file: Improve File_ListDirectory and File_WalkDirectory* The File_WalkDirectory implementation has an initial latency (before file names are available) that is unnecessary. Using the unicode library, when not necessary, adds a huge amount of memory usage and wastes CPU time. These routines duplicate quite a bit of code. Rewrite File_ListDirectory to use File_WalkDirectory*. Rewrite File_WalkDirectory*: - Return files as they are discovered, not after the entire directory contents are parsed. If one decides to stop early, they don't have to pay the price for the entire directory contents. - Preserve the protection from duplicate file names. While Windows locks a directory during a content tranverse, POSIXen does nott. Code is here in the lib/file for quite some time so that the numerous callers do not have to handle dealing with dups. - Avoid using the unicode library on platforms where we can, this avoid most of the eggregious memory usage. - Don't use Dynbuf... Handling things directly is more efficient. --- diff --git a/open-vm-tools/lib/file/filePosix.c b/open-vm-tools/lib/file/filePosix.c index e81050ea7..802a64477 100644 --- a/open-vm-tools/lib/file/filePosix.c +++ b/open-vm-tools/lib/file/filePosix.c @@ -55,6 +55,7 @@ #include "vmware.h" #include "posix.h" +#include "codeset.h" #include "file.h" #include "fileInt.h" #include "msg.h" @@ -63,7 +64,6 @@ #include "util.h" #include "timeutil.h" #include "dynbuf.h" -#include "localconfig.h" #include "hostType.h" #include "vmfs.h" #include "hashTable.h" @@ -96,9 +96,9 @@ static char *FilePosixNearestExistingAncestor(char const *path); #endif struct WalkDirContextImpl { - int cnt; - int iter; - char **files; + char *dirName; + DIR *dir; + HashTable *hash; }; @@ -2836,126 +2836,58 @@ FileCreateDirectory(const char *pathName, // IN: *---------------------------------------------------------------------- */ -static int -FileKeyDispose(const char *key, // IN: - void *value, // IN: - void *clientData) // IN: -{ - Posix_Free((void *) key); - - return 0; -} +typedef struct { + char **fileNames; + uint32 pos; +} ListParams; static int -FileUnique(const char *key, // IN: - void *value, // IN: - void *clientData) // IN/OUT: a DynBuf +FileNameArray(const char *key, // IN: + void *value, // IN: + void *clientData) // IN/OUT: ListParams pointer { - DynBuf *b = clientData; + ListParams *params = clientData; - DynBuf_Append(b, &key, sizeof key); + ASSERT(value == NULL); + + params->fileNames[params->pos++] = Util_SafeStrdup(key); return 0; } int -File_ListDirectory(const char *pathName, // IN: - char ***ids) // OUT: relative paths +File_ListDirectory(const char *dirName, // IN: + char ***ids) // OUT: relative paths { - int err; - DIR *dir; - int count; - HashTable *hash; - - ASSERT(pathName != NULL); - - dir = Posix_OpenDir(pathName); - - if (dir == NULL) { - // errno is preserved - return -1; - } - - hash = HashTable_Alloc(256, HASH_STRING_KEY, NULL); - count = 0; - - while (TRUE) { - struct dirent *entry; - - errno = 0; - entry = readdir(dir); - if (entry == NULL) { - err = errno; - break; - } + int err = 0; + int count = -1; + WalkDirContext context = File_WalkDirectoryStart(dirName); - /* Strip out undesirable paths. No one ever cares about these. */ - if ((strcmp(entry->d_name, ".") == 0) || - (strcmp(entry->d_name, "..") == 0)) { - continue; - } - - /* Don't create the file list if we aren't providing it to the caller. */ - if (ids) { - char *id; + if (context != NULL) { + while (File_WalkDirectoryNext(context, NULL)) + ; - if (Unicode_IsBufferValid(entry->d_name, -1, - STRING_ENCODING_DEFAULT)) { - id = Unicode_Alloc(entry->d_name, STRING_ENCODING_DEFAULT); - } else { - id = Unicode_EscapeBuffer(entry->d_name, -1, - STRING_ENCODING_DEFAULT); + err = errno; - Warning("%s: file '%s' in directory '%s' cannot be converted to " - "UTF8\n", __FUNCTION__, pathName, id); + if (err == 0) { + count = HashTable_GetNumElements(context->hash); + if (ids) { + ListParams params; - Posix_Free(id); + params.fileNames = Util_SafeCalloc(count, sizeof(char *)); + params.pos = 0; - id = Unicode_Duplicate(UNICODE_SUBSTITUTION_CHAR - UNICODE_SUBSTITUTION_CHAR - UNICODE_SUBSTITUTION_CHAR); + HashTable_ForEach(context->hash, FileNameArray, ¶ms); + *ids = params.fileNames; } - - /* - * It is possible for a directory operation to change the contents - * of a directory while this routine is running. If the OS decides - * to physically rearrange the contents of the directory it is - * possible for readdir to report a file more than once. Only add - * a file to the return data if it is unique within the return - * data. - */ - - if (HashTable_Insert(hash, id, NULL)) { - count++; - } else { - Posix_Free(id); - } - } else { - count++; } - } - - closedir(dir); - - if (ids) { - ASSERT(count == HashTable_GetNumElements(hash)); - - if (err == 0) { - DynBuf b; - DynBuf_Init(&b); + File_WalkDirectoryEnd(context); - HashTable_ForEach(hash, FileUnique, &b); - *ids = DynBuf_Detach(&b); - DynBuf_Destroy(&b); - } else { - HashTable_ForEach(hash, FileKeyDispose, NULL); - } + errno = err; } - HashTable_Free(hash); - - return (errno = err) == 0 ? count : -1; + return count; } @@ -2975,13 +2907,29 @@ File_ListDirectory(const char *pathName, // IN: *----------------------------------------------------------------------------- */ +static int +FileKeyDispose(const char *key, // IN: + void *value, // IN: + void *clientData) // IN: +{ + Posix_Free((void *) key); + + return 0; +} + void File_WalkDirectoryEnd(WalkDirContext context) // IN: { if (context != NULL) { - if (context->cnt > 0) { - Util_FreeStringList(context->files, context->cnt); + if (context->dir != NULL) { + closedir(context->dir); } + + HashTable_ForEach(context->hash, FileKeyDispose, NULL); + + HashTable_Free(context->hash); + + Posix_Free(context->dirName); Posix_Free(context); } } @@ -3011,19 +2959,25 @@ File_WalkDirectoryEnd(WalkDirContext context) // IN: */ WalkDirContext -File_WalkDirectoryStart(const char *parentPath) // IN: +File_WalkDirectoryStart(const char *dirName) // IN: { - WalkDirContextImpl *context = malloc(sizeof *context); + WalkDirContextImpl *context; - if (context != NULL) { - context->files = NULL; - context->iter = 0; - context->cnt = File_ListDirectory(parentPath, &context->files); + ASSERT(dirName != NULL); - if (context->cnt == -1) { - File_WalkDirectoryEnd(context); - context = NULL; - } + context = Util_SafeMalloc(sizeof *context); + + context->dirName = Util_SafeStrdup(dirName); + context->hash = HashTable_Alloc(256, HASH_STRING_KEY, NULL); + context->dir = Posix_OpenDir(dirName); + + if (context->dir == NULL) { + int err = errno; + + File_WalkDirectoryEnd(context); + + errno = err; + context = NULL; } return context; @@ -3033,18 +2987,17 @@ File_WalkDirectoryStart(const char *parentPath) // IN: /* *----------------------------------------------------------------------------- * - * File_WalkDirectoryNext -- + * File_WalkDirectoryNextEntry -- * - * Get the next entry in a directory traversal started with + * Get the next file name during a directory traversal started with * File_WalkDirectoryStart. * * Results: - * TRUE iff the traversal hasn't completed. - * - * If TRUE, *path holds an allocated string of a directory entry that the - * caller must free (see free). - * - * If FALSE, errno is 0 iff the walk completed sucessfully. + * TRUE Traversal hasn't completed yet. If *fileName is not NULL, + * an allocated string of the file name found is returned. + * The caller must free the string. + * FALSE The traversal has completed. Check errno; if it is zero (0), + * the walk completed sucessfully. * * Side effects: * None @@ -3054,19 +3007,75 @@ File_WalkDirectoryStart(const char *parentPath) // IN: Bool File_WalkDirectoryNext(WalkDirContext context, // IN: - char **path) // OUT: + char **fileName) // OUT/OPT: { - ASSERT(context); - ASSERT(path != NULL); + int err = 0; + Bool callAgain = FALSE; - errno = 0; // Any errors showed up at "start time". + ASSERT(context != NULL); - if (context->iter < context->cnt) { - *path = Util_SafeStrdup(context->files[context->iter++]); - return TRUE; + while (TRUE) { + char *allocName; + struct dirent *entry; + + errno = 0; + entry = readdir(context->dir); + if (entry == NULL) { + err = errno; + break; + } + + /* Strip out undesirable paths. No one ever cares about these. */ + if ((strcmp(entry->d_name, ".") == 0) || + (strcmp(entry->d_name, "..") == 0)) { + continue; + } + + /* + * It is possible for a directory operation to change the contents + * of a directory while this routine is running. If the OS decides + * to physically rearrange the contents of the directory it is + * possible for readdir to report a file more than once. Only add + * a file to the return data if it is unique within the return + * data. + */ + +#if defined(VMX86_SERVER) || defined(__APPLE__) // UTF8 native platforms + if (CodeSet_IsStringValidUTF8(entry->d_name)) { + allocName = Util_SafeStrdup(entry->d_name); +#else + if (Unicode_IsBufferValid(entry->d_name, -1, STRING_ENCODING_DEFAULT)) { + allocName = Unicode_Alloc(entry->d_name, STRING_ENCODING_DEFAULT); +#endif + } else { + char *id = Unicode_EscapeBuffer(fileName, -1, STRING_ENCODING_DEFAULT); + + Warning("%s: file '%s' in directory '%s' cannot be converted to " + "UTF8\n", __FUNCTION__, context->dirName, id); + + Posix_Free(id); + + allocName = Unicode_Duplicate(UNICODE_SUBSTITUTION_CHAR + UNICODE_SUBSTITUTION_CHAR + UNICODE_SUBSTITUTION_CHAR); + } + + if (HashTable_Insert(context->hash, allocName, NULL)) { + if (fileName != NULL) { + *fileName = Util_SafeStrdup(allocName); + } + + callAgain = TRUE; + break; + } else { + /* Ignore duplicates */ + continue; + } } - return FALSE; + errno = err; + + return callAgain; } diff --git a/open-vm-tools/lib/include/file.h b/open-vm-tools/lib/include/file.h index fced23233..71ee4ea94 100644 --- a/open-vm-tools/lib/include/file.h +++ b/open-vm-tools/lib/include/file.h @@ -176,7 +176,7 @@ Bool File_DeleteDirectoryContent(const char *pathName); Bool File_DeleteDirectoryTree(const char *pathName); -int File_ListDirectory(const char *pathName, +int File_ListDirectory(const char *dirName, char ***ids); Bool File_IsOsfsVolumeEmpty(const char *pathName); @@ -189,10 +189,10 @@ char * File_StripFwdSlashes(const char *pathName); * Simple file-system walk. */ -WalkDirContext File_WalkDirectoryStart(const char *parentPath); +WalkDirContext File_WalkDirectoryStart(const char *dirName); Bool File_WalkDirectoryNext(WalkDirContext context, - char **path); + char **fileName); void File_WalkDirectoryEnd(WalkDirContext context);