len = strlen(name);
suf = find_filename_suffix(name, len, &suf_len);
- dist = fuzzy_distance(name, len, fname, fname_len);
- /* Add some extra weight to how well the suffixes match. */
- dist += fuzzy_distance(suf, suf_len, fname_suf, fname_suf_len) * 10;
+ dist = fuzzy_distance(name, len, fname, fname_len, lowest_dist);
+ /* Add some extra weight to how well the suffixes match unless we've already disqualified
+ * this file based on a heuristic. */
+ if (dist < 0xFFFF0000U) {
+ dist += fuzzy_distance(suf, suf_len, fname_suf, fname_suf_len, 0xFFFF0000U) * 10;
+ }
if (DEBUG_GTE(FUZZY, 2)) {
rprintf(FINFO, "fuzzy distance for %s = %d.%05d\n",
f_name(fp, NULL), (int)(dist>>16), (int)(dist&0xFFFF));
#define UNIT (1 << 16)
-uint32 fuzzy_distance(const char *s1, unsigned len1, const char *s2, unsigned len2)
+uint32 fuzzy_distance(const char *s1, unsigned len1, const char *s2, unsigned len2, uint32 upperlimit)
{
uint32 a[MAXPATHLEN], diag, above, left, diag_inc, above_inc, left_inc;
int32 cost;
unsigned i1, i2;
+ /* Check to see if the Levenshtein distance must be greater than the
+ * upper limit defined by the previously found lowest distance using
+ * the heuristic that the Levenshtein distance is greater than the
+ * difference in length of the two strings */
+ if ((len1 > len2 ? len1 - len2 : len2 - len1) * UNIT > upperlimit)
+ return 0xFFFFU * UNIT + 1;
+
if (!len1 || !len2) {
if (!len1) {
s1 = s2;