Bug fixes
~~~~~~~~~
+- Fixed bug which could result in false cache hits when source code contains
+ `'"'` followed by `" /*"` or `" //"` (with variations).
+
- Made hash of cached result created with and without `CCACHE_CPP2` different.
This makes it possible to rebuild with `CCACHE_CPP2` set without having to
clear the cache to get new results.
headers = \
ccache.h hashtable.h hashtable_itr.h hashtable_private.h hashutil.h \
manifest.h mdfour.h counters.h murmurhashneutral2.h getopt_long.h \
- language.h system.h compopt.h \
+ language.h macroskip.h system.h compopt.h \
test/framework.h test/suites.h test/util.h
files_to_clean += *.tar.bz2 *.tar.gz *.tar.xz *.xml .deps/*
/*
- * Copyright (C) 2009-2010, 2012 Joel Rosdahl
+ * Copyright (C) 2009-2015 Joel Rosdahl
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
#include "ccache.h"
#include "hashutil.h"
+#include "macroskip.h"
#include "murmurhashneutral2.h"
unsigned
} while (0)
/*
- * Hash a string ignoring comments. Returns a bitmask of HASH_SOURCE_CODE_*
- * results.
+ * Search for the strings "__DATE__" and "__TIME__" in str.
+ *
+ * Returns a bitmask with HASH_SOURCE_CODE_FOUND_DATE and
+ * HASH_SOURCE_CODE_FOUND_TIME set appropriately.
*/
int
-hash_source_code_string(
- struct mdfour *hash, const char *str, size_t len, const char *path)
+check_for_temporal_macros(const char *str, size_t len)
{
- const char *p;
- const char *end;
- char hashbuf[64];
- size_t hashbuflen = 0;
- int result = HASH_SOURCE_CODE_OK;
- extern unsigned sloppiness;
- bool seen_backslash;
+ int result = 0;
- p = str;
- end = str + len;
- while (1) {
- if (p >= end) {
- goto end;
- }
- switch (*p) {
- /* Potential start of comment. */
- case '/':
- if (p+1 == end) {
- break;
- }
- switch (*(p+1)) {
- case '*':
- HASH(' '); /* Don't paste tokens together when removing the comment. */
- p += 2;
- while (p+1 < end && (*p != '*' || *(p+1) != '/')) {
- if (*p == '\n') {
- /* Keep line numbers. */
- HASH('\n');
- }
- p++;
- }
- if (p+1 == end) {
- goto end;
- }
- p += 2;
- continue;
+ /*
+ * We're using the Boyer-Moore-Horspool algorithm, which searches starting
+ * from the *end* of the needle. Our needles are 8 characters long, so i
+ * starts at 7.
+ */
+ size_t i = 7;
- case '/':
- p += 2;
- while (p < end && (*p != '\n' || *(p-1) == '\\')) {
- p++;
- }
- continue;
-
- default:
- break;
- }
- break;
-
- /* Start of string. */
- case '"':
- HASH(*p);
- p++;
- seen_backslash = false;
- while (p < end) {
- if (seen_backslash) {
- seen_backslash = false;
- } else if (*p == '"') {
- /* Found end of string. */
- HASH(*p);
- p++;
- break;
- } else if (*p == '\\') {
- seen_backslash = true;
- }
- HASH(*p);
- p++;
- }
- if (p == end) {
- goto end;
+ while (i < len) {
+ /*
+ * Check whether the substring ending at str[i] has the form "__...E__". On
+ * the assumption that 'E' is less common in source than '_', we check
+ * str[i-2] first.
+ */
+ if (str[i - 2] == 'E' &&
+ str[i - 0] == '_' &&
+ str[i - 7] == '_' &&
+ str[i - 1] == '_' &&
+ str[i - 6] == '_') {
+ /*
+ * Check the remaining characters to see if the substring is "__DATE__"
+ * or "__TIME__".
+ */
+ if (str[i - 5] == 'D' && str[i - 4] == 'A' &&
+ str[i - 3] == 'T') {
+ result |= HASH_SOURCE_CODE_FOUND_DATE;
}
- break;
-
- /* Potential start of volatile macro. */
- case '_':
- if (p + 7 < end
- && p[1] == '_' && p[5] == 'E'
- && p[6] == '_' && p[7] == '_') {
- if (p[2] == 'D' && p[3] == 'A'
- && p[4] == 'T') {
- result |= HASH_SOURCE_CODE_FOUND_DATE;
- } else if (p[2] == 'T' && p[3] == 'I'
- && p[4] == 'M') {
- result |= HASH_SOURCE_CODE_FOUND_TIME;
- }
- /*
- * Of course, we can't be sure that we have found a __{DATE,TIME}__
- * that's actually used, but better safe than sorry. And if you do
- * something like
- *
- * #define TIME __TI ## ME__
- *
- * in your code, you deserve to get a false cache hit.
- */
+ else if (str[i - 5] == 'T' && str[i - 4] == 'I' &&
+ str[i - 3] == 'M') {
+ result |= HASH_SOURCE_CODE_FOUND_TIME;
}
- break;
-
- default:
- break;
}
- HASH(*p);
- p++;
+ /*
+ * macro_skip tells us how far we can skip forward upon seeing str[i] at
+ * the end of a substring.
+ */
+ i += macro_skip[(uint8_t)str[i]];
}
-end:
- hash_buffer(hash, hashbuf, hashbuflen);
+ return result;
+}
- if (sloppiness & SLOPPY_TIME_MACROS) {
- return 0;
+/*
+ * Hash a string ignoring comments. Returns a bitmask of HASH_SOURCE_CODE_*
+ * results.
+ */
+int
+hash_source_code_string(
+ struct mdfour *hash, const char *str, size_t len, const char *path)
+{
+ int result = HASH_SOURCE_CODE_OK;
+ extern unsigned sloppiness;
+
+ /*
+ * Check for __DATE__ and __TIME__ if the sloppiness configuration tells us
+ * we should.
+ */
+ if (!(sloppiness & SLOPPY_TIME_MACROS)) {
+ result |= check_for_temporal_macros(str, len);
}
+
+ /*
+ * Hash the source string.
+ */
+ hash_buffer(hash, str, len);
+
if (result & HASH_SOURCE_CODE_FOUND_DATE) {
/*
* Make sure that the hash sum changes if the (potential) expansion of
#define HASH_SOURCE_CODE_FOUND_DATE 2
#define HASH_SOURCE_CODE_FOUND_TIME 4
+int check_for_temporal_macros(const char *str, size_t len);
int hash_source_code_string(
struct mdfour *hash, const char *str, size_t len, const char *path);
int hash_source_code_file(struct mdfour *hash, const char *path);
--- /dev/null
+/*
+ * A Boyer-Moore-Horspool skip table used for searching for the strings
+ * "__TIME__" and "__DATE__".
+ *
+ * macro_skip[c] = 8 for all c not in "__TIME__" and "__DATE__".
+ *
+ * The other characters map as follows:
+ *
+ * _ -> 1
+ * A -> 4
+ * D -> 5
+ * E -> 2
+ * I -> 4
+ * M -> 3
+ * T -> 3
+ *
+ *
+ * This was generated with the following Python script:
+ *
+ * m = {'_': 1,
+ * 'A': 4,
+ * 'D': 5,
+ * 'E': 2,
+ * 'I': 4,
+ * 'M': 3,
+ * 'T': 3}
+ *
+ * for i in range(0, 256):
+ * if chr(i) in m:
+ * num = m[chr(i)]
+ * else:
+ * num = 8
+ * print ("%d, " % num),
+ *
+ * if i % 16 == 15:
+ * print ""
+ */
+
+static const uint32_t macro_skip[] = {
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 4, 8, 8, 5, 2, 8, 8, 8, 4, 8, 8, 8, 3, 8, 8,
+ 8, 8, 8, 8, 3, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 1,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+};
# A simple test suite for ccache.
#
# Copyright (C) 2002-2007 Andrew Tridgell
-# Copyright (C) 2009-2014 Joel Rosdahl
+# Copyright (C) 2009-2015 Joel Rosdahl
#
# This program is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
checkstat 'cache miss' 1
checkfile stderr-mf.txt "`cat stderr-orig.txt`"
- ##################################################################
- # Check that changes in comments are ignored when hashing.
- testname="changes in comments"
- $CCACHE -C >/dev/null
- $CCACHE -z >/dev/null
- cat <<EOF >comments.h
-/*
- * /* foo comment
- */
-EOF
- backdate comments.h
- cat <<'EOF' >comments.c
-#include "comments.h"
-char test[] = "\
-/* apple */ // banana"; // foo comment
-EOF
-
- $CCACHE $COMPILER -c comments.c
- checkstat 'cache hit (direct)' 0
- checkstat 'cache hit (preprocessed)' 0
- checkstat 'cache miss' 1
-
- sed_in_place 's/foo/ignored/' comments.h comments.c
- backdate comments.h
-
- $CCACHE $COMPILER -c comments.c
- checkstat 'cache hit (direct)' 1
- checkstat 'cache hit (preprocessed)' 0
- checkstat 'cache miss' 1
-
- # Check that comment-like string contents are hashed.
- sed_in_place 's/apple/orange/' comments.c
- backdate comments.h
-
- $CCACHE $COMPILER -c comments.c
- checkstat 'cache hit (direct)' 1
- checkstat 'cache hit (preprocessed)' 0
- checkstat 'cache miss' 2
-
##################################################################
# Check that it's possible to compile and cache an empty source code file.
testname="empty source file"
checkstat 'cache hit (direct)' 2
checkstat 'cache hit (preprocessed)' 0
checkstat 'cache miss' 2
+
+ testname="comment in strings"
+ $CCACHE -Cz >/dev/null
+ echo 'char *comment = " /* \\\\u" "foo" " */";' >comment.c
+ $CCACHE $COMPILER -c comment.c
+ checkstat 'cache hit (direct)' 0
+ checkstat 'cache hit (preprocessed)' 0
+ checkstat 'cache miss' 1
+ $CCACHE $COMPILER -c comment.c
+ checkstat 'cache hit (direct)' 1
+ checkstat 'cache hit (preprocessed)' 0
+ checkstat 'cache miss' 1
+ echo 'char *comment = " /* \\\\u" "goo" " */";' >comment.c
+ $CCACHE $COMPILER -c comment.c
+ checkstat 'cache hit (direct)' 1
+ checkstat 'cache hit (preprocessed)' 0
+ checkstat 'cache miss' 2
}
basedir_suite() {
/*
- * Copyright (C) 2010, 2012 Joel Rosdahl
+ * Copyright (C) 2010-2015 Joel Rosdahl
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
CHECK_STR_EQ_FREE2("a448017aaf21d8525fc10ae87aa6729d-3", hash_result(&h));
}
-TEST(hash_source_code_with_c_style_comment)
+TEST(check_for_temporal_macros)
{
- struct mdfour h;
- char input[] = "a/*b*/c";
- size_t input_len = strlen(input);
-
- hash_start(&h);
- hash_source_code_string(&h, input, input_len, "");
- CHECK_STR_EQ_FREE2("1c2c87080ee03418fb1279e3b1f09a68-3", hash_result(&h));
-
- input[3] = 'd';
- hash_start(&h);
- hash_source_code_string(&h, input, input_len, "");
- CHECK_STR_EQ_FREE2("1c2c87080ee03418fb1279e3b1f09a68-3", hash_result(&h));
-}
-
-TEST(hash_source_code_with_cplusplus_style_comment)
-{
- struct mdfour h;
- char input[] = "a//b\nc";
- size_t input_len = strlen(input);
-
- hash_start(&h);
- hash_source_code_string(&h, input, input_len, "");
- CHECK_STR_EQ_FREE2("4a3fbbe3c140fa193227dba3814db6e6-3", hash_result(&h));
-
- input[3] = 'd';
- hash_start(&h);
- hash_source_code_string(&h, input, input_len, "");
- CHECK_STR_EQ_FREE2("4a3fbbe3c140fa193227dba3814db6e6-3", hash_result(&h));
-}
-
-TEST(hash_source_code_with_comment_inside_string)
-{
- struct mdfour h;
- char input[] = "a\"//b\"c";
- size_t input_len = strlen(input);
-
- hash_start(&h);
- hash_source_code_string(&h, input, input_len, "");
- CHECK_STR_EQ_FREE2("4c2fa74b0843d8f93df5c04c98ccb0a4-7", hash_result(&h));
-
- input[4] = 'd';
- hash_start(&h);
- hash_source_code_string(&h, input, input_len, "");
- CHECK_STR_EQ_FREE2("f0069218ec640008cbfa2d150c1061bb-7", hash_result(&h));
-}
-
-TEST(hash_source_code_with_quote_in_string)
-{
- struct mdfour h;
- char input[] = "a\"\\\"b//c\""; /* a"\"b//c" */
- size_t input_len = strlen(input);
-
- hash_start(&h);
- hash_source_code_string(&h, input, input_len, "");
- CHECK_STR_EQ_FREE2("c4e45e7a7f6f29b000a51f187dc4cf06-9", hash_result(&h));
-
- hash_start(&h);
- input[7] = 'd';
- hash_source_code_string(&h, input, input_len, "");
- CHECK_STR_EQ_FREE2("bef8fb852dddcee189b91b068a621c55-9", hash_result(&h));
-}
-
-TEST(hash_source_code_with_backslash_at_string_end)
-{
- struct mdfour h;
- char input[] = "a\"\\\\\"b//c"; /* a"\\"b//c */
- size_t input_len = strlen(input);
-
- hash_start(&h);
- hash_source_code_string(&h, input, input_len, "");
- CHECK_STR_EQ_FREE2("7f3ccf27edadad1b90cb2cffb59775d6-6", hash_result(&h));
-
- input[input_len - 1] = 'd';
- hash_source_code_string(&h, input, input_len, "");
- CHECK_STR_EQ_FREE2("7f3ccf27edadad1b90cb2cffb59775d6-6", hash_result(&h));
+ const char time_start[] =
+ "__TIME__\n"
+ "int a;\n";
+ const char time_middle[] =
+ "#define a __TIME__\n"
+ "int a;\n";
+ const char time_end[] =
+ "#define a __TIME__";
+
+ const char date_start[] =
+ "__DATE__\n"
+ "int ab;\n";
+ const char date_middle[] =
+ "#define ab __DATE__\n"
+ "int ab;\n";
+ const char date_end[] =
+ "#define ab __DATE__";
+
+ const char no_temporal[] =
+ "#define ab _ _DATE__\n"
+ "#define ab __ DATE__\n"
+ "#define ab __D ATE__\n"
+ "#define ab __DA TE__\n"
+ "#define ab __DAT E__\n"
+ "#define ab __DATE __\n"
+ "#define ab __DATE_ _\n"
+ "#define ab _ _TIME__\n"
+ "#define ab __ TIME__\n"
+ "#define ab __T IME__\n"
+ "#define ab __TI ME__\n"
+ "#define ab __TIM E__\n"
+ "#define ab __TIME __\n"
+ "#define ab __TIME_ _\n";
+
+ CHECK(check_for_temporal_macros(time_start + 0, sizeof(time_start) - 0));
+ CHECK(!check_for_temporal_macros(time_start + 1, sizeof(time_start) - 1));
+
+ CHECK(check_for_temporal_macros(time_middle + 0, sizeof(time_middle) - 0));
+ CHECK(check_for_temporal_macros(time_middle + 1, sizeof(time_middle) - 1));
+ CHECK(check_for_temporal_macros(time_middle + 2, sizeof(time_middle) - 2));
+ CHECK(check_for_temporal_macros(time_middle + 3, sizeof(time_middle) - 3));
+ CHECK(check_for_temporal_macros(time_middle + 4, sizeof(time_middle) - 4));
+ CHECK(check_for_temporal_macros(time_middle + 5, sizeof(time_middle) - 5));
+ CHECK(check_for_temporal_macros(time_middle + 6, sizeof(time_middle) - 6));
+ CHECK(check_for_temporal_macros(time_middle + 7, sizeof(time_middle) - 7));
+
+ CHECK(check_for_temporal_macros(time_end + 0, sizeof(time_end) - 0));
+ CHECK(check_for_temporal_macros(time_end + sizeof(time_end) - 9, 9));
+ CHECK(!check_for_temporal_macros(time_end + sizeof(time_end) - 8, 8));
+
+ CHECK(check_for_temporal_macros(date_start + 0, sizeof(date_start) - 0));
+ CHECK(!check_for_temporal_macros(date_start + 1, sizeof(date_start) - 1));
+
+ CHECK(check_for_temporal_macros(date_middle + 0, sizeof(date_middle) - 0));
+ CHECK(check_for_temporal_macros(date_middle + 1, sizeof(date_middle) - 1));
+ CHECK(check_for_temporal_macros(date_middle + 2, sizeof(date_middle) - 2));
+ CHECK(check_for_temporal_macros(date_middle + 3, sizeof(date_middle) - 3));
+ CHECK(check_for_temporal_macros(date_middle + 4, sizeof(date_middle) - 4));
+ CHECK(check_for_temporal_macros(date_middle + 5, sizeof(date_middle) - 5));
+ CHECK(check_for_temporal_macros(date_middle + 6, sizeof(date_middle) - 6));
+ CHECK(check_for_temporal_macros(date_middle + 7, sizeof(date_middle) - 7));
+
+ CHECK(check_for_temporal_macros(date_end + 0, sizeof(date_end) - 0));
+ CHECK(check_for_temporal_macros(date_end + sizeof(date_end) - 9, 9));
+ CHECK(!check_for_temporal_macros(date_end + sizeof(date_end) - 8, 8));
+
+ CHECK(!check_for_temporal_macros(no_temporal + 0, sizeof(no_temporal) - 0));
+ CHECK(!check_for_temporal_macros(no_temporal + 1, sizeof(no_temporal) - 1));
+ CHECK(!check_for_temporal_macros(no_temporal + 2, sizeof(no_temporal) - 2));
+ CHECK(!check_for_temporal_macros(no_temporal + 3, sizeof(no_temporal) - 3));
+ CHECK(!check_for_temporal_macros(no_temporal + 4, sizeof(no_temporal) - 4));
+ CHECK(!check_for_temporal_macros(no_temporal + 5, sizeof(no_temporal) - 5));
+ CHECK(!check_for_temporal_macros(no_temporal + 6, sizeof(no_temporal) - 6));
+ CHECK(!check_for_temporal_macros(no_temporal + 7, sizeof(no_temporal) - 7));
}
TEST_SUITE_END