#include "ccache.h"
#include "hashutil.h"
#include "murmurhashneutral2.h"
+#include "macroskip.h"
unsigned
hash_from_string(void *str)
&& fh1->size == fh2->size;
}
-#define HASH(ch) \
- do {\
- hashbuf[hashbuflen] = ch; \
- hashbuflen++; \
- if (hashbuflen == sizeof(hashbuf)) {\
- hash_buffer(hash, hashbuf, sizeof(hashbuf)); \
- hashbuflen = 0; \
- } \
- } while (0)
-
/*
- * Hash a string ignoring comments. Returns a bitmask of HASH_SOURCE_CODE_*
- * results.
+ * Search for the strings "__DATE__" and "__TIME__" in str.
+ *
+ * Returns a bitmask with HASH_SOURCE_CODE_FOUND_DATE and
+ * HASH_SOURCE_CODE_FOUND_TIME set appropriately.
*/
-int
-hash_source_code_string(
- struct mdfour *hash, const char *str, size_t len, const char *path)
+static int
+check_for_temporal_macros(const char *str, size_t len)
{
- const char *p;
- const char *end;
- char hashbuf[64];
- size_t hashbuflen = 0;
- int result = HASH_SOURCE_CODE_OK;
- extern unsigned sloppiness;
+ int result = 0;
- p = str;
- end = str + len;
- while (1) {
- if (p >= end) {
- goto end;
- }
- switch (*p) {
- /* Potential start of comment. */
- case '/':
- if (p+1 == end) {
- break;
- }
- switch (*(p+1)) {
- case '*':
- HASH(' '); /* Don't paste tokens together when removing the comment. */
- p += 2;
- while (p+1 < end
- && (*p != '*' || *(p+1) != '/')) {
- if (*p == '\n') {
- /* Keep line numbers. */
- HASH('\n');
- }
- p++;
- }
- if (p+1 == end) {
- goto end;
- }
- p += 2;
- continue;
+ /*
+ * We're using the Boyer-Moore-Horspool algorithm, which searches
+ * starting from the *end* of the needle. Our needles are 8 characters
+ * long, so i starts at 7.
+ */
+ size_t i = 7;
- case '/':
- p += 2;
- while (p < end
- && (*p != '\n' || *(p-1) == '\\')) {
- p++;
- }
- continue;
+ while (i < len) {
+ /*
+ * Check whether the substring ending at str[i] has the form
+ * '__...E__'. On the assumption that 'E' is less common in
+ * source than '_', we check str[i-2] first.
+ */
+ if (str[i - 2] == 'E' &&
+ str[i - 0] == '_' &&
+ str[i - 7] == '_' &&
+ str[i - 1] == '_' &&
+ str[i - 6] == '_') {
- default:
- break;
- }
- break;
+ /*
+ * Check the remaining characters to see if the
+ * substring is '__DATE__' or '__TIME__'.
+ */
- /* Start of string. */
- case '"':
- HASH(*p);
- p++;
- while (p < end && (*p != '"' || *(p-1) == '\\')) {
- HASH(*p);
- p++;
- }
- if (p == end) {
- goto end;
+ if (str[i - 5] == 'D' && str[i - 4] == 'A' &&
+ str[i - 3] == 'T') {
+ result |= HASH_SOURCE_CODE_FOUND_DATE;
}
- break;
-
- /* Potential start of volatile macro. */
- case '_':
- if (p + 7 < end
- && p[1] == '_' && p[5] == 'E'
- && p[6] == '_' && p[7] == '_') {
- if (p[2] == 'D' && p[3] == 'A'
- && p[4] == 'T') {
- result |= HASH_SOURCE_CODE_FOUND_DATE;
- } else if (p[2] == 'T' && p[3] == 'I'
- && p[4] == 'M') {
- result |= HASH_SOURCE_CODE_FOUND_TIME;
- }
- /*
- * Of course, we can't be sure that we have found a __{DATE,TIME}__
- * that's actually used, but better safe than sorry. And if you do
- * something like
- *
- * #define TIME __TI ## ME__
- *
- * in your code, you deserve to get a false cache hit.
- */
+ else if (str[i - 5] == 'T' && str[i - 4] == 'I' &&
+ str[i - 3] == 'M') {
+ result |= HASH_SOURCE_CODE_FOUND_TIME;
}
- break;
-
- default:
- break;
}
- HASH(*p);
- p++;
+ /*
+ * macro_skip tells us how far we can skip forward upon seeing
+ * str[i] at the end of a substring.
+ */
+ i += macro_skip[(uint8_t)str[i]];
}
-end:
- hash_buffer(hash, hashbuf, hashbuflen);
+ return result;
+}
- if (sloppiness & SLOPPY_TIME_MACROS) {
- return 0;
+/*
+ * Hash a string. Returns a bitmask of HASH_SOURCE_CODE_* results.
+ */
+int
+hash_source_code_string(
+ struct mdfour *hash, const char *str, size_t len, const char *path)
+{
+ int result = HASH_SOURCE_CODE_OK;
+ extern unsigned sloppiness;
+
+ /*
+ * Check for __DATE__ and __TIME__ if the sloppiness argument tells us
+ * we have to.
+ */
+ if (!(sloppiness & SLOPPY_TIME_MACROS)) {
+ result |= check_for_temporal_macros(str, len);
}
+
+ /*
+ * Hash the source string.
+ */
+ hash_buffer(hash, str, len);
+
if (result & HASH_SOURCE_CODE_FOUND_DATE) {
/*
* Make sure that the hash sum changes if the (potential) expansion of
--- /dev/null
+/*
+ * A Boyer-Moore-Horspool skip table used for searching for the strings
+ * "__TIME__" and "__DATE__".
+ *
+ * macro_skip[c] = 8 for all c not in "__TIME__" and "__DATE__".
+ *
+ * The other characters map as follows:
+ *
+ * _ -> 1
+ * A -> 5
+ * D -> 6
+ * E -> 3
+ * I -> 5
+ * M -> 4
+ * T -> 4
+ *
+ *
+ * This was generated with the following Python script:
+ *
+ * m = {'_': 1,
+ * 'A': 5,
+ * 'D': 6,
+ * 'E': 3,
+ * 'I': 5,
+ * 'M': 4,
+ * 'T': 4}
+ *
+ * for i in range(0, 256):
+ * if chr(i) in m:
+ * num = m[chr(i)]
+ * else:
+ * num = 8
+ * print ("%d, " % num),
+ *
+ * if i % 16 == 15:
+ * print ""
+ */
+
+static const uint32_t macro_skip[] = {
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 5, 8, 8, 6, 3, 8, 8, 8, 5, 8, 8, 8, 4, 8, 8,
+ 8, 8, 8, 8, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 1,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+};