git.ipfire.org Git - thirdparty/git.git/commitdiff
fast-export: do not modify memory from get_commit_buffer
author Luke Shumaker <lukeshu@datawire.io>
Mon, 10 Mar 2025 15:57:45 +0000 (16:57 +0100)
committer Junio C Hamano <gitster@pobox.com>
Mon, 10 Mar 2025 20:24:56 +0000 (13:24 -0700)
fast-export's helper function find_encoding() takes a `const char *`, but
modifies that memory despite the `const`.  Ultimately, this memory came
from get_commit_buffer(), and you're not supposed to modify the memory
that you get from get_commit_buffer().

So, get rid of find_encoding() in favor of commit.h:find_commit_header(),
which gives back a string length, rather than mutating the memory to
insert a '\0' terminator.

Because find_commit_header() detects the "\n\n" string that separates the
headers and the commit message, move the call to be above the
`message = strstr(..., "\n\n")` call.  This helps readability, and allows
for the value of `encoding` to be used for a better value of "..." so that
the same memory doesn't need to be checked twice.  Introduce a
`commit_buffer_cursor` variable to avoid writing an awkward
`encoding ? encoding + encoding_len : committer_end` expression.

Signed-off-by: Luke Shumaker <lukeshu@datawire.io>
Signed-off-by: Christian Couder <chriscool@tuxfamily.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
builtin/fast-export.c

index 2de2adc30eda6735701a3ba998e081d0ce409d0f..39d43c2a292d434f8a2ba696151ccbc5c5811a05 100644 (file)
@@ -510,21 +510,6 @@ static void show_filemodify(struct diff_queue_struct *q,
        }
 }
 
-static const char *find_encoding(const char *begin, const char *end)
-{
-       const char *needle = "\nencoding ";
-       char *bol, *eol;
-
-       bol = memmem(begin, end ? end - begin : strlen(begin),
-                    needle, strlen(needle));
-       if (!bol)
-               return NULL;
-       bol += strlen(needle);
-       eol = strchrnul(bol, '\n');
-       *eol = '\0';
-       return bol;
-}
-
 static char *anonymize_ref_component(void)
 {
        static int counter;
@@ -630,9 +615,11 @@ static void handle_commit(struct commit *commit, struct rev_info *rev,
                          struct string_list *paths_of_changed_objects)
 {
        int saved_output_format = rev->diffopt.output_format;
-       const char *commit_buffer;
+       const char *commit_buffer, *commit_buffer_cursor;
        const char *author, *author_end, *committer, *committer_end;
-       const char *encoding, *message;
+       const char *encoding = NULL;
+       size_t encoding_len;
+       const char *message;
        char *reencoded = NULL;
        struct commit_list *p;
        const char *refname;
@@ -641,21 +628,35 @@ static void handle_commit(struct commit *commit, struct rev_info *rev,
        rev->diffopt.output_format = DIFF_FORMAT_CALLBACK;
 
        parse_commit_or_die(commit);
-       commit_buffer = repo_get_commit_buffer(the_repository, commit, NULL);
-       author = strstr(commit_buffer, "\nauthor ");
+       commit_buffer_cursor = commit_buffer = repo_get_commit_buffer(the_repository, commit, NULL);
+
+       author = strstr(commit_buffer_cursor, "\nauthor ");
        if (!author)
                die("could not find author in commit %s",
                    oid_to_hex(&commit->object.oid));
        author++;
-       author_end = strchrnul(author, '\n');
-       committer = strstr(author_end, "\ncommitter ");
+       commit_buffer_cursor = author_end = strchrnul(author, '\n');
+
+       committer = strstr(commit_buffer_cursor, "\ncommitter ");
        if (!committer)
                die("could not find committer in commit %s",
                    oid_to_hex(&commit->object.oid));
        committer++;
-       committer_end = strchrnul(committer, '\n');
-       message = strstr(committer_end, "\n\n");
-       encoding = find_encoding(committer_end, message);
+       commit_buffer_cursor = committer_end = strchrnul(committer, '\n');
+
+       /*
+        * find_commit_header() gets a `+ 1` because
+        * commit_buffer_cursor points at the trailing "\n" at the end
+        * of the previous line, but find_commit_header() wants a
+        * pointer to the beginning of the next line.
+        */
+       if (*commit_buffer_cursor == '\n') {
+               encoding = find_commit_header(commit_buffer_cursor + 1, "encoding", &encoding_len);
+               if (encoding)
+                       commit_buffer_cursor = encoding + encoding_len;
+       }
+
+       message = strstr(commit_buffer_cursor, "\n\n");
        if (message)
                message += 2;
 
@@ -694,16 +695,20 @@ static void handle_commit(struct commit *commit, struct rev_info *rev,
        if (anonymize) {
                reencoded = anonymize_commit_message();
        } else if (encoding) {
+               char *buf;
                switch (reencode_mode) {
                case REENCODE_YES:
-                       reencoded = reencode_string(message, "UTF-8", encoding);
+                       buf = xstrfmt("%.*s", (int)encoding_len, encoding);
+                       reencoded = reencode_string(message, "UTF-8", buf);
+                       free(buf);
                        break;
                case REENCODE_NO:
                        break;
                case REENCODE_ABORT:
-                       die("Encountered commit-specific encoding %s in commit "
+                       die("Encountered commit-specific encoding %.*s in commit "
                            "%s; use --reencode=[yes|no] to handle it",
-                           encoding, oid_to_hex(&commit->object.oid));
+                           (int)encoding_len, encoding,
+                           oid_to_hex(&commit->object.oid));
                }
        }
        if (!commit->parents)
@@ -715,7 +720,7 @@ static void handle_commit(struct commit *commit, struct rev_info *rev,
               (int)(author_end - author), author,
               (int)(committer_end - committer), committer);
        if (!reencoded && encoding)
-               printf("encoding %s\n", encoding);
+               printf("encoding %.*s\n", (int)encoding_len, encoding);
        printf("data %u\n%s",
               (unsigned)(reencoded
                          ? strlen(reencoded) : message