]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-78214: marshal: Stabilize FLAG_REF usage (GH-8226)
author: Inada Naoki <songofacandy@gmail.com>
Wed, 4 May 2022 01:01:15 +0000 (10:01 +0900)
committer: GitHub <noreply@github.com>
Wed, 4 May 2022 01:01:15 +0000 (10:01 +0900)
Use FLAG_REF always for interned strings.

The refcounts of interned strings are very unstable.
When compiling the same source, the refcount of an interned string in the output may be 1 or >1.
This makes FLAG_REF usage unstable.

To help reproducible builds, use FLAG_REF for interned strings even if refcnt(obj)==1.

Misc/NEWS.d/next/Core and Builtins/2018-07-14-16-58-00.bpo-34093.WaVD-f.rst [new file with mode: 0644]
Programs/test_frozenmain.h
Python/marshal.c

diff --git a/Misc/NEWS.d/next/Core and Builtins/2018-07-14-16-58-00.bpo-34093.WaVD-f.rst b/Misc/NEWS.d/next/Core and Builtins/2018-07-14-16-58-00.bpo-34093.WaVD-f.rst
new file mode 100644 (file)
index 0000000..383daac
--- /dev/null
@@ -0,0 +1,2 @@
+``marshal.dumps()`` uses ``FLAG_REF`` for all interned strings.  This makes
+output more deterministic and helps with reproducible builds.
index 3034927d7a12b9f86ccb55b050d8a3a50b2e3289..1c279134e94dc960339fa3aa9640fd246f2b5e75 100644 (file)
@@ -15,19 +15,19 @@ unsigned char M_test_frozenmain[] = {
     0,0,1,0,140,26,100,1,83,0,41,8,233,0,0,0,
     0,78,122,18,70,114,111,122,101,110,32,72,101,108,108,111,
     32,87,111,114,108,100,122,8,115,121,115,46,97,114,103,118,
-    218,6,99,111,110,102,105,103,41,5,90,12,112,114,111,103,
+    218,6,99,111,110,102,105,103,41,5,218,12,112,114,111,103,
     114,97,109,95,110,97,109,101,218,10,101,120,101,99,117,116,
-    97,98,108,101,90,15,117,115,101,95,101,110,118,105,114,111,
-    110,109,101,110,116,90,17,99,111,110,102,105,103,117,114,101,
-    95,99,95,115,116,100,105,111,90,14,98,117,102,102,101,114,
+    97,98,108,101,218,15,117,115,101,95,101,110,118,105,114,111,
+    110,109,101,110,116,218,17,99,111,110,102,105,103,117,114,101,
+    95,99,95,115,116,100,105,111,218,14,98,117,102,102,101,114,
     101,100,95,115,116,100,105,111,122,7,99,111,110,102,105,103,
-    32,122,2,58,32,41,7,218,3,115,121,115,90,17,95,116,
+    32,122,2,58,32,41,7,218,3,115,121,115,218,17,95,116,
     101,115,116,105,110,116,101,114,110,97,108,99,97,112,105,218,
-    5,112,114,105,110,116,218,4,97,114,103,118,90,11,103,101,
+    5,112,114,105,110,116,218,4,97,114,103,118,218,11,103,101,
     116,95,99,111,110,102,105,103,115,114,2,0,0,0,218,3,
     107,101,121,169,0,243,0,0,0,0,250,18,116,101,115,116,
     95,102,114,111,122,101,110,109,97,105,110,46,112,121,250,8,
-    60,109,111,100,117,108,101,62,114,11,0,0,0,1,0,0,
+    60,109,111,100,117,108,101,62,114,17,0,0,0,1,0,0,
     0,115,152,0,0,0,248,240,6,0,1,11,128,10,128,10,
     128,10,216,0,24,208,0,24,208,0,24,208,0,24,224,0,
     5,128,5,208,6,26,209,0,27,212,0,27,208,0,27,216,
@@ -37,6 +37,6 @@ unsigned char M_test_frozenmain[] = {
     7,1,42,240,0,7,1,42,128,67,240,14,0,5,10,128,
     69,208,10,40,144,67,208,10,40,208,10,40,152,54,160,35,
     156,59,208,10,40,208,10,40,209,4,41,212,4,41,208,4,
-    41,208,4,41,240,15,7,1,42,240,0,7,1,42,114,9,
+    41,208,4,41,240,15,7,1,42,240,0,7,1,42,114,15,
     0,0,0,
 };
index bbe67e3379fd93ef8dfb2a5d98c0b44a7d01488f..90a440509180060eb2214ebf9999b9ba6bf0e996 100644 (file)
@@ -298,9 +298,14 @@ w_ref(PyObject *v, char *flag, WFILE *p)
     if (p->version < 3 || p->hashtable == NULL)
         return 0; /* not writing object references */
 
-    /* if it has only one reference, it definitely isn't shared */
-    if (Py_REFCNT(v) == 1)
+    /* If it has only one reference, it definitely isn't shared.
+     * But we always use TYPE_REF for interned strings, to keep the PYC
+     * file as stable as possible.
+     */
+    if (Py_REFCNT(v) == 1 &&
+            !(PyUnicode_CheckExact(v) && PyUnicode_CHECK_INTERNED(v))) {
         return 0;
+    }
 
     entry = _Py_hashtable_get_entry(p->hashtable, v);
     if (entry != NULL) {