]>
Commit | Line | Data |
---|---|---|
107642fe LS |
1 | #!/bin/sh |
2 | ||
3 | test_description='working-tree-encoding conversion via gitattributes' | |
4 | ||
06d53148 | 5 | GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main |
334afbc7 JS |
6 | export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME |
7 | ||
27472b51 | 8 | TEST_PASSES_SANITIZE_LEAK=true |
8da0b02d | 9 | TEST_CREATE_REPO_NO_TEMPLATE=1 |
107642fe | 10 | . ./test-lib.sh |
2fa3cbad | 11 | . "$TEST_DIRECTORY/lib-encoding.sh" |
107642fe | 12 | |
541d059c LS |
13 | GIT_TRACE_WORKING_TREE_ENCODING=1 && export GIT_TRACE_WORKING_TREE_ENCODING |
14 | ||
107642fe LS |
15 | test_expect_success 'setup test files' ' |
16 | git config core.eol lf && | |
17 | ||
18 | text="hallo there!\ncan you read me?" && | |
19 | echo "*.utf16 text working-tree-encoding=utf-16" >.gitattributes && | |
aab2a1ae | 20 | echo "*.utf16lebom text working-tree-encoding=UTF-16LE-BOM" >>.gitattributes && |
107642fe | 21 | printf "$text" >test.utf8.raw && |
79444c92 | 22 | printf "$text" | write_utf16 >test.utf16.raw && |
23 | printf "$text" | write_utf32 >test.utf32.raw && | |
aab2a1ae | 24 | printf "\377\376" >test.utf16lebom.raw && |
0b63fd69 | 25 | printf "$text" | iconv -f UTF-8 -t UTF-16LE >>test.utf16lebom.raw && |
107642fe LS |
26 | |
27 | # Line ending tests | |
28 | printf "one\ntwo\nthree\n" >lf.utf8.raw && | |
29 | printf "one\r\ntwo\r\nthree\r\n" >crlf.utf8.raw && | |
30 | ||
31 | # BOM tests | |
32 | printf "\0a\0b\0c" >nobom.utf16be.raw && | |
33 | printf "a\0b\0c\0" >nobom.utf16le.raw && | |
99e9ab54 KD |
34 | printf "\376\377\0a\0b\0c" >bebom.utf16be.raw && |
35 | printf "\377\376a\0b\0c\0" >lebom.utf16le.raw && | |
107642fe LS |
36 | printf "\0\0\0a\0\0\0b\0\0\0c" >nobom.utf32be.raw && |
37 | printf "a\0\0\0b\0\0\0c\0\0\0" >nobom.utf32le.raw && | |
99e9ab54 KD |
38 | printf "\0\0\376\377\0\0\0a\0\0\0b\0\0\0c" >bebom.utf32be.raw && |
39 | printf "\377\376\0\0a\0\0\0b\0\0\0c\0\0\0" >lebom.utf32le.raw && | |
107642fe LS |
40 | |
41 | # Add only the UTF-16 files, we will add the UTF-32 file later | |
42 | cp test.utf16.raw test.utf16 && | |
43 | cp test.utf32.raw test.utf32 && | |
aab2a1ae TB |
44 | cp test.utf16lebom.raw test.utf16lebom && |
45 | git add .gitattributes test.utf16 test.utf16lebom && | |
107642fe LS |
46 | git commit -m initial |
47 | ' | |
48 | ||
49 | test_expect_success 'ensure UTF-8 is stored in Git' ' | |
50 | test_when_finished "rm -f test.utf16.git" && | |
51 | ||
52 | git cat-file -p :test.utf16 >test.utf16.git && | |
53 | test_cmp_bin test.utf8.raw test.utf16.git | |
54 | ' | |
55 | ||
56 | test_expect_success 're-encode to UTF-16 on checkout' ' | |
57 | test_when_finished "rm -f test.utf16.raw" && | |
58 | ||
59 | rm test.utf16 && | |
60 | git checkout test.utf16 && | |
61 | test_cmp_bin test.utf16.raw test.utf16 | |
62 | ' | |
63 | ||
aab2a1ae TB |
64 | test_expect_success 're-encode to UTF-16-LE-BOM on checkout' ' |
65 | rm test.utf16lebom && | |
66 | git checkout test.utf16lebom && | |
67 | test_cmp_bin test.utf16lebom.raw test.utf16lebom | |
68 | ' | |
69 | ||
107642fe LS |
70 | test_expect_success 'check $GIT_DIR/info/attributes support' ' |
71 | test_when_finished "rm -f test.utf32.git" && | |
72 | test_when_finished "git reset --hard HEAD" && | |
73 | ||
8da0b02d | 74 | mkdir .git/info && |
107642fe LS |
75 | echo "*.utf32 text working-tree-encoding=utf-32" >.git/info/attributes && |
76 | git add test.utf32 && | |
77 | ||
78 | git cat-file -p :test.utf32 >test.utf32.git && | |
79 | test_cmp_bin test.utf8.raw test.utf32.git | |
80 | ' | |
81 | ||
82 | for i in 16 32 | |
83 | do | |
7a17918c LS |
84 | test_expect_success "check prohibited UTF-${i} BOM" ' |
85 | test_when_finished "git reset --hard HEAD" && | |
86 | ||
87 | echo "*.utf${i}be text working-tree-encoding=utf-${i}be" >>.gitattributes && | |
88 | echo "*.utf${i}le text working-tree-encoding=utf-${i}LE" >>.gitattributes && | |
89 | ||
90 | # Here we add UTF-16 (resp. UTF-32) files with a BOM (big/little-endian) | |
91 | # but we tell Git to treat them as UTF-16BE/UTF-16LE (resp. UTF-32BE/UTF-32LE). | |
92 | # In these cases the BOM is prohibited. | |
93 | cp bebom.utf${i}be.raw bebom.utf${i}be && | |
94 | test_must_fail git add bebom.utf${i}be 2>err.out && | |
6789275d JH |
95 | test_grep "fatal: BOM is prohibited .* utf-${i}be" err.out && |
96 | test_grep "use UTF-${i} as working-tree-encoding" err.out && | |
7a17918c LS |
97 | |
98 | cp lebom.utf${i}le.raw lebom.utf${i}be && | |
99 | test_must_fail git add lebom.utf${i}be 2>err.out && | |
6789275d JH |
100 | test_grep "fatal: BOM is prohibited .* utf-${i}be" err.out && |
101 | test_grep "use UTF-${i} as working-tree-encoding" err.out && | |
7a17918c LS |
102 | |
103 | cp bebom.utf${i}be.raw bebom.utf${i}le && | |
104 | test_must_fail git add bebom.utf${i}le 2>err.out && | |
6789275d JH |
105 | test_grep "fatal: BOM is prohibited .* utf-${i}LE" err.out && |
106 | test_grep "use UTF-${i} as working-tree-encoding" err.out && | |
7a17918c LS |
107 | |
108 | cp lebom.utf${i}le.raw lebom.utf${i}le && | |
109 | test_must_fail git add lebom.utf${i}le 2>err.out && | |
6789275d JH |
110 | test_grep "fatal: BOM is prohibited .* utf-${i}LE" err.out && |
111 | test_grep "use UTF-${i} as working-tree-encoding" err.out | |
7a17918c LS |
112 | ' |
113 | ||
114 | test_expect_success "check required UTF-${i} BOM" ' | |
115 | test_when_finished "git reset --hard HEAD" && | |
116 | ||
117 | echo "*.utf${i} text working-tree-encoding=utf-${i}" >>.gitattributes && | |
118 | ||
119 | cp nobom.utf${i}be.raw nobom.utf${i} && | |
120 | test_must_fail git add nobom.utf${i} 2>err.out && | |
6789275d JH |
121 | test_grep "fatal: BOM is required .* utf-${i}" err.out && |
122 | test_grep "use UTF-${i}BE or UTF-${i}LE" err.out && | |
7a17918c LS |
123 | |
124 | cp nobom.utf${i}le.raw nobom.utf${i} && | |
125 | test_must_fail git add nobom.utf${i} 2>err.out && | |
6789275d JH |
126 | test_grep "fatal: BOM is required .* utf-${i}" err.out && |
127 | test_grep "use UTF-${i}BE or UTF-${i}LE" err.out | |
7a17918c LS |
128 | ' |
129 | ||
107642fe LS |
130 | test_expect_success "eol conversion for UTF-${i} encoded files on checkout" ' |
131 | test_when_finished "rm -f crlf.utf${i}.raw lf.utf${i}.raw" && | |
132 | test_when_finished "git reset --hard HEAD^" && | |
133 | ||
74615c2a BB |
134 | write_utf${i} <lf.utf8.raw >lf.utf${i}.raw && |
135 | write_utf${i} <crlf.utf8.raw >crlf.utf${i}.raw && | |
107642fe LS |
136 | cp crlf.utf${i}.raw eol.utf${i} && |
137 | ||
138 | cat >expectIndexLF <<-EOF && | |
139 | i/lf w/-text attr/text eol.utf${i} | |
140 | EOF | |
141 | ||
142 | git add eol.utf${i} && | |
143 | git commit -m eol && | |
144 | ||
145 | # UTF-${i} with CRLF (Windows line endings) | |
146 | rm eol.utf${i} && | |
147 | git -c core.eol=crlf checkout eol.utf${i} && | |
148 | test_cmp_bin crlf.utf${i}.raw eol.utf${i} && | |
149 | ||
150 | # Although the file has CRLF in the working tree, | |
151 | # ensure LF in the index | |
152 | git ls-files --eol eol.utf${i} >actual && | |
153 | test_cmp expectIndexLF actual && | |
154 | ||
155 | # UTF-${i} with LF (Unix line endings) | |
156 | rm eol.utf${i} && | |
157 | git -c core.eol=lf checkout eol.utf${i} && | |
158 | test_cmp_bin lf.utf${i}.raw eol.utf${i} && | |
159 | ||
160 | # The file has LF in the working tree, ensure LF in the index | |
161 | git ls-files --eol eol.utf${i} >actual && | |
162 | test_cmp expectIndexLF actual | |
163 | ' | |
164 | done | |
165 | ||
166 | test_expect_success 'check unsupported encodings' ' | |
167 | test_when_finished "git reset --hard HEAD" && | |
168 | ||
169 | echo "*.set text working-tree-encoding" >.gitattributes && | |
170 | printf "set" >t.set && | |
171 | test_must_fail git add t.set 2>err.out && | |
6789275d | 172 | test_grep "true/false are no valid working-tree-encodings" err.out && |
107642fe LS |
173 | |
174 | echo "*.unset text -working-tree-encoding" >.gitattributes && | |
175 | printf "unset" >t.unset && | |
176 | git add t.unset && | |
177 | ||
178 | echo "*.empty text working-tree-encoding=" >.gitattributes && | |
179 | printf "empty" >t.empty && | |
180 | git add t.empty && | |
181 | ||
182 | echo "*.garbage text working-tree-encoding=garbage" >.gitattributes && | |
183 | printf "garbage" >t.garbage && | |
184 | test_must_fail git add t.garbage 2>err.out && | |
6789275d | 185 | test_grep "failed to encode" err.out |
107642fe LS |
186 | ' |
187 | ||
188 | test_expect_success 'error if encoding round trip is not the same during refresh' ' | |
189 | BEFORE_STATE=$(git rev-parse HEAD) && | |
190 | test_when_finished "git reset --hard $BEFORE_STATE" && | |
191 | ||
192 | # Add and commit a UTF-16 file but skip the "working-tree-encoding" | |
193 | # filter. Consequently, the in-repo representation is UTF-16 and not | |
194 | # UTF-8. This simulates a Git version that has no working tree encoding | |
195 | # support. | |
196 | echo "*.utf16le text working-tree-encoding=utf-16le" >.gitattributes && | |
197 | echo "hallo" >nonsense.utf16le && | |
198 | TEST_HASH=$(git hash-object --no-filters -w nonsense.utf16le) && | |
199 | git update-index --add --cacheinfo 100644 $TEST_HASH nonsense.utf16le && | |
200 | COMMIT=$(git commit-tree -p $(git rev-parse HEAD) -m "plain commit" $(git write-tree)) && | |
06d53148 | 201 | git update-ref refs/heads/main $COMMIT && |
107642fe LS |
202 | |
203 | test_must_fail git checkout HEAD^ 2>err.out && | |
6789275d | 204 | test_grep "error: .* overwritten by checkout:" err.out |
107642fe LS |
205 | ' |
206 | ||
7a17918c LS |
207 | test_expect_success 'error if encoding garbage is already in Git' ' |
208 | BEFORE_STATE=$(git rev-parse HEAD) && | |
209 | test_when_finished "git reset --hard $BEFORE_STATE" && | |
210 | ||
211 | # Skip the UTF-16 filter for the added file | |
212 | # This simulates a Git version that has no working-tree-encoding support | |
213 | cp nobom.utf16be.raw nonsense.utf16 && | |
214 | TEST_HASH=$(git hash-object --no-filters -w nonsense.utf16) && | |
215 | git update-index --add --cacheinfo 100644 $TEST_HASH nonsense.utf16 && | |
216 | COMMIT=$(git commit-tree -p $(git rev-parse HEAD) -m "plain commit" $(git write-tree)) && | |
06d53148 | 217 | git update-ref refs/heads/main $COMMIT && |
7a17918c LS |
218 | |
219 | git diff 2>err.out && | |
6789275d | 220 | test_grep "error: BOM is required" err.out |
7a17918c LS |
221 | ' |
222 | ||
f6af6f99 ÆAB |
223 | test_lazy_prereq ICONV_SHIFT_JIS ' |
224 | iconv -f UTF-8 -t SHIFT-JIS </dev/null | |
225 | ' | |
226 | ||
227 | test_expect_success ICONV_SHIFT_JIS 'check roundtrip encoding' ' | |
e92d6225 LS |
228 | test_when_finished "rm -f roundtrip.shift roundtrip.utf16" && |
229 | test_when_finished "git reset --hard HEAD" && | |
230 | ||
231 | text="hallo there!\nroundtrip test here!" && | |
232 | printf "$text" | iconv -f UTF-8 -t SHIFT-JIS >roundtrip.shift && | |
79444c92 | 233 | printf "$text" | write_utf16 >roundtrip.utf16 && |
e92d6225 LS |
234 | echo "*.shift text working-tree-encoding=SHIFT-JIS" >>.gitattributes && |
235 | ||
236 | # SHIFT-JIS encoded files are round-trip checked by default... | |
237 | GIT_TRACE=1 git add .gitattributes roundtrip.shift 2>&1 | | |
238 | grep "Checking roundtrip encoding for SHIFT-JIS" && | |
239 | git reset && | |
240 | ||
241 | # ... unless we overwrite the Git config! | |
242 | ! GIT_TRACE=1 git -c core.checkRoundtripEncoding=garbage \ | |
243 | add .gitattributes roundtrip.shift 2>&1 | | |
244 | grep "Checking roundtrip encoding for SHIFT-JIS" && | |
245 | git reset && | |
246 | ||
247 | # UTF-16 encoded files should not be round-trip checked by default (the pattern uses the lowercase "utf-16" spelling from .gitattributes, exactly as the positive checks below do, so that this negative check can actually fail)... | |
248 | ! GIT_TRACE=1 git add roundtrip.utf16 2>&1 | | |
249 | grep "Checking roundtrip encoding for utf-16" && | |
250 | git reset && | |
251 | ||
252 | # ... unless we tell Git to check it! | |
253 | GIT_TRACE=1 git -c core.checkRoundtripEncoding="UTF-16, UTF-32" \ | |
254 | add roundtrip.utf16 2>&1 | | |
255 | grep "Checking roundtrip encoding for utf-16" && | |
256 | git reset && | |
257 | ||
258 | # ... unless we tell Git to check it! | |
259 | # (here we also check that the casing of the encoding is irrelevant) | |
260 | GIT_TRACE=1 git -c core.checkRoundtripEncoding="UTF-32, utf-16" \ | |
261 | add roundtrip.utf16 2>&1 | | |
262 | grep "Checking roundtrip encoding for utf-16" && | |
263 | git reset | |
264 | ' | |
265 | ||
d928a838 AM |
266 | # $1: checkout encoding |
267 | # $2: test string | |
268 | # $3: binary test string in checkout encoding | |
269 | test_commit_utf8_checkout_other () { | |
270 | encoding="$1" | |
271 | orig_string="$2" | |
272 | expect_bytes="$3" | |
273 | ||
274 | test_expect_success "Commit UTF-8, checkout $encoding" ' | |
275 | test_when_finished "git checkout HEAD -- .gitattributes" && | |
276 | ||
277 | test_ext="commit_utf8_checkout_$encoding" && | |
278 | test_file="test.$test_ext" && | |
279 | ||
280 | # Commit as UTF-8 | |
281 | echo "*.$test_ext text working-tree-encoding=UTF-8" >.gitattributes && | |
282 | printf "$orig_string" >$test_file && | |
283 | git add $test_file && | |
284 | git commit -m "Test data" && | |
285 | ||
286 | # Checkout in tested encoding | |
287 | rm $test_file && | |
288 | echo "*.$test_ext text working-tree-encoding=$encoding" >.gitattributes && | |
289 | git checkout HEAD -- $test_file && | |
290 | ||
291 | # Test | |
292 | printf $expect_bytes >$test_file.raw && | |
293 | test_cmp_bin $test_file.raw $test_file | |
294 | ' | |
295 | } | |
296 | ||
297 | test_commit_utf8_checkout_other "UTF-8" "Test Тест" "\124\145\163\164\040\320\242\320\265\321\201\321\202" | |
298 | test_commit_utf8_checkout_other "UTF-16LE" "Test Тест" "\124\000\145\000\163\000\164\000\040\000\042\004\065\004\101\004\102\004" | |
299 | test_commit_utf8_checkout_other "UTF-16BE" "Test Тест" "\000\124\000\145\000\163\000\164\000\040\004\042\004\065\004\101\004\102" | |
300 | test_commit_utf8_checkout_other "UTF-16LE-BOM" "Test Тест" "\377\376\124\000\145\000\163\000\164\000\040\000\042\004\065\004\101\004\102\004" | |
301 | test_commit_utf8_checkout_other "UTF-16BE-BOM" "Test Тест" "\376\377\000\124\000\145\000\163\000\164\000\040\004\042\004\065\004\101\004\102" | |
302 | test_commit_utf8_checkout_other "UTF-32LE" "Test Тест" "\124\000\000\000\145\000\000\000\163\000\000\000\164\000\000\000\040\000\000\000\042\004\000\000\065\004\000\000\101\004\000\000\102\004\000\000" | |
303 | test_commit_utf8_checkout_other "UTF-32BE" "Test Тест" "\000\000\000\124\000\000\000\145\000\000\000\163\000\000\000\164\000\000\000\040\000\000\004\042\000\000\004\065\000\000\004\101\000\000\004\102" | |
304 | ||
107642fe | 305 | test_done |