]>
Commit | Line | Data |
---|---|---|
09c434b8 | 1 | // SPDX-License-Identifier: GPL-2.0-only |
64c70b1c | 2 | /* |
8b975bd3 | 3 | * LZO1X Compressor from LZO |
64c70b1c | 4 | * |
8b975bd3 | 5 | * Copyright (C) 1996-2012 Markus F.X.J. Oberhumer <markus@oberhumer.com> |
64c70b1c RP |
6 | * |
7 | * The full LZO package can be found at: | |
8 | * http://www.oberhumer.com/opensource/lzo/ | |
9 | * | |
8b975bd3 | 10 | * Changed for Linux kernel use by: |
64c70b1c RP |
11 | * Nitin Gupta <nitingupta910@gmail.com> |
12 | * Richard Purdie <rpurdie@openedhand.com> | |
13 | */ | |
14 | ||
15 | #include <linux/module.h> | |
16 | #include <linux/kernel.h> | |
64c70b1c | 17 | #include <asm/unaligned.h> |
8b975bd3 | 18 | #include <linux/lzo.h> |
64c70b1c RP |
19 | #include "lzodefs.h" |
20 | ||
21 | static noinline size_t | |
8b975bd3 MO |
22 | lzo1x_1_do_compress(const unsigned char *in, size_t in_len, |
23 | unsigned char *out, size_t *out_len, | |
45ec975e DR |
24 | size_t ti, void *wrkmem, signed char *state_offset, |
25 | const unsigned char bitstream_version) | |
64c70b1c | 26 | { |
8b975bd3 MO |
27 | const unsigned char *ip; |
28 | unsigned char *op; | |
64c70b1c | 29 | const unsigned char * const in_end = in + in_len; |
8b975bd3 MO |
30 | const unsigned char * const ip_end = in + in_len - 20; |
31 | const unsigned char *ii; | |
32 | lzo_dict_t * const dict = (lzo_dict_t *) wrkmem; | |
64c70b1c | 33 | |
8b975bd3 MO |
34 | op = out; |
35 | ip = in; | |
36 | ii = ip; | |
37 | ip += ti < 4 ? 4 - ti : 0; | |
64c70b1c RP |
38 | |
39 | for (;;) { | |
5ee4014a | 40 | const unsigned char *m_pos = NULL; |
8b975bd3 MO |
41 | size_t t, m_len, m_off; |
42 | u32 dv; | |
5ee4014a | 43 | u32 run_length = 0; |
64c70b1c | 44 | literal: |
8b975bd3 MO |
45 | ip += 1 + ((ip - ii) >> 5); |
46 | next: | |
64c70b1c RP |
47 | if (unlikely(ip >= ip_end)) |
48 | break; | |
8b975bd3 | 49 | dv = get_unaligned_le32(ip); |
5ee4014a | 50 | |
45ec975e | 51 | if (dv == 0 && bitstream_version) { |
5ee4014a DR |
52 | const unsigned char *ir = ip + 4; |
53 | const unsigned char *limit = ip_end | |
54 | < (ip + MAX_ZERO_RUN_LENGTH + 1) | |
55 | ? ip_end : ip + MAX_ZERO_RUN_LENGTH + 1; | |
56 | #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && \ | |
57 | defined(LZO_FAST_64BIT_MEMORY_ACCESS) | |
58 | u64 dv64; | |
59 | ||
60 | for (; (ir + 32) <= limit; ir += 32) { | |
61 | dv64 = get_unaligned((u64 *)ir); | |
62 | dv64 |= get_unaligned((u64 *)ir + 1); | |
63 | dv64 |= get_unaligned((u64 *)ir + 2); | |
64 | dv64 |= get_unaligned((u64 *)ir + 3); | |
65 | if (dv64) | |
66 | break; | |
67 | } | |
68 | for (; (ir + 8) <= limit; ir += 8) { | |
69 | dv64 = get_unaligned((u64 *)ir); | |
70 | if (dv64) { | |
71 | # if defined(__LITTLE_ENDIAN) | |
72 | ir += __builtin_ctzll(dv64) >> 3; | |
73 | # elif defined(__BIG_ENDIAN) | |
74 | ir += __builtin_clzll(dv64) >> 3; | |
75 | # else | |
76 | # error "missing endian definition" | |
77 | # endif | |
78 | break; | |
79 | } | |
80 | } | |
81 | #else | |
82 | while ((ir < (const unsigned char *) | |
83 | ALIGN((uintptr_t)ir, 4)) && | |
84 | (ir < limit) && (*ir == 0)) | |
85 | ir++; | |
09b35b41 DR |
86 | if (IS_ALIGNED((uintptr_t)ir, 4)) { |
87 | for (; (ir + 4) <= limit; ir += 4) { | |
88 | dv = *((u32 *)ir); | |
89 | if (dv) { | |
5ee4014a | 90 | # if defined(__LITTLE_ENDIAN) |
09b35b41 | 91 | ir += __builtin_ctz(dv) >> 3; |
5ee4014a | 92 | # elif defined(__BIG_ENDIAN) |
09b35b41 | 93 | ir += __builtin_clz(dv) >> 3; |
5ee4014a DR |
94 | # else |
95 | # error "missing endian definition" | |
96 | # endif | |
09b35b41 DR |
97 | break; |
98 | } | |
5ee4014a DR |
99 | } |
100 | } | |
101 | #endif | |
102 | while (likely(ir < limit) && unlikely(*ir == 0)) | |
103 | ir++; | |
104 | run_length = ir - ip; | |
105 | if (run_length > MAX_ZERO_RUN_LENGTH) | |
106 | run_length = MAX_ZERO_RUN_LENGTH; | |
107 | } else { | |
108 | t = ((dv * 0x1824429d) >> (32 - D_BITS)) & D_MASK; | |
109 | m_pos = in + dict[t]; | |
110 | dict[t] = (lzo_dict_t) (ip - in); | |
111 | if (unlikely(dv != get_unaligned_le32(m_pos))) | |
112 | goto literal; | |
113 | } | |
64c70b1c | 114 | |
8b975bd3 MO |
115 | ii -= ti; |
116 | ti = 0; | |
117 | t = ip - ii; | |
118 | if (t != 0) { | |
64c70b1c | 119 | if (t <= 3) { |
5ee4014a | 120 | op[*state_offset] |= t; |
8b975bd3 MO |
121 | COPY4(op, ii); |
122 | op += t; | |
123 | } else if (t <= 16) { | |
64c70b1c | 124 | *op++ = (t - 3); |
8b975bd3 MO |
125 | COPY8(op, ii); |
126 | COPY8(op + 8, ii + 8); | |
127 | op += t; | |
64c70b1c | 128 | } else { |
8b975bd3 MO |
129 | if (t <= 18) { |
130 | *op++ = (t - 3); | |
131 | } else { | |
132 | size_t tt = t - 18; | |
64c70b1c | 133 | *op++ = 0; |
8b975bd3 MO |
134 | while (unlikely(tt > 255)) { |
135 | tt -= 255; | |
136 | *op++ = 0; | |
137 | } | |
138 | *op++ = tt; | |
64c70b1c | 139 | } |
8b975bd3 MO |
140 | do { |
141 | COPY8(op, ii); | |
142 | COPY8(op + 8, ii + 8); | |
143 | op += 16; | |
144 | ii += 16; | |
145 | t -= 16; | |
146 | } while (t >= 16); | |
147 | if (t > 0) do { | |
148 | *op++ = *ii++; | |
149 | } while (--t > 0); | |
64c70b1c | 150 | } |
64c70b1c RP |
151 | } |
152 | ||
5ee4014a DR |
153 | if (unlikely(run_length)) { |
154 | ip += run_length; | |
155 | run_length -= MIN_ZERO_RUN_LENGTH; | |
156 | put_unaligned_le32((run_length << 21) | 0xfffc18 | |
157 | | (run_length & 0x7), op); | |
158 | op += 4; | |
159 | run_length = 0; | |
160 | *state_offset = -3; | |
161 | goto finished_writing_instruction; | |
162 | } | |
163 | ||
8b975bd3 MO |
164 | m_len = 4; |
165 | { | |
166 | #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && defined(LZO_USE_CTZ64) | |
167 | u64 v; | |
168 | v = get_unaligned((const u64 *) (ip + m_len)) ^ | |
169 | get_unaligned((const u64 *) (m_pos + m_len)); | |
170 | if (unlikely(v == 0)) { | |
171 | do { | |
172 | m_len += 8; | |
173 | v = get_unaligned((const u64 *) (ip + m_len)) ^ | |
174 | get_unaligned((const u64 *) (m_pos + m_len)); | |
175 | if (unlikely(ip + m_len >= ip_end)) | |
176 | goto m_len_done; | |
177 | } while (v == 0); | |
178 | } | |
179 | # if defined(__LITTLE_ENDIAN) | |
180 | m_len += (unsigned) __builtin_ctzll(v) / 8; | |
181 | # elif defined(__BIG_ENDIAN) | |
182 | m_len += (unsigned) __builtin_clzll(v) / 8; | |
183 | # else | |
184 | # error "missing endian definition" | |
185 | # endif | |
186 | #elif defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && defined(LZO_USE_CTZ32) | |
187 | u32 v; | |
188 | v = get_unaligned((const u32 *) (ip + m_len)) ^ | |
189 | get_unaligned((const u32 *) (m_pos + m_len)); | |
190 | if (unlikely(v == 0)) { | |
191 | do { | |
192 | m_len += 4; | |
193 | v = get_unaligned((const u32 *) (ip + m_len)) ^ | |
194 | get_unaligned((const u32 *) (m_pos + m_len)); | |
195 | if (v != 0) | |
196 | break; | |
197 | m_len += 4; | |
198 | v = get_unaligned((const u32 *) (ip + m_len)) ^ | |
199 | get_unaligned((const u32 *) (m_pos + m_len)); | |
200 | if (unlikely(ip + m_len >= ip_end)) | |
201 | goto m_len_done; | |
202 | } while (v == 0); | |
203 | } | |
204 | # if defined(__LITTLE_ENDIAN) | |
205 | m_len += (unsigned) __builtin_ctz(v) / 8; | |
206 | # elif defined(__BIG_ENDIAN) | |
207 | m_len += (unsigned) __builtin_clz(v) / 8; | |
208 | # else | |
209 | # error "missing endian definition" | |
210 | # endif | |
211 | #else | |
212 | if (unlikely(ip[m_len] == m_pos[m_len])) { | |
213 | do { | |
214 | m_len += 1; | |
215 | if (ip[m_len] != m_pos[m_len]) | |
216 | break; | |
217 | m_len += 1; | |
218 | if (ip[m_len] != m_pos[m_len]) | |
219 | break; | |
220 | m_len += 1; | |
221 | if (ip[m_len] != m_pos[m_len]) | |
222 | break; | |
223 | m_len += 1; | |
224 | if (ip[m_len] != m_pos[m_len]) | |
225 | break; | |
226 | m_len += 1; | |
227 | if (ip[m_len] != m_pos[m_len]) | |
228 | break; | |
229 | m_len += 1; | |
230 | if (ip[m_len] != m_pos[m_len]) | |
231 | break; | |
232 | m_len += 1; | |
233 | if (ip[m_len] != m_pos[m_len]) | |
234 | break; | |
235 | m_len += 1; | |
236 | if (unlikely(ip + m_len >= ip_end)) | |
237 | goto m_len_done; | |
238 | } while (ip[m_len] == m_pos[m_len]); | |
239 | } | |
240 | #endif | |
241 | } | |
242 | m_len_done: | |
64c70b1c | 243 | |
8b975bd3 MO |
244 | m_off = ip - m_pos; |
245 | ip += m_len; | |
8b975bd3 MO |
246 | if (m_len <= M2_MAX_LEN && m_off <= M2_MAX_OFFSET) { |
247 | m_off -= 1; | |
248 | *op++ = (((m_len - 1) << 5) | ((m_off & 7) << 2)); | |
249 | *op++ = (m_off >> 3); | |
250 | } else if (m_off <= M3_MAX_OFFSET) { | |
251 | m_off -= 1; | |
252 | if (m_len <= M3_MAX_LEN) | |
64c70b1c | 253 | *op++ = (M3_MARKER | (m_len - 2)); |
8b975bd3 MO |
254 | else { |
255 | m_len -= M3_MAX_LEN; | |
256 | *op++ = M3_MARKER | 0; | |
257 | while (unlikely(m_len > 255)) { | |
258 | m_len -= 255; | |
259 | *op++ = 0; | |
260 | } | |
261 | *op++ = (m_len); | |
64c70b1c | 262 | } |
8b975bd3 MO |
263 | *op++ = (m_off << 2); |
264 | *op++ = (m_off >> 6); | |
64c70b1c | 265 | } else { |
8b975bd3 MO |
266 | m_off -= 0x4000; |
267 | if (m_len <= M4_MAX_LEN) | |
268 | *op++ = (M4_MARKER | ((m_off >> 11) & 8) | |
64c70b1c | 269 | | (m_len - 2)); |
8b975bd3 MO |
270 | else { |
271 | m_len -= M4_MAX_LEN; | |
272 | *op++ = (M4_MARKER | ((m_off >> 11) & 8)); | |
273 | while (unlikely(m_len > 255)) { | |
274 | m_len -= 255; | |
275 | *op++ = 0; | |
64c70b1c | 276 | } |
8b975bd3 | 277 | *op++ = (m_len); |
64c70b1c | 278 | } |
8b975bd3 | 279 | *op++ = (m_off << 2); |
64c70b1c RP |
280 | *op++ = (m_off >> 6); |
281 | } | |
5ee4014a DR |
282 | *state_offset = -2; |
283 | finished_writing_instruction: | |
284 | ii = ip; | |
8b975bd3 | 285 | goto next; |
64c70b1c | 286 | } |
64c70b1c | 287 | *out_len = op - out; |
8b975bd3 | 288 | return in_end - (ii - ti); |
64c70b1c RP |
289 | } |
290 | ||
45ec975e | 291 | int lzogeneric1x_1_compress(const unsigned char *in, size_t in_len, |
8b975bd3 | 292 | unsigned char *out, size_t *out_len, |
45ec975e | 293 | void *wrkmem, const unsigned char bitstream_version) |
64c70b1c | 294 | { |
8b975bd3 | 295 | const unsigned char *ip = in; |
64c70b1c | 296 | unsigned char *op = out; |
b11ed18e | 297 | unsigned char *data_start; |
8b975bd3 MO |
298 | size_t l = in_len; |
299 | size_t t = 0; | |
5ee4014a | 300 | signed char state_offset = -2; |
45ec975e | 301 | unsigned int m4_max_offset; |
5ee4014a | 302 | |
b11ed18e DR |
303 | // LZO v0 will never write 17 as first byte (except for zero-length |
304 | // input), so this is used to version the bitstream | |
45ec975e DR |
305 | if (bitstream_version > 0) { |
306 | *op++ = 17; | |
307 | *op++ = bitstream_version; | |
308 | m4_max_offset = M4_MAX_OFFSET_V1; | |
309 | } else { | |
310 | m4_max_offset = M4_MAX_OFFSET_V0; | |
311 | } | |
64c70b1c | 312 | |
b11ed18e DR |
313 | data_start = op; |
314 | ||
8b975bd3 | 315 | while (l > 20) { |
45ec975e | 316 | size_t ll = l <= (m4_max_offset + 1) ? l : (m4_max_offset + 1); |
8b975bd3 MO |
317 | uintptr_t ll_end = (uintptr_t) ip + ll; |
318 | if ((ll_end + ((t + ll) >> 5)) <= ll_end) | |
319 | break; | |
320 | BUILD_BUG_ON(D_SIZE * sizeof(lzo_dict_t) > LZO1X_1_MEM_COMPRESS); | |
321 | memset(wrkmem, 0, D_SIZE * sizeof(lzo_dict_t)); | |
45ec975e DR |
322 | t = lzo1x_1_do_compress(ip, ll, op, out_len, t, wrkmem, |
323 | &state_offset, bitstream_version); | |
8b975bd3 | 324 | ip += ll; |
64c70b1c | 325 | op += *out_len; |
8b975bd3 | 326 | l -= ll; |
64c70b1c | 327 | } |
8b975bd3 | 328 | t += l; |
64c70b1c RP |
329 | |
330 | if (t > 0) { | |
8b975bd3 | 331 | const unsigned char *ii = in + in_len - t; |
64c70b1c | 332 | |
b11ed18e | 333 | if (op == data_start && t <= 238) { |
64c70b1c RP |
334 | *op++ = (17 + t); |
335 | } else if (t <= 3) { | |
5ee4014a | 336 | op[state_offset] |= t; |
64c70b1c RP |
337 | } else if (t <= 18) { |
338 | *op++ = (t - 3); | |
339 | } else { | |
340 | size_t tt = t - 18; | |
64c70b1c RP |
341 | *op++ = 0; |
342 | while (tt > 255) { | |
343 | tt -= 255; | |
344 | *op++ = 0; | |
345 | } | |
64c70b1c RP |
346 | *op++ = tt; |
347 | } | |
8b975bd3 MO |
348 | if (t >= 16) do { |
349 | COPY8(op, ii); | |
350 | COPY8(op + 8, ii + 8); | |
351 | op += 16; | |
352 | ii += 16; | |
353 | t -= 16; | |
354 | } while (t >= 16); | |
355 | if (t > 0) do { | |
64c70b1c RP |
356 | *op++ = *ii++; |
357 | } while (--t > 0); | |
358 | } | |
359 | ||
360 | *op++ = M4_MARKER | 1; | |
361 | *op++ = 0; | |
362 | *op++ = 0; | |
363 | ||
364 | *out_len = op - out; | |
365 | return LZO_E_OK; | |
366 | } | |
45ec975e DR |
367 | |
368 | int lzo1x_1_compress(const unsigned char *in, size_t in_len, | |
369 | unsigned char *out, size_t *out_len, | |
370 | void *wrkmem) | |
371 | { | |
372 | return lzogeneric1x_1_compress(in, in_len, out, out_len, wrkmem, 0); | |
373 | } | |
374 | ||
375 | int lzorle1x_1_compress(const unsigned char *in, size_t in_len, | |
376 | unsigned char *out, size_t *out_len, | |
377 | void *wrkmem) | |
378 | { | |
379 | return lzogeneric1x_1_compress(in, in_len, out, out_len, | |
380 | wrkmem, LZO_VERSION); | |
381 | } | |
382 | ||
64c70b1c | 383 | EXPORT_SYMBOL_GPL(lzo1x_1_compress); |
45ec975e | 384 | EXPORT_SYMBOL_GPL(lzorle1x_1_compress); |
64c70b1c RP |
385 | |
386 | MODULE_LICENSE("GPL"); | |
387 | MODULE_DESCRIPTION("LZO1X-1 Compressor"); |