]>
Commit | Line | Data |
---|---|---|
5133f00e ILT |
1 | // Copyright 2011 The Go Authors. All rights reserved. |
2 | // Use of this source code is governed by a BSD-style | |
3 | // license that can be found in the LICENSE file. | |
4 | ||
5 | package lzw | |
6 | ||
7 | import ( | |
8 | "bufio" | |
2fd401c8 | 9 | "errors" |
5133f00e ILT |
10 | "fmt" |
11 | "io" | |
5133f00e ILT |
12 | ) |
13 | ||
14 | // A writer is a buffered, flushable writer. | |
15 | type writer interface { | |
2fd401c8 ILT |
16 | WriteByte(byte) error |
17 | Flush() error | |
5133f00e ILT |
18 | } |
19 | ||
20 | // An errWriteCloser is an io.WriteCloser that always returns a given error. | |
21 | type errWriteCloser struct { | |
2fd401c8 | 22 | err error |
5133f00e ILT |
23 | } |
24 | ||
2fd401c8 | 25 | func (e *errWriteCloser) Write([]byte) (int, error) { |
5133f00e ILT |
26 | return 0, e.err |
27 | } | |
28 | ||
2fd401c8 | 29 | func (e *errWriteCloser) Close() error { |
5133f00e ILT |
30 | return e.err |
31 | } | |
32 | ||
33 | const ( | |
34 | // A code is a 12 bit value, stored as a uint32 when encoding to avoid | |
35 | // type conversions when shifting bits. | |
36 | maxCode = 1<<12 - 1 | |
37 | invalidCode = 1<<32 - 1 | |
38 | // There are 1<<12 possible codes, which is an upper bound on the number of | |
39 | // valid hash table entries at any given point in time. tableSize is 4x that. | |
40 | tableSize = 4 * 1 << 12 | |
41 | tableMask = tableSize - 1 | |
42 | // A hash table entry is a uint32. Zero is an invalid entry since the | |
43 | // lower 12 bits of a valid entry must be a non-literal code. | |
44 | invalidEntry = 0 | |
45 | ) | |
46 | ||
47 | // encoder is LZW compressor. | |
48 | type encoder struct { | |
49 | // w is the writer that compressed bytes are written to. | |
50 | w writer | |
ab61e9c4 ILT |
51 | // order, write, bits, nBits and width are the state for |
52 | // converting a code stream into a byte stream. | |
53 | order Order | |
2fd401c8 | 54 | write func(*encoder, uint32) error |
5133f00e ILT |
55 | bits uint32 |
56 | nBits uint | |
57 | width uint | |
58 | // litWidth is the width in bits of literal codes. | |
59 | litWidth uint | |
60 | // hi is the code implied by the next code emission. | |
61 | // overflow is the code at which hi overflows the code width. | |
62 | hi, overflow uint32 | |
63 | // savedCode is the accumulated code at the end of the most recent Write | |
64 | // call. It is equal to invalidCode if there was no such call. | |
65 | savedCode uint32 | |
66 | // err is the first error encountered during writing. Closing the encoder | |
ab61e9c4 | 67 | // will make any future Write calls return errClosed |
2fd401c8 | 68 | err error |
5133f00e ILT |
69 | // table is the hash table from 20-bit keys to 12-bit values. Each table |
70 | // entry contains key<<12|val and collisions resolve by linear probing. | |
71 | // The keys consist of a 12-bit code prefix and an 8-bit byte suffix. | |
72 | // The values are a 12-bit code. | |
73 | table [tableSize]uint32 | |
74 | } | |
75 | ||
76 | // writeLSB writes the code c for "Least Significant Bits first" data. | |
2fd401c8 | 77 | func (e *encoder) writeLSB(c uint32) error { |
5133f00e ILT |
78 | e.bits |= c << e.nBits |
79 | e.nBits += e.width | |
80 | for e.nBits >= 8 { | |
81 | if err := e.w.WriteByte(uint8(e.bits)); err != nil { | |
82 | return err | |
83 | } | |
84 | e.bits >>= 8 | |
85 | e.nBits -= 8 | |
86 | } | |
87 | return nil | |
88 | } | |
89 | ||
90 | // writeMSB writes the code c for "Most Significant Bits first" data. | |
2fd401c8 | 91 | func (e *encoder) writeMSB(c uint32) error { |
5133f00e ILT |
92 | e.bits |= c << (32 - e.width - e.nBits) |
93 | e.nBits += e.width | |
94 | for e.nBits >= 8 { | |
95 | if err := e.w.WriteByte(uint8(e.bits >> 24)); err != nil { | |
96 | return err | |
97 | } | |
98 | e.bits <<= 8 | |
99 | e.nBits -= 8 | |
100 | } | |
101 | return nil | |
102 | } | |
103 | ||
104 | // errOutOfCodes is an internal error that means that the encoder has run out | |
105 | // of unused codes and a clear code needs to be sent next. | |
2fd401c8 | 106 | var errOutOfCodes = errors.New("lzw: out of codes") |
5133f00e ILT |
107 | |
108 | // incHi increments e.hi and checks for both overflow and running out of | |
109 | // unused codes. In the latter case, incHi sends a clear code, resets the | |
110 | // encoder state and returns errOutOfCodes. | |
2fd401c8 | 111 | func (e *encoder) incHi() error { |
5133f00e ILT |
112 | e.hi++ |
113 | if e.hi == e.overflow { | |
114 | e.width++ | |
115 | e.overflow <<= 1 | |
116 | } | |
117 | if e.hi == maxCode { | |
118 | clear := uint32(1) << e.litWidth | |
119 | if err := e.write(e, clear); err != nil { | |
120 | return err | |
121 | } | |
122 | e.width = uint(e.litWidth) + 1 | |
123 | e.hi = clear + 1 | |
124 | e.overflow = clear << 1 | |
125 | for i := range e.table { | |
126 | e.table[i] = invalidEntry | |
127 | } | |
128 | return errOutOfCodes | |
129 | } | |
130 | return nil | |
131 | } | |
132 | ||
133 | // Write writes a compressed representation of p to e's underlying writer. | |
4ccad563 | 134 | func (e *encoder) Write(p []byte) (n int, err error) { |
5133f00e ILT |
135 | if e.err != nil { |
136 | return 0, e.err | |
137 | } | |
138 | if len(p) == 0 { | |
139 | return 0, nil | |
140 | } | |
4ccad563 | 141 | n = len(p) |
5133f00e ILT |
142 | litMask := uint32(1<<e.litWidth - 1) |
143 | code := e.savedCode | |
144 | if code == invalidCode { | |
145 | // The first code sent is always a literal code. | |
146 | code, p = uint32(p[0])&litMask, p[1:] | |
147 | } | |
148 | loop: | |
149 | for _, x := range p { | |
150 | literal := uint32(x) & litMask | |
151 | key := code<<8 | literal | |
152 | // If there is a hash table hit for this key then we continue the loop | |
153 | // and do not emit a code yet. | |
154 | hash := (key>>12 ^ key) & tableMask | |
155 | for h, t := hash, e.table[hash]; t != invalidEntry; { | |
156 | if key == t>>12 { | |
157 | code = t & maxCode | |
158 | continue loop | |
159 | } | |
160 | h = (h + 1) & tableMask | |
161 | t = e.table[h] | |
162 | } | |
163 | // Otherwise, write the current code, and literal becomes the start of | |
164 | // the next emitted code. | |
165 | if e.err = e.write(e, code); e.err != nil { | |
166 | return 0, e.err | |
167 | } | |
168 | code = literal | |
169 | // Increment e.hi, the next implied code. If we run out of codes, reset | |
170 | // the encoder state (including clearing the hash table) and continue. | |
4ccad563 ILT |
171 | if err1 := e.incHi(); err1 != nil { |
172 | if err1 == errOutOfCodes { | |
5133f00e ILT |
173 | continue |
174 | } | |
4ccad563 | 175 | e.err = err1 |
5133f00e ILT |
176 | return 0, e.err |
177 | } | |
178 | // Otherwise, insert key -> e.hi into the map that e.table represents. | |
179 | for { | |
180 | if e.table[hash] == invalidEntry { | |
181 | e.table[hash] = (key << 12) | e.hi | |
182 | break | |
183 | } | |
184 | hash = (hash + 1) & tableMask | |
185 | } | |
186 | } | |
187 | e.savedCode = code | |
4ccad563 | 188 | return n, nil |
5133f00e ILT |
189 | } |
190 | ||
191 | // Close closes the encoder, flushing any pending output. It does not close or | |
192 | // flush e's underlying writer. | |
2fd401c8 | 193 | func (e *encoder) Close() error { |
5133f00e | 194 | if e.err != nil { |
ab61e9c4 | 195 | if e.err == errClosed { |
5133f00e ILT |
196 | return nil |
197 | } | |
198 | return e.err | |
199 | } | |
ab61e9c4 ILT |
200 | // Make any future calls to Write return errClosed. |
201 | e.err = errClosed | |
5133f00e ILT |
202 | // Write the savedCode if valid. |
203 | if e.savedCode != invalidCode { | |
204 | if err := e.write(e, e.savedCode); err != nil { | |
205 | return err | |
206 | } | |
207 | if err := e.incHi(); err != nil && err != errOutOfCodes { | |
208 | return err | |
209 | } | |
210 | } | |
211 | // Write the eof code. | |
212 | eof := uint32(1)<<e.litWidth + 1 | |
213 | if err := e.write(e, eof); err != nil { | |
214 | return err | |
215 | } | |
216 | // Write the final bits. | |
217 | if e.nBits > 0 { | |
ab61e9c4 | 218 | if e.order == MSB { |
5133f00e ILT |
219 | e.bits >>= 24 |
220 | } | |
221 | if err := e.w.WriteByte(uint8(e.bits)); err != nil { | |
222 | return err | |
223 | } | |
224 | } | |
225 | return e.w.Flush() | |
226 | } | |
227 | ||
228 | // NewWriter creates a new io.WriteCloser that satisfies writes by compressing | |
229 | // the data and writing it to w. | |
230 | // It is the caller's responsibility to call Close on the WriteCloser when | |
231 | // finished writing. | |
232 | // The number of bits to use for literal codes, litWidth, must be in the | |
233 | // range [2,8] and is typically 8. | |
234 | func NewWriter(w io.Writer, order Order, litWidth int) io.WriteCloser { | |
2fd401c8 | 235 | var write func(*encoder, uint32) error |
5133f00e ILT |
236 | switch order { |
237 | case LSB: | |
238 | write = (*encoder).writeLSB | |
239 | case MSB: | |
240 | write = (*encoder).writeMSB | |
241 | default: | |
2fd401c8 | 242 | return &errWriteCloser{errors.New("lzw: unknown order")} |
5133f00e ILT |
243 | } |
244 | if litWidth < 2 || 8 < litWidth { | |
245 | return &errWriteCloser{fmt.Errorf("lzw: litWidth %d out of range", litWidth)} | |
246 | } | |
247 | bw, ok := w.(writer) | |
248 | if !ok { | |
249 | bw = bufio.NewWriter(w) | |
250 | } | |
251 | lw := uint(litWidth) | |
252 | return &encoder{ | |
253 | w: bw, | |
ab61e9c4 | 254 | order: order, |
5133f00e ILT |
255 | write: write, |
256 | width: 1 + lw, | |
257 | litWidth: lw, | |
258 | hi: 1<<lw + 1, | |
259 | overflow: 1 << (lw + 1), | |
260 | savedCode: invalidCode, | |
261 | } | |
262 | } |