001 /**
002 * Copyright (C) 2013 - Francesc Auli-Llinas
003 *
004 * This program is distributed under the BOI License.
005 * This program is distributed in the hope that it will be useful, but without any warranty; without
006 * even the implied warranty of merchantability or fitness for a particular purpose.
007 * You should have received a copy of the BOI License along with this program. If not,
008 * see <http://www.deic.uab.cat/~francesc/software/license/>.
009 */
010 package coders;
011
012 import streams.ByteStream;
013
014
015 /**
016 * This class implements an arithmetic coder with codewords of fixed length. This class can perform both
017 * the operations of the encoder and the decoder. The constructor of the class permits the use of codewords
018 * with different lengths, and the use of probabilities with different precision. It also permits the use
019 * of context-adaptive coding. As the MQ coder, this coder stuffs one 0 bit after a byte equal to 0xFF.<br>
020 *
021 * Usage: once the object is created, the functions to code/decode symbols are used to code the message. Instead
022 * of destroying the object and creating another one to encode a new message, it is more computationally
023 * efficient to reuse the same object. When encoding, the same object can be reused by calling the functions
024 * <code>terminate</code>, get the stream wherever is needed, <code>changeStream</code>,
025 * <code>restartEncoding</code> and <code>reset</code> in this order. To reuse the decoder, the functions
026 * <code>changeStream</code>, <code>restartDecoding</code>, and <code>reset</code> have to be called in this
027 * order.<br>
028 *
029 * Multithreading support: the object must be created and manipulated by a single thread. There can be many
030 * objects of this class running simultaneously as long as a single thread manipulates each object.<br>
031 *
032 * @author Francesc Auli-Llinas
033 * @version 1.1
034 */
035 public final class ArithmeticCoderFLW{
036
037 /**
038 * Indicates the number of symbols coded before updating the context probability.
039 * <p>
040 * Must be of the form 2^X - 1.
041 */
042 private static final int UPDATE_PROB0 = 7;
043
044 /**
045 * Indicates the maximum number of symbols within the variable-size sliding window that are employed
046 * to compute the probability of the context.
047 * <p>
048 * Must be of the form 2^X - 1.
049 */
050 private static final int WINDOW_PROB = 127;
051
052 /**
053 * ByteStream employed by the coder to write/read the output/input bytes.
054 * <p>
055 * The stream may contain zero bytes.
056 */
057 private ByteStream stream;
058
059 /**
060 * Length of the codeword encoded. It can be changed by the constructor.
061 * <p>
062 * If must be greater than 0 and smaller than 64.
063 */
064 private int codewordLength = 32;
065
066 /**
067 * Number of bits to represent the probability employed to code the symbols. Note that to use fewer
068 * bits than the codewordLength may decrease compression efficiency for very high/low probabilities
069 * (to use more bits is inconsequential). It can be changed by the constructor.
070 * <p>
071 * If must be greater than 0 and smaller than 64. Be aware that codewordLength + precisionBits < 64.
072 */
073 private int precisionBits = 15;
074
075 /**
076 * Maximum allowed value of the codeword.
077 * <p>
078 * It is 2^codewordLength - 1, and is computed by the constructor of the class.
079 */
080 private long codewordMax = -1;
081
082 /**
083 * Number of complete bytes of the codeword.
084 * <p>
085 * It is ceil(codewordLength / 8), and is computed by the constructor of the class.
086 */
087 private int codewordBytes = -1;
088
089 /**
090 * Probability 0.5 using the precision bits of the class.
091 * <p>
092 * It is 2^(precisionBits - 1), and is computed by the constructor of the class.
093 */
094 private int precisionMid = -1;
095
096 /**
097 * Minimum value of the interval.
098 * <p>
099 * Must be greater than 0 and smaller than 2^codewordLength - 1.
100 */
101 private long intervalMin = -1;
102
103 /**
104 * Size of the interval - 1.
105 * <p>
106 * Must be greater than 0 and equal or smaller than 2^codewordLength - 1.
107 */
108 private long intervalSize = -1;
109
110 /**
111 * Number within the interval stored in the codeword.
112 * <p>
113 * Must be equal or greater than intervalMin and equal or smaller than intervalMin + intervalSize.
114 */
115 private long interval = -1;
116
117 /**
118 * Byte to flush out/read.
119 * <p>
120 * Flushed byte to the stream.
121 */
122 private int Tr = -1;
123
124 /**
125 * Number of bits to transfer.
126 * <p>
127 * It is useful when the codewordLength does not correspond to a fixed number of bytes.
128 */
129 private int t = -1;
130
131 /**
132 * Current byte read/write.
133 * <p>
134 * Current position in the stream.
135 */
136 private int L;
137
138 /**
139 * Number of contexts.
140 * <p>
141 * Set when the class is instantiated.
142 */
143 private int numContexts = -1;
144
145 /**
146 * Current probability of each context.
147 * <p>
148 * Each index corresponds to one context. The probability is computed via {@link #prob0ToFLW}.
149 */
150 private int[] contextProb0FLW = null;
151
152 /**
153 * Number of 0s coded in this context.
154 * <p>
155 * It is initialized to 0 in the <code>reset</code> function.
156 */
157 private int[] context0s = null;
158
159 /**
160 * Number of 0s coded in the last WINDOW_PROB symbols coded.
161 * <p>
162 * It is initialized to 0 in the <code>reset</code> function.
163 */
164 private int[] context0sWindow = null;
165
166 /**
167 * Total number of symbols coded in this context.
168 * <p>
169 * It is initialized to 0 in the <code>reset</code> function.
170 */
171 private int[] contextTotal = null;
172
173 /**
174 * Indicates whether the function <code>fillInterval</code> fills the interval with zeroes when
175 * no more bytes are in the stream (only for decoding).
176 * <p>
177 * True indicates that replenishment with 0s is activated.
178 */
179 private boolean replenishment = true;
180
181 /**
182 * Masks employed when transferring the bits of the interval to the stream.
183 * <p>
184 * The position in the array indicates the number of least significant bits for which the mask is computed.
185 */
186 private static final long[] BIT_MASKS = {0x0, 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F, 0x7F, 0xFF};
187
188 /**
189 * Bit masks (employed when coding integers).
190 * <p>
191 * The position in the array indicates the bit for which the mask is computed.
192 */
193 protected static final int[] BIT_MASKS2 = {1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6,
194 1 << 7, 1 << 8, 1 << 9, 1 << 10, 1 << 11, 1 << 12, 1 << 13, 1 << 14, 1 << 15, 1 << 16, 1 << 17,
195 1 << 18, 1 << 19, 1 << 20, 1 << 21, 1 << 22, 1 << 23, 1 << 24, 1 << 25, 1 << 26, 1 << 27,
196 1 << 28, 1 << 29, 1 << 30};
197
198
199 /**
200 * Initializes internal registers. Before using the coder, a stream has to be set
201 * through <code>changeStream</code>.
202 */
203 public ArithmeticCoderFLW(){
204 assert(codewordLength > 0);
205 assert(precisionBits > 0);
206 assert(codewordLength + precisionBits < 64);
207
208 codewordMax = ((long) 1 << codewordLength) - 1;
209 codewordBytes = (int) Math.ceil((float) codewordLength / 8f);
210 precisionMid = 1 << (precisionBits - 1);
211 restartEncoding();
212 }
213
214 /**
215 * Initializes internal registers. Before using the coder, a stream has to be set
216 * through <code>changeStream</code>.
217 *
218 * @param codewordLength number of bits of the codeword
219 */
220 public ArithmeticCoderFLW(int codewordLength){
221 this.codewordLength = codewordLength;
222
223 assert(codewordLength > 0);
224 assert(precisionBits > 0);
225 assert(codewordLength + precisionBits < 64);
226
227 codewordMax = ((long) 1 << codewordLength) - 1;
228 codewordBytes = (int) Math.ceil((float) codewordLength / 8f);
229 precisionMid = 1 << (precisionBits - 1);
230 restartEncoding();
231 }
232
233 /**
234 * Initializes internal registers. Before using the coder, a stream has to be set
235 * through <code>changeStream</code>.
236 *
237 * @param codewordLength number of bits of the codeword
238 * @param precisionBits number of bits employed in the probabilities
239 */
240 public ArithmeticCoderFLW(int codewordLength, int precisionBits){
241 this.codewordLength = codewordLength;
242 this.precisionBits = precisionBits;
243
244 assert(codewordLength > 0);
245 assert(precisionBits > 0);
246 assert(codewordLength + precisionBits < 64);
247
248 codewordMax = ((long) 1 << codewordLength) - 1;
249 codewordBytes = (int) Math.ceil((float) codewordLength / 8f);
250 precisionMid = 1 << (precisionBits - 1);
251 restartEncoding();
252 }
253
254 /**
255 * Initializes internal registers. Before using the coder, a stream has to be set
256 * through <code>changeStream</code>.
257 *
258 * @param codewordLength number of bits of the codeword
259 * @param precisionBits number of bits employed in the probabilities
260 * @param numContexts number of contexts available for this object
261 */
262 public ArithmeticCoderFLW(int codewordLength, int precisionBits, int numContexts){
263 this.codewordLength = codewordLength;
264 this.precisionBits = precisionBits;
265 this.numContexts = numContexts;
266
267 assert(codewordLength > 0);
268 assert(precisionBits > 0);
269 assert(numContexts > 0);
270 assert(codewordLength + precisionBits < 64);
271
272 codewordMax = ((long) 1 << codewordLength) - 1;
273 codewordBytes = (int) Math.ceil((float) codewordLength / 8f);
274 precisionMid = 1 << (precisionBits - 1);
275 contextProb0FLW = new int[numContexts];
276 context0s = new int[numContexts];
277 context0sWindow = new int[numContexts];
278 contextTotal = new int[numContexts];
279 reset();
280 restartEncoding();
281 }
282
283 /**
284 * Transforms the probability of the symbol 0 (or false) in the range (0,1) into the integer
285 * required in the FLW coder to represent that probability.
286 *
287 * @param prob0 in the range [0:1]. If 0 or 1, the closest probability to 0/1 given the
288 * precision bits employed will be used --but it will not be 0 or 1.
289 * @param precisionBits number of bits employed to represent the probability in the
290 * returned integer
291 * @return integer that can be fed to the FLW coder. It is computed as (2^precisionBits) * prob0.
292 */
293 public static int prob0ToFLW(float prob0, int precisionBits){
294 assert((prob0 >= 0f) && (prob0 <= 1f));
295
296 int prob0FLW = (int) ((float) (1 << precisionBits) * prob0);
297 if(prob0FLW == 0){
298 prob0FLW = 1;
299 }else if(prob0FLW == (1 << precisionBits)){
300 prob0FLW = (1 << precisionBits) - 1;
301 }
302 assert((prob0FLW > 0) && (prob0FLW < (1 << precisionBits)));
303 return(prob0FLW);
304 }
305
306 /**
307 * Transforms the FLW integer to the probability of the symbol 0 (or false) in the range (0,1).
308 *
309 * @param prob0FLW integer fed to the FLW coder. It is (2^precisionBits) * prob0.
310 * @param precisionBits number of bits employed to represent the probability in prob0FLW. It
311 * is recommended to be, at least, equal to the codewordLength. To use fewer bits than the
312 * codewordLength may decrease compression efficiency for very high/low probabilities,
313 * and to use more is inconsequential. Also, be aware that codewordLength + precisionBits < 64.
314 * @return prob0 in the range (0:1)
315 */
316 public static float FLWToProb0(int prob0FLW, int precisionBits){
317 assert((prob0FLW > 0) && (prob0FLW < (1 << precisionBits)));
318
319 float prob0 = (float) prob0FLW / (float) (1 << precisionBits);
320 assert((prob0 > 0f) && (prob0 < 1f));
321 return(prob0);
322 }
323
324 /**
325 * Encodes a bit without using any probability model.
326 *
327 * @param bit input
328 */
329 public void encodeBit(boolean bit){
330 encodeBitProb(bit, precisionMid);
331 }
332
333 /**
334 * Decodes a bit without using any probability model.
335 *
336 * @return bit output
337 * @throws Exception when some problem manipulating the stream occurs
338 */
339 public boolean decodeBit() throws Exception{
340 return(decodeBitProb(precisionMid));
341 }
342
343 /**
344 * Encodes a bit using a context so that the probabilities are adaptively adjusted
345 * depending on the incoming symbols.
346 *
347 * @param bit input
348 * @param context context of the symbol
349 */
350 public void encodeBitContext(boolean bit, int context){
351 //Updates the probability of this context, when necessary
352 if((contextTotal[context] & UPDATE_PROB0) == UPDATE_PROB0){
353 if(context0s[context] == 0){
354 contextProb0FLW[context] = 1;
355 }else if(context0s[context] == contextTotal[context]){
356 contextProb0FLW[context] = (1 << precisionBits) - 1;
357 }else{
358 contextProb0FLW[context] = (context0s[context] << precisionBits) / contextTotal[context];
359 }
360 assert((contextProb0FLW[context] > 0) && (contextProb0FLW[context] < (1 << precisionBits)));
361 if((contextTotal[context] & WINDOW_PROB) == WINDOW_PROB){
362 if(context0sWindow[context] != -1){
363 contextTotal[context] -= WINDOW_PROB;
364 context0s[context] -= context0sWindow[context];
365 }
366 context0sWindow[context] = context0s[context];
367 }
368 }
369
370 //Encodes the bit
371 encodeBitProb(bit, contextProb0FLW[context]);
372
373 //Updates the number of symbols coded for this context
374 if(bit == false){
375 context0s[context]++;
376 }
377 contextTotal[context]++;
378 }
379
380 /**
381 * Decodes a bit using a context so that the probabilities are adaptively adjusted
382 * depending on the outcoming symbols.
383 *
384 * @param context context of the symbols
385 * @return output bit
386 * @throws Exception when some problem manipulating the stream occurs
387 */
388 public boolean decodeBitContext(int context) throws Exception{
389 //Updates the probability of this context, when necessary
390 if((contextTotal[context] & UPDATE_PROB0) == UPDATE_PROB0){
391 if(context0s[context] == 0){
392 contextProb0FLW[context] = 1;
393 }else if(context0s[context] == contextTotal[context]){
394 contextProb0FLW[context] = (1 << precisionBits) - 1;
395 }else{
396 contextProb0FLW[context] = (context0s[context] << precisionBits) / contextTotal[context];
397 }
398 assert((contextProb0FLW[context] > 0) && (contextProb0FLW[context] < (1 << precisionBits)));
399 if((contextTotal[context] & WINDOW_PROB) == WINDOW_PROB){
400 if(context0sWindow[context] != -1){
401 contextTotal[context] -= WINDOW_PROB;
402 context0s[context] -= context0sWindow[context];
403 }
404 context0sWindow[context] = context0s[context];
405 }
406 }
407
408 //Decodes the bit
409 boolean bit = decodeBitProb(contextProb0FLW[context]);
410
411 //Updates the number of symbols coded for this context
412 if(bit == false){
413 context0s[context]++;
414 }
415 contextTotal[context]++;
416 return(bit);
417 }
418
419 /**
420 * Encodes a bit using a specified probability.
421 *
422 * @param bit input
423 * @param prob0FLW probability of bit == false. This probability is generated via {@link #prob0ToFLW}
424 */
425 public void encodeBitProb(boolean bit, int prob0FLW){
426 assert((prob0FLW > 0) && (prob0FLW < (1 << precisionBits)));
427 assert(intervalSize >= 1);
428
429 if(bit == false){
430 intervalSize = ((long) prob0FLW * intervalSize) >> precisionBits;
431 }else{
432 long tmp = (((long) prob0FLW * intervalSize) >> precisionBits) + 1;
433 intervalMin += tmp;
434 intervalSize -= tmp;
435 }
436
437 if(intervalSize == 0){
438 interval = intervalMin;
439 transferInterval(codewordLength);
440 intervalMin = 0;
441 intervalSize = codewordMax;
442 }
443 }
444
445 /**
446 * Decodes a bit using a specified probability.
447 *
448 * @param prob0FLW probability of bit == false. This probability is generated via {@link #prob0ToFLW}
449 * @return bit decoded bit
450 * @throws Exception when some problem manipulating the stream occurs
451 */
452 public boolean decodeBitProb(int prob0FLW) throws Exception{
453 assert((prob0FLW > 0) && (prob0FLW < (1 << precisionBits)));
454
455 if(intervalSize == 0){
456 fillInterval();
457 intervalMin = 0;
458 intervalSize = codewordMax;
459 }
460 assert(intervalSize >= 1);
461
462 long tmp = (((long) prob0FLW * intervalSize) >> precisionBits) + 1;
463 long mid = intervalMin + tmp;
464 boolean bit;
465 if(interval >= mid){
466 bit = true;
467 intervalMin = mid;
468 intervalSize -= tmp;
469 }else{
470 bit = false;
471 intervalSize = tmp - 1;
472 }
473 return(bit);
474 }
475
476 /**
477 * Encodes an integer without using any probability model.
478 *
479 * @param num input
480 * @param numBits number of bits coded for that integer
481 */
482 public void encodeInteger(int num, int numBits){
483 assert(num >= 0);
484 assert(num < (1 << (numBits + 1)));
485
486 for(int bit = numBits - 1; bit >= 0; bit--){
487 boolean realBit = (num & BIT_MASKS2[bit]) != 0;
488 encodeBit(realBit);
489 }
490 }
491
492 /**
493 * Decodes an integer without using any probability model.
494 *
495 * @param numBits number of bits decoded for that integer
496 * @return output integer
497 * @throws Exception when some problem manipulating the stream occurs
498 */
499 public int decodeInteger(int numBits) throws Exception{
500 int num = 0;
501 for(int bit = numBits - 1; bit >= 0; bit--){
502 if(decodeBit()){
503 num += BIT_MASKS2[bit];
504 }
505 }
506 return(num);
507 }
508
509 /**
510 * Transfers the interval to the stream (for encoding only).
511 *
512 * @param length number of bits of the interval to be transferred
513 */
514 private void transferInterval(int length){
515 int transferredBits = 0;
516 while(transferredBits < length){
517 int remainingBits = codewordLength - transferredBits;
518 int transfer = remainingBits <= t? remainingBits: t;
519
520 if((remainingBits - t) >= 0){
521 Tr |= (interval >> (remainingBits - t)) & BIT_MASKS[transfer];
522 }else{
523 Tr |= (interval & BIT_MASKS[transfer]) << (t - remainingBits);
524 }
525
526 t -= transfer;
527 if(t == 0){
528 stream.putByte((byte) Tr);
529 if(Tr == 0xFF){
530 t = 7;
531 }else{
532 t = 8;
533 }
534 Tr = 0;
535 L++;
536 }
537 assert(t >= 0 && t <= 8);
538
539 transferredBits += transfer;
540 }
541 }
542
543 /**
544 * Reads the coded interval from the stream (for decoding only). If at the middle
545 * of reading the interval, the stream runs out of bytes, this function fills the
546 * least significant bits of the codeword with 0s.
547 */
548 private void fillInterval() throws Exception{
549 if(!replenishment && (L >= stream.getLength())){
550 //This placed here to throw the exception only when trying to fill a new interval
551 throw new Exception("No more data in the stream to fill the interval.");
552 }
553
554 int readBits = 0;
555 interval = 0;
556 do{
557 if(t == 0){
558 if(L < stream.getLength()){
559 if(Tr == 0xFF){
560 t = 7;
561 }else{
562 t = 8;
563 }
564 Tr = (0x000000FF & stream.getByte(L));
565 L++;
566 }else{
567 //Fills with 0s
568 Tr = 0;
569 t = 8;
570 }
571 }
572
573 int remainingBits = codewordLength - readBits;
574 int read = remainingBits <= t? remainingBits: t;
575
576 if((remainingBits - t) >= 0){
577 interval |= ((long) Tr & BIT_MASKS[read]) << (remainingBits - t);
578 }else{
579 interval |= ((long) Tr >> (t - remainingBits)) & BIT_MASKS[read];
580 }
581 assert(interval >= 0 && interval <= codewordMax);
582
583 t -= read;
584 assert(t >= 0 && t <= 8);
585
586 readBits += read;
587 }while(readBits < codewordLength);
588 }
589
590 /**
591 * Changes the current stream. When encoding, before calling this function the stream
592 * should be terminated calling the <code>terminate</code> function, and after calling
593 * this function the function <code>restartEncoding</code> must be called. When decoding,
594 * after calling this function the function <code>restartDecoding</code> must be called.
595 *
596 * @param stream the new ByteStream
597 */
598 public void changeStream(ByteStream stream){
599 if(stream == null){
600 stream = new ByteStream();
601 }
602 this.stream = stream;
603 }
604
605 /**
606 * Resets the state of all contexts.
607 */
608 public void reset(){
609 for(int c = 0; c < numContexts; c++){
610 contextProb0FLW[c] = prob0ToFLW(0.66f, precisionBits); //Slightly biased towards 0
611 context0s[c] = 2;
612 context0sWindow[c] = -1;
613 contextTotal[c] = 3;
614 }
615 }
616
617 /**
618 * Restarts the internal registers of the coder for encoding.
619 */
620 public void restartEncoding(){
621 intervalMin = 0;
622 intervalSize = codewordMax;
623 Tr = 0;
624 t = 8;
625 L = -1;
626 }
627
628 /**
629 * Restarts the internal registers of the coder for decoding.
630 *
631 * @throws Exception when some problem manipulating the stream occurs
632 */
633 public void restartDecoding() throws Exception{
634 intervalMin = 0;
635 intervalSize = 0;
636 Tr = 0;
637 L = 0;
638 t = 0;
639 }
640
641 /**
642 * Terminates the current stream using an optimal termination (for encoding purposes).
643 *
644 * @throws Exception when some problem manipulating the stream occurs
645 */
646 public void terminate() throws Exception{
647 long interval1 = 0;
648 long interval2 = 0;
649 int bits = codewordLength;
650 long intervalMax = intervalMin + intervalSize;
651 while(((interval1 < intervalMin) || (interval1 > intervalMax))
652 && ((interval2 < intervalMin) || (interval2 > intervalMax))){
653 bits--;
654 interval1 |= ((long) 1 << bits) & intervalMin;
655 interval2 |= ((long) 1 << bits) & intervalMax;
656 }
657
658 if((interval1 >= intervalMin) && (interval1 <= intervalMax)){
659 interval = interval1;
660 }else{
661 assert((interval2 >= intervalMin) && (interval2 <= intervalMax));
662 interval = interval2;
663 }
664 assert(bits >= 0);
665
666 transferInterval(codewordLength - bits);
667 if(t != 8){
668 stream.putByte((byte) Tr);
669 Tr = 0;
670 t = 8;
671 }
672 }
673
674 /**
675 * Computes the number of bytes belonging to the currently encoded data needed to flush
676 * the internal registers (for encoding purposes). This function is useful to determine
677 * potential truncation points of the stream.
678 *
679 * @return number of bytes
680 */
681 public int remainingBytes(){
682 if(t == 8){
683 return(codewordBytes);
684 }else{
685 return(codewordBytes + 1);
686 }
687 }
688
689 /**
690 * Gets the number of bytes read or written to the stream associated to the coder.
691 *
692 * @return number of bytes
693 */
694 public int getReadBytes(){
695 return(L);
696 }
697
698 /**
699 * Sets the {@link #replenishment} parameter.
700 *
701 * @param replenishment the new value for the parameter
702 */
703 public void setReplenishment(boolean replenishment){
704 this.replenishment = replenishment;
705 }
706 }
|