sim/testsuite/sim/bfin/conv_enc_gen.s

   1 # mach: bfin
   2
   3 // GENERIC CONVOLUTIONAL ENCODER
   4 // This is a generic rate 1/n convolutional encoder. It computes n output
   5 // bits for each input bit, based on n generic polynomials.
   6 // It uses the set of BXOR_CC instructions to compute bit XOR
   7 // reduction from a state masked by a polynomial.  For an alternate
   8 // solution based on assembling several partial words, as in
   9 // the BDT benchmark, see file conv_enc.c. The solution presented
  10 // here is slower than conv_enc.c, but more generic.
  11 //
  12 // Forward Shift Register
  13 // -----------------------
  14 // This solution implements the XOR function by shifting the state
  15 // left by one, applying a mask to the state, and reducing
  16 // the result with a bit XOR reduction function.
  17 //                   ----- XOR------------> G0
  18 //                   |     |     |  |
  19 //        +------------------------------+
  20 //        | b0 b1 b2 b3          b14 b15 | <- in
  21 //        +------------------------------+
  22 //                   |  |  |  |     |
  23 //                   ----- XOR------------> G1
  24 // Instruction BXOR computes the bit G0 or G1 and stores it into CC
  25 // and also into a destination reg half. Here, we take CC and rotate it
  26 // into an output register.
  27 // However, one can also store the output bit directly by storing
  28 // the register half where this bit is placed. This would result
  29 // in an output structure similar to the one in the original function
  30 // Convolutional_Encode(), where an entire half word holds a bit.
  31 // The resulting execution speed would be roughly twice as fast,
  32 // since there is no need to rotate output bit via CC.
  33
  34 .include "testutils.inc"
  35         start
  36         // Setup: loadsym (macro, presumably from testutils.inc) points P0 at input, P1 at output
  37         loadsym P0, input;
  38         loadsym P1, output;
  39         // R1 collects the output bits; R2/R3 hold the two polynomial masks
  40         R1 = 0; R2 = 0;R3 = 0;
  41         // each mask sits in the upper half of its register, the lower half stays 0
  42         R2.L = 0;
  43         R2.H = 0xa01d;  // polynom 0
  44         R3.L = 0;
  45         R3.H = 0x12f4;  // polynom 1
  46
  47         // load the initial CurrentState (zero) and shift it into the upper half of A0
  48         A1 = A0 = 0;
  49         R0 = 0x0000;
  50         A0.w = R0;
  51         A0 = A0 << 16;
  52
  53         // l-loop counter is in P4: one pass per 16-bit input word (2 words)
  54         P4 = 2(Z);
  55         // **** START l-LOOP *****
  56 l$0:
  57         // Each l-loop pass consumes one 16-bit input word and stores two 16-bit output words.
  58         // insert 16 bits of input into lower half of A0
  59         // and advance input pointer
  60         R0 = W [ P0 ++ ] (Z);
  61         A0.L = R0.L;
  62         // m-loop: 2 passes, each ending with one 16-bit store at m$0end
  63         P5 = 2 (Z);
  64         LSETUP ( m$0 , m$0end ) LC0 = P5;       // **** BEGIN m-LOOP *****
  65 m$0:
  66         // i-loop: 8 passes x 2 bits = 16 bits rotated into R1; P5 is reused since LC0 is already loaded
  67         P5 = 8 (Z);
  68         LSETUP ( i$1 , i$1end ) LC1 = P5;       // **** BEGIN i-LOOP *****
  69 i$1:
  70         R4.L = CC = BXORSHIFT( A0 , R2 );       // shift state by one, polynom0 -> CC (R4.L is not read afterwards)
  71         R1 = ROT R1 BY 1;                       // CC -> R1
  72         R4.L = CC = BXOR( A0 , R3 );            // polynom1 -> CC, non-shifting form on the same state
  73 i$1end:
  74         R1 = ROT R1 BY 1;                       // CC -> R1 (last instruction of the i-loop)
  75
  76         // store the 16 output bits from the low half of R1 and advance the output pointer
  77 m$0end:
  78         W [ P1 ++ ] = R1;
  79         // software-counted l-loop: decrement P4 and repeat until it reaches zero
  80         P4 += -1;
  81         CC = P4 == 0;
  82         IF !CC JUMP l$0;        // **** END l-LOOP *****
  83
  84                                 // Check results: read back the four output words via I2; DBGA presumably fails the test on a mismatch
  85         loadsym I2, output;
  86         R0.L = W [ I2 ++ ];     DBGA ( R0.L , 0x8c62 );
  87         R0.L = W [ I2 ++ ];     DBGA ( R0.L , 0x262e );
  88         R0.L = W [ I2 ++ ];     DBGA ( R0.L , 0x5b4d );
  89         R0.L = W [ I2 ++ ];     DBGA ( R0.L , 0x834f );
  90         pass
  91
  92         .data
  93 input:  // two 16-bit input words, one read per l-loop pass
  94         .dw 0x999f
  95         .dw 0x1999
  96         // four 16-bit result slots, zero until the encoder stores into them
  97 output:
  98         .dw 0x0000
  99         .dw 0x0000
 100         .dw 0x0000
 101         .dw 0x0000