conv_enc_gen.s 2.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101
  1. # mach: bfin
  2. // GENERIC CONVOLUTIONAL ENCODER
  3. // This is a generic rate 1/n convolutional encoder. It computes n output
  4. // bits for each input bit, based on n generic polynomials.
  5. // It uses the set of BXOR_CC instructions to compute bit XOR
  6. // reduction from a state masked by a polynomial. For an alternate
  7. // solution based on assembling several partial words, as in
  8. // the BDT benchmark, see file conv_enc.c. The solution presented
  9. // here is slower than conv_enc.c, but more generic.
  10. //
  11. // Forward Shift Register
  12. // -----------------------
  13. // This solution implements the XOR function by shifting the state
  14. // left by one, applying a mask to the state, and reducing
  15. // the result with a bit XOR reduction function.
  16. // ----- XOR------------> G0
  17. // | | | |
  18. // +------------------------------+
  19. // | b0 b1 b2 b3 b14 b15 | <- in
  20. // +------------------------------+
  21. // | | | | |
  22. // ----- XOR------------> G1
  23. // Instruction BXOR computes the bit G0 or G1 and stores it into CC
  24. // and also into a destination reg half. Here, we take CC and rotate it
  25. // into an output register.
  26. // However, one can also store the output bit directly by storing
  27. // the register half where this bit is placed. This would result
  28. // in an output structure similar to the one in the original function
  29. // Convolutional_Encode(), where an entire half word holds a bit.
  30. // The resulting execution speed would be roughly twice as fast,
  31. // since there is no need to rotate output bit via CC.
  32. .include "testutils.inc"
  33. start
  // Test body: encodes the two 16-bit words at `input` with two polynomials
  // (two output bits per input bit), writes four 16-bit words to `output`,
  // then compares them with the expected values.
  //
  // Register usage, as seen in the code below:
  //   P0    input read pointer          P1    output write pointer
  //   R2.H  polynomial mask 0           R3.H  polynomial mask 1
  //   A0    encoder shift-register state
  //   R1    collects the output bits rotated in from CC
  //   R4.L  destination half of BXORSHIFT/BXOR; never read afterwards
  //   P4    l-loop counter              P5    scratch for hardware loop counts
  //   LC0   m-loop count                LC1   i-loop count
  // NOTE(review): start, loadsym and pass are not defined in this file;
  // presumably they are macros from testutils.inc -- confirm there.
  34. loadsym P0, input; // presumably loads the address of `input` into P0
  35. loadsym P1, output; // presumably loads the address of `output` into P1
  36. R1 = 0; R2 = 0;R3 = 0;
  37. R2.L = 0; // R2 was already cleared on the previous line
  38. R2.H = 0xa01d; // polynom 0
  39. R3.L = 0; // R3 was already cleared above
  40. R3.H = 0x12f4; // polynom 1
  41. // load CurrentState (zero in this test) into the upper half of A0
  42. A1 = A0 = 0;
  43. R0 = 0x0000; // initial encoder state
  44. A0.w = R0;
  45. A0 = A0 << 16; // move the state above the 16 bits that will hold input
  46. // l-loop counter is in P4
  47. P4 = 2(Z); // two passes, one per 16-bit word at `input`
  48. // **** START l-LOOP *****
  49. l$0:
  50. // insert 16 bits of input into lower half of A0
  51. // and advance input pointer
  52. R0 = W [ P0 ++ ] (Z);
  53. A0.L = R0.L;
  54. P5 = 2 (Z); // m-loop: two iterations per input word, one output word each
  55. LSETUP ( m$0 , m$0end ) LC0 = P5; // **** BEGIN m-LOOP *****
  56. m$0:
  57. P5 = 8 (Z); // i-loop: eight input bits per output word (P5 reused as scratch)
  58. LSETUP ( i$1 , i$1end ) LC1 = P5; // **** BEGIN i-LOOP *****
  59. i$1:
  60. R4.L = CC = BXORSHIFT( A0 , R2 ); // shift state left by one, then polynom0 -> CC
  61. R1 = ROT R1 BY 1; // CC -> R1
  62. R4.L = CC = BXOR( A0 , R3 ); // polynom1 -> CC (same state, no further shift)
  63. i$1end:
  64. R1 = ROT R1 BY 1; // CC -> R1 (last instruction of the i-loop)
  65. // store the 16 output bits collected in R1 and advance the output pointer
  66. m$0end:
  67. W [ P1 ++ ] = R1; // last instruction of the m-loop
  68. P4 += -1;
  69. CC = P4 == 0;
  70. IF !CC JUMP l$0; // **** END l-LOOP *****
  71. // Check results: compare the four words at `output` with the expected values
  // NOTE(review): DBGA presumably flags a failure when the register half
  // differs from the constant -- confirm against the Blackfin reference.
  72. loadsym I2, output;
  73. R0.L = W [ I2 ++ ]; DBGA ( R0.L , 0x8c62 );
  74. R0.L = W [ I2 ++ ]; DBGA ( R0.L , 0x262e );
  75. R0.L = W [ I2 ++ ]; DBGA ( R0.L , 0x5b4d );
  76. R0.L = W [ I2 ++ ]; DBGA ( R0.L , 0x834f );
  77. pass
  78. .data
  // Test vectors for the encoder: two 16-bit input words, followed by the
  // four-word buffer the program writes its results to.
  79. input:
  80. .dw 0x999f // loaded into A0.L on the first l-loop pass
  81. .dw 0x1999 // loaded into A0.L on the second l-loop pass
  82. output:
  // Four 16-bit slots, initially zero; the program stores one word per m-loop
  // iteration and then checks each slot against the value noted below.
  83. .dw 0x0000 // checked against 0x8c62
  84. .dw 0x0000 // checked against 0x262e
  85. .dw 0x0000 // checked against 0x5b4d
  86. .dw 0x0000 // checked against 0x834f