mls.s 1.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105
  1. # mach: aarch64
  2. # Check the vector multiply subtract instruction: mls.
  3. .include "testutils.inc"
  .data
  .p2align 4                    // 2^4 = 16-byte boundary (what `.align 4` means on AArch64)

// Source operand.  The test loads these 16 bytes into v0 and uses them as
// both multiplicands of every mls.  Read as little-endian bytes the values
// are 0x01, 0x02, ..., 0x10.
input:
  .word   0x04030201, 0x08070605
  .word   0x0c0b0a09, 0x100f0e0d

// Expected results.  The test presets every destination lane to 1 and then
// subtracts the square of the matching input lane, so each expected lane is
// (1 - x*x) truncated to the lane width.  The 64-bit arrangements have an
// 8-byte table; the 128-bit arrangements have a 16-byte table whose first
// 8 bytes repeat the 64-bit result.

// Byte lanes.
m8b:
  .word   0xf1f8fd00, 0xc1d0dde8
m16b:
  .word   0xf1f8fd00, 0xc1d0dde8
  .word   0x71889db0, 0x01203d58

// Halfword lanes.
m4h:
  .word   0xe7f8fc00, 0x8fd0c3e8
m8h:
  .word   0xe7f8fc00, 0x8fd0c3e8
  .word   0xf7884bb0, 0x1f209358

// Word lanes.
m2s:
  .word   0xebf5fc00, 0x5b95c3e8
m4s:
  .word   0xebf5fc00, 0x5b95c3e8
  .word   0x4ad54bb0, 0xb9b49358
  // mls_check_d: exercise mls on one 64-bit arrangement.
  //   arr  - vector arrangement: 8b, 4h or 2s
  //   want - label of the 8-byte expected result
  // Expects v0 to hold the input vector.  Writes v1, x1, x3, x4 and the
  // condition flags; branches to .Lfailure on a mismatch.
  // NOTE(review): only bits 63:0 of v1 are compared here; bits 127:64 are
  // never checked for the 64-bit arrangements - confirm that is intended.
  .macro  mls_check_d arr, want
  movi    v1.\arr, #1                     // accumulator: every lane = 1
  mls     v1.\arr, v0.\arr, v0.\arr       // v1 = v1 - v0 * v0, lane by lane
  mov     x1, v1.d[0]
  adrp    x3, \want
  ldr     x4, [x3, #:lo12:\want]
  cmp     x1, x4
  b.ne    .Lfailure
  .endm

  // mls_check_q: exercise mls on one 128-bit arrangement.
  //   arr  - vector arrangement: 16b, 8h or 4s
  //   want - label of the 16-byte expected result
  // Expects v0 to hold the input vector.  Writes v1, x1-x5 and the
  // condition flags; branches to .Lfailure on a mismatch in either half.
  .macro  mls_check_q arr, want
  movi    v1.\arr, #1                     // accumulator: every lane = 1
  mls     v1.\arr, v0.\arr, v0.\arr       // v1 = v1 - v0 * v0, lane by lane
  mov     x1, v1.d[0]                     // low half of the result
  mov     x2, v1.d[1]                     // high half of the result
  adrp    x3, \want
  ldr     x4, [x3, #:lo12:\want]
  cmp     x1, x4
  b.ne    .Lfailure
  ldr     x5, [x3, #:lo12:\want+8]        // reuses the page base already in x3
  cmp     x2, x5
  b.ne    .Lfailure
  .endm

  // Test entry.  `start`, `pass` and `fail` are not defined in this file;
  // presumably they are macros supplied by testutils.inc.
  start
  adrp    x0, input
  ldr     q0, [x0, #:lo12:input]          // v0 = the 16 input bytes

  // Same order as the tables: byte, halfword, then word lanes, each in its
  // 64-bit and 128-bit form.
  mls_check_d 8b, m8b
  mls_check_q 16b, m16b
  mls_check_d 4h, m4h
  mls_check_q 8h, m8h
  mls_check_d 2s, m2s
  mls_check_q 4s, m4s

  pass
.Lfailure:
  fail