# ldnr.s
# mach: aarch64

# Check the load-and-replicate insns, which load a single N-element
# structure and replicate it to all lanes of N registers:
# ld1r, ld2r, ld3r, ld4r.
# Check the addressing modes: no offset, post-index immediate offset,
# post-index register offset.
  .include "testutils.inc"

  .data
  .p2align 4                    // 16-byte alignment for the test data

  // 16 bytes of source data for the byte and halfword loads (ld1r, ld3r).
  // On a little-endian target these words place the byte values
  // 0x01..0x10 at ascending addresses.
input:
  .word 0x04030201, 0x08070605
  .word 0x0c0b0a09, 0x100f0e0d

  // Twelve words holding 1..12: source data for the word and doubleword
  // loads (ld2r, ld4r).
input2:
  .word 0x00000001, 0x00000002, 0x00000003, 0x00000004
  .word 0x00000005, 0x00000006, 0x00000007, 0x00000008
  .word 0x00000009, 0x0000000a, 0x0000000b, 0x0000000c

  start                         // macro from testutils.inc (not shown here);
                                // presumably opens the code section and
                                // defines the entry point -- confirm there

  // x0 = &input, x1 = &input2.  The later sections copy these into x2
  // as their base pointers.
  adrp x0, input
  add x0, x0, :lo12:input
  adrp x1, input2
  add x1, x1, :lo12:input2

  // ---- ld1r: load one element, replicate it to every lane ------------
  // x2 walks through input; x3 is the register post-index step.
  mov x3, #1
  mov x2, x0
  ld1r {v0.8b}, [x2], #1        // byte at input+0 -> 8 lanes, post-index imm
  ld1r {v1.16b}, [x2], x3       // byte at input+1 -> 16 lanes, post-index reg
  ld1r {v2.4h}, [x2], #2        // halfword at input+2 -> 4 lanes, post-index imm
  ld1r {v3.8h}, [x2]            // halfword at input+4 -> 8 lanes, no offset

  // Reduce every vector to the sum of its bytes.
  addv b0, v0.8b
  addv b1, v1.16b
  addv b2, v2.8b
  addv b3, v3.16b

  // Each check reads the whole low doubleword, so the compare also
  // requires the bits above the 8-bit sum to be zero.  Expected values
  // assume the little-endian layout of input.
  mov x2, v0.d[0]
  cmp x2, #8                    // 8 lanes x 0x01
  b.ne .Lfailure

  mov x3, v1.d[0]
  cmp x3, #32                   // 16 lanes x 0x02
  b.ne .Lfailure

  mov x4, v2.d[0]
  cmp x4, #28                   // 4 lanes x (0x03 + 0x04)
  b.ne .Lfailure

  mov x5, v3.d[0]
  cmp x5, #88                   // 8 lanes x (0x05 + 0x06)
  b.ne .Lfailure

  // ---- ld2r: load a 2-element structure, replicate each element ------
  // x2 walks through input2 (words 1..12); x3 is the register
  // post-index step.
  mov x3, #8
  mov x2, x1
  ld2r {v0.2s, v1.2s}, [x2], #8     // words 1, 2; post-index imm
  ld2r {v2.4s, v3.4s}, [x2], x3     // words 3, 4; post-index reg
  ld2r {v4.1d, v5.1d}, [x2], #16    // doublewords {5,6}, {7,8}; post-index imm
  ld2r {v6.2d, v7.2d}, [x2]         // doublewords {9,10}, {11,12}; no offset

  // Pairwise adds fold two 2-lane vectors into one:
  //   vN.s[0] = first source lane 0 + lane 1,
  //   vN.s[1] = second source lane 0 + lane 1.
  addp v0.2s, v0.2s, v1.2s
  addp v4.2s, v4.2s, v5.2s

  // Across-lane adds reduce each 4-word vector to a single word sum.
  addv s2, v2.4s
  addv s3, v3.4s
  addv s6, v6.4s
  addv s7, v7.4s

  mov w2, v0.s[0]
  cmp w2, #2                    // 1 + 1
  b.ne .Lfailure

  mov w3, v0.s[1]
  cmp w3, #4                    // 2 + 2
  b.ne .Lfailure

  mov x4, v2.d[0]
  cmp x4, #12                   // 4 lanes x 3
  b.ne .Lfailure

  mov x5, v3.d[0]
  cmp x5, #16                   // 4 lanes x 4
  b.ne .Lfailure

  mov w6, v4.s[0]
  cmp w6, #11                   // 5 + 6
  b.ne .Lfailure

  mov w7, v4.s[1]
  cmp w7, #15                   // 7 + 8
  b.ne .Lfailure

  mov x8, v6.d[0]
  cmp x8, #38                   // 2 x (9 + 10)
  b.ne .Lfailure

  mov x9, v7.d[0]
  cmp x9, #46                   // 2 x (11 + 12)
  b.ne .Lfailure

  // ---- ld3r: load a 3-element structure, replicate each element ------
  // x2 walks through input three bytes at a time; x3 is the register
  // post-index step.  Only v0-v8 are loaded, so only v0-v8 are reduced
  // and checked.
  mov x3, #3
  mov x2, x0
  ld3r {v0.8b, v1.8b, v2.8b}, [x2], #3      // bytes at input+0..2; post-index imm
  ld3r {v3.8b, v4.8b, v5.8b}, [x2], x3      // bytes at input+3..5; post-index reg
  ld3r {v6.8b, v7.8b, v8.8b}, [x2]          // bytes at input+6..8; no offset

  // Reduce every loaded vector to the sum of its eight byte lanes.
  addv b0, v0.8b
  addv b1, v1.8b
  addv b2, v2.8b
  addv b3, v3.8b
  addv b4, v4.8b
  addv b5, v5.8b
  addv b6, v6.8b
  addv b7, v7.8b
  addv b8, v8.8b

  // With the little-endian layout of input, register vK holds byte
  // value K+1 in all eight lanes, so its sum is 8 * (K + 1).
  mov x2, v0.d[0]
  cmp x2, #8
  b.ne .Lfailure

  mov x3, v1.d[0]
  cmp x3, #16
  b.ne .Lfailure

  mov x4, v2.d[0]
  cmp x4, #24
  b.ne .Lfailure

  mov x5, v3.d[0]
  cmp x5, #32
  b.ne .Lfailure

  mov x6, v4.d[0]
  cmp x6, #40
  b.ne .Lfailure

  mov x7, v5.d[0]
  cmp x7, #48
  b.ne .Lfailure

  mov x8, v6.d[0]
  cmp x8, #56
  b.ne .Lfailure

  mov x9, v7.d[0]
  cmp x9, #64
  b.ne .Lfailure

  mov x10, v8.d[0]
  cmp x10, #72
  b.ne .Lfailure

  // ---- ld4r: load a 4-element structure, replicate each element ------
  // x2 walks through input2 (words 1..12) four words at a time; x3 is
  // the register post-index step.  All three addressing modes are
  // exercised: post-index immediate, post-index register, no offset.
  mov x3, #16
  mov x2, x1
  ld4r {v0.4s, v1.4s, v2.4s, v3.4s}, [x2], #16      // words 1..4; post-index imm
  ld4r {v4.4s, v5.4s, v6.4s, v7.4s}, [x2], x3       // words 5..8; post-index reg
  ld4r {v8.4s, v9.4s, v10.4s, v11.4s}, [x2]         // words 9..12; no offset

  // Reduce every loaded vector to the sum of its four word lanes.
  addv s0, v0.4s
  addv s1, v1.4s
  addv s2, v2.4s
  addv s3, v3.4s
  addv s4, v4.4s
  addv s5, v5.4s
  addv s6, v6.4s
  addv s7, v7.4s
  addv s8, v8.4s
  addv s9, v9.4s
  addv s10, v10.4s
  addv s11, v11.4s

  // Register vK holds word value K+1 in all four lanes, so its sum is
  // 4 * (K + 1).
  mov x2, v0.d[0]
  cmp x2, #4
  b.ne .Lfailure

  mov x3, v1.d[0]
  cmp x3, #8
  b.ne .Lfailure

  mov x4, v2.d[0]
  cmp x4, #12
  b.ne .Lfailure

  mov x5, v3.d[0]
  cmp x5, #16
  b.ne .Lfailure

  mov x6, v4.d[0]
  cmp x6, #20
  b.ne .Lfailure

  mov x7, v5.d[0]
  cmp x7, #24
  b.ne .Lfailure

  mov x8, v6.d[0]
  cmp x8, #28
  b.ne .Lfailure

  mov x9, v7.d[0]
  cmp x9, #32
  b.ne .Lfailure

  mov x10, v8.d[0]
  cmp x10, #36
  b.ne .Lfailure

  mov x11, v9.d[0]
  cmp x11, #40
  b.ne .Lfailure

  mov x12, v10.d[0]
  cmp x12, #44
  b.ne .Lfailure

  mov x13, v11.d[0]
  cmp x13, #48
  b.ne .Lfailure

  // pass and fail are macros from testutils.inc (not shown here);
  // presumably they report the result and end the program -- confirm there.
  pass
.Lfailure:
  fail