; RUN: llc %s -o - -mtriple=aarch64-unknown -mattr=+fuse-aes,+crypto | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=generic -mattr=+crypto | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a53 | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a57 | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a72 | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a73 | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m1 | FileCheck %s
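
; Check that the AESE/AESMC and AESD/AESIMC instruction pairs are scheduled
; back to back, so that targets with the fuse-aes feature (or CPUs that
; imply it) can combine each pair into a single fused macro-op.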
declare <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %d, <16 x i8> %k)
declare <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %d)
declare <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %d, <16 x i8> %k)
declare <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %d)
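
; Four rounds of four parallel aese/aesmc chains, followed by a final aese
; and key xor; the CHECK lines only require that each aese is immediately
; followed by the aesmc that consumes its result.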
define void @aesea(<16 x i8>* %a0, <16 x i8>* %b0, <16 x i8>* %c0, <16 x i8> %d, <16 x i8> %e) {
  %d0 = load <16 x i8>, <16 x i8>* %a0
  %a1 = getelementptr inbounds <16 x i8>, <16 x i8>* %a0, i64 1
  %d1 = load <16 x i8>, <16 x i8>* %a1
  %a2 = getelementptr inbounds <16 x i8>, <16 x i8>* %a0, i64 2
  %d2 = load <16 x i8>, <16 x i8>* %a2
  %a3 = getelementptr inbounds <16 x i8>, <16 x i8>* %a0, i64 3
  %d3 = load <16 x i8>, <16 x i8>* %a3
  %k0 = load <16 x i8>, <16 x i8>* %b0
  %e00 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %d0, <16 x i8> %k0)
  %f00 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e00)
  %e01 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %d1, <16 x i8> %k0)
  %f01 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e01)
  %e02 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %d2, <16 x i8> %k0)
  %f02 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e02)
  %e03 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %d3, <16 x i8> %k0)
  %f03 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e03)
  %b1 = getelementptr inbounds <16 x i8>, <16 x i8>* %b0, i64 1
  %k1 = load <16 x i8>, <16 x i8>* %b1
  %e10 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f00, <16 x i8> %k1)
  %f10 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e10)
  %e11 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f01, <16 x i8> %k1)
  %f11 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e11)
  %e12 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f02, <16 x i8> %k1)
  %f12 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e12)
  %e13 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f03, <16 x i8> %k1)
  %f13 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e13)
  %b2 = getelementptr inbounds <16 x i8>, <16 x i8>* %b0, i64 2
  %k2 = load <16 x i8>, <16 x i8>* %b2
  %e20 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f10, <16 x i8> %k2)
  %f20 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e20)
  %e21 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f11, <16 x i8> %k2)
  %f21 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e21)
  %e22 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f12, <16 x i8> %k2)
  %f22 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e22)
  %e23 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f13, <16 x i8> %k2)
  %f23 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e23)
  %b3 = getelementptr inbounds <16 x i8>, <16 x i8>* %b0, i64 3
  %k3 = load <16 x i8>, <16 x i8>* %b3
  %e30 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f20, <16 x i8> %k3)
  %f30 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e30)
  %e31 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f21, <16 x i8> %k3)
  %f31 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e31)
  %e32 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f22, <16 x i8> %k3)
  %f32 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e32)
  %e33 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f23, <16 x i8> %k3)
  %f33 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e33)
  %g0 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f30, <16 x i8> %d)
  %h0 = xor <16 x i8> %g0, %e
  %g1 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f31, <16 x i8> %d)
  %h1 = xor <16 x i8> %g1, %e
  %g2 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f32, <16 x i8> %d)
  %h2 = xor <16 x i8> %g2, %e
  %g3 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f33, <16 x i8> %d)
  %h3 = xor <16 x i8> %g3, %e
  store <16 x i8> %h0, <16 x i8>* %c0
  %c1 = getelementptr inbounds <16 x i8>, <16 x i8>* %c0, i64 1
  store <16 x i8> %h1, <16 x i8>* %c1
  %c2 = getelementptr inbounds <16 x i8>, <16 x i8>* %c0, i64 2
  store <16 x i8> %h2, <16 x i8>* %c2
  %c3 = getelementptr inbounds <16 x i8>, <16 x i8>* %c0, i64 3
  store <16 x i8> %h3, <16 x i8>* %c3
  ret void

; CHECK-LABEL: aesea:
; CHECK: aese [[VA:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesmc [[VA]], [[VA]]
; CHECK: aese [[VB:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesmc [[VB]], [[VB]]
; CHECK: aese [[VC:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesmc [[VC]], [[VC]]
; CHECK: aese [[VD:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesmc [[VD]], [[VD]]
; CHECK: aese [[VE:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesmc [[VE]], [[VE]]
; CHECK: aese [[VF:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesmc [[VF]], [[VF]]
; CHECK: aese [[VG:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesmc [[VG]], [[VG]]
; CHECK: aese [[VH:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesmc [[VH]], [[VH]]
}
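
; aesda mirrors aesea with the decryption instructions: each aesd should be
; immediately followed by the aesimc that consumes its result.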
define void @aesda(<16 x i8>* %a0, <16 x i8>* %b0, <16 x i8>* %c0, <16 x i8> %d, <16 x i8> %e) {
  %d0 = load <16 x i8>, <16 x i8>* %a0
  %a1 = getelementptr inbounds <16 x i8>, <16 x i8>* %a0, i64 1
  %d1 = load <16 x i8>, <16 x i8>* %a1
  %a2 = getelementptr inbounds <16 x i8>, <16 x i8>* %a0, i64 2
  %d2 = load <16 x i8>, <16 x i8>* %a2
  %a3 = getelementptr inbounds <16 x i8>, <16 x i8>* %a0, i64 3
  %d3 = load <16 x i8>, <16 x i8>* %a3
  %k0 = load <16 x i8>, <16 x i8>* %b0
  %e00 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %d0, <16 x i8> %k0)
  %f00 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e00)
  %e01 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %d1, <16 x i8> %k0)
  %f01 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e01)
  %e02 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %d2, <16 x i8> %k0)
  %f02 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e02)
  %e03 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %d3, <16 x i8> %k0)
  %f03 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e03)
  %b1 = getelementptr inbounds <16 x i8>, <16 x i8>* %b0, i64 1
  %k1 = load <16 x i8>, <16 x i8>* %b1
  %e10 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f00, <16 x i8> %k1)
  %f10 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e10)
  %e11 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f01, <16 x i8> %k1)
  %f11 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e11)
  %e12 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f02, <16 x i8> %k1)
  %f12 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e12)
  %e13 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f03, <16 x i8> %k1)
  %f13 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e13)
  %b2 = getelementptr inbounds <16 x i8>, <16 x i8>* %b0, i64 2
  %k2 = load <16 x i8>, <16 x i8>* %b2
  %e20 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f10, <16 x i8> %k2)
  %f20 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e20)
  %e21 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f11, <16 x i8> %k2)
  %f21 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e21)
  %e22 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f12, <16 x i8> %k2)
  %f22 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e22)
  %e23 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f13, <16 x i8> %k2)
  %f23 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e23)
  %b3 = getelementptr inbounds <16 x i8>, <16 x i8>* %b0, i64 3
  %k3 = load <16 x i8>, <16 x i8>* %b3
  %e30 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f20, <16 x i8> %k3)
  %f30 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e30)
  %e31 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f21, <16 x i8> %k3)
  %f31 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e31)
  %e32 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f22, <16 x i8> %k3)
  %f32 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e32)
  %e33 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f23, <16 x i8> %k3)
  %f33 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e33)
  %g0 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f30, <16 x i8> %d)
  %h0 = xor <16 x i8> %g0, %e
  %g1 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f31, <16 x i8> %d)
  %h1 = xor <16 x i8> %g1, %e
  %g2 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f32, <16 x i8> %d)
  %h2 = xor <16 x i8> %g2, %e
  %g3 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f33, <16 x i8> %d)
  %h3 = xor <16 x i8> %g3, %e
  store <16 x i8> %h0, <16 x i8>* %c0
  %c1 = getelementptr inbounds <16 x i8>, <16 x i8>* %c0, i64 1
  store <16 x i8> %h1, <16 x i8>* %c1
  %c2 = getelementptr inbounds <16 x i8>, <16 x i8>* %c0, i64 2
  store <16 x i8> %h2, <16 x i8>* %c2
  %c3 = getelementptr inbounds <16 x i8>, <16 x i8>* %c0, i64 3
  store <16 x i8> %h3, <16 x i8>* %c3
  ret void

; CHECK-LABEL: aesda:
; CHECK: aesd [[VA:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesimc [[VA]], [[VA]]
; CHECK: aesd [[VB:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesimc [[VB]], [[VB]]
; CHECK: aesd [[VC:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesimc [[VC]], [[VC]]
; CHECK: aesd [[VD:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesimc [[VD]], [[VD]]
; CHECK: aesd [[VE:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesimc [[VE]], [[VE]]
; CHECK: aesd [[VF:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesimc [[VF]], [[VF]]
; CHECK: aesd [[VG:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesimc [[VG]], [[VG]]
; CHECK: aesd [[VH:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesimc [[VH]], [[VH]]
}
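
; Interleave loads and stores with an aese/aesmc chain to check that the
; pairs stay adjacent in the presence of memory operations.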
define void @aes_load_store(<16 x i8>* %p1, <16 x i8>* %p2, <16 x i8>* %p3) {
entry:
  %x1 = alloca <16 x i8>, align 16
  %x2 = alloca <16 x i8>, align 16
  %x3 = alloca <16 x i8>, align 16
  %x4 = alloca <16 x i8>, align 16
  %x5 = alloca <16 x i8>, align 16
  %in1 = load <16 x i8>, <16 x i8>* %p1, align 16
  store <16 x i8> %in1, <16 x i8>* %x1, align 16
  %aese1 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %in1, <16 x i8> %in1) #2
  %in2 = load <16 x i8>, <16 x i8>* %p2, align 16
  %aesmc1 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %aese1) #2
  %aese2 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %in1, <16 x i8> %in2) #2
  store <16 x i8> %aesmc1, <16 x i8>* %x3, align 16
  %in3 = load <16 x i8>, <16 x i8>* %p3, align 16
  %aesmc2 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %aese2) #2
  %aese3 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %aesmc2, <16 x i8> %in3) #2
  store <16 x i8> %aese3, <16 x i8>* %x5, align 16
  ret void

; CHECK-LABEL: aes_load_store:
; CHECK: aese [[VA:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesmc [[VA]], [[VA]]
; CHECK: aese [[VB:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesmc [[VB]], [[VB]]
}