]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - crypto/openssl/crypto/sha/asm/sha1-armv8.pl
Merge libc++ trunk r300890, and update build glue.
[FreeBSD/FreeBSD.git] / crypto / openssl / crypto / sha / asm / sha1-armv8.pl
1 #!/usr/bin/env perl
2 #
3 # ====================================================================
4 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5 # project. The module is, however, dual licensed under OpenSSL and
6 # CRYPTOGAMS licenses depending on where you obtain it. For further
7 # details see http://www.openssl.org/~appro/cryptogams/.
8 # ====================================================================
9 #
10 # SHA1 for ARMv8.
11 #
12 # Performance in cycles per processed byte and improvement coefficient
13 # over code generated with "default" compiler:
14 #
15 #               hardware-assisted       software(*)
16 # Apple A7      2.31                    4.13 (+14%)
17 # Cortex-A53    2.24                    8.03 (+97%)
18 # Cortex-A57    2.35                    7.88 (+74%)
19 # Denver        2.13                    3.97 (+0%)(**)
20 # X-Gene                                8.80 (+200%)
21 #
22 # (*)   Software results are presented mostly for reference purposes.
23 # (**)  Keep in mind that Denver relies on binary translation, which
24 #       optimizes compiler output at run-time.
25
# Command line: <flavour> [<output file>].
# $flavour is consumed here but not referenced again in this file.
$flavour = shift;
$output  = shift;

# Redirect STDOUT only when an output name was supplied.  The three-argument
# form keeps characters in the file name from being read as part of the open
# mode, and a failure to open is fatal rather than silently ignored.
if (defined($output) && length($output)) {
    open(STDOUT, '>', $output) or die "can't open $output: $!";
}

# Register assignments used by the scalar code path.
($ctx,$inp,$num)=("x0","x1","x2");              # the three argument registers
@Xw=map("w$_",(3..17,19));                      # 16 message-schedule slots, 32-bit names
@Xx=map("x$_",(3..17,19));                      # same registers, 64-bit names
@V=($A,$B,$C,$D,$E)=map("w$_",(20..24));        # working variables; @V is rotated per round
($t0,$t1,$t2,$K)=map("w$_",(25..28));           # temporaries and current round constant
34
35
# Append the scalar code for one of rounds 0..19 to $code.
#
# Arguments: the round index followed by the five working registers in
# a,b,c,d,e order for this round (the caller rotates @V between calls).
# Rounds below 14 also emit input loading and byte-order fix-up; rounds
# 14 and above interleave the message-schedule update instead.
sub BODY_00_19 {
    my ($i,$a,$b,$c,$d,$e)=@_;
    my $j=($i+2)&15;            # schedule slot updated in rounds >= 14
    my $odd=$i&1;

    # Even rounds below 15: lsr by 32 from @Xx[$i] into @Xx[$i+1].
    if (!$odd && $i<15) {
        $code.=<<___;
        lsr     @Xx[$i+1],@Xx[$i],#32
___
    }

    if ($i<14) {
        if ($odd) {
            # Odd rounds: fix up the 64-bit register loaded in the previous round.
            $code.=<<___;
#ifdef  __ARMEB__
        ror     @Xx[$i+1],@Xx[$i+1],#32
#else
        rev32   @Xx[$i+1],@Xx[$i+1]
#endif
___
        } else {
            # Even rounds: load the next 64 bits of input ($inp has already
            # been post-incremented by 64, hence the negative offset).
            $code.=<<___;
        ldr     @Xx[$i+2],[$inp,#`($i+2)*4-64`]
___
        }

        # Round computation without schedule update.
        $code.=<<___;
        bic     $t0,$d,$b
        and     $t1,$c,$b
        ror     $t2,$a,#27
        add     $d,$d,$K                // future e+=K
        orr     $t0,$t0,$t1
        add     $e,$e,$t2               // e+=rot(a,5)
        ror     $b,$b,#2
        add     $d,$d,@Xw[($i+1)&15]    // future e+=X[i]
        add     $e,$e,$t0               // e+=F(b,c,d)
___
    } else {
        # Last round of this group: load the constant used from round 20 on.
        if ($i==19) {
            $code.=<<___;
        movz    $K,#0xeba1
        movk    $K,#0x6ed9,lsl#16
___
        }

        # Round computation interleaved with the update of schedule slot $j.
        $code.=<<___;
         eor    @Xw[$j],@Xw[$j],@Xw[($j+2)&15]
        bic     $t0,$d,$b
        and     $t1,$c,$b
        ror     $t2,$a,#27
         eor    @Xw[$j],@Xw[$j],@Xw[($j+8)&15]
        add     $d,$d,$K                // future e+=K
        orr     $t0,$t0,$t1
        add     $e,$e,$t2               // e+=rot(a,5)
         eor    @Xw[$j],@Xw[$j],@Xw[($j+13)&15]
        ror     $b,$b,#2
        add     $d,$d,@Xw[($i+1)&15]    // future e+=X[i]
        add     $e,$e,$t0               // e+=F(b,c,d)
         ror    @Xw[$j],@Xw[$j],#31
___
    }
}
84
# Append the scalar code for one of rounds 40..59 to $code.
#
# Arguments: the round index followed by the five working registers in
# a,b,c,d,e order for this round.  Every round in this group also updates
# message-schedule slot ($i+2)&15.
sub BODY_40_59 {
    my ($i,$a,$b,$c,$d,$e)=@_;
    my $j=($i+2)&15;

    # Last round of this group: load the constant used from round 60 on.
    if ($i==59) {
        $code.=<<___;
        movz    $K,#0xc1d6
        movk    $K,#0xca62,lsl#16
___
    }

    $code.=<<___;
        orr     $t0,$b,$c
        and     $t1,$b,$c
         eor    @Xw[$j],@Xw[$j],@Xw[($j+2)&15]
        ror     $t2,$a,#27
        and     $t0,$t0,$d
        add     $d,$d,$K                // future e+=K
         eor    @Xw[$j],@Xw[$j],@Xw[($j+8)&15]
        add     $e,$e,$t2               // e+=rot(a,5)
        orr     $t0,$t0,$t1
        ror     $b,$b,#2
         eor    @Xw[$j],@Xw[$j],@Xw[($j+13)&15]
        add     $d,$d,@Xw[($i+1)&15]    // future e+=X[i]
        add     $e,$e,$t0               // e+=F(b,c,d)
         ror    @Xw[$j],@Xw[$j],#31
___
}
110
# Append the scalar code for one round to $code; the driver calls this for
# rounds 20..39 and again for rounds 60..79.
#
# Arguments: the round index followed by the five working registers in
# a,b,c,d,e order for this round.  Rounds below 78 update message-schedule
# slot ($i+2)&15; rounds 78 and 79 instead load words from [$ctx] into
# @Xw[1..5], which the code emitted after round 79 adds to the working
# registers.
sub BODY_20_39 {
    my ($i,$a,$b,$c,$d,$e)=@_;
    my $j=($i+2)&15;

    # Last round of the 20..39 group: load the constant used from round 40 on.
    if ($i==39) {
        $code.=<<___;
        movz    $K,#0xbcdc
        movk    $K,#0x8f1b,lsl#16
___
    }

    if ($i<78) {
        # Regular round with schedule update.
        $code.=<<___;
         eor    @Xw[$j],@Xw[$j],@Xw[($j+2)&15]
        eor     $t0,$d,$b
        ror     $t2,$a,#27
        add     $d,$d,$K                // future e+=K
         eor    @Xw[$j],@Xw[$j],@Xw[($j+8)&15]
        eor     $t0,$t0,$c
        add     $e,$e,$t2               // e+=rot(a,5)
        ror     $b,$b,#2
         eor    @Xw[$j],@Xw[$j],@Xw[($j+13)&15]
        add     $d,$d,@Xw[($i+1)&15]    // future e+=X[i]
        add     $e,$e,$t0               // e+=F(b,c,d)
         ror    @Xw[$j],@Xw[$j],#31
___
    } elsif ($i==78) {
        # No schedule update; start reloading the context words.
        $code.=<<___;
        ldp     @Xw[1],@Xw[2],[$ctx]
        eor     $t0,$d,$b
        ror     $t2,$a,#27
        add     $d,$d,$K                // future e+=K
        eor     $t0,$t0,$c
        add     $e,$e,$t2               // e+=rot(a,5)
        ror     $b,$b,#2
        add     $d,$d,@Xw[($i+1)&15]    // future e+=X[i]
        add     $e,$e,$t0               // e+=F(b,c,d)
___
    } elsif ($i==79) {
        # Final round: no "future e" additions, finish reloading the context.
        $code.=<<___;
        ldp     @Xw[3],@Xw[4],[$ctx,#8]
        eor     $t0,$d,$b
        ror     $t2,$a,#27
        eor     $t0,$t0,$c
        add     $e,$e,$t2               // e+=rot(a,5)
        ror     $b,$b,#2
        ldr     @Xw[5],[$ctx,#16]
        add     $e,$e,$t0               // e+=F(b,c,d)
___
    }
}
155
# Emit sha1_block_data_order.  The prologue tests the ARMV8_SHA1 bit of the
# word reached through .LOPENSSL_armcap_P and branches to .Lv8_entry when it
# is set; otherwise the scalar code below is used.
$code.=<<___;
#include "arm_arch.h"

.text

.globl  sha1_block_data_order
.type   sha1_block_data_order,%function
.align  6
sha1_block_data_order:
        ldr     x16,.LOPENSSL_armcap_P
        adr     x17,.LOPENSSL_armcap_P
        add     x16,x16,x17
        ldr     w16,[x16]
        tst     w16,#ARMV8_SHA1
        b.ne    .Lv8_entry

        stp     x29,x30,[sp,#-96]!
        add     x29,sp,#0
        stp     x19,x20,[sp,#16]
        stp     x21,x22,[sp,#32]
        stp     x23,x24,[sp,#48]
        stp     x25,x26,[sp,#64]
        stp     x27,x28,[sp,#80]

        ldp     $A,$B,[$ctx]
        ldp     $C,$D,[$ctx,#8]
        ldr     $E,[$ctx,#16]

.Loop:
        ldr     @Xx[0],[$inp],#64
        movz    $K,#0x7999
        sub     $num,$num,#1
        movk    $K,#0x5a82,lsl#16
#ifdef  __ARMEB__
        ror     $Xx[0],@Xx[0],#32
#else
        rev32   @Xx[0],@Xx[0]
#endif
        add     $E,$E,$K                // warm it up
        add     $E,$E,@Xw[0]
___

# Generate the 80 rounds.  Each table entry gives the exclusive upper round
# index of a group and the generator used for it.  After every round @V is
# rotated right by one so that the register roles shift for the next round.
# $i is left at 80 when the loop finishes, as with the original four loops.
$i=0;
foreach my $stage ([20,\&BODY_00_19],
                   [40,\&BODY_20_39],
                   [60,\&BODY_40_59],
                   [80,\&BODY_20_39]) {
    my ($limit,$emit)=@$stage;
    while ($i<$limit) {
        $emit->($i,@V);
        unshift(@V,pop(@V));
        $i++;
    }
}

# Add the context words reloaded in rounds 78/79 to the working registers,
# store the result, loop while blocks remain, then restore saved registers.
$code.=<<___;
        add     $B,$B,@Xw[2]
        add     $C,$C,@Xw[3]
        add     $A,$A,@Xw[1]
        add     $D,$D,@Xw[4]
        add     $E,$E,@Xw[5]
        stp     $A,$B,[$ctx]
        stp     $C,$D,[$ctx,#8]
        str     $E,[$ctx,#16]
        cbnz    $num,.Loop

        ldp     x19,x20,[sp,#16]
        ldp     x21,x22,[sp,#32]
        ldp     x23,x24,[sp,#48]
        ldp     x25,x26,[sp,#64]
        ldp     x27,x28,[sp,#80]
        ldr     x29,[sp],#96
        ret
.size   sha1_block_data_order,.-sha1_block_data_order
___
# Emit sha1_block_armv8 (entered through .Lv8_entry), which is built from the
# sha1* instructions, followed by the constant table and the
# .LOPENSSL_armcap_P offset word.  The sha1* mnemonics and the .32/.i32
# suffixes written here are rewritten by the post-processing loop at the end
# of the file.
{{{
my ($ABCD,$E,$E0,$E1)=map("v$_.16b",(0..3));
my @MSG=map("v$_.16b",(4..7));
my @Kxx=map("v$_.4s",(16..19));         # one vector per row of .Lconst
my ($W0,$W1)=("v20.4s","v21.4s");
my $ABCD_SAVE="v22.16b";

# Index into @Kxx.  It must be set before the first heredoc because that
# heredoc already interpolates \@Kxx[\$j]; previously the index was still
# unassigned at that point and only evaluated to element 0 by undef-to-0
# coercion.
my $j=0;

$code.=<<___;
.type   sha1_block_armv8,%function
.align  6
sha1_block_armv8:
.Lv8_entry:
        stp     x29,x30,[sp,#-16]!
        add     x29,sp,#0

        adr     x4,.Lconst
        eor     $E,$E,$E
        ld1.32  {$ABCD},[$ctx],#16
        ld1.32  {$E}[0],[$ctx]
        sub     $ctx,$ctx,#16
        ld1.32  {@Kxx[0]-@Kxx[3]},[x4]

.Loop_hw:
        ld1     {@MSG[0]-@MSG[3]},[$inp],#64
        sub     $num,$num,#1
        rev32   @MSG[0],@MSG[0]
        rev32   @MSG[1],@MSG[1]

        add.i32 $W0,@Kxx[0],@MSG[0]
        rev32   @MSG[2],@MSG[2]
        orr     $ABCD_SAVE,$ABCD,$ABCD  // offload

        add.i32 $W1,@Kxx[0],@MSG[1]
        rev32   @MSG[3],@MSG[3]
        sha1h   $E1,$ABCD
        sha1c   $ABCD,$E,$W0            // 0
        add.i32 $W0,@Kxx[$j],@MSG[2]
        sha1su0 @MSG[0],@MSG[1],@MSG[2]
___

# Steps 1..16.  Each step swaps the two E and two W temporaries and rotates
# @MSG; the constant index advances whenever ($i+3) is a multiple of 5.
# $i stays a package variable, as elsewhere in this file.
for ($i=1;$i<20-3;$i++) {
my $f=("c","p","m","p")[$i/5];          # sha1c / sha1p / sha1m / sha1p by group of five
$code.=<<___;
        sha1h   $E0,$ABCD               // $i
        sha1$f  $ABCD,$E1,$W1
        add.i32 $W1,@Kxx[$j],@MSG[3]
        sha1su1 @MSG[0],@MSG[3]
___
$code.=<<___ if ($i<20-4);
        sha1su0 @MSG[1],@MSG[2],@MSG[3]
___
        ($E0,$E1)=($E1,$E0);            ($W0,$W1)=($W1,$W0);
        push(@MSG,shift(@MSG));         $j++ if ((($i+3)%5)==0);
}

# Steps 17..19 (no further schedule updates), accumulation into the saved
# state, loop control, state store, constants and the capability offset.
$code.=<<___;
        sha1h   $E0,$ABCD               // $i
        sha1p   $ABCD,$E1,$W1
        add.i32 $W1,@Kxx[$j],@MSG[3]

        sha1h   $E1,$ABCD               // 18
        sha1p   $ABCD,$E0,$W0

        sha1h   $E0,$ABCD               // 19
        sha1p   $ABCD,$E1,$W1

        add.i32 $E,$E,$E0
        add.i32 $ABCD,$ABCD,$ABCD_SAVE

        cbnz    $num,.Loop_hw

        st1.32  {$ABCD},[$ctx],#16
        st1.32  {$E}[0],[$ctx]

        ldr     x29,[sp],#16
        ret
.size   sha1_block_armv8,.-sha1_block_armv8
.align  6
.Lconst:
.long   0x5a827999,0x5a827999,0x5a827999,0x5a827999     //K_00_19
.long   0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1     //K_20_39
.long   0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc     //K_40_59
.long   0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6     //K_60_79
.LOPENSSL_armcap_P:
.quad   OPENSSL_armcap_P-.
.asciz  "SHA1 block transform for ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
.align  2
.comm   OPENSSL_armcap_P,4,4
___
}}}
309
{   # Base encodings of the sha1* instructions handled by unsha1().
    my  %opcode = (
        "sha1c"         => 0x5e000000,  "sha1p"         => 0x5e001000,
        "sha1m"         => 0x5e002000,  "sha1su0"       => 0x5e003000,
        "sha1h"         => 0x5e280800,  "sha1su1"       => 0x5e281800   );

    # Translate one sha1* instruction into a raw ".inst" directive.
    #
    #   $mnemonic - instruction name; must be a key of %opcode
    #   $arg      - operand text: two or three q/v registers separated by
    #               commas (anything after them, e.g. a comment, is kept)
    #
    # Returns ".inst\t0x<encoding>\t//<mnemonic> <arg>".  The encoding is the
    # base opcode ORed with the first register number, the second register
    # number shifted left by 5 and the third (0 when absent) shifted left
    # by 16.
    #
    # Dies on an unknown mnemonic or unparsable operands.  Previously a
    # parse failure returned an empty string, which made the caller's
    # substitution silently delete the instruction from the output.
    sub unsha1 {
        my ($mnemonic,$arg)=@_;

        exists $opcode{$mnemonic}
            or die "unsha1: unknown mnemonic '$mnemonic'\n";

        $arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)[^,]*(?:,\s*[qv]([0-9]+))?/
            or die "unsha1: cannot parse operands in '$mnemonic $arg'\n";

        # The third capture is undefined for two-operand forms such as sha1h.
        my ($op1,$op2,$op3)=($1,$2,defined($3)?$3:0);

        return sprintf ".inst\t0x%08x\t//%s %s",
                        $opcode{$mnemonic}|$op1|($op2<<5)|($op3<<16),
                        $mnemonic,$arg;
    }
}
325
# Evaluate one backtick-quoted expression from the generated text.
# A failure is fatal; previously it silently became an empty string in the
# emitted assembly.
sub _eval_expr {
        my ($expr)=@_;
        my $value=eval($expr);
        die "$0: cannot evaluate `$expr`: $@" if $@;
        return $value;
}

# Post-process the accumulated assembly line by line and print it.
foreach my $line (split("\n",$code)) {

        # Replace `expr` with its computed value.
        $line =~ s/\`([^\`]*)\`/_eval_expr($1)/ge;

        # Replace sha1* instructions with raw .inst encodings.
        $line =~ s/\b(sha1\w+)\s+([qv].*)/unsha1($1,$2)/ge;

        # Drop a ".32"/".i32"-style suffix and, when one was present,
        # rewrite .16b arrangements on that line as .4s.
        $line =~ s/\.\w?32\b//          and $line =~ s/\.16b/\.4s/g;
        # Single-lane ld1/st1 ("...[0]") uses .s rather than .4s.
        $line =~ m/(ld|st)1[^\[]+\[0\]/ and $line =~ s/\.4s/\.s/g;

        print $line,"\n";
}

# Buffered write errors only surface at close time, so check the result.
close STDOUT or die "error closing STDOUT: $!";