]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/dev/if_wg/module/crypto/zinc/poly1305/poly1305-mips64.pl
service(8): use an environment more consistent with init(8)
[FreeBSD/FreeBSD.git] / sys / dev / if_wg / module / crypto / zinc / poly1305 / poly1305-mips64.pl
1 #!/usr/bin/env perl
2 # SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
3 #
4 # This code is taken from the OpenSSL project but the author, Andy Polyakov,
5 # has relicensed it under the licenses specified in the SPDX header above.
6 # The original headers, including the original license headers, are
7 # included below for completeness.
8 #
9 # ====================================================================
10 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
11 # project. The module is, however, dual licensed under OpenSSL and
12 # CRYPTOGAMS licenses depending on where you obtain it. For further
13 # details see http://www.openssl.org/~appro/cryptogams/.
14 # ====================================================================
15 #
16 # Poly1305 hash for MIPS64.
17 #
18 # May 2016
19 #
20 # Numbers are cycles per processed byte with poly1305_blocks alone.
21 #
22 #               IALU/gcc
23 # R1x000        5.64/+120%      (big-endian)
24 # Octeon II     3.80/+280%      (little-endian)
25
26 ######################################################################
27 # There are a number of MIPS ABIs in use; O32 and N32/64 are the most
28 # widely used. Then there is a new contender: NUBI. It appears that if
29 # one picks the latter, it's possible to arrange code in an ABI-neutral
30 # manner. Therefore let's stick to the NUBI register layout:
31 #
32 ($zero,$at,$t0,$t1,$t2)           = map { "\$$_" } (0..2,24,25);   # $0-$2, $24, $25
33 ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7) = map { "\$$_" } (4..11);        # $4-$11
34 ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11) = map { "\$$_" } (12..23);   # $12-$23
35 ($gp,$tp,$sp,$fp,$ra)             = map { "\$$_" } (3,28..31);     # $3, $28-$31
36 #
37 # The return value is placed in $a0. The following coding rules facilitate
38 # interoperability:
39 #
40 # - never ever touch $tp, "thread pointer", former $gp [o32 can be
41 #   excluded from the rule, because it's specified volatile];
42 # - copy return value to $t0, former $v0 [or to $a0 if you're adapting
43 #   old code];
44 # - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
45 #
46 # For reference here is register layout for N32/64 MIPS ABIs:
47 #
48 # ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
49 # ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
50 # ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
51 # ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
52 # ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
53 #
54 # <appro@openssl.org>
55 #
56 ######################################################################
57
58 $flavour = shift || "64"; # first argument; defaults to "64"
59
60 $flavour =~ /64|n32/i or die "MIPS64 only";
61
62 if ($flavour =~ /nubi/i) { ($v0,$SAVED_REGS_MASK) = ($a0,"0x0003f000"); } # mask bits 12-17
63 else                     { ($v0,$SAVED_REGS_MASK) = ($t0,"0x00030000"); } # mask bits 16-17
64
65 ($ctx,$inp,$len,$padbit,$in0,$in1) = ($a0,$a1,$a2,$a3,$a4,$a5);      # arguments + input words
66 ($tmp0,$tmp1,$tmp2,$tmp3,$tmp4)    = ($a6,$a7,$at,$t0,$t1);          # scratch registers
67
# Emit the C-preprocessor preamble and poly1305_init(ctx, key).
#
# Preamble: defines _MIPS_ARCH_MIPS64R2 for R3/R5/R6 targets, maps the
# dmultu/mflo/mfhi macros onto dmulu/dmuhu on MIPS64R6, renames the three
# entry points to *_mips when __KERNEL__ is defined, and defines MSB/LSB
# (byte offsets used by the unaligned ldl/ldr loads) according to endianness.
#
# poly1305_init: zeroes the three hash words at 0/8/16($ctx).  When the key
# pointer ($inp) is non-NULL it loads 16 key bytes (byte-swapped on
# big-endian), masks them with 0x0ffffffc0fffffff and 0x0ffffffc0ffffffc, and
# stores r0, r1 and s1 = r1 + (r1 >> 2) at 24/32/40($ctx).  It always
# returns 0 in $v0.
68 $code.=<<___;
69 #if (defined(_MIPS_ARCH_MIPS64R3) || defined(_MIPS_ARCH_MIPS64R5) || \\
70      defined(_MIPS_ARCH_MIPS64R6)) \\
71      && !defined(_MIPS_ARCH_MIPS64R2)
72 # define _MIPS_ARCH_MIPS64R2
73 #endif
74
75 #if defined(_MIPS_ARCH_MIPS64R6)
76 # define dmultu(rs,rt)
77 # define mflo(rd,rs,rt) dmulu   rd,rs,rt
78 # define mfhi(rd,rs,rt) dmuhu   rd,rs,rt
79 #else
80 # define dmultu(rs,rt)          dmultu  rs,rt
81 # define mflo(rd,rs,rt) mflo    rd
82 # define mfhi(rd,rs,rt) mfhi    rd
83 #endif
84
85 #ifdef  __KERNEL__
86 # define poly1305_init   poly1305_init_mips
87 # define poly1305_blocks poly1305_blocks_mips
88 # define poly1305_emit   poly1305_emit_mips
89 #endif
90
91 #if defined(__MIPSEB__) && !defined(MIPSEB)
92 # define MIPSEB
93 #endif
94
95 #ifdef MIPSEB
96 # define MSB 0
97 # define LSB 7
98 #else
99 # define MSB 7
100 # define LSB 0
101 #endif
102
103 .text
104 .set    noat
105 .set    noreorder
106
107 .align  5
108 .globl  poly1305_init
109 .ent    poly1305_init
110 poly1305_init:
111         .frame  $sp,0,$ra
112         .set    reorder
113
114         sd      $zero,0($ctx)
115         sd      $zero,8($ctx)
116         sd      $zero,16($ctx)
117
118         beqz    $inp,.Lno_key
119
120 #if defined(_MIPS_ARCH_MIPS64R6)
121         ld      $in0,0($inp)
122         ld      $in1,8($inp)
123 #else
124         ldl     $in0,0+MSB($inp)
125         ldl     $in1,8+MSB($inp)
126         ldr     $in0,0+LSB($inp)
127         ldr     $in1,8+LSB($inp)
128 #endif
129 #ifdef  MIPSEB
130 # if defined(_MIPS_ARCH_MIPS64R2)
131         dsbh    $in0,$in0               # byte swap
132          dsbh   $in1,$in1
133         dshd    $in0,$in0
134          dshd   $in1,$in1
135 # else
136         ori     $tmp0,$zero,0xFF
137         dsll    $tmp2,$tmp0,32
138         or      $tmp0,$tmp2             # 0x000000FF000000FF
139
140         and     $tmp1,$in0,$tmp0        # byte swap
141          and    $tmp3,$in1,$tmp0
142         dsrl    $tmp2,$in0,24
143          dsrl   $tmp4,$in1,24
144         dsll    $tmp1,24
145          dsll   $tmp3,24
146         and     $tmp2,$tmp0
147          and    $tmp4,$tmp0
148         dsll    $tmp0,8                 # 0x0000FF000000FF00
149         or      $tmp1,$tmp2
150          or     $tmp3,$tmp4
151         and     $tmp2,$in0,$tmp0
152          and    $tmp4,$in1,$tmp0
153         dsrl    $in0,8
154          dsrl   $in1,8
155         dsll    $tmp2,8
156          dsll   $tmp4,8
157         and     $in0,$tmp0
158          and    $in1,$tmp0
159         or      $tmp1,$tmp2
160          or     $tmp3,$tmp4
161         or      $in0,$tmp1
162          or     $in1,$tmp3
163         dsrl    $tmp1,$in0,32
164          dsrl   $tmp3,$in1,32
165         dsll    $in0,32
166          dsll   $in1,32
167         or      $in0,$tmp1
168          or     $in1,$tmp3
169 # endif
170 #endif
171         li      $tmp0,1
172         dsll    $tmp0,32
173         daddiu  $tmp0,-63
174         dsll    $tmp0,28
175         daddiu  $tmp0,-1                # 0ffffffc0fffffff
176
177         and     $in0,$tmp0
178         daddiu  $tmp0,-3                # 0ffffffc0ffffffc
179         and     $in1,$tmp0
180
181         sd      $in0,24($ctx)
182         dsrl    $tmp0,$in1,2
183         sd      $in1,32($ctx)
184         daddu   $tmp0,$in1              # s1 = r1 + (r1 >> 2)
185         sd      $tmp0,40($ctx)
186
187 .Lno_key:
188         li      $v0,0                   # return 0
189         jr      $ra
190 .end    poly1305_init
191 ___
192 {
# Emit poly1305_blocks(ctx, inp, len, padbit) and its worker
# poly1305_blocks_internal.
#
# poly1305_blocks shifts $len right by 4 (number of complete 16-byte blocks)
# and returns immediately when that is zero; otherwise it branches to the
# worker.  The worker saves the callee-saved registers it uses, loads the
# hash (0/8/16($ctx)) and key material (24/32/40($ctx)), and for every block
# adds the 16 input bytes plus $padbit to the hash, multiplies by r and
# performs the final reduction, then stores the hash back.
#
# The 40($ctx) multiplier lives in $rs1, NOT in a lexical called $s1: a
# lexical $s1 would shadow the global register name $s1 ($13) inside the
# heredocs below, so the nubi prologue/epilogue would save/restore $s5 ($17)
# twice and leave callee-saved $13 (used as $h1) clobbered.
193 my ($h0,$h1,$h2,$r0,$r1,$rs1,$d0,$d1,$d2) =
194    ($s0,$s1,$s2,$s3,$s4,$s5,$in0,$in1,$t2);
195
196 $code.=<<___;
197 .align  5
198 .globl  poly1305_blocks
199 .ent    poly1305_blocks
200 poly1305_blocks:
201         .set    noreorder
202         dsrl    $len,4                  # number of complete blocks
203         bnez    $len,poly1305_blocks_internal
204         nop
205         jr      $ra
206         nop
207 .end    poly1305_blocks
208
209 .align  5
210 .ent    poly1305_blocks_internal
211 poly1305_blocks_internal:
212         .frame  $sp,6*8,$ra
213         .mask   $SAVED_REGS_MASK,-8
214         .set    noreorder
215         dsubu   $sp,6*8
216         sd      $s5,40($sp)
217         sd      $s4,32($sp)
218 ___
219 $code.=<<___ if ($flavour =~ /nubi/i);  # optimize non-nubi prologue
220         sd      $s3,24($sp)
221         sd      $s2,16($sp)
222         sd      $s1,8($sp)
223         sd      $s0,0($sp)
224 ___
225 $code.=<<___;
226         .set    reorder
227
228         ld      $h0,0($ctx)             # load hash value
229         ld      $h1,8($ctx)
230         ld      $h2,16($ctx)
231
232         ld      $r0,24($ctx)            # load key
233         ld      $r1,32($ctx)
234         ld      $rs1,40($ctx)
235
236 .Loop:
237 #if defined(_MIPS_ARCH_MIPS64R6)
238         ld      $in0,0($inp)            # load input
239         ld      $in1,8($inp)
240 #else
241         ldl     $in0,0+MSB($inp)        # load input
242         ldl     $in1,8+MSB($inp)
243         ldr     $in0,0+LSB($inp)
244         ldr     $in1,8+LSB($inp)
245 #endif
246         daddiu  $len,-1
247         daddiu  $inp,16
248 #ifdef  MIPSEB
249 # if defined(_MIPS_ARCH_MIPS64R2)
250         dsbh    $in0,$in0               # byte swap
251          dsbh   $in1,$in1
252         dshd    $in0,$in0
253          dshd   $in1,$in1
254 # else
255         ori     $tmp0,$zero,0xFF
256         dsll    $tmp2,$tmp0,32
257         or      $tmp0,$tmp2             # 0x000000FF000000FF
258
259         and     $tmp1,$in0,$tmp0        # byte swap
260          and    $tmp3,$in1,$tmp0
261         dsrl    $tmp2,$in0,24
262          dsrl   $tmp4,$in1,24
263         dsll    $tmp1,24
264          dsll   $tmp3,24
265         and     $tmp2,$tmp0
266          and    $tmp4,$tmp0
267         dsll    $tmp0,8                 # 0x0000FF000000FF00
268         or      $tmp1,$tmp2
269          or     $tmp3,$tmp4
270         and     $tmp2,$in0,$tmp0
271          and    $tmp4,$in1,$tmp0
272         dsrl    $in0,8
273          dsrl   $in1,8
274         dsll    $tmp2,8
275          dsll   $tmp4,8
276         and     $in0,$tmp0
277          and    $in1,$tmp0
278         or      $tmp1,$tmp2
279          or     $tmp3,$tmp4
280         or      $in0,$tmp1
281          or     $in1,$tmp3
282         dsrl    $tmp1,$in0,32
283          dsrl   $tmp3,$in1,32
284         dsll    $in0,32
285          dsll   $in1,32
286         or      $in0,$tmp1
287          or     $in1,$tmp3
288 # endif
289 #endif
290         daddu   $h0,$in0                # accumulate input
291         daddu   $h1,$in1
292         sltu    $tmp0,$h0,$in0
293         sltu    $tmp1,$h1,$in1
294         daddu   $h1,$tmp0
295
296         dmultu  ($r0,$h0)               # h0*r0
297          daddu  $h2,$padbit
298          sltu   $tmp0,$h1,$tmp0
299         mflo    ($d0,$r0,$h0)
300         mfhi    ($d1,$r0,$h0)
301
302         dmultu  ($rs1,$h1)              # h1*5*r1
303          daddu  $tmp0,$tmp1
304          daddu  $h2,$tmp0
305         mflo    ($tmp0,$rs1,$h1)
306         mfhi    ($tmp1,$rs1,$h1)
307
308         dmultu  ($r1,$h0)               # h0*r1
309          daddu  $d0,$tmp0
310          daddu  $d1,$tmp1
311         mflo    ($tmp2,$r1,$h0)
312         mfhi    ($d2,$r1,$h0)
313          sltu   $tmp0,$d0,$tmp0
314          daddu  $d1,$tmp0
315
316         dmultu  ($r0,$h1)               # h1*r0
317          daddu  $d1,$tmp2
318          sltu   $tmp2,$d1,$tmp2
319         mflo    ($tmp0,$r0,$h1)
320         mfhi    ($tmp1,$r0,$h1)
321          daddu  $d2,$tmp2
322
323         dmultu  ($rs1,$h2)              # h2*5*r1
324          daddu  $d1,$tmp0
325          daddu  $d2,$tmp1
326         mflo    ($tmp2,$rs1,$h2)
327
328         dmultu  ($r0,$h2)               # h2*r0
329          sltu   $tmp0,$d1,$tmp0
330          daddu  $d2,$tmp0
331         mflo    ($tmp3,$r0,$h2)
332
333         daddu   $d1,$tmp2
334         daddu   $d2,$tmp3
335         sltu    $tmp2,$d1,$tmp2
336         daddu   $d2,$tmp2
337
338         li      $tmp0,-4                # final reduction
339         and     $tmp0,$d2
340         dsrl    $tmp1,$d2,2
341         andi    $h2,$d2,3
342         daddu   $tmp0,$tmp1
343         daddu   $h0,$d0,$tmp0
344         sltu    $tmp0,$h0,$tmp0
345         daddu   $h1,$d1,$tmp0
346         sltu    $tmp0,$h1,$tmp0
347         daddu   $h2,$h2,$tmp0
348
349         bnez    $len,.Loop
350
351         sd      $h0,0($ctx)             # store hash value
352         sd      $h1,8($ctx)
353         sd      $h2,16($ctx)
354
355         .set    noreorder
356         ld      $s5,40($sp)             # epilogue
357         ld      $s4,32($sp)
358 ___
359 $code.=<<___ if ($flavour =~ /nubi/i);  # optimize non-nubi epilogue
360         ld      $s3,24($sp)
361         ld      $s2,16($sp)
362         ld      $s1,8($sp)
363         ld      $s0,0($sp)
364 ___
365 $code.=<<___;
366         jr      $ra
367         daddu   $sp,6*8
368 .end    poly1305_blocks_internal
369 ___
370 }
371 {
# Emit poly1305_emit(ctx, mac, nonce).
#
# The routine loads the three hash words from 0/8/16($ctx), computes h + 5
# with carry propagation, and keeps h + 5 instead of h when the carry reaches
# bit 2 of the third word (the all-ones/all-zeroes select mask is derived from
# that word shifted right by 2).  It then loads four 32-bit nonce words with
# lwu, adds them as a 128-bit value, and writes the low 128 bits of the result
# to $mac one byte at a time, least significant byte first.
# The heredoc ends by switching to .rdata with .align 2.
372 my ($ctx,$mac,$nonce) = ($a0,$a1,$a2);
373
374 $code.=<<___;
375 .align  5
376 .globl  poly1305_emit
377 .ent    poly1305_emit
378 poly1305_emit:
379         .frame  $sp,0,$ra
380         .set    reorder
381
382         ld      $tmp0,0($ctx)
383         ld      $tmp1,8($ctx)
384         ld      $tmp2,16($ctx)
385
386         daddiu  $in0,$tmp0,5            # compare to modulus
387         sltiu   $tmp3,$in0,5
388         daddu   $in1,$tmp1,$tmp3
389         sltu    $tmp3,$in1,$tmp3
390         daddu   $tmp2,$tmp2,$tmp3
391
392         dsrl    $tmp2,2                 # see if it carried/borrowed
393         dsubu   $tmp2,$zero,$tmp2
394         nor     $tmp3,$zero,$tmp2
395
396         and     $in0,$tmp2
397         and     $tmp0,$tmp3
398         and     $in1,$tmp2
399         and     $tmp1,$tmp3
400         or      $in0,$tmp0
401         or      $in1,$tmp1
402
403         lwu     $tmp0,0($nonce)         # load nonce
404         lwu     $tmp1,4($nonce)
405         lwu     $tmp2,8($nonce)
406         lwu     $tmp3,12($nonce)
407         dsll    $tmp1,32
408         dsll    $tmp3,32
409         or      $tmp0,$tmp1
410         or      $tmp2,$tmp3
411
412         daddu   $in0,$tmp0              # accumulate nonce
413         daddu   $in1,$tmp2
414         sltu    $tmp0,$in0,$tmp0
415         daddu   $in1,$tmp0
416
417         dsrl    $tmp0,$in0,8            # write mac value
418         dsrl    $tmp1,$in0,16
419         dsrl    $tmp2,$in0,24
420         sb      $in0,0($mac)
421         dsrl    $tmp3,$in0,32
422         sb      $tmp0,1($mac)
423         dsrl    $tmp0,$in0,40
424         sb      $tmp1,2($mac)
425         dsrl    $tmp1,$in0,48
426         sb      $tmp2,3($mac)
427         dsrl    $tmp2,$in0,56
428         sb      $tmp3,4($mac)
429         dsrl    $tmp3,$in1,8
430         sb      $tmp0,5($mac)
431         dsrl    $tmp0,$in1,16
432         sb      $tmp1,6($mac)
433         dsrl    $tmp1,$in1,24
434         sb      $tmp2,7($mac)
435
436         sb      $in1,8($mac)
437         dsrl    $tmp2,$in1,32
438         sb      $tmp3,9($mac)
439         dsrl    $tmp3,$in1,40
440         sb      $tmp0,10($mac)
441         dsrl    $tmp0,$in1,48
442         sb      $tmp1,11($mac)
443         dsrl    $tmp1,$in1,56
444         sb      $tmp2,12($mac)
445         sb      $tmp3,13($mac)
446         sb      $tmp0,14($mac)
447         sb      $tmp1,15($mac)
448
449         jr      $ra
450 .end    poly1305_emit
451 .rdata
452 .align  2
453 ___
454 }
455
456 # Redirect STDOUT to the file named by the last argument (if any) BEFORE
457 # printing anything, so the licence banner copied from this script ends up
458 # in the output file together with the generated code.
459 $output=pop and (open STDOUT,">",$output or die "can't open $output: $!");
460
461 if (open my $self,"<",$0) {             # best effort: no banner if unreadable
462         while(<$self>) {
463                 next if (/^#!/);                 # drop the shebang line
464                 last if (!s/^#/\/\// and !/^$/); # '#' -> '//'; stop at first code line
465                 print;
466         }
467         close $self;
468 }
469
470 print $code;
471 close STDOUT or die "error closing STDOUT: $!"; # buffered write errors surface here