2 # SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
4 # This code is taken from the OpenSSL project but the author, Andy Polyakov,
5 # has relicensed it under the licenses specified in the SPDX header above.
6 # The original headers, including the original license headers, are
7 # included below for completeness.
9 # ====================================================================
10 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
11 # project. The module is, however, dual licensed under OpenSSL and
12 # CRYPTOGAMS licenses depending on where you obtain it. For further
13 # details see http://www.openssl.org/~appro/cryptogams/.
14 # ====================================================================
16 # Poly1305 hash for MIPS64.
20 # Numbers are cycles per processed byte with poly1305_blocks alone.
23 # R1x000 5.64/+120% (big-endian)
24 # Octeon II 3.80/+280% (little-endian)
26 ######################################################################
27 # There are a number of MIPS ABIs in use; O32 and N32/64 are the most
28 # widely used. Then there is a new contender: NUBI. It appears that if
29 # one picks the latter, it's possible to arrange code in an ABI-neutral
30 # manner. Therefore let's stick to the NUBI register layout:
# Bind every symbolic register name to the string "$<n>", where <n> is the
# register number at the same position in the list on the right. The rest of
# the script interpolates these names instead of raw register numbers.
# $zero, $at and the $t registers: numbers 0, 1, 2, 24, 25.
32 ($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25));
# $a0-$a7: numbers 4 through 11.
33 ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
# $s0-$s11: numbers 12 through 23.
34 ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23));
# $gp is number 3; $tp, $sp, $fp and $ra are numbers 28 through 31.
35 ($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31));
37 # The return value is placed in $a0. Following coding rules facilitate
40 # - never ever touch $tp, "thread pointer", former $gp [o32 can be
41 # excluded from the rule, because it's specified volatile];
42 # - copy return value to $t0, former $v0 [or to $a0 if you're adapting
44 # - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
46 # For reference here is register layout for N32/64 MIPS ABIs:
48 # ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
49 # ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
50 # ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
51 # ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
52 # ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
56 ######################################################################
# The flavour is the first command-line argument; a missing (or false)
# argument falls back to "64".
58 $flavour = shift || "64"; # supported flavours are o32,n32,64,nubi32,nubi64
# The match is unanchored and case-insensitive: any flavour containing "64"
# or "n32" passes. Of the names listed above, "o32" and "nubi32" contain
# neither substring, so the script dies for them.
60 die "MIPS64 only" unless ($flavour =~ /64|n32/i);
# $v0 names the same register as $a0 for NUBI flavours and as $t0 otherwise.
# NOTE(review): presumably the return-value register; its uses are not
# visible here -- confirm.
62 $v0 = ($flavour =~ /nubi/i) ? $a0 : $t0;
# Value handed to the .mask directive of poly1305_blocks_internal: bits
# 12-17 set for NUBI flavours, bits 16-17 set otherwise.
# NOTE(review): presumably one bit per saved register number -- confirm
# against the assembler documentation.
63 $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0x0003f000" : "0x00030000";
# File-level names for the first four argument registers.
65 ($ctx,$inp,$len,$padbit) = ($a0,$a1,$a2,$a3);
# Working registers: $in0/$in1 use $a4/$a5, $tmp0/$tmp1 use $a6/$a7, and
# $tmp2-$tmp4 use $at, $t0 and $t1. For non-NUBI flavours $tmp3 is therefore
# the same register as $v0.
66 ($in0,$in1,$tmp0,$tmp1,$tmp2,$tmp3,$tmp4) = ($a4,$a5,$a6,$a7,$at,$t0,$t1);
69 #if (defined(_MIPS_ARCH_MIPS64R3) || defined(_MIPS_ARCH_MIPS64R5) || \\
70 defined(_MIPS_ARCH_MIPS64R6)) \\
71 && !defined(_MIPS_ARCH_MIPS64R2)
72 # define _MIPS_ARCH_MIPS64R2
75 #if defined(_MIPS_ARCH_MIPS64R6)
76 # define dmultu(rs,rt)
77 # define mflo(rd,rs,rt) dmulu rd,rs,rt
78 # define mfhi(rd,rs,rt) dmuhu rd,rs,rt
80 # define dmultu(rs,rt) dmultu rs,rt
81 # define mflo(rd,rs,rt) mflo rd
82 # define mfhi(rd,rs,rt) mfhi rd
86 # define poly1305_init poly1305_init_mips
87 # define poly1305_blocks poly1305_blocks_mips
88 # define poly1305_emit poly1305_emit_mips
91 #if defined(__MIPSEB__) && !defined(MIPSEB)
120 #if defined(_MIPS_ARCH_MIPS64R6)
130 # if defined(_MIPS_ARCH_MIPS64R2)
131 dsbh $in0,$in0 # byte swap
138 or $tmp0,$tmp2 # 0x000000FF000000FF
140 and $tmp1,$in0,$tmp0 # byte swap
148 dsll $tmp0,8 # 0x0000FF000000FF00
175 daddiu $tmp0,-1 # 0ffffffc0fffffff
178 daddiu $tmp0,-3 # 0ffffffc0ffffffc
184 daddu $tmp0,$in1 # s1 = r1 + (r1 >> 2)
# Lexical register names for the code that follows: $h0-$h2 (hash value),
# $r0/$r1 (key) and $s1 are bound to $s0-$s5, while $d0-$d2 reuse $in0, $in1
# and $t2.
# The $s1 on the right-hand side is still the file-level register name: a
# variable declared with my() only becomes visible after the declaring
# statement. From the next statement on, $s1 refers to the register that
# $s5 names.
193 my ($h0,$h1,$h2,$r0,$r1,$s1,$d0,$d1,$d2) =
194 ($s0,$s1,$s2,$s3,$s4,$s5,$in0,$in1,$t2);
198 .globl poly1305_blocks
202 dsrl $len,4 # number of complete blocks
203 bnez $len,poly1305_blocks_internal
210 .ent poly1305_blocks_internal
211 poly1305_blocks_internal:
213 .mask $SAVED_REGS_MASK,-8
219 $code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue
228 ld $h0,0($ctx) # load hash value
232 ld $r0,24($ctx) # load key
237 #if defined(_MIPS_ARCH_MIPS64R6)
238 ld $in0,0($inp) # load input
241 ldl $in0,0+MSB($inp) # load input
249 # if defined(_MIPS_ARCH_MIPS64R2)
250 dsbh $in0,$in0 # byte swap
257 or $tmp0,$tmp2 # 0x000000FF000000FF
259 and $tmp1,$in0,$tmp0 # byte swap
267 dsll $tmp0,8 # 0x0000FF000000FF00
290 daddu $h0,$in0 # accumulate input
296 dmultu ($r0,$h0) # h0*r0
302 dmultu ($s1,$h1) # h1*5*r1
308 dmultu ($r1,$h0) # h0*r1
316 dmultu ($r0,$h1) # h1*r0
323 dmultu ($s1,$h2) # h2*5*r1
328 dmultu ($r0,$h2) # h2*r0
338 li $tmp0,-4 # final reduction
351 sd $h0,0($ctx) # store hash value
356 ld $s5,40($sp) # epilogue
359 $code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi epilogue
368 .end poly1305_blocks_internal
# Lexical names for the first three argument registers. This $ctx shadows the
# file-level $ctx, which is bound to the same register ($a0).
# NOTE(review): presumably the arguments of poly1305_emit -- the routine's
# label is not visible here; confirm.
372 my ($ctx,$mac,$nonce) = ($a0,$a1,$a2);
386 daddiu $in0,$tmp0,5 # compare to modulus
388 daddu $in1,$tmp1,$tmp3
389 sltu $tmp3,$in1,$tmp3
390 daddu $tmp2,$tmp2,$tmp3
392 dsrl $tmp2,2 # see if it carried/borrowed
393 dsubu $tmp2,$zero,$tmp2
394 nor $tmp3,$zero,$tmp2
403 lwu $tmp0,0($nonce) # load nonce
412 daddu $in0,$tmp0 # accumulate nonce
414 sltu $tmp0,$in0,$tmp0
417 dsrl $tmp0,$in0,8 # write mac value
# Rewrite a leading '#' on the current line ($_) to '//'. Leave the enclosing
# loop at the first line that had no leading '#' and is not empty.
459 last if (!s/^#/\/\// and !/^$/);
# Take the last remaining command-line argument as the output file name and,
# when one is given, redirect STDOUT to it. With no (or a false) argument,
# STDOUT is left untouched, exactly as before.
# The three-argument form of open keeps characters in the file name from
# being interpreted as part of the open mode, and a failed open now aborts
# the script instead of carrying on without a usable output file.
if ($output = pop) {
	open STDOUT, '>', $output or die "can't open $output: $!";
}