Pull in r198157 from upstream llvm trunk (by Venkatraman Govindaraju):

  [SparcV9] Use separate instruction patterns for 64 bit arithmetic
  instructions instead of reusing 32 bit instruction patterns.
  This is done to avoid spilling the result of the 64-bit instructions
  to a 4-byte slot.

Introduced here: http://svnweb.freebsd.org/changeset/base/262261
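For reference only (this note and the annotated copy below are not part of the
upstream patch), the first case of the new 64spill.ll test shows how the
problem is exercised: the inline asm clobbers every allocatable register, so
the i64 result of the and is still live across the asm and has to be spilled,
and the CHECK lines then require the 8-byte stx/ldx spill and reload instead
of a 4-byte slot.

  ; Annotated copy of test_and_spill from the added test (comments added here).
  ; %r0 is both the asm input and the return value, so it stays live across the
  ; inline asm; with every allocatable register clobbered it must go to the
  ; stack. With the 64-bit ANDX pattern the value lives in an I64Regs virtual
  ; register, so the spill slot is 8 bytes and is written/read with stx/ldx.
  define i64 @test_and_spill(i64 %a, i64 %b) {
  entry:
    %r0 = and i64 %a, %b
    %0 = tail call i64 asm sideeffect "#$0 $1", "=r,r,~{i0},~{i1},~{i2},~{i3},~{i4},~{i5},~{i6},~{i7},~{g1},~{g2},~{g3},~{g4},~{g5},~{g6},~{g7},~{l0},~{l1},~{l2},~{l3},~{l4},~{l5},~{l6},~{l7},~{o0},~{o1},~{o2},~{o3},~{o4},~{o5},~{o6}"(i64 %r0)
    ret i64 %r0
  }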
Index: lib/Target/Sparc/SparcAsmPrinter.cpp
===================================================================
--- lib/Target/Sparc/SparcAsmPrinter.cpp
+++ lib/Target/Sparc/SparcAsmPrinter.cpp
@@ -227,7 +227,7 @@ void SparcAsmPrinter::printOperand(const MachineIn
   if (MI->getOpcode() == SP::CALL)
     assert(TF == SPII::MO_NO_FLAG &&
            "Cannot handle target flags on call address");
-  else if (MI->getOpcode() == SP::SETHIi)
+  else if (MI->getOpcode() == SP::SETHIi || MI->getOpcode() == SP::SETHIXi)
     assert((TF == SPII::MO_HI || TF == SPII::MO_H44 || TF == SPII::MO_HH
             || TF == SPII::MO_TLS_GD_HI22
             || TF == SPII::MO_TLS_LDM_HI22
@@ -250,7 +250,7 @@ void SparcAsmPrinter::printOperand(const MachineIn
   else if (MI->getOpcode() == SP::TLS_LDXrr)
     assert(TF == SPII::MO_TLS_IE_LDX &&
            "Cannot handle target flags on ldx for TLS");
-  else if (MI->getOpcode() == SP::XORri)
+  else if (MI->getOpcode() == SP::XORri || MI->getOpcode() == SP::XORXri)
     assert((TF == SPII::MO_TLS_LDO_LOX10 || TF == SPII::MO_TLS_LE_LOX10) &&
            "Cannot handle target flags on xor for TLS");
   else
Index: lib/Target/Sparc/SparcInstr64Bit.td
===================================================================
--- lib/Target/Sparc/SparcInstr64Bit.td
+++ lib/Target/Sparc/SparcInstr64Bit.td
@@ -141,32 +141,36 @@ def : Pat<(i64 imm:$val),
 let Predicates = [Is64Bit] in {
 
 // Register-register instructions.
+defm ANDX : F3_12<"and", 0b000001, and, I64Regs, i64, i64imm>;
+defm ORX : F3_12<"or", 0b000010, or, I64Regs, i64, i64imm>;
+defm XORX : F3_12<"xor", 0b000011, xor, I64Regs, i64, i64imm>;
 
-def : Pat<(and i64:$a, i64:$b), (ANDrr $a, $b)>;
-def : Pat<(or i64:$a, i64:$b), (ORrr $a, $b)>;
-def : Pat<(xor i64:$a, i64:$b), (XORrr $a, $b)>;
+def ANDXNrr : F3_1<2, 0b000101,
+                   (outs I64Regs:$dst), (ins I64Regs:$b, I64Regs:$c),
+                   "andn $b, $c, $dst",
+                   [(set i64:$dst, (and i64:$b, (not i64:$c)))]>;
+def ORXNrr : F3_1<2, 0b000110,
+                   (outs I64Regs:$dst), (ins I64Regs:$b, I64Regs:$c),
+                   "orn $b, $c, $dst",
+                   [(set i64:$dst, (or i64:$b, (not i64:$c)))]>;
+def XNORXrr : F3_1<2, 0b000111,
+                   (outs I64Regs:$dst), (ins I64Regs:$b, I64Regs:$c),
+                   "xnor $b, $c, $dst",
+                   [(set i64:$dst, (not (xor i64:$b, i64:$c)))]>;
 
-def : Pat<(and i64:$a, (not i64:$b)), (ANDNrr $a, $b)>;
-def : Pat<(or i64:$a, (not i64:$b)), (ORNrr $a, $b)>;
-def : Pat<(xor i64:$a, (not i64:$b)), (XNORrr $a, $b)>;
+defm ADDX : F3_12<"add", 0b000000, add, I64Regs, i64, i64imm>;
+defm SUBX : F3_12<"sub", 0b000100, sub, I64Regs, i64, i64imm>;
 
-def : Pat<(add i64:$a, i64:$b), (ADDrr $a, $b)>;
-def : Pat<(sub i64:$a, i64:$b), (SUBrr $a, $b)>;
-
 def : Pat<(SPcmpicc i64:$a, i64:$b), (CMPrr $a, $b)>;
 
-def : Pat<(tlsadd i64:$a, i64:$b, tglobaltlsaddr:$sym),
-          (TLS_ADDrr $a, $b, $sym)>;
+def TLS_ADDXrr : F3_1<2, 0b000000, (outs I64Regs:$rd),
+                      (ins I64Regs:$rs1, I64Regs:$rs2, TLSSym:$sym),
+                      "add $rs1, $rs2, $rd, $sym",
+                      [(set i64:$rd,
+                            (tlsadd i64:$rs1, i64:$rs2, tglobaltlsaddr:$sym))]>;
 
 // Register-immediate instructions.
 
-def : Pat<(and i64:$a, (i64 simm13:$b)), (ANDri $a, (as_i32imm $b))>;
-def : Pat<(or i64:$a, (i64 simm13:$b)), (ORri $a, (as_i32imm $b))>;
-def : Pat<(xor i64:$a, (i64 simm13:$b)), (XORri $a, (as_i32imm $b))>;
-
-def : Pat<(add i64:$a, (i64 simm13:$b)), (ADDri $a, (as_i32imm $b))>;
-def : Pat<(sub i64:$a, (i64 simm13:$b)), (SUBri $a, (as_i32imm $b))>;
-
 def : Pat<(SPcmpicc i64:$a, (i64 simm13:$b)), (CMPri $a, (as_i32imm $b))>;
 
 def : Pat<(ctpop i64:$src), (POPCrr $src)>;
@@ -402,3 +406,38 @@ def : Pat<(SPselectfcc (i64 simm11:$t), i64:$f, im
           (MOVFCCri (as_i32imm $t), $f, imm:$cond)>;
 
 } // Predicates = [Is64Bit]
+
+
+// 64 bit SETHI
+let Predicates = [Is64Bit] in {
+def SETHIXi : F2_1<0b100,
+                   (outs IntRegs:$rd), (ins i64imm:$imm22),
+                   "sethi $imm22, $rd",
+                   [(set i64:$rd, SETHIimm:$imm22)]>;
+}
+
+// Global addresses, constant pool entries
+let Predicates = [Is64Bit] in {
+
+def : Pat<(SPhi tglobaladdr:$in), (SETHIi tglobaladdr:$in)>;
+def : Pat<(SPlo tglobaladdr:$in), (ORXri (i64 G0), tglobaladdr:$in)>;
+def : Pat<(SPhi tconstpool:$in), (SETHIi tconstpool:$in)>;
+def : Pat<(SPlo tconstpool:$in), (ORXri (i64 G0), tconstpool:$in)>;
+
+// GlobalTLS addresses
+def : Pat<(SPhi tglobaltlsaddr:$in), (SETHIi tglobaltlsaddr:$in)>;
+def : Pat<(SPlo tglobaltlsaddr:$in), (ORXri (i64 G0), tglobaltlsaddr:$in)>;
+def : Pat<(add (SPhi tglobaltlsaddr:$in1), (SPlo tglobaltlsaddr:$in2)),
+          (ADDXri (SETHIXi tglobaltlsaddr:$in1), (tglobaltlsaddr:$in2))>;
+def : Pat<(xor (SPhi tglobaltlsaddr:$in1), (SPlo tglobaltlsaddr:$in2)),
+          (XORXri (SETHIXi tglobaltlsaddr:$in1), (tglobaltlsaddr:$in2))>;
+
+// Blockaddress
+def : Pat<(SPhi tblockaddress:$in), (SETHIi tblockaddress:$in)>;
+def : Pat<(SPlo tblockaddress:$in), (ORXri (i64 G0), tblockaddress:$in)>;
+
+// Add reg, lo. This is used when taking the addr of a global/constpool entry.
+def : Pat<(add iPTR:$r, (SPlo tglobaladdr:$in)), (ADDXri $r, tglobaladdr:$in)>;
+def : Pat<(add iPTR:$r, (SPlo tconstpool:$in)), (ADDXri $r, tconstpool:$in)>;
+def : Pat<(add iPTR:$r, (SPlo tblockaddress:$in)),
+          (ADDXri $r, tblockaddress:$in)>;
+}
Index: lib/Target/Sparc/SparcInstrInfo.td
===================================================================
--- lib/Target/Sparc/SparcInstrInfo.td
+++ lib/Target/Sparc/SparcInstrInfo.td
@@ -210,15 +210,16 @@ def FCC_O   : FCC_VAL<29>;  // Ordered
 //===----------------------------------------------------------------------===//
 
 /// F3_12 multiclass - Define a normal F3_1/F3_2 pattern in one shot.
-multiclass F3_12<string OpcStr, bits<6> Op3Val, SDNode OpNode> {
+multiclass F3_12<string OpcStr, bits<6> Op3Val, SDNode OpNode,
+                 RegisterClass RC, ValueType Ty, Operand immOp> {
   def rr  : F3_1<2, Op3Val,
-                 (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+                 (outs RC:$dst), (ins RC:$b, RC:$c),
                  !strconcat(OpcStr, " $b, $c, $dst"),
-                 [(set i32:$dst, (OpNode i32:$b, i32:$c))]>;
+                 [(set Ty:$dst, (OpNode Ty:$b, Ty:$c))]>;
   def ri  : F3_2<2, Op3Val,
-                 (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c),
+                 (outs RC:$dst), (ins RC:$b, immOp:$c),
                  !strconcat(OpcStr, " $b, $c, $dst"),
-                 [(set i32:$dst, (OpNode i32:$b, (i32 simm13:$c)))]>;
+                 [(set Ty:$dst, (OpNode Ty:$b, (Ty simm13:$c)))]>;
 }
 
 /// F3_12np multiclass - Define a normal F3_1/F3_2 pattern in one shot, with no
@@ -464,7 +465,7 @@ let rd = 0, imm22 = 0 in
   def NOP : F2_1<0b100, (outs), (ins), "nop", []>;
 
 // Section B.11 - Logical Instructions, p. 106
-defm AND : F3_12<"and", 0b000001, and>;
+defm AND : F3_12<"and", 0b000001, and, IntRegs, i32, i32imm>;
 
 def ANDNrr : F3_1<2, 0b000101,
                   (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
@@ -474,7 +475,7 @@ def ANDNri : F3_2<2, 0b000101,
                   (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c),
                   "andn $b, $c, $dst", []>;
 
-defm OR : F3_12<"or", 0b000010, or>;
+defm OR : F3_12<"or", 0b000010, or, IntRegs, i32, i32imm>;
 
 def ORNrr : F3_1<2, 0b000110,
                  (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
@@ -483,7 +484,7 @@ def ORNrr : F3_1<2, 0b000110,
 def ORNri : F3_2<2, 0b000110,
                  (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c),
                  "orn $b, $c, $dst", []>;
-defm XOR : F3_12<"xor", 0b000011, xor>;
+defm XOR : F3_12<"xor", 0b000011, xor, IntRegs, i32, i32imm>;
 
 def XNORrr : F3_1<2, 0b000111,
                   (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
@@ -494,12 +495,12 @@ def XNORri : F3_2<2, 0b000111,
                   "xnor $b, $c, $dst", []>;
 
 // Section B.12 - Shift Instructions, p. 107
-defm SLL : F3_12<"sll", 0b100101, shl>;
-defm SRL : F3_12<"srl", 0b100110, srl>;
-defm SRA : F3_12<"sra", 0b100111, sra>;
+defm SLL : F3_12<"sll", 0b100101, shl, IntRegs, i32, i32imm>;
+defm SRL : F3_12<"srl", 0b100110, srl, IntRegs, i32, i32imm>;
+defm SRA : F3_12<"sra", 0b100111, sra, IntRegs, i32, i32imm>;
 
 // Section B.13 - Add Instructions, p. 108
-defm ADD : F3_12<"add", 0b000000, add>;
+defm ADD : F3_12<"add", 0b000000, add, IntRegs, i32, i32imm>;
 
 // "LEA" forms of add (patterns to make tblgen happy)
 let Predicates = [Is32Bit] in
@@ -509,18 +510,18 @@ let Predicates = [Is32Bit] in
                  [(set iPTR:$dst, ADDRri:$addr)]>;
 
 let Defs = [ICC] in
-  defm ADDCC : F3_12<"addcc", 0b010000, addc>;
+  defm ADDCC : F3_12<"addcc", 0b010000, addc, IntRegs, i32, i32imm>;
 
 let Uses = [ICC], Defs = [ICC] in
-  defm ADDX : F3_12<"addxcc", 0b011000, adde>;
+  defm ADDE : F3_12<"addxcc", 0b011000, adde, IntRegs, i32, i32imm>;
 
 // Section B.15 - Subtract Instructions, p. 110
-defm SUB : F3_12 <"sub" , 0b000100, sub>;
+defm SUB : F3_12 <"sub" , 0b000100, sub, IntRegs, i32, i32imm>;
 
 let Uses = [ICC], Defs = [ICC] in
-  defm SUBX : F3_12 <"subxcc" , 0b011100, sube>;
+  defm SUBE : F3_12 <"subxcc" , 0b011100, sube, IntRegs, i32, i32imm>;
 
 let Defs = [ICC] in
-  defm SUBCC : F3_12 <"subcc", 0b010100, subc>;
+  defm SUBCC : F3_12 <"subcc", 0b010100, subc, IntRegs, i32, i32imm>;
 
 let Defs = [ICC], rd = 0 in {
   def CMPrr : F3_1<2, 0b010100,
@@ -542,7 +543,7 @@ let Uses = [ICC], Defs = [ICC] in
 // Section B.18 - Multiply Instructions, p. 113
 let Defs = [Y] in {
   defm UMUL : F3_12np<"umul", 0b001010>;
-  defm SMUL : F3_12 <"smul", 0b001011, mul>;
+  defm SMUL : F3_12 <"smul", 0b001011, mul, IntRegs, i32, i32imm>;
 }
 
 // Section B.19 - Divide Instructions, p. 115
@@ -987,6 +988,8 @@ def : Pat<(i32 imm:$val),
 
 // Global addresses, constant pool entries
+let Predicates = [Is32Bit] in {
+
 def : Pat<(SPhi tglobaladdr:$in), (SETHIi tglobaladdr:$in)>;
 def : Pat<(SPlo tglobaladdr:$in), (ORri (i32 G0), tglobaladdr:$in)>;
 def : Pat<(SPhi tconstpool:$in), (SETHIi tconstpool:$in)>;
@@ -1009,6 +1012,7 @@ def : Pat<(add iPTR:$r, (SPlo tglobaladdr:$in)), (
 def : Pat<(add iPTR:$r, (SPlo tconstpool:$in)), (ADDri $r, tconstpool:$in)>;
 def : Pat<(add iPTR:$r, (SPlo tblockaddress:$in)),
           (ADDri $r, tblockaddress:$in)>;
+}
 
 // Calls:
 def : Pat<(call tglobaladdr:$dst),
Index: test/CodeGen/SPARC/64spill.ll
===================================================================
--- test/CodeGen/SPARC/64spill.ll
+++ test/CodeGen/SPARC/64spill.ll
@@ -0,0 +1,116 @@
+; RUN: llc < %s -march=sparcv9 | FileCheck %s
+
+target datalayout = "E-i64:64-n32:64-S128"
+target triple = "sparc64-sun-sparc"
+
+; CHECK-LABEL: test_and_spill
+; CHECK: and %i0, %i1, [[R:%[gilo][0-7]]]
+; CHECK: stx [[R]], [%fp+{{.+}}]
+; CHECK: ldx [%fp+{{.+}}, %i0
+define i64 @test_and_spill(i64 %a, i64 %b) {
+entry:
+  %r0 = and i64 %a, %b
+  %0 = tail call i64 asm sideeffect "#$0 $1", "=r,r,~{i0},~{i1},~{i2},~{i3},~{i4},~{i5},~{i6},~{i7},~{g1},~{g2},~{g3},~{g4},~{g5},~{g6},~{g7},~{l0},~{l1},~{l2},~{l3},~{l4},~{l5},~{l6},~{l7},~{o0},~{o1},~{o2},~{o3},~{o4},~{o5},~{o6}"(i64 %r0)
+  ret i64 %r0
+}
+
+; CHECK-LABEL: test_or_spill
+; CHECK: or %i0, %i1, [[R:%[gilo][0-7]]]
+; CHECK: stx [[R]], [%fp+{{.+}}]
+; CHECK: ldx [%fp+{{.+}}, %i0
+define i64 @test_or_spill(i64 %a, i64 %b) {
+entry:
+  %r0 = or i64 %a, %b
+  %0 = tail call i64 asm sideeffect "#$0 $1", "=r,r,~{i0},~{i1},~{i2},~{i3},~{i4},~{i5},~{i6},~{i7},~{g1},~{g2},~{g3},~{g4},~{g5},~{g6},~{g7},~{l0},~{l1},~{l2},~{l3},~{l4},~{l5},~{l6},~{l7},~{o0},~{o1},~{o2},~{o3},~{o4},~{o5},~{o6}"(i64 %r0)
+  ret i64 %r0
+}
+
+; CHECK-LABEL: test_xor_spill
+; CHECK: xor %i0, %i1, [[R:%[gilo][0-7]]]
+; CHECK: stx [[R]], [%fp+{{.+}}]
+; CHECK: ldx [%fp+{{.+}}, %i0
+define i64 @test_xor_spill(i64 %a, i64 %b) {
+entry:
+  %r0 = xor i64 %a, %b
+  %0 = tail call i64 asm sideeffect "#$0 $1", "=r,r,~{i0},~{i1},~{i2},~{i3},~{i4},~{i5},~{i6},~{i7},~{g1},~{g2},~{g3},~{g4},~{g5},~{g6},~{g7},~{l0},~{l1},~{l2},~{l3},~{l4},~{l5},~{l6},~{l7},~{o0},~{o1},~{o2},~{o3},~{o4},~{o5},~{o6}"(i64 %r0)
+  ret i64 %r0
+}
+
+
+; CHECK-LABEL: test_add_spill
+; CHECK: add %i0, %i1, [[R:%[gilo][0-7]]]
+; CHECK: stx [[R]], [%fp+{{.+}}]
+; CHECK: ldx [%fp+{{.+}}, %i0
+define i64 @test_add_spill(i64 %a, i64 %b) {
+entry:
+  %r0 = add i64 %a, %b
+  %0 = tail call i64 asm sideeffect "#$0 $1", "=r,r,~{i0},~{i1},~{i2},~{i3},~{i4},~{i5},~{i6},~{i7},~{g1},~{g2},~{g3},~{g4},~{g5},~{g6},~{g7},~{l0},~{l1},~{l2},~{l3},~{l4},~{l5},~{l6},~{l7},~{o0},~{o1},~{o2},~{o3},~{o4},~{o5},~{o6}"(i64 %r0)
+  ret i64 %r0
+}
+
+; CHECK-LABEL: test_sub_spill
+; CHECK: sub %i0, %i1, [[R:%[gilo][0-7]]]
+; CHECK: stx [[R]], [%fp+{{.+}}]
+; CHECK: ldx [%fp+{{.+}}, %i0
+define i64 @test_sub_spill(i64 %a, i64 %b) {
+entry:
+  %r0 = sub i64 %a, %b
+  %0 = tail call i64 asm sideeffect "#$0 $1", "=r,r,~{i0},~{i1},~{i2},~{i3},~{i4},~{i5},~{i6},~{i7},~{g1},~{g2},~{g3},~{g4},~{g5},~{g6},~{g7},~{l0},~{l1},~{l2},~{l3},~{l4},~{l5},~{l6},~{l7},~{o0},~{o1},~{o2},~{o3},~{o4},~{o5},~{o6}"(i64 %r0)
+  ret i64 %r0
+}
+
+; CHECK-LABEL: test_andi_spill
+; CHECK: and %i0, 1729, [[R:%[gilo][0-7]]]
+; CHECK: stx [[R]], [%fp+{{.+}}]
+; CHECK: ldx [%fp+{{.+}}, %i0
+define i64 @test_andi_spill(i64 %a) {
+entry:
+  %r0 = and i64 %a, 1729
+  %0 = tail call i64 asm sideeffect "#$0 $1", "=r,r,~{i0},~{i1},~{i2},~{i3},~{i4},~{i5},~{i6},~{i7},~{g1},~{g2},~{g3},~{g4},~{g5},~{g6},~{g7},~{l0},~{l1},~{l2},~{l3},~{l4},~{l5},~{l6},~{l7},~{o0},~{o1},~{o2},~{o3},~{o4},~{o5},~{o6}"(i64 %r0)
+  ret i64 %r0
+}
+
+; CHECK-LABEL: test_ori_spill
+; CHECK: or %i0, 1729, [[R:%[gilo][0-7]]]
+; CHECK: stx [[R]], [%fp+{{.+}}]
+; CHECK: ldx [%fp+{{.+}}, %i0
+define i64 @test_ori_spill(i64 %a) {
+entry:
+  %r0 = or i64 %a, 1729
+  %0 = tail call i64 asm sideeffect "#$0 $1", "=r,r,~{i0},~{i1},~{i2},~{i3},~{i4},~{i5},~{i6},~{i7},~{g1},~{g2},~{g3},~{g4},~{g5},~{g6},~{g7},~{l0},~{l1},~{l2},~{l3},~{l4},~{l5},~{l6},~{l7},~{o0},~{o1},~{o2},~{o3},~{o4},~{o5},~{o6}"(i64 %r0)
+  ret i64 %r0
+}
+
+; CHECK-LABEL: test_xori_spill
+; CHECK: xor %i0, 1729, [[R:%[gilo][0-7]]]
+; CHECK: stx [[R]], [%fp+{{.+}}]
+; CHECK: ldx [%fp+{{.+}}, %i0
+define i64 @test_xori_spill(i64 %a) {
+entry:
+  %r0 = xor i64 %a, 1729
+  %0 = tail call i64 asm sideeffect "#$0 $1", "=r,r,~{i0},~{i1},~{i2},~{i3},~{i4},~{i5},~{i6},~{i7},~{g1},~{g2},~{g3},~{g4},~{g5},~{g6},~{g7},~{l0},~{l1},~{l2},~{l3},~{l4},~{l5},~{l6},~{l7},~{o0},~{o1},~{o2},~{o3},~{o4},~{o5},~{o6}"(i64 %r0)
+  ret i64 %r0
+}
+
+; CHECK-LABEL: test_addi_spill
+; CHECK: add %i0, 1729, [[R:%[gilo][0-7]]]
+; CHECK: stx [[R]], [%fp+{{.+}}]
+; CHECK: ldx [%fp+{{.+}}, %i0
+define i64 @test_addi_spill(i64 %a) {
+entry:
+  %r0 = add i64 %a, 1729
+  %0 = tail call i64 asm sideeffect "#$0 $1", "=r,r,~{i0},~{i1},~{i2},~{i3},~{i4},~{i5},~{i6},~{i7},~{g1},~{g2},~{g3},~{g4},~{g5},~{g6},~{g7},~{l0},~{l1},~{l2},~{l3},~{l4},~{l5},~{l6},~{l7},~{o0},~{o1},~{o2},~{o3},~{o4},~{o5},~{o6}"(i64 %r0)
+  ret i64 %r0
+}
+
+; CHECK-LABEL: test_subi_spill
+; CHECK: add %i0, -1729, [[R:%[gilo][0-7]]]
+; CHECK: stx [[R]], [%fp+{{.+}}]
+; CHECK: ldx [%fp+{{.+}}, %i0
+define i64 @test_subi_spill(i64 %a) {
+entry:
+  %r0 = sub i64 %a, 1729
+  %0 = tail call i64 asm sideeffect "#$0 $1", "=r,r,~{i0},~{i1},~{i2},~{i3},~{i4},~{i5},~{i6},~{i7},~{g1},~{g2},~{g3},~{g4},~{g5},~{g6},~{g7},~{l0},~{l1},~{l2},~{l3},~{l4},~{l5},~{l6},~{l7},~{o0},~{o1},~{o2},~{o3},~{o4},~{o5},~{o6}"(i64 %r0)
+  ret i64 %r0
+}
+