Pull in r211627 from upstream llvm trunk (by Bill Schmidt): [PPC64] Fix PR20071 (fctiduz generated for targets lacking that instruction) PR20071 identifies a problem in PowerPC's fast-isel implementation for floating-point conversion to integer. The fctiduz instruction was added in Power ISA 2.06 (i.e., Power7 and later). However, this instruction is being generated regardless of which 64-bit PowerPC target is selected. The intent is for fast-isel to punt to DAG selection when this instruction is not available. This patch implements that change. For testing purposes, the existing fast-isel-conversion.ll test adds a RUN line for -mcpu=970 and tests for the expected code generation. Additionally, the existing test fast-isel-conversion-p5.ll was found to be incorrectly expecting the unavailable instruction to be generated. I've removed these test variants since we have adequate coverage in fast-isel-conversion.ll. This is needed to compile clang with debug+asserts on older powerpc64 and ppc970 targets. Introduced here: http://svnweb.freebsd.org/changeset/base/267981 Index: lib/Target/PowerPC/PPCFastISel.cpp =================================================================== --- lib/Target/PowerPC/PPCFastISel.cpp (revision 106) +++ lib/Target/PowerPC/PPCFastISel.cpp (revision 107) @@ -1026,6 +1026,10 @@ bool PPCFastISel::SelectFPToI(const Instruction *I if (DstVT != MVT::i32 && DstVT != MVT::i64) return false; + // If we don't have FCTIDUZ and we need it, punt to SelectionDAG. + if (DstVT == MVT::i64 && !IsSigned && !PPCSubTarget.hasFPCVT()) + return false; + Value *Src = I->getOperand(0); Type *SrcTy = Src->getType(); if (!isTypeLegal(SrcTy, SrcVT)) Index: test/CodeGen/PowerPC/fast-isel-conversion-p5.ll =================================================================== --- test/CodeGen/PowerPC/fast-isel-conversion-p5.ll (revision 106) +++ test/CodeGen/PowerPC/fast-isel-conversion-p5.ll (revision 107) @@ -116,18 +116,6 @@ entry: ret void } -define void @fptoui_float_i64(float %a) nounwind ssp { -entry: -; ELF64: fptoui_float_i64 - %b.addr = alloca i64, align 4 - %conv = fptoui float %a to i64 -; ELF64: fctiduz -; ELF64: stfd -; ELF64: ld - store i64 %conv, i64* %b.addr, align 4 - ret void -} - define void @fptoui_double_i32(double %a) nounwind ssp { entry: ; ELF64: fptoui_double_i32 @@ -140,14 +128,3 @@ entry: ret void } -define void @fptoui_double_i64(double %a) nounwind ssp { -entry: -; ELF64: fptoui_double_i64 - %b.addr = alloca i64, align 8 - %conv = fptoui double %a to i64 -; ELF64: fctiduz -; ELF64: stfd -; ELF64: ld - store i64 %conv, i64* %b.addr, align 8 - ret void -} Index: test/CodeGen/PowerPC/fast-isel-conversion.ll =================================================================== --- test/CodeGen/PowerPC/fast-isel-conversion.ll (revision 106) +++ test/CodeGen/PowerPC/fast-isel-conversion.ll (revision 107) @@ -1,15 +1,24 @@ ; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64 +; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=970 | FileCheck %s --check-prefix=PPC970 +;; Tests for 970 don't use -fast-isel-abort because we intentionally punt +;; to SelectionDAG in some cases. + ; Test sitofp define void @sitofp_single_i64(i64 %a, float %b) nounwind ssp { entry: ; ELF64: sitofp_single_i64 +; PPC970: sitofp_single_i64 %b.addr = alloca float, align 4 %conv = sitofp i64 %a to float ; ELF64: std ; ELF64: lfd ; ELF64: fcfids +; PPC970: std +; PPC970: lfd +; PPC970: fcfid +; PPC970: frsp store float %conv, float* %b.addr, align 4 ret void } @@ -17,11 +26,16 @@ entry: define void @sitofp_single_i32(i32 %a, float %b) nounwind ssp { entry: ; ELF64: sitofp_single_i32 +; PPC970: sitofp_single_i32 %b.addr = alloca float, align 4 %conv = sitofp i32 %a to float ; ELF64: std ; ELF64: lfiwax ; ELF64: fcfids +; PPC970: std +; PPC970: lfd +; PPC970: fcfid +; PPC970: frsp store float %conv, float* %b.addr, align 4 ret void } @@ -29,6 +43,7 @@ entry: define void @sitofp_single_i16(i16 %a, float %b) nounwind ssp { entry: ; ELF64: sitofp_single_i16 +; PPC970: sitofp_single_i16 %b.addr = alloca float, align 4 %conv = sitofp i16 %a to float ; ELF64: extsh @@ -35,6 +50,11 @@ entry: ; ELF64: std ; ELF64: lfd ; ELF64: fcfids +; PPC970: extsh +; PPC970: std +; PPC970: lfd +; PPC970: fcfid +; PPC970: frsp store float %conv, float* %b.addr, align 4 ret void } @@ -42,6 +62,7 @@ entry: define void @sitofp_single_i8(i8 %a) nounwind ssp { entry: ; ELF64: sitofp_single_i8 +; PPC970: sitofp_single_i8 %b.addr = alloca float, align 4 %conv = sitofp i8 %a to float ; ELF64: extsb @@ -48,6 +69,11 @@ entry: ; ELF64: std ; ELF64: lfd ; ELF64: fcfids +; PPC970: extsb +; PPC970: std +; PPC970: lfd +; PPC970: fcfid +; PPC970: frsp store float %conv, float* %b.addr, align 4 ret void } @@ -55,11 +81,15 @@ entry: define void @sitofp_double_i32(i32 %a, double %b) nounwind ssp { entry: ; ELF64: sitofp_double_i32 +; PPC970: sitofp_double_i32 %b.addr = alloca double, align 8 %conv = sitofp i32 %a to double ; ELF64: std ; ELF64: lfiwax ; ELF64: fcfid +; PPC970: std +; PPC970: lfd +; PPC970: fcfid store double %conv, double* %b.addr, align 8 ret void } @@ -67,11 +97,15 @@ entry: define void @sitofp_double_i64(i64 %a, double %b) nounwind ssp { entry: ; ELF64: sitofp_double_i64 +; PPC970: sitofp_double_i64 %b.addr = alloca double, align 8 %conv = sitofp i64 %a to double ; ELF64: std ; ELF64: lfd ; ELF64: fcfid +; PPC970: std +; PPC970: lfd +; PPC970: fcfid store double %conv, double* %b.addr, align 8 ret void } @@ -79,6 +113,7 @@ entry: define void @sitofp_double_i16(i16 %a, double %b) nounwind ssp { entry: ; ELF64: sitofp_double_i16 +; PPC970: sitofp_double_i16 %b.addr = alloca double, align 8 %conv = sitofp i16 %a to double ; ELF64: extsh @@ -85,6 +120,10 @@ entry: ; ELF64: std ; ELF64: lfd ; ELF64: fcfid +; PPC970: extsh +; PPC970: std +; PPC970: lfd +; PPC970: fcfid store double %conv, double* %b.addr, align 8 ret void } @@ -92,6 +131,7 @@ entry: define void @sitofp_double_i8(i8 %a, double %b) nounwind ssp { entry: ; ELF64: sitofp_double_i8 +; PPC970: sitofp_double_i8 %b.addr = alloca double, align 8 %conv = sitofp i8 %a to double ; ELF64: extsb @@ -98,6 +138,10 @@ entry: ; ELF64: std ; ELF64: lfd ; ELF64: fcfid +; PPC970: extsb +; PPC970: std +; PPC970: lfd +; PPC970: fcfid store double %conv, double* %b.addr, align 8 ret void } @@ -107,11 +151,13 @@ entry: define void @uitofp_single_i64(i64 %a, float %b) nounwind ssp { entry: ; ELF64: uitofp_single_i64 +; PPC970: uitofp_single_i64 %b.addr = alloca float, align 4 %conv = uitofp i64 %a to float ; ELF64: std ; ELF64: lfd ; ELF64: fcfidus +; PPC970-NOT: fcfidus store float %conv, float* %b.addr, align 4 ret void } @@ -119,11 +165,14 @@ entry: define void @uitofp_single_i32(i32 %a, float %b) nounwind ssp { entry: ; ELF64: uitofp_single_i32 +; PPC970: uitofp_single_i32 %b.addr = alloca float, align 4 %conv = uitofp i32 %a to float ; ELF64: std ; ELF64: lfiwzx ; ELF64: fcfidus +; PPC970-NOT: lfiwzx +; PPC970-NOT: fcfidus store float %conv, float* %b.addr, align 4 ret void } @@ -131,6 +180,7 @@ entry: define void @uitofp_single_i16(i16 %a, float %b) nounwind ssp { entry: ; ELF64: uitofp_single_i16 +; PPC970: uitofp_single_i16 %b.addr = alloca float, align 4 %conv = uitofp i16 %a to float ; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48 @@ -137,6 +187,11 @@ entry: ; ELF64: std ; ELF64: lfd ; ELF64: fcfidus +; PPC970: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 16, 31 +; PPC970: std +; PPC970: lfd +; PPC970: fcfid +; PPC970: frsp store float %conv, float* %b.addr, align 4 ret void } @@ -144,6 +199,7 @@ entry: define void @uitofp_single_i8(i8 %a) nounwind ssp { entry: ; ELF64: uitofp_single_i8 +; PPC970: uitofp_single_i8 %b.addr = alloca float, align 4 %conv = uitofp i8 %a to float ; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 56 @@ -150,6 +206,11 @@ entry: ; ELF64: std ; ELF64: lfd ; ELF64: fcfidus +; PPC970: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 24, 31 +; PPC970: std +; PPC970: lfd +; PPC970: fcfid +; PPC970: frsp store float %conv, float* %b.addr, align 4 ret void } @@ -157,11 +218,13 @@ entry: define void @uitofp_double_i64(i64 %a, double %b) nounwind ssp { entry: ; ELF64: uitofp_double_i64 +; PPC970: uitofp_double_i64 %b.addr = alloca double, align 8 %conv = uitofp i64 %a to double ; ELF64: std ; ELF64: lfd ; ELF64: fcfidu +; PPC970-NOT: fcfidu store double %conv, double* %b.addr, align 8 ret void } @@ -169,11 +232,14 @@ entry: define void @uitofp_double_i32(i32 %a, double %b) nounwind ssp { entry: ; ELF64: uitofp_double_i32 +; PPC970: uitofp_double_i32 %b.addr = alloca double, align 8 %conv = uitofp i32 %a to double ; ELF64: std ; ELF64: lfiwzx ; ELF64: fcfidu +; PPC970-NOT: lfiwzx +; PPC970-NOT: fcfidu store double %conv, double* %b.addr, align 8 ret void } @@ -181,6 +247,7 @@ entry: define void @uitofp_double_i16(i16 %a, double %b) nounwind ssp { entry: ; ELF64: uitofp_double_i16 +; PPC970: uitofp_double_i16 %b.addr = alloca double, align 8 %conv = uitofp i16 %a to double ; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48 @@ -187,6 +254,10 @@ entry: ; ELF64: std ; ELF64: lfd ; ELF64: fcfidu +; PPC970: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 16, 31 +; PPC970: std +; PPC970: lfd +; PPC970: fcfid store double %conv, double* %b.addr, align 8 ret void } @@ -194,6 +265,7 @@ entry: define void @uitofp_double_i8(i8 %a, double %b) nounwind ssp { entry: ; ELF64: uitofp_double_i8 +; PPC970: uitofp_double_i8 %b.addr = alloca double, align 8 %conv = uitofp i8 %a to double ; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 56 @@ -200,6 +272,10 @@ entry: ; ELF64: std ; ELF64: lfd ; ELF64: fcfidu +; PPC970: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 24, 31 +; PPC970: std +; PPC970: lfd +; PPC970: fcfid store double %conv, double* %b.addr, align 8 ret void } @@ -209,11 +285,15 @@ entry: define void @fptosi_float_i32(float %a) nounwind ssp { entry: ; ELF64: fptosi_float_i32 +; PPC970: fptosi_float_i32 %b.addr = alloca i32, align 4 %conv = fptosi float %a to i32 ; ELF64: fctiwz ; ELF64: stfd ; ELF64: lwa +; PPC970: fctiwz +; PPC970: stfd +; PPC970: lwa store i32 %conv, i32* %b.addr, align 4 ret void } @@ -221,11 +301,15 @@ entry: define void @fptosi_float_i64(float %a) nounwind ssp { entry: ; ELF64: fptosi_float_i64 +; PPC970: fptosi_float_i64 %b.addr = alloca i64, align 4 %conv = fptosi float %a to i64 ; ELF64: fctidz ; ELF64: stfd ; ELF64: ld +; PPC970: fctidz +; PPC970: stfd +; PPC970: ld store i64 %conv, i64* %b.addr, align 4 ret void } @@ -233,11 +317,15 @@ entry: define void @fptosi_double_i32(double %a) nounwind ssp { entry: ; ELF64: fptosi_double_i32 +; PPC970: fptosi_double_i32 %b.addr = alloca i32, align 8 %conv = fptosi double %a to i32 ; ELF64: fctiwz ; ELF64: stfd ; ELF64: lwa +; PPC970: fctiwz +; PPC970: stfd +; PPC970: lwa store i32 %conv, i32* %b.addr, align 8 ret void } @@ -245,11 +333,15 @@ entry: define void @fptosi_double_i64(double %a) nounwind ssp { entry: ; ELF64: fptosi_double_i64 +; PPC970: fptosi_double_i64 %b.addr = alloca i64, align 8 %conv = fptosi double %a to i64 ; ELF64: fctidz ; ELF64: stfd ; ELF64: ld +; PPC970: fctidz +; PPC970: stfd +; PPC970: ld store i64 %conv, i64* %b.addr, align 8 ret void } @@ -259,11 +351,15 @@ entry: define void @fptoui_float_i32(float %a) nounwind ssp { entry: ; ELF64: fptoui_float_i32 +; PPC970: fptoui_float_i32 %b.addr = alloca i32, align 4 %conv = fptoui float %a to i32 ; ELF64: fctiwuz ; ELF64: stfd ; ELF64: lwz +; PPC970: fctidz +; PPC970: stfd +; PPC970: lwz store i32 %conv, i32* %b.addr, align 4 ret void } @@ -271,11 +367,13 @@ entry: define void @fptoui_float_i64(float %a) nounwind ssp { entry: ; ELF64: fptoui_float_i64 +; PPC970: fptoui_float_i64 %b.addr = alloca i64, align 4 %conv = fptoui float %a to i64 ; ELF64: fctiduz ; ELF64: stfd ; ELF64: ld +; PPC970-NOT: fctiduz store i64 %conv, i64* %b.addr, align 4 ret void } @@ -283,11 +381,15 @@ entry: define void @fptoui_double_i32(double %a) nounwind ssp { entry: ; ELF64: fptoui_double_i32 +; PPC970: fptoui_double_i32 %b.addr = alloca i32, align 8 %conv = fptoui double %a to i32 ; ELF64: fctiwuz ; ELF64: stfd ; ELF64: lwz +; PPC970: fctidz +; PPC970: stfd +; PPC970: lwz store i32 %conv, i32* %b.addr, align 8 ret void } @@ -295,11 +397,13 @@ entry: define void @fptoui_double_i64(double %a) nounwind ssp { entry: ; ELF64: fptoui_double_i64 +; PPC970: fptoui_double_i64 %b.addr = alloca i64, align 8 %conv = fptoui double %a to i64 ; ELF64: fctiduz ; ELF64: stfd ; ELF64: ld +; PPC970-NOT: fctiduz store i64 %conv, i64* %b.addr, align 8 ret void }