contrib/llvm/patches/patch-r267981-llvm-r211435-fix-ppc-fctiduz.diff

   1 Pull in r211627 from upstream llvm trunk (by Bill Schmidt):
   2
   3   [PPC64] Fix PR20071 (fctiduz generated for targets lacking that
   4   instruction)
   5
   6   PR20071 identifies a problem in PowerPC's fast-isel implementation
   7   for floating-point conversion to integer.  The fctiduz instruction
   8   was added in Power ISA 2.06 (i.e., Power7 and later).  However, this
   9   instruction is being generated regardless of which 64-bit PowerPC
  10   target is selected.
  11
  12   The intent is for fast-isel to punt to DAG selection when this
  13   instruction is not available.  This patch implements that change.
  14   For testing purposes, the existing fast-isel-conversion.ll test adds
  15   a RUN line for -mcpu=970 and tests for the expected code generation.
  16   Additionally, the existing test fast-isel-conversion-p5.ll was found
  17   to be incorrectly expecting the unavailable instruction to be
  18   generated.  I've removed these test variants since we have adequate
  19   coverage in fast-isel-conversion.ll.
  20
  21 This is needed to compile clang with debug+asserts on older powerpc64
  22 and ppc970 targets.
  23
  24 Introduced here: http://svnweb.freebsd.org/changeset/base/267981
  25
  26 Index: lib/Target/PowerPC/PPCFastISel.cpp
  27 ===================================================================
  28 --- lib/Target/PowerPC/PPCFastISel.cpp  (revision 106)
  29 +++ lib/Target/PowerPC/PPCFastISel.cpp  (revision 107)
  30 @@ -1026,6 +1026,10 @@ bool PPCFastISel::SelectFPToI(const Instruction *I
  31    if (DstVT != MVT::i32 && DstVT != MVT::i64)
  32      return false;
  33
  34 +  // If we don't have FCTIDUZ and we need it, punt to SelectionDAG.
  35 +  if (DstVT == MVT::i64 && !IsSigned && !PPCSubTarget.hasFPCVT())
  36 +    return false;
  37 +
  38    Value *Src = I->getOperand(0);
  39    Type *SrcTy = Src->getType();
  40    if (!isTypeLegal(SrcTy, SrcVT))
  41 Index: test/CodeGen/PowerPC/fast-isel-conversion-p5.ll
  42 ===================================================================
  43 --- test/CodeGen/PowerPC/fast-isel-conversion-p5.ll     (revision 106)
  44 +++ test/CodeGen/PowerPC/fast-isel-conversion-p5.ll     (revision 107)
  45 @@ -116,18 +116,6 @@ entry:
  46    ret void
  47  }
  48
  49 -define void @fptoui_float_i64(float %a) nounwind ssp {
  50 -entry:
  51 -; ELF64: fptoui_float_i64
  52 -  %b.addr = alloca i64, align 4
  53 -  %conv = fptoui float %a to i64
  54 -; ELF64: fctiduz
  55 -; ELF64: stfd
  56 -; ELF64: ld
  57 -  store i64 %conv, i64* %b.addr, align 4
  58 -  ret void
  59 -}
  60 -
  61  define void @fptoui_double_i32(double %a) nounwind ssp {
  62  entry:
  63  ; ELF64: fptoui_double_i32
  64 @@ -140,14 +128,3 @@ entry:
  65    ret void
  66  }
  67
  68 -define void @fptoui_double_i64(double %a) nounwind ssp {
  69 -entry:
  70 -; ELF64: fptoui_double_i64
  71 -  %b.addr = alloca i64, align 8
  72 -  %conv = fptoui double %a to i64
  73 -; ELF64: fctiduz
  74 -; ELF64: stfd
  75 -; ELF64: ld
  76 -  store i64 %conv, i64* %b.addr, align 8
  77 -  ret void
  78 -}
  79 Index: test/CodeGen/PowerPC/fast-isel-conversion.ll
  80 ===================================================================
  81 --- test/CodeGen/PowerPC/fast-isel-conversion.ll        (revision 106)
  82 +++ test/CodeGen/PowerPC/fast-isel-conversion.ll        (revision 107)
  83 @@ -1,15 +1,24 @@
  84  ; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
  85 +; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=970 | FileCheck %s --check-prefix=PPC970
  86
  87 +;; Tests for 970 don't use -fast-isel-abort because we intentionally punt
  88 +;; to SelectionDAG in some cases.
  89 +
  90  ; Test sitofp
  91
  92  define void @sitofp_single_i64(i64 %a, float %b) nounwind ssp {
  93  entry:
  94  ; ELF64: sitofp_single_i64
  95 +; PPC970: sitofp_single_i64
  96    %b.addr = alloca float, align 4
  97    %conv = sitofp i64 %a to float
  98  ; ELF64: std
  99  ; ELF64: lfd
 100  ; ELF64: fcfids
 101 +; PPC970: std
 102 +; PPC970: lfd
 103 +; PPC970: fcfid
 104 +; PPC970: frsp
 105    store float %conv, float* %b.addr, align 4
 106    ret void
 107  }
 108 @@ -17,11 +26,16 @@ entry:
 109  define void @sitofp_single_i32(i32 %a, float %b) nounwind ssp {
 110  entry:
 111  ; ELF64: sitofp_single_i32
 112 +; PPC970: sitofp_single_i32
 113    %b.addr = alloca float, align 4
 114    %conv = sitofp i32 %a to float
 115  ; ELF64: std
 116  ; ELF64: lfiwax
 117  ; ELF64: fcfids
 118 +; PPC970: std
 119 +; PPC970: lfd
 120 +; PPC970: fcfid
 121 +; PPC970: frsp
 122    store float %conv, float* %b.addr, align 4
 123    ret void
 124  }
 125 @@ -29,6 +43,7 @@ entry:
 126  define void @sitofp_single_i16(i16 %a, float %b) nounwind ssp {
 127  entry:
 128  ; ELF64: sitofp_single_i16
 129 +; PPC970: sitofp_single_i16
 130    %b.addr = alloca float, align 4
 131    %conv = sitofp i16 %a to float
 132  ; ELF64: extsh
 133 @@ -35,6 +50,11 @@ entry:
 134  ; ELF64: std
 135  ; ELF64: lfd
 136  ; ELF64: fcfids
 137 +; PPC970: extsh
 138 +; PPC970: std
 139 +; PPC970: lfd
 140 +; PPC970: fcfid
 141 +; PPC970: frsp
 142    store float %conv, float* %b.addr, align 4
 143    ret void
 144  }
 145 @@ -42,6 +62,7 @@ entry:
 146  define void @sitofp_single_i8(i8 %a) nounwind ssp {
 147  entry:
 148  ; ELF64: sitofp_single_i8
 149 +; PPC970: sitofp_single_i8
 150    %b.addr = alloca float, align 4
 151    %conv = sitofp i8 %a to float
 152  ; ELF64: extsb
 153 @@ -48,6 +69,11 @@ entry:
 154  ; ELF64: std
 155  ; ELF64: lfd
 156  ; ELF64: fcfids
 157 +; PPC970: extsb
 158 +; PPC970: std
 159 +; PPC970: lfd
 160 +; PPC970: fcfid
 161 +; PPC970: frsp
 162    store float %conv, float* %b.addr, align 4
 163    ret void
 164  }
 165 @@ -55,11 +81,15 @@ entry:
 166  define void @sitofp_double_i32(i32 %a, double %b) nounwind ssp {
 167  entry:
 168  ; ELF64: sitofp_double_i32
 169 +; PPC970: sitofp_double_i32
 170    %b.addr = alloca double, align 8
 171    %conv = sitofp i32 %a to double
 172  ; ELF64: std
 173  ; ELF64: lfiwax
 174  ; ELF64: fcfid
 175 +; PPC970: std
 176 +; PPC970: lfd
 177 +; PPC970: fcfid
 178    store double %conv, double* %b.addr, align 8
 179    ret void
 180  }
 181 @@ -67,11 +97,15 @@ entry:
 182  define void @sitofp_double_i64(i64 %a, double %b) nounwind ssp {
 183  entry:
 184  ; ELF64: sitofp_double_i64
 185 +; PPC970: sitofp_double_i64
 186    %b.addr = alloca double, align 8
 187    %conv = sitofp i64 %a to double
 188  ; ELF64: std
 189  ; ELF64: lfd
 190  ; ELF64: fcfid
 191 +; PPC970: std
 192 +; PPC970: lfd
 193 +; PPC970: fcfid
 194    store double %conv, double* %b.addr, align 8
 195    ret void
 196  }
 197 @@ -79,6 +113,7 @@ entry:
 198  define void @sitofp_double_i16(i16 %a, double %b) nounwind ssp {
 199  entry:
 200  ; ELF64: sitofp_double_i16
 201 +; PPC970: sitofp_double_i16
 202    %b.addr = alloca double, align 8
 203    %conv = sitofp i16 %a to double
 204  ; ELF64: extsh
 205 @@ -85,6 +120,10 @@ entry:
 206  ; ELF64: std
 207  ; ELF64: lfd
 208  ; ELF64: fcfid
 209 +; PPC970: extsh
 210 +; PPC970: std
 211 +; PPC970: lfd
 212 +; PPC970: fcfid
 213    store double %conv, double* %b.addr, align 8
 214    ret void
 215  }
 216 @@ -92,6 +131,7 @@ entry:
 217  define void @sitofp_double_i8(i8 %a, double %b) nounwind ssp {
 218  entry:
 219  ; ELF64: sitofp_double_i8
 220 +; PPC970: sitofp_double_i8
 221    %b.addr = alloca double, align 8
 222    %conv = sitofp i8 %a to double
 223  ; ELF64: extsb
 224 @@ -98,6 +138,10 @@ entry:
 225  ; ELF64: std
 226  ; ELF64: lfd
 227  ; ELF64: fcfid
 228 +; PPC970: extsb
 229 +; PPC970: std
 230 +; PPC970: lfd
 231 +; PPC970: fcfid
 232    store double %conv, double* %b.addr, align 8
 233    ret void
 234  }
 235 @@ -107,11 +151,13 @@ entry:
 236  define void @uitofp_single_i64(i64 %a, float %b) nounwind ssp {
 237  entry:
 238  ; ELF64: uitofp_single_i64
 239 +; PPC970: uitofp_single_i64
 240    %b.addr = alloca float, align 4
 241    %conv = uitofp i64 %a to float
 242  ; ELF64: std
 243  ; ELF64: lfd
 244  ; ELF64: fcfidus
 245 +; PPC970-NOT: fcfidus
 246    store float %conv, float* %b.addr, align 4
 247    ret void
 248  }
 249 @@ -119,11 +165,14 @@ entry:
 250  define void @uitofp_single_i32(i32 %a, float %b) nounwind ssp {
 251  entry:
 252  ; ELF64: uitofp_single_i32
 253 +; PPC970: uitofp_single_i32
 254    %b.addr = alloca float, align 4
 255    %conv = uitofp i32 %a to float
 256  ; ELF64: std
 257  ; ELF64: lfiwzx
 258  ; ELF64: fcfidus
 259 +; PPC970-NOT: lfiwzx
 260 +; PPC970-NOT: fcfidus
 261    store float %conv, float* %b.addr, align 4
 262    ret void
 263  }
 264 @@ -131,6 +180,7 @@ entry:
 265  define void @uitofp_single_i16(i16 %a, float %b) nounwind ssp {
 266  entry:
 267  ; ELF64: uitofp_single_i16
 268 +; PPC970: uitofp_single_i16
 269    %b.addr = alloca float, align 4
 270    %conv = uitofp i16 %a to float
 271  ; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48
 272 @@ -137,6 +187,11 @@ entry:
 273  ; ELF64: std
 274  ; ELF64: lfd
 275  ; ELF64: fcfidus
 276 +; PPC970: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 16, 31
 277 +; PPC970: std
 278 +; PPC970: lfd
 279 +; PPC970: fcfid
 280 +; PPC970: frsp
 281    store float %conv, float* %b.addr, align 4
 282    ret void
 283  }
 284 @@ -144,6 +199,7 @@ entry:
 285  define void @uitofp_single_i8(i8 %a) nounwind ssp {
 286  entry:
 287  ; ELF64: uitofp_single_i8
 288 +; PPC970: uitofp_single_i8
 289    %b.addr = alloca float, align 4
 290    %conv = uitofp i8 %a to float
 291  ; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 56
 292 @@ -150,6 +206,11 @@ entry:
 293  ; ELF64: std
 294  ; ELF64: lfd
 295  ; ELF64: fcfidus
 296 +; PPC970: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 24, 31
 297 +; PPC970: std
 298 +; PPC970: lfd
 299 +; PPC970: fcfid
 300 +; PPC970: frsp
 301    store float %conv, float* %b.addr, align 4
 302    ret void
 303  }
 304 @@ -157,11 +218,13 @@ entry:
 305  define void @uitofp_double_i64(i64 %a, double %b) nounwind ssp {
 306  entry:
 307  ; ELF64: uitofp_double_i64
 308 +; PPC970: uitofp_double_i64
 309    %b.addr = alloca double, align 8
 310    %conv = uitofp i64 %a to double
 311  ; ELF64: std
 312  ; ELF64: lfd
 313  ; ELF64: fcfidu
 314 +; PPC970-NOT: fcfidu
 315    store double %conv, double* %b.addr, align 8
 316    ret void
 317  }
 318 @@ -169,11 +232,14 @@ entry:
 319  define void @uitofp_double_i32(i32 %a, double %b) nounwind ssp {
 320  entry:
 321  ; ELF64: uitofp_double_i32
 322 +; PPC970: uitofp_double_i32
 323    %b.addr = alloca double, align 8
 324    %conv = uitofp i32 %a to double
 325  ; ELF64: std
 326  ; ELF64: lfiwzx
 327  ; ELF64: fcfidu
 328 +; PPC970-NOT: lfiwzx
 329 +; PPC970-NOT: fcfidu
 330    store double %conv, double* %b.addr, align 8
 331    ret void
 332  }
 333 @@ -181,6 +247,7 @@ entry:
 334  define void @uitofp_double_i16(i16 %a, double %b) nounwind ssp {
 335  entry:
 336  ; ELF64: uitofp_double_i16
 337 +; PPC970: uitofp_double_i16
 338    %b.addr = alloca double, align 8
 339    %conv = uitofp i16 %a to double
 340  ; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48
 341 @@ -187,6 +254,10 @@ entry:
 342  ; ELF64: std
 343  ; ELF64: lfd
 344  ; ELF64: fcfidu
 345 +; PPC970: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 16, 31
 346 +; PPC970: std
 347 +; PPC970: lfd
 348 +; PPC970: fcfid
 349    store double %conv, double* %b.addr, align 8
 350    ret void
 351  }
 352 @@ -194,6 +265,7 @@ entry:
 353  define void @uitofp_double_i8(i8 %a, double %b) nounwind ssp {
 354  entry:
 355  ; ELF64: uitofp_double_i8
 356 +; PPC970: uitofp_double_i8
 357    %b.addr = alloca double, align 8
 358    %conv = uitofp i8 %a to double
 359  ; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 56
 360 @@ -200,6 +272,10 @@ entry:
 361  ; ELF64: std
 362  ; ELF64: lfd
 363  ; ELF64: fcfidu
 364 +; PPC970: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 24, 31
 365 +; PPC970: std
 366 +; PPC970: lfd
 367 +; PPC970: fcfid
 368    store double %conv, double* %b.addr, align 8
 369    ret void
 370  }
 371 @@ -209,11 +285,15 @@ entry:
 372  define void @fptosi_float_i32(float %a) nounwind ssp {
 373  entry:
 374  ; ELF64: fptosi_float_i32
 375 +; PPC970: fptosi_float_i32
 376    %b.addr = alloca i32, align 4
 377    %conv = fptosi float %a to i32
 378  ; ELF64: fctiwz
 379  ; ELF64: stfd
 380  ; ELF64: lwa
 381 +; PPC970: fctiwz
 382 +; PPC970: stfd
 383 +; PPC970: lwa
 384    store i32 %conv, i32* %b.addr, align 4
 385    ret void
 386  }
 387 @@ -221,11 +301,15 @@ entry:
 388  define void @fptosi_float_i64(float %a) nounwind ssp {
 389  entry:
 390  ; ELF64: fptosi_float_i64
 391 +; PPC970: fptosi_float_i64
 392    %b.addr = alloca i64, align 4
 393    %conv = fptosi float %a to i64
 394  ; ELF64: fctidz
 395  ; ELF64: stfd
 396  ; ELF64: ld
 397 +; PPC970: fctidz
 398 +; PPC970: stfd
 399 +; PPC970: ld
 400    store i64 %conv, i64* %b.addr, align 4
 401    ret void
 402  }
 403 @@ -233,11 +317,15 @@ entry:
 404  define void @fptosi_double_i32(double %a) nounwind ssp {
 405  entry:
 406  ; ELF64: fptosi_double_i32
 407 +; PPC970: fptosi_double_i32
 408    %b.addr = alloca i32, align 8
 409    %conv = fptosi double %a to i32
 410  ; ELF64: fctiwz
 411  ; ELF64: stfd
 412  ; ELF64: lwa
 413 +; PPC970: fctiwz
 414 +; PPC970: stfd
 415 +; PPC970: lwa
 416    store i32 %conv, i32* %b.addr, align 8
 417    ret void
 418  }
 419 @@ -245,11 +333,15 @@ entry:
 420  define void @fptosi_double_i64(double %a) nounwind ssp {
 421  entry:
 422  ; ELF64: fptosi_double_i64
 423 +; PPC970: fptosi_double_i64
 424    %b.addr = alloca i64, align 8
 425    %conv = fptosi double %a to i64
 426  ; ELF64: fctidz
 427  ; ELF64: stfd
 428  ; ELF64: ld
 429 +; PPC970: fctidz
 430 +; PPC970: stfd
 431 +; PPC970: ld
 432    store i64 %conv, i64* %b.addr, align 8
 433    ret void
 434  }
 435 @@ -259,11 +351,15 @@ entry:
 436  define void @fptoui_float_i32(float %a) nounwind ssp {
 437  entry:
 438  ; ELF64: fptoui_float_i32
 439 +; PPC970: fptoui_float_i32
 440    %b.addr = alloca i32, align 4
 441    %conv = fptoui float %a to i32
 442  ; ELF64: fctiwuz
 443  ; ELF64: stfd
 444  ; ELF64: lwz
 445 +; PPC970: fctidz
 446 +; PPC970: stfd
 447 +; PPC970: lwz
 448    store i32 %conv, i32* %b.addr, align 4
 449    ret void
 450  }
 451 @@ -271,11 +367,13 @@ entry:
 452  define void @fptoui_float_i64(float %a) nounwind ssp {
 453  entry:
 454  ; ELF64: fptoui_float_i64
 455 +; PPC970: fptoui_float_i64
 456    %b.addr = alloca i64, align 4
 457    %conv = fptoui float %a to i64
 458  ; ELF64: fctiduz
 459  ; ELF64: stfd
 460  ; ELF64: ld
 461 +; PPC970-NOT: fctiduz
 462    store i64 %conv, i64* %b.addr, align 4
 463    ret void
 464  }
 465 @@ -283,11 +381,15 @@ entry:
 466  define void @fptoui_double_i32(double %a) nounwind ssp {
 467  entry:
 468  ; ELF64: fptoui_double_i32
 469 +; PPC970: fptoui_double_i32
 470    %b.addr = alloca i32, align 8
 471    %conv = fptoui double %a to i32
 472  ; ELF64: fctiwuz
 473  ; ELF64: stfd
 474  ; ELF64: lwz
 475 +; PPC970: fctidz
 476 +; PPC970: stfd
 477 +; PPC970: lwz
 478    store i32 %conv, i32* %b.addr, align 8
 479    ret void
 480  }
 481 @@ -295,11 +397,13 @@ entry:
 482  define void @fptoui_double_i64(double %a) nounwind ssp {
 483  entry:
 484  ; ELF64: fptoui_double_i64
 485 +; PPC970: fptoui_double_i64
 486    %b.addr = alloca i64, align 8
 487    %conv = fptoui double %a to i64
 488  ; ELF64: fctiduz
 489  ; ELF64: stfd
 490  ; ELF64: ld
 491 +; PPC970-NOT: fctiduz
 492    store i64 %conv, i64* %b.addr, align 8
 493    ret void
 494  }