test/Analysis/CostModel/X86/cmp.ll

   1 ; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=pentium4 | FileCheck --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE2 %s
   2 ; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=yonah | FileCheck --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE3 %s
   3 ; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core2 | FileCheck --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSSE3 %s
   4 ; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=penryn | FileCheck --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE41 %s
   5 ; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 | FileCheck --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE42 %s
   6 ; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX1 %s
   7 ; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core-avx2 | FileCheck --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX2 %s
   8 ; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=knl | FileCheck --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX512 %s
     ; Each command line above runs the cost-model analysis on this file for one
     ; -mcpu value and verifies the output against three check prefixes: the
     ; common one, an ISA-family one, and an ISA-level one.
     ; No directive inside @cmp uses the SSE family prefix on its own, so on the
     ; five SSE command lines only the common and ISA-level prefixes take effect.
   9
  10 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
  11 target triple = "x86_64-apple-macosx10.8.0"
  12
  13 define i32 @cmp(i32 %arg) {
       ; Cost-model test body. Every compare below takes undef operands and its
       ; result is never used, so only the per-instruction cost printed by the
       ; analysis matters. %arg is unused and the function returns undef.
       ;
       ; The comment lines in front of each compare are FileCheck directives that
       ; give the expected cost for each check prefix selected on the command
       ; lines at the top of the file. They are matched in file order, so do not
       ; reorder or reword them, and do not add prefix-like text between them.
       ;
       ; NOTE(review): only the directives for the 512-bit and wider compares
       ; (%E1-%E3, %M1-%M3) name the instruction they apply to. The others match
       ; on the cost and the opcode alone, so they can bind to any later
       ; fcmp/icmp line with that cost - consider anchoring them the same way
       ; after confirming the expected values with a test run.
  14   ;  -- floats (fcmp olt on float and double vectors) --
  15   ;SSE2:  cost of 3 {{.*}} fcmp
  16   ;SSE3:  cost of 3 {{.*}} fcmp
  17   ;SSSE3: cost of 3 {{.*}} fcmp
  18   ;SSE41: cost of 3 {{.*}} fcmp
  19   ;SSE42: cost of 1 {{.*}} fcmp
  20   ;AVX:   cost of 1 {{.*}} fcmp
  21   %A = fcmp olt <2 x float> undef, undef
  22
  23   ;SSE2:  cost of 7 {{.*}} fcmp
  24   ;SSE3:  cost of 7 {{.*}} fcmp
  25   ;SSSE3: cost of 7 {{.*}} fcmp
  26   ;SSE41: cost of 7 {{.*}} fcmp
  27   ;SSE42: cost of 1 {{.*}} fcmp
  28   ;AVX:   cost of 1 {{.*}} fcmp
  29   %B = fcmp olt <4 x float> undef, undef
  30
  31   ;SSE2:  cost of 14 {{.*}} fcmp
  32   ;SSE3:  cost of 14 {{.*}} fcmp
  33   ;SSSE3: cost of 14 {{.*}} fcmp
  34   ;SSE41: cost of 14 {{.*}} fcmp
  35   ;SSE42: cost of 2 {{.*}} fcmp
  36   ;AVX:   cost of 1 {{.*}} fcmp
  37   %C = fcmp olt <8 x float> undef, undef
  38
  39   ;SSE2:  cost of 3 {{.*}} fcmp
  40   ;SSE3:  cost of 3 {{.*}} fcmp
  41   ;SSSE3: cost of 3 {{.*}} fcmp
  42   ;SSE41: cost of 3 {{.*}} fcmp
  43   ;SSE42: cost of 1 {{.*}} fcmp
  44   ;AVX:   cost of 1 {{.*}} fcmp
  45   %D = fcmp olt <2 x double> undef, undef
  46
  47   ;SSE2:  cost of 6 {{.*}} fcmp
  48   ;SSE3:  cost of 6 {{.*}} fcmp
  49   ;SSSE3: cost of 6 {{.*}} fcmp
  50   ;SSE41: cost of 6 {{.*}} fcmp
  51   ;SSE42: cost of 2 {{.*}} fcmp
  52   ;AVX:   cost of 1 {{.*}} fcmp
  53   %E = fcmp olt <4 x double> undef, undef
  54
  55   ; AVX512: cost of 1 {{.*}} %E1 = fcmp
  56   %E1 = fcmp olt <16 x float> undef, undef
  57
  58   ; AVX512: cost of 1 {{.*}} %E2 = fcmp
  59   %E2 = fcmp olt <8 x double> undef, undef
  60
  61   ; AVX512: cost of 2 {{.*}} %E3 = fcmp
  62   %E3 = fcmp olt <16 x double> undef, undef
  63
  64   ;  -- integers (icmp eq on i8, i16, i32 and i64 vectors) --
  65
  66   ;SSE2:  cost of 1 {{.*}} icmp
  67   ;SSE3:  cost of 1 {{.*}} icmp
  68   ;SSSE3: cost of 1 {{.*}} icmp
  69   ;SSE41: cost of 1 {{.*}} icmp
  70   ;SSE42: cost of 1 {{.*}} icmp
  71   ;AVX:   cost of 1 {{.*}} icmp
  72   %F = icmp eq <16 x i8> undef, undef
  73
  74   ;SSE2:  cost of 1 {{.*}} icmp
  75   ;SSE3:  cost of 1 {{.*}} icmp
  76   ;SSSE3: cost of 1 {{.*}} icmp
  77   ;SSE41: cost of 1 {{.*}} icmp
  78   ;SSE42: cost of 1 {{.*}} icmp
  79   ;AVX:   cost of 1 {{.*}} icmp
  80   %G = icmp eq <8 x i16> undef, undef
  81
  82   ;SSE2:  cost of 1 {{.*}} icmp
  83   ;SSE3:  cost of 1 {{.*}} icmp
  84   ;SSSE3: cost of 1 {{.*}} icmp
  85   ;SSE41: cost of 1 {{.*}} icmp
  86   ;SSE42: cost of 1 {{.*}} icmp
  87   ;AVX:   cost of 1 {{.*}} icmp
  88   %H = icmp eq <4 x i32> undef, undef
  89
  90   ;SSE2:  cost of 8 {{.*}} icmp
  91   ;SSE3:  cost of 8 {{.*}} icmp
  92   ;SSSE3: cost of 8 {{.*}} icmp
  93   ;SSE41: cost of 8 {{.*}} icmp
  94   ;SSE42: cost of 1 {{.*}} icmp
  95   ;AVX:   cost of 1 {{.*}} icmp
  96   %I = icmp eq <2 x i64> undef, undef
  97
  98   ;SSE2:  cost of 16 {{.*}} icmp
  99   ;SSE3:  cost of 16 {{.*}} icmp
 100   ;SSSE3: cost of 16 {{.*}} icmp
 101   ;SSE41: cost of 16 {{.*}} icmp
 102   ;SSE42: cost of 2 {{.*}} icmp
 103   ;AVX1:  cost of 4 {{.*}} icmp
 104   ;AVX2:  cost of 1 {{.*}} icmp
 105   %J = icmp eq <4 x i64> undef, undef
 106
 107   ;SSE2:  cost of 2 {{.*}} icmp
 108   ;SSE3:  cost of 2 {{.*}} icmp
 109   ;SSSE3: cost of 2 {{.*}} icmp
 110   ;SSE41: cost of 2 {{.*}} icmp
 111   ;SSE42: cost of 2 {{.*}} icmp
 112   ;AVX1:  cost of 4 {{.*}} icmp
 113   ;AVX2:  cost of 1 {{.*}} icmp
 114   %K = icmp eq <8 x i32> undef, undef
 115
 116   ;SSE2:  cost of 2 {{.*}} icmp
 117   ;SSE3:  cost of 2 {{.*}} icmp
 118   ;SSSE3: cost of 2 {{.*}} icmp
 119   ;SSE41: cost of 2 {{.*}} icmp
 120   ;SSE42: cost of 2 {{.*}} icmp
 121   ;AVX1:  cost of 4 {{.*}} icmp
 122   ;AVX2:  cost of 1 {{.*}} icmp
 123   %L = icmp eq <16 x i16> undef, undef
 124
 125   ;SSE2:  cost of 2 {{.*}} icmp
 126   ;SSE3:  cost of 2 {{.*}} icmp
 127   ;SSSE3: cost of 2 {{.*}} icmp
 128   ;SSE41: cost of 2 {{.*}} icmp
 129   ;SSE42: cost of 2 {{.*}} icmp
 130   ;AVX1:  cost of 4 {{.*}} icmp
 131   ;AVX2:  cost of 1 {{.*}} icmp
 132   %M = icmp eq <32 x i8> undef, undef
 133
 134   ; AVX512: cost of 1 {{.*}} %M1 = icmp
 135   %M1 = icmp eq <16 x i32> undef, undef
 136
 137   ; AVX512: cost of 1 {{.*}} %M2 = icmp
 138   %M2 = icmp eq <8 x i64> undef, undef
 139
 140   ; AVX512: cost of 2 {{.*}} %M3 = icmp
 141   %M3 = icmp eq <16 x i64> undef, undef
 142
 143   ;CHECK: cost of 0 {{.*}} ret
 144   ret i32 undef
 145 }
 146
 147