test/CodeGen/X86/dagcombine-cse.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefix=X32
   3 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefix=X64
   4
   ; @t loads an i32 and an i16 from %ref_frame_ptr at byte offsets
   ; (%idxY * %ref_frame_stride + %idxX) and that offset plus 4, packs them into
   ; one i64, routes the value through double / <2 x double> / <8 x i16> /
   ; <4 x i32> bitcasts and a shuffle, and returns 32-bit lane 0 of the result.
   ; The X32 checks show the whole chain folding to a single 32-bit load; the
   ; X64 checks still build the 64-bit value and round-trip it through %xmm0.
   5 define i32 @t(i8* %ref_frame_ptr, i32 %ref_frame_stride, i32 %idxX, i32 %idxY) nounwind  {
   6 ; X32-LABEL: t:
   7 ; X32:       ## %bb.0: ## %entry
   8 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
   9 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
  10 ; X32-NEXT:    imull {{[0-9]+}}(%esp), %ecx
  11 ; X32-NEXT:    addl {{[0-9]+}}(%esp), %ecx
  12 ; X32-NEXT:    movl (%eax,%ecx), %eax
  13 ; X32-NEXT:    retl
  14 ;
  15 ; X64-LABEL: t:
  16 ; X64:       ## %bb.0: ## %entry
  17 ; X64-NEXT:    ## kill: def $edx killed $edx def $rdx
  18 ; X64-NEXT:    ## kill: def $esi killed $esi def $rsi
  19 ; X64-NEXT:    imull %ecx, %esi
  20 ; X64-NEXT:    leal (%rsi,%rdx), %eax
  21 ; X64-NEXT:    cltq
  22 ; X64-NEXT:    movl (%rdi,%rax), %eax
  23 ; X64-NEXT:    leal 4(%rsi,%rdx), %ecx
  24 ; X64-NEXT:    movslq %ecx, %rcx
  25 ; X64-NEXT:    movzwl (%rdi,%rcx), %ecx
  26 ; X64-NEXT:    shlq $32, %rcx
  27 ; X64-NEXT:    orq %rax, %rcx
  28 ; X64-NEXT:    movq %rcx, %xmm0
  29 ; X64-NEXT:    movd %xmm0, %eax
  30 ; X64-NEXT:    retq
  31 entry:
  32         %tmp7 = mul i32 %idxY, %ref_frame_stride                ; row offset; shared by both address computations below
  33         %tmp9 = add i32 %tmp7, %idxX            ; byte offset of the i32 load
  34         %tmp11 = getelementptr i8, i8* %ref_frame_ptr, i32 %tmp9                ; byte-granular address of the i32 load
  35         %tmp1112 = bitcast i8* %tmp11 to i32*           ; reinterpret the address as i32*
  36         %tmp13 = load i32, i32* %tmp1112, align 4               ; 32-bit value; ends up in bits 0..31 of %tmp29
  37         %tmp18 = add i32 %idxX, 4               ; start of the second offset: %idxX + 4
  38         %tmp20.sum = add i32 %tmp18, %tmp7              ; byte offset of the i16 load (reuses %tmp7)
  39         %tmp21 = getelementptr i8, i8* %ref_frame_ptr, i32 %tmp20.sum           ; byte-granular address of the i16 load
  40         %tmp2122 = bitcast i8* %tmp21 to i16*           ; reinterpret the address as i16*
  41         %tmp23 = load i16, i16* %tmp2122, align 2               ; 16-bit value; ends up in bits 32..47 of %tmp29
  42         %tmp2425 = zext i16 %tmp23 to i64               ; widen the i16 to 64 bits
  43         %tmp26 = shl i64 %tmp2425, 32           ; move it above the low 32 bits
  44         %tmp2728 = zext i32 %tmp13 to i64               ; widen the i32 to 64 bits
  45         %tmp29 = or i64 %tmp26, %tmp2728                ; packed i64: i16 in the high half, i32 in the low half
  46         %tmp3454 = bitcast i64 %tmp29 to double         ; same 64 bits viewed as a double
  47         %tmp35 = insertelement <2 x double> undef, double %tmp3454, i32 0               ; lane 0 = packed value
  48         %tmp36 = insertelement <2 x double> %tmp35, double 0.000000e+00, i32 1          ; lane 1 = 0.0
  49         %tmp42 = bitcast <2 x double> %tmp36 to <8 x i16>               ; same 128 bits viewed as eight i16 lanes
  50         %tmp43 = shufflevector <8 x i16> %tmp42, <8 x i16> undef, <8 x i32> < i32 0, i32 1, i32 1, i32 2, i32 4, i32 5, i32 6, i32 7 >          ; result lanes 0 and 1 are source lanes 0 and 1, unchanged
  51         %tmp47 = bitcast <8 x i16> %tmp43 to <4 x i32>          ; same 128 bits viewed as four i32 lanes
  52         %tmp48 = extractelement <4 x i32> %tmp47, i32 0         ; i32 lane 0, i.e. i16 lanes 0-1 of %tmp43
  53         ret i32 %tmp48
  54 }