196 lines
8.4 KiB
LLVM
196 lines
8.4 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; Test driver: this file is fed to llc on stdin with the pentium4 CPU model
; and optimization level -O0; the resulting assembly is piped to FileCheck,
; which verifies it against the assertions embedded in this file.
; RUN: llc < %s -mcpu=pentium4 -O0 | FileCheck %s
|
|
|
|
; Data layout string for the module: little-endian ("e"), 32-bit pointers in
; the default address space ("p:32:32"), native integer widths 8/16/32
; ("n8:16:32") and a 128-bit natural stack alignment ("S128").
target datalayout = "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-f64:32:64-f80:32-n8:16:32-S128"
|
|
; Target triple: 32-bit x86 (i386) on Linux.
target triple = "i386-unknown-linux-unknown"
|
|
|
|
; doTheTestMod: element-wise floating-point remainder (frem) of two
; <4 x half> vectors. Both arguments are first stored to stack slots and
; loaded back before the frem is evaluated.
;
; The autogenerated assertions that follow pin the exact -O0 lowering for this
; target. As recorded there, each of the four lanes is handled by:
;   1. widening both half operands with two calls to __extendhfsf2,
;   2. computing the remainder with a call to fmodf,
;   3. narrowing the result with a call to __truncsfhf2.
; The result of the first __extendhfsf2 call in each lane is parked in a
; 10-byte x87 spill slot (fstpt) and reloaded (fldt) after the second call.
; The four half results are finally repacked with punpcklwd/unpcklps and the
; 140-byte frame is released before returning.
; TODO(review): the precise spill/reload ordering around the calls looks like
; the property this test guards; confirm against the originating bug report.
define <4 x half> @doTheTestMod(<4 x half> %0, <4 x half> %1) nounwind {
|
|
; CHECK-LABEL: doTheTestMod:
|
|
; CHECK: # %bb.0: # %Entry
|
|
; CHECK-NEXT: subl $140, %esp
|
|
; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
|
|
; CHECK-NEXT: movaps %xmm0, %xmm6
|
|
; CHECK-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
|
|
; CHECK-NEXT: movaps %xmm0, %xmm1
|
|
; CHECK-NEXT: movaps %xmm0, %xmm3
|
|
; CHECK-NEXT: psrlq $48, %xmm3
|
|
; CHECK-NEXT: movaps %xmm0, %xmm2
|
|
; CHECK-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1,1,1]
|
|
; CHECK-NEXT: psrld $16, %xmm0
|
|
; CHECK-NEXT: movaps %xmm6, %xmm7
|
|
; CHECK-NEXT: movaps %xmm6, %xmm4
|
|
; CHECK-NEXT: psrlq $48, %xmm4
|
|
; CHECK-NEXT: movaps %xmm6, %xmm5
|
|
; CHECK-NEXT: shufps {{.*#+}} xmm5 = xmm5[1,1,1,1]
|
|
; CHECK-NEXT: psrld $16, %xmm6
|
|
; CHECK-NEXT: pextrw $0, %xmm7, %eax
|
|
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
|
|
; CHECK-NEXT: movw %ax, {{[0-9]+}}(%esp)
|
|
; CHECK-NEXT: pextrw $0, %xmm6, %eax
|
|
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
|
|
; CHECK-NEXT: movw %ax, {{[0-9]+}}(%esp)
|
|
; CHECK-NEXT: pextrw $0, %xmm5, %eax
|
|
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
|
|
; CHECK-NEXT: movw %ax, {{[0-9]+}}(%esp)
|
|
; CHECK-NEXT: pextrw $0, %xmm4, %eax
|
|
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
|
|
; CHECK-NEXT: movw %ax, {{[0-9]+}}(%esp)
|
|
; CHECK-NEXT: pextrw $0, %xmm3, %eax
|
|
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
|
|
; CHECK-NEXT: movw %ax, {{[0-9]+}}(%esp)
|
|
; CHECK-NEXT: pextrw $0, %xmm2, %eax
|
|
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
|
|
; CHECK-NEXT: movw %ax, {{[0-9]+}}(%esp)
|
|
; CHECK-NEXT: pextrw $0, %xmm0, %eax
|
|
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
|
|
; CHECK-NEXT: movw %ax, {{[0-9]+}}(%esp)
|
|
; CHECK-NEXT: pextrw $0, %xmm1, %eax
|
|
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
|
|
; CHECK-NEXT: movw %ax, {{[0-9]+}}(%esp)
|
|
; CHECK-NEXT: # implicit-def: $xmm0
|
|
; CHECK-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
|
|
; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
|
; CHECK-NEXT: # implicit-def: $xmm0
|
|
; CHECK-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
|
|
; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
|
; CHECK-NEXT: # implicit-def: $xmm0
|
|
; CHECK-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
|
|
; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
|
; CHECK-NEXT: # implicit-def: $xmm0
|
|
; CHECK-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
|
|
; CHECK-NEXT: # implicit-def: $xmm1
|
|
; CHECK-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm1
|
|
; CHECK-NEXT: movss %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
|
; CHECK-NEXT: # implicit-def: $xmm1
|
|
; CHECK-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm1
|
|
; CHECK-NEXT: movss %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
|
; CHECK-NEXT: # implicit-def: $xmm1
|
|
; CHECK-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm1
|
|
; CHECK-NEXT: movss %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
|
; CHECK-NEXT: # implicit-def: $xmm1
|
|
; CHECK-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm1
|
|
; CHECK-NEXT: movss %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
|
; CHECK-NEXT: pextrw $0, %xmm0, %eax
|
|
; CHECK-NEXT: movw %ax, %cx
|
|
; CHECK-NEXT: movl %esp, %eax
|
|
; CHECK-NEXT: movw %cx, (%eax)
|
|
; CHECK-NEXT: calll __extendhfsf2
|
|
; CHECK-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
|
|
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
|
|
; CHECK-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
|
|
; CHECK-NEXT: pextrw $0, %xmm0, %eax
|
|
; CHECK-NEXT: movw %ax, %cx
|
|
; CHECK-NEXT: movl %esp, %eax
|
|
; CHECK-NEXT: movw %cx, (%eax)
|
|
; CHECK-NEXT: calll __extendhfsf2
|
|
; CHECK-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
|
|
; CHECK-NEXT: movl %esp, %eax
|
|
; CHECK-NEXT: fxch %st(1)
|
|
; CHECK-NEXT: fstps 4(%eax)
|
|
; CHECK-NEXT: fstps (%eax)
|
|
; CHECK-NEXT: calll fmodf
|
|
; CHECK-NEXT: movl %esp, %eax
|
|
; CHECK-NEXT: fstps (%eax)
|
|
; CHECK-NEXT: calll __truncsfhf2
|
|
; CHECK-NEXT: movaps %xmm0, %xmm1
|
|
; CHECK-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
|
|
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
|
|
; CHECK-NEXT: movss %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
|
; CHECK-NEXT: pextrw $0, %xmm0, %eax
|
|
; CHECK-NEXT: movw %ax, %cx
|
|
; CHECK-NEXT: movl %esp, %eax
|
|
; CHECK-NEXT: movw %cx, (%eax)
|
|
; CHECK-NEXT: calll __extendhfsf2
|
|
; CHECK-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
|
|
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
|
|
; CHECK-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
|
|
; CHECK-NEXT: pextrw $0, %xmm0, %eax
|
|
; CHECK-NEXT: movw %ax, %cx
|
|
; CHECK-NEXT: movl %esp, %eax
|
|
; CHECK-NEXT: movw %cx, (%eax)
|
|
; CHECK-NEXT: calll __extendhfsf2
|
|
; CHECK-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
|
|
; CHECK-NEXT: movl %esp, %eax
|
|
; CHECK-NEXT: fxch %st(1)
|
|
; CHECK-NEXT: fstps 4(%eax)
|
|
; CHECK-NEXT: fstps (%eax)
|
|
; CHECK-NEXT: calll fmodf
|
|
; CHECK-NEXT: movl %esp, %eax
|
|
; CHECK-NEXT: fstps (%eax)
|
|
; CHECK-NEXT: calll __truncsfhf2
|
|
; CHECK-NEXT: movaps %xmm0, %xmm1
|
|
; CHECK-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
|
|
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
|
|
; CHECK-NEXT: movss %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
|
; CHECK-NEXT: pextrw $0, %xmm0, %eax
|
|
; CHECK-NEXT: movw %ax, %cx
|
|
; CHECK-NEXT: movl %esp, %eax
|
|
; CHECK-NEXT: movw %cx, (%eax)
|
|
; CHECK-NEXT: calll __extendhfsf2
|
|
; CHECK-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
|
|
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
|
|
; CHECK-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
|
|
; CHECK-NEXT: pextrw $0, %xmm0, %eax
|
|
; CHECK-NEXT: movw %ax, %cx
|
|
; CHECK-NEXT: movl %esp, %eax
|
|
; CHECK-NEXT: movw %cx, (%eax)
|
|
; CHECK-NEXT: calll __extendhfsf2
|
|
; CHECK-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
|
|
; CHECK-NEXT: movl %esp, %eax
|
|
; CHECK-NEXT: fxch %st(1)
|
|
; CHECK-NEXT: fstps 4(%eax)
|
|
; CHECK-NEXT: fstps (%eax)
|
|
; CHECK-NEXT: calll fmodf
|
|
; CHECK-NEXT: movl %esp, %eax
|
|
; CHECK-NEXT: fstps (%eax)
|
|
; CHECK-NEXT: calll __truncsfhf2
|
|
; CHECK-NEXT: movaps %xmm0, %xmm1
|
|
; CHECK-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
|
|
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
|
|
; CHECK-NEXT: movss %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
|
; CHECK-NEXT: pextrw $0, %xmm0, %eax
|
|
; CHECK-NEXT: movw %ax, %cx
|
|
; CHECK-NEXT: movl %esp, %eax
|
|
; CHECK-NEXT: movw %cx, (%eax)
|
|
; CHECK-NEXT: calll __extendhfsf2
|
|
; CHECK-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
|
|
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
|
|
; CHECK-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
|
|
; CHECK-NEXT: pextrw $0, %xmm0, %eax
|
|
; CHECK-NEXT: movw %ax, %cx
|
|
; CHECK-NEXT: movl %esp, %eax
|
|
; CHECK-NEXT: movw %cx, (%eax)
|
|
; CHECK-NEXT: calll __extendhfsf2
|
|
; CHECK-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
|
|
; CHECK-NEXT: movl %esp, %eax
|
|
; CHECK-NEXT: fxch %st(1)
|
|
; CHECK-NEXT: fstps 4(%eax)
|
|
; CHECK-NEXT: fstps (%eax)
|
|
; CHECK-NEXT: calll fmodf
|
|
; CHECK-NEXT: movl %esp, %eax
|
|
; CHECK-NEXT: fstps (%eax)
|
|
; CHECK-NEXT: calll __truncsfhf2
|
|
; CHECK-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 4-byte Reload
|
|
; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero
|
|
; CHECK-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 # 4-byte Reload
|
|
; CHECK-NEXT: # xmm2 = mem[0],zero,zero,zero
|
|
; CHECK-NEXT: movaps %xmm0, %xmm3
|
|
; CHECK-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
|
|
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
|
|
; CHECK-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
|
|
; CHECK-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
|
|
; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
|
; CHECK-NEXT: addl $140, %esp
|
|
; CHECK-NEXT: retl
|
|
Entry:
|
|
; Reserve one 8-byte-aligned stack slot per vector argument.
%x = alloca <4 x half>, align 8
|
|
%y = alloca <4 x half>, align 8
|
|
; Copy the incoming arguments into their slots ...
store <4 x half> %0, ptr %x, align 8
|
|
store <4 x half> %1, ptr %y, align 8
|
|
; ... and read them straight back, so the frem operands come from memory.
%2 = load <4 x half>, ptr %x, align 8
|
|
%3 = load <4 x half>, ptr %y, align 8
|
|
; Lane-wise remainder of the two loaded vectors; the assertions above show
; this being expanded into per-lane fmodf libcalls on this target.
%4 = frem <4 x half> %2, %3
|
|
ret <4 x half> %4
|
|
}
|
|
|