AMDGPU: Use modern address spaces in some tests

This was way out of date, still using 4 for generic and 0 for private.
This commit is contained in:
Matt Arsenault 2022-11-28 10:01:20 -05:00
parent f946c70130
commit c1710e7779
14 changed files with 397 additions and 397 deletions

View File

@@ -28,13 +28,13 @@ define i1 @fold_negate_intrinsic_test_mask_dbl(double %x) nounwind {
; Negative test: should not transform for variable test masks ; Negative test: should not transform for variable test masks
; CHECK: @fold_negate_intrinsic_test_mask_neg_var ; CHECK: @fold_negate_intrinsic_test_mask_neg_var
; CHECK: %[[X0:.*]] = alloca i32 ; CHECK: %[[X0:.*]] = alloca i32
; CHECK: %[[X1:.*]] = load i32, i32* %[[X0]] ; CHECK: %[[X1:.*]] = load i32, i32 addrspace(5)* %[[X0]]
; CHECK: call i1 @llvm.amdgcn.class.f32(float %x, i32 %[[X1]]) ; CHECK: call i1 @llvm.amdgcn.class.f32(float %x, i32 %[[X1]])
; CHECK: xor ; CHECK: xor
define i1 @fold_negate_intrinsic_test_mask_neg_var(float %x) nounwind { define i1 @fold_negate_intrinsic_test_mask_neg_var(float %x) nounwind {
%1 = alloca i32 %1 = alloca i32, addrspace(5)
store i32 7, i32* %1 store i32 7, i32 addrspace(5)* %1
%2 = load i32, i32* %1 %2 = load i32, i32 addrspace(5)* %1
%3 = call i1 @llvm.amdgcn.class.f32(float %x, i32 %2) %3 = call i1 @llvm.amdgcn.class.f32(float %x, i32 %2)
%4 = xor i1 %3, -1 %4 = xor i1 %3, -1
ret i1 %4 ret i1 %4
@@ -47,10 +47,10 @@ define i1 @fold_negate_intrinsic_test_mask_neg_var(float %x) nounwind {
; CHECK: store i1 %[[X1]] ; CHECK: store i1 %[[X1]]
; CHECK: %[[X2:.*]] = xor i1 %[[X1]] ; CHECK: %[[X2:.*]] = xor i1 %[[X1]]
define i1 @fold_negate_intrinsic_test_mask_neg_multiple_uses(float %x) nounwind { define i1 @fold_negate_intrinsic_test_mask_neg_multiple_uses(float %x) nounwind {
%y = alloca i1 %y = alloca i1, addrspace(5)
%1 = call i1 @llvm.amdgcn.class.f32(float %x, i32 7) %1 = call i1 @llvm.amdgcn.class.f32(float %x, i32 7)
%2 = xor i1 %1, -1 %2 = xor i1 %1, -1
store i1 %1, i1* %y store i1 %1, i1 addrspace(5)* %y
%3 = xor i1 %1, -1 %3 = xor i1 %1, -1
ret i1 %2 ret i1 %2
} }

View File

@@ -12,15 +12,15 @@ define internal void @indirect() {
define internal void @direct() { define internal void @direct() {
; CHECK-LABEL: define {{[^@]+}}@direct ; CHECK-LABEL: define {{[^@]+}}@direct
; CHECK-SAME: () #[[ATTR1:[0-9]+]] { ; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: [[FPTR:%.*]] = alloca void ()*, align 8 ; CHECK-NEXT: [[FPTR:%.*]] = alloca void ()*, align 8, addrspace(5)
; CHECK-NEXT: store void ()* @indirect, void ()** [[FPTR]], align 8 ; CHECK-NEXT: store void ()* @indirect, void ()* addrspace(5)* [[FPTR]], align 8
; CHECK-NEXT: [[FP:%.*]] = load void ()*, void ()** [[FPTR]], align 8 ; CHECK-NEXT: [[FP:%.*]] = load void ()*, void ()* addrspace(5)* [[FPTR]], align 8
; CHECK-NEXT: call void [[FP]]() ; CHECK-NEXT: call void [[FP]]()
; CHECK-NEXT: ret void ; CHECK-NEXT: ret void
; ;
%fptr = alloca void()* %fptr = alloca void()*, addrspace(5)
store void()* @indirect, void()** %fptr store void()* @indirect, void()* addrspace(5)* %fptr
%fp = load void()*, void()** %fptr %fp = load void()*, void()* addrspace(5)* %fptr
call void %fp() call void %fp()
ret void ret void
} }

View File

@@ -16,23 +16,23 @@ define internal void @indirect() {
define amdgpu_kernel void @test_simple_indirect_call() #0 { define amdgpu_kernel void @test_simple_indirect_call() #0 {
; AKF_GCN-LABEL: define {{[^@]+}}@test_simple_indirect_call ; AKF_GCN-LABEL: define {{[^@]+}}@test_simple_indirect_call
; AKF_GCN-SAME: () #[[ATTR0:[0-9]+]] { ; AKF_GCN-SAME: () #[[ATTR0:[0-9]+]] {
; AKF_GCN-NEXT: [[FPTR:%.*]] = alloca void ()*, align 8 ; AKF_GCN-NEXT: [[FPTR:%.*]] = alloca void ()*, align 8, addrspace(5)
; AKF_GCN-NEXT: store void ()* @indirect, void ()** [[FPTR]], align 8 ; AKF_GCN-NEXT: store void ()* @indirect, void ()* addrspace(5)* [[FPTR]], align 8
; AKF_GCN-NEXT: [[FP:%.*]] = load void ()*, void ()** [[FPTR]], align 8 ; AKF_GCN-NEXT: [[FP:%.*]] = load void ()*, void ()* addrspace(5)* [[FPTR]], align 8
; AKF_GCN-NEXT: call void [[FP]]() ; AKF_GCN-NEXT: call void [[FP]]()
; AKF_GCN-NEXT: ret void ; AKF_GCN-NEXT: ret void
; ;
; ATTRIBUTOR_GCN-LABEL: define {{[^@]+}}@test_simple_indirect_call ; ATTRIBUTOR_GCN-LABEL: define {{[^@]+}}@test_simple_indirect_call
; ATTRIBUTOR_GCN-SAME: () #[[ATTR1:[0-9]+]] { ; ATTRIBUTOR_GCN-SAME: () #[[ATTR1:[0-9]+]] {
; ATTRIBUTOR_GCN-NEXT: [[FPTR:%.*]] = alloca void ()*, align 8 ; ATTRIBUTOR_GCN-NEXT: [[FPTR:%.*]] = alloca void ()*, align 8, addrspace(5)
; ATTRIBUTOR_GCN-NEXT: store void ()* @indirect, void ()** [[FPTR]], align 8 ; ATTRIBUTOR_GCN-NEXT: store void ()* @indirect, void ()* addrspace(5)* [[FPTR]], align 8
; ATTRIBUTOR_GCN-NEXT: [[FP:%.*]] = load void ()*, void ()** [[FPTR]], align 8 ; ATTRIBUTOR_GCN-NEXT: [[FP:%.*]] = load void ()*, void ()* addrspace(5)* [[FPTR]], align 8
; ATTRIBUTOR_GCN-NEXT: call void [[FP]]() ; ATTRIBUTOR_GCN-NEXT: call void [[FP]]()
; ATTRIBUTOR_GCN-NEXT: ret void ; ATTRIBUTOR_GCN-NEXT: ret void
; ;
%fptr = alloca void()* %fptr = alloca void()*, addrspace(5)
store void()* @indirect, void()** %fptr store void()* @indirect, void()* addrspace(5)* %fptr
%fp = load void()*, void()** %fptr %fp = load void()*, void()* addrspace(5)* %fptr
call void %fp() call void %fp()
ret void ret void
} }

View File

@@ -8,19 +8,19 @@
define amdgpu_kernel void @promote_alloca_size_63(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #0 { define amdgpu_kernel void @promote_alloca_size_63(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #0 {
entry: entry:
%stack = alloca [5 x i32], align 4 %stack = alloca [5 x i32], align 4, addrspace(5)
%0 = load i32, i32 addrspace(1)* %in, align 4 %0 = load i32, i32 addrspace(1)* %in, align 4
%arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %0 %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %0
store i32 4, i32* %arrayidx1, align 4 store i32 4, i32 addrspace(5)* %arrayidx1, align 4
%arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1 %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
%1 = load i32, i32 addrspace(1)* %arrayidx2, align 4 %1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
%arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %1 %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %1
store i32 5, i32* %arrayidx3, align 4 store i32 5, i32 addrspace(5)* %arrayidx3, align 4
%arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 0 %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 0
%2 = load i32, i32* %arrayidx10, align 4 %2 = load i32, i32 addrspace(5)* %arrayidx10, align 4
store i32 %2, i32 addrspace(1)* %out, align 4 store i32 %2, i32 addrspace(1)* %out, align 4
%arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 1 %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 1
%3 = load i32, i32* %arrayidx12 %3 = load i32, i32 addrspace(5)* %arrayidx12
%arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1 %arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
store i32 %3, i32 addrspace(1)* %arrayidx13 store i32 %3, i32 addrspace(1)* %arrayidx13
ret void ret void
@@ -30,19 +30,19 @@ entry:
define amdgpu_kernel void @promote_alloca_size_256(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #1 { define amdgpu_kernel void @promote_alloca_size_256(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #1 {
entry: entry:
%stack = alloca [5 x i32], align 4 %stack = alloca [5 x i32], align 4, addrspace(5)
%0 = load i32, i32 addrspace(1)* %in, align 4 %0 = load i32, i32 addrspace(1)* %in, align 4
%arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %0 %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %0
store i32 4, i32* %arrayidx1, align 4 store i32 4, i32 addrspace(5)* %arrayidx1, align 4
%arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1 %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
%1 = load i32, i32 addrspace(1)* %arrayidx2, align 4 %1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
%arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %1 %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %1
store i32 5, i32* %arrayidx3, align 4 store i32 5, i32 addrspace(5)* %arrayidx3, align 4
%arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 0 %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 0
%2 = load i32, i32* %arrayidx10, align 4 %2 = load i32, i32 addrspace(5)* %arrayidx10, align 4
store i32 %2, i32 addrspace(1)* %out, align 4 store i32 %2, i32 addrspace(1)* %out, align 4
%arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 1 %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 1
%3 = load i32, i32* %arrayidx12 %3 = load i32, i32 addrspace(5)* %arrayidx12
%arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1 %arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
store i32 %3, i32 addrspace(1)* %arrayidx13 store i32 %3, i32 addrspace(1)* %arrayidx13
ret void ret void
@@ -54,19 +54,19 @@ entry:
define amdgpu_kernel void @promote_alloca_size_1600(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #2 { define amdgpu_kernel void @promote_alloca_size_1600(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #2 {
entry: entry:
%stack = alloca [5 x i32], align 4 %stack = alloca [5 x i32], align 4, addrspace(5)
%0 = load i32, i32 addrspace(1)* %in, align 4 %0 = load i32, i32 addrspace(1)* %in, align 4
%arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %0 %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %0
store i32 4, i32* %arrayidx1, align 4 store i32 4, i32 addrspace(5)* %arrayidx1, align 4
%arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1 %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
%1 = load i32, i32 addrspace(1)* %arrayidx2, align 4 %1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
%arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %1 %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %1
store i32 5, i32* %arrayidx3, align 4 store i32 5, i32 addrspace(5)* %arrayidx3, align 4
%arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 0 %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 0
%2 = load i32, i32* %arrayidx10, align 4 %2 = load i32, i32 addrspace(5)* %arrayidx10, align 4
store i32 %2, i32 addrspace(1)* %out, align 4 store i32 %2, i32 addrspace(1)* %out, align 4
%arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 1 %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 1
%3 = load i32, i32* %arrayidx12 %3 = load i32, i32 addrspace(5)* %arrayidx12
%arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1 %arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
store i32 %3, i32 addrspace(1)* %arrayidx13 store i32 %3, i32 addrspace(1)* %arrayidx13
ret void ret void
@@ -77,19 +77,19 @@ entry:
; SI: alloca [5 x i32] ; SI: alloca [5 x i32]
define amdgpu_kernel void @occupancy_0(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #3 { define amdgpu_kernel void @occupancy_0(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #3 {
entry: entry:
%stack = alloca [5 x i32], align 4 %stack = alloca [5 x i32], align 4, addrspace(5)
%0 = load i32, i32 addrspace(1)* %in, align 4 %0 = load i32, i32 addrspace(1)* %in, align 4
%arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %0 %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %0
store i32 4, i32* %arrayidx1, align 4 store i32 4, i32 addrspace(5)* %arrayidx1, align 4
%arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1 %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
%1 = load i32, i32 addrspace(1)* %arrayidx2, align 4 %1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
%arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %1 %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %1
store i32 5, i32* %arrayidx3, align 4 store i32 5, i32 addrspace(5)* %arrayidx3, align 4
%arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 0 %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 0
%2 = load i32, i32* %arrayidx10, align 4 %2 = load i32, i32 addrspace(5)* %arrayidx10, align 4
store i32 %2, i32 addrspace(1)* %out, align 4 store i32 %2, i32 addrspace(1)* %out, align 4
%arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 1 %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 1
%3 = load i32, i32* %arrayidx12 %3 = load i32, i32 addrspace(5)* %arrayidx12
%arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1 %arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
store i32 %3, i32 addrspace(1)* %arrayidx13 store i32 %3, i32 addrspace(1)* %arrayidx13
ret void ret void
@@ -100,19 +100,19 @@ entry:
; SI: alloca [5 x i32] ; SI: alloca [5 x i32]
define amdgpu_kernel void @occupancy_max(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #4 { define amdgpu_kernel void @occupancy_max(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #4 {
entry: entry:
%stack = alloca [5 x i32], align 4 %stack = alloca [5 x i32], align 4, addrspace(5)
%0 = load i32, i32 addrspace(1)* %in, align 4 %0 = load i32, i32 addrspace(1)* %in, align 4
%arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %0 %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %0
store i32 4, i32* %arrayidx1, align 4 store i32 4, i32 addrspace(5)* %arrayidx1, align 4
%arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1 %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
%1 = load i32, i32 addrspace(1)* %arrayidx2, align 4 %1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
%arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %1 %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %1
store i32 5, i32* %arrayidx3, align 4 store i32 5, i32 addrspace(5)* %arrayidx3, align 4
%arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 0 %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 0
%2 = load i32, i32* %arrayidx10, align 4 %2 = load i32, i32 addrspace(5)* %arrayidx10, align 4
store i32 %2, i32 addrspace(1)* %out, align 4 store i32 %2, i32 addrspace(1)* %out, align 4
%arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 1 %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 1
%3 = load i32, i32* %arrayidx12 %3 = load i32, i32 addrspace(5)* %arrayidx12
%arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1 %arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
store i32 %3, i32 addrspace(1)* %arrayidx13 store i32 %3, i32 addrspace(1)* %arrayidx13
ret void ret void
@@ -124,21 +124,21 @@ entry:
; CI-NOT: alloca ; CI-NOT: alloca
define amdgpu_kernel void @occupancy_6(i8 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %in) #5 { define amdgpu_kernel void @occupancy_6(i8 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %in) #5 {
entry: entry:
%stack = alloca [42 x i8], align 4 %stack = alloca [42 x i8], align 4, addrspace(5)
%tmp = load i8, i8 addrspace(1)* %in, align 1 %tmp = load i8, i8 addrspace(1)* %in, align 1
%tmp4 = sext i8 %tmp to i64 %tmp4 = sext i8 %tmp to i64
%arrayidx1 = getelementptr inbounds [42 x i8], [42 x i8]* %stack, i64 0, i64 %tmp4 %arrayidx1 = getelementptr inbounds [42 x i8], [42 x i8] addrspace(5)* %stack, i64 0, i64 %tmp4
store i8 4, i8* %arrayidx1, align 1 store i8 4, i8 addrspace(5)* %arrayidx1, align 1
%arrayidx2 = getelementptr inbounds i8, i8 addrspace(1)* %in, i64 1 %arrayidx2 = getelementptr inbounds i8, i8 addrspace(1)* %in, i64 1
%tmp1 = load i8, i8 addrspace(1)* %arrayidx2, align 1 %tmp1 = load i8, i8 addrspace(1)* %arrayidx2, align 1
%tmp5 = sext i8 %tmp1 to i64 %tmp5 = sext i8 %tmp1 to i64
%arrayidx3 = getelementptr inbounds [42 x i8], [42 x i8]* %stack, i64 0, i64 %tmp5 %arrayidx3 = getelementptr inbounds [42 x i8], [42 x i8] addrspace(5)* %stack, i64 0, i64 %tmp5
store i8 5, i8* %arrayidx3, align 1 store i8 5, i8 addrspace(5)* %arrayidx3, align 1
%arrayidx10 = getelementptr inbounds [42 x i8], [42 x i8]* %stack, i64 0, i64 0 %arrayidx10 = getelementptr inbounds [42 x i8], [42 x i8] addrspace(5)* %stack, i64 0, i64 0
%tmp2 = load i8, i8* %arrayidx10, align 1 %tmp2 = load i8, i8 addrspace(5)* %arrayidx10, align 1
store i8 %tmp2, i8 addrspace(1)* %out, align 1 store i8 %tmp2, i8 addrspace(1)* %out, align 1
%arrayidx12 = getelementptr inbounds [42 x i8], [42 x i8]* %stack, i64 0, i64 1 %arrayidx12 = getelementptr inbounds [42 x i8], [42 x i8] addrspace(5)* %stack, i64 0, i64 1
%tmp3 = load i8, i8* %arrayidx12, align 1 %tmp3 = load i8, i8 addrspace(5)* %arrayidx12, align 1
%arrayidx13 = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 1 %arrayidx13 = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 1
store i8 %tmp3, i8 addrspace(1)* %arrayidx13, align 1 store i8 %tmp3, i8 addrspace(1)* %arrayidx13, align 1
ret void ret void
@@ -150,21 +150,21 @@ entry:
define amdgpu_kernel void @occupancy_6_over(i8 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %in) #5 { define amdgpu_kernel void @occupancy_6_over(i8 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %in) #5 {
entry: entry:
%stack = alloca [43 x i8], align 4 %stack = alloca [43 x i8], align 4, addrspace(5)
%tmp = load i8, i8 addrspace(1)* %in, align 1 %tmp = load i8, i8 addrspace(1)* %in, align 1
%tmp4 = sext i8 %tmp to i64 %tmp4 = sext i8 %tmp to i64
%arrayidx1 = getelementptr inbounds [43 x i8], [43 x i8]* %stack, i64 0, i64 %tmp4 %arrayidx1 = getelementptr inbounds [43 x i8], [43 x i8] addrspace(5)* %stack, i64 0, i64 %tmp4
store i8 4, i8* %arrayidx1, align 1 store i8 4, i8 addrspace(5)* %arrayidx1, align 1
%arrayidx2 = getelementptr inbounds i8, i8 addrspace(1)* %in, i64 1 %arrayidx2 = getelementptr inbounds i8, i8 addrspace(1)* %in, i64 1
%tmp1 = load i8, i8 addrspace(1)* %arrayidx2, align 1 %tmp1 = load i8, i8 addrspace(1)* %arrayidx2, align 1
%tmp5 = sext i8 %tmp1 to i64 %tmp5 = sext i8 %tmp1 to i64
%arrayidx3 = getelementptr inbounds [43 x i8], [43 x i8]* %stack, i64 0, i64 %tmp5 %arrayidx3 = getelementptr inbounds [43 x i8], [43 x i8] addrspace(5)* %stack, i64 0, i64 %tmp5
store i8 5, i8* %arrayidx3, align 1 store i8 5, i8 addrspace(5)* %arrayidx3, align 1
%arrayidx10 = getelementptr inbounds [43 x i8], [43 x i8]* %stack, i64 0, i64 0 %arrayidx10 = getelementptr inbounds [43 x i8], [43 x i8] addrspace(5)* %stack, i64 0, i64 0
%tmp2 = load i8, i8* %arrayidx10, align 1 %tmp2 = load i8, i8 addrspace(5)* %arrayidx10, align 1
store i8 %tmp2, i8 addrspace(1)* %out, align 1 store i8 %tmp2, i8 addrspace(1)* %out, align 1
%arrayidx12 = getelementptr inbounds [43 x i8], [43 x i8]* %stack, i64 0, i64 1 %arrayidx12 = getelementptr inbounds [43 x i8], [43 x i8] addrspace(5)* %stack, i64 0, i64 1
%tmp3 = load i8, i8* %arrayidx12, align 1 %tmp3 = load i8, i8 addrspace(5)* %arrayidx12, align 1
%arrayidx13 = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 1 %arrayidx13 = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 1
store i8 %tmp3, i8 addrspace(1)* %arrayidx13, align 1 store i8 %tmp3, i8 addrspace(1)* %arrayidx13, align 1
ret void ret void
@@ -176,21 +176,21 @@ entry:
; CI-NOT: alloca ; CI-NOT: alloca
define amdgpu_kernel void @occupancy_8(i8 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %in) #6 { define amdgpu_kernel void @occupancy_8(i8 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %in) #6 {
entry: entry:
%stack = alloca [32 x i8], align 4 %stack = alloca [32 x i8], align 4, addrspace(5)
%tmp = load i8, i8 addrspace(1)* %in, align 1 %tmp = load i8, i8 addrspace(1)* %in, align 1
%tmp4 = sext i8 %tmp to i64 %tmp4 = sext i8 %tmp to i64
%arrayidx1 = getelementptr inbounds [32 x i8], [32 x i8]* %stack, i64 0, i64 %tmp4 %arrayidx1 = getelementptr inbounds [32 x i8], [32 x i8] addrspace(5)* %stack, i64 0, i64 %tmp4
store i8 4, i8* %arrayidx1, align 1 store i8 4, i8 addrspace(5)* %arrayidx1, align 1
%arrayidx2 = getelementptr inbounds i8, i8 addrspace(1)* %in, i64 1 %arrayidx2 = getelementptr inbounds i8, i8 addrspace(1)* %in, i64 1
%tmp1 = load i8, i8 addrspace(1)* %arrayidx2, align 1 %tmp1 = load i8, i8 addrspace(1)* %arrayidx2, align 1
%tmp5 = sext i8 %tmp1 to i64 %tmp5 = sext i8 %tmp1 to i64
%arrayidx3 = getelementptr inbounds [32 x i8], [32 x i8]* %stack, i64 0, i64 %tmp5 %arrayidx3 = getelementptr inbounds [32 x i8], [32 x i8] addrspace(5)* %stack, i64 0, i64 %tmp5
store i8 5, i8* %arrayidx3, align 1 store i8 5, i8 addrspace(5)* %arrayidx3, align 1
%arrayidx10 = getelementptr inbounds [32 x i8], [32 x i8]* %stack, i64 0, i64 0 %arrayidx10 = getelementptr inbounds [32 x i8], [32 x i8] addrspace(5)* %stack, i64 0, i64 0
%tmp2 = load i8, i8* %arrayidx10, align 1 %tmp2 = load i8, i8 addrspace(5)* %arrayidx10, align 1
store i8 %tmp2, i8 addrspace(1)* %out, align 1 store i8 %tmp2, i8 addrspace(1)* %out, align 1
%arrayidx12 = getelementptr inbounds [32 x i8], [32 x i8]* %stack, i64 0, i64 1 %arrayidx12 = getelementptr inbounds [32 x i8], [32 x i8] addrspace(5)* %stack, i64 0, i64 1
%tmp3 = load i8, i8* %arrayidx12, align 1 %tmp3 = load i8, i8 addrspace(5)* %arrayidx12, align 1
%arrayidx13 = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 1 %arrayidx13 = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 1
store i8 %tmp3, i8 addrspace(1)* %arrayidx13, align 1 store i8 %tmp3, i8 addrspace(1)* %arrayidx13, align 1
ret void ret void
@@ -202,21 +202,21 @@ entry:
define amdgpu_kernel void @occupancy_8_over(i8 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %in) #6 { define amdgpu_kernel void @occupancy_8_over(i8 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %in) #6 {
entry: entry:
%stack = alloca [33 x i8], align 4 %stack = alloca [33 x i8], align 4, addrspace(5)
%tmp = load i8, i8 addrspace(1)* %in, align 1 %tmp = load i8, i8 addrspace(1)* %in, align 1
%tmp4 = sext i8 %tmp to i64 %tmp4 = sext i8 %tmp to i64
%arrayidx1 = getelementptr inbounds [33 x i8], [33 x i8]* %stack, i64 0, i64 %tmp4 %arrayidx1 = getelementptr inbounds [33 x i8], [33 x i8] addrspace(5)* %stack, i64 0, i64 %tmp4
store i8 4, i8* %arrayidx1, align 1 store i8 4, i8 addrspace(5)* %arrayidx1, align 1
%arrayidx2 = getelementptr inbounds i8, i8 addrspace(1)* %in, i64 1 %arrayidx2 = getelementptr inbounds i8, i8 addrspace(1)* %in, i64 1
%tmp1 = load i8, i8 addrspace(1)* %arrayidx2, align 1 %tmp1 = load i8, i8 addrspace(1)* %arrayidx2, align 1
%tmp5 = sext i8 %tmp1 to i64 %tmp5 = sext i8 %tmp1 to i64
%arrayidx3 = getelementptr inbounds [33 x i8], [33 x i8]* %stack, i64 0, i64 %tmp5 %arrayidx3 = getelementptr inbounds [33 x i8], [33 x i8] addrspace(5)* %stack, i64 0, i64 %tmp5
store i8 5, i8* %arrayidx3, align 1 store i8 5, i8 addrspace(5)* %arrayidx3, align 1
%arrayidx10 = getelementptr inbounds [33 x i8], [33 x i8]* %stack, i64 0, i64 0 %arrayidx10 = getelementptr inbounds [33 x i8], [33 x i8] addrspace(5)* %stack, i64 0, i64 0
%tmp2 = load i8, i8* %arrayidx10, align 1 %tmp2 = load i8, i8 addrspace(5)* %arrayidx10, align 1
store i8 %tmp2, i8 addrspace(1)* %out, align 1 store i8 %tmp2, i8 addrspace(1)* %out, align 1
%arrayidx12 = getelementptr inbounds [33 x i8], [33 x i8]* %stack, i64 0, i64 1 %arrayidx12 = getelementptr inbounds [33 x i8], [33 x i8] addrspace(5)* %stack, i64 0, i64 1
%tmp3 = load i8, i8* %arrayidx12, align 1 %tmp3 = load i8, i8 addrspace(5)* %arrayidx12, align 1
%arrayidx13 = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 1 %arrayidx13 = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 1
store i8 %tmp3, i8 addrspace(1)* %arrayidx13, align 1 store i8 %tmp3, i8 addrspace(1)* %arrayidx13, align 1
ret void ret void
@@ -228,21 +228,21 @@ entry:
; CI-NOT: alloca ; CI-NOT: alloca
define amdgpu_kernel void @occupancy_9(i8 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %in) #7 { define amdgpu_kernel void @occupancy_9(i8 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %in) #7 {
entry: entry:
%stack = alloca [28 x i8], align 4 %stack = alloca [28 x i8], align 4, addrspace(5)
%tmp = load i8, i8 addrspace(1)* %in, align 1 %tmp = load i8, i8 addrspace(1)* %in, align 1
%tmp4 = sext i8 %tmp to i64 %tmp4 = sext i8 %tmp to i64
%arrayidx1 = getelementptr inbounds [28 x i8], [28 x i8]* %stack, i64 0, i64 %tmp4 %arrayidx1 = getelementptr inbounds [28 x i8], [28 x i8] addrspace(5)* %stack, i64 0, i64 %tmp4
store i8 4, i8* %arrayidx1, align 1 store i8 4, i8 addrspace(5)* %arrayidx1, align 1
%arrayidx2 = getelementptr inbounds i8, i8 addrspace(1)* %in, i64 1 %arrayidx2 = getelementptr inbounds i8, i8 addrspace(1)* %in, i64 1
%tmp1 = load i8, i8 addrspace(1)* %arrayidx2, align 1 %tmp1 = load i8, i8 addrspace(1)* %arrayidx2, align 1
%tmp5 = sext i8 %tmp1 to i64 %tmp5 = sext i8 %tmp1 to i64
%arrayidx3 = getelementptr inbounds [28 x i8], [28 x i8]* %stack, i64 0, i64 %tmp5 %arrayidx3 = getelementptr inbounds [28 x i8], [28 x i8] addrspace(5)* %stack, i64 0, i64 %tmp5
store i8 5, i8* %arrayidx3, align 1 store i8 5, i8 addrspace(5)* %arrayidx3, align 1
%arrayidx10 = getelementptr inbounds [28 x i8], [28 x i8]* %stack, i64 0, i64 0 %arrayidx10 = getelementptr inbounds [28 x i8], [28 x i8] addrspace(5)* %stack, i64 0, i64 0
%tmp2 = load i8, i8* %arrayidx10, align 1 %tmp2 = load i8, i8 addrspace(5)* %arrayidx10, align 1
store i8 %tmp2, i8 addrspace(1)* %out, align 1 store i8 %tmp2, i8 addrspace(1)* %out, align 1
%arrayidx12 = getelementptr inbounds [28 x i8], [28 x i8]* %stack, i64 0, i64 1 %arrayidx12 = getelementptr inbounds [28 x i8], [28 x i8] addrspace(5)* %stack, i64 0, i64 1
%tmp3 = load i8, i8* %arrayidx12, align 1 %tmp3 = load i8, i8 addrspace(5)* %arrayidx12, align 1
%arrayidx13 = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 1 %arrayidx13 = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 1
store i8 %tmp3, i8 addrspace(1)* %arrayidx13, align 1 store i8 %tmp3, i8 addrspace(1)* %arrayidx13, align 1
ret void ret void
@@ -254,21 +254,21 @@ entry:
define amdgpu_kernel void @occupancy_9_over(i8 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %in) #7 { define amdgpu_kernel void @occupancy_9_over(i8 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %in) #7 {
entry: entry:
%stack = alloca [29 x i8], align 4 %stack = alloca [29 x i8], align 4, addrspace(5)
%tmp = load i8, i8 addrspace(1)* %in, align 1 %tmp = load i8, i8 addrspace(1)* %in, align 1
%tmp4 = sext i8 %tmp to i64 %tmp4 = sext i8 %tmp to i64
%arrayidx1 = getelementptr inbounds [29 x i8], [29 x i8]* %stack, i64 0, i64 %tmp4 %arrayidx1 = getelementptr inbounds [29 x i8], [29 x i8] addrspace(5)* %stack, i64 0, i64 %tmp4
store i8 4, i8* %arrayidx1, align 1 store i8 4, i8 addrspace(5)* %arrayidx1, align 1
%arrayidx2 = getelementptr inbounds i8, i8 addrspace(1)* %in, i64 1 %arrayidx2 = getelementptr inbounds i8, i8 addrspace(1)* %in, i64 1
%tmp1 = load i8, i8 addrspace(1)* %arrayidx2, align 1 %tmp1 = load i8, i8 addrspace(1)* %arrayidx2, align 1
%tmp5 = sext i8 %tmp1 to i64 %tmp5 = sext i8 %tmp1 to i64
%arrayidx3 = getelementptr inbounds [29 x i8], [29 x i8]* %stack, i64 0, i64 %tmp5 %arrayidx3 = getelementptr inbounds [29 x i8], [29 x i8] addrspace(5)* %stack, i64 0, i64 %tmp5
store i8 5, i8* %arrayidx3, align 1 store i8 5, i8 addrspace(5)* %arrayidx3, align 1
%arrayidx10 = getelementptr inbounds [29 x i8], [29 x i8]* %stack, i64 0, i64 0 %arrayidx10 = getelementptr inbounds [29 x i8], [29 x i8] addrspace(5)* %stack, i64 0, i64 0
%tmp2 = load i8, i8* %arrayidx10, align 1 %tmp2 = load i8, i8 addrspace(5)* %arrayidx10, align 1
store i8 %tmp2, i8 addrspace(1)* %out, align 1 store i8 %tmp2, i8 addrspace(1)* %out, align 1
%arrayidx12 = getelementptr inbounds [29 x i8], [29 x i8]* %stack, i64 0, i64 1 %arrayidx12 = getelementptr inbounds [29 x i8], [29 x i8] addrspace(5)* %stack, i64 0, i64 1
%tmp3 = load i8, i8* %arrayidx12, align 1 %tmp3 = load i8, i8 addrspace(5)* %arrayidx12, align 1
%arrayidx13 = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 1 %arrayidx13 = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 1
store i8 %tmp3, i8 addrspace(1)* %arrayidx13, align 1 store i8 %tmp3, i8 addrspace(1)* %arrayidx13, align 1
ret void ret void

View File

@@ -6,7 +6,7 @@
; FUNC-LABEL: @test_kernel( ; FUNC-LABEL: @test_kernel(
; R600-LABEL: entry ; R600-LABEL: entry
; R600-NOT: call i8 addrspace(1)* @__printf_alloc ; R600-NOT: call i8 addrspace(1)* @__printf_alloc
; R600: call i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(2)* @.str, i32 0, i32 0), i8* %arraydecay, i32 %n) ; R600: call i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(2)* @.str, i32 0, i32 0), i8 addrspace(5)* %arraydecay, i32 %n)
; GCN-LABEL: entry ; GCN-LABEL: entry
; GCN: call i8 addrspace(1)* @__printf_alloc ; GCN: call i8 addrspace(1)* @__printf_alloc
; GCN-LABEL: entry.split ; GCN-LABEL: entry.split
@@ -15,7 +15,7 @@
; GCN: %PrintBuffIdCast = bitcast i8 addrspace(1)* %PrintBuffID to i32 addrspace(1)* ; GCN: %PrintBuffIdCast = bitcast i8 addrspace(1)* %PrintBuffID to i32 addrspace(1)*
; GCN: store i32 1, i32 addrspace(1)* %PrintBuffIdCast ; GCN: store i32 1, i32 addrspace(1)* %PrintBuffIdCast
; GCN: %PrintBuffGep = getelementptr i8, i8 addrspace(1)* %printf_alloc_fn, i32 4 ; GCN: %PrintBuffGep = getelementptr i8, i8 addrspace(1)* %printf_alloc_fn, i32 4
; GCN: %PrintArgPtr = ptrtoint i8* %arraydecay to i64 ; GCN: %PrintArgPtr = ptrtoint i8 addrspace(5)* %arraydecay to i64
; GCN: %PrintBuffPtrCast = bitcast i8 addrspace(1)* %PrintBuffGep to i64 addrspace(1)* ; GCN: %PrintBuffPtrCast = bitcast i8 addrspace(1)* %PrintBuffGep to i64 addrspace(1)*
; GCN: store i64 %PrintArgPtr, i64 addrspace(1)* %PrintBuffPtrCast ; GCN: store i64 %PrintArgPtr, i64 addrspace(1)* %PrintBuffPtrCast
; GCN: %PrintBuffNextPtr = getelementptr i8, i8 addrspace(1)* %PrintBuffGep, i32 8 ; GCN: %PrintBuffNextPtr = getelementptr i8, i8 addrspace(1)* %PrintBuffGep, i32 8
@@ -26,9 +26,9 @@
define amdgpu_kernel void @test_kernel(i32 %n) { define amdgpu_kernel void @test_kernel(i32 %n) {
entry: entry:
%str = alloca [9 x i8], align 1 %str = alloca [9 x i8], align 1, addrspace(5)
%arraydecay = getelementptr inbounds [9 x i8], [9 x i8]* %str, i32 0, i32 0 %arraydecay = getelementptr inbounds [9 x i8], [9 x i8] addrspace(5)* %str, i32 0, i32 0
%call1 = call i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(2)* @.str, i32 0, i32 0), i8* %arraydecay, i32 %n) %call1 = call i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(2)* @.str, i32 0, i32 0), i8 addrspace(5)* %arraydecay, i32 %n)
ret void ret void
} }

View File

@ -14,23 +14,23 @@
; CHECK-NEXT: OR_INT ; CHECK-NEXT: OR_INT
define amdgpu_kernel void @_Z9chk1D_512v() #0 { define amdgpu_kernel void @_Z9chk1D_512v() #0 {
entry: entry:
%a0 = alloca i32, align 4 %a0 = alloca i32, align 4, addrspace(5)
%b0 = alloca i32, align 4 %b0 = alloca i32, align 4, addrspace(5)
%c0 = alloca i32, align 4 %c0 = alloca i32, align 4, addrspace(5)
%d0 = alloca i32, align 4 %d0 = alloca i32, align 4, addrspace(5)
%a1 = alloca i32, align 4 %a1 = alloca i32, align 4, addrspace(5)
%b1 = alloca i32, align 4 %b1 = alloca i32, align 4, addrspace(5)
%c1 = alloca i32, align 4 %c1 = alloca i32, align 4, addrspace(5)
%d1 = alloca i32, align 4 %d1 = alloca i32, align 4, addrspace(5)
%data = alloca i32, align 4 %data = alloca i32, align 4, addrspace(5)
%0 = load i32, i32* %a0, align 4 %0 = load i32, i32 addrspace(5)* %a0, align 4
%1 = load i32, i32* %b0, align 4 %1 = load i32, i32 addrspace(5)* %b0, align 4
%cmp = icmp ne i32 %0, %1 %cmp = icmp ne i32 %0, %1
br i1 %cmp, label %land.lhs.true, label %if.else br i1 %cmp, label %land.lhs.true, label %if.else
land.lhs.true: ; preds = %entry land.lhs.true: ; preds = %entry
%2 = load i32, i32* %c0, align 4 %2 = load i32, i32 addrspace(5)* %c0, align 4
%3 = load i32, i32* %d0, align 4 %3 = load i32, i32 addrspace(5)* %d0, align 4
%cmp1 = icmp ne i32 %2, %3 %cmp1 = icmp ne i32 %2, %3
br i1 %cmp1, label %if.then, label %if.else br i1 %cmp1, label %if.then, label %if.else
@ -38,18 +38,18 @@ if.then: ; preds = %land.lhs.true
br label %if.end br label %if.end
if.else: ; preds = %land.lhs.true, %entry if.else: ; preds = %land.lhs.true, %entry
store i32 1, i32* %data, align 4 store i32 1, i32 addrspace(5)* %data, align 4
br label %if.end br label %if.end
if.end: ; preds = %if.else, %if.then if.end: ; preds = %if.else, %if.then
%4 = load i32, i32* %a1, align 4 %4 = load i32, i32 addrspace(5)* %a1, align 4
%5 = load i32, i32* %b1, align 4 %5 = load i32, i32 addrspace(5)* %b1, align 4
%cmp2 = icmp ne i32 %4, %5 %cmp2 = icmp ne i32 %4, %5
br i1 %cmp2, label %land.lhs.true3, label %if.else6 br i1 %cmp2, label %land.lhs.true3, label %if.else6
land.lhs.true3: ; preds = %if.end land.lhs.true3: ; preds = %if.end
%6 = load i32, i32* %c1, align 4 %6 = load i32, i32 addrspace(5)* %c1, align 4
%7 = load i32, i32* %d1, align 4 %7 = load i32, i32 addrspace(5)* %d1, align 4
%cmp4 = icmp ne i32 %6, %7 %cmp4 = icmp ne i32 %6, %7
br i1 %cmp4, label %if.then5, label %if.else6 br i1 %cmp4, label %if.then5, label %if.else6
@ -57,7 +57,7 @@ if.then5: ; preds = %land.lhs.true3
br label %if.end7 br label %if.end7
if.else6: ; preds = %land.lhs.true3, %if.end if.else6: ; preds = %land.lhs.true3, %if.end
store i32 1, i32* %data, align 4 store i32 1, i32 addrspace(5)* %data, align 4
br label %if.end7 br label %if.end7
if.end7: ; preds = %if.else6, %if.then5 if.end7: ; preds = %if.else6, %if.then5

View File

@ -5,15 +5,15 @@
; CHECK-LABEL: @invalid_bitcast_addrspace( ; CHECK-LABEL: @invalid_bitcast_addrspace(
; CHECK: getelementptr inbounds [256 x [1 x i32]], [256 x [1 x i32]] addrspace(3)* @invalid_bitcast_addrspace.data, i32 0, i32 %14 ; CHECK: getelementptr inbounds [256 x [1 x i32]], [256 x [1 x i32]] addrspace(3)* @invalid_bitcast_addrspace.data, i32 0, i32 %14
; CHECK: bitcast [1 x i32] addrspace(3)* %{{[0-9]+}} to half addrspace(3)* ; CHECK: bitcast [1 x i32] addrspace(3)* %{{[0-9]+}} to half addrspace(3)*
; CHECK: addrspacecast half addrspace(3)* %tmp to half addrspace(4)* ; CHECK: addrspacecast half addrspace(3)* %tmp to half*
; CHECK: bitcast half addrspace(4)* %tmp1 to <2 x i16> addrspace(4)* ; CHECK: bitcast half* %tmp1 to <2 x i16>*
define amdgpu_kernel void @invalid_bitcast_addrspace() #0 { define amdgpu_kernel void @invalid_bitcast_addrspace() #0 {
entry: entry:
%data = alloca [1 x i32], align 4 %data = alloca [1 x i32], addrspace(5)
%tmp = bitcast [1 x i32]* %data to half* %tmp = bitcast [1 x i32] addrspace(5)* %data to half addrspace(5)*
%tmp1 = addrspacecast half* %tmp to half addrspace(4)* %tmp1 = addrspacecast half addrspace(5)* %tmp to half*
%tmp2 = bitcast half addrspace(4)* %tmp1 to <2 x i16> addrspace(4)* %tmp2 = bitcast half* %tmp1 to <2 x i16>*
%tmp3 = load <2 x i16>, <2 x i16> addrspace(4)* %tmp2, align 2 %tmp3 = load <2 x i16>, <2 x i16>* %tmp2, align 2
%tmp4 = bitcast <2 x i16> %tmp3 to <2 x half> %tmp4 = bitcast <2 x i16> %tmp3 to <2 x half>
ret void ret void
} }

View File

@ -16,19 +16,19 @@
define amdgpu_vs void @promote_1d_aggr() #0 { define amdgpu_vs void @promote_1d_aggr() #0 {
; CHECK-LABEL: @promote_1d_aggr( ; CHECK-LABEL: @promote_1d_aggr(
; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
; CHECK-NEXT: [[F1:%.*]] = alloca [1 x float], align 4 ; CHECK-NEXT: [[F1:%.*]] = alloca [1 x float], align 4, addrspace(5)
; CHECK-NEXT: [[FOO:%.*]] = getelementptr [[BLOCK:%.*]], [[BLOCK]] addrspace(1)* @block, i32 0, i32 1 ; CHECK-NEXT: [[FOO:%.*]] = getelementptr [[BLOCK:%.*]], [[BLOCK]] addrspace(1)* @block, i32 0, i32 1
; CHECK-NEXT: [[FOO1:%.*]] = load i32, i32 addrspace(1)* [[FOO]], align 4 ; CHECK-NEXT: [[FOO1:%.*]] = load i32, i32 addrspace(1)* [[FOO]], align 4
; CHECK-NEXT: store i32 [[FOO1]], i32* [[I]], align 4 ; CHECK-NEXT: store i32 [[FOO1]], i32 addrspace(5)* [[I]], align 4
; CHECK-NEXT: [[FOO2:%.*]] = getelementptr [[BLOCK]], [[BLOCK]] addrspace(1)* @block, i32 0, i32 0 ; CHECK-NEXT: [[FOO2:%.*]] = getelementptr [[BLOCK]], [[BLOCK]] addrspace(1)* @block, i32 0, i32 0
; CHECK-NEXT: [[FOO3:%.*]] = load [1 x float], [1 x float] addrspace(1)* [[FOO2]], align 4 ; CHECK-NEXT: [[FOO3:%.*]] = load [1 x float], [1 x float] addrspace(1)* [[FOO2]], align 4
; CHECK-NEXT: store [1 x float] [[FOO3]], [1 x float]* [[F1]], align 4 ; CHECK-NEXT: store [1 x float] [[FOO3]], [1 x float] addrspace(5)* [[F1]], align 4
; CHECK-NEXT: [[FOO4:%.*]] = load i32, i32* [[I]], align 4 ; CHECK-NEXT: [[FOO4:%.*]] = load i32, i32 addrspace(5)* [[I]], align 4
; CHECK-NEXT: [[FOO5:%.*]] = getelementptr [1 x float], [1 x float]* [[F1]], i32 0, i32 [[FOO4]] ; CHECK-NEXT: [[FOO5:%.*]] = getelementptr [1 x float], [1 x float] addrspace(5)* [[F1]], i32 0, i32 [[FOO4]]
; CHECK-NEXT: [[FOO6:%.*]] = load float, float* [[FOO5]], align 4 ; CHECK-NEXT: [[FOO6:%.*]] = load float, float addrspace(5)* [[FOO5]], align 4
; CHECK-NEXT: [[FOO7:%.*]] = alloca <4 x float>, align 16 ; CHECK-NEXT: [[FOO7:%.*]] = alloca <4 x float>, align 16, addrspace(5)
; CHECK-NEXT: [[FOO8:%.*]] = load <4 x float>, <4 x float>* [[FOO7]], align 16 ; CHECK-NEXT: [[FOO8:%.*]] = load <4 x float>, <4 x float> addrspace(5)* [[FOO7]], align 16
; CHECK-NEXT: [[FOO9:%.*]] = insertelement <4 x float> [[FOO8]], float [[FOO6]], i32 0 ; CHECK-NEXT: [[FOO9:%.*]] = insertelement <4 x float> [[FOO8]], float [[FOO6]], i32 0
; CHECK-NEXT: [[FOO10:%.*]] = insertelement <4 x float> [[FOO9]], float [[FOO6]], i32 1 ; CHECK-NEXT: [[FOO10:%.*]] = insertelement <4 x float> [[FOO9]], float [[FOO6]], i32 1
; CHECK-NEXT: [[FOO11:%.*]] = insertelement <4 x float> [[FOO10]], float [[FOO6]], i32 2 ; CHECK-NEXT: [[FOO11:%.*]] = insertelement <4 x float> [[FOO10]], float [[FOO6]], i32 2
@ -37,19 +37,19 @@ define amdgpu_vs void @promote_1d_aggr() #0 {
; CHECK-NEXT: store <4 x float> [[FOO12]], <4 x float> addrspace(1)* [[FOO13]], align 16 ; CHECK-NEXT: store <4 x float> [[FOO12]], <4 x float> addrspace(1)* [[FOO13]], align 16
; CHECK-NEXT: ret void ; CHECK-NEXT: ret void
; ;
%i = alloca i32 %i = alloca i32, addrspace(5)
%f1 = alloca [1 x float] %f1 = alloca [1 x float], addrspace(5)
%foo = getelementptr %Block, %Block addrspace(1)* @block, i32 0, i32 1 %foo = getelementptr %Block, %Block addrspace(1)* @block, i32 0, i32 1
%foo1 = load i32, i32 addrspace(1)* %foo %foo1 = load i32, i32 addrspace(1)* %foo
store i32 %foo1, i32* %i store i32 %foo1, i32 addrspace(5)* %i
%foo2 = getelementptr %Block, %Block addrspace(1)* @block, i32 0, i32 0 %foo2 = getelementptr %Block, %Block addrspace(1)* @block, i32 0, i32 0
%foo3 = load [1 x float], [1 x float] addrspace(1)* %foo2 %foo3 = load [1 x float], [1 x float] addrspace(1)* %foo2
store [1 x float] %foo3, [1 x float]* %f1 store [1 x float] %foo3, [1 x float] addrspace(5)* %f1
%foo4 = load i32, i32* %i %foo4 = load i32, i32 addrspace(5)* %i
%foo5 = getelementptr [1 x float], [1 x float]* %f1, i32 0, i32 %foo4 %foo5 = getelementptr [1 x float], [1 x float] addrspace(5)* %f1, i32 0, i32 %foo4
%foo6 = load float, float* %foo5 %foo6 = load float, float addrspace(5)* %foo5
%foo7 = alloca <4 x float> %foo7 = alloca <4 x float>, addrspace(5)
%foo8 = load <4 x float>, <4 x float>* %foo7 %foo8 = load <4 x float>, <4 x float> addrspace(5)* %foo7
%foo9 = insertelement <4 x float> %foo8, float %foo6, i32 0 %foo9 = insertelement <4 x float> %foo8, float %foo6, i32 0
%foo10 = insertelement <4 x float> %foo9, float %foo6, i32 1 %foo10 = insertelement <4 x float> %foo9, float %foo6, i32 1
%foo11 = insertelement <4 x float> %foo10, float %foo6, i32 2 %foo11 = insertelement <4 x float> %foo10, float %foo6, i32 2
@ -64,42 +64,42 @@ define amdgpu_vs void @promote_1d_aggr() #0 {
define amdgpu_vs void @promote_store_aggr() #0 { define amdgpu_vs void @promote_store_aggr() #0 {
; CHECK-LABEL: @promote_store_aggr( ; CHECK-LABEL: @promote_store_aggr(
; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
; CHECK-NEXT: [[F1:%.*]] = alloca [2 x float], align 4 ; CHECK-NEXT: [[F1:%.*]] = alloca [2 x float], align 4, addrspace(5)
; CHECK-NEXT: [[FOO:%.*]] = getelementptr [[BLOCK2:%.*]], [[BLOCK2]] addrspace(1)* @block2, i32 0, i32 0 ; CHECK-NEXT: [[FOO:%.*]] = getelementptr [[BLOCK2:%.*]], [[BLOCK2]] addrspace(1)* @block2, i32 0, i32 0
; CHECK-NEXT: [[FOO1:%.*]] = load i32, i32 addrspace(1)* [[FOO]], align 4 ; CHECK-NEXT: [[FOO1:%.*]] = load i32, i32 addrspace(1)* [[FOO]], align 4
; CHECK-NEXT: store i32 [[FOO1]], i32* [[I]], align 4 ; CHECK-NEXT: store i32 [[FOO1]], i32 addrspace(5)* [[I]], align 4
; CHECK-NEXT: [[FOO2:%.*]] = load i32, i32* [[I]], align 4 ; CHECK-NEXT: [[FOO2:%.*]] = load i32, i32 addrspace(5)* [[I]], align 4
; CHECK-NEXT: [[FOO3:%.*]] = sitofp i32 [[FOO2]] to float ; CHECK-NEXT: [[FOO3:%.*]] = sitofp i32 [[FOO2]] to float
; CHECK-NEXT: [[FOO4:%.*]] = getelementptr [2 x float], [2 x float]* [[F1]], i32 0, i32 0 ; CHECK-NEXT: [[FOO4:%.*]] = getelementptr [2 x float], [2 x float] addrspace(5)* [[F1]], i32 0, i32 0
; CHECK-NEXT: [[TMP1:%.*]] = bitcast [2 x float]* [[F1]] to <2 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = bitcast [2 x float] addrspace(5)* [[F1]] to <2 x float> addrspace(5)*
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, <2 x float> addrspace(5)* [[TMP1]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[FOO3]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[FOO3]], i32 0
; CHECK-NEXT: store <2 x float> [[TMP3]], <2 x float>* [[TMP1]], align 8 ; CHECK-NEXT: store <2 x float> [[TMP3]], <2 x float> addrspace(5)* [[TMP1]], align 8
; CHECK-NEXT: [[FOO5:%.*]] = getelementptr [2 x float], [2 x float]* [[F1]], i32 0, i32 1 ; CHECK-NEXT: [[FOO5:%.*]] = getelementptr [2 x float], [2 x float] addrspace(5)* [[F1]], i32 0, i32 1
; CHECK-NEXT: [[TMP4:%.*]] = bitcast [2 x float]* [[F1]] to <2 x float>* ; CHECK-NEXT: [[TMP4:%.*]] = bitcast [2 x float] addrspace(5)* [[F1]] to <2 x float> addrspace(5)*
; CHECK-NEXT: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[TMP4]], align 8 ; CHECK-NEXT: [[TMP5:%.*]] = load <2 x float>, <2 x float> addrspace(5)* [[TMP4]], align 8
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float 2.000000e+00, i64 1 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float 2.000000e+00, i64 1
; CHECK-NEXT: store <2 x float> [[TMP6]], <2 x float>* [[TMP4]], align 8 ; CHECK-NEXT: store <2 x float> [[TMP6]], <2 x float> addrspace(5)* [[TMP4]], align 8
; CHECK-NEXT: [[FOO6:%.*]] = load [2 x float], [2 x float]* [[F1]], align 4 ; CHECK-NEXT: [[FOO6:%.*]] = load [2 x float], [2 x float] addrspace(5)* [[F1]], align 4
; CHECK-NEXT: [[FOO7:%.*]] = getelementptr [[BLOCK2]], [[BLOCK2]] addrspace(1)* @block2, i32 0, i32 1 ; CHECK-NEXT: [[FOO7:%.*]] = getelementptr [[BLOCK2]], [[BLOCK2]] addrspace(1)* @block2, i32 0, i32 1
; CHECK-NEXT: store [2 x float] [[FOO6]], [2 x float] addrspace(1)* [[FOO7]], align 4 ; CHECK-NEXT: store [2 x float] [[FOO6]], [2 x float] addrspace(1)* [[FOO7]], align 4
; CHECK-NEXT: [[FOO8:%.*]] = getelementptr [[GL_PERVERTEX:%.*]], [[GL_PERVERTEX]] addrspace(1)* @pv, i32 0, i32 0 ; CHECK-NEXT: [[FOO8:%.*]] = getelementptr [[GL_PERVERTEX:%.*]], [[GL_PERVERTEX]] addrspace(1)* @pv, i32 0, i32 0
; CHECK-NEXT: store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float> addrspace(1)* [[FOO8]], align 16 ; CHECK-NEXT: store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float> addrspace(1)* [[FOO8]], align 16
; CHECK-NEXT: ret void ; CHECK-NEXT: ret void
; ;
%i = alloca i32 %i = alloca i32, addrspace(5)
%f1 = alloca [2 x float] %f1 = alloca [2 x float], addrspace(5)
%foo = getelementptr %Block2, %Block2 addrspace(1)* @block2, i32 0, i32 0 %foo = getelementptr %Block2, %Block2 addrspace(1)* @block2, i32 0, i32 0
%foo1 = load i32, i32 addrspace(1)* %foo %foo1 = load i32, i32 addrspace(1)* %foo
store i32 %foo1, i32* %i store i32 %foo1, i32 addrspace(5)* %i
%foo2 = load i32, i32* %i %foo2 = load i32, i32 addrspace(5)* %i
%foo3 = sitofp i32 %foo2 to float %foo3 = sitofp i32 %foo2 to float
%foo4 = getelementptr [2 x float], [2 x float]* %f1, i32 0, i32 0 %foo4 = getelementptr [2 x float], [2 x float] addrspace(5)* %f1, i32 0, i32 0
store float %foo3, float* %foo4 store float %foo3, float addrspace(5)* %foo4
%foo5 = getelementptr [2 x float], [2 x float]* %f1, i32 0, i32 1 %foo5 = getelementptr [2 x float], [2 x float] addrspace(5)* %f1, i32 0, i32 1
store float 2.000000e+00, float* %foo5 store float 2.000000e+00, float addrspace(5)* %foo5
%foo6 = load [2 x float], [2 x float]* %f1 %foo6 = load [2 x float], [2 x float] addrspace(5)* %f1
%foo7 = getelementptr %Block2, %Block2 addrspace(1)* @block2, i32 0, i32 1 %foo7 = getelementptr %Block2, %Block2 addrspace(1)* @block2, i32 0, i32 1
store [2 x float] %foo6, [2 x float] addrspace(1)* %foo7 store [2 x float] %foo6, [2 x float] addrspace(1)* %foo7
%foo8 = getelementptr %gl_PerVertex, %gl_PerVertex addrspace(1)* @pv, i32 0, i32 0 %foo8 = getelementptr %gl_PerVertex, %gl_PerVertex addrspace(1)* @pv, i32 0, i32 0
@ -112,21 +112,21 @@ define amdgpu_vs void @promote_store_aggr() #0 {
define amdgpu_vs void @promote_load_from_store_aggr() #0 { define amdgpu_vs void @promote_load_from_store_aggr() #0 {
; CHECK-LABEL: @promote_load_from_store_aggr( ; CHECK-LABEL: @promote_load_from_store_aggr(
; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
; CHECK-NEXT: [[F1:%.*]] = alloca [2 x float], align 4 ; CHECK-NEXT: [[F1:%.*]] = alloca [2 x float], align 4, addrspace(5)
; CHECK-NEXT: [[FOO:%.*]] = getelementptr [[BLOCK3:%.*]], [[BLOCK3]] addrspace(1)* @block3, i32 0, i32 1 ; CHECK-NEXT: [[FOO:%.*]] = getelementptr [[BLOCK3:%.*]], [[BLOCK3]] addrspace(1)* @block3, i32 0, i32 1
; CHECK-NEXT: [[FOO1:%.*]] = load i32, i32 addrspace(1)* [[FOO]], align 4 ; CHECK-NEXT: [[FOO1:%.*]] = load i32, i32 addrspace(1)* [[FOO]], align 4
; CHECK-NEXT: store i32 [[FOO1]], i32* [[I]], align 4 ; CHECK-NEXT: store i32 [[FOO1]], i32 addrspace(5)* [[I]], align 4
; CHECK-NEXT: [[FOO2:%.*]] = getelementptr [[BLOCK3]], [[BLOCK3]] addrspace(1)* @block3, i32 0, i32 0 ; CHECK-NEXT: [[FOO2:%.*]] = getelementptr [[BLOCK3]], [[BLOCK3]] addrspace(1)* @block3, i32 0, i32 0
; CHECK-NEXT: [[FOO3:%.*]] = load [2 x float], [2 x float] addrspace(1)* [[FOO2]], align 4 ; CHECK-NEXT: [[FOO3:%.*]] = load [2 x float], [2 x float] addrspace(1)* [[FOO2]], align 4
; CHECK-NEXT: store [2 x float] [[FOO3]], [2 x float]* [[F1]], align 4 ; CHECK-NEXT: store [2 x float] [[FOO3]], [2 x float] addrspace(5)* [[F1]], align 4
; CHECK-NEXT: [[FOO4:%.*]] = load i32, i32* [[I]], align 4 ; CHECK-NEXT: [[FOO4:%.*]] = load i32, i32 addrspace(5)* [[I]], align 4
; CHECK-NEXT: [[FOO5:%.*]] = getelementptr [2 x float], [2 x float]* [[F1]], i32 0, i32 [[FOO4]] ; CHECK-NEXT: [[FOO5:%.*]] = getelementptr [2 x float], [2 x float] addrspace(5)* [[F1]], i32 0, i32 [[FOO4]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast [2 x float]* [[F1]] to <2 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = bitcast [2 x float] addrspace(5)* [[F1]] to <2 x float> addrspace(5)*
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, <2 x float> addrspace(5)* [[TMP1]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 [[FOO4]] ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 [[FOO4]]
; CHECK-NEXT: [[FOO7:%.*]] = alloca <4 x float>, align 16 ; CHECK-NEXT: [[FOO7:%.*]] = alloca <4 x float>, align 16, addrspace(5)
; CHECK-NEXT: [[FOO8:%.*]] = load <4 x float>, <4 x float>* [[FOO7]], align 16 ; CHECK-NEXT: [[FOO8:%.*]] = load <4 x float>, <4 x float> addrspace(5)* [[FOO7]], align 16
; CHECK-NEXT: [[FOO9:%.*]] = insertelement <4 x float> [[FOO8]], float [[TMP3]], i32 0 ; CHECK-NEXT: [[FOO9:%.*]] = insertelement <4 x float> [[FOO8]], float [[TMP3]], i32 0
; CHECK-NEXT: [[FOO10:%.*]] = insertelement <4 x float> [[FOO9]], float [[TMP3]], i32 1 ; CHECK-NEXT: [[FOO10:%.*]] = insertelement <4 x float> [[FOO9]], float [[TMP3]], i32 1
; CHECK-NEXT: [[FOO11:%.*]] = insertelement <4 x float> [[FOO10]], float [[TMP3]], i32 2 ; CHECK-NEXT: [[FOO11:%.*]] = insertelement <4 x float> [[FOO10]], float [[TMP3]], i32 2
@ -135,19 +135,19 @@ define amdgpu_vs void @promote_load_from_store_aggr() #0 {
; CHECK-NEXT: store <4 x float> [[FOO12]], <4 x float> addrspace(1)* [[FOO13]], align 16 ; CHECK-NEXT: store <4 x float> [[FOO12]], <4 x float> addrspace(1)* [[FOO13]], align 16
; CHECK-NEXT: ret void ; CHECK-NEXT: ret void
; ;
%i = alloca i32 %i = alloca i32, addrspace(5)
%f1 = alloca [2 x float] %f1 = alloca [2 x float], addrspace(5)
%foo = getelementptr %Block3, %Block3 addrspace(1)* @block3, i32 0, i32 1 %foo = getelementptr %Block3, %Block3 addrspace(1)* @block3, i32 0, i32 1
%foo1 = load i32, i32 addrspace(1)* %foo %foo1 = load i32, i32 addrspace(1)* %foo
store i32 %foo1, i32* %i store i32 %foo1, i32 addrspace(5)* %i
%foo2 = getelementptr %Block3, %Block3 addrspace(1)* @block3, i32 0, i32 0 %foo2 = getelementptr %Block3, %Block3 addrspace(1)* @block3, i32 0, i32 0
%foo3 = load [2 x float], [2 x float] addrspace(1)* %foo2 %foo3 = load [2 x float], [2 x float] addrspace(1)* %foo2
store [2 x float] %foo3, [2 x float]* %f1 store [2 x float] %foo3, [2 x float] addrspace(5)* %f1
%foo4 = load i32, i32* %i %foo4 = load i32, i32 addrspace(5)* %i
%foo5 = getelementptr [2 x float], [2 x float]* %f1, i32 0, i32 %foo4 %foo5 = getelementptr [2 x float], [2 x float] addrspace(5)* %f1, i32 0, i32 %foo4
%foo6 = load float, float* %foo5 %foo6 = load float, float addrspace(5)* %foo5
%foo7 = alloca <4 x float> %foo7 = alloca <4 x float>, addrspace(5)
%foo8 = load <4 x float>, <4 x float>* %foo7 %foo8 = load <4 x float>, <4 x float> addrspace(5)* %foo7
%foo9 = insertelement <4 x float> %foo8, float %foo6, i32 0 %foo9 = insertelement <4 x float> %foo8, float %foo6, i32 0
%foo10 = insertelement <4 x float> %foo9, float %foo6, i32 1 %foo10 = insertelement <4 x float> %foo9, float %foo6, i32 1
%foo11 = insertelement <4 x float> %foo10, float %foo6, i32 2 %foo11 = insertelement <4 x float> %foo10, float %foo6, i32 2
@ -162,35 +162,35 @@ define amdgpu_vs void @promote_load_from_store_aggr() #0 {
define amdgpu_ps void @promote_double_aggr() #0 { define amdgpu_ps void @promote_double_aggr() #0 {
; CHECK-LABEL: @promote_double_aggr( ; CHECK-LABEL: @promote_double_aggr(
; CHECK-NEXT: [[S:%.*]] = alloca [2 x double], align 8 ; CHECK-NEXT: [[S:%.*]] = alloca [2 x double], align 8, addrspace(5)
; CHECK-NEXT: [[FOO:%.*]] = getelementptr { [4 x double], <2 x double>, <3 x double>, <4 x double> }, { [4 x double], <2 x double>, <3 x double>, <4 x double> } addrspace(1)* @tmp_g, i32 0, i32 0, i32 0 ; CHECK-NEXT: [[FOO:%.*]] = getelementptr { [4 x double], <2 x double>, <3 x double>, <4 x double> }, { [4 x double], <2 x double>, <3 x double>, <4 x double> } addrspace(1)* @tmp_g, i32 0, i32 0, i32 0
; CHECK-NEXT: [[FOO1:%.*]] = load double, double addrspace(1)* [[FOO]], align 8 ; CHECK-NEXT: [[FOO1:%.*]] = load double, double addrspace(1)* [[FOO]], align 8
; CHECK-NEXT: [[FOO2:%.*]] = getelementptr { [4 x double], <2 x double>, <3 x double>, <4 x double> }, { [4 x double], <2 x double>, <3 x double>, <4 x double> } addrspace(1)* @tmp_g, i32 0, i32 0, i32 1 ; CHECK-NEXT: [[FOO2:%.*]] = getelementptr { [4 x double], <2 x double>, <3 x double>, <4 x double> }, { [4 x double], <2 x double>, <3 x double>, <4 x double> } addrspace(1)* @tmp_g, i32 0, i32 0, i32 1
; CHECK-NEXT: [[FOO3:%.*]] = load double, double addrspace(1)* [[FOO2]], align 8 ; CHECK-NEXT: [[FOO3:%.*]] = load double, double addrspace(1)* [[FOO2]], align 8
; CHECK-NEXT: [[FOO4:%.*]] = insertvalue [2 x double] undef, double [[FOO1]], 0 ; CHECK-NEXT: [[FOO4:%.*]] = insertvalue [2 x double] undef, double [[FOO1]], 0
; CHECK-NEXT: [[FOO5:%.*]] = insertvalue [2 x double] [[FOO4]], double [[FOO3]], 1 ; CHECK-NEXT: [[FOO5:%.*]] = insertvalue [2 x double] [[FOO4]], double [[FOO3]], 1
; CHECK-NEXT: store [2 x double] [[FOO5]], [2 x double]* [[S]], align 8 ; CHECK-NEXT: store [2 x double] [[FOO5]], [2 x double] addrspace(5)* [[S]], align 8
; CHECK-NEXT: [[FOO6:%.*]] = getelementptr [2 x double], [2 x double]* [[S]], i32 0, i32 1 ; CHECK-NEXT: [[FOO6:%.*]] = getelementptr [2 x double], [2 x double] addrspace(5)* [[S]], i32 0, i32 1
; CHECK-NEXT: [[TMP1:%.*]] = bitcast [2 x double]* [[S]] to <2 x double>* ; CHECK-NEXT: [[TMP1:%.*]] = bitcast [2 x double] addrspace(5)* [[S]] to <2 x double> addrspace(5)*
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 16 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double> addrspace(5)* [[TMP1]], align 16
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[TMP2]], i64 1 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[TMP2]], i64 1
; CHECK-NEXT: [[FOO8:%.*]] = getelementptr [2 x double], [2 x double]* [[S]], i32 0, i32 1 ; CHECK-NEXT: [[FOO8:%.*]] = getelementptr [2 x double], [2 x double] addrspace(5)* [[S]], i32 0, i32 1
; CHECK-NEXT: [[TMP4:%.*]] = bitcast [2 x double]* [[S]] to <2 x double>* ; CHECK-NEXT: [[TMP4:%.*]] = bitcast [2 x double] addrspace(5)* [[S]] to <2 x double> addrspace(5)*
; CHECK-NEXT: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[TMP4]], align 16 ; CHECK-NEXT: [[TMP5:%.*]] = load <2 x double>, <2 x double> addrspace(5)* [[TMP4]], align 16
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP5]], i64 1 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP5]], i64 1
; CHECK-NEXT: [[FOO10:%.*]] = fadd double [[TMP3]], [[TMP6]] ; CHECK-NEXT: [[FOO10:%.*]] = fadd double [[TMP3]], [[TMP6]]
; CHECK-NEXT: [[FOO11:%.*]] = getelementptr [2 x double], [2 x double]* [[S]], i32 0, i32 0 ; CHECK-NEXT: [[FOO11:%.*]] = getelementptr [2 x double], [2 x double] addrspace(5)* [[S]], i32 0, i32 0
; CHECK-NEXT: [[TMP7:%.*]] = bitcast [2 x double]* [[S]] to <2 x double>* ; CHECK-NEXT: [[TMP7:%.*]] = bitcast [2 x double] addrspace(5)* [[S]] to <2 x double> addrspace(5)*
; CHECK-NEXT: [[TMP8:%.*]] = load <2 x double>, <2 x double>* [[TMP7]], align 16 ; CHECK-NEXT: [[TMP8:%.*]] = load <2 x double>, <2 x double> addrspace(5)* [[TMP7]], align 16
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x double> [[TMP8]], double [[FOO10]], i32 0 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x double> [[TMP8]], double [[FOO10]], i32 0
; CHECK-NEXT: store <2 x double> [[TMP9]], <2 x double>* [[TMP7]], align 16 ; CHECK-NEXT: store <2 x double> [[TMP9]], <2 x double> addrspace(5)* [[TMP7]], align 16
; CHECK-NEXT: [[FOO12:%.*]] = getelementptr [2 x double], [2 x double]* [[S]], i32 0, i32 0 ; CHECK-NEXT: [[FOO12:%.*]] = getelementptr [2 x double], [2 x double] addrspace(5)* [[S]], i32 0, i32 0
; CHECK-NEXT: [[TMP10:%.*]] = bitcast [2 x double]* [[S]] to <2 x double>* ; CHECK-NEXT: [[TMP10:%.*]] = bitcast [2 x double] addrspace(5)* [[S]] to <2 x double> addrspace(5)*
; CHECK-NEXT: [[TMP11:%.*]] = load <2 x double>, <2 x double>* [[TMP10]], align 16 ; CHECK-NEXT: [[TMP11:%.*]] = load <2 x double>, <2 x double> addrspace(5)* [[TMP10]], align 16
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x double> [[TMP11]], i32 0 ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x double> [[TMP11]], i32 0
; CHECK-NEXT: [[FOO14:%.*]] = getelementptr [2 x double], [2 x double]* [[S]], i32 0, i32 1 ; CHECK-NEXT: [[FOO14:%.*]] = getelementptr [2 x double], [2 x double] addrspace(5)* [[S]], i32 0, i32 1
; CHECK-NEXT: [[TMP13:%.*]] = bitcast [2 x double]* [[S]] to <2 x double>* ; CHECK-NEXT: [[TMP13:%.*]] = bitcast [2 x double] addrspace(5)* [[S]] to <2 x double> addrspace(5)*
; CHECK-NEXT: [[TMP14:%.*]] = load <2 x double>, <2 x double>* [[TMP13]], align 16 ; CHECK-NEXT: [[TMP14:%.*]] = load <2 x double>, <2 x double> addrspace(5)* [[TMP13]], align 16
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x double> [[TMP14]], i64 1 ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x double> [[TMP14]], i64 1
; CHECK-NEXT: [[FOO16:%.*]] = fadd double [[TMP12]], [[TMP15]] ; CHECK-NEXT: [[FOO16:%.*]] = fadd double [[TMP12]], [[TMP15]]
; CHECK-NEXT: [[FOO17:%.*]] = fptrunc double [[FOO16]] to float ; CHECK-NEXT: [[FOO17:%.*]] = fptrunc double [[FOO16]] to float
@ -201,25 +201,25 @@ define amdgpu_ps void @promote_double_aggr() #0 {
; CHECK-NEXT: store <4 x float> [[FOO21]], <4 x float> addrspace(1)* @frag_color, align 16 ; CHECK-NEXT: store <4 x float> [[FOO21]], <4 x float> addrspace(1)* @frag_color, align 16
; CHECK-NEXT: ret void ; CHECK-NEXT: ret void
; ;
%s = alloca [2 x double] %s = alloca [2 x double], addrspace(5)
%foo = getelementptr { [4 x double], <2 x double>, <3 x double>, <4 x double> }, { [4 x double], <2 x double>, <3 x double>, <4 x double> } addrspace(1)* @tmp_g, i32 0, i32 0, i32 0 %foo = getelementptr { [4 x double], <2 x double>, <3 x double>, <4 x double> }, { [4 x double], <2 x double>, <3 x double>, <4 x double> } addrspace(1)* @tmp_g, i32 0, i32 0, i32 0
%foo1 = load double, double addrspace(1)* %foo %foo1 = load double, double addrspace(1)* %foo
%foo2 = getelementptr { [4 x double], <2 x double>, <3 x double>, <4 x double> }, { [4 x double], <2 x double>, <3 x double>, <4 x double> } addrspace(1)* @tmp_g, i32 0, i32 0, i32 1 %foo2 = getelementptr { [4 x double], <2 x double>, <3 x double>, <4 x double> }, { [4 x double], <2 x double>, <3 x double>, <4 x double> } addrspace(1)* @tmp_g, i32 0, i32 0, i32 1
%foo3 = load double, double addrspace(1)* %foo2 %foo3 = load double, double addrspace(1)* %foo2
%foo4 = insertvalue [2 x double] undef, double %foo1, 0 %foo4 = insertvalue [2 x double] undef, double %foo1, 0
%foo5 = insertvalue [2 x double] %foo4, double %foo3, 1 %foo5 = insertvalue [2 x double] %foo4, double %foo3, 1
store [2 x double] %foo5, [2 x double]* %s store [2 x double] %foo5, [2 x double] addrspace(5)* %s
%foo6 = getelementptr [2 x double], [2 x double]* %s, i32 0, i32 1 %foo6 = getelementptr [2 x double], [2 x double] addrspace(5)* %s, i32 0, i32 1
%foo7 = load double, double* %foo6 %foo7 = load double, double addrspace(5)* %foo6
%foo8 = getelementptr [2 x double], [2 x double]* %s, i32 0, i32 1 %foo8 = getelementptr [2 x double], [2 x double] addrspace(5)* %s, i32 0, i32 1
%foo9 = load double, double* %foo8 %foo9 = load double, double addrspace(5)* %foo8
%foo10 = fadd double %foo7, %foo9 %foo10 = fadd double %foo7, %foo9
%foo11 = getelementptr [2 x double], [2 x double]* %s, i32 0, i32 0 %foo11 = getelementptr [2 x double], [2 x double] addrspace(5)* %s, i32 0, i32 0
store double %foo10, double* %foo11 store double %foo10, double addrspace(5)* %foo11
%foo12 = getelementptr [2 x double], [2 x double]* %s, i32 0, i32 0 %foo12 = getelementptr [2 x double], [2 x double] addrspace(5)* %s, i32 0, i32 0
%foo13 = load double, double* %foo12 %foo13 = load double, double addrspace(5)* %foo12
%foo14 = getelementptr [2 x double], [2 x double]* %s, i32 0, i32 1 %foo14 = getelementptr [2 x double], [2 x double] addrspace(5)* %s, i32 0, i32 1
%foo15 = load double, double* %foo14 %foo15 = load double, double addrspace(5)* %foo14
%foo16 = fadd double %foo13, %foo15 %foo16 = fadd double %foo13, %foo15
%foo17 = fptrunc double %foo16 to float %foo17 = fptrunc double %foo16 to float
%foo18 = insertelement <4 x float> undef, float %foo17, i32 0 %foo18 = insertelement <4 x float> undef, float %foo17, i32 0
@ -253,6 +253,6 @@ define amdgpu_kernel void @alloca_struct() #0 {
; CHECK-NEXT: ret void ; CHECK-NEXT: ret void
; ;
entry: entry:
%alloca = alloca [2 x %struct], align 4 %alloca = alloca [2 x %struct], align 4, addrspace(5)
ret void ret void
} }

View File

@ -4,44 +4,44 @@
; number of elements. ; number of elements.
; CHECK-LABEL: @array_alloca( ; CHECK-LABEL: @array_alloca(
; CHECK: %stack = alloca i32, i32 5, align 4 ; CHECK: %stack = alloca i32, i32 5, align 4, addrspace(5)
define amdgpu_kernel void @array_alloca(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #0 { define amdgpu_kernel void @array_alloca(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #0 {
entry: entry:
%stack = alloca i32, i32 5, align 4 %stack = alloca i32, i32 5, align 4, addrspace(5)
%ld0 = load i32, i32 addrspace(1)* %in, align 4 %ld0 = load i32, i32 addrspace(1)* %in, align 4
%arrayidx1 = getelementptr inbounds i32, i32* %stack, i32 %ld0 %arrayidx1 = getelementptr inbounds i32, i32 addrspace(5)* %stack, i32 %ld0
store i32 4, i32* %arrayidx1, align 4 store i32 4, i32 addrspace(5)* %arrayidx1, align 4
%arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1 %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
%ld1 = load i32, i32 addrspace(1)* %arrayidx2, align 4 %ld1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
%arrayidx3 = getelementptr inbounds i32, i32* %stack, i32 %ld1 %arrayidx3 = getelementptr inbounds i32, i32 addrspace(5)* %stack, i32 %ld1
store i32 5, i32* %arrayidx3, align 4 store i32 5, i32 addrspace(5)* %arrayidx3, align 4
%arrayidx10 = getelementptr inbounds i32, i32* %stack, i32 0 %arrayidx10 = getelementptr inbounds i32, i32 addrspace(5)* %stack, i32 0
%ld2 = load i32, i32* %arrayidx10, align 4 %ld2 = load i32, i32 addrspace(5)* %arrayidx10, align 4
store i32 %ld2, i32 addrspace(1)* %out, align 4 store i32 %ld2, i32 addrspace(1)* %out, align 4
%arrayidx12 = getelementptr inbounds i32, i32* %stack, i32 1 %arrayidx12 = getelementptr inbounds i32, i32 addrspace(5)* %stack, i32 1
%ld3 = load i32, i32* %arrayidx12 %ld3 = load i32, i32 addrspace(5)* %arrayidx12
%arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1 %arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
store i32 %ld3, i32 addrspace(1)* %arrayidx13 store i32 %ld3, i32 addrspace(1)* %arrayidx13
ret void ret void
} }
; CHECK-LABEL: @array_alloca_dynamic( ; CHECK-LABEL: @array_alloca_dynamic(
; CHECK: %stack = alloca i32, i32 %size, align 4 ; CHECK: %stack = alloca i32, i32 %size, align 4, addrspace(5)
define amdgpu_kernel void @array_alloca_dynamic(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in, i32 %size) #0 { define amdgpu_kernel void @array_alloca_dynamic(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in, i32 %size) #0 {
entry: entry:
%stack = alloca i32, i32 %size, align 4 %stack = alloca i32, i32 %size, align 4, addrspace(5)
%ld0 = load i32, i32 addrspace(1)* %in, align 4 %ld0 = load i32, i32 addrspace(1)* %in, align 4
%arrayidx1 = getelementptr inbounds i32, i32* %stack, i32 %ld0 %arrayidx1 = getelementptr inbounds i32, i32 addrspace(5)* %stack, i32 %ld0
store i32 4, i32* %arrayidx1, align 4 store i32 4, i32 addrspace(5)* %arrayidx1, align 4
%arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1 %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
%ld1 = load i32, i32 addrspace(1)* %arrayidx2, align 4 %ld1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
%arrayidx3 = getelementptr inbounds i32, i32* %stack, i32 %ld1 %arrayidx3 = getelementptr inbounds i32, i32 addrspace(5)* %stack, i32 %ld1
store i32 5, i32* %arrayidx3, align 4 store i32 5, i32 addrspace(5)* %arrayidx3, align 4
%arrayidx10 = getelementptr inbounds i32, i32* %stack, i32 0 %arrayidx10 = getelementptr inbounds i32, i32 addrspace(5)* %stack, i32 0
%ld2 = load i32, i32* %arrayidx10, align 4 %ld2 = load i32, i32 addrspace(5)* %arrayidx10, align 4
store i32 %ld2, i32 addrspace(1)* %out, align 4 store i32 %ld2, i32 addrspace(1)* %out, align 4
%arrayidx12 = getelementptr inbounds i32, i32* %stack, i32 1 %arrayidx12 = getelementptr inbounds i32, i32 addrspace(5)* %stack, i32 1
%ld3 = load i32, i32* %arrayidx12 %ld3 = load i32, i32 addrspace(5)* %arrayidx12
%arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1 %arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
store i32 %ld3, i32 addrspace(1)* %arrayidx13 store i32 %ld3, i32 addrspace(1)* %arrayidx13
ret void ret void

View File

@ -1,28 +1,28 @@
; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -passes=amdgpu-promote-alloca < %s | FileCheck %s ; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -passes=amdgpu-promote-alloca < %s | FileCheck %s
declare void @llvm.memcpy.p0i8.p1i8.i32(i8* nocapture, i8 addrspace(1)* nocapture, i32, i1) #0 declare void @llvm.memcpy.p5i8.p1i8.i32(i8 addrspace(5)* nocapture, i8 addrspace(1)* nocapture, i32, i1) #0
declare void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* nocapture, i8* nocapture, i32, i1) #0 declare void @llvm.memcpy.p1i8.p5i8.i32(i8 addrspace(1)* nocapture, i8 addrspace(5)* nocapture, i32, i1) #0
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) #0 declare void @llvm.memcpy.p5i8.p5i8.i64(i8 addrspace(5)* nocapture, i8 addrspace(5)* nocapture, i64, i1) #0
declare void @llvm.memmove.p0i8.p1i8.i32(i8* nocapture, i8 addrspace(1)* nocapture, i32, i1) #0 declare void @llvm.memmove.p5i8.p1i8.i32(i8 addrspace(5)* nocapture, i8 addrspace(1)* nocapture, i32, i1) #0
declare void @llvm.memmove.p1i8.p0i8.i32(i8 addrspace(1)* nocapture, i8* nocapture, i32, i1) #0 declare void @llvm.memmove.p1i8.p5i8.i32(i8 addrspace(1)* nocapture, i8 addrspace(5)* nocapture, i32, i1) #0
declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) #0 declare void @llvm.memmove.p5i8.p5i8.i64(i8 addrspace(5)* nocapture, i8 addrspace(5)* nocapture, i64, i1) #0
declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) #0 declare void @llvm.memset.p5i8.i32(i8 addrspace(5)* nocapture, i8, i32, i1) #0
declare i32 @llvm.objectsize.i32.p0i8(i8*, i1, i1, i1) #1 declare i32 @llvm.objectsize.i32.p5i8(i8 addrspace(5)*, i1, i1, i1) #1
; CHECK-LABEL: @promote_with_memcpy( ; CHECK-LABEL: @promote_with_memcpy(
; CHECK: getelementptr inbounds [64 x [17 x i32]], [64 x [17 x i32]] addrspace(3)* @promote_with_memcpy.alloca, i32 0, i32 %{{[0-9]+}} ; CHECK: getelementptr inbounds [64 x [17 x i32]], [64 x [17 x i32]] addrspace(3)* @promote_with_memcpy.alloca, i32 0, i32 %{{[0-9]+}}
; CHECK: call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 4 %alloca.bc, i8 addrspace(1)* align 4 %in.bc, i32 68, i1 false) ; CHECK: call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 4 %alloca.bc, i8 addrspace(1)* align 4 %in.bc, i32 68, i1 false)
; CHECK: call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 4 %out.bc, i8 addrspace(3)* align 4 %alloca.bc, i32 68, i1 false) ; CHECK: call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 4 %out.bc, i8 addrspace(3)* align 4 %alloca.bc, i32 68, i1 false)
define amdgpu_kernel void @promote_with_memcpy(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { define amdgpu_kernel void @promote_with_memcpy(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
%alloca = alloca [17 x i32], align 4 %alloca = alloca [17 x i32], align 4, addrspace(5)
%alloca.bc = bitcast [17 x i32]* %alloca to i8* %alloca.bc = bitcast [17 x i32] addrspace(5)* %alloca to i8 addrspace(5)*
%in.bc = bitcast i32 addrspace(1)* %in to i8 addrspace(1)* %in.bc = bitcast i32 addrspace(1)* %in to i8 addrspace(1)*
%out.bc = bitcast i32 addrspace(1)* %out to i8 addrspace(1)* %out.bc = bitcast i32 addrspace(1)* %out to i8 addrspace(1)*
call void @llvm.memcpy.p0i8.p1i8.i32(i8* align 4 %alloca.bc, i8 addrspace(1)* align 4 %in.bc, i32 68, i1 false) call void @llvm.memcpy.p5i8.p1i8.i32(i8 addrspace(5)* align 4 %alloca.bc, i8 addrspace(1)* align 4 %in.bc, i32 68, i1 false)
call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* align 4 %out.bc, i8* align 4 %alloca.bc, i32 68, i1 false) call void @llvm.memcpy.p1i8.p5i8.i32(i8 addrspace(1)* align 4 %out.bc, i8 addrspace(5)* align 4 %alloca.bc, i32 68, i1 false)
ret void ret void
} }
@ -31,12 +31,12 @@ define amdgpu_kernel void @promote_with_memcpy(i32 addrspace(1)* %out, i32 addrs
; CHECK: call void @llvm.memmove.p3i8.p1i8.i32(i8 addrspace(3)* align 4 %alloca.bc, i8 addrspace(1)* align 4 %in.bc, i32 68, i1 false) ; CHECK: call void @llvm.memmove.p3i8.p1i8.i32(i8 addrspace(3)* align 4 %alloca.bc, i8 addrspace(1)* align 4 %in.bc, i32 68, i1 false)
; CHECK: call void @llvm.memmove.p1i8.p3i8.i32(i8 addrspace(1)* align 4 %out.bc, i8 addrspace(3)* align 4 %alloca.bc, i32 68, i1 false) ; CHECK: call void @llvm.memmove.p1i8.p3i8.i32(i8 addrspace(1)* align 4 %out.bc, i8 addrspace(3)* align 4 %alloca.bc, i32 68, i1 false)
define amdgpu_kernel void @promote_with_memmove(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { define amdgpu_kernel void @promote_with_memmove(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
%alloca = alloca [17 x i32], align 4 %alloca = alloca [17 x i32], align 4, addrspace(5)
%alloca.bc = bitcast [17 x i32]* %alloca to i8* %alloca.bc = bitcast [17 x i32] addrspace(5)* %alloca to i8 addrspace(5)*
%in.bc = bitcast i32 addrspace(1)* %in to i8 addrspace(1)* %in.bc = bitcast i32 addrspace(1)* %in to i8 addrspace(1)*
%out.bc = bitcast i32 addrspace(1)* %out to i8 addrspace(1)* %out.bc = bitcast i32 addrspace(1)* %out to i8 addrspace(1)*
call void @llvm.memmove.p0i8.p1i8.i32(i8* align 4 %alloca.bc, i8 addrspace(1)* align 4 %in.bc, i32 68, i1 false) call void @llvm.memmove.p5i8.p1i8.i32(i8 addrspace(5)* align 4 %alloca.bc, i8 addrspace(1)* align 4 %in.bc, i32 68, i1 false)
call void @llvm.memmove.p1i8.p0i8.i32(i8 addrspace(1)* align 4 %out.bc, i8* align 4 %alloca.bc, i32 68, i1 false) call void @llvm.memmove.p1i8.p5i8.i32(i8 addrspace(1)* align 4 %out.bc, i8 addrspace(5)* align 4 %alloca.bc, i32 68, i1 false)
ret void ret void
} }
@ -44,11 +44,11 @@ define amdgpu_kernel void @promote_with_memmove(i32 addrspace(1)* %out, i32 addr
; CHECK: getelementptr inbounds [64 x [17 x i32]], [64 x [17 x i32]] addrspace(3)* @promote_with_memset.alloca, i32 0, i32 %{{[0-9]+}} ; CHECK: getelementptr inbounds [64 x [17 x i32]], [64 x [17 x i32]] addrspace(3)* @promote_with_memset.alloca, i32 0, i32 %{{[0-9]+}}
; CHECK: call void @llvm.memset.p3i8.i32(i8 addrspace(3)* align 4 %alloca.bc, i8 7, i32 68, i1 false) ; CHECK: call void @llvm.memset.p3i8.i32(i8 addrspace(3)* align 4 %alloca.bc, i8 7, i32 68, i1 false)
define amdgpu_kernel void @promote_with_memset(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { define amdgpu_kernel void @promote_with_memset(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
%alloca = alloca [17 x i32], align 4 %alloca = alloca [17 x i32], align 4, addrspace(5)
%alloca.bc = bitcast [17 x i32]* %alloca to i8* %alloca.bc = bitcast [17 x i32] addrspace(5)* %alloca to i8 addrspace(5)*
%in.bc = bitcast i32 addrspace(1)* %in to i8 addrspace(1)* %in.bc = bitcast i32 addrspace(1)* %in to i8 addrspace(1)*
%out.bc = bitcast i32 addrspace(1)* %out to i8 addrspace(1)* %out.bc = bitcast i32 addrspace(1)* %out to i8 addrspace(1)*
call void @llvm.memset.p0i8.i32(i8* align 4 %alloca.bc, i8 7, i32 68, i1 false) call void @llvm.memset.p5i8.i32(i8 addrspace(5)* align 4 %alloca.bc, i8 7, i32 68, i1 false)
ret void ret void
} }
@ -56,9 +56,9 @@ define amdgpu_kernel void @promote_with_memset(i32 addrspace(1)* %out, i32 addrs
; CHECK: [[PTR:%[0-9]+]] = getelementptr inbounds [64 x [17 x i32]], [64 x [17 x i32]] addrspace(3)* @promote_with_objectsize.alloca, i32 0, i32 %{{[0-9]+}} ; CHECK: [[PTR:%[0-9]+]] = getelementptr inbounds [64 x [17 x i32]], [64 x [17 x i32]] addrspace(3)* @promote_with_objectsize.alloca, i32 0, i32 %{{[0-9]+}}
; CHECK: call i32 @llvm.objectsize.i32.p3i8(i8 addrspace(3)* %alloca.bc, i1 false, i1 false, i1 false) ; CHECK: call i32 @llvm.objectsize.i32.p3i8(i8 addrspace(3)* %alloca.bc, i1 false, i1 false, i1 false)
define amdgpu_kernel void @promote_with_objectsize(i32 addrspace(1)* %out) #0 { define amdgpu_kernel void @promote_with_objectsize(i32 addrspace(1)* %out) #0 {
%alloca = alloca [17 x i32], align 4 %alloca = alloca [17 x i32], align 4, addrspace(5)
%alloca.bc = bitcast [17 x i32]* %alloca to i8* %alloca.bc = bitcast [17 x i32] addrspace(5)* %alloca to i8 addrspace(5)*
%size = call i32 @llvm.objectsize.i32.p0i8(i8* %alloca.bc, i1 false, i1 false, i1 false) %size = call i32 @llvm.objectsize.i32.p5i8(i8 addrspace(5)* %alloca.bc, i1 false, i1 false, i1 false)
store i32 %size, i32 addrspace(1)* %out store i32 %size, i32 addrspace(1)* %out
ret void ret void
} }
@ -69,12 +69,12 @@ define amdgpu_kernel void @promote_with_objectsize(i32 addrspace(1)* %out) #0 {
; CHECK: call void @llvm.memcpy.p3i8.p3i8.i64(i8 addrspace(3)* align 8 dereferenceable(16) %i, i8 addrspace(3)* align 8 dereferenceable(16) %i1, i64 16, i1 false) ; CHECK: call void @llvm.memcpy.p3i8.p3i8.i64(i8 addrspace(3)* align 8 dereferenceable(16) %i, i8 addrspace(3)* align 8 dereferenceable(16) %i1, i64 16, i1 false)
define amdgpu_kernel void @promote_alloca_used_twice_in_memcpy(i32 %c) { define amdgpu_kernel void @promote_alloca_used_twice_in_memcpy(i32 %c) {
entry: entry:
%r = alloca double, align 8 %r = alloca double, align 8, addrspace(5)
%arrayidx1 = getelementptr inbounds double, double* %r, i32 1 %arrayidx1 = getelementptr inbounds double, double addrspace(5)* %r, i32 1
%i = bitcast double* %arrayidx1 to i8* %i = bitcast double addrspace(5)* %arrayidx1 to i8 addrspace(5)*
%arrayidx2 = getelementptr inbounds double, double* %r, i32 %c %arrayidx2 = getelementptr inbounds double, double addrspace(5)* %r, i32 %c
%i1 = bitcast double* %arrayidx2 to i8* %i1 = bitcast double addrspace(5)* %arrayidx2 to i8 addrspace(5)*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 dereferenceable(16) %i, i8* align 8 dereferenceable(16) %i1, i64 16, i1 false) call void @llvm.memcpy.p5i8.p5i8.i64(i8 addrspace(5)* align 8 dereferenceable(16) %i, i8 addrspace(5)* align 8 dereferenceable(16) %i1, i64 16, i1 false)
ret void ret void
} }
@ -84,12 +84,12 @@ entry:
; CHECK: call void @llvm.memmove.p3i8.p3i8.i64(i8 addrspace(3)* align 8 dereferenceable(16) %i, i8 addrspace(3)* align 8 dereferenceable(16) %i1, i64 16, i1 false) ; CHECK: call void @llvm.memmove.p3i8.p3i8.i64(i8 addrspace(3)* align 8 dereferenceable(16) %i, i8 addrspace(3)* align 8 dereferenceable(16) %i1, i64 16, i1 false)
define amdgpu_kernel void @promote_alloca_used_twice_in_memmove(i32 %c) { define amdgpu_kernel void @promote_alloca_used_twice_in_memmove(i32 %c) {
entry: entry:
%r = alloca double, align 8 %r = alloca double, align 8, addrspace(5)
%arrayidx1 = getelementptr inbounds double, double* %r, i32 1 %arrayidx1 = getelementptr inbounds double, double addrspace(5)* %r, i32 1
%i = bitcast double* %arrayidx1 to i8* %i = bitcast double addrspace(5)* %arrayidx1 to i8 addrspace(5)*
%arrayidx2 = getelementptr inbounds double, double* %r, i32 %c %arrayidx2 = getelementptr inbounds double, double addrspace(5)* %r, i32 %c
%i1 = bitcast double* %arrayidx2 to i8* %i1 = bitcast double addrspace(5)* %arrayidx2 to i8 addrspace(5)*
call void @llvm.memmove.p0i8.p0i8.i64(i8* align 8 dereferenceable(16) %i, i8* align 8 dereferenceable(16) %i1, i64 16, i1 false) call void @llvm.memmove.p5i8.p5i8.i64(i8 addrspace(5)* align 8 dereferenceable(16) %i, i8 addrspace(5)* align 8 dereferenceable(16) %i1, i64 16, i1 false)
ret void ret void
} }

View File

@ -31,18 +31,18 @@ define amdgpu_kernel void @lds_promoted_alloca_icmp_same_derived_pointer(i32 add
; CHECK-NEXT: ret void ; CHECK-NEXT: ret void
; ;
; NOLDS-LABEL: @lds_promoted_alloca_icmp_same_derived_pointer( ; NOLDS-LABEL: @lds_promoted_alloca_icmp_same_derived_pointer(
; NOLDS-NEXT: [[ALLOCA:%.*]] = alloca [16 x i32], align 4 ; NOLDS-NEXT: [[ALLOCA:%.*]] = alloca [16 x i32], align 4, addrspace(5)
; NOLDS-NEXT: [[PTR0:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ALLOCA]], i32 0, i32 [[A:%.*]] ; NOLDS-NEXT: [[PTR0:%.*]] = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* [[ALLOCA]], i32 0, i32 [[A:%.*]]
; NOLDS-NEXT: [[PTR1:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ALLOCA]], i32 0, i32 [[B:%.*]] ; NOLDS-NEXT: [[PTR1:%.*]] = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* [[ALLOCA]], i32 0, i32 [[B:%.*]]
; NOLDS-NEXT: [[CMP:%.*]] = icmp eq i32* [[PTR0]], [[PTR1]] ; NOLDS-NEXT: [[CMP:%.*]] = icmp eq i32 addrspace(5)* [[PTR0]], [[PTR1]]
; NOLDS-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32 ; NOLDS-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
; NOLDS-NEXT: store volatile i32 [[ZEXT]], i32 addrspace(1)* [[OUT:%.*]], align 4 ; NOLDS-NEXT: store volatile i32 [[ZEXT]], i32 addrspace(1)* [[OUT:%.*]], align 4
; NOLDS-NEXT: ret void ; NOLDS-NEXT: ret void
; ;
%alloca = alloca [16 x i32], align 4 %alloca = alloca [16 x i32], align 4, addrspace(5)
%ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a %ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %a
%ptr1 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %b %ptr1 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %b
%cmp = icmp eq i32* %ptr0, %ptr1 %cmp = icmp eq i32 addrspace(5)* %ptr0, %ptr1
%zext = zext i1 %cmp to i32 %zext = zext i1 %cmp to i32
store volatile i32 %zext, i32 addrspace(1)* %out store volatile i32 %zext, i32 addrspace(1)* %out
ret void ret void
@ -73,16 +73,16 @@ define amdgpu_kernel void @lds_promoted_alloca_icmp_null_rhs(i32 addrspace(1)* %
; CHECK-NEXT: ret void ; CHECK-NEXT: ret void
; ;
; NOLDS-LABEL: @lds_promoted_alloca_icmp_null_rhs( ; NOLDS-LABEL: @lds_promoted_alloca_icmp_null_rhs(
; NOLDS-NEXT: [[ALLOCA:%.*]] = alloca [16 x i32], align 4 ; NOLDS-NEXT: [[ALLOCA:%.*]] = alloca [16 x i32], align 4, addrspace(5)
; NOLDS-NEXT: [[PTR0:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ALLOCA]], i32 0, i32 [[A:%.*]] ; NOLDS-NEXT: [[PTR0:%.*]] = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* [[ALLOCA]], i32 0, i32 [[A:%.*]]
; NOLDS-NEXT: [[CMP:%.*]] = icmp eq i32* [[PTR0]], null ; NOLDS-NEXT: [[CMP:%.*]] = icmp eq i32 addrspace(5)* [[PTR0]], null
; NOLDS-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32 ; NOLDS-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
; NOLDS-NEXT: store volatile i32 [[ZEXT]], i32 addrspace(1)* [[OUT:%.*]], align 4 ; NOLDS-NEXT: store volatile i32 [[ZEXT]], i32 addrspace(1)* [[OUT:%.*]], align 4
; NOLDS-NEXT: ret void ; NOLDS-NEXT: ret void
; ;
%alloca = alloca [16 x i32], align 4 %alloca = alloca [16 x i32], align 4, addrspace(5)
%ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a %ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %a
%cmp = icmp eq i32* %ptr0, null %cmp = icmp eq i32 addrspace(5)* %ptr0, null
%zext = zext i1 %cmp to i32 %zext = zext i1 %cmp to i32
store volatile i32 %zext, i32 addrspace(1)* %out store volatile i32 %zext, i32 addrspace(1)* %out
ret void ret void
@ -113,16 +113,16 @@ define amdgpu_kernel void @lds_promoted_alloca_icmp_null_lhs(i32 addrspace(1)* %
; CHECK-NEXT: ret void ; CHECK-NEXT: ret void
; ;
; NOLDS-LABEL: @lds_promoted_alloca_icmp_null_lhs( ; NOLDS-LABEL: @lds_promoted_alloca_icmp_null_lhs(
; NOLDS-NEXT: [[ALLOCA:%.*]] = alloca [16 x i32], align 4 ; NOLDS-NEXT: [[ALLOCA:%.*]] = alloca [16 x i32], align 4, addrspace(5)
; NOLDS-NEXT: [[PTR0:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ALLOCA]], i32 0, i32 [[A:%.*]] ; NOLDS-NEXT: [[PTR0:%.*]] = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* [[ALLOCA]], i32 0, i32 [[A:%.*]]
; NOLDS-NEXT: [[CMP:%.*]] = icmp eq i32* null, [[PTR0]] ; NOLDS-NEXT: [[CMP:%.*]] = icmp eq i32 addrspace(5)* null, [[PTR0]]
; NOLDS-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32 ; NOLDS-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
; NOLDS-NEXT: store volatile i32 [[ZEXT]], i32 addrspace(1)* [[OUT:%.*]], align 4 ; NOLDS-NEXT: store volatile i32 [[ZEXT]], i32 addrspace(1)* [[OUT:%.*]], align 4
; NOLDS-NEXT: ret void ; NOLDS-NEXT: ret void
; ;
%alloca = alloca [16 x i32], align 4 %alloca = alloca [16 x i32], align 4, addrspace(5)
%ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a %ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %a
%cmp = icmp eq i32* null, %ptr0 %cmp = icmp eq i32 addrspace(5)* null, %ptr0
%zext = zext i1 %cmp to i32 %zext = zext i1 %cmp to i32
store volatile i32 %zext, i32 addrspace(1)* %out store volatile i32 %zext, i32 addrspace(1)* %out
ret void ret void
@ -130,32 +130,32 @@ define amdgpu_kernel void @lds_promoted_alloca_icmp_null_lhs(i32 addrspace(1)* %
define amdgpu_kernel void @lds_promoted_alloca_icmp_unknown_ptr(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 { define amdgpu_kernel void @lds_promoted_alloca_icmp_unknown_ptr(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
; CHECK-LABEL: @lds_promoted_alloca_icmp_unknown_ptr( ; CHECK-LABEL: @lds_promoted_alloca_icmp_unknown_ptr(
; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [16 x i32], align 4 ; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [16 x i32], align 4, addrspace(5)
; CHECK-NEXT: [[PTR0:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ALLOCA]], i32 0, i32 [[A:%.*]] ; CHECK-NEXT: [[PTR0:%.*]] = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* [[ALLOCA]], i32 0, i32 [[A:%.*]]
; CHECK-NEXT: [[PTR1:%.*]] = call i32* @get_unknown_pointer() ; CHECK-NEXT: [[PTR1:%.*]] = call i32 addrspace(5)* @get_unknown_pointer()
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32* [[PTR0]], [[PTR1]] ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 addrspace(5)* [[PTR0]], [[PTR1]]
; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32 ; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
; CHECK-NEXT: store volatile i32 [[ZEXT]], i32 addrspace(1)* [[OUT:%.*]], align 4 ; CHECK-NEXT: store volatile i32 [[ZEXT]], i32 addrspace(1)* [[OUT:%.*]], align 4
; CHECK-NEXT: ret void ; CHECK-NEXT: ret void
; ;
; NOLDS-LABEL: @lds_promoted_alloca_icmp_unknown_ptr( ; NOLDS-LABEL: @lds_promoted_alloca_icmp_unknown_ptr(
; NOLDS-NEXT: [[ALLOCA:%.*]] = alloca [16 x i32], align 4 ; NOLDS-NEXT: [[ALLOCA:%.*]] = alloca [16 x i32], align 4, addrspace(5)
; NOLDS-NEXT: [[PTR0:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[ALLOCA]], i32 0, i32 [[A:%.*]] ; NOLDS-NEXT: [[PTR0:%.*]] = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* [[ALLOCA]], i32 0, i32 [[A:%.*]]
; NOLDS-NEXT: [[PTR1:%.*]] = call i32* @get_unknown_pointer() ; NOLDS-NEXT: [[PTR1:%.*]] = call i32 addrspace(5)* @get_unknown_pointer()
; NOLDS-NEXT: [[CMP:%.*]] = icmp eq i32* [[PTR0]], [[PTR1]] ; NOLDS-NEXT: [[CMP:%.*]] = icmp eq i32 addrspace(5)* [[PTR0]], [[PTR1]]
; NOLDS-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32 ; NOLDS-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
; NOLDS-NEXT: store volatile i32 [[ZEXT]], i32 addrspace(1)* [[OUT:%.*]], align 4 ; NOLDS-NEXT: store volatile i32 [[ZEXT]], i32 addrspace(1)* [[OUT:%.*]], align 4
; NOLDS-NEXT: ret void ; NOLDS-NEXT: ret void
; ;
%alloca = alloca [16 x i32], align 4 %alloca = alloca [16 x i32], align 4, addrspace(5)
%ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a %ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %a
%ptr1 = call i32* @get_unknown_pointer() %ptr1 = call i32 addrspace(5)* @get_unknown_pointer()
%cmp = icmp eq i32* %ptr0, %ptr1 %cmp = icmp eq i32 addrspace(5)* %ptr0, %ptr1
%zext = zext i1 %cmp to i32 %zext = zext i1 %cmp to i32
store volatile i32 %zext, i32 addrspace(1)* %out store volatile i32 %zext, i32 addrspace(1)* %out
ret void ret void
} }
declare i32* @get_unknown_pointer() #0 declare i32 addrspace(5)* @get_unknown_pointer() #0
attributes #0 = { nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,256" } attributes #0 = { nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,256" }

View File

@ -15,20 +15,20 @@
; CHECK: store i32 0, i32 addrspace(3)* %phi.ptr, align 4 ; CHECK: store i32 0, i32 addrspace(3)* %phi.ptr, align 4
define amdgpu_kernel void @branch_ptr_var_same_alloca(i32 %a, i32 %b) #0 { define amdgpu_kernel void @branch_ptr_var_same_alloca(i32 %a, i32 %b) #0 {
entry: entry:
%alloca = alloca [64 x i32], align 4 %alloca = alloca [64 x i32], align 4, addrspace(5)
br i1 undef, label %if, label %else br i1 undef, label %if, label %else
if: if:
%arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %a %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(5)* %alloca, i32 0, i32 %a
br label %endif br label %endif
else: else:
%arrayidx1 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %b %arrayidx1 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(5)* %alloca, i32 0, i32 %b
br label %endif br label %endif
endif: endif:
%phi.ptr = phi i32* [ %arrayidx0, %if ], [ %arrayidx1, %else ] %phi.ptr = phi i32 addrspace(5)* [ %arrayidx0, %if ], [ %arrayidx1, %else ]
store i32 0, i32* %phi.ptr, align 4 store i32 0, i32 addrspace(5)* %phi.ptr, align 4
ret void ret void
} }
@ -36,16 +36,16 @@ endif:
; CHECK: %phi.ptr = phi i32 addrspace(3)* [ %arrayidx0, %if ], [ null, %entry ] ; CHECK: %phi.ptr = phi i32 addrspace(3)* [ %arrayidx0, %if ], [ null, %entry ]
define amdgpu_kernel void @branch_ptr_phi_alloca_null_0(i32 %a, i32 %b) #0 { define amdgpu_kernel void @branch_ptr_phi_alloca_null_0(i32 %a, i32 %b) #0 {
entry: entry:
%alloca = alloca [64 x i32], align 4 %alloca = alloca [64 x i32], align 4, addrspace(5)
br i1 undef, label %if, label %endif br i1 undef, label %if, label %endif
if: if:
%arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %a %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(5)* %alloca, i32 0, i32 %a
br label %endif br label %endif
endif: endif:
%phi.ptr = phi i32* [ %arrayidx0, %if ], [ null, %entry ] %phi.ptr = phi i32 addrspace(5)* [ %arrayidx0, %if ], [ null, %entry ]
store i32 0, i32* %phi.ptr, align 4 store i32 0, i32 addrspace(5)* %phi.ptr, align 4
ret void ret void
} }
@ -53,16 +53,16 @@ endif:
; CHECK: %phi.ptr = phi i32 addrspace(3)* [ null, %entry ], [ %arrayidx0, %if ] ; CHECK: %phi.ptr = phi i32 addrspace(3)* [ null, %entry ], [ %arrayidx0, %if ]
define amdgpu_kernel void @branch_ptr_phi_alloca_null_1(i32 %a, i32 %b) #0 { define amdgpu_kernel void @branch_ptr_phi_alloca_null_1(i32 %a, i32 %b) #0 {
entry: entry:
%alloca = alloca [64 x i32], align 4 %alloca = alloca [64 x i32], align 4, addrspace(5)
br i1 undef, label %if, label %endif br i1 undef, label %if, label %endif
if: if:
%arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %a %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(5)* %alloca, i32 0, i32 %a
br label %endif br label %endif
endif: endif:
%phi.ptr = phi i32* [ null, %entry ], [ %arrayidx0, %if ] %phi.ptr = phi i32 addrspace(5)* [ null, %entry ], [ %arrayidx0, %if ]
store i32 0, i32* %phi.ptr, align 4 store i32 0, i32 addrspace(5)* %phi.ptr, align 4
ret void ret void
} }
@ -75,13 +75,13 @@ endif:
; CHECK: store i32 0, i32 addrspace(3)* %phi.ptr, align 4 ; CHECK: store i32 0, i32 addrspace(3)* %phi.ptr, align 4
define amdgpu_kernel void @one_phi_value(i32 %a) #0 { define amdgpu_kernel void @one_phi_value(i32 %a) #0 {
entry: entry:
%alloca = alloca [64 x i32], align 4 %alloca = alloca [64 x i32], align 4, addrspace(5)
%arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %a %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(5)* %alloca, i32 0, i32 %a
br label %exit br label %exit
exit: exit:
%phi.ptr = phi i32* [ %arrayidx0, %entry ] %phi.ptr = phi i32 addrspace(5)* [ %arrayidx0, %entry ]
store i32 0, i32* %phi.ptr, align 4 store i32 0, i32 addrspace(5)* %phi.ptr, align 4
ret void ret void
} }
@ -89,30 +89,30 @@ exit:
; CHECK: %alloca = alloca [64 x i32], align 4 ; CHECK: %alloca = alloca [64 x i32], align 4
; CHECK: if: ; CHECK: if:
; CHECK: %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %a ; CHECK: %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(5)* %alloca, i32 0, i32 %a
; CHECK: else: ; CHECK: else:
; CHECK: %arrayidx1 = call i32* @get_unknown_pointer() ; CHECK: %arrayidx1 = call i32 addrspace(5)* @get_unknown_pointer()
; CHECK: endif: ; CHECK: endif:
; CHECK: %phi.ptr = phi i32* [ %arrayidx0, %if ], [ %arrayidx1, %else ] ; CHECK: %phi.ptr = phi i32 addrspace(5)* [ %arrayidx0, %if ], [ %arrayidx1, %else ]
; CHECK: store i32 0, i32* %phi.ptr, align 4 ; CHECK: store i32 0, i32 addrspace(5)* %phi.ptr, align 4
define amdgpu_kernel void @branch_ptr_alloca_unknown_obj(i32 %a, i32 %b) #0 { define amdgpu_kernel void @branch_ptr_alloca_unknown_obj(i32 %a, i32 %b) #0 {
entry: entry:
%alloca = alloca [64 x i32], align 4 %alloca = alloca [64 x i32], align 4, addrspace(5)
br i1 undef, label %if, label %else br i1 undef, label %if, label %else
if: if:
%arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %a %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(5)* %alloca, i32 0, i32 %a
br label %endif br label %endif
else: else:
%arrayidx1 = call i32* @get_unknown_pointer() %arrayidx1 = call i32 addrspace(5)* @get_unknown_pointer()
br label %endif br label %endif
endif: endif:
%phi.ptr = phi i32* [ %arrayidx0, %if ], [ %arrayidx1, %else ] %phi.ptr = phi i32 addrspace(5)* [ %arrayidx0, %if ], [ %arrayidx1, %else ]
store i32 0, i32* %phi.ptr, align 4 store i32 0, i32 addrspace(5)* %phi.ptr, align 4
ret void ret void
} }
@ -133,12 +133,12 @@ endif:
; CHECK-LABEL: @ptr_induction_var_same_alloca( ; CHECK-LABEL: @ptr_induction_var_same_alloca(
; CHECK: %alloca = alloca [64 x i32], align 4 ; CHECK: %alloca = alloca [64 x i32], align 4
; CHECK: phi i32* [ %arrayidx, %entry ], [ %incdec.ptr, %for.body ] ; CHECK: phi i32 addrspace(5)* [ %arrayidx, %entry ], [ %incdec.ptr, %for.body ]
define amdgpu_kernel void @ptr_induction_var_same_alloca() #0 { define amdgpu_kernel void @ptr_induction_var_same_alloca() #0 {
entry: entry:
%alloca = alloca [64 x i32], align 4 %alloca = alloca [64 x i32], align 4, addrspace(5)
%arrayidx = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 2 %arrayidx = getelementptr inbounds [64 x i32], [64 x i32] addrspace(5)* %alloca, i32 0, i32 2
%arrayidx1 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 48 %arrayidx1 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(5)* %alloca, i32 0, i32 48
br label %for.body br label %for.body
for.cond.cleanup: ; preds = %for.body for.cond.cleanup: ; preds = %for.body
@ -146,11 +146,11 @@ for.cond.cleanup: ; preds = %for.body
for.body: ; preds = %for.body, %entry for.body: ; preds = %for.body, %entry
%i.09 = phi i32 [ 0, %entry ], [ %inc, %for.body ] %i.09 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
%p.08 = phi i32* [ %arrayidx, %entry ], [ %incdec.ptr, %for.body ] %p.08 = phi i32 addrspace(5)* [ %arrayidx, %entry ], [ %incdec.ptr, %for.body ]
store i32 %i.09, i32* %p.08, align 4 store i32 %i.09, i32 addrspace(5)* %p.08, align 4
%incdec.ptr = getelementptr inbounds i32, i32* %p.08, i32 1 %incdec.ptr = getelementptr inbounds i32, i32 addrspace(5)* %p.08, i32 1
%inc = add nuw nsw i32 %i.09, 1 %inc = add nuw nsw i32 %i.09, 1
%cmp = icmp eq i32* %incdec.ptr, %arrayidx1 %cmp = icmp eq i32 addrspace(5)* %incdec.ptr, %arrayidx1
br i1 %cmp, label %for.cond.cleanup, label %for.body br i1 %cmp, label %for.cond.cleanup, label %for.body
} }
@ -170,14 +170,14 @@ for.body: ; preds = %for.body, %entry
; CHECK-LABEL: @ptr_induction_var_alloca_unknown( ; CHECK-LABEL: @ptr_induction_var_alloca_unknown(
; CHECK: %alloca = alloca [64 x i32], align 4 ; CHECK: %alloca = alloca [64 x i32], align 4
; CHECK: %p.08 = phi i32* [ %incdec.ptr, %for.body ], [ %arrayidx, %for.body.preheader ] ; CHECK: %p.08 = phi i32 addrspace(5)* [ %incdec.ptr, %for.body ], [ %arrayidx, %for.body.preheader ]
; CHECK: %cmp = icmp eq i32* %incdec.ptr, %call ; CHECK: %cmp = icmp eq i32 addrspace(5)* %incdec.ptr, %call
define amdgpu_kernel void @ptr_induction_var_alloca_unknown() #0 { define amdgpu_kernel void @ptr_induction_var_alloca_unknown() #0 {
entry: entry:
%alloca = alloca [64 x i32], align 4 %alloca = alloca [64 x i32], align 4, addrspace(5)
%arrayidx = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 2 %arrayidx = getelementptr inbounds [64 x i32], [64 x i32] addrspace(5)* %alloca, i32 0, i32 2
%call = tail call i32* @get_unknown_pointer() #2 %call = tail call i32 addrspace(5)* @get_unknown_pointer() #2
%cmp.7 = icmp eq i32* %arrayidx, %call %cmp.7 = icmp eq i32 addrspace(5)* %arrayidx, %call
br i1 %cmp.7, label %for.cond.cleanup, label %for.body.preheader br i1 %cmp.7, label %for.cond.cleanup, label %for.body.preheader
for.body.preheader: ; preds = %entry for.body.preheader: ; preds = %entry
@ -191,14 +191,14 @@ for.cond.cleanup: ; preds = %for.cond.cleanup.lo
for.body: ; preds = %for.body, %for.body.preheader for.body: ; preds = %for.body, %for.body.preheader
%i.09 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] %i.09 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
%p.08 = phi i32* [ %incdec.ptr, %for.body ], [ %arrayidx, %for.body.preheader ] %p.08 = phi i32 addrspace(5)* [ %incdec.ptr, %for.body ], [ %arrayidx, %for.body.preheader ]
store i32 %i.09, i32* %p.08, align 4 store i32 %i.09, i32 addrspace(5)* %p.08, align 4
%incdec.ptr = getelementptr inbounds i32, i32* %p.08, i32 1 %incdec.ptr = getelementptr inbounds i32, i32 addrspace(5)* %p.08, i32 1
%inc = add nuw nsw i32 %i.09, 1 %inc = add nuw nsw i32 %i.09, 1
%cmp = icmp eq i32* %incdec.ptr, %call %cmp = icmp eq i32 addrspace(5)* %incdec.ptr, %call
br i1 %cmp, label %for.cond.cleanup.loopexit, label %for.body br i1 %cmp, label %for.cond.cleanup.loopexit, label %for.body
} }
declare i32* @get_unknown_pointer() #0 declare i32 addrspace(5)* @get_unknown_pointer() #0
attributes #0 = { nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,256" } attributes #0 = { nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,256" }

View File

@ -2,26 +2,26 @@
; CHECK-LABEL: @volatile_load( ; CHECK-LABEL: @volatile_load(
; CHECK: alloca [4 x i32] ; CHECK: alloca [4 x i32]
; CHECK: load volatile i32, i32* ; CHECK: load volatile i32, i32 addrspace(5)*
define amdgpu_kernel void @volatile_load(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) { define amdgpu_kernel void @volatile_load(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) {
entry: entry:
%stack = alloca [4 x i32], align 4 %stack = alloca [4 x i32], align 4, addrspace(5)
%tmp = load i32, i32 addrspace(1)* %in, align 4 %tmp = load i32, i32 addrspace(1)* %in, align 4
%arrayidx1 = getelementptr inbounds [4 x i32], [4 x i32]* %stack, i32 0, i32 %tmp %arrayidx1 = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %stack, i32 0, i32 %tmp
%load = load volatile i32, i32* %arrayidx1 %load = load volatile i32, i32 addrspace(5)* %arrayidx1
store i32 %load, i32 addrspace(1)* %out store i32 %load, i32 addrspace(1)* %out
ret void ret void
} }
; CHECK-LABEL: @volatile_store( ; CHECK-LABEL: @volatile_store(
; CHECK: alloca [4 x i32] ; CHECK: alloca [4 x i32]
; CHECK: store volatile i32 %tmp, i32* ; CHECK: store volatile i32 %tmp, i32 addrspace(5)*
define amdgpu_kernel void @volatile_store(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) { define amdgpu_kernel void @volatile_store(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) {
entry: entry:
%stack = alloca [4 x i32], align 4 %stack = alloca [4 x i32], align 4, addrspace(5)
%tmp = load i32, i32 addrspace(1)* %in, align 4 %tmp = load i32, i32 addrspace(1)* %in, align 4
%arrayidx1 = getelementptr inbounds [4 x i32], [4 x i32]* %stack, i32 0, i32 %tmp %arrayidx1 = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %stack, i32 0, i32 %tmp
store volatile i32 %tmp, i32* %arrayidx1 store volatile i32 %tmp, i32 addrspace(5)* %arrayidx1
ret void ret void
} }
@ -32,11 +32,11 @@ entry:
; CHECK: load volatile double ; CHECK: load volatile double
define amdgpu_kernel void @volatile_and_non_volatile_load(double addrspace(1)* nocapture %arg, i32 %arg1) #0 { define amdgpu_kernel void @volatile_and_non_volatile_load(double addrspace(1)* nocapture %arg, i32 %arg1) #0 {
bb: bb:
%tmp = alloca double, align 8 %tmp = alloca double, align 8, addrspace(5)
store double 0.000000e+00, double* %tmp, align 8 store double 0.000000e+00, double addrspace(5)* %tmp, align 8
%tmp4 = load double, double* %tmp, align 8 %tmp4 = load double, double addrspace(5)* %tmp, align 8
%tmp5 = load volatile double, double* %tmp, align 8 %tmp5 = load volatile double, double addrspace(5)* %tmp, align 8
store double %tmp4, double addrspace(1)* %arg store double %tmp4, double addrspace(1)* %arg
ret void ret void

View File

@ -4,34 +4,34 @@
; CHECK-LABEL: @test_insertelement( ; CHECK-LABEL: @test_insertelement(
; CHECK: %alloca = alloca i16 ; CHECK: %alloca = alloca i16
; CHECK-NEXT: insertelement <2 x i16*> undef, i16* %alloca, i32 0 ; CHECK-NEXT: insertelement <2 x i16 addrspace(5)*> undef, i16 addrspace(5)* %alloca, i32 0
define amdgpu_kernel void @test_insertelement() #0 { define amdgpu_kernel void @test_insertelement() #0 {
entry: entry:
%alloca = alloca i16, align 4 %alloca = alloca i16, align 4, addrspace(5)
%in = insertelement <2 x i16*> undef, i16* %alloca, i32 0 %in = insertelement <2 x i16 addrspace(5)*> undef, i16 addrspace(5)* %alloca, i32 0
store <2 x i16*> %in, <2 x i16*>* undef, align 4 store <2 x i16 addrspace(5)*> %in, <2 x i16 addrspace(5)*>* undef, align 4
ret void ret void
} }
; CHECK-LABEL: @test_insertvalue( ; CHECK-LABEL: @test_insertvalue(
; CHECK: %alloca = alloca i16 ; CHECK: %alloca = alloca i16
; CHECK-NEXT: insertvalue { i16* } undef, i16* %alloca, 0 ; CHECK-NEXT: insertvalue { i16 addrspace(5)* } undef, i16 addrspace(5)* %alloca, 0
define amdgpu_kernel void @test_insertvalue() #0 { define amdgpu_kernel void @test_insertvalue() #0 {
entry: entry:
%alloca = alloca i16, align 4 %alloca = alloca i16, align 4, addrspace(5)
%in = insertvalue { i16* } undef, i16* %alloca, 0 %in = insertvalue { i16 addrspace(5)* } undef, i16 addrspace(5)* %alloca, 0
store { i16* } %in, { i16* }* undef, align 4 store { i16 addrspace(5)* } %in, { i16 addrspace(5)* }* undef, align 4
ret void ret void
} }
; CHECK-LABEL: @test_insertvalue_array( ; CHECK-LABEL: @test_insertvalue_array(
; CHECK: %alloca = alloca i16 ; CHECK: %alloca = alloca i16
; CHECK-NEXT: insertvalue [2 x i16*] undef, i16* %alloca, 0 ; CHECK-NEXT: insertvalue [2 x i16 addrspace(5)*] undef, i16 addrspace(5)* %alloca, 0
define amdgpu_kernel void @test_insertvalue_array() #0 { define amdgpu_kernel void @test_insertvalue_array() #0 {
entry: entry:
%alloca = alloca i16, align 4 %alloca = alloca i16, align 4, addrspace(5)
%in = insertvalue [2 x i16*] undef, i16* %alloca, 0 %in = insertvalue [2 x i16 addrspace(5)*] undef, i16 addrspace(5)* %alloca, 0
store [2 x i16*] %in, [2 x i16*]* undef, align 4 store [2 x i16 addrspace(5)*] %in, [2 x i16 addrspace(5)*]* undef, align 4
ret void ret void
} }