AMDGPU: Remove llvm.amdgcn.alignbit and handle bitcode upgrade to fshr
This commit is contained in:
parent
ea27adb45b
commit
82de129ab8
|
@ -1514,12 +1514,6 @@ def int_amdgcn_writelane :
|
||||||
[IntrNoMem, IntrConvergent, IntrWillReturn]
|
[IntrNoMem, IntrConvergent, IntrWillReturn]
|
||||||
>;
|
>;
|
||||||
|
|
||||||
// FIXME: Deprecated. This is equivalent to llvm.fshr
|
|
||||||
def int_amdgcn_alignbit : Intrinsic<[llvm_i32_ty],
|
|
||||||
[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
|
|
||||||
[IntrNoMem, IntrSpeculatable, IntrWillReturn]
|
|
||||||
>;
|
|
||||||
|
|
||||||
def int_amdgcn_alignbyte : GCCBuiltin<"__builtin_amdgcn_alignbyte">,
|
def int_amdgcn_alignbyte : GCCBuiltin<"__builtin_amdgcn_alignbyte">,
|
||||||
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
|
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
|
||||||
[IntrNoMem, IntrSpeculatable, IntrWillReturn]
|
[IntrNoMem, IntrSpeculatable, IntrWillReturn]
|
||||||
|
|
|
@ -727,6 +727,13 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
|
||||||
Name == "arm.cde.vcx3qa.predicated.v2i64.v4i1")
|
Name == "arm.cde.vcx3qa.predicated.v2i64.v4i1")
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
|
if (Name == "amdgcn.alignbit") {
|
||||||
|
// Target specific intrinsic became redundant
|
||||||
|
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::fshr,
|
||||||
|
{F->getReturnType()});
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -4083,7 +4083,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
|
||||||
case Intrinsic::amdgcn_mqsad_pk_u16_u8:
|
case Intrinsic::amdgcn_mqsad_pk_u16_u8:
|
||||||
case Intrinsic::amdgcn_mqsad_u32_u8:
|
case Intrinsic::amdgcn_mqsad_u32_u8:
|
||||||
case Intrinsic::amdgcn_cvt_pk_u8_f32:
|
case Intrinsic::amdgcn_cvt_pk_u8_f32:
|
||||||
case Intrinsic::amdgcn_alignbit:
|
|
||||||
case Intrinsic::amdgcn_alignbyte:
|
case Intrinsic::amdgcn_alignbyte:
|
||||||
case Intrinsic::amdgcn_perm:
|
case Intrinsic::amdgcn_perm:
|
||||||
case Intrinsic::amdgcn_fdot2:
|
case Intrinsic::amdgcn_fdot2:
|
||||||
|
|
|
@ -6941,9 +6941,6 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
|
||||||
DAG.getConstant(1, SL, MVT::i32));
|
DAG.getConstant(1, SL, MVT::i32));
|
||||||
return DAG.getSetCC(SL, MVT::i1, SrcHi, Aperture, ISD::SETEQ);
|
return DAG.getSetCC(SL, MVT::i1, SrcHi, Aperture, ISD::SETEQ);
|
||||||
}
|
}
|
||||||
case Intrinsic::amdgcn_alignbit:
|
|
||||||
return DAG.getNode(ISD::FSHR, DL, VT,
|
|
||||||
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
|
|
||||||
case Intrinsic::amdgcn_perm:
|
case Intrinsic::amdgcn_perm:
|
||||||
return DAG.getNode(AMDGPUISD::PERM, DL, MVT::i32, Op.getOperand(1),
|
return DAG.getNode(AMDGPUISD::PERM, DL, MVT::i32, Op.getOperand(1),
|
||||||
Op.getOperand(2), Op.getOperand(3));
|
Op.getOperand(2), Op.getOperand(3));
|
||||||
|
|
|
@ -0,0 +1,12 @@
|
||||||
|
; RUN: llvm-as < %s | llvm-dis | FileCheck %s
|
||||||
|
|
||||||
|
define i32 @user(i32 %a, i32 %b, i32 %c) {
|
||||||
|
; CHECK: %call = call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 %c)
|
||||||
|
; CHECK-NOT: amdgcn.alignbit
|
||||||
|
%call = call i32 @llvm.amdgcn.alignbit(i32 %a, i32 %b, i32 %c)
|
||||||
|
ret i32 %call
|
||||||
|
}
|
||||||
|
|
||||||
|
declare i32 @llvm.amdgcn.alignbit(i32, i32, i32)
|
||||||
|
; CHECK: declare i32 @llvm.fshr.i32(i32, i32, i32) #0
|
||||||
|
; CHECK-NOT: amdgcn.alignbit
|
|
@ -1,16 +1,7 @@
|
||||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||||
|
|
||||||
declare i32 @llvm.amdgcn.alignbit(i32, i32, i32) #0
|
|
||||||
declare i32 @llvm.amdgcn.alignbyte(i32, i32, i32) #0
|
declare i32 @llvm.amdgcn.alignbyte(i32, i32, i32) #0
|
||||||
|
|
||||||
; GCN-LABEL: {{^}}v_alignbit_b32:
|
|
||||||
; GCN: v_alignbit_b32 {{[vs][0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}
|
|
||||||
define amdgpu_kernel void @v_alignbit_b32(i32 addrspace(1)* %out, i32 %src1, i32 %src2, i32 %src3) #1 {
|
|
||||||
%val = call i32 @llvm.amdgcn.alignbit(i32 %src1, i32 %src2, i32 %src3) #0
|
|
||||||
store i32 %val, i32 addrspace(1)* %out
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
; GCN-LABEL: {{^}}v_alignbyte_b32:
|
; GCN-LABEL: {{^}}v_alignbyte_b32:
|
||||||
; GCN: v_alignbyte_b32 {{[vs][0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}
|
; GCN: v_alignbyte_b32 {{[vs][0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}
|
||||||
define amdgpu_kernel void @v_alignbyte_b32(i32 addrspace(1)* %out, i32 %src1, i32 %src2, i32 %src3) #1 {
|
define amdgpu_kernel void @v_alignbyte_b32(i32 addrspace(1)* %out, i32 %src1, i32 %src2, i32 %src3) #1 {
|
Loading…
Reference in New Issue