forked from OSchip/llvm-project
[AMDGPU][DAG] Fix trunc/shift combine condition
The condition needs to be different for right shifts; otherwise the combine may lose information in some cases.

Reviewed By: foad

Differential Revision: https://reviews.llvm.org/D136059
This commit is contained in:
parent
34020d39b8
commit
824dd811be
|
@ -3245,9 +3245,14 @@ SDValue AMDGPUTargetLowering::performTruncateCombine(
|
|||
Src.getOpcode() == ISD::SHL)) {
|
||||
SDValue Amt = Src.getOperand(1);
|
||||
KnownBits Known = DAG.computeKnownBits(Amt);
|
||||
unsigned Size = VT.getScalarSizeInBits();
|
||||
if ((Known.isConstant() && Known.getConstant().ule(Size)) ||
|
||||
(Known.countMaxActiveBits() <= Log2_32(Size))) {
|
||||
|
||||
// - For left shifts, do the transform as long as the shift
|
||||
// amount is still legal for i32, so when ShiftAmt < 32 (<= 31)
|
||||
// - For right shift, do it if ShiftAmt <= (32 - Size) to avoid
|
||||
// losing information stored in the high bits when truncating.
|
||||
const unsigned MaxCstSize =
|
||||
(Src.getOpcode() == ISD::SHL) ? 31 : (32 - VT.getScalarSizeInBits());
|
||||
if (Known.getMaxValue().ule(MaxCstSize)) {
|
||||
EVT MidVT = VT.isVector() ?
|
||||
EVT::getVectorVT(*DAG.getContext(), MVT::i32,
|
||||
VT.getVectorNumElements()) : MVT::i32;
|
||||
|
|
|
@ -128,8 +128,8 @@ define i16 @trunc_srl_i64_var_mask16_to_i16(i64 %x, i64 %amt) {
|
|||
; GCN-LABEL: trunc_srl_i64_var_mask16_to_i16:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_and_b32_e32 v2, 16, v2
|
||||
; GCN-NEXT: v_lshrrev_b64 v[0:1], v2, v[0:1]
|
||||
; GCN-NEXT: v_and_b32_e32 v1, 16, v2
|
||||
; GCN-NEXT: v_lshrrev_b32_e32 v0, v1, v0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%amt.masked = and i64 %amt, 16
|
||||
%shift = lshr i64 %x, %amt.masked
|
||||
|
@ -149,3 +149,19 @@ define i16 @trunc_srl_i64_var_mask31_to_i16(i64 %x, i64 %amt) {
|
|||
%trunc = trunc i64 %shift to i16
|
||||
ret i16 %trunc
|
||||
}
|
||||
|
||||
; Test: an i64 value is masked with 0xA000000 (only bits 25 and 27 can be set),
; logically shifted right by the constant 25, truncated to i26, incremented by
; 55 and zero-extended to i32. After the shift the value is at most 5, so the
; i26 add (max 60) cannot wrap.
; NOTE(review): the shift amount 25 is larger than 32 - 26 = 6, so this
; presumably covers the right-shift case where the amount exceeds
; (32 - truncated size) -- confirm against the combine being tested.
define i32 @trunc_srl_i64_25_to_i26(i64 %x) {
|
||||
; GCN-LABEL: trunc_srl_i64_25_to_i26:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_and_b32_e32 v0, 0xa000000, v0
|
||||
; GCN-NEXT: v_alignbit_b32 v0, 0, v0, 25
|
||||
; GCN-NEXT: v_add_u32_e32 v0, 55, v0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%value.knownbits2 = and i64 %x, 167772160 ; 0xA000000
|
||||
%shift = lshr i64 %value.knownbits2, 25
|
||||
|
||||
%trunc = trunc i64 %shift to i26
|
||||
%add = add i26 %trunc, 55
|
||||
|
||||
%ext = zext i26 %add to i32
|
||||
ret i32 %ext
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue