Merge branch 'main' into eliminate_call_frame
This commit is contained in:
commit
965f8c1fb6
|
@ -142,6 +142,7 @@
|
|||
#include <clc/integer/abs_diff.h>
|
||||
#include <clc/integer/add_sat.h>
|
||||
#include <clc/integer/clz.h>
|
||||
#include <clc/integer/ctz.h>
|
||||
#include <clc/integer/hadd.h>
|
||||
#include <clc/integer/mad24.h>
|
||||
#include <clc/integer/mad_hi.h>
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
#define __CLC_BODY <clc/integer/ctz.inc>
|
||||
#include <clc/integer/gentype.inc>
|
|
@ -0,0 +1 @@
|
|||
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE ctz(__CLC_GENTYPE x);
|
|
@ -1 +1,3 @@
|
|||
#define mad_hi(a, b, c) (mul_hi((a),(b))+(c))
|
||||
#define __CLC_BODY <clc/integer/mad_hi.inc>
|
||||
#include <clc/integer/gentype.inc>
|
||||
#undef __CLC_BODY
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE mad_hi(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_GENTYPE z);
|
|
@ -67,9 +67,11 @@ integer/abs.cl
|
|||
integer/abs_diff.cl
|
||||
integer/add_sat.cl
|
||||
integer/clz.cl
|
||||
integer/ctz.cl
|
||||
integer/hadd.cl
|
||||
integer/mad24.cl
|
||||
integer/mad_sat.cl
|
||||
integer/mad_hi.cl
|
||||
integer/mul24.cl
|
||||
integer/mul_hi.cl
|
||||
integer/popcount.cl
|
||||
|
|
|
@ -0,0 +1,42 @@
|
|||
#include <clc/clc.h>
|
||||
#include "../clcmacro.h"
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF char ctz(char x) {
|
||||
return x ? ctz((ushort)(uchar)x) : 8;
|
||||
}
|
||||
_CLC_OVERLOAD _CLC_DEF uchar ctz(uchar x) {
|
||||
return x ? ctz((ushort)x) : 8;
|
||||
}
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF short ctz(short x) {
|
||||
return x ? __builtin_ctzs(x) : 16;
|
||||
}
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF ushort ctz(ushort x) {
|
||||
return x ? __builtin_ctzs(x) : 16;
|
||||
}
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF int ctz(int x) {
|
||||
return x ? __builtin_ctz(x) : 32;
|
||||
}
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF uint ctz(uint x) {
|
||||
return x ? __builtin_ctz(x) : 32;
|
||||
}
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF long ctz(long x) {
|
||||
return x ? __builtin_ctzl(x) : 64;
|
||||
}
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF ulong ctz(ulong x) {
|
||||
return x ? __builtin_ctzl(x) : 64;
|
||||
}
|
||||
|
||||
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, char, ctz, char)
|
||||
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uchar, ctz, uchar)
|
||||
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, short, ctz, short)
|
||||
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ushort, ctz, ushort)
|
||||
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, int, ctz, int)
|
||||
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uint, ctz, uint)
|
||||
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, long, ctz, long)
|
||||
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ulong, ctz, ulong)
|
|
@ -0,0 +1,44 @@
|
|||
#include <clc/clc.h>
|
||||
#include "../clcmacro.h"
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF char mad_hi(char x, char y, char z) {
|
||||
return mul_hi(x, y) + z;
|
||||
}
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF uchar mad_hi(uchar x, uchar y, uchar z) {
|
||||
return mul_hi(x, y) + z;
|
||||
}
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF short mad_hi(short x, short y, short z) {
|
||||
return mul_hi(x, y) + z;
|
||||
}
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF ushort mad_hi(ushort x, ushort y, ushort z) {
|
||||
return mul_hi(x, y) + z;
|
||||
}
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF int mad_hi(int x, int y, int z) {
|
||||
return mul_hi(x, y) + z;
|
||||
}
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF uint mad_hi(uint x, uint y, uint z) {
|
||||
return mul_hi(x, y) + z;
|
||||
}
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF long mad_hi(long x, long y, long z) {
|
||||
return mul_hi(x, y) + z;
|
||||
}
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF ulong mad_hi(ulong x, ulong y, ulong z) {
|
||||
return mul_hi(x, y) + z;
|
||||
}
|
||||
|
||||
|
||||
_CLC_TERNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, char, mad_hi, char, char, char)
|
||||
_CLC_TERNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uchar, mad_hi, uchar, uchar, uchar)
|
||||
_CLC_TERNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, short, mad_hi, short, short, short)
|
||||
_CLC_TERNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ushort, mad_hi, ushort, ushort, ushort)
|
||||
_CLC_TERNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, int, mad_hi, int, int, int)
|
||||
_CLC_TERNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uint, mad_hi, uint, uint, uint)
|
||||
_CLC_TERNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, long, mad_hi, long, long, long)
|
||||
_CLC_TERNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ulong, mad_hi, ulong, ulong, ulong)
|
|
@ -1,9 +1,9 @@
|
|||
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE clamp(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_GENTYPE z) {
|
||||
return (x > z ? z : (x < y ? y : x));
|
||||
return (x >= z ? z : (x <= y ? y : x));
|
||||
}
|
||||
|
||||
#ifndef __CLC_SCALAR
|
||||
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE clamp(__CLC_GENTYPE x, __CLC_SCALAR_GENTYPE y, __CLC_SCALAR_GENTYPE z) {
|
||||
return (x > (__CLC_GENTYPE)z ? (__CLC_GENTYPE)z : (x < (__CLC_GENTYPE)y ? (__CLC_GENTYPE)y : x));
|
||||
return (x >= (__CLC_GENTYPE)z ? (__CLC_GENTYPE)z : (x <= (__CLC_GENTYPE)y ? (__CLC_GENTYPE)y : x));
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -22,32 +22,32 @@ double __floatdidf(di_int a) {
|
|||
const unsigned N = sizeof(di_int) * 8;
|
||||
const di_int s = a >> (N - 1);
|
||||
a = (a ^ s) - s;
|
||||
int sd = N - __builtin_clzll(a);
|
||||
int sd = N - clzl(a);
|
||||
int e = sd - 1;
|
||||
if (sd > 53) {
|
||||
if (sd > DBL_MANT_DIG) {
|
||||
|
||||
switch (sd) {
|
||||
case 53 + 1:
|
||||
case DBL_MANT_DIG + 1:
|
||||
a <<= 1;
|
||||
break;
|
||||
case 53 + 2:
|
||||
case DBL_MANT_DIG + 2:
|
||||
break;
|
||||
default:
|
||||
a = ((du_int)a >> (sd - (53 + 2))) |
|
||||
((a & ((du_int)(-1) >> ((N + 53 + 2) - sd))) != 0);
|
||||
a = ((du_int)a >> (sd - (DBL_MANT_DIG + 2))) |
|
||||
((a & ((du_int)(-1) >> ((N + DBL_MANT_DIG + 2) - sd))) != 0);
|
||||
};
|
||||
|
||||
a |= (a & 4) != 0;
|
||||
++a;
|
||||
a >>= 2;
|
||||
|
||||
if (a & ((du_int)1 << 53)) {
|
||||
if (a & ((du_int)1 << DBL_MANT_DIG)) {
|
||||
a >>= 1;
|
||||
++e;
|
||||
}
|
||||
|
||||
} else {
|
||||
a <<= (53 - sd);
|
||||
a <<= (DBL_MANT_DIG - sd);
|
||||
}
|
||||
double_bits fb;
|
||||
fb.u.s.high = ((su_int)s & 0x80000000) | ((su_int)(e + 1023) << 20) |
|
||||
|
|
|
@ -22,32 +22,32 @@ float __floatdisf(di_int a) {
|
|||
const unsigned N = sizeof(di_int) * 8;
|
||||
const di_int s = a >> (N - 1);
|
||||
a = (a ^ s) - s;
|
||||
int sd = N - __builtin_clzll(a);
|
||||
int sd = N - clz64(a);
|
||||
si_int e = sd - 1;
|
||||
if (sd > 24) {
|
||||
if (sd > FLT_MANT_DIG) {
|
||||
|
||||
switch (sd) {
|
||||
case 24 + 1:
|
||||
case FLT_MANT_DIG + 1:
|
||||
a <<= 1;
|
||||
break;
|
||||
case 24 + 2:
|
||||
case FLT_MANT_DIG + 2:
|
||||
break;
|
||||
default:
|
||||
a = ((du_int)a >> (sd - (24 + 2))) |
|
||||
((a & ((du_int)(-1) >> ((N + 24 + 2) - sd))) != 0);
|
||||
a = ((du_int)a >> (sd - (FLT_MANT_DIG + 2))) |
|
||||
((a & ((du_int)(-1) >> ((N + FLT_MANT_DIG + 2) - sd))) != 0);
|
||||
};
|
||||
|
||||
a |= (a & 4) != 0;
|
||||
++a;
|
||||
a >>= 2;
|
||||
|
||||
if (a & ((du_int)1 << 24)) {
|
||||
if (a & ((du_int)1 << FLT_MANT_DIG)) {
|
||||
a >>= 1;
|
||||
++e;
|
||||
}
|
||||
|
||||
} else {
|
||||
a <<= (24 - sd);
|
||||
a <<= (FLT_MANT_DIG - sd);
|
||||
}
|
||||
float_bits fb;
|
||||
fb.u =
|
||||
|
|
|
@ -22,30 +22,30 @@ double __floatundidf(du_int a) {
|
|||
const unsigned N = sizeof(du_int) * 8;
|
||||
int sd = N - __builtin_clzll(a);
|
||||
int e = sd - 1;
|
||||
if (sd > 53) {
|
||||
if (sd > DBL_MANT_DIG) {
|
||||
|
||||
switch (sd) {
|
||||
case 53 + 1:
|
||||
case DBL_MANT_DIG + 1:
|
||||
a <<= 1;
|
||||
break;
|
||||
case 53 + 2:
|
||||
case DBL_MANT_DIG + 2:
|
||||
break;
|
||||
default:
|
||||
a = (a >> (sd - (53 + 2))) |
|
||||
((a & ((du_int)(-1) >> ((N + 53 + 2) - sd))) != 0);
|
||||
a = (a >> (sd - (DBL_MANT_DIG + 2))) |
|
||||
((a & ((du_int)(-1) >> ((N + DBL_MANT_DIG + 2) - sd))) != 0);
|
||||
};
|
||||
|
||||
a |= (a & 4) != 0;
|
||||
++a;
|
||||
a >>= 2;
|
||||
|
||||
if (a & ((du_int)1 << 53)) {
|
||||
if (a & ((du_int)1 << DBL_MANT_DIG)) {
|
||||
a >>= 1;
|
||||
++e;
|
||||
}
|
||||
|
||||
} else {
|
||||
a <<= (53 - sd);
|
||||
a <<= (DBL_MANT_DIG - sd);
|
||||
}
|
||||
double_bits fb;
|
||||
fb.u.s.high = ((su_int)(e + 1023) << 20) | ((su_int)(a >> 32) & 0x000FFFFF);
|
||||
|
|
|
@ -17,18 +17,19 @@
|
|||
|
||||
// seee eeee emmm mmmm mmmm mmmm mmmm mmmm
|
||||
|
||||
typedef union {
|
||||
int u;
|
||||
float f;
|
||||
} float_bits;
|
||||
#ifdef cl_khr_fp64
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
||||
|
||||
#include "types.h"
|
||||
|
||||
float __floatundisf(long a) {
|
||||
if (a == 0)
|
||||
return 0.0F;
|
||||
const unsigned N = sizeof(long) * 8;
|
||||
int sd = N - __builtin_clzll(a); // number of significant digits
|
||||
int sd = N - clz64(a); // number of significant digits
|
||||
int e = sd - 1; // 8 exponent
|
||||
if (sd > 24) {
|
||||
if (sd > FLT_MANT_DIG) {
|
||||
// start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx
|
||||
// finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR
|
||||
// 12345678901234567890123456
|
||||
|
@ -37,27 +38,27 @@ float __floatundisf(long a) {
|
|||
// Q = bit FLT_MANT_DIG bits to the right of 1
|
||||
// R = "or" of all bits to the right of Q
|
||||
switch (sd) {
|
||||
case 24 + 1:
|
||||
case FLT_MANT_DIG + 1:
|
||||
a <<= 1;
|
||||
break;
|
||||
case 24 + 2:
|
||||
case FLT_MANT_DIG + 2:
|
||||
break;
|
||||
default:
|
||||
a = (a >> (sd - (24 + 2))) |
|
||||
((a & ((long)(-1) >> ((N + 24+ 2) - sd))) != 0);
|
||||
a = (a >> (sd - (FLT_MANT_DIG + 2))) |
|
||||
((a & ((long)(-1) >> ((N + FLT_MANT_DIG+ 2) - sd))) != 0);
|
||||
};
|
||||
// finish:
|
||||
a |= (a & 4) != 0; // Or P into R
|
||||
++a; // round - this step may add a significant bit
|
||||
a >>= 2; // dump Q and R
|
||||
// a is now rounded to FLT_MANT_DIG or FLT_MANT_DIG+1 bits
|
||||
if (a & ((long)1 << 24)) {
|
||||
if (a & ((long)1 << FLT_MANT_DIG)) {
|
||||
a >>= 1;
|
||||
++e;
|
||||
}
|
||||
// a is now rounded to FLT_MANT_DIG bits
|
||||
} else {
|
||||
a <<= (24 - sd);
|
||||
a <<= (FLT_MANT_DIG - sd);
|
||||
// a is now rounded to FLT_MANT_DIG bits
|
||||
}
|
||||
float_bits fb;
|
||||
|
@ -65,3 +66,5 @@ float __floatundisf(long a) {
|
|||
((unsigned)a & 0x007FFFFF); // mantissa
|
||||
return fb.f;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
#ifndef TYPES_H
|
||||
#define TYPES_H
|
||||
|
||||
#include <float.h>
|
||||
|
||||
typedef char char2 __attribute__((__ext_vector_type__(2)));
|
||||
typedef char char3 __attribute__((__ext_vector_type__(3)));
|
||||
typedef char char4 __attribute__((__ext_vector_type__(4)));
|
||||
|
@ -525,4 +527,43 @@ do { \
|
|||
(d) = __u.value; \
|
||||
} while (0)
|
||||
|
||||
|
||||
static int __attribute__((noinline)) clzl(unsigned long x)
|
||||
//static int inline clzl(unsigned long x)
|
||||
{
|
||||
for (int i = 0; i != 64; ++i)
|
||||
if ((x >> (63 - i)) & 1)
|
||||
return i;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ctz64(unsigned long x)
|
||||
{
|
||||
int r = 63;
|
||||
|
||||
x &= ~x + 1;
|
||||
if (x & 0x00000000FFFFFFFF) r -= 32;
|
||||
if (x & 0x0000FFFF0000FFFF) r -= 16;
|
||||
if (x & 0x00FF00FF00FF00FF) r -= 8;
|
||||
if (x & 0x0F0F0F0F0F0F0F0F) r -= 4;
|
||||
if (x & 0x3333333333333333) r -= 2;
|
||||
if (x & 0x5555555555555555) r -= 1;
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static int clz64(unsigned long x) {
|
||||
int r = 0;
|
||||
|
||||
if ((x & 0xFFFFFFFF00000000) == 0) r += 32, x <<= 32;
|
||||
if ((x & 0xFFFF000000000000) == 0) r += 16, x <<= 16;
|
||||
if ((x & 0xFF00000000000000) == 0) r += 8, x <<= 8;
|
||||
if ((x & 0xF000000000000000) == 0) r += 4, x <<= 4;
|
||||
if ((x & 0xC000000000000000) == 0) r += 2, x <<= 2;
|
||||
if ((x & 0x8000000000000000) == 0) r += 1, x <<= 1;
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
#endif // TYPES_H
|
||||
|
|
|
@ -2650,15 +2650,6 @@ bool RISCVAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
|
|||
case RISCV::PseudoLD:
|
||||
emitLoadStoreSymbol(Inst, RISCV::LD, IDLoc, Out, /*HasTmpReg=*/false);
|
||||
return false;
|
||||
case RISCV::PseudoFLH:
|
||||
emitLoadStoreSymbol(Inst, RISCV::FLH, IDLoc, Out, /*HasTmpReg=*/true);
|
||||
return false;
|
||||
case RISCV::PseudoFLW:
|
||||
emitLoadStoreSymbol(Inst, RISCV::FLW, IDLoc, Out, /*HasTmpReg=*/true);
|
||||
return false;
|
||||
case RISCV::PseudoFLD:
|
||||
emitLoadStoreSymbol(Inst, RISCV::FLD, IDLoc, Out, /*HasTmpReg=*/true);
|
||||
return false;
|
||||
case RISCV::PseudoSB:
|
||||
emitLoadStoreSymbol(Inst, RISCV::SB, IDLoc, Out, /*HasTmpReg=*/true);
|
||||
return false;
|
||||
|
@ -2674,12 +2665,6 @@ bool RISCVAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
|
|||
case RISCV::PseudoFSH:
|
||||
emitLoadStoreSymbol(Inst, RISCV::FSH, IDLoc, Out, /*HasTmpReg=*/true);
|
||||
return false;
|
||||
case RISCV::PseudoFSW:
|
||||
emitLoadStoreSymbol(Inst, RISCV::FSW, IDLoc, Out, /*HasTmpReg=*/true);
|
||||
return false;
|
||||
case RISCV::PseudoFSD:
|
||||
emitLoadStoreSymbol(Inst, RISCV::FSD, IDLoc, Out, /*HasTmpReg=*/true);
|
||||
return false;
|
||||
case RISCV::PseudoAddTPRel:
|
||||
if (checkPseudoAddTPRel(Inst, Operands))
|
||||
return true;
|
||||
|
|
|
@ -99,6 +99,9 @@ enum {
|
|||
// Check if this instruction meets the format of RVInstVV
|
||||
IsVVALUInstrShift = UsesMaskPolicyShift + 1,
|
||||
IsVVALUInstrMask = 1 << IsVVALUInstrShift,
|
||||
|
||||
IsVOPIMM11Shift = IsVVALUInstrShift + 1,
|
||||
IsVOPIMM11Mask = 1 << IsVOPIMM11Shift,
|
||||
};
|
||||
|
||||
// Match with the definitions in RISCVInstrFormats.td
|
||||
|
@ -142,6 +145,10 @@ static inline bool isVVALUInstr(uint64_t TSFlags) {
|
|||
return TSFlags & IsVVALUInstrMask;
|
||||
}
|
||||
|
||||
static inline bool isVOPIMM11(uint64_t TSFlags) {
|
||||
return TSFlags & IsVOPIMM11Mask;
|
||||
}
|
||||
|
||||
/// \returns true if tail agnostic is enforced for the instruction.
|
||||
static inline bool doesForceTailAgnostic(uint64_t TSFlags) {
|
||||
return TSFlags & ForceTailAgnosticMask;
|
||||
|
|
|
@ -117,7 +117,8 @@ void RISCVAsmPrinter::emitInstruction(const MachineInstr *MI) {
|
|||
LowerHWASAN_CHECK_MEMACCESS(*MI);
|
||||
return;
|
||||
}
|
||||
|
||||
if (MI->getOpcode() == RISCV::PseudoVXOR_VI_IMM11)
|
||||
return;
|
||||
if (!lowerRISCVMachineInstrToMCInst(MI, TmpInst, *this))
|
||||
EmitToStreamer(*OutStreamer, TmpInst);
|
||||
}
|
||||
|
@ -197,8 +198,8 @@ bool RISCVAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
|
|||
NewSTI.setFeatureBits(MF.getSubtarget().getFeatureBits());
|
||||
MCSTI = &NewSTI;
|
||||
STI = &MF.getSubtarget<RISCVSubtarget>();
|
||||
auto *CurrentProgramInfo = const_cast<VentusProgramInfo*>(
|
||||
STI->getVentusProgramInfo());
|
||||
auto *CurrentProgramInfo =
|
||||
const_cast<VentusProgramInfo *>(STI->getVentusProgramInfo());
|
||||
if (MF.getInfo<RISCVMachineFunctionInfo>()->isEntryFunction()) {
|
||||
MCSectionELF *ResourceSection = OutContext.getELFSection(
|
||||
".ventus.resource", ELF::SHT_PROGBITS, ELF::SHF_WRITE);
|
||||
|
|
|
@ -53,6 +53,8 @@ private:
|
|||
bool expandCompareSelect(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MBBI,
|
||||
MachineBasicBlock::iterator &NextMBBI);
|
||||
bool expandVIIMM11(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MBBI);
|
||||
};
|
||||
|
||||
char RISCVExpandPseudo::ID = 0;
|
||||
|
@ -85,6 +87,9 @@ bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB,
|
|||
// RISCVInstrInfo::getInstSizeInBytes expects that the total size of the
|
||||
// expanded instructions for each pseudo is correct in the Size field of the
|
||||
// tablegen definition for the pseudo.
|
||||
if (RISCVII::isVOPIMM11(MBBI->getDesc().TSFlags))
|
||||
return expandVIIMM11(MBB, MBBI);
|
||||
|
||||
switch (MBBI->getOpcode()) {
|
||||
case RISCV::PseudoCCMOVGPR:
|
||||
return expandCCOp(MBB, MBBI, NextMBBI);
|
||||
|
@ -100,6 +105,101 @@ bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB,
|
|||
return false;
|
||||
}
|
||||
|
||||
bool RISCVExpandPseudo::expandVIIMM11(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MBBI) {
|
||||
const TargetRegisterInfo *TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
|
||||
const MCInstrDesc *MCID = nullptr;
|
||||
|
||||
switch (MBBI->getOpcode()) {
|
||||
default:
|
||||
llvm_unreachable("Please add IMM11 Pseudo case here!");
|
||||
case RISCV::PseudoVOR_VI_IMM11:
|
||||
MCID = &TII->get(RISCV::VOR_VI);
|
||||
break;
|
||||
case RISCV::PseudoVXOR_VI_IMM11:
|
||||
MCID = &TII->get(RISCV::VXOR_VI);
|
||||
break;
|
||||
case RISCV::PseudoVRSUB_VI_IMM11:
|
||||
MCID = &TII->get(RISCV::VRSUB_VI);
|
||||
break;
|
||||
case RISCV::PseudoVAND_VI_IMM11:
|
||||
MCID = &TII->get(RISCV::VAND_VI);
|
||||
break;
|
||||
case RISCV::PseudoVMSNE_VI_IMM11:
|
||||
MCID = &TII->get(RISCV::VMSNE_VI);
|
||||
break;
|
||||
case RISCV::PseudoVMSEQ_VI_IMM11:
|
||||
MCID = &TII->get(RISCV::VMSEQ_VI);
|
||||
break;
|
||||
}
|
||||
|
||||
assert(MCID && "Unexpected opcode");
|
||||
MBBI->setDesc(*MCID);
|
||||
|
||||
int64_t Imm = 0;
|
||||
signed LowImm = 0;
|
||||
signed HighImm = 0;
|
||||
signed Offsets = 0;
|
||||
signed TmpImm = 0;
|
||||
|
||||
for (unsigned i = 0; i < MBBI->getNumOperands(); ++i) {
|
||||
MachineOperand &Op = MBBI->getOperand(i);
|
||||
|
||||
if (Op.isImm()) {
|
||||
Imm = Op.getImm();
|
||||
assert((Imm <= 1023 && Imm >= -1024) && "imm not in Imm11 range!");
|
||||
|
||||
if (Imm >= 0) {
|
||||
Imm &= 0b01111111111;
|
||||
}
|
||||
else {
|
||||
Imm = -Imm;
|
||||
Imm = ~Imm + 1;
|
||||
Imm &= 0b01111111111;
|
||||
Imm |= 0b10000000000;
|
||||
}
|
||||
|
||||
LowImm = Imm & 0b00000011111;
|
||||
TmpImm = ~LowImm + 1;
|
||||
TmpImm &= 0b01111;
|
||||
LowImm = (LowImm & 0b10000) ? (TmpImm ? -TmpImm : -15) : LowImm;
|
||||
|
||||
HighImm = (Imm & 0b11111100000) >> 5;
|
||||
TmpImm = ~HighImm + 1;
|
||||
TmpImm &= 0b011111;
|
||||
HighImm = (HighImm & 0b100000) ? (TmpImm ? -TmpImm : -31) : HighImm;
|
||||
|
||||
Op.ChangeToImmediate(LowImm);
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!Op.isReg() || MBBI->getDesc().getOperandConstraint(i, MCOI::TIED_TO) != -1)
|
||||
continue;
|
||||
|
||||
// deal with register numbers larger than 32.
|
||||
if (Op.isReg() &&
|
||||
MBBI->getDesc().getOperandConstraint(i, MCOI::TIED_TO) == -1) {
|
||||
uint16_t RegEncodingValue = TRI->getEncodingValue(Op.getReg());
|
||||
if (RegEncodingValue > 31) {
|
||||
int Pos = MBBI->getDesc().getOperandConstraint(i, MCOI::CUSTOM);
|
||||
assert(Pos != -1 && "Out of range[0, 31] register operand custom "
|
||||
"constraint that must be present.");
|
||||
assert(Pos != 1 && Pos != 3 && "Unexpected Pos!");
|
||||
Offsets |= (RegEncodingValue >> 5 & 0x7) << (3 * (Pos == 2 ? 1 : Pos));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
DebugLoc DL = MBBI->getDebugLoc();
|
||||
// Create instruction to expand imm5 or register basic offset as imm * 32.
|
||||
BuildMI(MBB, MBBI, DL, TII->get(RISCV::REGEXTI), RISCV::X0)
|
||||
.addReg(RISCV::X0)
|
||||
.addImm((HighImm << 6) | Offsets);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool RISCVExpandPseudo::expandBarrier(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MBBI,
|
||||
MachineBasicBlock::iterator &NextMBBI) {
|
||||
|
|
|
@ -7459,6 +7459,46 @@ SDValue RISCVTargetLowering::lowerKernArgParameterPtr(SelectionDAG &DAG,
|
|||
return DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::Fixed(Offset));
|
||||
}
|
||||
|
||||
SDValue RISCVTargetLowering::getFPExtOrFPRound(SelectionDAG &DAG,
|
||||
SDValue Op,
|
||||
const SDLoc &DL,
|
||||
EVT VT) const {
|
||||
return Op.getValueType().bitsLE(VT) ?
|
||||
DAG.getNode(ISD::FP_EXTEND, DL, VT, Op) :
|
||||
DAG.getNode(ISD::FP_ROUND, DL, VT, Op,
|
||||
DAG.getTargetConstant(0, DL, MVT::i32));
|
||||
}
|
||||
|
||||
SDValue RISCVTargetLowering::convertArgType(SelectionDAG &DAG, EVT VT, EVT MemVT,
|
||||
const SDLoc &SL, SDValue Val,
|
||||
bool Signed,
|
||||
const ISD::InputArg *Arg) const {
|
||||
// First, if it is a widened vector, narrow it.
|
||||
if (VT.isVector() &&
|
||||
VT.getVectorNumElements() != MemVT.getVectorNumElements()) {
|
||||
EVT NarrowedVT =
|
||||
EVT::getVectorVT(*DAG.getContext(), MemVT.getVectorElementType(),
|
||||
VT.getVectorNumElements());
|
||||
Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SL, NarrowedVT, Val,
|
||||
DAG.getConstant(0, SL, MVT::i32));
|
||||
}
|
||||
|
||||
// Then convert the vector elements or scalar value.
|
||||
if (Arg && (Arg->Flags.isSExt() || Arg->Flags.isZExt()) &&
|
||||
VT.bitsLT(MemVT)) {
|
||||
unsigned Opc = Arg->Flags.isZExt() ? ISD::AssertZext : ISD::AssertSext;
|
||||
Val = DAG.getNode(Opc, SL, MemVT, Val, DAG.getValueType(VT));
|
||||
}
|
||||
|
||||
if (MemVT.isFloatingPoint())
|
||||
Val = getFPExtOrFPRound(DAG, Val, SL, VT);
|
||||
else if (Signed)
|
||||
Val = DAG.getSExtOrTrunc(Val, SL, VT);
|
||||
else
|
||||
Val = DAG.getZExtOrTrunc(Val, SL, VT);
|
||||
|
||||
return Val;
|
||||
}
|
||||
|
||||
SDValue RISCVTargetLowering::lowerKernargMemParameter(
|
||||
SelectionDAG &DAG, EVT VT, EVT MemVT, const SDLoc &SL, SDValue Chain,
|
||||
|
@ -7489,7 +7529,7 @@ SDValue RISCVTargetLowering::lowerKernargMemParameter(
|
|||
SDValue ArgVal = DAG.getNode(ISD::TRUNCATE, SL, IntVT, Extract);
|
||||
ArgVal = DAG.getNode(ISD::BITCAST, SL, MemVT, ArgVal);
|
||||
// TODO: Support vector and half type.
|
||||
//ArgVal = convertArgType(DAG, VT, MemVT, SL, ArgVal, Signed, Arg);
|
||||
ArgVal = convertArgType(DAG, VT, MemVT, SL, ArgVal, Signed, Arg);
|
||||
|
||||
return DAG.getMergeValues({ ArgVal, Load.getValue(1) }, SL);
|
||||
}
|
||||
|
@ -7499,8 +7539,9 @@ SDValue RISCVTargetLowering::lowerKernargMemParameter(
|
|||
MachineMemOperand::MODereferenceable |
|
||||
MachineMemOperand::MOInvariant);
|
||||
|
||||
// SDValue Val = convertArgType(DAG, VT, MemVT, SL, Load, Signed, Arg);
|
||||
return DAG.getMergeValues({ Load, Load.getValue(1) }, SL);
|
||||
SDValue Val = convertArgType(DAG, VT, MemVT, SL, Load, Signed, Arg);
|
||||
// return DAG.getMergeValues({ Load, Load.getValue(1) }, SL);
|
||||
return DAG.getMergeValues({ Val, Load.getValue(1) }, SL);
|
||||
}
|
||||
|
||||
// Returns the opcode of the target-specific SDNode that implements the 32-bit
|
||||
|
@ -11582,15 +11623,20 @@ void RISCVTargetLowering::analyzeFormalArgumentsCompute(MachineFunction &MF,
|
|||
const bool IsByRef = Arg.hasByRefAttr();
|
||||
Type *BaseArgTy = Arg.getType();
|
||||
Type *MemArgTy = IsByRef ? Arg.getParamByRefType() : BaseArgTy;
|
||||
|
||||
uint64_t AllocSize = DL.getTypeAllocSize(MemArgTy);
|
||||
IntegerType *ArgIntTy = IntegerType::get(Ctx, 32);
|
||||
bool IsSmall = (AllocSize < 4);
|
||||
|
||||
Align Alignment = DL.getValueOrABITypeAlignment(
|
||||
IsByRef ? Arg.getParamAlign() : std::nullopt, MemArgTy);
|
||||
IsByRef ? Arg.getParamAlign() : std::nullopt, IsSmall ? ArgIntTy : MemArgTy);
|
||||
ArgOffset = alignTo(ArgOffset, Alignment);
|
||||
|
||||
SmallVector<EVT, 16> ValueVTs;
|
||||
SmallVector<uint64_t, 16> Offsets;
|
||||
ComputeValueVTs(*this, DL, BaseArgTy, ValueVTs, &Offsets, ArgOffset);
|
||||
|
||||
ArgOffset += DL.getTypeAllocSize(MemArgTy);
|
||||
ArgOffset += AllocSize;
|
||||
|
||||
for (unsigned Value = 0, NumValues = ValueVTs.size();
|
||||
Value != NumValues; ++Value) {
|
||||
|
|
|
@ -722,6 +722,13 @@ private:
|
|||
SDValue expandUnalignedRVVStore(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue lowerKernArgParameterPtr(SelectionDAG &DAG, const SDLoc &SL,
|
||||
SDValue Chain, uint64_t Offset) const;
|
||||
SDValue getFPExtOrFPRound(SelectionDAG &DAG, SDValue Op,
|
||||
const SDLoc &DL,
|
||||
EVT VT) const;
|
||||
SDValue convertArgType(SelectionDAG &DAG, EVT VT, EVT MemVT,
|
||||
const SDLoc &SL, SDValue Val,
|
||||
bool Signed,
|
||||
const ISD::InputArg *Arg) const;
|
||||
SDValue lowerKernargMemParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
|
||||
const SDLoc &SL, SDValue Chain,
|
||||
uint64_t Offset, Align Alignment,
|
||||
|
|
|
@ -208,6 +208,9 @@ class RVInst<dag outs, dag ins, string opcodestr, string argstr,
|
|||
|
||||
bit IsVVALUInstr = 0;
|
||||
let TSFlags{19} = IsVVALUInstr;
|
||||
|
||||
bit IsVOPIMM11 = 0;
|
||||
let TSFlags{20} = IsVOPIMM11;
|
||||
}
|
||||
|
||||
// Pseudo instructions
|
||||
|
|
|
@ -75,12 +75,9 @@ unsigned RISCVInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
|
|||
case RISCV::LBU:
|
||||
case RISCV::LH:
|
||||
case RISCV::LHU:
|
||||
case RISCV::FLH:
|
||||
case RISCV::LW:
|
||||
case RISCV::FLW:
|
||||
case RISCV::LWU:
|
||||
case RISCV::LD:
|
||||
case RISCV::FLD:
|
||||
case RISCV::VLW:
|
||||
case RISCV::VLH:
|
||||
case RISCV::VLB:
|
||||
|
@ -123,10 +120,7 @@ unsigned RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
|
|||
case RISCV::SB:
|
||||
case RISCV::SH:
|
||||
case RISCV::SW:
|
||||
case RISCV::FSH:
|
||||
case RISCV::FSW:
|
||||
case RISCV::SD:
|
||||
case RISCV::FSD:
|
||||
case RISCV::VSW:
|
||||
case RISCV::VSH:
|
||||
break;
|
||||
|
@ -184,10 +178,7 @@ void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
|
|||
// sGPRF32 -> vGPR move
|
||||
if (RISCV::GPRF32RegClass.contains(SrcReg) &&
|
||||
RISCV::VGPRRegClass.contains(DstReg)) {
|
||||
BuildMI(MBB, MBBI, DL, get(RISCV::VFMV_S_F), DstReg)
|
||||
.addReg(DstReg, RegState::Undef)
|
||||
.addReg(SrcReg, getKillRegState(KillSrc));
|
||||
return;
|
||||
llvm_unreachable("Not supported by HW, use vmv.v.x instead.");
|
||||
}
|
||||
|
||||
// Handle copy from csr
|
||||
|
@ -238,10 +229,6 @@ void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
|
|||
if (RISCV::GPRRegClass.hasSubClassEq(RC)) {
|
||||
Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ?
|
||||
RISCV::SW : RISCV::SD;
|
||||
} else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) {
|
||||
Opcode = RISCV::FSH;
|
||||
} else if (RISCV::FPR64RegClass.hasSubClassEq(RC)) {
|
||||
Opcode = RISCV::FSD;
|
||||
} else if (RISCV::VGPRRegClass.hasSubClassEq(RC)) {
|
||||
Opcode = RISCV::VSW;
|
||||
} else
|
||||
|
@ -279,10 +266,6 @@ void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
|
|||
if (RISCV::GPRRegClass.hasSubClassEq(RC)) {
|
||||
Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ?
|
||||
RISCV::LW : RISCV::LD;
|
||||
} else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) {
|
||||
Opcode = RISCV::FLH;
|
||||
} else if (RISCV::FPR64RegClass.hasSubClassEq(RC)) {
|
||||
Opcode = RISCV::FLD;
|
||||
} else if (RISCV::VGPRRegClass.hasSubClassEq(RC)) {
|
||||
Opcode = RISCV::VLW;
|
||||
} else
|
||||
|
|
|
@ -103,12 +103,9 @@ static unsigned log2LdstWidth(unsigned Opcode) {
|
|||
llvm_unreachable("Unexpected opcode");
|
||||
case RISCV::LW:
|
||||
case RISCV::SW:
|
||||
case RISCV::FLW:
|
||||
case RISCV::FSW:
|
||||
return 2;
|
||||
case RISCV::LD:
|
||||
case RISCV::SD:
|
||||
case RISCV::FLD:
|
||||
case RISCV::FSD:
|
||||
return 3;
|
||||
}
|
||||
|
@ -147,8 +144,7 @@ static bool isCompressibleLoad(const MachineInstr &MI) {
|
|||
const RISCVSubtarget &STI = MI.getMF()->getSubtarget<RISCVSubtarget>();
|
||||
const unsigned Opcode = MI.getOpcode();
|
||||
|
||||
return Opcode == RISCV::LW || (!STI.is64Bit() && Opcode == RISCV::FLW) ||
|
||||
Opcode == RISCV::LD || Opcode == RISCV::FLD;
|
||||
return Opcode == RISCV::LW || Opcode == RISCV::LD;
|
||||
}
|
||||
|
||||
// Return true if MI is a store for which there exists a compressed version.
|
||||
|
@ -156,8 +152,7 @@ static bool isCompressibleStore(const MachineInstr &MI) {
|
|||
const RISCVSubtarget &STI = MI.getMF()->getSubtarget<RISCVSubtarget>();
|
||||
const unsigned Opcode = MI.getOpcode();
|
||||
|
||||
return Opcode == RISCV::SW || (!STI.is64Bit() && Opcode == RISCV::FSW) ||
|
||||
Opcode == RISCV::SD || Opcode == RISCV::FSD;
|
||||
return Opcode == RISCV::SW || Opcode == RISCV::SD;
|
||||
}
|
||||
|
||||
// Find a single register and/or large offset which, if compressible, would
|
||||
|
|
|
@ -355,16 +355,10 @@ bool RISCVMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi,
|
|||
case RISCV::LHU:
|
||||
case RISCV::LWU:
|
||||
case RISCV::LD:
|
||||
case RISCV::FLH:
|
||||
case RISCV::FLW:
|
||||
case RISCV::FLD:
|
||||
case RISCV::SB:
|
||||
case RISCV::SH:
|
||||
case RISCV::SW:
|
||||
case RISCV::SD:
|
||||
case RISCV::FSH:
|
||||
case RISCV::FSW:
|
||||
case RISCV::FSD: {
|
||||
case RISCV::SD: {
|
||||
if (UseMI.getOperand(1).isFI())
|
||||
return false;
|
||||
// Register defined by Lo should not be the value register.
|
||||
|
|
|
@ -62,6 +62,27 @@ class RVInstSetVLi<dag outs, dag ins, string opcodestr, string argstr>
|
|||
let Opcode = OPC_OP_V.Value;
|
||||
}
|
||||
|
||||
// VENTUS VOP VI instructions with 11 bits immediate number
|
||||
class PseudoVOPVIIMM11<SDPatternOperator Op> :
|
||||
Pseudo<(outs VGPR:$vd), (ins VGPR:$vs2, simm11:$imm11),
|
||||
[(set (XLenVT VGPR:$vd), (Op (XLenVT VGPR:$vs2), simm11:$imm11))]> {
|
||||
// let usesCustomInserter = 1;
|
||||
let hasSideEffects = 1;
|
||||
let mayLoad = 0;
|
||||
let mayStore = 0;
|
||||
let IsVOPIMM11 = 1;
|
||||
}
|
||||
|
||||
class PseudoVOPIVIMM11<SDPatternOperator Op> :
|
||||
Pseudo<(outs VGPR:$vd), (ins VGPR:$vs2, simm11:$imm11),
|
||||
[(set (XLenVT VGPR:$vd), (Op simm11:$imm11, (XLenVT VGPR:$vs2)))]> {
|
||||
// let usesCustomInserter = 1;
|
||||
let hasSideEffects = 1;
|
||||
let mayLoad = 0;
|
||||
let mayStore = 0;
|
||||
let IsVOPIMM11 = 1;
|
||||
}
|
||||
|
||||
class RVInstVV<bits<6> funct6, RISCVVFormat opv, dag outs, dag ins,
|
||||
string opcodestr, string argstr>
|
||||
: RVInst<outs, ins, opcodestr, argstr, [], InstFormatR> {
|
||||
|
|
|
@ -341,11 +341,13 @@ class PseudoVFROUND<RegisterClass Ty>
|
|||
//===----------------------------------------------------------------------===//
|
||||
|
||||
let Predicates = [HasStdExtZfinx] in {
|
||||
def FLW : FPLoad_r<0b010, "flw", GPRF32, WriteFLD32>;
|
||||
// Operands for stores are in the order srcreg, base, offset rather than
|
||||
// reflecting the order these fields are specified in the instruction
|
||||
// encoding.
|
||||
def FSW : FPStore_r<0b010, "fsw", GPRF32, WriteFST32>;
|
||||
/// Loads
|
||||
def : Pat<(f32 (load (AddrRegImm (XLenVT GPR:$rs1), simm12:$imm12))),
|
||||
(COPY_TO_REGCLASS (LW GPR:$rs1, simm12:$imm12), GPRF32)>;
|
||||
|
||||
/// Stores
|
||||
def : Pat<(store (f32 FPR32INX:$rs2), (AddrRegImm (XLenVT GPR:$rs1), simm12:$imm12)),
|
||||
(SW (COPY_TO_REGCLASS FPR32INX:$rs2, GPR), GPR:$rs1, simm12:$imm12)>;
|
||||
} // Predicates = [HasStdExtZfinx]
|
||||
|
||||
let SchedRW = [WriteFMA32, ReadFMA32, ReadFMA32, ReadFMA32] in {
|
||||
|
@ -450,8 +452,8 @@ defm : FPUnaryOpDynFrmAlias_m<FCVT_S_LU, "fcvt.s.lu", FXIN64X>;
|
|||
//===----------------------------------------------------------------------===//
|
||||
|
||||
let Predicates = [HasStdExtZfinx] in {
|
||||
def : InstAlias<"flw $rd, (${rs1})", (FLW GPRF32:$rd, GPR:$rs1, 0), 0>;
|
||||
def : InstAlias<"fsw $rs2, (${rs1})", (FSW GPRF32:$rs2, GPR:$rs1, 0), 0>;
|
||||
// def : InstAlias<"flw $rd, (${rs1})", (FLW GPRF32:$rd, GPR:$rs1, 0), 0>;
|
||||
// def : InstAlias<"fsw $rs2, (${rs1})", (FSW GPRF32:$rs2, GPR:$rs1, 0), 0>;
|
||||
|
||||
def : InstAlias<"fmv.s $rd, $rs", (FSGNJ_S GPRF32:$rd, GPRF32:$rs, GPRF32:$rs)>;
|
||||
def : InstAlias<"fabs.s $rd, $rs", (FSGNJX_S GPRF32:$rd, GPRF32:$rs, GPRF32:$rs)>;
|
||||
|
@ -636,13 +638,13 @@ defm Select_FPR32 : SelectCC_GPR_rrirr<GPRF32>;
|
|||
def PseudoVFROUND_S : PseudoVFROUND<VGPR>;
|
||||
def PseudoFROUND_S : PseudoFROUND<GPRF32>;
|
||||
|
||||
/// Loads
|
||||
// /// Loads
|
||||
|
||||
defm : UniformLdPat<load, FLW, f32>;
|
||||
// defm : UniformLdPat<load, FLW, f32>;
|
||||
|
||||
/// Stores
|
||||
// /// Stores
|
||||
|
||||
defm : UniformStPat<store, FSW, GPRF32, f32>;
|
||||
// defm : UniformStPat<store, FSW, GPRF32, f32>;
|
||||
|
||||
} // Predicates = [HasStdExtZfinx]
|
||||
|
||||
|
|
|
@ -128,8 +128,8 @@ multiclass SleOpePatVXIBin<list<PatFrags> Ops, list<RVInst> Insts,
|
|||
|
||||
// Setcc pattern for interger operations
|
||||
// FIXME: this pattern class can substitude the multiclass above
|
||||
// class PatIntSetCC<list<DAGOperand> Ty, CondCode Cond, RVInst Inst>
|
||||
// : Pat<(setcc (XLenVT Ty[0]:$rs1), (XLenVT Ty[1]:$rs2), Cond), (i32 (Inst Ty[0]:$rs1, Ty[1]:$rs2))>;
|
||||
class PatIntSetCC<list<DAGOperand> Ty, CondCode Cond, RVInst Inst>
|
||||
: Pat<(setcc (XLenVT Ty[0]:$rs1), (XLenVT Ty[1]:$rs2), Cond), (i32 (Inst Ty[0]:$rs1, Ty[1]:$rs2))>;
|
||||
// Setcc pattern for float operations
|
||||
multiclass PatFloatSetCC<list<DAGOperand> Ty, list<CondCode> Conds, RVInst Inst>
|
||||
{
|
||||
|
@ -1134,8 +1134,9 @@ defm VFCVT_XU_F_V : VCVTI_FV_VS2<"vfcvt.xu.f.v", 0b010010, 0b00000>;
|
|||
defm VFCVT_X_F_V : VCVTI_FV_VS2<"vfcvt.x.f.v", 0b010010, 0b00001>;
|
||||
}
|
||||
// Follow the way by RISCVInstrInfoF
|
||||
defm VFCVT_RTZ_XU_F_V : VCVTI_FV_VS2_FRM<"vfcvt.rtz.xu.f.v", 0b010010, 0b00110>;
|
||||
defm VFCVT_RTZ_X_F_V : VCVTI_FV_VS2_FRM<"vfcvt.rtz.x.f.v", 0b010010, 0b00111>;
|
||||
// TODO: later support
|
||||
// defm VFCVT_RTZ_XU_F_V : VCVTI_FV_VS2_FRM<"vfcvt.rtz.xu.f.v", 0b010010, 0b00110>;
|
||||
// defm VFCVT_RTZ_X_F_V : VCVTI_FV_VS2_FRM<"vfcvt.rtz.x.f.v", 0b010010, 0b00111>;
|
||||
let Uses = [FRM] in {
|
||||
defm VFCVT_F_XU_V : VCVTF_IV_VS2<"vfcvt.f.xu.v", 0b010010, 0b00010>;
|
||||
defm VFCVT_F_X_V : VCVTF_IV_VS2<"vfcvt.f.x.v", 0b010010, 0b00011>;
|
||||
|
@ -1275,16 +1276,16 @@ def : DivergentNonPriStPat<truncstorei16, VSUXEI16>;
|
|||
def : DivergentNonPriStPat<store, VSUXEI32>;
|
||||
|
||||
// FIXME: check this review: https://reviews.llvm.org/D131729#inline-1269307
|
||||
// def : PatIntSetCC<[VGPR, VGPR], SETLE, VMSLE_VV>;
|
||||
// def : PatIntSetCC<[VGPR, GPR], SETLE, VMSLE_VX>;
|
||||
// def : PatIntSetCC<[VGPR, uimm5], SETLE, VMSLE_VI>;
|
||||
// def : PatIntSetCC<[VGPR, GPR], SETGT, VMSGT_VX>;
|
||||
// def : PatIntSetCC<[VGPR, uimm5], SETGT, VMSGT_VI>;
|
||||
// def : PatIntSetCC<[VGPR, VGPR], SETULE, VMSLEU_VV>;
|
||||
// def : PatIntSetCC<[VGPR, GPR], SETULE, VMSLEU_VX>;
|
||||
// def : PatIntSetCC<[VGPR, uimm5], SETULE, VMSLEU_VI>;
|
||||
// def : PatIntSetCC<[VGPR, GPR], SETUGT, VMSGTU_VX>;
|
||||
// def : PatIntSetCC<[VGPR, uimm5], SETUGT, VMSGTU_VI>;
|
||||
def : PatIntSetCC<[VGPR, VGPR], SETLE, VMSLE_VV>;
|
||||
def : PatIntSetCC<[VGPR, GPR], SETLE, VMSLE_VX>;
|
||||
def : PatIntSetCC<[VGPR, uimm5], SETLE, VMSLE_VI>;
|
||||
def : PatIntSetCC<[VGPR, GPR], SETGT, VMSGT_VX>;
|
||||
def : PatIntSetCC<[VGPR, uimm5], SETGT, VMSGT_VI>;
|
||||
def : PatIntSetCC<[VGPR, VGPR], SETULE, VMSLEU_VV>;
|
||||
def : PatIntSetCC<[VGPR, GPR], SETULE, VMSLEU_VX>;
|
||||
def : PatIntSetCC<[VGPR, uimm5], SETULE, VMSLEU_VI>;
|
||||
def : PatIntSetCC<[VGPR, GPR], SETUGT, VMSGTU_VX>;
|
||||
def : PatIntSetCC<[VGPR, uimm5], SETUGT, VMSGTU_VI>;
|
||||
|
||||
defm : PatVXIBin<DivergentBinFrag<smin>, [VMIN_VV, VMIN_VX]>;
|
||||
defm : PatVXIBin<DivergentBinFrag<umin>, [VMINU_VV, VMINU_VX]>;
|
||||
|
@ -1316,12 +1317,6 @@ def : Pat<(sub GPR:$rs1, VGPR:$rs2), (VRSUB_VX VGPR:$rs2, GPR:$rs1)>;
|
|||
def : Pat<(XLenVT (sub simm5:$imm, (XLenVT VGPR:$rs1))),
|
||||
(VRSUB_VI VGPR:$rs1, simm5:$imm)>;
|
||||
|
||||
// For now, some instructions are aliaed to other instructions
|
||||
defm : SleOpePatVXIBin<[DivergentBinFrag<xor>, DivergentBinFrag<setlt>],
|
||||
[VMSLE_VV, VMSLE_VX, VMSGT_VI], simm5>;
|
||||
defm : SleOpePatVXIBin<[DivergentBinFrag<xor>, DivergentBinFrag<setult>],
|
||||
[VMSLEU_VV, VMSLEU_VX, VMSGTU_VI], uimm5>;
|
||||
|
||||
|
||||
defm : PatFloatSetCC<[VGPR, VGPR], [SETOEQ, SETEQ], VMFEQ_VV>;
|
||||
defm : PatFloatSetCC<[VGPR, GPRF32], [SETOEQ, SETEQ], VMFEQ_VF>;
|
||||
|
@ -1336,10 +1331,10 @@ defm : PatFloatSetCC<[VGPR, GPRF32], [SETOLE, SETLE], VMFLE_VV>;
|
|||
defm : PatFloatSetCC<[VGPR, GPRF32], [SETOGT, SETGT], VMFGT_VF>;
|
||||
defm : PatFloatSetCC<[VGPR, GPRF32], [SETOGE, SETGE], VMFGE_VF>;
|
||||
|
||||
def : Pat<(i32 (DivergentBinFrag<riscv_fcvt_x> (f32 VGPR:$rs1), timm:$frm)),
|
||||
(VFCVT_RTZ_X_F_V (f32 VGPR:$rs1), $frm)>;
|
||||
def : Pat<(i32 (DivergentBinFrag<riscv_fcvt_xu> (f32 VGPR:$rs1), timm:$frm)),
|
||||
(VFCVT_RTZ_XU_F_V (f32 VGPR:$rs1), $frm)>;
|
||||
// def : Pat<(i32 (DivergentBinFrag<riscv_fcvt_x> (f32 VGPR:$rs1), timm:$frm)),
|
||||
// (VFCVT_RTZ_X_F_V (f32 VGPR:$rs1), $frm)>;
|
||||
// def : Pat<(i32 (DivergentBinFrag<riscv_fcvt_xu> (f32 VGPR:$rs1), timm:$frm)),
|
||||
// (VFCVT_RTZ_XU_F_V (f32 VGPR:$rs1), $frm)>;
|
||||
def : PatFXConvert<DivergentUnaryFrag<any_fp_to_sint>,
|
||||
[XLenVT, f32], VFCVT_X_F_V>;
|
||||
def : PatFXConvert<DivergentUnaryFrag<any_fp_to_uint>,
|
||||
|
@ -1485,6 +1480,14 @@ foreach n = [1] in {
|
|||
// TODO: Will enable this after non divergent execution path are implemented.
|
||||
// include "VentusInstrInfoVPseudos.td"
|
||||
|
||||
// TODO: add other instructions
|
||||
def PseudoVOR_VI_IMM11 : PseudoVOPVIIMM11<or>;
|
||||
def PseudoVXOR_VI_IMM11 : PseudoVOPVIIMM11<xor>;
|
||||
def PseudoVRSUB_VI_IMM11 : PseudoVOPIVIMM11<sub>;
|
||||
def PseudoVAND_VI_IMM11 : PseudoVOPVIIMM11<and>;
|
||||
def PseudoVMSNE_VI_IMM11 : PseudoVOPVIIMM11<setne>;
|
||||
def PseudoVMSEQ_VI_IMM11 : PseudoVOPVIIMM11<seteq>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Ventus vALU divergent extended execution patterns
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -1495,6 +1498,6 @@ def : Pat<(XLenVT (DivergentBinFrag<add> (XLenVT VGPR:$rs1), uimm12:$imm)),
|
|||
|
||||
// There already has patterns defined in VentusInstrInfo.td
|
||||
let Predicates = [HasStdExtZfinx] in {
|
||||
// def : Pat<(f32 (bitconvert (i32 GPR:$src))), (VMV_V_X GPR:$src)>;
|
||||
def : Pat<(i32 (bitconvert GPRF32:$src)), (VFMV_V_F GPRF32:$src)>;
|
||||
def : Pat<(f32 (bitconvert (i32 GPR:$src))), (VMV_V_X GPR:$src)>;
|
||||
// def : Pat<(i32 (bitconvert GPRF32:$src)), (VFMV_V_F GPRF32:$src)>;
|
||||
} // Predicates = [HasStdExtZfinx]
|
||||
|
|
|
@ -75,6 +75,9 @@ bool VentusRegextInsertion::insertRegext(MachineBasicBlock &MBB,
|
|||
MachineInstr &MI) {
|
||||
bool hasOverflow = false;
|
||||
|
||||
if (MI.isPseudo())
|
||||
return false;
|
||||
|
||||
// 3 bits encoding for each rd, rs1, rs2, rs3, total 12 bits.
|
||||
// Each 3 bit can encode 0~7 which stands for base register offset 0~7 * 32.
|
||||
unsigned Offsets = 0;
|
||||
|
|
|
@ -11,7 +11,7 @@ define dso_local ventus_kernel void @func(ptr addrspace(1) nocapture noundef ali
|
|||
; VENTUS-NEXT: .cfi_def_cfa_offset 4
|
||||
; VENTUS-NEXT: regext zero, zero, 1
|
||||
; VENTUS-NEXT: vmv.v.x v32, tp
|
||||
; VENTUS-NEXT: sw ra, -12(sp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: sw ra, 0(sp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: .cfi_offset ra, 4
|
||||
; VENTUS-NEXT: .cfi_offset v33.l, 0
|
||||
; VENTUS-NEXT: lw t0, 0(a0)
|
||||
|
@ -35,7 +35,7 @@ define dso_local ventus_kernel void @func(ptr addrspace(1) nocapture noundef ali
|
|||
; VENTUS-NEXT: vlw12.v v2, 0(v1)
|
||||
; VENTUS-NEXT: vadd.vv v0, v2, v0
|
||||
; VENTUS-NEXT: vsw12.v v0, 0(v1)
|
||||
; VENTUS-NEXT: lw ra, -12(sp) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: lw ra, 0(sp) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: addi sp, sp, -12
|
||||
; VENTUS-NEXT: addi tp, tp, -4
|
||||
; VENTUS-NEXT: ret
|
||||
|
|
|
@ -13,7 +13,7 @@ define ventus_kernel void @foo(ptr addrspace(1) noundef align 4 %out) {
|
|||
; VENTUS-NEXT: .cfi_def_cfa_offset 24
|
||||
; VENTUS-NEXT: regext zero, zero, 1
|
||||
; VENTUS-NEXT: vmv.v.x v32, tp
|
||||
; VENTUS-NEXT: sw ra, -8(sp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: sw ra, 0(sp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: .cfi_offset ra, 4
|
||||
; VENTUS-NEXT: .cfi_offset v33.l, 0
|
||||
; VENTUS-NEXT: lw t0, 0(a0)
|
||||
|
@ -58,8 +58,9 @@ define ventus_kernel void @foo(ptr addrspace(1) noundef align 4 %out) {
|
|||
; VENTUS-NEXT: vadd.vv v0, v33, v0
|
||||
; VENTUS-NEXT: vsw12.v v1, 0(v0)
|
||||
; VENTUS-NEXT: .LBB0_3: # %if.end
|
||||
; VENTUS-NEXT: join
|
||||
; VENTUS-NEXT: lw ra, -8(sp) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: # Label of block must be emitted
|
||||
; VENTUS-NEXT: join zero, zero, 0
|
||||
; VENTUS-NEXT: lw ra, 0(sp) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: addi sp, sp, -8
|
||||
; VENTUS-NEXT: addi tp, tp, -24
|
||||
; VENTUS-NEXT: ret
|
||||
|
@ -241,10 +242,9 @@ define dso_local ventus_kernel void @local_memmory1(ptr addrspace(3) nocapture n
|
|||
; VENTUS-LABEL: local_memmory1:
|
||||
; VENTUS: # %bb.0: # %entry
|
||||
; VENTUS-NEXT: lw t0, 0(a0)
|
||||
; VENTUS-NEXT: vmv.v.x v0, t0
|
||||
; VENTUS-NEXT: vlw12.v v1, 0(v0)
|
||||
; VENTUS-NEXT: vadd.vi v1, v1, 1
|
||||
; VENTUS-NEXT: vsw12.v v1, 0(v0)
|
||||
; VENTUS-NEXT: lw t1, 0(t0)
|
||||
; VENTUS-NEXT: addi t1, t1, 1
|
||||
; VENTUS-NEXT: sw t1, 0(t0)
|
||||
; VENTUS-NEXT: ret
|
||||
entry:
|
||||
%0 = load i32, ptr addrspace(3) %b, align 4
|
||||
|
|
|
@ -6,12 +6,12 @@ define dso_local ventus_kernel void @bitcast(float noundef %a, ptr addrspace(5)
|
|||
; VENTUS-LABEL: bitcast:
|
||||
; VENTUS: # %bb.0: # %entry
|
||||
; VENTUS-NEXT: lw t0, 8(a0)
|
||||
; VENTUS-NEXT: flw t1, 0(a0)
|
||||
; VENTUS-NEXT: lw t1, 0(a0)
|
||||
; VENTUS-NEXT: lw t2, 4(a0)
|
||||
; VENTUS-NEXT: vfmv.s.f v0, t1
|
||||
; VENTUS-NEXT: vmv.v.x v0, t1
|
||||
; VENTUS-NEXT: vmv.v.x v1, t2
|
||||
; VENTUS-NEXT: vsw.v v0, 0(v1)
|
||||
; VENTUS-NEXT: fsw t1, 0(t0)
|
||||
; VENTUS-NEXT: sw t1, 0(t0)
|
||||
; VENTUS-NEXT: ret
|
||||
entry:
|
||||
%conv = bitcast float %a to i32
|
||||
|
|
|
@ -14,7 +14,8 @@ define i32 @foo(i32 noundef %cond, i32 noundef %a, i32 noundef %b, i32 noundef %
|
|||
; VENTUS-NEXT: # %bb.1:
|
||||
; VENTUS-NEXT: vrsub.vi v3, v3, 0
|
||||
; VENTUS-NEXT: .LBB0_2: # %entry
|
||||
; VENTUS-NEXT: join
|
||||
; VENTUS-NEXT: # Label of block must be emitted
|
||||
; VENTUS-NEXT: join zero, zero, 0
|
||||
; VENTUS-NEXT: vmadd.vv v2, v1, v3
|
||||
; VENTUS-NEXT: vadd.vx v0, v2, zero
|
||||
; VENTUS-NEXT: ret
|
||||
|
|
|
@ -11,7 +11,7 @@ define dso_local void @foo_fun(ptr addrspace(1) nocapture noundef %A, ptr addrsp
|
|||
; VENTUS-NEXT: .cfi_def_cfa_offset 8
|
||||
; VENTUS-NEXT: regext zero, zero, 1
|
||||
; VENTUS-NEXT: vmv.v.x v32, tp
|
||||
; VENTUS-NEXT: sw ra, -4(sp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: sw ra, 0(sp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: .cfi_offset ra, 8
|
||||
; VENTUS-NEXT: .cfi_offset v33.l, 4
|
||||
; VENTUS-NEXT: .cfi_offset v34.l, 0
|
||||
|
@ -30,7 +30,7 @@ define dso_local void @foo_fun(ptr addrspace(1) nocapture noundef %A, ptr addrsp
|
|||
; VENTUS-NEXT: vlw12.v v2, 0(v0)
|
||||
; VENTUS-NEXT: vadd.vv v1, v2, v1
|
||||
; VENTUS-NEXT: vsw12.v v1, 0(v0)
|
||||
; VENTUS-NEXT: lw ra, -4(sp) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: lw ra, 0(sp) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: addi sp, sp, -4
|
||||
; VENTUS-NEXT: addi tp, tp, -8
|
||||
; VENTUS-NEXT: ret
|
||||
|
|
|
@ -7,7 +7,7 @@ define ventus_kernel void @foo_ker(ptr addrspace(1) nocapture noundef align 4 %A
|
|||
; VENTUS: # %bb.0: # %entry
|
||||
; VENTUS-NEXT: addi sp, sp, 12
|
||||
; VENTUS-NEXT: .cfi_def_cfa_offset 12
|
||||
; VENTUS-NEXT: sw ra, -12(sp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: sw ra, 0(sp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: .cfi_offset ra, 0
|
||||
; VENTUS-NEXT: lw t0, 0(a0)
|
||||
; VENTUS-NEXT: sw t0, -8(sp) # 4-byte Folded Spill
|
||||
|
@ -24,7 +24,7 @@ define ventus_kernel void @foo_ker(ptr addrspace(1) nocapture noundef align 4 %A
|
|||
; VENTUS-NEXT: vlw12.v v2, 0(v0)
|
||||
; VENTUS-NEXT: vadd.vv v1, v2, v1
|
||||
; VENTUS-NEXT: vsw12.v v1, 0(v0)
|
||||
; VENTUS-NEXT: lw ra, -12(sp) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: lw ra, 0(sp) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: addi sp, sp, -12
|
||||
; VENTUS-NEXT: ret
|
||||
entry:
|
||||
|
@ -47,7 +47,7 @@ define dso_local void @foo_fun(ptr addrspace(1) nocapture noundef %A, ptr addrsp
|
|||
; VENTUS-NEXT: .cfi_def_cfa_offset 8
|
||||
; VENTUS-NEXT: regext zero, zero, 1
|
||||
; VENTUS-NEXT: vmv.v.x v32, tp
|
||||
; VENTUS-NEXT: sw ra, -4(sp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: sw ra, 0(sp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: .cfi_offset ra, 8
|
||||
; VENTUS-NEXT: .cfi_offset v33.l, 4
|
||||
; VENTUS-NEXT: .cfi_offset v34.l, 0
|
||||
|
@ -66,7 +66,7 @@ define dso_local void @foo_fun(ptr addrspace(1) nocapture noundef %A, ptr addrsp
|
|||
; VENTUS-NEXT: vlw12.v v2, 0(v0)
|
||||
; VENTUS-NEXT: vadd.vv v1, v2, v1
|
||||
; VENTUS-NEXT: vsw12.v v1, 0(v0)
|
||||
; VENTUS-NEXT: lw ra, -4(sp) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: lw ra, 0(sp) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: addi sp, sp, -4
|
||||
; VENTUS-NEXT: addi tp, tp, -8
|
||||
; VENTUS-NEXT: ret
|
||||
|
|
|
@ -12,7 +12,7 @@ define dso_local ventus_kernel void @kernel_calling_convention(ptr addrspace(1)
|
|||
; VENTUS: # %bb.0: # %entry
|
||||
; VENTUS-NEXT: addi sp, sp, 16
|
||||
; VENTUS-NEXT: .cfi_def_cfa_offset 16
|
||||
; VENTUS-NEXT: sw ra, -16(sp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: sw ra, 0(sp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: .cfi_offset ra, 0
|
||||
; VENTUS-NEXT: lw t0, 4(a0)
|
||||
; VENTUS-NEXT: sw t0, -12(sp) # 4-byte Folded Spill
|
||||
|
@ -30,13 +30,12 @@ define dso_local ventus_kernel void @kernel_calling_convention(ptr addrspace(1)
|
|||
; VENTUS-NEXT: vadd.vx v0, v0, t1
|
||||
; VENTUS-NEXT: vmv.v.x v1, s0
|
||||
; VENTUS-NEXT: vsw12.v v0, 0(v1)
|
||||
; VENTUS-NEXT: lw t0, -12(sp) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: vmv.v.x v0, t0
|
||||
; VENTUS-NEXT: vlw12.v v1, 0(v0)
|
||||
; VENTUS-NEXT: lw t0, 0(t2)
|
||||
; VENTUS-NEXT: vadd.vx v1, v1, t0
|
||||
; VENTUS-NEXT: vsw12.v v1, 0(v0)
|
||||
; VENTUS-NEXT: lw ra, -16(sp) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: lw s0, -12(sp) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: lw t0, 0(s0)
|
||||
; VENTUS-NEXT: lw t2, 0(t2)
|
||||
; VENTUS-NEXT: add t0, t2, t0
|
||||
; VENTUS-NEXT: sw t0, 0(s0)
|
||||
; VENTUS-NEXT: lw ra, 0(sp) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: addi sp, sp, -16
|
||||
; VENTUS-NEXT: ret
|
||||
entry:
|
||||
|
@ -82,16 +81,16 @@ entry:
|
|||
define dso_local i32 @non_kernel_calling_convention(ptr nocapture noundef readonly %a1, ptr nocapture noundef readonly %a2, ptr nocapture noundef readonly %a3, ptr nocapture noundef readonly %a4, ptr nocapture noundef readonly %a5, ptr nocapture noundef readonly %a6, ptr nocapture noundef readonly %a7, ptr nocapture noundef readonly %a8, ptr nocapture noundef readonly %a9, ptr nocapture noundef readonly %a10, ptr nocapture noundef readonly %a11, ptr nocapture noundef readonly %a12, ptr nocapture noundef readonly %a13, ptr nocapture noundef readonly %a14, ptr nocapture noundef readonly %a15, ptr nocapture noundef readonly %a16, ptr nocapture noundef readonly %a17, ptr nocapture noundef readonly %a18, ptr nocapture noundef readonly %a19, ptr nocapture noundef readonly %a20, ptr nocapture noundef readonly %a21, ptr nocapture noundef readonly %a22, ptr nocapture noundef readonly %a23, ptr nocapture noundef readonly %a24, ptr nocapture noundef readonly %a25, ptr nocapture noundef readonly %a26, ptr nocapture noundef readonly %a27, ptr nocapture noundef readonly %a28, ptr nocapture noundef readonly %a29, ptr nocapture noundef readonly %a30, ptr nocapture noundef readonly %a31, ptr nocapture noundef readonly %a32, ptr addrspace(3) nocapture noundef readonly %a33, ptr addrspace(5) nocapture noundef readonly %a34) local_unnamed_addr #2 {
|
||||
; VENTUS-LABEL: non_kernel_calling_convention:
|
||||
; VENTUS: # %bb.0: # %entry
|
||||
; VENTUS-NEXT: addi tp, tp, 28
|
||||
; VENTUS-NEXT: .cfi_def_cfa_offset 28
|
||||
; VENTUS-NEXT: addi tp, tp, 16
|
||||
; VENTUS-NEXT: .cfi_def_cfa_offset 16
|
||||
; VENTUS-NEXT: regext zero, zero, 1
|
||||
; VENTUS-NEXT: vmv.v.x v32, tp
|
||||
; VENTUS-NEXT: .cfi_offset v33.l, 4
|
||||
; VENTUS-NEXT: .cfi_offset v34.l, 0
|
||||
; VENTUS-NEXT: regext zero, zero, 9
|
||||
; VENTUS-NEXT: vlw.v v33, -24(v32)
|
||||
; VENTUS-NEXT: vlw.v v33, -12(v32)
|
||||
; VENTUS-NEXT: regext zero, zero, 9
|
||||
; VENTUS-NEXT: vlw.v v34, -28(v32)
|
||||
; VENTUS-NEXT: vlw.v v34, -16(v32)
|
||||
; VENTUS-NEXT: vlw12.v v0, 0(v0)
|
||||
; VENTUS-NEXT: vlw12.v v1, 0(v1)
|
||||
; VENTUS-NEXT: vlw12.v v2, 0(v2)
|
||||
|
@ -161,7 +160,7 @@ define dso_local i32 @non_kernel_calling_convention(ptr nocapture noundef readon
|
|||
; VENTUS-NEXT: vadd.vv v0, v0, v1
|
||||
; VENTUS-NEXT: vadd.vv v0, v0, v2
|
||||
; VENTUS-NEXT: vadd.vv v0, v0, v3
|
||||
; VENTUS-NEXT: addi tp, tp, -28
|
||||
; VENTUS-NEXT: addi tp, tp, -16
|
||||
; VENTUS-NEXT: ret
|
||||
entry:
|
||||
%0 = load i32, ptr %a1, align 4
|
||||
|
@ -270,7 +269,7 @@ define dso_local i32 @test_add(ptr nocapture noundef readonly %a, ptr nocapture
|
|||
; VENTUS-NEXT: .cfi_def_cfa_offset 8
|
||||
; VENTUS-NEXT: regext zero, zero, 1
|
||||
; VENTUS-NEXT: vmv.v.x v32, tp
|
||||
; VENTUS-NEXT: sw ra, -4(sp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: sw ra, 0(sp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: .cfi_offset ra, 0
|
||||
; VENTUS-NEXT: vlw12.v v0, 0(v0)
|
||||
; VENTUS-NEXT: vadd.vi v0, v0, 1
|
||||
|
@ -288,7 +287,7 @@ define dso_local i32 @test_add(ptr nocapture noundef readonly %a, ptr nocapture
|
|||
; VENTUS-NEXT: regext zero, zero, 8
|
||||
; VENTUS-NEXT: vlw.v v1, -8(v32)
|
||||
; VENTUS-NEXT: vadd.vv v0, v1, v0
|
||||
; VENTUS-NEXT: lw ra, -4(sp) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: lw ra, 0(sp) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: addi sp, sp, -4
|
||||
; VENTUS-NEXT: addi tp, tp, -8
|
||||
; VENTUS-NEXT: ret
|
||||
|
|
|
@ -466,25 +466,6 @@ define float @fsgnjnx_v(float %a) nounwind {
|
|||
ret float %1
|
||||
}
|
||||
|
||||
define i32 @fcvt_rtz_x_f_v(float %a) nounwind {
|
||||
; VENTUS-LABEL: fcvt_rtz_x_f_v:
|
||||
; VENTUS: # %bb.0:
|
||||
; VENTUS-NEXT: vfcvt.rtz.x.f.v v0, v0
|
||||
; VENTUS-NEXT: ret
|
||||
%1 = call float @llvm.trunc.f32(float %a)
|
||||
%conv = fptosi float %1 to i32
|
||||
ret i32 %conv
|
||||
}
|
||||
|
||||
define i32 @fcvt_rtz_xu_f_v(float %x) {
|
||||
; VENTUS-LABEL: fcvt_rtz_xu_f_v:
|
||||
; VENTUS: # %bb.0:
|
||||
; VENTUS-NEXT: vfcvt.rtz.xu.f.v v0, v0
|
||||
; VENTUS-NEXT: ret
|
||||
%a = call float @llvm.trunc.f32(float %x)
|
||||
%b = fptoui float %a to i32
|
||||
ret i32 %b
|
||||
}
|
||||
|
||||
@global_val = dso_local global float 0x3FF547AE20000000, align 4
|
||||
declare float @llvm.sqrt.f32(float %Val)
|
||||
|
|
|
@ -25,7 +25,7 @@ define dso_local ventus_kernel void @foo(i32 noundef %a, i32 noundef %b, ptr add
|
|||
; VENTUS: # %bb.0: # %entry
|
||||
; VENTUS-NEXT: addi sp, sp, 8
|
||||
; VENTUS-NEXT: .cfi_def_cfa_offset 8
|
||||
; VENTUS-NEXT: sw ra, -8(sp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: sw ra, 0(sp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: .cfi_offset ra, 0
|
||||
; VENTUS-NEXT: lw t0, 8(a0)
|
||||
; VENTUS-NEXT: sw t0, -4(sp) # 4-byte Folded Spill
|
||||
|
@ -37,7 +37,7 @@ define dso_local ventus_kernel void @foo(i32 noundef %a, i32 noundef %b, ptr add
|
|||
; VENTUS-NEXT: lw t0, -4(sp) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: vmv.v.x v1, t0
|
||||
; VENTUS-NEXT: vsw12.v v0, 0(v1)
|
||||
; VENTUS-NEXT: lw ra, -8(sp) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: lw ra, 0(sp) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: addi sp, sp, -8
|
||||
; VENTUS-NEXT: ret
|
||||
entry:
|
||||
|
|
|
@ -0,0 +1,168 @@
|
|||
; RUN: llc -mtriple=riscv32 -mcpu=ventus-gpgpu -verify-machineinstrs < %s \
|
||||
; RUN: | FileCheck %s
|
||||
|
||||
%struct.MyStruct = type { i32, i8, i64 }
|
||||
|
||||
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) vscale_range(1,2048)
|
||||
; Here we foucus on kernel struct argument
|
||||
define dso_local ventus_kernel void @test_kernel1(i8 noundef %c, %struct.MyStruct %st.coerce, i8 noundef %uc, ptr addrspace(1) nocapture noundef writeonly align 4 %result) {
|
||||
entry:
|
||||
; CHECK: lb t0, 0(a0)
|
||||
; CHECK: lbu t1, 24(a0)
|
||||
; CHECK: lw t2, 28(a0)
|
||||
; CHECK: lw s0, 8(a0)
|
||||
%st.coerce.fca.0.extract = extractvalue %struct.MyStruct %st.coerce, 0
|
||||
%conv = sitofp i8 %c to float
|
||||
store float %conv, ptr addrspace(1) %result, align 4
|
||||
%conv1 = sitofp i32 %st.coerce.fca.0.extract to float
|
||||
%arrayidx2 = getelementptr inbounds float, ptr addrspace(1) %result, i32 1
|
||||
store float %conv1, ptr addrspace(1) %arrayidx2, align 4
|
||||
%conv3 = uitofp i8 %uc to float
|
||||
%arrayidx4 = getelementptr inbounds float, ptr addrspace(1) %result, i32 2
|
||||
store float %conv3, ptr addrspace(1) %arrayidx4, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) vscale_range(1,2048)
|
||||
; Here we foucus on scalar argument
|
||||
define dso_local ventus_kernel void @test_kernel2(i8 noundef %c, i8 noundef %uc, i16 noundef %s, i16 noundef %us, i32 noundef %i, i32 noundef %ui, float noundef %f, ptr addrspace(1) nocapture noundef writeonly align 4 %result) {
|
||||
entry:
|
||||
; CHECK: flw t0, 24(a0)
|
||||
; CHECK: lw t1, 20(a0)
|
||||
; CHECK: lw t2, 16(a0)
|
||||
; CHECK: lhu s0, 12(a0)
|
||||
; CHECK: lh s1, 8(a0)
|
||||
; CHECK: lb a1, 0(a0)
|
||||
; CHECK: lbu a2, 4(a0)
|
||||
; CHECK: lw a0, 28(a0)
|
||||
%conv = sitofp i8 %c to float
|
||||
store float %conv, ptr addrspace(1) %result, align 4
|
||||
%conv1 = uitofp i8 %uc to float
|
||||
%arrayidx2 = getelementptr inbounds float, ptr addrspace(1) %result, i32 1
|
||||
store float %conv1, ptr addrspace(1) %arrayidx2, align 4
|
||||
%conv3 = sitofp i16 %s to float
|
||||
%arrayidx4 = getelementptr inbounds float, ptr addrspace(1) %result, i32 2
|
||||
store float %conv3, ptr addrspace(1) %arrayidx4, align 4
|
||||
%conv5 = uitofp i16 %us to float
|
||||
%arrayidx6 = getelementptr inbounds float, ptr addrspace(1) %result, i32 3
|
||||
store float %conv5, ptr addrspace(1) %arrayidx6, align 4
|
||||
%conv7 = sitofp i32 %i to float
|
||||
%arrayidx8 = getelementptr inbounds float, ptr addrspace(1) %result, i32 4
|
||||
store float %conv7, ptr addrspace(1) %arrayidx8, align 4
|
||||
%conv9 = uitofp i32 %ui to float
|
||||
%arrayidx10 = getelementptr inbounds float, ptr addrspace(1) %result, i32 5
|
||||
store float %conv9, ptr addrspace(1) %arrayidx10, align 4
|
||||
%arrayidx11 = getelementptr inbounds float, ptr addrspace(1) %result, i32 6
|
||||
store float %f, ptr addrspace(1) %arrayidx11, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: convergent mustprogress nofree norecurse nounwind willreturn memory(argmem: write) vscale_range(1,2048)
|
||||
; Here we foucus on vector argument
|
||||
define dso_local ventus_kernel void @test_kernel3(<2 x i8> noundef %c, <2 x i8> noundef %uc, <2 x i16> noundef %s, <2 x i16> noundef %us, <2 x i32> noundef %i, <2 x i32> noundef %ui, <2 x float> noundef %f, ptr addrspace(1) nocapture noundef writeonly align 8 %result) {
|
||||
entry:
|
||||
;CHECK: flw t0, 36(a0)
|
||||
;CHECK: flw t0, 32(a0)
|
||||
;CHECK: lw t0, 28(a0)
|
||||
;CHECK: lw t0, 24(a0)
|
||||
;CHECK: lw t0, 20(a0)
|
||||
;CHECK: lw t0, 16(a0)
|
||||
;CHECK: lhu t0, 14(a0)
|
||||
;CHECK: lhu t0, 12(a0)
|
||||
;CHECK: lhu t0, 10(a0)
|
||||
;CHECK: lhu t0, 8(a0)
|
||||
;CHECK: lbu t0, 5(a0)
|
||||
;CHECK: lbu t0, 4(a0)
|
||||
;CHECK: lbu t0, 1(a0)
|
||||
;CHECK: lbu t1, 0(a0)
|
||||
;CHECK: lw t2, 40(a0)
|
||||
%call = call <2 x float> @_Z14convert_float2Dv2_c(<2 x i8> noundef %c)
|
||||
store <2 x float> %call, ptr addrspace(1) %result, align 8
|
||||
%call1 = call <2 x float> @_Z14convert_float2Dv2_h(<2 x i8> noundef %uc)
|
||||
%arrayidx2 = getelementptr inbounds <2 x float>, ptr addrspace(1) %result, i32 1
|
||||
store <2 x float> %call1, ptr addrspace(1) %arrayidx2, align 8
|
||||
%call3 = call <2 x float> @_Z14convert_float2Dv2_s(<2 x i16> noundef %s)
|
||||
%arrayidx4 = getelementptr inbounds <2 x float>, ptr addrspace(1) %result, i32 2
|
||||
store <2 x float> %call3, ptr addrspace(1) %arrayidx4, align 8
|
||||
%call5 = call <2 x float> @_Z14convert_float2Dv2_t(<2 x i16> noundef %us)
|
||||
%arrayidx6 = getelementptr inbounds <2 x float>, ptr addrspace(1) %result, i32 3
|
||||
store <2 x float> %call5, ptr addrspace(1) %arrayidx6, align 8
|
||||
%call7 = call <2 x float> @_Z14convert_float2Dv2_i(<2 x i32> noundef %i)
|
||||
%arrayidx8 = getelementptr inbounds <2 x float>, ptr addrspace(1) %result, i32 4
|
||||
store <2 x float> %call7, ptr addrspace(1) %arrayidx8, align 8
|
||||
%call9 = call <2 x float> @_Z14convert_float2Dv2_j(<2 x i32> noundef %ui)
|
||||
%arrayidx10 = getelementptr inbounds <2 x float>, ptr addrspace(1) %result, i32 5
|
||||
store <2 x float> %call9, ptr addrspace(1) %arrayidx10, align 8
|
||||
%call11 = call <2 x float> @_Z14convert_float2Dv2_f(<2 x float> noundef %f)
|
||||
%arrayidx12 = getelementptr inbounds <2 x float>, ptr addrspace(1) %result, i32 6
|
||||
store <2 x float> %call11, ptr addrspace(1) %arrayidx12, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
declare dso_local <2 x float> @_Z14convert_float2Dv2_c(<2 x i8> noundef)
|
||||
declare dso_local <2 x float> @_Z14convert_float2Dv2_h(<2 x i8> noundef)
|
||||
declare dso_local <2 x float> @_Z14convert_float2Dv2_s(<2 x i16> noundef)
|
||||
declare dso_local <2 x float> @_Z14convert_float2Dv2_t(<2 x i16> noundef)
|
||||
declare dso_local <2 x float> @_Z14convert_float2Dv2_i(<2 x i32> noundef)
|
||||
declare dso_local <2 x float> @_Z14convert_float2Dv2_j(<2 x i32> noundef)
|
||||
declare dso_local <2 x float> @_Z14convert_float2Dv2_f(<2 x float> noundef)
|
||||
|
||||
; Function Attrs: convergent mustprogress nofree norecurse nounwind willreturn memory(argmem: write) vscale_range(1,2048)
|
||||
; Here we foucus on vector argument
|
||||
define dso_local ventus_kernel void @test_kernel4(<4 x i8> noundef %c, <4 x i8> noundef %uc, <4 x i16> noundef %s, <4 x i16> noundef %us, <4 x i32> noundef %i, <4 x i32> noundef %ui, <4 x float> noundef %f, ptr addrspace(1) nocapture noundef writeonly align 16 %result) {
|
||||
entry:
|
||||
;CHECK: flw t0, 76(a0)
|
||||
;CHECK: flw t0, 72(a0)
|
||||
;CHECK: flw t0, 68(a0)
|
||||
;CHECK: flw t0, 64(a0)
|
||||
;CHECK: lw t0, 60(a0)
|
||||
;CHECK: lw t0, 56(a0)
|
||||
;CHECK: lw t0, 52(a0)
|
||||
;CHECK: lw t0, 48(a0)
|
||||
;CHECK: lw t0, 44(a0)
|
||||
;CHECK: lw t0, 40(a0)
|
||||
;CHECK: lw t0, 36(a0)
|
||||
;CHECK: lw t0, 32(a0)
|
||||
;CHECK: lhu t0, 22(a0)
|
||||
;CHECK: lhu t0, 20(a0)
|
||||
;CHECK: lhu t0, 18(a0)
|
||||
;CHECK: lhu t0, 16(a0)
|
||||
;CHECK: lhu t0, 14(a0)
|
||||
;CHECK: lhu t0, 12(a0)
|
||||
;CHECK: lhu t0, 10(a0)
|
||||
;CHECK: lhu t0, 8(a0)
|
||||
;CHECK: lw t0, 4(a0)
|
||||
;CHECK: lbu t1, 4(a0)
|
||||
;CHECK: lw t0, 80(a0)
|
||||
;CHECK: lw t0, 0(a0)
|
||||
;CHECK: lbu t1, 0(a0)
|
||||
%call = call <4 x float> @_Z14convert_float4Dv4_c(<4 x i8> noundef %c)
|
||||
store <4 x float> %call, ptr addrspace(1) %result, align 16
|
||||
%call1 = call <4 x float> @_Z14convert_float4Dv4_h(<4 x i8> noundef %uc)
|
||||
%arrayidx2 = getelementptr inbounds <4 x float>, ptr addrspace(1) %result, i32 1
|
||||
store <4 x float> %call1, ptr addrspace(1) %arrayidx2, align 16
|
||||
%call3 = call <4 x float> @_Z14convert_float4Dv4_s(<4 x i16> noundef %s)
|
||||
%arrayidx4 = getelementptr inbounds <4 x float>, ptr addrspace(1) %result, i32 2
|
||||
store <4 x float> %call3, ptr addrspace(1) %arrayidx4, align 16
|
||||
%call5 = call <4 x float> @_Z14convert_float4Dv4_t(<4 x i16> noundef %us)
|
||||
%arrayidx6 = getelementptr inbounds <4 x float>, ptr addrspace(1) %result, i32 3
|
||||
store <4 x float> %call5, ptr addrspace(1) %arrayidx6, align 16
|
||||
%call7 = call <4 x float> @_Z14convert_float4Dv4_i(<4 x i32> noundef %i)
|
||||
%arrayidx8 = getelementptr inbounds <4 x float>, ptr addrspace(1) %result, i32 4
|
||||
store <4 x float> %call7, ptr addrspace(1) %arrayidx8, align 16
|
||||
%call9 = call <4 x float> @_Z14convert_float4Dv4_j(<4 x i32> noundef %ui)
|
||||
%arrayidx10 = getelementptr inbounds <4 x float>, ptr addrspace(1) %result, i32 5
|
||||
store <4 x float> %call9, ptr addrspace(1) %arrayidx10, align 16
|
||||
%call11 = call <4 x float> @_Z14convert_float4Dv4_f(<4 x float> noundef %f)
|
||||
%arrayidx12 = getelementptr inbounds <4 x float>, ptr addrspace(1) %result, i32 6
|
||||
store <4 x float> %call11, ptr addrspace(1) %arrayidx12, align 16
|
||||
ret void
|
||||
}
|
||||
|
||||
declare dso_local <4 x float> @_Z14convert_float4Dv4_c(<4 x i8> noundef)
|
||||
declare dso_local <4 x float> @_Z14convert_float4Dv4_h(<4 x i8> noundef)
|
||||
declare dso_local <4 x float> @_Z14convert_float4Dv4_s(<4 x i16> noundef)
|
||||
declare dso_local <4 x float> @_Z14convert_float4Dv4_t(<4 x i16> noundef)
|
||||
declare dso_local <4 x float> @_Z14convert_float4Dv4_i(<4 x i32> noundef)
|
||||
declare dso_local <4 x float> @_Z14convert_float4Dv4_j(<4 x i32> noundef)
|
||||
declare dso_local <4 x float> @_Z14convert_float4Dv4_f(<4 x float> noundef)
|
|
@ -0,0 +1,200 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=riscv32 -mcpu=ventus-gpgpu -verify-machineinstrs < %s \
|
||||
; RUN: | FileCheck %s
|
||||
|
||||
define dso_local i32 @regexti1(i32 noundef %a) {
|
||||
; CHECK-LABEL: regexti1:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: regexti zero, zero, 768
|
||||
; CHECK-NEXT: vor.vi v0, v0, 15
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%res = or i32 %a, 399
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
define dso_local i32 @regexti2(i32 noundef %a) {
|
||||
; CHECK-LABEL: regexti2:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: regexti zero, zero, -832
|
||||
; CHECK-NEXT: vor.vi v0, v0, -15
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%res = or i32 %a, -399
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
define dso_local i32 @regexti1_1(i32 noundef %a) {
|
||||
; CHECK-LABEL: regexti1_1:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: regexti zero, zero, 0
|
||||
; CHECK-NEXT: vor.vi v0, v0, -15
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%res = or i32 %a, 17
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
define dso_local i32 @regexti2_1(i32 noundef %a) {
|
||||
; CHECK-LABEL: regexti2_1:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: regexti zero, zero, -64
|
||||
; CHECK-NEXT: vor.vi v0, v0, 15
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%res = or i32 %a, -17
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
define dso_local i32 @regexti1_2(i32 noundef %a) {
|
||||
; CHECK-LABEL: regexti1_2:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: vor.vi v0, v0, 13
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%res = or i32 %a, 13
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
define dso_local i32 @regexti2_2(i32 noundef %a) {
|
||||
; CHECK-LABEL: regexti2_2:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: vor.vi v0, v0, -13
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%res = or i32 %a, -13
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
define dso_local i32 @regexti3(i32 noundef %a) {
|
||||
; CHECK-LABEL: regexti3:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: regexti zero, zero, 448
|
||||
; CHECK-NEXT: vnot.v v0, v0
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%res = xor i32 %a, 255
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
define dso_local i32 @regexti4(i32 noundef %a) {
|
||||
; CHECK-LABEL: regexti4:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: regexti zero, zero, -832
|
||||
; CHECK-NEXT: vxor.vi v0, v0, -15
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%res = xor i32 %a, -399
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
define dso_local i32 @regexti5(i32 noundef %a) {
|
||||
; CHECK-LABEL: regexti5:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: regexti zero, zero, 768
|
||||
; CHECK-NEXT: vrsub.vi v0, v0, 15
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%res = sub i32 399, %a
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
define dso_local i32 @regexti6(i32 noundef %a) {
|
||||
; CHECK-LABEL: regexti6:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: regexti zero, zero, -832
|
||||
; CHECK-NEXT: vrsub.vi v0, v0, -15
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%res = sub i32 -399, %a
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
define dso_local i32 @regexti7(i32 noundef %a) {
|
||||
; CHECK-LABEL: regexti7:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: regexti zero, zero, 768
|
||||
; CHECK-NEXT: vand.vi v0, v0, 15
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%res = and i32 %a, 399
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
define dso_local i32 @regexti8(i32 noundef %a) {
|
||||
; CHECK-LABEL: regexti8:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: regexti zero, zero, -832
|
||||
; CHECK-NEXT: vand.vi v0, v0, -15
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%res = and i32 %a, -399
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
define dso_local i1 @regexti9(i32 noundef %a) {
|
||||
; CHECK-LABEL: regexti9:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: regexti zero, zero, 768
|
||||
; CHECK-NEXT: vmseq.vi v0, v0, 15
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%res = icmp eq i32 %a, 399
|
||||
ret i1 %res
|
||||
}
|
||||
|
||||
define dso_local i1 @regexti10(i32 noundef %a) {
|
||||
; CHECK-LABEL: regexti10:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: regexti zero, zero, -832
|
||||
; CHECK-NEXT: vmseq.vi v0, v0, -15
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%res = icmp eq i32 %a, -399
|
||||
ret i1 %res
|
||||
}
|
||||
|
||||
define dso_local i1 @regexti11(i32 noundef %a) {
|
||||
; CHECK-LABEL: regexti11:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: regexti zero, zero, 768
|
||||
; CHECK-NEXT: vmsne.vi v0, v0, 15
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%res = icmp ne i32 %a, 399
|
||||
ret i1 %res
|
||||
}
|
||||
|
||||
define dso_local i1 @regexti12(i32 noundef %a) {
|
||||
; CHECK-LABEL: regexti12:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: regexti zero, zero, -832
|
||||
; CHECK-NEXT: vmsne.vi v0, v0, -15
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%res = icmp ne i32 %a, -399
|
||||
ret i1 %res
|
||||
}
|
||||
|
||||
define dso_local ventus_kernel void @regexti13(ptr addrspace(1) nocapture
|
||||
noundef align 4 %A, ptr addrspace(3) nocapture noundef align 4 %B) {
|
||||
; CHECK-LABEL: regexti13:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK: regexti zero, zero, 769
|
||||
; CHECK-NEXT: vand.vi v33, v0, 15
|
||||
|
||||
entry:
|
||||
%call = tail call i32 @_Z13get_global_idj(i32 noundef 0)
|
||||
%calland = and i32 %call, 399
|
||||
%call1 = tail call i32 @_Z12get_local_idj(i32 noundef 0)
|
||||
%arrayidx = getelementptr inbounds i32, ptr addrspace(3) %B, i32 %call1
|
||||
%0 = load i32, ptr addrspace(3) %arrayidx, align 4
|
||||
%arrayidx2 = getelementptr inbounds i32, ptr addrspace(1) %A, i32 %calland
|
||||
%1 = load i32, ptr addrspace(1) %arrayidx2, align 4
|
||||
%add = add nsw i32 %1, %0
|
||||
store i32 %add, ptr addrspace(1) %arrayidx2, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
declare dso_local i32 @_Z13get_global_idj(i32 noundef)
|
||||
declare dso_local i32 @_Z12get_local_idj(i32 noundef)
|
|
@ -11,17 +11,14 @@ define dso_local ventus_kernel void @usage(ptr addrspace(1) nocapture noundef al
|
|||
; VENTUS-LABEL: usage:
|
||||
; VENTUS: # %bb.0: # %entry
|
||||
; VENTUS-NEXT: addi sp, sp, 4
|
||||
; VENTUS-NEXT: sw ra, -4(sp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: sw ra, 0(sp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: lw t0, 4(a0)
|
||||
; VENTUS-NEXT: lw t1, 0(a0)
|
||||
; VENTUS-NEXT: vmv.v.x v0, t0
|
||||
; VENTUS-NEXT: vlw12.v v0, 0(v0)
|
||||
; VENTUS-NEXT: lw t0, 0(t1)
|
||||
; VENTUS-NEXT: vadd.vx v0, v0, t0
|
||||
; VENTUS-NEXT: vmv.v.x v1, t1
|
||||
; VENTUS-NEXT: vsw12.v v0, 0(v1)
|
||||
; VENTUS-NEXT: lw ra, -4(sp) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: barrier x0, x0, 1
|
||||
; VENTUS-NEXT: lw t0, 0(t0)
|
||||
; VENTUS-NEXT: lw t2, 0(t1)
|
||||
; VENTUS-NEXT: add t0, t2, t0
|
||||
; VENTUS-NEXT: sw t0, 0(t1)
|
||||
; VENTUS-NEXT: lw ra, 0(sp) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: addi sp, sp, -4
|
||||
; VENTUS-NEXT: ret
|
||||
entry:
|
||||
|
|
|
@ -13,23 +13,23 @@ target triple = "riscv32"
|
|||
define dso_local i32 @printf(ptr addrspace(2) noundef %fmt, ...) {
|
||||
; VENTUS-LABEL: printf:
|
||||
; VENTUS: # %bb.0: # %entry
|
||||
; VENTUS-NEXT: addi tp, tp, 64
|
||||
; VENTUS-NEXT: .cfi_def_cfa_offset 64
|
||||
; VENTUS-NEXT: addi tp, tp, 40
|
||||
; VENTUS-NEXT: .cfi_def_cfa_offset 40
|
||||
; VENTUS-NEXT: vmv.v.x v8, tp
|
||||
; VENTUS-NEXT: vsw.v v7, -60(v8)
|
||||
; VENTUS-NEXT: vsw.v v6, -56(v8)
|
||||
; VENTUS-NEXT: vsw.v v5, -52(v8)
|
||||
; VENTUS-NEXT: vsw.v v4, -48(v8)
|
||||
; VENTUS-NEXT: vsw.v v3, -44(v8)
|
||||
; VENTUS-NEXT: vsw.v v2, -40(v8)
|
||||
; VENTUS-NEXT: vsw.v v1, -36(v8)
|
||||
; VENTUS-NEXT: addi t0, tp, -36
|
||||
; VENTUS-NEXT: vsw.v v7, -36(v8)
|
||||
; VENTUS-NEXT: vsw.v v6, -32(v8)
|
||||
; VENTUS-NEXT: vsw.v v5, -28(v8)
|
||||
; VENTUS-NEXT: vsw.v v4, -24(v8)
|
||||
; VENTUS-NEXT: vsw.v v3, -20(v8)
|
||||
; VENTUS-NEXT: vsw.v v2, -16(v8)
|
||||
; VENTUS-NEXT: vsw.v v1, -12(v8)
|
||||
; VENTUS-NEXT: addi t0, tp, -12
|
||||
; VENTUS-NEXT: vmv.v.x v0, t0
|
||||
; VENTUS-NEXT: vsw.v v0, -36(v8)
|
||||
; VENTUS-NEXT: addi t0, tp, -32
|
||||
; VENTUS-NEXT: vsw.v v0, -12(v8)
|
||||
; VENTUS-NEXT: addi t0, tp, -8
|
||||
; VENTUS-NEXT: vmv.v.x v0, t0
|
||||
; VENTUS-NEXT: vsw.v v0, -36(v8)
|
||||
; VENTUS-NEXT: addi tp, tp, -64
|
||||
; VENTUS-NEXT: vsw.v v0, -12(v8)
|
||||
; VENTUS-NEXT: addi tp, tp, -40
|
||||
; VENTUS-NEXT: ret
|
||||
entry:
|
||||
%retval = alloca i32, align 4, addrspace(5)
|
||||
|
|
|
@ -8,7 +8,7 @@ define dso_local i32 @branch(i32 noundef %dim) local_unnamed_addr {
|
|||
; VENTUS: # %bb.0: # %entry
|
||||
; VENTUS-NEXT: addi sp, sp, 4
|
||||
; VENTUS-NEXT: .cfi_def_cfa_offset 4
|
||||
; VENTUS-NEXT: sw ra, -4(sp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: sw ra, 0(sp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: .cfi_offset ra, 0
|
||||
; VENTUS-NEXT: vmv.v.x v0, zero
|
||||
; VENTUS-NEXT: call _Z13get_global_idj
|
||||
|
@ -37,7 +37,7 @@ define dso_local i32 @branch(i32 noundef %dim) local_unnamed_addr {
|
|||
; VENTUS-NEXT: # Label of block must be emitted
|
||||
; VENTUS-NEXT: join zero, zero, 0
|
||||
; VENTUS-NEXT: vadd.vx v0, v1, zero
|
||||
; VENTUS-NEXT: lw ra, -4(sp) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: lw ra, 0(sp) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: addi sp, sp, -4
|
||||
; VENTUS-NEXT: ret
|
||||
entry:
|
||||
|
@ -63,7 +63,7 @@ define dso_local ventus_kernel void @loop_branch(ptr addrspace(1) nocapture noun
|
|||
; VENTUS: # %bb.0: # %entry
|
||||
; VENTUS-NEXT: addi sp, sp, 8
|
||||
; VENTUS-NEXT: .cfi_def_cfa_offset 8
|
||||
; VENTUS-NEXT: sw ra, -8(sp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: sw ra, 0(sp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: .cfi_offset ra, 0
|
||||
; VENTUS-NEXT: sw a0, -4(sp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: vmv.v.x v0, zero
|
||||
|
@ -88,7 +88,7 @@ define dso_local ventus_kernel void @loop_branch(ptr addrspace(1) nocapture noun
|
|||
; VENTUS-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; VENTUS-NEXT: vlw12.v v4, 0(v3)
|
||||
; VENTUS-NEXT: vadd.vv v2, v2, v4
|
||||
; VENTUS-NEXT: vadd.vi v0, v0, -1
|
||||
; VENTUS-NEXT: vsub12.vi v0, v0, 1
|
||||
; VENTUS-NEXT: vsw12.v v2, 0(v1)
|
||||
; VENTUS-NEXT: .Lpcrel_hi3:
|
||||
; VENTUS-NEXT: auipc t1, %pcrel_hi(.LBB1_3)
|
||||
|
@ -97,7 +97,7 @@ define dso_local ventus_kernel void @loop_branch(ptr addrspace(1) nocapture noun
|
|||
; VENTUS-NEXT: .LBB1_3: # %for.cond.cleanup
|
||||
; VENTUS-NEXT: # Label of block must be emitted
|
||||
; VENTUS-NEXT: join zero, zero, 0
|
||||
; VENTUS-NEXT: lw ra, -8(sp) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: lw ra, 0(sp) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: addi sp, sp, -8
|
||||
; VENTUS-NEXT: ret
|
||||
entry:
|
||||
|
@ -134,7 +134,7 @@ define dso_local i32 @branch_in_branch(i32 noundef %dim) local_unnamed_addr {
|
|||
; VENTUS-NEXT: .cfi_def_cfa_offset 4
|
||||
; VENTUS-NEXT: regext zero, zero, 1
|
||||
; VENTUS-NEXT: vmv.v.x v32, tp
|
||||
; VENTUS-NEXT: sw ra, -4(sp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: sw ra, 0(sp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: .cfi_offset ra, 4
|
||||
; VENTUS-NEXT: .cfi_offset v33.l, 0
|
||||
; VENTUS-NEXT: vmv.v.x v0, zero
|
||||
|
@ -169,6 +169,7 @@ define dso_local i32 @branch_in_branch(i32 noundef %dim) local_unnamed_addr {
|
|||
; VENTUS-NEXT: vblt v0, v33, .LBB2_5
|
||||
; VENTUS-NEXT: # %bb.3: # %if.then2
|
||||
; VENTUS-NEXT: li t0, 23
|
||||
; VENTUS-NEXT: vmv.v.x v0, t0
|
||||
; VENTUS-NEXT: j .LBB2_6
|
||||
; VENTUS-NEXT: .LBB2_4: # %if.end7
|
||||
; VENTUS-NEXT: li t0, 4
|
||||
|
@ -177,14 +178,14 @@ define dso_local i32 @branch_in_branch(i32 noundef %dim) local_unnamed_addr {
|
|||
; VENTUS-NEXT: j .LBB2_7
|
||||
; VENTUS-NEXT: .LBB2_5:
|
||||
; VENTUS-NEXT: li t0, 12
|
||||
; VENTUS-NEXT: vmv.v.x v0, t0
|
||||
; VENTUS-NEXT: .LBB2_6: # %cleanup9
|
||||
; VENTUS-NEXT: # Label of block must be emitted
|
||||
; VENTUS-NEXT: join zero, zero, 0
|
||||
; VENTUS-NEXT: vmv.v.x v0, t0
|
||||
; VENTUS-NEXT: .LBB2_7: # %cleanup9
|
||||
; VENTUS-NEXT: # Label of block must be emitted
|
||||
; VENTUS-NEXT: join zero, zero, 0
|
||||
; VENTUS-NEXT: lw ra, -4(sp) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: lw ra, 0(sp) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: addi sp, sp, -4
|
||||
; VENTUS-NEXT: addi tp, tp, -4
|
||||
; VENTUS-NEXT: ret
|
||||
|
@ -218,7 +219,7 @@ define dso_local ventus_kernel void @double_loop(ptr addrspace(1) nocapture noun
|
|||
; VENTUS: # %bb.0: # %entry
|
||||
; VENTUS-NEXT: addi sp, sp, 8
|
||||
; VENTUS-NEXT: .cfi_def_cfa_offset 8
|
||||
; VENTUS-NEXT: sw ra, -8(sp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: sw ra, 0(sp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: .cfi_offset ra, 0
|
||||
; VENTUS-NEXT: sw a0, -4(sp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: vmv.v.x v0, zero
|
||||
|
@ -249,7 +250,7 @@ define dso_local ventus_kernel void @double_loop(ptr addrspace(1) nocapture noun
|
|||
; VENTUS-NEXT: # => This Inner Loop Header: Depth=2
|
||||
; VENTUS-NEXT: vlw12.v v5, 0(v3)
|
||||
; VENTUS-NEXT: vadd.vv v2, v2, v5
|
||||
; VENTUS-NEXT: vadd.vi v4, v4, -1
|
||||
; VENTUS-NEXT: vsub12.vi v4, v4, 1
|
||||
; VENTUS-NEXT: vsw12.v v2, 0(v1)
|
||||
; VENTUS-NEXT: .Lpcrel_hi8:
|
||||
; VENTUS-NEXT: auipc t1, %pcrel_hi(.LBB3_4)
|
||||
|
@ -268,7 +269,7 @@ define dso_local ventus_kernel void @double_loop(ptr addrspace(1) nocapture noun
|
|||
; VENTUS-NEXT: .LBB3_5: # %for.cond.cleanup
|
||||
; VENTUS-NEXT: # Label of block must be emitted
|
||||
; VENTUS-NEXT: join zero, zero, 0
|
||||
; VENTUS-NEXT: lw ra, -8(sp) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: lw ra, 0(sp) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: addi sp, sp, -8
|
||||
; VENTUS-NEXT: ret
|
||||
entry:
|
||||
|
@ -312,7 +313,7 @@ define dso_local ventus_kernel void @loop_switch(ptr addrspace(1) nocapture noun
|
|||
; VENTUS: # %bb.0: # %entry
|
||||
; VENTUS-NEXT: addi sp, sp, 8
|
||||
; VENTUS-NEXT: .cfi_def_cfa_offset 8
|
||||
; VENTUS-NEXT: sw ra, -8(sp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: sw ra, 0(sp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: .cfi_offset ra, 0
|
||||
; VENTUS-NEXT: sw a0, -4(sp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: vmv.v.x v0, zero
|
||||
|
@ -374,7 +375,7 @@ define dso_local ventus_kernel void @loop_switch(ptr addrspace(1) nocapture noun
|
|||
; VENTUS-NEXT: .LBB4_9: # %for.cond.cleanup
|
||||
; VENTUS-NEXT: # Label of block must be emitted
|
||||
; VENTUS-NEXT: join zero, zero, 0
|
||||
; VENTUS-NEXT: lw ra, -8(sp) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: lw ra, 0(sp) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: addi sp, sp, -8
|
||||
; VENTUS-NEXT: ret
|
||||
entry:
|
||||
|
@ -426,7 +427,7 @@ define dso_local i32 @_Z13get_global_idj(i32 noundef %dim) local_unnamed_addr {
|
|||
; VENTUS: # %bb.0: # %entry
|
||||
; VENTUS-NEXT: addi sp, sp, 4
|
||||
; VENTUS-NEXT: .cfi_def_cfa_offset 4
|
||||
; VENTUS-NEXT: sw ra, -4(sp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: sw ra, 0(sp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: .cfi_offset ra, 0
|
||||
; VENTUS-NEXT: li t0, 2
|
||||
; VENTUS-NEXT: vmv.v.x v1, t0
|
||||
|
@ -461,7 +462,7 @@ define dso_local i32 @_Z13get_global_idj(i32 noundef %dim) local_unnamed_addr {
|
|||
; VENTUS-NEXT: .LBB5_7: # %return
|
||||
; VENTUS-NEXT: # Label of block must be emitted
|
||||
; VENTUS-NEXT: join zero, zero, 0
|
||||
; VENTUS-NEXT: lw ra, -4(sp) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: lw ra, 0(sp) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: addi sp, sp, -4
|
||||
; VENTUS-NEXT: ret
|
||||
entry:
|
||||
|
|
|
@ -0,0 +1,19 @@
|
|||
// RUN: clang -target riscv32 -mcpu=ventus-gpgpu < %s \
|
||||
// RUN: | FileCheck -check-prefix=VENTUS %s
|
||||
|
||||
__kernel void test_kernel(
|
||||
char2 c, uchar2 uc, short2 s, ushort2 us, int2 i, uint2 ui, float2 f,
|
||||
__global float2 *result)
|
||||
{
|
||||
// there is no FileCheck now
|
||||
result[0] = convert_float2(c);
|
||||
result[1] = convert_float2(uc);
|
||||
result[2] = convert_float2(s);
|
||||
result[3] = convert_float2(us);
|
||||
result[4] = convert_float2(i);
|
||||
result[5] = convert_float2(ui);
|
||||
result[6] = f;
|
||||
}
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue