diff --git a/.arcconfig b/.arcconfig index e200298f603e..134f30720816 100644 --- a/.arcconfig +++ b/.arcconfig @@ -1,8 +1,4 @@ { - "phabricator.uri" : "https://reviews.llvm.org/", - "repository.callsign" : "G", - "conduit_uri" : "https://reviews.llvm.org/", - "base": "git:HEAD^", - "arc.land.onto.default": "main", - "arc.land.onto": ["main"] -} + "phabricator.uri" : "http://www.tpt.com", + "notification" : "The code review's uri is Terapines' private network uri" +} \ No newline at end of file diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt index 0c62e8cba673..ff73705b04ed 100644 --- a/llvm/lib/Target/RISCV/CMakeLists.txt +++ b/llvm/lib/Target/RISCV/CMakeLists.txt @@ -40,6 +40,7 @@ add_llvm_target(RISCVCodeGen RISCVTargetObjectFile.cpp RISCVTargetTransformInfo.cpp VentusRegextInsertion.cpp + VentusVVInstrConversion.cpp GISel/RISCVCallLowering.cpp GISel/RISCVInstructionSelector.cpp GISel/RISCVLegalizerInfo.cpp diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h index 57264c711f6d..8a7680f3dd7f 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h @@ -95,6 +95,10 @@ enum { // compiler has free to select either one. 
UsesMaskPolicyShift = IsRVVWideningReductionShift + 1, UsesMaskPolicyMask = 1 << UsesMaskPolicyShift, + + // Check if this instruction meets the format of RVInstVV + IsVVALUInstrShift = UsesMaskPolicyShift + 1, + IsVVALUInstrMask = 1 << IsVVALUInstrShift, }; // Match with the definitions in RISCVInstrFormats.td @@ -132,6 +136,12 @@ static inline VConstraintType getConstraint(uint64_t TSFlags) { static inline bool hasDummyMaskOp(uint64_t TSFlags) { return TSFlags & HasDummyMaskOpMask; } + +/// \returns true if the instruction meets the format of RVInstVV +static inline bool isVVALUInstr(uint64_t TSFlags) { + return TSFlags & IsVVALUInstrMask; +} + /// \returns true if tail agnostic is enforced for the instruction. static inline bool doesForceTailAgnostic(uint64_t TSFlags) { return TSFlags & ForceTailAgnosticMask; diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h index 077539cf8951..c0a3f18b9ffb 100644 --- a/llvm/lib/Target/RISCV/RISCV.h +++ b/llvm/lib/Target/RISCV/RISCV.h @@ -69,6 +69,9 @@ void initializeRISCVRedundantCopyEliminationPass(PassRegistry &); FunctionPass *createVentusRegextInsertionPass(); void initializeVentusRegextInsertionPass(PassRegistry &); +FunctionPass *createVentusVVInstrConversionPass(); +void initializeVentusVVInstrConversionPass(PassRegistry &); + InstructionSelector *createRISCVInstructionSelector(const RISCVTargetMachine &, RISCVSubtarget &, RISCVRegisterBankInfo &); diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormats.td b/llvm/lib/Target/RISCV/RISCVInstrFormats.td index 4b2f5f57f68a..9a788c005e8c 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrFormats.td +++ b/llvm/lib/Target/RISCV/RISCVInstrFormats.td @@ -205,6 +205,9 @@ class RVInstgetOptLevel() != CodeGenOpt::None) addPass(createRISCVMergeBaseOffsetOptPass()); + addPass(createVentusVVInstrConversionPass()); } void RISCVPassConfig::addPostRegAlloc() { diff --git a/llvm/lib/Target/RISCV/VentusInstrFormatsV.td b/llvm/lib/Target/RISCV/VentusInstrFormatsV.td index 
979e2061ee8c..8fa70dcd04ed 100644 --- a/llvm/lib/Target/RISCV/VentusInstrFormatsV.td +++ b/llvm/lib/Target/RISCV/VentusInstrFormatsV.td @@ -77,6 +77,7 @@ class RVInstVV funct6, RISCVVFormat opv, dag outs, dag ins, let Inst{14-12} = opv.Value; let Inst{11-7} = vd; let Opcode = OPC_OP_V.Value; + let IsVVALUInstr = 1; } // vALU branch diff --git a/llvm/lib/Target/RISCV/VentusVVInstrConversion.cpp b/llvm/lib/Target/RISCV/VentusVVInstrConversion.cpp new file mode 100644 index 000000000000..2694012147ac --- /dev/null +++ b/llvm/lib/Target/RISCV/VentusVVInstrConversion.cpp @@ -0,0 +1,263 @@ +//===-- VentusVVInstrConversion.cpp - VV instruction conversion -----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains a pass that convert vop.vv instructions to vop.vx/vf +// instructions because currently, the objects stored in sGPR and sGPRF32 will +// be moved to VGPR in divergent nodes, so the patterns which match VX/VF +// instructions will not be matched +// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/RISCVBaseInfo.h" +#include "MCTargetDesc/RISCVMCTargetDesc.h" +#include "RISCV.h" +#include "RISCVInstrInfo.h" +#include "RISCVRegisterInfo.h" +#include "RISCVTargetMachine.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Register.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Debug.h" 
+#include "llvm/Support/Error.h"
+#include "llvm/TableGen/Record.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+#define VENTUS_VV_INSTRUCTION_CONVRSION "Ventus VV instruction conversion pass"
+#define DEBUG_TYPE "Ventus VV instruction conversion"
+
+using namespace llvm;
+
+namespace {
+
+/// Maps each supported vop.vv opcode to the vop.vx (integer) or vop.vf
+/// (floating point) opcode that performs the same operation with a scalar
+/// register as the replaced source operand.
+///
+/// Every entry must pair opcodes with identical signedness and semantics:
+/// the conversion pass substitutes the mapped opcode without further checks.
+DenseMap<unsigned, unsigned> VV2VXOpcodeMap = {
+    // Integer add/sub, min/max and bitwise logic.
+    {RISCV::VADD_VV, RISCV::VADD_VX},
+    {RISCV::VSUB_VV, RISCV::VSUB_VX},
+    {RISCV::VMINU_VV, RISCV::VMINU_VX},
+    // Signed min must map to the signed VX form, not to VMINU_VX.
+    {RISCV::VMIN_VV, RISCV::VMIN_VX},
+    {RISCV::VMAX_VV, RISCV::VMAX_VX},
+    {RISCV::VMAXU_VV, RISCV::VMAXU_VX},
+    {RISCV::VAND_VV, RISCV::VAND_VX},
+    {RISCV::VOR_VV, RISCV::VOR_VX},
+    {RISCV::VXOR_VV, RISCV::VXOR_VX},
+    // Integer comparisons.
+    {RISCV::VMSEQ_VV, RISCV::VMSEQ_VX},
+    {RISCV::VMSNE_VV, RISCV::VMSNE_VX},
+    {RISCV::VMSLTU_VV, RISCV::VMSLTU_VX},
+    {RISCV::VMSLT_VV, RISCV::VMSLT_VX},
+    {RISCV::VMSLEU_VV, RISCV::VMSLEU_VX},
+    {RISCV::VMSLE_VV, RISCV::VMSLE_VX},
+    // Shifts.
+    {RISCV::VSLL_VV, RISCV::VSLL_VX},
+    {RISCV::VSRL_VV, RISCV::VSRL_VX},
+    {RISCV::VSRA_VV, RISCV::VSRA_VX},
+    {RISCV::VSSRL_VV, RISCV::VSSRL_VX},
+    {RISCV::VSSRA_VV, RISCV::VSSRA_VX},
+    // Integer divide/remainder and multiply.
+    {RISCV::VDIVU_VV, RISCV::VDIVU_VX},
+    {RISCV::VDIV_VV, RISCV::VDIV_VX},
+    {RISCV::VREMU_VV, RISCV::VREMU_VX},
+    {RISCV::VREM_VV, RISCV::VREM_VX},
+    {RISCV::VMULHU_VV, RISCV::VMULHU_VX},
+    {RISCV::VMUL_VV, RISCV::VMUL_VX},
+    {RISCV::VMULHSU_VV, RISCV::VMULHSU_VX},
+    {RISCV::VMULH_VV, RISCV::VMULH_VX},
+    // Integer multiply-add family.
+    {RISCV::VMADD_VV, RISCV::VMADD_VX},
+    {RISCV::VNMSUB_VV, RISCV::VNMSUB_VX},
+    {RISCV::VMACC_VV, RISCV::VMACC_VX},
+    {RISCV::VNMSAC_VV, RISCV::VNMSAC_VX},
+    // Floating-point arithmetic, min/max and sign injection.
+    {RISCV::VFADD_VV, RISCV::VFADD_VF},
+    {RISCV::VFSUB_VV, RISCV::VFSUB_VF},
+    {RISCV::VFMSUB_VV, RISCV::VFMSUB_VF},
+    {RISCV::VFMIN_VV, RISCV::VFMIN_VF},
+    {RISCV::VFMAX_VV, RISCV::VFMAX_VF},
+    {RISCV::VFSGNJ_VV, RISCV::VFSGNJ_VF},
+    {RISCV::VFSGNJN_VV, RISCV::VFSGNJN_VF},
+    {RISCV::VFSGNJX_VV, RISCV::VFSGNJX_VF},
+    // Floating-point comparisons.
+    {RISCV::VMFEQ_VV, RISCV::VMFEQ_VF},
+    {RISCV::VMFLE_VV, RISCV::VMFLE_VF},
+    {RISCV::VMFLT_VV, RISCV::VMFLT_VF},
+    {RISCV::VMFNE_VV, RISCV::VMFNE_VF},
+    // Floating-point divide, multiply and multiply-add family.
+    {RISCV::VFDIV_VV, RISCV::VFDIV_VF},
+    {RISCV::VFMUL_VV, RISCV::VFMUL_VF},
+    {RISCV::VFMADD_VV, RISCV::VFMADD_VF},
+    {RISCV::VFNMADD_VV, RISCV::VFNMADD_VF},
+    {RISCV::VFMACC_VV, RISCV::VFMACC_VF},
+    {RISCV::VFNMACC_VV, RISCV::VFNMACC_VF},
+    {RISCV::VFNMSUB_VV, RISCV::VFNMSUB_VF},
+    {RISCV::VFMSAC_VV, RISCV::VFMSAC_VF},
+    {RISCV::VFNMSAC_VV, RISCV::VFNMSAC_VF}};
+
+/// Machine function pass that looks for a scalar-to-vector COPY immediately
+/// followed by a VV ALU instruction and rewrites the pair's VV instruction
+/// into its VX/VF form reading the scalar register directly.
+class VentusVVInstrConversion : public MachineFunctionPass {
+public:
+  /// Target instruction info; set at the start of runOnMachineFunction.
+  const RISCVInstrInfo *TII = nullptr;
+  static char ID;
+  /// Target register info (note: despite the name this is not the
+  /// MachineRegisterInfo, which is held in MR below).
+  const RISCVRegisterInfo *MRI = nullptr;
+  /// Per-function register info; set at the start of runOnMachineFunction.
+  const MachineRegisterInfo *MR = nullptr;
+
+  VentusVVInstrConversion() : MachineFunctionPass(ID) {
+    initializeVentusVVInstrConversionPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  StringRef getPassName() const override {
+    return VENTUS_VV_INSTRUCTION_CONVRSION;
+  }
+
+private:
+  /// Runs the conversion over one basic block; returns true on any change.
+  bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
+
+  /// Check if the instruction is VV ALU instruction or not, based on the
+  /// IsVVALUInstr bit of its TSFlags.
+  bool isVVALUInstruction(MachineInstr &MI) {
+    return RISCVII::isVVALUInstr(MI.getDesc().TSFlags);
+  }
+
+  bool isVALUCommutableInstr(MachineInstr &MI);
+
+  bool convertInstr(MachineBasicBlock &MBB, MachineInstr &CopyMI,
+                    MachineInstr &VVMI);
+
+  bool swapRegOperands(MachineInstr &MI);
+
+  bool isGPR2VGPRCopy(MachineInstr &MI);
+};
+
+char VentusVVInstrConversion::ID = 0;
+
+/// Swap register operands of instruction such as
+///   vadd.vv v0, v2, v1
+/// into
+///   vadd.vv v0, v1, v2
+/// Operands 1 and 2 must both be registers. Always returns true because the
+/// instruction is modified.
+bool VentusVVInstrConversion::swapRegOperands(MachineInstr &MI) {
+  MachineOperand &MO1 = MI.getOperand(1);
+  MachineOperand &MO2 = MI.getOperand(2);
+  assert((MO1.isReg() && MO2.isReg()) && "Operand is not register");
+  Register Reg1 = MO1.getReg();
+  Register Reg2 = MO2.getReg();
+  MO1.setReg(Reg2);
+  MO2.setReg(Reg1);
+  return true;
+}
+
+/// Caches the target hooks for \p MF and processes every basic block.
+/// \returns true if any instruction in the function was modified.
+bool VentusVVInstrConversion::runOnMachineFunction(MachineFunction &MF) {
+  bool isChanged = false;
+  TII = static_cast<const RISCVInstrInfo *>(MF.getSubtarget().getInstrInfo());
+  MRI = MF.getSubtarget<RISCVSubtarget>().getRegisterInfo();
+  MR = &MF.getRegInfo();
+  for (auto &MBB : MF)
+    isChanged |= runOnMachineBasicBlock(MBB);
+  return isChanged;
+}
+
+/// Scans \p MBB for a scalar-to-vector COPY whose immediate successor is a VV
+/// ALU instruction and hands each such pair to convertInstr.
+/// \returns true if any instruction in the block was modified.
+bool VentusVVInstrConversion::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
+  bool isMBBChanged = false;
+  // convertInstr only ever erases the instruction *after* MI and inserts the
+  // replacement at that same position, so MI itself stays in the list and
+  // the range-for iterator remains valid.
+  for (auto &MI : MBB) {
+    MachineInstr *NextMI = MI.getNextNode();
+    // Check RISCV::COPY instructions' format and its next instruction's format
+    if (isGPR2VGPRCopy(MI) && NextMI && isVVALUInstruction(*NextMI)) {
+      // When met here, we can ensure the coding logic goes to the conversion
+      isMBBChanged |= convertInstr(MBB, MI, *NextMI);
+    }
+  }
+  return isMBBChanged;
+}
+
+/// This function tries to convert
+///   vmv.s.x v2, a0
+///   vadd.vv v0, v0, v2
+/// into
+///   vadd.vx v0, v0, a0
+/// *****************************************************
+///   vmv.s.x v2, a0
+///   vmadd.vv v0, v2, v1
+/// into
+///   vmadd.vx v0, a0, v1
+/// VV to VF conversion follows the same routine
+/// TODO: vrsub has VX and VI version, need to deal with this specifically?
+bool VentusVVInstrConversion::convertInstr(MachineBasicBlock &MBB,
+                                           MachineInstr &CopyMI,
+                                           MachineInstr &VVMI) {
+  // \p CopyMI is the scalar-to-vector COPY, \p VVMI the VV instruction that
+  // directly follows it. Returns true if VVMI was modified or replaced.
+  //
+  // Bail out before touching any operand when the opcode has no VX/VF
+  // counterpart. lookup() yields 0 for a missing key and, unlike operator[],
+  // never inserts a dummy entry into the map; opcode 0 is never a valid
+  // conversion target.
+  const unsigned NewOpcode = VV2VXOpcodeMap.lookup(VVMI.getOpcode());
+  if (!NewOpcode)
+    return false;
+
+  const Register CopyDst = CopyMI.getOperand(0).getReg();
+  const Register CopySrc = CopyMI.getOperand(1).getReg();
+
+  // For commutable instructions, swap the first two sources when operand 2
+  // is not the copied value, so the copy gets a chance to land in operand 2.
+  // NOTE(review): the swap is applied even when operand 1 is not the copy
+  // destination either; existing test expectations rely on that ordering.
+  bool isMBBChanged = false;
+  if (isVALUCommutableInstr(VVMI) && CopyDst != VVMI.getOperand(2).getReg())
+    isMBBChanged |= swapRegOperands(VVMI);
+  // Only operand 2 can be replaced by a scalar register.
+  if (CopyDst != VVMI.getOperand(2).getReg())
+    return isMBBChanged;
+
+  const unsigned NumOps = VVMI.getNumExplicitOperands();
+  const bool IsTwoSource = NumOps == 3;
+  // Three-source form: the copied value must not also feed operand 3, since
+  // that operand stays a vector register in the VX/VF encoding.
+  const bool IsThreeSource =
+      NumOps == 4 && CopyDst != VVMI.getOperand(3).getReg();
+  // FIXME: maybe we need to take other unsupported instructions into
+  // consideration; anything else is left untouched.
+  if (!IsTwoSource && !IsThreeSource)
+    return isMBBChanged;
+
+  MachineInstrBuilder MIB = BuildMI(MBB, VVMI, VVMI.getDebugLoc(),
+                                    TII->get(NewOpcode),
+                                    VVMI.getOperand(0).getReg())
+                                .addReg(VVMI.getOperand(1).getReg())
+                                .addReg(CopySrc);
+  if (IsThreeSource)
+    MIB.addReg(VVMI.getOperand(3).getReg());
+  VVMI.eraseFromParent();
+  return true;
+}
+
+/// FIXME: we also can add attribute in VentusInstrInfoV.td file, but changes
+/// are very trivial which can happen in many separated places, for now we use
+/// enum to accomplish our purpose
+/// In ventus : V+X = X+V, V*X=X*V
+///
+/// \returns true if operands 1 and 2 of \p MI may be exchanged without
+/// changing the result.
+bool VentusVVInstrConversion::isVALUCommutableInstr(MachineInstr &MI) {
+  switch (MI.getOpcode()) {
+  default:
+    return false;
+  // VMULHSU_VV is intentionally absent: it multiplies a signed source by an
+  // unsigned source, so exchanging the two sources changes the result.
+  case RISCV::VADD_VV:
+  case RISCV::VMUL_VV:
+  case RISCV::VFADD_VV:
+  case RISCV::VFMUL_VV:
+  case RISCV::VMADD_VV:
+  case RISCV::VFMADD_VV:
+  case RISCV::VMULH_VV:
+  case RISCV::VMULHU_VV:
+    return true;
+  }
+}
+
+/// Instruction shall be like this: %1:vgpr = COPY %2:gpr
+/// i.e. a COPY whose source is an SGPR register and whose destination is not.
+bool VentusVVInstrConversion::isGPR2VGPRCopy(MachineInstr &MI) {
+  if (MI.getOpcode() != RISCV::COPY)
+    return false;
+  return MRI->isSGPRReg(*MR, MI.getOperand(1).getReg()) &&
+         !MRI->isSGPRReg(*MR, MI.getOperand(0).getReg());
+}
+} // end of anonymous namespace
+
+INITIALIZE_PASS(VentusVVInstrConversion, "ventus-VV-instructions-conversion",
+                VENTUS_VV_INSTRUCTION_CONVRSION, false, false)
+
+namespace llvm {
+/// Factory used by the target pass pipeline to create the conversion pass.
+FunctionPass *createVentusVVInstrConversionPass() {
+  return new VentusVVInstrConversion();
+}
+} // end of namespace llvm
\ No newline at end of file diff --git a/llvm/test/CodeGen/RISCV/VentusGPGPU/addr-space.ll b/llvm/test/CodeGen/RISCV/VentusGPGPU/addr-space.ll index 6d57f47c23d5..2f1fd8c9715d 100644 --- a/llvm/test/CodeGen/RISCV/VentusGPGPU/addr-space.ll +++ b/llvm/test/CodeGen/RISCV/VentusGPGPU/addr-space.ll @@ -25,13 +25,11 @@ define dso_local spir_kernel void @func(ptr addrspace(1) nocapture noundef align ; VENTUS-NEXT: call _Z12get_local_idj ; VENTUS-NEXT: vmv.s.x v1, zero ; VENTUS-NEXT: vsll.vi v0, v0, 2 -; VENTUS-NEXT: vmv.s.x v2, s1 -; VENTUS-NEXT: vadd.vv v0, v2, v0 +; VENTUS-NEXT: vadd.vx v0, v0, s1 ; VENTUS-NEXT: vmv.x.s a0, v0 ; VENTUS-NEXT: vluxei32.v v0, (a0), v1 ; VENTUS-NEXT: vsll.vi v2, v32, 2 -; VENTUS-NEXT: vmv.s.x v3, s0 -; VENTUS-NEXT: vadd.vv v2, v3, v2 +; VENTUS-NEXT: vadd.vx v2, v2, s0 ; VENTUS-NEXT: vmv.x.s a0, v2 ; VENTUS-NEXT: vluxei32.v v2, (a0), v1 ; VENTUS-NEXT: vadd.vv v0, v2, v0 diff --git a/llvm/test/CodeGen/RISCV/VentusGPGPU/addr-space2.ll b/llvm/test/CodeGen/RISCV/VentusGPGPU/addr-space2.ll index 27af236959ba..26861c044c60 100644 --- a/llvm/test/CodeGen/RISCV/VentusGPGPU/addr-space2.ll +++ b/llvm/test/CodeGen/RISCV/VentusGPGPU/addr-space2.ll @@ -40,16 +40,16 @@ define spir_kernel void @foo(ptr addrspace(1) noundef align 4 %out) { ; VENTUS-NEXT: vlw v1, zero(s2) ; VENTUS-NEXT: add s1, s1, a0 ; VENTUS-NEXT: lw
a1, 0(s1) -; VENTUS-NEXT: add a0, s0, a0 +; VENTUS-NEXT: add a0, a0, s0 ; VENTUS-NEXT: lw a2, 0(a0) ; VENTUS-NEXT: vmv.s.x v2, a1 ; VENTUS-NEXT: vmv.s.x v3, a2 -; VENTUS-NEXT: vmadd.vv v2, v1, v3 -; VENTUS-NEXT: vsuxei32.v v2, (a0), v0 +; VENTUS-NEXT: vmadd.vv v1, v2, v3 +; VENTUS-NEXT: vsuxei32.v v1, (a0), v0 ; VENTUS-NEXT: j .LBB0_3 ; VENTUS-NEXT: .LBB0_2: # %if.else ; VENTUS-NEXT: slli a0, a0, 2 -; VENTUS-NEXT: add a0, s0, a0 +; VENTUS-NEXT: add a0, a0, s0 ; VENTUS-NEXT: sw zero, 0(a0) ; VENTUS-NEXT: .LBB0_3: # %if.end ; VENTUS-NEXT: lw ra, -36(sp) # 4-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/VentusGPGPU/builtin.ll b/llvm/test/CodeGen/RISCV/VentusGPGPU/builtin.ll index 20b2b32fb0d4..06cb6605aba4 100644 --- a/llvm/test/CodeGen/RISCV/VentusGPGPU/builtin.ll +++ b/llvm/test/CodeGen/RISCV/VentusGPGPU/builtin.ll @@ -20,12 +20,10 @@ define spir_kernel void @foo_ker(ptr addrspace(1) nocapture noundef align 4 %A, ; VENTUS-NEXT: call _Z13get_global_idj ; VENTUS-NEXT: vmv.s.x v1, zero ; VENTUS-NEXT: vsll.vi v0, v0, 2 -; VENTUS-NEXT: vmv.s.x v2, s1 -; VENTUS-NEXT: vadd.vv v2, v2, v0 +; VENTUS-NEXT: vadd.vx v2, v0, s1 ; VENTUS-NEXT: vmv.x.s a0, v2 ; VENTUS-NEXT: vluxei32.v v2, (a0), v1 -; VENTUS-NEXT: vmv.s.x v3, s0 -; VENTUS-NEXT: vadd.vv v0, v3, v0 +; VENTUS-NEXT: vadd.vx v0, v0, s0 ; VENTUS-NEXT: vmv.x.s a0, v0 ; VENTUS-NEXT: vluxei32.v v0, (a0), v1 ; VENTUS-NEXT: vadd.vv v0, v0, v2 diff --git a/llvm/test/CodeGen/RISCV/VentusGPGPU/float.ll b/llvm/test/CodeGen/RISCV/VentusGPGPU/float.ll index 2223dee4918b..3c7e38c0e79d 100644 --- a/llvm/test/CodeGen/RISCV/VentusGPGPU/float.ll +++ b/llvm/test/CodeGen/RISCV/VentusGPGPU/float.ll @@ -17,8 +17,7 @@ define float @fadd_f(float noundef %a) { ; VENTUS: # %bb.0: # %entry ; VENTUS-NEXT: lui a0, %hi(global_val) ; VENTUS-NEXT: lw a0, %lo(global_val)(a0) -; VENTUS-NEXT: vmv.s.x v1, a0 -; VENTUS-NEXT: vfadd.vv v0, v0, v1 +; VENTUS-NEXT: vfadd.vf v0, v0, a0 ; VENTUS-NEXT: ret entry: %val = load float, ptr @global_val, align 
4 @@ -41,8 +40,7 @@ define float @fsub_f(float noundef %a) { ; VENTUS: # %bb.0: # %entry ; VENTUS-NEXT: lui a0, %hi(global_val) ; VENTUS-NEXT: lw a0, %lo(global_val)(a0) -; VENTUS-NEXT: vmv.s.x v1, a0 -; VENTUS-NEXT: vfsub.vv v0, v0, v1 +; VENTUS-NEXT: vfsub.vf v0, v0, a0 ; VENTUS-NEXT: ret entry: %val = load float, ptr @global_val, align 4 @@ -65,8 +63,7 @@ define float @fmul_f(float noundef %a) { ; VENTUS: # %bb.0: # %entry ; VENTUS-NEXT: lui a0, %hi(global_val) ; VENTUS-NEXT: lw a0, %lo(global_val)(a0) -; VENTUS-NEXT: vmv.s.x v1, a0 -; VENTUS-NEXT: vfmul.vv v0, v0, v1 +; VENTUS-NEXT: vfmul.vf v0, v0, a0 ; VENTUS-NEXT: ret entry: %val = load float, ptr @global_val, align 4 @@ -89,8 +86,7 @@ define float @fdiv_f(float noundef %a, float noundef %b) { ; VENTUS: # %bb.0: # %entry ; VENTUS-NEXT: lui a0, %hi(global_val) ; VENTUS-NEXT: lw a0, %lo(global_val)(a0) -; VENTUS-NEXT: vmv.s.x v1, a0 -; VENTUS-NEXT: vfdiv.vv v0, v0, v1 +; VENTUS-NEXT: vfdiv.vf v0, v0, a0 ; VENTUS-NEXT: ret entry: %val = load float, ptr @global_val, align 4 @@ -103,8 +99,7 @@ define float @foo_constant(float noundef %a) { ; VENTUS: # %bb.0: # %entry ; VENTUS-NEXT: lui a0, %hi(.LCPI8_0) ; VENTUS-NEXT: lw a0, %lo(.LCPI8_0)(a0) -; VENTUS-NEXT: vmv.s.x v1, a0 -; VENTUS-NEXT: vfmul.vv v0, v0, v1 +; VENTUS-NEXT: vfmul.vf v0, v0, a0 ; VENTUS-NEXT: ret entry: %mul = fmul float %a, 1.25 @@ -391,8 +386,7 @@ define dso_local float @fmadd_f(float noundef %a, float noundef %b, float nounde ; VENTUS-NEXT: lui a0, %hi(.LCPI24_0) ; VENTUS-NEXT: lw a0, %lo(.LCPI24_0)(a0) ; VENTUS-NEXT: vadd.vx v0, v1, zero -; VENTUS-NEXT: vmv.s.x v1, a0 -; VENTUS-NEXT: vfmadd.vv v0, v1, v2 +; VENTUS-NEXT: vfmadd.vf v0, a0, v2 ; VENTUS-NEXT: ret entry: %0 = tail call float @llvm.fmuladd.f32(float %b, float 0x3FF3333340000000, float %c) @@ -419,8 +413,7 @@ define dso_local float @fnmadd_f(float noundef %a, float noundef %b, float nound ; VENTUS-NEXT: lui a0, %hi(.LCPI26_0) ; VENTUS-NEXT: lw a0, %lo(.LCPI26_0)(a0) ; VENTUS-NEXT: 
vadd.vx v0, v1, zero -; VENTUS-NEXT: vmv.s.x v1, a0 -; VENTUS-NEXT: vfmsub.vv v0, v1, v2 +; VENTUS-NEXT: vfmsub.vf v0, a0, v2 ; VENTUS-NEXT: ret entry: %fneg = fmul float %b, 0xBFF3333340000000 @@ -446,8 +439,7 @@ define dso_local float @fmsub_f(float noundef %a, float noundef %b) local_unname ; VENTUS: # %bb.0: # %entry ; VENTUS-NEXT: lui a0, %hi(.LCPI28_0) ; VENTUS-NEXT: lw a0, %lo(.LCPI28_0)(a0) -; VENTUS-NEXT: vmv.s.x v2, a0 -; VENTUS-NEXT: vfmsub.vv v0, v2, v1 +; VENTUS-NEXT: vfmsub.vf v0, a0, v1 ; VENTUS-NEXT: ret entry: %mul = fmul float %a, 0x3FF3333340000000 @@ -474,8 +466,7 @@ define dso_local float @fnmsub_f(float noundef %a, float noundef %b, float nound ; VENTUS-NEXT: lui a0, %hi(.LCPI30_0) ; VENTUS-NEXT: lw a0, %lo(.LCPI30_0)(a0) ; VENTUS-NEXT: vadd.vx v0, v1, zero -; VENTUS-NEXT: vmv.s.x v1, a0 -; VENTUS-NEXT: vfmadd.vv v0, v1, v2 +; VENTUS-NEXT: vfmadd.vf v0, a0, v2 ; VENTUS-NEXT: ret entry: %fneg = fmul float %b, 0x3FF3333340000000 diff --git a/llvm/test/CodeGen/RISCV/VentusGPGPU/int_arithmetic.ll b/llvm/test/CodeGen/RISCV/VentusGPGPU/int_arithmetic.ll index 8116deb29d2a..7cfb3637d393 100644 --- a/llvm/test/CodeGen/RISCV/VentusGPGPU/int_arithmetic.ll +++ b/llvm/test/CodeGen/RISCV/VentusGPGPU/int_arithmetic.ll @@ -16,8 +16,7 @@ define i32 @vdivu_x(i32 %a) { ; VENTUS: # %bb.0: ; VENTUS-NEXT: lui a0, %hi(global_val) ; VENTUS-NEXT: lw a0, %lo(global_val)(a0) -; VENTUS-NEXT: vmv.s.x v1, a0 -; VENTUS-NEXT: vdivu.vv v0, v0, v1 +; VENTUS-NEXT: vdivu.vx v0, v0, a0 ; VENTUS-NEXT: ret %val = load i32, ptr @global_val %udiv = udiv i32 %a, %val @@ -38,8 +37,7 @@ define i32 @vdiv_x(i32 %a) { ; VENTUS: # %bb.0: ; VENTUS-NEXT: lui a0, %hi(global_val) ; VENTUS-NEXT: lw a0, %lo(global_val)(a0) -; VENTUS-NEXT: vmv.s.x v1, a0 -; VENTUS-NEXT: vdiv.vv v0, v0, v1 +; VENTUS-NEXT: vdiv.vx v0, v0, a0 ; VENTUS-NEXT: ret %val = load i32, ptr @global_val %sdiv = sdiv i32 %a, %val @@ -82,8 +80,7 @@ define i32 @srem_pow2(i32 %a) nounwind { ; VENTUS-NEXT: vsrl.vi v1, v1, 29 ; 
VENTUS-NEXT: vadd.vv v1, v0, v1 ; VENTUS-NEXT: li a0, -8 -; VENTUS-NEXT: vmv.s.x v2, a0 -; VENTUS-NEXT: vand.vv v1, v1, v2 +; VENTUS-NEXT: vand.vx v1, v1, a0 ; VENTUS-NEXT: vsub.vv v0, v0, v1 ; VENTUS-NEXT: ret %1 = srem i32 %a, 8 @@ -97,8 +94,7 @@ define i32 @srem_pow2_2(i32 %a) nounwind { ; VENTUS-NEXT: vsrl.vi v1, v1, 16 ; VENTUS-NEXT: vadd.vv v1, v0, v1 ; VENTUS-NEXT: lui a0, 1048560 -; VENTUS-NEXT: vmv.s.x v2, a0 -; VENTUS-NEXT: vand.vv v1, v1, v2 +; VENTUS-NEXT: vand.vx v1, v1, a0 ; VENTUS-NEXT: vsub.vv v0, v0, v1 ; VENTUS-NEXT: ret %1 = srem i32 %a, 65536 @@ -130,8 +126,7 @@ define i32 @vadd_x(i32 %a) nounwind { ; VENTUS: # %bb.0: ; VENTUS-NEXT: lui a0, %hi(global_val) ; VENTUS-NEXT: lw a0, %lo(global_val)(a0) -; VENTUS-NEXT: vmv.s.x v1, a0 -; VENTUS-NEXT: vadd.vv v0, v0, v1 +; VENTUS-NEXT: vadd.vx v0, v0, a0 ; VENTUS-NEXT: ret %val = load i32, ptr @global_val %add = add i32 %a, %val @@ -161,8 +156,7 @@ define i32 @vsub_x(i32 %a) nounwind { ; VENTUS: # %bb.0: ; VENTUS-NEXT: lui a0, %hi(global_val) ; VENTUS-NEXT: lw a0, %lo(global_val)(a0) -; VENTUS-NEXT: vmv.s.x v1, a0 -; VENTUS-NEXT: vsub.vv v0, v0, v1 +; VENTUS-NEXT: vsub.vx v0, v0, a0 ; VENTUS-NEXT: ret %val = load i32, ptr @global_val %sub = sub i32 %a, %val @@ -196,8 +190,7 @@ define i32 @vmul_x(i32 %a) nounwind { ; VENTUS: # %bb.0: ; VENTUS-NEXT: lui a0, %hi(global_val) ; VENTUS-NEXT: lw a0, %lo(global_val)(a0) -; VENTUS-NEXT: vmv.s.x v1, a0 -; VENTUS-NEXT: vmul.vv v0, v0, v1 +; VENTUS-NEXT: vmul.vx v0, v0, a0 ; VENTUS-NEXT: ret %val = load i32, ptr @global_val %mul = mul i32 %a, %val @@ -222,8 +215,7 @@ define i32 @vmulh_x(i32 %a) nounwind { ; VENTUS: # %bb.0: ; VENTUS-NEXT: lui a0, %hi(global_val) ; VENTUS-NEXT: lw a0, %lo(global_val)(a0) -; VENTUS-NEXT: vmv.s.x v1, a0 -; VENTUS-NEXT: vmulh.vv v0, v0, v1 +; VENTUS-NEXT: vmulh.vx v0, v0, a0 ; VENTUS-NEXT: ret %val = load i32, ptr @global_val %1 = sext i32 %a to i64 @@ -252,8 +244,7 @@ define i32 @vmulhu_x(i32 %a) nounwind { ; VENTUS: # %bb.0: ; 
VENTUS-NEXT: lui a0, %hi(global_val) ; VENTUS-NEXT: lw a0, %lo(global_val)(a0) -; VENTUS-NEXT: vmv.s.x v1, a0 -; VENTUS-NEXT: vmulhu.vv v0, v0, v1 +; VENTUS-NEXT: vmulhu.vx v0, v0, a0 ; VENTUS-NEXT: ret %val = load i32, ptr @global_val %1 = zext i32 %a to i64 @@ -281,8 +272,7 @@ define i32 @vmulhsu_x(i32 %a) nounwind { ; VENTUS: # %bb.0: ; VENTUS-NEXT: lui a0, %hi(global_val) ; VENTUS-NEXT: lw a0, %lo(global_val)(a0) -; VENTUS-NEXT: vmv.s.x v1, a0 -; VENTUS-NEXT: vmulhsu.vv v0, v0, v1 +; VENTUS-NEXT: vmulhsu.vx v0, v0, a0 ; VENTUS-NEXT: ret %val = load i32, ptr @global_val %1 = sext i32 %a to i64 @@ -331,8 +321,7 @@ define dso_local i32 @nmsub_x(i32 noundef %a, i32 noundef %b) local_unnamed_addr ; VENTUS: # %bb.0: # %entry ; VENTUS-NEXT: lui a0, %hi(global_val) ; VENTUS-NEXT: lw a0, %lo(global_val)(a0) -; VENTUS-NEXT: vmv.s.x v2, a0 -; VENTUS-NEXT: vmadd.vv v0, v2, v1 +; VENTUS-NEXT: vmadd.vx v0, a0, v1 ; VENTUS-NEXT: ret entry: %val = load i32, ptr @global_val @@ -360,8 +349,7 @@ define dso_local i32 @madd_x(i32 noundef %a, i32 noundef %b) local_unnamed_addr ; VENTUS: # %bb.0: # %entry ; VENTUS-NEXT: lui a0, %hi(global_val) ; VENTUS-NEXT: lw a0, %lo(global_val)(a0) -; VENTUS-NEXT: vmv.s.x v2, a0 -; VENTUS-NEXT: vmadd.vv v0, v2, v1 +; VENTUS-NEXT: vmadd.vx v0, a0, v1 ; VENTUS-NEXT: ret entry: %val = load i32, ptr @global_val diff --git a/llvm/test/CodeGen/RISCV/VentusGPGPU/kernel_args.ll b/llvm/test/CodeGen/RISCV/VentusGPGPU/kernel_args.ll index bc9d96a95170..bfda0d4b4391 100644 --- a/llvm/test/CodeGen/RISCV/VentusGPGPU/kernel_args.ll +++ b/llvm/test/CodeGen/RISCV/VentusGPGPU/kernel_args.ll @@ -15,7 +15,7 @@ define dso_local spir_kernel void @foo(i32 noundef %a, i32 noundef %b, ptr addrs ; VENTUS-NEXT: lw a1, 0(a0) ; VENTUS-NEXT: lw a2, 4(a0) ; VENTUS-NEXT: lw a0, 8(a0) -; VENTUS-NEXT: add a1, a2, a1 +; VENTUS-NEXT: add a1, a1, a2 ; VENTUS-NEXT: sw a1, 0(a0) ; VENTUS-NEXT: ret %add = add nsw i32 %b, %a