//===--- arm_cde.td - ACLE intrinsic functions for CDE --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the set of ACLE-specified source-level intrinsic
// functions wrapping the CDE instructions.
//
//===----------------------------------------------------------------------===//

include "arm_mve_defs.td"

// f64 is not defined in arm_mve_defs.td because MVE instructions only work with
// f16 and f32
def f64: PrimitiveType<"f", 64>;

// Float<t> expects t to be a scalar type, and expands to the floating-point
// type of the same width.
class Float<Type t>: ComplexType<(CTO_CopyKind t, f32)>;
def FScalar: Float<Scalar>;

// ACLE CDE intrinsic: an Intrinsic whose builtin belongs to the "cde"
// extension.
class CDEIntrinsic<Type ret, dag args, dag codegen>
  : Intrinsic<ret, args, codegen> {
  let builtinExtension = "cde";
}

// Immediate (in range [0, 2^numBits - 1])
class IB_ConstBits<int numBits> : IB_ConstRange<0, !add(!shl(1, numBits), -1)>;
// numBits-wide immediate of type u32
class CDEImmediateBits<int numBits> : Immediate<u32, IB_ConstBits<numBits>>;

// LLVM IR CDE intrinsic: prefixes the IR intrinsic name with "arm_cde_".
class CDEIRInt<string name, list<Type> params = [], bit appendKind = 0>
  : IRIntBase<"arm_cde_" # name, params, appendKind>;

// Class for generating function macros in arm_cde.h:
// "#define <name>(<params>) <definition>"
class FunctionMacro<list<string> params_, string definition_> {
  list<string> params = params_;
  string definition = definition_;
}

// Coprocessor immediate
// NOTE(review): type and range restored as <sint, IB_ConstRange<0, 7>> (eight
// coprocessors); confirm against upstream.
def imm_coproc : Immediate<sint, IB_ConstRange<0, 7>>;

// Immediate integer parameters
def imm_3b : CDEImmediateBits<3>;
def imm_4b : CDEImmediateBits<4>;
def imm_6b : CDEImmediateBits<6>;
def imm_7b : CDEImmediateBits<7>;
def imm_9b : CDEImmediateBits<9>;
def imm_11b : CDEImmediateBits<11>;
def imm_12b : CDEImmediateBits<12>;
def imm_13b : CDEImmediateBits<13>;

// CX* instructions operating on GPRs
//
// For each defm NAME this produces four intrinsics:
//   NAME    - 32-bit result
//   NAMEa   - 32-bit result, with a 32-bit accumulator operand
//   NAMEd   - 64-bit result
//   NAMEda  - 64-bit result, with a 64-bit accumulator operand
// argsImm is the trailing immediate argument, argsN the register operands and
// cgArgs the same register operands as passed on to the IR intrinsic.
multiclass CDE_CX_m<dag argsImm, dag argsN, dag cgArgs> {
  defvar cp = (args imm_coproc:$cp);
  let pnt = PNT_None, params = T.None in {
    def "" : CDEIntrinsic<u32, !con(cp, argsN, argsImm),
                          !con((CDEIRInt<NAME> $cp), cgArgs, (? $imm))>;
    def a  : CDEIntrinsic<u32, !con(cp, (args u32:$acc), argsN, argsImm),
                          !con((CDEIRInt<NAME # "a"> $cp, $acc),
                               cgArgs, (? $imm))>;

    // The IR intrinsic yields a pair of 32-bit values; element 1 becomes the
    // high word and element 0 the low word of the 64-bit result.
    def d :
      CDEIntrinsic<u64, !con(cp, argsN, argsImm),
            (seq !con((CDEIRInt<NAME # "d"> $cp), cgArgs, (? $imm)):$pair,
                 (or (shl (u64 (xval $pair, 1)), (u64 32)),
                          (u64 (xval $pair, 0))))>;
    // The 64-bit accumulator is split into two 32-bit halves before the call
    // and the resulting pair is recombined as for the 'd' form.
    // NAME # "da" and the $acc split below are restored; confirm.
    def da :
      CDEIntrinsic<u64, !con(cp, (args u64:$acc), argsN, argsImm),
            (seq (u32 (lshr $acc, (u64 32))):$acc_hi,
                 (u32 $acc):$acc_lo,
                 !con((CDEIRInt<NAME # "da"> $cp, $acc_lo, $acc_hi), cgArgs,
                       (? $imm)):$pair,
                 (or (shl (u64 (xval $pair, 1)), (u64 32)),
                          (u64 (xval $pair, 0))))>;
  }
}

defm cx1 : CDE_CX_m<(args imm_13b:$imm), (args), (?)>;
defm cx2 : CDE_CX_m<(args imm_9b:$imm), (args u32:$n), (? $n)>;
defm cx3 : CDE_CX_m<(args imm_6b:$imm), (args u32:$n, u32:$m), (? $n, $m)>;

// VCX* instructions operating on VFP registers
//
// The source-level intrinsics take and return integers (u32 / u64); operands
// are bitcast to the floating-point type of the same width (FScalar) for the
// IR intrinsic, and the result is bitcast back to Scalar.
// argsN32 / argsN64 are the register operands for the 32- and 64-bit forms.
// NOTE(review): the overload type lists ([f32] / [f64]) are restored; confirm.
multiclass CDE_VCXFP_m<dag argsImm, dag argsN32, dag argsN64, dag cgArgs> {
  defvar cp = (args imm_coproc:$cp);
  let pnt = PNT_None, params = [u32] in {
    def "" : CDEIntrinsic<u32, !con(cp, argsN32, argsImm),
          (bitcast !con((CDEIRInt<NAME, [f32]> $cp), cgArgs, (? $imm)),
                   Scalar)>;
    def a  : CDEIntrinsic<u32, !con(cp, (args u32:$acc), argsN32, argsImm),
          (bitcast !con((CDEIRInt<NAME # "a", [f32]> $cp,
                         (bitcast $acc, FScalar)), cgArgs, (? $imm)), Scalar)>;
  }
  let pnt = PNT_None, params = [u64] in {
    def d  : CDEIntrinsic<u64, !con(cp, argsN64, argsImm),
          (bitcast !con((CDEIRInt<NAME, [f64]> $cp), cgArgs, (? $imm)),
                   Scalar)>;
    def da : CDEIntrinsic<u64, !con(cp, (args u64:$acc), argsN64, argsImm),
          (bitcast !con((CDEIRInt<NAME # "a", [f64]> $cp,
                         (bitcast $acc, FScalar)), cgArgs, (? $imm)), Scalar)>;
  }
}

defm vcx1: CDE_VCXFP_m<(args imm_11b:$imm), (args), (args), (?)>;
defm vcx2: CDE_VCXFP_m<(args imm_6b:$imm), (args u32:$n), (args u64:$n),
                       (? (bitcast $n, FScalar))>;
defm vcx3: CDE_VCXFP_m<(args imm_3b:$imm),
                       (args u32:$n, u32:$m), (args u64:$n, u64:$m),
                       (? (bitcast $n, FScalar), (bitcast $m, FScalar))>;

// VCX* instructions operating on Q vector registers

def v16u8 : VecOf<u8>;

let pnt = PNT_None, params = [u8] in
def vcx1q : CDEIntrinsic<Vector, (args imm_coproc:$cp, imm_12b:$imm),
                         (CDEIRInt<"vcx1q"> $cp, $imm)>;

// The IR intrinsics work on v16u8, so polymorphic vector operands are bitcast
// to v16u8 on the way in and the result is bitcast back to the caller's
// vector type. The *_impl variants take their second and third vector
// operands already as v16u8; the function macros further down perform that
// conversion.
let pnt = PNT_Type, params = T.All, polymorphicOnly = 1 in {
  def vcx1qa :
    CDEIntrinsic<Vector, (args imm_coproc:$cp, Vector:$acc, imm_12b:$imm),
                 (bitcast (CDEIRInt<"vcx1qa"> $cp, (bitcast $acc, v16u8), $imm),
                          Vector)>;

  def vcx2q :
    CDEIntrinsic<Vector, (args imm_coproc:$cp, Vector:$n, imm_7b:$imm),
                 (bitcast (CDEIRInt<"vcx2q"> $cp, (bitcast $n, VecOf<u8>), $imm),
                          Vector)>;
  def vcx2q_u8 :
    CDEIntrinsic<v16u8, (args imm_coproc:$cp, Vector:$n, imm_7b:$imm),
                 (CDEIRInt<"vcx2q"> $cp, (bitcast $n, VecOf<u8>), $imm)>;

  def vcx2qa_impl :
    CDEIntrinsic<Vector,
                 (args imm_coproc:$cp, Vector:$acc, v16u8:$n, imm_7b:$imm),
                 (bitcast (CDEIRInt<"vcx2qa"> $cp, (bitcast $acc, v16u8), $n,
                           $imm), Vector)>;

  def vcx3q_impl :
    CDEIntrinsic<Vector,
                 (args imm_coproc:$cp, Vector:$n, v16u8:$m, imm_4b:$imm),
                 (bitcast (CDEIRInt<"vcx3q"> $cp, (bitcast $n, v16u8), $m,
                           $imm), Vector)>;
  def vcx3q_u8_impl :
    CDEIntrinsic<v16u8,
                 (args imm_coproc:$cp, Vector:$n, v16u8:$m, imm_4b:$imm),
                 (CDEIRInt<"vcx3q"> $cp, (bitcast $n, v16u8), $m, $imm)>;
  def vcx3qa_impl :
    CDEIntrinsic<Vector,
                 (args imm_coproc:$cp, Vector:$acc, v16u8:$n, v16u8:$m,
                       imm_4b:$imm),
                 (bitcast (CDEIRInt<"vcx3qa"> $cp, (bitcast $acc, v16u8), $n,
                           $m, $imm), Vector)>;
}

// Reinterpret intrinsics required to implement __arm_vcx*q with 2 or 3
// polymorphic parameters.
let params = [/* no u8 */ s8, u16, s16, u32, s32, u64, s64, f16, f32],
    headerOnly = 1, polymorphicOnly = 1 in
def vreinterpretq_u8 :
    Intrinsic<v16u8, (args Vector:$x), (vreinterpret $x, v16u8)>;

// We need vreinterpretq_u8_u8 to avoid doing smart tricks in the macros
// NOTE(review): the codegen dag is restored as the identity node (id $x);
// confirm that arm_mve_defs.td provides it.
let params = [u8], polymorphicOnly = 1 in
def vreinterpretq_u8_cde :
    CDEIntrinsic<v16u8, (args Vector:$x), (id $x)>,
    NameOverride<"vreinterpretq_u8">;

// Function macros that convert the extra vector operands to u8 vectors before
// forwarding to the corresponding *_impl intrinsic.
def vcx2qa : FunctionMacro<
  ["cp", "acc", "n", "imm"],
  "__arm_vcx2qa_impl((cp), (acc), __arm_vreinterpretq_u8(n), (imm))">;

def vcx3q : FunctionMacro<
  ["cp", "n", "m", "imm"],
  "__arm_vcx3q_impl((cp), (n), __arm_vreinterpretq_u8(m), (imm))">;
def vcx3q_u8 : FunctionMacro<
  ["cp", "n", "m", "imm"],
  "__arm_vcx3q_u8_impl((cp), (n), __arm_vreinterpretq_u8(m), (imm))">;
def vcx3qa : FunctionMacro<
  ["cp", "acc", "n", "m", "imm"],
  "__arm_vcx3qa_impl((cp), (acc), __arm_vreinterpretq_u8(n), "
  "__arm_vreinterpretq_u8(m), (imm))">;

// Predicated (_m) Q-register intrinsic. The second argument is the 'inactive'
// vector for the non-accumulating forms and the accumulator for the 'a'
// forms; it is passed to the IR intrinsic without a bitcast, followed by
// cgArgs, the immediate and the predicate.
// NOTE(review): the "_predicated" suffix and the [Vector, Predicate] overload
// list are restored; confirm against the IR intrinsic definitions.
class CDEIntrinsicMasked<string irname, dag argsN, dag argsImm, dag cgArgs>
  : CDEIntrinsic<Vector,
                 !con((args imm_coproc:$cp, Vector:$inactive_or_acc),
                      argsN, argsImm, (args Predicate:$pred)),
                 !con((CDEIRInt<irname # "_predicated", [Vector, Predicate]>
                       $cp, $inactive_or_acc), cgArgs, (? $imm, $pred))> {
  let params = T.All;
  let polymorphicOnly = 1;
}

def vcx1q_m  : CDEIntrinsicMasked<"vcx1q", (args), (args imm_12b:$imm), (?)>;
def vcx1qa_m : CDEIntrinsicMasked<"vcx1qa", (args), (args imm_12b:$imm), (?)>;

// Predicated two- and three-operand forms: an *_impl intrinsic taking v16u8
// operands, plus a function macro that applies 'macro' (the reinterpret calls
// for the operands named in macroArgs) before forwarding to it.
multiclass VCXPredicated<dag argsN, dag argsImm, dag cgArgs,
                         list<string> macroArgs, string macro> {
  def _m_impl : CDEIntrinsicMasked<NAME, argsN, argsImm, cgArgs>;
  def a_m_impl : CDEIntrinsicMasked<NAME#"a", argsN, argsImm, cgArgs>;

  def _m: FunctionMacro<
    !listconcat(["cp", "inactive"], macroArgs, ["imm", "pred"]),
    "__arm_"#NAME#"_m_impl((cp), (inactive), "#macro#" (imm), (pred))">;
  def a_m: FunctionMacro<
    !listconcat(["cp", "acc"], macroArgs, ["imm", "pred"]),
    "__arm_"#NAME#"a_m_impl((cp), (acc), "#macro#" (imm), (pred))">;
}

defm vcx2q :
  VCXPredicated<(args v16u8:$n), (args imm_7b:$imm), (? $n), ["n"],
                "__arm_vreinterpretq_u8(n),">;
defm vcx3q :
  VCXPredicated<(args v16u8:$n, v16u8:$m), (args imm_4b:$imm), (? $n, $m),
                ["n", "m"], "__arm_vreinterpretq_u8(n), "
                            "__arm_vreinterpretq_u8(m),">;

// vreinterpretq intrinsics required by the ACLE CDE specification
foreach desttype = [/* no u8 */ s8, u16, s16, u32, s32, u64, s64, f16, f32] in {
  let params = [u8], headerOnly = 1, pnt = PNT_None in
  def "vreinterpretq_" # desttype : Intrinsic<
    VecOf<desttype>, (args Vector:$x), (vreinterpret $x, VecOf<desttype>)>;
}