//=- AArch64SchedThunderX2T99.td - Cavium ThunderX T99 Scheduling ---*- tablegen -*-=// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file defines the scheduling model for Cavium ThunderX2T99 // processors. // Based on Broadcom Vulcan. // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // 2. Pipeline Description. def ThunderX2T99Model : SchedMachineModel { let IssueWidth = 4; // 4 micro-ops dispatched at a time. let MicroOpBufferSize = 180; // 180 entries in micro-op re-order buffer. let LoadLatency = 4; // Optimistic load latency. let MispredictPenalty = 12; // Extra cycles for mispredicted branch. // Determined via a mix of micro-arch details and experimentation. let LoopMicroOpBufferSize = 32; let PostRAScheduler = 1; // Using PostRA sched. let CompleteModel = 1; } // Define the issue ports. // Port 0: ALU, FP/SIMD. def THX2T99P0 : ProcResource<1>; // Port 1: ALU, FP/SIMD, integer mul/div. def THX2T99P1 : ProcResource<1>; // Port 2: ALU, Branch. def THX2T99P2 : ProcResource<1>; // Port 3: Store data. def THX2T99P3 : ProcResource<1>; // Port 4: Load/store. def THX2T99P4 : ProcResource<1>; // Port 5: Load/store. def THX2T99P5 : ProcResource<1>; let SchedModel = ThunderX2T99Model in { // Define groups for the functional units on each issue port. Each group // created will be used by a WriteRes later on. // // NOTE: Some groups only contain one member. This is a way to create names for // the various functional units that share a single issue port. For example, // "THX2T99I1" for ALU ops on port 1 and "THX2T99F1" for FP ops on port 1. // Integer divide and multiply micro-ops only on port 1. def THX2T99I1 : ProcResGroup<[THX2T99P1]>; // Branch micro-ops only on port 2. def THX2T99I2 : ProcResGroup<[THX2T99P2]>; // ALU micro-ops on ports 0, 1, and 2. def THX2T99I012 : ProcResGroup<[THX2T99P0, THX2T99P1, THX2T99P2]>; // Crypto FP/SIMD micro-ops only on port 1. def THX2T99F1 : ProcResGroup<[THX2T99P1]>; // FP/SIMD micro-ops on ports 0 and 1. def THX2T99F01 : ProcResGroup<[THX2T99P0, THX2T99P1]>; // Store data micro-ops only on port 3. def THX2T99SD : ProcResGroup<[THX2T99P3]>; // Load/store micro-ops on ports 4 and 5. def THX2T99LS01 : ProcResGroup<[THX2T99P4, THX2T99P5]>; // 60 entry unified scheduler. def THX2T99Any : ProcResGroup<[THX2T99P0, THX2T99P1, THX2T99P2, THX2T99P3, THX2T99P4, THX2T99P5]> { let BufferSize=60; } // Define commonly used write types for InstRW specializations. // All definitions follow the format: THX2T99Write_Cyc_. // 3 cycles on I1. def THX2T99Write_3Cyc_I1 : SchedWriteRes<[THX2T99I1]> { let Latency = 3; } // 4 cycles on I1. def THX2T99Write_4Cyc_I1 : SchedWriteRes<[THX2T99I1]> { let Latency = 4; } // 1 cycle on I0, I1, or I2. def THX2T99Write_1Cyc_I012 : SchedWriteRes<[THX2T99I012]> { let Latency = 1; } // 5 cycles on F1. def THX2T99Write_5Cyc_F1 : SchedWriteRes<[THX2T99F1]> { let Latency = 5; } // 7 cycles on F1. def THX2T99Write_7Cyc_F1 : SchedWriteRes<[THX2T99F1]> { let Latency = 7; } // 4 cycles on F0 or F1. def THX2T99Write_4Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 4; } // 5 cycles on F0 or F1. def THX2T99Write_5Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 5; } // 6 cycles on F0 or F1. def THX2T99Write_6Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 6; } // 7 cycles on F0 or F1. def THX2T99Write_7Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 7; } // 8 cycles on F0 or F1. def THX2T99Write_8Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 8; } // 16 cycles on F0 or F1. def THX2T99Write_16Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 16; let ResourceCycles = [8]; } // 23 cycles on F0 or F1. def THX2T99Write_23Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 23; let ResourceCycles = [11]; } // 1 cycles on LS0 or LS1. def THX2T99Write_1Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { let Latency = 1; } // 4 cycles on LS0 or LS1. def THX2T99Write_4Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { let Latency = 4; } // 5 cycles on LS0 or LS1. def THX2T99Write_5Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { let Latency = 5; } // 6 cycles on LS0 or LS1. def THX2T99Write_6Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { let Latency = 6; } // 5 cycles on LS0 or LS1 and I0, I1, or I2. def THX2T99Write_5Cyc_LS01_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> { let Latency = 5; let NumMicroOps = 2; } // 5 cycles on LS0 or LS1 and 2 of I0, I1, or I2. def THX2T99Write_6Cyc_LS01_I012_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012, THX2T99I012]> { let Latency = 6; let NumMicroOps = 3; } // 1 cycles on LS0 or LS1 and F0 or F1. def THX2T99Write_1Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> { let Latency = 1; let NumMicroOps = 2; } // 5 cycles on LS0 or LS1 and F0 or F1. def THX2T99Write_5Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> { let Latency = 5; let NumMicroOps = 2; } // 6 cycles on LS0 or LS1 and F0 or F1. def THX2T99Write_6Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> { let Latency = 6; let NumMicroOps = 2; } // 7 cycles on LS0 or LS1 and F0 or F1. def THX2T99Write_7Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> { let Latency = 7; let NumMicroOps = 2; } // 8 cycles on LS0 or LS1 and F0 or F1. def THX2T99Write_8Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> { let Latency = 8; let NumMicroOps = 2; } // Define commonly used read types. // No forwarding is provided for these types. def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; } //===----------------------------------------------------------------------===// // 3. Instruction Tables. let SchedModel = ThunderX2T99Model in { //--- // 3.1 Branch Instructions //--- // Branch, immed // Branch and link, immed // Compare and branch def : WriteRes { let Latency = 1; } def : WriteRes { let Latency = 1; } def : WriteRes { let Latency = 1; } def : WriteRes { let Latency = 1; } def : WriteRes { let Unsupported = 1; } // Branch, register // Branch and link, register != LR // Branch and link, register = LR def : WriteRes { let Latency = 1; } //--- // 3.2 Arithmetic and Logical Instructions // 3.3 Move and Shift Instructions //--- // ALU, basic // Conditional compare // Conditional select // Address generation def : WriteRes { let Latency = 1; } def : InstRW<[WriteI], (instrs COPY)>; // ALU, extend and/or shift def : WriteRes { let Latency = 2; let ResourceCycles = [2]; } def : WriteRes { let Latency = 2; let ResourceCycles = [2]; } // Move immed def : WriteRes { let Latency = 1; } // Variable shift def : WriteRes { let Latency = 1; } //--- // 3.4 Divide and Multiply Instructions //--- // Divide, W-form // Latency range of 13-23. Take the average. def : WriteRes { let Latency = 18; let ResourceCycles = [18]; } // Divide, X-form // Latency range of 13-39. Take the average. def : WriteRes { let Latency = 26; let ResourceCycles = [26]; } // Multiply accumulate, W-form def : WriteRes { let Latency = 5; } // Multiply accumulate, X-form def : WriteRes { let Latency = 5; } // Bitfield extract, two reg def : WriteRes { let Latency = 1; } // Bitfield move, basic // Bitfield move, insert // NOTE: Handled by WriteIS. // Count leading def : InstRW<[THX2T99Write_3Cyc_I1], (instregex "^CLS(W|X)r$", "^CLZ(W|X)r$")>; // Reverse bits/bytes // NOTE: Handled by WriteI. //--- // 3.6 Load Instructions // 3.10 FP Load Instructions //--- // Load register, literal // Load register, unscaled immed // Load register, immed unprivileged // Load register, unsigned immed def : WriteRes { let Latency = 4; } // Load register, immed post-index // NOTE: Handled by WriteLD, WriteI. // Load register, immed pre-index // NOTE: Handled by WriteLD, WriteAdr. def : WriteRes { let Latency = 1; } // Load register offset, basic // Load register, register offset, scale by 4/8 // Load register, register offset, scale by 2 // Load register offset, extend // Load register, register offset, extend, scale by 4/8 // Load register, register offset, extend, scale by 2 def THX2T99WriteLDIdx : SchedWriteVariant<[ SchedVar, SchedVar]>; def : SchedAlias; def THX2T99ReadAdrBase : SchedReadVariant<[ SchedVar, SchedVar]>; def : SchedAlias; // Load pair, immed offset, normal // Load pair, immed offset, signed words, base != SP // Load pair, immed offset signed words, base = SP // LDP only breaks into *one* LS micro-op. Thus // the resources are handling by WriteLD. def : WriteRes { let Latency = 5; } // Load pair, immed pre-index, normal // Load pair, immed pre-index, signed words // Load pair, immed post-index, normal // Load pair, immed post-index, signed words // NOTE: Handled by WriteLD, WriteLDHi, WriteAdr. //-- // 3.7 Store Instructions // 3.11 FP Store Instructions //-- // Store register, unscaled immed // Store register, immed unprivileged // Store register, unsigned immed def : WriteRes { let Latency = 1; let NumMicroOps = 2; } // Store register, immed post-index // NOTE: Handled by WriteAdr, WriteST, ReadAdrBase // Store register, immed pre-index // NOTE: Handled by WriteAdr, WriteST // Store register, register offset, basic // Store register, register offset, scaled by 4/8 // Store register, register offset, scaled by 2 // Store register, register offset, extend // Store register, register offset, extend, scale by 4/8 // Store register, register offset, extend, scale by 1 def : WriteRes { let Latency = 1; let NumMicroOps = 3; } // Store pair, immed offset, W-form // Store pair, immed offset, X-form def : WriteRes { let Latency = 1; let NumMicroOps = 2; } // Store pair, immed post-index, W-form // Store pair, immed post-index, X-form // Store pair, immed pre-index, W-form // Store pair, immed pre-index, X-form // NOTE: Handled by WriteAdr, WriteSTP. //--- // 3.8 FP Data Processing Instructions //--- // FP absolute value // FP min/max // FP negate def : WriteRes { let Latency = 5; } // FP arithmetic def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FADD", "^FSUB")>; // FP compare def : WriteRes { let Latency = 5; } // FP divide, S-form // FP square root, S-form def : WriteRes { let Latency = 16; let ResourceCycles = [8]; } // FP divide, D-form // FP square root, D-form def : InstRW<[THX2T99Write_23Cyc_F01], (instrs FDIVDrr, FSQRTDr)>; // FP multiply // FP multiply accumulate def : WriteRes { let Latency = 6; } // FP round to integral def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^FRINT(A|I|M|N|P|X|Z)(Sr|Dr)")>; // FP select def : InstRW<[THX2T99Write_4Cyc_F01], (instregex "^FCSEL")>; //--- // 3.9 FP Miscellaneous Instructions //--- // FP convert, from vec to vec reg // FP convert, from gen to vec reg // FP convert, from vec to gen reg def : WriteRes { let Latency = 7; } // FP move, immed // FP move, register def : WriteRes { let Latency = 4; } // FP transfer, from gen to vec reg // FP transfer, from vec to gen reg def : WriteRes { let Latency = 4; } def : InstRW<[THX2T99Write_5Cyc_F01], (instrs FMOVXDHighr, FMOVDXHighr)>; //--- // 3.12 ASIMD Integer Instructions //--- // ASIMD absolute diff, D-form // ASIMD absolute diff, Q-form // ASIMD absolute diff accum, D-form // ASIMD absolute diff accum, Q-form // ASIMD absolute diff accum long // ASIMD absolute diff long // ASIMD arith, basic // ASIMD arith, complex // ASIMD compare // ASIMD logical (AND, BIC, EOR) // ASIMD max/min, basic // ASIMD max/min, reduce, 4H/4S // ASIMD max/min, reduce, 8B/8H // ASIMD max/min, reduce, 16B // ASIMD multiply, D-form // ASIMD multiply, Q-form // ASIMD multiply accumulate long // ASIMD multiply accumulate saturating long // ASIMD multiply long // ASIMD pairwise add and accumulate // ASIMD shift accumulate // ASIMD shift by immed, basic // ASIMD shift by immed and insert, basic, D-form // ASIMD shift by immed and insert, basic, Q-form // ASIMD shift by immed, complex // ASIMD shift by register, basic, D-form // ASIMD shift by register, basic, Q-form // ASIMD shift by register, complex, D-form // ASIMD shift by register, complex, Q-form def : WriteRes { let Latency = 7; } // ASIMD arith, reduce, 4H/4S // ASIMD arith, reduce, 8B/8H // ASIMD arith, reduce, 16B def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^ADDVv", "^SADDLVv", "^UADDLVv")>; // ASIMD logical (MOV, MVN, ORN, ORR) def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^ORRv", "^ORNv", "^NOTv")>; // ASIMD polynomial (8x8) multiply long def : InstRW<[THX2T99Write_5Cyc_F01], (instrs PMULLv8i8, PMULLv16i8)>; //--- // 3.13 ASIMD Floating-point Instructions //--- // ASIMD FP absolute value def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FABSv")>; // ASIMD FP arith, normal, D-form // ASIMD FP arith, normal, Q-form def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FABDv", "^FADDv", "^FSUBv")>; // ASIMD FP arith,pairwise, D-form // ASIMD FP arith, pairwise, Q-form def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FADDPv")>; // ASIMD FP compare, D-form // ASIMD FP compare, Q-form def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FACGEv", "^FACGTv")>; def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FCMEQv", "^FCMGEv", "^FCMGTv", "^FCMLEv", "^FCMLTv")>; // ASIMD FP convert, long // ASIMD FP convert, narrow // ASIMD FP convert, other, D-form // ASIMD FP convert, other, Q-form // NOTE: Handled by WriteV. // ASIMD FP divide, D-form, F32 def : InstRW<[THX2T99Write_16Cyc_F01], (instrs FDIVv2f32)>; // ASIMD FP divide, Q-form, F32 def : InstRW<[THX2T99Write_16Cyc_F01], (instrs FDIVv4f32)>; // ASIMD FP divide, Q-form, F64 def : InstRW<[THX2T99Write_23Cyc_F01], (instrs FDIVv2f64)>; // ASIMD FP max/min, normal, D-form // ASIMD FP max/min, normal, Q-form def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FMAXv", "^FMAXNMv", "^FMINv", "^FMINNMv")>; // ASIMD FP max/min, pairwise, D-form // ASIMD FP max/min, pairwise, Q-form def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FMAXPv", "^FMAXNMPv", "^FMINPv", "^FMINNMPv")>; // ASIMD FP max/min, reduce def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FMAXVv", "^FMAXNMVv", "^FMINVv", "^FMINNMVv")>; // ASIMD FP multiply, D-form, FZ // ASIMD FP multiply, D-form, no FZ // ASIMD FP multiply, Q-form, FZ // ASIMD FP multiply, Q-form, no FZ def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FMULv", "^FMULXv")>; // ASIMD FP multiply accumulate, Dform, FZ // ASIMD FP multiply accumulate, Dform, no FZ // ASIMD FP multiply accumulate, Qform, FZ // ASIMD FP multiply accumulate, Qform, no FZ def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FMLAv", "^FMLSv")>; // ASIMD FP negate def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FNEGv")>; // ASIMD FP round, D-form // ASIMD FP round, Q-form // NOTE: Handled by WriteV. //-- // 3.14 ASIMD Miscellaneous Instructions //-- // ASIMD bit reverse def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^RBITv")>; // ASIMD bitwise insert, D-form // ASIMD bitwise insert, Q-form def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^BIFv", "^BITv", "^BSLv")>; // ASIMD count, D-form // ASIMD count, Q-form def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^CLSv", "^CLZv", "^CNTv")>; // ASIMD duplicate, gen reg // ASIMD duplicate, element def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^DUPv")>; // ASIMD extract def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^EXTv")>; // ASIMD extract narrow // ASIMD extract narrow, saturating // NOTE: Handled by WriteV. // ASIMD insert, element to element def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^INSv")>; // ASIMD move, integer immed def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^MOVIv", "^MOVIDv")>; // ASIMD move, FP immed def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FMOVv")>; // ASIMD reciprocal estimate, D-form // ASIMD reciprocal estimate, Q-form def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FRECPEv", "^FRECPXv", "^URECPEv", "^FRSQRTEv", "^URSQRTEv")>; // ASIMD reciprocal step, D-form, FZ // ASIMD reciprocal step, D-form, no FZ // ASIMD reciprocal step, Q-form, FZ // ASIMD reciprocal step, Q-form, no FZ def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FRECPSv", "^FRSQRTSv")>; // ASIMD reverse def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^REV16v", "^REV32v", "^REV64v")>; // ASIMD table lookup, D-form // ASIMD table lookup, Q-form def : InstRW<[THX2T99Write_8Cyc_F01], (instregex "^TBLv", "^TBXv")>; // ASIMD transfer, element to word or word def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^UMOVv")>; // ASIMD transfer, element to gen reg def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^SMOVv", "^UMOVv")>; // ASIMD transfer gen reg to element def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^INSv")>; // ASIMD transpose def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^TRN1v", "^TRN2v", "^UZP1v", "^UZP2v")>; // ASIMD unzip/zip def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^ZIP1v", "^ZIP2v")>; //-- // 3.15 ASIMD Load Instructions //-- // ASIMD load, 1 element, multiple, 1 reg, D-form // ASIMD load, 1 element, multiple, 1 reg, Q-form def : InstRW<[THX2T99Write_4Cyc_LS01], (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; def : InstRW<[THX2T99Write_4Cyc_LS01, WriteAdr], (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; // ASIMD load, 1 element, multiple, 2 reg, D-form // ASIMD load, 1 element, multiple, 2 reg, Q-form def : InstRW<[THX2T99Write_4Cyc_LS01], (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; def : InstRW<[THX2T99Write_4Cyc_LS01, WriteAdr], (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; // ASIMD load, 1 element, multiple, 3 reg, D-form // ASIMD load, 1 element, multiple, 3 reg, Q-form def : InstRW<[THX2T99Write_5Cyc_LS01], (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; def : InstRW<[THX2T99Write_5Cyc_LS01, WriteAdr], (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; // ASIMD load, 1 element, multiple, 4 reg, D-form // ASIMD load, 1 element, multiple, 4 reg, Q-form def : InstRW<[THX2T99Write_6Cyc_LS01], (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; def : InstRW<[THX2T99Write_6Cyc_LS01, WriteAdr], (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; // ASIMD load, 1 element, one lane, B/H/S // ASIMD load, 1 element, one lane, D def : InstRW<[THX2T99Write_5Cyc_LS01_F01], (instregex "^LD1i(8|16|32|64)$")>; def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr], (instregex "^LD1i(8|16|32|64)_POST$")>; // ASIMD load, 1 element, all lanes, D-form, B/H/S // ASIMD load, 1 element, all lanes, D-form, D // ASIMD load, 1 element, all lanes, Q-form def : InstRW<[THX2T99Write_5Cyc_LS01_F01], (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr], (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; // ASIMD load, 2 element, multiple, D-form, B/H/S // ASIMD load, 2 element, multiple, Q-form, D def : InstRW<[THX2T99Write_5Cyc_LS01_F01], (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)$")>; def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr], (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>; // ASIMD load, 2 element, one lane, B/H // ASIMD load, 2 element, one lane, S // ASIMD load, 2 element, one lane, D def : InstRW<[THX2T99Write_5Cyc_LS01_F01], (instregex "^LD2i(8|16|32|64)$")>; def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr], (instregex "^LD2i(8|16|32|64)_POST$")>; // ASIMD load, 2 element, all lanes, D-form, B/H/S // ASIMD load, 2 element, all lanes, D-form, D // ASIMD load, 2 element, all lanes, Q-form def : InstRW<[THX2T99Write_5Cyc_LS01_F01], (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr], (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; // ASIMD load, 3 element, multiple, D-form, B/H/S // ASIMD load, 3 element, multiple, Q-form, B/H/S // ASIMD load, 3 element, multiple, Q-form, D def : InstRW<[THX2T99Write_8Cyc_LS01_F01], (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)$")>; def : InstRW<[THX2T99Write_8Cyc_LS01_F01, WriteAdr], (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>; // ASIMD load, 3 element, one lone, B/H // ASIMD load, 3 element, one lane, S // ASIMD load, 3 element, one lane, D def : InstRW<[THX2T99Write_7Cyc_LS01_F01], (instregex "^LD3i(8|16|32|64)$")>; def : InstRW<[THX2T99Write_7Cyc_LS01_F01, WriteAdr], (instregex "^LD3i(8|16|32|64)_POST$")>; // ASIMD load, 3 element, all lanes, D-form, B/H/S // ASIMD load, 3 element, all lanes, D-form, D // ASIMD load, 3 element, all lanes, Q-form, B/H/S // ASIMD load, 3 element, all lanes, Q-form, D def : InstRW<[THX2T99Write_7Cyc_LS01_F01], (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; def : InstRW<[THX2T99Write_7Cyc_LS01_F01, WriteAdr], (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; // ASIMD load, 4 element, multiple, D-form, B/H/S // ASIMD load, 4 element, multiple, Q-form, B/H/S // ASIMD load, 4 element, multiple, Q-form, D def : InstRW<[THX2T99Write_8Cyc_LS01_F01], (instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>; def : InstRW<[THX2T99Write_8Cyc_LS01_F01, WriteAdr], (instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>; // ASIMD load, 4 element, one lane, B/H // ASIMD load, 4 element, one lane, S // ASIMD load, 4 element, one lane, D def : InstRW<[THX2T99Write_6Cyc_LS01_F01], (instregex "^LD4i(8|16|32|64)$")>; def : InstRW<[THX2T99Write_6Cyc_LS01_F01, WriteAdr], (instregex "^LD4i(8|16|32|64)_POST$")>; // ASIMD load, 4 element, all lanes, D-form, B/H/S // ASIMD load, 4 element, all lanes, D-form, D // ASIMD load, 4 element, all lanes, Q-form, B/H/S // ASIMD load, 4 element, all lanes, Q-form, D def : InstRW<[THX2T99Write_6Cyc_LS01_F01], (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; def : InstRW<[THX2T99Write_6Cyc_LS01_F01, WriteAdr], (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; //-- // 3.16 ASIMD Store Instructions //-- // ASIMD store, 1 element, multiple, 1 reg, D-form // ASIMD store, 1 element, multiple, 1 reg, Q-form def : InstRW<[THX2T99Write_1Cyc_LS01], (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr], (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; // ASIMD store, 1 element, multiple, 2 reg, D-form // ASIMD store, 1 element, multiple, 2 reg, Q-form def : InstRW<[THX2T99Write_1Cyc_LS01], (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr], (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; // ASIMD store, 1 element, multiple, 3 reg, D-form // ASIMD store, 1 element, multiple, 3 reg, Q-form def : InstRW<[THX2T99Write_1Cyc_LS01], (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr], (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; // ASIMD store, 1 element, multiple, 4 reg, D-form // ASIMD store, 1 element, multiple, 4 reg, Q-form def : InstRW<[THX2T99Write_1Cyc_LS01], (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr], (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; // ASIMD store, 1 element, one lane, B/H/S // ASIMD store, 1 element, one lane, D def : InstRW<[THX2T99Write_1Cyc_LS01_F01], (instregex "^ST1i(8|16|32|64)$")>; def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], (instregex "^ST1i(8|16|32|64)_POST$")>; // ASIMD store, 2 element, multiple, D-form, B/H/S // ASIMD store, 2 element, multiple, Q-form, B/H/S // ASIMD store, 2 element, multiple, Q-form, D def : InstRW<[THX2T99Write_1Cyc_LS01_F01], (instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)$")>; def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], (instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>; // ASIMD store, 2 element, one lane, B/H/S // ASIMD store, 2 element, one lane, D def : InstRW<[THX2T99Write_1Cyc_LS01_F01], (instregex "^ST2i(8|16|32|64)$")>; def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], (instregex "^ST2i(8|16|32|64)_POST$")>; // ASIMD store, 3 element, multiple, D-form, B/H/S // ASIMD store, 3 element, multiple, Q-form, B/H/S // ASIMD store, 3 element, multiple, Q-form, D def : InstRW<[THX2T99Write_1Cyc_LS01_F01], (instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)$")>; def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], (instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>; // ASIMD store, 3 element, one lane, B/H // ASIMD store, 3 element, one lane, S // ASIMD store, 3 element, one lane, D def : InstRW<[THX2T99Write_1Cyc_LS01_F01], (instregex "^ST3i(8|16|32|64)$")>; def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], (instregex "^ST3i(8|16|32|64)_POST$")>; // ASIMD store, 4 element, multiple, D-form, B/H/S // ASIMD store, 4 element, multiple, Q-form, B/H/S // ASIMD store, 4 element, multiple, Q-form, D def : InstRW<[THX2T99Write_1Cyc_LS01_F01], (instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>; def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], (instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>; // ASIMD store, 4 element, one lane, B/H // ASIMD store, 4 element, one lane, S // ASIMD store, 4 element, one lane, D def : InstRW<[THX2T99Write_1Cyc_LS01_F01], (instregex "^ST4i(8|16|32|64)$")>; def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], (instregex "^ST4i(8|16|32|64)_POST$")>; //-- // 3.17 Cryptography Extensions //-- // Crypto AES ops def : InstRW<[THX2T99Write_5Cyc_F1], (instregex "^AES")>; // Crypto polynomial (64x64) multiply long def : InstRW<[THX2T99Write_5Cyc_F1], (instrs PMULLv1i64, PMULLv2i64)>; // Crypto SHA1 xor ops // Crypto SHA1 schedule acceleration ops // Crypto SHA256 schedule acceleration op (1 u-op) // Crypto SHA256 schedule acceleration op (2 u-ops) // Crypto SHA256 hash acceleration ops def : InstRW<[THX2T99Write_7Cyc_F1], (instregex "^SHA")>; //-- // 3.18 CRC //-- // CRC checksum ops def : InstRW<[THX2T99Write_4Cyc_I1], (instregex "^CRC32")>; } // SchedModel = ThunderX2T99Model