sys/mips/nlm/hal/fmn.c

   1 /*-
   2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
   3  *
   4  * Copyright 2003-2011 Netlogic Microsystems (Netlogic). All rights
   5  * reserved.
   6  *
   7  * Redistribution and use in source and binary forms, with or without
   8  * modification, are permitted provided that the following conditions are
   9  * met:
  10  *
  11  * 1. Redistributions of source code must retain the above copyright
  12  *    notice, this list of conditions and the following disclaimer.
  13  * 2. Redistributions in binary form must reproduce the above copyright
  14  *    notice, this list of conditions and the following disclaimer in
  15  *    the documentation and/or other materials provided with the
  16  *    distribution.
  17  *
  18  * THIS SOFTWARE IS PROVIDED BY Netlogic Microsystems ``AS IS'' AND
  19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  21  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NETLOGIC OR CONTRIBUTORS BE
  22  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  23  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  26  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  27  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  28  * THE POSSIBILITY OF SUCH DAMAGE.
  29  *
  30  * NETLOGIC_BSD */
  31
  32 #include <sys/cdefs.h>
  33 __FBSDID("$FreeBSD$");
  34 #include <sys/types.h>
  35 #include <sys/systm.h>
  36
  37 #include <machine/cpufunc.h>
  38 #include <mips/nlm/hal/mips-extns.h>
  39 #include <mips/nlm/hal/haldefs.h>
  40 #include <mips/nlm/hal/iomap.h>
  41 #include <mips/nlm/hal/fmn.h>
  42
   43 /* XLP can take up to 16K FMN messages per hardware queue, as spill.
   44 * But configuring all 16K causes the total spill memory required
   45 * to blow up to 192MB for a single chip configuration, and 768MB in a four
   46 * chip configuration. Hence for now, we will set up the per queue spill
   47 * as 1K FMN messages. With this, the total spill memory needed for 1024
   48 * hardware queues (with 12 bytes per single entry FMN message) becomes
   49 * (1*1024)*12*1024 queues = 12MB. For the four chip config, the memory
   50 * needed = 12 * 4 = 48MB.
   51 */
  52 uint64_t nlm_cms_spill_total_messages = 1 * 1024;
  53
  54 /* On a XLP832, we have the following FMN stations:
  55 * CPU    stations: 8
  56 * PCIE0  stations: 1
  57 * PCIE1  stations: 1
  58 * PCIE2  stations: 1
  59 * PCIE3  stations: 1
  60 * GDX    stations: 1
  61 * CRYPTO stations: 1
  62 * RSA    stations: 1
  63 * CMP    stations: 1
  64 * POE    stations: 1
  65 * NAE    stations: 1
  66 * ==================
  67 * Total          : 18 stations per chip
  68 *
  69 * For all 4 nodes, there are 18*4 = 72 FMN stations
  70 */
  71 uint32_t nlm_cms_total_stations = 18 * 4 /*xlp_num_nodes*/;
  72
  73 /**
  74  * Takes inputs as node, queue_size and maximum number of queues.
  75  * Calculates the base, start & end and returns the same for a
  76  * defined qid.
  77  *
  78  * The output queues are maintained in the internal output buffer
   79  * which is an on-chip SRAM structure. For the actual hardware
   80  * internal implementation, it is a structure which consists
  81  * of eight banks of 4096-entry x message-width SRAMs. The SRAM
  82  * implementation is designed to run at 1GHz with a 1-cycle read/write
  83  * access. A read/write transaction can be initiated for each bank
  84  * every cycle for a total of eight accesses per cycle. Successive
  85  * entries of the same output queue are placed in successive banks.
  86  * This is done to spread different read & write accesses to same/different
  87  * output queue over as many different banks as possible so that they
  88  * can be scheduled concurrently. Spreading the accesses to as many banks
  89  * as possible to maximize the concurrency internally is important for
  90  * achieving the desired peak throughput. This is done by h/w implementation
  91  * itself.
  92  *
  93  * Output queues are allocated from this internal output buffer by
  94  * software. The total capacity of the output buffer is 32K-entry.
  95  * Each output queue can be sized from 32-entry to 1024-entry in
  96  * increments of 32-entry. This is done by specifying a Start & a
  97  * End pointer: pointers to the first & last 32-entry chunks allocated
  98  * to the output queue.
  99  *
 100  * To optimize the storage required for 1024 OQ pointers, the upper 5-bits
 101  * are shared by the Start & the End pointer. The side-effect of this
 102  * optimization is that an OQ can't cross a 1024-entry boundary. Also, the
 103  * lower 5-bits don't need to be specified in the Start & the End pointer
 104  * as the allocation is in increments of 32-entries.
 105  *
 106  * Queue occupancy is tracked by a Head & a Tail pointer. Tail pointer
 107  * indicates the location to which next entry will be written & Head
 108  * pointer indicates the location from which next entry will be read. When
 109  * these pointers reach the top of the allocated space (indicated by the
 110  * End pointer), they are reset to the bottom of the allocated space
 111  * (indicated by the Start pointer).
 112  *
 113  * Output queue pointer information:
 114  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 115  *
 116  *   14               10 9              5 4                 0
 117  *   ------------------
 118  *   | base ptr       |
 119  *   ------------------
 120  *                       ----------------
 121  *                       | start ptr    |
 122  *                       ----------------
 123  *                       ----------------
 124  *                       | end   ptr    |
 125  *                       ----------------
 126  *                       ------------------------------------
 127  *                       |           head ptr               |
 128  *                       ------------------------------------
 129  *                       ------------------------------------
 130  *                       |           tail ptr               |
 131  *                       ------------------------------------
 132  * Note:
 133  * A total of 1024 segments can sit on one software-visible "bank"
 134  * of internal SRAM. Each segment contains 32 entries. Also note
 135  * that sw-visible "banks" are not the same as the actual internal
 136  * 8-bank implementation of hardware. It is an optimization of
 137  * internal access.
 138  *
 139  */
 140
 141 void nlm_cms_setup_credits(uint64_t base, int destid, int srcid, int credit)
 142 {
 143         uint64_t val;
 144
 145         val = (((uint64_t)credit << 24) | (destid << 12) | (srcid << 0));
 146         nlm_write_cms_reg(base, CMS_OUTPUTQ_CREDIT_CFG, val);
 147
 148 }
 149
 150 /*
 151  * base         - CMS module base address for this node.
 152  * qid          - is the output queue id otherwise called as vc id
 153  * spill_base   - is the 40-bit physical address of spill memory. Must be
 154                   4KB aligned.
 155  * nsegs        - No of segments where a "1" indicates 4KB. Spill size must be
 156  *                a multiple of 4KB.
 157  */
 158 int nlm_cms_alloc_spill_q(uint64_t base, int qid, uint64_t spill_base,
 159                                 int nsegs)
 160 {
 161         uint64_t queue_config;
 162         uint32_t spill_start;
 163
 164         if (nsegs > CMS_MAX_SPILL_SEGMENTS_PER_QUEUE) {
 165                 return 1;
 166         }
 167
 168         queue_config = nlm_read_cms_reg(base,(CMS_OUTPUTQ_CONFIG(qid)));
 169
 170         spill_start = ((spill_base >> 12) & 0x3F);
 171         /* Spill configuration */
 172         queue_config = (((uint64_t)CMS_SPILL_ENA << 62) |
 173                                 (((spill_base >> 18) & 0x3FFFFF) << 27) |
 174                                 (spill_start + nsegs - 1) << 21 |
 175                                 (spill_start << 15));
 176
 177         nlm_write_cms_reg(base,(CMS_OUTPUTQ_CONFIG(qid)),queue_config);
 178
 179         return 0;
 180 }
 181
 182 uint64_t nlm_cms_get_onchip_queue (uint64_t base, int qid)
 183 {
 184         return nlm_read_cms_reg(base, CMS_OUTPUTQ_CONFIG(qid));
 185 }
 186
 187 void nlm_cms_set_onchip_queue (uint64_t base, int qid, uint64_t val)
 188 {
 189         uint64_t rdval;
 190
 191         rdval = nlm_read_cms_reg(base, CMS_OUTPUTQ_CONFIG(qid));
 192         rdval |= val;
 193         nlm_write_cms_reg(base, CMS_OUTPUTQ_CONFIG(qid), rdval);
 194 }
 195
 196 void nlm_cms_per_queue_level_intr(uint64_t base, int qid, int sub_type,
 197                                         int intr_val)
 198 {
 199         uint64_t val;
 200
 201         val = nlm_read_cms_reg(base, CMS_OUTPUTQ_CONFIG(qid));
 202
 203         val &= ~((0x7ULL << 56) | (0x3ULL << 54));
 204
 205         val |= (((uint64_t)sub_type<<54) |
 206                 ((uint64_t)intr_val<<56));
 207
 208         nlm_write_cms_reg(base, CMS_OUTPUTQ_CONFIG(qid), val);
 209 }
 210
 211 void nlm_cms_per_queue_timer_intr(uint64_t base, int qid, int sub_type,
 212                                         int intr_val)
 213 {
 214         uint64_t val;
 215
 216         val = nlm_read_cms_reg(base, CMS_OUTPUTQ_CONFIG(qid));
 217
 218         val &= ~((0x7ULL << 51) | (0x3ULL << 49));
 219
 220         val |= (((uint64_t)sub_type<<49) |
 221                 ((uint64_t)intr_val<<51));
 222
 223         nlm_write_cms_reg(base, CMS_OUTPUTQ_CONFIG(qid), val);
 224 }
 225
 226 /* returns 1 if interrupt has been generated for this output queue */
 227 int nlm_cms_outputq_intr_check(uint64_t base, int qid)
 228 {
 229         uint64_t val;
 230         val = nlm_read_cms_reg(base, CMS_OUTPUTQ_CONFIG(qid));
 231
 232         return ((val >> 59) & 0x1);
 233 }
 234
 235 void nlm_cms_outputq_clr_intr(uint64_t base, int qid)
 236 {
 237         uint64_t val;
 238         val = nlm_read_cms_reg(base, CMS_OUTPUTQ_CONFIG(qid));
 239         val |= (1ULL<<59);
 240         nlm_write_cms_reg(base, CMS_OUTPUTQ_CONFIG(qid), val);
 241 }
 242
 243 void nlm_cms_illegal_dst_error_intr(uint64_t base, int en)
 244 {
 245         uint64_t val;
 246
 247         val = nlm_read_cms_reg(base, CMS_MSG_CONFIG);
 248         val |= (en<<8);
 249         nlm_write_cms_reg(base, CMS_MSG_CONFIG, val);
 250 }
 251
 252 void nlm_cms_timeout_error_intr(uint64_t base, int en)
 253 {
 254         uint64_t val;
 255
 256         val = nlm_read_cms_reg(base, CMS_MSG_CONFIG);
 257         val |= (en<<7);
 258         nlm_write_cms_reg(base, CMS_MSG_CONFIG, val);
 259 }
 260
 261 void nlm_cms_biu_error_resp_intr(uint64_t base, int en)
 262 {
 263         uint64_t val;
 264
 265         val = nlm_read_cms_reg(base, CMS_MSG_CONFIG);
 266         val |= (en<<6);
 267         nlm_write_cms_reg(base, CMS_MSG_CONFIG, val);
 268 }
 269
 270 void nlm_cms_spill_uncorrectable_ecc_error_intr(uint64_t base, int en)
 271 {
 272         uint64_t val;
 273
 274         val = nlm_read_cms_reg(base, CMS_MSG_CONFIG);
 275         val |= (en<<5) | (en<<3);
 276         nlm_write_cms_reg(base, CMS_MSG_CONFIG, val);
 277 }
 278
 279 void nlm_cms_spill_correctable_ecc_error_intr(uint64_t base, int en)
 280 {
 281         uint64_t val;
 282
 283         val = nlm_read_cms_reg(base, CMS_MSG_CONFIG);
 284         val |= (en<<4) | (en<<2);
 285         nlm_write_cms_reg(base, CMS_MSG_CONFIG, val);
 286 }
 287
 288 void nlm_cms_outputq_uncorrectable_ecc_error_intr(uint64_t base, int en)
 289 {
 290         uint64_t val;
 291
 292         val = nlm_read_cms_reg(base, CMS_MSG_CONFIG);
 293         val |= (en<<1);
 294         nlm_write_cms_reg(base, CMS_MSG_CONFIG, val);
 295 }
 296
 297 void nlm_cms_outputq_correctable_ecc_error_intr(uint64_t base, int en)
 298 {
 299         uint64_t val;
 300
 301         val = nlm_read_cms_reg(base, CMS_MSG_CONFIG);
 302         val |= (en<<0);
 303         nlm_write_cms_reg(base, CMS_MSG_CONFIG, val);
 304 }
 305
 306 uint64_t nlm_cms_network_error_status(uint64_t base)
 307 {
 308         return nlm_read_cms_reg(base, CMS_MSG_ERR);
 309 }
 310
 311 int nlm_cms_get_net_error_code(uint64_t err)
 312 {
 313         return ((err >> 12) & 0xf);
 314 }
 315
 316 int nlm_cms_get_net_error_syndrome(uint64_t err)
 317 {
 318         return ((err >> 32) & 0x1ff);
 319 }
 320
 321 int nlm_cms_get_net_error_ramindex(uint64_t err)
 322 {
 323         return ((err >> 44) & 0x7fff);
 324 }
 325
 326 int nlm_cms_get_net_error_outputq(uint64_t err)
 327 {
 328         return ((err >> 16) & 0xfff);
 329 }
 330
 331 /*========================= FMN Tracing related APIs ================*/
 332
 333 void nlm_cms_trace_setup(uint64_t base, int en, uint64_t trace_base,
 334                                 uint64_t trace_limit, int match_dstid_en,
 335                                 int dst_id, int match_srcid_en, int src_id,
 336                                 int wrap)
 337 {
 338         uint64_t val;
 339
 340         nlm_write_cms_reg(base, CMS_TRACE_BASE_ADDR, trace_base);
 341         nlm_write_cms_reg(base, CMS_TRACE_LIMIT_ADDR, trace_limit);
 342
 343         val = nlm_read_cms_reg(base, CMS_TRACE_CONFIG);
 344         val |= (((uint64_t)match_dstid_en << 39) |
 345                 ((dst_id & 0xfff) << 24) |
 346                 (match_srcid_en << 23) |
 347                 ((src_id & 0xfff) << 8) |
 348                 (wrap << 1) |
 349                 (en << 0));
 350         nlm_write_cms_reg(base, CMS_MSG_CONFIG, val);
 351 }
 352
 353 void nlm_cms_endian_byte_swap (uint64_t base, int en)
 354 {
 355         nlm_write_cms_reg(base, CMS_MSG_ENDIAN_SWAP, en);
 356 }