sys/contrib/octeon-sdk/cvmx-cmd-queue.h

   1 /***********************license start***************
   2  * Copyright (c) 2003-2010  Cavium Networks (support@cavium.com). All rights
   3  * reserved.
   4  *
   5  *
   6  * Redistribution and use in source and binary forms, with or without
   7  * modification, are permitted provided that the following conditions are
   8  * met:
   9  *
  10  *   * Redistributions of source code must retain the above copyright
  11  *     notice, this list of conditions and the following disclaimer.
  12  *
  13  *   * Redistributions in binary form must reproduce the above
  14  *     copyright notice, this list of conditions and the following
  15  *     disclaimer in the documentation and/or other materials provided
  16  *     with the distribution.
  17
  18  *   * Neither the name of Cavium Networks nor the names of
  19  *     its contributors may be used to endorse or promote products
  20  *     derived from this software without specific prior written
  21  *     permission.
  22
  23  * This Software, including technical data, may be subject to U.S. export  control
  24  * laws, including the U.S. Export Administration Act and its  associated
  25  * regulations, and may be subject to export or import  regulations in other
  26  * countries.
  27
  28  * TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
  29  * AND WITH ALL FAULTS AND CAVIUM  NETWORKS MAKES NO PROMISES, REPRESENTATIONS OR
  30  * WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT TO
  31  * THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY REPRESENTATION OR
  32  * DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT DEFECTS, AND CAVIUM
  33  * SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES OF TITLE,
  34  * MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR PURPOSE, LACK OF
  35  * VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT, QUIET POSSESSION OR
  36  * CORRESPONDENCE TO DESCRIPTION. THE ENTIRE  RISK ARISING OUT OF USE OR
  37  * PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
  38  ***********************license end**************************************/
  39
  40
  41
  42
  43
  44
  45
  46 /**
  47  * @file
  48  *
  49  * Support functions for managing command queues used for
  50  * various hardware blocks.
  51  *
  52  * The common command queue infrastructure abstracts out the
  53  * software necessary for adding to Octeon's chained queue
  54  * structures. These structures are used for commands to the
  55  * PKO, ZIP, DFA, RAID, and DMA engine blocks. Although each
  56  * hardware unit takes commands and CSRs of different types,
  57  * they all use basic linked command buffers to store the
  58  * pending request. In general, users of the CVMX API don't
  59  * call cvmx-cmd-queue functions directly. Instead the hardware
  60  * unit specific wrapper should be used. The wrappers perform
  61  * unit specific validation and CSR writes to submit the
  62  * commands.
  63  *
  64  * Even though most software will never directly interact with
  65  * cvmx-cmd-queue, knowledge of its internal workings can help
  66  * in diagnosing performance problems and help with debugging.
  67  *
  68  * Command queue pointers are stored in a global named block
  69  * called "cvmx_cmd_queues". Except for the PKO queues, each
  70  * hardware queue is stored in its own cache line to reduce SMP
  71  * contention on spin locks. The PKO queues are stored such that
  72  * every 16th queue is next to each other in memory. This scheme
  73  * allows for queues being in separate cache lines when there
  74  * is a low number of queues per port. With 16 queues per port,
  75  * the first queue for each port is in the same cache area. The
  76  * second queues for each port are in another area, etc. This
  77  * allows software to implement very efficient lockless PKO with
  78  * 16 queues per port using a minimum of cache lines per core.
  79  * All queues for a given core will be isolated in the same
  80  * cache area.
  81  *
  82  * In addition to the memory pointer layout, cvmx-cmd-queue
  83  * provides an optimized fair ll/sc locking mechanism for the
  84  * queues. The lock uses a "ticket / now serving" model to
  85  * maintain fair order on contended locks. In addition, it uses
  86  * predicted locking time to limit cache contention. When a core
  87  * knows it must wait in line for a lock, it spins on the
  88  * internal cycle counter to completely eliminate any causes of
  89  * bus traffic.
  90  *
  91  * <hr> $Revision: 50049 $ <hr>
  92  */
  93
  94 #ifndef __CVMX_CMD_QUEUE_H__
  95 #define __CVMX_CMD_QUEUE_H__
  96
  97 #if !defined(CVMX_BUILD_FOR_LINUX_KERNEL) && !defined(CVMX_BUILD_FOR_FREEBSD_KERNEL)
  98 #include "executive-config.h"
  99 #include "cvmx-config.h"
 100 #endif
 101
 102 #include "cvmx-fpa.h"
 103
 104 #ifdef  __cplusplus
 105 extern "C" {
 106 #endif
 107
 108 /**
 109  * By default we disable the max depth support. Most programs
 110  * don't use it and it slows down the command queue processing
 111  * significantly.
      *
      * While this is 0 the cvmx_cmd_queue_write*() functions in this
      * header never compare the queue length against the queue's
      * max_depth. The \#ifndef guard lets a build override the default
      * by defining the macro before this header is included.
 112  */
 113 #ifndef CVMX_CMD_QUEUE_ENABLE_MAX_DEPTH
 114 #define CVMX_CMD_QUEUE_ENABLE_MAX_DEPTH 0
 115 #endif
 116
 117 /**
 118  * Enumeration representing all hardware blocks that use command
 119  * queues. Each hardware block has up to 65536 sub identifiers for
 120  * multiple command queues. Not all chips support all hardware
 121  * units.
      *
      * A queue id is the hardware unit number in bits 16 and up, plus
      * the queue number within that unit in the low 16 bits. The
      * CVMX_CMD_QUEUE_PKO() and CVMX_CMD_QUEUE_DMA() macros build such
      * an id from a queue number, masking it to 16 bits. Note that
      * __cvmx_cmd_queue_get_index() only uses the low 8 bits of the
      * queue number.
 122  */
 123 typedef enum
 124 {
 125     CVMX_CMD_QUEUE_PKO_BASE = 0x00000,  /**< First PKO queue; see CVMX_CMD_QUEUE_PKO() */
 126 #define CVMX_CMD_QUEUE_PKO(queue) ((cvmx_cmd_queue_id_t)(CVMX_CMD_QUEUE_PKO_BASE + (0xffff&(queue))))
 127     CVMX_CMD_QUEUE_ZIP      = 0x10000,  /**< ZIP block queue */
 128     CVMX_CMD_QUEUE_DFA      = 0x20000,  /**< DFA block queue */
 129     CVMX_CMD_QUEUE_RAID     = 0x30000,  /**< RAID block queue */
 130     CVMX_CMD_QUEUE_DMA_BASE = 0x40000,  /**< First DMA engine queue; see CVMX_CMD_QUEUE_DMA() */
 131 #define CVMX_CMD_QUEUE_DMA(queue) ((cvmx_cmd_queue_id_t)(CVMX_CMD_QUEUE_DMA_BASE + (0xffff&(queue))))
 132     CVMX_CMD_QUEUE_END      = 0x50000,  /**< One past the last unit; also sizes the global state arrays */
 133 } cvmx_cmd_queue_id_t;
 134
 135 /**
 136  * Command write operations can fail if the command queue needs
 137  * a new buffer and the associated FPA pool is empty. It can also
 138  * fail if the number of queued command words exceeds the maximum
 139  * set at initialization.
      *
      * Success is zero and every failure code is negative.
 140  */
 141 typedef enum
 142 {
 143     CVMX_CMD_QUEUE_SUCCESS = 0,         /**< The operation completed */
 144     CVMX_CMD_QUEUE_NO_MEMORY = -1,      /**< cvmx_fpa_alloc() returned NULL when a new command buffer was needed */
 145     CVMX_CMD_QUEUE_FULL = -2,           /**< cvmx_cmd_queue_length() is above the queue's max_depth */
 146     CVMX_CMD_QUEUE_INVALID_PARAM = -3,  /**< Bad queue id, command count or command pointer (parameter checking builds only) */
 147     CVMX_CMD_QUEUE_ALREADY_SETUP = -4,  /**< Not produced in this header; presumably from cvmx_cmd_queue_initialize() - TODO confirm */
 148 } cvmx_cmd_queue_result_t;
 149
 150 typedef struct
 151 {
     /* Software state of one command queue. The write functions in this
        header read and update base_ptr_div128 and index while holding the
        queue's ticket lock (or with exclusivity guaranteed by the caller). */
 152     uint8_t  now_serving;           /**< You have lock when this is your ticket */
 153     uint64_t unused1        : 24;   /**< Not used by the code in this header */
 154     uint32_t max_depth;             /**< Maximum outstanding command words; 0 disables the check */
 155     uint64_t fpa_pool       : 3;    /**< FPA pool buffers come from */
 156     uint64_t base_ptr_div128: 29;   /**< Physical address of the current command buffer shifted right by 7 */
 157     uint64_t unused2        : 6;    /**< Not used by the code in this header */
 158     uint64_t pool_size_m1   : 13;   /**< FPA buffer size in 64bit words minus 1; the last word holds the link to the next buffer */
 159     uint64_t index          : 13;   /**< Number of 64bit words already used in the current buffer */
 160 } __cvmx_cmd_queue_state_t;
 161
 162 /**
 163  * This structure contains the global state of all command queues.
 164  * It is stored in a bootmem named block and shared by all
 165  * applications running on Octeon. Tickets are stored in a different
 166  * cache line than queue information to reduce the contention on the
 167  * ll/sc used to get a ticket. If this is not the case, the update
 168  * of queue state causes the ll/sc to fail quite often.
      *
      * Both arrays hold 256 entries for each hardware unit below
      * CVMX_CMD_QUEUE_END and are indexed with the value returned by
      * __cvmx_cmd_queue_get_index().
 169  */
 170 typedef struct
 171 {
 172     uint64_t                 ticket[(CVMX_CMD_QUEUE_END>>16) * 256];   /**< Next ticket to hand out, one per queue */
 173     __cvmx_cmd_queue_state_t state[(CVMX_CMD_QUEUE_END>>16) * 256];    /**< Per-queue state, including now_serving */
 174 } __cvmx_cmd_queue_all_state_t;
 175
 176 extern CVMX_SHARED __cvmx_cmd_queue_all_state_t *__cvmx_cmd_queue_state_ptr;
     /* Pointer to the global queue state. It is defined outside this header and
        dereferenced without a NULL check by __cvmx_cmd_queue_get_state() and
        __cvmx_cmd_queue_lock(), so it must be valid before they are called. */
 177
 178 /**
 179  * Initialize a command queue for use. The initial FPA buffer is
 180  * allocated and the hardware unit is configured to point to the
 181  * new command queue.
 182  *
 183  * @param queue_id  Hardware command queue to initialize.
 184  * @param max_depth Maximum outstanding command words that can be queued.
      *                  The write functions skip the depth check when the
      *                  stored max_depth is zero, and always skip it unless
      *                  CVMX_CMD_QUEUE_ENABLE_MAX_DEPTH is nonzero.
 185  * @param fpa_pool  FPA pool the command buffers should come from.
 186  * @param pool_size Size of each buffer in the FPA pool (bytes)
 187  *
 188  * @return CVMX_CMD_QUEUE_SUCCESS or a failure code
 189  */
 190 cvmx_cmd_queue_result_t cvmx_cmd_queue_initialize(cvmx_cmd_queue_id_t queue_id, int max_depth, int fpa_pool, int pool_size);
 191
 192 /**
 193  * Shut down a queue and free its command buffers to the FPA. The
 194  * hardware connected to the queue must be stopped before this
 195  * function is called.
 196  *
 197  * @param queue_id Queue to shut down
 198  *
 199  * @return CVMX_CMD_QUEUE_SUCCESS or a failure code
 200  */
 201 cvmx_cmd_queue_result_t cvmx_cmd_queue_shutdown(cvmx_cmd_queue_id_t queue_id);
 202
 203 /**
 204  * Return the number of command words pending in the queue. This
 205  * function may be relatively slow for some hardware units.
      *
      * The cvmx_cmd_queue_write*() functions compare this value with the
      * queue's max_depth when CVMX_CMD_QUEUE_ENABLE_MAX_DEPTH is nonzero.
 206  *
 207  * @param queue_id Hardware command queue to query
 208  *
 209  * @return Number of outstanding command words. NOTE(review): behavior
      *         for an invalid queue_id is not visible from this header.
 210  */
 211 int cvmx_cmd_queue_length(cvmx_cmd_queue_id_t queue_id);
 212
 213 /**
 214  * Return the command buffer to be written to. The purpose of this
 215  * function is to allow CVMX routines access to the low level buffer
 216  * for initial hardware setup. User applications should not call this
 217  * function directly.
 218  *
 219  * @param queue_id Command queue to query
 220  *
 221  * @return Command buffer or NULL on failure
 222  */
 223 void *cvmx_cmd_queue_buffer(cvmx_cmd_queue_id_t queue_id);
 224
 225 /**
 226  * @INTERNAL
 227  * Get the index into the state arrays for the supplied queue id.
 228  *
 229  * @param queue_id Queue ID to get an index for
 230  *
 231  * @return Index into the state arrays
 232  */
 233 static inline int __cvmx_cmd_queue_get_index(cvmx_cmd_queue_id_t queue_id)
 234 {
 235     /* Warning: This code currently only works with devices that have 256 queues
 236         or less. Devices with more than 16 queues are layed out in memory to allow
 237         cores quick access to every 16th queue. This reduces cache thrashing
 238         when you are running 16 queues per port to support lockless operation */
 239     int unit = queue_id>>16;
 240     int q = (queue_id >> 4) & 0xf;
 241     int core = queue_id & 0xf;
 242     return unit*256 + core*16 + q;
 243 }
 244
 245
 246 /**
 247  * @INTERNAL
 248  * Lock the supplied queue so nobody else is updating it at the same
 249  * time as us.
      *
      * The lock is a ticket lock. An ll/sc loop takes the next ticket from
      * the queue's entry in the global ticket array; the core then waits
      * until qptr->now_serving equals that ticket. While other tickets are
      * ahead, the core runs a local countdown proportional to its distance
      * from the front before it reads now_serving again, which keeps it
      * from hammering the shared cache line. The instructions indented by
      * one space sit in branch delay slots (the block is assembled with
      * ".set noreorder").
      *
      * NOTE(review): the ticket is compared with the 8 bit now_serving, so
      * "baddu" presumably keeps the stored ticket within 8 bits - confirm
      * against the Octeon instruction set manual.
      * NOTE(review): both memory operands are declared output-only ("=m")
      * although the assembly also reads them, and there is no "memory"
      * clobber. Confirm this is what the supported compilers require
      * before changing it.
 250  *
 251  * @param queue_id Queue ID to lock
 252  * @param qptr     Pointer to the queue's global state
 253  */
 254 static inline void __cvmx_cmd_queue_lock(cvmx_cmd_queue_id_t queue_id, __cvmx_cmd_queue_state_t *qptr)
 255 {
 256     int tmp;
 257     int my_ticket;
 258     CVMX_PREFETCH(qptr, 0);
 259     asm volatile (
 260         ".set push\n"
 261         ".set noreorder\n"
 262         "1:\n"
 263         "ll     %[my_ticket], %[ticket_ptr]\n"          /* Atomic add one to ticket_ptr */
 264         "li     %[ticket], 1\n"                         /*    and store the original value */
 265         "baddu  %[ticket], %[my_ticket]\n"              /*    in my_ticket */
 266         "sc     %[ticket], %[ticket_ptr]\n"
 267         "beqz   %[ticket], 1b\n"                        /* Retry if the store conditional failed */
 268         " nop\n"
 269         "lbu    %[ticket], %[now_serving]\n"            /* Load the current now_serving ticket */
 270         "2:\n"
 271         "beq    %[ticket], %[my_ticket], 4f\n"          /* Jump out if now_serving == my_ticket */
 272         " subu   %[ticket], %[my_ticket], %[ticket]\n"  /* Find out how many tickets are in front of me */
 273         "subu  %[ticket], 1\n"                          /* Use tickets in front of me minus one to delay */
 274         "cins   %[ticket], %[ticket], 5, 7\n"           /* Delay will be ((tickets in front)-1)*32 loops */
 275         "3:\n"
 276         "bnez   %[ticket], 3b\n"                        /* Loop here until our ticket might be up */
 277         " subu  %[ticket], 1\n"
 278         "b      2b\n"                                   /* Jump back up to check our ticket again */
 279         " lbu   %[ticket], %[now_serving]\n"            /* Load the current now_serving ticket */
 280         "4:\n"
 281         ".set pop\n"
 282         : [ticket_ptr] "=m" (__cvmx_cmd_queue_state_ptr->ticket[__cvmx_cmd_queue_get_index(queue_id)]),
 283           [now_serving] "=m" (qptr->now_serving),
 284           [ticket] "=r" (tmp),
 285           [my_ticket] "=r" (my_ticket)
 286     );
 287 }
 288
 289
 290 /**
 291  * @INTERNAL
 292  * Unlock the queue, flushing all writes.
      *
      * The lock is handed to the next ticket holder by advancing
      * now_serving by one. now_serving is a uint8_t, so the value wraps
      * from 255 back to 0. The new value is computed before the first
      * CVMX_SYNCWS and stored after it, so the queue updates made under
      * the lock are ordered ahead of the store that releases it.
 293  *
 294  * @param qptr   Queue to unlock
 295  */
 296 static inline void __cvmx_cmd_queue_unlock(__cvmx_cmd_queue_state_t *qptr)
 297 {
 298     uint8_t ns;
 299
 300     ns = qptr->now_serving + 1;
 301     CVMX_SYNCWS; /* Order queue manipulation with respect to the unlock.  */
 302     qptr->now_serving = ns;
 303     CVMX_SYNCWS; /* nudge out the unlock. */
 304 }
 305
 306
 307 /**
 308  * @INTERNAL
 309  * Get the queue state structure for the given queue id
 310  *
 311  * @param queue_id Queue id to get
 312  *
 313  * @return Queue structure or NULL on failure
 314  */
 315 static inline __cvmx_cmd_queue_state_t *__cvmx_cmd_queue_get_state(cvmx_cmd_queue_id_t queue_id)
 316 {
 317     if (CVMX_ENABLE_PARAMETER_CHECKING)
 318     {
 319         if (cvmx_unlikely(queue_id >= CVMX_CMD_QUEUE_END))
 320             return NULL;
 321         if (cvmx_unlikely((queue_id & 0xffff) >= 256))
 322             return NULL;
 323     }
 324     return &__cvmx_cmd_queue_state_ptr->state[__cvmx_cmd_queue_get_index(queue_id)];
 325 }
 326
 327
 328 /**
 329  * Write an arbitrary number of command words to a command queue.
 330  * This is a generic function; the fixed number of command word
 331  * functions yield higher performance.
 332  *
 333  * @param queue_id  Hardware command queue to write to
 334  * @param use_locking
 335  *                  Use internal locking to ensure exclusive access for queue
 336  *                  updates. If you don't use this locking you must ensure
 337  *                  exclusivity some other way. Locking is strongly recommended.
 338  * @param cmd_count Number of command words to write
 339  * @param cmds      Array of commands to write
 340  *
 341  * @return CVMX_CMD_QUEUE_SUCCESS or a failure code
 342  */
 343 static inline cvmx_cmd_queue_result_t cvmx_cmd_queue_write(cvmx_cmd_queue_id_t queue_id, int use_locking, int cmd_count, uint64_t *cmds)
 344 {
 345     __cvmx_cmd_queue_state_t *qptr = __cvmx_cmd_queue_get_state(queue_id);
 346
 347     if (CVMX_ENABLE_PARAMETER_CHECKING)
 348     {
 349         if (cvmx_unlikely(qptr == NULL))
 350             return CVMX_CMD_QUEUE_INVALID_PARAM;
 351         if (cvmx_unlikely((cmd_count < 1) || (cmd_count > 32)))
 352             return CVMX_CMD_QUEUE_INVALID_PARAM;
 353         if (cvmx_unlikely(cmds == NULL))
 354             return CVMX_CMD_QUEUE_INVALID_PARAM;
 355     }
 356
 357     /* Make sure nobody else is updating the same queue */
 358     if (cvmx_likely(use_locking))
 359         __cvmx_cmd_queue_lock(queue_id, qptr);
 360
 361     /* If a max queue length was specified then make sure we don't
 362         exceed it. If any part of the command would be below the limit
 363         we allow it */
 364     if (CVMX_CMD_QUEUE_ENABLE_MAX_DEPTH && cvmx_unlikely(qptr->max_depth))
 365     {
 366         if (cvmx_unlikely(cvmx_cmd_queue_length(queue_id) > (int)qptr->max_depth))
 367         {
 368             if (cvmx_likely(use_locking))
 369                 __cvmx_cmd_queue_unlock(qptr);
 370             return CVMX_CMD_QUEUE_FULL;
 371         }
 372     }
 373
 374     /* Normally there is plenty of room in the current buffer for the command */
 375     if (cvmx_likely(qptr->index + cmd_count < qptr->pool_size_m1))
 376     {
 377         uint64_t *ptr = (uint64_t *)cvmx_phys_to_ptr((uint64_t)qptr->base_ptr_div128<<7);
 378         ptr += qptr->index;
 379         qptr->index += cmd_count;
 380         while (cmd_count--)
 381             *ptr++ = *cmds++;
 382     }
 383     else
 384     {
 385         uint64_t *ptr;
 386         int count;
 387         /* We need a new command buffer. Fail if there isn't one available */
 388         uint64_t *new_buffer = (uint64_t *)cvmx_fpa_alloc(qptr->fpa_pool);
 389         if (cvmx_unlikely(new_buffer == NULL))
 390         {
 391             if (cvmx_likely(use_locking))
 392                 __cvmx_cmd_queue_unlock(qptr);
 393             return CVMX_CMD_QUEUE_NO_MEMORY;
 394         }
 395         ptr = (uint64_t *)cvmx_phys_to_ptr((uint64_t)qptr->base_ptr_div128<<7);
 396         /* Figure out how many command words will fit in this buffer. One
 397             location will be needed for the next buffer pointer */
 398         count = qptr->pool_size_m1 - qptr->index;
 399         ptr += qptr->index;
 400         cmd_count-=count;
 401         while (count--)
 402             *ptr++ = *cmds++;
 403         *ptr = cvmx_ptr_to_phys(new_buffer);
 404         /* The current buffer is full and has a link to the next buffer. Time
 405             to write the rest of the commands into the new buffer */
 406         qptr->base_ptr_div128 = *ptr >> 7;
 407         qptr->index = cmd_count;
 408         ptr = new_buffer;
 409         while (cmd_count--)
 410             *ptr++ = *cmds++;
 411     }
 412
 413     /* All updates are complete. Release the lock and return */
 414     if (cvmx_likely(use_locking))
 415         __cvmx_cmd_queue_unlock(qptr);
 416     return CVMX_CMD_QUEUE_SUCCESS;
 417 }
 418
 419
 420 /**
 421  * Simple function to write two command words to a command
 422  * queue.
 423  *
 424  * @param queue_id Hardware command queue to write to
 425  * @param use_locking
 426  *                 Use internal locking to ensure exclusive access for queue
 427  *                 updates. If you don't use this locking you must ensure
 428  *                 exclusivity some other way. Locking is strongly recommended.
 429  * @param cmd1     Command
 430  * @param cmd2     Command
 431  *
 432  * @return CVMX_CMD_QUEUE_SUCCESS or a failure code
 433  */
 434 static inline cvmx_cmd_queue_result_t cvmx_cmd_queue_write2(cvmx_cmd_queue_id_t queue_id, int use_locking, uint64_t cmd1, uint64_t cmd2)
 435 {
 436     __cvmx_cmd_queue_state_t *qptr = __cvmx_cmd_queue_get_state(queue_id);
 437
 438     if (CVMX_ENABLE_PARAMETER_CHECKING)
 439     {
 440         if (cvmx_unlikely(qptr == NULL))
 441             return CVMX_CMD_QUEUE_INVALID_PARAM;
 442     }
 443
 444     /* Make sure nobody else is updating the same queue */
 445     if (cvmx_likely(use_locking))
 446         __cvmx_cmd_queue_lock(queue_id, qptr);
 447
 448     /* If a max queue length was specified then make sure we don't
 449         exceed it. If any part of the command would be below the limit
 450         we allow it */
 451     if (CVMX_CMD_QUEUE_ENABLE_MAX_DEPTH && cvmx_unlikely(qptr->max_depth))
 452     {
 453         if (cvmx_unlikely(cvmx_cmd_queue_length(queue_id) > (int)qptr->max_depth))
 454         {
 455             if (cvmx_likely(use_locking))
 456                 __cvmx_cmd_queue_unlock(qptr);
 457             return CVMX_CMD_QUEUE_FULL;
 458         }
 459     }
 460
 461     /* Normally there is plenty of room in the current buffer for the command */
 462     if (cvmx_likely(qptr->index + 2 < qptr->pool_size_m1))
 463     {
 464         uint64_t *ptr = (uint64_t *)cvmx_phys_to_ptr((uint64_t)qptr->base_ptr_div128<<7);
 465         ptr += qptr->index;
 466         qptr->index += 2;
 467         ptr[0] = cmd1;
 468         ptr[1] = cmd2;
 469     }
 470     else
 471     {
 472         uint64_t *ptr;
 473         /* Figure out how many command words will fit in this buffer. One
 474             location will be needed for the next buffer pointer */
 475         int count = qptr->pool_size_m1 - qptr->index;
 476         /* We need a new command buffer. Fail if there isn't one available */
 477         uint64_t *new_buffer = (uint64_t *)cvmx_fpa_alloc(qptr->fpa_pool);
 478         if (cvmx_unlikely(new_buffer == NULL))
 479         {
 480             if (cvmx_likely(use_locking))
 481                 __cvmx_cmd_queue_unlock(qptr);
 482             return CVMX_CMD_QUEUE_NO_MEMORY;
 483         }
 484         count--;
 485         ptr = (uint64_t *)cvmx_phys_to_ptr((uint64_t)qptr->base_ptr_div128<<7);
 486         ptr += qptr->index;
 487         *ptr++ = cmd1;
 488         if (cvmx_likely(count))
 489             *ptr++ = cmd2;
 490         *ptr = cvmx_ptr_to_phys(new_buffer);
 491         /* The current buffer is full and has a link to the next buffer. Time
 492             to write the rest of the commands into the new buffer */
 493         qptr->base_ptr_div128 = *ptr >> 7;
 494         qptr->index = 0;
 495         if (cvmx_unlikely(count == 0))
 496         {
 497             qptr->index = 1;
 498             new_buffer[0] = cmd2;
 499         }
 500     }
 501
 502     /* All updates are complete. Release the lock and return */
 503     if (cvmx_likely(use_locking))
 504         __cvmx_cmd_queue_unlock(qptr);
 505     return CVMX_CMD_QUEUE_SUCCESS;
 506 }
 507
 508
 509 /**
 510  * Simple function to write three command words to a command
 511  * queue.
 512  *
 513  * @param queue_id Hardware command queue to write to
 514  * @param use_locking
 515  *                 Use internal locking to ensure exclusive access for queue
 516  *                 updates. If you don't use this locking you must ensure
 517  *                 exclusivity some other way. Locking is strongly recommended.
 518  * @param cmd1     Command
 519  * @param cmd2     Command
 520  * @param cmd3     Command
 521  *
 522  * @return CVMX_CMD_QUEUE_SUCCESS or a failure code
 523  */
 524 static inline cvmx_cmd_queue_result_t cvmx_cmd_queue_write3(cvmx_cmd_queue_id_t queue_id, int use_locking, uint64_t cmd1, uint64_t cmd2, uint64_t cmd3)
 525 {
 526     __cvmx_cmd_queue_state_t *qptr = __cvmx_cmd_queue_get_state(queue_id);
 527
 528     if (CVMX_ENABLE_PARAMETER_CHECKING)
 529     {
 530         if (cvmx_unlikely(qptr == NULL))
 531             return CVMX_CMD_QUEUE_INVALID_PARAM;
 532     }
 533
 534     /* Make sure nobody else is updating the same queue */
 535     if (cvmx_likely(use_locking))
 536         __cvmx_cmd_queue_lock(queue_id, qptr);
 537
 538     /* If a max queue length was specified then make sure we don't
 539         exceed it. If any part of the command would be below the limit
 540         we allow it */
 541     if (CVMX_CMD_QUEUE_ENABLE_MAX_DEPTH && cvmx_unlikely(qptr->max_depth))
 542     {
 543         if (cvmx_unlikely(cvmx_cmd_queue_length(queue_id) > (int)qptr->max_depth))
 544         {
 545             if (cvmx_likely(use_locking))
 546                 __cvmx_cmd_queue_unlock(qptr);
 547             return CVMX_CMD_QUEUE_FULL;
 548         }
 549     }
 550
 551     /* Normally there is plenty of room in the current buffer for the command */
 552     if (cvmx_likely(qptr->index + 3 < qptr->pool_size_m1))
 553     {
 554         uint64_t *ptr = (uint64_t *)cvmx_phys_to_ptr((uint64_t)qptr->base_ptr_div128<<7);
 555         ptr += qptr->index;
 556         qptr->index += 3;
 557         ptr[0] = cmd1;
 558         ptr[1] = cmd2;
 559         ptr[2] = cmd3;
 560     }
 561     else
 562     {
 563         uint64_t *ptr;
 564         /* Figure out how many command words will fit in this buffer. One
 565             location will be needed for the next buffer pointer */
 566         int count = qptr->pool_size_m1 - qptr->index;
 567         /* We need a new command buffer. Fail if there isn't one available */
 568         uint64_t *new_buffer = (uint64_t *)cvmx_fpa_alloc(qptr->fpa_pool);
 569         if (cvmx_unlikely(new_buffer == NULL))
 570         {
 571             if (cvmx_likely(use_locking))
 572                 __cvmx_cmd_queue_unlock(qptr);
 573             return CVMX_CMD_QUEUE_NO_MEMORY;
 574         }
 575         count--;
 576         ptr = (uint64_t *)cvmx_phys_to_ptr((uint64_t)qptr->base_ptr_div128<<7);
 577         ptr += qptr->index;
 578         *ptr++ = cmd1;
 579         if (count)
 580         {
 581             *ptr++ = cmd2;
 582             if (count > 1)
 583                 *ptr++ = cmd3;
 584         }
 585         *ptr = cvmx_ptr_to_phys(new_buffer);
 586         /* The current buffer is full and has a link to the next buffer. Time
 587             to write the rest of the commands into the new buffer */
 588         qptr->base_ptr_div128 = *ptr >> 7;
 589         qptr->index = 0;
 590         ptr = new_buffer;
 591         if (count == 0)
 592         {
 593             *ptr++ = cmd2;
 594             qptr->index++;
 595         }
 596         if (count < 2)
 597         {
 598             *ptr++ = cmd3;
 599             qptr->index++;
 600         }
 601     }
 602
 603     /* All updates are complete. Release the lock and return */
 604     if (cvmx_likely(use_locking))
 605         __cvmx_cmd_queue_unlock(qptr);
 606     return CVMX_CMD_QUEUE_SUCCESS;
 607 }
 608
 609 #ifdef  __cplusplus
 610 }
 611 #endif
 612
 613 #endif /* __CVMX_CMD_QUEUE_H__ */