1 /***********************license start***************
2 * Copyright (c) 2003-2010 Cavium Networks (support@cavium.com). All rights
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are
10 * * Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above
14 * copyright notice, this list of conditions and the following
15 * disclaimer in the documentation and/or other materials provided
16 * with the distribution.
18 * * Neither the name of Cavium Networks nor the names of
19 * its contributors may be used to endorse or promote products
20 * derived from this software without specific prior written
23 * This Software, including technical data, may be subject to U.S. export control
24 * laws, including the U.S. Export Administration Act and its associated
25 * regulations, and may be subject to export or import regulations in other
28 * TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
29 * AND WITH ALL FAULTS AND CAVIUM NETWORKS MAKES NO PROMISES, REPRESENTATIONS OR
30 * WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT TO
31 * THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY REPRESENTATION OR
32 * DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT DEFECTS, AND CAVIUM
33 * SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES OF TITLE,
34 * MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR PURPOSE, LACK OF
35 * VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT, QUIET POSSESSION OR
36 * CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK ARISING OUT OF USE OR
37 * PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
38 ***********************license end**************************************/
49 * Interface to the PCI / PCIe DMA engines. These are only available
50 * on chips with PCI / PCIe.
52 * <hr>$Revision: 50126 $<hr>
54 #include "executive-config.h"
55 #include "cvmx-config.h"
57 #include "cvmx-cmd-queue.h"
58 #include "cvmx-dma-engine.h"
60 #ifdef CVMX_ENABLE_PKO_FUNCTIONS
63 * Return the number of DMA engimes supported by this chip
65 * @return Number of DMA engines
67 int cvmx_dma_engine_get_num(void)
69 if (octeon_has_feature(OCTEON_FEATURE_NPEI))
71 if (OCTEON_IS_MODEL(OCTEON_CN52XX_PASS1_X))
76 else if (octeon_has_feature(OCTEON_FEATURE_PCIE))
83 * Initialize the DMA engines for use
85 * @return Zero on success, negative on failure
/*
 * NOTE(review): this is a numbered extract with gaps — several original
 * lines (closing braces, the error return after the queue-init check,
 * the dma_control.u64 = 0 clears, dpi_ctl enable bit) are missing below,
 * so the code is not compilable exactly as shown.
 */
87 int cvmx_dma_engine_initialize(void)
/* Build one command queue per engine out of the FPA output buffer pool */
91 for (engine=0; engine < cvmx_dma_engine_get_num(); engine++)
93 cvmx_cmd_queue_result_t result;
94 result = cvmx_cmd_queue_initialize(CVMX_CMD_QUEUE_DMA(engine),
95 0, CVMX_FPA_OUTPUT_BUFFER_POOL,
96 CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE);
97 if (result != CVMX_CMD_QUEUE_SUCCESS)
/* Program each engine's command-buffer start address. The saddr CSR
   fields take the physical address shifted right by 7, i.e. in
   128-byte units. */
99 if (octeon_has_feature(OCTEON_FEATURE_NPEI))
101 cvmx_npei_dmax_ibuff_saddr_t dmax_ibuff_saddr;
102 dmax_ibuff_saddr.u64 = 0;
103 dmax_ibuff_saddr.s.saddr = cvmx_ptr_to_phys(cvmx_cmd_queue_buffer(CVMX_CMD_QUEUE_DMA(engine))) >> 7;
104 cvmx_write_csr(CVMX_PEXP_NPEI_DMAX_IBUFF_SADDR(engine), dmax_ibuff_saddr.u64);
106 else if (octeon_has_feature(OCTEON_FEATURE_PCIE))
108 cvmx_dpi_dmax_ibuff_saddr_t dpi_dmax_ibuff_saddr;
109 dpi_dmax_ibuff_saddr.u64 = 0;
/* csize is the chunk size in 8-byte words */
110 dpi_dmax_ibuff_saddr.s.csize = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/8;
111 dpi_dmax_ibuff_saddr.s.saddr = cvmx_ptr_to_phys(cvmx_cmd_queue_buffer(CVMX_CMD_QUEUE_DMA(engine))) >> 7;
112 cvmx_write_csr(CVMX_DPI_DMAX_IBUFF_SADDR(engine), dpi_dmax_ibuff_saddr.u64);
/* Legacy NPI chips: the high and low priority queues take full
   physical addresses, not shifted values */
116 uint64_t address = cvmx_ptr_to_phys(cvmx_cmd_queue_buffer(CVMX_CMD_QUEUE_DMA(engine)));
118 cvmx_write_csr(CVMX_NPI_HIGHP_IBUFF_SADDR, address);
120 cvmx_write_csr(CVMX_NPI_LOWP_IBUFF_SADDR, address);
/* Enable the engines in the chip-family specific DMA control register */
124 if (octeon_has_feature(OCTEON_FEATURE_NPEI))
126 cvmx_npei_dma_control_t dma_control;
/* Engine 4 only exists on chips reporting 5+ engines */
128 if (cvmx_dma_engine_get_num() >= 5)
129 dma_control.s.dma4_enb = 1;
130 dma_control.s.dma3_enb = 1;
131 dma_control.s.dma2_enb = 1;
132 dma_control.s.dma1_enb = 1;
133 dma_control.s.dma0_enb = 1;
134 dma_control.s.o_mode = 1; /* Pull NS and RO from this register, not the pointers */
135 //dma_control.s.dwb_denb = 1;
136 //dma_control.s.dwb_ichk = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/128;
137 dma_control.s.fpa_que = CVMX_FPA_OUTPUT_BUFFER_POOL;
138 dma_control.s.csize = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/8;
139 cvmx_write_csr(CVMX_PEXP_NPEI_DMA_CONTROL, dma_control.u64);
140 /* As a workaround for errata PCIE-811 we only allow a single
141 outstanding DMA read over PCIe at a time. This limits performance,
142 but works in all cases. If you need higher performance, remove
143 this code and implement the more complicated workaround documented
144 in the errata. This only affects CN56XX pass 2.0 chips */
145 if (OCTEON_IS_MODEL(OCTEON_CN56XX_PASS2_0))
147 cvmx_npei_dma_pcie_req_num_t pcie_req_num;
148 pcie_req_num.u64 = cvmx_read_csr(CVMX_PEXP_NPEI_DMA_PCIE_REQ_NUM);
149 pcie_req_num.s.dma_cnt = 1;
150 cvmx_write_csr(CVMX_PEXP_NPEI_DMA_PCIE_REQ_NUM, pcie_req_num.u64);
153 else if (octeon_has_feature(OCTEON_FEATURE_PCIE))
155 cvmx_dpi_engx_buf_t dpi_engx_buf;
156 cvmx_dpi_dma_control_t dma_control;
157 cvmx_dpi_ctl_t dpi_ctl;
159 /* Give engine 0-4 1KB, and 5 3KB. This gives the packet engines better
160 performance. Total must not exceed 8KB */
161 dpi_engx_buf.u64 = 0;
162 dpi_engx_buf.s.blks = 2;
163 cvmx_write_csr(CVMX_DPI_ENGX_BUF(0), dpi_engx_buf.u64);
164 cvmx_write_csr(CVMX_DPI_ENGX_BUF(1), dpi_engx_buf.u64);
165 cvmx_write_csr(CVMX_DPI_ENGX_BUF(2), dpi_engx_buf.u64);
166 cvmx_write_csr(CVMX_DPI_ENGX_BUF(3), dpi_engx_buf.u64);
167 cvmx_write_csr(CVMX_DPI_ENGX_BUF(4), dpi_engx_buf.u64);
168 dpi_engx_buf.s.blks = 6;
169 cvmx_write_csr(CVMX_DPI_ENGX_BUF(5), dpi_engx_buf.u64);
/* Enable all 5 command engines plus packet DMA in one shot */
171 dma_control.u64 = cvmx_read_csr(CVMX_DPI_DMA_CONTROL);
172 dma_control.s.pkt_hp = 1;
173 dma_control.s.pkt_en = 1;
174 dma_control.s.dma_enb = 0x1f;
175 dma_control.s.dwb_denb = 1;
176 dma_control.s.dwb_ichk = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/128;
177 dma_control.s.fpa_que = CVMX_FPA_OUTPUT_BUFFER_POOL;
178 dma_control.s.o_mode = 1;
179 cvmx_write_csr(CVMX_DPI_DMA_CONTROL, dma_control.u64);
/* NOTE(review): the line setting the DPI enable bit between this
   read and write is missing from the extract */
180 dpi_ctl.u64 = cvmx_read_csr(CVMX_DPI_CTL);
182 cvmx_write_csr(CVMX_DPI_CTL, dpi_ctl.u64);
/* Legacy NPI chips: enable the high and low priority DMA queues */
186 cvmx_npi_dma_control_t dma_control;
188 //dma_control.s.dwb_denb = 1;
189 //dma_control.s.dwb_ichk = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/128;
190 dma_control.s.o_add1 = 1;
191 dma_control.s.fpa_que = CVMX_FPA_OUTPUT_BUFFER_POOL;
192 dma_control.s.hp_enb = 1;
193 dma_control.s.lp_enb = 1;
194 dma_control.s.csize = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/8;
195 cvmx_write_csr(CVMX_NPI_DMA_CONTROL, dma_control.u64);
203 * Shutdown all DMA engines. The engines must be idle when this
204 * function is called.
206 * @return Zero on success, negative on failure
/*
 * NOTE(review): numbered extract with gaps — the error return after the
 * not-idle check and several closing braces are missing below.
 */
208 int cvmx_dma_engine_shutdown(void)
/* Refuse to shut down while any engine still has queued commands */
212 for (engine=0; engine < cvmx_dma_engine_get_num(); engine++)
214 if (cvmx_cmd_queue_length(CVMX_CMD_QUEUE_DMA(engine)))
216 cvmx_dprintf("ERROR: cvmx_dma_engine_shutdown: Engine not idle.\n");
/* Clear the enable bits in the chip-family specific control register */
221 if (octeon_has_feature(OCTEON_FEATURE_NPEI))
223 cvmx_npei_dma_control_t dma_control;
224 dma_control.u64 = cvmx_read_csr(CVMX_PEXP_NPEI_DMA_CONTROL);
/* Engine 4 only exists on chips reporting 5+ engines */
225 if (cvmx_dma_engine_get_num() >= 5)
226 dma_control.s.dma4_enb = 0;
227 dma_control.s.dma3_enb = 0;
228 dma_control.s.dma2_enb = 0;
229 dma_control.s.dma1_enb = 0;
230 dma_control.s.dma0_enb = 0;
231 cvmx_write_csr(CVMX_PEXP_NPEI_DMA_CONTROL, dma_control.u64);
232 /* Make sure the disable completes */
233 cvmx_read_csr(CVMX_PEXP_NPEI_DMA_CONTROL);
235 else if (octeon_has_feature(OCTEON_FEATURE_PCIE))
237 cvmx_dpi_dma_control_t dma_control;
238 dma_control.u64 = cvmx_read_csr(CVMX_DPI_DMA_CONTROL);
239 dma_control.s.dma_enb = 0;
240 cvmx_write_csr(CVMX_DPI_DMA_CONTROL, dma_control.u64);
241 /* Make sure the disable completes */
242 cvmx_read_csr(CVMX_DPI_DMA_CONTROL);
/* Legacy NPI chips: disable the high and low priority queues */
246 cvmx_npi_dma_control_t dma_control;
247 dma_control.u64 = cvmx_read_csr(CVMX_NPI_DMA_CONTROL);
248 dma_control.s.hp_enb = 0;
249 dma_control.s.lp_enb = 0;
250 cvmx_write_csr(CVMX_NPI_DMA_CONTROL, dma_control.u64);
251 /* Make sure the disable completes */
252 cvmx_read_csr(CVMX_NPI_DMA_CONTROL);
/* Release the command queue memory and clear the buffer start CSRs */
255 for (engine=0; engine < cvmx_dma_engine_get_num(); engine++)
257 cvmx_cmd_queue_shutdown(CVMX_CMD_QUEUE_DMA(engine));
258 if (octeon_has_feature(OCTEON_FEATURE_NPEI))
259 cvmx_write_csr(CVMX_PEXP_NPEI_DMAX_IBUFF_SADDR(engine), 0);
260 else if (octeon_has_feature(OCTEON_FEATURE_PCIE))
261 cvmx_write_csr(CVMX_DPI_DMAX_IBUFF_SADDR(engine), 0);
265 cvmx_write_csr(CVMX_NPI_HIGHP_IBUFF_SADDR, 0);
267 cvmx_write_csr(CVMX_NPI_LOWP_IBUFF_SADDR, 0);
276 * Submit a series of DMA commands to the DMA engines.
278 * @param engine Engine to submit to (0 to cvmx_dma_engine_get_num()-1)
279 * @param header Command header
281 * The number of data pointers
282 * @param buffers Command data pointers
284 * @return Zero on success, negative on failure
/*
 * NOTE(review): numbered extract with gaps — the declaration/initialization
 * of cmd_count, the error return for the PCIe-604 check, braces, and the
 * final return are missing below.
 */
286 int cvmx_dma_engine_submit(int engine, cvmx_dma_engine_header_t header, int num_buffers, cvmx_dma_engine_buffer_t buffers[])
288 cvmx_cmd_queue_result_t result;
/* VLA: one command word for the header plus one per buffer pointer */
290 uint64_t cmds[num_buffers + 1];
292 if (OCTEON_IS_MODEL(OCTEON_CN56XX_PASS1_X))
294 /* Check for Errata PCIe-604 */
295 if ((header.s.nfst > 11) || (header.s.nlst > 11) || (header.s.nfst + header.s.nlst > 15))
297 cvmx_dprintf("DMA engine submit too large\n");
/* Flatten header + buffer pointers into the command word array */
302 cmds[0] = header.u64;
303 while (num_buffers--)
305 cmds[cmd_count++] = buffers->u64;
309 /* Due to errata PCIE-13315, it is necessary to have the queue lock while we
310 ring the doorbell for the DMA engines. This prevents doorbells from
311 possibly arriving out of order with respect to the command queue
313 __cvmx_cmd_queue_lock(CVMX_CMD_QUEUE_DMA(engine), __cvmx_cmd_queue_get_state(CVMX_CMD_QUEUE_DMA(engine)));
314 result = cvmx_cmd_queue_write(CVMX_CMD_QUEUE_DMA(engine), 0, cmd_count, cmds);
315 /* This SYNCWS is needed since the command queue didn't do locking, which
316 normally implies the SYNCWS. This one makes sure the command queue
317 updates make it to L2 before we ring the doorbell */
319 /* A syncw isn't needed here since the command queue did one as part of the queue unlock */
320 if (cvmx_likely(result == CVMX_CMD_QUEUE_SUCCESS))
322 if (octeon_has_feature(OCTEON_FEATURE_NPEI))
324 /* DMA doorbells are 32bit writes in little endian space. This means we need to xor the address with 4 */
325 cvmx_write64_uint32(CVMX_PEXP_NPEI_DMAX_DBELL(engine)^4, cmd_count);
327 else if (octeon_has_feature(OCTEON_FEATURE_PCIE))
328 cvmx_write_csr(CVMX_DPI_DMAX_DBELL(engine), cmd_count);
/* Legacy NPI chips: separate doorbells for high and low priority queues */
332 cvmx_write_csr(CVMX_NPI_HIGHP_DBELL, cmd_count);
334 cvmx_write_csr(CVMX_NPI_LOWP_DBELL, cmd_count);
337 /* Here is the unlock for the above errata workaround */
338 __cvmx_cmd_queue_unlock(__cvmx_cmd_queue_get_state(CVMX_CMD_QUEUE_DMA(engine)));
345 * Function used by cvmx_dma_engine_transfer() to build the
346 * internal address list.
348 * @param buffers Location to store the list
349 * @param address Address to build list for
350 * @param size Length of the memory pointed to by address
352 * @return Number of internal pointer chunks created
354 static inline int __cvmx_dma_engine_build_internal_pointers(cvmx_dma_engine_buffer_t *buffers, uint64_t address, int size)
359 /* Each internal chunk can contain a maximum of 8191 bytes */
363 buffers[segments].u64 = 0;
364 buffers[segments].internal.size = chunk;
365 buffers[segments].internal.addr = address;
376 * Function used by cvmx_dma_engine_transfer() to build the PCI / PCIe address
378 * @param buffers Location to store the list
379 * @param address Address to build list for
380 * @param size Length of the memory pointed to by address
382 * @return Number of PCI / PCIe address chunks created. The number of words used
383 * will be segments + (segments-1)/4 + 1.
/*
 * NOTE(review): numbered extract with gaps — the segment counter setup, the
 * address-advance statements between the buffers[n].u64 assignments, the
 * per-branch return values, and the loop handling more than 4 segments are
 * missing below.
 */
385 static inline int __cvmx_dma_engine_build_external_pointers(cvmx_dma_engine_buffer_t *buffers, uint64_t address, int size)
387 const int MAX_SIZE = 65535;
391 /* Each block of 4 PCI / PCIe pointers uses one dword for lengths followed by
392 up to 4 addresses. This then repeats if more data is needed */
394 if (size <= MAX_SIZE)
396 /* Only one more segment needed */
397 buffers[0].pcie_length.len0 = size;
398 buffers[1].u64 = address;
402 else if (size <= MAX_SIZE * 2)
404 /* Two more segments needed */
405 buffers[0].pcie_length.len0 = MAX_SIZE;
406 buffers[0].pcie_length.len1 = size - MAX_SIZE;
407 buffers[1].u64 = address;
409 buffers[2].u64 = address;
413 else if (size <= MAX_SIZE * 3)
415 /* Three more segments needed */
416 buffers[0].pcie_length.len0 = MAX_SIZE;
417 buffers[0].pcie_length.len1 = MAX_SIZE;
418 buffers[0].pcie_length.len2 = size - MAX_SIZE * 2;
419 buffers[1].u64 = address;
421 buffers[2].u64 = address;
423 buffers[3].u64 = address;
427 else if (size <= MAX_SIZE * 4)
429 /* Four more segments needed */
430 buffers[0].pcie_length.len0 = MAX_SIZE;
431 buffers[0].pcie_length.len1 = MAX_SIZE;
432 buffers[0].pcie_length.len2 = MAX_SIZE;
433 buffers[0].pcie_length.len3 = size - MAX_SIZE * 3;
434 buffers[1].u64 = address;
436 buffers[2].u64 = address;
438 buffers[3].u64 = address;
440 buffers[4].u64 = address;
446 /* Five or more segments are needed */
447 buffers[0].pcie_length.len0 = MAX_SIZE;
448 buffers[0].pcie_length.len1 = MAX_SIZE;
449 buffers[0].pcie_length.len2 = MAX_SIZE;
450 buffers[0].pcie_length.len3 = MAX_SIZE;
451 buffers[1].u64 = address;
453 buffers[2].u64 = address;
455 buffers[3].u64 = address;
457 buffers[4].u64 = address;
469 * Build the first and last pointers based on a DMA engine header
470 * and submit them to the engine. The purpose of this function is
471 * to simplify the building of DMA engine commands by automatically
472 * converting a simple address and size into the appropriate internal
473 * or PCI / PCIe address list. This function does not support gather lists,
474 * so you will need to build your own lists in that case.
476 * @param engine Engine to submit to (0 to cvmx_dma_engine_get_num()-1)
477 * @param header DMA Command header. Note that the nfst and nlst fields do not
478 * need to be filled in. All other fields must be set properly.
479 * @param first_address
480 * Address to use for the first pointers. In the case of INTERNAL,
481 * INBOUND, and OUTBOUND this is an Octeon memory address. In the
482 * case of EXTERNAL, this is the source PCI / PCIe address.
483 * @param last_address
484 * Address to use for the last pointers. In the case of EXTERNAL,
485 * INBOUND, and OUTBOUND this is a PCI / PCIe address. In the
486 * case of INTERNAL, this is the Octeon memory destination address.
487 * @param size Size of the transfer to perform.
489 * @return Zero on success, negative on failure
/*
 * NOTE(review): numbered extract with gaps — the size parameter line, the
 * initialization of the words counter, the switch braces, break statements,
 * and the default case are missing below.
 */
491 int cvmx_dma_engine_transfer(int engine, cvmx_dma_engine_header_t header,
492 uint64_t first_address, uint64_t last_address,
495 cvmx_dma_engine_buffer_t buffers[32];
/* Build the first/last pointer lists appropriate for the transfer type,
   accumulating the total command word count in 'words' */
498 switch (header.s.type)
500 case CVMX_DMA_ENGINE_TRANSFER_INTERNAL:
501 header.s.nfst = __cvmx_dma_engine_build_internal_pointers(buffers, first_address, size);
502 words += header.s.nfst;
503 header.s.nlst = __cvmx_dma_engine_build_internal_pointers(buffers + words, last_address, size);
504 words += header.s.nlst;
506 case CVMX_DMA_ENGINE_TRANSFER_INBOUND:
507 case CVMX_DMA_ENGINE_TRANSFER_OUTBOUND:
508 header.s.nfst = __cvmx_dma_engine_build_internal_pointers(buffers, first_address, size);
509 words += header.s.nfst;
510 header.s.nlst = __cvmx_dma_engine_build_external_pointers(buffers + words, last_address, size);
/* External lists use an extra length word per group of 4 pointers:
   segments + (segments-1)/4 + 1 total words */
511 words += header.s.nlst + ((header.s.nlst-1) >> 2) + 1;
513 case CVMX_DMA_ENGINE_TRANSFER_EXTERNAL:
514 header.s.nfst = __cvmx_dma_engine_build_external_pointers(buffers, first_address, size);
515 words += header.s.nfst + ((header.s.nfst-1) >> 2) + 1;
516 header.s.nlst = __cvmx_dma_engine_build_external_pointers(buffers + words, last_address, size);
517 words += header.s.nlst + ((header.s.nlst-1) >> 2) + 1;
520 return cvmx_dma_engine_submit(engine, header, words, buffers);