/***********************license start***************
 * Copyright (c) 2003-2010  Cavium Inc. (support@cavium.com). All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *
 *   * Redistributions in binary form must reproduce the above
 *     copyright notice, this list of conditions and the following
 *     disclaimer in the documentation and/or other materials provided
 *     with the distribution.
 *
 *   * Neither the name of Cavium Inc. nor the names of
 *     its contributors may be used to endorse or promote products
 *     derived from this software without specific prior written
 *     permission.
 *
 * This Software, including technical data, may be subject to U.S. export control
 * laws, including the U.S. Export Administration Act and its associated
 * regulations, and may be subject to export or import regulations in other
 * countries.
 *
 * TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
 * AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
 * WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT TO
 * THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY REPRESENTATION OR
 * DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT DEFECTS, AND CAVIUM
 * SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES OF TITLE,
 * MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR PURPOSE, LACK OF
 * VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT, QUIET POSSESSION OR
 * CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK ARISING OUT OF USE OR
 * PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
 ***********************license end**************************************/

/**
 * @file
 *
 * Interface to the PCI / PCIe DMA engines. These are only available
 * on chips with PCI / PCIe.
 *
 * <hr>$Revision: 70030 $<hr>
 */
#ifdef CVMX_BUILD_FOR_LINUX_KERNEL
#include <linux/module.h>
#include <asm/octeon/cvmx.h>
#include <asm/octeon/octeon-model.h>
#include <asm/octeon/cvmx-config.h>
#include <asm/octeon/cvmx-cmd-queue.h>
#include <asm/octeon/cvmx-dma-engine.h>
#include <asm/octeon/octeon-feature.h>
#include <asm/octeon/cvmx-npi-defs.h>
#include <asm/octeon/cvmx-npei-defs.h>
#include <asm/octeon/cvmx-dpi-defs.h>
#include <asm/octeon/cvmx-pexp-defs.h>
#include <asm/octeon/cvmx-helper-cfg.h>
#else
#if !defined(__FreeBSD__) || !defined(_KERNEL)
#include "executive-config.h"
#include "cvmx-config.h"
#endif
#include "cvmx.h"
#include "cvmx-cmd-queue.h"
#include "cvmx-dma-engine.h"
#include "cvmx-helper-cfg.h"
#endif

#ifdef CVMX_ENABLE_PKO_FUNCTIONS

/**
 * Return the number of DMA engines supported by this chip
 *
 * @return Number of DMA engines
 */
int cvmx_dma_engine_get_num(void)
{
    if (octeon_has_feature(OCTEON_FEATURE_NPEI))
    {
        if (OCTEON_IS_MODEL(OCTEON_CN52XX_PASS1_X))
            return 4;
        else
            return 5;
    }
    else if (octeon_has_feature(OCTEON_FEATURE_PCIE))
        return 8;
    else
        return 2;
}

/**
 * Initialize the DMA engines for use
 *
 * @return Zero on success, negative on failure
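 *
 * Note: the DMA command queues allocate their buffers from
 * CVMX_FPA_OUTPUT_BUFFER_POOL, so that FPA pool must already be set up and
 * filled before this function is called.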
 */
int cvmx_dma_engine_initialize(void)
{
    int engine;

    for (engine=0; engine < cvmx_dma_engine_get_num(); engine++)
    {
        cvmx_cmd_queue_result_t result;
        result = cvmx_cmd_queue_initialize(CVMX_CMD_QUEUE_DMA(engine),
                                           0, CVMX_FPA_OUTPUT_BUFFER_POOL,
                                           CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE);
        if (result != CVMX_CMD_QUEUE_SUCCESS)
            return -1;
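        /* Each engine fetches commands from its own command queue. The queue's
           first buffer must be 128-byte aligned, since the registers written
           below only hold address bits above bit 7 (hence the ">> 7"). */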
        if (octeon_has_feature(OCTEON_FEATURE_NPEI))
        {
            cvmx_npei_dmax_ibuff_saddr_t dmax_ibuff_saddr;
            dmax_ibuff_saddr.u64 = 0;
            dmax_ibuff_saddr.s.saddr = cvmx_ptr_to_phys(cvmx_cmd_queue_buffer(CVMX_CMD_QUEUE_DMA(engine))) >> 7;
            cvmx_write_csr(CVMX_PEXP_NPEI_DMAX_IBUFF_SADDR(engine), dmax_ibuff_saddr.u64);
        }
        else if (octeon_has_feature(OCTEON_FEATURE_PCIE))
        {
            cvmx_dpi_dmax_ibuff_saddr_t dpi_dmax_ibuff_saddr;
            dpi_dmax_ibuff_saddr.u64 = 0;
            dpi_dmax_ibuff_saddr.s.csize = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/8;
            dpi_dmax_ibuff_saddr.s.saddr = cvmx_ptr_to_phys(cvmx_cmd_queue_buffer(CVMX_CMD_QUEUE_DMA(engine))) >> 7;
            cvmx_write_csr(CVMX_DPI_DMAX_IBUFF_SADDR(engine), dpi_dmax_ibuff_saddr.u64);
        }
        else
        {
            uint64_t address = cvmx_ptr_to_phys(cvmx_cmd_queue_buffer(CVMX_CMD_QUEUE_DMA(engine)));
            if (engine)
                cvmx_write_csr(CVMX_NPI_HIGHP_IBUFF_SADDR, address);
            else
                cvmx_write_csr(CVMX_NPI_LOWP_IBUFF_SADDR, address);
        }
    }

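    /* Enable the engines and tell the hardware which FPA pool to return
       completed command buffer chunks to, along with the chunk size in
       8-byte words. */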
    if (octeon_has_feature(OCTEON_FEATURE_NPEI))
    {
        cvmx_npei_dma_control_t dma_control;
        dma_control.u64 = 0;
        if (cvmx_dma_engine_get_num() >= 5)
            dma_control.s.dma4_enb = 1;
        dma_control.s.dma3_enb = 1;
        dma_control.s.dma2_enb = 1;
        dma_control.s.dma1_enb = 1;
        dma_control.s.dma0_enb = 1;
        dma_control.s.o_mode = 1; /* Pull NS and RO from this register, not the pointers */
        //dma_control.s.dwb_denb = 1;
        //dma_control.s.dwb_ichk = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/128;
        dma_control.s.fpa_que = CVMX_FPA_OUTPUT_BUFFER_POOL;
        dma_control.s.csize = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/8;
        cvmx_write_csr(CVMX_PEXP_NPEI_DMA_CONTROL, dma_control.u64);
        /* As a workaround for errata PCIE-811 we only allow a single
           outstanding DMA read over PCIe at a time. This limits performance,
           but works in all cases. If you need higher performance, remove
           this code and implement the more complicated workaround documented
           in the errata. This only affects CN56XX pass 2.0 chips */
        if (OCTEON_IS_MODEL(OCTEON_CN56XX_PASS2_0))
        {
            cvmx_npei_dma_pcie_req_num_t pcie_req_num;
            pcie_req_num.u64 = cvmx_read_csr(CVMX_PEXP_NPEI_DMA_PCIE_REQ_NUM);
            pcie_req_num.s.dma_cnt = 1;
            cvmx_write_csr(CVMX_PEXP_NPEI_DMA_PCIE_REQ_NUM, pcie_req_num.u64);
        }
    }
    else if (octeon_has_feature(OCTEON_FEATURE_PCIE))
    {
        cvmx_dpi_engx_buf_t dpi_engx_buf;
        cvmx_dpi_dma_engx_en_t dpi_dma_engx_en;
        cvmx_dpi_dma_control_t dma_control;
        cvmx_dpi_ctl_t dpi_ctl;

        /* Give engines 0-4 1KB each, and engine 5 3KB. This gives the packet
           engines better performance. The total must not exceed 8KB */
        dpi_engx_buf.u64 = 0;
        dpi_engx_buf.s.blks = 2;
        cvmx_write_csr(CVMX_DPI_ENGX_BUF(0), dpi_engx_buf.u64);
        cvmx_write_csr(CVMX_DPI_ENGX_BUF(1), dpi_engx_buf.u64);
        cvmx_write_csr(CVMX_DPI_ENGX_BUF(2), dpi_engx_buf.u64);
        cvmx_write_csr(CVMX_DPI_ENGX_BUF(3), dpi_engx_buf.u64);
        cvmx_write_csr(CVMX_DPI_ENGX_BUF(4), dpi_engx_buf.u64);
        dpi_engx_buf.s.blks = 6;
        cvmx_write_csr(CVMX_DPI_ENGX_BUF(5), dpi_engx_buf.u64);

        dma_control.u64 = cvmx_read_csr(CVMX_DPI_DMA_CONTROL);
        dma_control.s.pkt_hp = 1;
        dma_control.s.pkt_en = 1;
        dma_control.s.dma_enb = 0x1f;
        dma_control.s.dwb_denb = cvmx_helper_cfg_opt_get(CVMX_HELPER_CFG_OPT_USE_DWB);
        dma_control.s.dwb_ichk = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/128;
        dma_control.s.fpa_que = CVMX_FPA_OUTPUT_BUFFER_POOL;
        dma_control.s.o_mode = 1;
        cvmx_write_csr(CVMX_DPI_DMA_CONTROL, dma_control.u64);
        /* When dma_control[pkt_en] = 1, engine 5 is used for packets and is not
           available for DMA. */
        dpi_dma_engx_en.u64 = cvmx_read_csr(CVMX_DPI_DMA_ENGX_EN(5));
        dpi_dma_engx_en.s.qen = 0;
        cvmx_write_csr(CVMX_DPI_DMA_ENGX_EN(5), dpi_dma_engx_en.u64);
        dpi_ctl.u64 = cvmx_read_csr(CVMX_DPI_CTL);
        dpi_ctl.s.en = 1;
        cvmx_write_csr(CVMX_DPI_CTL, dpi_ctl.u64);
    }
    else
    {
        cvmx_npi_dma_control_t dma_control;
        dma_control.u64 = 0;
        //dma_control.s.dwb_denb = 1;
        //dma_control.s.dwb_ichk = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/128;
        dma_control.s.o_add1 = 1;
        dma_control.s.fpa_que = CVMX_FPA_OUTPUT_BUFFER_POOL;
        dma_control.s.hp_enb = 1;
        dma_control.s.lp_enb = 1;
        dma_control.s.csize = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/8;
        cvmx_write_csr(CVMX_NPI_DMA_CONTROL, dma_control.u64);
    }

    return 0;
}
#ifdef CVMX_BUILD_FOR_LINUX_KERNEL
EXPORT_SYMBOL(cvmx_dma_engine_initialize);
#endif

/**
 * Shutdown all DMA engines. The engines must be idle when this
 * function is called.
 *
 * @return Zero on success, negative on failure
 */
int cvmx_dma_engine_shutdown(void)
{
    int engine;

    for (engine=0; engine < cvmx_dma_engine_get_num(); engine++)
    {
        if (cvmx_cmd_queue_length(CVMX_CMD_QUEUE_DMA(engine)))
        {
            cvmx_dprintf("ERROR: cvmx_dma_engine_shutdown: Engine not idle.\n");
            return -1;
        }
    }

    if (octeon_has_feature(OCTEON_FEATURE_NPEI))
    {
        cvmx_npei_dma_control_t dma_control;
        dma_control.u64 = cvmx_read_csr(CVMX_PEXP_NPEI_DMA_CONTROL);
        if (cvmx_dma_engine_get_num() >= 5)
            dma_control.s.dma4_enb = 0;
        dma_control.s.dma3_enb = 0;
        dma_control.s.dma2_enb = 0;
        dma_control.s.dma1_enb = 0;
        dma_control.s.dma0_enb = 0;
        cvmx_write_csr(CVMX_PEXP_NPEI_DMA_CONTROL, dma_control.u64);
        /* Make sure the disable completes */
        cvmx_read_csr(CVMX_PEXP_NPEI_DMA_CONTROL);
    }
    else if (octeon_has_feature(OCTEON_FEATURE_PCIE))
    {
        cvmx_dpi_dma_control_t dma_control;
        dma_control.u64 = cvmx_read_csr(CVMX_DPI_DMA_CONTROL);
        dma_control.s.dma_enb = 0;
        cvmx_write_csr(CVMX_DPI_DMA_CONTROL, dma_control.u64);
        /* Make sure the disable completes */
        cvmx_read_csr(CVMX_DPI_DMA_CONTROL);
    }
    else
    {
        cvmx_npi_dma_control_t dma_control;
        dma_control.u64 = cvmx_read_csr(CVMX_NPI_DMA_CONTROL);
        dma_control.s.hp_enb = 0;
        dma_control.s.lp_enb = 0;
        cvmx_write_csr(CVMX_NPI_DMA_CONTROL, dma_control.u64);
        /* Make sure the disable completes */
        cvmx_read_csr(CVMX_NPI_DMA_CONTROL);
    }

    for (engine=0; engine < cvmx_dma_engine_get_num(); engine++)
    {
        cvmx_cmd_queue_shutdown(CVMX_CMD_QUEUE_DMA(engine));
        if (octeon_has_feature(OCTEON_FEATURE_NPEI))
            cvmx_write_csr(CVMX_PEXP_NPEI_DMAX_IBUFF_SADDR(engine), 0);
        else if (octeon_has_feature(OCTEON_FEATURE_PCIE))
            cvmx_write_csr(CVMX_DPI_DMAX_IBUFF_SADDR(engine), 0);
        else
        {
            if (engine)
                cvmx_write_csr(CVMX_NPI_HIGHP_IBUFF_SADDR, 0);
            else
                cvmx_write_csr(CVMX_NPI_LOWP_IBUFF_SADDR, 0);
        }
    }

    return 0;
}
#ifdef CVMX_BUILD_FOR_LINUX_KERNEL
EXPORT_SYMBOL(cvmx_dma_engine_shutdown);
#endif

/**
 * Submit a series of DMA commands to the DMA engines.
 *
 * @param engine Engine to submit to (0 to cvmx_dma_engine_get_num()-1)
 * @param header Command header
 * @param num_buffers
 *               The number of data pointers
 * @param buffers Command data pointers
 *
 * @return Zero on success, negative on failure
 */
int cvmx_dma_engine_submit(int engine, cvmx_dma_engine_header_t header, int num_buffers, cvmx_dma_engine_buffer_t buffers[])
{
    cvmx_cmd_queue_result_t result;
    int cmd_count = 1;
    uint64_t cmds[num_buffers + 1];

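    /* A command is the 64-bit header word followed by the caller's pointer
       words. It is appended to the engine's command queue and the doorbell
       is then rung with the number of words added. */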
    if (OCTEON_IS_MODEL(OCTEON_CN56XX_PASS1_X))
    {
        /* Check for Errata PCIe-604 */
        if ((header.s.nfst > 11) || (header.s.nlst > 11) || (header.s.nfst + header.s.nlst > 15))
        {
            cvmx_dprintf("DMA engine submit too large\n");
            return -1;
        }
    }

    cmds[0] = header.u64;
    while (num_buffers--)
    {
        cmds[cmd_count++] = buffers->u64;
        buffers++;
    }

    /* Due to errata PCIE-13315, it is necessary to hold the queue lock while we
       ring the doorbell for the DMA engines. This prevents doorbells from
       possibly arriving out of order with respect to the command queue
       entries */
    __cvmx_cmd_queue_lock(CVMX_CMD_QUEUE_DMA(engine), __cvmx_cmd_queue_get_state(CVMX_CMD_QUEUE_DMA(engine)));
    result = cvmx_cmd_queue_write(CVMX_CMD_QUEUE_DMA(engine), 0, cmd_count, cmds);
    /* This SYNCWS is needed since the command queue didn't do locking, which
       normally implies the SYNCWS. This one makes sure the command queue
       updates make it to L2 before we ring the doorbell */
    CVMX_SYNCWS;
    /* A syncw isn't needed here since the command queue did one as part of the queue unlock */
    if (cvmx_likely(result == CVMX_CMD_QUEUE_SUCCESS))
    {
        if (octeon_has_feature(OCTEON_FEATURE_NPEI))
        {
            /* DMA doorbells are 32bit writes in little endian space. This means we need to xor the address with 4 */
            cvmx_write64_uint32(CVMX_PEXP_NPEI_DMAX_DBELL(engine)^4, cmd_count);
        }
        else if (octeon_has_feature(OCTEON_FEATURE_PCIE))
            cvmx_write_csr(CVMX_DPI_DMAX_DBELL(engine), cmd_count);
        else
        {
            if (engine)
                cvmx_write_csr(CVMX_NPI_HIGHP_DBELL, cmd_count);
            else
                cvmx_write_csr(CVMX_NPI_LOWP_DBELL, cmd_count);
        }
    }
    /* Here is the unlock for the above errata workaround */
    __cvmx_cmd_queue_unlock(__cvmx_cmd_queue_get_state(CVMX_CMD_QUEUE_DMA(engine)));
    return result;
}

/**
 * Function used by cvmx_dma_engine_transfer() to build the
 * internal address list.
 *
 * @param buffers Location to store the list
 * @param address Address to build list for
 * @param size    Length of the memory pointed to by address
 *
 * @return Number of internal pointer chunks created
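 *         For example, a 20480 byte transfer is split into three chunks of
 *         8191, 8191 and 4098 bytes.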
 */
static inline int __cvmx_dma_engine_build_internal_pointers(cvmx_dma_engine_buffer_t *buffers, uint64_t address, int size)
{
    int segments = 0;
    while (size)
    {
        /* Each internal chunk can contain a maximum of 8191 bytes */
        int chunk = size;
        if (chunk > 8191)
            chunk = 8191;
        buffers[segments].u64 = 0;
        buffers[segments].internal.size = chunk;
        buffers[segments].internal.addr = address;
        address += chunk;
        size -= chunk;
        segments++;
    }
    return segments;
}

/**
 * Function used by cvmx_dma_engine_transfer() to build the PCI / PCIe address
 * list.
 *
 * @param buffers Location to store the list
 * @param address Address to build list for
 * @param size    Length of the memory pointed to by address
 *
 * @return Number of PCI / PCIe address chunks created. The number of words used
 *         will be segments + (segments-1)/4 + 1.
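 *         For example, five segments use 5 + (5-1)/4 + 1 = 7 words: two
 *         length words plus five address words.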
 */
static inline int __cvmx_dma_engine_build_external_pointers(cvmx_dma_engine_buffer_t *buffers, uint64_t address, int size)
{
    const int MAX_SIZE = 65535;
    int segments = 0;
    while (size)
    {
        /* Each block of 4 PCI / PCIe pointers uses one dword for lengths followed by
           up to 4 addresses. This then repeats if more data is needed */
        buffers[0].u64 = 0;
        if (size <= MAX_SIZE)
        {
            /* Only one more segment needed */
            buffers[0].pcie_length.len0 = size;
            buffers[1].u64 = address;
            segments++;
            break;
        }
        else if (size <= MAX_SIZE * 2)
        {
            /* Two more segments needed */
            buffers[0].pcie_length.len0 = MAX_SIZE;
            buffers[0].pcie_length.len1 = size - MAX_SIZE;
            buffers[1].u64 = address;
            address += MAX_SIZE;
            buffers[2].u64 = address;
            segments += 2;
            break;
        }
        else if (size <= MAX_SIZE * 3)
        {
            /* Three more segments needed */
            buffers[0].pcie_length.len0 = MAX_SIZE;
            buffers[0].pcie_length.len1 = MAX_SIZE;
            buffers[0].pcie_length.len2 = size - MAX_SIZE * 2;
            buffers[1].u64 = address;
            address += MAX_SIZE;
            buffers[2].u64 = address;
            address += MAX_SIZE;
            buffers[3].u64 = address;
            segments += 3;
            break;
        }
        else if (size <= MAX_SIZE * 4)
        {
            /* Four more segments needed */
            buffers[0].pcie_length.len0 = MAX_SIZE;
            buffers[0].pcie_length.len1 = MAX_SIZE;
            buffers[0].pcie_length.len2 = MAX_SIZE;
            buffers[0].pcie_length.len3 = size - MAX_SIZE * 3;
            buffers[1].u64 = address;
            address += MAX_SIZE;
            buffers[2].u64 = address;
            address += MAX_SIZE;
            buffers[3].u64 = address;
            address += MAX_SIZE;
            buffers[4].u64 = address;
            segments += 4;
            break;
        }
        else
        {
            /* Five or more segments are needed */
            buffers[0].pcie_length.len0 = MAX_SIZE;
            buffers[0].pcie_length.len1 = MAX_SIZE;
            buffers[0].pcie_length.len2 = MAX_SIZE;
            buffers[0].pcie_length.len3 = MAX_SIZE;
            buffers[1].u64 = address;
            address += MAX_SIZE;
            buffers[2].u64 = address;
            address += MAX_SIZE;
            buffers[3].u64 = address;
            address += MAX_SIZE;
            buffers[4].u64 = address;
            address += MAX_SIZE;
            size -= MAX_SIZE*4;
            buffers += 5;
            segments += 4;
        }
    }
    return segments;
}

/**
 * Build the first and last pointers based on a DMA engine header
 * and submit them to the engine. The purpose of this function is
 * to simplify the building of DMA engine commands by automatically
 * converting a simple address and size into the appropriate internal
 * or PCI / PCIe address list. This function does not support gather lists,
 * so you will need to build your own lists in that case.
 *
 * @param engine Engine to submit to (0 to cvmx_dma_engine_get_num()-1)
 * @param header DMA Command header. Note that the nfst and nlst fields do not
 *               need to be filled in. All other fields must be set properly.
 * @param first_address
 *               Address to use for the first pointers. In the case of INTERNAL,
 *               INBOUND, and OUTBOUND this is an Octeon memory address. In the
 *               case of EXTERNAL, this is the source PCI / PCIe address.
 * @param last_address
 *               Address to use for the last pointers. In the case of EXTERNAL,
 *               INBOUND, and OUTBOUND this is a PCI / PCIe address. In the
 *               case of INTERNAL, this is the Octeon memory destination address.
 * @param size   Size of the transfer to perform.
 *
 * @return Zero on success, negative on failure
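 *
 * Example (illustrative sketch only): an OUTBOUND copy of "size" bytes from
 * a local buffer to a PCI / PCIe bus address. Only the type field is shown
 * here; local_buffer and pcie_bus_address are placeholders, and a real
 * application must also fill in the remaining header fields described in
 * cvmx-dma-engine.h.
 *
 * @code
 * cvmx_dma_engine_header_t header;
 * header.u64 = 0;
 * header.s.type = CVMX_DMA_ENGINE_TRANSFER_OUTBOUND;
 * cvmx_dma_engine_transfer(0, header, cvmx_ptr_to_phys(local_buffer),
 *                          pcie_bus_address, size);
 * @endcode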
 */
int cvmx_dma_engine_transfer(int engine, cvmx_dma_engine_header_t header,
                             uint64_t first_address, uint64_t last_address,
                             int size)
{
    cvmx_dma_engine_buffer_t buffers[32];
    int words = 0;

    switch (header.s.type)
    {
        case CVMX_DMA_ENGINE_TRANSFER_INTERNAL:
            header.s.nfst = __cvmx_dma_engine_build_internal_pointers(buffers, first_address, size);
            words += header.s.nfst;
            header.s.nlst = __cvmx_dma_engine_build_internal_pointers(buffers + words, last_address, size);
            words += header.s.nlst;
            break;
        case CVMX_DMA_ENGINE_TRANSFER_INBOUND:
        case CVMX_DMA_ENGINE_TRANSFER_OUTBOUND:
            header.s.nfst = __cvmx_dma_engine_build_internal_pointers(buffers, first_address, size);
            words += header.s.nfst;
            header.s.nlst = __cvmx_dma_engine_build_external_pointers(buffers + words, last_address, size);
            words += header.s.nlst + ((header.s.nlst-1) >> 2) + 1;
            break;
        case CVMX_DMA_ENGINE_TRANSFER_EXTERNAL:
            header.s.nfst = __cvmx_dma_engine_build_external_pointers(buffers, first_address, size);
            words += header.s.nfst + ((header.s.nfst-1) >> 2) + 1;
            header.s.nlst = __cvmx_dma_engine_build_external_pointers(buffers + words, last_address, size);
            words += header.s.nlst + ((header.s.nlst-1) >> 2) + 1;
            break;
    }

    return cvmx_dma_engine_submit(engine, header, words, buffers);
}
#ifdef CVMX_BUILD_FOR_LINUX_KERNEL
EXPORT_SYMBOL(cvmx_dma_engine_transfer);
#endif
#endif /* CVMX_ENABLE_PKO_FUNCTIONS */