1 /***********************license start***************
2 * Copyright (c) 2003-2010 Cavium Inc. (support@cavium.com). All rights
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are
10 * * Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above
14 * copyright notice, this list of conditions and the following
15 * disclaimer in the documentation and/or other materials provided
16 * with the distribution.
18 * * Neither the name of Cavium Inc. nor the names of
19 * its contributors may be used to endorse or promote products
20 * derived from this software without specific prior written
23 * This Software, including technical data, may be subject to U.S. export control
24 * laws, including the U.S. Export Administration Act and its associated
25 * regulations, and may be subject to export or import regulations in other
28 * TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
29 * AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
30 * WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT TO
31 * THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY REPRESENTATION OR
32 * DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT DEFECTS, AND CAVIUM
33 * SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES OF TITLE,
34 * MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR PURPOSE, LACK OF
35 * VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT, QUIET POSSESSION OR
36 * CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK ARISING OUT OF USE OR
37 * PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
38 ***********************license end**************************************/
49 * Interface to the PCI / PCIe DMA engines. These are only available
50 * on chips with PCI / PCIe.
52 * <hr>$Revision: 70030 $<hr>
54 #ifdef CVMX_BUILD_FOR_LINUX_KERNEL
55 #include <linux/module.h>
56 #include <asm/octeon/cvmx.h>
57 #include <asm/octeon/octeon-model.h>
58 #include <asm/octeon/cvmx-config.h>
59 #include <asm/octeon/cvmx-cmd-queue.h>
60 #include <asm/octeon/cvmx-dma-engine.h>
61 #include <asm/octeon/octeon-feature.h>
62 #include <asm/octeon/cvmx-npi-defs.h>
63 #include <asm/octeon/cvmx-npei-defs.h>
64 #include <asm/octeon/cvmx-dpi-defs.h>
65 #include <asm/octeon/cvmx-pexp-defs.h>
66 #include <asm/octeon/cvmx-helper-cfg.h>
68 #include "executive-config.h"
69 #include "cvmx-config.h"
71 #include "cvmx-cmd-queue.h"
72 #include "cvmx-dma-engine.h"
73 #include "cvmx-helper-cfg.h"
76 #ifdef CVMX_ENABLE_PKO_FUNCTIONS
79 * Return the number of DMA engines supported by this chip
81 * @return Number of DMA engines
/**
 * Return the number of DMA engines supported by this chip.
 * The count is selected by which PCI interface generation the chip
 * has: NPEI (CN5xxx PCIe), DPI (newer PCIe), or legacy NPI.
 *
 * NOTE(review): this view of the file is truncated — the braces and
 * per-branch return statements are not visible here, so the exact
 * engine counts for each model cannot be confirmed from this chunk.
 *
 * @return Number of DMA engines
 */
83 int cvmx_dma_engine_get_num(void)
/* NPEI-based chips; CN52XX pass 1.x is special-cased below,
   presumably with a different engine count — TODO confirm. */
85 if (octeon_has_feature(OCTEON_FEATURE_NPEI))
87 if (OCTEON_IS_MODEL(OCTEON_CN52XX_PASS1_X))
/* DPI-based PCIe chips take a different count (branch body truncated). */
92 else if (octeon_has_feature(OCTEON_FEATURE_PCIE))
99 * Initialize the DMA engines for use
101 * @return Zero on success, negative on failure
/**
 * Initialize the DMA engines for use.
 *
 * For every engine this creates a command queue, programs the queue
 * buffer's physical address into the chip's IBUFF_SADDR register, and
 * finally enables the engines through the chip-family-specific DMA
 * control register (NPEI, DPI, or legacy NPI).
 *
 * NOTE(review): this view of the file is truncated — braces, error
 * returns and the final return statement are not visible here.
 *
 * @return Zero on success, negative on failure
 */
103 int cvmx_dma_engine_initialize(void)
/* One command queue per DMA engine, backed by the FPA output buffer pool. */
107 for (engine=0; engine < cvmx_dma_engine_get_num(); engine++)
109 cvmx_cmd_queue_result_t result;
110 result = cvmx_cmd_queue_initialize(CVMX_CMD_QUEUE_DMA(engine),
111 0, CVMX_FPA_OUTPUT_BUFFER_POOL,
112 CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE);
/* Error path truncated in this view — presumably bails out on failure. */
113 if (result != CVMX_CMD_QUEUE_SUCCESS)
115 if (octeon_has_feature(OCTEON_FEATURE_NPEI))
117 cvmx_npei_dmax_ibuff_saddr_t dmax_ibuff_saddr;
118 dmax_ibuff_saddr.u64 = 0;
/* saddr holds the physical queue-buffer address in 128-byte units (>> 7);
   the DPI branch below uses the same encoding. */
119 dmax_ibuff_saddr.s.saddr = cvmx_ptr_to_phys(cvmx_cmd_queue_buffer(CVMX_CMD_QUEUE_DMA(engine))) >> 7;
120 cvmx_write_csr(CVMX_PEXP_NPEI_DMAX_IBUFF_SADDR(engine), dmax_ibuff_saddr.u64);
122 else if (octeon_has_feature(OCTEON_FEATURE_PCIE))
124 cvmx_dpi_dmax_ibuff_saddr_t dpi_dmax_ibuff_saddr;
125 dpi_dmax_ibuff_saddr.u64 = 0;
/* csize is the chunk size in 8-byte words (pool size is in bytes). */
126 dpi_dmax_ibuff_saddr.s.csize = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/8;
127 dpi_dmax_ibuff_saddr.s.saddr = cvmx_ptr_to_phys(cvmx_cmd_queue_buffer(CVMX_CMD_QUEUE_DMA(engine))) >> 7;
128 cvmx_write_csr(CVMX_DPI_DMAX_IBUFF_SADDR(engine), dpi_dmax_ibuff_saddr.u64);
/* Legacy NPI chips: only a high- and a low-priority queue exist; the
   engine-selection logic around these writes is truncated in this view. */
132 uint64_t address = cvmx_ptr_to_phys(cvmx_cmd_queue_buffer(CVMX_CMD_QUEUE_DMA(engine)));
134 cvmx_write_csr(CVMX_NPI_HIGHP_IBUFF_SADDR, address);
136 cvmx_write_csr(CVMX_NPI_LOWP_IBUFF_SADDR, address);
/* Per-queue setup done; now enable the engines globally. */
140 if (octeon_has_feature(OCTEON_FEATURE_NPEI))
142 cvmx_npei_dma_control_t dma_control;
/* Engine 4 only exists on chips reporting >= 5 engines. */
144 if (cvmx_dma_engine_get_num() >= 5)
145 dma_control.s.dma4_enb = 1;
146 dma_control.s.dma3_enb = 1;
147 dma_control.s.dma2_enb = 1;
148 dma_control.s.dma1_enb = 1;
149 dma_control.s.dma0_enb = 1;
150 dma_control.s.o_mode = 1; /* Pull NS and RO from this register, not the pointers */
/* Don't-write-back is deliberately left disabled here (kept for reference). */
151 //dma_control.s.dwb_denb = 1;
152 //dma_control.s.dwb_ichk = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/128;
153 dma_control.s.fpa_que = CVMX_FPA_OUTPUT_BUFFER_POOL;
154 dma_control.s.csize = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/8;
155 cvmx_write_csr(CVMX_PEXP_NPEI_DMA_CONTROL, dma_control.u64);
156 /* As a workaround for errata PCIE-811 we only allow a single
157 outstanding DMA read over PCIe at a time. This limits performance,
158 but works in all cases. If you need higher performance, remove
159 this code and implement the more complicated workaround documented
160 in the errata. This only affects CN56XX pass 2.0 chips */
161 if (OCTEON_IS_MODEL(OCTEON_CN56XX_PASS2_0))
163 cvmx_npei_dma_pcie_req_num_t pcie_req_num;
164 pcie_req_num.u64 = cvmx_read_csr(CVMX_PEXP_NPEI_DMA_PCIE_REQ_NUM);
165 pcie_req_num.s.dma_cnt = 1;
166 cvmx_write_csr(CVMX_PEXP_NPEI_DMA_PCIE_REQ_NUM, pcie_req_num.u64);
169 else if (octeon_has_feature(OCTEON_FEATURE_PCIE))
171 cvmx_dpi_engx_buf_t dpi_engx_buf;
172 cvmx_dpi_dma_engx_en_t dpi_dma_engx_en;
173 cvmx_dpi_dma_control_t dma_control;
174 cvmx_dpi_ctl_t dpi_ctl;
176 /* Give engine 0-4 1KB, and 5 3KB. This gives the packet engines better
177 performance. Total must not exceed 8KB */
178 dpi_engx_buf.u64 = 0;
179 dpi_engx_buf.s.blks = 2;
180 cvmx_write_csr(CVMX_DPI_ENGX_BUF(0), dpi_engx_buf.u64);
181 cvmx_write_csr(CVMX_DPI_ENGX_BUF(1), dpi_engx_buf.u64);
182 cvmx_write_csr(CVMX_DPI_ENGX_BUF(2), dpi_engx_buf.u64);
183 cvmx_write_csr(CVMX_DPI_ENGX_BUF(3), dpi_engx_buf.u64);
184 cvmx_write_csr(CVMX_DPI_ENGX_BUF(4), dpi_engx_buf.u64);
/* Engine 5 (the packet engine) gets the larger buffer allocation. */
185 dpi_engx_buf.s.blks = 6;
186 cvmx_write_csr(CVMX_DPI_ENGX_BUF(5), dpi_engx_buf.u64);
188 dma_control.u64 = cvmx_read_csr(CVMX_DPI_DMA_CONTROL);
189 dma_control.s.pkt_hp = 1;
190 dma_control.s.pkt_en = 1;
/* Enable engines 0-4 (bitmask); engine 5 is reserved for packets below. */
191 dma_control.s.dma_enb = 0x1f;
/* DWB (don't-write-back) is enabled here per helper config, unlike NPEI. */
192 dma_control.s.dwb_denb = cvmx_helper_cfg_opt_get(CVMX_HELPER_CFG_OPT_USE_DWB);
193 dma_control.s.dwb_ichk = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/128;
194 dma_control.s.fpa_que = CVMX_FPA_OUTPUT_BUFFER_POOL;
195 dma_control.s.o_mode = 1;
196 cvmx_write_csr(CVMX_DPI_DMA_CONTROL, dma_control.u64);
197 /* When dma_control[pkt_en] = 1, engine 5 is used for packets and is not
198 available for DMA. */
199 dpi_dma_engx_en.u64 = cvmx_read_csr(CVMX_DPI_DMA_ENGX_EN(5));
200 dpi_dma_engx_en.s.qen = 0;
201 cvmx_write_csr(CVMX_DPI_DMA_ENGX_EN(5), dpi_dma_engx_en.u64);
/* Read-modify-write of DPI_CTL; the field set between these two lines
   is not visible in this truncated view. */
202 dpi_ctl.u64 = cvmx_read_csr(CVMX_DPI_CTL);
204 cvmx_write_csr(CVMX_DPI_CTL, dpi_ctl.u64);
/* Legacy NPI chips: enable the high/low priority queues. */
208 cvmx_npi_dma_control_t dma_control;
210 //dma_control.s.dwb_denb = 1;
211 //dma_control.s.dwb_ichk = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/128;
212 dma_control.s.o_add1 = 1;
213 dma_control.s.fpa_que = CVMX_FPA_OUTPUT_BUFFER_POOL;
214 dma_control.s.hp_enb = 1;
215 dma_control.s.lp_enb = 1;
216 dma_control.s.csize = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/8;
217 cvmx_write_csr(CVMX_NPI_DMA_CONTROL, dma_control.u64);
222 #ifdef CVMX_BUILD_FOR_LINUX_KERNEL
223 EXPORT_SYMBOL(cvmx_dma_engine_initialize);
227 * Shutdown all DMA engines. The engines must be idle when this
228 * function is called.
230 * @return Zero on success, negative on failure
/**
 * Shutdown all DMA engines. The engines must be idle when this
 * function is called: any engine whose command queue still has
 * outstanding commands aborts the shutdown with an error.
 *
 * Disables the engines via the family-specific DMA control register,
 * then tears down each command queue and clears its IBUFF_SADDR.
 *
 * NOTE(review): this view of the file is truncated — braces and
 * return statements (including the error return after the idle
 * check) are not visible here.
 *
 * @return Zero on success, negative on failure
 */
232 int cvmx_dma_engine_shutdown(void)
/* Refuse to shut down while any engine still has queued commands. */
236 for (engine=0; engine < cvmx_dma_engine_get_num(); engine++)
238 if (cvmx_cmd_queue_length(CVMX_CMD_QUEUE_DMA(engine)))
240 cvmx_dprintf("ERROR: cvmx_dma_engine_shutdown: Engine not idle.\n");
/* Disable engines — mirror image of the enables in initialize(). */
245 if (octeon_has_feature(OCTEON_FEATURE_NPEI))
247 cvmx_npei_dma_control_t dma_control;
248 dma_control.u64 = cvmx_read_csr(CVMX_PEXP_NPEI_DMA_CONTROL);
/* Engine 4 exists only on chips with >= 5 engines. */
249 if (cvmx_dma_engine_get_num() >= 5)
250 dma_control.s.dma4_enb = 0;
251 dma_control.s.dma3_enb = 0;
252 dma_control.s.dma2_enb = 0;
253 dma_control.s.dma1_enb = 0;
254 dma_control.s.dma0_enb = 0;
255 cvmx_write_csr(CVMX_PEXP_NPEI_DMA_CONTROL, dma_control.u64);
256 /* Make sure the disable completes */
257 cvmx_read_csr(CVMX_PEXP_NPEI_DMA_CONTROL);
259 else if (octeon_has_feature(OCTEON_FEATURE_PCIE))
261 cvmx_dpi_dma_control_t dma_control;
262 dma_control.u64 = cvmx_read_csr(CVMX_DPI_DMA_CONTROL);
/* Clear the whole engine-enable bitmask at once on DPI chips. */
263 dma_control.s.dma_enb = 0;
264 cvmx_write_csr(CVMX_DPI_DMA_CONTROL, dma_control.u64);
265 /* Make sure the disable completes */
266 cvmx_read_csr(CVMX_DPI_DMA_CONTROL);
/* Legacy NPI chips: disable the high/low priority queues. */
270 cvmx_npi_dma_control_t dma_control;
271 dma_control.u64 = cvmx_read_csr(CVMX_NPI_DMA_CONTROL);
272 dma_control.s.hp_enb = 0;
273 dma_control.s.lp_enb = 0;
274 cvmx_write_csr(CVMX_NPI_DMA_CONTROL, dma_control.u64);
275 /* Make sure the disable completes */
276 cvmx_read_csr(CVMX_NPI_DMA_CONTROL);
/* With the hardware quiesced, free each command queue and clear the
   buffer start-address registers. */
279 for (engine=0; engine < cvmx_dma_engine_get_num(); engine++)
281 cvmx_cmd_queue_shutdown(CVMX_CMD_QUEUE_DMA(engine));
282 if (octeon_has_feature(OCTEON_FEATURE_NPEI))
283 cvmx_write_csr(CVMX_PEXP_NPEI_DMAX_IBUFF_SADDR(engine), 0);
284 else if (octeon_has_feature(OCTEON_FEATURE_PCIE))
285 cvmx_write_csr(CVMX_DPI_DMAX_IBUFF_SADDR(engine), 0);
/* NPI chips: clear both priority-queue start addresses (the engine
   selection logic around these lines is truncated in this view). */
289 cvmx_write_csr(CVMX_NPI_HIGHP_IBUFF_SADDR, 0);
291 cvmx_write_csr(CVMX_NPI_LOWP_IBUFF_SADDR, 0);
297 #ifdef CVMX_BUILD_FOR_LINUX_KERNEL
298 EXPORT_SYMBOL(cvmx_dma_engine_shutdown);
302 * Submit a series of DMA command to the DMA engines.
304 * @param engine Engine to submit to (0 to cvmx_dma_engine_get_num()-1)
305 * @param header Command header
307 * @param num_buffers The number of data pointers
308 * @param buffers Command data pointers
310 * @return Zero on success, negative on failure
/**
 * Submit a series of DMA commands to a DMA engine: copy the header
 * and data pointers into a local command array, write it to the
 * engine's command queue while holding the queue lock, then ring the
 * engine's doorbell with the word count.
 *
 * NOTE(review): this view of the file is truncated — braces, the
 * error return for the PCIe-604 check, and the final return of
 * `result` are not visible here.
 *
 * @param engine      Engine to submit to (0 to cvmx_dma_engine_get_num()-1)
 * @param header      Command header
 * @param num_buffers The number of data pointers
 * @param buffers     Command data pointers
 *
 * @return Zero on success, negative on failure
 */
312 int cvmx_dma_engine_submit(int engine, cvmx_dma_engine_header_t header, int num_buffers, cvmx_dma_engine_buffer_t buffers[])
314 cvmx_cmd_queue_result_t result;
/* VLA: one word for the header plus one per data pointer. */
316 uint64_t cmds[num_buffers + 1];
318 if (OCTEON_IS_MODEL(OCTEON_CN56XX_PASS1_X))
320 /* Check for Errata PCIe-604 */
/* CN56XX pass 1.x limits the first/last pointer counts per command. */
321 if ((header.s.nfst > 11) || (header.s.nlst > 11) || (header.s.nfst + header.s.nlst > 15))
323 cvmx_dprintf("DMA engine submit too large\n");
328 cmds[0] = header.u64;
/* NOTE(review): cmd_count's declaration/initialization is in a
   truncated line; presumably it starts at 1 (after the header). */
329 while (num_buffers--)
331 cmds[cmd_count++] = buffers->u64;
335 /* Due to errata PCIE-13315, it is necessary to have the queue lock while we
336 ring the doorbell for the DMA engines. This prevents doorbells from
337 possibly arriving out of order with respect to the command queue
339 __cvmx_cmd_queue_lock(CVMX_CMD_QUEUE_DMA(engine), __cvmx_cmd_queue_get_state(CVMX_CMD_QUEUE_DMA(engine)));
/* Third argument 0 = queue must not take its own lock (we hold it). */
340 result = cvmx_cmd_queue_write(CVMX_CMD_QUEUE_DMA(engine), 0, cmd_count, cmds);
341 /* This SYNCWS is needed since the command queue didn't do locking, which
342 normally implies the SYNCWS. This one makes sure the command queue
343 updates make it to L2 before we ring the doorbell */
345 /* A syncw isn't needed here since the command queue did one as part of the queue unlock */
/* Only ring the doorbell if the queue write actually succeeded. */
346 if (cvmx_likely(result == CVMX_CMD_QUEUE_SUCCESS))
348 if (octeon_has_feature(OCTEON_FEATURE_NPEI))
350 /* DMA doorbells are 32bit writes in little endian space. This means we need to xor the address with 4 */
351 cvmx_write64_uint32(CVMX_PEXP_NPEI_DMAX_DBELL(engine)^4, cmd_count);
353 else if (octeon_has_feature(OCTEON_FEATURE_PCIE))
354 cvmx_write_csr(CVMX_DPI_DMAX_DBELL(engine), cmd_count);
/* Legacy NPI: high/low priority doorbells (selection logic truncated). */
358 cvmx_write_csr(CVMX_NPI_HIGHP_DBELL, cmd_count);
360 cvmx_write_csr(CVMX_NPI_LOWP_DBELL, cmd_count);
363 /* Here is the unlock for the above errata workaround */
364 __cvmx_cmd_queue_unlock(__cvmx_cmd_queue_get_state(CVMX_CMD_QUEUE_DMA(engine)));
371 * Function used by cvmx_dma_engine_transfer() to build the
372 * internal address list.
374 * @param buffers Location to store the list
375 * @param address Address to build list for
376 * @param size Length of the memory pointed to by address
378 * @return Number of internal pointer chunks created
/**
 * Function used by cvmx_dma_engine_transfer() to build the internal
 * (Octeon local memory) address list: the region [address, address+size)
 * is split into chunks, each emitted as one buffer entry with its own
 * size and address.
 *
 * NOTE(review): this view of the file is truncated — the loop that
 * advances `address`/`size` and increments `segments`, and the final
 * return, are not visible here.
 *
 * @param buffers Location to store the list
 * @param address Address to build list for
 * @param size    Length of the memory pointed to by address
 *
 * @return Number of internal pointer chunks created
 */
380 static inline int __cvmx_dma_engine_build_internal_pointers(cvmx_dma_engine_buffer_t *buffers, uint64_t address, int size)
385 /* Each internal chunk can contain a maximum of 8191 bytes */
/* Zero the whole word first so unset bitfields are well-defined. */
389 buffers[segments].u64 = 0;
390 buffers[segments].internal.size = chunk;
391 buffers[segments].internal.addr = address;
402 * Function used by cvmx_dma_engine_transfer() to build the PCI / PCIe address
404 * @param buffers Location to store the list
405 * @param address Address to build list for
406 * @param size Length of the memory pointed to by address
408 * @return Number of PCI / PCIe address chunks created. The number of words used
409 * will be segments + (segments-1)/4 + 1.
/**
 * Function used by cvmx_dma_engine_transfer() to build the PCI / PCIe
 * address list. Each group of up to 4 segments is encoded as one
 * length word (len0..len3) followed by up to 4 address words; a
 * segment carries at most 65535 bytes.
 *
 * NOTE(review): this view of the file is truncated — braces, the
 * statements that advance `address` between entries, the returns for
 * each branch, and the loop handling the five-or-more case are not
 * visible here.
 *
 * @param buffers Location to store the list
 * @param address Address to build list for
 * @param size    Length of the memory pointed to by address
 *
 * @return Number of PCI / PCIe address chunks created. The number of words used
 *         will be segments + (segments-1)/4 + 1.
 */
411 static inline int __cvmx_dma_engine_build_external_pointers(cvmx_dma_engine_buffer_t *buffers, uint64_t address, int size)
/* Hardware limit: a single external segment length is a 16-bit field. */
413 const int MAX_SIZE = 65535;
417 /* Each block of 4 PCI / PCIe pointers uses one dword for lengths followed by
418 up to 4 addresses. This then repeats if more data is needed */
420 if (size <= MAX_SIZE)
422 /* Only one more segment needed */
423 buffers[0].pcie_length.len0 = size;
424 buffers[1].u64 = address;
428 else if (size <= MAX_SIZE * 2)
430 /* Two more segments needed */
431 buffers[0].pcie_length.len0 = MAX_SIZE;
432 buffers[0].pcie_length.len1 = size - MAX_SIZE;
433 buffers[1].u64 = address;
/* `address` is presumably advanced by MAX_SIZE between these writes
   (the advancing statements are truncated in this view). */
435 buffers[2].u64 = address;
439 else if (size <= MAX_SIZE * 3)
441 /* Three more segments needed */
442 buffers[0].pcie_length.len0 = MAX_SIZE;
443 buffers[0].pcie_length.len1 = MAX_SIZE;
444 buffers[0].pcie_length.len2 = size - MAX_SIZE * 2;
445 buffers[1].u64 = address;
447 buffers[2].u64 = address;
449 buffers[3].u64 = address;
453 else if (size <= MAX_SIZE * 4)
455 /* Four more segments needed */
456 buffers[0].pcie_length.len0 = MAX_SIZE;
457 buffers[0].pcie_length.len1 = MAX_SIZE;
458 buffers[0].pcie_length.len2 = MAX_SIZE;
459 buffers[0].pcie_length.len3 = size - MAX_SIZE * 3;
460 buffers[1].u64 = address;
462 buffers[2].u64 = address;
464 buffers[3].u64 = address;
466 buffers[4].u64 = address;
472 /* Five or more segments are needed */
/* Emit a full group of 4 max-size segments; the recursion/iteration
   that handles the remainder is truncated in this view. */
473 buffers[0].pcie_length.len0 = MAX_SIZE;
474 buffers[0].pcie_length.len1 = MAX_SIZE;
475 buffers[0].pcie_length.len2 = MAX_SIZE;
476 buffers[0].pcie_length.len3 = MAX_SIZE;
477 buffers[1].u64 = address;
479 buffers[2].u64 = address;
481 buffers[3].u64 = address;
483 buffers[4].u64 = address;
495 * Build the first and last pointers based on a DMA engine header
496 * and submit them to the engine. The purpose of this function is
497 * to simplify the building of DMA engine commands by automatically
498 * converting a simple address and size into the appropriate internal
499 * or PCI / PCIe address list. This function does not support gather lists,
500 * so you will need to build your own lists in that case.
502 * @param engine Engine to submit to (0 to cvmx_dma_engine_get_num()-1)
503 * @param header DMA Command header. Note that the nfst and nlst fields do not
504 * need to be filled in. All other fields must be set properly.
505 * @param first_address
506 * Address to use for the first pointers. In the case of INTERNAL,
507 * INBOUND, and OUTBOUND this is an Octeon memory address. In the
508 * case of EXTERNAL, this is the source PCI / PCIe address.
509 * @param last_address
510 * Address to use for the last pointers. In the case of EXTERNAL,
511 * INBOUND, and OUTBOUND this is a PCI / PCIe address. In the
512 * case of INTERNAL, this is the Octeon memory destination address.
513 * @param size Size of the transfer to perform.
515 * @return Zero on success, negative on failure
/**
 * Build the first and last pointer lists from a DMA engine header and
 * submit the command. Dispatches on header.s.type: INTERNAL uses
 * local-memory pointers for both lists; INBOUND/OUTBOUND use a local
 * first list and a PCI/PCIe last list; EXTERNAL uses PCI/PCIe lists
 * for both. Gather lists are not supported.
 *
 * NOTE(review): this view of the file is truncated — the switch's
 * braces, `break` statements, any default case, and the declaration/
 * initialization of `words` (and `size`'s parameter line) are not
 * visible here.
 *
 * @param engine        Engine to submit to (0 to cvmx_dma_engine_get_num()-1)
 * @param header        DMA command header; nfst/nlst are filled in here
 * @param first_address Address used for the first pointer list
 * @param last_address  Address used for the last pointer list
 *
 * @return Zero on success, negative on failure
 */
517 int cvmx_dma_engine_transfer(int engine, cvmx_dma_engine_header_t header,
518 uint64_t first_address, uint64_t last_address,
/* 32 words is the scratch command buffer for both pointer lists. */
521 cvmx_dma_engine_buffer_t buffers[32];
524 switch (header.s.type)
526 case CVMX_DMA_ENGINE_TRANSFER_INTERNAL:
527 header.s.nfst = __cvmx_dma_engine_build_internal_pointers(buffers, first_address, size);
528 words += header.s.nfst;
529 header.s.nlst = __cvmx_dma_engine_build_internal_pointers(buffers + words, last_address, size);
530 words += header.s.nlst;
532 case CVMX_DMA_ENGINE_TRANSFER_INBOUND:
533 case CVMX_DMA_ENGINE_TRANSFER_OUTBOUND:
534 header.s.nfst = __cvmx_dma_engine_build_internal_pointers(buffers, first_address, size);
535 words += header.s.nfst;
536 header.s.nlst = __cvmx_dma_engine_build_external_pointers(buffers + words, last_address, size);
/* External lists consume extra words: one length word per group of 4
   segments, hence segments + (segments-1)/4 + 1 total. */
537 words += header.s.nlst + ((header.s.nlst-1) >> 2) + 1;
539 case CVMX_DMA_ENGINE_TRANSFER_EXTERNAL:
540 header.s.nfst = __cvmx_dma_engine_build_external_pointers(buffers, first_address, size);
541 words += header.s.nfst + ((header.s.nfst-1) >> 2) + 1;
542 header.s.nlst = __cvmx_dma_engine_build_external_pointers(buffers + words, last_address, size);
543 words += header.s.nlst + ((header.s.nlst-1) >> 2) + 1;
546 return cvmx_dma_engine_submit(engine, header, words, buffers);
548 #ifdef CVMX_BUILD_FOR_LINUX_KERNEL
549 EXPORT_SYMBOL(cvmx_dma_engine_transfer);