/***********************license start***************
 * Copyright (c) 2003-2010  Cavium Networks (support@cavium.com). All rights
 * reserved.
 *
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *
 *   * Redistributions in binary form must reproduce the above
 *     copyright notice, this list of conditions and the following
 *     disclaimer in the documentation and/or other materials provided
 *     with the distribution.

 *   * Neither the name of Cavium Networks nor the names of
 *     its contributors may be used to endorse or promote products
 *     derived from this software without specific prior written
 *     permission.

 * This Software, including technical data, may be subject to U.S. export  control
 * laws, including the U.S. Export Administration Act and its  associated
 * regulations, and may be subject to export or import  regulations in other
 * countries.

 * TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
 * AND WITH ALL FAULTS AND CAVIUM  NETWORKS MAKES NO PROMISES, REPRESENTATIONS OR
 * WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT TO
 * THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY REPRESENTATION OR
 * DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT DEFECTS, AND CAVIUM
 * SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES OF TITLE,
 * MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR PURPOSE, LACK OF
 * VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT, QUIET POSSESSION OR
 * CORRESPONDENCE TO DESCRIPTION. THE ENTIRE  RISK ARISING OUT OF USE OR
 * PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
 ***********************license end**************************************/


/**
 * @file
 *
 * Interface to the PCI / PCIe DMA engines. These are only available
 * on chips with PCI / PCIe.
 *
 * <hr>$Revision: 50126 $<hr>
 */
#include "executive-config.h"
#include "cvmx-config.h"
#include "cvmx.h"
#include "cvmx-cmd-queue.h"
#include "cvmx-dma-engine.h"

#ifdef CVMX_ENABLE_PKO_FUNCTIONS

/**
 * Return the number of DMA engines supported by this chip
 *
 * @return Number of DMA engines
 */
int cvmx_dma_engine_get_num(void)
{
    if (octeon_has_feature(OCTEON_FEATURE_NPEI))
    {
        if (OCTEON_IS_MODEL(OCTEON_CN52XX_PASS1_X))
            return 4;
        else
            return 5;
    }
    else if (octeon_has_feature(OCTEON_FEATURE_PCIE))
        return 8;
    else
        return 2;
}

/**
 * Initialize the DMA engines for use
 *
 * @return Zero on success, negative on failure
 */
int cvmx_dma_engine_initialize(void)
{
    int engine;

    for (engine=0; engine < cvmx_dma_engine_get_num(); engine++)
    {
        cvmx_cmd_queue_result_t result;
        result = cvmx_cmd_queue_initialize(CVMX_CMD_QUEUE_DMA(engine),
                                           0, CVMX_FPA_OUTPUT_BUFFER_POOL,
                                           CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE);
        if (result != CVMX_CMD_QUEUE_SUCCESS)
            return -1;
        if (octeon_has_feature(OCTEON_FEATURE_NPEI))
        {
            cvmx_npei_dmax_ibuff_saddr_t dmax_ibuff_saddr;
            dmax_ibuff_saddr.u64 = 0;
            dmax_ibuff_saddr.s.saddr = cvmx_ptr_to_phys(cvmx_cmd_queue_buffer(CVMX_CMD_QUEUE_DMA(engine))) >> 7;
            cvmx_write_csr(CVMX_PEXP_NPEI_DMAX_IBUFF_SADDR(engine), dmax_ibuff_saddr.u64);
        }
        else if (octeon_has_feature(OCTEON_FEATURE_PCIE))
        {
            cvmx_dpi_dmax_ibuff_saddr_t dpi_dmax_ibuff_saddr;
            dpi_dmax_ibuff_saddr.u64 = 0;
            dpi_dmax_ibuff_saddr.s.csize = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/8;
            dpi_dmax_ibuff_saddr.s.saddr = cvmx_ptr_to_phys(cvmx_cmd_queue_buffer(CVMX_CMD_QUEUE_DMA(engine))) >> 7;
            cvmx_write_csr(CVMX_DPI_DMAX_IBUFF_SADDR(engine), dpi_dmax_ibuff_saddr.u64);
        }
        else
        {
            uint64_t address = cvmx_ptr_to_phys(cvmx_cmd_queue_buffer(CVMX_CMD_QUEUE_DMA(engine)));
            if (engine)
                cvmx_write_csr(CVMX_NPI_HIGHP_IBUFF_SADDR, address);
            else
                cvmx_write_csr(CVMX_NPI_LOWP_IBUFF_SADDR, address);
        }
    }

    if (octeon_has_feature(OCTEON_FEATURE_NPEI))
    {
        cvmx_npei_dma_control_t dma_control;
        dma_control.u64 = 0;
        if (cvmx_dma_engine_get_num() >= 5)
            dma_control.s.dma4_enb = 1;
        dma_control.s.dma3_enb = 1;
        dma_control.s.dma2_enb = 1;
        dma_control.s.dma1_enb = 1;
        dma_control.s.dma0_enb = 1;
        dma_control.s.o_mode = 1; /* Pull NS and RO from this register, not the pointers */
        //dma_control.s.dwb_denb = 1;
        //dma_control.s.dwb_ichk = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/128;
        dma_control.s.fpa_que = CVMX_FPA_OUTPUT_BUFFER_POOL;
        dma_control.s.csize = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/8;
        cvmx_write_csr(CVMX_PEXP_NPEI_DMA_CONTROL, dma_control.u64);
        /* As a workaround for errata PCIE-811 we only allow a single
            outstanding DMA read over PCIe at a time. This limits performance,
            but works in all cases. If you need higher performance, remove
            this code and implement the more complicated workaround documented
            in the errata. This only affects CN56XX pass 2.0 chips */
        if (OCTEON_IS_MODEL(OCTEON_CN56XX_PASS2_0))
        {
            cvmx_npei_dma_pcie_req_num_t pcie_req_num;
            pcie_req_num.u64 = cvmx_read_csr(CVMX_PEXP_NPEI_DMA_PCIE_REQ_NUM);
            pcie_req_num.s.dma_cnt = 1;
            cvmx_write_csr(CVMX_PEXP_NPEI_DMA_PCIE_REQ_NUM, pcie_req_num.u64);
        }
    }
    else if (octeon_has_feature(OCTEON_FEATURE_PCIE))
    {
        cvmx_dpi_engx_buf_t dpi_engx_buf;
        cvmx_dpi_dma_control_t dma_control;
        cvmx_dpi_ctl_t dpi_ctl;

        /* Give engines 0-4 1KB each and engine 5 3KB. This gives the packet
            engines better performance. The total must not exceed 8KB */
        dpi_engx_buf.u64 = 0;
        dpi_engx_buf.s.blks = 2;
        cvmx_write_csr(CVMX_DPI_ENGX_BUF(0), dpi_engx_buf.u64);
        cvmx_write_csr(CVMX_DPI_ENGX_BUF(1), dpi_engx_buf.u64);
        cvmx_write_csr(CVMX_DPI_ENGX_BUF(2), dpi_engx_buf.u64);
        cvmx_write_csr(CVMX_DPI_ENGX_BUF(3), dpi_engx_buf.u64);
        cvmx_write_csr(CVMX_DPI_ENGX_BUF(4), dpi_engx_buf.u64);
        dpi_engx_buf.s.blks = 6;
        cvmx_write_csr(CVMX_DPI_ENGX_BUF(5), dpi_engx_buf.u64);

        dma_control.u64 = cvmx_read_csr(CVMX_DPI_DMA_CONTROL);
        dma_control.s.pkt_hp = 1;
        dma_control.s.pkt_en = 1;
        dma_control.s.dma_enb = 0x1f;
        dma_control.s.dwb_denb = 1;
        dma_control.s.dwb_ichk = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/128;
        dma_control.s.fpa_que = CVMX_FPA_OUTPUT_BUFFER_POOL;
        dma_control.s.o_mode = 1;
        cvmx_write_csr(CVMX_DPI_DMA_CONTROL, dma_control.u64);
        dpi_ctl.u64 = cvmx_read_csr(CVMX_DPI_CTL);
        dpi_ctl.s.en = 1;
        cvmx_write_csr(CVMX_DPI_CTL, dpi_ctl.u64);
    }
    else
    {
        cvmx_npi_dma_control_t dma_control;
        dma_control.u64 = 0;
        //dma_control.s.dwb_denb = 1;
        //dma_control.s.dwb_ichk = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/128;
        dma_control.s.o_add1 = 1;
        dma_control.s.fpa_que = CVMX_FPA_OUTPUT_BUFFER_POOL;
        dma_control.s.hp_enb = 1;
        dma_control.s.lp_enb = 1;
        dma_control.s.csize = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/8;
        cvmx_write_csr(CVMX_NPI_DMA_CONTROL, dma_control.u64);
    }

    return 0;
}


/**
 * Shutdown all DMA engines. The engines must be idle when this
 * function is called.
 *
 * @return Zero on success, negative on failure
 */
int cvmx_dma_engine_shutdown(void)
{
    int engine;

    for (engine=0; engine < cvmx_dma_engine_get_num(); engine++)
    {
        if (cvmx_cmd_queue_length(CVMX_CMD_QUEUE_DMA(engine)))
        {
            cvmx_dprintf("ERROR: cvmx_dma_engine_shutdown: Engine not idle.\n");
            return -1;
        }
    }

    if (octeon_has_feature(OCTEON_FEATURE_NPEI))
    {
        cvmx_npei_dma_control_t dma_control;
        dma_control.u64 = cvmx_read_csr(CVMX_PEXP_NPEI_DMA_CONTROL);
        if (cvmx_dma_engine_get_num() >= 5)
            dma_control.s.dma4_enb = 0;
        dma_control.s.dma3_enb = 0;
        dma_control.s.dma2_enb = 0;
        dma_control.s.dma1_enb = 0;
        dma_control.s.dma0_enb = 0;
        cvmx_write_csr(CVMX_PEXP_NPEI_DMA_CONTROL, dma_control.u64);
        /* Make sure the disable completes */
        cvmx_read_csr(CVMX_PEXP_NPEI_DMA_CONTROL);
    }
    else if (octeon_has_feature(OCTEON_FEATURE_PCIE))
    {
        cvmx_dpi_dma_control_t dma_control;
        dma_control.u64 = cvmx_read_csr(CVMX_DPI_DMA_CONTROL);
        dma_control.s.dma_enb = 0;
        cvmx_write_csr(CVMX_DPI_DMA_CONTROL, dma_control.u64);
        /* Make sure the disable completes */
        cvmx_read_csr(CVMX_DPI_DMA_CONTROL);
    }
    else
    {
        cvmx_npi_dma_control_t dma_control;
        dma_control.u64 = cvmx_read_csr(CVMX_NPI_DMA_CONTROL);
        dma_control.s.hp_enb = 0;
        dma_control.s.lp_enb = 0;
        cvmx_write_csr(CVMX_NPI_DMA_CONTROL, dma_control.u64);
        /* Make sure the disable completes */
        cvmx_read_csr(CVMX_NPI_DMA_CONTROL);
    }

    for (engine=0; engine < cvmx_dma_engine_get_num(); engine++)
    {
        cvmx_cmd_queue_shutdown(CVMX_CMD_QUEUE_DMA(engine));
        if (octeon_has_feature(OCTEON_FEATURE_NPEI))
            cvmx_write_csr(CVMX_PEXP_NPEI_DMAX_IBUFF_SADDR(engine), 0);
        else if (octeon_has_feature(OCTEON_FEATURE_PCIE))
            cvmx_write_csr(CVMX_DPI_DMAX_IBUFF_SADDR(engine), 0);
        else
        {
            if (engine)
                cvmx_write_csr(CVMX_NPI_HIGHP_IBUFF_SADDR, 0);
            else
                cvmx_write_csr(CVMX_NPI_LOWP_IBUFF_SADDR, 0);
        }
    }

    return 0;
}
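
/* Hypothetical usage sketch (not part of the original SDK): the expected
   lifecycle is initialize once at startup, submit work, then shut down only
   after every engine's command queue has drained. Everything used here is
   defined in this file. */
static inline int __example_dma_engine_lifecycle(void)
{
    if (cvmx_dma_engine_initialize())
        return -1;
    /* ... build and submit DMA commands here ... */
    /* Fails with -1 if any engine still has queued commands */
    return cvmx_dma_engine_shutdown();
}
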
/**
 * Submit a series of DMA commands to the DMA engines.
 *
 * @param engine  Engine to submit to (0 to cvmx_dma_engine_get_num()-1)
 * @param header  Command header
 * @param num_buffers
 *                The number of data pointers
 * @param buffers Command data pointers
 *
 * @return Zero on success, negative on failure
 */
int cvmx_dma_engine_submit(int engine, cvmx_dma_engine_header_t header, int num_buffers, cvmx_dma_engine_buffer_t buffers[])
{
    cvmx_cmd_queue_result_t result;
    int cmd_count = 1;
    uint64_t cmds[num_buffers + 1];

    if (OCTEON_IS_MODEL(OCTEON_CN56XX_PASS1_X))
    {
        /* Check for Errata PCIe-604 */
        if ((header.s.nfst > 11) || (header.s.nlst > 11) || (header.s.nfst + header.s.nlst > 15))
        {
            cvmx_dprintf("DMA engine submit too large\n");
            return -1;
        }
    }

    cmds[0] = header.u64;
    while (num_buffers--)
    {
        cmds[cmd_count++] = buffers->u64;
        buffers++;
    }

    /* Due to errata PCIE-13315, it is necessary to have the queue lock while we
        ring the doorbell for the DMA engines. This prevents doorbells from
        possibly arriving out of order with respect to the command queue
        entries */
    __cvmx_cmd_queue_lock(CVMX_CMD_QUEUE_DMA(engine), __cvmx_cmd_queue_get_state(CVMX_CMD_QUEUE_DMA(engine)));
    result = cvmx_cmd_queue_write(CVMX_CMD_QUEUE_DMA(engine), 0, cmd_count, cmds);
    /* This SYNCWS is needed since the command queue didn't do locking, which
        normally implies the SYNCWS. This one makes sure the command queue
        updates make it to L2 before we ring the doorbell */
    CVMX_SYNCWS;
    /* A syncw isn't needed here since the command queue did one as part of the queue unlock */
    if (cvmx_likely(result == CVMX_CMD_QUEUE_SUCCESS))
    {
        if (octeon_has_feature(OCTEON_FEATURE_NPEI))
        {
            /* DMA doorbells are 32bit writes in little endian space. This means we need to xor the address with 4 */
            cvmx_write64_uint32(CVMX_PEXP_NPEI_DMAX_DBELL(engine)^4, cmd_count);
        }
        else if (octeon_has_feature(OCTEON_FEATURE_PCIE))
            cvmx_write_csr(CVMX_DPI_DMAX_DBELL(engine), cmd_count);
        else
        {
            if (engine)
                cvmx_write_csr(CVMX_NPI_HIGHP_DBELL, cmd_count);
            else
                cvmx_write_csr(CVMX_NPI_LOWP_DBELL, cmd_count);
        }
    }
    /* Here is the unlock for the above errata workaround */
    __cvmx_cmd_queue_unlock(__cvmx_cmd_queue_get_state(CVMX_CMD_QUEUE_DMA(engine)));
    return result;
}
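
/* Hypothetical usage sketch (not part of the original SDK): submit a single
   local-to-local copy of up to 8191 bytes on engine 0. Only the header
   fields shown are set here; a real caller must program the remaining
   header fields as required by the hardware manual. */
static inline int __example_submit_internal_copy(uint64_t src, uint64_t dest, int len)
{
    cvmx_dma_engine_header_t header;
    cvmx_dma_engine_buffer_t buffers[2];

    header.u64 = 0;
    header.s.type = CVMX_DMA_ENGINE_TRANSFER_INTERNAL;
    header.s.nfst = 1;              /* one first (source) pointer */
    header.s.nlst = 1;              /* one last (destination) pointer */
    buffers[0].u64 = 0;
    buffers[0].internal.size = len; /* internal pointers carry at most 8191 bytes */
    buffers[0].internal.addr = src;
    buffers[1].u64 = 0;
    buffers[1].internal.size = len;
    buffers[1].internal.addr = dest;
    return cvmx_dma_engine_submit(0, header, 2, buffers);
}
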
/**
 * @INTERNAL
 * Function used by cvmx_dma_engine_transfer() to build the
 * internal address list.
 *
 * @param buffers Location to store the list
 * @param address Address to build list for
 * @param size    Length of the memory pointed to by address
 *
 * @return Number of internal pointer chunks created
 */
static inline int __cvmx_dma_engine_build_internal_pointers(cvmx_dma_engine_buffer_t *buffers, uint64_t address, int size)
{
    int segments = 0;
    while (size)
    {
        /* Each internal chunk can contain a maximum of 8191 bytes */
        int chunk = size;
        if (chunk > 8191)
            chunk = 8191;
        buffers[segments].u64 = 0;
        buffers[segments].internal.size = chunk;
        buffers[segments].internal.addr = address;
        address += chunk;
        size -= chunk;
        segments++;
    }
    return segments;
}
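
/* Worked example of the chunking above: a 20000 byte region becomes three
   internal pointers of 8191 + 8191 + 3618 bytes, so the function returns 3. */
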
/**
 * @INTERNAL
 * Function used by cvmx_dma_engine_transfer() to build the PCI / PCIe address
 * list.
 * @param buffers Location to store the list
 * @param address Address to build list for
 * @param size    Length of the memory pointed to by address
 *
 * @return Number of PCI / PCIe address chunks created. The number of words used
 *         will be segments + (segments-1)/4 + 1.
 */
static inline int __cvmx_dma_engine_build_external_pointers(cvmx_dma_engine_buffer_t *buffers, uint64_t address, int size)
{
    const int MAX_SIZE = 65535;
    int segments = 0;
    while (size)
    {
        /* Each block of 4 PCI / PCIe pointers uses one dword for lengths followed by
            up to 4 addresses. This then repeats if more data is needed */
        buffers[0].u64 = 0;
        if (size <= MAX_SIZE)
        {
            /* Only one more segment needed */
            buffers[0].pcie_length.len0 = size;
            buffers[1].u64 = address;
            segments++;
            break;
        }
        else if (size <= MAX_SIZE * 2)
        {
            /* Two more segments needed */
            buffers[0].pcie_length.len0 = MAX_SIZE;
            buffers[0].pcie_length.len1 = size - MAX_SIZE;
            buffers[1].u64 = address;
            address += MAX_SIZE;
            buffers[2].u64 = address;
            segments+=2;
            break;
        }
        else if (size <= MAX_SIZE * 3)
        {
            /* Three more segments needed */
            buffers[0].pcie_length.len0 = MAX_SIZE;
            buffers[0].pcie_length.len1 = MAX_SIZE;
            buffers[0].pcie_length.len2 = size - MAX_SIZE * 2;
            buffers[1].u64 = address;
            address += MAX_SIZE;
            buffers[2].u64 = address;
            address += MAX_SIZE;
            buffers[3].u64 = address;
            segments+=3;
            break;
        }
        else if (size <= MAX_SIZE * 4)
        {
            /* Four more segments needed */
            buffers[0].pcie_length.len0 = MAX_SIZE;
            buffers[0].pcie_length.len1 = MAX_SIZE;
            buffers[0].pcie_length.len2 = MAX_SIZE;
            buffers[0].pcie_length.len3 = size - MAX_SIZE * 3;
            buffers[1].u64 = address;
            address += MAX_SIZE;
            buffers[2].u64 = address;
            address += MAX_SIZE;
            buffers[3].u64 = address;
            address += MAX_SIZE;
            buffers[4].u64 = address;
            segments+=4;
            break;
        }
        else
        {
            /* Five or more segments are needed */
            buffers[0].pcie_length.len0 = MAX_SIZE;
            buffers[0].pcie_length.len1 = MAX_SIZE;
            buffers[0].pcie_length.len2 = MAX_SIZE;
            buffers[0].pcie_length.len3 = MAX_SIZE;
            buffers[1].u64 = address;
            address += MAX_SIZE;
            buffers[2].u64 = address;
            address += MAX_SIZE;
            buffers[3].u64 = address;
            address += MAX_SIZE;
            buffers[4].u64 = address;
            address += MAX_SIZE;
            size -= MAX_SIZE*4;
            buffers += 5;
            segments+=4;
        }
    }
    return segments;
}
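
/* Worked example of the word count above: segments = 5 needs two length
   words (four lengths in the first dword, one in the second) plus five
   address words, i.e. 5 + (5-1)/4 + 1 = 7 words total. */
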
/**
 * Build the first and last pointers based on a DMA engine header
 * and submit them to the engine. The purpose of this function is
 * to simplify the building of DMA engine commands by automatically
 * converting a simple address and size into the appropriate internal
 * or PCI / PCIe address list. This function does not support gather lists,
 * so you will need to build your own lists in that case.
 *
 * @param engine Engine to submit to (0 to cvmx_dma_engine_get_num()-1)
 * @param header DMA Command header. Note that the nfst and nlst fields do not
 *               need to be filled in. All other fields must be set properly.
 * @param first_address
 *               Address to use for the first pointers. In the case of INTERNAL,
 *               INBOUND, and OUTBOUND this is an Octeon memory address. In the
 *               case of EXTERNAL, this is the source PCI / PCIe address.
 * @param last_address
 *               Address to use for the last pointers. In the case of EXTERNAL,
 *               INBOUND, and OUTBOUND this is a PCI / PCIe address. In the
 *               case of INTERNAL, this is the Octeon memory destination address.
 * @param size   Size of the transfer to perform.
 *
 * @return Zero on success, negative on failure
 */
int cvmx_dma_engine_transfer(int engine, cvmx_dma_engine_header_t header,
                             uint64_t first_address, uint64_t last_address,
                             int size)
{
    cvmx_dma_engine_buffer_t buffers[32];
    int words = 0;

    switch (header.s.type)
    {
        case CVMX_DMA_ENGINE_TRANSFER_INTERNAL:
            header.s.nfst = __cvmx_dma_engine_build_internal_pointers(buffers, first_address, size);
            words += header.s.nfst;
            header.s.nlst = __cvmx_dma_engine_build_internal_pointers(buffers + words, last_address, size);
            words += header.s.nlst;
            break;
        case CVMX_DMA_ENGINE_TRANSFER_INBOUND:
        case CVMX_DMA_ENGINE_TRANSFER_OUTBOUND:
            header.s.nfst = __cvmx_dma_engine_build_internal_pointers(buffers, first_address, size);
            words += header.s.nfst;
            header.s.nlst = __cvmx_dma_engine_build_external_pointers(buffers + words, last_address, size);
            words += header.s.nlst + ((header.s.nlst-1) >> 2) + 1;
            break;
        case CVMX_DMA_ENGINE_TRANSFER_EXTERNAL:
            header.s.nfst = __cvmx_dma_engine_build_external_pointers(buffers, first_address, size);
            words += header.s.nfst + ((header.s.nfst-1) >> 2) + 1;
            header.s.nlst = __cvmx_dma_engine_build_external_pointers(buffers + words, last_address, size);
            words += header.s.nlst + ((header.s.nlst-1) >> 2) + 1;
            break;
    }
    return cvmx_dma_engine_submit(engine, header, words, buffers);
}
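
/* Hypothetical usage sketch (not part of the original SDK): push a local
   buffer out to a PCIe address using the simplified API above. Header
   fields other than type (port selection, completion notification, etc.)
   are left at zero here; a real caller must set them per the hardware
   manual. */
static inline int __example_outbound_transfer(void *local_buffer, uint64_t pcie_address, int size)
{
    cvmx_dma_engine_header_t header;
    header.u64 = 0;
    header.s.type = CVMX_DMA_ENGINE_TRANSFER_OUTBOUND;
    /* nfst/nlst are computed by cvmx_dma_engine_transfer() */
    return cvmx_dma_engine_transfer(0, header,
                                    cvmx_ptr_to_phys(local_buffer),
                                    pcie_address, size);
}
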
#endif