2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
4 * Copyright (c) 2017 Chelsio Communications, Inc.
5 * Copyright (c) 2017 Conrad Meyer <cem@FreeBSD.org>
7 * Largely borrowed from ccr(4), Written by: John Baldwin <jhb@FreeBSD.org>
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
36 #include <sys/param.h>
39 #include <sys/kernel.h>
40 #include <sys/malloc.h>
41 #include <sys/mutex.h>
42 #include <sys/module.h>
44 #include <sys/sglist.h>
45 #include <sys/sysctl.h>
51 #include <dev/pci/pcireg.h>
52 #include <dev/pci/pcivar.h>
54 #include <machine/bus.h>
55 #include <machine/resource.h>
56 #include <machine/vmparam.h>
58 #include <opencrypto/cryptodev.h>
59 #include <opencrypto/xform.h>
64 #include "cryptodev_if.h"
67 #include "ccp_hardware.h"
70 CTASSERT(sizeof(struct ccp_desc) == 32);
72 static struct ccp_xts_unitsize_map_entry {
73 enum ccp_xts_unitsize cxu_id;
75 } ccp_xts_unitsize_map[] = {
76 { CCP_XTS_AES_UNIT_SIZE_16, 16 },
77 { CCP_XTS_AES_UNIT_SIZE_512, 512 },
78 { CCP_XTS_AES_UNIT_SIZE_1024, 1024 },
79 { CCP_XTS_AES_UNIT_SIZE_2048, 2048 },
80 { CCP_XTS_AES_UNIT_SIZE_4096, 4096 },
83 SYSCTL_NODE(_hw, OID_AUTO, ccp, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
86 unsigned g_ccp_ring_order = 11;
87 SYSCTL_UINT(_hw_ccp, OID_AUTO, ring_order, CTLFLAG_RDTUN, &g_ccp_ring_order,
88 0, "Set CCP ring order. (1 << this) == ring size. Min: 6, Max: 16");
91 * Zero buffer, sufficient for padding LSB entries, that does not span a page boundary.
94 static const char g_zeroes[32] __aligned(32);
96 static inline uint32_t
97 ccp_read_4(struct ccp_softc *sc, uint32_t offset)
99 return (bus_space_read_4(sc->pci_bus_tag, sc->pci_bus_handle, offset));
103 ccp_write_4(struct ccp_softc *sc, uint32_t offset, uint32_t value)
105 bus_space_write_4(sc->pci_bus_tag, sc->pci_bus_handle, offset, value);
108 static inline uint32_t
109 ccp_read_queue_4(struct ccp_softc *sc, unsigned queue, uint32_t offset)
112 * Each queue gets its own 4kB register space. Queue 0 is at 0x1000.
114 return (ccp_read_4(sc, (CMD_Q_STATUS_INCR * (1 + queue)) + offset));
118 ccp_write_queue_4(struct ccp_softc *sc, unsigned queue, uint32_t offset,
121 ccp_write_4(sc, (CMD_Q_STATUS_INCR * (1 + queue)) + offset, value);
125 ccp_queue_write_tail(struct ccp_queue *qp)
127 ccp_write_queue_4(qp->cq_softc, qp->cq_qindex, CMD_Q_TAIL_LO_BASE,
128 ((uint32_t)qp->desc_ring_bus_addr) + (Q_DESC_SIZE * qp->cq_tail));
132 * Given a queue and a reserved LSB entry index, compute the LSB *entry id* of
133 * that entry for the queue's private LSB region.
135 static inline uint8_t
136 ccp_queue_lsb_entry(struct ccp_queue *qp, unsigned lsb_entry)
138 return ((qp->private_lsb * LSB_REGION_LENGTH + lsb_entry));
142 * Given a queue and a reserved LSB entry index, compute the LSB *address* of
143 * that entry for the queue's private LSB region.
145 static inline uint32_t
146 ccp_queue_lsb_address(struct ccp_queue *qp, unsigned lsb_entry)
148 return (ccp_queue_lsb_entry(qp, lsb_entry) * LSB_ENTRY_SIZE);
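/*
 * Worked example (editorial note): if a queue's private LSB region is
 * segment 3, then reserved entry index 2 maps to entry id 3 * 16 + 2 == 50
 * and LSB address 50 * 32 == 1600 (0x640), taking LSB_REGION_LENGTH == 16
 * and LSB_ENTRY_SIZE == 32 from the LSB description below.
 */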
154 * LSB - Local Storage Block
155 * =========================
157 * 8 segments/regions, each containing 16 entries.
159 * Each entry contains 256 bits (32 bytes).
161 * Segments are virtually addressed in commands, but accesses cannot cross
162 * segment boundaries. Virtual map uses an identity mapping by default
163 * (virtual segment N corresponds to physical segment N).
165 * Access to a physical region can be restricted to any subset of all five
168 * "Pass-through" mode
169 * ===================
171 * Pass-through is a generic DMA engine, much like ioat(4). Some nice
174 * - Supports byte-swapping for endian conversion (32- or 256-bit words)
175 * - AND, OR, XOR with fixed 256-bit mask
176 * - CRC32 of data (may be used in tandem with bswap, but not bit operations)
177 * - Read/write of LSB
180 * If bit manipulation mode is enabled, input must be a multiple of 256 bits
183 * If byte-swapping is enabled, input must be a multiple of the word size.
185 * Zlib mode -- only usable from one queue at a time, single job at a time.
186 * ========================================================================
188 * Only usable from private host, aka PSP? Not host processor?
193 * Raw bits are conditioned with AES and fed through CTR_DRBG. Output goes in
194 * a ring buffer readable by software.
196 * NIST SP 800-90B Repetition Count and Adaptive Proportion health checks are
197 * implemented on the raw input stream and may be enabled to verify min-entropy
198 * of 0.5 bits per bit.
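/*
 * Editorial sketch (not part of the original driver): the pass-through
 * alignment rules above condense into a validity check along these lines.
 * The helper name is hypothetical, and the 32-/256-bit byteswap enum values
 * are assumed from the word sizes described above.
 */
static inline bool
ccp_passthrough_len_ok(bus_size_t len, enum ccp_passthru_byteswap swapmode,
    enum ccp_passthru_bitwise bitmode)
{
	/* Bit manipulation (AND/OR/XOR) consumes whole 256-bit words. */
	if (bitmode != CCP_PASSTHRU_BITWISE_NOOP && (len % 32) != 0)
		return (false);
	/* Byte-swapping operates on 32-bit or 256-bit words. */
	if (swapmode == CCP_PASSTHRU_BYTESWAP_32BIT && (len % 4) != 0)
		return (false);
	if (swapmode == CCP_PASSTHRU_BYTESWAP_256BIT && (len % 32) != 0)
		return (false);
	return (true);
}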
202 ccp_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
206 KASSERT(error == 0, ("%s: error:%d", __func__, error));
208 *baddr = segs->ds_addr;
212 ccp_hw_attach_queue(device_t dev, uint64_t lsbmask, unsigned queue)
214 struct ccp_softc *sc;
215 struct ccp_queue *qp;
217 size_t ringsz, num_descriptors;
221 sc = device_get_softc(dev);
222 qp = &sc->queues[queue];
225 * Don't bother allocating a ring for queues the host isn't allowed to drive.
228 if ((sc->valid_queues & (1 << queue)) == 0)
231 ccp_queue_decode_lsb_regions(sc, lsbmask, queue);
233 /* Ignore queues that do not have any LSB access. */
234 if (qp->lsb_mask == 0) {
235 device_printf(dev, "Ignoring queue %u with no LSB access\n",
237 sc->valid_queues &= ~(1 << queue);
241 num_descriptors = 1 << sc->ring_size_order;
242 ringsz = sizeof(struct ccp_desc) * num_descriptors;
245 * "Queue_Size" is order - 1.
247 * Queue must be aligned to 5+Queue_Size+1 == 5 + order bits.
249 error = bus_dma_tag_create(bus_get_dma_tag(dev),
250 1 << (5 + sc->ring_size_order),
251 #if defined(__i386__) && !defined(PAE)
252 0, BUS_SPACE_MAXADDR,
254 (bus_addr_t)1 << 32, BUS_SPACE_MAXADDR_48BIT,
256 BUS_SPACE_MAXADDR, NULL, NULL, ringsz, 1,
257 ringsz, 0, NULL, NULL, &qp->ring_desc_tag);
261 error = bus_dmamem_alloc(qp->ring_desc_tag, &desc,
262 BUS_DMA_ZERO | BUS_DMA_WAITOK, &qp->ring_desc_map);
266 error = bus_dmamap_load(qp->ring_desc_tag, qp->ring_desc_map, desc,
267 ringsz, ccp_dmamap_cb, &qp->desc_ring_bus_addr, BUS_DMA_WAITOK);
271 qp->desc_ring = desc;
272 qp->completions_ring = malloc(num_descriptors *
273 sizeof(*qp->completions_ring), M_CCP, M_ZERO | M_WAITOK);
275 /* Zero control register; among other things, clears the RUN flag. */
277 ccp_write_queue_4(sc, queue, CMD_Q_CONTROL_BASE, qp->qcontrol);
278 ccp_write_queue_4(sc, queue, CMD_Q_INT_ENABLE_BASE, 0);
280 /* Clear any leftover interrupt status flags */
281 ccp_write_queue_4(sc, queue, CMD_Q_INTERRUPT_STATUS_BASE,
284 qp->qcontrol |= (sc->ring_size_order - 1) << CMD_Q_SIZE_SHIFT;
286 ccp_write_queue_4(sc, queue, CMD_Q_TAIL_LO_BASE,
287 (uint32_t)qp->desc_ring_bus_addr);
288 ccp_write_queue_4(sc, queue, CMD_Q_HEAD_LO_BASE,
289 (uint32_t)qp->desc_ring_bus_addr);
292 * Enable completion interrupts, as well as error or administrative
293 * halt interrupts. We don't use administrative halts, but they
294 * shouldn't trip unless we do, so it ought to be harmless.
296 ccp_write_queue_4(sc, queue, CMD_Q_INT_ENABLE_BASE,
297 INT_COMPLETION | INT_ERROR | INT_QUEUE_STOPPED);
299 qp->qcontrol |= (qp->desc_ring_bus_addr >> 32) << CMD_Q_PTR_HI_SHIFT;
300 qp->qcontrol |= CMD_Q_RUN;
301 ccp_write_queue_4(sc, queue, CMD_Q_CONTROL_BASE, qp->qcontrol);
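/*
 * Editorial note on the bring-up sequence above: the queue is first halted
 * (control register zeroed) and stale interrupt status cleared; the ring
 * size and base address are then programmed with HEAD == TAIL, i.e. an
 * empty ring; interrupts are enabled; and only then is CMD_Q_RUN set along
 * with the high bits of the ring address.
 */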
305 if (qp->desc_ring != NULL)
306 bus_dmamap_unload(qp->ring_desc_tag,
309 bus_dmamem_free(qp->ring_desc_tag, desc,
311 if (qp->ring_desc_tag != NULL)
312 bus_dma_tag_destroy(qp->ring_desc_tag);
318 ccp_hw_detach_queue(device_t dev, unsigned queue)
320 struct ccp_softc *sc;
321 struct ccp_queue *qp;
323 sc = device_get_softc(dev);
324 qp = &sc->queues[queue];
327 * Don't bother freeing resources for queues the host isn't allowed to drive.
330 if ((sc->valid_queues & (1 << queue)) == 0)
333 free(qp->completions_ring, M_CCP);
334 bus_dmamap_unload(qp->ring_desc_tag, qp->ring_desc_map);
335 bus_dmamem_free(qp->ring_desc_tag, qp->desc_ring, qp->ring_desc_map);
336 bus_dma_tag_destroy(qp->ring_desc_tag);
340 ccp_map_pci_bar(device_t dev)
342 struct ccp_softc *sc;
344 sc = device_get_softc(dev);
346 sc->pci_resource_id = PCIR_BAR(2);
347 sc->pci_resource = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
348 &sc->pci_resource_id, RF_ACTIVE);
349 if (sc->pci_resource == NULL) {
350 device_printf(dev, "unable to allocate pci resource\n");
354 sc->pci_resource_id_msix = PCIR_BAR(5);
355 sc->pci_resource_msix = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
356 &sc->pci_resource_id_msix, RF_ACTIVE);
357 if (sc->pci_resource_msix == NULL) {
358 device_printf(dev, "unable to allocate pci resource msix\n");
359 bus_release_resource(dev, SYS_RES_MEMORY, sc->pci_resource_id,
364 sc->pci_bus_tag = rman_get_bustag(sc->pci_resource);
365 sc->pci_bus_handle = rman_get_bushandle(sc->pci_resource);
370 ccp_unmap_pci_bar(device_t dev)
372 struct ccp_softc *sc;
374 sc = device_get_softc(dev);
376 bus_release_resource(dev, SYS_RES_MEMORY, sc->pci_resource_id_msix,
377 sc->pci_resource_msix);
378 bus_release_resource(dev, SYS_RES_MEMORY, sc->pci_resource_id,
382 static const struct ccp_error_code {
387 } ccp_error_codes[] = {
388 { 0x01, "ILLEGAL_ENGINE", EIO, "Requested engine was invalid" },
389 { 0x03, "ILLEGAL_FUNCTION_TYPE", EIO,
390 "A non-supported function type was specified" },
391 { 0x04, "ILLEGAL_FUNCTION_MODE", EIO,
392 "A non-supported function mode was specified" },
393 { 0x05, "ILLEGAL_FUNCTION_ENCRYPT", EIO,
394 "A CMAC type was specified when ENCRYPT was not specified" },
395 { 0x06, "ILLEGAL_FUNCTION_SIZE", EIO,
396 "A non-supported function size was specified.\n"
397 "AES-CFB: Size was not 127 or 7;\n"
398 "3DES-CFB: Size was not 7;\n"
399 "RSA: See supported size table (7.4.2);\n"
400 "ECC: Size was greater than 576 bits." },
401 { 0x07, "Zlib_MISSING_INIT_EOM", EIO,
402 "Zlib command does not have INIT and EOM set" },
403 { 0x08, "ILLEGAL_FUNCTION_RSVD", EIO,
404 "Reserved bits in a function specification were not 0" },
405 { 0x09, "ILLEGAL_BUFFER_LENGTH", EIO,
406 "The buffer length specified was not correct for the selected engine"
408 { 0x0A, "VLSB_FAULT", EIO, "Illegal VLSB segment mapping:\n"
409 "Undefined VLSB segment mapping or\n"
410 "mapping to unsupported LSB segment id" },
411 { 0x0B, "ILLEGAL_MEM_ADDR", EFAULT,
412 "The specified source/destination buffer access was illegal:\n"
413 "Data buffer located in a LSB location disallowed by the LSB protection masks; or\n"
414 "Data buffer not completely contained within a single segment; or\n"
415 "Pointer with Fixed=1 is not 32-bit aligned; or\n"
416 "Pointer with Fixed=1 attempted to reference non-AXI1 (local) memory."
418 { 0x0C, "ILLEGAL_MEM_SEL", EIO,
419 "A src_mem, dst_mem, or key_mem field was illegal:\n"
420 "A field was set to a reserved value; or\n"
421 "A public command attempted to reference AXI1 (local) or GART memory; or\n"
422 "A Zlib command attmpted to use the LSB." },
423 { 0x0D, "ILLEGAL_CONTEXT_ADDR", EIO,
424 "The specified context location was illegal:\n"
425 "Context located in a LSB location disallowed by the LSB protection masks; or\n"
426 "Context not completely contained within a single segment." },
427 { 0x0E, "ILLEGAL_KEY_ADDR", EIO,
428 "The specified key location was illegal:\n"
429 "Key located in a LSB location disallowed by the LSB protection masks; or\n"
430 "Key not completely contained within a single segment." },
431 { 0x12, "CMD_TIMEOUT", EIO, "A command timeout violation occurred" },
432 /* XXX Could fill out these descriptions too */
433 { 0x13, "IDMA0_AXI_SLVERR", EIO, "" },
434 { 0x14, "IDMA0_AXI_DECERR", EIO, "" },
435 { 0x16, "IDMA1_AXI_SLVERR", EIO, "" },
436 { 0x17, "IDMA1_AXI_DECERR", EIO, "" },
437 { 0x19, "ZLIBVHB_AXI_SLVERR", EIO, "" },
438 { 0x1A, "ZLIBVHB_AXI_DECERR", EIO, "" },
439 { 0x1C, "ZLIB_UNEXPECTED_EOM", EIO, "" },
440 { 0x1D, "ZLIB_EXTRA_DATA", EIO, "" },
441 { 0x1E, "ZLIB_BTYPE", EIO, "" },
442 { 0x20, "ZLIB_UNDEFINED_DISTANCE_SYMBOL", EIO, "" },
443 { 0x21, "ZLIB_CODE_LENGTH_SYMBOL", EIO, "" },
444 { 0x22, "ZLIB_VHB_ILLEGAL_FETCH", EIO, "" },
445 { 0x23, "ZLIB_UNCOMPRESSED_LEN", EIO, "" },
446 { 0x24, "ZLIB_LIMIT_REACHED", EIO, "" },
447 { 0x25, "ZLIB_CHECKSUM_MISMATCH", EIO, "" },
448 { 0x26, "ODMA0_AXI_SLVERR", EIO, "" },
449 { 0x27, "ODMA0_AXI_DECERR", EIO, "" },
450 { 0x29, "ODMA1_AXI_SLVERR", EIO, "" },
451 { 0x2A, "ODMA1_AXI_DECERR", EIO, "" },
452 { 0x2B, "LSB_PARITY_ERR", EIO,
453 "A read from the LSB encountered a parity error" },
457 ccp_intr_handle_error(struct ccp_queue *qp, const struct ccp_desc *desc)
459 struct ccp_completion_ctx *cctx;
460 const struct ccp_error_code *ec;
461 struct ccp_softc *sc;
462 uint32_t status, error, esource, faultblock;
469 status = ccp_read_queue_4(sc, q, CMD_Q_STATUS_BASE);
471 error = status & STATUS_ERROR_MASK;
473 /* Decode error status */
475 for (idx = 0; idx < nitems(ccp_error_codes); idx++)
476 if (ccp_error_codes[idx].ce_code == error) {
477 ec = &ccp_error_codes[idx];
481 esource = (status >> STATUS_ERRORSOURCE_SHIFT) &
482 STATUS_ERRORSOURCE_MASK;
483 faultblock = (status >> STATUS_VLSB_FAULTBLOCK_SHIFT) &
484 STATUS_VLSB_FAULTBLOCK_MASK;
485 device_printf(sc->dev, "Error: %s (%u) Source: %u Faulting LSB block: %u\n",
486 (ec != NULL) ? ec->ce_name : "(reserved)", error, esource,
489 device_printf(sc->dev, "Error description: %s\n", ec->ce_desc);
491 /* TODO Could format the desc nicely here */
492 idx = desc - qp->desc_ring;
493 DPRINTF(sc->dev, "Bad descriptor index: %u contents: %32D\n", idx,
494 (const void *)desc, " ");
497 * TODO Per § 14.4 "Error Handling," DMA_Status, DMA_Read/Write_Status,
498 * Zlib Decompress status may be interesting.
502 /* Keep unused descriptors zero for next use. */
503 memset(&qp->desc_ring[idx], 0, sizeof(qp->desc_ring[idx]));
505 cctx = &qp->completions_ring[idx];
508 * Restart procedure described in § 14.2.5.
511 * Advance HEAD_LO past bad descriptor + any remaining in
512 * transaction manually, then restart queue.
514 idx = (idx + 1) % (1 << sc->ring_size_order);
516 /* Callback function signals end of transaction */
517 if (cctx->callback_fn != NULL) {
521 errno = ec->ce_errno;
522 /* TODO More specific error code */
523 cctx->callback_fn(qp, cctx->session, cctx->callback_arg, errno);
524 cctx->callback_fn = NULL;
530 qp->cq_waiting = false;
531 wakeup(&qp->cq_tail);
532 DPRINTF(sc->dev, "%s: wrote sw head:%u\n", __func__, qp->cq_head);
533 ccp_write_queue_4(sc, q, CMD_Q_HEAD_LO_BASE,
534 (uint32_t)qp->desc_ring_bus_addr + (idx * Q_DESC_SIZE));
535 ccp_write_queue_4(sc, q, CMD_Q_CONTROL_BASE, qp->qcontrol);
536 DPRINTF(sc->dev, "%s: Restarted queue\n", __func__);
540 ccp_intr_run_completions(struct ccp_queue *qp, uint32_t ints)
542 struct ccp_completion_ctx *cctx;
543 struct ccp_softc *sc;
544 const struct ccp_desc *desc;
545 uint32_t headlo, idx;
546 unsigned q, completed;
551 mtx_lock(&qp->cq_lock);
554 * Hardware HEAD_LO points to the first incomplete descriptor. Process
555 * any submitted and completed descriptors, up to but not including
558 headlo = ccp_read_queue_4(sc, q, CMD_Q_HEAD_LO_BASE);
559 idx = (headlo - (uint32_t)qp->desc_ring_bus_addr) / Q_DESC_SIZE;
561 DPRINTF(sc->dev, "%s: hw head:%u sw head:%u\n", __func__, idx,
564 while (qp->cq_head != idx) {
565 DPRINTF(sc->dev, "%s: completing:%u\n", __func__, qp->cq_head);
567 cctx = &qp->completions_ring[qp->cq_head];
568 if (cctx->callback_fn != NULL) {
569 cctx->callback_fn(qp, cctx->session,
570 cctx->callback_arg, 0);
571 cctx->callback_fn = NULL;
574 /* Keep unused descriptors zero for next use. */
575 memset(&qp->desc_ring[qp->cq_head], 0,
576 sizeof(qp->desc_ring[qp->cq_head]));
578 qp->cq_head = (qp->cq_head + 1) % (1 << sc->ring_size_order);
582 qp->cq_waiting = false;
583 wakeup(&qp->cq_tail);
586 DPRINTF(sc->dev, "%s: wrote sw head:%u\n", __func__, qp->cq_head);
589 * Desc points to the first incomplete descriptor, at the time we read
590 * HEAD_LO. If there was an error flagged in interrupt status, the HW
591 * will not proceed past the erroneous descriptor by itself.
593 desc = &qp->desc_ring[idx];
594 if ((ints & INT_ERROR) != 0)
595 ccp_intr_handle_error(qp, desc);
597 mtx_unlock(&qp->cq_lock);
601 ccp_intr_handler(void *arg)
603 struct ccp_softc *sc = arg;
607 DPRINTF(sc->dev, "%s: interrupt\n", __func__);
610 * We get one global interrupt per PCI device, shared over all of
611 * its queues. Scan each valid queue on interrupt for flags indicating
614 for (i = 0; i < nitems(sc->queues); i++) {
615 if ((sc->valid_queues & (1 << i)) == 0)
618 ints = ccp_read_queue_4(sc, i, CMD_Q_INTERRUPT_STATUS_BASE);
623 DPRINTF(sc->dev, "%s: %x interrupts on queue %zu\n", __func__,
626 /* Write back 1s to clear interrupt status bits. */
627 ccp_write_queue_4(sc, i, CMD_Q_INTERRUPT_STATUS_BASE, ints);
630 * If there was an error, we still need to run completions on
631 * any descriptors prior to the error. The completions handler
632 * invoked below will also handle the error descriptor.
634 if ((ints & (INT_COMPLETION | INT_ERROR)) != 0)
635 ccp_intr_run_completions(&sc->queues[i], ints);
637 if ((ints & INT_QUEUE_STOPPED) != 0)
638 device_printf(sc->dev, "%s: queue %zu stopped\n",
642 /* Re-enable interrupts after processing */
643 for (i = 0; i < nitems(sc->queues); i++) {
644 if ((sc->valid_queues & (1 << i)) == 0)
646 ccp_write_queue_4(sc, i, CMD_Q_INT_ENABLE_BASE,
647 INT_COMPLETION | INT_ERROR | INT_QUEUE_STOPPED);
652 ccp_intr_filter(void *arg)
654 struct ccp_softc *sc = arg;
657 /* TODO: Split individual queues into separate taskqueues? */
658 for (i = 0; i < nitems(sc->queues); i++) {
659 if ((sc->valid_queues & (1 << i)) == 0)
662 /* Mask interrupt until task completes */
663 ccp_write_queue_4(sc, i, CMD_Q_INT_ENABLE_BASE, 0);
666 return (FILTER_SCHEDULE_THREAD);
670 ccp_setup_interrupts(struct ccp_softc *sc)
673 int rid, error, n, ridcopy;
675 n = pci_msix_count(sc->dev);
677 device_printf(sc->dev, "%s: msix_count: %d\n", __func__, n);
682 error = pci_alloc_msix(sc->dev, &nvec);
684 device_printf(sc->dev, "%s: alloc_msix error: %d\n", __func__,
689 device_printf(sc->dev, "%s: alloc_msix: 0 vectors\n",
693 if (nvec > nitems(sc->intr_res)) {
694 device_printf(sc->dev, "%s: too many vectors: %u\n", __func__,
696 nvec = nitems(sc->intr_res);
699 for (rid = 1; rid < 1 + nvec; rid++) {
701 sc->intr_res[rid - 1] = bus_alloc_resource_any(sc->dev,
702 SYS_RES_IRQ, &ridcopy, RF_ACTIVE);
703 if (sc->intr_res[rid - 1] == NULL) {
704 device_printf(sc->dev, "%s: Failed to alloc IRQ resource\n",
709 sc->intr_tag[rid - 1] = NULL;
710 error = bus_setup_intr(sc->dev, sc->intr_res[rid - 1],
711 INTR_MPSAFE | INTR_TYPE_MISC, ccp_intr_filter,
712 ccp_intr_handler, sc, &sc->intr_tag[rid - 1]);
714 device_printf(sc->dev, "%s: setup_intr: %d\n",
717 sc->intr_count = nvec;
723 ccp_release_interrupts(struct ccp_softc *sc)
727 for (i = 0; i < sc->intr_count; i++) {
728 if (sc->intr_tag[i] != NULL)
729 bus_teardown_intr(sc->dev, sc->intr_res[i],
731 if (sc->intr_res[i] != NULL)
732 bus_release_resource(sc->dev, SYS_RES_IRQ,
733 rman_get_rid(sc->intr_res[i]), sc->intr_res[i]);
736 pci_release_msi(sc->dev);
740 ccp_hw_attach(device_t dev)
742 struct ccp_softc *sc;
744 uint32_t version, lsbmasklo, lsbmaskhi;
745 unsigned queue_idx, j;
747 bool bars_mapped, interrupts_setup;
750 bars_mapped = interrupts_setup = false;
751 sc = device_get_softc(dev);
753 error = ccp_map_pci_bar(dev);
755 device_printf(dev, "%s: couldn't map BAR(s)\n", __func__);
760 error = pci_enable_busmaster(dev);
762 device_printf(dev, "%s: couldn't enable busmaster\n",
767 sc->ring_size_order = g_ccp_ring_order;
768 if (sc->ring_size_order < 6 || sc->ring_size_order > 16) {
769 device_printf(dev, "bogus hw.ccp.ring_order\n");
773 sc->valid_queues = ccp_read_4(sc, CMD_QUEUE_MASK_OFFSET);
775 version = ccp_read_4(sc, VERSION_REG);
776 if ((version & VERSION_NUM_MASK) < 5) {
778 "driver supports version 5 and later hardware\n");
783 error = ccp_setup_interrupts(sc);
786 interrupts_setup = true;
788 sc->hw_version = version & VERSION_NUM_MASK;
789 sc->num_queues = (version >> VERSION_NUMVQM_SHIFT) &
791 sc->num_lsb_entries = (version >> VERSION_LSBSIZE_SHIFT) &
792 VERSION_LSBSIZE_MASK;
793 sc->hw_features = version & VERSION_CAP_MASK;
796 * Copy private LSB mask to public registers to enable access to LSB
797 * from all queues allowed by BIOS.
799 lsbmasklo = ccp_read_4(sc, LSB_PRIVATE_MASK_LO_OFFSET);
800 lsbmaskhi = ccp_read_4(sc, LSB_PRIVATE_MASK_HI_OFFSET);
801 ccp_write_4(sc, LSB_PUBLIC_MASK_LO_OFFSET, lsbmasklo);
802 ccp_write_4(sc, LSB_PUBLIC_MASK_HI_OFFSET, lsbmaskhi);
804 lsbmask = ((uint64_t)lsbmaskhi << 30) | lsbmasklo;
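/*
 * Editorial note: the halves are packed with a 30-bit shift, rather than 32,
 * because each PRIVATE_MASK register appears to carry only 30 valid bits.
 */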
806 for (; queue_idx < nitems(sc->queues); queue_idx++) {
807 error = ccp_hw_attach_queue(dev, lsbmask, queue_idx);
809 device_printf(dev, "%s: couldn't attach queue %u\n",
810 __func__, queue_idx);
814 ccp_assign_lsb_regions(sc, lsbmask);
818 if (interrupts_setup)
819 ccp_release_interrupts(sc);
820 for (j = 0; j < queue_idx; j++)
821 ccp_hw_detach_queue(dev, j);
822 if (sc->ring_size_order != 0)
823 pci_disable_busmaster(dev);
825 ccp_unmap_pci_bar(dev);
831 ccp_hw_detach(device_t dev)
833 struct ccp_softc *sc;
836 sc = device_get_softc(dev);
838 for (i = 0; i < nitems(sc->queues); i++)
839 ccp_hw_detach_queue(dev, i);
841 ccp_release_interrupts(sc);
842 pci_disable_busmaster(dev);
843 ccp_unmap_pci_bar(dev);
846 static int __must_check
847 ccp_passthrough(struct ccp_queue *qp, bus_addr_t dst,
848 enum ccp_memtype dst_type, bus_addr_t src, enum ccp_memtype src_type,
849 bus_size_t len, enum ccp_passthru_byteswap swapmode,
850 enum ccp_passthru_bitwise bitmode, bool interrupt,
851 const struct ccp_completion_ctx *cctx)
853 struct ccp_desc *desc;
855 if (ccp_queue_get_ring_space(qp) == 0)
858 desc = &qp->desc_ring[qp->cq_tail];
860 memset(desc, 0, sizeof(*desc));
861 desc->engine = CCP_ENGINE_PASSTHRU;
863 desc->pt.ioc = interrupt;
864 desc->pt.byteswap = swapmode;
865 desc->pt.bitwise = bitmode;
868 desc->src_lo = (uint32_t)src;
869 desc->src_hi = src >> 32;
870 desc->src_mem = src_type;
872 desc->dst_lo = (uint32_t)dst;
873 desc->dst_hi = dst >> 32;
874 desc->dst_mem = dst_type;
876 if (bitmode != CCP_PASSTHRU_BITWISE_NOOP)
877 desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_KEY);
880 memcpy(&qp->completions_ring[qp->cq_tail], cctx, sizeof(*cctx));
882 qp->cq_tail = (qp->cq_tail + 1) % (1 << qp->cq_softc->ring_size_order);
886 static int __must_check
887 ccp_passthrough_sgl(struct ccp_queue *qp, bus_addr_t lsb_addr, bool tolsb,
888 struct sglist *sgl, bus_size_t len, bool interrupt,
889 const struct ccp_completion_ctx *cctx)
891 struct sglist_seg *seg;
892 size_t i, remain, nb;
896 for (i = 0; i < sgl->sg_nseg && remain != 0; i++) {
897 seg = &sgl->sg_segs[i];
898 /* crp lengths are int, so 32-bit min() is ok. */
899 nb = min(remain, seg->ss_len);
902 error = ccp_passthrough(qp, lsb_addr, CCP_MEMTYPE_SB,
903 seg->ss_paddr, CCP_MEMTYPE_SYSTEM, nb,
904 CCP_PASSTHRU_BYTESWAP_NOOP,
905 CCP_PASSTHRU_BITWISE_NOOP,
906 (nb == remain) && interrupt, cctx);
908 error = ccp_passthrough(qp, seg->ss_paddr,
909 CCP_MEMTYPE_SYSTEM, lsb_addr, CCP_MEMTYPE_SB, nb,
910 CCP_PASSTHRU_BYTESWAP_NOOP,
911 CCP_PASSTHRU_BITWISE_NOOP,
912 (nb == remain) && interrupt, cctx);
922 * Note that these vectors are stored in the reverse of the usual order.
924 const struct SHA_vectors {
930 } SHA_H __aligned(PAGE_SIZE) = {
962 0x47b5481dbefa4fa4ull,
963 0xdb0c2e0d64f98fa7ull,
964 0x8eb44a8768581511ull,
965 0x67332667ffc00b31ull,
966 0x152fecd8f70e5939ull,
967 0x9159015a3070dd17ull,
968 0x629a292a367cd507ull,
969 0xcbbb9d5dc1059ed8ull,
972 0x5be0cd19137e2179ull,
973 0x1f83d9abfb41bd6bull,
974 0x9b05688c2b3e6c1full,
975 0x510e527fade682d1ull,
976 0xa54ff53a5f1d36f1ull,
977 0x3c6ef372fe94f82bull,
978 0xbb67ae8584caa73bull,
979 0x6a09e667f3bcc908ull,
983 * Ensure vectors do not cross a page boundary.
985 * Disabled due to a new Clang error: "expression is not an integral constant
986 * expression." GCC (cross toolchain) seems to handle this assertion with
987 * _Static_assert just fine.
989 #if 0
990 CTASSERT(PAGE_SIZE - ((uintptr_t)&SHA_H % PAGE_SIZE) >= sizeof(SHA_H));
991 #endif
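/*
 * Editorial sketch: an equivalent check could be performed at runtime
 * instead of compile time; this hypothetical helper is illustration only.
 */
static inline void
ccp_assert_sha_vectors_contiguous(void)
{
	KASSERT(PAGE_SIZE - ((uintptr_t)&SHA_H % PAGE_SIZE) >= sizeof(SHA_H),
	    ("SHA_H crosses a page boundary"));
}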
993 const struct SHA_Defn {
994 enum sha_version version;
995 const void *H_vectors;
997 struct auth_hash *axf;
998 enum ccp_sha_type engine_type;
999 } SHA_definitions[] = {
1002 .H_vectors = SHA_H.SHA1,
1003 .H_size = sizeof(SHA_H.SHA1),
1004 .axf = &auth_hash_hmac_sha1,
1005 .engine_type = CCP_SHA_TYPE_1,
1009 .version = SHA2_224,
1010 .H_vectors = SHA_H.SHA224,
1011 .H_size = sizeof(SHA_H.SHA224),
1012 .axf = &auth_hash_hmac_sha2_224,
1013 .engine_type = CCP_SHA_TYPE_224,
1017 .version = SHA2_256,
1018 .H_vectors = SHA_H.SHA256,
1019 .H_size = sizeof(SHA_H.SHA256),
1020 .axf = &auth_hash_hmac_sha2_256,
1021 .engine_type = CCP_SHA_TYPE_256,
1024 .version = SHA2_384,
1025 .H_vectors = SHA_H.SHA384,
1026 .H_size = sizeof(SHA_H.SHA384),
1027 .axf = &auth_hash_hmac_sha2_384,
1028 .engine_type = CCP_SHA_TYPE_384,
1031 .version = SHA2_512,
1032 .H_vectors = SHA_H.SHA512,
1033 .H_size = sizeof(SHA_H.SHA512),
1034 .axf = &auth_hash_hmac_sha2_512,
1035 .engine_type = CCP_SHA_TYPE_512,
1039 static int __must_check
1040 ccp_sha_single_desc(struct ccp_queue *qp, const struct SHA_Defn *defn,
1041 vm_paddr_t addr, size_t len, bool start, bool end, uint64_t msgbits)
1043 struct ccp_desc *desc;
1045 if (ccp_queue_get_ring_space(qp) == 0)
1048 desc = &qp->desc_ring[qp->cq_tail];
1050 memset(desc, 0, sizeof(*desc));
1051 desc->engine = CCP_ENGINE_SHA;
1055 desc->sha.type = defn->engine_type;
1059 desc->sha_len_lo = (uint32_t)msgbits;
1060 desc->sha_len_hi = msgbits >> 32;
1063 desc->src_lo = (uint32_t)addr;
1064 desc->src_hi = addr >> 32;
1065 desc->src_mem = CCP_MEMTYPE_SYSTEM;
1067 desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_SHA);
1069 qp->cq_tail = (qp->cq_tail + 1) % (1 << qp->cq_softc->ring_size_order);
1073 static int __must_check
1074 ccp_sha(struct ccp_queue *qp, enum sha_version version, struct sglist *sgl_src,
1075 struct sglist *sgl_dst, const struct ccp_completion_ctx *cctx)
1077 const struct SHA_Defn *defn;
1078 struct sglist_seg *seg;
1079 size_t i, msgsize, remaining, nb;
1083 for (i = 0; i < nitems(SHA_definitions); i++)
1084 if (SHA_definitions[i].version == version)
1086 if (i == nitems(SHA_definitions))
1088 defn = &SHA_definitions[i];
1090 /* XXX validate input ??? */
1092 /* Load initial SHA state into LSB */
1093 /* XXX ensure H_vectors don't span page boundaries */
1094 error = ccp_passthrough(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_SHA),
1095 CCP_MEMTYPE_SB, pmap_kextract((vm_offset_t)defn->H_vectors),
1096 CCP_MEMTYPE_SYSTEM, roundup2(defn->H_size, LSB_ENTRY_SIZE),
1097 CCP_PASSTHRU_BYTESWAP_NOOP, CCP_PASSTHRU_BITWISE_NOOP, false,
1102 /* Execute series of SHA updates on correctly sized buffers */
1104 for (i = 0; i < sgl_src->sg_nseg; i++) {
1105 seg = &sgl_src->sg_segs[i];
1106 msgsize += seg->ss_len;
1107 error = ccp_sha_single_desc(qp, defn, seg->ss_paddr,
1108 seg->ss_len, i == 0, i == sgl_src->sg_nseg - 1,
1114 /* Copy result out to sgl_dst */
1115 remaining = roundup2(defn->H_size, LSB_ENTRY_SIZE);
1116 lsbaddr = ccp_queue_lsb_address(qp, LSB_ENTRY_SHA);
1117 for (i = 0; i < sgl_dst->sg_nseg; i++) {
1118 seg = &sgl_dst->sg_segs[i];
1119 /* crp lengths are int, so 32-bit min() is ok. */
1120 nb = min(remaining, seg->ss_len);
1122 error = ccp_passthrough(qp, seg->ss_paddr, CCP_MEMTYPE_SYSTEM,
1123 lsbaddr, CCP_MEMTYPE_SB, nb, CCP_PASSTHRU_BYTESWAP_NOOP,
1124 CCP_PASSTHRU_BITWISE_NOOP,
1125 (cctx != NULL) ? (nb == remaining) : false,
1126 (nb == remaining) ? cctx : NULL);
1140 byteswap256(uint64_t *buffer)
1144 t = bswap64(buffer[3]);
1145 buffer[3] = bswap64(buffer[0]);
1146 buffer[0] = t;
1148 t = bswap64(buffer[2]);
1149 buffer[2] = bswap64(buffer[1]);
1150 buffer[1] = t;
1154 * Translate CCP internal LSB hash format into a standard hash output.
1156 * Manipulates the input buffer with the byteswap256 operation.
1159 ccp_sha_copy_result(char *output, char *buffer, enum sha_version version)
1161 const struct SHA_Defn *defn;
1164 for (i = 0; i < nitems(SHA_definitions); i++)
1165 if (SHA_definitions[i].version == version)
1167 if (i == nitems(SHA_definitions))
1168 panic("bogus sha version auth_mode %u\n", (unsigned)version);
1170 defn = &SHA_definitions[i];
1172 /* Swap 256bit manually -- DMA engine can, but with limitations */
1173 byteswap256((void *)buffer);
1174 if (defn->axf->hashsize > LSB_ENTRY_SIZE)
1175 byteswap256((void *)(buffer + LSB_ENTRY_SIZE));
1177 switch (defn->version) {
1179 memcpy(output, buffer + 12, defn->axf->hashsize);
1183 memcpy(output, buffer + XXX, defn->axf->hashsize);
1187 memcpy(output, buffer, defn->axf->hashsize);
1191 buffer + LSB_ENTRY_SIZE * 3 - defn->axf->hashsize,
1192 defn->axf->hashsize - LSB_ENTRY_SIZE);
1193 memcpy(output + defn->axf->hashsize - LSB_ENTRY_SIZE, buffer,
1197 memcpy(output, buffer + LSB_ENTRY_SIZE, LSB_ENTRY_SIZE);
1198 memcpy(output + LSB_ENTRY_SIZE, buffer, LSB_ENTRY_SIZE);
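/*
 * Worked example (editorial note): for SHA-384, hashsize is 48 while
 * LSB_ENTRY_SIZE is 32, so the SHA2_384 case above assembles the digest as
 * buffer[48..63] followed by buffer[0..31].
 */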
1204 ccp_do_hmac_done(struct ccp_queue *qp, struct ccp_session *s,
1205 struct cryptop *crp, int error)
1207 char ihash[SHA2_512_HASH_LEN /* max hash len */];
1208 union authctx auth_ctx;
1209 struct auth_hash *axf;
1211 axf = s->hmac.auth_hash;
1216 crp->crp_etype = error;
1220 /* Do remaining outer hash over small inner hash in software */
1221 axf->Init(&auth_ctx);
1222 axf->Update(&auth_ctx, s->hmac.opad, axf->blocksize);
1223 ccp_sha_copy_result(ihash, s->hmac.res, s->hmac.auth_mode);
1225 INSECURE_DEBUG(dev, "%s sha intermediate=%64D\n", __func__,
1226 (u_char *)ihash, " ");
1228 axf->Update(&auth_ctx, ihash, axf->hashsize);
1229 axf->Final(s->hmac.res, &auth_ctx);
1231 if (crp->crp_op & CRYPTO_OP_VERIFY_DIGEST) {
1232 crypto_copydata(crp, crp->crp_digest_start, s->hmac.hash_len,
1234 if (timingsafe_bcmp(s->hmac.res, ihash, s->hmac.hash_len) != 0)
1235 crp->crp_etype = EBADMSG;
1237 crypto_copyback(crp, crp->crp_digest_start, s->hmac.hash_len,
1240 /* Avoid leaking key material */
1241 explicit_bzero(&auth_ctx, sizeof(auth_ctx));
1242 explicit_bzero(s->hmac.res, sizeof(s->hmac.res));
1249 ccp_hmac_done(struct ccp_queue *qp, struct ccp_session *s, void *vcrp,
1252 struct cryptop *crp;
1255 ccp_do_hmac_done(qp, s, crp, error);
1258 static int __must_check
1259 ccp_do_hmac(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp,
1260 const struct ccp_completion_ctx *cctx)
1263 struct auth_hash *axf;
1266 dev = qp->cq_softc->dev;
1267 axf = s->hmac.auth_hash;
1270 * Populate the SGL describing inside hash contents. We want to hash
1271 * the ipad (key XOR fixed bit pattern) concatenated with the user
1274 sglist_reset(qp->cq_sg_ulptx);
1275 error = sglist_append(qp->cq_sg_ulptx, s->hmac.ipad, axf->blocksize);
1278 if (crp->crp_aad_length != 0) {
1279 error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp,
1280 crp->crp_aad_start, crp->crp_aad_length);
1284 error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp,
1285 crp->crp_payload_start, crp->crp_payload_length);
1287 DPRINTF(dev, "%s: sglist too short\n", __func__);
1290 /* Populate SGL for output -- use hmac.res buffer. */
1291 sglist_reset(qp->cq_sg_dst);
1292 error = sglist_append(qp->cq_sg_dst, s->hmac.res,
1293 roundup2(axf->hashsize, LSB_ENTRY_SIZE));
1297 error = ccp_sha(qp, s->hmac.auth_mode, qp->cq_sg_ulptx, qp->cq_sg_dst,
1300 DPRINTF(dev, "%s: ccp_sha error\n", __func__);
1307 ccp_hmac(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp)
1309 struct ccp_completion_ctx ctx;
1311 ctx.callback_fn = ccp_hmac_done;
1312 ctx.callback_arg = crp;
1315 return (ccp_do_hmac(qp, s, crp, &ctx));
1319 ccp_byteswap(char *data, size_t len)
1324 len--;
1325 for (i = 0; i < len; i++, len--) {
1326 t = data[i];
1327 data[i] = data[len];
1328 data[len] = t;
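/*
 * Editorial note: the loop above reverses the buffer in place, e.g. a
 * 16-byte IV b[0..15] becomes b[15..0]. It is used below to present IV and
 * key material to the engine in its expected reverse byte order.
 */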
1333 ccp_blkcipher_done(struct ccp_queue *qp, struct ccp_session *s, void *vcrp,
1336 struct cryptop *crp;
1338 explicit_bzero(&s->blkcipher.iv, sizeof(s->blkcipher.iv));
1345 crp->crp_etype = error;
1347 DPRINTF(qp->cq_softc->dev, "%s: qp=%p crp=%p\n", __func__, qp, crp);
1352 ccp_collect_iv(struct cryptop *crp, const struct crypto_session_params *csp,
1356 crypto_read_iv(crp, iv);
1359 * If the input IV is 12 bytes, append an explicit counter of 1.
1361 if (csp->csp_cipher_alg == CRYPTO_AES_NIST_GCM_16 &&
1362 csp->csp_ivlen == 12)
1363 *(uint32_t *)&iv[12] = htobe32(1);
1365 if (csp->csp_cipher_alg == CRYPTO_AES_XTS &&
1366 csp->csp_ivlen < AES_BLOCK_LEN)
1367 memset(&iv[csp->csp_ivlen], 0, AES_BLOCK_LEN - csp->csp_ivlen);
1369 /* Reverse order of IV material for HW */
1370 INSECURE_DEBUG(NULL, "%s: IV: %16D len: %u\n", __func__, iv, " ",
1374 * For unknown reasons, XTS mode expects the IV in the reverse byte
1375 * order relative to every other AES mode.
1377 if (csp->csp_cipher_alg != CRYPTO_AES_XTS)
1378 ccp_byteswap(iv, AES_BLOCK_LEN);
1381 static int __must_check
1382 ccp_do_pst_to_lsb(struct ccp_queue *qp, uint32_t lsbaddr, const void *src,
1387 sglist_reset(qp->cq_sg_ulptx);
1388 error = sglist_append(qp->cq_sg_ulptx, __DECONST(void *, src), len);
1392 error = ccp_passthrough_sgl(qp, lsbaddr, true, qp->cq_sg_ulptx, len,
1397 static int __must_check
1398 ccp_do_xts(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp,
1399 enum ccp_cipher_dir dir, const struct ccp_completion_ctx *cctx)
1401 struct ccp_desc *desc;
1404 enum ccp_xts_unitsize usize;
1406 /* IV and Key data are already loaded */
1408 dev = qp->cq_softc->dev;
1410 for (i = 0; i < nitems(ccp_xts_unitsize_map); i++)
1411 if (ccp_xts_unitsize_map[i].cxu_size ==
1412 crp->crp_payload_length) {
1413 usize = ccp_xts_unitsize_map[i].cxu_id;
1416 if (i >= nitems(ccp_xts_unitsize_map))
1419 for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) {
1420 struct sglist_seg *seg;
1422 seg = &qp->cq_sg_ulptx->sg_segs[i];
1424 desc = &qp->desc_ring[qp->cq_tail];
1425 desc->engine = CCP_ENGINE_XTS_AES;
1426 desc->som = (i == 0);
1427 desc->eom = (i == qp->cq_sg_ulptx->sg_nseg - 1);
1428 desc->ioc = (desc->eom && cctx != NULL);
1429 DPRINTF(dev, "%s: XTS %u: som:%d eom:%d ioc:%d dir:%d\n",
1430 __func__, qp->cq_tail, (int)desc->som, (int)desc->eom,
1431 (int)desc->ioc, (int)dir);
1434 memcpy(&qp->completions_ring[qp->cq_tail], cctx,
1437 desc->aes_xts.encrypt = dir;
1438 desc->aes_xts.type = s->blkcipher.cipher_type;
1439 desc->aes_xts.size = usize;
1441 DPRINTF(dev, "XXX %s: XTS %u: type:%u size:%u\n", __func__,
1442 qp->cq_tail, (unsigned)desc->aes_xts.type,
1443 (unsigned)desc->aes_xts.size);
1445 desc->length = seg->ss_len;
1446 desc->src_lo = (uint32_t)seg->ss_paddr;
1447 desc->src_hi = (seg->ss_paddr >> 32);
1448 desc->src_mem = CCP_MEMTYPE_SYSTEM;
1450 /* Crypt in-place */
1451 desc->dst_lo = desc->src_lo;
1452 desc->dst_hi = desc->src_hi;
1453 desc->dst_mem = desc->src_mem;
1455 desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY);
1457 desc->key_mem = CCP_MEMTYPE_SB;
1459 desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV);
1461 qp->cq_tail = (qp->cq_tail + 1) %
1462 (1 << qp->cq_softc->ring_size_order);
1467 static int __must_check
1468 ccp_do_blkcipher(struct ccp_queue *qp, struct ccp_session *s,
1469 struct cryptop *crp, const struct ccp_completion_ctx *cctx)
1471 const struct crypto_session_params *csp;
1472 struct ccp_desc *desc;
1475 enum ccp_cipher_dir dir;
1480 dev = qp->cq_softc->dev;
1482 if (s->blkcipher.key_len == 0 || crp->crp_payload_length == 0) {
1483 DPRINTF(dev, "%s: empty\n", __func__);
1486 if ((crp->crp_payload_length % AES_BLOCK_LEN) != 0) {
1487 DPRINTF(dev, "%s: len modulo: %d\n", __func__,
1488 crp->crp_payload_length);
1493 * Individual segments must be multiples of the AES block size for the
1494 * HW to process them. Non-compliant inputs aren't bogus, just not doable on this hardware.
1497 for (i = 0; i < qp->cq_sg_crp->sg_nseg; i++)
1498 if ((qp->cq_sg_crp->sg_segs[i].ss_len % AES_BLOCK_LEN) != 0) {
1499 DPRINTF(dev, "%s: seg modulo: %zu\n", __func__,
1500 qp->cq_sg_crp->sg_segs[i].ss_len);
1504 /* Gather IV/nonce data */
1505 csp = crypto_get_params(crp->crp_session);
1506 ccp_collect_iv(crp, csp, s->blkcipher.iv);
1507 iv_len = csp->csp_ivlen;
1508 if (csp->csp_cipher_alg == CRYPTO_AES_XTS)
1509 iv_len = AES_BLOCK_LEN;
1511 if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op))
1512 dir = CCP_CIPHER_DIR_ENCRYPT;
1514 dir = CCP_CIPHER_DIR_DECRYPT;
1516 /* Set up passthrough op(s) to copy IV into LSB */
1517 error = ccp_do_pst_to_lsb(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_IV),
1518 s->blkcipher.iv, iv_len);
1523 * Initialize keydata and keydata_len for GCC. The default case of the
1524 * following switch is impossible to reach, but GCC doesn't know that.
1529 switch (csp->csp_cipher_alg) {
1530 case CRYPTO_AES_XTS:
1531 for (j = 0; j < nitems(ccp_xts_unitsize_map); j++)
1532 if (ccp_xts_unitsize_map[j].cxu_size ==
1533 crp->crp_payload_length)
1535 /* Input buffer must be a supported UnitSize */
1536 if (j >= nitems(ccp_xts_unitsize_map)) {
1537 device_printf(dev, "%s: rejected block size: %u\n",
1538 __func__, crp->crp_payload_length);
1539 return (EOPNOTSUPP);
1542 case CRYPTO_AES_CBC:
1543 case CRYPTO_AES_ICM:
1544 keydata = s->blkcipher.enckey;
1545 keydata_len = s->blkcipher.key_len;
1549 INSECURE_DEBUG(dev, "%s: KEY(%zu): %16D\n", __func__, keydata_len,
1551 if (csp->csp_cipher_alg == CRYPTO_AES_XTS)
1552 INSECURE_DEBUG(dev, "%s: KEY(XTS): %64D\n", __func__, keydata, " ");
1554 /* Reverse order of key material for HW */
1555 ccp_byteswap(keydata, keydata_len);
1557 /* Store key material into LSB to avoid page boundaries */
1558 if (csp->csp_cipher_alg == CRYPTO_AES_XTS) {
1560 * XTS mode uses 2 256-bit vectors for the primary key and the
1561 * tweak key. For 128-bit keys, the vectors are zero-padded.
1563 * After byteswapping the combined OCF-provided K1:K2 vector
1564 * above, we need to reverse the order again so the hardware
1565 * gets the swapped keys in the order K1':K2'.
1567 error = ccp_do_pst_to_lsb(qp,
1568 ccp_queue_lsb_address(qp, LSB_ENTRY_KEY + 1), keydata,
1572 error = ccp_do_pst_to_lsb(qp,
1573 ccp_queue_lsb_address(qp, LSB_ENTRY_KEY),
1574 keydata + (keydata_len / 2), keydata_len / 2);
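/*
 * Worked example (editorial note): for AES-128-XTS, OCF supplies 32 bytes
 * K1:K2. After ccp_byteswap() the buffer holds K2':K1' (primes denoting
 * byte-reversal), so the first half (K2') lands in LSB_ENTRY_KEY + 1 and
 * the second half (K1') in LSB_ENTRY_KEY, i.e. K1':K2' in hardware order.
 */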
1576 /* Zero-pad 128 bit keys */
1577 if (keydata_len == 32) {
1580 error = ccp_do_pst_to_lsb(qp,
1581 ccp_queue_lsb_address(qp, LSB_ENTRY_KEY) +
1582 keydata_len / 2, g_zeroes, keydata_len / 2);
1585 error = ccp_do_pst_to_lsb(qp,
1586 ccp_queue_lsb_address(qp, LSB_ENTRY_KEY + 1) +
1587 keydata_len / 2, g_zeroes, keydata_len / 2);
1590 error = ccp_do_pst_to_lsb(qp,
1591 ccp_queue_lsb_address(qp, LSB_ENTRY_KEY), keydata,
1597 * Point SGLs at the subset of cryptop buffer contents representing the
1600 sglist_reset(qp->cq_sg_ulptx);
1601 error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp,
1602 crp->crp_payload_start, crp->crp_payload_length);
1606 INSECURE_DEBUG(dev, "%s: Contents: %16D\n", __func__,
1607 (void *)PHYS_TO_DMAP(qp->cq_sg_ulptx->sg_segs[0].ss_paddr), " ");
1609 DPRINTF(dev, "%s: starting AES ops @ %u\n", __func__, qp->cq_tail);
1611 if (ccp_queue_get_ring_space(qp) < qp->cq_sg_ulptx->sg_nseg)
1614 if (csp->csp_cipher_alg == CRYPTO_AES_XTS)
1615 return (ccp_do_xts(qp, s, crp, dir, cctx));
1617 for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) {
1618 struct sglist_seg *seg;
1620 seg = &qp->cq_sg_ulptx->sg_segs[i];
1622 desc = &qp->desc_ring[qp->cq_tail];
1623 desc->engine = CCP_ENGINE_AES;
1624 desc->som = (i == 0);
1625 desc->eom = (i == qp->cq_sg_ulptx->sg_nseg - 1);
1626 desc->ioc = (desc->eom && cctx != NULL);
1627 DPRINTF(dev, "%s: AES %u: som:%d eom:%d ioc:%d dir:%d\n",
1628 __func__, qp->cq_tail, (int)desc->som, (int)desc->eom,
1629 (int)desc->ioc, (int)dir);
1632 memcpy(&qp->completions_ring[qp->cq_tail], cctx,
1635 desc->aes.encrypt = dir;
1636 desc->aes.mode = s->blkcipher.cipher_mode;
1637 desc->aes.type = s->blkcipher.cipher_type;
1638 if (csp->csp_cipher_alg == CRYPTO_AES_ICM)
1640 * Size of the CTR value in bits, minus one. ICM mode uses
1641 * all 128 bits as the counter.
1643 desc->aes.size = 127;
1645 DPRINTF(dev, "%s: AES %u: mode:%u type:%u size:%u\n", __func__,
1646 qp->cq_tail, (unsigned)desc->aes.mode,
1647 (unsigned)desc->aes.type, (unsigned)desc->aes.size);
1649 desc->length = seg->ss_len;
1650 desc->src_lo = (uint32_t)seg->ss_paddr;
1651 desc->src_hi = (seg->ss_paddr >> 32);
1652 desc->src_mem = CCP_MEMTYPE_SYSTEM;
1654 /* Crypt in-place */
1655 desc->dst_lo = desc->src_lo;
1656 desc->dst_hi = desc->src_hi;
1657 desc->dst_mem = desc->src_mem;
1659 desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY);
1661 desc->key_mem = CCP_MEMTYPE_SB;
1663 desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV);
1665 qp->cq_tail = (qp->cq_tail + 1) %
1666 (1 << qp->cq_softc->ring_size_order);
1672 ccp_blkcipher(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp)
1674 struct ccp_completion_ctx ctx;
1676 ctx.callback_fn = ccp_blkcipher_done;
1678 ctx.callback_arg = crp;
1680 return (ccp_do_blkcipher(qp, s, crp, &ctx));
1684 ccp_authenc_done(struct ccp_queue *qp, struct ccp_session *s, void *vcrp,
1687 struct cryptop *crp;
1689 explicit_bzero(&s->blkcipher.iv, sizeof(s->blkcipher.iv));
1693 ccp_do_hmac_done(qp, s, crp, error);
1697 ccp_authenc(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp)
1699 struct ccp_completion_ctx ctx;
1702 ctx.callback_fn = ccp_authenc_done;
1704 ctx.callback_arg = crp;
1706 /* Perform first operation */
1707 if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op))
1708 error = ccp_do_blkcipher(qp, s, crp, NULL);
1710 error = ccp_do_hmac(qp, s, crp, NULL);
1714 /* Perform second operation */
1715 if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op))
1716 error = ccp_do_hmac(qp, s, crp, &ctx);
1718 error = ccp_do_blkcipher(qp, s, crp, &ctx);
1722 static int __must_check
1723 ccp_do_ghash_aad(struct ccp_queue *qp, struct ccp_session *s)
1725 struct ccp_desc *desc;
1726 struct sglist_seg *seg;
1729 if (ccp_queue_get_ring_space(qp) < qp->cq_sg_ulptx->sg_nseg)
1732 for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) {
1733 seg = &qp->cq_sg_ulptx->sg_segs[i];
1735 desc = &qp->desc_ring[qp->cq_tail];
1737 desc->engine = CCP_ENGINE_AES;
1738 desc->aes.mode = CCP_AES_MODE_GHASH;
1739 desc->aes.type = s->blkcipher.cipher_type;
1740 desc->aes.encrypt = CCP_AES_MODE_GHASH_AAD;
1742 desc->som = (i == 0);
1743 desc->length = seg->ss_len;
1745 desc->src_lo = (uint32_t)seg->ss_paddr;
1746 desc->src_hi = (seg->ss_paddr >> 32);
1747 desc->src_mem = CCP_MEMTYPE_SYSTEM;
1749 desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV);
1751 desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY);
1752 desc->key_mem = CCP_MEMTYPE_SB;
1754 qp->cq_tail = (qp->cq_tail + 1) %
1755 (1 << qp->cq_softc->ring_size_order);
1760 static int __must_check
1761 ccp_do_gctr(struct ccp_queue *qp, struct ccp_session *s,
1762 enum ccp_cipher_dir dir, struct sglist_seg *seg, bool som, bool eom)
1764 struct ccp_desc *desc;
1766 if (ccp_queue_get_ring_space(qp) == 0)
1769 desc = &qp->desc_ring[qp->cq_tail];
1771 desc->engine = CCP_ENGINE_AES;
1772 desc->aes.mode = CCP_AES_MODE_GCTR;
1773 desc->aes.type = s->blkcipher.cipher_type;
1774 desc->aes.encrypt = dir;
1775 desc->aes.size = 8 * (seg->ss_len % GMAC_BLOCK_LEN) - 1;
1780 /* Trailing bytes will be masked off by aes.size above. */
1781 desc->length = roundup2(seg->ss_len, GMAC_BLOCK_LEN);
1783 desc->dst_lo = desc->src_lo = (uint32_t)seg->ss_paddr;
1784 desc->dst_hi = desc->src_hi = seg->ss_paddr >> 32;
1785 desc->dst_mem = desc->src_mem = CCP_MEMTYPE_SYSTEM;
1787 desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV);
1789 desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY);
1790 desc->key_mem = CCP_MEMTYPE_SB;
1792 qp->cq_tail = (qp->cq_tail + 1) %
1793 (1 << qp->cq_softc->ring_size_order);
1797 static int __must_check
1798 ccp_do_ghash_final(struct ccp_queue *qp, struct ccp_session *s)
1800 struct ccp_desc *desc;
1802 if (ccp_queue_get_ring_space(qp) == 0)
1805 desc = &qp->desc_ring[qp->cq_tail];
1807 desc->engine = CCP_ENGINE_AES;
1808 desc->aes.mode = CCP_AES_MODE_GHASH;
1809 desc->aes.type = s->blkcipher.cipher_type;
1810 desc->aes.encrypt = CCP_AES_MODE_GHASH_FINAL;
1812 desc->length = GMAC_BLOCK_LEN;
1814 desc->src_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_GHASH_IN);
1815 desc->src_mem = CCP_MEMTYPE_SB;
1817 desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV);
1819 desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY);
1820 desc->key_mem = CCP_MEMTYPE_SB;
1822 desc->dst_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_GHASH);
1823 desc->dst_mem = CCP_MEMTYPE_SB;
1825 qp->cq_tail = (qp->cq_tail + 1) %
1826 (1 << qp->cq_softc->ring_size_order);
1831 ccp_gcm_done(struct ccp_queue *qp, struct ccp_session *s, void *vcrp,
1834 char tag[GMAC_DIGEST_LEN];
1835 struct cryptop *crp;
1842 crp->crp_etype = error;
1846 /* Encrypt is done. Decrypt needs to verify tag. */
1847 if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op))
1850 /* Copy in message tag. */
1851 crypto_copydata(crp, crp->crp_digest_start, s->gmac.hash_len, tag);
1853 /* Verify tag against computed GMAC */
1854 if (timingsafe_bcmp(tag, s->gmac.final_block, s->gmac.hash_len) != 0)
1855 crp->crp_etype = EBADMSG;
1858 explicit_bzero(&s->blkcipher.iv, sizeof(s->blkcipher.iv));
1859 explicit_bzero(&s->gmac.final_block, sizeof(s->gmac.final_block));
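/*
 * Editorial summary of the GCM flow implemented below: load the IV plus a
 * zeroed GHASH context, the key, and the encoded-lengths block into the
 * LSB; GHASH the AAD; run GCTR over the payload; reload the plain IV; run
 * GHASH_FINAL; then DMA the tag out (to the caller's buffer on encrypt, or
 * to a session buffer for verification on decrypt).
 */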
1864 ccp_gcm(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp)
1866 const struct crypto_session_params *csp;
1867 struct ccp_completion_ctx ctx;
1868 enum ccp_cipher_dir dir;
1873 if (s->blkcipher.key_len == 0)
1876 dev = qp->cq_softc->dev;
1878 if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op))
1879 dir = CCP_CIPHER_DIR_ENCRYPT;
1881 dir = CCP_CIPHER_DIR_DECRYPT;
1883 /* Zero initial GHASH portion of context */
1884 memset(s->blkcipher.iv, 0, sizeof(s->blkcipher.iv));
1886 /* Gather IV data */
1887 csp = crypto_get_params(crp->crp_session);
1888 ccp_collect_iv(crp, csp, s->blkcipher.iv);
1890 /* Reverse order of key material for HW */
1891 ccp_byteswap(s->blkcipher.enckey, s->blkcipher.key_len);
1893 /* Prepare input buffer of concatenated lengths for final GHASH */
1894 be64enc(s->gmac.final_block, (uint64_t)crp->crp_aad_length * 8);
1895 be64enc(&s->gmac.final_block[8], (uint64_t)crp->crp_payload_length * 8);
1897 /* Send IV + initial zero GHASH, key data, and lengths buffer to LSB */
1898 error = ccp_do_pst_to_lsb(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_IV),
1899 s->blkcipher.iv, 32);
1902 error = ccp_do_pst_to_lsb(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_KEY),
1903 s->blkcipher.enckey, s->blkcipher.key_len);
1906 error = ccp_do_pst_to_lsb(qp,
1907 ccp_queue_lsb_address(qp, LSB_ENTRY_GHASH_IN), s->gmac.final_block,
1912 /* First step - compute GHASH over AAD */
1913 if (crp->crp_aad_length != 0) {
1914 sglist_reset(qp->cq_sg_ulptx);
1915 error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp,
1916 crp->crp_aad_start, crp->crp_aad_length);
1920 /* This engine cannot process AAD that is not a multiple of the block size. */
1921 for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++)
1922 if ((qp->cq_sg_ulptx->sg_segs[i].ss_len %
1923 GMAC_BLOCK_LEN) != 0) {
1924 DPRINTF(dev, "%s: AD seg modulo: %zu\n",
1926 qp->cq_sg_ulptx->sg_segs[i].ss_len);
1930 error = ccp_do_ghash_aad(qp, s);
1935 /* Feed data piece by piece into GCTR */
1936 sglist_reset(qp->cq_sg_ulptx);
1937 error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp,
1938 crp->crp_payload_start, crp->crp_payload_length);
1943 * All segments except the last must be even multiples of AES block
1944 * size for the HW to process them. Non-compliant inputs aren't bogus,
1945 * just not doable on this hardware.
1947 * XXX: Well, the hardware will produce a valid tag for shorter final
1948 * segment inputs, but it will still write out a block-sized plaintext
1949 * or ciphertext chunk. For a typical CRP this tramples trailing data,
1950 * including the provided message tag. So, reject such inputs for now.
1952 for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++)
1953 if ((qp->cq_sg_ulptx->sg_segs[i].ss_len % AES_BLOCK_LEN) != 0) {
1954 DPRINTF(dev, "%s: seg modulo: %zu\n", __func__,
1955 qp->cq_sg_ulptx->sg_segs[i].ss_len);
1959 for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) {
1960 struct sglist_seg *seg;
1962 seg = &qp->cq_sg_ulptx->sg_segs[i];
1963 error = ccp_do_gctr(qp, s, dir, seg,
1964 (i == 0 && crp->crp_aad_length == 0),
1965 i == (qp->cq_sg_ulptx->sg_nseg - 1));
1970 /* Send just initial IV (not GHASH!) to LSB again */
1971 error = ccp_do_pst_to_lsb(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_IV),
1972 s->blkcipher.iv, AES_BLOCK_LEN);
1976 ctx.callback_fn = ccp_gcm_done;
1978 ctx.callback_arg = crp;
1980 /* Compute final hash and copy result back */
1981 error = ccp_do_ghash_final(qp, s);
1985 /* When encrypting, copy computed tag out to caller buffer. */
1986 sglist_reset(qp->cq_sg_ulptx);
1987 if (dir == CCP_CIPHER_DIR_ENCRYPT)
1988 error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp,
1989 crp->crp_digest_start, s->gmac.hash_len);
1992 * For decrypting, copy the computed tag out to our session
1993 * buffer to verify in our callback.
1995 error = sglist_append(qp->cq_sg_ulptx, s->gmac.final_block,
1999 error = ccp_passthrough_sgl(qp,
2000 ccp_queue_lsb_address(qp, LSB_ENTRY_GHASH), false, qp->cq_sg_ulptx,
2001 s->gmac.hash_len, true, &ctx);
2005 #define MAX_TRNG_RETRIES 10
2007 random_ccp_read(void *v, u_int c)
2012 KASSERT(c % sizeof(*buf) == 0, ("%u not multiple of uint32_t", c));
2015 for (i = c; i > 0; i -= sizeof(*buf)) {
2016 for (j = 0; j < MAX_TRNG_RETRIES; j++) {
2017 *buf = ccp_read_4(g_ccp_softc, TRNG_OUT_OFFSET);
2021 if (j == MAX_TRNG_RETRIES)
2031 db_ccp_show_hw(struct ccp_softc *sc)
2034 db_printf(" queue mask: 0x%x\n",
2035 ccp_read_4(sc, CMD_QUEUE_MASK_OFFSET));
2036 db_printf(" queue prio: 0x%x\n",
2037 ccp_read_4(sc, CMD_QUEUE_PRIO_OFFSET));
2038 db_printf(" reqid: 0x%x\n", ccp_read_4(sc, CMD_REQID_CONFIG_OFFSET));
2039 db_printf(" trng output: 0x%x\n", ccp_read_4(sc, TRNG_OUT_OFFSET));
2040 db_printf(" cmd timeout: 0x%x\n",
2041 ccp_read_4(sc, CMD_CMD_TIMEOUT_OFFSET));
2042 db_printf(" lsb public mask lo: 0x%x\n",
2043 ccp_read_4(sc, LSB_PUBLIC_MASK_LO_OFFSET));
2044 db_printf(" lsb public mask hi: 0x%x\n",
2045 ccp_read_4(sc, LSB_PUBLIC_MASK_HI_OFFSET));
2046 db_printf(" lsb private mask lo: 0x%x\n",
2047 ccp_read_4(sc, LSB_PRIVATE_MASK_LO_OFFSET));
2048 db_printf(" lsb private mask hi: 0x%x\n",
2049 ccp_read_4(sc, LSB_PRIVATE_MASK_HI_OFFSET));
2050 db_printf(" version: 0x%x\n", ccp_read_4(sc, VERSION_REG));
2054 db_ccp_show_queue_hw(struct ccp_queue *qp)
2056 const struct ccp_error_code *ec;
2057 struct ccp_softc *sc;
2058 uint32_t status, error, esource, faultblock, headlo, qcontrol;
2064 qcontrol = ccp_read_queue_4(sc, q, CMD_Q_CONTROL_BASE);
2065 db_printf(" qcontrol: 0x%x%s%s\n", qcontrol,
2066 (qcontrol & CMD_Q_RUN) ? " RUN" : "",
2067 (qcontrol & CMD_Q_HALTED) ? " HALTED" : "");
2068 db_printf(" tail_lo: 0x%x\n",
2069 ccp_read_queue_4(sc, q, CMD_Q_TAIL_LO_BASE));
2070 headlo = ccp_read_queue_4(sc, q, CMD_Q_HEAD_LO_BASE);
2071 db_printf(" head_lo: 0x%x\n", headlo);
2072 db_printf(" int enable: 0x%x\n",
2073 ccp_read_queue_4(sc, q, CMD_Q_INT_ENABLE_BASE));
2074 db_printf(" interrupt status: 0x%x\n",
2075 ccp_read_queue_4(sc, q, CMD_Q_INTERRUPT_STATUS_BASE));
2076 status = ccp_read_queue_4(sc, q, CMD_Q_STATUS_BASE);
2077 db_printf(" status: 0x%x\n", status);
2078 db_printf(" int stats: 0x%x\n",
2079 ccp_read_queue_4(sc, q, CMD_Q_INT_STATUS_BASE));
2081 error = status & STATUS_ERROR_MASK;
2085 esource = (status >> STATUS_ERRORSOURCE_SHIFT) &
2086 STATUS_ERRORSOURCE_MASK;
2087 faultblock = (status >> STATUS_VLSB_FAULTBLOCK_SHIFT) &
2088 STATUS_VLSB_FAULTBLOCK_MASK;
2091 for (i = 0; i < nitems(ccp_error_codes); i++)
2092 if (ccp_error_codes[i].ce_code == error)
2094 if (i < nitems(ccp_error_codes))
2095 ec = &ccp_error_codes[i];
2097 db_printf(" Error: %s (%u) Source: %u Faulting LSB block: %u\n",
2098 (ec != NULL) ? ec->ce_name : "(reserved)", error, esource,
2101 db_printf(" Error description: %s\n", ec->ce_desc);
2103 i = (headlo - (uint32_t)qp->desc_ring_bus_addr) / Q_DESC_SIZE;
2104 db_printf(" Bad descriptor idx: %u contents:\n %32D\n", i,
2105 (void *)&qp->desc_ring[i], " ");