2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
4 * Copyright (c) 2017 Chelsio Communications, Inc.
5 * Copyright (c) 2017 Conrad Meyer <cem@FreeBSD.org>
7 * Largely borrowed from ccr(4), Written by: John Baldwin <jhb@FreeBSD.org>
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
36 #include <sys/param.h>
39 #include <sys/kernel.h>
40 #include <sys/malloc.h>
41 #include <sys/mutex.h>
42 #include <sys/module.h>
44 #include <sys/sglist.h>
45 #include <sys/sysctl.h>
51 #include <dev/pci/pcireg.h>
52 #include <dev/pci/pcivar.h>
54 #include <machine/bus.h>
55 #include <machine/resource.h>
56 #include <machine/vmparam.h>
58 #include <opencrypto/cryptodev.h>
59 #include <opencrypto/xform.h>
64 #include "cryptodev_if.h"
67 #include "ccp_hardware.h"
/* Compile-time check: a struct ccp_desc must be exactly 32 bytes. */
70 CTASSERT(sizeof(struct ccp_desc) == 32);
/*
 * Table pairing each CCP_XTS_AES_UNIT_SIZE_* identifier (cxu_id) with a
 * numeric size from 16 to 4096.
 * NOTE(review): the second member's declaration is not visible here;
 * presumably it is the XTS unit size in bytes -- confirm.
 */
72 static struct ccp_xts_unitsize_map_entry {
73 enum ccp_xts_unitsize cxu_id;
75 } ccp_xts_unitsize_map[] = {
76 { CCP_XTS_AES_UNIT_SIZE_16, 16 },
77 { CCP_XTS_AES_UNIT_SIZE_512, 512 },
78 { CCP_XTS_AES_UNIT_SIZE_1024, 1024 },
79 { CCP_XTS_AES_UNIT_SIZE_2048, 2048 },
80 { CCP_XTS_AES_UNIT_SIZE_4096, 4096 },
/*
 * Declares the hw.ccp sysctl node and the hw.ccp.ring_order knob
 * (CTLFLAG_RDTUN, default 11).  Per the description string the descriptor
 * ring holds (1 << ring_order) entries; ccp_hw_attach() rejects values
 * outside 6..16.
 */
83 SYSCTL_NODE(_hw, OID_AUTO, ccp, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
86 unsigned g_ccp_ring_order = 11;
87 SYSCTL_UINT(_hw_ccp, OID_AUTO, ring_order, CTLFLAG_RDTUN, &g_ccp_ring_order,
88 0, "Set CCP ring order. (1 << this) == ring size. Min: 6, Max: 16");
91 * Zero buffer, sufficient for padding LSB entries, that does not span a page
/*
 * 32-byte buffer aligned to 32 bytes.  It has static storage and no
 * initializer, so its contents are all zero.
 */
94 static const char g_zeroes[32] __aligned(32);
/*
 * Read a 32-bit device register.
 *
 * sc:     softc supplying the bus tag and handle for the register window.
 * offset: byte offset of the register within that window.
 * Returns the value obtained from bus_space_read_4().
 */
96 static inline uint32_t
97 ccp_read_4(struct ccp_softc *sc, uint32_t offset)
99 return (bus_space_read_4(sc->pci_bus_tag, sc->pci_bus_handle, offset));
/*
 * Write a 32-bit device register.
 *
 * sc:     softc supplying the bus tag and handle for the register window.
 * offset: byte offset of the register within that window.
 * value:  value handed to bus_space_write_4().
 */
103 ccp_write_4(struct ccp_softc *sc, uint32_t offset, uint32_t value)
105 bus_space_write_4(sc->pci_bus_tag, sc->pci_bus_handle, offset, value);
/*
 * Read a 32-bit register belonging to one command queue.
 *
 * The register address is CMD_Q_STATUS_INCR * (1 + queue) + offset, i.e.
 * `offset` is relative to the start of that queue's register block.
 */
108 static inline uint32_t
109 ccp_read_queue_4(struct ccp_softc *sc, unsigned queue, uint32_t offset)
112 * Each queue gets its own 4kB register space. Queue 0 is at 0x1000.
114 return (ccp_read_4(sc, (CMD_Q_STATUS_INCR * (1 + queue)) + offset));
/*
 * Write a 32-bit register belonging to one command queue.  Uses the same
 * addressing as ccp_read_queue_4(): CMD_Q_STATUS_INCR * (1 + queue) + offset.
 */
118 ccp_write_queue_4(struct ccp_softc *sc, unsigned queue, uint32_t offset,
121 ccp_write_4(sc, (CMD_Q_STATUS_INCR * (1 + queue)) + offset, value);
/*
 * Publish the software tail index of `qp` to the hardware.
 *
 * Writes the low 32 bits of the descriptor ring's bus address plus
 * Q_DESC_SIZE * cq_tail to the queue's CMD_Q_TAIL_LO_BASE register.
 */
125 ccp_queue_write_tail(struct ccp_queue *qp)
127 ccp_write_queue_4(qp->cq_softc, qp->cq_qindex, CMD_Q_TAIL_LO_BASE,
128 ((uint32_t)qp->desc_ring_bus_addr) + (Q_DESC_SIZE * qp->cq_tail));
132 * Given a queue and a reserved LSB entry index, compute the LSB *entry id* of
133 * that entry for the queue's private LSB region.
/*
 * Entry id = qp->private_lsb * LSB_REGION_LENGTH + lsb_entry.
 * The sum is narrowed to uint8_t by the return type.
 */
135 static inline uint8_t
136 ccp_queue_lsb_entry(struct ccp_queue *qp, unsigned lsb_entry)
138 return ((qp->private_lsb * LSB_REGION_LENGTH + lsb_entry));
142 * Given a queue and a reserved LSB entry index, compute the LSB *address* of
143 * that entry for the queue's private LSB region.
/*
 * LSB address = (entry id from ccp_queue_lsb_entry()) * LSB_ENTRY_SIZE.
 */
145 static inline uint32_t
146 ccp_queue_lsb_address(struct ccp_queue *qp, unsigned lsb_entry)
148 return (ccp_queue_lsb_entry(qp, lsb_entry) * LSB_ENTRY_SIZE);
154 * LSB - Local Storage Block
155 * =========================
157 * 8 segments/regions, each containing 16 entries.
159 * Each entry contains 256 bits (32 bytes).
161 * Segments are virtually addressed in commands, but accesses cannot cross
162 * segment boundaries. Virtual map uses an identity mapping by default
163 * (virtual segment N corresponds to physical segment N).
165 * Access to a physical region can be restricted to any subset of all five
168 * "Pass-through" mode
169 * ===================
171 * Pass-through is a generic DMA engine, much like ioat(4). Some nice
174 * - Supports byte-swapping for endian conversion (32- or 256-bit words)
175 * - AND, OR, XOR with fixed 256-bit mask
176 * - CRC32 of data (may be used in tandem with bswap, but not bit operations)
177 * - Read/write of LSB
180 * If bit manipulation mode is enabled, input must be a multiple of 256 bits
183 * If byte-swapping is enabled, input must be a multiple of the word size.
185 * Zlib mode -- only usable from one queue at a time, single job at a time.
186 * ========================================================================
188 * Only usable from private host, aka PSP? Not host processor?
193 * Raw bits are conditioned with AES and fed through CTR_DRBG. Output goes in
194 * a ring buffer readable by software.
196 * NIST SP 800-90B Repetition Count and Adaptive Proportion health checks are
197 * implemented on the raw input stream and may be enabled to verify min-entropy
198 * of 0.5 bits per bit.
/*
 * Callback passed to bus_dmamap_load() by ccp_hw_attach_queue().
 *
 * Asserts that the load reported no error and stores the bus address of
 * the first segment through `baddr`.
 * NOTE(review): the declaration/assignment of `baddr` is not visible here;
 * presumably it is derived from `arg` (the caller passes
 * &qp->desc_ring_bus_addr) -- confirm.
 */
202 ccp_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
206 KASSERT(error == 0, ("%s: error:%d", __func__, error));
208 *baddr = segs->ds_addr;
/*
 * Set up hardware command queue number `queue`.
 *
 * dev:     device whose softc owns the queue array.
 * lsbmask: LSB access mask, passed on to ccp_queue_decode_lsb_regions().
 * queue:   index into sc->queues.
 *
 * Steps visible in this routine:
 *  - Queues whose bit is clear in sc->valid_queues are not set up.
 *  - Queues that end up with lsb_mask == 0 are reported and their bit is
 *    cleared from sc->valid_queues.
 *  - A descriptor ring of (1 << ring_size_order) struct ccp_desc entries is
 *    allocated through bus_dma (tag aligned to 1 << (5 + ring_size_order),
 *    one segment, zero-filled) and loaded; the resulting bus address is
 *    recorded in qp->desc_ring_bus_addr by ccp_dmamap_cb().
 *  - A zeroed completions ring with one entry per descriptor is allocated.
 *  - The control register and interrupt enables are written, leftover
 *    interrupt status is cleared, and TAIL_LO and HEAD_LO are both pointed
 *    at the low 32 bits of the ring's bus address.
 *  - Completion, error and queue-stopped interrupts are enabled; the ring
 *    size, the upper address bits and CMD_Q_RUN are OR-ed into
 *    qp->qcontrol, which is then written to the control register.
 *  - The trailing lines unload/free the ring and destroy the DMA tag.
 *
 * NOTE(review): the return statements, the error checks after each bus_dma
 * call and the condition guarding the trailing cleanup are not visible
 * here; presumably the cleanup runs only on failure -- confirm.
 */
212 ccp_hw_attach_queue(device_t dev, uint64_t lsbmask, unsigned queue)
214 struct ccp_softc *sc;
215 struct ccp_queue *qp;
217 size_t ringsz, num_descriptors;
221 sc = device_get_softc(dev);
222 qp = &sc->queues[queue];
225 * Don't bother allocating a ring for queues the host isn't allowed to
228 if ((sc->valid_queues & (1 << queue)) == 0)
231 ccp_queue_decode_lsb_regions(sc, lsbmask, queue);
233 /* Ignore queues that do not have any LSB access. */
234 if (qp->lsb_mask == 0) {
235 device_printf(dev, "Ignoring queue %u with no LSB access\n",
237 sc->valid_queues &= ~(1 << queue);
241 num_descriptors = 1 << sc->ring_size_order;
242 ringsz = sizeof(struct ccp_desc) * num_descriptors;
245 * "Queue_Size" is order - 1.
247 * Queue must be aligned to 5+Queue_Size+1 == 5 + order bits.
249 error = bus_dma_tag_create(bus_get_dma_tag(dev),
250 1 << (5 + sc->ring_size_order),
251 #if defined(__i386__) && !defined(PAE)
252 0, BUS_SPACE_MAXADDR,
254 (bus_addr_t)1 << 32, BUS_SPACE_MAXADDR_48BIT,
256 BUS_SPACE_MAXADDR, NULL, NULL, ringsz, 1,
257 ringsz, 0, NULL, NULL, &qp->ring_desc_tag);
261 error = bus_dmamem_alloc(qp->ring_desc_tag, &desc,
262 BUS_DMA_ZERO | BUS_DMA_WAITOK, &qp->ring_desc_map);
266 error = bus_dmamap_load(qp->ring_desc_tag, qp->ring_desc_map, desc,
267 ringsz, ccp_dmamap_cb, &qp->desc_ring_bus_addr, BUS_DMA_WAITOK);
271 qp->desc_ring = desc;
272 qp->completions_ring = malloc(num_descriptors *
273 sizeof(*qp->completions_ring), M_CCP, M_ZERO | M_WAITOK);
275 /* Zero control register; among other things, clears the RUN flag. */
277 ccp_write_queue_4(sc, queue, CMD_Q_CONTROL_BASE, qp->qcontrol);
278 ccp_write_queue_4(sc, queue, CMD_Q_INT_ENABLE_BASE, 0);
280 /* Clear any leftover interrupt status flags */
281 ccp_write_queue_4(sc, queue, CMD_Q_INTERRUPT_STATUS_BASE,
284 qp->qcontrol |= (sc->ring_size_order - 1) << CMD_Q_SIZE_SHIFT;
286 ccp_write_queue_4(sc, queue, CMD_Q_TAIL_LO_BASE,
287 (uint32_t)qp->desc_ring_bus_addr);
288 ccp_write_queue_4(sc, queue, CMD_Q_HEAD_LO_BASE,
289 (uint32_t)qp->desc_ring_bus_addr);
292 * Enable completion interrupts, as well as error or administrative
293 * halt interrupts. We don't use administrative halts, but they
294 * shouldn't trip unless we do, so it ought to be harmless.
296 ccp_write_queue_4(sc, queue, CMD_Q_INT_ENABLE_BASE,
297 INT_COMPLETION | INT_ERROR | INT_QUEUE_STOPPED);
299 qp->qcontrol |= (qp->desc_ring_bus_addr >> 32) << CMD_Q_PTR_HI_SHIFT;
300 qp->qcontrol |= CMD_Q_RUN;
301 ccp_write_queue_4(sc, queue, CMD_Q_CONTROL_BASE, qp->qcontrol);
305 if (qp->desc_ring != NULL)
306 bus_dmamap_unload(qp->ring_desc_tag,
309 bus_dmamem_free(qp->ring_desc_tag, desc,
311 if (qp->ring_desc_tag != NULL)
312 bus_dma_tag_destroy(qp->ring_desc_tag);
/*
 * Release the per-queue resources of queue number `queue`.
 *
 * Frees the completions ring, unloads and frees the descriptor ring's DMA
 * memory and destroys the ring's DMA tag.  The valid_queues test at the top
 * selects queues whose bit is clear in sc->valid_queues.
 *
 * NOTE(review): the statement controlled by that test is not visible here;
 * presumably it returns early -- confirm.  The in-body comment speaks of
 * "allocating" a ring, although every visible statement below it releases
 * resources; it reads like a carry-over from ccp_hw_attach_queue().
 */
318 ccp_hw_detach_queue(device_t dev, unsigned queue)
320 struct ccp_softc *sc;
321 struct ccp_queue *qp;
323 sc = device_get_softc(dev);
324 qp = &sc->queues[queue];
327 * Don't bother allocating a ring for queues the host isn't allowed to
330 if ((sc->valid_queues & (1 << queue)) == 0)
333 free(qp->completions_ring, M_CCP);
334 bus_dmamap_unload(qp->ring_desc_tag, qp->ring_desc_map);
335 bus_dmamem_free(qp->ring_desc_tag, qp->desc_ring, qp->ring_desc_map);
336 bus_dma_tag_destroy(qp->ring_desc_tag);
/*
 * Allocate and activate the two memory BARs used by the driver.
 *
 * BAR 2 is stored in sc->pci_resource; its bus tag and handle are saved in
 * the softc and are what ccp_read_4()/ccp_write_4() use.  BAR 5 is stored
 * in sc->pci_resource_msix.  If BAR 5 cannot be allocated, BAR 2 is
 * released again.
 *
 * NOTE(review): the return statements are not visible here, so the exact
 * error values are not documented.
 */
340 ccp_map_pci_bar(device_t dev)
342 struct ccp_softc *sc;
344 sc = device_get_softc(dev);
346 sc->pci_resource_id = PCIR_BAR(2);
347 sc->pci_resource = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
348 &sc->pci_resource_id, RF_ACTIVE);
349 if (sc->pci_resource == NULL) {
350 device_printf(dev, "unable to allocate pci resource\n");
354 sc->pci_resource_id_msix = PCIR_BAR(5);
355 sc->pci_resource_msix = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
356 &sc->pci_resource_id_msix, RF_ACTIVE);
357 if (sc->pci_resource_msix == NULL) {
358 device_printf(dev, "unable to allocate pci resource msix\n");
359 bus_release_resource(dev, SYS_RES_MEMORY, sc->pci_resource_id,
364 sc->pci_bus_tag = rman_get_bustag(sc->pci_resource);
365 sc->pci_bus_handle = rman_get_bushandle(sc->pci_resource);
/*
 * Release the memory resources obtained by ccp_map_pci_bar(): first the
 * one recorded in the *_msix fields, then the register BAR.
 */
370 ccp_unmap_pci_bar(device_t dev)
372 struct ccp_softc *sc;
374 sc = device_get_softc(dev);
376 bus_release_resource(dev, SYS_RES_MEMORY, sc->pci_resource_id_msix,
377 sc->pci_resource_msix);
378 bus_release_resource(dev, SYS_RES_MEMORY, sc->pci_resource_id,
/*
 * Table of hardware error codes.  Each entry gives, in order: the numeric
 * code, a symbolic name, the errno value to report, and a free-form
 * description (empty for entries not yet filled in).
 *
 * ccp_intr_handle_error() searches this table by code and uses the name,
 * description and errno of the matching entry.
 */
382 const static struct ccp_error_code {
387 } ccp_error_codes[] = {
388 { 0x01, "ILLEGAL_ENGINE", EIO, "Requested engine was invalid" },
389 { 0x03, "ILLEGAL_FUNCTION_TYPE", EIO,
390 "A non-supported function type was specified" },
391 { 0x04, "ILLEGAL_FUNCTION_MODE", EIO,
392 "A non-supported function mode was specified" },
393 { 0x05, "ILLEGAL_FUNCTION_ENCRYPT", EIO,
394 "A CMAC type was specified when ENCRYPT was not specified" },
395 { 0x06, "ILLEGAL_FUNCTION_SIZE", EIO,
396 "A non-supported function size was specified.\n"
397 "AES-CFB: Size was not 127 or 7;\n"
398 "3DES-CFB: Size was not 7;\n"
399 "RSA: See supported size table (7.4.2);\n"
400 "ECC: Size was greater than 576 bits." },
401 { 0x07, "Zlib_MISSING_INIT_EOM", EIO,
402 "Zlib command does not have INIT and EOM set" },
403 { 0x08, "ILLEGAL_FUNCTION_RSVD", EIO,
404 "Reserved bits in a function specification were not 0" },
405 { 0x09, "ILLEGAL_BUFFER_LENGTH", EIO,
406 "The buffer length specified was not correct for the selected engine"
408 { 0x0A, "VLSB_FAULT", EIO, "Illegal VLSB segment mapping:\n"
409 "Undefined VLSB segment mapping or\n"
410 "mapping to unsupported LSB segment id" },
411 { 0x0B, "ILLEGAL_MEM_ADDR", EFAULT,
412 "The specified source/destination buffer access was illegal:\n"
413 "Data buffer located in a LSB location disallowed by the LSB protection masks; or\n"
414 "Data buffer not completely contained within a single segment; or\n"
415 "Pointer with Fixed=1 is not 32-bit aligned; or\n"
416 "Pointer with Fixed=1 attempted to reference non-AXI1 (local) memory."
418 { 0x0C, "ILLEGAL_MEM_SEL", EIO,
419 "A src_mem, dst_mem, or key_mem field was illegal:\n"
420 "A field was set to a reserved value; or\n"
421 "A public command attempted to reference AXI1 (local) or GART memory; or\n"
422 "A Zlib command attmpted to use the LSB." },
423 { 0x0D, "ILLEGAL_CONTEXT_ADDR", EIO,
424 "The specified context location was illegal:\n"
425 "Context located in a LSB location disallowed by the LSB protection masks; or\n"
426 "Context not completely contained within a single segment." },
427 { 0x0E, "ILLEGAL_KEY_ADDR", EIO,
428 "The specified key location was illegal:\n"
429 "Key located in a LSB location disallowed by the LSB protection masks; or\n"
430 "Key not completely contained within a single segment." },
431 { 0x12, "CMD_TIMEOUT", EIO, "A command timeout violation occurred" },
432 /* XXX Could fill out these descriptions too */
433 { 0x13, "IDMA0_AXI_SLVERR", EIO, "" },
434 { 0x14, "IDMA0_AXI_DECERR", EIO, "" },
435 { 0x16, "IDMA1_AXI_SLVERR", EIO, "" },
436 { 0x17, "IDMA1_AXI_DECERR", EIO, "" },
437 { 0x19, "ZLIBVHB_AXI_SLVERR", EIO, "" },
438 { 0x1A, "ZLIBVHB_AXI_DECERR", EIO, "" },
439 { 0x1C, "ZLIB_UNEXPECTED_EOM", EIO, "" },
440 { 0x1D, "ZLIB_EXTRA_DATA", EIO, "" },
441 { 0x1E, "ZLIB_BTYPE", EIO, "" },
442 { 0x20, "ZLIB_UNDEFINED_DISTANCE_SYMBOL", EIO, "" },
443 { 0x21, "ZLIB_CODE_LENGTH_SYMBOL", EIO, "" },
444 { 0x22, "ZLIB_VHB_ILLEGAL_FETCH", EIO, "" },
445 { 0x23, "ZLIB_UNCOMPRESSED_LEN", EIO, "" },
446 { 0x24, "ZLIB_LIMIT_REACHED", EIO, "" },
447 { 0x25, "ZLIB_CHECKSUM_MISMATCH", EIO, "" },
448 { 0x26, "ODMA0_AXI_SLVERR", EIO, "" },
449 { 0x27, "ODMA0_AXI_DECERR", EIO, "" },
450 { 0x29, "ODMA1_AXI_SLVERR", EIO, "" },
451 { 0x2A, "ODMA1_AXI_DECERR", EIO, "" },
452 { 0x2B, "LSB_PARITY_ERR", EIO,
453 "A read from the LSB encountered a parity error" },
/*
 * Handle a hardware error reported for queue `qp`.
 *
 * qp:   queue that raised the error.
 * desc: descriptor the hardware stopped at; its ring index is computed as
 *       desc - qp->desc_ring.
 *
 * Visible sequence:
 *  1. Read the queue status register and extract the error code, the
 *     error source and the faulting VLSB block.
 *  2. Look the code up in ccp_error_codes and log name, code, source and
 *     faulting block ("(reserved)" is printed when no entry matched), then
 *     the entry's description.
 *  3. Zero the offending descriptor so the slot is clean for reuse.
 *  4. If the matching completion context has a callback, invoke it with an
 *     errno and clear the callback pointer.
 *  5. Clear cq_waiting and wake sleepers on &qp->cq_tail.
 *  6. Write HEAD_LO to point at the slot after the bad descriptor (index
 *     wraps modulo 1 << ring_size_order) and rewrite the control register
 *     from qp->qcontrol to restart the queue.
 *
 * NOTE(review): the assignments of `sc` and `q`, the NULL initialisation
 * and NULL checks of `ec`, and any update of qp->cq_head are not visible
 * here; ec->ce_desc and ec->ce_errno are presumably only dereferenced when
 * a table entry matched -- confirm.
 */
457 ccp_intr_handle_error(struct ccp_queue *qp, const struct ccp_desc *desc)
459 struct ccp_completion_ctx *cctx;
460 const struct ccp_error_code *ec;
461 struct ccp_softc *sc;
462 uint32_t status, error, esource, faultblock;
469 status = ccp_read_queue_4(sc, q, CMD_Q_STATUS_BASE);
471 error = status & STATUS_ERROR_MASK;
473 /* Decode error status */
475 for (idx = 0; idx < nitems(ccp_error_codes); idx++)
476 if (ccp_error_codes[idx].ce_code == error) {
477 ec = &ccp_error_codes[idx];
481 esource = (status >> STATUS_ERRORSOURCE_SHIFT) &
482 STATUS_ERRORSOURCE_MASK;
483 faultblock = (status >> STATUS_VLSB_FAULTBLOCK_SHIFT) &
484 STATUS_VLSB_FAULTBLOCK_MASK;
485 device_printf(sc->dev, "Error: %s (%u) Source: %u Faulting LSB block: %u\n",
486 (ec != NULL) ? ec->ce_name : "(reserved)", error, esource,
489 device_printf(sc->dev, "Error description: %s\n", ec->ce_desc);
491 /* TODO Could format the desc nicely here */
492 idx = desc - qp->desc_ring;
493 DPRINTF(sc->dev, "Bad descriptor index: %u contents: %32D\n", idx,
494 (const void *)desc, " ");
497 * TODO Per § 14.4 "Error Handling," DMA_Status, DMA_Read/Write_Status,
498 * Zlib Decompress status may be interesting.
502 /* Keep unused descriptors zero for next use. */
503 memset(&qp->desc_ring[idx], 0, sizeof(qp->desc_ring[idx]));
505 cctx = &qp->completions_ring[idx];
508 * Restart procedure described in § 14.2.5. Could be used by HoC if we
511 * Advance HEAD_LO past bad descriptor + any remaining in
512 * transaction manually, then restart queue.
514 idx = (idx + 1) % (1 << sc->ring_size_order);
516 /* Callback function signals end of transaction */
517 if (cctx->callback_fn != NULL) {
521 errno = ec->ce_errno;
522 /* TODO More specific error code */
523 cctx->callback_fn(qp, cctx->session, cctx->callback_arg, errno);
524 cctx->callback_fn = NULL;
530 qp->cq_waiting = false;
531 wakeup(&qp->cq_tail);
532 DPRINTF(sc->dev, "%s: wrote sw head:%u\n", __func__, qp->cq_head);
533 ccp_write_queue_4(sc, q, CMD_Q_HEAD_LO_BASE,
534 (uint32_t)qp->desc_ring_bus_addr + (idx * Q_DESC_SIZE));
535 ccp_write_queue_4(sc, q, CMD_Q_CONTROL_BASE, qp->qcontrol);
536 DPRINTF(sc->dev, "%s: Restarted queue\n", __func__);
/*
 * Retire completed descriptors on queue `qp`.
 *
 * qp:   queue to process.
 * ints: interrupt status bits read for this queue by the caller.
 *
 * With qp->cq_lock held, the hardware HEAD_LO register is read and
 * converted to a ring index.  Every slot from the software head
 * (qp->cq_head) up to, but not including, that index is completed: its
 * callback (if set) is invoked with status 0 and cleared, the descriptor is
 * zeroed, and cq_head advances modulo (1 << ring_size_order).  Afterwards
 * cq_waiting is cleared and sleepers on &qp->cq_tail are woken.  If
 * INT_ERROR is set in `ints`, the descriptor at the hardware head index is
 * handed to ccp_intr_handle_error() before the lock is dropped.
 *
 * NOTE(review): the assignments of `sc` and `q`, and any condition around
 * the wakeup, are not visible here.
 */
540 ccp_intr_run_completions(struct ccp_queue *qp, uint32_t ints)
542 struct ccp_completion_ctx *cctx;
543 struct ccp_softc *sc;
544 const struct ccp_desc *desc;
545 uint32_t headlo, idx;
546 unsigned q, completed;
551 mtx_lock(&qp->cq_lock);
554 * Hardware HEAD_LO points to the first incomplete descriptor. Process
555 * any submitted and completed descriptors, up to but not including
558 headlo = ccp_read_queue_4(sc, q, CMD_Q_HEAD_LO_BASE);
559 idx = (headlo - (uint32_t)qp->desc_ring_bus_addr) / Q_DESC_SIZE;
561 DPRINTF(sc->dev, "%s: hw head:%u sw head:%u\n", __func__, idx,
564 while (qp->cq_head != idx) {
565 DPRINTF(sc->dev, "%s: completing:%u\n", __func__, qp->cq_head);
567 cctx = &qp->completions_ring[qp->cq_head];
568 if (cctx->callback_fn != NULL) {
569 cctx->callback_fn(qp, cctx->session,
570 cctx->callback_arg, 0);
571 cctx->callback_fn = NULL;
574 /* Keep unused descriptors zero for next use. */
575 memset(&qp->desc_ring[qp->cq_head], 0,
576 sizeof(qp->desc_ring[qp->cq_head]));
578 qp->cq_head = (qp->cq_head + 1) % (1 << sc->ring_size_order);
582 qp->cq_waiting = false;
583 wakeup(&qp->cq_tail);
586 DPRINTF(sc->dev, "%s: wrote sw head:%u\n", __func__, qp->cq_head);
589 * Desc points to the first incomplete descriptor, at the time we read
590 * HEAD_LO. If there was an error flagged in interrupt status, the HW
591 * will not proceed past the erroneous descriptor by itself.
593 desc = &qp->desc_ring[idx];
594 if ((ints & INT_ERROR) != 0)
595 ccp_intr_handle_error(qp, desc);
597 mtx_unlock(&qp->cq_lock);
/*
 * Interrupt handler registered as the handler argument of bus_setup_intr()
 * in ccp_setup_interrupts(), paired with ccp_intr_filter() as the filter;
 * `arg` is the softc.
 *
 * For every queue whose bit is set in sc->valid_queues:
 *  - read the queue's interrupt status and write the same bits back to
 *    clear them;
 *  - if COMPLETION or ERROR is flagged, run ccp_intr_run_completions();
 *  - if QUEUE_STOPPED is flagged, log it.
 * A second pass re-enables the COMPLETION, ERROR and QUEUE_STOPPED
 * interrupts on each valid queue (ccp_intr_filter() writes 0 to the same
 * enable register).
 *
 * NOTE(review): the declarations of `i`/`ints` and the statements
 * controlled by the valid_queues tests are not visible here; presumably
 * they skip the queue -- confirm.
 */
601 ccp_intr_handler(void *arg)
603 struct ccp_softc *sc = arg;
607 DPRINTF(sc->dev, "%s: interrupt\n", __func__);
610 * We get one global interrupt per PCI device, shared over all of
611 * its queues. Scan each valid queue on interrupt for flags indicating
614 for (i = 0; i < nitems(sc->queues); i++) {
615 if ((sc->valid_queues & (1 << i)) == 0)
618 ints = ccp_read_queue_4(sc, i, CMD_Q_INTERRUPT_STATUS_BASE);
623 DPRINTF(sc->dev, "%s: %x interrupts on queue %zu\n", __func__,
626 /* Write back 1s to clear interrupt status bits. */
627 ccp_write_queue_4(sc, i, CMD_Q_INTERRUPT_STATUS_BASE, ints);
630 * If there was an error, we still need to run completions on
631 * any descriptors prior to the error. The completions handler
632 * invoked below will also handle the error descriptor.
634 if ((ints & (INT_COMPLETION | INT_ERROR)) != 0)
635 ccp_intr_run_completions(&sc->queues[i], ints);
637 if ((ints & INT_QUEUE_STOPPED) != 0)
638 device_printf(sc->dev, "%s: queue %zu stopped\n",
642 /* Re-enable interrupts after processing */
643 for (i = 0; i < nitems(sc->queues); i++) {
644 if ((sc->valid_queues & (1 << i)) == 0)
646 ccp_write_queue_4(sc, i, CMD_Q_INT_ENABLE_BASE,
647 INT_COMPLETION | INT_ERROR | INT_QUEUE_STOPPED);
/*
 * Interrupt filter registered alongside ccp_intr_handler(); `arg` is the
 * softc.
 *
 * Writes 0 to the interrupt-enable register of each valid queue and
 * returns FILTER_SCHEDULE_THREAD so that the handler is scheduled.
 */
652 ccp_intr_filter(void *arg)
654 struct ccp_softc *sc = arg;
657 /* TODO: Split individual queues into separate taskqueues? */
658 for (i = 0; i < nitems(sc->queues); i++) {
659 if ((sc->valid_queues & (1 << i)) == 0)
662 /* Mask interrupt until task completes */
663 ccp_write_queue_4(sc, i, CMD_Q_INT_ENABLE_BASE, 0);
666 return (FILTER_SCHEDULE_THREAD);
/*
 * Allocate MSI-X vectors for the device and attach the interrupt filter
 * and handler to each.
 *
 * The number of vectors in use is capped at nitems(sc->intr_res).  IRQ
 * resources are allocated with rids starting at 1 and stored at index
 * rid - 1 of sc->intr_res; ccp_intr_filter()/ccp_intr_handler() are
 * installed on each with the softc as argument, the cookie going into
 * sc->intr_tag.  The final vector count is recorded in sc->intr_count,
 * which ccp_release_interrupts() iterates over.
 *
 * NOTE(review): how `nvec` is initialised, the conditions around the
 * diagnostic printfs, and all return statements are not visible here.
 */
670 ccp_setup_interrupts(struct ccp_softc *sc)
673 int rid, error, n, ridcopy;
675 n = pci_msix_count(sc->dev);
677 device_printf(sc->dev, "%s: msix_count: %d\n", __func__, n);
682 error = pci_alloc_msix(sc->dev, &nvec);
684 device_printf(sc->dev, "%s: alloc_msix error: %d\n", __func__,
689 device_printf(sc->dev, "%s: alloc_msix: 0 vectors\n",
693 if (nvec > nitems(sc->intr_res)) {
694 device_printf(sc->dev, "%s: too many vectors: %u\n", __func__,
696 nvec = nitems(sc->intr_res);
699 for (rid = 1; rid < 1 + nvec; rid++) {
701 sc->intr_res[rid - 1] = bus_alloc_resource_any(sc->dev,
702 SYS_RES_IRQ, &ridcopy, RF_ACTIVE);
703 if (sc->intr_res[rid - 1] == NULL) {
704 device_printf(sc->dev, "%s: Failed to alloc IRQ resource\n",
709 sc->intr_tag[rid - 1] = NULL;
710 error = bus_setup_intr(sc->dev, sc->intr_res[rid - 1],
711 INTR_MPSAFE | INTR_TYPE_MISC, ccp_intr_filter,
712 ccp_intr_handler, sc, &sc->intr_tag[rid - 1]);
714 device_printf(sc->dev, "%s: setup_intr: %d\n",
717 sc->intr_count = nvec;
/*
 * Undo ccp_setup_interrupts(): for each of the sc->intr_count vectors,
 * tear down the handler if a cookie was recorded and release the IRQ
 * resource if one was allocated; finally release the MSI/MSI-X allocation
 * with pci_release_msi().
 */
723 ccp_release_interrupts(struct ccp_softc *sc)
727 for (i = 0; i < sc->intr_count; i++) {
728 if (sc->intr_tag[i] != NULL)
729 bus_teardown_intr(sc->dev, sc->intr_res[i],
731 if (sc->intr_res[i] != NULL)
732 bus_release_resource(sc->dev, SYS_RES_IRQ,
733 rman_get_rid(sc->intr_res[i]), sc->intr_res[i]);
736 pci_release_msi(sc->dev);
/*
 * Bring the hardware up for device `dev`.
 *
 * Visible sequence:
 *  1. Map the PCI BARs and enable bus mastering.
 *  2. Take the ring order from g_ccp_ring_order and reject values outside
 *     6..16.
 *  3. Read the valid-queue mask and the version register; hardware whose
 *     version number is below 5 is rejected.
 *  4. Set up interrupts, then decode version number, queue count, LSB
 *     entry count and capability bits from the version register.
 *  5. Copy the private LSB mask registers to the public ones and build the
 *     64-bit `lsbmask` as (hi << 30) | lo.
 *  6. Attach each queue with ccp_hw_attach_queue(), then call
 *     ccp_assign_lsb_regions().
 *
 * The trailing lines release interrupts (if they were set up), detach the
 * queues attached so far, disable bus mastering and unmap the BARs.
 *
 * NOTE(review): the error checks, goto/return statements, the label and
 * the conditions guarding the trailing cleanup are not visible here;
 * presumably it runs only on failure -- confirm.
 */
740 ccp_hw_attach(device_t dev)
742 struct ccp_softc *sc;
744 uint32_t version, lsbmasklo, lsbmaskhi;
745 unsigned queue_idx, j;
747 bool bars_mapped, interrupts_setup;
750 bars_mapped = interrupts_setup = false;
751 sc = device_get_softc(dev);
753 error = ccp_map_pci_bar(dev);
755 device_printf(dev, "%s: couldn't map BAR(s)\n", __func__);
760 error = pci_enable_busmaster(dev);
762 device_printf(dev, "%s: couldn't enable busmaster\n",
767 sc->ring_size_order = g_ccp_ring_order;
768 if (sc->ring_size_order < 6 || sc->ring_size_order > 16) {
769 device_printf(dev, "bogus hw.ccp.ring_order\n");
773 sc->valid_queues = ccp_read_4(sc, CMD_QUEUE_MASK_OFFSET);
775 version = ccp_read_4(sc, VERSION_REG);
776 if ((version & VERSION_NUM_MASK) < 5) {
778 "driver supports version 5 and later hardware\n");
783 error = ccp_setup_interrupts(sc);
786 interrupts_setup = true;
788 sc->hw_version = version & VERSION_NUM_MASK;
789 sc->num_queues = (version >> VERSION_NUMVQM_SHIFT) &
791 sc->num_lsb_entries = (version >> VERSION_LSBSIZE_SHIFT) &
792 VERSION_LSBSIZE_MASK;
793 sc->hw_features = version & VERSION_CAP_MASK;
796 * Copy private LSB mask to public registers to enable access to LSB
797 * from all queues allowed by BIOS.
799 lsbmasklo = ccp_read_4(sc, LSB_PRIVATE_MASK_LO_OFFSET);
800 lsbmaskhi = ccp_read_4(sc, LSB_PRIVATE_MASK_HI_OFFSET);
801 ccp_write_4(sc, LSB_PUBLIC_MASK_LO_OFFSET, lsbmasklo);
802 ccp_write_4(sc, LSB_PUBLIC_MASK_HI_OFFSET, lsbmaskhi);
804 lsbmask = ((uint64_t)lsbmaskhi << 30) | lsbmasklo;
806 for (; queue_idx < nitems(sc->queues); queue_idx++) {
807 error = ccp_hw_attach_queue(dev, lsbmask, queue_idx);
809 device_printf(dev, "%s: couldn't attach queue %u\n",
810 __func__, queue_idx);
814 ccp_assign_lsb_regions(sc, lsbmask);
818 if (interrupts_setup)
819 ccp_release_interrupts(sc);
820 for (j = 0; j < queue_idx; j++)
821 ccp_hw_detach_queue(dev, j);
822 if (sc->ring_size_order != 0)
823 pci_disable_busmaster(dev);
825 ccp_unmap_pci_bar(dev);
/*
 * Shut the hardware down for device `dev`: detach every queue slot,
 * release the interrupts, disable bus mastering and unmap the PCI BARs.
 */
831 ccp_hw_detach(device_t dev)
833 struct ccp_softc *sc;
836 sc = device_get_softc(dev);
838 for (i = 0; i < nitems(sc->queues); i++)
839 ccp_hw_detach_queue(dev, i);
841 ccp_release_interrupts(sc);
842 pci_disable_busmaster(dev);
843 ccp_unmap_pci_bar(dev);
/*
 * Queue one PASSTHRU descriptor on `qp`.
 *
 * dst/dst_type: destination address and its memory type.
 * src/src_type: source address and its memory type.
 * len:          transfer length.
 * swapmode:     byte-swap mode placed in the descriptor.
 * bitmode:      bitwise mode; when it is not the NOOP mode the descriptor's
 *               LSB context id is set to the queue's LSB_ENTRY_KEY entry.
 * interrupt:    stored in the descriptor's ioc field.
 * cctx:         completion context copied into the completions ring slot
 *               that matches the descriptor.
 *
 * The descriptor at qp->cq_tail is zeroed and filled in, then cq_tail
 * advances modulo (1 << ring_size_order).  The result must be checked by
 * the caller (__must_check).
 *
 * NOTE(review): the return statements (including the one taken when
 * ccp_queue_get_ring_space() reports 0), the store of `len` into the
 * descriptor, and any NULL check around the memcpy from `cctx` are not
 * visible here; ccp_sha() passes NULL for cctx on some calls, so a guard
 * is presumably present -- confirm.
 */
846 static int __must_check
847 ccp_passthrough(struct ccp_queue *qp, bus_addr_t dst,
848 enum ccp_memtype dst_type, bus_addr_t src, enum ccp_memtype src_type,
849 bus_size_t len, enum ccp_passthru_byteswap swapmode,
850 enum ccp_passthru_bitwise bitmode, bool interrupt,
851 const struct ccp_completion_ctx *cctx)
853 struct ccp_desc *desc;
855 if (ccp_queue_get_ring_space(qp) == 0)
858 desc = &qp->desc_ring[qp->cq_tail];
860 memset(desc, 0, sizeof(*desc));
861 desc->engine = CCP_ENGINE_PASSTHRU;
863 desc->pt.ioc = interrupt;
864 desc->pt.byteswap = swapmode;
865 desc->pt.bitwise = bitmode;
868 desc->src_lo = (uint32_t)src;
869 desc->src_hi = src >> 32;
870 desc->src_mem = src_type;
872 desc->dst_lo = (uint32_t)dst;
873 desc->dst_hi = dst >> 32;
874 desc->dst_mem = dst_type;
876 if (bitmode != CCP_PASSTHRU_BITWISE_NOOP)
877 desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_KEY);
880 memcpy(&qp->completions_ring[qp->cq_tail], cctx, sizeof(*cctx));
882 qp->cq_tail = (qp->cq_tail + 1) % (1 << qp->cq_softc->ring_size_order);
/*
 * Issue passthrough transfers between an LSB address and the segments of a
 * scatter/gather list.
 *
 * Walks `sgl` while bytes remain; each segment contributes
 * min(remain, segment length) bytes via one ccp_passthrough() call with no
 * byte-swap and no bitwise operation.  Two call shapes are visible: one
 * with the LSB as destination and system memory as source, one with the
 * roles reversed.  Only the chunk that exhausts `remain` may request an
 * interrupt, and only if `interrupt` is true.
 *
 * NOTE(review): the initialisation and decrement of `remain`, the test
 * choosing between the two directions (presumably `tolsb`), and the error
 * handling/return statements are not visible here -- confirm.
 */
886 static int __must_check
887 ccp_passthrough_sgl(struct ccp_queue *qp, bus_addr_t lsb_addr, bool tolsb,
888 struct sglist *sgl, bus_size_t len, bool interrupt,
889 const struct ccp_completion_ctx *cctx)
891 struct sglist_seg *seg;
892 size_t i, remain, nb;
896 for (i = 0; i < sgl->sg_nseg && remain != 0; i++) {
897 seg = &sgl->sg_segs[i];
898 /* crp lengths are int, so 32-bit min() is ok. */
899 nb = min(remain, seg->ss_len);
902 error = ccp_passthrough(qp, lsb_addr, CCP_MEMTYPE_SB,
903 seg->ss_paddr, CCP_MEMTYPE_SYSTEM, nb,
904 CCP_PASSTHRU_BYTESWAP_NOOP,
905 CCP_PASSTHRU_BITWISE_NOOP,
906 (nb == remain) && interrupt, cctx);
908 error = ccp_passthrough(qp, seg->ss_paddr,
909 CCP_MEMTYPE_SYSTEM, lsb_addr, CCP_MEMTYPE_SB, nb,
910 CCP_PASSTHRU_BYTESWAP_NOOP,
911 CCP_PASSTHRU_BITWISE_NOOP,
912 (nb == remain) && interrupt, cctx);
922 * Note that these vectors are in reverse of the usual order.
/*
 * Page-aligned constant object holding per-algorithm vectors.  The
 * SHA_definitions table points at its SHA1, SHA224, SHA256, SHA384 and
 * SHA512 members as `H_vectors`, and ccp_sha() loads them into the LSB as
 * the initial SHA state.
 * NOTE(review): the member declarations and the initialisers of the
 * smaller variants are not visible here; the two groups of eight 64-bit
 * constants below presumably belong to SHA384 and SHA512 -- confirm.
 */
924 const struct SHA_vectors {
930 } SHA_H __aligned(PAGE_SIZE) = {
962 0x47b5481dbefa4fa4ull,
963 0xdb0c2e0d64f98fa7ull,
964 0x8eb44a8768581511ull,
965 0x67332667ffc00b31ull,
966 0x152fecd8f70e5939ull,
967 0x9159015a3070dd17ull,
968 0x629a292a367cd507ull,
969 0xcbbb9d5dc1059ed8ull,
972 0x5be0cd19137e2179ull,
973 0x1f83d9abfb41bd6bull,
974 0x9b05688c2b3e6c1full,
975 0x510e527fade682d1ull,
976 0xa54ff53a5f1d36f1ull,
977 0x3c6ef372fe94f82bull,
978 0xbb67ae8584caa73bull,
979 0x6a09e667f3bcc908ull,
983 * Ensure vectors do not cross a page boundary.
985 * Disabled due to a new Clang error: "expression is not an integral constant
986 * expression." GCC (cross toolchain) seems to handle this assertion with
987 * _Static_assert just fine.
990 CTASSERT(PAGE_SIZE - ((uintptr_t)&SHA_H % PAGE_SIZE) >= sizeof(SHA_H));
/*
 * Per-algorithm description table.  Each entry records the SHA version,
 * a pointer to that version's vectors inside SHA_H together with their
 * size, the matching software auth_hash transform (axf) and the engine
 * type value placed in SHA descriptors.
 *
 * ccp_sha() and ccp_sha_copy_result() search this table by `version`.
 * NOTE(review): the `.version` initialiser of the first entry and the
 * declaration of `H_size` are not visible here.
 */
993 const struct SHA_Defn {
994 enum sha_version version;
995 const void *H_vectors;
997 struct auth_hash *axf;
998 enum ccp_sha_type engine_type;
999 } SHA_definitions[] = {
1002 .H_vectors = SHA_H.SHA1,
1003 .H_size = sizeof(SHA_H.SHA1),
1004 .axf = &auth_hash_hmac_sha1,
1005 .engine_type = CCP_SHA_TYPE_1,
1009 .version = SHA2_224,
1010 .H_vectors = SHA_H.SHA224,
1011 .H_size = sizeof(SHA_H.SHA224),
1012 .axf = &auth_hash_hmac_sha2_224,
1013 .engine_type = CCP_SHA_TYPE_224,
1017 .version = SHA2_256,
1018 .H_vectors = SHA_H.SHA256,
1019 .H_size = sizeof(SHA_H.SHA256),
1020 .axf = &auth_hash_hmac_sha2_256,
1021 .engine_type = CCP_SHA_TYPE_256,
1024 .version = SHA2_384,
1025 .H_vectors = SHA_H.SHA384,
1026 .H_size = sizeof(SHA_H.SHA384),
1027 .axf = &auth_hash_hmac_sha2_384,
1028 .engine_type = CCP_SHA_TYPE_384,
1031 .version = SHA2_512,
1032 .H_vectors = SHA_H.SHA512,
1033 .H_size = sizeof(SHA_H.SHA512),
1034 .axf = &auth_hash_hmac_sha2_512,
1035 .engine_type = CCP_SHA_TYPE_512,
/*
 * Queue one SHA-engine descriptor on `qp`.
 *
 * defn:    algorithm description; its engine_type goes into the descriptor.
 * addr:    physical address of the input, placed in src_lo/src_hi with the
 *          source memory type set to system memory.
 * len:     length of the input.
 * start/end: flags for the first/last descriptor of a message.
 * msgbits: message length value stored in sha_len_lo/sha_len_hi.
 *
 * The descriptor at qp->cq_tail is zeroed and filled in, its LSB context
 * id is set to the queue's LSB_ENTRY_SHA entry, and cq_tail advances modulo
 * (1 << ring_size_order).
 *
 * NOTE(review): the return statements, the use of `len`, `start` and `end`,
 * and any condition around the sha_len stores are not visible here.
 */
1039 static int __must_check
1040 ccp_sha_single_desc(struct ccp_queue *qp, const struct SHA_Defn *defn,
1041 vm_paddr_t addr, size_t len, bool start, bool end, uint64_t msgbits)
1043 struct ccp_desc *desc;
1045 if (ccp_queue_get_ring_space(qp) == 0)
1048 desc = &qp->desc_ring[qp->cq_tail];
1050 memset(desc, 0, sizeof(*desc));
1051 desc->engine = CCP_ENGINE_SHA;
1055 desc->sha.type = defn->engine_type;
1059 desc->sha_len_lo = (uint32_t)msgbits;
1060 desc->sha_len_hi = msgbits >> 32;
1063 desc->src_lo = (uint32_t)addr;
1064 desc->src_hi = addr >> 32;
1065 desc->src_mem = CCP_MEMTYPE_SYSTEM;
1067 desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_SHA);
1069 qp->cq_tail = (qp->cq_tail + 1) % (1 << qp->cq_softc->ring_size_order);
/*
 * Queue a complete SHA computation on `qp`.
 *
 * version: which SHA_definitions entry to use.
 * sgl_src: input data; one SHA descriptor is queued per segment, the first
 *          flagged as start and the last as end.
 * sgl_dst: where the resulting state is copied to.
 * cctx:    completion context; may be NULL.
 *
 * Phases:
 *  1. Load the algorithm's vectors into the queue's LSB_ENTRY_SHA slot
 *     with a passthrough of H_size rounded up to LSB_ENTRY_SIZE.
 *  2. Queue the SHA descriptors while accumulating the total message size.
 *  3. Copy the rounded-up state out of the LSB into the destination
 *     segments.  Only the chunk that finishes the copy carries `cctx`,
 *     and it requests an interrupt only when `cctx` is non-NULL.
 *
 * NOTE(review): the error checks and returns, the handling of an unknown
 * version, the final arguments of several calls, and the update of
 * `remaining` inside the copy-out loop are not visible here.
 */
1073 static int __must_check
1074 ccp_sha(struct ccp_queue *qp, enum sha_version version, struct sglist *sgl_src,
1075 struct sglist *sgl_dst, const struct ccp_completion_ctx *cctx)
1077 const struct SHA_Defn *defn;
1078 struct sglist_seg *seg;
1079 size_t i, msgsize, remaining, nb;
1083 for (i = 0; i < nitems(SHA_definitions); i++)
1084 if (SHA_definitions[i].version == version)
1086 if (i == nitems(SHA_definitions))
1088 defn = &SHA_definitions[i];
1090 /* XXX validate input ??? */
1092 /* Load initial SHA state into LSB */
1093 /* XXX ensure H_vectors don't span page boundaries */
1094 error = ccp_passthrough(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_SHA),
1095 CCP_MEMTYPE_SB, pmap_kextract((vm_offset_t)defn->H_vectors),
1096 CCP_MEMTYPE_SYSTEM, roundup2(defn->H_size, LSB_ENTRY_SIZE),
1097 CCP_PASSTHRU_BYTESWAP_NOOP, CCP_PASSTHRU_BITWISE_NOOP, false,
1102 /* Execute series of SHA updates on correctly sized buffers */
1104 for (i = 0; i < sgl_src->sg_nseg; i++) {
1105 seg = &sgl_src->sg_segs[i];
1106 msgsize += seg->ss_len;
1107 error = ccp_sha_single_desc(qp, defn, seg->ss_paddr,
1108 seg->ss_len, i == 0, i == sgl_src->sg_nseg - 1,
1114 /* Copy result out to sgl_dst */
1115 remaining = roundup2(defn->H_size, LSB_ENTRY_SIZE);
1116 lsbaddr = ccp_queue_lsb_address(qp, LSB_ENTRY_SHA);
1117 for (i = 0; i < sgl_dst->sg_nseg; i++) {
1118 seg = &sgl_dst->sg_segs[i];
1119 /* crp lengths are int, so 32-bit min() is ok. */
1120 nb = min(remaining, seg->ss_len);
1122 error = ccp_passthrough(qp, seg->ss_paddr, CCP_MEMTYPE_SYSTEM,
1123 lsbaddr, CCP_MEMTYPE_SB, nb, CCP_PASSTHRU_BYTESWAP_NOOP,
1124 CCP_PASSTHRU_BITWISE_NOOP,
1125 (cctx != NULL) ? (nb == remaining) : false,
1126 (nb == remaining) ? cctx : NULL);
/*
 * In-place byte reversal of a 256-bit value held as four 64-bit words:
 * words 3 and 0, and words 2 and 1, are exchanged with each word passed
 * through bswap64().
 * NOTE(review): the declaration of `t` and the stores of `t` into
 * buffer[0] and buffer[1] are not visible here; they are presumably what
 * completes each exchange -- confirm.
 */
1140 byteswap256(uint64_t *buffer)
1144 t = bswap64(buffer[3]);
1145 buffer[3] = bswap64(buffer[0]);
1148 t = bswap64(buffer[2]);
1149 buffer[2] = bswap64(buffer[1]);
1154 * Translate CCP internal LSB hash format into a standard hash output.
1156 * Manipulates input buffer with byteswap256 operation.
/*
 * output:  receives the digest in standard byte order.
 * buffer:  state as copied out of the LSB; modified in place.
 * version: selects the SHA_definitions entry; an unknown version panics.
 *
 * The first LSB_ENTRY_SIZE bytes of `buffer` are byte-reversed with
 * byteswap256(); the following LSB_ENTRY_SIZE bytes are reversed as well
 * when the digest is larger than one LSB entry.  The digest bytes are then
 * copied to `output` from a version-dependent position.
 *
 * NOTE(review): the case labels of the switch are not visible here.  The
 * copy that uses `buffer + XXX` refers to a name not defined in this view;
 * presumably that branch is compiled out -- confirm.
 */
1159 ccp_sha_copy_result(char *output, char *buffer, enum sha_version version)
1161 const struct SHA_Defn *defn;
1164 for (i = 0; i < nitems(SHA_definitions); i++)
1165 if (SHA_definitions[i].version == version)
1167 if (i == nitems(SHA_definitions))
1168 panic("bogus sha version auth_mode %u\n", (unsigned)version);
1170 defn = &SHA_definitions[i];
1172 /* Swap 256bit manually -- DMA engine can, but with limitations */
1173 byteswap256((void *)buffer);
1174 if (defn->axf->hashsize > LSB_ENTRY_SIZE)
1175 byteswap256((void *)(buffer + LSB_ENTRY_SIZE));
1177 switch (defn->version) {
1179 memcpy(output, buffer + 12, defn->axf->hashsize);
1183 memcpy(output, buffer + XXX, defn->axf->hashsize);
1187 memcpy(output, buffer, defn->axf->hashsize);
1191 buffer + LSB_ENTRY_SIZE * 3 - defn->axf->hashsize,
1192 defn->axf->hashsize - LSB_ENTRY_SIZE);
1193 memcpy(output + defn->axf->hashsize - LSB_ENTRY_SIZE, buffer,
1197 memcpy(output, buffer + LSB_ENTRY_SIZE, LSB_ENTRY_SIZE);
1198 memcpy(output + LSB_ENTRY_SIZE, buffer, LSB_ENTRY_SIZE);
/*
 * Finish an HMAC request once the hardware has produced the inner hash.
 *
 * The outer hash is computed in software with the session's auth_hash
 * transform: Init, Update with s->hmac.opad (blocksize bytes), Update with
 * the inner hash converted by ccp_sha_copy_result(), then Final into
 * s->hmac.res.
 *
 * For CRYPTO_OP_VERIFY_DIGEST requests the digest stored in the request is
 * fetched and compared against the result with timingsafe_bcmp(); a
 * mismatch sets crp_etype to EBADMSG.  Otherwise the computed digest is
 * copied back into the request.  The hash context and s->hmac.res are
 * scrubbed with explicit_bzero() afterwards.
 *
 * NOTE(review): the handling of a non-zero `error` (only the crp_etype
 * store is visible), the final arguments of the copy calls, and the
 * completion of the request are not visible here.
 */
1204 ccp_do_hmac_done(struct ccp_queue *qp, struct ccp_session *s,
1205 struct cryptop *crp, int error)
1207 char ihash[SHA2_512_HASH_LEN /* max hash len */];
1208 union authctx auth_ctx;
1209 struct auth_hash *axf;
1211 axf = s->hmac.auth_hash;
1216 crp->crp_etype = error;
1220 /* Do remaining outer hash over small inner hash in software */
1221 axf->Init(&auth_ctx);
1222 axf->Update(&auth_ctx, s->hmac.opad, axf->blocksize);
1223 ccp_sha_copy_result(ihash, s->hmac.res, s->hmac.auth_mode);
1225 INSECURE_DEBUG(dev, "%s sha intermediate=%64D\n", __func__,
1226 (u_char *)ihash, " ");
1228 axf->Update(&auth_ctx, ihash, axf->hashsize);
1229 axf->Final(s->hmac.res, &auth_ctx);
1231 if (crp->crp_op & CRYPTO_OP_VERIFY_DIGEST) {
1232 crypto_copydata(crp, crp->crp_digest_start, s->hmac.hash_len,
1234 if (timingsafe_bcmp(s->hmac.res, ihash, s->hmac.hash_len) != 0)
1235 crp->crp_etype = EBADMSG;
1237 crypto_copyback(crp, crp->crp_digest_start, s->hmac.hash_len,
1240 /* Avoid leaking key material */
1241 explicit_bzero(&auth_ctx, sizeof(auth_ctx));
1242 explicit_bzero(s->hmac.res, sizeof(s->hmac.res));
/*
 * Completion callback installed by ccp_hmac(); forwards to
 * ccp_do_hmac_done() with the request as a struct cryptop pointer.
 * NOTE(review): the assignment of `crp` is not visible here; presumably it
 * is taken from `vcrp`, which ccp_hmac() sets to the request -- confirm.
 */
1249 ccp_hmac_done(struct ccp_queue *qp, struct ccp_session *s, void *vcrp,
1252 struct cryptop *crp;
1255 ccp_do_hmac_done(qp, s, crp, error);
/*
 * Queue the inner hash of an HMAC request on `qp`.
 *
 * The input scatter/gather list (qp->cq_sg_ulptx) is rebuilt as: the
 * session's ipad block (blocksize bytes), then the request's AAD region if
 * it has one, then the payload region, the latter two taken from
 * qp->cq_sg_crp.  The output list (qp->cq_sg_dst) covers s->hmac.res for
 * the hash size rounded up to LSB_ENTRY_SIZE.  The work is then submitted
 * through ccp_sha() with the session's auth_mode.
 *
 * NOTE(review): the error checks and return statements after each step and
 * the final argument of the ccp_sha() call are not visible here.
 */
1258 static int __must_check
1259 ccp_do_hmac(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp,
1260 const struct ccp_completion_ctx *cctx)
1263 struct auth_hash *axf;
1266 dev = qp->cq_softc->dev;
1267 axf = s->hmac.auth_hash;
1270 * Populate the SGL describing inside hash contents. We want to hash
1271 * the ipad (key XOR fixed bit pattern) concatenated with the user
1274 sglist_reset(qp->cq_sg_ulptx);
1275 error = sglist_append(qp->cq_sg_ulptx, s->hmac.ipad, axf->blocksize);
1278 if (crp->crp_aad_length != 0) {
1279 error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp,
1280 crp->crp_aad_start, crp->crp_aad_length);
1284 error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp,
1285 crp->crp_payload_start, crp->crp_payload_length);
1287 DPRINTF(dev, "%s: sglist too short\n", __func__);
1290 /* Populate SGL for output -- use hmac.res buffer. */
1291 sglist_reset(qp->cq_sg_dst);
1292 error = sglist_append(qp->cq_sg_dst, s->hmac.res,
1293 roundup2(axf->hashsize, LSB_ENTRY_SIZE));
1297 error = ccp_sha(qp, s->hmac.auth_mode, qp->cq_sg_ulptx, qp->cq_sg_dst,
1300 DPRINTF(dev, "%s: ccp_sha error\n", __func__);
/*
 * Entry point for a hash-only (HMAC) request.  Builds a completion context
 * whose callback is ccp_hmac_done() with crp as its argument, then submits
 * the request through ccp_do_hmac() and returns its result.
 */
1307 ccp_hmac(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp)
1309 struct ccp_completion_ctx ctx;
1311 ctx.callback_fn = ccp_hmac_done;
1312 ctx.callback_arg = crp;
1315 return (ccp_do_hmac(qp, s, crp, &ctx));
/*
 * Reverse the order of the bytes in data, in place.  The loop walks index i
 * up from the front while len walks down from the back, stopping when they
 * meet.
 * NOTE(review): the lines that adjust len before the loop and that save and
 * restore the overwritten byte are not visible in this excerpt.  Also confirm
 * that callers never pass len == 0, since len is an unsigned size_t that is
 * decremented.
 */
1319 ccp_byteswap(char *data, size_t len)
1325 for (i = 0; i < len; i++, len--) {
1327 data[i] = data[len];
/*
 * Completion callback for cipher-only requests.  Wipes the per-session IV
 * scratch buffer, records the hardware error code in crp->crp_etype and logs
 * the completion.
 * NOTE(review): the assignment of crp (presumably from vcrp) and the call
 * that hands the request back to the crypto framework are not visible in
 * this excerpt.
 */
1333 ccp_blkcipher_done(struct ccp_queue *qp, struct ccp_session *s, void *vcrp,
1336 struct cryptop *crp;
1338 explicit_bzero(&s->blkcipher.iv, sizeof(s->blkcipher.iv));
1345 crp->crp_etype = error;
1347 DPRINTF(qp->cq_softc->dev, "%s: qp=%p crp=%p\n", __func__, qp, crp);
/*
 * Fill iv with the IV for this request and convert it to the layout the
 * hardware expects.
 *
 * Source of the IV, in order of precedence:
 *   - CRYPTO_F_IV_GENERATE: csp_ivlen random bytes from arc4rand(), which are
 *     also written back into the request at crp_iv_start;
 *   - CRYPTO_F_IV_SEPARATE: copied from crp->crp_iv;
 *   - otherwise: read from the request buffer at crp_iv_start (the else
 *     keyword for this branch is not visible in this excerpt).
 *
 * Post-processing:
 *   - AES-GCM with a 12-byte IV: a big-endian 32-bit counter of 1 is stored
 *     at iv[12];
 *   - AES-XTS with an IV shorter than AES_BLOCK_LEN: the tail is zero-filled;
 *   - every algorithm except AES-XTS: the first AES_BLOCK_LEN bytes are
 *     byte-reversed.
 *
 * The iv buffer must therefore hold at least AES_BLOCK_LEN bytes.
 */
1352 ccp_collect_iv(struct cryptop *crp, const struct crypto_session_params *csp,
1356 if (crp->crp_flags & CRYPTO_F_IV_GENERATE) {
1357 arc4rand(iv, csp->csp_ivlen, 0);
1358 crypto_copyback(crp, crp->crp_iv_start, csp->csp_ivlen, iv);
1359 } else if (crp->crp_flags & CRYPTO_F_IV_SEPARATE)
1360 memcpy(iv, crp->crp_iv, csp->csp_ivlen);
1362 crypto_copydata(crp, crp->crp_iv_start, csp->csp_ivlen, iv);
1365 * If the input IV is 12 bytes, append an explicit counter of 1.
1367 if (csp->csp_cipher_alg == CRYPTO_AES_NIST_GCM_16 &&
1368 csp->csp_ivlen == 12)
/* NOTE(review): type-punned store; assumes iv + 12 is suitably aligned. */
1369 *(uint32_t *)&iv[12] = htobe32(1);
1371 if (csp->csp_cipher_alg == CRYPTO_AES_XTS &&
1372 csp->csp_ivlen < AES_BLOCK_LEN)
1373 memset(&iv[csp->csp_ivlen], 0, AES_BLOCK_LEN - csp->csp_ivlen);
1375 /* Reverse order of IV material for HW */
1376 INSECURE_DEBUG(NULL, "%s: IV: %16D len: %u\n", __func__, iv, " ",
1380 * For unknown reasons, XTS mode expects the IV in the reverse byte
1381 * order to every other AES mode.
1383 if (csp->csp_cipher_alg != CRYPTO_AES_XTS)
1384 ccp_byteswap(iv, AES_BLOCK_LEN);
/*
 * Queue a passthrough operation that transfers len bytes between the buffer
 * at src and the LSB address lsbaddr.  The buffer is described by a
 * scatter/gather list built in qp->cq_sg_ulptx, so any previous contents of
 * that list are discarded; callers must rebuild it afterwards if they need
 * it.
 * NOTE(review): the boolean passed to ccp_passthrough_sgl() presumably
 * selects the to-LSB direction, and the remaining arguments are not visible
 * in this excerpt -- confirm against ccp_passthrough_sgl().
 */
1387 static int __must_check
1388 ccp_do_pst_to_lsb(struct ccp_queue *qp, uint32_t lsbaddr, const void *src,
1393 sglist_reset(qp->cq_sg_ulptx);
/* sglist_append() takes a non-const pointer, hence the __DECONST. */
1394 error = sglist_append(qp->cq_sg_ulptx, __DECONST(void *, src), len);
1398 error = ccp_passthrough_sgl(qp, lsbaddr, true, qp->cq_sg_ulptx, len,
/*
 * Emit the XTS-AES descriptors for a request: one descriptor per segment of
 * qp->cq_sg_ulptx, each operating in place on system memory (destination
 * address equals source address).
 *
 * The XTS unit size id is looked up in ccp_xts_unitsize_map by matching the
 * whole payload length against cxu_size.  The key is referenced from the LSB
 * at LSB_ENTRY_KEY and the IV context from LSB_ENTRY_IV; the caller must have
 * loaded both already.
 *
 * Only the last descriptor (eom) requests an interrupt, and only when a
 * completion context was supplied.
 * NOTE(review): the statement taken when no unit size matches, the condition
 * guarding the completions_ring copy and the return statements are not
 * visible in this excerpt.
 */
1403 static int __must_check
1404 ccp_do_xts(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp,
1405 enum ccp_cipher_dir dir, const struct ccp_completion_ctx *cctx)
1407 struct ccp_desc *desc;
1410 enum ccp_xts_unitsize usize;
1412 /* IV and Key data are already loaded */
1414 dev = qp->cq_softc->dev;
1416 for (i = 0; i < nitems(ccp_xts_unitsize_map); i++)
1417 if (ccp_xts_unitsize_map[i].cxu_size ==
1418 crp->crp_payload_length) {
1419 usize = ccp_xts_unitsize_map[i].cxu_id;
/* i reaching the table size means no entry matched the payload length. */
1422 if (i >= nitems(ccp_xts_unitsize_map))
1425 for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) {
1426 struct sglist_seg *seg;
1428 seg = &qp->cq_sg_ulptx->sg_segs[i];
1430 desc = &qp->desc_ring[qp->cq_tail];
1431 desc->engine = CCP_ENGINE_XTS_AES;
1432 desc->som = (i == 0);
1433 desc->eom = (i == qp->cq_sg_ulptx->sg_nseg - 1);
1434 desc->ioc = (desc->eom && cctx != NULL);
1435 DPRINTF(dev, "%s: XTS %u: som:%d eom:%d ioc:%d dir:%d\n",
1436 __func__, qp->cq_tail, (int)desc->som, (int)desc->eom,
1437 (int)desc->ioc, (int)dir);
/* The completion context is stored at the same ring index as the descriptor. */
1440 memcpy(&qp->completions_ring[qp->cq_tail], cctx,
1443 desc->aes_xts.encrypt = dir;
1444 desc->aes_xts.type = s->blkcipher.cipher_type;
1445 desc->aes_xts.size = usize;
1447 DPRINTF(dev, "XXX %s: XTS %u: type:%u size:%u\n", __func__,
1448 qp->cq_tail, (unsigned)desc->aes_xts.type,
1449 (unsigned)desc->aes_xts.size);
1451 desc->length = seg->ss_len;
/* The physical address is split into low and high 32-bit halves. */
1452 desc->src_lo = (uint32_t)seg->ss_paddr;
1453 desc->src_hi = (seg->ss_paddr >> 32);
1454 desc->src_mem = CCP_MEMTYPE_SYSTEM;
1456 /* Crypt in-place */
1457 desc->dst_lo = desc->src_lo;
1458 desc->dst_hi = desc->src_hi;
1459 desc->dst_mem = desc->src_mem;
1461 desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY);
1463 desc->key_mem = CCP_MEMTYPE_SB;
1465 desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV);
/* Advance the tail index, wrapping at the power-of-two ring size. */
1467 qp->cq_tail = (qp->cq_tail + 1) %
1468 (1 << qp->cq_softc->ring_size_order);
/*
 * Queue the hardware operations for an AES block-cipher request (CBC, ICM or
 * XTS).
 *
 * Steps visible in this function:
 *   1. Reject requests with an empty key or payload, a payload length that is
 *      not a multiple of AES_BLOCK_LEN, or any segment of qp->cq_sg_crp whose
 *      length is not a multiple of AES_BLOCK_LEN.
 *   2. Collect the IV into s->blkcipher.iv via ccp_collect_iv() and copy it
 *      to the LSB entry LSB_ENTRY_IV.  XTS always transfers AES_BLOCK_LEN
 *      bytes; other modes transfer csp_ivlen bytes.
 *   3. For XTS, require the payload length to equal one of the unit sizes in
 *      ccp_xts_unitsize_map, otherwise return EOPNOTSUPP.
 *   4. Byte-reverse the key and copy it to the LSB.  XTS splits the key into
 *      two halves stored at LSB_ENTRY_KEY + 1 and LSB_ENTRY_KEY, padding with
 *      zeroes when the combined key length is 32 bytes.
 *   5. Point qp->cq_sg_ulptx at the payload region and emit one descriptor
 *      per segment, operating in place.  XTS delegates this to ccp_do_xts().
 *
 * cctx is the completion context; only the final descriptor requests an
 * interrupt, and only when cctx is not NULL.
 *
 * NOTE(review): for CBC and ICM, keydata aliases s->blkcipher.enckey, so the
 * byte reversal modifies the session key buffer in place.  Confirm that the
 * key is reloaded (or reversed back) before the next request.
 * NOTE(review): this excerpt omits a number of lines (declarations, error
 * checks, return statements, the XTS keydata assignment); confirm details
 * against the full file.
 */
1473 static int __must_check
1474 ccp_do_blkcipher(struct ccp_queue *qp, struct ccp_session *s,
1475 struct cryptop *crp, const struct ccp_completion_ctx *cctx)
1477 const struct crypto_session_params *csp;
1478 struct ccp_desc *desc;
1481 enum ccp_cipher_dir dir;
1486 dev = qp->cq_softc->dev;
1488 if (s->blkcipher.key_len == 0 || crp->crp_payload_length == 0) {
1489 DPRINTF(dev, "%s: empty\n", __func__);
1492 if ((crp->crp_payload_length % AES_BLOCK_LEN) != 0) {
1493 DPRINTF(dev, "%s: len modulo: %d\n", __func__,
1494 crp->crp_payload_length);
1499 * Individual segments must be multiples of AES block size for the HW
1500 * to process it. Non-compliant inputs aren't bogus, just not doable
1503 for (i = 0; i < qp->cq_sg_crp->sg_nseg; i++)
1504 if ((qp->cq_sg_crp->sg_segs[i].ss_len % AES_BLOCK_LEN) != 0) {
1505 DPRINTF(dev, "%s: seg modulo: %zu\n", __func__,
1506 qp->cq_sg_crp->sg_segs[i].ss_len);
1510 /* Gather IV/nonce data */
1511 csp = crypto_get_params(crp->crp_session);
1512 ccp_collect_iv(crp, csp, s->blkcipher.iv);
1513 iv_len = csp->csp_ivlen;
1514 if (csp->csp_cipher_alg == CRYPTO_AES_XTS)
1515 iv_len = AES_BLOCK_LEN;
1517 if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op))
1518 dir = CCP_CIPHER_DIR_ENCRYPT;
1520 dir = CCP_CIPHER_DIR_DECRYPT;
1522 /* Set up passthrough op(s) to copy IV into LSB */
1523 error = ccp_do_pst_to_lsb(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_IV),
1524 s->blkcipher.iv, iv_len);
1529 * Initialize keydata and keydata_len for GCC. The default case of the
1530 * following switch is impossible to reach, but GCC doesn't know that.
1535 switch (csp->csp_cipher_alg) {
1536 case CRYPTO_AES_XTS:
1537 for (j = 0; j < nitems(ccp_xts_unitsize_map); j++)
1538 if (ccp_xts_unitsize_map[j].cxu_size ==
1539 crp->crp_payload_length)
1541 /* Input buffer must be a supported UnitSize */
1542 if (j >= nitems(ccp_xts_unitsize_map)) {
1543 device_printf(dev, "%s: rejected block size: %u\n",
1544 __func__, crp->crp_payload_length);
1545 return (EOPNOTSUPP);
1548 case CRYPTO_AES_CBC:
1549 case CRYPTO_AES_ICM:
1550 keydata = s->blkcipher.enckey;
1551 keydata_len = s->blkcipher.key_len;
1555 INSECURE_DEBUG(dev, "%s: KEY(%zu): %16D\n", __func__, keydata_len,
1557 if (csp->csp_cipher_alg == CRYPTO_AES_XTS)
1558 INSECURE_DEBUG(dev, "%s: KEY(XTS): %64D\n", __func__, keydata, " ");
1560 /* Reverse order of key material for HW */
1561 ccp_byteswap(keydata, keydata_len);
1563 /* Store key material into LSB to avoid page boundaries */
1564 if (csp->csp_cipher_alg == CRYPTO_AES_XTS) {
1566 * XTS mode uses 2 256-bit vectors for the primary key and the
1567 * tweak key. For 128-bit keys, the vectors are zero-padded.
1569 * After byteswapping the combined OCF-provided K1:K2 vector
1570 * above, we need to reverse the order again so the hardware
1571 * gets the swapped keys in the order K1':K2'.
1573 error = ccp_do_pst_to_lsb(qp,
1574 ccp_queue_lsb_address(qp, LSB_ENTRY_KEY + 1), keydata,
1578 error = ccp_do_pst_to_lsb(qp,
1579 ccp_queue_lsb_address(qp, LSB_ENTRY_KEY),
1580 keydata + (keydata_len / 2), keydata_len / 2);
1582 /* Zero-pad 128 bit keys */
/* keydata_len covers both XTS keys, so 32 bytes means two 128-bit keys. */
1583 if (keydata_len == 32) {
1586 error = ccp_do_pst_to_lsb(qp,
1587 ccp_queue_lsb_address(qp, LSB_ENTRY_KEY) +
1588 keydata_len / 2, g_zeroes, keydata_len / 2);
1591 error = ccp_do_pst_to_lsb(qp,
1592 ccp_queue_lsb_address(qp, LSB_ENTRY_KEY + 1) +
1593 keydata_len / 2, g_zeroes, keydata_len / 2);
1596 error = ccp_do_pst_to_lsb(qp,
1597 ccp_queue_lsb_address(qp, LSB_ENTRY_KEY), keydata,
1603 * Point SGLs at the subset of cryptop buffer contents representing the
/* cq_sg_ulptx was reused by the passthrough copies above; rebuild it here. */
1606 sglist_reset(qp->cq_sg_ulptx);
1607 error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp,
1608 crp->crp_payload_start, crp->crp_payload_length);
1612 INSECURE_DEBUG(dev, "%s: Contents: %16D\n", __func__,
1613 (void *)PHYS_TO_DMAP(qp->cq_sg_ulptx->sg_segs[0].ss_paddr), " ");
1615 DPRINTF(dev, "%s: starting AES ops @ %u\n", __func__, qp->cq_tail);
/* One descriptor is emitted per segment, so sg_nseg ring slots are needed. */
1617 if (ccp_queue_get_ring_space(qp) < qp->cq_sg_ulptx->sg_nseg)
1620 if (csp->csp_cipher_alg == CRYPTO_AES_XTS)
1621 return (ccp_do_xts(qp, s, crp, dir, cctx));
1623 for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) {
1624 struct sglist_seg *seg;
1626 seg = &qp->cq_sg_ulptx->sg_segs[i];
1628 desc = &qp->desc_ring[qp->cq_tail];
1629 desc->engine = CCP_ENGINE_AES;
1630 desc->som = (i == 0);
1631 desc->eom = (i == qp->cq_sg_ulptx->sg_nseg - 1);
1632 desc->ioc = (desc->eom && cctx != NULL);
1633 DPRINTF(dev, "%s: AES %u: som:%d eom:%d ioc:%d dir:%d\n",
1634 __func__, qp->cq_tail, (int)desc->som, (int)desc->eom,
1635 (int)desc->ioc, (int)dir);
/* The completion context is stored at the same ring index as the descriptor. */
1638 memcpy(&qp->completions_ring[qp->cq_tail], cctx,
1641 desc->aes.encrypt = dir;
1642 desc->aes.mode = s->blkcipher.cipher_mode;
1643 desc->aes.type = s->blkcipher.cipher_type;
1644 if (csp->csp_cipher_alg == CRYPTO_AES_ICM)
1646 * Size of CTR value in bits, - 1. ICM mode uses all
1647 * 128 bits as counter.
1649 desc->aes.size = 127;
1651 DPRINTF(dev, "%s: AES %u: mode:%u type:%u size:%u\n", __func__,
1652 qp->cq_tail, (unsigned)desc->aes.mode,
1653 (unsigned)desc->aes.type, (unsigned)desc->aes.size);
1655 desc->length = seg->ss_len;
1656 desc->src_lo = (uint32_t)seg->ss_paddr;
1657 desc->src_hi = (seg->ss_paddr >> 32);
1658 desc->src_mem = CCP_MEMTYPE_SYSTEM;
1660 /* Crypt in-place */
1661 desc->dst_lo = desc->src_lo;
1662 desc->dst_hi = desc->src_hi;
1663 desc->dst_mem = desc->src_mem;
1665 desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY);
1667 desc->key_mem = CCP_MEMTYPE_SB;
1669 desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV);
/* Advance the tail index, wrapping at the power-of-two ring size. */
1671 qp->cq_tail = (qp->cq_tail + 1) %
1672 (1 << qp->cq_softc->ring_size_order);
/*
 * Entry point for a cipher-only request.  Builds a completion context whose
 * callback is ccp_blkcipher_done() with crp as its argument, then submits the
 * request through ccp_do_blkcipher() and returns its result.
 */
1678 ccp_blkcipher(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp)
1680 struct ccp_completion_ctx ctx;
1682 ctx.callback_fn = ccp_blkcipher_done;
1684 ctx.callback_arg = crp;
1686 return (ccp_do_blkcipher(qp, s, crp, &ctx));
/*
 * Completion callback for combined cipher + HMAC requests.  Wipes the
 * per-session IV scratch buffer, then finishes the HMAC in software via
 * ccp_do_hmac_done().
 * NOTE(review): the assignment of crp (presumably from vcrp) is not visible
 * in this excerpt.
 */
1690 ccp_authenc_done(struct ccp_queue *qp, struct ccp_session *s, void *vcrp,
1693 struct cryptop *crp;
1695 explicit_bzero(&s->blkcipher.iv, sizeof(s->blkcipher.iv));
1699 ccp_do_hmac_done(qp, s, crp, error);
/*
 * Entry point for a combined cipher + HMAC request.  Two operations are
 * queued back to back; only the second carries the completion context
 * (callback ccp_authenc_done(), argument crp), so the request completes once.
 *
 * The order depends on the direction: when encrypting, the cipher runs first
 * and the HMAC second; the other pair of calls (HMAC first, cipher second)
 * is presumably the decrypt path -- the else keywords and the error check
 * between the two steps are not visible in this excerpt.
 */
1703 ccp_authenc(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp)
1705 struct ccp_completion_ctx ctx;
1708 ctx.callback_fn = ccp_authenc_done;
1710 ctx.callback_arg = crp;
1712 /* Perform first operation */
1713 if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op))
1714 error = ccp_do_blkcipher(qp, s, crp, NULL);
1716 error = ccp_do_hmac(qp, s, crp, NULL);
1720 /* Perform second operation */
1721 if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op))
1722 error = ccp_do_hmac(qp, s, crp, &ctx);
1724 error = ccp_do_blkcipher(qp, s, crp, &ctx);
/*
 * Emit the GHASH-over-AAD descriptors for a GCM request: one AES-engine
 * descriptor in GHASH mode per segment of qp->cq_sg_ulptx, which the caller
 * must have pointed at the AAD region.  The source is system memory; the key
 * is referenced from the LSB at LSB_ENTRY_KEY and the context from
 * LSB_ENTRY_IV.  No destination address is set in the visible lines.
 *
 * The ring must have at least sg_nseg free slots.
 * NOTE(review): the statement taken when the ring is too full, any
 * initialisation of the descriptor and the return statement are not visible
 * in this excerpt.
 */
1728 static int __must_check
1729 ccp_do_ghash_aad(struct ccp_queue *qp, struct ccp_session *s)
1731 struct ccp_desc *desc;
1732 struct sglist_seg *seg;
1735 if (ccp_queue_get_ring_space(qp) < qp->cq_sg_ulptx->sg_nseg)
1738 for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) {
1739 seg = &qp->cq_sg_ulptx->sg_segs[i];
1741 desc = &qp->desc_ring[qp->cq_tail];
1743 desc->engine = CCP_ENGINE_AES;
1744 desc->aes.mode = CCP_AES_MODE_GHASH;
1745 desc->aes.type = s->blkcipher.cipher_type;
/* In GHASH mode the encrypt field carries the GHASH sub-operation (AAD here). */
1746 desc->aes.encrypt = CCP_AES_MODE_GHASH_AAD;
1748 desc->som = (i == 0);
1749 desc->length = seg->ss_len;
1751 desc->src_lo = (uint32_t)seg->ss_paddr;
1752 desc->src_hi = (seg->ss_paddr >> 32);
1753 desc->src_mem = CCP_MEMTYPE_SYSTEM;
1755 desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV);
1757 desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY);
1758 desc->key_mem = CCP_MEMTYPE_SB;
/* Advance the tail index, wrapping at the power-of-two ring size. */
1760 qp->cq_tail = (qp->cq_tail + 1) %
1761 (1 << qp->cq_softc->ring_size_order);
/*
 * Emit a single GCTR descriptor that encrypts or decrypts one segment in
 * place (source and destination are the same system-memory address).  The
 * key is referenced from the LSB at LSB_ENTRY_KEY and the context from
 * LSB_ENTRY_IV.  Requires one free ring slot.
 *
 * som and eom are supplied by the caller.
 * NOTE(review): the lines that use som and eom, the statement taken when the
 * ring is full and the return statement are not visible in this excerpt.
 */
1766 static int __must_check
1767 ccp_do_gctr(struct ccp_queue *qp, struct ccp_session *s,
1768 enum ccp_cipher_dir dir, struct sglist_seg *seg, bool som, bool eom)
1770 struct ccp_desc *desc;
1772 if (ccp_queue_get_ring_space(qp) == 0)
1775 desc = &qp->desc_ring[qp->cq_tail];
1777 desc->engine = CCP_ENGINE_AES;
1778 desc->aes.mode = CCP_AES_MODE_GCTR;
1779 desc->aes.type = s->blkcipher.cipher_type;
1780 desc->aes.encrypt = dir;
/*
 * Bit count of the trailing partial block, minus one.
 * NOTE(review): for a block-aligned segment the expression is -1 before it
 * is converted to the type of the size field; confirm that the field width
 * turns this into the intended full-block value.
 */
1781 desc->aes.size = 8 * (seg->ss_len % GMAC_BLOCK_LEN) - 1;
1786 /* Trailing bytes will be masked off by aes.size above. */
1787 desc->length = roundup2(seg->ss_len, GMAC_BLOCK_LEN);
1789 desc->dst_lo = desc->src_lo = (uint32_t)seg->ss_paddr;
1790 desc->dst_hi = desc->src_hi = seg->ss_paddr >> 32;
1791 desc->dst_mem = desc->src_mem = CCP_MEMTYPE_SYSTEM;
1793 desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV);
1795 desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY);
1796 desc->key_mem = CCP_MEMTYPE_SB;
/* Advance the tail index, wrapping at the power-of-two ring size. */
1798 qp->cq_tail = (qp->cq_tail + 1) %
1799 (1 << qp->cq_softc->ring_size_order);
/*
 * Emit the final GHASH descriptor of a GCM request.  It processes one
 * GMAC_BLOCK_LEN block read from the LSB entry LSB_ENTRY_GHASH_IN and writes
 * its result to the LSB entry LSB_ENTRY_GHASH; both source and destination
 * are in the LSB (CCP_MEMTYPE_SB), not system memory.  The key is referenced
 * from LSB_ENTRY_KEY and the context from LSB_ENTRY_IV.  Requires one free
 * ring slot.
 * NOTE(review): the statement taken when the ring is full, any descriptor
 * initialisation and the return statement are not visible in this excerpt.
 */
1803 static int __must_check
1804 ccp_do_ghash_final(struct ccp_queue *qp, struct ccp_session *s)
1806 struct ccp_desc *desc;
1808 if (ccp_queue_get_ring_space(qp) == 0)
1811 desc = &qp->desc_ring[qp->cq_tail];
1813 desc->engine = CCP_ENGINE_AES;
1814 desc->aes.mode = CCP_AES_MODE_GHASH;
1815 desc->aes.type = s->blkcipher.cipher_type;
/* In GHASH mode the encrypt field carries the GHASH sub-operation (final here). */
1816 desc->aes.encrypt = CCP_AES_MODE_GHASH_FINAL;
1818 desc->length = GMAC_BLOCK_LEN;
1820 desc->src_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_GHASH_IN);
1821 desc->src_mem = CCP_MEMTYPE_SB;
1823 desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV);
1825 desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY);
1826 desc->key_mem = CCP_MEMTYPE_SB;
1828 desc->dst_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_GHASH);
1829 desc->dst_mem = CCP_MEMTYPE_SB;
/* Advance the tail index, wrapping at the power-of-two ring size. */
1831 qp->cq_tail = (qp->cq_tail + 1) %
1832 (1 << qp->cq_softc->ring_size_order);
/*
 * Completion callback for AES-GCM requests.  Records the hardware error code
 * in crp->crp_etype.  For decrypt requests it reads the tag supplied with the
 * request at crp_digest_start and compares it, in constant time, against the
 * computed tag held in s->gmac.final_block; a mismatch sets EBADMSG.
 * Encrypt requests need no check here.  The session IV scratch buffer and
 * final_block are wiped afterwards.
 * NOTE(review): the assignment of crp (presumably from vcrp), the condition
 * guarding the crp_etype assignment, the jump targets and the call that hands
 * the request back to the crypto framework are not visible in this excerpt.
 */
1837 ccp_gcm_done(struct ccp_queue *qp, struct ccp_session *s, void *vcrp,
1840 char tag[GMAC_DIGEST_LEN];
1841 struct cryptop *crp;
1848 crp->crp_etype = error;
1852 /* Encrypt is done. Decrypt needs to verify tag. */
1853 if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op))
1856 /* Copy in message tag. */
1857 crypto_copydata(crp, crp->crp_digest_start, s->gmac.hash_len, tag);
1859 /* Verify tag against computed GMAC */
1860 if (timingsafe_bcmp(tag, s->gmac.final_block, s->gmac.hash_len) != 0)
1861 crp->crp_etype = EBADMSG;
1864 explicit_bzero(&s->blkcipher.iv, sizeof(s->blkcipher.iv));
1865 explicit_bzero(&s->gmac.final_block, sizeof(s->gmac.final_block));
/*
 * Entry point for an AES-GCM request.  Queues, in order:
 *   1. passthrough copies of the IV context (32 bytes of s->blkcipher.iv),
 *      the byte-reversed key and the lengths block into the LSB;
 *   2. GHASH over the AAD region, if crp_aad_length is non-zero
 *      (ccp_do_ghash_aad);
 *   3. one GCTR descriptor per payload segment (ccp_do_gctr);
 *   4. a second copy of the first AES_BLOCK_LEN bytes of the IV into the LSB;
 *   5. the final GHASH over the lengths block (ccp_do_ghash_final);
 *   6. a passthrough copy of the computed tag out of the LSB entry
 *      LSB_ENTRY_GHASH -- into the request at crp_digest_start when
 *      encrypting, or into s->gmac.final_block for verification in
 *      ccp_gcm_done(), which is the completion callback.
 *
 * The lengths block holds the AAD length and the payload length, each in
 * bits, as big-endian 64-bit values.
 *
 * Every AAD segment must be a multiple of GMAC_BLOCK_LEN and every payload
 * segment a multiple of AES_BLOCK_LEN; other inputs are rejected.
 *
 * NOTE(review): this excerpt omits a number of lines (declarations, error
 * checks, return statements, else keywords); confirm details against the
 * full file.
 */
1870 ccp_gcm(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp)
1872 const struct crypto_session_params *csp;
1873 struct ccp_completion_ctx ctx;
1874 enum ccp_cipher_dir dir;
1879 if (s->blkcipher.key_len == 0)
1882 dev = qp->cq_softc->dev;
1884 if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op))
1885 dir = CCP_CIPHER_DIR_ENCRYPT;
1887 dir = CCP_CIPHER_DIR_DECRYPT;
1889 /* Zero initial GHASH portion of context */
1890 memset(s->blkcipher.iv, 0, sizeof(s->blkcipher.iv));
1892 /* Gather IV data */
1893 csp = crypto_get_params(crp->crp_session);
1894 ccp_collect_iv(crp, csp, s->blkcipher.iv);
1896 /* Reverse order of key material for HW */
/*
 * NOTE(review): this reverses the session key buffer in place on every
 * request; confirm that the key is reloaded (or reversed back) before the
 * next request on this session.
 */
1897 ccp_byteswap(s->blkcipher.enckey, s->blkcipher.key_len);
1899 /* Prepare input buffer of concatenated lengths for final GHASH */
1900 be64enc(s->gmac.final_block, (uint64_t)crp->crp_aad_length * 8);
1901 be64enc(&s->gmac.final_block[8], (uint64_t)crp->crp_payload_length * 8);
1903 /* Send IV + initial zero GHASH, key data, and lengths buffer to LSB */
1904 error = ccp_do_pst_to_lsb(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_IV),
1905 s->blkcipher.iv, 32);
1908 error = ccp_do_pst_to_lsb(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_KEY),
1909 s->blkcipher.enckey, s->blkcipher.key_len);
1912 error = ccp_do_pst_to_lsb(qp,
1913 ccp_queue_lsb_address(qp, LSB_ENTRY_GHASH_IN), s->gmac.final_block,
1918 /* First step - compute GHASH over AAD */
1919 if (crp->crp_aad_length != 0) {
/* cq_sg_ulptx was reused by the passthrough copies above; rebuild it here. */
1920 sglist_reset(qp->cq_sg_ulptx);
1921 error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp,
1922 crp->crp_aad_start, crp->crp_aad_length);
1926 /* This engine cannot process non-block multiple AAD data. */
1927 for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++)
1928 if ((qp->cq_sg_ulptx->sg_segs[i].ss_len %
1929 GMAC_BLOCK_LEN) != 0) {
1930 DPRINTF(dev, "%s: AD seg modulo: %zu\n",
1932 qp->cq_sg_ulptx->sg_segs[i].ss_len);
1936 error = ccp_do_ghash_aad(qp, s);
1941 /* Feed data piece by piece into GCTR */
1942 sglist_reset(qp->cq_sg_ulptx);
1943 error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp,
1944 crp->crp_payload_start, crp->crp_payload_length);
1949 * All segments except the last must be even multiples of AES block
1950 * size for the HW to process it. Non-compliant inputs aren't bogus,
1951 * just not doable on this hardware.
1953 * XXX: Well, the hardware will produce a valid tag for shorter final
1954 * segment inputs, but it will still write out a block-sized plaintext
1955 * or ciphertext chunk. For a typical CRP this tramples trailing data,
1956 * including the provided message tag. So, reject such inputs for now.
1958 for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++)
1959 if ((qp->cq_sg_ulptx->sg_segs[i].ss_len % AES_BLOCK_LEN) != 0) {
1960 DPRINTF(dev, "%s: seg modulo: %zu\n", __func__,
1961 qp->cq_sg_ulptx->sg_segs[i].ss_len);
1965 for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) {
1966 struct sglist_seg *seg;
1968 seg = &qp->cq_sg_ulptx->sg_segs[i];
1969 error = ccp_do_gctr(qp, s, dir, seg,
1970 (i == 0 && crp->crp_aad_length == 0),
1971 i == (qp->cq_sg_ulptx->sg_nseg - 1));
1976 /* Send just initial IV (not GHASH!) to LSB again */
1977 error = ccp_do_pst_to_lsb(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_IV),
1978 s->blkcipher.iv, AES_BLOCK_LEN);
1982 ctx.callback_fn = ccp_gcm_done;
1984 ctx.callback_arg = crp;
1986 /* Compute final hash and copy result back */
1987 error = ccp_do_ghash_final(qp, s);
1991 /* When encrypting, copy computed tag out to caller buffer. */
1992 sglist_reset(qp->cq_sg_ulptx);
1993 if (dir == CCP_CIPHER_DIR_ENCRYPT)
1994 error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp,
1995 crp->crp_digest_start, s->gmac.hash_len);
1998 * For decrypting, copy the computed tag out to our session
1999 * buffer to verify in our callback.
2001 error = sglist_append(qp->cq_sg_ulptx, s->gmac.final_block,
2005 error = ccp_passthrough_sgl(qp,
2006 ccp_queue_lsb_address(qp, LSB_ENTRY_GHASH), false, qp->cq_sg_ulptx,
2007 s->gmac.hash_len, true, &ctx);
/* Upper bound on reads of the TRNG output register per output word. */
2011 #define MAX_TRNG_RETRIES 10
/*
 * Read up to c bytes of hardware random data into the buffer v, one
 * sizeof(*buf) word at a time, from the TRNG_OUT_OFFSET register of the
 * global softc g_ccp_softc.  c must be a multiple of the word size (asserted
 * with KASSERT).  Each word is attempted at most MAX_TRNG_RETRIES times.
 * NOTE(review): the declaration of buf, the test that decides whether a read
 * was successful, the action taken once the retries are exhausted and the
 * return value are not visible in this excerpt.  The KASSERT message says
 * u_long while the register read is a 4-byte read; confirm the element type
 * of buf.
 */
2013 random_ccp_read(void *v, u_int c)
2018 KASSERT(c % sizeof(*buf) == 0, ("%u not multiple of u_long", c));
2021 for (i = c; i > 0; i -= sizeof(*buf)) {
2022 for (j = 0; j < MAX_TRNG_RETRIES; j++) {
2023 *buf = ccp_read_4(g_ccp_softc, TRNG_OUT_OFFSET);
/* j reaching the limit means every attempt for this word was rejected. */
2027 if (j == MAX_TRNG_RETRIES)
/*
 * Debugger helper: print the device-wide CCP registers of sc (queue mask and
 * priority, request id config, TRNG output, command timeout, LSB
 * public/private masks and the version register), one per line, in hex.
 * Note that this performs a read of the TRNG output register.
 */
2037 db_ccp_show_hw(struct ccp_softc *sc)
2040 db_printf(" queue mask: 0x%x\n",
2041 ccp_read_4(sc, CMD_QUEUE_MASK_OFFSET));
2042 db_printf(" queue prio: 0x%x\n",
2043 ccp_read_4(sc, CMD_QUEUE_PRIO_OFFSET));
2044 db_printf(" reqid: 0x%x\n", ccp_read_4(sc, CMD_REQID_CONFIG_OFFSET));
2045 db_printf(" trng output: 0x%x\n", ccp_read_4(sc, TRNG_OUT_OFFSET));
2046 db_printf(" cmd timeout: 0x%x\n",
2047 ccp_read_4(sc, CMD_CMD_TIMEOUT_OFFSET));
2048 db_printf(" lsb public mask lo: 0x%x\n",
2049 ccp_read_4(sc, LSB_PUBLIC_MASK_LO_OFFSET));
2050 db_printf(" lsb public mask hi: 0x%x\n",
2051 ccp_read_4(sc, LSB_PUBLIC_MASK_HI_OFFSET));
2052 db_printf(" lsb private mask lo: 0x%x\n",
2053 ccp_read_4(sc, LSB_PRIVATE_MASK_LO_OFFSET));
2054 db_printf(" lsb private mask hi: 0x%x\n",
2055 ccp_read_4(sc, LSB_PRIVATE_MASK_HI_OFFSET));
2056 db_printf(" version: 0x%x\n", ccp_read_4(sc, VERSION_REG));
/*
 * Debugger helper: print the per-queue CCP registers for qp (control, tail
 * and head pointers, interrupt enable/status, status) and decode the status
 * register into an error code, error source and faulting LSB block.  The
 * error code is looked up in ccp_error_codes to print its name and
 * description, and the descriptor the head pointer refers to is dumped.
 * NOTE(review): the assignments of sc and q, the initialisation of ec, the
 * early exit when there is no error and the end of the function are not
 * visible in this excerpt.
 */
2060 db_ccp_show_queue_hw(struct ccp_queue *qp)
2062 const struct ccp_error_code *ec;
2063 struct ccp_softc *sc;
2064 uint32_t status, error, esource, faultblock, headlo, qcontrol;
2070 qcontrol = ccp_read_queue_4(sc, q, CMD_Q_CONTROL_BASE);
2071 db_printf(" qcontrol: 0x%x%s%s\n", qcontrol,
2072 (qcontrol & CMD_Q_RUN) ? " RUN" : "",
2073 (qcontrol & CMD_Q_HALTED) ? " HALTED" : "");
2074 db_printf(" tail_lo: 0x%x\n",
2075 ccp_read_queue_4(sc, q, CMD_Q_TAIL_LO_BASE));
2076 headlo = ccp_read_queue_4(sc, q, CMD_Q_HEAD_LO_BASE);
2077 db_printf(" head_lo: 0x%x\n", headlo);
2078 db_printf(" int enable: 0x%x\n",
2079 ccp_read_queue_4(sc, q, CMD_Q_INT_ENABLE_BASE));
2080 db_printf(" interrupt status: 0x%x\n",
2081 ccp_read_queue_4(sc, q, CMD_Q_INTERRUPT_STATUS_BASE));
2082 status = ccp_read_queue_4(sc, q, CMD_Q_STATUS_BASE);
2083 db_printf(" status: 0x%x\n", status);
2084 db_printf(" int stats: 0x%x\n",
2085 ccp_read_queue_4(sc, q, CMD_Q_INT_STATUS_BASE));
/* Split the status register into its error, source and fault-block fields. */
2087 error = status & STATUS_ERROR_MASK;
2091 esource = (status >> STATUS_ERRORSOURCE_SHIFT) &
2092 STATUS_ERRORSOURCE_MASK;
2093 faultblock = (status >> STATUS_VLSB_FAULTBLOCK_SHIFT) &
2094 STATUS_VLSB_FAULTBLOCK_MASK;
/* Linear search of the error table; i past the end means an unknown code. */
2097 for (i = 0; i < nitems(ccp_error_codes); i++)
2098 if (ccp_error_codes[i].ce_code == error)
2100 if (i < nitems(ccp_error_codes))
2101 ec = &ccp_error_codes[i];
2103 db_printf(" Error: %s (%u) Source: %u Faulting LSB block: %u\n",
2104 (ec != NULL) ? ec->ce_name : "(reserved)", error, esource,
2107 db_printf(" Error description: %s\n", ec->ce_desc);
/*
 * Convert the head pointer into a ring index using the low 32 bits of the
 * ring bus address and the descriptor size.
 */
2109 i = (headlo - (uint32_t)qp->desc_ring_bus_addr) / Q_DESC_SIZE;
2110 db_printf(" Bad descriptor idx: %u contents:\n %32D\n", i,
2111 (void *)&qp->desc_ring[i], " ");