/*
 * Copyright (c) 2013-2015, Mellanox Technologies, Ltd.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <linux/kref.h>
#include <linux/random.h>
#include <linux/delay.h>
#include <rdma/ib_umem.h>

#include "mlx5_ib.h"	/* driver-local definitions (mlx5_ib_dev, mlx5_ib_mr, ...) */

/* This file relies on the Linux semantics of PAGE_MASK, i.e. ~(PAGE_SIZE - 1). */
CTASSERT((uintptr_t)PAGE_MASK > (uintptr_t)PAGE_SIZE);

enum {
	MAX_PENDING_REG_MR = 8,
	MAX_MR_RELEASE_TIMEOUT = (60 * 20)	/* allow release timeout up to 20 min */
};

#define MLX5_UMR_ALIGN 2048

static int mlx5_mr_sysfs_init(struct mlx5_ib_dev *dev);
static void mlx5_mr_sysfs_cleanup(struct mlx5_ib_dev *dev);

static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
	int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr);

	return err;
}

static int order2idx(struct mlx5_ib_dev *dev, int order)
{
	struct mlx5_mr_cache *cache = &dev->cache;

	if (order < cache->ent[0].order)
		return 0;
	else
		return order - cache->ent[0].order;
}
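
/*
 * Completion callback for the asynchronous mkey creation requests issued
 * by add_keys().  On success the new MR gets a fresh variable-key byte,
 * is appended to its cache bucket, and is inserted into the device-wide
 * mkey radix tree.  On any failure the MR is freed and further cache
 * refills are throttled for one second via the delay timer.
 */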

static void reg_mr_callback(int status, void *context)
{
	struct mlx5_ib_mr *mr = context;
	struct mlx5_ib_dev *dev = mr->dev;
	struct mlx5_mr_cache *cache = &dev->cache;
	int c = order2idx(dev, mr->order);
	struct mlx5_cache_ent *ent = &cache->ent[c];
	struct mlx5_core_dev *mdev = dev->mdev;
	struct mlx5_core_mr *mmr = &mr->mmr;
	struct mlx5_mr_table *table = &dev->mdev->priv.mr_table;
	unsigned long flags;
	int err;
	u8 key;

	spin_lock_irqsave(&ent->lock, flags);
	ent->pending--;
	spin_unlock_irqrestore(&ent->lock, flags);
	if (status) {
		mlx5_ib_warn(dev, "async reg mr failed. status %d, order %d\n", status, ent->order);
		kfree(mr);
		dev->fill_delay = 1;
		mod_timer(&dev->delay_timer, jiffies + HZ);
		return;
	}

	if (mr->out.hdr.status) {
		mlx5_ib_warn(dev, "failed - status %d, syndrome 0x%x\n",
			     mr->out.hdr.status,
			     be32_to_cpu(mr->out.hdr.syndrome));
		kfree(mr);
		dev->fill_delay = 1;
		mod_timer(&dev->delay_timer, jiffies + HZ);
		return;
	}

	spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags);
	key = dev->mdev->priv.mkey_key++;
	spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags);
	mmr->key = mlx5_idx_to_mkey(be32_to_cpu(mr->out.mkey) & 0xffffff) | key;
	mlx5_ib_dbg(dev, "callbacked mkey 0x%x created\n",
		    be32_to_cpu(mr->out.mkey));

	cache->last_add = jiffies;

	spin_lock_irqsave(&ent->lock, flags);
	list_add_tail(&mr->list, &ent->head);
	ent->cur++;
	ent->size++;
	spin_unlock_irqrestore(&ent->lock, flags);

	spin_lock_irqsave(&table->lock, flags);
	err = radix_tree_insert(&table->tree, mlx5_mkey_to_idx(mmr->key), mmr);
	spin_unlock_irqrestore(&table->lock, flags);
	if (err) {
		mlx5_ib_warn(dev, "failed radix tree insert of mkey 0x%x, %d\n",
			     mmr->key, err);
		mlx5_core_destroy_mkey(mdev, mmr);
	}
}
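
/*
 * Asynchronously create @num cached MRs for bucket @c.  At most
 * MAX_PENDING_REG_MR creations may be outstanding per bucket; past that
 * the function returns -EAGAIN and the worker retries later.  Note that
 * xlt_oct_size counts 16-byte octowords, each holding two 8-byte
 * translation entries, hence (npages + 1) / 2.
 */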

static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	struct mlx5_create_mkey_mbox_in *in;
	struct mlx5_ib_mr *mr;
	int npages = 1 << ent->order;
	int err = 0;
	int i;

	in = kzalloc(sizeof(*in), GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	for (i = 0; i < num; i++) {
		if (ent->pending >= MAX_PENDING_REG_MR) {
			err = -EAGAIN;
			break;
		}
		mr = kzalloc(sizeof(*mr), GFP_KERNEL);
		if (!mr) {
			err = -ENOMEM;
			break;
		}
		mr->order = ent->order;
		mr->umred = 1;
		mr->dev = dev;
		in->seg.status = MLX5_MKEY_STATUS_FREE;
		in->seg.xlt_oct_size = cpu_to_be32((npages + 1) / 2);
		in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
		in->seg.flags = MLX5_ACCESS_MODE_MTT | MLX5_PERM_UMR_EN;
		in->seg.log2_page_size = 12;

		spin_lock_irq(&ent->lock);
		ent->pending++;
		spin_unlock_irq(&ent->lock);
		err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in,
					    sizeof(*in), reg_mr_callback,
					    mr, &mr->out);
		if (err) {
			spin_lock_irq(&ent->lock);
			ent->pending--;
			spin_unlock_irq(&ent->lock);
			mlx5_ib_warn(dev, "create mkey failed %d\n", err);
			kfree(mr);
			break;
		}
	}

	kfree(in);
	return err;
}
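
/*
 * Pop up to @num MRs from the head of bucket @c and destroy their mkeys.
 * Used by the shrink path; stops early once the bucket is drained.
 */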

static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	struct mlx5_ib_mr *mr;
	int err;
	int i;

	for (i = 0; i < num; i++) {
		spin_lock_irq(&ent->lock);
		if (list_empty(&ent->head)) {
			spin_unlock_irq(&ent->lock);
			return;
		}
		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
		list_del(&mr->list);
		ent->cur--;
		ent->size--;
		spin_unlock_irq(&ent->lock);
		err = destroy_mkey(dev, mr);
		if (err)
			mlx5_ib_warn(dev, "failed destroy mkey\n");
		else
			kfree(mr);
	}
}

static int someone_adding(struct mlx5_mr_cache *cache)
{
	int i;

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		if (cache->ent[i].cur < cache->ent[i].limit)
			return 1;
	}
	return 0;
}

static int someone_releasing(struct mlx5_mr_cache *cache)
{
	int i;

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		if (cache->ent[i].cur > 2 * cache->ent[i].limit)
			return 1;
	}
	return 0;
}
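
/*
 * Core cache maintenance policy, shared by the immediate and delayed work
 * items: a bucket is refilled one MR at a time while it holds fewer than
 * 2 * limit entries (backing off when creation fails), and is shrunk once
 * it exceeds 2 * limit, either immediately (rel_imm) or only after
 * rel_timeout seconds have elapsed since the last addition while no
 * bucket is refilling.
 */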

static void __cache_work_func(struct mlx5_cache_ent *ent)
{
	struct mlx5_ib_dev *dev = ent->dev;
	struct mlx5_mr_cache *cache = &dev->cache;
	int i = order2idx(dev, ent->order);
	s64 dtime;
	int err;

	if (cache->stopped)
		return;

	ent = &dev->cache.ent[i];
	if (ent->cur < 2 * ent->limit && !dev->fill_delay) {
		err = add_keys(dev, i, 1);
		if (ent->cur < 2 * ent->limit) {
			if (err == -EAGAIN) {
				mlx5_ib_dbg(dev, "returned eagain, order %d\n",
					    i + 2);
				cancel_delayed_work(&ent->dwork);
				if (!queue_delayed_work(cache->wq, &ent->dwork,
							msecs_to_jiffies(3)))
					mlx5_ib_warn(dev, "failed queueing delayed work\n");
			} else if (err) {
				mlx5_ib_warn(dev, "command failed order %d, err %d\n",
					     i + 2, err);
				cancel_delayed_work(&ent->dwork);
				if (!queue_delayed_work(cache->wq, &ent->dwork,
							msecs_to_jiffies(1000)))
					mlx5_ib_warn(dev, "failed queueing delayed work\n");
			} else {
				if (!queue_work(cache->wq, &ent->work))
					mlx5_ib_warn(dev, "failed queueing work\n");
			}
		}
	} else if (ent->cur > 2 * ent->limit) {
		dtime = (cache->last_add + (s64)cache->rel_timeout * HZ) - jiffies;
		if (cache->rel_imm ||
		    (cache->rel_timeout >= 0 && !someone_adding(cache) && dtime <= 0)) {
			remove_keys(dev, i, 1);
			if (ent->cur > ent->limit)
				if (!queue_work(cache->wq, &ent->work))
					mlx5_ib_warn(dev, "failed queueing work\n");
		} else if (cache->rel_timeout >= 0) {
			dtime = max_t(s64, dtime, 0);
			dtime = min_t(s64, dtime, (MAX_MR_RELEASE_TIMEOUT * HZ));
			cancel_delayed_work(&ent->dwork);
			if (!queue_delayed_work(cache->wq, &ent->dwork, dtime))
				mlx5_ib_warn(dev, "failed queueing delayed work\n");
		}
	} else if (cache->rel_imm && !someone_releasing(cache)) {
		cache->rel_imm = 0;
	}
}

static void delayed_cache_work_func(struct work_struct *work)
{
	struct mlx5_cache_ent *ent;

	ent = container_of(work, struct mlx5_cache_ent, dwork.work);
	__cache_work_func(ent);
}

static void cache_work_func(struct work_struct *work)
{
	struct mlx5_cache_ent *ent;

	ent = container_of(work, struct mlx5_cache_ent, work);
	__cache_work_func(ent);
}
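
/*
 * Return a cache-born MR to its bucket; if the bucket is now more than
 * twice over its limit, kick the work item so the surplus is trimmed.
 */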

static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	int shrink = 0;
	int c;

	c = order2idx(dev, mr->order);
	if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
		mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c);
		return;
	}
	ent = &cache->ent[c];
	spin_lock_irq(&ent->lock);
	list_add_tail(&mr->list, &ent->head);
	ent->cur++;
	if (ent->cur > 2 * ent->limit)
		shrink = 1;
	spin_unlock_irq(&ent->lock);

	if (shrink)
		if (!queue_work(cache->wq, &ent->work))
			mlx5_ib_warn(dev, "failed queueing work\n");
}

static void clean_keys(struct mlx5_ib_dev *dev, int c)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	struct mlx5_ib_mr *mr;
	int err;

	cancel_delayed_work(&ent->dwork);
	while (1) {
		spin_lock_irq(&ent->lock);
		if (list_empty(&ent->head)) {
			spin_unlock_irq(&ent->lock);
			return;
		}
		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
		list_del(&mr->list);
		ent->cur--;
		ent->size--;
		spin_unlock_irq(&ent->lock);
		err = destroy_mkey(dev, mr);
		if (err)
			mlx5_ib_warn(dev, "failed destroy mkey 0x%x from order %d\n",
				     mr->mmr.key, ent->order);
		else
			kfree(mr);
	}
}

static void delay_time_func(unsigned long ctx)
{
	struct mlx5_ib_dev *dev = (struct mlx5_ib_dev *)ctx;

	dev->fill_delay = 0;
}

enum {
	MLX5_VF_MR_LIMIT = 2,
};
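
/*
 * Set up the MR cache: a single-threaded workqueue, the refill-throttle
 * timer, and MAX_MR_CACHE_ENTRIES buckets starting at order 2.  Physical
 * functions take per-bucket limits from the device profile while virtual
 * functions use the fixed MLX5_VF_MR_LIMIT.
 */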

int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	int limit;
	int err;
	int i;

	mutex_init(&dev->slow_path_mutex);
	cache->rel_timeout = 300;
	cache->wq = create_singlethread_workqueue("mkey_cache");
	if (!cache->wq) {
		mlx5_ib_warn(dev, "failed to create work queue\n");
		return -ENOMEM;
	}

	setup_timer(&dev->delay_timer, delay_time_func, (uintptr_t)dev);
	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		ent = &cache->ent[i];
		INIT_LIST_HEAD(&ent->head);
		spin_lock_init(&ent->lock);
		ent->order = i + 2;
		ent->dev = dev;

		if (dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) {
			if (mlx5_core_is_pf(dev->mdev))
				limit = dev->mdev->profile->mr_cache[i].limit;
			else
				limit = MLX5_VF_MR_LIMIT;
		} else {
			limit = 0;
		}
		ent->limit = limit;

		INIT_WORK(&ent->work, cache_work_func);
		INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);

		if (!queue_work(cache->wq, &ent->work))
			mlx5_ib_warn(dev, "failed queueing work\n");
	}

	err = mlx5_mr_sysfs_init(dev);
	if (err)
		mlx5_ib_warn(dev, "failed to init mr cache sysfs\n");

	return 0;
}

static void wait_for_async_commands(struct mlx5_ib_dev *dev)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	int total = 0;
	int i;
	int j;

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		ent = &cache->ent[i];
		for (j = 0; j < 1000; j++) {
			if (!ent->pending)
				break;
			msleep(50);
		}
	}
	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		ent = &cache->ent[i];
		total += ent->pending;
	}

	if (total)
		mlx5_ib_dbg(dev, "aborted, %d pending requests\n", total);
	else
		mlx5_ib_dbg(dev, "done with all pending requests\n");
}

int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
{
	int i;

	dev->cache.stopped = 1;
	flush_workqueue(dev->cache.wq);
	mlx5_mr_sysfs_cleanup(dev);

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++)
		clean_keys(dev, i);

	destroy_workqueue(dev->cache.wq);
	wait_for_async_commands(dev);
	del_timer_sync(&dev->delay_timer);
	return 0;
}
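
/*
 * Create a DMA MR mapping the caller's entire address space.  With
 * MLX5_ACCESS_MODE_PA no translation table is used, and MLX5_MKEY_LEN64
 * marks the region as covering the full 64-bit range, so no page list is
 * supplied.
 */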

struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_core_dev *mdev = dev->mdev;
	struct mlx5_create_mkey_mbox_in *in;
	struct mlx5_mkey_seg *seg;
	struct mlx5_ib_mr *mr;
	int err;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	in = kzalloc(sizeof(*in), GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_free;
	}

	seg = &in->seg;
	seg->flags = convert_access(acc) | MLX5_ACCESS_MODE_PA;
	seg->flags_pd = cpu_to_be32(to_mpd(pd)->pdn | MLX5_MKEY_LEN64);
	seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);

	err = mlx5_core_create_mkey(mdev, &mr->mmr, in, sizeof(*in), NULL, NULL,
				    NULL);
	if (err)
		goto err_in;

	kfree(in);
	mr->ibmr.lkey = mr->mmr.key;
	mr->ibmr.rkey = mr->mmr.key;

	return &mr->ibmr;

err_in:
	kfree(in);
err_free:
	kfree(mr);
	return ERR_PTR(err);
}
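
/*
 * Number of 16-byte "octowords" needed for a region's translation
 * entries: one 8-byte entry per page, two entries per octoword.  For
 * example, a 2 MB region on a 4 KB page boundary with 4 KB pages needs
 * 512 entries, i.e. (512 + 1) / 2 = 256 octowords.
 */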

static int get_octo_len(u64 addr, u64 len, u64 page_size)
{
	u64 offset;
	int npages;

	offset = addr & (page_size - 1ULL);
	npages = ALIGN(len + offset, page_size) >> ilog2(page_size);
	return (npages + 1) / 2;
}

void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context)
{
	struct mlx5_ib_umr_context *context;
	struct ib_wc wc;
	int err;

	while (1) {
		err = ib_poll_cq(cq, 1, &wc);
		if (err < 0) {
			printf("mlx5_ib: WARN: poll cq error %d\n", err);
			return;
		}
		if (err == 0)
			break;

		context = (struct mlx5_ib_umr_context *)(uintptr_t)wc.wr_id;
		context->status = wc.status;
		complete(&context->done);
	}
	ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
}
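
/*
 * Slow-path registration: build a mailbox carrying the full page list for
 * @umem and create the mkey synchronously.  When the device reports the
 * "pg" capability, MTT entries are written with the present bit set.
 */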

static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
				     u64 length, struct ib_umem *umem,
				     int npages, int page_shift,
				     int access_flags)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_create_mkey_mbox_in *in;
	struct mlx5_ib_mr *mr;
	int inlen;
	int err;
	bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg));

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	inlen = sizeof(*in) + sizeof(*in->pas) * ((npages + 1) / 2) * 2;
	in = mlx5_vzalloc(inlen);
	if (!in) {
		err = -ENOMEM;
		goto err_1;
	}
	mlx5_ib_populate_pas(dev, umem, page_shift, in->pas,
			     pg_cap ? MLX5_IB_MTT_PRESENT : 0);

	/*
	 * The MLX5_MKEY_INBOX_PG_ACCESS bit allows setting the access flags
	 * in the page list submitted with the command.
	 */
	in->flags = pg_cap ? cpu_to_be32(MLX5_MKEY_INBOX_PG_ACCESS) : 0;
	in->seg.flags = convert_access(access_flags) |
		MLX5_ACCESS_MODE_MTT;
	in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
	in->seg.start_addr = cpu_to_be64(virt_addr);
	in->seg.len = cpu_to_be64(length);
	in->seg.bsfs_octo_size = 0;
	in->seg.xlt_oct_size = cpu_to_be32(get_octo_len(virt_addr, length, 1 << page_shift));
	in->seg.log2_page_size = page_shift;
	in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
	in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length,
							 1 << page_shift));
	err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, inlen, NULL,
				    NULL, NULL);
	if (err) {
		mlx5_ib_warn(dev, "create mkey failed\n");
		goto err_2;
	}
	kvfree(in);

	mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmr.key);

	return mr;

err_2:
	kvfree(in);
err_1:
	kfree(mr);
	return ERR_PTR(err);
}

enum {
	MLX5_MAX_REG_ORDER = MAX_MR_CACHE_ENTRIES + 1,
	MLX5_MAX_REG_SIZE = 2ul * 1024 * 1024 * 1024,
};

static int clean_mr(struct mlx5_ib_mr *mr)
{
	struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
	int umred = mr->umred;
	int err;
	int i;

	for (i = 0; i < mr->nchild; ++i)
		free_cached_mr(dev, mr->children[i]);

	if (!umred) {
		err = destroy_mkey(dev, mr);
		if (err) {
			mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
				     mr->mmr.key, err);
			return err;
		}
	}
	return 0;
}

struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
				  u64 virt_addr, int access_flags,
				  struct ib_udata *udata, int mr_id)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_ib_mr *mr = NULL;
	struct ib_umem *umem;
	int page_shift;
	int npages;
	int ncont;
	int order;
	int err;

	mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
		    (unsigned long long)start, (unsigned long long)virt_addr,
		    (unsigned long long)length, access_flags);
	umem = ib_umem_get(pd->uobject->context, start, length, access_flags, 0);
	if (IS_ERR(umem)) {
		mlx5_ib_warn(dev, "umem get failed (%ld)\n", PTR_ERR(umem));
		return (void *)umem;
	}

	mlx5_ib_cont_pages(umem, start, &npages, &page_shift, &ncont, &order);
	if (!npages) {
		mlx5_ib_warn(dev, "avoid zero region\n");
		err = -EINVAL;
		goto error;
	}

	mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n",
		    npages, ncont, order, page_shift);

	mutex_lock(&dev->slow_path_mutex);
	mr = reg_create(pd, virt_addr, length, umem, ncont, page_shift, access_flags);
	mutex_unlock(&dev->slow_path_mutex);
	if (IS_ERR(mr)) {
		err = PTR_ERR(mr);
		mr = NULL;
		goto error;
	}

	mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmr.key);

	mr->umem = umem;
	mr->npages = npages;
	atomic_add(npages, &dev->mdev->priv.reg_pages);
	mr->ibmr.lkey = mr->mmr.key;
	mr->ibmr.rkey = mr->mmr.key;

	return &mr->ibmr;

error:
	/*
	 * Destroy the umem *before* destroying the MR, to ensure we
	 * will not have any in-flight notifiers when destroying the
	 * MR.
	 *
	 * As the MR is completely invalid to begin with, and this
	 * error path is only taken if we can't push the mr entry into
	 * the pagefault tree, this is safe.
	 */
	ib_umem_release(umem);
	return ERR_PTR(err);
}

CTASSERT(sizeof(((struct ib_phys_buf *)0)->size) == 8);
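
/*
 * Registration of a physical buffer list.  The XOR/OR accumulation below
 * folds every buffer start and every intermediate boundary into "mask";
 * the lowest set bit of the result bounds the largest page size that can
 * map all buffers, e.g. buffers aligned to 64 KB but not 128 KB yield
 * shift = 16 (64 KB pages).  Any misalignment below PAGE_SIZE is
 * rejected, and the page size is capped at 2 GB by the 1 << 31 term.
 */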

struct ib_mr *
mlx5_ib_reg_phys_mr(struct ib_pd *pd,
		    struct ib_phys_buf *buffer_list,
		    int num_phys_buf,
		    int access_flags,
		    u64 *virt_addr)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_create_mkey_mbox_in *in;
	struct mlx5_ib_mr *mr;
	u64 total_size = 0;
	u32 octo_len;
	bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg));
	unsigned long mask;
	int shift;
	int npages = 0;
	int inlen;
	int err;
	int i, j, n = 0;

	mask = buffer_list[0].addr ^ *virt_addr;
	for (i = 0; i < num_phys_buf; ++i) {
		if (i != 0)
			mask |= buffer_list[i].addr;
		if (i != num_phys_buf - 1)
			mask |= buffer_list[i].addr + buffer_list[i].size;

		total_size += buffer_list[i].size;
	}

	if (mask & ~PAGE_MASK)
		return ERR_PTR(-EINVAL);

	shift = __ffs(mask | 1 << 31);

	buffer_list[0].size += buffer_list[0].addr & ((1ULL << shift) - 1);
	buffer_list[0].addr &= ~0ULL << shift;

	for (i = 0; i < num_phys_buf; ++i)
		npages += (buffer_list[i].size + (1ULL << shift) - 1) >> shift;

	if (!npages) {
		mlx5_ib_warn(dev, "avoid zero region\n");
		return ERR_PTR(-EINVAL);
	}

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	octo_len = get_octo_len(*virt_addr, total_size, 1ULL << shift);
	octo_len = ALIGN(octo_len, 4);

	inlen = sizeof(*in) + (octo_len * 16);
	in = mlx5_vzalloc(inlen);
	if (!in) {
		kfree(mr);
		return ERR_PTR(-ENOMEM);
	}

	for (i = 0; i < num_phys_buf; ++i) {
		for (j = 0;
		     j < (buffer_list[i].size + (1ULL << shift) - 1) >> shift;
		     ++j) {
			u64 temp = buffer_list[i].addr + ((u64)j << shift);
			if (pg_cap)
				temp |= MLX5_IB_MTT_PRESENT;
			in->pas[n++] = cpu_to_be64(temp);
		}
	}

	/*
	 * The MLX5_MKEY_INBOX_PG_ACCESS bit allows setting the access flags
	 * in the page list submitted with the command.
	 */
	in->flags = pg_cap ? cpu_to_be32(MLX5_MKEY_INBOX_PG_ACCESS) : 0;
	in->seg.flags = convert_access(access_flags) |
		MLX5_ACCESS_MODE_MTT;
	in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
	in->seg.start_addr = cpu_to_be64(*virt_addr);
	in->seg.len = cpu_to_be64(total_size);
	in->seg.bsfs_octo_size = 0;
	in->seg.xlt_oct_size = cpu_to_be32(octo_len);
	in->seg.log2_page_size = shift;
	in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
	in->xlat_oct_act_size = cpu_to_be32(octo_len);
	err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, inlen, NULL,
				    NULL, NULL);
	if (err)
		goto err_mbox;

	kvfree(in);
	mr->ibmr.lkey = mr->mmr.key;
	mr->ibmr.rkey = mr->mmr.key;

	return &mr->ibmr;

err_mbox:
	kvfree(in);
	kfree(mr);
	return ERR_PTR(err);
}
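
/*
 * Deregistration path: clean_mr() releases any child MRs and, for
 * non-cache MRs, destroys the mkey; the umem is then released with the
 * pinned-page accounting rolled back, and the MR itself is either
 * recycled into the cache (umred) or freed.
 */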

int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
{
	struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
	struct mlx5_ib_mr *mr = to_mmr(ibmr);
	struct ib_umem *umem = mr->umem;
	int npages = mr->npages;
	int umred = mr->umred;
	int err;

	err = clean_mr(mr);
	if (err)
		return err;

	if (umem) {
		ib_umem_release(umem);
		atomic_sub(npages, &dev->mdev->priv.reg_pages);
	}

	if (umred)
		free_cached_mr(dev, mr);
	else
		kfree(mr);

	return 0;
}

int mlx5_ib_destroy_mr(struct ib_mr *ibmr)
{
	struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
	struct mlx5_ib_mr *mr = to_mmr(ibmr);
	int err;

	if (mr->sig) {
		if (mlx5_core_destroy_psv(dev->mdev,
					  mr->sig->psv_memory.psv_idx))
			mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
				     mr->sig->psv_memory.psv_idx);
		if (mlx5_core_destroy_psv(dev->mdev,
					  mr->sig->psv_wire.psv_idx))
			mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
				     mr->sig->psv_wire.psv_idx);
		kfree(mr->sig);
	}

	err = destroy_mkey(dev, mr);
	if (err) {
		mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
			     mr->mmr.key, err);
		return err;
	}

	kfree(mr);
	return 0;
}

struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd,
					int max_page_list_len)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_create_mkey_mbox_in *in;
	struct mlx5_ib_mr *mr;
	int err;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	in = kzalloc(sizeof(*in), GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_free;
	}

	in->seg.status = MLX5_MKEY_STATUS_FREE;
	in->seg.xlt_oct_size = cpu_to_be32((max_page_list_len + 1) / 2);
	in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
	in->seg.flags = MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_MTT;
	in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
	/* TBD not needed - issue 197292 */
	in->seg.log2_page_size = PAGE_SHIFT;

	err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, sizeof(*in), NULL,
				    NULL, NULL);
	kfree(in);
	if (err) {
		mlx5_ib_warn(dev, "failed create mkey\n");
		goto err_free;
	}

	mr->ibmr.lkey = mr->mmr.key;
	mr->ibmr.rkey = mr->mmr.key;

	return &mr->ibmr;

err_free:
	kfree(mr);
	return ERR_PTR(err);
}

struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
							       int page_list_len)
{
	struct mlx5_ib_fast_reg_page_list *mfrpl;
	int size = page_list_len * sizeof(u64);

	mfrpl = kmalloc(sizeof(*mfrpl), GFP_KERNEL);
	if (!mfrpl)
		return ERR_PTR(-ENOMEM);

	mfrpl->ibfrpl.page_list = kmalloc(size, GFP_KERNEL);
	if (!mfrpl->ibfrpl.page_list)
		goto err_free;

	mfrpl->mapped_page_list = dma_alloc_coherent(ibdev->dma_device,
						     size, &mfrpl->map,
						     GFP_KERNEL);
	if (!mfrpl->mapped_page_list)
		goto err_free;

	WARN_ON(mfrpl->map & 0x3f);

	return &mfrpl->ibfrpl;

err_free:
	kfree(mfrpl->ibfrpl.page_list);
	kfree(mfrpl);
	return ERR_PTR(-ENOMEM);
}

void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
{
	struct mlx5_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list);
	struct mlx5_ib_dev *dev = to_mdev(page_list->device);
	int size = page_list->max_page_list_len * sizeof(u64);

	dma_free_coherent(&dev->mdev->pdev->dev, size, mfrpl->mapped_page_list,
			  mfrpl->map);
	kfree(mfrpl->ibfrpl.page_list);
	kfree(mfrpl);
}
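
/*
 * sysfs interface to the MR cache.  A "mr_cache" kobject is added under
 * the IB device, with one child directory per cache order exposing the
 * cur/limit/miss/size attributes, plus the top-level rel_imm (release
 * surplus MRs now) and rel_timeout (release delay in seconds, -1 keeps
 * surplus MRs indefinitely) knobs.
 */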

struct order_attribute {
	struct attribute attr;
	ssize_t (*show)(struct cache_order *, struct order_attribute *, char *buf);
	ssize_t (*store)(struct cache_order *, struct order_attribute *,
			 const char *buf, size_t count);
};

static ssize_t cur_show(struct cache_order *co, struct order_attribute *oa,
			char *buf)
{
	struct mlx5_ib_dev *dev = co->dev;
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[co->index];
	int err;

	err = snprintf(buf, 20, "%d\n", ent->cur);
	return err;
}

static ssize_t limit_show(struct cache_order *co, struct order_attribute *oa,
			  char *buf)
{
	struct mlx5_ib_dev *dev = co->dev;
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[co->index];
	int err;

	err = snprintf(buf, 20, "%d\n", ent->limit);
	return err;
}

static ssize_t limit_store(struct cache_order *co, struct order_attribute *oa,
			   const char *buf, size_t count)
{
	struct mlx5_ib_dev *dev = co->dev;
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[co->index];
	u32 var;
	int err;

/* Simplified compat shims: parse only, errors are not reported. */
#define	kstrtouint(a,b,c) ({*(c) = strtol(a,0,b); 0;})
#define	kstrtoint(a,b,c) ({*(c) = strtol(a,0,b); 0;})

	if (kstrtouint(buf, 0, &var))
		return -EINVAL;

	if (var > ent->size)
		return -EINVAL;

	ent->limit = var;

	if (ent->cur < ent->limit) {
		err = add_keys(dev, co->index, 2 * ent->limit - ent->cur);
		if (err)
			return err;
	}

	return count;
}

static ssize_t miss_show(struct cache_order *co, struct order_attribute *oa,
			 char *buf)
{
	struct mlx5_ib_dev *dev = co->dev;
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[co->index];
	int err;

	err = snprintf(buf, 20, "%d\n", ent->miss);
	return err;
}

static ssize_t miss_store(struct cache_order *co, struct order_attribute *oa,
			  const char *buf, size_t count)
{
	struct mlx5_ib_dev *dev = co->dev;
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[co->index];
	u32 var;

	if (kstrtouint(buf, 0, &var))
		return -EINVAL;

	if (var != 0)
		return -EINVAL;

	ent->miss = var;

	return count;
}

static ssize_t size_show(struct cache_order *co, struct order_attribute *oa,
			 char *buf)
{
	struct mlx5_ib_dev *dev = co->dev;
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[co->index];
	int err;

	err = snprintf(buf, 20, "%d\n", ent->size);
	return err;
}

static ssize_t size_store(struct cache_order *co, struct order_attribute *oa,
			  const char *buf, size_t count)
{
	struct mlx5_ib_dev *dev = co->dev;
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[co->index];
	u32 var;
	int err;

	if (kstrtouint(buf, 0, &var))
		return -EINVAL;

	if (var < ent->limit)
		return -EINVAL;

	if (var > ent->size) {
		do {
			err = add_keys(dev, co->index, var - ent->size);
			if (err && err != -EAGAIN)
				return err;

			usleep_range(3000, 5000);
		} while (err);
	} else if (var < ent->size) {
		remove_keys(dev, co->index, ent->size - var);
	}

	return count;
}

static ssize_t order_attr_show(struct kobject *kobj,
			       struct attribute *attr, char *buf)
{
	struct order_attribute *oa =
		container_of(attr, struct order_attribute, attr);
	struct cache_order *co = container_of(kobj, struct cache_order, kobj);

	if (!oa->show)
		return -EIO;

	return oa->show(co, oa, buf);
}

static ssize_t order_attr_store(struct kobject *kobj,
				struct attribute *attr, const char *buf, size_t size)
{
	struct order_attribute *oa =
		container_of(attr, struct order_attribute, attr);
	struct cache_order *co = container_of(kobj, struct cache_order, kobj);

	if (!oa->store)
		return -EIO;

	return oa->store(co, oa, buf, size);
}

static const struct sysfs_ops order_sysfs_ops = {
	.show = order_attr_show,
	.store = order_attr_store,
};

#define ORDER_ATTR(_name) struct order_attribute order_attr_##_name = \
	__ATTR(_name, 0644, _name##_show, _name##_store)
#define ORDER_ATTR_RO(_name) struct order_attribute order_attr_##_name = \
	__ATTR(_name, 0444, _name##_show, NULL)

static ORDER_ATTR_RO(cur);
static ORDER_ATTR(limit);
static ORDER_ATTR(miss);
static ORDER_ATTR(size);

static struct attribute *order_default_attrs[] = {
	&order_attr_cur.attr,
	&order_attr_limit.attr,
	&order_attr_miss.attr,
	&order_attr_size.attr,
	NULL
};

static struct kobj_type order_type = {
	.sysfs_ops = &order_sysfs_ops,
	.default_attrs = order_default_attrs
};

struct cache_attribute {
	struct attribute attr;
	ssize_t (*show)(struct mlx5_ib_dev *dev, char *buf);
	ssize_t (*store)(struct mlx5_ib_dev *dev, const char *buf, size_t count);
};

static ssize_t rel_imm_show(struct mlx5_ib_dev *dev, char *buf)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	int err;

	err = snprintf(buf, 20, "%d\n", cache->rel_imm);
	return err;
}

static ssize_t rel_imm_store(struct mlx5_ib_dev *dev, const char *buf, size_t count)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	u32 var;
	int found = 0;
	int i;

	if (kstrtouint(buf, 0, &var))
		return -EINVAL;

	if (var > 1)
		return -EINVAL;

	if (var == cache->rel_imm)
		return count;

	cache->rel_imm = var;
	if (cache->rel_imm == 1) {
		for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
			if (cache->ent[i].cur > 2 * cache->ent[i].limit) {
				queue_work(cache->wq, &cache->ent[i].work);
				found = 1;
			}
		}
		if (!found)
			cache->rel_imm = 0;
	}
	return count;
}

static ssize_t rel_timeout_show(struct mlx5_ib_dev *dev, char *buf)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	int err;

	err = snprintf(buf, 20, "%d\n", cache->rel_timeout);
	return err;
}

static ssize_t rel_timeout_store(struct mlx5_ib_dev *dev, const char *buf, size_t count)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	int var;
	int i;

	if (kstrtoint(buf, 0, &var))
		return -EINVAL;

	if (var < -1 || var > MAX_MR_RELEASE_TIMEOUT)
		return -EINVAL;

	if (var == cache->rel_timeout)
		return count;

	if (cache->rel_timeout == -1 || (var < cache->rel_timeout && var != -1)) {
		cache->rel_timeout = var;
		for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
			if (cache->ent[i].cur > 2 * cache->ent[i].limit)
				queue_work(cache->wq, &cache->ent[i].work);
		}
	} else {
		cache->rel_timeout = var;
	}
	return count;
}

static ssize_t cache_attr_show(struct kobject *kobj,
			       struct attribute *attr, char *buf)
{
	struct cache_attribute *ca =
		container_of(attr, struct cache_attribute, attr);
	struct mlx5_ib_dev *dev = container_of(kobj, struct mlx5_ib_dev, mr_cache);

	if (!ca->show)
		return -EIO;

	return ca->show(dev, buf);
}

static ssize_t cache_attr_store(struct kobject *kobj,
				struct attribute *attr, const char *buf, size_t size)
{
	struct cache_attribute *ca =
		container_of(attr, struct cache_attribute, attr);
	struct mlx5_ib_dev *dev = container_of(kobj, struct mlx5_ib_dev, mr_cache);

	if (!ca->store)
		return -EIO;

	return ca->store(dev, buf, size);
}

static const struct sysfs_ops cache_sysfs_ops = {
	.show = cache_attr_show,
	.store = cache_attr_store,
};

#define CACHE_ATTR(_name) struct cache_attribute cache_attr_##_name = \
	__ATTR(_name, 0644, _name##_show, _name##_store)

static CACHE_ATTR(rel_imm);
static CACHE_ATTR(rel_timeout);

static struct attribute *cache_default_attrs[] = {
	&cache_attr_rel_imm.attr,
	&cache_attr_rel_timeout.attr,
	NULL
};

static struct kobj_type cache_type = {
	.sysfs_ops = &cache_sysfs_ops,
	.default_attrs = cache_default_attrs
};

static int mlx5_mr_sysfs_init(struct mlx5_ib_dev *dev)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct device *device = &dev->ib_dev.dev;
	struct cache_order *co;
	int o;
	int i;
	int err;

	err = kobject_init_and_add(&dev->mr_cache, &cache_type,
				   &device->kobj, "mr_cache");
	if (err)
		return -ENOMEM;

	for (o = 2, i = 0; i < MAX_MR_CACHE_ENTRIES; o++, i++) {
		co = &cache->ent[i].co;
		co->order = o;
		co->index = i;
		co->dev = dev;
		err = kobject_init_and_add(&co->kobj, &order_type,
					   &dev->mr_cache, "%d", o);
		if (err)
			goto err_put;
	}
	return 0;

err_put:
	for (; i >= 0; i--) {
		co = &cache->ent[i].co;
		kobject_put(&co->kobj);
	}
	kobject_put(&dev->mr_cache);

	return err;
}

static void mlx5_mr_sysfs_cleanup(struct mlx5_ib_dev *dev)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct cache_order *co;
	int i;

	for (i = MAX_MR_CACHE_ENTRIES - 1; i >= 0; i--) {
		co = &cache->ent[i].co;
		kobject_put(&co->kobj);
	}
	kobject_put(&dev->mr_cache);
}