/*
 * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
 * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/proc_fs.h>
#include <linux/cred.h>

#include "mlx4_ib.h"
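
/*
 * Translate IB access flags into the MPT permission bits the mlx4 HW
 * expects.  Local read access is always granted.
 */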
static u32 convert_access(int acc)
{
	return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX4_PERM_ATOMIC       : 0) |
	       (acc & IB_ACCESS_REMOTE_WRITE  ? MLX4_PERM_REMOTE_WRITE : 0) |
	       (acc & IB_ACCESS_REMOTE_READ   ? MLX4_PERM_REMOTE_READ  : 0) |
	       (acc & IB_ACCESS_LOCAL_WRITE   ? MLX4_PERM_LOCAL_WRITE  : 0) |
	       MLX4_PERM_LOCAL_READ;
}
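
/*
 * Shared MR support: an MR registered with any of the IB_ACCESS_SHARED_MR_*
 * flags is exported as an entry under /proc (see prepare_shared_mr() below)
 * that other processes can open and mmap().  The read/write handlers that
 * follow are stubs; only mmap() of the entry is meaningful here.
 */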
static ssize_t shared_mr_proc_read(struct file *file,
				   char __user *buffer,
				   size_t len,
				   loff_t *offset)
{
	return -ENOSYS;
}

static ssize_t shared_mr_proc_write(struct file *file,
				    const char __user *buffer,
				    size_t len,
				    loff_t *offset)
{
	return -ENOSYS;
}
static int shared_mr_mmap(struct file *filep, struct vm_area_struct *vma)
{
	struct proc_dir_entry *pde = PDE(filep->f_path.dentry->d_inode);
	struct mlx4_shared_mr_info *smr_info =
		(struct mlx4_shared_mr_info *)pde->data;

	/* Prevent any mapping that does not start at the beginning of the area */
	if (vma->vm_pgoff != 0)
		return -EINVAL;

	return ib_umem_map_to_vma(smr_info->umem, vma);
}
static const struct file_operations shared_mr_proc_ops = {
	.read	= shared_mr_proc_read,
	.write	= shared_mr_proc_write,
	.mmap	= shared_mr_mmap
};
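
/*
 * Map the IB_ACCESS_SHARED_MR_* flags onto owner/group/other permission
 * bits for the proc entry, so ordinary file-mode checks decide which
 * processes may access a shared MR.
 */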
static mode_t convert_shared_access(int acc)
{
	return (acc & IB_ACCESS_SHARED_MR_USER_READ   ? S_IRUSR : 0) |
	       (acc & IB_ACCESS_SHARED_MR_USER_WRITE  ? S_IWUSR : 0) |
	       (acc & IB_ACCESS_SHARED_MR_GROUP_READ  ? S_IRGRP : 0) |
	       (acc & IB_ACCESS_SHARED_MR_GROUP_WRITE ? S_IWGRP : 0) |
	       (acc & IB_ACCESS_SHARED_MR_OTHER_READ  ? S_IROTH : 0) |
	       (acc & IB_ACCESS_SHARED_MR_OTHER_WRITE ? S_IWOTH : 0);
}
struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc)
{
	struct mlx4_ib_mr *mr;
	int err;

	mr = kzalloc(sizeof *mr, GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	err = mlx4_mr_alloc(to_mdev(pd->device)->dev, to_mpd(pd)->pdn, 0,
			    ~0ull, convert_access(acc), 0, 0, &mr->mmr);
	if (err)
		goto err_free;

	err = mlx4_mr_enable(to_mdev(pd->device)->dev, &mr->mmr);
	if (err)
		goto err_mr;

	mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;

	return &mr->ibmr;

err_mr:
	mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr);

err_free:
	kfree(mr);

	return ERR_PTR(err);
}
static int mlx4_ib_umem_write_mtt_block(struct mlx4_ib_dev *dev,
					struct mlx4_mtt *mtt,
					u64 mtt_size,
					u64 mtt_shift,
					u64 len,
					u64 cur_start_addr,
					u64 *pages,
					int *start_index,
					int *npages)
{
	int k;
	int err = 0;
	u64 mtt_entries;
	u64 cur_end_addr = cur_start_addr + len;
	u64 cur_end_addr_aligned = 0;

	len += (cur_start_addr & (mtt_size - 1ULL));
	cur_end_addr_aligned = round_up(cur_end_addr, mtt_size);
	len += (cur_end_addr_aligned - cur_end_addr);
	if (len & (mtt_size - 1ULL)) {
		WARN(1,
		     "write_block: len %llx is not aligned to mtt_size %llx\n",
		     (long long)len, (long long)mtt_size);
		return -EINVAL;
	}

	mtt_entries = (len >> mtt_shift);

	/*
	 * Align the MTT start address to the MTT size.  This is required to
	 * handle cases when the MR starts in the middle of an MTT record.
	 * It was not required in the old code, since the physical addresses
	 * provided by the DMA subsystem were page aligned, which was also
	 * the MTT size.
	 */
	cur_start_addr = round_down(cur_start_addr, mtt_size);
	/* A new block is started ... */
	for (k = 0; k < mtt_entries; ++k) {
		pages[*npages] = cur_start_addr + (mtt_size * k);
		(*npages)++;
		/*
		 * Be friendly to mlx4_write_mtt() and
		 * pass it chunks of appropriate size.
		 */
		if (*npages == PAGE_SIZE / sizeof(u64)) {
			err = mlx4_write_mtt(dev->dev, mtt, *start_index,
					     *npages, pages);
			if (err)
				return err;

			(*start_index) += *npages;
			*npages = 0;
		}
	}

	return 0;
}
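
/*
 * Walk the umem's scatter entries, merge physically contiguous entries into
 * blocks and emit one MTT entry per mtt_size within each block.  For example
 * (illustrative numbers): two contiguous 4 KB entries at 0x200000 and
 * 0x201000 form a single 8 KB block and, with a 4 KB MTT page size, produce
 * two MTT entries.
 */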
int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
			   struct ib_umem *umem)
{
	u64 *pages;
	struct ib_umem_chunk *chunk;
	int j;
	u64 len = 0;
	int err = 0;
	u64 mtt_size;
	u64 cur_start_addr = 0;
	u64 mtt_shift;
	int start_index = 0;
	int npages = 0;

	pages = (u64 *) __get_free_page(GFP_KERNEL);
	if (!pages)
		return -ENOMEM;

	mtt_shift = mtt->page_shift;
	mtt_size = 1ULL << mtt_shift;

	list_for_each_entry(chunk, &umem->chunk_list, list)
		for (j = 0; j < chunk->nmap; ++j) {
			if (cur_start_addr + len ==
			    sg_dma_address(&chunk->page_list[j])) {
				/* still the same block */
				len += sg_dma_len(&chunk->page_list[j]);
				continue;
			}
			/* A new block is started ... */
			/*
			 * If len is misaligned, write an extra mtt entry to
			 * cover the misaligned area (round up the division).
			 */
			err = mlx4_ib_umem_write_mtt_block(dev,
							   mtt, mtt_size,
							   mtt_shift,
							   len, cur_start_addr,
							   pages,
							   &start_index,
							   &npages);
			if (err)
				goto out;

			cur_start_addr =
				sg_dma_address(&chunk->page_list[j]);
			len = sg_dma_len(&chunk->page_list[j]);
		}

	/* Handle the last block */
	if (len > 0) {
		/*
		 * If len is misaligned, write an extra mtt entry to cover
		 * the misaligned area (round up the division).
		 */
		err = mlx4_ib_umem_write_mtt_block(dev,
						   mtt, mtt_size, mtt_shift,
						   len, cur_start_addr,
						   pages,
						   &start_index,
						   &npages);
		if (err)
			goto out;
	}

	if (npages)
		err = mlx4_write_mtt(dev->dev, mtt, start_index, npages, pages);

out:
	free_page((unsigned long) pages);
	return err;
}
static inline u64 alignment_of(u64 ptr)
{
	return ilog2(ptr & (~(ptr - 1)));
}
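
/*
 * alignment_of() returns the index of the lowest set bit, i.e. the largest
 * power-of-two alignment of the address: e.g. alignment_of(0x6000) == 13 and
 * alignment_of(0x200000) == 21 (illustrative values).
 */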
static int mlx4_ib_umem_calc_block_mtt(u64 next_block_start,
					u64 current_block_end,
					u64 block_shift)
{
	/*
	 * Check whether the new block is aligned as well as the previous
	 * block: its address must have zeros in all bits below block_shift.
	 */
	if ((next_block_start & ((1ULL << block_shift) - 1ULL)) != 0)
		/*
		 * It is not as well aligned as the previous block - reduce
		 * the mtt size accordingly.  Here we take the lowest bit
		 * that is set.
		 */
		block_shift = alignment_of(next_block_start);

	/*
	 * Check whether the end of the previous block is aligned as well as
	 * the start of the new block.
	 */
	if (((current_block_end) & ((1ULL << block_shift) - 1ULL)) != 0)
		/*
		 * It is not as well aligned as the start of the block -
		 * reduce the mtt size accordingly.
		 */
		block_shift = alignment_of(current_block_end);

	return block_shift;
}
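
/*
 * Example for mlx4_ib_umem_calc_block_mtt() above (illustrative numbers):
 * with block_shift == 20, a next_block_start of 0x300000 is still 1 MB
 * aligned, but a current_block_end of 0x204000 is only 16 KB aligned, so the
 * returned shift drops to 14.
 */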
/*
 * Calculate the optimal MTT size based on contiguous pages.
 * The function also returns, via *num_of_mtts, the number of MTT entries
 * needed to cover the region with that size: if the first or last block is
 * not aligned to the calculated mtt_size, the misaligned remainder is added
 * to the total length as well.  All blocks in the middle are already handled
 * as part of the mtt shift calculation for both their start and end
 * addresses.
 */
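/*
 * Worked example (illustrative numbers, assuming the virtual start address
 * has the same low-order bits as the first physical block): scatter entries
 * at 0x200000 (8 KB), 0x202000 (8 KB) and 0x300000 (4 KB) merge into a 16 KB
 * block ending at 0x204000 followed by a 4 KB block.  The split caps
 * block_shift at alignment_of(0x204000) == 14, the last block is rounded up
 * to a 16 KB boundary, and the MR is described by two 16 KB MTT entries.
 */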
int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem,
					u64 start_va,
					int *num_of_mtts)
{
	struct ib_umem_chunk *chunk;
	int j;
	u64 block_shift = MLX4_MAX_MTT_SHIFT;
	u64 current_block_len = 0;
	u64 current_block_start = 0;
	u64 misalignment_bits;
	u64 first_block_start = 0;
	u64 last_block_end = 0;
	u64 total_len = 0;
	u64 last_block_aligned_end = 0;
	u64 min_shift = ilog2(umem->page_size);

	list_for_each_entry(chunk, &umem->chunk_list, list) {
		/*
		 * Initialization - save the first chunk start as
		 * current_block_start - a block means contiguous pages.
		 */
		if (current_block_len == 0 && current_block_start == 0) {
			first_block_start = current_block_start =
				sg_dma_address(&chunk->page_list[0]);
			/*
			 * Find the bits that differ between the physical
			 * address and the virtual address for the start of
			 * the MR.  umem_get aligned start_va to a page
			 * boundary, so align the start va to the same
			 * boundary here.
			 * misalignment_bits is needed to handle the case of
			 * a single memory region: there the rest of the
			 * logic will not reduce the block size.  If we used
			 * a block size bigger than the alignment of the
			 * misalignment bits, we might use the virtual page
			 * number instead of the physical page number,
			 * resulting in access to the wrong data.
			 */
			misalignment_bits =
				(start_va & (~(((u64)(umem->page_size)) - 1ULL)))
				^ current_block_start;
			block_shift = min(alignment_of(misalignment_bits),
					  block_shift);
		}

		/*
		 * Go over the scatter entries in the current chunk and check
		 * whether they continue the previous scatter entry.
		 */
		for (j = 0; j < chunk->nmap; ++j) {
			u64 next_block_start =
				sg_dma_address(&chunk->page_list[j]);
			u64 current_block_end = current_block_start
				+ current_block_len;
			/* If we have a split (non-contig.) between two blocks */
			if (current_block_end != next_block_start) {
				block_shift = mlx4_ib_umem_calc_block_mtt(
						next_block_start,
						current_block_end,
						block_shift);

				/*
				 * If we reached the minimum shift for a 4k
				 * page, stop the loop.
				 */
				if (block_shift <= min_shift)
					goto end;

				/* Add the completed block to the total length. */
				total_len += current_block_len;

				/* Start a new block */
				current_block_start = next_block_start;
				current_block_len =
					sg_dma_len(&chunk->page_list[j]);
				continue;
			}

			/*
			 * The scatter entry is another part of the current
			 * block: increase the block size.  An entry in the
			 * scatter list can be larger than 4k (one page) when
			 * the dma mapping merges some blocks together.
			 */
			current_block_len +=
				sg_dma_len(&chunk->page_list[j]);
		}
	}

	/* Account for the last block in the total len */
	total_len += current_block_len;
	/* Add to the first block the misalignment that it suffers from. */
	total_len += (first_block_start & ((1ULL << block_shift) - 1ULL));
	last_block_end = current_block_start + current_block_len;
	last_block_aligned_end = round_up(last_block_end, 1ULL << block_shift);
	total_len += (last_block_aligned_end - last_block_end);

	WARN((total_len & ((1ULL << block_shift) - 1ULL)),
	     " misaligned total length detected (%llu, %llu)!",
	     (long long)total_len, (long long)block_shift);

	*num_of_mtts = total_len >> block_shift;
end:
	if (block_shift < min_shift) {
		/*
		 * If the shift is less than the minimum, warn and return the
		 * minimum shift.
		 */
		WARN(1,
		     "mlx4_ib_umem_calc_optimal_mtt_size - unexpected shift %lld\n",
		     (long long)block_shift);

		block_shift = min_shift;
	}
	return block_shift;
}
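
/*
 * Expose a shared MR through procfs: the entry is named by the mr_id in hex,
 * its mode is derived from the IB_ACCESS_SHARED_MR_* flags and it is owned
 * by the registering task, so regular file permissions govern which
 * processes may open and mmap it.
 */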
static int prepare_shared_mr(struct mlx4_ib_mr *mr, int access_flags, int mr_id)
{
	struct proc_dir_entry *mr_proc_entry;
	mode_t mode = S_IFREG;
	char name_buff[16];

	mode |= convert_shared_access(access_flags);
	sprintf(name_buff, "%X", mr_id);
	mr->smr_info = kmalloc(sizeof(struct mlx4_shared_mr_info), GFP_KERNEL);
	if (!mr->smr_info)
		return -ENOMEM;

	mr->smr_info->mr_id = mr_id;
	mr->smr_info->umem = mr->umem;

	mr_proc_entry = proc_create_data(name_buff, mode,
					 mlx4_mrs_dir_entry,
					 &shared_mr_proc_ops,
					 mr->smr_info);

	if (!mr_proc_entry) {
		pr_err("prepare_shared_mr failed via proc\n");
		kfree(mr->smr_info);
		return -ENODEV;
	}

	current_uid_gid(&(mr_proc_entry->uid), &(mr_proc_entry->gid));
	mr_proc_entry->size = mr->umem->length;

	return 0;
}
static int is_shared_mr(int access_flags)
{
	/*
	 * Check whether IB_ACCESS_SHARED_MR_USER_READ or any of the other
	 * shared access bits is set.
	 */
	return !!(access_flags & (IB_ACCESS_SHARED_MR_USER_READ |
				  IB_ACCESS_SHARED_MR_USER_WRITE |
				  IB_ACCESS_SHARED_MR_GROUP_READ |
				  IB_ACCESS_SHARED_MR_GROUP_WRITE |
				  IB_ACCESS_SHARED_MR_OTHER_READ |
				  IB_ACCESS_SHARED_MR_OTHER_WRITE));
}
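
/*
 * Register a user MR: pin the user pages with ib_umem_get(), pick the
 * optimal MTT page size for the resulting layout, allocate and program the
 * MR, and, if any shared access flag is set, export it through procfs.
 */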
struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
				  u64 virt_addr, int access_flags,
				  struct ib_udata *udata,
				  int mr_id)
{
	struct mlx4_ib_dev *dev = to_mdev(pd->device);
	struct mlx4_ib_mr *mr;
	int shift;
	int err;
	int n;

	mr = kzalloc(sizeof *mr, GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	mr->umem = ib_umem_get(pd->uobject->context, start, length,
			       access_flags, 0);
	if (IS_ERR(mr->umem)) {
		err = PTR_ERR(mr->umem);
		goto err_free;
	}

	n = ib_umem_page_count(mr->umem);
	shift = mlx4_ib_umem_calc_optimal_mtt_size(mr->umem, start, &n);

	err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, length,
			    convert_access(access_flags), n, shift, &mr->mmr);
	if (err)
		goto err_umem;

	err = mlx4_ib_umem_write_mtt(dev, &mr->mmr.mtt, mr->umem);
	if (err)
		goto err_mr;

	err = mlx4_mr_enable(dev->dev, &mr->mmr);
	if (err)
		goto err_mr;

	mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;

	/* Check whether the MR should be shared */
	if (is_shared_mr(access_flags)) {
		/*
		 * The start address and length must be aligned to the page
		 * size in order to map a full page and prevent leakage of
		 * data.
		 */
		if (mr->umem->offset || (length & ~PAGE_MASK)) {
			err = -EINVAL;
			goto err_mr;
		}

		err = prepare_shared_mr(mr, access_flags, mr_id);
		if (err)
			goto err_mr;
	}

	return &mr->ibmr;

err_mr:
	mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr);

err_umem:
	ib_umem_release(mr->umem);

err_free:
	kfree(mr);

	return ERR_PTR(err);
}
int mlx4_ib_dereg_mr(struct ib_mr *ibmr)
{
	struct mlx4_ib_mr *mr = to_mmr(ibmr);

	mlx4_mr_free(to_mdev(ibmr->device)->dev, &mr->mmr);
	if (mr->smr_info) {
		/*
		 * Once the master/parent shared MR is deregistered there is
		 * no way to share it any more - its mr_id is returned to the
		 * kernel as part of ib_uverbs_dereg_mr and may be allocated
		 * again by another reg_mr.
		 */
		char name_buff[16];

		sprintf(name_buff, "%X", mr->smr_info->mr_id);
		/*
		 * remove_proc_entry() checks internally that no operation is
		 * in flight on that proc file, and waits for any such
		 * operation to finish.  That is why no extra synchronization
		 * is needed before releasing the shared umem below.
		 */
		remove_proc_entry(name_buff, mlx4_mrs_dir_entry);
		kfree(mr->smr_info);
	}

	if (mr->umem)
		ib_umem_release(mr->umem);

	kfree(mr);

	return 0;
}
struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd,
					int max_page_list_len)
{
	struct mlx4_ib_dev *dev = to_mdev(pd->device);
	struct mlx4_ib_mr *mr;
	int err;

	mr = kzalloc(sizeof *mr, GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, 0, 0, 0,
			    max_page_list_len, 0, &mr->mmr);
	if (err)
		goto err_free;

	err = mlx4_mr_enable(dev->dev, &mr->mmr);
	if (err)
		goto err_mr;

	mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;

	return &mr->ibmr;

err_mr:
	mlx4_mr_free(dev->dev, &mr->mmr);

err_free:
	kfree(mr);
	return ERR_PTR(err);
}
struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
							       int page_list_len)
{
	struct mlx4_ib_dev *dev = to_mdev(ibdev);
	struct mlx4_ib_fast_reg_page_list *mfrpl;
	int size = page_list_len * sizeof (u64);

	if (page_list_len > MLX4_MAX_FAST_REG_PAGES)
		return ERR_PTR(-EINVAL);

	mfrpl = kmalloc(sizeof *mfrpl, GFP_KERNEL);
	if (!mfrpl)
		return ERR_PTR(-ENOMEM);

	mfrpl->ibfrpl.page_list = kmalloc(size, GFP_KERNEL);
	if (!mfrpl->ibfrpl.page_list)
		goto err_free;

	mfrpl->mapped_page_list = dma_alloc_coherent(&dev->dev->pdev->dev,
						     size, &mfrpl->map,
						     GFP_KERNEL);
	if (!mfrpl->mapped_page_list)
		goto err_free;

	WARN_ON(mfrpl->map & 0x3f);

	return &mfrpl->ibfrpl;

err_free:
	kfree(mfrpl->ibfrpl.page_list);
	kfree(mfrpl);
	return ERR_PTR(-ENOMEM);
}
void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
{
	struct mlx4_ib_dev *dev = to_mdev(page_list->device);
	struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list);
	int size = page_list->max_page_list_len * sizeof (u64);

	dma_free_coherent(&dev->dev->pdev->dev, size, mfrpl->mapped_page_list,
			  mfrpl->map);
	kfree(mfrpl->ibfrpl.page_list);
	kfree(mfrpl);
}
struct ib_fmr *mlx4_ib_fmr_alloc(struct ib_pd *pd, int acc,
				 struct ib_fmr_attr *fmr_attr)
{
	struct mlx4_ib_dev *dev = to_mdev(pd->device);
	struct mlx4_ib_fmr *fmr;
	int err = -ENOMEM;

	fmr = kmalloc(sizeof *fmr, GFP_KERNEL);
	if (!fmr)
		return ERR_PTR(-ENOMEM);

	err = mlx4_fmr_alloc(dev->dev, to_mpd(pd)->pdn, convert_access(acc),
			     fmr_attr->max_pages, fmr_attr->max_maps,
			     fmr_attr->page_shift, &fmr->mfmr);
	if (err)
		goto err_free;

	err = mlx4_fmr_enable(to_mdev(pd->device)->dev, &fmr->mfmr);
	if (err)
		goto err_mr;

	fmr->ibfmr.rkey = fmr->ibfmr.lkey = fmr->mfmr.mr.key;

	return &fmr->ibfmr;

err_mr:
	mlx4_mr_free(to_mdev(pd->device)->dev, &fmr->mfmr.mr);

err_free:
	kfree(fmr);

	return ERR_PTR(err);
}
int mlx4_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
			 int npages, u64 iova)
{
	struct mlx4_ib_fmr *ifmr = to_mfmr(ibfmr);
	struct mlx4_ib_dev *dev = to_mdev(ifmr->ibfmr.device);

	return mlx4_map_phys_fmr(dev->dev, &ifmr->mfmr, page_list, npages, iova,
				 &ifmr->ibfmr.lkey, &ifmr->ibfmr.rkey);
}
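
/*
 * Note that mapping an FMR only reprograms its MTT entries and updates the
 * keys; invalidation is deferred to mlx4_ib_unmap_fmr(), which unmaps a
 * whole list of FMRs and then issues a single SYNC_TPT command.
 */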
int mlx4_ib_unmap_fmr(struct list_head *fmr_list)
{
	struct ib_fmr *ibfmr;
	int err;
	struct mlx4_dev *mdev = NULL;

	list_for_each_entry(ibfmr, fmr_list, list) {
		if (mdev && to_mdev(ibfmr->device)->dev != mdev)
			return -EINVAL;
		mdev = to_mdev(ibfmr->device)->dev;
	}

	if (!mdev)
		return 0;

	list_for_each_entry(ibfmr, fmr_list, list) {
		struct mlx4_ib_fmr *ifmr = to_mfmr(ibfmr);

		mlx4_fmr_unmap(mdev, &ifmr->mfmr, &ifmr->ibfmr.lkey, &ifmr->ibfmr.rkey);
	}

	/*
	 * Make sure all MPT status updates are visible before issuing
	 * SYNC_TPT firmware command.
	 */
	wmb();

	err = mlx4_SYNC_TPT(mdev);
	if (err)
		pr_warn("SYNC_TPT error %d when unmapping FMRs\n", err);

	return 0;
}
int mlx4_ib_fmr_dealloc(struct ib_fmr *ibfmr)
{
	struct mlx4_ib_fmr *ifmr = to_mfmr(ibfmr);
	struct mlx4_ib_dev *dev = to_mdev(ibfmr->device);
	int err;

	err = mlx4_fmr_free(dev->dev, &ifmr->mfmr);
	if (!err)
		kfree(ifmr);

	return err;
}