1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0
3  *
4  * Copyright (c) 2005 Topspin Communications.  All rights reserved.
5  * Copyright (c) 2005, 2006, 2007 Cisco Systems.  All rights reserved.
6  * Copyright (c) 2005 PathScale, Inc.  All rights reserved.
7  * Copyright (c) 2006 Mellanox Technologies.  All rights reserved.
8  *
9  * This software is available to you under a choice of one of two
10  * licenses.  You may choose to be licensed under the terms of the GNU
11  * General Public License (GPL) Version 2, available from the file
12  * COPYING in the main directory of this source tree, or the
13  * OpenIB.org BSD license below:
14  *
15  *     Redistribution and use in source and binary forms, with or
16  *     without modification, are permitted provided that the following
17  *     conditions are met:
18  *
19  *      - Redistributions of source code must retain the above
20  *        copyright notice, this list of conditions and the following
21  *        disclaimer.
22  *
23  *      - Redistributions in binary form must reproduce the above
24  *        copyright notice, this list of conditions and the following
25  *        disclaimer in the documentation and/or other materials
26  *        provided with the distribution.
27  *
28  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
29  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
30  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
31  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
32  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
33  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
34  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
35  * SOFTWARE.
36  */
37
38 #include <sys/cdefs.h>
39 __FBSDID("$FreeBSD$");
40
41 #define LINUXKPI_PARAM_PREFIX ibcore_
42
43 #include <linux/file.h>
44 #include <linux/fs.h>
45 #include <linux/slab.h>
46 #include <linux/sched.h>
47 #include <linux/rbtree.h>
48
49 #include <asm/uaccess.h>
50
51 #include "uverbs.h"
52 #include "core_priv.h"
53
54 #include <sys/priv.h>
55
56 struct uverbs_lock_class {
57         char                    name[16];
58 };
59
60 static struct uverbs_lock_class pd_lock_class   = { .name = "PD-uobj" };
61 static struct uverbs_lock_class mr_lock_class   = { .name = "MR-uobj" };
62 static struct uverbs_lock_class mw_lock_class   = { .name = "MW-uobj" };
63 static struct uverbs_lock_class cq_lock_class   = { .name = "CQ-uobj" };
64 static struct uverbs_lock_class qp_lock_class   = { .name = "QP-uobj" };
65 static struct uverbs_lock_class ah_lock_class   = { .name = "AH-uobj" };
66 static struct uverbs_lock_class srq_lock_class  = { .name = "SRQ-uobj" };
67 static struct uverbs_lock_class xrcd_lock_class = { .name = "XRCD-uobj" };
68 static struct uverbs_lock_class rule_lock_class = { .name = "RULE-uobj" };
69 static struct uverbs_lock_class wq_lock_class = { .name = "WQ-uobj" };
70 static struct uverbs_lock_class rwq_ind_table_lock_class = { .name = "IND_TBL-uobj" };
71
72 /*
73  * The ib_uobject locking scheme is as follows:
74  *
75  * - ib_uverbs_idr_lock protects the uverbs idrs themselves, so it
76  *   needs to be held during all idr write operations.  When an object is
77  *   looked up, a reference must be taken on the object's kref before
78  *   dropping this lock.  For read operations, rcu_read_lock() is
79  *   used instead of this spinlock, but similarly the kref reference
80  *   is grabbed before rcu_read_unlock() is called.
81  *
82  * - Each object also has an rwsem.  This rwsem must be held for
83  *   reading while an operation that uses the object is performed.
84  *   For example, while registering an MR, the associated PD's
85  *   uobject.mutex must be held for reading.  The rwsem must be held
86  *   for writing while initializing or destroying an object.
87  *
88  * - In addition, each object has a "live" flag.  If this flag is not
89  *   set, then lookups of the object will fail even if it is found in
90  *   the idr.  This handles a reader that blocks and does not acquire
91  *   the rwsem until after the object is destroyed.  The destroy
92  *   operation will set the live flag to 0 and then drop the rwsem;
93  *   this will allow the reader to acquire the rwsem, see that the
94  *   live flag is 0, and then drop the rwsem and its reference to
95  *   the object.  The underlying storage will not be freed until
96  *   the last reference to the object is dropped.
97  */
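
/*
 * Illustrative sketch only (not compiled): the typical read-side lookup
 * implied by the scheme above, assuming the caller already holds a valid
 * ib_ucontext.  The kref is taken inside the RCU read section and the
 * "live" flag is re-checked under the rwsem:
 *
 *	rcu_read_lock();
 *	uobj = idr_find(idr, id);
 *	if (uobj && uobj->context == context)
 *		kref_get(&uobj->ref);
 *	else
 *		uobj = NULL;
 *	rcu_read_unlock();
 *
 *	if (uobj) {
 *		down_read(&uobj->mutex);
 *		if (!uobj->live) {
 *			up_read(&uobj->mutex);
 *			kref_put(&uobj->ref, release_uobj);
 *			uobj = NULL;
 *		}
 *	}
 */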
98
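/*
 * Initialize the fields common to all ib_uobjects: the object starts
 * with a single kref reference and is not yet marked live.
 */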
99 static void init_uobj(struct ib_uobject *uobj, u64 user_handle,
100                       struct ib_ucontext *context, struct uverbs_lock_class *c)
101 {
102         uobj->user_handle = user_handle;
103         uobj->context     = context;
104         kref_init(&uobj->ref);
105         init_rwsem(&uobj->mutex);
106         uobj->live        = 0;
107 }
108
109 static void release_uobj(struct kref *kref)
110 {
111         kfree_rcu(container_of(kref, struct ib_uobject, ref), rcu);
112 }
113
114 static void put_uobj(struct ib_uobject *uobj)
115 {
116         kref_put(&uobj->ref, release_uobj);
117 }
118
119 static void put_uobj_read(struct ib_uobject *uobj)
120 {
121         up_read(&uobj->mutex);
122         put_uobj(uobj);
123 }
124
125 static void put_uobj_write(struct ib_uobject *uobj)
126 {
127         up_write(&uobj->mutex);
128         put_uobj(uobj);
129 }
130
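/*
 * Allocate an idr slot for the uobject while holding ib_uverbs_idr_lock.
 * On success the new id is stored in uobj->id and 0 is returned.
 */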
131 static int idr_add_uobj(struct idr *idr, struct ib_uobject *uobj)
132 {
133         int ret;
134
135         idr_preload(GFP_KERNEL);
136         spin_lock(&ib_uverbs_idr_lock);
137
138         ret = idr_alloc(idr, uobj, 0, 0, GFP_NOWAIT);
139         if (ret >= 0)
140                 uobj->id = ret;
141
142         spin_unlock(&ib_uverbs_idr_lock);
143         idr_preload_end();
144
145         return ret < 0 ? ret : 0;
146 }
147
148 void idr_remove_uobj(struct idr *idr, struct ib_uobject *uobj)
149 {
150         spin_lock(&ib_uverbs_idr_lock);
151         idr_remove(idr, uobj->id);
152         spin_unlock(&ib_uverbs_idr_lock);
153 }
154
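/*
 * Look up a uobject by id under the RCU read lock and take a kref
 * reference on it, but only if it belongs to the given ucontext.
 */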
155 static struct ib_uobject *__idr_get_uobj(struct idr *idr, int id,
156                                          struct ib_ucontext *context)
157 {
158         struct ib_uobject *uobj;
159
160         rcu_read_lock();
161         uobj = idr_find(idr, id);
162         if (uobj) {
163                 if (uobj->context == context)
164                         kref_get(&uobj->ref);
165                 else
166                         uobj = NULL;
167         }
168         rcu_read_unlock();
169
170         return uobj;
171 }
172
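/*
 * Look up a uobject and return it with its rwsem held for reading, or
 * NULL if it is not found or no longer live.
 */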
173 static struct ib_uobject *idr_read_uobj(struct idr *idr, int id,
174                                         struct ib_ucontext *context, int nested)
175 {
176         struct ib_uobject *uobj;
177
178         uobj = __idr_get_uobj(idr, id, context);
179         if (!uobj)
180                 return NULL;
181
182         if (nested)
183                 down_read_nested(&uobj->mutex, SINGLE_DEPTH_NESTING);
184         else
185                 down_read(&uobj->mutex);
186         if (!uobj->live) {
187                 put_uobj_read(uobj);
188                 return NULL;
189         }
190
191         return uobj;
192 }
193
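/*
 * Look up a uobject and return it with its rwsem held for writing, or
 * NULL if it is not found or no longer live.
 */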
194 static struct ib_uobject *idr_write_uobj(struct idr *idr, int id,
195                                          struct ib_ucontext *context)
196 {
197         struct ib_uobject *uobj;
198
199         uobj = __idr_get_uobj(idr, id, context);
200         if (!uobj)
201                 return NULL;
202
203         down_write(&uobj->mutex);
204         if (!uobj->live) {
205                 put_uobj_write(uobj);
206                 return NULL;
207         }
208
209         return uobj;
210 }
211
212 static void *idr_read_obj(struct idr *idr, int id, struct ib_ucontext *context,
213                           int nested)
214 {
215         struct ib_uobject *uobj;
216
217         uobj = idr_read_uobj(idr, id, context, nested);
218         return uobj ? uobj->object : NULL;
219 }
220
221 static struct ib_pd *idr_read_pd(int pd_handle, struct ib_ucontext *context)
222 {
223         return idr_read_obj(&ib_uverbs_pd_idr, pd_handle, context, 0);
224 }
225
226 static void put_pd_read(struct ib_pd *pd)
227 {
228         put_uobj_read(pd->uobject);
229 }
230
231 static struct ib_cq *idr_read_cq(int cq_handle, struct ib_ucontext *context, int nested)
232 {
233         return idr_read_obj(&ib_uverbs_cq_idr, cq_handle, context, nested);
234 }
235
236 static void put_cq_read(struct ib_cq *cq)
237 {
238         put_uobj_read(cq->uobject);
239 }
240
241 static struct ib_ah *idr_read_ah(int ah_handle, struct ib_ucontext *context)
242 {
243         return idr_read_obj(&ib_uverbs_ah_idr, ah_handle, context, 0);
244 }
245
246 static void put_ah_read(struct ib_ah *ah)
247 {
248         put_uobj_read(ah->uobject);
249 }
250
251 static struct ib_qp *idr_read_qp(int qp_handle, struct ib_ucontext *context)
252 {
253         return idr_read_obj(&ib_uverbs_qp_idr, qp_handle, context, 0);
254 }
255
256 static struct ib_wq *idr_read_wq(int wq_handle, struct ib_ucontext *context)
257 {
258         return idr_read_obj(&ib_uverbs_wq_idr, wq_handle, context, 0);
259 }
260
261 static void put_wq_read(struct ib_wq *wq)
262 {
263         put_uobj_read(wq->uobject);
264 }
265
266 static struct ib_rwq_ind_table *idr_read_rwq_indirection_table(int ind_table_handle,
267                                                                struct ib_ucontext *context)
268 {
269         return idr_read_obj(&ib_uverbs_rwq_ind_tbl_idr, ind_table_handle, context, 0);
270 }
271
272 static void put_rwq_indirection_table_read(struct ib_rwq_ind_table *ind_table)
273 {
274         put_uobj_read(ind_table->uobject);
275 }
276
277 static struct ib_qp *idr_write_qp(int qp_handle, struct ib_ucontext *context)
278 {
279         struct ib_uobject *uobj;
280
281         uobj = idr_write_uobj(&ib_uverbs_qp_idr, qp_handle, context);
282         return uobj ? uobj->object : NULL;
283 }
284
285 static void put_qp_read(struct ib_qp *qp)
286 {
287         put_uobj_read(qp->uobject);
288 }
289
290 static void put_qp_write(struct ib_qp *qp)
291 {
292         put_uobj_write(qp->uobject);
293 }
294
295 static struct ib_srq *idr_read_srq(int srq_handle, struct ib_ucontext *context)
296 {
297         return idr_read_obj(&ib_uverbs_srq_idr, srq_handle, context, 0);
298 }
299
300 static void put_srq_read(struct ib_srq *srq)
301 {
302         put_uobj_read(srq->uobject);
303 }
304
305 static struct ib_xrcd *idr_read_xrcd(int xrcd_handle, struct ib_ucontext *context,
306                                      struct ib_uobject **uobj)
307 {
308         *uobj = idr_read_uobj(&ib_uverbs_xrcd_idr, xrcd_handle, context, 0);
309         return *uobj ? (*uobj)->object : NULL;
310 }
311
312 static void put_xrcd_read(struct ib_uobject *uobj)
313 {
314         put_uobj_read(uobj);
315 }
316
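/*
 * GET_CONTEXT: allocate the per-process ib_ucontext, initialize its
 * object lists and return an fd for the asynchronous event file.  Only
 * one ucontext may be created per uverbs file.
 */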
317 ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
318                               struct ib_device *ib_dev,
319                               const char __user *buf,
320                               int in_len, int out_len)
321 {
322         struct ib_uverbs_get_context      cmd;
323         struct ib_uverbs_get_context_resp resp;
324         struct ib_udata                   udata;
325         struct ib_ucontext               *ucontext;
326         struct file                      *filp;
327         int ret;
328
329         if (out_len < sizeof resp)
330                 return -ENOSPC;
331
332         if (copy_from_user(&cmd, buf, sizeof cmd))
333                 return -EFAULT;
334
335         mutex_lock(&file->mutex);
336
337         if (file->ucontext) {
338                 ret = -EINVAL;
339                 goto err;
340         }
341
342         INIT_UDATA(&udata, buf + sizeof cmd,
343                    (unsigned long) cmd.response + sizeof resp,
344                    in_len - sizeof cmd, out_len - sizeof resp);
345
346         ucontext = ib_dev->alloc_ucontext(ib_dev, &udata);
347         if (IS_ERR(ucontext)) {
348                 ret = PTR_ERR(ucontext);
349                 goto err;
350         }
351
352         ucontext->device = ib_dev;
353         INIT_LIST_HEAD(&ucontext->pd_list);
354         INIT_LIST_HEAD(&ucontext->mr_list);
355         INIT_LIST_HEAD(&ucontext->mw_list);
356         INIT_LIST_HEAD(&ucontext->cq_list);
357         INIT_LIST_HEAD(&ucontext->qp_list);
358         INIT_LIST_HEAD(&ucontext->srq_list);
359         INIT_LIST_HEAD(&ucontext->ah_list);
360         INIT_LIST_HEAD(&ucontext->wq_list);
361         INIT_LIST_HEAD(&ucontext->rwq_ind_tbl_list);
362         INIT_LIST_HEAD(&ucontext->xrcd_list);
363         INIT_LIST_HEAD(&ucontext->rule_list);
364         rcu_read_lock();
365         ucontext->tgid = get_pid(task_pid_group_leader(current));
366         rcu_read_unlock();
367         ucontext->closing = 0;
368
369 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
370         ucontext->umem_tree = RB_ROOT;
371         init_rwsem(&ucontext->umem_rwsem);
372         ucontext->odp_mrs_count = 0;
373         INIT_LIST_HEAD(&ucontext->no_private_counters);
374
375         if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING))
376                 ucontext->invalidate_range = NULL;
377
378 #endif
379
380         resp.num_comp_vectors = file->device->num_comp_vectors;
381
382         ret = get_unused_fd_flags(O_CLOEXEC);
383         if (ret < 0)
384                 goto err_free;
385         resp.async_fd = ret;
386
387         filp = ib_uverbs_alloc_event_file(file, ib_dev, 1);
388         if (IS_ERR(filp)) {
389                 ret = PTR_ERR(filp);
390                 goto err_fd;
391         }
392
393         if (copy_to_user((void __user *) (unsigned long) cmd.response,
394                          &resp, sizeof resp)) {
395                 ret = -EFAULT;
396                 goto err_file;
397         }
398
399         file->ucontext = ucontext;
400
401         fd_install(resp.async_fd, filp);
402
403         mutex_unlock(&file->mutex);
404
405         return in_len;
406
407 err_file:
408         ib_uverbs_free_async_event_file(file);
409         fput(filp);
410
411 err_fd:
412         put_unused_fd(resp.async_fd);
413
414 err_free:
415         put_pid(ucontext->tgid);
416         ib_dev->dealloc_ucontext(ucontext);
417
418 err:
419         mutex_unlock(&file->mutex);
420         return ret;
421 }
422
423 static void copy_query_dev_fields(struct ib_uverbs_file *file,
424                                   struct ib_device *ib_dev,
425                                   struct ib_uverbs_query_device_resp *resp,
426                                   struct ib_device_attr *attr)
427 {
428         resp->fw_ver            = attr->fw_ver;
429         resp->node_guid         = ib_dev->node_guid;
430         resp->sys_image_guid    = attr->sys_image_guid;
431         resp->max_mr_size       = attr->max_mr_size;
432         resp->page_size_cap     = attr->page_size_cap;
433         resp->vendor_id         = attr->vendor_id;
434         resp->vendor_part_id    = attr->vendor_part_id;
435         resp->hw_ver            = attr->hw_ver;
436         resp->max_qp            = attr->max_qp;
437         resp->max_qp_wr         = attr->max_qp_wr;
438         resp->device_cap_flags  = (u32)(attr->device_cap_flags);
439         resp->max_sge           = attr->max_sge;
440         resp->max_sge_rd        = attr->max_sge_rd;
441         resp->max_cq            = attr->max_cq;
442         resp->max_cqe           = attr->max_cqe;
443         resp->max_mr            = attr->max_mr;
444         resp->max_pd            = attr->max_pd;
445         resp->max_qp_rd_atom    = attr->max_qp_rd_atom;
446         resp->max_ee_rd_atom    = attr->max_ee_rd_atom;
447         resp->max_res_rd_atom   = attr->max_res_rd_atom;
448         resp->max_qp_init_rd_atom       = attr->max_qp_init_rd_atom;
449         resp->max_ee_init_rd_atom       = attr->max_ee_init_rd_atom;
450         resp->atomic_cap                = attr->atomic_cap;
451         resp->max_ee                    = attr->max_ee;
452         resp->max_rdd                   = attr->max_rdd;
453         resp->max_mw                    = attr->max_mw;
454         resp->max_raw_ipv6_qp           = attr->max_raw_ipv6_qp;
455         resp->max_raw_ethy_qp           = attr->max_raw_ethy_qp;
456         resp->max_mcast_grp             = attr->max_mcast_grp;
457         resp->max_mcast_qp_attach       = attr->max_mcast_qp_attach;
458         resp->max_total_mcast_qp_attach = attr->max_total_mcast_qp_attach;
459         resp->max_ah                    = attr->max_ah;
460         resp->max_fmr                   = attr->max_fmr;
461         resp->max_map_per_fmr           = attr->max_map_per_fmr;
462         resp->max_srq                   = attr->max_srq;
463         resp->max_srq_wr                = attr->max_srq_wr;
464         resp->max_srq_sge               = attr->max_srq_sge;
465         resp->max_pkeys                 = attr->max_pkeys;
466         resp->local_ca_ack_delay        = attr->local_ca_ack_delay;
467         resp->phys_port_cnt             = ib_dev->phys_port_cnt;
468 }
469
470 ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file,
471                                struct ib_device *ib_dev,
472                                const char __user *buf,
473                                int in_len, int out_len)
474 {
475         struct ib_uverbs_query_device      cmd;
476         struct ib_uverbs_query_device_resp resp;
477
478         if (out_len < sizeof resp)
479                 return -ENOSPC;
480
481         if (copy_from_user(&cmd, buf, sizeof cmd))
482                 return -EFAULT;
483
484         memset(&resp, 0, sizeof resp);
485         copy_query_dev_fields(file, ib_dev, &resp, &ib_dev->attrs);
486
487         if (copy_to_user((void __user *) (unsigned long) cmd.response,
488                          &resp, sizeof resp))
489                 return -EFAULT;
490
491         return in_len;
492 }
493
494 ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file,
495                              struct ib_device *ib_dev,
496                              const char __user *buf,
497                              int in_len, int out_len)
498 {
499         struct ib_uverbs_query_port      cmd;
500         struct ib_uverbs_query_port_resp resp;
501         struct ib_port_attr              attr;
502         int                              ret;
503
504         if (out_len < sizeof resp)
505                 return -ENOSPC;
506
507         if (copy_from_user(&cmd, buf, sizeof cmd))
508                 return -EFAULT;
509
510         ret = ib_query_port(ib_dev, cmd.port_num, &attr);
511         if (ret)
512                 return ret;
513
514         memset(&resp, 0, sizeof resp);
515
516         resp.state           = attr.state;
517         resp.max_mtu         = attr.max_mtu;
518         resp.active_mtu      = attr.active_mtu;
519         resp.gid_tbl_len     = attr.gid_tbl_len;
520         resp.port_cap_flags  = attr.port_cap_flags;
521         resp.max_msg_sz      = attr.max_msg_sz;
522         resp.bad_pkey_cntr   = attr.bad_pkey_cntr;
523         resp.qkey_viol_cntr  = attr.qkey_viol_cntr;
524         resp.pkey_tbl_len    = attr.pkey_tbl_len;
525         resp.lid             = attr.lid;
526         resp.sm_lid          = attr.sm_lid;
527         resp.lmc             = attr.lmc;
528         resp.max_vl_num      = attr.max_vl_num;
529         resp.sm_sl           = attr.sm_sl;
530         resp.subnet_timeout  = attr.subnet_timeout;
531         resp.init_type_reply = attr.init_type_reply;
532         resp.active_width    = attr.active_width;
533         resp.active_speed    = attr.active_speed;
534         resp.phys_state      = attr.phys_state;
535         resp.link_layer      = rdma_port_get_link_layer(ib_dev,
536                                                         cmd.port_num);
537
538         if (copy_to_user((void __user *) (unsigned long) cmd.response,
539                          &resp, sizeof resp))
540                 return -EFAULT;
541
542         return in_len;
543 }
544
545 ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
546                            struct ib_device *ib_dev,
547                            const char __user *buf,
548                            int in_len, int out_len)
549 {
550         struct ib_uverbs_alloc_pd      cmd;
551         struct ib_uverbs_alloc_pd_resp resp;
552         struct ib_udata                udata;
553         struct ib_uobject             *uobj;
554         struct ib_pd                  *pd;
555         int                            ret;
556
557         if (out_len < sizeof resp)
558                 return -ENOSPC;
559
560         if (copy_from_user(&cmd, buf, sizeof cmd))
561                 return -EFAULT;
562
563         INIT_UDATA(&udata, buf + sizeof cmd,
564                    (unsigned long) cmd.response + sizeof resp,
565                    in_len - sizeof cmd, out_len - sizeof resp);
566
567         uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
568         if (!uobj)
569                 return -ENOMEM;
570
571         init_uobj(uobj, 0, file->ucontext, &pd_lock_class);
572         down_write(&uobj->mutex);
573
574         pd = ib_dev->alloc_pd(ib_dev, file->ucontext, &udata);
575         if (IS_ERR(pd)) {
576                 ret = PTR_ERR(pd);
577                 goto err;
578         }
579
580         pd->device  = ib_dev;
581         pd->uobject = uobj;
582         pd->__internal_mr = NULL;
583         atomic_set(&pd->usecnt, 0);
584
585         uobj->object = pd;
586         ret = idr_add_uobj(&ib_uverbs_pd_idr, uobj);
587         if (ret)
588                 goto err_idr;
589
590         memset(&resp, 0, sizeof resp);
591         resp.pd_handle = uobj->id;
592
593         if (copy_to_user((void __user *) (unsigned long) cmd.response,
594                          &resp, sizeof resp)) {
595                 ret = -EFAULT;
596                 goto err_copy;
597         }
598
599         mutex_lock(&file->mutex);
600         list_add_tail(&uobj->list, &file->ucontext->pd_list);
601         mutex_unlock(&file->mutex);
602
603         uobj->live = 1;
604
605         up_write(&uobj->mutex);
606
607         return in_len;
608
609 err_copy:
610         idr_remove_uobj(&ib_uverbs_pd_idr, uobj);
611
612 err_idr:
613         ib_dealloc_pd(pd);
614
615 err:
616         put_uobj_write(uobj);
617         return ret;
618 }
619
620 ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file,
621                              struct ib_device *ib_dev,
622                              const char __user *buf,
623                              int in_len, int out_len)
624 {
625         struct ib_uverbs_dealloc_pd cmd;
626         struct ib_uobject          *uobj;
627         struct ib_pd               *pd;
628         int                         ret;
629
630         if (copy_from_user(&cmd, buf, sizeof cmd))
631                 return -EFAULT;
632
633         uobj = idr_write_uobj(&ib_uverbs_pd_idr, cmd.pd_handle, file->ucontext);
634         if (!uobj)
635                 return -EINVAL;
636         pd = uobj->object;
637
638         if (atomic_read(&pd->usecnt)) {
639                 ret = -EBUSY;
640                 goto err_put;
641         }
642
643         ret = pd->device->dealloc_pd(uobj->object);
644         WARN_ONCE(ret, "Infiniband HW driver failed dealloc_pd");
645         if (ret)
646                 goto err_put;
647
648         uobj->live = 0;
649         put_uobj_write(uobj);
650
651         idr_remove_uobj(&ib_uverbs_pd_idr, uobj);
652
653         mutex_lock(&file->mutex);
654         list_del(&uobj->list);
655         mutex_unlock(&file->mutex);
656
657         put_uobj(uobj);
658
659         return in_len;
660
661 err_put:
662         put_uobj_write(uobj);
663         return ret;
664 }
665
666 struct xrcd_table_entry {
667         struct rb_node  node;
668         struct ib_xrcd *xrcd;
669         struct inode   *inode;
670 };
671
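/*
 * These helpers maintain a per-device rb-tree mapping an inode to its
 * XRCD so that an XRC domain can be shared between processes opening
 * the same file; xrcd_table_insert() takes a reference on the inode.
 */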
672 static int xrcd_table_insert(struct ib_uverbs_device *dev,
673                             struct inode *inode,
674                             struct ib_xrcd *xrcd)
675 {
676         struct xrcd_table_entry *entry, *scan;
677         struct rb_node **p = &dev->xrcd_tree.rb_node;
678         struct rb_node *parent = NULL;
679
680         entry = kmalloc(sizeof *entry, GFP_KERNEL);
681         if (!entry)
682                 return -ENOMEM;
683
684         entry->xrcd  = xrcd;
685         entry->inode = inode;
686
687         while (*p) {
688                 parent = *p;
689                 scan = rb_entry(parent, struct xrcd_table_entry, node);
690
691                 if (inode < scan->inode) {
692                         p = &(*p)->rb_left;
693                 } else if (inode > scan->inode) {
694                         p = &(*p)->rb_right;
695                 } else {
696                         kfree(entry);
697                         return -EEXIST;
698                 }
699         }
700
701         rb_link_node(&entry->node, parent, p);
702         rb_insert_color(&entry->node, &dev->xrcd_tree);
703         igrab(inode);
704         return 0;
705 }
706
707 static struct xrcd_table_entry *xrcd_table_search(struct ib_uverbs_device *dev,
708                                                   struct inode *inode)
709 {
710         struct xrcd_table_entry *entry;
711         struct rb_node *p = dev->xrcd_tree.rb_node;
712
713         while (p) {
714                 entry = rb_entry(p, struct xrcd_table_entry, node);
715
716                 if (inode < entry->inode)
717                         p = p->rb_left;
718                 else if (inode > entry->inode)
719                         p = p->rb_right;
720                 else
721                         return entry;
722         }
723
724         return NULL;
725 }
726
727 static struct ib_xrcd *find_xrcd(struct ib_uverbs_device *dev, struct inode *inode)
728 {
729         struct xrcd_table_entry *entry;
730
731         entry = xrcd_table_search(dev, inode);
732         if (!entry)
733                 return NULL;
734
735         return entry->xrcd;
736 }
737
738 static void xrcd_table_delete(struct ib_uverbs_device *dev,
739                               struct inode *inode)
740 {
741         struct xrcd_table_entry *entry;
742
743         entry = xrcd_table_search(dev, inode);
744         if (entry) {
745                 iput(inode);
746                 rb_erase(&entry->node, &dev->xrcd_tree);
747                 kfree(entry);
748         }
749 }
750
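/*
 * OPEN_XRCD: open or create an XRC domain.  When a file descriptor is
 * supplied, the inode behind it keys the per-device rb-tree so that
 * processes opening the same file share a single XRCD.
 */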
751 ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
752                             struct ib_device *ib_dev,
753                             const char __user *buf, int in_len,
754                             int out_len)
755 {
756         struct ib_uverbs_open_xrcd      cmd;
757         struct ib_uverbs_open_xrcd_resp resp;
758         struct ib_udata                 udata;
759         struct ib_uxrcd_object         *obj;
760         struct ib_xrcd                 *xrcd = NULL;
761         struct fd                       f = {NULL};
762         struct inode                   *inode = NULL;
763         int                             ret = 0;
764         int                             new_xrcd = 0;
765
766         if (out_len < sizeof resp)
767                 return -ENOSPC;
768
769         if (copy_from_user(&cmd, buf, sizeof cmd))
770                 return -EFAULT;
771
772         INIT_UDATA(&udata, buf + sizeof cmd,
773                    (unsigned long) cmd.response + sizeof resp,
774                    in_len - sizeof cmd, out_len - sizeof  resp);
775
776         mutex_lock(&file->device->xrcd_tree_mutex);
777
778         if (cmd.fd != -1) {
779                 /* search for file descriptor */
780                 f = fdget(cmd.fd);
781                 if (!f.file) {
782                         ret = -EBADF;
783                         goto err_tree_mutex_unlock;
784                 }
785
786                 inode = f.file->f_dentry->d_inode;
787                 xrcd = find_xrcd(file->device, inode);
788                 if (!xrcd && !(cmd.oflags & O_CREAT)) {
789                         /* no XRCD exists for this inode yet; O_CREAT is required */
790                         ret = -EAGAIN;
791                         goto err_tree_mutex_unlock;
792                 }
793
794                 if (xrcd && cmd.oflags & O_EXCL) {
795                         ret = -EINVAL;
796                         goto err_tree_mutex_unlock;
797                 }
798         }
799
800         obj = kmalloc(sizeof *obj, GFP_KERNEL);
801         if (!obj) {
802                 ret = -ENOMEM;
803                 goto err_tree_mutex_unlock;
804         }
805
806         init_uobj(&obj->uobject, 0, file->ucontext, &xrcd_lock_class);
807
808         down_write(&obj->uobject.mutex);
809
810         if (!xrcd) {
811                 xrcd = ib_dev->alloc_xrcd(ib_dev, file->ucontext, &udata);
812                 if (IS_ERR(xrcd)) {
813                         ret = PTR_ERR(xrcd);
814                         goto err;
815                 }
816
817                 xrcd->inode   = inode;
818                 xrcd->device  = ib_dev;
819                 atomic_set(&xrcd->usecnt, 0);
820                 mutex_init(&xrcd->tgt_qp_mutex);
821                 INIT_LIST_HEAD(&xrcd->tgt_qp_list);
822                 new_xrcd = 1;
823         }
824
825         atomic_set(&obj->refcnt, 0);
826         obj->uobject.object = xrcd;
827         ret = idr_add_uobj(&ib_uverbs_xrcd_idr, &obj->uobject);
828         if (ret)
829                 goto err_idr;
830
831         memset(&resp, 0, sizeof resp);
832         resp.xrcd_handle = obj->uobject.id;
833
834         if (inode) {
835                 if (new_xrcd) {
836                         /* create new inode/xrcd table entry */
837                         ret = xrcd_table_insert(file->device, inode, xrcd);
838                         if (ret)
839                                 goto err_insert_xrcd;
840                 }
841                 atomic_inc(&xrcd->usecnt);
842         }
843
844         if (copy_to_user((void __user *) (unsigned long) cmd.response,
845                          &resp, sizeof resp)) {
846                 ret = -EFAULT;
847                 goto err_copy;
848         }
849
850         if (f.file)
851                 fdput(f);
852
853         mutex_lock(&file->mutex);
854         list_add_tail(&obj->uobject.list, &file->ucontext->xrcd_list);
855         mutex_unlock(&file->mutex);
856
857         obj->uobject.live = 1;
858         up_write(&obj->uobject.mutex);
859
860         mutex_unlock(&file->device->xrcd_tree_mutex);
861         return in_len;
862
863 err_copy:
864         if (inode) {
865                 if (new_xrcd)
866                         xrcd_table_delete(file->device, inode);
867                 atomic_dec(&xrcd->usecnt);
868         }
869
870 err_insert_xrcd:
871         idr_remove_uobj(&ib_uverbs_xrcd_idr, &obj->uobject);
872
873 err_idr:
874         ib_dealloc_xrcd(xrcd);
875
876 err:
877         put_uobj_write(&obj->uobject);
878
879 err_tree_mutex_unlock:
880         if (f.file)
881                 fdput(f);
882
883         mutex_unlock(&file->device->xrcd_tree_mutex);
884
885         return ret;
886 }
887
888 ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file,
889                              struct ib_device *ib_dev,
890                              const char __user *buf, int in_len,
891                              int out_len)
892 {
893         struct ib_uverbs_close_xrcd cmd;
894         struct ib_uobject           *uobj;
895         struct ib_xrcd              *xrcd = NULL;
896         struct inode                *inode = NULL;
897         struct ib_uxrcd_object      *obj;
898         int                         live;
899         int                         ret = 0;
900
901         if (copy_from_user(&cmd, buf, sizeof cmd))
902                 return -EFAULT;
903
904         mutex_lock(&file->device->xrcd_tree_mutex);
905         uobj = idr_write_uobj(&ib_uverbs_xrcd_idr, cmd.xrcd_handle, file->ucontext);
906         if (!uobj) {
907                 ret = -EINVAL;
908                 goto out;
909         }
910
911         xrcd  = uobj->object;
912         inode = xrcd->inode;
913         obj   = container_of(uobj, struct ib_uxrcd_object, uobject);
914         if (atomic_read(&obj->refcnt)) {
915                 put_uobj_write(uobj);
916                 ret = -EBUSY;
917                 goto out;
918         }
919
920         if (!inode || atomic_dec_and_test(&xrcd->usecnt)) {
921                 ret = ib_dealloc_xrcd(uobj->object);
922                 if (!ret)
923                         uobj->live = 0;
924         }
925
926         live = uobj->live;
927         if (inode && ret)
928                 atomic_inc(&xrcd->usecnt);
929
930         put_uobj_write(uobj);
931
932         if (ret)
933                 goto out;
934
935         if (inode && !live)
936                 xrcd_table_delete(file->device, inode);
937
938         idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj);
939         mutex_lock(&file->mutex);
940         list_del(&uobj->list);
941         mutex_unlock(&file->mutex);
942
943         put_uobj(uobj);
944         ret = in_len;
945
946 out:
947         mutex_unlock(&file->device->xrcd_tree_mutex);
948         return ret;
949 }
950
951 void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev,
952                             struct ib_xrcd *xrcd)
953 {
954         struct inode *inode;
955
956         inode = xrcd->inode;
957         if (inode && !atomic_dec_and_test(&xrcd->usecnt))
958                 return;
959
960         ib_dealloc_xrcd(xrcd);
961
962         if (inode)
963                 xrcd_table_delete(dev, inode);
964 }
965
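/*
 * REG_MR: validate the access flags, look up the PD and ask the HCA
 * driver to register the user memory region, then publish the MR
 * through the idr and return its lkey/rkey to user space.
 */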
966 ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
967                          struct ib_device *ib_dev,
968                          const char __user *buf, int in_len,
969                          int out_len)
970 {
971         struct ib_uverbs_reg_mr      cmd;
972         struct ib_uverbs_reg_mr_resp resp;
973         struct ib_udata              udata;
974         struct ib_uobject           *uobj;
975         struct ib_pd                *pd;
976         struct ib_mr                *mr;
977         int                          ret;
978
979         if (out_len < sizeof resp)
980                 return -ENOSPC;
981
982         if (copy_from_user(&cmd, buf, sizeof cmd))
983                 return -EFAULT;
984
985         INIT_UDATA(&udata, buf + sizeof cmd,
986                    (unsigned long) cmd.response + sizeof resp,
987                    in_len - sizeof cmd, out_len - sizeof resp);
988
989         if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))
990                 return -EINVAL;
991
992         ret = ib_check_mr_access(cmd.access_flags);
993         if (ret)
994                 return ret;
995
996         uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
997         if (!uobj)
998                 return -ENOMEM;
999
1000         init_uobj(uobj, 0, file->ucontext, &mr_lock_class);
1001         down_write(&uobj->mutex);
1002
1003         pd = idr_read_pd(cmd.pd_handle, file->ucontext);
1004         if (!pd) {
1005                 ret = -EINVAL;
1006                 goto err_free;
1007         }
1008
1009         if (cmd.access_flags & IB_ACCESS_ON_DEMAND) {
1010                 if (!(pd->device->attrs.device_cap_flags &
1011                       IB_DEVICE_ON_DEMAND_PAGING)) {
1012                         pr_debug("ODP support not available\n");
1013                         ret = -EINVAL;
1014                         goto err_put;
1015                 }
1016         }
1017
1018         mr = pd->device->reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va,
1019                                      cmd.access_flags, &udata);
1020         if (IS_ERR(mr)) {
1021                 ret = PTR_ERR(mr);
1022                 goto err_put;
1023         }
1024
1025         mr->device  = pd->device;
1026         mr->pd      = pd;
1027         mr->uobject = uobj;
1028         atomic_inc(&pd->usecnt);
1029
1030         uobj->object = mr;
1031         ret = idr_add_uobj(&ib_uverbs_mr_idr, uobj);
1032         if (ret)
1033                 goto err_unreg;
1034
1035         memset(&resp, 0, sizeof resp);
1036         resp.lkey      = mr->lkey;
1037         resp.rkey      = mr->rkey;
1038         resp.mr_handle = uobj->id;
1039
1040         if (copy_to_user((void __user *) (unsigned long) cmd.response,
1041                          &resp, sizeof resp)) {
1042                 ret = -EFAULT;
1043                 goto err_copy;
1044         }
1045
1046         put_pd_read(pd);
1047
1048         mutex_lock(&file->mutex);
1049         list_add_tail(&uobj->list, &file->ucontext->mr_list);
1050         mutex_unlock(&file->mutex);
1051
1052         uobj->live = 1;
1053
1054         up_write(&uobj->mutex);
1055
1056         return in_len;
1057
1058 err_copy:
1059         idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
1060
1061 err_unreg:
1062         ib_dereg_mr(mr);
1063
1064 err_put:
1065         put_pd_read(pd);
1066
1067 err_free:
1068         put_uobj_write(uobj);
1069         return ret;
1070 }
1071
1072 ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
1073                            struct ib_device *ib_dev,
1074                            const char __user *buf, int in_len,
1075                            int out_len)
1076 {
1077         struct ib_uverbs_rereg_mr      cmd;
1078         struct ib_uverbs_rereg_mr_resp resp;
1079         struct ib_udata              udata;
1080         struct ib_pd                *pd = NULL;
1081         struct ib_mr                *mr;
1082         struct ib_pd                *old_pd;
1083         int                          ret;
1084         struct ib_uobject           *uobj;
1085
1086         if (out_len < sizeof(resp))
1087                 return -ENOSPC;
1088
1089         if (copy_from_user(&cmd, buf, sizeof(cmd)))
1090                 return -EFAULT;
1091
1092         INIT_UDATA(&udata, buf + sizeof(cmd),
1093                    (unsigned long) cmd.response + sizeof(resp),
1094                    in_len - sizeof(cmd), out_len - sizeof(resp));
1095
1096         if (cmd.flags & ~IB_MR_REREG_SUPPORTED || !cmd.flags)
1097                 return -EINVAL;
1098
1099         if ((cmd.flags & IB_MR_REREG_TRANS) &&
1100             (!cmd.start || !cmd.hca_va || 0 >= cmd.length ||
1101              (cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)))
1102                         return -EINVAL;
1103
1104         uobj = idr_write_uobj(&ib_uverbs_mr_idr, cmd.mr_handle,
1105                               file->ucontext);
1106
1107         if (!uobj)
1108                 return -EINVAL;
1109
1110         mr = uobj->object;
1111
1112         if (cmd.flags & IB_MR_REREG_ACCESS) {
1113                 ret = ib_check_mr_access(cmd.access_flags);
1114                 if (ret)
1115                         goto put_uobjs;
1116         }
1117
1118         if (cmd.flags & IB_MR_REREG_PD) {
1119                 pd = idr_read_pd(cmd.pd_handle, file->ucontext);
1120                 if (!pd) {
1121                         ret = -EINVAL;
1122                         goto put_uobjs;
1123                 }
1124         }
1125
1126         old_pd = mr->pd;
1127         ret = mr->device->rereg_user_mr(mr, cmd.flags, cmd.start,
1128                                         cmd.length, cmd.hca_va,
1129                                         cmd.access_flags, pd, &udata);
1130         if (!ret) {
1131                 if (cmd.flags & IB_MR_REREG_PD) {
1132                         atomic_inc(&pd->usecnt);
1133                         mr->pd = pd;
1134                         atomic_dec(&old_pd->usecnt);
1135                 }
1136         } else {
1137                 goto put_uobj_pd;
1138         }
1139
1140         memset(&resp, 0, sizeof(resp));
1141         resp.lkey      = mr->lkey;
1142         resp.rkey      = mr->rkey;
1143
1144         if (copy_to_user((void __user *)(unsigned long)cmd.response,
1145                          &resp, sizeof(resp)))
1146                 ret = -EFAULT;
1147         else
1148                 ret = in_len;
1149
1150 put_uobj_pd:
1151         if (cmd.flags & IB_MR_REREG_PD)
1152                 put_pd_read(pd);
1153
1154 put_uobjs:
1155
1156         put_uobj_write(mr->uobject);
1157
1158         return ret;
1159 }
1160
1161 ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
1162                            struct ib_device *ib_dev,
1163                            const char __user *buf, int in_len,
1164                            int out_len)
1165 {
1166         struct ib_uverbs_dereg_mr cmd;
1167         struct ib_mr             *mr;
1168         struct ib_uobject        *uobj;
1169         int                       ret = -EINVAL;
1170
1171         if (copy_from_user(&cmd, buf, sizeof cmd))
1172                 return -EFAULT;
1173
1174         uobj = idr_write_uobj(&ib_uverbs_mr_idr, cmd.mr_handle, file->ucontext);
1175         if (!uobj)
1176                 return -EINVAL;
1177
1178         mr = uobj->object;
1179
1180         ret = ib_dereg_mr(mr);
1181         if (!ret)
1182                 uobj->live = 0;
1183
1184         put_uobj_write(uobj);
1185
1186         if (ret)
1187                 return ret;
1188
1189         idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
1190
1191         mutex_lock(&file->mutex);
1192         list_del(&uobj->list);
1193         mutex_unlock(&file->mutex);
1194
1195         put_uobj(uobj);
1196
1197         return in_len;
1198 }
1199
1200 ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
1201                            struct ib_device *ib_dev,
1202                            const char __user *buf, int in_len,
1203                            int out_len)
1204 {
1205         struct ib_uverbs_alloc_mw      cmd;
1206         struct ib_uverbs_alloc_mw_resp resp;
1207         struct ib_uobject             *uobj;
1208         struct ib_pd                  *pd;
1209         struct ib_mw                  *mw;
1210         struct ib_udata                udata;
1211         int                            ret;
1212
1213         if (out_len < sizeof(resp))
1214                 return -ENOSPC;
1215
1216         if (copy_from_user(&cmd, buf, sizeof(cmd)))
1217                 return -EFAULT;
1218
1219         uobj = kmalloc(sizeof(*uobj), GFP_KERNEL);
1220         if (!uobj)
1221                 return -ENOMEM;
1222
1223         init_uobj(uobj, 0, file->ucontext, &mw_lock_class);
1224         down_write(&uobj->mutex);
1225
1226         pd = idr_read_pd(cmd.pd_handle, file->ucontext);
1227         if (!pd) {
1228                 ret = -EINVAL;
1229                 goto err_free;
1230         }
1231
1232         INIT_UDATA(&udata, buf + sizeof(cmd),
1233                    (unsigned long)cmd.response + sizeof(resp),
1234                    in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
1235                    out_len - sizeof(resp));
1236
1237         mw = pd->device->alloc_mw(pd, cmd.mw_type, &udata);
1238         if (IS_ERR(mw)) {
1239                 ret = PTR_ERR(mw);
1240                 goto err_put;
1241         }
1242
1243         mw->device  = pd->device;
1244         mw->pd      = pd;
1245         mw->uobject = uobj;
1246         atomic_inc(&pd->usecnt);
1247
1248         uobj->object = mw;
1249         ret = idr_add_uobj(&ib_uverbs_mw_idr, uobj);
1250         if (ret)
1251                 goto err_unalloc;
1252
1253         memset(&resp, 0, sizeof(resp));
1254         resp.rkey      = mw->rkey;
1255         resp.mw_handle = uobj->id;
1256
1257         if (copy_to_user((void __user *)(unsigned long)cmd.response,
1258                          &resp, sizeof(resp))) {
1259                 ret = -EFAULT;
1260                 goto err_copy;
1261         }
1262
1263         put_pd_read(pd);
1264
1265         mutex_lock(&file->mutex);
1266         list_add_tail(&uobj->list, &file->ucontext->mw_list);
1267         mutex_unlock(&file->mutex);
1268
1269         uobj->live = 1;
1270
1271         up_write(&uobj->mutex);
1272
1273         return in_len;
1274
1275 err_copy:
1276         idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
1277
1278 err_unalloc:
1279         uverbs_dealloc_mw(mw);
1280
1281 err_put:
1282         put_pd_read(pd);
1283
1284 err_free:
1285         put_uobj_write(uobj);
1286         return ret;
1287 }
1288
1289 ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file,
1290                              struct ib_device *ib_dev,
1291                              const char __user *buf, int in_len,
1292                              int out_len)
1293 {
1294         struct ib_uverbs_dealloc_mw cmd;
1295         struct ib_mw               *mw;
1296         struct ib_uobject          *uobj;
1297         int                         ret = -EINVAL;
1298
1299         if (copy_from_user(&cmd, buf, sizeof(cmd)))
1300                 return -EFAULT;
1301
1302         uobj = idr_write_uobj(&ib_uverbs_mw_idr, cmd.mw_handle, file->ucontext);
1303         if (!uobj)
1304                 return -EINVAL;
1305
1306         mw = uobj->object;
1307
1308         ret = uverbs_dealloc_mw(mw);
1309         if (!ret)
1310                 uobj->live = 0;
1311
1312         put_uobj_write(uobj);
1313
1314         if (ret)
1315                 return ret;
1316
1317         idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
1318
1319         mutex_lock(&file->mutex);
1320         list_del(&uobj->list);
1321         mutex_unlock(&file->mutex);
1322
1323         put_uobj(uobj);
1324
1325         return in_len;
1326 }
1327
1328 ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
1329                                       struct ib_device *ib_dev,
1330                                       const char __user *buf, int in_len,
1331                                       int out_len)
1332 {
1333         struct ib_uverbs_create_comp_channel       cmd;
1334         struct ib_uverbs_create_comp_channel_resp  resp;
1335         struct file                               *filp;
1336         int ret;
1337
1338         if (out_len < sizeof resp)
1339                 return -ENOSPC;
1340
1341         if (copy_from_user(&cmd, buf, sizeof cmd))
1342                 return -EFAULT;
1343
1344         ret = get_unused_fd_flags(O_CLOEXEC);
1345         if (ret < 0)
1346                 return ret;
1347         resp.fd = ret;
1348
1349         filp = ib_uverbs_alloc_event_file(file, ib_dev, 0);
1350         if (IS_ERR(filp)) {
1351                 put_unused_fd(resp.fd);
1352                 return PTR_ERR(filp);
1353         }
1354
1355         if (copy_to_user((void __user *) (unsigned long) cmd.response,
1356                          &resp, sizeof resp)) {
1357                 put_unused_fd(resp.fd);
1358                 fput(filp);
1359                 return -EFAULT;
1360         }
1361
1362         fd_install(resp.fd, filp);
1363         return in_len;
1364 }
1365
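/*
 * Common helper for the legacy CREATE_CQ and extended CREATE_CQ paths:
 * set up the ib_ucq_object, call the driver's create_cq verb and report
 * the result to user space through the caller-supplied cb callback.
 */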
1366 static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
1367                                         struct ib_device *ib_dev,
1368                                        struct ib_udata *ucore,
1369                                        struct ib_udata *uhw,
1370                                        struct ib_uverbs_ex_create_cq *cmd,
1371                                        size_t cmd_sz,
1372                                        int (*cb)(struct ib_uverbs_file *file,
1373                                                  struct ib_ucq_object *obj,
1374                                                  struct ib_uverbs_ex_create_cq_resp *resp,
1375                                                  struct ib_udata *udata,
1376                                                  void *context),
1377                                        void *context)
1378 {
1379         struct ib_ucq_object           *obj;
1380         struct ib_uverbs_event_file    *ev_file = NULL;
1381         struct ib_cq                   *cq;
1382         int                             ret;
1383         struct ib_uverbs_ex_create_cq_resp resp;
1384         struct ib_cq_init_attr attr = {};
1385
1386         if (cmd->comp_vector >= file->device->num_comp_vectors)
1387                 return ERR_PTR(-EINVAL);
1388
1389         obj = kmalloc(sizeof *obj, GFP_KERNEL);
1390         if (!obj)
1391                 return ERR_PTR(-ENOMEM);
1392
1393         init_uobj(&obj->uobject, cmd->user_handle, file->ucontext, &cq_lock_class);
1394         down_write(&obj->uobject.mutex);
1395
1396         if (cmd->comp_channel >= 0) {
1397                 ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel);
1398                 if (!ev_file) {
1399                         ret = -EINVAL;
1400                         goto err;
1401                 }
1402         }
1403
1404         obj->uverbs_file           = file;
1405         obj->comp_events_reported  = 0;
1406         obj->async_events_reported = 0;
1407         INIT_LIST_HEAD(&obj->comp_list);
1408         INIT_LIST_HEAD(&obj->async_list);
1409
1410         attr.cqe = cmd->cqe;
1411         attr.comp_vector = cmd->comp_vector;
1412
1413         if (cmd_sz > offsetof(typeof(*cmd), flags) + sizeof(cmd->flags))
1414                 attr.flags = cmd->flags;
1415
1416         cq = ib_dev->create_cq(ib_dev, &attr,
1417                                              file->ucontext, uhw);
1418         if (IS_ERR(cq)) {
1419                 ret = PTR_ERR(cq);
1420                 goto err_file;
1421         }
1422
1423         cq->device        = ib_dev;
1424         cq->uobject       = &obj->uobject;
1425         cq->comp_handler  = ib_uverbs_comp_handler;
1426         cq->event_handler = ib_uverbs_cq_event_handler;
1427         cq->cq_context    = ev_file;
1428         atomic_set(&cq->usecnt, 0);
1429
1430         obj->uobject.object = cq;
1431         ret = idr_add_uobj(&ib_uverbs_cq_idr, &obj->uobject);
1432         if (ret)
1433                 goto err_free;
1434
1435         memset(&resp, 0, sizeof resp);
1436         resp.base.cq_handle = obj->uobject.id;
1437         resp.base.cqe       = cq->cqe;
1438
1439         resp.response_length = offsetof(typeof(resp), response_length) +
1440                 sizeof(resp.response_length);
1441
1442         ret = cb(file, obj, &resp, ucore, context);
1443         if (ret)
1444                 goto err_cb;
1445
1446         mutex_lock(&file->mutex);
1447         list_add_tail(&obj->uobject.list, &file->ucontext->cq_list);
1448         mutex_unlock(&file->mutex);
1449
1450         obj->uobject.live = 1;
1451
1452         up_write(&obj->uobject.mutex);
1453
1454         return obj;
1455
1456 err_cb:
1457         idr_remove_uobj(&ib_uverbs_cq_idr, &obj->uobject);
1458
1459 err_free:
1460         ib_destroy_cq(cq);
1461
1462 err_file:
1463         if (ev_file)
1464                 ib_uverbs_release_ucq(file, ev_file, obj);
1465
1466 err:
1467         put_uobj_write(&obj->uobject);
1468
1469         return ERR_PTR(ret);
1470 }
1471
1472 static int ib_uverbs_create_cq_cb(struct ib_uverbs_file *file,
1473                                   struct ib_ucq_object *obj,
1474                                   struct ib_uverbs_ex_create_cq_resp *resp,
1475                                   struct ib_udata *ucore, void *context)
1476 {
1477         if (ib_copy_to_udata(ucore, &resp->base, sizeof(resp->base)))
1478                 return -EFAULT;
1479
1480         return 0;
1481 }
1482
1483 ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
1484                             struct ib_device *ib_dev,
1485                             const char __user *buf, int in_len,
1486                             int out_len)
1487 {
1488         struct ib_uverbs_create_cq      cmd;
1489         struct ib_uverbs_ex_create_cq   cmd_ex;
1490         struct ib_uverbs_create_cq_resp resp;
1491         struct ib_udata                 ucore;
1492         struct ib_udata                 uhw;
1493         struct ib_ucq_object           *obj;
1494
1495         if (out_len < sizeof(resp))
1496                 return -ENOSPC;
1497
1498         if (copy_from_user(&cmd, buf, sizeof(cmd)))
1499                 return -EFAULT;
1500
1501         INIT_UDATA(&ucore, buf, (unsigned long)cmd.response, sizeof(cmd), sizeof(resp));
1502
1503         INIT_UDATA(&uhw, buf + sizeof(cmd),
1504                    (unsigned long)cmd.response + sizeof(resp),
1505                    in_len - sizeof(cmd), out_len - sizeof(resp));
1506
1507         memset(&cmd_ex, 0, sizeof(cmd_ex));
1508         cmd_ex.user_handle = cmd.user_handle;
1509         cmd_ex.cqe = cmd.cqe;
1510         cmd_ex.comp_vector = cmd.comp_vector;
1511         cmd_ex.comp_channel = cmd.comp_channel;
1512
1513         obj = create_cq(file, ib_dev, &ucore, &uhw, &cmd_ex,
1514                         offsetof(typeof(cmd_ex), comp_channel) +
1515                         sizeof(cmd.comp_channel), ib_uverbs_create_cq_cb,
1516                         NULL);
1517
1518         if (IS_ERR(obj))
1519                 return PTR_ERR(obj);
1520
1521         return in_len;
1522 }
1523
1524 static int ib_uverbs_ex_create_cq_cb(struct ib_uverbs_file *file,
1525                                      struct ib_ucq_object *obj,
1526                                      struct ib_uverbs_ex_create_cq_resp *resp,
1527                                      struct ib_udata *ucore, void *context)
1528 {
1529         if (ib_copy_to_udata(ucore, resp, resp->response_length))
1530                 return -EFAULT;
1531
1532         return 0;
1533 }
1534
1535 int ib_uverbs_ex_create_cq(struct ib_uverbs_file *file,
1536                          struct ib_device *ib_dev,
1537                            struct ib_udata *ucore,
1538                            struct ib_udata *uhw)
1539 {
1540         struct ib_uverbs_ex_create_cq_resp resp;
1541         struct ib_uverbs_ex_create_cq  cmd;
1542         struct ib_ucq_object           *obj;
1543         int err;
1544
1545         if (ucore->inlen < sizeof(cmd))
1546                 return -EINVAL;
1547
1548         err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
1549         if (err)
1550                 return err;
1551
1552         if (cmd.comp_mask)
1553                 return -EINVAL;
1554
1555         if (cmd.reserved)
1556                 return -EINVAL;
1557
1558         if (ucore->outlen < (offsetof(typeof(resp), response_length) +
1559                              sizeof(resp.response_length)))
1560                 return -ENOSPC;
1561
1562         obj = create_cq(file, ib_dev, ucore, uhw, &cmd,
1563                         min(ucore->inlen, sizeof(cmd)),
1564                         ib_uverbs_ex_create_cq_cb, NULL);
1565
1566         if (IS_ERR(obj))
1567                 return PTR_ERR(obj);
1568
1569         return 0;
1570 }
1571
1572 ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
1573                             struct ib_device *ib_dev,
1574                             const char __user *buf, int in_len,
1575                             int out_len)
1576 {
1577         struct ib_uverbs_resize_cq      cmd;
1578         struct ib_uverbs_resize_cq_resp resp;
1579         struct ib_udata                 udata;
1580         struct ib_cq                    *cq;
1581         int                             ret = -EINVAL;
1582
1583         if (copy_from_user(&cmd, buf, sizeof cmd))
1584                 return -EFAULT;
1585
1586         INIT_UDATA(&udata, buf + sizeof cmd,
1587                    (unsigned long) cmd.response + sizeof resp,
1588                    in_len - sizeof cmd, out_len - sizeof resp);
1589
1590         cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
1591         if (!cq)
1592                 return -EINVAL;
1593
1594         ret = cq->device->resize_cq(cq, cmd.cqe, &udata);
1595         if (ret)
1596                 goto out;
1597
1598         resp.cqe = cq->cqe;
1599
1600         if (copy_to_user((void __user *) (unsigned long) cmd.response,
1601                          &resp, sizeof resp.cqe))
1602                 ret = -EFAULT;
1603
1604 out:
1605         put_cq_read(cq);
1606
1607         return ret ? ret : in_len;
1608 }
1609
1610 static int copy_wc_to_user(void __user *dest, struct ib_wc *wc)
1611 {
1612         struct ib_uverbs_wc tmp;
1613
1614         tmp.wr_id               = wc->wr_id;
1615         tmp.status              = wc->status;
1616         tmp.opcode              = wc->opcode;
1617         tmp.vendor_err          = wc->vendor_err;
1618         tmp.byte_len            = wc->byte_len;
1619         tmp.ex.imm_data         = (__u32 __force) wc->ex.imm_data;
1620         tmp.qp_num              = wc->qp->qp_num;
1621         tmp.src_qp              = wc->src_qp;
1622         tmp.wc_flags            = wc->wc_flags;
1623         tmp.pkey_index          = wc->pkey_index;
1624         tmp.slid                = wc->slid;
1625         tmp.sl                  = wc->sl;
1626         tmp.dlid_path_bits      = wc->dlid_path_bits;
1627         tmp.port_num            = wc->port_num;
1628         tmp.reserved            = 0;
1629
1630         if (copy_to_user(dest, &tmp, sizeof tmp))
1631                 return -EFAULT;
1632
1633         return 0;
1634 }
1635
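/*
 * Poll up to cmd.ne completions from the CQ, copying each work completion
 * to user space directly behind the response header, then write the header
 * with the number of completions actually found.
 */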
1636 ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
1637                           struct ib_device *ib_dev,
1638                           const char __user *buf, int in_len,
1639                           int out_len)
1640 {
1641         struct ib_uverbs_poll_cq       cmd;
1642         struct ib_uverbs_poll_cq_resp  resp;
1643         u8 __user                     *header_ptr;
1644         u8 __user                     *data_ptr;
1645         struct ib_cq                  *cq;
1646         struct ib_wc                   wc;
1647         int                            ret;
1648
1649         if (copy_from_user(&cmd, buf, sizeof cmd))
1650                 return -EFAULT;
1651
1652         cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
1653         if (!cq)
1654                 return -EINVAL;
1655
1656         /* we copy a struct ib_uverbs_poll_cq_resp to user space */
1657         header_ptr = (void __user *)(unsigned long) cmd.response;
1658         data_ptr = header_ptr + sizeof resp;
1659
1660         memset(&resp, 0, sizeof resp);
1661         while (resp.count < cmd.ne) {
1662                 ret = ib_poll_cq(cq, 1, &wc);
1663                 if (ret < 0)
1664                         goto out_put;
1665                 if (!ret)
1666                         break;
1667
1668                 ret = copy_wc_to_user(data_ptr, &wc);
1669                 if (ret)
1670                         goto out_put;
1671
1672                 data_ptr += sizeof(struct ib_uverbs_wc);
1673                 ++resp.count;
1674         }
1675
1676         if (copy_to_user(header_ptr, &resp, sizeof resp)) {
1677                 ret = -EFAULT;
1678                 goto out_put;
1679         }
1680
1681         ret = in_len;
1682
1683 out_put:
1684         put_cq_read(cq);
1685         return ret;
1686 }
1687
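/*
 * Request a completion notification on a CQ, either for solicited
 * completions only or for any next completion.
 */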
1688 ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file,
1689                                 struct ib_device *ib_dev,
1690                                 const char __user *buf, int in_len,
1691                                 int out_len)
1692 {
1693         struct ib_uverbs_req_notify_cq cmd;
1694         struct ib_cq                  *cq;
1695         int retval;
1696
1697         if (copy_from_user(&cmd, buf, sizeof cmd))
1698                 return -EFAULT;
1699
1700         cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
1701         if (!cq)
1702                 return -EINVAL;
1703
1704         if (ib_req_notify_cq(cq, cmd.solicited_only ?
1705                              IB_CQ_SOLICITED : IB_CQ_NEXT_COMP) < 0)
1706                 retval = -ENXIO;
1707         else
1708                 retval = in_len;
1709
1710         put_cq_read(cq);
1711
1712         return retval;
1713 }
1714
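/*
 * Destroy a CQ: take the uobject write lock, destroy the underlying CQ,
 * drop it from the idr and the per-file list, release pending events and
 * report the completion/async event counts back to user space.
 */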
1715 ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
1716                              struct ib_device *ib_dev,
1717                              const char __user *buf, int in_len,
1718                              int out_len)
1719 {
1720         struct ib_uverbs_destroy_cq      cmd;
1721         struct ib_uverbs_destroy_cq_resp resp;
1722         struct ib_uobject               *uobj;
1723         struct ib_cq                    *cq;
1724         struct ib_ucq_object            *obj;
1725         struct ib_uverbs_event_file     *ev_file;
1726         int                              ret = -EINVAL;
1727
1728         if (copy_from_user(&cmd, buf, sizeof cmd))
1729                 return -EFAULT;
1730
1731         uobj = idr_write_uobj(&ib_uverbs_cq_idr, cmd.cq_handle, file->ucontext);
1732         if (!uobj)
1733                 return -EINVAL;
1734         cq      = uobj->object;
1735         ev_file = cq->cq_context;
1736         obj     = container_of(cq->uobject, struct ib_ucq_object, uobject);
1737
1738         ret = ib_destroy_cq(cq);
1739         if (!ret)
1740                 uobj->live = 0;
1741
1742         put_uobj_write(uobj);
1743
1744         if (ret)
1745                 return ret;
1746
1747         idr_remove_uobj(&ib_uverbs_cq_idr, uobj);
1748
1749         mutex_lock(&file->mutex);
1750         list_del(&uobj->list);
1751         mutex_unlock(&file->mutex);
1752
1753         ib_uverbs_release_ucq(file, ev_file, obj);
1754
1755         memset(&resp, 0, sizeof resp);
1756         resp.comp_events_reported  = obj->comp_events_reported;
1757         resp.async_events_reported = obj->async_events_reported;
1758
1759         put_uobj(uobj);
1760
1761         if (copy_to_user((void __user *) (unsigned long) cmd.response,
1762                          &resp, sizeof resp))
1763                 return -EFAULT;
1764
1765         return in_len;
1766 }
1767
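/*
 * Common QP creation path shared by the classic and extended create-QP
 * commands.  Resolves all referenced objects (PD, CQs, SRQ, XRCD or RWQ
 * indirection table), builds the ib_qp_init_attr, creates the QP and
 * registers the new uobject; the supplied callback writes the response.
 */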
1768 static int create_qp(struct ib_uverbs_file *file,
1769                      struct ib_udata *ucore,
1770                      struct ib_udata *uhw,
1771                      struct ib_uverbs_ex_create_qp *cmd,
1772                      size_t cmd_sz,
1773                      int (*cb)(struct ib_uverbs_file *file,
1774                                struct ib_uverbs_ex_create_qp_resp *resp,
1775                                struct ib_udata *udata),
1776                      void *context)
1777 {
1778         struct ib_uqp_object            *obj;
1779         struct ib_device                *device;
1780         struct ib_pd                    *pd = NULL;
1781         struct ib_xrcd                  *xrcd = NULL;
1782         struct ib_uobject               *uninitialized_var(xrcd_uobj);
1783         struct ib_cq                    *scq = NULL, *rcq = NULL;
1784         struct ib_srq                   *srq = NULL;
1785         struct ib_qp                    *qp;
1786         char                            *buf;
1787         struct ib_qp_init_attr          attr = {};
1788         struct ib_uverbs_ex_create_qp_resp resp;
1789         int                             ret;
1790         struct ib_rwq_ind_table *ind_tbl = NULL;
1791         bool has_sq = true;
1792
1793         if (cmd->qp_type == IB_QPT_RAW_PACKET && priv_check(curthread, PRIV_NET_RAW) != 0)
1794                 return -EPERM;
1795
1796         obj = kzalloc(sizeof *obj, GFP_KERNEL);
1797         if (!obj)
1798                 return -ENOMEM;
1799
1800         init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext,
1801                   &qp_lock_class);
1802         mutex_init(&obj->mcast_lock);
1803         down_write(&obj->uevent.uobject.mutex);
1804         if (cmd_sz >= offsetof(typeof(*cmd), rwq_ind_tbl_handle) +
1805                       sizeof(cmd->rwq_ind_tbl_handle) &&
1806                       (cmd->comp_mask & IB_UVERBS_CREATE_QP_MASK_IND_TABLE)) {
1807                 ind_tbl = idr_read_rwq_indirection_table(cmd->rwq_ind_tbl_handle,
1808                                                          file->ucontext);
1809                 if (!ind_tbl) {
1810                         ret = -EINVAL;
1811                         goto err_put;
1812                 }
1813
1814                 attr.rwq_ind_tbl = ind_tbl;
1815         }
1816
1817         if ((cmd_sz >= offsetof(typeof(*cmd), reserved1) +
1818                        sizeof(cmd->reserved1)) && cmd->reserved1) {
1819                 ret = -EOPNOTSUPP;
1820                 goto err_put;
1821         }
1822
1823         if (ind_tbl && (cmd->max_recv_wr || cmd->max_recv_sge || cmd->is_srq)) {
1824                 ret = -EINVAL;
1825                 goto err_put;
1826         }
1827
1828         if (ind_tbl && !cmd->max_send_wr)
1829                 has_sq = false;
1830
1831         if (cmd->qp_type == IB_QPT_XRC_TGT) {
1832                 xrcd = idr_read_xrcd(cmd->pd_handle, file->ucontext,
1833                                      &xrcd_uobj);
1834                 if (!xrcd) {
1835                         ret = -EINVAL;
1836                         goto err_put;
1837                 }
1838                 device = xrcd->device;
1839         } else {
1840                 if (cmd->qp_type == IB_QPT_XRC_INI) {
1841                         cmd->max_recv_wr = 0;
1842                         cmd->max_recv_sge = 0;
1843                 } else {
1844                         if (cmd->is_srq) {
1845                                 srq = idr_read_srq(cmd->srq_handle,
1846                                                    file->ucontext);
1847                                 if (!srq || srq->srq_type != IB_SRQT_BASIC) {
1848                                         ret = -EINVAL;
1849                                         goto err_put;
1850                                 }
1851                         }
1852
1853                         if (!ind_tbl) {
1854                                 if (cmd->recv_cq_handle != cmd->send_cq_handle) {
1855                                         rcq = idr_read_cq(cmd->recv_cq_handle,
1856                                                           file->ucontext, 0);
1857                                         if (!rcq) {
1858                                                 ret = -EINVAL;
1859                                                 goto err_put;
1860                                         }
1861                                 }
1862                         }
1863                 }
1864
1865                 if (has_sq)
1866                         scq = idr_read_cq(cmd->send_cq_handle, file->ucontext, !!rcq);
1867                 if (!ind_tbl)
1868                         rcq = rcq ?: scq;
1869                 pd  = idr_read_pd(cmd->pd_handle, file->ucontext);
1870                 if (!pd || (!scq && has_sq)) {
1871                         ret = -EINVAL;
1872                         goto err_put;
1873                 }
1874
1875                 device = pd->device;
1876         }
1877
1878         attr.event_handler = ib_uverbs_qp_event_handler;
1879         attr.qp_context    = file;
1880         attr.send_cq       = scq;
1881         attr.recv_cq       = rcq;
1882         attr.srq           = srq;
1883         attr.xrcd          = xrcd;
1884         attr.sq_sig_type   = cmd->sq_sig_all ? IB_SIGNAL_ALL_WR :
1885                                               IB_SIGNAL_REQ_WR;
1886         attr.qp_type       = cmd->qp_type;
1887         attr.create_flags  = 0;
1888
1889         attr.cap.max_send_wr     = cmd->max_send_wr;
1890         attr.cap.max_recv_wr     = cmd->max_recv_wr;
1891         attr.cap.max_send_sge    = cmd->max_send_sge;
1892         attr.cap.max_recv_sge    = cmd->max_recv_sge;
1893         attr.cap.max_inline_data = cmd->max_inline_data;
1894
1895         obj->uevent.events_reported     = 0;
1896         INIT_LIST_HEAD(&obj->uevent.event_list);
1897         INIT_LIST_HEAD(&obj->mcast_list);
1898
1899         if (cmd_sz >= offsetof(typeof(*cmd), create_flags) +
1900                       sizeof(cmd->create_flags))
1901                 attr.create_flags = cmd->create_flags;
1902
1903         if (attr.create_flags & ~(IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK |
1904                                 IB_QP_CREATE_CROSS_CHANNEL |
1905                                 IB_QP_CREATE_MANAGED_SEND |
1906                                 IB_QP_CREATE_MANAGED_RECV |
1907                                 IB_QP_CREATE_SCATTER_FCS)) {
1908                 ret = -EINVAL;
1909                 goto err_put;
1910         }
1911
1912         buf = (char *)cmd + sizeof(*cmd);
1913         if (cmd_sz > sizeof(*cmd))
1914                 if (!(buf[0] == 0 && !memcmp(buf, buf + 1,
1915                                              cmd_sz - sizeof(*cmd) - 1))) {
1916                         ret = -EINVAL;
1917                         goto err_put;
1918                 }
1919
1920         if (cmd->qp_type == IB_QPT_XRC_TGT)
1921                 qp = ib_create_qp(pd, &attr);
1922         else
1923                 qp = device->create_qp(pd, &attr, uhw);
1924
1925         if (IS_ERR(qp)) {
1926                 ret = PTR_ERR(qp);
1927                 goto err_put;
1928         }
1929
1930         if (cmd->qp_type != IB_QPT_XRC_TGT) {
1931                 qp->real_qp       = qp;
1932                 qp->device        = device;
1933                 qp->pd            = pd;
1934                 qp->send_cq       = attr.send_cq;
1935                 qp->recv_cq       = attr.recv_cq;
1936                 qp->srq           = attr.srq;
1937                 qp->rwq_ind_tbl   = ind_tbl;
1938                 qp->event_handler = attr.event_handler;
1939                 qp->qp_context    = attr.qp_context;
1940                 qp->qp_type       = attr.qp_type;
1941                 atomic_set(&qp->usecnt, 0);
1942                 atomic_inc(&pd->usecnt);
1943                 if (attr.send_cq)
1944                         atomic_inc(&attr.send_cq->usecnt);
1945                 if (attr.recv_cq)
1946                         atomic_inc(&attr.recv_cq->usecnt);
1947                 if (attr.srq)
1948                         atomic_inc(&attr.srq->usecnt);
1949                 if (ind_tbl)
1950                         atomic_inc(&ind_tbl->usecnt);
1951         } else {
1952                 /* It is done in _ib_create_qp for other QP types */
1953                 qp->uobject = &obj->uevent.uobject;
1954         }
1955         qp->uobject = &obj->uevent.uobject;
1956
1957         obj->uevent.uobject.object = qp;
1958         ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
1959         if (ret)
1960                 goto err_destroy;
1961
1962         memset(&resp, 0, sizeof resp);
1963         resp.base.qpn             = qp->qp_num;
1964         resp.base.qp_handle       = obj->uevent.uobject.id;
1965         resp.base.max_recv_sge    = attr.cap.max_recv_sge;
1966         resp.base.max_send_sge    = attr.cap.max_send_sge;
1967         resp.base.max_recv_wr     = attr.cap.max_recv_wr;
1968         resp.base.max_send_wr     = attr.cap.max_send_wr;
1969         resp.base.max_inline_data = attr.cap.max_inline_data;
1970
1971         resp.response_length = offsetof(typeof(resp), response_length) +
1972                                sizeof(resp.response_length);
1973
1974         ret = cb(file, &resp, ucore);
1975         if (ret)
1976                 goto err_cb;
1977
1978         if (xrcd) {
1979                 obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object,
1980                                           uobject);
1981                 atomic_inc(&obj->uxrcd->refcnt);
1982                 put_xrcd_read(xrcd_uobj);
1983         }
1984
1985         if (pd)
1986                 put_pd_read(pd);
1987         if (scq)
1988                 put_cq_read(scq);
1989         if (rcq && rcq != scq)
1990                 put_cq_read(rcq);
1991         if (srq)
1992                 put_srq_read(srq);
1993         if (ind_tbl)
1994                 put_rwq_indirection_table_read(ind_tbl);
1995
1996         mutex_lock(&file->mutex);
1997         list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list);
1998         mutex_unlock(&file->mutex);
1999
2000         obj->uevent.uobject.live = 1;
2001
2002         up_write(&obj->uevent.uobject.mutex);
2003
2004         return 0;
2005 err_cb:
2006         idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
2007
2008 err_destroy:
2009         ib_destroy_qp(qp);
2010
2011 err_put:
2012         if (xrcd)
2013                 put_xrcd_read(xrcd_uobj);
2014         if (pd)
2015                 put_pd_read(pd);
2016         if (scq)
2017                 put_cq_read(scq);
2018         if (rcq && rcq != scq)
2019                 put_cq_read(rcq);
2020         if (srq)
2021                 put_srq_read(srq);
2022         if (ind_tbl)
2023                 put_rwq_indirection_table_read(ind_tbl);
2024
2025         put_uobj_write(&obj->uevent.uobject);
2026         return ret;
2027 }
2028
2029 static int ib_uverbs_create_qp_cb(struct ib_uverbs_file *file,
2030                                   struct ib_uverbs_ex_create_qp_resp *resp,
2031                                   struct ib_udata *ucore)
2032 {
2033         if (ib_copy_to_udata(ucore, &resp->base, sizeof(resp->base)))
2034                 return -EFAULT;
2035
2036         return 0;
2037 }
2038
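/*
 * Classic create-QP command: translate the legacy command structure into
 * struct ib_uverbs_ex_create_qp and reuse create_qp(), returning only the
 * base response to user space.
 */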
2039 ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
2040                             struct ib_device *ib_dev,
2041                             const char __user *buf, int in_len,
2042                             int out_len)
2043 {
2044         struct ib_uverbs_create_qp      cmd;
2045         struct ib_uverbs_ex_create_qp   cmd_ex;
2046         struct ib_udata                 ucore;
2047         struct ib_udata                 uhw;
2048         ssize_t resp_size = sizeof(struct ib_uverbs_create_qp_resp);
2049         int                             err;
2050
2051         if (out_len < resp_size)
2052                 return -ENOSPC;
2053
2054         if (copy_from_user(&cmd, buf, sizeof(cmd)))
2055                 return -EFAULT;
2056
2057         INIT_UDATA(&ucore, buf, (unsigned long)cmd.response, sizeof(cmd),
2058                    resp_size);
2059         INIT_UDATA(&uhw, buf + sizeof(cmd),
2060                    (unsigned long)cmd.response + resp_size,
2061                    in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
2062                    out_len - resp_size);
2063
2064         memset(&cmd_ex, 0, sizeof(cmd_ex));
2065         cmd_ex.user_handle = cmd.user_handle;
2066         cmd_ex.pd_handle = cmd.pd_handle;
2067         cmd_ex.send_cq_handle = cmd.send_cq_handle;
2068         cmd_ex.recv_cq_handle = cmd.recv_cq_handle;
2069         cmd_ex.srq_handle = cmd.srq_handle;
2070         cmd_ex.max_send_wr = cmd.max_send_wr;
2071         cmd_ex.max_recv_wr = cmd.max_recv_wr;
2072         cmd_ex.max_send_sge = cmd.max_send_sge;
2073         cmd_ex.max_recv_sge = cmd.max_recv_sge;
2074         cmd_ex.max_inline_data = cmd.max_inline_data;
2075         cmd_ex.sq_sig_all = cmd.sq_sig_all;
2076         cmd_ex.qp_type = cmd.qp_type;
2077         cmd_ex.is_srq = cmd.is_srq;
2078
2079         err = create_qp(file, &ucore, &uhw, &cmd_ex,
2080                         offsetof(typeof(cmd_ex), is_srq) +
2081                         sizeof(cmd.is_srq), ib_uverbs_create_qp_cb,
2082                         NULL);
2083
2084         if (err)
2085                 return err;
2086
2087         return in_len;
2088 }
2089
2090 static int ib_uverbs_ex_create_qp_cb(struct ib_uverbs_file *file,
2091                                      struct ib_uverbs_ex_create_qp_resp *resp,
2092                                      struct ib_udata *ucore)
2093 {
2094         if (ib_copy_to_udata(ucore, resp, resp->response_length))
2095                 return -EFAULT;
2096
2097         return 0;
2098 }
2099
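/*
 * Extended create-QP command: validate comp_mask and the reserved field,
 * then call create_qp() with a callback that copies the extended response
 * through ucore.
 */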
2100 int ib_uverbs_ex_create_qp(struct ib_uverbs_file *file,
2101                            struct ib_device *ib_dev,
2102                            struct ib_udata *ucore,
2103                            struct ib_udata *uhw)
2104 {
2105         struct ib_uverbs_ex_create_qp_resp resp;
2106         struct ib_uverbs_ex_create_qp cmd = {0};
2107         int err;
2108
2109         if (ucore->inlen < (offsetof(typeof(cmd), comp_mask) +
2110                             sizeof(cmd.comp_mask)))
2111                 return -EINVAL;
2112
2113         err = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
2114         if (err)
2115                 return err;
2116
2117         if (cmd.comp_mask & ~IB_UVERBS_CREATE_QP_SUP_COMP_MASK)
2118                 return -EINVAL;
2119
2120         if (cmd.reserved)
2121                 return -EINVAL;
2122
2123         if (ucore->outlen < (offsetof(typeof(resp), response_length) +
2124                              sizeof(resp.response_length)))
2125                 return -ENOSPC;
2126
2127         err = create_qp(file, ucore, uhw, &cmd,
2128                         min(ucore->inlen, sizeof(cmd)),
2129                         ib_uverbs_ex_create_qp_cb, NULL);
2130
2131         if (err)
2132                 return err;
2133
2134         return 0;
2135 }
2136
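/*
 * Open a QP that already exists in an XRC domain: cmd.pd_handle names the
 * XRCD and ib_open_qp() returns a new ib_qp referring to the QP identified
 * by cmd.qpn; a fresh uobject is registered for it.
 */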
2137 ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
2138                           struct ib_device *ib_dev,
2139                           const char __user *buf, int in_len, int out_len)
2140 {
2141         struct ib_uverbs_open_qp        cmd;
2142         struct ib_uverbs_create_qp_resp resp;
2143         struct ib_udata                 udata;
2144         struct ib_uqp_object           *obj;
2145         struct ib_xrcd                 *xrcd;
2146         struct ib_uobject              *uninitialized_var(xrcd_uobj);
2147         struct ib_qp                   *qp;
2148         struct ib_qp_open_attr          attr;
2149         int ret;
2150
2151         if (out_len < sizeof resp)
2152                 return -ENOSPC;
2153
2154         if (copy_from_user(&cmd, buf, sizeof cmd))
2155                 return -EFAULT;
2156
2157         INIT_UDATA(&udata, buf + sizeof cmd,
2158                    (unsigned long) cmd.response + sizeof resp,
2159                    in_len - sizeof cmd, out_len - sizeof resp);
2160
2161         obj = kmalloc(sizeof *obj, GFP_KERNEL);
2162         if (!obj)
2163                 return -ENOMEM;
2164
2165         init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_class);
2166         down_write(&obj->uevent.uobject.mutex);
2167
2168         xrcd = idr_read_xrcd(cmd.pd_handle, file->ucontext, &xrcd_uobj);
2169         if (!xrcd) {
2170                 ret = -EINVAL;
2171                 goto err_put;
2172         }
2173
2174         attr.event_handler = ib_uverbs_qp_event_handler;
2175         attr.qp_context    = file;
2176         attr.qp_num        = cmd.qpn;
2177         attr.qp_type       = cmd.qp_type;
2178
2179         obj->uevent.events_reported = 0;
2180         INIT_LIST_HEAD(&obj->uevent.event_list);
2181         INIT_LIST_HEAD(&obj->mcast_list);
2182
2183         qp = ib_open_qp(xrcd, &attr);
2184         if (IS_ERR(qp)) {
2185                 ret = PTR_ERR(qp);
2186                 goto err_put;
2187         }
2188
2189         qp->uobject = &obj->uevent.uobject;
2190
2191         obj->uevent.uobject.object = qp;
2192         ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
2193         if (ret)
2194                 goto err_destroy;
2195
2196         memset(&resp, 0, sizeof resp);
2197         resp.qpn       = qp->qp_num;
2198         resp.qp_handle = obj->uevent.uobject.id;
2199
2200         if (copy_to_user((void __user *) (unsigned long) cmd.response,
2201                          &resp, sizeof resp)) {
2202                 ret = -EFAULT;
2203                 goto err_remove;
2204         }
2205
2206         obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject);
2207         atomic_inc(&obj->uxrcd->refcnt);
2208         put_xrcd_read(xrcd_uobj);
2209
2210         mutex_lock(&file->mutex);
2211         list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list);
2212         mutex_unlock(&file->mutex);
2213
2214         obj->uevent.uobject.live = 1;
2215
2216         up_write(&obj->uevent.uobject.mutex);
2217
2218         return in_len;
2219
2220 err_remove:
2221         idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
2222
2223 err_destroy:
2224         ib_destroy_qp(qp);
2225
2226 err_put:
2227         put_xrcd_read(xrcd_uobj);
2228         put_uobj_write(&obj->uevent.uobject);
2229         return ret;
2230 }
2231
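/*
 * Query QP attributes: call ib_query_qp() and marshal the QP attributes,
 * primary and alternate path attributes, and the creation-time capabilities
 * into the response.
 */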
2232 ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
2233                            struct ib_device *ib_dev,
2234                            const char __user *buf, int in_len,
2235                            int out_len)
2236 {
2237         struct ib_uverbs_query_qp      cmd;
2238         struct ib_uverbs_query_qp_resp resp;
2239         struct ib_qp                   *qp;
2240         struct ib_qp_attr              *attr;
2241         struct ib_qp_init_attr         *init_attr;
2242         int                            ret;
2243
2244         if (copy_from_user(&cmd, buf, sizeof cmd))
2245                 return -EFAULT;
2246
2247         attr      = kmalloc(sizeof *attr, GFP_KERNEL);
2248         init_attr = kmalloc(sizeof *init_attr, GFP_KERNEL);
2249         if (!attr || !init_attr) {
2250                 ret = -ENOMEM;
2251                 goto out;
2252         }
2253
2254         qp = idr_read_qp(cmd.qp_handle, file->ucontext);
2255         if (!qp) {
2256                 ret = -EINVAL;
2257                 goto out;
2258         }
2259
2260         ret = ib_query_qp(qp, attr, cmd.attr_mask, init_attr);
2261
2262         put_qp_read(qp);
2263
2264         if (ret)
2265                 goto out;
2266
2267         memset(&resp, 0, sizeof resp);
2268
2269         resp.qp_state               = attr->qp_state;
2270         resp.cur_qp_state           = attr->cur_qp_state;
2271         resp.path_mtu               = attr->path_mtu;
2272         resp.path_mig_state         = attr->path_mig_state;
2273         resp.qkey                   = attr->qkey;
2274         resp.rq_psn                 = attr->rq_psn;
2275         resp.sq_psn                 = attr->sq_psn;
2276         resp.dest_qp_num            = attr->dest_qp_num;
2277         resp.qp_access_flags        = attr->qp_access_flags;
2278         resp.pkey_index             = attr->pkey_index;
2279         resp.alt_pkey_index         = attr->alt_pkey_index;
2280         resp.sq_draining            = attr->sq_draining;
2281         resp.max_rd_atomic          = attr->max_rd_atomic;
2282         resp.max_dest_rd_atomic     = attr->max_dest_rd_atomic;
2283         resp.min_rnr_timer          = attr->min_rnr_timer;
2284         resp.port_num               = attr->port_num;
2285         resp.timeout                = attr->timeout;
2286         resp.retry_cnt              = attr->retry_cnt;
2287         resp.rnr_retry              = attr->rnr_retry;
2288         resp.alt_port_num           = attr->alt_port_num;
2289         resp.alt_timeout            = attr->alt_timeout;
2290
2291         memcpy(resp.dest.dgid, attr->ah_attr.grh.dgid.raw, 16);
2292         resp.dest.flow_label        = attr->ah_attr.grh.flow_label;
2293         resp.dest.sgid_index        = attr->ah_attr.grh.sgid_index;
2294         resp.dest.hop_limit         = attr->ah_attr.grh.hop_limit;
2295         resp.dest.traffic_class     = attr->ah_attr.grh.traffic_class;
2296         resp.dest.dlid              = attr->ah_attr.dlid;
2297         resp.dest.sl                = attr->ah_attr.sl;
2298         resp.dest.src_path_bits     = attr->ah_attr.src_path_bits;
2299         resp.dest.static_rate       = attr->ah_attr.static_rate;
2300         resp.dest.is_global         = !!(attr->ah_attr.ah_flags & IB_AH_GRH);
2301         resp.dest.port_num          = attr->ah_attr.port_num;
2302
2303         memcpy(resp.alt_dest.dgid, attr->alt_ah_attr.grh.dgid.raw, 16);
2304         resp.alt_dest.flow_label    = attr->alt_ah_attr.grh.flow_label;
2305         resp.alt_dest.sgid_index    = attr->alt_ah_attr.grh.sgid_index;
2306         resp.alt_dest.hop_limit     = attr->alt_ah_attr.grh.hop_limit;
2307         resp.alt_dest.traffic_class = attr->alt_ah_attr.grh.traffic_class;
2308         resp.alt_dest.dlid          = attr->alt_ah_attr.dlid;
2309         resp.alt_dest.sl            = attr->alt_ah_attr.sl;
2310         resp.alt_dest.src_path_bits = attr->alt_ah_attr.src_path_bits;
2311         resp.alt_dest.static_rate   = attr->alt_ah_attr.static_rate;
2312         resp.alt_dest.is_global     = !!(attr->alt_ah_attr.ah_flags & IB_AH_GRH);
2313         resp.alt_dest.port_num      = attr->alt_ah_attr.port_num;
2314
2315         resp.max_send_wr            = init_attr->cap.max_send_wr;
2316         resp.max_recv_wr            = init_attr->cap.max_recv_wr;
2317         resp.max_send_sge           = init_attr->cap.max_send_sge;
2318         resp.max_recv_sge           = init_attr->cap.max_recv_sge;
2319         resp.max_inline_data        = init_attr->cap.max_inline_data;
2320         resp.sq_sig_all             = init_attr->sq_sig_type == IB_SIGNAL_ALL_WR;
2321
2322         if (copy_to_user((void __user *) (unsigned long) cmd.response,
2323                          &resp, sizeof resp))
2324                 ret = -EFAULT;
2325
2326 out:
2327         kfree(attr);
2328         kfree(init_attr);
2329
2330         return ret ? ret : in_len;
2331 }
2332
2333 /* Remove ignored fields set in the attribute mask */
2334 static int modify_qp_mask(enum ib_qp_type qp_type, int mask)
2335 {
2336         switch (qp_type) {
2337         case IB_QPT_XRC_INI:
2338                 return mask & ~(IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER);
2339         case IB_QPT_XRC_TGT:
2340                 return mask & ~(IB_QP_MAX_QP_RD_ATOMIC | IB_QP_RETRY_CNT |
2341                                 IB_QP_RNR_RETRY);
2342         default:
2343                 return mask;
2344         }
2345 }
2346
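/*
 * Modify QP attributes.  Port numbers are validated before use, the
 * attribute mask is filtered by modify_qp_mask() for XRC QP types, and the
 * request goes to the device's modify_qp() method when qp->real_qp == qp,
 * or through ib_modify_qp() otherwise.
 */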
2347 ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
2348                             struct ib_device *ib_dev,
2349                             const char __user *buf, int in_len,
2350                             int out_len)
2351 {
2352         struct ib_uverbs_modify_qp cmd;
2353         struct ib_udata            udata;
2354         struct ib_qp              *qp;
2355         struct ib_qp_attr         *attr;
2356         int                        ret;
2357
2358         if (copy_from_user(&cmd, buf, sizeof cmd))
2359                 return -EFAULT;
2360
2361         INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd,
2362                    out_len);
2363
2364         attr = kmalloc(sizeof *attr, GFP_KERNEL);
2365         if (!attr)
2366                 return -ENOMEM;
2367
2368         qp = idr_read_qp(cmd.qp_handle, file->ucontext);
2369         if (!qp) {
2370                 ret = -EINVAL;
2371                 goto out;
2372         }
2373
2374         if ((cmd.attr_mask & IB_QP_PORT) &&
2375             !rdma_is_port_valid(qp->device, cmd.port_num)) {
2376                 ret = -EINVAL;
2377                 goto release_qp;
2378         }
2379
2380         if ((cmd.attr_mask & IB_QP_AV) &&
2381             !rdma_is_port_valid(qp->device, cmd.dest.port_num)) {
2382                 ret = -EINVAL;
2383                 goto release_qp;
2384         }
2385
2386         if ((cmd.attr_mask & IB_QP_ALT_PATH) &&
2387             (!rdma_is_port_valid(qp->device, cmd.alt_port_num) ||
2388             !rdma_is_port_valid(qp->device, cmd.alt_dest.port_num))) {
2389                 ret = -EINVAL;
2390                 goto release_qp;
2391         }
2392
2393         attr->qp_state            = cmd.qp_state;
2394         attr->cur_qp_state        = cmd.cur_qp_state;
2395         attr->path_mtu            = cmd.path_mtu;
2396         attr->path_mig_state      = cmd.path_mig_state;
2397         attr->qkey                = cmd.qkey;
2398         attr->rq_psn              = cmd.rq_psn;
2399         attr->sq_psn              = cmd.sq_psn;
2400         attr->dest_qp_num         = cmd.dest_qp_num;
2401         attr->qp_access_flags     = cmd.qp_access_flags;
2402         attr->pkey_index          = cmd.pkey_index;
2403         attr->alt_pkey_index      = cmd.alt_pkey_index;
2404         attr->en_sqd_async_notify = cmd.en_sqd_async_notify;
2405         attr->max_rd_atomic       = cmd.max_rd_atomic;
2406         attr->max_dest_rd_atomic  = cmd.max_dest_rd_atomic;
2407         attr->min_rnr_timer       = cmd.min_rnr_timer;
2408         attr->port_num            = cmd.port_num;
2409         attr->timeout             = cmd.timeout;
2410         attr->retry_cnt           = cmd.retry_cnt;
2411         attr->rnr_retry           = cmd.rnr_retry;
2412         attr->alt_port_num        = cmd.alt_port_num;
2413         attr->alt_timeout         = cmd.alt_timeout;
2414
2415         memcpy(attr->ah_attr.grh.dgid.raw, cmd.dest.dgid, 16);
2416         attr->ah_attr.grh.flow_label        = cmd.dest.flow_label;
2417         attr->ah_attr.grh.sgid_index        = cmd.dest.sgid_index;
2418         attr->ah_attr.grh.hop_limit         = cmd.dest.hop_limit;
2419         attr->ah_attr.grh.traffic_class     = cmd.dest.traffic_class;
2420         attr->ah_attr.dlid                  = cmd.dest.dlid;
2421         attr->ah_attr.sl                    = cmd.dest.sl;
2422         attr->ah_attr.src_path_bits         = cmd.dest.src_path_bits;
2423         attr->ah_attr.static_rate           = cmd.dest.static_rate;
2424         attr->ah_attr.ah_flags              = cmd.dest.is_global ? IB_AH_GRH : 0;
2425         attr->ah_attr.port_num              = cmd.dest.port_num;
2426
2427         memcpy(attr->alt_ah_attr.grh.dgid.raw, cmd.alt_dest.dgid, 16);
2428         attr->alt_ah_attr.grh.flow_label    = cmd.alt_dest.flow_label;
2429         attr->alt_ah_attr.grh.sgid_index    = cmd.alt_dest.sgid_index;
2430         attr->alt_ah_attr.grh.hop_limit     = cmd.alt_dest.hop_limit;
2431         attr->alt_ah_attr.grh.traffic_class = cmd.alt_dest.traffic_class;
2432         attr->alt_ah_attr.dlid              = cmd.alt_dest.dlid;
2433         attr->alt_ah_attr.sl                = cmd.alt_dest.sl;
2434         attr->alt_ah_attr.src_path_bits     = cmd.alt_dest.src_path_bits;
2435         attr->alt_ah_attr.static_rate       = cmd.alt_dest.static_rate;
2436         attr->alt_ah_attr.ah_flags          = cmd.alt_dest.is_global ? IB_AH_GRH : 0;
2437         attr->alt_ah_attr.port_num          = cmd.alt_dest.port_num;
2438
2439         if (qp->real_qp == qp) {
2440                 if (cmd.attr_mask & IB_QP_AV) {
2441                         ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr);
2442                         if (ret)
2443                                 goto release_qp;
2444                 }
2445                 ret = qp->device->modify_qp(qp, attr,
2446                         modify_qp_mask(qp->qp_type, cmd.attr_mask), &udata);
2447         } else {
2448                 ret = ib_modify_qp(qp, attr, modify_qp_mask(qp->qp_type, cmd.attr_mask));
2449         }
2450
2451         if (ret)
2452                 goto release_qp;
2453
2454         ret = in_len;
2455
2456 release_qp:
2457         put_qp_read(qp);
2458
2459 out:
2460         kfree(attr);
2461
2462         return ret;
2463 }
2464
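/*
 * Destroy a QP: refuse with -EBUSY if multicast groups are still attached,
 * destroy the QP, drop the uobject and report the number of events
 * reported on it back to user space.
 */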
2465 ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
2466                              struct ib_device *ib_dev,
2467                              const char __user *buf, int in_len,
2468                              int out_len)
2469 {
2470         struct ib_uverbs_destroy_qp      cmd;
2471         struct ib_uverbs_destroy_qp_resp resp;
2472         struct ib_uobject               *uobj;
2473         struct ib_qp                    *qp;
2474         struct ib_uqp_object            *obj;
2475         int                              ret = -EINVAL;
2476
2477         if (copy_from_user(&cmd, buf, sizeof cmd))
2478                 return -EFAULT;
2479
2480         memset(&resp, 0, sizeof resp);
2481
2482         uobj = idr_write_uobj(&ib_uverbs_qp_idr, cmd.qp_handle, file->ucontext);
2483         if (!uobj)
2484                 return -EINVAL;
2485         qp  = uobj->object;
2486         obj = container_of(uobj, struct ib_uqp_object, uevent.uobject);
2487
2488         if (!list_empty(&obj->mcast_list)) {
2489                 put_uobj_write(uobj);
2490                 return -EBUSY;
2491         }
2492
2493         ret = ib_destroy_qp(qp);
2494         if (!ret)
2495                 uobj->live = 0;
2496
2497         put_uobj_write(uobj);
2498
2499         if (ret)
2500                 return ret;
2501
2502         if (obj->uxrcd)
2503                 atomic_dec(&obj->uxrcd->refcnt);
2504
2505         idr_remove_uobj(&ib_uverbs_qp_idr, uobj);
2506
2507         mutex_lock(&file->mutex);
2508         list_del(&uobj->list);
2509         mutex_unlock(&file->mutex);
2510
2511         ib_uverbs_release_uevent(file, &obj->uevent);
2512
2513         resp.events_reported = obj->uevent.events_reported;
2514
2515         put_uobj(uobj);
2516
2517         if (copy_to_user((void __user *) (unsigned long) cmd.response,
2518                          &resp, sizeof resp))
2519                 return -EFAULT;
2520
2521         return in_len;
2522 }
2523
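/*
 * Allocate a work request of @wr_size bytes with room for @num_sge
 * scatter/gather entries appended after it, aligned so the SGE array can
 * start directly behind the WR structure.
 */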
2524 static void *alloc_wr(size_t wr_size, __u32 num_sge)
2525 {
2526         return kmalloc(ALIGN(wr_size, sizeof (struct ib_sge)) +
2527                          num_sge * sizeof (struct ib_sge), GFP_KERNEL);
2528 }
2529
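/*
 * Post send work requests: unmarshal the variable-size array of
 * struct ib_uverbs_send_wr (plus scatter/gather entries) from user space
 * into a chain of kernel work requests, translating each WR into the
 * matching type (UD, RDMA, atomic or plain send), post the chain with the
 * device's post_send() method and report the position of the failed WR on
 * error.
 */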
2530 ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
2531                             struct ib_device *ib_dev,
2532                             const char __user *buf, int in_len,
2533                             int out_len)
2534 {
2535         struct ib_uverbs_post_send      cmd;
2536         struct ib_uverbs_post_send_resp resp;
2537         struct ib_uverbs_send_wr       *user_wr;
2538         struct ib_send_wr              *wr = NULL, *last, *next, *bad_wr;
2539         struct ib_qp                   *qp;
2540         int                             i, sg_ind;
2541         int                             is_ud;
2542         ssize_t                         ret = -EINVAL;
2543         size_t                          next_size;
2544
2545         if (copy_from_user(&cmd, buf, sizeof cmd))
2546                 return -EFAULT;
2547
2548         if (in_len < sizeof cmd + cmd.wqe_size * cmd.wr_count +
2549             cmd.sge_count * sizeof (struct ib_uverbs_sge))
2550                 return -EINVAL;
2551
2552         if (cmd.wqe_size < sizeof (struct ib_uverbs_send_wr))
2553                 return -EINVAL;
2554
2555         user_wr = kmalloc(cmd.wqe_size, GFP_KERNEL);
2556         if (!user_wr)
2557                 return -ENOMEM;
2558
2559         qp = idr_read_qp(cmd.qp_handle, file->ucontext);
2560         if (!qp)
2561                 goto out;
2562
2563         is_ud = qp->qp_type == IB_QPT_UD;
2564         sg_ind = 0;
2565         last = NULL;
2566         for (i = 0; i < cmd.wr_count; ++i) {
2567                 if (copy_from_user(user_wr,
2568                                    buf + sizeof cmd + i * cmd.wqe_size,
2569                                    cmd.wqe_size)) {
2570                         ret = -EFAULT;
2571                         goto out_put;
2572                 }
2573
2574                 if (user_wr->num_sge + sg_ind > cmd.sge_count) {
2575                         ret = -EINVAL;
2576                         goto out_put;
2577                 }
2578
2579                 if (is_ud) {
2580                         struct ib_ud_wr *ud;
2581
2582                         if (user_wr->opcode != IB_WR_SEND &&
2583                             user_wr->opcode != IB_WR_SEND_WITH_IMM) {
2584                                 ret = -EINVAL;
2585                                 goto out_put;
2586                         }
2587
2588                         next_size = sizeof(*ud);
2589                         ud = alloc_wr(next_size, user_wr->num_sge);
2590                         if (!ud) {
2591                                 ret = -ENOMEM;
2592                                 goto out_put;
2593                         }
2594
2595                         ud->ah = idr_read_ah(user_wr->wr.ud.ah, file->ucontext);
2596                         if (!ud->ah) {
2597                                 kfree(ud);
2598                                 ret = -EINVAL;
2599                                 goto out_put;
2600                         }
2601                         ud->remote_qpn = user_wr->wr.ud.remote_qpn;
2602                         ud->remote_qkey = user_wr->wr.ud.remote_qkey;
2603
2604                         next = &ud->wr;
2605                 } else if (user_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
2606                            user_wr->opcode == IB_WR_RDMA_WRITE ||
2607                            user_wr->opcode == IB_WR_RDMA_READ) {
2608                         struct ib_rdma_wr *rdma;
2609
2610                         next_size = sizeof(*rdma);
2611                         rdma = alloc_wr(next_size, user_wr->num_sge);
2612                         if (!rdma) {
2613                                 ret = -ENOMEM;
2614                                 goto out_put;
2615                         }
2616
2617                         rdma->remote_addr = user_wr->wr.rdma.remote_addr;
2618                         rdma->rkey = user_wr->wr.rdma.rkey;
2619
2620                         next = &rdma->wr;
2621                 } else if (user_wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
2622                            user_wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
2623                         struct ib_atomic_wr *atomic;
2624
2625                         next_size = sizeof(*atomic);
2626                         atomic = alloc_wr(next_size, user_wr->num_sge);
2627                         if (!atomic) {
2628                                 ret = -ENOMEM;
2629                                 goto out_put;
2630                         }
2631
2632                         atomic->remote_addr = user_wr->wr.atomic.remote_addr;
2633                         atomic->compare_add = user_wr->wr.atomic.compare_add;
2634                         atomic->swap = user_wr->wr.atomic.swap;
2635                         atomic->rkey = user_wr->wr.atomic.rkey;
2636
2637                         next = &atomic->wr;
2638                 } else if (user_wr->opcode == IB_WR_SEND ||
2639                            user_wr->opcode == IB_WR_SEND_WITH_IMM ||
2640                            user_wr->opcode == IB_WR_SEND_WITH_INV) {
2641                         next_size = sizeof(*next);
2642                         next = alloc_wr(next_size, user_wr->num_sge);
2643                         if (!next) {
2644                                 ret = -ENOMEM;
2645                                 goto out_put;
2646                         }
2647                 } else {
2648                         ret = -EINVAL;
2649                         goto out_put;
2650                 }
2651
2652                 if (user_wr->opcode == IB_WR_SEND_WITH_IMM ||
2653                     user_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
2654                         next->ex.imm_data =
2655                                         (__be32 __force) user_wr->ex.imm_data;
2656                 } else if (user_wr->opcode == IB_WR_SEND_WITH_INV) {
2657                         next->ex.invalidate_rkey = user_wr->ex.invalidate_rkey;
2658                 }
2659
2660                 if (!last)
2661                         wr = next;
2662                 else
2663                         last->next = next;
2664                 last = next;
2665
2666                 next->next       = NULL;
2667                 next->wr_id      = user_wr->wr_id;
2668                 next->num_sge    = user_wr->num_sge;
2669                 next->opcode     = user_wr->opcode;
2670                 next->send_flags = user_wr->send_flags;
2671
2672                 if (next->num_sge) {
2673                         next->sg_list = (void *)((char *)next +
2674                                 ALIGN(next_size, sizeof(struct ib_sge)));
2675                         if (copy_from_user(next->sg_list,
2676                                            (const char *)buf + sizeof cmd +
2677                                            cmd.wr_count * cmd.wqe_size +
2678                                            sg_ind * sizeof (struct ib_sge),
2679                                            next->num_sge * sizeof (struct ib_sge))) {
2680                                 ret = -EFAULT;
2681                                 goto out_put;
2682                         }
2683                         sg_ind += next->num_sge;
2684                 } else
2685                         next->sg_list = NULL;
2686         }
2687
2688         resp.bad_wr = 0;
2689         ret = qp->device->post_send(qp->real_qp, wr, &bad_wr);
2690         if (ret)
2691                 for (next = wr; next; next = next->next) {
2692                         ++resp.bad_wr;
2693                         if (next == bad_wr)
2694                                 break;
2695                 }
2696
2697         if (copy_to_user((void __user *) (unsigned long) cmd.response,
2698                          &resp, sizeof resp))
2699                 ret = -EFAULT;
2700
2701 out_put:
2702         put_qp_read(qp);
2703
2704         while (wr) {
2705                 if (is_ud && ud_wr(wr)->ah)
2706                         put_ah_read(ud_wr(wr)->ah);
2707                 next = wr->next;
2708                 kfree(wr);
2709                 wr = next;
2710         }
2711
2712 out:
2713         kfree(user_wr);
2714
2715         return ret ? ret : in_len;
2716 }
2717
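/*
 * Unmarshal an array of struct ib_uverbs_recv_wr (followed by their
 * scatter/gather lists) from user space into a linked chain of
 * struct ib_recv_wr, used by both post-recv and post-SRQ-recv.
 */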
2718 static struct ib_recv_wr *ib_uverbs_unmarshall_recv(const char __user *buf,
2719                                                     int in_len,
2720                                                     u32 wr_count,
2721                                                     u32 sge_count,
2722                                                     u32 wqe_size)
2723 {
2724         struct ib_uverbs_recv_wr *user_wr;
2725         struct ib_recv_wr        *wr = NULL, *last, *next;
2726         int                       sg_ind;
2727         int                       i;
2728         int                       ret;
2729
2730         if (in_len < wqe_size * wr_count +
2731             sge_count * sizeof (struct ib_uverbs_sge))
2732                 return ERR_PTR(-EINVAL);
2733
2734         if (wqe_size < sizeof (struct ib_uverbs_recv_wr))
2735                 return ERR_PTR(-EINVAL);
2736
2737         user_wr = kmalloc(wqe_size, GFP_KERNEL);
2738         if (!user_wr)
2739                 return ERR_PTR(-ENOMEM);
2740
2741         sg_ind = 0;
2742         last = NULL;
2743         for (i = 0; i < wr_count; ++i) {
2744                 if (copy_from_user(user_wr, buf + i * wqe_size,
2745                                    wqe_size)) {
2746                         ret = -EFAULT;
2747                         goto err;
2748                 }
2749
2750                 if (user_wr->num_sge + sg_ind > sge_count) {
2751                         ret = -EINVAL;
2752                         goto err;
2753                 }
2754
2755                 next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) +
2756                                user_wr->num_sge * sizeof (struct ib_sge),
2757                                GFP_KERNEL);
2758                 if (!next) {
2759                         ret = -ENOMEM;
2760                         goto err;
2761                 }
2762
2763                 if (!last)
2764                         wr = next;
2765                 else
2766                         last->next = next;
2767                 last = next;
2768
2769                 next->next       = NULL;
2770                 next->wr_id      = user_wr->wr_id;
2771                 next->num_sge    = user_wr->num_sge;
2772
2773                 if (next->num_sge) {
2774                         next->sg_list = (void *)((char *)next +
2775                                 ALIGN(sizeof *next, sizeof (struct ib_sge)));
2776                         if (copy_from_user(next->sg_list,
2777                                            (const char *)buf + wr_count * wqe_size +
2778                                            sg_ind * sizeof (struct ib_sge),
2779                                            next->num_sge * sizeof (struct ib_sge))) {
2780                                 ret = -EFAULT;
2781                                 goto err;
2782                         }
2783                         sg_ind += next->num_sge;
2784                 } else
2785                         next->sg_list = NULL;
2786         }
2787
2788         kfree(user_wr);
2789         return wr;
2790
2791 err:
2792         kfree(user_wr);
2793
2794         while (wr) {
2795                 next = wr->next;
2796                 kfree(wr);
2797                 wr = next;
2798         }
2799
2800         return ERR_PTR(ret);
2801 }
2802
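/*
 * Post receive work requests on a QP using the chain built by
 * ib_uverbs_unmarshall_recv(), reporting the position of the failed WR
 * on error.
 */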
2803 ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file,
2804                             struct ib_device *ib_dev,
2805                             const char __user *buf, int in_len,
2806                             int out_len)
2807 {
2808         struct ib_uverbs_post_recv      cmd;
2809         struct ib_uverbs_post_recv_resp resp;
2810         struct ib_recv_wr              *wr, *next, *bad_wr;
2811         struct ib_qp                   *qp;
2812         ssize_t                         ret = -EINVAL;
2813
2814         if (copy_from_user(&cmd, buf, sizeof cmd))
2815                 return -EFAULT;
2816
2817         wr = ib_uverbs_unmarshall_recv(buf + sizeof cmd,
2818                                        in_len - sizeof cmd, cmd.wr_count,
2819                                        cmd.sge_count, cmd.wqe_size);
2820         if (IS_ERR(wr))
2821                 return PTR_ERR(wr);
2822
2823         qp = idr_read_qp(cmd.qp_handle, file->ucontext);
2824         if (!qp)
2825                 goto out;
2826
2827         resp.bad_wr = 0;
2828         ret = qp->device->post_recv(qp->real_qp, wr, &bad_wr);
2829
2830         put_qp_read(qp);
2831
2832         if (ret)
2833                 for (next = wr; next; next = next->next) {
2834                         ++resp.bad_wr;
2835                         if (next == bad_wr)
2836                                 break;
2837                 }
2838
2839         if (copy_to_user((void __user *) (unsigned long) cmd.response,
2840                          &resp, sizeof resp))
2841                 ret = -EFAULT;
2842
2843 out:
2844         while (wr) {
2845                 next = wr->next;
2846                 kfree(wr);
2847                 wr = next;
2848         }
2849
2850         return ret ? ret : in_len;
2851 }
2852
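/*
 * Post receive work requests on an SRQ; same flow as ib_uverbs_post_recv()
 * but targets the SRQ's post_srq_recv() method.
 */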
2853 ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file,
2854                                 struct ib_device *ib_dev,
2855                                 const char __user *buf, int in_len,
2856                                 int out_len)
2857 {
2858         struct ib_uverbs_post_srq_recv      cmd;
2859         struct ib_uverbs_post_srq_recv_resp resp;
2860         struct ib_recv_wr                  *wr, *next, *bad_wr;
2861         struct ib_srq                      *srq;
2862         ssize_t                             ret = -EINVAL;
2863
2864         if (copy_from_user(&cmd, buf, sizeof cmd))
2865                 return -EFAULT;
2866
2867         wr = ib_uverbs_unmarshall_recv(buf + sizeof cmd,
2868                                        in_len - sizeof cmd, cmd.wr_count,
2869                                        cmd.sge_count, cmd.wqe_size);
2870         if (IS_ERR(wr))
2871                 return PTR_ERR(wr);
2872
2873         srq = idr_read_srq(cmd.srq_handle, file->ucontext);
2874         if (!srq)
2875                 goto out;
2876
2877         resp.bad_wr = 0;
2878         ret = srq->device->post_srq_recv(srq, wr, &bad_wr);
2879
2880         put_srq_read(srq);
2881
2882         if (ret)
2883                 for (next = wr; next; next = next->next) {
2884                         ++resp.bad_wr;
2885                         if (next == bad_wr)
2886                                 break;
2887                 }
2888
2889         if (copy_to_user((void __user *) (unsigned long) cmd.response,
2890                          &resp, sizeof resp))
2891                 ret = -EFAULT;
2892
2893 out:
2894         while (wr) {
2895                 next = wr->next;
2896                 kfree(wr);
2897                 wr = next;
2898         }
2899
2900         return ret ? ret : in_len;
2901 }
2902
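/*
 * Create an address handle in the given PD from the user-supplied address
 * attributes, register it in the AH idr and return its handle to user
 * space.
 */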
2903 ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
2904                             struct ib_device *ib_dev,
2905                             const char __user *buf, int in_len,
2906                             int out_len)
2907 {
2908         struct ib_uverbs_create_ah       cmd;
2909         struct ib_uverbs_create_ah_resp  resp;
2910         struct ib_uobject               *uobj;
2911         struct ib_pd                    *pd;
2912         struct ib_ah                    *ah;
2913         struct ib_ah_attr               attr;
2914         int                             ret;
2915         struct ib_udata                 udata;
2916
2917         if (out_len < sizeof resp)
2918                 return -ENOSPC;
2919
2920         if (copy_from_user(&cmd, buf, sizeof cmd))
2921                 return -EFAULT;
2922
2923         if (!rdma_is_port_valid(ib_dev, cmd.attr.port_num))
2924                 return -EINVAL;
2925
2926         INIT_UDATA(&udata, buf + sizeof(cmd),
2927                    (unsigned long)cmd.response + sizeof(resp),
2928                    in_len - sizeof(cmd), out_len - sizeof(resp));
2929
2930         uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
2931         if (!uobj)
2932                 return -ENOMEM;
2933
2934         init_uobj(uobj, cmd.user_handle, file->ucontext, &ah_lock_class);
2935         down_write(&uobj->mutex);
2936
2937         pd = idr_read_pd(cmd.pd_handle, file->ucontext);
2938         if (!pd) {
2939                 ret = -EINVAL;
2940                 goto err;
2941         }
2942
2943         attr.dlid              = cmd.attr.dlid;
2944         attr.sl                = cmd.attr.sl;
2945         attr.src_path_bits     = cmd.attr.src_path_bits;
2946         attr.static_rate       = cmd.attr.static_rate;
2947         attr.ah_flags          = cmd.attr.is_global ? IB_AH_GRH : 0;
2948         attr.port_num          = cmd.attr.port_num;
2949         attr.grh.flow_label    = cmd.attr.grh.flow_label;
2950         attr.grh.sgid_index    = cmd.attr.grh.sgid_index;
2951         attr.grh.hop_limit     = cmd.attr.grh.hop_limit;
2952         attr.grh.traffic_class = cmd.attr.grh.traffic_class;
2953         memset(&attr.dmac, 0, sizeof(attr.dmac));
2954         memcpy(attr.grh.dgid.raw, cmd.attr.grh.dgid, 16);
2955
2956         ah = pd->device->create_ah(pd, &attr, &udata);
2957
2958         if (IS_ERR(ah)) {
2959                 ret = PTR_ERR(ah);
2960                 goto err_put;
2961         }
2962
2963         ah->device  = pd->device;
2964         ah->pd      = pd;
2965         atomic_inc(&pd->usecnt);
2966         ah->uobject  = uobj;
2967         uobj->object = ah;
2968
2969         ret = idr_add_uobj(&ib_uverbs_ah_idr, uobj);
2970         if (ret)
2971                 goto err_destroy;
2972
2973         resp.ah_handle = uobj->id;
2974
2975         if (copy_to_user((void __user *) (unsigned long) cmd.response,
2976                          &resp, sizeof resp)) {
2977                 ret = -EFAULT;
2978                 goto err_copy;
2979         }
2980
2981         put_pd_read(pd);
2982
2983         mutex_lock(&file->mutex);
2984         list_add_tail(&uobj->list, &file->ucontext->ah_list);
2985         mutex_unlock(&file->mutex);
2986
2987         uobj->live = 1;
2988
2989         up_write(&uobj->mutex);
2990
2991         return in_len;
2992
2993 err_copy:
2994         idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
2995
2996 err_destroy:
2997         ib_destroy_ah(ah);
2998
2999 err_put:
3000         put_pd_read(pd);
3001
3002 err:
3003         put_uobj_write(uobj);
3004         return ret;
3005 }
3006
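/*
 * Destroy an address handle and remove its uobject from the idr and the
 * per-file list.
 */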
3007 ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file,
3008                              struct ib_device *ib_dev,
3009                              const char __user *buf, int in_len, int out_len)
3010 {
3011         struct ib_uverbs_destroy_ah cmd;
3012         struct ib_ah               *ah;
3013         struct ib_uobject          *uobj;
3014         int                         ret;
3015
3016         if (copy_from_user(&cmd, buf, sizeof cmd))
3017                 return -EFAULT;
3018
3019         uobj = idr_write_uobj(&ib_uverbs_ah_idr, cmd.ah_handle, file->ucontext);
3020         if (!uobj)
3021                 return -EINVAL;
3022         ah = uobj->object;
3023
3024         ret = ib_destroy_ah(ah);
3025         if (!ret)
3026                 uobj->live = 0;
3027
3028         put_uobj_write(uobj);
3029
3030         if (ret)
3031                 return ret;
3032
3033         idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
3034
3035         mutex_lock(&file->mutex);
3036         list_del(&uobj->list);
3037         mutex_unlock(&file->mutex);
3038
3039         put_uobj(uobj);
3040
3041         return in_len;
3042 }
3043
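/*
 * Attach a QP to a multicast group.  The (gid, mlid) pair is recorded on
 * the QP uobject's mcast_list so that a repeated attach of the same
 * group is a no-op and the attachment can be released on QP destruction.
 */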
3044 ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
3045                                struct ib_device *ib_dev,
3046                                const char __user *buf, int in_len,
3047                                int out_len)
3048 {
3049         struct ib_uverbs_attach_mcast cmd;
3050         struct ib_qp                 *qp;
3051         struct ib_uqp_object         *obj;
3052         struct ib_uverbs_mcast_entry *mcast;
3053         int                           ret;
3054
3055         if (copy_from_user(&cmd, buf, sizeof cmd))
3056                 return -EFAULT;
3057
3058         qp = idr_write_qp(cmd.qp_handle, file->ucontext);
3059         if (!qp)
3060                 return -EINVAL;
3061
3062         obj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject);
3063
3064         mutex_lock(&obj->mcast_lock);
3065         list_for_each_entry(mcast, &obj->mcast_list, list)
3066                 if (cmd.mlid == mcast->lid &&
3067                     !memcmp(cmd.gid, mcast->gid.raw, sizeof mcast->gid.raw)) {
3068                         ret = 0;
3069                         goto out_put;
3070                 }
3071
3072         mcast = kmalloc(sizeof *mcast, GFP_KERNEL);
3073         if (!mcast) {
3074                 ret = -ENOMEM;
3075                 goto out_put;
3076         }
3077
3078         mcast->lid = cmd.mlid;
3079         memcpy(mcast->gid.raw, cmd.gid, sizeof mcast->gid.raw);
3080
3081         ret = ib_attach_mcast(qp, &mcast->gid, cmd.mlid);
3082         if (!ret)
3083                 list_add_tail(&mcast->list, &obj->mcast_list);
3084         else
3085                 kfree(mcast);
3086
3087 out_put:
3088         mutex_unlock(&obj->mcast_lock);
3089         put_qp_write(qp);
3090
3091         return ret ? ret : in_len;
3092 }
3093
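/*
 * Detach a QP from a multicast group.  The matching entry is removed
 * from the uobject's mcast_list; an unknown (gid, mlid) pair fails with
 * -EINVAL, otherwise ib_detach_mcast() is called on the QP.
 */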
3094 ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
3095                                struct ib_device *ib_dev,
3096                                const char __user *buf, int in_len,
3097                                int out_len)
3098 {
3099         struct ib_uverbs_detach_mcast cmd;
3100         struct ib_uqp_object         *obj;
3101         struct ib_qp                 *qp;
3102         struct ib_uverbs_mcast_entry *mcast;
3103         int                           ret = -EINVAL;
3104         bool                          found = false;
3105
3106         if (copy_from_user(&cmd, buf, sizeof cmd))
3107                 return -EFAULT;
3108
3109         qp = idr_write_qp(cmd.qp_handle, file->ucontext);
3110         if (!qp)
3111                 return -EINVAL;
3112
3113         obj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject);
3114         mutex_lock(&obj->mcast_lock);
3115
3116         list_for_each_entry(mcast, &obj->mcast_list, list)
3117                 if (cmd.mlid == mcast->lid &&
3118                     !memcmp(cmd.gid, mcast->gid.raw, sizeof mcast->gid.raw)) {
3119                         list_del(&mcast->list);
3120                         kfree(mcast);
3121                         found = true;
3122                         break;
3123                 }
3124
3125         if (!found) {
3126                 ret = -EINVAL;
3127                 goto out_put;
3128         }
3129
3130         ret = ib_detach_mcast(qp, (union ib_gid *)cmd.gid, cmd.mlid);
3131
3132 out_put:
3133         mutex_unlock(&obj->mcast_lock);
3134         put_qp_write(qp);
3135
3136         return ret ? ret : in_len;
3137 }
3138
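/*
 * Flow spec helpers.  A user space flow spec carries a value block and a
 * mask block of equal size after the common header, so half of the bytes
 * following the header give the per-block filter size (padding included).
 * spec_filter_size() trims that size down to what the kernel structure
 * understands, provided all trailing bytes are zero.
 */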
3139 static size_t kern_spec_filter_sz(struct ib_uverbs_flow_spec_hdr *spec)
3140 {
3141         /* Returns user space filter size, includes padding */
3142         return (spec->size - sizeof(struct ib_uverbs_flow_spec_hdr)) / 2;
3143 }
3144
3145 static ssize_t spec_filter_size(void *kern_spec_filter, u16 kern_filter_size,
3146                                 u16 ib_real_filter_sz)
3147 {
3148         /*
3149          * User space filter structures must be 64 bit aligned; otherwise this
3150          * check may pass even though we cannot handle additional new attributes.
3151          */
3152
3153         if (kern_filter_size > ib_real_filter_sz) {
3154                 if (memchr_inv((char *)kern_spec_filter +
3155                                ib_real_filter_sz, 0,
3156                                kern_filter_size - ib_real_filter_sz))
3157                         return -EINVAL;
3158                 return ib_real_filter_sz;
3159         }
3160         return kern_filter_size;
3161 }
3162
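/*
 * Convert one user space flow spec (header, value block, mask block)
 * into the kernel's union ib_flow_spec.  The user supplied filter size
 * must be 4-byte aligned, bytes beyond the size the kernel knows about
 * must be zero, and unknown spec types are rejected with -EINVAL.
 */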
3163 static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
3164                                 union ib_flow_spec *ib_spec)
3165 {
3166         ssize_t actual_filter_sz;
3167         ssize_t kern_filter_sz;
3168         ssize_t ib_filter_sz;
3169         void *kern_spec_mask;
3170         void *kern_spec_val;
3171
3172         if (kern_spec->reserved)
3173                 return -EINVAL;
3174
3175         ib_spec->type = kern_spec->type;
3176
3177         kern_filter_sz = kern_spec_filter_sz(&kern_spec->hdr);
3178         /* User flow spec size must be aligned to 4 bytes */
3179         if (kern_filter_sz != ALIGN(kern_filter_sz, 4))
3180                 return -EINVAL;
3181
3182         kern_spec_val = (char *)kern_spec +
3183                 sizeof(struct ib_uverbs_flow_spec_hdr);
3184         kern_spec_mask = (char *)kern_spec_val + kern_filter_sz;
3185
3186         switch (ib_spec->type) {
3187         case IB_FLOW_SPEC_ETH:
3188                 ib_filter_sz = offsetof(struct ib_flow_eth_filter, real_sz);
3189                 actual_filter_sz = spec_filter_size(kern_spec_mask,
3190                                                     kern_filter_sz,
3191                                                     ib_filter_sz);
3192                 if (actual_filter_sz <= 0)
3193                         return -EINVAL;
3194                 ib_spec->size = sizeof(struct ib_flow_spec_eth);
3195                 memcpy(&ib_spec->eth.val, kern_spec_val, actual_filter_sz);
3196                 memcpy(&ib_spec->eth.mask, kern_spec_mask, actual_filter_sz);
3197                 break;
3198         case IB_FLOW_SPEC_IPV4:
3199                 ib_filter_sz = offsetof(struct ib_flow_ipv4_filter, real_sz);
3200                 actual_filter_sz = spec_filter_size(kern_spec_mask,
3201                                                     kern_filter_sz,
3202                                                     ib_filter_sz);
3203                 if (actual_filter_sz <= 0)
3204                         return -EINVAL;
3205                 ib_spec->size = sizeof(struct ib_flow_spec_ipv4);
3206                 memcpy(&ib_spec->ipv4.val, kern_spec_val, actual_filter_sz);
3207                 memcpy(&ib_spec->ipv4.mask, kern_spec_mask, actual_filter_sz);
3208                 break;
3209         case IB_FLOW_SPEC_IPV6:
3210                 ib_filter_sz = offsetof(struct ib_flow_ipv6_filter, real_sz);
3211                 actual_filter_sz = spec_filter_size(kern_spec_mask,
3212                                                     kern_filter_sz,
3213                                                     ib_filter_sz);
3214                 if (actual_filter_sz <= 0)
3215                         return -EINVAL;
3216                 ib_spec->size = sizeof(struct ib_flow_spec_ipv6);
3217                 memcpy(&ib_spec->ipv6.val, kern_spec_val, actual_filter_sz);
3218                 memcpy(&ib_spec->ipv6.mask, kern_spec_mask, actual_filter_sz);
3219
3220                 if ((ntohl(ib_spec->ipv6.mask.flow_label)) >= BIT(20) ||
3221                     (ntohl(ib_spec->ipv6.val.flow_label)) >= BIT(20))
3222                         return -EINVAL;
3223                 break;
3224         case IB_FLOW_SPEC_TCP:
3225         case IB_FLOW_SPEC_UDP:
3226                 ib_filter_sz = offsetof(struct ib_flow_tcp_udp_filter, real_sz);
3227                 actual_filter_sz = spec_filter_size(kern_spec_mask,
3228                                                     kern_filter_sz,
3229                                                     ib_filter_sz);
3230                 if (actual_filter_sz <= 0)
3231                         return -EINVAL;
3232                 ib_spec->size = sizeof(struct ib_flow_spec_tcp_udp);
3233                 memcpy(&ib_spec->tcp_udp.val, kern_spec_val, actual_filter_sz);
3234                 memcpy(&ib_spec->tcp_udp.mask, kern_spec_mask, actual_filter_sz);
3235                 break;
3236         default:
3237                 return -EINVAL;
3238         }
3239         return 0;
3240 }
3241
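/*
 * Extended command: create a work queue (WQ).  The PD and CQ handles are
 * resolved and held across the call, the driver's create_wq() method
 * builds the queue, and the WQ is published through the WQ idr before
 * the handle, sizes and WQ number are copied back to user space.
 */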
3242 int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
3243                            struct ib_device *ib_dev,
3244                            struct ib_udata *ucore,
3245                            struct ib_udata *uhw)
3246 {
3247         struct ib_uverbs_ex_create_wq     cmd = {};
3248         struct ib_uverbs_ex_create_wq_resp resp = {};
3249         struct ib_uwq_object           *obj;
3250         int err = 0;
3251         struct ib_cq *cq;
3252         struct ib_pd *pd;
3253         struct ib_wq *wq;
3254         struct ib_wq_init_attr wq_init_attr = {};
3255         size_t required_cmd_sz;
3256         size_t required_resp_len;
3257
3258         required_cmd_sz = offsetof(typeof(cmd), max_sge) + sizeof(cmd.max_sge);
3259         required_resp_len = offsetof(typeof(resp), wqn) + sizeof(resp.wqn);
3260
3261         if (ucore->inlen < required_cmd_sz)
3262                 return -EINVAL;
3263
3264         if (ucore->outlen < required_resp_len)
3265                 return -ENOSPC;
3266
3267         if (ucore->inlen > sizeof(cmd) &&
3268             !ib_is_udata_cleared(ucore, sizeof(cmd),
3269                                  ucore->inlen - sizeof(cmd)))
3270                 return -EOPNOTSUPP;
3271
3272         err = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
3273         if (err)
3274                 return err;
3275
3276         if (cmd.comp_mask)
3277                 return -EOPNOTSUPP;
3278
3279         obj = kmalloc(sizeof(*obj), GFP_KERNEL);
3280         if (!obj)
3281                 return -ENOMEM;
3282
3283         init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext,
3284                   &wq_lock_class);
3285         down_write(&obj->uevent.uobject.mutex);
3286         pd  = idr_read_pd(cmd.pd_handle, file->ucontext);
3287         if (!pd) {
3288                 err = -EINVAL;
3289                 goto err_uobj;
3290         }
3291
3292         cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
3293         if (!cq) {
3294                 err = -EINVAL;
3295                 goto err_put_pd;
3296         }
3297
3298         wq_init_attr.cq = cq;
3299         wq_init_attr.max_sge = cmd.max_sge;
3300         wq_init_attr.max_wr = cmd.max_wr;
3301         wq_init_attr.wq_context = file;
3302         wq_init_attr.wq_type = cmd.wq_type;
3303         wq_init_attr.event_handler = ib_uverbs_wq_event_handler;
3304         obj->uevent.events_reported = 0;
3305         INIT_LIST_HEAD(&obj->uevent.event_list);
3306         wq = pd->device->create_wq(pd, &wq_init_attr, uhw);
3307         if (IS_ERR(wq)) {
3308                 err = PTR_ERR(wq);
3309                 goto err_put_cq;
3310         }
3311
3312         wq->uobject = &obj->uevent.uobject;
3313         obj->uevent.uobject.object = wq;
3314         wq->wq_type = wq_init_attr.wq_type;
3315         wq->cq = cq;
3316         wq->pd = pd;
3317         wq->device = pd->device;
3318         wq->wq_context = wq_init_attr.wq_context;
3319         atomic_set(&wq->usecnt, 0);
3320         atomic_inc(&pd->usecnt);
3321         atomic_inc(&cq->usecnt);
3324         err = idr_add_uobj(&ib_uverbs_wq_idr, &obj->uevent.uobject);
3325         if (err)
3326                 goto destroy_wq;
3327
3328         memset(&resp, 0, sizeof(resp));
3329         resp.wq_handle = obj->uevent.uobject.id;
3330         resp.max_sge = wq_init_attr.max_sge;
3331         resp.max_wr = wq_init_attr.max_wr;
3332         resp.wqn = wq->wq_num;
3333         resp.response_length = required_resp_len;
3334         err = ib_copy_to_udata(ucore,
3335                                &resp, resp.response_length);
3336         if (err)
3337                 goto err_copy;
3338
3339         put_pd_read(pd);
3340         put_cq_read(cq);
3341
3342         mutex_lock(&file->mutex);
3343         list_add_tail(&obj->uevent.uobject.list, &file->ucontext->wq_list);
3344         mutex_unlock(&file->mutex);
3345
3346         obj->uevent.uobject.live = 1;
3347         up_write(&obj->uevent.uobject.mutex);
3348         return 0;
3349
3350 err_copy:
3351         idr_remove_uobj(&ib_uverbs_wq_idr, &obj->uevent.uobject);
3352 destroy_wq:
3353         ib_destroy_wq(wq);
3354 err_put_cq:
3355         put_cq_read(cq);
3356 err_put_pd:
3357         put_pd_read(pd);
3358 err_uobj:
3359         put_uobj_write(&obj->uevent.uobject);
3360
3361         return err;
3362 }
3363
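/*
 * Extended command: destroy a WQ.  On success the uobject is removed
 * from the idr and the per-context wq_list, outstanding events are
 * released, and the count of events reported is returned to user space.
 */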
3364 int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file,
3365                             struct ib_device *ib_dev,
3366                             struct ib_udata *ucore,
3367                             struct ib_udata *uhw)
3368 {
3369         struct ib_uverbs_ex_destroy_wq  cmd = {};
3370         struct ib_uverbs_ex_destroy_wq_resp     resp = {};
3371         struct ib_wq                    *wq;
3372         struct ib_uobject               *uobj;
3373         struct ib_uwq_object            *obj;
3374         size_t required_cmd_sz;
3375         size_t required_resp_len;
3376         int                             ret;
3377
3378         required_cmd_sz = offsetof(typeof(cmd), wq_handle) + sizeof(cmd.wq_handle);
3379         required_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved);
3380
3381         if (ucore->inlen < required_cmd_sz)
3382                 return -EINVAL;
3383
3384         if (ucore->outlen < required_resp_len)
3385                 return -ENOSPC;
3386
3387         if (ucore->inlen > sizeof(cmd) &&
3388             !ib_is_udata_cleared(ucore, sizeof(cmd),
3389                                  ucore->inlen - sizeof(cmd)))
3390                 return -EOPNOTSUPP;
3391
3392         ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
3393         if (ret)
3394                 return ret;
3395
3396         if (cmd.comp_mask)
3397                 return -EOPNOTSUPP;
3398
3399         resp.response_length = required_resp_len;
3400         uobj = idr_write_uobj(&ib_uverbs_wq_idr, cmd.wq_handle,
3401                               file->ucontext);
3402         if (!uobj)
3403                 return -EINVAL;
3404
3405         wq = uobj->object;
3406         obj = container_of(uobj, struct ib_uwq_object, uevent.uobject);
3407         ret = ib_destroy_wq(wq);
3408         if (!ret)
3409                 uobj->live = 0;
3410
3411         put_uobj_write(uobj);
3412         if (ret)
3413                 return ret;
3414
3415         idr_remove_uobj(&ib_uverbs_wq_idr, uobj);
3416
3417         mutex_lock(&file->mutex);
3418         list_del(&uobj->list);
3419         mutex_unlock(&file->mutex);
3420
3421         ib_uverbs_release_uevent(file, &obj->uevent);
3422         resp.events_reported = obj->uevent.events_reported;
3423         put_uobj(uobj);
3424
3425         ret = ib_copy_to_udata(ucore, &resp, resp.response_length);
3426         if (ret)
3427                 return ret;
3428
3429         return 0;
3430 }
3431
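/*
 * Extended command: modify a WQ.  Only the state attributes
 * (IB_WQ_STATE and IB_WQ_CUR_STATE) may be set; the request is passed
 * through to the driver's modify_wq() method.
 */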
3432 int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
3433                            struct ib_device *ib_dev,
3434                            struct ib_udata *ucore,
3435                            struct ib_udata *uhw)
3436 {
3437         struct ib_uverbs_ex_modify_wq cmd = {};
3438         struct ib_wq *wq;
3439         struct ib_wq_attr wq_attr = {};
3440         size_t required_cmd_sz;
3441         int ret;
3442
3443         required_cmd_sz = offsetof(typeof(cmd), curr_wq_state) + sizeof(cmd.curr_wq_state);
3444         if (ucore->inlen < required_cmd_sz)
3445                 return -EINVAL;
3446
3447         if (ucore->inlen > sizeof(cmd) &&
3448             !ib_is_udata_cleared(ucore, sizeof(cmd),
3449                                  ucore->inlen - sizeof(cmd)))
3450                 return -EOPNOTSUPP;
3451
3452         ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
3453         if (ret)
3454                 return ret;
3455
3456         if (!cmd.attr_mask)
3457                 return -EINVAL;
3458
3459         if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE))
3460                 return -EINVAL;
3461
3462         wq = idr_read_wq(cmd.wq_handle, file->ucontext);
3463         if (!wq)
3464                 return -EINVAL;
3465
3466         wq_attr.curr_wq_state = cmd.curr_wq_state;
3467         wq_attr.wq_state = cmd.wq_state;
3468         ret = wq->device->modify_wq(wq, &wq_attr, cmd.attr_mask, uhw);
3469         put_wq_read(wq);
3470         return ret;
3471 }
3472
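/*
 * Extended command: create a receive WQ indirection table (used for
 * RSS).  User space supplies 2^log_ind_tbl_size WQ handles; each WQ is
 * looked up and referenced, the driver's create_rwq_ind_table() method
 * builds the table, and its handle and number are returned.
 */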
3473 int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
3474                                       struct ib_device *ib_dev,
3475                                       struct ib_udata *ucore,
3476                                       struct ib_udata *uhw)
3477 {
3478         struct ib_uverbs_ex_create_rwq_ind_table          cmd = {};
3479         struct ib_uverbs_ex_create_rwq_ind_table_resp  resp = {};
3480         struct ib_uobject                 *uobj;
3481         int err = 0;
3482         struct ib_rwq_ind_table_init_attr init_attr = {};
3483         struct ib_rwq_ind_table *rwq_ind_tbl;
3484         struct ib_wq    **wqs = NULL;
3485         u32 *wqs_handles = NULL;
3486         struct ib_wq    *wq = NULL;
3487         int i, j, num_read_wqs;
3488         u32 num_wq_handles;
3489         u32 expected_in_size;
3490         size_t required_cmd_sz_header;
3491         size_t required_resp_len;
3492
3493         required_cmd_sz_header = offsetof(typeof(cmd), log_ind_tbl_size) + sizeof(cmd.log_ind_tbl_size);
3494         required_resp_len = offsetof(typeof(resp), ind_tbl_num) + sizeof(resp.ind_tbl_num);
3495
3496         if (ucore->inlen < required_cmd_sz_header)
3497                 return -EINVAL;
3498
3499         if (ucore->outlen < required_resp_len)
3500                 return -ENOSPC;
3501
3502         err = ib_copy_from_udata(&cmd, ucore, required_cmd_sz_header);
3503         if (err)
3504                 return err;
3505
3506         ucore->inbuf = (const char *)ucore->inbuf + required_cmd_sz_header;
3507         ucore->inlen -= required_cmd_sz_header;
3508
3509         if (cmd.comp_mask)
3510                 return -EOPNOTSUPP;
3511
3512         if (cmd.log_ind_tbl_size > IB_USER_VERBS_MAX_LOG_IND_TBL_SIZE)
3513                 return -EINVAL;
3514
3515         num_wq_handles = 1 << cmd.log_ind_tbl_size;
3516         expected_in_size = num_wq_handles * sizeof(__u32);
3517         if (num_wq_handles == 1)
3518                 /* input size for wq handles is u64 aligned */
3519                 expected_in_size += sizeof(__u32);
3520
3521         if (ucore->inlen < expected_in_size)
3522                 return -EINVAL;
3523
3524         if (ucore->inlen > expected_in_size &&
3525             !ib_is_udata_cleared(ucore, expected_in_size,
3526                                  ucore->inlen - expected_in_size))
3527                 return -EOPNOTSUPP;
3528
3529         wqs_handles = kcalloc(num_wq_handles, sizeof(*wqs_handles),
3530                               GFP_KERNEL);
3531         if (!wqs_handles)
3532                 return -ENOMEM;
3533
3534         err = ib_copy_from_udata(wqs_handles, ucore,
3535                                  num_wq_handles * sizeof(__u32));
3536         if (err)
3537                 goto err_free;
3538
3539         wqs = kcalloc(num_wq_handles, sizeof(*wqs), GFP_KERNEL);
3540         if (!wqs) {
3541                 err = -ENOMEM;
3542                 goto  err_free;
3543         }
3544
3545         for (num_read_wqs = 0; num_read_wqs < num_wq_handles;
3546                         num_read_wqs++) {
3547                 wq = idr_read_wq(wqs_handles[num_read_wqs], file->ucontext);
3548                 if (!wq) {
3549                         err = -EINVAL;
3550                         goto put_wqs;
3551                 }
3552
3553                 wqs[num_read_wqs] = wq;
3554         }
3555
3556         uobj = kmalloc(sizeof(*uobj), GFP_KERNEL);
3557         if (!uobj) {
3558                 err = -ENOMEM;
3559                 goto put_wqs;
3560         }
3561
3562         init_uobj(uobj, 0, file->ucontext, &rwq_ind_table_lock_class);
3563         down_write(&uobj->mutex);
3564         init_attr.log_ind_tbl_size = cmd.log_ind_tbl_size;
3565         init_attr.ind_tbl = wqs;
3566         rwq_ind_tbl = ib_dev->create_rwq_ind_table(ib_dev, &init_attr, uhw);
3567
3568         if (IS_ERR(rwq_ind_tbl)) {
3569                 err = PTR_ERR(rwq_ind_tbl);
3570                 goto err_uobj;
3571         }
3572
3573         rwq_ind_tbl->ind_tbl = wqs;
3574         rwq_ind_tbl->log_ind_tbl_size = init_attr.log_ind_tbl_size;
3575         rwq_ind_tbl->uobject = uobj;
3576         uobj->object = rwq_ind_tbl;
3577         rwq_ind_tbl->device = ib_dev;
3578         atomic_set(&rwq_ind_tbl->usecnt, 0);
3579
3580         for (i = 0; i < num_wq_handles; i++)
3581                 atomic_inc(&wqs[i]->usecnt);
3582
3583         err = idr_add_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj);
3584         if (err)
3585                 goto destroy_ind_tbl;
3586
3587         resp.ind_tbl_handle = uobj->id;
3588         resp.ind_tbl_num = rwq_ind_tbl->ind_tbl_num;
3589         resp.response_length = required_resp_len;
3590
3591         err = ib_copy_to_udata(ucore,
3592                                &resp, resp.response_length);
3593         if (err)
3594                 goto err_copy;
3595
3596         kfree(wqs_handles);
3597
3598         for (j = 0; j < num_read_wqs; j++)
3599                 put_wq_read(wqs[j]);
3600
3601         mutex_lock(&file->mutex);
3602         list_add_tail(&uobj->list, &file->ucontext->rwq_ind_tbl_list);
3603         mutex_unlock(&file->mutex);
3604
3605         uobj->live = 1;
3606
3607         up_write(&uobj->mutex);
3608         return 0;
3609
3610 err_copy:
3611         idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj);
3612 destroy_ind_tbl:
3613         ib_destroy_rwq_ind_table(rwq_ind_tbl);
3614 err_uobj:
3615         put_uobj_write(uobj);
3616 put_wqs:
3617         for (j = 0; j < num_read_wqs; j++)
3618                 put_wq_read(wqs[j]);
3619 err_free:
3620         kfree(wqs_handles);
3621         kfree(wqs);
3622         return err;
3623 }
3624
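/*
 * Extended command: destroy an RWQ indirection table, remove its uobject
 * from the idr and the per-context list, and free the WQ pointer array
 * that was allocated when the table was created.
 */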
3625 int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file,
3626                                        struct ib_device *ib_dev,
3627                                        struct ib_udata *ucore,
3628                                        struct ib_udata *uhw)
3629 {
3630         struct ib_uverbs_ex_destroy_rwq_ind_table       cmd = {};
3631         struct ib_rwq_ind_table *rwq_ind_tbl;
3632         struct ib_uobject               *uobj;
3633         int                     ret;
3634         struct ib_wq    **ind_tbl;
3635         size_t required_cmd_sz;
3636
3637         required_cmd_sz = offsetof(typeof(cmd), ind_tbl_handle) + sizeof(cmd.ind_tbl_handle);
3638
3639         if (ucore->inlen < required_cmd_sz)
3640                 return -EINVAL;
3641
3642         if (ucore->inlen > sizeof(cmd) &&
3643             !ib_is_udata_cleared(ucore, sizeof(cmd),
3644                                  ucore->inlen - sizeof(cmd)))
3645                 return -EOPNOTSUPP;
3646
3647         ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
3648         if (ret)
3649                 return ret;
3650
3651         if (cmd.comp_mask)
3652                 return -EOPNOTSUPP;
3653
3654         uobj = idr_write_uobj(&ib_uverbs_rwq_ind_tbl_idr, cmd.ind_tbl_handle,
3655                               file->ucontext);
3656         if (!uobj)
3657                 return -EINVAL;
3658         rwq_ind_tbl = uobj->object;
3659         ind_tbl = rwq_ind_tbl->ind_tbl;
3660
3661         ret = ib_destroy_rwq_ind_table(rwq_ind_tbl);
3662         if (!ret)
3663                 uobj->live = 0;
3664
3665         put_uobj_write(uobj);
3666
3667         if (ret)
3668                 return ret;
3669
3670         idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj);
3671
3672         mutex_lock(&file->mutex);
3673         list_del(&uobj->list);
3674         mutex_unlock(&file->mutex);
3675
3676         put_uobj(uobj);
3677         kfree(ind_tbl);
3678         return ret;
3679 }
3680
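/*
 * Extended command: create a flow steering rule.  The caller must hold
 * the PRIV_NET_RAW privilege.  The user supplied flow attribute and its
 * trailing flow specs are validated and converted via
 * kern_spec_to_ib_spec() before being passed to ib_create_flow() on the
 * target QP.
 */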
3681 int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
3682                              struct ib_device *ib_dev,
3683                              struct ib_udata *ucore,
3684                              struct ib_udata *uhw)
3685 {
3686         struct ib_uverbs_create_flow      cmd;
3687         struct ib_uverbs_create_flow_resp resp;
3688         struct ib_uobject                 *uobj;
3689         struct ib_flow                    *flow_id;
3690         struct ib_uverbs_flow_attr        *kern_flow_attr;
3691         struct ib_flow_attr               *flow_attr;
3692         struct ib_qp                      *qp;
3693         int err = 0;
3694         void *kern_spec;
3695         void *ib_spec;
3696         int i;
3697
3698         if (ucore->inlen < sizeof(cmd))
3699                 return -EINVAL;
3700
3701         if (ucore->outlen < sizeof(resp))
3702                 return -ENOSPC;
3703
3704         err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
3705         if (err)
3706                 return err;
3707
3708         ucore->inbuf = (const char *)ucore->inbuf + sizeof(cmd);
3709         ucore->inlen -= sizeof(cmd);
3710
3711         if (cmd.comp_mask)
3712                 return -EINVAL;
3713
3714         if (priv_check(curthread, PRIV_NET_RAW) != 0)
3715                 return -EPERM;
3716
3717         if (cmd.flow_attr.flags >= IB_FLOW_ATTR_FLAGS_RESERVED)
3718                 return -EINVAL;
3719
3720         if ((cmd.flow_attr.flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) &&
3721             ((cmd.flow_attr.type == IB_FLOW_ATTR_ALL_DEFAULT) ||
3722              (cmd.flow_attr.type == IB_FLOW_ATTR_MC_DEFAULT)))
3723                 return -EINVAL;
3724
3725         if (cmd.flow_attr.num_of_specs > IB_FLOW_SPEC_SUPPORT_LAYERS)
3726                 return -EINVAL;
3727
3728         if (cmd.flow_attr.size > ucore->inlen ||
3729             cmd.flow_attr.size >
3730             (cmd.flow_attr.num_of_specs * sizeof(struct ib_uverbs_flow_spec)))
3731                 return -EINVAL;
3732
3733         if (cmd.flow_attr.reserved[0] ||
3734             cmd.flow_attr.reserved[1])
3735                 return -EINVAL;
3736
3737         if (cmd.flow_attr.num_of_specs) {
3738                 kern_flow_attr = kmalloc(sizeof(*kern_flow_attr) + cmd.flow_attr.size,
3739                                          GFP_KERNEL);
3740                 if (!kern_flow_attr)
3741                         return -ENOMEM;
3742
3743                 memcpy(kern_flow_attr, &cmd.flow_attr, sizeof(*kern_flow_attr));
3744                 err = ib_copy_from_udata(kern_flow_attr + 1, ucore,
3745                                          cmd.flow_attr.size);
3746                 if (err)
3747                         goto err_free_attr;
3748         } else {
3749                 kern_flow_attr = &cmd.flow_attr;
3750         }
3751
3752         uobj = kmalloc(sizeof(*uobj), GFP_KERNEL);
3753         if (!uobj) {
3754                 err = -ENOMEM;
3755                 goto err_free_attr;
3756         }
3757         init_uobj(uobj, 0, file->ucontext, &rule_lock_class);
3758         down_write(&uobj->mutex);
3759
3760         qp = idr_read_qp(cmd.qp_handle, file->ucontext);
3761         if (!qp) {
3762                 err = -EINVAL;
3763                 goto err_uobj;
3764         }
3765
3766         flow_attr = kzalloc(sizeof(*flow_attr) + cmd.flow_attr.num_of_specs *
3767                             sizeof(union ib_flow_spec), GFP_KERNEL);
3768         if (!flow_attr) {
3769                 err = -ENOMEM;
3770                 goto err_put;
3771         }
3772
3773         flow_attr->type = kern_flow_attr->type;
3774         flow_attr->priority = kern_flow_attr->priority;
3775         flow_attr->num_of_specs = kern_flow_attr->num_of_specs;
3776         flow_attr->port = kern_flow_attr->port;
3777         flow_attr->flags = kern_flow_attr->flags;
3778         flow_attr->size = sizeof(*flow_attr);
3779
3780         kern_spec = kern_flow_attr + 1;
3781         ib_spec = flow_attr + 1;
3782         for (i = 0; i < flow_attr->num_of_specs &&
3783              cmd.flow_attr.size > offsetof(struct ib_uverbs_flow_spec, reserved) &&
3784              cmd.flow_attr.size >=
3785              ((struct ib_uverbs_flow_spec *)kern_spec)->size; i++) {
3786                 err = kern_spec_to_ib_spec(kern_spec, ib_spec);
3787                 if (err)
3788                         goto err_free;
3789                 flow_attr->size +=
3790                         ((union ib_flow_spec *) ib_spec)->size;
3791                 cmd.flow_attr.size -= ((struct ib_uverbs_flow_spec *)kern_spec)->size;
3792                 kern_spec = (char *)kern_spec + ((struct ib_uverbs_flow_spec *) kern_spec)->size;
3793                 ib_spec = (char *)ib_spec + ((union ib_flow_spec *)ib_spec)->size;
3794         }
3795         if (cmd.flow_attr.size || (i != flow_attr->num_of_specs)) {
3796                 pr_warn("create flow failed, flow %d: %d bytes left from uverb cmd\n",
3797                         i, cmd.flow_attr.size);
3798                 err = -EINVAL;
3799                 goto err_free;
3800         }
3801         flow_id = ib_create_flow(qp, flow_attr, IB_FLOW_DOMAIN_USER);
3802         if (IS_ERR(flow_id)) {
3803                 err = PTR_ERR(flow_id);
3804                 goto err_free;
3805         }
3806         flow_id->qp = qp;
3807         flow_id->uobject = uobj;
3808         uobj->object = flow_id;
3809
3810         err = idr_add_uobj(&ib_uverbs_rule_idr, uobj);
3811         if (err)
3812                 goto destroy_flow;
3813
3814         memset(&resp, 0, sizeof(resp));
3815         resp.flow_handle = uobj->id;
3816
3817         err = ib_copy_to_udata(ucore,
3818                                &resp, sizeof(resp));
3819         if (err)
3820                 goto err_copy;
3821
3822         put_qp_read(qp);
3823         mutex_lock(&file->mutex);
3824         list_add_tail(&uobj->list, &file->ucontext->rule_list);
3825         mutex_unlock(&file->mutex);
3826
3827         uobj->live = 1;
3828
3829         up_write(&uobj->mutex);
3830         kfree(flow_attr);
3831         if (cmd.flow_attr.num_of_specs)
3832                 kfree(kern_flow_attr);
3833         return 0;
3834 err_copy:
3835         idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
3836 destroy_flow:
3837         ib_destroy_flow(flow_id);
3838 err_free:
3839         kfree(flow_attr);
3840 err_put:
3841         put_qp_read(qp);
3842 err_uobj:
3843         put_uobj_write(uobj);
3844 err_free_attr:
3845         if (cmd.flow_attr.num_of_specs)
3846                 kfree(kern_flow_attr);
3847         return err;
3848 }
3849
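/*
 * Extended command: destroy a flow steering rule by handle and release
 * its uobject.
 */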
3850 int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file,
3851                               struct ib_device *ib_dev,
3852                               struct ib_udata *ucore,
3853                               struct ib_udata *uhw)
3854 {
3855         struct ib_uverbs_destroy_flow   cmd;
3856         struct ib_flow                  *flow_id;
3857         struct ib_uobject               *uobj;
3858         int                             ret;
3859
3860         if (ucore->inlen < sizeof(cmd))
3861                 return -EINVAL;
3862
3863         ret = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
3864         if (ret)
3865                 return ret;
3866
3867         if (cmd.comp_mask)
3868                 return -EINVAL;
3869
3870         uobj = idr_write_uobj(&ib_uverbs_rule_idr, cmd.flow_handle,
3871                               file->ucontext);
3872         if (!uobj)
3873                 return -EINVAL;
3874         flow_id = uobj->object;
3875
3876         ret = ib_destroy_flow(flow_id);
3877         if (!ret)
3878                 uobj->live = 0;
3879
3880         put_uobj_write(uobj);
3881
3882         idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
3883
3884         mutex_lock(&file->mutex);
3885         list_del(&uobj->list);
3886         mutex_unlock(&file->mutex);
3887
3888         put_uobj(uobj);
3889
3890         return ret;
3891 }
3892
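/*
 * Common helper for the create_srq and create_xsrq commands.  For an XRC
 * SRQ the XRCD and completion queue are looked up and referenced in
 * addition to the PD; the driver's create_srq() method then builds the
 * SRQ, which is published through the SRQ idr.
 */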
3893 static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
3894                                 struct ib_device *ib_dev,
3895                                 struct ib_uverbs_create_xsrq *cmd,
3896                                 struct ib_udata *udata)
3897 {
3898         struct ib_uverbs_create_srq_resp resp;
3899         struct ib_usrq_object           *obj;
3900         struct ib_pd                    *pd;
3901         struct ib_srq                   *srq;
3902         struct ib_uobject               *uninitialized_var(xrcd_uobj);
3903         struct ib_srq_init_attr          attr;
3904         int ret;
3905
3906         obj = kmalloc(sizeof *obj, GFP_KERNEL);
3907         if (!obj)
3908                 return -ENOMEM;
3909
3910         init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext, &srq_lock_class);
3911         down_write(&obj->uevent.uobject.mutex);
3912
3913         if (cmd->srq_type == IB_SRQT_XRC) {
3914                 attr.ext.xrc.xrcd  = idr_read_xrcd(cmd->xrcd_handle, file->ucontext, &xrcd_uobj);
3915                 if (!attr.ext.xrc.xrcd) {
3916                         ret = -EINVAL;
3917                         goto err;
3918                 }
3919
3920                 obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject);
3921                 atomic_inc(&obj->uxrcd->refcnt);
3922
3923                 attr.ext.xrc.cq  = idr_read_cq(cmd->cq_handle, file->ucontext, 0);
3924                 if (!attr.ext.xrc.cq) {
3925                         ret = -EINVAL;
3926                         goto err_put_xrcd;
3927                 }
3928         }
3929
3930         pd  = idr_read_pd(cmd->pd_handle, file->ucontext);
3931         if (!pd) {
3932                 ret = -EINVAL;
3933                 goto err_put_cq;
3934         }
3935
3936         attr.event_handler  = ib_uverbs_srq_event_handler;
3937         attr.srq_context    = file;
3938         attr.srq_type       = cmd->srq_type;
3939         attr.attr.max_wr    = cmd->max_wr;
3940         attr.attr.max_sge   = cmd->max_sge;
3941         attr.attr.srq_limit = cmd->srq_limit;
3942
3943         obj->uevent.events_reported = 0;
3944         INIT_LIST_HEAD(&obj->uevent.event_list);
3945
3946         srq = pd->device->create_srq(pd, &attr, udata);
3947         if (IS_ERR(srq)) {
3948                 ret = PTR_ERR(srq);
3949                 goto err_put;
3950         }
3951
3952         srq->device        = pd->device;
3953         srq->pd            = pd;
3954         srq->srq_type      = cmd->srq_type;
3955         srq->uobject       = &obj->uevent.uobject;
3956         srq->event_handler = attr.event_handler;
3957         srq->srq_context   = attr.srq_context;
3958
3959         if (cmd->srq_type == IB_SRQT_XRC) {
3960                 srq->ext.xrc.cq   = attr.ext.xrc.cq;
3961                 srq->ext.xrc.xrcd = attr.ext.xrc.xrcd;
3962                 atomic_inc(&attr.ext.xrc.cq->usecnt);
3963                 atomic_inc(&attr.ext.xrc.xrcd->usecnt);
3964         }
3965
3966         atomic_inc(&pd->usecnt);
3967         atomic_set(&srq->usecnt, 0);
3968
3969         obj->uevent.uobject.object = srq;
3970         ret = idr_add_uobj(&ib_uverbs_srq_idr, &obj->uevent.uobject);
3971         if (ret)
3972                 goto err_destroy;
3973
3974         memset(&resp, 0, sizeof resp);
3975         resp.srq_handle = obj->uevent.uobject.id;
3976         resp.max_wr     = attr.attr.max_wr;
3977         resp.max_sge    = attr.attr.max_sge;
3978         if (cmd->srq_type == IB_SRQT_XRC)
3979                 resp.srqn = srq->ext.xrc.srq_num;
3980
3981         if (copy_to_user((void __user *) (unsigned long) cmd->response,
3982                          &resp, sizeof resp)) {
3983                 ret = -EFAULT;
3984                 goto err_copy;
3985         }
3986
3987         if (cmd->srq_type == IB_SRQT_XRC) {
3988                 put_uobj_read(xrcd_uobj);
3989                 put_cq_read(attr.ext.xrc.cq);
3990         }
3991         put_pd_read(pd);
3992
3993         mutex_lock(&file->mutex);
3994         list_add_tail(&obj->uevent.uobject.list, &file->ucontext->srq_list);
3995         mutex_unlock(&file->mutex);
3996
3997         obj->uevent.uobject.live = 1;
3998
3999         up_write(&obj->uevent.uobject.mutex);
4000
4001         return 0;
4002
4003 err_copy:
4004         idr_remove_uobj(&ib_uverbs_srq_idr, &obj->uevent.uobject);
4005
4006 err_destroy:
4007         ib_destroy_srq(srq);
4008
4009 err_put:
4010         put_pd_read(pd);
4011
4012 err_put_cq:
4013         if (cmd->srq_type == IB_SRQT_XRC)
4014                 put_cq_read(attr.ext.xrc.cq);
4015
4016 err_put_xrcd:
4017         if (cmd->srq_type == IB_SRQT_XRC) {
4018                 atomic_dec(&obj->uxrcd->refcnt);
4019                 put_uobj_read(xrcd_uobj);
4020         }
4021
4022 err:
4023         put_uobj_write(&obj->uevent.uobject);
4024         return ret;
4025 }
4026
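/*
 * Legacy create_srq command: translate the request into an equivalent
 * create_xsrq of type IB_SRQT_BASIC and reuse __uverbs_create_xsrq().
 */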
4027 ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
4028                              struct ib_device *ib_dev,
4029                              const char __user *buf, int in_len,
4030                              int out_len)
4031 {
4032         struct ib_uverbs_create_srq      cmd;
4033         struct ib_uverbs_create_xsrq     xcmd;
4034         struct ib_uverbs_create_srq_resp resp;
4035         struct ib_udata                  udata;
4036         int ret;
4037
4038         if (out_len < sizeof resp)
4039                 return -ENOSPC;
4040
4041         if (copy_from_user(&cmd, buf, sizeof cmd))
4042                 return -EFAULT;
4043
4044         xcmd.response    = cmd.response;
4045         xcmd.user_handle = cmd.user_handle;
4046         xcmd.srq_type    = IB_SRQT_BASIC;
4047         xcmd.pd_handle   = cmd.pd_handle;
4048         xcmd.max_wr      = cmd.max_wr;
4049         xcmd.max_sge     = cmd.max_sge;
4050         xcmd.srq_limit   = cmd.srq_limit;
4051
4052         INIT_UDATA(&udata, buf + sizeof cmd,
4053                    (unsigned long) cmd.response + sizeof resp,
4054                    in_len - sizeof cmd - sizeof(struct ib_uverbs_cmd_hdr),
4055                    out_len - sizeof resp);
4056
4057         ret = __uverbs_create_xsrq(file, ib_dev, &xcmd, &udata);
4058         if (ret)
4059                 return ret;
4060
4061         return in_len;
4062 }
4063
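/*
 * Extended SRQ creation (XRC capable); thin wrapper around
 * __uverbs_create_xsrq().
 */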
4064 ssize_t ib_uverbs_create_xsrq(struct ib_uverbs_file *file,
4065                               struct ib_device *ib_dev,
4066                               const char __user *buf, int in_len, int out_len)
4067 {
4068         struct ib_uverbs_create_xsrq     cmd;
4069         struct ib_uverbs_create_srq_resp resp;
4070         struct ib_udata                  udata;
4071         int ret;
4072
4073         if (out_len < sizeof resp)
4074                 return -ENOSPC;
4075
4076         if (copy_from_user(&cmd, buf, sizeof cmd))
4077                 return -EFAULT;
4078
4079         INIT_UDATA(&udata, buf + sizeof cmd,
4080                    (unsigned long) cmd.response + sizeof resp,
4081                    in_len - sizeof cmd - sizeof(struct ib_uverbs_cmd_hdr),
4082                    out_len - sizeof resp);
4083
4084         ret = __uverbs_create_xsrq(file, ib_dev, &cmd, &udata);
4085         if (ret)
4086                 return ret;
4087
4088         return in_len;
4089 }
4090
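/*
 * Modify an SRQ's max_wr / srq_limit attributes through the driver's
 * modify_srq() method.
 */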
4091 ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
4092                              struct ib_device *ib_dev,
4093                              const char __user *buf, int in_len,
4094                              int out_len)
4095 {
4096         struct ib_uverbs_modify_srq cmd;
4097         struct ib_udata             udata;
4098         struct ib_srq              *srq;
4099         struct ib_srq_attr          attr;
4100         int                         ret;
4101
4102         if (copy_from_user(&cmd, buf, sizeof cmd))
4103                 return -EFAULT;
4104
4105         INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd,
4106                    out_len);
4107
4108         srq = idr_read_srq(cmd.srq_handle, file->ucontext);
4109         if (!srq)
4110                 return -EINVAL;
4111
4112         attr.max_wr    = cmd.max_wr;
4113         attr.srq_limit = cmd.srq_limit;
4114
4115         ret = srq->device->modify_srq(srq, &attr, cmd.attr_mask, &udata);
4116
4117         put_srq_read(srq);
4118
4119         return ret ? ret : in_len;
4120 }
4121
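/*
 * Query an SRQ's current attributes and copy them back to user space.
 */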
4122 ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file,
4123                             struct ib_device *ib_dev,
4124                             const char __user *buf,
4125                             int in_len, int out_len)
4126 {
4127         struct ib_uverbs_query_srq      cmd;
4128         struct ib_uverbs_query_srq_resp resp;
4129         struct ib_srq_attr              attr;
4130         struct ib_srq                   *srq;
4131         int                             ret;
4132
4133         if (out_len < sizeof resp)
4134                 return -ENOSPC;
4135
4136         if (copy_from_user(&cmd, buf, sizeof cmd))
4137                 return -EFAULT;
4138
4139         srq = idr_read_srq(cmd.srq_handle, file->ucontext);
4140         if (!srq)
4141                 return -EINVAL;
4142
4143         ret = ib_query_srq(srq, &attr);
4144
4145         put_srq_read(srq);
4146
4147         if (ret)
4148                 return ret;
4149
4150         memset(&resp, 0, sizeof resp);
4151
4152         resp.max_wr    = attr.max_wr;
4153         resp.max_sge   = attr.max_sge;
4154         resp.srq_limit = attr.srq_limit;
4155
4156         if (copy_to_user((void __user *) (unsigned long) cmd.response,
4157                          &resp, sizeof resp))
4158                 return -EFAULT;
4159
4160         return in_len;
4161 }
4162
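/*
 * Destroy an SRQ.  For an XRC SRQ the reference taken on the XRCD at
 * creation time is dropped; the uobject is then removed from the idr and
 * the per-context srq_list, and the number of events reported is
 * returned to user space.
 */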
4163 ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
4164                               struct ib_device *ib_dev,
4165                               const char __user *buf, int in_len,
4166                               int out_len)
4167 {
4168         struct ib_uverbs_destroy_srq      cmd;
4169         struct ib_uverbs_destroy_srq_resp resp;
4170         struct ib_uobject                *uobj;
4171         struct ib_srq                    *srq;
4172         struct ib_uevent_object          *obj;
4173         int                               ret = -EINVAL;
4174         struct ib_usrq_object            *us;
4175         enum ib_srq_type                  srq_type;
4176
4177         if (copy_from_user(&cmd, buf, sizeof cmd))
4178                 return -EFAULT;
4179
4180         uobj = idr_write_uobj(&ib_uverbs_srq_idr, cmd.srq_handle, file->ucontext);
4181         if (!uobj)
4182                 return -EINVAL;
4183         srq = uobj->object;
4184         obj = container_of(uobj, struct ib_uevent_object, uobject);
4185         srq_type = srq->srq_type;
4186
4187         ret = ib_destroy_srq(srq);
4188         if (!ret)
4189                 uobj->live = 0;
4190
4191         put_uobj_write(uobj);
4192
4193         if (ret)
4194                 return ret;
4195
4196         if (srq_type == IB_SRQT_XRC) {
4197                 us = container_of(obj, struct ib_usrq_object, uevent);
4198                 atomic_dec(&us->uxrcd->refcnt);
4199         }
4200
4201         idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
4202
4203         mutex_lock(&file->mutex);
4204         list_del(&uobj->list);
4205         mutex_unlock(&file->mutex);
4206
4207         ib_uverbs_release_uevent(file, obj);
4208
4209         memset(&resp, 0, sizeof resp);
4210         resp.events_reported = obj->events_reported;
4211
4212         put_uobj(uobj);
4213
4214         if (copy_to_user((void __user *) (unsigned long) cmd.response,
4215                          &resp, sizeof resp))
4216                 ret = -EFAULT;
4217
4218         return ret ? ret : in_len;
4219 }
4220
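/*
 * Extended query_device command.  The response is built up incrementally
 * and response_length grows with each optional block (ODP caps,
 * timestamp mask, HCA core clock, extended capability flags, RSS caps,
 * max_wq_type_rq) that fits in the buffer provided by user space.
 */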
4221 int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
4222                               struct ib_device *ib_dev,
4223                               struct ib_udata *ucore,
4224                               struct ib_udata *uhw)
4225 {
4226         struct ib_uverbs_ex_query_device_resp resp = { {0} };
4227         struct ib_uverbs_ex_query_device  cmd;
4228         struct ib_device_attr attr = {0};
4229         int err;
4230
4231         if (ucore->inlen < sizeof(cmd))
4232                 return -EINVAL;
4233
4234         err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
4235         if (err)
4236                 return err;
4237
4238         if (cmd.comp_mask)
4239                 return -EINVAL;
4240
4241         if (cmd.reserved)
4242                 return -EINVAL;
4243
4244         resp.response_length = offsetof(typeof(resp), odp_caps);
4245
4246         if (ucore->outlen < resp.response_length)
4247                 return -ENOSPC;
4248
4249         err = ib_dev->query_device(ib_dev, &attr, uhw);
4250         if (err)
4251                 return err;
4252
4253         copy_query_dev_fields(file, ib_dev, &resp.base, &attr);
4254
4255         if (ucore->outlen < resp.response_length + sizeof(resp.odp_caps))
4256                 goto end;
4257
4258 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
4259         resp.odp_caps.general_caps = attr.odp_caps.general_caps;
4260         resp.odp_caps.per_transport_caps.rc_odp_caps =
4261                 attr.odp_caps.per_transport_caps.rc_odp_caps;
4262         resp.odp_caps.per_transport_caps.uc_odp_caps =
4263                 attr.odp_caps.per_transport_caps.uc_odp_caps;
4264         resp.odp_caps.per_transport_caps.ud_odp_caps =
4265                 attr.odp_caps.per_transport_caps.ud_odp_caps;
4266 #endif
4267         resp.response_length += sizeof(resp.odp_caps);
4268
4269         if (ucore->outlen < resp.response_length + sizeof(resp.timestamp_mask))
4270                 goto end;
4271
4272         resp.timestamp_mask = attr.timestamp_mask;
4273         resp.response_length += sizeof(resp.timestamp_mask);
4274
4275         if (ucore->outlen < resp.response_length + sizeof(resp.hca_core_clock))
4276                 goto end;
4277
4278         resp.hca_core_clock = attr.hca_core_clock;
4279         resp.response_length += sizeof(resp.hca_core_clock);
4280
4281         if (ucore->outlen < resp.response_length + sizeof(resp.device_cap_flags_ex))
4282                 goto end;
4283
4284         resp.device_cap_flags_ex = attr.device_cap_flags;
4285         resp.response_length += sizeof(resp.device_cap_flags_ex);
4286
4287         if (ucore->outlen < resp.response_length + sizeof(resp.rss_caps))
4288                 goto end;
4289
4290         resp.rss_caps.supported_qpts = attr.rss_caps.supported_qpts;
4291         resp.rss_caps.max_rwq_indirection_tables =
4292                 attr.rss_caps.max_rwq_indirection_tables;
4293         resp.rss_caps.max_rwq_indirection_table_size =
4294                 attr.rss_caps.max_rwq_indirection_table_size;
4295
4296         resp.response_length += sizeof(resp.rss_caps);
4297
4298         if (ucore->outlen < resp.response_length + sizeof(resp.max_wq_type_rq))
4299                 goto end;
4300
4301         resp.max_wq_type_rq = attr.max_wq_type_rq;
4302         resp.response_length += sizeof(resp.max_wq_type_rq);
4303 end:
4304         err = ib_copy_to_udata(ucore, &resp, resp.response_length);
4305         return err;
4306 }