/* sys/ofed/drivers/infiniband/ulp/sdp/sdp_cma.c — FreeBSD releng/9.2 */
/*
 * Copyright (c) 2006 Mellanox Technologies Ltd.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * $Id$
 */
#include "sdp.h"

/*
 * SDP protocol version advertised in the hello/hello-ack headers
 * (majv_minv field) — presumably major 2, minor 2; see the SDP spec.
 */
#define SDP_MAJV_MINV 0x22

/*
 * Tunable: when non-zero (the default), reject address resolution on IB
 * transport devices whose port link layer is not InfiniBand.
 */
SDP_MODPARAM_SINT(sdp_link_layer_ib_only, 1, "Support only link layer of "
                "type Infiniband");

/*
 * On-the-wire sizes of the SDP hello (HH) and hello-ack (HAH) messages.
 * NOTE(review): not referenced in this file; presumably used to size the
 * CM private_data elsewhere — confirm against the SDP headers.
 */
enum {
        SDP_HH_SIZE = 76,
        SDP_HAH_SIZE = 180,
};
45
/*
 * QP asynchronous event callback installed via ib_qp_init_attr in
 * sdp_init_qp().  Intentionally a no-op: SDP ignores QP async events.
 */
static void
sdp_qp_event_handler(struct ib_event *event, void *data)
{
}
50
51 static int
52 sdp_get_max_dev_sge(struct ib_device *dev)
53 {
54         struct ib_device_attr attr;
55         static int max_sges = -1;
56
57         if (max_sges > 0)
58                 goto out;
59
60         ib_query_device(dev, &attr);
61
62         max_sges = attr.max_sge;
63
64 out:
65         return max_sges;
66 }
67
/*
 * Create the completion rings and the RC queue pair backing an SDP socket.
 *
 * Acquires, in order: the per-device SDP client data, the RX ring, the TX
 * ring, and finally the QP itself.  On failure each step unwinds the ones
 * before it via the goto chain and a negative errno is returned.  On
 * success ssk->qp, ssk->ib_device and ssk->sdp_dev are set and qp_active
 * is raised.
 */
static int
sdp_init_qp(struct socket *sk, struct rdma_cm_id *id)
{
	struct ib_qp_init_attr qp_init_attr = {
		.event_handler = sdp_qp_event_handler,
		.cap.max_send_wr = SDP_TX_SIZE,
		.cap.max_recv_wr = SDP_RX_SIZE,
		.sq_sig_type = IB_SIGNAL_REQ_WR,	/* completions only for requested WRs */
		.qp_type = IB_QPT_RC,		/* SDP runs over reliable-connected QPs */
	};
	struct ib_device *device = id->device;
	struct sdp_sock *ssk;
	int rc;

	sdp_dbg(sk, "%s\n", __func__);

	ssk = sdp_sk(sk);
	ssk->max_sge = sdp_get_max_dev_sge(device);
	sdp_dbg(sk, "Max sges: %d\n", ssk->max_sge);

	/* Clamp the SGE counts to both the device limit and SDP's own caps. */
	qp_init_attr.cap.max_send_sge = MIN(ssk->max_sge, SDP_MAX_SEND_SGES);
	sdp_dbg(sk, "Setting max send sge to: %d\n",
	    qp_init_attr.cap.max_send_sge);

	qp_init_attr.cap.max_recv_sge = MIN(ssk->max_sge, SDP_MAX_RECV_SGES);
	sdp_dbg(sk, "Setting max recv sge to: %d\n",
	    qp_init_attr.cap.max_recv_sge);

	/* Per-device state registered by the SDP IB client at attach time. */
	ssk->sdp_dev = ib_get_client_data(device, &sdp_client);
	if (!ssk->sdp_dev) {
		sdp_warn(sk, "SDP not available on device %s\n", device->name);
		rc = -ENODEV;
		goto err_rx;
	}

	rc = sdp_rx_ring_create(ssk, device);
	if (rc)
		goto err_rx;

	rc = sdp_tx_ring_create(ssk, device);
	if (rc)
		goto err_tx;

	/* The rings own the CQs; hand them to the QP. */
	qp_init_attr.recv_cq = ssk->rx_ring.cq;
	qp_init_attr.send_cq = ssk->tx_ring.cq;

	rc = rdma_create_qp(id, ssk->sdp_dev->pd, &qp_init_attr);
	if (rc) {
		sdp_warn(sk, "Unable to create QP: %d.\n", rc);
		goto err_qp;
	}
	ssk->qp = id->qp;
	ssk->ib_device = device;
	ssk->qp_active = 1;
	ssk->context.device = device;

	sdp_dbg(sk, "%s done\n", __func__);
	return 0;

err_qp:
	sdp_tx_ring_destroy(ssk);
err_tx:
	sdp_rx_ring_destroy(ssk);
err_rx:
	return rc;
}
134
/*
 * Passive-open path: handle an incoming RDMA_CM_EVENT_CONNECT_REQUEST on a
 * listening SDP socket.  Parses the peer's hello (HH) message from the CM
 * private data, spawns a child socket via sonewconn(), creates its QP, and
 * initializes the child's SDP state under the write lock, leaving it in
 * TCPS_SYN_RECEIVED until the connection is established.
 *
 * Returns 0 on success or a negative errno (the caller then rejects the CM
 * connection).
 *
 * NOTE(review): if sdp_init_qp() fails, the socket returned by sonewconn()
 * is not explicitly torn down here — confirm the caller/so framework
 * reclaims it.
 */
static int
sdp_connect_handler(struct socket *sk, struct rdma_cm_id *id,
    struct rdma_cm_event *event)
{
	struct sockaddr_in *src_addr;
	struct sockaddr_in *dst_addr;
	struct socket *child;
	const struct sdp_hh *h;
	struct sdp_sock *ssk;
	int rc;

	sdp_dbg(sk, "%s %p -> %p\n", __func__, sdp_sk(sk)->id, id);

	h = event->param.conn.private_data;
	SDP_DUMP_PACKET(sk, "RX", NULL, &h->bsdh);

	/* A hello with no advertisements is malformed; refuse it. */
	if (!h->max_adverts)
		return -EINVAL;

	child = sonewconn(sk, SS_ISCONNECTED);
	if (!child)
		return -ENOMEM;

	ssk = sdp_sk(child);
	rc = sdp_init_qp(child, id);
	if (rc)
		return rc;
	SDP_WLOCK(ssk);
	id->context = ssk;
	ssk->id = id;
	ssk->socket = child;
	ssk->cred = crhold(child->so_cred);
	/* Record the 4-tuple from the resolved CM route (network byte order). */
	dst_addr = (struct sockaddr_in *)&id->route.addr.dst_addr;
	src_addr = (struct sockaddr_in *)&id->route.addr.src_addr;
	ssk->fport = dst_addr->sin_port;
	ssk->faddr = dst_addr->sin_addr.s_addr;
	ssk->lport = src_addr->sin_port;
	/* Flow control: initial TX credits come from the peer's hello. */
	ssk->max_bufs = ntohs(h->bsdh.bufs);
	atomic_set(&ssk->tx_ring.credits, ssk->max_bufs);
	ssk->min_bufs = tx_credits(ssk) / 4;
	/* Peer's receive size bounds our transmit size, minus the BSDH header. */
	ssk->xmit_size_goal = ntohl(h->localrcvsz) - sizeof(struct sdp_bsdh);
	sdp_init_buffers(ssk, rcvbuf_initial_size);
	ssk->state = TCPS_SYN_RECEIVED;
	SDP_WUNLOCK(ssk);

	return 0;
}
182
/*
 * Active-open path: handle RDMA_CM_EVENT_CONNECT_RESPONSE — the peer's
 * hello-ack (HAH) to our hello.  Moves the socket to TCPS_ESTABLISHED,
 * installs the peer's credits and receive-size limits, records the foreign
 * address, and marks the socket connected.
 *
 * If the socket was dropped while the handshake was in flight
 * (SDP_DROPPED), returns 0 without touching the rest of the state.
 */
static int
sdp_response_handler(struct socket *sk, struct rdma_cm_id *id,
    struct rdma_cm_event *event)
{
	const struct sdp_hah *h;
	struct sockaddr_in *dst_addr;
	struct sdp_sock *ssk;
	sdp_dbg(sk, "%s\n", __func__);

	ssk = sdp_sk(sk);
	SDP_WLOCK(ssk);
	ssk->state = TCPS_ESTABLISHED;
	sdp_set_default_moderation(ssk);
	if (ssk->flags & SDP_DROPPED) {
		SDP_WUNLOCK(ssk);
		return 0;
	}
	if (sk->so_options & SO_KEEPALIVE)
		sdp_start_keepalive_timer(sk);
	h = event->param.conn.private_data;
	SDP_DUMP_PACKET(sk, "RX", NULL, &h->bsdh);
	/* Flow control: initial TX credits come from the peer's hello-ack. */
	ssk->max_bufs = ntohs(h->bsdh.bufs);
	atomic_set(&ssk->tx_ring.credits, ssk->max_bufs);
	ssk->min_bufs = tx_credits(ssk) / 4;
	/* Peer's actual receive size bounds our transmit size. */
	ssk->xmit_size_goal =
		ntohl(h->actrcvsz) - sizeof(struct sdp_bsdh);
	ssk->poll_cq = 1;

	dst_addr = (struct sockaddr_in *)&id->route.addr.dst_addr;
	ssk->fport = dst_addr->sin_port;
	ssk->faddr = dst_addr->sin_addr.s_addr;
	soisconnected(sk);
	SDP_WUNLOCK(ssk);

	return 0;
}
219
220 static int
221 sdp_connected_handler(struct socket *sk, struct rdma_cm_event *event)
222 {
223         struct sdp_sock *ssk;
224
225         sdp_dbg(sk, "%s\n", __func__);
226
227         ssk = sdp_sk(sk);
228         SDP_WLOCK(ssk);
229         ssk->state = TCPS_ESTABLISHED;
230
231         sdp_set_default_moderation(ssk);
232
233         if (sk->so_options & SO_KEEPALIVE)
234                 sdp_start_keepalive_timer(sk);
235
236         if ((ssk->flags & SDP_DROPPED) == 0)
237                 soisconnected(sk);
238         SDP_WUNLOCK(ssk);
239         return 0;
240 }
241
242 static int
243 sdp_disconnected_handler(struct socket *sk)
244 {
245         struct sdp_sock *ssk;
246
247         ssk = sdp_sk(sk);
248         sdp_dbg(sk, "%s\n", __func__);
249
250         SDP_WLOCK_ASSERT(ssk);
251         if (sdp_sk(sk)->state == TCPS_SYN_RECEIVED) {
252                 sdp_connected_handler(sk, NULL);
253
254                 if (rcv_nxt(ssk))
255                         return 0;
256         }
257
258         return -ECONNRESET;
259 }
260
/*
 * Central RDMA CM event dispatcher for SDP sockets.  Registered as the
 * rdma_cm_id callback; drives both the active-open state machine
 * (ADDR_RESOLVED -> ROUTE_RESOLVED -> CONNECT_RESPONSE -> ESTABLISHED)
 * and the passive side (CONNECT_REQUEST), plus disconnect/teardown events.
 *
 * Returns 0 on success; on a non-zero return the socket is detached from
 * the cm_id and notified of the error, and the CM layer will destroy the
 * id.
 */
int
sdp_cma_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
	struct rdma_conn_param conn_param;
	struct socket *sk;
	struct sdp_sock *ssk;
	struct sdp_hah hah;
	struct sdp_hh hh;

	int rc = 0;

	ssk = id->context;
	sk = NULL;
	if (ssk)
		sk = ssk->socket;
	/* Socket already detached from this id: only a CONNECT_REQUEST is an
	 * error; everything else is a stale event on a dying id. */
	if (!ssk || !sk || !ssk->id) {
		sdp_dbg(sk,
		    "cm_id is being torn down, event %d, ssk %p, sk %p, id %p\n",
			event->event, ssk, sk, id);
		return event->event == RDMA_CM_EVENT_CONNECT_REQUEST ?
			-EINVAL : 0;
	}

	sdp_dbg(sk, "%s event %d id %p\n", __func__, event->event, id);
	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
		sdp_dbg(sk, "RDMA_CM_EVENT_ADDR_RESOLVED\n");

		/* Optionally restrict SDP to true InfiniBand link layers
		 * (tunable sdp_link_layer_ib_only, default on). */
		if (sdp_link_layer_ib_only &&
			rdma_node_get_transport(id->device->node_type) == 
				RDMA_TRANSPORT_IB &&
			rdma_port_get_link_layer(id->device, id->port_num) !=
				IB_LINK_LAYER_INFINIBAND) {
			sdp_dbg(sk, "Link layer is: %d. Only IB link layer "
				"is allowed\n",
				rdma_port_get_link_layer(id->device, id->port_num));
			rc = -ENETUNREACH;
			break;
		}

		rc = rdma_resolve_route(id, SDP_ROUTE_TIMEOUT);
		break;
	case RDMA_CM_EVENT_ADDR_ERROR:
		sdp_dbg(sk, "RDMA_CM_EVENT_ADDR_ERROR\n");
		rc = -ENETUNREACH;
		break;
	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		/* Active open: create the QP, build the SDP hello (HH)
		 * message and send it as CM private data via rdma_connect. */
		sdp_dbg(sk, "RDMA_CM_EVENT_ROUTE_RESOLVED : %p\n", id);
		rc = sdp_init_qp(sk, id);
		if (rc)
			break;
		atomic_set(&sdp_sk(sk)->remote_credits,
				rx_ring_posted(sdp_sk(sk)));
		memset(&hh, 0, sizeof hh);
		hh.bsdh.mid = SDP_MID_HELLO;
		hh.bsdh.len = htonl(sizeof(struct sdp_hh));
		hh.max_adverts = 1;
		hh.ipv_cap = 0x40;	/* NOTE(review): presumably the IPv4 capability bit per the SDP spec — confirm */
		hh.majv_minv = SDP_MAJV_MINV;
		sdp_init_buffers(sdp_sk(sk), rcvbuf_initial_size);
		hh.bsdh.bufs = htons(rx_ring_posted(sdp_sk(sk)));
		hh.localrcvsz = hh.desremrcvsz = htonl(sdp_sk(sk)->recv_bytes);
		hh.max_adverts = 0x1;	/* re-assigned; same value as above */
		sdp_sk(sk)->laddr = 
			((struct sockaddr_in *)&id->route.addr.src_addr)->sin_addr.s_addr;
		memset(&conn_param, 0, sizeof conn_param);
		conn_param.private_data_len = sizeof hh;
		conn_param.private_data = &hh;
		conn_param.responder_resources = 4 /* TODO */;
		conn_param.initiator_depth = 4 /* TODO */;
		conn_param.retry_count = SDP_RETRY_COUNT;
		SDP_DUMP_PACKET(NULL, "TX", NULL, &hh.bsdh);
		rc = rdma_connect(id, &conn_param);
		break;
	case RDMA_CM_EVENT_ROUTE_ERROR:
		sdp_dbg(sk, "RDMA_CM_EVENT_ROUTE_ERROR : %p\n", id);
		rc = -ETIMEDOUT;
		break;
	case RDMA_CM_EVENT_CONNECT_REQUEST:
		/* Passive open: spawn a child socket, then reply with the
		 * hello-ack (HAH) message via rdma_accept. */
		sdp_dbg(sk, "RDMA_CM_EVENT_CONNECT_REQUEST\n");
		rc = sdp_connect_handler(sk, id, event);
		if (rc) {
			sdp_dbg(sk, "Destroying qp\n");
			rdma_reject(id, NULL, 0);
			break;
		}
		/* sdp_connect_handler() pointed id->context at the child. */
		ssk = id->context;
		atomic_set(&ssk->remote_credits, rx_ring_posted(ssk));
		memset(&hah, 0, sizeof hah);
		hah.bsdh.mid = SDP_MID_HELLO_ACK;
		hah.bsdh.bufs = htons(rx_ring_posted(ssk));
		hah.bsdh.len = htonl(sizeof(struct sdp_hah));
		hah.majv_minv = SDP_MAJV_MINV;
		hah.ext_max_adverts = 1; /* Doesn't seem to be mandated by spec,
					    but just in case */
		hah.actrcvsz = htonl(ssk->recv_bytes);
		memset(&conn_param, 0, sizeof conn_param);
		conn_param.private_data_len = sizeof hah;
		conn_param.private_data = &hah;
		conn_param.responder_resources = 4 /* TODO */;
		conn_param.initiator_depth = 4 /* TODO */;
		conn_param.retry_count = SDP_RETRY_COUNT;
		SDP_DUMP_PACKET(sk, "TX", NULL, &hah.bsdh);
		rc = rdma_accept(id, &conn_param);
		if (rc) {
			/* Accept failed: detach the child from the id so the
			 * generic error path below does not touch it again. */
			ssk->id = NULL;
			id->qp = NULL;
			id->context = NULL;
		}
		break;
	case RDMA_CM_EVENT_CONNECT_RESPONSE:
		sdp_dbg(sk, "RDMA_CM_EVENT_CONNECT_RESPONSE\n");
		rc = sdp_response_handler(sk, id, event);
		if (rc) {
			sdp_dbg(sk, "Destroying qp\n");
			rdma_reject(id, NULL, 0);
		} else
			rc = rdma_accept(id, NULL);
		break;
	case RDMA_CM_EVENT_CONNECT_ERROR:
		sdp_dbg(sk, "RDMA_CM_EVENT_CONNECT_ERROR\n");
		rc = -ETIMEDOUT;
		break;
	case RDMA_CM_EVENT_UNREACHABLE:
		sdp_dbg(sk, "RDMA_CM_EVENT_UNREACHABLE\n");
		rc = -ENETUNREACH;
		break;
	case RDMA_CM_EVENT_REJECTED:
		sdp_dbg(sk, "RDMA_CM_EVENT_REJECTED\n");
		rc = -ECONNREFUSED;
		break;
	case RDMA_CM_EVENT_ESTABLISHED:
		sdp_dbg(sk, "RDMA_CM_EVENT_ESTABLISHED\n");
		sdp_sk(sk)->laddr = 
			((struct sockaddr_in *)&id->route.addr.src_addr)->sin_addr.s_addr;
		rc = sdp_connected_handler(sk, event);
		break;
	case RDMA_CM_EVENT_DISCONNECTED: /* This means DREQ/DREP received */
		sdp_dbg(sk, "RDMA_CM_EVENT_DISCONNECTED\n");

		SDP_WLOCK(ssk);
		if (ssk->state == TCPS_LAST_ACK) {
			sdp_cancel_dreq_wait_timeout(ssk);

			sdp_dbg(sk, "%s: waiting for Infiniband tear down\n",
				__func__);
		}
		ssk->qp_active = 0;
		/* Drop the lock around rdma_disconnect(): it may sleep/call
		 * back into the CM; state is re-examined after relocking. */
		SDP_WUNLOCK(ssk);
		rdma_disconnect(id);
		SDP_WLOCK(ssk);
		if (ssk->state != TCPS_TIME_WAIT) {
			if (ssk->state == TCPS_CLOSE_WAIT) {
				sdp_dbg(sk, "IB teardown while in "
					"TCPS_CLOSE_WAIT taking reference to "
					"let close() finish the work\n");
			}
			rc = sdp_disconnected_handler(sk);
			if (rc)
				rc = -EPIPE;
		}
		SDP_WUNLOCK(ssk);
		break;
	case RDMA_CM_EVENT_TIMEWAIT_EXIT:
		sdp_dbg(sk, "RDMA_CM_EVENT_TIMEWAIT_EXIT\n");
		SDP_WLOCK(ssk);
		rc = sdp_disconnected_handler(sk);
		SDP_WUNLOCK(ssk);
		break;
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
		sdp_dbg(sk, "RDMA_CM_EVENT_DEVICE_REMOVAL\n");
		rc = -ENETRESET;
		break;
	default:
		printk(KERN_ERR "SDP: Unexpected CMA event: %d\n",
		       event->event);
		rc = -ECONNABORTED;
		break;
	}

	sdp_dbg(sk, "event %d done. status %d\n", event->event, rc);

	/* Generic error path: detach the socket from the id and notify it of
	 * the error.  NOTE(review): sdp_notify() apparently releases the lock
	 * itself when it returns non-zero — confirm against its definition. */
	if (rc) {
		SDP_WLOCK(ssk);
		if (ssk->id == id) {
			ssk->id = NULL;
			id->qp = NULL;
			id->context = NULL;
			if (sdp_notify(ssk, -rc))
				SDP_WUNLOCK(ssk);
		} else
			SDP_WUNLOCK(ssk);
	}

	return rc;
}