2 * Copyright (c) 2006 Mellanox Technologies Ltd. All rights reserved.
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
/* SDP hello/hello-ack protocol version byte: major 2, minor 2. */
36 #define SDP_MAJV_MINV 0x22
/*
 * Tunable (default 1): accept SDP only on InfiniBand link layers.
 * NOTE(review): the continuation line of the description string is
 * missing from this extract -- the macro invocation is truncated here.
 */
38 SDP_MODPARAM_SINT(sdp_link_layer_ib_only, 1, "Support only link layer of "
/*
 * QP asynchronous event callback, installed via qp_init_attr.event_handler
 * in sdp_init_qp() below.
 * NOTE(review): return type and body are not visible in this extract
 * (source lines are missing) -- presumably a no-op/logging stub; confirm
 * against the full file.
 */
47 sdp_qp_event_handler(struct ib_event *event, void *data)
/*
 * Query and cache the maximum scatter/gather entries supported by the
 * IB device.
 * NOTE(review): max_sges is a function-local static, so the value cached
 * from the first device queried is reused for every device -- verify this
 * is intentional on multi-HCA systems (the guard lines around the query
 * are missing from this extract).
 */
52 sdp_get_max_dev_sge(struct ib_device *dev)
54 struct ib_device_attr attr;
55 static int max_sges = -1;
/* Presumably executed only when max_sges < 0; guard not visible here. */
60 ib_query_device(dev, &attr);
62 max_sges = attr.max_sge;
/*
 * Create the RX/TX completion rings and the RDMA queue pair for an SDP
 * socket, binding them to the device owned by the rdma_cm_id.
 * NOTE(review): many interior lines (braces, error checks, gotos,
 * returns) are missing from this extract; comments below describe only
 * what the visible lines establish.
 */
69 sdp_init_qp(struct socket *sk, struct rdma_cm_id *id)
/* QP template: fixed-size send/recv work queues, signal on request. */
71 struct ib_qp_init_attr qp_init_attr = {
72 .event_handler = sdp_qp_event_handler,
73 .cap.max_send_wr = SDP_TX_SIZE,
74 .cap.max_recv_wr = SDP_RX_SIZE,
75 .sq_sig_type = IB_SIGNAL_REQ_WR,
78 struct ib_device *device = id->device;
82 sdp_dbg(sk, "%s\n", __func__);
/* Clamp SGE counts to what the device supports. */
85 ssk->max_sge = sdp_get_max_dev_sge(device);
86 sdp_dbg(sk, "Max sges: %d\n", ssk->max_sge);
88 qp_init_attr.cap.max_send_sge = MIN(ssk->max_sge, SDP_MAX_SEND_SGES);
89 sdp_dbg(sk, "Setting max send sge to: %d\n",
90 qp_init_attr.cap.max_send_sge);
92 qp_init_attr.cap.max_recv_sge = MIN(ssk->max_sge, SDP_MAX_RECV_SGES);
93 sdp_dbg(sk, "Setting max recv sge to: %d\n",
94 qp_init_attr.cap.max_recv_sge);
/* Per-device SDP client data (PD etc.); NULL means SDP not registered
 * on this device -- the failure branch is only partly visible. */
96 ssk->sdp_dev = ib_get_client_data(device, &sdp_client);
98 sdp_warn(sk, "SDP not available on device %s\n", device->name);
/* Create completion rings first; their CQs feed the QP below.
 * Error checks after each call are not visible in this extract. */
103 rc = sdp_rx_ring_create(ssk, device);
107 rc = sdp_tx_ring_create(ssk, device);
111 qp_init_attr.recv_cq = ssk->rx_ring.cq;
112 qp_init_attr.send_cq = ssk->tx_ring.cq;
114 rc = rdma_create_qp(id, ssk->sdp_dev->pd, &qp_init_attr);
116 sdp_warn(sk, "Unable to create QP: %d.\n", rc);
120 ssk->ib_device = device;
122 ssk->context.device = device;
124 sdp_dbg(sk, "%s done\n", __func__);
/* Error-unwind tail: destroy rings in reverse order of creation.
 * The goto labels themselves are missing from this extract. */
128 sdp_tx_ring_destroy(ssk);
130 sdp_rx_ring_destroy(ssk);
/*
 * Passive-side handler for RDMA_CM_EVENT_CONNECT_REQUEST: spawn a child
 * socket, set up its QP, and seed its state from the peer's SDP hello
 * header (carried in the CM private data).
 * NOTE(review): interior error-handling lines are missing from this
 * extract; comments describe only the visible lines.
 */
136 sdp_connect_handler(struct socket *sk, struct rdma_cm_id *id,
137 struct rdma_cm_event *event)
139 struct sockaddr_in *src_addr;
140 struct sockaddr_in *dst_addr;
141 struct socket *child;
142 const struct sdp_hh *h;
143 struct sdp_sock *ssk;
146 sdp_dbg(sk, "%s %p -> %p\n", __func__, sdp_sk(sk)->id, id);
/* SDP hello header arrives as CM private data. */
148 h = event->param.conn.private_data;
149 SDP_DUMP_PACKET(sk, "RX", NULL, &h->bsdh);
/* Create the child socket for the incoming connection (FreeBSD
 * sonewconn); the NULL-check branch is not visible here. */
154 child = sonewconn(sk, SS_ISCONNECTED);
159 rc = sdp_init_qp(child, id);
/* Populate the child's sdp_sock: credentials, 4-tuple, and flow
 * control seeded from the peer's advertised buffer counts. */
166 ssk->cred = crhold(child->so_cred);
167 dst_addr = (struct sockaddr_in *)&id->route.addr.dst_addr;
168 src_addr = (struct sockaddr_in *)&id->route.addr.src_addr;
169 ssk->fport = dst_addr->sin_port;
170 ssk->faddr = dst_addr->sin_addr.s_addr;
171 ssk->lport = src_addr->sin_port;
/* Initial TX credits = peer's advertised receive buffers. */
172 ssk->max_bufs = ntohs(h->bsdh.bufs);
173 atomic_set(&ssk->tx_ring.credits, ssk->max_bufs);
174 ssk->min_bufs = tx_credits(ssk) / 4;
/* Goal send size = peer's local receive size minus the BSDH header. */
175 ssk->xmit_size_goal = ntohl(h->localrcvsz) - sizeof(struct sdp_bsdh);
176 sdp_init_buffers(ssk, rcvbuf_initial_size);
/* Half-open until RDMA_CM_EVENT_ESTABLISHED arrives. */
177 ssk->state = TCPS_SYN_RECEIVED;
/*
 * Active-side handler for RDMA_CM_EVENT_CONNECT_RESPONSE: parse the
 * peer's SDP hello-ack (CM private data) and move the socket to
 * ESTABLISHED.
 * NOTE(review): several interior lines (ssk assignment, early-return
 * for the SDP_DROPPED case) are missing from this extract.
 */
184 sdp_response_handler(struct socket *sk, struct rdma_cm_id *id,
185 struct rdma_cm_event *event)
187 const struct sdp_hah *h;
188 struct sockaddr_in *dst_addr;
189 struct sdp_sock *ssk;
190 sdp_dbg(sk, "%s\n", __func__);
194 ssk->state = TCPS_ESTABLISHED;
195 sdp_set_default_moderation(ssk);
/* Socket dropped while connecting -- the branch body is not visible. */
196 if (ssk->flags & SDP_DROPPED) {
200 if (sk->so_options & SO_KEEPALIVE)
201 sdp_start_keepalive_timer(sk);
/* Hello-ack carries the peer's buffer advertisement. */
202 h = event->param.conn.private_data;
203 SDP_DUMP_PACKET(sk, "RX", NULL, &h->bsdh);
204 ssk->max_bufs = ntohs(h->bsdh.bufs);
205 atomic_set(&ssk->tx_ring.credits, ssk->max_bufs);
206 ssk->min_bufs = tx_credits(ssk) / 4;
/* Goal send size = peer's actual receive size minus the BSDH header. */
207 ssk->xmit_size_goal =
208 ntohl(h->actrcvsz) - sizeof(struct sdp_bsdh);
/* Record the resolved foreign address/port. */
211 dst_addr = (struct sockaddr_in *)&id->route.addr.dst_addr;
212 ssk->fport = dst_addr->sin_port;
213 ssk->faddr = dst_addr->sin_addr.s_addr;
/*
 * Handler for RDMA_CM_EVENT_ESTABLISHED (and, per
 * sdp_disconnected_handler below, also invoked with a NULL event):
 * finalize the transition to ESTABLISHED.
 * NOTE(review): the ssk assignment and the body following the
 * SDP_DROPPED check are missing from this extract.
 */
221 sdp_connected_handler(struct socket *sk, struct rdma_cm_event *event)
223 struct sdp_sock *ssk;
225 sdp_dbg(sk, "%s\n", __func__);
229 ssk->state = TCPS_ESTABLISHED;
231 sdp_set_default_moderation(ssk);
233 if (sk->so_options & SO_KEEPALIVE)
234 sdp_start_keepalive_timer(sk);
/* Only complete the handshake if the socket wasn't dropped meanwhile;
 * the guarded statement is not visible here. */
236 if ((ssk->flags & SDP_DROPPED) == 0)
/*
 * Handle a CM-level disconnect. If the connection was still half-open
 * (SYN_RECEIVED), first drive it to ESTABLISHED via
 * sdp_connected_handler(NULL event) so teardown proceeds from a
 * consistent state.
 * NOTE(review): interior lines are missing from this extract; the
 * function presumably returns an error code consumed by sdp_cma_handler.
 */
243 sdp_disconnected_handler(struct socket *sk)
245 struct sdp_sock *ssk;
248 sdp_dbg(sk, "%s\n", __func__);
/* Caller must hold the SDP write lock. */
250 SDP_WLOCK_ASSERT(ssk);
251 if (sdp_sk(sk)->state == TCPS_SYN_RECEIVED) {
252 sdp_connected_handler(sk, NULL);
/*
 * Central RDMA CM event dispatcher for SDP sockets. Drives the SDP
 * handshake: on the active side it sends the hello header after route
 * resolution and accepts after the hello-ack; on the passive side it
 * builds a child socket on CONNECT_REQUEST and replies with a
 * hello-ack. Also handles disconnect/teardown events.
 * NOTE(review): many interior lines (locking, breaks, error paths,
 * hh/hah declarations) are missing from this extract; comments below
 * describe only the visible lines.
 */
262 sdp_cma_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
264 struct rdma_conn_param conn_param;
266 struct sdp_sock *ssk;
/* Guard: the cm_id may already be mid-teardown when an event lands. */
276 if (!ssk || !sk || !ssk->id) {
278 "cm_id is being torn down, event %d, ssk %p, sk %p, id %p\n",
279 event->event, ssk, sk, id);
/* For CONNECT_REQUEST, returning the value presumably rejects the
 * incoming connection; the two return operands are not visible. */
280 return event->event == RDMA_CM_EVENT_CONNECT_REQUEST ?
284 sdp_dbg(sk, "%s event %d id %p\n", __func__, event->event, id);
285 switch (event->event) {
286 case RDMA_CM_EVENT_ADDR_RESOLVED:
287 sdp_dbg(sk, "RDMA_CM_EVENT_ADDR_RESOLVED\n");
/* Optionally refuse non-IB link layers (e.g. RoCE) per the
 * sdp_link_layer_ib_only tunable. */
289 if (sdp_link_layer_ib_only &&
290 rdma_node_get_transport(id->device->node_type) ==
292 rdma_port_get_link_layer(id->device, id->port_num) !=
293 IB_LINK_LAYER_INFINIBAND) {
294 sdp_dbg(sk, "Link layer is: %d. Only IB link layer "
296 rdma_port_get_link_layer(id->device, id->port_num));
301 rc = rdma_resolve_route(id, SDP_ROUTE_TIMEOUT);
303 case RDMA_CM_EVENT_ADDR_ERROR:
304 sdp_dbg(sk, "RDMA_CM_EVENT_ADDR_ERROR\n");
307 case RDMA_CM_EVENT_ROUTE_RESOLVED:
308 sdp_dbg(sk, "RDMA_CM_EVENT_ROUTE_RESOLVED : %p\n", id);
/* Active side: create the QP, then connect carrying the SDP hello
 * header (hh) as CM private data. */
309 rc = sdp_init_qp(sk, id);
312 atomic_set(&sdp_sk(sk)->remote_credits,
313 rx_ring_posted(sdp_sk(sk)));
314 memset(&hh, 0, sizeof hh);
315 hh.bsdh.mid = SDP_MID_HELLO;
316 hh.bsdh.len = htonl(sizeof(struct sdp_hh));
319 hh.majv_minv = SDP_MAJV_MINV;
320 sdp_init_buffers(sdp_sk(sk), rcvbuf_initial_size);
/* Advertise as many buffers as receives currently posted. */
321 hh.bsdh.bufs = htons(rx_ring_posted(sdp_sk(sk)));
322 hh.localrcvsz = hh.desremrcvsz = htonl(sdp_sk(sk)->recv_bytes);
323 hh.max_adverts = 0x1;
/* Record the source address chosen during resolution; the lvalue of
 * this assignment is on a missing line. */
325 ((struct sockaddr_in *)&id->route.addr.src_addr)->sin_addr.s_addr;
326 memset(&conn_param, 0, sizeof conn_param);
327 conn_param.private_data_len = sizeof hh;
328 conn_param.private_data = &hh;
329 conn_param.responder_resources = 4 /* TODO */;
330 conn_param.initiator_depth = 4 /* TODO */;
331 conn_param.retry_count = SDP_RETRY_COUNT;
332 SDP_DUMP_PACKET(NULL, "TX", NULL, &hh.bsdh);
333 rc = rdma_connect(id, &conn_param);
335 case RDMA_CM_EVENT_ROUTE_ERROR:
336 sdp_dbg(sk, "RDMA_CM_EVENT_ROUTE_ERROR : %p\n", id);
339 case RDMA_CM_EVENT_CONNECT_REQUEST:
340 sdp_dbg(sk, "RDMA_CM_EVENT_CONNECT_REQUEST\n");
/* Passive side: build the child socket; on failure, reject the CM
 * connection (the failure test itself is on a missing line). */
341 rc = sdp_connect_handler(sk, id, event);
343 sdp_dbg(sk, "Destroying qp\n");
344 rdma_reject(id, NULL, 0);
/* Accept carrying the SDP hello-ack (hah) as private data. */
348 atomic_set(&ssk->remote_credits, rx_ring_posted(ssk));
349 memset(&hah, 0, sizeof hah);
350 hah.bsdh.mid = SDP_MID_HELLO_ACK;
351 hah.bsdh.bufs = htons(rx_ring_posted(ssk));
352 hah.bsdh.len = htonl(sizeof(struct sdp_hah));
353 hah.majv_minv = SDP_MAJV_MINV;
354 hah.ext_max_adverts = 1; /* Doesn't seem to be mandated by spec,
356 hah.actrcvsz = htonl(ssk->recv_bytes);
357 memset(&conn_param, 0, sizeof conn_param);
358 conn_param.private_data_len = sizeof hah;
359 conn_param.private_data = &hah;
360 conn_param.responder_resources = 4 /* TODO */;
361 conn_param.initiator_depth = 4 /* TODO */;
362 conn_param.retry_count = SDP_RETRY_COUNT;
363 SDP_DUMP_PACKET(sk, "TX", NULL, &hah.bsdh);
364 rc = rdma_accept(id, &conn_param);
371 case RDMA_CM_EVENT_CONNECT_RESPONSE:
372 sdp_dbg(sk, "RDMA_CM_EVENT_CONNECT_RESPONSE\n");
/* Active side received the hello-ack; on failure reject, otherwise
 * accept with no further private data. */
373 rc = sdp_response_handler(sk, id, event);
375 sdp_dbg(sk, "Destroying qp\n");
376 rdma_reject(id, NULL, 0);
378 rc = rdma_accept(id, NULL);
380 case RDMA_CM_EVENT_CONNECT_ERROR:
381 sdp_dbg(sk, "RDMA_CM_EVENT_CONNECT_ERROR\n");
384 case RDMA_CM_EVENT_UNREACHABLE:
385 sdp_dbg(sk, "RDMA_CM_EVENT_UNREACHABLE\n");
388 case RDMA_CM_EVENT_REJECTED:
389 sdp_dbg(sk, "RDMA_CM_EVENT_REJECTED\n");
392 case RDMA_CM_EVENT_ESTABLISHED:
393 sdp_dbg(sk, "RDMA_CM_EVENT_ESTABLISHED\n");
/* Lvalue of this assignment is on a missing line (presumably the
 * socket's local address field). */
395 ((struct sockaddr_in *)&id->route.addr.src_addr)->sin_addr.s_addr;
396 rc = sdp_connected_handler(sk, event);
398 case RDMA_CM_EVENT_DISCONNECTED: /* This means DREQ/DREP received */
399 sdp_dbg(sk, "RDMA_CM_EVENT_DISCONNECTED\n");
/* LAST_ACK: our DREQ was answered; stop the DREQ-wait timer. */
402 if (ssk->state == TCPS_LAST_ACK) {
403 sdp_cancel_dreq_wait_timeout(ssk);
405 sdp_dbg(sk, "%s: waiting for Infiniband tear down\n",
412 if (ssk->state != TCPS_TIME_WAIT) {
413 if (ssk->state == TCPS_CLOSE_WAIT) {
414 sdp_dbg(sk, "IB teardown while in "
415 "TCPS_CLOSE_WAIT taking reference to "
416 "let close() finish the work\n");
418 rc = sdp_disconnected_handler(sk);
424 case RDMA_CM_EVENT_TIMEWAIT_EXIT:
425 sdp_dbg(sk, "RDMA_CM_EVENT_TIMEWAIT_EXIT\n");
427 rc = sdp_disconnected_handler(sk);
430 case RDMA_CM_EVENT_DEVICE_REMOVAL:
431 sdp_dbg(sk, "RDMA_CM_EVENT_DEVICE_REMOVAL\n");
435 printk(KERN_ERR "SDP: Unexpected CMA event: %d\n",
441 sdp_dbg(sk, "event %d done. status %d\n", event->event, rc);
/* On error (rc != 0, test on a missing line) drop the socket with the
 * negated rc as the errno-style reason. */
449 if (sdp_notify(ssk, -rc))