]> CyberLeo.Net >> Repos - FreeBSD/releng/10.0.git/blob - contrib/ofed/management/opensm/opensm/osm_sa_multipath_record.c
- Copy stable/10 (r259064) to releng/10.0 as part of the
[FreeBSD/releng/10.0.git] / contrib / ofed / management / opensm / opensm / osm_sa_multipath_record.c
1 /*
2  * Copyright (c) 2006-2008 Voltaire, Inc. All rights reserved.
3  * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved.
4  * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
5  *
6  * This software is available to you under a choice of one of two
7  * licenses.  You may choose to be licensed under the terms of the GNU
8  * General Public License (GPL) Version 2, available from the file
9  * COPYING in the main directory of this source tree, or the
10  * OpenIB.org BSD license below:
11  *
12  *     Redistribution and use in source and binary forms, with or
13  *     without modification, are permitted provided that the following
14  *     conditions are met:
15  *
16  *      - Redistributions of source code must retain the above
17  *        copyright notice, this list of conditions and the following
18  *        disclaimer.
19  *
20  *      - Redistributions in binary form must reproduce the above
21  *        copyright notice, this list of conditions and the following
22  *        disclaimer in the documentation and/or other materials
23  *        provided with the distribution.
24  *
25  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32  * SOFTWARE.
33  *
34  */
35
36 /*
37  * Abstract:
38  *      Implementation of osm_mpr_rcv_t.
39  *      This object represents the MultiPath Record Receiver object.
40  *      This object is part of the opensm family of objects.
41  */
42
43 #if HAVE_CONFIG_H
44 #  include <config.h>
45 #endif                          /* HAVE_CONFIG_H */
46
47 #if defined (VENDOR_RMPP_SUPPORT) && defined (DUAL_SIDED_RMPP)
48
49 #include <string.h>
50 #include <iba/ib_types.h>
51 #include <complib/cl_qmap.h>
52 #include <complib/cl_passivelock.h>
53 #include <complib/cl_debug.h>
54 #include <complib/cl_qlist.h>
55 #include <vendor/osm_vendor_api.h>
56 #include <opensm/osm_port.h>
57 #include <opensm/osm_node.h>
58 #include <opensm/osm_switch.h>
59 #include <opensm/osm_partition.h>
60 #include <opensm/osm_helper.h>
61 #include <opensm/osm_qos_policy.h>
62 #include <opensm/osm_sa.h>
63
64 #define OSM_SA_MPR_MAX_NUM_PATH        127
65
66 typedef struct osm_mpr_item {
67         cl_list_item_t list_item;
68         ib_path_rec_t path_rec;
69         const osm_port_t *p_src_port;
70         const osm_port_t *p_dest_port;
71         int hops;
72 } osm_mpr_item_t;
73
74 typedef struct osm_path_parms {
75         ib_net16_t pkey;
76         uint8_t mtu;
77         uint8_t rate;
78         uint8_t sl;
79         uint8_t pkt_life;
80         boolean_t reversible;
81         int hops;
82 } osm_path_parms_t;
83
84 /**********************************************************************
85  **********************************************************************/
86 static inline boolean_t
87 __osm_sa_multipath_rec_is_tavor_port(IN const osm_port_t * const p_port)
88 {
89         osm_node_t const *p_node;
90         ib_net32_t vend_id;
91
92         p_node = p_port->p_node;
93         vend_id = ib_node_info_get_vendor_id(&p_node->node_info);
94
95         return ((p_node->node_info.device_id == CL_HTON16(23108)) &&
96                 ((vend_id == CL_HTON32(OSM_VENDOR_ID_MELLANOX)) ||
97                  (vend_id == CL_HTON32(OSM_VENDOR_ID_TOPSPIN)) ||
98                  (vend_id == CL_HTON32(OSM_VENDOR_ID_SILVERSTORM)) ||
99                  (vend_id == CL_HTON32(OSM_VENDOR_ID_VOLTAIRE))));
100 }
101
102 /**********************************************************************
103  **********************************************************************/
104 static boolean_t
105 __osm_sa_multipath_rec_apply_tavor_mtu_limit(IN const ib_multipath_rec_t *
106                                              const p_mpr,
107                                              IN const osm_port_t *
108                                              const p_src_port,
109                                              IN const osm_port_t *
110                                              const p_dest_port,
111                                              IN const ib_net64_t comp_mask)
112 {
113         uint8_t required_mtu;
114
115         /* only if at least one of the ports is a Tavor device */
116         if (!__osm_sa_multipath_rec_is_tavor_port(p_src_port) &&
117             !__osm_sa_multipath_rec_is_tavor_port(p_dest_port))
118                 return (FALSE);
119
120         /*
121            we can apply the patch if either:
122            1. No MTU required
123            2. Required MTU <
124            3. Required MTU = 1K or 512 or 256
125            4. Required MTU > 256 or 512
126          */
127         required_mtu = ib_multipath_rec_mtu(p_mpr);
128         if ((comp_mask & IB_MPR_COMPMASK_MTUSELEC) &&
129             (comp_mask & IB_MPR_COMPMASK_MTU)) {
130                 switch (ib_multipath_rec_mtu_sel(p_mpr)) {
131                 case 0: /* must be greater than */
132                 case 2: /* exact match */
133                         if (IB_MTU_LEN_1024 < required_mtu)
134                                 return (FALSE);
135                         break;
136
137                 case 1: /* must be less than */
138                         /* can't be disqualified by this one */
139                         break;
140
141                 case 3: /* largest available */
142                         /* the ULP intentionally requested */
143                         /* the largest MTU possible */
144                         return (FALSE);
145                         break;
146
147                 default:
148                         /* if we're here, there's a bug in ib_multipath_rec_mtu_sel() */
149                         CL_ASSERT(FALSE);
150                         break;
151                 }
152         }
153
154         return (TRUE);
155 }
156
157 /**********************************************************************
158  **********************************************************************/
159 static ib_api_status_t
160 __osm_mpr_rcv_get_path_parms(IN osm_sa_t * sa,
161                              IN const ib_multipath_rec_t * const p_mpr,
162                              IN const osm_port_t * const p_src_port,
163                              IN const osm_port_t * const p_dest_port,
164                              IN const uint16_t dest_lid_ho,
165                              IN const ib_net64_t comp_mask,
166                              OUT osm_path_parms_t * const p_parms)
167 {
168         const osm_node_t *p_node;
169         const osm_physp_t *p_physp;
170         const osm_physp_t *p_src_physp;
171         const osm_physp_t *p_dest_physp;
172         const osm_prtn_t *p_prtn = NULL;
173         const ib_port_info_t *p_pi;
174         ib_slvl_table_t *p_slvl_tbl;
175         ib_api_status_t status = IB_SUCCESS;
176         uint8_t mtu;
177         uint8_t rate;
178         uint8_t pkt_life;
179         uint8_t required_mtu;
180         uint8_t required_rate;
181         ib_net16_t required_pkey;
182         uint8_t required_sl;
183         uint8_t required_pkt_life;
184         ib_net16_t dest_lid;
185         int hops = 0;
186         int in_port_num = 0;
187         uint8_t i;
188         osm_qos_level_t *p_qos_level = NULL;
189         uint16_t valid_sl_mask = 0xffff;
190
191         OSM_LOG_ENTER(sa->p_log);
192
193         dest_lid = cl_hton16(dest_lid_ho);
194
195         p_dest_physp = p_dest_port->p_physp;
196         p_physp = p_src_port->p_physp;
197         p_src_physp = p_physp;
198         p_pi = &p_physp->port_info;
199
200         mtu = ib_port_info_get_mtu_cap(p_pi);
201         rate = ib_port_info_compute_rate(p_pi);
202
203         /*
204            Mellanox Tavor device performance is better using 1K MTU.
205            If required MTU and MTU selector are such that 1K is OK
206            and at least one end of the path is Tavor we override the
207            port MTU with 1K.
208          */
209         if (sa->p_subn->opt.enable_quirks &&
210             __osm_sa_multipath_rec_apply_tavor_mtu_limit(p_mpr, p_src_port,
211                                                          p_dest_port,
212                                                          comp_mask))
213                 if (mtu > IB_MTU_LEN_1024) {
214                         mtu = IB_MTU_LEN_1024;
215                         OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
216                                 "Optimized Path MTU to 1K for Mellanox Tavor device\n");
217                 }
218
219         /*
220            Walk the subnet object from source to destination,
221            tracking the most restrictive rate and mtu values along the way...
222
223            If source port node is a switch, then p_physp should
224            point to the port that routes the destination lid
225          */
226
227         p_node = osm_physp_get_node_ptr(p_physp);
228
229         if (p_node->sw) {
230                 /*
231                  * Source node is a switch.
232                  * Make sure that p_physp points to the out port of the
233                  * switch that routes to the destination lid (dest_lid_ho)
234                  */
235                 p_physp = osm_switch_get_route_by_lid(p_node->sw, dest_lid);
236                 if (p_physp == 0) {
237                         OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4514: "
238                                 "Can't find routing to LID %u from switch for GUID 0x%016"
239                                 PRIx64 "\n", dest_lid_ho,
240                                 cl_ntoh64(osm_node_get_node_guid(p_node)));
241                         status = IB_NOT_FOUND;
242                         goto Exit;
243                 }
244         }
245
246         if (sa->p_subn->opt.qos) {
247
248                 /*
249                  * Whether this node is switch or CA, the IN port for
250                  * the sl2vl table is 0, because this is a source node.
251                  */
252                 p_slvl_tbl = osm_physp_get_slvl_tbl(p_physp, 0);
253
254                 /* update valid SLs that still exist on this route */
255                 for (i = 0; i < IB_MAX_NUM_VLS; i++) {
256                         if (valid_sl_mask & (1 << i) &&
257                             ib_slvl_table_get(p_slvl_tbl, i) == IB_DROP_VL)
258                                 valid_sl_mask &= ~(1 << i);
259                 }
260                 if (!valid_sl_mask) {
261                         OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
262                                 "All the SLs lead to VL15 on this path\n");
263                         status = IB_NOT_FOUND;
264                         goto Exit;
265                 }
266         }
267
268         /*
269          * Same as above
270          */
271         p_node = osm_physp_get_node_ptr(p_dest_physp);
272
273         if (p_node->sw) {
274                 /*
275                  * if destination is switch, we want p_dest_physp to point to port 0
276                  */
277                 p_dest_physp = osm_switch_get_route_by_lid(p_node->sw, dest_lid);
278
279                 if (p_dest_physp == 0) {
280                         OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4515: "
281                                 "Can't find routing to LID %u from switch for GUID 0x%016"
282                                 PRIx64 "\n", dest_lid_ho,
283                                 cl_ntoh64(osm_node_get_node_guid(p_node)));
284                         status = IB_NOT_FOUND;
285                         goto Exit;
286                 }
287
288         }
289
290         /*
291          * Now go through the path step by step
292          */
293
294         while (p_physp != p_dest_physp) {
295
296                 p_node = osm_physp_get_node_ptr(p_physp);
297                 p_physp = osm_physp_get_remote(p_physp);
298
299                 if (p_physp == 0) {
300                         OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4505: "
301                                 "Can't find remote phys port when routing to LID %u from node GUID 0x%016"
302                                 PRIx64 "\n", dest_lid_ho,
303                                 cl_ntoh64(osm_node_get_node_guid(p_node)));
304                         status = IB_ERROR;
305                         goto Exit;
306                 }
307
308                 hops++;
309                 in_port_num = osm_physp_get_port_num(p_physp);
310
311                 /*
312                    This is point to point case (no switch in between)
313                  */
314                 if (p_physp == p_dest_physp)
315                         break;
316
317                 p_node = osm_physp_get_node_ptr(p_physp);
318
319                 if (!p_node->sw) {
320                         /*
321                            There is some sort of problem in the subnet object!
322                            If this isn't a switch, we should have reached
323                            the destination by now!
324                          */
325                         OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4503: "
326                                 "Internal error, bad path\n");
327                         status = IB_ERROR;
328                         goto Exit;
329                 }
330
331                 /*
332                    Check parameters for the ingress port in this switch.
333                  */
334                 p_pi = &p_physp->port_info;
335
336                 if (mtu > ib_port_info_get_mtu_cap(p_pi))
337                         mtu = ib_port_info_get_mtu_cap(p_pi);
338
339                 if (rate > ib_port_info_compute_rate(p_pi))
340                         rate = ib_port_info_compute_rate(p_pi);
341
342                 /*
343                    Continue with the egress port on this switch.
344                  */
345                 p_physp = osm_switch_get_route_by_lid(p_node->sw, dest_lid);
346                 if (p_physp == 0) {
347                         OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4516: "
348                                 "Dead end on path to LID %u from switch for GUID 0x%016"
349                                 PRIx64 "\n", dest_lid_ho,
350                                 cl_ntoh64(osm_node_get_node_guid(p_node)));
351                         status = IB_ERROR;
352                         goto Exit;
353                 }
354
355                 p_pi = &p_physp->port_info;
356
357                 if (mtu > ib_port_info_get_mtu_cap(p_pi))
358                         mtu = ib_port_info_get_mtu_cap(p_pi);
359
360                 if (rate > ib_port_info_compute_rate(p_pi))
361                         rate = ib_port_info_compute_rate(p_pi);
362
363                 if (sa->p_subn->opt.qos) {
364                         /*
365                          * Check SL2VL table of the switch and update valid SLs
366                          */
367                         p_slvl_tbl = osm_physp_get_slvl_tbl(p_physp, in_port_num);
368                         for (i = 0; i < IB_MAX_NUM_VLS; i++) {
369                                 if (valid_sl_mask & (1 << i) &&
370                                     ib_slvl_table_get(p_slvl_tbl, i) == IB_DROP_VL)
371                                         valid_sl_mask &= ~(1 << i);
372                         }
373                         if (!valid_sl_mask) {
374                                 OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
375                                         "All the SLs lead to VL15 "
376                                         "on this path\n");
377                                 status = IB_NOT_FOUND;
378                                 goto Exit;
379                         }
380                 }
381         }
382
383         /*
384            p_physp now points to the destination
385          */
386         p_pi = &p_physp->port_info;
387
388         if (mtu > ib_port_info_get_mtu_cap(p_pi))
389                 mtu = ib_port_info_get_mtu_cap(p_pi);
390
391         if (rate > ib_port_info_compute_rate(p_pi))
392                 rate = ib_port_info_compute_rate(p_pi);
393
394         OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
395                 "Path min MTU = %u, min rate = %u\n", mtu, rate);
396
397         /*
398          * Get QoS Level object according to the MultiPath request
399          * and adjust MultiPath parameters according to QoS settings
400          */
401         if (sa->p_subn->opt.qos &&
402             sa->p_subn->p_qos_policy &&
403             (p_qos_level =
404              osm_qos_policy_get_qos_level_by_mpr(sa->p_subn->p_qos_policy,
405                                                  p_mpr, p_src_physp,
406                                                  p_dest_physp, comp_mask))) {
407
408                 OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
409                         "MultiPathRecord request matches QoS Level '%s' (%s)\n",
410                         p_qos_level->name,
411                         p_qos_level->use ? p_qos_level->use : "no description");
412
413                 if (p_qos_level->mtu_limit_set
414                     && (mtu > p_qos_level->mtu_limit))
415                         mtu = p_qos_level->mtu_limit;
416
417                 if (p_qos_level->rate_limit_set
418                     && (rate > p_qos_level->rate_limit))
419                         rate = p_qos_level->rate_limit;
420
421                 if (p_qos_level->sl_set) {
422                         required_sl = p_qos_level->sl;
423                         if (!(valid_sl_mask & (1 << required_sl))) {
424                                 status = IB_NOT_FOUND;
425                                 goto Exit;
426                         }
427                 }
428         }
429
430         /*
431            Determine if these values meet the user criteria
432          */
433
434         /* we silently ignore cases where only the MTU selector is defined */
435         if ((comp_mask & IB_MPR_COMPMASK_MTUSELEC) &&
436             (comp_mask & IB_MPR_COMPMASK_MTU)) {
437                 required_mtu = ib_multipath_rec_mtu(p_mpr);
438                 switch (ib_multipath_rec_mtu_sel(p_mpr)) {
439                 case 0: /* must be greater than */
440                         if (mtu <= required_mtu)
441                                 status = IB_NOT_FOUND;
442                         break;
443
444                 case 1: /* must be less than */
445                         if (mtu >= required_mtu) {
446                                 /* adjust to use the highest mtu
447                                    lower then the required one */
448                                 if (required_mtu > 1)
449                                         mtu = required_mtu - 1;
450                                 else
451                                         status = IB_NOT_FOUND;
452                         }
453                         break;
454
455                 case 2: /* exact match */
456                         if (mtu < required_mtu)
457                                 status = IB_NOT_FOUND;
458                         else
459                                 mtu = required_mtu;
460                         break;
461
462                 case 3: /* largest available */
463                         /* can't be disqualified by this one */
464                         break;
465
466                 default:
467                         /* if we're here, there's a bug in ib_multipath_rec_mtu_sel() */
468                         CL_ASSERT(FALSE);
469                         status = IB_ERROR;
470                         break;
471                 }
472         }
473         if (status != IB_SUCCESS)
474                 goto Exit;
475
476         /* we silently ignore cases where only the Rate selector is defined */
477         if ((comp_mask & IB_MPR_COMPMASK_RATESELEC) &&
478             (comp_mask & IB_MPR_COMPMASK_RATE)) {
479                 required_rate = ib_multipath_rec_rate(p_mpr);
480                 switch (ib_multipath_rec_rate_sel(p_mpr)) {
481                 case 0: /* must be greater than */
482                         if (rate <= required_rate)
483                                 status = IB_NOT_FOUND;
484                         break;
485
486                 case 1: /* must be less than */
487                         if (rate >= required_rate) {
488                                 /* adjust the rate to use the highest rate
489                                    lower then the required one */
490                                 if (required_rate > 2)
491                                         rate = required_rate - 1;
492                                 else
493                                         status = IB_NOT_FOUND;
494                         }
495                         break;
496
497                 case 2: /* exact match */
498                         if (rate < required_rate)
499                                 status = IB_NOT_FOUND;
500                         else
501                                 rate = required_rate;
502                         break;
503
504                 case 3: /* largest available */
505                         /* can't be disqualified by this one */
506                         break;
507
508                 default:
509                         /* if we're here, there's a bug in ib_multipath_rec_mtu_sel() */
510                         CL_ASSERT(FALSE);
511                         status = IB_ERROR;
512                         break;
513                 }
514         }
515         if (status != IB_SUCCESS)
516                 goto Exit;
517
518         /* Verify the pkt_life_time */
519         /* According to spec definition IBA 1.2 Table 205 PacketLifeTime description,
520            for loopback paths, packetLifeTime shall be zero. */
521         if (p_src_port == p_dest_port)
522                 pkt_life = 0;   /* loopback */
523         else if (p_qos_level && p_qos_level->pkt_life_set)
524                 pkt_life = p_qos_level->pkt_life;
525         else
526                 pkt_life = sa->p_subn->opt.subnet_timeout;
527
528         /* we silently ignore cases where only the PktLife selector is defined */
529         if ((comp_mask & IB_MPR_COMPMASK_PKTLIFETIMESELEC) &&
530             (comp_mask & IB_MPR_COMPMASK_PKTLIFETIME)) {
531                 required_pkt_life = ib_multipath_rec_pkt_life(p_mpr);
532                 switch (ib_multipath_rec_pkt_life_sel(p_mpr)) {
533                 case 0: /* must be greater than */
534                         if (pkt_life <= required_pkt_life)
535                                 status = IB_NOT_FOUND;
536                         break;
537
538                 case 1: /* must be less than */
539                         if (pkt_life >= required_pkt_life) {
540                                 /* adjust the lifetime to use the highest possible
541                                    lower then the required one */
542                                 if (required_pkt_life > 1)
543                                         pkt_life = required_pkt_life - 1;
544                                 else
545                                         status = IB_NOT_FOUND;
546                         }
547                         break;
548
549                 case 2: /* exact match */
550                         if (pkt_life < required_pkt_life)
551                                 status = IB_NOT_FOUND;
552                         else
553                                 pkt_life = required_pkt_life;
554                         break;
555
556                 case 3: /* smallest available */
557                         /* can't be disqualified by this one */
558                         break;
559
560                 default:
561                         /* if we're here, there's a bug in ib_path_rec_pkt_life_sel() */
562                         CL_ASSERT(FALSE);
563                         status = IB_ERROR;
564                         break;
565                 }
566         }
567
568         if (status != IB_SUCCESS)
569                 goto Exit;
570
571         /*
572          * set Pkey for this MultiPath record request
573          */
574
575         if (comp_mask & IB_MPR_COMPMASK_RAWTRAFFIC &&
576             cl_ntoh32(p_mpr->hop_flow_raw) & (1 << 31))
577                 required_pkey =
578                     osm_physp_find_common_pkey(p_src_physp, p_dest_physp);
579
580         else if (comp_mask & IB_MPR_COMPMASK_PKEY) {
581                 /*
582                  * MPR request has a specific pkey:
583                  * Check that source and destination share this pkey.
584                  * If QoS level has pkeys, check that this pkey exists
585                  * in the QoS level pkeys.
586                  * MPR returned pkey is the requested pkey.
587                  */
588                 required_pkey = p_mpr->pkey;
589                 if (!osm_physp_share_this_pkey
590                     (p_src_physp, p_dest_physp, required_pkey)) {
591                         OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4518: "
592                                 "Ports do not share specified PKey 0x%04x\n"
593                                 "\t\tsrc %" PRIx64 " dst %" PRIx64 "\n",
594                                 cl_ntoh16(required_pkey),
595                                 cl_ntoh64(osm_physp_get_port_guid(p_src_physp)),
596                                 cl_ntoh64(osm_physp_get_port_guid
597                                           (p_dest_physp)));
598                         status = IB_NOT_FOUND;
599                         goto Exit;
600                 }
601                 if (p_qos_level && p_qos_level->pkey_range_len &&
602                     !osm_qos_level_has_pkey(p_qos_level, required_pkey)) {
603                         OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 451C: "
604                                 "Ports do not share PKeys defined by QoS level\n");
605                         status = IB_NOT_FOUND;
606                         goto Exit;
607                 }
608
609         } else if (p_qos_level && p_qos_level->pkey_range_len) {
610                 /*
611                  * MPR request doesn't have a specific pkey, but QoS level
612                  * has pkeys - get shared pkey from QoS level pkeys
613                  */
614                 required_pkey = osm_qos_level_get_shared_pkey(p_qos_level,
615                                                               p_src_physp,
616                                                               p_dest_physp);
617                 if (!required_pkey) {
618                         OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 451D: "
619                                 "Ports do not share PKeys defined by QoS level\n");
620                         status = IB_NOT_FOUND;
621                         goto Exit;
622                 }
623
624         } else {
625                 /*
626                  * Neither MPR request nor QoS level have pkey.
627                  * Just get any shared pkey.
628                  */
629                 required_pkey =
630                     osm_physp_find_common_pkey(p_src_physp, p_dest_physp);
631                 if (!required_pkey) {
632                         OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4519: "
633                                 "Ports do not have any shared PKeys\n"
634                                 "\t\tsrc %" PRIx64 " dst %" PRIx64 "\n",
635                                 cl_ntoh64(osm_physp_get_port_guid(p_physp)),
636                                 cl_ntoh64(osm_physp_get_port_guid
637                                           (p_dest_physp)));
638                         status = IB_NOT_FOUND;
639                         goto Exit;
640                 }
641         }
642
643         if (required_pkey) {
644                 p_prtn =
645                     (osm_prtn_t *) cl_qmap_get(&sa->p_subn->prtn_pkey_tbl,
646                                                required_pkey &
647                                                cl_ntoh16((uint16_t) ~ 0x8000));
648                 if (p_prtn ==
649                     (osm_prtn_t *) cl_qmap_end(&sa->p_subn->prtn_pkey_tbl))
650                         p_prtn = NULL;
651         }
652
653         /*
654          * Set MultiPathRecord SL.
655          */
656
657         if (comp_mask & IB_MPR_COMPMASK_SL) {
658                 /*
659                  * Specific SL was requested
660                  */
661                 required_sl = ib_multipath_rec_sl(p_mpr);
662
663                 if (p_qos_level && p_qos_level->sl_set &&
664                     p_qos_level->sl != required_sl) {
665                         OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 451E: "
666                                 "QoS constaraints: required MultiPathRecord SL (%u) "
667                                 "doesn't match QoS policy SL (%u)\n",
668                                 required_sl, p_qos_level->sl);
669                         status = IB_NOT_FOUND;
670                         goto Exit;
671                 }
672
673         } else if (p_qos_level && p_qos_level->sl_set) {
674                 /*
675                  * No specific SL was requested,
676                  * but there is an SL in QoS level.
677                  */
678                 required_sl = p_qos_level->sl;
679
680                 if (required_pkey && p_prtn && p_prtn->sl != p_qos_level->sl)
681                         OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
682                                 "QoS level SL (%u) overrides partition SL (%u)\n",
683                                 p_qos_level->sl, p_prtn->sl);
684
685         } else if (required_pkey) {
686                 /*
687                  * No specific SL in request or in QoS level - use partition SL
688                  */
689                 p_prtn =
690                     (osm_prtn_t *) cl_qmap_get(&sa->p_subn->prtn_pkey_tbl,
691                                                required_pkey &
692                                                cl_ntoh16((uint16_t) ~ 0x8000));
693                 if (!p_prtn) {
694                         required_sl = OSM_DEFAULT_SL;
695                         /* this may be possible when pkey tables are created somehow in
696                            previous runs or things are going wrong here */
697                         OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 451A: "
698                                 "No partition found for PKey 0x%04x - using default SL %d\n",
699                                 cl_ntoh16(required_pkey), required_sl);
700                 } else
701                         required_sl = p_prtn->sl;
702
703         } else if (sa->p_subn->opt.qos) {
704                 if (valid_sl_mask & (1 << OSM_DEFAULT_SL))
705                         required_sl = OSM_DEFAULT_SL;
706                 else {
707                         for (i = 0; i < IB_MAX_NUM_VLS; i++)
708                                 if (valid_sl_mask & (1 << i))
709                                         break;
710                         required_sl = i;
711                 }
712         } else
713                 required_sl = OSM_DEFAULT_SL;
714
715         if (sa->p_subn->opt.qos && !(valid_sl_mask & (1 << required_sl))) {
716                 OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 451F: "
717                         "Selected SL (%u) leads to VL15\n", required_sl);
718                 status = IB_NOT_FOUND;
719                 goto Exit;
720         }
721
722         /* reset pkey when raw traffic */
723         if (comp_mask & IB_MPR_COMPMASK_RAWTRAFFIC &&
724             cl_ntoh32(p_mpr->hop_flow_raw) & (1 << 31))
725                 required_pkey = 0;
726
727         p_parms->mtu = mtu;
728         p_parms->rate = rate;
729         p_parms->pkey = required_pkey;
730         p_parms->pkt_life = pkt_life;
731         p_parms->sl = required_sl;
732         p_parms->hops = hops;
733
734         OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "MultiPath params:"
735                 " mtu = %u, rate = %u, packet lifetime = %u,"
736                 " pkey = 0x%04X, sl = %u, hops = %u\n", mtu, rate,
737                 pkt_life, cl_ntoh16(required_pkey), required_sl, hops);
738
739 Exit:
740         OSM_LOG_EXIT(sa->p_log);
741         return (status);
742 }
743
744 /**********************************************************************
745  **********************************************************************/
746 static void
747 __osm_mpr_rcv_build_pr(IN osm_sa_t * sa,
748                        IN const osm_port_t * const p_src_port,
749                        IN const osm_port_t * const p_dest_port,
750                        IN const uint16_t src_lid_ho,
751                        IN const uint16_t dest_lid_ho,
752                        IN const uint8_t preference,
753                        IN const osm_path_parms_t * const p_parms,
754                        OUT ib_path_rec_t * const p_pr)
755 {
756         const osm_physp_t *p_src_physp;
757         const osm_physp_t *p_dest_physp;
758
759         OSM_LOG_ENTER(sa->p_log);
760
761         p_src_physp = p_src_port->p_physp;
762         p_dest_physp = p_dest_port->p_physp;
763
764         p_pr->dgid.unicast.prefix = osm_physp_get_subnet_prefix(p_dest_physp);
765         p_pr->dgid.unicast.interface_id = osm_physp_get_port_guid(p_dest_physp);
766
767         p_pr->sgid.unicast.prefix = osm_physp_get_subnet_prefix(p_src_physp);
768         p_pr->sgid.unicast.interface_id = osm_physp_get_port_guid(p_src_physp);
769
770         p_pr->dlid = cl_hton16(dest_lid_ho);
771         p_pr->slid = cl_hton16(src_lid_ho);
772
773         p_pr->hop_flow_raw &= cl_hton32(1 << 31);
774
775         p_pr->pkey = p_parms->pkey;
776         ib_path_rec_set_qos_class(p_pr, 0);
777         ib_path_rec_set_sl(p_pr, p_parms->sl);
778         p_pr->mtu = (uint8_t) (p_parms->mtu | 0x80);
779         p_pr->rate = (uint8_t) (p_parms->rate | 0x80);
780
781         /* According to 1.2 spec definition Table 205 PacketLifeTime description,
782            for loopback paths, packetLifeTime shall be zero. */
783         if (p_src_port == p_dest_port)
784                 p_pr->pkt_life = 0x80;  /* loopback */
785         else
786                 p_pr->pkt_life = (uint8_t) (p_parms->pkt_life | 0x80);
787
788         p_pr->preference = preference;
789
790         /* always return num_path = 0 so this is only the reversible component */
791         if (p_parms->reversible)
792                 p_pr->num_path = 0x80;
793
794         OSM_LOG_EXIT(sa->p_log);
795 }
796
797 /**********************************************************************
798  **********************************************************************/
799 static osm_mpr_item_t *
800 __osm_mpr_rcv_get_lid_pair_path(IN osm_sa_t * sa,
801                                 IN const ib_multipath_rec_t * const p_mpr,
802                                 IN const osm_port_t * const p_src_port,
803                                 IN const osm_port_t * const p_dest_port,
804                                 IN const uint16_t src_lid_ho,
805                                 IN const uint16_t dest_lid_ho,
806                                 IN const ib_net64_t comp_mask,
807                                 IN const uint8_t preference)
808 {
809         osm_path_parms_t path_parms;
810         osm_path_parms_t rev_path_parms;
811         osm_mpr_item_t *p_pr_item;
812         ib_api_status_t status, rev_path_status;
813
814         OSM_LOG_ENTER(sa->p_log);
815
816         OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Src LID %u, Dest LID %u\n",
817                 src_lid_ho, dest_lid_ho);
818
819         p_pr_item = malloc(sizeof(*p_pr_item));
820         if (p_pr_item == NULL) {
821                 OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4501: "
822                         "Unable to allocate path record\n");
823                 goto Exit;
824         }
825         memset(p_pr_item, 0, sizeof(*p_pr_item));
826
827         status = __osm_mpr_rcv_get_path_parms(sa, p_mpr, p_src_port,
828                                               p_dest_port, dest_lid_ho,
829                                               comp_mask, &path_parms);
830
831         if (status != IB_SUCCESS) {
832                 free(p_pr_item);
833                 p_pr_item = NULL;
834                 goto Exit;
835         }
836
837         /* now try the reversible path */
838         rev_path_status =
839             __osm_mpr_rcv_get_path_parms(sa, p_mpr, p_dest_port, p_src_port,
840                                          src_lid_ho, comp_mask,
841                                          &rev_path_parms);
842         path_parms.reversible = (rev_path_status == IB_SUCCESS);
843
844         /* did we get a Reversible Path compmask ? */
845         /*
846            NOTE that if the reversible component = 0, it is a don't care
847            rather then requiring non-reversible paths ...
848            see Vol1 Ver1.2 p900 l16
849          */
850         if (comp_mask & IB_MPR_COMPMASK_REVERSIBLE) {
851                 if ((!path_parms.reversible && (p_mpr->num_path & 0x80))) {
852                         OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
853                                 "Requested reversible path but failed to get one\n");
854
855                         free(p_pr_item);
856                         p_pr_item = NULL;
857                         goto Exit;
858                 }
859         }
860
861         p_pr_item->p_src_port = p_src_port;
862         p_pr_item->p_dest_port = p_dest_port;
863         p_pr_item->hops = path_parms.hops;
864
865         __osm_mpr_rcv_build_pr(sa, p_src_port, p_dest_port, src_lid_ho,
866                                dest_lid_ho, preference, &path_parms,
867                                &p_pr_item->path_rec);
868
869 Exit:
870         OSM_LOG_EXIT(sa->p_log);
871         return (p_pr_item);
872 }
873
874 /**********************************************************************
875  **********************************************************************/
876 static uint32_t
877 __osm_mpr_rcv_get_port_pair_paths(IN osm_sa_t * sa,
878                                   IN const ib_multipath_rec_t * const p_mpr,
879                                   IN const osm_port_t * const p_req_port,
880                                   IN const osm_port_t * const p_src_port,
881                                   IN const osm_port_t * const p_dest_port,
882                                   IN const uint32_t rem_paths,
883                                   IN const ib_net64_t comp_mask,
884                                   IN cl_qlist_t * const p_list)
885 {
886         osm_mpr_item_t *p_pr_item;
887         uint16_t src_lid_min_ho;
888         uint16_t src_lid_max_ho;
889         uint16_t dest_lid_min_ho;
890         uint16_t dest_lid_max_ho;
891         uint16_t src_lid_ho;
892         uint16_t dest_lid_ho;
893         uint32_t path_num = 0;
894         uint8_t preference;
895         uintn_t src_offset;
896         uintn_t dest_offset;
897
898         OSM_LOG_ENTER(sa->p_log);
899
900         OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
901                 "Src port 0x%016" PRIx64 ", Dst port 0x%016" PRIx64 "\n",
902                 cl_ntoh64(osm_port_get_guid(p_src_port)),
903                 cl_ntoh64(osm_port_get_guid(p_dest_port)));
904
905         /* Check that the req_port, src_port and dest_port all share a
906            pkey. The check is done on the default physical port of the ports. */
907         if (osm_port_share_pkey(sa->p_log, p_req_port, p_src_port) == FALSE
908             || osm_port_share_pkey(sa->p_log, p_req_port,
909                                    p_dest_port) == FALSE
910             || osm_port_share_pkey(sa->p_log, p_src_port,
911                                    p_dest_port) == FALSE)
912                 /* One of the pairs doesn't share a pkey so the path is disqualified. */
913                 goto Exit;
914
915         /*
916            We shouldn't be here if the paths are disqualified in some way...
917            Thus, we assume every possible connection is valid.
918
919            We desire to return high-quality paths first.
920            In OpenSM, higher quality mean least overlap with other paths.
921            This is acheived in practice by returning paths with
922            different LID value on each end, which means these
923            paths are more redundant that paths with the same LID repeated
924            on one side.  For example, in OpenSM the paths between two
925            endpoints with LMC = 1 might be as follows:
926
927            Port A, LID 1 <-> Port B, LID 3
928            Port A, LID 1 <-> Port B, LID 4
929            Port A, LID 2 <-> Port B, LID 3
930            Port A, LID 2 <-> Port B, LID 4
931
932            The OpenSM unicast routing algorithms attempt to disperse each path
933            to as varied a physical path as is reasonable.  1<->3 and 1<->4 have
934            more physical overlap (hence less redundancy) than 1<->3 and 2<->4.
935
936            OpenSM ranks paths in three preference groups:
937
938            Preference Value           Description
939            ----------------           -------------------------------------------
940            0                  Redundant in both directions with other
941            pref value = 0 paths
942
943            1                  Redundant in one direction with other
944            pref value = 0 and pref value = 1 paths
945
946            2                  Not redundant in either direction with
947            other paths
948
949            3-FF                       Unused
950
951            SA clients don't need to know these details, only that the lower
952            preference paths are preferred, as stated in the spec.  The paths
953            may not actually be physically redundant depending on the topology
954            of the subnet, but the point of LMC > 0 is to offer redundancy,
955            so I assume the subnet is physically appropriate for the specified
956            LMC value.  A more advanced implementation could inspect for physical
957            redundancy, but I'm not going to bother with that now.
958          */
959
960         osm_port_get_lid_range_ho(p_src_port, &src_lid_min_ho, &src_lid_max_ho);
961         osm_port_get_lid_range_ho(p_dest_port, &dest_lid_min_ho,
962                                   &dest_lid_max_ho);
963
964         OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Src LID [%u-%u], Dest LID [%u-%u]\n",
965                 src_lid_min_ho, src_lid_max_ho,
966                 dest_lid_min_ho, dest_lid_max_ho);
967
968         src_lid_ho = src_lid_min_ho;
969         dest_lid_ho = dest_lid_min_ho;
970
971         /*
972            Preferred paths come first in OpenSM
973          */
974         preference = 0;
975
976         while (path_num < rem_paths) {
977                 /*
978                    These paths are "fully redundant"
979                  */
980                 p_pr_item = __osm_mpr_rcv_get_lid_pair_path(sa, p_mpr,
981                                                             p_src_port,
982                                                             p_dest_port,
983                                                             src_lid_ho,
984                                                             dest_lid_ho,
985                                                             comp_mask,
986                                                             preference);
987
988                 if (p_pr_item) {
989                         cl_qlist_insert_tail(p_list, &p_pr_item->list_item);
990                         ++path_num;
991                 }
992
993                 if (++src_lid_ho > src_lid_max_ho)
994                         break;
995
996                 if (++dest_lid_ho > dest_lid_max_ho)
997                         break;
998         }
999
1000         /*
1001            Check if we've accumulated all the paths that the user cares to see
1002          */
1003         if (path_num == rem_paths)
1004                 goto Exit;
1005
1006         /*
1007            Don't bother reporting preference 1 paths for now.
1008            It's more trouble than it's worth and can only occur
1009            if ports have different LMC values, which isn't supported
1010            by OpenSM right now anyway.
1011          */
1012         preference = 2;
1013         src_lid_ho = src_lid_min_ho;
1014         dest_lid_ho = dest_lid_min_ho;
1015         src_offset = 0;
1016         dest_offset = 0;
1017
1018         /*
1019            Iterate over the remaining paths
1020          */
1021         while (path_num < rem_paths) {
1022                 dest_offset++;
1023                 dest_lid_ho++;
1024
1025                 if (dest_lid_ho > dest_lid_max_ho) {
1026                         src_offset++;
1027                         src_lid_ho++;
1028
1029                         if (src_lid_ho > src_lid_max_ho)
1030                                 break;  /* done */
1031
1032                         dest_offset = 0;
1033                         dest_lid_ho = dest_lid_min_ho;
1034                 }
1035
1036                 /*
1037                    These paths are "fully non-redundant" with paths already
1038                    identified above and consequently not of much value.
1039
1040                    Don't return paths we already identified above, as indicated
1041                    by the offset values being equal.
1042                  */
1043                 if (src_offset == dest_offset)
1044                         continue;       /* already reported */
1045
1046                 p_pr_item = __osm_mpr_rcv_get_lid_pair_path(sa, p_mpr,
1047                                                             p_src_port,
1048                                                             p_dest_port,
1049                                                             src_lid_ho,
1050                                                             dest_lid_ho,
1051                                                             comp_mask,
1052                                                             preference);
1053
1054                 if (p_pr_item) {
1055                         cl_qlist_insert_tail(p_list, &p_pr_item->list_item);
1056                         ++path_num;
1057                 }
1058         }
1059
1060 Exit:
1061         OSM_LOG_EXIT(sa->p_log);
1062         return path_num;
1063 }
1064
/*
 * Smaller of two values.  Each argument may be evaluated twice, so do not
 * pass expressions with side effects.
 */
#undef min
#define min(x, y)	((x) < (y) ? (x) : (y))
1067
1068 /**********************************************************************
1069  **********************************************************************/
1070 static osm_mpr_item_t *
1071 __osm_mpr_rcv_get_apm_port_pair_paths(IN osm_sa_t * sa,
1072                                       IN const ib_multipath_rec_t * const p_mpr,
1073                                       IN const osm_port_t * const p_src_port,
1074                                       IN const osm_port_t * const p_dest_port,
1075                                       IN int base_offs,
1076                                       IN const ib_net64_t comp_mask,
1077                                       IN cl_qlist_t * const p_list)
1078 {
1079         osm_mpr_item_t *p_pr_item = 0;
1080         uint16_t src_lid_min_ho;
1081         uint16_t src_lid_max_ho;
1082         uint16_t dest_lid_min_ho;
1083         uint16_t dest_lid_max_ho;
1084         uint16_t src_lid_ho;
1085         uint16_t dest_lid_ho;
1086         uintn_t iterations;
1087         int src_lids, dest_lids;
1088
1089         OSM_LOG_ENTER(sa->p_log);
1090
1091         OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Src port 0x%016" PRIx64 ", "
1092                 "Dst port 0x%016" PRIx64 ", base offs %d\n",
1093                 cl_ntoh64(osm_port_get_guid(p_src_port)),
1094                 cl_ntoh64(osm_port_get_guid(p_dest_port)), base_offs);
1095
1096         osm_port_get_lid_range_ho(p_src_port, &src_lid_min_ho, &src_lid_max_ho);
1097         osm_port_get_lid_range_ho(p_dest_port, &dest_lid_min_ho,
1098                                   &dest_lid_max_ho);
1099
1100         src_lid_ho = src_lid_min_ho;
1101         dest_lid_ho = dest_lid_min_ho;
1102
1103         src_lids = src_lid_max_ho - src_lid_min_ho + 1;
1104         dest_lids = dest_lid_max_ho - dest_lid_min_ho + 1;
1105
1106         src_lid_ho += base_offs % src_lids;
1107         dest_lid_ho += base_offs % dest_lids;
1108
1109         OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
1110                 "Src LIDs [%u-%u] hashed %u, "
1111                 "Dest LIDs [%u-%u] hashed %u\n",
1112                 src_lid_min_ho, src_lid_max_ho, src_lid_ho,
1113                 dest_lid_min_ho, dest_lid_max_ho, dest_lid_ho);
1114
1115         iterations = min(src_lids, dest_lids);
1116
1117         while (iterations--) {
1118                 /*
1119                    These paths are "fully redundant"
1120                  */
1121                 p_pr_item = __osm_mpr_rcv_get_lid_pair_path(sa, p_mpr,
1122                                                             p_src_port,
1123                                                             p_dest_port,
1124                                                             src_lid_ho,
1125                                                             dest_lid_ho,
1126                                                             comp_mask, 0);
1127
1128                 if (p_pr_item) {
1129                         OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
1130                                 "Found matching path from Src LID %u to Dest LID %u with %d hops\n",
1131                                 src_lid_ho, dest_lid_ho, p_pr_item->hops);
1132                         break;
1133                 }
1134
1135                 if (++src_lid_ho > src_lid_max_ho)
1136                         src_lid_ho = src_lid_min_ho;
1137
1138                 if (++dest_lid_ho > dest_lid_max_ho)
1139                         dest_lid_ho = dest_lid_min_ho;
1140         }
1141
1142         OSM_LOG_EXIT(sa->p_log);
1143         return p_pr_item;
1144 }
1145
1146 /**********************************************************************
1147  **********************************************************************/
1148 static ib_net16_t
1149 __osm_mpr_rcv_get_gids(IN osm_sa_t * sa,
1150                        IN const ib_gid_t * gids,
1151                        IN int ngids, IN int is_sgid, OUT osm_port_t ** pp_port)
1152 {
1153         osm_port_t *p_port;
1154         ib_net16_t ib_status = IB_SUCCESS;
1155         int i;
1156
1157         OSM_LOG_ENTER(sa->p_log);
1158
1159         for (i = 0; i < ngids; i++, gids++) {
1160                 if (!ib_gid_is_link_local(gids)) {
1161                         if ((is_sgid && ib_gid_is_multicast(gids)) ||
1162                             (ib_gid_get_subnet_prefix(gids) !=
1163                              sa->p_subn->opt.subnet_prefix)) {
1164                                 /*
1165                                    This 'error' is the client's fault (bad gid)
1166                                    so don't enter it as an error in our own log.
1167                                    Return an error response to the client.
1168                                  */
1169                                 OSM_LOG(sa->p_log, OSM_LOG_VERBOSE, "ERR 451B: "
1170                                         "%sGID 0x%016" PRIx64
1171                                         " is multicast or non local subnet prefix\n",
1172                                         is_sgid ? "S" : "D",
1173                                         cl_ntoh64(gids->unicast.prefix));
1174
1175                                 ib_status = IB_SA_MAD_STATUS_INVALID_GID;
1176                                 goto Exit;
1177                         }
1178                 }
1179
1180                 p_port =
1181                     osm_get_port_by_guid(sa->p_subn,
1182                                          gids->unicast.interface_id);
1183                 if (!p_port) {
1184                         /*
1185                            This 'error' is the client's fault (bad gid) so
1186                            don't enter it as an error in our own log.
1187                            Return an error response to the client.
1188                          */
1189                         OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4506: "
1190                                 "No port with GUID 0x%016" PRIx64 "\n",
1191                                 cl_ntoh64(gids->unicast.interface_id));
1192
1193                         ib_status = IB_SA_MAD_STATUS_INVALID_GID;
1194                         goto Exit;
1195                 }
1196
1197                 pp_port[i] = p_port;
1198         }
1199
1200 Exit:
1201         OSM_LOG_EXIT(sa->p_log);
1202
1203         return ib_status;
1204 }
1205
1206 /**********************************************************************
1207  **********************************************************************/
1208 static ib_net16_t
1209 __osm_mpr_rcv_get_end_points(IN osm_sa_t * sa,
1210                              IN const osm_madw_t * const p_madw,
1211                              OUT osm_port_t ** pp_ports,
1212                              OUT int *nsrc, OUT int *ndest)
1213 {
1214         const ib_multipath_rec_t *p_mpr;
1215         const ib_sa_mad_t *p_sa_mad;
1216         ib_net64_t comp_mask;
1217         ib_net16_t sa_status = IB_SA_MAD_STATUS_SUCCESS;
1218         ib_gid_t *gids;
1219
1220         OSM_LOG_ENTER(sa->p_log);
1221
1222         /*
1223            Determine what fields are valid and then get a pointer
1224            to the source and destination port objects, if possible.
1225          */
1226         p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw);
1227         p_mpr = (ib_multipath_rec_t *) ib_sa_mad_get_payload_ptr(p_sa_mad);
1228         gids = (ib_gid_t *) p_mpr->gids;
1229
1230         comp_mask = p_sa_mad->comp_mask;
1231
1232         /*
1233            Check a few easy disqualifying cases up front before getting
1234            into the endpoints.
1235          */
1236         *nsrc = *ndest = 0;
1237
1238         if (comp_mask & IB_MPR_COMPMASK_SGIDCOUNT) {
1239                 *nsrc = p_mpr->sgid_count;
1240                 if (*nsrc > IB_MULTIPATH_MAX_GIDS)
1241                         *nsrc = IB_MULTIPATH_MAX_GIDS;
1242                 sa_status =
1243                     __osm_mpr_rcv_get_gids(sa, gids, *nsrc, 1, pp_ports);
1244                 if (sa_status != IB_SUCCESS)
1245                         goto Exit;
1246         }
1247
1248         if (comp_mask & IB_MPR_COMPMASK_DGIDCOUNT) {
1249                 *ndest = p_mpr->dgid_count;
1250                 if (*ndest + *nsrc > IB_MULTIPATH_MAX_GIDS)
1251                         *ndest = IB_MULTIPATH_MAX_GIDS - *nsrc;
1252                 sa_status =
1253                     __osm_mpr_rcv_get_gids(sa, gids + *nsrc, *ndest, 0,
1254                                            pp_ports + *nsrc);
1255         }
1256
1257 Exit:
1258         OSM_LOG_EXIT(sa->p_log);
1259         return (sa_status);
1260 }
1261
/*
 * Hash two LIDs into the range [0, 102].  Both LIDs are first shifted
 * right by lmc; the first is then moved up four bits and OR-ed with the
 * second, and the result is reduced modulo 103.  Arguments are evaluated
 * more than once (lmc twice).
 */
#define __hash_lids(a, b, lmc) \
	((((a) >> (lmc)) << 4 | ((b) >> (lmc))) % 103)
1264
1265 /**********************************************************************
1266  **********************************************************************/
1267 static void
1268 __osm_mpr_rcv_get_apm_paths(IN osm_sa_t * sa,
1269                             IN const ib_multipath_rec_t * const p_mpr,
1270                             IN const osm_port_t * const p_req_port,
1271                             IN osm_port_t ** _pp_ports,
1272                             IN const ib_net64_t comp_mask,
1273                             IN cl_qlist_t * const p_list)
1274 {
1275         osm_port_t *pp_ports[4];
1276         osm_mpr_item_t *matrix[2][2];
1277         int base_offs, src_lid_ho, dest_lid_ho;
1278         int sumA, sumB, minA, minB;
1279
1280         OSM_LOG_ENTER(sa->p_log);
1281
1282         /*
1283          * We want to:
1284          *    1. use different lid offsets (from base) for the resultant paths
1285          *    to increase the probability of redundant paths or in case
1286          *    of Clos - to ensure it (different offset => different spine!)
1287          *    2. keep consistent paths no matter of direction and order of ports
1288          *    3. distibute the lid offsets to balance the load
1289          * So, we sort the ports (within the srcs, and within the dests),
1290          * hash the lids of S0, D0 (after the sort), and call __osm_mpr_rcv_get_apm_port_pair_paths
1291          * with base_lid for S0, D0 and base_lid + 1 for S1, D1. This way we will get
1292          * always the same offsets - order indepentent, and make sure different spines are used.
1293          * Note that the diagonals on a Clos have the same number of hops, so it doesn't
1294          * really matter which diagonal we use.
1295          */
1296         if (_pp_ports[0]->guid < _pp_ports[1]->guid) {
1297                 pp_ports[0] = _pp_ports[0];
1298                 pp_ports[1] = _pp_ports[1];
1299         } else {
1300                 pp_ports[0] = _pp_ports[1];
1301                 pp_ports[1] = _pp_ports[0];
1302         }
1303         if (_pp_ports[2]->guid < _pp_ports[3]->guid) {
1304                 pp_ports[2] = _pp_ports[2];
1305                 pp_ports[3] = _pp_ports[3];
1306         } else {
1307                 pp_ports[2] = _pp_ports[3];
1308                 pp_ports[3] = _pp_ports[2];
1309         }
1310
1311         src_lid_ho = osm_port_get_base_lid(pp_ports[0]);
1312         dest_lid_ho = osm_port_get_base_lid(pp_ports[2]);
1313
1314         base_offs = src_lid_ho < dest_lid_ho ?
1315             __hash_lids(src_lid_ho, dest_lid_ho, sa->p_subn->opt.lmc) :
1316             __hash_lids(dest_lid_ho, src_lid_ho, sa->p_subn->opt.lmc);
1317
1318         matrix[0][0] =
1319             __osm_mpr_rcv_get_apm_port_pair_paths(sa, p_mpr, pp_ports[0],
1320                                                   pp_ports[2], base_offs,
1321                                                   comp_mask, p_list);
1322         matrix[0][1] =
1323             __osm_mpr_rcv_get_apm_port_pair_paths(sa, p_mpr, pp_ports[0],
1324                                                   pp_ports[3], base_offs,
1325                                                   comp_mask, p_list);
1326         matrix[1][0] =
1327             __osm_mpr_rcv_get_apm_port_pair_paths(sa, p_mpr, pp_ports[1],
1328                                                   pp_ports[2], base_offs + 1,
1329                                                   comp_mask, p_list);
1330         matrix[1][1] =
1331             __osm_mpr_rcv_get_apm_port_pair_paths(sa, p_mpr, pp_ports[1],
1332                                                   pp_ports[3], base_offs + 1,
1333                                                   comp_mask, p_list);
1334
1335         OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "APM matrix:\n"
1336                 "\t{0,0} 0x%X->0x%X (%d)\t| {0,1} 0x%X->0x%X (%d)\n"
1337                 "\t{1,0} 0x%X->0x%X (%d)\t| {1,1} 0x%X->0x%X (%d)\n",
1338                 matrix[0][0]->path_rec.slid, matrix[0][0]->path_rec.dlid,
1339                 matrix[0][0]->hops, matrix[0][1]->path_rec.slid,
1340                 matrix[0][1]->path_rec.dlid, matrix[0][1]->hops,
1341                 matrix[1][0]->path_rec.slid, matrix[1][0]->path_rec.dlid,
1342                 matrix[1][0]->hops, matrix[1][1]->path_rec.slid,
1343                 matrix[1][1]->path_rec.dlid, matrix[1][1]->hops);
1344
1345         /* check diagonal A {(0,0), (1,1)} */
1346         sumA = matrix[0][0]->hops + matrix[1][1]->hops;
1347         minA = min(matrix[0][0]->hops, matrix[1][1]->hops);
1348
1349         /* check diagonal B {(0,1), (1,0)} */
1350         sumB = matrix[0][1]->hops + matrix[1][0]->hops;
1351         minB = min(matrix[0][1]->hops, matrix[1][0]->hops);
1352
1353         /* and the winner is... */
1354         if (minA <= minB || (minA == minB && sumA < sumB)) {
1355                 /* Diag A */
1356                 OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
1357                         "Diag {0,0} & {1,1} is the best:\n"
1358                         "\t{0,0} 0x%X->0x%X (%d)\t & {1,1} 0x%X->0x%X (%d)\n",
1359                         matrix[0][0]->path_rec.slid,
1360                         matrix[0][0]->path_rec.dlid, matrix[0][0]->hops,
1361                         matrix[1][1]->path_rec.slid,
1362                         matrix[1][1]->path_rec.dlid, matrix[1][1]->hops);
1363                 cl_qlist_insert_tail(p_list, &matrix[0][0]->list_item);
1364                 cl_qlist_insert_tail(p_list, &matrix[1][1]->list_item);
1365                 free(matrix[0][1]);
1366                 free(matrix[1][0]);
1367         } else {
1368                 /* Diag B */
1369                 OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
1370                         "Diag {0,1} & {1,0} is the best:\n"
1371                         "\t{0,1} 0x%X->0x%X (%d)\t & {1,0} 0x%X->0x%X (%d)\n",
1372                         matrix[0][1]->path_rec.slid,
1373                         matrix[0][1]->path_rec.dlid, matrix[0][1]->hops,
1374                         matrix[1][0]->path_rec.slid,
1375                         matrix[1][0]->path_rec.dlid, matrix[1][0]->hops);
1376                 cl_qlist_insert_tail(p_list, &matrix[0][1]->list_item);
1377                 cl_qlist_insert_tail(p_list, &matrix[1][0]->list_item);
1378                 free(matrix[0][0]);
1379                 free(matrix[1][1]);
1380         }
1381
1382         OSM_LOG_EXIT(sa->p_log);
1383 }
1384
1385 /**********************************************************************
1386  **********************************************************************/
1387 static void
1388 __osm_mpr_rcv_process_pairs(IN osm_sa_t * sa,
1389                             IN const ib_multipath_rec_t * const p_mpr,
1390                             IN osm_port_t * const p_req_port,
1391                             IN osm_port_t ** pp_ports,
1392                             IN const int nsrc,
1393                             IN const int ndest,
1394                             IN const ib_net64_t comp_mask,
1395                             IN cl_qlist_t * const p_list)
1396 {
1397         osm_port_t **pp_src_port, **pp_es;
1398         osm_port_t **pp_dest_port, **pp_ed;
1399         uint32_t max_paths, num_paths, total_paths = 0;
1400
1401         OSM_LOG_ENTER(sa->p_log);
1402
1403         if (comp_mask & IB_MPR_COMPMASK_NUMBPATH)
1404                 max_paths = p_mpr->num_path & 0x7F;
1405         else
1406                 max_paths = OSM_SA_MPR_MAX_NUM_PATH;
1407
1408         for (pp_src_port = pp_ports, pp_es = pp_ports + nsrc;
1409              pp_src_port < pp_es; pp_src_port++) {
1410                 for (pp_dest_port = pp_es, pp_ed = pp_es + ndest;
1411                      pp_dest_port < pp_ed; pp_dest_port++) {
1412                         num_paths =
1413                             __osm_mpr_rcv_get_port_pair_paths(sa, p_mpr,
1414                                                               p_req_port,
1415                                                               *pp_src_port,
1416                                                               *pp_dest_port,
1417                                                               max_paths -
1418                                                               total_paths,
1419                                                               comp_mask,
1420                                                               p_list);
1421                         total_paths += num_paths;
1422                         OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
1423                                 "%d paths %d total paths %d max paths\n",
1424                                 num_paths, total_paths, max_paths);
1425                         /* Just take first NumbPaths found */
1426                         if (total_paths >= max_paths)
1427                                 goto Exit;
1428                 }
1429         }
1430
1431 Exit:
1432         OSM_LOG_EXIT(sa->p_log);
1433 }
1434
1435 /**********************************************************************
1436  **********************************************************************/
1437 void osm_mpr_rcv_process(IN void *context, IN void *data)
1438 {
1439         osm_sa_t *sa = context;
1440         osm_madw_t *p_madw = data;
1441         const ib_multipath_rec_t *p_mpr;
1442         ib_sa_mad_t *p_sa_mad;
1443         osm_port_t *requester_port;
1444         osm_port_t *pp_ports[IB_MULTIPATH_MAX_GIDS];
1445         cl_qlist_t pr_list;
1446         ib_net16_t sa_status;
1447         int nsrc, ndest;
1448
1449         OSM_LOG_ENTER(sa->p_log);
1450
1451         CL_ASSERT(p_madw);
1452
1453         p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw);
1454         p_mpr = (ib_multipath_rec_t *) ib_sa_mad_get_payload_ptr(p_sa_mad);
1455
1456         CL_ASSERT(p_sa_mad->attr_id == IB_MAD_ATTR_MULTIPATH_RECORD);
1457
1458         if ((p_sa_mad->rmpp_flags & IB_RMPP_FLAG_ACTIVE) != IB_RMPP_FLAG_ACTIVE) {
1459                 OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4510: "
1460                         "Invalid request since RMPP_FLAG_ACTIVE is not set\n");
1461                 osm_sa_send_error(sa, p_madw, IB_SA_MAD_STATUS_REQ_INVALID);
1462                 goto Exit;
1463         }
1464
1465         /* we only support SubnAdmGetMulti method */
1466         if (p_sa_mad->method != IB_MAD_METHOD_GETMULTI) {
1467                 OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4513: "
1468                         "Unsupported Method (%s)\n",
1469                         ib_get_sa_method_str(p_sa_mad->method));
1470                 osm_sa_send_error(sa, p_madw, IB_MAD_STATUS_UNSUP_METHOD_ATTR);
1471                 goto Exit;
1472         }
1473
1474         /* update the requester physical port. */
1475         requester_port = osm_get_port_by_mad_addr(sa->p_log, sa->p_subn,
1476                                                   osm_madw_get_mad_addr_ptr
1477                                                   (p_madw));
1478         if (requester_port == NULL) {
1479                 OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4517: "
1480                         "Cannot find requester physical port\n");
1481                 goto Exit;
1482         }
1483
1484         if (osm_log_is_active(sa->p_log, OSM_LOG_DEBUG))
1485                 osm_dump_multipath_record(sa->p_log, p_mpr, OSM_LOG_DEBUG);
1486
1487         cl_qlist_init(&pr_list);
1488
1489         /*
1490            Most SA functions (including this one) are read-only on the
1491            subnet object, so we grab the lock non-exclusively.
1492          */
1493         cl_plock_acquire(sa->p_lock);
1494
1495         sa_status = __osm_mpr_rcv_get_end_points(sa, p_madw, pp_ports,
1496                                                  &nsrc, &ndest);
1497
1498         if (sa_status != IB_SA_MAD_STATUS_SUCCESS || !nsrc || !ndest) {
1499                 if (sa_status == IB_SA_MAD_STATUS_SUCCESS && (!nsrc || !ndest))
1500                         OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4512: "
1501                                 "__osm_mpr_rcv_get_end_points failed, not enough GIDs "
1502                                 "(nsrc %d ndest %d)\n", nsrc, ndest);
1503                 cl_plock_release(sa->p_lock);
1504                 if (sa_status == IB_SA_MAD_STATUS_SUCCESS)
1505                         osm_sa_send_error(sa, p_madw,
1506                                           IB_SA_MAD_STATUS_REQ_INVALID);
1507                 else
1508                         osm_sa_send_error(sa, p_madw, sa_status);
1509                 goto Exit;
1510         }
1511
1512         /* APM request */
1513         if (nsrc == 2 && ndest == 2 && (p_mpr->num_path & 0x7F) == 2)
1514                 __osm_mpr_rcv_get_apm_paths(sa, p_mpr, requester_port,
1515                                             pp_ports, p_sa_mad->comp_mask,
1516                                             &pr_list);
1517         else
1518                 __osm_mpr_rcv_process_pairs(sa, p_mpr, requester_port,
1519                                             pp_ports, nsrc, ndest,
1520                                             p_sa_mad->comp_mask, &pr_list);
1521
1522         cl_plock_release(sa->p_lock);
1523
1524         /* o15-0.2.7: If MultiPath is supported, then SA shall respond to a
1525            SubnAdmGetMulti() containing a valid MultiPathRecord attribute with
1526            a set of zero or more PathRecords satisfying the constraints
1527            indicated in the MultiPathRecord received. The PathRecord Attribute
1528            ID shall be used in the response.
1529          */
1530         p_sa_mad->attr_id = IB_MAD_ATTR_PATH_RECORD;
1531         osm_sa_respond(sa, p_madw, sizeof(ib_path_rec_t), &pr_list);
1532
1533 Exit:
1534         OSM_LOG_EXIT(sa->p_log);
1535 }
1536 #endif