1 /* 2 * Copyright (C) 2009 Michael Brown <mbrown (at) fensystems.co.uk>. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public License as 6 * published by the Free Software Foundation; either version 2 of the 7 * License, or any later version. 8 * 9 * This program is distributed in the hope that it will be useful, but 10 * WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 * General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write to the Free Software 16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 17 */ 18 19 FILE_LICENCE ( GPL2_OR_LATER ); 20 21 #include <stdint.h> 22 #include <stdlib.h> 23 #include <string.h> 24 #include <byteswap.h> 25 #include <errno.h> 26 #include <assert.h> 27 #include <gpxe/infiniband.h> 28 #include <gpxe/ib_mi.h> 29 #include <gpxe/ib_pathrec.h> 30 #include <gpxe/ib_cm.h> 31 32 /** 33 * @file 34 * 35 * Infiniband communication management 36 * 37 */ 38 39 /** List of connections */ 40 static LIST_HEAD ( ib_cm_conns ); 41 42 /** 43 * Send "ready to use" response 44 * 45 * @v ibdev Infiniband device 46 * @v mi Management interface 47 * @v conn Connection 48 * @v av Address vector 49 * @ret rc Return status code 50 */ 51 static int ib_cm_send_rtu ( struct ib_device *ibdev, 52 struct ib_mad_interface *mi, 53 struct ib_connection *conn, 54 struct ib_address_vector *av ) { 55 union ib_mad mad; 56 struct ib_cm_ready_to_use *ready = 57 &mad.cm.cm_data.ready_to_use; 58 int rc; 59 60 /* Construct "ready to use" response */ 61 memset ( &mad, 0, sizeof ( mad ) ); 62 mad.hdr.mgmt_class = IB_MGMT_CLASS_CM; 63 mad.hdr.class_version = IB_CM_CLASS_VERSION; 64 mad.hdr.method = IB_MGMT_METHOD_SEND; 65 mad.hdr.attr_id = htons ( IB_CM_ATTR_READY_TO_USE ); 66 ready->local_id = htonl ( conn->local_id ); 67 ready->remote_id = htonl ( conn->remote_id ); 68 if ( ( rc = ib_mi_send ( ibdev, mi, &mad, av ) ) != 0 ){ 69 DBGC ( conn, "CM %p could not send RTU: %s\n", 70 conn, strerror ( rc ) ); 71 return rc; 72 } 73 74 return 0; 75 } 76 77 /** 78 * Handle duplicate connection replies 79 * 80 * @v ibdev Infiniband device 81 * @v mi Management interface 82 * @v mad Received MAD 83 * @v av Source address vector 84 * @ret rc Return status code 85 * 86 * If a "ready to use" MAD is lost, the peer may resend the connection 87 * reply. We have to respond to these with duplicate "ready to use" 88 * MADs, otherwise the peer may time out and drop the connection. 89 */ 90 static void ib_cm_connect_rep ( struct ib_device *ibdev, 91 struct ib_mad_interface *mi, 92 union ib_mad *mad, 93 struct ib_address_vector *av ) { 94 struct ib_cm_connect_reply *connect_rep = 95 &mad->cm.cm_data.connect_reply; 96 struct ib_connection *conn; 97 int rc; 98 99 /* Identify connection */ 100 list_for_each_entry ( conn, &ib_cm_conns, list ) { 101 if ( ntohl ( connect_rep->remote_id ) != conn->local_id ) 102 continue; 103 /* Try to send "ready to use" reply */ 104 if ( ( rc = ib_cm_send_rtu ( ibdev, mi, conn, av ) ) != 0 ) { 105 /* Ignore errors */ 106 return; 107 } 108 return; 109 } 110 111 DBG ( "CM unidentified connection %08x\n", 112 ntohl ( connect_rep->remote_id ) ); 113 } 114 115 /** Communication management agents */ 116 struct ib_mad_agent ib_cm_agent[] __ib_mad_agent = { 117 { 118 .mgmt_class = IB_MGMT_CLASS_CM, 119 .class_version = IB_CM_CLASS_VERSION, 120 .attr_id = htons ( IB_CM_ATTR_CONNECT_REPLY ), 121 .handle = ib_cm_connect_rep, 122 }, 123 }; 124 125 /** 126 * Convert connection rejection reason to return status code 127 * 128 * @v reason Rejection reason (in network byte order) 129 * @ret rc Return status code 130 */ 131 static int ib_cm_rejection_reason_to_rc ( uint16_t reason ) { 132 switch ( reason ) { 133 case htons ( IB_CM_REJECT_BAD_SERVICE_ID ) : 134 return -ENODEV; 135 case htons ( IB_CM_REJECT_STALE_CONN ) : 136 return -EALREADY; 137 case htons ( IB_CM_REJECT_CONSUMER ) : 138 return -ENOTTY; 139 default: 140 return -EPERM; 141 } 142 } 143 144 /** 145 * Handle connection request transaction completion 146 * 147 * @v ibdev Infiniband device 148 * @v mi Management interface 149 * @v madx Management transaction 150 * @v rc Status code 151 * @v mad Received MAD (or NULL on error) 152 * @v av Source address vector (or NULL on error) 153 */ 154 static void ib_cm_req_complete ( struct ib_device *ibdev, 155 struct ib_mad_interface *mi, 156 struct ib_mad_transaction *madx, 157 int rc, union ib_mad *mad, 158 struct ib_address_vector *av ) { 159 struct ib_connection *conn = ib_madx_get_ownerdata ( madx ); 160 struct ib_queue_pair *qp = conn->qp; 161 struct ib_cm_common *common = &mad->cm.cm_data.common; 162 struct ib_cm_connect_reply *connect_rep = 163 &mad->cm.cm_data.connect_reply; 164 struct ib_cm_connect_reject *connect_rej = 165 &mad->cm.cm_data.connect_reject; 166 void *private_data = NULL; 167 size_t private_data_len = 0; 168 169 /* Report failures */ 170 if ( ( rc == 0 ) && ( mad->hdr.status != htons ( IB_MGMT_STATUS_OK ) )) 171 rc = -EIO; 172 if ( rc != 0 ) { 173 DBGC ( conn, "CM %p connection request failed: %s\n", 174 conn, strerror ( rc ) ); 175 goto out; 176 } 177 178 /* Record remote communication ID */ 179 conn->remote_id = ntohl ( common->local_id ); 180 181 /* Handle response */ 182 switch ( mad->hdr.attr_id ) { 183 184 case htons ( IB_CM_ATTR_CONNECT_REPLY ) : 185 /* Extract fields */ 186 qp->av.qpn = ( ntohl ( connect_rep->local_qpn ) >> 8 ); 187 qp->send.psn = ( ntohl ( connect_rep->starting_psn ) >> 8 ); 188 private_data = &connect_rep->private_data; 189 private_data_len = sizeof ( connect_rep->private_data ); 190 DBGC ( conn, "CM %p connected to QPN %lx PSN %x\n", 191 conn, qp->av.qpn, qp->send.psn ); 192 193 /* Modify queue pair */ 194 if ( ( rc = ib_modify_qp ( ibdev, qp ) ) != 0 ) { 195 DBGC ( conn, "CM %p could not modify queue pair: %s\n", 196 conn, strerror ( rc ) ); 197 goto out; 198 } 199 200 /* Send "ready to use" reply */ 201 if ( ( rc = ib_cm_send_rtu ( ibdev, mi, conn, av ) ) != 0 ) { 202 /* Treat as non-fatal */ 203 rc = 0; 204 } 205 break; 206 207 case htons ( IB_CM_ATTR_CONNECT_REJECT ) : 208 /* Extract fields */ 209 DBGC ( conn, "CM %p connection rejected (reason %d)\n", 210 conn, ntohs ( connect_rej->reason ) ); 211 /* Private data is valid only for a Consumer Reject */ 212 if ( connect_rej->reason == htons ( IB_CM_REJECT_CONSUMER ) ) { 213 private_data = &connect_rej->private_data; 214 private_data_len = sizeof (connect_rej->private_data); 215 } 216 rc = ib_cm_rejection_reason_to_rc ( connect_rej->reason ); 217 break; 218 219 default: 220 DBGC ( conn, "CM %p unexpected response (attribute %04x)\n", 221 conn, ntohs ( mad->hdr.attr_id ) ); 222 rc = -ENOTSUP; 223 break; 224 } 225 226 out: 227 /* Destroy the completed transaction */ 228 ib_destroy_madx ( ibdev, ibdev->gsi, madx ); 229 conn->madx = NULL; 230 231 /* Hand off to the upper completion handler */ 232 conn->op->changed ( ibdev, qp, conn, rc, private_data, 233 private_data_len ); 234 } 235 236 /** Connection request operations */ 237 static struct ib_mad_transaction_operations ib_cm_req_op = { 238 .complete = ib_cm_req_complete, 239 }; 240 241 /** 242 * Handle connection path transaction completion 243 * 244 * @v ibdev Infiniband device 245 * @v path Path 246 * @v rc Status code 247 * @v av Address vector, or NULL on error 248 */ 249 static void ib_cm_path_complete ( struct ib_device *ibdev, 250 struct ib_path *path, int rc, 251 struct ib_address_vector *av ) { 252 struct ib_connection *conn = ib_path_get_ownerdata ( path ); 253 struct ib_queue_pair *qp = conn->qp; 254 union ib_mad mad; 255 struct ib_cm_connect_request *connect_req = 256 &mad.cm.cm_data.connect_request; 257 size_t private_data_len; 258 259 /* Report failures */ 260 if ( rc != 0 ) { 261 DBGC ( conn, "CM %p path lookup failed: %s\n", 262 conn, strerror ( rc ) ); 263 conn->op->changed ( ibdev, qp, conn, rc, NULL, 0 ); 264 goto out; 265 } 266 267 /* Update queue pair peer path */ 268 memcpy ( &qp->av, av, sizeof ( qp->av ) ); 269 270 /* Construct connection request */ 271 memset ( &mad, 0, sizeof ( mad ) ); 272 mad.hdr.mgmt_class = IB_MGMT_CLASS_CM; 273 mad.hdr.class_version = IB_CM_CLASS_VERSION; 274 mad.hdr.method = IB_MGMT_METHOD_SEND; 275 mad.hdr.attr_id = htons ( IB_CM_ATTR_CONNECT_REQUEST ); 276 connect_req->local_id = htonl ( conn->local_id ); 277 memcpy ( &connect_req->service_id, &conn->service_id, 278 sizeof ( connect_req->service_id ) ); 279 ib_get_hca_info ( ibdev, &connect_req->local_ca ); 280 connect_req->local_qpn__responder_resources = 281 htonl ( ( qp->qpn << 8 ) | 1 ); 282 connect_req->local_eecn__initiator_depth = htonl ( ( 0 << 8 ) | 1 ); 283 connect_req->remote_eecn__remote_timeout__service_type__ee_flow_ctrl = 284 htonl ( ( 0x14 << 3 ) | ( IB_CM_TRANSPORT_RC << 1 ) | 285 ( 0 << 0 ) ); 286 connect_req->starting_psn__local_timeout__retry_count = 287 htonl ( ( qp->recv.psn << 8 ) | ( 0x14 << 3 ) | 288 ( 0x07 << 0 ) ); 289 connect_req->pkey = htons ( ibdev->pkey ); 290 connect_req->payload_mtu__rdc_exists__rnr_retry = 291 ( ( IB_MTU_2048 << 4 ) | ( 1 << 3 ) | ( 0x07 << 0 ) ); 292 connect_req->max_cm_retries__srq = 293 ( ( 0x0f << 4 ) | ( 0 << 3 ) ); 294 connect_req->primary.local_lid = htons ( ibdev->lid ); 295 connect_req->primary.remote_lid = htons ( conn->qp->av.lid ); 296 memcpy ( &connect_req->primary.local_gid, &ibdev->gid, 297 sizeof ( connect_req->primary.local_gid ) ); 298 memcpy ( &connect_req->primary.remote_gid, &conn->qp->av.gid, 299 sizeof ( connect_req->primary.remote_gid ) ); 300 connect_req->primary.flow_label__rate = 301 htonl ( ( 0 << 12 ) | ( conn->qp->av.rate << 0 ) ); 302 connect_req->primary.hop_limit = 0; 303 connect_req->primary.sl__subnet_local = 304 ( ( conn->qp->av.sl << 4 ) | ( 1 << 3 ) ); 305 connect_req->primary.local_ack_timeout = ( 0x13 << 3 ); 306 private_data_len = conn->private_data_len; 307 if ( private_data_len > sizeof ( connect_req->private_data ) ) 308 private_data_len = sizeof ( connect_req->private_data ); 309 memcpy ( &connect_req->private_data, &conn->private_data, 310 private_data_len ); 311 312 /* Create connection request */ 313 av->qpn = IB_QPN_GSI; 314 av->qkey = IB_QKEY_GSI; 315 conn->madx = ib_create_madx ( ibdev, ibdev->gsi, &mad, av, 316 &ib_cm_req_op ); 317 if ( ! conn->madx ) { 318 DBGC ( conn, "CM %p could not create connection request\n", 319 conn ); 320 conn->op->changed ( ibdev, qp, conn, rc, NULL, 0 ); 321 goto out; 322 } 323 ib_madx_set_ownerdata ( conn->madx, conn ); 324 325 out: 326 /* Destroy the completed transaction */ 327 ib_destroy_path ( ibdev, path ); 328 conn->path = NULL; 329 } 330 331 /** Connection path operations */ 332 static struct ib_path_operations ib_cm_path_op = { 333 .complete = ib_cm_path_complete, 334 }; 335 336 /** 337 * Create connection to remote QP 338 * 339 * @v ibdev Infiniband device 340 * @v qp Queue pair 341 * @v dgid Target GID 342 * @v service_id Target service ID 343 * @v private_data Connection request private data 344 * @v private_data_len Length of connection request private data 345 * @v op Connection operations 346 * @ret conn Connection 347 */ 348 struct ib_connection * 349 ib_create_conn ( struct ib_device *ibdev, struct ib_queue_pair *qp, 350 struct ib_gid *dgid, struct ib_gid_half *service_id, 351 void *private_data, size_t private_data_len, 352 struct ib_connection_operations *op ) { 353 struct ib_connection *conn; 354 355 /* Allocate and initialise request */ 356 conn = zalloc ( sizeof ( *conn ) + private_data_len ); 357 if ( ! conn ) 358 goto err_alloc_conn; 359 conn->ibdev = ibdev; 360 conn->qp = qp; 361 memset ( &qp->av, 0, sizeof ( qp->av ) ); 362 qp->av.gid_present = 1; 363 memcpy ( &qp->av.gid, dgid, sizeof ( qp->av.gid ) ); 364 conn->local_id = random(); 365 memcpy ( &conn->service_id, service_id, sizeof ( conn->service_id ) ); 366 conn->op = op; 367 conn->private_data_len = private_data_len; 368 memcpy ( &conn->private_data, private_data, private_data_len ); 369 370 /* Create path */ 371 conn->path = ib_create_path ( ibdev, &qp->av, &ib_cm_path_op ); 372 if ( ! conn->path ) 373 goto err_create_path; 374 ib_path_set_ownerdata ( conn->path, conn ); 375 376 /* Add to list of connections */ 377 list_add ( &conn->list, &ib_cm_conns ); 378 379 DBGC ( conn, "CM %p created for IBDEV %p QPN %lx\n", 380 conn, ibdev, qp->qpn ); 381 DBGC ( conn, "CM %p connecting to %08x:%08x:%08x:%08x %08x:%08x\n", 382 conn, ntohl ( dgid->u.dwords[0] ), ntohl ( dgid->u.dwords[1] ), 383 ntohl ( dgid->u.dwords[2] ), ntohl ( dgid->u.dwords[3] ), 384 ntohl ( service_id->u.dwords[0] ), 385 ntohl ( service_id->u.dwords[1] ) ); 386 387 return conn; 388 389 ib_destroy_path ( ibdev, conn->path ); 390 err_create_path: 391 free ( conn ); 392 err_alloc_conn: 393 return NULL; 394 } 395 396 /** 397 * Destroy connection to remote QP 398 * 399 * @v ibdev Infiniband device 400 * @v qp Queue pair 401 * @v conn Connection 402 */ 403 void ib_destroy_conn ( struct ib_device *ibdev, 404 struct ib_queue_pair *qp __unused, 405 struct ib_connection *conn ) { 406 407 list_del ( &conn->list ); 408 if ( conn->madx ) 409 ib_destroy_madx ( ibdev, ibdev->gsi, conn->madx ); 410 if ( conn->path ) 411 ib_destroy_path ( ibdev, conn->path ); 412 free ( conn ); 413 } 414