Home | History | Annotate | Download | only in infiniband
      1 /*
      2  * Copyright (C) 2009 Michael Brown <mbrown (at) fensystems.co.uk>.
      3  *
      4  * This program is free software; you can redistribute it and/or
      5  * modify it under the terms of the GNU General Public License as
      6  * published by the Free Software Foundation; either version 2 of the
      7  * License, or any later version.
      8  *
      9  * This program is distributed in the hope that it will be useful, but
     10  * WITHOUT ANY WARRANTY; without even the implied warranty of
     11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     12  * General Public License for more details.
     13  *
     14  * You should have received a copy of the GNU General Public License
     15  * along with this program; if not, write to the Free Software
     16  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
     17  */
     18 
     19 FILE_LICENCE ( GPL2_OR_LATER );
     20 
     21 #include <stdint.h>
     22 #include <stdlib.h>
     23 #include <string.h>
     24 #include <byteswap.h>
     25 #include <errno.h>
     26 #include <assert.h>
     27 #include <gpxe/infiniband.h>
     28 #include <gpxe/ib_mi.h>
     29 #include <gpxe/ib_pathrec.h>
     30 #include <gpxe/ib_cm.h>
     31 
     32 /**
     33  * @file
     34  *
     35  * Infiniband communication management
     36  *
     37  */
     38 
/**
 * List of connections
 *
 * Connections are added by ib_create_conn() and removed by
 * ib_destroy_conn().  The list is searched by local communication ID
 * when a connection reply MAD arrives.
 */
static LIST_HEAD ( ib_cm_conns );
     41 
     42 /**
     43  * Send "ready to use" response
     44  *
     45  * @v ibdev		Infiniband device
     46  * @v mi		Management interface
     47  * @v conn		Connection
     48  * @v av		Address vector
     49  * @ret rc		Return status code
     50  */
     51 static int ib_cm_send_rtu ( struct ib_device *ibdev,
     52 			    struct ib_mad_interface *mi,
     53 			    struct ib_connection *conn,
     54 			    struct ib_address_vector *av ) {
     55 	union ib_mad mad;
     56 	struct ib_cm_ready_to_use *ready =
     57 		&mad.cm.cm_data.ready_to_use;
     58 	int rc;
     59 
     60 	/* Construct "ready to use" response */
     61 	memset ( &mad, 0, sizeof ( mad ) );
     62 	mad.hdr.mgmt_class = IB_MGMT_CLASS_CM;
     63 	mad.hdr.class_version = IB_CM_CLASS_VERSION;
     64 	mad.hdr.method = IB_MGMT_METHOD_SEND;
     65 	mad.hdr.attr_id = htons ( IB_CM_ATTR_READY_TO_USE );
     66 	ready->local_id = htonl ( conn->local_id );
     67 	ready->remote_id = htonl ( conn->remote_id );
     68 	if ( ( rc = ib_mi_send ( ibdev, mi, &mad, av ) ) != 0 ){
     69 		DBGC ( conn, "CM %p could not send RTU: %s\n",
     70 		       conn, strerror ( rc ) );
     71 		return rc;
     72 	}
     73 
     74 	return 0;
     75 }
     76 
     77 /**
     78  * Handle duplicate connection replies
     79  *
     80  * @v ibdev		Infiniband device
     81  * @v mi		Management interface
     82  * @v mad		Received MAD
     83  * @v av		Source address vector
     84  * @ret rc		Return status code
     85  *
     86  * If a "ready to use" MAD is lost, the peer may resend the connection
     87  * reply.  We have to respond to these with duplicate "ready to use"
     88  * MADs, otherwise the peer may time out and drop the connection.
     89  */
     90 static void ib_cm_connect_rep ( struct ib_device *ibdev,
     91 				struct ib_mad_interface *mi,
     92 				union ib_mad *mad,
     93 				struct ib_address_vector *av ) {
     94 	struct ib_cm_connect_reply *connect_rep =
     95 		&mad->cm.cm_data.connect_reply;
     96 	struct ib_connection *conn;
     97 	int rc;
     98 
     99 	/* Identify connection */
    100 	list_for_each_entry ( conn, &ib_cm_conns, list ) {
    101 		if ( ntohl ( connect_rep->remote_id ) != conn->local_id )
    102 			continue;
    103 		/* Try to send "ready to use" reply */
    104 		if ( ( rc = ib_cm_send_rtu ( ibdev, mi, conn, av ) ) != 0 ) {
    105 			/* Ignore errors */
    106 			return;
    107 		}
    108 		return;
    109 	}
    110 
    111 	DBG ( "CM unidentified connection %08x\n",
    112 	      ntohl ( connect_rep->remote_id ) );
    113 }
    114 
/**
 * Communication management agents
 *
 * A single agent entry: MADs of the CM management class and class
 * version whose attribute ID is "connect reply" are handled by
 * ib_cm_connect_rep().  The attribute ID is stored in network byte
 * order.
 */
struct ib_mad_agent ib_cm_agent[] __ib_mad_agent = {
	{
		.mgmt_class = IB_MGMT_CLASS_CM,
		.class_version = IB_CM_CLASS_VERSION,
		.attr_id = htons ( IB_CM_ATTR_CONNECT_REPLY ),
		.handle = ib_cm_connect_rep,
	},
};
    124 
    125 /**
    126  * Convert connection rejection reason to return status code
    127  *
    128  * @v reason		Rejection reason (in network byte order)
    129  * @ret rc		Return status code
    130  */
    131 static int ib_cm_rejection_reason_to_rc ( uint16_t reason ) {
    132 	switch ( reason ) {
    133 	case htons ( IB_CM_REJECT_BAD_SERVICE_ID ) :
    134 		return -ENODEV;
    135 	case htons ( IB_CM_REJECT_STALE_CONN ) :
    136 		return -EALREADY;
    137 	case htons ( IB_CM_REJECT_CONSUMER ) :
    138 		return -ENOTTY;
    139 	default:
    140 		return -EPERM;
    141 	}
    142 }
    143 
    144 /**
    145  * Handle connection request transaction completion
    146  *
    147  * @v ibdev		Infiniband device
    148  * @v mi		Management interface
    149  * @v madx		Management transaction
    150  * @v rc		Status code
    151  * @v mad		Received MAD (or NULL on error)
    152  * @v av		Source address vector (or NULL on error)
    153  */
    154 static void ib_cm_req_complete ( struct ib_device *ibdev,
    155 				 struct ib_mad_interface *mi,
    156 				 struct ib_mad_transaction *madx,
    157 				 int rc, union ib_mad *mad,
    158 				 struct ib_address_vector *av ) {
    159 	struct ib_connection *conn = ib_madx_get_ownerdata ( madx );
    160 	struct ib_queue_pair *qp = conn->qp;
    161 	struct ib_cm_common *common = &mad->cm.cm_data.common;
    162 	struct ib_cm_connect_reply *connect_rep =
    163 		&mad->cm.cm_data.connect_reply;
    164 	struct ib_cm_connect_reject *connect_rej =
    165 		&mad->cm.cm_data.connect_reject;
    166 	void *private_data = NULL;
    167 	size_t private_data_len = 0;
    168 
    169 	/* Report failures */
    170 	if ( ( rc == 0 ) && ( mad->hdr.status != htons ( IB_MGMT_STATUS_OK ) ))
    171 		rc = -EIO;
    172 	if ( rc != 0 ) {
    173 		DBGC ( conn, "CM %p connection request failed: %s\n",
    174 		       conn, strerror ( rc ) );
    175 		goto out;
    176 	}
    177 
    178 	/* Record remote communication ID */
    179 	conn->remote_id = ntohl ( common->local_id );
    180 
    181 	/* Handle response */
    182 	switch ( mad->hdr.attr_id ) {
    183 
    184 	case htons ( IB_CM_ATTR_CONNECT_REPLY ) :
    185 		/* Extract fields */
    186 		qp->av.qpn = ( ntohl ( connect_rep->local_qpn ) >> 8 );
    187 		qp->send.psn = ( ntohl ( connect_rep->starting_psn ) >> 8 );
    188 		private_data = &connect_rep->private_data;
    189 		private_data_len = sizeof ( connect_rep->private_data );
    190 		DBGC ( conn, "CM %p connected to QPN %lx PSN %x\n",
    191 		       conn, qp->av.qpn, qp->send.psn );
    192 
    193 		/* Modify queue pair */
    194 		if ( ( rc = ib_modify_qp ( ibdev, qp ) ) != 0 ) {
    195 			DBGC ( conn, "CM %p could not modify queue pair: %s\n",
    196 			       conn, strerror ( rc ) );
    197 			goto out;
    198 		}
    199 
    200 		/* Send "ready to use" reply */
    201 		if ( ( rc = ib_cm_send_rtu ( ibdev, mi, conn, av ) ) != 0 ) {
    202 			/* Treat as non-fatal */
    203 			rc = 0;
    204 		}
    205 		break;
    206 
    207 	case htons ( IB_CM_ATTR_CONNECT_REJECT ) :
    208 		/* Extract fields */
    209 		DBGC ( conn, "CM %p connection rejected (reason %d)\n",
    210 		       conn, ntohs ( connect_rej->reason ) );
    211 		/* Private data is valid only for a Consumer Reject */
    212 		if ( connect_rej->reason == htons ( IB_CM_REJECT_CONSUMER ) ) {
    213 			private_data = &connect_rej->private_data;
    214 			private_data_len = sizeof (connect_rej->private_data);
    215 		}
    216 		rc = ib_cm_rejection_reason_to_rc ( connect_rej->reason );
    217 		break;
    218 
    219 	default:
    220 		DBGC ( conn, "CM %p unexpected response (attribute %04x)\n",
    221 		       conn, ntohs ( mad->hdr.attr_id ) );
    222 		rc = -ENOTSUP;
    223 		break;
    224 	}
    225 
    226  out:
    227 	/* Destroy the completed transaction */
    228 	ib_destroy_madx ( ibdev, ibdev->gsi, madx );
    229 	conn->madx = NULL;
    230 
    231 	/* Hand off to the upper completion handler */
    232 	conn->op->changed ( ibdev, qp, conn, rc, private_data,
    233 			    private_data_len );
    234 }
    235 
/**
 * Connection request operations
 *
 * Passed to ib_create_madx() when the connection request is sent, so
 * that ib_cm_req_complete() is invoked when that transaction
 * completes.
 */
static struct ib_mad_transaction_operations ib_cm_req_op = {
	.complete = ib_cm_req_complete,
};
    240 
    241 /**
    242  * Handle connection path transaction completion
    243  *
    244  * @v ibdev		Infiniband device
    245  * @v path		Path
    246  * @v rc		Status code
    247  * @v av		Address vector, or NULL on error
    248  */
    249 static void ib_cm_path_complete ( struct ib_device *ibdev,
    250 				  struct ib_path *path, int rc,
    251 				  struct ib_address_vector *av ) {
    252 	struct ib_connection *conn = ib_path_get_ownerdata ( path );
    253 	struct ib_queue_pair *qp = conn->qp;
    254 	union ib_mad mad;
    255 	struct ib_cm_connect_request *connect_req =
    256 		&mad.cm.cm_data.connect_request;
    257 	size_t private_data_len;
    258 
    259 	/* Report failures */
    260 	if ( rc != 0 ) {
    261 		DBGC ( conn, "CM %p path lookup failed: %s\n",
    262 		       conn, strerror ( rc ) );
    263 		conn->op->changed ( ibdev, qp, conn, rc, NULL, 0 );
    264 		goto out;
    265 	}
    266 
    267 	/* Update queue pair peer path */
    268 	memcpy ( &qp->av, av, sizeof ( qp->av ) );
    269 
    270 	/* Construct connection request */
    271 	memset ( &mad, 0, sizeof ( mad ) );
    272 	mad.hdr.mgmt_class = IB_MGMT_CLASS_CM;
    273 	mad.hdr.class_version = IB_CM_CLASS_VERSION;
    274 	mad.hdr.method = IB_MGMT_METHOD_SEND;
    275 	mad.hdr.attr_id = htons ( IB_CM_ATTR_CONNECT_REQUEST );
    276 	connect_req->local_id = htonl ( conn->local_id );
    277 	memcpy ( &connect_req->service_id, &conn->service_id,
    278 		 sizeof ( connect_req->service_id ) );
    279 	ib_get_hca_info ( ibdev, &connect_req->local_ca );
    280 	connect_req->local_qpn__responder_resources =
    281 		htonl ( ( qp->qpn << 8 ) | 1 );
    282 	connect_req->local_eecn__initiator_depth = htonl ( ( 0 << 8 ) | 1 );
    283 	connect_req->remote_eecn__remote_timeout__service_type__ee_flow_ctrl =
    284 		htonl ( ( 0x14 << 3 ) | ( IB_CM_TRANSPORT_RC << 1 ) |
    285 			( 0 << 0 ) );
    286 	connect_req->starting_psn__local_timeout__retry_count =
    287 		htonl ( ( qp->recv.psn << 8 ) | ( 0x14 << 3 ) |
    288 			( 0x07 << 0 ) );
    289 	connect_req->pkey = htons ( ibdev->pkey );
    290 	connect_req->payload_mtu__rdc_exists__rnr_retry =
    291 		( ( IB_MTU_2048 << 4 ) | ( 1 << 3 ) | ( 0x07 << 0 ) );
    292 	connect_req->max_cm_retries__srq =
    293 		( ( 0x0f << 4 ) | ( 0 << 3 ) );
    294 	connect_req->primary.local_lid = htons ( ibdev->lid );
    295 	connect_req->primary.remote_lid = htons ( conn->qp->av.lid );
    296 	memcpy ( &connect_req->primary.local_gid, &ibdev->gid,
    297 		 sizeof ( connect_req->primary.local_gid ) );
    298 	memcpy ( &connect_req->primary.remote_gid, &conn->qp->av.gid,
    299 		 sizeof ( connect_req->primary.remote_gid ) );
    300 	connect_req->primary.flow_label__rate =
    301 		htonl ( ( 0 << 12 ) | ( conn->qp->av.rate << 0 ) );
    302 	connect_req->primary.hop_limit = 0;
    303 	connect_req->primary.sl__subnet_local =
    304 		( ( conn->qp->av.sl << 4 ) | ( 1 << 3 ) );
    305 	connect_req->primary.local_ack_timeout = ( 0x13 << 3 );
    306 	private_data_len = conn->private_data_len;
    307 	if ( private_data_len > sizeof ( connect_req->private_data ) )
    308 		private_data_len = sizeof ( connect_req->private_data );
    309 	memcpy ( &connect_req->private_data, &conn->private_data,
    310 		 private_data_len );
    311 
    312 	/* Create connection request */
    313 	av->qpn = IB_QPN_GSI;
    314 	av->qkey = IB_QKEY_GSI;
    315 	conn->madx = ib_create_madx ( ibdev, ibdev->gsi, &mad, av,
    316 				      &ib_cm_req_op );
    317 	if ( ! conn->madx ) {
    318 		DBGC ( conn, "CM %p could not create connection request\n",
    319 		       conn );
    320 		conn->op->changed ( ibdev, qp, conn, rc, NULL, 0 );
    321 		goto out;
    322 	}
    323 	ib_madx_set_ownerdata ( conn->madx, conn );
    324 
    325  out:
    326 	/* Destroy the completed transaction */
    327 	ib_destroy_path ( ibdev, path );
    328 	conn->path = NULL;
    329 }
    330 
/**
 * Connection path operations
 *
 * Passed to ib_create_path() by ib_create_conn(), so that
 * ib_cm_path_complete() is invoked when the path transaction
 * completes.
 */
static struct ib_path_operations ib_cm_path_op = {
	.complete = ib_cm_path_complete,
};
    335 
    336 /**
    337  * Create connection to remote QP
    338  *
    339  * @v ibdev		Infiniband device
    340  * @v qp		Queue pair
    341  * @v dgid		Target GID
    342  * @v service_id	Target service ID
    343  * @v private_data	Connection request private data
    344  * @v private_data_len	Length of connection request private data
    345  * @v op		Connection operations
    346  * @ret conn		Connection
    347  */
    348 struct ib_connection *
    349 ib_create_conn ( struct ib_device *ibdev, struct ib_queue_pair *qp,
    350 		 struct ib_gid *dgid, struct ib_gid_half *service_id,
    351 		 void *private_data, size_t private_data_len,
    352 		 struct ib_connection_operations *op ) {
    353 	struct ib_connection *conn;
    354 
    355 	/* Allocate and initialise request */
    356 	conn = zalloc ( sizeof ( *conn ) + private_data_len );
    357 	if ( ! conn )
    358 		goto err_alloc_conn;
    359 	conn->ibdev = ibdev;
    360 	conn->qp = qp;
    361 	memset ( &qp->av, 0, sizeof ( qp->av ) );
    362 	qp->av.gid_present = 1;
    363 	memcpy ( &qp->av.gid, dgid, sizeof ( qp->av.gid ) );
    364 	conn->local_id = random();
    365 	memcpy ( &conn->service_id, service_id, sizeof ( conn->service_id ) );
    366 	conn->op = op;
    367 	conn->private_data_len = private_data_len;
    368 	memcpy ( &conn->private_data, private_data, private_data_len );
    369 
    370 	/* Create path */
    371 	conn->path = ib_create_path ( ibdev, &qp->av, &ib_cm_path_op );
    372 	if ( ! conn->path )
    373 		goto err_create_path;
    374 	ib_path_set_ownerdata ( conn->path, conn );
    375 
    376 	/* Add to list of connections */
    377 	list_add ( &conn->list, &ib_cm_conns );
    378 
    379 	DBGC ( conn, "CM %p created for IBDEV %p QPN %lx\n",
    380 	       conn, ibdev, qp->qpn );
    381 	DBGC ( conn, "CM %p connecting to %08x:%08x:%08x:%08x %08x:%08x\n",
    382 	       conn, ntohl ( dgid->u.dwords[0] ), ntohl ( dgid->u.dwords[1] ),
    383 	       ntohl ( dgid->u.dwords[2] ), ntohl ( dgid->u.dwords[3] ),
    384 	       ntohl ( service_id->u.dwords[0] ),
    385 	       ntohl ( service_id->u.dwords[1] ) );
    386 
    387 	return conn;
    388 
    389 	ib_destroy_path ( ibdev, conn->path );
    390  err_create_path:
    391 	free ( conn );
    392  err_alloc_conn:
    393 	return NULL;
    394 }
    395 
    396 /**
    397  * Destroy connection to remote QP
    398  *
    399  * @v ibdev		Infiniband device
    400  * @v qp		Queue pair
    401  * @v conn		Connection
    402  */
    403 void ib_destroy_conn ( struct ib_device *ibdev,
    404 		       struct ib_queue_pair *qp __unused,
    405 		       struct ib_connection *conn ) {
    406 
    407 	list_del ( &conn->list );
    408 	if ( conn->madx )
    409 		ib_destroy_madx ( ibdev, ibdev->gsi, conn->madx );
    410 	if ( conn->path )
    411 		ib_destroy_path ( ibdev, conn->path );
    412 	free ( conn );
    413 }
    414