Home | History | Annotate | Download | only in infiniband
      1 /*
      2  * Copyright (C) 2009 Michael Brown <mbrown (at) fensystems.co.uk>.
      3  *
      4  * This program is free software; you can redistribute it and/or
      5  * modify it under the terms of the GNU General Public License as
      6  * published by the Free Software Foundation; either version 2 of the
      7  * License, or any later version.
      8  *
      9  * This program is distributed in the hope that it will be useful, but
     10  * WITHOUT ANY WARRANTY; without even the implied warranty of
     11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     12  * General Public License for more details.
     13  *
     14  * You should have received a copy of the GNU General Public License
     15  * along with this program; if not, write to the Free Software
     16  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
     17  */
     18 
     19 FILE_LICENCE ( GPL2_OR_LATER );
     20 
     21 #include <stdint.h>
     22 #include <stdlib.h>
     23 #include <string.h>
     24 #include <errno.h>
     25 #include <stdio.h>
     26 #include <unistd.h>
     27 #include <byteswap.h>
     28 #include <gpxe/infiniband.h>
     29 #include <gpxe/iobuf.h>
     30 #include <gpxe/ib_mi.h>
     31 
     32 /**
     33  * @file
     34  *
     35  * Infiniband management interfaces
     36  *
     37  */
     38 
/** Management interface number of send WQEs
 *
 * This is a policy decision.  ib_create_mi() passes this value to
 * ib_create_qp() when creating the management interface's queue pair.
 */
#define IB_MI_NUM_SEND_WQES 4

/** Management interface number of receive WQEs
 *
 * This is a policy decision.  ib_create_mi() passes this value to
 * ib_create_qp() when creating the management interface's queue pair.
 */
#define IB_MI_NUM_RECV_WQES 2

/** Management interface number of completion queue entries
 *
 * This is a policy decision.  ib_create_mi() passes this value to
 * ib_create_cq(); the resulting completion queue is used for both the
 * send and the receive side of the queue pair.
 */
#define IB_MI_NUM_CQES 8

/** TID magic signature
 *
 * The four characters 'g', 'P', 'X', 'E' packed into one 32-bit value.
 * ib_mi_send() stores it (in network byte order) in the first word of
 * any TID that it assigns automatically.
 */
#define IB_MI_TID_MAGIC ( ( 'g' << 24 ) | ( 'P' << 16 ) | ( 'X' << 8 ) | 'E' )

/** Counter for automatically assigned TIDs
 *
 * ib_mi_send() pre-increments this counter and stores the result (in
 * network byte order) in the second word of each TID that it assigns.
 */
static unsigned int next_tid;
     62 
     63 /**
     64  * Handle received MAD
     65  *
     66  * @v ibdev		Infiniband device
     67  * @v mi		Management interface
     68  * @v mad		Received MAD
     69  * @v av		Source address vector
     70  * @ret rc		Return status code
     71  */
     72 static int ib_mi_handle ( struct ib_device *ibdev,
     73 			  struct ib_mad_interface *mi,
     74 			  union ib_mad *mad,
     75 			  struct ib_address_vector *av ) {
     76 	struct ib_mad_hdr *hdr = &mad->hdr;
     77 	struct ib_mad_transaction *madx;
     78 	struct ib_mad_agent *agent;
     79 
     80 	/* Look for a matching transaction by TID */
     81 	list_for_each_entry ( madx, &mi->madx, list ) {
     82 		if ( memcmp ( &hdr->tid, &madx->mad.hdr.tid,
     83 			      sizeof ( hdr->tid ) ) != 0 )
     84 			continue;
     85 		/* Found a matching transaction */
     86 		madx->op->complete ( ibdev, mi, madx, 0, mad, av );
     87 		return 0;
     88 	}
     89 
     90 	/* If there is no matching transaction, look for a listening agent */
     91 	for_each_table_entry ( agent, IB_MAD_AGENTS ) {
     92 		if ( ( ( agent->mgmt_class & IB_MGMT_CLASS_MASK ) !=
     93 		       ( hdr->mgmt_class & IB_MGMT_CLASS_MASK ) ) ||
     94 		     ( agent->class_version != hdr->class_version ) ||
     95 		     ( agent->attr_id != hdr->attr_id ) )
     96 			continue;
     97 		/* Found a matching agent */
     98 		agent->handle ( ibdev, mi, mad, av );
     99 		return 0;
    100 	}
    101 
    102 	/* Otherwise, ignore it */
    103 	DBGC ( mi, "MI %p RX TID %08x%08x ignored\n",
    104 	       mi, ntohl ( hdr->tid[0] ), ntohl ( hdr->tid[1] ) );
    105 	return -ENOTSUP;
    106 }
    107 
    108 /**
    109  * Complete receive via management interface
    110  *
    111  *
    112  * @v ibdev		Infiniband device
    113  * @v qp		Queue pair
    114  * @v av		Address vector
    115  * @v iobuf		I/O buffer
    116  * @v rc		Completion status code
    117  */
    118 static void ib_mi_complete_recv ( struct ib_device *ibdev,
    119 				  struct ib_queue_pair *qp,
    120 				  struct ib_address_vector *av,
    121 				  struct io_buffer *iobuf, int rc ) {
    122 	struct ib_mad_interface *mi = ib_qp_get_ownerdata ( qp );
    123 	union ib_mad *mad;
    124 	struct ib_mad_hdr *hdr;
    125 
    126 	/* Ignore errors */
    127 	if ( rc != 0 ) {
    128 		DBGC ( mi, "MI %p RX error: %s\n", mi, strerror ( rc ) );
    129 		goto out;
    130 	}
    131 
    132 	/* Sanity checks */
    133 	if ( iob_len ( iobuf ) != sizeof ( *mad ) ) {
    134 		DBGC ( mi, "MI %p RX bad size (%zd bytes)\n",
    135 		       mi, iob_len ( iobuf ) );
    136 		DBGC_HDA ( mi, 0, iobuf->data, iob_len ( iobuf ) );
    137 		goto out;
    138 	}
    139 	mad = iobuf->data;
    140 	hdr = &mad->hdr;
    141 	if ( hdr->base_version != IB_MGMT_BASE_VERSION ) {
    142 		DBGC ( mi, "MI %p RX unsupported base version %x\n",
    143 		       mi, hdr->base_version );
    144 		DBGC_HDA ( mi, 0, mad, sizeof ( *mad ) );
    145 		goto out;
    146 	}
    147 	DBGC ( mi, "MI %p RX TID %08x%08x (%02x,%02x,%02x,%04x) status "
    148 	       "%04x\n", mi, ntohl ( hdr->tid[0] ), ntohl ( hdr->tid[1] ),
    149 	       hdr->mgmt_class, hdr->class_version, hdr->method,
    150 	       ntohs ( hdr->attr_id ), ntohs ( hdr->status ) );
    151 	DBGC2_HDA ( mi, 0, mad, sizeof ( *mad ) );
    152 
    153 	/* Handle MAD */
    154 	if ( ( rc = ib_mi_handle ( ibdev, mi, mad, av ) ) != 0 )
    155 		goto out;
    156 
    157  out:
    158 	free_iob ( iobuf );
    159 }
    160 
/** Management interface completion operations
 *
 * Passed to ib_create_cq() by ib_create_mi().  Only a receive
 * completion handler is provided; no send completion handler is set
 * here.
 */
static struct ib_completion_queue_operations ib_mi_completion_ops = {
	.complete_recv = ib_mi_complete_recv,
};
    165 
    166 /**
    167  * Transmit MAD
    168  *
    169  * @v ibdev		Infiniband device
    170  * @v mi		Management interface
    171  * @v mad		MAD
    172  * @v av		Destination address vector
    173  * @ret rc		Return status code
    174  */
    175 int ib_mi_send ( struct ib_device *ibdev, struct ib_mad_interface *mi,
    176 		 union ib_mad *mad, struct ib_address_vector *av ) {
    177 	struct ib_mad_hdr *hdr = &mad->hdr;
    178 	struct io_buffer *iobuf;
    179 	int rc;
    180 
    181 	/* Set common fields */
    182 	hdr->base_version = IB_MGMT_BASE_VERSION;
    183 	if ( ( hdr->tid[0] == 0 ) && ( hdr->tid[1] == 0 ) ) {
    184 		hdr->tid[0] = htonl ( IB_MI_TID_MAGIC );
    185 		hdr->tid[1] = htonl ( ++next_tid );
    186 	}
    187 	DBGC ( mi, "MI %p TX TID %08x%08x (%02x,%02x,%02x,%04x) status "
    188 	       "%04x\n", mi, ntohl ( hdr->tid[0] ), ntohl ( hdr->tid[1] ),
    189 	       hdr->mgmt_class, hdr->class_version, hdr->method,
    190 	       ntohs ( hdr->attr_id ), ntohs ( hdr->status ) );
    191 	DBGC2_HDA ( mi, 0, mad, sizeof ( *mad ) );
    192 
    193 	/* Construct directed route portion of response, if necessary */
    194 	if ( hdr->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE ) {
    195 		struct ib_mad_smp *smp = &mad->smp;
    196 		unsigned int hop_pointer;
    197 		unsigned int hop_count;
    198 
    199 		smp->mad_hdr.status |= htons ( IB_SMP_STATUS_D_INBOUND );
    200 		hop_pointer = smp->mad_hdr.class_specific.smp.hop_pointer;
    201 		hop_count = smp->mad_hdr.class_specific.smp.hop_count;
    202 		assert ( hop_count == hop_pointer );
    203 		if ( hop_pointer < ( sizeof ( smp->return_path.hops ) /
    204 				     sizeof ( smp->return_path.hops[0] ) ) ) {
    205 			smp->return_path.hops[hop_pointer] = ibdev->port;
    206 		} else {
    207 			DBGC ( mi, "MI %p TX TID %08x%08x invalid hop pointer "
    208 			       "%d\n", mi, ntohl ( hdr->tid[0] ),
    209 			       ntohl ( hdr->tid[1] ), hop_pointer );
    210 			return -EINVAL;
    211 		}
    212 	}
    213 
    214 	/* Construct I/O buffer */
    215 	iobuf = alloc_iob ( sizeof ( *mad ) );
    216 	if ( ! iobuf ) {
    217 		DBGC ( mi, "MI %p could not allocate buffer for TID "
    218 		       "%08x%08x\n",
    219 		       mi, ntohl ( hdr->tid[0] ), ntohl ( hdr->tid[1] ) );
    220 		return -ENOMEM;
    221 	}
    222 	memcpy ( iob_put ( iobuf, sizeof ( *mad ) ), mad, sizeof ( *mad ) );
    223 
    224 	/* Send I/O buffer */
    225 	if ( ( rc = ib_post_send ( ibdev, mi->qp, av, iobuf ) ) != 0 ) {
    226 		DBGC ( mi, "MI %p TX TID %08x%08x failed: %s\n",
    227 		       mi,  ntohl ( hdr->tid[0] ), ntohl ( hdr->tid[1] ),
    228 		       strerror ( rc ) );
    229 		free_iob ( iobuf );
    230 		return rc;
    231 	}
    232 
    233 	return 0;
    234 }
    235 
    236 /**
    237  * Handle management transaction timer expiry
    238  *
    239  * @v timer		Retry timer
    240  * @v expired		Failure indicator
    241  */
    242 static void ib_mi_timer_expired ( struct retry_timer *timer, int expired ) {
    243 	struct ib_mad_transaction *madx =
    244 		container_of ( timer, struct ib_mad_transaction, timer );
    245 	struct ib_mad_interface *mi = madx->mi;
    246 	struct ib_device *ibdev = mi->ibdev;
    247 	struct ib_mad_hdr *hdr = &madx->mad.hdr;
    248 
    249 	/* Abandon transaction if we have tried too many times */
    250 	if ( expired ) {
    251 		DBGC ( mi, "MI %p abandoning TID %08x%08x\n",
    252 		       mi, ntohl ( hdr->tid[0] ), ntohl ( hdr->tid[1] ) );
    253 		madx->op->complete ( ibdev, mi, madx, -ETIMEDOUT, NULL, NULL );
    254 		return;
    255 	}
    256 
    257 	/* Restart retransmission timer */
    258 	start_timer ( timer );
    259 
    260 	/* Resend MAD */
    261 	ib_mi_send ( ibdev, mi, &madx->mad, &madx->av );
    262 }
    263 
    264 /**
    265  * Create management transaction
    266  *
    267  * @v ibdev		Infiniband device
    268  * @v mi		Management interface
    269  * @v mad		MAD to send
    270  * @v av		Destination address, or NULL to use SM's GSI
    271  * @v op		Management transaction operations
    272  * @ret madx		Management transaction, or NULL
    273  */
    274 struct ib_mad_transaction *
    275 ib_create_madx ( struct ib_device *ibdev, struct ib_mad_interface *mi,
    276 		 union ib_mad *mad, struct ib_address_vector *av,
    277 		 struct ib_mad_transaction_operations *op ) {
    278 	struct ib_mad_transaction *madx;
    279 
    280 	/* Allocate and initialise structure */
    281 	madx = zalloc ( sizeof ( *madx ) );
    282 	if ( ! madx )
    283 		return NULL;
    284 	madx->mi = mi;
    285 	madx->timer.expired = ib_mi_timer_expired;
    286 	madx->op = op;
    287 
    288 	/* Determine address vector */
    289 	if ( av ) {
    290 		memcpy ( &madx->av, av, sizeof ( madx->av ) );
    291 	} else {
    292 		madx->av.lid = ibdev->sm_lid;
    293 		madx->av.sl = ibdev->sm_sl;
    294 		madx->av.qpn = IB_QPN_GSI;
    295 		madx->av.qkey = IB_QKEY_GSI;
    296 	}
    297 
    298 	/* Copy MAD */
    299 	memcpy ( &madx->mad, mad, sizeof ( madx->mad ) );
    300 
    301 	/* Add to list and start timer to send initial MAD */
    302 	list_add ( &madx->list, &mi->madx );
    303 	start_timer_nodelay ( &madx->timer );
    304 
    305 	return madx;
    306 }
    307 
    308 /**
    309  * Destroy management transaction
    310  *
    311  * @v ibdev		Infiniband device
    312  * @v mi		Management interface
    313  * @v madx		Management transaction
    314  */
    315 void ib_destroy_madx ( struct ib_device *ibdev __unused,
    316 		       struct ib_mad_interface *mi __unused,
    317 		       struct ib_mad_transaction *madx ) {
    318 
    319 	/* Stop timer and remove from list */
    320 	stop_timer ( &madx->timer );
    321 	list_del ( &madx->list );
    322 
    323 	/* Free transaction */
    324 	free ( madx );
    325 }
    326 
    327 /**
    328  * Create management interface
    329  *
    330  * @v ibdev		Infiniband device
    331  * @v type		Queue pair type
    332  * @ret mi		Management agent, or NULL
    333  */
    334 struct ib_mad_interface * ib_create_mi ( struct ib_device *ibdev,
    335 					 enum ib_queue_pair_type type ) {
    336 	struct ib_mad_interface *mi;
    337 	int rc;
    338 
    339 	/* Allocate and initialise fields */
    340 	mi = zalloc ( sizeof ( *mi ) );
    341 	if ( ! mi )
    342 		goto err_alloc;
    343 	mi->ibdev = ibdev;
    344 	INIT_LIST_HEAD ( &mi->madx );
    345 
    346 	/* Create completion queue */
    347 	mi->cq = ib_create_cq ( ibdev, IB_MI_NUM_CQES, &ib_mi_completion_ops );
    348 	if ( ! mi->cq ) {
    349 		DBGC ( mi, "MI %p could not allocate completion queue\n", mi );
    350 		goto err_create_cq;
    351 	}
    352 
    353 	/* Create queue pair */
    354 	mi->qp = ib_create_qp ( ibdev, type, IB_MI_NUM_SEND_WQES, mi->cq,
    355 				IB_MI_NUM_RECV_WQES, mi->cq );
    356 	if ( ! mi->qp ) {
    357 		DBGC ( mi, "MI %p could not allocate queue pair\n", mi );
    358 		goto err_create_qp;
    359 	}
    360 	ib_qp_set_ownerdata ( mi->qp, mi );
    361 	DBGC ( mi, "MI %p (%s) running on QPN %#lx\n",
    362 	       mi, ( ( type == IB_QPT_SMI ) ? "SMI" : "GSI" ), mi->qp->qpn );
    363 
    364 	/* Set queue key */
    365 	mi->qp->qkey = ( ( type == IB_QPT_SMI ) ? IB_QKEY_SMI : IB_QKEY_GSI );
    366 	if ( ( rc = ib_modify_qp ( ibdev, mi->qp ) ) != 0 ) {
    367 		DBGC ( mi, "MI %p could not set queue key: %s\n",
    368 		       mi, strerror ( rc ) );
    369 		goto err_modify_qp;
    370 	}
    371 
    372 	/* Fill receive ring */
    373 	ib_refill_recv ( ibdev, mi->qp );
    374 	return mi;
    375 
    376  err_modify_qp:
    377 	ib_destroy_qp ( ibdev, mi->qp );
    378  err_create_qp:
    379 	ib_destroy_cq ( ibdev, mi->cq );
    380  err_create_cq:
    381 	free ( mi );
    382  err_alloc:
    383 	return NULL;
    384 }
    385 
    386 /**
    387  * Destroy management interface
    388  *
    389  * @v mi		Management interface
    390  */
    391 void ib_destroy_mi ( struct ib_device *ibdev, struct ib_mad_interface *mi ) {
    392 	struct ib_mad_transaction *madx;
    393 	struct ib_mad_transaction *tmp;
    394 
    395 	/* Flush any outstanding requests */
    396 	list_for_each_entry_safe ( madx, tmp, &mi->madx, list ) {
    397 		DBGC ( mi, "MI %p destroyed while TID %08x%08x in progress\n",
    398 		       mi, ntohl ( madx->mad.hdr.tid[0] ),
    399 		       ntohl ( madx->mad.hdr.tid[1] ) );
    400 		madx->op->complete ( ibdev, mi, madx, -ECANCELED, NULL, NULL );
    401 	}
    402 
    403 	ib_destroy_qp ( ibdev, mi->qp );
    404 	ib_destroy_cq ( ibdev, mi->cq );
    405 	free ( mi );
    406 }
    407