Home | History | Annotate | Download | only in net
      1 /*
      2  * Copyright (C) 2007 Michael Brown <mbrown (at) fensystems.co.uk>.
      3  *
      4  * This program is free software; you can redistribute it and/or
      5  * modify it under the terms of the GNU General Public License as
      6  * published by the Free Software Foundation; either version 2 of the
      7  * License, or any later version.
      8  *
      9  * This program is distributed in the hope that it will be useful, but
     10  * WITHOUT ANY WARRANTY; without even the implied warranty of
     11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     12  * General Public License for more details.
     13  *
     14  * You should have received a copy of the GNU General Public License
     15  * along with this program; if not, write to the Free Software
     16  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
     17  */
     18 
     19 FILE_LICENCE ( GPL2_OR_LATER );
     20 
     21 #include <stdint.h>
     22 #include <stdlib.h>
     23 #include <stdio.h>
     24 #include <string.h>
     25 #include <unistd.h>
     26 #include <byteswap.h>
     27 #include <errno.h>
     28 #include <assert.h>
     29 #include <gpxe/list.h>
     30 #include <gpxe/errortab.h>
     31 #include <gpxe/if_arp.h>
     32 #include <gpxe/netdevice.h>
     33 #include <gpxe/iobuf.h>
     34 #include <gpxe/ipoib.h>
     35 #include <gpxe/process.h>
     36 #include <gpxe/infiniband.h>
     37 #include <gpxe/ib_mi.h>
     38 #include <gpxe/ib_sma.h>
     39 
     40 /** @file
     41  *
     42  * Infiniband protocol
     43  *
     44  */
     45 
/** List of Infiniband devices
 *
 * register_ibdev() appends devices to the tail of this list and
 * unregister_ibdev() removes them again.
 */
struct list_head ib_devices = LIST_HEAD_INIT ( ib_devices );

/** List of open Infiniband devices, in reverse order of opening
 *
 * ib_open() adds each newly opened device at the head of this list,
 * so the first entry is the most recently opened device.
 */
static struct list_head open_ib_devices = LIST_HEAD_INIT ( open_ib_devices );

/* Disambiguate the various possible EINPROGRESSes
 *
 * ib_link_rc() reports both the INIT and the ARMED port states as
 * variants of EINPROGRESS; the EUNIQ_xx bits give each variant its
 * own value so that each can have its own entry in the error table
 * below.
 */
#define EINPROGRESS_INIT ( EINPROGRESS | EUNIQ_01 )
#define EINPROGRESS_ARMED ( EINPROGRESS | EUNIQ_02 )

/** Human-readable message for the link statuses */
struct errortab infiniband_errors[] __errortab = {
	{ EINPROGRESS_INIT, "Initialising" },
	{ EINPROGRESS_ARMED, "Armed" },
};
     61 
     62 /***************************************************************************
     63  *
     64  * Completion queues
     65  *
     66  ***************************************************************************
     67  */
     68 
     69 /**
     70  * Create completion queue
     71  *
     72  * @v ibdev		Infiniband device
     73  * @v num_cqes		Number of completion queue entries
     74  * @v op		Completion queue operations
     75  * @ret cq		New completion queue
     76  */
     77 struct ib_completion_queue *
     78 ib_create_cq ( struct ib_device *ibdev, unsigned int num_cqes,
     79 	       struct ib_completion_queue_operations *op ) {
     80 	struct ib_completion_queue *cq;
     81 	int rc;
     82 
     83 	DBGC ( ibdev, "IBDEV %p creating completion queue\n", ibdev );
     84 
     85 	/* Allocate and initialise data structure */
     86 	cq = zalloc ( sizeof ( *cq ) );
     87 	if ( ! cq )
     88 		goto err_alloc_cq;
     89 	cq->ibdev = ibdev;
     90 	list_add ( &cq->list, &ibdev->cqs );
     91 	cq->num_cqes = num_cqes;
     92 	INIT_LIST_HEAD ( &cq->work_queues );
     93 	cq->op = op;
     94 
     95 	/* Perform device-specific initialisation and get CQN */
     96 	if ( ( rc = ibdev->op->create_cq ( ibdev, cq ) ) != 0 ) {
     97 		DBGC ( ibdev, "IBDEV %p could not initialise completion "
     98 		       "queue: %s\n", ibdev, strerror ( rc ) );
     99 		goto err_dev_create_cq;
    100 	}
    101 
    102 	DBGC ( ibdev, "IBDEV %p created %d-entry completion queue %p (%p) "
    103 	       "with CQN %#lx\n", ibdev, num_cqes, cq,
    104 	       ib_cq_get_drvdata ( cq ), cq->cqn );
    105 	return cq;
    106 
    107 	ibdev->op->destroy_cq ( ibdev, cq );
    108  err_dev_create_cq:
    109 	list_del ( &cq->list );
    110 	free ( cq );
    111  err_alloc_cq:
    112 	return NULL;
    113 }
    114 
/**
 * Destroy completion queue
 *
 * @v ibdev		Infiniband device
 * @v cq		Completion queue
 *
 * The completion queue must no longer have any work queues attached
 * to it; this is checked with an assertion.  The completion queue
 * structure is freed, so the caller must not use it afterwards.
 */
void ib_destroy_cq ( struct ib_device *ibdev,
		     struct ib_completion_queue *cq ) {
	DBGC ( ibdev, "IBDEV %p destroying completion queue %#lx\n",
	       ibdev, cq->cqn );
	/* No queue pair may still be using this completion queue */
	assert ( list_empty ( &cq->work_queues ) );
	/* Driver-specific teardown comes first, while the structure
	 * is still linked and valid.
	 */
	ibdev->op->destroy_cq ( ibdev, cq );
	/* Unlink from the device's list of completion queues and free */
	list_del ( &cq->list );
	free ( cq );
}
    130 
    131 /**
    132  * Poll completion queue
    133  *
    134  * @v ibdev		Infiniband device
    135  * @v cq		Completion queue
    136  */
    137 void ib_poll_cq ( struct ib_device *ibdev,
    138 		  struct ib_completion_queue *cq ) {
    139 	struct ib_work_queue *wq;
    140 
    141 	/* Poll completion queue */
    142 	ibdev->op->poll_cq ( ibdev, cq );
    143 
    144 	/* Refill receive work queues */
    145 	list_for_each_entry ( wq, &cq->work_queues, list ) {
    146 		if ( ! wq->is_send )
    147 			ib_refill_recv ( ibdev, wq->qp );
    148 	}
    149 }
    150 
    151 /***************************************************************************
    152  *
    153  * Work queues
    154  *
    155  ***************************************************************************
    156  */
    157 
    158 /**
    159  * Create queue pair
    160  *
    161  * @v ibdev		Infiniband device
    162  * @v type		Queue pair type
    163  * @v num_send_wqes	Number of send work queue entries
    164  * @v send_cq		Send completion queue
    165  * @v num_recv_wqes	Number of receive work queue entries
    166  * @v recv_cq		Receive completion queue
    167  * @ret qp		Queue pair
    168  *
    169  * The queue pair will be left in the INIT state; you must call
    170  * ib_modify_qp() before it is ready to use for sending and receiving.
    171  */
    172 struct ib_queue_pair * ib_create_qp ( struct ib_device *ibdev,
    173 				      enum ib_queue_pair_type type,
    174 				      unsigned int num_send_wqes,
    175 				      struct ib_completion_queue *send_cq,
    176 				      unsigned int num_recv_wqes,
    177 				      struct ib_completion_queue *recv_cq ) {
    178 	struct ib_queue_pair *qp;
    179 	size_t total_size;
    180 	int rc;
    181 
    182 	DBGC ( ibdev, "IBDEV %p creating queue pair\n", ibdev );
    183 
    184 	/* Allocate and initialise data structure */
    185 	total_size = ( sizeof ( *qp ) +
    186 		       ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ) +
    187 		       ( num_recv_wqes * sizeof ( qp->recv.iobufs[0] ) ) );
    188 	qp = zalloc ( total_size );
    189 	if ( ! qp )
    190 		goto err_alloc_qp;
    191 	qp->ibdev = ibdev;
    192 	list_add ( &qp->list, &ibdev->qps );
    193 	qp->type = type;
    194 	qp->send.qp = qp;
    195 	qp->send.is_send = 1;
    196 	qp->send.cq = send_cq;
    197 	list_add ( &qp->send.list, &send_cq->work_queues );
    198 	qp->send.psn = ( random() & 0xffffffUL );
    199 	qp->send.num_wqes = num_send_wqes;
    200 	qp->send.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) );
    201 	qp->recv.qp = qp;
    202 	qp->recv.cq = recv_cq;
    203 	list_add ( &qp->recv.list, &recv_cq->work_queues );
    204 	qp->recv.psn = ( random() & 0xffffffUL );
    205 	qp->recv.num_wqes = num_recv_wqes;
    206 	qp->recv.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) +
    207 			    ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ));
    208 	INIT_LIST_HEAD ( &qp->mgids );
    209 
    210 	/* Perform device-specific initialisation and get QPN */
    211 	if ( ( rc = ibdev->op->create_qp ( ibdev, qp ) ) != 0 ) {
    212 		DBGC ( ibdev, "IBDEV %p could not initialise queue pair: "
    213 		       "%s\n", ibdev, strerror ( rc ) );
    214 		goto err_dev_create_qp;
    215 	}
    216 	DBGC ( ibdev, "IBDEV %p created queue pair %p (%p) with QPN %#lx\n",
    217 	       ibdev, qp, ib_qp_get_drvdata ( qp ), qp->qpn );
    218 	DBGC ( ibdev, "IBDEV %p QPN %#lx has %d send entries at [%p,%p)\n",
    219 	       ibdev, qp->qpn, num_send_wqes, qp->send.iobufs,
    220 	       qp->recv.iobufs );
    221 	DBGC ( ibdev, "IBDEV %p QPN %#lx has %d receive entries at [%p,%p)\n",
    222 	       ibdev, qp->qpn, num_recv_wqes, qp->recv.iobufs,
    223 	       ( ( ( void * ) qp ) + total_size ) );
    224 
    225 	/* Calculate externally-visible QPN */
    226 	switch ( type ) {
    227 	case IB_QPT_SMI:
    228 		qp->ext_qpn = IB_QPN_SMI;
    229 		break;
    230 	case IB_QPT_GSI:
    231 		qp->ext_qpn = IB_QPN_GSI;
    232 		break;
    233 	default:
    234 		qp->ext_qpn = qp->qpn;
    235 		break;
    236 	}
    237 	if ( qp->ext_qpn != qp->qpn ) {
    238 		DBGC ( ibdev, "IBDEV %p QPN %#lx has external QPN %#lx\n",
    239 		       ibdev, qp->qpn, qp->ext_qpn );
    240 	}
    241 
    242 	return qp;
    243 
    244 	ibdev->op->destroy_qp ( ibdev, qp );
    245  err_dev_create_qp:
    246 	list_del ( &qp->send.list );
    247 	list_del ( &qp->recv.list );
    248 	list_del ( &qp->list );
    249 	free ( qp );
    250  err_alloc_qp:
    251 	return NULL;
    252 }
    253 
    254 /**
    255  * Modify queue pair
    256  *
    257  * @v ibdev		Infiniband device
    258  * @v qp		Queue pair
    259  * @v av		New address vector, if applicable
    260  * @ret rc		Return status code
    261  */
    262 int ib_modify_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
    263 	int rc;
    264 
    265 	DBGC ( ibdev, "IBDEV %p modifying QPN %#lx\n", ibdev, qp->qpn );
    266 
    267 	if ( ( rc = ibdev->op->modify_qp ( ibdev, qp ) ) != 0 ) {
    268 		DBGC ( ibdev, "IBDEV %p could not modify QPN %#lx: %s\n",
    269 		       ibdev, qp->qpn, strerror ( rc ) );
    270 		return rc;
    271 	}
    272 
    273 	return 0;
    274 }
    275 
    276 /**
    277  * Destroy queue pair
    278  *
    279  * @v ibdev		Infiniband device
    280  * @v qp		Queue pair
    281  */
    282 void ib_destroy_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
    283 	struct io_buffer *iobuf;
    284 	unsigned int i;
    285 
    286 	DBGC ( ibdev, "IBDEV %p destroying QPN %#lx\n",
    287 	       ibdev, qp->qpn );
    288 
    289 	assert ( list_empty ( &qp->mgids ) );
    290 
    291 	/* Perform device-specific destruction */
    292 	ibdev->op->destroy_qp ( ibdev, qp );
    293 
    294 	/* Complete any remaining I/O buffers with errors */
    295 	for ( i = 0 ; i < qp->send.num_wqes ; i++ ) {
    296 		if ( ( iobuf = qp->send.iobufs[i] ) != NULL )
    297 			ib_complete_send ( ibdev, qp, iobuf, -ECANCELED );
    298 	}
    299 	for ( i = 0 ; i < qp->recv.num_wqes ; i++ ) {
    300 		if ( ( iobuf = qp->recv.iobufs[i] ) != NULL ) {
    301 			ib_complete_recv ( ibdev, qp, NULL, iobuf,
    302 					   -ECANCELED );
    303 		}
    304 	}
    305 
    306 	/* Remove work queues from completion queue */
    307 	list_del ( &qp->send.list );
    308 	list_del ( &qp->recv.list );
    309 
    310 	/* Free QP */
    311 	list_del ( &qp->list );
    312 	free ( qp );
    313 }
    314 
    315 /**
    316  * Find queue pair by QPN
    317  *
    318  * @v ibdev		Infiniband device
    319  * @v qpn		Queue pair number
    320  * @ret qp		Queue pair, or NULL
    321  */
    322 struct ib_queue_pair * ib_find_qp_qpn ( struct ib_device *ibdev,
    323 					unsigned long qpn ) {
    324 	struct ib_queue_pair *qp;
    325 
    326 	list_for_each_entry ( qp, &ibdev->qps, list ) {
    327 		if ( ( qpn == qp->qpn ) || ( qpn == qp->ext_qpn ) )
    328 			return qp;
    329 	}
    330 	return NULL;
    331 }
    332 
    333 /**
    334  * Find queue pair by multicast GID
    335  *
    336  * @v ibdev		Infiniband device
    337  * @v gid		Multicast GID
    338  * @ret qp		Queue pair, or NULL
    339  */
    340 struct ib_queue_pair * ib_find_qp_mgid ( struct ib_device *ibdev,
    341 					 struct ib_gid *gid ) {
    342 	struct ib_queue_pair *qp;
    343 	struct ib_multicast_gid *mgid;
    344 
    345 	list_for_each_entry ( qp, &ibdev->qps, list ) {
    346 		list_for_each_entry ( mgid, &qp->mgids, list ) {
    347 			if ( memcmp ( &mgid->gid, gid,
    348 				      sizeof ( mgid->gid ) ) == 0 ) {
    349 				return qp;
    350 			}
    351 		}
    352 	}
    353 	return NULL;
    354 }
    355 
    356 /**
    357  * Find work queue belonging to completion queue
    358  *
    359  * @v cq		Completion queue
    360  * @v qpn		Queue pair number
    361  * @v is_send		Find send work queue (rather than receive)
    362  * @ret wq		Work queue, or NULL if not found
    363  */
    364 struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq,
    365 				    unsigned long qpn, int is_send ) {
    366 	struct ib_work_queue *wq;
    367 
    368 	list_for_each_entry ( wq, &cq->work_queues, list ) {
    369 		if ( ( wq->qp->qpn == qpn ) && ( wq->is_send == is_send ) )
    370 			return wq;
    371 	}
    372 	return NULL;
    373 }
    374 
    375 /**
    376  * Post send work queue entry
    377  *
    378  * @v ibdev		Infiniband device
    379  * @v qp		Queue pair
    380  * @v av		Address vector
    381  * @v iobuf		I/O buffer
    382  * @ret rc		Return status code
    383  */
    384 int ib_post_send ( struct ib_device *ibdev, struct ib_queue_pair *qp,
    385 		   struct ib_address_vector *av,
    386 		   struct io_buffer *iobuf ) {
    387 	struct ib_address_vector av_copy;
    388 	int rc;
    389 
    390 	/* Check queue fill level */
    391 	if ( qp->send.fill >= qp->send.num_wqes ) {
    392 		DBGC ( ibdev, "IBDEV %p QPN %#lx send queue full\n",
    393 		       ibdev, qp->qpn );
    394 		return -ENOBUFS;
    395 	}
    396 
    397 	/* Use default address vector if none specified */
    398 	if ( ! av )
    399 		av = &qp->av;
    400 
    401 	/* Make modifiable copy of address vector */
    402 	memcpy ( &av_copy, av, sizeof ( av_copy ) );
    403 	av = &av_copy;
    404 
    405 	/* Fill in optional parameters in address vector */
    406 	if ( ! av->qkey )
    407 		av->qkey = qp->qkey;
    408 	if ( ! av->rate )
    409 		av->rate = IB_RATE_2_5;
    410 
    411 	/* Post to hardware */
    412 	if ( ( rc = ibdev->op->post_send ( ibdev, qp, av, iobuf ) ) != 0 ) {
    413 		DBGC ( ibdev, "IBDEV %p QPN %#lx could not post send WQE: "
    414 		       "%s\n", ibdev, qp->qpn, strerror ( rc ) );
    415 		return rc;
    416 	}
    417 
    418 	qp->send.fill++;
    419 	return 0;
    420 }
    421 
    422 /**
    423  * Post receive work queue entry
    424  *
    425  * @v ibdev		Infiniband device
    426  * @v qp		Queue pair
    427  * @v iobuf		I/O buffer
    428  * @ret rc		Return status code
    429  */
    430 int ib_post_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp,
    431 		   struct io_buffer *iobuf ) {
    432 	int rc;
    433 
    434 	/* Check packet length */
    435 	if ( iob_tailroom ( iobuf ) < IB_MAX_PAYLOAD_SIZE ) {
    436 		DBGC ( ibdev, "IBDEV %p QPN %#lx wrong RX buffer size (%zd)\n",
    437 		       ibdev, qp->qpn, iob_tailroom ( iobuf ) );
    438 		return -EINVAL;
    439 	}
    440 
    441 	/* Check queue fill level */
    442 	if ( qp->recv.fill >= qp->recv.num_wqes ) {
    443 		DBGC ( ibdev, "IBDEV %p QPN %#lx receive queue full\n",
    444 		       ibdev, qp->qpn );
    445 		return -ENOBUFS;
    446 	}
    447 
    448 	/* Post to hardware */
    449 	if ( ( rc = ibdev->op->post_recv ( ibdev, qp, iobuf ) ) != 0 ) {
    450 		DBGC ( ibdev, "IBDEV %p QPN %#lx could not post receive WQE: "
    451 		       "%s\n", ibdev, qp->qpn, strerror ( rc ) );
    452 		return rc;
    453 	}
    454 
    455 	qp->recv.fill++;
    456 	return 0;
    457 }
    458 
    459 /**
    460  * Complete send work queue entry
    461  *
    462  * @v ibdev		Infiniband device
    463  * @v qp		Queue pair
    464  * @v iobuf		I/O buffer
    465  * @v rc		Completion status code
    466  */
    467 void ib_complete_send ( struct ib_device *ibdev, struct ib_queue_pair *qp,
    468 			struct io_buffer *iobuf, int rc ) {
    469 
    470 	if ( qp->send.cq->op->complete_send ) {
    471 		qp->send.cq->op->complete_send ( ibdev, qp, iobuf, rc );
    472 	} else {
    473 		free_iob ( iobuf );
    474 	}
    475 	qp->send.fill--;
    476 }
    477 
    478 /**
    479  * Complete receive work queue entry
    480  *
    481  * @v ibdev		Infiniband device
    482  * @v qp		Queue pair
    483  * @v av		Address vector
    484  * @v iobuf		I/O buffer
    485  * @v rc		Completion status code
    486  */
    487 void ib_complete_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp,
    488 			struct ib_address_vector *av,
    489 			struct io_buffer *iobuf, int rc ) {
    490 
    491 	if ( qp->recv.cq->op->complete_recv ) {
    492 		qp->recv.cq->op->complete_recv ( ibdev, qp, av, iobuf, rc );
    493 	} else {
    494 		free_iob ( iobuf );
    495 	}
    496 	qp->recv.fill--;
    497 }
    498 
    499 /**
    500  * Refill receive work queue
    501  *
    502  * @v ibdev		Infiniband device
    503  * @v qp		Queue pair
    504  */
    505 void ib_refill_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
    506 	struct io_buffer *iobuf;
    507 	int rc;
    508 
    509 	/* Keep filling while unfilled entries remain */
    510 	while ( qp->recv.fill < qp->recv.num_wqes ) {
    511 
    512 		/* Allocate I/O buffer */
    513 		iobuf = alloc_iob ( IB_MAX_PAYLOAD_SIZE );
    514 		if ( ! iobuf ) {
    515 			/* Non-fatal; we will refill on next attempt */
    516 			return;
    517 		}
    518 
    519 		/* Post I/O buffer */
    520 		if ( ( rc = ib_post_recv ( ibdev, qp, iobuf ) ) != 0 ) {
    521 			DBGC ( ibdev, "IBDEV %p could not refill: %s\n",
    522 			       ibdev, strerror ( rc ) );
    523 			free_iob ( iobuf );
    524 			/* Give up */
    525 			return;
    526 		}
    527 	}
    528 }
    529 
    530 /***************************************************************************
    531  *
    532  * Link control
    533  *
    534  ***************************************************************************
    535  */
    536 
    537 /**
    538  * Open port
    539  *
    540  * @v ibdev		Infiniband device
    541  * @ret rc		Return status code
    542  */
    543 int ib_open ( struct ib_device *ibdev ) {
    544 	int rc;
    545 
    546 	/* Increment device open request counter */
    547 	if ( ibdev->open_count++ > 0 ) {
    548 		/* Device was already open; do nothing */
    549 		return 0;
    550 	}
    551 
    552 	/* Create subnet management interface */
    553 	ibdev->smi = ib_create_mi ( ibdev, IB_QPT_SMI );
    554 	if ( ! ibdev->smi ) {
    555 		DBGC ( ibdev, "IBDEV %p could not create SMI\n", ibdev );
    556 		rc = -ENOMEM;
    557 		goto err_create_smi;
    558 	}
    559 
    560 	/* Create subnet management agent */
    561 	if ( ( rc = ib_create_sma ( ibdev, ibdev->smi ) ) != 0 ) {
    562 		DBGC ( ibdev, "IBDEV %p could not create SMA: %s\n",
    563 		       ibdev, strerror ( rc ) );
    564 		goto err_create_sma;
    565 	}
    566 
    567 	/* Create general services interface */
    568 	ibdev->gsi = ib_create_mi ( ibdev, IB_QPT_GSI );
    569 	if ( ! ibdev->gsi ) {
    570 		DBGC ( ibdev, "IBDEV %p could not create GSI\n", ibdev );
    571 		rc = -ENOMEM;
    572 		goto err_create_gsi;
    573 	}
    574 
    575 	/* Open device */
    576 	if ( ( rc = ibdev->op->open ( ibdev ) ) != 0 ) {
    577 		DBGC ( ibdev, "IBDEV %p could not open: %s\n",
    578 		       ibdev, strerror ( rc ) );
    579 		goto err_open;
    580 	}
    581 
    582 	/* Add to head of open devices list */
    583 	list_add ( &ibdev->open_list, &open_ib_devices );
    584 
    585 	assert ( ibdev->open_count == 1 );
    586 	return 0;
    587 
    588 	ibdev->op->close ( ibdev );
    589  err_open:
    590 	ib_destroy_mi ( ibdev, ibdev->gsi );
    591  err_create_gsi:
    592 	ib_destroy_sma ( ibdev, ibdev->smi );
    593  err_create_sma:
    594 	ib_destroy_mi ( ibdev, ibdev->smi );
    595  err_create_smi:
    596 	assert ( ibdev->open_count == 1 );
    597 	ibdev->open_count = 0;
    598 	return rc;
    599 }
    600 
    601 /**
    602  * Close port
    603  *
    604  * @v ibdev		Infiniband device
    605  */
    606 void ib_close ( struct ib_device *ibdev ) {
    607 
    608 	/* Decrement device open request counter */
    609 	ibdev->open_count--;
    610 
    611 	/* Close device if this was the last remaining requested opening */
    612 	if ( ibdev->open_count == 0 ) {
    613 		list_del ( &ibdev->open_list );
    614 		ib_destroy_mi ( ibdev, ibdev->gsi );
    615 		ib_destroy_sma ( ibdev, ibdev->smi );
    616 		ib_destroy_mi ( ibdev, ibdev->smi );
    617 		ibdev->op->close ( ibdev );
    618 	}
    619 }
    620 
    621 /**
    622  * Get link state
    623  *
    624  * @v ibdev		Infiniband device
    625  * @ret rc		Link status code
    626  */
    627 int ib_link_rc ( struct ib_device *ibdev ) {
    628 	switch ( ibdev->port_state ) {
    629 	case IB_PORT_STATE_DOWN:	return -ENOTCONN;
    630 	case IB_PORT_STATE_INIT:	return -EINPROGRESS_INIT;
    631 	case IB_PORT_STATE_ARMED:	return -EINPROGRESS_ARMED;
    632 	case IB_PORT_STATE_ACTIVE:	return 0;
    633 	default:			return -EINVAL;
    634 	}
    635 }
    636 
    637 /***************************************************************************
    638  *
    639  * Multicast
    640  *
    641  ***************************************************************************
    642  */
    643 
    644 /**
    645  * Attach to multicast group
    646  *
    647  * @v ibdev		Infiniband device
    648  * @v qp		Queue pair
    649  * @v gid		Multicast GID
    650  * @ret rc		Return status code
    651  *
    652  * Note that this function handles only the local device's attachment
    653  * to the multicast GID; it does not issue the relevant MADs to join
    654  * the multicast group on the subnet.
    655  */
    656 int ib_mcast_attach ( struct ib_device *ibdev, struct ib_queue_pair *qp,
    657 		      struct ib_gid *gid ) {
    658 	struct ib_multicast_gid *mgid;
    659 	int rc;
    660 
    661 	/* Add to software multicast GID list */
    662 	mgid = zalloc ( sizeof ( *mgid ) );
    663 	if ( ! mgid ) {
    664 		rc = -ENOMEM;
    665 		goto err_alloc_mgid;
    666 	}
    667 	memcpy ( &mgid->gid, gid, sizeof ( mgid->gid ) );
    668 	list_add ( &mgid->list, &qp->mgids );
    669 
    670 	/* Add to hardware multicast GID list */
    671 	if ( ( rc = ibdev->op->mcast_attach ( ibdev, qp, gid ) ) != 0 )
    672 		goto err_dev_mcast_attach;
    673 
    674 	return 0;
    675 
    676  err_dev_mcast_attach:
    677 	list_del ( &mgid->list );
    678 	free ( mgid );
    679  err_alloc_mgid:
    680 	return rc;
    681 }
    682 
    683 /**
    684  * Detach from multicast group
    685  *
    686  * @v ibdev		Infiniband device
    687  * @v qp		Queue pair
    688  * @v gid		Multicast GID
    689  */
    690 void ib_mcast_detach ( struct ib_device *ibdev, struct ib_queue_pair *qp,
    691 		       struct ib_gid *gid ) {
    692 	struct ib_multicast_gid *mgid;
    693 
    694 	/* Remove from hardware multicast GID list */
    695 	ibdev->op->mcast_detach ( ibdev, qp, gid );
    696 
    697 	/* Remove from software multicast GID list */
    698 	list_for_each_entry ( mgid, &qp->mgids, list ) {
    699 		if ( memcmp ( &mgid->gid, gid, sizeof ( mgid->gid ) ) == 0 ) {
    700 			list_del ( &mgid->list );
    701 			free ( mgid );
    702 			break;
    703 		}
    704 	}
    705 }
    706 
    707 /***************************************************************************
    708  *
    709  * Miscellaneous
    710  *
    711  ***************************************************************************
    712  */
    713 
    714 /**
    715  * Get Infiniband HCA information
    716  *
    717  * @v ibdev		Infiniband device
    718  * @ret hca_guid	HCA GUID
    719  * @ret num_ports	Number of ports
    720  */
    721 int ib_get_hca_info ( struct ib_device *ibdev,
    722 		      struct ib_gid_half *hca_guid ) {
    723 	struct ib_device *tmp;
    724 	int num_ports = 0;
    725 
    726 	/* Search for IB devices with the same physical device to
    727 	 * identify port count and a suitable Node GUID.
    728 	 */
    729 	for_each_ibdev ( tmp ) {
    730 		if ( tmp->dev != ibdev->dev )
    731 			continue;
    732 		if ( num_ports == 0 ) {
    733 			memcpy ( hca_guid, &tmp->gid.u.half[1],
    734 				 sizeof ( *hca_guid ) );
    735 		}
    736 		num_ports++;
    737 	}
    738 	return num_ports;
    739 }
    740 
    741 /**
    742  * Set port information
    743  *
    744  * @v ibdev		Infiniband device
    745  * @v mad		Set port information MAD
    746  */
    747 int ib_set_port_info ( struct ib_device *ibdev, union ib_mad *mad ) {
    748 	int rc;
    749 
    750 	/* Adapters with embedded SMAs do not need to support this method */
    751 	if ( ! ibdev->op->set_port_info ) {
    752 		DBGC ( ibdev, "IBDEV %p does not support setting port "
    753 		       "information\n", ibdev );
    754 		return -ENOTSUP;
    755 	}
    756 
    757 	if ( ( rc = ibdev->op->set_port_info ( ibdev, mad ) ) != 0 ) {
    758 		DBGC ( ibdev, "IBDEV %p could not set port information: %s\n",
    759 		       ibdev, strerror ( rc ) );
    760 		return rc;
    761 	}
    762 
    763 	return 0;
    764 };
    765 
    766 /**
    767  * Set partition key table
    768  *
    769  * @v ibdev		Infiniband device
    770  * @v mad		Set partition key table MAD
    771  */
    772 int ib_set_pkey_table ( struct ib_device *ibdev, union ib_mad *mad ) {
    773 	int rc;
    774 
    775 	/* Adapters with embedded SMAs do not need to support this method */
    776 	if ( ! ibdev->op->set_pkey_table ) {
    777 		DBGC ( ibdev, "IBDEV %p does not support setting partition "
    778 		       "key table\n", ibdev );
    779 		return -ENOTSUP;
    780 	}
    781 
    782 	if ( ( rc = ibdev->op->set_pkey_table ( ibdev, mad ) ) != 0 ) {
    783 		DBGC ( ibdev, "IBDEV %p could not set partition key table: "
    784 		       "%s\n", ibdev, strerror ( rc ) );
    785 		return rc;
    786 	}
    787 
    788 	return 0;
    789 };
    790 
    791 /***************************************************************************
    792  *
    793  * Event queues
    794  *
    795  ***************************************************************************
    796  */
    797 
/**
 * Handle Infiniband link state change
 *
 * @v ibdev		Infiniband device
 *
 * The only action taken here is to forward the notification to
 * IPoIB.
 */
void ib_link_state_changed ( struct ib_device *ibdev ) {

	/* Notify IPoIB of link state change */
	ipoib_link_state_changed ( ibdev );
}
    808 
    809 /**
    810  * Poll event queue
    811  *
    812  * @v ibdev		Infiniband device
    813  */
    814 void ib_poll_eq ( struct ib_device *ibdev ) {
    815 	struct ib_completion_queue *cq;
    816 
    817 	/* Poll device's event queue */
    818 	ibdev->op->poll_eq ( ibdev );
    819 
    820 	/* Poll all completion queues */
    821 	list_for_each_entry ( cq, &ibdev->cqs, list )
    822 		ib_poll_cq ( ibdev, cq );
    823 }
    824 
    825 /**
    826  * Single-step the Infiniband event queue
    827  *
    828  * @v process		Infiniband event queue process
    829  */
    830 static void ib_step ( struct process *process __unused ) {
    831 	struct ib_device *ibdev;
    832 
    833 	for_each_ibdev ( ibdev )
    834 		ib_poll_eq ( ibdev );
    835 }
    836 
/** Infiniband event queue process
 *
 * Each step of this process runs ib_step().
 */
struct process ib_process __permanent_process = {
	.list = LIST_HEAD_INIT ( ib_process.list ),
	.step = ib_step,
};
    842 
    843 /***************************************************************************
    844  *
    845  * Infiniband device creation/destruction
    846  *
    847  ***************************************************************************
    848  */
    849 
    850 /**
    851  * Allocate Infiniband device
    852  *
    853  * @v priv_size		Size of driver private data area
    854  * @ret ibdev		Infiniband device, or NULL
    855  */
    856 struct ib_device * alloc_ibdev ( size_t priv_size ) {
    857 	struct ib_device *ibdev;
    858 	void *drv_priv;
    859 	size_t total_len;
    860 
    861 	total_len = ( sizeof ( *ibdev ) + priv_size );
    862 	ibdev = zalloc ( total_len );
    863 	if ( ibdev ) {
    864 		drv_priv = ( ( ( void * ) ibdev ) + sizeof ( *ibdev ) );
    865 		ib_set_drvdata ( ibdev, drv_priv );
    866 		INIT_LIST_HEAD ( &ibdev->cqs );
    867 		INIT_LIST_HEAD ( &ibdev->qps );
    868 		ibdev->port_state = IB_PORT_STATE_DOWN;
    869 		ibdev->lid = IB_LID_NONE;
    870 		ibdev->pkey = IB_PKEY_DEFAULT;
    871 	}
    872 	return ibdev;
    873 }
    874 
    875 /**
    876  * Register Infiniband device
    877  *
    878  * @v ibdev		Infiniband device
    879  * @ret rc		Return status code
    880  */
    881 int register_ibdev ( struct ib_device *ibdev ) {
    882 	int rc;
    883 
    884 	/* Add to device list */
    885 	ibdev_get ( ibdev );
    886 	list_add_tail ( &ibdev->list, &ib_devices );
    887 
    888 	/* Add IPoIB device */
    889 	if ( ( rc = ipoib_probe ( ibdev ) ) != 0 ) {
    890 		DBGC ( ibdev, "IBDEV %p could not add IPoIB device: %s\n",
    891 		       ibdev, strerror ( rc ) );
    892 		goto err_ipoib_probe;
    893 	}
    894 
    895 	DBGC ( ibdev, "IBDEV %p registered (phys %s)\n", ibdev,
    896 	       ibdev->dev->name );
    897 	return 0;
    898 
    899  err_ipoib_probe:
    900 	list_del ( &ibdev->list );
    901 	ibdev_put ( ibdev );
    902 	return rc;
    903 }
    904 
/**
 * Unregister Infiniband device
 *
 * @v ibdev		Infiniband device
 *
 * Reverses register_ibdev(): the IPoIB device is removed, then the
 * device is unlinked from the list of Infiniband devices and the
 * reference taken at registration is dropped.
 */
void unregister_ibdev ( struct ib_device *ibdev ) {

	/* Remove the IPoIB device added by register_ibdev() */
	ipoib_remove ( ibdev );

	/* Remove from device list */
	list_del ( &ibdev->list );
	ibdev_put ( ibdev );
	/* Only the pointer value is passed to the debug message here */
	DBGC ( ibdev, "IBDEV %p unregistered\n", ibdev );
}
    920 
    921 /**
    922  * Find Infiniband device by GID
    923  *
    924  * @v gid		GID
    925  * @ret ibdev		Infiniband device, or NULL
    926  */
    927 struct ib_device * find_ibdev ( struct ib_gid *gid ) {
    928 	struct ib_device *ibdev;
    929 
    930 	for_each_ibdev ( ibdev ) {
    931 		if ( memcmp ( gid, &ibdev->gid, sizeof ( *gid ) ) == 0 )
    932 			return ibdev;
    933 	}
    934 	return NULL;
    935 }
    936 
    937 /**
    938  * Get most recently opened Infiniband device
    939  *
    940  * @ret ibdev		Most recently opened Infiniband device, or NULL
    941  */
    942 struct ib_device * last_opened_ibdev ( void ) {
    943 	struct ib_device *ibdev;
    944 
    945 	list_for_each_entry ( ibdev, &open_ib_devices, open_list ) {
    946 		assert ( ibdev->open_count != 0 );
    947 		return ibdev;
    948 	}
    949 
    950 	return NULL;
    951 }
    952