Home | History | Annotate | Download | only in net
      1 /*
      2  * Copyright (C) 2007 Michael Brown <mbrown (at) fensystems.co.uk>.
      3  *
      4  * This program is free software; you can redistribute it and/or
      5  * modify it under the terms of the GNU General Public License as
      6  * published by the Free Software Foundation; either version 2 of the
      7  * License, or any later version.
      8  *
      9  * This program is distributed in the hope that it will be useful, but
     10  * WITHOUT ANY WARRANTY; without even the implied warranty of
     11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     12  * General Public License for more details.
     13  *
     14  * You should have received a copy of the GNU General Public License
     15  * along with this program; if not, write to the Free Software
     16  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
     17  */
     18 
     19 FILE_LICENCE ( GPL2_OR_LATER );
     20 
     21 #include <stdint.h>
     22 #include <stdio.h>
     23 #include <unistd.h>
     24 #include <string.h>
     25 #include <byteswap.h>
     26 #include <errno.h>
     27 #include <gpxe/errortab.h>
     28 #include <gpxe/if_arp.h>
     29 #include <gpxe/iobuf.h>
     30 #include <gpxe/netdevice.h>
     31 #include <gpxe/infiniband.h>
     32 #include <gpxe/ib_pathrec.h>
     33 #include <gpxe/ib_mcast.h>
     34 #include <gpxe/ipoib.h>
     35 
     36 /** @file
     37  *
     38  * IP over Infiniband
     39  */
     40 
/** Number of IPoIB send work queue entries
 *
 * Passed to ib_create_qp() when ipoib_open() creates the queue pair.
 */
#define IPOIB_NUM_SEND_WQES 2

/** Number of IPoIB receive work queue entries
 *
 * Passed to ib_create_qp() when ipoib_open() creates the queue pair.
 */
#define IPOIB_NUM_RECV_WQES 4

/** Number of IPoIB completion entries
 *
 * Size of the single completion queue that ipoib_open() creates and
 * attaches to both the send and the receive work queues.
 */
#define IPOIB_NUM_CQES 8
     49 
/** An IPoIB device
 *
 * Stored as the private data of the network device (see
 * ipoib_probe()), and as the owner data of the queue pair while the
 * device is open (see ipoib_open()).
 */
struct ipoib_device {
	/** Network device */
	struct net_device *netdev;
	/** Underlying Infiniband device */
	struct ib_device *ibdev;
	/** Completion queue
	 *
	 * Created by ipoib_open() and destroyed by ipoib_close();
	 * shared by the send and receive work queues.
	 */
	struct ib_completion_queue *cq;
	/** Queue pair
	 *
	 * Created by ipoib_open() and destroyed by ipoib_close().
	 */
	struct ib_queue_pair *qp;
	/** Broadcast MAC
	 *
	 * Initialised from ipoib_broadcast by ipoib_probe(); part of
	 * the GID is rewritten with the partition key by
	 * ipoib_link_state_changed().
	 */
	struct ipoib_mac broadcast;
	/** Joined to IPv4 broadcast multicast group
	 *
	 * This flag indicates whether or not we have initiated the
	 * join to the IPv4 broadcast multicast group.
	 */
	int broadcast_joined;
	/** IPv4 broadcast multicast group membership
	 *
	 * ipoib_join_complete() uses container_of() on this member to
	 * recover the enclosing IPoIB device.
	 */
	struct ib_mc_membership broadcast_membership;
};
     71 
/** Broadcast IPoIB address
 *
 * Default broadcast address.  It is installed as the link-layer
 * broadcast address by alloc_ipoibdev(), and copied into each
 * device's own broadcast MAC by ipoib_probe().
 */
static struct ipoib_mac ipoib_broadcast = {
	.flags__qpn = htonl ( IB_QPN_BROADCAST ),
	.gid.u.bytes = 	{ 0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00,
			  0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff },
};
     78 
/** Link status for "broadcast join in progress"
 *
 * Reported (negated) as the link status by
 * ipoib_link_state_changed() when the Infiniband link itself reports
 * no error.
 */
#define EINPROGRESS_JOINING ( EINPROGRESS | EUNIQ_01 )

/** Human-readable message for the link status */
struct errortab ipoib_errors[] __errortab = {
	{ EINPROGRESS_JOINING, "Joining" },
};
     86 
     87 /****************************************************************************
     88  *
     89  * IPoIB peer cache
     90  *
     91  ****************************************************************************
     92  */
     93 
/**
 * IPoIB peer address
 *
 * The IPoIB link-layer header is only four bytes long and so does not
 * have sufficient room to store IPoIB MAC address(es).  We therefore
 * maintain a cache of MAC addresses identified by a single-byte key,
 * and abuse the spare two bytes within the link-layer header to
 * communicate these MAC addresses between the link-layer code and the
 * netdevice driver.
 *
 * The cache code treats a key of zero as "slot not in use" (see
 * ipoib_cache_peer()).
 */
struct ipoib_peer {
	/** Key */
	uint8_t key;
	/** MAC address */
	struct ipoib_mac mac;
};
    110 
/** Number of IPoIB peer cache entries
 *
 * Must be a power of two.
 */
#define IPOIB_NUM_CACHED_PEERS 4

/** IPoIB peer address cache */
static struct ipoib_peer ipoib_peer_cache[IPOIB_NUM_CACHED_PEERS];

/** Next IPoIB peer cache key to allocate
 *
 * ipoib_cache_peer() takes the next key from this counter; the key
 * modulo IPOIB_NUM_CACHED_PEERS selects the cache slot to overwrite,
 * so slots are reused in round-robin (oldest-first) order.  The
 * counter starts at one.
 */
static unsigned int ipoib_peer_cache_idx = 1;
    122 
    123 /**
    124  * Look up cached peer by key
    125  *
    126  * @v key		Peer cache key
    127  * @ret peer		Peer cache entry, or NULL
    128  */
    129 static struct ipoib_peer * ipoib_lookup_peer_by_key ( unsigned int key ) {
    130 	struct ipoib_peer *peer;
    131 	unsigned int i;
    132 
    133 	for ( i = 0 ; i < IPOIB_NUM_CACHED_PEERS ; i++ ) {
    134 		peer = &ipoib_peer_cache[i];
    135 		if ( peer->key == key )
    136 			return peer;
    137 	}
    138 
    139 	if ( key != 0 ) {
    140 		DBG ( "IPoIB warning: peer cache lost track of key %x while "
    141 		      "still in use\n", key );
    142 	}
    143 	return NULL;
    144 }
    145 
    146 /**
    147  * Store GID and QPN in peer cache
    148  *
    149  * @v mac		Peer MAC address
    150  * @ret peer		Peer cache entry
    151  */
    152 static struct ipoib_peer * ipoib_cache_peer ( const struct ipoib_mac *mac ) {
    153 	struct ipoib_peer *peer;
    154 	unsigned int key;
    155 	unsigned int i;
    156 
    157 	/* Look for existing cache entry */
    158 	for ( i = 0 ; i < IPOIB_NUM_CACHED_PEERS ; i++ ) {
    159 		peer = &ipoib_peer_cache[i];
    160 		if ( memcmp ( &peer->mac, mac, sizeof ( peer->mac ) ) == 0 )
    161 			return peer;
    162 	}
    163 
    164 	/* No entry found: create a new one */
    165 	key = ipoib_peer_cache_idx++;
    166 	peer = &ipoib_peer_cache[ key % IPOIB_NUM_CACHED_PEERS ];
    167 	if ( peer->key )
    168 		DBG ( "IPoIB peer %x evicted from cache\n", peer->key );
    169 
    170 	memset ( peer, 0, sizeof ( *peer ) );
    171 	peer->key = key;
    172 	memcpy ( &peer->mac, mac, sizeof ( peer->mac ) );
    173 	DBG ( "IPoIB peer %x has MAC %s\n",
    174 	      peer->key, ipoib_ntoa ( &peer->mac ) );
    175 	return peer;
    176 }
    177 
    178 /****************************************************************************
    179  *
    180  * IPoIB link layer
    181  *
    182  ****************************************************************************
    183  */
    184 
    185 /**
    186  * Add IPoIB link-layer header
    187  *
    188  * @v netdev		Network device
    189  * @v iobuf		I/O buffer
    190  * @v ll_dest		Link-layer destination address
    191  * @v ll_source		Source link-layer address
    192  * @v net_proto		Network-layer protocol, in network-byte order
    193  * @ret rc		Return status code
    194  */
    195 static int ipoib_push ( struct net_device *netdev __unused,
    196 			struct io_buffer *iobuf, const void *ll_dest,
    197 			const void *ll_source __unused, uint16_t net_proto ) {
    198 	struct ipoib_hdr *ipoib_hdr =
    199 		iob_push ( iobuf, sizeof ( *ipoib_hdr ) );
    200 	const struct ipoib_mac *dest_mac = ll_dest;
    201 	const struct ipoib_mac *src_mac = ll_source;
    202 	struct ipoib_peer *dest;
    203 	struct ipoib_peer *src;
    204 
    205 	/* Add link-layer addresses to cache */
    206 	dest = ipoib_cache_peer ( dest_mac );
    207 	src = ipoib_cache_peer ( src_mac );
    208 
    209 	/* Build IPoIB header */
    210 	ipoib_hdr->proto = net_proto;
    211 	ipoib_hdr->u.peer.dest = dest->key;
    212 	ipoib_hdr->u.peer.src = src->key;
    213 
    214 	return 0;
    215 }
    216 
    217 /**
    218  * Remove IPoIB link-layer header
    219  *
    220  * @v netdev		Network device
    221  * @v iobuf		I/O buffer
    222  * @ret ll_dest		Link-layer destination address
    223  * @ret ll_source	Source link-layer address
    224  * @ret net_proto	Network-layer protocol, in network-byte order
    225  * @ret rc		Return status code
    226  */
    227 static int ipoib_pull ( struct net_device *netdev,
    228 			struct io_buffer *iobuf, const void **ll_dest,
    229 			const void **ll_source, uint16_t *net_proto ) {
    230 	struct ipoib_device *ipoib = netdev->priv;
    231 	struct ipoib_hdr *ipoib_hdr = iobuf->data;
    232 	struct ipoib_peer *dest;
    233 	struct ipoib_peer *source;
    234 
    235 	/* Sanity check */
    236 	if ( iob_len ( iobuf ) < sizeof ( *ipoib_hdr ) ) {
    237 		DBG ( "IPoIB packet too short for link-layer header\n" );
    238 		DBG_HD ( iobuf->data, iob_len ( iobuf ) );
    239 		return -EINVAL;
    240 	}
    241 
    242 	/* Strip off IPoIB header */
    243 	iob_pull ( iobuf, sizeof ( *ipoib_hdr ) );
    244 
    245 	/* Identify source and destination addresses, and clear
    246 	 * reserved word in IPoIB header
    247 	 */
    248 	dest = ipoib_lookup_peer_by_key ( ipoib_hdr->u.peer.dest );
    249 	source = ipoib_lookup_peer_by_key ( ipoib_hdr->u.peer.src );
    250 	ipoib_hdr->u.reserved = 0;
    251 
    252 	/* Fill in required fields */
    253 	*ll_dest = ( dest ? &dest->mac : &ipoib->broadcast );
    254 	*ll_source = ( source ? &source->mac : &ipoib->broadcast );
    255 	*net_proto = ipoib_hdr->proto;
    256 
    257 	return 0;
    258 }
    259 
    260 /**
    261  * Initialise IPoIB link-layer address
    262  *
    263  * @v hw_addr		Hardware address
    264  * @v ll_addr		Link-layer address
    265  */
    266 static void ipoib_init_addr ( const void *hw_addr, void *ll_addr ) {
    267 	const struct ib_gid_half *guid = hw_addr;
    268 	struct ipoib_mac *mac = ll_addr;
    269 
    270 	memset ( mac, 0, sizeof ( *mac ) );
    271 	memcpy ( &mac->gid.u.half[1], guid, sizeof ( mac->gid.u.half[1] ) );
    272 }
    273 
    274 /**
    275  * Transcribe IPoIB link-layer address
    276  *
    277  * @v ll_addr	Link-layer address
    278  * @ret string	Link-layer address in human-readable format
    279  */
    280 const char * ipoib_ntoa ( const void *ll_addr ) {
    281 	static char buf[45];
    282 	const struct ipoib_mac *mac = ll_addr;
    283 
    284 	snprintf ( buf, sizeof ( buf ), "%08x:%08x:%08x:%08x:%08x",
    285 		   htonl ( mac->flags__qpn ), htonl ( mac->gid.u.dwords[0] ),
    286 		   htonl ( mac->gid.u.dwords[1] ),
    287 		   htonl ( mac->gid.u.dwords[2] ),
    288 		   htonl ( mac->gid.u.dwords[3] ) );
    289 	return buf;
    290 }
    291 
    292 /**
    293  * Hash multicast address
    294  *
    295  * @v af		Address family
    296  * @v net_addr		Network-layer address
    297  * @v ll_addr		Link-layer address to fill in
    298  * @ret rc		Return status code
    299  */
    300 static int ipoib_mc_hash ( unsigned int af __unused,
    301 			   const void *net_addr __unused,
    302 			   void *ll_addr __unused ) {
    303 
    304 	return -ENOTSUP;
    305 }
    306 
    307 /**
    308  * Generate Mellanox Ethernet-compatible compressed link-layer address
    309  *
    310  * @v ll_addr		Link-layer address
    311  * @v eth_addr		Ethernet-compatible address to fill in
    312  */
    313 static int ipoib_mlx_eth_addr ( const struct ib_gid_half *guid,
    314 				uint8_t *eth_addr ) {
    315 	eth_addr[0] = ( ( guid->u.bytes[3] == 2 ) ? 0x00 : 0x02 );
    316 	eth_addr[1] = guid->u.bytes[1];
    317 	eth_addr[2] = guid->u.bytes[2];
    318 	eth_addr[3] = guid->u.bytes[5];
    319 	eth_addr[4] = guid->u.bytes[6];
    320 	eth_addr[5] = guid->u.bytes[7];
    321 	return 0;
    322 }
    323 
/** An IPoIB Ethernet-compatible compressed link-layer address generator
 *
 * ipoib_eth_addr() selects a generator by comparing byte1 and byte2
 * against bytes 1 and 2 of the GUID.
 */
struct ipoib_eth_addr_handler {
	/** GUID byte 1 */
	uint8_t byte1;
	/** GUID byte 2 */
	uint8_t byte2;
	/** Handler
	 *
	 * @v guid		GUID
	 * @v eth_addr		Ethernet-compatible address to fill in
	 * @ret rc		Return status code
	 */
	int ( * eth_addr ) ( const struct ib_gid_half *guid,
			     uint8_t *eth_addr );
};
    334 
/** IPoIB Ethernet-compatible compressed link-layer address generators
 *
 * Each entry gives the values of GUID bytes 1 and 2 that select it,
 * followed by the generator function.
 */
static struct ipoib_eth_addr_handler ipoib_eth_addr_handlers[] = {
	{ 0x02, 0xc9, ipoib_mlx_eth_addr },
};
    339 
    340 /**
    341  * Generate Ethernet-compatible compressed link-layer address
    342  *
    343  * @v ll_addr		Link-layer address
    344  * @v eth_addr		Ethernet-compatible address to fill in
    345  */
    346 static int ipoib_eth_addr ( const void *ll_addr, void *eth_addr ) {
    347 	const struct ipoib_mac *ipoib_addr = ll_addr;
    348 	const struct ib_gid_half *guid = &ipoib_addr->gid.u.half[1];
    349 	struct ipoib_eth_addr_handler *handler;
    350 	unsigned int i;
    351 
    352 	for ( i = 0 ; i < ( sizeof ( ipoib_eth_addr_handlers ) /
    353 			    sizeof ( ipoib_eth_addr_handlers[0] ) ) ; i++ ) {
    354 		handler = &ipoib_eth_addr_handlers[i];
    355 		if ( ( handler->byte1 == guid->u.bytes[1] ) &&
    356 		     ( handler->byte2 == guid->u.bytes[2] ) ) {
    357 			return handler->eth_addr ( guid, eth_addr );
    358 		}
    359 	}
    360 	return -ENOTSUP;
    361 }
    362 
/** IPoIB protocol
 *
 * Link-layer protocol descriptor.  alloc_ipoibdev() attaches it to
 * every network device that it allocates.  The hardware address is
 * one half of a GID (the GUID); the link-layer address and header
 * lengths are IPOIB_ALEN and IPOIB_HLEN.
 */
struct ll_protocol ipoib_protocol __ll_protocol = {
	.name		= "IPoIB",
	.ll_proto	= htons ( ARPHRD_INFINIBAND ),
	.hw_addr_len	= sizeof ( struct ib_gid_half ),
	.ll_addr_len	= IPOIB_ALEN,
	.ll_header_len	= IPOIB_HLEN,
	.push		= ipoib_push,
	.pull		= ipoib_pull,
	.init_addr	= ipoib_init_addr,
	.ntoa		= ipoib_ntoa,
	.mc_hash	= ipoib_mc_hash,
	.eth_addr	= ipoib_eth_addr,
};
    377 
    378 /**
    379  * Allocate IPoIB device
    380  *
    381  * @v priv_size		Size of driver private data
    382  * @ret netdev		Network device, or NULL
    383  */
    384 struct net_device * alloc_ipoibdev ( size_t priv_size ) {
    385 	struct net_device *netdev;
    386 
    387 	netdev = alloc_netdev ( priv_size );
    388 	if ( netdev ) {
    389 		netdev->ll_protocol = &ipoib_protocol;
    390 		netdev->ll_broadcast = ( uint8_t * ) &ipoib_broadcast;
    391 		netdev->max_pkt_len = IB_MAX_PAYLOAD_SIZE;
    392 	}
    393 	return netdev;
    394 }
    395 
    396 /****************************************************************************
    397  *
    398  * IPoIB network device
    399  *
    400  ****************************************************************************
    401  */
    402 
    403 /**
    404  * Transmit packet via IPoIB network device
    405  *
    406  * @v netdev		Network device
    407  * @v iobuf		I/O buffer
    408  * @ret rc		Return status code
    409  */
    410 static int ipoib_transmit ( struct net_device *netdev,
    411 			    struct io_buffer *iobuf ) {
    412 	struct ipoib_device *ipoib = netdev->priv;
    413 	struct ib_device *ibdev = ipoib->ibdev;
    414 	struct ipoib_hdr *ipoib_hdr;
    415 	struct ipoib_peer *dest;
    416 	struct ib_address_vector av;
    417 	int rc;
    418 
    419 	/* Sanity check */
    420 	if ( iob_len ( iobuf ) < sizeof ( *ipoib_hdr ) ) {
    421 		DBGC ( ipoib, "IPoIB %p buffer too short\n", ipoib );
    422 		return -EINVAL;
    423 	}
    424 	ipoib_hdr = iobuf->data;
    425 
    426 	/* Attempting transmission while link is down will put the
    427 	 * queue pair into an error state, so don't try it.
    428 	 */
    429 	if ( ! ib_link_ok ( ibdev ) )
    430 		return -ENETUNREACH;
    431 
    432 	/* Identify destination address */
    433 	dest = ipoib_lookup_peer_by_key ( ipoib_hdr->u.peer.dest );
    434 	if ( ! dest )
    435 		return -ENXIO;
    436 	ipoib_hdr->u.reserved = 0;
    437 
    438 	/* Construct address vector */
    439 	memset ( &av, 0, sizeof ( av ) );
    440 	av.qpn = ( ntohl ( dest->mac.flags__qpn ) & IB_QPN_MASK );
    441 	av.gid_present = 1;
    442 	memcpy ( &av.gid, &dest->mac.gid, sizeof ( av.gid ) );
    443 	if ( ( rc = ib_resolve_path ( ibdev, &av ) ) != 0 ) {
    444 		/* Path not resolved yet */
    445 		return rc;
    446 	}
    447 
    448 	return ib_post_send ( ibdev, ipoib->qp, &av, iobuf );
    449 }
    450 
    451 /**
    452  * Handle IPoIB send completion
    453  *
    454  * @v ibdev		Infiniband device
    455  * @v qp		Queue pair
    456  * @v iobuf		I/O buffer
    457  * @v rc		Completion status code
    458  */
    459 static void ipoib_complete_send ( struct ib_device *ibdev __unused,
    460 				  struct ib_queue_pair *qp,
    461 				  struct io_buffer *iobuf, int rc ) {
    462 	struct ipoib_device *ipoib = ib_qp_get_ownerdata ( qp );
    463 
    464 	netdev_tx_complete_err ( ipoib->netdev, iobuf, rc );
    465 }
    466 
    467 /**
    468  * Handle IPoIB receive completion
    469  *
    470  * @v ibdev		Infiniband device
    471  * @v qp		Queue pair
    472  * @v av		Address vector, or NULL
    473  * @v iobuf		I/O buffer
    474  * @v rc		Completion status code
    475  */
    476 static void ipoib_complete_recv ( struct ib_device *ibdev __unused,
    477 				  struct ib_queue_pair *qp,
    478 				  struct ib_address_vector *av,
    479 				  struct io_buffer *iobuf, int rc ) {
    480 	struct ipoib_device *ipoib = ib_qp_get_ownerdata ( qp );
    481 	struct net_device *netdev = ipoib->netdev;
    482 	struct ipoib_hdr *ipoib_hdr;
    483 	struct ipoib_mac ll_src;
    484 	struct ipoib_peer *src;
    485 
    486 	if ( rc != 0 ) {
    487 		netdev_rx_err ( netdev, iobuf, rc );
    488 		return;
    489 	}
    490 
    491 	/* Sanity check */
    492 	if ( iob_len ( iobuf ) < sizeof ( struct ipoib_hdr ) ) {
    493 		DBGC ( ipoib, "IPoIB %p received packet too short to "
    494 		       "contain IPoIB header\n", ipoib );
    495 		DBGC_HD ( ipoib, iobuf->data, iob_len ( iobuf ) );
    496 		netdev_rx_err ( netdev, iobuf, -EIO );
    497 		return;
    498 	}
    499 	ipoib_hdr = iobuf->data;
    500 
    501 	/* Parse source address */
    502 	if ( av->gid_present ) {
    503 		ll_src.flags__qpn = htonl ( av->qpn );
    504 		memcpy ( &ll_src.gid, &av->gid, sizeof ( ll_src.gid ) );
    505 		src = ipoib_cache_peer ( &ll_src );
    506 		ipoib_hdr->u.peer.src = src->key;
    507 	}
    508 
    509 	/* Hand off to network layer */
    510 	netdev_rx ( netdev, iobuf );
    511 }
    512 
/** IPoIB completion operations
 *
 * Passed to ib_create_cq() by ipoib_open().
 */
static struct ib_completion_queue_operations ipoib_cq_op = {
	.complete_send = ipoib_complete_send,
	.complete_recv = ipoib_complete_recv,
};
    518 
    519 /**
    520  * Poll IPoIB network device
    521  *
    522  * @v netdev		Network device
    523  */
    524 static void ipoib_poll ( struct net_device *netdev ) {
    525 	struct ipoib_device *ipoib = netdev->priv;
    526 	struct ib_device *ibdev = ipoib->ibdev;
    527 
    528 	ib_poll_eq ( ibdev );
    529 }
    530 
    531 /**
    532  * Enable/disable interrupts on IPoIB network device
    533  *
    534  * @v netdev		Network device
    535  * @v enable		Interrupts should be enabled
    536  */
    537 static void ipoib_irq ( struct net_device *netdev __unused,
    538 			int enable __unused ) {
    539 	/* No implementation */
    540 }
    541 
    542 /**
    543  * Handle IPv4 broadcast multicast group join completion
    544  *
    545  * @v ibdev		Infiniband device
    546  * @v qp		Queue pair
    547  * @v membership	Multicast group membership
    548  * @v rc		Status code
    549  * @v mad		Response MAD (or NULL on error)
    550  */
    551 void ipoib_join_complete ( struct ib_device *ibdev __unused,
    552 			   struct ib_queue_pair *qp __unused,
    553 			   struct ib_mc_membership *membership, int rc,
    554 			   union ib_mad *mad __unused ) {
    555 	struct ipoib_device *ipoib = container_of ( membership,
    556 				   struct ipoib_device, broadcast_membership );
    557 
    558 	/* Record join status as link status */
    559 	netdev_link_err ( ipoib->netdev, rc );
    560 }
    561 
    562 /**
    563  * Join IPv4 broadcast multicast group
    564  *
    565  * @v ipoib		IPoIB device
    566  * @ret rc		Return status code
    567  */
    568 static int ipoib_join_broadcast_group ( struct ipoib_device *ipoib ) {
    569 	int rc;
    570 
    571 	if ( ( rc = ib_mcast_join ( ipoib->ibdev, ipoib->qp,
    572 				    &ipoib->broadcast_membership,
    573 				    &ipoib->broadcast.gid,
    574 				    ipoib_join_complete ) ) != 0 ) {
    575 		DBGC ( ipoib, "IPoIB %p could not join broadcast group: %s\n",
    576 		       ipoib, strerror ( rc ) );
    577 		return rc;
    578 	}
    579 	ipoib->broadcast_joined = 1;
    580 
    581 	return 0;
    582 }
    583 
    584 /**
    585  * Leave IPv4 broadcast multicast group
    586  *
    587  * @v ipoib		IPoIB device
    588  */
    589 static void ipoib_leave_broadcast_group ( struct ipoib_device *ipoib ) {
    590 
    591 	if ( ipoib->broadcast_joined ) {
    592 		ib_mcast_leave ( ipoib->ibdev, ipoib->qp,
    593 				 &ipoib->broadcast_membership );
    594 		ipoib->broadcast_joined = 0;
    595 	}
    596 }
    597 
    598 /**
    599  * Open IPoIB network device
    600  *
    601  * @v netdev		Network device
    602  * @ret rc		Return status code
    603  */
    604 static int ipoib_open ( struct net_device *netdev ) {
    605 	struct ipoib_device *ipoib = netdev->priv;
    606 	struct ib_device *ibdev = ipoib->ibdev;
    607 	struct ipoib_mac *mac = ( ( struct ipoib_mac * ) netdev->ll_addr );
    608 	int rc;
    609 
    610 	/* Open IB device */
    611 	if ( ( rc = ib_open ( ibdev ) ) != 0 ) {
    612 		DBGC ( ipoib, "IPoIB %p could not open device: %s\n",
    613 		       ipoib, strerror ( rc ) );
    614 		goto err_ib_open;
    615 	}
    616 
    617 	/* Allocate completion queue */
    618 	ipoib->cq = ib_create_cq ( ibdev, IPOIB_NUM_CQES, &ipoib_cq_op );
    619 	if ( ! ipoib->cq ) {
    620 		DBGC ( ipoib, "IPoIB %p could not allocate completion queue\n",
    621 		       ipoib );
    622 		rc = -ENOMEM;
    623 		goto err_create_cq;
    624 	}
    625 
    626 	/* Allocate queue pair */
    627 	ipoib->qp = ib_create_qp ( ibdev, IB_QPT_UD,
    628 				   IPOIB_NUM_SEND_WQES, ipoib->cq,
    629 				   IPOIB_NUM_RECV_WQES, ipoib->cq );
    630 	if ( ! ipoib->qp ) {
    631 		DBGC ( ipoib, "IPoIB %p could not allocate queue pair\n",
    632 		       ipoib );
    633 		rc = -ENOMEM;
    634 		goto err_create_qp;
    635 	}
    636 	ib_qp_set_ownerdata ( ipoib->qp, ipoib );
    637 
    638 	/* Update MAC address with QPN */
    639 	mac->flags__qpn = htonl ( ipoib->qp->qpn );
    640 
    641 	/* Fill receive rings */
    642 	ib_refill_recv ( ibdev, ipoib->qp );
    643 
    644 	/* Fake a link status change to join the broadcast group */
    645 	ipoib_link_state_changed ( ibdev );
    646 
    647 	return 0;
    648 
    649 	ib_destroy_qp ( ibdev, ipoib->qp );
    650  err_create_qp:
    651 	ib_destroy_cq ( ibdev, ipoib->cq );
    652  err_create_cq:
    653 	ib_close ( ibdev );
    654  err_ib_open:
    655 	return rc;
    656 }
    657 
    658 /**
    659  * Close IPoIB network device
    660  *
    661  * @v netdev		Network device
    662  */
    663 static void ipoib_close ( struct net_device *netdev ) {
    664 	struct ipoib_device *ipoib = netdev->priv;
    665 	struct ib_device *ibdev = ipoib->ibdev;
    666 	struct ipoib_mac *mac = ( ( struct ipoib_mac * ) netdev->ll_addr );
    667 
    668 	/* Leave broadcast group */
    669 	ipoib_leave_broadcast_group ( ipoib );
    670 
    671 	/* Remove QPN from MAC address */
    672 	mac->flags__qpn = 0;
    673 
    674 	/* Tear down the queues */
    675 	ib_destroy_qp ( ibdev, ipoib->qp );
    676 	ib_destroy_cq ( ibdev, ipoib->cq );
    677 
    678 	/* Close IB device */
    679 	ib_close ( ibdev );
    680 }
    681 
/** IPoIB network device operations
 *
 * Installed on the network device by ipoib_probe() via netdev_init().
 */
static struct net_device_operations ipoib_operations = {
	.open		= ipoib_open,
	.close		= ipoib_close,
	.transmit	= ipoib_transmit,
	.poll		= ipoib_poll,
	.irq		= ipoib_irq,
};
    690 
    691 /**
    692  * Handle link status change
    693  *
    694  * @v ibdev		Infiniband device
    695  */
    696 void ipoib_link_state_changed ( struct ib_device *ibdev ) {
    697 	struct net_device *netdev = ib_get_ownerdata ( ibdev );
    698 	struct ipoib_device *ipoib = netdev->priv;
    699 	struct ipoib_mac *mac = ( ( struct ipoib_mac * ) netdev->ll_addr );
    700 	int rc;
    701 
    702 	/* Leave existing broadcast group */
    703 	ipoib_leave_broadcast_group ( ipoib );
    704 
    705 	/* Update MAC address based on potentially-new GID prefix */
    706 	memcpy ( &mac->gid.u.half[0], &ibdev->gid.u.half[0],
    707 		 sizeof ( mac->gid.u.half[0] ) );
    708 
    709 	/* Update broadcast GID based on potentially-new partition key */
    710 	ipoib->broadcast.gid.u.words[2] =
    711 		htons ( ibdev->pkey | IB_PKEY_FULL );
    712 
    713 	/* Set net device link state to reflect Infiniband link state */
    714 	rc = ib_link_rc ( ibdev );
    715 	netdev_link_err ( netdev, ( rc ? rc : -EINPROGRESS_JOINING ) );
    716 
    717 	/* Join new broadcast group */
    718 	if ( ib_link_ok ( ibdev ) &&
    719 	     ( ( rc = ipoib_join_broadcast_group ( ipoib ) ) != 0 ) ) {
    720 		DBGC ( ipoib, "IPoIB %p could not rejoin broadcast group: "
    721 		       "%s\n", ipoib, strerror ( rc ) );
    722 		netdev_link_err ( netdev, rc );
    723 		return;
    724 	}
    725 }
    726 
    727 /**
    728  * Probe IPoIB device
    729  *
    730  * @v ibdev		Infiniband device
    731  * @ret rc		Return status code
    732  */
    733 int ipoib_probe ( struct ib_device *ibdev ) {
    734 	struct net_device *netdev;
    735 	struct ipoib_device *ipoib;
    736 	int rc;
    737 
    738 	/* Allocate network device */
    739 	netdev = alloc_ipoibdev ( sizeof ( *ipoib ) );
    740 	if ( ! netdev )
    741 		return -ENOMEM;
    742 	netdev_init ( netdev, &ipoib_operations );
    743 	ipoib = netdev->priv;
    744 	ib_set_ownerdata ( ibdev, netdev );
    745 	netdev->dev = ibdev->dev;
    746 	memset ( ipoib, 0, sizeof ( *ipoib ) );
    747 	ipoib->netdev = netdev;
    748 	ipoib->ibdev = ibdev;
    749 
    750 	/* Extract hardware address */
    751 	memcpy ( netdev->hw_addr, &ibdev->gid.u.half[1],
    752 		 sizeof ( ibdev->gid.u.half[1] ) );
    753 
    754 	/* Set default broadcast address */
    755 	memcpy ( &ipoib->broadcast, &ipoib_broadcast,
    756 		 sizeof ( ipoib->broadcast ) );
    757 	netdev->ll_broadcast = ( ( uint8_t * ) &ipoib->broadcast );
    758 
    759 	/* Register network device */
    760 	if ( ( rc = register_netdev ( netdev ) ) != 0 )
    761 		goto err_register_netdev;
    762 
    763 	return 0;
    764 
    765  err_register_netdev:
    766 	netdev_nullify ( netdev );
    767 	netdev_put ( netdev );
    768 	return rc;
    769 }
    770 
    771 /**
    772  * Remove IPoIB device
    773  *
    774  * @v ibdev		Infiniband device
    775  */
    776 void ipoib_remove ( struct ib_device *ibdev ) {
    777 	struct net_device *netdev = ib_get_ownerdata ( ibdev );
    778 
    779 	unregister_netdev ( netdev );
    780 	netdev_nullify ( netdev );
    781 	netdev_put ( netdev );
    782 }
    783