Home | History | Annotate | Download | only in net
      1 /************************************************* -*- linux-c -*-
      2  * Myricom 10Gb Network Interface Card Software
      3  * Copyright 2009, Myricom, Inc.
      4  *
      5  * This program is free software; you can redistribute it and/or
      6  * modify it under the terms of the GNU General Public License,
      7  * version 2, as published by the Free Software Foundation.
      8  *
      9  * This program is distributed in the hope that it will be useful,
     10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     12  * GNU General Public License for more details.
     13  *
     14  * You should have received a copy of the GNU General Public License
     15  * along with this program; if not, write to the Free Software
     16  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
     17  ****************************************************************/
     18 
     19 FILE_LICENCE ( GPL2_ONLY );
     20 
     21 /*
     22  * Author: Glenn Brown <glenn (at) myri.com>
     23  */
     24 
     25 /*
     26  * General Theory of Operation
     27  *
     28  * This is a minimal Myricom 10 gigabit Ethernet driver for network
     29  * boot.
     30  *
     31  * Initialization
     32  *
     33  * myri10ge_pci_probe() is called by gPXE during initialization.
     34  * Minimal NIC initialization is performed to minimize resources
     35  * consumed when the driver is resident but unused.
     36  *
     37  * Network Boot
     38  *
     39  * myri10ge_net_open() is called by gPXE before attempting to network
     40  * boot from the card.  Packet buffers are allocated and the NIC
     41  * interface is initialized.
     42  *
     43  * Transmit
     44  *
     45  * myri10ge_net_transmit() enqueues frames for transmission by writing
 * descriptors to the NIC's tx ring.  For simplicity and to avoid
     47  * copies, we always have the NIC DMA up the packet.  The sent I/O
     48  * buffer is released once the NIC signals myri10ge_interrupt_handler()
     49  * that the send has completed.
     50  *
     51  * Receive
     52  *
     53  * Receives are posted to the NIC's receive ring.  The NIC fills a
     54  * DMAable receive_completion ring with completion notifications.
     55  * myri10ge_net_poll() polls for these receive notifications, posts
     56  * replacement receive buffers to the NIC, and passes received frames
     57  * to netdev_rx().
     58  */
     59 
     60 /*
     61  * Debugging levels:
     62  *	- DBG() is for any errors, i.e. failed alloc_iob(), malloc_dma(),
     63  *	  TX overflow, corrupted packets, ...
     64  *	- DBG2() is for successful events, like packet received,
     65  *	  packet transmitted, and other general notifications.
     66  *	- DBGP() prints the name of each called function on entry
     67  */
     68 
     69 #include <stdint.h>
     70 
     71 #include <byteswap.h>
     72 #include <errno.h>
     73 #include <gpxe/ethernet.h>
     74 #include <gpxe/if_ether.h>
     75 #include <gpxe/iobuf.h>
     76 #include <gpxe/malloc.h>
     77 #include <gpxe/netdevice.h>
     78 #include <gpxe/pci.h>
     79 #include <gpxe/timer.h>
     80 
     81 #include "myri10ge_mcp.h"
     82 
     83 /****************************************************************
     84  * Forward declarations
     85  ****************************************************************/
     86 
     87 /* PCI driver entry points */
     88 
     89 static int	myri10ge_pci_probe ( struct pci_device*,
     90 				     const struct pci_device_id* );
     91 static void	myri10ge_pci_remove ( struct pci_device* );
     92 
     93 /* Network device operations */
     94 
     95 static void	myri10ge_net_close ( struct net_device* );
     96 static void	myri10ge_net_irq ( struct net_device*, int enable );
     97 static int	myri10ge_net_open ( struct net_device* );
     98 static void	myri10ge_net_poll ( struct net_device* );
     99 static int	myri10ge_net_transmit ( struct net_device*, struct io_buffer* );
    100 
    101 /****************************************************************
    102  * Constants
    103  ****************************************************************/
    104 
    105 /* Maximum ring indices, used to wrap ring indices.  These must be 2**N-1. */
    106 
    107 #define MYRI10GE_TRANSMIT_WRAP                  1U
    108 #define MYRI10GE_RECEIVE_WRAP                   7U
    109 #define MYRI10GE_RECEIVE_COMPLETION_WRAP        31U
    110 
    111 /****************************************************************
    112  * Driver internal data types.
    113  ****************************************************************/
    114 
    115 /* Structure holding all DMA buffers for a NIC, which we will
    116    allocated as contiguous read/write DMAable memory when the NIC is
    117    initialized. */
    118 
    119 struct myri10ge_dma_buffers
    120 {
    121 	/* The NIC DMAs receive completion notifications into this ring */
    122 
    123 	mcp_slot_t receive_completion[1+MYRI10GE_RECEIVE_COMPLETION_WRAP];
    124 
    125 	/* Interrupt details are DMAd here before interrupting. */
    126 
    127 	mcp_irq_data_t irq_data; /* 64B */
    128 
    129 	/* NIC command completion status is DMAd here. */
    130 
    131 	mcp_cmd_response_t command_response; /* 8B */
    132 };
    133 
    134 struct myri10ge_private
    135 {
    136 	/* Interrupt support */
    137 
    138 	uint32	*irq_claim;	/* in NIC SRAM */
    139 	uint32	*irq_deassert;	/* in NIC SRAM */
    140 
    141 	/* DMA buffers. */
    142 
    143 	struct myri10ge_dma_buffers	*dma;
    144 
    145 	/*
    146 	 * Transmit state.
    147 	 *
    148 	 * The counts here are uint32 for easy comparison with
    149 	 * priv->dma->irq_data.send_done_count and with each other.
    150 	 */
    151 
    152 	mcp_kreq_ether_send_t	*transmit_ring;	/* in NIC SRAM */
    153 	uint32                   transmit_ring_wrap;
    154 	uint32                   transmits_posted;
    155 	uint32                   transmits_done;
    156 	struct io_buffer	*transmit_iob[1 + MYRI10GE_TRANSMIT_WRAP];
    157 
    158 	/*
    159 	 * Receive state.
    160 	 */
    161 
    162 	mcp_kreq_ether_recv_t	*receive_post_ring;	/* in NIC SRAM */
    163 	unsigned int             receive_post_ring_wrap;
    164 	unsigned int             receives_posted;
    165 	unsigned int             receives_done;
    166 	struct io_buffer	*receive_iob[1 + MYRI10GE_RECEIVE_WRAP];
    167 
    168 	/* Address for writing commands to the firmware.
    169 	   BEWARE: the value must be written 32 bits at a time. */
    170 
    171 	mcp_cmd_t	*command;
    172 };
    173 
    174 /****************************************************************
    175  * Driver internal functions.
    176  ****************************************************************/
    177 
    178 /* Print ring status when debugging.  Use this only after a printed
    179    value changes. */
    180 
    181 #define DBG2_RINGS( priv ) 						\
    182 	DBG2 ( "tx %x/%x rx %x/%x in %s() \n",				\
    183 	       ( priv ) ->transmits_done, ( priv ) -> transmits_posted,	\
    184 	       ( priv ) ->receives_done, ( priv ) -> receives_posted,	\
    185 	       __FUNCTION__ )
    186 
    187 /*
    188  * Return a pointer to the driver private data for a network device.
    189  *
    190  * @v netdev	Network device created by this driver.
    191  * @ret priv	The corresponding driver private data.
    192  */
    193 static inline struct myri10ge_private *myri10ge_priv ( struct net_device *nd )
    194 {
    195 	/* Our private data always follows the network device in memory,
    196 	   since we use alloc_netdev() to allocate the storage. */
    197 
    198 	return ( struct myri10ge_private * ) ( nd + 1 );
    199 }
    200 
    201 /*
    202  * Pass a receive buffer to the NIC to be filled.
    203  *
    204  * @v priv	The network device to receive the buffer.
    205  * @v iob	The I/O buffer to fill.
    206  *
    207  * Receive buffers are filled in FIFO order.
    208  */
    209 static void myri10ge_post_receive ( struct myri10ge_private *priv,
    210 				    struct io_buffer *iob )
    211 {
    212 	unsigned int		 receives_posted;
    213 	mcp_kreq_ether_recv_t	*request;
    214 
    215 	/* Record the posted I/O buffer, to be passed to netdev_rx() on
    216 	   receive. */
    217 
    218 	receives_posted = priv->receives_posted;
    219 	priv->receive_iob[receives_posted & MYRI10GE_RECEIVE_WRAP] = iob;
    220 
    221 	/* Post the receive. */
    222 
    223 	request = &priv->receive_post_ring[receives_posted
    224 					   & priv->receive_post_ring_wrap];
    225 	request->addr_high = 0;
    226 	wmb();
    227 	request->addr_low = htonl ( virt_to_bus ( iob->data ) );
    228 	priv->receives_posted = ++receives_posted;
    229 }
    230 
    231 /*
    232  * Execute a command on the NIC.
    233  *
    234  * @v priv	NIC to perform the command.
    235  * @v cmd	The command to perform.
    236  * @v data	I/O copy buffer for parameters/results
    237  * @ret rc	0 on success, else an error code.
    238  */
    239 static int myri10ge_command ( struct myri10ge_private *priv,
    240 			      uint32 cmd,
    241 			      uint32 data[3] )
    242 {
    243 	int				 i;
    244 	mcp_cmd_t			*command;
    245 	uint32				 result;
    246 	unsigned int			 slept_ms;
    247 	volatile mcp_cmd_response_t	*response;
    248 
    249 	DBGP ( "myri10ge_command ( ,%d, ) \n", cmd );
    250 	command = priv->command;
    251 	response = &priv->dma->command_response;
    252 
    253 	/* Mark the command as incomplete. */
    254 
    255 	response->result = 0xFFFFFFFF;
    256 
    257 	/* Pass the command to the NIC. */
    258 
    259 	command->cmd		    = htonl ( cmd );
    260 	command->data0		    = htonl ( data[0] );
    261 	command->data1		    = htonl ( data[1] );
    262 	command->data2		    = htonl ( data[2] );
    263 	command->response_addr.high = 0;
    264 	command->response_addr.low
    265 		= htonl ( virt_to_bus ( &priv->dma->command_response ) );
    266 	for ( i=0; i<36; i+=4 )
    267 		* ( uint32 * ) &command->pad[i] = 0;
    268 	wmb();
    269 	* ( uint32 * ) &command->pad[36] = 0;
    270 
    271 	/* Wait up to 2 seconds for a response. */
    272 
    273 	for ( slept_ms=0; slept_ms<2000; slept_ms++ ) {
    274 		result = response->result;
    275 		if ( result == 0 ) {
    276 			data[0] = ntohl ( response->data );
    277 			return 0;
    278 		} else if ( result != 0xFFFFFFFF ) {
    279 			DBG ( "cmd%d:0x%x\n",
    280 			      cmd,
    281 			      ntohl ( response->result ) );
    282 			return -EIO;
    283 		}
    284 		udelay ( 1000 );
    285 		rmb();
    286 	}
    287 	DBG ( "cmd%d:timed out\n", cmd );
    288 	return -ETIMEDOUT;
    289 }
    290 
    291 /*
    292  * Handle any pending interrupt.
    293  *
    294  * @v netdev		Device being polled for interrupts.
    295  *
    296  * This is called periodically to let the driver check for interrupts.
    297  */
    298 static void myri10ge_interrupt_handler ( struct net_device *netdev )
    299 {
    300 	struct myri10ge_private *priv;
    301 	mcp_irq_data_t		*irq_data;
    302 	uint8			 valid;
    303 
    304 	priv = myri10ge_priv ( netdev );
    305 	irq_data = &priv->dma->irq_data;
    306 
    307 	/* Return if there was no interrupt. */
    308 
    309 	rmb();
    310 	valid = irq_data->valid;
    311 	if ( !valid )
    312 		return;
    313 	DBG2 ( "irq " );
    314 
    315 	/* Tell the NIC to deassert the interrupt and clear
    316 	   irq_data->valid.*/
    317 
    318 	*priv->irq_deassert = 0;	/* any value is OK. */
    319 	mb();
    320 
    321 	/* Handle any new receives. */
    322 
    323 	if ( valid & 1 ) {
    324 
    325 		/* Pass the receive interrupt token back to the NIC. */
    326 
    327 		DBG2 ( "rx " );
    328 		*priv->irq_claim = htonl ( 3 );
    329 		wmb();
    330 	}
    331 
    332 	/* Handle any sent packet by freeing its I/O buffer, now that
    333 	   we know it has been DMAd. */
    334 
    335 	if ( valid & 2 ) {
    336 		unsigned int nic_done_count;
    337 
    338 		DBG2 ( "snt " );
    339 		nic_done_count = ntohl ( priv->dma->irq_data.send_done_count );
    340 		while ( priv->transmits_done != nic_done_count ) {
    341 			struct io_buffer *iob;
    342 
    343 			iob = priv->transmit_iob [priv->transmits_done
    344 						  & MYRI10GE_TRANSMIT_WRAP];
    345 			DBG2 ( "%p ", iob );
    346 			netdev_tx_complete ( netdev, iob );
    347 			++priv->transmits_done;
    348 		}
    349 	}
    350 
    351 	/* Record any statistics update. */
    352 
    353 	if ( irq_data->stats_updated ) {
    354 
    355 		/* Update the link status. */
    356 
    357 		DBG2 ( "stats " );
    358 		if ( ntohl ( irq_data->link_up ) == MXGEFW_LINK_UP )
    359 			netdev_link_up ( netdev );
    360 		else
    361 			netdev_link_down ( netdev );
    362 
    363 		/* Ignore all error counters from the NIC. */
    364 	}
    365 
    366 	/* Wait for the interrupt to be deasserted, as indicated by
    367 	   irq_data->valid, which is set by the NIC after the deassert. */
    368 
    369 	DBG2 ( "wait " );
    370 	do {
    371 		mb();
    372 	} while ( irq_data->valid );
    373 
    374 	/* Claim the interrupt to enable future interrupt generation. */
    375 
    376 	DBG2 ( "claim\n" );
    377 	* ( priv->irq_claim + 1 ) = htonl ( 3 );
    378 	mb();
    379 }
    380 
    381 /* Constants for reading the STRING_SPECS via the Myricom
    382    Vendor Specific PCI configuration space capability. */
    383 
    384 #define VS_ADDR ( vs + 0x18 )
    385 #define VS_DATA ( vs + 0x14 )
    386 #define VS_MODE ( vs + 0x10 )
    387 #define 	VS_MODE_READ32 0x3
    388 #define 	VS_MODE_LOCATE 0x8
    389 #define 		VS_LOCATE_STRING_SPECS 0x3
    390 
    391 /*
    392  * Read MAC address from its 'string specs' via the vendor-specific
    393  * capability.  (This capability allows NIC SRAM and ROM to be read
    394  * before it is mapped.)
    395  *
    396  * @v pci		The device.
    397  * @v mac		Buffer to store the MAC address.
    398  * @ret rc		Returns 0 on success, else an error code.
    399  */
    400 static int mac_address_from_string_specs ( struct pci_device *pci,
    401 						   uint8 mac[ETH_ALEN] )
    402 {
    403 	char string_specs[256];
    404 	char *ptr, *limit;
    405 	char *to = string_specs;
    406 	uint32 addr;
    407 	uint32 len;
    408 	unsigned int vs;
    409 	int mac_set = 0;
    410 
    411 	/* Find the "vendor specific" capability. */
    412 
    413 	vs = pci_find_capability ( pci, 9 );
    414 	if ( vs == 0 ) {
    415 		DBG ( "no VS\n" );
    416 		return -ENOTSUP;
    417 	}
    418 
    419 	/* Locate the String specs in LANai SRAM. */
    420 
    421 	pci_write_config_byte ( pci, VS_MODE, VS_MODE_LOCATE );
    422 	pci_write_config_dword ( pci, VS_ADDR, VS_LOCATE_STRING_SPECS );
    423 	pci_read_config_dword ( pci, VS_ADDR, &addr );
    424 	pci_read_config_dword ( pci, VS_DATA, &len );
    425 	DBG2 ( "ss@%x,%x\n", addr, len );
    426 
    427 	/* Copy in the string specs.  Use 32-bit reads for performance. */
    428 
    429 	if ( len > sizeof ( string_specs ) || ( len & 3 ) ) {
    430 		DBG ( "SS too big\n" );
    431 		return -ENOTSUP;
    432 	}
    433 
    434 	pci_write_config_byte ( pci, VS_MODE, VS_MODE_READ32 );
    435 	while ( len >= 4 ) {
    436 		uint32 tmp;
    437 
    438 		pci_write_config_byte ( pci, VS_ADDR, addr );
    439 		pci_read_config_dword ( pci, VS_DATA, &tmp );
    440 		tmp = ntohl ( tmp );
    441 		memcpy ( to, &tmp, 4 );
    442 		to += 4;
    443 		addr += 4;
    444 		len -= 4;
    445 	}
    446 	pci_write_config_byte ( pci, VS_MODE, 0 );
    447 
    448 	/* Parse the string specs. */
    449 
    450 	DBG2 ( "STRING_SPECS:\n" );
    451 	ptr = string_specs;
    452 	limit = string_specs + sizeof ( string_specs );
    453 	while ( *ptr != '\0' && ptr < limit ) {
    454 		DBG2 ( "%s\n", ptr );
    455 		if ( memcmp ( ptr, "MAC=", 4 ) == 0 ) {
    456 			unsigned int i;
    457 
    458 			ptr += 4;
    459 			for ( i=0; i<6; i++ ) {
    460 				if ( ( ptr + 2 ) > limit ) {
    461 					DBG ( "bad MAC addr\n" );
    462 					return -ENOTSUP;
    463 				}
    464 				mac[i] = strtoul ( ptr, &ptr, 16 );
    465 				ptr += 1;
    466 			}
    467 			mac_set = 1;
    468 		}
    469 		else
    470 			while ( ptr < limit && *ptr++ );
    471 	}
    472 
    473 	/* Verify we parsed all we need. */
    474 
    475 	if ( !mac_set ) {
    476 		DBG ( "no MAC addr\n" );
    477 		return -ENOTSUP;
    478 	}
    479 
    480 	DBG2 ( "MAC %02x:%02x:%02x:%02x:%02x:%02x\n",
    481 	       mac[0], mac[1], mac[2], mac[3], mac[4], mac[5] );
    482 
    483 	return 0;
    484 }
    485 
    486 /****************************************************************
    487  * gPXE PCI Device Driver API functions
    488  ****************************************************************/
    489 
    490 /*
    491  * Initialize the PCI device.
    492  *
    493  * @v pci 		The device's associated pci_device structure.
    494  * @v id  		The PCI device + vendor id.
    495  * @ret rc		Returns zero if successfully initialized.
    496  *
    497  * This function is called very early on, while gPXE is initializing.
    498  * This is a gPXE PCI Device Driver API function.
    499  */
    500 static int myri10ge_pci_probe ( struct pci_device *pci,
    501 				const struct pci_device_id *id __unused )
    502 {
    503 	static struct net_device_operations myri10ge_operations = {
    504 		.open     = myri10ge_net_open,
    505 		.close    = myri10ge_net_close,
    506 		.transmit = myri10ge_net_transmit,
    507 		.poll     = myri10ge_net_poll,
    508 		.irq      = myri10ge_net_irq
    509 	};
    510 
    511 	const char *dbg;
    512 	int rc;
    513 	struct net_device *netdev;
    514 	struct myri10ge_private *priv;
    515 
    516 	DBGP ( "myri10ge_pci_probe: " );
    517 
    518 	netdev = alloc_etherdev ( sizeof ( *priv ) );
    519 	if ( !netdev ) {
    520 		rc = -ENOMEM;
    521 		dbg = "alloc_etherdev";
    522 		goto abort_with_nothing;
    523 	}
    524 
    525 	netdev_init ( netdev, &myri10ge_operations );
    526 	priv = myri10ge_priv ( netdev );
    527 
    528 	pci_set_drvdata ( pci, netdev );
    529 	netdev->dev = &pci->dev;
    530 
    531 	/* Make sure interrupts are disabled. */
    532 
    533 	myri10ge_net_irq ( netdev, 0 );
    534 
    535 	/* Read the NIC HW address. */
    536 
    537 	rc = mac_address_from_string_specs ( pci, netdev->hw_addr );
    538 	if ( rc ) {
    539 		dbg = "mac_from_ss";
    540 		goto abort_with_netdev_init;
    541 	}
    542 	DBGP ( "mac " );
    543 
    544 	/* Enable bus master, etc. */
    545 
    546 	adjust_pci_device ( pci );
    547 	DBGP ( "pci " );
    548 
    549 	/* Register the initialized network device. */
    550 
    551 	rc = register_netdev ( netdev );
    552 	if ( rc ) {
    553 		dbg = "register_netdev";
    554 		goto abort_with_netdev_init;
    555 	}
    556 
    557 	DBGP ( "done\n" );
    558 
    559 	return 0;
    560 
    561 abort_with_netdev_init:
    562 	netdev_nullify ( netdev );
    563 	netdev_put ( netdev );
    564 abort_with_nothing:
    565 	DBG ( "%s:%s\n", dbg, strerror ( rc ) );
    566 	return rc;
    567 }
    568 
    569 /*
    570  * Remove a device from the PCI device list.
    571  *
    572  * @v pci		PCI device to remove.
    573  *
    574  * This is a PCI Device Driver API function.
    575  */
    576 static void myri10ge_pci_remove ( struct pci_device *pci )
    577 {
    578 	struct net_device	*netdev;
    579 
    580 	DBGP ( "myri10ge_pci_remove\n" );
    581 	netdev = pci_get_drvdata ( pci );
    582 
    583 	unregister_netdev ( netdev );
    584 	netdev_nullify ( netdev );
    585 	netdev_put ( netdev );
    586 }
    587 
    588 /****************************************************************
    589  * gPXE Network Device Driver Operations
    590  ****************************************************************/
    591 
    592 /*
    593  * Close a network device.
    594  *
    595  * @v netdev		Device to close.
    596  *
    597  * This is a gPXE Network Device Driver API function.
    598  */
    599 static void myri10ge_net_close ( struct net_device *netdev )
    600 {
    601 	struct myri10ge_private *priv;
    602 	uint32			 data[3];
    603 
    604 	DBGP ( "myri10ge_net_close\n" );
    605 	priv = myri10ge_priv ( netdev );
    606 
    607 	/* disable interrupts */
    608 
    609 	myri10ge_net_irq ( netdev, 0 );
    610 
    611 	/* Reset the NIC interface, so we won't get any more events from
    612 	   the NIC. */
    613 
    614 	myri10ge_command ( priv, MXGEFW_CMD_RESET, data );
    615 
    616 	/* Free receive buffers that were never filled. */
    617 
    618 	while ( priv->receives_done != priv->receives_posted ) {
    619 		free_iob ( priv->receive_iob[priv->receives_done
    620 					     & MYRI10GE_RECEIVE_WRAP] );
    621 		++priv->receives_done;
    622 	}
    623 
    624 	/* Release DMAable memory. */
    625 
    626 	free_dma ( priv->dma, sizeof ( *priv->dma ) );
    627 
    628 	/* Erase all state from the open. */
    629 
    630 	memset ( priv, 0, sizeof ( *priv ) );
    631 
    632 	DBG2_RINGS ( priv );
    633 }
    634 
    635 /*
    636  * Enable or disable IRQ masking.
    637  *
    638  * @v netdev		Device to control.
    639  * @v enable		Zero to mask off IRQ, non-zero to enable IRQ.
    640  *
    641  * This is a gPXE Network Driver API function.
    642  */
    643 static void myri10ge_net_irq ( struct net_device *netdev, int enable )
    644 {
    645 	struct pci_device	*pci_dev;
    646 	uint16			 val;
    647 
    648 	DBGP ( "myri10ge_net_irq\n" );
    649 	pci_dev = ( struct pci_device * ) netdev->dev;
    650 
    651 	/* Adjust the Interrupt Disable bit in the Command register of the
    652 	   PCI Device. */
    653 
    654 	pci_read_config_word ( pci_dev, PCI_COMMAND, &val );
    655 	if ( enable )
    656 		val &= ~PCI_COMMAND_INTX_DISABLE;
    657 	else
    658 		val |= PCI_COMMAND_INTX_DISABLE;
    659 	pci_write_config_word ( pci_dev, PCI_COMMAND, val );
    660 }
    661 
    662 /*
    663  * Opens a network device.
    664  *
    665  * @v netdev		Device to be opened.
    666  * @ret rc  		Non-zero if failed to open.
    667  *
    668  * This enables tx and rx on the device.
    669  * This is a gPXE Network Device Driver API function.
    670  */
    671 static int myri10ge_net_open ( struct net_device *netdev )
    672 {
    673 	const char		*dbg;	/* printed upon error return */
    674 	int			 rc;
    675 	struct io_buffer	*iob;
    676 	struct myri10ge_private *priv;
    677 	uint32			 data[3];
    678 	struct pci_device	*pci_dev;
    679 	void			*membase;
    680 
    681 	DBGP ( "myri10ge_net_open\n" );
    682 	priv	= myri10ge_priv ( netdev );
    683 	pci_dev = ( struct pci_device * ) netdev->dev;
    684 	membase = phys_to_virt ( pci_dev->membase );
    685 
    686 	/* Compute address for passing commands to the firmware. */
    687 
    688 	priv->command = membase + MXGEFW_ETH_CMD;
    689 
    690 	/* Ensure interrupts are disabled. */
    691 
    692 	myri10ge_net_irq ( netdev, 0 );
    693 
    694 	/* Allocate cleared DMAable buffers. */
    695 
    696 	priv->dma = malloc_dma ( sizeof ( *priv->dma ) , 128 );
    697 	if ( !priv->dma ) {
    698 		rc = -ENOMEM;
    699 		dbg = "DMA";
    700 		goto abort_with_nothing;
    701 	}
    702 	memset ( priv->dma, 0, sizeof ( *priv->dma ) );
    703 
    704 	/* Simplify following code. */
    705 
    706 #define TRY( prefix, base, suffix ) do {		\
    707 		rc = myri10ge_command ( priv,		\
    708 					MXGEFW_		\
    709 					## prefix	\
    710 					## base		\
    711 					## suffix,	\
    712 					data );		\
    713 		if ( rc ) {				\
    714 			dbg = #base;			\
    715 			goto abort_with_dma;		\
    716 		}					\
    717 	} while ( 0 )
    718 
    719 	/* Send a reset command to the card to see if it is alive,
    720 	   and to reset its queue state. */
    721 
    722 	TRY ( CMD_, RESET , );
    723 
    724 	/* Set the interrupt queue size. */
    725 
    726 	data[0] = ( sizeof ( priv->dma->receive_completion )
    727 		    | MXGEFW_CMD_SET_INTRQ_SIZE_FLAG_NO_STRICT_SIZE_CHECK );
    728 	TRY ( CMD_SET_ , INTRQ_SIZE , );
    729 
    730 	/* Set the interrupt queue DMA address. */
    731 
    732 	data[0] = virt_to_bus ( &priv->dma->receive_completion );
    733 	data[1] = 0;
    734 	TRY ( CMD_SET_, INTRQ_DMA, );
    735 
    736 	/* Get the NIC interrupt claim address. */
    737 
    738 	TRY ( CMD_GET_, IRQ_ACK, _OFFSET );
    739 	priv->irq_claim = membase + data[0];
    740 
    741 	/* Get the NIC interrupt assert address. */
    742 
    743 	TRY ( CMD_GET_, IRQ_DEASSERT, _OFFSET );
    744 	priv->irq_deassert = membase + data[0];
    745 
    746 	/* Disable interrupt coalescing, which is inappropriate for the
    747 	   minimal buffering we provide. */
    748 
    749 	TRY ( CMD_GET_, INTR_COAL, _DELAY_OFFSET );
    750 	* ( ( uint32 * ) ( membase + data[0] ) ) = 0;
    751 
    752 	/* Set the NIC mac address. */
    753 
    754 	data[0] = ( netdev->ll_addr[0] << 24
    755 		    | netdev->ll_addr[1] << 16
    756 		    | netdev->ll_addr[2] << 8
    757 		    | netdev->ll_addr[3] );
    758 	data[1] = ( ( netdev->ll_addr[4] << 8 )
    759 		     | netdev->ll_addr[5] );
    760 	TRY ( SET_ , MAC_ADDRESS , );
    761 
    762 	/* Enable multicast receives, because some gPXE clients don't work
    763 	   without multicast. . */
    764 
    765 	TRY ( ENABLE_ , ALLMULTI , );
    766 
    767 	/* Disable Ethernet flow control, so the NIC cannot deadlock the
    768 	   network under any circumstances. */
    769 
    770 	TRY ( DISABLE_ , FLOW , _CONTROL );
    771 
    772 	/* Compute transmit ring sizes. */
    773 
    774 	data[0] = 0;		/* slice 0 */
    775 	TRY ( CMD_GET_, SEND_RING, _SIZE );
    776 	priv->transmit_ring_wrap
    777 		= data[0] / sizeof ( mcp_kreq_ether_send_t ) - 1;
    778 	if ( priv->transmit_ring_wrap
    779 	     & ( priv->transmit_ring_wrap + 1 ) ) {
    780 		rc = -EPROTO;
    781 		dbg = "TX_RING";
    782 		goto abort_with_dma;
    783 	}
    784 
    785 	/* Compute receive ring sizes. */
    786 
    787 	data[0] = 0;		/* slice 0 */
    788 	TRY ( CMD_GET_ , RX_RING , _SIZE );
    789 	priv->receive_post_ring_wrap = data[0] / sizeof ( mcp_dma_addr_t ) - 1;
    790 	if ( priv->receive_post_ring_wrap
    791 	     & ( priv->receive_post_ring_wrap + 1 ) ) {
    792 		rc = -EPROTO;
    793 		dbg = "RX_RING";
    794 		goto abort_with_dma;
    795 	}
    796 
    797 	/* Get NIC transmit ring address. */
    798 
    799 	data[0] = 0;		/* slice 0. */
    800 	TRY ( CMD_GET_, SEND, _OFFSET );
    801 	priv->transmit_ring = membase + data[0];
    802 
    803 	/* Get the NIC receive ring address. */
    804 
    805 	data[0] = 0;		/* slice 0. */
    806 	TRY ( CMD_GET_, SMALL_RX, _OFFSET );
    807 	priv->receive_post_ring = membase + data[0];
    808 
    809 	/* Set the Nic MTU. */
    810 
    811 	data[0] = ETH_FRAME_LEN;
    812 	TRY ( CMD_SET_, MTU, );
    813 
    814 	/* Tell the NIC our buffer sizes. ( We use only small buffers, so we
    815 	   set both buffer sizes to the same value, which will force all
    816 	   received frames to use small buffers. ) */
    817 
    818 	data[0] = MXGEFW_PAD + ETH_FRAME_LEN;
    819 	TRY ( CMD_SET_, SMALL_BUFFER, _SIZE );
    820 	data[0] = MXGEFW_PAD + ETH_FRAME_LEN;
    821 	TRY ( CMD_SET_, BIG_BUFFER, _SIZE );
    822 
    823         /* Tell firmware where to DMA IRQ data */
    824 
    825 	data[0] = virt_to_bus ( &priv->dma->irq_data );
    826 	data[1] = 0;
    827 	data[2] = sizeof ( priv->dma->irq_data );
    828 	TRY ( CMD_SET_, STATS_DMA_V2, );
    829 
    830 	/* Post receives. */
    831 
    832 	while ( priv->receives_posted <= MYRI10GE_RECEIVE_WRAP ) {
    833 
    834 		/* Reserve 2 extra bytes at the start of packets, since
    835 		   the firmware always skips the first 2 bytes of the buffer
    836 		   so TCP headers will be aligned. */
    837 
    838 		iob = alloc_iob ( MXGEFW_PAD + ETH_FRAME_LEN );
    839 		if ( !iob ) {
    840 			rc = -ENOMEM;
    841 			dbg = "alloc_iob";
    842 			goto abort_with_receives_posted;
    843 		}
    844 		iob_reserve ( iob, MXGEFW_PAD );
    845 		myri10ge_post_receive ( priv, iob );
    846 	}
    847 
    848 	/* Bring up the link. */
    849 
    850 	TRY ( CMD_, ETHERNET_UP, );
    851 
    852 	DBG2_RINGS ( priv );
    853 	return 0;
    854 
    855 abort_with_receives_posted:
    856 	while ( priv->receives_posted-- )
    857 		free_iob ( priv->receive_iob[priv->receives_posted] );
    858 abort_with_dma:
    859 	/* Because the link is not up, we don't have to reset the NIC here. */
    860 	free_dma ( priv->dma, sizeof ( *priv->dma ) );
    861 abort_with_nothing:
    862 	/* Erase all signs of the failed open. */
    863 	memset ( priv, 0, sizeof ( *priv ) );
    864 	DBG ( "%s: %s\n", dbg, strerror ( rc ) );
    865 	return ( rc );
    866 }
    867 
    868 /*
    869  * This function allows a driver to process events during operation.
    870  *
    871  * @v netdev		Device being polled.
    872  *
    873  * This is called periodically by gPXE to let the driver check the status of
    874  * transmitted packets and to allow the driver to check for received packets.
    875  * This is a gPXE Network Device Driver API function.
    876  */
    877 static void myri10ge_net_poll ( struct net_device *netdev )
    878 {
    879 	struct io_buffer		*iob;
    880 	struct io_buffer		*replacement;
    881 	struct myri10ge_dma_buffers	*dma;
    882 	struct myri10ge_private		*priv;
    883 	unsigned int			 length;
    884 	unsigned int			 orig_receives_posted;
    885 
    886 	DBGP ( "myri10ge_net_poll\n" );
    887 	priv = myri10ge_priv ( netdev );
    888 	dma  = priv->dma;
    889 
    890 	/* Process any pending interrupt. */
    891 
    892 	myri10ge_interrupt_handler ( netdev );
    893 
    894 	/* Pass up received frames, but limit ourselves to receives posted
    895 	   before this function was called, so we cannot livelock if
    896 	   receives are arriving faster than we process them. */
    897 
    898 	orig_receives_posted = priv->receives_posted;
    899 	while ( priv->receives_done != orig_receives_posted ) {
    900 
    901 		/* Stop if there is no pending receive. */
    902 
    903 		length = ntohs ( dma->receive_completion
    904 				 [priv->receives_done
    905 				  & MYRI10GE_RECEIVE_COMPLETION_WRAP]
    906 				 .length );
    907 		if ( length == 0 )
    908 			break;
    909 
    910 		/* Allocate a replacement buffer.  If none is available,
    911 		   stop passing up packets until a buffer is available.
    912 
    913 		   Reserve 2 extra bytes at the start of packets, since
    914 		   the firmware always skips the first 2 bytes of the buffer
    915 		   so TCP headers will be aligned. */
    916 
    917 		replacement = alloc_iob ( MXGEFW_PAD + ETH_FRAME_LEN );
    918 		if ( !replacement ) {
    919 			DBG ( "NO RX BUF\n" );
    920 			break;
    921 		}
    922 		iob_reserve ( replacement, MXGEFW_PAD );
    923 
    924 		/* Pass up the received frame. */
    925 
    926 		iob = priv->receive_iob[priv->receives_done
    927 					& MYRI10GE_RECEIVE_WRAP];
    928 		iob_put ( iob, length );
    929 		netdev_rx ( netdev, iob );
    930 
    931 		/* We have consumed the packet, so clear the receive
    932 		   notification. */
    933 
    934 		dma->receive_completion [priv->receives_done
    935 					 & MYRI10GE_RECEIVE_COMPLETION_WRAP]
    936 			.length = 0;
    937 		wmb();
    938 
    939 		/* Replace the passed-up I/O buffer. */
    940 
    941 		myri10ge_post_receive ( priv, replacement );
    942 		++priv->receives_done;
    943 		DBG2_RINGS ( priv );
    944 	}
    945 }
    946 
    947 /*
    948  * This transmits a packet.
    949  *
    950  * @v netdev		Device to transmit from.
    951  * @v iobuf 		Data to transmit.
    952  * @ret rc  		Non-zero if failed to transmit.
    953  *
    954  * This is a gPXE Network Driver API function.
    955  */
    956 static int myri10ge_net_transmit ( struct net_device *netdev,
    957 				   struct io_buffer *iobuf )
    958 {
    959 	mcp_kreq_ether_send_t	*kreq;
    960 	size_t			 len;
    961 	struct myri10ge_private *priv;
    962 	uint32			 transmits_posted;
    963 
    964 	DBGP ( "myri10ge_net_transmit\n" );
    965 	priv = myri10ge_priv ( netdev );
    966 
    967 	/* Confirm space in the send ring. */
    968 
    969 	transmits_posted = priv->transmits_posted;
    970 	if ( transmits_posted - priv->transmits_done
    971 	     > MYRI10GE_TRANSMIT_WRAP ) {
    972 		DBG ( "TX ring full\n" );
    973 		return -ENOBUFS;
    974 	}
    975 
    976 	DBG2 ( "TX %p+%d ", iobuf->data, iob_len ( iobuf ) );
    977 	DBG2_HD ( iobuf->data, 14 );
    978 
    979 	/* Record the packet being transmitted, so we can later report
    980 	   send completion. */
    981 
    982 	priv->transmit_iob[transmits_posted & MYRI10GE_TRANSMIT_WRAP] = iobuf;
    983 
    984 	/* Copy and pad undersized frames, because the NIC does not pad,
    985 	   and we would rather copy small frames than do a gather. */
    986 
    987 	len = iob_len ( iobuf );
    988 	if ( len < ETH_ZLEN ) {
    989 		iob_pad ( iobuf, ETH_ZLEN );
    990 		len = ETH_ZLEN;
    991 	}
    992 
    993 	/* Enqueue the packet by writing a descriptor to the NIC.
    994 	   This is a bit tricky because the HW requires 32-bit writes,
    995 	   but the structure has smaller fields. */
    996 
    997 	kreq = &priv->transmit_ring[transmits_posted
    998 				    & priv->transmit_ring_wrap];
    999 	kreq->addr_high = 0;
   1000 	kreq->addr_low = htonl ( virt_to_bus ( iobuf->data ) );
   1001 	( ( uint32 * ) kreq ) [2] = htonl (
   1002 		0x0000 << 16	 /* pseudo_header_offset */
   1003 		| ( len & 0xFFFF ) /* length */
   1004 		);
   1005 	wmb();
   1006 	( ( uint32 * ) kreq ) [3] = htonl (
   1007 		0x00 << 24	/* pad */
   1008 		| 0x01 << 16	/* rdma_count */
   1009 		| 0x00 << 8	/* cksum_offset */
   1010 		| ( MXGEFW_FLAGS_SMALL
   1011 		    | MXGEFW_FLAGS_FIRST
   1012 		    | MXGEFW_FLAGS_NO_TSO ) /* flags */
   1013 		);
   1014 	wmb();
   1015 
   1016 	/* Mark the slot as consumed and return. */
   1017 
   1018 	priv->transmits_posted = ++transmits_posted;
   1019 	DBG2_RINGS ( priv );
   1020 	return 0;
   1021 }
   1022 
   1023 static struct pci_device_id myri10ge_nics[] = {
   1024 	/* Each of these macros must be a single line to satisfy a script. */
   1025 	PCI_ROM ( 0x14c1, 0x0008, "myri10ge", "Myricom 10Gb Ethernet Adapter", 0 ) ,
   1026 };
   1027 
   1028 struct pci_driver myri10ge_driver __pci_driver = {
   1029 	.ids      = myri10ge_nics,
   1030 	.id_count = ( sizeof ( myri10ge_nics ) / sizeof ( myri10ge_nics[0] ) ) ,
   1031 	.probe    = myri10ge_pci_probe,
   1032 	.remove   = myri10ge_pci_remove
   1033 };
   1034 
   1035 /*
   1036  * Local variables:
   1037  *  c-basic-offset: 8
   1038  *  c-indent-level: 8
   1039  *  tab-width: 8
   1040  * End:
   1041  */
   1042