[ewg] Re: [PATCH 2/14] nes: device structures and defines

Jeff Garzik jeff at garzik.org
Tue Aug 7 18:13:30 PDT 2007


ggrundstrom at neteffect.com wrote:
> +#ifndef PCI_VENDOR_ID_NETEFFECT	/* not in pci.ids yet */
> +#define PCI_VENDOR_ID_NETEFFECT 0x1678

this should be part of your patch


> +#define PCI_DEVICE_ID_NETEFFECT_NE020 0x0100

no need for a #define at all, just use the hex number if the ONLY place 
its used is in the pci_device_id table.

Doing so avoids patch hell that is pci_ids.h, avoids adding a constant 
for a single-use hex number that's arbitrary anyway


> +#define BAR_0                	0
> +#define BAR_1                	2

delete


> +#define RX_BUF_SIZE         	(1536 + 8)

this number was blindly copied from another driver, right?


> +#ifdef NES_DEBUG
> +#define assert(expr)												\
> +if(!(expr)) {														\
> +	printk(KERN_ERR PFX "Assertion failed! %s, %s, %s, line %d\n",	\
> +		   #expr, __FILE__, __FUNCTION__, __LINE__);				\
> +}
> +#ifndef dprintk
> +#define dprintk(fmt, args...) do { printk(KERN_ERR PFX fmt, ##args); } while (0)
> +#endif

look around, we already have debug macros.  you're probably copying from 
an older net driver that doesn't yet use the new stuff


> +#define NES_EVENT_TIMEOUT	1200000
> +/* #define NES_EVENT_TIMEOUT	1200 */
> +#else
> +#define assert(expr)          do {} while (0)
> +#define dprintk(fmt, args...) do {} while (0)
> +
> +#define NES_EVENT_TIMEOUT	100000
> +#endif
> +
> +#include "nes_hw.h"
> +#include "nes_verbs.h"
> +#include "nes_context.h"
> +#include "nes_user.h"
> +#include "nes_cm.h"
> +
> +
> +extern unsigned int nes_drv_opt;
> +extern unsigned int nes_debug_level;
> +
> +extern struct list_head nes_adapter_list;
> +extern struct list_head nes_dev_list;
> +
> +extern int max_mtu;
> +#define max_frame_len (max_mtu+ETH_HLEN)
> +extern int interrupt_mod_interval;
> +
> +struct nes_device {
> +	struct nes_adapter *nesadapter;
> +	void __iomem *regs;
> +	void __iomem *index_reg;
> +	struct pci_dev *pcidev;
> +	struct net_device *netdev[NES_NIC_MAX_NICS];

this is questionable.  why do you need multiple netdevs?  multiple 
ports?  ok.  multiple queues?  not ok.  see recent netdev discussions.


> +	u64 link_status_interrupts;
> +	struct tasklet_struct dpc_tasklet;
> +	spinlock_t indexed_regs_lock;
> +	unsigned long doorbell_start;
> +	unsigned long csr_start;
> +	unsigned long mac_tx_errors;
> +	unsigned long mac_pause_frames_sent;
> +	unsigned long mac_pause_frames_received;
> +	unsigned long mac_rx_errors;
> +	unsigned long mac_rx_crc_errors;
> +	unsigned long mac_rx_symbol_err_frames;
> +	unsigned long mac_rx_jabber_frames;
> +	unsigned long mac_rx_oversized_frames;
> +	unsigned long mac_rx_short_frames;
> +	unsigned int mac_index;
> +	unsigned int nes_stack_start;
> +
> +	/* Control Structures */
> +	void *cqp_vbase;
> +	dma_addr_t cqp_pbase;
> +	u32 cqp_mem_size;
> +	u8 ceq_index;
> +	u8 nic_ceq_index;
> +	struct nes_hw_cqp cqp;
> +	struct nes_hw_cq ccq;
> +	struct list_head cqp_avail_reqs;
> +	struct list_head cqp_pending_reqs;
> +	struct nes_cqp_request *nes_cqp_requests;
> +
> +	u32 int_req;
> +	u32 int_stat;
> +	u32 timer_int_req;
> +	u32 timer_only_int_count;
> +	u32 intf_int_req;
> +	u32 et_rx_coalesce_usecs_irq;
> +	struct list_head list;
> +
> +	u16 base_doorbell_index;
> +	u8 msi_enabled;
> +	u8 netdev_count;
> +	u8 napi_isr_ran;
> +	u8 disable_rx_flow_control;
> +	u8 disable_tx_flow_control;

#1: please consider using tabs to separate type and name, which makes 
the struct definition far easier to read.

See drivers/net/tg3.h for an example

#2: consider putting all RX-related items together, and all TX-related 
items together.  this makes cacheline sharing more likely.


> +/* Linux kernel version interface changes */
> +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18))
> +static inline unsigned short nes_skb_lso_size(const struct sk_buff *skb)
> +{
> +	return(skb_shinfo(skb)->tso_size);
> +}
> +#define nes_skb_linearize(_skb, _type)  skb_linearize(_skb, _type)
> +#define NES_INIT_WORK(_work, _func, _data)  INIT_WORK(_work, _func)
> +#else
> +static inline unsigned short nes_skb_lso_size(const struct sk_buff *skb)
> +{
> +	return(skb_shinfo(skb)->gso_size);
> +}
> +#define nes_skb_linearize(_skb, _type)  skb_linearize(_skb)
> +#define NES_INIT_WORK(_work, _func, _data)  INIT_WORK(_work, _func)
> +#endif

delete all old-kernel compat code.  not appropriate for upstream submission



> +static inline u32 nes_read32(const void __iomem * addr)
> +{
> +	return(le32_to_cpu(readl(addr)));
> +}
> +
> +static inline u16 nes_read16(const void __iomem * addr)
> +{
> +	return(le16_to_cpu(readw(addr)));
> +}
> +
> +static inline u8 nes_read8(const void __iomem * addr)
> +{
> +	return(readb(addr));
> +}

#1:  delete these completely useless wrappers

#2:  these wrappers are WRONG anyway.  You don't need endian conversion 
macros.


> +/* Write to memory-mapped device */
> +static inline void nes_write_indexed(struct nes_device *nesdev, u32 reg_index, u32 val)
> +{
> +	unsigned long flags;
> +	void __iomem * addr = nesdev->index_reg;
> +
> +	spin_lock_irqsave(&nesdev->indexed_regs_lock, flags);
> +
> +	/* dprintk("Writing %08X, to indexed offset %08X using address %p and %p.\n",
> +			val, reg_index, addr, addr+4); */
> +	writel(cpu_to_le32(reg_index), addr);
> +	writel(cpu_to_le32(val),(u8 *)addr + 4);

wrong -- endian conversion macros not needed with writel()



> +	spin_unlock_irqrestore(&nesdev->indexed_regs_lock, flags);
> +}
> +
> +static inline void nes_write32(void __iomem * addr, u32 val)
> +{
> +	/* dprintk("Writing %08X, to address %p.\n", val, addr); */
> +	writel(cpu_to_le32(val), addr);
> +}
> +
> +static inline void nes_write16(void __iomem * addr, u16 val)
> +{
> +	writew(cpu_to_le16(val), addr);
> +}
> +
> +static inline void nes_write8(void __iomem * addr, u8 val)
> +{
> +	writeb(val, addr);
> +}

#1: delete wrappers

#2: the wrappers are buggy anyway


> +static inline struct nes_vnic *to_nesvnic(struct ib_device *ibdev) {
> +	return (container_of(ibdev, struct nes_ib_device, ibdev)->nesvnic);
> +}
> +
> +static inline struct nes_pd *to_nespd(struct ib_pd *ibpd) {
> +	return (container_of(ibpd, struct nes_pd, ibpd));
> +}
> +
> +static inline struct nes_ucontext *to_nesucontext(struct ib_ucontext *ibucontext) {
> +	return (container_of(ibucontext, struct nes_ucontext, ibucontext));
> +}
> +
> +static inline struct nes_mr *to_nesmr(struct ib_mr *ibmr) {
> +	return (container_of(ibmr, struct nes_mr, ibmr));
> +}
> +
> +static inline struct nes_mr *to_nesmw(struct ib_mw *ibmw) {
> +	return (container_of(ibmw, struct nes_mr, ibmw));
> +}
> +
> +static inline struct nes_mr *to_nesfmr(struct ib_fmr *ibfmr) {
> +	return (container_of(ibfmr, struct nes_mr, ibfmr));
> +}
> +
> +static inline struct nes_cq *to_nescq(struct ib_cq *ibcq) {
> +	return (container_of(ibcq, struct nes_cq, ibcq));
> +}
> +
> +static inline struct nes_qp *to_nesqp(struct ib_qp *ibqp) {
> +	return (container_of(ibqp, struct nes_qp, ibqp));
> +}

all functions have their starting brace in column 1, not at EOL


> +/* Utils */
> +#define CRC32C_POLY     0x1EDC6F41

use libcrc32c rather than reinventing the wheel


> +static inline struct nes_cqp_request
> +		*nes_get_cqp_request(struct nes_device *nesdev, int holding_lock)
> +{
> +	unsigned long flags;
> +	struct nes_cqp_request *cqp_request = NULL;
> +
> +	if (!holding_lock) {
> +		spin_lock_irqsave(&nesdev->cqp.lock, flags);
> +	}
> +	if (!list_empty(&nesdev->cqp_avail_reqs)) {
> +		cqp_request = list_entry(nesdev->cqp_avail_reqs.next,
> +				struct nes_cqp_request, list);
> +		list_del_init(&cqp_request->list);
> +	}
> +	if (!holding_lock) {
> +		spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
> +	}
> +
> +	if (cqp_request) {
> +		cqp_request->waiting = 0;
> +		cqp_request->request_done = 0;
> +		init_waitqueue_head(&cqp_request->waitq);
> +		/* dprintk("%s: Got cqp request %p from the available list \n",
> +				__FUNCTION__, cqp_request); */
> +	} else
> +		dprintk("%s: CQP request queue is empty.\n", __FUNCTION__);
> +
> +	return (cqp_request);
> +}

see below comment about conditional locking


> +static inline void nes_post_cqp_request(struct nes_device *nesdev,
> +		struct nes_cqp_request *cqp_request, int holding_lock, int ring_doorbell)
> +{
> +	/* caller must be holding CQP lock */
> +	struct nes_hw_cqp_wqe *cqp_wqe;
> +	unsigned long flags;
> +	u32 cqp_head;
> +
> +	if (!holding_lock) {
> +		spin_lock_irqsave(&nesdev->cqp.lock, flags);
> +	}
> +
> +	if (((((nesdev->cqp.sq_tail+(nesdev->cqp.sq_size*2))-nesdev->cqp.sq_head) &
> +			(nesdev->cqp.sq_size - 1)) != 1)
> +			&& (list_empty(&nesdev->cqp_pending_reqs))) {
> +		cqp_head = nesdev->cqp.sq_head++;
> +		nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1;
> +		cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
> +		memcpy(cqp_wqe, &cqp_request->cqp_wqe, sizeof(*cqp_wqe));
> +		barrier();
> +		*((struct nes_cqp_request **)&cqp_wqe->wqe_words
> +				[NES_CQP_WQE_COMP_SCRATCH_LOW_IDX]) = cqp_request;
> +		dprintk("%s: CQP request (opcode 0x%02X), line 1 = 0x%08X put on CQPs SQ,"
> +				" request = %p, cqp_head = %u, cqp_tail = %u, cqp_size = %u,"
> +				" waiting = %d, refcount = %d.\n",
> +				__FUNCTION__, cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX]&0x3f,
> +				cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX], cqp_request,
> +				nesdev->cqp.sq_head, nesdev->cqp.sq_tail, nesdev->cqp.sq_size,
> +				cqp_request->waiting, atomic_read(&cqp_request->refcount));
> +	} else {
> +		dprintk("%s: CQP request %p (opcode 0x%02X), line 1 = 0x%08X"
> +				" put on the pending queue.\n",
> +				__FUNCTION__, cqp_request,
> +				cqp_request->cqp_wqe.wqe_words[NES_CQP_WQE_OPCODE_IDX]&0x3f,
> +				cqp_request->cqp_wqe.wqe_words[NES_CQP_WQE_ID_IDX]);
> +		list_add_tail(&cqp_request->list, &nesdev->cqp_pending_reqs);
> +	}
> +
> +	if (ring_doorbell) {
> +		barrier();
> +		/* Ring doorbell (1 WQEs) */
> +		nes_write32(nesdev->regs+NES_WQE_ALLOC, 0x01800000 | nesdev->cqp.qp_id);
> +	}
> +
> +	if (!holding_lock) {
> +		spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
> +	}
> +
> +	return;
> +}

#1: these two are way too big to inline

#2: conditional locking has proven to be fragile.  don't do it.  create 
a wrapper function that acquires/releases the lock, and an inner 
function that does the real work.




More information about the ewg mailing list