[openib-general] [PATCH] backport memfree support to trunk

Michael S. Tsirkin mst at mellanox.co.il
Sun Jan 30 06:18:14 PST 2005


Hello!
The following backports the mthca memfree support code from
https://openib.org/svn/gen2/branches/roland-uverbs/
revision 1701, to trunk.

Please note that I did this in the simplest possible way, so the
handling of UAR tables is still compiled into mthca unconditionally.
The idea is to make porting updates back and forth easy.

This works basically as well as the roland-uverbs branch for me:
IP over IB works in Tavor mode, and modprobe ib_mthca succeeds in
Arbel mode.
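
For anyone reading along who hasn't followed the memfree work: "mem-free"
HCAs (the MT25208 in native Arbel mode) have no local DDR, so all context
memory lives in host memory (the ICM tables), and doorbells go through
driver-allocated user access regions (UARs) plus doorbell records that
the HCA reads out of host memory.  The split between the two generations
is made once, at device registration time, rather than on every fast-path
call -- condensed from the mthca_provider.c hunk below:

	if (dev->hca_type == ARBEL_NATIVE) {
		dev->ib_dev.req_notify_cq = mthca_arbel_arm_cq;
		dev->ib_dev.post_send     = mthca_arbel_post_send;
		dev->ib_dev.post_recv     = mthca_arbel_post_receive;
	} else {
		dev->ib_dev.req_notify_cq = mthca_tavor_arm_cq;
		dev->ib_dev.post_send     = mthca_tavor_post_send;
		dev->ib_dev.post_recv     = mthca_tavor_post_receive;
	}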

Signed-off-by: Michael S. Tsirkin <mst at mellanox.co.il>

Backport memfree support code from roland-uverbs, revision 1701.

Index: hw/mthca/mthca_dev.h
===================================================================
--- hw/mthca/mthca_dev.h	(revision 1701)
+++ hw/mthca/mthca_dev.h	(working copy)
@@ -65,7 +65,6 @@ enum {
 };
 
 enum {
-	MTHCA_KAR_PAGE  = 1,
 	MTHCA_MAX_PORTS = 2
 };
 
@@ -108,6 +107,7 @@ struct mthca_limits {
 	int      gid_table_len;
 	int      pkey_table_len;
 	int      local_ca_ack_delay;
+	int      num_uars;
 	int      max_sg;
 	int      num_qps;
 	int      reserved_qps;
@@ -148,6 +148,12 @@ struct mthca_array {
 	} *page_list;
 };
 
+struct mthca_uar_table {
+	struct mthca_alloc alloc;
+	u64                uarc_base;
+	int                uarc_size;
+};
+
 struct mthca_pd_table {
 	struct mthca_alloc alloc;
 };
@@ -237,13 +243,22 @@ struct mthca_dev {
 	struct semaphore cap_mask_mutex;
 
 	void __iomem    *hcr;
-	void __iomem    *ecr_base;
-	void __iomem    *clr_base;
 	void __iomem    *kar;
+	void __iomem    *clr_base;
+	union {
+		struct {
+			void __iomem *ecr_base;
+		} tavor;
+		struct {
+			void __iomem *eq_arm;
+			void __iomem *eq_set_ci_base;
+		} arbel;
+	} eq_regs;
 
 	struct mthca_cmd    cmd;
 	struct mthca_limits limits;
 
+	struct mthca_uar_table uar_table;
 	struct mthca_pd_table  pd_table;
 	struct mthca_mr_table  mr_table;
 	struct mthca_eq_table  eq_table;
@@ -252,8 +267,10 @@ struct mthca_dev {
 	struct mthca_av_table  av_table;
 	struct mthca_mcg_table mcg_table;
 
-	struct mthca_pd       driver_pd;
-	struct mthca_mr       driver_mr;
+	struct mthca_uar       driver_uar;
+	struct mthca_db_table *db_tab;
+	struct mthca_pd        driver_pd;
+	struct mthca_mr        driver_mr;
 
 	struct ib_mad_agent  *send_agent[MTHCA_MAX_PORTS][2];
 	struct ib_ah         *sm_ah[MTHCA_MAX_PORTS];
@@ -310,6 +327,7 @@ void mthca_array_clear(struct mthca_arra
 int mthca_array_init(struct mthca_array *array, int nent);
 void mthca_array_cleanup(struct mthca_array *array, int nent);
 
+int mthca_init_uar_table(struct mthca_dev *dev);
 int mthca_init_pd_table(struct mthca_dev *dev);
 int mthca_init_mr_table(struct mthca_dev *dev);
 int mthca_init_eq_table(struct mthca_dev *dev);
@@ -318,6 +336,7 @@ int mthca_init_qp_table(struct mthca_dev
 int mthca_init_av_table(struct mthca_dev *dev);
 int mthca_init_mcg_table(struct mthca_dev *dev);
 
+void mthca_cleanup_uar_table(struct mthca_dev *dev);
 void mthca_cleanup_pd_table(struct mthca_dev *dev);
 void mthca_cleanup_mr_table(struct mthca_dev *dev);
 void mthca_cleanup_eq_table(struct mthca_dev *dev);
@@ -329,6 +348,9 @@ void mthca_cleanup_mcg_table(struct mthc
 int mthca_register_device(struct mthca_dev *dev);
 void mthca_unregister_device(struct mthca_dev *dev);
 
+int mthca_uar_alloc(struct mthca_dev *dev, struct mthca_uar *uar);
+void mthca_uar_free(struct mthca_dev *dev, struct mthca_uar *uar);
+
 int mthca_pd_alloc(struct mthca_dev *dev, struct mthca_pd *pd);
 void mthca_pd_free(struct mthca_dev *dev, struct mthca_pd *pd);
 
@@ -345,8 +367,8 @@ void mthca_unmap_eq_icm(struct mthca_dev
 
 int mthca_poll_cq(struct ib_cq *ibcq, int num_entries,
 		  struct ib_wc *entry);
-void mthca_arm_cq(struct mthca_dev *dev, struct mthca_cq *cq,
-		  int solicited);
+int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify);
+int mthca_arbel_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify);
 int mthca_init_cq(struct mthca_dev *dev, int nent,
 		  struct mthca_cq *cq);
 void mthca_free_cq(struct mthca_dev *dev,
@@ -357,10 +379,14 @@ void mthca_cq_clean(struct mthca_dev *de
 void mthca_qp_event(struct mthca_dev *dev, u32 qpn,
 		    enum ib_event_type event_type);
 int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask);
-int mthca_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-		    struct ib_send_wr **bad_wr);
-int mthca_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-		       struct ib_recv_wr **bad_wr);
+int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+			  struct ib_send_wr **bad_wr);
+int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+			     struct ib_recv_wr **bad_wr);
+int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+			  struct ib_send_wr **bad_wr);
+int mthca_arbel_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+			     struct ib_recv_wr **bad_wr);
 int mthca_free_err_wqe(struct mthca_qp *qp, int is_send,
 		       int index, int *dbd, u32 *new_wqe);
 int mthca_alloc_qp(struct mthca_dev *dev,
Index: hw/mthca/mthca_main.c
===================================================================
--- hw/mthca/mthca_main.c	(revision 1701)
+++ hw/mthca/mthca_main.c	(working copy)
@@ -363,10 +363,9 @@ static int __devinit mthca_init_icm(stru
 	}
 
 	mdev->mr_table.mtt_table = mthca_alloc_icm_table(mdev, init_hca->mtt_base,
-							 mdev->limits.num_mtt_segs *
 							 init_hca->mtt_seg_sz,
-							 mdev->limits.reserved_mtts *
-							 init_hca->mtt_seg_sz, 1);
+							 mdev->limits.num_mtt_segs,
+							 mdev->limits.reserved_mtts, 1);
 	if (!mdev->mr_table.mtt_table) {
 		mthca_err(mdev, "Failed to map MTT context memory, aborting.\n");
 		err = -ENOMEM;
@@ -374,10 +373,9 @@ static int __devinit mthca_init_icm(stru
 	}
 
 	mdev->mr_table.mpt_table = mthca_alloc_icm_table(mdev, init_hca->mpt_base,
-							 mdev->limits.num_mpts *
 							 dev_lim->mpt_entry_sz,
-							 mdev->limits.reserved_mrws *
-							 dev_lim->mpt_entry_sz, 1);
+							 mdev->limits.num_mpts,
+							 mdev->limits.reserved_mrws, 1);
 	if (!mdev->mr_table.mpt_table) {
 		mthca_err(mdev, "Failed to map MPT context memory, aborting.\n");
 		err = -ENOMEM;
@@ -385,10 +383,9 @@ static int __devinit mthca_init_icm(stru
 	}
 
 	mdev->qp_table.qp_table = mthca_alloc_icm_table(mdev, init_hca->qpc_base,
-							mdev->limits.num_qps *
 							dev_lim->qpc_entry_sz,
-							mdev->limits.reserved_qps *
-							dev_lim->qpc_entry_sz, 1);
+							mdev->limits.num_qps,
+							mdev->limits.reserved_qps, 0);
 	if (!mdev->qp_table.qp_table) {
 		mthca_err(mdev, "Failed to map QP context memory, aborting.\n");
 		err = -ENOMEM;
@@ -396,10 +393,9 @@ static int __devinit mthca_init_icm(stru
 	}
 
 	mdev->qp_table.eqp_table = mthca_alloc_icm_table(mdev, init_hca->eqpc_base,
-							 mdev->limits.num_qps *
 							 dev_lim->eqpc_entry_sz,
-							 mdev->limits.reserved_qps *
-							 dev_lim->eqpc_entry_sz, 1);
+							 mdev->limits.num_qps,
+							 mdev->limits.reserved_qps, 0);
 	if (!mdev->qp_table.eqp_table) {
 		mthca_err(mdev, "Failed to map EQP context memory, aborting.\n");
 		err = -ENOMEM;
@@ -407,10 +403,9 @@ static int __devinit mthca_init_icm(stru
 	}
 
 	mdev->cq_table.table = mthca_alloc_icm_table(mdev, init_hca->cqc_base,
-						     mdev->limits.num_cqs *
 						     dev_lim->cqc_entry_sz,
-						     mdev->limits.reserved_cqs *
-						     dev_lim->cqc_entry_sz, 1);
+						     mdev->limits.num_cqs,
+						     mdev->limits.reserved_cqs, 0);
 	if (!mdev->cq_table.table) {
 		mthca_err(mdev, "Failed to map CQ context memory, aborting.\n");
 		err = -ENOMEM;
@@ -570,11 +565,33 @@ static int __devinit mthca_setup_hca(str
 
 	MTHCA_INIT_DOORBELL_LOCK(&dev->doorbell_lock);
 
+	err = mthca_init_uar_table(dev);
+	if (err) {
+		mthca_err(dev, "Failed to initialize "
+			  "user access region table, aborting.\n");
+		return err;
+	}
+
+	err = mthca_uar_alloc(dev, &dev->driver_uar);
+	if (err) {
+		mthca_err(dev, "Failed to allocate driver access region, "
+			  "aborting.\n");
+		goto err_uar_table_free;
+	}
+
+	dev->kar = ioremap(dev->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
+	if (!dev->kar) {
+		mthca_err(dev, "Couldn't map kernel access region, "
+			  "aborting.\n");
+		err = -ENOMEM;
+		goto err_uar_free;
+	}
+
 	err = mthca_init_pd_table(dev);
 	if (err) {
 		mthca_err(dev, "Failed to initialize "
 			  "protection domain table, aborting.\n");
-		return err;
+		goto err_kar_unmap;
 	}
 
 	err = mthca_init_mr_table(dev);
@@ -591,13 +608,6 @@ static int __devinit mthca_setup_hca(str
 		goto err_mr_table_free;
 	}
 
-	if (dev->hca_type == ARBEL_NATIVE) {
-		mthca_warn(dev, "Sorry, native MT25208 mode support is not done, "
-			   "aborting.\n");
-		err = -ENODEV;
-		goto err_pd_free;
-	}
-
 	err = mthca_init_eq_table(dev);
 	if (err) {
 		mthca_err(dev, "Failed to initialize "
@@ -621,8 +631,8 @@ static int __devinit mthca_setup_hca(str
 			mthca_err(dev, "BIOS or ACPI interrupt routing problem?\n");
 
 		goto err_cmd_poll;
-	} else
-		mthca_dbg(dev, "NOP command IRQ test passed\n");
+	}
+	mthca_dbg(dev, "NOP command IRQ test passed\n");
 
 	err = mthca_init_cq_table(dev);
 	if (err) {
@@ -677,6 +687,15 @@ err_mr_table_free:
 
 err_pd_table_free:
 	mthca_cleanup_pd_table(dev);
+
+err_kar_unmap:
+	iounmap(dev->kar);
+
+err_uar_free:
+	mthca_uar_free(dev, &dev->driver_uar);
+
+err_uar_table_free:
+	mthca_cleanup_uar_table(dev);
 	return err;
 }
 
@@ -686,37 +705,18 @@ static int __devinit mthca_request_regio
 	int err;
 
 	/*
-	 * We request our first BAR in two chunks, since the MSI-X
-	 * vector table is right in the middle.
+	 * We can't just use pci_request_regions() because the MSI-X
+	 * table is right in the middle of the first BAR.  If we did
+	 * pci_request_region and grab all of the first BAR, then
+	 * setting up MSI-X would fail, since the PCI core wants to do
+	 * request_mem_region on the MSI-X vector table.
 	 *
-	 * This is why we can't just use pci_request_regions() -- if
-	 * we did then setting up MSI-X would fail, since the PCI core
-	 * wants to do request_mem_region on the MSI-X vector table.
+	 * So just request what we need right now, and request any
+	 * other regions we need when setting up EQs.
 	 */
-	if (!request_mem_region(pci_resource_start(pdev, 0) +
-				MTHCA_HCR_BASE,
-				MTHCA_HCR_SIZE,
-				DRV_NAME)) {
-		err = -EBUSY;
-		goto err_hcr_failed;
-	}
-
-	if (!request_mem_region(pci_resource_start(pdev, 0) +
-				MTHCA_ECR_BASE,
-				MTHCA_MAP_ECR_SIZE,
-				DRV_NAME)) {
-		err = -EBUSY;
-		goto err_ecr_failed;
-	}
-
-	if (!request_mem_region(pci_resource_start(pdev, 0) +
-				MTHCA_CLR_INT_BASE,
-				MTHCA_CLR_INT_SIZE,
-				DRV_NAME)) {
-		err = -EBUSY;
-		goto err_int_failed;
-	}
-
+	if (!request_mem_region(pci_resource_start(pdev, 0) + MTHCA_HCR_BASE,
+				MTHCA_HCR_SIZE, DRV_NAME))
+		return -EBUSY;
 
 	err = pci_request_region(pdev, 2, DRV_NAME);
 	if (err)
@@ -730,25 +730,12 @@ static int __devinit mthca_request_regio
 
 	return 0;
 
-err_bar4_failed:
-
-	pci_release_region(pdev, 2);
 err_bar2_failed:
-
-	release_mem_region(pci_resource_start(pdev, 0) +
-			   MTHCA_CLR_INT_BASE,
-			   MTHCA_CLR_INT_SIZE);
-err_int_failed:
-
-	release_mem_region(pci_resource_start(pdev, 0) +
-			   MTHCA_ECR_BASE,
-			   MTHCA_MAP_ECR_SIZE);
-err_ecr_failed:
-
-	release_mem_region(pci_resource_start(pdev, 0) +
-			   MTHCA_HCR_BASE,
+	release_mem_region(pci_resource_start(pdev, 0) + MTHCA_HCR_BASE,
 			   MTHCA_HCR_SIZE);
-err_hcr_failed:
+
+err_bar4_failed:
+	pci_release_region(pdev, 2);
 
 	return err;
 }
@@ -761,16 +748,7 @@ static void mthca_release_regions(struct
 
 	pci_release_region(pdev, 2);
 
-	release_mem_region(pci_resource_start(pdev, 0) +
-			   MTHCA_CLR_INT_BASE,
-			   MTHCA_CLR_INT_SIZE);
-
-	release_mem_region(pci_resource_start(pdev, 0) +
-			   MTHCA_ECR_BASE,
-			   MTHCA_MAP_ECR_SIZE);
-
-	release_mem_region(pci_resource_start(pdev, 0) +
-			   MTHCA_HCR_BASE,
+	release_mem_region(pci_resource_start(pdev, 0) + MTHCA_HCR_BASE,
 			   MTHCA_HCR_SIZE);
 }
 
@@ -830,7 +808,6 @@ static int __devinit mthca_init_one(stru
 	static int mthca_version_printed = 0;
 	int ddr_hidden = 0;
 	int err;
-	unsigned long mthca_base;
 	struct mthca_dev *mdev;
 
 	if (!mthca_version_printed) {
@@ -908,6 +885,10 @@ static int __devinit mthca_init_one(stru
 	mdev->pdev     = pdev;
 	mdev->hca_type = id->driver_data;
 
+	if (mdev->hca_type == ARBEL_NATIVE)
+		mthca_warn(mdev, "Warning: native MT25208 mode support is not done.  "
+			   "Your HCA may not work properly.\n");
+
 	if (ddr_hidden)
 		mdev->mthca_flags |= MTHCA_FLAG_DDR_HIDDEN;
 
@@ -932,8 +913,7 @@ static int __devinit mthca_init_one(stru
 	sema_init(&mdev->cmd.poll_sem, 1);
 	mdev->cmd.use_events = 0;
 
-	mthca_base = pci_resource_start(pdev, 0);
-	mdev->hcr = ioremap(mthca_base + MTHCA_HCR_BASE, MTHCA_HCR_SIZE);
+	mdev->hcr = ioremap(pci_resource_start(pdev, 0) + MTHCA_HCR_BASE, MTHCA_HCR_SIZE);
 	if (!mdev->hcr) {
 		mthca_err(mdev, "Couldn't map command register, "
 			  "aborting.\n");
@@ -941,40 +921,13 @@ static int __devinit mthca_init_one(stru
 		goto err_free_dev;
 	}
 
-	mdev->clr_base = ioremap(mthca_base + MTHCA_CLR_INT_BASE,
-				 MTHCA_CLR_INT_SIZE);
-	if (!mdev->clr_base) {
-		mthca_err(mdev, "Couldn't map interrupt clear register, "
-			  "aborting.\n");
-		err = -ENOMEM;
-		goto err_iounmap;
-	}
-
-	mdev->ecr_base = ioremap(mthca_base + MTHCA_ECR_BASE,
-				 MTHCA_ECR_SIZE + MTHCA_ECR_CLR_SIZE);
-	if (!mdev->ecr_base) {
-		mthca_err(mdev, "Couldn't map ecr register, "
-			  "aborting.\n");
-		err = -ENOMEM;
-		goto err_iounmap_clr;
-	}
-
-	mthca_base = pci_resource_start(pdev, 2);
-	mdev->kar = ioremap(mthca_base + PAGE_SIZE * MTHCA_KAR_PAGE, PAGE_SIZE);
-	if (!mdev->kar) {
-		mthca_err(mdev, "Couldn't map kernel access region, "
-			  "aborting.\n");
-		err = -ENOMEM;
-		goto err_iounmap_ecr;
-	}
-
 	err = mthca_tune_pci(mdev);
 	if (err)
-		goto err_iounmap_kar;
+		goto err_iounmap;
 
 	err = mthca_init_hca(mdev);
 	if (err)
-		goto err_iounmap_kar;
+		goto err_iounmap;
 
 	err = mthca_setup_hca(mdev);
 	if (err)
@@ -1007,19 +960,11 @@ err_cleanup:
 
 	mthca_cleanup_mr_table(mdev);
 	mthca_cleanup_pd_table(mdev);
+	mthca_cleanup_uar_table(mdev);
 
 err_close:
 	mthca_close_hca(mdev);
 
-err_iounmap_kar:
-	iounmap(mdev->kar);
-
-err_iounmap_ecr:
-	iounmap(mdev->ecr_base);
-
-err_iounmap_clr:
-	iounmap(mdev->clr_base);
-
 err_iounmap:
 	iounmap(mdev->hcr);
 
@@ -1064,12 +1009,14 @@ static void __devexit mthca_remove_one(s
 
 		mthca_cleanup_mr_table(mdev);
 		mthca_cleanup_pd_table(mdev);
+		
+		iounmap(mdev->kar);
+		mthca_uar_free(mdev, &mdev->driver_uar);
+		mthca_cleanup_uar_table(mdev);
 
 		mthca_close_hca(mdev);
 
 		iounmap(mdev->hcr);
-		iounmap(mdev->ecr_base);
-		iounmap(mdev->clr_base);
 
 		if (mdev->mthca_flags & MTHCA_FLAG_MSI_X)
 			pci_disable_msix(pdev);
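
A note on the new UAR calls used above: mthca_init_uar_table(),
mthca_uar_alloc() and friends are only declared in this diff -- their
implementation lives in a file that is not shown here.  As a rough
sketch of what the allocator does (my reading of the branch, so treat
the exact code as an assumption): each UAR is one 4K page of BAR 2,
selected by an allocated index, which is why mthca_setup_hca() can
simply ioremap dev->driver_uar.pfn << PAGE_SHIFT to get the kernel
access region.

	/* Hypothetical sketch -- NOT part of this diff. */
	int mthca_uar_alloc(struct mthca_dev *dev, struct mthca_uar *uar)
	{
		uar->index = mthca_alloc(&dev->uar_table.alloc);
		if (uar->index == -1)
			return -ENOMEM;

		/* BAR 2 holds the UAR pages; the index picks the page. */
		uar->pfn = (pci_resource_start(dev->pdev, 2) >> PAGE_SHIFT)
			+ uar->index;

		return 0;
	}
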
Index: hw/mthca/mthca_memfree.h
===================================================================
--- hw/mthca/mthca_memfree.h	(revision 1701)
+++ hw/mthca/mthca_memfree.h	(working copy)
@@ -52,13 +52,17 @@ struct mthca_icm_chunk {
 };
 
 struct mthca_icm {
+	int              refcount;
 	struct list_head chunk_list;
 };
 
 struct mthca_icm_table {
 	u64               virt;
 	int               num_icm;
-	struct semaphore  sem;
+	int               num_obj;
+	int               obj_size;
+	int               lowmem;
+	struct semaphore  mutex;
 	struct mthca_icm *icm[0];
 };
 
@@ -75,10 +79,12 @@ struct mthca_icm *mthca_alloc_icm(struct
 void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm);
 
 struct mthca_icm_table *mthca_alloc_icm_table(struct mthca_dev *dev,
-					      u64 virt, unsigned size,
-					      unsigned reserved,
+					      u64 virt, int obj_size,
+					      int nobj, int reserved,
 					      int use_lowmem);
 void mthca_free_icm_table(struct mthca_dev *dev, struct mthca_icm_table *table);
+int mthca_table_get(struct mthca_dev *dev, struct mthca_icm_table *table, int obj);
+void mthca_table_put(struct mthca_dev *dev, struct mthca_icm_table *table, int obj);
 
 static inline void mthca_icm_first(struct mthca_icm *icm,
 				   struct mthca_icm_iter *iter)
@@ -119,4 +125,37 @@ static inline unsigned long mthca_icm_si
 	return sg_dma_len(&iter->chunk->mem[iter->page_idx]);
 }
 
+enum {
+	MTHCA_DB_REC_PER_PAGE = 4096 / 8
+};
+
+struct mthca_db_page {
+	DECLARE_BITMAP(used, MTHCA_DB_REC_PER_PAGE);
+	u64       *db_rec;
+	dma_addr_t mapping;
+};
+
+struct mthca_db_table {
+	int 	       	      npages;
+	int 	       	      max_group1;
+	int 	       	      min_group2;
+	struct mthca_db_page *page;
+	struct semaphore      mutex;
+};
+
+enum {
+	MTHCA_DB_TYPE_INVALID   = 0x0,
+	MTHCA_DB_TYPE_CQ_SET_CI = 0x1,
+	MTHCA_DB_TYPE_CQ_ARM    = 0x2,
+	MTHCA_DB_TYPE_SQ        = 0x3,
+	MTHCA_DB_TYPE_RQ        = 0x4,
+	MTHCA_DB_TYPE_SRQ       = 0x5,
+	MTHCA_DB_TYPE_GROUP_SEP = 0x7
+};
+
+int mthca_init_db_tab(struct mthca_dev *dev);
+void mthca_cleanup_db_tab(struct mthca_dev *dev);
+int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, u32 **db);
+void mthca_free_db(struct mthca_dev *dev, int type, int db_index);
+
 #endif /* MTHCA_MEMFREE_H */
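
A note on the doorbell record layout above: records are 8 bytes each, so
one 4K page holds MTHCA_DB_REC_PER_PAGE = 512 of them, tracked by the
"used" bitmap, and max_group1/min_group2 let the two record groups grow
toward each other from opposite ends of the doorbell area.  The per-page
slot allocation is a find-first-zero-bit affair; a stand-alone model
(illustration only, not the driver code):

	#include <assert.h>
	#include <string.h>

	#define DB_REC_PER_PAGE	(4096 / 8)	/* 512 8-byte records per page */

	struct db_page {
		unsigned long used[DB_REC_PER_PAGE / (8 * sizeof(unsigned long))];
	};

	static int alloc_slot(struct db_page *p)
	{
		unsigned int i, w, b;

		for (i = 0; i < DB_REC_PER_PAGE; ++i) {
			w = i / (8 * sizeof(unsigned long));
			b = i % (8 * sizeof(unsigned long));
			if (!(p->used[w] & (1UL << b))) {
				p->used[w] |= 1UL << b;
				return i;	/* record index within the page */
			}
		}
		return -1;			/* page full */
	}

	int main(void)
	{
		struct db_page page;

		memset(&page, 0, sizeof page);
		assert(alloc_slot(&page) == 0);	/* slots hand out in order */
		assert(alloc_slot(&page) == 1);
		return 0;
	}
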
Index: hw/mthca/mthca_provider.c
===================================================================
--- hw/mthca/mthca_provider.c	(revision 1701)
+++ hw/mthca/mthca_provider.c	(working copy)
@@ -379,7 +379,9 @@ static struct ib_qp *mthca_create_qp(str
 		return ERR_PTR(err);
 	}
 
-        init_attr->cap.max_inline_data = 0;
+	init_attr->cap.max_inline_data = 0;
+	init_attr->cap.max_send_wr     = qp->sq.max;
+	init_attr->cap.max_recv_wr     = qp->rq.max;
 
 	return &qp->ibqp;
 }
@@ -422,13 +424,6 @@ static int mthca_destroy_cq(struct ib_cq
 	return 0;
 }
 
-static int mthca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify notify)
-{
-	mthca_arm_cq(to_mdev(cq->device), to_mcq(cq),
-		     notify == IB_CQ_SOLICITED);
-	return 0;
-}
-
 static inline u32 convert_access(int acc)
 {
 	return (acc & IB_ACCESS_REMOTE_ATOMIC ? MTHCA_MPT_FLAG_ATOMIC       : 0) |
@@ -621,12 +616,9 @@ int mthca_register_device(struct mthca_d
 	dev->ib_dev.create_qp            = mthca_create_qp;
 	dev->ib_dev.modify_qp            = mthca_modify_qp;
 	dev->ib_dev.destroy_qp           = mthca_destroy_qp;
-	dev->ib_dev.post_send            = mthca_post_send;
-	dev->ib_dev.post_recv            = mthca_post_receive;
 	dev->ib_dev.create_cq            = mthca_create_cq;
 	dev->ib_dev.destroy_cq           = mthca_destroy_cq;
 	dev->ib_dev.poll_cq              = mthca_poll_cq;
-	dev->ib_dev.req_notify_cq        = mthca_req_notify_cq;
 	dev->ib_dev.get_dma_mr           = mthca_get_dma_mr;
 	dev->ib_dev.reg_phys_mr          = mthca_reg_phys_mr;
 	dev->ib_dev.dereg_mr             = mthca_dereg_mr;
@@ -634,6 +626,16 @@ int mthca_register_device(struct mthca_d
 	dev->ib_dev.detach_mcast         = mthca_multicast_detach;
 	dev->ib_dev.process_mad          = mthca_process_mad;
 
+	if (dev->hca_type == ARBEL_NATIVE) {
+		dev->ib_dev.req_notify_cq = mthca_arbel_arm_cq;
+		dev->ib_dev.post_send     = mthca_arbel_post_send;
+		dev->ib_dev.post_recv     = mthca_arbel_post_receive;
+	} else {
+		dev->ib_dev.req_notify_cq = mthca_tavor_arm_cq;
+		dev->ib_dev.post_send     = mthca_tavor_post_send;
+		dev->ib_dev.post_recv     = mthca_tavor_post_receive;
+	}
+
 	init_MUTEX(&dev->cap_mask_mutex);
 
 	ret = ib_register_device(&dev->ib_dev);
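
One detail worth calling out in the create_qp change earlier in this
file: the actual ring sizes are reported back through init_attr->cap
because on mem-free hardware the queue sizes get rounded up to powers
of two (see mthca_align_qp_size() in the mthca_qp.c hunk below).  The
rounding is the usual loop; stand-alone it behaves like this:

	#include <assert.h>

	static int round_up_pow2(int x)
	{
		int i;

		for (i = 0; 1 << i < x; ++i)
			; /* nothing */
		return 1 << i;
	}

	int main(void)
	{
		assert(round_up_pow2(1) == 1);
		assert(round_up_pow2(5) == 8);	/* requested 5, get 8 */
		assert(round_up_pow2(8) == 8);	/* exact powers unchanged */
		return 0;
	}
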
Index: hw/mthca/mthca_provider.h
===================================================================
--- hw/mthca/mthca_provider.h	(revision 1701)
+++ hw/mthca/mthca_provider.h	(working copy)
@@ -49,6 +49,11 @@ struct mthca_buf_list {
 	DECLARE_PCI_UNMAP_ADDR(mapping)
 };
 
+struct mthca_uar {
+	unsigned long pfn;
+	int           index;
+};
+
 struct mthca_mr {
 	struct ib_mr ibmr;
 	int order;
@@ -65,7 +70,7 @@ struct mthca_pd {
 struct mthca_eq {
 	struct mthca_dev      *dev;
 	int                    eqn;
-	u32                    ecr_mask;
+	u32                    eqn_mask;
 	u32                    cons_index;
 	u16                    msi_x_vector;
 	u16                    msi_x_entry;
@@ -77,12 +82,18 @@ struct mthca_eq {
 
 struct mthca_av;
 
+enum mthca_ah_type {
+	MTHCA_AH_ON_HCA,
+	MTHCA_AH_PCI_POOL,
+	MTHCA_AH_KMALLOC
+};
+
 struct mthca_ah {
-	struct ib_ah     ibah;
-	int              on_hca;
-	u32              key;
-	struct mthca_av *av;
-	dma_addr_t       avdma;
+	struct ib_ah       ibah;
+	enum mthca_ah_type type;
+	u32                key;
+	struct mthca_av   *av;
+	dma_addr_t         avdma;
 };
 
 /*
@@ -136,8 +147,16 @@ struct mthca_cq {
 	spinlock_t             lock;
 	atomic_t               refcount;
 	int                    cqn;
-	int                    cons_index;
+	u32                    cons_index;
 	int                    is_direct;
+
+	/* Next fields are Arbel only */
+	int                    set_ci_db_index;
+	u32                   *set_ci_db;
+	int                    arm_db_index;
+	u32                   *arm_db;
+	int                    arm_sn;
+
 	union {
 		struct mthca_buf_list direct;
 		struct mthca_buf_list *page_list;
@@ -155,6 +174,9 @@ struct mthca_wq {
 	int   max_gs;
 	int   wqe_shift;
 	enum ib_sig_type policy;
+
+	int   db_index;		/* Arbel only */
+	u32  *db;
 };
 
 struct mthca_qp {
Index: hw/mthca/mthca_profile.c
===================================================================
--- hw/mthca/mthca_profile.c	(revision 1701)
+++ hw/mthca/mthca_profile.c	(working copy)
@@ -236,15 +236,20 @@ u64 mthca_make_profile(struct mthca_dev 
 			init_hca->mtt_seg_sz     = ffs(dev_lim->mtt_seg_sz) - 7;
 			break;
 		case MTHCA_RES_UAR:
+			dev->limits.num_uars       = profile[i].num;
 			init_hca->uar_scratch_base = profile[i].start;
 			break;
 		case MTHCA_RES_UDAV:
 			dev->av_table.ddr_av_base = profile[i].start;
 			dev->av_table.num_ddr_avs = profile[i].num;
+			break;
 		case MTHCA_RES_UARC:
-			init_hca->uarc_base   = profile[i].start;
-			init_hca->log_uarc_sz = ffs(request->uarc_size) - 13;
-			init_hca->log_uar_sz  = ffs(request->num_uar) - 1;
+			dev->uar_table.uarc_size = request->uarc_size;
+			dev->uar_table.uarc_base = profile[i].start;
+			init_hca->uarc_base   	 = profile[i].start;
+			init_hca->log_uarc_sz 	 = ffs(request->uarc_size) - 13;
+			init_hca->log_uar_sz  	 = ffs(request->num_uar) - 1;
+			break;
 		default:
 			break;
 		}
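
Besides the new UAR/UARC bookkeeping, the hunk above is also a bug fix:
the old switch had no break after MTHCA_RES_UDAV, so it fell through
into MTHCA_RES_UARC and overwrote init_hca->uarc_base and friends with
the UDAV entry's values.  The failure mode in miniature:

	#include <assert.h>

	int main(void)
	{
		int udav_base = 0, uarc_base = 0;
		int res = 1;			/* take the "UDAV" case */

		switch (res) {
		case 1:
			udav_base = 0x1000;
			/* missing break: falls through... */
		case 2:
			uarc_base = 0x1000;	/* ...and clobbers this */
		}
		assert(udav_base == 0x1000 && uarc_base == 0x1000);
		return 0;
	}
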
Index: hw/mthca/mthca_cq.c
===================================================================
--- hw/mthca/mthca_cq.c	(revision 1701)
+++ hw/mthca/mthca_cq.c	(working copy)
@@ -39,6 +39,7 @@
 
 #include "mthca_dev.h"
 #include "mthca_cmd.h"
+#include "mthca_memfree.h"
 
 enum {
 	MTHCA_MAX_DIRECT_CQ_SIZE = 4 * PAGE_SIZE
@@ -55,7 +56,7 @@ struct mthca_cq_context {
 	u32 flags;
 	u64 start;
 	u32 logsize_usrpage;
-	u32 error_eqn;
+	u32 error_eqn;		/* Tavor only */
 	u32 comp_eqn;
 	u32 pd;
 	u32 lkey;
@@ -64,7 +65,9 @@ struct mthca_cq_context {
 	u32 consumer_index;
 	u32 producer_index;
 	u32 cqn;
-	u32 reserved[3];
+	u32 ci_db;		/* Arbel only */
+	u32 state_db;		/* Arbel only */
+	u32 reserved;
 } __attribute__((packed));
 
 #define MTHCA_CQ_STATUS_OK          ( 0 << 28)
@@ -133,11 +136,15 @@ struct mthca_err_cqe {
 #define MTHCA_CQ_ENTRY_OWNER_SW      (0 << 7)
 #define MTHCA_CQ_ENTRY_OWNER_HW      (1 << 7)
 
-#define MTHCA_CQ_DB_INC_CI       (1 << 24)
-#define MTHCA_CQ_DB_REQ_NOT      (2 << 24)
-#define MTHCA_CQ_DB_REQ_NOT_SOL  (3 << 24)
-#define MTHCA_CQ_DB_SET_CI       (4 << 24)
-#define MTHCA_CQ_DB_REQ_NOT_MULT (5 << 24)
+#define MTHCA_TAVOR_CQ_DB_INC_CI       (1 << 24)
+#define MTHCA_TAVOR_CQ_DB_REQ_NOT      (2 << 24)
+#define MTHCA_TAVOR_CQ_DB_REQ_NOT_SOL  (3 << 24)
+#define MTHCA_TAVOR_CQ_DB_SET_CI       (4 << 24)
+#define MTHCA_TAVOR_CQ_DB_REQ_NOT_MULT (5 << 24)
+
+#define MTHCA_ARBEL_CQ_DB_REQ_NOT_SOL  (1 << 24)
+#define MTHCA_ARBEL_CQ_DB_REQ_NOT      (2 << 24)
+#define MTHCA_ARBEL_CQ_DB_REQ_NOT_MULT (3 << 24)
 
 static inline struct mthca_cqe *get_cqe(struct mthca_cq *cq, int entry)
 {
@@ -156,7 +163,7 @@ static inline struct mthca_cqe *cqe_sw(s
 
 static inline struct mthca_cqe *next_cqe_sw(struct mthca_cq *cq)
 {
-	return cqe_sw(cq, cq->cons_index);
+	return cqe_sw(cq, cq->cons_index & cq->ibcq.cqe);
 }
 
 static inline void set_cqe_hw(struct mthca_cqe *cqe)
@@ -169,7 +176,7 @@ static inline void inc_cons_index(struct
 {
 	u32 doorbell[2];
 
-	doorbell[0] = cpu_to_be32(MTHCA_CQ_DB_INC_CI | cq->cqn);
+	doorbell[0] = cpu_to_be32(MTHCA_TAVOR_CQ_DB_INC_CI | cq->cqn);
 	doorbell[1] = cpu_to_be32(nent - 1);
 
 	mthca_write64(doorbell,
@@ -188,6 +195,8 @@ void mthca_cq_event(struct mthca_dev *de
 		return;
 	}
 
+	++cq->arm_sn;
+
 	cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
 }
 
@@ -245,7 +254,7 @@ void mthca_cq_clean(struct mthca_dev *de
 	if (nfreed) {
 		wmb();
 		inc_cons_index(dev, cq, nfreed);
-		cq->cons_index = (cq->cons_index + nfreed) & cq->ibcq.cqe;
+		cq->cons_index = cq->cons_index + nfreed;
 	}
 
 	spin_unlock_irq(&cq->lock);
@@ -504,7 +513,7 @@ static inline int mthca_poll_one(struct 
 	if (likely(free_cqe)) {
 		set_cqe_hw(cqe);
 		++(*freed);
-		cq->cons_index = (cq->cons_index + 1) & cq->ibcq.cqe;
+		++cq->cons_index;
 	}
 
 	return err;
@@ -543,20 +552,57 @@ int mthca_poll_cq(struct ib_cq *ibcq, in
 	return err == 0 || err == -EAGAIN ? npolled : err;
 }
 
-void mthca_arm_cq(struct mthca_dev *dev, struct mthca_cq *cq,
-		  int solicited)
+int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify)
 {
 	u32 doorbell[2];
 
-	doorbell[0] =  cpu_to_be32((solicited ?
-				    MTHCA_CQ_DB_REQ_NOT_SOL :
-				    MTHCA_CQ_DB_REQ_NOT)      |
-				   cq->cqn);
+	doorbell[0] = cpu_to_be32((notify == IB_CQ_SOLICITED ?
+				   MTHCA_TAVOR_CQ_DB_REQ_NOT_SOL :
+				   MTHCA_TAVOR_CQ_DB_REQ_NOT)      |
+				  to_mcq(cq)->cqn);
 	doorbell[1] = 0xffffffff;
 
 	mthca_write64(doorbell,
-		      dev->kar + MTHCA_CQ_DOORBELL,
-		      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
+		      to_mdev(cq->device)->kar + MTHCA_CQ_DOORBELL,
+		      MTHCA_GET_DOORBELL_LOCK(&to_mdev(cq->device)->doorbell_lock));
+
+	return 0;
+}
+
+int mthca_arbel_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
+{
+	struct mthca_cq *cq = to_mcq(ibcq);
+	u32 doorbell[2];
+	u32 sn;
+	u32 ci;
+
+	sn = cq->arm_sn & 3;
+	ci = cpu_to_be32(cq->cons_index);
+
+	doorbell[0] = ci;
+	doorbell[1] = cpu_to_be32((cq->cqn << 8) | (2 << 5) | (sn << 3) |
+				  (notify == IB_CQ_SOLICITED ? 1 : 2));
+
+	mthca_write_db_rec(doorbell, cq->arm_db);
+
+	/*
+	 * Make sure that the doorbell record in host memory is
+	 * written before ringing the doorbell via PCI MMIO.
+	 */
+	wmb();
+
+	doorbell[0] = cpu_to_be32((sn << 28)                       |
+				  (notify == IB_CQ_SOLICITED ?
+				   MTHCA_ARBEL_CQ_DB_REQ_NOT_SOL :
+				   MTHCA_ARBEL_CQ_DB_REQ_NOT)      |
+				  cq->cqn);
+	doorbell[1] = ci;
+
+	mthca_write64(doorbell,
+		      to_mdev(ibcq->device)->kar + MTHCA_CQ_DOORBELL,
+		      MTHCA_GET_DOORBELL_LOCK(&to_mdev(ibcq->device)->doorbell_lock));
+
+	return 0;
 }
 
 int mthca_init_cq(struct mthca_dev *dev, int nent,
@@ -574,10 +620,34 @@ int mthca_init_cq(struct mthca_dev *dev,
 
 	might_sleep();
 
+	cq->cqn = mthca_alloc(&dev->cq_table.alloc);
+	if (cq->cqn == -1)
+		return -ENOMEM;
+
+	if (dev->hca_type == ARBEL_NATIVE) {
+		cq->arm_sn = 1;
+
+		err = mthca_table_get(dev, dev->cq_table.table, cq->cqn);
+		if (err)
+			goto err_out;
+
+		err = -ENOMEM;
+
+		cq->set_ci_db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_CQ_SET_CI,
+						     cq->cqn, &cq->set_ci_db);
+		if (cq->set_ci_db_index < 0)
+			goto err_out_icm;
+
+		cq->arm_db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_CQ_ARM,
+						  cq->cqn, &cq->arm_db);
+		if (cq->arm_db_index < 0)
+			goto err_out_ci;
+	}
+
 	mailbox = kmalloc(sizeof (struct mthca_cq_context) + MTHCA_CMD_MAILBOX_EXTRA,
 			  GFP_KERNEL);
 	if (!mailbox)
-		goto err_out;
+		goto err_out_mailbox;
 
 	cq_context = MAILBOX_ALIGN(mailbox);
 
@@ -592,7 +662,7 @@ int mthca_init_cq(struct mthca_dev *dev,
 		cq->queue.direct.buf = pci_alloc_consistent(dev->pdev,
 							    size, &t);
 		if (!cq->queue.direct.buf)
-			goto err_out;
+			goto err_out_mailbox;
 
 		pci_unmap_addr_set(&cq->queue.direct, mapping, t);
 
@@ -619,12 +689,12 @@ int mthca_init_cq(struct mthca_dev *dev,
 
 		dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL);
 		if (!dma_list)
-			goto err_out;
+			goto err_out_mailbox;
 
 		cq->queue.page_list = kmalloc(npages * sizeof *cq->queue.page_list,
 					      GFP_KERNEL);
 		if (!cq->queue.page_list)
-			goto err_out;
+			goto err_out_mailbox;
 
 		for (i = 0; i < npages; ++i)
 			cq->queue.page_list[i].buf = NULL;
@@ -645,10 +715,6 @@ int mthca_init_cq(struct mthca_dev *dev,
 	for (i = 0; i < nent; ++i)
 		set_cqe_hw(get_cqe(cq, i));
 
-	cq->cqn = mthca_alloc(&dev->cq_table.alloc);
-	if (cq->cqn == -1)
-		goto err_out_free;
-
 	err = mthca_mr_alloc_phys(dev, dev->driver_pd.pd_num,
 				  dma_list, shift, npages,
 				  0, size,
@@ -656,7 +722,7 @@ int mthca_init_cq(struct mthca_dev *dev,
 				  MTHCA_MPT_FLAG_LOCAL_READ,
 				  &cq->mr);
 	if (err)
-		goto err_out_free_cq;
+		goto err_out_free;
 
 	spin_lock_init(&cq->lock);
 	atomic_set(&cq->refcount, 1);
@@ -668,13 +734,18 @@ int mthca_init_cq(struct mthca_dev *dev,
 						  MTHCA_CQ_FLAG_TR);
 	cq_context->start           = cpu_to_be64(0);
 	cq_context->logsize_usrpage = cpu_to_be32((ffs(nent) - 1) << 24 |
-						  MTHCA_KAR_PAGE);
+						  dev->driver_uar.index);
 	cq_context->error_eqn       = cpu_to_be32(dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn);
 	cq_context->comp_eqn        = cpu_to_be32(dev->eq_table.eq[MTHCA_EQ_COMP].eqn);
 	cq_context->pd              = cpu_to_be32(dev->driver_pd.pd_num);
 	cq_context->lkey            = cpu_to_be32(cq->mr.ibmr.lkey);
 	cq_context->cqn             = cpu_to_be32(cq->cqn);
 
+	if (dev->hca_type == ARBEL_NATIVE) {
+		cq_context->ci_db    = cpu_to_be32(cq->set_ci_db_index);
+		cq_context->state_db = cpu_to_be32(cq->arm_db_index);
+	}
+
 	err = mthca_SW2HW_CQ(dev, cq_context, cq->cqn, &status);
 	if (err) {
 		mthca_warn(dev, "SW2HW_CQ failed (%d)\n", err);
@@ -707,9 +778,6 @@ int mthca_init_cq(struct mthca_dev *dev,
  err_out_free_mr:
 	mthca_free_mr(dev, &cq->mr);
 
- err_out_free_cq:
-	mthca_free(&dev->cq_table.alloc, cq->cqn);
-
  err_out_free:
 	if (cq->is_direct)
 		pci_free_consistent(dev->pdev, size,
@@ -726,10 +794,21 @@ int mthca_init_cq(struct mthca_dev *dev,
 		kfree(cq->queue.page_list);
 	}
 
- err_out:
+ err_out_mailbox:
 	kfree(dma_list);
 	kfree(mailbox);
 
+	mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM, cq->arm_db_index);
+
+ err_out_ci:
+	mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index);
+
+ err_out_icm:
+	mthca_table_put(dev, dev->cq_table.table, cq->cqn);
+
+ err_out:
+	mthca_free(&dev->cq_table.alloc, cq->cqn);
+
 	return err;
 }
 
@@ -802,6 +881,11 @@ void mthca_free_cq(struct mthca_dev *dev
 		kfree(cq->queue.page_list);
 	}
 
+	if (dev->hca_type == ARBEL_NATIVE) {
+		mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM,    cq->arm_db_index);
+		mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index);
+	}
+
 	mthca_free(&dev->cq_table.alloc, cq->cqn);
 	kfree(mailbox);
 }
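
The cons_index change running through this file deserves a word: the
consumer index is now a free-running 32-bit counter, and the mask into
the ring (& cq->ibcq.cqe, i.e. nent - 1) is applied only where an entry
is actually touched.  The Arbel set_ci doorbell record wants the raw
count, and wrapping at 2^32 is harmless for power-of-two ring sizes.
A stand-alone model of the indexing:

	#include <assert.h>

	int main(void)
	{
		unsigned int nent = 8, mask = nent - 1;	/* power-of-two ring */
		unsigned int cons_index = 0;
		unsigned int i;

		for (i = 0; i < 20; ++i) {
			unsigned int entry = cons_index & mask;	/* ring slot */

			assert(entry == i % nent);
			++cons_index;	/* free-running, never masked here */
		}
		assert(cons_index == 20);	/* raw count for the doorbell */
		return 0;
	}
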
Index: hw/mthca/mthca_cmd.c
===================================================================
--- hw/mthca/mthca_cmd.c	(revision 1701)
+++ hw/mthca/mthca_cmd.c	(working copy)
@@ -1290,7 +1290,7 @@ int mthca_MAP_ICM_page(struct mthca_dev 
 		return -ENOMEM;
 
 	inbox[0] = cpu_to_be64(virt);
-	inbox[1] = cpu_to_be64(dma_addr | (PAGE_SHIFT - 12));
+	inbox[1] = cpu_to_be64(dma_addr);
 
 	err = mthca_cmd(dev, indma, 1, 0, CMD_MAP_ICM, CMD_TIME_CLASS_B, status);
 
@@ -1305,6 +1305,9 @@ int mthca_MAP_ICM_page(struct mthca_dev 
 
 int mthca_UNMAP_ICM(struct mthca_dev *dev, u64 virt, u32 page_count, u8 *status)
 {
+	mthca_dbg(dev, "Unmapping %d pages at %llx from ICM.\n",
+		  page_count, (unsigned long long) virt);
+
 	return mthca_cmd(dev, virt, page_count, 0, CMD_UNMAP_ICM, CMD_TIME_CLASS_B, status);
 }
 
@@ -1538,10 +1541,10 @@ int mthca_MODIFY_QP(struct mthca_dev *de
 		if (0) {
 			int i;
 			mthca_dbg(dev, "Dumping QP context:\n");
-			printk(" %08x\n", be32_to_cpup(qp_context));
+			printk("  opt param mask: %08x\n", be32_to_cpup(qp_context));
 			for (i = 0; i < 0x100 / 4; ++i) {
 				if (i % 8 == 0)
-					printk("[%02x] ", i * 4);
+					printk("  [%02x] ", i * 4);
 				printk(" %08x", be32_to_cpu(((u32 *) qp_context)[i + 2]));
 				if ((i + 1) % 8 == 0)
 					printk("\n");
Index: hw/mthca/mthca_eq.c
===================================================================
--- hw/mthca/mthca_eq.c	(revision 1701)
+++ hw/mthca/mthca_eq.c	(working copy)
@@ -54,10 +54,10 @@ struct mthca_eq_context {
 	u32 flags;
 	u64 start;
 	u32 logsize_usrpage;
-	u32 pd;
+	u32 tavor_pd;		/* reserved for Arbel */
 	u8  reserved1[3];
 	u8  intr;
-	u32 lost_count;
+	u32 arbel_pd;		/* lost_count for Tavor */
 	u32 lkey;
 	u32 reserved2[2];
 	u32 consumer_index;
@@ -75,6 +75,7 @@ struct mthca_eq_context {
 #define MTHCA_EQ_STATE_ARMED        ( 1 <<  8)
 #define MTHCA_EQ_STATE_FIRED        ( 2 <<  8)
 #define MTHCA_EQ_STATE_ALWAYS_ARMED ( 3 <<  8)
+#define MTHCA_EQ_STATE_ARBEL        ( 8 <<  8)
 
 enum {
 	MTHCA_EVENT_TYPE_COMP       	    = 0x00,
@@ -164,19 +165,46 @@ static inline u64 async_mask(struct mthc
 		MTHCA_ASYNC_EVENT_MASK;
 }
 
-static inline void set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci)
+static inline void tavor_set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci)
 {
 	u32 doorbell[2];
 
 	doorbell[0] = cpu_to_be32(MTHCA_EQ_DB_SET_CI | eq->eqn);
 	doorbell[1] = cpu_to_be32(ci & (eq->nent - 1));
 
+	/*
+	 * This barrier makes sure that all updates to ownership bits
+	 * done by set_eqe_hw() hit memory before the consumer index
+	 * is updated.  set_eq_ci() allows the HCA to possibly write
+	 * more EQ entries, and we want to avoid the exceedingly
+	 * unlikely possibility of the HCA writing an entry and then
+	 * having set_eqe_hw() overwrite the owner field.
+	 */
+	wmb();
 	mthca_write64(doorbell,
 		      dev->kar + MTHCA_EQ_DOORBELL,
 		      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
 }
 
-static inline void eq_req_not(struct mthca_dev *dev, int eqn)
+static inline void arbel_set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci)
+{
+	/* See comment in tavor_set_eq_ci() above. */
+	wmb();
+	__raw_writel(cpu_to_be32(ci), dev->eq_regs.arbel.eq_set_ci_base +
+		     eq->eqn * 8);
+	/* We still want ordering, just not swabbing, so add a barrier */
+	mb();
+}
+
+static inline void set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci)
+{
+	if (dev->hca_type == ARBEL_NATIVE)
+		arbel_set_eq_ci(dev, eq, ci);
+	else
+		tavor_set_eq_ci(dev, eq, ci);
+}
+
+static inline void tavor_eq_req_not(struct mthca_dev *dev, int eqn)
 {
 	u32 doorbell[2];
 
@@ -188,6 +216,11 @@ static inline void eq_req_not(struct mth
 		      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
 }
 
+static inline void arbel_eq_req_not(struct mthca_dev *dev, u32 eqn_mask)
+{
+	writel(eqn_mask, dev->eq_regs.arbel.eq_arm);
+}
+
 static inline void disarm_cq(struct mthca_dev *dev, int eqn, int cqn)
 {
 	u32 doorbell[2];
@@ -232,7 +265,7 @@ static void port_change(struct mthca_dev
 	ib_dispatch_event(&record);
 }
 
-static void mthca_eq_int(struct mthca_dev *dev, struct mthca_eq *eq)
+static int mthca_eq_int(struct mthca_dev *dev, struct mthca_eq *eq)
 {
 	struct mthca_eqe *eqe;
 	int disarm_cqn;
@@ -333,30 +366,24 @@ static void mthca_eq_int(struct mthca_de
 		++eq->cons_index;
 		eqes_found = 1;
 
-		if (set_ci) {
-			wmb(); /* see comment below */
+		if (unlikely(set_ci)) {
+			/*
+			 * Conditional on hca_type is OK here because
+			 * this is a rare case, not the fast path.
+			 */
 			set_eq_ci(dev, eq, eq->cons_index);
 			set_ci = 0;
 		}
 	}
 
 	/*
-	 * This barrier makes sure that all updates to
-	 * ownership bits done by set_eqe_hw() hit memory
-	 * before the consumer index is updated.  set_eq_ci()
-	 * allows the HCA to possibly write more EQ entries,
-	 * and we want to avoid the exceedingly unlikely
-	 * possibility of the HCA writing an entry and then
-	 * having set_eqe_hw() overwrite the owner field.
+	 * Rely on caller to set consumer index so that we don't have
+	 * to test hca_type in our interrupt handling fast path.
 	 */
-	if (likely(eqes_found)) {
-		wmb();
-		set_eq_ci(dev, eq, eq->cons_index);
-	}
-	eq_req_not(dev, eq->eqn);
+	return eqes_found;
 }
 
-static irqreturn_t mthca_interrupt(int irq, void *dev_ptr, struct pt_regs *regs)
+static irqreturn_t mthca_tavor_interrupt(int irq, void *dev_ptr, struct pt_regs *regs)
 {
 	struct mthca_dev *dev = dev_ptr;
 	u32 ecr;
@@ -366,27 +393,70 @@ static irqreturn_t mthca_interrupt(int i
 	if (dev->eq_table.clr_mask)
 		writel(dev->eq_table.clr_mask, dev->eq_table.clr_int);
 
-	if ((ecr = readl(dev->ecr_base + 4)) != 0) {
+	if ((ecr = readl(dev->eq_regs.tavor.ecr_base + 4)) != 0) {
 		work = 1;
 
-		writel(ecr, dev->ecr_base +
+		writel(ecr, dev->eq_regs.tavor.ecr_base +
 		       MTHCA_ECR_CLR_BASE - MTHCA_ECR_BASE + 4);
 
 		for (i = 0; i < MTHCA_NUM_EQ; ++i)
-			if (ecr & dev->eq_table.eq[i].ecr_mask)
-				mthca_eq_int(dev, &dev->eq_table.eq[i]);
+			if (ecr & dev->eq_table.eq[i].eqn_mask &&
+			    mthca_eq_int(dev, &dev->eq_table.eq[i])) {
+				tavor_set_eq_ci(dev, &dev->eq_table.eq[i],
+						dev->eq_table.eq[i].cons_index);
+				tavor_eq_req_not(dev, dev->eq_table.eq[i].eqn);
+			}
 	}
 
 	return IRQ_RETVAL(work);
 }
 
-static irqreturn_t mthca_msi_x_interrupt(int irq, void *eq_ptr,
+static irqreturn_t mthca_tavor_msi_x_interrupt(int irq, void *eq_ptr,
 					 struct pt_regs *regs)
 {
 	struct mthca_eq  *eq  = eq_ptr;
 	struct mthca_dev *dev = eq->dev;
 
-	mthca_eq_int(dev, eq);
+	if (mthca_eq_int(dev, eq)) {
+		tavor_set_eq_ci(dev, eq, eq->cons_index);
+		tavor_eq_req_not(dev, eq->eqn);
+	}
+
+	/* MSI-X vectors always belong to us */
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t mthca_arbel_interrupt(int irq, void *dev_ptr, struct pt_regs *regs)
+{
+	struct mthca_dev *dev = dev_ptr;
+	u32 arm = 0;
+	int i;
+
+	if (dev->eq_table.clr_mask)
+		writel(dev->eq_table.clr_mask, dev->eq_table.clr_int);
+
+	for (i = 0; i < MTHCA_NUM_EQ; ++i)
+		if (mthca_eq_int(dev, &dev->eq_table.eq[i])) {
+			arbel_set_eq_ci(dev, &dev->eq_table.eq[i],
+					dev->eq_table.eq[i].cons_index);
+			arm |= dev->eq_table.eq[i].eqn_mask;
+		}
+
+	arbel_eq_req_not(dev, arm);
+
+	return IRQ_RETVAL(arm);
+}
+
+static irqreturn_t mthca_arbel_msi_x_interrupt(int irq, void *eq_ptr,
+					       struct pt_regs *regs)
+{
+	struct mthca_eq  *eq  = eq_ptr;
+	struct mthca_dev *dev = eq->dev;
+
+	if (mthca_eq_int(dev, eq)) {
+		arbel_set_eq_ci(dev, eq, eq->cons_index);
+		arbel_eq_req_not(dev, eq->eqn_mask);
+	}
 
 	/* MSI-X vectors always belong to us */
 	return IRQ_HANDLED;
@@ -467,10 +537,16 @@ static int __devinit mthca_create_eq(str
 						  MTHCA_EQ_OWNER_HW    |
 						  MTHCA_EQ_STATE_ARMED |
 						  MTHCA_EQ_FLAG_TR);
-	eq_context->start           = cpu_to_be64(0);
-	eq_context->logsize_usrpage = cpu_to_be32((ffs(nent) - 1) << 24 |
-						  MTHCA_KAR_PAGE);
-	eq_context->pd              = cpu_to_be32(dev->driver_pd.pd_num);
+	if (dev->hca_type == ARBEL_NATIVE)
+		eq_context->flags  |= cpu_to_be32(MTHCA_EQ_STATE_ARBEL);
+
+	eq_context->logsize_usrpage = cpu_to_be32((ffs(nent) - 1) << 24);
+	if (dev->hca_type == ARBEL_NATIVE) {
+		eq_context->arbel_pd = cpu_to_be32(dev->driver_pd.pd_num);
+	} else {
+		eq_context->logsize_usrpage |= cpu_to_be32(dev->driver_uar.index);
+		eq_context->tavor_pd         = cpu_to_be32(dev->driver_pd.pd_num);
+	}
 	eq_context->intr            = intr;
 	eq_context->lkey            = cpu_to_be32(eq->mr.ibmr.lkey);
 
@@ -489,11 +565,9 @@ static int __devinit mthca_create_eq(str
 	kfree(dma_list);
 	kfree(mailbox);
 
-	eq->ecr_mask   = swab32(1 << eq->eqn);
+	eq->eqn_mask   = swab32(1 << eq->eqn);
 	eq->cons_index = 0;
 
-	eq_req_not(dev, eq->eqn);
-
 	mthca_dbg(dev, "Allocated EQ %d with %d entries\n",
 		  eq->eqn, nent);
 
@@ -555,7 +629,6 @@ static void mthca_free_eq(struct mthca_d
 		}
 	}
 
-
 	mthca_free_mr(dev, &eq->mr);
 	for (i = 0; i < npages; ++i)
 		pci_free_consistent(dev->pdev, PAGE_SIZE,
@@ -578,6 +651,129 @@ static void mthca_free_irqs(struct mthca
 				 dev->eq_table.eq + i);
 }
 
+static int __devinit mthca_map_reg(struct mthca_dev *dev,
+				   unsigned long offset, unsigned long size,
+				   void __iomem **map)
+{
+	unsigned long base = pci_resource_start(dev->pdev, 0);
+
+	if (!request_mem_region(base + offset, size, DRV_NAME))
+		return -EBUSY;
+
+	*map = ioremap(base + offset, size);
+	if (!*map) {
+		release_mem_region(base + offset, size);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void mthca_unmap_reg(struct mthca_dev *dev, unsigned long offset,
+			    unsigned long size, void __iomem *map)
+{
+	unsigned long base = pci_resource_start(dev->pdev, 0);
+
+	release_mem_region(base + offset, size);
+	iounmap(map);
+}
+
+static int __devinit mthca_map_eq_regs(struct mthca_dev *dev)
+{
+	unsigned long mthca_base;
+
+	mthca_base = pci_resource_start(dev->pdev, 0);
+
+	if (dev->hca_type == ARBEL_NATIVE) {
+		/*
+		 * We assume that the EQ arm and EQ set CI registers
+		 * fall within the first BAR.  We can't trust the
+		 * values firmware gives us, since those addresses are
+		 * valid on the HCA's side of the PCI bus but not
+		 * necessarily the host side.
+		 */
+		if (mthca_map_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) &
+				  dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE,
+				  &dev->clr_base)) {
+			mthca_err(dev, "Couldn't map interrupt clear register, "
+				  "aborting.\n");
+			return -ENOMEM;
+		}
+
+		/*
+		 * Add 4 because we limit ourselves to EQs 0 ... 31,
+		 * so we only need the low word of the register.
+		 */
+		if (mthca_map_reg(dev, ((pci_resource_len(dev->pdev, 0) - 1) &
+					dev->fw.arbel.eq_arm_base) + 4, 4,
+				  &dev->eq_regs.arbel.eq_arm)) {
+			mthca_err(dev, "Couldn't map interrupt clear register, "
+				  "aborting.\n");
+			mthca_unmap_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) &
+					dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE,
+					dev->clr_base);
+			return -ENOMEM;
+		}
+
+		if (mthca_map_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) &
+				  dev->fw.arbel.eq_set_ci_base,
+				  MTHCA_EQ_SET_CI_SIZE,
+				  &dev->eq_regs.arbel.eq_set_ci_base)) {
+			mthca_err(dev, "Couldn't map interrupt clear register, "
+				  "aborting.\n");
+			mthca_unmap_reg(dev, ((pci_resource_len(dev->pdev, 0) - 1) &
+					      dev->fw.arbel.eq_arm_base) + 4, 4,
+					dev->eq_regs.arbel.eq_arm);
+			mthca_unmap_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) &
+					dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE,
+					dev->clr_base);
+			return -ENOMEM;
+		}
+	} else {
+		if (mthca_map_reg(dev, MTHCA_CLR_INT_BASE, MTHCA_CLR_INT_SIZE,
+				  &dev->clr_base)) {
+			mthca_err(dev, "Couldn't map interrupt clear register, "
+				  "aborting.\n");
+			return -ENOMEM;
+		}
+
+		if (mthca_map_reg(dev, MTHCA_ECR_BASE,
+				  MTHCA_ECR_SIZE + MTHCA_ECR_CLR_SIZE,
+				  &dev->eq_regs.tavor.ecr_base)) {
+			mthca_err(dev, "Couldn't map ecr register, "
+				  "aborting.\n");
+			mthca_unmap_reg(dev, MTHCA_CLR_INT_BASE, MTHCA_CLR_INT_SIZE,
+					dev->clr_base);
+			return -ENOMEM;
+		}
+	}
+
+	return 0;
+
+}
+
+static void __devexit mthca_unmap_eq_regs(struct mthca_dev *dev)
+{
+	if (dev->hca_type == ARBEL_NATIVE) {
+		mthca_unmap_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) &
+				dev->fw.arbel.eq_set_ci_base,
+				MTHCA_EQ_SET_CI_SIZE,
+				dev->eq_regs.arbel.eq_set_ci_base);
+		mthca_unmap_reg(dev, ((pci_resource_len(dev->pdev, 0) - 1) &
+				      dev->fw.arbel.eq_arm_base) + 4, 4,
+				dev->eq_regs.arbel.eq_arm);
+		mthca_unmap_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) &
+				dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE,
+				dev->clr_base);
+	} else {
+		mthca_unmap_reg(dev, MTHCA_ECR_BASE,
+				MTHCA_ECR_SIZE + MTHCA_ECR_CLR_SIZE,
+				dev->eq_regs.tavor.ecr_base);
+		mthca_unmap_reg(dev, MTHCA_CLR_INT_BASE, MTHCA_CLR_INT_SIZE,
+				dev->clr_base);
+	}
+}
+
 int __devinit mthca_map_eq_icm(struct mthca_dev *dev, u64 icm_virt)
 {
 	int ret;
@@ -636,6 +832,10 @@ int __devinit mthca_init_eq_table(struct
 	if (err)
 		return err;
 
+	err = mthca_map_eq_regs(dev);
+	if (err)
+		goto err_out_free;
+
 	if (dev->mthca_flags & MTHCA_FLAG_MSI ||
 	    dev->mthca_flags & MTHCA_FLAG_MSI_X) {
 		dev->eq_table.clr_mask = 0;
@@ -653,7 +853,7 @@ int __devinit mthca_init_eq_table(struct
 			      (dev->mthca_flags & MTHCA_FLAG_MSI_X) ? 128 : intr,
 			      &dev->eq_table.eq[MTHCA_EQ_COMP]);
 	if (err)
-		goto err_out_free;
+		goto err_out_unmap;
 
 	err = mthca_create_eq(dev, MTHCA_NUM_ASYNC_EQE,
 			      (dev->mthca_flags & MTHCA_FLAG_MSI_X) ? 129 : intr,
@@ -676,15 +876,20 @@ int __devinit mthca_init_eq_table(struct
 
 		for (i = 0; i < MTHCA_NUM_EQ; ++i) {
 			err = request_irq(dev->eq_table.eq[i].msi_x_vector,
-					  mthca_msi_x_interrupt, 0,
-					  eq_name[i], dev->eq_table.eq + i);
+					  dev->hca_type == ARBEL_NATIVE ?
+					  mthca_arbel_msi_x_interrupt :
+					  mthca_tavor_msi_x_interrupt,
+					  0, eq_name[i], dev->eq_table.eq + i);
 			if (err)
 				goto err_out_cmd;
 			dev->eq_table.eq[i].have_irq = 1;
 		}
 	} else {
-		err = request_irq(dev->pdev->irq, mthca_interrupt, SA_SHIRQ,
-				  DRV_NAME, dev);
+		err = request_irq(dev->pdev->irq,
+				  dev->hca_type == ARBEL_NATIVE ?
+				  mthca_arbel_interrupt :
+				  mthca_tavor_interrupt,
+				  SA_SHIRQ, DRV_NAME, dev);
 		if (err)
 			goto err_out_cmd;
 		dev->eq_table.have_irq = 1;
@@ -708,6 +913,12 @@ int __devinit mthca_init_eq_table(struct
 		mthca_warn(dev, "MAP_EQ for cmd EQ %d returned status 0x%02x\n",
 			   dev->eq_table.eq[MTHCA_EQ_CMD].eqn, status);
 
+	for (i = 0; i < MTHCA_EQ_CMD; ++i)
+		if (dev->hca_type == ARBEL_NATIVE)
+			arbel_eq_req_not(dev, dev->eq_table.eq[i].eqn_mask);
+		else
+			tavor_eq_req_not(dev, dev->eq_table.eq[i].eqn);
+
 	return 0;
 
 err_out_cmd:
@@ -720,6 +931,9 @@ err_out_async:
 err_out_comp:
 	mthca_free_eq(dev, &dev->eq_table.eq[MTHCA_EQ_COMP]);
 
+err_out_unmap:
+	mthca_unmap_eq_regs(dev);
+
 err_out_free:
 	mthca_alloc_cleanup(&dev->eq_table.alloc);
 	return err;
@@ -740,5 +954,7 @@ void __devexit mthca_cleanup_eq_table(st
 	for (i = 0; i < MTHCA_NUM_EQ; ++i)
 		mthca_free_eq(dev, &dev->eq_table.eq[i]);
 
+	mthca_unmap_eq_regs(dev);
+
 	mthca_alloc_cleanup(&dev->eq_table.alloc);
 }
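
One trick in mthca_map_eq_regs() above that may look odd: the
firmware-supplied register addresses (dev->fw.arbel.clr_int_base and
friends) are valid on the HCA's side of the bus, so they are masked
with pci_resource_len(pdev, 0) - 1 to recover the offset within BAR 0.
This works because BAR sizes are always powers of two.  With made-up
numbers:

	#include <assert.h>

	int main(void)
	{
		unsigned long bar_len = 1UL << 20;	/* hypothetical 1MB BAR 0 */
		unsigned long fw_addr = 0x70dd0008UL;	/* hypothetical fw address */
		unsigned long offset  = (bar_len - 1) & fw_addr;

		assert(offset == 0xd0008);	/* offset into the BAR */
		assert(offset < bar_len);
		return 0;
	}
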
Index: hw/mthca/mthca_av.c
===================================================================
--- hw/mthca/mthca_av.c	(revision 1701)
+++ hw/mthca/mthca_av.c	(working copy)
@@ -60,27 +60,34 @@ int mthca_create_ah(struct mthca_dev *de
 	u32 index = -1;
 	struct mthca_av *av = NULL;
 
-	ah->on_hca = 0;
+	ah->type = MTHCA_AH_PCI_POOL;
 
-	if (!atomic_read(&pd->sqp_count) &&
-	    !(dev->mthca_flags & MTHCA_FLAG_DDR_HIDDEN)) {
+	if (dev->hca_type == ARBEL_NATIVE) {
+		ah->av   = kmalloc(sizeof *ah->av, GFP_KERNEL);
+		if (!ah->av)
+			return -ENOMEM;
+
+		ah->type = MTHCA_AH_KMALLOC;
+		av       = ah->av;
+	} else if (!atomic_read(&pd->sqp_count) &&
+		 !(dev->mthca_flags & MTHCA_FLAG_DDR_HIDDEN)) {
 		index = mthca_alloc(&dev->av_table.alloc);
 
 		/* fall back to allocate in host memory */
 		if (index == -1)
-			goto host_alloc;
+			goto on_hca_fail;
 
 		av = kmalloc(sizeof *av, GFP_KERNEL);
 		if (!av)
-			goto host_alloc;
+			goto on_hca_fail;
 
-		ah->on_hca = 1;
+		ah->type = MTHCA_AH_ON_HCA;
 		ah->avdma  = dev->av_table.ddr_av_base +
 			index * MTHCA_AV_SIZE;
 	}
 
- host_alloc:
-	if (!ah->on_hca) {
+on_hca_fail:
+	if (ah->type == MTHCA_AH_PCI_POOL) {
 		ah->av = pci_pool_alloc(dev->av_table.pool,
 					SLAB_KERNEL, &ah->avdma);
 		if (!ah->av)
@@ -123,7 +130,7 @@ int mthca_create_ah(struct mthca_dev *de
 			       j * 4, be32_to_cpu(((u32 *) av)[j]));
 	}
 
-	if (ah->on_hca) {
+	if (ah->type == MTHCA_AH_ON_HCA) {
 		memcpy_toio(dev->av_table.av_map + index * MTHCA_AV_SIZE,
 			    av, MTHCA_AV_SIZE);
 		kfree(av);
@@ -134,12 +141,21 @@ int mthca_create_ah(struct mthca_dev *de
 
 int mthca_destroy_ah(struct mthca_dev *dev, struct mthca_ah *ah)
 {
-	if (ah->on_hca)
+	switch (ah->type) {
+	case MTHCA_AH_ON_HCA:
 		mthca_free(&dev->av_table.alloc,
  			   (ah->avdma - dev->av_table.ddr_av_base) /
 			   MTHCA_AV_SIZE);
-	else
+		break;
+
+	case MTHCA_AH_PCI_POOL:
 		pci_pool_free(dev->av_table.pool, ah->av, ah->avdma);
+		break;
+
+	case MTHCA_AH_KMALLOC:
+		kfree(ah->av);
+		break;
+	}
 
 	return 0;
 }
@@ -147,7 +163,7 @@ int mthca_destroy_ah(struct mthca_dev *d
 int mthca_read_ah(struct mthca_dev *dev, struct mthca_ah *ah,
 		  struct ib_ud_header *header)
 {
-	if (ah->on_hca)
+	if (ah->type == MTHCA_AH_ON_HCA)
 		return -EINVAL;
 
 	header->lrh.service_level   = be32_to_cpu(ah->av->sl_tclass_flowlabel) >> 28;
@@ -176,6 +192,9 @@ int __devinit mthca_init_av_table(struct
 {
 	int err;
 
+	if (dev->hca_type == ARBEL_NATIVE)
+		return 0;
+
 	err = mthca_alloc_init(&dev->av_table.alloc,
 			       dev->av_table.num_ddr_avs,
 			       dev->av_table.num_ddr_avs - 1,
@@ -212,6 +231,9 @@ int __devinit mthca_init_av_table(struct
 
 void __devexit mthca_cleanup_av_table(struct mthca_dev *dev)
 {
+	if (dev->hca_type == ARBEL_NATIVE)
+		return;
+
 	if (dev->av_table.av_map)
 		iounmap(dev->av_table.av_map);
 	pci_pool_destroy(dev->av_table.pool);
Index: hw/mthca/mthca_config_reg.h
===================================================================
--- hw/mthca/mthca_config_reg.h	(revision 1701)
+++ hw/mthca/mthca_config_reg.h	(working copy)
@@ -46,5 +46,6 @@
 #define MTHCA_MAP_ECR_SIZE     (MTHCA_ECR_SIZE + MTHCA_ECR_CLR_SIZE)
 #define MTHCA_CLR_INT_BASE     0xf00d8
 #define MTHCA_CLR_INT_SIZE     0x00008
+#define MTHCA_EQ_SET_CI_SIZE   (8 * 32)
 
 #endif /* MTHCA_CONFIG_REG_H */
Index: hw/mthca/mthca_doorbell.h
===================================================================
--- hw/mthca/mthca_doorbell.h	(revision 1701)
+++ hw/mthca/mthca_doorbell.h	(working copy)
@@ -57,6 +57,11 @@ static inline void mthca_write64(u32 val
 	__raw_writeq(*(u64 *) val, dest);
 }
 
+static inline void mthca_write_db_rec(u32 val[2], u32 *db)
+{
+	*(u64 *) db = *(u64 *) val;
+}
+
 #else
 
 /*
@@ -80,4 +85,11 @@ static inline void mthca_write64(u32 val
 	spin_unlock_irqrestore(doorbell_lock, flags);
 }
 
+static inline void mthca_write_db_rec(u32 val[2], u32 *db)
+{
+	db[0] = val[0];
+	wmb();
+	db[1] = val[1];
+}
+
 #endif
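
mthca_write_db_rec() above is the doorbell-record analogue of
mthca_write64(): on 64-bit machines one atomic store updates the whole
8-byte record, while on 32-bit machines the two halves are written with
a wmb() in between so the HCA can never observe a half-updated record.
Callers then order the record update before the MMIO doorbell, as in
mthca_arbel_arm_cq() earlier in this patch (condensed):

	mthca_write_db_rec(doorbell, cq->arm_db);	/* record in host memory */
	wmb();						/* record before MMIO */
	mthca_write64(doorbell, dev->kar + MTHCA_CQ_DOORBELL,
		      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
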
Index: hw/mthca/mthca_mr.c
===================================================================
--- hw/mthca/mthca_mr.c	(revision 1701)
+++ hw/mthca/mthca_mr.c	(working copy)
@@ -53,7 +53,8 @@ struct mthca_mpt_entry {
 	u32 window_count;
 	u32 window_count_limit;
 	u64 mtt_seg;
-	u32 reserved[3];
+	u32 mtt_sz;		/* Arbel only */
+	u32 reserved[2];
 } __attribute__((packed));
 
 #define MTHCA_MPT_FLAG_SW_OWNS       (0xfUL << 28)
@@ -121,21 +122,38 @@ static void mthca_free_mtt(struct mthca_
 	spin_unlock(&dev->mr_table.mpt_alloc.lock);
 }
 
+static inline u32 hw_index_to_key(struct mthca_dev *dev, u32 ind)
+{
+	if (dev->hca_type == ARBEL_NATIVE)
+		return (ind >> 24) | (ind << 8);
+	else
+		return ind;
+}
+
+static inline u32 key_to_hw_index(struct mthca_dev *dev, u32 key)
+{
+	if (dev->hca_type == ARBEL_NATIVE)
+		return (key << 24) | (key >> 8);
+	else
+		return key;
+}
+
 int mthca_mr_alloc_notrans(struct mthca_dev *dev, u32 pd,
 			   u32 access, struct mthca_mr *mr)
 {
 	void *mailbox;
 	struct mthca_mpt_entry *mpt_entry;
+	u32 key;
 	int err;
 	u8 status;
 
 	might_sleep();
 
 	mr->order = -1;
-	mr->ibmr.lkey = mthca_alloc(&dev->mr_table.mpt_alloc);
-	if (mr->ibmr.lkey == -1)
+	key = mthca_alloc(&dev->mr_table.mpt_alloc);
+	if (key == -1)
 		return -ENOMEM;
-	mr->ibmr.rkey = mr->ibmr.lkey;
+	mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key);
 
 	mailbox = kmalloc(sizeof *mpt_entry + MTHCA_CMD_MAILBOX_EXTRA,
 			  GFP_KERNEL);
@@ -151,7 +169,7 @@ int mthca_mr_alloc_notrans(struct mthca_
 				       MTHCA_MPT_FLAG_REGION      |
 				       access);
 	mpt_entry->page_size = 0;
-	mpt_entry->key       = cpu_to_be32(mr->ibmr.lkey);
+	mpt_entry->key       = cpu_to_be32(key);
 	mpt_entry->pd        = cpu_to_be32(pd);
 	mpt_entry->start     = 0;
 	mpt_entry->length    = ~0ULL;
@@ -160,7 +178,7 @@ int mthca_mr_alloc_notrans(struct mthca_
 	       sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, lkey));
 
 	err = mthca_SW2HW_MPT(dev, mpt_entry,
-			      mr->ibmr.lkey & (dev->limits.num_mpts - 1),
+			      key & (dev->limits.num_mpts - 1),
 			      &status);
 	if (err)
 		mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err);
@@ -182,6 +200,7 @@ int mthca_mr_alloc_phys(struct mthca_dev
 	void *mailbox;
 	u64 *mtt_entry;
 	struct mthca_mpt_entry *mpt_entry;
+	u32 key;
 	int err = -ENOMEM;
 	u8 status;
 	int i;
@@ -189,10 +208,10 @@ int mthca_mr_alloc_phys(struct mthca_dev
 	might_sleep();
 	WARN_ON(buffer_size_shift >= 32);
 
-	mr->ibmr.lkey = mthca_alloc(&dev->mr_table.mpt_alloc);
-	if (mr->ibmr.lkey == -1)
+	key = mthca_alloc(&dev->mr_table.mpt_alloc);
+	if (key == -1)
 		return -ENOMEM;
-	mr->ibmr.rkey = mr->ibmr.lkey;
+	mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key);
 
 	for (i = dev->limits.mtt_seg_size / 8, mr->order = 0;
 	     i < list_len;
@@ -254,7 +273,7 @@ int mthca_mr_alloc_phys(struct mthca_dev
 				       access);
 
 	mpt_entry->page_size = cpu_to_be32(buffer_size_shift - 12);
-	mpt_entry->key       = cpu_to_be32(mr->ibmr.lkey);
+	mpt_entry->key       = cpu_to_be32(key);
 	mpt_entry->pd        = cpu_to_be32(pd);
 	mpt_entry->start     = cpu_to_be64(iova);
 	mpt_entry->length    = cpu_to_be64(total_size);
@@ -264,7 +283,7 @@ int mthca_mr_alloc_phys(struct mthca_dev
 					   mr->first_seg * dev->limits.mtt_seg_size);
 
 	if (0) {
-		mthca_dbg(dev, "Dumping MPT entry %08x:\n", mr->ibmr.lkey);
+		mthca_dbg(dev, "Dumping MPT entry %08x:\n", key);
 		for (i = 0; i < sizeof (struct mthca_mpt_entry) / 4; ++i) {
 			if (i % 4 == 0)
 				printk("[%02x] ", i * 4);
@@ -275,7 +294,7 @@ int mthca_mr_alloc_phys(struct mthca_dev
 	}
 
 	err = mthca_SW2HW_MPT(dev, mpt_entry,
-			      mr->ibmr.lkey & (dev->limits.num_mpts - 1),
+			      key & (dev->limits.num_mpts - 1),
 			      &status);
 	if (err)
 		mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err);
@@ -307,7 +326,8 @@ void mthca_free_mr(struct mthca_dev *dev
 	might_sleep();
 
 	err = mthca_HW2SW_MPT(dev, NULL,
-			      mr->ibmr.lkey & (dev->limits.num_mpts - 1),
+			      key_to_hw_index(dev, mr->ibmr.lkey) &
+			      (dev->limits.num_mpts - 1),
 			      &status);
 	if (err)
 		mthca_warn(dev, "HW2SW_MPT failed (%d)\n", err);
@@ -318,7 +338,7 @@ void mthca_free_mr(struct mthca_dev *dev
 	if (mr->order >= 0)
 		mthca_free_mtt(dev, mr->first_seg, mr->order);
 
-	mthca_free(&dev->mr_table.mpt_alloc, mr->ibmr.lkey);
+	mthca_free(&dev->mr_table.mpt_alloc, key_to_hw_index(dev, mr->ibmr.lkey));
 }
 
 int __devinit mthca_init_mr_table(struct mthca_dev *dev)
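
The key munging above (hw_index_to_key()/key_to_hw_index()) is just an
8-bit rotation of the 32-bit key, and the two are exact inverses --
presumably because mem-free firmware keeps the index and key portions
of an lkey in different byte positions than Tavor does.  A quick
round-trip check:

	#include <assert.h>
	#include <stdint.h>

	static uint32_t hw_index_to_key(uint32_t ind)
	{
		return (ind >> 24) | (ind << 8);	/* rotate left 8 */
	}

	static uint32_t key_to_hw_index(uint32_t key)
	{
		return (key << 24) | (key >> 8);	/* rotate right 8 */
	}

	int main(void)
	{
		uint32_t ind = 0x00abcdef;
		uint32_t key = hw_index_to_key(ind);

		assert(key == 0xabcdef00);
		assert(key_to_hw_index(key) == ind);	/* exact inverse */
		return 0;
	}
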
Index: hw/mthca/mthca_qp.c
===================================================================
--- hw/mthca/mthca_qp.c	(revision 1701)
+++ hw/mthca/mthca_qp.c	(working copy)
@@ -40,6 +40,7 @@
 
 #include "mthca_dev.h"
 #include "mthca_cmd.h"
+#include "mthca_memfree.h"
 
 enum {
 	MTHCA_MAX_DIRECT_QP_SIZE = 4 * PAGE_SIZE,
@@ -105,8 +106,11 @@ struct mthca_qp_path {
 
 struct mthca_qp_context {
 	u32 flags;
-	u32 sched_queue;
-	u32 mtu_msgmax;
+	u32 tavor_sched_queue;	/* Reserved on Arbel */
+	u8  mtu_msgmax;
+	u8  rq_size_stride;	/* Reserved on Tavor */
+	u8  sq_size_stride;	/* Reserved on Tavor */
+	u8  rlkey_arbel_sched_queue;	/* Reserved on Tavor */
 	u32 usr_page;
 	u32 local_qpn;
 	u32 remote_qpn;
@@ -121,18 +125,22 @@ struct mthca_qp_context {
 	u32 reserved2;
 	u32 next_send_psn;
 	u32 cqn_snd;
-	u32 next_snd_wqe[2];
+	u32 snd_wqe_base_l;	/* Next send WQE on Tavor */
+	u32 snd_db_index;	/* (debugging only entries) */
 	u32 last_acked_psn;
 	u32 ssn;
 	u32 params2;
 	u32 rnr_nextrecvpsn;
 	u32 ra_buff_indx;
 	u32 cqn_rcv;
-	u32 next_rcv_wqe[2];
+	u32 rcv_wqe_base_l;	/* Next recv WQE on Tavor */
+	u32 rcv_db_index;	/* (debugging only entries) */
 	u32 qkey;
 	u32 srqn;
 	u32 rmsn;
-	u32 reserved3[19];
+	u16 rq_wqe_counter;	/* reserved on Tavor */
+	u16 sq_wqe_counter;	/* reserved on Tavor */
+	u32 reserved3[18];
 } __attribute__((packed));
 
 struct mthca_qp_param {
@@ -617,15 +625,24 @@ int mthca_modify_qp(struct ib_qp *ibqp, 
 			break;
 		}
 	}
-	/* leave sched_queue as 0 */
+
+	/* leave tavor_sched_queue as 0 */
+
 	if (qp->transport == MLX || qp->transport == UD)
-		qp_context->mtu_msgmax = cpu_to_be32((IB_MTU_2048 << 29) |
-						     (11 << 24));
-	else if (attr_mask & IB_QP_PATH_MTU) {
-		qp_context->mtu_msgmax = cpu_to_be32((attr->path_mtu << 29) |
-						     (31 << 24));
+		qp_context->mtu_msgmax = (IB_MTU_2048 << 5) | 11;
+	else if (attr_mask & IB_QP_PATH_MTU)
+		qp_context->mtu_msgmax = (attr->path_mtu << 5) | 31;
+
+	if (dev->hca_type == ARBEL_NATIVE) {
+		qp_context->rq_size_stride =
+			((ffs(qp->rq.max) - 1) << 3) | (qp->rq.wqe_shift - 4);
+		qp_context->sq_size_stride =
+			((ffs(qp->sq.max) - 1) << 3) | (qp->sq.wqe_shift - 4);
 	}
-	qp_context->usr_page   = cpu_to_be32(MTHCA_KAR_PAGE);
+
+	/* leave arbel_sched_queue as 0 */
+
+	qp_context->usr_page   = cpu_to_be32(dev->driver_uar.index);
 	qp_context->local_qpn  = cpu_to_be32(qp->qpn);
 	if (attr_mask & IB_QP_DEST_QPN) {
 		qp_context->remote_qpn = cpu_to_be32(attr->dest_qp_num);
@@ -708,6 +725,11 @@ int mthca_modify_qp(struct ib_qp *ibqp, 
 		qp_context->next_send_psn = cpu_to_be32(attr->sq_psn);
 	qp_context->cqn_snd = cpu_to_be32(to_mcq(ibqp->send_cq)->cqn);
 
+	if (dev->hca_type == ARBEL_NATIVE) {
+		qp_context->snd_wqe_base_l = cpu_to_be32(qp->send_wqe_offset);
+		qp_context->snd_db_index   = cpu_to_be32(qp->sq.db_index);
+	}
+
 	if (attr_mask & IB_QP_ACCESS_FLAGS) {
 		/*
 		 * Only enable RDMA/atomics if we have responder
@@ -787,12 +809,16 @@ int mthca_modify_qp(struct ib_qp *ibqp, 
 	if (attr_mask & IB_QP_RQ_PSN)
 		qp_context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn);
 
-	qp_context->ra_buff_indx = dev->qp_table.rdb_base +
-		((qp->qpn & (dev->limits.num_qps - 1)) * MTHCA_RDB_ENTRY_SIZE <<
-		 dev->qp_table.rdb_shift);
+	qp_context->ra_buff_indx =
+		cpu_to_be32(dev->qp_table.rdb_base +
+			    ((qp->qpn & (dev->limits.num_qps - 1)) * MTHCA_RDB_ENTRY_SIZE <<
+			     dev->qp_table.rdb_shift));
 
 	qp_context->cqn_rcv = cpu_to_be32(to_mcq(ibqp->recv_cq)->cqn);
 
+	if (dev->hca_type == ARBEL_NATIVE)
+		qp_context->rcv_db_index   = cpu_to_be32(qp->rq.db_index);
+
 	if (attr_mask & IB_QP_QKEY) {
 		qp_context->qkey = cpu_to_be32(attr->qkey);
 		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_Q_KEY);
@@ -980,7 +1006,7 @@ static int mthca_alloc_qp_common(struct 
 				 enum ib_sig_type recv_policy,
 				 struct mthca_qp *qp)
 {
-	int err;
+	int ret;
 
 	spin_lock_init(&qp->lock);
 	atomic_set(&qp->refcount, 1);
@@ -998,8 +1024,73 @@ static int mthca_alloc_qp_common(struct 
 	qp->rq.last      = NULL;
 	qp->sq.last      = NULL;
 
-	err = mthca_alloc_wqe_buf(dev, pd, qp);
-	return err;
+	if (dev->hca_type == ARBEL_NATIVE) {
+		ret = mthca_table_get(dev, dev->qp_table.qp_table, qp->qpn);
+		if (ret)
+			goto err;
+
+		ret = mthca_table_get(dev, dev->qp_table.eqp_table, qp->qpn);
+		if (ret)
+			goto err_qpc;
+
+		qp->rq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_RQ,
+						 qp->qpn, &qp->rq.db);
+		if (qp->rq.db_index < 0) {
+			ret = -ENOMEM;
+			goto err_eqpc;
+		}
+
+		qp->sq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_SQ,
+						 qp->qpn, &qp->sq.db);
+		if (qp->sq.db_index < 0) {
+			ret = -ENOMEM;
+			goto err_rq_db;
+		}
+	}
+
+	ret = mthca_alloc_wqe_buf(dev, pd, qp);
+
+	if (ret) {
+		if (dev->hca_type == ARBEL_NATIVE)
+			goto err_sq_db;
+		else
+			return ret;
+	}
+
+	return 0;
+
+err_sq_db:
+	mthca_free_db(dev, MTHCA_DB_TYPE_SQ, qp->sq.db_index);
+
+err_rq_db:
+	mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index);
+
+err_eqpc:
+	mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn);
+
+err_qpc:
+	mthca_table_put(dev, dev->qp_table.qp_table, qp->qpn);
+
+err:
+	return ret;
+}
+
+static void mthca_align_qp_size(struct mthca_dev *dev, struct mthca_qp *qp)
+{
+	int i;
+
+	if (dev->hca_type != ARBEL_NATIVE)
+		return;
+
+	for (i = 0; 1 << i < qp->rq.max; ++i)
+		; /* nothing */
+
+	qp->rq.max = 1 << i;
+
+	for (i = 0; 1 << i < qp->sq.max; ++i)
+		; /* nothing */
+
+	qp->sq.max = 1 << i;
 }
 
 int mthca_alloc_qp(struct mthca_dev *dev,
@@ -1013,6 +1104,8 @@ int mthca_alloc_qp(struct mthca_dev *dev
 {
 	int err;
 
+	mthca_align_qp_size(dev, qp);
+
 	switch (type) {
 	case IB_QPT_RC: qp->transport = RC; break;
 	case IB_QPT_UC: qp->transport = UC; break;
@@ -1052,6 +1145,8 @@ int mthca_alloc_sqp(struct mthca_dev *de
 	int err = 0;
 	u32 mqpn = qpn * 2 + dev->qp_table.sqp_start + port - 1;
 
+	mthca_align_qp_size(dev, &sqp->qp);
+
 	sqp->header_buf_size = sqp->qp.sq.max * MTHCA_UD_HEADER_SIZE;
 	sqp->header_buf = dma_alloc_coherent(&dev->pdev->dev, sqp->header_buf_size,
 					     &sqp->header_dma, GFP_KERNEL);
@@ -1165,14 +1260,20 @@ void mthca_free_qp(struct mthca_dev *dev
 
 	kfree(qp->wrid);
 
+	if (dev->hca_type == ARBEL_NATIVE) {
+		mthca_free_db(dev, MTHCA_DB_TYPE_SQ, qp->sq.db_index);
+		mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index);
+		mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn);
+		mthca_table_put(dev, dev->qp_table.qp_table, qp->qpn);
+	}
+
 	if (is_sqp(dev, qp)) {
 		atomic_dec(&(to_mpd(qp->ibqp.pd)->sqp_count));
 		dma_free_coherent(&dev->pdev->dev,
 				  to_msqp(qp)->header_buf_size,
 				  to_msqp(qp)->header_buf,
 				  to_msqp(qp)->header_dma);
-	}
-	else
+	} else
 		mthca_free(&dev->qp_table.alloc, qp->qpn);
 }
 
@@ -1245,8 +1346,8 @@ static int build_mlx_header(struct mthca
 	return 0;
 }
 
-int mthca_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-		    struct ib_send_wr **bad_wr)
+int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+			  struct ib_send_wr **bad_wr)
 {
 	struct mthca_dev *dev = to_mdev(ibqp->device);
 	struct mthca_qp *qp = to_mqp(ibqp);
@@ -1459,8 +1560,8 @@ out:
 	return err;
 }
 
-int mthca_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-		       struct ib_recv_wr **bad_wr)
+int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+			     struct ib_recv_wr **bad_wr)
 {
 	struct mthca_dev *dev = to_mdev(ibqp->device);
 	struct mthca_qp *qp = to_mqp(ibqp);
@@ -1558,6 +1659,20 @@ out:
 	return err;
 }
 
+int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+			  struct ib_send_wr **bad_wr)
+{
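+	/* XXX mem-free work request posting is not implemented in this backport yet */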
+	return 0;
+}
+
+int mthca_arbel_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+			     struct ib_recv_wr **bad_wr)
+{
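+	/* XXX mem-free work request posting is not implemented in this backport yet */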
+	return 0;
+}
+
 int mthca_free_err_wqe(struct mthca_qp *qp, int is_send,
 		       int index, int *dbd, u32 *new_wqe)
 {
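
Note: rq_size_stride/sq_size_stride above pack log2 of the queue size into
bits 3..7 and (log2 of the WQE stride - 4) into bits 0..2.  A minimal
sketch of the encoding (illustration only, not part of the patch;
mthca_align_qp_size() added above guarantees qp->rq.max and qp->sq.max are
powers of 2 by the time this runs):

	static u8 pack_size_stride(int max, int wqe_shift)
	{
		/* ffs(max) - 1 == log2(max) when max is a power of 2;
		 * wqe_shift is log2 of the WQE size in bytes */
		return ((ffs(max) - 1) << 3) | (wqe_shift - 4);
	}
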
Index: hw/mthca/Makefile
===================================================================
--- hw/mthca/Makefile	(revision 1701)
+++ hw/mthca/Makefile	(working copy)
@@ -9,4 +9,4 @@ obj-$(CONFIG_INFINIBAND_MTHCA) += ib_mth
 ib_mthca-y :=	mthca_main.o mthca_cmd.o mthca_profile.o mthca_reset.o \
 		mthca_allocator.o mthca_eq.o mthca_pd.o mthca_cq.o \
 		mthca_mr.o mthca_qp.o mthca_av.o mthca_mcg.o mthca_mad.o \
-		mthca_provider.o mthca_memfree.o
+		mthca_provider.o mthca_memfree.o mthca_uar.o
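
Note: the Makefile now builds mthca_uar.o, which holds the UAR allocator
(mthca_init_uar_table(), mthca_uar_alloc() and friends).  For reference, a
rough sketch of what the two entry points amount to, assuming they follow
the same allocator pattern as the other tables and that struct mthca_uar
carries index and pfn members (the pfn member is an assumption here):

	int mthca_uar_alloc(struct mthca_dev *dev, struct mthca_uar *uar)
	{
		uar->index = mthca_alloc(&dev->uar_table.alloc);
		if (uar->index == -1)
			return -ENOMEM;

		/* assumption: UAR pages live in BAR 2, one 4K page each */
		uar->pfn = (pci_resource_start(dev->pdev, 2) >> PAGE_SHIFT)
			+ uar->index;

		return 0;
	}

	void mthca_uar_free(struct mthca_dev *dev, struct mthca_uar *uar)
	{
		mthca_free(&dev->uar_table.alloc, uar->index);
	}
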
Index: hw/mthca/mthca_memfree.c
===================================================================
--- hw/mthca/mthca_memfree.c	(revision 1701)
+++ hw/mthca/mthca_memfree.c	(working copy)
@@ -79,6 +79,7 @@ struct mthca_icm *mthca_alloc_icm(struct
 	if (!icm)
 		return icm;
 
+	icm->refcount = 0;
 	INIT_LIST_HEAD(&icm->chunk_list);
 
 	cur_order = get_order(MTHCA_ICM_ALLOC_SIZE);
@@ -138,9 +139,62 @@ fail:
 	return NULL;
 }
 
+int mthca_table_get(struct mthca_dev *dev, struct mthca_icm_table *table, int obj)
+{
+	int i = (obj & (table->num_obj - 1)) * table->obj_size / MTHCA_TABLE_CHUNK_SIZE;
+	int ret = 0;
+	u8 status;
+
+	down(&table->mutex);
+
+	if (table->icm[i]) {
+		++table->icm[i]->refcount;
+		goto out;
+	}
+
+	table->icm[i] = mthca_alloc_icm(dev, MTHCA_TABLE_CHUNK_SIZE >> PAGE_SHIFT,
+					(table->lowmem ? GFP_KERNEL : GFP_HIGHUSER) |
+					__GFP_NOWARN);
+	if (!table->icm[i]) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	if (mthca_MAP_ICM(dev, table->icm[i], table->virt + i * MTHCA_TABLE_CHUNK_SIZE,
+			  &status) || status) {
+		mthca_free_icm(dev, table->icm[i]);
+		table->icm[i] = NULL;
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	++table->icm[i]->refcount;
+
+out:
+	up(&table->mutex);
+	return ret;
+}
+
+void mthca_table_put(struct mthca_dev *dev, struct mthca_icm_table *table, int obj)
+{
+	int i = (obj & (table->num_obj - 1)) * table->obj_size / MTHCA_TABLE_CHUNK_SIZE;
+	u8 status;
+
+	down(&table->mutex);
+
+	if (--table->icm[i]->refcount == 0) {
+		mthca_UNMAP_ICM(dev, table->virt + i * MTHCA_TABLE_CHUNK_SIZE,
+				MTHCA_TABLE_CHUNK_SIZE >> 12, &status);
+		mthca_free_icm(dev, table->icm[i]);
+		table->icm[i] = NULL;
+	}
+
+	up(&table->mutex);
+}
+
 struct mthca_icm_table *mthca_alloc_icm_table(struct mthca_dev *dev,
-					      u64 virt, unsigned size,
-					      unsigned reserved,
+					      u64 virt, int obj_size,
+					      int nobj, int reserved,
 					      int use_lowmem)
 {
 	struct mthca_icm_table *table;
@@ -148,20 +202,23 @@ struct mthca_icm_table *mthca_alloc_icm_
 	int i;
 	u8 status;
 
-	num_icm = size / MTHCA_TABLE_CHUNK_SIZE;
+	num_icm = obj_size * nobj / MTHCA_TABLE_CHUNK_SIZE;
 
 	table = kmalloc(sizeof *table + num_icm * sizeof *table->icm, GFP_KERNEL);
 	if (!table)
 		return NULL;
 
-	table->virt    = virt;
-	table->num_icm = num_icm;
-	init_MUTEX(&table->sem);
+	table->virt     = virt;
+	table->num_icm  = num_icm;
+	table->num_obj  = nobj;
+	table->obj_size = obj_size;
+	table->lowmem   = use_lowmem;
+	init_MUTEX(&table->mutex);
 
 	for (i = 0; i < num_icm; ++i)
 		table->icm[i] = NULL;
 
-	for (i = 0; i < (reserved + MTHCA_TABLE_CHUNK_SIZE - 1) / MTHCA_TABLE_CHUNK_SIZE; ++i) {
+	for (i = 0; i * MTHCA_TABLE_CHUNK_SIZE < reserved * obj_size; ++i) {
 		table->icm[i] = mthca_alloc_icm(dev, MTHCA_TABLE_CHUNK_SIZE >> PAGE_SHIFT,
 						(use_lowmem ? GFP_KERNEL : GFP_HIGHUSER) |
 						__GFP_NOWARN);
@@ -173,6 +230,12 @@ struct mthca_icm_table *mthca_alloc_icm_
 			table->icm[i] = NULL;
 			goto err;
 		}
+
+		/*
+		 * Add a reference to this ICM chunk so that it never
+		 * gets freed (since it contains reserved firmware objects).
+		 */
+		++table->icm[i]->refcount;
 	}
 
 	return table;
@@ -204,3 +267,200 @@ void mthca_free_icm_table(struct mthca_d
 
 	kfree(table);
 }
+
+static u64 mthca_uarc_virt(struct mthca_dev *dev, int page)
+{
+	return dev->uar_table.uarc_base +
+		dev->driver_uar.index * dev->uar_table.uarc_size +
+		page * 4096;
+}
+
+int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, u32 **db)
+{
+	int group;
+	int start, end, dir;
+	int i, j;
+	struct mthca_db_page *page;
+	int ret = 0;
+	u8 status;
+
+	down(&dev->db_tab->mutex);
+
+	switch (type) {
+	case MTHCA_DB_TYPE_CQ_ARM:
+	case MTHCA_DB_TYPE_SQ:
+		group = 0;
+		start = 0;
+		end   = dev->db_tab->max_group1;
+		dir   = 1;
+		break;
+
+	case MTHCA_DB_TYPE_CQ_SET_CI:
+	case MTHCA_DB_TYPE_RQ:
+	case MTHCA_DB_TYPE_SRQ:
+		group = 1;
+		start = dev->db_tab->npages - 1;
+		end   = dev->db_tab->min_group2;
+		dir   = -1;
+		break;
+
+	default:
+		ret = -EINVAL;
+		goto out;
+	}
+
+	for (i = start; i != end; i += dir)
+		if (dev->db_tab->page[i].db_rec &&
+		    !bitmap_full(dev->db_tab->page[i].used,
+				 MTHCA_DB_REC_PER_PAGE)) {
+			page = dev->db_tab->page + i;
+			goto found;
+		}
+
+	if (dev->db_tab->max_group1 >= dev->db_tab->min_group2 - 1) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	page = dev->db_tab->page + end;
+	page->db_rec = dma_alloc_coherent(&dev->pdev->dev, 4096,
+					  &page->mapping, GFP_KERNEL);
+	if (!page->db_rec) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	memset(page->db_rec, 0, 4096);
+
+	ret = mthca_MAP_ICM_page(dev, page->mapping, mthca_uarc_virt(dev, i), &status);
+	if (!ret && status)
+		ret = -EINVAL;
+	if (ret) {
+		dma_free_coherent(&dev->pdev->dev, 4096,
+				  page->db_rec, page->mapping);
+		goto out;
+	}
+
+	bitmap_zero(page->used, MTHCA_DB_REC_PER_PAGE);
+	if (group == 0)
+		++dev->db_tab->max_group1;
+	else
+		--dev->db_tab->min_group2;
+
+found:
+	j = find_first_zero_bit(page->used, MTHCA_DB_REC_PER_PAGE);
+	set_bit(j, page->used);
+
+	if (group == 1)
+		j = MTHCA_DB_REC_PER_PAGE - 1 - j;
+
+	ret = i * MTHCA_DB_REC_PER_PAGE + j;
+
+	page->db_rec[j] = cpu_to_be64((qn << 8) | (type << 5));
+
+	*db = (u32 *) &page->db_rec[j];
+
+out:
+	up(&dev->db_tab->mutex);
+
+	return ret;
+}
+
+void mthca_free_db(struct mthca_dev *dev, int type, int db_index)
+{
+	int i, j;
+	struct mthca_db_page *page;
+	u8 status;
+
+	i = db_index / MTHCA_DB_REC_PER_PAGE;
+	j = db_index % MTHCA_DB_REC_PER_PAGE;
+
+	page = dev->db_tab->page + i;
+
+	down(&dev->db_tab->mutex);
+
+	page->db_rec[j] = 0;
+	if (i >= dev->db_tab->min_group2)
+		j = MTHCA_DB_REC_PER_PAGE - 1 - j;
+	clear_bit(j, page->used);
+
+	if (bitmap_empty(page->used, MTHCA_DB_REC_PER_PAGE) &&
+	    i >= dev->db_tab->max_group1 - 1) {
+		mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, i), 1, &status);
+
+		dma_free_coherent(&dev->pdev->dev, 4096,
+				  page->db_rec, page->mapping);
+		page->db_rec = NULL;
+
+		if (i == dev->db_tab->max_group1) {
+			--dev->db_tab->max_group1;
+			/* XXX may be able to unmap more pages now */
+		}
+		if (i == dev->db_tab->min_group2)
+			++dev->db_tab->min_group2;
+	}
+
+	up(&dev->db_tab->mutex);
+}
+
+int mthca_init_db_tab(struct mthca_dev *dev)
+{
+	int i;
+
+	if (dev->hca_type != ARBEL_NATIVE)
+		return 0;
+
+	dev->db_tab = kmalloc(sizeof *dev->db_tab, GFP_KERNEL);
+	if (!dev->db_tab)
+		return -ENOMEM;
+
+	init_MUTEX(&dev->db_tab->mutex);
+
+	dev->db_tab->npages     = dev->uar_table.uarc_size / 4096;
+	dev->db_tab->max_group1 = 0;
+	dev->db_tab->min_group2 = dev->db_tab->npages - 1;
+
+	dev->db_tab->page = kmalloc(dev->db_tab->npages *
+				    sizeof *dev->db_tab->page,
+				    GFP_KERNEL);
+	if (!dev->db_tab->page) {
+		kfree(dev->db_tab);
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < dev->db_tab->npages; ++i)
+		dev->db_tab->page[i].db_rec = NULL;
+
+	return 0;
+}
+
+void mthca_cleanup_db_tab(struct mthca_dev *dev)
+{
+	int i;
+	u8 status;
+
+	if (dev->hca_type != ARBEL_NATIVE)
+		return;
+
+	/*
+	 * To keep mthca_free_db() simple we don't always free
+	 * UARC pages when they become empty, so we need to sweep
+	 * through the doorbell pages now and free any leftover
+	 * pages.
+	 */
+	for (i = 0; i < dev->db_tab->npages; ++i) {
+		if (!dev->db_tab->page[i].db_rec)
+			continue;
+
+		if (!bitmap_empty(dev->db_tab->page[i].used, MTHCA_DB_REC_PER_PAGE))
+			mthca_warn(dev, "Kernel UARC page %d not empty\n", i);
+
+		mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, i), 1, &status);
+
+		dma_free_coherent(&dev->pdev->dev, 4096,
+				  dev->db_tab->page[i].db_rec,
+				  dev->db_tab->page[i].mapping);
+	}
+
+	kfree(dev->db_tab->page);
+	kfree(dev->db_tab);
+}
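
Note on the doorbell allocator above: each 4K UARC page holds
MTHCA_DB_REC_PER_PAGE doorbell records.  Group 1 (CQ arm and SQ doorbells,
bounded by max_group1) grows up from page 0 and fills each page from slot
0 upwards; group 2 (CQ set_ci, RQ and SRQ doorbells, bounded by
min_group2) grows down from the last page and fills each page from the
last slot downwards, which is why mthca_alloc_db()/mthca_free_db() mirror
the bit index for group 2.  The record itself just names its owner; as an
illustration only (decode_db_rec() is not part of the patch), a record
could be decoded like this, given the CPU-endian value after
be64_to_cpu():

	static void decode_db_rec(u64 rec, u32 *qn, int *type)
	{
		*type = (rec >> 5) & 0x7;	/* MTHCA_DB_TYPE_* */
		*qn   = rec >> 8;		/* owning QP/CQ/SRQ number */
	}
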


-- 
MST - Michael S. Tsirkin


