[ofw] [patch][mlx4] enlarge the bus driver internal limitation on the system memory size from 256 GB to 8 TB
Leonid Keller
leonid at mellanox.co.il
Sun Dec 12 06:02:00 PST 2010
The bus driver's memory registration mechanism uses an internal cache that imposes a limit on the supported system memory size.
Until now that limit was 256 GB, but one of our customers has machines with up to 1 TB of system memory, so this patch raises the limit to 8 TB on 64-bit systems.
Index: hw/mlx4/kernel/bus/core/pa_cash.c
===================================================================
--- hw/mlx4/kernel/bus/core/pa_cash.c (revision 3023)
+++ hw/mlx4/kernel/bus/core/pa_cash.c (working copy)
@@ -50,9 +50,10 @@
///////////////////////////////////////////////////////////////////////////
#ifdef _WIN64
-#define MAX_PAGES_SUPPORTED (64 * 1024 * 1024) // 256 GB
+// be careful with setting it >= 4G. Compiler puts it into an integer, so 4*1024*1024*1024 = 0 !!!
+#define MAX_PAGES_SUPPORTED ((u32)2 * 1024 * 1024 * 1024) // 8 TB
#else
-#define MAX_PAGES_SUPPORTED (16 * 1024 * 1024) // 64 GB
+#define MAX_PAGES_SUPPORTED ((u32)16 * 1024 * 1024) // 64 GB
#endif
#define FREE_LIST_TRESHOLD 256 // max number of pages in free list
@@ -63,13 +64,9 @@
//
///////////////////////////////////////////////////////////////////////////
-#define PA_TABLE_ENTRY_SIZE sizeof(pa_table_entry_t)
-#define PA_TABLE_ENTRY_NUM (PAGE_SIZE / PA_TABLE_ENTRY_SIZE)
-#define PA_TABLE_SIZE (PA_TABLE_ENTRY_SIZE * PA_TABLE_ENTRY_NUM)
+#define PA_TABLE_ENTRY_SIZE sizeof(pa_table_entry_t) // 4
-#define PA_DIR_ENTRY_SIZE sizeof(pa_dir_entry_t)
-#define PA_DIR_ENTRY_NUM (MAX_PAGES_SUPPORTED /PA_TABLE_ENTRY_NUM)
-#define PA_DIR_SIZE (PA_DIR_ENTRY_SIZE * PA_DIR_ENTRY_NUM)
+#define PA_DIR_ENTRY_SIZE sizeof(pa_dir_entry_t) // 16 for x64
///////////////////////////////////////////////////////////////////////////
@@ -107,6 +104,11 @@
DEFINE_MUTEX(g_pa_mutex);
u64 g_pa[1024];
pa_cash_t g_cash;
+u32 g_max_pages_supported = 0;
+u32 g_pa_table_entry_num = 0;
+u32 g_pa_table_size = 0;
+u32 g_pa_dir_entry_num = 0;
+u32 g_pa_dir_size = 0;
///////////////////////////////////////////////////////////////////////////
@@ -133,7 +135,7 @@
g_cash.free_nr_pages--;
}
else /* allocate new page */
- pa_te = (pa_table_entry_t *)kzalloc( PA_TABLE_SIZE, GFP_KERNEL );
+ pa_te = (pa_table_entry_t *)kzalloc( g_pa_table_size, GFP_KERNEL );
return pa_te;
}
@@ -150,15 +152,15 @@
static pa_table_entry_t * pa_get_page(uint32_t ix)
{
- pa_table_entry_t *pa_te = g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].pa_te;
+ pa_table_entry_t *pa_te = g_cash.pa_dir[ix / g_pa_table_entry_num].pa_te;
/* no this page_table - add a new one */
if (!pa_te) {
pa_te = pa_alloc_page();
if (!pa_te)
return NULL;
- g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].pa_te = pa_te;
- g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].used = 0;
+ g_cash.pa_dir[ix / g_pa_table_entry_num].pa_te = pa_te;
+ g_cash.pa_dir[ix / g_pa_table_entry_num].used = 0;
g_cash.cur_nr_pages++;
}
@@ -167,8 +169,8 @@
static void pa_put_page(uint32_t ix)
{
- pa_free_page(g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].pa_te);
- g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].pa_te = NULL;
+ pa_free_page(g_cash.pa_dir[ix / g_pa_table_entry_num].pa_te);
+ g_cash.pa_dir[ix / g_pa_table_entry_num].pa_te = NULL;
g_cash.cur_nr_pages--;
}
@@ -189,9 +191,9 @@
return -ENOMEM;
/* register page address */
- if (!pa_te[ix % PA_TABLE_ENTRY_NUM].ref_cnt)
- ++g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].used;
- ++pa_te[ix % PA_TABLE_ENTRY_NUM].ref_cnt;
+ if (!pa_te[ix % g_pa_table_entry_num].ref_cnt)
+ ++g_cash.pa_dir[ix / g_pa_table_entry_num].used;
+ ++pa_te[ix % g_pa_table_entry_num].ref_cnt;
return 0;
}
@@ -208,7 +210,7 @@
return -EFAULT;
}
- pa_te = g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].pa_te;
+ pa_te = g_cash.pa_dir[ix / g_pa_table_entry_num].pa_te;
/* no this page_table - error*/
if (!pa_te) {
@@ -217,13 +219,13 @@
}
/* deregister page address */
- --pa_te[ix % PA_TABLE_ENTRY_NUM].ref_cnt;
- ASSERT(pa_te[ix % PA_TABLE_ENTRY_NUM].ref_cnt >= 0);
+ --pa_te[ix % g_pa_table_entry_num].ref_cnt;
+ ASSERT(pa_te[ix % g_pa_table_entry_num].ref_cnt >= 0);
/* release the page on need */
- if (!pa_te[ix % PA_TABLE_ENTRY_NUM].ref_cnt)
- --g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].used;
- if (!g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].used)
+ if (!pa_te[ix % g_pa_table_entry_num].ref_cnt)
+ --g_cash.pa_dir[ix / g_pa_table_entry_num].used;
+ if (!g_cash.pa_dir[ix / g_pa_table_entry_num].used)
pa_put_page(ix);
return 0;
@@ -301,7 +303,7 @@
void pa_cash_release()
{
- int i;
+ u32 i;
pa_cash_print();
@@ -309,7 +311,7 @@
return;
/* free cash tables */
- for (i=0; i<PA_DIR_ENTRY_NUM; ++i)
+ for (i=0; i<g_pa_dir_entry_num; ++i)
if (g_cash.pa_dir[i].pa_te) {
kfree(g_cash.pa_dir[i].pa_te);
g_cash.cur_nr_pages--;
@@ -338,24 +340,31 @@
return -EFAULT;
}
- pa_te = g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].pa_te;
+ pa_te = g_cash.pa_dir[ix / g_pa_table_entry_num].pa_te;
/* no this page_table */
if (!pa_te)
return 0;
- return pa_te[ix % PA_TABLE_ENTRY_NUM].ref_cnt;
+ return pa_te[ix % g_pa_table_entry_num].ref_cnt;
}
int pa_cash_init()
{
void *pa_dir;
- pa_dir = kzalloc(PA_DIR_SIZE, GFP_KERNEL);
+ g_max_pages_supported = MAX_PAGES_SUPPORTED;
+ g_pa_table_entry_num = PAGE_SIZE / PA_TABLE_ENTRY_SIZE;
+ g_pa_table_size = PA_TABLE_ENTRY_SIZE * g_pa_table_entry_num;
+ g_pa_dir_entry_num = g_max_pages_supported /g_pa_table_entry_num;
+ g_pa_dir_size = PA_DIR_ENTRY_SIZE * g_pa_dir_entry_num;
+
+ pa_dir = kzalloc(g_pa_dir_size, GFP_KERNEL);
+
if (!pa_dir)
return -ENOMEM;
g_cash.pa_dir = pa_dir;
- g_cash.max_nr_pages = PA_TABLE_ENTRY_NUM * PA_DIR_ENTRY_NUM;
+ g_cash.max_nr_pages = g_pa_table_entry_num * g_pa_dir_entry_num;
g_cash.free_list_hdr.Next = NULL;
g_cash.cur_nr_pages = 0;
g_cash.free_nr_pages = 0;
@@ -363,4 +372,4 @@
mutex_init(&g_pa_mutex);
return 0;
}
-
+
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20101212/73e5d7bd/attachment.html>
More information about the ofw
mailing list