[ofw] [patch][mlx4] enlarge the bus driver internal limitation on the system memory size from 256 GB to 8 TB

Fab Tillier ftillier at microsoft.com
Sun Dec 12 21:14:12 PST 2010


Note that the whole cache can go away - the memory corruption when probing pages for write access has been fixed, and you can just call MmProbeAndLockPages.

-Fab

From: ofw-bounces at lists.openfabrics.org [mailto:ofw-bounces at lists.openfabrics.org] On Behalf Of Leonid Keller
Sent: Sunday, December 12, 2010 6:02 AM
To: 'ofw at lists.openfabrics.org'
Subject: [ofw] [patch][mlx4] enlarge the bus driver internal limitation on the system memory size from 256 GB to 8 TB

The bus driver's memory registration mechanism uses an internal cache that imposes a limit on the system memory size.
Until now that limit was 256 GB, but one of our customers has machines with up to 1 TB of system memory...

Index: hw/mlx4/kernel/bus/core/pa_cash.c
===================================================================
--- hw/mlx4/kernel/bus/core/pa_cash.c               (revision 3023)
+++ hw/mlx4/kernel/bus/core/pa_cash.c            (working copy)
@@ -50,9 +50,10 @@
 ///////////////////////////////////////////////////////////////////////////

 #ifdef _WIN64
-#define MAX_PAGES_SUPPORTED        (64 * 1024 * 1024)                            // 256 GB
+// be careful with setting it >= 4G. Compiler puts it into an integer, so 4*1024*1024*1024 = 0 !!!
+#define MAX_PAGES_SUPPORTED       ((u32)2 * 1024 * 1024 * 1024)                                                                      // 8 TB
 #else
-#define MAX_PAGES_SUPPORTED        (16 * 1024 * 1024)                            // 64 GB
+#define MAX_PAGES_SUPPORTED       ((u32)16 * 1024 * 1024)                                                                                                                 // 64 GB
 #endif

 #define FREE_LIST_TRESHOLD                  256                         // max number of pages in free list
@@ -63,13 +64,9 @@
 //
 ///////////////////////////////////////////////////////////////////////////

-#define PA_TABLE_ENTRY_SIZE              sizeof(pa_table_entry_t)
-#define PA_TABLE_ENTRY_NUM           (PAGE_SIZE / PA_TABLE_ENTRY_SIZE)
-#define PA_TABLE_SIZE                                              (PA_TABLE_ENTRY_SIZE * PA_TABLE_ENTRY_NUM)
+#define PA_TABLE_ENTRY_SIZE                             sizeof(pa_table_entry_t)                                                                             // 4

-#define PA_DIR_ENTRY_SIZE                   sizeof(pa_dir_entry_t)
-#define PA_DIR_ENTRY_NUM                 (MAX_PAGES_SUPPORTED /PA_TABLE_ENTRY_NUM)
-#define PA_DIR_SIZE                                                   (PA_DIR_ENTRY_SIZE * PA_DIR_ENTRY_NUM)
+#define PA_DIR_ENTRY_SIZE                  sizeof(pa_dir_entry_t)                                                                                  // 16 for x64


 ///////////////////////////////////////////////////////////////////////////
@@ -107,6 +104,11 @@
 DEFINE_MUTEX(g_pa_mutex);
 u64 g_pa[1024];
 pa_cash_t g_cash;
+u32 g_max_pages_supported = 0;
+u32 g_pa_table_entry_num = 0;
+u32 g_pa_table_size = 0;
+u32 g_pa_dir_entry_num = 0;
+u32 g_pa_dir_size = 0;


 ///////////////////////////////////////////////////////////////////////////
@@ -133,7 +135,7 @@
                                g_cash.free_nr_pages--;
                }
                else  /* allocate new page */
-                              pa_te = (pa_table_entry_t *)kzalloc( PA_TABLE_SIZE, GFP_KERNEL );
+                             pa_te = (pa_table_entry_t *)kzalloc( g_pa_table_size, GFP_KERNEL );

                return pa_te;
 }
@@ -150,15 +152,15 @@

 static pa_table_entry_t * pa_get_page(uint32_t ix)
 {
-              pa_table_entry_t *pa_te =  g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].pa_te;
+             pa_table_entry_t *pa_te =  g_cash.pa_dir[ix / g_pa_table_entry_num].pa_te;

                /* no this page_table - add a new one */
                if (!pa_te) {
                                pa_te = pa_alloc_page();
                                if (!pa_te)
                                                return NULL;
-                              g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].pa_te = pa_te;
-                              g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].used = 0;
+                             g_cash.pa_dir[ix / g_pa_table_entry_num].pa_te = pa_te;
+                             g_cash.pa_dir[ix / g_pa_table_entry_num].used = 0;
                                g_cash.cur_nr_pages++;
                }

@@ -167,8 +169,8 @@

 static void pa_put_page(uint32_t ix)
 {
-              pa_free_page(g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].pa_te);
-              g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].pa_te = NULL;
+             pa_free_page(g_cash.pa_dir[ix / g_pa_table_entry_num].pa_te);
+             g_cash.pa_dir[ix / g_pa_table_entry_num].pa_te = NULL;
                g_cash.cur_nr_pages--;
 }

@@ -189,9 +191,9 @@
                                return -ENOMEM;

                /* register page address */
-              if (!pa_te[ix % PA_TABLE_ENTRY_NUM].ref_cnt)
-                              ++g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].used;
-              ++pa_te[ix % PA_TABLE_ENTRY_NUM].ref_cnt;
+             if (!pa_te[ix % g_pa_table_entry_num].ref_cnt)
+                             ++g_cash.pa_dir[ix / g_pa_table_entry_num].used;
+             ++pa_te[ix % g_pa_table_entry_num].ref_cnt;

                return 0;
 }
@@ -208,7 +210,7 @@
                                return -EFAULT;
                }

-              pa_te =  g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].pa_te;
+             pa_te =  g_cash.pa_dir[ix / g_pa_table_entry_num].pa_te;

                /* no this page_table - error*/
                if (!pa_te)  {
@@ -217,13 +219,13 @@
                }

                /* deregister page address */
-              --pa_te[ix % PA_TABLE_ENTRY_NUM].ref_cnt;
-              ASSERT(pa_te[ix % PA_TABLE_ENTRY_NUM].ref_cnt >= 0);
+             --pa_te[ix % g_pa_table_entry_num].ref_cnt;
+             ASSERT(pa_te[ix % g_pa_table_entry_num].ref_cnt >= 0);

                /* release the page on need */
-              if (!pa_te[ix % PA_TABLE_ENTRY_NUM].ref_cnt)
-                              --g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].used;
-              if (!g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].used)
+             if (!pa_te[ix % g_pa_table_entry_num].ref_cnt)
+                             --g_cash.pa_dir[ix / g_pa_table_entry_num].used;
+             if (!g_cash.pa_dir[ix / g_pa_table_entry_num].used)
                                pa_put_page(ix);

                return 0;
@@ -301,7 +303,7 @@

 void pa_cash_release()
 {
-              int i;
+             u32 i;

                pa_cash_print();

@@ -309,7 +311,7 @@
                                return;

                /* free cash tables */
-              for (i=0; i<PA_DIR_ENTRY_NUM; ++i)
+             for (i=0; i<g_pa_dir_entry_num; ++i)
                                if (g_cash.pa_dir[i].pa_te) {
                                                kfree(g_cash.pa_dir[i].pa_te);
                                                g_cash.cur_nr_pages--;
@@ -338,24 +340,31 @@
                                return -EFAULT;
                }

-              pa_te =  g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].pa_te;
+             pa_te =  g_cash.pa_dir[ix / g_pa_table_entry_num].pa_te;

                /* no this page_table */
                if (!pa_te)
                                return 0;

-              return pa_te[ix % PA_TABLE_ENTRY_NUM].ref_cnt;
+             return pa_te[ix % g_pa_table_entry_num].ref_cnt;
 }

 int pa_cash_init()
 {
                void *pa_dir;
-              pa_dir = kzalloc(PA_DIR_SIZE, GFP_KERNEL);

+             g_max_pages_supported = MAX_PAGES_SUPPORTED;
+             g_pa_table_entry_num = PAGE_SIZE / PA_TABLE_ENTRY_SIZE;
+             g_pa_table_size = PA_TABLE_ENTRY_SIZE * g_pa_table_entry_num;
+             g_pa_dir_entry_num = g_max_pages_supported /g_pa_table_entry_num;
+             g_pa_dir_size = PA_DIR_ENTRY_SIZE * g_pa_dir_entry_num;
+
+             pa_dir = kzalloc(g_pa_dir_size, GFP_KERNEL);
+
                if (!pa_dir)
                                return -ENOMEM;
                g_cash.pa_dir = pa_dir;
-              g_cash.max_nr_pages = PA_TABLE_ENTRY_NUM * PA_DIR_ENTRY_NUM;
+             g_cash.max_nr_pages = g_pa_table_entry_num * g_pa_dir_entry_num;
                g_cash.free_list_hdr.Next = NULL;
                g_cash.cur_nr_pages = 0;
                g_cash.free_nr_pages = 0;
@@ -363,4 +372,4 @@
                mutex_init(&g_pa_mutex);
                return 0;
 }
-
+
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20101213/70489120/attachment.html>


More information about the ofw mailing list