[ofw] [patch][mlx4] enlarge the bus driver internal limitation on the system memory size from 256 GB to 8 TB
    Fab Tillier 
    ftillier at microsoft.com
       
    Sun Dec 12 21:14:12 PST 2010
    
    
  
Note that the whole cache can go away - the memory corruption when probing pages for write access has been fixed, and you can just call MmProbeAndLockPages.
-Fab
From: ofw-bounces at lists.openfabrics.org [mailto:ofw-bounces at lists.openfabrics.org] On Behalf Of Leonid Keller
Sent: Sunday, December 12, 2010 6:02 AM
To: 'ofw at lists.openfabrics.org'
Subject: [ofw] [patch][mlx4] enlarge the bus driver internal limitation on the system memory size from 256 GB to 8 TB
The bus driver's memory registration mechanism uses an internal cache, which imposes a limit on the system memory size.
Until now that limit was 256 GB, but one of our customers has machines with up to 1 TB of system memory...
Index: hw/mlx4/kernel/bus/core/pa_cash.c
===================================================================
--- hw/mlx4/kernel/bus/core/pa_cash.c               (revision 3023)
+++ hw/mlx4/kernel/bus/core/pa_cash.c            (working copy)
@@ -50,9 +50,10 @@
 ///////////////////////////////////////////////////////////////////////////
 #ifdef _WIN64
-#define MAX_PAGES_SUPPORTED        (64 * 1024 * 1024)                            // 256 GB
+// be careful with setting it >= 4G. Compiler puts it into an integer, so 4*1024*1024*1024 = 0 !!!
+#define MAX_PAGES_SUPPORTED       ((u32)2 * 1024 * 1024 * 1024)                                                                      // 8 TB
 #else
-#define MAX_PAGES_SUPPORTED        (16 * 1024 * 1024)                            // 64 GB
+#define MAX_PAGES_SUPPORTED       ((u32)16 * 1024 * 1024)                                                                                                                 // 64 GB
 #endif
 #define FREE_LIST_TRESHOLD                  256                         // max number of pages in free list
@@ -63,13 +64,9 @@
 //
 ///////////////////////////////////////////////////////////////////////////
-#define PA_TABLE_ENTRY_SIZE              sizeof(pa_table_entry_t)
-#define PA_TABLE_ENTRY_NUM           (PAGE_SIZE / PA_TABLE_ENTRY_SIZE)
-#define PA_TABLE_SIZE                                              (PA_TABLE_ENTRY_SIZE * PA_TABLE_ENTRY_NUM)
+#define PA_TABLE_ENTRY_SIZE                             sizeof(pa_table_entry_t)                                                                             // 4
-#define PA_DIR_ENTRY_SIZE                   sizeof(pa_dir_entry_t)
-#define PA_DIR_ENTRY_NUM                 (MAX_PAGES_SUPPORTED /PA_TABLE_ENTRY_NUM)
-#define PA_DIR_SIZE                                                   (PA_DIR_ENTRY_SIZE * PA_DIR_ENTRY_NUM)
+#define PA_DIR_ENTRY_SIZE                  sizeof(pa_dir_entry_t)                                                                                  // 16 for x64
 ///////////////////////////////////////////////////////////////////////////
@@ -107,6 +104,11 @@
 DEFINE_MUTEX(g_pa_mutex);
 u64 g_pa[1024];
 pa_cash_t g_cash;
+u32 g_max_pages_supported = 0;
+u32 g_pa_table_entry_num = 0;
+u32 g_pa_table_size = 0;
+u32 g_pa_dir_entry_num = 0;
+u32 g_pa_dir_size = 0;
 ///////////////////////////////////////////////////////////////////////////
@@ -133,7 +135,7 @@
                                g_cash.free_nr_pages--;
                }
                else  /* allocate new page */
-                              pa_te = (pa_table_entry_t *)kzalloc( PA_TABLE_SIZE, GFP_KERNEL );
+                             pa_te = (pa_table_entry_t *)kzalloc( g_pa_table_size, GFP_KERNEL );
                return pa_te;
 }
@@ -150,15 +152,15 @@
 static pa_table_entry_t * pa_get_page(uint32_t ix)
 {
-              pa_table_entry_t *pa_te =  g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].pa_te;
+             pa_table_entry_t *pa_te =  g_cash.pa_dir[ix / g_pa_table_entry_num].pa_te;
                /* no this page_table - add a new one */
                if (!pa_te) {
                                pa_te = pa_alloc_page();
                                if (!pa_te)
                                                return NULL;
-                              g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].pa_te = pa_te;
-                              g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].used = 0;
+                             g_cash.pa_dir[ix / g_pa_table_entry_num].pa_te = pa_te;
+                             g_cash.pa_dir[ix / g_pa_table_entry_num].used = 0;
                                g_cash.cur_nr_pages++;
                }
@@ -167,8 +169,8 @@
 static void pa_put_page(uint32_t ix)
 {
-              pa_free_page(g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].pa_te);
-              g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].pa_te = NULL;
+             pa_free_page(g_cash.pa_dir[ix / g_pa_table_entry_num].pa_te);
+             g_cash.pa_dir[ix / g_pa_table_entry_num].pa_te = NULL;
                g_cash.cur_nr_pages--;
 }
@@ -189,9 +191,9 @@
                                return -ENOMEM;
                /* register page address */
-              if (!pa_te[ix % PA_TABLE_ENTRY_NUM].ref_cnt)
-                              ++g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].used;
-              ++pa_te[ix % PA_TABLE_ENTRY_NUM].ref_cnt;
+             if (!pa_te[ix % g_pa_table_entry_num].ref_cnt)
+                             ++g_cash.pa_dir[ix / g_pa_table_entry_num].used;
+             ++pa_te[ix % g_pa_table_entry_num].ref_cnt;
                return 0;
 }
@@ -208,7 +210,7 @@
                                return -EFAULT;
                }
-              pa_te =  g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].pa_te;
+             pa_te =  g_cash.pa_dir[ix / g_pa_table_entry_num].pa_te;
                /* no this page_table - error*/
                if (!pa_te)  {
@@ -217,13 +219,13 @@
                }
                /* deregister page address */
-              --pa_te[ix % PA_TABLE_ENTRY_NUM].ref_cnt;
-              ASSERT(pa_te[ix % PA_TABLE_ENTRY_NUM].ref_cnt >= 0);
+             --pa_te[ix % g_pa_table_entry_num].ref_cnt;
+             ASSERT(pa_te[ix % g_pa_table_entry_num].ref_cnt >= 0);
                /* release the page on need */
-              if (!pa_te[ix % PA_TABLE_ENTRY_NUM].ref_cnt)
-                              --g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].used;
-              if (!g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].used)
+             if (!pa_te[ix % g_pa_table_entry_num].ref_cnt)
+                             --g_cash.pa_dir[ix / g_pa_table_entry_num].used;
+             if (!g_cash.pa_dir[ix / g_pa_table_entry_num].used)
                                pa_put_page(ix);
                return 0;
@@ -301,7 +303,7 @@
 void pa_cash_release()
 {
-              int i;
+             u32 i;
                pa_cash_print();
@@ -309,7 +311,7 @@
                                return;
                /* free cash tables */
-              for (i=0; i<PA_DIR_ENTRY_NUM; ++i)
+             for (i=0; i<g_pa_dir_entry_num; ++i)
                                if (g_cash.pa_dir[i].pa_te) {
                                                kfree(g_cash.pa_dir[i].pa_te);
                                                g_cash.cur_nr_pages--;
@@ -338,24 +340,31 @@
                                return -EFAULT;
                }
-              pa_te =  g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].pa_te;
+             pa_te =  g_cash.pa_dir[ix / g_pa_table_entry_num].pa_te;
                /* no this page_table */
                if (!pa_te)
                                return 0;
-              return pa_te[ix % PA_TABLE_ENTRY_NUM].ref_cnt;
+             return pa_te[ix % g_pa_table_entry_num].ref_cnt;
 }
 int pa_cash_init()
 {
                void *pa_dir;
-              pa_dir = kzalloc(PA_DIR_SIZE, GFP_KERNEL);
+             g_max_pages_supported = MAX_PAGES_SUPPORTED;
+             g_pa_table_entry_num = PAGE_SIZE / PA_TABLE_ENTRY_SIZE;
+             g_pa_table_size = PA_TABLE_ENTRY_SIZE * g_pa_table_entry_num;
+             g_pa_dir_entry_num = g_max_pages_supported /g_pa_table_entry_num;
+             g_pa_dir_size = PA_DIR_ENTRY_SIZE * g_pa_dir_entry_num;
+
+             pa_dir = kzalloc(g_pa_dir_size, GFP_KERNEL);
+
                if (!pa_dir)
                                return -ENOMEM;
                g_cash.pa_dir = pa_dir;
-              g_cash.max_nr_pages = PA_TABLE_ENTRY_NUM * PA_DIR_ENTRY_NUM;
+             g_cash.max_nr_pages = g_pa_table_entry_num * g_pa_dir_entry_num;
                g_cash.free_list_hdr.Next = NULL;
                g_cash.cur_nr_pages = 0;
                g_cash.free_nr_pages = 0;
@@ -363,4 +372,4 @@
                mutex_init(&g_pa_mutex);
                return 0;
 }
-
+
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20101213/70489120/attachment.html>
    
    
More information about the ofw
mailing list