HugePages は free コマンドで見ると used に計上される

HugePages として確保したサイズは使用されていなくても free コマンドで見ると used に計上されると聞いたので理由を調べてみた。

Pages that are used as huge pages are reserved inside the kernel and cannot be used for other purposes. Huge pages cannot be swapped out under memory pressure.

Once a number of huge pages have been pre-allocated to the kernel huge page pool, a user with appropriate privilege can use either the mmap system call or shared memory system calls to use the huge pages.

https://www.kernel.org/doc/Documentation/vm/hugetlbpage.txt

OSカーネル内に確保されるようだ。

[root@yazekats-linux linux]# free
             total       used       free     shared    buffers     cached
Mem:      16158544    3976420   12182124          0     148008    2649148
-/+ buffers/cache:    1179264   14979280
Swap:      8085500          0    8085500
[root@yazekats-linux linux]# sysctl -w vm.nr_hugepages=100
vm.nr_hugepages = 100
[root@yazekats-linux linux]# free
             total       used       free     shared    buffers     cached
Mem:      16158544    4180864   11977680          0     148060    2649436
-/+ buffers/cache:    1383368   14775176
Swap:      8085500          0    8085500
[root@yazekats-linux linux]# expr 1383368 - 1179264
204104

確かに vm.nr_hugepages に 100 を設定してみると、200MBほど used が増えた(x86_64 のデフォルトの HugePage サイズは 2MB なので、2MB × 100 ページ = 204,800kB とほぼ一致する)。
free コマンドはどこから情報をとっているかというと、

yazekats% strace -e open free 

...

open("/sys/devices/system/cpu/online", O_RDONLY|O_CLOEXEC) = 3
open("/proc/meminfo", O_RDONLY)         = 3
             total       used       free     shared    buffers     cached
Mem:      16158544    3982292   12176252          0     149808    2650748
-/+ buffers/cache:    1181736   14976808
Swap:      8085500          0    8085500

やはり /proc/meminfo を見ていますね。

free コマンドを含む RPM パッケージを調べて、

yazekats% rpm -qf `which free`
procps-3.2.8-25.el6.x86_64

ソースをダウンロードして、

[root@yazekats-linux src]# cd /usr/local/src
[root@yazekats-linux src]# wget http://procps.sourceforge.net/procps-3.2.8.tar.gz
[root@yazekats-linux src]# tar xfvz procps-3.2.8.tar.gz

中身を見てみる

  • /usr/local/src/procps-3.2.8/free.c
// free.c - free(1)
// procps utility to display free memory information
//
// All new, Robert Love <rml@tech9.net>             18 Nov 2002
// Original by Brian Edmonds and Rafal Maszkowski   14 Dec 1992
//
// This program is licensed under the GNU Library General Public License, v2
//
// Copyright 2003 Robert Love
// Copyright 2004 Albert Cahalan

#include "proc/sysinfo.h"
#include "proc/version.h"
//#include <errno.h>
#include <fcntl.h>
#include <getopt.h>
#include <stdio.h>
#include <stdlib.h>

...

        if(!old_fmt){
            unsigned KLONG buffers_plus_cached = kb_main_buffers + kb_main_cached;
            printf(
                "-/+ buffers/cache: %10Lu %10Lu\n", 
                S(kb_main_used - buffers_plus_cached),
                S(kb_main_free + buffers_plus_cached)
            );
        }

お、Google の Robert Love の名前が。

  • /usr/local/src/procps-3.2.8/proc/sysinfo.h
38  /* derived values */
39  extern unsigned long kb_swap_used;
40  extern unsigned long kb_main_used;
  • /usr/local/src/procps-3.2.8/proc/sysinfo.c
484  void meminfo(void){
485    char namebuf[16]; /* big enough to hold any row name */
486    mem_table_struct findme = { namebuf, NULL};
487    mem_table_struct *found;
488    char *head;
489    char *tail;
490    static const mem_table_struct mem_table[] = {
491    {"Active",       &kb_active},       // important
492    {"AnonPages",    &kb_anon_pages},
493    {"Bounce",       &kb_bounce},
494    {"Buffers",      &kb_main_buffers}, // important
495    {"Cached",       &kb_main_cached},  // important
496    {"CommitLimit",  &kb_commit_limit},

...

509    {"MemFree",      &kb_main_free},    // important
510    {"MemShared",    &kb_main_shared},  // important, but now gone!
511    {"MemTotal",     &kb_main_total},   // important

...

517    {"Slab",         &kb_slab},         // kB version of vmstat nr_slab
518    {"SwapCached",   &kb_swap_cached},
519    {"SwapFree",     &kb_swap_free},    // important
520    {"SwapTotal",    &kb_swap_total},   // important
521    {"VmallocChunk", &kb_vmalloc_chunk},
522    {"VmallocTotal", &kb_vmalloc_total},
523    {"VmallocUsed",  &kb_vmalloc_used},
524    {"Writeback",    &kb_writeback},    // kB version of vmstat nr_writeback
525    };
526    const int mem_table_count = sizeof(mem_table)/sizeof(mem_table_struct);

527    FILE_TO_BUF(MEMINFO_FILE,meminfo_fd);
       
528    kb_inactive = ~0UL;
       
529    head = buf;
530    for(;;){
531      tail = strchr(head, ':');
532      if(!tail) break;
533      *tail = '\0';
534      if(strlen(head) >= sizeof(namebuf)){
535        head = tail+1;
536        goto nextline;
537      }
538      strcpy(namebuf,head);
539      found = bsearch(&findme, mem_table, mem_table_count,
540          sizeof(mem_table_struct), compare_mem_table_structs
541      );
542      head = tail+1;
543      if(!found) goto nextline;
544      *(found->slot) = strtoul(head,&tail,10);
545  nextline:
546      tail = strchr(head, '\n');
547      if(!tail) break;
548      head = tail+1;
549    }
550    if(!kb_low_total){  /* low==main except with large-memory support */
551      kb_low_total = kb_main_total;
552      kb_low_free  = kb_main_free;
553    }
554    if(kb_inactive==~0UL){
555      kb_inactive = kb_inact_dirty + kb_inact_clean + kb_inact_laundry;
556    }
557    kb_swap_used = kb_swap_total - kb_swap_free;
558    kb_main_used = kb_main_total - kb_main_free;
559  }

ということで、

free の used(-/+ buffers/cache): /proc/meminfo の MemTotal - MemFree - (Buffers + Cached)

ということになるようだ。
ここから先は、カーネルソースの fs/proc/meminfo.c 以降を調べればよい。

  • /usr/src/debug/kernel-2.6.39/linux-2.6.39.x86_64/fs/proc/meminfo.c
29      /*
30       * display in kilobytes.
31       */
32      #define K(x) ((x) << (PAGE_SHIFT - 10))
33              si_meminfo(&i);
34              si_swapinfo(&i);
35              committed = percpu_counter_read_positive(&vm_committed_as);
36              allowed = ((totalram_pages - hugetlb_total_pages())
37                      * sysctl_overcommit_ratio / 100) + total_swap_pages;
38              cached = global_page_state(NR_FILE_PAGES) -
39                              total_swapcache_pages - i.bufferram;
40              if (cached < 0)
41                      cached = 0;
42              get_vmalloc_info(&vmi);
43              for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++)
44                      pages[lru] = global_page_state(NR_LRU_BASE + lru);
45              /*
46               * Tagged format, for easy grepping and expansion.
47               */
48              seq_printf(m,
49                      "MemTotal:       %8lu kB\n"
50                      "MemFree:        %8lu kB\n"
51                      "Buffers:        %8lu kB\n"
52                      "Cached:         %8lu kB\n"

...

101                     K(i.totalram),
102                     K(i.freeram),
103                     K(i.bufferram),
104                     K(cached),
105                     K(total_swapcache_pages),
  • /usr/src/debug/kernel-2.6.39/linux-2.6.39.x86_64/mm/page_alloc.c
2178    void si_meminfo(struct sysinfo *val)
2179    {
2180            val->totalram = totalram_pages;
2181            val->sharedram = 0;
2182            val->freeram = global_page_state(NR_FREE_PAGES);
2183            val->bufferram = nr_blockdev_pages();
2184            val->totalhigh = totalhigh_pages;
2185            val->freehigh = nr_free_highpages();
2186            val->mem_unit = PAGE_SIZE;
2187    }
  • /usr/src/debug/kernel-2.6.39/linux-2.6.39.x86_64/include/linux/vmstat.h
75      extern atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];

...

82      static inline unsigned long global_page_state(enum zone_stat_item item)
83      {
84              long x = atomic_long_read(&vm_stat[item]);
85      #ifdef CONFIG_SMP
86              if (x < 0)
87                      x = 0;
88      #endif
89              return x;
90      }

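よくわからんが、HugePages はカーネル空間に確保されて、空き領域として扱われないということだろうw つまり、HugePages のプールを確保した分だけ NR_FREE_PAGES(= MemFree)が減る一方で MemTotal は変わらないので、MemTotal - MemFree で計算される free の used に計上される、と考えられる。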


使った環境は以下の通り。

yazekats% cat /etc/issue
Oracle Linux Server release 6.4
Kernel \r on an \m

yazekats% cat /proc/version
Linux version 2.6.39-400.17.1.el6uek.x86_64 (mockbuild@ca-build44.us.oracle.com) (gcc version 4.4.7 20120313 (Red Hat 4.4.7-3) (GCC) ) #1 SMP Fri Feb 22 18:16:18 PST 2013