18 #include <sys/types.h> 21 #include <sys/mount.h> 24 #include <linux/mempolicy.h> 25 #include <linux/memfd.h> 35 #ifndef F_LINUX_SPECIFIC_BASE 36 #define F_LINUX_SPECIFIC_BASE 1024 40 #define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9) 41 #define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10) 43 #define F_SEAL_SEAL 0x0001 44 #define F_SEAL_SHRINK 0x0002 45 #define F_SEAL_GROW 0x0004 46 #define F_SEAL_WRITE 0x0008 50 #define MFD_HUGETLB 0x0004U 53 #ifndef MAP_HUGE_SHIFT 54 #define MAP_HUGE_SHIFT 26 57 #ifndef MFD_HUGE_SHIFT 58 #define MFD_HUGE_SHIFT 26 61 #ifndef MAP_FIXED_NOREPLACE 62 #define MAP_FIXED_NOREPLACE 0x100000 94 if ((fd = open (
"/proc/meminfo", 0)) == -1)
101 if (
unformat (&input,
"Hugepagesize:%_%u kB", &size))
109 return 1024ULL *
size;
119 if ((fp = fopen (
"/proc/meminfo",
"r")) == NULL)
122 while (fscanf (fp,
"%32s", tmp) > 0)
123 if (strncmp (
"Hugepagesize:", tmp, 13) == 0)
126 if (fscanf (fp,
"%u", &size) > 0)
127 log2_page_size = 10 +
min_log2 (size);
132 return log2_page_size;
147 page_size = sysconf (_SC_PAGESIZE);
151 if ((fd = syscall (__NR_memfd_create,
"test", MFD_HUGETLB)) != -1)
160 va = mmap (0, page_size, PROT_READ | PROT_WRITE, MAP_PRIVATE |
161 MAP_ANONYMOUS, -1, 0);
162 if (va == MAP_FAILED)
165 if (mlock (va, page_size))
171 if (syscall (__NR_move_pages, 0, 1, &va, &
i, &status, 0) == 0)
176 munmap (va, page_size);
182 struct stat st = { 0 };
183 if (fstat (fd, &st) == -1)
185 return st.st_blksize;
201 if (log2_page_size <= 12)
203 else if (log2_page_size > 12 && log2_page_size <= 16)
226 temp =
format (0,
"/tmp/hugepage_mount.XXXXXX%c", 0);
229 if ((mount_dir = mkdtemp ((
char *) temp)) == 0)
237 if (mount (
"none", mount_dir,
"hugetlbfs", 0, NULL))
240 rmdir ((
char *) mount_dir);
246 filename =
format (0,
"%s/%s%c", mount_dir, name, 0);
248 if ((fd = open ((
char *) filename, O_CREAT | O_RDWR, 0755)) == -1)
254 umount2 ((
char *) mount_dir, MNT_DETACH);
255 rmdir ((
char *) mount_dir);
267 unsigned int memfd_flags;
276 switch (log2_page_size)
284 memfd_flags = MFD_HUGETLB;
287 memfd_flags = MFD_HUGETLB | log2_page_size << MFD_HUGE_SHIFT;
300 fd = syscall (__NR_memfd_create, (
char *) s, memfd_flags);
303 if (fd == -1 && errno == EINVAL &&
334 uword pagesize = 1ULL << log2_page_sz;
351 base = (
void *) start - sys_page_sz;
352 base = mmap (base, size + sys_page_sz, PROT_NONE,
354 return (base == MAP_FAILED) ? ~0 : start;
360 base = mmap (0, size + pagesize, PROT_NONE,
361 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
363 if (base == MAP_FAILED)
367 p = base + size + pagesize;
376 n_bytes = pagesize - sys_page_sz -
n_bytes;
379 munmap (base, n_bytes);
383 return (
uword) base + sys_page_sz;
396 mprotect (hdr, sys_page_sz, PROT_READ);
400 mprotect (hdr, sys_page_sz, PROT_NONE);
402 mprotect (next, sys_page_sz, PROT_READ);
413 int mmap_flags = MAP_FIXED, is_huge = 0;
417 mmap_flags |= MAP_SHARED;
424 mmap_flags |= MAP_PRIVATE | MAP_ANONYMOUS;
429 switch (log2_page_sz)
438 mmap_flags |= MAP_HUGETLB;
443 mmap_flags |= MAP_HUGETLB;
452 size =
round_pow2 (size, 1ULL << log2_page_sz);
456 if (base == (
void *) ~0)
459 base = mmap (base, size, PROT_READ | PROT_WRITE, mmap_flags, fd, offset);
461 if (base == MAP_FAILED)
464 if (is_huge && (mlock (base, size) != 0))
470 hdr = mmap (base - sys_page_sz, sys_page_sz, PROT_READ | PROT_WRITE,
471 MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
473 if (hdr != base - sys_page_sz)
483 mprotect (mm->
last_map, sys_page_sz, PROT_READ | PROT_WRITE);
485 mprotect (mm->
last_map, sys_page_sz, PROT_NONE);
498 hdr->base_addr = (
uword) base;
499 hdr->log2_page_sz = log2_page_sz;
500 hdr->num_pages = size >> log2_page_sz;
503 mprotect (hdr, sys_page_sz, PROT_NONE);
516 if (mprotect (hdr, sys_page_sz, PROT_READ | PROT_WRITE) != 0)
519 size = hdr->num_pages << hdr->log2_page_sz;
520 if (munmap ((
void *) hdr->base_addr, size) != 0)
527 mprotect (hdr->next, sys_page_sz, PROT_READ | PROT_WRITE);
528 hdr->next->prev = hdr->prev;
529 mprotect (hdr->next, sys_page_sz, PROT_NONE);
536 mprotect (hdr->prev, sys_page_sz, PROT_READ | PROT_WRITE);
537 hdr->prev->next = hdr->next;
538 mprotect (hdr->prev, sys_page_sz, PROT_NONE);
545 if (munmap (hdr, sys_page_sz) != 0)
563 for (i = 0; i < n_pages; i++)
564 ptr[i] = start + (i << log2_page_size);
567 stats->
total = n_pages;
570 if (syscall (__NR_move_pages, 0, n_pages, ptr, 0, status, 0) != 0)
576 for (i = 0; i < n_pages; i++)
583 else if (status[i] == -EFAULT)
599 int pagesize = sysconf (_SC_PAGESIZE);
606 if ((fd = open ((
char *)
"/proc/self/pagemap", O_RDONLY)) == -1)
609 for (i = 0; i < n_pages; i++)
611 u64 seek, pagemap = 0;
613 seek = ((
u64) vaddr / pagesize) *
sizeof (
u64);
614 if (lseek (fd, seek, SEEK_SET) != seek)
617 if (read (fd, &pagemap,
sizeof (pagemap)) != (
sizeof (pagemap)))
620 if ((pagemap & (1ULL << 63)) == 0)
641 long unsigned int mask[16] = { 0 };
642 int mask_len =
sizeof (
mask) * 8 + 1;
658 mask[0] = 1 << numa_node;
660 if (syscall (__NR_set_mempolicy, force ? MPOL_BIND : MPOL_PREFERRED, mask,
678 if (syscall (__NR_set_mempolicy, MPOL_DEFAULT, 0, 0))
#define vec_validate(V, I)
Make sure vector is long enough for given index (no header, unspecified alignment) ...
__clib_export int clib_mem_vm_create_fd(clib_mem_page_sz_t log2_page_size, char *fmt,...)
#define CLIB_MEM_UNPOISON(a, s)
#define CLIB_MEM_VM_MAP_FAILED
__clib_export int clib_mem_vm_unmap(void *base)
__clib_export void clib_mem_vm_randomize_va(uword *requested_va, clib_mem_page_sz_t log2_page_size)
vnet_hw_if_output_node_runtime_t * r
#define MAP_FIXED_NOREPLACE
struct _clib_mem_vm_map_hdr clib_mem_vm_map_hdr_t
clib_memset(h->entries, 0, sizeof(h->entries[0]) *entries)
clib_mem_vm_map_hdr_t * first_map
#define vec_add1(V, E)
Add 1 element to end of vector (unspecified alignment).
static u64 clib_cpu_time_now(void)
__clib_export clib_mem_vm_map_hdr_t * clib_mem_vm_get_next_map_hdr(clib_mem_vm_map_hdr_t *hdr)
static_always_inline uword clib_mem_get_page_size(void)
static clib_mem_page_sz_t legacy_get_log2_default_hugepage_size(void)
static uword min_log2(uword x)
#define vec_reset_length(v)
Reset vector length to zero NULL-pointer tolerant.
#define MFD_ALLOW_SEALING
void * clib_mem_vm_map_internal(void *base, clib_mem_page_sz_t log2_page_sz, uword size, int fd, uword offset, char *name)
static_always_inline clib_mem_page_sz_t clib_mem_log2_page_size_validate(clib_mem_page_sz_t log2_page_size)
static uword pow2_mask(uword x)
description fragment has unexpected format
#define clib_error_return(e, args...)
__clib_export int clib_mem_set_numa_affinity(u8 numa_node, int force)
uword per_numa[CLIB_MAX_NUMAS]
#define clib_atomic_test_and_set(a)
vl_api_ikev2_sa_stats_t stats
#define clib_atomic_release(a)
#define CLIB_VM_MAP_HDR_NAME_MAX_LEN
#define clib_error_return_unix(e, args...)
static int legacy_memfd_create(u8 *name)
clib_mem_vm_map_hdr_t * last_map
clib_mem_page_sz_t log2_default_hugepage_sz
sll srl srl sll sra u16x4 i
#define vec_free(V)
Free vector's memory (no header).
__clib_export u64 * clib_mem_vm_get_paddr(void *mem, clib_mem_page_sz_t log2_page_size, int n_pages)
__clib_export uword clib_mem_get_default_hugepage_size(void)
static uword round_pow2(uword x, uword pow2)
uword clib_mem_vm_reserve(uword start, uword size, clib_mem_page_sz_t log2_page_sz)
__clib_export void clib_mem_get_page_stats(void *start, clib_mem_page_sz_t log2_page_size, uword n_pages, clib_mem_page_stats_t *stats)
__clib_export clib_mem_page_sz_t clib_mem_get_fd_log2_page_size(int fd)
static uword pointer_to_uword(const void *p)
__clib_export u64 clib_mem_get_fd_page_size(int fd)
template key/value backing page structure
clib_mem_page_sz_t log2_page_sz
clib_mem_page_sz_t log2_page_sz
#define vec_len(v)
Number of elements in vector (rvalue-only, NULL tolerant)
__clib_export clib_mem_main_t clib_mem_main
void clib_mem_main_init()
__clib_export int clib_mem_set_default_numa_affinity()