FD.io VPP  v20.01-48-g3e0dafb74
Vector Packet Processing
mem.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #define _GNU_SOURCE
17 #include <stdlib.h>
18 #include <sys/types.h>
19 #include <sys/stat.h>
20 #include <unistd.h>
21 #include <sys/mount.h>
22 #include <sys/mman.h>
23 #include <fcntl.h>
24 #include <linux/mempolicy.h>
25 #include <linux/memfd.h>
26 
27 #include <vppinfra/clib.h>
28 #include <vppinfra/mem.h>
29 #include <vppinfra/time.h>
30 #include <vppinfra/format.h>
31 #include <vppinfra/clib_error.h>
32 #include <vppinfra/linux/syscall.h>
33 #include <vppinfra/linux/sysfs.h>
34 
/*
 * Fallback definitions for the Linux file-sealing fcntl() commands and
 * seal flags. Each group is defined only when the system headers did not
 * already provide it.
 */
#ifndef F_LINUX_SPECIFIC_BASE
#define F_LINUX_SPECIFIC_BASE 1024
#endif

#ifndef F_ADD_SEALS
#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9)
#define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10)

#define F_SEAL_SEAL 0x0001	/* prevent further seals from being set */
#define F_SEAL_SHRINK 0x0002	/* prevent file from shrinking */
#define F_SEAL_GROW 0x0004	/* prevent file from growing */
#define F_SEAL_WRITE 0x0008	/* prevent writes */
#endif
48 
49 
50 uword
52 {
53  return getpagesize ();
54 }
55 
56 uword
58 {
59  unformat_input_t input;
60  static u32 size = 0;
61  int fd;
62 
63  if (size)
64  goto done;
65 
66  /*
67  * If the kernel doesn't support hugepages, /proc/meminfo won't
68  * say anything about it. Use the regular page size as a default.
69  */
70  size = clib_mem_get_page_size () / 1024;
71 
72  if ((fd = open ("/proc/meminfo", 0)) == -1)
73  return 0;
74 
75  unformat_init_clib_file (&input, fd);
76 
78  {
79  if (unformat (&input, "Hugepagesize:%_%u kB", &size))
80  ;
81  else
82  unformat_skip_line (&input);
83  }
84  unformat_free (&input);
85  close (fd);
86 done:
87  return 1024ULL * size;
88 }
89 
90 u64
92 {
93  struct stat st = { 0 };
94  if (fstat (fd, &st) == -1)
95  return 0;
96  return st.st_blksize;
97 }
98 
99 int
101 {
102  return min_log2 (clib_mem_get_fd_page_size (fd));
103 }
104 
105 void
106 clib_mem_vm_randomize_va (uword * requested_va, u32 log2_page_size)
107 {
108  u8 bit_mask = 15;
109 
110  if (log2_page_size <= 12)
111  bit_mask = 15;
112  else if (log2_page_size > 12 && log2_page_size <= 16)
113  bit_mask = 3;
114  else
115  bit_mask = 0;
116 
117  *requested_va +=
118  (clib_cpu_time_now () & bit_mask) * (1ull << log2_page_size);
119 }
120 
/* Fallback for headers that do not define the memfd_create() hugetlb flag. */
#ifndef MFD_HUGETLB
#define MFD_HUGETLB 0x0004U
#endif
124 
125 clib_error_t *
126 clib_mem_create_fd (char *name, int *fdp)
127 {
128  int fd;
129 
130  ASSERT (name);
131 
132  if ((fd = memfd_create (name, MFD_ALLOW_SEALING)) == -1)
133  return clib_error_return_unix (0, "memfd_create");
134 
135  if ((fcntl (fd, F_ADD_SEALS, F_SEAL_SHRINK)) == -1)
136  {
137  close (fd);
138  return clib_error_return_unix (0, "fcntl (F_ADD_SEALS)");
139  }
140 
141  *fdp = fd;
142  return 0;
143 }
144 
145 clib_error_t *
147 {
148  clib_error_t *err = 0;
149  int fd = -1;
150  static int memfd_hugetlb_supported = 1;
151  char *mount_dir;
152  char template[] = "/tmp/hugepage_mount.XXXXXX";
153  u8 *filename;
154 
155  ASSERT (name);
156 
157  if (memfd_hugetlb_supported)
158  {
159  if ((fd = memfd_create (name, MFD_HUGETLB)) != -1)
160  goto done;
161 
162  /* avoid further tries if memfd MFD_HUGETLB is not supported */
163  if (errno == EINVAL && strnlen (name, 256) <= 249)
164  memfd_hugetlb_supported = 0;
165  }
166 
167  mount_dir = mkdtemp (template);
168  if (mount_dir == 0)
169  return clib_error_return_unix (0, "mkdtemp \'%s\'", template);
170 
171  if (mount ("none", (char *) mount_dir, "hugetlbfs", 0, NULL))
172  {
173  rmdir ((char *) mount_dir);
174  err = clib_error_return_unix (0, "mount hugetlb directory '%s'",
175  mount_dir);
176  }
177 
178  filename = format (0, "%s/%s%c", mount_dir, name, 0);
179  fd = open ((char *) filename, O_CREAT | O_RDWR, 0755);
180  umount2 ((char *) mount_dir, MNT_DETACH);
181  rmdir ((char *) mount_dir);
182 
183  if (fd == -1)
184  err = clib_error_return_unix (0, "open");
185 
186 done:
187  if (fd != -1)
188  fdp[0] = fd;
189  return err;
190 }
191 
192 clib_error_t *
194 {
195  int fd = -1;
196  clib_error_t *err = 0;
197  void *addr = 0;
198  u8 *filename = 0;
199  int mmap_flags = 0;
200  int log2_page_size;
201  int n_pages;
202  int old_mpol = -1;
203  long unsigned int old_mask[16] = { 0 };
204 
205  /* save old numa mem policy if needed */
207  {
208  int rv;
209  rv = get_mempolicy (&old_mpol, old_mask, sizeof (old_mask) * 8 + 1,
210  0, 0);
211 
212  if (rv == -1)
213  {
214  if (a->numa_node != 0 && (a->flags & CLIB_MEM_VM_F_NUMA_FORCE) != 0)
215  {
216  err = clib_error_return_unix (0, "get_mempolicy");
217  goto error;
218  }
219  else
220  old_mpol = -1;
221  }
222  }
223 
224  if (a->flags & CLIB_MEM_VM_F_LOCKED)
225  mmap_flags |= MAP_LOCKED;
226 
227  /* if we are creating shared segment, we need file descriptor */
228  if (a->flags & CLIB_MEM_VM_F_SHARED)
229  {
230  mmap_flags |= MAP_SHARED;
231  /* if hugepages are needed we need to create mount point */
232  if (a->flags & CLIB_MEM_VM_F_HUGETLB)
233  {
234  if ((err = clib_mem_create_hugetlb_fd (a->name, &fd)))
235  goto error;
236 
237  mmap_flags |= MAP_LOCKED;
238  }
239  else
240  {
241  if ((err = clib_mem_create_fd (a->name, &fd)))
242  goto error;
243  }
244 
245  log2_page_size = clib_mem_get_fd_log2_page_size (fd);
246  if (log2_page_size == 0)
247  {
248  err = clib_error_return_unix (0, "cannot determine page size");
249  goto error;
250  }
251 
252  if (a->requested_va)
253  {
254  clib_mem_vm_randomize_va (&a->requested_va, log2_page_size);
255  mmap_flags |= MAP_FIXED;
256  }
257  }
258  else /* not CLIB_MEM_VM_F_SHARED */
259  {
260  mmap_flags |= MAP_PRIVATE | MAP_ANONYMOUS;
261  if (a->flags & CLIB_MEM_VM_F_HUGETLB)
262  {
263  mmap_flags |= MAP_HUGETLB;
264  log2_page_size = 21;
265  }
266  else
267  {
268  log2_page_size = min_log2 (sysconf (_SC_PAGESIZE));
269  }
270  }
271 
272  n_pages = ((a->size - 1) >> log2_page_size) + 1;
273 
275  {
276  err = clib_sysfs_prealloc_hugepages (a->numa_node, log2_page_size,
277  n_pages);
278  if (err)
279  goto error;
280 
281  }
282 
283  if (fd != -1)
284  if ((ftruncate (fd, (u64) n_pages * (1 << log2_page_size))) == -1)
285  {
286  err = clib_error_return_unix (0, "ftruncate");
287  goto error;
288  }
289 
290  if (old_mpol != -1)
291  {
292  int rv;
293  long unsigned int mask[16] = { 0 };
294  mask[0] = 1 << a->numa_node;
295  rv = set_mempolicy (MPOL_BIND, mask, sizeof (mask) * 8 + 1);
296  if (rv == -1 && a->numa_node != 0 &&
297  (a->flags & CLIB_MEM_VM_F_NUMA_FORCE) != 0)
298  {
299  err = clib_error_return_unix (0, "set_mempolicy");
300  goto error;
301  }
302  }
303 
304  addr = mmap (uword_to_pointer (a->requested_va, void *), a->size,
305  (PROT_READ | PROT_WRITE), mmap_flags, fd, 0);
306  if (addr == MAP_FAILED)
307  {
308  err = clib_error_return_unix (0, "mmap");
309  goto error;
310  }
311 
312  /* re-apply old numa memory policy */
313  if (old_mpol != -1 &&
314  set_mempolicy (old_mpol, old_mask, sizeof (old_mask) * 8 + 1) == -1)
315  {
316  err = clib_error_return_unix (0, "set_mempolicy");
317  goto error;
318  }
319 
320  a->log2_page_size = log2_page_size;
321  a->n_pages = n_pages;
322  a->addr = addr;
323  a->fd = fd;
324  goto done;
325 
326 error:
327  if (fd != -1)
328  close (fd);
329 
330 done:
331  vec_free (filename);
332  return err;
333 }
334 
335 void
337 {
338  if (a != 0)
339  {
340  clib_mem_vm_free (a->addr, 1ull << a->log2_page_size);
341  if (a->fd != -1)
342  close (a->fd);
343  }
344 }
345 
346 u64 *
347 clib_mem_vm_get_paddr (void *mem, int log2_page_size, int n_pages)
348 {
349  int pagesize = sysconf (_SC_PAGESIZE);
350  int fd;
351  int i;
352  u64 *r = 0;
353 
354  if ((fd = open ((char *) "/proc/self/pagemap", O_RDONLY)) == -1)
355  return 0;
356 
357  for (i = 0; i < n_pages; i++)
358  {
359  u64 seek, pagemap = 0;
360  uword vaddr = pointer_to_uword (mem) + (((u64) i) << log2_page_size);
361  seek = ((u64) vaddr / pagesize) * sizeof (u64);
362  if (lseek (fd, seek, SEEK_SET) != seek)
363  goto done;
364 
365  if (read (fd, &pagemap, sizeof (pagemap)) != (sizeof (pagemap)))
366  goto done;
367 
368  if ((pagemap & (1ULL << 63)) == 0)
369  goto done;
370 
371  pagemap &= pow2_mask (55);
372  vec_add1 (r, pagemap * pagesize);
373  }
374 
375 done:
376  close (fd);
377  if (vec_len (r) != n_pages)
378  {
379  vec_free (r);
380  return 0;
381  }
382  return r;
383 }
384 
385 clib_error_t *
387 {
388  int mmap_flags = MAP_SHARED;
389  void *addr;
390 
391  if (a->requested_va)
392  mmap_flags |= MAP_FIXED;
393 
394  addr = (void *) mmap (uword_to_pointer (a->requested_va, void *), a->size,
395  PROT_READ | PROT_WRITE, mmap_flags, a->fd, 0);
396 
397  if (addr == MAP_FAILED)
398  return clib_error_return_unix (0, "mmap");
399 
400  a->addr = addr;
401  return 0;
402 }
403 
404 /*
405  * fd.io coding-style-patch-verification: ON
406  *
407  * Local Variables:
408  * eval: (c-set-style "gnu")
409  * End:
410  */
void clib_mem_vm_ext_free(clib_mem_vm_alloc_t *a)
Definition: mem.c:336
#define CLIB_MEM_VM_F_HUGETLB
Definition: mem.h:387
clib_error_t * clib_sysfs_prealloc_hugepages(int numa_node, int log2_page_size, int nr)
Definition: sysfs.c:239
a
Definition: bitmap.h:538
#define CLIB_MEM_VM_F_NUMA_PREFER
Definition: mem.h:388
unsigned long u64
Definition: types.h:89
#define F_ADD_SEALS
Definition: mem.c:40
#define NULL
Definition: clib.h:58
void * addr
Pointer to allocated memory, set on successful allocation.
Definition: mem.h:406
uword requested_va
Request fixed position mapping.
Definition: mem.h:410
static int memfd_create(const char *name, unsigned int flags)
Definition: syscall.h:52
#define vec_add1(V, E)
Add 1 element to end of vector (unspecified alignment).
Definition: vec.h:523
static u64 clib_cpu_time_now(void)
Definition: time.h:75
int i
int numa_node
numa node preference.
Definition: mem.h:405
u8 * format(u8 *s, const char *fmt,...)
Definition: format.c:424
vhost_vring_addr_t addr
Definition: vhost_user.h:147
unsigned char u8
Definition: types.h:56
void unformat_init_clib_file(unformat_input_t *input, int file_descriptor)
Definition: unformat.c:1064
static uword min_log2(uword x)
Definition: clib.h:144
#define MFD_ALLOW_SEALING
Definition: main.c:104
clib_error_t * clib_mem_vm_ext_map(clib_mem_vm_map_t *a)
Definition: mem.c:386
static void unformat_skip_line(unformat_input_t *i)
Definition: format.h:222
clib_error_t * clib_mem_vm_ext_alloc(clib_mem_vm_alloc_t *a)
Definition: mem.c:193
uword requested_va
Request fixed position mapping.
Definition: mem.h:426
static long set_mempolicy(int mode, const unsigned long *nodemask, unsigned long maxnode)
Definition: syscall.h:31
static uword pow2_mask(uword x)
Definition: clib.h:220
char * name
Name for memory allocation, set by caller.
Definition: mem.h:403
uword size
Allocation size, set by caller.
Definition: mem.h:404
unsigned int u32
Definition: types.h:88
#define F_SEAL_SHRINK
Definition: mem.c:44
#define CLIB_MEM_VM_F_SHARED
Definition: mem.h:386
int fd
File descriptor, set on successful allocation if CLIB_MEM_VM_F_SHARED is set.
Definition: mem.h:407
struct _unformat_input_t unformat_input_t
uword clib_mem_get_page_size(void)
Definition: mem.c:51
#define clib_error_return_unix(e, args...)
Definition: error.h:102
u64 size
Definition: vhost_user.h:140
#define CLIB_MEM_VM_F_NUMA_FORCE
Definition: mem.h:389
static int get_mempolicy(int *mode, unsigned long *nodemask, unsigned long maxnode, void *addr, unsigned long flags)
Definition: syscall.h:37
int clib_mem_get_fd_log2_page_size(int fd)
Definition: mem.c:100
u64 clib_mem_get_fd_page_size(int fd)
Definition: mem.c:91
#define UNFORMAT_END_OF_INPUT
Definition: format.h:145
#define vec_free(V)
Free vector's memory (no header).
Definition: vec.h:342
string name[64]
Definition: ip.api:44
u32 flags
vm allocation flags: CLIB_MEM_VM_F_SHARED: request shared memory, file descriptor will be provided ...
Definition: mem.h:392
#define CLIB_MEM_VM_F_HUGETLB_PREALLOC
Definition: mem.h:390
#define uword_to_pointer(u, type)
Definition: types.h:136
#define ASSERT(truth)
#define CLIB_MEM_VM_F_LOCKED
Definition: mem.h:391
uword size
Map size.
Definition: mem.h:424
uword clib_mem_get_default_hugepage_size(void)
Definition: mem.c:57
static uword pointer_to_uword(const void *p)
Definition: types.h:131
clib_error_t * clib_mem_create_hugetlb_fd(char *name, int *fdp)
Definition: mem.c:146
clib_error_t * clib_mem_create_fd(char *name, int *fdp)
Definition: mem.c:126
static void clib_mem_vm_free(void *addr, uword size)
Definition: mem.h:349
#define vec_len(v)
Number of elements in vector (rvalue-only, NULL tolerant)
u64 uword
Definition: types.h:112
static void unformat_free(unformat_input_t *i)
Definition: format.h:163
u64 * clib_mem_vm_get_paddr(void *mem, int log2_page_size, int n_pages)
Definition: mem.c:347
void clib_mem_vm_randomize_va(uword *requested_va, u32 log2_page_size)
Definition: mem.c:106
void * mem
int fd
File descriptor to be mapped.
Definition: mem.h:425
int log2_page_size
Definition: mem.h:408
void * addr
Pointer to mapped memory, if successful.
Definition: mem.h:427
uword unformat(unformat_input_t *i, const char *fmt,...)
Definition: unformat.c:978
static uword unformat_check_input(unformat_input_t *i)
Definition: format.h:171