FD.io VPP v21.06
Vector Packet Processing
svm.c
1 /*
2  *------------------------------------------------------------------
3  * svm.c - shared VM allocation, mmap(...MAP_FIXED...)
4  * library
5  *
6  * Copyright (c) 2009 Cisco and/or its affiliates.
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at:
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *------------------------------------------------------------------
19  */
20 
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #include <sys/stat.h>
26 #include <netinet/in.h>
27 #include <signal.h>
28 #include <pthread.h>
29 #include <unistd.h>
30 #include <time.h>
31 #include <fcntl.h>
32 #include <string.h>
33 #include <vppinfra/clib.h>
34 #include <vppinfra/vec.h>
35 #include <vppinfra/hash.h>
36 #include <vppinfra/bitmap.h>
37 #include <vppinfra/fifo.h>
38 #include <vppinfra/time.h>
39 #include <vppinfra/heap.h>
40 #include <vppinfra/pool.h>
41 #include <vppinfra/format.h>
42 
43 #include "svm.h"
44 
45 static svm_region_t *root_rp;
46 static int root_rp_refcount;
47 
48 #define MAXLOCK 2
49 static pthread_mutex_t *mutexes_held[MAXLOCK];
50 static int nheld;
51 
52 svm_region_t *
53 svm_get_root_rp (void)
54 {
55  return root_rp;
56 }
57 
58 #define MUTEX_DEBUG
59 
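/* Pick the default base VA for the global SVM region: a fixed address
 * under AddressSanitizer, a value derived from the detected VA-space
 * width on AArch64, otherwise 0x130000000ULL. */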
60 u64
61 svm_get_global_region_base_va ()
62 {
63 #ifdef CLIB_SANITIZE_ADDR
64  return 0x200000000000;
65 #endif
66 
67 #if __aarch64__
68  /* On AArch64 VA space can have different size, from 36 to 48 bits.
69  Here we are trying to detect VA bits by parsing /proc/self/maps
70  address ranges */
71  int fd;
72  unformat_input_t input;
73  u64 start, end = 0;
74  u8 bits = 0;
75 
76  if ((fd = open ("/proc/self/maps", 0)) < 0)
77  clib_unix_error ("open '/proc/self/maps'");
78 
79  unformat_init_clib_file (&input, fd);
80  while (unformat_check_input (&input) != UNFORMAT_END_OF_INPUT)
81  {
82  if (unformat (&input, "%llx-%llx", &start, &end))
83  end--;
84  unformat_skip_line (&input);
85  }
86  unformat_free (&input);
87  close (fd);
88 
89  bits = count_leading_zeros (end);
90  bits = 64 - bits;
91  if (bits >= 36 && bits <= 48)
92  return ((1ul << bits) / 4) - (2 * SVM_GLOBAL_REGION_SIZE);
93  else
94  clib_unix_error ("unexpected va bits '%u'", bits);
95 #endif
96 
97  /* default value */
98  return 0x130000000ULL;
99 }
100 
101 static void
102 region_lock (svm_region_t * rp, int tag)
103 {
104  pthread_mutex_lock (&rp->mutex);
105 #ifdef MUTEX_DEBUG
106  rp->mutex_owner_pid = getpid ();
107  rp->mutex_owner_tag = tag;
108 #endif
109  ASSERT (nheld < MAXLOCK); //NOSONAR
110  /*
111  * Keep score of held mutexes so we can try to exit
112  * cleanly if the world comes to an end at the worst possible
113  * moment
114  */
115  mutexes_held[nheld++] = &rp->mutex;
116 }
117 
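/* Release rp's mutex and remove it from the mutexes_held[] scoreboard
 * maintained by region_lock(), so svm_mutex_cleanup() only releases
 * locks this process actually holds. */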
118 static void
119 region_unlock (svm_region_t * rp)
120 {
121  int i, j;
122 #ifdef MUTEX_DEBUG
123  rp->mutex_owner_pid = 0;
124  rp->mutex_owner_tag = 0;
125 #endif
126 
127  for (i = nheld - 1; i >= 0; i--)
128  {
129  if (mutexes_held[i] == &rp->mutex)
130  {
131  for (j = i; j < MAXLOCK - 1; j++)
132  mutexes_held[j] = mutexes_held[j + 1];
133  nheld--;
134  goto found;
135  }
136  }
137  ASSERT (0);
138 
139 found:
140  CLIB_MEMORY_BARRIER ();
141  pthread_mutex_unlock (&rp->mutex);
142 }
143 
144 
145 static u8 *
146 format_svm_flags (u8 * s, va_list * args)
147 {
148  uword f = va_arg (*args, uword);
149 
150  if (f & SVM_FLAGS_MHEAP)
151  s = format (s, "MHEAP ");
152  if (f & SVM_FLAGS_FILE)
153  s = format (s, "FILE ");
154  if (f & SVM_FLAGS_NODATA)
155  s = format (s, "NODATA ");
156  if (f & SVM_FLAGS_NEED_DATA_INIT)
157  s = format (s, "INIT ");
158 
159  return (s);
160 }
161 
162 static u8 *
163 format_svm_size (u8 * s, va_list * args)
164 {
165  uword size = va_arg (*args, uword);
166 
167  if (size >= (1 << 20))
168  {
169  s = format (s, "(%d mb)", size >> 20);
170  }
171  else if (size >= (1 << 10))
172  {
173  s = format (s, "(%d kb)", size >> 10);
174  }
175  else
176  {
177  s = format (s, "(%d bytes)", size);
178  }
179  return (s);
180 }
181 
182 u8 *
183 format_svm_region (u8 * s, va_list * args)
184 {
185  svm_region_t *rp = va_arg (*args, svm_region_t *);
186  int verbose = va_arg (*args, int);
187  int i;
188  uword lo, hi;
189 
190  s = format (s, "%s: base va 0x%x size 0x%x %U\n",
191  rp->region_name, rp->virtual_base,
192  rp->virtual_size, format_svm_size, rp->virtual_size);
193  s = format (s, " user_ctx 0x%x, bitmap_size %d\n",
194  rp->user_ctx, rp->bitmap_size);
195 
196  if (verbose)
197  {
198  s = format (s, " flags: 0x%x %U\n", rp->flags,
199  format_svm_flags, rp->flags);
200  s = format (s,
201  " region_heap 0x%x data_base 0x%x data_heap 0x%x\n",
202  rp->region_heap, rp->data_base, rp->data_heap);
203  }
204 
205  s = format (s, " %d clients, pids: ", vec_len (rp->client_pids));
206 
207  for (i = 0; i < vec_len (rp->client_pids); i++)
208  s = format (s, "%d ", rp->client_pids[i]);
209 
210  s = format (s, "\n");
211 
212  if (verbose)
213  {
214  lo = hi = ~0;
215 
216  s = format (s, " VM in use: ");
217 
218  for (i = 0; i < rp->bitmap_size; i++)
219  {
220  if (clib_bitmap_get_no_check (rp->bitmap, i) != 0)
221  {
222  if (lo == ~0)
223  {
224  hi = lo = rp->virtual_base + i * MMAP_PAGESIZE;
225  }
226  else
227  {
228  hi = rp->virtual_base + i * MMAP_PAGESIZE;
229  }
230  }
231  else
232  {
233  if (lo != ~0)
234  {
235  hi = rp->virtual_base + i * MMAP_PAGESIZE - 1;
236  s = format (s, " 0x%x - 0x%x (%dk)\n", lo, hi,
237  (hi - lo) >> 10);
238  lo = hi = ~0;
239  }
240  }
241  }
242  }
243 
244  return (s);
245 }
246 
247 /*
248  * rnd_pagesize
249  * Round to a pagesize multiple, presumably 4k works
250  */
251 static u64
252 rnd_pagesize (u64 size)
253 {
254  u64 rv;
255 
256  rv = (size + (MMAP_PAGESIZE - 1)) & ~(MMAP_PAGESIZE - 1);
257  return (rv);
258 }
259 
260 /*
261  * svm_data_region_create
262  */
263 static int
264 svm_data_region_create (svm_map_region_args_t * a, svm_region_t * rp)
265 {
266  int fd;
267  u8 junk = 0;
268  uword map_size;
269 
270  map_size = rp->virtual_size - (MMAP_PAGESIZE +
271  (a->pvt_heap_size ? a->pvt_heap_size :
272  SVM_PVT_MHEAP_SIZE));
273 
274  if (a->flags & SVM_FLAGS_FILE)
275  {
276  struct stat statb;
277 
278  fd = open (a->backing_file, O_RDWR | O_CREAT, 0777);
279 
280  if (fd < 0)
281  {
282  clib_unix_warning ("open");
283  return -1;
284  }
285 
286  if (fstat (fd, &statb) < 0)
287  {
288  clib_unix_warning ("fstat");
289  close (fd);
290  return -2;
291  }
292 
293  if (statb.st_mode & S_IFREG)
294  {
295  if (statb.st_size == 0)
296  {
297  if (lseek (fd, map_size, SEEK_SET) == (off_t) - 1)
298  {
299  clib_unix_warning ("seek region size");
300  close (fd);
301  return -3;
302  }
303  if (write (fd, &junk, 1) != 1)
304  {
305  clib_unix_warning ("set region size");
306  close (fd);
307  return -3;
308  }
309  }
310  else
311  {
312  map_size = rnd_pagesize (statb.st_size);
313  }
314  }
315  else
316  {
317  map_size = a->backing_mmap_size;
318  }
319 
320  ASSERT (map_size <= rp->virtual_size -
321  (MMAP_PAGESIZE + (a->pvt_heap_size ? a->pvt_heap_size : SVM_PVT_MHEAP_SIZE)));
322 
323  if (mmap (rp->data_base, map_size, PROT_READ | PROT_WRITE,
324  MAP_SHARED | MAP_FIXED, fd, 0) == MAP_FAILED)
325  {
326  clib_unix_warning ("mmap");
327  close (fd);
328  return -3;
329  }
330  close (fd);
331  CLIB_MEM_UNPOISON (rp->data_base, map_size);
332  rp->backing_file = (char *) format (0, "%s%c", a->backing_file, 0);
333  rp->flags |= SVM_FLAGS_FILE;
334  }
335 
336  if (a->flags & SVM_FLAGS_MHEAP)
337  {
338  rp->data_heap = clib_mem_create_heap (rp->data_base, map_size,
339  1 /* locked */ , "svm data");
340 
341  rp->flags |= SVM_FLAGS_MHEAP;
342  }
343  return 0;
344 }
345 
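/* Client-side counterpart of svm_data_region_create(): map the data
 * portion of an existing region. Opens the backing file without
 * O_CREAT and does not (re)create the data heap. */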
346 static int
347 svm_data_region_map (svm_map_region_args_t * a, svm_region_t * rp)
348 {
349  int fd;
350  u8 junk = 0;
351  uword map_size;
352  struct stat statb;
353 
354  map_size = rp->virtual_size -
355  (MMAP_PAGESIZE
356  + (a->pvt_heap_size ? a->pvt_heap_size : SVM_PVT_MHEAP_SIZE));
357 
358  if (a->flags & SVM_FLAGS_FILE)
359  {
360 
361  fd = open (a->backing_file, O_RDWR, 0777);
362 
363  if (fd < 0)
364  {
365  clib_unix_warning ("open");
366  return -1;
367  }
368 
369  if (fstat (fd, &statb) < 0)
370  {
371  clib_unix_warning ("fstat");
372  close (fd);
373  return -2;
374  }
375 
376  if (statb.st_mode & S_IFREG)
377  {
378  if (statb.st_size == 0)
379  {
380  if (lseek (fd, map_size, SEEK_SET) == (off_t) - 1)
381  {
382  clib_unix_warning ("seek region size");
383  close (fd);
384  return -3;
385  }
386  if (write (fd, &junk, 1) != 1)
387  {
388  clib_unix_warning ("set region size");
389  close (fd);
390  return -3;
391  }
392  }
393  else
394  {
395  map_size = rnd_pagesize (statb.st_size);
396  }
397  }
398  else
399  {
400  map_size = a->backing_mmap_size;
401  }
402 
403  ASSERT (map_size <= rp->virtual_size
404  - (MMAP_PAGESIZE
405  +
406  (a->pvt_heap_size ? a->pvt_heap_size : SVM_PVT_MHEAP_SIZE)));
407 
408  if (mmap (rp->data_base, map_size, PROT_READ | PROT_WRITE,
409  MAP_SHARED | MAP_FIXED, fd, 0) == MAP_FAILED)
410  {
411  clib_unix_warning ("mmap");
412  close (fd);
413  return -3;
414  }
415  close (fd);
416  CLIB_MEM_UNPOISON (rp->data_base, map_size);
417  }
418  return 0;
419 }
420 
421 u8 *
422 shm_name_from_svm_map_region_args (svm_map_region_args_t * a)
423 {
424  u8 *shm_name;
425  int root_path_offset = 0;
426  int name_offset = 0;
427 
428  if (a->root_path)
429  {
430  /* Tolerate present or absent slashes */
431  if (a->root_path[0] == '/')
432  root_path_offset++;
433 
434  if (a->name[0] == '/')
435  name_offset = 1;
436 
437  shm_name = format (0, "/%s-%s%c", &a->root_path[root_path_offset],
438  &a->name[name_offset], 0);
439  }
440  else
441  shm_name = format (0, "%s%c", a->name, 0);
442  return (shm_name);
443 }
444 
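/* First-time initialization of a freshly created region: process-shared
 * mutex and condvar, private region heap, page-allocation bitmap and
 * data base, then rp->version is stamped so that clients polling in
 * svm_map_region() know the creator has finished the basic setup. */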
445 void
446 svm_region_init_mapped_region (svm_map_region_args_t * a, svm_region_t * rp)
447 {
448  pthread_mutexattr_t attr;
449  pthread_condattr_t cattr;
450  int nbits, words, bit;
451  int overhead_space;
452  void *oldheap;
453  uword data_base;
454  ASSERT (rp);
455  int rv;
456 
457  clib_memset (rp, 0, sizeof (*rp));
458 
459  if (pthread_mutexattr_init (&attr))
460  clib_unix_warning ("mutexattr_init");
461 
462  if (pthread_mutexattr_setpshared (&attr, PTHREAD_PROCESS_SHARED))
463  clib_unix_warning ("mutexattr_setpshared");
464 
465  if (pthread_mutex_init (&rp->mutex, &attr))
466  clib_unix_warning ("mutex_init");
467 
468  if (pthread_mutexattr_destroy (&attr))
469  clib_unix_warning ("mutexattr_destroy");
470 
471  if (pthread_condattr_init (&cattr))
472  clib_unix_warning ("condattr_init");
473 
474  if (pthread_condattr_setpshared (&cattr, PTHREAD_PROCESS_SHARED))
475  clib_unix_warning ("condattr_setpshared");
476 
477  if (pthread_cond_init (&rp->condvar, &cattr))
478  clib_unix_warning ("cond_init");
479 
480  if (pthread_condattr_destroy (&cattr))
481  clib_unix_warning ("condattr_destroy");
482 
483  region_lock (rp, 1);
484 
485  rp->virtual_base = a->baseva;
486  rp->virtual_size = a->size;
487 
488  rp->region_heap = clib_mem_create_heap
489  (uword_to_pointer (a->baseva + MMAP_PAGESIZE, void *),
490  (a->pvt_heap_size !=
491  0) ? a->pvt_heap_size : SVM_PVT_MHEAP_SIZE, 1 /* locked */ ,
492  "svm region");
493 
494  oldheap = svm_push_pvt_heap (rp);
495 
496  rp->region_name = (char *) format (0, "%s%c", a->name, 0);
497  vec_add1 (rp->client_pids, getpid ());
498 
499  nbits = rp->virtual_size / MMAP_PAGESIZE;
500 
501  ASSERT (nbits > 0);
502  rp->bitmap_size = nbits;
503  words = (nbits + BITS (uword) - 1) / BITS (uword);
504  vec_validate (rp->bitmap, words - 1);
505 
506  overhead_space = MMAP_PAGESIZE /* header */ +
507  ((a->pvt_heap_size != 0) ? a->pvt_heap_size : SVM_PVT_MHEAP_SIZE);
508 
509  bit = 0;
510  data_base = (uword) rp->virtual_base;
511 
512  if (a->flags & SVM_FLAGS_NODATA)
513  rp->flags |= SVM_FLAGS_NEED_DATA_INIT;
514 
515  do
516  {
517  clib_bitmap_set_no_check (rp->bitmap, bit, 1);
518  bit++;
519  overhead_space -= MMAP_PAGESIZE;
520  data_base += MMAP_PAGESIZE;
521  }
522  while (overhead_space > 0);
523 
524  rp->data_base = (void *) data_base;
525 
526  /*
527  * Note: although the POSIX spec guarantees that only one
528  * process enters this block, we have to play games
529  * to hold off clients until e.g. the mutex is ready
530  */
531  rp->version = SVM_VERSION;
532 
533  /* setup the data portion of the region */
534 
535  rv = svm_data_region_create (a, rp);
536  if (rv)
537  {
538  clib_warning ("data_region_create: %d", rv);
539  }
540 
541  region_unlock (rp);
542 
543  svm_pop_heap (oldheap);
544 }
545 
546 /*
547  * svm_map_region
548  */
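/* Either create the region's shm segment (the O_CREAT | O_EXCL winner)
 * and map it MAP_FIXED at a->baseva, or attach to an existing one: map
 * one page to read the creator's base VA and size, wait for
 * rp->version to become nonzero, then re-map the full region MAP_FIXED
 * at the recorded base. */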
549 void *
550 svm_map_region (svm_map_region_args_t * a)
551 {
552  int svm_fd;
553  svm_region_t *rp;
554  int deadman = 0;
555  u8 junk = 0;
556  void *oldheap;
557  int rv;
558  int pid_holding_region_lock;
559  u8 *shm_name;
560  int dead_region_recovery = 0;
561  int time_left;
562  struct stat stat;
563  struct timespec ts, tsrem;
564 
565  ASSERT ((a->size & ~(MMAP_PAGESIZE - 1)) == a->size);
566  ASSERT (a->name);
567 
568  shm_name = shm_name_from_svm_map_region_args (a);
569 
570  if (CLIB_DEBUG > 1)
571  clib_warning ("[%d] map region %s: shm_open (%s)",
572  getpid (), a->name, shm_name);
573 
574  svm_fd = shm_open ((char *) shm_name, O_RDWR | O_CREAT | O_EXCL, 0777);
575 
576  if (svm_fd >= 0)
577  {
578  if (fchmod (svm_fd, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP) < 0)
579  clib_unix_warning ("segment chmod");
580  /* This turns out to fail harmlessly if the client starts first */
581  if (fchown (svm_fd, a->uid, a->gid) < 0)
582  clib_unix_warning ("segment chown [ok if client starts first]");
583 
584  vec_free (shm_name);
585 
586  if (lseek (svm_fd, a->size, SEEK_SET) == (off_t) - 1)
587  {
588  clib_warning ("seek region size");
589  close (svm_fd);
590  return (0);
591  }
592  if (write (svm_fd, &junk, 1) != 1)
593  {
594  clib_warning ("set region size");
595  close (svm_fd);
596  return (0);
597  }
598 
599  rp = mmap (uword_to_pointer (a->baseva, void *), a->size,
600  PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, svm_fd, 0);
601 
602  if (rp == (svm_region_t *) MAP_FAILED)
603  {
604  clib_unix_warning ("mmap create");
605  close (svm_fd);
606  return (0);
607  }
608  close (svm_fd);
609  CLIB_MEM_UNPOISON (rp, a->size);
610 
611  svm_region_init_mapped_region (a, rp);
612 
613  return ((void *) rp);
614  }
615  else
616  {
617  svm_fd = shm_open ((char *) shm_name, O_RDWR, 0777);
618 
619  vec_free (shm_name);
620 
621  if (svm_fd < 0)
622  {
623  perror ("svm_region_map(mmap open)");
624  return (0);
625  }
626 
627  /* Reset ownership in case the client started first */
628  if (fchown (svm_fd, a->uid, a->gid) < 0)
629  clib_unix_warning ("segment chown [ok if client starts first]");
630 
631  time_left = 20;
632  while (1)
633  {
634  if (0 != fstat (svm_fd, &stat))
635  {
636  clib_warning ("fstat failed: %d", errno);
637  close (svm_fd);
638  return (0);
639  }
640  if (stat.st_size > 0)
641  {
642  break;
643  }
644  if (0 == time_left)
645  {
646  clib_warning ("waiting for resize of shm file timed out");
647  close (svm_fd);
648  return (0);
649  }
650  ts.tv_sec = 0;
651  ts.tv_nsec = 100000000;
652  while (nanosleep (&ts, &tsrem) < 0)
653  ts = tsrem;
654  time_left--;
655  }
656 
657  rp = mmap (0, MMAP_PAGESIZE,
658  PROT_READ | PROT_WRITE, MAP_SHARED, svm_fd, 0);
659 
660  if (rp == (svm_region_t *) MAP_FAILED)
661  {
662  close (svm_fd);
663  clib_warning ("mmap");
664  return (0);
665  }
666 
667  CLIB_MEM_UNPOISON (rp, MMAP_PAGESIZE);
668 
669  /*
670  * We lost the footrace to create this region; make sure
671  * the winner has crossed the finish line.
672  */
673  while (rp->version == 0 && deadman++ < 5)
674  {
675  sleep (1);
676  }
677 
678  /*
679  * <bleep>-ed?
680  */
681  if (rp->version == 0)
682  {
683  clib_warning ("rp->version %d not %d", rp->version, SVM_VERSION);
684  close (svm_fd);
685  munmap (rp, a->size);
686  return (0);
687  }
688  /* Remap now that the region has been placed */
689  a->baseva = rp->virtual_base;
690  a->size = rp->virtual_size;
691  munmap (rp, MMAP_PAGESIZE);
692 
693  rp = (void *) mmap (uword_to_pointer (a->baseva, void *), a->size,
694  PROT_READ | PROT_WRITE,
695  MAP_SHARED | MAP_FIXED, svm_fd, 0);
696  if ((uword) rp == (uword) MAP_FAILED)
697  {
698  clib_unix_warning ("mmap");
699  close (svm_fd);
700  return (0);
701  }
702 
703  close (svm_fd);
704 
705  CLIB_MEM_UNPOISON (rp, a->size);
706 
707  if ((uword) rp != rp->virtual_base)
708  {
709  clib_warning ("mmap botch");
710  }
711 
712  /*
713  * Try to fix the region mutex if it is held by
714  * a dead process
715  */
716  pid_holding_region_lock = rp->mutex_owner_pid;
717  if (pid_holding_region_lock && kill (pid_holding_region_lock, 0) < 0)
718  {
719  pthread_mutexattr_t attr;
720  clib_warning
721  ("region %s mutex held by dead pid %d, tag %d, force unlock",
722  rp->region_name, pid_holding_region_lock, rp->mutex_owner_tag);
723  /* owner pid is nonexistent */
724  if (pthread_mutexattr_init (&attr))
725  clib_unix_warning ("mutexattr_init");
726  if (pthread_mutexattr_setpshared (&attr, PTHREAD_PROCESS_SHARED))
727  clib_unix_warning ("mutexattr_setpshared");
728  if (pthread_mutex_init (&rp->mutex, &attr))
729  clib_unix_warning ("mutex_init");
730  dead_region_recovery = 1;
731  }
732 
733  if (dead_region_recovery)
734  clib_warning ("recovery: attempt to re-lock region");
735 
736  region_lock (rp, 2);
737  oldheap = svm_push_pvt_heap (rp);
738  vec_add1 (rp->client_pids, getpid ());
739 
740  if (dead_region_recovery)
741  clib_warning ("recovery: attempt svm_data_region_map");
742 
743  rv = svm_data_region_map (a, rp);
744  if (rv)
745  {
746  clib_warning ("data_region_map: %d", rv);
747  }
748 
749  if (dead_region_recovery)
750  clib_warning ("unlock and continue");
751 
752  region_unlock (rp);
753 
754  svm_pop_heap (oldheap);
755 
756  return ((void *) rp);
757 
758  }
759  return 0; /* NOTREACHED *///NOSONAR
760 }
761 
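/* atexit() handler: unlock any region mutexes still recorded in
 * mutexes_held[] so other processes are not left blocked if this
 * process exits while holding one. */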
762 static void
763 svm_mutex_cleanup (void)
764 {
765  int i;
766  for (i = 0; i < nheld; i++)
767  {
768  pthread_mutex_unlock (mutexes_held[i]); //NOSONAR
769  }
770 }
771 
772 static int
773 svm_region_init_internal (svm_map_region_args_t * a)
774 {
775  svm_region_t *rp;
776  u64 ticks = clib_cpu_time_now ();
777  uword randomize_baseva;
778 
779  /* guard against klutz calls */
780  if (root_rp)
781  return -1;
782 
783  root_rp_refcount++;
784 
785  atexit (svm_mutex_cleanup);
786 
787  /* Randomize the shared-VM base at init time */
788  if (MMAP_PAGESIZE <= (4 << 10))
789  randomize_baseva = (ticks & 15) * MMAP_PAGESIZE;
790  else
791  randomize_baseva = (ticks & 3) * MMAP_PAGESIZE;
792 
793  a->baseva += randomize_baseva;
794 
795  rp = svm_map_region (a);
796  if (!rp)
797  return -1;
798 
799  region_lock (rp, 3);
800 
801  /* Set up the main region data structures */
802  if (rp->flags & SVM_FLAGS_NEED_DATA_INIT)
803  {
804  svm_main_region_t *mp = 0;
805  void *oldheap;
806 
807  rp->flags &= ~(SVM_FLAGS_NEED_DATA_INIT);
808 
809  oldheap = svm_push_pvt_heap (rp);
810  vec_validate (mp, 0);
811  mp->name_hash = hash_create_string (0, sizeof (uword));
812  mp->root_path = a->root_path ? format (0, "%s%c", a->root_path, 0) : 0;
813  mp->uid = a->uid;
814  mp->gid = a->gid;
815  rp->data_base = mp;
816  svm_pop_heap (oldheap);
817  }
818  region_unlock (rp);
819  root_rp = rp;
820 
821  return 0;
822 }
823 
824 void
825 svm_region_init (void)
826 {
827  svm_map_region_args_t _a, *a = &_a;
828 
829  clib_memset (a, 0, sizeof (*a));
830  a->root_path = 0;
831  a->name = SVM_GLOBAL_REGION_NAME;
832  a->baseva = svm_get_global_region_base_va ();
833  a->size = SVM_GLOBAL_REGION_SIZE;
834  a->flags = SVM_FLAGS_NODATA;
835  a->uid = 0;
836  a->gid = 0;
837 
838  svm_region_init_internal (a);
839 }
840 
841 int
842 svm_region_init_chroot (const char *root_path)
843 {
844  svm_map_region_args_t _a, *a = &_a;
845 
846  clib_memset (a, 0, sizeof (*a));
847  a->root_path = root_path;
848  a->name = SVM_GLOBAL_REGION_NAME;
849  a->baseva = svm_get_global_region_base_va ();
850  a->size = SVM_GLOBAL_REGION_SIZE;
851  a->flags = SVM_FLAGS_NODATA;
852  a->uid = 0;
853  a->gid = 0;
854 
855  return svm_region_init_internal (a);
856 }
857 
858 void
859 svm_region_init_chroot_uid_gid (const char *root_path, int uid, int gid)
860 {
861  svm_map_region_args_t _a, *a = &_a;
862 
863  clib_memset (a, 0, sizeof (*a));
864  a->root_path = root_path;
865  a->name = SVM_GLOBAL_REGION_NAME;
866  a->baseva = svm_get_global_region_base_va ();
867  a->size = SVM_GLOBAL_REGION_SIZE;
868  a->flags = SVM_FLAGS_NODATA;
869  a->uid = uid;
870  a->gid = gid;
871 
872  svm_region_init_internal (a);
873 }
874 
875 void
876 svm_region_init_args (svm_map_region_args_t * a)
877 {
878  svm_region_init_internal (a);
879 }
880 
881 void *
882 svm_region_find_or_create (svm_map_region_args_t * a)
883 {
884  svm_main_region_t *mp;
885  svm_region_t *rp;
886  uword need_nbits;
887  int index, i;
888  void *oldheap;
889  uword *p;
890  u8 *name;
891  svm_subregion_t *subp;
892 
893  ASSERT (root_rp);
894 
895  a->size += MMAP_PAGESIZE +
896  (a->pvt_heap_size ? a->pvt_heap_size : SVM_PVT_MHEAP_SIZE);
897  a->size = rnd_pagesize (a->size);
898 
899  region_lock (root_rp, 4);
900  oldheap = svm_push_pvt_heap (root_rp);
901  mp = root_rp->data_base;
902 
903  ASSERT (mp);
904 
905  /* Map the named region from the correct chroot environment */
906  if (a->root_path == NULL)
907  a->root_path = (char *) mp->root_path;
908 
909  /*
910  * See if this region is already known. If it is, we're
911  * almost done...
912  */
913  p = hash_get_mem (mp->name_hash, a->name);
914 
915  if (p)
916  {
917  rp = svm_map_region (a);
918  region_unlock (root_rp);
919  svm_pop_heap (oldheap);
920  return rp;
921  }
922 
923  /* Create the region. */
924  ASSERT ((a->size & ~(MMAP_PAGESIZE - 1)) == a->size);
925 
926  need_nbits = a->size / MMAP_PAGESIZE;
927 
928  index = 1; /* $$$ fixme, figure out how many bits to really skip */
929 
930  /*
931  * Scan the virtual space allocation bitmap, looking for a large
932  * enough chunk
933  */
934  do
935  {
936  if (clib_bitmap_get_no_check (root_rp->bitmap, index) == 0)
937  {
938  for (i = 0; i < (need_nbits - 1); i++)
939  {
940  if (clib_bitmap_get_no_check (root_rp->bitmap, index + i) == 1)
941  {
942  index = index + i;
943  goto next;
944  }
945  }
946  break;
947  }
948  index++;
949  next:;
950  }
951  while (index < root_rp->bitmap_size);
952 
953  /* Completely out of VM? */
954  if (index >= root_rp->bitmap_size)
955  {
956  clib_warning ("region %s: not enough VM to allocate 0x%llx (%lld)",
957  root_rp->region_name, a->size, a->size);
958  svm_pop_heap (oldheap);
959  region_unlock (root_rp);
960  return 0;
961  }
962 
963  /*
964  * Mark virtual space allocated
965  */
966 #if CLIB_DEBUG > 1
967  clib_warning ("set %d bits at index %d", need_nbits, index);
968 #endif
969 
970  for (i = 0; i < need_nbits; i++)
971  {
972  clib_bitmap_set_no_check (root_rp->bitmap, index + i, 1);
973  }
974 
975  /* Place this region where it goes... */
976  a->baseva = root_rp->virtual_base + index * MMAP_PAGESIZE;
977 
978  rp = svm_map_region (a);
979 
980  pool_get (mp->subregions, subp);
981  name = format (0, "%s%c", a->name, 0);
982  subp->subregion_name = name;
983 
984  hash_set_mem (mp->name_hash, name, subp - mp->subregions);
985 
986  svm_pop_heap (oldheap);
987 
988  region_unlock (root_rp);
989 
990  return (rp);
991 }
992 
993 void
994 svm_region_unlink (svm_region_t * rp)
995 {
996  svm_map_region_args_t _a, *a = &_a;
997  svm_main_region_t *mp;
998  u8 *shm_name;
999 
1000  ASSERT (root_rp);
1001  ASSERT (rp);
1002  ASSERT (vec_c_string_is_terminated (rp->region_name));
1003 
1004  mp = root_rp->data_base;
1005  ASSERT (mp);
1006 
1007  a->root_path = (char *) mp->root_path;
1008  a->name = rp->region_name;
1009  shm_name = shm_name_from_svm_map_region_args (a);
1010  if (CLIB_DEBUG > 1)
1011  clib_warning ("[%d] shm_unlink (%s)", getpid (), shm_name);
1012  shm_unlink ((const char *) shm_name);
1013  vec_free (shm_name);
1014 }
1015 
1016 /*
1017  * svm_region_unmap
1018  *
1019  * Let go of the indicated region. If the calling process
1020  * is the last customer, throw it away completely.
1021  * The root region mutex guarantees atomicity with respect to
1022  * a new region client showing up at the wrong moment.
1023  */
1024 void
1025 svm_region_unmap_internal (void *rp_arg, u8 is_client)
1026 {
1027  int i, mypid = getpid ();
1028  int nclients_left;
1029  void *oldheap;
1030  uword virtual_base, virtual_size;
1031  svm_region_t *rp = rp_arg;
1032  char *name;
1033 
1034  /*
1035  * If we take a signal while holding one or more shared-memory
1036  * mutexes, we may end up back here from an otherwise
1037  * benign exit handler. Bail out to avoid a recursive
1038  * mutex screw-up.
1039  */
1040  if (nheld)
1041  return;
1042 
1043  ASSERT (rp);
1044  ASSERT (root_rp);
1045 
1046  if (CLIB_DEBUG > 1)
1047  clib_warning ("[%d] unmap region %s", getpid (), rp->region_name);
1048 
1049  region_lock (root_rp, 5);
1050  region_lock (rp, 6);
1051 
1052  oldheap = svm_push_pvt_heap (rp); /* nb vec_delete() in the loop */
1053 
1054  /* Remove the caller from the list of mappers */
1055  CLIB_MEM_UNPOISON (rp->client_pids, vec_bytes (rp->client_pids));
1056  for (i = 0; i < vec_len (rp->client_pids); i++)
1057  {
1058  if (rp->client_pids[i] == mypid)
1059  {
1060  vec_delete (rp->client_pids, 1, i);
1061  goto found;
1062  }
1063  }
1064  clib_warning ("pid %d AWOL", mypid);
1065 
1066 found:
1067 
1068  svm_pop_heap (oldheap);
1069 
1070  nclients_left = vec_len (rp->client_pids);
1071  virtual_base = rp->virtual_base;
1072  virtual_size = rp->virtual_size;
1073 
1074  if (nclients_left == 0)
1075  {
1076  int index, nbits, i;
1077  svm_main_region_t *mp;
1078  uword *p;
1079  svm_subregion_t *subp;
1080 
1081  /* Kill the region, last guy on his way out */
1082 
1083  oldheap = svm_push_pvt_heap (root_rp);
1084  name = vec_dup (rp->region_name);
1085 
1086  virtual_base = rp->virtual_base;
1087  virtual_size = rp->virtual_size;
1088 
1089  /* Figure out which bits to clear in the root region bitmap */
1090  index = (virtual_base - root_rp->virtual_base) / MMAP_PAGESIZE;
1091 
1092  nbits = (virtual_size + MMAP_PAGESIZE - 1) / MMAP_PAGESIZE;
1093 
1094 #if CLIB_DEBUG > 1
1095  clib_warning ("clear %d bits at index %d", nbits, index);
1096 #endif
1097  /* Give back the allocated VM */
1098  for (i = 0; i < nbits; i++)
1099  {
1100  clib_bitmap_set_no_check (root_rp->bitmap, index + i, 0);
1101  }
1102 
1103  mp = root_rp->data_base;
1104 
1105  p = hash_get_mem (mp->name_hash, name);
1106 
1107  /* Better never happen ... */
1108  if (p == NULL)
1109  {
1110  region_unlock (rp);
1111  region_unlock (root_rp);
1112  svm_pop_heap (oldheap);
1113  clib_warning ("Region name '%s' not found?", name);
1114  return;
1115  }
1116 
1117  /* Remove from the root region subregion pool */
1118  subp = mp->subregions + p[0];
1119  pool_put (mp->subregions, subp);
1120 
1121  hash_unset_mem (mp->name_hash, name);
1122 
1123  vec_free (name);
1124 
1125  region_unlock (rp);
1126 
1127  /* If a client asks for the cleanup, don't unlink the backing
1128  * file since we can't tell if it has been recreated. */
1129  if (!is_client)
1130  svm_region_unlink (rp);
1131 
1132  munmap ((void *) virtual_base, virtual_size);
1133  region_unlock (root_rp);
1134  svm_pop_heap (oldheap);
1135  return;
1136  }
1137 
1138  region_unlock (rp);
1139  region_unlock (root_rp);
1140 
1141  munmap ((void *) virtual_base, virtual_size);
1142 }
1143 
1144 void
1145 svm_region_unmap (void *rp_arg)
1146 {
1147  svm_region_unmap_internal (rp_arg, 0 /* is_client */ );
1148 }
1149 
1150 void
1151 svm_region_unmap_client (void *rp_arg)
1152 {
1153  svm_region_unmap_internal (rp_arg, 1 /* is_client */ );
1154 }
1155 
1156 /*
1157  * svm_region_exit
1158  */
1159 static void
1160 svm_region_exit_internal (u8 is_client)
1161 {
1162  void *oldheap;
1163  int i, mypid = getpid ();
1164  uword virtual_base, virtual_size;
1165 
1166  /* It felt so nice we did it twice... */
1167  if (root_rp == 0)
1168  return;
1169 
1170  if (--root_rp_refcount > 0)
1171  return;
1172 
1173  /*
1174  * If we take a signal while holding one or more shared-memory
1175  * mutexes, we may end up back here from an otherwise
1176  * benign exit handler. Bail out to avoid a recursive
1177  * mutex screw-up.
1178  */
1179  if (nheld)
1180  return;
1181 
1182  region_lock (root_rp, 7);
1183  oldheap = svm_push_pvt_heap (root_rp);
1184 
1185  virtual_base = root_rp->virtual_base;
1186  virtual_size = root_rp->virtual_size;
1187 
1188  CLIB_MEM_UNPOISON (root_rp->client_pids, vec_bytes (root_rp->client_pids));
1189  for (i = 0; i < vec_len (root_rp->client_pids); i++)
1190  {
1191  if (root_rp->client_pids[i] == mypid)
1192  {
1193  vec_delete (root_rp->client_pids, 1, i);
1194  goto found;
1195  }
1196  }
1197  clib_warning ("pid %d AWOL", mypid);
1198 
1199 found:
1200 
1201  if (!is_client && vec_len (root_rp->client_pids) == 0)
1202  svm_region_unlink (root_rp);
1203 
1204  region_unlock (root_rp);
1205  svm_pop_heap (oldheap);
1206 
1207  root_rp = 0;
1208  munmap ((void *) virtual_base, virtual_size);
1209 }
1210 
1211 void
1212 svm_region_exit (void)
1213 {
1214  svm_region_exit_internal (0 /* is_client */ );
1215 }
1216 
1217 void
1218 svm_region_exit_client (void)
1219 {
1220  svm_region_exit_internal (1 /* is_client */ );
1221 }
1222 
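/* Drop dead client pids (kill (pid, 0) failing) from rp's client list;
 * the caller is expected to hold rp->mutex, as svm_client_scan() does. */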
1223 void
1224 svm_client_scan_this_region_nolock (svm_region_t * rp)
1225 {
1226  int j;
1227  int mypid = getpid ();
1228  void *oldheap;
1229 
1230  for (j = 0; j < vec_len (rp->client_pids); j++)
1231  {
1232  if (mypid == rp->client_pids[j])
1233  continue;
1234  if (rp->client_pids[j] && (kill (rp->client_pids[j], 0) < 0))
1235  {
1236  clib_warning ("%s: cleanup ghost pid %d",
1237  rp->region_name, rp->client_pids[j]);
1238  /* nb: client vec in rp->region_heap */
1239  oldheap = svm_push_pvt_heap (rp);
1240  vec_delete (rp->client_pids, 1, j);
1241  j--;
1242  svm_pop_heap (oldheap);
1243  }
1244  }
1245 }
1246 
1247 
1248 /*
1249  * Scan svm regions for dead clients
1250  */
1251 void
1252 svm_client_scan (const char *root_path)
1253 {
1254  int i, j;
1255  svm_main_region_t *mp;
1256  svm_map_region_args_t *a = 0;
1257  svm_region_t *root_rp;
1258  svm_region_t *rp;
1259  svm_subregion_t *subp;
1260  u8 *name = 0;
1261  u8 **svm_names = 0;
1262  void *oldheap;
1263  int mypid = getpid ();
1264 
1265  vec_validate (a, 0);
1266 
1267  svm_region_init_chroot (root_path);
1268 
1269  root_rp = svm_get_root_rp ();
1270 
1271  pthread_mutex_lock (&root_rp->mutex);
1272 
1273  mp = root_rp->data_base;
1274 
1275  for (j = 0; j < vec_len (root_rp->client_pids); j++)
1276  {
1277  if (mypid == root_rp->client_pids[j])
1278  continue;
1279  if (root_rp->client_pids[j] && (kill (root_rp->client_pids[j], 0) < 0))
1280  {
1281  clib_warning ("%s: cleanup ghost pid %d",
1282  root_rp->region_name, root_rp->client_pids[j]);
1283  /* nb: client vec in root_rp->region_heap */
1284  oldheap = svm_push_pvt_heap (root_rp);
1285  vec_delete (root_rp->client_pids, 1, j);
1286  j--;
1287  svm_pop_heap (oldheap);
1288  }
1289  }
1290 
1291  /*
1292  * Snapshot names, can't hold root rp mutex across
1293  * find_or_create.
1294  */
1295  /* *INDENT-OFF* */
1296  pool_foreach (subp, mp->subregions) {
1297  name = vec_dup (subp->subregion_name);
1298  vec_add1(svm_names, name);
1299  }
1300  /* *INDENT-ON* */
1301 
1302  pthread_mutex_unlock (&root_rp->mutex);
1303 
1304  for (i = 0; i < vec_len (svm_names); i++)
1305  {
1306  vec_validate (a, 0);
1307  a->root_path = root_path;
1308  a->name = (char *) svm_names[i];
1309  rp = svm_region_find_or_create (a);
1310  if (rp)
1311  {
1312  pthread_mutex_lock (&rp->mutex);
1313 
1314  svm_client_scan_this_region_nolock (rp);
1315 
1316  pthread_mutex_unlock (&rp->mutex);
1317  svm_region_unmap (rp);
1318  vec_free (svm_names[i]);
1319  }
1320  vec_free (a);
1321  }
1322  vec_free (svm_names);
1323 
1324  svm_region_exit ();
1325 
1326  vec_free (a);
1327 }
1328 
1329 /*
1330  * fd.io coding-style-patch-verification: ON
1331  *
1332  * Local Variables:
1333  * eval: (c-set-style "gnu")
1334  * End:
1335  */