FD.io VPP  v18.10-32-g1161dda
Vector Packet Processing
threads.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 #define _GNU_SOURCE
16 
17 #include <signal.h>
18 #include <math.h>
19 #include <vppinfra/format.h>
20 #include <vppinfra/linux/sysfs.h>
21 #include <vlib/vlib.h>
22 
23 #include <vlib/threads.h>
24 #include <vlib/unix/cj.h>
25 
27 
28 #define FRAME_QUEUE_NELTS 64
29 
30 u32
31 vl (void *p)
32 {
33  return vec_len (p);
34 }
35 
38 
39 /*
40  * Barrier tracing can be enabled on a normal build to collect information
41  * on barrier use, including timings and call stacks. Deliberately not
42  * keyed off CLIB_DEBUG, because that can add significant overhead which
43  * impacts observed timings.
44  */
45 
46 u32
47 elog_global_id_for_msg_name (const char *msg_name)
48 {
49  uword *p, r;
50  static uword *h;
51  u8 *name_copy;
52 
53  if (!h)
54  h = hash_create_string (0, sizeof (uword));
55 
56  p = hash_get_mem (h, msg_name);
57  if (p)
58  return p[0];
59  r = elog_string (&vlib_global_main.elog_main, "%s", msg_name);
60 
61  name_copy = format (0, "%s%c", msg_name, 0);
62 
63  hash_set_mem (h, name_copy, r);
64 
65  return r;
66 }
67 
68 static inline void
69 barrier_trace_sync (f64 t_entry, f64 t_open, f64 t_closed)
70 {
71  if (!vlib_worker_threads->barrier_elog_enabled)
72  return;
73 
74  /* *INDENT-OFF* */
75  ELOG_TYPE_DECLARE (e) =
76  {
77  .format = "bar-trace-%s-#%d",
78  .format_args = "T4i4",
79  };
80  /* *INDENT-ON* */
81  struct
82  {
83  u32 caller, count, t_entry, t_open, t_closed;
84  } *ed = 0;
85 
87  ed->count = (int) vlib_worker_threads[0].barrier_sync_count;
88  ed->caller = elog_global_id_for_msg_name
89  (vlib_worker_threads[0].barrier_caller);
90  ed->t_entry = (int) (1000000.0 * t_entry);
91  ed->t_open = (int) (1000000.0 * t_open);
92  ed->t_closed = (int) (1000000.0 * t_closed);
93 }
94 
95 static inline void
97 {
98  if (!vlib_worker_threads->barrier_elog_enabled)
99  return;
100 
101  /* *INDENT-OFF* */
102  ELOG_TYPE_DECLARE (e) =
103  {
104  .format = "bar-syncrec-%s-#%d",
105  .format_args = "T4i4",
106  };
107  /* *INDENT-ON* */
108  struct
109  {
110  u32 caller, depth;
111  } *ed = 0;
112 
114  ed->depth = (int) vlib_worker_threads[0].recursion_level - 1;
115  ed->caller = elog_global_id_for_msg_name
116  (vlib_worker_threads[0].barrier_caller);
117 }
118 
119 static inline void
121 {
122  if (!vlib_worker_threads->barrier_elog_enabled)
123  return;
124 
125  /* *INDENT-OFF* */
126  ELOG_TYPE_DECLARE (e) =
127  {
128  .format = "bar-relrrec-#%d",
129  .format_args = "i4",
130  };
131  /* *INDENT-ON* */
132  struct
133  {
134  u32 depth;
135  } *ed = 0;
136 
138  ed->depth = (int) vlib_worker_threads[0].recursion_level;
139 }
140 
141 static inline void
142 barrier_trace_release (f64 t_entry, f64 t_closed_total, f64 t_update_main)
143 {
144  if (!vlib_worker_threads->barrier_elog_enabled)
145  return;
146 
147  /* *INDENT-OFF* */
148  ELOG_TYPE_DECLARE (e) =
149  {
150  .format = "bar-rel-#%d-e%d-u%d-t%d",
151  .format_args = "i4i4i4i4",
152  };
153  /* *INDENT-ON* */
154  struct
155  {
156  u32 count, t_entry, t_update_main, t_closed_total;
157  } *ed = 0;
158 
160  ed->t_entry = (int) (1000000.0 * t_entry);
161  ed->t_update_main = (int) (1000000.0 * t_update_main);
162  ed->t_closed_total = (int) (1000000.0 * t_closed_total);
163  ed->count = (int) vlib_worker_threads[0].barrier_sync_count;
164 
165  /* Reset context for next trace */
166  vlib_worker_threads[0].barrier_context = NULL;
167 }
168 
169 uword
171 {
172  u32 len;
173 
174  len = vec_len (vlib_thread_stacks);
175  if (len == 0)
176  return 1;
177  else
178  return len;
179 }
180 
181 void
183 {
184  int pthread_setname_np (pthread_t __target_thread, const char *__name);
185  int rv;
186  pthread_t thread = pthread_self ();
187 
188  if (thread)
189  {
190  rv = pthread_setname_np (thread, name);
191  if (rv)
192  clib_warning ("pthread_setname_np returned %d", rv);
193  }
194 }
195 
196 static int
197 sort_registrations_by_no_clone (void *a0, void *a1)
198 {
199  vlib_thread_registration_t **tr0 = a0;
200  vlib_thread_registration_t **tr1 = a1;
201 
202  return ((i32) ((*tr0)->no_data_structure_clone)
203  - ((i32) ((*tr1)->no_data_structure_clone)));
204 }
205 
206 static uword *
208 {
209  FILE *fp;
210  uword *r = 0;
211 
212  fp = fopen (filename, "r");
213 
214  if (fp != NULL)
215  {
216  u8 *buffer = 0;
217  vec_validate (buffer, 256 - 1);
218  if (fgets ((char *) buffer, 256, fp))
219  {
220  unformat_input_t in;
221  unformat_init_string (&in, (char *) buffer,
222  strlen ((char *) buffer));
223  if (unformat (&in, "%U", unformat_bitmap_list, &r) != 1)
224  clib_warning ("unformat_bitmap_list failed");
225  unformat_free (&in);
226  }
227  vec_free (buffer);
228  fclose (fp);
229  }
230  return r;
231 }
232 
233 
234 /* Called early in the init sequence */
235 
236 clib_error_t *
238 {
242  u32 n_vlib_mains = 1;
243  u32 first_index = 1;
244  u32 i;
245  uword *avail_cpu;
246 
247  /* get bitmaps of active cpu cores and sockets */
248  tm->cpu_core_bitmap =
249  clib_sysfs_list_to_bitmap ("/sys/devices/system/cpu/online");
250  tm->cpu_socket_bitmap =
251  clib_sysfs_list_to_bitmap ("/sys/devices/system/node/online");
252 
253  avail_cpu = clib_bitmap_dup (tm->cpu_core_bitmap);
254 
255  /* skip cores */
256  for (i = 0; i < tm->skip_cores; i++)
257  {
258  uword c = clib_bitmap_first_set (avail_cpu);
259  if (c == ~0)
260  return clib_error_return (0, "no available cpus to skip");
261 
262  avail_cpu = clib_bitmap_set (avail_cpu, c, 0);
263  }
264 
265  /* grab cpu for main thread */
266  if (tm->main_lcore == ~0)
267  {
268  /* if main-lcore is not set, we try to use lcore 1 */
269  if (clib_bitmap_get (avail_cpu, 1))
270  tm->main_lcore = 1;
271  else
272  tm->main_lcore = clib_bitmap_first_set (avail_cpu);
273  if (tm->main_lcore == (u8) ~ 0)
274  return clib_error_return (0, "no available cpus to be used for the"
275  " main thread");
276  }
277  else
278  {
279  if (clib_bitmap_get (avail_cpu, tm->main_lcore) == 0)
280  return clib_error_return (0, "cpu %u is not available to be used"
281  " for the main thread", tm->main_lcore);
282  }
283  avail_cpu = clib_bitmap_set (avail_cpu, tm->main_lcore, 0);
284 
285  /* assume that there is socket 0 only if there is no data from sysfs */
286  if (!tm->cpu_socket_bitmap)
287  tm->cpu_socket_bitmap = clib_bitmap_set (0, 0, 1);
288 
289  /* pin main thread to main_lcore */
291  {
293  }
294  else
295  {
296  cpu_set_t cpuset;
297  CPU_ZERO (&cpuset);
298  CPU_SET (tm->main_lcore, &cpuset);
299  pthread_setaffinity_np (pthread_self (), sizeof (cpu_set_t), &cpuset);
300  }
301 
302  /* as many threads as stacks... */
303  vec_validate_aligned (vlib_worker_threads, vec_len (vlib_thread_stacks) - 1,
305 
306  /* Preallocate thread 0 */
307  _vec_len (vlib_worker_threads) = 1;
311  w->cpu_id = tm->main_lcore;
312  w->lwp = syscall (SYS_gettid);
313  w->thread_id = pthread_self ();
314  tm->n_vlib_mains = 1;
315 
316  if (tm->sched_policy != ~0)
317  {
318  struct sched_param sched_param;
319  if (!sched_getparam (w->lwp, &sched_param))
320  {
321  if (tm->sched_priority != ~0)
322  sched_param.sched_priority = tm->sched_priority;
323  sched_setscheduler (w->lwp, tm->sched_policy, &sched_param);
324  }
325  }
326 
327  /* assign threads to cores and set n_vlib_mains */
328  tr = tm->next;
329 
330  while (tr)
331  {
332  vec_add1 (tm->registrations, tr);
333  tr = tr->next;
334  }
335 
337 
338  for (i = 0; i < vec_len (tm->registrations); i++)
339  {
340  int j;
341  tr = tm->registrations[i];
342  tr->first_index = first_index;
343  first_index += tr->count;
344  n_vlib_mains += (tr->no_data_structure_clone == 0) ? tr->count : 0;
345 
346  /* construct coremask */
347  if (tr->use_pthreads || !tr->count)
348  continue;
349 
350  if (tr->coremask)
351  {
352  uword c;
353  /* *INDENT-OFF* */
354  clib_bitmap_foreach (c, tr->coremask, ({
355  if (clib_bitmap_get(avail_cpu, c) == 0)
356  return clib_error_return (0, "cpu %u is not available to be used"
357  " for the '%s' thread",c, tr->name);
358 
359  avail_cpu = clib_bitmap_set(avail_cpu, c, 0);
360  }));
361 /* *INDENT-ON* */
362 
363  }
364  else
365  {
366  for (j = 0; j < tr->count; j++)
367  {
368  uword c = clib_bitmap_first_set (avail_cpu);
369  if (c == ~0)
370  return clib_error_return (0,
371  "no available cpus to be used for"
372  " the '%s' thread", tr->name);
373 
374  avail_cpu = clib_bitmap_set (avail_cpu, c, 0);
375  tr->coremask = clib_bitmap_set (tr->coremask, c, 1);
376  }
377  }
378  }
379 
380  clib_bitmap_free (avail_cpu);
381 
382  tm->n_vlib_mains = n_vlib_mains;
383 
384  vec_validate_aligned (vlib_worker_threads, first_index - 1,
386 
387  return 0;
388 }
389 
392 {
393  vlib_frame_queue_t *fq;
394 
395  fq = clib_mem_alloc_aligned (sizeof (*fq), CLIB_CACHE_LINE_BYTES);
396  memset (fq, 0, sizeof (*fq));
397  fq->nelts = nelts;
398  fq->vector_threshold = 128; // packets
400 
401  if (1)
402  {
403  if (((uword) & fq->tail) & (CLIB_CACHE_LINE_BYTES - 1))
404  fformat (stderr, "WARNING: fq->tail unaligned\n");
405  if (((uword) & fq->head) & (CLIB_CACHE_LINE_BYTES - 1))
406  fformat (stderr, "WARNING: fq->head unaligned\n");
407  if (((uword) fq->elts) & (CLIB_CACHE_LINE_BYTES - 1))
408  fformat (stderr, "WARNING: fq->elts unaligned\n");
409 
410  if (sizeof (fq->elts[0]) % CLIB_CACHE_LINE_BYTES)
411  fformat (stderr, "WARNING: fq->elts[0] size %d\n",
412  sizeof (fq->elts[0]));
413  if (nelts & (nelts - 1))
414  {
415  fformat (stderr, "FATAL: nelts MUST be a power of 2\n");
416  abort ();
417  }
418  }
419 
420  return (fq);
421 }
422 
423 void vl_msg_api_handler_no_free (void *) __attribute__ ((weak));
424 void
426 {
427 }
428 
429 /* Turned off, save as reference material... */
430 #if 0
431 static inline int
432 vlib_frame_queue_dequeue_internal (int thread_id,
433  vlib_main_t * vm, vlib_node_main_t * nm)
434 {
435  vlib_frame_queue_t *fq = vlib_frame_queues[thread_id];
437  vlib_frame_t *f;
440  u32 node_runtime_index;
441  int msg_type;
442  u64 before;
443  int processed = 0;
444 
445  ASSERT (vm == vlib_mains[thread_id]);
446 
447  while (1)
448  {
449  if (fq->head == fq->tail)
450  return processed;
451 
452  elt = fq->elts + ((fq->head + 1) & (fq->nelts - 1));
453 
454  if (!elt->valid)
455  return processed;
456 
457  before = clib_cpu_time_now ();
458 
459  f = elt->frame;
460  node_runtime_index = elt->node_runtime_index;
461  msg_type = elt->msg_type;
462 
463  switch (msg_type)
464  {
465  case VLIB_FRAME_QUEUE_ELT_FREE_BUFFERS:
467  /* note fallthrough... */
468  case VLIB_FRAME_QUEUE_ELT_FREE_FRAME:
470  node_runtime_index);
471  vlib_frame_free (vm, r, f);
472  break;
474  vec_add2 (vm->node_main.pending_frames, p, 1);
476  p->node_runtime_index = elt->node_runtime_index;
477  p->frame_index = vlib_frame_index (vm, f);
479  fq->dequeue_vectors += (u64) f->n_vectors;
480  break;
481  case VLIB_FRAME_QUEUE_ELT_API_MSG:
483  break;
484  default:
485  clib_warning ("bogus frame queue message, type %d", msg_type);
486  break;
487  }
488  elt->valid = 0;
489  fq->dequeues++;
490  fq->dequeue_ticks += clib_cpu_time_now () - before;
492  fq->head++;
493  processed++;
494  }
495  ASSERT (0);
496  return processed;
497 }
498 
499 int
500 vlib_frame_queue_dequeue (int thread_id,
501  vlib_main_t * vm, vlib_node_main_t * nm)
502 {
503  return vlib_frame_queue_dequeue_internal (thread_id, vm, nm);
504 }
505 
506 int
507 vlib_frame_queue_enqueue (vlib_main_t * vm, u32 node_runtime_index,
508  u32 frame_queue_index, vlib_frame_t * frame,
510 {
511  vlib_frame_queue_t *fq = vlib_frame_queues[frame_queue_index];
513  u32 save_count;
514  u64 new_tail;
515  u64 before = clib_cpu_time_now ();
516 
517  ASSERT (fq);
518 
519  new_tail = __sync_add_and_fetch (&fq->tail, 1);
520 
521  /* Wait until a ring slot is available */
522  while (new_tail >= fq->head + fq->nelts)
523  {
524  f64 b4 = vlib_time_now_ticks (vm, before);
526  /* Bad idea. Dequeue -> enqueue -> dequeue -> trouble */
527  // vlib_frame_queue_dequeue (vm->thread_index, vm, nm);
528  }
529 
530  elt = fq->elts + (new_tail & (fq->nelts - 1));
531 
532  /* this would be very bad... */
533  while (elt->valid)
534  {
535  }
536 
537  /* Once we enqueue the frame, frame->n_vectors is owned elsewhere... */
538  save_count = frame->n_vectors;
539 
540  elt->frame = frame;
541  elt->node_runtime_index = node_runtime_index;
542  elt->msg_type = type;
544  elt->valid = 1;
545 
546  return save_count;
547 }
548 #endif /* 0 */
549 
550 /* To be called by vlib worker threads upon startup */
551 void
553 {
555 
556  /*
557  * Note: disabling signals in worker threads as follows
558  * prevents the api post-mortem dump scheme from working
559  * {
560  * sigset_t s;
561  * sigfillset (&s);
562  * pthread_sigmask (SIG_SETMASK, &s, 0);
563  * }
564  */
565 
567 
568  if (vec_len (tm->thread_prefix) && w->registration->short_name)
569  {
570  w->name = format (0, "%v_%s_%d%c", tm->thread_prefix,
571  w->registration->short_name, w->instance_id, '\0');
572  vlib_set_thread_name ((char *) w->name);
573  }
574 
575  if (!w->registration->use_pthreads)
576  {
577 
578  /* Initial barrier sync, for both worker and i/o threads */
579  clib_smp_atomic_add (vlib_worker_threads->workers_at_barrier, 1);
580 
581  while (*vlib_worker_threads->wait_at_barrier)
582  ;
583 
584  clib_smp_atomic_add (vlib_worker_threads->workers_at_barrier, -1);
585  }
586 }
587 
588 void *
590 {
591  void *rv;
592  vlib_worker_thread_t *w = arg;
593 
594  w->lwp = syscall (SYS_gettid);
595  w->thread_id = pthread_self ();
596 
597  __os_thread_index = w - vlib_worker_threads;
598 
599  rv = (void *) clib_calljmp
600  ((uword (*)(uword)) w->thread_function,
602  /* NOTREACHED, we hope */
603  return rv;
604 }
605 
606 static void
608 {
609  const char *sys_cpu_path = "/sys/devices/system/cpu/cpu";
610  u8 *p = 0;
611  int core_id = -1, socket_id = -1;
612 
613  p = format (p, "%s%u/topology/core_id%c", sys_cpu_path, cpu_id, 0);
614  clib_sysfs_read ((char *) p, "%d", &core_id);
615  vec_reset_length (p);
616  p =
617  format (p, "%s%u/topology/physical_package_id%c", sys_cpu_path, cpu_id,
618  0);
619  clib_sysfs_read ((char *) p, "%d", &socket_id);
620  vec_free (p);
621 
622  w->core_id = core_id;
623  w->socket_id = socket_id;
624 }
625 
626 static clib_error_t *
627 vlib_launch_thread_int (void *fp, vlib_worker_thread_t * w, unsigned cpu_id)
628 {
630  void *(*fp_arg) (void *) = fp;
631 
632  w->cpu_id = cpu_id;
633  vlib_get_thread_core_socket (w, cpu_id);
635  return tm->cb.vlib_launch_thread_cb (fp, (void *) w, cpu_id);
636  else
637  {
638  pthread_t worker;
639  cpu_set_t cpuset;
640  CPU_ZERO (&cpuset);
641  CPU_SET (cpu_id, &cpuset);
642 
643  if (pthread_create (&worker, NULL /* attr */ , fp_arg, (void *) w))
644  return clib_error_return_unix (0, "pthread_create");
645 
646  if (pthread_setaffinity_np (worker, sizeof (cpu_set_t), &cpuset))
647  return clib_error_return_unix (0, "pthread_setaffinity_np");
648 
649  return 0;
650  }
651 }
652 
653 static clib_error_t *
655 {
656  int i, j;
658  vlib_main_t *vm_clone;
659  void *oldheap;
663  u32 n_vlib_mains = tm->n_vlib_mains;
664  u32 worker_thread_index;
665  u8 *main_heap = clib_mem_get_per_cpu_heap ();
666 
667  vec_reset_length (vlib_worker_threads);
668 
669  /* Set up the main thread */
670  vec_add2_aligned (vlib_worker_threads, w, 1, CLIB_CACHE_LINE_BYTES);
671  w->elog_track.name = "main thread";
673 
674  if (vec_len (tm->thread_prefix))
675  {
676  w->name = format (0, "%v_main%c", tm->thread_prefix, '\0');
677  vlib_set_thread_name ((char *) w->name);
678  }
679 
680  vm->elog_main.lock =
682  vm->elog_main.lock[0] = 0;
683 
684  if (n_vlib_mains > 1)
685  {
686  /* Replace hand-crafted length-1 vector with a real vector */
687  vlib_mains = 0;
688 
691  _vec_len (vlib_mains) = 0;
693 
694  vlib_worker_threads->wait_at_barrier =
696  vlib_worker_threads->workers_at_barrier =
698 
699  vlib_worker_threads->node_reforks_required =
701 
702  /* Ask for an initial barrier sync */
703  *vlib_worker_threads->workers_at_barrier = 0;
704  *vlib_worker_threads->wait_at_barrier = 1;
705 
706  /* Without update or refork */
707  *vlib_worker_threads->node_reforks_required = 0;
709 
710  /* init timing */
711  vm->barrier_epoch = 0;
712  vm->barrier_no_close_before = 0;
713 
714  worker_thread_index = 1;
715 
716  for (i = 0; i < vec_len (tm->registrations); i++)
717  {
718  vlib_node_main_t *nm, *nm_clone;
719  vlib_buffer_free_list_t *fl_clone, *fl_orig;
720  vlib_buffer_free_list_t *orig_freelist_pool;
721  int k;
722 
723  tr = tm->registrations[i];
724 
725  if (tr->count == 0)
726  continue;
727 
728  for (k = 0; k < tr->count; k++)
729  {
730  vlib_node_t *n;
731 
732  vec_add2 (vlib_worker_threads, w, 1);
733  /* Currently unused, may not really work */
734  if (tr->mheap_size)
735  {
736 #if USE_DLMALLOC == 0
737  w->thread_mheap =
738  mheap_alloc (0 /* use VM */ , tr->mheap_size);
739 #else
741  0 /* unlocked */ );
742 #endif
743  }
744  else
745  w->thread_mheap = main_heap;
746 
747  w->thread_stack =
748  vlib_thread_stack_init (w - vlib_worker_threads);
749  w->thread_function = tr->function;
750  w->thread_function_arg = w;
751  w->instance_id = k;
752  w->registration = tr;
753 
754  w->elog_track.name =
755  (char *) format (0, "%s %d", tr->name, k + 1);
756  vec_add1 (w->elog_track.name, 0);
758 
759  if (tr->no_data_structure_clone)
760  continue;
761 
762  /* Fork vlib_global_main et al. Look for bugs here */
763  oldheap = clib_mem_set_heap (w->thread_mheap);
764 
765  vm_clone = clib_mem_alloc_aligned (sizeof (*vm_clone),
767  clib_memcpy (vm_clone, vlib_mains[0], sizeof (*vm_clone));
768 
769  vm_clone->thread_index = worker_thread_index;
770  vm_clone->heap_base = w->thread_mheap;
771  vm_clone->heap_aligned_base = (void *)
772  (((uword) w->thread_mheap) & ~(VLIB_FRAME_ALIGN - 1));
773  vm_clone->init_functions_called =
774  hash_create (0, /* value bytes */ 0);
775  vm_clone->pending_rpc_requests = 0;
776  vec_validate (vm_clone->pending_rpc_requests, 0);
777  _vec_len (vm_clone->pending_rpc_requests) = 0;
778  memset (&vm_clone->random_buffer, 0,
779  sizeof (vm_clone->random_buffer));
780 
781  nm = &vlib_mains[0]->node_main;
782  nm_clone = &vm_clone->node_main;
783  /* fork next frames array, preserving node runtime indices */
784  nm_clone->next_frames = vec_dup_aligned (nm->next_frames,
786  for (j = 0; j < vec_len (nm_clone->next_frames); j++)
787  {
788  vlib_next_frame_t *nf = &nm_clone->next_frames[j];
789  u32 save_node_runtime_index;
790  u32 save_flags;
791 
792  save_node_runtime_index = nf->node_runtime_index;
793  save_flags = nf->flags & VLIB_FRAME_NO_FREE_AFTER_DISPATCH;
795  nf->node_runtime_index = save_node_runtime_index;
796  nf->flags = save_flags;
797  }
798 
799  /* fork the frame dispatch queue */
800  nm_clone->pending_frames = 0;
801  vec_validate (nm_clone->pending_frames, 10); /* $$$$$?????? */
802  _vec_len (nm_clone->pending_frames) = 0;
803 
804  /* fork nodes */
805  nm_clone->nodes = 0;
806 
807  /* Allocate all nodes in single block for speed */
808  n = clib_mem_alloc_no_fail (vec_len (nm->nodes) * sizeof (*n));
809 
810  for (j = 0; j < vec_len (nm->nodes); j++)
811  {
812  clib_memcpy (n, nm->nodes[j], sizeof (*n));
813  /* none of the copied nodes have enqueue rights given out */
815  memset (&n->stats_total, 0, sizeof (n->stats_total));
816  memset (&n->stats_last_clear, 0,
817  sizeof (n->stats_last_clear));
818  vec_add1 (nm_clone->nodes, n);
819  n++;
820  }
824  vec_foreach (rt,
826  {
827  vlib_node_t *n = vlib_get_node (vm, rt->node_index);
828  rt->thread_index = vm_clone->thread_index;
829  /* copy initial runtime_data from node */
830  if (n->runtime_data && n->runtime_data_bytes > 0)
833  n->runtime_data_bytes));
834  }
835 
840  {
841  vlib_node_t *n = vlib_get_node (vm, rt->node_index);
842  rt->thread_index = vm_clone->thread_index;
843  /* copy initial runtime_data from node */
844  if (n->runtime_data && n->runtime_data_bytes > 0)
847  n->runtime_data_bytes));
848  }
849 
850  nm_clone->processes = vec_dup_aligned (nm->processes,
852 
853  /* zap the (per worker) frame freelists, etc */
854  nm_clone->frame_sizes = 0;
855  nm_clone->frame_size_hash = hash_create (0, sizeof (uword));
856 
857  /* Packet trace buffers are guaranteed to be empty, nothing to do here */
858 
859  clib_mem_set_heap (oldheap);
861 
863  (vlib_mains[0]->error_main.counters, CLIB_CACHE_LINE_BYTES);
865  (vlib_mains[0]->error_main.counters_last_clear,
867 
868  /* Fork the vlib_buffer_main_t free lists, etc. */
869  orig_freelist_pool = vm_clone->buffer_free_list_pool;
870  vm_clone->buffer_free_list_pool = 0;
871 
872  /* *INDENT-OFF* */
873  pool_foreach (fl_orig, orig_freelist_pool,
874  ({
876  fl_clone, CLIB_CACHE_LINE_BYTES);
877  ASSERT (fl_orig - orig_freelist_pool
878  == fl_clone - vm_clone->buffer_free_list_pool);
879 
880  fl_clone[0] = fl_orig[0];
881  fl_clone->buffers = 0;
882  fl_clone->n_alloc = 0;
883  }));
884 /* *INDENT-ON* */
885 
886  worker_thread_index++;
887  }
888  }
889  }
890  else
891  {
892  /* only have non-data-structure copy threads to create... */
893  for (i = 0; i < vec_len (tm->registrations); i++)
894  {
895  tr = tm->registrations[i];
896 
897  for (j = 0; j < tr->count; j++)
898  {
899  vec_add2 (vlib_worker_threads, w, 1);
900  if (tr->mheap_size)
901  {
902 #if USE_DLMALLOC == 0
903  w->thread_mheap =
904  mheap_alloc (0 /* use VM */ , tr->mheap_size);
905 #else
906  w->thread_mheap =
907  create_mspace (tr->mheap_size, 0 /* locked */ );
908 #endif
909  }
910  else
911  w->thread_mheap = main_heap;
912  w->thread_stack =
913  vlib_thread_stack_init (w - vlib_worker_threads);
914  w->thread_function = tr->function;
915  w->thread_function_arg = w;
916  w->instance_id = j;
917  w->elog_track.name =
918  (char *) format (0, "%s %d", tr->name, j + 1);
919  w->registration = tr;
920  vec_add1 (w->elog_track.name, 0);
922  }
923  }
924  }
925 
926  worker_thread_index = 1;
927 
928  for (i = 0; i < vec_len (tm->registrations); i++)
929  {
930  clib_error_t *err;
931  int j;
932 
933  tr = tm->registrations[i];
934 
935  if (tr->use_pthreads || tm->use_pthreads)
936  {
937  for (j = 0; j < tr->count; j++)
938  {
939  w = vlib_worker_threads + worker_thread_index++;
941  w, 0);
942  if (err)
943  clib_error_report (err);
944  }
945  }
946  else
947  {
948  uword c;
949  /* *INDENT-OFF* */
950  clib_bitmap_foreach (c, tr->coremask, ({
951  w = vlib_worker_threads + worker_thread_index++;
952  err = vlib_launch_thread_int (vlib_worker_thread_bootstrap_fn,
953  w, c);
954  if (err)
955  clib_error_report (err);
956  }));
957  /* *INDENT-ON* */
958  }
959  }
962  return 0;
963 }
964 
966 
967 
968 static inline void
970 {
971  int i, j;
972  vlib_main_t *vm;
973  vlib_node_main_t *nm, *nm_clone;
974  vlib_main_t *vm_clone;
976  never_inline void
979  uword n_calls,
980  uword n_vectors, uword n_clocks);
981 
982  ASSERT (vlib_get_thread_index () == 0);
983 
984  vm = vlib_mains[0];
985  nm = &vm->node_main;
986 
987  ASSERT (*vlib_worker_threads->wait_at_barrier == 1);
988 
989  /*
990  * Scrape all runtime stats, so we don't lose node runtime(s) with
991  * pending counts, or throw away worker / io thread counts.
992  */
993  for (j = 0; j < vec_len (nm->nodes); j++)
994  {
995  vlib_node_t *n;
996  n = nm->nodes[j];
997  vlib_node_sync_stats (vm, n);
998  }
999 
1000  for (i = 1; i < vec_len (vlib_mains); i++)
1001  {
1002  vlib_node_t *n;
1003 
1004  vm_clone = vlib_mains[i];
1005  nm_clone = &vm_clone->node_main;
1006 
1007  for (j = 0; j < vec_len (nm_clone->nodes); j++)
1008  {
1009  n = nm_clone->nodes[j];
1010 
1011  rt = vlib_node_get_runtime (vm_clone, n->index);
1012  vlib_node_runtime_sync_stats (vm_clone, rt, 0, 0, 0);
1013  }
1014  }
1015 
1016  /* Per-worker clone rebuilds are now done on each thread */
1017 }
1018 
1019 
1020 void
1022 {
1023  vlib_main_t *vm, *vm_clone;
1024  vlib_node_main_t *nm, *nm_clone;
1025  vlib_node_t **old_nodes_clone;
1026  vlib_node_runtime_t *rt, *old_rt;
1027 
1028  vlib_node_t *new_n_clone;
1029 
1030  int j;
1031 
1032  vm = vlib_mains[0];
1033  nm = &vm->node_main;
1034  vm_clone = vlib_get_main ();
1035  nm_clone = &vm_clone->node_main;
1036 
1037  /* Re-clone error heap */
1038  u64 *old_counters = vm_clone->error_main.counters;
1039  u64 *old_counters_all_clear = vm_clone->error_main.counters_last_clear;
1040 
1041  clib_memcpy (&vm_clone->error_main, &vm->error_main,
1042  sizeof (vm->error_main));
1043  j = vec_len (vm->error_main.counters) - 1;
1044  vec_validate_aligned (old_counters, j, CLIB_CACHE_LINE_BYTES);
1045  vec_validate_aligned (old_counters_all_clear, j, CLIB_CACHE_LINE_BYTES);
1046  vm_clone->error_main.counters = old_counters;
1047  vm_clone->error_main.counters_last_clear = old_counters_all_clear;
1048 
1049  nm_clone = &vm_clone->node_main;
1050  vec_free (nm_clone->next_frames);
1051  nm_clone->next_frames = vec_dup_aligned (nm->next_frames,
1053 
1054  for (j = 0; j < vec_len (nm_clone->next_frames); j++)
1055  {
1056  vlib_next_frame_t *nf = &nm_clone->next_frames[j];
1057  u32 save_node_runtime_index;
1058  u32 save_flags;
1059 
1060  save_node_runtime_index = nf->node_runtime_index;
1061  save_flags = nf->flags & VLIB_FRAME_NO_FREE_AFTER_DISPATCH;
1062  vlib_next_frame_init (nf);
1063  nf->node_runtime_index = save_node_runtime_index;
1064  nf->flags = save_flags;
1065  }
1066 
1067  old_nodes_clone = nm_clone->nodes;
1068  nm_clone->nodes = 0;
1069 
1070  /* re-fork nodes */
1071 
1072  /* Allocate all nodes in single block for speed */
1073  new_n_clone =
1074  clib_mem_alloc_no_fail (vec_len (nm->nodes) * sizeof (*new_n_clone));
1075  for (j = 0; j < vec_len (nm->nodes); j++)
1076  {
1077  vlib_node_t *old_n_clone;
1078  vlib_node_t *new_n;
1079 
1080  new_n = nm->nodes[j];
1081  old_n_clone = old_nodes_clone[j];
1082 
1083  clib_memcpy (new_n_clone, new_n, sizeof (*new_n));
1084  /* none of the copied nodes have enqueue rights given out */
1086 
1087  if (j >= vec_len (old_nodes_clone))
1088  {
1089  /* new node, set to zero */
1090  memset (&new_n_clone->stats_total, 0,
1091  sizeof (new_n_clone->stats_total));
1092  memset (&new_n_clone->stats_last_clear, 0,
1093  sizeof (new_n_clone->stats_last_clear));
1094  }
1095  else
1096  {
1097  /* Copy stats if the old data is valid */
1098  clib_memcpy (&new_n_clone->stats_total,
1099  &old_n_clone->stats_total,
1100  sizeof (new_n_clone->stats_total));
1101  clib_memcpy (&new_n_clone->stats_last_clear,
1102  &old_n_clone->stats_last_clear,
1103  sizeof (new_n_clone->stats_last_clear));
1104 
1105  /* keep previous node state */
1106  new_n_clone->state = old_n_clone->state;
1107  }
1108  vec_add1 (nm_clone->nodes, new_n_clone);
1109  new_n_clone++;
1110  }
1111  /* Free the old node clones */
1112  clib_mem_free (old_nodes_clone[0]);
1113 
1114  vec_free (old_nodes_clone);
1115 
1116 
1117  /* re-clone internal nodes */
1118  old_rt = nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL];
1122 
1124  {
1125  vlib_node_t *n = vlib_get_node (vm, rt->node_index);
1126  rt->thread_index = vm_clone->thread_index;
1127  /* copy runtime_data, will be overwritten later for existing rt */
1128  if (n->runtime_data && n->runtime_data_bytes > 0)
1131  n->runtime_data_bytes));
1132  }
1133 
1134  for (j = 0; j < vec_len (old_rt); j++)
1135  {
1136  rt = vlib_node_get_runtime (vm_clone, old_rt[j].node_index);
1137  rt->state = old_rt[j].state;
1138  clib_memcpy (rt->runtime_data, old_rt[j].runtime_data,
1140  }
1141 
1142  vec_free (old_rt);
1143 
1144  /* re-clone input nodes */
1145  old_rt = nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT];
1146  nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT] =
1149 
1151  {
1152  vlib_node_t *n = vlib_get_node (vm, rt->node_index);
1153  rt->thread_index = vm_clone->thread_index;
1154  /* copy runtime_data, will be overwritten later for existing rt */
1155  if (n->runtime_data && n->runtime_data_bytes > 0)
1158  n->runtime_data_bytes));
1159  }
1160 
1161  for (j = 0; j < vec_len (old_rt); j++)
1162  {
1163  rt = vlib_node_get_runtime (vm_clone, old_rt[j].node_index);
1164  rt->state = old_rt[j].state;
1165  clib_memcpy (rt->runtime_data, old_rt[j].runtime_data,
1167  }
1168 
1169  vec_free (old_rt);
1170 
1171  nm_clone->processes = vec_dup_aligned (nm->processes,
1173 }
1174 
1175 void
1177 {
1178  /*
1179  * Make a note that we need to do a node runtime update
1180  * prior to releasing the barrier.
1181  */
1183 }
1184 
1185 u32
1186 unformat_sched_policy (unformat_input_t * input, va_list * args)
1187 {
1188  u32 *r = va_arg (*args, u32 *);
1189 
1190  if (0);
1191 #define _(v,f,s) else if (unformat (input, s)) *r = SCHED_POLICY_##f;
1193 #undef _
1194  else
1195  return 0;
1196  return 1;
1197 }
1198 
1199 static clib_error_t *
1201 {
1203  uword *p;
1205  u8 *name;
1206  uword *bitmap;
1207  u32 count;
1208 
1210 
1211  tm->n_thread_stacks = 1; /* account for main thread */
1212  tm->sched_policy = ~0;
1213  tm->sched_priority = ~0;
1214  tm->main_lcore = ~0;
1215 
1216  tr = tm->next;
1217 
1218  while (tr)
1219  {
1221  tr = tr->next;
1222  }
1223 
1224  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
1225  {
1226  if (unformat (input, "use-pthreads"))
1227  tm->use_pthreads = 1;
1228  else if (unformat (input, "thread-prefix %v", &tm->thread_prefix))
1229  ;
1230  else if (unformat (input, "main-core %u", &tm->main_lcore))
1231  ;
1232  else if (unformat (input, "skip-cores %u", &tm->skip_cores))
1233  ;
1234  else if (unformat (input, "coremask-%s %U", &name,
1235  unformat_bitmap_mask, &bitmap) ||
1236  unformat (input, "corelist-%s %U", &name,
1237  unformat_bitmap_list, &bitmap))
1238  {
1240  if (p == 0)
1241  return clib_error_return (0, "no such thread type '%s'", name);
1242 
1243  tr = (vlib_thread_registration_t *) p[0];
1244 
1245  if (tr->use_pthreads)
1246  return clib_error_return (0,
1247  "corelist cannot be set for '%s' threads",
1248  name);
1249 
1250  tr->coremask = bitmap;
1252  }
1253  else
1254  if (unformat
1255  (input, "scheduler-policy %U", unformat_sched_policy,
1256  &tm->sched_policy))
1257  ;
1258  else if (unformat (input, "scheduler-priority %u", &tm->sched_priority))
1259  ;
1260  else if (unformat (input, "%s %u", &name, &count))
1261  {
1263  if (p == 0)
1264  return clib_error_return (0, "no such thread type 3 '%s'", name);
1265 
1266  tr = (vlib_thread_registration_t *) p[0];
1267  if (tr->fixed_count)
1268  return clib_error_return
1269  (0, "number of %s threads not configurable", tr->name);
1270  tr->count = count;
1271  }
1272  else
1273  break;
1274  }
1275 
1276  if (tm->sched_priority != ~0)
1277  {
1278  if (tm->sched_policy == SCHED_FIFO || tm->sched_policy == SCHED_RR)
1279  {
1280  u32 prio_max = sched_get_priority_max (tm->sched_policy);
1281  u32 prio_min = sched_get_priority_min (tm->sched_policy);
1282  if (tm->sched_priority > prio_max)
1283  tm->sched_priority = prio_max;
1284  if (tm->sched_priority < prio_min)
1285  tm->sched_priority = prio_min;
1286  }
1287  else
1288  {
1289  return clib_error_return
1290  (0,
1291  "scheduling priority (%d) is not allowed for `normal` scheduling policy",
1292  tm->sched_priority);
1293  }
1294  }
1295  tr = tm->next;
1296 
1297  if (!tm->thread_prefix)
1298  tm->thread_prefix = format (0, "vpp");
1299 
1300  while (tr)
1301  {
1302  tm->n_thread_stacks += tr->count;
1303  tm->n_pthreads += tr->count * tr->use_pthreads;
1304  tm->n_threads += tr->count * (tr->use_pthreads == 0);
1305  tr = tr->next;
1306  }
1307 
1308  return 0;
1309 }
1310 
1312 
/*
 * Stub definitions of __sync_fetch_and_add_8 and __sync_add_and_fetch_8.
 * They are compiled only when none of __x86_64__, __i386__, __aarch64__,
 * __powerpc64__ or __arm__ is defined.  Each stub prints its own name to
 * stderr and then calls abort (); neither performs any atomic operation.
 * NOTE(review): presumably these exist so that references to the 8-byte
 * __sync builtins still link on other targets -- confirm.
 */
1313 #if !defined (__x86_64__) && !defined (__i386__) && !defined (__aarch64__) && !defined (__powerpc64__) && !defined(__arm__)
1314 void
1315 __sync_fetch_and_add_8 (void)
1316 {
1317  fformat (stderr, "%s called\n", __FUNCTION__);
1318  abort ();
1319 }
1320 
1321 void
1322 __sync_add_and_fetch_8 (void)
1323 {
1324  fformat (stderr, "%s called\n", __FUNCTION__);
1325  abort ();
1326 }
1327 #endif
1328 
/*
 * vnet_main_fixup: weak, empty default definition.
 *
 * The prototype carries __attribute__ ((weak)), so a non-weak definition
 * of the same symbol linked in from elsewhere takes precedence over this
 * one.  This default ignores 'which' and does nothing.
 */
1329 void vnet_main_fixup (vlib_fork_fixup_t which) __attribute__ ((weak));
1330 void
1331 vnet_main_fixup (vlib_fork_fixup_t which)
1332 {
1333 }
1334 
/*
 * vlib_worker_thread_fork_fixup (vlib_fork_fixup_t which)
 *
 * NOTE(review): the declarator (listing line 1336) is absent from this
 * extract; the name and signature above are taken from the symbol index.
 * Listing lines 1344, 1348-1349 and 1355 are absent as well, so the work
 * done around and inside the switch cannot be seen here.
 *
 * Visible behaviour: returns at once when vlib_mains is null, asserts that
 * the caller is thread index 0, then switches on 'which'.  Any value that
 * reaches the default label trips ASSERT (0).
 */
1335 void
1337 {
1338  vlib_main_t *vm = vlib_get_main ();
1339 
     /* Nothing to do when the vlib_mains vector has not been created. */
1340  if (vlib_mains == 0)
1341  return;
1342 
1343  ASSERT (vlib_get_thread_index () == 0);
1345 
1346  switch (which)
1347  {
     /* NOTE(review): the case label(s) and body (lines 1348-1349) are missing. */
1350  break;
1351 
1352  default:
1353  ASSERT (0);
1354  }
1356 }
1357 
1358  /*
1359  * Enforce minimum open time to minimize packet loss due to Rx overflow,
1360  * based on a test-based heuristic that the barrier should be open for at
1361  * least 3 times as long as it is closed (with an upper bound of 1ms because
1362  * by that point it is probably too late to make a difference)
1363  */
1364 
/*
 * Upper bound on the enforced open time.  0.001 is the 1ms bound described
 * above, expressed in seconds.  The #ifndef guard lets a build override it.
 */
1365 #ifndef BARRIER_MINIMUM_OPEN_LIMIT
1366 #define BARRIER_MINIMUM_OPEN_LIMIT 0.001
1367 #endif
1368 
/*
 * Multiplier applied to the closed time to get the minimum open time (the
 * "3 times" of the heuristic above).  Also overridable at build time.
 */
1369 #ifndef BARRIER_MINIMUM_OPEN_FACTOR
1370 #define BARRIER_MINIMUM_OPEN_FACTOR 3
1371 #endif
1372 
/*
 * vlib_worker_thread_barrier_sync_int (vlib_main_t *vm)
 *
 * NOTE(review): the declarator (listing line 1374) is absent from this
 * extract; the name and signature above are taken from the symbol index.
 * Listing line 1404 is absent as well.
 *
 * Closes the worker barrier.  Returns at once when vlib_mains holds fewer
 * than two entries.  Must be called on thread index 0 (asserted).  A nested
 * call only increments recursion_level, traces the entry time and returns.
 * Otherwise the function spins until vm->barrier_no_close_before, sets
 * *wait_at_barrier to 1, and spins until *workers_at_barrier equals the
 * number of entries in vlib_mains minus one.  It calls os_panic () if that
 * second wait exceeds BARRIER_SYNC_TIMEOUT.
 */
1373 void
1375 {
1376  f64 deadline;
1377  f64 now;
1378  f64 t_entry;
1379  f64 t_open;
1380  f64 t_closed;
1381  u32 count;
1382 
1383  if (vec_len (vlib_mains) < 2)
1384  return;
1385 
1386  ASSERT (vlib_get_thread_index () == 0);
1387 
     /* Number of vlib_mains entries other than the first one. */
1388  count = vec_len (vlib_mains) - 1;
1389 
1390  /* Record entry time relative to vm->barrier_epoch */
1391  now = vlib_time_now (vm);
1392  t_entry = now - vm->barrier_epoch;
1393 
1394  /* Tolerate recursive calls */
1395  if (++vlib_worker_threads[0].recursion_level > 1)
1396  {
1397  barrier_trace_sync_rec (t_entry);
1398  return;
1399  }
1400 
1401  vlib_worker_threads[0].barrier_sync_count++;
1402 
1403  /* Enforce minimum barrier open time to minimize packet loss */
     /* NOTE(review): listing line 1404 is missing from this extract. */
1405 
     /*
      * Busy-wait until the hold-down time has passed.  If the remaining
      * wait is more than twice BARRIER_MINIMUM_OPEN_LIMIT, log a warning
      * and stop waiting instead.
      */
1406  while (1)
1407  {
1408  now = vlib_time_now (vm);
1409  /* Barrier hold-down timer expired? */
1410  if (now >= vm->barrier_no_close_before)
1411  break;
1412  if ((vm->barrier_no_close_before - now)
1413  > (2.0 * BARRIER_MINIMUM_OPEN_LIMIT))
1414  {
1415  clib_warning ("clock change: would have waited for %.4f seconds",
1416  (vm->barrier_no_close_before - now));
1417  break;
1418  }
1419  }
1420  /* Record time of closure */
1421  t_open = now - vm->barrier_epoch;
1422  vm->barrier_epoch = now;
1423 
1424  deadline = now + BARRIER_SYNC_TIMEOUT;
1425 
     /* Raise the flag, then spin until the expected count is reported. */
1426  *vlib_worker_threads->wait_at_barrier = 1;
1427  while (*vlib_worker_threads->workers_at_barrier != count)
1428  {
1429  if ((now = vlib_time_now (vm)) > deadline)
1430  {
1431  fformat (stderr, "%s: worker thread deadlock\n", __FUNCTION__);
1432  os_panic ();
1433  }
1434  }
1435 
     /* 'now' was last refreshed inside the wait loop above, if it ran. */
1436  t_closed = now - vm->barrier_epoch;
1437 
1438  barrier_trace_sync (t_entry, t_open, t_closed);
1439 
1440 }
1441 
/*
 * vlib_stat_segment_lock: weak, empty default definition.
 *
 * The prototype carries __attribute__ ((weak)), so a non-weak definition
 * of the same symbol linked in from elsewhere takes precedence over this
 * one.  This default takes no lock and does nothing.
 */
1442 void vlib_stat_segment_lock (void) __attribute__ ((weak));
1443 void
1444 vlib_stat_segment_lock (void)
1445 {
1446 }
1447 
/*
 * vlib_stat_segment_unlock: weak, empty default definition.
 *
 * The prototype carries __attribute__ ((weak)), so a non-weak definition
 * of the same symbol linked in from elsewhere takes precedence over this
 * one.  This default releases nothing and does nothing.
 */
1448 void vlib_stat_segment_unlock (void) __attribute__ ((weak));
1449 void
1450 vlib_stat_segment_unlock (void)
1451 {
1452 }
1453 
/*
 * vlib_worker_thread_barrier_release (vlib_main_t *vm)
 *
 * NOTE(review): the declarator (listing line 1455) is absent from this
 * extract; the name and signature above are taken from the symbol index.
 * Listing lines 1481, 1488, 1491-1492 and 1531 are absent as well.
 *
 * Opens the worker barrier.  Returns at once when vlib_mains holds fewer
 * than two entries.  Must be called on thread index 0 (asserted).  While
 * recursion_level is still positive after the decrement, only the entry
 * time is traced.  Otherwise the function clears *wait_at_barrier and spins
 * until *workers_at_barrier drops to zero.  When a refork was requested it
 * also spins until *node_reforks_required drops to zero.  Either wait calls
 * os_panic () after BARRIER_SYNC_TIMEOUT.  Finally it records the earliest
 * time at which the barrier may be closed again.
 */
1454 void
1456 {
1457  f64 deadline;
1458  f64 now;
1459  f64 minimum_open;
1460  f64 t_entry;
1461  f64 t_closed_total;
1462  f64 t_update_main = 0.0;
1463  int refork_needed = 0;
1464 
1465  if (vec_len (vlib_mains) < 2)
1466  return;
1467 
1468  ASSERT (vlib_get_thread_index () == 0);
1469 
1470 
1471  now = vlib_time_now (vm);
1472  t_entry = now - vm->barrier_epoch;
1473 
     /* Still nested: trace the entry time and leave the barrier closed. */
1474  if (--vlib_worker_threads[0].recursion_level > 0)
1475  {
1476  barrier_trace_release_rec (t_entry);
1477  return;
1478  }
1479 
1480  /* Update (all) node runtimes before releasing the barrier, if needed */
     /* NOTE(review): the condition guarding this block (line 1481) is missing. */
1482  {
1483  /*
1484  * Lock stat segment here, so we're safe when
1485  * rebuilding the stat segment node clones from the
1486  * stat thread...
1487  */
1489 
1490  /* Do stats elements on main thread */
1493 
1494  /* Do per thread rebuilds in parallel */
1495  refork_needed = 1;
     /* Adds one pending refork for every vlib_mains entry except the first. */
1496  clib_smp_atomic_add (vlib_worker_threads->node_reforks_required,
1497  (vec_len (vlib_mains) - 1));
1498  now = vlib_time_now (vm);
1499  t_update_main = now - vm->barrier_epoch;
1500  }
1501 
1502  deadline = now + BARRIER_SYNC_TIMEOUT;
1503 
1504  *vlib_worker_threads->wait_at_barrier = 0;
1505 
1506  while (*vlib_worker_threads->workers_at_barrier > 0)
1507  {
1508  if ((now = vlib_time_now (vm)) > deadline)
1509  {
1510  fformat (stderr, "%s: worker thread deadlock\n", __FUNCTION__);
1511  os_panic ();
1512  }
1513  }
1514 
1515  /* Wait for reforks before continuing */
1516  if (refork_needed)
1517  {
1518  now = vlib_time_now (vm);
1519 
1520  deadline = now + BARRIER_SYNC_TIMEOUT;
1521 
1522  while (*vlib_worker_threads->node_reforks_required > 0)
1523  {
1524  if ((now = vlib_time_now (vm)) > deadline)
1525  {
1526  fformat (stderr, "%s: worker thread refork deadlock\n",
1527  __FUNCTION__);
1528  os_panic ();
1529  }
1530  }
1532  }
1533 
1534  t_closed_total = now - vm->barrier_epoch;
1535 
     /*
      * Minimum open time is the closed time multiplied by
      * BARRIER_MINIMUM_OPEN_FACTOR, capped at BARRIER_MINIMUM_OPEN_LIMIT.
      */
1536  minimum_open = t_closed_total * BARRIER_MINIMUM_OPEN_FACTOR;
1537 
1538  if (minimum_open > BARRIER_MINIMUM_OPEN_LIMIT)
1539  {
1540  minimum_open = BARRIER_MINIMUM_OPEN_LIMIT;
1541  }
1542 
1543  vm->barrier_no_close_before = now + minimum_open;
1544 
1545  /* Record barrier epoch (used to enforce minimum open time) */
1546  vm->barrier_epoch = now;
1547 
1548  barrier_trace_release (t_entry, t_closed_total, t_update_main);
1549 
1550 }
1551 
1552 /*
1553  * Check the frame queue to see if any frames are available.
1554  * If so, pull the packets off the frames and put them to
1555  * the handoff node.
1556  *
     * vlib_frame_queue_dequeue (vlib_main_t *vm, vlib_frame_queue_main_t *fqm)
     *
     * NOTE(review): the declarator (listing line 1558) is absent from this
     * extract; the name and signature above are taken from the symbol index.
     * Listing lines 1562, 1581, 1633 and 1668 are absent as well, so the
     * declarations of 'elt' and 'fqh' are not visible here.
     *
     * Works on the queue selected by vm->thread_index.  Returns the number of
     * queue elements consumed by this call.  Returns 0 without touching the
     * queue when fqm->node_index is ~0.
     */
1557 int
1559 {
1560  u32 thread_id = vm->thread_index;
1561  vlib_frame_queue_t *fq = fqm->vlib_frame_queues[thread_id];
1563  u32 *from, *to;
1564  vlib_frame_t *f;
1565  int msg_type;
1566  int processed = 0;
1567  u32 n_left_to_node;
1568  u32 vectors = 0;
1569 
1570  ASSERT (fq);
1571  ASSERT (vm == vlib_mains[thread_id]);
1572 
1573  if (PREDICT_FALSE (fqm->node_index == ~0))
1574  return 0;
1575  /*
1576  * Gather trace data for frame queues
1577  */
1578  if (PREDICT_FALSE (fq->trace))
1579  {
1580  frame_queue_trace_t *fqt;
1582  u32 elix;
1583 
1584  fqt = &fqm->frame_queue_traces[thread_id];
1585 
1586  fqt->nelts = fq->nelts;
1587  fqt->head = fq->head;
1588  fqt->head_hint = fq->head_hint;
1589  fqt->tail = fq->tail;
1590  fqt->threshold = fq->vector_threshold;
1591  fqt->n_in_use = fqt->tail - fqt->head;
1592  if (fqt->n_in_use >= fqt->nelts)
1593  {
1594  // if beyond max then use max
1595  fqt->n_in_use = fqt->nelts - 1;
1596  }
1597 
1598  /* Record the number of elements in use in the histogram */
1599  fqh = &fqm->frame_queue_histogram[thread_id];
1600  fqh->count[fqt->n_in_use]++;
1601 
1602  /* Record a snapshot of the elements in use */
1603  for (elix = 0; elix < fqt->nelts; elix++)
1604  {
     /* Slot index is masked with nelts - 1 (assumes nelts is a power of two -- TODO confirm). */
1605  elt = fq->elts + ((fq->head + 1 + elix) & (fq->nelts - 1));
     /* The leading '1 ||' makes this test always true, so every slot is recorded. */
1606  if (1 || elt->valid)
1607  {
1608  fqt->n_vectors[elix] = elt->n_vectors;
1609  }
1610  }
1611  fqt->written = 1;
1612  }
1613 
     /* Drain loop: it is left only through the return statements inside it. */
1614  while (1)
1615  {
     /* head == tail: nothing queued. */
1616  if (fq->head == fq->tail)
1617  {
1618  fq->head_hint = fq->head;
1619  return processed;
1620  }
1621 
1622  elt = fq->elts + ((fq->head + 1) & (fq->nelts - 1));
1623 
     /* Next slot is not marked valid: stop here. */
1624  if (!elt->valid)
1625  {
1626  fq->head_hint = fq->head;
1627  return processed;
1628  }
1629 
1630  from = elt->buffer_index;
1631  msg_type = elt->msg_type;
1632 
1634  ASSERT (elt->n_vectors <= VLIB_FRAME_SIZE);
1635 
1636  f = vlib_get_frame_to_node (vm, fqm->node_index);
1637 
1638  to = vlib_frame_vector_args (f);
1639 
1640  n_left_to_node = elt->n_vectors;
1641 
     /* Copy the buffer indices into the frame, four at a time, then the rest. */
1642  while (n_left_to_node >= 4)
1643  {
1644  to[0] = from[0];
1645  to[1] = from[1];
1646  to[2] = from[2];
1647  to[3] = from[3];
1648  to += 4;
1649  from += 4;
1650  n_left_to_node -= 4;
1651  }
1652 
1653  while (n_left_to_node > 0)
1654  {
1655  to[0] = from[0];
1656  to++;
1657  from++;
1658  n_left_to_node--;
1659  }
1660 
1661  vectors += elt->n_vectors;
1662  f->n_vectors = elt->n_vectors;
1663  vlib_put_frame_to_node (vm, fqm->node_index, f);
1664 
     /* Mark the slot as consumed before advancing head. */
1665  elt->valid = 0;
1666  elt->n_vectors = 0;
1667  elt->msg_type = 0xfefefefe;
1669  fq->head++;
1670  processed++;
1671 
1672  /*
1673  * Limit the number of packets pushed into the graph
1674  */
1675  if (vectors >= fq->vector_threshold)
1676  {
1677  fq->head_hint = fq->head;
1678  return processed;
1679  }
1680  }
     /* Not reachable: the loop above has no break. */
1681  ASSERT (0);
1682  return processed;
1683 }
1684 
/*
 * vlib_worker_thread_fn (void *arg)
 *
 * NOTE(review): the declarator (listing line 1686) is absent from this
 * extract; the name and signature above are taken from the symbol index.
 * Listing lines 1688-1689, 1693, 1695, 1697, 1701 and 1703 are absent as
 * well, including the declaration of 'tm', the body of the wait loop and
 * the start of the call that assigns 'e'.
 *
 * Visible behaviour: initialises vm->clib_time, waits while
 * tm->extern_thread_mgmt is set and tm->worker_thread_release is still 0,
 * reports any error held in 'e', then calls vlib_worker_loop (vm).
 */
1685 void
1687 {
1690  vlib_main_t *vm = vlib_get_main ();
1691  clib_error_t *e;
1692 
1694 
1696  clib_time_init (&vm->clib_time);
1698 
1699  /* Wait until the dpdk init sequence is complete */
1700  while (tm->extern_thread_mgmt && tm->worker_thread_release == 0)
1702 
     /*
      * NOTE(review): the next line is the argument list of a call whose first
      * line (1703) is missing.  Presumably it is vlib_call_init_exit_functions,
      * whose index signature (vm, head, call_once) matches -- confirm.
      */
1704  (vm, vm->worker_init_function_registrations, 1 /* call_once */ );
1705  if (e)
1706  clib_error_report (e);
1707 
1708  vlib_worker_loop (vm);
1709 }
1710 
/*
 * Thread registration 'worker_thread_reg': name "workers", short name "wk",
 * with vlib_worker_thread_fn as its function.
 */
1711 /* *INDENT-OFF* */
1712 VLIB_REGISTER_THREAD (worker_thread_reg, static) = {
1713  .name = "workers",
1714  .short_name = "wk",
1715  .function = vlib_worker_thread_fn,
1716 };
1717 /* *INDENT-ON* */
1718 
/*
 * Append a new frame queue main to tm->frame_queue_mains and create one
 * frame queue per vlib main for it.
 *
 * node_index        stored in the new entry's node_index field.
 * frame_queue_nelts elements per queue; 0 selects FRAME_QUEUE_NELTS.  The
 *                   resulting value is asserted to be at least 8.
 *
 * Returns the index of the new entry within tm->frame_queue_mains.
 *
 * NOTE(review): listing lines 1722-1723, 1738, 1743, 1748 and 1750 are absent
 * from this extract, so the declarations of 'tm', 'fqm' and 'ptd' and the
 * first lines of three statements are not visible here.
 */
1719 u32
1720 vlib_frame_queue_main_init (u32 node_index, u32 frame_queue_nelts)
1721 {
1724  vlib_frame_queue_t *fq;
1725  int i;
1726 
1727  if (frame_queue_nelts == 0)
1728  frame_queue_nelts = FRAME_QUEUE_NELTS;
1729 
1730  ASSERT (frame_queue_nelts >= 8);
1731 
1732  vec_add2 (tm->frame_queue_mains, fqm, 1);
1733 
1734  fqm->node_index = node_index;
1735  fqm->frame_queue_nelts = frame_queue_nelts;
     /* High threshold sits two elements below the queue size. */
1736  fqm->queue_hi_thresh = frame_queue_nelts - 2;
1737 
1739  vec_validate (fqm->per_thread_data, tm->n_vlib_mains - 1);
     /* Reset the length to 0 so the vec_add1 calls below fill from index 0. */
1740  _vec_len (fqm->vlib_frame_queues) = 0;
1741  for (i = 0; i < tm->n_vlib_mains; i++)
1742  {
1744  fq = vlib_frame_queue_alloc (frame_queue_nelts);
1745  vec_add1 (fqm->vlib_frame_queues, fq);
1746 
1747  ptd = vec_elt_at_index (fqm->per_thread_data, i);
     /* NOTE(review): the two statements below lost their first lines (1748, 1750). */
1749  tm->n_vlib_mains - 1);
1751  tm->n_vlib_mains - 1,
1752  (vlib_frame_queue_t *) (~0));
1753  }
1754 
1755  return (fqm - tm->frame_queue_mains);
1756 }
1757 
/*
 * vlib_thread_cb_register (struct vlib_main_t *vm, vlib_thread_callbacks_t *cb)
 *
 * NOTE(review): the declarator (listing line 1759) is absent from this
 * extract; the name and signature above are taken from the symbol index.
 * Listing lines 1761 and 1766 are absent as well.  Presumably 1766 stores
 * the supplied callbacks -- confirm.
 *
 * Returns -1 when tm->extern_thread_mgmt is already set.  Otherwise sets it
 * to 1 and returns 0.
 */
1758 int
1760 {
1762 
1763  if (tm->extern_thread_mgmt)
1764  return -1;
1765 
1767  tm->extern_thread_mgmt = 1;
1768  return 0;
1769 }
1770 
/*
 * vlib_process_signal_event_mt_helper (vlib_process_signal_event_mt_args_t *args)
 *
 * NOTE(review): the first line of the declarator (listing line 1772) is
 * absent from this extract; the name and signature above are taken from the
 * symbol index.  Listing line 1776 is absent as well.  The surviving
 * continuation line passes args->type_opaque and args->data, so the call is
 * presumably vlib_process_signal_event -- confirm.
 *
 * Must be called on thread index 0 (asserted).
 */
1771 void
1773  args)
1774 {
1775  ASSERT (vlib_get_thread_index () == 0);
1777  args->type_opaque, args->data);
1778 }
1779 
1781 
/*
 * Forward (callback, args, arg_size) to the function stored in
 * rpc_call_main_thread_cb_fn, or log a warning on the else branch.
 *
 * NOTE(review): the 'if' condition (listing line 1785) is absent from this
 * extract.  Judging by the warning text it is presumably a null check on
 * rpc_call_main_thread_cb_fn -- confirm.
 */
1782 void
1783 vlib_rpc_call_main_thread (void *callback, u8 * args, u32 arg_size)
1784 {
1786  {
     /* Call the stored pointer through a correctly typed function pointer. */
1787  void (*fp) (void *, u8 *, u32) = rpc_call_main_thread_cb_fn;
1788  (*fp) (callback, args, arg_size);
1789  }
1790  else
1791  clib_warning ("BUG: rpc_call_main_thread_cb_fn NULL!");
1792 }
1793 
/*
 * threads_init: does no work and returns 0, i.e. a null clib_error_t
 * pointer.  The vm argument is unused.
 */
1794 clib_error_t *
1795 threads_init (vlib_main_t * vm)
1796 {
1797  return 0;
1798 }
1799 
1801 
1802 /*
1803  * fd.io coding-style-patch-verification: ON
1804  *
1805  * Local Variables:
1806  * eval: (c-set-style "gnu")
1807  * End:
1808  */
_vlib_init_function_list_elt_t * worker_init_function_registrations
Definition: main.h:183
#define vec_validate(V, I)
Make sure vector is long enough for given index (no header, unspecified alignment) ...
Definition: vec.h:437
static void barrier_trace_release_rec(f64 t_entry)
Definition: threads.c:120
static void barrier_trace_sync(f64 t_entry, f64 t_open, f64 t_closed)
Definition: threads.c:69
u32 vl(void *p)
Definition: threads.c:31
uword * pending_rpc_requests
Definition: main.h:217
vlib_main_t vlib_global_main
Definition: main.c:1638
never_inline void vlib_node_runtime_sync_stats(vlib_main_t *vm, vlib_node_runtime_t *r, uword n_calls, uword n_vectors, uword n_clocks)
Definition: main.c:541
#define clib_min(x, y)
Definition: clib.h:291
vlib_process_t ** processes
Definition: node.h:738
#define VLIB_PENDING_FRAME_NO_NEXT_FRAME
Definition: node.h:463
static void vlib_buffer_free(vlib_main_t *vm, u32 *buffers, u32 n_buffers)
Free buffers Frees the entire buffer chain for each buffer.
Definition: buffer_funcs.h:547
u8 runtime_data[0]
Function dependent node-runtime data.
Definition: node.h:521
int vlib_frame_queue_enqueue(vlib_main_t *vm, u32 node_runtime_index, u32 frame_queue_index, vlib_frame_t *frame, vlib_frame_queue_msg_type_t type)
#define VLIB_MAIN_LOOP_ENTER_FUNCTION(x)
Definition: init.h:166
word elog_track_register(elog_main_t *em, elog_track_t *t)
register an event track
Definition: elog.c:198
unsigned long u64
Definition: types.h:89
void * mheap_alloc(void *memory, uword size)
Definition: mheap.c:963
#define NULL
Definition: clib.h:57
u32 index
Definition: node.h:288
static f64 vlib_time_now(vlib_main_t *vm)
Definition: main.h:227
#define vec_add2_aligned(V, P, N, A)
Add N elements to end of vector V, return pointer to new elements in P.
Definition: vec.h:574
clib_error_t * threads_init(vlib_main_t *vm)
Definition: threads.c:1795
void os_panic(void)
Definition: unix-misc.c:174
u32 vlib_frame_queue_main_init(u32 node_index, u32 frame_queue_nelts)
Definition: threads.c:1720
u32 thread_index
Definition: main.h:179
void * thread_function_arg
Definition: threads.h:98
#define vec_add1(V, E)
Add 1 element to end of vector (unspecified alignment).
Definition: vec.h:523
static int sort_registrations_by_no_clone(void *a0, void *a1)
Definition: threads.c:197
static u64 clib_cpu_time_now(void)
Definition: time.h:73
frame_queue_trace_t * frame_queue_traces
Definition: threads.h:165
void vlib_process_signal_event_mt_helper(vlib_process_signal_event_mt_args_t *args)
Definition: threads.c:1772
elog_track_t elog_track
Definition: threads.h:100
#define vec_add2(V, P, N)
Add N elements to end of vector V, return pointer to new elements in P.
Definition: vec.h:562
int i
void vnet_main_fixup(vlib_fork_fixup_t which)
Definition: threads.c:1331
static uword * clib_bitmap_set(uword *ai, uword i, uword value)
Sets the ith bit of a bitmap to new_value Removes trailing zeros from the bitmap. ...
Definition: bitmap.h:167
#define hash_set_mem(h, key, value)
Definition: hash.h:275
u8 * format(u8 *s, const char *fmt,...)
Definition: format.c:419
clib_time_t clib_time
Definition: main.h:63
void vlib_worker_thread_fn(void *arg)
Definition: threads.c:1686
u32 unformat_sched_policy(unformat_input_t *input, va_list *args)
Definition: threads.c:1186
#define vec_validate_aligned(V, I, A)
Make sure vector is long enough for given index (no header, specified alignment)
Definition: vec.h:448
struct vlib_thread_registration_ * next
Definition: threads.h:31
u32 buffer_index[VLIB_FRAME_SIZE]
Definition: threads.h:82
void * runtime_data
Definition: node.h:294
volatile u32 valid
Definition: threads.h:76
vlib_main_t ** vlib_mains
Definition: buffer.c:303
static void vlib_get_thread_core_socket(vlib_worker_thread_t *w, unsigned cpu_id)
Definition: threads.c:607
unsigned char u8
Definition: types.h:56
#define clib_bitmap_dup(v)
Duplicate a bitmap.
Definition: bitmap.h:87
#define vec_reset_length(v)
Reset vector length to zero NULL-pointer tolerant.
double f64
Definition: types.h:142
u8 state
Definition: node.h:316
u16 thread_index
thread this node runs on
Definition: node.h:519
#define vlib_worker_thread_barrier_sync(X)
Definition: threads.h:204
u64 * counters_last_clear
Definition: error.h:81
static void vlib_worker_thread_barrier_check(void)
Definition: threads.h:389
vlib_thread_registration_t * next
Definition: threads.h:282
#define vec_add1_aligned(V, E, A)
Add 1 element to end of vector (alignment specified).
Definition: vec.h:533
#define VLIB_NODE_RUNTIME_DATA_SIZE
Definition: node.h:532
vlib_node_stats_t stats_last_clear
Definition: node.h:282
memset(h->entries, 0, sizeof(h->entries[0])*entries)
#define clib_smp_atomic_add(addr, increment)
Definition: smp.h:46
#define pool_foreach(VAR, POOL, BODY)
Iterate through pool.
Definition: pool.h:443
void vlib_worker_thread_node_runtime_update(void)
Definition: threads.c:1176
u64 count[FRAME_QUEUE_MAX_NELTS]
Definition: node.h:782
#define VLIB_INIT_FUNCTION(x)
Definition: init.h:163
#define VLIB_INVALID_NODE_INDEX
Definition: node.h:379
void * heap_aligned_base
Definition: main.h:107
vlib_frame_queue_msg_type_t
Definition: threads.h:68
vlib_node_t ** nodes
Definition: node.h:697
vlib_frame_queue_elt_t ** handoff_queue_elt_by_thread_index
Definition: threads.h:151
#define vec_elt_at_index(v, i)
Get vector value at index i checking that i is in bounds.
vlib_frame_t * vlib_get_frame_to_node(vlib_main_t *vm, u32 to_node_index)
Definition: main.c:182
#define clib_error_return(e, args...)
Definition: error.h:99
#define VLIB_FRAME_ALIGN
Definition: node.h:383
uword * lock
SMP lock, non-zero means locking required.
Definition: elog.h:172
uword * cpu_core_bitmap
Definition: threads.h:319
#define BARRIER_MINIMUM_OPEN_FACTOR
Definition: threads.c:1370
clib_error_t * vlib_call_init_exit_functions(vlib_main_t *vm, _vlib_init_function_list_elt_t *head, int call_once)
Definition: init.c:43
vlib_frame_queue_elt_t * elts
Definition: threads.h:144
pthread_t thread[MAX_CONNS]
Definition: main.c:142
unsigned int u32
Definition: types.h:88
vlib_node_runtime_t * nodes_by_type[VLIB_N_NODE_TYPE]
Definition: node.h:707
#define VLIB_FRAME_SIZE
Definition: node.h:382
void vlib_set_thread_name(char *name)
Definition: threads.c:182
void vl_msg_api_handler_no_free(void *)
Definition: threads.c:425
#define hash_create_string(elts, value_bytes)
Definition: hash.h:690
void unformat_init_string(unformat_input_t *input, char *string, int string_len)
Definition: unformat.c:1023
vlib_fork_fixup_t
Definition: threads.h:226
#define BARRIER_SYNC_TIMEOUT
Definition: threads.h:199
void * rpc_call_main_thread_cb_fn
Definition: threads.c:1780
VLIB_REGISTER_THREAD(worker_thread_reg, static)
int extern_thread_mgmt
Definition: threads.h:338
vlib_worker_thread_t * vlib_worker_threads
Definition: threads.c:36
#define clib_bitmap_foreach(i, ai, body)
Macro to iterate across set bits in a bitmap.
Definition: bitmap.h:361
void * thread_mheap
Definition: threads.h:95
u32 next_frame_index
Definition: node.h:460
vlib_node_stats_t stats_total
Definition: node.h:278
volatile u64 head
Definition: threads.h:131
u16 state
Input node state.
Definition: node.h:509
static void vlib_process_signal_event(vlib_main_t *vm, uword node_index, uword type_opaque, uword data)
Definition: node_funcs.h:960
static uword clib_bitmap_first_set(uword *ai)
Return the lowest numbered set bit in a bitmap.
Definition: bitmap.h:385
u8 * vlib_thread_stack_init(uword thread_index)
Definition: main.c:614
static void vlib_next_frame_init(vlib_next_frame_t *nf)
Definition: node.h:443
vlib_error_main_t error_main
Definition: main.h:143
static u32 vlib_frame_index(vlib_main_t *vm, vlib_frame_t *f)
Definition: node_funcs.h:245
vlib_thread_callbacks_t cb
Definition: threads.h:337
#define VLIB_FRAME_NO_FREE_AFTER_DISPATCH
Definition: node.h:419
int vlib_thread_cb_register(struct vlib_main_t *vm, vlib_thread_callbacks_t *cb)
Definition: threads.c:1759
#define v
Definition: acl.c:496
struct _unformat_input_t unformat_input_t
const char * barrier_context
Definition: threads.h:107
char * name
Track name vector.
Definition: elog.h:115
#define clib_error_return_unix(e, args...)
Definition: error.h:102
void vlib_put_frame_to_node(vlib_main_t *vm, u32 to_node_index, vlib_frame_t *f)
Definition: main.c:191
static void * clib_mem_get_per_cpu_heap(void)
Definition: mem.h:64
void vlib_frame_free(vlib_main_t *vm, vlib_node_runtime_t *r, vlib_frame_t *f)
Definition: main.c:211
#define ELOG_DATA(em, f)
Definition: elog.h:481
#define PREDICT_FALSE(x)
Definition: clib.h:107
static clib_error_t * vlib_launch_thread_int(void *fp, vlib_worker_thread_t *w, unsigned cpu_id)
Definition: threads.c:627
void vlib_worker_thread_node_refork(void)
Definition: threads.c:1021
clib_error_t *(* vlib_thread_set_lcore_cb)(u32 thread, u16 cpu)
Definition: threads.h:276
vlib_buffer_free_list_t * buffer_free_list_pool
Definition: main.h:113
u32 node_index
Node index.
Definition: node.h:494
u32 elog_global_id_for_msg_name(const char *msg_name)
Definition: threads.c:47
uword * init_functions_called
Definition: main.h:176
void clib_time_init(clib_time_t *c)
Definition: time.c:178
uword * frame_size_hash
Definition: node.h:753
u8 name[64]
Definition: memclnt.api:151
vlib_thread_main_t vlib_thread_main
Definition: threads.c:37
clib_error_t * clib_sysfs_read(char *file_name, char *fmt,...)
Definition: sysfs.c:50
word fformat(FILE *f, char *fmt,...)
Definition: format.c:453
void(* thread_function)(void *)
Definition: threads.h:97
static clib_error_t * cpu_config(vlib_main_t *vm, unformat_input_t *input)
Definition: threads.c:1200
#define pool_get_aligned(P, E, A)
Allocate an object E from a pool P (general version).
Definition: pool.h:188
i32 n_vectors[FRAME_QUEUE_MAX_NELTS]
Definition: node.h:777
u64 * counters
Definition: error.h:78
u32 owner_node_index
Definition: node.h:359
vlib_frame_queue_t * vlib_frame_queue_alloc(int nelts)
Definition: threads.c:391
volatile u64 tail
Definition: threads.h:123
#define clib_mem_alloc_no_fail(size)
Definition: mem.h:176
#define VLIB_EARLY_CONFIG_FUNCTION(x, n,...)
Definition: init.h:216
#define UNFORMAT_END_OF_INPUT
Definition: format.h:144
svmdb_client_t * c
u16 n_vectors
Definition: node.h:401
vlib_frame_queue_t ** vlib_frame_queues
Definition: threads.h:161
DLMALLOC_EXPORT mspace create_mspace(size_t capacity, int locked)
static_always_inline uword vlib_get_thread_index(void)
Definition: threads.h:211
vlib_main_t * vm
Definition: buffer.c:294
u32 node_runtime_index
Definition: node.h:454
vlib_pending_frame_t * pending_frames
Definition: node.h:723
vlib_thread_function_t * function
Definition: threads.h:36
int vlib_frame_queue_dequeue(vlib_main_t *vm, vlib_frame_queue_main_t *fqm)
Definition: threads.c:1558
#define vec_free(V)
Free vector's memory (no header).
Definition: vec.h:339
void * heap_base
Definition: main.h:104
static void * clib_mem_set_heap(void *heap)
Definition: mem.h:261
#define clib_warning(format, args...)
Definition: error.h:59
static vlib_node_runtime_t * vlib_node_get_runtime(vlib_main_t *vm, u32 node_index)
Get node runtime by node index.
Definition: node_funcs.h:89
#define clib_memcpy(a, b, c)
Definition: string.h:75
elog_main_t elog_main
Definition: main.h:157
frame_queue_nelt_counter_t * frame_queue_histogram
Definition: threads.h:166
#define VLIB_FRAME_PENDING
Definition: node.h:430
static uword clib_bitmap_get(uword *ai, uword i)
Gets the ith bit value from a bitmap.
Definition: bitmap.h:197
#define ELOG_TYPE_DECLARE(f)
Definition: elog.h:439
void vlib_worker_thread_init(vlib_worker_thread_t *w)
Definition: threads.c:552
uword os_get_nthreads(void)
Definition: threads.c:170
static void * clib_mem_get_heap(void)
Definition: mem.h:255
volatile u32 * wait_at_barrier
Definition: threads.h:90
#define FRAME_QUEUE_NELTS
Definition: threads.c:28
vlib_frame_queue_per_thread_data_t * per_thread_data
Definition: threads.h:162
void vlib_stat_segment_unlock(void)
Definition: threads.c:1450
vlib_frame_queue_t ** congested_handoff_queue_by_thread_index
Definition: threads.h:152
#define never_inline
Definition: clib.h:91
signed int i32
Definition: types.h:77
#define hash_create(elts, value_bytes)
Definition: hash.h:696
#define ASSERT(truth)
static void barrier_trace_sync_rec(f64 t_entry)
Definition: threads.c:96
vlib_frame_queue_main_t * frame_queue_mains
Definition: threads.h:325
u16 flags
Definition: node.h:392
static void clib_mem_free(void *p)
Definition: mem.h:205
#define clib_error_report(e)
Definition: error.h:113
#define clib_bitmap_free(v)
Free a bitmap.
Definition: bitmap.h:92
void vlib_worker_thread_barrier_sync_int(vlib_main_t *vm)
Definition: threads.c:1374
size_t count
Definition: vapi.c:46
int need_vlib_worker_thread_node_runtime_update
Definition: main.h:205
uword * thread_registrations_by_name
Definition: threads.h:287
#define BARRIER_MINIMUM_OPEN_LIMIT
Definition: threads.c:1366
clib_error_t *(* vlib_launch_thread_cb)(void *fp, vlib_worker_thread_t *w, unsigned cpu_id)
Definition: threads.h:274
volatile u32 * node_reforks_required
Definition: threads.h:108
static vlib_main_t * vlib_get_main(void)
Definition: global_funcs.h:23
void vlib_node_sync_stats(vlib_main_t *vm, vlib_node_t *n)
Definition: main.c:571
static uword clib_bitmap_count_set_bits(uword *ai)
Return the number of set bits in a bitmap.
Definition: bitmap.h:462
static void barrier_trace_release(f64 t_entry, f64 t_closed_total, f64 t_update_main)
Definition: threads.c:142
void vlib_worker_loop(vlib_main_t *vm)
Definition: main.c:1633
#define vec_dup_aligned(V, A)
Return copy of vector (no header, alignment specified).
Definition: vec.h:382
u32 elog_string(elog_main_t *em, char *fmt,...)
add a string to the event-log string table
Definition: elog.c:538
f64 barrier_no_close_before
Definition: main.h:214
static clib_error_t * start_workers(vlib_main_t *vm)
Definition: threads.c:654
#define vec_len(v)
Number of elements in vector (rvalue-only, NULL tolerant)
void vlib_rpc_call_main_thread(void *callback, u8 *args, u32 arg_size)
Definition: threads.c:1783
DECLARE_CJ_GLOBAL_LOG
Definition: threads.c:26
vlib_node_main_t node_main
Definition: main.h:134
u64 uword
Definition: types.h:112
vlib_next_frame_t * next_frames
Definition: node.h:720
#define vec_sort_with_function(vec, f)
Sort a vector using the supplied element comparison function.
Definition: vec.h:982
static void unformat_free(unformat_input_t *i)
Definition: format.h:162
static void * vlib_frame_vector_args(vlib_frame_t *f)
Get pointer to frame vector data.
Definition: node_funcs.h:267
volatile u64 head_hint
Definition: threads.h:140
#define VLIB_THREAD_STACK_SIZE
Definition: threads.h:66
f64 barrier_epoch
Definition: main.h:211
vlib_frame_size_t * frame_sizes
Definition: node.h:756
#define hash_get_mem(h, key)
Definition: hash.h:269
static void worker_thread_node_runtime_update_internal(void)
Definition: threads.c:969
static void * clib_mem_alloc_aligned(uword size, uword align)
Definition: mem.h:140
volatile u32 * workers_at_barrier
Definition: threads.h:91
uword clib_calljmp(uword(*func)(uword func_arg), uword func_arg, void *stack)
static uword * clib_sysfs_list_to_bitmap(char *filename)
Definition: threads.c:207
void vlib_worker_thread_barrier_release(vlib_main_t *vm)
Definition: threads.c:1455
static vlib_thread_main_t * vlib_get_thread_main()
Definition: global_funcs.h:32
static f64 vlib_time_now_ticks(vlib_main_t *vm, u64 n)
Definition: main.h:233
static vlib_node_t * vlib_get_node(vlib_main_t *vm, u32 i)
Get vlib node by index.
Definition: node_funcs.h:59
void vlib_stat_segment_lock(void)
Definition: threads.c:1444
#define vec_foreach(var, vec)
Vector iterator.
void * vlib_worker_thread_bootstrap_fn(void *arg)
Definition: threads.c:589
#define CLIB_MEMORY_BARRIER()
Definition: clib.h:111
u32 node_runtime_index
Definition: node.h:413
uword * cpu_socket_bitmap
Definition: threads.h:322
#define foreach_sched_policy
Definition: threads.h:257
vlib_thread_registration_t ** registrations
Definition: threads.h:285
#define vec_validate_init_empty(V, I, INIT)
Make sure vector is long enough for given index and initialize empty space (no header, unspecified alignment)
Definition: vec.h:486
#define CLIB_CACHE_LINE_BYTES
Definition: cache.h:59
u8 ** vlib_thread_stacks
Definition: main.c:597
pthread_t thread_id
Definition: threads.h:114
vlib_thread_registration_t * registration
Definition: threads.h:102
volatile u32 worker_thread_release
Definition: threads.h:328
void vlib_worker_thread_fork_fixup(vlib_fork_fixup_t which)
Definition: threads.c:1336
clib_random_buffer_t random_buffer
Definition: main.h:173
u8 runtime_data_bytes
Definition: node.h:319
uword unformat(unformat_input_t *i, const char *fmt,...)
Definition: unformat.c:972
static uword unformat_check_input(unformat_input_t *i)
Definition: format.h:170
#define VLIB_FRAME_FREE_AFTER_DISPATCH
Definition: node.h:433
clib_error_t * vlib_thread_init(vlib_main_t *vm)
Definition: threads.c:237