FD.io VPP  v17.04-9-g99c0734
Vector Packet Processing
load_balance.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include <vnet/ip/lookup.h>
17 #include <vnet/dpo/load_balance.h>
19 #include <vnet/dpo/drop_dpo.h>
20 #include <vppinfra/math.h> /* for fabs */
21 #include <vnet/adj/adj.h>
22 #include <vnet/adj/adj_internal.h>
23 #include <vnet/fib/fib_urpf_list.h>
24 
25 /*
26  * distribution error tolerance for load-balancing
27  */
29 
30 #undef LB_DEBUG
31 
32 #ifdef LB_DEBUG
33 #define LB_DBG(_lb, _fmt, _args...) \
34 { \
35  u8* _tmp =NULL; \
36  clib_warning("lb:[%s]:" _fmt, \
37  load_balance_format(load_balance_get_index((_lb)), \
38  0, _tmp), \
39  ##_args); \
40  vec_free(_tmp); \
41 }
42 #else
43 #define LB_DBG(_p, _fmt, _args...)
44 #endif
45 
46 
47 /**
48  * Pool of all DPOs. It's not static so the DP can have fast access
49  */
51 
52 /**
53  * The one instance of load-balance main
54  */
56 
57 f64
59 {
61 }
62 
63 static inline index_t
65 {
66  return (lb - load_balance_pool);
67 }
68 
69 static inline dpo_id_t*
71 {
72  if (LB_HAS_INLINE_BUCKETS(lb))
73  {
74  return (lb->lb_buckets_inline);
75  }
76  else
77  {
78  return (lb->lb_buckets);
79  }
80 }
81 
82 static load_balance_t *
84 {
85  load_balance_t *lb;
86 
87  pool_get_aligned(load_balance_pool, lb, CLIB_CACHE_LINE_BYTES);
88  memset(lb, 0, sizeof(*lb));
89 
90  lb->lb_map = INDEX_INVALID;
91  lb->lb_urpf = INDEX_INVALID;
96  vlib_zero_combined_counter(&(load_balance_main.lbm_to_counters),
98  vlib_zero_combined_counter(&(load_balance_main.lbm_via_counters),
100 
101  return (lb);
102 }
103 
104 static u8*
107  u32 indent,
108  u8 *s)
109 {
110  vlib_counter_t to, via;
111  load_balance_t *lb;
112  dpo_id_t *buckets;
113  u32 i;
114 
115  lb = load_balance_get(lbi);
116  vlib_get_combined_counter(&(load_balance_main.lbm_to_counters), lbi, &to);
117  vlib_get_combined_counter(&(load_balance_main.lbm_via_counters), lbi, &via);
118  buckets = load_balance_get_buckets(lb);
119 
120  s = format(s, "%U: ", format_dpo_type, DPO_LOAD_BALANCE);
121  s = format(s, "[index:%d buckets:%d ", lbi, lb->lb_n_buckets);
122  s = format(s, "uRPF:%d ", lb->lb_urpf);
123  s = format(s, "to:[%Ld:%Ld]", to.packets, to.bytes);
124  if (0 != via.packets)
125  {
126  s = format(s, " via:[%Ld:%Ld]",
127  via.packets, via.bytes);
128  }
129  s = format(s, "]");
130 
131  if (INDEX_INVALID != lb->lb_map)
132  {
133  s = format(s, "\n%U%U",
134  format_white_space, indent+4,
135  format_load_balance_map, lb->lb_map, indent+4);
136  }
137  for (i = 0; i < lb->lb_n_buckets; i++)
138  {
139  s = format(s, "\n%U[%d] %U",
140  format_white_space, indent+2,
141  i,
143  &buckets[i], indent+6);
144  }
145  return (s);
146 }
147 
148 u8*
149 format_load_balance (u8 * s, va_list * args)
150 {
151  index_t lbi = va_arg(*args, index_t);
153 
154  return (load_balance_format(lbi, flags, 0, s));
155 }
156 static u8*
157 format_load_balance_dpo (u8 * s, va_list * args)
158 {
159  /* Consume the index and indent level pushed by the DPO framework */
160  index_t lb_index = va_arg(*args, index_t);
161  u32 lvl = va_arg(*args, u32);
162 
163  /* A DPO-graph dump always wants the detailed view */
164  return (load_balance_format(lb_index, LOAD_BALANCE_FORMAT_DETAIL, lvl, s));
165 }
164 
165 
166 static load_balance_t *
168  dpo_proto_t lb_proto,
169  flow_hash_config_t fhc)
170 {
171  load_balance_t *lb;
172 
173  lb = load_balance_alloc_i();
174  lb->lb_hash_config = fhc;
175  lb->lb_n_buckets = num_buckets;
176  lb->lb_n_buckets_minus_1 = num_buckets-1;
177  lb->lb_proto = lb_proto;
178 
179  if (!LB_HAS_INLINE_BUCKETS(lb))
180  {
182  lb->lb_n_buckets - 1,
184  }
185 
186  LB_DBG(lb, "create");
187 
188  return (lb);
189 }
190 
191 index_t
193  dpo_proto_t lb_proto,
194  flow_hash_config_t fhc)
195 {
196  return (load_balance_get_index(load_balance_create_i(n_buckets, lb_proto, fhc)));
197 }
198 
199 static inline void
201  u32 bucket,
202  dpo_id_t *buckets,
203  const dpo_id_t *next)
204 {
205  dpo_stack(DPO_LOAD_BALANCE, lb->lb_proto, &buckets[bucket], next);
206 }
207 
208 void
210  u32 bucket,
211  const dpo_id_t *next)
212 {
213  load_balance_t *lb;
214  dpo_id_t *buckets;
215 
216  lb = load_balance_get(lbi);
217  buckets = load_balance_get_buckets(lb);
218 
219  ASSERT(bucket < lb->lb_n_buckets);
220 
221  load_balance_set_bucket_i(lb, bucket, buckets, next);
222 }
223 
224 int
226 {
227  load_balance_t *lb;
228 
229  if (DPO_LOAD_BALANCE != dpo->dpoi_type)
230  return (0);
231 
232  lb = load_balance_get(dpo->dpoi_index);
233 
234  if (1 == lb->lb_n_buckets)
235  {
236  return (dpo_is_drop(load_balance_get_bucket_i(lb, 0)));
237  }
238  return (0);
239 }
240 
241 void
244 {
245  load_balance_t *lb;
246 
247  lb = load_balance_get(lbi);
249 }
250 
251 
252 void
254  index_t urpf)
255 {
256  load_balance_t *lb;
257  index_t old;
258 
259  lb = load_balance_get(lbi);
260 
261  /*
262  * packets in flight we see this change. but it's atomic, so :P
263  */
264  old = lb->lb_urpf;
265  lb->lb_urpf = urpf;
266 
268  fib_urpf_list_lock(urpf);
269 }
270 
271 index_t
273 {
274  load_balance_t *lb;
275 
276  lb = load_balance_get(lbi);
277 
278  return (lb->lb_urpf);
279 }
280 
281 const dpo_id_t *
283  u32 bucket)
284 {
285  load_balance_t *lb;
286 
287  lb = load_balance_get(lbi);
288 
289  return (load_balance_get_bucket_i(lb, bucket));
290 }
291 
292 static int
294  const load_balance_path_t * n2)
295 {
296  return ((int) n1->path_weight - (int) n2->path_weight);
297 }
298 
299 /* Given next hop vector is over-written with normalized one with sorted weights and
300  with weights corresponding to the number of adjacencies for each next hop.
301  Returns number of adjacencies in block. */
302 u32
304  load_balance_path_t ** normalized_next_hops,
305  u32 *sum_weight_in,
307 {
308  load_balance_path_t * nhs;
309  uword n_nhs, n_adj, n_adj_left, i, sum_weight;
310  f64 norm, error;
311 
312  n_nhs = vec_len (raw_next_hops);
313  ASSERT (n_nhs > 0);
314  if (n_nhs == 0)
315  return 0;
316 
317  /* Allocate enough space for 2 copies; we'll use second copy to save original weights. */
318  nhs = *normalized_next_hops;
319  vec_validate (nhs, 2*n_nhs - 1);
320 
321  /* Fast path: 1 next hop in block. */
322  n_adj = n_nhs;
323  if (n_nhs == 1)
324  {
325  nhs[0] = raw_next_hops[0];
326  nhs[0].path_weight = 1;
327  _vec_len (nhs) = 1;
328  sum_weight = 1;
329  goto done;
330  }
331 
332  else if (n_nhs == 2)
333  {
334  int cmp = next_hop_sort_by_weight (&raw_next_hops[0], &raw_next_hops[1]) < 0;
335 
336  /* Fast sort. */
337  nhs[0] = raw_next_hops[cmp];
338  nhs[1] = raw_next_hops[cmp ^ 1];
339 
340  /* Fast path: equal cost multipath with 2 next hops. */
341  if (nhs[0].path_weight == nhs[1].path_weight)
342  {
343  nhs[0].path_weight = nhs[1].path_weight = 1;
344  _vec_len (nhs) = 2;
345  sum_weight = 2;
346  goto done;
347  }
348  }
349  else
350  {
351  clib_memcpy (nhs, raw_next_hops, n_nhs * sizeof (raw_next_hops[0]));
352  qsort (nhs, n_nhs, sizeof (nhs[0]), (void *) next_hop_sort_by_weight);
353  }
354 
355  /* Find total weight to normalize weights. */
356  sum_weight = 0;
357  for (i = 0; i < n_nhs; i++)
358  sum_weight += nhs[i].path_weight;
359 
360  /* In the unlikely case that all weights are given as 0, set them all to 1. */
361  if (sum_weight == 0)
362  {
363  for (i = 0; i < n_nhs; i++)
364  nhs[i].path_weight = 1;
365  sum_weight = n_nhs;
366  }
367 
368  /* Save copies of all next hop weights to avoid being overwritten in loop below. */
369  for (i = 0; i < n_nhs; i++)
370  nhs[n_nhs + i].path_weight = nhs[i].path_weight;
371 
372  /* Try larger and larger power of 2 sized adjacency blocks until we
373  find one where traffic flows to within 1% of specified weights. */
374  for (n_adj = max_pow2 (n_nhs); ; n_adj *= 2)
375  {
376  error = 0;
377 
378  norm = n_adj / ((f64) sum_weight);
379  n_adj_left = n_adj;
380  for (i = 0; i < n_nhs; i++)
381  {
382  f64 nf = nhs[n_nhs + i].path_weight * norm; /* use saved weights */
383  word n = flt_round_nearest (nf);
384 
385  n = n > n_adj_left ? n_adj_left : n;
386  n_adj_left -= n;
387  error += fabs (nf - n);
388  nhs[i].path_weight = n;
389 
390  if (0 == nhs[i].path_weight)
391  {
392  /*
393  * when the weight skew is high (norm is small) and n == nf.
394  * without this correction the path with a low weight would have
395  * no representation in the load-balance - don't want that.
396  * If the weight skew is high so the load-balance has many buckets
397  * to allow it. pays ya money takes ya choice.
398  */
399  error = n_adj;
400  break;
401  }
402  }
403 
404  nhs[0].path_weight += n_adj_left;
405 
406  /* Less than 5% average error per adjacency with this size adjacency block? */
407  if (error <= multipath_next_hop_error_tolerance*n_adj)
408  {
409  /* Truncate any next hops with zero weight. */
410  _vec_len (nhs) = i;
411  break;
412  }
413  }
414 
415 done:
416  /* Save vector for next call. */
417  *normalized_next_hops = nhs;
418  *sum_weight_in = sum_weight;
419  return n_adj;
420 }
421 
422 static load_balance_path_t *
424  dpo_proto_t drop_proto)
425 {
426  if (0 == vec_len(nhs))
427  {
428  load_balance_path_t *new_nhs = NULL, *nh;
429 
430  /*
431  * we need something for the load-balance. so use the drop
432  */
433  vec_add2(new_nhs, nh, 1);
434 
435  nh->path_weight = 1;
436  dpo_copy(&nh->path_dpo, drop_dpo_get(drop_proto));
437 
438  return (new_nhs);
439  }
440 
441  return (NULL);
442 }
443 
444 /*
445  * Fill in adjacencies in block based on corresponding
446  * next hop adjacencies.
447  */
448 static void
450  load_balance_path_t *nhs,
451  dpo_id_t *buckets,
452  u32 n_buckets)
453 {
454  load_balance_path_t * nh;
455  u16 ii, bucket;
456 
457  bucket = 0;
458 
459  /*
460  * the next-hops have normalised weights. that means their sum is the number
461  * of buckets we need to fill.
462  */
463  vec_foreach (nh, nhs)
464  {
465  for (ii = 0; ii < nh->path_weight; ii++)
466  {
467  ASSERT(bucket < n_buckets);
468  load_balance_set_bucket_i(lb, bucket++, buckets, &nh->path_dpo);
469  }
470  }
471 }
472 
473 static inline void
475  u32 n_buckets)
476 {
477  lb->lb_n_buckets = n_buckets;
478  lb->lb_n_buckets_minus_1 = n_buckets-1;
479 }
480 
481 void
483  const load_balance_path_t * raw_nhs,
485 {
486  load_balance_path_t *nh, *nhs, *fixed_nhs;
487  u32 sum_of_weights, n_buckets, ii;
488  index_t lbmi, old_lbmi;
489  load_balance_t *lb;
490  dpo_id_t *tmp_dpo;
491 
492  nhs = NULL;
493 
495  lb = load_balance_get(dpo->dpoi_index);
496  fixed_nhs = load_balance_multipath_next_hop_fixup(raw_nhs, lb->lb_proto);
497  n_buckets =
498  ip_multipath_normalize_next_hops((NULL == fixed_nhs ?
499  raw_nhs :
500  fixed_nhs),
501  &nhs,
502  &sum_of_weights,
504 
505  ASSERT (n_buckets >= vec_len (raw_nhs));
506 
507  /*
508  * Save the old load-balance map used, and get a new one if required.
509  */
510  old_lbmi = lb->lb_map;
511  if (flags & LOAD_BALANCE_FLAG_USES_MAP)
512  {
513  lbmi = load_balance_map_add_or_lock(n_buckets, sum_of_weights, nhs);
514  }
515  else
516  {
517  lbmi = INDEX_INVALID;
518  }
519 
520  if (0 == lb->lb_n_buckets)
521  {
522  /*
523  * first time initialisation. no packets inflight, so we can write
524  * at leisure.
525  */
526  load_balance_set_n_buckets(lb, n_buckets);
527 
528  if (!LB_HAS_INLINE_BUCKETS(lb))
530  lb->lb_n_buckets - 1,
532 
535  n_buckets);
536  lb->lb_map = lbmi;
537  }
538  else
539  {
540  /*
541  * This is a modification of an existing load-balance.
542  * We need to ensure that packets inflight see a consistent state, that
543  * is the number of reported buckets the LB has (read from
544  * lb_n_buckets_minus_1) is not more than it actually has. So if the
545  * number of buckets is increasing, we must update the bucket array first,
546  * then the reported number. vice-versa if the number of buckets goes down.
547  */
548  if (n_buckets == lb->lb_n_buckets)
549  {
550  /*
551  * no change in the number of buckets. we can simply fill what
552  * is new over what is old.
553  */
556  n_buckets);
557  lb->lb_map = lbmi;
558  }
559  else if (n_buckets > lb->lb_n_buckets)
560  {
561  /*
562  * we have more buckets. the old load-balance map (if there is one)
563  * will remain valid, i.e. mapping to indices within range, so we
564  * update it last.
565  */
566  if (n_buckets > LB_NUM_INLINE_BUCKETS &&
568  {
569  /*
570  * the new increased number of buckets is crossing the threshold
571  * from the inline storage to out-line. Alloc the outline buckets
572  * first, then fixup the number. then reset the inlines.
573  */
574  ASSERT(NULL == lb->lb_buckets);
576  n_buckets - 1,
578 
580  lb->lb_buckets,
581  n_buckets);
583  load_balance_set_n_buckets(lb, n_buckets);
584 
586 
587  for (ii = 0; ii < LB_NUM_INLINE_BUCKETS; ii++)
588  {
589  dpo_reset(&lb->lb_buckets_inline[ii]);
590  }
591  }
592  else
593  {
594  if (n_buckets <= LB_NUM_INLINE_BUCKETS)
595  {
596  /*
597  * we are not crossing the threshold and it's still inline buckets.
598  * we can write the new on the old..
599  */
602  n_buckets);
604  load_balance_set_n_buckets(lb, n_buckets);
605  }
606  else
607  {
608  /*
609  * we are not crossing the threshold. We need a new bucket array to
610  * hold the increased number of choices.
611  */
612  dpo_id_t *new_buckets, *old_buckets, *tmp_dpo;
613 
614  new_buckets = NULL;
615  old_buckets = load_balance_get_buckets(lb);
616 
617  vec_validate_aligned(new_buckets,
618  n_buckets - 1,
620 
621  load_balance_fill_buckets(lb, nhs, new_buckets, n_buckets);
623  lb->lb_buckets = new_buckets;
625  load_balance_set_n_buckets(lb, n_buckets);
626 
627  vec_foreach(tmp_dpo, old_buckets)
628  {
629  dpo_reset(tmp_dpo);
630  }
631  vec_free(old_buckets);
632  }
633  }
634 
635  /*
636  * buckets fixed. ready for the MAP update.
637  */
638  lb->lb_map = lbmi;
639  }
640  else
641  {
642  /*
643  * bucket size shrinkage.
644  * Any map we have will be based on the old
645  * larger number of buckets, so will be translating to indices
646  * out of range. So the new MAP must be installed first.
647  */
648  lb->lb_map = lbmi;
650 
651 
652  if (n_buckets <= LB_NUM_INLINE_BUCKETS &&
654  {
655  /*
656  * the new decreased number of buckets is crossing the threshold
657  * from out-line storage to inline:
658  * 1 - Fill the inline buckets,
659  * 2 - fixup the number (and this point the inline buckets are
660  * used).
661  * 3 - free the outline buckets
662  */
664  lb->lb_buckets_inline,
665  n_buckets);
667  load_balance_set_n_buckets(lb, n_buckets);
669 
670  vec_foreach(tmp_dpo, lb->lb_buckets)
671  {
672  dpo_reset(tmp_dpo);
673  }
674  vec_free(lb->lb_buckets);
675  }
676  else
677  {
678  /*
679  * not crossing the threshold.
680  * 1 - update the number to the smaller size
681  * 2 - write the new buckets
682  * 3 - reset those no longer used.
683  */
684  dpo_id_t *buckets;
685  u32 old_n_buckets;
686 
687  old_n_buckets = lb->lb_n_buckets;
688  buckets = load_balance_get_buckets(lb);
689 
690  load_balance_set_n_buckets(lb, n_buckets);
692 
694  buckets,
695  n_buckets);
696 
697  for (ii = n_buckets; ii < old_n_buckets; ii++)
698  {
699  dpo_reset(&buckets[ii]);
700  }
701  }
702  }
703  }
704 
705  vec_foreach (nh, nhs)
706  {
707  dpo_reset(&nh->path_dpo);
708  }
709  vec_free(nhs);
710  vec_free(fixed_nhs);
711 
712  load_balance_map_unlock(old_lbmi);
713 }
714 
715 static void
717 {
718  load_balance_t *lb;
719 
720  lb = load_balance_get(dpo->dpoi_index);
721 
722  lb->lb_locks++;
723 }
724 
725 static void
727 {
728  dpo_id_t *buckets;
729  int i;
730 
731  buckets = load_balance_get_buckets(lb);
732 
733  for (i = 0; i < lb->lb_n_buckets; i++)
734  {
735  dpo_reset(&buckets[i]);
736  }
737 
738  LB_DBG(lb, "destroy");
739  if (!LB_HAS_INLINE_BUCKETS(lb))
740  {
741  vec_free(lb->lb_buckets);
742  }
743 
746 
747  pool_put(load_balance_pool, lb);
748 }
749 
750 static void
752 {
753  load_balance_t *lb;
754 
755  lb = load_balance_get(dpo->dpoi_index);
756 
757  lb->lb_locks--;
758 
759  if (0 == lb->lb_locks)
760  {
762  }
763 }
764 
765 static void
767 {
768  fib_show_memory_usage("load-balance",
769  pool_elts(load_balance_pool),
770  pool_len(load_balance_pool),
771  sizeof(load_balance_t));
773 }
774 
775 const static dpo_vft_t lb_vft = {
777  .dv_unlock = load_balance_unlock,
778  .dv_format = format_load_balance_dpo,
779  .dv_mem_show = load_balance_mem_show,
780 };
781 
782 /**
783  * @brief The per-protocol VLIB graph nodes that are assigned to a load-balance
784  * object.
785  *
786  * this means that these graph nodes are ones from which a load-balance is the
787  * parent object in the DPO-graph.
788  *
789  * We do not list all the load-balance nodes, such as the *-lookup. instead
790  * we are relying on the correct use of the .sibling_of field when setting
791  * up these sibling nodes.
792  */
793 /* NULL-terminated list of IPv4 graph nodes whose parent DPO is a load-balance */
794 const static char* const load_balance_ip4_nodes[] =
795 {
796  "ip4-load-balance",
797  NULL,
798 };
798 /* NULL-terminated list of IPv6 graph nodes whose parent DPO is a load-balance */
799 const static char* const load_balance_ip6_nodes[] =
800 {
801  "ip6-load-balance",
802  NULL,
803 };
803 /* NULL-terminated list of MPLS graph nodes whose parent DPO is a load-balance */
804 const static char* const load_balance_mpls_nodes[] =
805 {
806  "mpls-load-balance",
807  NULL,
808 };
808 /* NULL-terminated list of L2 graph nodes whose parent DPO is a load-balance */
809 const static char* const load_balance_l2_nodes[] =
810 {
811  "l2-load-balance",
812  NULL,
813 };
813 /* NULL-terminated list of NSH graph nodes whose parent DPO is a load-balance */
814 const static char* const load_balance_nsh_nodes[] =
815 {
816  "nsh-load-balance",
817  NULL,
818 };
818 const static char* const * const load_balance_nodes[DPO_PROTO_NUM] =
819 {
825 };
826 
827 void
829 {
831 
833 }
834 
835 static clib_error_t *
837  unformat_input_t * input,
838  vlib_cli_command_t * cmd)
839 {
840  index_t lbi = INDEX_INVALID;
841 
843  {
844  if (unformat (input, "%d", &lbi))
845  ;
846  else
847  break;
848  }
849 
850  if (INDEX_INVALID != lbi)
851  {
852  vlib_cli_output (vm, "%U", format_load_balance, lbi,
854  }
855  else
856  {
857  load_balance_t *lb;
858 
859  pool_foreach(lb, load_balance_pool,
860  ({
864  }));
865  }
866 
867  return 0;
868 }
869 
870 /* CLI registration: dump one load-balance by index, or all of them */
871 VLIB_CLI_COMMAND (load_balance_show_command, static) = {
872  .path = "show load-balance",
873  .short_help = "show load-balance [<index>]",
874  .function = load_balance_show,
875 };
875 
876 
878 ip_flow_hash (void *data)
879 {
880  ip4_header_t *iph = (ip4_header_t *) data;
881 
882  if ((iph->ip_version_and_header_length & 0xF0) == 0x40)
884  else
886 }
887 
890 {
891  return (*((u64 *) m) & 0xffffffffffff);
892 }
893 
896 {
897  ethernet_header_t *eh;
898  u64 a, b, c;
899  uword is_ip, eh_size;
900  u16 eh_type;
901 
902  eh = vlib_buffer_get_current (b0);
903  eh_type = clib_net_to_host_u16 (eh->type);
904  eh_size = ethernet_buffer_header_size (b0);
905 
906  is_ip = (eh_type == ETHERNET_TYPE_IP4 || eh_type == ETHERNET_TYPE_IP6);
907 
908  /* since we have 2 cache lines, use them */
909  if (is_ip)
910  a = ip_flow_hash ((u8 *) vlib_buffer_get_current (b0) + eh_size);
911  else
912  a = eh->type;
913 
914  b = mac_to_u64 ((u8 *) eh->dst_address);
915  c = mac_to_u64 ((u8 *) eh->src_address);
916  hash_mix64 (a, b, c);
917 
918  return (u32) c;
919 }
920 
921 typedef struct load_balance_trace_t_
922 {
925 
926 static uword
928  vlib_node_runtime_t * node,
929  vlib_frame_t * frame)
930 {
931  u32 n_left_from, next_index, *from, *to_next;
932 
933  from = vlib_frame_vector_args (frame);
934  n_left_from = frame->n_vectors;
935 
936  next_index = node->cached_next_index;
937 
938  while (n_left_from > 0)
939  {
940  u32 n_left_to_next;
941 
942  vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
943 
944  while (n_left_from > 0 && n_left_to_next > 0)
945  {
946  vlib_buffer_t *b0;
947  u32 bi0, lbi0, next0;
948  const dpo_id_t *dpo0;
949  const load_balance_t *lb0;
950 
951  bi0 = from[0];
952  to_next[0] = bi0;
953  from += 1;
954  to_next += 1;
955  n_left_from -= 1;
956  n_left_to_next -= 1;
957 
958  b0 = vlib_get_buffer (vm, bi0);
959 
960  /* lookup dst + src mac */
961  lbi0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
962  lb0 = load_balance_get(lbi0);
963 
964  vnet_buffer(b0)->ip.flow_hash = l2_flow_hash(b0);
965 
966  dpo0 = load_balance_get_bucket_i(lb0,
967  vnet_buffer(b0)->ip.flow_hash &
968  (lb0->lb_n_buckets_minus_1));
969 
970  next0 = dpo0->dpoi_next_node;
971  vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
972 
974  {
975  load_balance_trace_t *tr = vlib_add_trace (vm, node, b0,
976  sizeof (*tr));
977  tr->lb_index = lbi0;
978  }
979  vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
980  n_left_to_next, bi0, next0);
981  }
982 
983  vlib_put_next_frame (vm, node, next_index, n_left_to_next);
984  }
985 
986  return frame->n_vectors;
987 }
988 
989 static u8 *
990 format_l2_load_balance_trace (u8 * s, va_list * args)
991 {
992  /* The trace-format prototype pushes vm and node first; neither is
993  * needed to render this record, so pop and discard them. */
994  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
995  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
996  load_balance_trace_t *trace = va_arg (*args, load_balance_trace_t *);
997 
998  return (format (s, "L2-load-balance: index %d", trace->lb_index));
999 }
999 
1000 /**
1001  * @brief
1002  */
1004  .function = l2_load_balance,
1005  .name = "l2-load-balance",
1006  .vector_size = sizeof (u32),
1007 
1008  .format_trace = format_l2_load_balance_trace,
1009  .n_next_nodes = 1,
1010  .next_nodes = {
1011  [0] = "error-drop",
1012  },
1013 };
1014 
1015 static uword
1017  vlib_node_runtime_t * node,
1018  vlib_frame_t * frame)
1019 {
1020  u32 n_left_from, next_index, *from, *to_next;
1021 
1022  from = vlib_frame_vector_args (frame);
1023  n_left_from = frame->n_vectors;
1024 
1025  next_index = node->cached_next_index;
1026 
1027  while (n_left_from > 0)
1028  {
1029  u32 n_left_to_next;
1030 
1031  vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1032 
1033  while (n_left_from > 0 && n_left_to_next > 0)
1034  {
1035  vlib_buffer_t *b0;
1036  u32 bi0, lbi0, next0, *nsh0;
1037  const dpo_id_t *dpo0;
1038  const load_balance_t *lb0;
1039 
1040  bi0 = from[0];
1041  to_next[0] = bi0;
1042  from += 1;
1043  to_next += 1;
1044  n_left_from -= 1;
1045  n_left_to_next -= 1;
1046 
1047  b0 = vlib_get_buffer (vm, bi0);
1048 
1049  lbi0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
1050  lb0 = load_balance_get(lbi0);
1051 
1052  /* SPI + SI are the second word of the NSH header */
1053  nsh0 = vlib_buffer_get_current (b0);
1054  vnet_buffer(b0)->ip.flow_hash = nsh0[1] % lb0->lb_n_buckets;
1055 
1056  dpo0 = load_balance_get_bucket_i(lb0,
1057  vnet_buffer(b0)->ip.flow_hash &
1058  (lb0->lb_n_buckets_minus_1));
1059 
1060  next0 = dpo0->dpoi_next_node;
1061  vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
1062 
1064  {
1065  load_balance_trace_t *tr = vlib_add_trace (vm, node, b0,
1066  sizeof (*tr));
1067  tr->lb_index = lbi0;
1068  }
1069  vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
1070  n_left_to_next, bi0, next0);
1071  }
1072 
1073  vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1074  }
1075 
1076  return frame->n_vectors;
1077 }
1078 
1079 static u8 *
1080 format_nsh_load_balance_trace (u8 * s, va_list * args)
1081 {
1082  /* The trace-format prototype pushes vm and node first; neither is
1083  * needed to render this record, so pop and discard them. */
1084  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1085  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1086  load_balance_trace_t *trace = va_arg (*args, load_balance_trace_t *);
1087 
1088  return (format (s, "NSH-load-balance: index %d", trace->lb_index));
1089 }
1089 
1090 /**
1091  * @brief
1092  */
1094  .function = nsh_load_balance,
1095  .name = "nsh-load-balance",
1096  .vector_size = sizeof (u32),
1097 
1098  .format_trace = format_nsh_load_balance_trace,
1099  .n_next_nodes = 1,
1100  .next_nodes = {
1101  [0] = "error-drop",
1102  },
1103 };
#define vec_validate(V, I)
Make sure vector is long enough for given index (no header, unspecified alignment) ...
Definition: vec.h:436
u16 lb_n_buckets
number of buckets in the load-balance.
Definition: load_balance.h:88
static const char *const load_balance_ip6_nodes[]
Definition: load_balance.c:798
dpo_id_t * lb_buckets
Vector of buckets containing the next DPOs, sized as lbo_num.
Definition: load_balance.h:133
dpo_lock_fn_t dv_lock
A reference counting lock function.
Definition: dpo.h:335
static u8 * format_l2_load_balance_trace(u8 *s, va_list *args)
Definition: load_balance.c:990
void load_balance_map_unlock(index_t lbmi)
vlib_combined_counter_main_t lbm_to_counters
Definition: load_balance.h:46
index_t load_balance_map_add_or_lock(u32 n_buckets, u32 sum_of_weights, const load_balance_path_t *paths)
fib_entry_flag_t lb_fib_entry_flags
Flags from the load-balance's associated fib_entry_t.
Definition: load_balance.h:105
static index_t load_balance_get_index(const load_balance_t *lb)
Definition: load_balance.c:64
sll srl srl sll sra u16x4 i
Definition: vector_sse2.h:343
static const char *const *const load_balance_nodes[DPO_PROTO_NUM]
Definition: load_balance.c:818
#define CLIB_UNUSED(x)
Definition: clib.h:79
A virtual function table registered for a DPO type.
Definition: dpo.h:330
static const char *const load_balance_mpls_nodes[]
Definition: load_balance.c:803
void vlib_validate_combined_counter(vlib_combined_counter_main_t *cm, u32 index)
validate a combined counter
Definition: counter.c:89
a
Definition: bitmap.h:516
u8 * format_dpo_type(u8 *s, va_list *args)
format a DPO type
Definition: dpo.c:111
dpo_id_t path_dpo
ID of the Data-path object.
Definition: load_balance.h:66
static void load_balance_set_n_buckets(load_balance_t *lb, u32 n_buckets)
Definition: load_balance.c:474
static u32 ip_flow_hash(void *data)
Definition: load_balance.c:878
enum load_balance_format_flags_t_ load_balance_format_flags_t
Flags controlling load-balance formatting/display.
Definitions for all things IP (v4|v6) unicast and multicast lookup related.
#define NULL
Definition: clib.h:55
static u32 ip4_compute_flow_hash(const ip4_header_t *ip, flow_hash_config_t flow_hash_config)
Definition: ip4.h:295
void load_balance_set_urpf(index_t lbi, index_t urpf)
Definition: load_balance.c:253
#define ethernet_buffer_header_size(b)
Determine the size of the Ethernet headers of the current frame in the buffer.
Definition: ethernet.h:390
void vlib_put_next_frame(vlib_main_t *vm, vlib_node_runtime_t *r, u32 next_index, u32 n_vectors_left)
Release pointer to next frame vector data.
Definition: main.c:459
flow_hash_config_t lb_hash_config
the hash config to use when selecting a bucket.
Definition: load_balance.h:128
u8 src_address[6]
Definition: packet.h:54
void dpo_copy(dpo_id_t *dst, const dpo_id_t *src)
atomic copy a data-plane object.
Definition: dpo.c:224
u32 index_t
A Data-Path Object is an object that represents actions that are applied to packets as they are swit...
Definition: dpo.h:41
Combined counter to hold both packets and byte differences.
Definition: counter.h:139
static const char *const load_balance_ip4_nodes[]
The per-protocol VLIB graph nodes that are assigned to a load-balance object.
Definition: load_balance.c:793
#define vec_add2(V, P, N)
Add N elements to end of vector V, return pointer to new elements in P.
Definition: vec.h:561
static u8 * format_load_balance_dpo(u8 *s, va_list *args)
Definition: load_balance.c:157
u8 * format(u8 *s, const char *fmt,...)
Definition: format.c:418
#define vec_validate_aligned(V, I, A)
Make sure vector is long enough for given index (no header, specified alignment)
Definition: vec.h:447
static u8 * load_balance_format(index_t lbi, load_balance_format_flags_t flags, u32 indent, u8 *s)
Definition: load_balance.c:105
static load_balance_t * load_balance_alloc_i(void)
Definition: load_balance.c:83
index_t load_balance_get_urpf(index_t lbi)
Definition: load_balance.c:272
static const char *const load_balance_l2_nodes[]
Definition: load_balance.c:808
#define pool_len(p)
Number of elements in pool vector.
Definition: pool.h:121
index_t load_balance_create(u32 n_buckets, dpo_proto_t lb_proto, flow_hash_config_t fhc)
Definition: load_balance.c:192
const dpo_id_t * drop_dpo_get(dpo_proto_t proto)
Definition: drop_dpo.c:25
void dpo_register(dpo_type_t type, const dpo_vft_t *vft, const char *const *const *nodes)
For a given DPO type Register:
Definition: dpo.c:249
#define pool_foreach(VAR, POOL, BODY)
Iterate through pool.
Definition: pool.h:376
load_balance_t * load_balance_pool
Pool of all DPOs.
Definition: load_balance.c:50
void load_balance_map_module_init(void)
Make/add a new or lock an existing Load-balance map.
static dpo_id_t * load_balance_get_buckets(load_balance_t *lb)
Definition: load_balance.c:70
#define always_inline
Definition: clib.h:84
void load_balance_module_init(void)
Definition: load_balance.c:828
u16 lb_n_buckets_minus_1
number of buckets in the load-balance - 1.
Definition: load_balance.h:93
u8 dst_address[6]
Definition: packet.h:53
u8 * format_white_space(u8 *s, va_list *va)
Definition: std-formats.c:113
static int next_hop_sort_by_weight(const load_balance_path_t *n1, const load_balance_path_t *n2)
Definition: load_balance.c:293
static void load_balance_mem_show(void)
Definition: load_balance.c:766
void fib_urpf_list_lock(index_t ui)
Definition: fib_urpf_list.c:87
static load_balance_t * load_balance_create_i(u32 num_buckets, dpo_proto_t lb_proto, flow_hash_config_t fhc)
Definition: load_balance.c:167
void fib_show_memory_usage(const char *name, u32 in_use_elts, u32 allocd_elts, size_t size_elt)
Show the memory usage for a type.
Definition: fib_node.c:221
void load_balance_multipath_update(const dpo_id_t *dpo, const load_balance_path_t *raw_nhs, load_balance_flags_t flags)
Definition: load_balance.c:482
unsigned long u64
Definition: types.h:89
f64 load_balance_get_multipath_tolerance(void)
Definition: load_balance.c:58
enum dpo_proto_t_ dpo_proto_t
Data path protocol.
static void load_balance_lock(dpo_id_t *dpo)
Definition: load_balance.c:716
int load_balance_is_drop(const dpo_id_t *dpo)
Definition: load_balance.c:225
static void load_balance_unlock(dpo_id_t *dpo)
Definition: load_balance.c:751
The identity of a DPO is a combination of its type and its instance number/index of objects of that t...
Definition: dpo.h:146
static load_balance_path_t * load_balance_multipath_next_hop_fixup(const load_balance_path_t *nhs, dpo_proto_t drop_proto)
Definition: load_balance.c:423
static void vlib_zero_combined_counter(vlib_combined_counter_main_t *cm, u32 index)
Clear a combined counter Clears the set of per-thread counters.
Definition: counter.h:276
counter_t packets
packet counter
Definition: counter.h:141
u8 * format_load_balance(u8 *s, va_list *args)
Definition: load_balance.c:149
dpo_type_t dpoi_type
the type
Definition: dpo.h:150
static const dpo_id_t * load_balance_get_bucket_i(const load_balance_t *lb, u32 bucket)
Definition: load_balance.h:202
dpo_proto_t lb_proto
The protocol of packets that traverse this LB.
Definition: load_balance.h:100
struct _unformat_input_t unformat_input_t
void load_balance_set_fib_entry_flags(index_t lbi, fib_entry_flag_t flags)
Definition: load_balance.c:242
load-balancing over a choice of [un]equal cost paths
Definition: dpo.h:104
static void * vlib_buffer_get_current(vlib_buffer_t *b)
Get pointer to current data to process.
Definition: buffer.h:188
static u32 ip6_compute_flow_hash(const ip6_header_t *ip, flow_hash_config_t flow_hash_config)
Definition: ip6.h:389
#define pool_put(P, E)
Free an object E in pool P.
Definition: pool.h:241
The FIB DPO provieds;.
Definition: load_balance.h:84
#define PREDICT_FALSE(x)
Definition: clib.h:97
load_balance_main_t load_balance_main
The one instance of load-balance main.
Definition: load_balance.c:55
#define vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, bi0, next0)
Finish enqueueing one buffer forward in the graph.
Definition: buffer_node.h:216
#define vlib_get_next_frame(vm, node, next_index, vectors, n_vectors_left)
Get pointer to next frame vector data by (vlib_node_runtime_t, next_index).
Definition: node_funcs.h:350
const dpo_id_t * load_balance_get_bucket(index_t lbi, u32 bucket)
Definition: load_balance.c:282
The load-balance object represents an ECMP choice.
Definition: load_balance.h:44
vlib_node_registration_t l2_load_balance_node
(constructor) VLIB_REGISTER_NODE (l2_load_balance_node)
dpo_id_t lb_buckets_inline[LB_NUM_INLINE_BUCKETS]
The rest of the cache line is used for buckets.
Definition: load_balance.h:141
#define pool_get_aligned(P, E, A)
Allocate an object E from a pool P (general version).
Definition: pool.h:169
enum load_balance_flags_t_ load_balance_flags_t
Flags controlling load-balance creation and modification.
#define UNFORMAT_END_OF_INPUT
Definition: format.h:143
#define hash_mix64(a0, b0, c0)
Definition: hash.h:507
svmdb_client_t * c
u16 n_vectors
Definition: node.h:344
static void vlib_get_combined_counter(const vlib_combined_counter_main_t *cm, u32 index, vlib_counter_t *result)
Get the value of a combined counter, never called in the speed path Scrapes the entire set of per-thr...
Definition: counter.h:250
vlib_main_t * vm
Definition: buffer.c:276
#define vec_free(V)
Free vector&#39;s memory (no header).
Definition: vec.h:340
static const char *const load_balance_nsh_nodes[]
Definition: load_balance.c:813
void load_balance_map_show_mem(void)
#define VLIB_BUFFER_IS_TRACED
Definition: buffer.h:85
static f64 fabs(f64 x)
Definition: math.h:50
#define clib_memcpy(a, b, c)
Definition: string.h:69
static uword max_pow2(uword x)
Definition: clib.h:263
static u8 * format_nsh_load_balance_trace(u8 *s, va_list *args)
#define LB_NUM_INLINE_BUCKETS
The number of buckets that a load-balance object can have and still fit in one cache-line.
Definition: load_balance.h:56
vlib_combined_counter_main_t lbm_via_counters
Definition: load_balance.h:47
static void load_balance_fill_buckets(load_balance_t *lb, load_balance_path_t *nhs, dpo_id_t *buckets, u32 n_buckets)
Definition: load_balance.c:449
enum fib_entry_flag_t_ fib_entry_flag_t
static uword nsh_load_balance(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
#define VLIB_CLI_COMMAND(x,...)
Definition: cli.h:154
vlib_node_registration_t nsh_load_balance_node
(constructor) VLIB_REGISTER_NODE (nsh_load_balance_node)
u16 cached_next_index
Next frame index that vector arguments were last enqueued to last time this node ran.
Definition: node.h:455
#define ASSERT(truth)
index_t lb_urpf
This is the index of the uRPF list for this LB.
Definition: load_balance.h:123
unsigned int u32
Definition: types.h:88
static load_balance_t * load_balance_get(index_t lbi)
Definition: load_balance.h:193
static clib_error_t * load_balance_show(vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd)
Definition: load_balance.c:836
u32 lb_locks
The number of locks, which is approximately the number of users, of this load-balance.
Definition: load_balance.h:113
static u64 mac_to_u64(u8 *m)
Definition: load_balance.c:889
#define IP_FLOW_HASH_DEFAULT
Default: 5-tuple without the "reverse" bit.
Definition: lookup.h:152
#define LB_HAS_INLINE_BUCKETS(_lb)
Definition: load_balance.h:198
void load_balance_set_bucket(index_t lbi, u32 bucket, const dpo_id_t *next)
Definition: load_balance.c:209
u8 * format_dpo_id(u8 *s, va_list *args)
Format a DPO_id_t oject
Definition: dpo.c:121
u32 flow_hash_config_t
A flow hash configuration is a mask of the flow hash options.
Definition: lookup.h:165
u64 uword
Definition: types.h:112
static void * vlib_add_trace(vlib_main_t *vm, vlib_node_runtime_t *r, vlib_buffer_t *b, u32 n_data_bytes)
Definition: trace_funcs.h:55
counter_t bytes
byte counter
Definition: counter.h:142
static uword l2_load_balance(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
Definition: load_balance.c:927
Definition: defs.h:47
unsigned short u16
Definition: types.h:57
#define DPO_PROTO_NUM
Definition: dpo.h:73
i64 word
Definition: types.h:111
static word flt_round_nearest(f64 x)
Definition: clib.h:314
void qsort(void *base, uword n, uword size, int(*compar)(const void *, const void *))
Definition: qsort.c:56
index_t dpoi_index
the index of objects of that type
Definition: dpo.h:162
#define vec_len(v)
Number of elements in vector (rvalue-only, NULL tolerant)
double f64
Definition: types.h:142
unsigned char u8
Definition: types.h:56
u32 path_weight
weight for the path.
Definition: load_balance.h:76
#define INDEX_INVALID
Invalid index - used when no index is known blazoned capitals INVALID speak volumes where ~0 does not...
Definition: dpo.h:47
static void load_balance_destroy(load_balance_t *lb)
Definition: load_balance.c:726
static void * vlib_frame_vector_args(vlib_frame_t *f)
Get pointer to frame vector data.
Definition: node_funcs.h:253
void fib_urpf_list_unlock(index_t ui)
Definition: fib_urpf_list.c:68
One path from an [EU]CMP set that the client wants to add to a load-balance object.
Definition: load_balance.h:62
static u32 l2_flow_hash(vlib_buffer_t *b0)
Definition: load_balance.c:895
#define vnet_buffer(b)
Definition: buffer.h:294
index_t lb_map
index of the load-balance map, INVALID if this LB does not use one
Definition: load_balance.h:118
const f64 multipath_next_hop_error_tolerance
Definition: load_balance.c:28
#define VLIB_REGISTER_NODE(x,...)
Definition: node.h:143
int dpo_is_drop(const dpo_id_t *dpo)
The Drop DPO will drop all packets, no questions asked.
Definition: drop_dpo.c:33
u32 ip_multipath_normalize_next_hops(const load_balance_path_t *raw_next_hops, load_balance_path_t **normalized_next_hops, u32 *sum_weight_in, f64 multipath_next_hop_error_tolerance)
Definition: load_balance.c:303
void dpo_reset(dpo_id_t *dpo)
reset a DPO ID The DPO will be unlocked.
Definition: dpo.c:194
#define vec_foreach(var, vec)
Vector iterator.
#define CLIB_MEMORY_BARRIER()
Definition: clib.h:101
u16 dpoi_next_node
The next VLIB node to follow.
Definition: dpo.h:158
#define LB_DBG(_p, _fmt, _args...)
Definition: load_balance.c:43
u8 ip_version_and_header_length
Definition: ip4_packet.h:131
u32 flags
Definition: vhost-user.h:78
#define CLIB_CACHE_LINE_BYTES
Definition: cache.h:67
u8 * format_load_balance_map(u8 *s, va_list ap)
u32 flags
buffer flags: VLIB_BUFFER_IS_TRACED: trace this buffer.
Definition: buffer.h:74
void vlib_cli_output(vlib_main_t *vm, char *fmt,...)
Definition: cli.c:577
struct load_balance_trace_t_ load_balance_trace_t
static vlib_buffer_t * vlib_get_buffer(vlib_main_t *vm, u32 buffer_index)
Translate buffer index into buffer pointer.
Definition: buffer_funcs.h:57
uword unformat(unformat_input_t *i, const char *fmt,...)
Definition: unformat.c:971
static uword unformat_check_input(unformat_input_t *i)
Definition: format.h:169
void dpo_stack(dpo_type_t child_type, dpo_proto_t child_proto, dpo_id_t *dpo, const dpo_id_t *parent)
Stack one DPO object on another, and thus establish a child-parent relationship.
Definition: dpo.c:401
static void load_balance_set_bucket_i(load_balance_t *lb, u32 bucket, dpo_id_t *buckets, const dpo_id_t *next)
Definition: load_balance.c:200
static uword pool_elts(void *v)
Number of active elements in a pool.
Definition: pool.h:109