FD.io VPP  v16.12-rc0-308-g931be3a
Vector Packet Processing
node.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include <lb/lb.h>
17 
18 #include <vnet/gre/packet.h>
19 #include <lb/lbhash.h>
20 
/*
 * X-macro table of the errors reported by the lb nodes.
 * Each _(sym, str) entry supplies a symbol suffix (pasted after LB_ERROR_
 * by the enum below) and its human-readable description string.
 * Callers define _() before expanding this list and #undef it afterwards.
 */
21 #define foreach_lb_error \
22  _(NONE, "no error") \
23  _(PROTO_NOT_SUPPORTED, "protocol not supported")
24 
25 typedef enum {
26 #define _(sym,str) LB_ERROR_##sym,
28 #undef _
30 } lb_error_t;
31 
32 static char *lb_error_strings[] = {
33 #define _(sym,string) string,
35 #undef _
36 };
37 
38 typedef struct {
41 } lb_trace_t;
42 
43 u8 *
44 format_lb_trace (u8 * s, va_list * args)
45 {
46  lb_main_t *lbm = &lb_main;
47  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
48  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
49  lb_trace_t *t = va_arg (*args, lb_trace_t *);
50  if (pool_is_free_index(lbm->vips, t->vip_index)) {
51  s = format(s, "lb vip[%d]: This VIP was freed since capture\n");
52  } else {
53  s = format(s, "lb vip[%d]: %U\n", t->vip_index, format_lb_vip, &lbm->vips[t->vip_index]);
54  }
55  if (pool_is_free_index(lbm->ass, t->as_index)) {
56  s = format(s, "lb as[%d]: This AS was freed since capture\n");
57  } else {
58  s = format(s, "lb as[%d]: %U\n", t->as_index, format_lb_as, &lbm->ass[t->as_index]);
59  }
60  return s;
61 }
62 
64 {
65  lb_main_t *lbm = &lb_main;
66  lb_hash_t *sticky_ht = lbm->per_cpu[cpu_index].sticky_ht;
67  //Check if size changed
68  if (PREDICT_FALSE(sticky_ht && (lbm->per_cpu_sticky_buckets != lb_hash_nbuckets(sticky_ht))))
69  {
70  //Dereference everything in there
72  u32 i;
73  lb_hash_foreach_entry(sticky_ht, b, i) {
74  vlib_refcount_add(&lbm->as_refcount, cpu_index, b->value[i], -1);
75  vlib_refcount_add(&lbm->as_refcount, cpu_index, 0, 1);
76  }
77 
78  lb_hash_free(sticky_ht);
79  sticky_ht = NULL;
80  }
81 
82  //Create if necessary
83  if (PREDICT_FALSE(sticky_ht == NULL)) {
85  sticky_ht = lbm->per_cpu[cpu_index].sticky_ht;
86  clib_warning("Regenerated sticky table %p", sticky_ht);
87  }
88 
89  ASSERT(sticky_ht);
90 
91  //Update timeout
92  sticky_ht->timeout = lbm->flow_timeout;
93  return sticky_ht;
94 }
95 
96 u64
98 {
99  return 0;
100 }
101 
102 u64
104 {
105  return 0;
106 }
107 
110 {
111  u32 hash;
112  if (is_input_v4)
113  {
114  ip4_header_t *ip40;
115  u64 ports;
116  ip40 = vlib_buffer_get_current (p);
117  if (PREDICT_TRUE (ip40->protocol == IP_PROTOCOL_TCP ||
118  ip40->protocol == IP_PROTOCOL_UDP))
119  ports = ((u64)((udp_header_t *)(ip40 + 1))->src_port << 16) |
120  ((u64)((udp_header_t *)(ip40 + 1))->dst_port);
121  else
122  ports = lb_node_get_other_ports4(ip40);
123 
124  hash = lb_hash_hash(*((u64 *)&ip40->address_pair), ports,
125  0, 0, 0);
126  }
127  else
128  {
129  ip6_header_t *ip60;
130  ip60 = vlib_buffer_get_current (p);
131  u64 ports;
132  if (PREDICT_TRUE (ip60->protocol == IP_PROTOCOL_TCP ||
133  ip60->protocol == IP_PROTOCOL_UDP))
134  ports = ((u64)((udp_header_t *)(ip60 + 1))->src_port << 16) |
135  ((u64)((udp_header_t *)(ip60 + 1))->dst_port);
136  else
137  ports = lb_node_get_other_ports6(ip60);
138 
139  hash = lb_hash_hash(ip60->src_address.as_u64[0],
140  ip60->src_address.as_u64[1],
141  ip60->dst_address.as_u64[0],
142  ip60->dst_address.as_u64[1],
143  ports);
144  }
145  return hash;
146 }
147 
150  vlib_node_runtime_t * node, vlib_frame_t * frame,
151  u8 is_input_v4, //Compile-time parameter stating that is input is v4 (or v6)
152  u8 is_encap_v4) //Compile-time parameter stating that is GRE encap is v4 (or v6)
153 {
154  lb_main_t *lbm = &lb_main;
155  u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
156  u32 cpu_index = os_get_cpu_number();
157  u32 lb_time = lb_hash_time_now(vm);
158 
159  lb_hash_t *sticky_ht = lb_get_sticky_table(cpu_index);
160  from = vlib_frame_vector_args (frame);
161  n_left_from = frame->n_vectors;
162  next_index = node->cached_next_index;
163 
164  u32 nexthash0 = 0;
165  if (PREDICT_TRUE(n_left_from > 0))
166  nexthash0 = lb_node_get_hash(vlib_get_buffer (vm, from[0]), is_input_v4);
167 
168  while (n_left_from > 0)
169  {
170  vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
171  while (n_left_from > 0 && n_left_to_next > 0)
172  {
173  u32 pi0;
174  vlib_buffer_t *p0;
175  lb_vip_t *vip0;
176  u32 asindex0;
177  u16 len0;
178  u32 available_index0;
179  u8 counter = 0;
180  u32 hash0 = nexthash0;
181 
182  if (PREDICT_TRUE(n_left_from > 1))
183  {
184  vlib_buffer_t *p1 = vlib_get_buffer (vm, from[1]);
185  //Compute next hash and prefetch bucket
186  nexthash0 = lb_node_get_hash(p1, is_input_v4);
187  lb_hash_prefetch_bucket(sticky_ht, nexthash0);
188  //Prefetch for encap, next
189  CLIB_PREFETCH (vlib_buffer_get_current(p1) - 64, 64, STORE);
190  }
191 
192  if (PREDICT_TRUE(n_left_from > 2))
193  {
194  vlib_buffer_t *p2;
195  p2 = vlib_get_buffer(vm, from[2]);
196  /* prefetch packet header and data */
197  vlib_prefetch_buffer_header(p2, STORE);
198  CLIB_PREFETCH (vlib_buffer_get_current(p2), 64, STORE);
199  }
200 
201  pi0 = to_next[0] = from[0];
202  from += 1;
203  n_left_from -= 1;
204  to_next += 1;
205  n_left_to_next -= 1;
206 
207  p0 = vlib_get_buffer (vm, pi0);
208  vip0 = pool_elt_at_index (lbm->vips,
209  vnet_buffer (p0)->ip.adj_index[VLIB_TX]);
210 
211  if (is_input_v4)
212  {
213  ip4_header_t *ip40;
214  ip40 = vlib_buffer_get_current (p0);
215  len0 = clib_net_to_host_u16(ip40->length);
216  }
217  else
218  {
219  ip6_header_t *ip60;
220  ip60 = vlib_buffer_get_current (p0);
221  len0 = clib_net_to_host_u16(ip60->payload_length) + sizeof(ip6_header_t);
222  }
223 
224  lb_hash_get(sticky_ht, hash0, vnet_buffer (p0)->ip.adj_index[VLIB_TX],
225  lb_time, &available_index0, &asindex0);
226 
227  if (PREDICT_TRUE(asindex0 != ~0))
228  {
229  //Found an existing entry
230  counter = LB_VIP_COUNTER_NEXT_PACKET;
231  }
232  else if (PREDICT_TRUE(available_index0 != ~0))
233  {
234  //There is an available slot for a new flow
235  asindex0 = vip0->new_flow_table[hash0 & vip0->new_flow_table_mask].as_index;
236  counter = LB_VIP_COUNTER_FIRST_PACKET;
237  counter = (asindex0 == 0)?LB_VIP_COUNTER_NO_SERVER:counter;
238 
239  //TODO: There are race conditions with as0 and vip0 manipulation.
240  //Configuration may be changed, vectors resized, etc...
241 
242  //Dereference previously used
243  vlib_refcount_add(&lbm->as_refcount, cpu_index,
244  lb_hash_available_value(sticky_ht, hash0, available_index0), -1);
245  vlib_refcount_add(&lbm->as_refcount, cpu_index,
246  asindex0, 1);
247 
248  //Add sticky entry
249  //Note that when there is no AS configured, an entry is configured anyway.
250  //But no configured AS is not something that should happen
251  lb_hash_put(sticky_ht, hash0, asindex0,
252  vnet_buffer (p0)->ip.adj_index[VLIB_TX],
253  available_index0, lb_time);
254  }
255  else
256  {
257  //Could not store new entry in the table
258  asindex0 = vip0->new_flow_table[hash0 & vip0->new_flow_table_mask].as_index;
259  counter = LB_VIP_COUNTER_UNTRACKED_PACKET;
260  }
261 
263  cpu_index,
264  vnet_buffer (p0)->ip.adj_index[VLIB_TX],
265  1);
266 
267  //Now let's encap
268  {
269  gre_header_t *gre0;
270  if (is_encap_v4)
271  {
272  ip4_header_t *ip40;
273  vlib_buffer_advance(p0, - sizeof(ip4_header_t) - sizeof(gre_header_t));
274  ip40 = vlib_buffer_get_current(p0);
275  gre0 = (gre_header_t *)(ip40 + 1);
276  ip40->src_address = lbm->ip4_src_address;
277  ip40->dst_address = lbm->ass[asindex0].address.ip4;
278  ip40->ip_version_and_header_length = 0x45;
279  ip40->ttl = 128;
280  ip40->length = clib_host_to_net_u16(len0 + sizeof(gre_header_t) + sizeof(ip4_header_t));
281  ip40->protocol = IP_PROTOCOL_GRE;
282  ip40->checksum = ip4_header_checksum (ip40);
283  }
284  else
285  {
286  ip6_header_t *ip60;
287  vlib_buffer_advance(p0, - sizeof(ip6_header_t) - sizeof(gre_header_t));
288  ip60 = vlib_buffer_get_current(p0);
289  gre0 = (gre_header_t *)(ip60 + 1);
290  ip60->dst_address = lbm->ass[asindex0].address.ip6;
291  ip60->src_address = lbm->ip6_src_address;
292  ip60->hop_limit = 128;
293  ip60->ip_version_traffic_class_and_flow_label = clib_host_to_net_u32 (0x6<<28);
294  ip60->payload_length = clib_host_to_net_u16(len0 + sizeof(gre_header_t));
295  ip60->protocol = IP_PROTOCOL_GRE;
296  }
297 
298  gre0->flags_and_version = 0;
299  gre0->protocol = (is_input_v4)?
300  clib_host_to_net_u16(0x0800):
301  clib_host_to_net_u16(0x86DD);
302  }
303 
305  {
306  lb_trace_t *tr = vlib_add_trace (vm, node, p0, sizeof (*tr));
307  tr->as_index = asindex0;
308  tr->vip_index = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
309  }
310 
311  //Enqueue to next
312  //Note that this is going to error if asindex0 == 0
313  vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbm->ass[asindex0].dpo.dpoi_index;
314  vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
315  n_left_to_next, pi0,
316  lbm->ass[asindex0].dpo.dpoi_next_node);
317  }
318  vlib_put_next_frame (vm, node, next_index, n_left_to_next);
319  }
320 
321  return frame->n_vectors;
322 }
323 
324 static uword
326  vlib_node_runtime_t * node, vlib_frame_t * frame)
327 {
328  return lb_node_fn(vm, node, frame, 0, 0);
329 }
330 
331 static uword
333  vlib_node_runtime_t * node, vlib_frame_t * frame)
334 {
335  return lb_node_fn(vm, node, frame, 0, 1);
336 }
337 
338 static uword
340  vlib_node_runtime_t * node, vlib_frame_t * frame)
341 {
342  return lb_node_fn(vm, node, frame, 1, 0);
343 }
344 
345 static uword
347  vlib_node_runtime_t * node, vlib_frame_t * frame)
348 {
349  return lb_node_fn(vm, node, frame, 1, 1);
350 }
351 
353 {
354  .function = lb6_gre6_node_fn,
355  .name = "lb6-gre6",
356  .vector_size = sizeof (u32),
357  .format_trace = format_lb_trace,
358 
359  .n_errors = LB_N_ERROR,
360  .error_strings = lb_error_strings,
361 
362  .n_next_nodes = LB_N_NEXT,
363  .next_nodes =
364  {
365  [LB_NEXT_DROP] = "error-drop"
366  },
367 };
368 
370 {
371  .function = lb6_gre4_node_fn,
372  .name = "lb6-gre4",
373  .vector_size = sizeof (u32),
374  .format_trace = format_lb_trace,
375 
376  .n_errors = LB_N_ERROR,
377  .error_strings = lb_error_strings,
378 
379  .n_next_nodes = LB_N_NEXT,
380  .next_nodes =
381  {
382  [LB_NEXT_DROP] = "error-drop"
383  },
384 };
385 
387 {
388  .function = lb4_gre6_node_fn,
389  .name = "lb4-gre6",
390  .vector_size = sizeof (u32),
391  .format_trace = format_lb_trace,
392 
393  .n_errors = LB_N_ERROR,
394  .error_strings = lb_error_strings,
395 
396  .n_next_nodes = LB_N_NEXT,
397  .next_nodes =
398  {
399  [LB_NEXT_DROP] = "error-drop"
400  },
401 };
402 
404 {
405  .function = lb4_gre4_node_fn,
406  .name = "lb4-gre4",
407  .vector_size = sizeof (u32),
408  .format_trace = format_lb_trace,
409 
410  .n_errors = LB_N_ERROR,
411  .error_strings = lb_error_strings,
412 
413  .n_next_nodes = LB_N_NEXT,
414  .next_nodes =
415  {
416  [LB_NEXT_DROP] = "error-drop"
417  },
418 };
419 
void vlib_put_next_frame(vlib_main_t *vm, vlib_node_runtime_t *r, u32 next_index, u32 n_vectors_left)
Release pointer to next frame vector data.
Definition: main.c:457
static void vlib_increment_simple_counter(vlib_simple_counter_main_t *cm, u32 cpu_index, u32 index, u32 increment)
Increment a simple counter.
Definition: counter.h:78
vlib_node_registration_t lb4_gre4_node
(constructor) VLIB_REGISTER_NODE (lb4_gre4_node)
Definition: node.c:403
format_function_t format_lb_vip
Definition: lb.h:217
static uword lb4_gre4_node_fn(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
Definition: node.c:346
u32 lb_hash_time_now(vlib_main_t *vm)
Definition: lb.c:50
u64 lb_node_get_other_ports6(ip6_header_t *ip60)
Definition: node.c:103
u64 lb_node_get_other_ports4(ip4_header_t *ip40)
Definition: node.c:97
sll srl srl sll sra u16x4 i
Definition: vector_sse2.h:343
#define CLIB_UNUSED(x)
Definition: clib.h:79
ip4_address_t src_address
Definition: ip4_packet.h:138
u32 per_cpu_sticky_buckets
Number of buckets in the per-cpu sticky hash table.
Definition: lb.h:272
#define PREDICT_TRUE(x)
Definition: clib.h:98
u64 as_u64[2]
Definition: ip6_packet.h:50
#define NULL
Definition: clib.h:55
static_always_inline lb_hash_t * lb_hash_alloc(u32 buckets, u32 timeout)
Definition: lbhash.h:78
Definition: lb.h:48
static_always_inline u32 lb_node_get_hash(vlib_buffer_t *p, u8 is_input_v4)
Definition: node.c:109
lb_error_t
Definition: node.c:25
static_always_inline u32 lb_hash_available_value(lb_hash_t *h, u32 hash, u32 available_index)
Definition: lbhash.h:186
vlib_node_registration_t lb6_gre4_node
(constructor) VLIB_REGISTER_NODE (lb6_gre4_node)
Definition: node.c:369
lb_hash_t * lb_get_sticky_table(u32 cpu_index)
Definition: node.c:63
ip6_address_t src_address
Definition: ip6_packet.h:300
lb_hash_t * sticky_ht
Each CPU has its own sticky flow hash table.
Definition: lb.h:225
ip46_address_t address
Destination address used to tunnel traffic towards that application server.
Definition: lb.h:67
u32 timeout
Definition: lbhash.h:57
u32 vip_index
Definition: node.c:39
#define lb_hash_nbuckets(h)
Definition: lbhash.h:61
#define static_always_inline
Definition: clib.h:85
static void * vlib_buffer_get_current(vlib_buffer_t *b)
Get pointer to current data to process.
Definition: buffer.h:190
ip4_address_t dst_address
Definition: ip4_packet.h:138
lb_main_t lb_main
Definition: lb.c:26
u32 flow_timeout
Flow timeout in seconds.
Definition: lb.h:277
Definition: lb.h:228
vlib_refcount_t as_refcount
Each AS has an associated reference counter.
Definition: lb.h:247
#define clib_warning(format, args...)
Definition: error.h:59
unsigned long u64
Definition: types.h:89
format_function_t format_lb_as
Definition: lb.h:112
lb_vip_t * vips
Pool of all Virtual IPs.
Definition: lb.h:232
ip4_address_t ip4_src_address
Source address used for IPv4 encapsulated traffic.
Definition: lb.h:267
#define pool_elt_at_index(p, i)
Returns pointer to element at given index.
Definition: pool.h:369
u32 value[LBHASH_ENTRY_PER_BUCKET]
Definition: lbhash.h:52
uword os_get_cpu_number(void)
Definition: unix-misc.c:224
ip4_address_pair_t address_pair
Definition: ip4_packet.h:140
#define PREDICT_FALSE(x)
Definition: clib.h:97
static_always_inline void lb_hash_prefetch_bucket(lb_hash_t *ht, u32 hash)
Definition: lbhash.h:124
#define vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, bi0, next0)
Finish enqueueing one buffer forward in the graph.
Definition: buffer_node.h:216
#define vlib_get_next_frame(vm, node, next_index, vectors, n_vectors_left)
Get pointer to next frame vector data by (vlib_node_runtime_t, next_index).
Definition: node_funcs.h:350
u16 flags_and_version
Definition: packet.h:37
static_always_inline uword lb_node_fn(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame, u8 is_input_v4, u8 is_encap_v4)
Definition: node.c:149
static uword lb6_gre4_node_fn(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
Definition: node.c:332
#define lb_hash_foreach_entry(h, bucket, i)
Definition: lbhash.h:69
static char * lb_error_strings[]
Definition: node.c:32
u16 n_vectors
Definition: node.h:344
#define CLIB_PREFETCH(addr, size, type)
Definition: cache.h:82
u32 as_index
Definition: node.c:40
u16 protocol
Definition: packet.h:52
static void vlib_buffer_advance(vlib_buffer_t *b, word l)
Advance current data pointer by the supplied (signed!) amount.
Definition: buffer.h:203
#define pool_is_free_index(P, I)
Use free bitmap to query whether given index is free.
Definition: pool.h:211
u32 as_index
Definition: lb.h:115
static_always_inline void lb_hash_free(lb_hash_t *h)
Definition: lbhash.h:96
vlib_node_registration_t lb4_gre6_node
(constructor) VLIB_REGISTER_NODE (lb4_gre6_node)
Definition: node.c:386
lb_as_t * ass
Pool of ASs.
Definition: lb.h:240
#define foreach_lb_error
Definition: node.c:21
u16 cached_next_index
Definition: node.h:463
#define ASSERT(truth)
unsigned int u32
Definition: types.h:88
#define vnet_buffer(b)
Definition: buffer.h:333
u32 new_flow_table_mask
New flows table length - 1 (length MUST be a power of 2)
Definition: lb.h:168
static uword lb6_gre6_node_fn(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
Definition: node.c:325
lb_per_cpu_t * per_cpu
Some global data is per-cpu.
Definition: lb.h:252
#define VLIB_BUFFER_IS_TRACED
Definition: buffer.h:95
u64 uword
Definition: types.h:112
vlib_simple_counter_main_t vip_counters[LB_N_VIP_COUNTERS]
Per VIP counter.
Definition: lb.h:282
static void * vlib_add_trace(vlib_main_t *vm, vlib_node_runtime_t *r, vlib_buffer_t *b, u32 n_data_bytes)
Definition: trace_funcs.h:55
u32 ip_version_traffic_class_and_flow_label
Definition: ip6_packet.h:287
Definition: defs.h:47
unsigned short u16
Definition: types.h:57
ip6_address_t ip6_src_address
Source address used in IPv6 encapsulated traffic.
Definition: lb.h:262
u16 payload_length
Definition: ip6_packet.h:291
index_t dpoi_index
the index of objects of that type
Definition: dpo.h:154
unsigned char u8
Definition: types.h:56
static void * vlib_frame_vector_args(vlib_frame_t *f)
Get pointer to frame vector data.
Definition: node_funcs.h:253
u8 * format_lb_trace(u8 *s, va_list *args)
Definition: node.c:44
vlib_node_registration_t lb6_gre6_node
(constructor) VLIB_REGISTER_NODE (lb6_gre6_node)
Definition: node.c:352
#define vlib_prefetch_buffer_header(b, type)
Prefetch buffer metadata.
Definition: buffer.h:166
#define VLIB_REGISTER_NODE(x,...)
Definition: node.h:143
u8 * format(u8 *s, const char *fmt,...)
Definition: format.c:418
static uword lb4_gre6_node_fn(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
Definition: node.c:339
dpo_id_t dpo
The next DPO in the graph to follow.
Definition: lb.h:108
u16 dpoi_next_node
The next VLIB node to follow.
Definition: dpo.h:150
u8 ip_version_and_header_length
Definition: ip4_packet.h:108
lb_new_flow_entry_t * new_flow_table
Vector mapping (flow-hash & new_connect_table_mask) to AS index.
Definition: lb.h:162
u32 flags
buffer flags: VLIB_BUFFER_IS_TRACED: trace this buffer.
Definition: buffer.h:85
static_always_inline void vlib_refcount_add(vlib_refcount_t *r, u32 cpu_index, u32 counter_index, i32 v)
Definition: refcount.h:48
Load balancing service is provided per VIP.
Definition: lb.h:154
static vlib_buffer_t * vlib_get_buffer(vlib_main_t *vm, u32 buffer_index)
Translate buffer index into buffer pointer.
Definition: buffer_funcs.h:69
static_always_inline void lb_hash_get(lb_hash_t *ht, u32 hash, u32 vip, u32 time_now, u32 *available_index, u32 *found_value)
Definition: lbhash.h:131
static u16 ip4_header_checksum(ip4_header_t *i)
Definition: ip4_packet.h:194
ip6_address_t dst_address
Definition: ip6_packet.h:300
static_always_inline u32 lb_hash_hash(u64 k0, u64 k1, u64 k2, u64 k3, u64 k4)
Definition: lbhash.h:116
static_always_inline void lb_hash_put(lb_hash_t *h, u32 hash, u32 value, u32 vip, u32 available_index, u32 time_now)
Definition: lbhash.h:192