FD.io VPP  v19.01.3-6-g70449b9b9
Vector Packet Processing
vhash.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  Copyright (c) 2010 Eliot Dresselhaus
17 
18  Permission is hereby granted, free of charge, to any person obtaining
19  a copy of this software and associated documentation files (the
20  "Software"), to deal in the Software without restriction, including
21  without limitation the rights to use, copy, modify, merge, publish,
22  distribute, sublicense, and/or sell copies of the Software, and to
23  permit persons to whom the Software is furnished to do so, subject to
24  the following conditions:
25 
26  The above copyright notice and this permission notice shall be
27  included in all copies or substantial portions of the Software.
28 
29  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
30  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
31  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
32  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
33  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
34  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
35  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
36 */
37 
38 #ifndef included_clib_vhash_h
39 #define included_clib_vhash_h
40 
41 #include <vppinfra/vector.h>
42 
43 #ifdef CLIB_HAVE_VEC128
44 
45 #include <vppinfra/cache.h>
46 #include <vppinfra/hash.h>
47 #include <vppinfra/pipeline.h>
48 
49 /* Gathers 32 bits worth of key with given index. */
50 typedef u32 (vhash_key_function_t) (void *state, u32 vector_index,
51  u32 key_word_index);
52 typedef u32x4 (vhash_4key_function_t) (void *state, u32 vector_index,
53  u32 key_word_index);
54 /* Sets/gets result of hash lookup. */
55 typedef u32 (vhash_result_function_t) (void *state, u32 vector_index,
56  u32 result, u32 n_key_u32);
57 typedef u32x4 (vhash_4result_function_t) (void *state, u32 vector_index,
58  u32x4 results, u32 n_key_u32);
59 
60 typedef struct
61 {
62  u32x4_union_t hashed_key[3];
63 } vhash_hashed_key_t;
64 
65 /* Search buckets are really this structure. */
66 typedef struct
67 {
68  /* 4 results for this bucket.
69  Zero is used to mark empty results. This means user can't use the result ~0
70  since user results differ from internal results stored in buckets by 1.
71  e.g. internal result = user result + 1. */
72  u32x4_union_t result;
73 
74  /* n_key_u32s u32x4s of key data follow. */
75  u32x4_union_t key[0];
76 } vhash_search_bucket_t;
77 
78 typedef struct
79 {
80  u32x4_union_t *search_buckets;
81 
82  /* Vector of bucket free indices. */
83  u32 *free_indices;
84 
85  /* Number of entries in this overflow bucket. */
86  u32 n_overflow;
87 } vhash_overflow_buckets_t;
88 
89 typedef struct
90 {
91  /* 2^log2_n_keys keys grouped in groups of 4.
92  Each bucket contains 4 results plus 4 keys for a
93  total of (1 + n_key_u32) u32x4s. */
94  u32x4_union_t *search_buckets;
95 
96  /* When a bucket of 4 results/keys are full we search
97  the overflow. hash_key is used to select which overflow
98  bucket. */
99  vhash_overflow_buckets_t overflow_buckets[16];
100 
101  /* Total count of occupied elements in hash table. */
102  u32 n_elts;
103 
104  u32 log2_n_keys;
105 
106  /* Number of 32 bit words in a hash key. */
107  u32 n_key_u32;
108 
109  u32x4_union_t bucket_mask;
110 
111  /* table[i] = min_log2 (first_set (~i)). */
112  u8 find_first_zero_table[16];
113 
114  /* Hash seeds for Jenkins hash. */
115  u32x4_union_t hash_seeds[3];
116 
117  /* Key work space is a vector of length
118  n_key_u32s << log2_n_key_word_len_u32x. */
119  u32 log2_n_key_word_len_u32x;
120 
121  /* Work space to store keys between pipeline stages. */
122  u32x4_union_t *key_work_space;
123 
124  /* Hash work space to store Jenkins hash values between
125  pipeline stages. */
126  vhash_hashed_key_t *hash_work_space;
127 } vhash_t;
128 
129 always_inline vhash_overflow_buckets_t *
130 vhash_get_overflow_buckets (vhash_t * h, u32 key)
131 {
132  u32 i = (((key & h->bucket_mask.as_u32[0]) >> 2) & 0xf);
133  ASSERT (i < ARRAY_LEN (h->overflow_buckets));
134  return h->overflow_buckets + i;
135 }
136 
138 vhash_is_non_empty_overflow_bucket (vhash_t * h, u32 key)
139 {
140  u32 i = (((key & h->bucket_mask.as_u32[0]) >> 2) & 0xf);
141  ASSERT (i < ARRAY_LEN (h->overflow_buckets));
142  return h->overflow_buckets[i].n_overflow > 0;
143 }
144 
145 always_inline void
146 vhash_free_overflow_buckets (vhash_overflow_buckets_t * obs)
147 {
148  vec_free (obs->search_buckets);
149  vec_free (obs->free_indices);
150 }
151 
152 always_inline void
153 vhash_free (vhash_t * h)
154 {
155  uword i;
156  for (i = 0; i < ARRAY_LEN (h->overflow_buckets); i++)
157  vhash_free_overflow_buckets (&h->overflow_buckets[i]);
158  vec_free (h->search_buckets);
159  vec_free (h->key_work_space);
160  vec_free (h->hash_work_space);
161 }
162 
163 always_inline void
164 vhash_set_key_word (vhash_t * h, u32 wi, u32 vi, u32 value)
165 {
166  u32 i0 = (wi << h->log2_n_key_word_len_u32x) + (vi / 4);
167  u32 i1 = vi % 4;
168  vec_elt (h->key_work_space, i0).as_u32[i1] = value;
169 }
170 
171 always_inline void
172 vhash_set_key_word_u32x (vhash_t * h, u32 wi, u32 vi, u32x value)
173 {
174  u32 i0 = (wi << h->log2_n_key_word_len_u32x) + (vi / 4);
175  vec_elt (h->key_work_space, i0).as_u32x4 = value;
176 }
177 
179 vhash_get_key_word (vhash_t * h, u32 wi, u32 vi)
180 {
181  u32 i0 = (wi << h->log2_n_key_word_len_u32x) + (vi / 4);
182  u32 i1 = vi % 4;
183  return vec_elt (h->key_work_space, i0).as_u32[i1];
184 }
185 
186 always_inline u32x
187 vhash_get_key_word_u32x (vhash_t * h, u32 wi, u32 vi)
188 {
189  u32 i0 = (wi << h->log2_n_key_word_len_u32x) + vi;
190  return vec_elt (h->key_work_space, i0).as_u32x4;
191 }
192 
193 always_inline void
194 vhash_validate_sizes (vhash_t * h, u32 n_key_u32, u32 n_vectors)
195 {
196  u32 n, l;
197 
198  n = max_pow2 (n_vectors) / 4;
199  n = clib_max (n, 8);
200 
201  h->log2_n_key_word_len_u32x = l = min_log2 (n);
202  vec_validate_aligned (h->key_work_space, (n_key_u32 << l) - 1,
204  vec_validate_aligned (h->hash_work_space, n - 1, CLIB_CACHE_LINE_BYTES);
205 }
206 
207 always_inline void
208 vhash_gather_key_stage (vhash_t * h,
209  u32 vector_index,
210  u32 n_vectors,
211  vhash_key_function_t key_function,
212  void *state, u32 n_key_u32s)
213 {
214  u32 i, j, vi;
215 
216  /* Gather keys for 4 packets (for 128 bit vector length e.g. u32x4). */
217  for (i = 0; i < n_vectors; i++)
218  {
219  vi = vector_index * 4 + i;
220  for (j = 0; j < n_key_u32s; j++)
221  vhash_set_key_word (h, j, vi, key_function (state, vi, j));
222  }
223 }
224 
225 always_inline void
226 vhash_gather_4key_stage (vhash_t * h,
227  u32 vector_index,
228  vhash_4key_function_t key_function,
229  void *state, u32 n_key_u32s)
230 {
231  u32 j, vi;
232  vi = vector_index * 4;
233  for (j = 0; j < n_key_u32s; j++)
234  vhash_set_key_word_u32x (h, j, vi, key_function (state, vi, j));
235 }
236 
237 always_inline void
238 vhash_mix_stage (vhash_t * h, u32 vector_index, u32 n_key_u32s)
239 {
240  i32 i, n_left;
241  u32x a, b, c;
242 
243  /* Only need to do this for keys longer than 12 bytes. */
244  ASSERT (n_key_u32s > 3);
245 
246  a = h->hash_seeds[0].as_u32x4;
247  b = h->hash_seeds[1].as_u32x4;
248  c = h->hash_seeds[2].as_u32x4;
249  for (i = 0, n_left = n_key_u32s - 3; n_left > 0; n_left -= 3, i += 3)
250  {
251  a +=
252  vhash_get_key_word_u32x (h, n_key_u32s - 1 - (i + 0), vector_index);
253  if (n_left > 1)
254  b +=
255  vhash_get_key_word_u32x (h, n_key_u32s - 1 - (i + 1), vector_index);
256  if (n_left > 2)
257  c +=
258  vhash_get_key_word_u32x (h, n_key_u32s - 1 - (i + 2), vector_index);
259 
260  hash_v3_mix_u32x (a, b, c);
261  }
262 
263  /* Save away a, b, c for later finalize. */
264  {
265  vhash_hashed_key_t *hk =
266  vec_elt_at_index (h->hash_work_space, vector_index);
267  hk->hashed_key[0].as_u32x4 = a;
268  hk->hashed_key[1].as_u32x4 = b;
269  hk->hashed_key[2].as_u32x4 = c;
270  }
271 }
272 
273 always_inline vhash_search_bucket_t *
274 vhash_get_search_bucket_with_index (vhash_t * h, u32 i, u32 n_key_u32s)
275 {
276  return ((vhash_search_bucket_t *)
277  vec_elt_at_index (h->search_buckets,
278  (i / 4) *
279  ((sizeof (vhash_search_bucket_t) /
280  sizeof (u32x4)) + n_key_u32s)));
281 }
282 
283 always_inline vhash_search_bucket_t *
284 vhash_get_search_bucket (vhash_t * h, u32 key_hash, u32 n_key_u32s)
285 {
286  u32 i = key_hash & h->bucket_mask.as_u32[0];
287  return vhash_get_search_bucket_with_index (h, i, n_key_u32s);
288 }
289 
291 vhash_get_4_search_bucket_byte_offsets (vhash_t * h, u32x4 key_hash,
292  u32 n_key_u32s)
293 {
294  vhash_search_bucket_t *b;
295  u32 n_bytes_per_bucket = sizeof (b[0]) + n_key_u32s * sizeof (b->key[0]);
296  u32x4 r = key_hash & h->bucket_mask.as_u32x4;
297 
298  /* Multiply with shifts and adds to get bucket byte offset. */
299 #define _(x) u32x4_ishift_left (r, (x) - 2)
300  if (n_bytes_per_bucket == (1 << 5))
301  r = _(5);
302  else if (n_bytes_per_bucket == ((1 << 5) + (1 << 4)))
303  r = _(5) + _(4);
304  else if (n_bytes_per_bucket == (1 << 6))
305  r = _(6);
306  else if (n_bytes_per_bucket == ((1 << 6) + (1 << 4)))
307  r = _(6) + _(4);
308  else if (n_bytes_per_bucket == ((1 << 6) + (1 << 5)))
309  r = _(6) + _(5);
310  else if (n_bytes_per_bucket == ((1 << 6) + (1 << 5) + (1 << 4)))
311  r = _(6) + _(5) + _(4);
312  else
313  ASSERT (0);
314 #undef _
315  return r;
316 }
317 
318 always_inline void
319 vhash_finalize_stage (vhash_t * h, u32 vector_index, u32 n_key_u32s)
320 {
321  i32 n_left;
322  u32x a, b, c;
323  vhash_hashed_key_t *hk =
324  vec_elt_at_index (h->hash_work_space, vector_index);
325 
326  if (n_key_u32s <= 3)
327  {
328  a = h->hash_seeds[0].as_u32x4;
329  b = h->hash_seeds[1].as_u32x4;
330  c = h->hash_seeds[2].as_u32x4;
331  n_left = n_key_u32s;
332  }
333  else
334  {
335  a = hk->hashed_key[0].as_u32x4;
336  b = hk->hashed_key[1].as_u32x4;
337  c = hk->hashed_key[2].as_u32x4;
338  n_left = 3;
339  }
340 
341  if (n_left > 0)
342  a += vhash_get_key_word_u32x (h, 0, vector_index);
343  if (n_left > 1)
344  b += vhash_get_key_word_u32x (h, 1, vector_index);
345  if (n_left > 2)
346  c += vhash_get_key_word_u32x (h, 2, vector_index);
347 
348  hash_v3_finalize_u32x (a, b, c);
349 
350  /* Only save away last 32 bits of hash code. */
351  hk->hashed_key[2].as_u32x4 = c;
352 
353  /* Prefetch buckets. This costs a bit for small tables but saves
354  big for large ones. */
355  {
356  vhash_search_bucket_t *b0, *b1, *b2, *b3;
357  u32x4_union_t kh;
358 
359  kh.as_u32x4 = vhash_get_4_search_bucket_byte_offsets (h, c, n_key_u32s);
360  hk->hashed_key[1].as_u32x4 = kh.as_u32x4;
361 
362  b0 = (void *) h->search_buckets + kh.as_u32[0];
363  b1 = (void *) h->search_buckets + kh.as_u32[1];
364  b2 = (void *) h->search_buckets + kh.as_u32[2];
365  b3 = (void *) h->search_buckets + kh.as_u32[3];
366 
367  CLIB_PREFETCH (b0, sizeof (b0[0]) + n_key_u32s * sizeof (b0->key[0]),
368  READ);
369  CLIB_PREFETCH (b1, sizeof (b1[0]) + n_key_u32s * sizeof (b1->key[0]),
370  READ);
371  CLIB_PREFETCH (b2, sizeof (b2[0]) + n_key_u32s * sizeof (b2->key[0]),
372  READ);
373  CLIB_PREFETCH (b3, sizeof (b3[0]) + n_key_u32s * sizeof (b3->key[0]),
374  READ);
375  }
376 }
377 
379 vhash_merge_results (u32x4 r)
380 {
381  r = r | u32x4_word_shift_right (r, 2);
382  r = r | u32x4_word_shift_right (r, 1);
383  return u32x4_get0 (r);
384 }
385 
386 /* Bucket is full if none of its 4 results are 0. */
388 vhash_search_bucket_is_full (u32x4 r)
389 {
390  return u32x4_zero_byte_mask (r) == 0;
391 }
392 
394 vhash_non_empty_result_index (u32x4 x)
395 {
396  u32 empty_mask = u32x4_zero_byte_mask (x);
397  ASSERT (empty_mask != 0xffff);
398  return min_log2 (0xffff & ~empty_mask) / 4;
399 }
400 
402 vhash_empty_result_index (u32x4 x)
403 {
404  u32 empty_mask = u32x4_zero_byte_mask (x);
405  ASSERT (empty_mask != 0);
406  return min_log2 (0xffff & empty_mask) / 4;
407 }
408 
410 vhash_bucket_compare (vhash_t * h,
411  u32x4_union_t * bucket, u32 key_word_index, u32 vi)
412 {
413  u32 k = vhash_get_key_word (h, key_word_index, vi);
414  u32x4 x = { k, k, k, k };
415  return (bucket[key_word_index].as_u32x4 == x);
416 }
417 
418 #define vhash_bucket_compare_4(h,wi,vi,b0,b1,b2,b3,cmp0,cmp1,cmp2,cmp3) \
419 do { \
420  u32x4 _k4 = vhash_get_key_word_u32x ((h), (wi), (vi)); \
421  u32x4 _k0 = u32x4_splat_word (_k4, 0); \
422  u32x4 _k1 = u32x4_splat_word (_k4, 1); \
423  u32x4 _k2 = u32x4_splat_word (_k4, 2); \
424  u32x4 _k3 = u32x4_splat_word (_k4, 3); \
425  \
426  cmp0 = (b0->key[wi].as_u32x4 == _k0); \
427  cmp1 = (b1->key[wi].as_u32x4 == _k1); \
428  cmp2 = (b2->key[wi].as_u32x4 == _k2); \
429  cmp3 = (b3->key[wi].as_u32x4 == _k3); \
430 } while (0)
431 
432 u32 vhash_get_overflow (vhash_t * h, u32 key_hash, u32 vi, u32 n_key_u32s);
433 
434 always_inline void
435 vhash_get_stage (vhash_t * h,
436  u32 vector_index,
437  u32 n_vectors,
438  vhash_result_function_t result_function,
439  void *state, u32 n_key_u32s)
440 {
441  u32 i, j;
442  vhash_hashed_key_t *hk =
443  vec_elt_at_index (h->hash_work_space, vector_index);
444  vhash_search_bucket_t *b;
445 
446  for (i = 0; i < n_vectors; i++)
447  {
448  u32 vi = vector_index * 4 + i;
449  u32 key_hash = hk->hashed_key[2].as_u32[i];
450  u32 result;
451  u32x4 r, r0;
452 
453  b = vhash_get_search_bucket (h, key_hash, n_key_u32s);
454 
455  r = r0 = b->result.as_u32x4;
456  for (j = 0; j < n_key_u32s; j++)
457  r &= vhash_bucket_compare (h, &b->key[0], j, vi);
458 
459  /* At this point only one of 4 results should be non-zero.
460  So we can or all 4 together and get the valid result (if there is one). */
461  result = vhash_merge_results (r);
462 
463  if (!result && vhash_search_bucket_is_full (r0))
464  result = vhash_get_overflow (h, key_hash, vi, n_key_u32s);
465 
466  result_function (state, vi, result - 1, n_key_u32s);
467  }
468 }
469 
470 always_inline void
471 vhash_get_4_stage (vhash_t * h,
472  u32 vector_index,
473  vhash_4result_function_t result_function,
474  void *state, u32 n_key_u32s)
475 {
476  u32 i, vi;
477  vhash_hashed_key_t *hk =
478  vec_elt_at_index (h->hash_work_space, vector_index);
479  vhash_search_bucket_t *b0, *b1, *b2, *b3;
480  u32x4 r0, r1, r2, r3, r0_before, r1_before, r2_before, r3_before;
481  u32x4_union_t kh;
482 
483  kh.as_u32x4 = hk->hashed_key[1].as_u32x4;
484 
485  b0 = (void *) h->search_buckets + kh.as_u32[0];
486  b1 = (void *) h->search_buckets + kh.as_u32[1];
487  b2 = (void *) h->search_buckets + kh.as_u32[2];
488  b3 = (void *) h->search_buckets + kh.as_u32[3];
489 
490  r0 = r0_before = b0->result.as_u32x4;
491  r1 = r1_before = b1->result.as_u32x4;
492  r2 = r2_before = b2->result.as_u32x4;
493  r3 = r3_before = b3->result.as_u32x4;
494 
495  vi = vector_index * 4;
496 
497  for (i = 0; i < n_key_u32s; i++)
498  {
499  u32x4 c0, c1, c2, c3;
500  vhash_bucket_compare_4 (h, i, vector_index,
501  b0, b1, b2, b3, c0, c1, c2, c3);
502  r0 &= c0;
503  r1 &= c1;
504  r2 &= c2;
505  r3 &= c3;
506  }
507 
508  u32x4_transpose (r0, r1, r2, r3);
509 
510  /* Gather together 4 results. */
511  {
512  u32x4_union_t r;
513  u32x4 ones = { 1, 1, 1, 1 };
514  u32 not_found_mask;
515 
516  r.as_u32x4 = r0 | r1 | r2 | r3;
517  not_found_mask = u32x4_zero_byte_mask (r.as_u32x4);
518  not_found_mask &= ((vhash_search_bucket_is_full (r0_before) << (4 * 0))
519  | (vhash_search_bucket_is_full (r1_before) << (4 * 1))
520  | (vhash_search_bucket_is_full (r2_before) << (4 * 2))
521  | (vhash_search_bucket_is_full (r3_before) <<
522  (4 * 3)));
523  if (not_found_mask)
524  {
525  u32x4_union_t key_hash;
526 
527  key_hash.as_u32x4 =
528  hk->hashed_key[2].as_u32x4 & h->bucket_mask.as_u32x4;
529 
530  /* Slow path: one of the buckets may have been full and we need to search overflow. */
531  if (not_found_mask & (1 << (4 * 0)))
532  r.as_u32[0] = vhash_get_overflow (h, key_hash.as_u32[0],
533  vi + 0, n_key_u32s);
534  if (not_found_mask & (1 << (4 * 1)))
535  r.as_u32[1] = vhash_get_overflow (h, key_hash.as_u32[1],
536  vi + 1, n_key_u32s);
537  if (not_found_mask & (1 << (4 * 2)))
538  r.as_u32[2] = vhash_get_overflow (h, key_hash.as_u32[2],
539  vi + 2, n_key_u32s);
540  if (not_found_mask & (1 << (4 * 3)))
541  r.as_u32[3] = vhash_get_overflow (h, key_hash.as_u32[3],
542  vi + 3, n_key_u32s);
543  }
544 
545  result_function (state, vi, r.as_u32x4 - ones, n_key_u32s);
546  }
547 }
548 
549 u32
550 vhash_set_overflow (vhash_t * h,
551  u32 key_hash, u32 vi, u32 new_result, u32 n_key_u32s);
552 
553 always_inline void
554 vhash_set_stage (vhash_t * h,
555  u32 vector_index,
556  u32 n_vectors,
557  vhash_result_function_t result_function,
558  void *state, u32 n_key_u32s)
559 {
560  u32 i, j, n_new_elts = 0;
561  vhash_hashed_key_t *hk =
562  vec_elt_at_index (h->hash_work_space, vector_index);
563  vhash_search_bucket_t *b;
564 
565  for (i = 0; i < n_vectors; i++)
566  {
567  u32 vi = vector_index * 4 + i;
568  u32 key_hash = hk->hashed_key[2].as_u32[i];
569  u32 old_result, new_result;
570  u32 i_set;
571  u32x4 r, r0, cmp;
572 
573  b = vhash_get_search_bucket (h, key_hash, n_key_u32s);
574 
575  cmp = vhash_bucket_compare (h, &b->key[0], 0, vi);
576  for (j = 1; j < n_key_u32s; j++)
577  cmp &= vhash_bucket_compare (h, &b->key[0], j, vi);
578 
579  r0 = b->result.as_u32x4;
580  r = r0 & cmp;
581 
582  /* At this point only one of 4 results should be non-zero.
583  So we can or all 4 together and get the valid result (if there is one). */
584  old_result = vhash_merge_results (r);
585 
586  if (!old_result && vhash_search_bucket_is_full (r0))
587  old_result = vhash_get_overflow (h, key_hash, vi, n_key_u32s);
588 
589  /* Get new result; possibly do something with old result. */
590  new_result = result_function (state, vi, old_result - 1, n_key_u32s);
591 
592  /* User cannot use ~0 as a hash result since a result of 0 is
593  used to mark unused bucket entries. */
594  ASSERT (new_result + 1 != 0);
595  new_result += 1;
596 
597  /* Set over-writes existing result. */
598  if (old_result)
599  {
600  i_set = vhash_non_empty_result_index (r);
601  b->result.as_u32[i_set] = new_result;
602  }
603  else
604  {
605  /* Set allocates new result. */
606  u32 valid_mask;
607 
608  valid_mask = (((b->result.as_u32[0] != 0) << 0)
609  | ((b->result.as_u32[1] != 0) << 1)
610  | ((b->result.as_u32[2] != 0) << 2)
611  | ((b->result.as_u32[3] != 0) << 3));
612 
613  /* Rotate 4 bit valid mask so that key_hash corresponds to bit 0. */
614  i_set = key_hash & 3;
615  valid_mask =
616  ((valid_mask >> i_set) | (valid_mask << (4 - i_set))) & 0xf;
617 
618  /* Insert into first empty position in bucket after key_hash. */
619  i_set = (i_set + h->find_first_zero_table[valid_mask]) & 3;
620 
621  if (valid_mask != 0xf)
622  {
623  n_new_elts += 1;
624 
625  b->result.as_u32[i_set] = new_result;
626 
627  /* Insert new key into search bucket. */
628  for (j = 0; j < n_key_u32s; j++)
629  b->key[j].as_u32[i_set] = vhash_get_key_word (h, j, vi);
630  }
631  else
632  vhash_set_overflow (h, key_hash, vi, new_result, n_key_u32s);
633  }
634  }
635 
636  h->n_elts += n_new_elts;
637 }
638 
639 u32 vhash_unset_overflow (vhash_t * h, u32 key_hash, u32 vi, u32 n_key_u32s);
640 
641 void
642 vhash_unset_refill_from_overflow (vhash_t * h,
643  vhash_search_bucket_t * b,
644  u32 key_hash, u32 n_key_u32s);
645 
646 /* Note: Eliot tried doing 4 unsets at once and could not get a speed up
647  and abandoned vhash_unset_4_stage. */
648 always_inline void
649 vhash_unset_stage (vhash_t * h,
650  u32 vector_index,
651  u32 n_vectors,
652  vhash_result_function_t result_function,
653  void *state, u32 n_key_u32s)
654 {
655  u32 i, j, n_elts_unset = 0;
656  vhash_hashed_key_t *hk =
657  vec_elt_at_index (h->hash_work_space, vector_index);
658  vhash_search_bucket_t *b;
659 
660  for (i = 0; i < n_vectors; i++)
661  {
662  u32 vi = vector_index * 4 + i;
663  u32 key_hash = hk->hashed_key[2].as_u32[i];
664  u32 old_result;
665  u32x4 cmp, r0;
666 
667  b = vhash_get_search_bucket (h, key_hash, n_key_u32s);
668 
669  cmp = vhash_bucket_compare (h, &b->key[0], 0, vi);
670  for (j = 1; j < n_key_u32s; j++)
671  cmp &= vhash_bucket_compare (h, &b->key[0], j, vi);
672 
673  r0 = b->result.as_u32x4;
674 
675  /* At this point cmp is all ones where key matches and zero otherwise.
676  So, this will invalidate results for matching key and do nothing otherwise. */
677  b->result.as_u32x4 = r0 & ~cmp;
678 
679  old_result = vhash_merge_results (r0 & cmp);
680 
681  n_elts_unset += old_result != 0;
682 
683  if (vhash_search_bucket_is_full (r0))
684  {
685  if (old_result)
686  vhash_unset_refill_from_overflow (h, b, key_hash, n_key_u32s);
687  else
688  old_result = vhash_unset_overflow (h, key_hash, vi, n_key_u32s);
689  }
690 
691  result_function (state, vi, old_result - 1, n_key_u32s);
692  }
693  ASSERT (h->n_elts >= n_elts_unset);
694  h->n_elts -= n_elts_unset;
695 }
696 
697 void vhash_init (vhash_t * h, u32 log2_n_keys, u32 n_key_u32,
698  u32 * hash_seeds);
699 
700 void vhash_resize (vhash_t * old, u32 log2_n_keys);
701 
702 typedef struct
703 {
704  vhash_t *vhash;
705 
706  union
707  {
708  struct
709  {
710  u32 *keys;
711  u32 *results;
712  };
713 
714  /* Vector layout for get keys. */
715  struct
716  {
717  u32x4_union_t *get_keys;
718  u32x4_union_t *get_results;
719  };
720  };
721 
722  u32 n_vectors_div_4;
723  u32 n_vectors_mod_4;
724 
725  u32 n_key_u32;
726 
727  u32 n_keys;
728 } vhash_main_t;
729 
731 vhash_get_alloc_keys (vhash_main_t * vm, u32 n_keys, u32 n_key_u32)
732 {
733  u32 i, n;
734 
735  i = vm->n_keys;
736  vm->n_keys = i + n_keys;
737 
738  n = (round_pow2 (vm->n_keys, 4) / 4) * n_key_u32;
739 
740  vec_validate_aligned (vm->get_keys, n - 1, sizeof (vm->get_keys[0]));
741  vec_validate_aligned (vm->get_results, n - 1, sizeof (vm->get_results[0]));
742 
743  return i;
744 }
745 
746 always_inline void
747 vhash_get_set_key_word (vhash_main_t * vm, u32 vi, u32 wi, u32 n_key_u32,
748  u32 value)
749 {
750  u32x4_union_t *k = vec_elt_at_index (vm->get_keys, (vi / 4) * n_key_u32);
751  ASSERT (wi < n_key_u32);
752  k[wi].as_u32[vi % 4] = value;
753 }
754 
756 vhash_get_fetch_result (vhash_main_t * vm, u32 vi)
757 {
758  u32x4_union_t *r = vec_elt_at_index (vm->get_results, vi / 4);
759  return r->as_u32[vi % 4];
760 }
761 
762 void vhash_main_get (vhash_main_t * vm);
763 
765 vhash_set_alloc_keys (vhash_main_t * vm, u32 n_keys, u32 n_key_u32)
766 {
767  u32 i;
768 
769  i = vm->n_keys;
770  vm->n_keys = i + n_keys;
771 
772  vec_resize (vm->keys, n_keys * n_key_u32);
773  vec_resize (vm->results, n_keys);
774 
775  return i;
776 }
777 
778 always_inline void
779 vhash_set_set_key_word (vhash_main_t * vm, u32 vi, u32 wi, u32 n_key_u32,
780  u32 value)
781 {
782  u32 *k = vec_elt_at_index (vm->keys, vi * n_key_u32);
783  ASSERT (wi < n_key_u32);
784  k[wi] = value;
785 }
786 
787 always_inline void
788 vhash_set_set_result (vhash_main_t * vm, u32 vi, u32 result)
789 {
790  u32 *r = vec_elt_at_index (vm->results, vi);
791  r[0] = result;
792 }
793 
795 vhash_set_fetch_old_result (vhash_main_t * vm, u32 vi)
796 {
797  u32 *r = vec_elt_at_index (vm->results, vi);
798  return r[0];
799 }
800 
801 void vhash_main_set (vhash_main_t * vm);
802 
804 vhash_unset_alloc_keys (vhash_main_t * vm, u32 n_keys, u32 n_key_u32)
805 {
806  return vhash_set_alloc_keys (vm, n_keys, n_key_u32);
807 }
808 
809 always_inline void
810 vhash_unset_set_key_word (vhash_main_t * vm, u32 vi, u32 wi, u32 n_key_u32,
811  u32 value)
812 {
813  vhash_set_set_key_word (vm, vi, wi, n_key_u32, value);
814 }
815 
816 always_inline void
817 vhash_unset_set_result (vhash_main_t * vm, u32 vi, u32 result)
818 {
819  vhash_set_set_result (vm, vi, result);
820 }
821 
823 vhash_unset_fetch_old_result (vhash_main_t * vm, u32 vi)
824 {
825  return vhash_set_fetch_old_result (vm, vi);
826 }
827 
828 void vhash_main_unset (vhash_main_t * vm);
829 
830 typedef struct
831 {
832  vhash_main_t new;
833 
834  vhash_t *old;
835 } vhash_resize_t;
836 
837 u32 vhash_resize_incremental (vhash_resize_t * vr, u32 vector_index,
838  u32 n_vectors);
839 
840 #endif /* CLIB_HAVE_VEC128 */
841 
842 #endif /* included_clib_vhash_h */
843 
844 /*
845  * fd.io coding-style-patch-verification: ON
846  *
847  * Local Variables:
848  * eval: (c-set-style "gnu")
849  * End:
850  */
a
Definition: bitmap.h:538
#define hash_v3_mix_u32x(a, b, c)
Definition: hash.h:644
int i
#define vec_validate_aligned(V, I, A)
Make sure vector is long enough for given index (no header, specified alignment)
Definition: vec.h:450
unsigned char u8
Definition: types.h:56
static uword min_log2(uword x)
Definition: clib.h:144
#define always_inline
Definition: clib.h:98
#define vec_elt_at_index(v, i)
Get vector value at index i checking that i is in bounds.
#define u32x4_word_shift_right(a, n)
Definition: vector_sse42.h:370
vhost_vring_state_t state
Definition: vhost_user.h:120
#define vec_resize(V, N)
Resize a vector (no header, unspecified alignment) Add N elements to end of given vector V...
Definition: vec.h:242
unsigned int u32
Definition: types.h:88
vslo vsro vslo static vsro u32 u32x4_get0(u32x4 x)
#define hash_v3_finalize_u32x(a, b, c)
Definition: hash.h:650
svmdb_client_t * c
#define CLIB_PREFETCH(addr, size, type)
Definition: cache.h:79
#define vec_free(V)
Free vector&#39;s memory (no header).
Definition: vec.h:341
#define u32x4_transpose(x0, x1, x2, x3)
Definition: vector_funcs.h:292
static uword max_pow2(uword x)
Definition: clib.h:226
#define ARRAY_LEN(x)
Definition: clib.h:62
static uword round_pow2(uword x, uword pow2)
Definition: clib.h:241
signed int i32
Definition: types.h:77
#define ASSERT(truth)
#define clib_max(x, y)
Definition: clib.h:288
#define vec_elt(v, i)
Get vector value at index i.
u64 uword
Definition: types.h:112
unsigned long long u32x4
Definition: ixge.c:28
#define CLIB_CACHE_LINE_BYTES
Definition: cache.h:59
static u32 u32x4_zero_byte_mask(u32x4 x)