#ifndef INCLUDED_volk_8u_x4_conv_k7_r2_8u_H
#define INCLUDED_volk_8u_x4_conv_k7_r2_8u_H
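
/* The 64 decision bits produced by one trellis stage, viewable as bytes,
   shorts, or ints so the butterfly can OR packed decision-bit pairs into
   the word that a later traceback reads. */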
typedef union {
    unsigned char t[64 / 8];
    unsigned int w[64 / 32];
    unsigned short s[64 / 16];
    unsigned char c[64 / 8];
} decision_t __attribute__((aligned(16)));
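
/* Subtract the smallest metric from all 64 state metrics so the unsigned
   8-bit path metrics stay in range; the caller invokes this once any
   metric crosses a threshold. */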
static inline void renormalize(unsigned char* X, unsigned char threshold)
{
    int NUMSTATES = 64;
    int i;

    unsigned char min = X[0];
    for (i = 0; i < NUMSTATES; i++)
        if (min > X[i])
            min = X[i];
    for (i = 0; i < NUMSTATES; i++)
        X[i] -= min;
}
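
/* One add-compare-select butterfly of the k=7, rate-1/2 Viterbi decoder:
   predecessor states i and i + NUMSTATES/2 compete for successor states
   2*i and 2*i + 1. Survivor metrics go to Y; the two decision bits are
   packed into d->w for traceback. */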
static inline void BFLY(int i,
                        int s,
                        unsigned char* syms,
                        unsigned char* Y,
                        unsigned char* X,
                        decision_t* d,
                        unsigned char* Branchtab)
{
    int j, decision0, decision1;
    unsigned char metric, m0, m1, m2, m3;

    int NUMSTATES = 64;
    int RATE = 2;
    int METRICSHIFT = 2;
    int PRECISIONSHIFT = 2;

    metric = 0;
    for (j = 0; j < RATE; j++)
        metric += (Branchtab[i + j * NUMSTATES / 2] ^ syms[s * RATE + j]) >> METRICSHIFT;
    metric = metric >> PRECISIONSHIFT;

    unsigned char max = ((RATE * ((256 - 1) >> METRICSHIFT)) >> PRECISIONSHIFT);

    m0 = X[i] + metric;
    m1 = X[i + NUMSTATES / 2] + (max - metric);
    m2 = X[i] + (max - metric);
    m3 = X[i + NUMSTATES / 2] + metric;

    decision0 = (signed int)(m0 - m1) > 0;
    decision1 = (signed int)(m2 - m3) > 0;

    Y[2 * i] = decision0 ? m1 : m0;
    Y[2 * i + 1] = decision1 ? m3 : m2;

    d->w[i / (sizeof(unsigned int) * 8 / 2) +
         s * (sizeof(decision_t) / sizeof(unsigned int))] |=
        (decision0 | decision1 << 1) << ((2 * i) & (sizeof(unsigned int) * 8 - 1));
}
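
/* Illustrative sketch, not part of this kernel: given the packing above, a
   traceback can read the decision bit for state `state` at stage `s` like
   this. `decision_bit` is a hypothetical helper named here for exposition;
   it assumes `d` points at an array of decision_t, one entry per stage. */
static inline int decision_bit(const decision_t* d, int s, int state)
{
    const unsigned int bits_per_word = sizeof(unsigned int) * 8;
    /* With 32-bit unsigned int, state k's decision sits at bit (k mod 32)
       of word (k / 32), matching the shift/index arithmetic in BFLY. */
    return (d[s].w[state / bits_per_word] >> (state & (bits_per_word - 1))) & 1;
}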

#if LV_HAVE_SSE3

#include <emmintrin.h>
#include <mmintrin.h>
#include <pmmintrin.h>
#include <xmmintrin.h>
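
/* SSE3 implementation. Each loop iteration advances two trellis stages
   (X -> Y, then Y -> X), handling 16 states per __m128i. The generated,
   single-assignment naming (a71, t13, ...) comes from the Spiral code
   generator; lines omitted from this excerpt are marked with comment
   ellipses. */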
static inline void volk_8u_x4_conv_k7_r2_8u_spiral(unsigned char* Y,
                                                   unsigned char* X,
                                                   unsigned char* syms,
                                                   unsigned char* dec,
                                                   unsigned int framebits,
                                                   unsigned int excess,
                                                   unsigned char* Branchtab)
{
    unsigned int i9;
    for (i9 = 0; i9 < ((framebits + excess) >> 1); i9++) {
        unsigned char a75, a81;
        short int s20, s21, s26, s27;
        unsigned char *a74, *a80, *b6;
        short int *a110, *a111, *a91, *a93, *a94;
        __m128i *a102, *a112, *a113, *a71, *a72, *a77, *a83, *a95, *a96, *a97, *a98,
            *a99;
        __m128i a105, a106, a86, a87;
        __m128i a100, a101, a103, a104, a107, a108, a109, a76, a78, a79, a82, a84,
            a85, a88, a89, a90, d10, d11, d12, d9, m23, m24, m25, m26, m27, m28, m29,
            m30, s18, s19, s22, s23, s24, s25, s28, s29, t13, t14, t15, t16, t17, t18;
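
        /* Branch metrics for the low 16 states: broadcast the two received
           symbols, XOR against the branch table (a Hamming-like distance),
           then average and mask down to 6-bit metrics t14/t15, where t15 is
           the complementary metric. */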
        /* ... */
        a76 = _mm_set1_epi8(a75);
        a77 = ((__m128i*)Branchtab);
        a78 = *(a77);
        a79 = _mm_xor_si128(a76, a78);
        /* ... */
        a82 = _mm_set1_epi8(a81);
        a83 = (a77 + 2);
        a84 = *(a83);
        a85 = _mm_xor_si128(a82, a84);
        t13 = _mm_avg_epu8(a79, a85);
        a86 = ((__m128i)t13);
        a87 = _mm_srli_epi16(a86, 2);
        a88 = ((__m128i)a87);
        t14 = _mm_and_si128(
            a88,
            _mm_set_epi8(63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63));
        t15 = _mm_subs_epu8(
            _mm_set_epi8(63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63),
            t14);
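
        /* Add-compare-select: four candidate successor metrics; vector min
           keeps the survivors and cmpeq records which candidate won, giving
           a byte-wise decision mask that movemask compresses to 16 bits. */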
        m23 = _mm_adds_epu8(s18, t14);
        m24 = _mm_adds_epu8(s19, t15);
        m25 = _mm_adds_epu8(s18, t15);
        m26 = _mm_adds_epu8(s19, t14);
        a89 = _mm_min_epu8(m24, m23);
        d9 = _mm_cmpeq_epi8(a89, m24);
        a90 = _mm_min_epu8(m26, m25);
        d10 = _mm_cmpeq_epi8(a90, m26);
        s20 = _mm_movemask_epi8(_mm_unpacklo_epi8(d9, d10));
        a91 = ((short int*)dec);
        /* ... */
        s21 = _mm_movemask_epi8(_mm_unpackhi_epi8(d9, d10));
        /* ... */
        s22 = _mm_unpacklo_epi8(a89, a90);
        s23 = _mm_unpackhi_epi8(a89, a90);
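
        /* Same butterfly for the high 16 states, using the second half of
           each branch-table row. */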
        /* ... */
        a101 = _mm_xor_si128(a76, a100);
        /* ... */
        a104 = _mm_xor_si128(a82, a103);
        t16 = _mm_avg_epu8(a101, a104);
        a105 = ((__m128i)t16);
        a106 = _mm_srli_epi16(a105, 2);
        a107 = ((__m128i)a106);
        t17 = _mm_and_si128(
            a107,
            _mm_set_epi8(63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63));
        t18 = _mm_subs_epu8(
            _mm_set_epi8(63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63),
            t17);
        m27 = _mm_adds_epu8(s24, t17);
        m28 = _mm_adds_epu8(s25, t18);
        m29 = _mm_adds_epu8(s24, t18);
        m30 = _mm_adds_epu8(s25, t17);
        a108 = _mm_min_epu8(m28, m27);
        d11 = _mm_cmpeq_epi8(a108, m28);
        a109 = _mm_min_epu8(m30, m29);
        d12 = _mm_cmpeq_epi8(a109, m30);
        s26 = _mm_movemask_epi8(_mm_unpacklo_epi8(d11, d12));
        /* ... */
        s27 = _mm_movemask_epi8(_mm_unpackhi_epi8(d11, d12));
        /* ... */
        s28 = _mm_unpacklo_epi8(a108, a109);
        s29 = _mm_unpackhi_epi8(a108, a109);
        /* ... */
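
        /* In-loop renormalization: once the first metric in Y exceeds 210,
           compute the horizontal minimum over all 64 state metrics with a
           log2-style min/shift reduction, broadcast it, and subtract. */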
        if ((((unsigned char*)Y)[0] > 210)) {
            __m128i m5, m6, m7;
            m5 = ((__m128i*)Y)[0];
            m5 = _mm_min_epu8(m5, ((__m128i*)Y)[1]);
            m5 = _mm_min_epu8(m5, ((__m128i*)Y)[2]);
            m5 = _mm_min_epu8(m5, ((__m128i*)Y)[3]);
            m7 = _mm_min_epu8(_mm_srli_si128(m5, 8), m5);
            m7 = ((__m128i)_mm_min_epu8(((__m128i)_mm_srli_epi64(m7, 32)), ((__m128i)m7)));
            m7 = ((__m128i)_mm_min_epu8(((__m128i)_mm_srli_epi64(m7, 16)), ((__m128i)m7)));
            m7 = ((__m128i)_mm_min_epu8(((__m128i)_mm_srli_epi64(m7, 8)), ((__m128i)m7)));
            m7 = _mm_unpacklo_epi8(m7, m7);
            m7 = _mm_shufflelo_epi16(m7, _MM_SHUFFLE(0, 0, 0, 0));
            m6 = _mm_unpacklo_epi64(m7, m7);
            ((__m128i*)Y)[0] = _mm_subs_epu8(((__m128i*)Y)[0], m6);
            ((__m128i*)Y)[1] = _mm_subs_epu8(((__m128i*)Y)[1], m6);
            ((__m128i*)Y)[2] = _mm_subs_epu8(((__m128i*)Y)[2], m6);
            ((__m128i*)Y)[3] = _mm_subs_epu8(((__m128i*)Y)[3], m6);
        }

        unsigned char a188, a194;
        short int s48, s49, s54, s55;
        unsigned char *a187, *a193, *b15;
        short int *a204, *a206, *a207, *a223, *a224, *b16;
        __m128i *a184, *a185, *a190, *a196, *a208, *a209, *a210, *a211, *a212,
            *a215; /* ... */
        __m128i a199, a200, a218, a219;
        __m128i a189, a191, a192, a195, a197, a198, a201, a202, a203, a213, a214,
            a216, a217, a220, a221, a222, d17, d18, d19, d20, m39, m40, m41, m42,
            m43, m44, m45, m46, s46, s47, s50, s51, s52, s53, s56, s57, t25, t26,
            t27, t28, t29, t30;
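
        /* Second trellis stage of the iteration: metrics flow from Y back
           into X, so the two buffers ping-pong without a copy. */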
        a184 = ((__m128i*)Y);
        /* ... */
        a189 = _mm_set1_epi8(a188);
        a190 = ((__m128i*)Branchtab);
        a191 = *(a190);
        a192 = _mm_xor_si128(a189, a191);
        /* ... */
        a195 = _mm_set1_epi8(a194);
        a196 = (a190 + 2);
        a197 = *(a196);
        a198 = _mm_xor_si128(a195, a197);
        t25 = _mm_avg_epu8(a192, a198);
        a199 = ((__m128i)t25);
        a200 = _mm_srli_epi16(a199, 2);
        a201 = ((__m128i)a200);
        t26 = _mm_and_si128(
            a201,
            _mm_set_epi8(63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63));
        t27 = _mm_subs_epu8(
            _mm_set_epi8(63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63),
            t26);
        m39 = _mm_adds_epu8(s46, t26);
        m40 = _mm_adds_epu8(s47, t27);
        m41 = _mm_adds_epu8(s46, t27);
        m42 = _mm_adds_epu8(s47, t26);
        a202 = _mm_min_epu8(m40, m39);
        d17 = _mm_cmpeq_epi8(a202, m40);
        a203 = _mm_min_epu8(m42, m41);
        d18 = _mm_cmpeq_epi8(a203, m42);
        s48 = _mm_movemask_epi8(_mm_unpacklo_epi8(d17, d18));
        a204 = ((short int*)dec);
        /* ... */
        s49 = _mm_movemask_epi8(_mm_unpackhi_epi8(d17, d18));
        /* ... */
        s50 = _mm_unpacklo_epi8(a202, a203);
        s51 = _mm_unpackhi_epi8(a202, a203);
        a208 = ((__m128i*)X);
        /* ... */
        a214 = _mm_xor_si128(a189, a213);
        /* ... */
        a217 = _mm_xor_si128(a195, a216);
        t28 = _mm_avg_epu8(a214, a217);
        a218 = ((__m128i)t28);
        a219 = _mm_srli_epi16(a218, 2);
        a220 = ((__m128i)a219);
        t29 = _mm_and_si128(
            a220,
            _mm_set_epi8(63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63));
        t30 = _mm_subs_epu8(
            _mm_set_epi8(63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63),
            t29);
        m43 = _mm_adds_epu8(s52, t29);
        m44 = _mm_adds_epu8(s53, t30);
        m45 = _mm_adds_epu8(s52, t30);
        m46 = _mm_adds_epu8(s53, t29);
        a221 = _mm_min_epu8(m44, m43);
        d19 = _mm_cmpeq_epi8(a221, m44);
        a222 = _mm_min_epu8(m46, m45);
        d20 = _mm_cmpeq_epi8(a222, m46);
        s54 = _mm_movemask_epi8(_mm_unpacklo_epi8(d19, d20));
        /* ... */
        s55 = _mm_movemask_epi8(_mm_unpackhi_epi8(d19, d20));
        /* ... */
        s56 = _mm_unpacklo_epi8(a221, a222);
        s57 = _mm_unpackhi_epi8(a221, a222);
        /* ... */
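
        /* Mirror renormalization on X after the second half-stage. */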
        if ((((unsigned char*)X)[0] > 210)) {
            __m128i m12, m13, m14;
            m12 = ((__m128i*)X)[0];
            m12 = _mm_min_epu8(m12, ((__m128i*)X)[1]);
            m12 = _mm_min_epu8(m12, ((__m128i*)X)[2]);
            m12 = _mm_min_epu8(m12, ((__m128i*)X)[3]);
            m14 = _mm_min_epu8(_mm_srli_si128(m12, 8), m12);
            m14 = ((__m128i)_mm_min_epu8(((__m128i)_mm_srli_epi64(m14, 32)),
                                         ((__m128i)m14)));
            m14 = ((__m128i)_mm_min_epu8(((__m128i)_mm_srli_epi64(m14, 16)),
                                         ((__m128i)m14)));
            m14 = ((__m128i)_mm_min_epu8(((__m128i)_mm_srli_epi64(m14, 8)),
                                         ((__m128i)m14)));
            m14 = _mm_unpacklo_epi8(m14, m14);
            m14 = _mm_shufflelo_epi16(m14, _MM_SHUFFLE(0, 0, 0, 0));
            m13 = _mm_unpacklo_epi64(m14, m14);
            ((__m128i*)X)[0] = _mm_subs_epu8(((__m128i*)X)[0], m13);
            ((__m128i*)X)[1] = _mm_subs_epu8(((__m128i*)X)[1], m13);
            ((__m128i*)X)[2] = _mm_subs_epu8(((__m128i*)X)[2], m13);
            ((__m128i*)X)[3] = _mm_subs_epu8(((__m128i*)X)[3], m13);
        }
        /* ... */
    }

    /* If framebits + excess is odd, finish the final stage with the scalar
       butterfly. */
    unsigned int j;
    for (j = 0; j < (framebits + excess) % 2; ++j) {
        int i;
        for (i = 0; i < 64 / 2; i++) {
            BFLY(i,
                 (((framebits + excess) >> 1) << 1) + j,
                 syms,
                 Y,
                 X,
                 (decision_t*)dec,
                 Branchtab);
        }
        /* ... */
    }
}

#endif /* LV_HAVE_SSE3 */
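
/* Illustrative usage sketch (not from this header): one decode pass over a
 * frame, with hypothetical buffer names. Assumes 64 states, rate 1/2, and
 * EXCESS tail bits; FRAMEBITS, EXCESS, syms, and Branchtab are placeholders.
 *
 *   unsigned char X[64] = { 0 };  // path metrics, all states equal at start
 *   unsigned char Y[64];          // scratch metrics for the ping-pong
 *   unsigned char dec[(FRAMEBITS + EXCESS) * sizeof(decision_t)];
 *   volk_8u_x4_conv_k7_r2_8u_generic(Y, X, syms, dec, FRAMEBITS, EXCESS, Branchtab);
 *   // a traceback over dec then recovers the decoded bits
 */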

#if LV_HAVE_GENERIC

static inline void volk_8u_x4_conv_k7_r2_8u_generic(unsigned char* Y,
                                                    unsigned char* X,
                                                    unsigned char* syms,
                                                    unsigned char* dec,
                                                    unsigned int framebits,
                                                    unsigned int excess,
                                                    unsigned char* Branchtab)
{
    int nbits = framebits + excess;
    int NUMSTATES = 64;
    int RENORMALIZE_THRESHOLD = 210;
    int s, i;

    for (s = 0; s < nbits; s++) {
        void* tmp;
        for (i = 0; i < NUMSTATES / 2; i++) {
            BFLY(i, s, syms, Y, X, (decision_t*)dec, Branchtab);
        }
        renormalize(Y, RENORMALIZE_THRESHOLD);

        /* Swap the old and new metric buffers for the next stage. */
        tmp = (void*)X;
        X = Y;
        Y = (unsigned char*)tmp;
    }
}

#endif /* LV_HAVE_GENERIC */

#endif /* INCLUDED_volk_8u_x4_conv_k7_r2_8u_H */