42 "pxor %%mm0, %%mm0 \n"
43 "pxor %%mm7, %%mm7 \n"
47 "movq (%0, %3), %%mm3 \n"
48 "movq (%1, %3), %%mm4 \n"
53 "movq %%mm1, %%mm5 \n"
54 "movq %%mm3, %%mm6 \n"
55 "psubusb %%mm2, %%mm1 \n"
56 "psubusb %%mm4, %%mm3 \n"
57 "psubusb %%mm5, %%mm2 \n"
58 "psubusb %%mm6, %%mm4 \n"
64 "movq %%mm2, %%mm1 \n"
65 "movq %%mm4, %%mm3 \n"
67 "punpckhbw %%mm0, %%mm2 \n"
68 "punpckhbw %%mm0, %%mm4 \n"
69 "punpcklbw %%mm0, %%mm1 \n"
70 "punpcklbw %%mm0, %%mm3 \n"
72 "pmaddwd %%mm2, %%mm2 \n"
73 "pmaddwd %%mm4, %%mm4 \n"
74 "pmaddwd %%mm1, %%mm1 \n"
75 "pmaddwd %%mm3, %%mm3 \n"
77 "lea (%0, %3, 2), %0 \n"
78 "lea (%1, %3, 2), %1 \n"
80 "paddd %%mm2, %%mm1 \n"
81 "paddd %%mm4, %%mm3 \n"
82 "paddd %%mm1, %%mm7 \n"
83 "paddd %%mm3, %%mm7 \n"
88 "movq %%mm7, %%mm1 \n"
90 "paddd %%mm7, %%mm1 \n"
92 :
"+r" (pix1),
"+r" (pix2),
"=r" (tmp)
93 :
"r" ((
x86_reg) line_size),
"m" (h)
100 int line_size,
int h)
106 "pxor %%mm0, %%mm0\n"
107 "pxor %%mm7, %%mm7\n"
111 "movq 8(%0), %%mm3\n"
112 "movq 8(%1), %%mm4\n"
117 "movq %%mm1, %%mm5\n"
118 "movq %%mm3, %%mm6\n"
119 "psubusb %%mm2, %%mm1\n"
120 "psubusb %%mm4, %%mm3\n"
121 "psubusb %%mm5, %%mm2\n"
122 "psubusb %%mm6, %%mm4\n"
128 "movq %%mm2, %%mm1\n"
129 "movq %%mm4, %%mm3\n"
131 "punpckhbw %%mm0, %%mm2\n"
132 "punpckhbw %%mm0, %%mm4\n"
133 "punpcklbw %%mm0, %%mm1\n"
134 "punpcklbw %%mm0, %%mm3\n"
136 "pmaddwd %%mm2, %%mm2\n"
137 "pmaddwd %%mm4, %%mm4\n"
138 "pmaddwd %%mm1, %%mm1\n"
139 "pmaddwd %%mm3, %%mm3\n"
144 "paddd %%mm2, %%mm1\n"
145 "paddd %%mm4, %%mm3\n"
146 "paddd %%mm1, %%mm7\n"
147 "paddd %%mm3, %%mm7\n"
152 "movq %%mm7, %%mm1\n"
154 "paddd %%mm7, %%mm1\n"
156 :
"+r" (pix1),
"+r" (pix2),
"=r" (tmp)
157 :
"r" ((
x86_reg) line_size),
"m" (h)
163 static int hf_noise8_mmx(
uint8_t *pix1,
int line_size,
int h)
169 "pxor %%mm7, %%mm7\n"
170 "pxor %%mm6, %%mm6\n"
173 "movq %%mm0, %%mm1\n"
177 "movq %%mm0, %%mm2\n"
178 "movq %%mm1, %%mm3\n"
179 "punpcklbw %%mm7, %%mm0\n"
180 "punpcklbw %%mm7, %%mm1\n"
181 "punpckhbw %%mm7, %%mm2\n"
182 "punpckhbw %%mm7, %%mm3\n"
183 "psubw %%mm1, %%mm0\n"
184 "psubw %%mm3, %%mm2\n"
189 "movq %%mm4, %%mm1\n"
193 "movq %%mm4, %%mm5\n"
194 "movq %%mm1, %%mm3\n"
195 "punpcklbw %%mm7, %%mm4\n"
196 "punpcklbw %%mm7, %%mm1\n"
197 "punpckhbw %%mm7, %%mm5\n"
198 "punpckhbw %%mm7, %%mm3\n"
199 "psubw %%mm1, %%mm4\n"
200 "psubw %%mm3, %%mm5\n"
201 "psubw %%mm4, %%mm0\n"
202 "psubw %%mm5, %%mm2\n"
203 "pxor %%mm3, %%mm3\n"
204 "pxor %%mm1, %%mm1\n"
205 "pcmpgtw %%mm0, %%mm3\n\t"
206 "pcmpgtw %%mm2, %%mm1\n\t"
207 "pxor %%mm3, %%mm0\n"
208 "pxor %%mm1, %%mm2\n"
209 "psubw %%mm3, %%mm0\n"
210 "psubw %%mm1, %%mm2\n"
211 "paddw %%mm0, %%mm2\n"
212 "paddw %%mm2, %%mm6\n"
218 "movq %%mm0, %%mm1\n"
222 "movq %%mm0, %%mm2\n"
223 "movq %%mm1, %%mm3\n"
224 "punpcklbw %%mm7, %%mm0\n"
225 "punpcklbw %%mm7, %%mm1\n"
226 "punpckhbw %%mm7, %%mm2\n"
227 "punpckhbw %%mm7, %%mm3\n"
228 "psubw %%mm1, %%mm0\n"
229 "psubw %%mm3, %%mm2\n"
230 "psubw %%mm0, %%mm4\n"
231 "psubw %%mm2, %%mm5\n"
232 "pxor %%mm3, %%mm3\n"
233 "pxor %%mm1, %%mm1\n"
234 "pcmpgtw %%mm4, %%mm3\n\t"
235 "pcmpgtw %%mm5, %%mm1\n\t"
236 "pxor %%mm3, %%mm4\n"
237 "pxor %%mm1, %%mm5\n"
238 "psubw %%mm3, %%mm4\n"
239 "psubw %%mm1, %%mm5\n"
240 "paddw %%mm4, %%mm5\n"
241 "paddw %%mm5, %%mm6\n"
246 "movq %%mm4, %%mm1\n"
250 "movq %%mm4, %%mm5\n"
251 "movq %%mm1, %%mm3\n"
252 "punpcklbw %%mm7, %%mm4\n"
253 "punpcklbw %%mm7, %%mm1\n"
254 "punpckhbw %%mm7, %%mm5\n"
255 "punpckhbw %%mm7, %%mm3\n"
256 "psubw %%mm1, %%mm4\n"
257 "psubw %%mm3, %%mm5\n"
258 "psubw %%mm4, %%mm0\n"
259 "psubw %%mm5, %%mm2\n"
260 "pxor %%mm3, %%mm3\n"
261 "pxor %%mm1, %%mm1\n"
262 "pcmpgtw %%mm0, %%mm3\n\t"
263 "pcmpgtw %%mm2, %%mm1\n\t"
264 "pxor %%mm3, %%mm0\n"
265 "pxor %%mm1, %%mm2\n"
266 "psubw %%mm3, %%mm0\n"
267 "psubw %%mm1, %%mm2\n"
268 "paddw %%mm0, %%mm2\n"
269 "paddw %%mm2, %%mm6\n"
275 "movq %%mm6, %%mm0\n"
276 "punpcklwd %%mm7, %%mm0\n"
277 "punpckhwd %%mm7, %%mm6\n"
278 "paddd %%mm0, %%mm6\n"
280 "movq %%mm6, %%mm0\n"
282 "paddd %%mm6, %%mm0\n"
284 :
"+r" (pix1),
"=r" (tmp)
285 :
"r" ((
x86_reg) line_size),
"g" (h - 2)
291 static int hf_noise16_mmx(
uint8_t *pix1,
int line_size,
int h)
298 "pxor %%mm7, %%mm7\n"
299 "pxor %%mm6, %%mm6\n"
302 "movq 1(%0), %%mm1\n"
303 "movq %%mm0, %%mm2\n"
304 "movq %%mm1, %%mm3\n"
305 "punpcklbw %%mm7, %%mm0\n"
306 "punpcklbw %%mm7, %%mm1\n"
307 "punpckhbw %%mm7, %%mm2\n"
308 "punpckhbw %%mm7, %%mm3\n"
309 "psubw %%mm1, %%mm0\n"
310 "psubw %%mm3, %%mm2\n"
315 "movq 1(%0), %%mm1\n"
316 "movq %%mm4, %%mm5\n"
317 "movq %%mm1, %%mm3\n"
318 "punpcklbw %%mm7, %%mm4\n"
319 "punpcklbw %%mm7, %%mm1\n"
320 "punpckhbw %%mm7, %%mm5\n"
321 "punpckhbw %%mm7, %%mm3\n"
322 "psubw %%mm1, %%mm4\n"
323 "psubw %%mm3, %%mm5\n"
324 "psubw %%mm4, %%mm0\n"
325 "psubw %%mm5, %%mm2\n"
326 "pxor %%mm3, %%mm3\n"
327 "pxor %%mm1, %%mm1\n"
328 "pcmpgtw %%mm0, %%mm3\n\t"
329 "pcmpgtw %%mm2, %%mm1\n\t"
330 "pxor %%mm3, %%mm0\n"
331 "pxor %%mm1, %%mm2\n"
332 "psubw %%mm3, %%mm0\n"
333 "psubw %%mm1, %%mm2\n"
334 "paddw %%mm0, %%mm2\n"
335 "paddw %%mm2, %%mm6\n"
341 "movq 1(%0), %%mm1\n"
342 "movq %%mm0, %%mm2\n"
343 "movq %%mm1, %%mm3\n"
344 "punpcklbw %%mm7, %%mm0\n"
345 "punpcklbw %%mm7, %%mm1\n"
346 "punpckhbw %%mm7, %%mm2\n"
347 "punpckhbw %%mm7, %%mm3\n"
348 "psubw %%mm1, %%mm0\n"
349 "psubw %%mm3, %%mm2\n"
350 "psubw %%mm0, %%mm4\n"
351 "psubw %%mm2, %%mm5\n"
352 "pxor %%mm3, %%mm3\n"
353 "pxor %%mm1, %%mm1\n"
354 "pcmpgtw %%mm4, %%mm3\n\t"
355 "pcmpgtw %%mm5, %%mm1\n\t"
356 "pxor %%mm3, %%mm4\n"
357 "pxor %%mm1, %%mm5\n"
358 "psubw %%mm3, %%mm4\n"
359 "psubw %%mm1, %%mm5\n"
360 "paddw %%mm4, %%mm5\n"
361 "paddw %%mm5, %%mm6\n"
366 "movq 1(%0), %%mm1\n"
367 "movq %%mm4, %%mm5\n"
368 "movq %%mm1, %%mm3\n"
369 "punpcklbw %%mm7, %%mm4\n"
370 "punpcklbw %%mm7, %%mm1\n"
371 "punpckhbw %%mm7, %%mm5\n"
372 "punpckhbw %%mm7, %%mm3\n"
373 "psubw %%mm1, %%mm4\n"
374 "psubw %%mm3, %%mm5\n"
375 "psubw %%mm4, %%mm0\n"
376 "psubw %%mm5, %%mm2\n"
377 "pxor %%mm3, %%mm3\n"
378 "pxor %%mm1, %%mm1\n"
379 "pcmpgtw %%mm0, %%mm3\n\t"
380 "pcmpgtw %%mm2, %%mm1\n\t"
381 "pxor %%mm3, %%mm0\n"
382 "pxor %%mm1, %%mm2\n"
383 "psubw %%mm3, %%mm0\n"
384 "psubw %%mm1, %%mm2\n"
385 "paddw %%mm0, %%mm2\n"
386 "paddw %%mm2, %%mm6\n"
392 "movq %%mm6, %%mm0\n"
393 "punpcklwd %%mm7, %%mm0\n"
394 "punpckhwd %%mm7, %%mm6\n"
395 "paddd %%mm0, %%mm6\n"
397 "movq %%mm6, %%mm0\n"
399 "paddd %%mm6, %%mm0\n"
401 :
"+r" (pix1),
"=r" (tmp)
402 :
"r" ((
x86_reg) line_size),
"g" (h - 2)
405 return tmp + hf_noise8_mmx(pix + 8, line_size, h);
409 int line_size,
int h)
414 score1 = c->
mecc.
sse[0](c, pix1, pix2, line_size, h);
416 score1 = sse16_mmx(c, pix1, pix2, line_size, h);
417 score2 = hf_noise16_mmx(pix1, line_size, h) -
418 hf_noise16_mmx(pix2, line_size, h);
423 return score1 +
FFABS(score2) * 8;
427 int line_size,
int h)
429 int score1 = sse8_mmx(c, pix1, pix2, line_size, h);
430 int score2 = hf_noise8_mmx(pix1, line_size, h) -
431 hf_noise8_mmx(pix2, line_size, h);
436 return score1 +
FFABS(score2) * 8;
440 int line_size,
int h)
444 assert((((
int) pix) & 7) == 0);
445 assert((line_size & 7) == 0);
447 #define SUM(in0, in1, out0, out1) \
448 "movq (%0), %%mm2\n" \
449 "movq 8(%0), %%mm3\n" \
451 "movq %%mm2, " #out0 "\n" \
452 "movq %%mm3, " #out1 "\n" \
453 "psubusb " #in0 ", %%mm2\n" \
454 "psubusb " #in1 ", %%mm3\n" \
455 "psubusb " #out0 ", " #in0 "\n" \
456 "psubusb " #out1 ", " #in1 "\n" \
457 "por %%mm2, " #in0 "\n" \
458 "por %%mm3, " #in1 "\n" \
459 "movq " #in0 ", %%mm2\n" \
460 "movq " #in1 ", %%mm3\n" \
461 "punpcklbw %%mm7, " #in0 "\n" \
462 "punpcklbw %%mm7, " #in1 "\n" \
463 "punpckhbw %%mm7, %%mm2\n" \
464 "punpckhbw %%mm7, %%mm3\n" \
465 "paddw " #in1 ", " #in0 "\n" \
466 "paddw %%mm3, %%mm2\n" \
467 "paddw %%mm2, " #in0 "\n" \
468 "paddw " #in0 ", %%mm6\n"
473 "pxor %%mm6, %%mm6\n"
474 "pxor %%mm7, %%mm7\n"
476 "movq 8(%0), %%mm1\n"
481 SUM(%%mm4, %%mm5, %%mm0, %%mm1)
483 SUM(%%mm0, %%mm1, %%mm4, %%mm5)
488 "movq %%mm6, %%mm0\n"
490 "paddw %%mm6, %%mm0\n"
491 "movq %%mm0, %%mm6\n"
493 "paddw %%mm6, %%mm0\n"
495 :
"+r" (pix),
"=r" (tmp)
496 :
"r" ((
x86_reg) line_size),
"m" (h)
504 int line_size,
int h)
508 assert((((
int) pix) & 7) == 0);
509 assert((line_size & 7) == 0);
511 #define SUM(in0, in1, out0, out1) \
512 "movq (%0), " #out0 "\n" \
513 "movq 8(%0), " #out1 "\n" \
515 "psadbw " #out0 ", " #in0 "\n" \
516 "psadbw " #out1 ", " #in1 "\n" \
517 "paddw " #in1 ", " #in0 "\n" \
518 "paddw " #in0 ", %%mm6\n"
522 "pxor %%mm6, %%mm6\n"
523 "pxor %%mm7, %%mm7\n"
525 "movq 8(%0), %%mm1\n"
530 SUM(%%mm4, %%mm5, %%mm0, %%mm1)
532 SUM(%%mm0, %%mm1, %%mm4, %%mm5)
538 :
"+r" (pix),
"=r" (tmp)
539 :
"r" ((
x86_reg) line_size),
"m" (h)
547 int line_size,
int h)
551 assert((((
int) pix1) & 7) == 0);
552 assert((((
int) pix2) & 7) == 0);
553 assert((line_size & 7) == 0);
555 #define SUM(in0, in1, out0, out1) \
556 "movq (%0), %%mm2\n" \
557 "movq (%1), " #out0 "\n" \
558 "movq 8(%0), %%mm3\n" \
559 "movq 8(%1), " #out1 "\n" \
562 "psubb " #out0 ", %%mm2\n" \
563 "psubb " #out1 ", %%mm3\n" \
564 "pxor %%mm7, %%mm2\n" \
565 "pxor %%mm7, %%mm3\n" \
566 "movq %%mm2, " #out0 "\n" \
567 "movq %%mm3, " #out1 "\n" \
568 "psubusb " #in0 ", %%mm2\n" \
569 "psubusb " #in1 ", %%mm3\n" \
570 "psubusb " #out0 ", " #in0 "\n" \
571 "psubusb " #out1 ", " #in1 "\n" \
572 "por %%mm2, " #in0 "\n" \
573 "por %%mm3, " #in1 "\n" \
574 "movq " #in0 ", %%mm2\n" \
575 "movq " #in1 ", %%mm3\n" \
576 "punpcklbw %%mm7, " #in0 "\n" \
577 "punpcklbw %%mm7, " #in1 "\n" \
578 "punpckhbw %%mm7, %%mm2\n" \
579 "punpckhbw %%mm7, %%mm3\n" \
580 "paddw " #in1 ", " #in0 "\n" \
581 "paddw %%mm3, %%mm2\n" \
582 "paddw %%mm2, " #in0 "\n" \
583 "paddw " #in0 ", %%mm6\n"
588 "pxor %%mm6, %%mm6\n"
589 "pcmpeqw %%mm7, %%mm7\n"
591 "packsswb %%mm7, %%mm7\n"
594 "movq 8(%0), %%mm1\n"
595 "movq 8(%1), %%mm3\n"
598 "psubb %%mm2, %%mm0\n"
599 "psubb %%mm3, %%mm1\n"
600 "pxor %%mm7, %%mm0\n"
601 "pxor %%mm7, %%mm1\n"
605 SUM(%%mm4, %%mm5, %%mm0, %%mm1)
607 SUM(%%mm0, %%mm1, %%mm4, %%mm5)
612 "movq %%mm6, %%mm0\n"
614 "paddw %%mm6, %%mm0\n"
615 "movq %%mm0, %%mm6\n"
617 "paddw %%mm6, %%mm0\n"
619 :
"+r" (pix1),
"+r" (pix2),
"=r" (tmp)
620 :
"r" ((
x86_reg) line_size),
"m" (h)
628 int line_size,
int h)
632 assert((((
int) pix1) & 7) == 0);
633 assert((((
int) pix2) & 7) == 0);
634 assert((line_size & 7) == 0);
636 #define SUM(in0, in1, out0, out1) \
637 "movq (%0), " #out0 "\n" \
638 "movq (%1), %%mm2\n" \
639 "movq 8(%0), " #out1 "\n" \
640 "movq 8(%1), %%mm3\n" \
643 "psubb %%mm2, " #out0 "\n" \
644 "psubb %%mm3, " #out1 "\n" \
645 "pxor %%mm7, " #out0 "\n" \
646 "pxor %%mm7, " #out1 "\n" \
647 "psadbw " #out0 ", " #in0 "\n" \
648 "psadbw " #out1 ", " #in1 "\n" \
649 "paddw " #in1 ", " #in0 "\n" \
650 "paddw " #in0 ", %%mm6\n "
654 "pxor %%mm6, %%mm6\n"
655 "pcmpeqw %%mm7, %%mm7\n"
657 "packsswb %%mm7, %%mm7\n"
660 "movq 8(%0), %%mm1\n"
661 "movq 8(%1), %%mm3\n"
664 "psubb %%mm2, %%mm0\n"
665 "psubb %%mm3, %%mm1\n"
666 "pxor %%mm7, %%mm0\n"
667 "pxor %%mm7, %%mm1\n"
671 SUM(%%mm4, %%mm5, %%mm0, %%mm1)
673 SUM(%%mm0, %%mm1, %%mm4, %%mm5)
679 :
"+r" (pix1),
"+r" (pix2),
"=r" (tmp)
680 :
"r" ((
x86_reg) line_size),
"m" (h)
/*
 * MMABS_MMX(a, z): per-lane absolute value of the signed 16-bit words in
 * register `a`, using only baseline MMX instructions (AT&T operand order,
 * destination last).
 *
 *   z = 0
 *   z = (0 > a) ? 0xFFFF : 0     per word: all-ones mask where a is negative
 *   a ^= z                       one's complement of the negative lanes
 *   a -= z                       subtracting -1 adds 1, completing the negation
 *
 * `z` is a scratch register and is clobbered (left holding the sign mask).
 * psubw wraps, so a lane holding -32768 stays -32768.
 */
687 #define MMABS_MMX(a,z) \
688 "pxor " #z ", " #z " \n\t" \
689 "pcmpgtw " #a ", " #z " \n\t" \
690 "pxor " #z ", " #a " \n\t" \
691 "psubw " #z ", " #a " \n\t"
/*
 * MMABS_MMXEXT(a, z): per-lane absolute value of the signed 16-bit words in
 * `a`, computed as max(a, -a).
 *
 *   z = 0
 *   z = 0 - a          (psubw, wrapping)
 *   a = max(a, z)      (pmaxsw, signed word maximum)
 *
 * `z` is a scratch register and is clobbered (left holding -a).
 */
693 #define MMABS_MMXEXT(a, z) \
694 "pxor " #z ", " #z " \n\t" \
695 "psubw " #a ", " #z " \n\t" \
696 "pmaxsw " #z ", " #a " \n\t"
/*
 * MMABS_SSSE3(a, z): per-lane absolute value of the signed 16-bit words in
 * `a`, done in place with a single pabsw. The `z` argument is accepted so the
 * macro has the same shape as the other MMABS_* variants, but it is not used
 * and not modified.
 */
698 #define MMABS_SSSE3(a,z) \
699 "pabsw " #a ", " #a " \n\t"
701 #define MMABS_SUM(a,z, sum) \
703 "paddusw " #a ", " #sum " \n\t"
/*
 * HSUM_MMX(a, t, dst): horizontal sum of the four unsigned 16-bit words of
 * 64-bit register `a`, using unsigned-saturating adds (paddusw).
 *
 *   t = a;  a >>= 32;  a += t     word0 = w0+w2, word1 = w1+w3
 *   t = a;  a >>= 16;  a += t     word0 = w0+w1+w2+w3
 *   dst = low 32 bits of a        (movd)
 *
 * Only the low 16 bits of `dst` hold the total; the bits above them are a
 * partial sum and have to be masked off by the user of the macro.
 * Both `a` and `t` are clobbered.
 * NOTE: the expansion ends with a line continuation, so the line that follows
 * the macro must stay free of code.
 */
709 #define HSUM_MMX(a, t, dst) \
710 "movq " #a ", " #t " \n\t" \
711 "psrlq $32, " #a " \n\t" \
712 "paddusw " #t ", " #a " \n\t" \
713 "movq " #a ", " #t " \n\t" \
714 "psrlq $16, " #a " \n\t" \
715 "paddusw " #t ", " #a " \n\t" \
716 "movd " #a ", " #dst " \n\t" \
/*
 * HSUM_MMXEXT(a, t, dst): horizontal sum of the four unsigned 16-bit words of
 * `a`, using pshufw to move lanes instead of shifts.
 *
 *   t = shuffle(a, 0x0E)   t.word0 = a.word2, t.word1 = a.word3
 *   a += t                 word0 = w0+w2, word1 = w1+w3   (paddusw, saturating)
 *   t = shuffle(a, 0x01)   t.word0 = a.word1
 *   a += t                 word0 = w0+w1+w2+w3
 *   dst = low 32 bits of a (movd)
 *
 * Only the low 16 bits of `dst` hold the total. `a` and `t` are clobbered.
 * NOTE: the expansion ends with a line continuation, so the line that follows
 * the macro must stay free of code.
 */
718 #define HSUM_MMXEXT(a, t, dst) \
719 "pshufw $0x0E, " #a ", " #t " \n\t" \
720 "paddusw " #t ", " #a " \n\t" \
721 "pshufw $0x01, " #a ", " #t " \n\t" \
722 "paddusw " #t ", " #a " \n\t" \
723 "movd " #a ", " #dst " \n\t" \
/*
 * HSUM_SSE2(a, t, dst): horizontal sum of the eight unsigned 16-bit words of
 * 128-bit register `a`, using unsigned-saturating adds (paddusw).
 *
 *   t.low64 = a.high64 (movhlps);  a += t    folds 8 words down to 4
 *   t = shufflelo(a, 0x0E);        a += t    folds 4 words down to 2
 *   t = shufflelo(a, 0x01);        a += t    word0 = total
 *   dst = low 32 bits of a         (movd)
 *
 * Only the low 16 bits of `dst` hold the total. `a` and `t` are clobbered.
 * movhlps writes only the low half of `t`; the upper half keeps whatever it
 * held before, which is harmless because only word0 of the result is used.
 * NOTE: the expansion ends with a line continuation, so the line that follows
 * the macro must stay free of code.
 */
725 #define HSUM_SSE2(a, t, dst) \
726 "movhlps " #a ", " #t " \n\t" \
727 "paddusw " #t ", " #a " \n\t" \
728 "pshuflw $0x0E, " #a ", " #t " \n\t" \
729 "paddusw " #t ", " #a " \n\t" \
730 "pshuflw $0x01, " #a ", " #t " \n\t" \
731 "paddusw " #t ", " #a " \n\t" \
732 "movd " #a ", " #dst " \n\t" \
/*
 * DCT_SAD4(m, mm, o): load four vectors and feed each one to MMABS_SUM.
 *
 *   m   suffix pasted onto "mov" to form the load instruction
 *       (the visible users pass `q` -> movq and `dqa` -> movdqa)
 *   mm  register-name prefix (%%mm or %%xmm); a digit is appended to it
 *   o   byte offset added to the address in asm operand %1
 *
 * Registers <mm>2..<mm>5 are loaded from o+0, o+16, o+32 and o+48. They are
 * then passed to MMABS_SUM with <mm>6 / <mm>7 as the second argument and
 * <mm>0 / <mm>1 as alternating accumulators, so <mm>0..<mm>7 are all touched.
 * MMABS_SUM is defined elsewhere in this file; its full body is not shown
 * here.
 * NOTE: the expansion ends with a line continuation, so the line that follows
 * the macro must stay free of code.
 */
734 #define DCT_SAD4(m, mm, o) \
735 "mov"#m" "#o" + 0(%1), " #mm "2 \n\t" \
736 "mov"#m" "#o" + 16(%1), " #mm "3 \n\t" \
737 "mov"#m" "#o" + 32(%1), " #mm "4 \n\t" \
738 "mov"#m" "#o" + 48(%1), " #mm "5 \n\t" \
739 MMABS_SUM(mm ## 2, mm ## 6, mm ## 0) \
740 MMABS_SUM(mm ## 3, mm ## 7, mm ## 1) \
741 MMABS_SUM(mm ## 4, mm ## 6, mm ## 0) \
742 MMABS_SUM(mm ## 5, mm ## 7, mm ## 1) \
/*
 * DCT_SAD_MMX: asm template body for the 64-bit (MMX register) variant.
 *
 * Clears the two accumulators mm0 and mm1, then expands DCT_SAD4 four times
 * with 8-byte movq loads. Offsets 0 and 8 interleave to cover bytes 0..63 of
 * the buffer at operand %1; offsets 64 and 72 cover bytes 64..127, so 128
 * bytes are read in total. The two accumulators are merged with a saturating
 * add, and HSUM reduces mm0 into operand %0.
 *
 * HSUM and MMABS (used via MMABS_SUM) must be #defined to the wanted CPU
 * flavour before this macro is expanded.
 */
744 #define DCT_SAD_MMX \
745 "pxor %%mm0, %%mm0 \n\t" \
746 "pxor %%mm1, %%mm1 \n\t" \
747 DCT_SAD4(q, %%mm, 0) \
748 DCT_SAD4(q, %%mm, 8) \
749 DCT_SAD4(q, %%mm, 64) \
750 DCT_SAD4(q, %%mm, 72) \
751 "paddusw %%mm1, %%mm0 \n\t" \
752 HSUM(%%mm0, %%mm1, %0)
/*
 * DCT_SAD_SSE2: asm template body for the 128-bit (XMM register) variant.
 *
 * Clears the two accumulators xmm0 and xmm1, then expands DCT_SAD4 twice with
 * 16-byte movdqa loads: offset 0 covers bytes 0..63 of the buffer at operand
 * %1 and offset 64 covers bytes 64..127. The accumulators are merged with a
 * saturating add, and HSUM reduces xmm0 into operand %0.
 *
 * movdqa faults on a misaligned address, so the buffer at operand %1 must be
 * 16-byte aligned.
 * HSUM and MMABS (used via MMABS_SUM) must be #defined to the wanted CPU
 * flavour before this macro is expanded.
 */
754 #define DCT_SAD_SSE2 \
755 "pxor %%xmm0, %%xmm0 \n\t" \
756 "pxor %%xmm1, %%xmm1 \n\t" \
757 DCT_SAD4(dqa, %%xmm, 0) \
758 DCT_SAD4(dqa, %%xmm, 64) \
759 "paddusw %%xmm1, %%xmm0 \n\t" \
760 HSUM(%%xmm0, %%xmm1, %0)
762 #define DCT_SAD_FUNC(cpu) \
763 static int sum_abs_dctelem_ ## cpu(int16_t *block) \
770 return sum & 0xFFFF; \
773 #define DCT_SAD DCT_SAD_MMX
774 #define HSUM(a, t, dst) HSUM_MMX(a, t, dst)
775 #define MMABS(a, z) MMABS_MMX(a, z)
780 #define HSUM(a, t, dst) HSUM_MMXEXT(a, t, dst)
781 #define MMABS(a, z) MMABS_MMXEXT(a, z)
786 #define DCT_SAD DCT_SAD_SSE2
787 #define HSUM(a, t, dst) HSUM_SSE2(a, t, dst)
791 #if HAVE_SSSE3_INLINE
792 #define MMABS(a, z) MMABS_SSSE3(a, z)
801 0x0000000000000000ULL,
802 0x0001000100010001ULL,
803 0x0002000200020002ULL,
814 "movq (%1, %%"REG_a
"), %%mm0 \n\t"
815 "movq (%2, %%"REG_a
"), %%mm2 \n\t"
816 "movq (%2, %%"REG_a
"), %%mm4 \n\t"
817 "add %3, %%"REG_a
" \n\t"
818 "psubusb %%mm0, %%mm2 \n\t"
819 "psubusb %%mm4, %%mm0 \n\t"
820 "movq (%1, %%"REG_a
"), %%mm1 \n\t"
821 "movq (%2, %%"REG_a
"), %%mm3 \n\t"
822 "movq (%2, %%"REG_a
"), %%mm5 \n\t"
823 "psubusb %%mm1, %%mm3 \n\t"
824 "psubusb %%mm5, %%mm1 \n\t"
825 "por %%mm2, %%mm0 \n\t"
826 "por %%mm1, %%mm3 \n\t"
827 "movq %%mm0, %%mm1 \n\t"
828 "movq %%mm3, %%mm2 \n\t"
829 "punpcklbw %%mm7, %%mm0 \n\t"
830 "punpckhbw %%mm7, %%mm1 \n\t"
831 "punpcklbw %%mm7, %%mm3 \n\t"
832 "punpckhbw %%mm7, %%mm2 \n\t"
833 "paddw %%mm1, %%mm0 \n\t"
834 "paddw %%mm3, %%mm2 \n\t"
835 "paddw %%mm2, %%mm0 \n\t"
836 "paddw %%mm0, %%mm6 \n\t"
837 "add %3, %%"REG_a
" \n\t"
849 "movq (%1), %%mm0 \n\t"
850 "movq (%1, %3), %%mm1 \n\t"
851 "psadbw (%2), %%mm0 \n\t"
852 "psadbw (%2, %3), %%mm1 \n\t"
853 "paddw %%mm0, %%mm6 \n\t"
854 "paddw %%mm1, %%mm6 \n\t"
855 "lea (%1,%3,2), %1 \n\t"
856 "lea (%2,%3,2), %2 \n\t"
859 :
"+r" (h),
"+r" (blk1),
"+r" (blk2)
868 "pxor %%xmm2, %%xmm2 \n\t"
871 "movdqu (%1), %%xmm0 \n\t"
872 "movdqu (%1, %4), %%xmm1 \n\t"
873 "psadbw (%2), %%xmm0 \n\t"
874 "psadbw (%2, %4), %%xmm1 \n\t"
875 "paddw %%xmm0, %%xmm2 \n\t"
876 "paddw %%xmm1, %%xmm2 \n\t"
877 "lea (%1,%4,2), %1 \n\t"
878 "lea (%2,%4,2), %2 \n\t"
881 "movhlps %%xmm2, %%xmm0 \n\t"
882 "paddw %%xmm0, %%xmm2 \n\t"
883 "movd %%xmm2, %3 \n\t"
884 :
"+r" (h),
"+r" (blk1),
"+r" (blk2),
"=r" (ret)
895 "movq (%1), %%mm0 \n\t"
896 "movq (%1, %3), %%mm1 \n\t"
897 "pavgb 1(%1), %%mm0 \n\t"
898 "pavgb 1(%1, %3), %%mm1 \n\t"
899 "psadbw (%2), %%mm0 \n\t"
900 "psadbw (%2, %3), %%mm1 \n\t"
901 "paddw %%mm0, %%mm6 \n\t"
902 "paddw %%mm1, %%mm6 \n\t"
903 "lea (%1,%3,2), %1 \n\t"
904 "lea (%2,%3,2), %2 \n\t"
907 :
"+r" (h),
"+r" (blk1),
"+r" (blk2)
915 "movq (%1), %%mm0 \n\t"
919 "movq (%1), %%mm1 \n\t"
920 "movq (%1, %3), %%mm2 \n\t"
921 "pavgb %%mm1, %%mm0 \n\t"
922 "pavgb %%mm2, %%mm1 \n\t"
923 "psadbw (%2), %%mm0 \n\t"
924 "psadbw (%2, %3), %%mm1 \n\t"
925 "paddw %%mm0, %%mm6 \n\t"
926 "paddw %%mm1, %%mm6 \n\t"
927 "movq %%mm2, %%mm0 \n\t"
928 "lea (%1,%3,2), %1 \n\t"
929 "lea (%2,%3,2), %2 \n\t"
932 :
"+r" (h),
"+r" (blk1),
"+r" (blk2)
940 "movq "MANGLE(bone)
", %%mm5 \n\t"
941 "movq (%1), %%mm0 \n\t"
942 "pavgb 1(%1), %%mm0 \n\t"
946 "movq (%1), %%mm1 \n\t"
947 "movq (%1,%3), %%mm2 \n\t"
948 "pavgb 1(%1), %%mm1 \n\t"
949 "pavgb 1(%1,%3), %%mm2 \n\t"
950 "psubusb %%mm5, %%mm1 \n\t"
951 "pavgb %%mm1, %%mm0 \n\t"
952 "pavgb %%mm2, %%mm1 \n\t"
953 "psadbw (%2), %%mm0 \n\t"
954 "psadbw (%2,%3), %%mm1 \n\t"
955 "paddw %%mm0, %%mm6 \n\t"
956 "paddw %%mm1, %%mm6 \n\t"
957 "movq %%mm2, %%mm0 \n\t"
958 "lea (%1,%3,2), %1 \n\t"
959 "lea (%2,%3,2), %2 \n\t"
962 :
"+r" (h),
"+r" (blk1),
"+r" (blk2)
973 "movq (%1, %%"REG_a
"), %%mm0 \n\t"
974 "movq (%2, %%"REG_a
"), %%mm1 \n\t"
975 "movq (%1, %%"REG_a
"), %%mm2 \n\t"
976 "movq (%2, %%"REG_a
"), %%mm3 \n\t"
977 "punpcklbw %%mm7, %%mm0 \n\t"
978 "punpcklbw %%mm7, %%mm1 \n\t"
979 "punpckhbw %%mm7, %%mm2 \n\t"
980 "punpckhbw %%mm7, %%mm3 \n\t"
981 "paddw %%mm0, %%mm1 \n\t"
982 "paddw %%mm2, %%mm3 \n\t"
983 "movq (%3, %%"REG_a
"), %%mm4 \n\t"
984 "movq (%3, %%"REG_a
"), %%mm2 \n\t"
985 "paddw %%mm5, %%mm1 \n\t"
986 "paddw %%mm5, %%mm3 \n\t"
987 "psrlw $1, %%mm1 \n\t"
988 "psrlw $1, %%mm3 \n\t"
989 "packuswb %%mm3, %%mm1 \n\t"
990 "psubusb %%mm1, %%mm4 \n\t"
991 "psubusb %%mm2, %%mm1 \n\t"
992 "por %%mm4, %%mm1 \n\t"
993 "movq %%mm1, %%mm0 \n\t"
994 "punpcklbw %%mm7, %%mm0 \n\t"
995 "punpckhbw %%mm7, %%mm1 \n\t"
996 "paddw %%mm1, %%mm0 \n\t"
997 "paddw %%mm0, %%mm6 \n\t"
998 "add %4, %%"REG_a
" \n\t"
1001 :
"r" (blk1a - len),
"r" (blk1b -
len),
"r" (blk2 - len),
1005 static inline void sad8_4_mmx(
uint8_t *blk1,
uint8_t *blk2,
int stride,
int h)
1009 "movq (%1, %%"REG_a
"), %%mm0 \n\t"
1010 "movq 1(%1, %%"REG_a
"), %%mm2 \n\t"
1011 "movq %%mm0, %%mm1 \n\t"
1012 "movq %%mm2, %%mm3 \n\t"
1013 "punpcklbw %%mm7, %%mm0 \n\t"
1014 "punpckhbw %%mm7, %%mm1 \n\t"
1015 "punpcklbw %%mm7, %%mm2 \n\t"
1016 "punpckhbw %%mm7, %%mm3 \n\t"
1017 "paddw %%mm2, %%mm0 \n\t"
1018 "paddw %%mm3, %%mm1 \n\t"
1021 "movq (%2, %%"REG_a
"), %%mm2 \n\t"
1022 "movq 1(%2, %%"REG_a
"), %%mm4 \n\t"
1023 "movq %%mm2, %%mm3 \n\t"
1024 "movq %%mm4, %%mm5 \n\t"
1025 "punpcklbw %%mm7, %%mm2 \n\t"
1026 "punpckhbw %%mm7, %%mm3 \n\t"
1027 "punpcklbw %%mm7, %%mm4 \n\t"
1028 "punpckhbw %%mm7, %%mm5 \n\t"
1029 "paddw %%mm4, %%mm2 \n\t"
1030 "paddw %%mm5, %%mm3 \n\t"
1031 "movq 16+"MANGLE(round_tab)
", %%mm5 \n\t"
1032 "paddw %%mm2, %%mm0 \n\t"
1033 "paddw %%mm3, %%mm1 \n\t"
1034 "paddw %%mm5, %%mm0 \n\t"
1035 "paddw %%mm5, %%mm1 \n\t"
1036 "movq (%3, %%"REG_a
"), %%mm4 \n\t"
1037 "movq (%3, %%"REG_a
"), %%mm5 \n\t"
1038 "psrlw $2, %%mm0 \n\t"
1039 "psrlw $2, %%mm1 \n\t"
1040 "packuswb %%mm1, %%mm0 \n\t"
1041 "psubusb %%mm0, %%mm4 \n\t"
1042 "psubusb %%mm5, %%mm0 \n\t"
1043 "por %%mm4, %%mm0 \n\t"
1044 "movq %%mm0, %%mm4 \n\t"
1045 "punpcklbw %%mm7, %%mm0 \n\t"
1046 "punpckhbw %%mm7, %%mm4 \n\t"
1047 "paddw %%mm0, %%mm6 \n\t"
1048 "paddw %%mm4, %%mm6 \n\t"
1049 "movq %%mm2, %%mm0 \n\t"
1050 "movq %%mm3, %%mm1 \n\t"
1051 "add %4, %%"REG_a
" \n\t"
1054 :
"r" (blk1 - len),
"r" (blk1 - len +
stride),
"r" (blk2 - len),
1058 static inline int sum_mmx(
void)
1062 "movq %%mm6, %%mm0 \n\t"
1063 "psrlq $32, %%mm6 \n\t"
1064 "paddw %%mm0, %%mm6 \n\t"
1065 "movq %%mm6, %%mm0 \n\t"
1066 "psrlq $16, %%mm6 \n\t"
1067 "paddw %%mm0, %%mm6 \n\t"
1068 "movd %%mm6, %0 \n\t"
1070 return ret & 0xFFFF;
1073 static inline int sum_mmxext(
void)
1077 "movd %%mm6, %0 \n\t"
1082 static inline void sad8_x2a_mmx(
uint8_t *blk1,
uint8_t *blk2,
int stride,
int h)
1084 sad8_2_mmx(blk1, blk1 + 1, blk2, stride, h);
1087 static inline void sad8_y2a_mmx(
uint8_t *blk1,
uint8_t *blk2,
int stride,
int h)
1089 sad8_2_mmx(blk1, blk1 + stride, blk2, stride, h);
1092 #define PIX_SAD(suf) \
1093 static int sad8_ ## suf(MpegEncContext *v, uint8_t *blk2, \
1094 uint8_t *blk1, int stride, int h) \
1097 __asm__ volatile ( \
1098 "pxor %%mm7, %%mm7 \n\t" \
1099 "pxor %%mm6, %%mm6 \n\t" \
1102 sad8_1_ ## suf(blk1, blk2, stride, 8); \
1104 return sum_ ## suf(); \
1107 static int sad8_x2_ ## suf(MpegEncContext *v, uint8_t *blk2, \
1108 uint8_t *blk1, int stride, int h) \
1111 __asm__ volatile ( \
1112 "pxor %%mm7, %%mm7 \n\t" \
1113 "pxor %%mm6, %%mm6 \n\t" \
1114 "movq %0, %%mm5 \n\t" \
1115 :: "m" (round_tab[1])); \
1117 sad8_x2a_ ## suf(blk1, blk2, stride, 8); \
1119 return sum_ ## suf(); \
1122 static int sad8_y2_ ## suf(MpegEncContext *v, uint8_t *blk2, \
1123 uint8_t *blk1, int stride, int h) \
1126 __asm__ volatile ( \
1127 "pxor %%mm7, %%mm7 \n\t" \
1128 "pxor %%mm6, %%mm6 \n\t" \
1129 "movq %0, %%mm5 \n\t" \
1130 :: "m" (round_tab[1])); \
1132 sad8_y2a_ ## suf(blk1, blk2, stride, 8); \
1134 return sum_ ## suf(); \
1137 static int sad8_xy2_ ## suf(MpegEncContext *v, uint8_t *blk2, \
1138 uint8_t *blk1, int stride, int h) \
1141 __asm__ volatile ( \
1142 "pxor %%mm7, %%mm7 \n\t" \
1143 "pxor %%mm6, %%mm6 \n\t" \
1146 sad8_4_ ## suf(blk1, blk2, stride, 8); \
1148 return sum_ ## suf(); \
1151 static int sad16_ ## suf(MpegEncContext *v, uint8_t *blk2, \
1152 uint8_t *blk1, int stride, int h) \
1154 __asm__ volatile ( \
1155 "pxor %%mm7, %%mm7 \n\t" \
1156 "pxor %%mm6, %%mm6 \n\t" \
1159 sad8_1_ ## suf(blk1, blk2, stride, h); \
1160 sad8_1_ ## suf(blk1 + 8, blk2 + 8, stride, h); \
1162 return sum_ ## suf(); \
1165 static int sad16_x2_ ## suf(MpegEncContext *v, uint8_t *blk2, \
1166 uint8_t *blk1, int stride, int h) \
1168 __asm__ volatile ( \
1169 "pxor %%mm7, %%mm7 \n\t" \
1170 "pxor %%mm6, %%mm6 \n\t" \
1171 "movq %0, %%mm5 \n\t" \
1172 :: "m" (round_tab[1])); \
1174 sad8_x2a_ ## suf(blk1, blk2, stride, h); \
1175 sad8_x2a_ ## suf(blk1 + 8, blk2 + 8, stride, h); \
1177 return sum_ ## suf(); \
1180 static int sad16_y2_ ## suf(MpegEncContext *v, uint8_t *blk2, \
1181 uint8_t *blk1, int stride, int h) \
1183 __asm__ volatile ( \
1184 "pxor %%mm7, %%mm7 \n\t" \
1185 "pxor %%mm6, %%mm6 \n\t" \
1186 "movq %0, %%mm5 \n\t" \
1187 :: "m" (round_tab[1])); \
1189 sad8_y2a_ ## suf(blk1, blk2, stride, h); \
1190 sad8_y2a_ ## suf(blk1 + 8, blk2 + 8, stride, h); \
1192 return sum_ ## suf(); \
1195 static int sad16_xy2_ ## suf(MpegEncContext *v, uint8_t *blk2, \
1196 uint8_t *blk1, int stride, int h) \
1198 __asm__ volatile ( \
1199 "pxor %%mm7, %%mm7 \n\t" \
1200 "pxor %%mm6, %%mm6 \n\t" \
1203 sad8_4_ ## suf(blk1, blk2, stride, h); \
1204 sad8_4_ ## suf(blk1 + 8, blk2 + 8, stride, h); \
1206 return sum_ ## suf(); \
1215 int line_size,
int h);
/*
 * hadamard_func(cpu): declare the prototypes of the two Hadamard-difference
 * routines for one CPU flavour, ff_hadamard8_diff_<cpu> and
 * ff_hadamard8_diff16_<cpu>. Only declarations are produced; the definitions
 * are not part of this macro.
 */
1217 #define hadamard_func(cpu) \
1218 int ff_hadamard8_diff_ ## cpu(MpegEncContext *s, uint8_t *src1, \
1219 uint8_t *src2, int stride, int h); \
1220 int ff_hadamard8_diff16_ ## cpu(MpegEncContext *s, uint8_t *src1, \
1221 uint8_t *src2, int stride, int h);
1234 c->sum_abs_dctelem = sum_abs_dctelem_mmx;
1236 c->pix_abs[0][0] = sad16_mmx;
1237 c->pix_abs[0][1] = sad16_x2_mmx;
1238 c->pix_abs[0][2] = sad16_y2_mmx;
1239 c->pix_abs[0][3] = sad16_xy2_mmx;
1240 c->pix_abs[1][0] = sad8_mmx;
1241 c->pix_abs[1][1] = sad8_x2_mmx;
1242 c->pix_abs[1][2] = sad8_y2_mmx;
1243 c->pix_abs[1][3] = sad8_xy2_mmx;
1245 c->sad[0] = sad16_mmx;
1246 c->sad[1] = sad8_mmx;
1248 c->sse[0] = sse16_mmx;
1249 c->sse[1] = sse8_mmx;
1250 c->vsad[4] = vsad_intra16_mmx;
1252 c->nsse[0] = nsse16_mmx;
1253 c->nsse[1] = nsse8_mmx;
1256 c->vsad[0] = vsad16_mmx;
1261 c->sum_abs_dctelem = sum_abs_dctelem_mmxext;
1263 c->vsad[4] = vsad_intra16_mmxext;
1265 c->pix_abs[0][0] = sad16_mmxext;
1266 c->pix_abs[1][0] = sad8_mmxext;
1268 c->sad[0] = sad16_mmxext;
1269 c->sad[1] = sad8_mmxext;
1272 c->pix_abs[0][1] = sad16_x2_mmxext;
1273 c->pix_abs[0][2] = sad16_y2_mmxext;
1274 c->pix_abs[0][3] = sad16_xy2_mmxext;
1275 c->pix_abs[1][1] = sad8_x2_mmxext;
1276 c->pix_abs[1][2] = sad8_y2_mmxext;
1277 c->pix_abs[1][3] = sad8_xy2_mmxext;
1279 c->vsad[0] = vsad16_mmxext;
1284 c->sum_abs_dctelem = sum_abs_dctelem_sse2;
1288 c->sad[0] = sad16_sse2;
1291 #if HAVE_SSSE3_INLINE
1293 c->sum_abs_dctelem = sum_abs_dctelem_ssse3;
1299 c->hadamard8_diff[0] = ff_hadamard8_diff16_mmx;
1300 c->hadamard8_diff[1] = ff_hadamard8_diff_mmx;
1304 c->hadamard8_diff[0] = ff_hadamard8_diff16_mmxext;
1305 c->hadamard8_diff[1] = ff_hadamard8_diff_mmxext;
1311 #if HAVE_ALIGNED_STACK
1312 c->hadamard8_diff[0] = ff_hadamard8_diff16_sse2;
1313 c->hadamard8_diff[1] = ff_hadamard8_diff_sse2;
1318 c->hadamard8_diff[0] = ff_hadamard8_diff16_ssse3;
1319 c->hadamard8_diff[1] = ff_hadamard8_diff_ssse3;
#define EXTERNAL_MMX(flags)
#define INLINE_SSE2(flags)
int ff_sse16_sse2(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
Macro definitions for various function/variable attributes.
#define DECLARE_ASM_CONST(n, t, v)
#define hadamard_func(cpu)
#define CODEC_FLAG_BITEXACT
Use only bitexact stuff (except (I)DCT).
#define EXTERNAL_SSE2(flags)
#define INLINE_MMX(flags)
#define INLINE_SSSE3(flags)
#define AV_CPU_FLAG_3DNOW
AMD 3DNOW.
main external API structure.
#define EXTERNAL_SSSE3(flags)
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
#define EXTERNAL_MMXEXT(flags)
struct AVCodecContext * avctx
#define INLINE_MMXEXT(flags)
void ff_me_cmp_init_x86(MECmpContext *c, AVCodecContext *avctx)