95 int step_num,
ui32 repeat)
134 #ifndef OJPH_DISABLE_INTEL_SIMD
180 -1.586134342059924f, -0.052980118572961f, +0.882911075530934f,
182 +1.586134342059924f, +0.052980118572961f, -0.882911075530934f,
194 const si32 *src1 = line_src1->
i32, *src2 = line_src2->
i32;
195 for (
ui32 i = repeat; i > 0; --i)
196 *dst++ -= (*src1++ + *src2++) >> 1;
205 const si32 *src1 = line_src1->
i32, *src2 = line_src2->
i32;
206 for (
ui32 i = repeat; i > 0; --i)
207 *dst++ += (*src1++ + *src2++ + 2) >> 2;
217 si32 *ldst = line_ldst->
i32, *hdst = line_hdst->
i32;
219 const ui32 L_width = (width + (even ? 1 : 0)) >> 1;
220 const ui32 H_width = (width + (even ? 0 : 1)) >> 1;
224 src[width] = src[width-2];
226 const si32* sp = src + (even ? 1 : 0);
228 for (
ui32 i = H_width; i > 0; --i, sp+=2)
229 *dph++ = sp[0] - ((sp[-1] + sp[1]) >> 1);
233 hdst[H_width] = hdst[H_width-1];
235 sp = src + (even ? 0 : 1);
236 const si32* sph = hdst + (even ? 0 : 1);
238 for (
ui32 i = L_width; i > 0; --i, sp+=2, sph++)
239 *dpl++ = *sp + ((2 + sph[-1] + sph[0]) >> 2);
244 line_ldst->
i32[0] = line_src->
i32[0];
246 line_hdst->
i32[0] = line_src->
i32[0] << 1;
256 const si32 *src1 = line_src1->
i32, *src2 = line_src2->
i32;
257 for (
ui32 i = repeat; i > 0; --i)
258 *dst++ += (*src1++ + *src2++) >> 1;
267 const si32 *src1 = line_src1->
i32, *src2 = line_src2->
i32;
268 for (
ui32 i = repeat; i > 0; --i)
269 *dst++ -= (2 + *src1++ + *src2++) >> 2;
278 si32 *lsrc = line_lsrc->
i32, *hsrc = line_hsrc->
i32;
281 const ui32 L_width = (width + (even ? 1 : 0)) >> 1;
282 const ui32 H_width = (width + (even ? 0 : 1)) >> 1;
286 hsrc[H_width] = hsrc[H_width-1];
288 const si32 *sph = hsrc + (even ? 0 : 1);
290 for (
ui32 i = L_width; i > 0; --i, sph++, spl++)
291 *spl -= ((2 + sph[-1] + sph[0]) >> 2);
295 lsrc[L_width] = lsrc[L_width - 1];
297 si32 *dp = dst + (even ? 0 : -1);
298 spl = lsrc + (even ? 0 : -1);
300 for (
ui32 i = L_width + (even ? 0 : 1); i > 0; --i, spl++, sph++)
303 *dp++ = *sph + ((spl[0] + spl[1]) >> 1);
309 line_dst->
i32[0] = line_lsrc->
i32[0];
311 line_dst->
i32[0] = line_hsrc->
i32[0] >> 1;
320 int step_num,
ui32 repeat)
322 float *dst = line_dst->
f32;
323 const float *src1 = line_src1->
f32, *src2 = line_src2->
f32;
325 for (
ui32 i = repeat; i > 0; --i)
326 *dst++ += factor * (*src1++ + *src2++);
332 bool L_analysis_or_H_synthesis,
ui32 repeat)
334 float *dst = line_dst->
f32;
335 const float *src = line_src->
f32;
338 for (
ui32 i = repeat; i > 0; --i)
339 *dst++ = *src++ * factor;
347 ui32 width,
bool even)
351 float *src = line_src->
f32;
352 float *ldst = line_ldst->
f32, *hdst = line_hdst->
f32;
354 const ui32 L_width = (width + (even ? 1 : 0)) >> 1;
355 const ui32 H_width = (width + (even ? 0 : 1)) >> 1;
359 src[width] = src[width-2];
362 const float* sp = src + (even ? 1 : 0);
364 for (
ui32 i = H_width; i > 0; --i, sp+=2)
365 *dph++ = sp[0] + factor * (sp[-1] + sp[1]);
369 hdst[H_width] = hdst[H_width-1];
372 sp = src + (even ? 0 : 1);
373 const float* sph = hdst + (even ? 0 : 1);
375 for (
ui32 i = L_width; i > 0; --i, sp+=2, sph++)
376 *dpl++ = sp[0] + factor * (sph[-1] + sph[0]);
380 ldst[L_width] = ldst[L_width-1];
383 const float* spl = ldst + (even ? 1 : 0);
385 for (
ui32 i = H_width; i > 0; --i, spl++)
386 *dph++ += factor * (spl[-1] + spl[0]);
390 hdst[H_width] = hdst[H_width-1];
393 sph = hdst + (even ? 0 : 1);
395 for (
ui32 i = L_width; i > 0; --i, sph++)
396 *dpl++ += factor * (sph[-1] + sph[0]);
400 for (
ui32 i = L_width; i > 0; --i, dp++)
403 for (
ui32 i = H_width; i > 0; --i, dp++)
409 line_ldst->
f32[0] = line_src->
f32[0];
411 line_hdst->
f32[0] = line_src->
f32[0] + line_src->
f32[0];
422 float *lsrc = line_lsrc->
f32, *hsrc = line_hsrc->
f32;
423 float *dst = line_dst->
f32;
425 const ui32 L_width = (width + (even ? 1 : 0)) >> 1;
426 const ui32 H_width = (width + (even ? 0 : 1)) >> 1;
430 for (
ui32 i = L_width; i > 0; --i, dp++)
433 for (
ui32 i = H_width; i > 0; --i, dp++)
438 hsrc[H_width] = hsrc[H_width-1];
441 const float *sph = hsrc + (even ? 0 : 1);
443 for (
ui32 i = L_width; i > 0; --i, dpl++, sph++)
444 *dpl += factor * (sph[-1] + sph[0]);
448 lsrc[L_width] = lsrc[L_width-1];
451 const float *spl = lsrc + (even ? 0 : -1);
453 for (
ui32 i = H_width; i > 0; --i, dph++, spl++)
454 *dph += factor * (spl[0] + spl[1]);
458 hsrc[H_width] = hsrc[H_width-1];
461 sph = hsrc + (even ? 0 : 1);
463 for (
ui32 i = L_width; i > 0; --i, dpl++, sph++)
464 *dpl += factor * (sph[-1] + sph[0]);
468 lsrc[L_width] = lsrc[L_width-1];
471 dp = dst + (even ? 0 : -1);
472 spl = lsrc + (even ? 0 : -1);
474 for (
ui32 i = L_width+(even?0:1); i > 0; --i, spl++, sph++)
477 *dp++ = *sph + factor * (spl[0] + spl[1]);
483 line_dst->
f32[0] = line_lsrc->
f32[0];
485 line_dst->
f32[0] = line_hsrc->
f32[0] * 0.5f;
void avx2_rev_vert_wvlt_fwd_update(const line_buf *line_src1, const line_buf *line_src2, line_buf *line_dst, ui32 repeat)
void sse_irrev_horz_wvlt_bwd_tx(line_buf *src, line_buf *ldst, line_buf *hdst, ui32 width, bool even)
void gen_rev_vert_wvlt_bwd_update(const line_buf *line_src1, const line_buf *line_src2, line_buf *line_dst, ui32 repeat)
void gen_rev_vert_wvlt_fwd_predict(const line_buf *line_src1, const line_buf *line_src2, line_buf *line_dst, ui32 repeat)
void sse2_rev_horz_wvlt_fwd_tx(line_buf *src, line_buf *ldst, line_buf *hdst, ui32 width, bool even)
void avx_irrev_horz_wvlt_bwd_tx(line_buf *line_dst, line_buf *line_lsrc, line_buf *line_hsrc, ui32 width, bool even)
void avx2_rev_horz_wvlt_fwd_tx(line_buf *line_src, line_buf *line_ldst, line_buf *line_hdst, ui32 width, bool even)
void gen_rev_horz_wvlt_bwd_tx(line_buf *line_dst, line_buf *line_lsrc, line_buf *line_hsrc, ui32 width, bool even)
void(* irrev_horz_wvlt_fwd_tx)(line_buf *src, line_buf *ldst, line_buf *hdst, ui32 width, bool even)
void avx2_rev_vert_wvlt_fwd_predict(const line_buf *line_src1, const line_buf *line_src2, line_buf *line_dst, ui32 repeat)
void avx_irrev_horz_wvlt_fwd_tx(line_buf *line_src, line_buf *line_ldst, line_buf *line_hdst, ui32 width, bool even)
void sse2_rev_vert_wvlt_fwd_update(const line_buf *src1, const line_buf *src2, line_buf *dst, ui32 repeat)
void(* rev_horz_wvlt_bwd_tx)(line_buf *dst, line_buf *lsrc, line_buf *hsrc, ui32 width, bool even)
void sse2_rev_horz_wvlt_bwd_tx(line_buf *dst, line_buf *lsrc, line_buf *hsrc, ui32 width, bool even)
void gen_rev_vert_wvlt_fwd_update(const line_buf *line_src1, const line_buf *line_src2, line_buf *line_dst, ui32 repeat)
static bool wavelet_transform_functions_initialized
void avx_irrev_vert_wvlt_K(const line_buf *line_src, line_buf *line_dst, bool L_analysis_or_H_synthesis, ui32 repeat)
void avx2_rev_vert_wvlt_bwd_predict(const line_buf *line_src1, const line_buf *line_src2, line_buf *line_dst, ui32 repeat)
void init_wavelet_transform_functions()
void(* rev_vert_wvlt_fwd_update)(const line_buf *src1, const line_buf *src2, line_buf *dst, ui32 repeat)
void avx_irrev_vert_wvlt_step(const line_buf *line_src1, const line_buf *line_src2, line_buf *line_dst, int step_num, ui32 repeat)
void sse_irrev_horz_wvlt_fwd_tx(line_buf *src, line_buf *ldst, line_buf *hdst, ui32 width, bool even)
void sse2_rev_vert_wvlt_bwd_predict(const line_buf *src1, const line_buf *src2, line_buf *dst, ui32 repeat)
void gen_rev_horz_wvlt_fwd_tx(line_buf *line_src, line_buf *line_ldst, line_buf *line_hdst, ui32 width, bool even)
void(* rev_horz_wvlt_fwd_tx)(line_buf *src, line_buf *ldst, line_buf *hdst, ui32 width, bool even)
void(* irrev_vert_wvlt_step)(const line_buf *src1, const line_buf *src2, line_buf *dst, int step_num, ui32 repeat)
void gen_irrev_vert_wvlt_K(const line_buf *line_src, line_buf *line_dst, bool L_analysis_or_H_synthesis, ui32 repeat)
void gen_irrev_horz_wvlt_fwd_tx(line_buf *line_src, line_buf *line_ldst, line_buf *line_hdst, ui32 width, bool even)
void sse2_rev_vert_wvlt_fwd_predict(const line_buf *src1, const line_buf *src2, line_buf *dst, ui32 repeat)
void(* rev_vert_wvlt_bwd_update)(const line_buf *src1, const line_buf *src2, line_buf *dst, ui32 repeat)
void avx2_rev_horz_wvlt_bwd_tx(line_buf *line_dst, line_buf *line_lsrc, line_buf *line_hsrc, ui32 width, bool even)
void gen_irrev_horz_wvlt_bwd_tx(line_buf *line_dst, line_buf *line_lsrc, line_buf *line_hsrc, ui32 width, bool even)
void(* rev_vert_wvlt_bwd_predict)(const line_buf *src1, const line_buf *src2, line_buf *dst, ui32 repeat)
void(* rev_vert_wvlt_fwd_predict)(const line_buf *src1, const line_buf *src2, line_buf *dst, ui32 repeat)
void(* irrev_horz_wvlt_bwd_tx)(line_buf *src, line_buf *ldst, line_buf *hdst, ui32 width, bool even)
void sse_irrev_vert_wvlt_K(const line_buf *src, line_buf *dst, bool L_analysis_or_H_synthesis, ui32 repeat)
void(* irrev_vert_wvlt_K)(const line_buf *src, line_buf *dst, bool L_analysis_or_H_synthesis, ui32 repeat)
void sse_irrev_vert_wvlt_step(const line_buf *src1, const line_buf *src2, line_buf *dst, int step_num, ui32 repeat)
void gen_rev_vert_wvlt_bwd_predict(const line_buf *line_src1, const line_buf *line_src2, line_buf *line_dst, ui32 repeat)
void avx2_rev_vert_wvlt_bwd_update(const line_buf *line_src1, const line_buf *line_src2, line_buf *line_dst, ui32 repeat)
void gen_irrev_vert_wvlt_step(const line_buf *line_src1, const line_buf *line_src2, line_buf *line_dst, int step_num, ui32 repeat)
void sse2_rev_vert_wvlt_bwd_update(const line_buf *src1, const line_buf *src2, line_buf *dst, ui32 repeat)
static const float steps[8]