From 36d1c3d162c687cd11c05e88a93fa1de78f06eab Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Sun, 19 Jul 2009 17:56:22 +0200 Subject: [PATCH] Micro optimizations for the amp filter. Use memcpy in the special case amp==0 (no amplification) and optimize the code in the performance-critical loop. Interestingly, using the likely()/unlikely() macros made the code slower. Results (three runs on identical input data on a 32bit x86 machine under Linux, gcc-4.4.0): old with --amp 3: 0m0.776s 0m0.790s 0m0.812s, avg: 792 new with --amp 3: 0m0.456s 0m0.492s 0m0.477s, avg: 475 speedup: 1.67 old with --amp 0: 0m0.791s 0m0.808s 0m0.810s, avg: 803 new with --amp 0: 0m0.100s 0m0.103s 0m0.094s, avg: 99 speedup: 8.1 --- amp_filter.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/amp_filter.c b/amp_filter.c index 63b1b9a6..e180db43 100644 --- a/amp_filter.c +++ b/amp_filter.c @@ -32,19 +32,29 @@ struct private_amp_data { static ssize_t amp_convert(char *inbuf, size_t inbuf_len, struct filter_node *fn) { - size_t i, length = PARA_MIN((inbuf_len / 2) * 2, - (fn->bufsize - fn->loaded) / 2 * 2); + size_t i, length = PARA_MIN((inbuf_len / 2), + (fn->bufsize - fn->loaded) / 2); struct private_amp_data *pad = fn->private_data; int16_t *ip = (int16_t *)inbuf, *op = (int16_t *)(fn->buf + fn->loaded); + int factor = 64 + pad->amp; if (!length) return 0; - for (i = 0; i < length / 2; i++) { - int x = (PARA_ABS(*ip) * (64 + pad->amp)) >> 6; - *op++ = *ip++ > 0? PARA_MIN(x, 32767) : PARA_MAX(-x, -32768); + + if (pad->amp == 0) { + memcpy(op, ip, length * 2); + goto out; + } + for (i = 0; i < length; i++) { + int x = (ip[i] * factor) >> 6; + + op[i] = x; + if (op[i] != x) + op[i] = (x >= 32768)? 32767 : -32768; } - fn->loaded += length; - return length; +out: + fn->loaded += length * 2; + return length * 2; } static void amp_close(struct filter_node *fn) -- 2.39.5