From: Andre Noll Date: Wed, 7 Oct 2009 21:25:27 +0000 (+0200) Subject: Rename mdcdt.c and mdcdt.h. X-Git-Tag: v0.4.1~109 X-Git-Url: http://git.tue.mpg.de/?a=commitdiff_plain;h=f481eaddec671c3783cc098d65be29ea16ee81b1;p=paraslash.git Rename mdcdt.c and mdcdt.h. libvorbis also has an mdct_init(), but our own one gets used. This makes the oggdec filter behave real funny. This patch makes the oggdec filter work again. --- diff --git a/configure.ac b/configure.ac index aadaee34..da9c5dbc 100644 --- a/configure.ac +++ b/configure.ac @@ -90,7 +90,7 @@ playlist sha1 sched audiod grab_client filter_common wav_filter compress_filter http_recv dccp_recv recv_common write_common file_write audiod_command client_common recv stdout filter stdin audioc write client exec send_common ggo udp_recv udp_send color fec fecdec_filter prebuffer_filter mm -server_command_list afs_command_list audiod_command_list bitstream mdct wma_afh +server_command_list afs_command_list audiod_command_list bitstream imdct wma_afh wma_common wmadec_filter " @@ -108,7 +108,7 @@ senders=" http dccp udp" filter_cmdline_objs="add_cmdline(filter compress_filter amp_filter prebuffer_filter)" filter_errlist_objs="filter_common wav_filter compress_filter filter string stdin stdout sched fd amp_filter ggo fecdec_filter fec - prebuffer_filter time bitstream mdct wma_common wmadec_filter" + prebuffer_filter time bitstream imdct wma_common wmadec_filter" filter_ldflags="-lm" filters=" compress wav amp fecdec wmadec prebuffer" @@ -121,7 +121,7 @@ audiod_errlist_objs="audiod signal string daemon stat net time grab_client filter_common wav_filter compress_filter amp_filter http_recv dccp_recv recv_common fd sched write_common file_write audiod_command crypt fecdec_filter client_common ggo udp_recv color fec prebuffer_filter sha1 audiod_command_list - bitstream mdct wma_common wmadec_filter" + bitstream imdct wma_common wmadec_filter" audiod_ldflags="" audiod_audio_formats="wma" diff --git a/error.h b/error.h index 0d2566d7..33d09f28 100644 --- a/error.h +++ b/error.h @@ -56,7 +56,7 @@ extern const char **para_errlist[]; PARA_ERROR(BAD_CODES, "detected incorrect codes") -#define MDCT_ERRORS \ +#define IMDCT_ERRORS \ PARA_ERROR(FFT_BAD_PARAMS, "invalid params for fft"), \ diff --git a/imdct.c b/imdct.c new file mode 100644 index 00000000..e3021b45 --- /dev/null +++ b/imdct.c @@ -0,0 +1,477 @@ +/* + * FFT/IFFT transforms. + * + * Extracted 2009 from mplayer 2009-02-10 libavcodec/fft.c and libavcodec/mdct.c + * + * Copyright (c) 2008 Loren Merritt + * Copyright (c) 2002 Fabrice Bellard + * Partly based on libdjbfft by D. J. Bernstein + * + * Licensed under the GNU Lesser General Public License. + * For licencing details see COPYING.LIB. + */ + +/** + * \file imdct.c Inverse modified discrete cosine transform. + */ + +#include +#include +#include +#include +#include + +#include "para.h" +#include "error.h" +#include "string.h" +#include "imdct.h" +#include "wma.h" + +typedef float fftsample_t; + +#define DECLARE_ALIGNED(n,t,v) t v __attribute__ ((aligned (n))) +#define DECLARE_ALIGNED_16(t, v) DECLARE_ALIGNED(16, t, v) +#define M_SQRT1_2 0.70710678118654752440 /* 1/sqrt(2) */ + +struct fft_complex { + fftsample_t re, im; +}; + +struct fft_context { + int nbits; + int inverse; + uint16_t *revtab; + struct fft_complex *exptab; + struct fft_complex *exptab1; /* only used by SSE code */ + struct fft_complex *tmp_buf; +}; + +struct mdct_context { + /** Size of MDCT (i.e. number of input data * 2). */ + int n; + /** n = 2^n bits. */ + int nbits; + /** pre/post rotation tables */ + fftsample_t *tcos; + fftsample_t *tsin; + struct fft_context fft; +}; + +/* cos(2*pi*x/n) for 0<=x<=n/4, followed by its reverse */ +DECLARE_ALIGNED_16(fftsample_t, ff_cos_16[8]); +DECLARE_ALIGNED_16(fftsample_t, ff_cos_32[16]); +DECLARE_ALIGNED_16(fftsample_t, ff_cos_64[32]); +DECLARE_ALIGNED_16(fftsample_t, ff_cos_128[64]); +DECLARE_ALIGNED_16(fftsample_t, ff_cos_256[128]); +DECLARE_ALIGNED_16(fftsample_t, ff_cos_512[256]); +DECLARE_ALIGNED_16(fftsample_t, ff_cos_1024[512]); +DECLARE_ALIGNED_16(fftsample_t, ff_cos_2048[1024]); +DECLARE_ALIGNED_16(fftsample_t, ff_cos_4096[2048]); +DECLARE_ALIGNED_16(fftsample_t, ff_cos_8192[4096]); +DECLARE_ALIGNED_16(fftsample_t, ff_cos_16384[8192]); +DECLARE_ALIGNED_16(fftsample_t, ff_cos_32768[16384]); +DECLARE_ALIGNED_16(fftsample_t, ff_cos_65536[32768]); + +static fftsample_t *ff_cos_tabs[] = { + ff_cos_16, ff_cos_32, ff_cos_64, ff_cos_128, ff_cos_256, + ff_cos_512, ff_cos_1024, ff_cos_2048, ff_cos_4096, ff_cos_8192, + ff_cos_16384, ff_cos_32768, ff_cos_65536, +}; + +static int split_radix_permutation(int i, int n, int inverse) +{ + int m; + if (n <= 2) + return i & 1; + m = n >> 1; + if (!(i & m)) + return split_radix_permutation(i, m, inverse) * 2; + m >>= 1; + if (inverse == !(i & m)) + return split_radix_permutation(i, m, inverse) * 4 + 1; + else + return split_radix_permutation(i, m, inverse) * 4 - 1; +} + +#define sqrthalf (float)M_SQRT1_2 + +#define BF(x,y,a,b) {\ + x = a - b;\ + y = a + b;\ +} + +#define BUTTERFLIES(a0,a1,a2,a3) {\ + BF(t3, t5, t5, t1);\ + BF(a2.re, a0.re, a0.re, t5);\ + BF(a3.im, a1.im, a1.im, t3);\ + BF(t4, t6, t2, t6);\ + BF(a3.re, a1.re, a1.re, t4);\ + BF(a2.im, a0.im, a0.im, t6);\ +} + +// force loading all the inputs before storing any. +// this is slightly slower for small data, but avoids store->load aliasing +// for addresses separated by large powers of 2. +#define BUTTERFLIES_BIG(a0,a1,a2,a3) {\ + fftsample_t r0=a0.re, i0=a0.im, r1=a1.re, i1=a1.im;\ + BF(t3, t5, t5, t1);\ + BF(a2.re, a0.re, r0, t5);\ + BF(a3.im, a1.im, i1, t3);\ + BF(t4, t6, t2, t6);\ + BF(a3.re, a1.re, r1, t4);\ + BF(a2.im, a0.im, i0, t6);\ +} + +#define TRANSFORM(a0,a1,a2,a3,wre,wim) {\ + t1 = a2.re * wre + a2.im * wim;\ + t2 = a2.im * wre - a2.re * wim;\ + t5 = a3.re * wre - a3.im * wim;\ + t6 = a3.im * wre + a3.re * wim;\ + BUTTERFLIES(a0,a1,a2,a3)\ +} + +#define TRANSFORM_ZERO(a0,a1,a2,a3) {\ + t1 = a2.re;\ + t2 = a2.im;\ + t5 = a3.re;\ + t6 = a3.im;\ + BUTTERFLIES(a0,a1,a2,a3)\ +} + +/* z[0...8n-1], w[1...2n-1] */ +#define PASS(name)\ +static void name(struct fft_complex *z, const fftsample_t *wre, unsigned int n)\ +{\ + fftsample_t t1, t2, t3, t4, t5, t6;\ + int o1 = 2*n;\ + int o2 = 4*n;\ + int o3 = 6*n;\ + const fftsample_t *wim = wre+o1;\ + n--;\ +\ + TRANSFORM_ZERO(z[0],z[o1],z[o2],z[o3]);\ + TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\ + do {\ + z += 2;\ + wre += 2;\ + wim -= 2;\ + TRANSFORM(z[0],z[o1],z[o2],z[o3],wre[0],wim[0]);\ + TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\ + } while(--n);\ +} + +PASS(pass) +#undef BUTTERFLIES +#define BUTTERFLIES BUTTERFLIES_BIG + +#define DECL_FFT(n,n2,n4)\ +static void fft##n(struct fft_complex *z)\ +{\ + fft##n2(z);\ + fft##n4(z+n4*2);\ + fft##n4(z+n4*3);\ + pass(z,ff_cos_##n,n4/2);\ +} +static void fft4(struct fft_complex *z) +{ + fftsample_t t1, t2, t3, t4, t5, t6, t7, t8; + + BF(t3, t1, z[0].re, z[1].re); + BF(t8, t6, z[3].re, z[2].re); + BF(z[2].re, z[0].re, t1, t6); + BF(t4, t2, z[0].im, z[1].im); + BF(t7, t5, z[2].im, z[3].im); + BF(z[3].im, z[1].im, t4, t8); + BF(z[3].re, z[1].re, t3, t7); + BF(z[2].im, z[0].im, t2, t5); +} + +static void fft8(struct fft_complex *z) +{ + fftsample_t t1, t2, t3, t4, t5, t6, t7, t8; + + fft4(z); + + BF(t1, z[5].re, z[4].re, -z[5].re); + BF(t2, z[5].im, z[4].im, -z[5].im); + BF(t3, z[7].re, z[6].re, -z[7].re); + BF(t4, z[7].im, z[6].im, -z[7].im); + BF(t8, t1, t3, t1); + BF(t7, t2, t2, t4); + BF(z[4].re, z[0].re, z[0].re, t1); + BF(z[4].im, z[0].im, z[0].im, t2); + BF(z[6].re, z[2].re, z[2].re, t7); + BF(z[6].im, z[2].im, z[2].im, t8); + + TRANSFORM(z[1], z[3], z[5], z[7], sqrthalf, sqrthalf); +} + +static void fft16(struct fft_complex *z) +{ + fftsample_t t1, t2, t3, t4, t5, t6; + + fft8(z); + fft4(z + 8); + fft4(z + 12); + + TRANSFORM_ZERO(z[0], z[4], z[8], z[12]); + TRANSFORM(z[2], z[6], z[10], z[14], sqrthalf, sqrthalf); + TRANSFORM(z[1], z[5], z[9], z[13], ff_cos_16[1], ff_cos_16[3]); + TRANSFORM(z[3], z[7], z[11], z[15], ff_cos_16[3], ff_cos_16[1]); +} +DECL_FFT(32, 16, 8) +DECL_FFT(64, 32, 16) +DECL_FFT(128, 64, 32) +DECL_FFT(256, 128, 64) +DECL_FFT(512, 256, 128) + +DECL_FFT(1024, 512, 256) +DECL_FFT(2048, 1024, 512) +DECL_FFT(4096, 2048, 1024) +DECL_FFT(8192, 4096, 2048) +DECL_FFT(16384, 8192, 4096) +DECL_FFT(32768, 16384, 8192) +DECL_FFT(65536, 32768, 16384) + +static void (*fft_dispatch[]) (struct fft_complex *) = +{ + fft4, fft8, fft16, fft32, fft64, fft128, fft256, fft512, fft1024, + fft2048, fft4096, fft8192, fft16384, fft32768, fft65536, +}; + +static void fft(struct fft_context *s, struct fft_complex *z) +{ + fft_dispatch[s->nbits - 2] (z); +} + +/* complex multiplication: p = a * b */ +#define CMUL(pre, pim, are, aim, bre, bim) \ +{\ + fftsample_t _are = (are);\ + fftsample_t _aim = (aim);\ + fftsample_t _bre = (bre);\ + fftsample_t _bim = (bim);\ + (pre) = _are * _bre - _aim * _bim;\ + (pim) = _are * _bim + _aim * _bre;\ +} + +/** + * Compute the middle half of the inverse MDCT of size N = 2^nbits + * + * Thus excluding the parts that can be derived by symmetry. + * + * \param output N/2 samples. + * \param input N/2 samples. + */ +static void imdct_half(struct mdct_context *s, fftsample_t *output, + const fftsample_t *input) +{ + int k, n8, n4, n2, n, j; + const uint16_t *revtab = s->fft.revtab; + const fftsample_t *tcos = s->tcos; + const fftsample_t *tsin = s->tsin; + const fftsample_t *in1, *in2; + struct fft_complex *z = (struct fft_complex *)output; + + n = 1 << s->nbits; + n2 = n >> 1; + n4 = n >> 2; + n8 = n >> 3; + + /* pre rotation */ + in1 = input; + in2 = input + n2 - 1; + for (k = 0; k < n4; k++) { + j = revtab[k]; + CMUL(z[j].re, z[j].im, *in2, *in1, tcos[k], tsin[k]); + in1 += 2; + in2 -= 2; + } + fft(&s->fft, z); + + /* post rotation + reordering */ + output += n4; + for (k = 0; k < n8; k++) { + fftsample_t r0, i0, r1, i1; + CMUL(r0, i1, z[n8 - k - 1].im, z[n8 - k - 1].re, + tsin[n8 - k - 1], tcos[n8 - k - 1]); + CMUL(r1, i0, z[n8 + k].im, z[n8 + k].re, tsin[n8 + k], + tcos[n8 + k]); + z[n8 - k - 1].re = r0; + z[n8 - k - 1].im = i0; + z[n8 + k].re = r1; + z[n8 + k].im = i1; + } +} + +/** + * Compute the inverse MDCT of size N = 2^nbits. + * + * \param output N samples. + * \param input N/2 samples. + */ +void imdct(struct mdct_context *s, float *output, const float *input) +{ + int k; + int n = 1 << s->nbits; + int n2 = n >> 1; + int n4 = n >> 2; + + imdct_half(s, output + n4, input); + + for (k = 0; k < n4; k++) { + output[k] = -output[n2 - k - 1]; + output[n - k - 1] = output[n2 + k]; + } +} + +static int fft_init(struct fft_context *s, int nbits, int inverse) +{ + int i, j, m, n; + float alpha, c1, s1, s2; + int split_radix = 1; + + if (nbits < 2 || nbits > 16) + return -E_FFT_BAD_PARAMS; + s->nbits = nbits; + n = 1 << nbits; + + s->tmp_buf = NULL; + s->exptab = para_malloc((n / 2) * sizeof(struct fft_complex)); + s->revtab = para_malloc(n * sizeof(uint16_t)); + s->inverse = inverse; + + s2 = inverse ? 1.0 : -1.0; + + s->exptab1 = NULL; + + if (split_radix) { + for (j = 4; j <= nbits; j++) { + int k = 1 << j; + double freq = 2 * M_PI / k; + fftsample_t *tab = ff_cos_tabs[j - 4]; + for (i = 0; i <= k / 4; i++) + tab[i] = cos(i * freq); + for (i = 1; i < k / 4; i++) + tab[k / 2 - i] = tab[i]; + } + for (i = 0; i < n; i++) + s->revtab[-split_radix_permutation( + i, n, s->inverse) & (n - 1)] = i; + s->tmp_buf = para_malloc(n * sizeof(struct fft_complex)); + } else { + int np, nblocks, np2, l; + struct fft_complex *q; + + for (i = 0; i < (n / 2); i++) { + alpha = 2 * M_PI * (float) i / (float) n; + c1 = cos(alpha); + s1 = sin(alpha) * s2; + s->exptab[i].re = c1; + s->exptab[i].im = s1; + } + + np = 1 << nbits; + nblocks = np >> 3; + np2 = np >> 1; + s->exptab1 = para_malloc(np * 2 * sizeof(struct fft_complex)); + q = s->exptab1; + do { + for (l = 0; l < np2; l += 2 * nblocks) { + *q++ = s->exptab[l]; + *q++ = s->exptab[l + nblocks]; + + q->re = -s->exptab[l].im; + q->im = s->exptab[l].re; + q++; + q->re = -s->exptab[l + nblocks].im; + q->im = s->exptab[l + nblocks].re; + q++; + } + nblocks = nblocks >> 1; + } while (nblocks != 0); + freep(&s->exptab); + + /* compute bit reverse table */ + for (i = 0; i < n; i++) { + m = 0; + for (j = 0; j < nbits; j++) { + m |= ((i >> j) & 1) << (nbits - j - 1); + } + s->revtab[i] = m; + } + } + return 0; +} + +static void fft_end(struct fft_context *ctx) +{ + freep(&ctx->revtab); + freep(&ctx->exptab); + freep(&ctx->exptab1); + freep(&ctx->tmp_buf); +} + +DECLARE_ALIGNED(16, float, ff_sine_128[128]); +DECLARE_ALIGNED(16, float, ff_sine_256[256]); +DECLARE_ALIGNED(16, float, ff_sine_512[512]); +DECLARE_ALIGNED(16, float, ff_sine_1024[1024]); +DECLARE_ALIGNED(16, float, ff_sine_2048[2048]); +DECLARE_ALIGNED(16, float, ff_sine_4096[4096]); + +float *ff_sine_windows[6] = { + ff_sine_128, ff_sine_256, ff_sine_512, ff_sine_1024, + ff_sine_2048, ff_sine_4096 +}; + +// Generate a sine window. +void sine_window_init(float *window, int n) +{ + int i; + + for (i = 0; i < n; i++) + window[i] = sinf((i + 0.5) * (M_PI / (2.0 * n))); +} + +/** + * Init MDCT or IMDCT computation. + */ +int imdct_init(int nbits, int inverse, struct mdct_context **result) +{ + int ret, n, n4, i; + double alpha; + struct mdct_context *s; + + s = para_malloc(sizeof(*s)); + memset(s, 0, sizeof(*s)); + n = 1 << nbits; + s->nbits = nbits; + s->n = n; + n4 = n >> 2; + s->tcos = para_malloc(n4 * sizeof(fftsample_t)); + s->tsin = para_malloc(n4 * sizeof(fftsample_t)); + + for (i = 0; i < n4; i++) { + alpha = 2 * M_PI * (i + 1.0 / 8.0) / n; + s->tcos[i] = -cos(alpha); + s->tsin[i] = -sin(alpha); + } + ret = fft_init(&s->fft, s->nbits - 2, inverse); + if (ret < 0) + goto fail; + *result = s; + return 0; +fail: + freep(&s->tcos); + freep(&s->tsin); + free(s); + return ret; +} + +void imdct_end(struct mdct_context *ctx) +{ + freep(&ctx->tcos); + freep(&ctx->tsin); + fft_end(&ctx->fft); + free(ctx); +} diff --git a/imdct.h b/imdct.h new file mode 100644 index 00000000..46fc4033 --- /dev/null +++ b/imdct.h @@ -0,0 +1,9 @@ +/** \file imdct.h: Exported symbols from imdct.c. */ + +struct mdct_context; + +float *ff_sine_windows[6]; +void sine_window_init(float *window, int n); +int imdct_init(int nbits, int inverse, struct mdct_context **result); +void imdct(struct mdct_context *s, float *output, const float *input); +void imdct_end(struct mdct_context *s); diff --git a/mdct.c b/mdct.c deleted file mode 100644 index 9964d4b6..00000000 --- a/mdct.c +++ /dev/null @@ -1,477 +0,0 @@ -/* - * FFT/IFFT transforms. - * - * Extracted 2009 from mplayer 2009-02-10 libavcodec/fft.c and libavcodec/mdct.c - * - * Copyright (c) 2008 Loren Merritt - * Copyright (c) 2002 Fabrice Bellard - * Partly based on libdjbfft by D. J. Bernstein - * - * Licensed under the GNU Lesser General Public License. - * For licencing details see COPYING.LIB. - */ - -/** - * \file fft.c FFT/MDCT transforms. - */ - -#include -#include -#include -#include -#include - -#include "para.h" -#include "error.h" -#include "string.h" -#include "mdct.h" -#include "wma.h" - -typedef float fftsample_t; - -#define DECLARE_ALIGNED(n,t,v) t v __attribute__ ((aligned (n))) -#define DECLARE_ALIGNED_16(t, v) DECLARE_ALIGNED(16, t, v) -#define M_SQRT1_2 0.70710678118654752440 /* 1/sqrt(2) */ - -struct fft_complex { - fftsample_t re, im; -}; - -struct fft_context { - int nbits; - int inverse; - uint16_t *revtab; - struct fft_complex *exptab; - struct fft_complex *exptab1; /* only used by SSE code */ - struct fft_complex *tmp_buf; -}; - -struct mdct_context { - /** Size of MDCT (i.e. number of input data * 2). */ - int n; - /** n = 2^n bits. */ - int nbits; - /** pre/post rotation tables */ - fftsample_t *tcos; - fftsample_t *tsin; - struct fft_context fft; -}; - -/* cos(2*pi*x/n) for 0<=x<=n/4, followed by its reverse */ -DECLARE_ALIGNED_16(fftsample_t, ff_cos_16[8]); -DECLARE_ALIGNED_16(fftsample_t, ff_cos_32[16]); -DECLARE_ALIGNED_16(fftsample_t, ff_cos_64[32]); -DECLARE_ALIGNED_16(fftsample_t, ff_cos_128[64]); -DECLARE_ALIGNED_16(fftsample_t, ff_cos_256[128]); -DECLARE_ALIGNED_16(fftsample_t, ff_cos_512[256]); -DECLARE_ALIGNED_16(fftsample_t, ff_cos_1024[512]); -DECLARE_ALIGNED_16(fftsample_t, ff_cos_2048[1024]); -DECLARE_ALIGNED_16(fftsample_t, ff_cos_4096[2048]); -DECLARE_ALIGNED_16(fftsample_t, ff_cos_8192[4096]); -DECLARE_ALIGNED_16(fftsample_t, ff_cos_16384[8192]); -DECLARE_ALIGNED_16(fftsample_t, ff_cos_32768[16384]); -DECLARE_ALIGNED_16(fftsample_t, ff_cos_65536[32768]); - -static fftsample_t *ff_cos_tabs[] = { - ff_cos_16, ff_cos_32, ff_cos_64, ff_cos_128, ff_cos_256, - ff_cos_512, ff_cos_1024, ff_cos_2048, ff_cos_4096, ff_cos_8192, - ff_cos_16384, ff_cos_32768, ff_cos_65536, -}; - -static int split_radix_permutation(int i, int n, int inverse) -{ - int m; - if (n <= 2) - return i & 1; - m = n >> 1; - if (!(i & m)) - return split_radix_permutation(i, m, inverse) * 2; - m >>= 1; - if (inverse == !(i & m)) - return split_radix_permutation(i, m, inverse) * 4 + 1; - else - return split_radix_permutation(i, m, inverse) * 4 - 1; -} - -#define sqrthalf (float)M_SQRT1_2 - -#define BF(x,y,a,b) {\ - x = a - b;\ - y = a + b;\ -} - -#define BUTTERFLIES(a0,a1,a2,a3) {\ - BF(t3, t5, t5, t1);\ - BF(a2.re, a0.re, a0.re, t5);\ - BF(a3.im, a1.im, a1.im, t3);\ - BF(t4, t6, t2, t6);\ - BF(a3.re, a1.re, a1.re, t4);\ - BF(a2.im, a0.im, a0.im, t6);\ -} - -// force loading all the inputs before storing any. -// this is slightly slower for small data, but avoids store->load aliasing -// for addresses separated by large powers of 2. -#define BUTTERFLIES_BIG(a0,a1,a2,a3) {\ - fftsample_t r0=a0.re, i0=a0.im, r1=a1.re, i1=a1.im;\ - BF(t3, t5, t5, t1);\ - BF(a2.re, a0.re, r0, t5);\ - BF(a3.im, a1.im, i1, t3);\ - BF(t4, t6, t2, t6);\ - BF(a3.re, a1.re, r1, t4);\ - BF(a2.im, a0.im, i0, t6);\ -} - -#define TRANSFORM(a0,a1,a2,a3,wre,wim) {\ - t1 = a2.re * wre + a2.im * wim;\ - t2 = a2.im * wre - a2.re * wim;\ - t5 = a3.re * wre - a3.im * wim;\ - t6 = a3.im * wre + a3.re * wim;\ - BUTTERFLIES(a0,a1,a2,a3)\ -} - -#define TRANSFORM_ZERO(a0,a1,a2,a3) {\ - t1 = a2.re;\ - t2 = a2.im;\ - t5 = a3.re;\ - t6 = a3.im;\ - BUTTERFLIES(a0,a1,a2,a3)\ -} - -/* z[0...8n-1], w[1...2n-1] */ -#define PASS(name)\ -static void name(struct fft_complex *z, const fftsample_t *wre, unsigned int n)\ -{\ - fftsample_t t1, t2, t3, t4, t5, t6;\ - int o1 = 2*n;\ - int o2 = 4*n;\ - int o3 = 6*n;\ - const fftsample_t *wim = wre+o1;\ - n--;\ -\ - TRANSFORM_ZERO(z[0],z[o1],z[o2],z[o3]);\ - TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\ - do {\ - z += 2;\ - wre += 2;\ - wim -= 2;\ - TRANSFORM(z[0],z[o1],z[o2],z[o3],wre[0],wim[0]);\ - TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\ - } while(--n);\ -} - -PASS(pass) -#undef BUTTERFLIES -#define BUTTERFLIES BUTTERFLIES_BIG - -#define DECL_FFT(n,n2,n4)\ -static void fft##n(struct fft_complex *z)\ -{\ - fft##n2(z);\ - fft##n4(z+n4*2);\ - fft##n4(z+n4*3);\ - pass(z,ff_cos_##n,n4/2);\ -} -static void fft4(struct fft_complex *z) -{ - fftsample_t t1, t2, t3, t4, t5, t6, t7, t8; - - BF(t3, t1, z[0].re, z[1].re); - BF(t8, t6, z[3].re, z[2].re); - BF(z[2].re, z[0].re, t1, t6); - BF(t4, t2, z[0].im, z[1].im); - BF(t7, t5, z[2].im, z[3].im); - BF(z[3].im, z[1].im, t4, t8); - BF(z[3].re, z[1].re, t3, t7); - BF(z[2].im, z[0].im, t2, t5); -} - -static void fft8(struct fft_complex *z) -{ - fftsample_t t1, t2, t3, t4, t5, t6, t7, t8; - - fft4(z); - - BF(t1, z[5].re, z[4].re, -z[5].re); - BF(t2, z[5].im, z[4].im, -z[5].im); - BF(t3, z[7].re, z[6].re, -z[7].re); - BF(t4, z[7].im, z[6].im, -z[7].im); - BF(t8, t1, t3, t1); - BF(t7, t2, t2, t4); - BF(z[4].re, z[0].re, z[0].re, t1); - BF(z[4].im, z[0].im, z[0].im, t2); - BF(z[6].re, z[2].re, z[2].re, t7); - BF(z[6].im, z[2].im, z[2].im, t8); - - TRANSFORM(z[1], z[3], z[5], z[7], sqrthalf, sqrthalf); -} - -static void fft16(struct fft_complex *z) -{ - fftsample_t t1, t2, t3, t4, t5, t6; - - fft8(z); - fft4(z + 8); - fft4(z + 12); - - TRANSFORM_ZERO(z[0], z[4], z[8], z[12]); - TRANSFORM(z[2], z[6], z[10], z[14], sqrthalf, sqrthalf); - TRANSFORM(z[1], z[5], z[9], z[13], ff_cos_16[1], ff_cos_16[3]); - TRANSFORM(z[3], z[7], z[11], z[15], ff_cos_16[3], ff_cos_16[1]); -} -DECL_FFT(32, 16, 8) -DECL_FFT(64, 32, 16) -DECL_FFT(128, 64, 32) -DECL_FFT(256, 128, 64) -DECL_FFT(512, 256, 128) - -DECL_FFT(1024, 512, 256) -DECL_FFT(2048, 1024, 512) -DECL_FFT(4096, 2048, 1024) -DECL_FFT(8192, 4096, 2048) -DECL_FFT(16384, 8192, 4096) -DECL_FFT(32768, 16384, 8192) -DECL_FFT(65536, 32768, 16384) - -static void (*fft_dispatch[]) (struct fft_complex *) = -{ - fft4, fft8, fft16, fft32, fft64, fft128, fft256, fft512, fft1024, - fft2048, fft4096, fft8192, fft16384, fft32768, fft65536, -}; - -static void fft(struct fft_context *s, struct fft_complex *z) -{ - fft_dispatch[s->nbits - 2] (z); -} - -/* complex multiplication: p = a * b */ -#define CMUL(pre, pim, are, aim, bre, bim) \ -{\ - fftsample_t _are = (are);\ - fftsample_t _aim = (aim);\ - fftsample_t _bre = (bre);\ - fftsample_t _bim = (bim);\ - (pre) = _are * _bre - _aim * _bim;\ - (pim) = _are * _bim + _aim * _bre;\ -} - -/** - * Compute the middle half of the inverse MDCT of size N = 2^nbits - * - * Thus excluding the parts that can be derived by symmetry. - * - * \param output N/2 samples. - * \param input N/2 samples. - */ -static void imdct_half(struct mdct_context *s, fftsample_t *output, - const fftsample_t *input) -{ - int k, n8, n4, n2, n, j; - const uint16_t *revtab = s->fft.revtab; - const fftsample_t *tcos = s->tcos; - const fftsample_t *tsin = s->tsin; - const fftsample_t *in1, *in2; - struct fft_complex *z = (struct fft_complex *)output; - - n = 1 << s->nbits; - n2 = n >> 1; - n4 = n >> 2; - n8 = n >> 3; - - /* pre rotation */ - in1 = input; - in2 = input + n2 - 1; - for (k = 0; k < n4; k++) { - j = revtab[k]; - CMUL(z[j].re, z[j].im, *in2, *in1, tcos[k], tsin[k]); - in1 += 2; - in2 -= 2; - } - fft(&s->fft, z); - - /* post rotation + reordering */ - output += n4; - for (k = 0; k < n8; k++) { - fftsample_t r0, i0, r1, i1; - CMUL(r0, i1, z[n8 - k - 1].im, z[n8 - k - 1].re, - tsin[n8 - k - 1], tcos[n8 - k - 1]); - CMUL(r1, i0, z[n8 + k].im, z[n8 + k].re, tsin[n8 + k], - tcos[n8 + k]); - z[n8 - k - 1].re = r0; - z[n8 - k - 1].im = i0; - z[n8 + k].re = r1; - z[n8 + k].im = i1; - } -} - -/** - * Compute the inverse MDCT of size N = 2^nbits. - * - * \param output N samples. - * \param input N/2 samples. - */ -void imdct(struct mdct_context *s, float *output, const float *input) -{ - int k; - int n = 1 << s->nbits; - int n2 = n >> 1; - int n4 = n >> 2; - - imdct_half(s, output + n4, input); - - for (k = 0; k < n4; k++) { - output[k] = -output[n2 - k - 1]; - output[n - k - 1] = output[n2 + k]; - } -} - -static int fft_init(struct fft_context *s, int nbits, int inverse) -{ - int i, j, m, n; - float alpha, c1, s1, s2; - int split_radix = 1; - - if (nbits < 2 || nbits > 16) - return -E_FFT_BAD_PARAMS; - s->nbits = nbits; - n = 1 << nbits; - - s->tmp_buf = NULL; - s->exptab = para_malloc((n / 2) * sizeof(struct fft_complex)); - s->revtab = para_malloc(n * sizeof(uint16_t)); - s->inverse = inverse; - - s2 = inverse ? 1.0 : -1.0; - - s->exptab1 = NULL; - - if (split_radix) { - for (j = 4; j <= nbits; j++) { - int k = 1 << j; - double freq = 2 * M_PI / k; - fftsample_t *tab = ff_cos_tabs[j - 4]; - for (i = 0; i <= k / 4; i++) - tab[i] = cos(i * freq); - for (i = 1; i < k / 4; i++) - tab[k / 2 - i] = tab[i]; - } - for (i = 0; i < n; i++) - s->revtab[-split_radix_permutation( - i, n, s->inverse) & (n - 1)] = i; - s->tmp_buf = para_malloc(n * sizeof(struct fft_complex)); - } else { - int np, nblocks, np2, l; - struct fft_complex *q; - - for (i = 0; i < (n / 2); i++) { - alpha = 2 * M_PI * (float) i / (float) n; - c1 = cos(alpha); - s1 = sin(alpha) * s2; - s->exptab[i].re = c1; - s->exptab[i].im = s1; - } - - np = 1 << nbits; - nblocks = np >> 3; - np2 = np >> 1; - s->exptab1 = para_malloc(np * 2 * sizeof(struct fft_complex)); - q = s->exptab1; - do { - for (l = 0; l < np2; l += 2 * nblocks) { - *q++ = s->exptab[l]; - *q++ = s->exptab[l + nblocks]; - - q->re = -s->exptab[l].im; - q->im = s->exptab[l].re; - q++; - q->re = -s->exptab[l + nblocks].im; - q->im = s->exptab[l + nblocks].re; - q++; - } - nblocks = nblocks >> 1; - } while (nblocks != 0); - freep(&s->exptab); - - /* compute bit reverse table */ - for (i = 0; i < n; i++) { - m = 0; - for (j = 0; j < nbits; j++) { - m |= ((i >> j) & 1) << (nbits - j - 1); - } - s->revtab[i] = m; - } - } - return 0; -} - -static void fft_end(struct fft_context *ctx) -{ - freep(&ctx->revtab); - freep(&ctx->exptab); - freep(&ctx->exptab1); - freep(&ctx->tmp_buf); -} - -DECLARE_ALIGNED(16, float, ff_sine_128[128]); -DECLARE_ALIGNED(16, float, ff_sine_256[256]); -DECLARE_ALIGNED(16, float, ff_sine_512[512]); -DECLARE_ALIGNED(16, float, ff_sine_1024[1024]); -DECLARE_ALIGNED(16, float, ff_sine_2048[2048]); -DECLARE_ALIGNED(16, float, ff_sine_4096[4096]); - -float *ff_sine_windows[6] = { - ff_sine_128, ff_sine_256, ff_sine_512, ff_sine_1024, - ff_sine_2048, ff_sine_4096 -}; - -// Generate a sine window. -void sine_window_init(float *window, int n) -{ - int i; - - for (i = 0; i < n; i++) - window[i] = sinf((i + 0.5) * (M_PI / (2.0 * n))); -} - -/** - * Init MDCT or IMDCT computation. - */ -int mdct_init(int nbits, int inverse, struct mdct_context **result) -{ - int ret, n, n4, i; - double alpha; - struct mdct_context *s; - - s = para_malloc(sizeof(*s)); - memset(s, 0, sizeof(*s)); - n = 1 << nbits; - s->nbits = nbits; - s->n = n; - n4 = n >> 2; - s->tcos = para_malloc(n4 * sizeof(fftsample_t)); - s->tsin = para_malloc(n4 * sizeof(fftsample_t)); - - for (i = 0; i < n4; i++) { - alpha = 2 * M_PI * (i + 1.0 / 8.0) / n; - s->tcos[i] = -cos(alpha); - s->tsin[i] = -sin(alpha); - } - ret = fft_init(&s->fft, s->nbits - 2, inverse); - if (ret < 0) - goto fail; - *result = s; - return 0; -fail: - freep(&s->tcos); - freep(&s->tsin); - free(s); - return ret; -} - -void mdct_end(struct mdct_context *ctx) -{ - freep(&ctx->tcos); - freep(&ctx->tsin); - fft_end(&ctx->fft); - free(ctx); -} diff --git a/mdct.h b/mdct.h deleted file mode 100644 index ce346882..00000000 --- a/mdct.h +++ /dev/null @@ -1,7 +0,0 @@ -struct mdct_context; - -float *ff_sine_windows[6]; -void sine_window_init(float *window, int n); -int mdct_init(int nbits, int inverse, struct mdct_context **result); -void imdct(struct mdct_context *s, float *output, const float *input); -void mdct_end(struct mdct_context *s); diff --git a/wma_common.c b/wma_common.c index c4d96ad0..cab89d3e 100644 --- a/wma_common.c +++ b/wma_common.c @@ -18,7 +18,7 @@ #include "error.h" #include "afh.h" #include "portable_io.h" -#include "mdct.h" +#include "imdct.h" #include "wma.h" const char *search_pattern(const char *pattern, int pattern_len, const char *buf, int buf_size) diff --git a/wmadec_filter.c b/wmadec_filter.c index 2ab6c178..a548d048 100644 --- a/wmadec_filter.c +++ b/wmadec_filter.c @@ -34,7 +34,7 @@ #include "sched.h" #include "filter.h" #include "bitstream.h" -#include "mdct.h" +#include "imdct.h" #include "wma.h" #include "wmadata.h" @@ -138,7 +138,7 @@ static int wmadec_cleanup(struct private_wmadec_data *s) int i; for (i = 0; i < s->nb_block_sizes; i++) - mdct_end(s->mdct_ctx[i]); + imdct_end(s->mdct_ctx[i]); if (s->use_exp_vlc) free_vlc(&s->exp_vlc); @@ -464,7 +464,7 @@ static int wma_decode_init(char *initial_buf, int len, struct private_wmadec_dat return ret; /* init MDCT */ for (i = 0; i < s->nb_block_sizes; i++) { - ret = mdct_init(s->frame_len_bits - i + 1, 1, &s->mdct_ctx[i]); + ret = imdct_init(s->frame_len_bits - i + 1, 1, &s->mdct_ctx[i]); if (ret < 0) return ret; }