/*
	audio_format: audio data format
	
	part of DerMixD
	(c)2010 Thomas Orgis, licensed under GPLv2
*/

#include "basics.hxx"
#include "audio/audio_format.hxx"

// Builds with DMD_INTEGER defined are rejected here: the integer code path of this file is not written yet.
#ifdef DMD_INTEGER
#error "Integer code needs to be added."
#endif

// Number of encodings; used as the length of every per-encoding table in this file.
// codecount itself is not defined here (presumably it comes from audio/audio_format.hxx -- confirm).
#define ENCS codecount

// Simple conversion routines to start with... might get optimized later on.
// Not even thinking about 8 bit law or such.

namespace audio
{

// Storage size in bytes of a single sample value, indexed by encoding.
static const size_t samplesize[ENCS] =
{
	 2 // s16
	,2 // s16i
	,4 // s32
	,4 // s32i
	,4 // f32
	,4 // f32i
};

// Byte count occupied by the given number of sample values in encoding enc.
size_t samples_to_bytes(enum code enc, size_t samples)
{
	// No range check on enc: the enum type is relied upon to keep the table index valid.
	const size_t width = samplesize[enc];
	return width*samples;
}

// Number of whole sample values of encoding enc that fit into the given byte count.
// A remainder smaller than one sample is dropped by the integer division.
size_t bytes_to_samples(enum code enc, size_t bytes)
{
	const size_t width = samplesize[enc];
	return bytes/width;
}

// This is halfway prepared for plain int32 mixing.
// As for s32: conversion between that and f32 is tricky; mpg123 manages it via comparisons in double precision. One needs to ensure that the float-rounded boundaries do not exceed the integer range (what one actually wants is truncation).

// The maximal/minimal values of the 16 and 32 bit integer sample types, written as double precision constants.
// They are the reference for determining safe float variants that are not rounded up (for a maximum) or down (for a minimum).
static const double s16_max =  32767.;
static const double s16_min = -32768.;
static const double s32_max =  2147483647.;
static const double s32_min = -2147483648.;

// The various limits and scales, in their initial evaluation rounded to float.
// There will be corrections at runtime for float accuracy (see clip_init()).
//
// maxvals: upper bound of the scaled sample value, per encoding; used by hard clipping.
// The conversions to float are spelled out because implicit double -> float narrowing
// inside a braced initializer is ill-formed since C++11.
static float maxvals[ENCS] =
{
	 (float)s16_max        // s16
	,(float)s16_max        // s16i
	,(float)s32_max        // s32  (rounds up to 2^31 here; clip_init() corrects that)
	,(float)s32_max        // s32i
	,(float)(AUDIO_SCALE)  // f32
	,(float)(AUDIO_SCALE)  // f32i
};
static float minvals[ENCS] = 
{
	 -32768.      // s16
	,-32768.      // s16i
	,-2147483648. // s32
	,-2147483648. // s32i
	,-AUDIO_SCALE // f32
	,-AUDIO_SCALE // f32i
};

// Factors for conversion to output samples: internal value * ratio = value in the output encoding.
// The quotients are computed in double and converted to float explicitly, since implicit
// narrowing inside a braced initializer is ill-formed since C++11.
// The 32 bit entries are replaced by a rounded-down variant in clip_init().
static float ratios[ENCS] =
{
	 (float)(s16_max/AUDIO_SCALE) // s16
	,(float)(s16_max/AUDIO_SCALE) // s16i
	,(float)(s32_max/AUDIO_SCALE) // s32
	,(float)(s32_max/AUDIO_SCALE) // s32i
	,1 // f32; one could make that different... for dividing off a non-1 audio scale.
	,1 // f32i
};
// Factors for input conversion, reciprocal of the output ratios: sample value * inratio = internal value.
// A factor of exactly 1 makes encode_from() skip the scaling pass.
// Conversions to float are explicit to avoid implicit narrowing inside the braced initializer.
// Open question from the original author: is upfloat needed here?
static float inratios[ENCS] =
{
	 (float)(AUDIO_SCALE/s16_max) // s16
	,(float)(AUDIO_SCALE/s16_max) // s16i
	,(float)(AUDIO_SCALE/s32_max) // s32
	,(float)(AUDIO_SCALE/s32_max) // s32i
	,1 // f32
	,1 // f32i
};

// Thresholds, in internal scale, at or above which a value might need clipping for the given output encoding.
// Subject to floating point proof...
// Derivation, for an integer output that is rounded by adding 0.5:
//  v*out_ratio >= out_max + 0.5
//  v >= (out_max + 0.5)/out_ratio
//  v >= (out_max + 0.5)*in_ratio
// The products are evaluated in double from the initial (uncorrected) table values and converted
// to float explicitly; implicit narrowing in a braced initializer is ill-formed since C++11.
// NOTE(review): this table is not referenced by the code visible in this file and is not refreshed by clip_init().
static float clip_limits[ENCS] =
{
	 (float)((maxvals[signed_16]  +0.5)*inratios[signed_16])
	,(float)((maxvals[signed_16_i]+0.5)*inratios[signed_16_i])
	,(float)((maxvals[signed_32]  +0.5)*inratios[signed_32])
	,(float)((maxvals[signed_32_i]+0.5)*inratios[signed_32_i])
	,(float)(AUDIO_SCALE) // With the 1 in mind.
	,(float)(AUDIO_SCALE) // With the 1 in mind.
};

// Parameters of the soft clipping curve; recomputed by clip_init().
// The initial values describe a clipping range of zero width, in which case encode_to() uses hard clipping.
// AUDIO_SCALE is the max value
static float clip_width = 0; // the width of clipping range (damping room)
static float clip_base = AUDIO_SCALE; // the beginning of clip: values beyond +/- clip_base get bent
static float clip_w2 = 0; // width squared
static float clip_denoff = -AUDIO_SCALE; // The offset for the value in the denominator.

// Produce safe floating point values... as simple casting might increase/decrease values. This would break hard clipping for f32 -> s32.
// downfloat: return a float that is <= v.
// If the plain cast rounded upwards, v is lowered by a doubling multiple of the rounding error
// until the cast result no longer exceeds v.
// The multiple is tracked as a double: the former int multiplier overflowed (undefined behaviour,
// in practice an endless loop) when v lies right next to a float subnormal and more doublings
// are needed than an int can hold. For all inputs where the int version terminated, the result is the same.
static float downfloat(double v)
{
	float fv = (float)v;
	// Amount by which the cast overshot v; positive whenever the loop is entered.
	double step = (double)fv - v;
	while(fv > v)
	{
		fv = (float)(v - step);
		step *= 2;
	}
	return fv;
}

// upfloat: return a float that is >= v.
// If the plain cast rounded downwards, v is raised by a doubling multiple of the rounding error
// until the cast result is no longer below v.
// The multiple is tracked as a double: the former int multiplier overflowed (undefined behaviour,
// in practice an endless loop) when v lies right next to a float subnormal and more doublings
// are needed than an int can hold. For all inputs where the int version terminated, the result is the same.
static float upfloat(double v)
{
	float fv = (float)v;
	// Amount by which the cast fell short of v; positive whenever the loop is entered.
	double step = v - (double)fv;
	while(fv < v)
	{
		fv = (float)(v + step);
		step *= 2;
	}
	return fv;
}

// Set up the clipping parameters and fix the 32 bit limits for float accuracy.
// width: size of the soft clipping range as a fraction of AUDIO_SCALE; its sign is ignored.
// A resulting clip_width of zero makes encode_to() fall back to hard clipping.
void clip_init(float width)
{
	// Only the magnitude counts; scaling by AUDIO_SCALE keeps the idea of a non-unit scale alive.
	const float magnitude = (width >= 0. ? width : -width);

	clip_width  = magnitude*AUDIO_SCALE;
	clip_base   = AUDIO_SCALE - clip_width;
	clip_w2     = clip_width*clip_width;
	clip_denoff = 2*clip_width-AUDIO_SCALE;

	// The 32 bit integer limits are not exactly representable as float:
	// replace the table entries with values that stay inside the integer range.
	// (Other encodings could be treated likewise for consistency, but that is not necessary.)
	const float top32    = downfloat(s32_max);
	const float bottom32 = upfloat(s32_min);
	const float scale32  = downfloat(s32_max/AUDIO_SCALE);

	maxvals[signed_32_i] = top32;
	maxvals[signed_32]   = top32;
	minvals[signed_32_i] = bottom32;
	minvals[signed_32]   = bottom32;
	ratios[signed_32_i]  = scale32;
	ratios[signed_32]    = scale32;
}

// Clipping and conversion are done as separate stages.
// It is not known whether this gives gain or pain in performance.
// Also, the clip functions were meant to be public, for debugging (they are declared static here, though).

// Smooth clipping followed by scaling to the output encoding, done in place on count values.
// Only call this when clip_width > 0!
// Values beyond +/- clip_base are bent towards +/- AUDIO_SCALE; everything else is only scaled.
static void soft_scale_n_clip(const enum code enc, float *in, size_t count)
{
	for(size_t n=0; n<count; ++n)
	{
		float sample = in[n];

		// Open question from the original: optimize the branching or keep computing time constant?
		if(sample > clip_base)
		{
			sample = AUDIO_SCALE - clip_w2/(clip_denoff + sample);
		}
		else if(sample < -clip_base)
		{
			sample = -AUDIO_SCALE + clip_w2/(clip_denoff - sample);
		}

		// Scaling for the given encoding; the soft clipping is trusted to keep the result within limits.
		in[n] = sample*ratios[enc];
	}
}

// Scaling to the output encoding followed by simple hard clipping, done in place on count values.
// The limits come from the per-encoding minvals/maxvals tables, so the lower and upper bound need not be symmetric:
// for s16 the lower bound is -32768, which lets that input value pass through.
// For float and integer types with less precision than float this preserves every input bit;
// 32 bit integer is not fully covered, as 32 bit float precision cannot represent it.
static void hard_scale_n_clip(const enum code enc, float *in, size_t count)
{
	for(size_t n=0; n<count; ++n)
	{
		// First scale for the target encoding ...
		float scaled = in[n]*ratios[enc];

		// ... then cut off whatever exceeds the limits of that encoding.
		if(scaled > maxvals[enc])
		{
			scaled = maxvals[enc];
		}
		else if(scaled < minvals[enc])
		{
			scaled = minvals[enc];
		}

		in[n] = scaled;
	}
}

// Pointer to the start of non-interleaved channel ch, from the base pointer, with count samples per channel.
// All arguments are parenthesized so that expressions like a+b can be passed safely.
#define chan_ptr(ptr, count, ch) ((ptr)+((count)*(ch)))
// Index of sample i of channel ch in an interleaved buffer with the given channel count.
#define isample(channels, ch, i) ((i)*(channels)+(ch))

// Those are the sample type conversion routines, as templates to cover the different conversions from/to integer types and float.
// Also, (de)interleaving is done here centrally. Reason being that both operations are desired at the same point in time, and both need individual mangling of sample values.

// Copy count*chan values from veca to vecb, converting each with a plain cast from A to B.
// The memory layout is kept as it is.
template<class A, class B> static void castcopy(A *veca, B *vecb, size_t count, unsigned int chan)
{
	const size_t total = count*chan;
	for(size_t n=0; n<total; ++n)
	{
		vecb[n] = (B)(veca[n]);
	}
}

// Convert channel-wise stored data (chan blocks of count values each) in veca
// into interleaved frames in vecb, casting every value from A to B.
template<class A, class B> static void castcopy_interleave(A *veca, B *vecb, size_t count, unsigned int chan)
{
	for(unsigned int c=0; c<chan; ++c)
	{
		A * const plane = chan_ptr(veca, count, c);
		// Position of this channel inside the current output frame; advances by one frame per sample.
		size_t pos = c;
		for(size_t n=0; n<count; ++n)
		{
			vecb[pos] = (B)(plane[n]);
			pos += chan;
		}
	}
}

// Convert interleaved frames in veca into channel-wise storage (chan blocks of count values each)
// in vecb, casting every value from A to B.
template<class A, class B> static void castcopy_deinterleave(A *veca, B *vecb, size_t count, unsigned int chan)
{
	for(unsigned int c=0; c<chan; ++c)
	{
		B * const plane = chan_ptr(vecb, count, c);
		// Position of this channel inside the current input frame; advances by one frame per sample.
		size_t pos = c;
		for(size_t n=0; n<count; ++n)
		{
			plane[n] = (B)(veca[pos]);
			pos += chan;
		}
	}
}

// The above but with rounding instead of simple casts.

// Rounding might be optimized in future (see mpg123).
// That's why it's here.
// Oh, and of course there might be dithering and such.
// It's used for both 16 and 32 bit integers...

// Round x to the nearest integer of type INT, halves going away from zero.
// Works by shifting half a step away from zero and letting the cast truncate.
// The caller has to make sure the value fits into INT.
template<class FLT, class INT> static INT rounder(FLT x)
{
	if(x>0.0)
	{
		return (INT)(x+0.5);
	}
	return (INT)(x-0.5);
}

// Copy count*chan values from veca to vecb, rounding each one from A to B via rounder().
// The memory layout is kept as it is.
template<class A, class B> static void castround(A *veca, B *vecb, size_t count, unsigned int chan)
{
	const size_t total = count*chan;
	for(size_t n=0; n<total; ++n)
	{
		vecb[n] = rounder<A,B>(veca[n]);
	}
}

// Convert channel-wise stored data (chan blocks of count values each) in veca
// into interleaved frames in vecb, rounding every value from A to B via rounder().
template<class A, class B> static void castround_interleave(A *veca, B *vecb, size_t count, unsigned int chan)
{
	for(unsigned int c=0; c<chan; ++c)
	{
		A * const plane = chan_ptr(veca, count, c);
		// Position of this channel inside the current output frame; advances by one frame per sample.
		size_t pos = c;
		for(size_t n=0; n<count; ++n)
		{
			vecb[pos] = rounder<A, B>(plane[n]);
			pos += chan;
		}
	}
}

// Convert interleaved frames in veca into channel-wise storage (chan blocks of count values each)
// in vecb, rounding every value from A to B via rounder().
template<class A, class B> static void castround_deinterleave(A *veca, B *vecb, size_t count, unsigned int chan)
{
	for(unsigned int c=0; c<chan; ++c)
	{
		B * const plane = chan_ptr(vecb, count, c);
		// Position of this channel inside the current input frame; advances by one frame per sample.
		size_t pos = c;
		for(size_t n=0; n<count; ++n)
		{
			plane[n] = rounder<A,B>(veca[pos]);
			pos += chan;
		}
	}
}

void encode_from(enum code enc, const void* in, size_t bytes, audio_type *out, unsigned int channels, size_t &samples)
{
	samples = bytes_to_samples(enc, bytes) / channels;
#ifndef DMD_INTEGER
	switch(enc)
	{
		case signed_16:
			castcopy<int16_t, float>((int16_t*)in, out, samples, channels);
		break;
		case signed_16_i:
			castcopy_deinterleave<int16_t, float>((int16_t*)in, out, samples, channels);
		break;
		case signed_32:
			castcopy<int32_t, float>((int32_t*)in, out, samples, channels);
		break;
		case signed_32_i:
			castcopy_deinterleave<int32_t, float>((int32_t*)in, out, samples, channels);
		break;
		case float_32:
			memcpy(out, in, bytes);
		break;
		case float_32_i:
			castcopy_deinterleave<float, float>((float*)in, out, samples, channels);
		break;
	}
	// Input scaling.
	if(inratios[enc] != 1) // Special case .. since it could be float2float without scaling at all.
	for(size_t i=0; i<(samples*channels); ++i)
	out[i] *= inratios[enc];
#else
#error "At least give me something here."
#endif
}

void scale_to_sample(float scale, enum code enc, void *sample)
{
	float buf = scale*AUDIO_SCALE;
	hard_scale_n_clip(enc, &buf, 1);
	switch(enc)
	{
		case signed_16_i:
		case signed_16:
			castround<float, int16_t>(&buf, (int16_t*)sample, 1, 1);
		break;
		case signed_32_i:
		case signed_32:
			castround<float, int32_t>(&buf, (int32_t*)sample, 1, 1);
		break;
		case float_32_i:
		case float_32:
			*((float*)sample) = buf;
		break;
	}
}


// Convert internal samples into raw output data of the given encoding.
//  enc:      target encoding; the *_i variants are interleaved on the way
//  in:       samples*channels internal values; they are scaled and clipped IN PLACE before conversion
//  channels: number of channels
//  samples:  number of samples per channel
//  out:      destination for the encoded data
//  bytes:    set to the size of the encoded data in bytes
void encode_to(enum code enc, audio_type *in, unsigned int channels, size_t samples, void *out, size_t &bytes)
{
	const size_t total = samples*channels;
	bytes = samples_to_bytes(enc, total);

	// Clipping is always applied, soft or hard, without checking whether it is needed:
	// switching between the two on the fly would introduce discontinuities.
	if(clip_width > 0)
	{
		soft_scale_n_clip(enc, in, total);
	}
	else
	{
		hard_scale_n_clip(enc, in, total);
	}
#ifndef DMD_INTEGER
	switch(enc)
	{
		case signed_16:
			castround<float, int16_t>(in, (int16_t*)out, samples, channels);
		break;
		case signed_16_i:
			castround_interleave<float, int16_t>(in, (int16_t*)out, samples, channels);
		break;
		case signed_32:
			castround<float, int32_t>(in, (int32_t*)out, samples, channels);
		break;
		case signed_32_i:
			castround_interleave<float, int32_t>(in, (int32_t*)out, samples, channels);
		break;
		case float_32:
			// Same layout and type: a plain copy does it.
			memcpy(out, in, bytes);
		break;
		case float_32_i:
			castcopy_interleave<float, float>(in, (float*)out, samples, channels);
		break;
	}
#else
#error "At least give me something here."
#endif
}

}
