/*
	audio_buffer: audio data storage
	
	part of DerMixD
	(c)2010-2014 Thomas Orgis, licensed under GPLv2
*/

#include "basics.hxx"
#include "audio/audio_buffer.hxx"

#include "debug.hxx"

// Tunables for the smoothing of buffer beginnings (mixer_buffer::smooth_beginning()).
// It could be that this smoothing business is all crap... except for heavy volume discrepancies, perhaps. Dunno.
// Upper limit for the number of samples (per channel) at the buffer start that get adjusted.
const size_t smooth_range = 50;
// Tolerated jump in sample value: Smaller gaps are left alone, bigger ones are reduced by this amount (see gap_filter()).
const audio_type smooth_threshold = 0.001;


/*
	Append as much of the source buffer's content as fits into the free room here.
	The source itself is not modified; the return value is the number of samples
	(per channel) that got copied, so the caller can drop them from the source.
	Buffers with differing channel counts are refused (return value 0).
*/
size_t mixer_buffer::fill_in(mixer_buffer &source)
{
	if(channels != source.channels) return 0;

	// Take what is offered, but not more than there is room left.
	const size_t room  = size-fill;
	const size_t taken = (source.fill > room) ? room : source.fill;

	memcpy(ptr+fill*channels, source.ptr, taken*blocksize());
	fill += taken;
	return taken;
}


// Nothing to do in here. There is also no debugging message because the
// constructor bears none, either.
mixer_buffer::~mixer_buffer()
{
}

/*
	Theory of the smoothing:
	l     last sample value of leader
	f(i)  ith sample of the follower
	t     jump threshold
	r     smoothing range (sample count)
	j     actual jump

	n(i)  new sample at position 0 <= i < r

	j = f-l
	abs(j) > t ? --> smooth

	Limit the actual jump to +-t. It's positive or negative according to j.
	So, we want to subtract (j-t) from the first sample and about zero from the last.

	n(0) = l+t = f(0)-j+t = f(0) - (r-0)/r * (j-t)
	n(r) = f(r) = f(r)-0  = f(r) - (r-r)/r * (j-t)

	n(i) = f(i) - (r-i)/r * (j-t)   for  0<=i<r
*/

// Reduce the magnitude of the given gap value by one smooth threshold... if the gap
// actually was at least that wide before.
// Return true if the gap was that wide (and got reduced), false if it is within
// the threshold and hence was left untouched. A gap of exactly zero is never wide.
static bool gap_filter(audio_type &gap)
{
	if(gap >= smooth_threshold)
	{
		gap -= smooth_threshold;
		return true;
	}
	if(gap <= -smooth_threshold)
	{
		gap += smooth_threshold;
		return true;
	}
	// Within the tolerated range (including no gap at all): nothing to smooth.
	return false;
}

/*
	Smooth the transition from a preceding (leader) signal into this buffer.

	startval holds the last sample value of the leader, one entry per channel.
	For each channel, the jump j = f(0) - l from that value to the first sample here
	is computed. If its magnitude exceeds smooth_threshold, the excess (j-t) is taken
	away from the beginning of the buffer, linearly fading out over up to smooth_range
	samples, as described in the theory comment:

		n(i) = f(i) - (r-i)/r * (j-t)   for  0<=i<r

	Channels whose jump is within the threshold are left untouched.
	Only mono and stereo buffers are handled, the sample data being addressed as
	interleaved frames. Other channel counts produce an error message.
*/
void mixer_buffer::smooth_beginning(audio_type *startval)
{
	const size_t range = (fill > smooth_range) ? smooth_range : fill;
	if(!range) return;

	// There is nothing else than mono and stereo.
	if(channels != 1 && channels != 2)
	{
		MERROR("[mixer_buffer %p] Unsupported channel count for smoothing.", this);
		return;
	}

	// The part of the jump to remove (j-t), per channel.
	// It stays zero for a channel that does not need smoothing.
	audio_type excess[2] = { 0, 0 };
	bool smooth = false;
	// Each channel is examined on its own; a wide gap in one channel must not
	// prevent the evaluation of the other.
	for(unsigned int c = 0; c < channels; ++c)
	{
		// j = f(0) - l ... follower minus leader, so that subtracting it moves towards the leader.
		audio_type gap = ptr[c] - startval[c];
		if(gap_filter(gap))
		{
			excess[c] = gap;
			smooth = true;
		}
	}
	if(!smooth) return;

	for(size_t i = 0; i < range; ++i)
	{
		// (r-i)/r: Starts with 1 for the first sample, ends with 1/r.
		const float gapfac = (float)(range-i)/range;
		for(unsigned int c = 0; c < channels; ++c)
		ptr[i*channels+c] -= (audio_type)(gapfac*excess[c]);
	}
}

// Throw away the given number of samples (per channel) from the beginning of the
// buffer, moving the remaining data to the front.
// Asking for more than is stored just empties the buffer.
void mixer_buffer::drop(size_t count)
{
	const size_t gone = (count > fill) ? fill : count;

	fill -= gone;
	// Source and destination may overlap, hence memmove.
	memmove(ptr, ptr+gone*channels, blocksize()*fill);
}

#ifdef DMD_INTEGER
#error "This code is for floats ... needs major adaption for plain integer math."
#endif

// Little helpers that I hope are going to be inlined by any sane compiler.

// Split a fractional buffer position into an integer sample offset and the
// fractional distance towards the following offset.
// Positions count from 1: Position 1.0 is the first sample (offset 0). Positions
// below 1 lie in the room before the first sample and yield offset -1 (history).
// The position is expected to be non-negative, as truncation is used for floor().
static void fracpart(float frac, int &base, float &part)
{
	const int whole = (int) frac;
	part = frac - whole;
	// Shift to zero-based offsets: -1: history, 0: first sample.
	base = whole - 1;
}

// Interpolation from stereo samples (interleaved frames in src), volume included.
// This writes two samples to target, for downmix, you combine those two...
//   frac:  position in src as understood by fracpart() (1.0 is the first frame)
//   start: the two values to interpolate from for positions before the first frame
//   vol:   volume factors for the two channels
// A position exactly on a frame only accesses that frame. This matters for
// the very last frame of the source, where there is no valid successor.
static void interpolate_stereo(audio_type *target, const audio_type *src, const float frac, const audio_type *start, const float *vol)
{
	int base;
	float part;
	fracpart(frac, base, part);
	for(int c=0;c<2;++c)
	{
		if(base < 0) // The first bit: between start value and first frame.
		target[c] = (start[c] + part*(vol[c]*src[c] - start[c]));
		else if(part == 0) // Right on a frame, do not touch the next one.
		target[c] = vol[c]*src[2*base+c];
		else // In between.
		target[c] = (vol[c]*(part*(src[2*(base+1)+c] - src[2*base+c]) + src[2*base+c]));
	}
}

// Return a mono sample interpolated from mono source, with volume applied.
//   frac:  position in src as understood by fracpart() (1.0 is the first sample)
//   start: value to interpolate from for positions before the first sample
// A position exactly on a sample only accesses that sample. This matters for
// the very last sample of the source, where there is no valid successor.
static audio_type interpolate_mono(const audio_type *src, const float frac, const audio_type start, const float vol)
{
	int base;
	float part;
	fracpart(frac, base, part);
	// The first bit: between start value and first sample.
	if(base < 0) return (start + part*(vol*src[0] - start));
	// Right on a sample, do not touch the next one.
	if(part == 0) return vol*src[base];
	// In between.
	return (vol*(part*(src[base+1] - src[base]) + src[base]));
}

// Interpolate stereo samples from mono source including volume treatment.
//   frac:  position in src as understood by fracpart() (1.0 is the first sample)
//   start: the two values to interpolate from for positions before the first sample
//   vol:   volume factors for the two output channels
// A position exactly on a sample only accesses that sample. This matters for
// the very last sample of the source, where there is no valid successor.
static void interpolate_mono2stereo(audio_type *target, const audio_type *src, const float frac, const audio_type *start, const float *vol)
{
	int base;
	float part;
	fracpart(frac, base, part);
	if(base < 0)
	{
		// The first bit: between start values and first sample.
		target[0] = (start[0] + part*(vol[0]*src[0] - start[0]));
		target[1] = (start[1] + part*(vol[1]*src[0] - start[1]));
		return;
	}

	// Right on a sample (do not touch the next one then), or in between.
	if(part == 0) target[0] = target[1] = src[base];
	else          target[0] = target[1] = part*(src[base+1] - src[base]) + src[base];

	target[0] *= vol[0];
	target[1] *= vol[1];
}

/*
	This has been fullchansample() in audio_functions.
	Resample the source.fill samples of the source buffer to b samples that are
	appended to this buffer, with channel conversion and volume included...

	only for 2on2, 1on1, 2on1, 1on2 channel situations
	NOTE(review): Equal channel counts other than 2 all take the mono code path,
	which treats b plain values, so a general XonX is not really covered -- confirm.

	Parameters:
	source    buffer to read from (only read here, nothing is consumed)
	b         number of samples (per channel) to produce; nothing happens for b == 0
	vol       volume factors; vol[0] for left or mono, vol[1] for right.
	          For the downmix, both act as weights for the respective source channel.
	startval  values to start the linear interpolation from, before the first source
	          sample (presumably the last output of the preceding call -- confirm).
	          It is updated with the last values written, but only in the resampling
	          code paths, not when source.fill == b.

	The work is selected by channel setup and the ratio r = source.fill/b:
	source.fill == b   plain copy with volume
	r > 1              downsampling via arithmetic mean over the covered source samples
	otherwise          upsampling via linear interpolation (helper functions)

	NOTE(review): There is no check that b samples fit into the free space of this
	buffer; the caller apparently has to ensure that.
	NOTE(review): In the upsampling loops, the position r*i reaches about source.fill
	for i == b. The interpolation helper may then address the sample following the
	last one filled in the source -- verify how the helpers treat that.
	NOTE(review): The downmix sums up the channels in the plain copy case, but halves
	that sum in both resampling cases. This looks like an inconsistency in gain.

	I spare me the vol=0 optimization since this dumb situation doesn't seem to be worth it
	One might optimize for vol=1 and matching buffer sizes (heck, just exchange points).
	But question is: Do I want a sudden change in code path (increase of CPU work) once you change volume?
	Some predictability in CPU usage might be good.

	For resampling there are always two startvals saved (possibly identical)... note that this resampling is really last resort to make it work at all. It's simple linear interpolation / reduction via mean.
	That being said, it's fine enough for any party or casual hearing.

	This function is what used to be called fullchansample_pan. Panning (separate volume per channel) is now included in the one and only default routine. I didn't really bother to measure if there's any performance impact, and also, the same consideration as for optimizations for specific volume values applies.
*/
void mixer_buffer::chesamplify(mixer_buffer& source, const size_t b, const float vol[2], audio_type startval[2])
{
//  bad check with restricted data type... 4096**3 does make zero...
//	if(A.fill*b*B.size != 0) //should be some kind of error... or not?
//	{
	if(b) //at least no division by zero
	{
		// Output position: Right after the data already stored here.
		audio_type* bpoint = ptr + fill*channels; //b should only be filled up
		// Source samples per output sample.
		float r = 1.0*source.fill/b; //resampling factor
		if(source.channels == channels)
		{
			SXDEBUG("fullchansample_pan evenmix");
			if(source.fill == b)
			{
				// No resampling, just volume. Note that startval is left alone here.
				if(source.channels == 2)
				{
					for(size_t i = 0; i < b; ++i)
					{
						bpoint[2*i]   = vol[0]*source.ptr[2*i];
						bpoint[2*i+1] = vol[1]*source.ptr[2*i+1];
					}
				}
				else // 1 channel... panning doesn't make sense
				{
					for(size_t i = 0; i < b; ++i)
					{
						bpoint[i] = vol[0]*source.ptr[i];
					}
				}
			}
			else
			{
				if(source.channels == 2)
				{
					//This is the proven code of fullsample_hq for stereo.
					if(r > 1) //a > b ... downsample, arithmetic mean
					{
						SXDEBUG("fullchansample_pan 2on2 downsample");
						for(size_t i = 0; i < b; ++i) //samples of B
						{
							float sum = 0;
							float sum2 = 0;
							size_t n = 0;
							// Average the source frames from offset floor(r*i) up to, excluding, floor(r*(i+1)).
							for(size_t j = (size_t) (r*i); j < (size_t) (r*(i+1)); ++j)
							{
								++n;
								sum += source.ptr[2*j];
								sum2 += source.ptr[2*j+1];
							}
							bpoint[2*i]   = vol[0]*sum/n;
							bpoint[2*i+1] = vol[1]*sum2/n;
						}
					}
					else //(a < b)... linear interpolation
					{
						SXDEBUG("fullchansample_pan 2on2 upsample");
						// Positions count from 1 (see fracpart()), hence the loop from 1 to b.
						for(size_t i = 1; i <= b; ++i) //samples of B
						{
							// i-1 is the sample index, 2*(i-1) offset with channels considered.
							interpolate_stereo(bpoint+(2*(i-1)), source.ptr, r*i, startval, vol);
						}
					}
					// Remember the last output frame as start for following interpolation.
					startval[0] = bpoint[2*b-2]; startval[1] = bpoint[2*b-1];
				}
				else //mono!!!!
				{
					if(r > 1) //a > b ... downsample, arithmetic mean
					{
						SXDEBUG("fullchansample_pan 1on1 downsample");
						for(size_t i = 0; i < b; ++i) //samples of B
						{
							float sum = 0;
							size_t n = 0;
							for(size_t j = (size_t) (r*i); j < (size_t) (r*(i+1)); ++j)
							{
								++n;
								sum += source.ptr[j];
							}
							bpoint[i] = vol[0]*sum/n;
						}
					}
					else //(a < b)... linear interpolation
					{
						SXDEBUG("fullchansample_pan 1on1 upsample");
						for(size_t i = 1; i <= b; ++i) //samples of B
						{
							//i-1 is the sample index
							bpoint[i-1] = interpolate_mono(source.ptr, r*i, startval[0], vol[0]);
						}
					}
					// Both start values are set, for a possible later switch to stereo output.
					startval[0] = startval[1] = bpoint[b-1];
				}
			}
		}
		else
		{
			if(source.channels == 1) //upmix
			{
				SXDEBUG("fullchansample_pan upmix");
				// Plain duplication of the mono sample with separate volumes; startval is left alone here.
				if(source.fill == b) for(size_t i = 0; i < source.fill; ++i)
				{
					bpoint[2*i]   = vol[0]*source.ptr[i];
					bpoint[2*i+1] = vol[1]*source.ptr[i];
				}
				else
				{
					//This is the proven code of fullsample_hq for stereo.
					if(r > 1) //a > b ... downsample, arithmetic mean
					{
						SXDEBUG("fullchansample_pan 1on2 downsample");
						for(size_t i = 0; i < b; ++i) //samples of B
						{
							float sum = 0;
							size_t n = 0;
							for(size_t j = (size_t) (r*i); j < (size_t) (r*(i+1)); ++j)
							{
								++n;
								sum += source.ptr[j];
							}
							// Same mean for both channels, then the individual volume.
							bpoint[2*i] = bpoint[2*i+1] = sum/n;
							bpoint[2*i]   *= vol[0];
							bpoint[2*i+1] *= vol[1];
						}
					}
					else //(a < b)... linear interpolation
					{
						SXDEBUG("fullchansample_pan 1on2 upsample");
						for(size_t i = 1; i <= b; ++i) //samples of B
						{
							interpolate_mono2stereo(bpoint+2*(i-1), source.ptr, r*i, startval, vol);
						}
					}
					startval[0] = bpoint[2*b-2];
					startval[1] = bpoint[2*b-1];
				}
			}
			else //downmix
			{
				SXDEBUG("fullchansample_pan downmix");
				// Plain weighted sum of both channels (no halving); startval is left alone here.
				if(source.fill == b) for(size_t i = 0; i < b; ++i) bpoint[i] = vol[0]*source.ptr[2*i] + vol[1]*source.ptr[2*i+1];
				else
				{
					if(r > 1) //a > b ... downsample, arithmetic mean
					{
						SXDEBUG("fullchansample_pan 2on1 downsample");
						for(size_t i = 0; i < b; ++i) //samples of B
						{
							float sum = 0;
							size_t n = 0;
							for(size_t j = (size_t) (r*i); j < (size_t) (r*(i+1)); ++j)
							{
								// Two values per frame enter the sum, so this is the mean over both channels, too.
								n += 2;
								sum += vol[0]*source.ptr[2*j] + vol[1]*source.ptr[2*j+1];
							}
							bpoint[i] = sum/n;
						}
					}
					else //(a < b)... linear interpolation
					{
						SXDEBUG("fullchansample_pan 2on1 upsample");
						for(size_t i = 1; i <= b; ++i) //samples of B
						{
							// Was the old convoluted formula really better? Doubt it.
							// Interpolate in stereo (volume included), then take the mean of both channels.
							audio_type tmp[2];
							interpolate_stereo(tmp, source.ptr, r*i, startval, vol);
							bpoint[i-1] = 0.5*(tmp[0]+tmp[1]);
						}
					}
					startval[0] = startval[1] = bpoint[b-1];
				}
			}
		}
		// Account for the b samples written in any of the branches above.
		fill += b; //we have done something wrong if this isn't the case
	}
//	else cerr << "[fullchansample_pan] invalid call with source.fill=" << source.fill << " b=" << b << " size=" << size << endl;
	SXDEBUG("fullchansample_pan done");
}


// Return the pointer to the storage of channel c in a non-interleaved buffer,
// each channel occupying a stretch of size samples.
// There is no such thing (NULL) for an interleaved buffer, a channel index out
// of range, or a buffer without any storage.
audio_type* mixer_buffer::chan_ptr(unsigned int c)
{
	if(!interleaved && c < channels && size != 0)
		return ptr+(size*c);

	return NULL;
}

// Replace the contents of this (non-interleaved) buffer with the data of the
// given buffer, stored channel by channel.
// Returns false without touching anything if the data does not fit, the channel
// counts differ, or this buffer does not offer per-channel storage (chan_ptr()
// refuses for an interleaved buffer) while there is data to store.
bool mixer_buffer::deinterleave(mixer_buffer &from)
{
	if(size < from.fill || channels != from.channels) return false;

	for(unsigned int c=0; c<channels; ++c)
	{
		audio_type *chp = chan_ptr(c);
		if(!chp)
		{
			// No channel storage: That is only acceptable if there is nothing to store.
			// The outcome is the same for every channel, so nothing has been written yet.
			if(from.fill) return false;

			continue;
		}
		for(size_t i=0; i<from.fill; ++i)
		chp[i] = from(i, c);
	}
	fill = from.fill;
	return true;
}

// Append the contents of this (non-interleaved) buffer to the given buffer.
// Returns false without touching anything if the free space there does not
// suffice, the channel counts differ, or this buffer does not offer per-channel
// storage (chan_ptr() refuses for an interleaved buffer) while holding data.
// This buffer keeps its contents.
bool mixer_buffer::interleave(mixer_buffer &to)
{
	if(fill > to.free_space() || channels != to.channels) return false;

	for(unsigned int c=0; c<channels; ++c)
	{
		audio_type *chp = chan_ptr(c);
		if(!chp)
		{
			// No channel storage: That is only acceptable if there is nothing to hand over.
			// The outcome is the same for every channel, so nothing has been written yet.
			if(fill) return false;

			continue;
		}
		for(size_t i=0; i<fill; ++i)
		to(to.fill+i, c) = chp[i];
	}
	to.fill += fill;
	return true;
}
