//------------------------------------------------------------------------
//
//	File: constant_q.cl
//
//  Abstract: Constant Q transform compute kernel.
// 			 
//  Disclaimer: IMPORTANT:  This Apple software is supplied to you by Apple
//  Computer, Inc. ("Apple") in consideration of your agreement to the
//  following terms, and your use, installation, modification or
//  redistribution of this Apple software constitutes acceptance of these
//  terms.  If you do not agree with these terms, please do not use,
//  install, modify or redistribute this Apple software.
//  
//  In consideration of your agreement to abide by the following terms, and
//  subject to these terms, Apple grants you a personal, non-exclusive
//  license, under Apple's copyrights in this original Apple software (the
//  "Apple Software"), to use, reproduce, modify and redistribute the Apple
//  Software, with or without modifications, in source and/or binary forms;
//  provided that if you redistribute the Apple Software in its entirety and
//  without modifications, you must retain this notice and the following
//  text and disclaimers in all such redistributions of the Apple Software. 
//  Neither the name, trademarks, service marks or logos of Apple Computer,
//  Inc. may be used to endorse or promote products derived from the Apple
//  Software without specific prior written permission from Apple.  Except
//  as expressly stated in this notice, no other rights or licenses, express
//  or implied, are granted by Apple herein, including but not limited to
//  any patent rights that may be infringed by your derivative works or by
//  other works in which the Apple Software may be incorporated.
//  
//  The Apple Software is provided by Apple on an "AS IS" basis.  APPLE
//  MAKES NO WARRANTIES, EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION
//  THE IMPLIED WARRANTIES OF NON-INFRINGEMENT, MERCHANTABILITY AND FITNESS
//  FOR A PARTICULAR PURPOSE, REGARDING THE APPLE SOFTWARE OR ITS USE AND
//  OPERATION ALONE OR IN COMBINATION WITH YOUR PRODUCTS.
//  
//  IN NO EVENT SHALL APPLE BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL
//  OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
//  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
//  INTERRUPTION) ARISING IN ANY WAY OUT OF THE USE, REPRODUCTION,
//  MODIFICATION AND/OR DISTRIBUTION OF THE APPLE SOFTWARE, HOWEVER CAUSED
//  AND WHETHER UNDER THEORY OF CONTRACT, TORT (INCLUDING NEGLIGENCE),
//  STRICT LIABILITY OR OTHERWISE, EVEN IF APPLE HAS BEEN ADVISED OF THE
//  POSSIBILITY OF SUCH DAMAGE.
// 
//  Copyright (c) 2010 Apple Inc., All rights reserved.
//
//------------------------------------------------------------------------

//
// constant_q
//
// Each work-item computes one cell of the spectrogram. The global id is split
// into a row (gid / cStride) and a column (gid % cStride). For that cell the
// kernel correlates a window of `samples` with the row's `real` and `imag`
// coefficient tables, normalises both sums by the window length, and stores
// 512 * (re^2 + im^2) at spectrogram[col * rStride + row].
//
// Arguments:
//   radix2Exp    - base raised to the row index to scale freqMin per row.
//   qualityRate  - divided by the row frequency to obtain the window length
//                  in samples (presumably Q * sample rate; confirm with host).
//   freqMin      - frequency used for row 0.
//   sampleStride - number of samples between the window centres of
//                  consecutive columns.
//   maxWinLen    - per-row stride of the coefficient tables, in floats
//                  (used as maxWinLen / 4 float4 elements).
//   samplesCount - number of scalar samples (used as samplesCount / 4
//                  float4 elements) that bounds the window from above.
//   frameOffset  - sample offset added to every window centre.
//   cStride      - divisor that splits the global id into row and column.
//   rStride      - distance, in floats, between consecutive columns in
//                  `spectrogram`.
//   imag, real   - coefficient tables, read as float4.
//   samples      - input signal, read as float4.
//   spectrogram  - output; one float written per work-item.
//
// NOTE(review): there is no guard on the global id, so the host is assumed to
// launch exactly as many work-items as there are cells - confirm.
//
__kernel void constant_q(
    const float radix2Exp,
    const float qualityRate,
    const float freqMin,
    const uint sampleStride,
    const uint maxWinLen,
    const uint samplesCount,
    const uint frameOffset,
    const uint cStride,
    const uint rStride,
    const __global float4 *imag,
    const __global float4 *real,
    const __global float4 *samples,
    __global float *spectrogram)
{
    const uint gid = get_global_id(0);
    const uint row = gid / cStride;
    const uint col = gid % cStride;

    // Row frequency and the matching window length (and its reciprocal).
    const float freq    = freqMin * native_powr( radix2Exp, (float)row );
    const float winLen  = native_divide( qualityRate, freq );
    const float winLenR = native_recip( winLen );

    // Window bounds. Sample positions are converted to float4 indices with
    // ">> 2" and clamped to [0, qSCount].
    const int length  = rint( winLen );
    const int hLength = length >> 1;
    const int fStride = col * sampleStride + frameOffset;
    const int qSCount = samplesCount >> 2;
    const int lBound  = ( fStride - hLength ) >> 2;
    const int uBound  = ( fStride + hLength ) >> 2;
    const int iMin    = max(       0, lBound );
    const int iMax    = min( qSCount, uBound );

    // Bias the coefficient pointers by -iMin so that the same index `i`
    // addresses both the samples and this row's coefficients; sample iMin
    // pairs with the first coefficient of the row.
    // NOTE(review): when lBound < 0 the window is clamped but the
    // coefficients still start at the row's first element - confirm that
    // this is the intended alignment.
    const int offset  = ( row * ( maxWinLen >> 2 ) ) - iMin;
    const int sIdx    = col * rStride + row;

    const __global float4 *vImag = imag + offset;
    const __global float4 *vReal = real + offset;

    float4 csum = 0.0f;
    float4 ssum = 0.0f;

    int i = iMin;

    // Main body: full groups of eight float4 elements. The products of a
    // group are summed into a partial, which is then added to the running
    // total.
    for( ; iMax - i > 7; i += 8 )
    {
        float4 cPart = samples[i] * vReal[i];
        float4 sPart = samples[i] * vImag[i];

        for( int k = 1; k < 8; ++k )
        {
            cPart += samples[i + k] * vReal[i + k];
            sPart += samples[i + k] * vImag[i + k];
        }

        csum += cPart;
        ssum += sPart;
    }

    // Tail: the remaining one to seven elements, handled as a single group.
    // Nothing is done when the window is empty (iMax <= iMin).
    if( i < iMax )
    {
        float4 cPart = samples[i] * vReal[i];
        float4 sPart = samples[i] * vImag[i];

        for( int k = i + 1; k < iMax; ++k )
        {
            cPart += samples[k] * vReal[k];
            sPart += samples[k] * vImag[k];
        }

        csum += cPart;
        ssum += sPart;
    }

    // Horizontal reduction of the four lanes, normalisation by the window
    // length, then squared magnitude scaled by 512.
    const float2 nsum = (float2)( csum.x + csum.y + csum.z + csum.w,
                                  ssum.x + ssum.y + ssum.z + ssum.w ) * winLenR;
    const float2 power = ( nsum * nsum ) * 512.0f;

    spectrogram[sIdx] = power.x + power.y;
}
