﻿/****************************************************************************

* General description
This code includes the implementation of our method below.
[1] "Fast 2D Complex Gabor Filter With Kernel Decomposition", IEEE Trans. Image Processing, 2018

We also provide the implementation of two existing approaches for performance comparison.
[2] I. T. Young, L. J. V. Vliet, and M. V. Ginkel, "Recursive Gabor filtering," IEEE Trans. Signal Processing, vol. 50, no. 11, pp. 2798-2805, Nov. 2002.
[3] A. Bernardino and J. Santos-Victor, "Fast IIR isotropic 2-D complex Gabor filters with boundary initialization," IEEE Trans. Image Processing, vol. 15, no. 11, pp. 3338-3348, Nov. 2006.

The functions provided in this code are as below.
[1]: 'ourMethod'
[2]: 'recursiveGabor'
[3]: 'fastIIRIsptropicGabor'

For runtime evaluation, the code is optimized such that no sub-function call is used.


* Requirement
To run this code, OpenCV should be installed on your computer.


* Citation
Please cite the following paper if you use this code.

@article{DBLP:journals/tip/KimUM18,
author    = {Jaeyoon Kim and
Suhyuk Um and
Dongbo Min},
title     = {Fast 2D Complex Gabor Filter With Kernel Decomposition},
journal   = {{IEEE} Trans. Image Processing},
volume    = {27},
number    = {4},
pages     = {1713--1722},
year      = {2018},
url       = {https://doi.org/10.1109/TIP.2017.2783621},
doi       = {10.1109/TIP.2017.2783621},
timestamp = {Thu, 15 Feb 2018 17:56:05 +0100},
biburl    = {https://dblp.org/rec/bib/journals/tip/KimUM18},
bibsource = {dblp computer science bibliography, https://dblp.org}
}

*****************************************************************************/

#include <opencv\highgui.h>
#include <opencv2\highgui.hpp>
#include <opencv\cxcore.h>
#include <opencv2\opencv.hpp>
#include <math.h>
#include <stdio.h>
#include <iostream>
#include <vector>
#include <fstream>
#include <time.h> 
#include <string.h>
#include <windows.h>
using namespace cv;
using namespace std;





// Benchmarked complex Gabor filters. Each one filters *inputMat (read as float
// samples) and writes the real/imaginary responses into resultReal/resultImag,
// which the implementations index as [x][y]. Each call adds its own elapsed
// time to the global sumOfTime.
void fastIIRIsptropicGabor(Mat* inputMat, int ksize, float sigma, float lambda, float theta, float** resultReal, float** resultImag);
void recursiveGabor(Mat* inputMat, int ksize, float sigma, float lambda, float theta, float** resultReal, float** resultImag);
// ourMethod additionally fills pairReal/pairImag with a second response computed
// from the same intermediate signals.
void ourMethod(Mat* inputMat, int ksize, float sigma, float lambda, float theta, float** resultReal, float** resultImag, float** pairReal, float** pairImag);
// Folds one timing sample into the global running average (averageTime / nIter).
void calculateAverageTime(int time);


// malloc-based helpers for contiguous 1-D to 4-D float arrays. Arrays returned by
// memAllocFloatN must be released with the matching memFreeFloatN.
void memFreeFloat4(float ****p);
float**** memAllocFloat4(int u, int v, int x, int y);
void memFreeFloat2(float **p);
float** memAllocFloat2(int x, int y);
void memFreeFloat3(float ***p);
float *** memAllocFloat3(int x, int y, int z);
float* memAllocFloat1(int x);
void memFreeFloat1(float *p);


// Single-precision approximation of pi used by all filters.
float pi = 3.141592;
// Kernel size; main sets it to 6 * sigma. The filters only derive an unused half-size from it.
int ksize;
// Number of samples folded into averageTime so far (reset by main between benchmarks).
int nIter = 0;
// Running average of the per-repetition time, as maintained by calculateAverageTime.
int averageTime = 0;
// Time accumulated by the filter calls of the current repetition.
int sumOfTime = 0;
int main() {

	//Mat originMat = Mat::ones(15, 15, CV_32FC1);
	Mat originMat = imread("1.3.12.tiff", 0);
	Mat inputMat;
	Mat inputMat2;

	originMat.convertTo(inputMat, CV_32F);



	float sigma;
	sigma = 4;
	ksize = 6 * sigma;
	int r = ksize / 2;
	DWORD begin, end;
	int width = inputMat.size().width;
	int height = inputMat.size().height;






	float** resultReal = memAllocFloat2(width, height);
	float** resultImag = memAllocFloat2(width, height);
	float** resultReal2 = memAllocFloat2(width, height);
	float** resultImag2 = memAllocFloat2(width, height);



	for (int n = 8; n < 35; n = n + 6) {
		printf("fastIIR n = %d \n", n);


		for (int i = 0; i < 20; i++) {
			sumOfTime = 0;
			for (int i = 0; i < n; i++) {

				// ratio of lambda and sigma is pi
				fastIIRIsptropicGabor(&inputMat, ksize, sigma, sigma / pi, pi*i / n, resultReal, resultImag);




			}
			calculateAverageTime(sumOfTime);
		}

		cout << "Running time : " << averageTime << endl;
		nIter = 0;
		averageTime = 0;

		printf("our n = %d \n", n);

		for (int i = 0; i < 20; i++) {
			sumOfTime = 0;
			fastIIRIsptropicGabor(&inputMat, ksize, sigma, sigma / pi, 0, resultReal, resultImag);
			for (int i = 1; i < n / 2; i++) {

				ourMethod(&inputMat, ksize, sigma, sigma / pi, pi*i / n, resultReal, resultImag, resultReal2, resultImag2);


			}
			fastIIRIsptropicGabor(&inputMat, ksize, sigma, sigma / pi, pi / 2, resultReal, resultImag);
			calculateAverageTime(sumOfTime);
		}

		cout << "Running time : " << averageTime << endl;
		nIter = 0;
		averageTime = 0;

		printf("recursiveGabor n = %d \n", n);

		for (int i = 0; i < 20; i++) {
			sumOfTime = 0;
			for (int i = 0; i < n; i++) {



				recursiveGabor(&inputMat, ksize, sigma, sigma / pi, pi*i / n, resultReal, resultImag);




			}

			calculateAverageTime(sumOfTime);
		}

		cout << "Running time : " << averageTime << endl;
		nIter = 0;
		averageTime = 0;
	}





	waitKey(0);
}



/**
 * Folds one timing sample into the global running average.
 *
 * The exact sum of the samples is kept in a function-local accumulator, so the
 * reported average is the truncated true mean. Rebuilding the sum from the
 * already-truncated integer average (nIter * averageTime) would let the
 * rounding error grow with every sample.
 *
 * A new series starts whenever the caller has reset nIter to 0.
 *
 * @param time  sample to add (the callers pass the accumulated sumOfTime).
 */
void calculateAverageTime(int time) {
	static long long total = 0;

	if (nIter == 0)
		total = 0;	// caller reset the counters: start a fresh series

	total += time;
	nIter++;
	averageTime = static_cast<int>(total / nIter);
}


/*
 * Complex Gabor filter built from modulation, a separable third-order recursive
 * low-pass filter, and demodulation (listed in the file header as method [3]).
 *
 * Steps, as implemented below:
 *   1. multiply the image by cos/sin of the phase omegaX*x + omegaY*y;
 *   2. run a forward and a backward recursion along every row;
 *   3. run a forward and a backward recursion along every column;
 *   4. rotate each output sample by the phase -(omegaX*x + omegaY*y).
 *
 * inputMat    image read with at<float>(y, x); must be at least 3x3 because three
 *             boundary samples are written unconditionally in each pass.
 * ksize       only used to compute the unused local khalf.
 * sigma       selects the recursion coefficients through q (presumably the Gaussian
 *             standard deviation of the Young / van Vliet recursive Gaussian - TODO confirm).
 * lambda      wavelength: the angular frequency is 2*pi / lambda.
 * theta       orientation: the frequency is split into cos(theta) / sin(theta) parts.
 * resultReal,
 * resultImag  caller-owned outputs, written as [x][y] (width rows of height floats).
 *
 * Side effect: adds timeGetTime() end - begin, measured around steps 1-4, to the global sumOfTime.
 */
void fastIIRIsptropicGabor(Mat* inputMat, int ksize, float sigma, float lambda, float theta, float** resultReal, float** resultImag) {
	int khalf = ksize / 2;	// NOTE(review): never used below
	int width = inputMat->size().width;
	int height = inputMat->size().height;

	// Scratch buffers: one row / one column of recursion state, plus full-size
	// intermediate images indexed [y][x].
	float* w_cos = memAllocFloat1(width);
	float* w_sin = memAllocFloat1(width);
	float** tempReal = memAllocFloat2(height, width);
	float** tempImag = memAllocFloat2(height, width);
	float* w_cosInY = memAllocFloat1(height);
	float* w_sinInY = memAllocFloat1(height);
	float** cosSig = memAllocFloat2(height, width);
	float** sinSig = memAllocFloat2(height, width);
	float** inputArr = memAllocFloat2(height, width);
	float** result_real = resultReal;
	float** result_imag = resultImag;


	// Copy the image into a plain [y][x] float array.
	for (int x = 0; x < width; x++)
		for (int y = 0; y < height; y++)
			inputArr[y][x] = inputMat->at<float>(y, x);


	// cos/sin tables sampled every 2*pi/1000 rad for indices -1000..1000;
	// coslookup / sinlookup point at the middle so negative indices are valid.
	float coslookup2[2001];
	float sinlookup2[2001];
	const float coefficentOfNorm = 2 * pi / 1000;

	for (int i = -1000; i <= 1000; i++) {
		coslookup2[i + 1000] = cos(coefficentOfNorm*(float)i);
		sinlookup2[i + 1000] = sin(coefficentOfNorm*(float)i);
	}
	float* coslookup = coslookup2 + 1000, *sinlookup = sinlookup2 + 1000;


	int cc = 0;	// quantized phase, used as table index




	// Recursion coefficients b1..b3 and gain derived from sigma through q.
	float m0 = 1.16680, m1 = 1.10783, m2 = 1.40586;
	float q = (sigma <3.556) ? -0.2568 + 0.5784*sigma + 0.0561*sigma*sigma : 2.5091 + 0.9804*(sigma - 3.556);

	float scale = (m0 + q)*(m1*m1 + m2*m2 + 2 * m1*q + q*q);
	float  b1 = -q*(2 * m0*m1 + m1*m1 + m2*m2 + (2 * m0 + 4 * m1)*q + 3 * q*q) / (scale), b2 = q*q*(m0 + 2 * m1 + 3 * q) / scale, b3 = -q*q*q / scale, largeB = m0*(m1*m1 + m2*m2) / scale;
	// The gain is squared here and applied only in the backward passes; the
	// forward passes below use no gain.
	largeB = largeB*largeB;
	float omegaX = 2 * pi*cos(theta) / lambda;
	float omegaY = 2 * pi*sin(theta) / lambda;

	clock_t begin, end;
	begin = timeGetTime();
	// Step 1: modulation. The phase is truncated to table steps and reduced with
	// % 1000, so cc lies in (-1000, 1000).
	for (int y = 0; y < height; y++) {

		float* p_CosSig = cosSig[y];
		float* p_SinSig = sinSig[y];
		float* p_inputArr = inputArr[y];
		for (int x = 0; x < width; x++) {

			cc = ((int)((omegaX*x + omegaY*y) / coefficentOfNorm)) % 1000;


			*p_CosSig++ = *p_inputArr * coslookup[cc];
			*p_SinSig++ = *p_inputArr++ * sinlookup[cc];
		}
	}



	// Step 2: horizontal pass, one row at a time.
	for (int y = 0; y < height; y++)
	{

		// p_*_0 is the write cursor; p_*_1/_2/_3 trail it by 1/2/3 samples and
		// read the previously written outputs.
		float* p_w_cos_0 = w_cos, *p_w_cos_1 = w_cos, *p_w_cos_2 = w_cos, *p_w_cos_3 = w_cos;
		float *p_w_sin_0 = w_sin, *p_w_sin_1 = w_sin, *p_w_sin_2 = w_sin, *p_w_sin_3 = w_sin;
		float* p_CosSig = cosSig[y];
		float* p_SinSig = sinSig[y];
		// Left boundary: the first three samples use only the history that exists.
		*p_w_cos_0++ = *p_CosSig++;
		*p_w_sin_0++ = *p_SinSig++;

		*p_w_cos_0++ = *p_CosSig++ - (b1*(*p_w_cos_1++));
		*p_w_sin_0++ = *p_SinSig++ - (b1*(*p_w_sin_1++));

		*p_w_cos_0++ = *p_CosSig++ - (b1*(*p_w_cos_1++) + b2*(*p_w_cos_2++));
		*p_w_sin_0++ = *p_SinSig++ - (b1*(*p_w_sin_1++) + b2*(*p_w_sin_2++));
		// Forward recursion: w[x] = in[x] - (b1*w[x-1] + b2*w[x-2] + b3*w[x-3]).
		for (int x = 3; x < width; x++)
		{
			*p_w_cos_0++ = *p_CosSig++
				- (b1*(*p_w_cos_1++)
				+ b2*(*p_w_cos_2++)
				+ b3*(*p_w_cos_3++));
			*p_w_sin_0++ = *p_SinSig++
				- (b1*(*p_w_sin_1++)
				+ b2*(*p_w_sin_2++)
				+ b3*(*p_w_sin_3++));

		}



		// Backward pass writes row y of tempReal / tempImag from right to left.
		float* p_tempReal_0 = tempReal[y] + width - 1, *p_tempReal_1 = tempReal[y] + width - 1, *p_tempReal_2 = tempReal[y] + width - 1, *p_tempReal_3 = tempReal[y] + width - 1;
		float* p_tempImag_0 = tempImag[y] + width - 1, *p_tempImag_1 = tempImag[y] + width - 1, *p_tempImag_2 = tempImag[y] + width - 1, *p_tempImag_3 = tempImag[y] + width - 1;
		float* p_w_cos = w_cos + width - 1, *p_w_sin = w_sin + width - 1;

		// Right boundary: the last three samples use only the history that exists.
		*p_tempReal_0-- = largeB*(*p_w_cos--);
		*p_tempImag_0-- = largeB*(*p_w_sin--);

		*p_tempReal_0-- = largeB*(*p_w_cos--) - (b1*(*p_tempReal_1--));
		*p_tempImag_0-- = largeB*(*p_w_sin--) - (b1*(*p_tempImag_1--));

		*p_tempReal_0-- = largeB*(*p_w_cos--) - (b1*(*p_tempReal_1--) + b2*(*p_tempReal_2--));
		*p_tempImag_0-- = largeB*(*p_w_sin--) - (b1*(*p_tempImag_1--) + b2*(*p_tempImag_2--));

		// Backward recursion: out[x] = largeB*w[x] - (b1*out[x+1] + b2*out[x+2] + b3*out[x+3]).
		for (int x = width - 1 - 3; x >= 0; x--)
		{
			*p_tempReal_0-- = largeB*(*p_w_cos--)
				- (b1*(*p_tempReal_1--)
				+ b2*(*p_tempReal_2--)
				+ b3*(*p_tempReal_3--));

			*p_tempImag_0-- = largeB*(*p_w_sin--)
				- (b1*(*p_tempImag_1--)
				+ b2*(*p_tempImag_2--)
				+ b3*(*p_tempImag_3--));
		}

	}






	// Step 3: vertical pass, one column at a time, same recursions as above.
	for (int x = 0; x < width; x++)
	{

		float* p_w_cosInY_0 = w_cosInY, *p_w_cosInY_1 = w_cosInY, *p_w_cosInY_2 = w_cosInY, *p_w_cosInY_3 = w_cosInY;
		float* p_w_sinInY_0 = w_sinInY, *p_w_sinInY_1 = w_sinInY, *p_w_sinInY_2 = w_sinInY, *p_w_sinInY_3 = w_sinInY;

		// Top boundary.
		*p_w_cosInY_0++ = tempReal[0][x];
		*p_w_sinInY_0++ = tempImag[0][x];

		*p_w_cosInY_0++ = tempReal[1][x] - (b1*(*p_w_cosInY_1++));
		*p_w_sinInY_0++ = tempImag[1][x] - (b1*(*p_w_sinInY_1++));

		*p_w_cosInY_0++ = tempReal[2][x] - (b1*(*p_w_cosInY_1++) + b2*(*p_w_cosInY_2++));
		*p_w_sinInY_0++ = tempImag[2][x] - (b1*(*p_w_sinInY_1++) + b2*(*p_w_sinInY_2++));

		// Forward recursion down the column.
		for (int y = 3; y < height; y++)
		{
			*p_w_cosInY_0++ = tempReal[y][x] -
				(b1*(*p_w_cosInY_1++)
				+ b2*(*p_w_cosInY_2++)
				+ b3*(*p_w_cosInY_3++));
			*p_w_sinInY_0++ = tempImag[y][x] -
				(b1*(*p_w_sinInY_1++)
				+ b2*(*p_w_sinInY_2++)
				+ b3*(*p_w_sinInY_3++));
		}


		// Backward pass writes directly into the caller's buffers, which are
		// indexed [x][y] (transposed with respect to the intermediates).
		float* p_resultReal_0 = result_real[x] + height - 1, *p_resultReal_1 = result_real[x] + height - 1, *p_resultReal_2 = result_real[x] + height - 1, *p_resultReal_3 = result_real[x] + height - 1;
		float *p_resultImag_0 = result_imag[x] + height - 1, *p_resultImag_1 = result_imag[x] + height - 1, *p_resultImag_2 = result_imag[x] + height - 1, *p_resultImag_3 = result_imag[x] + height - 1;
		float *p_w_cosInY = w_cosInY + height - 1, *p_w_sinInY = w_sinInY + height - 1;

		// Bottom boundary.
		*p_resultReal_0-- = largeB*(*p_w_cosInY--);
		*p_resultImag_0-- = largeB*(*p_w_sinInY--);

		*p_resultReal_0-- = largeB*(*p_w_cosInY--) - (b1*(*p_resultReal_1--));
		*p_resultImag_0-- = largeB*(*p_w_sinInY--) - (b1*(*p_resultImag_1--));

		*p_resultReal_0-- = largeB*(*p_w_cosInY--) - (b1*(*p_resultReal_1--) + b2*(*p_resultReal_2--));
		*p_resultImag_0-- = largeB*(*p_w_sinInY--) - (b1*(*p_resultImag_1--) + b2*(*p_resultImag_2--));
		// Backward recursion up the column.
		for (int y = height - 1 - 3; y >= 0; y--)
		{

			*p_resultReal_0-- = largeB*(*p_w_cosInY--) -
				(b1*(*p_resultReal_1--)
				+ b2*(*p_resultReal_2--)
				+ b3*(*p_resultReal_3--));
			*p_resultImag_0-- = largeB*(*p_w_sinInY--) -
				(b1*(*p_resultImag_1--)
				+ b2*(*p_resultImag_2--)
				+ b3*(*p_resultImag_3--));
		}


	}
	// Step 4: demodulation - rotate every (real, imag) pair by the negated phase.
	for (int x = 0; x < width; x++) {
		float* p_resultReal = result_real[x], *p_resultImag = result_imag[x];

		for (int y = 0; y < height; y++) {
			float real = *p_resultReal;
			float imag = *p_resultImag;

			cc = ((int)((omegaX*(-x) + omegaY*(-y)) / coefficentOfNorm)) % 1000;

			*p_resultReal++ = coslookup[cc] * real - sinlookup[cc] * imag;
			*p_resultImag++ = coslookup[cc] * imag + sinlookup[cc] * real;
		}
	}
	end = timeGetTime();
	sumOfTime += (end - begin);

	// Release all scratch buffers; the result buffers belong to the caller.
	memFreeFloat1(w_cos);
	memFreeFloat1(w_sin);
	memFreeFloat2(tempReal);
	memFreeFloat2(tempImag);
	memFreeFloat1(w_cosInY);
	memFreeFloat1(w_sinInY);
	memFreeFloat2(cosSig);
	memFreeFloat2(sinSig);
	memFreeFloat2(inputArr);


}


/*
 * Complex Gabor filter implemented as a separable third-order recursion with
 * complex coefficients (listed in the file header as method [2]).
 *
 * Each real coefficient b_k is multiplied by the phase factor of k * omega, so
 * the recursion itself produces the oscillating response; no separate
 * modulation / demodulation step is used.
 *
 * inputMat    image read with at<float>(y, x); must be at least 3x3 because three
 *             boundary samples are written unconditionally in each pass.
 * ksize       unused.
 * sigma       selects the recursion coefficients through q (presumably the Gaussian
 *             standard deviation - TODO confirm).
 * lambda      wavelength: the angular frequency is 2*pi / lambda.
 * theta       orientation: the frequency is split into cos(theta) / sin(theta) parts.
 * resultReal,
 * resultImag  caller-owned outputs, written as [x][y] (width rows of height floats).
 *
 * Side effect: adds timeGetTime() end - begin, measured around the two filter passes,
 * to the global sumOfTime.
 */
void recursiveGabor(Mat* inputMat, int ksize, float sigma, float lambda, float theta, float** resultReal, float** resultImag) {
	int width = inputMat->size().width;
	int height = inputMat->size().height;

	// Real recursion coefficients b1..b3 and gain derived from sigma through q.
	float m0 = 1.16680, m1 = 1.10783, m2 = 1.40586;
	float q = (sigma <3.556) ? -0.2568 + 0.5784*sigma + 0.0561*sigma*sigma : 2.5091 + 0.9804*(sigma - 3.556);

	float scale = (m0 + q)*(m1*m1 + m2*m2 + 2 * m1*q + q*q);
	float  b1 = -q*(2 * m0*m1 + m1*m1 + m2*m2 + (2 * m0 + 4 * m1)*q + 3 * q*q) / (scale), b2 = q*q*(m0 + 2 * m1 + 3 * q) / scale, b3 = -q*q*q / scale, largeB = m0*(m1*m1 + m2*m2) / scale;
	// The gain is squared and applied only in the backward passes.
	largeB = largeB*largeB;

	// Image copy indexed [x][y].
	float** imageData = memAllocFloat2(width, height);
	for (int x = 0; x < width; x++)
		for (int y = 0; y < height; y++)
			imageData[x][y] = inputMat->at<float>(y, x);

	// Scratch lines come from memAllocFloat1 because they are released with
	// memFreeFloat1 (free) below; new[] storage must not be passed to free().
	float* W_Real_InX = memAllocFloat1(width);
	float* W_Imagin_InX = memAllocFloat1(width);
	float** Out_Real = memAllocFloat2(height, width);
	float** Out_Imagin = memAllocFloat2(height, width);

	float* W_Real_InY = memAllocFloat1(height);
	float* W_Imagin_InY = memAllocFloat1(height);

	float** Result_Real = resultReal;
	float** Result_Imagin = resultImag;

	float omegaX, omegaY;
	omegaX = 2 * pi*cos(theta) / lambda;
	omegaY = 2 * pi*sin(theta) / lambda;

	// Complex coefficients: (bc_k, bs_k) = b_k * (cos(k*omega), sin(k*omega)).
	float bc1, bs1, bc2, bs2, bc3, bs3;
	float bcy1, bsy1, bcy2, bsy2, bcy3, bsy3;

	bc1 = b1*cos(omegaX); bs1 = b1*sin(omegaX);
	bc2 = b2*cos(omegaX * 2); bs2 = b2*sin(omegaX * 2);
	bc3 = b3*cos(omegaX * 3); bs3 = b3*sin(omegaX * 3);

	bcy1 = b1*cos(omegaY); bsy1 = b1*sin(omegaY);
	bcy2 = b2*cos(omegaY * 2); bsy2 = b2*sin(omegaY * 2);
	bcy3 = b3*cos(omegaY * 3); bsy3 = b3*sin(omegaY * 3);
	clock_t begin, end;
	begin = timeGetTime();

	// Horizontal pass, one row at a time.
	for (int y = 0; y < height; y++)
	{
		// *_0 is the write cursor; *_1/_2/_3 trail it by 1/2/3 samples.
		float* W_Real_InX_0 = W_Real_InX, *W_Real_InX_1 = W_Real_InX, *W_Real_InX_2 = W_Real_InX, *W_Real_InX_3 = W_Real_InX;
		float* W_Imagin_InX_0 = W_Imagin_InX, *W_Imagin_InX_1 = W_Imagin_InX, *W_Imagin_InX_2 = W_Imagin_InX, *W_Imagin_InX_3 = W_Imagin_InX;

		// Left boundary: the input is real, so the first imaginary sample is 0.
		*W_Real_InX_0++ = imageData[0][y];
		*W_Imagin_InX_0++ = 0;

		*W_Real_InX_0++ = imageData[1][y] - (bc1*(*W_Real_InX_1) - bs1*(*W_Imagin_InX_1));
		*W_Imagin_InX_0++ = -(bc1*(*W_Imagin_InX_1++) + bs1*(*W_Real_InX_1++));

		*W_Real_InX_0++ = imageData[2][y] - (bc1*(*W_Real_InX_1) - bs1*(*W_Imagin_InX_1) +
			bc2*(*W_Real_InX_2) - bs2*(*W_Imagin_InX_2));
		*W_Imagin_InX_0++ = -(bc1*(*W_Imagin_InX_1++) + bs1*(*W_Real_InX_1++) +
			bc2*(*W_Imagin_InX_2++) + bs2*(*W_Real_InX_2++));

		// Forward recursion: w[x] = in[x] - sum_k (bc_k + i*bs_k) * w[x-k].
		// The trailing cursors are advanced in the imaginary statement, after
		// the real statement has read them.
		for (int x = 3; x < width; x++)
		{
			*W_Real_InX_0++ = imageData[x][y] - (bc1*(*W_Real_InX_1) - bs1*(*W_Imagin_InX_1) +
				bc2*(*W_Real_InX_2) - bs2*(*W_Imagin_InX_2) +
				bc3*(*W_Real_InX_3) - bs3*(*W_Imagin_InX_3));

			*W_Imagin_InX_0++ = -(bc1*(*W_Imagin_InX_1++) + bs1*(*W_Real_InX_1++) +
				bc2*(*W_Imagin_InX_2++) + bs2*(*W_Real_InX_2++) +
				bc3*(*W_Imagin_InX_3++) + bs3*(*W_Real_InX_3++));
		}

		// Backward pass reads the forward output from right to left and writes
		// row y of Out_Real / Out_Imagin.
		W_Real_InX_0 = W_Real_InX + width - 1;
		W_Imagin_InX_0 = W_Imagin_InX + width - 1;

		float* Out_Real_0 = Out_Real[y] + width - 1, *Out_Real_1 = Out_Real[y] + width - 1, *Out_Real_2 = Out_Real[y] + width - 1, *Out_Real_3 = Out_Real[y] + width - 1;
		float* Out_Imagin_0 = Out_Imagin[y] + width - 1, *Out_Imagin_1 = Out_Imagin[y] + width - 1, *Out_Imagin_2 = Out_Imagin[y] + width - 1, *Out_Imagin_3 = Out_Imagin[y] + width - 1;

		// Right boundary.
		*Out_Real_0-- = largeB*(*W_Real_InX_0--);
		*Out_Imagin_0-- = largeB*(*W_Imagin_InX_0--);

		*Out_Real_0-- = largeB*(*W_Real_InX_0--) - (bc1*(*Out_Real_1) + bs1*(*Out_Imagin_1));
		*Out_Imagin_0-- = largeB*(*W_Imagin_InX_0--) - (bc1*(*Out_Imagin_1--) - bs1*(*Out_Real_1--));

		*Out_Real_0-- = largeB*(*W_Real_InX_0--) - (bc1*(*Out_Real_1) + bs1*(*Out_Imagin_1) +
			bc2*(*Out_Real_2) + bs2*(*Out_Imagin_2));
		*Out_Imagin_0-- = largeB*(*W_Imagin_InX_0--) - (bc1*(*Out_Imagin_1--) - bs1*(*Out_Real_1--) +
			bc2*(*Out_Imagin_2--) - bs2*(*Out_Real_2--));

		// Backward recursion: out[x] = largeB*w[x] - sum_k (bc_k - i*bs_k) * out[x+k].
		for (int x = width - 1 - 3; x >= 0; x--)
		{
			*Out_Real_0-- = largeB * (*W_Real_InX_0--) - (bc1*(*Out_Real_1) + bs1*(*Out_Imagin_1) +
				bc2*(*Out_Real_2) + bs2*(*Out_Imagin_2) +
				bc3*(*Out_Real_3) + bs3*(*Out_Imagin_3));
			*Out_Imagin_0-- = largeB * (*W_Imagin_InX_0--) - (bc1*(*Out_Imagin_1--) - bs1*(*Out_Real_1--) +
				bc2*(*Out_Imagin_2--) - bs2*(*Out_Real_2--) +
				bc3*(*Out_Imagin_3--) - bs3*(*Out_Real_3--));
		}
	}

	// Switch to the vertical-frequency coefficients for the column pass.
	bc1 = bcy1; bs1 = bsy1;
	bc2 = bcy2; bs2 = bsy2;
	bc3 = bcy3; bs3 = bsy3;

	// Vertical pass, one column at a time; the input is now complex.
	for (int x = 0; x < width; x++)
	{
		float* W_Real_InY_0 = W_Real_InY, *W_Real_InY_1 = W_Real_InY, *W_Real_InY_2 = W_Real_InY, *W_Real_InY_3 = W_Real_InY;
		float* W_Imagin_InY_0 = W_Imagin_InY, *W_Imagin_InY_1 = W_Imagin_InY, *W_Imagin_InY_2 = W_Imagin_InY, *W_Imagin_InY_3 = W_Imagin_InY;

		// Top boundary.
		*W_Real_InY_0++ = Out_Real[0][x];
		*W_Imagin_InY_0++ = Out_Imagin[0][x];

		*W_Real_InY_0++ = Out_Real[1][x] - (bc1*(*W_Real_InY_1) - bs1*(*W_Imagin_InY_1));
		*W_Imagin_InY_0++ = Out_Imagin[1][x] - (bc1*(*W_Imagin_InY_1++) + bs1*(*W_Real_InY_1++));

		*W_Real_InY_0++ = Out_Real[2][x] - (bc1*(*W_Real_InY_1) - bs1*(*W_Imagin_InY_1) +
			bc2*(*W_Real_InY_2) - bs2*(*W_Imagin_InY_2));
		*W_Imagin_InY_0++ = Out_Imagin[2][x] - (bc1*(*W_Imagin_InY_1++) + bs1*(*W_Real_InY_1++) +
			bc2*(*W_Imagin_InY_2++) + bs2*(*W_Real_InY_2++));

		// Forward recursion down the column.
		for (int y = 3; y < height; y++)
		{
			*W_Real_InY_0++ = Out_Real[y][x] - (bc1*(*W_Real_InY_1) - bs1*(*W_Imagin_InY_1) +
				bc2*(*W_Real_InY_2) - bs2*(*W_Imagin_InY_2) +
				bc3*(*W_Real_InY_3) - bs3*(*W_Imagin_InY_3));
			*W_Imagin_InY_0++ = Out_Imagin[y][x] - (bc1*(*W_Imagin_InY_1++) + bs1*(*W_Real_InY_1++) +
				bc2*(*W_Imagin_InY_2++) + bs2*(*W_Real_InY_2++) +
				bc3*(*W_Imagin_InY_3++) + bs3*(*W_Real_InY_3++));
		}

		// Backward pass writes directly into the caller's [x][y] buffers.
		W_Real_InY_0 = W_Real_InY + height - 1;
		W_Imagin_InY_0 = W_Imagin_InY + height - 1;

		float* Result_Real_0 = Result_Real[x] + height - 1, *Result_Real_1 = Result_Real[x] + height - 1, *Result_Real_2 = Result_Real[x] + height - 1, *Result_Real_3 = Result_Real[x] + height - 1;
		float* Result_Imagin_0 = Result_Imagin[x] + height - 1, *Result_Imagin_1 = Result_Imagin[x] + height - 1, *Result_Imagin_2 = Result_Imagin[x] + height - 1, *Result_Imagin_3 = Result_Imagin[x] + height - 1;

		// Bottom boundary.
		*Result_Real_0-- = largeB*(*W_Real_InY_0--);
		*Result_Imagin_0-- = largeB*(*W_Imagin_InY_0--);

		*Result_Real_0-- = largeB*(*W_Real_InY_0--) - (bc1*(*Result_Real_1) + bs1*(*Result_Imagin_1));
		*Result_Imagin_0-- = largeB*(*W_Imagin_InY_0--) - (bc1*(*Result_Imagin_1--) - bs1*(*Result_Real_1--));

		*Result_Real_0-- = largeB*(*W_Real_InY_0--) - (bc1*(*Result_Real_1) + bs1*(*Result_Imagin_1) +
			bc2*(*Result_Real_2) + bs2*(*Result_Imagin_2));
		*Result_Imagin_0-- = largeB*(*W_Imagin_InY_0--) - (bc1*(*Result_Imagin_1--) - bs1*(*Result_Real_1--) +
			bc2*(*Result_Imagin_2--) - bs2*(*Result_Real_2--));

		// Backward recursion up the column.
		for (int y = height - 1 - 3; y >= 0; y--)
		{
			*Result_Real_0-- = largeB*(*W_Real_InY_0--) - (bc1*(*Result_Real_1) + bs1*(*Result_Imagin_1) +
				bc2*(*Result_Real_2) + bs2*(*Result_Imagin_2) +
				bc3*(*Result_Real_3) + bs3*(*Result_Imagin_3));
			*Result_Imagin_0-- = largeB*(*W_Imagin_InY_0--) - (bc1*(*Result_Imagin_1--) - bs1*(*Result_Real_1--) +
				bc2*(*Result_Imagin_2--) - bs2*(*Result_Real_2--) +
				bc3*(*Result_Imagin_3--) - bs3*(*Result_Real_3--));
		}
	}

	end = timeGetTime();
	sumOfTime += (end - begin);

	// Release all scratch buffers; the result buffers belong to the caller.
	memFreeFloat1(W_Real_InX);
	memFreeFloat1(W_Imagin_InX);
	memFreeFloat1(W_Real_InY);
	memFreeFloat1(W_Imagin_InY);
	memFreeFloat2(Out_Real);
	memFreeFloat2(Out_Imagin);
	memFreeFloat2(imageData);
}


/*
 * Complex Gabor filter of the file's own method (listed in the header as [1]),
 * producing two responses from one set of recursive filter passes.
 *
 * Steps, as implemented below:
 *   1. modulate the image with cos/sin of omegaX*x;
 *   2. per row: forward + backward recursion, then rotate by the phase omegaX*x
 *      into tempReal / tempImag;
 *   3. modulate tempReal / tempImag with cos/sin of omegaY*y (four signals);
 *   4. per column: forward + backward recursion on the four signals, rotate by
 *      the phase omegaY*y and combine them into two outputs:
 *        result = (first - second, third + forth)
 *        pair   = (first + second, third - forth)
 *      (the pair is presumably the response for the mirrored orientation
 *      pi - theta - TODO confirm against the paper).
 *
 * inputMat    image read with at<float>(y, x); must be at least 3x3 because three
 *             boundary samples are written unconditionally in each pass.
 * ksize       unused.
 * sigma       selects the recursion coefficients through q (presumably the Gaussian
 *             standard deviation - TODO confirm).
 * lambda      wavelength: the angular frequency is 2*pi / lambda.
 * theta       orientation: the frequency is split into cos(theta) / sin(theta) parts.
 * resultReal, resultImag, pairReal, pairImag
 *             caller-owned outputs, written as [x][y] (width rows of height floats).
 *
 * Side effect: adds timeGetTime() end - begin, measured around steps 1-4, to the global sumOfTime.
 */
void ourMethod(Mat* inputMat, int ksize, float sigma, float lambda, float theta, float** resultReal, float** resultImag, float** pairReal, float** pairImag) {
	int width = inputMat->size().width;
	int height = inputMat->size().height;

	// Recursion coefficients b1..b3 and gain derived from sigma through q.
	float m0 = 1.16680, m1 = 1.10783, m2 = 1.40586;
	float q = (sigma <3.556) ? -0.2568 + 0.5784*sigma + 0.0561*sigma*sigma : 2.5091 + 0.9804*(sigma - 3.556);

	float scale = (m0 + q)*(m1*m1 + m2*m2 + 2 * m1*q + q*q);
	float  b1 = -q*(2 * m0*m1 + m1*m1 + m2*m2 + (2 * m0 + 4 * m1)*q + 3 * q*q) / (scale), b2 = q*q*(m0 + 2 * m1 + 3 * q) / scale, b3 = -q*q*q / scale, largeB = m0*(m1*m1 + m2*m2) / scale;
	// The gain is squared and applied only in the backward passes.
	largeB = largeB*largeB;
	float first = 0, second = 0, third = 0, forth = 0;

	// Scratch buffers: recursion state lines plus full-size intermediates.
	float* Out_Real = memAllocFloat1(width);
	float* Out_Imagin = memAllocFloat1(width);
	float* w_cos = memAllocFloat1(width);
	float* w_sin = memAllocFloat1(width);
	float** tempReal = memAllocFloat2(height, width);
	float** tempImag = memAllocFloat2(height, width);
	float* w_cosReal = memAllocFloat1(height);
	float* w_sinReal = memAllocFloat1(height);
	float* w_cosImag = memAllocFloat1(height);
	float* w_sinImag = memAllocFloat1(height);
	float* out_cosReal = memAllocFloat1(height);
	float* out_cosImag = memAllocFloat1(height);
	float* out_sinImag = memAllocFloat1(height);
	float* out_sinReal = memAllocFloat1(height);
	float** CosSig = memAllocFloat2(height, width);
	float** SineSig = memAllocFloat2(height, width);
	float** cosRealSig = memAllocFloat2(width, height);
	float** sinRealSig = memAllocFloat2(width, height);
	float** cosImagSig = memAllocFloat2(width, height);
	float** sinImagSig = memAllocFloat2(width, height);
	float** inputArr = memAllocFloat2(height, width);

	// Per-column / per-row modulation factors.
	float* preCalulatedCosX = memAllocFloat1(width);
	float* preCalulatedSinX = memAllocFloat1(width);
	float* preCalulatedCosY = memAllocFloat1(height);
	float* preCalulatedSinY = memAllocFloat1(height);

	float** Result_Real = resultReal;
	float** Result_Imagin = resultImag;

	float omegaX, omegaY;
	omegaX = 2 * pi*cos(theta) / lambda;
	omegaY = 2 * pi*sin(theta) / lambda;

	// Copy the image into a plain [y][x] float array.
	for (int x = 0; x < width; x++)
		for (int y = 0; y < height; y++)
			inputArr[y][x] = inputMat->at<float>(y, x);

	// cos/sin tables sampled every 2*pi/1000 rad for indices -1000..1000;
	// coslookup / sinlookup point at the middle so negative indices are valid.
	float coslookup2[2001];
	float sinlookup2[2001];
	const float coefficentOfNorm = 2 * pi / 1000;

	for (int i = -1000; i <= 1000; i++) {
		coslookup2[i + 1000] = cos(coefficentOfNorm*(float)i);
		sinlookup2[i + 1000] = sin(coefficentOfNorm*(float)i);
	}
	float* coslookup = coslookup2 + 1000, *sinlookup = sinlookup2 + 1000;

	int cc = 0;	// quantized phase, used as table index

	float* p_preCalulatedCosX = preCalulatedCosX, *p_preCalulatedSinX = preCalulatedSinX;

	clock_t begin, end;
	begin = timeGetTime();

	// Step 1: horizontal modulation factors, then modulate every row.
	for (int x = 0; x < width; x++) {
		cc = ((int)(omegaX*x / coefficentOfNorm) % 1000);

		*p_preCalulatedCosX++ = coslookup[cc];
		*p_preCalulatedSinX++ = sinlookup[cc];
	}

	for (int y = 0; y < height; y++) {
		float* p_CosSig = CosSig[y];
		float* p_SineSig = SineSig[y];
		float* p_inputArr = inputArr[y];
		p_preCalulatedCosX = preCalulatedCosX;
		p_preCalulatedSinX = preCalulatedSinX;
		for (int x = 0; x < width; x++) {
			*p_CosSig++ = (*p_inputArr)*(*p_preCalulatedCosX++);
			*p_SineSig++ = (*p_inputArr++)*(*p_preCalulatedSinX++);
		}
	}

	// Step 2: horizontal pass, one row at a time.
	for (int y = 0; y < height; y++)
	{
		// p_*_0 is the write cursor; p_*_1/_2/_3 trail it by 1/2/3 samples.
		float * p_w_cos_0 = w_cos, *p_w_cos_1 = w_cos, *p_w_cos_2 = w_cos, *p_w_cos_3 = w_cos;
		float * p_w_sin_0 = w_sin, *p_w_sin_1 = w_sin, *p_w_sin_2 = w_sin, *p_w_sin_3 = w_sin;

		float * p_CosSig = CosSig[y];
		float * p_SineSig = SineSig[y];

		// Left boundary: the first three samples use only the history that exists.
		*p_w_cos_0++ = (*p_CosSig++);
		*p_w_sin_0++ = (*p_SineSig++);

		*p_w_cos_0++ = (*p_CosSig++) - (b1*(*p_w_cos_1++));
		*p_w_sin_0++ = (*p_SineSig++) - (b1*(*p_w_sin_1++));

		*p_w_cos_0++ = (*p_CosSig++) - (b1*(*p_w_cos_1++) + b2*(*p_w_cos_2++));
		*p_w_sin_0++ = (*p_SineSig++) - (b1*(*p_w_sin_1++) + b2*(*p_w_sin_2++));

		// Forward recursion: w[x] = in[x] - (b1*w[x-1] + b2*w[x-2] + b3*w[x-3]).
		for (int x = 3; x < width; x++)
		{
			*p_w_cos_0++ = (*p_CosSig++) -
				(b1*(*p_w_cos_1++)
				+ b2*(*p_w_cos_2++)
				+ b3*(*p_w_cos_3++));
			*p_w_sin_0++ = (*p_SineSig++) -
				(b1*(*p_w_sin_1++)
				+ b2*(*p_w_sin_2++)
				+ b3*(*p_w_sin_3++));
		}

		// Backward pass: Out_* holds the recursion state of the current row;
		// the rotated result is written to row y of tempReal / tempImag.
		p_w_cos_0 = w_cos + width - 1;
		p_w_sin_0 = w_sin + width - 1;
		float * p_Out_Real_0 = Out_Real + width - 1, *p_Out_Real_1 = Out_Real + width - 1, *p_Out_Real_2 = Out_Real + width - 1, *p_Out_Real_3 = Out_Real + width - 1;
		float * p_Out_Imagin_0 = Out_Imagin + width - 1, *p_Out_Imagin_1 = Out_Imagin + width - 1, *p_Out_Imagin_2 = Out_Imagin + width - 1, *p_Out_Imagin_3 = Out_Imagin + width - 1;

		float * p_tempReal = tempReal[y] + width - 1, *p_tempImag = tempImag[y] + width - 1;

		// Right boundary. Each sample is rotated with the phase of its own
		// column (width-1, width-2, width-3), matching the loop below and the
		// vertical pass.
		*p_Out_Real_0 = largeB * (*(p_w_cos_0--));
		*p_Out_Imagin_0 = largeB *(*(p_w_sin_0--));
		cc = ((int)(omegaX * (width - 1) / coefficentOfNorm) % 1000);
		float c = coslookup[cc];
		float s = sinlookup[cc];
		*p_tempReal-- = c*(*p_Out_Real_0)
			+ s*(*p_Out_Imagin_0);
		*p_tempImag-- = c*(*p_Out_Imagin_0--)
			- s*(*p_Out_Real_0--);

		*p_Out_Real_0 = largeB *(*(p_w_cos_0--)) - (b1*(*p_Out_Real_1--));
		*p_Out_Imagin_0 = largeB *(*(p_w_sin_0--)) - (b1*(*p_Out_Imagin_1--));
		cc = ((int)(omegaX * (width - 2) / coefficentOfNorm) % 1000);
		c = coslookup[cc];
		s = sinlookup[cc];
		*p_tempReal-- = c*(*p_Out_Real_0)
			+ s*(*p_Out_Imagin_0);
		*p_tempImag-- = c*(*p_Out_Imagin_0--)
			- s*(*p_Out_Real_0--);

		*p_Out_Real_0 = largeB *(*(p_w_cos_0--)) - (b1*(*p_Out_Real_1--) + b2*(*p_Out_Real_2--));
		*p_Out_Imagin_0 = largeB *(*(p_w_sin_0--)) - (b1*(*p_Out_Imagin_1--) + b2*(*p_Out_Imagin_2--));
		cc = ((int)(omegaX * (width - 3) / coefficentOfNorm) % 1000);
		c = coslookup[cc];
		s = sinlookup[cc];
		*p_tempReal-- = c*(*p_Out_Real_0)
			+ s*(*p_Out_Imagin_0);
		*p_tempImag-- = c*(*p_Out_Imagin_0--)
			- s*(*p_Out_Real_0--);

		// Backward recursion: out[x] = largeB*w[x] - (b1*out[x+1] + b2*out[x+2] + b3*out[x+3]),
		// followed by the rotation with the phase of column x.
		for (int x = width - 1 - 3; x >= 0; x--)
		{
			*p_Out_Real_0 = largeB*(*(p_w_cos_0--))
				- (b1*(*p_Out_Real_1--)
				+ b2*(*p_Out_Real_2--)
				+ b3*(*p_Out_Real_3--));
			*p_Out_Imagin_0 = largeB*(*(p_w_sin_0--))
				- (b1*(*p_Out_Imagin_1--)
				+ b2*(*p_Out_Imagin_2--)
				+ b3*(*p_Out_Imagin_3--));

			cc = ((int)(omegaX * x / coefficentOfNorm) % 1000);
			c = coslookup[cc];
			s = sinlookup[cc];
			*p_tempReal-- = c*(*p_Out_Real_0)
				+ s*(*p_Out_Imagin_0);
			*p_tempImag-- = c*(*p_Out_Imagin_0--)
				- s*(*p_Out_Real_0--);
		}
	}

	// Step 3: vertical modulation factors, then build the four modulated
	// signals, transposed to [x][y].
	float* p_preCalulatedCosY = preCalulatedCosY, *p_preCalulatedSinY = preCalulatedSinY;
	for (int y = 0; y < height; y++)
	{
		cc = ((int)(omegaY*y / coefficentOfNorm) % 1000);
		*p_preCalulatedCosY++ = coslookup[cc];
		*p_preCalulatedSinY++ = sinlookup[cc];
	}

	for (int x = 0; x < width; x++) {
		float* p_CosRealSig = cosRealSig[x];
		float* p_SinRealSig = sinRealSig[x];
		float* p_CosImagSig = cosImagSig[x];
		float* p_SinImagSig = sinImagSig[x];
		p_preCalulatedCosY = preCalulatedCosY;
		p_preCalulatedSinY = preCalulatedSinY;
		for (int y = 0; y < height; y++) {
			*p_CosRealSig++ = tempReal[y][x] * (*p_preCalulatedCosY);
			*p_SinRealSig++ = tempReal[y][x] * (*p_preCalulatedSinY);
			*p_CosImagSig++ = tempImag[y][x] * (*p_preCalulatedCosY++);
			*p_SinImagSig++ = tempImag[y][x] * (*p_preCalulatedSinY++);
		}
	}

	// Step 4: vertical pass, one column at a time, on all four signals.
	float * p_w_cosReal_0;
	float * p_w_sinReal_0;
	float * p_w_cosImag_0;
	float * p_w_sinImag_0;
	for (int x = 0; x < width; x++)
	{
		float * p_cosRealSig = cosRealSig[x];
		float * p_sinRealSig = sinRealSig[x];
		float * p_cosImagSig = cosImagSig[x];
		float * p_sinImagSig = sinImagSig[x];
		p_w_cosReal_0 = w_cosReal;   p_w_sinReal_0 = w_sinReal;   p_w_cosImag_0 = w_cosImag;   p_w_sinImag_0 = w_sinImag;
		float * p_w_cosReal_1 = w_cosReal, *p_w_cosReal_2 = w_cosReal, *p_w_cosReal_3 = w_cosReal;
		float * p_w_sinReal_1 = w_sinReal, *p_w_sinReal_2 = w_sinReal, *p_w_sinReal_3 = w_sinReal;
		float * p_w_cosImag_1 = w_cosImag, *p_w_cosImag_2 = w_cosImag, *p_w_cosImag_3 = w_cosImag;
		float * p_w_sinImag_1 = w_sinImag, *p_w_sinImag_2 = w_sinImag, *p_w_sinImag_3 = w_sinImag;

		// Top boundary.
		*p_w_cosReal_0++ = (*p_cosRealSig++);
		*p_w_sinReal_0++ = (*p_sinRealSig++);
		*p_w_cosImag_0++ = (*p_cosImagSig++);
		*p_w_sinImag_0++ = (*p_sinImagSig++);

		*p_w_cosReal_0++ = (*p_cosRealSig++) - (b1 * (*p_w_cosReal_1++));
		*p_w_sinReal_0++ = (*p_sinRealSig++) - (b1 * (*p_w_sinReal_1++));
		*p_w_cosImag_0++ = (*p_cosImagSig++) - (b1 * (*p_w_cosImag_1++));
		*p_w_sinImag_0++ = (*p_sinImagSig++) - (b1 * (*p_w_sinImag_1++));

		*p_w_cosReal_0++ = (*p_cosRealSig++) - (b1 * (*p_w_cosReal_1++) + b2 * (*p_w_cosReal_2++));
		*p_w_sinReal_0++ = (*p_sinRealSig++) - (b1 * (*p_w_sinReal_1++) + b2 * (*p_w_sinReal_2++));
		*p_w_cosImag_0++ = (*p_cosImagSig++) - (b1 * (*p_w_cosImag_1++) + b2 * (*p_w_cosImag_2++));
		*p_w_sinImag_0++ = (*p_sinImagSig++) - (b1 * (*p_w_sinImag_1++) + b2 * (*p_w_sinImag_2++));

		// Forward recursion down the column.
		for (int y = 3; y < height; y++)
		{
			*p_w_cosReal_0++ = (*p_cosRealSig++) -
				(b1*(*p_w_cosReal_1++) +
				b2*(*p_w_cosReal_2++) +
				b3*(*p_w_cosReal_3++));
			*p_w_sinReal_0++ = (*p_sinRealSig++) -
				(b1*(*p_w_sinReal_1++) +
				b2*(*p_w_sinReal_2++) +
				b3*(*p_w_sinReal_3++));
			// NOTE(review): the original author marked the next statement
			// "have problem"; no defect is visible in it from this file.
			*p_w_cosImag_0++ = (*p_cosImagSig++) -
				(b1*(*p_w_cosImag_1++) +
				b2*(*p_w_cosImag_2++) +
				b3*(*p_w_cosImag_3++));
			*p_w_sinImag_0++ = (*p_sinImagSig++) -
				(b1*(*p_w_sinImag_1++) +
				b2*(*p_w_sinImag_2++) +
				b3*(*p_w_sinImag_3++));
		}

		// Backward pass: out_* hold the recursion state of the current column.
		p_w_cosReal_0 = w_cosReal + height - 1;
		p_w_sinReal_0 = w_sinReal + height - 1;
		p_w_cosImag_0 = w_cosImag + height - 1;
		p_w_sinImag_0 = w_sinImag + height - 1;
		float * p_out_cosReal_0 = out_cosReal + height - 1, *p_out_cosReal_1 = out_cosReal + height - 1, *p_out_cosReal_2 = out_cosReal + height - 1, *p_out_cosReal_3 = out_cosReal + height - 1;
		float * p_out_sinReal_0 = out_sinReal + height - 1, *p_out_sinReal_1 = out_sinReal + height - 1, *p_out_sinReal_2 = out_sinReal + height - 1, *p_out_sinReal_3 = out_sinReal + height - 1;
		float * p_out_cosImag_0 = out_cosImag + height - 1, *p_out_cosImag_1 = out_cosImag + height - 1, *p_out_cosImag_2 = out_cosImag + height - 1, *p_out_cosImag_3 = out_cosImag + height - 1;
		float * p_out_sinImag_0 = out_sinImag + height - 1, *p_out_sinImag_1 = out_sinImag + height - 1, *p_out_sinImag_2 = out_sinImag + height - 1, *p_out_sinImag_3 = out_sinImag + height - 1;

		// Output cursors for column x, walking from the bottom sample upwards.
		float * r = Result_Real[x] + height - 1;
		float * i = Result_Imagin[x] + height - 1;
		float * sr = pairReal[x] + height - 1;
		float * si = pairImag[x] + height - 1;

		// Bottom boundary, y = height - 1.
		*p_out_cosReal_0 = largeB*(*p_w_cosReal_0--);
		*p_out_sinReal_0 = largeB*(*p_w_sinReal_0--);
		*p_out_cosImag_0 = largeB*(*p_w_cosImag_0--);
		*p_out_sinImag_0 = largeB*(*p_w_sinImag_0--);
		cc = ((int)(omegaY*(height - 1) / coefficentOfNorm) % 1000);
		float c = coslookup[cc];
		float s = sinlookup[cc];
		first = c*(*p_out_cosReal_0) +
			s*(*p_out_sinReal_0);
		second = c*(*p_out_sinImag_0) -
			s*(*p_out_cosImag_0);
		third = c*(*p_out_sinReal_0--) -
			s*(*p_out_cosReal_0--);
		forth = c*(*p_out_cosImag_0--) +
			s*(*p_out_sinImag_0--);
		*r-- = first - second;
		*i-- = third + forth;
		*sr-- = first + second;
		*si-- = third - forth;

		// y = height - 2.
		*p_out_cosReal_0 = largeB*(*p_w_cosReal_0--) - (b1 * (*p_out_cosReal_1--));
		*p_out_sinReal_0 = largeB*(*p_w_sinReal_0--) - (b1 * (*p_out_sinReal_1--));
		*p_out_cosImag_0 = largeB*(*p_w_cosImag_0--) - (b1 * (*p_out_cosImag_1--));
		*p_out_sinImag_0 = largeB*(*p_w_sinImag_0--) - (b1 * (*p_out_sinImag_1--));
		cc = ((int)(omegaY*(height - 2) / coefficentOfNorm) % 1000);
		c = coslookup[cc];
		s = sinlookup[cc];
		first = c*(*p_out_cosReal_0) +
			s*(*p_out_sinReal_0);
		second = c*(*p_out_sinImag_0) -
			s*(*p_out_cosImag_0);
		third = c*(*p_out_sinReal_0--) -
			s*(*p_out_cosReal_0--);
		forth = c*(*p_out_cosImag_0--) +
			s*(*p_out_sinImag_0--);
		*r-- = first - second;
		*i-- = third + forth;
		*sr-- = first + second;
		*si-- = third - forth;

		// y = height - 3.
		*p_out_cosReal_0 = largeB*(*p_w_cosReal_0--) - (b1 * (*p_out_cosReal_1--) + b2* (*p_out_cosReal_2--));
		*p_out_sinReal_0 = largeB*(*p_w_sinReal_0--) - (b1 * (*p_out_sinReal_1--) + b2* (*p_out_sinReal_2--));
		*p_out_cosImag_0 = largeB*(*p_w_cosImag_0--) - (b1 * (*p_out_cosImag_1--) + b2* (*p_out_cosImag_2--));
		*p_out_sinImag_0 = largeB*(*p_w_sinImag_0--) - (b1 * (*p_out_sinImag_1--) + b2* (*p_out_sinImag_2--));
		cc = ((int)(omegaY*(height - 3) / coefficentOfNorm) % 1000);
		c = coslookup[cc];
		s = sinlookup[cc];
		first = c*(*p_out_cosReal_0) +
			s*(*p_out_sinReal_0);
		second = c*(*p_out_sinImag_0) -
			s*(*p_out_cosImag_0);
		third = c*(*p_out_sinReal_0--) -
			s*(*p_out_cosReal_0--);
		forth = c*(*p_out_cosImag_0--) +
			s*(*p_out_sinImag_0--);
		*r-- = first - second;
		*i-- = third + forth;
		*sr-- = first + second;
		*si-- = third - forth;

		// Backward recursion up the column, then rotation and combination.
		for (int y = height - 1 - 3; y >= 0; y--)
		{
			*p_out_cosReal_0 = largeB*(*p_w_cosReal_0--) -
				(b1*(*p_out_cosReal_1--) +
				b2*(*p_out_cosReal_2--) +
				b3*(*p_out_cosReal_3--));
			*p_out_sinReal_0 = largeB*(*p_w_sinReal_0--) -
				(b1*(*p_out_sinReal_1--) +
				b2*(*p_out_sinReal_2--) +
				b3*(*p_out_sinReal_3--));
			*p_out_cosImag_0 = largeB*(*p_w_cosImag_0--) -
				(b1*(*p_out_cosImag_1--) +
				b2*(*p_out_cosImag_2--) +
				b3*(*p_out_cosImag_3--));
			*p_out_sinImag_0 = largeB*(*p_w_sinImag_0--) -
				(b1*(*p_out_sinImag_1--) +
				b2*(*p_out_sinImag_2--) +
				b3*(*p_out_sinImag_3--));

			cc = ((int)(omegaY*y / coefficentOfNorm) % 1000);
			c = coslookup[cc];
			s = sinlookup[cc];
			first = c*(*p_out_cosReal_0) +
				s*(*p_out_sinReal_0);
			second = c*(*p_out_sinImag_0) -
				s*(*p_out_cosImag_0);
			third = c*(*p_out_sinReal_0--) -
				s*(*p_out_cosReal_0--);
			forth = c*(*p_out_cosImag_0--) +
				s*(*p_out_sinImag_0--);
			*r-- = first - second;
			*i-- = third + forth;
			*sr-- = first + second;
			*si-- = third - forth;
		}
	}

	end = timeGetTime();
	sumOfTime += (end - begin);

	// Release all scratch buffers; the result and pair buffers belong to the caller.
	memFreeFloat1(Out_Real);
	memFreeFloat1(Out_Imagin);
	memFreeFloat1(w_cos);
	memFreeFloat1(w_sin);
	memFreeFloat2(tempReal);
	memFreeFloat2(tempImag);
	memFreeFloat2(CosSig);
	memFreeFloat2(SineSig);
	memFreeFloat2(cosRealSig);
	memFreeFloat2(sinRealSig);
	memFreeFloat2(cosImagSig);
	memFreeFloat2(sinImagSig);
	memFreeFloat2(inputArr);
	memFreeFloat1(w_sinImag);
	memFreeFloat1(w_cosReal);
	memFreeFloat1(w_sinReal);
	memFreeFloat1(w_cosImag);

	memFreeFloat1(out_cosReal);
	memFreeFloat1(out_cosImag);
	memFreeFloat1(out_sinImag);
	memFreeFloat1(out_sinReal);

	// The modulation tables are allocated on every call and must be freed too.
	memFreeFloat1(preCalulatedCosX);
	memFreeFloat1(preCalulatedSinX);
	memFreeFloat1(preCalulatedCosY);
	memFreeFloat1(preCalulatedSinY);
}



/*
 * Allocates an uninitialized array of x floats with malloc.
 *
 * Returns NULL when x is not positive or when the allocation fails; a failed
 * allocation is reported on stdout like in the other memAllocFloatN helpers.
 * Release the array with memFreeFloat1.
 */
float* memAllocFloat1(int x)
{
	if (x <= 0)
		return NULL;	// nothing to allocate

	float *a = (float*)malloc(sizeof(float) * static_cast<size_t>(x));
	if (a == NULL) { printf("mem is too huge."); }
	return(a);
}
/*
 * Allocates an uninitialized x * y * z float array that can be indexed as
 * a[i][j][k]. The floats live in one contiguous block (element (i, j, k) is at
 * offset (i*y + j)*z + k); two pointer tables provide the indexing.
 *
 * Returns NULL when a dimension is not positive, when the element count does
 * not fit in size_t, or when an allocation fails (reported on stdout).
 * Release the array with memFreeFloat3.
 */
float *** memAllocFloat3(int x, int y, int z) {
	if (x <= 0 || y <= 0 || z <= 0)
		return NULL;

	const size_t nx = static_cast<size_t>(x);
	const size_t ny = static_cast<size_t>(y);
	const size_t nz = static_cast<size_t>(z);

	// Do the size arithmetic in size_t and refuse products that would wrap.
	const size_t widest = sizeof(float) > sizeof(float*) ? sizeof(float) : sizeof(float*);
	const size_t limit = static_cast<size_t>(-1) / widest;
	if (ny > limit / nx || nz > limit / (nx * ny)) {
		printf("mem is too huge.");
		return NULL;
	}

	float *a = (float*)malloc(sizeof(float) * nx * ny * nz);
	float **p = (float**)malloc(sizeof(float*) * nx * ny);
	float ***pp = (float***)malloc(sizeof(float**) * nx);
	if (a == NULL || p == NULL || pp == NULL) {
		printf("mem is too huge.");
		free(a);
		free(p);
		free(pp);
		return NULL;
	}

	// p holds one pointer per (i, j) line, pp one pointer per i plane.
	for (size_t i = 0; i < nx; i++)
		for (size_t j = 0; j < ny; j++)
			p[i * ny + j] = a + (i * ny + j) * nz;
	for (size_t i = 0; i < nx; i++)
		pp[i] = p + i * ny;
	return(pp);
}

/*
 * Releases an array obtained from memAllocFloat3: the float block, the line
 * pointer table and the plane pointer table. A NULL argument is ignored.
 * The caller's own pointer variable is not modified.
 */
void memFreeFloat3(float ***p)
{
	if (p == NULL)
		return;

	float **lines = p[0];
	float *data = lines[0];
	free(data);
	free(lines);
	free(p);
}

/*
 * Allocates an uninitialized x * y float array that can be indexed as a[i][j].
 * The floats live in one contiguous block (row i starts at offset i*y); a table
 * of x row pointers provides the indexing.
 *
 * Returns NULL when a dimension is not positive, when the element count does
 * not fit in size_t, or when an allocation fails (reported on stdout).
 * Release the array with memFreeFloat2.
 */
float** memAllocFloat2(int x, int y)
{
	if (x <= 0 || y <= 0)
		return NULL;

	const size_t rows = static_cast<size_t>(x);
	const size_t cols = static_cast<size_t>(y);

	// Do the size arithmetic in size_t and refuse a product that would wrap.
	if (cols > static_cast<size_t>(-1) / sizeof(float) / rows) {
		printf("mem is too huge.");
		return NULL;
	}

	float *a = (float*)malloc(sizeof(float) * rows * cols);
	float **p = (float**)malloc(sizeof(float*) * rows);
	if (a == NULL || p == NULL) {
		printf("mem is too huge.");
		free(a);
		free(p);
		return NULL;
	}

	for (size_t i = 0; i < rows; i++) p[i] = a + i * cols;
	return(p);
}

/*
 * Releases an array obtained from memAllocFloat1. A NULL argument is ignored.
 * The caller's own pointer variable is not modified.
 */
void memFreeFloat1(float *p)
{
	if (p == NULL)
		return;

	free(p);
}
/*
 * Releases an array obtained from memAllocFloat2: first the float block that
 * row 0 points to, then the row pointer table. A NULL argument is ignored.
 * The caller's own pointer variable is not modified.
 */
void memFreeFloat2(float **p)
{
	if (p == NULL)
		return;

	float *data = p[0];
	free(data);
	free(p);
}
/*
 * Allocates an uninitialized u * v * x * y float array that can be indexed as
 * a[i][j][k][l]. The floats live in one contiguous block (element (i, j, k, l)
 * is at offset ((i*v + j)*x + k)*y + l); three pointer tables provide the
 * indexing.
 *
 * Returns NULL when a dimension is not positive, when the element count does
 * not fit in size_t, or when an allocation fails (reported on stdout).
 * Release the array with memFreeFloat4.
 */
float**** memAllocFloat4(int u, int v, int x, int y) {
	if (u <= 0 || v <= 0 || x <= 0 || y <= 0)
		return NULL;

	const size_t nu = static_cast<size_t>(u);
	const size_t nv = static_cast<size_t>(v);
	const size_t nx = static_cast<size_t>(x);
	const size_t ny = static_cast<size_t>(y);

	// Do the size arithmetic in size_t and refuse products that would wrap.
	const size_t widest = sizeof(float) > sizeof(float*) ? sizeof(float) : sizeof(float*);
	const size_t limit = static_cast<size_t>(-1) / widest;
	if (nv > limit / nu || nx > limit / (nu * nv) || ny > limit / (nu * nv * nx)) {
		printf("mem is too huge.");
		return NULL;
	}

	const size_t uv = nu * nv;		// number of (i, j) planes
	const size_t uvx = uv * nx;		// number of (i, j, k) lines

	float *a = (float*)malloc(sizeof(float) * uvx * ny);
	float **p = (float**)malloc(sizeof(float*) * uvx);
	float ***pp = (float***)malloc(sizeof(float**) * uv);
	float ****ppp = (float****)malloc(sizeof(float***) * nu);
	if (a == NULL || p == NULL || pp == NULL || ppp == NULL) {
		printf("mem is too huge.");
		free(a);
		free(p);
		free(pp);
		free(ppp);
		return NULL;
	}

	// p: one pointer per line, pp: one pointer per plane, ppp: one pointer per i.
	for (size_t line = 0; line < uvx; line++)
		p[line] = a + line * ny;
	for (size_t plane = 0; plane < uv; plane++)
		pp[plane] = p + plane * nx;
	for (size_t i = 0; i < nu; i++)
		ppp[i] = pp + i * nv;
	return(ppp);

}
/*
 * Releases an array obtained from memAllocFloat4: the float block followed by
 * the three pointer tables, innermost first. A NULL argument is ignored.
 * The caller's own pointer variable is not modified.
 */
void memFreeFloat4(float ****p)
{
	if (p == NULL)
		return;

	float ***planes = p[0];
	float **lines = planes[0];
	float *data = lines[0];
	free(data);
	free(lines);
	free(planes);
	free(p);
}




