I am looking at the convolution FFT example (for large kernels) in the NVIDIA SDK. I know the theory behind Fourier transforms and their FFT implementations (the basics, at least), but I can't figure out what the following code does:
const int fftH = snapTransformSize(dataH + kernelH - 1);const int fftW = snapTransformSize(dataW + kernelW - 1);....//gpu initialization codeprintf("...creating R2C & C2R FFT plans for %i x %i\n", fftH, fftW); cuf ftSafeCall( cufftPlan2d(&fftPlanFwd, fftH, fftW, CUFFT_R2C) ); cufftSafeCall( cufftPlan2d(&fftPlanInv, fftH, fftW, CUFFT_C2R) ); printf("...uploading to GPU and padding convolution kernel and input data\n"); cutilSafeCall( cudaMemcpy(d_Kernel, h_Kernel, kernelH * kernelW * sizeof(float), cudaMemcpyHostToDevice) ); cutilSafeCall( cudaMemcpy(d_Data, h_Data, dataH * dataW * sizeof(float), cudaMemcpyHostToDevice) ); cutilSafeCall( cudaMemset(d_PaddedKernel, 0, fftH * fftW * sizeof(float)) ); cutilSafeCall( cudaMemset(d_PaddedData, 0, fftH * fftW * sizeof(float)) ); padKernel( d_PaddedKernel, d_Kernel, fftH, fftW, kernelH, kernelW, kernelY, kernelX ); padDataClampToBorder( d_PaddedData, d_Data, fftH, fftW, dataH, dataW, kernelH, kernelW, kernelY, kernelX );
I've never used the CUFFT library before, so I don't know what snapTransformSize does
(here's the code)
// Chooses the transform size to use for a requested extent `dataSize`.
//
// Steps, in order:
//   1. dataSize is passed through iAlignUp(dataSize, 16).
//      NOTE(review): iAlignUp is defined elsewhere; presumably it rounds its
//      first argument up to a multiple of the second -- confirm.
//   2. If the aligned value is exactly a power of two, it is returned as is.
//   3. Otherwise, if the next power of two above it is <= 1024, that power of
//      two is returned.
//   4. Otherwise the result of iAlignUp(dataSize, 512) is returned.
//      NOTE(review): presumably this avoids the large padding overhead of
//      rounding big sizes up to a full power of two -- confirm.
int snapTransformSize(int dataSize)
{
    dataSize = iAlignUp(dataSize, 16);

    // Scan downward from bit 31 for the most significant set bit.
    int topBit = 31;
    while (topBit >= 0 && !(dataSize & (1U << topBit)))
        --topBit;

    // Largest power of two that does not exceed the aligned size.
    const unsigned int floorPow2 = 1U << topBit;
    if (floorPow2 == dataSize)
        return dataSize;

    // Smallest power of two strictly above the aligned size.
    const unsigned int ceilPow2 = 1U << (topBit + 1);
    if (ceilPow2 <= 1024)
        return ceilPow2;

    return iAlignUp(dataSize, 512);
}
nor why the complex plane is initialized this way.
Could you please provide an explanation, or links to resources that explain this?