#include <math.h>
#include "fft1d.h"
#include "fft2d.h"
#include "fft2d_decl.h"
#if defined(AUTO)
#include "fft2d_auto.cc"
#elif defined(AUTO2)
#include "fft2d_auto2.cc"
#elif defined(MAN2)
#include "fft2d_man2.cc"
#endif

/*
 * Column pass of the 2-D transform.
 *
 * For every second index j in [col.s2, col.e2] the elements A(i,j), with i in
 * [col.s1, col.e1], are copied into the scratch vector F, fft() is run in
 * place on F, and the result is stored into B(i,j).
 *
 *   A   - input matrix
 *   W   - vector whose storage starting at W(r1.s1) is handed to fft()
 *   col - index range of A and B that is processed
 *   r1  - range given to DataRa for the scratch vector F; r1.l1() is the
 *         length passed to fft()
 *   B   - output matrix
 *
 * The whole loop is bracketed by the CK_FC clock for the current PE.
 */
void fft_col (Vector2Dcplx &A, Vector1Dcplx &W, Range2D &col, Range1D &r1,
              Vector2Dcplx &B)
{
  int len = r1.l1();
  DataRa (Vector1Dcplx, F, r1);
  int mpe = getPE();

  CLOCK_BEGIN(CK_FC,mpe);
  for (int j = col.s2; j <= col.e2; j++) {
    // Gather one line of A into the scratch vector.
    for (int i = col.s1; i <= col.e1; i++) {
      F(i) = A(i,j);
    }

    fft (len, &F(r1.s1), &W(r1.s1));

    // Scatter the transformed line into B.
    for (int i = col.s1; i <= col.e1; i++) {
      B(i,j) = F(i);
    }
  }
  CLOCK_END(CK_FC,mpe);
}

/*
 * Row pass of the 2-D transform.
 *
 * For every first index i in [row.s1, row.e1] the elements A(i,j), with j in
 * [row.s2, row.e2], are copied into the scratch vector F, fft() is run in
 * place on F, and the result is stored into B(i,j).
 *
 *   A   - input matrix
 *   W   - vector whose storage starting at W(r1.s1) is handed to fft()
 *   row - index range of A and B that is processed
 *   r1  - range given to DataRa for the scratch vector F; r1.l1() is the
 *         length passed to fft()
 *   B   - output matrix
 *
 * The whole loop is bracketed by the CK_FR clock for the current PE.
 */
void fft_row (Vector2Dcplx &A, Vector1Dcplx &W, Range2D &row, Range1D &r1,
              Vector2Dcplx &B)
{
  int len = r1.l1();
  DataRa (Vector1Dcplx, F, r1);
  int mpe = getPE();

  CLOCK_BEGIN(CK_FR,mpe);
  for (int i = row.s1; i <= row.e1; i++) {
    // Gather one line of A into the scratch vector.
    for (int j = row.s2; j <= row.e2; j++) {
      F(j) = A(i,j);
    }

    fft (len, &F(r1.s1), &W(r1.s1));

    // Scatter the transformed line into B.
    for (int j = row.s2; j <= row.e2; j++) {
      B(i,j) = F(j);
    }
  }
  CLOCK_END(CK_FR,mpe);
}

/*
 * Stores the complex conjugate of A into B over the index range ra:
 * the real part is copied and the imaginary part is negated.
 *
 *   A  - input matrix
 *   ra - index range [s1,e1] x [s2,e2] to process
 *   B  - output matrix
 */
void negate_im (Vector2Dcplx &A, Range2D &ra,
                Vector2Dcplx &B)
{
  for (int i = ra.s1; i <= ra.e1; i++) {
    for (int j = ra.s2; j <= ra.e2; j++) {
      B(i,j).re =  A(i,j).re;
      B(i,j).im = -A(i,j).im;
    }
  }
}

/*
 * Prepares W for later fft() calls by running computeW over it.
 *
 *   r1 - range of W; r1.l1() is the length given to computeW and r1.s1 is
 *        the index of the first element written
 *   W  - vector filled by computeW, starting at W(r1.s1)
 */
void pre_fft (Range1D &r1,
              Vector1Dcplx &W)
{
  int len = r1.l1();
  computeW (len, &W(r1.s1));
}

/*
 * Fills A and R with identical synthetic input over the index range ra.
 *
 * With n = cfg.size and w = pi / n^2, element (i,j) is set to
 *   re = cos((i+j)^2 * w),  im = sin((i+j)^2 * w).
 *
 *   cfg - supplies the problem size n
 *   ra  - index range [s1,e1] x [s2,e2] to fill
 *   A   - first matrix written
 *   R   - second matrix written with the same values
 */
void matrix_read (Config &cfg, Range2D &ra,
                  Vector2Dcplx &A, Vector2Dcplx &R)
{
  const int    n  = cfg.size;
  const double pi = 4 * atan(1);
  const int    k  = 1;              // frequency multiplier, fixed at 1
  const double w  = k*pi/n/n;

  for (int i = ra.s1; i <= ra.e1; i++) {
    for (int j = ra.s2; j <= ra.e2; j++) {
      // Alternative pattern that depends on the first index only: a = i.
      const int    a     = i + j;
      const double phase = a*a*w;

      R(i,j).re = A(i,j).re = cos(phase);
      R(i,j).im = A(i,j).im = sin(phase);
    }
  }
}

/*
 * Sets up the partition descriptors and the output file for a run.
 *
 *   cfg - supplies the problem size (cfg.size) and PE count (cfg.nope)
 *   row - filled by makePart2D(row, size, nope, size, 1)
 *   col - filled by makePart2D(col, size, 1, size, nope)
 *   r1n - for every PE index p in [0, nope): r1n[p] is set to the range
 *         [0, size-1] and setNo(p, 1) is called
 *   f   - set to stdout
 *
 * NOTE(review): the argument patterns suggest row is split across PEs in the
 * first dimension and col in the second - confirm against makePart2D.
 */
void init (Config &cfg,
           PartRange2D &row, PartRange2D &col, PartRange1D &r1n, File &f)
{
  int pes  = cfg.nope;
  int size = cfg.size;

  makePart2D(row, size, pes, size, 1);
  makePart2D(col, size, 1, size, pes);

  for (int part = 0; part < pes; part++) {
    r1n[part].set (0, size-1);
    r1n.setNo(part,1);
  }

  f = stdout;
}

/* Largest absolute per-component difference accepted by check_equal(). */
#define MAXERR 0.00000001

/*
 * Compares A and B element by element over the index range ra.
 *
 * Real and imaginary parts are compared separately. On the first component
 * whose absolute difference is not within MAXERR, a message is written to
 * stderr, FAILURE() is invoked and the function returns. If every component
 * is within tolerance, SUCCESS() is invoked.
 *
 *   A, B - matrices to compare
 *   ra   - index range [s1,e1] x [s2,e2] to check
 */
void check_equal (Vector2Dcplx &A, Vector2Dcplx &B, Range2D &ra)
{
  int i, j;
  for (i=ra.s1; i<=ra.e1; i++)
    for (j=ra.s2; j<=ra.e2; j++) {
      // Use the magnitude of the error: a signed difference would let every
      // deviation with A < B pass unnoticed. The negated "<=" form also
      // rejects NaN, which compares false against everything.
      double dif = fabs(A(i,j).re - B(i,j).re);
      if (!(dif <= MAXERR)) {
        fprintf(stderr,"inaccurate result for (%d,%d).re : %lf vs %lf\n",
                i, j, A(i,j).re, B(i,j).re);
        FAILURE(); return;
      }
      dif = fabs(A(i,j).im - B(i,j).im);
      if (!(dif <= MAXERR)) {
        fprintf(stderr,"inaccurate result for (%d,%d).im : %lf vs %lf\n",
                i, j, A(i,j).im, B(i,j).im);
        FAILURE(); return;
      }
    }
  SUCCESS();
}

/*
 * Loop-continuation test: sets cond to the result of idx+1 < cfg.noloop,
 * i.e. true while another iteration after the current index is still due.
 *
 *   cfg  - supplies the iteration count (cfg.noloop)
 *   idx  - current iteration index
 *   cond - receives the comparison result
 */
void econd (Config &cfg, Int &idx,
            Int &cond)
{
  cond = (idx + 1 < cfg.noloop);
}

#include "fft1d.cc"

// Default run configuration. main() overwrites noloop, size and nope from the
// -l, -n and -p command-line options; body() copies this object before use.
Config cfg = {20, 8, 4}; // loops, size, nope

/*
 * Entry point handed to dpcmRun() by main().
 *
 * Copies *penv into a local Env and takes a private copy of the global cfg.
 * If the configured size is 0, a fixed sweep over sizes 64, 128, 256 and 512
 * is run, each under its own clock id, with the loop count scaled per size.
 * Otherwise a single test() is run with the configured size under CK_LOOP.
 */
void body (PreEnv *penv)
{
  Data (Env, env);
  env.pre = *penv;
  // NOTE(review): pe is not referenced directly below; it may be picked up by
  // macros such as PRINTP0 - confirm before removing.
  int pe=env.pre.pe;
  Config cf = cfg;   // private copy: the sweep below mutates size/noloop/clk

  PRINTP0("n=%d nope=%d lp=%d\n", cf.size, cf.nope, cf.noloop);
  if (cf.size==0) {
    BEFORE_ALL;

    // Sizes 64 and 128 run with 16x the configured loop count.
    cf.noloop = cf.noloop*16;
    // cf.clk = CK_LP1; cf.size =  32; test(cf, env);
    cf.clk = CK_LP2; cf.size =  64; test(cf, env);
    cf.clk = CK_LP3; cf.size = 128; test(cf, env);

    // Size 256 runs with 4x the configured loop count (16x / 4).
    cf.noloop = cf.noloop/4;
    cf.clk = CK_LP4; cf.size = 256; test(cf, env);

    // Size 512 runs with the configured loop count (4x / 4).
    cf.noloop = cf.noloop/4;
    cf.clk = CK_LP5; cf.size = 512; test(cf, env);
    // cf.clk = CK_LP6; cf.size = 1024; test(cf, env);
  } else {
    // Single run with the size given in the configuration.
    cf.clk = CK_LOOP; test(cf, env);
  }
}


// Parallel per-clock tables passed to dpcmInit() in main(): clock names,
// group ids and flags. The first four entries of each table are given here;
// NCLK, GCLK and FCLK are macros defined elsewhere.
// NOTE(review): the macros presumably expand to the entries for the remaining
// clocks, and the four names presumably line up with the user clock ids
// (CK_FC, CK_FR, ...) - confirm against the clock id definitions.
const char* ck_nm[] = { "fftc", "fftr", "fft", "loop", NCLK };
int ck_gr[]   = { GR_USR, GR_USR, GR_USR, GR_USR, GCLK };
int ck_fl[]   = { 1, 1, 1, 1, FCLK };

/*
 * Program entry point.
 *
 * Passes the command line to dpcmArgs(), then reads the benchmark options:
 *   -l <count>  loop count      (cfg.noloop)
 *   -n <size>   problem size    (cfg.size)
 *   -p <pes>    number of PEs   (cfg.nope)
 * Any other value returned by getopt() is ignored. Afterwards the GR_HPC
 * entry of gr1_fl is cleared, the runtime is initialised with the clock
 * tables, and body() is started through dpcmRun().
 */
int main(int argc, char** argv)
{
  dpcmArgs (argc, argv);

  int opt;
  while ((opt = getopt (argc, argv, "l:n:p:")) != -1) {
    switch (opt) {
    case 'l':
      cfg.noloop = atoi(optarg);
      break;
    case 'n':
      cfg.size = atoi(optarg);
      break;
    case 'p':
      cfg.nope = atoi(optarg);
      break;
    default:
      // Unrecognised options are deliberately left alone.
      break;
    }
  }

  gr1_fl[GR_HPC] = 0;
  // gr1_fl[GR_LIB] = 0;

  dpcmInit (cfg.nope, MAXSWAPBUF, MAXMSGBUF,
            0, NULL, NULL, CK_USR_OFF-CK_OFF, ck_nm, ck_gr, ck_fl);
  dpcmRun (body);
  return 0;
}

