/*
 * Test harness for exercising candidate DCT/IDCT implementations.
 *
 */
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

/* Value of pi used in the cosine arguments of the reference DCT/IDCT. */
static const double PI=3.14159265358979323;

/***************************************************************************/

/* System dependencies: high-quality random numbers */

/* If your system supports a higher-quality random number generator,
   plug that in here */
/* Seed the C library generator from the current calendar time. */
void InitRandom() {
  time_t now = time(0);
  srand(now);
}
/* Return the next value produced by the C library generator. */
int Random() {
  int value = rand();
  return value;
}


/* Timing primitives:
 * timestamp() returns a TIME_T value which somehow indicates the current time
 * timeelapsed() accepts a TIME_T value and returns a double indicating
 *   the number of elapsed seconds
 */

/*
 * Define ONLY ONE of the following:
 */

#define BSD_TIMES (1)  /* 1 => time with times() from <sys/times.h> (user CPU time of this process) */
#define ANSI_TIMES (0) /* 1 => time with ANSI time()/difftime() (calendar time) */


/* BSD systems provide a times() call which provides CPU time usage
 * for the current process.  Historically, this was accurate to the
 * nearest 1/128 of a second.  Some newer systems may provide
 * millisecond resolution.
 */
#if BSD_TIMES
#include <sys/times.h>
#include <unistd.h>
/* Number of times() clock ticks per second.  This must be queried at run
 * time with sysconf(); the old _BSD_CLOCKS_PER_SEC_ constant is a private
 * identifier of some historical BSD headers and is not defined elsewhere.
 */
#define CLOCK_TCK (sysconf(_SC_CLK_TCK))
#define TIME_T double

/* Return the user CPU time consumed so far by this process, in seconds.
 * Returns 0.0 if the tick rate or the process times cannot be obtained.
 */
TIME_T timestamp(void) {
  struct tms timeInfo;
  long ticksPerSecond = CLOCK_TCK;

  if(ticksPerSecond <= 0) return 0.0;
  if(times(&timeInfo) == (clock_t)-1) return 0.0;
  return (double)timeInfo.tms_utime/(double)ticksPerSecond;
}

/* Return the CPU seconds elapsed since `last` (a value from timestamp()). */
double timeelapsed(TIME_T last) {
  return timestamp()-last;
}
#endif

/* On most systems, the ANSI-standard time()/difftime() functions
 * only provide timing to the nearest whole second.
 * If you use this, you may need to use very large iteration
 * counts (possibly hundreds of millions) to accurately
 * determine the speed of a function.
 */
#if ANSI_TIMES
#define TIME_T time_t

/* Capture the current calendar time. */
TIME_T timestamp() {
  TIME_T now = time(NULL);
  return now;
}

/* Seconds between `last` (a value from timestamp()) and the present. */
double timeelapsed(TIME_T last) {
  time_t now = time(NULL);
  double seconds = difftime(now,last);
  return seconds;
}
#endif

/***************************************************************************/

/*
 * 1-d implemented directly from the formulas.
 * Very accurate, very slow.
 *
 * Modified to compute DCT scaled by sqrt(2)*2
 */
/*
 * Reference 4-point forward DCT, evaluated in double precision.
 * data: four input samples, overwritten in place with the four
 *       rounded coefficients.
 */
static void
dct1d4PtReference(int *data) {
  double sums[4];
  int k,i;

  /* Accumulate every coefficient before writing anything back,
     because all four inputs are needed for each output. */
  for(k=0;k<4;k++) {
    double acc = 0;
    for(i=0;i<4;i++) {
      acc += data[i] * cos(PI * k * (2*i+1)/8.0);
    }
    sums[k] = acc;
  }

  for(k=0;k<4;k++) {
    double value = sums[k];

    value /= 2.0;                      /* Usual DCT weighting */
    if(k==0) value /= sqrt(2.0);       /* Extra weighting for the DC term */
    value *= sqrt(2)*2;                /* Match the scale of the test DCT */

    data[k] = floor(value + 0.5);      /* Round to nearest integer */
  }
}

/***************************************************************************/

/*
 * Candidate 4-point forward DCT using integer arithmetic with 10-bit
 * fixed-point rotation constants.
 * dctBlock: four input samples, overwritten in place with the four
 *           coefficients (same scaling as dct1d4PtReference).
 *
 * Note: the final >>10 is applied to values that may be negative; the
 * result of that is implementation-defined in C (this code expects an
 * arithmetic shift, i.e. rounding toward negative infinity).
 */
static void
dct1d4PtTest(int *dctBlock) {
  /* Constants are rounded to nearest:
   *   sqrt(2)*cos(3pi/8)*1024 =  554.18 ->  554
   *   sqrt(2)*sin(3pi/8)*1024 = 1337.92 -> 1338
   */
  static const int r2c3=554 /*sqrt(2)*cos(3pi/8)<<10*/, r2s3=1338;
  int x0=dctBlock[0], x1=dctBlock[1], x2=dctBlock[2], x3=dctBlock[3];
  int x4;

  /* Stage 1: sums and differences of mirrored samples */
  x4=x0+x3;
  x0-=x3;
  x3=x1+x2;
  x1-=x2;

  /* Stage 2: even outputs from the sums, odd outputs from a rotation
   * of the differences.  Sharing the product r2c3*(x1+x0) yields
   *   x0 = r2s3*x0 + r2c3*x1
   *   x1 = r2c3*x0 - r2s3*x1
   * with three multiplications instead of four.
   */
  x2=x4+x3;
  x4-=x3;
  x3=r2c3*(x1+x0);
  x0=(-r2c3+r2s3)*x0+x3;
  x1=(-r2c3-r2s3)*x1+x3;

  /* Round and output (adding 512 rounds the 10-bit fraction) */
  dctBlock[0] = x2;
  dctBlock[2] = x4;
  dctBlock[1] = (x0+512)>>10;
  dctBlock[3] = (x1+512)>>10;
}

/***************************************************************************/

/*
 * 1-d implemented directly from the formulas.
 * Very accurate, very slow.
 */
/*
 * Reference 4-point inverse DCT, evaluated in double precision.
 * data: four coefficients, overwritten in place with the four
 *       rounded output samples.
 */
static void
idct1d4PtReference(int *data) {
  double samples[4];
  int pos,k;

  /* Compute every sample before writing anything back,
     because all four coefficients are needed for each output. */
  for(pos=0;pos<4;pos++) {
    double acc = data[0] / sqrt(2.0);   /* DC term with its extra weighting */
    for(k=1;k<4;k++) {
      acc += data[k] * cos(PI * k * (2*pos+1)/8.0);
    }
    samples[pos] = acc;
  }

  for(pos=0;pos<4;pos++) {
    double value = samples[pos];

    value /= 2.0;
    value *= sqrt(2.0)*2.0;             /* Match the scale of the test IDCT */
    data[pos] = floor(value + 0.5);     /* Round to nearest integer */
  }
}

/***************************************************************************/

/*
 * Candidate 4-point inverse DCT using integer arithmetic with 10-bit
 * fixed-point rotation constants.
 * dctBlock: four coefficients, overwritten in place with the four
 *           output samples (same scaling as idct1d4PtReference).
 *
 * Note: the final >>10 is applied to values that may be negative; the
 * result of that is implementation-defined in C (this code expects an
 * arithmetic shift, i.e. rounding toward negative infinity).
 */
static void
idct1d4PtTest(int *dctBlock) {
  static const int r2c1=1338; /* sqrt(2)*cos(pi/8) <<10 */
  static const int r2s1=554; /* sqrt(2)*sin(pi/8) <<10 */

  /* Stage 2 */
  /* The even coefficients are promoted to 10-bit fixed point by
   * multiplying by 1024 rather than shifting: left-shifting a negative
   * int is undefined behaviour.
   */
  int x0=dctBlock[0]*1024, x1=dctBlock[1], x2=dctBlock[2]*1024,
    x3=dctBlock[3], x4;
  x4=x0+x2; x0-=x2;
  /* Rotation of the odd coefficients.  Sharing the product
   * (x1+x3)*r2s1 yields
   *   x3 = r2s1*x1 - r2c1*x3
   *   x1 = r2c1*x1 + r2s1*x3
   * with three multiplications instead of four.
   */
  x2=(x1+x3)*r2s1; x3=x2+(-r2c1-r2s1)*x3; x1=x2+(r2c1-r2s1)*x1;

  /* Stage 1, Round and output (adding 512 rounds the 10-bit fraction) */
  x4+=512;x0+=512;
  dctBlock[0] = (x4+x1)>>10;
  dctBlock[1] = (x0+x3)>>10;
  dctBlock[2] = (x0-x3)>>10;
  dctBlock[3] = (x4-x1)>>10;
}

/***************************************************************************/

/*
 * Compare a candidate 4-point transform against a reference transform
 * on random blocks and print error statistics to stdout.
 *
 * maxIterations:      number of random blocks to test
 * testFunc:           transform under test (operates in place on 4 ints)
 * testFuncName:       name printed for testFunc
 * referenceFunc:      trusted transform (operates in place on 4 ints)
 * referenceFuncName:  name printed for referenceFunc
 *
 * If maxIterations is not positive, only the heading and a short notice
 * are printed, since there is nothing to average.
 */
void test1d4PtAccuracy(int maxIterations,
		       void (*testFunc)(int *),
		       char *testFuncName,
		       void (*referenceFunc)(int *),
		       char *referenceFuncName) {
  int input[4], reference[4], test[4];
  int iteration;
  int totalCoefficients=0; /* Total number of coefficients tested */
  int errorCoefficients[4]={0}; /* [k] = # coefficients with |error| > k */
  double squareError=0; /* Total squared error over all coefficients */
  double maxSquareError=0; /* Largest squared error for any block */
  int i,k;
  printf("Testing Accuracy: %s (%d iterations, comparing to %s)\n",
	 testFuncName,maxIterations,referenceFuncName);

  for(iteration=0;iteration<maxIterations;iteration++) {

    double thisSquareError = 0.0;

    /* Build random input values in range -128...127 */
    for(i=0;i<4;i++) {
      int t = Random() & 0xff;
      if(t > 127) t-= 256;
      input[i] = t;
    }

    /* Compute reference version */
    memcpy(reference,input,sizeof(input));
    (*referenceFunc)(reference);

    /* Compute test version */
    memcpy(test,input,sizeof(input));
    (*testFunc)(test);

    /* For each coefficient, count which error thresholds (0..3) its
       absolute error exceeds, and accumulate the squared error */
    totalCoefficients += 4;
    for(i=0;i<4;i++) {
      int err = test[i] - reference[i];
      double err2 = (double)err * (double)err;
      if(err < 0) err = -err;
      for(k=0;k<4;k++)
	if(err > k) errorCoefficients[k]++;
      squareError += err2;
      thisSquareError += err2;
    }
    if(thisSquareError > maxSquareError)
      maxSquareError = thisSquareError;

    /* Show blocks whose total squared error is unusually large */
    if(thisSquareError > 100) {
      printf("Bad Example: mean square error = %f\n",thisSquareError/4);
      printf("Input: ");  for(i=0;i<4;i++) printf("  %4d",input[i]);
      printf("\nRef:   ");  for(i=0;i<4;i++) printf("  %4d",reference[i]);
      printf("\nTest:  ");  for(i=0;i<4;i++) printf("  %4d",test[i]);
      printf("\n\n");
    }
  }

  /* Without any samples the ratios below would be 0/0 */
  if(totalCoefficients == 0) {
    printf("   No iterations run; no statistics available\n");
    return;
  }

  printf("   Probability of error > 0: %g",
	 (double)errorCoefficients[0] / (double)totalCoefficients);
  for(k=1;k<4;k++)
    printf(",  > %d: %g",k,
	   (double)errorCoefficients[k] / (double)totalCoefficients);
  printf("\n");
  printf("   Overall mean square error: %f\n", squareError/totalCoefficients);
  printf("   Maximum mean square error: %f\n", maxSquareError / 4);
}

/***************************************************************************/

/*
 * Since the Random() function might not be infinitely fast,
 * I choose one set of random values for every hundred calls
 * to the test function.  That way, my time measures the function being
 * tested, not the random number generator.
 */

/*
 * Time a 4-point transform and print the average time per call.
 *
 * maxIterations: number of calls to make to testFunc
 * testFunc:      transform to time (operates in place on 4 ints)
 * funcName:      name printed for testFunc
 *
 * A fresh random block is generated once per batch of up to `incr`
 * calls.  The last batch is shortened when maxIterations is not a
 * multiple of `incr`, so exactly maxIterations calls are made and the
 * reported average uses the true call count.
 */
static void
test1d4PtSpeed(int maxIterations, void (*testFunc)(int *), char *funcName) {
  static const int incr = 100;
  int input[4],work[4];
  int i,iterations;
  TIME_T start;
  double elapsed;

  printf("   %s: ",funcName); fflush(stdout);

  /* Start timing only after the label has been written out */
  start = timestamp();
  for(iterations = 0; iterations < maxIterations; ) {
    int batch = maxIterations - iterations;
    if(batch > incr) batch = incr;

    /* Build random input values in range -128...127 */
    for(i=0;i<4;i++) {
      int t = Random() & 0xff;
      if(t > 127) t-= 256;
      input[i] = t;
    }
    for(i=0;i<batch;i++) {
      memcpy(work,input,sizeof(input));
      (*testFunc)(work);
    }
    iterations += batch;
  }
  elapsed = timeelapsed(start);

  /* Avoid dividing by zero when no calls were requested */
  printf("%f microseconds (based on %d iterations)\n",
	 maxIterations > 0 ? elapsed/maxIterations * 1000000 : 0.0,
	 maxIterations);
}

/***************************************************************************/

/*
 * Program entry point: seeds the random generator, then checks accuracy
 * and measures speed of the forward DCT followed by the inverse DCT.
 * Always returns 0.
 */
int
main(int argc, char **argv) {
  /* Command-line arguments are not used */
  (void)argc;
  (void)argv;

  InitRandom();

  printf("Testing 4-Element 1-D Forward DCT Implementation\n\n");
  {
    test1d4PtAccuracy(50000,dct1d4PtTest,"dct1d4PtTest",
		   dct1d4PtReference,"dct1d4PtReference");

    printf("Measuring Speed\n");
    /* The reference is given fewer iterations than the candidate */
    test1d4PtSpeed(100000,dct1d4PtReference,"dct1d4PtReference");
    test1d4PtSpeed(10000000,dct1d4PtTest,"dct1d4PtTest");
  }
  printf("\n\nTesting 4-Element 1-D IDCT Implementation\n\n");
  {
    test1d4PtAccuracy(50000,idct1d4PtTest,"idct1d4PtTest",
		   idct1d4PtReference,"idct1d4PtReference");

    printf("Measuring Speed\n");
    test1d4PtSpeed(100000,idct1d4PtReference,"idct1d4PtReference");
    test1d4PtSpeed(10000000,idct1d4PtTest,"idct1d4PtTest");
  }
  return 0;
}

