/*
 * Test harness for exercising candidate DCT/IDCT implementations.
 *
 */
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

static const double PI=3.14159265358979323;

/***************************************************************************/

/* System dependencies: high-quality random numbers */

/* If your system supports a higher-quality random number generator,
   plug that in here */
/* Seed the C library generator from the current time so that each run
 * of the harness exercises a different sequence of inputs. */
void InitRandom(void) { srand((unsigned int)time(NULL)); }
/* Return the next value from the C library generator (0..RAND_MAX). */
int Random(void) { return rand(); }


/* Timing primitives:
 * timestamp() returns a TIME_T value which somehow indicates the current time
 * timeelapsed() accepts a TIME_T value and returns a double indicating
 *   the number of elapsed seconds
 */

/*
 * Define ONLY ONE of the following:
 */

#define BSD_TIMES (1)  /* 1 => This system supports BSD-style times() */
#define ANSI_TIMES (0) /* 1 => This system supports ANSI time()/difftime() */


/* BSD (and POSIX) systems provide a times() call which reports CPU time
 * usage for the current process.  Historically, this was accurate to the
 * nearest 1/128 of a second.  Some newer systems may provide
 * millisecond resolution.
 *
 * The values times() fills in are counted in clock ticks; the number of
 * ticks per second is obtained at run time from sysconf(_SC_CLK_TCK)
 * rather than from a platform-specific compile-time macro.
 */
#if BSD_TIMES
#include <sys/times.h>
#include <unistd.h>
#define CLOCK_TCK (sysconf(_SC_CLK_TCK))
#define TIME_T double

/* Return the user CPU time consumed so far by this process, in seconds.
 * Returns 0.0 if the tick rate or the times() call is unavailable. */
TIME_T timestamp(void) {
  struct tms timeInfo;
  long ticksPerSecond = CLOCK_TCK;

  if (ticksPerSecond <= 0 || times(&timeInfo) == (clock_t)-1)
    return 0.0;
  return (double)timeInfo.tms_utime / (double)ticksPerSecond;
}

/* Return the seconds of user CPU time consumed since `last`, where
 * `last` is a value previously returned by timestamp(). */
double timeelapsed(TIME_T last) {
  return timestamp()-last;
}
#endif

/* On most systems, the ANSI-standard time()/difftime() functions
 * only provide timing to the nearest whole second.
 * If you use this, you may need to use very large iteration
 * counts (possibly hundreds of millions) to accurately
 * determine the speed of a function.
 */
#if ANSI_TIMES
#define TIME_T time_t

/* Return the current calendar time as reported by time(). */
TIME_T timestamp(void) {
  return time(NULL);
}

/* Return the number of seconds between `last` (a value previously
 * returned by timestamp()) and now, as computed by difftime(). */
double timeelapsed(TIME_T last) {
  return difftime(timestamp(),last);
}
#endif

/***************************************************************************/

/*
 * Reference 8-point forward DCT, evaluated directly from the cosine
 * sum in double precision.  Very accurate, very slow.
 *
 * The eight values in data[] are replaced in place by the rounded
 * coefficients.  Each cosine sum is given the usual 1/4 weighting
 * (plus 1/sqrt(2) on coefficient 0) and then scaled by sqrt(2)*4, so
 * apart from rounding coefficient 0 is the plain sum of the inputs and
 * every other coefficient is sqrt(2) times its cosine sum.
 */
static void
dct1dReference(int *data) {
  double coeff[8];
  int k, i;

  for (k = 0; k < 8; k++) {
    double acc = 0;

    for (i = 0; i < 8; i++)
      acc += data[i] * cos(PI * k * (2*i+1)/16.0);

    acc /= 4.0;               /* typical DCT weighting */
    if (k == 0)
      acc /= sqrt(2.0);       /* extra weighting on the DC term */
    acc *= 4.0 * sqrt(2);     /* scale by sqrt(2)*4 */

    /* Keep results aside: data[] is still needed as input for later k */
    coeff[k] = acc;
  }

  for (k = 0; k < 8; k++)
    data[k] = floor(coeff[k] + 0.5);  /* round to nearest */
}

/***************************************************************************/

/*
 * Fixed-point 8-point forward DCT.
 *
 * From Figure 1 of Loeffler, Ligtenberg, and Moschytz.
 * ("Practical Fast 1-D DCT Algorithms with 11 Multiplications,"
 * Acoustics, Speech, and Signal Processing, 1989. ICASSP-89, 1989.
 * pp 988-991.)
 *
 * Note: Output uses the same scaling as dct1dReference (regular DCT
 * scaled by sqrt(2)*4), i.e.
 *
 *   output[x] = ((x==0) ? 1 : sqrt(2))
 *               * SUM(n=0..7) input[n] * cos(pi*x*(2n+1)/16)
 *
 * The block is transformed in place.
 *
 * The choice of 10-bit fixed-point constants here is arbitrary.
 * Using more bits gives better accuracy, but with increased risk
 * of overflow.  Note that with 10 bit accuracy, sqrt(2) is 1448/1024,
 * and 1448 is an exact multiple of 8.  Hence, 181/128 is just as
 * accurate, and reduces overflow.
 *
 * Right shifts of negative intermediates are implementation-defined in
 * C; the rounding below assumes they behave as arithmetic shifts.
 */
static void
dct1dTest(int *dctBlock) {
  /* Rotation constants, scaled by 1024 (r2 is scaled by 128) */
  static const int c1=1004;   /* cos(pi/16)<<10 */
  static const int s1=200;    /* sin(pi/16)<<10 */
  static const int c3=851;    /* cos(3pi/16)<<10 */
  static const int s3=569;    /* sin(3pi/16)<<10 */
  static const int r2c6=554;  /* sqrt(2)*cos(6pi/16)<<10 */
  /* NOTE(review): sqrt(2)*sin(6pi/16)*1024 is about 1337.9, so 1338
   * would be the nearest integer -- confirm 1337 is intentional. */
  static const int r2s6=1337; /* sqrt(2)*sin(6pi/16)<<10 */
  static const int r2=181;    /* sqrt(2)<<7 */

  const int in0=dctBlock[0], in1=dctBlock[1], in2=dctBlock[2],
    in3=dctBlock[3], in4=dctBlock[4], in5=dctBlock[5],
    in6=dctBlock[6], in7=dctBlock[7];

  /* Stage 1: butterflies pairing sample k with sample 7-k */
  const int sum07 = in7 + in0, diff07 = in0 - in7;
  const int sum16 = in1 + in6, diff16 = in1 - in6;
  const int sum25 = in2 + in5, diff25 = in2 - in5;
  const int sum34 = in3 + in4, diff34 = in3 - in4;

  /* Stage 2: even-half butterflies, odd-half rotations (3 multiplies each) */
  const int even0 = sum07 + sum34, even3 = sum07 - sum34;
  const int even1 = sum16 + sum25, even2 = sum16 - sum25;
  const int rot1 = c1 * (diff16 + diff25);
  const int odd2 = (-s1 - c1) * diff25 + rot1;
  const int odd1 = (s1 - c1) * diff16 + rot1;
  const int rot3 = c3 * (diff07 + diff34);
  const int odd3 = (-s3 - c3) * diff34 + rot3;
  const int odd0 = (s3 - c3) * diff07 + rot3;

  /* Stage 3: even-half rotation, odd-half butterflies */
  const int rot6 = r2c6 * (even2 + even3);
  const int raw6 = (-r2s6 - r2c6) * even2 + rot6;
  const int raw2 = (r2s6 - r2c6) * even3 + rot6;
  const int oddSum02 = odd0 + odd2, oddDiff02 = odd0 - odd2;
  const int oddSum31 = odd3 + odd1, oddDiff31 = odd3 - odd1;

  /* Stage 4, round, and output.  Coefficients 0 and 4 carry no
   * fixed-point scale; 1, 2, 6 and 7 carry 2^10; 3 and 5 pick up a
   * further 2^7 from the multiplication by r2. */
  dctBlock[0] = even0 + even1;
  dctBlock[1] = (oddSum31 + oddSum02 + 512) >> 10;
  dctBlock[2] = (raw2 + 512) >> 10;
  dctBlock[3] = (oddDiff31 * r2 + 65536) >> 17;
  dctBlock[4] = even0 - even1;
  dctBlock[5] = (oddDiff02 * r2 + 65536) >> 17;
  dctBlock[6] = (raw6 + 512) >> 10;
  dctBlock[7] = (oddSum31 - oddSum02 + 512) >> 10;
}


/***************************************************************************/

/*
 * Reference 8-point inverse DCT, evaluated directly from the cosine
 * sum in double precision.  Very accurate, very slow.
 *
 * The eight coefficients in data[] are replaced in place by the
 * rounded samples
 *
 *   output[x] = (sqrt(2)/2) * ( data[0]/sqrt(2)
 *               + SUM(n=1..7) data[n] * cos(pi*n*(2x+1)/16) )
 */
static void
idct1dReference(int *data) {
  double sample[8];
  int pos, k;

  for (pos = 0; pos < 8; pos++) {
    /* The DC coefficient carries the 1/sqrt(2) weight */
    double acc = data[0] / sqrt(2.0);

    for (k = 1; k < 8; k++)
      acc += data[k] * cos(PI * k * (2*pos+1)/16.0);

    acc /= 2.0;
    acc *= sqrt(2.0);

    /* Keep results aside: data[] is still needed as input for later pos */
    sample[pos] = acc;
  }

  for (pos = 0; pos < 8; pos++)
    data[pos] = floor(sample[pos] + 0.5);  /* round to nearest */
}

/***************************************************************************/

/*
 * Fixed-point 8-point inverse DCT.
 *
 * From Figure 1 of Loeffler, Ligtenberg, and Moschytz.
 * ("Practical Fast 1-D DCT Algorithms with 11 Multiplications,"
 * Acoustics, Speech, and Signal Processing, 1989. ICASSP-89, 1989.
 * pp 988-991.)
 * The stages of the forward flow graph are run in reverse order.
 *
 * Note: Output uses the same scaling as idct1dReference, i.e.
 *
 *   output[x] = (sqrt(2)/2) * ( input[0]/sqrt(2)
 *               + SUM(n=1..7) input[n] * cos(pi*n*(2x+1)/16) )
 *
 * The block is transformed in place.
 *
 * Input scaling uses multiplication rather than '<<' because
 * coefficients may be negative and left-shifting a negative value is
 * undefined behaviour in C.  Right shifts of negative intermediates
 * are implementation-defined; the rounding below assumes they behave
 * as arithmetic shifts.
 */
static void
idct1dTest(int *dctBlock) {
  static const int c1=251 /*cos(pi/16)<<8*/, s1=50 /*sin(pi/16)<<8*/;
  static const int c3=213 /*cos(3pi/16)<<8*/, s3=142 /*sin(3pi/16)<<8*/;
  static const int r2c6=277 /*cos(6pi/16)*sqrt(2)<<9*/,
    r2s6=669 /*sin(6pi/16)*sqrt(2)<<9*/;
  static const int r2=181; /* sqrt(2)<<7 */

  /* Stage 4: bring the inputs to their working fixed-point scales
   * (2^9 for 0 and 4, 2^7 for 1, 3, 5 and 7; 2 and 6 are scaled by
   * the Stage 3 rotation constants instead) */
  int x0=dctBlock[0]*512, x1=dctBlock[1]*128, x2=dctBlock[2],
    x3=dctBlock[3]*r2, x4=dctBlock[4]*512, x5=dctBlock[5]*r2,
    x6=dctBlock[6], x7=dctBlock[7]*128;
  int x8=x7+x1; x1 -= x7;

  /* Stage 3 */
  x7=x0+x4; x0-=x4; x4=x1+x5; x1-=x5; x5=x3+x8; x8-=x3;
  x3=r2c6*(x2+x6);x6=x3+(-r2c6-r2s6)*x6;x2=x3+(-r2c6+r2s6)*x2;

  /* Stage 2: the odd-half rotations multiply by 2^8 constants; the
   * >>6 leaves those terms at 2^9, matching the even half */
  x3=x7+x2; x7-=x2; x2=x0+x6; x0-= x6;
  x6=c3*(x4+x5);x5=(x6+(-c3-s3)*x5)>>6;x4=(x6+(-c3+s3)*x4)>>6;
  x6=c1*(x1+x8);x1=(x6+(-c1-s1)*x1)>>6;x8=(x6+(-c1+s1)*x8)>>6;

  /* Stage 1, rounding and output: the rounding bias of 512 is added
   * once to each even-half term before the final >>10 */
  x7+=512; x2+=512;x0+=512;x3+=512;
  dctBlock[0]=(x3+x4)>>10;  dctBlock[1]=(x2+x8)>>10;
  dctBlock[2]=(x0+x1)>>10;  dctBlock[3]=(x7+x5)>>10;
  dctBlock[4]=(x7-x5)>>10;  dctBlock[5]=(x0-x1)>>10;
  dctBlock[6]=(x2-x8)>>10;  dctBlock[7]=(x3-x4)>>10;
}

/***************************************************************************/

/*
 * Compare a candidate 8-point transform against a reference
 * implementation on random input blocks and print error statistics.
 *
 *   maxIterations      number of random blocks to try
 *   testFunc           in-place transform under test
 *   testFuncName       label for testFunc in the report
 *   referenceFunc      in-place transform treated as correct
 *   referenceFuncName  label for referenceFunc in the report
 *
 * Each block holds eight values in the range -128...127.  The report
 * gives the fraction of coefficients whose absolute error exceeds
 * 0, 1, 2 and 3, the mean square error over all coefficients, and the
 * largest per-block mean square error.  Any block whose summed squared
 * error exceeds 100 is printed in full as a "Bad Example".
 * If maxIterations is not positive, only the heading is printed.
 */
void test1dAccuracy(int maxIterations,
                    void (*testFunc)(int *),
                    char *testFuncName,
                    void (*referenceFunc)(int *),
                    char *referenceFuncName) {
  int input[8], reference[8], test[8];
  int iteration;
  int totalCoefficients=0; /* Total number of coefficients tested */
  int errorCoefficients[4]={0}; /* [k] = # coefficients with |error| > k */
  double squareError=0; /* Total squared error over all coefficients */
  double maxSquareError=0; /* Largest summed squared error for any block */
  int i,k;

  printf("Testing Accuracy: %s (%d iterations, comparing to %s)\n",
         testFuncName,maxIterations,referenceFuncName);

  /* Nothing was measured: avoid dividing by a zero coefficient count */
  if(maxIterations <= 0) {
    printf("   No iterations requested\n");
    return;
  }

  for(iteration=0;iteration<maxIterations;iteration++) {

    double thisSquareError = 0.0;

    /* Build random input values in range -128...127 */
    for(i=0;i<8;i++) {
      int t = Random() & 0xff;
      if(t > 127) t-= 256;
      input[i] = t;
    }

    /* Compute reference version */
    memcpy(reference,input,sizeof(input));
    (*referenceFunc)(reference);

    /* Compute test version */
    memcpy(test,input,sizeof(input));
    (*testFunc)(test);

    /* Tally absolute errors against each threshold and accumulate
     * the squared error */
    totalCoefficients += 8;
    for(i=0;i<8;i++) {
      int err = test[i] - reference[i];
      double err2 = (double)err * (double)err;
      if(err < 0) err = -err;
      for(k=0;k<4;k++)
        if(err > k) errorCoefficients[k]++;
      squareError += err2;
      thisSquareError += err2;
    }
    if(thisSquareError > maxSquareError)
      maxSquareError = thisSquareError;

    /* Show the offending block when the error is suspiciously large */
    if(thisSquareError > 100) {
      printf("Bad Example: mean square error = %f\n",thisSquareError/8);
      printf("Input: ");  for(i=0;i<8;i++) printf("  %4d",input[i]);
      printf("\nRef:   ");  for(i=0;i<8;i++) printf("  %4d",reference[i]);
      printf("\nTest:  ");  for(i=0;i<8;i++) printf("  %4d",test[i]);
      printf("\n\n");
    }
  }

  printf("   Probability of error > 0: %g",
         (double)errorCoefficients[0] / (double)totalCoefficients);
  for(k=1;k<4;k++)
    printf(",  > %d: %g",k,
           (double)errorCoefficients[k] / (double)totalCoefficients);
  printf("\n");

  printf("   Overall mean square error: %f\n", squareError/totalCoefficients);
  printf("   Maximum mean square error: %f\n", maxSquareError / 8);
}

/***************************************************************************/

/*
 * Time an in-place 8-point transform and print the average cost per
 * call in microseconds.
 *
 *   maxIterations  number of calls to time
 *   testFunc       transform to time
 *   funcName       label for the report line
 *
 * Since the Random() function might not be infinitely fast,
 * one set of random values is chosen for every hundred calls
 * to the test function.  That way, the time measures the function being
 * tested, not the random number generator.
 *
 * The last batch is shortened when maxIterations is not a multiple of
 * the batch size, so exactly maxIterations calls are made and the
 * reported average is not inflated.  If maxIterations is not positive,
 * nothing is timed.
 */

static void
test1dSpeed(int maxIterations, void (*testFunc)(int *), char *funcName) {
  static const int incr = 100;
  int input[8],work[8];
  int i,completed;
  TIME_T start;

  printf("   %s: ",funcName); fflush(stdout);

  /* Avoid dividing by zero (or a negative count) below */
  if(maxIterations <= 0) {
    printf("skipped (no iterations requested)\n");
    return;
  }

  start = timestamp();
  for(completed = 0; completed < maxIterations; ) {
    int batch = maxIterations - completed;
    if(batch > incr) batch = incr;

    /* Build random input values in range -128...127 */
    for(i=0;i<8;i++) {
      int t = Random() & 0xff;
      if(t > 127) t-= 256;
      input[i] = t;
    }

    /* The transform works in place, so restore the input before each call */
    for(i=0;i<batch;i++) {
      memcpy(work,input,sizeof(input));
      (*testFunc)(work);
    }
    completed += batch;
  }
  printf("%f microseconds (based on %d iterations)\n",
         timeelapsed(start)/maxIterations * 1000000, maxIterations);
}

/***************************************************************************/

/*
 * Run the accuracy comparison followed by the two speed measurements
 * for one candidate/reference pair of transforms.
 */
static void
exerciseTransform(void (*testFunc)(int *), char *testFuncName,
                  void (*referenceFunc)(int *), char *referenceFuncName) {
  test1dAccuracy(5000, testFunc, testFuncName,
                 referenceFunc, referenceFuncName);

  printf("Measuring Speed\n");
  test1dSpeed(1000, referenceFunc, "1d Reference");
  test1dSpeed(100000, testFunc, "1d Test");
}

/*
 * Entry point: seed the random number generator, then exercise the
 * forward DCT and the IDCT implementations in turn.  Command-line
 * arguments are ignored.
 */
int
main(int argc, char **argv) {
  (void)argc;
  (void)argv;

  InitRandom();

  printf("Testing 8-Element 1-D Forward DCT Implementation\n\n");
  exerciseTransform(dct1dTest, "dct1dTest",
                    dct1dReference, "dct1dReference");

  printf("\n\nTesting 8-Element 1-D IDCT Implementation\n\n");
  exerciseTransform(idct1dTest, "idct1dTest",
                    idct1dReference, "idct1dReference");

  return 0;
}

