Dr. Dobb's Journal March 1999
/*
 * In-place 4-point one-dimensional DCT using integer fixed-point arithmetic.
 *
 * dctBlock must point to at least four ints; elements 0..3 are read and then
 * overwritten with the transform outputs:
 *   dctBlock[0], dctBlock[2]  - plain integer sums/differences of the inputs
 *                               (no fixed-point scaling or rounding involved)
 *   dctBlock[1], dctBlock[3]  - rotation results computed with 10 fractional
 *                               bits, rounded by adding 512 before shifting
 *                               right by 10.
 *
 * The rotation constants are 1024-scaled integers: 554/1024 ~= 0.541 and
 * 1337/1024 ~= 1.306, i.e. approximately sqrt(2)*cos(3*pi/8) and
 * sqrt(2)*sin(3*pi/8).
 *
 * NOTE(review): intermediate products are plain int, so inputs must be small
 * enough that roughly 1891 * (difference of two inputs) + 512 does not
 * overflow; the expected input range is not visible here - confirm with the
 * caller. Negative intermediates are right-shifted, which is
 * implementation-defined in C; the rounding relies on an arithmetic shift.
 *
 * The function returns nothing (the original listing omitted the return type,
 * which is not valid since C99).
 */
void dct1d4PtTest(int *dctBlock) {
    static const int r2c3 = 554, r2s3 = 1337;

    /* Stage 1: butterflies on the outer pair (0,3) and the inner pair (1,2). */
    const int outerSum  = dctBlock[0] + dctBlock[3];
    const int outerDiff = dctBlock[0] - dctBlock[3];
    const int innerSum  = dctBlock[1] + dctBlock[2];
    const int innerDiff = dctBlock[1] - dctBlock[2];

    /* Stage 2: the even outputs are a second butterfly on the two sums. */
    const int even0 = outerSum + innerSum;
    const int even2 = outerSum - innerSum;

    /*
     * Odd outputs: a rotation done with three multiplies. The product shared
     * by both outputs also carries the +512 rounding bias (half of 1024), so
     * each result is rounded when it is shifted down below.
     */
    const int shared = r2c3 * (innerDiff + outerDiff) + 512;
    const int odd1 = (-r2c3 + r2s3) * outerDiff + shared;
    const int odd3 = (-r2c3 - r2s3) * innerDiff + shared;

    /* Store results; only the odd outputs need the fixed-point scale removed. */
    dctBlock[0] = even0;
    dctBlock[2] = even2;
    dctBlock[1] = odd1 >> 10;
    dctBlock[3] = odd3 >> 10;
}