#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>

/* so sue me */

/*
 * yuvscaler with trivial case detection (but no real speed improvements
 * as of yet, it isn't a bottleneck piece of code anyways)
 *
 * this might ruin the crispness and clarity of your saxophone,
 * you have been warned
 *
 * distributed entirely free of warranty as part of the mpeg2_movie
 * derivate nuppelvideo under the terms of the gnu gpl version 2
 * as described at http://www.gnu.org/copyleft/, thus copying is
 * left as an exercise to the user
 *
 * Changes:
 * 27/3/2001	initial version, linear scaling, black frames,
 * 		trivial case detection
 *
 * TODO:
 * this is bad code, it only works with yuv 411 right now
 * (the chroma planes are addressed as (xin/2) x (yin/2) on input and
 * (xout/2) x (yout/2) on output)
 * make this asm/mmx rsn
 *
 * Parameters:
 *   in, out       three plane pointers each: [0]=Y, [1]=U, [2]=V
 *   xin, yin      size of the input luma plane
 *   xout, yout    size of the output luma plane
 *   xoff, yoff    top-left corner of the source window inside the input;
 *                 may be negative or reach past the input, in which case
 *                 the uncovered output area is painted black
 *   width, height size of the source window that is mapped onto the
 *                 whole output
 *
 * Returns 0 on success, -1 if the geometry is unusable (a non-positive
 * size) or the lookup tables could not be allocated.
 *
 * The geometry is evaluated on the FIRST successful call only and kept in
 * static storage (bounding box, lookup tables, trivial/linear decision).
 * Later calls must pass the same geometry; only the plane pointers may
 * change. The tables are never freed. Because of the static state the
 * function is not reentrant.
 *
 * theory (at least it works that way)
 *
 *   n(x,y)=o(x0+width/xout*x,y0+height/yout*y) <- in prescaled image
 *                                                 (decimal coordinates)
 *
 *   in integer coordinates:
 *
 *   xo1=int(x0+width/xout*x)
 *   yo1=int(y0+height/yout*y)
 *   xo2=xo1+1
 *   yo2=yo1+1
 *
 *   with dx,dy the fractional parts scaled to 0..255 the code computes
 *
 *   t1=o(xo1,yo1)*(256-dy)+o(xo1,yo2)*dy       (left column)
 *   t2=o(xo2,yo1)*(256-dy)+o(xo2,yo2)*dy       (right column)
 *   n(x,y)=(t1*(256-dx)+t2*dx)>>16
 *
 *   step by width/xout (->x table), height/yout (->y table)
 *
 *   x0=xo0/y0=yo0
 *   0 . . .
 *   .
 *   .  xo1/yo1      xo2/yo1 . . .
 *   .  |            |
 *      +----------xn/yn
 *      |            |
 *      xo1/yo2      xo2/yo2
 *
 *   ^ it should be possible to buffer and recycle most of these
 *   results ...
 *
 * so that was where the code started ...
 */

/* Integer part of the 20.12 fixed point position pos12, forced into
 * [0, limit-1] (0 when limit is not positive). */
static long int yuvscaler_src_index(long int pos12, long int limit)
{
	long int idx;

	if (pos12 < 0)
		return 0;
	idx = pos12 / 4096;
	if (idx >= limit)
		idx = (limit > 0) ? limit - 1 : 0;
	return idx;
}

int yuvscaler(unsigned char **in, unsigned char **out, long int xin, long int yin,  long int xout,  long  int yout, long int xoff, long int yoff,  long int width, long int height)
{
	long int x, y;
	unsigned char *y_in1, *y_in2, *u_in1, *v_in1;
	unsigned char *y_out = out[0], *u_out = out[1], *v_out = out[2];
	long int t1, t2;		/* tmp lin interpol values */
	long int pos12, frac12;		/* 20.12 fixed point source position */
	long int xstep, ystep;		/* distance to right / lower neighbour */
	static long int xoff12, yoff12, width12, height12;
	static long int *dx, *xr, *dy, *yr, *xr2, *yr2;
	static int trivial = 0, init = 0, bbox = 0;
	static long int xin2, yin2, xout2, yoff2;
	static long int bboxx1, bboxy1, bboxx2, bboxy2;
	static long int bboxx12, bboxy12, bboxx22, bboxy22;

	if (!init) {
		/* every one of these is used as a divisor or an array size */
		if (xin <= 0 || yin <= 0 || xout <= 0 || yout <= 0 ||
		    width <= 0 || height <= 0)
			return -1;

		/* multiply instead of <<12: the offsets may be negative */
		xoff12 = xoff * 4096;
		yoff12 = yoff * 4096;
		width12 = width * 4096;
		height12 = height * 4096;

		/* bounding box: part of the output that is backed by input */
		bbox = 0;
		bboxx1 = 0;
		bboxy1 = 0;
		bboxx2 = xout;
		bboxy2 = yout;
		bboxx12 = 0;
		bboxy12 = 0;
		if (xoff < 0) {
			bbox = 1;
			bboxx1 = (-xoff + 1) * xout / width;
			bboxx12 = bboxx1 / 2 + 1;
		}
		if (yoff < 0) {
			bbox = 1;
			bboxy1 = (-yoff + 1) * yout / height;
			bboxy12 = bboxy1 / 2 + 1;
		}
		if ((width + xoff) > xin) {
			bbox = 1;
			bboxx2 = (xin - xoff) * xout / width;
		}
		if ((height + yoff) > yin) {
			bbox = 1;
			bboxy2 = (yin - yoff) * yout / height;
		}
		bboxx22 = bboxx2 / 2;
		bboxy22 = bboxy2 / 2;
		fprintf(stderr,"yuvscaler 1.0 by aoe scaling %lix%li to %lix%li (%li,%li)-(%li,%li) ",xin,yin,xout,yout,bboxx1,bboxy1,bboxx2,bboxy2);

		xin2 = xin / 2;
		yin2 = yin / 2;
		xout2 = xout / 2;
		yoff2 = yoff / 2;

		if ((xout == width) && (yout == height) &&	/* no resizing */
		    ((xout & 1) == 0) && ((yout & 1) == 0)) {	/* color info unshifted */
			trivial = 1;
			fprintf(stderr,"- trivial scaler\n");
		} else {
			trivial = 0;
			fprintf(stderr,"- linear scaler (colorshift/size)\n");
			dx = calloc((size_t)xout, sizeof *dx);
			xr = calloc((size_t)xout, sizeof *xr);
			dy = calloc((size_t)yout, sizeof *dy);
			yr = calloc((size_t)yout, sizeof *yr);
			xr2 = calloc((size_t)xout, sizeof *xr2);
			/* row table: sized by yout, it is indexed by output rows */
			yr2 = calloc((size_t)yout, sizeof *yr2);
			if (!dx || !xr || !dy || !yr || !xr2 || !yr2) {
				free(dx);
				free(xr);
				free(dy);
				free(yr);
				free(xr2);
				free(yr2);
				dx = xr = dy = yr = xr2 = yr2 = NULL;
				return -1;	/* init stays 0, next call retries */
			}

			/* luma: source column/row plus 8 bit blend weight.
			 * A position that had to be clamped gets weight 0. */
			for (x = bboxx1; x < bboxx2; x++) {
				pos12 = (width12 * x) / xout + xoff12;
				xr[x] = yuvscaler_src_index(pos12, xin);
				frac12 = pos12 - xr[x] * 4096;
				dx[x] = (frac12 >= 0 && frac12 < 4096) ? (frac12 >> 4) : 0;
			}
			for (y = bboxy1; y < bboxy2; y++) {
				pos12 = (height12 * y) / yout + yoff12;
				yr[y] = yuvscaler_src_index(pos12, yin);
				frac12 = pos12 - yr[y] * 4096;
				dy[y] = (frac12 >= 0 && frac12 < 4096) ? (frac12 >> 4) : 0;
			}
			/* chroma: nearest source sample only, no blending */
			for (x = bboxx12; x < bboxx22; x++) {
				pos12 = (width12 * x) / xout + xoff12 / 2;
				xr2[x] = yuvscaler_src_index(pos12, xin2);
			}
			for (y = bboxy12; y < bboxy22; y++) {
				pos12 = (height12 * y) / yout + yoff12 / 2;
				yr2[y] = yuvscaler_src_index(pos12, yin2);
			}
		}
		init = 1;	/* only once everything above succeeded */
	}

	if (bbox) {
		/* black frame; the loops below only fill the bounding box */
		memset(y_out, 0, xout * yout);
		memset(u_out, 127, xout * yout / 4);
		memset(v_out, 127, xout * yout / 4);
	}

	if (!trivial) {
		for (y = bboxy1; y < bboxy2; y++) {
			/* on the last input row blend the row with itself
			 * instead of reading behind the plane */
			ystep = (yr[y] + 1 < yin) ? xin : 0;
			y_out = out[0] + y * xout + bboxx1;
			for (x = bboxx1; x < bboxx2; x++) {
				/* same for the last input column */
				xstep = (xr[x] + 1 < xin) ? 1 : 0;
				y_in1 = in[0] + yr[y] * xin + xr[x];
				y_in2 = y_in1 + ystep;
				t1 = y_in2[0] * dy[y] + y_in1[0] * (256 - dy[y]);
				t2 = y_in2[xstep] * dy[y] + y_in1[xstep] * (256 - dy[y]);
				*y_out++ = (unsigned char)((t2 * dx[x] + t1 * (256 - dx[x])) >> 16);
			}
		}
		for (y = bboxy12; y < bboxy22; y++) {
			u_out = out[1] + y * xout2 + bboxx12;
			v_out = out[2] + y * xout2 + bboxx12;
			for (x = bboxx12; x < bboxx22; x++) {
				*u_out++ = *(in[1] + yr2[y] * xin2 + xr2[x]);
				*v_out++ = *(in[2] + yr2[y] * xin2 + xr2[x]);
			}
		}
	} else { /* trivial */
		/* The lookup tables do not exist in this mode, so nothing here
		 * may touch xr/yr/dx/dy.
		 * NOTE(review): the horizontal offset is not applied to the
		 * source pointers (it was already commented out); every row is
		 * read starting at input column 0 -- confirm this is intended
		 * for xoff != 0. */
		for (y = bboxy1; y < bboxy2; y++) {
			y_in1 = in[0] + (y + yoff) * xin /*+xoff*/;
			y_out = out[0] + y * xout + bboxx1;
			for (x = bboxx1; x < bboxx2; x++) {
				*y_out++ = *y_in1++;
			}
		}
		for (y = bboxy12; y < bboxy22; y++) {
			u_in1 = in[1] + (y + yoff2) * xin2 /*+xoff2*/;
			v_in1 = in[2] + (y + yoff2) * xin2 /*+xoff2*/;
			u_out = out[1] + y * xout2 + bboxx12;
			v_out = out[2] + y * xout2 + bboxx12;
			for (x = bboxx12; x < bboxx22; x++) {
				*u_out++ = *u_in1++;
				*v_out++ = *v_in1++;
			}
		}
	}

	return 0;
}
			
