--- spudec.h	 (revision 22772)
+++ spudec.h	 (working copy)
@@ -5,8 +5,38 @@
 
 void spudec_heartbeat(void *this, unsigned int pts100);
 void spudec_assemble(void *this, unsigned char *packet, unsigned int len, unsigned int pts100);
-void spudec_draw(void *this, void (*draw_alpha)(int x0,int y0, int w,int h, unsigned char* src, unsigned char *srca, int stride));
-void spudec_draw_scaled(void *this, unsigned int dxs, unsigned int dys, void (*draw_alpha)(int x0,int y0, int w,int h, unsigned char* src, unsigned char *srca, int stride));
+void spudec_draw(void *this, void (*draw_alpha)(int x0,int y0, int w,int h, int dp, unsigned char* src, unsigned char *srca, int stride));
+void spudec_draw_scaled(void *this, unsigned int dxs, unsigned int dys, void (*draw_alpha)(int x0,int y0, int w,int h, int dp, unsigned char* src, unsigned char *srca, int stride));
+
+#define DVDNAV_SPU_YUV	1	// separate Y, U and V planes
+#define DVDNAV_SPU_RGB	2	// separate color planes, red first
+#define DVDNAV_SPU_BGR	3	// separate color planes, blue first
+#define DVDNAV_SPU_YUY	4	// packed image built by spudec_create_yuy()
+
+// Convert yuv color to rgb color; each result is clamped to 0..255
+void spu_yuv_to_rgb(unsigned int y,unsigned int u,unsigned int v,
+    unsigned int *r,unsigned int *g,unsigned int *b);
+// Enable/disable dvdmenu mode (mode), and set color mode (cflg)
+//	cflg = 0:	Y SPU (default)
+//	cflg = 1:	YUV SPU (DVDNAV_SPU_YUV)
+//	cflg = 2:	RGB SPU (DVDNAV_SPU_RGB)
+//	cflg = 3:	BGR SPU (DVDNAV_SPU_BGR)
+//	cflg = 4:	YUY SPU (DVDNAV_SPU_YUY)
+void spudec_dvdnav_mode(void *this, int mode, int cflg);
+// Set dvd menu button draw area (sx,sy)-(ex,ey) and the button palette
+void spudec_dvdnav_area(void *this, uint16_t sx, uint16_t sy,
+    uint16_t ex, uint16_t ey, uint32_t palette);
+// Set dvd menu button palette: upper 16 bits = four 4-bit palette indexes, lower 16 bits = four 4-bit alpha values
+void spudec_dvdnav_palette(void *this, uint32_t palette);
+// Draw scaled image in YUV and YUY mode
+void spudec_draw_scaled_yuv(void *me, unsigned int dxs, unsigned int dys,
+    void (*draw_alpha)(int x0,int y0, int w,int h, int dp, unsigned char* src, unsigned char *srca, int stride));
+// Draw scaled image in RGB and BGR mode
+void spudec_draw_scaled_rgb(void *me, unsigned int dxs, unsigned int dys,
+    void (*draw_alpha)(int x0,int y0, int w,int h, int dp, unsigned char* src, unsigned char *srca, int stride));
+// Convert Yuv image to YuY image; a non-zero spu_scaled converts the scaled image instead
+void spudec_create_yuy(void *this, int spu_scaled);
+
 void spudec_update_palette(void *this, unsigned int *palette);
 void *spudec_new_scaled(unsigned int *palette, unsigned int frame_width, unsigned int frame_height);
 void *spudec_new_scaled_vobsub(unsigned int *palette, unsigned int *cuspal, unsigned int custom, unsigned int frame_width, unsigned int frame_height);
@@ -18,7 +48,7 @@
 void spudec_set_hw_spu(void *this, vo_functions_t *hw_spu);
 int spudec_changed(void *this);
 void spudec_calc_bbox(void *me, unsigned int dxs, unsigned int dys, unsigned int* bbox);
-void spudec_draw_scaled(void *me, unsigned int dxs, unsigned int dys, void (*draw_alpha)(int x0,int y0, int w,int h, unsigned char* src, unsigned char *srca, int stride));
+void spudec_draw_scaled(void *me, unsigned int dxs, unsigned int dys, void (*draw_alpha)(int x0,int y0, int w,int h, int dp, unsigned char* src, unsigned char *srca, int stride));
 void spudec_set_forced_subs_only(void * const this, const unsigned int flag);
 #endif
 
--- spudec.c	 (revision 22772)
+++ spudec.c	 (working copy)
@@ -94,6 +94,52 @@
   int spu_changed;
   unsigned int forced_subs_only;     /* flag: 0=display all subtitle, !0 display only forced subtitles */
   unsigned int is_forced_sub;         /* true if current subtitle is a forced subtitle */
+
+  packet_t *last_packet;
+  unsigned int widthuv, heightuv, strideuv;
+  unsigned int start_coluv, end_coluv;
+  unsigned int start_rowuv, end_rowuv;
+  size_t image_sizeuv;
+  size_t scaled_image_sizeuv;
+  size_t image_sizeyuy;
+
+  unsigned int scaled_frame_widthuv, scaled_frame_heightuv;
+  unsigned int scaled_start_coluv, scaled_start_rowuv;
+  unsigned int scaled_widthuv, scaled_heightuv, scaled_strideuv;
+  unsigned char *scaled_imageu;
+  unsigned char *scaled_imagev;
+  unsigned char *scaled_aimageuv;
+
+  unsigned int hpalette[4];
+  unsigned int halpha[4];
+  unsigned int hcuspal[4];
+
+  unsigned char *imageu;		/* u value from yUv */
+  unsigned char *imagev;		/* v value from yuV*/
+  unsigned char *aimageuv;		/* alpha with uv*/
+  unsigned char *imageyuy;		/* yuy2 */
+  unsigned char *aimageyuy;		/* alpha with yuy2*/
+  unsigned int strideyuy;
+
+  int dvdnav_color_spu;		/* flag: 0 = grayscale SPU, 1 = YUV color SPU 2 = RGB 3 = BGR */
+
+  int dvdnav_menu;		/* flag: 0=normal subtitle, 1=dvdnav menu */
+  unsigned int dvdnav_sx;	/* dvdnav menu item box */
+  unsigned int dvdnav_ex;
+  unsigned int dvdnav_sy;
+  unsigned int dvdnav_ey;
+  unsigned int dvdnav_modify;	/* dvdnav menu item box is modify */
+  uint32_t     dvdnav_palette;	/* dvdnav menu button palette */
+  unsigned int dvdnav_x0;	/* dvdnav menu item draw_alpha coordinates */
+  unsigned int dvdnav_y0;
+  unsigned int dvdnav_w;
+  unsigned int dvdnav_h;
+  unsigned char *dvdnav_image;	/* dvdnav menu item image */
+  unsigned char *dvdnav_aimage;	/* dvdnav menu item alpha */
+  unsigned int dvdnav_stride;
+  unsigned int dvdnav_allocated;
+  unsigned int dvdnav_scalex;
+  unsigned int dvdnav_scaley;
 } spudec_handle_t;
 
 static void spudec_queue_packet(spudec_handle_t *this, packet_t *packet)
@@ -213,11 +259,147 @@
   } else {
     mp_msg(MSGT_SPUDEC, MSGL_FATAL, "Fatal: update_spu: malloc requested %d bytes\n", 2 * this->stride * this->height);
   }
+//
+// Cut the sub to visible part UV planes
+//
+  unsigned char *imageu;
+  unsigned char *imagev;
+  switch (this->dvdnav_color_spu) {
+    case DVDNAV_SPU_YUV:
+    case DVDNAV_SPU_YUY:
+      for (fy = 0; fy < this->image_sizeuv && !this->aimageuv[fy]; fy++);
+      for (ly = this->strideuv * this->heightuv-1;
+        ly && !this->aimageuv[ly]; ly--);
+      first_y = fy / this->strideuv;
+      last_y = ly / this->strideuv;
+      this->start_rowuv += first_y;
+      // Some subtitles trigger this condition
+      if (last_y + 1 > first_y ) {
+	  this->heightuv = last_y - first_y +1;
+        } else {
+	  this->heightuv = 0;
+	  this->image_sizeuv = 0;
+	  return;
+        }
+      //  printf("new h %d new start %d (sz %d st %d)---\n\n", this->height, this->start_row, this->image_size, this->stride);
+      imageu = malloc(3 * this->strideuv * this->heightuv);
+      if(imageu){
+        this->image_sizeuv = this->strideuv * this->heightuv;
+        imagev = imageu + this->image_sizeuv;
+        aimage = imagev + this->image_sizeuv;
+        memcpy(imageu, this->imageu + this->strideuv * first_y,
+	    this->image_sizeuv);
+        memcpy(imagev, this->imagev + this->strideuv * first_y,
+	    this->image_sizeuv);
+        memcpy(aimage, this->aimageuv + this->strideuv * first_y,
+	    this->image_sizeuv);
+        free(this->imageu);
+        this->imageu = imageu;
+        this->imagev = imagev;
+        this->aimageuv = aimage;
+        } else {
+          mp_msg(MSGT_SPUDEC, MSGL_FATAL,
+	    "Fatal: update_spu: malloc requested %d bytes\n",
+	    3 * this->strideuv * this->height);
+        }
+      break;
+    case DVDNAV_SPU_RGB:
+    case DVDNAV_SPU_BGR:
+      this->image_sizeuv = this->stride * this->height;
+      imageu = malloc(2 * this->stride * this->height);
+      if(imageu){
+        imagev = imageu + this->image_size;
+        memcpy(imageu, this->imageu + this->stride * first_y, this->image_size);
+        memcpy(imagev, this->imagev + this->stride * first_y, this->image_size);
+        free(this->imageu);
+        this->imageu = imageu;
+        this->imagev = imagev;
+        this->aimageuv = NULL;
+      } else {
+        mp_msg(MSGT_SPUDEC, MSGL_FATAL,
+	    "Fatal: update_spu: malloc requested %d bytes\n",
+	    2 * this->stride * this->height);
+      }
+      break;
+    }
+}
+
+//
+// Convert yuv color to rgb color (integer math, channels clamped to 0..255)
+// NOTE(review): u feeds the red term and v the blue term -- confirm vs palette
+void spu_yuv_to_rgb(unsigned int y,unsigned int u,unsigned int v,
+    unsigned int *r,unsigned int *g,unsigned int *b)
+{
+const int luma = 298 * ((int)y - 16);
+const int red_chroma = (int)u - 128;
+const int blue_chroma = (int)v - 128;
+int red = (luma + 408 * red_chroma) / 256;
+int green = (luma - 100 * blue_chroma - 208 * red_chroma) / 256;
+int blue = (luma + 516 * blue_chroma) / 256;
+
+// clamp each channel, then store in the same r, g, b order as before
+*r = red > 255 ? 255 : (red < 0 ? 0 : red);
+*g = green > 255 ? 255 : (green < 0 ? 0 : green);
+*b = blue > 255 ? 255 : (blue < 0 ? 0 : blue);
+}
+
+//
+// Fill one horizontal run of the spu image buffers with a single color
+//	x : start pos in image row
+//	y : image row
+//	len : fill length in image row
+//	color : Y: (YUV,YUY,Y), Red: (RGB) or Blue: (BGR)
+//	coloru: U: (YUV,YUY), Green: (RGB,BGR)
+//	colorv: V: (YUV,YUY), Blue: (RGB) or Red (BGR)
+//	alpha: alpha channel
+// A run with negative x/len, or one that would cross the row stride, is dropped.
+static void spudec_process_fill(spudec_handle_t *this, int x, int y, int len,
+    unsigned char color, unsigned char coloru, unsigned char colorv,
+    unsigned char alpha)
+{
+unsigned int corrx, corry, corrl;
+// Do the range test in unsigned space: "stride-x-len<0" is always false
+// when stride is an unsigned type, so overlong runs were never rejected.
+if (x<0 || len<0) return;
+if ((unsigned int)x+(unsigned int)len > this->stride) return;
+switch (this->dvdnav_color_spu) {
+  case DVDNAV_SPU_YUV:
+  case DVDNAV_SPU_YUY:
+    // the u/v/alpha-uv planes are addressed at half the row, start and length
+    corry=y & 0x01;
+    corrx=x & 0x01;
+    corrl=len & 0x01;
+    memset(this->image + y * this->stride + x, color, len);
+    memset(this->aimage + y * this->stride + x, alpha, len);
+    memset(this->imageu + (y-corry)/2 * this->strideuv + (x+corrx)/2, coloru, (len-corrl)/2);
+    memset(this->imagev + (y-corry)/2 * this->strideuv + (x+corrx)/2, colorv, (len-corrl)/2);
+    memset(this->aimageuv + (y-corry)/2 * this->strideuv + (x+corrx)/2, alpha, (len-corrl)/2);
+    break;
+  case DVDNAV_SPU_RGB:
+  case DVDNAV_SPU_BGR:
+    memset(this->image + y * this->stride + x, color, len);
+    memset(this->imageu + y * this->stride + x, coloru, len);
+    memset(this->imagev + y * this->stride + x, colorv, len);
+    memset(this->aimage + y * this->stride + x, alpha, len);
+    break;
+  default:
+    memset(this->image + y * this->stride + x, color, len);
+    memset(this->aimage + y * this->stride + x, alpha, len);
+    break;
+}
 }
 
 static void spudec_process_data(spudec_handle_t *this, packet_t *packet)
 {
   unsigned int cmap[4], alpha[4];
+  unsigned int thpalette[4], thalpha[4];	/* dvdnav highlight menu palette */
+  unsigned int hcmap[4], halpha[4];		/* dvdnav highlight map */
+  unsigned int cmapu[4], cmapv[4];
+  unsigned int hcmapu[4], hcmapv[4];
+  unsigned int control_start;
+  unsigned int current_nibble[2];
+  unsigned int ty,tu,tv,tr,tg,tb;
+  int deinterlace_oddness;
   unsigned int i, x, y;
 
   this->scaled_frame_width = 0;
@@ -229,6 +411,18 @@
   this->height = packet->height;
   this->width = packet->width;
   this->stride = packet->stride;
+  this->strideuv = packet->stride;
+  control_start = packet->control_start;
+  current_nibble[0]=packet->current_nibble[0];
+  current_nibble[1]=packet->current_nibble[1];
+  deinterlace_oddness=packet->deinterlace_oddness;
+
+  this->start_coluv = packet->start_col/2;
+  this->end_coluv = packet->end_col/2;
+  this->start_rowuv = packet->start_row/2;
+  this->end_rowuv = packet->end_row/2;
+  this->heightuv = packet->height/2+1;
+  this->widthuv = packet->width/2+1;
   for (i = 0; i < 4; ++i) {
     alpha[i] = mkalpha(packet->alpha[i]);
     if (alpha[i] == 0)
@@ -244,7 +438,92 @@
 	cmap[i] = 256 - alpha[i];
     }
   }
-
+  if (this->dvdnav_menu) {
+    for (i = 0; i < 4; ++i) {	/* use button palette */
+      thalpha[i]=(this->dvdnav_palette >> ((3-i)*4)) & 0x0f;
+      thpalette[i]=(this->dvdnav_palette >> (16+(3-i)*4)) & 0x0f;
+      halpha[i] = mkalpha(thalpha[i]);
+      hcmap[i] = ((this->global_palette[thpalette[i]] >> 16) & 0xff);
+      if (alpha[i] == 0) {cmap[i] = 0; cmapu[i] = 0; cmapv[i] = 0;} else {
+        if (cmap[i] + alpha[i] > 255)
+	  cmap[i] = 256 - alpha[i];
+	  switch (this->dvdnav_color_spu) {
+	    case DVDNAV_SPU_YUV:
+	    case DVDNAV_SPU_YUY:
+	      cmap[i] = ((this->global_palette[packet->palette[i]] >> 16) & 0xff);	// Y
+	      cmap[i] = ((0x100-alpha[i])*cmap[i]) >> 8;
+	      cmapu[i] = ((this->global_palette[packet->palette[i]] >> 8) & 0xff);	// u
+	      cmapu[i] = ((0x100-alpha[i])*cmapu[i]) >> 8;
+	      cmapv[i] = ((this->global_palette[packet->palette[i]] >> 0) & 0xff);	// v
+	      cmapv[i] = ((0x100-alpha[i])*cmapv[i]) >> 8;
+	      break;
+	    case DVDNAV_SPU_RGB:
+	      ty = ((this->global_palette[packet->palette[i]] >> 16) & 0xff);	// Y
+              tu = ((this->global_palette[packet->palette[i]] >> 8) & 0xff);	// u
+              tv = ((this->global_palette[packet->palette[i]] >> 0) & 0xff);	// v
+	      spu_yuv_to_rgb(ty,tu,tv,&tr,&tg,&tb);
+	      cmap[i] = tr;							// Red
+	      cmapu[i] = tg;							// Green
+	      cmapv[i] = tb;							// Blue
+	      cmap[i] = ((0x100-alpha[i])*cmap[i]) >> 8;
+	      cmapu[i] = ((0x100-alpha[i])*cmapu[i]) >> 8;
+	      cmapv[i] = ((0x100-alpha[i])*cmapv[i]) >> 8;
+	      break;
+	    case DVDNAV_SPU_BGR:
+	      ty = ((this->global_palette[packet->palette[i]] >> 16) & 0xff);	// Y
+              tu = ((this->global_palette[packet->palette[i]] >> 8) & 0xff);	// u
+              tv = ((this->global_palette[packet->palette[i]] >> 0) & 0xff);	// v
+	      spu_yuv_to_rgb(ty,tu,tv,&tr,&tg,&tb);
+	      cmap[i] = tb;							// Blue
+	      cmapu[i] = tg;							// Green
+	      cmapv[i] = tr;							// Red
+	      cmap[i] = ((0x100-alpha[i])*cmap[i]) >> 8;
+	      cmapu[i] = ((0x100-alpha[i])*cmapu[i]) >> 8;
+	      cmapv[i] = ((0x100-alpha[i])*cmapv[i]) >> 8;
+	      break;
+	    }
+	  }
+      if (halpha[i] == 0) {hcmap[i] = 0; hcmapu[i] = 0; hcmapv[i] = 0;} else {
+        if (hcmap[i] + halpha[i] > 255)
+	  hcmap[i] = 256 - halpha[i];
+	  switch (this->dvdnav_color_spu) {
+	    case DVDNAV_SPU_YUV:
+	    case DVDNAV_SPU_YUY:
+              hcmap[i] = ((this->global_palette[thpalette[i]] >> 16) & 0xff);	// Y
+	      hcmap[i] = ((0x100-halpha[i])*hcmap[i]) >> 8;
+              hcmapu[i] = ((this->global_palette[thpalette[i]] >> 8) & 0xff);	// u
+	      hcmapu[i] = ((0x100-halpha[i])*hcmapu[i]) >> 8;
+              hcmapv[i] = ((this->global_palette[thpalette[i]] >> 0) & 0xff);	// v
+	      hcmapv[i] = ((0x100-halpha[i])*hcmapv[i]) >> 8;
+	      break;
+	    case DVDNAV_SPU_RGB:
+              ty = ((this->global_palette[thpalette[i]] >> 16) & 0xff);	// Y
+              tu = ((this->global_palette[thpalette[i]] >> 8) & 0xff);	// u
+              tv = ((this->global_palette[thpalette[i]] >> 0) & 0xff);	// v
+	      spu_yuv_to_rgb(ty,tu,tv,&tr,&tg,&tb);
+	      hcmap[i] = tr;
+	      hcmapu[i] = tg;
+	      hcmapv[i] = tb;
+	      hcmap[i] = ((0x100-halpha[i])*hcmap[i]) >> 8;
+	      hcmapu[i] = ((0x100-halpha[i])*hcmapu[i]) >> 8;
+	      hcmapv[i] = ((0x100-halpha[i])*hcmapv[i]) >> 8;
+	      break;
+	    case DVDNAV_SPU_BGR:
+              ty = ((this->global_palette[thpalette[i]] >> 16) & 0xff);	// Y
+              tu = ((this->global_palette[thpalette[i]] >> 8) & 0xff);	// u
+              tv = ((this->global_palette[thpalette[i]] >> 0) & 0xff);	// v
+	      spu_yuv_to_rgb(ty,tu,tv,&tr,&tg,&tb);
+	      hcmap[i] = tb;
+	      hcmapu[i] = tg;
+	      hcmapv[i] = tr;
+	      hcmap[i] = ((0x100-halpha[i])*hcmap[i]) >> 8;
+	      hcmapu[i] = ((0x100-halpha[i])*hcmapu[i]) >> 8;
+	      hcmapv[i] = ((0x100-halpha[i])*hcmapv[i]) >> 8;
+	      break;
+	    }
+	}
+      }
+}
   if (this->image_size < this->stride * this->height) {
     if (this->image != NULL) {
       free(this->image);
@@ -258,6 +537,58 @@
   }
   if (this->image == NULL)
     return;
+// Alloc 2nd image buffer (uv planes, or the extra color planes in RGB/BGR mode)
+if(this->dvdnav_menu && this->dvdnav_color_spu)
+  {
+  if (this->imageyuy)	// drop the packed yuy image; spudec_draw rebuilds it on demand
+    {
+    free(this->imageyuy);
+    this->imageyuy=NULL;
+    this->aimageyuy=NULL;
+    }
+  if (this->dvdnav_color_spu==DVDNAV_SPU_YUV ||
+	this->dvdnav_color_spu==DVDNAV_SPU_YUY)
+    {
+    if (this->image_sizeuv < this->strideuv * this->heightuv)
+      {
+      if (this->imageu != NULL)
+        {
+        free(this->imageu);
+        this->image_sizeuv = 0;
+        }
+      this->imageu = malloc(3 * this->strideuv * this->heightuv);	// u, v and alpha in one allocation
+      if (this->imageu)
+        {
+        this->image_sizeuv = this->strideuv * this->heightuv;
+        this->imagev = this->imageu + this->image_sizeuv;
+        this->aimageuv = this->imagev + this->image_sizeuv;
+	}
+      }
+    if (this->imageu) memset(this->imageu,0,3 * this->strideuv * this->heightuv);	// never memset NULL after a failed malloc
+    } else {
+    if (this->image_sizeuv < this->stride * this->height)
+      {
+      if (this->imageu != NULL)
+	{
+	free(this->imageu);
+	this->image_sizeuv = 0;
+        }
+      this->imageu = malloc(2 * this->stride * this->height);	// two extra full-size color planes
+      if (this->imageu)
+        {
+        this->image_sizeuv = this->stride * this->height;
+        this->imagev = this->imageu + this->image_sizeuv;
+        this->aimageuv = this->imagev + this->image_sizeuv;
+	}
+      }
+    if (this->imageu) memset(this->imageu,0,2 * this->stride * this->height);	// never memset NULL after a failed malloc
+    }
+  if (this->imageu == NULL) return;	// allocation failed: give up on this packet
+  } else {
+  if (this->imageu) free(this->imageu);
+  this->imageu=NULL;
+  this->image_sizeuv=0;
+  }
 
   /* Kludge: draw_alpha needs width multiple of 8. */
   if (this->width < this->stride)
@@ -292,8 +623,89 @@
     if (len > this->width - x || len == 0)
       len = this->width - x;
     /* FIXME have to use palette and alpha map*/
+//
+// Fill dvdnav menu area to image buffer
+//
+    if (this->dvdnav_menu)
+      {
+      if (this->start_row+y>=this->dvdnav_sy &&
+	    this->start_row+y<=this->dvdnav_ey)
+	{
+	if (this->start_col+x>=this->dvdnav_sx &&
+		this->start_col+x+len<=this->dvdnav_ex)
+	  spudec_process_fill(this,
+		    x,
+		    y,
+		    len,
+		    hcmap[color], hcmapu[color], hcmapv[color], halpha[color]);
+	else if(this->start_col+x<this->dvdnav_sx &&
+		this->start_col+x+len>this->dvdnav_sx &&
+		this->start_col+x+len<=this->dvdnav_ex)
+	  {
+	  spudec_process_fill(this,
+		    x,
+		    y,
+		    this->dvdnav_sx-this->start_col-x,
+		    cmap[color], cmapu[color], cmapv[color], alpha[color]);
+	  spudec_process_fill(this,
+		    this->dvdnav_sx-this->start_col,
+		    y,
+		    len+this->start_col+x-this->dvdnav_sx,
+		    hcmap[color], hcmapu[color], hcmapv[color], halpha[color]);
+	  }
+	else if(this->start_col+x<this->dvdnav_sx &&
+		this->start_col+x+len>this->dvdnav_sx &&
+		this->start_col+x+len>this->dvdnav_ex)
+	  {
+	  spudec_process_fill(this,
+		    x,
+		    y,
+		    this->dvdnav_sx-this->start_col-x,
+		    cmap[color], cmapu[color], cmapv[color], alpha[color]);
+	  spudec_process_fill(this,
+		    this->dvdnav_sx-this->start_col,
+		    y,
+		    this->dvdnav_ex-this->dvdnav_sx,
+		    hcmap[color], hcmapu[color], hcmapv[color], halpha[color]);
+	  spudec_process_fill(this,
+		    this->dvdnav_ex-this->start_col,
+		    y,
+		    x+len+this->start_col-this->dvdnav_ex,
+		    cmap[color], cmapu[color], cmapv[color], alpha[color]);
+	  }
+	else if(this->start_col+x>=this->dvdnav_sx &&
+		this->start_col+x<this->dvdnav_ex &&
+		this->start_col+x+len>this->dvdnav_ex)
+	  {
+	  spudec_process_fill(this,
+		    x,
+		    y,
+		    this->dvdnav_ex-this->start_col-x,
+		    hcmap[color], hcmapu[color], hcmapv[color], halpha[color]);
+	  spudec_process_fill(this,
+		    this->dvdnav_ex-this->start_col,
+		    y,
+		    len+this->start_col+x-this->dvdnav_ex,
+		    cmap[color], cmapu[color], cmapv[color], alpha[color]);
+	  }
+	  else
+	  spudec_process_fill(this,
+		    x,
+		    y,
+		    len,
+		    cmap[color], cmapu[color], cmapv[color], alpha[color]);
+	} else
+	spudec_process_fill(this,
+		    x,
+		    y,
+		    len,
+		    cmap[color], cmapu[color], cmapv[color], alpha[color]);
+      }
+      else
+      {
     memset(this->image + y * this->stride + x, cmap[color], len);
     memset(this->aimage + y * this->stride + x, alpha[color], len);
+      }
     x += len;
     if (x >= this->width) {
       next_line(packet);
@@ -301,7 +713,12 @@
       ++y;
     }
   }
+  packet->control_start = control_start;
+  packet->current_nibble[0]=current_nibble[0];
+  packet->current_nibble[1]=current_nibble[1];
+  packet->deinterlace_oddness=deinterlace_oddness;
   spudec_cut_image(this);
+//printf("spudec_process_data: w: %i h: %i end\n",this->height,this->width);
 }
 
 
@@ -316,6 +733,7 @@
 {
   int used[16],i,cused,start,step,color;
 
+//printf("spudec:c1      ");for(i=0;i<16;i++) printf("%x ",this->global_palette[i]); printf("\n");
   memset(used, 0, sizeof(used));
   for (i=0; i<4; i++)
     if (packet->alpha[i]) /* !Transparent? */
@@ -339,6 +757,7 @@
        start += step;
     }
   }
+//printf("spudec:c2      ");for(i=0;i<16;i++) printf("%x ",this->global_palette[i]); printf("\n");
 }
 
 static void spudec_process_control(spudec_handle_t *this, unsigned int pts100)
@@ -586,6 +1005,7 @@
   spu->now_pts = 0;
   spu->end_pts = 0;
   spu->packet_size = spu->packet_offset = 0;
+//  if (spu->last_packet) {printf("free4\n");spudec_free_packet(spu->last_packet); spu->last_packet=NULL;}
 }
 
 void spudec_heartbeat(void *this, unsigned int pts100)
@@ -593,13 +1013,22 @@
   spudec_handle_t *spu = (spudec_handle_t*) this;
   spu->now_pts = pts100;
 
+  if(spu->dvdnav_menu && spu->queue_head) spu->queue_head->start_pts=0;	// menus only: show at once, subtitles keep their pts
   while (spu->queue_head != NULL && pts100 >= spu->queue_head->start_pts) {
     packet_t *packet = spudec_dequeue_packet(spu);
     spu->start_pts = packet->start_pts;
     spu->end_pts = packet->end_pts;
-    if (spu->auto_palette)
+    if (spu->auto_palette && !spu->dvdnav_menu)	// menu mode keeps the global palette untouched
       compute_palette(spu, packet);
     spudec_process_data(spu, packet);
+    if (spu->dvdnav_menu)	// menu mode: keep the newest packet instead of freeing it
+      {
+      if(spu->last_packet)
+	{
+	spudec_free_packet(spu->last_packet);
+	}
+      spu->last_packet=packet;	// NOTE(review): presumably kept so the menu can be decoded again -- confirm
+      } else
     spudec_free_packet(packet);
     spu->spu_changed = 1;
   }
@@ -605,12 +1034,18 @@
   }
 }
 
+
 int spudec_visible(void *this){
     spudec_handle_t *spu = (spudec_handle_t *)this;
+    if(!spu) return 0;				// no decoder handle: nothing is visible
+    if (spu->dvdnav_menu && spu->height > 0)	// menu mode: any decoded image counts as visible
+      {
+      if(spu->height>0) spu->end_pts=UINT_MAX;	// side effect: end time pushed to the maximum
+      return 1;
+      }
     int ret=(spu->start_pts <= spu->now_pts &&
 	     spu->now_pts < spu->end_pts &&
 	     spu->height > 0);
-//    printf("spu visible: %d  \n",ret);
     return ret;
 }
 
@@ -622,12 +1057,102 @@
   }
 }
 
-void spudec_draw(void *this, void (*draw_alpha)(int x0,int y0, int w,int h, unsigned char* src, unsigned char *srca, int stride))
+void spudec_draw(void *this, void (*draw_alpha)(int x0,int y0, int w,int h, int dp, unsigned char* src, unsigned char *srca, int stride))
 {
     spudec_handle_t *spu = (spudec_handle_t *)this;
     if (spu->start_pts <= spu->now_pts && spu->now_pts < spu->end_pts && spu->image)
     {
-	draw_alpha(spu->start_col, spu->start_row, spu->width, spu->height,
+    if (spu->dvdnav_menu)
+      {	/* spu menu mode? */
+      switch (spu->dvdnav_color_spu)
+	{
+//
+// Draw spu menu Y,u and v planes in YUV mode
+//
+	case DVDNAV_SPU_YUV:
+	  draw_alpha(spu->start_col,
+		    spu->start_row,
+		    spu->width,
+		    spu->height,
+		    DEST_PLANES_Y,
+		    spu->image,
+		    spu->aimage,
+		    spu->stride);
+	  draw_alpha(spu->start_coluv,
+		    spu->start_rowuv,
+		    spu->widthuv,
+		    spu->heightuv,
+		    DEST_PLANES_U,
+		    spu->imageu,
+		    spu->aimageuv,
+		    spu->strideuv);
+	  draw_alpha(spu->start_coluv,
+		    spu->start_rowuv,
+		    spu->widthuv,
+		    spu->heightuv,
+		    DEST_PLANES_V,
+		    spu->imagev,
+		    spu->aimageuv,
+		    spu->strideuv);
+	  break;
+//
+// Draw spu menu all planes in YUY mode
+//
+	case DVDNAV_SPU_YUY:
+	  if (!spu->imageyuy) spudec_create_yuy(spu,0);
+	  if (spu->imageyuy) draw_alpha(spu->start_col,
+		    spu->start_row,
+		    spu->width*2,
+		    spu->height/2,
+		    DEST_PLANES_YUYV,
+		    spu->imageyuy,
+		    spu->aimageyuy,
+		    spu->strideyuy);
+	  break;
+//
+// Draw spu menu Red,Blue and Green on RGB or BGR mode
+//
+	case DVDNAV_SPU_RGB:
+	case DVDNAV_SPU_BGR:
+	  draw_alpha(spu->start_col,
+		    spu->start_row,
+		    spu->width, spu->height,
+		    DEST_PLANES_BR,
+		    spu->imagev,
+		    spu->aimage,
+		    spu->stride);
+	  draw_alpha(spu->start_col,
+		    spu->start_row,
+		    spu->width,
+		    spu->height,
+		    DEST_PLANES_G,
+		    spu->imageu,
+		    spu->aimage,
+		    spu->stride);
+	  draw_alpha(spu->start_col,
+		    spu->start_row,
+		    spu->width,
+		    spu->height,
+		    DEST_PLANES_RB,
+		    spu->image,
+		    spu->aimage,
+		    spu->stride);
+	  break;
+//
+// Draw spu menu Y planes in normal mode
+//
+	default:
+	  draw_alpha(spu->start_col,
+		    spu->start_row,
+		    spu->width,
+		    spu->height,
+		    DEST_PLANES_Y,
+		    spu->image,
+		    spu->aimage,
+		    spu->stride);
+	}
+      } else
+	draw_alpha(spu->start_col, spu->start_row, spu->width, spu->height,DEST_PLANES_Y,
 		   spu->image, spu->aimage, spu->stride);
 	spu->spu_changed = 0;
     }
@@ -739,6 +1264,125 @@
   }
 }
 
+//
+// bilinear scale: u and v planes -- computes destination pixel (x,y) of
+// scaled_imageu, scaled_imagev and scaled_aimageuv from a 2x2 source block
+static void scale_image_uv(int x, int y, scale_pixel* table_x,
+	scale_pixel* table_y, spudec_handle_t * spu)
+{
+  int alpha[4];
+  int coloru[4];
+  int colorv[4];
+  unsigned int scale[4];
+  int base = table_y[y].position * spu->strideuv + table_x[x].position;	// top-left source sample
+  int scaled = y * spu->scaled_strideuv + x;	// destination index
+  alpha[0] = canon_alpha(spu->aimageuv[base]);
+  alpha[1] = canon_alpha(spu->aimageuv[base + 1]);
+  alpha[2] = canon_alpha(spu->aimageuv[base + spu->strideuv]);
+  alpha[3] = canon_alpha(spu->aimageuv[base + spu->strideuv + 1]);
+  coloru[0] = spu->imageu[base];
+  coloru[1] = spu->imageu[base + 1];
+  coloru[2] = spu->imageu[base + spu->strideuv];
+  coloru[3] = spu->imageu[base + spu->strideuv + 1];
+  colorv[0] = spu->imagev[base];
+  colorv[1] = spu->imagev[base + 1];
+  colorv[2] = spu->imagev[base + spu->strideuv];
+  colorv[3] = spu->imagev[base + spu->strideuv + 1];
+// Chroma is blended with geometric weights only: the constant 0x100 stands
+// in for the per-pixel alpha that the alpha plane uses further down. The
+// original author flagged this as a FIXME "color hack".
+// The four weights are products of the x and y table weights, in the
+// order: base, base+1, next row, next row+1.
+  scale[0] = (table_x[x].left_up * table_y[y].left_up >> 16) * 0x100;
+  scale[1] = (table_x[x].right_down * table_y[y].left_up >>16) * 0x100;
+  scale[2] = (table_x[x].left_up * table_y[y].right_down >> 16) * 0x100;
+  scale[3] = (table_x[x].right_down * table_y[y].right_down >> 16) * 0x100;
+  spu->scaled_imageu[scaled] =
+	(coloru[0] * scale[0] +
+	coloru[1] * scale[1] +
+	coloru[2] * scale[2] +
+	coloru[3] * scale[3])>>24;
+  spu->scaled_imagev[scaled] =
+	(colorv[0] * scale[0] +
+	colorv[1] * scale[1] +
+	colorv[2] * scale[2] +
+	colorv[3] * scale[3])>>24;
+  scale[0] = (table_x[x].left_up * table_y[y].left_up >> 16) * alpha[0];	// alpha-weighted from here on
+  scale[1] = (table_x[x].right_down * table_y[y].left_up >>16) * alpha[1];
+  scale[2] = (table_x[x].left_up * table_y[y].right_down >> 16) * alpha[2];
+  scale[3] = (table_x[x].right_down * table_y[y].right_down >> 16) * alpha[3];
+  spu->scaled_aimageuv[scaled] =
+	(scale[0] + scale[1] + scale[2] + scale[3]) >> 16;
+  if (spu->scaled_aimageuv[scaled]){	// non-zero blended alpha
+    spu->scaled_aimageuv[scaled] = 256 - spu->scaled_aimageuv[scaled];	// store alpha as 256 - value
+    if(spu->scaled_aimageuv[scaled] + spu->scaled_imageu[scaled] > 255)	// cap u at 256 - alpha
+      spu->scaled_imageu[scaled] = 256 - spu->scaled_aimageuv[scaled];
+    if(spu->scaled_aimageuv[scaled] + spu->scaled_imagev[scaled] > 255)	// cap v at 256 - alpha
+      spu->scaled_imagev[scaled] = 256 - spu->scaled_aimageuv[scaled];
+  }
+}
+
+//
+// bilinear scale: Red, Green and Blue planes -- computes destination pixel
+// (x,y) of scaled_image, scaled_imageu, scaled_imagev and scaled_aimage
+static void scale_image_rgb(int x, int y, scale_pixel* table_x, scale_pixel* table_y, spudec_handle_t * spu)
+{
+  int alpha[4];
+  int colorr[4];
+  int colorg[4];
+  int colorb[4];
+  unsigned int scale[4];
+  int base = table_y[y].position * spu->stride + table_x[x].position;	// top-left source sample
+  int scaled = y * spu->scaled_stride + x;	// destination index
+  alpha[0] = canon_alpha(spu->aimage[base]);
+  alpha[1] = canon_alpha(spu->aimage[base + 1]);
+  alpha[2] = canon_alpha(spu->aimage[base + spu->stride]);
+  alpha[3] = canon_alpha(spu->aimage[base + spu->stride + 1]);
+  colorr[0] = spu->image[base];	// first plane (named red here) lives in image
+  colorr[1] = spu->image[base + 1];
+  colorr[2] = spu->image[base + spu->stride];
+  colorr[3] = spu->image[base + spu->stride + 1];
+  colorg[0] = spu->imageu[base];	// second plane lives in imageu
+  colorg[1] = spu->imageu[base + 1];
+  colorg[2] = spu->imageu[base + spu->stride];
+  colorg[3] = spu->imageu[base + spu->stride + 1];
+  colorb[0] = spu->imagev[base];	// third plane lives in imagev
+  colorb[1] = spu->imagev[base + 1];
+  colorb[2] = spu->imagev[base + spu->stride];
+  colorb[3] = spu->imagev[base + spu->stride + 1];
+  scale[0] = (table_x[x].left_up * table_y[y].left_up >> 16) * alpha[0];	// geometric weight times source alpha
+  scale[1] = (table_x[x].right_down * table_y[y].left_up >>16) * alpha[1];
+  scale[2] = (table_x[x].left_up * table_y[y].right_down >> 16) * alpha[2];
+  scale[3] = (table_x[x].right_down * table_y[y].right_down >> 16) * alpha[3];
+  spu->scaled_image[scaled] =
+	(colorr[0] * scale[0] +
+	colorr[1] * scale[1] +
+	colorr[2] * scale[2] +
+	colorr[3] * scale[3])>>24;
+  spu->scaled_imageu[scaled] =
+	(colorg[0] * scale[0] +
+	colorg[1] * scale[1] +
+	colorg[2] * scale[2] +
+	colorg[3] * scale[3])>>24;
+  spu->scaled_imagev[scaled] =
+	(colorb[0] * scale[0] +
+	colorb[1] * scale[1] +
+	colorb[2] * scale[2] +
+	colorb[3] * scale[3])>>24;
+  spu->scaled_aimage[scaled] =
+	(scale[0] + scale[1] + scale[2] + scale[3]) >> 16;
+  if (spu->scaled_aimage[scaled]){	// non-zero blended alpha
+    spu->scaled_aimage[scaled] = 256 - spu->scaled_aimage[scaled];	// store alpha as 256 - value
+    if(spu->scaled_aimage[scaled] + spu->scaled_image[scaled] > 255)	// cap each plane at 256 - alpha
+      spu->scaled_image[scaled] = 256 - spu->scaled_aimage[scaled];
+    if(spu->scaled_aimage[scaled] + spu->scaled_imageu[scaled] > 255)
+      spu->scaled_imageu[scaled] = 256 - spu->scaled_aimage[scaled];
+    if(spu->scaled_aimage[scaled] + spu->scaled_imagev[scaled] > 255)
+      spu->scaled_imagev[scaled] = 256 - spu->scaled_aimage[scaled];
+  }
+}
+
+
 void sws_spu_image(unsigned char *d1, unsigned char *d2, int dw, int dh, int ds,
 	unsigned char *s1, unsigned char *s2, int sw, int sh, int ss)
 {
@@ -765,68 +1409,358 @@
 	sws_freeContext(ctx);
 }
 
-void spudec_draw_scaled(void *me, unsigned int dxs, unsigned int dys, void (*draw_alpha)(int x0,int y0, int w,int h, unsigned char* src, unsigned char *srca, int stride))
+// Build the packed colour+alpha image (spu->imageyuy / spu->aimageyuy) from the
+// planar Y, u, v and alpha planes; spu_scaled != 0 selects the scaled_* planes.
+// Alpha rows follow the colour rows inside the same allocation.
+void spudec_create_yuy(void *this, int spu_scaled)
+{
+spudec_handle_t *spu = this;
+unsigned char *dptr, *daptr;		/* packed colour / packed alpha output */
+unsigned char *sptry, *sptru, *sptrv;	/* planar colour sources */
+unsigned char *saptr, *saptruv;		/* planar alpha sources */
+int y,x;
+// Output bytes per pair of luma pixels: Y0, imagev sample, Y1, imageu sample;
+// the chroma alpha sample is written twice, once for each chroma byte.
+// Chroma rows are indexed with y/2, i.e. one chroma row per two luma rows.
+// If the allocation fails, spu->imageyuy stays NULL and no image is built.
+
+if (spu_scaled) {
+  spu->strideyuy=spu->scaled_stride*2;
+  spu->imageyuy=calloc(2*(spu->scaled_height+2),spu->strideyuy); /* sized by the scaled height */
+  if (!spu->imageyuy) return;
+  spu->aimageyuy=spu->imageyuy+spu->strideyuy*spu->scaled_height;
+  for(y=0;y<spu->scaled_height;y++) {
+    dptr=spu->imageyuy+y*spu->strideyuy;
+    daptr=spu->aimageyuy+y*spu->strideyuy;
+    sptry=spu->scaled_image+y*spu->scaled_stride;
+    sptru=spu->scaled_imageu+y/2*spu->scaled_strideuv;
+    sptrv=spu->scaled_imagev+y/2*spu->scaled_strideuv;
+    saptr=spu->scaled_aimage+y*spu->scaled_stride;
+    saptruv=spu->scaled_aimageuv+y/2*spu->scaled_strideuv;
+    for(x=0;x<spu->scaled_widthuv-1;x++) {
+      *dptr++=*sptry++;
+      *dptr++=*sptrv++;
+      *dptr++=*sptry++;
+      *dptr++=*sptru++;
+      *daptr++=*saptr++;
+      *daptr++=*saptruv;
+      *daptr++=*saptr++;
+      *daptr++=*saptruv++;
+      } }
+  } else {
+  spu->strideyuy=spu->stride*2;
+  spu->imageyuy=calloc(2*(spu->height+2),spu->strideyuy);
+  if (!spu->imageyuy) return;
+  spu->aimageyuy=spu->imageyuy+spu->strideyuy*spu->height;
+  for(y=0;y<spu->height;y++) {
+    dptr=spu->imageyuy+y*spu->strideyuy;
+    daptr=spu->aimageyuy+y*spu->strideyuy;
+    sptry=spu->image+y*spu->stride;
+    sptru=spu->imageu+y/2*spu->strideuv;
+    sptrv=spu->imagev+y/2*spu->strideuv;
+    saptr=spu->aimage+y*spu->stride;
+    saptruv=spu->aimageuv+y/2*spu->strideuv;
+    for(x=0;x<spu->widthuv-1;x++) {
+      *dptr++=*sptry++;
+      *dptr++=*sptrv++;
+      *dptr++=*sptry++;
+      *dptr++=*sptru++;
+      *daptr++=*saptr++;
+      *daptr++=*saptruv;
+      *daptr++=*saptr++;
+      *daptr++=*saptruv++;
+      } }
+  }
+}
+
+// Scale the u and v planes plus their alpha plane through libswscale using a
+// Gaussian filter built from spu_gaussvar. Side effect: zero bytes of the
+// source alpha plane s2 are overwritten with 255 before it is scaled.
+void sws_spu_image_uv(unsigned char *du, unsigned char *dv, unsigned char *d2,
+	int dw, int dh, int ds, unsigned char *su, unsigned char *sv,
+	unsigned char *s2, int sw, int sh, int ss)
+{
+struct SwsContext *ctx;
+static SwsFilter filter;	/* cached across calls; all four vectors alias lumH */
+static int firsttime = 1;
+static float oldvar;
+int i;
+// spu_gaussvar changed: free the cached vector AND force the rebuild below, so the freed vector is never reused.
+if (!firsttime && oldvar != spu_gaussvar) { sws_freeVec(filter.lumH); firsttime = 1; }
+if (firsttime)
+    {
+    filter.lumH = filter.lumV =
+	filter.chrH = filter.chrV = sws_getGaussianVec(spu_gaussvar, 3.0);
+    sws_normalizeVec(filter.lumH, 1.0);
+    firsttime = 0;
+    oldvar = spu_gaussvar;
+    }
+ctx=sws_getContext(sw, sh, IMGFMT_Y800, dw, dh, IMGFMT_Y800, SWS_GAUSS, &filter, NULL, NULL);
+if (!ctx) return;	/* no scaler context: leave source and destination untouched */
+sws_scale(ctx,&su,&ss,0,sh,&du,&ds);
+sws_scale(ctx,&sv,&ss,0,sh,&dv,&ds);
+for (i=ss*sh-1; i>=0; i--)	/* rewrite source alpha 0 as 255 before scaling */
+  if (!s2[i]) s2[i] = 255; //else s2[i] = 1;
+sws_scale(ctx,&s2,&ss,0,sh,&d2,&ds);
+for (i=ds*dh-1; i>=0; i--)	/* remap scaled alpha: 0 becomes 1, 255 becomes 0 */
+  if (d2[i]==0) d2[i] = 1;
+  else if (d2[i]==255) d2[i] = 0;
+sws_freeContext(ctx);
+}
+
+// Scale the red, green and blue planes plus the alpha plane through libswscale
+// using a Gaussian filter built from spu_gaussvar. Side effect: zero bytes of
+// the source alpha plane s2 are overwritten with 255 before it is scaled.
+void sws_spu_image_rgb(unsigned char *dr, unsigned char *dg, unsigned char *db,
+	unsigned char *d2, int dw, int dh, int ds,
+	unsigned char *sr,unsigned char *sg,unsigned char *sb,
+	unsigned char *s2, int sw, int sh, int ss)
+{
+struct SwsContext *ctx;
+static SwsFilter filter;	/* cached across calls; all four vectors alias lumH */
+static int firsttime = 1;
+static float oldvar;
+int i;
+// spu_gaussvar changed: free the cached vector AND force the rebuild below, so the freed vector is never reused.
+if (!firsttime && oldvar != spu_gaussvar) { sws_freeVec(filter.lumH); firsttime = 1; }
+if (firsttime)
+    {
+    filter.lumH = filter.lumV =
+	filter.chrH = filter.chrV = sws_getGaussianVec(spu_gaussvar, 3.0);
+    sws_normalizeVec(filter.lumH, 1.0);
+    firsttime = 0;
+    oldvar = spu_gaussvar;
+    }
+ctx=sws_getContext(sw, sh, IMGFMT_Y800, dw, dh, IMGFMT_Y800, SWS_GAUSS, &filter, NULL, NULL);
+if (!ctx) return;	/* no scaler context: leave source and destination untouched */
+sws_scale(ctx,&sr,&ss,0,sh,&dr,&ds);
+sws_scale(ctx,&sg,&ss,0,sh,&dg,&ds);
+sws_scale(ctx,&sb,&ss,0,sh,&db,&ds);
+for (i=ss*sh-1; i>=0; i--) if (!s2[i]) s2[i] = 255; //else s2[i] = 1;
+sws_scale(ctx,&s2,&ss,0,sh,&d2,&ds);
+for (i=ds*dh-1; i>=0; i--)	/* remap scaled alpha: 0 becomes 1, 255 becomes 0 */
+    if (d2[i]==0) d2[i] = 1;
+    else if (d2[i]==255) d2[i] = 0;
+sws_freeContext(ctx);
+}
+
+//
+// Turn DVD-menu mode on (mode != 0) or off (mode == 0).
+// While on, cflg is stored as the SPU colour mode:
+//	0 = Y, 1 = YUV, 2 = RGB, 3 = BGR, 4 = YUY
+// While off, the colour mode is reset to 0 and a retained last_packet,
+// if any, is freed and cleared.
+// A NULL handle is ignored.
+//
+void spudec_dvdnav_mode(void *this, int mode, int cflg)
+{
+  spudec_handle_t *handle = (spudec_handle_t *)this;
+
+  if (handle == NULL)
+    return;
+
+  handle->dvdnav_menu = mode;
+  handle->dvdnav_color_spu = mode ? cflg : 0;
+
+  /* Menu mode ended up disabled: release the retained packet. */
+  if (!handle->dvdnav_menu && handle->last_packet) {
+    spudec_free_packet(handle->last_packet);
+    handle->last_packet = NULL;
+  }
+}
+
+// Set the DVD-menu button rectangle and palette. The two corners may be given
+// in either order; the smaller coordinate is stored as the start on each axis.
+// A NULL handle, or an unchanged rectangle and palette, makes this a no-op.
+void spudec_dvdnav_area(void *this, uint16_t sx, uint16_t sy, uint16_t ex,
+	uint16_t ey, uint32_t palette)
+{
+  spudec_handle_t *handle = this;
+  const uint16_t left = FFMIN(sx, ex), right = FFMAX(sx, ex);
+  const uint16_t top = FFMIN(sy, ey), bottom = FFMAX(sy, ey);
+
+  if (handle == NULL)
+    return;
+  if (handle->dvdnav_sx == left && handle->dvdnav_ex == right &&
+      handle->dvdnav_sy == top && handle->dvdnav_ey == bottom &&
+      handle->dvdnav_palette == palette)
+    return;
+
+  handle->dvdnav_sx = left;
+  handle->dvdnav_ex = right;
+  handle->dvdnav_sy = top;
+  handle->dvdnav_ey = bottom;
+  handle->dvdnav_palette = palette;
+  handle->dvdnav_modify = 1;
+  /* In menu mode, run spudec_process_data again on the retained packet. */
+  if (handle->dvdnav_menu && handle->last_packet)
+    spudec_process_data(handle, handle->last_packet);
+}
+
+//
+// Store a new DVD-menu button palette; a NULL handle is ignored.
+//
+void spudec_dvdnav_palette(void *this, uint32_t palette)
+{
+  spudec_handle_t *handle = this;
+
+  if (handle != NULL)
+    handle->dvdnav_palette = palette;
+}
+
+//
+// Draw scaled image in YUV and YUY mode
+//	Note: expanded spudec_draw_scale with half size uv planes
+//
+void spudec_draw_scaled_yuv(void *me, unsigned int dxs, unsigned int dys,
+	void (*draw_alpha)(int x0,int y0, int w,int h, int dp, unsigned char* src, unsigned char *srca, int stride))
 {
   spudec_handle_t *spu = (spudec_handle_t *)me;
   scale_pixel *table_x;
   scale_pixel *table_y;
+  unsigned int scalex = 0;
+  unsigned int scaley = 0;
 
   if (spu->start_pts <= spu->now_pts && spu->now_pts < spu->end_pts) {
 
     // check if only forced subtitles are requested 
     if( (spu->forced_subs_only) && !(spu->is_forced_sub) ){ 
-	return;
-    }
+	return;}
 
-    if (!(spu_aamode&16) && (spu->orig_frame_width == 0 || spu->orig_frame_height == 0
-	|| (spu->orig_frame_width == dxs && spu->orig_frame_height == dys))) {
+    if (!(spu_aamode&16) && (spu->orig_frame_width == 0 ||
+	    spu->orig_frame_height == 0 ||
+	    (spu->orig_frame_width == dxs && spu->orig_frame_height == dys)))
+{
       if (spu->image)
       {
-	draw_alpha(spu->start_col, spu->start_row, spu->width, spu->height,
-		   spu->image, spu->aimage, spu->stride);
-	spu->spu_changed = 0;
+	if (spu->dvdnav_color_spu==DVDNAV_SPU_YUY) {
+	  if (!spu->imageyuy) spudec_create_yuy(spu,0);
+	  if (spu->imageyuy) draw_alpha(spu->start_col,
+		    spu->start_row,
+		    spu->width*2,
+		    spu->heightuv*2,
+		    DEST_PLANES_YUYV,
+		    spu->imageyuy,
+		    spu->aimageyuy,
+		    spu->strideyuy);
+	  } else {
+	  draw_alpha(spu->start_col,
+		    spu->start_row,
+		    spu->width,
+		    spu->height,
+		    DEST_PLANES_Y,
+		    spu->image,
+		    spu->aimage,
+		    spu->stride);
+	  if(spu->dvdnav_color_spu && spu->imageu && spu->aimageuv)
+	    draw_alpha(spu->start_coluv,
+		    spu->start_rowuv,
+		    spu->widthuv,
+		    spu->heightuv,
+		    DEST_PLANES_U,
+		    spu->imageu,
+		    spu->aimageuv,
+		    spu->strideuv);
+	  if(spu->dvdnav_color_spu && spu->imagev && spu->aimageuv)
+	    draw_alpha(spu->start_coluv,
+		    spu->start_rowuv,
+		    spu->widthuv,
+		    spu->heightuv,
+		    DEST_PLANES_V,
+		    spu->imagev,
+		    spu->aimageuv,
+		    spu->strideuv);
       }
+	spu->spu_changed = 0;
     }
-    else {
-      if (spu->scaled_frame_width != dxs || spu->scaled_frame_height != dys) {	/* Resizing is needed */
+    } else {
+      if (spu->scaled_frame_width != dxs || spu->scaled_frame_height != dys)
+	{	/* Resizing is needed */
 	/* scaled_x = scalex * x / 0x100
 	   scaled_y = scaley * y / 0x100
 	   order of operations is important because of rounding. */
-	unsigned int scalex = 0x100 * dxs / spu->orig_frame_width;
-	unsigned int scaley = 0x100 * dys / spu->orig_frame_height;
+	scalex = 0x100 * dxs / spu->orig_frame_width;
+	scaley = 0x100 * dys / spu->orig_frame_height;
+
 	spu->scaled_start_col = spu->start_col * scalex / 0x100;
 	spu->scaled_start_row = spu->start_row * scaley / 0x100;
 	spu->scaled_width = spu->width * scalex / 0x100;
 	spu->scaled_height = spu->height * scaley / 0x100;
+	spu->scaled_start_coluv = spu->start_coluv * scalex / 0x100;
+	spu->scaled_start_rowuv = spu->start_rowuv * scaley / 0x100;
+	spu->scaled_widthuv = spu->widthuv * scalex / 0x100;
+	spu->scaled_heightuv = spu->heightuv * scaley / 0x100;
 	/* Kludge: draw_alpha needs width multiple of 8 */
 	spu->scaled_stride = (spu->scaled_width + 7) & ~7;
-	if (spu->scaled_image_size < spu->scaled_stride * spu->scaled_height) {
-	  if (spu->scaled_image) {
+	spu->scaled_strideuv = (spu->scaled_widthuv + 7) & ~7;
+	if (spu->scaled_image_size < spu->scaled_stride *
+		(spu->scaled_height+2))
+{
+	  if (spu->scaled_image)
+	    {
 	    free(spu->scaled_image);
 	    spu->scaled_image_size = 0;
 	  }
-	  spu->scaled_image = malloc(2 * spu->scaled_stride * spu->scaled_height);
-	  if (spu->scaled_image) {
-	    spu->scaled_image_size = spu->scaled_stride * spu->scaled_height;
+	  spu->scaled_image = malloc(2 * spu->scaled_stride *
+		(spu->scaled_height+2));
+	  if (spu->scaled_image)
+{
+	    memset(spu->scaled_image,0,
+		    2 * spu->scaled_stride * (spu->scaled_height+2));
+	    spu->scaled_image_size = spu->scaled_stride *
+		    (spu->scaled_height+2);
 	    spu->scaled_aimage = spu->scaled_image + spu->scaled_image_size;
 	  }
 	}
+	if (spu->scaled_image_sizeuv < spu->scaled_strideuv *
+		(spu->scaled_heightuv+2))
+{
+	  if (spu->scaled_imageu)
+	    {
+	    free(spu->scaled_imageu);
+	    spu->scaled_image_sizeuv = 0;
+    }
+	  spu->scaled_imageu = malloc(3 * spu->scaled_strideuv *
+		(spu->scaled_height+2));
+	  if (spu->scaled_imageu)
+	    {
+	    memset(spu->scaled_imageu,0,3 * spu->scaled_strideuv *
+		    (spu->scaled_height+2));
+	    spu->scaled_image_sizeuv = spu->scaled_strideuv *
+		    (spu->scaled_heightuv+2);
+	    spu->scaled_imagev = spu->scaled_imageu +
+		    spu->scaled_image_sizeuv;
+	    spu->scaled_aimageuv = spu->scaled_imagev +
+		    spu->scaled_image_sizeuv;
+    }
+	}
 	if (spu->scaled_image) {
 	  unsigned int x, y;
 	  if (spu->scaled_width <= 1 || spu->scaled_height <= 1) {
 	    goto nothing_to_do;
 	  }
-	  switch(spu_aamode&15) {
+	  if (spu->scaled_widthuv <= 1 || spu->scaled_heightuv <= 1) {
+	    goto nothing_to_do;
+	    }
+	switch(spu_aamode&15)
+	  {
 	  case 4:
 	  sws_spu_image(spu->scaled_image, spu->scaled_aimage,
 		  spu->scaled_width, spu->scaled_height, spu->scaled_stride,
 		  spu->image, spu->aimage, spu->width, spu->height, spu->stride);
+	    sws_spu_image_uv(spu->scaled_imageu, spu->scaled_imagev,
+		  spu->scaled_aimageuv, spu->scaled_widthuv,
+		  spu->scaled_heightuv, spu->scaled_strideuv,
+		  spu->imageu, spu->imagev, spu->aimageuv,
+		  spu->widthuv, spu->heightuv, spu->strideuv);
 	  break;
 	  case 3:
 	  table_x = calloc(spu->scaled_width, sizeof(scale_pixel));
 	  table_y = calloc(spu->scaled_height, sizeof(scale_pixel));
-	  if (!table_x || !table_y) {
-	    mp_msg(MSGT_SPUDEC, MSGL_FATAL, "Fatal: spudec_draw_scaled: calloc failed\n");
+	    if (!table_x || !table_y)
+	      {
+	      mp_msg(MSGT_SPUDEC, MSGL_FATAL,
+		    "Fatal: spudec_draw_scaled: calloc failed\n");
 	  }
 	  scale_table(0, 0, spu->width - 1, spu->scaled_width - 1, table_x);
 	  scale_table(0, 0, spu->height - 1, spu->scaled_height - 1, table_y);
@@ -835,17 +1769,52 @@
 	      scale_image(x, y, table_x, table_y, spu);
 	  free(table_x);
 	  free(table_y);
+	    table_x = calloc(spu->scaled_widthuv, sizeof(scale_pixel));
+	    table_y = calloc(spu->scaled_heightuv, sizeof(scale_pixel));
+	    if (!table_x || !table_y) {
+	      mp_msg(MSGT_SPUDEC, MSGL_FATAL,
+		    "Fatal: spudec_draw_scaled: calloc failed\n");
+	      }
+	    scale_table(0, 0, spu->widthuv - 1, spu->scaled_widthuv - 1,
+		table_x);
+	    scale_table(0, 0, spu->heightuv - 1, spu->scaled_heightuv - 1,
+		table_y);
+	    for (y = 0; y < spu->scaled_heightuv; y++)
+	      for (x = 0; x < spu->scaled_widthuv; x++)
+		scale_image_uv(x, y, table_x, table_y, spu);
+	    free(table_x);
+	    free(table_y);
 	  break;
 	  case 0:
 	  /* no antialiasing */
-	  for (y = 0; y < spu->scaled_height; ++y) {
+	  for (y = 0; y < spu->scaled_height; ++y)
+	    {
 	    int unscaled_y = y * 0x100 / scaley;
 	    int strides = spu->stride * unscaled_y;
 	    int scaled_strides = spu->scaled_stride * y;
-	    for (x = 0; x < spu->scaled_width; ++x) {
+	    for (x = 0; x < spu->scaled_width; ++x)
+	      {
 	      int unscaled_x = x * 0x100 / scalex;
-	      spu->scaled_image[scaled_strides + x] = spu->image[strides + unscaled_x];
-	      spu->scaled_aimage[scaled_strides + x] = spu->aimage[strides + unscaled_x];
+	      spu->scaled_image[scaled_strides + x] =
+		    spu->image[strides + unscaled_x];
+	      spu->scaled_aimage[scaled_strides + x] =
+		    spu->aimage[strides + unscaled_x];
+	      }
+	    }
+	  for (y = 0; y < spu->scaled_heightuv; ++y)
+	    {
+	    int unscaled_y = y * 0x100 / scaley;
+	    int strides = spu->strideuv * unscaled_y;
+	    int scaled_strides = spu->scaled_strideuv * y;
+	    for (x = 0; x < spu->scaled_widthuv; ++x)
+	      {
+	      int unscaled_x = x * 0x100 / scalex;
+	      spu->scaled_imageu[scaled_strides + x] =
+		    spu->imageu[strides + unscaled_x];
+	      spu->scaled_imagev[scaled_strides + x] =
+		    spu->imagev[strides + unscaled_x];
+	      spu->scaled_aimageuv[scaled_strides + x] =
+		    spu->aimageuv[strides + unscaled_x];
 	    }
 	  }
 	  break;
@@ -853,13 +1822,18 @@
 	  {
 	    /* Intermediate antialiasing. */
 	    for (y = 0; y < spu->scaled_height; ++y) {
-	      const unsigned int unscaled_top = y * spu->orig_frame_height / dys;
-	      unsigned int unscaled_bottom = (y + 1) * spu->orig_frame_height / dys;
+	      const unsigned int unscaled_top =
+		    y * spu->orig_frame_height / dys;
+	      unsigned int unscaled_bottom =
+		    (y + 1) * spu->orig_frame_height / dys;
 	      if (unscaled_bottom >= spu->height)
 		unscaled_bottom = spu->height - 1;
-	      for (x = 0; x < spu->scaled_width; ++x) {
-		const unsigned int unscaled_left = x * spu->orig_frame_width / dxs;
-		unsigned int unscaled_right = (x + 1) * spu->orig_frame_width / dxs;
+	      for (x = 0; x < spu->scaled_width; ++x)
+	        {
+		const unsigned int unscaled_left =
+			x * spu->orig_frame_width / dxs;
+		unsigned int unscaled_right =
+			(x + 1) * spu->orig_frame_width / dxs;
 		unsigned int color = 0;
 		unsigned int alpha = 0;
 		unsigned int walkx, walky;
@@ -867,7 +1841,8 @@
 		if (unscaled_right >= spu->width)
 		  unscaled_right = spu->width - 1;
 		for (walky = unscaled_top; walky <= unscaled_bottom; ++walky)
-		  for (walkx = unscaled_left; walkx <= unscaled_right; ++walkx) {
+		  for (walkx = unscaled_left; walkx <= unscaled_right; ++walkx)
+		    {
 		    base = walky * spu->stride + walkx;
 		    tmp = canon_alpha(spu->aimage[base]);
 		    alpha += tmp;
@@ -876,7 +1851,8 @@
 		base = y * spu->scaled_stride + x;
 		spu->scaled_image[base] = alpha ? color / alpha : 0;
 		spu->scaled_aimage[base] =
-		  alpha * (1 + unscaled_bottom - unscaled_top) * (1 + unscaled_right - unscaled_left);
+		  alpha * (1 + unscaled_bottom - unscaled_top) *
+		  (1 + unscaled_right - unscaled_left);
 		/* spu->scaled_aimage[base] =
 		  alpha * dxs * dys / spu->orig_frame_width / spu->orig_frame_height; */
 		if (spu->scaled_aimage[base]) {
@@ -886,6 +1862,51 @@
 		}
 	      }
 	    }
+	    for (y = 0; y < spu->scaled_heightuv; ++y) {
+	      const unsigned int unscaled_top = y *
+		    (spu->orig_frame_height/2) / (dys/2);
+	      unsigned int unscaled_bottom = (y + 1) *
+		    (spu->orig_frame_height/2) / (dys/2);
+	      if (unscaled_bottom >= spu->heightuv)
+		unscaled_bottom = spu->heightuv - 1;
+	      for (x = 0; x < spu->scaled_widthuv; ++x) {
+		const unsigned int unscaled_left = x *
+			(spu->orig_frame_width/2) / (dxs/2);
+		unsigned int unscaled_right = (x + 1) *
+			(spu->orig_frame_width/2) / (dxs/2);
+		unsigned int coloru = 0;
+		unsigned int colorv = 0;
+		unsigned int alpha = 0;
+		unsigned int walkx, walky;
+		unsigned int base, tmp;
+		if (unscaled_right >= spu->widthuv)
+		  unscaled_right = spu->widthuv - 1;
+		for (walky = unscaled_top; walky <= unscaled_bottom; ++walky)
+		  for (walkx = unscaled_left; walkx <= unscaled_right; ++walkx)
+		    {
+		    base = walky * spu->strideuv + walkx;
+		    tmp = canon_alpha(spu->aimageuv[base]);
+		    alpha += tmp;
+		    coloru += tmp * spu->imageu[base];
+		    colorv += tmp * spu->imagev[base];
+		    }
+		base = y * spu->scaled_strideuv + x;
+		spu->scaled_imageu[base] = alpha ? coloru / alpha : 0;
+		spu->scaled_imagev[base] = alpha ? colorv / alpha : 0;
+		spu->scaled_aimageuv[base] =
+		  alpha * (1 + unscaled_bottom - unscaled_top) *
+		  (1 + unscaled_right - unscaled_left);
+		/* spu->scaled_aimage[base] =
+		  alpha * dxs * dys / spu->orig_frame_width / spu->orig_frame_height; */
+		if (spu->scaled_aimageuv[base]) {
+		  spu->scaled_aimageuv[base] = 256 - spu->scaled_aimageuv[base];
+		  if (spu->scaled_aimageuv[base] + spu->scaled_imageu[base] > 255)
+		    spu->scaled_imageu[base] = 256 - spu->scaled_aimageuv[base];
+		  if (spu->scaled_aimageuv[base] + spu->scaled_imagev[base] > 255)
+		    spu->scaled_imagev[base] = 256 - spu->scaled_aimageuv[base];
+		}
+	      }
+	    }
 	  }
 	  break;
 	  case 2:
@@ -945,7 +1966,8 @@
 	      for (x = 0; x < spu->scaled_width; ++x) {
 		const double unscaled_x = x * inv_scalex;
 		const double unscaled_x_right = unscaled_x + inv_scalex;
-		const unsigned int left_right_column = FFMIN(unscaled_x_right, unscaled_x + 1.0);
+		const unsigned int left_right_column =
+			FFMIN(unscaled_x_right, unscaled_x + 1.0);
 		const double left = left_right_column - unscaled_x;
 		const unsigned int width = unscaled_x_right > left_right_column
 		  ? (unsigned int) unscaled_x_right - left_right_column
@@ -966,13 +1988,17 @@
 		*/
 		/* 1: top left part */
 		base = spu->stride * (unsigned int) unscaled_y;
-		tmp = left * top * canon_alpha(spu->aimage[base + (unsigned int) unscaled_x]);
+		tmp = left * top *
+		    canon_alpha(spu->aimage[base + (unsigned int) unscaled_x]);
 		alpha += tmp;
 		color += tmp * spu->image[base + (unsigned int) unscaled_x];
 		/* 2: top center part */
 		if (width > 0) {
 		  unsigned int walkx;
-		  for (walkx = left_right_column; walkx < (unsigned int) unscaled_x_right; ++walkx) {
+		  for (walkx = left_right_column;
+			walkx < (unsigned int) unscaled_x_right;
+			++walkx)
+		    {
 		    base = spu->stride * (unsigned int) unscaled_y + walkx;
 		    tmp = /* 1.0 * */ top * canon_alpha(spu->aimage[base]);
 		    alpha += tmp;
@@ -981,7 +2007,8 @@
 		}
 		/* 3: top right part */
 		if (right > 0.0) {
-		  base = spu->stride * (unsigned int) unscaled_y + (unsigned int) unscaled_x_right;
+		  base = spu->stride * (unsigned int) unscaled_y +
+			(unsigned int) unscaled_x_right;
 		  tmp = right * top * canon_alpha(spu->aimage[base]);
 		  alpha += tmp;
 		  color += tmp * spu->image[base];
@@ -989,7 +2016,1069 @@
 		/* 4: center left part */
 		if (height > 0) {
 		  unsigned int walky;
-		  for (walky = top_low_row; walky < (unsigned int) unscaled_y_bottom; ++walky) {
+		  for (walky = top_low_row;
+			walky < (unsigned int) unscaled_y_bottom;
+			++walky)
+		    {
+		    base = spu->stride * walky + (unsigned int) unscaled_x;
+		    tmp = left /* * 1.0 */ * canon_alpha(spu->aimage[base]);
+		    alpha += tmp;
+		    color += tmp * spu->image[base];
+		  }
+		}
+		/* 5: center part */
+		if (width > 0 && height > 0) {
+		  unsigned int walky;
+		  for (walky = top_low_row;
+			walky < (unsigned int) unscaled_y_bottom;
+			++walky)
+		    {
+		    unsigned int walkx;
+		    base = spu->stride * walky;
+		    for (walkx = left_right_column;
+			    walkx < (unsigned int) unscaled_x_right;
+			    ++walkx) {
+		      tmp = /* 1.0 * 1.0 * */ canon_alpha(spu->aimage[base + walkx]);
+		      alpha += tmp;
+		      color += tmp * spu->image[base + walkx];
+		    }
+		  }
+		}
+		/* 6: center right part */
+		if (right > 0.0 && height > 0) {
+		  unsigned int walky;
+		  for (walky = top_low_row;
+			walky < (unsigned int) unscaled_y_bottom;
+			++walky)
+		    {
+		    base = spu->stride * walky +
+			    (unsigned int) unscaled_x_right;
+		    tmp = right /* * 1.0 */ * canon_alpha(spu->aimage[base]);
+		    alpha += tmp;
+		    color += tmp * spu->image[base];
+		  }
+		}
+		/* 7: bottom left part */
+		if (bottom > 0.0) {
+		  base = spu->stride * (unsigned int) unscaled_y_bottom +
+			    (unsigned int) unscaled_x;
+		  tmp = left * bottom * canon_alpha(spu->aimage[base]);
+		  alpha += tmp;
+		  color += tmp * spu->image[base];
+		}
+		/* 8: bottom center part */
+		if (width > 0 && bottom > 0.0) {
+		  unsigned int walkx;
+		  base = spu->stride * (unsigned int) unscaled_y_bottom;
+		  for (walkx = left_right_column;
+			walkx < (unsigned int) unscaled_x_right;
+			++walkx)
+		    {
+		    tmp = /* 1.0 * */ bottom *
+			canon_alpha(spu->aimage[base + walkx]);
+		    alpha += tmp;
+		    color += tmp * spu->image[base + walkx];
+		  }
+		}
+		/* 9: bottom right part */
+		if (right > 0.0 && bottom > 0.0) {
+		  base = spu->stride * (unsigned int)
+			unscaled_y_bottom + (unsigned int) unscaled_x_right;
+		  tmp = right * bottom * canon_alpha(spu->aimage[base]);
+		  alpha += tmp;
+		  color += tmp * spu->image[base];
+		}
+		/* Finally mix these transparency and brightness information suitably */
+		base = spu->scaled_stride * y + x;
+		spu->scaled_image[base] = alpha > 0 ? color / alpha : 0;
+		spu->scaled_aimage[base] = alpha * scalex * scaley / 0x10000;
+		if (spu->scaled_aimage[base]) {
+		  spu->scaled_aimage[base] = 256 - spu->scaled_aimage[base];
+		  if (spu->scaled_aimage[base] + spu->scaled_image[base] > 255)
+		    spu->scaled_image[base] = 256 - spu->scaled_aimage[base];
+		}
+	      }
+	    }
+	    for (y = 0; y < spu->scaled_heightuv; ++y) {
+	      const double unscaled_y = y * inv_scaley;
+	      const double unscaled_y_bottom = unscaled_y + inv_scaley;
+	      const unsigned int top_low_row =
+			FFMIN(unscaled_y_bottom, unscaled_y + 1.0);
+	      const double top = top_low_row - unscaled_y;
+	      const unsigned int height = unscaled_y_bottom > top_low_row
+		? (unsigned int) unscaled_y_bottom - top_low_row
+		: 0;
+	      const double bottom = unscaled_y_bottom > top_low_row
+		? unscaled_y_bottom - floor(unscaled_y_bottom)
+		: 0.0;
+	      for (x = 0; x < spu->scaled_widthuv; ++x) {
+		const double unscaled_x = x * inv_scalex;
+		const double unscaled_x_right = unscaled_x + inv_scalex;
+		const unsigned int left_right_column =
+			FFMIN(unscaled_x_right, unscaled_x + 1.0);
+		const double left = left_right_column - unscaled_x;
+		const unsigned int width = unscaled_x_right > left_right_column
+		  ? (unsigned int) unscaled_x_right - left_right_column
+		  : 0;
+		const double right = unscaled_x_right > left_right_column
+		  ? unscaled_x_right - floor(unscaled_x_right)
+		  : 0.0;
+		double coloru = 0.0;
+		double colorv = 0.0;
+		double alpha = 0.0;
+		double tmp;
+		unsigned int base;
+		/* Now use these informations to compute a good alpha,
+                   and lightness.  The sum is on each of the 9
+                   region's surface and alpha and lightness.
+
+		  transformed alpha = sum(surface * alpha) / sum(surface)
+		  transformed color = sum(surface * alpha * color) / sum(surface * alpha)
+		*/
+		/* 1: top left part */
+		base = spu->strideuv * (unsigned int) unscaled_y;
+		tmp = left * top *
+		    canon_alpha(spu->aimageuv[base + (unsigned int) unscaled_x]);
+		alpha += tmp;
+		coloru += tmp * spu->imageu[base + (unsigned int) unscaled_x];
+		colorv += tmp * spu->imagev[base + (unsigned int) unscaled_x];
+		/* 2: top center part */
+		if (width > 0) {
+		  unsigned int walkx;
+		  for (walkx = left_right_column;
+			    walkx < (unsigned int) unscaled_x_right;
+			    ++walkx)
+		    {
+		    base = spu->strideuv * (unsigned int) unscaled_y + walkx;
+		    tmp = /* 1.0 * */ top * canon_alpha(spu->aimageuv[base]);
+		    alpha += tmp;
+		    coloru += tmp * spu->imageu[base];
+		    colorv += tmp * spu->imagev[base];
+		  }
+		}
+		/* 3: top right part */
+		if (right > 0.0) {
+		  base = spu->strideuv * (unsigned int) unscaled_y +
+			(unsigned int) unscaled_x_right;
+		  tmp = right * top * canon_alpha(spu->aimageuv[base]);
+		  alpha += tmp;
+		  coloru += tmp * spu->imageu[base];
+		  colorv += tmp * spu->imagev[base];
+		}
+		/* 4: center left part */
+		if (height > 0) {
+		  unsigned int walky;
+		  for (walky = top_low_row;
+			walky < (unsigned int) unscaled_y_bottom;
+			++walky)
+		    {
+		    base = spu->strideuv * walky + (unsigned int) unscaled_x;
+		    tmp = left /* * 1.0 */ * canon_alpha(spu->aimageuv[base]);
+		    alpha += tmp;
+		    coloru += tmp * spu->imageu[base];
+		    colorv += tmp * spu->imagev[base];
+		  }
+		}
+		/* 5: center part */
+		if (width > 0 && height > 0) {
+		  unsigned int walky;
+		  for (walky = top_low_row;
+			walky < (unsigned int) unscaled_y_bottom;
+			++walky)
+		    {
+		    unsigned int walkx;
+		    base = spu->strideuv * walky;
+		    for (walkx = left_right_column;
+			walkx < (unsigned int) unscaled_x_right;
+			++walkx)
+		    {
+		      tmp = /* 1.0 * 1.0 * */ canon_alpha(spu->aimageuv[base + walkx]);
+		      alpha += tmp;
+		      coloru += tmp * spu->imageu[base + walkx];
+		      colorv += tmp * spu->imagev[base + walkx];
+		    }
+		  }
+		}
+		/* 6: center right part */
+		if (right > 0.0 && height > 0) {
+		  unsigned int walky;
+		  for (walky = top_low_row;
+			walky < (unsigned int) unscaled_y_bottom;
+			++walky)
+		    {
+		    base = spu->strideuv * walky +
+			    (unsigned int) unscaled_x_right;
+		    tmp = right /* * 1.0 */ * canon_alpha(spu->aimageuv[base]);
+		    alpha += tmp;
+		    coloru += tmp * spu->imageu[base];
+		    colorv += tmp * spu->imagev[base];
+		  }
+		}
+		/* 7: bottom left part */
+		if (bottom > 0.0) {
+		  base = spu->strideuv * (unsigned int) unscaled_y_bottom +
+			    (unsigned int) unscaled_x;
+		  tmp = left * bottom * canon_alpha(spu->aimageuv[base]);
+		  alpha += tmp;
+		  coloru += tmp * spu->imageu[base];
+		  colorv += tmp * spu->imagev[base];
+		}
+		/* 8: bottom center part */
+		if (width > 0 && bottom > 0.0) {
+		  unsigned int walkx;
+		  base = spu->strideuv * (unsigned int) unscaled_y_bottom;
+		  for (walkx = left_right_column;
+			    walkx < (unsigned int) unscaled_x_right;
+			    ++walkx) {
+		    tmp = /* 1.0 * */ bottom * canon_alpha(spu->aimageuv[base + walkx]);
+		    alpha += tmp;
+		    coloru += tmp * spu->imageu[base + walkx];
+		    colorv += tmp * spu->imagev[base + walkx];
+		  }
+		}
+		/* 9: bottom right part */
+		if (right > 0.0 && bottom > 0.0) {
+		  base = spu->strideuv * (unsigned int) unscaled_y_bottom +
+			(unsigned int) unscaled_x_right;
+		  tmp = right * bottom * canon_alpha(spu->aimageuv[base]);
+		  alpha += tmp;
+		  coloru += tmp * spu->imageu[base];
+		  colorv += tmp * spu->imagev[base];
+		}
+		/* Finally mix these transparency and brightness information suitably */
+		base = spu->scaled_strideuv * y + x;
+		spu->scaled_imageu[base] = alpha > 0 ? coloru / alpha : 0;
+		spu->scaled_imagev[base] = alpha > 0 ? colorv / alpha : 0;
+		spu->scaled_aimageuv[base] = alpha * scalex * scaley / 0x10000;
+		if (spu->scaled_aimageuv[base]) {
+		  spu->scaled_aimageuv[base] = 256 - spu->scaled_aimageuv[base];
+		  if (spu->scaled_aimageuv[base] + spu->scaled_imageu[base] > 255)
+		    spu->scaled_imageu[base] = 256 - spu->scaled_aimageuv[base];
+		  if (spu->scaled_aimageuv[base] + spu->scaled_imagev[base] > 255)
+		    spu->scaled_imagev[base] = 256 - spu->scaled_aimageuv[base];
+		}
+	      }
+	    }
+	  }
+	  }
+nothing_to_do:
+	  /* Kludge: draw_alpha needs width multiple of 8. */
+	  if (spu->scaled_width < spu->scaled_stride)
+	    for (y = 0; y < spu->scaled_height; ++y) {
+	      memset(spu->scaled_aimage + y * spu->scaled_stride +
+		    spu->scaled_width, 0,
+		    spu->scaled_stride - spu->scaled_width);
+	    }
+	  spu->scaled_frame_width = dxs;
+	  spu->scaled_frame_height = dys;
+	  if (spu->scaled_widthuv < spu->scaled_strideuv)
+	    for (y = 0; y < spu->scaled_heightuv; ++y) {
+	      memset(spu->scaled_aimageuv + y * spu->scaled_stride +
+		    spu->scaled_widthuv, 0,
+		    spu->scaled_strideuv - spu->scaled_widthuv);
+	    }
+	  spu->scaled_frame_widthuv = dxs/2;
+	  spu->scaled_frame_heightuv = dys/2;
+	}
+      }
+      if (spu->scaled_image){
+        switch (spu_alignment) {
+        case 0:
+          spu->scaled_start_row = dys*sub_pos/100;
+	  if (spu->scaled_start_row + spu->scaled_height > dys)
+	    spu->scaled_start_row = dys - spu->scaled_height;
+          spu->scaled_start_rowuv = (dys/2)*sub_pos/100;
+	  if (spu->scaled_start_rowuv + spu->scaled_heightuv > (dys/2))
+	    spu->scaled_start_rowuv = (dys/2) - spu->scaled_heightuv;
+	  break;
+	case 1:
+          spu->scaled_start_row = dys*sub_pos/100 - spu->scaled_height/2;
+          if (sub_pos < 50) {
+	    if (spu->scaled_start_row < 0) spu->scaled_start_row = 0;
+	  } else {
+	    if (spu->scaled_start_row + spu->scaled_height > dys)
+	      spu->scaled_start_row = dys - spu->scaled_height;
+	  }
+          spu->scaled_start_rowuv = (dys/2)*sub_pos/100 - spu->scaled_heightuv/2;
+          if (sub_pos < 50) {
+	    if (spu->scaled_start_rowuv < 0) spu->scaled_start_rowuv = 0;
+	  } else {
+	    if (spu->scaled_start_rowuv + spu->scaled_heightuv > (dys/2))
+	      spu->scaled_start_rowuv = (dys/2) - spu->scaled_heightuv;
+	  }
+	  break;
+        case 2:
+          spu->scaled_start_row = dys*sub_pos/100 - spu->scaled_height;
+	  if (spu->scaled_start_row < 0) spu->scaled_start_row = 0;
+          spu->scaled_start_rowuv = (dys/2)*sub_pos/100 - spu->scaled_heightuv;
+	  if (spu->scaled_start_rowuv < 0) spu->scaled_start_rowuv = 0;
+	  break;
+	}
+	if (spu->dvdnav_color_spu==DVDNAV_SPU_YUY) {
+// Convert yuv to yuy
+	  if (!spu->imageyuy) spudec_create_yuy(spu,1);
+// Draw yuy
+	  if (spu->imageyuy) draw_alpha(spu->scaled_start_col,
+		    spu->scaled_start_row,
+		    spu->scaled_width*2,
+		    spu->scaled_height,
+		    DEST_PLANES_YUYV,
+		    spu->imageyuy,
+		    spu->aimageyuy,
+		    spu->strideyuy);
+	  } else {
+// Draw yuv Y, u and v planes
+	  draw_alpha(spu->scaled_start_col,
+		    spu->scaled_start_row,
+		    spu->scaled_width,
+		    spu->scaled_height,
+		    DEST_PLANES_Y,
+		    spu->scaled_image,
+		    spu->scaled_aimage,
+		    spu->scaled_stride);
+	  draw_alpha(spu->scaled_start_coluv,
+		    spu->scaled_start_rowuv,
+		    spu->scaled_widthuv,
+		    spu->scaled_heightuv,
+		    DEST_PLANES_U,
+		    spu->scaled_imageu,
+		    spu->scaled_aimageuv,
+		    spu->scaled_strideuv);
+	  draw_alpha(spu->scaled_start_coluv,
+		    spu->scaled_start_rowuv,
+		    spu->scaled_widthuv,
+		    spu->scaled_heightuv,
+		    DEST_PLANES_V,
+		    spu->scaled_imagev,
+		    spu->scaled_aimageuv,
+		    spu->scaled_strideuv);
+	  }
+	spu->spu_changed = 0;
+      }
+    }
+  }
+  else
+  {
+    mp_msg(MSGT_SPUDEC,MSGL_DBG2,
+	"SPU not displayed: start_pts=%d  end_pts=%d  now_pts=%d\n",
+        spu->start_pts, spu->end_pts, spu->now_pts);
+  }
+}
+
+//
+// Draw scaled image in RGB and BGR mode
+//	Note: expanded spudec_draw_scaled with Green and Blue planes (Y->Red planes)
+//	me:         spudec handle (spudec_handle_t *)
+//	dxs, dys:   size of the destination frame the SPU is scaled to
+//	draw_alpha: called once per plane with dp = DEST_PLANES_RB, DEST_PLANES_G
+//	            and DEST_PLANES_BR; all three calls share the same alpha plane
+//
+void spudec_draw_scaled_rgb(void *me, unsigned int dxs, unsigned int dys,
+	void (*draw_alpha)(int x0,int y0, int w,int h, int dp, unsigned char* src, unsigned char *srca, int stride))
+{
+  spudec_handle_t *spu = (spudec_handle_t *)me;
+  scale_pixel *table_x, *table_y;
+  unsigned int scalex = 0, scaley = 0;
+
+  if (spu->start_pts <= spu->now_pts && spu->now_pts < spu->end_pts) {
+    // check if only forced subtitles are requested
+    if( (spu->forced_subs_only) && !(spu->is_forced_sub) ){
+	return;
+    }
+
+    /* Unknown original size: draw unscaled, the scaler would divide by zero */
+    if (spu->orig_frame_width == 0 || spu->orig_frame_height == 0 || (!(spu_aamode&16) &&
+	spu->orig_frame_width == dxs && spu->orig_frame_height == dys)) {
+      if (spu->image) {
+      draw_alpha(spu->start_col,
+		spu->start_row,
+		spu->width,
+		spu->height,
+		DEST_PLANES_RB,
+		spu->image,
+		spu->aimage,
+		spu->stride);
+      draw_alpha(spu->start_col,
+		spu->start_row,
+		spu->width,
+		spu->height,
+		DEST_PLANES_G,
+		spu->imageu,
+		spu->aimage,
+		spu->stride);
+      draw_alpha(spu->start_col,
+		spu->start_row,
+		spu->width,
+		spu->height,
+		DEST_PLANES_BR,
+		spu->imagev,
+		spu->aimage,
+		spu->stride);
+      spu->spu_changed = 0;
+      }
+    } else {
+      if (spu->scaled_frame_width != dxs || spu->scaled_frame_height != dys) {	/* Resizing is needed */
+	/* scaled_x = scalex * x / 0x100
+	   scaled_y = scaley * y / 0x100
+	   order of operations is important because of rounding. */
+	scalex = 0x100 * dxs / spu->orig_frame_width;
+	scaley = 0x100 * dys / spu->orig_frame_height;
+
+	spu->scaled_start_col = spu->start_col * scalex / 0x100;
+	spu->scaled_start_row = spu->start_row * scaley / 0x100;
+	spu->scaled_width = spu->width * scalex / 0x100;
+	spu->scaled_height = spu->height * scaley / 0x100;
+	/* Kludge: draw_alpha needs width multiple of 8 */
+	spu->scaled_stride = (spu->scaled_width + 7) & ~7;
+	/* spudec_draw_scaled() reallocates scaled_image alone, so the G/B
+	   buffer can be missing even when scaled_image_size is large enough. */
+	if (spu->scaled_image_size < spu->scaled_stride * spu->scaled_height ||
+	    (!spu->scaled_imageu && spu->scaled_stride * spu->scaled_height)) {
+	  free(spu->scaled_image);
+	  free(spu->scaled_imageu);
+	  spu->scaled_image_size = 0;
+	  spu->scaled_image = malloc(2 * spu->scaled_stride * spu->scaled_height);
+	  spu->scaled_imageu = malloc(3 * spu->scaled_stride * spu->scaled_height);
+	  if (spu->scaled_image && spu->scaled_imageu) {
+	    spu->scaled_image_size = spu->scaled_stride * spu->scaled_height;
+	    spu->scaled_aimage = spu->scaled_image + spu->scaled_image_size;
+	    spu->scaled_imagev = spu->scaled_imageu + spu->scaled_image_size;
+	  } else {
+	    free(spu->scaled_image);
+	    free(spu->scaled_imageu);
+	    spu->scaled_image = spu->scaled_imageu = NULL;
+	  }
+	}
+	if (spu->scaled_image) {
+	  unsigned int x, y;
+	  if (spu->scaled_width <= 1 || spu->scaled_height <= 1) {
+	    goto nothing_to_do;
+	  }
+	  switch(spu_aamode&15) {
+	  case 4:
+	  sws_spu_image_rgb(spu->scaled_image,spu->scaled_imageu,
+		    spu->scaled_imagev, spu->scaled_aimage,
+		    spu->scaled_width, spu->scaled_height, spu->scaled_stride,
+		    spu->image, spu->imageu, spu->imagev, spu->aimage,
+		    spu->width, spu->height, spu->stride);
+	  break;
+	  case 3:
+	  table_x = calloc(spu->scaled_width, sizeof(scale_pixel));
+	  table_y = calloc(spu->scaled_height, sizeof(scale_pixel));
+	  if (!table_x || !table_y) {
+	    mp_msg(MSGT_SPUDEC, MSGL_FATAL,
+		    "Fatal: spudec_draw_scaled_rgb: calloc failed\n");
+	    free(table_x);
+	    free(table_y);
+	    return;	/* scaled_frame_* untouched, so the next call retries */
+	  }
+	  scale_table(0, 0, spu->width - 1, spu->scaled_width - 1, table_x);
+	  scale_table(0, 0, spu->height - 1, spu->scaled_height - 1, table_y);
+	  for (y = 0; y < spu->scaled_height; y++)
+	    for (x = 0; x < spu->scaled_width; x++)
+	      scale_image_rgb(x, y, table_x, table_y, spu);
+	  free(table_x);
+	  free(table_y);
+	  break;
+	  case 0:
+	  /* no antialiasing */
+	  for (y = 0; y < spu->scaled_height; ++y) {
+	    int unscaled_y = y * 0x100 / scaley;
+	    int strides = spu->stride * unscaled_y;
+	    int scaled_strides = spu->scaled_stride * y;
+	    for (x = 0; x < spu->scaled_width; ++x) {
+	      int unscaled_x = x * 0x100 / scalex;
+	      spu->scaled_image[scaled_strides + x] =
+		    spu->image[strides + unscaled_x];
+	      spu->scaled_imageu[scaled_strides + x] =
+		    spu->imageu[strides + unscaled_x];
+	      spu->scaled_imagev[scaled_strides + x] =
+		    spu->imagev[strides + unscaled_x];
+	      spu->scaled_aimage[scaled_strides + x] =
+		    spu->aimage[strides + unscaled_x];
+	    }
+	  }
+	  break;
+	  case 1:
+	  {
+	    /* Intermediate antialiasing. */
+	    for (y = 0; y < spu->scaled_height; ++y) {
+	      const unsigned int unscaled_top = y *
+			spu->orig_frame_height / dys;
+	      unsigned int unscaled_bottom = (y + 1) *
+			spu->orig_frame_height / dys;
+	      if (unscaled_bottom >= spu->height)
+		unscaled_bottom = spu->height - 1;
+	      for (x = 0; x < spu->scaled_width; ++x) {
+		const unsigned int unscaled_left = x *
+			    spu->orig_frame_width / dxs;
+		unsigned int unscaled_right = (x + 1) *
+			    spu->orig_frame_width / dxs;
+		unsigned int colorr = 0;
+		unsigned int colorg = 0;
+		unsigned int colorb = 0;
+		unsigned int alpha = 0;
+		unsigned int walkx, walky;
+		unsigned int base, tmp;
+		if (unscaled_right >= spu->width)
+		  unscaled_right = spu->width - 1;
+		for (walky = unscaled_top; walky <= unscaled_bottom; ++walky)
+		  for (walkx = unscaled_left; walkx <= unscaled_right; ++walkx) {
+		    base = walky * spu->stride + walkx;
+		    tmp = canon_alpha(spu->aimage[base]);
+		    alpha += tmp;
+		    colorr += tmp * spu->image[base];
+		    colorg += tmp * spu->imageu[base];
+		    colorb += tmp * spu->imagev[base];
+		  }
+		base = y * spu->scaled_stride + x;
+		spu->scaled_image[base] = alpha ? colorr / alpha : 0;
+		spu->scaled_imageu[base] = alpha ? colorg / alpha : 0;
+		spu->scaled_imagev[base] = alpha ? colorb / alpha : 0;
+		spu->scaled_aimage[base] =
+		  alpha * (1 + unscaled_bottom - unscaled_top) * (1 + unscaled_right - unscaled_left);
+		if (spu->scaled_aimage[base]) {
+		  spu->scaled_aimage[base] = 256 - spu->scaled_aimage[base];
+		  if (spu->scaled_aimage[base] + spu->scaled_image[base] > 255)
+		    spu->scaled_image[base] = 256 - spu->scaled_aimage[base];
+		  if (spu->scaled_aimage[base] + spu->scaled_imageu[base] > 255)
+		    spu->scaled_imageu[base] = 256 - spu->scaled_aimage[base];
+		  if (spu->scaled_aimage[base] + spu->scaled_imagev[base] > 255)
+		    spu->scaled_imagev[base] = 256 - spu->scaled_aimage[base];
+		}
+	      }
+	    }
+	  }
+	  break;
+	  case 2:
+	  {
+	    /* Best antialiasing.  Very slow. */
+	    /* Any pixel (x, y) represents pixels from the original
+	       rectangular region comprised between the columns
+	       unscaled_y and unscaled_y + 0x100 / scaley and the rows
+	       unscaled_x and unscaled_x + 0x100 / scalex
+
+	       The original rectangular region that the scaled pixel
+	       represents is cut in 9 rectangular areas like this:
+
+	       +---+-----------------+---+
+	       | 1 |        2        | 3 |
+	       +---+-----------------+---+
+	       |   |                 |   |
+	       | 4 |        5        | 6 |
+	       |   |                 |   |
+	       +---+-----------------+---+
+	       | 7 |        8        | 9 |
+	       +---+-----------------+---+
+
+	       The width of the left column is at most one pixel and
+	       it is never null and its right column is at a pixel
+	       boundary.  The height of the top row is at most one
+	       pixel it is never null and its bottom row is at a
+	       pixel boundary. The width and height of region 5 are
+	       integral values.  The width of the right column is
+	       what remains and is less than one pixel.  The height
+	       of the bottom row is what remains and is less than
+	       one pixel.
+
+	       The row above 1, 2, 3 is unscaled_y.  The row between
+	       1, 2, 3 and 4, 5, 6 is top_low_row.  The row between 4,
+	       5, 6 and 7, 8, 9 is (unsigned int)unscaled_y_bottom.
+	       The row beneath 7, 8, 9 is unscaled_y_bottom.
+
+	       The column left of 1, 4, 7 is unscaled_x.  The column
+	       between 1, 4, 7 and 2, 5, 8 is left_right_column.  The
+	       column between 2, 5, 8 and 3, 6, 9 is (unsigned
+	       int)unscaled_x_right.  The column right of 3, 6, 9 is
+	       unscaled_x_right. */
+	    const double inv_scalex = (double) 0x100 / scalex;
+	    const double inv_scaley = (double) 0x100 / scaley;
+	    for (y = 0; y < spu->scaled_height; ++y) {
+	      const double unscaled_y = y * inv_scaley;
+	      const double unscaled_y_bottom = unscaled_y + inv_scaley;
+	      const unsigned int top_low_row =
+			FFMIN(unscaled_y_bottom, unscaled_y + 1.0);
+	      const double top = top_low_row - unscaled_y;
+	      const unsigned int height = unscaled_y_bottom > top_low_row
+		? (unsigned int) unscaled_y_bottom - top_low_row
+		: 0;
+	      const double bottom = unscaled_y_bottom > top_low_row
+		? unscaled_y_bottom - floor(unscaled_y_bottom)
+		: 0.0;
+	      for (x = 0; x < spu->scaled_width; ++x) {
+		const double unscaled_x = x * inv_scalex;
+		const double unscaled_x_right = unscaled_x + inv_scalex;
+		const unsigned int left_right_column =
+			FFMIN(unscaled_x_right, unscaled_x + 1.0);
+		const double left = left_right_column - unscaled_x;
+		const unsigned int width = unscaled_x_right > left_right_column
+		  ? (unsigned int) unscaled_x_right - left_right_column
+		  : 0;
+		const double right = unscaled_x_right > left_right_column
+		  ? unscaled_x_right - floor(unscaled_x_right)
+		  : 0.0;
+		double colorr = 0.0;
+		double colorg = 0.0;
+		double colorb = 0.0;
+		double alpha = 0.0;
+		double tmp;
+		unsigned int base;
+		/* Now use these informations to compute a good alpha,
+                   and lightness.  The sum is on each of the 9
+                   region's surface and alpha and lightness.
+
+		  transformed alpha = sum(surface * alpha) / sum(surface)
+		  transformed color = sum(surface * alpha * color) / sum(surface * alpha)
+		*/
+		/* 1: top left part */
+		base = spu->stride * (unsigned int) unscaled_y;
+		tmp = left * top * canon_alpha(spu->aimage[base + (unsigned int) unscaled_x]);
+		alpha += tmp;
+		colorr += tmp * spu->image[base + (unsigned int) unscaled_x];
+		colorg += tmp * spu->imageu[base + (unsigned int) unscaled_x];
+		colorb += tmp * spu->imagev[base + (unsigned int) unscaled_x];
+		/* 2: top center part */
+		if (width > 0) {
+		  unsigned int walkx;
+		  for (walkx = left_right_column;
+			    walkx < (unsigned int) unscaled_x_right;
+			    ++walkx) {
+		    base = spu->stride * (unsigned int) unscaled_y + walkx;
+		    tmp = /* 1.0 * */ top * canon_alpha(spu->aimage[base]);
+		    alpha += tmp;
+		    colorr += tmp * spu->image[base];
+		    colorg += tmp * spu->imageu[base];
+		    colorb += tmp * spu->imagev[base];
+		  }
+		}
+		/* 3: top right part */
+		if (right > 0.0) {
+		  base = spu->stride * (unsigned int) unscaled_y +
+			    (unsigned int) unscaled_x_right;
+		  tmp = right * top * canon_alpha(spu->aimage[base]);
+		  alpha += tmp;
+		  colorr += tmp * spu->image[base];
+		  colorg += tmp * spu->imageu[base];
+		  colorb += tmp * spu->imagev[base];
+		}
+		/* 4: center left part */
+		if (height > 0) {
+		  unsigned int walky;
+		  for (walky = top_low_row;
+			    walky < (unsigned int) unscaled_y_bottom;
+			    ++walky) {
+		    base = spu->stride * walky + (unsigned int) unscaled_x;
+		    tmp = left /* * 1.0 */ * canon_alpha(spu->aimage[base]);
+		    alpha += tmp;
+		    colorr += tmp * spu->image[base];
+		    colorg += tmp * spu->imageu[base];
+		    colorb += tmp * spu->imagev[base];
+		  }
+		}
+		/* 5: center part */
+		if (width > 0 && height > 0) {
+		  unsigned int walky;
+		  for (walky = top_low_row;
+			    walky < (unsigned int) unscaled_y_bottom;
+			    ++walky) {
+		    unsigned int walkx;
+		    base = spu->stride * walky;
+		    for (walkx = left_right_column;
+			    walkx < (unsigned int) unscaled_x_right;
+			    ++walkx) {
+		      tmp = /* 1.0 * 1.0 * */ canon_alpha(spu->aimage[base + walkx]);
+		      alpha += tmp;
+		      colorr += tmp * spu->image[base + walkx];
+		      colorg += tmp * spu->imageu[base + walkx];
+		      colorb += tmp * spu->imagev[base + walkx];
+		    }
+		  }
+		}
+		/* 6: center right part */
+		if (right > 0.0 && height > 0) {
+		  unsigned int walky;
+		  for (walky = top_low_row;
+				walky < (unsigned int) unscaled_y_bottom;
+				++walky) {
+		    base = spu->stride * walky + (unsigned int) unscaled_x_right;
+		    tmp = right /* * 1.0 */ * canon_alpha(spu->aimage[base]);
+		    alpha += tmp;
+		    colorr += tmp * spu->image[base];
+		    colorg += tmp * spu->imageu[base];
+		    colorb += tmp * spu->imagev[base];
+		  }
+		}
+		/* 7: bottom left part */
+		if (bottom > 0.0) {
+		  base = spu->stride * (unsigned int) unscaled_y_bottom +
+			    (unsigned int) unscaled_x;
+		  tmp = left * bottom * canon_alpha(spu->aimage[base]);
+		  alpha += tmp;
+		  colorr += tmp * spu->image[base];
+		  colorg += tmp * spu->imageu[base];
+		  colorb += tmp * spu->imagev[base];
+		}
+		/* 8: bottom center part */
+		if (width > 0 && bottom > 0.0) {
+		  unsigned int walkx;
+		  base = spu->stride * (unsigned int) unscaled_y_bottom;
+		  for (walkx = left_right_column;
+			    walkx < (unsigned int) unscaled_x_right;
+			    ++walkx) {
+		    tmp = /* 1.0 * */ bottom * canon_alpha(spu->aimage[base + walkx]);
+		    alpha += tmp;
+		    colorr += tmp * spu->image[base + walkx];
+		    colorg += tmp * spu->imageu[base + walkx];
+		    colorb += tmp * spu->imagev[base + walkx];
+		  }
+		}
+		/* 9: bottom right part */
+		if (right > 0.0 && bottom > 0.0) {
+		  base = spu->stride * (unsigned int) unscaled_y_bottom +
+			    (unsigned int) unscaled_x_right;
+		  tmp = right * bottom * canon_alpha(spu->aimage[base]);
+		  alpha += tmp;
+		  colorr += tmp * spu->image[base];
+		  colorg += tmp * spu->imageu[base];
+		  colorb += tmp * spu->imagev[base];
+		}
+		/* Finally mix these transparency and brightness information suitably */
+		base = spu->scaled_stride * y + x;
+		spu->scaled_image[base] = alpha > 0 ? colorr / alpha : 0;
+		spu->scaled_imageu[base] = alpha > 0 ? colorg / alpha : 0;
+		spu->scaled_imagev[base] = alpha > 0 ? colorb / alpha : 0;
+		spu->scaled_aimage[base] = alpha * scalex * scaley / 0x10000;
+		if (spu->scaled_aimage[base]) {
+		  spu->scaled_aimage[base] = 256 - spu->scaled_aimage[base];
+		  if (spu->scaled_aimage[base] + spu->scaled_image[base] > 255)
+		    spu->scaled_image[base] = 256 - spu->scaled_aimage[base];
+		  if (spu->scaled_aimage[base] + spu->scaled_imageu[base] > 255)
+		    spu->scaled_imageu[base] = 256 - spu->scaled_aimage[base];
+		  if (spu->scaled_aimage[base] + spu->scaled_imagev[base] > 255)
+		    spu->scaled_imagev[base] = 256 - spu->scaled_aimage[base];
+		}
+	      }
+	    }
+	  }
+	  }
+nothing_to_do:
+	  /* Kludge: draw_alpha needs width multiple of 8. */
+	  if (spu->scaled_width < spu->scaled_stride)
+	    for (y = 0; y < spu->scaled_height; ++y) {
+	      memset(spu->scaled_aimage + y * spu->scaled_stride +
+			spu->scaled_width, 0,
+			spu->scaled_stride - spu->scaled_width);
+	    }
+	  spu->scaled_frame_width = dxs;
+	  spu->scaled_frame_height = dys;
+	}
+      }
+      if (spu->scaled_image){
+        switch (spu_alignment) {
+        case 0:
+          spu->scaled_start_row = dys*sub_pos/100;
+	  if (spu->scaled_start_row + spu->scaled_height > dys)
+	    spu->scaled_start_row = dys - spu->scaled_height;
+	  break;
+	case 1:
+          spu->scaled_start_row = dys*sub_pos/100 - spu->scaled_height/2;
+          if (sub_pos < 50) {
+	    if (spu->scaled_start_row < 0) spu->scaled_start_row = 0;
+	  } else {
+	    if (spu->scaled_start_row + spu->scaled_height > dys)
+	      spu->scaled_start_row = dys - spu->scaled_height;
+	  }
+	  break;
+        case 2:
+          spu->scaled_start_row = dys*sub_pos/100 - spu->scaled_height;
+	  if (spu->scaled_start_row < 0) spu->scaled_start_row = 0;
+	  break;
+	}
+// Draw planes: Red in RGB mode or Blue in BGR mode
+	draw_alpha(spu->scaled_start_col,
+		spu->scaled_start_row,
+		spu->scaled_width,
+		spu->scaled_height,
+		DEST_PLANES_RB,
+		spu->scaled_image,
+		spu->scaled_aimage,
+		spu->scaled_stride);
+// Draw Green planes in RGB and BGR mode
+	draw_alpha(spu->scaled_start_col,
+		spu->scaled_start_row,
+		spu->scaled_width,
+		spu->scaled_height,
+		DEST_PLANES_G,
+		spu->scaled_imageu,
+		spu->scaled_aimage,
+		spu->scaled_stride);
+// Draw planes: Blue in RGB mode or Red in BGR mode
+	draw_alpha(spu->scaled_start_col,
+		spu->scaled_start_row,
+		spu->scaled_width,
+		spu->scaled_height,
+		DEST_PLANES_BR,
+		spu->scaled_imagev,
+		spu->scaled_aimage,
+		spu->scaled_stride);
+	spu->spu_changed = 0;
+      }
+    }
+  } else {
+    mp_msg(MSGT_SPUDEC,MSGL_DBG2,
+	"SPU not displayed: start_pts=%d  end_pts=%d  now_pts=%d\n",
+        spu->start_pts, spu->end_pts, spu->now_pts);
+  }
+}
+
+void spudec_draw_scaled(void *me, unsigned int dxs, unsigned int dys, void (*draw_alpha)(int x0,int y0, int w,int h, int dp, unsigned char* src, unsigned char *srca, int stride))
+{
+  spudec_handle_t *spu = (spudec_handle_t *)me;
+  scale_pixel *table_x;
+  scale_pixel *table_y;
+  if (spu->dvdnav_menu) {
+    switch (spu->dvdnav_color_spu)
+      {
+//
+// Draw scaled image in YUV and YUY mode
+//
+      case DVDNAV_SPU_YUV:
+      case DVDNAV_SPU_YUY:
+        spudec_draw_scaled_yuv(me,dxs,dys,draw_alpha);
+	return;
+	break;
+//
+// Draw scaled image in RGB and BGR mode
+//
+      case DVDNAV_SPU_RGB:
+      case DVDNAV_SPU_BGR:
+        spudec_draw_scaled_rgb(me,dxs,dys,draw_alpha);
+	return;
+	break;
+      }
+    }
+//
+// Draw scaled image in Y mode (default)
+//
+  if (spu->start_pts <= spu->now_pts && spu->now_pts < spu->end_pts) {
+
+    // check if only forced subtitles are requested 
+    if( (spu->forced_subs_only) && !(spu->is_forced_sub) ){ 
+	return;
+    }
+
+    if (!(spu_aamode&16) && (spu->orig_frame_width == 0 || spu->orig_frame_height == 0
+	|| (spu->orig_frame_width == dxs && spu->orig_frame_height == dys))) {
+      if (spu->image)
+      {
+	draw_alpha(spu->start_col, spu->start_row, spu->width, spu->height,DEST_PLANES_Y,
+		   spu->image, spu->aimage, spu->stride);
+	spu->spu_changed = 0;
+      }
+    }
+    else {
+      if (spu->scaled_frame_width != dxs || spu->scaled_frame_height != dys) {	/* Resizing is needed */
+	/* scaled_x = scalex * x / 0x100
+	   scaled_y = scaley * y / 0x100
+	   order of operations is important because of rounding. */
+	unsigned int scalex = 0x100 * dxs / spu->orig_frame_width;
+	unsigned int scaley = 0x100 * dys / spu->orig_frame_height;
+	spu->scaled_start_col = spu->start_col * scalex / 0x100;
+	spu->scaled_start_row = spu->start_row * scaley / 0x100;
+	spu->scaled_width = spu->width * scalex / 0x100;
+	spu->scaled_height = spu->height * scaley / 0x100;
+	/* Kludge: draw_alpha needs width multiple of 8 */
+	spu->scaled_stride = (spu->scaled_width + 7) & ~7;
+	if (spu->scaled_image_size < spu->scaled_stride * spu->scaled_height) {
+	  if (spu->scaled_image) {
+	    free(spu->scaled_image);
+	    spu->scaled_image_size = 0;
+	  }
+	  spu->scaled_image = malloc(2 * spu->scaled_stride * spu->scaled_height);
+	  if (spu->scaled_image) {
+	    spu->scaled_image_size = spu->scaled_stride * spu->scaled_height;
+	    spu->scaled_aimage = spu->scaled_image + spu->scaled_image_size;
+	  }
+	}
+	if (spu->scaled_image) {
+	  unsigned int x, y;
+	  if (spu->scaled_width <= 1 || spu->scaled_height <= 1) {
+	    goto nothing_to_do;
+	  }
+	  switch(spu_aamode&15) {
+	  case 4:
+	  sws_spu_image(spu->scaled_image, spu->scaled_aimage,
+		  spu->scaled_width, spu->scaled_height, spu->scaled_stride,
+		  spu->image, spu->aimage, spu->width, spu->height, spu->stride);
+	  break;
+	  case 3:
+	  table_x = calloc(spu->scaled_width, sizeof(scale_pixel));
+	  table_y = calloc(spu->scaled_height, sizeof(scale_pixel));
+	  if (!table_x || !table_y) {
+	    mp_msg(MSGT_SPUDEC, MSGL_FATAL, "Fatal: spudec_draw_scaled: calloc failed\n");
+	  }
+	  scale_table(0, 0, spu->width - 1, spu->scaled_width - 1, table_x);
+	  scale_table(0, 0, spu->height - 1, spu->scaled_height - 1, table_y);
+	  for (y = 0; y < spu->scaled_height; y++)
+	    for (x = 0; x < spu->scaled_width; x++)
+	      scale_image(x, y, table_x, table_y, spu);
+	  free(table_x);
+	  free(table_y);
+	  break;
+	  case 0:
+	  /* no antialiasing */
+	  for (y = 0; y < spu->scaled_height; ++y) {
+	    int unscaled_y = y * 0x100 / scaley;
+	    int strides = spu->stride * unscaled_y;
+	    int scaled_strides = spu->scaled_stride * y;
+	    for (x = 0; x < spu->scaled_width; ++x) {
+	      int unscaled_x = x * 0x100 / scalex;
+	      spu->scaled_image[scaled_strides + x] = spu->image[strides + unscaled_x];
+	      spu->scaled_aimage[scaled_strides + x] = spu->aimage[strides + unscaled_x];
+	    }
+	  }
+	  break;
+	  case 1:
+	  {
+	    /* Intermediate antialiasing. */
+	    for (y = 0; y < spu->scaled_height; ++y) {
+	      const unsigned int unscaled_top = y * spu->orig_frame_height / dys;
+	      unsigned int unscaled_bottom = (y + 1) * spu->orig_frame_height / dys;
+	      if (unscaled_bottom >= spu->height)
+		unscaled_bottom = spu->height - 1;
+	      for (x = 0; x < spu->scaled_width; ++x) {
+		const unsigned int unscaled_left = x * spu->orig_frame_width / dxs;
+		unsigned int unscaled_right = (x + 1) * spu->orig_frame_width / dxs;
+		unsigned int color = 0;
+		unsigned int alpha = 0;
+		unsigned int walkx, walky;
+		unsigned int base, tmp;
+		if (unscaled_right >= spu->width)
+		  unscaled_right = spu->width - 1;
+		for (walky = unscaled_top; walky <= unscaled_bottom; ++walky)
+		  for (walkx = unscaled_left; walkx <= unscaled_right; ++walkx) {
+		    base = walky * spu->stride + walkx;
+		    tmp = canon_alpha(spu->aimage[base]);
+		    alpha += tmp;
+		    color += tmp * spu->image[base];
+		  }
+		base = y * spu->scaled_stride + x;
+		spu->scaled_image[base] = alpha ? color / alpha : 0;
+		spu->scaled_aimage[base] =
+		  alpha * (1 + unscaled_bottom - unscaled_top) * (1 + unscaled_right - unscaled_left);
+		/* spu->scaled_aimage[base] =
+		  alpha * dxs * dys / spu->orig_frame_width / spu->orig_frame_height; */
+		if (spu->scaled_aimage[base]) {
+		  spu->scaled_aimage[base] = 256 - spu->scaled_aimage[base];
+		  if (spu->scaled_aimage[base] + spu->scaled_image[base] > 255)
+		    spu->scaled_image[base] = 256 - spu->scaled_aimage[base];
+		}
+	      }
+	    }
+	  }
+	  break;
+	  case 2:
+	  {
+	    /* Best antialiasing.  Very slow. */
+	    /* Any pixel (x, y) represents pixels from the original
+	       rectangular region comprised between the columns
+	       unscaled_y and unscaled_y + 0x100 / scaley and the rows
+	       unscaled_x and unscaled_x + 0x100 / scalex
+
+	       The original rectangular region that the scaled pixel
+	       represents is cut in 9 rectangular areas like this:
+	       
+	       +---+-----------------+---+
+	       | 1 |        2        | 3 |
+	       +---+-----------------+---+
+	       |   |                 |   |
+	       | 4 |        5        | 6 |
+	       |   |                 |   |
+	       +---+-----------------+---+
+	       | 7 |        8        | 9 |
+	       +---+-----------------+---+
+
+	       The width of the left column is at most one pixel and
+	       it is never null and its right column is at a pixel
+	       boundary.  The height of the top row is at most one
+	       pixel it is never null and its bottom row is at a
+	       pixel boundary. The width and height of region 5 are
+	       integral values.  The width of the right column is
+	       what remains and is less than one pixel.  The height
+	       of the bottom row is what remains and is less than
+	       one pixel.
+
+	       The row above 1, 2, 3 is unscaled_y.  The row between
+	       1, 2, 3 and 4, 5, 6 is top_low_row.  The row between 4,
+	       5, 6 and 7, 8, 9 is (unsigned int)unscaled_y_bottom.
+	       The row beneath 7, 8, 9 is unscaled_y_bottom.
+
+	       The column left of 1, 4, 7 is unscaled_x.  The column
+	       between 1, 4, 7 and 2, 5, 8 is left_right_column.  The
+	       column between 2, 5, 8 and 3, 6, 9 is (unsigned
+	       int)unscaled_x_right.  The column right of 3, 6, 9 is
+	       unscaled_x_right. */
+	    const double inv_scalex = (double) 0x100 / scalex;
+	    const double inv_scaley = (double) 0x100 / scaley;
+	    for (y = 0; y < spu->scaled_height; ++y) {
+	      const double unscaled_y = y * inv_scaley;
+	      const double unscaled_y_bottom = unscaled_y + inv_scaley;
+	      const unsigned int top_low_row = FFMIN(unscaled_y_bottom, unscaled_y + 1.0);
+	      const double top = top_low_row - unscaled_y;
+	      const unsigned int height = unscaled_y_bottom > top_low_row
+		? (unsigned int) unscaled_y_bottom - top_low_row
+		: 0;
+	      const double bottom = unscaled_y_bottom > top_low_row
+		? unscaled_y_bottom - floor(unscaled_y_bottom)
+		: 0.0;
+	      for (x = 0; x < spu->scaled_width; ++x) {
+		const double unscaled_x = x * inv_scalex;
+		const double unscaled_x_right = unscaled_x + inv_scalex;
+		const unsigned int left_right_column = FFMIN(unscaled_x_right, unscaled_x + 1.0);
+		const double left = left_right_column - unscaled_x;
+		const unsigned int width = unscaled_x_right > left_right_column
+		  ? (unsigned int) unscaled_x_right - left_right_column
+		  : 0;
+		const double right = unscaled_x_right > left_right_column
+		  ? unscaled_x_right - floor(unscaled_x_right)
+		  : 0.0;
+		double color = 0.0;
+		double alpha = 0.0;
+		double tmp;
+		unsigned int base;
+		/* Now use these informations to compute a good alpha,
+                   and lightness.  The sum is on each of the 9
+                   region's surface and alpha and lightness.
+
+		  transformed alpha = sum(surface * alpha) / sum(surface)
+		  transformed color = sum(surface * alpha * color) / sum(surface * alpha)
+		*/
+		/* 1: top left part */
+		base = spu->stride * (unsigned int) unscaled_y;
+		tmp = left * top * canon_alpha(spu->aimage[base + (unsigned int) unscaled_x]);
+		alpha += tmp;
+		color += tmp * spu->image[base + (unsigned int) unscaled_x];
+		/* 2: top center part */
+		if (width > 0) {
+		  unsigned int walkx;
+		  for (walkx = left_right_column; walkx < (unsigned int) unscaled_x_right; ++walkx) {
+		    base = spu->stride * (unsigned int) unscaled_y + walkx;
+		    tmp = /* 1.0 * */ top * canon_alpha(spu->aimage[base]);
+		    alpha += tmp;
+		    color += tmp * spu->image[base];
+		  }
+		}
+		/* 3: top right part */
+		if (right > 0.0) {
+		  base = spu->stride * (unsigned int) unscaled_y + (unsigned int) unscaled_x_right;
+		  tmp = right * top * canon_alpha(spu->aimage[base]);
+		  alpha += tmp;
+		  color += tmp * spu->image[base];
+		}
+		/* 4: center left part */
+		if (height > 0) {
+		  unsigned int walky;
+		  for (walky = top_low_row; walky < (unsigned int) unscaled_y_bottom; ++walky) {
 		    base = spu->stride * walky + (unsigned int) unscaled_x;
 		    tmp = left /* * 1.0 */ * canon_alpha(spu->aimage[base]);
 		    alpha += tmp;
@@ -1089,7 +3178,7 @@
 	  break;
 	}
 	draw_alpha(spu->scaled_start_col, spu->scaled_start_row, spu->scaled_width, spu->scaled_height,
-		   spu->scaled_image, spu->scaled_aimage, spu->scaled_stride);
+		    DEST_PLANES_Y,spu->scaled_image, spu->scaled_aimage, spu->scaled_stride);
 	spu->spu_changed = 0;
       }
     }
@@ -1173,6 +3262,21 @@
 	free(spu->scaled_image);
     if (spu->image)
       free(spu->image);
+    if (spu->dvdnav_image)	// Free dvdnav SPU image
+      free(spu->dvdnav_image);
+    if (spu->dvdnav_aimage)	// Free dvdnav SPU image alpha
+      free(spu->dvdnav_aimage);
+
+    if (spu->imageu)		// Free dvdnav SPU uv or GB image
+      free(spu->imageu);
+    if (spu->imageyuy)		// Free dvdnav SPU YUY image
+      free(spu->imageyuy);
+    spu->imageyuy=NULL;
+    if (spu->scaled_imageu)	// Free dvdnav SPU uv or GB alpha
+	free(spu->scaled_imageu);
+
+    if (spu->last_packet) {spudec_free_packet(spu->last_packet); spu->last_packet=NULL;}
+    spu->dvdnav_allocated = 0;
     free(spu);
   }
 }
--- libvo/sub.h	 (revision 22772)
+++ libvo/sub.h	 (working copy)
@@ -112,7 +112,7 @@
 //extern void vo_draw_text_osd(int dxs,int dys,void (*draw_alpha)(int x0,int y0, int w,int h, unsigned char* src, unsigned char *srca, int stride));
 //extern void vo_draw_text_progbar(int dxs,int dys,void (*draw_alpha)(int x0,int y0, int w,int h, unsigned char* src, unsigned char *srca, int stride));
 //extern void vo_draw_text_sub(int dxs,int dys,void (*draw_alpha)(int x0,int y0, int w,int h, unsigned char* src, unsigned char *srca, int stride));
-extern void vo_draw_text(int dxs,int dys,void (*draw_alpha)(int x0,int y0, int w,int h, unsigned char* src, unsigned char *srca, int stride));
+extern void vo_draw_text(int dxs,int dys,void (*draw_alpha)(int x0,int y0, int w,int h, int dp, unsigned char* src, unsigned char *srca, int stride));
 extern void vo_remove_text(int dxs,int dys,void (*remove)(int x0,int y0, int w,int h));
 
 void vo_init_osd(void);
--- libvo/sub.c	 (revision 22772)
+++ libvo/sub.c	 (working copy)
@@ -139,11 +139,12 @@
 }
 
 // renders the buffer
-inline static void vo_draw_text_from_buffer(mp_osd_obj_t* obj,void (*draw_alpha)(int x0,int y0, int w,int h, unsigned char* src, unsigned char *srca, int stride)){
+inline static void vo_draw_text_from_buffer(mp_osd_obj_t* obj,void (*draw_alpha)(int x0,int y0, int w,int h, int dp, unsigned char* src, unsigned char *srca, int stride)){
     if (obj->allocated > 0) {
 	draw_alpha(obj->bbox.x1,obj->bbox.y1,
 		   obj->bbox.x2-obj->bbox.x1,
 		   obj->bbox.y2-obj->bbox.y1,
+		   DEST_PLANES_Y,
 		   obj->bitmap_buffer,
 		   obj->alpha_buffer,
 		   obj->stride);
@@ -763,7 +764,7 @@
   obj->flags |= OSDFLAG_BBOX;
 }
 
-inline static void vo_draw_spudec_sub(mp_osd_obj_t* obj, void (*draw_alpha)(int x0, int y0, int w, int h, unsigned char* src, unsigned char* srca, int stride))
+inline static void vo_draw_spudec_sub(mp_osd_obj_t* obj, void (*draw_alpha)(int x0, int y0, int w, int h, int dp, unsigned char* src, unsigned char* srca, int stride))
 {
   spudec_draw_scaled(vo_spudec, obj->dxs, obj->dys, draw_alpha);
 }
@@ -943,7 +944,7 @@
     }
 }
 
-void vo_draw_text(int dxs,int dys,void (*draw_alpha)(int x0,int y0, int w,int h, unsigned char* src, unsigned char *srca, int stride)){
+void vo_draw_text(int dxs,int dys,void (*draw_alpha)(int x0,int y0, int w,int h, int dp, unsigned char* src, unsigned char *srca, int stride)){
     mp_osd_obj_t* obj=vo_osd_list;
     vo_update_osd(dxs,dys);
     while(obj){
--- libvo/video_out.h	 (revision 22772)
+++ libvo/video_out.h	 (working copy)
@@ -85,6 +85,15 @@
   int w,h;
 } mp_win_t;
 
+// Query: does the current vo support YUV SPU drawing? (VO_TRUE/VO_FALSE)
+#define VOCTRL_YUVSPU_SUPPORT 33
+// Query: does the current vo support RGB SPU drawing? (VO_TRUE/VO_FALSE)
+#define VOCTRL_RGBSPU_SUPPORT 34
+// Query: does the current vo support BGR SPU drawing? (VO_TRUE/VO_FALSE)
+#define VOCTRL_BGRSPU_SUPPORT 35
+// Query: does the current vo support YUY SPU drawing? (VO_TRUE/VO_FALSE)
+#define VOCTRL_YUYSPU_SUPPORT 36
+
 #define VO_TRUE		1
 #define VO_FALSE	0
 #define VO_ERROR	-1
@@ -97,6 +106,15 @@
 #define VOFLAG_FLIPPING		0x08
 #define VOFLAG_XOVERLAY_SUB_VO  0x10000
 
+// dest planes (draw_alpha)
+#define	DEST_PLANES_Y		0	// Y planes (default)
+#define DEST_PLANES_U		1	// U planes in YUV mode
+#define DEST_PLANES_V		2	// V planes in YUV mode
+#define DEST_PLANES_RB		3	// R or B planes in RGB or BGR mode
+#define DEST_PLANES_G		4	// G planes in RGB or BGR mode
+#define DEST_PLANES_BR		5	// B or R planes in RGB or BGR mode
+#define DEST_PLANES_YUYV	6	// Yuv (all) planes in YUYv mode
+
 typedef struct vo_info_s
 {
         /* driver name ("Matrox Millennium G200/G400" */
--- libvo/vo_fbdev.c	 (revision 22772)
+++ libvo/vo_fbdev.c	 (working copy)
@@ -562,7 +562,7 @@
 static int fb_line_len;
 static int fb_xres;
 static int fb_yres;
-static void (*draw_alpha_p)(int w, int h, unsigned char *src,
+static void (*draw_alpha_p)(int w, int h, int dp, unsigned char *src,
 		unsigned char *srca, int stride, unsigned char *dst,
 		int dstride);
 
@@ -1053,14 +1053,14 @@
 	return 0;
 }
 
-static void draw_alpha(int x0, int y0, int w, int h, unsigned char *src,
+static void draw_alpha(int x0, int y0, int w, int h, int dp, unsigned char *src,
 		unsigned char *srca, int stride)
 {
 	unsigned char *dst;
 
 	dst = center + fb_line_len * y0 + fb_pixel_size * x0;
 
-	(*draw_alpha_p)(w, h, src, srca, stride, dst, fb_line_len);
+	(*draw_alpha_p)(w, h, dp, src, srca, stride, dst, fb_line_len);
 }
 
 static int draw_frame(uint8_t *src[]) { return 1; }
@@ -1174,6 +1174,13 @@
     return get_image(data);
   case VOCTRL_QUERY_FORMAT:
     return query_format(*((uint32_t*)data));
+  case VOCTRL_YUVSPU_SUPPORT:	/* neither YUV nor RGB SPU drawing is reported by this vo */
+  case VOCTRL_RGBSPU_SUPPORT:
+    return VO_FALSE;
+  case VOCTRL_BGRSPU_SUPPORT:
+    /* BGR SPU drawing is reported only for 15, 16, 24 and 32 bpp framebuffers */
+    return (fb_bpp == 15 || fb_bpp == 16 || fb_bpp == 24 || fb_bpp == 32)
+           ? VO_TRUE : VO_FALSE;
   }
 
 #ifdef CONFIG_VIDIX
--- libvo/vo_directfb2.c	 (revision 22772)
+++ libvo/vo_directfb2.c	 (working copy)
@@ -1447,6 +1447,20 @@
     
 	return(directfb_get_video_eq(data, value));
       }
+    case VOCTRL_YUVSPU_SUPPORT:	/* YUV SPU: only for the I420 / YV12 pixel formats */
+	if (pixel_format==DSPF_I420 || pixel_format==DSPF_YV12) return VO_TRUE;
+	return VO_FALSE;
+    case VOCTRL_RGBSPU_SUPPORT:	/* RGB SPU: 32, 24, 16 and 15 bit RGB pixel formats */
+	if (pixel_format==DSPF_RGB32 || pixel_format==DSPF_ARGB || pixel_format==DSPF_RGB24 ||
+		pixel_format==DSPF_RGB16) return VO_TRUE;
+#if DIRECTFBVERSION > DFB_VERSION(0,9,15)
+	if (pixel_format==DSPF_ARGB1555) return VO_TRUE; /* same version gate as draw_alpha() uses for the 15 bit format */
+#else
+	if (pixel_format==DSPF_RGB15) return VO_TRUE;
+#endif
+	return VO_FALSE;
+    case VOCTRL_BGRSPU_SUPPORT:	/* BGR SPU is never reported by this vo */
+	return VO_FALSE;
   };
   return VO_NOTIMPL;
 }
@@ -1460,7 +1474,7 @@
 
 // hopefully will be removed soon
 
-static void draw_alpha(int x0, int y0, int w, int h, unsigned char *src,
+static void draw_alpha(int x0, int y0, int w, int h, int dp, unsigned char *src,
 		unsigned char *srca, int stride)
 {
         void *dst;
@@ -1479,35 +1493,51 @@
 	switch(pixel_format) {
                 case DSPF_RGB32:
                 case DSPF_ARGB:
-                        vo_draw_alpha_rgb32(w,h,src,srca,stride,((uint8_t *) dst)+pitch*y0 + 4*x0,pitch);
+                        vo_draw_alpha_rgb32(w,h,dp,src,srca,stride,((uint8_t *) dst)+pitch*y0 + 4*x0,pitch);
                         break;
 
                 case DSPF_RGB24:
-                        vo_draw_alpha_rgb24(w,h,src,srca,stride,((uint8_t *) dst)+pitch*y0 + 3*x0,pitch);
+                        vo_draw_alpha_rgb24(w,h,dp,src,srca,stride,((uint8_t *) dst)+pitch*y0 + 3*x0,pitch);
                         break;
 
                 case DSPF_RGB16:
-                        vo_draw_alpha_rgb16(w,h,src,srca,stride,((uint8_t *) dst)+pitch*y0 + 2*x0,pitch);
+                        vo_draw_alpha_rgb16(w,h,dp,src,srca,stride,((uint8_t *) dst)+pitch*y0 + 2*x0,pitch);
                         break;
 #if DIRECTFBVERSION > DFB_VERSION(0,9,15)
                 case DSPF_ARGB1555:
 #else
                 case DSPF_RGB15:
 #endif
-                        vo_draw_alpha_rgb15(w,h,src,srca,stride,((uint8_t *) dst)+pitch*y0 + 2*x0,pitch);
+                        vo_draw_alpha_rgb15(w,h,dp,src,srca,stride,((uint8_t *) dst)+pitch*y0 + 2*x0,pitch);
                         break;
 
 		case DSPF_YUY2:
-    			vo_draw_alpha_yuy2(w,h,src,srca,stride,((uint8_t *) dst) + pitch*y0 + 2*x0,pitch);
+    			vo_draw_alpha_yuy2(w,h,dp,src,srca,stride,((uint8_t *) dst) + pitch*y0 + 2*x0,pitch);
 		break;
 
         	case DSPF_UYVY:
-    			vo_draw_alpha_yuy2(w,h,src,srca,stride,((uint8_t *) dst) + pitch*y0 + 2*x0 + 1,pitch);
+    			vo_draw_alpha_yuy2(w,h,dp,src,srca,stride,((uint8_t *) dst) + pitch*y0 + 2*x0 + 1,pitch);
 		break;
 
         	case DSPF_I420:
 		case DSPF_YV12:
-    			vo_draw_alpha_yv12(w,h,src,srca,stride,((uint8_t *) dst) + pitch*y0 + 1*x0,pitch);
+ 		  switch (dp) { /* dp selects which plane of the I420/YV12 surface is blended; other dp values draw nothing */
+ 		    case DEST_PLANES_Y:
+ 			vo_draw_alpha_yv12(w,h,dp,src,srca,stride,((uint8_t *) dst) + pitch*y0 + 1*x0,pitch);
+ 			break;
+ 		    case DEST_PLANES_U: /* NOTE(review): offsets pitch*height/4 and pitch*height/2 are below pitch*height, so they appear to point inside the Y area; vo_dfbmga starts chroma at pitch*in_height -- confirm against the surface layout */
+ 			if (pixel_format==DSPF_YV12)
+ 			    vo_draw_alpha_yv12(w,h,dp,src,srca,stride,((uint8_t *) dst) + pitch*height/4 + pitch*y0 + 1*x0,pitch);
+ 			    else
+ 			    vo_draw_alpha_yv12(w,h,dp,src,srca,stride,((uint8_t *) dst) + pitch*height/2 + pitch*y0 + 1*x0,pitch);
+ 		        break;
+ 		    case DEST_PLANES_V: /* mirror of the U case: the two offsets are swapped between YV12 and I420; NOTE(review): same offset concern as above */
+ 			if (pixel_format==DSPF_YV12)
+ 			    vo_draw_alpha_yv12(w,h,dp,src,srca,stride,((uint8_t *) dst) + pitch*height/2 + pitch*y0 + 1*x0,pitch);
+ 			    else
+ 			    vo_draw_alpha_yv12(w,h,dp,src,srca,stride,((uint8_t *) dst) + pitch*height/4 + pitch*y0 + 1*x0,pitch);
+ 		        break;
+ 		    }
 		break;
 		}
 
--- libvo/vo_vesa.c	 (revision 22772)
+++ libvo/vo_vesa.c	 (working copy)
@@ -47,6 +47,7 @@
 #include "vosub_vidix.h"
 #endif
 #include "mp_msg.h"
+#include "help_mp.h"
 
 #include "libswscale/swscale.h"
 #include "libmpcodecs/vf_scale.h"
@@ -108,7 +109,7 @@
 static unsigned video_mode; /* selected video mode for playback */
 static struct VesaModeInfoBlock video_mode_info;
 static int flip_trigger = 0;
-static void (*draw_alpha_fnc)(int x0,int y0, int w,int h, unsigned char* src, unsigned char *srca, int stride);
+static void (*draw_alpha_fnc)(int x0,int y0, int w,int h, int dp, unsigned char* src, unsigned char *srca, int stride);
 
 /* multibuffering */
 uint8_t*  video_base; /* should be never changed */
@@ -296,7 +297,7 @@
 /* Please comment it out if you want have OSD within movie */
 /*#define OSD_OUTSIDE_MOVIE 1*/
 
-static void draw_alpha_32(int x0,int y0, int w,int h, unsigned char* src, unsigned char *srca, int stride)
+static void draw_alpha_32(int x0,int y0, int w,int h, int dp, unsigned char* src, unsigned char *srca, int stride)
 {
    int dstride=HAS_DGA()?video_mode_info.XResolution:dstW;
 #ifndef OSD_OUTSIDE_MOVIE
@@ -306,10 +307,10 @@
 	y0 += y_offset;
    }
 #endif
-   vo_draw_alpha_rgb32(w,h,src,srca,stride,dga_buffer+4*(y0*dstride+x0),4*dstride);
+   vo_draw_alpha_rgb32(w,h,dp,src,srca,stride,dga_buffer+4*(y0*dstride+x0),4*dstride);
 }
 
-static void draw_alpha_24(int x0,int y0, int w,int h, unsigned char* src, unsigned char *srca, int stride)
+static void draw_alpha_24(int x0,int y0, int w,int h, int dp, unsigned char* src, unsigned char *srca, int stride)
 {
    int dstride=HAS_DGA()?video_mode_info.XResolution:dstW;
 #ifndef OSD_OUTSIDE_MOVIE
@@ -319,10 +320,10 @@
 	y0 += y_offset;
    }
 #endif
-   vo_draw_alpha_rgb24(w,h,src,srca,stride,dga_buffer+3*(y0*dstride+x0),3*dstride);
+   vo_draw_alpha_rgb24(w,h,dp,src,srca,stride,dga_buffer+3*(y0*dstride+x0),3*dstride);
 }
 
-static void draw_alpha_16(int x0,int y0, int w,int h, unsigned char* src, unsigned char *srca, int stride)
+static void draw_alpha_16(int x0,int y0, int w,int h, int dp, unsigned char* src, unsigned char *srca, int stride)
 {
    int dstride=HAS_DGA()?video_mode_info.XResolution:dstW;
 #ifndef OSD_OUTSIDE_MOVIE
@@ -332,10 +333,10 @@
 	y0 += y_offset;
    }
 #endif
-   vo_draw_alpha_rgb16(w,h,src,srca,stride,dga_buffer+2*(y0*dstride+x0),2*dstride);
+   vo_draw_alpha_rgb16(w,h,dp,src,srca,stride,dga_buffer+2*(y0*dstride+x0),2*dstride);
 }
 
-static void draw_alpha_15(int x0,int y0, int w,int h, unsigned char* src, unsigned char *srca, int stride)
+static void draw_alpha_15(int x0,int y0, int w,int h, int dp, unsigned char* src, unsigned char *srca, int stride)
 {
    int dstride=HAS_DGA()?video_mode_info.XResolution:dstW;
 #ifndef OSD_OUTSIDE_MOVIE
@@ -345,15 +346,16 @@
 	y0 += y_offset;
    }
 #endif
-   vo_draw_alpha_rgb15(w,h,src,srca,stride,dga_buffer+2*(y0*dstride+x0),2*dstride);
+   vo_draw_alpha_rgb15(w,h,dp,src,srca,stride,dga_buffer+2*(y0*dstride+x0),2*dstride);
 }
 
-static void draw_alpha_null(int x0,int y0, int w,int h, unsigned char* src, unsigned char *srca, int stride)
+static void draw_alpha_null(int x0,int y0, int w,int h, int dp, unsigned char* src, unsigned char *srca, int stride)
 {
   UNUSED(x0);
   UNUSED(y0);
   UNUSED(w);
   UNUSED(h);
+  UNUSED(dp);
   UNUSED(src);
   UNUSED(srca);
   UNUSED(stride);
@@ -1101,6 +1103,14 @@
   switch (request) {
   case VOCTRL_QUERY_FORMAT:
     return query_format(*((uint32_t*)data));
+  case VOCTRL_YUVSPU_SUPPORT:
+    return VO_FALSE;
+  case VOCTRL_RGBSPU_SUPPORT:
+    return VO_FALSE;
+  case VOCTRL_BGRSPU_SUPPORT:	/* VO_TRUE only when the destination format is BGR15/16/24/32 */
+    if (dstFourcc == IMGFMT_BGR15 || dstFourcc == IMGFMT_BGR16 ||
+	    dstFourcc == IMGFMT_BGR24 || dstFourcc == IMGFMT_BGR32) return VO_TRUE;
+    return VO_FALSE;
   }
 
 #ifdef CONFIG_VIDIX
--- libvo/osd.c	 (revision 22772)
+++ libvo/osd.c	 (working copy)
@@ -11,8 +11,9 @@
 #include <inttypes.h>
 #include "cpudetect.h"
 #include "mangle.h"
+#include "video_out.h"
 
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
 #define CAN_COMPILE_X86_ASM
 #endif
 
@@ -101,152 +102,152 @@
 
 #endif //CAN_COMPILE_X86_ASM
 
-void vo_draw_alpha_yv12(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
+void vo_draw_alpha_yv12(int w,int h, int dp, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
 #ifdef RUNTIME_CPUDETECT
 #ifdef CAN_COMPILE_X86_ASM
 	// ordered by speed / fastest first
 	if(gCpuCaps.hasMMX2)
-		vo_draw_alpha_yv12_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
+		vo_draw_alpha_yv12_MMX2(w, h, dp, src, srca, srcstride, dstbase, dststride);
 	else if(gCpuCaps.has3DNow)
-		vo_draw_alpha_yv12_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
+		vo_draw_alpha_yv12_3DNow(w, h, dp, src, srca, srcstride, dstbase, dststride);
 	else if(gCpuCaps.hasMMX)
-		vo_draw_alpha_yv12_MMX(w, h, src, srca, srcstride, dstbase, dststride);
+		vo_draw_alpha_yv12_MMX(w, h, dp, src, srca, srcstride, dstbase, dststride);
 	else
-		vo_draw_alpha_yv12_X86(w, h, src, srca, srcstride, dstbase, dststride);
+		vo_draw_alpha_yv12_X86(w, h, dp, src, srca, srcstride, dstbase, dststride);
 #else
-		vo_draw_alpha_yv12_C(w, h, src, srca, srcstride, dstbase, dststride);
+		vo_draw_alpha_yv12_C(w, h, dp, src, srca, srcstride, dstbase, dststride);
 #endif
 #else //RUNTIME_CPUDETECT
 #ifdef HAVE_MMX2
-		vo_draw_alpha_yv12_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
+		vo_draw_alpha_yv12_MMX2(w, h, dp, src, srca, srcstride, dstbase, dststride);
 #elif defined (HAVE_3DNOW)
-		vo_draw_alpha_yv12_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
+		vo_draw_alpha_yv12_3DNow(w, h, dp, src, srca, srcstride, dstbase, dststride);
 #elif defined (HAVE_MMX)
-		vo_draw_alpha_yv12_MMX(w, h, src, srca, srcstride, dstbase, dststride);
-#elif defined(ARCH_X86)
-		vo_draw_alpha_yv12_X86(w, h, src, srca, srcstride, dstbase, dststride);
+		vo_draw_alpha_yv12_MMX(w, h, dp, src, srca, srcstride, dstbase, dststride);
+#elif defined(ARCH_X86) || defined(ARCH_X86_64)
+		vo_draw_alpha_yv12_X86(w, h, dp, src, srca, srcstride, dstbase, dststride);
 #else
-		vo_draw_alpha_yv12_C(w, h, src, srca, srcstride, dstbase, dststride);
+		vo_draw_alpha_yv12_C(w, h, dp, src, srca, srcstride, dstbase, dststride);
 #endif
 #endif //!RUNTIME_CPUDETECT
 }
 
-void vo_draw_alpha_yuy2(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
+void vo_draw_alpha_yuy2(int w,int h, int dp, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
 #ifdef RUNTIME_CPUDETECT
 #ifdef CAN_COMPILE_X86_ASM
 	// ordered by speed / fastest first
 	if(gCpuCaps.hasMMX2)
-		vo_draw_alpha_yuy2_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
+		vo_draw_alpha_yuy2_MMX2(w, h, dp, src, srca, srcstride, dstbase, dststride);
 	else if(gCpuCaps.has3DNow)
-		vo_draw_alpha_yuy2_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
+		vo_draw_alpha_yuy2_3DNow(w, h, dp, src, srca, srcstride, dstbase, dststride);
 	else if(gCpuCaps.hasMMX)
-		vo_draw_alpha_yuy2_MMX(w, h, src, srca, srcstride, dstbase, dststride);
+		vo_draw_alpha_yuy2_MMX(w, h, dp, src, srca, srcstride, dstbase, dststride);
 	else
-		vo_draw_alpha_yuy2_X86(w, h, src, srca, srcstride, dstbase, dststride);
+		vo_draw_alpha_yuy2_X86(w, h, dp, src, srca, srcstride, dstbase, dststride);
 #else
-		vo_draw_alpha_yuy2_C(w, h, src, srca, srcstride, dstbase, dststride);
+		vo_draw_alpha_yuy2_C(w, h, dp, src, srca, srcstride, dstbase, dststride);
 #endif
 #else //RUNTIME_CPUDETECT
 #ifdef HAVE_MMX2
-		vo_draw_alpha_yuy2_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
+		vo_draw_alpha_yuy2_MMX2(w, h, dp, src, srca, srcstride, dstbase, dststride);
 #elif defined (HAVE_3DNOW)
-		vo_draw_alpha_yuy2_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
+		vo_draw_alpha_yuy2_3DNow(w, h, dp, src, srca, srcstride, dstbase, dststride);
 #elif defined (HAVE_MMX)
-		vo_draw_alpha_yuy2_MMX(w, h, src, srca, srcstride, dstbase, dststride);
-#elif defined(ARCH_X86)
-		vo_draw_alpha_yuy2_X86(w, h, src, srca, srcstride, dstbase, dststride);
+		vo_draw_alpha_yuy2_MMX(w, h, dp, src, srca, srcstride, dstbase, dststride);
+#elif defined(ARCH_X86) || defined(ARCH_X86_64)
+		vo_draw_alpha_yuy2_X86(w, h, dp, src, srca, srcstride, dstbase, dststride);
 #else
-		vo_draw_alpha_yuy2_C(w, h, src, srca, srcstride, dstbase, dststride);
+		vo_draw_alpha_yuy2_C(w, h, dp, src, srca, srcstride, dstbase, dststride);
 #endif
 #endif //!RUNTIME_CPUDETECT
 }
 
-void vo_draw_alpha_uyvy(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
+void vo_draw_alpha_uyvy(int w,int h, int dp, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
 #ifdef RUNTIME_CPUDETECT
 #ifdef CAN_COMPILE_X86_ASM
 	// ordered by speed / fastest first
 	if(gCpuCaps.hasMMX2)
-		vo_draw_alpha_uyvy_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
+		vo_draw_alpha_uyvy_MMX2(w, h, dp, src, srca, srcstride, dstbase, dststride);
 	else if(gCpuCaps.has3DNow)
-		vo_draw_alpha_uyvy_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
+		vo_draw_alpha_uyvy_3DNow(w, h, dp, src, srca, srcstride, dstbase, dststride);
 	else if(gCpuCaps.hasMMX)
-		vo_draw_alpha_uyvy_MMX(w, h, src, srca, srcstride, dstbase, dststride);
+		vo_draw_alpha_uyvy_MMX(w, h, dp, src, srca, srcstride, dstbase, dststride);
 	else
-		vo_draw_alpha_uyvy_X86(w, h, src, srca, srcstride, dstbase, dststride);
+		vo_draw_alpha_uyvy_X86(w, h, dp, src, srca, srcstride, dstbase, dststride);
 #else
-		vo_draw_alpha_uyvy_C(w, h, src, srca, srcstride, dstbase, dststride);
+		vo_draw_alpha_uyvy_C(w, h, dp, src, srca, srcstride, dstbase, dststride);
 #endif
 #else //RUNTIME_CPUDETECT
 #ifdef HAVE_MMX2
-		vo_draw_alpha_uyvy_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
+		vo_draw_alpha_uyvy_MMX2(w, h, dp, src, srca, srcstride, dstbase, dststride);
 #elif defined (HAVE_3DNOW)
-		vo_draw_alpha_uyvy_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
+		vo_draw_alpha_uyvy_3DNow(w, h, dp, src, srca, srcstride, dstbase, dststride);
 #elif defined (HAVE_MMX)
-		vo_draw_alpha_uyvy_MMX(w, h, src, srca, srcstride, dstbase, dststride);
-#elif defined(ARCH_X86)
-		vo_draw_alpha_uyvy_X86(w, h, src, srca, srcstride, dstbase, dststride);
+		vo_draw_alpha_uyvy_MMX(w, h, dp, src, srca, srcstride, dstbase, dststride);
+#elif defined(ARCH_X86) || defined(ARCH_X86_64)
+		vo_draw_alpha_uyvy_X86(w, h, dp, src, srca, srcstride, dstbase, dststride);
 #else
-		vo_draw_alpha_uyvy_C(w, h, src, srca, srcstride, dstbase, dststride);
+		vo_draw_alpha_uyvy_C(w, h, dp, src, srca, srcstride, dstbase, dststride);
 #endif
 #endif //!RUNTIME_CPUDETECT
 }
 
-void vo_draw_alpha_rgb24(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
+void vo_draw_alpha_rgb24(int w,int h, int dp, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
 #ifdef RUNTIME_CPUDETECT
 #ifdef CAN_COMPILE_X86_ASM
 	// ordered by speed / fastest first
 	if(gCpuCaps.hasMMX2)
-		vo_draw_alpha_rgb24_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
+		vo_draw_alpha_rgb24_MMX2(w, h, dp, src, srca, srcstride, dstbase, dststride);
 	else if(gCpuCaps.has3DNow)
-		vo_draw_alpha_rgb24_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
+		vo_draw_alpha_rgb24_3DNow(w, h, dp, src, srca, srcstride, dstbase, dststride);
 	else if(gCpuCaps.hasMMX)
-		vo_draw_alpha_rgb24_MMX(w, h, src, srca, srcstride, dstbase, dststride);
+		vo_draw_alpha_rgb24_MMX(w, h, dp, src, srca, srcstride, dstbase, dststride);
 	else
-		vo_draw_alpha_rgb24_X86(w, h, src, srca, srcstride, dstbase, dststride);
+		vo_draw_alpha_rgb24_X86(w, h, dp, src, srca, srcstride, dstbase, dststride);
 #else
-		vo_draw_alpha_rgb24_C(w, h, src, srca, srcstride, dstbase, dststride);
+		vo_draw_alpha_rgb24_C(w, h, dp, src, srca, srcstride, dstbase, dststride);
 #endif
 #else //RUNTIME_CPUDETECT
 #ifdef HAVE_MMX2
-		vo_draw_alpha_rgb24_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
+		vo_draw_alpha_rgb24_MMX2(w, h, dp, src, srca, srcstride, dstbase, dststride);
 #elif defined (HAVE_3DNOW)
-		vo_draw_alpha_rgb24_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
+		vo_draw_alpha_rgb24_3DNow(w, h, dp, src, srca, srcstride, dstbase, dststride);
 #elif defined (HAVE_MMX)
-		vo_draw_alpha_rgb24_MMX(w, h, src, srca, srcstride, dstbase, dststride);
-#elif defined(ARCH_X86)
-		vo_draw_alpha_rgb24_X86(w, h, src, srca, srcstride, dstbase, dststride);
+		vo_draw_alpha_rgb24_MMX(w, h, dp, src, srca, srcstride, dstbase, dststride);
+#elif defined(ARCH_X86) || defined(ARCH_X86_64)
+		vo_draw_alpha_rgb24_X86(w, h, dp, src, srca, srcstride, dstbase, dststride);
 #else
-		vo_draw_alpha_rgb24_C(w, h, src, srca, srcstride, dstbase, dststride);
+		vo_draw_alpha_rgb24_C(w, h, dp, src, srca, srcstride, dstbase, dststride);
 #endif
 #endif //!RUNTIME_CPUDETECT
 }
 
-void vo_draw_alpha_rgb32(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
+void vo_draw_alpha_rgb32(int w,int h, int dp, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
 #ifdef RUNTIME_CPUDETECT
 #ifdef CAN_COMPILE_X86_ASM
 	// ordered by speed / fastest first
 	if(gCpuCaps.hasMMX2)
-		vo_draw_alpha_rgb32_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
+		vo_draw_alpha_rgb32_MMX2(w, h, dp, src, srca, srcstride, dstbase, dststride);
 	else if(gCpuCaps.has3DNow)
-		vo_draw_alpha_rgb32_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
+		vo_draw_alpha_rgb32_3DNow(w, h, dp, src, srca, srcstride, dstbase, dststride);
 	else if(gCpuCaps.hasMMX)
-		vo_draw_alpha_rgb32_MMX(w, h, src, srca, srcstride, dstbase, dststride);
+		vo_draw_alpha_rgb32_MMX(w, h, dp, src, srca, srcstride, dstbase, dststride);
 	else
-		vo_draw_alpha_rgb32_X86(w, h, src, srca, srcstride, dstbase, dststride);
+		vo_draw_alpha_rgb32_X86(w, h, dp, src, srca, srcstride, dstbase, dststride);
 #else
-		vo_draw_alpha_rgb32_C(w, h, src, srca, srcstride, dstbase, dststride);
+		vo_draw_alpha_rgb32_C(w, h, dp, src, srca, srcstride, dstbase, dststride);
 #endif
 #else //RUNTIME_CPUDETECT
 #ifdef HAVE_MMX2
-		vo_draw_alpha_rgb32_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
+		vo_draw_alpha_rgb32_MMX2(w, h, dp, src, srca, srcstride, dstbase, dststride);
 #elif defined (HAVE_3DNOW)
-		vo_draw_alpha_rgb32_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
+		vo_draw_alpha_rgb32_3DNow(w, h, dp, src, srca, srcstride, dstbase, dststride);
 #elif defined (HAVE_MMX)
-		vo_draw_alpha_rgb32_MMX(w, h, src, srca, srcstride, dstbase, dststride);
-#elif defined(ARCH_X86)
-		vo_draw_alpha_rgb32_X86(w, h, src, srca, srcstride, dstbase, dststride);
+		vo_draw_alpha_rgb32_MMX(w, h, dp, src, srca, srcstride, dstbase, dststride);
+#elif defined(ARCH_X86) || defined(ARCH_X86_64)
+		vo_draw_alpha_rgb32_X86(w, h, dp, src, srca, srcstride, dstbase, dststride);
 #else
-		vo_draw_alpha_rgb32_C(w, h, src, srca, srcstride, dstbase, dststride);
+		vo_draw_alpha_rgb32_C(w, h, dp, src, srca, srcstride, dstbase, dststride);
 #endif
 #endif //!RUNTIME_CPUDETECT
 }
@@ -288,7 +289,7 @@
 			mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX (with tiny bit 3DNow) Optimized OnScreenDisplay\n");
 #elif defined (HAVE_MMX)
 			mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX Optimized OnScreenDisplay\n");
-#elif defined(ARCH_X86)
+#elif defined(ARCH_X86) || defined(ARCH_X86_64)
 			mp_msg(MSGT_OSD,MSGL_INFO,"Using X86 Optimized OnScreenDisplay\n");
 #else
 			mp_msg(MSGT_OSD,MSGL_INFO,"Using Unoptimized OnScreenDisplay\n");
@@ -297,8 +298,10 @@
 	}
 }
 
-void vo_draw_alpha_rgb15(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
+void vo_draw_alpha_rgb15(int w,int h, int dp, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
     int y;
+switch (dp) {
+  case DEST_PLANES_Y :
     for(y=0;y<h;y++){
         register unsigned short *dst = (unsigned short*) dstbase;
         register int x;
@@ -327,10 +330,67 @@
         dstbase+=dststride;
     }
     return;
+  case DEST_PLANES_RB:
+    for(y=0;y<h;y++){
+        register unsigned short *dst = (unsigned short*) dstbase;
+        register int x;
+        for(x=0;x<w;x++){
+            if(srca[x]){
+                unsigned char r=dst[x]&0x1F;
+                unsigned char g=(dst[x]>>5)&0x1F;
+                unsigned char b=(dst[x]>>10)&0x1F;
+                r=(((r*srca[x])>>5)+src[x])>>3;
+                dst[x]=(b<<10)|(g<<5)|r;
+            }
+        }
+        src+=srcstride;
+        srca+=srcstride;
+        dstbase+=dststride;
+    }
+    return;
+  case DEST_PLANES_G:
+    for(y=0;y<h;y++){
+        register unsigned short *dst = (unsigned short*) dstbase;
+        register int x;
+        for(x=0;x<w;x++){
+            if(srca[x]){
+                unsigned char r=dst[x]&0x1F;
+                unsigned char g=(dst[x]>>5)&0x1F;
+                unsigned char b=(dst[x]>>10)&0x1F;
+                g=(((g*srca[x])>>5)+src[x])>>3;
+                dst[x]=(b<<10)|(g<<5)|r;
+            }
+        }
+        src+=srcstride;
+        srca+=srcstride;
+        dstbase+=dststride;
+    }
+    return;
+  case DEST_PLANES_BR:
+    for(y=0;y<h;y++){
+        register unsigned short *dst = (unsigned short*) dstbase;
+        register int x;
+        for(x=0;x<w;x++){
+            if(srca[x]){
+                unsigned char r=dst[x]&0x1F;
+                unsigned char g=(dst[x]>>5)&0x1F;
+                unsigned char b=(dst[x]>>10)&0x1F;
+                b=(((b*srca[x])>>5)+src[x])>>3;
+                dst[x]=(b<<10)|(g<<5)|r;
+            }
+        }
+        src+=srcstride;
+        srca+=srcstride;
+        dstbase+=dststride;
+    }
+    return;
+  }
 }
 
-void vo_draw_alpha_rgb16(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
+void vo_draw_alpha_rgb16(int w,int h, int dp, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
     int y;
+switch (dp) {
+  case DEST_PLANES_Y :
     for(y=0;y<h;y++){
         register unsigned short *dst = (unsigned short*) dstbase;
         register int x;
@@ -358,5 +418,60 @@
         dstbase+=dststride;
     }
     return;
+  case DEST_PLANES_RB :
+    for(y=0;y<h;y++){
+        register unsigned short *dst = (unsigned short*) dstbase;
+        register int x;
+        for(x=0;x<w;x++){
+            if(srca[x]){
+                unsigned char r=dst[x]&0x1F;
+                unsigned char b=(dst[x]>>11)&0x1F;
+                unsigned char g=(dst[x]>>5)&0x3F;
+                r=(((r*srca[x])>>5)+src[x])>>3;
+                dst[x]=(b<<11)|(g<<5)|r;
+            }
+        }
+        src+=srcstride;
+        srca+=srcstride;
+        dstbase+=dststride;
+    }
+    return;
+  case DEST_PLANES_G :
+    for(y=0;y<h;y++){
+        register unsigned short *dst = (unsigned short*) dstbase;
+        register int x;
+        for(x=0;x<w;x++){
+            if(srca[x]){
+                unsigned char r=dst[x]&0x1F;
+                unsigned char g=(dst[x]>>5)&0x3F;
+                unsigned char b=(dst[x]>>11)&0x1F;
+                g=(((g*srca[x])>>6)+src[x])>>2;
+                dst[x]=(b<<11)|(g<<5)|r;
+            }
+        }
+        src+=srcstride;
+        srca+=srcstride;
+        dstbase+=dststride;
+    }
+    return;
+  case DEST_PLANES_BR :
+    for(y=0;y<h;y++){
+        register unsigned short *dst = (unsigned short*) dstbase;
+        register int x;
+        for(x=0;x<w;x++){
+            if(srca[x]){
+                unsigned char r=dst[x]&0x1F;
+                unsigned char g=(dst[x]>>5)&0x3F;
+                unsigned char b=(dst[x]>>11)&0x1F;
+                b=(((b*srca[x])>>5)+src[x])>>3;
+                dst[x]=(b<<11)|(g<<5)|r;
+            }
+        }
+        src+=srcstride;
+        srca+=srcstride;
+        dstbase+=dststride;
+    }
+    return;
+  }
 }
 
--- libvo/osd.h	 (revision 22772)
+++ libvo/osd.h	 (working copy)
@@ -7,13 +7,13 @@
 
 extern void vo_draw_alpha_init(void); // build tables
 
-extern void vo_draw_alpha_yv12(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride);
-extern void vo_draw_alpha_yuy2(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride);
-extern void vo_draw_alpha_uyvy(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride);
-extern void vo_draw_alpha_rgb24(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride);
-extern void vo_draw_alpha_rgb32(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride);
-extern void vo_draw_alpha_rgb15(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride);
-extern void vo_draw_alpha_rgb16(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride);
+extern void vo_draw_alpha_yv12(int w,int h, int dp, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride);
+extern void vo_draw_alpha_yuy2(int w,int h, int dp, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride);
+extern void vo_draw_alpha_uyvy(int w,int h, int dp, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride);
+extern void vo_draw_alpha_rgb24(int w,int h, int dp, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride);
+extern void vo_draw_alpha_rgb32(int w,int h, int dp, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride);
+extern void vo_draw_alpha_rgb15(int w,int h, int dp, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride);
+extern void vo_draw_alpha_rgb16(int w,int h, int dp, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride);
 
 #endif
 
--- libvo/vo_dfbmga.c	 (revision 22772)
+++ libvo/vo_dfbmga.c	 (working copy)
@@ -926,7 +926,7 @@
 }
 
 static void
-vo_draw_alpha_alut44( int w, int h,
+vo_draw_alpha_alut44( int w, int h, int dp,
                     unsigned char* src,
                     unsigned char *srca,
                     int srcstride,
@@ -957,6 +957,7 @@
 static void
 draw_alpha( int x0, int y0,
             int w, int h,
+	    int dp,
             unsigned char *src,
 	    unsigned char *srca,
             int stride )
@@ -981,38 +982,47 @@
 
      switch (subframe_format) {
      case DSPF_ALUT44:
-          vo_draw_alpha_alut44( w, h, src, srca, stride,
+          vo_draw_alpha_alut44( w, h, dp, src, srca, stride,
                                 ((uint8_t *) dst) + pitch * y0 + x0,
                               pitch );
           break;
      case DSPF_RGB32:
      case DSPF_ARGB:
-	  vo_draw_alpha_rgb32( w, h, src, srca, stride,
+	  vo_draw_alpha_rgb32( w, h, dp, src, srca, stride,
 			       (( uint8_t *) dst) + pitch * y0 + 4 * x0,
                                pitch );
 	  break;
      case DSPF_RGB24:
-	  vo_draw_alpha_rgb24( w, h, src, srca, stride,
+	  vo_draw_alpha_rgb24( w, h, dp, src, srca, stride,
 			       ((uint8_t *) dst) + pitch * y0 + 3 * x0,
                                pitch );
 	  break;
      case DSPF_RGB16:
-	  vo_draw_alpha_rgb16( w, h, src, srca, stride,
+	  vo_draw_alpha_rgb16( w, h, dp, src, srca, stride,
 			       ((uint8_t *) dst) + pitch * y0 + 2 * x0,
                                pitch );
 	  break;
      case DSPF_ARGB1555:
-	  vo_draw_alpha_rgb15( w, h, src, srca, stride,
+	  vo_draw_alpha_rgb15( w, h, dp, src, srca, stride,
 			       ((uint8_t *) dst) + pitch * y0 + 2 * x0,
                                pitch );
 	  break;
      case DSPF_YUY2:
-	  vo_draw_alpha_yuy2( w, h, src, srca, stride,
+	switch (dp) {
+	    case DEST_PLANES_Y:
+	    vo_draw_alpha_yuy2( w, h, dp, src, srca, stride,
 			      ((uint8_t *) dst) + pitch * y0 + 2 * x0,
                               pitch );
 	  break;
+	    case DEST_PLANES_YUYV:
+	    vo_draw_alpha_yv12( w, h, dp, src, srca, stride,
+			      ((uint8_t *) dst) + pitch * y0 + 2 * x0,
+                              pitch );
+	    break;
+	    }
+	  break;
      case DSPF_UYVY:
-	  vo_draw_alpha_yuy2( w, h, src, srca, stride,
+	  vo_draw_alpha_yuy2( w, h, dp, src, srca, stride,
 			      ((uint8_t *) dst) + pitch * y0 + 2 * x0 + 1,
                               pitch );
 	  break;
@@ -1022,10 +1032,34 @@
 #endif
      case DSPF_I420:
      case DSPF_YV12:
-	  vo_draw_alpha_yv12( w, h, src, srca, stride,
+	switch (dp) {
+	  case DEST_PLANES_Y:
+	    vo_draw_alpha_yv12( w, h, dp, src, srca, stride,
 			      ((uint8_t *) dst) + pitch * y0 + x0,
                               pitch );
 	  break;
+	  case DEST_PLANES_U:
+	    if (subframe_format==DSPF_YV12)
+		vo_draw_alpha_yv12( w, h, dp, src, srca, stride,
+			      ((uint8_t *) dst) + pitch * y0 + x0 + pitch * in_height,
+                              pitch );
+		else
+		vo_draw_alpha_yv12( w, h, dp, src, srca, stride,
+			      ((uint8_t *) dst) + pitch * y0 + x0 + pitch * (in_height+in_height/2),
+                              pitch );
+	    break;
+	  case DEST_PLANES_V:
+	    if (subframe_format==DSPF_YV12)
+		vo_draw_alpha_yv12( w, h, dp, src, srca, stride,
+			      ((uint8_t *) dst) + pitch * y0 + x0 + pitch * (in_height+in_height/2),
+                              pitch );
+		else
+		vo_draw_alpha_yv12( w, h, dp, src, srca, stride,
+			      ((uint8_t *) dst) + pitch * y0 + x0 + pitch * in_height,
+                              pitch );
+	    break;
+	  }
+	  break;
      }
 
      subframe->Unlock( subframe );
@@ -1420,6 +1454,18 @@
 
                return get_equalizer( data, value );
           }
+    /* SPU colour-mode queries, each answered from the current subframe_format */
+    case VOCTRL_YUVSPU_SUPPORT:
+		return (subframe_format==DSPF_I420 || subframe_format==DSPF_YV12) ? VO_TRUE : VO_FALSE;
+    case VOCTRL_YUYSPU_SUPPORT:
+		return (subframe_format==DSPF_YUY2) ? VO_TRUE : VO_FALSE;
+    case VOCTRL_RGBSPU_SUPPORT:
+		return (subframe_format==DSPF_RGB32 || subframe_format==DSPF_ARGB ||
+			subframe_format==DSPF_RGB24 || subframe_format==DSPF_RGB16 ||
+			subframe_format==DSPF_ARGB1555) ? VO_TRUE : VO_FALSE;
+    case VOCTRL_BGRSPU_SUPPORT:
+		/* BGR SPU drawing is never reported by this vo */
+		return VO_FALSE;
      }
 
      return VO_NOTIMPL;
--- libvo/vo_quartz.c	 (revision 22772)
+++ libvo/vo_quartz.c	 (working copy)
@@ -129,7 +129,7 @@
 #include "osdep/keycodes.h"
 
 extern void mplayer_put_key(int code);
-extern void vo_draw_text(int dxs,int dys,void (*draw_alpha)(int x0,int y0, int w,int h, unsigned char* src, unsigned char *srca, int stride));
+extern void vo_draw_text(int dxs,int dys,void (*draw_alpha)(int x0,int y0, int w,int h, int dp, unsigned char* src, unsigned char *srca, int stride));
 
 //PROTOTYPE/////////////////////////////////////////////////////////////////
 static OSStatus KeyEventHandler(EventHandlerCallRef nextHandler, EventRef event, void *userData);
@@ -194,23 +194,23 @@
     }
 }
 
-static void draw_alpha(int x0, int y0, int w, int h, unsigned char *src, unsigned char *srca, int stride)
+static void draw_alpha(int x0, int y0, int w, int h, int dp, unsigned char *src, unsigned char *srca, int stride)
 {
 	switch (image_format)
 	{
 		case IMGFMT_RGB32:
-			vo_draw_alpha_rgb32(w,h,src,srca,stride,image_data+4*(y0*imgRect.right+x0),4*imgRect.right);
+			vo_draw_alpha_rgb32(w,h,dp,src,srca,stride,image_data+4*(y0*imgRect.right+x0),4*imgRect.right);
 			break;
 		case IMGFMT_YV12:
 		case IMGFMT_IYUV:
 		case IMGFMT_I420:
-			vo_draw_alpha_yv12(w,h,src,srca,stride, ((char*)P) + P->componentInfoY.offset + x0 + y0 * imgRect.right, imgRect.right);
+			vo_draw_alpha_yv12(w,h,dp,src,srca,stride, ((char*)P) + P->componentInfoY.offset + x0 + y0 * imgRect.right, imgRect.right);
 			break;
 		case IMGFMT_UYVY:
-			vo_draw_alpha_uyvy(w,h,src,srca,stride,((char*)P) + (x0 + y0 * imgRect.right) * 2,imgRect.right*2);
+			vo_draw_alpha_uyvy(w,h,dp,src,srca,stride,((char*)P) + (x0 + y0 * imgRect.right) * 2,imgRect.right*2);
 			break;
 		case IMGFMT_YUY2:
-			vo_draw_alpha_yuy2(w,h,src,srca,stride,((char*)P) + (x0 + y0 * imgRect.right) * 2,imgRect.right*2);
+			vo_draw_alpha_yuy2(w,h,dp,src,srca,stride,((char*)P) + (x0 + y0 * imgRect.right) * 2,imgRect.right*2);
 			break;
 	}
 }
--- libvo/vo_directx.c	 (revision 22772)
+++ libvo/vo_directx.c	 (working copy)
@@ -86,7 +86,7 @@
 static RECT last_rect = {0xDEADC0DE, 0xDEADC0DE, 0xDEADC0DE, 0xDEADC0DE};
 
 extern void mplayer_put_key(int code);              //let mplayer handel the keyevents 
-extern void vo_draw_text(int dxs,int dys,void (*draw_alpha)(int x0,int y0, int w,int h, unsigned char* src, unsigned char *srca, int stride));
+extern void vo_draw_text(int dxs,int dys,void (*draw_alpha)(int x0,int y0, int w,int h, int dp, unsigned char* src, unsigned char *srca, int stride));
 extern int vidmode;
 
 /*****************************************************************************
@@ -144,37 +144,62 @@
 
 LIBVO_EXTERN(directx)
 
-static void draw_alpha(int x0, int y0, int w, int h, unsigned char *src,
+static void draw_alpha(int x0, int y0, int w, int h, int dp, unsigned char *src,
 		unsigned char *srca, int stride)
 {
+    uint8_t *d;
+    uint32_t uvstride=dstride/2;
     switch(image_format) {
     case IMGFMT_YV12 :
     case IMGFMT_I420 :
 	case IMGFMT_IYUV :
 	case IMGFMT_YVU9 :
-    	vo_draw_alpha_yv12(w,h,src,srca,stride,((uint8_t *) image) + dstride*y0 + x0,dstride);
+	    switch (dp) {
+		case DEST_PLANES_Y:
+		    vo_draw_alpha_yv12(w,h,dp,src,srca,stride,((uint8_t *) image) + dstride*y0 + x0,dstride);
+		    break;
+		case DEST_PLANES_U:
+		    if(image_format == IMGFMT_YV12)
+			vo_draw_alpha_yv12(w,h,dp,src,srca,stride,((uint8_t *) image) + image_height*dstride + uvstride*y0 + x0,uvstride);
+			else
+			vo_draw_alpha_yv12(w,h,dp,src,srca,stride,((uint8_t *) image) + image_height*dstride + uvstride*(image_height/2) + uvstride*y0 + x0,uvstride);
+		    break;
+		case DEST_PLANES_V:
+		    if(image_format == IMGFMT_YV12)
+			vo_draw_alpha_yv12(w,h,dp,src,srca,stride,((uint8_t *) image) + image_height*dstride + uvstride*(image_height/2) + uvstride*y0 + x0,uvstride);
+			else
+			vo_draw_alpha_yv12(w,h,dp,src,srca,stride,((uint8_t *) image) + image_height*dstride + uvstride*y0 + x0,uvstride);
+		    break;
+		}
 	break;
 	case IMGFMT_YUY2 :
-	    vo_draw_alpha_yuy2(w,h,src,srca,stride,((uint8_t *) image)+ dstride*y0 + 2*x0 ,dstride);
+	    switch (dp) {
+		case DEST_PLANES_Y:
+		    vo_draw_alpha_yuy2(w,h,dp,src,srca,stride,((uint8_t *) image)+ dstride*y0 + 2*x0 ,dstride);
+		    break;
+		case DEST_PLANES_YUYV:
+		    vo_draw_alpha_yv12(w,h,dp,src,srca,stride,((uint8_t *) image)+ dstride*y0 + 2*x0 ,dstride);
+		    break;
+		}
     break;
     case IMGFMT_UYVY :
-        vo_draw_alpha_yuy2(w,h,src,srca,stride,((uint8_t *) image) + dstride*y0 + 2*x0 + 1,dstride);
+	vo_draw_alpha_yuy2(w,h,dp,src,srca,stride,((uint8_t *) image) + dstride*y0 + 2*x0 + 1,dstride);
     break;
 	case IMGFMT_RGB15:	
     case IMGFMT_BGR15:
-		vo_draw_alpha_rgb15(w,h,src,srca,stride,((uint8_t *) image)+dstride*y0+2*x0,dstride);
+		vo_draw_alpha_rgb15(w,h,dp,src,srca,stride,((uint8_t *) image)+dstride*y0+2*x0,dstride);
     break;
     case IMGFMT_RGB16:
 	case IMGFMT_BGR16:
-        vo_draw_alpha_rgb16(w,h,src,srca,stride,((uint8_t *) image)+dstride*y0+2*x0,dstride);
+	vo_draw_alpha_rgb16(w,h,dp,src,srca,stride,((uint8_t *) image)+dstride*y0+2*x0,dstride);
     break;
     case IMGFMT_RGB24:
 	case IMGFMT_BGR24:
-        vo_draw_alpha_rgb24(w,h,src,srca,stride,((uint8_t *) image)+dstride*y0+4*x0,dstride);
+        vo_draw_alpha_rgb24(w,h,dp,src,srca,stride,((uint8_t *) image)+dstride*y0+3*x0,dstride); // 24bpp: 3 bytes per pixel
     break;
     case IMGFMT_RGB32:
 	case IMGFMT_BGR32:
-        vo_draw_alpha_rgb32(w,h,src,srca,stride,((uint8_t *) image)+dstride*y0+4*x0,dstride);
+        vo_draw_alpha_rgb32(w,h,dp,src,srca,stride,((uint8_t *) image)+dstride*y0+4*x0,dstride);
     break;
     }
 }
@@ -1576,6 +1601,25 @@
         }
         aspect_save_screenres(vo_screenwidth, vo_screenheight);
         return VO_TRUE;
+	case VOCTRL_YUVSPU_SUPPORT: {
+		if (image_format==IMGFMT_YV12 || image_format==IMGFMT_I420 ||
+		    image_format==IMGFMT_IYUV || image_format==IMGFMT_YVU9) return VO_TRUE;
+		return VO_FALSE;
+	}
+	case VOCTRL_YUYSPU_SUPPORT: {
+		if (image_format==IMGFMT_YUY2) return VO_TRUE;
+		return VO_FALSE;
+	}
+	case VOCTRL_RGBSPU_SUPPORT: {
+		if (image_format==IMGFMT_RGB15 || image_format==IMGFMT_RGB16 ||
+		    image_format==IMGFMT_RGB32) return VO_TRUE;
+		return VO_FALSE;
+	}
+	case VOCTRL_BGRSPU_SUPPORT: {
+		if (image_format==IMGFMT_BGR15 || image_format==IMGFMT_BGR16 ||
+		    image_format==IMGFMT_BGR32) return VO_TRUE;
+		return VO_FALSE;
+	}
     case VOCTRL_RESET:
         last_rect.left = 0xDEADC0DE;   // reset window position cache
         // fall-through intended
--- libvo/osd_template.c	 (revision 22772)
+++ libvo/osd_template.c	 (working copy)
@@ -27,7 +27,7 @@
 #define EMMS     "emms"
 #endif
 
-static inline void RENAME(vo_draw_alpha_yv12)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
+static inline void RENAME(vo_draw_alpha_yv12)(int w,int h, int dp, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
     int y;
 #if defined(FAST_OSD) && !defined(HAVE_MMX)
     w=w>>1;
@@ -97,7 +97,7 @@
     return;
 }
 
-static inline void RENAME(vo_draw_alpha_yuy2)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
+static inline void RENAME(vo_draw_alpha_yuy2)(int w,int h, int dp, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
     int y;
 #if defined(FAST_OSD) && !defined(HAVE_MMX)
     w=w>>1;
@@ -169,7 +169,7 @@
     return;
 }
 
-static inline void RENAME(vo_draw_alpha_uyvy)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
+static inline void RENAME(vo_draw_alpha_uyvy)(int w,int h, int dp, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
   int y;
 #if defined(FAST_OSD)
   w=w>>1;
@@ -193,8 +193,10 @@
   }
 }
 
-static inline void RENAME(vo_draw_alpha_rgb24)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
+static inline void RENAME(vo_draw_alpha_rgb24)(int w,int h, int dp, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
     int y;
+switch (dp) {
+ case DEST_PLANES_Y:
 #ifdef HAVE_MMX
     asm volatile(
         "pxor %%mm7, %%mm7\n\t"
@@ -299,13 +301,61 @@
 	asm volatile(EMMS:::"memory");
 #endif
     return;
+ case DEST_PLANES_RB:
+ for(y=0;y<h;y++){
+     register unsigned char *dst = dstbase;
+     register int x;
+     for(x=0;x<w;x++){
+         if(srca[x]){
+		dst[0]=((dst[0]*srca[x])>>8)+src[x];
+         }
+         dst+=3; // 24bpp
+     }
+     src+=srcstride;
+     srca+=srcstride;
+     dstbase+=dststride;
+ }
+ return;
+ case DEST_PLANES_G:
+ for(y=0;y<h;y++){
+     register unsigned char *dst = dstbase;
+     register int x;
+     for(x=0;x<w;x++){
+         if(srca[x]){
+		dst[1]=((dst[1]*srca[x])>>8)+src[x];
+         }
+         dst+=3; // 24bpp
+     }
+     src+=srcstride;
+     srca+=srcstride;
+     dstbase+=dststride;
+ }
+ return;
+ case DEST_PLANES_BR:
+ for(y=0;y<h;y++){
+     register unsigned char *dst = dstbase;
+     register int x;
+     for(x=0;x<w;x++){
+         if(srca[x]){
+		dst[2]=((dst[2]*srca[x])>>8)+src[x];
+         }
+         dst+=3; // 24bpp
+     }
+     src+=srcstride;
+     srca+=srcstride;
+     dstbase+=dststride;
+ }
+ return;
+ }
 }
 
-static inline void RENAME(vo_draw_alpha_rgb32)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
+static inline void RENAME(vo_draw_alpha_rgb32)(int w,int h, int dp, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
     int y;
 #ifdef WORDS_BIGENDIAN
     dstbase++;
 #endif
+switch (dp) {
+ case DEST_PLANES_Y:
 #ifdef HAVE_MMX
 #ifdef HAVE_3DNOW
     asm volatile(
@@ -464,4 +514,44 @@
 	asm volatile(EMMS:::"memory");
 #endif
     return;
+ case DEST_PLANES_RB:
+    for(y=0;y<h;y++){
+        register int x;
+        for(x=0;x<w;x++){
+            if(srca[x]){
+		dstbase[4*x+0]=(((dstbase[4*x+0]*srca[x])>>8)+src[x])&0xff;
+            }
+        }
+        src+=srcstride;
+        srca+=srcstride;
+        dstbase+=dststride;
+    }
+    return;
+ case DEST_PLANES_G:
+    for(y=0;y<h;y++){
+        register int x;
+        for(x=0;x<w;x++){
+            if(srca[x]){
+		dstbase[4*x+1]=(((dstbase[4*x+1]*srca[x])>>8)+src[x])&0xff;
+            }
+        }
+        src+=srcstride;
+        srca+=srcstride;
+        dstbase+=dststride;
+    }
+    return;
+ case DEST_PLANES_BR:
+    for(y=0;y<h;y++){
+        register int x;
+        for(x=0;x<w;x++){
+            if(srca[x]){
+		dstbase[4*x+2]=(((dstbase[4*x+2]*srca[x])>>8)+src[x])&0xff;
+            }
+        }
+        src+=srcstride;
+        srca+=srcstride;
+        dstbase+=dststride;
+    }
+    return;
+ }
 }
--- libvo/vo_yuv4mpeg.c	 (revision 22772)
+++ libvo/vo_yuv4mpeg.c	 (working copy)
@@ -181,25 +181,37 @@
 	}
 }
 
-static void draw_alpha(int x0, int y0, int w, int h, unsigned char *src,
+static void draw_alpha(int x0, int y0, int w, int h, int dp, unsigned char *src,
                        unsigned char *srca, int stride) {
 	switch (using_format)
 	{
     	case IMGFMT_YV12:
-	    	vo_draw_alpha_yv12(w, h, src, srca, stride, 
+	    switch (dp) {
+		case DEST_PLANES_Y:
+		    vo_draw_alpha_yv12(w, h, dp, src, srca, stride,
 				       image + y0 * image_width + x0, image_width);
 			break;
+		case DEST_PLANES_U:
+		    if (image_u) vo_draw_alpha_yv12(w, h, dp, src, srca, stride,
+				image_u + y0 * image_width + x0, image_width);
+		    break;
+		case DEST_PLANES_V:
+		    if (image_v) vo_draw_alpha_yv12(w, h, dp, src, srca, stride,
+				image_v + y0 * image_width + x0, image_width);
+		    break;
+		}
+			break;
 		
 		case IMGFMT_BGR|24:
 		case IMGFMT_RGB|24:
 			if (config_interlace != Y4M_ILACE_BOTTOM_FIRST)
-				vo_draw_alpha_rgb24(w, h, src, srca, stride,
+				vo_draw_alpha_rgb24(w, h, dp, src, srca, stride,
 						rgb_buffer + (y0 * image_width + x0) * 3, image_width * 3);
 			else
 			{
 				swap_fields (rgb_buffer, image_height, image_width * 3);
 
-				vo_draw_alpha_rgb24(w, h, src, srca, stride,
+				vo_draw_alpha_rgb24(w, h, dp, src, srca, stride,
 						rgb_buffer + (y0 * image_width  + x0) * 3, image_width * 3);
 				
 				swap_fields (rgb_buffer, image_height, image_width * 3);
@@ -545,6 +557,15 @@
     return query_format(*((uint32_t*)data));
   case VOCTRL_DUPLICATE_FRAME:
     return write_last_frame();
+  case VOCTRL_YUVSPU_SUPPORT:
+    if (using_format==IMGFMT_YV12) return VO_TRUE;
+    return VO_FALSE;
+  case VOCTRL_RGBSPU_SUPPORT:
+    if (using_format==(IMGFMT_RGB|24)) return VO_TRUE; // parens required: == binds tighter than |
+    return VO_FALSE;
+  case VOCTRL_BGRSPU_SUPPORT:
+    if (using_format==(IMGFMT_BGR|24)) return VO_TRUE; // parens required: == binds tighter than |
+    return VO_FALSE;
   }
   return VO_NOTIMPL;
 }
--- libvo/vosub_vidix.c	 (revision 22772)
+++ libvo/vosub_vidix.c	 (working copy)
@@ -283,7 +283,7 @@
   }	
 }
 
-static void draw_alpha(int x0,int y0, int w,int h, unsigned char* src, unsigned char *srca, int stride)
+static void draw_alpha(int x0,int y0, int w,int h, int dp, unsigned char* src, unsigned char *srca, int stride)
 {
     uint32_t apitch,bespitch;
     void *lvo_mem;
@@ -297,36 +297,57 @@
     case IMGFMT_IF09:
     case IMGFMT_Y8:
     case IMGFMT_Y800:
+	switch (dp) {
+	  case DEST_PLANES_Y:
 	bespitch = (vidix_play.src.w + apitch) & (~apitch);
-        vo_draw_alpha_yv12(w,h,src,srca,stride,lvo_mem+bespitch*y0+x0,bespitch);
+           vo_draw_alpha_yv12(w,h,dp,src,srca,stride,lvo_mem+bespitch*y0+x0,bespitch);
+	  break;
+	  case DEST_PLANES_U:
+	     lvo_mem = vidix_mem + vidix_play.offsets[next_frame] + vidix_play.offset.u;
+	     lvo_mem += dstrides.u*y0/2 + x0;
+	     vo_draw_alpha_yv12(w,h,dp,src,srca,stride,lvo_mem,dstrides.u/2);
+	  break;
+	  case DEST_PLANES_V:
+	     lvo_mem = vidix_mem + vidix_play.offsets[next_frame] + vidix_play.offset.v;
+	     lvo_mem += dstrides.v*y0/2 + x0;
+	     vo_draw_alpha_yv12(w,h,dp,src,srca,stride,lvo_mem,dstrides.v/2);
+	  break;
+	  }
         break;
     case IMGFMT_YUY2:
 	bespitch = (vidix_play.src.w*2 + apitch) & (~apitch);
-        vo_draw_alpha_yuy2(w,h,src,srca,stride,lvo_mem+bespitch*y0+2*x0,bespitch);
+	switch (dp) {
+	case DEST_PLANES_Y:
+          vo_draw_alpha_yuy2(w,h,dp,src,srca,stride,lvo_mem+bespitch*y0+2*x0,bespitch);
+	  break;
+	case DEST_PLANES_YUYV:
+           vo_draw_alpha_yv12(w,h,dp,src,srca,stride,lvo_mem+bespitch*y0+2*x0,bespitch); // packed YUY2: 2 bytes per pixel, same offset as the DEST_PLANES_Y branch
+	  break;
+	  }
         break;
     case IMGFMT_UYVY:
 	bespitch = (vidix_play.src.w*2 + apitch) & (~apitch);
-        vo_draw_alpha_yuy2(w,h,src,srca,stride,lvo_mem+bespitch*y0+2*x0+1,bespitch);
+        vo_draw_alpha_yuy2(w,h,dp,src,srca,stride,lvo_mem+bespitch*y0+2*x0+1,bespitch);
         break;
     case IMGFMT_RGB32:
     case IMGFMT_BGR32:
 	bespitch = (vidix_play.src.w*4 + apitch) & (~apitch);
-	vo_draw_alpha_rgb32(w,h,src,srca,stride,lvo_mem+y0*bespitch+4*x0,bespitch);
+	vo_draw_alpha_rgb32(w,h,dp,src,srca,stride,lvo_mem+y0*bespitch+4*x0,bespitch);
         break;
     case IMGFMT_RGB24:
     case IMGFMT_BGR24:
 	bespitch = (vidix_play.src.w*3 + apitch) & (~apitch);
-	vo_draw_alpha_rgb24(w,h,src,srca,stride,lvo_mem+y0*bespitch+3*x0,bespitch);
+	vo_draw_alpha_rgb24(w,h,dp,src,srca,stride,lvo_mem+y0*bespitch+3*x0,bespitch);
         break;
     case IMGFMT_RGB16:
     case IMGFMT_BGR16:
 	bespitch = (vidix_play.src.w*2 + apitch) & (~apitch);
-	vo_draw_alpha_rgb16(w,h,src,srca,stride,lvo_mem+y0*bespitch+2*x0,bespitch);
+	vo_draw_alpha_rgb16(w,h,dp,src,srca,stride,lvo_mem+y0*bespitch+2*x0,bespitch);
         break;
     case IMGFMT_RGB15:
     case IMGFMT_BGR15:
 	bespitch = (vidix_play.src.w*2 + apitch) & (~apitch);
-	vo_draw_alpha_rgb15(w,h,src,srca,stride,lvo_mem+y0*bespitch+2*x0,bespitch);
+	vo_draw_alpha_rgb15(w,h,dp,src,srca,stride,lvo_mem+y0*bespitch+2*x0,bespitch);
         break;
     default:
 	return;
@@ -512,7 +533,7 @@
 	    next_frame = i;
 	    memset(vidix_mem + vidix_play.offsets[i], 0x80,
 		vidix_play.frame_size);
-	    draw_alpha(0, 0, image_width, image_height, tmp, tmpa, image_width);
+	    draw_alpha(0, 0, image_width, image_height, DEST_PLANES_Y, tmp, tmpa, image_width);
 	}
 	free(tmp);
 	free(tmpa);
@@ -700,6 +721,16 @@
 
     return VO_TRUE;
   }
+  case VOCTRL_YUVSPU_SUPPORT:
+    if((vidix_play.fourcc==IMGFMT_YV12 || vidix_play.fourcc==IMGFMT_IYUV ||
+	vidix_play.fourcc==IMGFMT_I420 || vidix_play.fourcc==IMGFMT_YVU9 || vidix_play.fourcc==IMGFMT_IF09 ||
+	vidix_play.fourcc==IMGFMT_Y8 || vidix_play.fourcc==IMGFMT_Y800) &&
+	!(vidix_play.flags & VID_PLAY_INTERLEAVED_UV)) return VO_TRUE;
+    return VO_FALSE;
+  case VOCTRL_YUYSPU_SUPPORT:
+    if((vidix_play.fourcc==IMGFMT_YUY2) &&
+	!(vidix_play.flags & VID_PLAY_INTERLEAVED_UV)) return VO_TRUE;
+    return VO_FALSE;
   }
   return VO_NOTIMPL;
   // WARNING: we drop extra parameters (...) here!
--- libvo/vo_dga.c	 (revision 22772)
+++ libvo/vo_dga.c	 (working copy)
@@ -227,7 +227,7 @@
 
 //---------------------------------------------------------
 
-static void draw_alpha(int x0, int y0, int w, int h, unsigned char *src,
+static void draw_alpha(int x0, int y0, int w, int h, int dp, unsigned char *src,
                        unsigned char *srca, int stride)
 {
 
@@ -243,19 +243,19 @@
     {
 
         case 32:
-            vo_draw_alpha_rgb32(w, h, src, srca, stride, d + 4 * offset,
+            vo_draw_alpha_rgb32(w, h, dp, src, srca, stride, d + 4 * offset,
                                 4 * buffer_stride);
             break;
         case 24:
-            vo_draw_alpha_rgb24(w, h, src, srca, stride, d + 3 * offset,
+            vo_draw_alpha_rgb24(w, h, dp, src, srca, stride, d + 3 * offset,
                                 3 * buffer_stride);
             break;
         case 15:
-            vo_draw_alpha_rgb15(w, h, src, srca, stride, d + 2 * offset,
+            vo_draw_alpha_rgb15(w, h, dp, src, srca, stride, d + 2 * offset,
                                 2 * buffer_stride);
             break;
         case 16:
-            vo_draw_alpha_rgb16(w, h, src, srca, stride, d + 2 * offset,
+            vo_draw_alpha_rgb16(w, h, dp, src, srca, stride, d + 2 * offset,
                                 2 * buffer_stride);
             break;
     }
@@ -981,6 +981,14 @@
             return get_image(data);
         case VOCTRL_QUERY_FORMAT:
             return query_format(*((uint32_t *) data));
+	case VOCTRL_YUVSPU_SUPPORT:
+	    return VO_FALSE;
+	case VOCTRL_RGBSPU_SUPPORT:
+	    return VO_FALSE;
+	case VOCTRL_BGRSPU_SUPPORT:
+	    if(HW_MODE.vdm_mplayer_depth==32 || HW_MODE.vdm_mplayer_depth==24 ||
+		HW_MODE.vdm_mplayer_depth==15 || HW_MODE.vdm_mplayer_depth==16) return VO_TRUE;
+	    return VO_FALSE;
     }
     return VO_NOTIMPL;
 }
--- libvo/vo_gl2.c	 (revision 22772)
+++ libvo/vo_gl2.c	 (working copy)
@@ -86,7 +86,7 @@
 static int      use_glFinish;
 
 static void (*draw_alpha_fnc)
-                 (int x0,int y0, int w,int h, unsigned char* src, unsigned char *srca, int stride);
+                 (int x0,int y0, int w,int h, int dp, unsigned char* src, unsigned char *srca, int stride);
 
 
 /* The squares that are tiled to make up the game screen polygon */
@@ -575,23 +575,23 @@
   glLoadIdentity();
 }
 
-static void draw_alpha_32(int x0,int y0, int w,int h, unsigned char* src, unsigned char *srca, int stride){
-   vo_draw_alpha_rgb32(w,h,src,srca,stride,ImageData+4*(y0*image_width+x0),4*image_width);
+static void draw_alpha_32(int x0,int y0, int w,int h, int dp, unsigned char* src, unsigned char *srca, int stride){
+   vo_draw_alpha_rgb32(w,h,dp,src,srca,stride,ImageData+4*(y0*image_width+x0),4*image_width);
 }
 
-static void draw_alpha_24(int x0,int y0, int w,int h, unsigned char* src, unsigned char *srca, int stride){
-   vo_draw_alpha_rgb24(w,h,src,srca,stride,ImageData+3*(y0*image_width+x0),3*image_width);
+static void draw_alpha_24(int x0,int y0, int w,int h, int dp, unsigned char* src, unsigned char *srca, int stride){
+   vo_draw_alpha_rgb24(w,h,dp,src,srca,stride,ImageData+3*(y0*image_width+x0),3*image_width);
 }
 
-static void draw_alpha_16(int x0,int y0, int w,int h, unsigned char* src, unsigned char *srca, int stride){
-   vo_draw_alpha_rgb16(w,h,src,srca,stride,ImageData+2*(y0*image_width+x0),2*image_width);
+static void draw_alpha_16(int x0,int y0, int w,int h, int dp, unsigned char* src, unsigned char *srca, int stride){
+   vo_draw_alpha_rgb16(w,h,dp,src,srca,stride,ImageData+2*(y0*image_width+x0),2*image_width);
 }
 
-static void draw_alpha_15(int x0,int y0, int w,int h, unsigned char* src, unsigned char *srca, int stride){
-   vo_draw_alpha_rgb15(w,h,src,srca,stride,ImageData+2*(y0*image_width+x0),2*image_width);
+static void draw_alpha_15(int x0,int y0, int w,int h, int dp, unsigned char* src, unsigned char *srca, int stride){
+   vo_draw_alpha_rgb15(w,h,dp,src,srca,stride,ImageData+2*(y0*image_width+x0),2*image_width);
 }
 
-static void draw_alpha_null(int x0,int y0, int w,int h, unsigned char* src, unsigned char *srca, int stride){
+static void draw_alpha_null(int x0,int y0, int w,int h, int dp, unsigned char* src, unsigned char *srca, int stride){
 }
 
 #ifdef GL_WIN32
@@ -1130,6 +1130,13 @@
   case VOCTRL_UPDATE_SCREENINFO:
     update_xinerama_info();
     return VO_TRUE;
+  case VOCTRL_YUVSPU_SUPPORT:
+    return VO_FALSE;
+  case VOCTRL_RGBSPU_SUPPORT:
+    return VO_FALSE;
+  case VOCTRL_BGRSPU_SUPPORT:
+    if (image_bpp==15 || image_bpp==16 || image_bpp==24 || image_bpp==32) return VO_TRUE;
+    return VO_FALSE;
   }
   return VO_NOTIMPL;
 }
--- libvo/vo_x11.c	 (revision 22772)
+++ libvo/vo_x11.c	 (working copy)
@@ -61,7 +61,7 @@
 LIBVO_EXTERN(x11)
 /* private prototypes */
 static void Display_Image(XImage * myximage, unsigned char *ImageData);
-static void (*draw_alpha_fnc) (int x0, int y0, int w, int h,
+static void (*draw_alpha_fnc) (int x0, int y0, int w, int h, int dp,
                                unsigned char *src, unsigned char *srca,
                                int stride);
 
@@ -109,39 +109,39 @@
 
 }
 
-static void draw_alpha_32(int x0, int y0, int w, int h, unsigned char *src,
+static void draw_alpha_32(int x0, int y0, int w, int h, int dp, unsigned char *src,
                           unsigned char *srca, int stride)
 {
-    vo_draw_alpha_rgb32(w, h, src, srca, stride,
+    vo_draw_alpha_rgb32(w, h, dp, src, srca, stride,
                         ImageData + 4 * (y0 * image_width + x0),
                         4 * image_width);
 }
 
-static void draw_alpha_24(int x0, int y0, int w, int h, unsigned char *src,
+static void draw_alpha_24(int x0, int y0, int w, int h, int dp, unsigned char *src,
                           unsigned char *srca, int stride)
 {
-    vo_draw_alpha_rgb24(w, h, src, srca, stride,
+    vo_draw_alpha_rgb24(w, h, dp, src, srca, stride,
                         ImageData + 3 * (y0 * image_width + x0),
                         3 * image_width);
 }
 
-static void draw_alpha_16(int x0, int y0, int w, int h, unsigned char *src,
+static void draw_alpha_16(int x0, int y0, int w, int h, int dp, unsigned char *src,
                           unsigned char *srca, int stride)
 {
-    vo_draw_alpha_rgb16(w, h, src, srca, stride,
+    vo_draw_alpha_rgb16(w, h, dp, src, srca, stride,
                         ImageData + 2 * (y0 * image_width + x0),
                         2 * image_width);
 }
 
-static void draw_alpha_15(int x0, int y0, int w, int h, unsigned char *src,
+static void draw_alpha_15(int x0, int y0, int w, int h, int dp, unsigned char *src,
                           unsigned char *srca, int stride)
 {
-    vo_draw_alpha_rgb15(w, h, src, srca, stride,
+    vo_draw_alpha_rgb15(w, h, dp, src, srca, stride,
                         ImageData + 2 * (y0 * image_width + x0),
                         2 * image_width);
 }
 
-static void draw_alpha_null(int x0, int y0, int w, int h,
+static void draw_alpha_null(int x0, int y0, int w, int h, int dp,
                             unsigned char *src, unsigned char *srca,
                             int stride)
 {
@@ -828,6 +828,14 @@
         case VOCTRL_UPDATE_SCREENINFO:
             update_xinerama_info();
             return VO_TRUE;
+        case VOCTRL_YUVSPU_SUPPORT:
+            return VO_FALSE;
+        case VOCTRL_RGBSPU_SUPPORT:
+	    return VO_FALSE;
+        case VOCTRL_BGRSPU_SUPPORT:
+	    if (out_format == IMGFMT_BGR15 || out_format == IMGFMT_BGR16 ||
+		out_format == IMGFMT_BGR24 || out_format == IMGFMT_BGR32) return VO_TRUE;
+	    return VO_FALSE;
     }
     return VO_NOTIMPL;
 }
--- libvo/vo_sdl.c	 (revision 22772)
+++ libvo/vo_sdl.c	 (working copy)
@@ -286,7 +286,7 @@
  *
  **/
 
-static void draw_alpha(int x0,int y0, int w,int h, unsigned char* src, unsigned char *srca, int stride){
+static void draw_alpha(int x0,int y0, int w,int h, int dp, unsigned char* src, unsigned char *srca, int stride){
 	struct sdl_priv_s *priv = &sdl_priv;
 	
     if(priv->osd_has_changed) {
@@ -327,16 +327,43 @@
 		case IMGFMT_YV12:  
 		case IMGFMT_I420:
         	case IMGFMT_IYUV:
-            vo_draw_alpha_yv12(w,h,src,srca,stride,((uint8_t *) *(priv->overlay->pixels))+priv->overlay->pitches[0]*y0+x0,priv->overlay->pitches[0]);
+		    switch (dp) {
+			case DEST_PLANES_Y :
+			    vo_draw_alpha_yv12(w,h,dp,src,srca,stride,((uint8_t *) *(priv->overlay->pixels))+priv->overlay->pitches[0]*y0+x0,priv->overlay->pitches[0]);
+			    break;
+			case DEST_PLANES_U :
+			    SDL_OVR_LOCK(-1)
+			    if (priv->format==IMGFMT_YV12)
+				vo_draw_alpha_yv12(w,h,dp,src,srca,stride,((uint8_t *) (priv->overlay->pixels[1]))+priv->overlay->pitches[1]*y0+x0,priv->overlay->pitches[1]);
+				else
+				vo_draw_alpha_yv12(w,h,dp,src,srca,stride,((uint8_t *) (priv->overlay->pixels[2]))+priv->overlay->pitches[2]*y0+x0,priv->overlay->pitches[2]);
+			    SDL_OVR_UNLOCK
+			    break;
+			case DEST_PLANES_V :
+			    SDL_OVR_LOCK(-1)
+			    if (priv->format==IMGFMT_YV12)
+				vo_draw_alpha_yv12(w,h,dp,src,srca,stride,((uint8_t *) (priv->overlay->pixels[2]))+priv->overlay->pitches[2]*y0+x0,priv->overlay->pitches[2]);
+				else
+				vo_draw_alpha_yv12(w,h,dp,src,srca,stride,((uint8_t *) (priv->overlay->pixels[1]))+priv->overlay->pitches[1]*y0+x0,priv->overlay->pitches[1]);
+			    SDL_OVR_UNLOCK
+			    break;
+			}
 		break;
 		case IMGFMT_YUY2:
         	case IMGFMT_YVYU:
                 x0 *= 2;
-    			vo_draw_alpha_yuy2(w,h,src,srca,stride,((uint8_t *) *(priv->overlay->pixels))+priv->overlay->pitches[0]*y0+x0,priv->overlay->pitches[0]);
+		switch (dp) {
+		    case DEST_PLANES_Y :
+			vo_draw_alpha_yuy2(w,h,dp,src,srca,stride,((uint8_t *) *(priv->overlay->pixels))+priv->overlay->pitches[0]*y0+x0,priv->overlay->pitches[0]);
+			break;
+		    case DEST_PLANES_YUYV :
+			vo_draw_alpha_yv12(w,h,dp,src,srca,stride,((uint8_t *) *(priv->overlay->pixels))+priv->overlay->pitches[0]*y0+x0,priv->overlay->pitches[0]);
+			break;
+		    }
 		break;	
         	case IMGFMT_UYVY:
                 x0 *= 2;
-    			vo_draw_alpha_yuy2(w,h,src,srca,stride,((uint8_t *) *(priv->overlay->pixels))+priv->overlay->pitches[0]*y0+x0,priv->overlay->pitches[0]);
+    			vo_draw_alpha_yuy2(w,h,dp,src,srca,stride,((uint8_t *) *(priv->overlay->pixels))+priv->overlay->pitches[0]*y0+x0,priv->overlay->pitches[0]);
 		break;
 
 		default:
@@ -345,19 +372,19 @@
 		switch(priv->format) {
 		case IMGFMT_RGB15:
 		case IMGFMT_BGR15:
-    			vo_draw_alpha_rgb15(w,h,src,srca,stride,((uint8_t *) priv->surface->pixels)+y0*priv->surface->pitch+x0,priv->surface->pitch);
+			vo_draw_alpha_rgb15(w,h,dp,src,srca,stride,((uint8_t *) priv->surface->pixels)+y0*priv->surface->pitch+x0,priv->surface->pitch);
 		break;
 		case IMGFMT_RGB16:
 		case IMGFMT_BGR16:
-    			vo_draw_alpha_rgb16(w,h,src,srca,stride,((uint8_t *) priv->surface->pixels)+y0*priv->surface->pitch+x0,priv->surface->pitch);
+			vo_draw_alpha_rgb16(w,h,dp,src,srca,stride,((uint8_t *) priv->surface->pixels)+y0*priv->surface->pitch+x0,priv->surface->pitch);
 		break;
 		case IMGFMT_RGB24:
 		case IMGFMT_BGR24:
-    			vo_draw_alpha_rgb24(w,h,src,srca,stride,((uint8_t *) priv->surface->pixels)+y0*priv->surface->pitch+x0,priv->surface->pitch);
+			vo_draw_alpha_rgb24(w,h,dp,src,srca,stride,((uint8_t *) priv->surface->pixels)+y0*priv->surface->pitch+x0,priv->surface->pitch);
 		break;
 		case IMGFMT_RGB32:
 		case IMGFMT_BGR32:
-    			vo_draw_alpha_rgb32(w,h,src,srca,stride,((uint8_t *) priv->surface->pixels)+y0*priv->surface->pitch+x0,priv->surface->pitch);
+			vo_draw_alpha_rgb32(w,h,dp,src,srca,stride,((uint8_t *) priv->surface->pixels)+y0*priv->surface->pitch+x0,priv->surface->pitch);
 		break;
 		}
         }
@@ -366,19 +393,19 @@
 		switch(priv->format) {
 		case IMGFMT_RGB15:
 		case IMGFMT_BGR15:
-    			vo_draw_alpha_rgb15(w,h,src,srca,stride,((uint8_t *) priv->rgbsurface->pixels)+y0*priv->rgbsurface->pitch+x0,priv->rgbsurface->pitch);
+			vo_draw_alpha_rgb15(w,h,dp,src,srca,stride,((uint8_t *) priv->rgbsurface->pixels)+y0*priv->rgbsurface->pitch+x0,priv->rgbsurface->pitch);
 		break;
 		case IMGFMT_RGB16:
 		case IMGFMT_BGR16:
-    			vo_draw_alpha_rgb16(w,h,src,srca,stride,((uint8_t *) priv->rgbsurface->pixels)+y0*priv->rgbsurface->pitch+x0,priv->rgbsurface->pitch);
+			vo_draw_alpha_rgb16(w,h,dp,src,srca,stride,((uint8_t *) priv->rgbsurface->pixels)+y0*priv->rgbsurface->pitch+x0,priv->rgbsurface->pitch);
 		break;
 		case IMGFMT_RGB24:
 		case IMGFMT_BGR24:
-    			vo_draw_alpha_rgb24(w,h,src,srca,stride,((uint8_t *) priv->rgbsurface->pixels)+y0*priv->rgbsurface->pitch+x0,priv->rgbsurface->pitch);
+			vo_draw_alpha_rgb24(w,h,dp,src,srca,stride,((uint8_t *) priv->rgbsurface->pixels)+y0*priv->rgbsurface->pitch+x0,priv->rgbsurface->pitch);
 		break;
 		case IMGFMT_RGB32:
 		case IMGFMT_BGR32:
-    			vo_draw_alpha_rgb32(w,h,src,srca,stride,((uint8_t *) priv->rgbsurface->pixels)+y0*priv->rgbsurface->pitch+x0,priv->rgbsurface->pitch);
+			vo_draw_alpha_rgb32(w,h,dp,src,srca,stride,((uint8_t *) priv->rgbsurface->pixels)+y0*priv->rgbsurface->pitch+x0,priv->rgbsurface->pitch);
 		break;
 		}
         }
@@ -1727,6 +1754,18 @@
 	mp_msg(MSGT_VO,MSGL_DBG2, "SDL: Set fullscreen mode\n"); }
     }
     return VO_TRUE;
+  case VOCTRL_YUVSPU_SUPPORT:
+    if (priv->format == IMGFMT_YV12 || priv->format == IMGFMT_I420 || priv->format==IMGFMT_IYUV) return VO_TRUE;
+    return VO_FALSE;
+  case VOCTRL_YUYSPU_SUPPORT:
+    if (priv->format==IMGFMT_YUY2) return VO_TRUE;
+    return VO_FALSE;
+  case VOCTRL_RGBSPU_SUPPORT:
+    if (priv->mode == RGB) return VO_TRUE;
+    return VO_FALSE;
+  case VOCTRL_BGRSPU_SUPPORT:
+    if (priv->mode == BGR) return VO_TRUE;
+    return VO_FALSE;
   }
 
   return VO_NOTIMPL;
--- libvo/vesa_lvo.c	 (revision 22772)
+++ libvo/vesa_lvo.c	 (working copy)
@@ -233,50 +233,51 @@
   }	
 }
 
-static void draw_alpha_null(int x0,int y0, int w,int h, unsigned char* src, unsigned char *srca, int stride)
+static void draw_alpha_null(int x0,int y0, int w,int h, int dp, unsigned char* src, unsigned char *srca, int stride)
 {
   UNUSED(x0);
   UNUSED(y0);
   UNUSED(w);
   UNUSED(h);
+  UNUSED(dp);
   UNUSED(src);
   UNUSED(srca);
   UNUSED(stride);
 }
 
-static void draw_alpha(int x0,int y0, int w,int h, unsigned char* src, unsigned char *srca, int stride)
+static void draw_alpha(int x0,int y0, int w,int h, int dp, unsigned char* src, unsigned char *srca, int stride)
 {
     uint32_t bespitch = /*(*/mga_vid_config.src_width;// + 15) & ~15;
     switch(mga_vid_config.format){
     case IMGFMT_BGR15:
     case IMGFMT_RGB15:
-	vo_draw_alpha_rgb15(w,h,src,srca,stride,lvo_mem+2*(y0*bespitch+x0),2*bespitch);
+	vo_draw_alpha_rgb15(w,h,dp,src,srca,stride,lvo_mem+2*(y0*bespitch+x0),2*bespitch);
 	break;
     case IMGFMT_BGR16:
     case IMGFMT_RGB16:
-	vo_draw_alpha_rgb16(w,h,src,srca,stride,lvo_mem+2*(y0*bespitch+x0),2*bespitch);
+	vo_draw_alpha_rgb16(w,h,dp,src,srca,stride,lvo_mem+2*(y0*bespitch+x0),2*bespitch);
 	break;
     case IMGFMT_BGR24:
     case IMGFMT_RGB24:
-	vo_draw_alpha_rgb24(w,h,src,srca,stride,lvo_mem+3*(y0*bespitch+x0),3*bespitch);
+	vo_draw_alpha_rgb24(w,h,dp,src,srca,stride,lvo_mem+3*(y0*bespitch+x0),3*bespitch);
 	break;
     case IMGFMT_BGR32:
     case IMGFMT_RGB32:
-	vo_draw_alpha_rgb32(w,h,src,srca,stride,lvo_mem+4*(y0*bespitch+x0),4*bespitch);
+	vo_draw_alpha_rgb32(w,h,dp,src,srca,stride,lvo_mem+4*(y0*bespitch+x0),4*bespitch);
 	break;
     case IMGFMT_YV12:
     case IMGFMT_IYUV:
     case IMGFMT_I420:
-        vo_draw_alpha_yv12(w,h,src,srca,stride,lvo_mem+bespitch*y0+x0,bespitch);
+        vo_draw_alpha_yv12(w,h,dp,src,srca,stride,lvo_mem+bespitch*y0+x0,bespitch);
         break;
     case IMGFMT_YUY2:
-        vo_draw_alpha_yuy2(w,h,src,srca,stride,lvo_mem+2*(bespitch*y0+x0),bespitch);
+        vo_draw_alpha_yuy2(w,h,dp,src,srca,stride,lvo_mem+2*(bespitch*y0+x0),bespitch);
         break;
     case IMGFMT_UYVY:
-        vo_draw_alpha_yuy2(w,h,src,srca,stride,lvo_mem+2*(bespitch*y0+x0)+1,bespitch);
+        vo_draw_alpha_yuy2(w,h,dp,src,srca,stride,lvo_mem+2*(bespitch*y0+x0)+1,bespitch);
         break;
     default:
-        draw_alpha_null(x0,y0,w,h,src,srca,stride);
+        draw_alpha_null(x0,y0,w,h,dp,src,srca,stride);
     }
 }
 
--- libvo/vo_aa.c	 (revision 22772)
+++ libvo/vo_aa.c	 (working copy)
@@ -519,7 +519,7 @@
     aa_close(c);
 }
 
-static void draw_alpha(int x,int y, int w,int h, unsigned char* src, unsigned char *srca, int stride){
+static void draw_alpha(int x,int y, int w,int h, int dp, unsigned char* src, unsigned char *srca, int stride){
     int i,j;
     for (i = 0; i < h; i++) {
 	for (j = 0; j < w; j++) {
--- libvo/vo_xv.c	 (revision 22772)
+++ libvo/vo_xv.c	 (working copy)
@@ -93,47 +93,83 @@
 static uint32_t drwX, drwY, drwBorderWidth, drwDepth;
 static uint32_t max_width = 0, max_height = 0; // zero means: not set
 
-static void (*draw_alpha_fnc) (int x0, int y0, int w, int h,
+static int support_yuvspu = VO_FALSE;
+static int support_yuyspu = VO_FALSE;
+
+static void (*draw_alpha_fnc) (int x0, int y0, int w, int h, int dp,
                                unsigned char *src, unsigned char *srca,
                                int stride);
 
-static void draw_alpha_yv12(int x0, int y0, int w, int h,
+static void draw_alpha_yv12(int x0, int y0, int w, int h, int dp,
                             unsigned char *src, unsigned char *srca,
                             int stride)
 {
+    switch (dp)  { /* dp selects which plane of xvimage[current_buf] is the destination; there is no default case, so any dp other than Y/U/V draws nothing */
+	case DEST_PLANES_U:  { /* uses plane index 1 and image_width/2 for the panscan shift; NOTE(review): index 1 is used for U whatever the image format - confirm the plane order for YV12 vs I420 */
+    x0 += image_width/2 * (vo_panscan_x >> 1) / (vo_dwidth + vo_panscan_x);
+    vo_draw_alpha_yv12(w, h, dp, src, srca, stride,
+                       xvimage[current_buf]->data +
+                       xvimage[current_buf]->offsets[1] +
+                       xvimage[current_buf]->pitches[1] * y0 + x0,
+                       xvimage[current_buf]->pitches[1]);
+	  break; }
+	case DEST_PLANES_V:  { /* same as the U case but with plane index 2 */
+    x0 += image_width/2 * (vo_panscan_x >> 1) / (vo_dwidth + vo_panscan_x);
+    vo_draw_alpha_yv12(w, h, dp, src, srca, stride,
+                       xvimage[current_buf]->data +
+                       xvimage[current_buf]->offsets[2] +
+                       xvimage[current_buf]->pitches[2] * y0 + x0,
+                       xvimage[current_buf]->pitches[2]);
+	  break; }
+	case DEST_PLANES_Y: { /* the pre-patch behaviour: plane index 0 with the full image_width panscan shift; last case, so it has no break */
     x0 += image_width * (vo_panscan_x >> 1) / (vo_dwidth + vo_panscan_x);
-    vo_draw_alpha_yv12(w, h, src, srca, stride,
+    vo_draw_alpha_yv12(w, h, dp, src, srca, stride,
                        xvimage[current_buf]->data +
                        xvimage[current_buf]->offsets[0] +
                        xvimage[current_buf]->pitches[0] * y0 + x0,
                        xvimage[current_buf]->pitches[0]);
 }
+	}
 
-static void draw_alpha_yuy2(int x0, int y0, int w, int h,
+}
+
+static void draw_alpha_yuy2(int x0, int y0, int w, int h, int dp,
                             unsigned char *src, unsigned char *srca,
                             int stride)
 {
+  switch (dp) { /* only DEST_PLANES_Y and DEST_PLANES_YUYV are handled; any other dp is a no-op because there is no default case */
+    case DEST_PLANES_Y: /* same destination arithmetic as the pre-patch code, with dp added to the vo_draw_alpha_yuy2 call */
+    x0 += image_width * (vo_panscan_x >> 1) / (vo_dwidth + vo_panscan_x);
+    vo_draw_alpha_yuy2(w, h, dp, src, srca, stride,
+                       xvimage[current_buf]->data +
+                       xvimage[current_buf]->offsets[0] +
+                       xvimage[current_buf]->pitches[0] * y0 + 2 * x0,
+                       xvimage[current_buf]->pitches[0]);
+    break;
+    case DEST_PLANES_YUYV: /* NOTE(review): this case calls vo_draw_alpha_yv12, not vo_draw_alpha_yuy2, on the same packed destination - presumably a deliberate byte-wise blend; confirm */
     x0 += image_width * (vo_panscan_x >> 1) / (vo_dwidth + vo_panscan_x);
-    vo_draw_alpha_yuy2(w, h, src, srca, stride,
+    vo_draw_alpha_yv12(w, h, dp, src, srca, stride,
                        xvimage[current_buf]->data +
                        xvimage[current_buf]->offsets[0] +
                        xvimage[current_buf]->pitches[0] * y0 + 2 * x0,
                        xvimage[current_buf]->pitches[0]);
+    break;
+    }
 }
 
-static void draw_alpha_uyvy(int x0, int y0, int w, int h,
+static void draw_alpha_uyvy(int x0, int y0, int w, int h, int dp,
                             unsigned char *src, unsigned char *srca,
                             int stride)
 {
     x0 += image_width * (vo_panscan_x >> 1) / (vo_dwidth + vo_panscan_x);
-    vo_draw_alpha_yuy2(w, h, src, srca, stride,
+    vo_draw_alpha_yuy2(w, h, dp, src, srca, stride, /* dp is forwarded as-is: unlike draw_alpha_yv12/draw_alpha_yuy2 in this file there is no switch on dp here; the destination is one byte further (+ 1) than in the YUY2 variant */
                        xvimage[current_buf]->data +
                        xvimage[current_buf]->offsets[0] +
                        xvimage[current_buf]->pitches[0] * y0 + 2 * x0 + 1,
                        xvimage[current_buf]->pitches[0]);
 }
 
-static void draw_alpha_null(int x0, int y0, int w, int h,
+static void draw_alpha_null(int x0, int y0, int w, int h, int dp,
                             unsigned char *src, unsigned char *srca,
                             int stride)
 {
@@ -367,11 +403,13 @@
     {
         case IMGFMT_YV12:
         case IMGFMT_I420:
+	    support_yuvspu = VO_TRUE; /* intentionally falls through to IMGFMT_IYUV to select draw_alpha_yv12; NOTE(review): IMGFMT_IYUV itself never sets support_yuvspu, and the flag is not cleared in the lines shown - confirm both are intended */
         case IMGFMT_IYUV:
             draw_alpha_fnc = draw_alpha_yv12;
             break;
         case IMGFMT_YUY2:
         case IMGFMT_YVYU:
+	    support_yuyspu = VO_TRUE; /* set for both IMGFMT_YUY2 and IMGFMT_YVYU; reported back through VOCTRL_YUYSPU_SUPPORT */
             draw_alpha_fnc = draw_alpha_yuy2;
             break;
         case IMGFMT_UYVY:
@@ -951,6 +989,14 @@
         case VOCTRL_UPDATE_SCREENINFO:
             update_xinerama_info();
             return VO_TRUE;
+        case VOCTRL_YUVSPU_SUPPORT:
+            return support_yuvspu;
+        case VOCTRL_RGBSPU_SUPPORT:
+	    return VO_FALSE;
+        case VOCTRL_BGRSPU_SUPPORT:
+	    return VO_FALSE;
+        case VOCTRL_YUYSPU_SUPPORT:
+            return support_yuyspu;
     }
     return VO_NOTIMPL;
 }
--- libvo/vo_fbdev2.c	 (revision 22772)
+++ libvo/vo_fbdev2.c	 (working copy)
@@ -79,7 +79,7 @@
 static int fb_bpp;		// 32: 32  24: 24  16: 16  15: 15
 static size_t fb_size; // size of frame_buffer
 static int fb_line_len; // length of one line in bytes
-static void (*draw_alpha_p)(int w, int h, unsigned char *src,
+static void (*draw_alpha_p)(int w, int h, int dp, unsigned char *src,
 		unsigned char *srca, int stride, unsigned char *dst,
 		int dstride);
 
@@ -317,7 +317,7 @@
 	return 0;
 }
 
-static void draw_alpha(int x0, int y0, int w, int h, unsigned char *src,
+static void draw_alpha(int x0, int y0, int w, int h, int dp, unsigned char *src,
 		unsigned char *srca, int stride)
 {
 	unsigned char *dst;
@@ -330,7 +330,7 @@
 	dst = next_frame + (in_width * y0 + x0) * fb_pixel_size;
 	dstride = in_width * fb_pixel_size;
 #endif
-	(*draw_alpha_p)(w, h, src, srca, stride, dst, dstride);
+	(*draw_alpha_p)(w, h, dp, src, srca, stride, dst, dstride);
 }
 
 static void draw_osd(void)
@@ -403,6 +403,13 @@
   switch (request) {
   case VOCTRL_QUERY_FORMAT:
     return query_format(*((uint32_t*)data));
+  case VOCTRL_YUVSPU_SUPPORT:
+    return VO_FALSE;
+  case VOCTRL_RGBSPU_SUPPORT:
+    return VO_FALSE;
+  case VOCTRL_BGRSPU_SUPPORT:
+    if (fb_bpp == 32 || fb_bpp==24 || fb_bpp==16 || fb_bpp==15) return VO_TRUE;
+    return VO_FALSE;
   }
   return VO_NOTIMPL;
 }
--- libvo/vo_macosx.m	 (revision 22772)
+++ libvo/vo_macosx.m	 (working copy)
@@ -78,17 +78,17 @@
 LIBVO_EXTERN(macosx)
 
 extern void mplayer_put_key(int code);
-extern void vo_draw_text(int dxs,int dys,void (*draw_alpha)(int x0,int y0, int w,int h, unsigned char* src, unsigned char *srca, int stride));
+extern void vo_draw_text(int dxs,int dys,void (*draw_alpha)(int x0,int y0, int w,int h, int dp, unsigned char* src, unsigned char *srca, int stride));
 
-static void draw_alpha(int x0, int y0, int w, int h, unsigned char *src, unsigned char *srca, int stride)
+static void draw_alpha(int x0, int y0, int w, int h, int dp, unsigned char *src, unsigned char *srca, int stride) /* forwards to vo_draw_alpha_rgb32 or vo_draw_alpha_yuy2 according to image_format, with image_data advanced to pixel (x0,y0); dp is passed through unchanged; other formats hit no case and draw nothing */
 {
 	switch (image_format)
 	{
 		case IMGFMT_RGB32:
-			vo_draw_alpha_rgb32(w,h,src,srca,stride,image_data+4*(y0*image_width+x0),4*image_width);
+			vo_draw_alpha_rgb32(w,h,dp,src,srca,stride,image_data+4*(y0*image_width+x0),4*image_width);
 			break;
 		case IMGFMT_YUY2:
-			vo_draw_alpha_yuy2(w,h,src,srca,stride,image_data + (x0 + y0 * image_width) * 2,image_width*2);
+			vo_draw_alpha_yuy2(w,h,dp,src,srca,stride,image_data + (x0 + y0 * image_width) * 2,image_width*2);
 			break;
 	}
 }
--- libvo/vo_dxr3.c	 (revision 22772)
+++ libvo/vo_dxr3.c	 (working copy)
@@ -677,7 +677,7 @@
 	return 0;
 }
 
-static void draw_alpha(int x, int y, int w, int h, unsigned char* src, unsigned char *srca, int srcstride)
+static void draw_alpha(int x, int y, int w, int h, int dp, unsigned char* src, unsigned char *srca, int srcstride)
 {
 #ifdef SPU_SUPPORT
 	unsigned char *buf = &osdpicbuf[(y * osdpicbuf_w) + x];
--- libvo/vo_xvmc.c	 (revision 22772)
+++ libvo/vo_xvmc.c	 (working copy)
@@ -89,12 +89,12 @@
 static int free_element;
 
 
-static void (*draw_osd_fnc)(int x0,int y0, int w,int h, unsigned char* src, unsigned char *srca, int stride);
+static void (*draw_osd_fnc)(int x0,int y0, int w,int h, int dp, unsigned char* src, unsigned char *srca, int stride);
 static void (*clear_osd_fnc)(int x0,int y0, int w,int h);
 static void (*init_osd_fnc)(void);
 
-static void   draw_osd_AI44(int x0,int y0, int w,int h, unsigned char* src, unsigned char *srca, int stride);
-static void   draw_osd_IA44(int x0,int y0, int w,int h, unsigned char* src, unsigned char *srca, int stride);
+static void   draw_osd_AI44(int x0,int y0, int w,int h, int dp, unsigned char* src, unsigned char *srca, int stride);
+static void   draw_osd_IA44(int x0,int y0, int w,int h, int dp, unsigned char* src, unsigned char *srca, int stride);
 static void   clear_osd_subpic(int x0,int y0, int w,int h);
 static void   init_osd_yuv_pal(void);
 
@@ -875,7 +875,7 @@
    subpicture_alloc = 1;
 }
 
-static void draw_osd_IA44(int x0,int y0, int w,int h, unsigned char* src, unsigned char *srca, int stride){
+static void draw_osd_IA44(int x0,int y0, int w,int h, int dp, unsigned char* src, unsigned char *srca, int stride){
 int ox,oy;
 int rez;
 
@@ -895,7 +895,7 @@
    }
 }
 
-static void draw_osd_AI44(int x0,int y0, int w,int h, unsigned char* src, unsigned char *srca, int stride){
+static void draw_osd_AI44(int x0,int y0, int w,int h, int dp, unsigned char* src, unsigned char *srca, int stride){
 int ox,oy;
 int rez;
    if(  mp_msg_test(MSGT_VO,MSGL_DBG4) )
--- libmpcodecs/vf_expand.c	 (revision 22772)
+++ libmpcodecs/vf_expand.c	 (working copy)
@@ -82,7 +82,7 @@
     // TODO  clear left and right side of the image if needed
 }
 
-static void draw_func(int x0,int y0, int w,int h,unsigned char* src, unsigned char *srca, int stride){
+static void draw_func(int x0,int y0, int w,int h,int dp,unsigned char* src, unsigned char *srca, int stride){
     unsigned char* dst;
     if(!vo_osd_changed_flag && vf->dmpi->planes[0]==vf->priv->fb_ptr){
 	// ok, enough to update the area inside the video, leave the black bands
@@ -110,19 +110,19 @@
     switch(vf->dmpi->imgfmt){
     case IMGFMT_BGR15:
     case IMGFMT_RGB15:
-	vo_draw_alpha_rgb15(w,h,src,srca,stride,dst,vf->dmpi->stride[0]);
+	vo_draw_alpha_rgb15(w,h,dp,src,srca,stride,dst,vf->dmpi->stride[0]);
 	break;
     case IMGFMT_BGR16:
     case IMGFMT_RGB16:
-	vo_draw_alpha_rgb16(w,h,src,srca,stride,dst,vf->dmpi->stride[0]);
+	vo_draw_alpha_rgb16(w,h,dp,src,srca,stride,dst,vf->dmpi->stride[0]);
 	break;
     case IMGFMT_BGR24:
     case IMGFMT_RGB24:
-	vo_draw_alpha_rgb24(w,h,src,srca,stride,dst,vf->dmpi->stride[0]);
+	vo_draw_alpha_rgb24(w,h,dp,src,srca,stride,dst,vf->dmpi->stride[0]);
 	break;
     case IMGFMT_BGR32:
     case IMGFMT_RGB32:
-	vo_draw_alpha_rgb32(w,h,src,srca,stride,dst,vf->dmpi->stride[0]);
+	vo_draw_alpha_rgb32(w,h,dp,src,srca,stride,dst,vf->dmpi->stride[0]);
 	break;
     case IMGFMT_YV12:
     case IMGFMT_I420:
@@ -131,13 +131,13 @@
     case IMGFMT_IF09:
     case IMGFMT_Y800:
     case IMGFMT_Y8:
-	vo_draw_alpha_yv12(w,h,src,srca,stride,dst,vf->dmpi->stride[0]);
+	vo_draw_alpha_yv12(w,h,dp,src,srca,stride,dst,vf->dmpi->stride[0]);
 	break;
     case IMGFMT_YUY2:
-	vo_draw_alpha_yuy2(w,h,src,srca,stride,dst,vf->dmpi->stride[0]);
+	vo_draw_alpha_yuy2(w,h,dp,src,srca,stride,dst,vf->dmpi->stride[0]);
 	break;
     case IMGFMT_UYVY:
-	vo_draw_alpha_yuy2(w,h,src,srca,stride,dst+1,vf->dmpi->stride[0]);
+	vo_draw_alpha_yuy2(w,h,dp,src,srca,stride,dst+1,vf->dmpi->stride[0]);
 	break;
     }
 }
--- libmenu/menu.c	 (revision 22772)
+++ libmenu/menu.c	 (working copy)
@@ -20,6 +20,7 @@
 #include "m_option.h"
 #include "m_struct.h"
 #include "menu.h"
+#include "libvo/video_out.h"
 
 extern menu_info_t menu_info_cmdlist;
 extern menu_info_t menu_info_pt;
@@ -255,7 +256,7 @@
 
 ///////////////////////////// Helpers ////////////////////////////////////
 
-typedef void (*draw_alpha_f)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride);
+typedef void (*draw_alpha_f)(int w,int h, int dp, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride);
 
 inline static draw_alpha_f get_draw_alpha(uint32_t fmt) {
   switch(fmt) {
@@ -317,7 +318,7 @@
   while (*txt) {
     unsigned char c=*txt++;
     if ((font=vo_font->font[c])>=0 && (x + vo_font->width[c] <= mpi->w) && (y + vo_font->pic_a[font]->h <= mpi->h))
-      draw_alpha(vo_font->width[c], vo_font->pic_a[font]->h,
+      draw_alpha(vo_font->width[c], vo_font->pic_a[font]->h,DEST_PLANES_Y,
 		 vo_font->pic_b[font]->bmp+vo_font->start[c],
 		 vo_font->pic_a[font]->bmp+vo_font->start[c],
 		 vo_font->pic_a[font]->w,
@@ -484,7 +485,7 @@
       if(font >= 0) {
  	int cs = (vo_font->pic_a[font]->h - vo_font->height) / 2;
 	if ((sx + vo_font->width[c] < xmax)  &&  (sy + vo_font->height < ymax) )
-	  draw_alpha(vo_font->width[c], vo_font->height,
+	  draw_alpha(vo_font->width[c], vo_font->height, DEST_PLANES_Y,
 		     vo_font->pic_b[font]->bmp+vo_font->start[c] +
 		     cs * vo_font->pic_a[font]->w,
 		     vo_font->pic_a[font]->bmp+vo_font->start[c] +
@@ -592,7 +593,7 @@
     char pic[stride*h],pic_alpha[stride*h];
     memset(pic,g,stride*h);
     memset(pic_alpha,alpha,stride*h);
-    draw_alpha(w,h,pic,pic_alpha,stride,
+    draw_alpha(w,h,DEST_PLANES_Y,pic,pic_alpha,stride,
                mpi->planes[0] + y * mpi->stride[0] + x * (mpi->bpp>>3),
                mpi->stride[0]);
   }
