/* slouken@libsdl.org */
#include "SDL_config.h"
#include "SDL_video.h"
#include "SDL_blit.h"
/*
 * Select the x86 run-time generated row stretcher.  USE_ASM_STRETCH is
 * defined only when one of the compiler/target tests below matches and
 * SDL_ASSEMBLY_ROUTINES evaluates to non-zero.
 *
 * NOTE(review): the first test checks _MFC_VER, while the inline assembly
 * selection in SDL_SoftStretch checks _MSC_VER -- confirm which macro was
 * intended here.
 */
#if ((defined(_MFC_VER) && defined(_M_IX86)) || \
defined(__WATCOMC__) || \
(defined(__GNUC__) && defined(__i386__))) && SDL_ASSEMBLY_ROUTINES
#define USE_ASM_STRETCH
#endif
#ifdef USE_ASM_STRETCH
/*
 * x86 opcode bytes written into the generated row-copy routine:
 *   PREFIX16   - operand-size override prefix (makes the next op 16-bit)
 *   STORE_BYTE - STOSB
 *   STORE_WORD - STOSW/STOSD (width depends on the operand-size prefix)
 *   LOAD_BYTE  - LODSB
 *   LOAD_WORD  - LODSW/LODSD (width depends on the operand-size prefix)
 *   RETURN     - near RET
 *
 * NOTE(review): this test uses the macro `i386`, whereas the test that
 * enables USE_ASM_STRETCH uses `__i386__`; a compiler that defines only the
 * latter would hit the #error below -- confirm.
 */
#if defined(_M_IX86) || defined(i386)
#define PREFIX16 0x66
#define STORE_BYTE 0xAA
#define STORE_WORD 0xAB
#define LOAD_BYTE 0xAC
#define LOAD_WORD 0xAD
#define RETURN 0xC3
#else
#error Need assembly opcodes for this architecture
#endif
/* Buffer that generate_rowbytes() fills with machine code and that
 * SDL_SoftStretch() calls once per destination row. */
static unsigned char copy_row[4096];
static int generate_rowbytes(int src_w, int dst_w, int bpp)
{
static struct {
int bpp;
int src_w;
int dst_w;
} last;
int i;
int pos, inc;
unsigned char *eip;
unsigned char load, store;
if ( (src_w == last.src_w) &&
(dst_w == last.dst_w) && (bpp == last.bpp) ) {
return(0);
}
last.bpp = bpp;
last.src_w = src_w;
last.dst_w = dst_w;
switch (bpp) {
case 1:
load = LOAD_BYTE;
store = STORE_BYTE;
break;
case 2:
case 4:
load = LOAD_WORD;
store = STORE_WORD;
break;
default:
SDL_SetError("ASM stretch of %d bytes isn't supported\n", bpp);
return(-1);
}
pos = 0x10000;
inc = (src_w << 16) / dst_w;
eip = copy_row;
for ( i=0; i<dst_w; ++i ) {
while ( pos >= 0x10000L ) {
if ( bpp == 2 ) {
*eip++ = PREFIX16;
}
*eip++ = load;
pos -= 0x10000L;
}
if ( bpp == 2 ) {
*eip++ = PREFIX16;
}
*eip++ = store;
pos += inc;
}
*eip++ = RETURN;
if ( eip > (copy_row+sizeof(copy_row)) ) {
SDL_SetError("Copy buffer overflow");
return(-1);
}
return(0);
}
#else
/*
 * DEFINE_COPY_ROW(name, type) expands to
 *
 *     void name(type *src, int src_w, type *dst, int dst_w)
 *
 * which writes dst_w elements to dst, each one a plain copy of an element
 * read from src.  A 16.16 fixed-point accumulator, advanced by
 * (src_w << 16) / dst_w per output element, decides how many source
 * elements are consumed before each store.
 */
#define DEFINE_COPY_ROW(name, type)                             \
void name(type *src, int src_w, type *dst, int dst_w)           \
{                                                               \
    int accum = 0x10000;                                        \
    const int step = (src_w << 16) / dst_w;                     \
    type current = 0;                                           \
    int x;                                                      \
                                                                \
    for ( x = 0; x < dst_w; ++x ) {                             \
        for ( ; accum >= 0x10000L; accum -= 0x10000L ) {        \
            current = *src++;                                   \
        }                                                       \
        *dst++ = current;                                       \
        accum += step;                                          \
    }                                                           \
}
/* Row copiers for 1-, 2- and 4-byte elements */
DEFINE_COPY_ROW(copy_row1, Uint8)
DEFINE_COPY_ROW(copy_row2, Uint16)
DEFINE_COPY_ROW(copy_row4, Uint32)
#endif
/*
 * Stretch one row of 3-byte pixels.
 *
 * Writes dst_w pixels (3 bytes each) to dst.  A 16.16 fixed-point
 * accumulator, advanced by (src_w << 16) / dst_w per output pixel, decides
 * how many source pixels are read before each pixel is stored; the most
 * recently read pixel is the one that gets written.
 */
void copy_row3(Uint8 *src, int src_w, Uint8 *dst, int dst_w)
{
    Uint8 c0, c1, c2;
    int accum = 0x10000;
    const int step = (src_w << 16) / dst_w;
    int x;

    for ( x = 0; x < dst_w; ++x ) {
        /* Consume source pixels until the accumulator drops below 1.0 */
        for ( ; accum >= 0x10000L; accum -= 0x10000L ) {
            c0 = src[0];
            c1 = src[1];
            c2 = src[2];
            src += 3;
        }
        dst[0] = c0;
        dst[1] = c1;
        dst[2] = c2;
        dst += 3;
        accum += step;
    }
}
/*
 * Stretch-copy srcrect of src onto dstrect of dst without any filtering.
 *
 *   src, dst  - surfaces; their formats must have the same BitsPerPixel
 *   srcrect   - area to read, or NULL for the whole source surface
 *   dstrect   - area to fill, or NULL for the whole destination surface
 *
 * Both rectangles must lie entirely inside their surface.  An empty
 * destination rectangle is treated as "nothing to do".
 *
 * Rows are selected with a 16.16 fixed-point accumulator advanced by
 * (srcrect->h << 16) / dstrect->h per destination row; each row is then
 * stretched horizontally by a row copier (generated machine code when
 * USE_ASM_STRETCH is defined, the copy_rowN functions otherwise).
 *
 * Returns 0 on success, -1 with the SDL error set on failure.
 */
int SDL_SoftStretch(SDL_Surface *src, SDL_Rect *srcrect,
                    SDL_Surface *dst, SDL_Rect *dstrect)
{
    int src_locked;
    int dst_locked;
    int pos, inc;
    int dst_maxrow;
    int src_row, dst_row;
    Uint8 *srcp = NULL;
    Uint8 *dstp;
    SDL_Rect full_src;
    SDL_Rect full_dst;
#if defined(USE_ASM_STRETCH) && defined(__GNUC__)
    int u1, u2;
#endif
    const int bpp = dst->format->BytesPerPixel;

    if ( src->format->BitsPerPixel != dst->format->BitsPerPixel ) {
        SDL_SetError("Only works with same format surfaces");
        return(-1);
    }

    /* Verify the blit rectangles */
    if ( srcrect ) {
        if ( (srcrect->x < 0) || (srcrect->y < 0) ||
             ((srcrect->x+srcrect->w) > src->w) ||
             ((srcrect->y+srcrect->h) > src->h) ) {
            SDL_SetError("Invalid source blit rectangle");
            return(-1);
        }
    } else {
        full_src.x = 0;
        full_src.y = 0;
        full_src.w = src->w;
        full_src.h = src->h;
        srcrect = &full_src;
    }
    if ( dstrect ) {
        if ( (dstrect->x < 0) || (dstrect->y < 0) ||
             ((dstrect->x+dstrect->w) > dst->w) ||
             ((dstrect->y+dstrect->h) > dst->h) ) {
            SDL_SetError("Invalid destination blit rectangle");
            return(-1);
        }
    } else {
        full_dst.x = 0;
        full_dst.y = 0;
        full_dst.w = dst->w;
        full_dst.h = dst->h;
        dstrect = &full_dst;
    }

    /* Nothing to draw; also keeps the width/height divisions below (and in
       the row copiers) away from a zero divisor. */
    if ( (dstrect->w <= 0) || (dstrect->h <= 0) ) {
        return(0);
    }

#ifdef USE_ASM_STRETCH
    /* Build the row routine before taking any lock so that a failure here
       cannot leave a surface locked. */
    if ( (bpp != 3) &&
         (generate_rowbytes(srcrect->w, dstrect->w, bpp) < 0) ) {
        return(-1);
    }
#endif

    /* Lock the destination if it's in hardware */
    dst_locked = 0;
    if ( SDL_MUSTLOCK(dst) ) {
        if ( SDL_LockSurface(dst) < 0 ) {
            SDL_SetError("Unable to lock destination surface");
            return(-1);
        }
        dst_locked = 1;
    }
    /* Lock the source if it's in hardware */
    src_locked = 0;
    if ( SDL_MUSTLOCK(src) ) {
        if ( SDL_LockSurface(src) < 0 ) {
            if ( dst_locked ) {
                SDL_UnlockSurface(dst);
            }
            SDL_SetError("Unable to lock source surface");
            return(-1);
        }
        src_locked = 1;
    }

    /* Set up the vertical stepping */
    pos = 0x10000;
    inc = (srcrect->h << 16) / dstrect->h;
    src_row = srcrect->y;
    dst_row = dstrect->y;

    /* Perform the stretch blit, one destination row at a time */
    for ( dst_maxrow = dst_row+dstrect->h; dst_row<dst_maxrow; ++dst_row ) {
        dstp = (Uint8 *)dst->pixels + (dst_row*dst->pitch)
                                    + (dstrect->x*bpp);
        /* Advance to the source row that feeds this destination row */
        while ( pos >= 0x10000L ) {
            srcp = (Uint8 *)src->pixels + (src_row*src->pitch)
                                        + (srcrect->x*bpp);
            ++src_row;
            pos -= 0x10000L;
        }
#ifdef USE_ASM_STRETCH
        switch (bpp) {
            case 3:
                copy_row3(srcp, srcrect->w, dstp, dstrect->w);
                break;
            default:
#ifdef __GNUC__
                /* The generated routine is built from LODS/STOS: it
                   advances ESI/EDI and loads every pixel through EAX, so
                   EAX has to be declared as clobbered. */
                __asm__ __volatile__ (
                "call *%4"
                : "=&D" (u1), "=&S" (u2)
                : "0" (dstp), "1" (srcp), "r" (copy_row)
                : "memory", "eax" );
#elif defined(_MSC_VER) || defined(__WATCOMC__)
                { void *code = copy_row;
                    __asm {
                        push edi
                        push esi
                        mov edi, dstp
                        mov esi, srcp
                        call dword ptr code
                        pop esi
                        pop edi
                    }
                }
#else
#error Need inline assembly for this compiler
#endif
                break;
        }
#else
        switch (bpp) {
            case 1:
                copy_row1(srcp, srcrect->w, dstp, dstrect->w);
                break;
            case 2:
                copy_row2((Uint16 *)srcp, srcrect->w,
                          (Uint16 *)dstp, dstrect->w);
                break;
            case 3:
                copy_row3(srcp, srcrect->w, dstp, dstrect->w);
                break;
            case 4:
                copy_row4((Uint32 *)srcp, srcrect->w,
                          (Uint32 *)dstp, dstrect->w);
                break;
        }
#endif
        pos += inc;
    }

    /* We need to unlock the surfaces if they're locked */
    if ( dst_locked ) {
        SDL_UnlockSurface(dst);
    }
    if ( src_locked ) {
        SDL_UnlockSurface(src);
    }
    return(0);
}