remove "function factory", improve R_DrawSpan (#2341)

* improve performance of R_DrawSpan (from Eternity Engine)
This commit is contained in:
Roman Fomin 2025-07-28 06:21:53 +07:00 committed by GitHub
parent fb64b7304b
commit 69bd1d18b7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 468 additions and 204 deletions

View File

@ -3349,8 +3349,7 @@ static setup_menu_t gen_settings5[] = {
{"Voxels", S_ONOFF | S_STRICT, OFF_CNTR_X, M_SPC, {"voxels_rendering"}},
{"Brightmaps", S_ONOFF | S_STRICT, OFF_CNTR_X, M_SPC, {"brightmaps"},
.action = R_InitDrawFunctions},
{"Brightmaps", S_ONOFF | S_STRICT, OFF_CNTR_X, M_SPC, {"brightmaps"}},
{"Stretch Short Skies", S_ONOFF, OFF_CNTR_X, M_SPC, {"stretchsky"},
.action = R_UpdateStretchSkies},

View File

@ -97,72 +97,84 @@ byte dc_skycolor;
// heightmask is the Tutti-Frutti fix -- killough
#define DRAW_COLUMN(NAME, SRCPIXEL) \
static void DrawColumn##NAME(void) \
{ \
int count = dc_yh - dc_yl + 1; \
\
if (count <= 0) \
return; \
\
if ((unsigned)dc_x >= video.width || dc_yl < 0 \
|| dc_yh >= video.height) \
{ \
I_Error("%i to %i at %i", dc_yl, dc_yh, dc_x); \
} \
\
pixel_t *dest = ylookup[dc_yl] + columnofs[dc_x]; \
\
const fixed_t fracstep = dc_iscale; \
fixed_t frac = dc_texturemid + (dc_yl - centery) * fracstep; \
\
int heightmask = dc_texheight - 1; \
\
if (dc_texheight & heightmask) \
{ \
heightmask++; \
heightmask <<= FRACBITS; \
\
if (frac < 0) \
while ((frac += heightmask) < 0) \
; \
else \
while (frac >= heightmask) \
frac -= heightmask; \
do \
{ \
byte src = dc_source[frac >> FRACBITS]; \
*dest = SRCPIXEL; \
dest += linesize; \
if ((frac += fracstep) >= heightmask) \
frac -= heightmask; \
if (frac < 0) \
frac += heightmask; \
} while (--count); \
} \
else \
{ \
while ((count -= 2) >= 0) \
{ \
byte src = dc_source[(frac >> FRACBITS) & heightmask]; \
*dest = SRCPIXEL; \
dest += linesize; \
frac += fracstep; \
src = dc_source[(frac >> FRACBITS) & heightmask]; \
*dest = SRCPIXEL; \
dest += linesize; \
frac += fracstep; \
} \
if (count & 1) \
{ \
byte src = dc_source[(frac >> FRACBITS) & heightmask]; \
*dest = SRCPIXEL; \
} \
} \
// Draws one vertical column of texels into the framebuffer: rows dc_yl..dc_yh
// at screen column dc_x. Each source texel is looked up through
// colormap[brightmap[texel]][texel] before it is stored.
void R_DrawColumn(void)
{
// Number of rows to draw; nothing to do for an empty or inverted range.
int count = dc_yh - dc_yl + 1;
if (count <= 0)
{
return;
}
// NOTE(review): the two DRAW_COLUMN(...) lines below look like instantiations
// of the removed DRAW_COLUMN macro that the diff view interleaved here --
// confirm they are not part of the new function body.
DRAW_COLUMN(, dc_colormap[0][src])
DRAW_COLUMN(Brightmap, dc_colormap[dc_brightmap[src]][src])
// Bounds validation is only compiled in when RANGECHECK is defined.
#ifdef RANGECHECK
if ((unsigned)dc_x >= video.width || dc_yl < 0 || dc_yh >= video.height)
{
I_Error("%i to %i at %i", dc_yl, dc_yh, dc_x);
}
#endif
// First destination pixel; successive rows are linesize apart.
pixel_t *dest = ylookup[dc_yl] + columnofs[dc_x];
// Texture coordinate of the first row and the per-row increment.
const fixed_t fracstep = dc_iscale;
fixed_t frac = dc_texturemid + (dc_yl - centery) * fracstep;
// Local copies of the globals used inside the loops.
const byte *source = dc_source;
lighttable_t *const *colormap = dc_colormap;
const byte *brightmap = dc_brightmap;
int heightmask = dc_texheight - 1;
byte src;
// Non-zero only when dc_texheight is not a power of two: a plain AND with
// heightmask cannot wrap the coordinate, so wrap it explicitly instead.
if (dc_texheight & heightmask)
{
// heightmask becomes the texture height scaled to the same fixed-point
// format as frac.
// NOTE(review): literal 16 here vs FRACBITS in the branch below -- presumably
// the same value; confirm.
heightmask++;
heightmask <<= 16;
// Bring the starting coordinate into [0, heightmask).
if (frac < 0)
{
while ((frac += heightmask) < 0)
;
}
else
{
while (frac >= heightmask)
{
frac -= heightmask;
}
}
do
{
src = source[frac >> 16];
*dest = colormap[brightmap[src]][src];
dest += linesize;
// Re-wrap after stepping; both directions are handled because fracstep
// may carry frac past either end.
if ((frac += fracstep) >= heightmask)
{
frac -= heightmask;
}
if (frac < 0)
{
frac += heightmask;
}
} while (--count);
}
else
{
// Power-of-two height: wrap with a mask. Two rows per iteration.
while ((count -= 2) >= 0)
{
src = source[(frac >> FRACBITS) & heightmask];
*dest = colormap[brightmap[src]][src];
dest += linesize;
frac += fracstep;
src = source[(frac >> FRACBITS) & heightmask];
*dest = colormap[brightmap[src]][src];
dest += linesize;
frac += fracstep;
}
// The loop leaves count at -1 when the original row count was odd, so this
// draws the single remaining row.
if (count & 1)
{
src = source[(frac >> FRACBITS) & heightmask];
*dest = colormap[brightmap[src]][src];
}
}
}
// Here is the version of R_DrawColumn that deals with translucent // phares
// textures and sprites. It's identical to R_DrawColumn except // |
@ -176,10 +188,84 @@ DRAW_COLUMN(Brightmap, dc_colormap[dc_brightmap[src]][src])
// opaque' decision is made outside this routine, not down where the
// actual code differences are.
DRAW_COLUMN(TL,
tranmap[(*dest << 8) + dc_colormap[0][src]])
DRAW_COLUMN(TLBrightmap,
tranmap[(*dest << 8) + dc_colormap[dc_brightmap[src]][src]])
// Translucent column drawer: draws rows dc_yl..dc_yh of screen column dc_x.
// Each source texel is mapped through colormap[brightmap[texel]][texel] and
// then blended with the pixel already in the framebuffer via
// tranmap[(existing << 8) + mapped].
//
// Inputs are the dc_* globals plus tranmap; the only output is the
// framebuffer. Returns immediately when the row range is empty.
void R_DrawTLColumn(void)
{
    int count = dc_yh - dc_yl + 1;

    if (count <= 0)
    {
        return;
    }

#ifdef RANGECHECK
    if ((unsigned)dc_x >= video.width || dc_yl < 0 || dc_yh >= video.height)
    {
        I_Error("%i to %i at %i", dc_yl, dc_yh, dc_x);
    }
#endif

    // First destination pixel; successive rows are linesize apart.
    pixel_t *dest = ylookup[dc_yl] + columnofs[dc_x];

    // Texture coordinate of the first row and the per-row increment.
    const fixed_t fracstep = dc_iscale;
    fixed_t frac = dc_texturemid + (dc_yl - centery) * fracstep;

    // Local copies of the globals used inside the loops.
    const byte *source = dc_source;
    lighttable_t *const *colormap = dc_colormap;
    const byte *brightmap = dc_brightmap;

    int heightmask = dc_texheight - 1;
    byte src;

    if (dc_texheight & heightmask)
    {
        // Texture height is not a power of two, so masking cannot wrap the
        // coordinate. Turn heightmask into the height in fixed-point units
        // and wrap by hand. FRACBITS is used throughout so this branch
        // agrees with the power-of-two branch below.
        heightmask++;
        heightmask <<= FRACBITS;

        // Bring the starting coordinate into [0, heightmask).
        if (frac < 0)
        {
            while ((frac += heightmask) < 0)
                ;
        }
        else
        {
            while (frac >= heightmask)
            {
                frac -= heightmask;
            }
        }

        do
        {
            src = source[frac >> FRACBITS];
            *dest = tranmap[(*dest << 8) + colormap[brightmap[src]][src]];
            dest += linesize;

            // Re-wrap after stepping, in whichever direction frac left
            // the valid range.
            if ((frac += fracstep) >= heightmask)
            {
                frac -= heightmask;
            }
            if (frac < 0)
            {
                frac += heightmask;
            }
        } while (--count);
    }
    else
    {
        // Power-of-two height: wrap with a mask. Two rows per iteration.
        while ((count -= 2) >= 0)
        {
            src = source[(frac >> FRACBITS) & heightmask];
            *dest = tranmap[(*dest << 8) + colormap[brightmap[src]][src]];
            dest += linesize;
            frac += fracstep;

            src = source[(frac >> FRACBITS) & heightmask];
            *dest = tranmap[(*dest << 8) + colormap[brightmap[src]][src]];
            dest += linesize;
            frac += fracstep;
        }

        // count is -1 here when the original row count was odd: draw the
        // one remaining row.
        if (count & 1)
        {
            src = source[(frac >> FRACBITS) & heightmask];
            *dest = tranmap[(*dest << 8) + colormap[brightmap[src]][src]];
        }
    }
}
//
// Sky drawing: for showing just a color above the texture
@ -703,10 +789,85 @@ void R_SetFuzzColumnMode(void)
byte *dc_translation, *translationtables;
DRAW_COLUMN(TR,
dc_colormap[0][dc_translation[src]])
DRAW_COLUMN(TRBrightmap,
dc_colormap[dc_brightmap[src]][dc_translation[src]])
// Colour-translated column drawer: draws rows dc_yl..dc_yh of screen column
// dc_x. Each source texel selects its colormap row through brightmap[texel]
// and is remapped through dc_translation before the colormap lookup, i.e.
// colormap[brightmap[texel]][translation[texel]].
//
// Inputs are the dc_* globals; the only output is the framebuffer. Returns
// immediately when the row range is empty.
void R_DrawTranslatedColumn(void)
{
    int count = dc_yh - dc_yl + 1;

    if (count <= 0)
    {
        return;
    }

#ifdef RANGECHECK
    if ((unsigned)dc_x >= video.width || dc_yl < 0 || dc_yh >= video.height)
    {
        I_Error("%i to %i at %i", dc_yl, dc_yh, dc_x);
    }
#endif

    // First destination pixel; successive rows are linesize apart.
    pixel_t *dest = ylookup[dc_yl] + columnofs[dc_x];

    // Texture coordinate of the first row and the per-row increment.
    const fixed_t fracstep = dc_iscale;
    fixed_t frac = dc_texturemid + (dc_yl - centery) * fracstep;

    // Local copies of the globals used inside the loops.
    const byte *source = dc_source;
    const byte *translation = dc_translation;
    lighttable_t *const *colormap = dc_colormap;
    const byte *brightmap = dc_brightmap;

    int heightmask = dc_texheight - 1;
    byte src;

    if (dc_texheight & heightmask)
    {
        // Texture height is not a power of two, so masking cannot wrap the
        // coordinate. Turn heightmask into the height in fixed-point units
        // and wrap by hand. FRACBITS is used throughout so this branch
        // agrees with the power-of-two branch below.
        heightmask++;
        heightmask <<= FRACBITS;

        // Bring the starting coordinate into [0, heightmask).
        if (frac < 0)
        {
            while ((frac += heightmask) < 0)
                ;
        }
        else
        {
            while (frac >= heightmask)
            {
                frac -= heightmask;
            }
        }

        do
        {
            src = source[frac >> FRACBITS];
            *dest = colormap[brightmap[src]][translation[src]];
            dest += linesize;

            // Re-wrap after stepping, in whichever direction frac left
            // the valid range.
            if ((frac += fracstep) >= heightmask)
            {
                frac -= heightmask;
            }
            if (frac < 0)
            {
                frac += heightmask;
            }
        } while (--count);
    }
    else
    {
        // Power-of-two height: wrap with a mask. Two rows per iteration.
        while ((count -= 2) >= 0)
        {
            src = source[(frac >> FRACBITS) & heightmask];
            *dest = colormap[brightmap[src]][translation[src]];
            dest += linesize;
            frac += fracstep;

            src = source[(frac >> FRACBITS) & heightmask];
            *dest = colormap[brightmap[src]][translation[src]];
            dest += linesize;
            frac += fracstep;
        }

        // count is -1 here when the original row count was odd: draw the
        // one remaining row.
        if (count & 1)
        {
            src = source[(frac >> FRACBITS) & heightmask];
            *dest = colormap[brightmap[src]][translation[src]];
        }
    }
}
//
// R_InitTranslationTables
@ -770,94 +931,65 @@ fixed_t ds_ystep;
// start of a 64*64 tile image
byte *ds_source;
#define R_DRAW_SPAN(NAME, SRCPIXEL) \
static void DrawSpan##NAME(void) \
{ \
pixel_t *dest = ylookup[ds_y] + columnofs[ds_x1]; \
\
unsigned count = ds_x2 - ds_x1 + 1; \
\
unsigned xtemp, ytemp, spot; \
\
while (count >= 4) \
{ \
byte src; \
ytemp = (ds_yfrac >> 10) & 0x0FC0; \
xtemp = (ds_xfrac >> 16) & 0x003F; \
spot = xtemp | ytemp; \
ds_xfrac += ds_xstep; \
ds_yfrac += ds_ystep; \
src = ds_source[spot]; \
dest[0] = SRCPIXEL; \
\
ytemp = (ds_yfrac >> 10) & 0x0FC0; \
xtemp = (ds_xfrac >> 16) & 0x003F; \
spot = xtemp | ytemp; \
ds_xfrac += ds_xstep; \
ds_yfrac += ds_ystep; \
src = ds_source[spot]; \
dest[1] = SRCPIXEL; \
\
ytemp = (ds_yfrac >> 10) & 0x0FC0; \
xtemp = (ds_xfrac >> 16) & 0x003F; \
spot = xtemp | ytemp; \
ds_xfrac += ds_xstep; \
ds_yfrac += ds_ystep; \
src = ds_source[spot]; \
dest[2] = SRCPIXEL; \
\
ytemp = (ds_yfrac >> 10) & 0x0FC0; \
xtemp = (ds_xfrac >> 16) & 0x003F; \
spot = xtemp | ytemp; \
ds_xfrac += ds_xstep; \
ds_yfrac += ds_ystep; \
src = ds_source[spot]; \
dest[3] = SRCPIXEL; \
\
dest += 4; \
count -= 4; \
} \
\
while (count) \
{ \
byte src; \
ytemp = (ds_yfrac >> 10) & 0x0FC0; \
xtemp = (ds_xfrac >> 16) & 0x003F; \
spot = xtemp | ytemp; \
ds_xfrac += ds_xstep; \
ds_yfrac += ds_ystep; \
src = ds_source[spot]; \
*dest++ = SRCPIXEL; \
count--; \
} \
}
R_DRAW_SPAN(, ds_colormap[0][src])
R_DRAW_SPAN(Brightmap, ds_colormap[ds_brightmap[src]][src])
void (*R_DrawColumn)(void) = DrawColumn;
void (*R_DrawTLColumn)(void) = DrawColumnTL;
void (*R_DrawTranslatedColumn)(void) = DrawColumnTR;
void (*R_DrawSpan)(void) = DrawSpan;
void R_InitDrawFunctions(void)
// Draws one horizontal span into the framebuffer: columns ds_x1..ds_x2 of row
// ds_y. Each texel fetched from ds_source is looked up through
// colormap[brightmap[texel]][texel] before it is stored.
//
// NOTE(review): the lines below that mention local_brightmaps, the bare
// `else`, and the assignments to R_DrawColumn / R_DrawTLColumn /
// R_DrawTranslatedColumn / R_DrawSpan look like the body of the removed
// R_InitDrawFunctions that the diff view interleaved here -- confirm they are
// not part of the new function.
void R_DrawSpan(void)
{
boolean local_brightmaps = (STRICTMODE(brightmaps) || force_brightmaps);
// Number of pixels in the span.
// NOTE(review): count is signed and the tail loop is `while (count--)`; this
// presumably relies on callers guaranteeing ds_x2 >= ds_x1 - 1 -- verify.
int count = ds_x2 - ds_x1 + 1;
// First destination pixel; the span is written left to right.
pixel_t *dest = ylookup[ds_y] + columnofs[ds_x1];
// Local copies of the globals used inside the loops.
const byte *source = ds_source;
lighttable_t *const *colormap = ds_colormap;
const byte *brightmap = ds_brightmap;
if (local_brightmaps)
// SoM: we only need 6 bits for the integer part (0 thru 63) so the rest
// can be used for the fraction part. This allows calculation of the memory
// address in the texture with two shifts, an OR and one AND.
// NOTE(review): the `<< 10` is applied before the conversion to unsigned; if
// fixed_t is a signed type this left shift can overflow -- consider casting
// the operand to unsigned first. TODO confirm fixed_t's signedness.
unsigned int xf = ds_xfrac << 10, yf = ds_yfrac << 10;
const unsigned int xs = ds_xstep << 10, ys = ds_ystep << 10;
// Assuming 32-bit unsigned int: (xf >> XSHIFT) & XMASK puts the top 6 bits of
// xf in bits 6..11 of the index, and yf >> YSHIFT puts the top 6 bits of yf
// in bits 0..5.
// NOTE(review): the removed R_DRAW_SPAN macro shown earlier in this diff put
// the y coordinate in bits 6..11 and the x coordinate in bits 0..5 -- confirm
// the swapped roles here are intended.
#define XSHIFT (32 - 6 - 6)
#define XMASK (63 * 64) // 0x0FC0
#define YSHIFT (32 - 6)
byte src;
// Main loop: four pixels per iteration.
while (count >= 4)
{
R_DrawColumn = DrawColumnBrightmap;
R_DrawTLColumn = DrawColumnTLBrightmap;
R_DrawTranslatedColumn = DrawColumnTRBrightmap;
R_DrawSpan = DrawSpanBrightmap;
// SoM: Why didn't I see this earlier? the spot variable is a waste now
// because we don't have the uber complicated math to calculate it now,
// so that was a memory write we didn't need!
src = source[((xf >> XSHIFT) & XMASK) | (yf >> YSHIFT)];
dest[0] = colormap[brightmap[src]][src];
xf += xs;
yf += ys;
src = source[((xf >> XSHIFT) & XMASK) | (yf >> YSHIFT)];
dest[1] = colormap[brightmap[src]][src];
xf += xs;
yf += ys;
src = source[((xf >> XSHIFT) & XMASK) | (yf >> YSHIFT)];
dest[2] = colormap[brightmap[src]][src];
xf += xs;
yf += ys;
src = source[((xf >> XSHIFT) & XMASK) | (yf >> YSHIFT)];
dest[3] = colormap[brightmap[src]][src];
xf += xs;
yf += ys;
dest += 4;
count -= 4;
}
else
// Tail loop: the pixels left over after the four-at-a-time loop.
while (count--)
{
R_DrawColumn = DrawColumn;
R_DrawTLColumn = DrawColumnTL;
R_DrawTranslatedColumn = DrawColumnTR;
R_DrawSpan = DrawSpan;
src = source[((xf >> XSHIFT) & XMASK) | (yf >> YSHIFT)];
*dest++ = colormap[brightmap[src]][src];
xf += xs;
yf += ys;
}
// The helper macros are local to this function.
#undef XSHIFT
#undef XMASK
#undef YSHIFT
}
void R_InitBufferRes(void)

View File

@ -39,8 +39,8 @@ extern const byte *dc_brightmap;
// The span blitting interface.
// Hook in assembler or system specific BLT here.
extern void (*R_DrawColumn)(void);
extern void (*R_DrawTLColumn)(void); // drawing translucent textures // phares
void R_DrawColumn(void);
void R_DrawTLColumn(void); // drawing translucent textures // phares
extern void (*R_DrawFuzzColumn)(void); // The Spectre/Invisibility effect.
// [crispy] draw fuzz effect independent of rendering frame rate
@ -65,7 +65,7 @@ void R_DrawSkyColumnMasked(void);
// Draw with color translation tables, for player sprite rendering,
// Green/Red/Blue/Indigo shirts.
extern void (*R_DrawTranslatedColumn)(void);
void R_DrawTranslatedColumn(void);
extern lighttable_t *ds_colormap[2];
@ -84,7 +84,7 @@ extern byte *dc_translation;
extern const byte *ds_brightmap;
// Span blitting for rows, floor/ceiling. No Spectre effect needed.
extern void (*R_DrawSpan)(void);
void R_DrawSpan(void);
void R_InitBuffer(void);
@ -101,8 +101,6 @@ void R_DrawViewBorder(void);
void R_InitBufferRes(void);
void R_InitDrawFunctions(void);
#endif
//----------------------------------------------------------------------------

View File

@ -694,7 +694,6 @@ void R_Init (void)
R_SetFuzzColumnMode();
colfunc = R_DrawColumn;
R_InitDrawFunctions();
}
//

View File

@ -257,49 +257,185 @@ static byte *translation1, *translation2;
static void (*drawcolfunc)(const patch_column_t *patchcol);
#define DRAW_COLUMN(NAME, SRCPIXEL) \
static void DrawPatchColumn##NAME(const patch_column_t *patchcol) \
{ \
int count = patchcol->y2 - patchcol->y1 + 1; \
\
if (count <= 0) \
return; \
\
if ((unsigned int)patchcol->x >= (unsigned int)video.width \
|| (unsigned int)patchcol->y1 >= (unsigned int)video.height) \
{ \
I_Error("%i to %i at %i", patchcol->y1, patchcol->y2, \
patchcol->x); \
} \
\
pixel_t *dest = V_ADDRESS(dest_screen, patchcol->x, patchcol->y1); \
\
const fixed_t fracstep = patchcol->step; \
fixed_t frac = \
patchcol->frac + ((patchcol->y1 * fracstep) & FRACMASK); \
\
const byte *source = patchcol->source; \
\
while ((count -= 2) >= 0) \
{ \
*dest = SRCPIXEL; \
dest += linesize; \
frac += fracstep; \
*dest = SRCPIXEL; \
dest += linesize; \
frac += fracstep; \
} \
if (count & 1) \
{ \
*dest = SRCPIXEL; \
} \
// Draws one vertical run of a patch column, rows patchcol->y1..patchcol->y2
// at column patchcol->x of dest_screen, copying source texels unmodified.
static void DrawPatchColumn(const patch_column_t *patchcol)
{
// Number of rows to draw; nothing to do for an empty or inverted range.
int count = patchcol->y2 - patchcol->y1 + 1;
if (count <= 0)
{
return;
}
// NOTE(review): the five DRAW_COLUMN(...) lines below look like
// instantiations of the removed DRAW_COLUMN macro that the diff view
// interleaved here -- confirm they are not part of the new function body.
DRAW_COLUMN(, source[frac >> FRACBITS])
DRAW_COLUMN(TR, translation[source[frac >> FRACBITS]])
DRAW_COLUMN(TRTR, translation2[translation1[source[frac >> FRACBITS]]])
DRAW_COLUMN(TL, tranmap[(*dest << 8) + source[frac >> FRACBITS]])
DRAW_COLUMN(TRTL, tranmap[(*dest << 8) + translation[source[frac >> FRACBITS]]])
// Bounds validation is only compiled in when RANGECHECK is defined. Only x
// and the starting row y1 are checked here; y2 is not.
#ifdef RANGECHECK
if ((unsigned int)patchcol->x >= (unsigned int)video.width
|| (unsigned int)patchcol->y1 >= (unsigned int)video.height)
{
I_Error("%i to %i at %i", patchcol->y1, patchcol->y2, patchcol->x);
}
#endif
// First destination pixel; successive rows are linesize apart.
pixel_t *dest = V_ADDRESS(dest_screen, patchcol->x, patchcol->y1);
// Per-row texture step, and the starting coordinate offset by the fractional
// part of y1 * step.
const fixed_t fracstep = patchcol->step;
fixed_t frac = patchcol->frac + ((patchcol->y1 * fracstep) & FRACMASK);
const byte *source = patchcol->source;
// Two rows per iteration.
while ((count -= 2) >= 0)
{
*dest = source[frac >> FRACBITS];
dest += linesize;
frac += fracstep;
*dest = source[frac >> FRACBITS];
dest += linesize;
frac += fracstep;
}
// The loop leaves count at -1 when the original row count was odd, so this
// draws the single remaining row.
if (count & 1)
{
*dest = source[frac >> FRACBITS];
}
}
// Draws one vertical run of a patch column (rows y1..y2 at column x of
// dest_screen), remapping every source texel through the `translation`
// table before it is written.
static void DrawPatchColumnTR(const patch_column_t *patchcol)
{
    const int height = patchcol->y2 - patchcol->y1 + 1;

    if (height < 1)
    {
        return;
    }

#ifdef RANGECHECK
    if ((unsigned int)patchcol->x >= (unsigned int)video.width
        || (unsigned int)patchcol->y1 >= (unsigned int)video.height)
    {
        I_Error("%i to %i at %i", patchcol->y1, patchcol->y2, patchcol->x);
    }
#endif

    const byte *texels = patchcol->source;
    const fixed_t step = patchcol->step;
    fixed_t pos = patchcol->frac + ((patchcol->y1 * step) & FRACMASK);
    pixel_t *out = V_ADDRESS(dest_screen, patchcol->x, patchcol->y1);

    // Rows are emitted in pairs; an odd trailing row is handled afterwards.
    for (int pairs = height >> 1; pairs > 0; --pairs)
    {
        *out = translation[texels[pos >> FRACBITS]];
        out += linesize;
        pos += step;

        *out = translation[texels[pos >> FRACBITS]];
        out += linesize;
        pos += step;
    }

    if (height & 1)
    {
        *out = translation[texels[pos >> FRACBITS]];
    }
}
// Draws one vertical run of a patch column (rows y1..y2 at column x of
// dest_screen), remapping every source texel first through `translation1`
// and then through `translation2` before it is written.
static void DrawPatchColumnTRTR(const patch_column_t *patchcol)
{
    const int height = patchcol->y2 - patchcol->y1 + 1;

    if (height < 1)
    {
        return;
    }

#ifdef RANGECHECK
    if ((unsigned int)patchcol->x >= (unsigned int)video.width
        || (unsigned int)patchcol->y1 >= (unsigned int)video.height)
    {
        I_Error("%i to %i at %i", patchcol->y1, patchcol->y2, patchcol->x);
    }
#endif

    const byte *texels = patchcol->source;
    const fixed_t step = patchcol->step;
    fixed_t pos = patchcol->frac + ((patchcol->y1 * step) & FRACMASK);
    pixel_t *out = V_ADDRESS(dest_screen, patchcol->x, patchcol->y1);

    // Rows are emitted in pairs; an odd trailing row is handled afterwards.
    for (int pairs = height >> 1; pairs > 0; --pairs)
    {
        *out = translation2[translation1[texels[pos >> FRACBITS]]];
        out += linesize;
        pos += step;

        *out = translation2[translation1[texels[pos >> FRACBITS]]];
        out += linesize;
        pos += step;
    }

    if (height & 1)
    {
        *out = translation2[translation1[texels[pos >> FRACBITS]]];
    }
}
// Draws one vertical run of a patch column (rows y1..y2 at column x of
// dest_screen) translucently: each source texel is combined with the pixel
// already on screen via tranmap[(existing << 8) + texel].
static void DrawPatchColumnTL(const patch_column_t *patchcol)
{
    const int height = patchcol->y2 - patchcol->y1 + 1;

    if (height < 1)
    {
        return;
    }

#ifdef RANGECHECK
    if ((unsigned int)patchcol->x >= (unsigned int)video.width
        || (unsigned int)patchcol->y1 >= (unsigned int)video.height)
    {
        I_Error("%i to %i at %i", patchcol->y1, patchcol->y2, patchcol->x);
    }
#endif

    const byte *texels = patchcol->source;
    const fixed_t step = patchcol->step;
    fixed_t pos = patchcol->frac + ((patchcol->y1 * step) & FRACMASK);
    pixel_t *out = V_ADDRESS(dest_screen, patchcol->x, patchcol->y1);

    // Rows are emitted in pairs; an odd trailing row is handled afterwards.
    for (int pairs = height >> 1; pairs > 0; --pairs)
    {
        *out = tranmap[(*out << 8) + texels[pos >> FRACBITS]];
        out += linesize;
        pos += step;

        *out = tranmap[(*out << 8) + texels[pos >> FRACBITS]];
        out += linesize;
        pos += step;
    }

    if (height & 1)
    {
        *out = tranmap[(*out << 8) + texels[pos >> FRACBITS]];
    }
}
// Draws one vertical run of a patch column (rows y1..y2 at column x of
// dest_screen) with both translation and translucency: each source texel is
// remapped through `translation`, then combined with the pixel already on
// screen via tranmap[(existing << 8) + remapped].
static void DrawPatchColumnTRTL(const patch_column_t *patchcol)
{
    const int height = patchcol->y2 - patchcol->y1 + 1;

    if (height < 1)
    {
        return;
    }

#ifdef RANGECHECK
    if ((unsigned int)patchcol->x >= (unsigned int)video.width
        || (unsigned int)patchcol->y1 >= (unsigned int)video.height)
    {
        I_Error("%i to %i at %i", patchcol->y1, patchcol->y2, patchcol->x);
    }
#endif

    const byte *texels = patchcol->source;
    const fixed_t step = patchcol->step;
    fixed_t pos = patchcol->frac + ((patchcol->y1 * step) & FRACMASK);
    pixel_t *out = V_ADDRESS(dest_screen, patchcol->x, patchcol->y1);

    // Rows are emitted in pairs; an odd trailing row is handled afterwards.
    for (int pairs = height >> 1; pairs > 0; --pairs)
    {
        *out = tranmap[(*out << 8) + translation[texels[pos >> FRACBITS]]];
        out += linesize;
        pos += step;

        *out = tranmap[(*out << 8) + translation[texels[pos >> FRACBITS]]];
        out += linesize;
        pos += step;
    }

    if (height & 1)
    {
        *out = tranmap[(*out << 8) + translation[texels[pos >> FRACBITS]]];
    }
}
static void DrawMaskedColumn(patch_column_t *patchcol, const int ytop,
column_t *column)