Xbox: Prefer writing raw GPU commands directly

This commit is contained in:
UnknownShadow200 2025-09-03 18:49:12 +10:00
parent 26dc506d53
commit e88058ed37
2 changed files with 67 additions and 43 deletions

View File

@ -97,6 +97,10 @@ static void ResetState(void) {
p = NV2A_set_clear_rect(p, 0, 0, width, height);
pb_end(p);
p = pb_begin();
p = NV2A_reset_all_vertex_attribs(p);
pb_end(p);
}
static GfxResourceID white_square;
@ -418,24 +422,14 @@ static GfxResourceID Gfx_AllocStaticVb(VertexFormat fmt, int count) {
return AllocBuffer(count, strideSizes[fmt]);
}
// Queues a write of a vertex attribute's data address to the register at
//  NV097_SET_VERTEX_DATA_ARRAY_OFFSET + index * 4
// p     - current push buffer write position
// index - attribute index, selects which offset register is written
// data  - address of the attribute data; only the low 26 bits (0x03ffffff) are sent
// Returns whatever pb_push1 returns (presumably the advanced write position)
static uint32_t* PushAttribOffset(uint32_t* p, int index, cc_uint8* data) {
    // Convert via uintptr_t first, so the pointer -> integer conversion is valid
    //  whatever the pointer width is, and only then narrow to 32 bits
    uint32_t offset = (uint32_t)(uintptr_t)data & 0x03ffffff;

    return pb_push1(p, NV097_SET_VERTEX_DATA_ARRAY_OFFSET + index * 4, offset);
}
// Makes vb the current vertex buffer (stored in gfx_vertices), then queues the
//  attribute data offsets for it between pb_begin() and pb_end()
// Offsets used: VERTEX_ATTR_INDEX at +0, COLOUR_ATTR_INDEX at +12, TEXTURE_ATTR_INDEX at +16
// NOTE(review): the PushAttribOffset branch and the NV2A_set_vertex_attrib_pointer calls
//  below appear to queue the same register writes, so each offset gets pushed twice per call.
//  This looks like the old and the new version of the code both being present - confirm
//  which one is meant to remain
void Gfx_BindVb(GfxResourceID vb) {
gfx_vertices = vb;
uint32_t* p = pb_begin();
// TODO: Avoid the same code twice..
if (gfx_format == VERTEX_FORMAT_TEXTURED) {
p = PushAttribOffset(p, VERTEX_ATTR_INDEX, gfx_vertices + 0);
p = PushAttribOffset(p, COLOUR_ATTR_INDEX, gfx_vertices + 12);
p = PushAttribOffset(p, TEXTURE_ATTR_INDEX, gfx_vertices + 16);
} else {
// coloured only format: no texture attribute offset is pushed in this branch
p = PushAttribOffset(p, VERTEX_ATTR_INDEX, gfx_vertices + 0);
p = PushAttribOffset(p, COLOUR_ATTR_INDEX, gfx_vertices + 12);
}
// Unconditional version: all three offsets are pushed regardless of gfx_format
p = NV2A_set_vertex_attrib_pointer(p, VERTEX_ATTR_INDEX, gfx_vertices + 0);
p = NV2A_set_vertex_attrib_pointer(p, COLOUR_ATTR_INDEX, gfx_vertices + 12);
p = NV2A_set_vertex_attrib_pointer(p, TEXTURE_ATTR_INDEX, gfx_vertices + 16);
// Harmless to set TEXTURE_ATTR_INDEX, even when vertex format is coloured only
pb_end(p);
}
@ -627,8 +621,6 @@ void Gfx_SetVertexFormat(VertexFormat fmt) {
gfx_stride = strideSizes[fmt];
uint32_t* p = pb_begin();
// TODO not always call this. But trying to just clear TEXTURE_ATTR_INDEX breaks on XEMU
p = NV2A_reset_all_vertex_attribs(p);
if (fmt == VERTEX_FORMAT_TEXTURED) {
p = NV2A_set_vertex_attrib_format(p, VERTEX_ATTR_INDEX,
@ -642,6 +634,8 @@ void Gfx_SetVertexFormat(VertexFormat fmt) {
NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_F, 3, SIZEOF_VERTEX_COLOURED);
p = NV2A_set_vertex_attrib_format(p, COLOUR_ATTR_INDEX,
NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_D3D, 4, SIZEOF_VERTEX_COLOURED);
p = NV2A_set_vertex_attrib_format(p, TEXTURE_ATTR_INDEX,
NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_F, 0, 0);
}
p = NV2A_set_program_run_offset(p, CalcProgramOffset());

View File

@ -1,12 +1,32 @@
// Flag bit (bit 30) that disables the default increment behaviour when writing multiple registers
// E.g. for a 4 parameter command, such as pb_push4(p, REG, v1, v2, v3, v4)
//  or a NV2A_3D_COMMAND(REG, 4) header word followed by v1, v2, v3, v4:
// - default: REG+0 = v1, REG+4 = v2, REG+8 = v3, REG+12= v4
// - disable: REG = v1, REG = v2, REG = v3, REG = v4
#define NV2A_WRITE_SAME_REGISTER 0x40000000
// Builds a command header word: num_params shifted left by 18, subchan shifted left by 13,
//  OR'd together with cmd in the low bits
#define NV2A_COMMAND(subchan, cmd, num_params) (((num_params) << 18) | ((subchan) << 13) | (cmd))
// Same as NV2A_COMMAND, but always targets the SUBCH_3D subchannel
#define NV2A_3D_COMMAND(cmd, num_params) NV2A_COMMAND(SUBCH_3D, cmd, num_params)
// Writes a 3D command with one parameter at p: the header word, then the parameter
// Returns the position just past the two words that were written
static CC_INLINE uint32_t* NV2A_push1(uint32_t* p, int cmd, uint32_t value) {
    p[0] = NV2A_3D_COMMAND(cmd, 1);
    p[1] = value;
    return p + 2;
}
// Writes a 3D command with two parameters at p: the header word, then val1, then val2
// Returns the position just past the three words that were written
static CC_INLINE uint32_t* NV2A_push2(uint32_t* p, int cmd, uint32_t val1, uint32_t val2) {
    p[0] = NV2A_3D_COMMAND(cmd, 2);
    p[1] = val1;
    p[2] = val2;
    return p + 3;
}
/*########################################################################################################################*
*-----------------------------------------------------Misc commands-------------------------------------------------------*
*#########################################################################################################################*/
// Queues a write of 0 to NV097_SET_CONTROL0 and returns the updated push buffer position
// NOTE(review): there are two consecutive return statements, so only the first one
//  (pb_push1) can ever run. Presumably only one form is meant to remain - confirm
static CC_INLINE uint32_t* NV2A_reset_control0(uint32_t* p) {
// resets "z perspective" flag
return pb_push1(p, NV097_SET_CONTROL0, 0);
return NV2A_push1(p, NV097_SET_CONTROL0, 0);
}
@ -15,7 +35,8 @@ static CC_INLINE uint32_t* NV2A_reset_control0(uint32_t* p) {
*#########################################################################################################################*/
// Queues the surface clip rectangle as a two parameter command:
//  first  parameter is x OR'd with (w << 16)
//  second parameter is y OR'd with (h << 16)
// NOTE(review): there are two consecutive return statements, so only the first one
//  (pb_push2) can ever run. Presumably only one form is meant to remain - confirm
static CC_INLINE uint32_t* NV2A_set_clip_rect(uint32_t* p, int x, int y, int w, int h) {
// NV097_SET_SURFACE_CLIP_HORIZONTAL, then NV097_SET_SURFACE_CLIP_VERTICAL
return pb_push2(p, NV097_SET_SURFACE_CLIP_HORIZONTAL, x | (w << 16), y | (h << 16));
return NV2A_push2(p, NV097_SET_SURFACE_CLIP_HORIZONTAL,
x | (w << 16), y | (h << 16));
}
static CC_INLINE uint32_t* NV2A_set_color_write_mask(uint32_t* p, int r, int g, int b, int a) {
@ -25,7 +46,7 @@ static CC_INLINE uint32_t* NV2A_set_color_write_mask(uint32_t* p, int r, int g,
if (b) mask |= NV097_SET_COLOR_MASK_BLUE_WRITE_ENABLE;
if (a) mask |= NV097_SET_COLOR_MASK_ALPHA_WRITE_ENABLE;
return pb_push1(p, NV097_SET_COLOR_MASK, mask);
return NV2A_push1(p, NV097_SET_COLOR_MASK, mask);
}
@ -33,11 +54,13 @@ static CC_INLINE uint32_t* NV2A_set_color_write_mask(uint32_t* p, int r, int g,
*-----------------------------------------------------State management----------------------------------------------------*
*#########################################################################################################################*/
// Queues a write to NV097_SET_FOG_COLOR, with R, G, B and A each placed into their
//  field via MASK and then OR'd together into a single parameter
// NOTE(review): everything after the first return statement (the 'mask' variable and
//  the NV2A_push1 call) is unreachable. Presumably only one form is meant to remain - confirm
static CC_INLINE uint32_t* NV2A_set_fog_colour(uint32_t* p, int R, int G, int B, int A) {
return pb_push1(p, NV097_SET_FOG_COLOR,
MASK(NV097_SET_FOG_COLOR_RED, R) |
MASK(NV097_SET_FOG_COLOR_GREEN, G) |
MASK(NV097_SET_FOG_COLOR_BLUE, B) |
MASK(NV097_SET_FOG_COLOR_ALPHA, A));
// Same value as above, just computed into a local first
uint32_t mask =
MASK(NV097_SET_FOG_COLOR_RED, R) |
MASK(NV097_SET_FOG_COLOR_GREEN, G) |
MASK(NV097_SET_FOG_COLOR_BLUE, B) |
MASK(NV097_SET_FOG_COLOR_ALPHA, A);
return NV2A_push1(p, NV097_SET_FOG_COLOR, mask);
}
@ -46,11 +69,11 @@ static CC_INLINE uint32_t* NV2A_set_fog_colour(uint32_t* p, int R, int G, int B,
*#########################################################################################################################*/
// Queues a write of (96 + offset) to NV097_SET_TRANSFORM_CONSTANT_LOAD
// NOTE(review): there are two consecutive return statements, so only the first one
//  (pb_push1) can ever run. Presumably only one form is meant to remain - confirm
static CC_INLINE uint32_t* NV2A_set_constant_upload_offset(uint32_t* p, int offset) {
// set shader constants cursor to: C0 + offset
return pb_push1(p, NV097_SET_TRANSFORM_CONSTANT_LOAD, 96 + offset);
return NV2A_push1(p, NV097_SET_TRANSFORM_CONSTANT_LOAD, 96 + offset);
}
// Queues a NV097_SET_TRANSFORM_CONSTANT command followed by num_dwords 32-bit values
//  copied from src, and returns the position just past the copied data
// NOTE(review): both the pb_push call and the NV2A_3D_COMMAND store run, and each one
//  advances p by one word, so p moves two words before the data is copied.
//  Presumably only one of the two header lines is meant to remain - confirm
static CC_INLINE uint32_t* NV2A_upload_constants(uint32_t* p, void* src, int num_dwords) {
pb_push(p++, NV097_SET_TRANSFORM_CONSTANT, num_dwords);
*p++ = NV2A_3D_COMMAND(NV097_SET_TRANSFORM_CONSTANT, num_dwords);
// num_dwords * 4 = size of the constants data in bytes
Mem_Copy(p, src, num_dwords * 4); p += num_dwords;
return p;
}
@ -60,21 +83,21 @@ static CC_INLINE uint32_t* NV2A_upload_constants(uint32_t* p, void* src, int num
*---------------------------------------------------Vertex shader programs-------------------------------------------------*
*#########################################################################################################################*/
// Queues a write of offset to NV097_SET_TRANSFORM_PROGRAM_LOAD
// NOTE(review): there are two consecutive return statements, so only the first one
//  (pb_push1) can ever run. Presumably only one form is meant to remain - confirm
static CC_INLINE uint32_t* NV2A_set_program_upload_offset(uint32_t* p, int offset) {
return pb_push1(p, NV097_SET_TRANSFORM_PROGRAM_LOAD, offset);
return NV2A_push1(p, NV097_SET_TRANSFORM_PROGRAM_LOAD, offset);
}
// Queues size / 16 NV097_SET_TRANSFORM_PROGRAM commands, each followed by 4 words
//  (16 bytes) copied from program, and returns the position just past the last one
// size is in bytes; any remainder when divided by 16 is ignored
// NOTE(review): both the pb_push call and the NV2A_3D_COMMAND store run on every
//  iteration, and each one advances p by one word. Presumably only one of the two
//  header lines is meant to remain - confirm
static CC_INLINE uint32_t* NV2A_upload_program(uint32_t* p, uint32_t* program, int size) {
// Copy program instructions (16 bytes each)
for (int i = 0; i < size / 16; i++, program += 4)
{
pb_push(p++, NV097_SET_TRANSFORM_PROGRAM, 4);
*p++ = NV2A_3D_COMMAND(NV097_SET_TRANSFORM_PROGRAM, 4);
Mem_Copy(p, program, 16); p += 4;
}
return p;
}
// Queues a write of offset to NV097_SET_TRANSFORM_PROGRAM_START
// NOTE(review): there are two consecutive return statements, so only the first one
//  (pb_push1) can ever run. Presumably only one form is meant to remain - confirm
static CC_INLINE uint32_t* NV2A_set_program_run_offset(uint32_t* p, int offset) {
return pb_push1(p, NV097_SET_TRANSFORM_PROGRAM_START, offset);
return NV2A_push1(p, NV097_SET_TRANSFORM_PROGRAM_START, offset);
}
@ -86,20 +109,27 @@ static CC_INLINE uint32_t* NV2A_set_program_run_offset(uint32_t* p, int offset)
// Number of vertex attribute format values written by NV2A_reset_all_vertex_attribs
#define NV2A_MAX_INPUT_ATTRIBS 16
// Queues a NV097_SET_VERTEX_DATA_ARRAY_FORMAT command with NV2A_MAX_INPUT_ATTRIBS parameters,
//  setting every parameter to NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_F with no other fields
// NOTE(review): the header is emitted twice (pb_push, then the NV2A_3D_COMMAND store), and the
//  loop body stores two words per iteration, so 32 values follow a header announcing 16.
//  Presumably only one line of each pair is meant to remain - confirm
static uint32_t* NV2A_reset_all_vertex_attribs(uint32_t* p) {
pb_push(p++, NV097_SET_VERTEX_DATA_ARRAY_FORMAT, NV2A_MAX_INPUT_ATTRIBS);
*p++ = NV2A_3D_COMMAND(NV097_SET_VERTEX_DATA_ARRAY_FORMAT, NV2A_MAX_INPUT_ATTRIBS);
for (int i = 0; i < NV2A_MAX_INPUT_ATTRIBS; i++)
{
*(p++) = NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_F;
*p++ = NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_F;
}
return p;
}
// Queues a write to the register at NV097_SET_VERTEX_DATA_ARRAY_FORMAT + index * 4,
//  with format, size and stride each placed into their field via MASK and OR'd together
// NOTE(review): everything after the first return statement (the 'mask' variable and
//  the NV2A_push1 call) is unreachable. Presumably only one form is meant to remain - confirm
static uint32_t* NV2A_set_vertex_attrib_format(uint32_t* p, int index, int format, int size, int stride) {
return pb_push1(p, NV097_SET_VERTEX_DATA_ARRAY_FORMAT + index * 4,
MASK(NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE, format) |
MASK(NV097_SET_VERTEX_DATA_ARRAY_FORMAT_SIZE, size) |
MASK(NV097_SET_VERTEX_DATA_ARRAY_FORMAT_STRIDE, stride));
// Same value as above, just computed into a local first
uint32_t mask =
MASK(NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE, format) |
MASK(NV097_SET_VERTEX_DATA_ARRAY_FORMAT_SIZE, size) |
MASK(NV097_SET_VERTEX_DATA_ARRAY_FORMAT_STRIDE, stride);
return NV2A_push1(p, NV097_SET_VERTEX_DATA_ARRAY_FORMAT + index * 4, mask);
}
// Queues a write of a vertex attribute's data address to the register at
//  NV097_SET_VERTEX_DATA_ARRAY_OFFSET + index * 4
// p     - current push buffer write position
// index - attribute index, selects which offset register is written
// data  - address of the attribute data; only the low 26 bits (0x03ffffff) are sent
// Returns the push buffer position just past the queued command
static uint32_t* NV2A_set_vertex_attrib_pointer(uint32_t* p, int index, cc_uint8* data) {
    // Convert via uintptr_t first, so the pointer -> integer conversion is valid
    //  whatever the pointer width is, and only then narrow to 32 bits
    uint32_t offset = (uint32_t)(uintptr_t)data & 0x03ffffff;

    return NV2A_push1(p, NV097_SET_VERTEX_DATA_ARRAY_OFFSET + index * 4, offset);
}
@ -108,16 +138,16 @@ static uint32_t* NV2A_set_vertex_attrib_format(uint32_t* p, int index, int forma
*#########################################################################################################################*/
// Queues the clear rectangle as a two parameter command:
//  first  parameter is ((x + w - 1) << 16) | x
//  second parameter is ((y + h - 1) << 16) | y
// NOTE(review): the body as it stands is not well-formed C - a second return statement
//  starts inside the argument list of the pb_push2 call, and the last argument line is
//  repeated. Looks like the old and new version of the call interleaved - confirm which
//  one is meant to remain
static CC_INLINE uint32_t* NV2A_set_clear_rect(uint32_t* p, int x, int y, int w, int h) {
// Sets NV097_SET_CLEAR_RECT_HORIZONTAL then NV097_SET_CLEAR_RECT_VERTICAL
return pb_push2(p, NV097_SET_CLEAR_RECT_HORIZONTAL,
return NV2A_push2(p, NV097_SET_CLEAR_RECT_HORIZONTAL,
((x + w - 1) << 16) | x,
((y + h - 1) << 16) | y);
((y + h - 1) << 16) | y);
}
// Queues the values used when clearing as a two parameter command:
//  first  parameter is the constant 0xFFFFFF00
//  second parameter is colour
// NOTE(review): the body as it stands is not well-formed C - the pb_push2 call and the
//  NV2A_push2 call both end in ');' on the 'colour' line, leaving two complete return
//  statements back to back. Looks like the old and new version of the call are both
//  present - confirm which one is meant to remain
static CC_INLINE uint32_t* NV2A_set_clear_colour(uint32_t* p, uint32_t colour) {
// Sets NV097_SET_ZSTENCIL_CLEAR_VALUE then NV097_SET_COLOR_CLEAR_VALUE
return pb_push2(p, NV097_SET_ZSTENCIL_CLEAR_VALUE,
0xFFFFFF00, // (depth << 8) | stencil
colour);
return NV2A_push2(p, NV097_SET_ZSTENCIL_CLEAR_VALUE,
0xFFFFFF00, // (depth << 8) | stencil,
colour);
}
static CC_INLINE uint32_t* NV2A_start_clear(uint32_t* p, int color, int depth) {
@ -126,6 +156,6 @@ static CC_INLINE uint32_t* NV2A_start_clear(uint32_t* p, int color, int depth) {
if (depth) mask |= NV097_CLEAR_SURFACE_Z;
if (depth) mask |= NV097_CLEAR_SURFACE_STENCIL;
return pb_push1(p, NV097_CLEAR_SURFACE, mask);
return NV2A_push1(p, NV097_CLEAR_SURFACE, mask);
}