From b4989b62a136b0d69967fe897fcfd57a5b052de2 Mon Sep 17 00:00:00 2001 From: UnknownShadow200 Date: Fri, 18 Jul 2025 22:26:16 +1000 Subject: [PATCH] Revert "N64: Optimise RSP transform code, reduces time from ~13.6 to ~11.1 ms on a moderately complex world with ~3,500 quads" This reverts commit 9f939692118622088634ac8b067abb7cf8531aa4. --- misc/n64/rsp_gpu.S | 503 ++++++++++++++++++++++----------------------- 1 file changed, 241 insertions(+), 262 deletions(-) diff --git a/misc/n64/rsp_gpu.S b/misc/n64/rsp_gpu.S index bf061f210..0d9faff54 100644 --- a/misc/n64/rsp_gpu.S +++ b/misc/n64/rsp_gpu.S @@ -11,8 +11,6 @@ #define zzzzZZZZ h2 #define wwwwWWWW h3 -#define XYZ_CLIP_FLAGS 0x707 // Isolate -X/Y/Z and +X/Y/Z clipping flags - #define SCREEN_VTX_CS_POSi 0 // X, Y, Z, W (all 32-bit) #define SCREEN_VTX_CS_POSf 8 // X, Y, Z, W (all 32-bit) @@ -27,10 +25,14 @@ #define SCREEN_VTX_INVW 36 // 32-bit #define SCREEN_VTX_SIZE 40 -#define V0_OFFSET 0 * SCREEN_VTX_SIZE -#define V1_OFFSET 1 * SCREEN_VTX_SIZE -#define V2_OFFSET 2 * SCREEN_VTX_SIZE -#define V3_OFFSET 3 * SCREEN_VTX_SIZE +// Bytes 0-39 use the same layout as the SCREEN_VTX_* fields +#define PRIM_VTX_TRCODE 40 // trivial-reject clipping flags (against -w/+w) +#define PRIM_VTX_SIZE 48 + +#define V0_OFFSET 0 * PRIM_VTX_SIZE +#define V1_OFFSET 1 * PRIM_VTX_SIZE +#define V2_OFFSET 2 * PRIM_VTX_SIZE +#define V3_OFFSET 3 * PRIM_VTX_SIZE .data @@ -71,7 +73,7 @@ DRAW_TRI_RA: .word 0 .bss .align 3 -VERTEX_CACHE: .dcb.b SCREEN_VTX_SIZE * 4 +VERTEX_CACHE: .dcb.b PRIM_VTX_SIZE * 4 .text @@ -162,6 +164,193 @@ GPUCmd_MatrixLoad: #undef dst .endfunc + .align 3 + .func GPUCmd_DrawQuad +GPUCmd_DrawQuad: // Transforms 4 input vertices by GPU_MATRIX_MVP into VERTEX_CACHE, stores a trivial-reject code per vertex, then draws triangles (v0, v1, v2) and (v0, v2, v3) + #define vtx a0 + #define mtx_ptr s0 + #define src_ptr s4 + + #define v___ $v01 + + #define vmtx0_i $v16 // m00 m01 m02 m03 + #define vmtx0_f $v17 + #define vmtx1_i $v18 // m10 m11 m12 m13 + #define vmtx1_f $v19 + #define vmtx2_i $v20 // m20 m21 m22 m23 + #define vmtx2_f $v21 + #define vmtx3_i $v22 // m30 m31 m32 m33 + #define vmtx3_f $v23 + + #define vpos $v24 +
#define vcol $v25 + #define vtex $v26 + #define vcspos_i $v28 + #define vcspos_f $v29 + + #define x e0 + #define y e1 + #define z e2 + #define w e3 + + addi src_ptr, rspq_dmem_buf_ptr, %lo(RSPQ_DMEM_BUFFER) - 64 // src_ptr -> the 64 bytes (4 vertices x 16 bytes) ending at rspq_dmem_buf_ptr + RSPQ_DMEM_BUFFER + li vtx, %lo(VERTEX_CACHE) + + li mtx_ptr, %lo(GPU_MATRIX_MVP) + lqv vmtx0_i, 0x00,mtx_ptr // [m00.I m01.I m02.I m03.I m00.I m01.I m02.I m03.I] + lqv vmtx1_i, 0x10,mtx_ptr // etc + lqv vmtx2_i, 0x20,mtx_ptr + lqv vmtx3_i, 0x30,mtx_ptr + lqv vmtx0_f, 0x40,mtx_ptr + lqv vmtx1_f, 0x50,mtx_ptr + lqv vmtx2_f, 0x60,mtx_ptr + lqv vmtx3_f, 0x70,mtx_ptr + +### VERTEX 0 and 1 (transformed together: v0 loaded at element e0, v1 at element e4) + ldv vpos.e0, 0, src_ptr // Load v0 X, Y, Z + ldv vpos.e4, 16, src_ptr // Load v1 X, Y, Z + + # matrix multiply + vmudn v___, vmtx0_f, vpos.xxxxXXXX + vmadh v___, vmtx0_i, vpos.xxxxXXXX + vmadn v___, vmtx1_f, vpos.yyyyYYYY + vmadh v___, vmtx1_i, vpos.yyyyYYYY + vmadn v___, vmtx2_f, vpos.zzzzZZZZ + vmadh v___, vmtx2_i, vpos.zzzzZZZZ + vmadn v___, vmtx3_f, ONE_W + vmadh vcspos_i, vmtx3_i, ONE_W + vmadn vcspos_f, vzero, vzero + + llv vcol.e0, 8, src_ptr // Load v0 RGBA + llv vtex.e0, 12, src_ptr // Load v0 U, V + llv vcol.e2, 24, src_ptr // Load v1 RGBA + llv vtex.e2, 28, src_ptr // Load v1 U, V + + # 32-bit right shift by 5, to keep the clip space coordinates unscaled + vmudm vcspos_i, vcspos_i, K2048 + vmadl vcspos_f, vcspos_f, K2048 + + slv vcol.e0, SCREEN_VTX_RGBA + V0_OFFSET, vtx + slv vtex.e0, SCREEN_VTX_S_T + V0_OFFSET, vtx + slv vcol.e2, SCREEN_VTX_RGBA + V1_OFFSET, vtx + slv vtex.e2, SCREEN_VTX_S_T + V1_OFFSET, vtx + + # Calculate and store clipping flags against CS.W. + # These will be used for trivial rejections.
+ vch v___, vcspos_i, vcspos_i.wwwwWWWW + vcl v___, vcspos_f, vcspos_f.wwwwWWWW + + cfc2 t0, COP2_CTRL_VCC // t0 = VCC compare results for v0 and v1 + sdv vcspos_i.e0, SCREEN_VTX_CS_POSi + V0_OFFSET, vtx + sdv vcspos_f.e0, SCREEN_VTX_CS_POSf + V0_OFFSET, vtx + sdv vcspos_i.e4, SCREEN_VTX_CS_POSi + V1_OFFSET, vtx + sdv vcspos_f.e4, SCREEN_VTX_CS_POSf + V1_OFFSET, vtx + +###################### VERTEX 2 and 3 (interleaved with storing the v0/v1 trivial-reject codes) + ldv vpos.e0, 32, src_ptr // Load v2 X, Y, Z + ldv vpos.e4, 48, src_ptr // Load v3 X, Y, Z + andi t2, t0, 0x707 // Isolate X/Y/Z clipping flags + srl t1, t2, 5 // Shift hi flags to be aligned next to lo flags + andi t2, t2, 0x7 // Isolate lo clip flags + or t2, t1 // Merge clip flags (compressed to 6 bits) + + # matrix multiply + vmudn v___, vmtx0_f, vpos.xxxxXXXX + vmadh v___, vmtx0_i, vpos.xxxxXXXX + vmadn v___, vmtx1_f, vpos.yyyyYYYY + sb t2, (PRIM_VTX_TRCODE + V0_OFFSET)(vtx) // Store v0 trivial-reject code + vmadh v___, vmtx1_i, vpos.yyyyYYYY + srl t0, t0, 4 // Shift by 4 so the flags of the vertex held in elements e4+ (v1) line up with the 0x707 mask + vmadn v___, vmtx2_f, vpos.zzzzZZZZ + andi t2, t0, 0x707 // Isolate X/Y/Z clipping flags + vmadh v___, vmtx2_i, vpos.zzzzZZZZ + srl t1, t2, 5 // Shift hi flags to be aligned next to lo flags + vmadn v___, vmtx3_f, ONE_W + andi t2, t2, 0x7 // Isolate lo clip flags + vmadh vcspos_i, vmtx3_i, ONE_W + or t2, t1 // Merge clip flags (compressed to 6 bits) + vmadn vcspos_f, vzero, vzero + sb t2, (PRIM_VTX_TRCODE + V1_OFFSET)(vtx) // Store v1 trivial-reject code + + llv vcol.e4, 40, src_ptr # Load v2 RGBA + llv vtex.e4, 44, src_ptr # Load v2 U, V + llv vcol.e6, 56, src_ptr # Load v3 RGBA + llv vtex.e6, 60, src_ptr # Load v3 U, V + + # 32-bit right shift by 5, to keep the clip space coordinates unscaled + vmudm vcspos_i, vcspos_i, K2048 + vmadl vcspos_f, vcspos_f, K2048 + + slv vcol.e4, SCREEN_VTX_RGBA + V2_OFFSET, vtx + slv vtex.e4, SCREEN_VTX_S_T + V2_OFFSET, vtx + slv vcol.e6, SCREEN_VTX_RGBA + V3_OFFSET, vtx + slv vtex.e6, SCREEN_VTX_S_T + V3_OFFSET, vtx + + # Calculate and store clipping flags against CS.W. + # These will be used for trivial rejections.
+ vch v___, vcspos_i, vcspos_i.wwwwWWWW + vcl v___, vcspos_f, vcspos_f.wwwwWWWW + + cfc2 t0, COP2_CTRL_VCC // t0 = VCC compare results for v2 and v3 + sdv vcspos_i.e0, SCREEN_VTX_CS_POSi + V2_OFFSET, vtx + sdv vcspos_f.e0, SCREEN_VTX_CS_POSf + V2_OFFSET, vtx + + andi t2, t0, 0x707 // Isolate X/Y/Z clipping flags + srl t1, t2, 5 // Shift hi flags to be aligned next to lo flags + andi t2, t2, 0x7 // Isolate lo clip flags + or t2, t1 // Merge clip flags (compressed to 6 bits) + sb t2, (PRIM_VTX_TRCODE + V2_OFFSET)(vtx) // Store v2 trivial-reject code + +###################### VERTEX 3 + + sdv vcspos_i.e4, SCREEN_VTX_CS_POSi + V3_OFFSET, vtx + sdv vcspos_f.e4, SCREEN_VTX_CS_POSf + V3_OFFSET, vtx + + srl t0, t0, 4 // Shift by 4 so the flags of the vertex held in elements e4+ (v3) line up with the 0x707 mask + andi t2, t0, 0x707 // Isolate X/Y/Z clipping flags + srl t1, t2, 5 // Shift hi flags to be aligned next to lo flags + andi t2, t2, 0x7 // Isolate lo clip flags + or t2, t1 // Merge clip flags (compressed to 6 bits) + sb t2, (PRIM_VTX_TRCODE + V3_OFFSET)(vtx) // Store v3 trivial-reject code + + # now do the actual drawing: the quad is submitted as triangles (v0, v1, v2) and (v0, v2, v3) + li a1, %lo(VERTEX_CACHE) + V0_OFFSET + li a2, %lo(VERTEX_CACHE) + V1_OFFSET + jal GPUCmd_DrawTriangle + li a3, %lo(VERTEX_CACHE) + V2_OFFSET // (jal delay slot) third vertex of triangle 1 + + li a1, %lo(VERTEX_CACHE) + V0_OFFSET + li a2, %lo(VERTEX_CACHE) + V2_OFFSET + jal GPUCmd_DrawTriangle + li a3, %lo(VERTEX_CACHE) + V3_OFFSET // (jal delay slot) third vertex of triangle 2 + + j RSPQ_Loop + nop + #undef src_ptr + #undef vtx + + #undef x + #undef y + #undef z + #undef w + + #undef v___ + + #undef vmtx0_i + #undef vmtx0_f + #undef vmtx1_i + #undef vmtx1_f + #undef vmtx2_i + #undef vmtx2_f + #undef vmtx3_i + #undef vmtx3_f + + #undef vpos + #undef vcspos_i + #undef vcspos_f // NOTE(review): mtx_ptr, vcol and vtex are #defined in this function but never #undef'd - confirm that leaking them is intentional + + .endfunc + ################################################################ # GL_CalcScreenSpace # @@ -318,12 +507,18 @@ GL_TnL: #undef vst_i #undef vst_f + lbu t0, PRIM_VTX_TRCODE(vtx) slv vst, SCREEN_VTX_S_T, vtx ldv vcspos_f, SCREEN_VTX_CS_POSf,vtx - jal GL_CalcScreenSpace ldv vcspos_i, SCREEN_VTX_CS_POSi,vtx + # Mark this vertex as having T&L applied + ori t0, 0x80 + + jal GL_CalcScreenSpace + sb t0, PRIM_VTX_TRCODE(vtx) + j GL_CalcClipCodes move ra, ra2 @@
-342,238 +537,53 @@ GL_TnL: .endfunc - .align 3 - .func GPUCmd_DrawQuad -GPUCmd_DrawQuad: - #define vtx a0 - #define mtx_ptr s0 - #define src_ptr s4 + .func GPUCmd_DrawTriangle +GPUCmd_DrawTriangle: + #define vtx1 a1 + #define vtx2 a2 + #define vtx3 a3 + #define trcode1 t6 + #define trcode2 t7 + #define trcode3 t8 + sw ra, %lo(DRAW_TRI_RA) # TODO find a register for this - #define v___ $v01 - - #define vmtx0_i $v16 // m00 m01 m02 m03 - #define vmtx0_f $v17 - #define vmtx1_i $v18 // m10 m11 m12 m13 - #define vmtx1_f $v19 - #define vmtx2_i $v20 // m20 m21 m22 m23 - #define vmtx2_f $v21 - #define vmtx3_i $v22 // m30 m31 m32 m03 - #define vmtx3_f $v23 - - #define vpos $v24 - #define vcol $v25 - #define vtex $v26 - #define vcspos_i $v28 - #define vcspos_f $v29 - - #define tmp t0 - #define v0_cflags t1 - #define v1_cflags t2 - #define v2_cflags t3 - #define v3_cflags t4 - - addi src_ptr, rspq_dmem_buf_ptr, %lo(RSPQ_DMEM_BUFFER) - 64 - li vtx, %lo(VERTEX_CACHE) - - li mtx_ptr, %lo(GPU_MATRIX_MVP) - lqv vmtx0_i, 0x00,mtx_ptr // [m00.I m01.I m02.I m03.I m00.I m01.I m02.I m03.I] - lqv vmtx1_i, 0x10,mtx_ptr // etc - lqv vmtx2_i, 0x20,mtx_ptr - lqv vmtx3_i, 0x30,mtx_ptr - lqv vmtx0_f, 0x40,mtx_ptr - lqv vmtx1_f, 0x50,mtx_ptr - lqv vmtx2_f, 0x60,mtx_ptr - lqv vmtx3_f, 0x70,mtx_ptr - -// ######################## -// Vertex 0 and 1 transform -// ######################## - ldv vpos.e0, 0, src_ptr // Load v0 X, Y, Z - ldv vpos.e4, 16, src_ptr // Load v1 X, Y, Z - - // matrix multiply - vmudn v___, vmtx0_f, vpos.xxxxXXXX - vmadh v___, vmtx0_i, vpos.xxxxXXXX - vmadn v___, vmtx1_f, vpos.yyyyYYYY - vmadh v___, vmtx1_i, vpos.yyyyYYYY - vmadn v___, vmtx2_f, vpos.zzzzZZZZ - vmadh v___, vmtx2_i, vpos.zzzzZZZZ - vmadn v___, vmtx3_f, ONE_W - vmadh vcspos_i, vmtx3_i, ONE_W - vmadn vcspos_f, vzero, vzero - - llv vcol.e0, 8, src_ptr // Load v0 RGBA - llv vtex.e0, 12, src_ptr // Load v0 U, V - llv vcol.e2, 24, src_ptr // Load v1 RGBA - llv vtex.e2, 28, src_ptr // Load v1 U, V - - # 32-bit right 
shift by 5, to keep the clip space coordinates unscaled - vmudm vcspos_i, vcspos_i, K2048 - vmadl vcspos_f, vcspos_f, K2048 - - slv vcol.e0, SCREEN_VTX_RGBA + V0_OFFSET, vtx - slv vtex.e0, SCREEN_VTX_S_T + V0_OFFSET, vtx - slv vcol.e2, SCREEN_VTX_RGBA + V1_OFFSET, vtx - slv vtex.e2, SCREEN_VTX_S_T + V1_OFFSET, vtx - - # Calculate and store clipping flags against CS.W. - # These will be used for trivial rejections. - vch v___, vcspos_i, vcspos_i.wwwwWWWW - vcl v___, vcspos_f, vcspos_f.wwwwWWWW - - cfc2 tmp, COP2_CTRL_VCC - sdv vcspos_i.e0, SCREEN_VTX_CS_POSi + V0_OFFSET, vtx - sdv vcspos_f.e0, SCREEN_VTX_CS_POSf + V0_OFFSET, vtx - sdv vcspos_i.e4, SCREEN_VTX_CS_POSi + V1_OFFSET, vtx - sdv vcspos_f.e4, SCREEN_VTX_CS_POSf + V1_OFFSET, vtx - -// ######################## -// Vertex 2 and 3 transform -// ######################## - ldv vpos.e0, 32, src_ptr // Load v2 X, Y, Z - ldv vpos.e4, 48, src_ptr // Load v3 X, Y, Z - - andi v0_cflags, tmp, XYZ_CLIP_FLAGS - srl tmp, tmp, 4 - andi v1_cflags, tmp, XYZ_CLIP_FLAGS - - # matrix multiply - vmudn v___, vmtx0_f, vpos.xxxxXXXX - vmadh v___, vmtx0_i, vpos.xxxxXXXX - vmadn v___, vmtx1_f, vpos.yyyyYYYY - vmadh v___, vmtx1_i, vpos.yyyyYYYY - vmadn v___, vmtx2_f, vpos.zzzzZZZZ - vmadh v___, vmtx2_i, vpos.zzzzZZZZ - vmadn v___, vmtx3_f, ONE_W - vmadh vcspos_i, vmtx3_i, ONE_W - vmadn vcspos_f, vzero, vzero - - llv vcol.e4, 40, src_ptr # Load v2 RGBA - llv vtex.e4, 44, src_ptr # Load v2 U, V - llv vcol.e6, 56, src_ptr # Load v3 RGBA - llv vtex.e6, 60, src_ptr # Load v3 U, V - - # 32-bit right shift by 5, to keep the clip space coordinates unscaled - vmudm vcspos_i, vcspos_i, K2048 - vmadl vcspos_f, vcspos_f, K2048 - - slv vcol.e4, SCREEN_VTX_RGBA + V2_OFFSET, vtx - slv vtex.e4, SCREEN_VTX_S_T + V2_OFFSET, vtx - slv vcol.e6, SCREEN_VTX_RGBA + V3_OFFSET, vtx - slv vtex.e6, SCREEN_VTX_S_T + V3_OFFSET, vtx - - # Calculate and store clipping flags against CS.W. - # These will be used for trivial rejections. 
- vch v___, vcspos_i, vcspos_i.wwwwWWWW - vcl v___, vcspos_f, vcspos_f.wwwwWWWW - - cfc2 tmp, COP2_CTRL_VCC - sdv vcspos_i.e0, SCREEN_VTX_CS_POSi + V2_OFFSET, vtx - sdv vcspos_f.e0, SCREEN_VTX_CS_POSf + V2_OFFSET, vtx - sdv vcspos_i.e4, SCREEN_VTX_CS_POSi + V3_OFFSET, vtx - sdv vcspos_f.e4, SCREEN_VTX_CS_POSf + V3_OFFSET, vtx - - andi v2_cflags, tmp, XYZ_CLIP_FLAGS - srl tmp, tmp, 4 - andi v3_cflags, tmp, XYZ_CLIP_FLAGS - #undef src_ptr - #undef vtx - #undef v___ - - #undef vmtx0_i - #undef vmtx0_f - #undef vmtx1_i - #undef vmtx1_f - #undef vmtx2_i - #undef vmtx2_f - #undef vmtx3_i - #undef vmtx3_f - - #undef vpos - #undef vcspos_i - #undef vcspos_f - - #define vtx1 a1 - #define vtx2 a2 - #define vtx3 a3 - #define vtx4 a0 - -// ######################## -// Trivial rejection check -// ######################## - // If for any plane, all 4 vertices are outside the plane, - // then the quad is out of the viewport and can be trivially rejected - and tmp, v0_cflags, v1_cflags - and tmp, v2_cflags - and tmp, v3_cflags - bnez tmp, JrRa + # Trivial reject: if all the vertices are out of the same plane (at least one), + # the triangle is out of the viewport. + # NOTE: This deliberately uses lb instead of lbu so the sign bit is extended. + # The MSB of each TR-code is a bit flag that is set if the vertex has already + # had T&L applied once. 
+ lb trcode1, PRIM_VTX_TRCODE(vtx1) + lb trcode2, PRIM_VTX_TRCODE(vtx2) + lb trcode3, PRIM_VTX_TRCODE(vtx3) + and t0, trcode1, trcode2 + and t0, trcode3 + andi t0, 0x3F + bnez t0, JrRa nop -// ######################## -// Perform rest of T&L -// ######################## - jal GL_TnL - li s3, %lo(VERTEX_CACHE) + V0_OFFSET - jal GL_TnL - li s3, %lo(VERTEX_CACHE) + V1_OFFSET - jal GL_TnL - li s3, %lo(VERTEX_CACHE) + V2_OFFSET - jal GL_TnL - li s3, %lo(VERTEX_CACHE) + V3_OFFSET + # Perform T&L for each vertex if we haven't already + bgezal trcode1, GL_TnL + move s3, vtx1 -// ######################## -// Guardband check -// ######################## - // Check if all vertices fit within guardband - lbu v0_cflags, (%lo(VERTEX_CACHE) + V0_OFFSET + SCREEN_VTX_CLIP_CODE)(zero) - lbu v1_cflags, (%lo(VERTEX_CACHE) + V1_OFFSET + SCREEN_VTX_CLIP_CODE)(zero) - lbu v2_cflags, (%lo(VERTEX_CACHE) + V2_OFFSET + SCREEN_VTX_CLIP_CODE)(zero) - lbu v3_cflags, (%lo(VERTEX_CACHE) + V3_OFFSET + SCREEN_VTX_CLIP_CODE)(zero) + bgezal trcode2, GL_TnL + move s3, vtx2 - or tmp, v0_cflags, v1_cflags - or tmp, v2_cflags - or tmp, v3_cflags + bgezal trcode3, GL_TnL + move s3, vtx3 - beqz tmp, DrawQuadForRDP - nop - - #undef tmp - #undef v0_cflags - #undef v1_cflags - #undef v2_cflags - #undef v3_cflags - -// ######################## -// Clipped triangle path -// ######################## - // If not, go with slow clipping path - - # now do the actual drawing - li a1, %lo(VERTEX_CACHE) + V0_OFFSET - li a2, %lo(VERTEX_CACHE) + V1_OFFSET - jal DrawClippedTriangle - li a3, %lo(VERTEX_CACHE) + V2_OFFSET - - li a1, %lo(VERTEX_CACHE) + V0_OFFSET - li a2, %lo(VERTEX_CACHE) + V2_OFFSET - jal DrawClippedTriangle - li a3, %lo(VERTEX_CACHE) + V3_OFFSET - - j RSPQ_Loop - nop -.endfunc - - ################################################################ - # DrawClippedTriangle - Breaks a triangle into one or more clipped tris - ################################################################ -.func 
DrawClippedTriangle -DrawClippedTriangle: - sw ra, %lo(DRAW_TRI_RA) // TODO find a register for this + lbu t0, SCREEN_VTX_CLIP_CODE(vtx1) + lbu t1, SCREEN_VTX_CLIP_CODE(vtx2) + lbu t2, SCREEN_VTX_CLIP_CODE(vtx3) + or t5, t0, t1 + or t5, t2 move s1, zero - jal GL_ClipTriangle + beqz t5, gl_draw_single_triangle move s2, zero + jal GL_ClipTriangle + nop + beqz v1, gl_draw_triangle_end addi s2, -6 lhu s5, 0(s1) @@ -602,42 +612,11 @@ gl_draw_triangle_end: lw ra, %lo(DRAW_TRI_RA) jr ra nop -.endfunc - - ################################################################ - # DrawQuadForRDP - Draws two triangles for a quad - ################################################################ -.func DrawQuadForRDP -DrawQuadForRDP: - lhu a0, %lo(GL_TRI_CMD) - lh v0, %lo(GL_TRI_CULL) - li a1, %lo(VERTEX_CACHE) + V0_OFFSET + SCREEN_VTX_X - li a2, %lo(VERTEX_CACHE) + V1_OFFSET + SCREEN_VTX_X - li a3, %lo(VERTEX_CACHE) + V2_OFFSET + SCREEN_VTX_X - jal RDPQ_Triangle - li s3, %lo(RDPQ_CMD_STAGING) - - jal RDPQ_Send - li s4, %lo(RDPQ_CMD_STAGING) - - lhu a0, %lo(GL_TRI_CMD) - lh v0, %lo(GL_TRI_CULL) - li a1, %lo(VERTEX_CACHE) + V2_OFFSET + SCREEN_VTX_X - li a2, %lo(VERTEX_CACHE) + V3_OFFSET + SCREEN_VTX_X - li a3, %lo(VERTEX_CACHE) + V0_OFFSET + SCREEN_VTX_X - jal RDPQ_Triangle - li s3, %lo(RDPQ_CMD_STAGING) - - jal RDPQ_Send - li s4, %lo(RDPQ_CMD_STAGING) - - j RSPQ_Loop - nop -.endfunc #undef vtx1 #undef vtx2 #undef vtx3 + .endfunc #include "rsp_gpu_clipping.inc" #include