#define CLIPPING_PLANE_COUNT 6 #define CLIPPING_CACHE_SIZE 9 #define CLIPPING_PLANE_SIZE 8 .section .data.gl_clipping .align 4 CLIP_PLANES: .half 1, 0, 0, GUARD_BAND_FACTOR .half 0, 1, 0, GUARD_BAND_FACTOR .half 0, 0, 1, 1 .half 1, 0, 0, -GUARD_BAND_FACTOR .half 0, 1, 0, -GUARD_BAND_FACTOR .half 0, 0, 1, -1 .align 4 CACHE_OFFSETS: .half 2,4,6,8,10,12,14,16,18 .section .bss.gl_clipping CLIP_CACHE: .dcb.b SCREEN_VTX_SIZE * CLIPPING_CACHE_SIZE CLIP_CACHE_END: CLIP_LISTS: CLIP_LIST0: .dcb.w CLIPPING_CACHE_SIZE CLIP_LIST1: .dcb.w CLIPPING_CACHE_SIZE .section .text.gl_clipping ################################################################ # GL_ClipTriangle # Clip a triangle against the view-frustum by using the Sutherland-Hodgman algorithm # https://en.wikipedia.org/wiki/Sutherland%E2%80%93Hodgman_algorithm # Args: # a1-a3 = Vertices # t5 = OR'd clip flags of the triangle's vertices # Returns: # s1 = Pointer to list of output vertices # s2 = Pointer to end of list ################################################################ .func GL_ClipTriangle GL_ClipTriangle: #define out_count v1 #define clip_flags t5 #define plane_flag t6 #define in_count t7 #define in_end t8 #define in_list s0 #define out_list s1 #define plane s2 #define intersection s3 #define cur_ptr s4 #define prev_ptr s5 #define cur_vtx s6 #define prev_vtx s7 #define p0 k0 #define p1 k1 #define vtx1 a1 #define vtx2 a2 #define vtx3 a3 #define vplane $v01 #define vint_f $v02 #define vint_i $v03 #define vdot_i $v04 #define vdot_f $v05 #define vdiff_i $v06 #define vdiff_f $v07 #define va_i $v08 #define va_f $v09 #define vpos_i $v10 #define vpos_f $v11 #define vattr0 $v12 #define vattr1 $v13 #define voff0 $v14 #define voff1 $v15 #define vcache0 $v16 #define vcache1 $v17 #define v__ $v29 move ra2, ra # Init in_list as empty li in_list, %lo(CLIP_LIST0) move in_count, zero # Put three original vertices in the out_list # (So after the initial swap they will be in the in_list) li out_list, %lo(CLIP_LIST1) sh vtx1, 0(out_list) sh vtx2, 2(out_list) sh vtx3, 4(out_list) li out_count, 3*2 li plane, %lo(CLIP_PLANES) li plane_flag, 1 # Load cache offsets li t0, %lo(CACHE_OFFSETS) vxor voff1, voff1 lqv voff0, 0,t0 lsv voff1, 16,t0 # Temporarily use the RDP staging area as a map of which cache slots are used # Init to zero li t0, %lo(RDPQ_CMD_STAGING) sqv vzero, 0,t0 sqv vzero, 16,t0 # Iterate over the 6 clipping planes gl_clip_plane_loop: and t0, clip_flags, plane_flag beqz t0, gl_clip_plane_loop_end move t1, in_list # Swap in and out lists # If the out list is empty from the last iteration, # the triangle has no visible points and we are done beqz out_count, gl_clip_return move in_list, out_list move out_list, t1 move in_count, out_count move out_count, zero # Iterate over the egdes of the polygon in the input list # The current edge is between cur_vtx and prev_vtx move cur_ptr, in_list add in_end, in_list, in_count # Init the "previous" vertex to the last in the list for the wrap-around addi prev_ptr, in_end, -2 gl_clip_edge_loop: #define cur_flag t3 #define prev_flag t4 # Check which side of the plane the two vertices are on lhu cur_vtx, 0(cur_ptr) lhu prev_vtx, 0(prev_ptr) lbu cur_flag, SCREEN_VTX_CLIP_CODE(cur_vtx) lbu prev_flag, SCREEN_VTX_CLIP_CODE(prev_vtx) and cur_flag, plane_flag and prev_flag, plane_flag # If they are on opposite sides, there is an intersection xor t0, cur_flag, prev_flag beqz t0, gl_clip_no_intersection move p0, cur_vtx # Swap the two points if necessary to make intersection calculation consistent # This will make sure p0 is always inside and p1 is always outside bnez prev_flag, gl_clip_no_swap move p1, prev_vtx xor p0, p0, p1 xor p1, p0, p1 xor p0, p0, p1 #undef prev_flag gl_clip_no_swap: # Calculate intersection of the line segment and the plane li t0, %lo(RDPQ_CMD_STAGING) lqv vcache0, 0,t0 lqv vcache1, 16,t0 # Repeat plane coefficients twice ldv vplane.e0, 0,plane ldv vplane.e4, 0,plane # vpos: x0 y0 z0 w0 x1 y1 z1 w1 ldv vpos_i.e0, SCREEN_VTX_CS_POSi,p0 ldv vpos_f.e0, SCREEN_VTX_CS_POSf,p0 ldv vpos_i.e4, SCREEN_VTX_CS_POSi,p1 ldv vpos_f.e4, SCREEN_VTX_CS_POSf,p1 # vint: x1 y1 z1 w1 ldv vint_i.e0, SCREEN_VTX_CS_POSi,p1 ldv vint_f.e0, SCREEN_VTX_CS_POSf,p1 # vattr0: r0 g0 b0 a0 s0 t0 luv vattr0.e0, SCREEN_VTX_RGBA ,p0 llv vattr0.e4, SCREEN_VTX_S_T ,p0 # vattr1: r1 g1 b1 a1 s1 t1 luv vattr1.e0, SCREEN_VTX_RGBA ,p1 llv vattr1.e4, SCREEN_VTX_S_T ,p1 # Find first free slot in clip cache # Add the values from the "used slots map" to the cache offsets # After this, each lane will contain the offset of its corresponding cache slot, # but only if the slot is not used. If it is used, it will contain some large value. vaddc vcache0, voff0 vaddc vcache1, voff1 # Look for the smallest value, which will end up in vcache.e0 # Because used slots are marked as large values, they will never be found. vlt vcache0, vcache0.q1 vlt vcache0, vcache0.h2 vlt vcache0, vcache0.e4 vlt vcache0, vcache1.e0 mfc2 t0, vcache0.e0 # Mark slot as used by storing some large value (careful of overflows!) li t1, 0xFF sh t1, %lo(RDPQ_CMD_STAGING)-2(t0) # t0 is the index multiplied by 2 # intersection = t0 * 20 = t0 * 16 + t0 * 4 sll intersection, t0, 4 sll t1, t0, 2 add intersection, t1 # CAUTION: intersection might point to the same address as either p0 or p1, # because one of them is the previous point, which could have been marked unused # in the previous iteration. As long as we don't access p0 or p1 after writing to # intersection, this is fine. addi intersection, %lo(CLIP_CACHE) - SCREEN_VTX_SIZE # Store the cache offset in unused memory (used later when finding the cache slot to mark as unused) sb t0, SCREEN_VTX_PADDING(intersection) # Compute dot products of both positions with the clip plane # vdot.e0: d0 = dot(p0, plane) # vdot.e4: d1 = dot(p1, plane) vmudn vdot_f, vpos_f, vplane vmadh vdot_i, vpos_i, vplane vaddc vdot_f, vdot_f.q1 vadd vdot_i, vdot_i.q1 vaddc vdot_f, vdot_f.h2 vadd vdot_i, vdot_i.h2 # d0 - d1 vsubc vdiff_f, vdot_f, vdot_f.e4 vsub vdiff_i, vdot_i, vdot_i.e4 # 1 / (d0 - d1) vrcph v__.e0, vdiff_i.e0 vrcpl va_f.e0, vdiff_f.e0 vrcph va_i.e0, vzero.e0 # a = d0 / (d0 - d1) vmudl v__, va_f, vdot_f.e0 vmadm v__, va_i, vdot_f.e0 vmadn va_f, va_f, vdot_i.e0 # Prepare 0x7FFF in va_i.e0 vsubc va_i, vshift8, K1 # a = min(a, 1) vge v__, va_f, vzero vmrg va_f, va_f, va_i.e0 # Account for right shift introduced by vrcp vmudn va_f, va_f, K2 # p1 - p0 vsubc vint_f, vpos_f vsub vint_i, vpos_i # attr1 - attr0 vsubc vattr1, vattr0 # Result of linear interpolation: # p0 + a * (p1 - p0) vmudl v__, vint_f, va_f.e0 vmadm v__, vint_i, va_f.e0 vmadn vint_f, vpos_f, K1 vmadh vint_i, vpos_i, K1 # a * (attr1 - attr0) vmudm vattr1, vattr1, va_f.e0 # attr0 + a * (attr1 - attr0) vaddc vattr0, vattr1 # Store results sdv vint_i.e0, SCREEN_VTX_CS_POSi,intersection sdv vint_f.e0, SCREEN_VTX_CS_POSf,intersection suv vattr0.e0, SCREEN_VTX_RGBA ,intersection jal GL_CalcClipCodes slv vattr0.e4, SCREEN_VTX_S_T ,intersection # Add intersection to the output list add t0, out_list, out_count sh intersection, 0(t0) addi out_count, 2 gl_clip_no_intersection: # If cur_vtx is inside, add it to the output list bnez cur_flag, gl_clip_no_current add t0, out_list, out_count sh cur_vtx, 0(t0) b gl_clip_edge_loop_end addi out_count, 2 #undef cur_flag gl_clip_no_current: # Check if the vertex is stored in the clip cache lbu t0, SCREEN_VTX_PADDING(cur_vtx) beqz t0, gl_clip_edge_loop_end # Reset the padding field to zero, so the screen space values won't be recalculated below sb zero, SCREEN_VTX_PADDING(cur_vtx) # If so, mark it as unused sh zero, %lo(RDPQ_CMD_STAGING)-2(t0) gl_clip_edge_loop_end: # Advance to the next edge addi cur_ptr, 2 blt cur_ptr, in_end, gl_clip_edge_loop addi prev_ptr, cur_ptr, -2 gl_clip_plane_loop_end: # Advance to the next clipping plane sll plane_flag, 1 blt plane_flag, (1<