#define CLIPPING_PLANE_COUNT 6 #define CLIPPING_CACHE_SIZE 10 #define CLIPPING_PLANE_SIZE 8 .section .data.gl_clipping .align 4 CLIP_PLANES: .half 1, 0, 0, GUARD_BAND_FACTOR .half 0, 1, 0, GUARD_BAND_FACTOR .half 0, 0, 1, 1 .half 1, 0, 0, -GUARD_BAND_FACTOR .half 0, 1, 0, -GUARD_BAND_FACTOR .half 0, 0, 1, -1 .align 4 CACHE_OFFSETS: .half 2,4,6,8, 10,12,14,16, 18,20 .section .bss.gl_clipping CLIP_CACHE: .dcb.b SCREEN_VTX_SIZE * CLIPPING_CACHE_SIZE CLIP_CACHE_END: CLIP_LISTS: CLIP_LIST0: .dcb.w CLIPPING_CACHE_SIZE CLIP_LIST1: .dcb.w CLIPPING_CACHE_SIZE .section .text.gl_clipping ################################################################ # GL_ClipTriangle # Clip a triangle against the view-frustum by using the Sutherland-Hodgman algorithm # https://en.wikipedia.org/wiki/Sutherland%E2%80%93Hodgman_algorithm # Args: # a1-a3,a0 = Vertices # t5 = OR'd clip flags of the triangle's vertices # Returns: # s1 = Pointer to list of output vertices # s2 = Pointer to end of list ################################################################ .func GL_ClipTriangle GL_ClipTriangle: #define out_count v1 #define clip_flags t5 #define plane_flag t6 #define in_count t7 #define in_end t8 #define in_list v0 #define out_list s1 #define plane s2 #define intersection s3 #define cur_ptr a0 #define prev_ptr a1 #define cur_vtx a2 #define prev_vtx a3 #define p0 k0 #define p1 k1 #define vtx1 a1 #define vtx2 a2 #define vtx3 a3 #define vtx4 a0 #define vplane $v01 #define vint_f $v02 #define vint_i $v03 #define vdot_i $v04 #define vdot_f $v05 #define vdiff_i $v06 #define vdiff_f $v07 #define va_i $v08 #define va_f $v09 #define vpos_i $v10 #define vpos_f $v11 #define vattr0 $v12 #define vattr1 $v13 #define voff0 $v14 #define voff1 $v15 #define vcache0 $v16 #define vcache1 $v17 // v18,v19 - reserved for viewport // v20,v21 - reserved for vguard #define vguard_f $v27 #define vguard_i $v28 #define v__ $v29 move ra2, ra # Init in_list as empty li in_list, %lo(CLIP_LIST0) move in_count, zero # Put four original vertices in the out_list # (So after the initial swap they will be in the in_list) li out_list, %lo(CLIP_LIST1) sh vtx1, 0(out_list) sh vtx2, 2(out_list) sh vtx3, 4(out_list) sh vtx4, 6(out_list) li out_count, 4*2 #undef vtx1 #undef vtx2 #undef vtx3 #undef vtx4 li plane, %lo(CLIP_PLANES) li plane_flag, 1 # Load cache offsets li t0, %lo(CACHE_OFFSETS) vxor voff1, voff1 lqv voff0, 0,t0 ldv voff1, 16,t0 # Temporarily use the RDP staging area as a map of which cache slots are used # Init to zero li t0, %lo(RDPQ_CMD_STAGING) sqv vzero, 0,t0 sqv vzero, 16,t0 # Iterate over the 6 clipping planes gl_clip_plane_loop: and t0, clip_flags, plane_flag beqz t0, gl_clip_plane_loop_end move t1, in_list # Swap in and out lists # If the out list is empty from the last iteration, # the triangle has no visible points and we are done beqz out_count, gl_clip_return move in_list, out_list move out_list, t1 move in_count, out_count move out_count, zero # Iterate over the egdes of the polygon in the input list # The current edge is between cur_vtx and prev_vtx move cur_ptr, in_list add in_end, in_list, in_count # Init the "previous" vertex to the last in the list for the wrap-around addi prev_ptr, in_end, -2 gl_clip_edge_loop: #define cur_flag t3 #define prev_flag t4 # Check which side of the plane the two vertices are on lhu cur_vtx, 0(cur_ptr) lhu prev_vtx, 0(prev_ptr) lbu cur_flag, SCREEN_VTX_CLIP_CODE(cur_vtx) lbu prev_flag, SCREEN_VTX_CLIP_CODE(prev_vtx) and cur_flag, plane_flag and prev_flag, plane_flag # If they are on opposite sides, there is an intersection xor t0, cur_flag, prev_flag beqz t0, gl_clip_no_intersection move p0, cur_vtx # Swap the two points if necessary to make intersection calculation consistent # This will make sure p0 is always inside and p1 is always outside bnez prev_flag, gl_clip_no_swap move p1, prev_vtx xor p0, p0, p1 xor p1, p0, p1 xor p0, p0, p1 #undef prev_flag gl_clip_no_swap: # Calculate intersection of the line segment and the plane li t0, %lo(RDPQ_CMD_STAGING) lqv vcache0, 0,t0 lqv vcache1, 16,t0 # Repeat plane coefficients twice ldv vplane.e0, 0,plane ldv vplane.e4, 0,plane # vpos: x0 y0 z0 w0 x1 y1 z1 w1 ldv vpos_i.e0, SCREEN_VTX_CS_POSi,p0 ldv vpos_f.e0, SCREEN_VTX_CS_POSf,p0 ldv vpos_i.e4, SCREEN_VTX_CS_POSi,p1 ldv vpos_f.e4, SCREEN_VTX_CS_POSf,p1 # vint: x1 y1 z1 w1 ldv vint_i.e0, SCREEN_VTX_CS_POSi,p1 ldv vint_f.e0, SCREEN_VTX_CS_POSf,p1 # vattr0: r0 g0 b0 a0 s0 t0 luv vattr0.e0, SCREEN_VTX_RGBA ,p0 llv vattr0.e4, SCREEN_VTX_S_T ,p0 # vattr1: r1 g1 b1 a1 s1 t1 luv vattr1.e0, SCREEN_VTX_RGBA ,p1 llv vattr1.e4, SCREEN_VTX_S_T ,p1 # Find first free slot in clip cache # Add the values from the "used slots map" to the cache offsets # After this, each lane will contain the offset of its corresponding cache slot, # but only if the slot is not used. If it is used, it will contain some large value. vaddc vcache0, voff0 vaddc vcache1, voff1 # Look for the smallest value, which will end up in vcache.e0 # Because used slots are marked as large values, they will never be found. vlt vcache0, vcache0.q1 vlt vcache0, vcache0.h2 vlt vcache0, vcache0.e4 vlt vcache0, vcache1.e0 vlt vcache0, vcache1.e1 mfc2 t0, vcache0.e0 # Mark slot as used by storing some large value (careful of overflows!) li t1, 0xFF sh t1, %lo(RDPQ_CMD_STAGING)-2(t0) # t0 is the index multiplied by 2 # intersection = t0 * 20 = t0 * 16 + t0 * 4 sll intersection, t0, 4 sll t1, t0, 2 add intersection, t1 # CAUTION: intersection might point to the same address as either p0 or p1, # because one of them is the previous point, which could have been marked unused # in the previous iteration. As long as we don't access p0 or p1 after writing to # intersection, this is fine. addi intersection, %lo(CLIP_CACHE) - SCREEN_VTX_SIZE # Store the cache offset in unused memory (used later when finding the cache slot to mark as unused) sb t0, SCREEN_VTX_PADDING(intersection) # Compute dot products of both positions with the clip plane # vdot.e0: d0 = dot(p0, plane) # vdot.e4: d1 = dot(p1, plane) vmudn vdot_f, vpos_f, vplane vmadh vdot_i, vpos_i, vplane vaddc vdot_f, vdot_f.q1 vadd vdot_i, vdot_i.q1 vaddc vdot_f, vdot_f.h2 vadd vdot_i, vdot_i.h2 # d0 - d1 vsubc vdiff_f, vdot_f, vdot_f.e4 vsub vdiff_i, vdot_i, vdot_i.e4 # 1 / (d0 - d1) vrcph v__.e0, vdiff_i.e0 vrcpl va_f.e0, vdiff_f.e0 vrcph va_i.e0, vzero.e0 # a = d0 / (d0 - d1) vmudl v__, va_f, vdot_f.e0 vmadm v__, va_i, vdot_f.e0 vmadn va_f, va_f, vdot_i.e0 # Prepare 0x7FFF in va_i.e0 vsubc va_i, vshift8, K1 # a = min(a, 1) vge v__, va_f, vzero vmrg va_f, va_f, va_i.e0 # Account for right shift introduced by vrcp vmudn va_f, va_f, K2 # p1 - p0 vsubc vint_f, vpos_f vsub vint_i, vpos_i # attr1 - attr0 vsubc vattr1, vattr0 # Result of linear interpolation: # p0 + a * (p1 - p0) vmudl v__, vint_f, va_f.e0 vmadm v__, vint_i, va_f.e0 vmadn vint_f, vpos_f, K1 vmadh vint_i, vpos_i, K1 # a * (attr1 - attr0) vmudm vattr1, vattr1, va_f.e0 # attr0 + a * (attr1 - attr0) vaddc vattr0, vattr1 # Store results sdv vint_i.e0, SCREEN_VTX_CS_POSi,intersection sdv vint_f.e0, SCREEN_VTX_CS_POSf,intersection suv vattr0.e0, SCREEN_VTX_RGBA ,intersection slv vattr0.e4, SCREEN_VTX_S_T ,intersection # Update clip flags vmudn vguard_f, vint_f, vguardscale // vint_f is vcspos_f vmadh vguard_i, vint_i, vguardscale // vint_i is vcspos_i vch v__, vguard_i, vguard_i.e3 // w vcl v__, vguard_f, vguard_f.e3 // w cfc2 t0, COP2_CTRL_VCC compressClipCodes sb t2, SCREEN_VTX_CLIP_CODE(intersection) # Add intersection to the output list add t0, out_list, out_count sh intersection, 0(t0) addi out_count, 2 gl_clip_no_intersection: # If cur_vtx is inside, add it to the output list bnez cur_flag, gl_clip_no_current add t0, out_list, out_count sh cur_vtx, 0(t0) b gl_clip_edge_loop_end addi out_count, 2 #undef cur_flag gl_clip_no_current: # Check if the vertex is stored in the clip cache lbu t0, SCREEN_VTX_PADDING(cur_vtx) beqz t0, gl_clip_edge_loop_end # Reset the padding field to zero, so the screen space values won't be recalculated below sb zero, SCREEN_VTX_PADDING(cur_vtx) # If so, mark it as unused sh zero, %lo(RDPQ_CMD_STAGING)-2(t0) gl_clip_edge_loop_end: # Advance to the next edge addi cur_ptr, 2 blt cur_ptr, in_end, gl_clip_edge_loop addi prev_ptr, cur_ptr, -2 gl_clip_plane_loop_end: # Advance to the next clipping plane sll plane_flag, 1 blt plane_flag, (1<