diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/i386/block16.h | 123 | ||||
-rw-r--r-- | src/i386/block8.h | 124 | ||||
-rw-r--r-- | src/i386/qasm.h | 285 | ||||
-rw-r--r-- | src/i386/r_spr8.s | 879 | ||||
-rw-r--r-- | src/i386/r_varsa.s | 66 |
5 files changed, 0 insertions, 1477 deletions
diff --git a/src/i386/block16.h b/src/i386/block16.h deleted file mode 100644 index 98a1cf7..0000000 --- a/src/i386/block16.h +++ /dev/null @@ -1,123 +0,0 @@ -LEnter16_16: - movb (%esi),%al - movb (%esi,%ebx,),%cl - movb %dh,%ah - addl %ebp,%edx - movb %dh,%ch - leal (%esi,%ebx,2),%esi - movw 0x12345678(,%eax,2),%ax -LBPatch0: - addl %ebp,%edx - movw %ax,(%edi) - movw 0x12345678(,%ecx,2),%cx -LBPatch1: - movw %cx,2(%edi) - addl $0x4,%edi - - movb (%esi),%al - movb (%esi,%ebx,),%cl - movb %dh,%ah - addl %ebp,%edx - movb %dh,%ch - leal (%esi,%ebx,2),%esi - movw 0x12345678(,%eax,2),%ax -LBPatch2: - addl %ebp,%edx - movw %ax,(%edi) - movw 0x12345678(,%ecx,2),%cx -LBPatch3: - movw %cx,2(%edi) - addl $0x4,%edi - - movb (%esi),%al - movb (%esi,%ebx,),%cl - movb %dh,%ah - addl %ebp,%edx - movb %dh,%ch - leal (%esi,%ebx,2),%esi - movw 0x12345678(,%eax,2),%ax -LBPatch4: - addl %ebp,%edx - movw %ax,(%edi) - movw 0x12345678(,%ecx,2),%cx -LBPatch5: - movw %cx,2(%edi) - addl $0x4,%edi - - movb (%esi),%al - movb (%esi,%ebx,),%cl - movb %dh,%ah - addl %ebp,%edx - movb %dh,%ch - leal (%esi,%ebx,2),%esi - movw 0x12345678(,%eax,2),%ax -LBPatch6: - addl %ebp,%edx - movw %ax,(%edi) - movw 0x12345678(,%ecx,2),%cx -LBPatch7: - movw %cx,2(%edi) - addl $0x4,%edi - -LEnter8_16: - movb (%esi),%al - movb (%esi,%ebx,),%cl - movb %dh,%ah - addl %ebp,%edx - movb %dh,%ch - leal (%esi,%ebx,2),%esi - movw 0x12345678(,%eax,2),%ax -LBPatch8: - addl %ebp,%edx - movw %ax,(%edi) - movw 0x12345678(,%ecx,2),%cx -LBPatch9: - movw %cx,2(%edi) - addl $0x4,%edi - - movb (%esi),%al - movb (%esi,%ebx,),%cl - movb %dh,%ah - addl %ebp,%edx - movb %dh,%ch - leal (%esi,%ebx,2),%esi - movw 0x12345678(,%eax,2),%ax -LBPatch10: - addl %ebp,%edx - movw %ax,(%edi) - movw 0x12345678(,%ecx,2),%cx -LBPatch11: - movw %cx,2(%edi) - addl $0x4,%edi - -LEnter4_16: - movb (%esi),%al - movb (%esi,%ebx,),%cl - movb %dh,%ah - addl %ebp,%edx - movb %dh,%ch - leal (%esi,%ebx,2),%esi - movw 0x12345678(,%eax,2),%ax -LBPatch12: - addl %ebp,%edx - movw %ax,(%edi) - movw 0x12345678(,%ecx,2),%cx -LBPatch13: - movw %cx,2(%edi) - addl $0x4,%edi - -LEnter2_16: - movb (%esi),%al - movb (%esi,%ebx,),%cl - movb %dh,%ah - addl %ebp,%edx - movb %dh,%ch - leal (%esi,%ebx,2),%esi - movw 0x12345678(,%eax,2),%ax -LBPatch14: - addl %ebp,%edx - movw %ax,(%edi) - movw 0x12345678(,%ecx,2),%cx -LBPatch15: - movw %cx,2(%edi) - addl $0x4,%edi diff --git a/src/i386/block8.h b/src/i386/block8.h deleted file mode 100644 index d501379..0000000 --- a/src/i386/block8.h +++ /dev/null @@ -1,124 +0,0 @@ -LEnter16_8: - movb (%esi),%al - movb (%esi,%ebx,),%cl - movb %dh,%ah - addl %ebp,%edx - movb %dh,%ch - leal (%esi,%ebx,2),%esi - movb 0x12345678(%eax),%al -LBPatch0: - addl %ebp,%edx - movb %al,(%edi) - movb 0x12345678(%ecx),%cl -LBPatch1: - movb %cl,1(%edi) - addl $0x2,%edi - - movb (%esi),%al - movb (%esi,%ebx,),%cl - movb %dh,%ah - addl %ebp,%edx - movb %dh,%ch - leal (%esi,%ebx,2),%esi - movb 0x12345678(%eax),%al -LBPatch2: - addl %ebp,%edx - movb %al,(%edi) - movb 0x12345678(%ecx),%cl -LBPatch3: - movb %cl,1(%edi) - addl $0x2,%edi - - movb (%esi),%al - movb (%esi,%ebx,),%cl - movb %dh,%ah - addl %ebp,%edx - movb %dh,%ch - leal (%esi,%ebx,2),%esi - movb 0x12345678(%eax),%al -LBPatch4: - addl %ebp,%edx - movb %al,(%edi) - movb 0x12345678(%ecx),%cl -LBPatch5: - movb %cl,1(%edi) - addl $0x2,%edi - - movb (%esi),%al - movb (%esi,%ebx,),%cl - movb %dh,%ah - addl %ebp,%edx - movb %dh,%ch - leal (%esi,%ebx,2),%esi - movb 0x12345678(%eax),%al -LBPatch6: - addl %ebp,%edx - movb %al,(%edi) - movb 0x12345678(%ecx),%cl -LBPatch7: - movb %cl,1(%edi) - addl $0x2,%edi - -LEnter8_8: - movb (%esi),%al - movb (%esi,%ebx,),%cl - movb %dh,%ah - addl %ebp,%edx - movb %dh,%ch - leal (%esi,%ebx,2),%esi - movb 0x12345678(%eax),%al -LBPatch8: - addl %ebp,%edx - movb %al,(%edi) - movb 0x12345678(%ecx),%cl -LBPatch9: - movb %cl,1(%edi) - addl $0x2,%edi - - movb (%esi),%al - movb (%esi,%ebx,),%cl - movb %dh,%ah - addl %ebp,%edx - movb %dh,%ch - leal (%esi,%ebx,2),%esi - movb 0x12345678(%eax),%al -LBPatch10: - addl %ebp,%edx - movb %al,(%edi) - movb 0x12345678(%ecx),%cl -LBPatch11: - movb %cl,1(%edi) - addl $0x2,%edi - -LEnter4_8: - movb (%esi),%al - movb (%esi,%ebx,),%cl - movb %dh,%ah - addl %ebp,%edx - movb %dh,%ch - leal (%esi,%ebx,2),%esi - movb 0x12345678(%eax),%al -LBPatch12: - addl %ebp,%edx - movb %al,(%edi) - movb 0x12345678(%ecx),%cl -LBPatch13: - movb %cl,1(%edi) - addl $0x2,%edi - -LEnter2_8: - movb (%esi),%al - movb (%esi,%ebx,),%cl - movb %dh,%ah - addl %ebp,%edx - movb %dh,%ch - leal (%esi,%ebx,2),%esi - movb 0x12345678(%eax),%al -LBPatch14: - addl %ebp,%edx - movb %al,(%edi) - movb 0x12345678(%ecx),%cl -LBPatch15: - movb %cl,1(%edi) - addl $0x2,%edi - diff --git a/src/i386/qasm.h b/src/i386/qasm.h index 05106f8..58f1e0d 100644 --- a/src/i386/qasm.h +++ b/src/i386/qasm.h @@ -31,240 +31,6 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. // !!! must be kept the same as in d_iface.h !!! #define TRANSPARENT_COLOR 255 -#ifdef SOFTWARE_RENDERER - .extern C(d_zistepu) - .extern C(d_pzbuffer) - .extern C(d_zistepv) - .extern C(d_zrowbytes) - .extern C(d_ziorigin) - .extern C(r_turb_s) - .extern C(r_turb_t) - .extern C(r_turb_pdest) - .extern C(r_turb_spancount) - .extern C(r_turb_turb) - .extern C(r_turb_pbase) - .extern C(r_turb_sstep) - .extern C(r_turb_tstep) - .extern C(r_bmodelactive) - .extern C(d_sdivzstepu) - .extern C(d_tdivzstepu) - .extern C(d_sdivzstepv) - .extern C(d_tdivzstepv) - .extern C(d_sdivzorigin) - .extern C(d_tdivzorigin) - .extern C(sadjust) - .extern C(tadjust) - .extern C(bbextents) - .extern C(bbextentt) - .extern C(cacheblock) - .extern C(d_viewbuffer) - .extern C(cachewidth) - .extern C(d_pzbuffer) - .extern C(d_zrowbytes) - .extern C(d_zwidth) - .extern C(d_scantable) - .extern C(r_lightptr) - .extern C(r_numvblocks) - .extern C(prowdestbase) - .extern C(pbasesource) - .extern C(r_lightwidth) - .extern C(lightright) - .extern C(lightrightstep) - .extern C(lightdeltastep) - .extern C(lightdelta) - .extern C(lightright) - .extern C(lightdelta) - .extern C(sourcetstep) - .extern C(surfrowbytes) - .extern C(lightrightstep) - .extern C(lightdeltastep) - .extern C(r_sourcemax) - .extern C(r_stepback) - .extern C(blocksize) - .extern C(sourcesstep) - .extern C(lightleft) - .extern C(blockdivshift) - .extern C(blockdivmask) - .extern C(lightleftstep) - .extern C(r_origin) - .extern C(r_ppn) - .extern C(r_pup) - .extern C(r_pright) - .extern C(ycenter) - .extern C(xcenter) - .extern C(d_vrectbottom_particle) - .extern C(d_vrectright_particle) - .extern C(d_vrecty) - .extern C(d_vrectx) - .extern C(d_pix_shift) - .extern C(d_pix_min) - .extern C(d_pix_max) - .extern C(d_y_aspect_shift) - .extern C(screenwidth) - .extern C(r_leftclipped) - .extern C(r_leftenter) - .extern C(r_rightclipped) - .extern C(r_rightenter) - .extern C(modelorg) - .extern C(xscale) - .extern C(r_refdef) - .extern C(yscale) - .extern C(r_leftexit) - .extern C(r_rightexit) - .extern C(r_lastvertvalid) - .extern C(cacheoffset) - .extern C(newedges) - .extern C(removeedges) - .extern C(r_pedge) - .extern C(r_framecount) - .extern C(r_u1) - .extern C(r_emitted) - .extern C(edge_p) - .extern C(surface_p) - .extern C(surfaces) - .extern C(r_lzi1) - .extern C(r_v1) - .extern C(r_ceilv1) - .extern C(r_nearzi) - .extern C(r_nearzionly) - .extern C(edge_aftertail) - .extern C(edge_tail) - .extern C(current_iv) - .extern C(edge_head_u_shift20) - .extern C(span_p) - .extern C(edge_head) - .extern C(fv) - .extern C(edge_tail_u_shift20) - .extern C(r_apverts) - .extern C(r_anumverts) - .extern C(aliastransform) - .extern C(r_avertexnormals) - .extern C(r_plightvec) - .extern C(r_ambientlight) - .extern C(r_shadelight) - .extern C(aliasxcenter) - .extern C(aliasycenter) - .extern C(a_sstepxfrac) - .extern C(r_affinetridesc) - .extern C(d_pcolormap) - .extern C(r_affinetridesc) - .extern C(d_sfrac) - .extern C(d_ptex) - .extern C(d_pedgespanpackage) - .extern C(d_tfrac) - .extern C(d_light) - .extern C(d_zi) - .extern C(d_pdest) - .extern C(d_pz) - .extern C(d_aspancount) - .extern C(erroradjustup) - .extern C(errorterm) - .extern C(d_xdenom) - .extern C(r_p0) - .extern C(r_p1) - .extern C(r_p2) - .extern C(a_tstepxfrac) - .extern C(r_sstepx) - .extern C(r_tstepx) - .extern C(a_ststepxwhole) - .extern C(zspantable) - .extern C(skintable) - .extern C(r_zistepx) - .extern C(erroradjustdown) - .extern C(d_countextrastep) - .extern C(ubasestep) - .extern C(a_ststepxwhole) - .extern C(a_tstepxfrac) - .extern C(r_lstepx) - .extern C(a_spans) - .extern C(erroradjustdown) - .extern C(d_pdestextrastep) - .extern C(d_pzextrastep) - .extern C(d_sfracextrastep) - .extern C(d_ptexextrastep) - .extern C(d_countextrastep) - .extern C(d_tfracextrastep) - .extern C(d_lightextrastep) - .extern C(d_ziextrastep) - .extern C(d_pdestbasestep) - .extern C(d_pzbasestep) - .extern C(d_sfracbasestep) - .extern C(d_ptexbasestep) - .extern C(ubasestep) - .extern C(d_tfracbasestep) - .extern C(d_lightbasestep) - .extern C(d_zibasestep) - .extern C(zspantable) - .extern C(r_lstepy) - .extern C(r_sstepy) - .extern C(r_tstepy) - .extern C(r_zistepy) - .extern C(D_PolysetSetEdgeTable) - .extern C(D_RasterizeAliasPolySmooth) - - .extern float_point5 - .extern Float2ToThe31nd - .extern izistep - .extern izi - .extern FloatMinus2ToThe31nd - .extern float_1 - .extern float_particle_z_clip - .extern float_minus_1 - .extern float_0 - .extern fp_16 - .extern fp_64k - .extern fp_1m - .extern fp_1m_minus_1 - .extern fp_8 - .extern entryvec_table - .extern advancetable - .extern sstep - .extern tstep - .extern pspantemp - .extern counttemp - .extern jumptemp - .extern reciprocal_table - .extern DP_Count - .extern DP_u - .extern DP_v - .extern DP_32768 - .extern DP_Color - .extern DP_Pix - .extern DP_EntryTable - .extern pbase - .extern s - .extern t - .extern sfracf - .extern tfracf - .extern snext - .extern tnext - .extern spancountminus1 - .extern zi16stepu - .extern sdivz16stepu - .extern tdivz16stepu - .extern zi8stepu - .extern sdivz8stepu - .extern tdivz8stepu - .extern reciprocal_table_16 - .extern entryvec_table_16 - .extern ceil_cw - .extern single_cw - .extern fp_64kx64k - .extern pz - .extern spr8entryvec_table -#endif - - .extern C(snd_scaletable) - .extern C(paintbuffer) - .extern C(snd_linear_count) - .extern C(snd_p) - .extern C(snd_vol) - .extern C(snd_out) - .extern C(vright) - .extern C(vup) - .extern C(vpn) - .extern C(BOPS_Error) - // // !!! note that this file must match the corresponding C structures at all // times !!! @@ -281,57 +47,6 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. #define pl_pad 18 #define pl_size 20 -// hull_t structure -// !!! if this is changed, it must be changed in model.h too !!! -#define hu_clipnodes 0 -#define hu_planes 4 -#define hu_firstclipnode 8 -#define hu_lastclipnode 12 -#define hu_clip_mins 16 -#define hu_clip_maxs 28 -#define hu_size 40 - -// dnode_t structure -// !!! if this is changed, it must be changed in bspfile.h too !!! -#define nd_planenum 0 -#define nd_children 4 -#define nd_mins 8 -#define nd_maxs 20 -#define nd_firstface 32 -#define nd_numfaces 36 -#define nd_size 40 - -// sfxcache_t structure -// !!! if this is changed, it much be changed in sound.h too !!! -#define sfxc_length 0 -#define sfxc_loopstart 4 -#define sfxc_speed 8 -#define sfxc_width 12 -#define sfxc_stereo 16 -#define sfxc_data 20 - -// channel_t structure -// !!! if this is changed, it much be changed in sound.h too !!! -#define ch_sfx 0 -#define ch_leftvol 4 -#define ch_rightvol 8 -#define ch_end 12 -#define ch_pos 16 -#define ch_looping 20 -#define ch_entnum 24 -#define ch_entchannel 28 -#define ch_origin 32 -#define ch_dist_mult 44 -#define ch_master_vol 48 -#define ch_size 52 - -// portable_samplepair_t structure -// !!! if this is changed, it much be changed in sound.h too !!! -#define psp_left 0 -#define psp_right 4 -#define psp_size 8 - - // // !!! note that this file must match the corresponding C structures at all // times !!! diff --git a/src/i386/r_spr8.s b/src/i386/r_spr8.s deleted file mode 100644 index 80cbcbe..0000000 --- a/src/i386/r_spr8.s +++ /dev/null @@ -1,879 +0,0 @@ -// -// d_spr8.s -// x86 assembly-language horizontal 8-bpp transparent span-drawing code. -// - -#include "qasm.h" - -#if USE_ASM - -//---------------------------------------------------------------------- -// 8-bpp horizontal span drawing code for polygons, with transparency. -//---------------------------------------------------------------------- - - .text - -// out-of-line, rarely-needed clamping code - -LClampHigh0: - movl C(bbextents),%esi - jmp LClampReentry0 -LClampHighOrLow0: - jg LClampHigh0 - xorl %esi,%esi - jmp LClampReentry0 - -LClampHigh1: - movl C(bbextentt),%edx - jmp LClampReentry1 -LClampHighOrLow1: - jg LClampHigh1 - xorl %edx,%edx - jmp LClampReentry1 - -LClampLow2: - movl $2048,%ebp - jmp LClampReentry2 -LClampHigh2: - movl C(bbextents),%ebp - jmp LClampReentry2 - -LClampLow3: - movl $2048,%ecx - jmp LClampReentry3 -LClampHigh3: - movl C(bbextentt),%ecx - jmp LClampReentry3 - -LClampLow4: - movl $2048,%eax - jmp LClampReentry4 -LClampHigh4: - movl C(bbextents),%eax - jmp LClampReentry4 - -LClampLow5: - movl $2048,%ebx - jmp LClampReentry5 -LClampHigh5: - movl C(bbextentt),%ebx - jmp LClampReentry5 - - -#define pspans 4+16 - - .align 4 -.globl C(D_SpriteDrawSpans) -C(D_SpriteDrawSpans): - pushl %ebp // preserve caller's stack frame - pushl %edi - pushl %esi // preserve register variables - pushl %ebx - -// -// set up scaled-by-8 steps, for 8-long segments; also set up cacheblock -// and span list pointers, and 1/z step in 0.32 fixed-point -// -// FIXME: any overlap from rearranging? - flds C(d_sdivzstepu) - fmuls fp_8 - movl C(cacheblock),%edx - flds C(d_tdivzstepu) - fmuls fp_8 - movl pspans(%esp),%ebx // point to the first span descriptor - flds C(d_zistepu) - fmuls fp_8 - movl %edx,pbase // pbase = cacheblock - flds C(d_zistepu) - fmuls fp_64kx64k - fxch %st(3) - fstps sdivz8stepu - fstps zi8stepu - fstps tdivz8stepu - fistpl izistep - movl izistep,%eax - rorl $16,%eax // put upper 16 bits in low word - movl sspan_t_count(%ebx),%ecx - movl %eax,izistep - - cmpl $0,%ecx - jle LNextSpan - -LSpanLoop: - -// -// set up the initial s/z, t/z, and 1/z on the FP stack, and generate the -// initial s and t values -// -// FIXME: pipeline FILD? - fildl sspan_t_v(%ebx) - fildl sspan_t_u(%ebx) - - fld %st(1) // dv | du | dv - fmuls C(d_sdivzstepv) // dv*d_sdivzstepv | du | dv - fld %st(1) // du | dv*d_sdivzstepv | du | dv - fmuls C(d_sdivzstepu) // du*d_sdivzstepu | dv*d_sdivzstepv | du | dv - fld %st(2) // du | du*d_sdivzstepu | dv*d_sdivzstepv | du | dv - fmuls C(d_tdivzstepu) // du*d_tdivzstepu | du*d_sdivzstepu | - // dv*d_sdivzstepv | du | dv - fxch %st(1) // du*d_sdivzstepu | du*d_tdivzstepu | - // dv*d_sdivzstepv | du | dv - faddp %st(0),%st(2) // du*d_tdivzstepu | - // du*d_sdivzstepu + dv*d_sdivzstepv | du | dv - fxch %st(1) // du*d_sdivzstepu + dv*d_sdivzstepv | - // du*d_tdivzstepu | du | dv - fld %st(3) // dv | du*d_sdivzstepu + dv*d_sdivzstepv | - // du*d_tdivzstepu | du | dv - fmuls C(d_tdivzstepv) // dv*d_tdivzstepv | - // du*d_sdivzstepu + dv*d_sdivzstepv | - // du*d_tdivzstepu | du | dv - fxch %st(1) // du*d_sdivzstepu + dv*d_sdivzstepv | - // dv*d_tdivzstepv | du*d_tdivzstepu | du | dv - fadds C(d_sdivzorigin) // sdivz = d_sdivzorigin + dv*d_sdivzstepv + - // du*d_sdivzstepu; stays in %st(2) at end - fxch %st(4) // dv | dv*d_tdivzstepv | du*d_tdivzstepu | du | - // s/z - fmuls C(d_zistepv) // dv*d_zistepv | dv*d_tdivzstepv | - // du*d_tdivzstepu | du | s/z - fxch %st(1) // dv*d_tdivzstepv | dv*d_zistepv | - // du*d_tdivzstepu | du | s/z - faddp %st(0),%st(2) // dv*d_zistepv | - // dv*d_tdivzstepv + du*d_tdivzstepu | du | s/z - fxch %st(2) // du | dv*d_tdivzstepv + du*d_tdivzstepu | - // dv*d_zistepv | s/z - fmuls C(d_zistepu) // du*d_zistepu | - // dv*d_tdivzstepv + du*d_tdivzstepu | - // dv*d_zistepv | s/z - fxch %st(1) // dv*d_tdivzstepv + du*d_tdivzstepu | - // du*d_zistepu | dv*d_zistepv | s/z - fadds C(d_tdivzorigin) // tdivz = d_tdivzorigin + dv*d_tdivzstepv + - // du*d_tdivzstepu; stays in %st(1) at end - fxch %st(2) // dv*d_zistepv | du*d_zistepu | t/z | s/z - faddp %st(0),%st(1) // dv*d_zistepv + du*d_zistepu | t/z | s/z - - flds fp_64k // fp_64k | dv*d_zistepv + du*d_zistepu | t/z | s/z - fxch %st(1) // dv*d_zistepv + du*d_zistepu | fp_64k | t/z | s/z - fadds C(d_ziorigin) // zi = d_ziorigin + dv*d_zistepv + - // du*d_zistepu; stays in %st(0) at end - // 1/z | fp_64k | t/z | s/z - - fld %st(0) // FIXME: get rid of stall on FMUL? - fmuls fp_64kx64k - fxch %st(1) - -// -// calculate and clamp s & t -// - fdivr %st(0),%st(2) // 1/z | z*64k | t/z | s/z - fxch %st(1) - - fistpl izi // 0.32 fixed-point 1/z - movl izi,%ebp - -// -// set pz to point to the first z-buffer pixel in the span -// - rorl $16,%ebp // put upper 16 bits in low word - movl sspan_t_v(%ebx),%eax - movl %ebp,izi - movl sspan_t_u(%ebx),%ebp - imull C(d_zrowbytes) - shll $1,%ebp // a word per pixel - addl C(d_pzbuffer),%eax - addl %ebp,%eax - movl %eax,pz - -// -// point %edi to the first pixel in the span -// - movl C(d_viewbuffer),%ebp - movl sspan_t_v(%ebx),%eax - pushl %ebx // preserve spans pointer - movl C(tadjust),%edx - movl C(sadjust),%esi - movl C(d_scantable)(,%eax,4),%edi // v * screenwidth - addl %ebp,%edi - movl sspan_t_u(%ebx),%ebp - addl %ebp,%edi // pdest = &pdestspan[scans->u]; - -// -// now start the FDIV for the end of the span -// - cmpl $8,%ecx - ja LSetupNotLast1 - - decl %ecx - jz LCleanup1 // if only one pixel, no need to start an FDIV - movl %ecx,spancountminus1 - -// finish up the s and t calcs - fxch %st(1) // z*64k | 1/z | t/z | s/z - - fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z - fmul %st(4),%st(0) // s | z*64k | 1/z | t/z | s/z - fxch %st(1) // z*64k | s | 1/z | t/z | s/z - fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z - fxch %st(1) // s | t | 1/z | t/z | s/z - fistpl s // 1/z | t | t/z | s/z - fistpl t // 1/z | t/z | s/z - - fildl spancountminus1 - - flds C(d_tdivzstepu) // _d_tdivzstepu | spancountminus1 - flds C(d_zistepu) // _d_zistepu | _d_tdivzstepu | spancountminus1 - fmul %st(2),%st(0) // _d_zistepu*scm1 | _d_tdivzstepu | scm1 - fxch %st(1) // _d_tdivzstepu | _d_zistepu*scm1 | scm1 - fmul %st(2),%st(0) // _d_tdivzstepu*scm1 | _d_zistepu*scm1 | scm1 - fxch %st(2) // scm1 | _d_zistepu*scm1 | _d_tdivzstepu*scm1 - fmuls C(d_sdivzstepu) // _d_sdivzstepu*scm1 | _d_zistepu*scm1 | - // _d_tdivzstepu*scm1 - fxch %st(1) // _d_zistepu*scm1 | _d_sdivzstepu*scm1 | - // _d_tdivzstepu*scm1 - faddp %st(0),%st(3) // _d_sdivzstepu*scm1 | _d_tdivzstepu*scm1 - fxch %st(1) // _d_tdivzstepu*scm1 | _d_sdivzstepu*scm1 - faddp %st(0),%st(3) // _d_sdivzstepu*scm1 - faddp %st(0),%st(3) - - flds fp_64k - fdiv %st(1),%st(0) // this is what we've gone to all this trouble to - // overlap - jmp LFDIVInFlight1 - -LCleanup1: -// finish up the s and t calcs - fxch %st(1) // z*64k | 1/z | t/z | s/z - - fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z - fmul %st(4),%st(0) // s | z*64k | 1/z | t/z | s/z - fxch %st(1) // z*64k | s | 1/z | t/z | s/z - fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z - fxch %st(1) // s | t | 1/z | t/z | s/z - fistpl s // 1/z | t | t/z | s/z - fistpl t // 1/z | t/z | s/z - jmp LFDIVInFlight1 - - .align 4 -LSetupNotLast1: -// finish up the s and t calcs - fxch %st(1) // z*64k | 1/z | t/z | s/z - - fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z - fmul %st(4),%st(0) // s | z*64k | 1/z | t/z | s/z - fxch %st(1) // z*64k | s | 1/z | t/z | s/z - fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z - fxch %st(1) // s | t | 1/z | t/z | s/z - fistpl s // 1/z | t | t/z | s/z - fistpl t // 1/z | t/z | s/z - - fadds zi8stepu - fxch %st(2) - fadds sdivz8stepu - fxch %st(2) - flds tdivz8stepu - faddp %st(0),%st(2) - flds fp_64k - fdiv %st(1),%st(0) // z = 1/1/z - // this is what we've gone to all this trouble to - // overlap -LFDIVInFlight1: - - addl s,%esi - addl t,%edx - movl C(bbextents),%ebx - movl C(bbextentt),%ebp - cmpl %ebx,%esi - ja LClampHighOrLow0 -LClampReentry0: - movl %esi,s - movl pbase,%ebx - shll $16,%esi - cmpl %ebp,%edx - movl %esi,sfracf - ja LClampHighOrLow1 -LClampReentry1: - movl %edx,t - movl s,%esi // sfrac = scans->sfrac; - shll $16,%edx - movl t,%eax // tfrac = scans->tfrac; - sarl $16,%esi - movl %edx,tfracf - -// -// calculate the texture starting address -// - sarl $16,%eax - addl %ebx,%esi - imull C(cachewidth),%eax // (tfrac >> 16) * cachewidth - addl %eax,%esi // psource = pbase + (sfrac >> 16) + - // ((tfrac >> 16) * cachewidth); - -// -// determine whether last span or not -// - cmpl $8,%ecx - jna LLastSegment - -// -// not the last segment; do full 8-wide segment -// -LNotLastSegment: - -// -// advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to -// get there -// - -// pick up after the FDIV that was left in flight previously - - fld %st(0) // duplicate it - fmul %st(4),%st(0) // s = s/z * z - fxch %st(1) - fmul %st(3),%st(0) // t = t/z * z - fxch %st(1) - fistpl snext - fistpl tnext - movl snext,%eax - movl tnext,%edx - - subl $8,%ecx // count off this segments' pixels - movl C(sadjust),%ebp - pushl %ecx // remember count of remaining pixels - movl C(tadjust),%ecx - - addl %eax,%ebp - addl %edx,%ecx - - movl C(bbextents),%eax - movl C(bbextentt),%edx - - cmpl $2048,%ebp - jl LClampLow2 - cmpl %eax,%ebp - ja LClampHigh2 -LClampReentry2: - - cmpl $2048,%ecx - jl LClampLow3 - cmpl %edx,%ecx - ja LClampHigh3 -LClampReentry3: - - movl %ebp,snext - movl %ecx,tnext - - subl s,%ebp - subl t,%ecx - -// -// set up advancetable -// - movl %ecx,%eax - movl %ebp,%edx - sarl $19,%edx // sstep >>= 16; - movl C(cachewidth),%ebx - sarl $19,%eax // tstep >>= 16; - jz LIsZero - imull %ebx,%eax // (tstep >> 16) * cachewidth; -LIsZero: - addl %edx,%eax // add in sstep - // (tstep >> 16) * cachewidth + (sstep >> 16); - movl tfracf,%edx - movl %eax,advancetable+4 // advance base in t - addl %ebx,%eax // ((tstep >> 16) + 1) * cachewidth + - // (sstep >> 16); - shll $13,%ebp // left-justify sstep fractional part - movl %ebp,sstep - movl sfracf,%ebx - shll $13,%ecx // left-justify tstep fractional part - movl %eax,advancetable // advance extra in t - movl %ecx,tstep - - movl pz,%ecx - movl izi,%ebp - - cmpw (%ecx),%bp - jl Lp1 - movb (%esi),%al // get first source texel - cmpb $(TRANSPARENT_COLOR),%al - jz Lp1 - movw %bp,(%ecx) - movb %al,(%edi) // store first dest pixel -Lp1: - addl izistep,%ebp - adcl $0,%ebp - addl tstep,%edx // advance tfrac fractional part by tstep frac - - sbbl %eax,%eax // turn tstep carry into -1 (0 if none) - addl sstep,%ebx // advance sfrac fractional part by sstep frac - adcl advancetable+4(,%eax,4),%esi // point to next source texel - - cmpw 2(%ecx),%bp - jl Lp2 - movb (%esi),%al - cmpb $(TRANSPARENT_COLOR),%al - jz Lp2 - movw %bp,2(%ecx) - movb %al,1(%edi) -Lp2: - addl izistep,%ebp - adcl $0,%ebp - addl tstep,%edx - sbbl %eax,%eax - addl sstep,%ebx - adcl advancetable+4(,%eax,4),%esi - - cmpw 4(%ecx),%bp - jl Lp3 - movb (%esi),%al - cmpb $(TRANSPARENT_COLOR),%al - jz Lp3 - movw %bp,4(%ecx) - movb %al,2(%edi) -Lp3: - addl izistep,%ebp - adcl $0,%ebp - addl tstep,%edx - sbbl %eax,%eax - addl sstep,%ebx - adcl advancetable+4(,%eax,4),%esi - - cmpw 6(%ecx),%bp - jl Lp4 - movb (%esi),%al - cmpb $(TRANSPARENT_COLOR),%al - jz Lp4 - movw %bp,6(%ecx) - movb %al,3(%edi) -Lp4: - addl izistep,%ebp - adcl $0,%ebp - addl tstep,%edx - sbbl %eax,%eax - addl sstep,%ebx - adcl advancetable+4(,%eax,4),%esi - - cmpw 8(%ecx),%bp - jl Lp5 - movb (%esi),%al - cmpb $(TRANSPARENT_COLOR),%al - jz Lp5 - movw %bp,8(%ecx) - movb %al,4(%edi) -Lp5: - addl izistep,%ebp - adcl $0,%ebp - addl tstep,%edx - sbbl %eax,%eax - addl sstep,%ebx - adcl advancetable+4(,%eax,4),%esi - -// -// start FDIV for end of next segment in flight, so it can overlap -// - popl %eax - cmpl $8,%eax // more than one segment after this? - ja LSetupNotLast2 // yes - - decl %eax - jz LFDIVInFlight2 // if only one pixel, no need to start an FDIV - movl %eax,spancountminus1 - fildl spancountminus1 - - flds C(d_zistepu) // _d_zistepu | spancountminus1 - fmul %st(1),%st(0) // _d_zistepu*scm1 | scm1 - flds C(d_tdivzstepu) // _d_tdivzstepu | _d_zistepu*scm1 | scm1 - fmul %st(2),%st(0) // _d_tdivzstepu*scm1 | _d_zistepu*scm1 | scm1 - fxch %st(1) // _d_zistepu*scm1 | _d_tdivzstepu*scm1 | scm1 - faddp %st(0),%st(3) // _d_tdivzstepu*scm1 | scm1 - fxch %st(1) // scm1 | _d_tdivzstepu*scm1 - fmuls C(d_sdivzstepu) // _d_sdivzstepu*scm1 | _d_tdivzstepu*scm1 - fxch %st(1) // _d_tdivzstepu*scm1 | _d_sdivzstepu*scm1 - faddp %st(0),%st(3) // _d_sdivzstepu*scm1 - flds fp_64k // 64k | _d_sdivzstepu*scm1 - fxch %st(1) // _d_sdivzstepu*scm1 | 64k - faddp %st(0),%st(4) // 64k - - fdiv %st(1),%st(0) // this is what we've gone to all this trouble to - // overlap - jmp LFDIVInFlight2 - - .align 4 -LSetupNotLast2: - fadds zi8stepu - fxch %st(2) - fadds sdivz8stepu - fxch %st(2) - flds tdivz8stepu - faddp %st(0),%st(2) - flds fp_64k - fdiv %st(1),%st(0) // z = 1/1/z - // this is what we've gone to all this trouble to - // overlap -LFDIVInFlight2: - pushl %eax - - cmpw 10(%ecx),%bp - jl Lp6 - movb (%esi),%al - cmpb $(TRANSPARENT_COLOR),%al - jz Lp6 - movw %bp,10(%ecx) - movb %al,5(%edi) -Lp6: - addl izistep,%ebp - adcl $0,%ebp - addl tstep,%edx - sbbl %eax,%eax - addl sstep,%ebx - adcl advancetable+4(,%eax,4),%esi - - cmpw 12(%ecx),%bp - jl Lp7 - movb (%esi),%al - cmpb $(TRANSPARENT_COLOR),%al - jz Lp7 - movw %bp,12(%ecx) - movb %al,6(%edi) -Lp7: - addl izistep,%ebp - adcl $0,%ebp - addl tstep,%edx - sbbl %eax,%eax - addl sstep,%ebx - adcl advancetable+4(,%eax,4),%esi - - cmpw 14(%ecx),%bp - jl Lp8 - movb (%esi),%al - cmpb $(TRANSPARENT_COLOR),%al - jz Lp8 - movw %bp,14(%ecx) - movb %al,7(%edi) -Lp8: - addl izistep,%ebp - adcl $0,%ebp - addl tstep,%edx - sbbl %eax,%eax - addl sstep,%ebx - adcl advancetable+4(,%eax,4),%esi - - addl $8,%edi - addl $16,%ecx - movl %edx,tfracf - movl snext,%edx - movl %ebx,sfracf - movl tnext,%ebx - movl %edx,s - movl %ebx,t - - movl %ecx,pz - movl %ebp,izi - - popl %ecx // retrieve count - -// -// determine whether last span or not -// - cmpl $8,%ecx // are there multiple segments remaining? - ja LNotLastSegment // yes - -// -// last segment of scan -// -LLastSegment: - -// -// advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to -// get there. The number of pixels left is variable, and we want to land on the -// last pixel, not step one past it, so we can't run into arithmetic problems -// - testl %ecx,%ecx - jz LNoSteps // just draw the last pixel and we're done - -// pick up after the FDIV that was left in flight previously - - - fld %st(0) // duplicate it - fmul %st(4),%st(0) // s = s/z * z - fxch %st(1) - fmul %st(3),%st(0) // t = t/z * z - fxch %st(1) - fistpl snext - fistpl tnext - - movl C(tadjust),%ebx - movl C(sadjust),%eax - - addl snext,%eax - addl tnext,%ebx - - movl C(bbextents),%ebp - movl C(bbextentt),%edx - - cmpl $2048,%eax - jl LClampLow4 - cmpl %ebp,%eax - ja LClampHigh4 -LClampReentry4: - movl %eax,snext - - cmpl $2048,%ebx - jl LClampLow5 - cmpl %edx,%ebx - ja LClampHigh5 -LClampReentry5: - - cmpl $1,%ecx // don't bother - je LOnlyOneStep // if two pixels in segment, there's only one step, - // of the segment length - subl s,%eax - subl t,%ebx - - addl %eax,%eax // convert to 15.17 format so multiply by 1.31 - addl %ebx,%ebx // reciprocal yields 16.48 - imull reciprocal_table-8(,%ecx,4) // sstep = (snext - s) / (spancount-1) - movl %edx,%ebp - - movl %ebx,%eax - imull reciprocal_table-8(,%ecx,4) // tstep = (tnext - t) / (spancount-1) - -LSetEntryvec: -// -// set up advancetable -// - movl spr8entryvec_table(,%ecx,4),%ebx - movl %edx,%eax - pushl %ebx // entry point into code for RET later - movl %ebp,%ecx - sarl $16,%ecx // sstep >>= 16; - movl C(cachewidth),%ebx - sarl $16,%edx // tstep >>= 16; - jz LIsZeroLast - imull %ebx,%edx // (tstep >> 16) * cachewidth; -LIsZeroLast: - addl %ecx,%edx // add in sstep - // (tstep >> 16) * cachewidth + (sstep >> 16); - movl tfracf,%ecx - movl %edx,advancetable+4 // advance base in t - addl %ebx,%edx // ((tstep >> 16) + 1) * cachewidth + - // (sstep >> 16); - shll $16,%ebp // left-justify sstep fractional part - movl sfracf,%ebx - shll $16,%eax // left-justify tstep fractional part - movl %edx,advancetable // advance extra in t - - movl %eax,tstep - movl %ebp,sstep - movl %ecx,%edx - - movl pz,%ecx - movl izi,%ebp - - ret // jump to the number-of-pixels handler - -//---------------------------------------- - -LNoSteps: - movl pz,%ecx - subl $7,%edi // adjust for hardwired offset - subl $14,%ecx - jmp LEndSpan - - -LOnlyOneStep: - subl s,%eax - subl t,%ebx - movl %eax,%ebp - movl %ebx,%edx - jmp LSetEntryvec - -//---------------------------------------- - -.globl Spr8Entry2_8 -Spr8Entry2_8: - subl $6,%edi // adjust for hardwired offsets - subl $12,%ecx - movb (%esi),%al - jmp LLEntry2_8 - -//---------------------------------------- - -.globl Spr8Entry3_8 -Spr8Entry3_8: - subl $5,%edi // adjust for hardwired offsets - subl $10,%ecx - jmp LLEntry3_8 - -//---------------------------------------- - -.globl Spr8Entry4_8 -Spr8Entry4_8: - subl $4,%edi // adjust for hardwired offsets - subl $8,%ecx - jmp LLEntry4_8 - -//---------------------------------------- - -.globl Spr8Entry5_8 -Spr8Entry5_8: - subl $3,%edi // adjust for hardwired offsets - subl $6,%ecx - jmp LLEntry5_8 - -//---------------------------------------- - -.globl Spr8Entry6_8 -Spr8Entry6_8: - subl $2,%edi // adjust for hardwired offsets - subl $4,%ecx - jmp LLEntry6_8 - -//---------------------------------------- - -.globl Spr8Entry7_8 -Spr8Entry7_8: - decl %edi // adjust for hardwired offsets - subl $2,%ecx - jmp LLEntry7_8 - -//---------------------------------------- - -.globl Spr8Entry8_8 -Spr8Entry8_8: - cmpw (%ecx),%bp - jl Lp9 - movb (%esi),%al - cmpb $(TRANSPARENT_COLOR),%al - jz Lp9 - movw %bp,(%ecx) - movb %al,(%edi) -Lp9: - addl izistep,%ebp - adcl $0,%ebp - addl tstep,%edx - sbbl %eax,%eax - addl sstep,%ebx - adcl advancetable+4(,%eax,4),%esi -LLEntry7_8: - cmpw 2(%ecx),%bp - jl Lp10 - movb (%esi),%al - cmpb $(TRANSPARENT_COLOR),%al - jz Lp10 - movw %bp,2(%ecx) - movb %al,1(%edi) -Lp10: - addl izistep,%ebp - adcl $0,%ebp - addl tstep,%edx - sbbl %eax,%eax - addl sstep,%ebx - adcl advancetable+4(,%eax,4),%esi -LLEntry6_8: - cmpw 4(%ecx),%bp - jl Lp11 - movb (%esi),%al - cmpb $(TRANSPARENT_COLOR),%al - jz Lp11 - movw %bp,4(%ecx) - movb %al,2(%edi) -Lp11: - addl izistep,%ebp - adcl $0,%ebp - addl tstep,%edx - sbbl %eax,%eax - addl sstep,%ebx - adcl advancetable+4(,%eax,4),%esi -LLEntry5_8: - cmpw 6(%ecx),%bp - jl Lp12 - movb (%esi),%al - cmpb $(TRANSPARENT_COLOR),%al - jz Lp12 - movw %bp,6(%ecx) - movb %al,3(%edi) -Lp12: - addl izistep,%ebp - adcl $0,%ebp - addl tstep,%edx - sbbl %eax,%eax - addl sstep,%ebx - adcl advancetable+4(,%eax,4),%esi -LLEntry4_8: - cmpw 8(%ecx),%bp - jl Lp13 - movb (%esi),%al - cmpb $(TRANSPARENT_COLOR),%al - jz Lp13 - movw %bp,8(%ecx) - movb %al,4(%edi) -Lp13: - addl izistep,%ebp - adcl $0,%ebp - addl tstep,%edx - sbbl %eax,%eax - addl sstep,%ebx - adcl advancetable+4(,%eax,4),%esi -LLEntry3_8: - cmpw 10(%ecx),%bp - jl Lp14 - movb (%esi),%al - cmpb $(TRANSPARENT_COLOR),%al - jz Lp14 - movw %bp,10(%ecx) - movb %al,5(%edi) -Lp14: - addl izistep,%ebp - adcl $0,%ebp - addl tstep,%edx - sbbl %eax,%eax - addl sstep,%ebx - adcl advancetable+4(,%eax,4),%esi -LLEntry2_8: - cmpw 12(%ecx),%bp - jl Lp15 - movb (%esi),%al - cmpb $(TRANSPARENT_COLOR),%al - jz Lp15 - movw %bp,12(%ecx) - movb %al,6(%edi) -Lp15: - addl izistep,%ebp - adcl $0,%ebp - addl tstep,%edx - sbbl %eax,%eax - addl sstep,%ebx - adcl advancetable+4(,%eax,4),%esi - -LEndSpan: - cmpw 14(%ecx),%bp - jl Lp16 - movb (%esi),%al // load first texel in segment - cmpb $(TRANSPARENT_COLOR),%al - jz Lp16 - movw %bp,14(%ecx) - movb %al,7(%edi) -Lp16: - -// -// clear s/z, t/z, 1/z from FP stack -// - fstp %st(0) - fstp %st(0) - fstp %st(0) - - popl %ebx // restore spans pointer -LNextSpan: - addl $(sspan_t_size),%ebx // point to next span - movl sspan_t_count(%ebx),%ecx - cmpl $0,%ecx // any more spans? - jg LSpanLoop // yes - jz LNextSpan // yes, but this one's empty - - popl %ebx // restore register variables - popl %esi - popl %edi - popl %ebp // restore the caller's stack frame - ret - -#endif // USE_ASM diff --git a/src/i386/r_varsa.s b/src/i386/r_varsa.s index a3448eb..16ec815 100644 --- a/src/i386/r_varsa.s +++ b/src/i386/r_varsa.s @@ -129,50 +129,11 @@ reciprocal_table_16: .long 0x40000000, 0x2aaaaaaa, 0x20000000 .long 0x10000000, 0xe38e38e, 0xccccccc, 0xba2e8ba .long 0xaaaaaaa, 0x9d89d89, 0x9249249, 0x8888888 -#ifndef NeXT - .extern Entry2_16 - .extern Entry3_16 - .extern Entry4_16 - .extern Entry5_16 - .extern Entry6_16 - .extern Entry7_16 - .extern Entry8_16 - .extern Entry9_16 - .extern Entry10_16 - .extern Entry11_16 - .extern Entry12_16 - .extern Entry13_16 - .extern Entry14_16 - .extern Entry15_16 - .extern Entry16_16 -#endif - entryvec_table_16: .long 0, Entry2_16, Entry3_16, Entry4_16 .long Entry5_16, Entry6_16, Entry7_16, Entry8_16 .long Entry9_16, Entry10_16, Entry11_16, Entry12_16 .long Entry13_16, Entry14_16, Entry15_16, Entry16_16 -//------------------------------------------------------- -// local variables for d_parta.s -//------------------------------------------------------- -.globl DP_Count, DP_u, DP_v, DP_32768, DP_Color, DP_Pix, DP_EntryTable -DP_Count: .long 0 -DP_u: .long 0 -DP_v: .long 0 -DP_32768: .single 32768.0 -DP_Color: .long 0 -DP_Pix: .long 0 - - -#if 0 - .extern DP_1x1 - .extern DP_2x2 - .extern DP_3x3 - .extern DP_4x4 - -DP_EntryTable: .long DP_1x1, DP_2x2, DP_3x3, DP_4x4 -#endif - // // advancetable is 8 bytes, but points to the middle of that range so negative // offsets will work @@ -191,33 +152,6 @@ jumptemp: .long 0 reciprocal_table: .long 0x40000000, 0x2aaaaaaa, 0x20000000 .long 0x19999999, 0x15555555, 0x12492492 -#if 0 - .extern Entry2_8 - .extern Entry3_8 - .extern Entry4_8 - .extern Entry5_8 - .extern Entry6_8 - .extern Entry7_8 - .extern Entry8_8 - -entryvec_table: .long 0, Entry2_8, Entry3_8, Entry4_8 - .long Entry5_8, Entry6_8, Entry7_8, Entry8_8 -#endif - -#ifndef NeXT - .extern Spr8Entry2_8 - .extern Spr8Entry3_8 - .extern Spr8Entry4_8 - .extern Spr8Entry5_8 - .extern Spr8Entry6_8 - .extern Spr8Entry7_8 - .extern Spr8Entry8_8 -#endif - -.globl spr8entryvec_table -spr8entryvec_table: .long 0, Spr8Entry2_8, Spr8Entry3_8, Spr8Entry4_8 - .long Spr8Entry5_8, Spr8Entry6_8, Spr8Entry7_8, Spr8Entry8_8 - #endif // USE_ASM |