diff options
author | Andrey Nazarov <skuller@skuller.net> | 2013-03-16 00:44:15 +0400 |
---|---|---|
committer | Andrey Nazarov <skuller@skuller.net> | 2013-03-19 03:44:50 +0400 |
commit | 6e5aa4dec92010a3794152a379d95f62fb8cc029 (patch) | |
tree | b5c7a53488a43cd2b55730405f50cd532030d899 | |
parent | 5a4744ed5ccab66fa7b45c5369d210807f120137 (diff) |
Remove x86 assembly code.
-rw-r--r-- | Makefile | 39 | ||||
-rw-r--r-- | doc/examples/buildconfig | 4 | ||||
-rw-r--r-- | inc/common/x86/asm.h | 40 | ||||
-rw-r--r-- | src/common/math.c | 4 | ||||
-rw-r--r-- | src/common/x86/math.S | 329 | ||||
-rw-r--r-- | src/refresh/sw/aclip.c | 20 | ||||
-rw-r--r-- | src/refresh/sw/edge.c | 23 | ||||
-rw-r--r-- | src/refresh/sw/main.c | 15 | ||||
-rw-r--r-- | src/refresh/sw/misc.c | 26 | ||||
-rw-r--r-- | src/refresh/sw/polyset.c | 118 | ||||
-rw-r--r-- | src/refresh/sw/raster.c | 3 | ||||
-rw-r--r-- | src/refresh/sw/scan.c | 4 | ||||
-rw-r--r-- | src/refresh/sw/surf.c | 4 | ||||
-rw-r--r-- | src/refresh/sw/sw.h | 30 | ||||
-rw-r--r-- | src/refresh/sw/x86/aclip.S | 195 | ||||
-rw-r--r-- | src/refresh/sw/x86/draw.S | 817 | ||||
-rw-r--r-- | src/refresh/sw/x86/edge.S | 730 | ||||
-rw-r--r-- | src/refresh/sw/x86/polyset.S | 1247 | ||||
-rw-r--r-- | src/refresh/sw/x86/protect.c | 31 | ||||
-rw-r--r-- | src/refresh/sw/x86/span16.S | 1227 | ||||
-rw-r--r-- | src/refresh/sw/x86/surf8.S | 762 | ||||
-rw-r--r-- | src/refresh/sw/x86/sw.h | 183 | ||||
-rw-r--r-- | src/refresh/sw/x86/turb8.S | 68 | ||||
-rw-r--r-- | src/refresh/sw/x86/vars.S | 157 |
24 files changed, 18 insertions, 6058 deletions
@@ -29,7 +29,6 @@ RMDIR ?= rm -rf MKDIR ?= mkdir -p CFLAGS ?= -O2 -Wall -g -MMD $(INCLUDES) -ASFLAGS ?= RCFLAGS ?= LDFLAGS ?= LIBS ?= @@ -38,9 +37,6 @@ CFLAGS_s := -iquote./inc CFLAGS_c := -iquote./inc CFLAGS_g := -iquote./inc -fno-strict-aliasing -ASFLAGS_s := -iquote./inc -ASFLAGS_c := -iquote./inc - RCFLAGS_s := RCFLAGS_c := RCFLAGS_g := @@ -323,18 +319,6 @@ ifdef CONFIG_SOFTWARE_RENDERER OBJS_c += src/refresh/sw/surf.o OBJS_c += src/refresh/sw/sird.o OBJS_c += src/refresh/sw/sky.o - - ifdef CONFIG_X86_ASSEMBLY - OBJS_c += src/refresh/sw/x86/protect.o - OBJS_c += src/refresh/sw/x86/aclip.o - OBJS_c += src/refresh/sw/x86/draw.o - OBJS_c += src/refresh/sw/x86/edge.o - OBJS_c += src/refresh/sw/x86/polyset.o - OBJS_c += src/refresh/sw/x86/span16.o - OBJS_c += src/refresh/sw/x86/surf8.o - OBJS_c += src/refresh/sw/x86/turb8.o - OBJS_c += src/refresh/sw/x86/vars.o - endif else CFLAGS_c += -DREF_GL=1 -DUSE_REF=1 -DVID_REF='"gl"' OBJS_c += src/refresh/gl/draw.o @@ -558,19 +542,6 @@ ifdef CONFIG_DEBUG CFLAGS_s += -D_DEBUG endif -ifdef CONFIG_X86_ASSEMBLY - ASFLAGS_c += -DUSE_ASM=1 - ASFLAGS_s += -DUSE_ASM=1 - ifdef CONFIG_WINDOWS - ASFLAGS_c += -DUNDERSCORES - ASFLAGS_s += -DUNDERSCORES - endif - CFLAGS_c += -DUSE_ASM=1 - CFLAGS_s += -DUSE_ASM=1 - OBJS_c += src/common/x86/math.o - OBJS_s += src/common/x86/math.o -endif - ifeq ($(CPU),x86) OBJS_c += src/common/x86/fpu.o OBJS_s += src/common/x86/fpu.o @@ -642,11 +613,6 @@ $(BUILD_s)/%.o: %.c $(Q)$(MKDIR) $(@D) $(Q)$(CC) -c $(CFLAGS) $(CFLAGS_s) -o $@ $< -$(BUILD_s)/%.o: %.S - $(E) [AS] $@ - $(Q)$(MKDIR) $(@D) - $(Q)$(CC) -c $(ASFLAGS) $(ASFLAGS_s) -o $@ $< - $(BUILD_s)/%.o: %.rc $(E) [RC] $@ $(Q)$(MKDIR) $(@D) @@ -664,11 +630,6 @@ $(BUILD_c)/%.o: %.c $(Q)$(MKDIR) $(@D) $(Q)$(CC) -c $(CFLAGS) $(CFLAGS_c) -o $@ $< -$(BUILD_c)/%.o: %.S - $(E) [AS] $@ - $(Q)$(MKDIR) $(@D) - $(Q)$(CC) -c $(ASFLAGS) $(ASFLAGS_c) -o $@ $< - $(BUILD_c)/%.o: %.rc $(E) [RC] $@ $(Q)$(MKDIR) $(@D) diff --git a/doc/examples/buildconfig b/doc/examples/buildconfig index 8cfc83c..b452ebe 100644 --- a/doc/examples/buildconfig +++ b/doc/examples/buildconfig @@ -93,10 +93,6 @@ # Options below have no effect on architectures other than x86. -# Build x86 assembly versions of certain C routines. Mostly useful for software -# renderer. -#CONFIG_X86_ASSEMBLY=y - # Enable this option on Linux to build a server capable of loading game mods # built using ancient GCC versions. #CONFIG_X86_GAME_ABI_HACK=y diff --git a/inc/common/x86/asm.h b/inc/common/x86/asm.h deleted file mode 100644 index 6d690db..0000000 --- a/inc/common/x86/asm.h +++ /dev/null @@ -1,40 +0,0 @@ -/* -Copyright (C) 1997-2001 Id Software, Inc. - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -*/ - -#ifndef X86_ASM_H -#define X86_ASM_H - -#if HAVE_CONFIG_H -#include "config.h" -#endif - -#ifndef UNDERSCORES -#define C(label) label -#else -#define C(label) _##label -#endif - -// cplane_t structure -#define pl_normal 0 -#define pl_dist 12 -#define pl_type 16 -#define pl_signbits 17 -#define pl_pad 18 -#define pl_size 20 - -#endif // X86_ASM_H diff --git a/src/common/math.c b/src/common/math.c index 4c01d54..57e63ba 100644 --- a/src/common/math.c +++ b/src/common/math.c @@ -260,7 +260,6 @@ BoxOnPlaneSide Returns 1, 2, or 1 + 2 ================== */ -#if !USE_ASM int BoxOnPlaneSide(vec3_t emins, vec3_t emaxs, cplane_t *p) { vec_t *bounds[2] = { emins, emaxs }; @@ -286,6 +285,3 @@ int BoxOnPlaneSide(vec3_t emins, vec3_t emaxs, cplane_t *p) return sides; } -#endif // USE_ASM - - diff --git a/src/common/x86/math.S b/src/common/x86/math.S deleted file mode 100644 index 587eef7..0000000 --- a/src/common/x86/math.S +++ /dev/null @@ -1,329 +0,0 @@ -// -// math.s -// x86 assembly-language math routines. - -#include "common/x86/asm.h" - -#if USE_ASM - - .data - - .align 4 -Ljmptab: .long Lcase0, Lcase1, Lcase2, Lcase3 - .long Lcase4, Lcase5, Lcase6, Lcase7 - - .text - -#define EMINS 4+4 -#define EMAXS 4+8 -#define P 4+12 - - .align 2 -.globl C(BoxOnPlaneSide) -C(BoxOnPlaneSide): - pushl %ebx - - movl P(%esp),%edx - movl EMINS(%esp),%ecx - xorl %eax,%eax - movl EMAXS(%esp),%ebx - movb pl_signbits(%edx),%al - cmpb $8,%al - jge Lerror - flds pl_normal(%edx) // p->normal[0] - fld %st(0) // p->normal[0] | p->normal[0] - jmp *Ljmptab(,%eax,4) - - -//dist1= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2]; -//dist2= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2]; -Lcase0: - fmuls (%ebx) // p->normal[0]*emaxs[0] | p->normal[0] - flds pl_normal+4(%edx) // p->normal[1] | p->normal[0]*emaxs[0] | - // p->normal[0] - fxch %st(2) // p->normal[0] | p->normal[0]*emaxs[0] | - // p->normal[1] - fmuls (%ecx) // p->normal[0]*emins[0] | - // p->normal[0]*emaxs[0] | p->normal[1] - fxch %st(2) // p->normal[1] | p->normal[0]*emaxs[0] | - // p->normal[0]*emins[0] - fld %st(0) // p->normal[1] | p->normal[1] | - // p->normal[0]*emaxs[0] | - // p->normal[0]*emins[0] - fmuls 4(%ebx) // p->normal[1]*emaxs[1] | p->normal[1] | - // p->normal[0]*emaxs[0] | - // p->normal[0]*emins[0] - flds pl_normal+8(%edx) // p->normal[2] | p->normal[1]*emaxs[1] | - // p->normal[1] | p->normal[0]*emaxs[0] | - // p->normal[0]*emins[0] - fxch %st(2) // p->normal[1] | p->normal[1]*emaxs[1] | - // p->normal[2] | p->normal[0]*emaxs[0] | - // p->normal[0]*emins[0] - fmuls 4(%ecx) // p->normal[1]*emins[1] | - // p->normal[1]*emaxs[1] | - // p->normal[2] | p->normal[0]*emaxs[0] | - // p->normal[0]*emins[0] - fxch %st(2) // p->normal[2] | p->normal[1]*emaxs[1] | - // p->normal[1]*emins[1] | - // p->normal[0]*emaxs[0] | - // p->normal[0]*emins[0] - fld %st(0) // p->normal[2] | p->normal[2] | - // p->normal[1]*emaxs[1] | - // p->normal[1]*emins[1] | - // p->normal[0]*emaxs[0] | - // p->normal[0]*emins[0] - fmuls 8(%ebx) // p->normal[2]*emaxs[2] | - // p->normal[2] | - // p->normal[1]*emaxs[1] | - // p->normal[1]*emins[1] | - // p->normal[0]*emaxs[0] | - // p->normal[0]*emins[0] - fxch %st(5) // p->normal[0]*emins[0] | - // p->normal[2] | - // p->normal[1]*emaxs[1] | - // p->normal[1]*emins[1] | - // p->normal[0]*emaxs[0] | - // p->normal[2]*emaxs[2] - faddp %st(0),%st(3) //p->normal[2] | - // p->normal[1]*emaxs[1] | - // p->normal[1]*emins[1]+p->normal[0]*emins[0]| - // p->normal[0]*emaxs[0] | - // p->normal[2]*emaxs[2] - fmuls 8(%ecx) //p->normal[2]*emins[2] | - // p->normal[1]*emaxs[1] | - // p->normal[1]*emins[1]+p->normal[0]*emins[0]| - // p->normal[0]*emaxs[0] | - // p->normal[2]*emaxs[2] - fxch %st(1) //p->normal[1]*emaxs[1] | - // p->normal[2]*emins[2] | - // p->normal[1]*emins[1]+p->normal[0]*emins[0]| - // p->normal[0]*emaxs[0] | - // p->normal[2]*emaxs[2] - faddp %st(0),%st(3) //p->normal[2]*emins[2] | - // p->normal[1]*emins[1]+p->normal[0]*emins[0]| - // p->normal[0]*emaxs[0]+p->normal[1]*emaxs[1]| - // p->normal[2]*emaxs[2] - fxch %st(3) //p->normal[2]*emaxs[2] + - // p->normal[1]*emins[1]+p->normal[0]*emins[0]| - // p->normal[0]*emaxs[0]+p->normal[1]*emaxs[1]| - // p->normal[2]*emins[2] - faddp %st(0),%st(2) //p->normal[1]*emins[1]+p->normal[0]*emins[0]| - // dist1 | p->normal[2]*emins[2] - - jmp LSetSides - -//dist1= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2]; -//dist2= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2]; -Lcase1: - fmuls (%ecx) // emins[0] - flds pl_normal+4(%edx) - fxch %st(2) - fmuls (%ebx) // emaxs[0] - fxch %st(2) - fld %st(0) - fmuls 4(%ebx) // emaxs[1] - flds pl_normal+8(%edx) - fxch %st(2) - fmuls 4(%ecx) // emins[1] - fxch %st(2) - fld %st(0) - fmuls 8(%ebx) // emaxs[2] - fxch %st(5) - faddp %st(0),%st(3) - fmuls 8(%ecx) // emins[2] - fxch %st(1) - faddp %st(0),%st(3) - fxch %st(3) - faddp %st(0),%st(2) - - jmp LSetSides - -//dist1= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2]; -//dist2= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2]; -Lcase2: - fmuls (%ebx) // emaxs[0] - flds pl_normal+4(%edx) - fxch %st(2) - fmuls (%ecx) // emins[0] - fxch %st(2) - fld %st(0) - fmuls 4(%ecx) // emins[1] - flds pl_normal+8(%edx) - fxch %st(2) - fmuls 4(%ebx) // emaxs[1] - fxch %st(2) - fld %st(0) - fmuls 8(%ebx) // emaxs[2] - fxch %st(5) - faddp %st(0),%st(3) - fmuls 8(%ecx) // emins[2] - fxch %st(1) - faddp %st(0),%st(3) - fxch %st(3) - faddp %st(0),%st(2) - - jmp LSetSides - -//dist1= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2]; -//dist2= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2]; -Lcase3: - fmuls (%ecx) // emins[0] - flds pl_normal+4(%edx) - fxch %st(2) - fmuls (%ebx) // emaxs[0] - fxch %st(2) - fld %st(0) - fmuls 4(%ecx) // emins[1] - flds pl_normal+8(%edx) - fxch %st(2) - fmuls 4(%ebx) // emaxs[1] - fxch %st(2) - fld %st(0) - fmuls 8(%ebx) // emaxs[2] - fxch %st(5) - faddp %st(0),%st(3) - fmuls 8(%ecx) // emins[2] - fxch %st(1) - faddp %st(0),%st(3) - fxch %st(3) - faddp %st(0),%st(2) - - jmp LSetSides - -//dist1= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2]; -//dist2= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2]; -Lcase4: - fmuls (%ebx) // emaxs[0] - flds pl_normal+4(%edx) - fxch %st(2) - fmuls (%ecx) // emins[0] - fxch %st(2) - fld %st(0) - fmuls 4(%ebx) // emaxs[1] - flds pl_normal+8(%edx) - fxch %st(2) - fmuls 4(%ecx) // emins[1] - fxch %st(2) - fld %st(0) - fmuls 8(%ecx) // emins[2] - fxch %st(5) - faddp %st(0),%st(3) - fmuls 8(%ebx) // emaxs[2] - fxch %st(1) - faddp %st(0),%st(3) - fxch %st(3) - faddp %st(0),%st(2) - - jmp LSetSides - -//dist1= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2]; -//dist2= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2]; -Lcase5: - fmuls (%ecx) // emins[0] - flds pl_normal+4(%edx) - fxch %st(2) - fmuls (%ebx) // emaxs[0] - fxch %st(2) - fld %st(0) - fmuls 4(%ebx) // emaxs[1] - flds pl_normal+8(%edx) - fxch %st(2) - fmuls 4(%ecx) // emins[1] - fxch %st(2) - fld %st(0) - fmuls 8(%ecx) // emins[2] - fxch %st(5) - faddp %st(0),%st(3) - fmuls 8(%ebx) // emaxs[2] - fxch %st(1) - faddp %st(0),%st(3) - fxch %st(3) - faddp %st(0),%st(2) - - jmp LSetSides - -//dist1= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2]; -//dist2= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2]; -Lcase6: - fmuls (%ebx) // emaxs[0] - flds pl_normal+4(%edx) - fxch %st(2) - fmuls (%ecx) // emins[0] - fxch %st(2) - fld %st(0) - fmuls 4(%ecx) // emins[1] - flds pl_normal+8(%edx) - fxch %st(2) - fmuls 4(%ebx) // emaxs[1] - fxch %st(2) - fld %st(0) - fmuls 8(%ecx) // emins[2] - fxch %st(5) - faddp %st(0),%st(3) - fmuls 8(%ebx) // emaxs[2] - fxch %st(1) - faddp %st(0),%st(3) - fxch %st(3) - faddp %st(0),%st(2) - - jmp LSetSides - -//dist1= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2]; -//dist2= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2]; -Lcase7: - fmuls (%ecx) // emins[0] - flds pl_normal+4(%edx) - fxch %st(2) - fmuls (%ebx) // emaxs[0] - fxch %st(2) - fld %st(0) - fmuls 4(%ecx) // emins[1] - flds pl_normal+8(%edx) - fxch %st(2) - fmuls 4(%ebx) // emaxs[1] - fxch %st(2) - fld %st(0) - fmuls 8(%ecx) // emins[2] - fxch %st(5) - faddp %st(0),%st(3) - fmuls 8(%ebx) // emaxs[2] - fxch %st(1) - faddp %st(0),%st(3) - fxch %st(3) - faddp %st(0),%st(2) - -LSetSides: - -// sides = 0; -// if (dist1 >= p->dist) -// sides = 1; -// if (dist2 < p->dist) -// sides |= 2; - - faddp %st(0),%st(2) // dist1 | dist2 - fcomps pl_dist(%edx) - xorl %ecx,%ecx - fnstsw %ax - fcomps pl_dist(%edx) - andb $1,%ah - xorb $1,%ah - addb %ah,%cl - - fnstsw %ax - andb $1,%ah - addb %ah,%ah - addb %ah,%cl - -// return sides; - - popl %ebx - movl %ecx,%eax // return status - - ret - - -Lerror: - xorl %eax,%eax - ret - -#endif // USE_ASM diff --git a/src/refresh/sw/aclip.c b/src/refresh/sw/aclip.c index 1551647..83f26e8 100644 --- a/src/refresh/sw/aclip.c +++ b/src/refresh/sw/aclip.c @@ -49,20 +49,6 @@ static void R_Alias_clip_z(finalvert_t *pfv0, finalvert_t *pfv1, finalvert_t *ou R_AliasProjectAndClipTestFinalVert(out); } - -#if USE_ASM - -void R_Alias_clip_top(finalvert_t *pfv0, finalvert_t *pfv1, - finalvert_t *out); -void R_Alias_clip_bottom(finalvert_t *pfv0, finalvert_t *pfv1, - finalvert_t *out); -void R_Alias_clip_left(finalvert_t *pfv0, finalvert_t *pfv1, - finalvert_t *out); -void R_Alias_clip_right(finalvert_t *pfv0, finalvert_t *pfv1, - finalvert_t *out); - -#else - static void R_Alias_clip_left(finalvert_t *pfv0, finalvert_t *pfv1, finalvert_t *out) { float scale; @@ -88,7 +74,6 @@ static void R_Alias_clip_left(finalvert_t *pfv0, finalvert_t *pfv1, finalvert_t } } - static void R_Alias_clip_right(finalvert_t *pfv0, finalvert_t *pfv1, finalvert_t *out) { float scale; @@ -114,7 +99,6 @@ static void R_Alias_clip_right(finalvert_t *pfv0, finalvert_t *pfv1, finalvert_t } } - static void R_Alias_clip_top(finalvert_t *pfv0, finalvert_t *pfv1, finalvert_t *out) { float scale; @@ -140,7 +124,6 @@ static void R_Alias_clip_top(finalvert_t *pfv0, finalvert_t *pfv1, finalvert_t * } } - static void R_Alias_clip_bottom(finalvert_t *pfv0, finalvert_t *pfv1, finalvert_t *out) { @@ -169,9 +152,6 @@ static void R_Alias_clip_bottom(finalvert_t *pfv0, finalvert_t *pfv1, } } -#endif - - static int R_AliasClip(finalvert_t *in, finalvert_t *out, int flag, int count, void(*clip)(finalvert_t *pfv0, finalvert_t *pfv1, finalvert_t *out)) { diff --git a/src/refresh/sw/edge.c b/src/refresh/sw/edge.c index f83cf86..f4b2e61 100644 --- a/src/refresh/sw/edge.c +++ b/src/refresh/sw/edge.c @@ -19,21 +19,6 @@ with this program; if not, write to the Free Software Foundation, Inc., #include "sw.h" -#if !USE_ASM -void R_SurfacePatch(void) -{ -} - -void R_EdgeCodeStart(void) -{ -} - -void R_EdgeCodeEnd(void) -{ -} -#endif - - /* the complex cases add new polys on most lines, so dont optimize for keeping them the same have multiple free span lists to try to get better coherence? @@ -135,8 +120,6 @@ void R_BeginEdgeFrame(void) } -#if !USE_ASM - /* ============== R_InsertNewEdges @@ -254,8 +237,6 @@ pushback: } } -#endif // !USE_ASM - /* ============== @@ -400,8 +381,6 @@ void R_TrailingEdge(surf_t *surf, edge_t *edge) } -#if !USE_ASM - /* ============== R_LeadingEdge @@ -547,8 +526,6 @@ void R_GenerateSpans(void) R_CleanupSpan(); } -#endif // !USE_ASM - /* ============== diff --git a/src/refresh/sw/main.c b/src/refresh/sw/main.c index 467170c..13ce08d 100644 --- a/src/refresh/sw/main.c +++ b/src/refresh/sw/main.c @@ -126,12 +126,6 @@ cvar_t *vid_gamma; cvar_t *sw_lockpvs; //PGM -#if USE_ASM - -void *d_pcolormap; - -#else // USE_ASM - // all global and static refresh variables are collected in a contiguous block // to avoid cache conflicts. @@ -155,8 +149,6 @@ short *d_pzbuffer; unsigned int d_zrowbytes; unsigned int d_zwidth; -#endif // !USE_ASM - int sintable[CYCLE * 2]; int intsintable[CYCLE * 2]; int blanktable[CYCLE * 2]; // PGM @@ -272,11 +264,6 @@ qboolean R_Init(qboolean total) Com_DPrintf("ref_soft " VERSION ", " __DATE__ "\n"); -#if USE_ASM - Sys_MakeCodeWriteable((uintptr_t)R_EdgeCodeStart, - (uintptr_t)R_EdgeCodeEnd - (uintptr_t)R_EdgeCodeStart); -#endif - r_aliasuvscale = 1.0; // create the window @@ -390,7 +377,6 @@ void R_NewMap(void) // surface 0 doesn't really exist; it's just a dummy because index 0 // is used to indicate no edge attached to surface surfaces--; - R_SurfacePatch(); } r_maxedgesseen = 0; @@ -792,7 +778,6 @@ void R_EdgeDrawing(void) // surface 0 doesn't really exist; it's just a dummy because index 0 // is used to indicate no edge attached to surface surfaces--; - R_SurfacePatch(); } R_BeginEdgeFrame(); diff --git a/src/refresh/sw/misc.c b/src/refresh/sw/misc.c index 6829280..755b5fe 100644 --- a/src/refresh/sw/misc.c +++ b/src/refresh/sw/misc.c @@ -38,30 +38,6 @@ short *zspantable[MAXHEIGHT]; /* ================ -D_Patch -================ -*/ -void D_Patch(void) -{ -#if USE_ASM - extern void D_Aff8Patch(void); - static qboolean protectset8 = qfalse; - extern void D_PolysetAff8Start(void); - - if (!protectset8) { - Sys_MakeCodeWriteable((uintptr_t)D_PolysetAff8Start, - (uintptr_t)D_Aff8Patch - (uintptr_t)D_PolysetAff8Start); - Sys_MakeCodeWriteable((uintptr_t)R_Surf8Start, - (uintptr_t)R_Surf8End - (uintptr_t)R_Surf8Start); - protectset8 = qtrue; - } - - R_Surf8Patch(); - D_Aff8Patch(); -#endif -} -/* -================ D_ViewChanged ================ */ @@ -103,8 +79,6 @@ void D_ViewChanged(void) memset(d_pzbuffer, 0xff, vid.width * vid.height * sizeof(d_pzbuffer[0])); R_DrawFill8(r_newrefdef.x, r_newrefdef.y, r_newrefdef.width, r_newrefdef.height, /*(int)sw_clearcolor->value & 0xff*/0); } - - D_Patch(); } diff --git a/src/refresh/sw/polyset.c b/src/refresh/sw/polyset.c index 869fc34..73d1d04 100644 --- a/src/refresh/sw/polyset.c +++ b/src/refresh/sw/polyset.c @@ -21,11 +21,9 @@ with this program; if not, write to the Free Software Foundation, Inc., #include "sw.h" // TODO: put in span spilling to shrink list size -// !!! if this is changed, it must be changed in d_polysa.s too !!! #define DPS_MAXSPANS MAXHEIGHT+1 // 1 extra for spanpackage that marks end -// !!! if this is changed, it must be changed in asm_draw.h too !!! typedef struct { void *pdest; short *pz; @@ -378,16 +376,8 @@ void R_PolysetCalcGradients(int skinwidth) r_zistepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) * ystepdenominv); -#if USE_ASM - if (d_pdrawspans == R_PolysetDrawSpans8_Opaque) { - a_sstepxfrac = r_sstepx << 16; - a_tstepxfrac = r_tstepx << 16; - } else -#endif - { - a_sstepxfrac = r_sstepx & 0xFFFF; - a_tstepxfrac = r_tstepx & 0xFFFF; - } + a_sstepxfrac = r_sstepx & 0xFFFF; + a_tstepxfrac = r_tstepx & 0xFFFF; a_ststepxwhole = skinwidth * (r_tstepx >> 16) + (r_sstepx >> 16) * TEX_BYTES; } @@ -492,7 +482,6 @@ void R_PolysetDrawSpansConstant8_Blended(spanpackage_t *pspanpackage) } while (pspanpackage->count != -999999); } -#if !USE_ASM void R_PolysetDrawSpans8_Opaque(spanpackage_t *pspanpackage) { int lcount; @@ -550,7 +539,6 @@ void R_PolysetDrawSpans8_Opaque(spanpackage_t *pspanpackage) pspanpackage++; } while (pspanpackage->count != -999999); } -#endif /* @@ -593,16 +581,8 @@ void R_RasterizeAliasPolySmooth(void) d_ptex = (byte *)r_affinetridesc.pskin + (plefttop[2] >> 16) * TEX_BYTES + (plefttop[3] >> 16) * r_affinetridesc.skinwidth; -#if USE_ASM - if (d_pdrawspans == R_PolysetDrawSpans8_Opaque) { - d_sfrac = (plefttop[2] & 0xFFFF) << 16; - d_tfrac = (plefttop[3] & 0xFFFF) << 16; - } else -#endif - { - d_sfrac = plefttop[2] & 0xFFFF; - d_tfrac = plefttop[3] & 0xFFFF; - } + d_sfrac = plefttop[2] & 0xFFFF; + d_tfrac = plefttop[3] & 0xFFFF; d_light = plefttop[4]; d_zi = plefttop[5]; @@ -627,16 +607,8 @@ void R_RasterizeAliasPolySmooth(void) R_PolysetSetUpForLineScan(plefttop[0], plefttop[1], pleftbottom[0], pleftbottom[1]); -#if USE_ASM - if (d_pdrawspans == R_PolysetDrawSpans8_Opaque) { - d_pzbasestep = (d_zwidth + ubasestep) << 1; - d_pzextrastep = d_pzbasestep + 2; - } else -#endif - { - d_pzbasestep = d_zwidth + ubasestep; - d_pzextrastep = d_pzbasestep + 1; - } + d_pzbasestep = d_zwidth + ubasestep; + d_pzextrastep = d_pzbasestep + 1; d_pdestbasestep = r_screenrowbytes + ubasestep * VID_BYTES; d_pdestextrastep = d_pdestbasestep + 1 * VID_BYTES; @@ -655,43 +627,20 @@ void R_RasterizeAliasPolySmooth(void) d_ptexbasestep = ((r_sstepy + r_sstepx * ubasestep) >> 16) * TEX_BYTES + ((r_tstepy + r_tstepx * ubasestep) >> 16) * r_affinetridesc.skinwidth; -#if USE_ASM - if (d_pdrawspans == R_PolysetDrawSpans8_Opaque) { - d_sfracbasestep = (r_sstepy + r_sstepx * ubasestep) << 16; - d_tfracbasestep = (r_tstepy + r_tstepx * ubasestep) << 16; - } else -#endif - { - d_sfracbasestep = (r_sstepy + r_sstepx * ubasestep) & 0xFFFF; - d_tfracbasestep = (r_tstepy + r_tstepx * ubasestep) & 0xFFFF; - } + d_sfracbasestep = (r_sstepy + r_sstepx * ubasestep) & 0xFFFF; + d_tfracbasestep = (r_tstepy + r_tstepx * ubasestep) & 0xFFFF; d_lightbasestep = r_lstepy + working_lstepx * ubasestep; d_zibasestep = r_zistepy + r_zistepx * ubasestep; d_ptexextrastep = ((r_sstepy + r_sstepx * d_countextrastep) >> 16) * TEX_BYTES + ((r_tstepy + r_tstepx * d_countextrastep) >> 16) * r_affinetridesc.skinwidth; -#if USE_ASM - if (d_pdrawspans == R_PolysetDrawSpans8_Opaque) { - d_sfracextrastep = (r_sstepy + r_sstepx * d_countextrastep) << 16; - d_tfracextrastep = (r_tstepy + r_tstepx * d_countextrastep) << 16; - } else -#endif - { - d_sfracextrastep = (r_sstepy + r_sstepx * d_countextrastep) & 0xFFFF; - d_tfracextrastep = (r_tstepy + r_tstepx * d_countextrastep) & 0xFFFF; - } + d_sfracextrastep = (r_sstepy + r_sstepx * d_countextrastep) & 0xFFFF; + d_tfracextrastep = (r_tstepy + r_tstepx * d_countextrastep) & 0xFFFF; d_lightextrastep = d_lightbasestep + working_lstepx; d_ziextrastep = d_zibasestep + r_zistepx; -#if USE_ASM - if (d_pdrawspans == R_PolysetDrawSpans8_Opaque) { - R_PolysetScanLeftEdge(initialleftheight); - } else -#endif - { - R_PolysetScanLeftEdge_C(initialleftheight); - } + R_PolysetScanLeftEdge_C(initialleftheight); } // @@ -740,16 +689,8 @@ void R_RasterizeAliasPolySmooth(void) d_pdestbasestep = r_screenrowbytes + ubasestep * VID_BYTES; d_pdestextrastep = d_pdestbasestep + 1 * VID_BYTES; -#if USE_ASM - if (d_pdrawspans == R_PolysetDrawSpans8_Opaque) { - d_pzbasestep = (d_zwidth + ubasestep) << 1; - d_pzextrastep = d_pzbasestep + 2; - } else -#endif - { - d_pzbasestep = d_zwidth + ubasestep; - d_pzextrastep = d_pzbasestep + 1; - } + d_pzbasestep = d_zwidth + ubasestep; + d_pzextrastep = d_pzbasestep + 1; if (ubasestep < 0) working_lstepx = r_lstepx - 1; @@ -760,43 +701,20 @@ void R_RasterizeAliasPolySmooth(void) d_ptexbasestep = ((r_sstepy + r_sstepx * ubasestep) >> 16) * TEX_BYTES + ((r_tstepy + r_tstepx * ubasestep) >> 16) * r_affinetridesc.skinwidth; -#if USE_ASM - if (d_pdrawspans == R_PolysetDrawSpans8_Opaque) { - d_sfracbasestep = (r_sstepy + r_sstepx * ubasestep) << 16; - d_tfracbasestep = (r_tstepy + r_tstepx * ubasestep) << 16; - } else -#endif - { - d_sfracbasestep = (r_sstepy + r_sstepx * ubasestep) & 0xFFFF; - d_tfracbasestep = (r_tstepy + r_tstepx * ubasestep) & 0xFFFF; - } + d_sfracbasestep = (r_sstepy + r_sstepx * ubasestep) & 0xFFFF; + d_tfracbasestep = (r_tstepy + r_tstepx * ubasestep) & 0xFFFF; d_lightbasestep = r_lstepy + working_lstepx * ubasestep; d_zibasestep = r_zistepy + r_zistepx * ubasestep; d_ptexextrastep = ((r_sstepy + r_sstepx * d_countextrastep) >> 16) * TEX_BYTES + ((r_tstepy + r_tstepx * d_countextrastep) >> 16) * r_affinetridesc.skinwidth; -#if USE_ASM - if (d_pdrawspans == R_PolysetDrawSpans8_Opaque) { - d_sfracextrastep = ((r_sstepy + r_sstepx * d_countextrastep) & 0xFFFF) << 16; - d_tfracextrastep = ((r_tstepy + r_tstepx * d_countextrastep) & 0xFFFF) << 16; - } else -#endif - { - d_sfracextrastep = (r_sstepy + r_sstepx * d_countextrastep) & 0xFFFF; - d_tfracextrastep = (r_tstepy + r_tstepx * d_countextrastep) & 0xFFFF; - } + d_sfracextrastep = (r_sstepy + r_sstepx * d_countextrastep) & 0xFFFF; + d_tfracextrastep = (r_tstepy + r_tstepx * d_countextrastep) & 0xFFFF; d_lightextrastep = d_lightbasestep + working_lstepx; d_ziextrastep = d_zibasestep + r_zistepx; -#if USE_ASM - if (d_pdrawspans == R_PolysetDrawSpans8_Opaque) { - R_PolysetScanLeftEdge(height); - } else -#endif - { - R_PolysetScanLeftEdge_C(height); - } + R_PolysetScanLeftEdge_C(height); } } diff --git a/src/refresh/sw/raster.c b/src/refresh/sw/raster.c index a50e877..dce9b03 100644 --- a/src/refresh/sw/raster.c +++ b/src/refresh/sw/raster.c @@ -55,7 +55,6 @@ int r_ceilv1; qboolean r_lastvertvalid; -#if !USE_ASM /* ================ @@ -301,8 +300,6 @@ void R_ClipEdge(mvertex_t *pv0, mvertex_t *pv1, clipplane_t *clip) R_EmitEdge(pv0, pv1); } -#endif // !USE_ASM - /* ================ diff --git a/src/refresh/sw/scan.c b/src/refresh/sw/scan.c index 8868ee6..5833fe4 100644 --- a/src/refresh/sw/scan.c +++ b/src/refresh/sw/scan.c @@ -214,8 +214,6 @@ void D_DrawTurbulent16(espan_t *pspan, int *warptable) } while ((pspan = pspan->pnext) != NULL); } -#if !USE_ASM - /* ============= D_DrawSpans16 @@ -400,5 +398,3 @@ void D_DrawZSpans(espan_t *pspan) } while ((pspan = pspan->pnext) != NULL); } -#endif - diff --git a/src/refresh/sw/surf.c b/src/refresh/sw/surf.c index 45076f0..335bad0 100644 --- a/src/refresh/sw/surf.c +++ b/src/refresh/sw/surf.c @@ -143,7 +143,6 @@ void R_DrawSurface(void) } //============================================================================= -#if !USE_ASM #define BLOCK_FUNC R_DrawSurfaceBlock8_mip0 #define BLOCK_SHIFT 4 @@ -161,9 +160,6 @@ void R_DrawSurface(void) #define BLOCK_SHIFT 1 #include "block.h" -#endif - - //============================================================================ diff --git a/src/refresh/sw/sw.h b/src/refresh/sw/sw.h index e9e3c48..603aa3c 100644 --- a/src/refresh/sw/sw.h +++ b/src/refresh/sw/sw.h @@ -51,7 +51,6 @@ typedef struct { extern viddef_t vid; -// !!! if this is changed, it must be changed in asm_draw.h too !!! typedef struct { vrectSoft_t vrect; // subwindow in video for refresh // FIXME: not need vrect next field here? @@ -105,7 +104,6 @@ extern oldrefdef_t r_refdef; #define MAXWORKINGVERTS (MAXVERTS + 4) // max points in an intermediate // polygon (while processing) -// !!! if this is changed, it must be changed in d_ifacea.h too !!! #define MAXHEIGHT 1200 #define MAXWIDTH 1600 @@ -124,14 +122,11 @@ extern oldrefdef_t r_refdef; #define PARTICLE_Z_CLIP 8.0 -// !!! must be kept the same as in quakeasm.h !!! #define TRANSPARENT_COLOR 0xFF -// !!! if this is changed, it must be changed in d_ifacea.h too !!! #define TURB_TEX_SIZE 64 // base turbulent texture size -// !!! if this is changed, it must be changed in d_ifacea.h too !!! #define CYCLE 128 // turbulent cycle size #define SCANBUFFERPAD 0x1000 @@ -166,7 +161,6 @@ extern oldrefdef_t r_refdef; #define BACKFACE_EPSILON 0.01 -// !!! if this is changed, it must be changed in asm_draw.h too !!! #define NEAR_CLIP 0.01 @@ -198,10 +192,6 @@ typedef struct { float zi; } emitpoint_t; -/* -** if you change this structure be sure to change the #defines -** listed after it! -*/ typedef struct finalvert_s { int u, v, s, t; int l; @@ -210,18 +200,6 @@ typedef struct finalvert_s { float xyz[3]; // eye space } finalvert_t; -#define FINALVERT_V0 0 -#define FINALVERT_V1 4 -#define FINALVERT_V2 8 -#define FINALVERT_V3 12 -#define FINALVERT_V4 16 -#define FINALVERT_V5 20 -#define FINALVERT_FLAGS 24 -#define FINALVERT_X 28 -#define FINALVERT_Y 32 -#define FINALVERT_Z 36 -#define FINALVERT_SIZE 40 - typedef struct { void *pskin; int pskindesc; @@ -256,7 +234,6 @@ typedef struct bedge_s { } bedge_t; -// !!! if this is changed, it must be changed in asm_draw.h too !!! typedef struct clipplane_s { vec3_t normal; float dist; @@ -284,7 +261,6 @@ typedef struct surfcache_s { byte data[4]; // width*height elements } surfcache_t; -// !!! if this is changed, it must be changed in asm_draw.h too !!! typedef struct espan_s { int u, v, count; struct espan_s *pnext; @@ -327,7 +303,6 @@ typedef struct surf_s { int pad[2]; // to 64 bytes } surf_t; -// !!! if this is changed, it must be changed in asm_draw.h too !!! typedef struct edge_s { fixed16_t u; fixed16_t u_step; @@ -589,7 +564,6 @@ extern int r_currentbkey; void R_InitTurb(void); void R_DrawParticles(void); -void R_SurfacePatch(void); extern int r_amodels_drawn; extern edge_t *auxedges; @@ -684,10 +658,6 @@ void R_RenderFrame(refdef_t *fd); void R_BeginFrame(void); -#if USE_ASM -void Sys_MakeCodeWriteable(uintptr_t start, size_t length); -#endif - void R_InitImages(void); void R_ShutdownImages(void); diff --git a/src/refresh/sw/x86/aclip.S b/src/refresh/sw/x86/aclip.S deleted file mode 100644 index df3674e..0000000 --- a/src/refresh/sw/x86/aclip.S +++ /dev/null @@ -1,195 +0,0 @@ -// -// r_aliasa.s -// x86 assembly-language Alias model transform and project code. -// - -#include "common/x86/asm.h" -#include "sw.h" - -#if USE_ASM - - .data -Ltemp0: .long 0 -Ltemp1: .long 0 - - .text - -#define pfv0 8+4 -#define pfv1 8+8 -#define out 8+12 - -.globl C(R_Alias_clip_bottom) -C(R_Alias_clip_bottom): - pushl %esi - pushl %edi - - movl pfv0(%esp),%esi - movl pfv1(%esp),%edi - - movl C(r_refdef)+rd_aliasvrectbottom,%eax - -LDoForwardOrBackward: - - movl fv_v+4(%esi),%edx - movl fv_v+4(%edi),%ecx - - cmpl %ecx,%edx - jl LDoForward - - movl fv_v+4(%esi),%ecx - movl fv_v+4(%edi),%edx - movl pfv0(%esp),%edi - movl pfv1(%esp),%esi - -LDoForward: - - subl %edx,%ecx - subl %edx,%eax - movl %ecx,Ltemp1 - movl %eax,Ltemp0 - fildl Ltemp1 - fildl Ltemp0 - movl out(%esp),%edx - movl $2,%eax - - fdivp %st(0),%st(1) // scale - -LDo3Forward: - fildl fv_v+0(%esi) // fv0v0 | scale - fildl fv_v+0(%edi) // fv1v0 | fv0v0 | scale - fildl fv_v+4(%esi) // fv0v1 | fv1v0 | fv0v0 | scale - fildl fv_v+4(%edi) // fv1v1 | fv0v1 | fv1v0 | fv0v0 | scale - fildl fv_v+8(%esi) // fv0v2 | fv1v1 | fv0v1 | fv1v0 | fv0v0 | scale - fildl fv_v+8(%edi) // fv1v2 | fv0v2 | fv1v1 | fv0v1 | fv1v0 | fv0v0 | - // scale - fxch %st(5) // fv0v0 | fv0v2 | fv1v1 | fv0v1 | fv1v0 | fv1v2 | - // scale - fsubr %st(0),%st(4) // fv0v0 | fv0v2 | fv1v1 | fv0v1 | fv1v0-fv0v0 | - // fv1v2 | scale - fxch %st(3) // fv0v1 | fv0v2 | fv1v1 | fv0v0 | fv1v0-fv0v0 | - // fv1v2 | scale - fsubr %st(0),%st(2) // fv0v1 | fv0v2 | fv1v1-fv0v1 | fv0v0 | - // fv1v0-fv0v0 | fv1v2 | scale - fxch %st(1) // fv0v2 | fv0v1 | fv1v1-fv0v1 | fv0v0 | - // fv1v0-fv0v0 | fv1v2 | scale - fsubr %st(0),%st(5) // fv0v2 | fv0v1 | fv1v1-fv0v1 | fv0v0 | - // fv1v0-fv0v0 | fv1v2-fv0v2 | scale - fxch %st(6) // scale | fv0v1 | fv1v1-fv0v1 | fv0v0 | - // fv1v0-fv0v0 | fv1v2-fv0v2 | fv0v2 - fmul %st(0),%st(4) // scale | fv0v1 | fv1v1-fv0v1 | fv0v0 | - // (fv1v0-fv0v0)*scale | fv1v2-fv0v2 | fv0v2 - addl $12,%edi - fmul %st(0),%st(2) // scale | fv0v1 | (fv1v1-fv0v1)*scale | fv0v0 | - // (fv1v0-fv0v0)*scale | fv1v2-fv0v2 | fv0v2 - addl $12,%esi - addl $12,%edx - fmul %st(0),%st(5) // scale | fv0v1 | (fv1v1-fv0v1)*scale | fv0v0 | - // (fv1v0-fv0v0)*scale | (fv1v2-fv0v2)*scale | - // fv0v2 - fxch %st(3) // fv0v0 | fv0v1 | (fv1v1-fv0v1)*scale | scale | - // (fv1v0-fv0v0)*scale | (fv1v2-fv0v2)*scale | - // fv0v2 - faddp %st(0),%st(4) // fv0v1 | (fv1v1-fv0v1)*scale | scale | - // fv0v0+(fv1v0-fv0v0)*scale | - // (fv1v2-fv0v2)*scale | fv0v2 - faddp %st(0),%st(1) // fv0v1+(fv1v1-fv0v1)*scale | scale | - // fv0v0+(fv1v0-fv0v0)*scale | - // (fv1v2-fv0v2)*scale | fv0v2 - fxch %st(4) // fv0v2 | scale | fv0v0+(fv1v0-fv0v0)*scale | - // (fv1v2-fv0v2)*scale | fv0v1+(fv1v1-fv0v1)*scale - faddp %st(0),%st(3) // scale | fv0v0+(fv1v0-fv0v0)*scale | - // fv0v2+(fv1v2-fv0v2)*scale | - // fv0v1+(fv1v1-fv0v1)*scale - fxch %st(1) // fv0v0+(fv1v0-fv0v0)*scale | scale | - // fv0v2+(fv1v2-fv0v2)*scale | - // fv0v1+(fv1v1-fv0v1)*scale - fadds float_point5 - fxch %st(3) // fv0v1+(fv1v1-fv0v1)*scale | scale | - // fv0v2+(fv1v2-fv0v2)*scale | - // fv0v0+(fv1v0-fv0v0)*scale - fadds float_point5 - fxch %st(2) // fv0v2+(fv1v2-fv0v2)*scale | scale | - // fv0v1+(fv1v1-fv0v1)*scale | - // fv0v0+(fv1v0-fv0v0)*scale - fadds float_point5 - fxch %st(3) // fv0v0+(fv1v0-fv0v0)*scale | scale | - // fv0v1+(fv1v1-fv0v1)*scale | - // fv0v2+(fv1v2-fv0v2)*scale - fistpl fv_v+0-12(%edx) // scale | fv0v1+(fv1v1-fv0v1)*scale | - // fv0v2+(fv1v2-fv0v2)*scale - fxch %st(1) // fv0v1+(fv1v1-fv0v1)*scale | scale | - // fv0v2+(fv1v2-fv0v2)*scale | scale - fistpl fv_v+4-12(%edx) // scale | fv0v2+(fv1v2-fv0v2)*scale - fxch %st(1) // fv0v2+(fv1v2-fv0v2)*sc | scale - fistpl fv_v+8-12(%edx) // scale - - decl %eax - jnz LDo3Forward - - fstp %st(0) - - popl %edi - popl %esi - - ret - - -.globl C(R_Alias_clip_top) -C(R_Alias_clip_top): - pushl %esi - pushl %edi - - movl pfv0(%esp),%esi - movl pfv1(%esp),%edi - - movl C(r_refdef)+rd_aliasvrect+4,%eax - jmp LDoForwardOrBackward - - - -.globl C(R_Alias_clip_right) -C(R_Alias_clip_right): - pushl %esi - pushl %edi - - movl pfv0(%esp),%esi - movl pfv1(%esp),%edi - - movl C(r_refdef)+rd_aliasvrectright,%eax - -LRightLeftEntry: - - - movl fv_v+4(%esi),%edx - movl fv_v+4(%edi),%ecx - - cmpl %ecx,%edx - movl fv_v+0(%esi),%edx - - movl fv_v+0(%edi),%ecx - jl LDoForward2 - - movl fv_v+0(%esi),%ecx - movl fv_v+0(%edi),%edx - movl pfv0(%esp),%edi - movl pfv1(%esp),%esi - -LDoForward2: - - jmp LDoForward - - -.globl C(R_Alias_clip_left) -C(R_Alias_clip_left): - pushl %esi - pushl %edi - - movl pfv0(%esp),%esi - movl pfv1(%esp),%edi - - movl C(r_refdef)+rd_aliasvrect+0,%eax - jmp LRightLeftEntry - - -#endif // USE_ASM - diff --git a/src/refresh/sw/x86/draw.S b/src/refresh/sw/x86/draw.S deleted file mode 100644 index 3aeceea..0000000 --- a/src/refresh/sw/x86/draw.S +++ /dev/null @@ -1,817 +0,0 @@ -// -// r_drawa.s -// x86 assembly-language edge clipping and emission code -// - -#include "common/x86/asm.h" -#include "sw.h" - -#if USE_ASM - -// !!! if these are changed, they must be changed in r_draw.c too !!! -#define FULLY_CLIPPED_CACHED 0x80000000 -#define FRAMECOUNT_MASK 0x7FFFFFFF - - .data - -Ld0: .single 0.0 -Ld1: .single 0.0 -Lstack: .long 0 -Lfp_near_clip: .single NEAR_CLIP -Lceilv0: .long 0 -Lv: .long 0 -Lu0: .long 0 -Lv0: .long 0 -Lzi0: .long 0 - - .text - -//---------------------------------------------------------------------- -// edge clipping code -//---------------------------------------------------------------------- - -#define pv0 4+12 -#define pv1 8+12 -#define clip 12+12 - - .align 4 -.globl C(R_ClipEdge) -C(R_ClipEdge): - pushl %esi // preserve register variables - pushl %edi - pushl %ebx - movl %esp,Lstack // for clearing the stack later - -// float d0, d1, f; -// mvertex_t clipvert; - - movl clip(%esp),%ebx - movl pv0(%esp),%esi - movl pv1(%esp),%edx - -// if (clip) -// { - testl %ebx,%ebx - jz Lemit - -// do -// { - -Lcliploop: - -// d0 = DotProduct (pv0->position, clip->normal) - clip->dist; -// d1 = DotProduct (pv1->position, clip->normal) - clip->dist; - flds mv_position+0(%esi) - fmuls cp_normal+0(%ebx) - flds mv_position+4(%esi) - fmuls cp_normal+4(%ebx) - flds mv_position+8(%esi) - fmuls cp_normal+8(%ebx) - fxch %st(1) - faddp %st(0),%st(2) // d0mul2 | d0add0 - - flds mv_position+0(%edx) - fmuls cp_normal+0(%ebx) - flds mv_position+4(%edx) - fmuls cp_normal+4(%ebx) - flds mv_position+8(%edx) - fmuls cp_normal+8(%ebx) - fxch %st(1) - faddp %st(0),%st(2) // d1mul2 | d1add0 | d0mul2 | d0add0 - fxch %st(3) // d0add0 | d1add0 | d0mul2 | d1mul2 - - faddp %st(0),%st(2) // d1add0 | dot0 | d1mul2 - faddp %st(0),%st(2) // dot0 | dot1 - - fsubs cp_dist(%ebx) // d0 | dot1 - fxch %st(1) // dot1 | d0 - fsubs cp_dist(%ebx) // d1 | d0 - fxch %st(1) - fstps Ld0 - fstps Ld1 - -// if (d0 >= 0) -// { - movl Ld0,%eax - movl Ld1,%ecx - orl %eax,%ecx - js Lp2 - -// both points are unclipped - -Lcontinue: - -// -// R_ClipEdge (&clipvert, pv1, clip->next); -// return; -// } -// } while ((clip = clip->next) != NULL); - movl cp_next(%ebx),%ebx - testl %ebx,%ebx - jnz Lcliploop - -// } - -//// add the edge -// R_EmitEdge (pv0, pv1); -Lemit: - -// -// set integer rounding to ceil mode, set to single precision -// -// FIXME: do away with by manually extracting integers from floats? -// FIXME: set less often - fldcw C(ceil_cw) - -// edge_t *edge, *pcheck; -// int u_check; -// float u, u_step; -// vec3_t local, transformed; -// float *world; -// int v, v2, ceilv0; -// float scale, lzi0, u0, v0; -// int side; - -// if (r_lastvertvalid) -// { - cmpl $0,C(r_lastvertvalid) - jz LCalcFirst - -// u0 = r_u1; -// v0 = r_v1; -// lzi0 = r_lzi1; -// ceilv0 = r_ceilv1; - movl C(r_lzi1),%eax - movl C(r_u1),%ecx - movl %eax,Lzi0 - movl %ecx,Lu0 - movl C(r_v1),%ecx - movl C(r_ceilv1),%eax - movl %ecx,Lv0 - movl %eax,Lceilv0 - jmp LCalcSecond - -// } - -LCalcFirst: - -// else -// { -// world = &pv0->position[0]; - - call LTransformAndProject // v0 | lzi0 | u0 - - fsts Lv0 - fxch %st(2) // u0 | lzi0 | v0 - fstps Lu0 // lzi0 | v0 - fstps Lzi0 // v0 - -// ceilv0 = (int)(v0 - 2000) + 2000; // ceil(v0); - fistpl Lceilv0 - -// } - -LCalcSecond: - -// world = &pv1->position[0]; - movl %edx,%esi - - call LTransformAndProject // v1 | lzi1 | u1 - - flds Lu0 // u0 | v1 | lzi1 | u1 - fxch %st(3) // u1 | v1 | lzi1 | u0 - flds Lzi0 // lzi0 | u1 | v1 | lzi1 | u0 - fxch %st(3) // lzi1 | u1 | v1 | lzi0 | u0 - flds Lv0 // v0 | lzi1 | u1 | v1 | lzi0 | u0 - fxch %st(3) // v1 | lzi1 | u1 | v0 | lzi0 | u0 - -// r_ceilv1 = (int)(r_v1 - 2000) + 2000; // ceil(r_v1); - fistl C(r_ceilv1) - - fldcw C(chop_cw) // put back normal floating-point state - - fsts C(r_v1) - fxch %st(4) // lzi0 | lzi1 | u1 | v0 | v1 | u0 - -// if (r_lzi1 > lzi0) -// lzi0 = r_lzi1; - fcom %st(1) - fnstsw %ax - testb $1,%ah - jz LP0 - fstp %st(0) - fld %st(0) -LP0: - - fxch %st(1) // lzi1 | lzi0 | u1 | v0 | v1 | u0 - fstps C(r_lzi1) // lzi0 | u1 | v0 | v1 | u0 - fxch %st(1) - fsts C(r_u1) - fxch %st(1) - -// if (lzi0 > r_nearzi) // for mipmap finding -// r_nearzi = lzi0; - fcoms C(r_nearzi) - fnstsw %ax - testb $0x45,%ah - jnz LP1 - fsts C(r_nearzi) -LP1: - -// // for right edges, all we want is the effect on 1/z -// if (r_nearzionly) -// return; - movl C(r_nearzionly),%eax - testl %eax,%eax - jz LP2 -LPop5AndDone: - movl C(cacheoffset),%eax - movl C(r_framecount),%edx - cmpl $0x7FFFFFFF,%eax - jz LDoPop - andl $(FRAMECOUNT_MASK),%edx - orl $(FULLY_CLIPPED_CACHED),%edx - movl %edx,C(cacheoffset) - -LDoPop: - fstp %st(0) // u1 | v0 | v1 | u0 - fstp %st(0) // v0 | v1 | u0 - fstp %st(0) // v1 | u0 - fstp %st(0) // u0 - fstp %st(0) - jmp Ldone - -LP2: - -// // create the edge -// if (ceilv0 == r_ceilv1) -// return; // horizontal edge - movl Lceilv0,%ebx - movl C(edge_p),%edi - movl C(r_ceilv1),%ecx - movl %edi,%edx - movl C(r_pedge),%esi - addl $(et_size),%edx - cmpl %ecx,%ebx - jz LPop5AndDone - - movl C(r_pedge),%eax - movl %eax,et_owner(%edi) - -// side = ceilv0 > r_ceilv1; -// -// edge->nearzi = lzi0; - fstps et_nearzi(%edi) // u1 | v0 | v1 | u0 - -// if (side == 1) -// { - jc LSide0 - -LSide1: - -// // leading edge (go from p2 to p1) - -// u_step = ((u0 - r_u1) / (v0 - r_v1)); - fsubrp %st(0),%st(3) // v0 | v1 | u0-u1 - fsub %st(1),%st(0) // v0-v1 | v1 | u0-u1 - fdivrp %st(0),%st(2) // v1 | ustep - -// r_emitted = 1; - movl $1,C(r_emitted) - -// edge = edge_p++; - movl %edx,C(edge_p) - -// pretouch next edge - movl (%edx),%eax - -// v2 = ceilv0 - 1; -// v = r_ceilv1; - movl %ecx,%eax - leal -1(%ebx),%ecx - movl %eax,%ebx - -// edge->surfs[0] = 0; -// edge->surfs[1] = surface_p - surfaces; - movl C(surface_p),%eax - movl C(surfaces),%esi - subl %edx,%edx - subl %esi,%eax - shrl $(SURF_T_SHIFT),%eax - movl %edx,et_surfs(%edi) - movl %eax,et_surfs+2(%edi) - - subl %esi,%esi - -// u = r_u1 + ((float)v - r_v1) * u_step; - movl %ebx,Lv - fildl Lv // v | v1 | ustep - fsubp %st(0),%st(1) // v-v1 | ustep - fmul %st(1),%st(0) // (v-v1)*ustep | ustep - fadds C(r_u1) // u | ustep - - jmp LSideDone - -// } - -LSide0: - -// else -// { -// // trailing edge (go from p1 to p2) - -// u_step = ((r_u1 - u0) / (r_v1 - v0)); - fsub %st(3),%st(0) // u1-u0 | v0 | v1 | u0 - fxch %st(2) // v1 | v0 | u1-u0 | u0 - fsub %st(1),%st(0) // v1-v0 | v0 | u1-u0 | u0 - fdivrp %st(0),%st(2) // v0 | ustep | u0 - -// r_emitted = 1; - movl $1,C(r_emitted) - -// edge = edge_p++; - movl %edx,C(edge_p) - -// pretouch next edge - movl (%edx),%eax - -// v = ceilv0; -// v2 = r_ceilv1 - 1; - decl %ecx - -// edge->surfs[0] = surface_p - surfaces; -// edge->surfs[1] = 0; - movl C(surface_p),%eax - movl C(surfaces),%esi - subl %edx,%edx - subl %esi,%eax - shrl $(SURF_T_SHIFT),%eax - movl %edx,et_surfs+2(%edi) - movl %eax,et_surfs(%edi) - - movl $1,%esi - -// u = u0 + ((float)v - v0) * u_step; - movl %ebx,Lv - fildl Lv // v | v0 | ustep | u0 - fsubp %st(0),%st(1) // v-v0 | ustep | u0 - fmul %st(1),%st(0) // (v-v0)*ustep | ustep | u0 - faddp %st(0),%st(2) // ustep | u - fxch %st(1) // u | ustep - -// } - -LSideDone: - -// edge->u_step = u_step*0x100000; -// edge->u = u*0x100000 + 0xFFFFF; - - fmuls fp_1m // u*0x100000 | ustep - fxch %st(1) // ustep | u*0x100000 - fmuls fp_1m // ustep*0x100000 | u*0x100000 - fxch %st(1) // u*0x100000 | ustep*0x100000 - fadds fp_1m_minus_1 // u*0x100000 + 0xFFFFF | ustep*0x100000 - fxch %st(1) // ustep*0x100000 | u*0x100000 + 0xFFFFF - fistpl et_u_step(%edi) // u*0x100000 + 0xFFFFF - fistpl et_u(%edi) - -// // we need to do this to avoid stepping off the edges if a very nearly -// // horizontal edge is less than epsilon above a scan, and numeric error -// // causes it to incorrectly extend to the scan, and the extension of the -// // line goes off the edge of the screen -// // FIXME: is this actually needed? -// if (edge->u < r_refdef.vrect_x_adj_shift20) -// edge->u = r_refdef.vrect_x_adj_shift20; -// if (edge->u > r_refdef.vrectright_adj_shift20) -// edge->u = r_refdef.vrectright_adj_shift20; - movl et_u(%edi),%eax - movl C(r_refdef)+rd_vrect_x_adj_shift20,%edx - cmpl %edx,%eax - jl LP4 - movl C(r_refdef)+rd_vrectright_adj_shift20,%edx - cmpl %edx,%eax - jng LP5 -LP4: - movl %edx,et_u(%edi) - movl %edx,%eax -LP5: - -// // sort the edge in normally -// u_check = edge->u; -// -// if (edge->surfs[0]) -// u_check++; // sort trailers after leaders - addl %esi,%eax - -// if (!newedges[v] || newedges[v]->u >= u_check) -// { - movl C(newedges)(,%ebx,4),%esi - testl %esi,%esi - jz LDoFirst - cmpl %eax,et_u(%esi) - jl LNotFirst -LDoFirst: - -// edge->next = newedges[v]; -// newedges[v] = edge; - movl %esi,et_next(%edi) - movl %edi,C(newedges)(,%ebx,4) - - jmp LSetRemove - -// } - -LNotFirst: - -// else -// { -// pcheck = newedges[v]; -// -// while (pcheck->next && pcheck->next->u < u_check) -// pcheck = pcheck->next; -LFindInsertLoop: - movl %esi,%edx - movl et_next(%esi),%esi - testl %esi,%esi - jz LInsertFound - cmpl %eax,et_u(%esi) - jl LFindInsertLoop - -LInsertFound: - -// edge->next = pcheck->next; -// pcheck->next = edge; - movl %esi,et_next(%edi) - movl %edi,et_next(%edx) - -// } - -LSetRemove: - -// edge->nextremove = removeedges[v2]; -// removeedges[v2] = edge; - movl C(removeedges)(,%ecx,4),%eax - movl %edi,C(removeedges)(,%ecx,4) - movl %eax,et_nextremove(%edi) - -Ldone: - movl Lstack,%esp // clear temporary variables from stack - - popl %ebx // restore register variables - popl %edi - popl %esi - ret - -// at least one point is clipped - -Lp2: - testl %eax,%eax - jns Lp1 - -// else -// { -// // point 0 is clipped - -// if (d1 < 0) -// { - movl Ld1,%eax - testl %eax,%eax - jns Lp3 - -// // both points are clipped -// // we do cache fully clipped edges -// if (!leftclipped) - movl C(r_leftclipped),%eax - movl C(r_pedge),%ecx - testl %eax,%eax - jnz Ldone - -// r_pedge->framecount = r_framecount; - movl C(r_framecount),%eax - andl $(FRAMECOUNT_MASK),%eax - orl $(FULLY_CLIPPED_CACHED),%eax - movl %eax,C(cacheoffset) - -// return; - jmp Ldone - -// } - -Lp1: - -// // point 0 is unclipped -// if (d1 >= 0) -// { -// // both points are unclipped -// continue; - -// // only point 1 is clipped - -// f = d0 / (d0 - d1); - flds Ld0 - flds Ld1 - fsubr %st(1),%st(0) - -// // we don't cache partially clipped edges - movl $0x7FFFFFFF,C(cacheoffset) - - fdivrp %st(0),%st(1) - - subl $(mv_size),%esp // allocate space for clipvert - -// clipvert.position[0] = pv0->position[0] + -// f * (pv1->position[0] - pv0->position[0]); -// clipvert.position[1] = pv0->position[1] + -// f * (pv1->position[1] - pv0->position[1]); -// clipvert.position[2] = pv0->position[2] + -// f * (pv1->position[2] - pv0->position[2]); - flds mv_position+8(%edx) - fsubs mv_position+8(%esi) - flds mv_position+4(%edx) - fsubs mv_position+4(%esi) - flds mv_position+0(%edx) - fsubs mv_position+0(%esi) // 0 | 1 | 2 - -// replace pv1 with the clip point - movl %esp,%edx - movl cp_leftedge(%ebx),%eax - testb %al,%al - - fmul %st(3),%st(0) - fxch %st(1) // 1 | 0 | 2 - fmul %st(3),%st(0) - fxch %st(2) // 2 | 0 | 1 - fmulp %st(0),%st(3) // 0 | 1 | 2 - fadds mv_position+0(%esi) - fxch %st(1) // 1 | 0 | 2 - fadds mv_position+4(%esi) - fxch %st(2) // 2 | 0 | 1 - fadds mv_position+8(%esi) - fxch %st(1) // 0 | 2 | 1 - fstps mv_position+0(%esp) // 2 | 1 - fstps mv_position+8(%esp) // 1 - fstps mv_position+4(%esp) - -// if (clip->leftedge) -// { - jz Ltestright - -// r_leftclipped = true; -// r_leftexit = clipvert; - movl $1,C(r_leftclipped) - movl mv_position+0(%esp),%eax - movl %eax,C(r_leftexit)+mv_position+0 - movl mv_position+4(%esp),%eax - movl %eax,C(r_leftexit)+mv_position+4 - movl mv_position+8(%esp),%eax - movl %eax,C(r_leftexit)+mv_position+8 - - jmp Lcontinue - -// } - -Ltestright: -// else if (clip->rightedge) -// { - testb %ah,%ah - jz Lcontinue - -// r_rightclipped = true; -// r_rightexit = clipvert; - movl $1,C(r_rightclipped) - movl mv_position+0(%esp),%eax - movl %eax,C(r_rightexit)+mv_position+0 - movl mv_position+4(%esp),%eax - movl %eax,C(r_rightexit)+mv_position+4 - movl mv_position+8(%esp),%eax - movl %eax,C(r_rightexit)+mv_position+8 - -// } -// -// R_ClipEdge (pv0, &clipvert, clip->next); -// return; -// } - jmp Lcontinue - -// } - -Lp3: - -// // only point 0 is clipped -// r_lastvertvalid = false; - - movl $0,C(r_lastvertvalid) - -// f = d0 / (d0 - d1); - flds Ld0 - flds Ld1 - fsubr %st(1),%st(0) - -// // we don't cache partially clipped edges - movl $0x7FFFFFFF,C(cacheoffset) - - fdivrp %st(0),%st(1) - - subl $(mv_size),%esp // allocate space for clipvert - -// clipvert.position[0] = pv0->position[0] + -// f * (pv1->position[0] - pv0->position[0]); -// clipvert.position[1] = pv0->position[1] + -// f * (pv1->position[1] - pv0->position[1]); -// clipvert.position[2] = pv0->position[2] + -// f * (pv1->position[2] - pv0->position[2]); - flds mv_position+8(%edx) - fsubs mv_position+8(%esi) - flds mv_position+4(%edx) - fsubs mv_position+4(%esi) - flds mv_position+0(%edx) - fsubs mv_position+0(%esi) // 0 | 1 | 2 - - movl cp_leftedge(%ebx),%eax - testb %al,%al - - fmul %st(3),%st(0) - fxch %st(1) // 1 | 0 | 2 - fmul %st(3),%st(0) - fxch %st(2) // 2 | 0 | 1 - fmulp %st(0),%st(3) // 0 | 1 | 2 - fadds mv_position+0(%esi) - fxch %st(1) // 1 | 0 | 2 - fadds mv_position+4(%esi) - fxch %st(2) // 2 | 0 | 1 - fadds mv_position+8(%esi) - fxch %st(1) // 0 | 2 | 1 - fstps mv_position+0(%esp) // 2 | 1 - fstps mv_position+8(%esp) // 1 - fstps mv_position+4(%esp) - -// replace pv0 with the clip point - movl %esp,%esi - -// if (clip->leftedge) -// { - jz Ltestright2 - -// r_leftclipped = true; -// r_leftenter = clipvert; - movl $1,C(r_leftclipped) - movl mv_position+0(%esp),%eax - movl %eax,C(r_leftenter)+mv_position+0 - movl mv_position+4(%esp),%eax - movl %eax,C(r_leftenter)+mv_position+4 - movl mv_position+8(%esp),%eax - movl %eax,C(r_leftenter)+mv_position+8 - - jmp Lcontinue - -// } - -Ltestright2: -// else if (clip->rightedge) -// { - testb %ah,%ah - jz Lcontinue - -// r_rightclipped = true; -// r_rightenter = clipvert; - movl $1,C(r_rightclipped) - movl mv_position+0(%esp),%eax - movl %eax,C(r_rightenter)+mv_position+0 - movl mv_position+4(%esp),%eax - movl %eax,C(r_rightenter)+mv_position+4 - movl mv_position+8(%esp),%eax - movl %eax,C(r_rightenter)+mv_position+8 - -// } - jmp Lcontinue - -// %esi = vec3_t point to transform and project -// %edx preserved -LTransformAndProject: - -// // transform and project -// VectorSubtract (world, modelorg, local); - flds mv_position+0(%esi) - fsubs C(modelorg)+0 - flds mv_position+4(%esi) - fsubs C(modelorg)+4 - flds mv_position+8(%esi) - fsubs C(modelorg)+8 - fxch %st(2) // local[0] | local[1] | local[2] - -// TransformVector (local, transformed); -// -// if (transformed[2] < NEAR_CLIP) -// transformed[2] = NEAR_CLIP; -// -// lzi0 = 1.0 / transformed[2]; - fld %st(0) // local[0] | local[0] | local[1] | local[2] - fmuls C(vpn)+0 // zm0 | local[0] | local[1] | local[2] - fld %st(1) // local[0] | zm0 | local[0] | local[1] | - // local[2] - fmuls C(vright)+0 // xm0 | zm0 | local[0] | local[1] | local[2] - fxch %st(2) // local[0] | zm0 | xm0 | local[1] | local[2] - fmuls C(vup)+0 // ym0 | zm0 | xm0 | local[1] | local[2] - fld %st(3) // local[1] | ym0 | zm0 | xm0 | local[1] | - // local[2] - fmuls C(vpn)+4 // zm1 | ym0 | zm0 | xm0 | local[1] | - // local[2] - fld %st(4) // local[1] | zm1 | ym0 | zm0 | xm0 | - // local[1] | local[2] - fmuls C(vright)+4 // xm1 | zm1 | ym0 | zm0 | xm0 | - // local[1] | local[2] - fxch %st(5) // local[1] | zm1 | ym0 | zm0 | xm0 | - // xm1 | local[2] - fmuls C(vup)+4 // ym1 | zm1 | ym0 | zm0 | xm0 | - // xm1 | local[2] - fxch %st(1) // zm1 | ym1 | ym0 | zm0 | xm0 | - // xm1 | local[2] - faddp %st(0),%st(3) // ym1 | ym0 | zm2 | xm0 | xm1 | local[2] - fxch %st(3) // xm0 | ym0 | zm2 | ym1 | xm1 | local[2] - faddp %st(0),%st(4) // ym0 | zm2 | ym1 | xm2 | local[2] - faddp %st(0),%st(2) // zm2 | ym2 | xm2 | local[2] - fld %st(3) // local[2] | zm2 | ym2 | xm2 | local[2] - fmuls C(vpn)+8 // zm3 | zm2 | ym2 | xm2 | local[2] - fld %st(4) // local[2] | zm3 | zm2 | ym2 | xm2 | local[2] - fmuls C(vright)+8 // xm3 | zm3 | zm2 | ym2 | xm2 | local[2] - fxch %st(5) // local[2] | zm3 | zm2 | ym2 | xm2 | xm3 - fmuls C(vup)+8 // ym3 | zm3 | zm2 | ym2 | xm2 | xm3 - fxch %st(1) // zm3 | ym3 | zm2 | ym2 | xm2 | xm3 - faddp %st(0),%st(2) // ym3 | zm4 | ym2 | xm2 | xm3 - fxch %st(4) // xm3 | zm4 | ym2 | xm2 | ym3 - faddp %st(0),%st(3) // zm4 | ym2 | xm4 | ym3 - fxch %st(1) // ym2 | zm4 | xm4 | ym3 - faddp %st(0),%st(3) // zm4 | xm4 | ym4 - - fcoms Lfp_near_clip - fnstsw %ax - testb $1,%ah - jz LNoClip - fstp %st(0) - flds Lfp_near_clip - -LNoClip: - - fdivrs float_1 // lzi0 | x | y - fxch %st(1) // x | lzi0 | y - -// // FIXME: build x/yscale into transform? -// scale = xscale * lzi0; -// u0 = (xcenter + scale*transformed[0]); - flds C(xscale) // xscale | x | lzi0 | y - fmul %st(2),%st(0) // scale | x | lzi0 | y - fmulp %st(0),%st(1) // scale*x | lzi0 | y - fadds C(xcenter) // u0 | lzi0 | y - -// if (u0 < r_refdef.fvrectx_adj) -// u0 = r_refdef.fvrectx_adj; -// if (u0 > r_refdef.fvrectright_adj) -// u0 = r_refdef.fvrectright_adj; -// FIXME: use integer compares of floats? - fcoms C(r_refdef)+rd_fvrectx_adj - fnstsw %ax - testb $1,%ah - jz LClampP0 - fstp %st(0) - flds C(r_refdef)+rd_fvrectx_adj -LClampP0: - fcoms C(r_refdef)+rd_fvrectright_adj - fnstsw %ax - testb $0x45,%ah - jnz LClampP1 - fstp %st(0) - flds C(r_refdef)+rd_fvrectright_adj -LClampP1: - - fld %st(1) // lzi0 | u0 | lzi0 | y - -// scale = yscale * lzi0; -// v0 = (ycenter - scale*transformed[1]); - fmuls C(yscale) // scale | u0 | lzi0 | y - fmulp %st(0),%st(3) // u0 | lzi0 | scale*y - fxch %st(2) // scale*y | lzi0 | u0 - fsubrs C(ycenter) // v0 | lzi0 | u0 - -// if (v0 < r_refdef.fvrecty_adj) -// v0 = r_refdef.fvrecty_adj; -// if (v0 > r_refdef.fvrectbottom_adj) -// v0 = r_refdef.fvrectbottom_adj; -// FIXME: use integer compares of floats? - fcoms C(r_refdef)+rd_fvrecty_adj - fnstsw %ax - testb $1,%ah - jz LClampP2 - fstp %st(0) - flds C(r_refdef)+rd_fvrecty_adj -LClampP2: - fcoms C(r_refdef)+rd_fvrectbottom_adj - fnstsw %ax - testb $0x45,%ah - jnz LClampP3 - fstp %st(0) - flds C(r_refdef)+rd_fvrectbottom_adj -LClampP3: - ret - -#endif // USE_ASM - diff --git a/src/refresh/sw/x86/edge.S b/src/refresh/sw/x86/edge.S deleted file mode 100644 index 03449cd..0000000 --- a/src/refresh/sw/x86/edge.S +++ /dev/null @@ -1,730 +0,0 @@ -// -// r_edgea.s -// x86 assembly-language edge-processing code. -// - -#include "common/x86/asm.h" -#include "sw.h" - -#if USE_ASM - - .data -Ltemp: .long 0 -float_1_div_0100000h: .long 0x35800000 // 1.0/(float)0x100000 -float_point_999: .single 0.999 -float_1_point_001: .single 1.001 - - .text - -//-------------------------------------------------------------------- - -#define edgestoadd 4+8 // note odd stack offsets because of interleaving -#define edgelist 8+12 // with pushes - -.globl C(R_EdgeCodeStart) -C(R_EdgeCodeStart): - -.globl C(R_InsertNewEdges) -C(R_InsertNewEdges): - pushl %edi - pushl %esi // preserve register variables - movl edgestoadd(%esp),%edx - pushl %ebx - movl edgelist(%esp),%ecx - -LDoNextEdge: - movl et_u(%edx),%eax - movl %edx,%edi - -LContinueSearch: - movl et_u(%ecx),%ebx - movl et_next(%ecx),%esi - cmpl %ebx,%eax - jle LAddedge - movl et_u(%esi),%ebx - movl et_next(%esi),%ecx - cmpl %ebx,%eax - jle LAddedge2 - movl et_u(%ecx),%ebx - movl et_next(%ecx),%esi - cmpl %ebx,%eax - jle LAddedge - movl et_u(%esi),%ebx - movl et_next(%esi),%ecx - cmpl %ebx,%eax - jg LContinueSearch - -LAddedge2: - movl et_next(%edx),%edx - movl et_prev(%esi),%ebx - movl %esi,et_next(%edi) - movl %ebx,et_prev(%edi) - movl %edi,et_next(%ebx) - movl %edi,et_prev(%esi) - movl %esi,%ecx - - cmpl $0,%edx - jnz LDoNextEdge - jmp LDone - - .align 4 -LAddedge: - movl et_next(%edx),%edx - movl et_prev(%ecx),%ebx - movl %ecx,et_next(%edi) - movl %ebx,et_prev(%edi) - movl %edi,et_next(%ebx) - movl %edi,et_prev(%ecx) - - cmpl $0,%edx - jnz LDoNextEdge - -LDone: - popl %ebx // restore register variables - popl %esi - popl %edi - - ret - -//-------------------------------------------------------------------- - -#define predge 4+4 - -.globl C(R_RemoveEdges) -C(R_RemoveEdges): - pushl %ebx - movl predge(%esp),%eax - -Lre_loop: - movl et_next(%eax),%ecx - movl et_nextremove(%eax),%ebx - movl et_prev(%eax),%edx - testl %ebx,%ebx - movl %edx,et_prev(%ecx) - jz Lre_done - movl %ecx,et_next(%edx) - - movl et_next(%ebx),%ecx - movl et_prev(%ebx),%edx - movl et_nextremove(%ebx),%eax - movl %edx,et_prev(%ecx) - testl %eax,%eax - movl %ecx,et_next(%edx) - jnz Lre_loop - - popl %ebx - ret - -Lre_done: - movl %ecx,et_next(%edx) - popl %ebx - - ret - -//-------------------------------------------------------------------- - -#define pedgelist 4+4 // note odd stack offset because of interleaving - // with pushes - -.globl C(R_StepActiveU) -C(R_StepActiveU): - pushl %edi - movl pedgelist(%esp),%edx - pushl %esi // preserve register variables - pushl %ebx - - movl et_prev(%edx),%esi - -LNewEdge: - movl et_u(%esi),%edi - -LNextEdge: - movl et_u(%edx),%eax - movl et_u_step(%edx),%ebx - addl %ebx,%eax - movl et_next(%edx),%esi - movl %eax,et_u(%edx) - cmpl %edi,%eax - jl LPushBack - - movl et_u(%esi),%edi - movl et_u_step(%esi),%ebx - addl %ebx,%edi - movl et_next(%esi),%edx - movl %edi,et_u(%esi) - cmpl %eax,%edi - jl LPushBack2 - - movl et_u(%edx),%eax - movl et_u_step(%edx),%ebx - addl %ebx,%eax - movl et_next(%edx),%esi - movl %eax,et_u(%edx) - cmpl %edi,%eax - jl LPushBack - - movl et_u(%esi),%edi - movl et_u_step(%esi),%ebx - addl %ebx,%edi - movl et_next(%esi),%edx - movl %edi,et_u(%esi) - cmpl %eax,%edi - jnl LNextEdge - -LPushBack2: - movl %edx,%ebx - movl %edi,%eax - movl %esi,%edx - movl %ebx,%esi - -LPushBack: -// push it back to keep it sorted - movl et_prev(%edx),%ecx - movl et_next(%edx),%ebx - -// done if the -1 in edge_aftertail triggered this - cmpl $(C(edge_aftertail)),%edx - jz LUDone - -// pull the edge out of the edge list - movl et_prev(%ecx),%edi - movl %ecx,et_prev(%esi) - movl %ebx,et_next(%ecx) - -// find out where the edge goes in the edge list -LPushBackLoop: - movl et_prev(%edi),%ecx - movl et_u(%edi),%ebx - cmpl %ebx,%eax - jnl LPushBackFound - - movl et_prev(%ecx),%edi - movl et_u(%ecx),%ebx - cmpl %ebx,%eax - jl LPushBackLoop - - movl %ecx,%edi - -// put the edge back into the edge list -LPushBackFound: - movl et_next(%edi),%ebx - movl %edi,et_prev(%edx) - movl %ebx,et_next(%edx) - movl %edx,et_next(%edi) - movl %edx,et_prev(%ebx) - - movl %esi,%edx - movl et_prev(%esi),%esi - - cmpl $(C(edge_tail)),%edx - jnz LNewEdge - -LUDone: - popl %ebx // restore register variables - popl %esi - popl %edi - - ret - -//-------------------------------------------------------------------- - -#define surf 4 // note this is loaded before any pushes - - .align 4 -TrailingEdge: - movl st_spanstate(%esi),%eax // check for edge inversion - decl %eax - jnz LInverted - - movl %eax,st_spanstate(%esi) - movl st_insubmodel(%esi),%ecx - movl 0x12345678,%edx // surfaces[1].st_next -LPatch0: - movl C(r_bmodelactive),%eax - subl %ecx,%eax - cmpl %esi,%edx - movl %eax,C(r_bmodelactive) - jnz LNoEmit // surface isn't on top, just remove - -// emit a span (current top going away) - movl et_u(%ebx),%eax - shrl $20,%eax // iu = integral pixel u - movl st_last_u(%esi),%edx - movl st_next(%esi),%ecx - cmpl %edx,%eax - jle LNoEmit2 // iu <= surf->last_u, so nothing to emit - - movl %eax,st_last_u(%ecx) // surf->next->last_u = iu; - subl %edx,%eax - movl %edx,espan_t_u(%ebp) // span->u = surf->last_u; - - movl %eax,espan_t_count(%ebp) // span->count = iu - span->u; - movl C(current_iv),%eax - movl %eax,espan_t_v(%ebp) // span->v = current_iv; - movl st_spans(%esi),%eax - movl %eax,espan_t_pnext(%ebp) // span->pnext = surf->spans; - movl %ebp,st_spans(%esi) // surf->spans = span; - addl $(espan_t_size),%ebp - - movl st_next(%esi),%edx // remove the surface from the surface - movl st_prev(%esi),%esi // stack - - movl %edx,st_next(%esi) - movl %esi,st_prev(%edx) - ret - -LNoEmit2: - movl %eax,st_last_u(%ecx) // surf->next->last_u = iu; - movl st_next(%esi),%edx // remove the surface from the surface - movl st_prev(%esi),%esi // stack - - movl %edx,st_next(%esi) - movl %esi,st_prev(%edx) - ret - -LNoEmit: - movl st_next(%esi),%edx // remove the surface from the surface - movl st_prev(%esi),%esi // stack - - movl %edx,st_next(%esi) - movl %esi,st_prev(%edx) - ret - -LInverted: - movl %eax,st_spanstate(%esi) - ret - -//-------------------------------------------------------------------- - -// trailing edge only -Lgs_trailing: - pushl $Lgs_nextedge - jmp TrailingEdge - - -.globl C(R_GenerateSpans) -C(R_GenerateSpans): - pushl %ebp // preserve caller's stack frame - pushl %edi - pushl %esi // preserve register variables - pushl %ebx - -// clear active surfaces to just the background surface - movl C(surfaces),%eax - movl C(edge_head_u_shift20),%edx - addl $(st_size),%eax -// %ebp = span_p throughout - movl C(span_p),%ebp - - movl $0,C(r_bmodelactive) - - movl %eax,st_next(%eax) - movl %eax,st_prev(%eax) - movl %edx,st_last_u(%eax) - movl C(edge_head)+et_next,%ebx // edge=edge_head.next - -// generate spans - cmpl $(C(edge_tail)),%ebx // done if empty list - jz Lgs_lastspan - -Lgs_edgeloop: - - movl et_surfs(%ebx),%edi - movl C(surfaces),%eax - movl %edi,%esi - andl $0xFFFF0000,%edi - andl $0xFFFF,%esi - jz Lgs_leading // not a trailing edge - -// it has a left surface, so a surface is going away for this span - shll $(SURF_T_SHIFT),%esi - addl %eax,%esi - testl %edi,%edi - jz Lgs_trailing - -// both leading and trailing - call TrailingEdge - movl C(surfaces),%eax - -// --------------------------------------------------------------- -// handle a leading edge -// --------------------------------------------------------------- - -Lgs_leading: - shrl $16-SURF_T_SHIFT,%edi - movl C(surfaces),%eax - addl %eax,%edi - movl 0x12345678,%esi // surf2 = surfaces[1].next; -LPatch2: - movl st_spanstate(%edi),%edx - movl st_insubmodel(%edi),%eax - testl %eax,%eax - jnz Lbmodel_leading - -// handle a leading non-bmodel edge - -// don't start a span if this is an inverted span, with the end edge preceding -// the start edge (that is, we've already seen the end edge) - testl %edx,%edx - jnz Lxl_done - - -// if (surf->key < surf2->key) -// goto newtop; - incl %edx - movl st_key(%edi),%eax - movl %edx,st_spanstate(%edi) - movl st_key(%esi),%ecx - cmpl %ecx,%eax - jl Lnewtop - -// main sorting loop to search through surface stack until insertion point -// found. Always terminates because background surface is sentinel -// do -// { -// surf2 = surf2->next; -// } while (surf->key >= surf2->key); -Lsortloopnb: - movl st_next(%esi),%esi - movl st_key(%esi),%ecx - cmpl %ecx,%eax - jge Lsortloopnb - - jmp LInsertAndExit - - -// handle a leading bmodel edge - .align 4 -Lbmodel_leading: - -// don't start a span if this is an inverted span, with the end edge preceding -// the start edge (that is, we've already seen the end edge) - testl %edx,%edx - jnz Lxl_done - - movl C(r_bmodelactive),%ecx - incl %edx - incl %ecx - movl %edx,st_spanstate(%edi) - movl %ecx,C(r_bmodelactive) - -// if (surf->key < surf2->key) -// goto newtop; - movl st_key(%edi),%eax - movl st_key(%esi),%ecx - cmpl %ecx,%eax - jl Lnewtop - -// if ((surf->key == surf2->key) && surf->insubmodel) -// { - jz Lzcheck_for_newtop - -// main sorting loop to search through surface stack until insertion point -// found. Always terminates because background surface is sentinel -// do -// { -// surf2 = surf2->next; -// } while (surf->key > surf2->key); -Lsortloop: - movl st_next(%esi),%esi - movl st_key(%esi),%ecx - cmpl %ecx,%eax - jg Lsortloop - - jne LInsertAndExit - -// Do 1/z sorting to see if we've arrived in the right position - movl et_u(%ebx),%eax - subl $0xFFFFF,%eax - movl %eax,Ltemp - fildl Ltemp - - fmuls float_1_div_0100000h // fu = (float)(edge->u - 0xFFFFF) * - // (1.0 / 0x100000); - - fld %st(0) // fu | fu - fmuls st_d_zistepu(%edi) // fu*surf->d_zistepu | fu - flds C(fv) // fv | fu*surf->d_zistepu | fu - fmuls st_d_zistepv(%edi) // fv*surf->d_zistepv | fu*surf->d_zistepu | fu - fxch %st(1) // fu*surf->d_zistepu | fv*surf->d_zistepv | fu - fadds st_d_ziorigin(%edi) // fu*surf->d_zistepu + surf->d_ziorigin | - // fv*surf->d_zistepv | fu - - flds st_d_zistepu(%esi) // surf2->d_zistepu | - // fu*surf->d_zistepu + surf->d_ziorigin | - // fv*surf->d_zistepv | fu - fmul %st(3),%st(0) // fu*surf2->d_zistepu | - // fu*surf->d_zistepu + surf->d_ziorigin | - // fv*surf->d_zistepv | fu - fxch %st(1) // fu*surf->d_zistepu + surf->d_ziorigin | - // fu*surf2->d_zistepu | - // fv*surf->d_zistepv | fu - faddp %st(0),%st(2) // fu*surf2->d_zistepu | newzi | fu - - flds C(fv) // fv | fu*surf2->d_zistepu | newzi | fu - fmuls st_d_zistepv(%esi) // fv*surf2->d_zistepv | - // fu*surf2->d_zistepu | newzi | fu - fld %st(2) // newzi | fv*surf2->d_zistepv | - // fu*surf2->d_zistepu | newzi | fu - fmuls float_point_999 // newzibottom | fv*surf2->d_zistepv | - // fu*surf2->d_zistepu | newzi | fu - - fxch %st(2) // fu*surf2->d_zistepu | fv*surf2->d_zistepv | - // newzibottom | newzi | fu - fadds st_d_ziorigin(%esi) // fu*surf2->d_zistepu + surf2->d_ziorigin | - // fv*surf2->d_zistepv | newzibottom | newzi | - // fu - faddp %st(0),%st(1) // testzi | newzibottom | newzi | fu - fxch %st(1) // newzibottom | testzi | newzi | fu - -// if (newzibottom >= testzi) -// goto Lgotposition; - - fcomp %st(1) // testzi | newzi | fu - - fxch %st(1) // newzi | testzi | fu - fmuls float_1_point_001 // newzitop | testzi | fu - fxch %st(1) // testzi | newzitop | fu - - fnstsw %ax - testb $0x01,%ah - jz Lgotposition_fpop3 - -// if (newzitop >= testzi) -// { - - fcomp %st(1) // newzitop | fu - fnstsw %ax - testb $0x45,%ah - jz Lsortloop_fpop2 - -// if (surf->d_zistepu >= surf2->d_zistepu) -// goto newtop; - - flds st_d_zistepu(%edi) // surf->d_zistepu | newzitop| fu - fcomps st_d_zistepu(%esi) // newzitop | fu - fnstsw %ax - testb $0x01,%ah - jz Lgotposition_fpop2 - - fstp %st(0) // clear the FPstack - fstp %st(0) - movl st_key(%edi),%eax - jmp Lsortloop - - -Lgotposition_fpop3: - fstp %st(0) -Lgotposition_fpop2: - fstp %st(0) - fstp %st(0) - jmp LInsertAndExit - - -// emit a span (obscures current top) - -Lnewtop_fpop3: - fstp %st(0) -Lnewtop_fpop2: - fstp %st(0) - fstp %st(0) - movl st_key(%edi),%eax // reload the sorting key - -Lnewtop: - movl et_u(%ebx),%eax - movl st_last_u(%esi),%edx - shrl $20,%eax // iu = integral pixel u - movl %eax,st_last_u(%edi) // surf->last_u = iu; - cmpl %edx,%eax - jle LInsertAndExit // iu <= surf->last_u, so nothing to emit - - subl %edx,%eax - movl %edx,espan_t_u(%ebp) // span->u = surf->last_u; - - movl %eax,espan_t_count(%ebp) // span->count = iu - span->u; - movl C(current_iv),%eax - movl %eax,espan_t_v(%ebp) // span->v = current_iv; - movl st_spans(%esi),%eax - movl %eax,espan_t_pnext(%ebp) // span->pnext = surf->spans; - movl %ebp,st_spans(%esi) // surf->spans = span; - addl $(espan_t_size),%ebp - -LInsertAndExit: -// insert before surf2 - movl %esi,st_next(%edi) // surf->next = surf2; - movl st_prev(%esi),%eax - movl %eax,st_prev(%edi) // surf->prev = surf2->prev; - movl %edi,st_prev(%esi) // surf2->prev = surf; - movl %edi,st_next(%eax) // surf2->prev->next = surf; - -// --------------------------------------------------------------- -// leading edge done -// --------------------------------------------------------------- - -// --------------------------------------------------------------- -// see if there are any more edges -// --------------------------------------------------------------- - -Lgs_nextedge: - movl et_next(%ebx),%ebx - cmpl $(C(edge_tail)),%ebx - jnz Lgs_edgeloop - -// clean up at the right edge -Lgs_lastspan: - -// now that we've reached the right edge of the screen, we're done with any -// unfinished surfaces, so emit a span for whatever's on top - movl 0x12345678,%esi // surfaces[1].st_next -LPatch3: - movl C(edge_tail_u_shift20),%eax - xorl %ecx,%ecx - movl st_last_u(%esi),%edx - subl %edx,%eax - jle Lgs_resetspanstate - - movl %edx,espan_t_u(%ebp) - movl %eax,espan_t_count(%ebp) - movl C(current_iv),%eax - movl %eax,espan_t_v(%ebp) - movl st_spans(%esi),%eax - movl %eax,espan_t_pnext(%ebp) - movl %ebp,st_spans(%esi) - addl $(espan_t_size),%ebp - -// reset spanstate for all surfaces in the surface stack -Lgs_resetspanstate: - movl %ecx,st_spanstate(%esi) - movl st_next(%esi),%esi - cmpl $0x12345678,%esi // &surfaces[1] -LPatch4: - jnz Lgs_resetspanstate - -// store the final span_p - movl %ebp,C(span_p) - - popl %ebx // restore register variables - popl %esi - popl %edi - popl %ebp // restore the caller's stack frame - ret - - -// --------------------------------------------------------------- -// 1/z sorting for bmodels in the same leaf -// --------------------------------------------------------------- - .align 4 -Lxl_done: - incl %edx - movl %edx,st_spanstate(%edi) - - jmp Lgs_nextedge - - - .align 4 -Lzcheck_for_newtop: - movl et_u(%ebx),%eax - subl $0xFFFFF,%eax - movl %eax,Ltemp - fildl Ltemp - - fmuls float_1_div_0100000h // fu = (float)(edge->u - 0xFFFFF) * - // (1.0 / 0x100000); - - fld %st(0) // fu | fu - fmuls st_d_zistepu(%edi) // fu*surf->d_zistepu | fu - flds C(fv) // fv | fu*surf->d_zistepu | fu - fmuls st_d_zistepv(%edi) // fv*surf->d_zistepv | fu*surf->d_zistepu | fu - fxch %st(1) // fu*surf->d_zistepu | fv*surf->d_zistepv | fu - fadds st_d_ziorigin(%edi) // fu*surf->d_zistepu + surf->d_ziorigin | - // fv*surf->d_zistepv | fu - - flds st_d_zistepu(%esi) // surf2->d_zistepu | - // fu*surf->d_zistepu + surf->d_ziorigin | - // fv*surf->d_zistepv | fu - fmul %st(3),%st(0) // fu*surf2->d_zistepu | - // fu*surf->d_zistepu + surf->d_ziorigin | - // fv*surf->d_zistepv | fu - fxch %st(1) // fu*surf->d_zistepu + surf->d_ziorigin | - // fu*surf2->d_zistepu | - // fv*surf->d_zistepv | fu - faddp %st(0),%st(2) // fu*surf2->d_zistepu | newzi | fu - - flds C(fv) // fv | fu*surf2->d_zistepu | newzi | fu - fmuls st_d_zistepv(%esi) // fv*surf2->d_zistepv | - // fu*surf2->d_zistepu | newzi | fu - fld %st(2) // newzi | fv*surf2->d_zistepv | - // fu*surf2->d_zistepu | newzi | fu - fmuls float_point_999 // newzibottom | fv*surf2->d_zistepv | - // fu*surf2->d_zistepu | newzi | fu - - fxch %st(2) // fu*surf2->d_zistepu | fv*surf2->d_zistepv | - // newzibottom | newzi | fu - fadds st_d_ziorigin(%esi) // fu*surf2->d_zistepu + surf2->d_ziorigin | - // fv*surf2->d_zistepv | newzibottom | newzi | - // fu - faddp %st(0),%st(1) // testzi | newzibottom | newzi | fu - fxch %st(1) // newzibottom | testzi | newzi | fu - -// if (newzibottom >= testzi) -// goto newtop; - - fcomp %st(1) // testzi | newzi | fu - - fxch %st(1) // newzi | testzi | fu - fmuls float_1_point_001 // newzitop | testzi | fu - fxch %st(1) // testzi | newzitop | fu - - fnstsw %ax - testb $0x01,%ah - jz Lnewtop_fpop3 - -// if (newzitop >= testzi) -// { - - fcomp %st(1) // newzitop | fu - fnstsw %ax - testb $0x45,%ah - jz Lsortloop_fpop2 - -// if (surf->d_zistepu >= surf2->d_zistepu) -// goto newtop; - - flds st_d_zistepu(%edi) // surf->d_zistepu | newzitop | fu - fcomps st_d_zistepu(%esi) // newzitop | fu - fnstsw %ax - testb $0x01,%ah - jz Lnewtop_fpop2 - -Lsortloop_fpop2: - fstp %st(0) // clear the FP stack - fstp %st(0) - movl st_key(%edi),%eax - jmp Lsortloop - - -.globl C(R_EdgeCodeEnd) -C(R_EdgeCodeEnd): - - -//---------------------------------------------------------------------- -// Surface array address code patching routine -//---------------------------------------------------------------------- - - .align 4 -.globl C(R_SurfacePatch) -C(R_SurfacePatch): - - movl C(surfaces),%eax - addl $(st_size),%eax - movl %eax,LPatch4-4 - - addl $(st_next),%eax - movl %eax,LPatch0-4 - movl %eax,LPatch2-4 - movl %eax,LPatch3-4 - - ret - -#endif // USE_ASM - diff --git a/src/refresh/sw/x86/polyset.S b/src/refresh/sw/x86/polyset.S deleted file mode 100644 index 63b31dc..0000000 --- a/src/refresh/sw/x86/polyset.S +++ /dev/null @@ -1,1247 +0,0 @@ -// -// d_polysa.s -// x86 assembly-language polygon model drawing code -// - -#include "common/x86/asm.h" -#include "sw.h" - -#if USE_ASM - -// !!! if this is changed, it must be changed in d_polyse.c too !!! -#define DPS_MAXSPANS MAXHEIGHT+1 - // 1 extra for spanpackage that marks end - -//#define SPAN_SIZE (((DPS_MAXSPANS + 1 + ((CACHE_SIZE - 1) / spanpackage_t_size)) + 1) * spanpackage_t_size) -#define SPAN_SIZE (1024+1+1+1)*32 - - - - .data - - .align 4 -p10_minus_p20: .single 0 -p01_minus_p21: .single 0 -temp0: .single 0 -temp1: .single 0 -Ltemp: .single 0 - -aff8entryvec_table: .long LDraw8, LDraw7, LDraw6, LDraw5 - .long LDraw4, LDraw3, LDraw2, LDraw1 - -lzistepx: .long 0 - - - .text - -#ifndef NeXT - .extern C(D_PolysetSetEdgeTable) - .extern C(D_RasterizeAliasPolySmooth) -#endif - -//---------------------------------------------------------------------- -// affine triangle gradient calculation code -//---------------------------------------------------------------------- - -#if 0 -#define skinwidth 4+0 - -.globl C(R_PolysetCalcGradients) -C(R_PolysetCalcGradients): - -// p00_minus_p20 = r_p0[0] - r_p2[0]; -// p01_minus_p21 = r_p0[1] - r_p2[1]; -// p10_minus_p20 = r_p1[0] - r_p2[0]; -// p11_minus_p21 = r_p1[1] - r_p2[1]; -// -// xstepdenominv = 1.0 / (p10_minus_p20 * p01_minus_p21 - -// p00_minus_p20 * p11_minus_p21); -// -// ystepdenominv = -xstepdenominv; - - fildl C(r_p0)+0 // r_p0[0] - fildl C(r_p2)+0 // r_p2[0] | r_p0[0] - fildl C(r_p0)+4 // r_p0[1] | r_p2[0] | r_p0[0] - fildl C(r_p2)+4 // r_p2[1] | r_p0[1] | r_p2[0] | r_p0[0] - fildl C(r_p1)+0 // r_p1[0] | r_p2[1] | r_p0[1] | r_p2[0] | r_p0[0] - fildl C(r_p1)+4 // r_p1[1] | r_p1[0] | r_p2[1] | r_p0[1] | - // r_p2[0] | r_p0[0] - fxch %st(3) // r_p0[1] | r_p1[0] | r_p2[1] | r_p1[1] | - // r_p2[0] | r_p0[0] - fsub %st(2),%st(0) // p01_minus_p21 | r_p1[0] | r_p2[1] | r_p1[1] | - // r_p2[0] | r_p0[0] - fxch %st(1) // r_p1[0] | p01_minus_p21 | r_p2[1] | r_p1[1] | - // r_p2[0] | r_p0[0] - fsub %st(4),%st(0) // p10_minus_p20 | p01_minus_p21 | r_p2[1] | - // r_p1[1] | r_p2[0] | r_p0[0] - fxch %st(5) // r_p0[0] | p01_minus_p21 | r_p2[1] | - // r_p1[1] | r_p2[0] | p10_minus_p20 - fsubp %st(0),%st(4) // p01_minus_p21 | r_p2[1] | r_p1[1] | - // p00_minus_p20 | p10_minus_p20 - fxch %st(2) // r_p1[1] | r_p2[1] | p01_minus_p21 | - // p00_minus_p20 | p10_minus_p20 - fsubp %st(0),%st(1) // p11_minus_p21 | p01_minus_p21 | - // p00_minus_p20 | p10_minus_p20 - fxch %st(1) // p01_minus_p21 | p11_minus_p21 | - // p00_minus_p20 | p10_minus_p20 - flds C(d_xdenom) // d_xdenom | p01_minus_p21 | p11_minus_p21 | - // p00_minus_p20 | p10_minus_p20 - fxch %st(4) // p10_minus_p20 | p01_minus_p21 | p11_minus_p21 | - // p00_minus_p20 | d_xdenom - fstps p10_minus_p20 // p01_minus_p21 | p11_minus_p21 | - // p00_minus_p20 | d_xdenom - fstps p01_minus_p21 // p11_minus_p21 | p00_minus_p20 | xstepdenominv - fxch %st(2) // xstepdenominv | p00_minus_p20 | p11_minus_p21 - -//// ceil () for light so positive steps are exaggerated, negative steps -//// diminished, pushing us away from underflow toward overflow. Underflow is -//// very visible, overflow is very unlikely, because of ambient lighting -// t0 = r_p0[4] - r_p2[4]; -// t1 = r_p1[4] - r_p2[4]; - - fildl C(r_p2)+16 // r_p2[4] | xstepdenominv | p00_minus_p20 | - // p11_minus_p21 - fildl C(r_p0)+16 // r_p0[4] | r_p2[4] | xstepdenominv | - // p00_minus_p20 | p11_minus_p21 - fildl C(r_p1)+16 // r_p1[4] | r_p0[4] | r_p2[4] | xstepdenominv | - // p00_minus_p20 | p11_minus_p21 - fxch %st(2) // r_p2[4] | r_p0[4] | r_p1[4] | xstepdenominv | - // p00_minus_p20 | p11_minus_p21 - fld %st(0) // r_p2[4] | r_p2[4] | r_p0[4] | r_p1[4] | - // xstepdenominv | p00_minus_p20 | p11_minus_p21 - fsubrp %st(0),%st(2) // r_p2[4] | t0 | r_p1[4] | xstepdenominv | - // p00_minus_p20 | p11_minus_p21 - fsubrp %st(0),%st(2) // t0 | t1 | xstepdenominv | p00_minus_p20 | - // p11_minus_p21 - -// r_lstepx = (int) -// ceil((t1 * p01_minus_p21 - t0 * p11_minus_p21) * xstepdenominv); -// r_lstepy = (int) -// ceil((t1 * p00_minus_p20 - t0 * p10_minus_p20) * ystepdenominv); - - fld %st(0) // t0 | t0 | t1 | xstepdenominv | p00_minus_p20 | - // p11_minus_p21 - fmul %st(5),%st(0) // t0*p11_minus_p21 | t0 | t1 | xstepdenominv | - // p00_minus_p20 | p11_minus_p21 - fxch %st(2) // t1 | t0 | t0*p11_minus_p21 | xstepdenominv | - // p00_minus_p20 | p11_minus_p21 - fld %st(0) // t1 | t1 | t0 | t0*p11_minus_p21 | - // xstepdenominv | p00_minus_p20 | p11_minus_p21 - fmuls p01_minus_p21 // t1*p01_minus_p21 | t1 | t0 | t0*p11_minus_p21 | - // xstepdenominv | p00_minus_p20 | p11_minus_p21 - fxch %st(2) // t0 | t1 | t1*p01_minus_p21 | t0*p11_minus_p21 | - // xstepdenominv | p00_minus_p20 | p11_minus_p21 - fmuls p10_minus_p20 // t0*p10_minus_p20 | t1 | t1*p01_minus_p21 | - // t0*p11_minus_p21 | xstepdenominv | - // p00_minus_p20 | p11_minus_p21 - fxch %st(1) // t1 | t0*p10_minus_p20 | t1*p01_minus_p21 | - // t0*p11_minus_p21 | xstepdenominv | - // p00_minus_p20 | p11_minus_p21 - fmul %st(5),%st(0) // t1*p00_minus_p20 | t0*p10_minus_p20 | - // t1*p01_minus_p21 | t0*p11_minus_p21 | - // xstepdenominv | p00_minus_p20 | p11_minus_p21 - fxch %st(2) // t1*p01_minus_p21 | t0*p10_minus_p20 | - // t1*p00_minus_p20 | t0*p11_minus_p21 | - // xstepdenominv | p00_minus_p20 | p11_minus_p21 - fsubp %st(0),%st(3) // t0*p10_minus_p20 | t1*p00_minus_p20 | - // t1*p01_minus_p21 - t0*p11_minus_p21 | - // xstepdenominv | p00_minus_p20 | p11_minus_p21 - fsubrp %st(0),%st(1) // t1*p00_minus_p20 - t0*p10_minus_p20 | - // t1*p01_minus_p21 - t0*p11_minus_p21 | - // xstepdenominv | p00_minus_p20 | p11_minus_p21 - fld %st(2) // xstepdenominv | - // t1*p00_minus_p20 - t0*p10_minus_p20 | - // t1*p01_minus_p21 - t0*p11_minus_p21 | - // xstepdenominv | p00_minus_p20 | p11_minus_p21 - fmuls float_minus_1 // ystepdenominv | - // t1*p00_minus_p20 - t0*p10_minus_p20 | - // t1*p01_minus_p21 - t0*p11_minus_p21 | - // xstepdenominv | p00_minus_p20 | p11_minus_p21 - fxch %st(2) // t1*p01_minus_p21 - t0*p11_minus_p21 | - // t1*p00_minus_p20 - t0*p10_minus_p20 | - // ystepdenominv | xstepdenominv | p00_minus_p20 | - // p11_minus_p21 - fmul %st(3),%st(0) // (t1*p01_minus_p21 - t0*p11_minus_p21)* - // xstepdenominv | - // t1*p00_minus_p20 - t0*p10_minus_p20 | - // | ystepdenominv | xstepdenominv | - // p00_minus_p20 | p11_minus_p21 - fxch %st(1) // t1*p00_minus_p20 - t0*p10_minus_p20 | - // (t1*p01_minus_p21 - t0*p11_minus_p21)* - // xstepdenominv | ystepdenominv | - // xstepdenominv | p00_minus_p20 | p11_minus_p21 - fmul %st(2),%st(0) // (t1*p00_minus_p20 - t0*p10_minus_p20)* - // ystepdenominv | - // (t1*p01_minus_p21 - t0*p11_minus_p21)* - // xstepdenominv | ystepdenominv | - // xstepdenominv | p00_minus_p20 | p11_minus_p21 - fldcw C(ceil_cw) - fistpl C(r_lstepy) // r_lstepx | ystepdenominv | xstepdenominv | - // p00_minus_p20 | p11_minus_p21 - fistpl C(r_lstepx) // ystepdenominv | xstepdenominv | p00_minus_p20 | - // p11_minus_p21 - fldcw chop_cw - -// t0 = r_p0[2] - r_p2[2]; -// t1 = r_p1[2] - r_p2[2]; - - fildl C(r_p2)+8 // r_p2[2] | ystepdenominv | xstepdenominv | - // p00_minus_p20 | p11_minus_p21 - fildl C(r_p0)+8 // r_p0[2] | r_p2[2] | ystepdenominv | - // xstepdenominv | p00_minus_p20 | p11_minus_p21 - fildl C(r_p1)+8 // r_p1[2] | r_p0[2] | r_p2[2] | ystepdenominv | - // xstepdenominv | p00_minus_p20 | p11_minus_p21 - fxch %st(2) // r_p2[2] | r_p0[2] | r_p1[2] | ystepdenominv | - // xstepdenominv | p00_minus_p20 | p11_minus_p21 - fld %st(0) // r_p2[2] | r_p2[2] | r_p0[2] | r_p1[2] | - // ystepdenominv | xstepdenominv | p00_minus_p20 | - // p11_minus_p21 - fsubrp %st(0),%st(2) // r_p2[2] | t0 | r_p1[2] | ystepdenominv | - // xstepdenominv | p00_minus_p20 | p11_minus_p21 - fsubrp %st(0),%st(2) // t0 | t1 | ystepdenominv | xstepdenominv | - // p00_minus_p20 | p11_minus_p21 - -// r_sstepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) * -// xstepdenominv); -// r_sstepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) * -// ystepdenominv); - - fld %st(0) // t0 | t0 | t1 | ystepdenominv | xstepdenominv - fmul %st(6),%st(0) // t0*p11_minus_p21 | t0 | t1 | ystepdenominv | - // xstepdenominv | p00_minus_p20 | p11_minus_p21 - fxch %st(2) // t1 | t0 | t0*p11_minus_p21 | ystepdenominv | - // xstepdenominv | p00_minus_p20 | p11_minus_p21 - fld %st(0) // t1 | t1 | t0 | t0*p11_minus_p21 | - // ystepdenominv | xstepdenominv | p00_minus_p20 | - // p11_minus_p21 - fmuls p01_minus_p21 // t1*p01_minus_p21 | t1 | t0 | t0*p11_minus_p21 | - // ystepdenominv | xstepdenominv | p00_minus_p20 | - // p11_minus_p21 - fxch %st(2) // t0 | t1 | t1*p01_minus_p21 | t0*p11_minus_p21 | - // ystepdenominv | xstepdenominv | p00_minus_p20 | - // p11_minus_p21 - fmuls p10_minus_p20 // t0*p10_minus_p20 | t1 | t1*p01_minus_p21 | - // t0*p11_minus_p21 | ystepdenominv | - // xstepdenominv | p00_minus_p20 | p11_minus_p21 - fxch %st(1) // t1 | t0*p10_minus_p20 | t1*p01_minus_p21 | - // t0*p11_minus_p21 | ystepdenominv | - // xstepdenominv | p00_minus_p20 | p11_minus_p21 - fmul %st(6),%st(0) // t1*p00_minus_p20 | t0*p10_minus_p20 | - // t1*p01_minus_p21 | t0*p11_minus_p21 | - // ystepdenominv | xstepdenominv | p00_minus_p20 | - // p11_minus_p21 - fxch %st(2) // t1*p01_minus_p21 | t0*p10_minus_p20 | - // t1*p00_minus_p20 | t0*p11_minus_p21 | - // ystepdenominv | xstepdenominv | p00_minus_p20 | - // p11_minus_p21 - fsubp %st(0),%st(3) // t0*p10_minus_p20 | t1*p00_minus_p20 | - // t1*p01_minus_p21 - t0*p11_minus_p21 | - // ystepdenominv | xstepdenominv | p00_minus_p20 | - // p11_minus_p21 - fsubrp %st(0),%st(1) // t1*p00_minus_p20 - t0*p10_minus_p20 | - // t1*p01_minus_p21 - t0*p11_minus_p21 | - // ystepdenominv | xstepdenominv | p00_minus_p20 | - // p11_minus_p21 - fmul %st(2),%st(0) // (t1*p00_minus_p20 - t0*p10_minus_p20)* - // ystepdenominv | - // t1*p01_minus_p21 - t0*p11_minus_p21 | - // ystepdenominv | xstepdenominv | p00_minus_p20 | - // p11_minus_p21 - fxch %st(1) // t1*p01_minus_p21 - t0*p11_minus_p21 | - // (t1*p00_minus_p20 - t0*p10_minus_p20)* - // ystepdenominv | ystepdenominv | - // xstepdenominv | p00_minus_p20 | p11_minus_p21 - fmul %st(3),%st(0) // (t1*p01_minus_p21 - t0*p11_minus_p21)* - // xstepdenominv | - // (t1*p00_minus_p20 - t0*p10_minus_p20)* - // ystepdenominv | ystepdenominv | - // xstepdenominv | p00_minus_p20 | p11_minus_p21 - fxch %st(1) // (t1*p00_minus_p20 - t0*p10_minus_p20)* - // ystepdenominv | - // (t1*p01_minus_p21 - t0*p11_minus_p21)* - // xstepdenominv | ystepdenominv | - // xstepdenominv | p00_minus_p20 | p11_minus_p21 - fistpl C(r_sstepy) // r_sstepx | ystepdenominv | xstepdenominv | - // p00_minus_p20 | p11_minus_p21 - fistpl C(r_sstepx) // ystepdenominv | xstepdenominv | p00_minus_p20 | - // p11_minus_p21 - -// t0 = r_p0[3] - r_p2[3]; -// t1 = r_p1[3] - r_p2[3]; - - fildl C(r_p2)+12 // r_p2[3] | ystepdenominv | xstepdenominv | - // p00_minus_p20 | p11_minus_p21 - fildl C(r_p0)+12 // r_p0[3] | r_p2[3] | ystepdenominv | - // xstepdenominv | p00_minus_p20 | p11_minus_p21 - fildl C(r_p1)+12 // r_p1[3] | r_p0[3] | r_p2[3] | ystepdenominv | - // xstepdenominv | p00_minus_p20 | p11_minus_p21 - fxch %st(2) // r_p2[3] | r_p0[3] | r_p1[3] | ystepdenominv | - // xstepdenominv | p00_minus_p20 | p11_minus_p21 - fld %st(0) // r_p2[3] | r_p2[3] | r_p0[3] | r_p1[3] | - // ystepdenominv | xstepdenominv | p00_minus_p20 | - // p11_minus_p21 - fsubrp %st(0),%st(2) // r_p2[3] | t0 | r_p1[3] | ystepdenominv | - // xstepdenominv | p00_minus_p20 | p11_minus_p21 - fsubrp %st(0),%st(2) // t0 | t1 | ystepdenominv | xstepdenominv | - // p00_minus_p20 | p11_minus_p21 - -// r_tstepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) * -// xstepdenominv); -// r_tstepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) * -// ystepdenominv); - - fld %st(0) // t0 | t0 | t1 | ystepdenominv | xstepdenominv | - // p00_minus_p20 | p11_minus_p21 - fmul %st(6),%st(0) // t0*p11_minus_p21 | t0 | t1 | ystepdenominv | - // xstepdenominv | p00_minus_p20 | p11_minus_p21 - fxch %st(2) // t1 | t0 | t0*p11_minus_p21 | ystepdenominv | - // xstepdenominv | p00_minus_p20 | p11_minus_p21 - fld %st(0) // t1 | t1 | t0 | t0*p11_minus_p21 | - // ystepdenominv | xstepdenominv | p00_minus_p20 | - // p11_minus_p21 - fmuls p01_minus_p21 // t1*p01_minus_p21 | t1 | t0 | t0*p11_minus_p21 | - // ystepdenominv | xstepdenominv | p00_minus_p20 | - // p11_minus_p21 - fxch %st(2) // t0 | t1 | t1*p01_minus_p21 | t0*p11_minus_p21 | - // ystepdenominv | xstepdenominv | p00_minus_p20 | - // p11_minus_p21 - fmuls p10_minus_p20 // t0*p10_minus_p20 | t1 | t1*p01_minus_p21 | - // t0*p11_minus_p21 | ystepdenominv | - // xstepdenominv | p00_minus_p20 | p11_minus_p21 - fxch %st(1) // t1 | t0*p10_minus_p20 | t1*p01_minus_p21 | - // t0*p11_minus_p21 | ystepdenominv | - // xstepdenominv | p00_minus_p20 | p11_minus_p21 - fmul %st(6),%st(0) // t1*p00_minus_p20 | t0*p10_minus_p20 | - // t1*p01_minus_p21 | t0*p11_minus_p21 | - // ystepdenominv | xstepdenominv | p00_minus_p20 | - // p11_minus_p21 - fxch %st(2) // t1*p01_minus_p21 | t0*p10_minus_p20 | - // t1*p00_minus_p20 | t0*p11_minus_p21 | - // ystepdenominv | xstepdenominv | p00_minus_p20 | - // p11_minus_p21 - fsubp %st(0),%st(3) // t0*p10_minus_p20 | t1*p00_minus_p20 | - // t1*p01_minus_p21 - t0*p11_minus_p21 | - // ystepdenominv | xstepdenominv | p00_minus_p20 | - // p11_minus_p21 - fsubrp %st(0),%st(1) // t1*p00_minus_p20 - t0*p10_minus_p20 | - // t1*p01_minus_p21 - t0*p11_minus_p21 | - // ystepdenominv | xstepdenominv | p00_minus_p20 | - // p11_minus_p21 - fmul %st(2),%st(0) // (t1*p00_minus_p20 - t0*p10_minus_p20)* - // ystepdenominv | - // t1*p01_minus_p21 - t0*p11_minus_p21 | - // ystepdenominv | xstepdenominv | p00_minus_p20 | - // p11_minus_p21 - fxch %st(1) // t1*p01_minus_p21 - t0*p11_minus_p21 | - // (t1*p00_minus_p20 - t0*p10_minus_p20)* - // ystepdenominv | ystepdenominv | - // xstepdenominv | p00_minus_p20 | p11_minus_p21 - fmul %st(3),%st(0) // (t1*p01_minus_p21 - t0*p11_minus_p21)* - // xstepdenominv | - // (t1*p00_minus_p20 - t0*p10_minus_p20)* - // ystepdenominv | ystepdenominv | - // xstepdenominv | p00_minus_p20 | p11_minus_p21 - fxch %st(1) // (t1*p00_minus_p20 - t0*p10_minus_p20)* - // ystepdenominv | - // (t1*p01_minus_p21 - t0*p11_minus_p21)* - // xstepdenominv | ystepdenominv | - // xstepdenominv | p00_minus_p20 | p11_minus_p21 - fistpl C(r_tstepy) // r_tstepx | ystepdenominv | xstepdenominv | - // p00_minus_p20 | p11_minus_p21 - fistpl C(r_tstepx) // ystepdenominv | xstepdenominv | p00_minus_p20 | - // p11_minus_p21 - -// t0 = r_p0[5] - r_p2[5]; -// t1 = r_p1[5] - r_p2[5]; - - fildl C(r_p2)+20 // r_p2[5] | ystepdenominv | xstepdenominv | - // p00_minus_p20 | p11_minus_p21 - fildl C(r_p0)+20 // r_p0[5] | r_p2[5] | ystepdenominv | - // xstepdenominv | p00_minus_p20 | p11_minus_p21 - fildl C(r_p1)+20 // r_p1[5] | r_p0[5] | r_p2[5] | ystepdenominv | - // xstepdenominv | p00_minus_p20 | p11_minus_p21 - fxch %st(2) // r_p2[5] | r_p0[5] | r_p1[5] | ystepdenominv | - // xstepdenominv | p00_minus_p20 | p11_minus_p21 - fld %st(0) // r_p2[5] | r_p2[5] | r_p0[5] | r_p1[5] | - // ystepdenominv | xstepdenominv | p00_minus_p20 | - // p11_minus_p21 - fsubrp %st(0),%st(2) // r_p2[5] | t0 | r_p1[5] | ystepdenominv | - // xstepdenominv | p00_minus_p20 | p11_minus_p21 - fsubrp %st(0),%st(2) // t0 | t1 | ystepdenominv | xstepdenominv | - // p00_minus_p20 | p11_minus_p21 - -// r_zistepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) * -// xstepdenominv); -// r_zistepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) * -// ystepdenominv); - - fld %st(0) // t0 | t0 | t1 | ystepdenominv | xstepdenominv | - // p00_minus_p20 | p11_minus_p21 - fmulp %st(0),%st(6) // t0 | t1 | ystepdenominv | xstepdenominv | - // p00_minus_p20 | t0*p11_minus_p21 - fxch %st(1) // t1 | t0 | ystepdenominv | xstepdenominv | - // p00_minus_p20 | t0*p11_minus_p21 - fld %st(0) // t1 | t1 | t0 | ystepdenominv | xstepdenominv | - // p00_minus_p20 | t0*p11_minus_p21 - fmuls p01_minus_p21 // t1*p01_minus_p21 | t1 | t0 | ystepdenominv | - // xstepdenominv | p00_minus_p20 | - // t0*p11_minus_p21 - fxch %st(2) // t0 | t1 | t1*p01_minus_p21 | ystepdenominv | - // xstepdenominv | p00_minus_p20 | - // t0*p11_minus_p21 - fmuls p10_minus_p20 // t0*p10_minus_p20 | t1 | t1*p01_minus_p21 | - // ystepdenominv | xstepdenominv | p00_minus_p20 | - // t0*p11_minus_p21 - fxch %st(1) // t1 | t0*p10_minus_p20 | t1*p01_minus_p21 | - // ystepdenominv | xstepdenominv | p00_minus_p20 | - // t0*p11_minus_p21 - fmulp %st(0),%st(5) // t0*p10_minus_p20 | t1*p01_minus_p21 | - // ystepdenominv | xstepdenominv | - // t1*p00_minus_p20 | t0*p11_minus_p21 - fxch %st(5) // t0*p11_minus_p21 | t1*p01_minus_p21 | - // ystepdenominv | xstepdenominv | - // t1*p00_minus_p20 | t0*p10_minus_p20 - fsubrp %st(0),%st(1) // t1*p01_minus_p21 - t0*p11_minus_p21 | - // ystepdenominv | xstepdenominv | - // t1*p00_minus_p20 | t0*p10_minus_p20 - fxch %st(3) // t1*p00_minus_p20 | ystepdenominv | - // xstepdenominv | - // t1*p01_minus_p21 - t0*p11_minus_p21 | - // t0*p10_minus_p20 - fsubp %st(0),%st(4) // ystepdenominv | xstepdenominv | - // t1*p01_minus_p21 - t0*p11_minus_p21 | - // t1*p00_minus_p20 - t0*p10_minus_p20 - fxch %st(1) // xstepdenominv | ystepdenominv | - // t1*p01_minus_p21 - t0*p11_minus_p21 | - // t1*p00_minus_p20 - t0*p10_minus_p20 - fmulp %st(0),%st(2) // ystepdenominv | - // (t1*p01_minus_p21 - t0*p11_minus_p21) * - // xstepdenominv | - // t1*p00_minus_p20 - t0*p10_minus_p20 - fmulp %st(0),%st(2) // (t1*p01_minus_p21 - t0*p11_minus_p21) * - // xstepdenominv | - // (t1*p00_minus_p20 - t0*p10_minus_p20) * - // ystepdenominv - fistpl C(r_zistepx) // (t1*p00_minus_p20 - t0*p10_minus_p20) * - // ystepdenominv - fistpl C(r_zistepy) - -// a_sstepxfrac = r_sstepx << 16; -// a_tstepxfrac = r_tstepx << 16; -// -// a_ststepxwhole = r_affinetridesc.skinwidth * (r_tstepx >> 16) + -// (r_sstepx >> 16); - - movl C(r_sstepx),%eax - movl C(r_tstepx),%edx - shll $16,%eax - shll $16,%edx - movl %eax,C(a_sstepxfrac) - movl %edx,C(a_tstepxfrac) - - movl C(r_sstepx),%ecx - movl C(r_tstepx),%eax - sarl $16,%ecx - sarl $16,%eax - imull skinwidth(%esp) - addl %ecx,%eax - movl %eax,C(a_ststepxwhole) - - ret - -#endif - -//---------------------------------------------------------------------- -// recursive subdivision affine triangle drawing code -// -// not C-callable because of stdcall return -//---------------------------------------------------------------------- - -#define lp1 4+16 -#define lp2 8+16 -#define lp3 12+16 - -.globl C(D_PolysetRecursiveTriangle) -C(D_PolysetRecursiveTriangle): - pushl %ebp // preserve caller stack frame pointer - pushl %esi // preserve register variables - pushl %edi - pushl %ebx - -// int *temp; -// int d; -// int new[6]; -// int i; -// int z; -// short *zbuf; - movl lp2(%esp),%esi - movl lp1(%esp),%ebx - movl lp3(%esp),%edi - -// d = lp2[0] - lp1[0]; -// if (d < -1 || d > 1) -// goto split; - movl 0(%esi),%eax - - movl 0(%ebx),%edx - movl 4(%esi),%ebp - - subl %edx,%eax - movl 4(%ebx),%ecx - - subl %ecx,%ebp - incl %eax - - cmpl $2,%eax - ja LSplit - -// d = lp2[1] - lp1[1]; -// if (d < -1 || d > 1) -// goto split; - movl 0(%edi),%eax - incl %ebp - - cmpl $2,%ebp - ja LSplit - -// d = lp3[0] - lp2[0]; -// if (d < -1 || d > 1) -// goto split2; - movl 0(%esi),%edx - movl 4(%edi),%ebp - - subl %edx,%eax - movl 4(%esi),%ecx - - subl %ecx,%ebp - incl %eax - - cmpl $2,%eax - ja LSplit2 - -// d = lp3[1] - lp2[1]; -// if (d < -1 || d > 1) -// goto split2; - movl 0(%ebx),%eax - incl %ebp - - cmpl $2,%ebp - ja LSplit2 - -// d = lp1[0] - lp3[0]; -// if (d < -1 || d > 1) -// goto split3; - movl 0(%edi),%edx - movl 4(%ebx),%ebp - - subl %edx,%eax - movl 4(%edi),%ecx - - subl %ecx,%ebp - incl %eax - - incl %ebp - movl %ebx,%edx - - cmpl $2,%eax - ja LSplit3 - -// d = lp1[1] - lp3[1]; -// if (d < -1 || d > 1) -// { -//split3: -// temp = lp1; -// lp3 = lp2; -// lp1 = lp3; -// lp2 = temp; -// goto split; -// } -// -// return; // entire tri is filled -// - cmpl $2,%ebp - jna LDone - -LSplit3: - movl %edi,%ebx - movl %esi,%edi - movl %edx,%esi - jmp LSplit - -//split2: -LSplit2: - -// temp = lp1; -// lp1 = lp2; -// lp2 = lp3; -// lp3 = temp; - movl %ebx,%eax - movl %esi,%ebx - movl %edi,%esi - movl %eax,%edi - -//split: -LSplit: - - subl $24,%esp // allocate space for a new vertex - -//// split this edge -// new[0] = (lp1[0] + lp2[0]) >> 1; -// new[1] = (lp1[1] + lp2[1]) >> 1; -// new[2] = (lp1[2] + lp2[2]) >> 1; -// new[3] = (lp1[3] + lp2[3]) >> 1; -// new[5] = (lp1[5] + lp2[5]) >> 1; - movl 8(%ebx),%eax - - movl 8(%esi),%edx - movl 12(%ebx),%ecx - - addl %edx,%eax - movl 12(%esi),%edx - - sarl $1,%eax - addl %edx,%ecx - - movl %eax,8(%esp) - movl 20(%ebx),%eax - - sarl $1,%ecx - movl 20(%esi),%edx - - movl %ecx,12(%esp) - addl %edx,%eax - - movl 0(%ebx),%ecx - movl 0(%esi),%edx - - sarl $1,%eax - addl %ecx,%edx - - movl %eax,20(%esp) - movl 4(%ebx),%eax - - sarl $1,%edx - movl 4(%esi),%ebp - - movl %edx,0(%esp) - addl %eax,%ebp - - sarl $1,%ebp - movl %ebp,4(%esp) - -//// draw the point if splitting a leading edge -// if (lp2[1] > lp1[1]) -// goto nodraw; - cmpl %eax,4(%esi) - jg LNoDraw - -// if ((lp2[1] == lp1[1]) && (lp2[0] < lp1[0])) -// goto nodraw; - movl 0(%esi),%edx - jnz LDraw - - cmpl %ecx,%edx - jl LNoDraw - -LDraw: - -// z = new[5] >> 16; - movl 20(%esp),%edx - movl 4(%esp),%ecx - - sarl $16,%edx - movl 0(%esp),%ebp - -// zbuf = zspantable[new[1]] + new[0]; - movl C(zspantable)(,%ecx,4),%eax - -// if (z >= *zbuf) -// { - cmpw (%eax,%ebp,2),%dx - jnge LNoDraw - -// int pix; -// -// *zbuf = z; - movw %dx,(%eax,%ebp,2) - -// pix = d_pcolormap[skintable[new[3]>>16][new[2]>>16]]; - movl 12(%esp),%eax - - sarl $16,%eax - movl 8(%esp),%edx - - sarl $16,%edx - subl %ecx,%ecx - - movl C(skintable)(,%eax,4),%eax - movl 4(%esp),%ebp - - movb (%eax,%edx,),%cl - movl C(d_pcolormap),%edx - - movb (%edx,%ecx,),%dl - movl 0(%esp),%ecx - -// d_viewbuffer[d_scantable[new[1]] + new[0]] = pix; - movl C(d_scantable)(,%ebp,4),%eax - addl %eax,%ecx - movl C(d_viewbuffer),%eax - movb %dl,(%eax,%ecx,1) - -// } -// -//nodraw: -LNoDraw: - -//// recursively continue -// D_PolysetRecursiveTriangle (lp3, lp1, new); - pushl %esp - pushl %ebx - pushl %edi - call C(D_PolysetRecursiveTriangle) - -// D_PolysetRecursiveTriangle (lp3, new, lp2); - movl %esp,%ebx - pushl %esi - pushl %ebx - pushl %edi - call C(D_PolysetRecursiveTriangle) - addl $24,%esp - -LDone: - popl %ebx // restore register variables - popl %edi - popl %esi - popl %ebp // restore caller stack frame pointer - ret $12 - - -//---------------------------------------------------------------------- -// 8-bpp horizontal span drawing code for affine polygons, with smooth -// shading and no transparency -//---------------------------------------------------------------------- - -#define pspans 4+8 - -.globl C(D_PolysetAff8Start) -C(D_PolysetAff8Start): - -.globl C(R_PolysetDrawSpans8_Opaque) -C(R_PolysetDrawSpans8_Opaque): - pushl %esi // preserve register variables - pushl %ebx - - movl pspans(%esp),%esi // point to the first span descriptor - movl C(r_zistepx),%ecx - - pushl %ebp // preserve caller's stack frame - pushl %edi - - rorl $16,%ecx // put high 16 bits of 1/z step in low word - movl spanpackage_t_count(%esi),%edx - - movl %ecx,lzistepx - -LSpanLoop: - -// lcount = d_aspancount - pspanpackage->count; -// -// errorterm += erroradjustup; -// if (errorterm >= 0) -// { -// d_aspancount += d_countextrastep; -// errorterm -= erroradjustdown; -// } -// else -// { -// d_aspancount += ubasestep; -// } - movl C(d_aspancount),%eax - subl %edx,%eax - - movl C(erroradjustup),%edx - movl C(errorterm),%ebx - addl %edx,%ebx - js LNoTurnover - - movl C(erroradjustdown),%edx - movl C(d_countextrastep),%edi - subl %edx,%ebx - movl C(d_aspancount),%ebp - movl %ebx,C(errorterm) - addl %edi,%ebp - movl %ebp,C(d_aspancount) - jmp LRightEdgeStepped - -LNoTurnover: - movl C(d_aspancount),%edi - movl C(ubasestep),%edx - movl %ebx,C(errorterm) - addl %edx,%edi - movl %edi,C(d_aspancount) - -LRightEdgeStepped: - cmpl $1,%eax - - jl LNextSpan - jz LExactlyOneLong - -// -// set up advancetable -// - movl C(a_ststepxwhole),%ecx - movl C(r_affinetridesc)+atd_skinwidth,%edx - - movl %ecx,advancetable+4 // advance base in t - addl %edx,%ecx - - movl %ecx,advancetable // advance extra in t - movl C(a_tstepxfrac),%ecx - - movw C(r_lstepx),%cx - movl %eax,%edx // count - - movl %ecx,tstep - addl $7,%edx - - shrl $3,%edx // count of full and partial loops - movl spanpackage_t_sfrac(%esi),%ebx - - movw %dx,%bx - movl spanpackage_t_pz(%esi),%ecx - - negl %eax - - movl spanpackage_t_pdest(%esi),%edi - andl $7,%eax // 0->0, 1->7, 2->6, ... , 7->1 - - subl %eax,%edi // compensate for hardwired offsets - subl %eax,%ecx - - subl %eax,%ecx - movl spanpackage_t_tfrac(%esi),%edx - - movw spanpackage_t_light(%esi),%dx - movl spanpackage_t_zi(%esi),%ebp - - rorl $16,%ebp // put high 16 bits of 1/z in low word - pushl %esi - - movl spanpackage_t_ptex(%esi),%esi - jmp *aff8entryvec_table(,%eax,4) - -// %bx = count of full and partial loops -// %ebx high word = sfrac -// %ecx = pz -// %dx = light -// %edx high word = tfrac -// %esi = ptex -// %edi = pdest -// %ebp = 1/z -// tstep low word = C(r_lstepx) -// tstep high word = C(a_tstepxfrac) -// C(a_sstepxfrac) low word = 0 -// C(a_sstepxfrac) high word = C(a_sstepxfrac) - -LDrawLoop: - -// FIXME: do we need to clamp light? We may need at least a buffer bit to -// keep it from poking into tfrac and causing problems - -LDraw8: - cmpw (%ecx),%bp - jl Lp1 - xorl %eax,%eax - movb %dh,%ah - movb (%esi),%al - movw %bp,(%ecx) - movb 0x12345678(%eax),%al -LPatch8: - movb %al,(%edi) -Lp1: - addl tstep,%edx - sbbl %eax,%eax - addl lzistepx,%ebp - adcl $0,%ebp - addl C(a_sstepxfrac),%ebx - adcl advancetable+4(,%eax,4),%esi - -LDraw7: - cmpw 2(%ecx),%bp - jl Lp2 - xorl %eax,%eax - movb %dh,%ah - movb (%esi),%al - movw %bp,2(%ecx) - movb 0x12345678(%eax),%al -LPatch7: - movb %al,1(%edi) -Lp2: - addl tstep,%edx - sbbl %eax,%eax - addl lzistepx,%ebp - adcl $0,%ebp - addl C(a_sstepxfrac),%ebx - adcl advancetable+4(,%eax,4),%esi - -LDraw6: - cmpw 4(%ecx),%bp - jl Lp3 - xorl %eax,%eax - movb %dh,%ah - movb (%esi),%al - movw %bp,4(%ecx) - movb 0x12345678(%eax),%al -LPatch6: - movb %al,2(%edi) -Lp3: - addl tstep,%edx - sbbl %eax,%eax - addl lzistepx,%ebp - adcl $0,%ebp - addl C(a_sstepxfrac),%ebx - adcl advancetable+4(,%eax,4),%esi - -LDraw5: - cmpw 6(%ecx),%bp - jl Lp4 - xorl %eax,%eax - movb %dh,%ah - movb (%esi),%al - movw %bp,6(%ecx) - movb 0x12345678(%eax),%al -LPatch5: - movb %al,3(%edi) -Lp4: - addl tstep,%edx - sbbl %eax,%eax - addl lzistepx,%ebp - adcl $0,%ebp - addl C(a_sstepxfrac),%ebx - adcl advancetable+4(,%eax,4),%esi - -LDraw4: - cmpw 8(%ecx),%bp - jl Lp5 - xorl %eax,%eax - movb %dh,%ah - movb (%esi),%al - movw %bp,8(%ecx) - movb 0x12345678(%eax),%al -LPatch4: - movb %al,4(%edi) -Lp5: - addl tstep,%edx - sbbl %eax,%eax - addl lzistepx,%ebp - adcl $0,%ebp - addl C(a_sstepxfrac),%ebx - adcl advancetable+4(,%eax,4),%esi - -LDraw3: - cmpw 10(%ecx),%bp - jl Lp6 - xorl %eax,%eax - movb %dh,%ah - movb (%esi),%al - movw %bp,10(%ecx) - movb 0x12345678(%eax),%al -LPatch3: - movb %al,5(%edi) -Lp6: - addl tstep,%edx - sbbl %eax,%eax - addl lzistepx,%ebp - adcl $0,%ebp - addl C(a_sstepxfrac),%ebx - adcl advancetable+4(,%eax,4),%esi - -LDraw2: - cmpw 12(%ecx),%bp - jl Lp7 - xorl %eax,%eax - movb %dh,%ah - movb (%esi),%al - movw %bp,12(%ecx) - movb 0x12345678(%eax),%al -LPatch2: - movb %al,6(%edi) -Lp7: - addl tstep,%edx - sbbl %eax,%eax - addl lzistepx,%ebp - adcl $0,%ebp - addl C(a_sstepxfrac),%ebx - adcl advancetable+4(,%eax,4),%esi - -LDraw1: - cmpw 14(%ecx),%bp - jl Lp8 - xorl %eax,%eax - movb %dh,%ah - movb (%esi),%al - movw %bp,14(%ecx) - movb 0x12345678(%eax),%al -LPatch1: - movb %al,7(%edi) -Lp8: - addl tstep,%edx - sbbl %eax,%eax - addl lzistepx,%ebp - adcl $0,%ebp - addl C(a_sstepxfrac),%ebx - adcl advancetable+4(,%eax,4),%esi - - addl $8,%edi - addl $16,%ecx - - decw %bx - jnz LDrawLoop - - popl %esi // restore spans pointer -LNextSpan: - addl $(spanpackage_t_size),%esi // point to next span -LNextSpanESISet: - movl spanpackage_t_count(%esi),%edx - cmpl $-999999,%edx // any more spans? - jnz LSpanLoop // yes - - popl %edi - popl %ebp // restore the caller's stack frame - popl %ebx // restore register variables - popl %esi - ret - - -// draw a one-long span - -LExactlyOneLong: - - movl spanpackage_t_pz(%esi),%ecx - movl spanpackage_t_zi(%esi),%ebp - - rorl $16,%ebp // put high 16 bits of 1/z in low word - movl spanpackage_t_ptex(%esi),%ebx - - cmpw (%ecx),%bp - jl LNextSpan - xorl %eax,%eax - movl spanpackage_t_pdest(%esi),%edi - movb spanpackage_t_light+1(%esi),%ah - addl $(spanpackage_t_size),%esi // point to next span - movb (%ebx),%al - movw %bp,(%ecx) - movb 0x12345678(%eax),%al -LPatch9: - movb %al,(%edi) - - jmp LNextSpanESISet - -.globl C(D_PolysetAff8End) -C(D_PolysetAff8End): - - -.globl C(D_Aff8Patch) -C(D_Aff8Patch): - movl C(d_pcolormap),%eax - movl %eax,LPatch1-4 - movl %eax,LPatch2-4 - movl %eax,LPatch3-4 - movl %eax,LPatch4-4 - movl %eax,LPatch5-4 - movl %eax,LPatch6-4 - movl %eax,LPatch7-4 - movl %eax,LPatch8-4 - movl %eax,LPatch9-4 - - ret - -//---------------------------------------------------------------------- -// Alias model triangle left-edge scanning code -//---------------------------------------------------------------------- - -#define height 4+16 - -.globl C(R_PolysetScanLeftEdge) -C(R_PolysetScanLeftEdge): - pushl %ebp // preserve caller stack frame pointer - pushl %esi // preserve register variables - pushl %edi - pushl %ebx - - movl height(%esp),%eax - movl C(d_sfrac),%ecx - andl $0xFFFF,%eax - movl C(d_ptex),%ebx - orl %eax,%ecx - movl C(d_pedgespanpackage),%esi - movl C(d_tfrac),%edx - movl C(d_light),%edi - movl C(d_zi),%ebp - -// %eax: scratch -// %ebx: d_ptex -// %ecx: d_sfrac in high word, count in low word -// %edx: d_tfrac -// %esi: d_pedgespanpackage, errorterm, scratch alternately -// %edi: d_light -// %ebp: d_zi - -// do -// { - -LScanLoop: - -// d_pedgespanpackage->ptex = ptex; -// d_pedgespanpackage->pdest = d_pdest; -// d_pedgespanpackage->pz = d_pz; -// d_pedgespanpackage->count = d_aspancount; -// d_pedgespanpackage->light = d_light; -// d_pedgespanpackage->zi = d_zi; -// d_pedgespanpackage->sfrac = d_sfrac << 16; -// d_pedgespanpackage->tfrac = d_tfrac << 16; - movl %ebx,spanpackage_t_ptex(%esi) - movl C(d_pdest),%eax - movl %eax,spanpackage_t_pdest(%esi) - movl C(d_pz),%eax - movl %eax,spanpackage_t_pz(%esi) - movl C(d_aspancount),%eax - movl %eax,spanpackage_t_count(%esi) - movl %edi,spanpackage_t_light(%esi) - movl %ebp,spanpackage_t_zi(%esi) - movl %ecx,spanpackage_t_sfrac(%esi) - movl %edx,spanpackage_t_tfrac(%esi) - -// pretouch the next cache line - movb spanpackage_t_size(%esi),%al - -// d_pedgespanpackage++; - addl $(spanpackage_t_size),%esi - movl C(erroradjustup),%eax - movl %esi,C(d_pedgespanpackage) - -// errorterm += erroradjustup; - movl C(errorterm),%esi - addl %eax,%esi - movl C(d_pdest),%eax - -// if (errorterm >= 0) -// { - js LNoLeftEdgeTurnover - -// errorterm -= erroradjustdown; -// d_pdest += d_pdestextrastep; - subl C(erroradjustdown),%esi - addl C(d_pdestextrastep),%eax - movl %esi,C(errorterm) - movl %eax,C(d_pdest) - -// d_pz += d_pzextrastep; -// d_aspancount += d_countextrastep; -// d_ptex += d_ptexextrastep; -// d_sfrac += d_sfracextrastep; -// d_ptex += d_sfrac >> 16; -// d_sfrac &= 0xFFFF; -// d_tfrac += d_tfracextrastep; - movl C(d_pz),%eax - movl C(d_aspancount),%esi - addl C(d_pzextrastep),%eax - addl C(d_sfracextrastep),%ecx - adcl C(d_ptexextrastep),%ebx - addl C(d_countextrastep),%esi - movl %eax,C(d_pz) - movl C(d_tfracextrastep),%eax - movl %esi,C(d_aspancount) - addl %eax,%edx - -// if (d_tfrac & 0x10000) -// { - jnc LSkip1 - -// d_ptex += r_affinetridesc.skinwidth; -// d_tfrac &= 0xFFFF; - addl C(r_affinetridesc)+atd_skinwidth,%ebx - -// } - -LSkip1: - -// d_light += d_lightextrastep; -// d_zi += d_ziextrastep; - addl C(d_lightextrastep),%edi - addl C(d_ziextrastep),%ebp - -// } - movl C(d_pedgespanpackage),%esi - decl %ecx - testl $0xFFFF,%ecx - jnz LScanLoop - - popl %ebx - popl %edi - popl %esi - popl %ebp - ret - -// else -// { - -LNoLeftEdgeTurnover: - movl %esi,C(errorterm) - -// d_pdest += d_pdestbasestep; - addl C(d_pdestbasestep),%eax - movl %eax,C(d_pdest) - -// d_pz += d_pzbasestep; -// d_aspancount += ubasestep; -// d_ptex += d_ptexbasestep; -// d_sfrac += d_sfracbasestep; -// d_ptex += d_sfrac >> 16; -// d_sfrac &= 0xFFFF; - movl C(d_pz),%eax - movl C(d_aspancount),%esi - addl C(d_pzbasestep),%eax - addl C(d_sfracbasestep),%ecx - adcl C(d_ptexbasestep),%ebx - addl C(ubasestep),%esi - movl %eax,C(d_pz) - movl %esi,C(d_aspancount) - -// d_tfrac += d_tfracbasestep; - movl C(d_tfracbasestep),%esi - addl %esi,%edx - -// if (d_tfrac & 0x10000) -// { - jnc LSkip2 - -// d_ptex += r_affinetridesc.skinwidth; -// d_tfrac &= 0xFFFF; - addl C(r_affinetridesc)+atd_skinwidth,%ebx - -// } - -LSkip2: - -// d_light += d_lightbasestep; -// d_zi += d_zibasestep; - addl C(d_lightbasestep),%edi - addl C(d_zibasestep),%ebp - -// } -// } while (--height); - movl C(d_pedgespanpackage),%esi - decl %ecx - testl $0xFFFF,%ecx - jnz LScanLoop - - popl %ebx - popl %edi - popl %esi - popl %ebp - ret - -#endif // USE_ASM - diff --git a/src/refresh/sw/x86/protect.c b/src/refresh/sw/x86/protect.c deleted file mode 100644 index 969f2be..0000000 --- a/src/refresh/sw/x86/protect.c +++ /dev/null @@ -1,31 +0,0 @@ -#include "shared/shared.h" - -#ifdef _WIN32 -#define WIN32_LEAN_AND_MEAN -#include <windows.h> -#else -#include <unistd.h> -#include <sys/mman.h> -#endif - -/* -================ -Sys_MakeCodeWriteable -================ -*/ -void Sys_MakeCodeWriteable(uintptr_t start, size_t length) -{ -#ifdef _WIN32 - DWORD unused; - - if (!VirtualProtect((LPVOID)start, length, PAGE_EXECUTE_READWRITE, &unused)) - Com_Error(ERR_FATAL, "Protection change failed"); -#else - int psize = getpagesize(); - uintptr_t addr = (start & ~(psize - 1)) - psize; - - if (mprotect((void *)addr, length + start - addr + psize, PROT_READ | PROT_WRITE | PROT_EXEC)) - Com_Error(ERR_FATAL, "Protection change failed"); -#endif -} - diff --git a/src/refresh/sw/x86/span16.S b/src/refresh/sw/x86/span16.S deleted file mode 100644 index 480ce60..0000000 --- a/src/refresh/sw/x86/span16.S +++ /dev/null @@ -1,1227 +0,0 @@ -// -// d_draw16.s -// x86 assembly-language horizontal 8-bpp span-drawing code, with 16-pixel -// subdivision. -// - -#include "common/x86/asm.h" -#include "sw.h" - -#if USE_ASM - -//---------------------------------------------------------------------- -// 8-bpp horizontal span drawing code for polygons, with no transparency and -// 16-pixel subdivision. -// -// Assumes there is at least one span in pspans, and that every span -// contains at least one pixel -//---------------------------------------------------------------------- - - .data - - .text - -// out-of-line, rarely-needed clamping code - -LClampHigh0: - movl C(bbextents),%esi - jmp LClampReentry0 -LClampHighOrLow0: - jg LClampHigh0 - xorl %esi,%esi - jmp LClampReentry0 - -LClampHigh1: - movl C(bbextentt),%edx - jmp LClampReentry1 -LClampHighOrLow1: - jg LClampHigh1 - xorl %edx,%edx - jmp LClampReentry1 - -LClampLow2: - movl $4096,%ebp - jmp LClampReentry2 -LClampHigh2: - movl C(bbextents),%ebp - jmp LClampReentry2 - -LClampLow3: - movl $4096,%ecx - jmp LClampReentry3 -LClampHigh3: - movl C(bbextentt),%ecx - jmp LClampReentry3 - -LClampLow4: - movl $4096,%eax - jmp LClampReentry4 -LClampHigh4: - movl C(bbextents),%eax - jmp LClampReentry4 - -LClampLow5: - movl $4096,%ebx - jmp LClampReentry5 -LClampHigh5: - movl C(bbextentt),%ebx - jmp LClampReentry5 - - -#define pspans 4+16 - - .align 4 -.globl C(D_DrawSpans16) -C(D_DrawSpans16): - pushl %ebp // preserve caller's stack frame - pushl %edi - pushl %esi // preserve register variables - pushl %ebx - -// -// set up scaled-by-16 steps, for 16-long segments; also set up cacheblock -// and span list pointers -// -// TODO: any overlap from rearranging? - flds C(d_sdivzstepu) - fmuls fp_16 - movl C(cacheblock),%edx - flds C(d_tdivzstepu) - fmuls fp_16 - movl pspans(%esp),%ebx // point to the first span descriptor - flds C(d_zistepu) - fmuls fp_16 - movl %edx,pbase // pbase = cacheblock - fstps zi16stepu - fstps tdivz16stepu - fstps sdivz16stepu - -LSpanLoop: -// -// set up the initial s/z, t/z, and 1/z on the FP stack, and generate the -// initial s and t values -// -// FIXME: pipeline FILD? - fildl espan_t_v(%ebx) - fildl espan_t_u(%ebx) - - fld %st(1) // dv | du | dv - fmuls C(d_sdivzstepv) // dv*d_sdivzstepv | du | dv - fld %st(1) // du | dv*d_sdivzstepv | du | dv - fmuls C(d_sdivzstepu) // du*d_sdivzstepu | dv*d_sdivzstepv | du | dv - fld %st(2) // du | du*d_sdivzstepu | dv*d_sdivzstepv | du | dv - fmuls C(d_tdivzstepu) // du*d_tdivzstepu | du*d_sdivzstepu | - // dv*d_sdivzstepv | du | dv - fxch %st(1) // du*d_sdivzstepu | du*d_tdivzstepu | - // dv*d_sdivzstepv | du | dv - faddp %st(0),%st(2) // du*d_tdivzstepu | - // du*d_sdivzstepu + dv*d_sdivzstepv | du | dv - fxch %st(1) // du*d_sdivzstepu + dv*d_sdivzstepv | - // du*d_tdivzstepu | du | dv - fld %st(3) // dv | du*d_sdivzstepu + dv*d_sdivzstepv | - // du*d_tdivzstepu | du | dv - fmuls C(d_tdivzstepv) // dv*d_tdivzstepv | - // du*d_sdivzstepu + dv*d_sdivzstepv | - // du*d_tdivzstepu | du | dv - fxch %st(1) // du*d_sdivzstepu + dv*d_sdivzstepv | - // dv*d_tdivzstepv | du*d_tdivzstepu | du | dv - fadds C(d_sdivzorigin) // sdivz = d_sdivzorigin + dv*d_sdivzstepv + - // du*d_sdivzstepu; stays in %st(2) at end - fxch %st(4) // dv | dv*d_tdivzstepv | du*d_tdivzstepu | du | - // s/z - fmuls C(d_zistepv) // dv*d_zistepv | dv*d_tdivzstepv | - // du*d_tdivzstepu | du | s/z - fxch %st(1) // dv*d_tdivzstepv | dv*d_zistepv | - // du*d_tdivzstepu | du | s/z - faddp %st(0),%st(2) // dv*d_zistepv | - // dv*d_tdivzstepv + du*d_tdivzstepu | du | s/z - fxch %st(2) // du | dv*d_tdivzstepv + du*d_tdivzstepu | - // dv*d_zistepv | s/z - fmuls C(d_zistepu) // du*d_zistepu | - // dv*d_tdivzstepv + du*d_tdivzstepu | - // dv*d_zistepv | s/z - fxch %st(1) // dv*d_tdivzstepv + du*d_tdivzstepu | - // du*d_zistepu | dv*d_zistepv | s/z - fadds C(d_tdivzorigin) // tdivz = d_tdivzorigin + dv*d_tdivzstepv + - // du*d_tdivzstepu; stays in %st(1) at end - fxch %st(2) // dv*d_zistepv | du*d_zistepu | t/z | s/z - faddp %st(0),%st(1) // dv*d_zistepv + du*d_zistepu | t/z | s/z - - flds fp_64k // fp_64k | dv*d_zistepv + du*d_zistepu | t/z | s/z - fxch %st(1) // dv*d_zistepv + du*d_zistepu | fp_64k | t/z | s/z - fadds C(d_ziorigin) // zi = d_ziorigin + dv*d_zistepv + - // du*d_zistepu; stays in %st(0) at end - // 1/z | fp_64k | t/z | s/z -// -// calculate and clamp s & t -// - fdivr %st(0),%st(1) // 1/z | z*64k | t/z | s/z - -// -// point %edi to the first pixel in the span -// - movl C(d_viewbuffer),%ecx - movl espan_t_v(%ebx),%eax - movl %ebx,pspantemp // preserve spans pointer - - movl C(tadjust),%edx - movl C(sadjust),%esi - movl C(d_scantable)(,%eax,4),%edi // v * screenwidth - addl %ecx,%edi - movl espan_t_u(%ebx),%ecx - addl %ecx,%edi // pdest = &pdestspan[scans->u]; - movl espan_t_count(%ebx),%ecx - -// -// now start the FDIV for the end of the span -// - cmpl $16,%ecx - ja LSetupNotLast1 - - decl %ecx - jz LCleanup1 // if only one pixel, no need to start an FDIV - movl %ecx,spancountminus1 - -// finish up the s and t calcs - fxch %st(1) // z*64k | 1/z | t/z | s/z - - fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z - fmul %st(4),%st(0) // s | z*64k | 1/z | t/z | s/z - fxch %st(1) // z*64k | s | 1/z | t/z | s/z - fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z - fxch %st(1) // s | t | 1/z | t/z | s/z - fistpl s // 1/z | t | t/z | s/z - fistpl t // 1/z | t/z | s/z - - fildl spancountminus1 - - flds C(d_tdivzstepu) // C(d_tdivzstepu) | spancountminus1 - flds C(d_zistepu) // C(d_zistepu) | C(d_tdivzstepu) | spancountminus1 - fmul %st(2),%st(0) // C(d_zistepu)*scm1 | C(d_tdivzstepu) | scm1 - fxch %st(1) // C(d_tdivzstepu) | C(d_zistepu)*scm1 | scm1 - fmul %st(2),%st(0) // C(d_tdivzstepu)*scm1 | C(d_zistepu)*scm1 | scm1 - fxch %st(2) // scm1 | C(d_zistepu)*scm1 | C(d_tdivzstepu)*scm1 - fmuls C(d_sdivzstepu) // C(d_sdivzstepu)*scm1 | C(d_zistepu)*scm1 | - // C(d_tdivzstepu)*scm1 - fxch %st(1) // C(d_zistepu)*scm1 | C(d_sdivzstepu)*scm1 | - // C(d_tdivzstepu)*scm1 - faddp %st(0),%st(3) // C(d_sdivzstepu)*scm1 | C(d_tdivzstepu)*scm1 - fxch %st(1) // C(d_tdivzstepu)*scm1 | C(d_sdivzstepu)*scm1 - faddp %st(0),%st(3) // C(d_sdivzstepu)*scm1 - faddp %st(0),%st(3) - - flds fp_64k - fdiv %st(1),%st(0) // this is what we've gone to all this trouble to - // overlap - jmp LFDIVInFlight1 - -LCleanup1: -// finish up the s and t calcs - fxch %st(1) // z*64k | 1/z | t/z | s/z - - fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z - fmul %st(4),%st(0) // s | z*64k | 1/z | t/z | s/z - fxch %st(1) // z*64k | s | 1/z | t/z | s/z - fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z - fxch %st(1) // s | t | 1/z | t/z | s/z - fistpl s // 1/z | t | t/z | s/z - fistpl t // 1/z | t/z | s/z - jmp LFDIVInFlight1 - - .align 4 -LSetupNotLast1: -// finish up the s and t calcs - fxch %st(1) // z*64k | 1/z | t/z | s/z - - fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z - fmul %st(4),%st(0) // s | z*64k | 1/z | t/z | s/z - fxch %st(1) // z*64k | s | 1/z | t/z | s/z - fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z - fxch %st(1) // s | t | 1/z | t/z | s/z - fistpl s // 1/z | t | t/z | s/z - fistpl t // 1/z | t/z | s/z - - fadds zi16stepu - fxch %st(2) - fadds sdivz16stepu - fxch %st(2) - flds tdivz16stepu - faddp %st(0),%st(2) - flds fp_64k - fdiv %st(1),%st(0) // z = 1/1/z - // this is what we've gone to all this trouble to - // overlap -LFDIVInFlight1: - - addl s,%esi - addl t,%edx - movl C(bbextents),%ebx - movl C(bbextentt),%ebp - cmpl %ebx,%esi - ja LClampHighOrLow0 -LClampReentry0: - movl %esi,s - movl pbase,%ebx - shll $16,%esi - cmpl %ebp,%edx - movl %esi,sfracf - ja LClampHighOrLow1 -LClampReentry1: - movl %edx,t - movl s,%esi // sfrac = scans->sfrac; - shll $16,%edx - movl t,%eax // tfrac = scans->tfrac; - sarl $16,%esi - movl %edx,tfracf - -// -// calculate the texture starting address -// - sarl $16,%eax - movl C(cachewidth),%edx - imull %edx,%eax // (tfrac >> 16) * cachewidth - addl %ebx,%esi - addl %eax,%esi // psource = pbase + (sfrac >> 16) + - // ((tfrac >> 16) * cachewidth); -// -// determine whether last span or not -// - cmpl $16,%ecx - jna LLastSegment - -// -// not the last segment; do full 16-wide segment -// -LNotLastSegment: - -// -// advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to -// get there -// - -// pick up after the FDIV that was left in flight previously - - fld %st(0) // duplicate it - fmul %st(4),%st(0) // s = s/z * z - fxch %st(1) - fmul %st(3),%st(0) // t = t/z * z - fxch %st(1) - fistpl snext - fistpl tnext - movl snext,%eax - movl tnext,%edx - - movb (%esi),%bl // get first source texel - subl $16,%ecx // count off this segments' pixels - movl C(sadjust),%ebp - movl %ecx,counttemp // remember count of remaining pixels - - movl C(tadjust),%ecx - movb %bl,(%edi) // store first dest pixel - - addl %eax,%ebp - addl %edx,%ecx - - movl C(bbextents),%eax - movl C(bbextentt),%edx - - cmpl $4096,%ebp - jl LClampLow2 - cmpl %eax,%ebp - ja LClampHigh2 -LClampReentry2: - - cmpl $4096,%ecx - jl LClampLow3 - cmpl %edx,%ecx - ja LClampHigh3 -LClampReentry3: - - movl %ebp,snext - movl %ecx,tnext - - subl s,%ebp - subl t,%ecx - -// -// set up advancetable -// - movl %ecx,%eax - movl %ebp,%edx - sarl $20,%eax // tstep >>= 16; - jz LZero - sarl $20,%edx // sstep >>= 16; - movl C(cachewidth),%ebx - imull %ebx,%eax - jmp LSetUp1 - -LZero: - sarl $20,%edx // sstep >>= 16; - movl C(cachewidth),%ebx - -LSetUp1: - - addl %edx,%eax // add in sstep - // (tstep >> 16) * cachewidth + (sstep >> 16); - movl tfracf,%edx - movl %eax,advancetable+4 // advance base in t - addl %ebx,%eax // ((tstep >> 16) + 1) * cachewidth + - // (sstep >> 16); - shll $12,%ebp // left-justify sstep fractional part - movl sfracf,%ebx - shll $12,%ecx // left-justify tstep fractional part - movl %eax,advancetable // advance extra in t - - movl %ecx,tstep - addl %ecx,%edx // advance tfrac fractional part by tstep frac - - sbbl %ecx,%ecx // turn tstep carry into -1 (0 if none) - addl %ebp,%ebx // advance sfrac fractional part by sstep frac - adcl advancetable+4(,%ecx,4),%esi // point to next source texel - - addl tstep,%edx - sbbl %ecx,%ecx - movb (%esi),%al - addl %ebp,%ebx - movb %al,1(%edi) - adcl advancetable+4(,%ecx,4),%esi - - addl tstep,%edx - sbbl %ecx,%ecx - addl %ebp,%ebx - movb (%esi),%al - adcl advancetable+4(,%ecx,4),%esi - - addl tstep,%edx - sbbl %ecx,%ecx - movb %al,2(%edi) - addl %ebp,%ebx - movb (%esi),%al - adcl advancetable+4(,%ecx,4),%esi - - addl tstep,%edx - sbbl %ecx,%ecx - movb %al,3(%edi) - addl %ebp,%ebx - movb (%esi),%al - adcl advancetable+4(,%ecx,4),%esi - - addl tstep,%edx - sbbl %ecx,%ecx - movb %al,4(%edi) - addl %ebp,%ebx - movb (%esi),%al - adcl advancetable+4(,%ecx,4),%esi - - addl tstep,%edx - sbbl %ecx,%ecx - movb %al,5(%edi) - addl %ebp,%ebx - movb (%esi),%al - adcl advancetable+4(,%ecx,4),%esi - - addl tstep,%edx - sbbl %ecx,%ecx - movb %al,6(%edi) - addl %ebp,%ebx - movb (%esi),%al - adcl advancetable+4(,%ecx,4),%esi - - addl tstep,%edx - sbbl %ecx,%ecx - movb %al,7(%edi) - addl %ebp,%ebx - movb (%esi),%al - adcl advancetable+4(,%ecx,4),%esi - - -// -// start FDIV for end of next segment in flight, so it can overlap -// - movl counttemp,%ecx - cmpl $16,%ecx // more than one segment after this? - ja LSetupNotLast2 // yes - - decl %ecx - jz LFDIVInFlight2 // if only one pixel, no need to start an FDIV - movl %ecx,spancountminus1 - fildl spancountminus1 - - flds C(d_zistepu) // C(d_zistepu) | spancountminus1 - fmul %st(1),%st(0) // C(d_zistepu)*scm1 | scm1 - flds C(d_tdivzstepu) // C(d_tdivzstepu) | C(d_zistepu)*scm1 | scm1 - fmul %st(2),%st(0) // C(d_tdivzstepu)*scm1 | C(d_zistepu)*scm1 | scm1 - fxch %st(1) // C(d_zistepu)*scm1 | C(d_tdivzstepu)*scm1 | scm1 - faddp %st(0),%st(3) // C(d_tdivzstepu)*scm1 | scm1 - fxch %st(1) // scm1 | C(d_tdivzstepu)*scm1 - fmuls C(d_sdivzstepu) // C(d_sdivzstepu)*scm1 | C(d_tdivzstepu)*scm1 - fxch %st(1) // C(d_tdivzstepu)*scm1 | C(d_sdivzstepu)*scm1 - faddp %st(0),%st(3) // C(d_sdivzstepu)*scm1 - flds fp_64k // 64k | C(d_sdivzstepu)*scm1 - fxch %st(1) // C(d_sdivzstepu)*scm1 | 64k - faddp %st(0),%st(4) // 64k - - fdiv %st(1),%st(0) // this is what we've gone to all this trouble to - // overlap - jmp LFDIVInFlight2 - - .align 4 -LSetupNotLast2: - fadds zi16stepu - fxch %st(2) - fadds sdivz16stepu - fxch %st(2) - flds tdivz16stepu - faddp %st(0),%st(2) - flds fp_64k - fdiv %st(1),%st(0) // z = 1/1/z - // this is what we've gone to all this trouble to - // overlap -LFDIVInFlight2: - movl %ecx,counttemp - - addl tstep,%edx - sbbl %ecx,%ecx - movb %al,8(%edi) - addl %ebp,%ebx - movb (%esi),%al - adcl advancetable+4(,%ecx,4),%esi - - addl tstep,%edx - sbbl %ecx,%ecx - movb %al,9(%edi) - addl %ebp,%ebx - movb (%esi),%al - adcl advancetable+4(,%ecx,4),%esi - - addl tstep,%edx - sbbl %ecx,%ecx - movb %al,10(%edi) - addl %ebp,%ebx - movb (%esi),%al - adcl advancetable+4(,%ecx,4),%esi - - addl tstep,%edx - sbbl %ecx,%ecx - movb %al,11(%edi) - addl %ebp,%ebx - movb (%esi),%al - adcl advancetable+4(,%ecx,4),%esi - - addl tstep,%edx - sbbl %ecx,%ecx - movb %al,12(%edi) - addl %ebp,%ebx - movb (%esi),%al - adcl advancetable+4(,%ecx,4),%esi - - addl tstep,%edx - sbbl %ecx,%ecx - movb %al,13(%edi) - addl %ebp,%ebx - movb (%esi),%al - adcl advancetable+4(,%ecx,4),%esi - - addl tstep,%edx - sbbl %ecx,%ecx - movb %al,14(%edi) - addl %ebp,%ebx - movb (%esi),%al - adcl advancetable+4(,%ecx,4),%esi - - addl $16,%edi - movl %edx,tfracf - movl snext,%edx - movl %ebx,sfracf - movl tnext,%ebx - movl %edx,s - movl %ebx,t - - movl counttemp,%ecx // retrieve count - -// -// determine whether last span or not -// - cmpl $16,%ecx // are there multiple segments remaining? - movb %al,-1(%edi) - ja LNotLastSegment // yes - -// -// last segment of scan -// -LLastSegment: - -// -// advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to -// get there. The number of pixels left is variable, and we want to land on the -// last pixel, not step one past it, so we can't run into arithmetic problems -// - testl %ecx,%ecx - jz LNoSteps // just draw the last pixel and we're done - -// pick up after the FDIV that was left in flight previously - - - fld %st(0) // duplicate it - fmul %st(4),%st(0) // s = s/z * z - fxch %st(1) - fmul %st(3),%st(0) // t = t/z * z - fxch %st(1) - fistpl snext - fistpl tnext - - movb (%esi),%al // load first texel in segment - movl C(tadjust),%ebx - movb %al,(%edi) // store first pixel in segment - movl C(sadjust),%eax - - addl snext,%eax - addl tnext,%ebx - - movl C(bbextents),%ebp - movl C(bbextentt),%edx - - cmpl $4096,%eax - jl LClampLow4 - cmpl %ebp,%eax - ja LClampHigh4 -LClampReentry4: - movl %eax,snext - - cmpl $4096,%ebx - jl LClampLow5 - cmpl %edx,%ebx - ja LClampHigh5 -LClampReentry5: - - cmpl $1,%ecx // don't bother - je LOnlyOneStep // if two pixels in segment, there's only one step, - // of the segment length - subl s,%eax - subl t,%ebx - - addl %eax,%eax // convert to 15.17 format so multiply by 1.31 - addl %ebx,%ebx // reciprocal yields 16.48 - - imull reciprocal_table_16-8(,%ecx,4) // sstep = (snext - s) / - // (spancount-1) - movl %edx,%ebp - - movl %ebx,%eax - imull reciprocal_table_16-8(,%ecx,4) // tstep = (tnext - t) / - // (spancount-1) -LSetEntryvec: -// -// set up advancetable -// - movl entryvec_table_16(,%ecx,4),%ebx - movl %edx,%eax - movl %ebx,jumptemp // entry point into code for RET later - movl %ebp,%ecx - sarl $16,%edx // tstep >>= 16; - movl C(cachewidth),%ebx - sarl $16,%ecx // sstep >>= 16; - imull %ebx,%edx - - addl %ecx,%edx // add in sstep - // (tstep >> 16) * cachewidth + (sstep >> 16); - movl tfracf,%ecx - movl %edx,advancetable+4 // advance base in t - addl %ebx,%edx // ((tstep >> 16) + 1) * cachewidth + - // (sstep >> 16); - shll $16,%ebp // left-justify sstep fractional part - movl sfracf,%ebx - shll $16,%eax // left-justify tstep fractional part - movl %edx,advancetable // advance extra in t - - movl %eax,tstep - movl %ecx,%edx - addl %eax,%edx - sbbl %ecx,%ecx - addl %ebp,%ebx - adcl advancetable+4(,%ecx,4),%esi - - jmp *jumptemp // jump to the number-of-pixels handler - -//---------------------------------------- - -LNoSteps: - movb (%esi),%al // load first texel in segment - subl $15,%edi // adjust for hardwired offset - jmp LEndSpan - - -LOnlyOneStep: - subl s,%eax - subl t,%ebx - movl %eax,%ebp - movl %ebx,%edx - jmp LSetEntryvec - -//---------------------------------------- - -.globl Entry2_16, Entry3_16, Entry4_16, Entry5_16 -.globl Entry6_16, Entry7_16, Entry8_16, Entry9_16 -.globl Entry10_16, Entry11_16, Entry12_16, Entry13_16 -.globl Entry14_16, Entry15_16, Entry16_16 - -Entry2_16: - subl $14,%edi // adjust for hardwired offsets - movb (%esi),%al - jmp LEntry2_16 - -//---------------------------------------- - -Entry3_16: - subl $13,%edi // adjust for hardwired offsets - addl %eax,%edx - movb (%esi),%al - sbbl %ecx,%ecx - addl %ebp,%ebx - adcl advancetable+4(,%ecx,4),%esi - jmp LEntry3_16 - -//---------------------------------------- - -Entry4_16: - subl $12,%edi // adjust for hardwired offsets - addl %eax,%edx - movb (%esi),%al - sbbl %ecx,%ecx - addl %ebp,%ebx - adcl advancetable+4(,%ecx,4),%esi - addl tstep,%edx - jmp LEntry4_16 - -//---------------------------------------- - -Entry5_16: - subl $11,%edi // adjust for hardwired offsets - addl %eax,%edx - movb (%esi),%al - sbbl %ecx,%ecx - addl %ebp,%ebx - adcl advancetable+4(,%ecx,4),%esi - addl tstep,%edx - jmp LEntry5_16 - -//---------------------------------------- - -Entry6_16: - subl $10,%edi // adjust for hardwired offsets - addl %eax,%edx - movb (%esi),%al - sbbl %ecx,%ecx - addl %ebp,%ebx - adcl advancetable+4(,%ecx,4),%esi - addl tstep,%edx - jmp LEntry6_16 - -//---------------------------------------- - -Entry7_16: - subl $9,%edi // adjust for hardwired offsets - addl %eax,%edx - movb (%esi),%al - sbbl %ecx,%ecx - addl %ebp,%ebx - adcl advancetable+4(,%ecx,4),%esi - addl tstep,%edx - jmp LEntry7_16 - -//---------------------------------------- - -Entry8_16: - subl $8,%edi // adjust for hardwired offsets - addl %eax,%edx - movb (%esi),%al - sbbl %ecx,%ecx - addl %ebp,%ebx - adcl advancetable+4(,%ecx,4),%esi - addl tstep,%edx - jmp LEntry8_16 - -//---------------------------------------- - -Entry9_16: - subl $7,%edi // adjust for hardwired offsets - addl %eax,%edx - movb (%esi),%al - sbbl %ecx,%ecx - addl %ebp,%ebx - adcl advancetable+4(,%ecx,4),%esi - addl tstep,%edx - jmp LEntry9_16 - -//---------------------------------------- - -Entry10_16: - subl $6,%edi // adjust for hardwired offsets - addl %eax,%edx - movb (%esi),%al - sbbl %ecx,%ecx - addl %ebp,%ebx - adcl advancetable+4(,%ecx,4),%esi - addl tstep,%edx - jmp LEntry10_16 - -//---------------------------------------- - -Entry11_16: - subl $5,%edi // adjust for hardwired offsets - addl %eax,%edx - movb (%esi),%al - sbbl %ecx,%ecx - addl %ebp,%ebx - adcl advancetable+4(,%ecx,4),%esi - addl tstep,%edx - jmp LEntry11_16 - -//---------------------------------------- - -Entry12_16: - subl $4,%edi // adjust for hardwired offsets - addl %eax,%edx - movb (%esi),%al - sbbl %ecx,%ecx - addl %ebp,%ebx - adcl advancetable+4(,%ecx,4),%esi - addl tstep,%edx - jmp LEntry12_16 - -//---------------------------------------- - -Entry13_16: - subl $3,%edi // adjust for hardwired offsets - addl %eax,%edx - movb (%esi),%al - sbbl %ecx,%ecx - addl %ebp,%ebx - adcl advancetable+4(,%ecx,4),%esi - addl tstep,%edx - jmp LEntry13_16 - -//---------------------------------------- - -Entry14_16: - subl $2,%edi // adjust for hardwired offsets - addl %eax,%edx - movb (%esi),%al - sbbl %ecx,%ecx - addl %ebp,%ebx - adcl advancetable+4(,%ecx,4),%esi - addl tstep,%edx - jmp LEntry14_16 - -//---------------------------------------- - -Entry15_16: - decl %edi // adjust for hardwired offsets - addl %eax,%edx - movb (%esi),%al - sbbl %ecx,%ecx - addl %ebp,%ebx - adcl advancetable+4(,%ecx,4),%esi - addl tstep,%edx - jmp LEntry15_16 - -//---------------------------------------- - -Entry16_16: - addl %eax,%edx - movb (%esi),%al - sbbl %ecx,%ecx - addl %ebp,%ebx - adcl advancetable+4(,%ecx,4),%esi - - addl tstep,%edx - sbbl %ecx,%ecx - movb %al,1(%edi) - addl %ebp,%ebx - movb (%esi),%al - adcl advancetable+4(,%ecx,4),%esi - addl tstep,%edx -LEntry15_16: - sbbl %ecx,%ecx - movb %al,2(%edi) - addl %ebp,%ebx - movb (%esi),%al - adcl advancetable+4(,%ecx,4),%esi - addl tstep,%edx -LEntry14_16: - sbbl %ecx,%ecx - movb %al,3(%edi) - addl %ebp,%ebx - movb (%esi),%al - adcl advancetable+4(,%ecx,4),%esi - addl tstep,%edx -LEntry13_16: - sbbl %ecx,%ecx - movb %al,4(%edi) - addl %ebp,%ebx - movb (%esi),%al - adcl advancetable+4(,%ecx,4),%esi - addl tstep,%edx -LEntry12_16: - sbbl %ecx,%ecx - movb %al,5(%edi) - addl %ebp,%ebx - movb (%esi),%al - adcl advancetable+4(,%ecx,4),%esi - addl tstep,%edx -LEntry11_16: - sbbl %ecx,%ecx - movb %al,6(%edi) - addl %ebp,%ebx - movb (%esi),%al - adcl advancetable+4(,%ecx,4),%esi - addl tstep,%edx -LEntry10_16: - sbbl %ecx,%ecx - movb %al,7(%edi) - addl %ebp,%ebx - movb (%esi),%al - adcl advancetable+4(,%ecx,4),%esi - addl tstep,%edx -LEntry9_16: - sbbl %ecx,%ecx - movb %al,8(%edi) - addl %ebp,%ebx - movb (%esi),%al - adcl advancetable+4(,%ecx,4),%esi - addl tstep,%edx -LEntry8_16: - sbbl %ecx,%ecx - movb %al,9(%edi) - addl %ebp,%ebx - movb (%esi),%al - adcl advancetable+4(,%ecx,4),%esi - addl tstep,%edx -LEntry7_16: - sbbl %ecx,%ecx - movb %al,10(%edi) - addl %ebp,%ebx - movb (%esi),%al - adcl advancetable+4(,%ecx,4),%esi - addl tstep,%edx -LEntry6_16: - sbbl %ecx,%ecx - movb %al,11(%edi) - addl %ebp,%ebx - movb (%esi),%al - adcl advancetable+4(,%ecx,4),%esi - addl tstep,%edx -LEntry5_16: - sbbl %ecx,%ecx - movb %al,12(%edi) - addl %ebp,%ebx - movb (%esi),%al - adcl advancetable+4(,%ecx,4),%esi - addl tstep,%edx -LEntry4_16: - sbbl %ecx,%ecx - movb %al,13(%edi) - addl %ebp,%ebx - movb (%esi),%al - adcl advancetable+4(,%ecx,4),%esi -LEntry3_16: - movb %al,14(%edi) - movb (%esi),%al -LEntry2_16: - -LEndSpan: - -// -// clear s/z, t/z, 1/z from FP stack -// - fstp %st(0) - fstp %st(0) - fstp %st(0) - - movl pspantemp,%ebx // restore spans pointer - movl espan_t_pnext(%ebx),%ebx // point to next span - testl %ebx,%ebx // any more spans? - movb %al,15(%edi) - jnz LSpanLoop // more spans - - popl %ebx // restore register variables - popl %esi - popl %edi - popl %ebp // restore the caller's stack frame - ret - -//---------------------------------------------------------------------- -// 8-bpp horizontal span z drawing codefor polygons, with no transparency. -// -// Assumes there is at least one span in pzspans, and that every span -// contains at least one pixel -//---------------------------------------------------------------------- - - .text - -// z-clamp on a non-negative gradient span -LClamp: - movl $0x40000000,%edx - xorl %ebx,%ebx - fstp %st(0) - jmp LZDraw - -// z-clamp on a negative gradient span -LClampNeg: - movl $0x40000000,%edx - xorl %ebx,%ebx - fstp %st(0) - jmp LZDrawNeg - - -#define pzspans 4+16 - -.globl C(D_DrawZSpans) -C(D_DrawZSpans): - pushl %ebp // preserve caller's stack frame - pushl %edi - pushl %esi // preserve register variables - pushl %ebx - - flds C(d_zistepu) - movl C(d_zistepu),%eax - movl pzspans(%esp),%esi - testl %eax,%eax - jz LFNegSpan - - fmuls Float2ToThe31nd - fistpl izistep // note: we are relying on FP exceptions being turned - // off here to avoid range problems - movl izistep,%ebx // remains loaded for all spans - -LFSpanLoop: -// set up the initial 1/z value - fildl espan_t_v(%esi) - fildl espan_t_u(%esi) - movl espan_t_v(%esi),%ecx - movl C(d_pzbuffer),%edi - fmuls C(d_zistepu) - fxch %st(1) - fmuls C(d_zistepv) - fxch %st(1) - fadds C(d_ziorigin) - imull C(d_zrowbytes),%ecx - faddp %st(0),%st(1) - -// clamp if z is nearer than 2 (1/z > 0.5) - fcoms float_point5 - addl %ecx,%edi - movl espan_t_u(%esi),%edx - addl %edx,%edx // word count - movl espan_t_count(%esi),%ecx - addl %edx,%edi // pdest = &pdestspan[scans->u]; - pushl %esi // preserve spans pointer - fnstsw %ax - testb $0x45,%ah - jz LClamp - - fmuls Float2ToThe31nd - fistpl izi // note: we are relying on FP exceptions being turned - // off here to avoid problems when the span is closer - // than 1/(2**31) - movl izi,%edx - -// at this point: -// %ebx = izistep -// %ecx = count -// %edx = izi -// %edi = pdest - -LZDraw: - -// do a single pixel up front, if necessary to dword align the destination - testl $2,%edi - jz LFMiddle - movl %edx,%eax - addl %ebx,%edx - shrl $16,%eax - decl %ecx - movw %ax,(%edi) - addl $2,%edi - -// do middle a pair of aligned dwords at a time -LFMiddle: - pushl %ecx - shrl $1,%ecx // count / 2 - jz LFLast // no aligned dwords to do - shrl $1,%ecx // (count / 2) / 2 - jnc LFMiddleLoop // even number of aligned dwords to do - - movl %edx,%eax - addl %ebx,%edx - shrl $16,%eax - movl %edx,%esi - addl %ebx,%edx - andl $0xFFFF0000,%esi - orl %esi,%eax - movl %eax,(%edi) - addl $4,%edi - andl %ecx,%ecx - jz LFLast - -LFMiddleLoop: - movl %edx,%eax - addl %ebx,%edx - shrl $16,%eax - movl %edx,%esi - addl %ebx,%edx - andl $0xFFFF0000,%esi - orl %esi,%eax - movl %edx,%ebp - movl %eax,(%edi) - addl %ebx,%edx - shrl $16,%ebp - movl %edx,%esi - addl %ebx,%edx - andl $0xFFFF0000,%esi - orl %esi,%ebp - movl %ebp,4(%edi) // FIXME: eliminate register contention - addl $8,%edi - - decl %ecx - jnz LFMiddleLoop - -LFLast: - popl %ecx // retrieve count - popl %esi // retrieve span pointer - -// do the last, unaligned pixel, if there is one - andl $1,%ecx // is there an odd pixel left to do? - jz LFSpanDone // no - shrl $16,%edx - movw %dx,(%edi) // do the final pixel's z - -LFSpanDone: - movl espan_t_pnext(%esi),%esi - testl %esi,%esi - jnz LFSpanLoop - - jmp LFDone - -LFNegSpan: - fmuls FloatMinus2ToThe31nd - fistpl izistep // note: we are relying on FP exceptions being turned - // off here to avoid range problems - movl izistep,%ebx // remains loaded for all spans - -LFNegSpanLoop: -// set up the initial 1/z value - fildl espan_t_v(%esi) - fildl espan_t_u(%esi) - movl espan_t_v(%esi),%ecx - movl C(d_pzbuffer),%edi - fmuls C(d_zistepu) - fxch %st(1) - fmuls C(d_zistepv) - fxch %st(1) - fadds C(d_ziorigin) - imull C(d_zrowbytes),%ecx - faddp %st(0),%st(1) - -// clamp if z is nearer than 2 (1/z > 0.5) - fcoms float_point5 - addl %ecx,%edi - movl espan_t_u(%esi),%edx - addl %edx,%edx // word count - movl espan_t_count(%esi),%ecx - addl %edx,%edi // pdest = &pdestspan[scans->u]; - pushl %esi // preserve spans pointer - fnstsw %ax - testb $0x45,%ah - jz LClampNeg - - fmuls Float2ToThe31nd - fistpl izi // note: we are relying on FP exceptions being turned - // off here to avoid problems when the span is closer - // than 1/(2**31) - movl izi,%edx - -// at this point: -// %ebx = izistep -// %ecx = count -// %edx = izi -// %edi = pdest - -LZDrawNeg: - -// do a single pixel up front, if necessary to dword align the destination - testl $2,%edi - jz LFNegMiddle - movl %edx,%eax - subl %ebx,%edx - shrl $16,%eax - decl %ecx - movw %ax,(%edi) - addl $2,%edi - -// do middle a pair of aligned dwords at a time -LFNegMiddle: - pushl %ecx - shrl $1,%ecx // count / 2 - jz LFNegLast // no aligned dwords to do - shrl $1,%ecx // (count / 2) / 2 - jnc LFNegMiddleLoop // even number of aligned dwords to do - - movl %edx,%eax - subl %ebx,%edx - shrl $16,%eax - movl %edx,%esi - subl %ebx,%edx - andl $0xFFFF0000,%esi - orl %esi,%eax - movl %eax,(%edi) - addl $4,%edi - andl %ecx,%ecx - jz LFNegLast - -LFNegMiddleLoop: - movl %edx,%eax - subl %ebx,%edx - shrl $16,%eax - movl %edx,%esi - subl %ebx,%edx - andl $0xFFFF0000,%esi - orl %esi,%eax - movl %edx,%ebp - movl %eax,(%edi) - subl %ebx,%edx - shrl $16,%ebp - movl %edx,%esi - subl %ebx,%edx - andl $0xFFFF0000,%esi - orl %esi,%ebp - movl %ebp,4(%edi) // FIXME: eliminate register contention - addl $8,%edi - - decl %ecx - jnz LFNegMiddleLoop - -LFNegLast: - popl %ecx // retrieve count - popl %esi // retrieve span pointer - -// do the last, unaligned pixel, if there is one - andl $1,%ecx // is there an odd pixel left to do? - jz LFNegSpanDone // no - shrl $16,%edx - movw %dx,(%edi) // do the final pixel's z - -LFNegSpanDone: - movl espan_t_pnext(%esi),%esi - testl %esi,%esi - jnz LFNegSpanLoop - -LFDone: - popl %ebx // restore register variables - popl %esi - popl %edi - popl %ebp // restore the caller's stack frame - ret - -#endif // USE_ASM - diff --git a/src/refresh/sw/x86/surf8.S b/src/refresh/sw/x86/surf8.S deleted file mode 100644 index b972f8f..0000000 --- a/src/refresh/sw/x86/surf8.S +++ /dev/null @@ -1,762 +0,0 @@ -// -// surf8.s -// x86 assembly-language 8 bpp surface block drawing code. -// - -#include "common/x86/asm.h" - -#if USE_ASM - - .data - -sb_v: .long 0 - - .text - - .align 4 -.globl C(R_Surf8Start) -C(R_Surf8Start): - -//---------------------------------------------------------------------- -// Surface block drawer for mip level 0 -//---------------------------------------------------------------------- - - .align 4 -.globl C(R_DrawSurfaceBlock8_mip0) -C(R_DrawSurfaceBlock8_mip0): - pushl %ebp // preserve caller's stack frame - pushl %edi - pushl %esi // preserve register variables - pushl %ebx - -// for (v=0 ; v<numvblocks ; v++) -// { - movl C(r_lightptr),%ebx - movl C(r_numvblocks),%eax - - movl %eax,sb_v - movl C(prowdestbase),%edi - - movl C(pbasesource),%esi - -Lv_loop_mip0: - -// lightleft = lightptr[0]; -// lightright = lightptr[1]; -// lightdelta = (lightleft - lightright) & 0xFFFFF; - movl (%ebx),%eax // lightleft - movl 4(%ebx),%edx // lightright - - movl %eax,%ebp - movl C(r_lightwidth),%ecx - - movl %edx,C(lightright) - subl %edx,%ebp - - andl $0xFFFFF,%ebp - leal (%ebx,%ecx,4),%ebx - -// lightptr += lightwidth; - movl %ebx,C(r_lightptr) - -// lightleftstep = (lightptr[0] - lightleft) >> blockdivshift; -// lightrightstep = (lightptr[1] - lightright) >> blockdivshift; -// lightdeltastep = ((lightleftstep - lightrightstep) & 0xFFFFF) | -// 0xF0000000; - movl 4(%ebx),%ecx // lightptr[1] - movl (%ebx),%ebx // lightptr[0] - - subl %eax,%ebx - subl %edx,%ecx - - sarl $4,%ecx - orl $0xF0000000,%ebp - - sarl $4,%ebx - movl %ecx,C(lightrightstep) - - subl %ecx,%ebx - andl $0xFFFFF,%ebx - - orl $0xF0000000,%ebx - subl %ecx,%ecx // high word must be 0 in loop for addressing - - movl %ebx,C(lightdeltastep) - subl %ebx,%ebx // high word must be 0 in loop for addressing - -Lblockloop8_mip0: - movl %ebp,C(lightdelta) - movb 14(%esi),%cl - - sarl $4,%ebp - movb %dh,%bh - - movb 15(%esi),%bl - addl %ebp,%edx - - movb %dh,%ch - addl %ebp,%edx - - movb 0x12345678(%ebx),%ah -LBPatch0: - movb 13(%esi),%bl - - movb 0x12345678(%ecx),%al -LBPatch1: - movb 12(%esi),%cl - - movb %dh,%bh - addl %ebp,%edx - - rorl $16,%eax - movb %dh,%ch - - addl %ebp,%edx - movb 0x12345678(%ebx),%ah -LBPatch2: - - movb 11(%esi),%bl - movb 0x12345678(%ecx),%al -LBPatch3: - - movb 10(%esi),%cl - movl %eax,12(%edi) - - movb %dh,%bh - addl %ebp,%edx - - movb %dh,%ch - addl %ebp,%edx - - movb 0x12345678(%ebx),%ah -LBPatch4: - movb 9(%esi),%bl - - movb 0x12345678(%ecx),%al -LBPatch5: - movb 8(%esi),%cl - - movb %dh,%bh - addl %ebp,%edx - - rorl $16,%eax - movb %dh,%ch - - addl %ebp,%edx - movb 0x12345678(%ebx),%ah -LBPatch6: - - movb 7(%esi),%bl - movb 0x12345678(%ecx),%al -LBPatch7: - - movb 6(%esi),%cl - movl %eax,8(%edi) - - movb %dh,%bh - addl %ebp,%edx - - movb %dh,%ch - addl %ebp,%edx - - movb 0x12345678(%ebx),%ah -LBPatch8: - movb 5(%esi),%bl - - movb 0x12345678(%ecx),%al -LBPatch9: - movb 4(%esi),%cl - - movb %dh,%bh - addl %ebp,%edx - - rorl $16,%eax - movb %dh,%ch - - addl %ebp,%edx - movb 0x12345678(%ebx),%ah -LBPatch10: - - movb 3(%esi),%bl - movb 0x12345678(%ecx),%al -LBPatch11: - - movb 2(%esi),%cl - movl %eax,4(%edi) - - movb %dh,%bh - addl %ebp,%edx - - movb %dh,%ch - addl %ebp,%edx - - movb 0x12345678(%ebx),%ah -LBPatch12: - movb 1(%esi),%bl - - movb 0x12345678(%ecx),%al -LBPatch13: - movb (%esi),%cl - - movb %dh,%bh - addl %ebp,%edx - - rorl $16,%eax - movb %dh,%ch - - movb 0x12345678(%ebx),%ah -LBPatch14: - movl C(lightright),%edx - - movb 0x12345678(%ecx),%al -LBPatch15: - movl C(lightdelta),%ebp - - movl %eax,(%edi) - - addl C(sourcetstep),%esi - addl C(surfrowbytes),%edi - - addl C(lightrightstep),%edx - addl C(lightdeltastep),%ebp - - movl %edx,C(lightright) - jc Lblockloop8_mip0 - -// if (pbasesource >= r_sourcemax) -// pbasesource -= stepback; - - cmpl C(r_sourcemax),%esi - jb LSkip_mip0 - subl C(r_stepback),%esi -LSkip_mip0: - - movl C(r_lightptr),%ebx - decl sb_v - - jnz Lv_loop_mip0 - - popl %ebx // restore register variables - popl %esi - popl %edi - popl %ebp // restore the caller's stack frame - ret - - -//---------------------------------------------------------------------- -// Surface block drawer for mip level 1 -//---------------------------------------------------------------------- - - .align 4 -.globl C(R_DrawSurfaceBlock8_mip1) -C(R_DrawSurfaceBlock8_mip1): - pushl %ebp // preserve caller's stack frame - pushl %edi - pushl %esi // preserve register variables - pushl %ebx - -// for (v=0 ; v<numvblocks ; v++) -// { - movl C(r_lightptr),%ebx - movl C(r_numvblocks),%eax - - movl %eax,sb_v - movl C(prowdestbase),%edi - - movl C(pbasesource),%esi - -Lv_loop_mip1: - -// lightleft = lightptr[0]; -// lightright = lightptr[1]; -// lightdelta = (lightleft - lightright) & 0xFFFFF; - movl (%ebx),%eax // lightleft - movl 4(%ebx),%edx // lightright - - movl %eax,%ebp - movl C(r_lightwidth),%ecx - - movl %edx,C(lightright) - subl %edx,%ebp - - andl $0xFFFFF,%ebp - leal (%ebx,%ecx,4),%ebx - -// lightptr += lightwidth; - movl %ebx,C(r_lightptr) - -// lightleftstep = (lightptr[0] - lightleft) >> blockdivshift; -// lightrightstep = (lightptr[1] - lightright) >> blockdivshift; -// lightdeltastep = ((lightleftstep - lightrightstep) & 0xFFFFF) | -// 0xF0000000; - movl 4(%ebx),%ecx // lightptr[1] - movl (%ebx),%ebx // lightptr[0] - - subl %eax,%ebx - subl %edx,%ecx - - sarl $3,%ecx - orl $0x70000000,%ebp - - sarl $3,%ebx - movl %ecx,C(lightrightstep) - - subl %ecx,%ebx - andl $0xFFFFF,%ebx - - orl $0xF0000000,%ebx - subl %ecx,%ecx // high word must be 0 in loop for addressing - - movl %ebx,C(lightdeltastep) - subl %ebx,%ebx // high word must be 0 in loop for addressing - -Lblockloop8_mip1: - movl %ebp,C(lightdelta) - movb 6(%esi),%cl - - sarl $3,%ebp - movb %dh,%bh - - movb 7(%esi),%bl - addl %ebp,%edx - - movb %dh,%ch - addl %ebp,%edx - - movb 0x12345678(%ebx),%ah -LBPatch22: - movb 5(%esi),%bl - - movb 0x12345678(%ecx),%al -LBPatch23: - movb 4(%esi),%cl - - movb %dh,%bh - addl %ebp,%edx - - rorl $16,%eax - movb %dh,%ch - - addl %ebp,%edx - movb 0x12345678(%ebx),%ah -LBPatch24: - - movb 3(%esi),%bl - movb 0x12345678(%ecx),%al -LBPatch25: - - movb 2(%esi),%cl - movl %eax,4(%edi) - - movb %dh,%bh - addl %ebp,%edx - - movb %dh,%ch - addl %ebp,%edx - - movb 0x12345678(%ebx),%ah -LBPatch26: - movb 1(%esi),%bl - - movb 0x12345678(%ecx),%al -LBPatch27: - movb (%esi),%cl - - movb %dh,%bh - addl %ebp,%edx - - rorl $16,%eax - movb %dh,%ch - - movb 0x12345678(%ebx),%ah -LBPatch28: - movl C(lightright),%edx - - movb 0x12345678(%ecx),%al -LBPatch29: - movl C(lightdelta),%ebp - - movl %eax,(%edi) - movl C(sourcetstep),%eax - - addl %eax,%esi - movl C(surfrowbytes),%eax - - addl %eax,%edi - movl C(lightrightstep),%eax - - addl %eax,%edx - movl C(lightdeltastep),%eax - - addl %eax,%ebp - movl %edx,C(lightright) - - jc Lblockloop8_mip1 - -// if (pbasesource >= r_sourcemax) -// pbasesource -= stepback; - - cmpl C(r_sourcemax),%esi - jb LSkip_mip1 - subl C(r_stepback),%esi -LSkip_mip1: - - movl C(r_lightptr),%ebx - decl sb_v - - jnz Lv_loop_mip1 - - popl %ebx // restore register variables - popl %esi - popl %edi - popl %ebp // restore the caller's stack frame - ret - - -//---------------------------------------------------------------------- -// Surface block drawer for mip level 2 -//---------------------------------------------------------------------- - - .align 4 -.globl C(R_DrawSurfaceBlock8_mip2) -C(R_DrawSurfaceBlock8_mip2): - pushl %ebp // preserve caller's stack frame - pushl %edi - pushl %esi // preserve register variables - pushl %ebx - -// for (v=0 ; v<numvblocks ; v++) -// { - movl C(r_lightptr),%ebx - movl C(r_numvblocks),%eax - - movl %eax,sb_v - movl C(prowdestbase),%edi - - movl C(pbasesource),%esi - -Lv_loop_mip2: - -// lightleft = lightptr[0]; -// lightright = lightptr[1]; -// lightdelta = (lightleft - lightright) & 0xFFFFF; - movl (%ebx),%eax // lightleft - movl 4(%ebx),%edx // lightright - - movl %eax,%ebp - movl C(r_lightwidth),%ecx - - movl %edx,C(lightright) - subl %edx,%ebp - - andl $0xFFFFF,%ebp - leal (%ebx,%ecx,4),%ebx - -// lightptr += lightwidth; - movl %ebx,C(r_lightptr) - -// lightleftstep = (lightptr[0] - lightleft) >> blockdivshift; -// lightrightstep = (lightptr[1] - lightright) >> blockdivshift; -// lightdeltastep = ((lightleftstep - lightrightstep) & 0xFFFFF) | -// 0xF0000000; - movl 4(%ebx),%ecx // lightptr[1] - movl (%ebx),%ebx // lightptr[0] - - subl %eax,%ebx - subl %edx,%ecx - - sarl $2,%ecx - orl $0x30000000,%ebp - - sarl $2,%ebx - movl %ecx,C(lightrightstep) - - subl %ecx,%ebx - - andl $0xFFFFF,%ebx - - orl $0xF0000000,%ebx - subl %ecx,%ecx // high word must be 0 in loop for addressing - - movl %ebx,C(lightdeltastep) - subl %ebx,%ebx // high word must be 0 in loop for addressing - -Lblockloop8_mip2: - movl %ebp,C(lightdelta) - movb 2(%esi),%cl - - sarl $2,%ebp - movb %dh,%bh - - movb 3(%esi),%bl - addl %ebp,%edx - - movb %dh,%ch - addl %ebp,%edx - - movb 0x12345678(%ebx),%ah -LBPatch18: - movb 1(%esi),%bl - - movb 0x12345678(%ecx),%al -LBPatch19: - movb (%esi),%cl - - movb %dh,%bh - addl %ebp,%edx - - rorl $16,%eax - movb %dh,%ch - - movb 0x12345678(%ebx),%ah -LBPatch20: - movl C(lightright),%edx - - movb 0x12345678(%ecx),%al -LBPatch21: - movl C(lightdelta),%ebp - - movl %eax,(%edi) - movl C(sourcetstep),%eax - - addl %eax,%esi - movl C(surfrowbytes),%eax - - addl %eax,%edi - movl C(lightrightstep),%eax - - addl %eax,%edx - movl C(lightdeltastep),%eax - - addl %eax,%ebp - movl %edx,C(lightright) - - jc Lblockloop8_mip2 - -// if (pbasesource >= r_sourcemax) -// pbasesource -= stepback; - - cmpl C(r_sourcemax),%esi - jb LSkip_mip2 - subl C(r_stepback),%esi -LSkip_mip2: - - movl C(r_lightptr),%ebx - decl sb_v - - jnz Lv_loop_mip2 - - popl %ebx // restore register variables - popl %esi - popl %edi - popl %ebp // restore the caller's stack frame - ret - - -//---------------------------------------------------------------------- -// Surface block drawer for mip level 3 -//---------------------------------------------------------------------- - - .align 4 -.globl C(R_DrawSurfaceBlock8_mip3) -C(R_DrawSurfaceBlock8_mip3): - pushl %ebp // preserve caller's stack frame - pushl %edi - pushl %esi // preserve register variables - pushl %ebx - -// for (v=0 ; v<numvblocks ; v++) -// { - movl C(r_lightptr),%ebx - movl C(r_numvblocks),%eax - - movl %eax,sb_v - movl C(prowdestbase),%edi - - movl C(pbasesource),%esi - -Lv_loop_mip3: - -// lightleft = lightptr[0]; -// lightright = lightptr[1]; -// lightdelta = (lightleft - lightright) & 0xFFFFF; - movl (%ebx),%eax // lightleft - movl 4(%ebx),%edx // lightright - - movl %eax,%ebp - movl C(r_lightwidth),%ecx - - movl %edx,C(lightright) - subl %edx,%ebp - - andl $0xFFFFF,%ebp - leal (%ebx,%ecx,4),%ebx - - movl %ebp,C(lightdelta) -// lightptr += lightwidth; - movl %ebx,C(r_lightptr) - -// lightleftstep = (lightptr[0] - lightleft) >> blockdivshift; -// lightrightstep = (lightptr[1] - lightright) >> blockdivshift; -// lightdeltastep = ((lightleftstep - lightrightstep) & 0xFFFFF) | -// 0xF0000000; - movl 4(%ebx),%ecx // lightptr[1] - movl (%ebx),%ebx // lightptr[0] - - subl %eax,%ebx - subl %edx,%ecx - - sarl $1,%ecx - - sarl $1,%ebx - movl %ecx,C(lightrightstep) - - subl %ecx,%ebx - andl $0xFFFFF,%ebx - - sarl $1,%ebp - orl $0xF0000000,%ebx - - movl %ebx,C(lightdeltastep) - subl %ebx,%ebx // high word must be 0 in loop for addressing - - movb 1(%esi),%bl - subl %ecx,%ecx // high word must be 0 in loop for addressing - - movb %dh,%bh - movb (%esi),%cl - - addl %ebp,%edx - movb %dh,%ch - - movb 0x12345678(%ebx),%al -LBPatch16: - movl C(lightright),%edx - - movb %al,1(%edi) - movb 0x12345678(%ecx),%al -LBPatch17: - - movb %al,(%edi) - movl C(sourcetstep),%eax - - addl %eax,%esi - movl C(surfrowbytes),%eax - - addl %eax,%edi - movl C(lightdeltastep),%eax - - movl C(lightdelta),%ebp - movb (%esi),%cl - - addl %eax,%ebp - movl C(lightrightstep),%eax - - sarl $1,%ebp - addl %eax,%edx - - movb %dh,%bh - movb 1(%esi),%bl - - addl %ebp,%edx - movb %dh,%ch - - movb 0x12345678(%ebx),%al -LBPatch30: - movl C(sourcetstep),%edx - - movb %al,1(%edi) - movb 0x12345678(%ecx),%al -LBPatch31: - - movb %al,(%edi) - movl C(surfrowbytes),%ebp - - addl %edx,%esi - addl %ebp,%edi - -// if (pbasesource >= r_sourcemax) -// pbasesource -= stepback; - - cmpl C(r_sourcemax),%esi - jb LSkip_mip3 - subl C(r_stepback),%esi -LSkip_mip3: - - movl C(r_lightptr),%ebx - decl sb_v - - jnz Lv_loop_mip3 - - popl %ebx // restore register variables - popl %esi - popl %edi - popl %ebp // restore the caller's stack frame - ret - - -.globl C(R_Surf8End) -C(R_Surf8End): - -//---------------------------------------------------------------------- -// Code patching routines -//---------------------------------------------------------------------- - .data - - .align 4 -LPatchTable8: - .long LBPatch0-4 - .long LBPatch1-4 - .long LBPatch2-4 - .long LBPatch3-4 - .long LBPatch4-4 - .long LBPatch5-4 - .long LBPatch6-4 - .long LBPatch7-4 - .long LBPatch8-4 - .long LBPatch9-4 - .long LBPatch10-4 - .long LBPatch11-4 - .long LBPatch12-4 - .long LBPatch13-4 - .long LBPatch14-4 - .long LBPatch15-4 - .long LBPatch16-4 - .long LBPatch17-4 - .long LBPatch18-4 - .long LBPatch19-4 - .long LBPatch20-4 - .long LBPatch21-4 - .long LBPatch22-4 - .long LBPatch23-4 - .long LBPatch24-4 - .long LBPatch25-4 - .long LBPatch26-4 - .long LBPatch27-4 - .long LBPatch28-4 - .long LBPatch29-4 - .long LBPatch30-4 - .long LBPatch31-4 - - .text - - .align 4 -.globl C(R_Surf8Patch) -C(R_Surf8Patch): - pushl %ebx - - movl C(d_pcolormap),%eax - movl $LPatchTable8,%ebx - movl $32,%ecx -LPatchLoop8: - movl (%ebx),%edx - addl $4,%ebx - movl %eax,(%edx) - decl %ecx - jnz LPatchLoop8 - - popl %ebx - - ret - -#endif // USE_ASM diff --git a/src/refresh/sw/x86/sw.h b/src/refresh/sw/x86/sw.h deleted file mode 100644 index f817fa6..0000000 --- a/src/refresh/sw/x86/sw.h +++ /dev/null @@ -1,183 +0,0 @@ -/* -Copyright (C) 1997-2001 Id Software, Inc. - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -*/ - -// -// Include file for asm driver interface. -// - -#define TRANSPARENT_COLOR 255 - -#define ALIAS_ONSEAM 0x0020 - -#define TURB_TEX_SIZE 64 // base turbulent texture size - -#define NEAR_CLIP 0.01 - -#define CYCLE 128 - -#define MAXHEIGHT 1200 - -#define CACHE_SIZE 32 // used to align key data structures - -#define PARTICLE_Z_CLIP 8.0 - -// particle_t structure -// driver-usable fields -#define pt_org 0 -#define pt_color 12 -// drivers never touch the following fields -#define pt_next 16 -#define pt_vel 20 -#define pt_ramp 32 -#define pt_die 36 -#define pt_type 40 -#define pt_size 44 - -// finalvert_t structure -#define fv_v 0 -#define fv_flags 24 -#define fv_reserved 28 -#define fv_size 32 -#define fv_shift 5 - -// stvert_t structure -#define stv_onseam 0 -#define stv_s 4 -#define stv_t 8 -#define stv_size 12 - -// trivertx_t structure -#define tv_v 0 -#define tv_lightnormalindex 3 -#define tv_size 4 - -// affinetridesc_t structure -#define atd_pskin 0 -#define atd_pskindesc 4 -#define atd_skinwidth 8 -#define atd_skinheight 12 -#define atd_ptriangles 16 -#define atd_pfinalverts 20 -#define atd_numtriangles 24 -#define atd_drawtype 28 -#define atd_seamfixupX16 32 -#define atd_size 36 - -// espan_t structure -#define espan_t_u 0 -#define espan_t_v 4 -#define espan_t_count 8 -#define espan_t_pnext 12 -#define espan_t_size 16 - -// sspan_t structure -#define sspan_t_u 0 -#define sspan_t_v 4 -#define sspan_t_count 8 -#define sspan_t_size 12 - -// spanpackage_t structure -#define spanpackage_t_pdest 0 -#define spanpackage_t_pz 4 -#define spanpackage_t_count 8 -#define spanpackage_t_ptex 12 -#define spanpackage_t_sfrac 16 -#define spanpackage_t_tfrac 20 -#define spanpackage_t_light 24 -#define spanpackage_t_zi 28 -#define spanpackage_t_size 32 - -// edge_t structure -#define et_u 0 -#define et_u_step 4 -#define et_prev 8 -#define et_next 12 -#define et_surfs 16 -#define et_nextremove 20 -#define et_nearzi 24 -#define et_owner 28 -#define et_size 32 - -// surf_t structure -#define SURF_T_SHIFT 6 -#define st_next 0 -#define st_prev 4 -#define st_spans 8 -#define st_key 12 -#define st_last_u 16 -#define st_spanstate 20 -#define st_flags 24 -#define st_data 28 -#define st_entity 32 -#define st_nearzi 36 -#define st_insubmodel 40 -#define st_d_ziorigin 44 -#define st_d_zistepu 48 -#define st_d_zistepv 52 -#define st_pad 56 -#define st_size 64 - -// clipplane_t structure -#define cp_normal 0 -#define cp_dist 12 -#define cp_next 16 -#define cp_leftedge 20 -#define cp_rightedge 21 -#define cp_reserved 22 -#define cp_size 24 - -// medge_t structure -#define me_v 0 -#define me_cachededgeoffset 4 -#define me_size 8 - -// mvertex_t structure -#define mv_position 0 -#define mv_size 12 - -// refdef_t structure -#define rd_vrect 0 -#define rd_aliasvrect 20 -#define rd_vrectright 40 -#define rd_vrectbottom 44 -#define rd_aliasvrectright 48 -#define rd_aliasvrectbottom 52 -#define rd_vrectrightedge 56 -#define rd_fvrectx 60 -#define rd_fvrecty 64 -#define rd_fvrectx_adj 68 -#define rd_fvrecty_adj 72 -#define rd_vrect_x_adj_shift20 76 -#define rd_vrectright_adj_shift20 80 -#define rd_fvrectright_adj 84 -#define rd_fvrectbottom_adj 88 -#define rd_fvrectright 92 -#define rd_fvrectbottom 96 -#define rd_horizontalFieldOfView 100 -#define rd_xOrigin 104 -#define rd_yOrigin 108 -#define rd_vieworg 112 -#define rd_viewangles 124 -#define rd_ambientlight 136 -#define rd_size 140 - -// mtriangle_t structure -#define mtri_facesfront 0 -#define mtri_vertindex 4 -#define mtri_size 16 -#define mtri_shift 4 diff --git a/src/refresh/sw/x86/turb8.S b/src/refresh/sw/x86/turb8.S deleted file mode 100644 index fefe228..0000000 --- a/src/refresh/sw/x86/turb8.S +++ /dev/null @@ -1,68 +0,0 @@ -// -// d_scana.s -// x86 assembly-language turbulent texture mapping code -// - -#include "common/x86/asm.h" -#include "sw.h" - -#if USE_ASM - - .data - - .text - -//---------------------------------------------------------------------- -// turbulent texture mapping code -//---------------------------------------------------------------------- - - .align 4 -.globl C(D_DrawTurbulent8Span) -C(D_DrawTurbulent8Span): - pushl %ebp // preserve caller's stack frame pointer - pushl %esi // preserve register variables - pushl %edi - pushl %ebx - - movl C(r_turb_s),%esi - movl C(r_turb_t),%ecx - movl C(r_turb_pdest),%edi - movl C(r_turb_spancount),%ebx - -Llp: - movl %ecx,%eax - movl %esi,%edx - sarl $16,%eax - movl C(r_turb_turb),%ebp - sarl $16,%edx - andl $(CYCLE-1),%eax - andl $(CYCLE-1),%edx - movl (%ebp,%eax,4),%eax - movl (%ebp,%edx,4),%edx - addl %esi,%eax - sarl $16,%eax - addl %ecx,%edx - sarl $16,%edx - andl $(TURB_TEX_SIZE-1),%eax - andl $(TURB_TEX_SIZE-1),%edx - shll $6,%edx - movl C(r_turb_pbase),%ebp - addl %eax,%edx - incl %edi - addl C(r_turb_sstep),%esi - addl C(r_turb_tstep),%ecx - movb (%ebp,%edx,1),%dl - decl %ebx - movb %dl,-1(%edi) - jnz Llp - - movl %edi,C(r_turb_pdest) - - popl %ebx // restore register variables - popl %edi - popl %esi - popl %ebp // restore caller's stack frame pointer - ret - -#endif // USE_ASM - diff --git a/src/refresh/sw/x86/vars.S b/src/refresh/sw/x86/vars.S deleted file mode 100644 index a886bc9..0000000 --- a/src/refresh/sw/x86/vars.S +++ /dev/null @@ -1,157 +0,0 @@ -// -// r_varsa.s -// - -#include "common/x86/asm.h" -#include "sw.h" - -#if USE_ASM - - .data - -//------------------------------------------------------- -// ASM-only variables -//------------------------------------------------------- -.globl float_1, float_particle_z_clip, float_point5 -.globl float_minus_1, float_0 -float_0: .single 0.0 -float_1: .single 1.0 -float_minus_1: .single -1.0 -float_particle_z_clip: .single PARTICLE_Z_CLIP -float_point5: .single 0.5 - -.globl fp_16, fp_64k, fp_1m, fp_64kx64k -.globl fp_1m_minus_1 -.globl fp_8 -fp_1m: .single 1048576.0 -fp_1m_minus_1: .single 1048575.0 -fp_64k: .single 65536.0 -fp_8: .single 8.0 -fp_16: .single 16.0 -fp_64kx64k: .long 0x4f000000 // (float)0x8000*0x10000 - - -.globl FloatZero, Float2ToThe31nd, FloatMinus2ToThe31nd -FloatZero: .long 0 -Float2ToThe31nd: .long 0x4f000000 -FloatMinus2ToThe31nd: .long 0xcf000000 - -.globl C(r_bmodelactive) -C(r_bmodelactive): .long 0 - -//------------------------------------------------------- -// global refresh variables -//------------------------------------------------------- - -// FIXME: put all refresh variables into one contiguous block. Make into one -// big structure, like cl or sv? - - .align 4 -.globl C(d_sdivzstepu) -.globl C(d_tdivzstepu) -.globl C(d_zistepu) -.globl C(d_sdivzstepv) -.globl C(d_tdivzstepv) -.globl C(d_zistepv) -.globl C(d_sdivzorigin) -.globl C(d_tdivzorigin) -.globl C(d_ziorigin) -C(d_sdivzstepu): .single 0 -C(d_tdivzstepu): .single 0 -C(d_zistepu): .single 0 -C(d_sdivzstepv): .single 0 -C(d_tdivzstepv): .single 0 -C(d_zistepv): .single 0 -C(d_sdivzorigin): .single 0 -C(d_tdivzorigin): .single 0 -C(d_ziorigin): .single 0 - -.globl C(sadjust) -.globl C(tadjust) -.globl C(bbextents) -.globl C(bbextentt) -C(sadjust): .long 0 -C(tadjust): .long 0 -C(bbextents): .long 0 -C(bbextentt): .long 0 - -.globl C(cacheblock) -.globl C(d_viewbuffer) -.globl C(cachewidth) -.globl C(d_pzbuffer) -.globl C(d_zrowbytes) -.globl C(d_zwidth) -C(cacheblock): .long 0 -C(cachewidth): .long 0 -C(d_viewbuffer): .long 0 -C(d_pzbuffer): .long 0 -C(d_zrowbytes): .long 0 -C(d_zwidth): .long 0 - - -//------------------------------------------------------- -// ASM-only variables -//------------------------------------------------------- -.globl izi -izi: .long 0 - -.globl pbase, s, t, sfracf, tfracf, snext, tnext -.globl spancountminus1, zi16stepu, sdivz16stepu, tdivz16stepu -.globl zi8stepu, sdivz8stepu, tdivz8stepu, pz -s: .long 0 -t: .long 0 -snext: .long 0 -tnext: .long 0 -sfracf: .long 0 -tfracf: .long 0 -pbase: .long 0 -zi8stepu: .long 0 -sdivz8stepu: .long 0 -tdivz8stepu: .long 0 -zi16stepu: .long 0 -sdivz16stepu: .long 0 -tdivz16stepu: .long 0 -spancountminus1: .long 0 -pz: .long 0 - -.globl izistep -izistep: .long 0 - -//------------------------------------------------------- -// local variables for d_draw16.s -//------------------------------------------------------- - -.globl reciprocal_table_16, entryvec_table_16 -// 1/2, 1/3, 1/4, 1/5, 1/6, 1/7, 1/8, 1/9, 1/10, 1/11, 1/12, 1/13, -// 1/14, and 1/15 in 0.32 form -reciprocal_table_16: .long 0x40000000, 0x2aaaaaaa, 0x20000000 - .long 0x19999999, 0x15555555, 0x12492492 - .long 0x10000000, 0xe38e38e, 0xccccccc, 0xba2e8ba - .long 0xaaaaaaa, 0x9d89d89, 0x9249249, 0x8888888 - -entryvec_table_16: .long 0, Entry2_16, Entry3_16, Entry4_16 - .long Entry5_16, Entry6_16, Entry7_16, Entry8_16 - .long Entry9_16, Entry10_16, Entry11_16, Entry12_16 - .long Entry13_16, Entry14_16, Entry15_16, Entry16_16 - -// -// advancetable is 8 bytes, but points to the middle of that range so negative -// offsets will work -// -.globl advancetable, sstep, tstep, pspantemp, counttemp, jumptemp -advancetable: .long 0, 0 -sstep: .long 0 -tstep: .long 0 - -pspantemp: .long 0 -counttemp: .long 0 -jumptemp: .long 0 - -// 1/2, 1/3, 1/4, 1/5, 1/6, and 1/7 in 0.32 form -.globl reciprocal_table, entryvec_table -reciprocal_table: .long 0x40000000, 0x2aaaaaaa, 0x20000000 - .long 0x19999999, 0x15555555, 0x12492492 - -#endif // USE_ASM - - |