summary refs log tree commit diff
diff options
context:
space:
mode:
author Andrey Nazarov <skuller@skuller.net> 2013-03-16 00:44:15 +0400
committer Andrey Nazarov <skuller@skuller.net> 2013-03-19 03:44:50 +0400
commit 6e5aa4dec92010a3794152a379d95f62fb8cc029 (patch)
tree b5c7a53488a43cd2b55730405f50cd532030d899
parent 5a4744ed5ccab66fa7b45c5369d210807f120137 (diff)
Remove x86 assembly code.
-rw-r--r-- Makefile 39
-rw-r--r-- doc/examples/buildconfig 4
-rw-r--r-- inc/common/x86/asm.h 40
-rw-r--r-- src/common/math.c 4
-rw-r--r-- src/common/x86/math.S 329
-rw-r--r-- src/refresh/sw/aclip.c 20
-rw-r--r-- src/refresh/sw/edge.c 23
-rw-r--r-- src/refresh/sw/main.c 15
-rw-r--r-- src/refresh/sw/misc.c 26
-rw-r--r-- src/refresh/sw/polyset.c 118
-rw-r--r-- src/refresh/sw/raster.c 3
-rw-r--r-- src/refresh/sw/scan.c 4
-rw-r--r-- src/refresh/sw/surf.c 4
-rw-r--r-- src/refresh/sw/sw.h 30
-rw-r--r-- src/refresh/sw/x86/aclip.S 195
-rw-r--r-- src/refresh/sw/x86/draw.S 817
-rw-r--r-- src/refresh/sw/x86/edge.S 730
-rw-r--r-- src/refresh/sw/x86/polyset.S 1247
-rw-r--r-- src/refresh/sw/x86/protect.c 31
-rw-r--r-- src/refresh/sw/x86/span16.S 1227
-rw-r--r-- src/refresh/sw/x86/surf8.S 762
-rw-r--r-- src/refresh/sw/x86/sw.h 183
-rw-r--r-- src/refresh/sw/x86/turb8.S 68
-rw-r--r-- src/refresh/sw/x86/vars.S 157
24 files changed, 18 insertions, 6058 deletions
diff --git a/Makefile b/Makefile
index 3f3cd62..ccb620c 100644
--- a/Makefile
+++ b/Makefile
@@ -29,7 +29,6 @@ RMDIR ?= rm -rf
MKDIR ?= mkdir -p
CFLAGS ?= -O2 -Wall -g -MMD $(INCLUDES)
-ASFLAGS ?=
RCFLAGS ?=
LDFLAGS ?=
LIBS ?=
@@ -38,9 +37,6 @@ CFLAGS_s := -iquote./inc
CFLAGS_c := -iquote./inc
CFLAGS_g := -iquote./inc -fno-strict-aliasing
-ASFLAGS_s := -iquote./inc
-ASFLAGS_c := -iquote./inc
-
RCFLAGS_s :=
RCFLAGS_c :=
RCFLAGS_g :=
@@ -323,18 +319,6 @@ ifdef CONFIG_SOFTWARE_RENDERER
OBJS_c += src/refresh/sw/surf.o
OBJS_c += src/refresh/sw/sird.o
OBJS_c += src/refresh/sw/sky.o
-
- ifdef CONFIG_X86_ASSEMBLY
- OBJS_c += src/refresh/sw/x86/protect.o
- OBJS_c += src/refresh/sw/x86/aclip.o
- OBJS_c += src/refresh/sw/x86/draw.o
- OBJS_c += src/refresh/sw/x86/edge.o
- OBJS_c += src/refresh/sw/x86/polyset.o
- OBJS_c += src/refresh/sw/x86/span16.o
- OBJS_c += src/refresh/sw/x86/surf8.o
- OBJS_c += src/refresh/sw/x86/turb8.o
- OBJS_c += src/refresh/sw/x86/vars.o
- endif
else
CFLAGS_c += -DREF_GL=1 -DUSE_REF=1 -DVID_REF='"gl"'
OBJS_c += src/refresh/gl/draw.o
@@ -558,19 +542,6 @@ ifdef CONFIG_DEBUG
CFLAGS_s += -D_DEBUG
endif
-ifdef CONFIG_X86_ASSEMBLY
- ASFLAGS_c += -DUSE_ASM=1
- ASFLAGS_s += -DUSE_ASM=1
- ifdef CONFIG_WINDOWS
- ASFLAGS_c += -DUNDERSCORES
- ASFLAGS_s += -DUNDERSCORES
- endif
- CFLAGS_c += -DUSE_ASM=1
- CFLAGS_s += -DUSE_ASM=1
- OBJS_c += src/common/x86/math.o
- OBJS_s += src/common/x86/math.o
-endif
-
ifeq ($(CPU),x86)
OBJS_c += src/common/x86/fpu.o
OBJS_s += src/common/x86/fpu.o
@@ -642,11 +613,6 @@ $(BUILD_s)/%.o: %.c
$(Q)$(MKDIR) $(@D)
$(Q)$(CC) -c $(CFLAGS) $(CFLAGS_s) -o $@ $<
-$(BUILD_s)/%.o: %.S
- $(E) [AS] $@
- $(Q)$(MKDIR) $(@D)
- $(Q)$(CC) -c $(ASFLAGS) $(ASFLAGS_s) -o $@ $<
-
$(BUILD_s)/%.o: %.rc
$(E) [RC] $@
$(Q)$(MKDIR) $(@D)
@@ -664,11 +630,6 @@ $(BUILD_c)/%.o: %.c
$(Q)$(MKDIR) $(@D)
$(Q)$(CC) -c $(CFLAGS) $(CFLAGS_c) -o $@ $<
-$(BUILD_c)/%.o: %.S
- $(E) [AS] $@
- $(Q)$(MKDIR) $(@D)
- $(Q)$(CC) -c $(ASFLAGS) $(ASFLAGS_c) -o $@ $<
-
$(BUILD_c)/%.o: %.rc
$(E) [RC] $@
$(Q)$(MKDIR) $(@D)
diff --git a/doc/examples/buildconfig b/doc/examples/buildconfig
index 8cfc83c..b452ebe 100644
--- a/doc/examples/buildconfig
+++ b/doc/examples/buildconfig
@@ -93,10 +93,6 @@
# Options below have no effect on architectures other than x86.
-# Build x86 assembly versions of certain C routines. Mostly useful for software
-# renderer.
-#CONFIG_X86_ASSEMBLY=y
-
# Enable this option on Linux to build a server capable of loading game mods
# built using ancient GCC versions.
#CONFIG_X86_GAME_ABI_HACK=y
diff --git a/inc/common/x86/asm.h b/inc/common/x86/asm.h
deleted file mode 100644
index 6d690db..0000000
--- a/inc/common/x86/asm.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
-Copyright (C) 1997-2001 Id Software, Inc.
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-*/
-
-#ifndef X86_ASM_H
-#define X86_ASM_H
-
-#if HAVE_CONFIG_H
-#include "config.h"
-#endif
-
-#ifndef UNDERSCORES
-#define C(label) label
-#else
-#define C(label) _##label
-#endif
-
-// cplane_t structure
-#define pl_normal 0
-#define pl_dist 12
-#define pl_type 16
-#define pl_signbits 17
-#define pl_pad 18
-#define pl_size 20
-
-#endif // X86_ASM_H
diff --git a/src/common/math.c b/src/common/math.c
index 4c01d54..57e63ba 100644
--- a/src/common/math.c
+++ b/src/common/math.c
@@ -260,7 +260,6 @@ BoxOnPlaneSide
Returns 1, 2, or 1 + 2
==================
*/
-#if !USE_ASM
int BoxOnPlaneSide(vec3_t emins, vec3_t emaxs, cplane_t *p)
{
vec_t *bounds[2] = { emins, emaxs };
@@ -286,6 +285,3 @@ int BoxOnPlaneSide(vec3_t emins, vec3_t emaxs, cplane_t *p)
return sides;
}
-#endif // USE_ASM
-
-
diff --git a/src/common/x86/math.S b/src/common/x86/math.S
deleted file mode 100644
index 587eef7..0000000
--- a/src/common/x86/math.S
+++ /dev/null
@@ -1,329 +0,0 @@
-//
-// math.s
-// x86 assembly-language math routines.
-
-#include "common/x86/asm.h"
-
-#if USE_ASM
-
- .data
-
- .align 4
-Ljmptab: .long Lcase0, Lcase1, Lcase2, Lcase3
- .long Lcase4, Lcase5, Lcase6, Lcase7
-
- .text
-
-#define EMINS 4+4
-#define EMAXS 4+8
-#define P 4+12
-
- .align 2
-.globl C(BoxOnPlaneSide)
-C(BoxOnPlaneSide):
- pushl %ebx
-
- movl P(%esp),%edx
- movl EMINS(%esp),%ecx
- xorl %eax,%eax
- movl EMAXS(%esp),%ebx
- movb pl_signbits(%edx),%al
- cmpb $8,%al
- jge Lerror
- flds pl_normal(%edx) // p->normal[0]
- fld %st(0) // p->normal[0] | p->normal[0]
- jmp *Ljmptab(,%eax,4)
-
-
-//dist1= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
-//dist2= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
-Lcase0:
- fmuls (%ebx) // p->normal[0]*emaxs[0] | p->normal[0]
- flds pl_normal+4(%edx) // p->normal[1] | p->normal[0]*emaxs[0] |
- // p->normal[0]
- fxch %st(2) // p->normal[0] | p->normal[0]*emaxs[0] |
- // p->normal[1]
- fmuls (%ecx) // p->normal[0]*emins[0] |
- // p->normal[0]*emaxs[0] | p->normal[1]
- fxch %st(2) // p->normal[1] | p->normal[0]*emaxs[0] |
- // p->normal[0]*emins[0]
- fld %st(0) // p->normal[1] | p->normal[1] |
- // p->normal[0]*emaxs[0] |
- // p->normal[0]*emins[0]
- fmuls 4(%ebx) // p->normal[1]*emaxs[1] | p->normal[1] |
- // p->normal[0]*emaxs[0] |
- // p->normal[0]*emins[0]
- flds pl_normal+8(%edx) // p->normal[2] | p->normal[1]*emaxs[1] |
- // p->normal[1] | p->normal[0]*emaxs[0] |
- // p->normal[0]*emins[0]
- fxch %st(2) // p->normal[1] | p->normal[1]*emaxs[1] |
- // p->normal[2] | p->normal[0]*emaxs[0] |
- // p->normal[0]*emins[0]
- fmuls 4(%ecx) // p->normal[1]*emins[1] |
- // p->normal[1]*emaxs[1] |
- // p->normal[2] | p->normal[0]*emaxs[0] |
- // p->normal[0]*emins[0]
- fxch %st(2) // p->normal[2] | p->normal[1]*emaxs[1] |
- // p->normal[1]*emins[1] |
- // p->normal[0]*emaxs[0] |
- // p->normal[0]*emins[0]
- fld %st(0) // p->normal[2] | p->normal[2] |
- // p->normal[1]*emaxs[1] |
- // p->normal[1]*emins[1] |
- // p->normal[0]*emaxs[0] |
- // p->normal[0]*emins[0]
- fmuls 8(%ebx) // p->normal[2]*emaxs[2] |
- // p->normal[2] |
- // p->normal[1]*emaxs[1] |
- // p->normal[1]*emins[1] |
- // p->normal[0]*emaxs[0] |
- // p->normal[0]*emins[0]
- fxch %st(5) // p->normal[0]*emins[0] |
- // p->normal[2] |
- // p->normal[1]*emaxs[1] |
- // p->normal[1]*emins[1] |
- // p->normal[0]*emaxs[0] |
- // p->normal[2]*emaxs[2]
- faddp %st(0),%st(3) //p->normal[2] |
- // p->normal[1]*emaxs[1] |
- // p->normal[1]*emins[1]+p->normal[0]*emins[0]|
- // p->normal[0]*emaxs[0] |
- // p->normal[2]*emaxs[2]
- fmuls 8(%ecx) //p->normal[2]*emins[2] |
- // p->normal[1]*emaxs[1] |
- // p->normal[1]*emins[1]+p->normal[0]*emins[0]|
- // p->normal[0]*emaxs[0] |
- // p->normal[2]*emaxs[2]
- fxch %st(1) //p->normal[1]*emaxs[1] |
- // p->normal[2]*emins[2] |
- // p->normal[1]*emins[1]+p->normal[0]*emins[0]|
- // p->normal[0]*emaxs[0] |
- // p->normal[2]*emaxs[2]
- faddp %st(0),%st(3) //p->normal[2]*emins[2] |
- // p->normal[1]*emins[1]+p->normal[0]*emins[0]|
- // p->normal[0]*emaxs[0]+p->normal[1]*emaxs[1]|
- // p->normal[2]*emaxs[2]
- fxch %st(3) //p->normal[2]*emaxs[2] +
- // p->normal[1]*emins[1]+p->normal[0]*emins[0]|
- // p->normal[0]*emaxs[0]+p->normal[1]*emaxs[1]|
- // p->normal[2]*emins[2]
- faddp %st(0),%st(2) //p->normal[1]*emins[1]+p->normal[0]*emins[0]|
- // dist1 | p->normal[2]*emins[2]
-
- jmp LSetSides
-
-//dist1= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
-//dist2= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
-Lcase1:
- fmuls (%ecx) // emins[0]
- flds pl_normal+4(%edx)
- fxch %st(2)
- fmuls (%ebx) // emaxs[0]
- fxch %st(2)
- fld %st(0)
- fmuls 4(%ebx) // emaxs[1]
- flds pl_normal+8(%edx)
- fxch %st(2)
- fmuls 4(%ecx) // emins[1]
- fxch %st(2)
- fld %st(0)
- fmuls 8(%ebx) // emaxs[2]
- fxch %st(5)
- faddp %st(0),%st(3)
- fmuls 8(%ecx) // emins[2]
- fxch %st(1)
- faddp %st(0),%st(3)
- fxch %st(3)
- faddp %st(0),%st(2)
-
- jmp LSetSides
-
-//dist1= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
-//dist2= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
-Lcase2:
- fmuls (%ebx) // emaxs[0]
- flds pl_normal+4(%edx)
- fxch %st(2)
- fmuls (%ecx) // emins[0]
- fxch %st(2)
- fld %st(0)
- fmuls 4(%ecx) // emins[1]
- flds pl_normal+8(%edx)
- fxch %st(2)
- fmuls 4(%ebx) // emaxs[1]
- fxch %st(2)
- fld %st(0)
- fmuls 8(%ebx) // emaxs[2]
- fxch %st(5)
- faddp %st(0),%st(3)
- fmuls 8(%ecx) // emins[2]
- fxch %st(1)
- faddp %st(0),%st(3)
- fxch %st(3)
- faddp %st(0),%st(2)
-
- jmp LSetSides
-
-//dist1= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
-//dist2= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
-Lcase3:
- fmuls (%ecx) // emins[0]
- flds pl_normal+4(%edx)
- fxch %st(2)
- fmuls (%ebx) // emaxs[0]
- fxch %st(2)
- fld %st(0)
- fmuls 4(%ecx) // emins[1]
- flds pl_normal+8(%edx)
- fxch %st(2)
- fmuls 4(%ebx) // emaxs[1]
- fxch %st(2)
- fld %st(0)
- fmuls 8(%ebx) // emaxs[2]
- fxch %st(5)
- faddp %st(0),%st(3)
- fmuls 8(%ecx) // emins[2]
- fxch %st(1)
- faddp %st(0),%st(3)
- fxch %st(3)
- faddp %st(0),%st(2)
-
- jmp LSetSides
-
-//dist1= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
-//dist2= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
-Lcase4:
- fmuls (%ebx) // emaxs[0]
- flds pl_normal+4(%edx)
- fxch %st(2)
- fmuls (%ecx) // emins[0]
- fxch %st(2)
- fld %st(0)
- fmuls 4(%ebx) // emaxs[1]
- flds pl_normal+8(%edx)
- fxch %st(2)
- fmuls 4(%ecx) // emins[1]
- fxch %st(2)
- fld %st(0)
- fmuls 8(%ecx) // emins[2]
- fxch %st(5)
- faddp %st(0),%st(3)
- fmuls 8(%ebx) // emaxs[2]
- fxch %st(1)
- faddp %st(0),%st(3)
- fxch %st(3)
- faddp %st(0),%st(2)
-
- jmp LSetSides
-
-//dist1= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
-//dist2= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
-Lcase5:
- fmuls (%ecx) // emins[0]
- flds pl_normal+4(%edx)
- fxch %st(2)
- fmuls (%ebx) // emaxs[0]
- fxch %st(2)
- fld %st(0)
- fmuls 4(%ebx) // emaxs[1]
- flds pl_normal+8(%edx)
- fxch %st(2)
- fmuls 4(%ecx) // emins[1]
- fxch %st(2)
- fld %st(0)
- fmuls 8(%ecx) // emins[2]
- fxch %st(5)
- faddp %st(0),%st(3)
- fmuls 8(%ebx) // emaxs[2]
- fxch %st(1)
- faddp %st(0),%st(3)
- fxch %st(3)
- faddp %st(0),%st(2)
-
- jmp LSetSides
-
-//dist1= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
-//dist2= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
-Lcase6:
- fmuls (%ebx) // emaxs[0]
- flds pl_normal+4(%edx)
- fxch %st(2)
- fmuls (%ecx) // emins[0]
- fxch %st(2)
- fld %st(0)
- fmuls 4(%ecx) // emins[1]
- flds pl_normal+8(%edx)
- fxch %st(2)
- fmuls 4(%ebx) // emaxs[1]
- fxch %st(2)
- fld %st(0)
- fmuls 8(%ecx) // emins[2]
- fxch %st(5)
- faddp %st(0),%st(3)
- fmuls 8(%ebx) // emaxs[2]
- fxch %st(1)
- faddp %st(0),%st(3)
- fxch %st(3)
- faddp %st(0),%st(2)
-
- jmp LSetSides
-
-//dist1= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
-//dist2= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
-Lcase7:
- fmuls (%ecx) // emins[0]
- flds pl_normal+4(%edx)
- fxch %st(2)
- fmuls (%ebx) // emaxs[0]
- fxch %st(2)
- fld %st(0)
- fmuls 4(%ecx) // emins[1]
- flds pl_normal+8(%edx)
- fxch %st(2)
- fmuls 4(%ebx) // emaxs[1]
- fxch %st(2)
- fld %st(0)
- fmuls 8(%ecx) // emins[2]
- fxch %st(5)
- faddp %st(0),%st(3)
- fmuls 8(%ebx) // emaxs[2]
- fxch %st(1)
- faddp %st(0),%st(3)
- fxch %st(3)
- faddp %st(0),%st(2)
-
-LSetSides:
-
-// sides = 0;
-// if (dist1 >= p->dist)
-// sides = 1;
-// if (dist2 < p->dist)
-// sides |= 2;
-
- faddp %st(0),%st(2) // dist1 | dist2
- fcomps pl_dist(%edx)
- xorl %ecx,%ecx
- fnstsw %ax
- fcomps pl_dist(%edx)
- andb $1,%ah
- xorb $1,%ah
- addb %ah,%cl
-
- fnstsw %ax
- andb $1,%ah
- addb %ah,%ah
- addb %ah,%cl
-
-// return sides;
-
- popl %ebx
- movl %ecx,%eax // return status
-
- ret
-
-
-Lerror:
- xorl %eax,%eax
- ret
-
-#endif // USE_ASM
diff --git a/src/refresh/sw/aclip.c b/src/refresh/sw/aclip.c
index 1551647..83f26e8 100644
--- a/src/refresh/sw/aclip.c
+++ b/src/refresh/sw/aclip.c
@@ -49,20 +49,6 @@ static void R_Alias_clip_z(finalvert_t *pfv0, finalvert_t *pfv1, finalvert_t *ou
R_AliasProjectAndClipTestFinalVert(out);
}
-
-#if USE_ASM
-
-void R_Alias_clip_top(finalvert_t *pfv0, finalvert_t *pfv1,
- finalvert_t *out);
-void R_Alias_clip_bottom(finalvert_t *pfv0, finalvert_t *pfv1,
- finalvert_t *out);
-void R_Alias_clip_left(finalvert_t *pfv0, finalvert_t *pfv1,
- finalvert_t *out);
-void R_Alias_clip_right(finalvert_t *pfv0, finalvert_t *pfv1,
- finalvert_t *out);
-
-#else
-
static void R_Alias_clip_left(finalvert_t *pfv0, finalvert_t *pfv1, finalvert_t *out)
{
float scale;
@@ -88,7 +74,6 @@ static void R_Alias_clip_left(finalvert_t *pfv0, finalvert_t *pfv1, finalvert_t
}
}
-
static void R_Alias_clip_right(finalvert_t *pfv0, finalvert_t *pfv1, finalvert_t *out)
{
float scale;
@@ -114,7 +99,6 @@ static void R_Alias_clip_right(finalvert_t *pfv0, finalvert_t *pfv1, finalvert_t
}
}
-
static void R_Alias_clip_top(finalvert_t *pfv0, finalvert_t *pfv1, finalvert_t *out)
{
float scale;
@@ -140,7 +124,6 @@ static void R_Alias_clip_top(finalvert_t *pfv0, finalvert_t *pfv1, finalvert_t *
}
}
-
static void R_Alias_clip_bottom(finalvert_t *pfv0, finalvert_t *pfv1,
finalvert_t *out)
{
@@ -169,9 +152,6 @@ static void R_Alias_clip_bottom(finalvert_t *pfv0, finalvert_t *pfv1,
}
}
-#endif
-
-
static int R_AliasClip(finalvert_t *in, finalvert_t *out, int flag, int count,
void(*clip)(finalvert_t *pfv0, finalvert_t *pfv1, finalvert_t *out))
{
diff --git a/src/refresh/sw/edge.c b/src/refresh/sw/edge.c
index f83cf86..f4b2e61 100644
--- a/src/refresh/sw/edge.c
+++ b/src/refresh/sw/edge.c
@@ -19,21 +19,6 @@ with this program; if not, write to the Free Software Foundation, Inc.,
#include "sw.h"
-#if !USE_ASM
-void R_SurfacePatch(void)
-{
-}
-
-void R_EdgeCodeStart(void)
-{
-}
-
-void R_EdgeCodeEnd(void)
-{
-}
-#endif
-
-
/*
the complex cases add new polys on most lines, so dont optimize for keeping them the same
have multiple free span lists to try to get better coherence?
@@ -135,8 +120,6 @@ void R_BeginEdgeFrame(void)
}
-#if !USE_ASM
-
/*
==============
R_InsertNewEdges
@@ -254,8 +237,6 @@ pushback:
}
}
-#endif // !USE_ASM
-
/*
==============
@@ -400,8 +381,6 @@ void R_TrailingEdge(surf_t *surf, edge_t *edge)
}
-#if !USE_ASM
-
/*
==============
R_LeadingEdge
@@ -547,8 +526,6 @@ void R_GenerateSpans(void)
R_CleanupSpan();
}
-#endif // !USE_ASM
-
/*
==============
diff --git a/src/refresh/sw/main.c b/src/refresh/sw/main.c
index 467170c..13ce08d 100644
--- a/src/refresh/sw/main.c
+++ b/src/refresh/sw/main.c
@@ -126,12 +126,6 @@ cvar_t *vid_gamma;
cvar_t *sw_lockpvs;
//PGM
-#if USE_ASM
-
-void *d_pcolormap;
-
-#else // USE_ASM
-
// all global and static refresh variables are collected in a contiguous block
// to avoid cache conflicts.
@@ -155,8 +149,6 @@ short *d_pzbuffer;
unsigned int d_zrowbytes;
unsigned int d_zwidth;
-#endif // !USE_ASM
-
int sintable[CYCLE * 2];
int intsintable[CYCLE * 2];
int blanktable[CYCLE * 2]; // PGM
@@ -272,11 +264,6 @@ qboolean R_Init(qboolean total)
Com_DPrintf("ref_soft " VERSION ", " __DATE__ "\n");
-#if USE_ASM
- Sys_MakeCodeWriteable((uintptr_t)R_EdgeCodeStart,
- (uintptr_t)R_EdgeCodeEnd - (uintptr_t)R_EdgeCodeStart);
-#endif
-
r_aliasuvscale = 1.0;
// create the window
@@ -390,7 +377,6 @@ void R_NewMap(void)
// surface 0 doesn't really exist; it's just a dummy because index 0
// is used to indicate no edge attached to surface
surfaces--;
- R_SurfacePatch();
}
r_maxedgesseen = 0;
@@ -792,7 +778,6 @@ void R_EdgeDrawing(void)
// surface 0 doesn't really exist; it's just a dummy because index 0
// is used to indicate no edge attached to surface
surfaces--;
- R_SurfacePatch();
}
R_BeginEdgeFrame();
diff --git a/src/refresh/sw/misc.c b/src/refresh/sw/misc.c
index 6829280..755b5fe 100644
--- a/src/refresh/sw/misc.c
+++ b/src/refresh/sw/misc.c
@@ -38,30 +38,6 @@ short *zspantable[MAXHEIGHT];
/*
================
-D_Patch
-================
-*/
-void D_Patch(void)
-{
-#if USE_ASM
- extern void D_Aff8Patch(void);
- static qboolean protectset8 = qfalse;
- extern void D_PolysetAff8Start(void);
-
- if (!protectset8) {
- Sys_MakeCodeWriteable((uintptr_t)D_PolysetAff8Start,
- (uintptr_t)D_Aff8Patch - (uintptr_t)D_PolysetAff8Start);
- Sys_MakeCodeWriteable((uintptr_t)R_Surf8Start,
- (uintptr_t)R_Surf8End - (uintptr_t)R_Surf8Start);
- protectset8 = qtrue;
- }
-
- R_Surf8Patch();
- D_Aff8Patch();
-#endif
-}
-/*
-================
D_ViewChanged
================
*/
@@ -103,8 +79,6 @@ void D_ViewChanged(void)
memset(d_pzbuffer, 0xff, vid.width * vid.height * sizeof(d_pzbuffer[0]));
R_DrawFill8(r_newrefdef.x, r_newrefdef.y, r_newrefdef.width, r_newrefdef.height, /*(int)sw_clearcolor->value & 0xff*/0);
}
-
- D_Patch();
}
diff --git a/src/refresh/sw/polyset.c b/src/refresh/sw/polyset.c
index 869fc34..73d1d04 100644
--- a/src/refresh/sw/polyset.c
+++ b/src/refresh/sw/polyset.c
@@ -21,11 +21,9 @@ with this program; if not, write to the Free Software Foundation, Inc.,
#include "sw.h"
// TODO: put in span spilling to shrink list size
-// !!! if this is changed, it must be changed in d_polysa.s too !!!
#define DPS_MAXSPANS MAXHEIGHT+1
// 1 extra for spanpackage that marks end
-// !!! if this is changed, it must be changed in asm_draw.h too !!!
typedef struct {
void *pdest;
short *pz;
@@ -378,16 +376,8 @@ void R_PolysetCalcGradients(int skinwidth)
r_zistepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) *
ystepdenominv);
-#if USE_ASM
- if (d_pdrawspans == R_PolysetDrawSpans8_Opaque) {
- a_sstepxfrac = r_sstepx << 16;
- a_tstepxfrac = r_tstepx << 16;
- } else
-#endif
- {
- a_sstepxfrac = r_sstepx & 0xFFFF;
- a_tstepxfrac = r_tstepx & 0xFFFF;
- }
+ a_sstepxfrac = r_sstepx & 0xFFFF;
+ a_tstepxfrac = r_tstepx & 0xFFFF;
a_ststepxwhole = skinwidth * (r_tstepx >> 16) + (r_sstepx >> 16) * TEX_BYTES;
}
@@ -492,7 +482,6 @@ void R_PolysetDrawSpansConstant8_Blended(spanpackage_t *pspanpackage)
} while (pspanpackage->count != -999999);
}
-#if !USE_ASM
void R_PolysetDrawSpans8_Opaque(spanpackage_t *pspanpackage)
{
int lcount;
@@ -550,7 +539,6 @@ void R_PolysetDrawSpans8_Opaque(spanpackage_t *pspanpackage)
pspanpackage++;
} while (pspanpackage->count != -999999);
}
-#endif
/*
@@ -593,16 +581,8 @@ void R_RasterizeAliasPolySmooth(void)
d_ptex = (byte *)r_affinetridesc.pskin + (plefttop[2] >> 16) * TEX_BYTES +
(plefttop[3] >> 16) * r_affinetridesc.skinwidth;
-#if USE_ASM
- if (d_pdrawspans == R_PolysetDrawSpans8_Opaque) {
- d_sfrac = (plefttop[2] & 0xFFFF) << 16;
- d_tfrac = (plefttop[3] & 0xFFFF) << 16;
- } else
-#endif
- {
- d_sfrac = plefttop[2] & 0xFFFF;
- d_tfrac = plefttop[3] & 0xFFFF;
- }
+ d_sfrac = plefttop[2] & 0xFFFF;
+ d_tfrac = plefttop[3] & 0xFFFF;
d_light = plefttop[4];
d_zi = plefttop[5];
@@ -627,16 +607,8 @@ void R_RasterizeAliasPolySmooth(void)
R_PolysetSetUpForLineScan(plefttop[0], plefttop[1],
pleftbottom[0], pleftbottom[1]);
-#if USE_ASM
- if (d_pdrawspans == R_PolysetDrawSpans8_Opaque) {
- d_pzbasestep = (d_zwidth + ubasestep) << 1;
- d_pzextrastep = d_pzbasestep + 2;
- } else
-#endif
- {
- d_pzbasestep = d_zwidth + ubasestep;
- d_pzextrastep = d_pzbasestep + 1;
- }
+ d_pzbasestep = d_zwidth + ubasestep;
+ d_pzextrastep = d_pzbasestep + 1;
d_pdestbasestep = r_screenrowbytes + ubasestep * VID_BYTES;
d_pdestextrastep = d_pdestbasestep + 1 * VID_BYTES;
@@ -655,43 +627,20 @@ void R_RasterizeAliasPolySmooth(void)
d_ptexbasestep = ((r_sstepy + r_sstepx * ubasestep) >> 16) * TEX_BYTES +
((r_tstepy + r_tstepx * ubasestep) >> 16) *
r_affinetridesc.skinwidth;
-#if USE_ASM
- if (d_pdrawspans == R_PolysetDrawSpans8_Opaque) {
- d_sfracbasestep = (r_sstepy + r_sstepx * ubasestep) << 16;
- d_tfracbasestep = (r_tstepy + r_tstepx * ubasestep) << 16;
- } else
-#endif
- {
- d_sfracbasestep = (r_sstepy + r_sstepx * ubasestep) & 0xFFFF;
- d_tfracbasestep = (r_tstepy + r_tstepx * ubasestep) & 0xFFFF;
- }
+ d_sfracbasestep = (r_sstepy + r_sstepx * ubasestep) & 0xFFFF;
+ d_tfracbasestep = (r_tstepy + r_tstepx * ubasestep) & 0xFFFF;
d_lightbasestep = r_lstepy + working_lstepx * ubasestep;
d_zibasestep = r_zistepy + r_zistepx * ubasestep;
d_ptexextrastep = ((r_sstepy + r_sstepx * d_countextrastep) >> 16) * TEX_BYTES +
((r_tstepy + r_tstepx * d_countextrastep) >> 16) *
r_affinetridesc.skinwidth;
-#if USE_ASM
- if (d_pdrawspans == R_PolysetDrawSpans8_Opaque) {
- d_sfracextrastep = (r_sstepy + r_sstepx * d_countextrastep) << 16;
- d_tfracextrastep = (r_tstepy + r_tstepx * d_countextrastep) << 16;
- } else
-#endif
- {
- d_sfracextrastep = (r_sstepy + r_sstepx * d_countextrastep) & 0xFFFF;
- d_tfracextrastep = (r_tstepy + r_tstepx * d_countextrastep) & 0xFFFF;
- }
+ d_sfracextrastep = (r_sstepy + r_sstepx * d_countextrastep) & 0xFFFF;
+ d_tfracextrastep = (r_tstepy + r_tstepx * d_countextrastep) & 0xFFFF;
d_lightextrastep = d_lightbasestep + working_lstepx;
d_ziextrastep = d_zibasestep + r_zistepx;
-#if USE_ASM
- if (d_pdrawspans == R_PolysetDrawSpans8_Opaque) {
- R_PolysetScanLeftEdge(initialleftheight);
- } else
-#endif
- {
- R_PolysetScanLeftEdge_C(initialleftheight);
- }
+ R_PolysetScanLeftEdge_C(initialleftheight);
}
//
@@ -740,16 +689,8 @@ void R_RasterizeAliasPolySmooth(void)
d_pdestbasestep = r_screenrowbytes + ubasestep * VID_BYTES;
d_pdestextrastep = d_pdestbasestep + 1 * VID_BYTES;
-#if USE_ASM
- if (d_pdrawspans == R_PolysetDrawSpans8_Opaque) {
- d_pzbasestep = (d_zwidth + ubasestep) << 1;
- d_pzextrastep = d_pzbasestep + 2;
- } else
-#endif
- {
- d_pzbasestep = d_zwidth + ubasestep;
- d_pzextrastep = d_pzbasestep + 1;
- }
+ d_pzbasestep = d_zwidth + ubasestep;
+ d_pzextrastep = d_pzbasestep + 1;
if (ubasestep < 0)
working_lstepx = r_lstepx - 1;
@@ -760,43 +701,20 @@ void R_RasterizeAliasPolySmooth(void)
d_ptexbasestep = ((r_sstepy + r_sstepx * ubasestep) >> 16) * TEX_BYTES +
((r_tstepy + r_tstepx * ubasestep) >> 16) *
r_affinetridesc.skinwidth;
-#if USE_ASM
- if (d_pdrawspans == R_PolysetDrawSpans8_Opaque) {
- d_sfracbasestep = (r_sstepy + r_sstepx * ubasestep) << 16;
- d_tfracbasestep = (r_tstepy + r_tstepx * ubasestep) << 16;
- } else
-#endif
- {
- d_sfracbasestep = (r_sstepy + r_sstepx * ubasestep) & 0xFFFF;
- d_tfracbasestep = (r_tstepy + r_tstepx * ubasestep) & 0xFFFF;
- }
+ d_sfracbasestep = (r_sstepy + r_sstepx * ubasestep) & 0xFFFF;
+ d_tfracbasestep = (r_tstepy + r_tstepx * ubasestep) & 0xFFFF;
d_lightbasestep = r_lstepy + working_lstepx * ubasestep;
d_zibasestep = r_zistepy + r_zistepx * ubasestep;
d_ptexextrastep = ((r_sstepy + r_sstepx * d_countextrastep) >> 16) * TEX_BYTES +
((r_tstepy + r_tstepx * d_countextrastep) >> 16) *
r_affinetridesc.skinwidth;
-#if USE_ASM
- if (d_pdrawspans == R_PolysetDrawSpans8_Opaque) {
- d_sfracextrastep = ((r_sstepy + r_sstepx * d_countextrastep) & 0xFFFF) << 16;
- d_tfracextrastep = ((r_tstepy + r_tstepx * d_countextrastep) & 0xFFFF) << 16;
- } else
-#endif
- {
- d_sfracextrastep = (r_sstepy + r_sstepx * d_countextrastep) & 0xFFFF;
- d_tfracextrastep = (r_tstepy + r_tstepx * d_countextrastep) & 0xFFFF;
- }
+ d_sfracextrastep = (r_sstepy + r_sstepx * d_countextrastep) & 0xFFFF;
+ d_tfracextrastep = (r_tstepy + r_tstepx * d_countextrastep) & 0xFFFF;
d_lightextrastep = d_lightbasestep + working_lstepx;
d_ziextrastep = d_zibasestep + r_zistepx;
-#if USE_ASM
- if (d_pdrawspans == R_PolysetDrawSpans8_Opaque) {
- R_PolysetScanLeftEdge(height);
- } else
-#endif
- {
- R_PolysetScanLeftEdge_C(height);
- }
+ R_PolysetScanLeftEdge_C(height);
}
}
diff --git a/src/refresh/sw/raster.c b/src/refresh/sw/raster.c
index a50e877..dce9b03 100644
--- a/src/refresh/sw/raster.c
+++ b/src/refresh/sw/raster.c
@@ -55,7 +55,6 @@ int r_ceilv1;
qboolean r_lastvertvalid;
-#if !USE_ASM
/*
================
@@ -301,8 +300,6 @@ void R_ClipEdge(mvertex_t *pv0, mvertex_t *pv1, clipplane_t *clip)
R_EmitEdge(pv0, pv1);
}
-#endif // !USE_ASM
-
/*
================
diff --git a/src/refresh/sw/scan.c b/src/refresh/sw/scan.c
index 8868ee6..5833fe4 100644
--- a/src/refresh/sw/scan.c
+++ b/src/refresh/sw/scan.c
@@ -214,8 +214,6 @@ void D_DrawTurbulent16(espan_t *pspan, int *warptable)
} while ((pspan = pspan->pnext) != NULL);
}
-#if !USE_ASM
-
/*
=============
D_DrawSpans16
@@ -400,5 +398,3 @@ void D_DrawZSpans(espan_t *pspan)
} while ((pspan = pspan->pnext) != NULL);
}
-#endif
-
diff --git a/src/refresh/sw/surf.c b/src/refresh/sw/surf.c
index 45076f0..335bad0 100644
--- a/src/refresh/sw/surf.c
+++ b/src/refresh/sw/surf.c
@@ -143,7 +143,6 @@ void R_DrawSurface(void)
}
//=============================================================================
-#if !USE_ASM
#define BLOCK_FUNC R_DrawSurfaceBlock8_mip0
#define BLOCK_SHIFT 4
@@ -161,9 +160,6 @@ void R_DrawSurface(void)
#define BLOCK_SHIFT 1
#include "block.h"
-#endif
-
-
//============================================================================
diff --git a/src/refresh/sw/sw.h b/src/refresh/sw/sw.h
index e9e3c48..603aa3c 100644
--- a/src/refresh/sw/sw.h
+++ b/src/refresh/sw/sw.h
@@ -51,7 +51,6 @@ typedef struct {
extern viddef_t vid;
-// !!! if this is changed, it must be changed in asm_draw.h too !!!
typedef struct {
vrectSoft_t vrect; // subwindow in video for refresh
// FIXME: not need vrect next field here?
@@ -105,7 +104,6 @@ extern oldrefdef_t r_refdef;
#define MAXWORKINGVERTS (MAXVERTS + 4) // max points in an intermediate
// polygon (while processing)
-// !!! if this is changed, it must be changed in d_ifacea.h too !!!
#define MAXHEIGHT 1200
#define MAXWIDTH 1600
@@ -124,14 +122,11 @@ extern oldrefdef_t r_refdef;
#define PARTICLE_Z_CLIP 8.0
-// !!! must be kept the same as in quakeasm.h !!!
#define TRANSPARENT_COLOR 0xFF
-// !!! if this is changed, it must be changed in d_ifacea.h too !!!
#define TURB_TEX_SIZE 64 // base turbulent texture size
-// !!! if this is changed, it must be changed in d_ifacea.h too !!!
#define CYCLE 128 // turbulent cycle size
#define SCANBUFFERPAD 0x1000
@@ -166,7 +161,6 @@ extern oldrefdef_t r_refdef;
#define BACKFACE_EPSILON 0.01
-// !!! if this is changed, it must be changed in asm_draw.h too !!!
#define NEAR_CLIP 0.01
@@ -198,10 +192,6 @@ typedef struct {
float zi;
} emitpoint_t;
-/*
-** if you change this structure be sure to change the #defines
-** listed after it!
-*/
typedef struct finalvert_s {
int u, v, s, t;
int l;
@@ -210,18 +200,6 @@ typedef struct finalvert_s {
float xyz[3]; // eye space
} finalvert_t;
-#define FINALVERT_V0 0
-#define FINALVERT_V1 4
-#define FINALVERT_V2 8
-#define FINALVERT_V3 12
-#define FINALVERT_V4 16
-#define FINALVERT_V5 20
-#define FINALVERT_FLAGS 24
-#define FINALVERT_X 28
-#define FINALVERT_Y 32
-#define FINALVERT_Z 36
-#define FINALVERT_SIZE 40
-
typedef struct {
void *pskin;
int pskindesc;
@@ -256,7 +234,6 @@ typedef struct bedge_s {
} bedge_t;
-// !!! if this is changed, it must be changed in asm_draw.h too !!!
typedef struct clipplane_s {
vec3_t normal;
float dist;
@@ -284,7 +261,6 @@ typedef struct surfcache_s {
byte data[4]; // width*height elements
} surfcache_t;
-// !!! if this is changed, it must be changed in asm_draw.h too !!!
typedef struct espan_s {
int u, v, count;
struct espan_s *pnext;
@@ -327,7 +303,6 @@ typedef struct surf_s {
int pad[2]; // to 64 bytes
} surf_t;
-// !!! if this is changed, it must be changed in asm_draw.h too !!!
typedef struct edge_s {
fixed16_t u;
fixed16_t u_step;
@@ -589,7 +564,6 @@ extern int r_currentbkey;
void R_InitTurb(void);
void R_DrawParticles(void);
-void R_SurfacePatch(void);
extern int r_amodels_drawn;
extern edge_t *auxedges;
@@ -684,10 +658,6 @@ void R_RenderFrame(refdef_t *fd);
void R_BeginFrame(void);
-#if USE_ASM
-void Sys_MakeCodeWriteable(uintptr_t start, size_t length);
-#endif
-
void R_InitImages(void);
void R_ShutdownImages(void);
diff --git a/src/refresh/sw/x86/aclip.S b/src/refresh/sw/x86/aclip.S
deleted file mode 100644
index df3674e..0000000
--- a/src/refresh/sw/x86/aclip.S
+++ /dev/null
@@ -1,195 +0,0 @@
-//
-// r_aliasa.s
-// x86 assembly-language Alias model transform and project code.
-//
-
-#include "common/x86/asm.h"
-#include "sw.h"
-
-#if USE_ASM
-
- .data
-Ltemp0: .long 0
-Ltemp1: .long 0
-
- .text
-
-#define pfv0 8+4
-#define pfv1 8+8
-#define out 8+12
-
-.globl C(R_Alias_clip_bottom)
-C(R_Alias_clip_bottom):
- pushl %esi
- pushl %edi
-
- movl pfv0(%esp),%esi
- movl pfv1(%esp),%edi
-
- movl C(r_refdef)+rd_aliasvrectbottom,%eax
-
-LDoForwardOrBackward:
-
- movl fv_v+4(%esi),%edx
- movl fv_v+4(%edi),%ecx
-
- cmpl %ecx,%edx
- jl LDoForward
-
- movl fv_v+4(%esi),%ecx
- movl fv_v+4(%edi),%edx
- movl pfv0(%esp),%edi
- movl pfv1(%esp),%esi
-
-LDoForward:
-
- subl %edx,%ecx
- subl %edx,%eax
- movl %ecx,Ltemp1
- movl %eax,Ltemp0
- fildl Ltemp1
- fildl Ltemp0
- movl out(%esp),%edx
- movl $2,%eax
-
- fdivp %st(0),%st(1) // scale
-
-LDo3Forward:
- fildl fv_v+0(%esi) // fv0v0 | scale
- fildl fv_v+0(%edi) // fv1v0 | fv0v0 | scale
- fildl fv_v+4(%esi) // fv0v1 | fv1v0 | fv0v0 | scale
- fildl fv_v+4(%edi) // fv1v1 | fv0v1 | fv1v0 | fv0v0 | scale
- fildl fv_v+8(%esi) // fv0v2 | fv1v1 | fv0v1 | fv1v0 | fv0v0 | scale
- fildl fv_v+8(%edi) // fv1v2 | fv0v2 | fv1v1 | fv0v1 | fv1v0 | fv0v0 |
- // scale
- fxch %st(5) // fv0v0 | fv0v2 | fv1v1 | fv0v1 | fv1v0 | fv1v2 |
- // scale
- fsubr %st(0),%st(4) // fv0v0 | fv0v2 | fv1v1 | fv0v1 | fv1v0-fv0v0 |
- // fv1v2 | scale
- fxch %st(3) // fv0v1 | fv0v2 | fv1v1 | fv0v0 | fv1v0-fv0v0 |
- // fv1v2 | scale
- fsubr %st(0),%st(2) // fv0v1 | fv0v2 | fv1v1-fv0v1 | fv0v0 |
- // fv1v0-fv0v0 | fv1v2 | scale
- fxch %st(1) // fv0v2 | fv0v1 | fv1v1-fv0v1 | fv0v0 |
- // fv1v0-fv0v0 | fv1v2 | scale
- fsubr %st(0),%st(5) // fv0v2 | fv0v1 | fv1v1-fv0v1 | fv0v0 |
- // fv1v0-fv0v0 | fv1v2-fv0v2 | scale
- fxch %st(6) // scale | fv0v1 | fv1v1-fv0v1 | fv0v0 |
- // fv1v0-fv0v0 | fv1v2-fv0v2 | fv0v2
- fmul %st(0),%st(4) // scale | fv0v1 | fv1v1-fv0v1 | fv0v0 |
- // (fv1v0-fv0v0)*scale | fv1v2-fv0v2 | fv0v2
- addl $12,%edi
- fmul %st(0),%st(2) // scale | fv0v1 | (fv1v1-fv0v1)*scale | fv0v0 |
- // (fv1v0-fv0v0)*scale | fv1v2-fv0v2 | fv0v2
- addl $12,%esi
- addl $12,%edx
- fmul %st(0),%st(5) // scale | fv0v1 | (fv1v1-fv0v1)*scale | fv0v0 |
- // (fv1v0-fv0v0)*scale | (fv1v2-fv0v2)*scale |
- // fv0v2
- fxch %st(3) // fv0v0 | fv0v1 | (fv1v1-fv0v1)*scale | scale |
- // (fv1v0-fv0v0)*scale | (fv1v2-fv0v2)*scale |
- // fv0v2
- faddp %st(0),%st(4) // fv0v1 | (fv1v1-fv0v1)*scale | scale |
- // fv0v0+(fv1v0-fv0v0)*scale |
- // (fv1v2-fv0v2)*scale | fv0v2
- faddp %st(0),%st(1) // fv0v1+(fv1v1-fv0v1)*scale | scale |
- // fv0v0+(fv1v0-fv0v0)*scale |
- // (fv1v2-fv0v2)*scale | fv0v2
- fxch %st(4) // fv0v2 | scale | fv0v0+(fv1v0-fv0v0)*scale |
- // (fv1v2-fv0v2)*scale | fv0v1+(fv1v1-fv0v1)*scale
- faddp %st(0),%st(3) // scale | fv0v0+(fv1v0-fv0v0)*scale |
- // fv0v2+(fv1v2-fv0v2)*scale |
- // fv0v1+(fv1v1-fv0v1)*scale
- fxch %st(1) // fv0v0+(fv1v0-fv0v0)*scale | scale |
- // fv0v2+(fv1v2-fv0v2)*scale |
- // fv0v1+(fv1v1-fv0v1)*scale
- fadds float_point5
- fxch %st(3) // fv0v1+(fv1v1-fv0v1)*scale | scale |
- // fv0v2+(fv1v2-fv0v2)*scale |
- // fv0v0+(fv1v0-fv0v0)*scale
- fadds float_point5
- fxch %st(2) // fv0v2+(fv1v2-fv0v2)*scale | scale |
- // fv0v1+(fv1v1-fv0v1)*scale |
- // fv0v0+(fv1v0-fv0v0)*scale
- fadds float_point5
- fxch %st(3) // fv0v0+(fv1v0-fv0v0)*scale | scale |
- // fv0v1+(fv1v1-fv0v1)*scale |
- // fv0v2+(fv1v2-fv0v2)*scale
- fistpl fv_v+0-12(%edx) // scale | fv0v1+(fv1v1-fv0v1)*scale |
- // fv0v2+(fv1v2-fv0v2)*scale
- fxch %st(1) // fv0v1+(fv1v1-fv0v1)*scale | scale |
- // fv0v2+(fv1v2-fv0v2)*scale | scale
- fistpl fv_v+4-12(%edx) // scale | fv0v2+(fv1v2-fv0v2)*scale
- fxch %st(1) // fv0v2+(fv1v2-fv0v2)*sc | scale
- fistpl fv_v+8-12(%edx) // scale
-
- decl %eax
- jnz LDo3Forward
-
- fstp %st(0)
-
- popl %edi
- popl %esi
-
- ret
-
-
-.globl C(R_Alias_clip_top)
-C(R_Alias_clip_top):
- pushl %esi
- pushl %edi
-
- movl pfv0(%esp),%esi
- movl pfv1(%esp),%edi
-
- movl C(r_refdef)+rd_aliasvrect+4,%eax
- jmp LDoForwardOrBackward
-
-
-
-.globl C(R_Alias_clip_right)
-C(R_Alias_clip_right):
- pushl %esi
- pushl %edi
-
- movl pfv0(%esp),%esi
- movl pfv1(%esp),%edi
-
- movl C(r_refdef)+rd_aliasvrectright,%eax
-
-LRightLeftEntry:
-
-
- movl fv_v+4(%esi),%edx
- movl fv_v+4(%edi),%ecx
-
- cmpl %ecx,%edx
- movl fv_v+0(%esi),%edx
-
- movl fv_v+0(%edi),%ecx
- jl LDoForward2
-
- movl fv_v+0(%esi),%ecx
- movl fv_v+0(%edi),%edx
- movl pfv0(%esp),%edi
- movl pfv1(%esp),%esi
-
-LDoForward2:
-
- jmp LDoForward
-
-
-.globl C(R_Alias_clip_left)
-C(R_Alias_clip_left):
- pushl %esi
- pushl %edi
-
- movl pfv0(%esp),%esi
- movl pfv1(%esp),%edi
-
- movl C(r_refdef)+rd_aliasvrect+0,%eax
- jmp LRightLeftEntry
-
-
-#endif // USE_ASM
-
diff --git a/src/refresh/sw/x86/draw.S b/src/refresh/sw/x86/draw.S
deleted file mode 100644
index 3aeceea..0000000
--- a/src/refresh/sw/x86/draw.S
+++ /dev/null
@@ -1,817 +0,0 @@
-//
-// r_drawa.s
-// x86 assembly-language edge clipping and emission code
-//
-
-#include "common/x86/asm.h"
-#include "sw.h"
-
-#if USE_ASM
-
-// !!! if these are changed, they must be changed in r_draw.c too !!!
-#define FULLY_CLIPPED_CACHED 0x80000000
-#define FRAMECOUNT_MASK 0x7FFFFFFF
-
- .data
-
-Ld0: .single 0.0
-Ld1: .single 0.0
-Lstack: .long 0
-Lfp_near_clip: .single NEAR_CLIP
-Lceilv0: .long 0
-Lv: .long 0
-Lu0: .long 0
-Lv0: .long 0
-Lzi0: .long 0
-
- .text
-
-//----------------------------------------------------------------------
-// edge clipping code
-//----------------------------------------------------------------------
-
-#define pv0 4+12
-#define pv1 8+12
-#define clip 12+12
-
- .align 4
-.globl C(R_ClipEdge)
-C(R_ClipEdge):
- pushl %esi // preserve register variables
- pushl %edi
- pushl %ebx
- movl %esp,Lstack // for clearing the stack later
-
-// float d0, d1, f;
-// mvertex_t clipvert;
-
- movl clip(%esp),%ebx
- movl pv0(%esp),%esi
- movl pv1(%esp),%edx
-
-// if (clip)
-// {
- testl %ebx,%ebx
- jz Lemit
-
-// do
-// {
-
-Lcliploop:
-
-// d0 = DotProduct (pv0->position, clip->normal) - clip->dist;
-// d1 = DotProduct (pv1->position, clip->normal) - clip->dist;
- flds mv_position+0(%esi)
- fmuls cp_normal+0(%ebx)
- flds mv_position+4(%esi)
- fmuls cp_normal+4(%ebx)
- flds mv_position+8(%esi)
- fmuls cp_normal+8(%ebx)
- fxch %st(1)
- faddp %st(0),%st(2) // d0mul2 | d0add0
-
- flds mv_position+0(%edx)
- fmuls cp_normal+0(%ebx)
- flds mv_position+4(%edx)
- fmuls cp_normal+4(%ebx)
- flds mv_position+8(%edx)
- fmuls cp_normal+8(%ebx)
- fxch %st(1)
- faddp %st(0),%st(2) // d1mul2 | d1add0 | d0mul2 | d0add0
- fxch %st(3) // d0add0 | d1add0 | d0mul2 | d1mul2
-
- faddp %st(0),%st(2) // d1add0 | dot0 | d1mul2
- faddp %st(0),%st(2) // dot0 | dot1
-
- fsubs cp_dist(%ebx) // d0 | dot1
- fxch %st(1) // dot1 | d0
- fsubs cp_dist(%ebx) // d1 | d0
- fxch %st(1)
- fstps Ld0
- fstps Ld1
-
-// if (d0 >= 0)
-// {
- movl Ld0,%eax
- movl Ld1,%ecx
- orl %eax,%ecx
- js Lp2
-
-// both points are unclipped
-
-Lcontinue:
-
-//
-// R_ClipEdge (&clipvert, pv1, clip->next);
-// return;
-// }
-// } while ((clip = clip->next) != NULL);
- movl cp_next(%ebx),%ebx
- testl %ebx,%ebx
- jnz Lcliploop
-
-// }
-
-//// add the edge
-// R_EmitEdge (pv0, pv1);
-Lemit:
-
-//
-// set integer rounding to ceil mode, set to single precision
-//
-// FIXME: do away with by manually extracting integers from floats?
-// FIXME: set less often
- fldcw C(ceil_cw)
-
-// edge_t *edge, *pcheck;
-// int u_check;
-// float u, u_step;
-// vec3_t local, transformed;
-// float *world;
-// int v, v2, ceilv0;
-// float scale, lzi0, u0, v0;
-// int side;
-
-// if (r_lastvertvalid)
-// {
- cmpl $0,C(r_lastvertvalid)
- jz LCalcFirst
-
-// u0 = r_u1;
-// v0 = r_v1;
-// lzi0 = r_lzi1;
-// ceilv0 = r_ceilv1;
- movl C(r_lzi1),%eax
- movl C(r_u1),%ecx
- movl %eax,Lzi0
- movl %ecx,Lu0
- movl C(r_v1),%ecx
- movl C(r_ceilv1),%eax
- movl %ecx,Lv0
- movl %eax,Lceilv0
- jmp LCalcSecond
-
-// }
-
-LCalcFirst:
-
-// else
-// {
-// world = &pv0->position[0];
-
- call LTransformAndProject // v0 | lzi0 | u0
-
- fsts Lv0
- fxch %st(2) // u0 | lzi0 | v0
- fstps Lu0 // lzi0 | v0
- fstps Lzi0 // v0
-
-// ceilv0 = (int)(v0 - 2000) + 2000; // ceil(v0);
- fistpl Lceilv0
-
-// }
-
-LCalcSecond:
-
-// world = &pv1->position[0];
- movl %edx,%esi
-
- call LTransformAndProject // v1 | lzi1 | u1
-
- flds Lu0 // u0 | v1 | lzi1 | u1
- fxch %st(3) // u1 | v1 | lzi1 | u0
- flds Lzi0 // lzi0 | u1 | v1 | lzi1 | u0
- fxch %st(3) // lzi1 | u1 | v1 | lzi0 | u0
- flds Lv0 // v0 | lzi1 | u1 | v1 | lzi0 | u0
- fxch %st(3) // v1 | lzi1 | u1 | v0 | lzi0 | u0
-
-// r_ceilv1 = (int)(r_v1 - 2000) + 2000; // ceil(r_v1);
- fistl C(r_ceilv1)
-
- fldcw C(chop_cw) // put back normal floating-point state
-
- fsts C(r_v1)
- fxch %st(4) // lzi0 | lzi1 | u1 | v0 | v1 | u0
-
-// if (r_lzi1 > lzi0)
-// lzi0 = r_lzi1;
- fcom %st(1)
- fnstsw %ax
- testb $1,%ah
- jz LP0
- fstp %st(0)
- fld %st(0)
-LP0:
-
- fxch %st(1) // lzi1 | lzi0 | u1 | v0 | v1 | u0
- fstps C(r_lzi1) // lzi0 | u1 | v0 | v1 | u0
- fxch %st(1)
- fsts C(r_u1)
- fxch %st(1)
-
-// if (lzi0 > r_nearzi) // for mipmap finding
-// r_nearzi = lzi0;
- fcoms C(r_nearzi)
- fnstsw %ax
- testb $0x45,%ah
- jnz LP1
- fsts C(r_nearzi)
-LP1:
-
-// // for right edges, all we want is the effect on 1/z
-// if (r_nearzionly)
-// return;
- movl C(r_nearzionly),%eax
- testl %eax,%eax
- jz LP2
-LPop5AndDone:
- movl C(cacheoffset),%eax
- movl C(r_framecount),%edx
- cmpl $0x7FFFFFFF,%eax
- jz LDoPop
- andl $(FRAMECOUNT_MASK),%edx
- orl $(FULLY_CLIPPED_CACHED),%edx
- movl %edx,C(cacheoffset)
-
-LDoPop:
- fstp %st(0) // u1 | v0 | v1 | u0
- fstp %st(0) // v0 | v1 | u0
- fstp %st(0) // v1 | u0
- fstp %st(0) // u0
- fstp %st(0)
- jmp Ldone
-
-LP2:
-
-// // create the edge
-// if (ceilv0 == r_ceilv1)
-// return; // horizontal edge
- movl Lceilv0,%ebx
- movl C(edge_p),%edi
- movl C(r_ceilv1),%ecx
- movl %edi,%edx
- movl C(r_pedge),%esi
- addl $(et_size),%edx
- cmpl %ecx,%ebx
- jz LPop5AndDone
-
- movl C(r_pedge),%eax
- movl %eax,et_owner(%edi)
-
-// side = ceilv0 > r_ceilv1;
-//
-// edge->nearzi = lzi0;
- fstps et_nearzi(%edi) // u1 | v0 | v1 | u0
-
-// if (side == 1)
-// {
- jc LSide0
-
-LSide1:
-
-// // leading edge (go from p2 to p1)
-
-// u_step = ((u0 - r_u1) / (v0 - r_v1));
- fsubrp %st(0),%st(3) // v0 | v1 | u0-u1
- fsub %st(1),%st(0) // v0-v1 | v1 | u0-u1
- fdivrp %st(0),%st(2) // v1 | ustep
-
-// r_emitted = 1;
- movl $1,C(r_emitted)
-
-// edge = edge_p++;
- movl %edx,C(edge_p)
-
-// pretouch next edge
- movl (%edx),%eax
-
-// v2 = ceilv0 - 1;
-// v = r_ceilv1;
- movl %ecx,%eax
- leal -1(%ebx),%ecx
- movl %eax,%ebx
-
-// edge->surfs[0] = 0;
-// edge->surfs[1] = surface_p - surfaces;
- movl C(surface_p),%eax
- movl C(surfaces),%esi
- subl %edx,%edx
- subl %esi,%eax
- shrl $(SURF_T_SHIFT),%eax
- movl %edx,et_surfs(%edi)
- movl %eax,et_surfs+2(%edi)
-
- subl %esi,%esi
-
-// u = r_u1 + ((float)v - r_v1) * u_step;
- movl %ebx,Lv
- fildl Lv // v | v1 | ustep
- fsubp %st(0),%st(1) // v-v1 | ustep
- fmul %st(1),%st(0) // (v-v1)*ustep | ustep
- fadds C(r_u1) // u | ustep
-
- jmp LSideDone
-
-// }
-
-LSide0:
-
-// else
-// {
-// // trailing edge (go from p1 to p2)
-
-// u_step = ((r_u1 - u0) / (r_v1 - v0));
- fsub %st(3),%st(0) // u1-u0 | v0 | v1 | u0
- fxch %st(2) // v1 | v0 | u1-u0 | u0
- fsub %st(1),%st(0) // v1-v0 | v0 | u1-u0 | u0
- fdivrp %st(0),%st(2) // v0 | ustep | u0
-
-// r_emitted = 1;
- movl $1,C(r_emitted)
-
-// edge = edge_p++;
- movl %edx,C(edge_p)
-
-// pretouch next edge
- movl (%edx),%eax
-
-// v = ceilv0;
-// v2 = r_ceilv1 - 1;
- decl %ecx
-
-// edge->surfs[0] = surface_p - surfaces;
-// edge->surfs[1] = 0;
- movl C(surface_p),%eax
- movl C(surfaces),%esi
- subl %edx,%edx
- subl %esi,%eax
- shrl $(SURF_T_SHIFT),%eax
- movl %edx,et_surfs+2(%edi)
- movl %eax,et_surfs(%edi)
-
- movl $1,%esi
-
-// u = u0 + ((float)v - v0) * u_step;
- movl %ebx,Lv
- fildl Lv // v | v0 | ustep | u0
- fsubp %st(0),%st(1) // v-v0 | ustep | u0
- fmul %st(1),%st(0) // (v-v0)*ustep | ustep | u0
- faddp %st(0),%st(2) // ustep | u
- fxch %st(1) // u | ustep
-
-// }
-
-LSideDone:
-
-// edge->u_step = u_step*0x100000;
-// edge->u = u*0x100000 + 0xFFFFF;
-
- fmuls fp_1m // u*0x100000 | ustep
- fxch %st(1) // ustep | u*0x100000
- fmuls fp_1m // ustep*0x100000 | u*0x100000
- fxch %st(1) // u*0x100000 | ustep*0x100000
- fadds fp_1m_minus_1 // u*0x100000 + 0xFFFFF | ustep*0x100000
- fxch %st(1) // ustep*0x100000 | u*0x100000 + 0xFFFFF
- fistpl et_u_step(%edi) // u*0x100000 + 0xFFFFF
- fistpl et_u(%edi)
-
-// // we need to do this to avoid stepping off the edges if a very nearly
-// // horizontal edge is less than epsilon above a scan, and numeric error
-// // causes it to incorrectly extend to the scan, and the extension of the
-// // line goes off the edge of the screen
-// // FIXME: is this actually needed?
-// if (edge->u < r_refdef.vrect_x_adj_shift20)
-// edge->u = r_refdef.vrect_x_adj_shift20;
-// if (edge->u > r_refdef.vrectright_adj_shift20)
-// edge->u = r_refdef.vrectright_adj_shift20;
- movl et_u(%edi),%eax
- movl C(r_refdef)+rd_vrect_x_adj_shift20,%edx
- cmpl %edx,%eax
- jl LP4
- movl C(r_refdef)+rd_vrectright_adj_shift20,%edx
- cmpl %edx,%eax
- jng LP5
-LP4:
- movl %edx,et_u(%edi)
- movl %edx,%eax
-LP5:
-
-// // sort the edge in normally
-// u_check = edge->u;
-//
-// if (edge->surfs[0])
-// u_check++; // sort trailers after leaders
- addl %esi,%eax
-
-// if (!newedges[v] || newedges[v]->u >= u_check)
-// {
- movl C(newedges)(,%ebx,4),%esi
- testl %esi,%esi
- jz LDoFirst
- cmpl %eax,et_u(%esi)
- jl LNotFirst
-LDoFirst:
-
-// edge->next = newedges[v];
-// newedges[v] = edge;
- movl %esi,et_next(%edi)
- movl %edi,C(newedges)(,%ebx,4)
-
- jmp LSetRemove
-
-// }
-
-LNotFirst:
-
-// else
-// {
-// pcheck = newedges[v];
-//
-// while (pcheck->next && pcheck->next->u < u_check)
-// pcheck = pcheck->next;
-LFindInsertLoop:
- movl %esi,%edx
- movl et_next(%esi),%esi
- testl %esi,%esi
- jz LInsertFound
- cmpl %eax,et_u(%esi)
- jl LFindInsertLoop
-
-LInsertFound:
-
-// edge->next = pcheck->next;
-// pcheck->next = edge;
- movl %esi,et_next(%edi)
- movl %edi,et_next(%edx)
-
-// }
-
-LSetRemove:
-
-// edge->nextremove = removeedges[v2];
-// removeedges[v2] = edge;
- movl C(removeedges)(,%ecx,4),%eax
- movl %edi,C(removeedges)(,%ecx,4)
- movl %eax,et_nextremove(%edi)
-
-Ldone:
- movl Lstack,%esp // clear temporary variables from stack
-
- popl %ebx // restore register variables
- popl %edi
- popl %esi
- ret
-
-// at least one point is clipped
-
-Lp2:
- testl %eax,%eax
- jns Lp1
-
-// else
-// {
-// // point 0 is clipped
-
-// if (d1 < 0)
-// {
- movl Ld1,%eax
- testl %eax,%eax
- jns Lp3
-
-// // both points are clipped
-// // we do cache fully clipped edges
-// if (!leftclipped)
- movl C(r_leftclipped),%eax
- movl C(r_pedge),%ecx
- testl %eax,%eax
- jnz Ldone
-
-// r_pedge->framecount = r_framecount;
- movl C(r_framecount),%eax
- andl $(FRAMECOUNT_MASK),%eax
- orl $(FULLY_CLIPPED_CACHED),%eax
- movl %eax,C(cacheoffset)
-
-// return;
- jmp Ldone
-
-// }
-
-Lp1:
-
-// // point 0 is unclipped
-// if (d1 >= 0)
-// {
-// // both points are unclipped
-// continue;
-
-// // only point 1 is clipped
-
-// f = d0 / (d0 - d1);
- flds Ld0
- flds Ld1
- fsubr %st(1),%st(0)
-
-// // we don't cache partially clipped edges
- movl $0x7FFFFFFF,C(cacheoffset)
-
- fdivrp %st(0),%st(1)
-
- subl $(mv_size),%esp // allocate space for clipvert
-
-// clipvert.position[0] = pv0->position[0] +
-// f * (pv1->position[0] - pv0->position[0]);
-// clipvert.position[1] = pv0->position[1] +
-// f * (pv1->position[1] - pv0->position[1]);
-// clipvert.position[2] = pv0->position[2] +
-// f * (pv1->position[2] - pv0->position[2]);
- flds mv_position+8(%edx)
- fsubs mv_position+8(%esi)
- flds mv_position+4(%edx)
- fsubs mv_position+4(%esi)
- flds mv_position+0(%edx)
- fsubs mv_position+0(%esi) // 0 | 1 | 2
-
-// replace pv1 with the clip point
- movl %esp,%edx
- movl cp_leftedge(%ebx),%eax
- testb %al,%al
-
- fmul %st(3),%st(0)
- fxch %st(1) // 1 | 0 | 2
- fmul %st(3),%st(0)
- fxch %st(2) // 2 | 0 | 1
- fmulp %st(0),%st(3) // 0 | 1 | 2
- fadds mv_position+0(%esi)
- fxch %st(1) // 1 | 0 | 2
- fadds mv_position+4(%esi)
- fxch %st(2) // 2 | 0 | 1
- fadds mv_position+8(%esi)
- fxch %st(1) // 0 | 2 | 1
- fstps mv_position+0(%esp) // 2 | 1
- fstps mv_position+8(%esp) // 1
- fstps mv_position+4(%esp)
-
-// if (clip->leftedge)
-// {
- jz Ltestright
-
-// r_leftclipped = true;
-// r_leftexit = clipvert;
- movl $1,C(r_leftclipped)
- movl mv_position+0(%esp),%eax
- movl %eax,C(r_leftexit)+mv_position+0
- movl mv_position+4(%esp),%eax
- movl %eax,C(r_leftexit)+mv_position+4
- movl mv_position+8(%esp),%eax
- movl %eax,C(r_leftexit)+mv_position+8
-
- jmp Lcontinue
-
-// }
-
-Ltestright:
-// else if (clip->rightedge)
-// {
- testb %ah,%ah
- jz Lcontinue
-
-// r_rightclipped = true;
-// r_rightexit = clipvert;
- movl $1,C(r_rightclipped)
- movl mv_position+0(%esp),%eax
- movl %eax,C(r_rightexit)+mv_position+0
- movl mv_position+4(%esp),%eax
- movl %eax,C(r_rightexit)+mv_position+4
- movl mv_position+8(%esp),%eax
- movl %eax,C(r_rightexit)+mv_position+8
-
-// }
-//
-// R_ClipEdge (pv0, &clipvert, clip->next);
-// return;
-// }
- jmp Lcontinue
-
-// }
-
-Lp3:
-
-// // only point 0 is clipped
-// r_lastvertvalid = false;
-
- movl $0,C(r_lastvertvalid)
-
-// f = d0 / (d0 - d1);
- flds Ld0
- flds Ld1
- fsubr %st(1),%st(0)
-
-// // we don't cache partially clipped edges
- movl $0x7FFFFFFF,C(cacheoffset)
-
- fdivrp %st(0),%st(1)
-
- subl $(mv_size),%esp // allocate space for clipvert
-
-// clipvert.position[0] = pv0->position[0] +
-// f * (pv1->position[0] - pv0->position[0]);
-// clipvert.position[1] = pv0->position[1] +
-// f * (pv1->position[1] - pv0->position[1]);
-// clipvert.position[2] = pv0->position[2] +
-// f * (pv1->position[2] - pv0->position[2]);
- flds mv_position+8(%edx)
- fsubs mv_position+8(%esi)
- flds mv_position+4(%edx)
- fsubs mv_position+4(%esi)
- flds mv_position+0(%edx)
- fsubs mv_position+0(%esi) // 0 | 1 | 2
-
- movl cp_leftedge(%ebx),%eax
- testb %al,%al
-
- fmul %st(3),%st(0)
- fxch %st(1) // 1 | 0 | 2
- fmul %st(3),%st(0)
- fxch %st(2) // 2 | 0 | 1
- fmulp %st(0),%st(3) // 0 | 1 | 2
- fadds mv_position+0(%esi)
- fxch %st(1) // 1 | 0 | 2
- fadds mv_position+4(%esi)
- fxch %st(2) // 2 | 0 | 1
- fadds mv_position+8(%esi)
- fxch %st(1) // 0 | 2 | 1
- fstps mv_position+0(%esp) // 2 | 1
- fstps mv_position+8(%esp) // 1
- fstps mv_position+4(%esp)
-
-// replace pv0 with the clip point
- movl %esp,%esi
-
-// if (clip->leftedge)
-// {
- jz Ltestright2
-
-// r_leftclipped = true;
-// r_leftenter = clipvert;
- movl $1,C(r_leftclipped)
- movl mv_position+0(%esp),%eax
- movl %eax,C(r_leftenter)+mv_position+0
- movl mv_position+4(%esp),%eax
- movl %eax,C(r_leftenter)+mv_position+4
- movl mv_position+8(%esp),%eax
- movl %eax,C(r_leftenter)+mv_position+8
-
- jmp Lcontinue
-
-// }
-
-Ltestright2:
-// else if (clip->rightedge)
-// {
- testb %ah,%ah
- jz Lcontinue
-
-// r_rightclipped = true;
-// r_rightenter = clipvert;
- movl $1,C(r_rightclipped)
- movl mv_position+0(%esp),%eax
- movl %eax,C(r_rightenter)+mv_position+0
- movl mv_position+4(%esp),%eax
- movl %eax,C(r_rightenter)+mv_position+4
- movl mv_position+8(%esp),%eax
- movl %eax,C(r_rightenter)+mv_position+8
-
-// }
- jmp Lcontinue
-
-// %esi = vec3_t point to transform and project
-// %edx preserved
-LTransformAndProject:
-
-// // transform and project
-// VectorSubtract (world, modelorg, local);
- flds mv_position+0(%esi)
- fsubs C(modelorg)+0
- flds mv_position+4(%esi)
- fsubs C(modelorg)+4
- flds mv_position+8(%esi)
- fsubs C(modelorg)+8
- fxch %st(2) // local[0] | local[1] | local[2]
-
-// TransformVector (local, transformed);
-//
-// if (transformed[2] < NEAR_CLIP)
-// transformed[2] = NEAR_CLIP;
-//
-// lzi0 = 1.0 / transformed[2];
- fld %st(0) // local[0] | local[0] | local[1] | local[2]
- fmuls C(vpn)+0 // zm0 | local[0] | local[1] | local[2]
- fld %st(1) // local[0] | zm0 | local[0] | local[1] |
- // local[2]
- fmuls C(vright)+0 // xm0 | zm0 | local[0] | local[1] | local[2]
- fxch %st(2) // local[0] | zm0 | xm0 | local[1] | local[2]
- fmuls C(vup)+0 // ym0 | zm0 | xm0 | local[1] | local[2]
- fld %st(3) // local[1] | ym0 | zm0 | xm0 | local[1] |
- // local[2]
- fmuls C(vpn)+4 // zm1 | ym0 | zm0 | xm0 | local[1] |
- // local[2]
- fld %st(4) // local[1] | zm1 | ym0 | zm0 | xm0 |
- // local[1] | local[2]
- fmuls C(vright)+4 // xm1 | zm1 | ym0 | zm0 | xm0 |
- // local[1] | local[2]
- fxch %st(5) // local[1] | zm1 | ym0 | zm0 | xm0 |
- // xm1 | local[2]
- fmuls C(vup)+4 // ym1 | zm1 | ym0 | zm0 | xm0 |
- // xm1 | local[2]
- fxch %st(1) // zm1 | ym1 | ym0 | zm0 | xm0 |
- // xm1 | local[2]
- faddp %st(0),%st(3) // ym1 | ym0 | zm2 | xm0 | xm1 | local[2]
- fxch %st(3) // xm0 | ym0 | zm2 | ym1 | xm1 | local[2]
- faddp %st(0),%st(4) // ym0 | zm2 | ym1 | xm2 | local[2]
- faddp %st(0),%st(2) // zm2 | ym2 | xm2 | local[2]
- fld %st(3) // local[2] | zm2 | ym2 | xm2 | local[2]
- fmuls C(vpn)+8 // zm3 | zm2 | ym2 | xm2 | local[2]
- fld %st(4) // local[2] | zm3 | zm2 | ym2 | xm2 | local[2]
- fmuls C(vright)+8 // xm3 | zm3 | zm2 | ym2 | xm2 | local[2]
- fxch %st(5) // local[2] | zm3 | zm2 | ym2 | xm2 | xm3
- fmuls C(vup)+8 // ym3 | zm3 | zm2 | ym2 | xm2 | xm3
- fxch %st(1) // zm3 | ym3 | zm2 | ym2 | xm2 | xm3
- faddp %st(0),%st(2) // ym3 | zm4 | ym2 | xm2 | xm3
- fxch %st(4) // xm3 | zm4 | ym2 | xm2 | ym3
- faddp %st(0),%st(3) // zm4 | ym2 | xm4 | ym3
- fxch %st(1) // ym2 | zm4 | xm4 | ym3
- faddp %st(0),%st(3) // zm4 | xm4 | ym4
-
- fcoms Lfp_near_clip
- fnstsw %ax
- testb $1,%ah
- jz LNoClip
- fstp %st(0)
- flds Lfp_near_clip
-
-LNoClip:
-
- fdivrs float_1 // lzi0 | x | y
- fxch %st(1) // x | lzi0 | y
-
-// // FIXME: build x/yscale into transform?
-// scale = xscale * lzi0;
-// u0 = (xcenter + scale*transformed[0]);
- flds C(xscale) // xscale | x | lzi0 | y
- fmul %st(2),%st(0) // scale | x | lzi0 | y
- fmulp %st(0),%st(1) // scale*x | lzi0 | y
- fadds C(xcenter) // u0 | lzi0 | y
-
-// if (u0 < r_refdef.fvrectx_adj)
-// u0 = r_refdef.fvrectx_adj;
-// if (u0 > r_refdef.fvrectright_adj)
-// u0 = r_refdef.fvrectright_adj;
-// FIXME: use integer compares of floats?
- fcoms C(r_refdef)+rd_fvrectx_adj
- fnstsw %ax
- testb $1,%ah
- jz LClampP0
- fstp %st(0)
- flds C(r_refdef)+rd_fvrectx_adj
-LClampP0:
- fcoms C(r_refdef)+rd_fvrectright_adj
- fnstsw %ax
- testb $0x45,%ah
- jnz LClampP1
- fstp %st(0)
- flds C(r_refdef)+rd_fvrectright_adj
-LClampP1:
-
- fld %st(1) // lzi0 | u0 | lzi0 | y
-
-// scale = yscale * lzi0;
-// v0 = (ycenter - scale*transformed[1]);
- fmuls C(yscale) // scale | u0 | lzi0 | y
- fmulp %st(0),%st(3) // u0 | lzi0 | scale*y
- fxch %st(2) // scale*y | lzi0 | u0
- fsubrs C(ycenter) // v0 | lzi0 | u0
-
-// if (v0 < r_refdef.fvrecty_adj)
-// v0 = r_refdef.fvrecty_adj;
-// if (v0 > r_refdef.fvrectbottom_adj)
-// v0 = r_refdef.fvrectbottom_adj;
-// FIXME: use integer compares of floats?
- fcoms C(r_refdef)+rd_fvrecty_adj
- fnstsw %ax
- testb $1,%ah
- jz LClampP2
- fstp %st(0)
- flds C(r_refdef)+rd_fvrecty_adj
-LClampP2:
- fcoms C(r_refdef)+rd_fvrectbottom_adj
- fnstsw %ax
- testb $0x45,%ah
- jnz LClampP3
- fstp %st(0)
- flds C(r_refdef)+rd_fvrectbottom_adj
-LClampP3:
- ret
-
-#endif // USE_ASM
-
diff --git a/src/refresh/sw/x86/edge.S b/src/refresh/sw/x86/edge.S
deleted file mode 100644
index 03449cd..0000000
--- a/src/refresh/sw/x86/edge.S
+++ /dev/null
@@ -1,730 +0,0 @@
-//
-// r_edgea.s
-// x86 assembly-language edge-processing code.
-//
-
-#include "common/x86/asm.h"
-#include "sw.h"
-
-#if USE_ASM
-
- .data
-Ltemp: .long 0
-float_1_div_0100000h: .long 0x35800000 // 1.0/(float)0x100000
-float_point_999: .single 0.999
-float_1_point_001: .single 1.001
-
- .text
-
-//--------------------------------------------------------------------
-
-#define edgestoadd 4+8 // note odd stack offsets because of interleaving
-#define edgelist 8+12 // with pushes
-
-.globl C(R_EdgeCodeStart)
-C(R_EdgeCodeStart):
-
-.globl C(R_InsertNewEdges)
-C(R_InsertNewEdges):
- pushl %edi
- pushl %esi // preserve register variables
- movl edgestoadd(%esp),%edx
- pushl %ebx
- movl edgelist(%esp),%ecx
-
-LDoNextEdge:
- movl et_u(%edx),%eax
- movl %edx,%edi
-
-LContinueSearch:
- movl et_u(%ecx),%ebx
- movl et_next(%ecx),%esi
- cmpl %ebx,%eax
- jle LAddedge
- movl et_u(%esi),%ebx
- movl et_next(%esi),%ecx
- cmpl %ebx,%eax
- jle LAddedge2
- movl et_u(%ecx),%ebx
- movl et_next(%ecx),%esi
- cmpl %ebx,%eax
- jle LAddedge
- movl et_u(%esi),%ebx
- movl et_next(%esi),%ecx
- cmpl %ebx,%eax
- jg LContinueSearch
-
-LAddedge2:
- movl et_next(%edx),%edx
- movl et_prev(%esi),%ebx
- movl %esi,et_next(%edi)
- movl %ebx,et_prev(%edi)
- movl %edi,et_next(%ebx)
- movl %edi,et_prev(%esi)
- movl %esi,%ecx
-
- cmpl $0,%edx
- jnz LDoNextEdge
- jmp LDone
-
- .align 4
-LAddedge:
- movl et_next(%edx),%edx
- movl et_prev(%ecx),%ebx
- movl %ecx,et_next(%edi)
- movl %ebx,et_prev(%edi)
- movl %edi,et_next(%ebx)
- movl %edi,et_prev(%ecx)
-
- cmpl $0,%edx
- jnz LDoNextEdge
-
-LDone:
- popl %ebx // restore register variables
- popl %esi
- popl %edi
-
- ret
-
-//--------------------------------------------------------------------
-
-#define predge 4+4
-
-.globl C(R_RemoveEdges)
-C(R_RemoveEdges):
- pushl %ebx
- movl predge(%esp),%eax
-
-Lre_loop:
- movl et_next(%eax),%ecx
- movl et_nextremove(%eax),%ebx
- movl et_prev(%eax),%edx
- testl %ebx,%ebx
- movl %edx,et_prev(%ecx)
- jz Lre_done
- movl %ecx,et_next(%edx)
-
- movl et_next(%ebx),%ecx
- movl et_prev(%ebx),%edx
- movl et_nextremove(%ebx),%eax
- movl %edx,et_prev(%ecx)
- testl %eax,%eax
- movl %ecx,et_next(%edx)
- jnz Lre_loop
-
- popl %ebx
- ret
-
-Lre_done:
- movl %ecx,et_next(%edx)
- popl %ebx
-
- ret
-
-//--------------------------------------------------------------------
-
-#define pedgelist 4+4 // note odd stack offset because of interleaving
- // with pushes
-
-.globl C(R_StepActiveU)
-C(R_StepActiveU):
- pushl %edi
- movl pedgelist(%esp),%edx
- pushl %esi // preserve register variables
- pushl %ebx
-
- movl et_prev(%edx),%esi
-
-LNewEdge:
- movl et_u(%esi),%edi
-
-LNextEdge:
- movl et_u(%edx),%eax
- movl et_u_step(%edx),%ebx
- addl %ebx,%eax
- movl et_next(%edx),%esi
- movl %eax,et_u(%edx)
- cmpl %edi,%eax
- jl LPushBack
-
- movl et_u(%esi),%edi
- movl et_u_step(%esi),%ebx
- addl %ebx,%edi
- movl et_next(%esi),%edx
- movl %edi,et_u(%esi)
- cmpl %eax,%edi
- jl LPushBack2
-
- movl et_u(%edx),%eax
- movl et_u_step(%edx),%ebx
- addl %ebx,%eax
- movl et_next(%edx),%esi
- movl %eax,et_u(%edx)
- cmpl %edi,%eax
- jl LPushBack
-
- movl et_u(%esi),%edi
- movl et_u_step(%esi),%ebx
- addl %ebx,%edi
- movl et_next(%esi),%edx
- movl %edi,et_u(%esi)
- cmpl %eax,%edi
- jnl LNextEdge
-
-LPushBack2:
- movl %edx,%ebx
- movl %edi,%eax
- movl %esi,%edx
- movl %ebx,%esi
-
-LPushBack:
-// push it back to keep it sorted
- movl et_prev(%edx),%ecx
- movl et_next(%edx),%ebx
-
-// done if the -1 in edge_aftertail triggered this
- cmpl $(C(edge_aftertail)),%edx
- jz LUDone
-
-// pull the edge out of the edge list
- movl et_prev(%ecx),%edi
- movl %ecx,et_prev(%esi)
- movl %ebx,et_next(%ecx)
-
-// find out where the edge goes in the edge list
-LPushBackLoop:
- movl et_prev(%edi),%ecx
- movl et_u(%edi),%ebx
- cmpl %ebx,%eax
- jnl LPushBackFound
-
- movl et_prev(%ecx),%edi
- movl et_u(%ecx),%ebx
- cmpl %ebx,%eax
- jl LPushBackLoop
-
- movl %ecx,%edi
-
-// put the edge back into the edge list
-LPushBackFound:
- movl et_next(%edi),%ebx
- movl %edi,et_prev(%edx)
- movl %ebx,et_next(%edx)
- movl %edx,et_next(%edi)
- movl %edx,et_prev(%ebx)
-
- movl %esi,%edx
- movl et_prev(%esi),%esi
-
- cmpl $(C(edge_tail)),%edx
- jnz LNewEdge
-
-LUDone:
- popl %ebx // restore register variables
- popl %esi
- popl %edi
-
- ret
-
-//--------------------------------------------------------------------
-
-#define surf 4 // note this is loaded before any pushes
-
- .align 4
-TrailingEdge:
- movl st_spanstate(%esi),%eax // check for edge inversion
- decl %eax
- jnz LInverted
-
- movl %eax,st_spanstate(%esi)
- movl st_insubmodel(%esi),%ecx
- movl 0x12345678,%edx // surfaces[1].st_next
-LPatch0:
- movl C(r_bmodelactive),%eax
- subl %ecx,%eax
- cmpl %esi,%edx
- movl %eax,C(r_bmodelactive)
- jnz LNoEmit // surface isn't on top, just remove
-
-// emit a span (current top going away)
- movl et_u(%ebx),%eax
- shrl $20,%eax // iu = integral pixel u
- movl st_last_u(%esi),%edx
- movl st_next(%esi),%ecx
- cmpl %edx,%eax
- jle LNoEmit2 // iu <= surf->last_u, so nothing to emit
-
- movl %eax,st_last_u(%ecx) // surf->next->last_u = iu;
- subl %edx,%eax
- movl %edx,espan_t_u(%ebp) // span->u = surf->last_u;
-
- movl %eax,espan_t_count(%ebp) // span->count = iu - span->u;
- movl C(current_iv),%eax
- movl %eax,espan_t_v(%ebp) // span->v = current_iv;
- movl st_spans(%esi),%eax
- movl %eax,espan_t_pnext(%ebp) // span->pnext = surf->spans;
- movl %ebp,st_spans(%esi) // surf->spans = span;
- addl $(espan_t_size),%ebp
-
- movl st_next(%esi),%edx // remove the surface from the surface
- movl st_prev(%esi),%esi // stack
-
- movl %edx,st_next(%esi)
- movl %esi,st_prev(%edx)
- ret
-
-LNoEmit2:
- movl %eax,st_last_u(%ecx) // surf->next->last_u = iu;
- movl st_next(%esi),%edx // remove the surface from the surface
- movl st_prev(%esi),%esi // stack
-
- movl %edx,st_next(%esi)
- movl %esi,st_prev(%edx)
- ret
-
-LNoEmit:
- movl st_next(%esi),%edx // remove the surface from the surface
- movl st_prev(%esi),%esi // stack
-
- movl %edx,st_next(%esi)
- movl %esi,st_prev(%edx)
- ret
-
-LInverted:
- movl %eax,st_spanstate(%esi)
- ret
-
-//--------------------------------------------------------------------
-
-// trailing edge only
-Lgs_trailing:
- pushl $Lgs_nextedge
- jmp TrailingEdge
-
-
-.globl C(R_GenerateSpans)
-C(R_GenerateSpans):
- pushl %ebp // preserve caller's stack frame
- pushl %edi
- pushl %esi // preserve register variables
- pushl %ebx
-
-// clear active surfaces to just the background surface
- movl C(surfaces),%eax
- movl C(edge_head_u_shift20),%edx
- addl $(st_size),%eax
-// %ebp = span_p throughout
- movl C(span_p),%ebp
-
- movl $0,C(r_bmodelactive)
-
- movl %eax,st_next(%eax)
- movl %eax,st_prev(%eax)
- movl %edx,st_last_u(%eax)
- movl C(edge_head)+et_next,%ebx // edge=edge_head.next
-
-// generate spans
- cmpl $(C(edge_tail)),%ebx // done if empty list
- jz Lgs_lastspan
-
-Lgs_edgeloop:
-
- movl et_surfs(%ebx),%edi
- movl C(surfaces),%eax
- movl %edi,%esi
- andl $0xFFFF0000,%edi
- andl $0xFFFF,%esi
- jz Lgs_leading // not a trailing edge
-
-// it has a left surface, so a surface is going away for this span
- shll $(SURF_T_SHIFT),%esi
- addl %eax,%esi
- testl %edi,%edi
- jz Lgs_trailing
-
-// both leading and trailing
- call TrailingEdge
- movl C(surfaces),%eax
-
-// ---------------------------------------------------------------
-// handle a leading edge
-// ---------------------------------------------------------------
-
-Lgs_leading:
- shrl $16-SURF_T_SHIFT,%edi
- movl C(surfaces),%eax
- addl %eax,%edi
- movl 0x12345678,%esi // surf2 = surfaces[1].next;
-LPatch2:
- movl st_spanstate(%edi),%edx
- movl st_insubmodel(%edi),%eax
- testl %eax,%eax
- jnz Lbmodel_leading
-
-// handle a leading non-bmodel edge
-
-// don't start a span if this is an inverted span, with the end edge preceding
-// the start edge (that is, we've already seen the end edge)
- testl %edx,%edx
- jnz Lxl_done
-
-
-// if (surf->key < surf2->key)
-// goto newtop;
- incl %edx
- movl st_key(%edi),%eax
- movl %edx,st_spanstate(%edi)
- movl st_key(%esi),%ecx
- cmpl %ecx,%eax
- jl Lnewtop
-
-// main sorting loop to search through surface stack until insertion point
-// found. Always terminates because background surface is sentinel
-// do
-// {
-// surf2 = surf2->next;
-// } while (surf->key >= surf2->key);
-Lsortloopnb:
- movl st_next(%esi),%esi
- movl st_key(%esi),%ecx
- cmpl %ecx,%eax
- jge Lsortloopnb
-
- jmp LInsertAndExit
-
-
-// handle a leading bmodel edge
- .align 4
-Lbmodel_leading:
-
-// don't start a span if this is an inverted span, with the end edge preceding
-// the start edge (that is, we've already seen the end edge)
- testl %edx,%edx
- jnz Lxl_done
-
- movl C(r_bmodelactive),%ecx
- incl %edx
- incl %ecx
- movl %edx,st_spanstate(%edi)
- movl %ecx,C(r_bmodelactive)
-
-// if (surf->key < surf2->key)
-// goto newtop;
- movl st_key(%edi),%eax
- movl st_key(%esi),%ecx
- cmpl %ecx,%eax
- jl Lnewtop
-
-// if ((surf->key == surf2->key) && surf->insubmodel)
-// {
- jz Lzcheck_for_newtop
-
-// main sorting loop to search through surface stack until insertion point
-// found. Always terminates because background surface is sentinel
-// do
-// {
-// surf2 = surf2->next;
-// } while (surf->key > surf2->key);
-Lsortloop:
- movl st_next(%esi),%esi
- movl st_key(%esi),%ecx
- cmpl %ecx,%eax
- jg Lsortloop
-
- jne LInsertAndExit
-
-// Do 1/z sorting to see if we've arrived in the right position
- movl et_u(%ebx),%eax
- subl $0xFFFFF,%eax
- movl %eax,Ltemp
- fildl Ltemp
-
- fmuls float_1_div_0100000h // fu = (float)(edge->u - 0xFFFFF) *
- // (1.0 / 0x100000);
-
- fld %st(0) // fu | fu
- fmuls st_d_zistepu(%edi) // fu*surf->d_zistepu | fu
- flds C(fv) // fv | fu*surf->d_zistepu | fu
- fmuls st_d_zistepv(%edi) // fv*surf->d_zistepv | fu*surf->d_zistepu | fu
- fxch %st(1) // fu*surf->d_zistepu | fv*surf->d_zistepv | fu
- fadds st_d_ziorigin(%edi) // fu*surf->d_zistepu + surf->d_ziorigin |
- // fv*surf->d_zistepv | fu
-
- flds st_d_zistepu(%esi) // surf2->d_zistepu |
- // fu*surf->d_zistepu + surf->d_ziorigin |
- // fv*surf->d_zistepv | fu
- fmul %st(3),%st(0) // fu*surf2->d_zistepu |
- // fu*surf->d_zistepu + surf->d_ziorigin |
- // fv*surf->d_zistepv | fu
- fxch %st(1) // fu*surf->d_zistepu + surf->d_ziorigin |
- // fu*surf2->d_zistepu |
- // fv*surf->d_zistepv | fu
- faddp %st(0),%st(2) // fu*surf2->d_zistepu | newzi | fu
-
- flds C(fv) // fv | fu*surf2->d_zistepu | newzi | fu
- fmuls st_d_zistepv(%esi) // fv*surf2->d_zistepv |
- // fu*surf2->d_zistepu | newzi | fu
- fld %st(2) // newzi | fv*surf2->d_zistepv |
- // fu*surf2->d_zistepu | newzi | fu
- fmuls float_point_999 // newzibottom | fv*surf2->d_zistepv |
- // fu*surf2->d_zistepu | newzi | fu
-
- fxch %st(2) // fu*surf2->d_zistepu | fv*surf2->d_zistepv |
- // newzibottom | newzi | fu
- fadds st_d_ziorigin(%esi) // fu*surf2->d_zistepu + surf2->d_ziorigin |
- // fv*surf2->d_zistepv | newzibottom | newzi |
- // fu
- faddp %st(0),%st(1) // testzi | newzibottom | newzi | fu
- fxch %st(1) // newzibottom | testzi | newzi | fu
-
-// if (newzibottom >= testzi)
-// goto Lgotposition;
-
- fcomp %st(1) // testzi | newzi | fu
-
- fxch %st(1) // newzi | testzi | fu
- fmuls float_1_point_001 // newzitop | testzi | fu
- fxch %st(1) // testzi | newzitop | fu
-
- fnstsw %ax
- testb $0x01,%ah
- jz Lgotposition_fpop3
-
-// if (newzitop >= testzi)
-// {
-
- fcomp %st(1) // newzitop | fu
- fnstsw %ax
- testb $0x45,%ah
- jz Lsortloop_fpop2
-
-// if (surf->d_zistepu >= surf2->d_zistepu)
-// goto Lgotposition;
-
- flds st_d_zistepu(%edi) // surf->d_zistepu | newzitop| fu
- fcomps st_d_zistepu(%esi) // newzitop | fu
- fnstsw %ax
- testb $0x01,%ah
- jz Lgotposition_fpop2
-
- fstp %st(0) // clear the FPstack
- fstp %st(0)
- movl st_key(%edi),%eax
- jmp Lsortloop
-
-
-Lgotposition_fpop3:
- fstp %st(0)
-Lgotposition_fpop2:
- fstp %st(0)
- fstp %st(0)
- jmp LInsertAndExit
-
-
-// emit a span (obscures current top)
-
-Lnewtop_fpop3:
- fstp %st(0)
-Lnewtop_fpop2:
- fstp %st(0)
- fstp %st(0)
- movl st_key(%edi),%eax // reload the sorting key
-
-Lnewtop:
- movl et_u(%ebx),%eax
- movl st_last_u(%esi),%edx
- shrl $20,%eax // iu = integral pixel u
- movl %eax,st_last_u(%edi) // surf->last_u = iu;
- cmpl %edx,%eax
- jle LInsertAndExit // iu <= surf2->last_u, so nothing to emit
-
- subl %edx,%eax
- movl %edx,espan_t_u(%ebp) // span->u = surf2->last_u;
-
- movl %eax,espan_t_count(%ebp) // span->count = iu - span->u;
- movl C(current_iv),%eax
- movl %eax,espan_t_v(%ebp) // span->v = current_iv;
- movl st_spans(%esi),%eax
- movl %eax,espan_t_pnext(%ebp) // span->pnext = surf2->spans;
- movl %ebp,st_spans(%esi) // surf2->spans = span;
- addl $(espan_t_size),%ebp
-
-LInsertAndExit:
-// insert before surf2
- movl %esi,st_next(%edi) // surf->next = surf2;
- movl st_prev(%esi),%eax
- movl %eax,st_prev(%edi) // surf->prev = surf2->prev;
- movl %edi,st_prev(%esi) // surf2->prev = surf;
- movl %edi,st_next(%eax) // surf2->prev->next = surf;
-
-// ---------------------------------------------------------------
-// leading edge done
-// ---------------------------------------------------------------
-
-// ---------------------------------------------------------------
-// see if there are any more edges
-// ---------------------------------------------------------------
-
-Lgs_nextedge:
- movl et_next(%ebx),%ebx
- cmpl $(C(edge_tail)),%ebx
- jnz Lgs_edgeloop
-
-// clean up at the right edge
-Lgs_lastspan:
-
-// now that we've reached the right edge of the screen, we're done with any
-// unfinished surfaces, so emit a span for whatever's on top
- movl 0x12345678,%esi // surfaces[1].st_next
-LPatch3:
- movl C(edge_tail_u_shift20),%eax
- xorl %ecx,%ecx
- movl st_last_u(%esi),%edx
- subl %edx,%eax
- jle Lgs_resetspanstate
-
- movl %edx,espan_t_u(%ebp)
- movl %eax,espan_t_count(%ebp)
- movl C(current_iv),%eax
- movl %eax,espan_t_v(%ebp)
- movl st_spans(%esi),%eax
- movl %eax,espan_t_pnext(%ebp)
- movl %ebp,st_spans(%esi)
- addl $(espan_t_size),%ebp
-
-// reset spanstate for all surfaces in the surface stack
-Lgs_resetspanstate:
- movl %ecx,st_spanstate(%esi)
- movl st_next(%esi),%esi
- cmpl $0x12345678,%esi // &surfaces[1]
-LPatch4:
- jnz Lgs_resetspanstate
-
-// store the final span_p
- movl %ebp,C(span_p)
-
- popl %ebx // restore register variables
- popl %esi
- popl %edi
- popl %ebp // restore the caller's stack frame
- ret
-
-
-// ---------------------------------------------------------------
-// 1/z sorting for bmodels in the same leaf
-// ---------------------------------------------------------------
- .align 4
-Lxl_done:
- incl %edx
- movl %edx,st_spanstate(%edi)
-
- jmp Lgs_nextedge
-
-
- .align 4
-Lzcheck_for_newtop:
- movl et_u(%ebx),%eax
- subl $0xFFFFF,%eax
- movl %eax,Ltemp
- fildl Ltemp
-
- fmuls float_1_div_0100000h // fu = (float)(edge->u - 0xFFFFF) *
- // (1.0 / 0x100000);
-
- fld %st(0) // fu | fu
- fmuls st_d_zistepu(%edi) // fu*surf->d_zistepu | fu
- flds C(fv) // fv | fu*surf->d_zistepu | fu
- fmuls st_d_zistepv(%edi) // fv*surf->d_zistepv | fu*surf->d_zistepu | fu
- fxch %st(1) // fu*surf->d_zistepu | fv*surf->d_zistepv | fu
- fadds st_d_ziorigin(%edi) // fu*surf->d_zistepu + surf->d_ziorigin |
- // fv*surf->d_zistepv | fu
-
- flds st_d_zistepu(%esi) // surf2->d_zistepu |
- // fu*surf->d_zistepu + surf->d_ziorigin |
- // fv*surf->d_zistepv | fu
- fmul %st(3),%st(0) // fu*surf2->d_zistepu |
- // fu*surf->d_zistepu + surf->d_ziorigin |
- // fv*surf->d_zistepv | fu
- fxch %st(1) // fu*surf->d_zistepu + surf->d_ziorigin |
- // fu*surf2->d_zistepu |
- // fv*surf->d_zistepv | fu
- faddp %st(0),%st(2) // fu*surf2->d_zistepu | newzi | fu
-
- flds C(fv) // fv | fu*surf2->d_zistepu | newzi | fu
- fmuls st_d_zistepv(%esi) // fv*surf2->d_zistepv |
- // fu*surf2->d_zistepu | newzi | fu
- fld %st(2) // newzi | fv*surf2->d_zistepv |
- // fu*surf2->d_zistepu | newzi | fu
- fmuls float_point_999 // newzibottom | fv*surf2->d_zistepv |
- // fu*surf2->d_zistepu | newzi | fu
-
- fxch %st(2) // fu*surf2->d_zistepu | fv*surf2->d_zistepv |
- // newzibottom | newzi | fu
- fadds st_d_ziorigin(%esi) // fu*surf2->d_zistepu + surf2->d_ziorigin |
- // fv*surf2->d_zistepv | newzibottom | newzi |
- // fu
- faddp %st(0),%st(1) // testzi | newzibottom | newzi | fu
- fxch %st(1) // newzibottom | testzi | newzi | fu
-
-// if (newzibottom >= testzi)
-// goto newtop;
-
- fcomp %st(1) // testzi | newzi | fu
-
- fxch %st(1) // newzi | testzi | fu
- fmuls float_1_point_001 // newzitop | testzi | fu
- fxch %st(1) // testzi | newzitop | fu
-
- fnstsw %ax
- testb $0x01,%ah
- jz Lnewtop_fpop3
-
-// if (newzitop >= testzi)
-// {
-
- fcomp %st(1) // newzitop | fu
- fnstsw %ax
- testb $0x45,%ah
- jz Lsortloop_fpop2
-
-// if (surf->d_zistepu >= surf2->d_zistepu)
-// goto newtop;
-
- flds st_d_zistepu(%edi) // surf->d_zistepu | newzitop | fu
- fcomps st_d_zistepu(%esi) // newzitop | fu
- fnstsw %ax
- testb $0x01,%ah
- jz Lnewtop_fpop2
-
-Lsortloop_fpop2:
- fstp %st(0) // clear the FP stack
- fstp %st(0)
- movl st_key(%edi),%eax
- jmp Lsortloop
-
-
-.globl C(R_EdgeCodeEnd)
-C(R_EdgeCodeEnd):
-
-
-//----------------------------------------------------------------------
-// Surface array address code patching routine (stores into the instruction stream at the LPatchN labels)
-//----------------------------------------------------------------------
-
- .align 4
-.globl C(R_SurfacePatch)
-C(R_SurfacePatch):
-
- movl C(surfaces),%eax // %eax = current value of the surfaces pointer
- addl $(st_size),%eax // %eax = &surfaces[1]
- movl %eax,LPatch4-4 // overwrite the 4 bytes ending at LPatch4: the 0x12345678 placeholder of the "&surfaces[1]" compare
-
- addl $(st_next),%eax // %eax = &surfaces[1].next
- movl %eax,LPatch0-4 // NOTE(review): LPatch0 is defined outside this excerpt; presumably another surfaces[1].next load - confirm
- movl %eax,LPatch2-4 // placeholder address of "movl 0x12345678,%esi" (surf2 = surfaces[1].next) in the leading-edge code
- movl %eax,LPatch3-4 // placeholder address of the same load at Lgs_lastspan
-
- ret
-
-#endif // USE_ASM
-
diff --git a/src/refresh/sw/x86/polyset.S b/src/refresh/sw/x86/polyset.S
deleted file mode 100644
index 63b31dc..0000000
--- a/src/refresh/sw/x86/polyset.S
+++ /dev/null
@@ -1,1247 +0,0 @@
-//
-// d_polysa.s
-// x86 assembly-language polygon model drawing code
-//
-
-#include "common/x86/asm.h"
-#include "sw.h"
-
-#if USE_ASM
-
-// !!! if this is changed, it must be changed in d_polyse.c too !!!
-#define DPS_MAXSPANS MAXHEIGHT+1
- // 1 extra for spanpackage that marks end
-
-//#define SPAN_SIZE (((DPS_MAXSPANS + 1 + ((CACHE_SIZE - 1) / spanpackage_t_size)) + 1) * spanpackage_t_size)
-#define SPAN_SIZE (1024+1+1+1)*32
-
-
-
- .data
-
- .align 4
-p10_minus_p20: .single 0 // float scratch; stored with fstps and re-read with fmuls by the #if 0'd R_PolysetCalcGradients
-p01_minus_p21: .single 0 // float scratch; same use as p10_minus_p20
-temp0: .single 0 // not referenced in the visible part of this file
-temp1: .single 0 // not referenced in the visible part of this file
-Ltemp: .single 0 // not referenced in the visible part of this file
-
-aff8entryvec_table: .long LDraw8, LDraw7, LDraw6, LDraw5 // jump table for "jmp *aff8entryvec_table(,%eax,4)"; the index there is (-count) & 7
- .long LDraw4, LDraw3, LDraw2, LDraw1
-
-lzistepx: .long 0 // receives r_zistepx rotated by 16 bits at the top of R_PolysetDrawSpans8_Opaque
-
-
- .text
-
-#ifndef NeXT
- .extern C(D_PolysetSetEdgeTable)
- .extern C(D_RasterizeAliasPolySmooth)
-#endif
-
-//----------------------------------------------------------------------
-// affine triangle gradient calculation code
-//----------------------------------------------------------------------
-
-#if 0
-#define skinwidth 4+0
-
-.globl C(R_PolysetCalcGradients)
-C(R_PolysetCalcGradients):
-
-// p00_minus_p20 = r_p0[0] - r_p2[0];
-// p01_minus_p21 = r_p0[1] - r_p2[1];
-// p10_minus_p20 = r_p1[0] - r_p2[0];
-// p11_minus_p21 = r_p1[1] - r_p2[1];
-//
-// xstepdenominv = 1.0 / (p10_minus_p20 * p01_minus_p21 -
-// p00_minus_p20 * p11_minus_p21);
-//
-// ystepdenominv = -xstepdenominv;
-
- fildl C(r_p0)+0 // r_p0[0]
- fildl C(r_p2)+0 // r_p2[0] | r_p0[0]
- fildl C(r_p0)+4 // r_p0[1] | r_p2[0] | r_p0[0]
- fildl C(r_p2)+4 // r_p2[1] | r_p0[1] | r_p2[0] | r_p0[0]
- fildl C(r_p1)+0 // r_p1[0] | r_p2[1] | r_p0[1] | r_p2[0] | r_p0[0]
- fildl C(r_p1)+4 // r_p1[1] | r_p1[0] | r_p2[1] | r_p0[1] |
- // r_p2[0] | r_p0[0]
- fxch %st(3) // r_p0[1] | r_p1[0] | r_p2[1] | r_p1[1] |
- // r_p2[0] | r_p0[0]
- fsub %st(2),%st(0) // p01_minus_p21 | r_p1[0] | r_p2[1] | r_p1[1] |
- // r_p2[0] | r_p0[0]
- fxch %st(1) // r_p1[0] | p01_minus_p21 | r_p2[1] | r_p1[1] |
- // r_p2[0] | r_p0[0]
- fsub %st(4),%st(0) // p10_minus_p20 | p01_minus_p21 | r_p2[1] |
- // r_p1[1] | r_p2[0] | r_p0[0]
- fxch %st(5) // r_p0[0] | p01_minus_p21 | r_p2[1] |
- // r_p1[1] | r_p2[0] | p10_minus_p20
- fsubp %st(0),%st(4) // p01_minus_p21 | r_p2[1] | r_p1[1] |
- // p00_minus_p20 | p10_minus_p20
- fxch %st(2) // r_p1[1] | r_p2[1] | p01_minus_p21 |
- // p00_minus_p20 | p10_minus_p20
- fsubp %st(0),%st(1) // p11_minus_p21 | p01_minus_p21 |
- // p00_minus_p20 | p10_minus_p20
- fxch %st(1) // p01_minus_p21 | p11_minus_p21 |
- // p00_minus_p20 | p10_minus_p20
- flds C(d_xdenom) // d_xdenom | p01_minus_p21 | p11_minus_p21 |
- // p00_minus_p20 | p10_minus_p20
- fxch %st(4) // p10_minus_p20 | p01_minus_p21 | p11_minus_p21 |
- // p00_minus_p20 | d_xdenom
- fstps p10_minus_p20 // p01_minus_p21 | p11_minus_p21 |
- // p00_minus_p20 | d_xdenom
- fstps p01_minus_p21 // p11_minus_p21 | p00_minus_p20 | xstepdenominv
- fxch %st(2) // xstepdenominv | p00_minus_p20 | p11_minus_p21
-
-//// ceil () for light so positive steps are exaggerated, negative steps
-//// diminished, pushing us away from underflow toward overflow. Underflow is
-//// very visible, overflow is very unlikely, because of ambient lighting
-// t0 = r_p0[4] - r_p2[4];
-// t1 = r_p1[4] - r_p2[4];
-
- fildl C(r_p2)+16 // r_p2[4] | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fildl C(r_p0)+16 // r_p0[4] | r_p2[4] | xstepdenominv |
- // p00_minus_p20 | p11_minus_p21
- fildl C(r_p1)+16 // r_p1[4] | r_p0[4] | r_p2[4] | xstepdenominv |
- // p00_minus_p20 | p11_minus_p21
- fxch %st(2) // r_p2[4] | r_p0[4] | r_p1[4] | xstepdenominv |
- // p00_minus_p20 | p11_minus_p21
- fld %st(0) // r_p2[4] | r_p2[4] | r_p0[4] | r_p1[4] |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fsubrp %st(0),%st(2) // r_p2[4] | t0 | r_p1[4] | xstepdenominv |
- // p00_minus_p20 | p11_minus_p21
- fsubrp %st(0),%st(2) // t0 | t1 | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
-
-// r_lstepx = (int)
-// ceil((t1 * p01_minus_p21 - t0 * p11_minus_p21) * xstepdenominv);
-// r_lstepy = (int)
-// ceil((t1 * p00_minus_p20 - t0 * p10_minus_p20) * ystepdenominv);
-
- fld %st(0) // t0 | t0 | t1 | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fmul %st(5),%st(0) // t0*p11_minus_p21 | t0 | t1 | xstepdenominv |
- // p00_minus_p20 | p11_minus_p21
- fxch %st(2) // t1 | t0 | t0*p11_minus_p21 | xstepdenominv |
- // p00_minus_p20 | p11_minus_p21
- fld %st(0) // t1 | t1 | t0 | t0*p11_minus_p21 |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fmuls p01_minus_p21 // t1*p01_minus_p21 | t1 | t0 | t0*p11_minus_p21 |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fxch %st(2) // t0 | t1 | t1*p01_minus_p21 | t0*p11_minus_p21 |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fmuls p10_minus_p20 // t0*p10_minus_p20 | t1 | t1*p01_minus_p21 |
- // t0*p11_minus_p21 | xstepdenominv |
- // p00_minus_p20 | p11_minus_p21
- fxch %st(1) // t1 | t0*p10_minus_p20 | t1*p01_minus_p21 |
- // t0*p11_minus_p21 | xstepdenominv |
- // p00_minus_p20 | p11_minus_p21
- fmul %st(5),%st(0) // t1*p00_minus_p20 | t0*p10_minus_p20 |
- // t1*p01_minus_p21 | t0*p11_minus_p21 |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fxch %st(2) // t1*p01_minus_p21 | t0*p10_minus_p20 |
- // t1*p00_minus_p20 | t0*p11_minus_p21 |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fsubp %st(0),%st(3) // t0*p10_minus_p20 | t1*p00_minus_p20 |
- // t1*p01_minus_p21 - t0*p11_minus_p21 |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fsubrp %st(0),%st(1) // t1*p00_minus_p20 - t0*p10_minus_p20 |
- // t1*p01_minus_p21 - t0*p11_minus_p21 |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fld %st(2) // xstepdenominv |
- // t1*p00_minus_p20 - t0*p10_minus_p20 |
- // t1*p01_minus_p21 - t0*p11_minus_p21 |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fmuls float_minus_1 // ystepdenominv |
- // t1*p00_minus_p20 - t0*p10_minus_p20 |
- // t1*p01_minus_p21 - t0*p11_minus_p21 |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fxch %st(2) // t1*p01_minus_p21 - t0*p11_minus_p21 |
- // t1*p00_minus_p20 - t0*p10_minus_p20 |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fmul %st(3),%st(0) // (t1*p01_minus_p21 - t0*p11_minus_p21)*
- // xstepdenominv |
- // t1*p00_minus_p20 - t0*p10_minus_p20 |
- // ystepdenominv | xstepdenominv |
- // p00_minus_p20 | p11_minus_p21
- fxch %st(1) // t1*p00_minus_p20 - t0*p10_minus_p20 |
- // (t1*p01_minus_p21 - t0*p11_minus_p21)*
- // xstepdenominv | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fmul %st(2),%st(0) // (t1*p00_minus_p20 - t0*p10_minus_p20)*
- // ystepdenominv |
- // (t1*p01_minus_p21 - t0*p11_minus_p21)*
- // xstepdenominv | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fldcw C(ceil_cw)
- fistpl C(r_lstepy) // r_lstepx | ystepdenominv | xstepdenominv |
- // p00_minus_p20 | p11_minus_p21
- fistpl C(r_lstepx) // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fldcw chop_cw
-
-// t0 = r_p0[2] - r_p2[2];
-// t1 = r_p1[2] - r_p2[2];
-
- fildl C(r_p2)+8 // r_p2[2] | ystepdenominv | xstepdenominv |
- // p00_minus_p20 | p11_minus_p21
- fildl C(r_p0)+8 // r_p0[2] | r_p2[2] | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fildl C(r_p1)+8 // r_p1[2] | r_p0[2] | r_p2[2] | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fxch %st(2) // r_p2[2] | r_p0[2] | r_p1[2] | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fld %st(0) // r_p2[2] | r_p2[2] | r_p0[2] | r_p1[2] |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fsubrp %st(0),%st(2) // r_p2[2] | t0 | r_p1[2] | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fsubrp %st(0),%st(2) // t0 | t1 | ystepdenominv | xstepdenominv |
- // p00_minus_p20 | p11_minus_p21
-
-// r_sstepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) *
-// xstepdenominv);
-// r_sstepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) *
-// ystepdenominv);
-
- fld %st(0) // t0 | t0 | t1 | ystepdenominv | xstepdenominv | p00_minus_p20 | p11_minus_p21
- fmul %st(6),%st(0) // t0*p11_minus_p21 | t0 | t1 | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fxch %st(2) // t1 | t0 | t0*p11_minus_p21 | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fld %st(0) // t1 | t1 | t0 | t0*p11_minus_p21 |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fmuls p01_minus_p21 // t1*p01_minus_p21 | t1 | t0 | t0*p11_minus_p21 |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fxch %st(2) // t0 | t1 | t1*p01_minus_p21 | t0*p11_minus_p21 |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fmuls p10_minus_p20 // t0*p10_minus_p20 | t1 | t1*p01_minus_p21 |
- // t0*p11_minus_p21 | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fxch %st(1) // t1 | t0*p10_minus_p20 | t1*p01_minus_p21 |
- // t0*p11_minus_p21 | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fmul %st(6),%st(0) // t1*p00_minus_p20 | t0*p10_minus_p20 |
- // t1*p01_minus_p21 | t0*p11_minus_p21 |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fxch %st(2) // t1*p01_minus_p21 | t0*p10_minus_p20 |
- // t1*p00_minus_p20 | t0*p11_minus_p21 |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fsubp %st(0),%st(3) // t0*p10_minus_p20 | t1*p00_minus_p20 |
- // t1*p01_minus_p21 - t0*p11_minus_p21 |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fsubrp %st(0),%st(1) // t1*p00_minus_p20 - t0*p10_minus_p20 |
- // t1*p01_minus_p21 - t0*p11_minus_p21 |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fmul %st(2),%st(0) // (t1*p00_minus_p20 - t0*p10_minus_p20)*
- // ystepdenominv |
- // t1*p01_minus_p21 - t0*p11_minus_p21 |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fxch %st(1) // t1*p01_minus_p21 - t0*p11_minus_p21 |
- // (t1*p00_minus_p20 - t0*p10_minus_p20)*
- // ystepdenominv | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fmul %st(3),%st(0) // (t1*p01_minus_p21 - t0*p11_minus_p21)*
- // xstepdenominv |
- // (t1*p00_minus_p20 - t0*p10_minus_p20)*
- // ystepdenominv | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fxch %st(1) // (t1*p00_minus_p20 - t0*p10_minus_p20)*
- // ystepdenominv |
- // (t1*p01_minus_p21 - t0*p11_minus_p21)*
- // xstepdenominv | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fistpl C(r_sstepy) // r_sstepx | ystepdenominv | xstepdenominv |
- // p00_minus_p20 | p11_minus_p21
- fistpl C(r_sstepx) // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
-
-// t0 = r_p0[3] - r_p2[3];
-// t1 = r_p1[3] - r_p2[3];
-
- fildl C(r_p2)+12 // r_p2[3] | ystepdenominv | xstepdenominv |
- // p00_minus_p20 | p11_minus_p21
- fildl C(r_p0)+12 // r_p0[3] | r_p2[3] | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fildl C(r_p1)+12 // r_p1[3] | r_p0[3] | r_p2[3] | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fxch %st(2) // r_p2[3] | r_p0[3] | r_p1[3] | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fld %st(0) // r_p2[3] | r_p2[3] | r_p0[3] | r_p1[3] |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fsubrp %st(0),%st(2) // r_p2[3] | t0 | r_p1[3] | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fsubrp %st(0),%st(2) // t0 | t1 | ystepdenominv | xstepdenominv |
- // p00_minus_p20 | p11_minus_p21
-
-// r_tstepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) *
-// xstepdenominv);
-// r_tstepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) *
-// ystepdenominv);
-
- fld %st(0) // t0 | t0 | t1 | ystepdenominv | xstepdenominv |
- // p00_minus_p20 | p11_minus_p21
- fmul %st(6),%st(0) // t0*p11_minus_p21 | t0 | t1 | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fxch %st(2) // t1 | t0 | t0*p11_minus_p21 | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fld %st(0) // t1 | t1 | t0 | t0*p11_minus_p21 |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fmuls p01_minus_p21 // t1*p01_minus_p21 | t1 | t0 | t0*p11_minus_p21 |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fxch %st(2) // t0 | t1 | t1*p01_minus_p21 | t0*p11_minus_p21 |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fmuls p10_minus_p20 // t0*p10_minus_p20 | t1 | t1*p01_minus_p21 |
- // t0*p11_minus_p21 | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fxch %st(1) // t1 | t0*p10_minus_p20 | t1*p01_minus_p21 |
- // t0*p11_minus_p21 | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fmul %st(6),%st(0) // t1*p00_minus_p20 | t0*p10_minus_p20 |
- // t1*p01_minus_p21 | t0*p11_minus_p21 |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fxch %st(2) // t1*p01_minus_p21 | t0*p10_minus_p20 |
- // t1*p00_minus_p20 | t0*p11_minus_p21 |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fsubp %st(0),%st(3) // t0*p10_minus_p20 | t1*p00_minus_p20 |
- // t1*p01_minus_p21 - t0*p11_minus_p21 |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fsubrp %st(0),%st(1) // t1*p00_minus_p20 - t0*p10_minus_p20 |
- // t1*p01_minus_p21 - t0*p11_minus_p21 |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fmul %st(2),%st(0) // (t1*p00_minus_p20 - t0*p10_minus_p20)*
- // ystepdenominv |
- // t1*p01_minus_p21 - t0*p11_minus_p21 |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fxch %st(1) // t1*p01_minus_p21 - t0*p11_minus_p21 |
- // (t1*p00_minus_p20 - t0*p10_minus_p20)*
- // ystepdenominv | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fmul %st(3),%st(0) // (t1*p01_minus_p21 - t0*p11_minus_p21)*
- // xstepdenominv |
- // (t1*p00_minus_p20 - t0*p10_minus_p20)*
- // ystepdenominv | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fxch %st(1) // (t1*p00_minus_p20 - t0*p10_minus_p20)*
- // ystepdenominv |
- // (t1*p01_minus_p21 - t0*p11_minus_p21)*
- // xstepdenominv | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fistpl C(r_tstepy) // r_tstepx | ystepdenominv | xstepdenominv |
- // p00_minus_p20 | p11_minus_p21
- fistpl C(r_tstepx) // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
-
-// t0 = r_p0[5] - r_p2[5];
-// t1 = r_p1[5] - r_p2[5];
-
- fildl C(r_p2)+20 // r_p2[5] | ystepdenominv | xstepdenominv |
- // p00_minus_p20 | p11_minus_p21
- fildl C(r_p0)+20 // r_p0[5] | r_p2[5] | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fildl C(r_p1)+20 // r_p1[5] | r_p0[5] | r_p2[5] | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fxch %st(2) // r_p2[5] | r_p0[5] | r_p1[5] | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fld %st(0) // r_p2[5] | r_p2[5] | r_p0[5] | r_p1[5] |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fsubrp %st(0),%st(2) // r_p2[5] | t0 | r_p1[5] | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fsubrp %st(0),%st(2) // t0 | t1 | ystepdenominv | xstepdenominv |
- // p00_minus_p20 | p11_minus_p21
-
-// r_zistepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) *
-// xstepdenominv);
-// r_zistepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) *
-// ystepdenominv);
-
- fld %st(0) // t0 | t0 | t1 | ystepdenominv | xstepdenominv |
- // p00_minus_p20 | p11_minus_p21
- fmulp %st(0),%st(6) // t0 | t1 | ystepdenominv | xstepdenominv |
- // p00_minus_p20 | t0*p11_minus_p21
- fxch %st(1) // t1 | t0 | ystepdenominv | xstepdenominv |
- // p00_minus_p20 | t0*p11_minus_p21
- fld %st(0) // t1 | t1 | t0 | ystepdenominv | xstepdenominv |
- // p00_minus_p20 | t0*p11_minus_p21
- fmuls p01_minus_p21 // t1*p01_minus_p21 | t1 | t0 | ystepdenominv |
- // xstepdenominv | p00_minus_p20 |
- // t0*p11_minus_p21
- fxch %st(2) // t0 | t1 | t1*p01_minus_p21 | ystepdenominv |
- // xstepdenominv | p00_minus_p20 |
- // t0*p11_minus_p21
- fmuls p10_minus_p20 // t0*p10_minus_p20 | t1 | t1*p01_minus_p21 |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // t0*p11_minus_p21
- fxch %st(1) // t1 | t0*p10_minus_p20 | t1*p01_minus_p21 |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // t0*p11_minus_p21
- fmulp %st(0),%st(5) // t0*p10_minus_p20 | t1*p01_minus_p21 |
- // ystepdenominv | xstepdenominv |
- // t1*p00_minus_p20 | t0*p11_minus_p21
- fxch %st(5) // t0*p11_minus_p21 | t1*p01_minus_p21 |
- // ystepdenominv | xstepdenominv |
- // t1*p00_minus_p20 | t0*p10_minus_p20
- fsubrp %st(0),%st(1) // t1*p01_minus_p21 - t0*p11_minus_p21 |
- // ystepdenominv | xstepdenominv |
- // t1*p00_minus_p20 | t0*p10_minus_p20
- fxch %st(3) // t1*p00_minus_p20 | ystepdenominv |
- // xstepdenominv |
- // t1*p01_minus_p21 - t0*p11_minus_p21 |
- // t0*p10_minus_p20
- fsubp %st(0),%st(4) // ystepdenominv | xstepdenominv |
- // t1*p01_minus_p21 - t0*p11_minus_p21 |
- // t1*p00_minus_p20 - t0*p10_minus_p20
- fxch %st(1) // xstepdenominv | ystepdenominv |
- // t1*p01_minus_p21 - t0*p11_minus_p21 |
- // t1*p00_minus_p20 - t0*p10_minus_p20
- fmulp %st(0),%st(2) // ystepdenominv |
- // (t1*p01_minus_p21 - t0*p11_minus_p21) *
- // xstepdenominv |
- // t1*p00_minus_p20 - t0*p10_minus_p20
- fmulp %st(0),%st(2) // (t1*p01_minus_p21 - t0*p11_minus_p21) *
- // xstepdenominv |
- // (t1*p00_minus_p20 - t0*p10_minus_p20) *
- // ystepdenominv
- fistpl C(r_zistepx) // (t1*p00_minus_p20 - t0*p10_minus_p20) *
- // ystepdenominv
- fistpl C(r_zistepy)
-
-// a_sstepxfrac = r_sstepx << 16;
-// a_tstepxfrac = r_tstepx << 16;
-//
-// a_ststepxwhole = r_affinetridesc.skinwidth * (r_tstepx >> 16) +
-// (r_sstepx >> 16);
-
- movl C(r_sstepx),%eax
- movl C(r_tstepx),%edx
- shll $16,%eax
- shll $16,%edx
- movl %eax,C(a_sstepxfrac)
- movl %edx,C(a_tstepxfrac)
-
- movl C(r_sstepx),%ecx
- movl C(r_tstepx),%eax
- sarl $16,%ecx
- sarl $16,%eax
- imull skinwidth(%esp)
- addl %ecx,%eax
- movl %eax,C(a_ststepxwhole)
-
- ret
-
-#endif
-
-//----------------------------------------------------------------------
-// recursive subdivision affine triangle drawing code
-// takes three vertex pointers (lp1, lp2, lp3) on the stack; each vertex is an array of ints
-// not C-callable because of stdcall return ("ret $12" pops the three arguments)
-//----------------------------------------------------------------------
-
-#define lp1 4+16
-#define lp2 8+16
-#define lp3 12+16
-
-.globl C(D_PolysetRecursiveTriangle)
-C(D_PolysetRecursiveTriangle):
- pushl %ebp // preserve caller stack frame pointer
- pushl %esi // preserve register variables
- pushl %edi
- pushl %ebx
-
-// int *temp;
-// int d;
-// int new[6];
-// int i;
-// int z;
-// short *zbuf;
- movl lp2(%esp),%esi // %esi = lp2 (arg offsets skip the return address plus the 4 registers pushed above)
- movl lp1(%esp),%ebx // %ebx = lp1
- movl lp3(%esp),%edi // %edi = lp3
-
-// d = lp2[0] - lp1[0];
-// if (d < -1 || d > 1)
-// goto split;
- movl 0(%esi),%eax
-
- movl 0(%ebx),%edx
- movl 4(%esi),%ebp
-
- subl %edx,%eax // d = lp2[0] - lp1[0]
- movl 4(%ebx),%ecx
-
- subl %ecx,%ebp // lp2[1] - lp1[1], tested in the next step
- incl %eax // d + 1, so a single unsigned compare covers both bounds
-
- cmpl $2,%eax
- ja LSplit // (unsigned)(d + 1) > 2 <=> d < -1 || d > 1
-
-// d = lp2[1] - lp1[1];
-// if (d < -1 || d > 1)
-// goto split;
- movl 0(%edi),%eax // preload lp3[0] for the next test
- incl %ebp
-
- cmpl $2,%ebp
- ja LSplit
-
-// d = lp3[0] - lp2[0];
-// if (d < -1 || d > 1)
-// goto split2;
- movl 0(%esi),%edx
- movl 4(%edi),%ebp
-
- subl %edx,%eax // d = lp3[0] - lp2[0]
- movl 4(%esi),%ecx
-
- subl %ecx,%ebp // lp3[1] - lp2[1], tested in the next step
- incl %eax
-
- cmpl $2,%eax
- ja LSplit2
-
-// d = lp3[1] - lp2[1];
-// if (d < -1 || d > 1)
-// goto split2;
- movl 0(%ebx),%eax // preload lp1[0] for the next test
- incl %ebp
-
- cmpl $2,%ebp
- ja LSplit2
-
-// d = lp1[0] - lp3[0];
-// if (d < -1 || d > 1)
-// goto split3;
- movl 0(%edi),%edx
- movl 4(%ebx),%ebp
-
- subl %edx,%eax // d = lp1[0] - lp3[0]
- movl 4(%edi),%ecx
-
- subl %ecx,%ebp // lp1[1] - lp3[1]
- incl %eax
-
- incl %ebp
- movl %ebx,%edx // temp = lp1 (consumed at LSplit3)
-
- cmpl $2,%eax
- ja LSplit3
-
-// d = lp1[1] - lp3[1];
-// if (d < -1 || d > 1)
-// {
-//split3:
-// temp = lp1;
-// lp1 = lp3;
-// lp3 = lp2;
-// lp2 = temp;
-// goto split;
-// }
-//
-// return; // entire tri is filled
-//
- cmpl $2,%ebp
- jna LDone
-
-LSplit3:
- movl %edi,%ebx // lp1 = lp3
- movl %esi,%edi // lp3 = lp2
- movl %edx,%esi // lp2 = temp (%edx still holds the old lp1)
- jmp LSplit
-
-//split2:
-LSplit2:
-
-// temp = lp1;
-// lp1 = lp2;
-// lp2 = lp3;
-// lp3 = temp;
- movl %ebx,%eax // temp = lp1
- movl %esi,%ebx // lp1 = lp2
- movl %edi,%esi // lp2 = lp3
- movl %eax,%edi // lp3 = temp
-
-//split:
-LSplit:
-
- subl $24,%esp // allocate space for a new vertex
-
-//// split this edge
-// new[0] = (lp1[0] + lp2[0]) >> 1;
-// new[1] = (lp1[1] + lp2[1]) >> 1;
-// new[2] = (lp1[2] + lp2[2]) >> 1;
-// new[3] = (lp1[3] + lp2[3]) >> 1;
-// new[5] = (lp1[5] + lp2[5]) >> 1;
- movl 8(%ebx),%eax // lp1[2]
-
- movl 8(%esi),%edx // lp2[2]
- movl 12(%ebx),%ecx // lp1[3]
-
- addl %edx,%eax
- movl 12(%esi),%edx // lp2[3]
-
- sarl $1,%eax
- addl %edx,%ecx
-
- movl %eax,8(%esp) // new[2]
- movl 20(%ebx),%eax // lp1[5]
-
- sarl $1,%ecx
- movl 20(%esi),%edx // lp2[5]
-
- movl %ecx,12(%esp) // new[3]
- addl %edx,%eax
-
- movl 0(%ebx),%ecx // lp1[0]; stays in %ecx for the leading-edge test below
- movl 0(%esi),%edx // lp2[0]
-
- sarl $1,%eax
- addl %ecx,%edx
-
- movl %eax,20(%esp) // new[5]
- movl 4(%ebx),%eax // lp1[1]; stays in %eax for the leading-edge test below
-
- sarl $1,%edx
- movl 4(%esi),%ebp // lp2[1]
-
- movl %edx,0(%esp) // new[0]
- addl %eax,%ebp
-
- sarl $1,%ebp
- movl %ebp,4(%esp) // new[1] (the slot at 16(%esp), new[4], is never written)
-
-//// draw the point if splitting a leading edge
-// if (lp2[1] > lp1[1])
-// goto nodraw;
- cmpl %eax,4(%esi) // compare lp2[1] with lp1[1]
- jg LNoDraw
-
-// if ((lp2[1] == lp1[1]) && (lp2[0] < lp1[0]))
-// goto nodraw;
- movl 0(%esi),%edx // lp2[0]; movl leaves the flags of the cmpl above intact
- jnz LDraw // lp2[1] != lp1[1]
-
- cmpl %ecx,%edx // compare lp2[0] with lp1[0]
- jl LNoDraw
-
-LDraw:
-
-// z = new[5] >> 16;
- movl 20(%esp),%edx
- movl 4(%esp),%ecx // new[1]
-
- sarl $16,%edx
- movl 0(%esp),%ebp // new[0]
-
-// zbuf = zspantable[new[1]] + new[0];
- movl C(zspantable)(,%ecx,4),%eax
-
-// if (z >= *zbuf)
-// {
- cmpw (%eax,%ebp,2),%dx
- jnge LNoDraw // z < *zbuf: point is hidden, skip the write
-
-// int pix;
-//
-// *zbuf = z;
- movw %dx,(%eax,%ebp,2)
-
-// pix = d_pcolormap[skintable[new[3]>>16][new[2]>>16]];
- movl 12(%esp),%eax
-
- sarl $16,%eax
- movl 8(%esp),%edx
-
- sarl $16,%edx
- subl %ecx,%ecx // clear %ecx so the byte loaded into %cl is a zero-extended index
-
- movl C(skintable)(,%eax,4),%eax
- movl 4(%esp),%ebp // new[1]
-
- movb (%eax,%edx,),%cl // skin texel
- movl C(d_pcolormap),%edx
-
- movb (%edx,%ecx,),%dl // pix
- movl 0(%esp),%ecx // new[0]
-
-// d_viewbuffer[d_scantable[new[1]] + new[0]] = pix;
- movl C(d_scantable)(,%ebp,4),%eax
- addl %eax,%ecx
- movl C(d_viewbuffer),%eax
- movb %dl,(%eax,%ecx,1)
-
-// }
-//
-//nodraw:
-LNoDraw:
-
-//// recursively continue
-// D_PolysetRecursiveTriangle (lp3, lp1, new);
- pushl %esp // address of new[]; arguments are pushed last-to-first
- pushl %ebx
- pushl %edi
- call C(D_PolysetRecursiveTriangle)
-
-// D_PolysetRecursiveTriangle (lp3, new, lp2);
- movl %esp,%ebx // the callee popped its arguments, so %esp points at new[] again
- pushl %esi
- pushl %ebx
- pushl %edi
- call C(D_PolysetRecursiveTriangle)
- addl $24,%esp // release new[]
-
-LDone:
- popl %ebx // restore register variables
- popl %edi
- popl %esi
- popl %ebp // restore caller stack frame pointer
- ret $12 // return and pop the three pointer arguments
-
-
-//----------------------------------------------------------------------
-// 8-bpp horizontal span drawing code for affine polygons, with smooth
-// shading and no transparency
-//----------------------------------------------------------------------
-
-#define pspans 4+8
-
-.globl C(D_PolysetAff8Start)
-C(D_PolysetAff8Start):
-
-.globl C(R_PolysetDrawSpans8_Opaque)
-C(R_PolysetDrawSpans8_Opaque):
- pushl %esi // preserve register variables
- pushl %ebx
-
- movl pspans(%esp),%esi // point to the first span descriptor
- movl C(r_zistepx),%ecx
-
- pushl %ebp // preserve caller's stack frame
- pushl %edi
-
- rorl $16,%ecx // put high 16 bits of 1/z step in low word
- movl spanpackage_t_count(%esi),%edx
-
- movl %ecx,lzistepx
-
-LSpanLoop:
-
-// lcount = d_aspancount - pspanpackage->count;
-//
-// errorterm += erroradjustup;
-// if (errorterm >= 0)
-// {
-// d_aspancount += d_countextrastep;
-// errorterm -= erroradjustdown;
-// }
-// else
-// {
-// d_aspancount += ubasestep;
-// }
- movl C(d_aspancount),%eax
- subl %edx,%eax
-
- movl C(erroradjustup),%edx
- movl C(errorterm),%ebx
- addl %edx,%ebx
- js LNoTurnover
-
- movl C(erroradjustdown),%edx
- movl C(d_countextrastep),%edi
- subl %edx,%ebx
- movl C(d_aspancount),%ebp
- movl %ebx,C(errorterm)
- addl %edi,%ebp
- movl %ebp,C(d_aspancount)
- jmp LRightEdgeStepped
-
-LNoTurnover:
- movl C(d_aspancount),%edi
- movl C(ubasestep),%edx
- movl %ebx,C(errorterm)
- addl %edx,%edi
- movl %edi,C(d_aspancount)
-
-LRightEdgeStepped:
- cmpl $1,%eax
-
- jl LNextSpan
- jz LExactlyOneLong
-
-//
-// set up advancetable
-//
- movl C(a_ststepxwhole),%ecx
- movl C(r_affinetridesc)+atd_skinwidth,%edx
-
- movl %ecx,advancetable+4 // advance base in t
- addl %edx,%ecx
-
- movl %ecx,advancetable // advance extra in t
- movl C(a_tstepxfrac),%ecx
-
- movw C(r_lstepx),%cx
- movl %eax,%edx // count
-
- movl %ecx,tstep
- addl $7,%edx
-
- shrl $3,%edx // count of full and partial loops
- movl spanpackage_t_sfrac(%esi),%ebx
-
- movw %dx,%bx
- movl spanpackage_t_pz(%esi),%ecx
-
- negl %eax
-
- movl spanpackage_t_pdest(%esi),%edi
- andl $7,%eax // 0->0, 1->7, 2->6, ... , 7->1
-
- subl %eax,%edi // compensate for hardwired offsets
- subl %eax,%ecx
-
- subl %eax,%ecx
- movl spanpackage_t_tfrac(%esi),%edx
-
- movw spanpackage_t_light(%esi),%dx
- movl spanpackage_t_zi(%esi),%ebp
-
- rorl $16,%ebp // put high 16 bits of 1/z in low word
- pushl %esi
-
- movl spanpackage_t_ptex(%esi),%esi
- jmp *aff8entryvec_table(,%eax,4)
-
-// %bx = count of full and partial loops
-// %ebx high word = sfrac
-// %ecx = pz
-// %dx = light
-// %edx high word = tfrac
-// %esi = ptex
-// %edi = pdest
-// %ebp = 1/z
-// tstep low word = C(r_lstepx)
-// tstep high word = C(a_tstepxfrac)
-// C(a_sstepxfrac) low word = 0
-// C(a_sstepxfrac) high word = C(a_sstepxfrac)
-
-LDrawLoop:
-
-// FIXME: do we need to clamp light? We may need at least a buffer bit to
-// keep it from poking into tfrac and causing problems
-
-LDraw8:
- cmpw (%ecx),%bp
- jl Lp1
- xorl %eax,%eax
- movb %dh,%ah
- movb (%esi),%al
- movw %bp,(%ecx)
- movb 0x12345678(%eax),%al
-LPatch8:
- movb %al,(%edi)
-Lp1:
- addl tstep,%edx
- sbbl %eax,%eax
- addl lzistepx,%ebp
- adcl $0,%ebp
- addl C(a_sstepxfrac),%ebx
- adcl advancetable+4(,%eax,4),%esi
-
-LDraw7:
- cmpw 2(%ecx),%bp
- jl Lp2
- xorl %eax,%eax
- movb %dh,%ah
- movb (%esi),%al
- movw %bp,2(%ecx)
- movb 0x12345678(%eax),%al
-LPatch7:
- movb %al,1(%edi)
-Lp2:
- addl tstep,%edx
- sbbl %eax,%eax
- addl lzistepx,%ebp
- adcl $0,%ebp
- addl C(a_sstepxfrac),%ebx
- adcl advancetable+4(,%eax,4),%esi
-
-LDraw6:
- cmpw 4(%ecx),%bp
- jl Lp3
- xorl %eax,%eax
- movb %dh,%ah
- movb (%esi),%al
- movw %bp,4(%ecx)
- movb 0x12345678(%eax),%al
-LPatch6:
- movb %al,2(%edi)
-Lp3:
- addl tstep,%edx
- sbbl %eax,%eax
- addl lzistepx,%ebp
- adcl $0,%ebp
- addl C(a_sstepxfrac),%ebx
- adcl advancetable+4(,%eax,4),%esi
-
-LDraw5:
- cmpw 6(%ecx),%bp
- jl Lp4
- xorl %eax,%eax
- movb %dh,%ah
- movb (%esi),%al
- movw %bp,6(%ecx)
- movb 0x12345678(%eax),%al
-LPatch5:
- movb %al,3(%edi)
-Lp4:
- addl tstep,%edx
- sbbl %eax,%eax
- addl lzistepx,%ebp
- adcl $0,%ebp
- addl C(a_sstepxfrac),%ebx
- adcl advancetable+4(,%eax,4),%esi
-
-LDraw4:
- cmpw 8(%ecx),%bp
- jl Lp5
- xorl %eax,%eax
- movb %dh,%ah
- movb (%esi),%al
- movw %bp,8(%ecx)
- movb 0x12345678(%eax),%al
-LPatch4:
- movb %al,4(%edi)
-Lp5:
- addl tstep,%edx
- sbbl %eax,%eax
- addl lzistepx,%ebp
- adcl $0,%ebp
- addl C(a_sstepxfrac),%ebx
- adcl advancetable+4(,%eax,4),%esi
-
-LDraw3:
- cmpw 10(%ecx),%bp
- jl Lp6
- xorl %eax,%eax
- movb %dh,%ah
- movb (%esi),%al
- movw %bp,10(%ecx)
- movb 0x12345678(%eax),%al
-LPatch3:
- movb %al,5(%edi)
-Lp6:
- addl tstep,%edx
- sbbl %eax,%eax
- addl lzistepx,%ebp
- adcl $0,%ebp
- addl C(a_sstepxfrac),%ebx
- adcl advancetable+4(,%eax,4),%esi
-
-LDraw2:
- cmpw 12(%ecx),%bp
- jl Lp7
- xorl %eax,%eax
- movb %dh,%ah
- movb (%esi),%al
- movw %bp,12(%ecx)
- movb 0x12345678(%eax),%al
-LPatch2:
- movb %al,6(%edi)
-Lp7:
- addl tstep,%edx
- sbbl %eax,%eax
- addl lzistepx,%ebp
- adcl $0,%ebp
- addl C(a_sstepxfrac),%ebx
- adcl advancetable+4(,%eax,4),%esi
-
-LDraw1:
- cmpw 14(%ecx),%bp
- jl Lp8
- xorl %eax,%eax
- movb %dh,%ah
- movb (%esi),%al
- movw %bp,14(%ecx)
- movb 0x12345678(%eax),%al
-LPatch1:
- movb %al,7(%edi)
-Lp8:
- addl tstep,%edx
- sbbl %eax,%eax
- addl lzistepx,%ebp
- adcl $0,%ebp
- addl C(a_sstepxfrac),%ebx
- adcl advancetable+4(,%eax,4),%esi
-
- addl $8,%edi
- addl $16,%ecx
-
- decw %bx
- jnz LDrawLoop
-
- popl %esi // restore spans pointer
-LNextSpan:
- addl $(spanpackage_t_size),%esi // point to next span
-LNextSpanESISet:
- movl spanpackage_t_count(%esi),%edx
- cmpl $-999999,%edx // any more spans?
- jnz LSpanLoop // yes
-
- popl %edi
- popl %ebp // restore the caller's stack frame
- popl %ebx // restore register variables
- popl %esi
- ret
-
-
-// draw a one-long span
-
-LExactlyOneLong:
-
- movl spanpackage_t_pz(%esi),%ecx
- movl spanpackage_t_zi(%esi),%ebp
-
- rorl $16,%ebp // put high 16 bits of 1/z in low word
- movl spanpackage_t_ptex(%esi),%ebx
-
- cmpw (%ecx),%bp
- jl LNextSpan
- xorl %eax,%eax
- movl spanpackage_t_pdest(%esi),%edi
- movb spanpackage_t_light+1(%esi),%ah
- addl $(spanpackage_t_size),%esi // point to next span
- movb (%ebx),%al
- movw %bp,(%ecx)
- movb 0x12345678(%eax),%al
-LPatch9:
- movb %al,(%edi)
-
- jmp LNextSpanESISet
-
-.globl C(D_PolysetAff8End)
-C(D_PolysetAff8End):
-
-
-.globl C(D_Aff8Patch)
-C(D_Aff8Patch):
- movl C(d_pcolormap),%eax
- movl %eax,LPatch1-4
- movl %eax,LPatch2-4
- movl %eax,LPatch3-4
- movl %eax,LPatch4-4
- movl %eax,LPatch5-4
- movl %eax,LPatch6-4
- movl %eax,LPatch7-4
- movl %eax,LPatch8-4
- movl %eax,LPatch9-4
-
- ret
-
-//----------------------------------------------------------------------
-// Alias model triangle left-edge scanning code
-//----------------------------------------------------------------------
-
-#define height 4+16
-
-.globl C(R_PolysetScanLeftEdge)
-C(R_PolysetScanLeftEdge):
- pushl %ebp // preserve caller stack frame pointer
- pushl %esi // preserve register variables
- pushl %edi
- pushl %ebx
-
- movl height(%esp),%eax
- movl C(d_sfrac),%ecx
- andl $0xFFFF,%eax
- movl C(d_ptex),%ebx
- orl %eax,%ecx
- movl C(d_pedgespanpackage),%esi
- movl C(d_tfrac),%edx
- movl C(d_light),%edi
- movl C(d_zi),%ebp
-
-// %eax: scratch
-// %ebx: d_ptex
-// %ecx: d_sfrac in high word, count in low word
-// %edx: d_tfrac
-// %esi: d_pedgespanpackage, errorterm, scratch alternately
-// %edi: d_light
-// %ebp: d_zi
-
-// do
-// {
-
-LScanLoop:
-
-// d_pedgespanpackage->ptex = ptex;
-// d_pedgespanpackage->pdest = d_pdest;
-// d_pedgespanpackage->pz = d_pz;
-// d_pedgespanpackage->count = d_aspancount;
-// d_pedgespanpackage->light = d_light;
-// d_pedgespanpackage->zi = d_zi;
-// d_pedgespanpackage->sfrac = d_sfrac << 16;
-// d_pedgespanpackage->tfrac = d_tfrac << 16;
- movl %ebx,spanpackage_t_ptex(%esi)
- movl C(d_pdest),%eax
- movl %eax,spanpackage_t_pdest(%esi)
- movl C(d_pz),%eax
- movl %eax,spanpackage_t_pz(%esi)
- movl C(d_aspancount),%eax
- movl %eax,spanpackage_t_count(%esi)
- movl %edi,spanpackage_t_light(%esi)
- movl %ebp,spanpackage_t_zi(%esi)
- movl %ecx,spanpackage_t_sfrac(%esi)
- movl %edx,spanpackage_t_tfrac(%esi)
-
-// pretouch the next cache line
- movb spanpackage_t_size(%esi),%al
-
-// d_pedgespanpackage++;
- addl $(spanpackage_t_size),%esi
- movl C(erroradjustup),%eax
- movl %esi,C(d_pedgespanpackage)
-
-// errorterm += erroradjustup;
- movl C(errorterm),%esi
- addl %eax,%esi
- movl C(d_pdest),%eax
-
-// if (errorterm >= 0)
-// {
- js LNoLeftEdgeTurnover
-
-// errorterm -= erroradjustdown;
-// d_pdest += d_pdestextrastep;
- subl C(erroradjustdown),%esi
- addl C(d_pdestextrastep),%eax
- movl %esi,C(errorterm)
- movl %eax,C(d_pdest)
-
-// d_pz += d_pzextrastep;
-// d_aspancount += d_countextrastep;
-// d_ptex += d_ptexextrastep;
-// d_sfrac += d_sfracextrastep;
-// d_ptex += d_sfrac >> 16;
-// d_sfrac &= 0xFFFF;
-// d_tfrac += d_tfracextrastep;
- movl C(d_pz),%eax
- movl C(d_aspancount),%esi
- addl C(d_pzextrastep),%eax
- addl C(d_sfracextrastep),%ecx
- adcl C(d_ptexextrastep),%ebx
- addl C(d_countextrastep),%esi
- movl %eax,C(d_pz)
- movl C(d_tfracextrastep),%eax
- movl %esi,C(d_aspancount)
- addl %eax,%edx
-
-// if (d_tfrac & 0x10000)
-// {
- jnc LSkip1
-
-// d_ptex += r_affinetridesc.skinwidth;
-// d_tfrac &= 0xFFFF;
- addl C(r_affinetridesc)+atd_skinwidth,%ebx
-
-// }
-
-LSkip1:
-
-// d_light += d_lightextrastep;
-// d_zi += d_ziextrastep;
- addl C(d_lightextrastep),%edi
- addl C(d_ziextrastep),%ebp
-
-// }
- movl C(d_pedgespanpackage),%esi
- decl %ecx
- testl $0xFFFF,%ecx
- jnz LScanLoop
-
- popl %ebx
- popl %edi
- popl %esi
- popl %ebp
- ret
-
-// else
-// {
-
-LNoLeftEdgeTurnover:
- movl %esi,C(errorterm)
-
-// d_pdest += d_pdestbasestep;
- addl C(d_pdestbasestep),%eax
- movl %eax,C(d_pdest)
-
-// d_pz += d_pzbasestep;
-// d_aspancount += ubasestep;
-// d_ptex += d_ptexbasestep;
-// d_sfrac += d_sfracbasestep;
-// d_ptex += d_sfrac >> 16;
-// d_sfrac &= 0xFFFF;
- movl C(d_pz),%eax
- movl C(d_aspancount),%esi
- addl C(d_pzbasestep),%eax
- addl C(d_sfracbasestep),%ecx
- adcl C(d_ptexbasestep),%ebx
- addl C(ubasestep),%esi
- movl %eax,C(d_pz)
- movl %esi,C(d_aspancount)
-
-// d_tfrac += d_tfracbasestep;
- movl C(d_tfracbasestep),%esi
- addl %esi,%edx
-
-// if (d_tfrac & 0x10000)
-// {
- jnc LSkip2
-
-// d_ptex += r_affinetridesc.skinwidth;
-// d_tfrac &= 0xFFFF;
- addl C(r_affinetridesc)+atd_skinwidth,%ebx
-
-// }
-
-LSkip2:
-
-// d_light += d_lightbasestep;
-// d_zi += d_zibasestep;
- addl C(d_lightbasestep),%edi
- addl C(d_zibasestep),%ebp
-
-// }
-// } while (--height);
- movl C(d_pedgespanpackage),%esi
- decl %ecx
- testl $0xFFFF,%ecx
- jnz LScanLoop
-
- popl %ebx
- popl %edi
- popl %esi
- popl %ebp
- ret
-
-#endif // USE_ASM
-
diff --git a/src/refresh/sw/x86/protect.c b/src/refresh/sw/x86/protect.c
deleted file mode 100644
index 969f2be..0000000
--- a/src/refresh/sw/x86/protect.c
+++ /dev/null
@@ -1,31 +0,0 @@
-#include "shared/shared.h"
-
-#ifdef _WIN32
-#define WIN32_LEAN_AND_MEAN
-#include <windows.h>
-#else
-#include <unistd.h>
-#include <sys/mman.h>
-#endif
-
-/*
-================
-Sys_MakeCodeWriteable
-================
-*/
-void Sys_MakeCodeWriteable(uintptr_t start, size_t length)
-{
-#ifdef _WIN32
- DWORD unused;
-
- if (!VirtualProtect((LPVOID)start, length, PAGE_EXECUTE_READWRITE, &unused))
- Com_Error(ERR_FATAL, "Protection change failed");
-#else
- int psize = getpagesize();
- uintptr_t addr = (start & ~(psize - 1)) - psize;
-
- if (mprotect((void *)addr, length + start - addr + psize, PROT_READ | PROT_WRITE | PROT_EXEC))
- Com_Error(ERR_FATAL, "Protection change failed");
-#endif
-}
-
diff --git a/src/refresh/sw/x86/span16.S b/src/refresh/sw/x86/span16.S
deleted file mode 100644
index 480ce60..0000000
--- a/src/refresh/sw/x86/span16.S
+++ /dev/null
@@ -1,1227 +0,0 @@
-//
-// d_draw16.s
-// x86 assembly-language horizontal 8-bpp span-drawing code, with 16-pixel
-// subdivision.
-//
-
-#include "common/x86/asm.h"
-#include "sw.h"
-
-#if USE_ASM
-
-//----------------------------------------------------------------------
-// 8-bpp horizontal span drawing code for polygons, with no transparency and
-// 16-pixel subdivision.
-//
-// Assumes there is at least one span in pspans, and that every span
-// contains at least one pixel
-//----------------------------------------------------------------------
-
- .data
-
- .text
-
-// out-of-line, rarely-needed clamping code
-
-LClampHigh0:
- movl C(bbextents),%esi
- jmp LClampReentry0
-LClampHighOrLow0:
- jg LClampHigh0
- xorl %esi,%esi
- jmp LClampReentry0
-
-LClampHigh1:
- movl C(bbextentt),%edx
- jmp LClampReentry1
-LClampHighOrLow1:
- jg LClampHigh1
- xorl %edx,%edx
- jmp LClampReentry1
-
-LClampLow2:
- movl $4096,%ebp
- jmp LClampReentry2
-LClampHigh2:
- movl C(bbextents),%ebp
- jmp LClampReentry2
-
-LClampLow3:
- movl $4096,%ecx
- jmp LClampReentry3
-LClampHigh3:
- movl C(bbextentt),%ecx
- jmp LClampReentry3
-
-LClampLow4:
- movl $4096,%eax
- jmp LClampReentry4
-LClampHigh4:
- movl C(bbextents),%eax
- jmp LClampReentry4
-
-LClampLow5:
- movl $4096,%ebx
- jmp LClampReentry5
-LClampHigh5:
- movl C(bbextentt),%ebx
- jmp LClampReentry5
-
-
-#define pspans 4+16
-
- .align 4
-.globl C(D_DrawSpans16)
-C(D_DrawSpans16):
- pushl %ebp // preserve caller's stack frame
- pushl %edi
- pushl %esi // preserve register variables
- pushl %ebx
-
-//
-// set up scaled-by-16 steps, for 16-long segments; also set up cacheblock
-// and span list pointers
-//
-// TODO: any overlap from rearranging?
- flds C(d_sdivzstepu)
- fmuls fp_16
- movl C(cacheblock),%edx
- flds C(d_tdivzstepu)
- fmuls fp_16
- movl pspans(%esp),%ebx // point to the first span descriptor
- flds C(d_zistepu)
- fmuls fp_16
- movl %edx,pbase // pbase = cacheblock
- fstps zi16stepu
- fstps tdivz16stepu
- fstps sdivz16stepu
-
-LSpanLoop:
-//
-// set up the initial s/z, t/z, and 1/z on the FP stack, and generate the
-// initial s and t values
-//
-// FIXME: pipeline FILD?
- fildl espan_t_v(%ebx)
- fildl espan_t_u(%ebx)
-
- fld %st(1) // dv | du | dv
- fmuls C(d_sdivzstepv) // dv*d_sdivzstepv | du | dv
- fld %st(1) // du | dv*d_sdivzstepv | du | dv
- fmuls C(d_sdivzstepu) // du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
- fld %st(2) // du | du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
- fmuls C(d_tdivzstepu) // du*d_tdivzstepu | du*d_sdivzstepu |
- // dv*d_sdivzstepv | du | dv
- fxch %st(1) // du*d_sdivzstepu | du*d_tdivzstepu |
- // dv*d_sdivzstepv | du | dv
- faddp %st(0),%st(2) // du*d_tdivzstepu |
- // du*d_sdivzstepu + dv*d_sdivzstepv | du | dv
- fxch %st(1) // du*d_sdivzstepu + dv*d_sdivzstepv |
- // du*d_tdivzstepu | du | dv
- fld %st(3) // dv | du*d_sdivzstepu + dv*d_sdivzstepv |
- // du*d_tdivzstepu | du | dv
- fmuls C(d_tdivzstepv) // dv*d_tdivzstepv |
- // du*d_sdivzstepu + dv*d_sdivzstepv |
- // du*d_tdivzstepu | du | dv
- fxch %st(1) // du*d_sdivzstepu + dv*d_sdivzstepv |
- // dv*d_tdivzstepv | du*d_tdivzstepu | du | dv
- fadds C(d_sdivzorigin) // sdivz = d_sdivzorigin + dv*d_sdivzstepv +
- // du*d_sdivzstepu; stays in %st(2) at end
- fxch %st(4) // dv | dv*d_tdivzstepv | du*d_tdivzstepu | du |
- // s/z
- fmuls C(d_zistepv) // dv*d_zistepv | dv*d_tdivzstepv |
- // du*d_tdivzstepu | du | s/z
- fxch %st(1) // dv*d_tdivzstepv | dv*d_zistepv |
- // du*d_tdivzstepu | du | s/z
- faddp %st(0),%st(2) // dv*d_zistepv |
- // dv*d_tdivzstepv + du*d_tdivzstepu | du | s/z
- fxch %st(2) // du | dv*d_tdivzstepv + du*d_tdivzstepu |
- // dv*d_zistepv | s/z
- fmuls C(d_zistepu) // du*d_zistepu |
- // dv*d_tdivzstepv + du*d_tdivzstepu |
- // dv*d_zistepv | s/z
- fxch %st(1) // dv*d_tdivzstepv + du*d_tdivzstepu |
- // du*d_zistepu | dv*d_zistepv | s/z
- fadds C(d_tdivzorigin) // tdivz = d_tdivzorigin + dv*d_tdivzstepv +
- // du*d_tdivzstepu; stays in %st(1) at end
- fxch %st(2) // dv*d_zistepv | du*d_zistepu | t/z | s/z
- faddp %st(0),%st(1) // dv*d_zistepv + du*d_zistepu | t/z | s/z
-
- flds fp_64k // fp_64k | dv*d_zistepv + du*d_zistepu | t/z | s/z
- fxch %st(1) // dv*d_zistepv + du*d_zistepu | fp_64k | t/z | s/z
- fadds C(d_ziorigin) // zi = d_ziorigin + dv*d_zistepv +
- // du*d_zistepu; stays in %st(0) at end
- // 1/z | fp_64k | t/z | s/z
-//
-// calculate and clamp s & t
-//
- fdivr %st(0),%st(1) // 1/z | z*64k | t/z | s/z
-
-//
-// point %edi to the first pixel in the span
-//
- movl C(d_viewbuffer),%ecx
- movl espan_t_v(%ebx),%eax
- movl %ebx,pspantemp // preserve spans pointer
-
- movl C(tadjust),%edx
- movl C(sadjust),%esi
- movl C(d_scantable)(,%eax,4),%edi // v * screenwidth
- addl %ecx,%edi
- movl espan_t_u(%ebx),%ecx
- addl %ecx,%edi // pdest = &pdestspan[scans->u];
- movl espan_t_count(%ebx),%ecx
-
-//
-// now start the FDIV for the end of the span
-//
- cmpl $16,%ecx
- ja LSetupNotLast1
-
- decl %ecx
- jz LCleanup1 // if only one pixel, no need to start an FDIV
- movl %ecx,spancountminus1
-
-// finish up the s and t calcs
- fxch %st(1) // z*64k | 1/z | t/z | s/z
-
- fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z
- fmul %st(4),%st(0) // s | z*64k | 1/z | t/z | s/z
- fxch %st(1) // z*64k | s | 1/z | t/z | s/z
- fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z
- fxch %st(1) // s | t | 1/z | t/z | s/z
- fistpl s // 1/z | t | t/z | s/z
- fistpl t // 1/z | t/z | s/z
-
- fildl spancountminus1
-
- flds C(d_tdivzstepu) // C(d_tdivzstepu) | spancountminus1
- flds C(d_zistepu) // C(d_zistepu) | C(d_tdivzstepu) | spancountminus1
- fmul %st(2),%st(0) // C(d_zistepu)*scm1 | C(d_tdivzstepu) | scm1
- fxch %st(1) // C(d_tdivzstepu) | C(d_zistepu)*scm1 | scm1
- fmul %st(2),%st(0) // C(d_tdivzstepu)*scm1 | C(d_zistepu)*scm1 | scm1
- fxch %st(2) // scm1 | C(d_zistepu)*scm1 | C(d_tdivzstepu)*scm1
- fmuls C(d_sdivzstepu) // C(d_sdivzstepu)*scm1 | C(d_zistepu)*scm1 |
- // C(d_tdivzstepu)*scm1
- fxch %st(1) // C(d_zistepu)*scm1 | C(d_sdivzstepu)*scm1 |
- // C(d_tdivzstepu)*scm1
- faddp %st(0),%st(3) // C(d_sdivzstepu)*scm1 | C(d_tdivzstepu)*scm1
- fxch %st(1) // C(d_tdivzstepu)*scm1 | C(d_sdivzstepu)*scm1
- faddp %st(0),%st(3) // C(d_sdivzstepu)*scm1
- faddp %st(0),%st(3)
-
- flds fp_64k
- fdiv %st(1),%st(0) // this is what we've gone to all this trouble to
- // overlap
- jmp LFDIVInFlight1
-
-LCleanup1:
-// finish up the s and t calcs
- fxch %st(1) // z*64k | 1/z | t/z | s/z
-
- fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z
- fmul %st(4),%st(0) // s | z*64k | 1/z | t/z | s/z
- fxch %st(1) // z*64k | s | 1/z | t/z | s/z
- fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z
- fxch %st(1) // s | t | 1/z | t/z | s/z
- fistpl s // 1/z | t | t/z | s/z
- fistpl t // 1/z | t/z | s/z
- jmp LFDIVInFlight1
-
- .align 4
-LSetupNotLast1:
-// finish up the s and t calcs
- fxch %st(1) // z*64k | 1/z | t/z | s/z
-
- fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z
- fmul %st(4),%st(0) // s | z*64k | 1/z | t/z | s/z
- fxch %st(1) // z*64k | s | 1/z | t/z | s/z
- fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z
- fxch %st(1) // s | t | 1/z | t/z | s/z
- fistpl s // 1/z | t | t/z | s/z
- fistpl t // 1/z | t/z | s/z
-
- fadds zi16stepu
- fxch %st(2)
- fadds sdivz16stepu
- fxch %st(2)
- flds tdivz16stepu
- faddp %st(0),%st(2)
- flds fp_64k
- fdiv %st(1),%st(0) // z = 1/1/z
- // this is what we've gone to all this trouble to
- // overlap
-LFDIVInFlight1:
-
- addl s,%esi
- addl t,%edx
- movl C(bbextents),%ebx
- movl C(bbextentt),%ebp
- cmpl %ebx,%esi
- ja LClampHighOrLow0
-LClampReentry0:
- movl %esi,s
- movl pbase,%ebx
- shll $16,%esi
- cmpl %ebp,%edx
- movl %esi,sfracf
- ja LClampHighOrLow1
-LClampReentry1:
- movl %edx,t
- movl s,%esi // sfrac = scans->sfrac;
- shll $16,%edx
- movl t,%eax // tfrac = scans->tfrac;
- sarl $16,%esi
- movl %edx,tfracf
-
-//
-// calculate the texture starting address
-//
- sarl $16,%eax
- movl C(cachewidth),%edx
- imull %edx,%eax // (tfrac >> 16) * cachewidth
- addl %ebx,%esi
- addl %eax,%esi // psource = pbase + (sfrac >> 16) +
- // ((tfrac >> 16) * cachewidth);
-//
-// determine whether last span or not
-//
- cmpl $16,%ecx
- jna LLastSegment
-
-//
-// not the last segment; do full 16-wide segment
-//
-LNotLastSegment:
-
-//
-// advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
-// get there
-//
-
-// pick up after the FDIV that was left in flight previously
-
- fld %st(0) // duplicate it
- fmul %st(4),%st(0) // s = s/z * z
- fxch %st(1)
- fmul %st(3),%st(0) // t = t/z * z
- fxch %st(1)
- fistpl snext
- fistpl tnext
- movl snext,%eax
- movl tnext,%edx
-
- movb (%esi),%bl // get first source texel
- subl $16,%ecx // count off this segments' pixels
- movl C(sadjust),%ebp
- movl %ecx,counttemp // remember count of remaining pixels
-
- movl C(tadjust),%ecx
- movb %bl,(%edi) // store first dest pixel
-
- addl %eax,%ebp
- addl %edx,%ecx
-
- movl C(bbextents),%eax
- movl C(bbextentt),%edx
-
- cmpl $4096,%ebp
- jl LClampLow2
- cmpl %eax,%ebp
- ja LClampHigh2
-LClampReentry2:
-
- cmpl $4096,%ecx
- jl LClampLow3
- cmpl %edx,%ecx
- ja LClampHigh3
-LClampReentry3:
-
- movl %ebp,snext
- movl %ecx,tnext
-
- subl s,%ebp
- subl t,%ecx
-
-//
-// set up advancetable
-//
- movl %ecx,%eax
- movl %ebp,%edx
- sarl $20,%eax // tstep >>= 16;
- jz LZero
- sarl $20,%edx // sstep >>= 16;
- movl C(cachewidth),%ebx
- imull %ebx,%eax
- jmp LSetUp1
-
-LZero:
- sarl $20,%edx // sstep >>= 16;
- movl C(cachewidth),%ebx
-
-LSetUp1:
-
- addl %edx,%eax // add in sstep
- // (tstep >> 16) * cachewidth + (sstep >> 16);
- movl tfracf,%edx
- movl %eax,advancetable+4 // advance base in t
- addl %ebx,%eax // ((tstep >> 16) + 1) * cachewidth +
- // (sstep >> 16);
- shll $12,%ebp // left-justify sstep fractional part
- movl sfracf,%ebx
- shll $12,%ecx // left-justify tstep fractional part
- movl %eax,advancetable // advance extra in t
-
- movl %ecx,tstep
- addl %ecx,%edx // advance tfrac fractional part by tstep frac
-
- sbbl %ecx,%ecx // turn tstep carry into -1 (0 if none)
- addl %ebp,%ebx // advance sfrac fractional part by sstep frac
- adcl advancetable+4(,%ecx,4),%esi // point to next source texel
-
- addl tstep,%edx
- sbbl %ecx,%ecx
- movb (%esi),%al
- addl %ebp,%ebx
- movb %al,1(%edi)
- adcl advancetable+4(,%ecx,4),%esi
-
- addl tstep,%edx
- sbbl %ecx,%ecx
- addl %ebp,%ebx
- movb (%esi),%al
- adcl advancetable+4(,%ecx,4),%esi
-
- addl tstep,%edx
- sbbl %ecx,%ecx
- movb %al,2(%edi)
- addl %ebp,%ebx
- movb (%esi),%al
- adcl advancetable+4(,%ecx,4),%esi
-
- addl tstep,%edx
- sbbl %ecx,%ecx
- movb %al,3(%edi)
- addl %ebp,%ebx
- movb (%esi),%al
- adcl advancetable+4(,%ecx,4),%esi
-
- addl tstep,%edx
- sbbl %ecx,%ecx
- movb %al,4(%edi)
- addl %ebp,%ebx
- movb (%esi),%al
- adcl advancetable+4(,%ecx,4),%esi
-
- addl tstep,%edx
- sbbl %ecx,%ecx
- movb %al,5(%edi)
- addl %ebp,%ebx
- movb (%esi),%al
- adcl advancetable+4(,%ecx,4),%esi
-
- addl tstep,%edx
- sbbl %ecx,%ecx
- movb %al,6(%edi)
- addl %ebp,%ebx
- movb (%esi),%al
- adcl advancetable+4(,%ecx,4),%esi
-
- addl tstep,%edx
- sbbl %ecx,%ecx
- movb %al,7(%edi)
- addl %ebp,%ebx
- movb (%esi),%al
- adcl advancetable+4(,%ecx,4),%esi
-
-
-//
-// start FDIV for end of next segment in flight, so it can overlap
-//
- movl counttemp,%ecx
- cmpl $16,%ecx // more than one segment after this?
- ja LSetupNotLast2 // yes
-
- decl %ecx
- jz LFDIVInFlight2 // if only one pixel, no need to start an FDIV
- movl %ecx,spancountminus1
- fildl spancountminus1
-
- flds C(d_zistepu) // C(d_zistepu) | spancountminus1
- fmul %st(1),%st(0) // C(d_zistepu)*scm1 | scm1
- flds C(d_tdivzstepu) // C(d_tdivzstepu) | C(d_zistepu)*scm1 | scm1
- fmul %st(2),%st(0) // C(d_tdivzstepu)*scm1 | C(d_zistepu)*scm1 | scm1
- fxch %st(1) // C(d_zistepu)*scm1 | C(d_tdivzstepu)*scm1 | scm1
- faddp %st(0),%st(3) // C(d_tdivzstepu)*scm1 | scm1
- fxch %st(1) // scm1 | C(d_tdivzstepu)*scm1
- fmuls C(d_sdivzstepu) // C(d_sdivzstepu)*scm1 | C(d_tdivzstepu)*scm1
- fxch %st(1) // C(d_tdivzstepu)*scm1 | C(d_sdivzstepu)*scm1
- faddp %st(0),%st(3) // C(d_sdivzstepu)*scm1
- flds fp_64k // 64k | C(d_sdivzstepu)*scm1
- fxch %st(1) // C(d_sdivzstepu)*scm1 | 64k
- faddp %st(0),%st(4) // 64k
-
- fdiv %st(1),%st(0) // this is what we've gone to all this trouble to
- // overlap
- jmp LFDIVInFlight2
-
- .align 4
-LSetupNotLast2:
- fadds zi16stepu
- fxch %st(2)
- fadds sdivz16stepu
- fxch %st(2)
- flds tdivz16stepu
- faddp %st(0),%st(2)
- flds fp_64k
- fdiv %st(1),%st(0) // z = 1/1/z
- // this is what we've gone to all this trouble to
- // overlap
-LFDIVInFlight2:
- movl %ecx,counttemp
-
- addl tstep,%edx
- sbbl %ecx,%ecx
- movb %al,8(%edi)
- addl %ebp,%ebx
- movb (%esi),%al
- adcl advancetable+4(,%ecx,4),%esi
-
- addl tstep,%edx
- sbbl %ecx,%ecx
- movb %al,9(%edi)
- addl %ebp,%ebx
- movb (%esi),%al
- adcl advancetable+4(,%ecx,4),%esi
-
- addl tstep,%edx
- sbbl %ecx,%ecx
- movb %al,10(%edi)
- addl %ebp,%ebx
- movb (%esi),%al
- adcl advancetable+4(,%ecx,4),%esi
-
- addl tstep,%edx
- sbbl %ecx,%ecx
- movb %al,11(%edi)
- addl %ebp,%ebx
- movb (%esi),%al
- adcl advancetable+4(,%ecx,4),%esi
-
- addl tstep,%edx
- sbbl %ecx,%ecx
- movb %al,12(%edi)
- addl %ebp,%ebx
- movb (%esi),%al
- adcl advancetable+4(,%ecx,4),%esi
-
- addl tstep,%edx
- sbbl %ecx,%ecx
- movb %al,13(%edi)
- addl %ebp,%ebx
- movb (%esi),%al
- adcl advancetable+4(,%ecx,4),%esi
-
- addl tstep,%edx
- sbbl %ecx,%ecx
- movb %al,14(%edi)
- addl %ebp,%ebx
- movb (%esi),%al
- adcl advancetable+4(,%ecx,4),%esi
-
- addl $16,%edi
- movl %edx,tfracf
- movl snext,%edx
- movl %ebx,sfracf
- movl tnext,%ebx
- movl %edx,s
- movl %ebx,t
-
- movl counttemp,%ecx // retrieve count
-
-//
-// determine whether last span or not
-//
- cmpl $16,%ecx // are there multiple segments remaining?
- movb %al,-1(%edi)
- ja LNotLastSegment // yes
-
-//
-// last segment of scan
-//
-LLastSegment:
-
-//
-// advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
-// get there. The number of pixels left is variable, and we want to land on the
-// last pixel, not step one past it, so we can't run into arithmetic problems
-//
- testl %ecx,%ecx
- jz LNoSteps // just draw the last pixel and we're done
-
-// pick up after the FDIV that was left in flight previously
-
-
- fld %st(0) // duplicate it
- fmul %st(4),%st(0) // s = s/z * z
- fxch %st(1)
- fmul %st(3),%st(0) // t = t/z * z
- fxch %st(1)
- fistpl snext
- fistpl tnext
-
- movb (%esi),%al // load first texel in segment
- movl C(tadjust),%ebx
- movb %al,(%edi) // store first pixel in segment
- movl C(sadjust),%eax
-
- addl snext,%eax
- addl tnext,%ebx
-
- movl C(bbextents),%ebp
- movl C(bbextentt),%edx
-
- cmpl $4096,%eax
- jl LClampLow4
- cmpl %ebp,%eax
- ja LClampHigh4
-LClampReentry4:
- movl %eax,snext
-
- cmpl $4096,%ebx
- jl LClampLow5
- cmpl %edx,%ebx
- ja LClampHigh5
-LClampReentry5:
-
- cmpl $1,%ecx // don't bother
- je LOnlyOneStep // if two pixels in segment, there's only one step,
- // of the segment length
- subl s,%eax
- subl t,%ebx
-
- addl %eax,%eax // convert to 15.17 format so multiply by 1.31
- addl %ebx,%ebx // reciprocal yields 16.48
-
- imull reciprocal_table_16-8(,%ecx,4) // sstep = (snext - s) /
- // (spancount-1)
- movl %edx,%ebp
-
- movl %ebx,%eax
- imull reciprocal_table_16-8(,%ecx,4) // tstep = (tnext - t) /
- // (spancount-1)
-LSetEntryvec:
-//
-// set up advancetable
-//
- movl entryvec_table_16(,%ecx,4),%ebx
- movl %edx,%eax
- movl %ebx,jumptemp // entry point into code for RET later
- movl %ebp,%ecx
- sarl $16,%edx // tstep >>= 16;
- movl C(cachewidth),%ebx
- sarl $16,%ecx // sstep >>= 16;
- imull %ebx,%edx
-
- addl %ecx,%edx // add in sstep
- // (tstep >> 16) * cachewidth + (sstep >> 16);
- movl tfracf,%ecx
- movl %edx,advancetable+4 // advance base in t
- addl %ebx,%edx // ((tstep >> 16) + 1) * cachewidth +
- // (sstep >> 16);
- shll $16,%ebp // left-justify sstep fractional part
- movl sfracf,%ebx
- shll $16,%eax // left-justify tstep fractional part
- movl %edx,advancetable // advance extra in t
-
- movl %eax,tstep
- movl %ecx,%edx
- addl %eax,%edx
- sbbl %ecx,%ecx
- addl %ebp,%ebx
- adcl advancetable+4(,%ecx,4),%esi
-
- jmp *jumptemp // jump to the number-of-pixels handler
-
-//----------------------------------------
-
-LNoSteps:
- movb (%esi),%al // load first texel in segment
- subl $15,%edi // adjust for hardwired offset
- jmp LEndSpan
-
-
-LOnlyOneStep:
- subl s,%eax
- subl t,%ebx
- movl %eax,%ebp
- movl %ebx,%edx
- jmp LSetEntryvec
-
-//----------------------------------------
-
-.globl Entry2_16, Entry3_16, Entry4_16, Entry5_16
-.globl Entry6_16, Entry7_16, Entry8_16, Entry9_16
-.globl Entry10_16, Entry11_16, Entry12_16, Entry13_16
-.globl Entry14_16, Entry15_16, Entry16_16
-
-Entry2_16:
- subl $14,%edi // adjust for hardwired offsets
- movb (%esi),%al
- jmp LEntry2_16
-
-//----------------------------------------
-
-Entry3_16:
- subl $13,%edi // adjust for hardwired offsets
- addl %eax,%edx
- movb (%esi),%al
- sbbl %ecx,%ecx
- addl %ebp,%ebx
- adcl advancetable+4(,%ecx,4),%esi
- jmp LEntry3_16
-
-//----------------------------------------
-
-Entry4_16:
- subl $12,%edi // adjust for hardwired offsets
- addl %eax,%edx
- movb (%esi),%al
- sbbl %ecx,%ecx
- addl %ebp,%ebx
- adcl advancetable+4(,%ecx,4),%esi
- addl tstep,%edx
- jmp LEntry4_16
-
-//----------------------------------------
-
-Entry5_16:
- subl $11,%edi // adjust for hardwired offsets
- addl %eax,%edx
- movb (%esi),%al
- sbbl %ecx,%ecx
- addl %ebp,%ebx
- adcl advancetable+4(,%ecx,4),%esi
- addl tstep,%edx
- jmp LEntry5_16
-
-//----------------------------------------
-
-Entry6_16:
- subl $10,%edi // adjust for hardwired offsets
- addl %eax,%edx
- movb (%esi),%al
- sbbl %ecx,%ecx
- addl %ebp,%ebx
- adcl advancetable+4(,%ecx,4),%esi
- addl tstep,%edx
- jmp LEntry6_16
-
-//----------------------------------------
-
-Entry7_16:
- subl $9,%edi // adjust for hardwired offsets
- addl %eax,%edx
- movb (%esi),%al
- sbbl %ecx,%ecx
- addl %ebp,%ebx
- adcl advancetable+4(,%ecx,4),%esi
- addl tstep,%edx
- jmp LEntry7_16
-
-//----------------------------------------
-
-Entry8_16:
- subl $8,%edi // adjust for hardwired offsets
- addl %eax,%edx
- movb (%esi),%al
- sbbl %ecx,%ecx
- addl %ebp,%ebx
- adcl advancetable+4(,%ecx,4),%esi
- addl tstep,%edx
- jmp LEntry8_16
-
-//----------------------------------------
-
-Entry9_16:
- subl $7,%edi // adjust for hardwired offsets
- addl %eax,%edx
- movb (%esi),%al
- sbbl %ecx,%ecx
- addl %ebp,%ebx
- adcl advancetable+4(,%ecx,4),%esi
- addl tstep,%edx
- jmp LEntry9_16
-
-//----------------------------------------
-
-Entry10_16:
- subl $6,%edi // adjust for hardwired offsets
- addl %eax,%edx
- movb (%esi),%al
- sbbl %ecx,%ecx
- addl %ebp,%ebx
- adcl advancetable+4(,%ecx,4),%esi
- addl tstep,%edx
- jmp LEntry10_16
-
-//----------------------------------------
-
-Entry11_16:
- subl $5,%edi // adjust for hardwired offsets
- addl %eax,%edx
- movb (%esi),%al
- sbbl %ecx,%ecx
- addl %ebp,%ebx
- adcl advancetable+4(,%ecx,4),%esi
- addl tstep,%edx
- jmp LEntry11_16
-
-//----------------------------------------
-
-Entry12_16:
- subl $4,%edi // adjust for hardwired offsets
- addl %eax,%edx
- movb (%esi),%al
- sbbl %ecx,%ecx
- addl %ebp,%ebx
- adcl advancetable+4(,%ecx,4),%esi
- addl tstep,%edx
- jmp LEntry12_16
-
-//----------------------------------------
-
-Entry13_16:
- subl $3,%edi // adjust for hardwired offsets
- addl %eax,%edx
- movb (%esi),%al
- sbbl %ecx,%ecx
- addl %ebp,%ebx
- adcl advancetable+4(,%ecx,4),%esi
- addl tstep,%edx
- jmp LEntry13_16
-
-//----------------------------------------
-
-Entry14_16:
- subl $2,%edi // adjust for hardwired offsets
- addl %eax,%edx
- movb (%esi),%al
- sbbl %ecx,%ecx
- addl %ebp,%ebx
- adcl advancetable+4(,%ecx,4),%esi
- addl tstep,%edx
- jmp LEntry14_16
-
-//----------------------------------------
-
-Entry15_16:
- decl %edi // adjust for hardwired offsets
- addl %eax,%edx
- movb (%esi),%al
- sbbl %ecx,%ecx
- addl %ebp,%ebx
- adcl advancetable+4(,%ecx,4),%esi
- addl tstep,%edx
- jmp LEntry15_16
-
-//----------------------------------------
-
-Entry16_16:
- addl %eax,%edx
- movb (%esi),%al
- sbbl %ecx,%ecx
- addl %ebp,%ebx
- adcl advancetable+4(,%ecx,4),%esi
-
- addl tstep,%edx
- sbbl %ecx,%ecx
- movb %al,1(%edi)
- addl %ebp,%ebx
- movb (%esi),%al
- adcl advancetable+4(,%ecx,4),%esi
- addl tstep,%edx
-LEntry15_16:
- sbbl %ecx,%ecx
- movb %al,2(%edi)
- addl %ebp,%ebx
- movb (%esi),%al
- adcl advancetable+4(,%ecx,4),%esi
- addl tstep,%edx
-LEntry14_16:
- sbbl %ecx,%ecx
- movb %al,3(%edi)
- addl %ebp,%ebx
- movb (%esi),%al
- adcl advancetable+4(,%ecx,4),%esi
- addl tstep,%edx
-LEntry13_16:
- sbbl %ecx,%ecx
- movb %al,4(%edi)
- addl %ebp,%ebx
- movb (%esi),%al
- adcl advancetable+4(,%ecx,4),%esi
- addl tstep,%edx
-LEntry12_16:
- sbbl %ecx,%ecx
- movb %al,5(%edi)
- addl %ebp,%ebx
- movb (%esi),%al
- adcl advancetable+4(,%ecx,4),%esi
- addl tstep,%edx
-LEntry11_16:
- sbbl %ecx,%ecx
- movb %al,6(%edi)
- addl %ebp,%ebx
- movb (%esi),%al
- adcl advancetable+4(,%ecx,4),%esi
- addl tstep,%edx
-LEntry10_16:
- sbbl %ecx,%ecx
- movb %al,7(%edi)
- addl %ebp,%ebx
- movb (%esi),%al
- adcl advancetable+4(,%ecx,4),%esi
- addl tstep,%edx
-LEntry9_16:
- sbbl %ecx,%ecx
- movb %al,8(%edi)
- addl %ebp,%ebx
- movb (%esi),%al
- adcl advancetable+4(,%ecx,4),%esi
- addl tstep,%edx
-LEntry8_16:
- sbbl %ecx,%ecx
- movb %al,9(%edi)
- addl %ebp,%ebx
- movb (%esi),%al
- adcl advancetable+4(,%ecx,4),%esi
- addl tstep,%edx
-LEntry7_16:
- sbbl %ecx,%ecx
- movb %al,10(%edi)
- addl %ebp,%ebx
- movb (%esi),%al
- adcl advancetable+4(,%ecx,4),%esi
- addl tstep,%edx
-LEntry6_16:
- sbbl %ecx,%ecx
- movb %al,11(%edi)
- addl %ebp,%ebx
- movb (%esi),%al
- adcl advancetable+4(,%ecx,4),%esi
- addl tstep,%edx
-LEntry5_16:
- sbbl %ecx,%ecx
- movb %al,12(%edi)
- addl %ebp,%ebx
- movb (%esi),%al
- adcl advancetable+4(,%ecx,4),%esi
- addl tstep,%edx
-LEntry4_16:
- sbbl %ecx,%ecx
- movb %al,13(%edi)
- addl %ebp,%ebx
- movb (%esi),%al
- adcl advancetable+4(,%ecx,4),%esi
-LEntry3_16:
- movb %al,14(%edi)
- movb (%esi),%al
-LEntry2_16:
-
-LEndSpan:
-
-//
-// clear s/z, t/z, 1/z from FP stack
-//
- fstp %st(0)
- fstp %st(0)
- fstp %st(0)
-
- movl pspantemp,%ebx // restore spans pointer
- movl espan_t_pnext(%ebx),%ebx // point to next span
- testl %ebx,%ebx // any more spans?
- movb %al,15(%edi)
- jnz LSpanLoop // more spans
-
- popl %ebx // restore register variables
- popl %esi
- popl %edi
- popl %ebp // restore the caller's stack frame
- ret
-
-//----------------------------------------------------------------------
-// 8-bpp horizontal span z drawing codefor polygons, with no transparency.
-//
-// Assumes there is at least one span in pzspans, and that every span
-// contains at least one pixel
-//----------------------------------------------------------------------
-
- .text
-
-// z-clamp on a non-negative gradient span
-LClamp:
- movl $0x40000000,%edx
- xorl %ebx,%ebx
- fstp %st(0)
- jmp LZDraw
-
-// z-clamp on a negative gradient span
-LClampNeg:
- movl $0x40000000,%edx
- xorl %ebx,%ebx
- fstp %st(0)
- jmp LZDrawNeg
-
-
-#define pzspans 4+16
-
-.globl C(D_DrawZSpans)
-C(D_DrawZSpans):
- pushl %ebp // preserve caller's stack frame
- pushl %edi
- pushl %esi // preserve register variables
- pushl %ebx
-
- flds C(d_zistepu)
- movl C(d_zistepu),%eax
- movl pzspans(%esp),%esi
- testl %eax,%eax
- jz LFNegSpan
-
- fmuls Float2ToThe31nd
- fistpl izistep // note: we are relying on FP exceptions being turned
- // off here to avoid range problems
- movl izistep,%ebx // remains loaded for all spans
-
-LFSpanLoop:
-// set up the initial 1/z value
- fildl espan_t_v(%esi)
- fildl espan_t_u(%esi)
- movl espan_t_v(%esi),%ecx
- movl C(d_pzbuffer),%edi
- fmuls C(d_zistepu)
- fxch %st(1)
- fmuls C(d_zistepv)
- fxch %st(1)
- fadds C(d_ziorigin)
- imull C(d_zrowbytes),%ecx
- faddp %st(0),%st(1)
-
-// clamp if z is nearer than 2 (1/z > 0.5)
- fcoms float_point5
- addl %ecx,%edi
- movl espan_t_u(%esi),%edx
- addl %edx,%edx // word count
- movl espan_t_count(%esi),%ecx
- addl %edx,%edi // pdest = &pdestspan[scans->u];
- pushl %esi // preserve spans pointer
- fnstsw %ax
- testb $0x45,%ah
- jz LClamp
-
- fmuls Float2ToThe31nd
- fistpl izi // note: we are relying on FP exceptions being turned
- // off here to avoid problems when the span is closer
- // than 1/(2**31)
- movl izi,%edx
-
-// at this point:
-// %ebx = izistep
-// %ecx = count
-// %edx = izi
-// %edi = pdest
-
-LZDraw:
-
-// do a single pixel up front, if necessary to dword align the destination
- testl $2,%edi
- jz LFMiddle
- movl %edx,%eax
- addl %ebx,%edx
- shrl $16,%eax
- decl %ecx
- movw %ax,(%edi)
- addl $2,%edi
-
-// do middle a pair of aligned dwords at a time
-LFMiddle:
- pushl %ecx
- shrl $1,%ecx // count / 2
- jz LFLast // no aligned dwords to do
- shrl $1,%ecx // (count / 2) / 2
- jnc LFMiddleLoop // even number of aligned dwords to do
-
- movl %edx,%eax
- addl %ebx,%edx
- shrl $16,%eax
- movl %edx,%esi
- addl %ebx,%edx
- andl $0xFFFF0000,%esi
- orl %esi,%eax
- movl %eax,(%edi)
- addl $4,%edi
- andl %ecx,%ecx
- jz LFLast
-
-LFMiddleLoop:
- movl %edx,%eax
- addl %ebx,%edx
- shrl $16,%eax
- movl %edx,%esi
- addl %ebx,%edx
- andl $0xFFFF0000,%esi
- orl %esi,%eax
- movl %edx,%ebp
- movl %eax,(%edi)
- addl %ebx,%edx
- shrl $16,%ebp
- movl %edx,%esi
- addl %ebx,%edx
- andl $0xFFFF0000,%esi
- orl %esi,%ebp
- movl %ebp,4(%edi) // FIXME: eliminate register contention
- addl $8,%edi
-
- decl %ecx
- jnz LFMiddleLoop
-
-LFLast:
- popl %ecx // retrieve count
- popl %esi // retrieve span pointer
-
-// do the last, unaligned pixel, if there is one
- andl $1,%ecx // is there an odd pixel left to do?
- jz LFSpanDone // no
- shrl $16,%edx
- movw %dx,(%edi) // do the final pixel's z
-
-LFSpanDone:
- movl espan_t_pnext(%esi),%esi
- testl %esi,%esi
- jnz LFSpanLoop
-
- jmp LFDone
-
-LFNegSpan:
- fmuls FloatMinus2ToThe31nd
- fistpl izistep // note: we are relying on FP exceptions being turned
- // off here to avoid range problems
- movl izistep,%ebx // remains loaded for all spans
-
-LFNegSpanLoop:
-// set up the initial 1/z value
- fildl espan_t_v(%esi)
- fildl espan_t_u(%esi)
- movl espan_t_v(%esi),%ecx
- movl C(d_pzbuffer),%edi
- fmuls C(d_zistepu)
- fxch %st(1)
- fmuls C(d_zistepv)
- fxch %st(1)
- fadds C(d_ziorigin)
- imull C(d_zrowbytes),%ecx
- faddp %st(0),%st(1)
-
-// clamp if z is nearer than 2 (1/z > 0.5)
- fcoms float_point5
- addl %ecx,%edi
- movl espan_t_u(%esi),%edx
- addl %edx,%edx // word count
- movl espan_t_count(%esi),%ecx
- addl %edx,%edi // pdest = &pdestspan[scans->u];
- pushl %esi // preserve spans pointer
- fnstsw %ax
- testb $0x45,%ah
- jz LClampNeg
-
- fmuls Float2ToThe31nd
- fistpl izi // note: we are relying on FP exceptions being turned
- // off here to avoid problems when the span is closer
- // than 1/(2**31)
- movl izi,%edx
-
-// at this point:
-// %ebx = izistep
-// %ecx = count
-// %edx = izi
-// %edi = pdest
-
-LZDrawNeg:
-
-// do a single pixel up front, if necessary to dword align the destination
- testl $2,%edi
- jz LFNegMiddle
- movl %edx,%eax
- subl %ebx,%edx
- shrl $16,%eax
- decl %ecx
- movw %ax,(%edi)
- addl $2,%edi
-
-// do middle a pair of aligned dwords at a time
-LFNegMiddle:
- pushl %ecx
- shrl $1,%ecx // count / 2
- jz LFNegLast // no aligned dwords to do
- shrl $1,%ecx // (count / 2) / 2
- jnc LFNegMiddleLoop // even number of aligned dwords to do
-
- movl %edx,%eax
- subl %ebx,%edx
- shrl $16,%eax
- movl %edx,%esi
- subl %ebx,%edx
- andl $0xFFFF0000,%esi
- orl %esi,%eax
- movl %eax,(%edi)
- addl $4,%edi
- andl %ecx,%ecx
- jz LFNegLast
-
-LFNegMiddleLoop:
- movl %edx,%eax
- subl %ebx,%edx
- shrl $16,%eax
- movl %edx,%esi
- subl %ebx,%edx
- andl $0xFFFF0000,%esi
- orl %esi,%eax
- movl %edx,%ebp
- movl %eax,(%edi)
- subl %ebx,%edx
- shrl $16,%ebp
- movl %edx,%esi
- subl %ebx,%edx
- andl $0xFFFF0000,%esi
- orl %esi,%ebp
- movl %ebp,4(%edi) // FIXME: eliminate register contention
- addl $8,%edi
-
- decl %ecx
- jnz LFNegMiddleLoop
-
-LFNegLast:
- popl %ecx // retrieve count
- popl %esi // retrieve span pointer
-
-// do the last, unaligned pixel, if there is one
- andl $1,%ecx // is there an odd pixel left to do?
- jz LFNegSpanDone // no
- shrl $16,%edx
- movw %dx,(%edi) // do the final pixel's z
-
-LFNegSpanDone:
- movl espan_t_pnext(%esi),%esi
- testl %esi,%esi
- jnz LFNegSpanLoop
-
-LFDone:
- popl %ebx // restore register variables
- popl %esi
- popl %edi
- popl %ebp // restore the caller's stack frame
- ret
-
-#endif // USE_ASM
-
diff --git a/src/refresh/sw/x86/surf8.S b/src/refresh/sw/x86/surf8.S
deleted file mode 100644
index b972f8f..0000000
--- a/src/refresh/sw/x86/surf8.S
+++ /dev/null
@@ -1,762 +0,0 @@
-//
-// surf8.s
-// x86 assembly-language 8 bpp surface block drawing code.
-//
-
-#include "common/x86/asm.h"
-
-#if USE_ASM
-
- .data
-
-sb_v: .long 0
-
- .text
-
- .align 4
-.globl C(R_Surf8Start)
-C(R_Surf8Start):
-
-//----------------------------------------------------------------------
-// Surface block drawer for mip level 0
-//----------------------------------------------------------------------
-
- .align 4
-.globl C(R_DrawSurfaceBlock8_mip0)
-C(R_DrawSurfaceBlock8_mip0):
- pushl %ebp // preserve caller's stack frame
- pushl %edi
- pushl %esi // preserve register variables
- pushl %ebx
-
-// for (v=0 ; v<numvblocks ; v++)
-// {
- movl C(r_lightptr),%ebx
- movl C(r_numvblocks),%eax
-
- movl %eax,sb_v
- movl C(prowdestbase),%edi
-
- movl C(pbasesource),%esi
-
-Lv_loop_mip0:
-
-// lightleft = lightptr[0];
-// lightright = lightptr[1];
-// lightdelta = (lightleft - lightright) & 0xFFFFF;
- movl (%ebx),%eax // lightleft
- movl 4(%ebx),%edx // lightright
-
- movl %eax,%ebp
- movl C(r_lightwidth),%ecx
-
- movl %edx,C(lightright)
- subl %edx,%ebp
-
- andl $0xFFFFF,%ebp
- leal (%ebx,%ecx,4),%ebx
-
-// lightptr += lightwidth;
- movl %ebx,C(r_lightptr)
-
-// lightleftstep = (lightptr[0] - lightleft) >> blockdivshift;
-// lightrightstep = (lightptr[1] - lightright) >> blockdivshift;
-// lightdeltastep = ((lightleftstep - lightrightstep) & 0xFFFFF) |
-// 0xF0000000;
- movl 4(%ebx),%ecx // lightptr[1]
- movl (%ebx),%ebx // lightptr[0]
-
- subl %eax,%ebx
- subl %edx,%ecx
-
- sarl $4,%ecx
- orl $0xF0000000,%ebp
-
- sarl $4,%ebx
- movl %ecx,C(lightrightstep)
-
- subl %ecx,%ebx
- andl $0xFFFFF,%ebx
-
- orl $0xF0000000,%ebx
- subl %ecx,%ecx // high word must be 0 in loop for addressing
-
- movl %ebx,C(lightdeltastep)
- subl %ebx,%ebx // high word must be 0 in loop for addressing
-
-Lblockloop8_mip0:
- movl %ebp,C(lightdelta)
- movb 14(%esi),%cl
-
- sarl $4,%ebp
- movb %dh,%bh
-
- movb 15(%esi),%bl
- addl %ebp,%edx
-
- movb %dh,%ch
- addl %ebp,%edx
-
- movb 0x12345678(%ebx),%ah
-LBPatch0:
- movb 13(%esi),%bl
-
- movb 0x12345678(%ecx),%al
-LBPatch1:
- movb 12(%esi),%cl
-
- movb %dh,%bh
- addl %ebp,%edx
-
- rorl $16,%eax
- movb %dh,%ch
-
- addl %ebp,%edx
- movb 0x12345678(%ebx),%ah
-LBPatch2:
-
- movb 11(%esi),%bl
- movb 0x12345678(%ecx),%al
-LBPatch3:
-
- movb 10(%esi),%cl
- movl %eax,12(%edi)
-
- movb %dh,%bh
- addl %ebp,%edx
-
- movb %dh,%ch
- addl %ebp,%edx
-
- movb 0x12345678(%ebx),%ah
-LBPatch4:
- movb 9(%esi),%bl
-
- movb 0x12345678(%ecx),%al
-LBPatch5:
- movb 8(%esi),%cl
-
- movb %dh,%bh
- addl %ebp,%edx
-
- rorl $16,%eax
- movb %dh,%ch
-
- addl %ebp,%edx
- movb 0x12345678(%ebx),%ah
-LBPatch6:
-
- movb 7(%esi),%bl
- movb 0x12345678(%ecx),%al
-LBPatch7:
-
- movb 6(%esi),%cl
- movl %eax,8(%edi)
-
- movb %dh,%bh
- addl %ebp,%edx
-
- movb %dh,%ch
- addl %ebp,%edx
-
- movb 0x12345678(%ebx),%ah
-LBPatch8:
- movb 5(%esi),%bl
-
- movb 0x12345678(%ecx),%al
-LBPatch9:
- movb 4(%esi),%cl
-
- movb %dh,%bh
- addl %ebp,%edx
-
- rorl $16,%eax
- movb %dh,%ch
-
- addl %ebp,%edx
- movb 0x12345678(%ebx),%ah
-LBPatch10:
-
- movb 3(%esi),%bl
- movb 0x12345678(%ecx),%al
-LBPatch11:
-
- movb 2(%esi),%cl
- movl %eax,4(%edi)
-
- movb %dh,%bh
- addl %ebp,%edx
-
- movb %dh,%ch
- addl %ebp,%edx
-
- movb 0x12345678(%ebx),%ah
-LBPatch12:
- movb 1(%esi),%bl
-
- movb 0x12345678(%ecx),%al
-LBPatch13:
- movb (%esi),%cl
-
- movb %dh,%bh
- addl %ebp,%edx
-
- rorl $16,%eax
- movb %dh,%ch
-
- movb 0x12345678(%ebx),%ah
-LBPatch14:
- movl C(lightright),%edx
-
- movb 0x12345678(%ecx),%al
-LBPatch15:
- movl C(lightdelta),%ebp
-
- movl %eax,(%edi)
-
- addl C(sourcetstep),%esi
- addl C(surfrowbytes),%edi
-
- addl C(lightrightstep),%edx
- addl C(lightdeltastep),%ebp
-
- movl %edx,C(lightright)
- jc Lblockloop8_mip0
-
-// if (pbasesource >= r_sourcemax)
-// pbasesource -= stepback;
-
- cmpl C(r_sourcemax),%esi
- jb LSkip_mip0
- subl C(r_stepback),%esi
-LSkip_mip0:
-
- movl C(r_lightptr),%ebx
- decl sb_v
-
- jnz Lv_loop_mip0
-
- popl %ebx // restore register variables
- popl %esi
- popl %edi
- popl %ebp // restore the caller's stack frame
- ret
-
-
-//----------------------------------------------------------------------
-// Surface block drawer for mip level 1
-//----------------------------------------------------------------------
-
- .align 4
-.globl C(R_DrawSurfaceBlock8_mip1)
-C(R_DrawSurfaceBlock8_mip1):
- pushl %ebp // preserve caller's stack frame
- pushl %edi
- pushl %esi // preserve register variables
- pushl %ebx
-
-// for (v=0 ; v<numvblocks ; v++)
-// {
- movl C(r_lightptr),%ebx
- movl C(r_numvblocks),%eax
-
- movl %eax,sb_v
- movl C(prowdestbase),%edi
-
- movl C(pbasesource),%esi
-
-Lv_loop_mip1:
-
-// lightleft = lightptr[0];
-// lightright = lightptr[1];
-// lightdelta = (lightleft - lightright) & 0xFFFFF;
- movl (%ebx),%eax // lightleft
- movl 4(%ebx),%edx // lightright
-
- movl %eax,%ebp
- movl C(r_lightwidth),%ecx
-
- movl %edx,C(lightright)
- subl %edx,%ebp
-
- andl $0xFFFFF,%ebp
- leal (%ebx,%ecx,4),%ebx
-
-// lightptr += lightwidth;
- movl %ebx,C(r_lightptr)
-
-// lightleftstep = (lightptr[0] - lightleft) >> blockdivshift;
-// lightrightstep = (lightptr[1] - lightright) >> blockdivshift;
-// lightdeltastep = ((lightleftstep - lightrightstep) & 0xFFFFF) |
-// 0xF0000000;
- movl 4(%ebx),%ecx // lightptr[1]
- movl (%ebx),%ebx // lightptr[0]
-
- subl %eax,%ebx
- subl %edx,%ecx
-
- sarl $3,%ecx
- orl $0x70000000,%ebp
-
- sarl $3,%ebx
- movl %ecx,C(lightrightstep)
-
- subl %ecx,%ebx
- andl $0xFFFFF,%ebx
-
- orl $0xF0000000,%ebx
- subl %ecx,%ecx // high word must be 0 in loop for addressing
-
- movl %ebx,C(lightdeltastep)
- subl %ebx,%ebx // high word must be 0 in loop for addressing
-
-Lblockloop8_mip1:
- movl %ebp,C(lightdelta)
- movb 6(%esi),%cl
-
- sarl $3,%ebp
- movb %dh,%bh
-
- movb 7(%esi),%bl
- addl %ebp,%edx
-
- movb %dh,%ch
- addl %ebp,%edx
-
- movb 0x12345678(%ebx),%ah
-LBPatch22:
- movb 5(%esi),%bl
-
- movb 0x12345678(%ecx),%al
-LBPatch23:
- movb 4(%esi),%cl
-
- movb %dh,%bh
- addl %ebp,%edx
-
- rorl $16,%eax
- movb %dh,%ch
-
- addl %ebp,%edx
- movb 0x12345678(%ebx),%ah
-LBPatch24:
-
- movb 3(%esi),%bl
- movb 0x12345678(%ecx),%al
-LBPatch25:
-
- movb 2(%esi),%cl
- movl %eax,4(%edi)
-
- movb %dh,%bh
- addl %ebp,%edx
-
- movb %dh,%ch
- addl %ebp,%edx
-
- movb 0x12345678(%ebx),%ah
-LBPatch26:
- movb 1(%esi),%bl
-
- movb 0x12345678(%ecx),%al
-LBPatch27:
- movb (%esi),%cl
-
- movb %dh,%bh
- addl %ebp,%edx
-
- rorl $16,%eax
- movb %dh,%ch
-
- movb 0x12345678(%ebx),%ah
-LBPatch28:
- movl C(lightright),%edx
-
- movb 0x12345678(%ecx),%al
-LBPatch29:
- movl C(lightdelta),%ebp
-
- movl %eax,(%edi)
- movl C(sourcetstep),%eax
-
- addl %eax,%esi
- movl C(surfrowbytes),%eax
-
- addl %eax,%edi
- movl C(lightrightstep),%eax
-
- addl %eax,%edx
- movl C(lightdeltastep),%eax
-
- addl %eax,%ebp
- movl %edx,C(lightright)
-
- jc Lblockloop8_mip1
-
-// if (pbasesource >= r_sourcemax)
-// pbasesource -= stepback;
-
- cmpl C(r_sourcemax),%esi
- jb LSkip_mip1
- subl C(r_stepback),%esi
-LSkip_mip1:
-
- movl C(r_lightptr),%ebx
- decl sb_v
-
- jnz Lv_loop_mip1
-
- popl %ebx // restore register variables
- popl %esi
- popl %edi
- popl %ebp // restore the caller's stack frame
- ret
-
-
-//----------------------------------------------------------------------
-// Surface block drawer for mip level 2
-//----------------------------------------------------------------------
-
- .align 4
-.globl C(R_DrawSurfaceBlock8_mip2)
-C(R_DrawSurfaceBlock8_mip2):
- pushl %ebp // preserve caller's stack frame
- pushl %edi
- pushl %esi // preserve register variables
- pushl %ebx
-
-// for (v=0 ; v<numvblocks ; v++)
-// {
- movl C(r_lightptr),%ebx
- movl C(r_numvblocks),%eax
-
- movl %eax,sb_v
- movl C(prowdestbase),%edi
-
- movl C(pbasesource),%esi
-
-Lv_loop_mip2:
-
-// lightleft = lightptr[0];
-// lightright = lightptr[1];
-// lightdelta = (lightleft - lightright) & 0xFFFFF;
- movl (%ebx),%eax // lightleft
- movl 4(%ebx),%edx // lightright
-
- movl %eax,%ebp
- movl C(r_lightwidth),%ecx
-
- movl %edx,C(lightright)
- subl %edx,%ebp
-
- andl $0xFFFFF,%ebp
- leal (%ebx,%ecx,4),%ebx
-
-// lightptr += lightwidth;
- movl %ebx,C(r_lightptr)
-
-// lightleftstep = (lightptr[0] - lightleft) >> blockdivshift;
-// lightrightstep = (lightptr[1] - lightright) >> blockdivshift;
-// lightdeltastep = ((lightleftstep - lightrightstep) & 0xFFFFF) |
-// 0xF0000000;
- movl 4(%ebx),%ecx // lightptr[1]
- movl (%ebx),%ebx // lightptr[0]
-
- subl %eax,%ebx
- subl %edx,%ecx
-
- sarl $2,%ecx
- orl $0x30000000,%ebp
-
- sarl $2,%ebx
- movl %ecx,C(lightrightstep)
-
- subl %ecx,%ebx
-
- andl $0xFFFFF,%ebx
-
- orl $0xF0000000,%ebx
- subl %ecx,%ecx // high word must be 0 in loop for addressing
-
- movl %ebx,C(lightdeltastep)
- subl %ebx,%ebx // high word must be 0 in loop for addressing
-
-Lblockloop8_mip2:
- movl %ebp,C(lightdelta)
- movb 2(%esi),%cl
-
- sarl $2,%ebp
- movb %dh,%bh
-
- movb 3(%esi),%bl
- addl %ebp,%edx
-
- movb %dh,%ch
- addl %ebp,%edx
-
- movb 0x12345678(%ebx),%ah
-LBPatch18:
- movb 1(%esi),%bl
-
- movb 0x12345678(%ecx),%al
-LBPatch19:
- movb (%esi),%cl
-
- movb %dh,%bh
- addl %ebp,%edx
-
- rorl $16,%eax
- movb %dh,%ch
-
- movb 0x12345678(%ebx),%ah
-LBPatch20:
- movl C(lightright),%edx
-
- movb 0x12345678(%ecx),%al
-LBPatch21:
- movl C(lightdelta),%ebp
-
- movl %eax,(%edi)
- movl C(sourcetstep),%eax
-
- addl %eax,%esi
- movl C(surfrowbytes),%eax
-
- addl %eax,%edi
- movl C(lightrightstep),%eax
-
- addl %eax,%edx
- movl C(lightdeltastep),%eax
-
- addl %eax,%ebp
- movl %edx,C(lightright)
-
- jc Lblockloop8_mip2
-
-// if (pbasesource >= r_sourcemax)
-// pbasesource -= stepback;
-
- cmpl C(r_sourcemax),%esi
- jb LSkip_mip2
- subl C(r_stepback),%esi
-LSkip_mip2:
-
- movl C(r_lightptr),%ebx
- decl sb_v
-
- jnz Lv_loop_mip2
-
- popl %ebx // restore register variables
- popl %esi
- popl %edi
- popl %ebp // restore the caller's stack frame
- ret
-
-
-//----------------------------------------------------------------------
-// Surface block drawer for mip level 3
-//----------------------------------------------------------------------
-
- .align 4
-.globl C(R_DrawSurfaceBlock8_mip3)
-C(R_DrawSurfaceBlock8_mip3):
- pushl %ebp // preserve caller's stack frame
- pushl %edi
- pushl %esi // preserve register variables
- pushl %ebx
-
-// for (v=0 ; v<numvblocks ; v++)
-// {
- movl C(r_lightptr),%ebx
- movl C(r_numvblocks),%eax
-
- movl %eax,sb_v
- movl C(prowdestbase),%edi
-
- movl C(pbasesource),%esi
-
-Lv_loop_mip3:
-
-// lightleft = lightptr[0];
-// lightright = lightptr[1];
-// lightdelta = (lightleft - lightright) & 0xFFFFF;
- movl (%ebx),%eax // lightleft
- movl 4(%ebx),%edx // lightright
-
- movl %eax,%ebp
- movl C(r_lightwidth),%ecx
-
- movl %edx,C(lightright)
- subl %edx,%ebp
-
- andl $0xFFFFF,%ebp
- leal (%ebx,%ecx,4),%ebx
-
- movl %ebp,C(lightdelta)
-// lightptr += lightwidth;
- movl %ebx,C(r_lightptr)
-
-// lightleftstep = (lightptr[0] - lightleft) >> blockdivshift;
-// lightrightstep = (lightptr[1] - lightright) >> blockdivshift;
-// lightdeltastep = ((lightleftstep - lightrightstep) & 0xFFFFF) |
-// 0xF0000000;
- movl 4(%ebx),%ecx // lightptr[1]
- movl (%ebx),%ebx // lightptr[0]
-
- subl %eax,%ebx
- subl %edx,%ecx
-
- sarl $1,%ecx
-
- sarl $1,%ebx
- movl %ecx,C(lightrightstep)
-
- subl %ecx,%ebx
- andl $0xFFFFF,%ebx
-
- sarl $1,%ebp
- orl $0xF0000000,%ebx
-
- movl %ebx,C(lightdeltastep)
- subl %ebx,%ebx // high word must be 0 in loop for addressing
-
- movb 1(%esi),%bl
- subl %ecx,%ecx // high word must be 0 in loop for addressing
-
- movb %dh,%bh
- movb (%esi),%cl
-
- addl %ebp,%edx
- movb %dh,%ch
-
- movb 0x12345678(%ebx),%al
-LBPatch16:
- movl C(lightright),%edx
-
- movb %al,1(%edi)
- movb 0x12345678(%ecx),%al
-LBPatch17:
-
- movb %al,(%edi)
- movl C(sourcetstep),%eax
-
- addl %eax,%esi
- movl C(surfrowbytes),%eax
-
- addl %eax,%edi
- movl C(lightdeltastep),%eax
-
- movl C(lightdelta),%ebp
- movb (%esi),%cl
-
- addl %eax,%ebp
- movl C(lightrightstep),%eax
-
- sarl $1,%ebp
- addl %eax,%edx
-
- movb %dh,%bh
- movb 1(%esi),%bl
-
- addl %ebp,%edx
- movb %dh,%ch
-
- movb 0x12345678(%ebx),%al
-LBPatch30:
- movl C(sourcetstep),%edx
-
- movb %al,1(%edi)
- movb 0x12345678(%ecx),%al
-LBPatch31:
-
- movb %al,(%edi)
- movl C(surfrowbytes),%ebp
-
- addl %edx,%esi
- addl %ebp,%edi
-
-// if (pbasesource >= r_sourcemax)
-// pbasesource -= stepback;
-
- cmpl C(r_sourcemax),%esi
- jb LSkip_mip3
- subl C(r_stepback),%esi
-LSkip_mip3:
-
- movl C(r_lightptr),%ebx
- decl sb_v
-
- jnz Lv_loop_mip3
-
- popl %ebx // restore register variables
- popl %esi
- popl %edi
- popl %ebp // restore the caller's stack frame
- ret
-
-
-.globl C(R_Surf8End)
-C(R_Surf8End):
-
-//----------------------------------------------------------------------
-// Code patching routines
-//----------------------------------------------------------------------
- .data
-
- .align 4
-LPatchTable8:
- .long LBPatch0-4
- .long LBPatch1-4
- .long LBPatch2-4
- .long LBPatch3-4
- .long LBPatch4-4
- .long LBPatch5-4
- .long LBPatch6-4
- .long LBPatch7-4
- .long LBPatch8-4
- .long LBPatch9-4
- .long LBPatch10-4
- .long LBPatch11-4
- .long LBPatch12-4
- .long LBPatch13-4
- .long LBPatch14-4
- .long LBPatch15-4
- .long LBPatch16-4
- .long LBPatch17-4
- .long LBPatch18-4
- .long LBPatch19-4
- .long LBPatch20-4
- .long LBPatch21-4
- .long LBPatch22-4
- .long LBPatch23-4
- .long LBPatch24-4
- .long LBPatch25-4
- .long LBPatch26-4
- .long LBPatch27-4
- .long LBPatch28-4
- .long LBPatch29-4
- .long LBPatch30-4
- .long LBPatch31-4
-
- .text
-
- .align 4
-.globl C(R_Surf8Patch)
-C(R_Surf8Patch):
- pushl %ebx
-
- movl C(d_pcolormap),%eax
- movl $LPatchTable8,%ebx
- movl $32,%ecx
-LPatchLoop8:
- movl (%ebx),%edx
- addl $4,%ebx
- movl %eax,(%edx)
- decl %ecx
- jnz LPatchLoop8
-
- popl %ebx
-
- ret
-
-#endif // USE_ASM
diff --git a/src/refresh/sw/x86/sw.h b/src/refresh/sw/x86/sw.h
deleted file mode 100644
index f817fa6..0000000
--- a/src/refresh/sw/x86/sw.h
+++ /dev/null
@@ -1,183 +0,0 @@
-/*
-Copyright (C) 1997-2001 Id Software, Inc.
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-*/
-
-//
-// Include file for asm driver interface.
-//
-
-#define TRANSPARENT_COLOR 255
-
-#define ALIAS_ONSEAM 0x0020
-
-#define TURB_TEX_SIZE 64 // base turbulent texture size
-
-#define NEAR_CLIP 0.01
-
-#define CYCLE 128
-
-#define MAXHEIGHT 1200
-
-#define CACHE_SIZE 32 // used to align key data structures
-
-#define PARTICLE_Z_CLIP 8.0
-
-// particle_t structure
-// driver-usable fields
-#define pt_org 0
-#define pt_color 12
-// drivers never touch the following fields
-#define pt_next 16
-#define pt_vel 20
-#define pt_ramp 32
-#define pt_die 36
-#define pt_type 40
-#define pt_size 44
-
-// finalvert_t structure
-#define fv_v 0
-#define fv_flags 24
-#define fv_reserved 28
-#define fv_size 32
-#define fv_shift 5
-
-// stvert_t structure
-#define stv_onseam 0
-#define stv_s 4
-#define stv_t 8
-#define stv_size 12
-
-// trivertx_t structure
-#define tv_v 0
-#define tv_lightnormalindex 3
-#define tv_size 4
-
-// affinetridesc_t structure
-#define atd_pskin 0
-#define atd_pskindesc 4
-#define atd_skinwidth 8
-#define atd_skinheight 12
-#define atd_ptriangles 16
-#define atd_pfinalverts 20
-#define atd_numtriangles 24
-#define atd_drawtype 28
-#define atd_seamfixupX16 32
-#define atd_size 36
-
-// espan_t structure
-#define espan_t_u 0
-#define espan_t_v 4
-#define espan_t_count 8
-#define espan_t_pnext 12
-#define espan_t_size 16
-
-// sspan_t structure
-#define sspan_t_u 0
-#define sspan_t_v 4
-#define sspan_t_count 8
-#define sspan_t_size 12
-
-// spanpackage_t structure
-#define spanpackage_t_pdest 0
-#define spanpackage_t_pz 4
-#define spanpackage_t_count 8
-#define spanpackage_t_ptex 12
-#define spanpackage_t_sfrac 16
-#define spanpackage_t_tfrac 20
-#define spanpackage_t_light 24
-#define spanpackage_t_zi 28
-#define spanpackage_t_size 32
-
-// edge_t structure
-#define et_u 0
-#define et_u_step 4
-#define et_prev 8
-#define et_next 12
-#define et_surfs 16
-#define et_nextremove 20
-#define et_nearzi 24
-#define et_owner 28
-#define et_size 32
-
-// surf_t structure
-#define SURF_T_SHIFT 6
-#define st_next 0
-#define st_prev 4
-#define st_spans 8
-#define st_key 12
-#define st_last_u 16
-#define st_spanstate 20
-#define st_flags 24
-#define st_data 28
-#define st_entity 32
-#define st_nearzi 36
-#define st_insubmodel 40
-#define st_d_ziorigin 44
-#define st_d_zistepu 48
-#define st_d_zistepv 52
-#define st_pad 56
-#define st_size 64
-
-// clipplane_t structure
-#define cp_normal 0
-#define cp_dist 12
-#define cp_next 16
-#define cp_leftedge 20
-#define cp_rightedge 21
-#define cp_reserved 22
-#define cp_size 24
-
-// medge_t structure
-#define me_v 0
-#define me_cachededgeoffset 4
-#define me_size 8
-
-// mvertex_t structure
-#define mv_position 0
-#define mv_size 12
-
-// refdef_t structure
-#define rd_vrect 0
-#define rd_aliasvrect 20
-#define rd_vrectright 40
-#define rd_vrectbottom 44
-#define rd_aliasvrectright 48
-#define rd_aliasvrectbottom 52
-#define rd_vrectrightedge 56
-#define rd_fvrectx 60
-#define rd_fvrecty 64
-#define rd_fvrectx_adj 68
-#define rd_fvrecty_adj 72
-#define rd_vrect_x_adj_shift20 76
-#define rd_vrectright_adj_shift20 80
-#define rd_fvrectright_adj 84
-#define rd_fvrectbottom_adj 88
-#define rd_fvrectright 92
-#define rd_fvrectbottom 96
-#define rd_horizontalFieldOfView 100
-#define rd_xOrigin 104
-#define rd_yOrigin 108
-#define rd_vieworg 112
-#define rd_viewangles 124
-#define rd_ambientlight 136
-#define rd_size 140
-
-// mtriangle_t structure
-#define mtri_facesfront 0
-#define mtri_vertindex 4
-#define mtri_size 16
-#define mtri_shift 4
diff --git a/src/refresh/sw/x86/turb8.S b/src/refresh/sw/x86/turb8.S
deleted file mode 100644
index fefe228..0000000
--- a/src/refresh/sw/x86/turb8.S
+++ /dev/null
@@ -1,68 +0,0 @@
-//
-// d_scana.s
-// x86 assembly-language turbulent texture mapping code
-//
-
-#include "common/x86/asm.h"
-#include "sw.h"
-
-#if USE_ASM
-
- .data
-
- .text
-
-//----------------------------------------------------------------------
-// turbulent texture mapping code
-//----------------------------------------------------------------------
-
- .align 4
-.globl C(D_DrawTurbulent8Span)
-C(D_DrawTurbulent8Span):
- pushl %ebp // preserve caller's stack frame pointer
- pushl %esi // preserve register variables
- pushl %edi
- pushl %ebx
-
- movl C(r_turb_s),%esi
- movl C(r_turb_t),%ecx
- movl C(r_turb_pdest),%edi
- movl C(r_turb_spancount),%ebx
-
-Llp:
- movl %ecx,%eax
- movl %esi,%edx
- sarl $16,%eax
- movl C(r_turb_turb),%ebp
- sarl $16,%edx
- andl $(CYCLE-1),%eax
- andl $(CYCLE-1),%edx
- movl (%ebp,%eax,4),%eax
- movl (%ebp,%edx,4),%edx
- addl %esi,%eax
- sarl $16,%eax
- addl %ecx,%edx
- sarl $16,%edx
- andl $(TURB_TEX_SIZE-1),%eax
- andl $(TURB_TEX_SIZE-1),%edx
- shll $6,%edx
- movl C(r_turb_pbase),%ebp
- addl %eax,%edx
- incl %edi
- addl C(r_turb_sstep),%esi
- addl C(r_turb_tstep),%ecx
- movb (%ebp,%edx,1),%dl
- decl %ebx
- movb %dl,-1(%edi)
- jnz Llp
-
- movl %edi,C(r_turb_pdest)
-
- popl %ebx // restore register variables
- popl %edi
- popl %esi
- popl %ebp // restore caller's stack frame pointer
- ret
-
-#endif // USE_ASM
-
diff --git a/src/refresh/sw/x86/vars.S b/src/refresh/sw/x86/vars.S
deleted file mode 100644
index a886bc9..0000000
--- a/src/refresh/sw/x86/vars.S
+++ /dev/null
@@ -1,157 +0,0 @@
-//
-// r_varsa.s
-//
-
-#include "common/x86/asm.h"
-#include "sw.h"
-
-#if USE_ASM
-
- .data
-
-//-------------------------------------------------------
-// ASM-only variables
-//-------------------------------------------------------
-.globl float_1, float_particle_z_clip, float_point5
-.globl float_minus_1, float_0
-float_0: .single 0.0
-float_1: .single 1.0
-float_minus_1: .single -1.0
-float_particle_z_clip: .single PARTICLE_Z_CLIP
-float_point5: .single 0.5
-
-.globl fp_16, fp_64k, fp_1m, fp_64kx64k
-.globl fp_1m_minus_1
-.globl fp_8
-fp_1m: .single 1048576.0
-fp_1m_minus_1: .single 1048575.0
-fp_64k: .single 65536.0
-fp_8: .single 8.0
-fp_16: .single 16.0
-fp_64kx64k: .long 0x4f000000 // (float)0x8000*0x10000
-
-
-.globl FloatZero, Float2ToThe31nd, FloatMinus2ToThe31nd
-FloatZero: .long 0
-Float2ToThe31nd: .long 0x4f000000
-FloatMinus2ToThe31nd: .long 0xcf000000
-
-.globl C(r_bmodelactive)
-C(r_bmodelactive): .long 0
-
-//-------------------------------------------------------
-// global refresh variables
-//-------------------------------------------------------
-
-// FIXME: put all refresh variables into one contiguous block. Make into one
-// big structure, like cl or sv?
-
- .align 4
-.globl C(d_sdivzstepu)
-.globl C(d_tdivzstepu)
-.globl C(d_zistepu)
-.globl C(d_sdivzstepv)
-.globl C(d_tdivzstepv)
-.globl C(d_zistepv)
-.globl C(d_sdivzorigin)
-.globl C(d_tdivzorigin)
-.globl C(d_ziorigin)
-C(d_sdivzstepu): .single 0
-C(d_tdivzstepu): .single 0
-C(d_zistepu): .single 0
-C(d_sdivzstepv): .single 0
-C(d_tdivzstepv): .single 0
-C(d_zistepv): .single 0
-C(d_sdivzorigin): .single 0
-C(d_tdivzorigin): .single 0
-C(d_ziorigin): .single 0
-
-.globl C(sadjust)
-.globl C(tadjust)
-.globl C(bbextents)
-.globl C(bbextentt)
-C(sadjust): .long 0
-C(tadjust): .long 0
-C(bbextents): .long 0
-C(bbextentt): .long 0
-
-.globl C(cacheblock)
-.globl C(d_viewbuffer)
-.globl C(cachewidth)
-.globl C(d_pzbuffer)
-.globl C(d_zrowbytes)
-.globl C(d_zwidth)
-C(cacheblock): .long 0
-C(cachewidth): .long 0
-C(d_viewbuffer): .long 0
-C(d_pzbuffer): .long 0
-C(d_zrowbytes): .long 0
-C(d_zwidth): .long 0
-
-
-//-------------------------------------------------------
-// ASM-only variables
-//-------------------------------------------------------
-.globl izi
-izi: .long 0
-
-.globl pbase, s, t, sfracf, tfracf, snext, tnext
-.globl spancountminus1, zi16stepu, sdivz16stepu, tdivz16stepu
-.globl zi8stepu, sdivz8stepu, tdivz8stepu, pz
-s: .long 0
-t: .long 0
-snext: .long 0
-tnext: .long 0
-sfracf: .long 0
-tfracf: .long 0
-pbase: .long 0
-zi8stepu: .long 0
-sdivz8stepu: .long 0
-tdivz8stepu: .long 0
-zi16stepu: .long 0
-sdivz16stepu: .long 0
-tdivz16stepu: .long 0
-spancountminus1: .long 0
-pz: .long 0
-
-.globl izistep
-izistep: .long 0
-
-//-------------------------------------------------------
-// local variables for d_draw16.s
-//-------------------------------------------------------
-
-.globl reciprocal_table_16, entryvec_table_16
-// 1/2, 1/3, 1/4, 1/5, 1/6, 1/7, 1/8, 1/9, 1/10, 1/11, 1/12, 1/13,
-// 1/14, and 1/15 in 0.32 form
-reciprocal_table_16: .long 0x40000000, 0x2aaaaaaa, 0x20000000
- .long 0x19999999, 0x15555555, 0x12492492
- .long 0x10000000, 0xe38e38e, 0xccccccc, 0xba2e8ba
- .long 0xaaaaaaa, 0x9d89d89, 0x9249249, 0x8888888
-
-entryvec_table_16: .long 0, Entry2_16, Entry3_16, Entry4_16
- .long Entry5_16, Entry6_16, Entry7_16, Entry8_16
- .long Entry9_16, Entry10_16, Entry11_16, Entry12_16
- .long Entry13_16, Entry14_16, Entry15_16, Entry16_16
-
-//
-// advancetable is 8 bytes, but points to the middle of that range so negative
-// offsets will work
-//
-.globl advancetable, sstep, tstep, pspantemp, counttemp, jumptemp
-advancetable: .long 0, 0
-sstep: .long 0
-tstep: .long 0
-
-pspantemp: .long 0
-counttemp: .long 0
-jumptemp: .long 0
-
-// 1/2, 1/3, 1/4, 1/5, 1/6, and 1/7 in 0.32 form
-.globl reciprocal_table, entryvec_table
-reciprocal_table: .long 0x40000000, 0x2aaaaaaa, 0x20000000
- .long 0x19999999, 0x15555555, 0x12492492
-
-#endif // USE_ASM
-
-