diff options
author | Daniel Vetter <daniel.vetter@ffwll.ch> | 2013-03-19 09:47:30 +0100 |
---|---|---|
committer | Daniel Vetter <daniel.vetter@ffwll.ch> | 2013-03-19 09:47:30 +0100 |
commit | 0d4a42f6bd298e826620585e766a154ab460617a (patch) | |
tree | 406d8f7778691d858dbe3e48e4bbb10e99c0a58a /arch/arc/lib/strcpy-700.S | |
parent | d62b4892f3d9f7dd2002e5309be10719d6805b0f (diff) | |
parent | a937536b868b8369b98967929045f1df54234323 (diff) |
Merge tag 'v3.9-rc3' into drm-intel-next-queued
Backmerge so that I can merge Imre Deak's coalesced sg entries fixes,
which depend upon the new for_each_sg_page introduce in
commit a321e91b6d73ed011ffceed384c40d2785cf723b
Author: Imre Deak <imre.deak@intel.com>
Date: Wed Feb 27 17:02:56 2013 -0800
lib/scatterlist: add simple page iterator
The merge itself is just two trivial conflicts:
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Diffstat (limited to 'arch/arc/lib/strcpy-700.S')
-rw-r--r-- | arch/arc/lib/strcpy-700.S | 70 |
1 files changed, 70 insertions, 0 deletions
diff --git a/arch/arc/lib/strcpy-700.S b/arch/arc/lib/strcpy-700.S new file mode 100644 index 000000000000..b7ca4ae81d88 --- /dev/null +++ b/arch/arc/lib/strcpy-700.S @@ -0,0 +1,70 @@ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* If dst and src are 4 byte aligned, copy 8 bytes at a time. + If the src is 4, but not 8 byte aligned, we first read 4 bytes to get + it 8 byte aligned. Thus, we can do a little read-ahead, without + dereferencing a cache line that we should not touch. + Note that short and long instructions have been scheduled to avoid + branch stalls. + The beq_s to r3z could be made unaligned & long to avoid a stall + there, but the it is not likely to be taken often, and it + would also be likey to cost an unaligned mispredict at the next call. */ + +#include <asm/linkage.h> + +ARC_ENTRY strcpy + or r2,r0,r1 + bmsk_s r2,r2,1 + brne.d r2,0,charloop + mov_s r10,r0 + ld_s r3,[r1,0] + mov r8,0x01010101 + bbit0.d r1,2,loop_start + ror r12,r8 + sub r2,r3,r8 + bic_s r2,r2,r3 + tst_s r2,r12 + bne r3z + mov_s r4,r3 + .balign 4 +loop: + ld.a r3,[r1,4] + st.ab r4,[r10,4] +loop_start: + ld.a r4,[r1,4] + sub r2,r3,r8 + bic_s r2,r2,r3 + tst_s r2,r12 + bne_s r3z + st.ab r3,[r10,4] + sub r2,r4,r8 + bic r2,r2,r4 + tst r2,r12 + beq loop + mov_s r3,r4 +#ifdef __LITTLE_ENDIAN__ +r3z: bmsk.f r1,r3,7 + lsr_s r3,r3,8 +#else +r3z: lsr.f r1,r3,24 + asl_s r3,r3,8 +#endif + bne.d r3z + stb.ab r1,[r10,1] + j_s [blink] + + .balign 4 +charloop: + ldb.ab r3,[r1,1] + + + brne.d r3,0,charloop + stb.ab r3,[r10,1] + j [blink] +ARC_EXIT strcpy |