summaryrefslogtreecommitdiff
path: root/arch/sparc
diff options
context:
space:
mode:
Diffstat (limited to 'arch/sparc')
-rw-r--r--arch/sparc/net/bpf_jit_comp_64.c249
1 files changed, 245 insertions, 4 deletions
diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c
index 2b2f3c3335ce..ec7d10da94f0 100644
--- a/arch/sparc/net/bpf_jit_comp_64.c
+++ b/arch/sparc/net/bpf_jit_comp_64.c
@@ -28,6 +28,11 @@ static inline bool is_simm5(unsigned int value)
return value + 0x10 < 0x20;
}
+static inline bool is_sethi(unsigned int value)
+{
+ return (value & ~0x3fffff) == 0;
+}
+
static void bpf_flush_icache(void *start_, void *end_)
{
/* Cheetah's I-cache is fully coherent. */
@@ -367,16 +372,252 @@ static void emit_loadimm_sext(s32 K, unsigned int dest, struct jit_ctx *ctx)
}
}
+static void analyze_64bit_constant(u32 high_bits, u32 low_bits,
+ int *hbsp, int *lbsp, int *abbasp)
+{
+ int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
+ int i;
+
+ lowest_bit_set = highest_bit_set = -1;
+ i = 0;
+ do {
+ if ((lowest_bit_set == -1) && ((low_bits >> i) & 1))
+ lowest_bit_set = i;
+ if ((highest_bit_set == -1) && ((high_bits >> (32 - i - 1)) & 1))
+ highest_bit_set = (64 - i - 1);
+ } while (++i < 32 && (highest_bit_set == -1 ||
+ lowest_bit_set == -1));
+ if (i == 32) {
+ i = 0;
+ do {
+ if (lowest_bit_set == -1 && ((high_bits >> i) & 1))
+ lowest_bit_set = i + 32;
+ if (highest_bit_set == -1 &&
+ ((low_bits >> (32 - i - 1)) & 1))
+ highest_bit_set = 32 - i - 1;
+ } while (++i < 32 && (highest_bit_set == -1 ||
+ lowest_bit_set == -1));
+ }
+
+ all_bits_between_are_set = 1;
+ for (i = lowest_bit_set; i <= highest_bit_set; i++) {
+ if (i < 32) {
+ if ((low_bits & (1 << i)) != 0)
+ continue;
+ } else {
+ if ((high_bits & (1 << (i - 32))) != 0)
+ continue;
+ }
+ all_bits_between_are_set = 0;
+ break;
+ }
+ *hbsp = highest_bit_set;
+ *lbsp = lowest_bit_set;
+ *abbasp = all_bits_between_are_set;
+}
+
+static unsigned long create_simple_focus_bits(unsigned long high_bits,
+ unsigned long low_bits,
+ int lowest_bit_set, int shift)
+{
+ long hi, lo;
+
+ if (lowest_bit_set < 32) {
+ lo = (low_bits >> lowest_bit_set) << shift;
+ hi = ((high_bits << (32 - lowest_bit_set)) << shift);
+ } else {
+ lo = 0;
+ hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
+ }
+ return hi | lo;
+}
+
+static bool const64_is_2insns(unsigned long high_bits,
+ unsigned long low_bits)
+{
+ int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
+
+ if (high_bits == 0 || high_bits == 0xffffffff)
+ return true;
+
+ analyze_64bit_constant(high_bits, low_bits,
+ &highest_bit_set, &lowest_bit_set,
+ &all_bits_between_are_set);
+
+ if ((highest_bit_set == 63 || lowest_bit_set == 0) &&
+ all_bits_between_are_set != 0)
+ return true;
+
+ if (highest_bit_set - lowest_bit_set < 21)
+ return true;
+
+ return false;
+}
+
+static void sparc_emit_set_const64_quick2(unsigned long high_bits,
+ unsigned long low_imm,
+ unsigned int dest,
+ int shift_count, struct jit_ctx *ctx)
+{
+ emit_loadimm32(high_bits, dest, ctx);
+
+ /* Now shift it up into place. */
+ emit_alu_K(SLLX, dest, shift_count, ctx);
+
+ /* If there is a low immediate part piece, finish up by
+ * putting that in as well.
+ */
+ if (low_imm != 0)
+ emit(OR | IMMED | RS1(dest) | S13(low_imm) | RD(dest), ctx);
+}
+
static void emit_loadimm64(u64 K, unsigned int dest, struct jit_ctx *ctx)
{
+ int all_bits_between_are_set, lowest_bit_set, highest_bit_set;
unsigned int tmp = bpf2sparc[TMP_REG_1];
- u32 high_part = (K >> 32);
- u32 low_part = (K & 0xffffffff);
+ u32 low_bits = (K & 0xffffffff);
+ u32 high_bits = (K >> 32);
+
+ /* These two tests also take care of all of the one
+ * instruction cases.
+ */
+ if (high_bits == 0xffffffff && (low_bits & 0x80000000))
+ return emit_loadimm_sext(K, dest, ctx);
+ if (high_bits == 0x00000000)
+ return emit_loadimm32(K, dest, ctx);
+
+ analyze_64bit_constant(high_bits, low_bits, &highest_bit_set,
+ &lowest_bit_set, &all_bits_between_are_set);
+
+ /* 1) mov -1, %reg
+ * sllx %reg, shift, %reg
+ * 2) mov -1, %reg
+ * srlx %reg, shift, %reg
+ * 3) mov some_small_const, %reg
+ * sllx %reg, shift, %reg
+ */
+ if (((highest_bit_set == 63 || lowest_bit_set == 0) &&
+ all_bits_between_are_set != 0) ||
+ ((highest_bit_set - lowest_bit_set) < 12)) {
+ int shift = lowest_bit_set;
+ long the_const = -1;
+
+ if ((highest_bit_set != 63 && lowest_bit_set != 0) ||
+ all_bits_between_are_set == 0) {
+ the_const =
+ create_simple_focus_bits(high_bits, low_bits,
+ lowest_bit_set, 0);
+ } else if (lowest_bit_set == 0)
+ shift = -(63 - highest_bit_set);
+
+ emit(OR | IMMED | RS1(G0) | S13(the_const) | RD(dest), ctx);
+ if (shift > 0)
+ emit_alu_K(SLLX, dest, shift, ctx);
+ else if (shift < 0)
+ emit_alu_K(SRLX, dest, -shift, ctx);
+
+ return;
+ }
+
+ /* Now a range of 22 or less bits set somewhere.
+ * 1) sethi %hi(focus_bits), %reg
+ * sllx %reg, shift, %reg
+ * 2) sethi %hi(focus_bits), %reg
+ * srlx %reg, shift, %reg
+ */
+ if ((highest_bit_set - lowest_bit_set) < 21) {
+ unsigned long focus_bits =
+ create_simple_focus_bits(high_bits, low_bits,
+ lowest_bit_set, 10);
+
+ emit(SETHI(focus_bits, dest), ctx);
+
+ /* If lowest_bit_set == 10 then a sethi alone could
+ * have done it.
+ */
+ if (lowest_bit_set < 10)
+ emit_alu_K(SRLX, dest, 10 - lowest_bit_set, ctx);
+ else if (lowest_bit_set > 10)
+ emit_alu_K(SLLX, dest, lowest_bit_set - 10, ctx);
+ return;
+ }
+
+ /* Ok, now 3 instruction sequences. */
+ if (low_bits == 0) {
+ emit_loadimm32(high_bits, dest, ctx);
+ emit_alu_K(SLLX, dest, 32, ctx);
+ return;
+ }
+
+ /* We may be able to do something quick
+ * when the constant is negated, so try that.
+ */
+ if (const64_is_2insns((~high_bits) & 0xffffffff,
+ (~low_bits) & 0xfffffc00)) {
+ /* NOTE: The trailing bits get XOR'd so we need the
+ * non-negated bits, not the negated ones.
+ */
+ unsigned long trailing_bits = low_bits & 0x3ff;
+
+ if ((((~high_bits) & 0xffffffff) == 0 &&
+ ((~low_bits) & 0x80000000) == 0) ||
+ (((~high_bits) & 0xffffffff) == 0xffffffff &&
+ ((~low_bits) & 0x80000000) != 0)) {
+ unsigned long fast_int = (~low_bits & 0xffffffff);
+
+ if ((is_sethi(fast_int) &&
+ (~high_bits & 0xffffffff) == 0)) {
+ emit(SETHI(fast_int, dest), ctx);
+ } else if (is_simm13(fast_int)) {
+ emit(OR | IMMED | RS1(G0) | S13(fast_int) | RD(dest), ctx);
+ } else {
+ emit_loadimm64(fast_int, dest, ctx);
+ }
+ } else {
+ u64 n = ((~low_bits) & 0xfffffc00) |
+ (((unsigned long)((~high_bits) & 0xffffffff))<<32);
+ emit_loadimm64(n, dest, ctx);
+ }
+
+ low_bits = -0x400 | trailing_bits;
+
+ emit(XOR | IMMED | RS1(dest) | S13(low_bits) | RD(dest), ctx);
+ return;
+ }
+
+ /* 1) sethi %hi(xxx), %reg
+ * or %reg, %lo(xxx), %reg
+ * sllx %reg, yyy, %reg
+ */
+ if ((highest_bit_set - lowest_bit_set) < 32) {
+ unsigned long focus_bits =
+ create_simple_focus_bits(high_bits, low_bits,
+ lowest_bit_set, 0);
+
+ /* So what we know is that the set bits straddle the
+ * middle of the 64-bit word.
+ */
+ sparc_emit_set_const64_quick2(focus_bits, 0, dest,
+ lowest_bit_set, ctx);
+ return;
+ }
+
+ /* 1) sethi %hi(high_bits), %reg
+ * or %reg, %lo(high_bits), %reg
+ * sllx %reg, 32, %reg
+ * or %reg, low_bits, %reg
+ */
+ if (is_simm13(low_bits) && ((int)low_bits > 0)) {
+ sparc_emit_set_const64_quick2(high_bits, low_bits,
+ dest, 32, ctx);
+ return;
+ }
+ /* Oh well, we tried... Do a full 64-bit decomposition. */
ctx->tmp_1_used = true;
- emit_set_const(high_part, tmp, ctx);
- emit_set_const(low_part, dest, ctx);
+ emit_loadimm32(high_bits, tmp, ctx);
+ emit_loadimm32(low_bits, dest, ctx);
emit_alu_K(SLLX, tmp, 32, ctx);
emit(OR | RS1(dest) | RS2(tmp) | RD(dest), ctx);
}