From f5baaf48e3e82b1caf9f5cd1207d4d6feba3a2e5 Mon Sep 17 00:00:00 2001
From: Thomas Bertschinger
Date: Mon, 15 Jan 2024 23:41:02 -0700
Subject: move Rust sources to top level, C sources into c_src

This moves the Rust sources out of rust_src/ and into the top level.
Running the bcachefs executable out of the development tree is now:

    $ ./target/release/bcachefs command

or

    $ cargo run --profile release -- command

instead of "./bcachefs command". Building and installing is still:

    $ make && make install

Signed-off-by: Thomas Bertschinger
Signed-off-by: Kent Overstreet
---
 c_src/raid/x86z.c | 255 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 255 insertions(+)
 create mode 100644 c_src/raid/x86z.c

diff --git a/c_src/raid/x86z.c b/c_src/raid/x86z.c
new file mode 100644
index 00000000..1e3fe89a
--- /dev/null
+++ b/c_src/raid/x86z.c
@@ -0,0 +1,255 @@
+/*
+ * Copyright (C) 2013 Andrea Mazzoleni
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include "internal.h"
+
+#if defined(CONFIG_X86) && defined(CONFIG_SSE2)
+static const struct gfzconst16 {
+	uint8_t poly[16];
+	uint8_t half[16];
+	uint8_t low7[16];
+} gfzconst16 __aligned(64) =
+{
+	{
+		0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d
+	},
+	{
+		0x8e, 0x8e, 0x8e, 0x8e, 0x8e, 0x8e, 0x8e, 0x8e,
+		0x8e, 0x8e, 0x8e, 0x8e, 0x8e, 0x8e, 0x8e, 0x8e
+	},
+	{
+		0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+		0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f
+	}
+};
+#endif
+
+#if defined(CONFIG_X86) && defined(CONFIG_SSE2)
+/*
+ * GENz (triple parity with powers of 2^-1) SSE2 implementation
+ */
+void raid_genz_sse2(int nd, size_t size, void **vv)
+{
+	uint8_t **v = (uint8_t **)vv;
+	uint8_t *p;
+	uint8_t *q;
+	uint8_t *r;
+	int d, l;
+	size_t i;
+
+	l = nd - 1;
+	p = v[nd];
+	q = v[nd + 1];
+	r = v[nd + 2];
+
+	raid_sse_begin();
+
+	asm volatile ("movdqa %0,%%xmm7" : : "m" (gfzconst16.poly[0]));
+	asm volatile ("movdqa %0,%%xmm3" : : "m" (gfzconst16.half[0]));
+	asm volatile ("movdqa %0,%%xmm6" : : "m" (gfzconst16.low7[0]));
+
+	for (i = 0; i < size; i += 16) {
+		asm volatile ("movdqa %0,%%xmm0" : : "m" (v[l][i]));
+		asm volatile ("movdqa %xmm0,%xmm1");
+		asm volatile ("movdqa %xmm0,%xmm2");
+		for (d = l - 1; d >= 0; --d) {
+			asm volatile ("pxor %xmm4,%xmm4");
+			asm volatile ("pcmpgtb %xmm1,%xmm4");
+			asm volatile ("paddb %xmm1,%xmm1");
+			asm volatile ("pand %xmm7,%xmm4");
+			asm volatile ("pxor %xmm4,%xmm1");
+
+			asm volatile ("movdqa %xmm2,%xmm4");
+			asm volatile ("pxor %xmm5,%xmm5");
+			asm volatile ("psllw $7,%xmm4");
+			asm volatile ("psrlw $1,%xmm2");
+			asm volatile ("pcmpgtb %xmm4,%xmm5");
+			asm volatile ("pand %xmm6,%xmm2");
+			asm volatile ("pand %xmm3,%xmm5");
+			asm volatile ("pxor %xmm5,%xmm2");
+
+			asm volatile ("movdqa %0,%%xmm4" : : "m" (v[d][i]));
+			asm volatile ("pxor %xmm4,%xmm0");
+			asm volatile ("pxor %xmm4,%xmm1");
+			asm volatile ("pxor %xmm4,%xmm2");
+		}
+		asm volatile ("movntdq %%xmm0,%0" : "=m" (p[i]));
+		asm volatile ("movntdq %%xmm1,%0" : "=m" (q[i]));
+		asm volatile ("movntdq %%xmm2,%0" : "=m" (r[i]));
("movntdq %%xmm2,%0" : "=m" (r[i])); + } + + raid_sse_end(); +} +#endif + +#if defined(CONFIG_X86_64) && defined(CONFIG_SSE2) +/* + * GENz (triple parity with powers of 2^-1) SSE2 implementation + * + * Note that it uses 16 registers, meaning that x64 is required. + */ +void raid_genz_sse2ext(int nd, size_t size, void **vv) +{ + uint8_t **v = (uint8_t**)vv; + uint8_t *p; + uint8_t *q; + uint8_t *r; + int d, l; + size_t i; + + l = nd - 1; + p = v[nd]; + q = v[nd + 1]; + r = v[nd + 2]; + + raid_sse_begin(); + + asm volatile ("movdqa %0,%%xmm7" : : "m" (gfzconst16.poly[0])); + asm volatile ("movdqa %0,%%xmm3" : : "m" (gfzconst16.half[0])); + asm volatile ("movdqa %0,%%xmm11" : : "m" (gfzconst16.low7[0])); + + for (i = 0; i < size; i += 32) { + asm volatile ("movdqa %0,%%xmm0" : : "m" (v[l][i])); + asm volatile ("movdqa %0,%%xmm8" : : "m" (v[l][i + 16])); + asm volatile ("movdqa %xmm0,%xmm1"); + asm volatile ("movdqa %xmm8,%xmm9"); + asm volatile ("movdqa %xmm0,%xmm2"); + asm volatile ("movdqa %xmm8,%xmm10"); + for (d = l - 1; d >= 0; --d) { + asm volatile ("movdqa %xmm2,%xmm6"); + asm volatile ("movdqa %xmm10,%xmm14"); + asm volatile ("pxor %xmm4,%xmm4"); + asm volatile ("pxor %xmm12,%xmm12"); + asm volatile ("pxor %xmm5,%xmm5"); + asm volatile ("pxor %xmm13,%xmm13"); + asm volatile ("psllw $7,%xmm6"); + asm volatile ("psllw $7,%xmm14"); + asm volatile ("psrlw $1,%xmm2"); + asm volatile ("psrlw $1,%xmm10"); + asm volatile ("pcmpgtb %xmm1,%xmm4"); + asm volatile ("pcmpgtb %xmm9,%xmm12"); + asm volatile ("pcmpgtb %xmm6,%xmm5"); + asm volatile ("pcmpgtb %xmm14,%xmm13"); + asm volatile ("paddb %xmm1,%xmm1"); + asm volatile ("paddb %xmm9,%xmm9"); + asm volatile ("pand %xmm11,%xmm2"); + asm volatile ("pand %xmm11,%xmm10"); + asm volatile ("pand %xmm7,%xmm4"); + asm volatile ("pand %xmm7,%xmm12"); + asm volatile ("pand %xmm3,%xmm5"); + asm volatile ("pand %xmm3,%xmm13"); + asm volatile ("pxor %xmm4,%xmm1"); + asm volatile ("pxor %xmm12,%xmm9"); + asm volatile ("pxor %xmm5,%xmm2"); + asm volatile ("pxor %xmm13,%xmm10"); + + asm volatile ("movdqa %0,%%xmm4" : : "m" (v[d][i])); + asm volatile ("movdqa %0,%%xmm12" : : "m" (v[d][i + 16])); + asm volatile ("pxor %xmm4,%xmm0"); + asm volatile ("pxor %xmm4,%xmm1"); + asm volatile ("pxor %xmm4,%xmm2"); + asm volatile ("pxor %xmm12,%xmm8"); + asm volatile ("pxor %xmm12,%xmm9"); + asm volatile ("pxor %xmm12,%xmm10"); + } + asm volatile ("movntdq %%xmm0,%0" : "=m" (p[i])); + asm volatile ("movntdq %%xmm8,%0" : "=m" (p[i + 16])); + asm volatile ("movntdq %%xmm1,%0" : "=m" (q[i])); + asm volatile ("movntdq %%xmm9,%0" : "=m" (q[i + 16])); + asm volatile ("movntdq %%xmm2,%0" : "=m" (r[i])); + asm volatile ("movntdq %%xmm10,%0" : "=m" (r[i + 16])); + } + + raid_sse_end(); +} +#endif + +#if defined(CONFIG_X86_64) && defined(CONFIG_AVX2) +/* + * GENz (triple parity with powers of 2^-1) AVX2 implementation + * + * Note that it uses 16 registers, meaning that x64 is required. 
+ */
+void raid_genz_avx2ext(int nd, size_t size, void **vv)
+{
+	uint8_t **v = (uint8_t **)vv;
+	uint8_t *p;
+	uint8_t *q;
+	uint8_t *r;
+	int d, l;
+	size_t i;
+
+	l = nd - 1;
+	p = v[nd];
+	q = v[nd + 1];
+	r = v[nd + 2];
+
+	raid_avx_begin();
+
+	asm volatile ("vbroadcasti128 %0,%%ymm7" : : "m" (gfzconst16.poly[0]));
+	asm volatile ("vbroadcasti128 %0,%%ymm3" : : "m" (gfzconst16.half[0]));
+	asm volatile ("vbroadcasti128 %0,%%ymm11" : : "m" (gfzconst16.low7[0]));
+	asm volatile ("vpxor %ymm15,%ymm15,%ymm15");
+
+	for (i = 0; i < size; i += 64) {
+		asm volatile ("vmovdqa %0,%%ymm0" : : "m" (v[l][i]));
+		asm volatile ("vmovdqa %0,%%ymm8" : : "m" (v[l][i + 32]));
+		asm volatile ("vmovdqa %ymm0,%ymm1");
+		asm volatile ("vmovdqa %ymm8,%ymm9");
+		asm volatile ("vmovdqa %ymm0,%ymm2");
+		asm volatile ("vmovdqa %ymm8,%ymm10");
+		for (d = l - 1; d >= 0; --d) {
+			asm volatile ("vpsllw $7,%ymm2,%ymm6");
+			asm volatile ("vpsllw $7,%ymm10,%ymm14");
+			asm volatile ("vpsrlw $1,%ymm2,%ymm2");
+			asm volatile ("vpsrlw $1,%ymm10,%ymm10");
+			asm volatile ("vpcmpgtb %ymm1,%ymm15,%ymm4");
+			asm volatile ("vpcmpgtb %ymm9,%ymm15,%ymm12");
+			asm volatile ("vpcmpgtb %ymm6,%ymm15,%ymm5");
+			asm volatile ("vpcmpgtb %ymm14,%ymm15,%ymm13");
+			asm volatile ("vpaddb %ymm1,%ymm1,%ymm1");
+			asm volatile ("vpaddb %ymm9,%ymm9,%ymm9");
+			asm volatile ("vpand %ymm11,%ymm2,%ymm2");
+			asm volatile ("vpand %ymm11,%ymm10,%ymm10");
+			asm volatile ("vpand %ymm7,%ymm4,%ymm4");
+			asm volatile ("vpand %ymm7,%ymm12,%ymm12");
+			asm volatile ("vpand %ymm3,%ymm5,%ymm5");
+			asm volatile ("vpand %ymm3,%ymm13,%ymm13");
+			asm volatile ("vpxor %ymm4,%ymm1,%ymm1");
+			asm volatile ("vpxor %ymm12,%ymm9,%ymm9");
+			asm volatile ("vpxor %ymm5,%ymm2,%ymm2");
+			asm volatile ("vpxor %ymm13,%ymm10,%ymm10");
+
+			asm volatile ("vmovdqa %0,%%ymm4" : : "m" (v[d][i]));
+			asm volatile ("vmovdqa %0,%%ymm12" : : "m" (v[d][i + 32]));
+			asm volatile ("vpxor %ymm4,%ymm0,%ymm0");
+			asm volatile ("vpxor %ymm4,%ymm1,%ymm1");
+			asm volatile ("vpxor %ymm4,%ymm2,%ymm2");
+			asm volatile ("vpxor %ymm12,%ymm8,%ymm8");
+			asm volatile ("vpxor %ymm12,%ymm9,%ymm9");
+			asm volatile ("vpxor %ymm12,%ymm10,%ymm10");
+		}
+		asm volatile ("vmovntdq %%ymm0,%0" : "=m" (p[i]));
+		asm volatile ("vmovntdq %%ymm8,%0" : "=m" (p[i + 32]));
+		asm volatile ("vmovntdq %%ymm1,%0" : "=m" (q[i]));
+		asm volatile ("vmovntdq %%ymm9,%0" : "=m" (q[i + 32]));
+		asm volatile ("vmovntdq %%ymm2,%0" : "=m" (r[i]));
+		asm volatile ("vmovntdq %%ymm10,%0" : "=m" (r[i + 32]));
+	}
+
+	raid_avx_end();
+}
+#endif
+
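A note on what the SIMD loops above compute, since the instruction sequences
are dense: all arithmetic is in GF(2^8) with reduction polynomial 0x11d, where
addition is XOR. The "poly" constant (0x1d) folds the overflow bit of a
multiply-by-2, the "half" constant (0x8e = 2^-1) folds the dropped bit of a
multiply-by-2^-1, and "low7" (0x7f) clears the bits that psrlw/vpsrlw leak
across byte lanes (a word shift, used because x86 has no byte shift). The
scalar sketch below shows the same Horner-style evaluation one byte at a time;
it is illustrative only, not part of this patch, and raid_genz_ref, gf_mul2
and gf_div2 are hypothetical names rather than functions from this tree.

#include <stddef.h>
#include <stdint.h>

/* GF(2^8) multiply by 2, reduction polynomial 0x11d (low byte 0x1d). */
static inline uint8_t gf_mul2(uint8_t x)
{
	return (uint8_t)((x << 1) ^ ((x & 0x80) ? 0x1d : 0));
}

/* GF(2^8) multiply by 2^-1 (= 0x8e): shift right, fold the dropped bit. */
static inline uint8_t gf_div2(uint8_t x)
{
	return (uint8_t)((x >> 1) ^ ((x & 0x01) ? 0x8e : 0));
}

/*
 * Scalar reference for the GENz kernels (+ denotes XOR in GF(2^8)):
 *
 *   P = d_0 +        d_1 + ... +        d_l
 *   Q = d_0 +      2*d_1 + ... +  2^l * d_l
 *   R = d_0 + 2^-1 * d_1 + ... + 2^-l * d_l
 *
 * evaluated Horner-style from the last data disk down, exactly as the
 * inner asm loops do for one 16/32/64-byte chunk at a time.
 */
void raid_genz_ref(int nd, size_t size, void **vv)
{
	uint8_t **v = (uint8_t **)vv;
	uint8_t *p = v[nd];
	uint8_t *q = v[nd + 1];
	uint8_t *r = v[nd + 2];

	for (size_t i = 0; i < size; i++) {
		uint8_t ps = v[nd - 1][i];
		uint8_t qs = ps;
		uint8_t rs = ps;

		for (int d = nd - 2; d >= 0; d--) {
			ps ^= v[d][i];              /* P: plain XOR        */
			qs = gf_mul2(qs) ^ v[d][i]; /* Q: multiply by 2    */
			rs = gf_div2(rs) ^ v[d][i]; /* R: multiply by 2^-1 */
		}
		p[i] = ps;
		q[i] = qs;
		r[i] = rs;
	}
}

The non-temporal movntdq/vmovntdq stores in the real kernels write the parity
buffers around the cache, presumably because parity is written once per stripe
and not re-read on the hot path; the scalar sketch skips that detail.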