281 lines
7.6 KiB
Diff
281 lines
7.6 KiB
Diff
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
From: Matteo Croce <technoboy85@gmail.com>
|
|
Date: Wed, 29 Sep 2021 19:22:34 +0200
|
|
Subject: riscv: optimized memset
|
|
|
|
The generic memset is defined as a byte-at-a-time write. This is always
|
|
safe, but it's slower than a 4 byte or even 8 byte write.
|
|
|
|
Write a generic memset which fills the data one byte at a time until the
|
|
destination is aligned, then fills using the largest size allowed,
|
|
and finally fills the remaining data one byte at a time.
|
|
|
|
Signed-off-by: Matteo Croce <mcroce@microsoft.com>
|
|
Signed-off-by: Emil Renner Berthing <kernel@esmil.dk>
|
|
---
|
|
arch/riscv/include/asm/string.h | 8 +-
|
|
arch/riscv/kernel/Makefile | 1 -
|
|
arch/riscv/kernel/riscv_ksyms.c | 13 --
|
|
arch/riscv/lib/Makefile | 1 -
|
|
arch/riscv/lib/memset.S | 113 ----------
|
|
arch/riscv/lib/string.c | 41 ++++
|
|
arch/riscv/purgatory/Makefile | 5 +-
|
|
7 files changed, 44 insertions(+), 138 deletions(-)
|
|
|
|
diff --git a/arch/riscv/include/asm/string.h b/arch/riscv/include/asm/string.h
|
|
index 02120466d5a1..3b79b14a2bf1 100644
|
|
--- a/arch/riscv/include/asm/string.h
|
|
+++ b/arch/riscv/include/asm/string.h
|
|
@@ -6,13 +6,9 @@
|
|
#ifndef _ASM_RISCV_STRING_H
|
|
#define _ASM_RISCV_STRING_H
|
|
|
|
-#include <linux/types.h>
|
|
-#include <linux/linkage.h>
|
|
-
|
|
#define __HAVE_ARCH_MEMSET
|
|
-extern asmlinkage void *memset(void *, int, size_t);
|
|
-extern asmlinkage void *__memset(void *, int, size_t);
|
|
-
|
|
+extern void *memset(void *s, int c, size_t count);
|
|
+extern void *__memset(void *s, int c, size_t count);
|
|
#define __HAVE_ARCH_MEMCPY
|
|
extern void *memcpy(void *dest, const void *src, size_t count);
|
|
extern void *__memcpy(void *dest, const void *src, size_t count);
|
|
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
|
|
index ab333cb792fd..003e1cb46712 100644
|
|
--- a/arch/riscv/kernel/Makefile
|
|
+++ b/arch/riscv/kernel/Makefile
|
|
@@ -47,7 +47,6 @@ obj-y += syscall_table.o
|
|
obj-y += sys_riscv.o
|
|
obj-y += time.o
|
|
obj-y += traps.o
|
|
-obj-y += riscv_ksyms.o
|
|
obj-y += stacktrace.o
|
|
obj-y += cacheinfo.o
|
|
obj-y += patch.o
|
|
diff --git a/arch/riscv/kernel/riscv_ksyms.c b/arch/riscv/kernel/riscv_ksyms.c
|
|
deleted file mode 100644
|
|
index 361565c4db7e..000000000000
|
|
--- a/arch/riscv/kernel/riscv_ksyms.c
|
|
+++ /dev/null
|
|
@@ -1,13 +0,0 @@
|
|
-// SPDX-License-Identifier: GPL-2.0-only
|
|
-/*
|
|
- * Copyright (C) 2017 Zihao Yu
|
|
- */
|
|
-
|
|
-#include <linux/export.h>
|
|
-#include <linux/uaccess.h>
|
|
-
|
|
-/*
|
|
- * Assembly functions that may be used (directly or indirectly) by modules
|
|
- */
|
|
-EXPORT_SYMBOL(memset);
|
|
-EXPORT_SYMBOL(__memset);
|
|
diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
|
|
index 9166b61cc2dc..482e28132d77 100644
|
|
--- a/arch/riscv/lib/Makefile
|
|
+++ b/arch/riscv/lib/Makefile
|
|
@@ -1,6 +1,5 @@
|
|
# SPDX-License-Identifier: GPL-2.0-only
|
|
lib-y += delay.o
|
|
-lib-y += memset.o
|
|
lib-$(CONFIG_MMU) += uaccess.o
|
|
lib-$(CONFIG_64BIT) += tishift.o
|
|
lib-y += string.o
|
|
diff --git a/arch/riscv/lib/memset.S b/arch/riscv/lib/memset.S
|
|
deleted file mode 100644
|
|
index 34c5360c6705..000000000000
|
|
--- a/arch/riscv/lib/memset.S
|
|
+++ /dev/null
|
|
@@ -1,113 +0,0 @@
|
|
-/* SPDX-License-Identifier: GPL-2.0-only */
|
|
-/*
|
|
- * Copyright (C) 2013 Regents of the University of California
|
|
- */
|
|
-
|
|
-
|
|
-#include <linux/linkage.h>
|
|
-#include <asm/asm.h>
|
|
-
|
|
-/* void *memset(void *, int, size_t) */
|
|
-ENTRY(__memset)
|
|
-WEAK(memset)
|
|
- move t0, a0 /* Preserve return value */
|
|
-
|
|
- /* Defer to byte-oriented fill for small sizes */
|
|
- sltiu a3, a2, 16
|
|
- bnez a3, 4f
|
|
-
|
|
- /*
|
|
- * Round to nearest XLEN-aligned address
|
|
- * greater than or equal to start address
|
|
- */
|
|
- addi a3, t0, SZREG-1
|
|
- andi a3, a3, ~(SZREG-1)
|
|
- beq a3, t0, 2f /* Skip if already aligned */
|
|
- /* Handle initial misalignment */
|
|
- sub a4, a3, t0
|
|
-1:
|
|
- sb a1, 0(t0)
|
|
- addi t0, t0, 1
|
|
- bltu t0, a3, 1b
|
|
- sub a2, a2, a4 /* Update count */
|
|
-
|
|
-2: /* Duff's device with 32 XLEN stores per iteration */
|
|
- /* Broadcast value into all bytes */
|
|
- andi a1, a1, 0xff
|
|
- slli a3, a1, 8
|
|
- or a1, a3, a1
|
|
- slli a3, a1, 16
|
|
- or a1, a3, a1
|
|
-#ifdef CONFIG_64BIT
|
|
- slli a3, a1, 32
|
|
- or a1, a3, a1
|
|
-#endif
|
|
-
|
|
- /* Calculate end address */
|
|
- andi a4, a2, ~(SZREG-1)
|
|
- add a3, t0, a4
|
|
-
|
|
- andi a4, a4, 31*SZREG /* Calculate remainder */
|
|
- beqz a4, 3f /* Shortcut if no remainder */
|
|
- neg a4, a4
|
|
- addi a4, a4, 32*SZREG /* Calculate initial offset */
|
|
-
|
|
- /* Adjust start address with offset */
|
|
- sub t0, t0, a4
|
|
-
|
|
- /* Jump into loop body */
|
|
- /* Assumes 32-bit instruction lengths */
|
|
- la a5, 3f
|
|
-#ifdef CONFIG_64BIT
|
|
- srli a4, a4, 1
|
|
-#endif
|
|
- add a5, a5, a4
|
|
- jr a5
|
|
-3:
|
|
- REG_S a1, 0(t0)
|
|
- REG_S a1, SZREG(t0)
|
|
- REG_S a1, 2*SZREG(t0)
|
|
- REG_S a1, 3*SZREG(t0)
|
|
- REG_S a1, 4*SZREG(t0)
|
|
- REG_S a1, 5*SZREG(t0)
|
|
- REG_S a1, 6*SZREG(t0)
|
|
- REG_S a1, 7*SZREG(t0)
|
|
- REG_S a1, 8*SZREG(t0)
|
|
- REG_S a1, 9*SZREG(t0)
|
|
- REG_S a1, 10*SZREG(t0)
|
|
- REG_S a1, 11*SZREG(t0)
|
|
- REG_S a1, 12*SZREG(t0)
|
|
- REG_S a1, 13*SZREG(t0)
|
|
- REG_S a1, 14*SZREG(t0)
|
|
- REG_S a1, 15*SZREG(t0)
|
|
- REG_S a1, 16*SZREG(t0)
|
|
- REG_S a1, 17*SZREG(t0)
|
|
- REG_S a1, 18*SZREG(t0)
|
|
- REG_S a1, 19*SZREG(t0)
|
|
- REG_S a1, 20*SZREG(t0)
|
|
- REG_S a1, 21*SZREG(t0)
|
|
- REG_S a1, 22*SZREG(t0)
|
|
- REG_S a1, 23*SZREG(t0)
|
|
- REG_S a1, 24*SZREG(t0)
|
|
- REG_S a1, 25*SZREG(t0)
|
|
- REG_S a1, 26*SZREG(t0)
|
|
- REG_S a1, 27*SZREG(t0)
|
|
- REG_S a1, 28*SZREG(t0)
|
|
- REG_S a1, 29*SZREG(t0)
|
|
- REG_S a1, 30*SZREG(t0)
|
|
- REG_S a1, 31*SZREG(t0)
|
|
- addi t0, t0, 32*SZREG
|
|
- bltu t0, a3, 3b
|
|
- andi a2, a2, SZREG-1 /* Update count */
|
|
-
|
|
-4:
|
|
- /* Handle trailing misalignment */
|
|
- beqz a2, 6f
|
|
- add a3, t0, a2
|
|
-5:
|
|
- sb a1, 0(t0)
|
|
- addi t0, t0, 1
|
|
- bltu t0, a3, 5b
|
|
-6:
|
|
- ret
|
|
-END(__memset)
|
|
diff --git a/arch/riscv/lib/string.c b/arch/riscv/lib/string.c
|
|
index bd52581e2068..7fc9ec5c26a7 100644
|
|
--- a/arch/riscv/lib/string.c
|
|
+++ b/arch/riscv/lib/string.c
|
|
@@ -112,3 +112,44 @@ EXPORT_SYMBOL(__memmove);
|
|
|
|
void *memmove(void *dest, const void *src, size_t count) __weak __alias(__memmove);
|
|
EXPORT_SYMBOL(memmove);
|
|
+
|
|
+/* Fill the first count bytes at s with (unsigned char)c; returns s. */
|
|
+void *__memset(void *s, int c, size_t count)
|
|
+{
|
|
+	union types dest = { .as_u8 = s };
|
|
+
|
|
+	if (count >= MIN_THRESHOLD) {
|
|
+		unsigned long cu = (unsigned char)c; /* drop bits above the fill byte */
|
|
+
|
|
+		/* Replicate the fill byte into all 4/8 bytes of an ulong */
|
|
+#ifdef CONFIG_ARCH_HAS_FAST_MULTIPLIER
|
|
+		cu *= 0x0101010101010101UL;
|
|
+#else
|
|
+		cu |= cu << 8;
|
|
+		cu |= cu << 16;
|
|
+		/* Two 16-bit shifts: a single << 32 would warn on 32 bit machines */
|
|
+		cu |= (cu << 16) << 16;
|
|
+#endif
|
|
+		if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) {
|
|
+			/*
|
|
+			 * Fill the buffer one byte at a time until
|
|
+			 * the destination is word aligned.
|
|
+			 */
|
|
+			for (; count && dest.as_uptr & WORD_MASK; count--)
|
|
+				*dest.as_u8++ = c;
|
|
+		}
|
|
+
|
|
+		/* Fill using the largest size allowed */
|
|
+		for (; count >= BYTES_LONG; count -= BYTES_LONG)
|
|
+			*dest.as_ulong++ = cu;
|
|
+	}
|
|
+
|
|
+	/* Fill the remainder one byte at a time */
|
|
+	while (count--)
|
|
+		*dest.as_u8++ = c;
|
|
+	return s;
|
|
+}
|
|
+EXPORT_SYMBOL(__memset);
|
|
+
|
|
+void *memset(void *s, int c, size_t count) __weak __alias(__memset);
|
|
+EXPORT_SYMBOL(memset);
|
|
diff --git a/arch/riscv/purgatory/Makefile b/arch/riscv/purgatory/Makefile
|
|
index a7209dc8027d..5b9ea779a2cc 100644
|
|
--- a/arch/riscv/purgatory/Makefile
|
|
+++ b/arch/riscv/purgatory/Makefile
|
|
@@ -1,7 +1,7 @@
|
|
# SPDX-License-Identifier: GPL-2.0
|
|
OBJECT_FILES_NON_STANDARD := y
|
|
|
|
-purgatory-y := purgatory.o sha256.o entry.o string.o ctype.o rvstring.o memset.o
|
|
+purgatory-y := purgatory.o sha256.o entry.o string.o ctype.o rvstring.o
|
|
|
|
targets += $(purgatory-y)
|
|
PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y))
|
|
@@ -15,9 +15,6 @@ $(obj)/ctype.o: $(srctree)/lib/ctype.c FORCE
|
|
$(obj)/rvstring.o: $(srctree)/arch/riscv/lib/string.c FORCE
|
|
$(call if_changed_rule,cc_o_c)
|
|
|
|
-$(obj)/memset.o: $(srctree)/arch/riscv/lib/memset.S FORCE
|
|
- $(call if_changed_rule,as_o_S)
|
|
-
|
|
$(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE
|
|
$(call if_changed_rule,cc_o_c)
|
|
|
|
--
|
|
Armbian
|
|
|