Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/checkpoint-restore/criu.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLaurent Dufour <ldufour@linux.vnet.ibm.com>2015-05-13 19:45:09 +0300
committerPavel Emelyanov <xemul@parallels.com>2015-05-14 12:33:54 +0300
commit16ad19410e408d9b10fd5a93d93e07228c4bd50f (patch)
tree1f1f0b62dc850de0f0f090e26fa0e6511fc95c72
parent892a96c4f3830ceb4921bd9fb0455dfb692ddc80 (diff)
ppc64: Use optimized memcpy
Instead of belonging to the common C memcpy function, belong on the optimized one stolen from the kernel. Cc: Anton Blanchard <anton@au.ibm.com> Signed-off-by: Laurent Dufour <ldufour@linux.vnet.ibm.com> Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
-rw-r--r--arch/ppc64/Makefile1
-rw-r--r--arch/ppc64/include/asm/string.h13
-rw-r--r--arch/ppc64/memcpy_power7.S208
-rw-r--r--pie/Makefile1
4 files changed, 220 insertions, 3 deletions
diff --git a/arch/ppc64/Makefile b/arch/ppc64/Makefile
index c5d332364..bd90a14e7 100644
--- a/arch/ppc64/Makefile
+++ b/arch/ppc64/Makefile
@@ -6,6 +6,7 @@ SYS-ASM := syscalls.S
syscalls-asm-y += $(SYS-ASM:.S=).o
crtools-obj-y += crtools.o
crtools-obj-y += cpu.o
+crtools-asm-y += memcpy_power7.o
SYS-DEF := syscall-ppc64.def
SYS-ASM-COMMON := syscall-common-ppc64.S
diff --git a/arch/ppc64/include/asm/string.h b/arch/ppc64/include/asm/string.h
index 034442781..38e968cd6 100644
--- a/arch/ppc64/include/asm/string.h
+++ b/arch/ppc64/include/asm/string.h
@@ -3,9 +3,16 @@
#include "compiler.h"
-/*
- * TODO : We may optimized some code here instead of using the generic ones.
- */
+#define HAS_BUILTIN_MEMCPY
+
#include "asm-generic/string.h"
+extern void memcpy_power7(void *to, const void *from, unsigned long n);
+static inline void *builtin_memcpy(void *to, const void *from, unsigned long n)
+{
+ if (n)
+ memcpy_power7(to, from, n);
+ return to;
+}
+
#endif /* __CR_ASM_STRING_H__ */
diff --git a/arch/ppc64/memcpy_power7.S b/arch/ppc64/memcpy_power7.S
new file mode 100644
index 000000000..bd2833bc5
--- /dev/null
+++ b/arch/ppc64/memcpy_power7.S
@@ -0,0 +1,208 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2012
+ *
+ * Author: Anton Blanchard <anton@au.ibm.com>
+ *
+ * --
+ * Copied from the kernel file arch/powerpc/lib/memcpy_power7.S
+ * Altivec support has been removed so we don't taint restored process.
+ */
+#include "asm/linkage.h"
+
+ENTRY(memcpy_power7)
+ cmpldi r5,16
+ std r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
+ blt .Lshort_copy
+
+.Lnonvmx_copy:
+ /* Get the source 8B aligned */
+ neg r6,r4
+ mtocrf 0x01,r6
+ clrldi r6,r6,(64-3)
+
+ bf cr7*4+3,1f
+ lbz r0,0(r4)
+ addi r4,r4,1
+ stb r0,0(r3)
+ addi r3,r3,1
+
+1: bf cr7*4+2,2f
+ lhz r0,0(r4)
+ addi r4,r4,2
+ sth r0,0(r3)
+ addi r3,r3,2
+
+2: bf cr7*4+1,3f
+ lwz r0,0(r4)
+ addi r4,r4,4
+ stw r0,0(r3)
+ addi r3,r3,4
+
+3: sub r5,r5,r6
+ cmpldi r5,128
+ blt 5f
+
+ mflr r0
+ stdu r1,-STACKFRAMESIZE(r1)
+ std r14,STK_REG(R14)(r1)
+ std r15,STK_REG(R15)(r1)
+ std r16,STK_REG(R16)(r1)
+ std r17,STK_REG(R17)(r1)
+ std r18,STK_REG(R18)(r1)
+ std r19,STK_REG(R19)(r1)
+ std r20,STK_REG(R20)(r1)
+ std r21,STK_REG(R21)(r1)
+ std r22,STK_REG(R22)(r1)
+ std r0,STACKFRAMESIZE+16(r1)
+
+ srdi r6,r5,7
+ mtctr r6
+
+ /* Now do cacheline (128B) sized loads and stores. */
+ .align 5
+4:
+ ld r0,0(r4)
+ ld r6,8(r4)
+ ld r7,16(r4)
+ ld r8,24(r4)
+ ld r9,32(r4)
+ ld r10,40(r4)
+ ld r11,48(r4)
+ ld r12,56(r4)
+ ld r14,64(r4)
+ ld r15,72(r4)
+ ld r16,80(r4)
+ ld r17,88(r4)
+ ld r18,96(r4)
+ ld r19,104(r4)
+ ld r20,112(r4)
+ ld r21,120(r4)
+ addi r4,r4,128
+ std r0,0(r3)
+ std r6,8(r3)
+ std r7,16(r3)
+ std r8,24(r3)
+ std r9,32(r3)
+ std r10,40(r3)
+ std r11,48(r3)
+ std r12,56(r3)
+ std r14,64(r3)
+ std r15,72(r3)
+ std r16,80(r3)
+ std r17,88(r3)
+ std r18,96(r3)
+ std r19,104(r3)
+ std r20,112(r3)
+ std r21,120(r3)
+ addi r3,r3,128
+ bdnz 4b
+
+ clrldi r5,r5,(64-7)
+
+ ld r14,STK_REG(R14)(r1)
+ ld r15,STK_REG(R15)(r1)
+ ld r16,STK_REG(R16)(r1)
+ ld r17,STK_REG(R17)(r1)
+ ld r18,STK_REG(R18)(r1)
+ ld r19,STK_REG(R19)(r1)
+ ld r20,STK_REG(R20)(r1)
+ ld r21,STK_REG(R21)(r1)
+ ld r22,STK_REG(R22)(r1)
+ addi r1,r1,STACKFRAMESIZE
+
+ /* Up to 127B to go */
+5: srdi r6,r5,4
+ mtocrf 0x01,r6
+
+6: bf cr7*4+1,7f
+ ld r0,0(r4)
+ ld r6,8(r4)
+ ld r7,16(r4)
+ ld r8,24(r4)
+ ld r9,32(r4)
+ ld r10,40(r4)
+ ld r11,48(r4)
+ ld r12,56(r4)
+ addi r4,r4,64
+ std r0,0(r3)
+ std r6,8(r3)
+ std r7,16(r3)
+ std r8,24(r3)
+ std r9,32(r3)
+ std r10,40(r3)
+ std r11,48(r3)
+ std r12,56(r3)
+ addi r3,r3,64
+
+ /* Up to 63B to go */
+7: bf cr7*4+2,8f
+ ld r0,0(r4)
+ ld r6,8(r4)
+ ld r7,16(r4)
+ ld r8,24(r4)
+ addi r4,r4,32
+ std r0,0(r3)
+ std r6,8(r3)
+ std r7,16(r3)
+ std r8,24(r3)
+ addi r3,r3,32
+
+ /* Up to 31B to go */
+8: bf cr7*4+3,9f
+ ld r0,0(r4)
+ ld r6,8(r4)
+ addi r4,r4,16
+ std r0,0(r3)
+ std r6,8(r3)
+ addi r3,r3,16
+
+9: clrldi r5,r5,(64-4)
+
+ /* Up to 15B to go */
+.Lshort_copy:
+ mtocrf 0x01,r5
+ bf cr7*4+0,12f
+ lwz r0,0(r4) /* Less chance of a reject with word ops */
+ lwz r6,4(r4)
+ addi r4,r4,8
+ stw r0,0(r3)
+ stw r6,4(r3)
+ addi r3,r3,8
+
+12: bf cr7*4+1,13f
+ lwz r0,0(r4)
+ addi r4,r4,4
+ stw r0,0(r3)
+ addi r3,r3,4
+
+13: bf cr7*4+2,14f
+ lhz r0,0(r4)
+ addi r4,r4,2
+ sth r0,0(r3)
+ addi r3,r3,2
+
+14: bf cr7*4+3,15f
+ lbz r0,0(r4)
+ stb r0,0(r3)
+
+15: ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
+ blr
+
+.Lunwind_stack_nonvmx_copy:
+ addi r1,r1,STACKFRAMESIZE
+ b .Lnonvmx_copy
+
diff --git a/pie/Makefile b/pie/Makefile
index 11620d7dc..c67d8dc6c 100644
--- a/pie/Makefile
+++ b/pie/Makefile
@@ -12,6 +12,7 @@ asm-e += $(ARCH_DIR)/intraprocedure.o
endif
ifeq ($(SRCARCH), ppc64)
asm-e += $(ARCH_DIR)/vdso-trampoline.o
+asm-e += $(ARCH_DIR)/memcpy_power7.o
endif
endif