diff options
author | Atsushi Nemoto <anemo@mba.ocn.ne.jp> | 2006-12-08 01:04:51 +0900 |
---|---|---|
committer | Ralf Baechle <ralf@linux-mips.org> | 2006-12-09 01:03:59 +0000 |
commit | ed99e2bc1dc5dc54eb5a019f4975562dbef20103 (patch) | |
tree | c8ff52ab4a29fe842e34fd94d01e74082486391d /arch/mips/lib/csum_partial.S | |
parent | 773ff78838ca3c07245e45c06235e0baaa5f710a (diff) | |
download | lwn-ed99e2bc1dc5dc54eb5a019f4975562dbef20103.tar.gz lwn-ed99e2bc1dc5dc54eb5a019f4975562dbef20103.zip |
[MIPS] Optimize csum_partial for 64bit kernel
Make csum_partial 64-bit powered.
Signed-off-by: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Diffstat (limited to 'arch/mips/lib/csum_partial.S')
-rw-r--r-- | arch/mips/lib/csum_partial.S | 76 |
1 files changed, 54 insertions, 22 deletions
diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S index b04475d76f3c..9db357294be1 100644 --- a/arch/mips/lib/csum_partial.S +++ b/arch/mips/lib/csum_partial.S @@ -29,30 +29,49 @@ #define t5 $13 #define t6 $14 #define t7 $15 + +#define USE_DOUBLE #endif +#ifdef USE_DOUBLE + +#define LOAD ld +#define ADD daddu +#define NBYTES 8 + +#else + +#define LOAD lw +#define ADD addu +#define NBYTES 4 + +#endif /* USE_DOUBLE */ + +#define UNIT(unit) ((unit)*NBYTES) + #define ADDC(sum,reg) \ - addu sum, reg; \ + ADD sum, reg; \ sltu v1, sum, reg; \ - addu sum, v1 + ADD sum, v1 -#define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3) \ - lw _t0, (offset + 0x00)(src); \ - lw _t1, (offset + 0x04)(src); \ - lw _t2, (offset + 0x08)(src); \ - lw _t3, (offset + 0x0c)(src); \ - ADDC(sum, _t0); \ - ADDC(sum, _t1); \ - ADDC(sum, _t2); \ - ADDC(sum, _t3); \ - lw _t0, (offset + 0x10)(src); \ - lw _t1, (offset + 0x14)(src); \ - lw _t2, (offset + 0x18)(src); \ - lw _t3, (offset + 0x1c)(src); \ +#define CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3) \ + LOAD _t0, (offset + UNIT(0))(src); \ + LOAD _t1, (offset + UNIT(1))(src); \ + LOAD _t2, (offset + UNIT(2))(src); \ + LOAD _t3, (offset + UNIT(3))(src); \ ADDC(sum, _t0); \ ADDC(sum, _t1); \ ADDC(sum, _t2); \ - ADDC(sum, _t3); \ + ADDC(sum, _t3) + +#ifdef USE_DOUBLE +#define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3) \ + CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3) +#else +#define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3) \ + CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3); \ + CSUM_BIGCHUNK1(src, offset + 0x10, sum, _t0, _t1, _t2, _t3) +#endif /* * a0: source address @@ -117,11 +136,17 @@ qword_align: beqz t8, oword_align andi t8, src, 0x10 +#ifdef USE_DOUBLE + ld t0, 0x00(src) + LONG_SUBU a1, a1, 0x8 + ADDC(sum, t0) +#else lw t0, 0x00(src) lw t1, 0x04(src) LONG_SUBU a1, a1, 0x8 ADDC(sum, t0) ADDC(sum, t1) +#endif PTR_ADDU src, src, 0x8 andi t8, src, 0x10 @@ -129,14 +154,14 @@ oword_align: beqz t8, begin_movement LONG_SRL t8, a1, 0x7 - lw t3, 0x08(src) - lw t4, 0x0c(src) - lw t0, 0x00(src) - lw t1, 0x04(src) - ADDC(sum, t3) - ADDC(sum, t4) +#ifdef USE_DOUBLE + ld t0, 0x00(src) + ld t1, 0x08(src) ADDC(sum, t0) ADDC(sum, t1) +#else + CSUM_BIGCHUNK1(src, 0x00, sum, t0, t1, t3, t4) +#endif LONG_SUBU a1, a1, 0x10 PTR_ADDU src, src, 0x10 LONG_SRL t8, a1, 0x7 @@ -219,6 +244,13 @@ small_csumcpy: 1: ADDC(sum, t1) /* fold checksum */ +#ifdef USE_DOUBLE + dsll32 v1, sum, 0 + daddu sum, v1 + sltu v1, sum, v1 + dsra32 sum, sum, 0 + addu sum, v1 +#endif sll v1, sum, 16 addu sum, v1 sltu v1, sum, v1 |