summaryrefslogtreecommitdiff
path: root/lib/lzo/lzo1x_decompress_safe.c
diff options
context:
space:
mode:
authorMarkus F.X.J. Oberhumer <markus@oberhumer.com>2012-08-13 17:25:44 +0200
committerMarkus F.X.J. Oberhumer <markus@oberhumer.com>2013-02-20 19:36:01 +0100
commit8b975bd3f9089f8ee5d7bbfd798537b992bbc7e7 (patch)
tree09078bdf5e805a5f921d8b7b592262ef3e877937 /lib/lzo/lzo1x_decompress_safe.c
parentb6bec26cea948148a9420e7a0ac337f925de49e7 (diff)
downloadlwn-8b975bd3f9089f8ee5d7bbfd798537b992bbc7e7.tar.gz
lwn-8b975bd3f9089f8ee5d7bbfd798537b992bbc7e7.zip
lib/lzo: Update LZO compression to current upstream version
This commit updates the kernel LZO code to the current upsteam version which features a significant speed improvement - benchmarking the Calgary and Silesia test corpora typically shows a doubled performance in both compression and decompression on modern i386/x86_64/powerpc machines. Signed-off-by: Markus F.X.J. Oberhumer <markus@oberhumer.com>
Diffstat (limited to 'lib/lzo/lzo1x_decompress_safe.c')
-rw-r--r--lib/lzo/lzo1x_decompress_safe.c350
1 files changed, 166 insertions, 184 deletions
diff --git a/lib/lzo/lzo1x_decompress_safe.c b/lib/lzo/lzo1x_decompress_safe.c
index f2fd09850223..569985d522d5 100644
--- a/lib/lzo/lzo1x_decompress_safe.c
+++ b/lib/lzo/lzo1x_decompress_safe.c
@@ -1,12 +1,12 @@
/*
- * LZO1X Decompressor from MiniLZO
+ * LZO1X Decompressor from LZO
*
- * Copyright (C) 1996-2005 Markus F.X.J. Oberhumer <markus@oberhumer.com>
+ * Copyright (C) 1996-2012 Markus F.X.J. Oberhumer <markus@oberhumer.com>
*
* The full LZO package can be found at:
* http://www.oberhumer.com/opensource/lzo/
*
- * Changed for kernel use by:
+ * Changed for Linux kernel use by:
* Nitin Gupta <nitingupta910@gmail.com>
* Richard Purdie <rpurdie@openedhand.com>
*/
@@ -15,225 +15,207 @@
#include <linux/module.h>
#include <linux/kernel.h>
#endif
-
#include <asm/unaligned.h>
#include <linux/lzo.h>
#include "lzodefs.h"
-#define HAVE_IP(x, ip_end, ip) ((size_t)(ip_end - ip) < (x))
-#define HAVE_OP(x, op_end, op) ((size_t)(op_end - op) < (x))
-#define HAVE_LB(m_pos, out, op) (m_pos < out || m_pos >= op)
-
-#define COPY4(dst, src) \
- put_unaligned(get_unaligned((const u32 *)(src)), (u32 *)(dst))
+#define HAVE_IP(x) ((size_t)(ip_end - ip) >= (size_t)(x))
+#define HAVE_OP(x) ((size_t)(op_end - op) >= (size_t)(x))
+#define NEED_IP(x) if (!HAVE_IP(x)) goto input_overrun
+#define NEED_OP(x) if (!HAVE_OP(x)) goto output_overrun
+#define TEST_LB(m_pos) if ((m_pos) < out) goto lookbehind_overrun
int lzo1x_decompress_safe(const unsigned char *in, size_t in_len,
- unsigned char *out, size_t *out_len)
+ unsigned char *out, size_t *out_len)
{
+ unsigned char *op;
+ const unsigned char *ip;
+ size_t t, next;
+ size_t state = 0;
+ const unsigned char *m_pos;
const unsigned char * const ip_end = in + in_len;
unsigned char * const op_end = out + *out_len;
- const unsigned char *ip = in, *m_pos;
- unsigned char *op = out;
- size_t t;
- *out_len = 0;
+ op = out;
+ ip = in;
+ if (unlikely(in_len < 3))
+ goto input_overrun;
if (*ip > 17) {
t = *ip++ - 17;
- if (t < 4)
+ if (t < 4) {
+ next = t;
goto match_next;
- if (HAVE_OP(t, op_end, op))
- goto output_overrun;
- if (HAVE_IP(t + 1, ip_end, ip))
- goto input_overrun;
- do {
- *op++ = *ip++;
- } while (--t > 0);
- goto first_literal_run;
- }
-
- while ((ip < ip_end)) {
- t = *ip++;
- if (t >= 16)
- goto match;
- if (t == 0) {
- if (HAVE_IP(1, ip_end, ip))
- goto input_overrun;
- while (*ip == 0) {
- t += 255;
- ip++;
- if (HAVE_IP(1, ip_end, ip))
- goto input_overrun;
- }
- t += 15 + *ip++;
- }
- if (HAVE_OP(t + 3, op_end, op))
- goto output_overrun;
- if (HAVE_IP(t + 4, ip_end, ip))
- goto input_overrun;
-
- COPY4(op, ip);
- op += 4;
- ip += 4;
- if (--t > 0) {
- if (t >= 4) {
- do {
- COPY4(op, ip);
- op += 4;
- ip += 4;
- t -= 4;
- } while (t >= 4);
- if (t > 0) {
- do {
- *op++ = *ip++;
- } while (--t > 0);
- }
- } else {
- do {
- *op++ = *ip++;
- } while (--t > 0);
- }
}
+ goto copy_literal_run;
+ }
-first_literal_run:
+ for (;;) {
t = *ip++;
- if (t >= 16)
- goto match;
- m_pos = op - (1 + M2_MAX_OFFSET);
- m_pos -= t >> 2;
- m_pos -= *ip++ << 2;
-
- if (HAVE_LB(m_pos, out, op))
- goto lookbehind_overrun;
-
- if (HAVE_OP(3, op_end, op))
- goto output_overrun;
- *op++ = *m_pos++;
- *op++ = *m_pos++;
- *op++ = *m_pos;
-
- goto match_done;
-
- do {
-match:
- if (t >= 64) {
- m_pos = op - 1;
- m_pos -= (t >> 2) & 7;
- m_pos -= *ip++ << 3;
- t = (t >> 5) - 1;
- if (HAVE_LB(m_pos, out, op))
- goto lookbehind_overrun;
- if (HAVE_OP(t + 3 - 1, op_end, op))
- goto output_overrun;
- goto copy_match;
- } else if (t >= 32) {
- t &= 31;
- if (t == 0) {
- if (HAVE_IP(1, ip_end, ip))
- goto input_overrun;
- while (*ip == 0) {
+ if (t < 16) {
+ if (likely(state == 0)) {
+ if (unlikely(t == 0)) {
+ while (unlikely(*ip == 0)) {
t += 255;
ip++;
- if (HAVE_IP(1, ip_end, ip))
- goto input_overrun;
+ NEED_IP(1);
}
- t += 31 + *ip++;
+ t += 15 + *ip++;
}
- m_pos = op - 1;
- m_pos -= get_unaligned_le16(ip) >> 2;
- ip += 2;
- } else if (t >= 16) {
- m_pos = op;
- m_pos -= (t & 8) << 11;
-
- t &= 7;
- if (t == 0) {
- if (HAVE_IP(1, ip_end, ip))
- goto input_overrun;
- while (*ip == 0) {
- t += 255;
- ip++;
- if (HAVE_IP(1, ip_end, ip))
- goto input_overrun;
- }
- t += 7 + *ip++;
+ t += 3;
+copy_literal_run:
+#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
+ if (likely(HAVE_IP(t + 15) && HAVE_OP(t + 15))) {
+ const unsigned char *ie = ip + t;
+ unsigned char *oe = op + t;
+ do {
+ COPY8(op, ip);
+ op += 8;
+ ip += 8;
+ COPY8(op, ip);
+ op += 8;
+ ip += 8;
+ } while (ip < ie);
+ ip = ie;
+ op = oe;
+ } else
+#endif
+ {
+ NEED_OP(t);
+ NEED_IP(t + 3);
+ do {
+ *op++ = *ip++;
+ } while (--t > 0);
}
- m_pos -= get_unaligned_le16(ip) >> 2;
- ip += 2;
- if (m_pos == op)
- goto eof_found;
- m_pos -= 0x4000;
- } else {
+ state = 4;
+ continue;
+ } else if (state != 4) {
+ next = t & 3;
m_pos = op - 1;
m_pos -= t >> 2;
m_pos -= *ip++ << 2;
-
- if (HAVE_LB(m_pos, out, op))
- goto lookbehind_overrun;
- if (HAVE_OP(2, op_end, op))
- goto output_overrun;
-
- *op++ = *m_pos++;
- *op++ = *m_pos;
- goto match_done;
+ TEST_LB(m_pos);
+ NEED_OP(2);
+ op[0] = m_pos[0];
+ op[1] = m_pos[1];
+ op += 2;
+ goto match_next;
+ } else {
+ next = t & 3;
+ m_pos = op - (1 + M2_MAX_OFFSET);
+ m_pos -= t >> 2;
+ m_pos -= *ip++ << 2;
+ t = 3;
}
-
- if (HAVE_LB(m_pos, out, op))
- goto lookbehind_overrun;
- if (HAVE_OP(t + 3 - 1, op_end, op))
- goto output_overrun;
-
- if (t >= 2 * 4 - (3 - 1) && (op - m_pos) >= 4) {
- COPY4(op, m_pos);
- op += 4;
- m_pos += 4;
- t -= 4 - (3 - 1);
+ } else if (t >= 64) {
+ next = t & 3;
+ m_pos = op - 1;
+ m_pos -= (t >> 2) & 7;
+ m_pos -= *ip++ << 3;
+ t = (t >> 5) - 1 + (3 - 1);
+ } else if (t >= 32) {
+ t = (t & 31) + (3 - 1);
+ if (unlikely(t == 2)) {
+ while (unlikely(*ip == 0)) {
+ t += 255;
+ ip++;
+ NEED_IP(1);
+ }
+ t += 31 + *ip++;
+ NEED_IP(2);
+ }
+ m_pos = op - 1;
+ next = get_unaligned_le16(ip);
+ ip += 2;
+ m_pos -= next >> 2;
+ next &= 3;
+ } else {
+ m_pos = op;
+ m_pos -= (t & 8) << 11;
+ t = (t & 7) + (3 - 1);
+ if (unlikely(t == 2)) {
+ while (unlikely(*ip == 0)) {
+ t += 255;
+ ip++;
+ NEED_IP(1);
+ }
+ t += 7 + *ip++;
+ NEED_IP(2);
+ }
+ next = get_unaligned_le16(ip);
+ ip += 2;
+ m_pos -= next >> 2;
+ next &= 3;
+ if (m_pos == op)
+ goto eof_found;
+ m_pos -= 0x4000;
+ }
+ TEST_LB(m_pos);
+#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
+ if (op - m_pos >= 8) {
+ unsigned char *oe = op + t;
+ if (likely(HAVE_OP(t + 15))) {
do {
- COPY4(op, m_pos);
- op += 4;
- m_pos += 4;
- t -= 4;
- } while (t >= 4);
- if (t > 0)
- do {
- *op++ = *m_pos++;
- } while (--t > 0);
+ COPY8(op, m_pos);
+ op += 8;
+ m_pos += 8;
+ COPY8(op, m_pos);
+ op += 8;
+ m_pos += 8;
+ } while (op < oe);
+ op = oe;
+ if (HAVE_IP(6)) {
+ state = next;
+ COPY4(op, ip);
+ op += next;
+ ip += next;
+ continue;
+ }
} else {
-copy_match:
- *op++ = *m_pos++;
- *op++ = *m_pos++;
+ NEED_OP(t);
do {
*op++ = *m_pos++;
- } while (--t > 0);
+ } while (op < oe);
}
-match_done:
- t = ip[-2] & 3;
- if (t == 0)
- break;
+ } else
+#endif
+ {
+ unsigned char *oe = op + t;
+ NEED_OP(t);
+ op[0] = m_pos[0];
+ op[1] = m_pos[1];
+ op += 2;
+ m_pos += 2;
+ do {
+ *op++ = *m_pos++;
+ } while (op < oe);
+ }
match_next:
- if (HAVE_OP(t, op_end, op))
- goto output_overrun;
- if (HAVE_IP(t + 1, ip_end, ip))
- goto input_overrun;
-
- *op++ = *ip++;
- if (t > 1) {
+ state = next;
+ t = next;
+#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
+ if (likely(HAVE_IP(6) && HAVE_OP(4))) {
+ COPY4(op, ip);
+ op += t;
+ ip += t;
+ } else
+#endif
+ {
+ NEED_IP(t + 3);
+ NEED_OP(t);
+ while (t > 0) {
*op++ = *ip++;
- if (t > 2)
- *op++ = *ip++;
+ t--;
}
-
- t = *ip++;
- } while (ip < ip_end);
+ }
}
- *out_len = op - out;
- return LZO_E_EOF_NOT_FOUND;
-
eof_found:
*out_len = op - out;
- return (ip == ip_end ? LZO_E_OK :
- (ip < ip_end ? LZO_E_INPUT_NOT_CONSUMED : LZO_E_INPUT_OVERRUN));
+ return (t != 3 ? LZO_E_ERROR :
+ ip == ip_end ? LZO_E_OK :
+ ip < ip_end ? LZO_E_INPUT_NOT_CONSUMED : LZO_E_INPUT_OVERRUN);
+
input_overrun:
*out_len = op - out;
return LZO_E_INPUT_OVERRUN;