summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJoakim Tjernlund <Joakim.Tjernlund@transmode.se>2010-01-08 14:42:40 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2010-01-11 09:34:04 -0800
commitac4c2a3bbe5db5fc570b1d0ee1e474db7cb22585 (patch)
treea7b8f2d618497cd4152ebe8e7390107a442bf0f6
parent129182e5626972ac0df85d43a36dd46ad61c64e1 (diff)
downloadop-kernel-dev-ac4c2a3bbe5db5fc570b1d0ee1e474db7cb22585.zip
op-kernel-dev-ac4c2a3bbe5db5fc570b1d0ee1e474db7cb22585.tar.gz
zlib: optimize inffast when copying direct from output
JFFS2 uses lesser compression ratio and inflate always ends up in "copy direct from output" case. This patch tries to optimize the direct copy procedure. Uses get_unaligned() but only in one place. The copy loop just above this one can also use this optimization, but I havn't done so as I have not tested if it is a win there too. On my MPC8321 this is about 17% faster on my JFFS2 root FS than the original. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Joakim Tjernlund <Joakim.Tjernlund@transmode.se> Cc: Roel Kluin <roel.kluin@gmail.com> Cc: Richard Purdie <rpurdie@rpsys.net> Cc: David Woodhouse <dwmw2@infradead.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--arch/powerpc/boot/Makefile4
-rw-r--r--lib/zlib_inflate/inffast.c55
2 files changed, 47 insertions, 12 deletions
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index bb2465b..826a30a 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -20,7 +20,7 @@
all: $(obj)/zImage
BOOTCFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \
- -fno-strict-aliasing -Os -msoft-float -pipe \
+ -fno-strict-aliasing -Os -msoft-float -pipe -D__KERNEL__\
-fomit-frame-pointer -fno-builtin -fPIC -nostdinc \
-isystem $(shell $(CROSS32CC) -print-file-name=include)
BOOTAFLAGS := -D__ASSEMBLY__ $(BOOTCFLAGS) -traditional -nostdinc
@@ -34,6 +34,8 @@ BOOTCFLAGS += -fno-stack-protector
endif
BOOTCFLAGS += -I$(obj) -I$(srctree)/$(obj)
+BOOTCFLAGS += -include include/linux/autoconf.h -Iarch/powerpc/include
+BOOTCFLAGS += -Iinclude
DTS_FLAGS ?= -p 1024
diff --git a/lib/zlib_inflate/inffast.c b/lib/zlib_inflate/inffast.c
index 8550b0c..05e1559 100644
--- a/lib/zlib_inflate/inffast.c
+++ b/lib/zlib_inflate/inffast.c
@@ -4,6 +4,8 @@
*/
#include <linux/zutil.h>
+#include <asm/unaligned.h>
+#include <asm/byteorder.h>
#include "inftrees.h"
#include "inflate.h"
#include "inffast.h"
@@ -24,9 +26,11 @@
#ifdef POSTINC
# define OFF 0
# define PUP(a) *(a)++
+# define UP_UNALIGNED(a) get_unaligned((a)++)
#else
# define OFF 1
# define PUP(a) *++(a)
+# define UP_UNALIGNED(a) get_unaligned(++(a))
#endif
/*
@@ -239,18 +243,47 @@ void inflate_fast(z_streamp strm, unsigned start)
}
}
else {
+ unsigned short *sout;
+ unsigned long loops;
+
from = out - dist; /* copy direct from output */
- do { /* minimum length is three */
- PUP(out) = PUP(from);
- PUP(out) = PUP(from);
- PUP(out) = PUP(from);
- len -= 3;
- } while (len > 2);
- if (len) {
- PUP(out) = PUP(from);
- if (len > 1)
- PUP(out) = PUP(from);
- }
+ /* minimum length is three */
+ /* Align out addr */
+ if (!((long)(out - 1 + OFF) & 1)) {
+ PUP(out) = PUP(from);
+ len--;
+ }
+ sout = (unsigned short *)(out - OFF);
+ if (dist > 2) {
+ unsigned short *sfrom;
+
+ sfrom = (unsigned short *)(from - OFF);
+ loops = len >> 1;
+ do
+ PUP(sout) = UP_UNALIGNED(sfrom);
+ while (--loops);
+ out = (unsigned char *)sout + OFF;
+ from = (unsigned char *)sfrom + OFF;
+ } else { /* dist == 1 or dist == 2 */
+ unsigned short pat16;
+
+ pat16 = *(sout-2+2*OFF);
+ if (dist == 1)
+#if defined(__BIG_ENDIAN)
+ pat16 = (pat16 & 0xff) | ((pat16 & 0xff) << 8);
+#elif defined(__LITTLE_ENDIAN)
+ pat16 = (pat16 & 0xff00) | ((pat16 & 0xff00) >> 8);
+#else
+#error __BIG_ENDIAN nor __LITTLE_ENDIAN is defined
+#endif
+ loops = len >> 1;
+ do
+ PUP(sout) = pat16;
+ while (--loops);
+ out = (unsigned char *)sout + OFF;
+ }
+ if (len & 1)
+ PUP(out) = PUP(from);
}
}
else if ((op & 64) == 0) { /* 2nd level distance code */
OpenPOWER on IntegriCloud