9cfc7508b5
This patch replaces all memcpy() calls with LZ4_memcpy() which calls __builtin_memcpy() so the compiler can inline it. LZ4 relies heavily on memcpy() with a constant size being inlined. In x86 and i386 pre-boot environments memcpy() cannot be inlined because memcpy() doesn't get defined as __builtin_memcpy(). An equivalent patch has been applied upstream so that the next import won't lose this change [1]. I've measured the kernel decompression speed using QEMU before and after this patch for the x86_64 and i386 architectures. The speed-up is about 10x as shown below (columns: Code, Arch, Kernel Size, Time, Speed): v5.8, x86_64, 11504832 B, 148 ms, 79 MB/s; patch, x86_64, 11503872 B, 13 ms, 885 MB/s; v5.8, i386, 9621216 B, 91 ms, 106 MB/s; patch, i386, 9620224 B, 10 ms, 962 MB/s. I also measured the time to decompress the initramfs on x86_64, i386, and arm. All three show the same decompression speed before and after, as expected. [1] https://github.com/lz4/lz4/pull/890 Signed-off-by: Nick Terrell <terrelln@fb.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Cc: Yann Collet <yann.collet.73@gmail.com> Cc: Gao Xiang <gaoxiang25@huawei.com> Cc: Sven Schmidt <4sschmid@informatik.uni-hamburg.de> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Ingo Molnar <mingo@kernel.org> Cc: Arvind Sankar <nivedita@alum.mit.edu> Link: http://lkml.kernel.org/r/20200803194022.2966806-1-nickrterrell@gmail.com Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Change-Id: I3e725b70595227145a8c8b42a6626cb0629fdddf |
||
---|---|---|
.. | ||
842 | ||
crypto | ||
dim | ||
fonts | ||
livepatch | ||
lz4 | ||
lzo | ||
math | ||
mpi | ||
raid6 | ||
reed_solomon | ||
vdso | ||
xz | ||
zlib_deflate | ||
zlib_inflate | ||
zstd | ||
.gitignore | ||
argv_split.c | ||
ashldi3.c | ||
ashrdi3.c | ||
asn1_decoder.c | ||
assoc_array.c | ||
atomic64_test.c | ||
atomic64.c | ||
audit.c | ||
bcd.c | ||
bch.c | ||
bitmap.c | ||
bitrev.c | ||
bsearch.c | ||
btree.c | ||
bucket_locks.c | ||
bug.c | ||
build_OID_registry | ||
bust_spinlocks.c | ||
chacha.c | ||
check_signature.c | ||
checksum.c | ||
clz_ctz.c | ||
clz_tab.c | ||
cmdline.c | ||
cmpdi2.c | ||
compat_audit.c | ||
cpu_rmap.c | ||
cpumask.c | ||
crc4.c | ||
crc7.c | ||
crc8.c | ||
crc16.c | ||
crc32.c | ||
crc32defs.h | ||
crc32test.c | ||
crc64.c | ||
crc-ccitt.c | ||
crc-itu-t.c | ||
crc-t10dif.c | ||
ctype.c | ||
debug_info.c | ||
debug_locks.c | ||
debugobjects.c | ||
dec_and_lock.c | ||
decompress_bunzip2.c | ||
decompress_inflate.c | ||
decompress_unlz4.c | ||
decompress_unlzma.c | ||
decompress_unlzo.c | ||
decompress_unxz.c | ||
decompress.c | ||
devres.c | ||
digsig.c | ||
dump_stack.c | ||
dynamic_debug.c | ||
dynamic_queue_limits.c | ||
earlycpio.c | ||
error-inject.c | ||
errseq.c | ||
extable.c | ||
fault-inject.c | ||
fdt_empty_tree.c | ||
fdt_ro.c | ||
fdt_rw.c | ||
fdt_strerror.c | ||
fdt_sw.c | ||
fdt_wip.c | ||
fdt.c | ||
find_bit_benchmark.c | ||
find_bit.c | ||
flex_proportions.c | ||
gen_crc32table.c | ||
gen_crc64table.c | ||
genalloc.c | ||
generic-radix-tree.c | ||
glob.c | ||
globtest.c | ||
hexdump.c | ||
hweight.c | ||
idr.c | ||
inflate.c | ||
interval_tree_test.c | ||
interval_tree.c | ||
iomap_copy.c | ||
iomap.c | ||
iommu-helper.c | ||
ioremap.c | ||
iov_iter.c | ||
irq_poll.c | ||
irq_regs.c | ||
is_single_threaded.c | ||
kasprintf.c | ||
Kconfig | ||
Kconfig.debug | ||
Kconfig.kasan | ||
Kconfig.kgdb | ||
Kconfig.ubsan | ||
kfifo.c | ||
klist.c | ||
kobject_uevent.c | ||
kobject.c | ||
kstrtox.c | ||
kstrtox.h | ||
libcrc32c.c | ||
list_debug.c | ||
list_sort.c | ||
llist.c | ||
locking-selftest-hardirq.h | ||
locking-selftest-mutex.h | ||
locking-selftest-rlock-hardirq.h | ||
locking-selftest-rlock-softirq.h | ||
locking-selftest-rlock.h | ||
locking-selftest-rsem.h | ||
locking-selftest-rtmutex.h | ||
locking-selftest-softirq.h | ||
locking-selftest-spin-hardirq.h | ||
locking-selftest-spin-softirq.h | ||
locking-selftest-spin.h | ||
locking-selftest-wlock-hardirq.h | ||
locking-selftest-wlock-softirq.h | ||
locking-selftest-wlock.h | ||
locking-selftest-wsem.h | ||
locking-selftest.c | ||
lockref.c | ||
logic_pio.c | ||
lru_cache.c | ||
lshrdi3.c | ||
Makefile | ||
memcat_p.c | ||
memory-notifier-error-inject.c | ||
memweight.c | ||
muldi3.c | ||
net_utils.c | ||
netdev-notifier-error-inject.c | ||
nlattr.c | ||
nmi_backtrace.c | ||
nodemask.c | ||
notifier-error-inject.c | ||
notifier-error-inject.h | ||
objagg.c | ||
of-reconfig-notifier-error-inject.c | ||
oid_registry.c | ||
once.c | ||
packing.c | ||
parman.c | ||
parser.c | ||
pci_iomap.c | ||
percpu_counter.c | ||
percpu_test.c | ||
percpu-refcount.c | ||
plist.c | ||
pm-notifier-error-inject.c | ||
radix-tree.c | ||
random32.c | ||
ratelimit.c | ||
rbtree_test.c | ||
rbtree.c | ||
refcount.c | ||
rhashtable.c | ||
sbitmap.c | ||
scatterlist.c | ||
seq_buf.c | ||
sg_pool.c | ||
sg_split.c | ||
sha1.c | ||
show_mem.c | ||
siphash.c | ||
smp_processor_id.c | ||
sort.c | ||
stackdepot.c | ||
stmp_device.c | ||
string_helpers.c | ||
string.c | ||
strncpy_from_user.c | ||
strnlen_user.c | ||
syscall.c | ||
test_bitfield.c | ||
test_bitmap.c | ||
test_blackhole_dev.c | ||
test_bpf.c | ||
test_debug_virtual.c | ||
test_firmware.c | ||
test_hash.c | ||
test_hexdump.c | ||
test_ida.c | ||
test_kasan.c | ||
test_kmod.c | ||
test_list_sort.c | ||
test_memcat_p.c | ||
test_meminit.c | ||
test_module.c | ||
test_objagg.c | ||
test_overflow.c | ||
test_parman.c | ||
test_printf.c | ||
test_rhashtable.c | ||
test_siphash.c | ||
test_sort.c | ||
test_stackinit.c | ||
test_static_key_base.c | ||
test_static_keys.c | ||
test_string.c | ||
test_strscpy.c | ||
test_sysctl.c | ||
test_ubsan.c | ||
test_user_copy.c | ||
test_uuid.c | ||
test_vmalloc.c | ||
test_xarray.c | ||
test-kstrtox.c | ||
test-string_helpers.c | ||
textsearch.c | ||
timerqueue.c | ||
ts_bm.c | ||
ts_fsm.c | ||
ts_kmp.c | ||
ubsan.c | ||
ubsan.h | ||
ucmpdi2.c | ||
ucs2_string.c | ||
usercopy.c | ||
uuid.c | ||
vsprintf.c | ||
win_minmax.c | ||
xarray.c | ||
xxhash.c |