android_kernel_xiaomi_sm8350/arch/csky/include/asm/uaccess.h

412 lines
12 KiB
C
Raw Permalink Normal View History

/* SPDX-License-Identifier: GPL-2.0 */
// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
#ifndef __ASM_CSKY_UACCESS_H
#define __ASM_CSKY_UACCESS_H
/*
* User space memory access functions
*/
#include <linux/compiler.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/version.h>
#include <asm/segment.h>
Remove 'type' argument from access_ok() function Nobody has actually used the type (VERIFY_READ vs VERIFY_WRITE) argument of the user address range verification function since we got rid of the old racy i386-only code to walk page tables by hand. It existed because the original 80386 would not honor the write protect bit when in kernel mode, so you had to do COW by hand before doing any user access. But we haven't supported that in a long time, and these days the 'type' argument is a purely historical artifact. A discussion about extending 'user_access_begin()' to do the range checking resulted this patch, because there is no way we're going to move the old VERIFY_xyz interface to that model. And it's best done at the end of the merge window when I've done most of my merges, so let's just get this done once and for all. This patch was mostly done with a sed-script, with manual fix-ups for the cases that weren't of the trivial 'access_ok(VERIFY_xyz' form. There were a couple of notable cases: - csky still had the old "verify_area()" name as an alias. - the iter_iov code had magical hardcoded knowledge of the actual values of VERIFY_{READ,WRITE} (not that they mattered, since nothing really used it) - microblaze used the type argument for a debug printout but other than those oddities this should be a total no-op patch. I tried to fix up all architectures, did fairly extensive grepping for access_ok() uses, and the changes are trivial, but I may have missed something. Any missed conversion should be trivially fixable, though. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2019-01-03 21:57:57 -05:00
static inline int access_ok(const void *addr, unsigned long size)
{
unsigned long limit = current_thread_info()->addr_limit.seg;
return (((unsigned long)addr < limit) &&
((unsigned long)(addr + size) < limit));
}
Remove 'type' argument from access_ok() function Nobody has actually used the type (VERIFY_READ vs VERIFY_WRITE) argument of the user address range verification function since we got rid of the old racy i386-only code to walk page tables by hand. It existed because the original 80386 would not honor the write protect bit when in kernel mode, so you had to do COW by hand before doing any user access. But we haven't supported that in a long time, and these days the 'type' argument is a purely historical artifact. A discussion about extending 'user_access_begin()' to do the range checking resulted this patch, because there is no way we're going to move the old VERIFY_xyz interface to that model. And it's best done at the end of the merge window when I've done most of my merges, so let's just get this done once and for all. This patch was mostly done with a sed-script, with manual fix-ups for the cases that weren't of the trivial 'access_ok(VERIFY_xyz' form. There were a couple of notable cases: - csky still had the old "verify_area()" name as an alias. - the iter_iov code had magical hardcoded knowledge of the actual values of VERIFY_{READ,WRITE} (not that they mattered, since nothing really used it) - microblaze used the type argument for a debug printout but other than those oddities this should be a total no-op patch. I tried to fix up all architectures, did fairly extensive grepping for access_ok() uses, and the changes are trivial, but I may have missed something. Any missed conversion should be trivially fixable, though. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2019-01-03 21:57:57 -05:00
#define __addr_ok(addr) (access_ok(addr, 0))
extern int __put_user_bad(void);
/*
* Tell gcc we read from memory instead of writing: this is because
* we do not write to any memory gcc knows about, so there are no
* aliasing issues.
*/
/*
* These are the main single-value transfer routines. They automatically
* use the right size if we just have the right pointer type.
*
* This gets kind of ugly. We want to return _two_ values in "get_user()"
* and yet we don't want to do any pointers, because that is too much
* of a performance impact. Thus we have a few rather ugly macros here,
* and hide all the ugliness from the user.
*
* The "__xxx" versions of the user access functions are versions that
* do not verify the address space, that must have been done previously
* with a separate "access_ok()" call (this is used when we do multiple
* accesses to the same area of user memory).
*
* As we use the same address space for kernel and user data on
* Ckcore, we can just do these as direct assignments. (Of course, the
* exception handling means that it's no longer "just"...)
*/
#define put_user(x, ptr) \
__put_user_check((x), (ptr), sizeof(*(ptr)))
#define __put_user(x, ptr) \
__put_user_nocheck((x), (ptr), sizeof(*(ptr)))
#define __ptr(x) ((unsigned long *)(x))
#define get_user(x, ptr) \
__get_user_check((x), (ptr), sizeof(*(ptr)))
#define __get_user(x, ptr) \
__get_user_nocheck((x), (ptr), sizeof(*(ptr)))
#define __put_user_nocheck(x, ptr, size) \
({ \
long __pu_err = 0; \
typeof(*(ptr)) *__pu_addr = (ptr); \
typeof(*(ptr)) __pu_val = (typeof(*(ptr)))(x); \
if (__pu_addr) \
__put_user_size(__pu_val, (__pu_addr), (size), \
__pu_err); \
__pu_err; \
})
#define __put_user_check(x, ptr, size) \
({ \
long __pu_err = -EFAULT; \
typeof(*(ptr)) *__pu_addr = (ptr); \
typeof(*(ptr)) __pu_val = (typeof(*(ptr)))(x); \
Remove 'type' argument from access_ok() function Nobody has actually used the type (VERIFY_READ vs VERIFY_WRITE) argument of the user address range verification function since we got rid of the old racy i386-only code to walk page tables by hand. It existed because the original 80386 would not honor the write protect bit when in kernel mode, so you had to do COW by hand before doing any user access. But we haven't supported that in a long time, and these days the 'type' argument is a purely historical artifact. A discussion about extending 'user_access_begin()' to do the range checking resulted this patch, because there is no way we're going to move the old VERIFY_xyz interface to that model. And it's best done at the end of the merge window when I've done most of my merges, so let's just get this done once and for all. This patch was mostly done with a sed-script, with manual fix-ups for the cases that weren't of the trivial 'access_ok(VERIFY_xyz' form. There were a couple of notable cases: - csky still had the old "verify_area()" name as an alias. - the iter_iov code had magical hardcoded knowledge of the actual values of VERIFY_{READ,WRITE} (not that they mattered, since nothing really used it) - microblaze used the type argument for a debug printout but other than those oddities this should be a total no-op patch. I tried to fix up all architectures, did fairly extensive grepping for access_ok() uses, and the changes are trivial, but I may have missed something. Any missed conversion should be trivially fixable, though. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2019-01-03 21:57:57 -05:00
if (access_ok(__pu_addr, size) && __pu_addr) \
__put_user_size(__pu_val, __pu_addr, (size), __pu_err); \
__pu_err; \
})
#define __put_user_size(x, ptr, size, retval) \
do { \
retval = 0; \
switch (size) { \
case 1: \
__put_user_asm_b(x, ptr, retval); \
break; \
case 2: \
__put_user_asm_h(x, ptr, retval); \
break; \
case 4: \
__put_user_asm_w(x, ptr, retval); \
break; \
case 8: \
__put_user_asm_64(x, ptr, retval); \
break; \
default: \
__put_user_bad(); \
} \
} while (0)
/*
* We don't tell gcc that we are accessing memory, but this is OK
* because we do not write to any memory gcc knows about, so there
* are no aliasing issues.
*
* Note that PC at a fault is the address *after* the faulting
* instruction.
*/
#define __put_user_asm_b(x, ptr, err) \
do { \
int errcode; \
asm volatile( \
"1: stb %1, (%2,0) \n" \
" br 3f \n" \
"2: mov %0, %3 \n" \
" br 3f \n" \
".section __ex_table, \"a\" \n" \
".align 2 \n" \
".long 1b,2b \n" \
".previous \n" \
"3: \n" \
: "=r"(err), "=r"(x), "=r"(ptr), "=r"(errcode) \
: "0"(err), "1"(x), "2"(ptr), "3"(-EFAULT) \
: "memory"); \
} while (0)
#define __put_user_asm_h(x, ptr, err) \
do { \
int errcode; \
asm volatile( \
"1: sth %1, (%2,0) \n" \
" br 3f \n" \
"2: mov %0, %3 \n" \
" br 3f \n" \
".section __ex_table, \"a\" \n" \
".align 2 \n" \
".long 1b,2b \n" \
".previous \n" \
"3: \n" \
: "=r"(err), "=r"(x), "=r"(ptr), "=r"(errcode) \
: "0"(err), "1"(x), "2"(ptr), "3"(-EFAULT) \
: "memory"); \
} while (0)
#define __put_user_asm_w(x, ptr, err) \
do { \
int errcode; \
asm volatile( \
"1: stw %1, (%2,0) \n" \
" br 3f \n" \
"2: mov %0, %3 \n" \
" br 3f \n" \
".section __ex_table,\"a\" \n" \
".align 2 \n" \
".long 1b, 2b \n" \
".previous \n" \
"3: \n" \
: "=r"(err), "=r"(x), "=r"(ptr), "=r"(errcode) \
: "0"(err), "1"(x), "2"(ptr), "3"(-EFAULT) \
: "memory"); \
} while (0)
#define __put_user_asm_64(x, ptr, err) \
do { \
int tmp; \
int errcode; \
typeof(*(ptr))src = (typeof(*(ptr)))x; \
typeof(*(ptr))*psrc = &src; \
\
asm volatile( \
" ldw %3, (%1, 0) \n" \
"1: stw %3, (%2, 0) \n" \
" ldw %3, (%1, 4) \n" \
"2: stw %3, (%2, 4) \n" \
" br 4f \n" \
"3: mov %0, %4 \n" \
" br 4f \n" \
".section __ex_table, \"a\" \n" \
".align 2 \n" \
".long 1b, 3b \n" \
".long 2b, 3b \n" \
".previous \n" \
"4: \n" \
: "=r"(err), "=r"(psrc), "=r"(ptr), \
"=r"(tmp), "=r"(errcode) \
: "0"(err), "1"(psrc), "2"(ptr), "3"(0), "4"(-EFAULT) \
: "memory"); \
} while (0)
#define __get_user_nocheck(x, ptr, size) \
({ \
long __gu_err; \
__get_user_size(x, (ptr), (size), __gu_err); \
__gu_err; \
})
#define __get_user_check(x, ptr, size) \
({ \
int __gu_err = -EFAULT; \
const __typeof__(*(ptr)) __user *__gu_ptr = (ptr); \
Remove 'type' argument from access_ok() function Nobody has actually used the type (VERIFY_READ vs VERIFY_WRITE) argument of the user address range verification function since we got rid of the old racy i386-only code to walk page tables by hand. It existed because the original 80386 would not honor the write protect bit when in kernel mode, so you had to do COW by hand before doing any user access. But we haven't supported that in a long time, and these days the 'type' argument is a purely historical artifact. A discussion about extending 'user_access_begin()' to do the range checking resulted this patch, because there is no way we're going to move the old VERIFY_xyz interface to that model. And it's best done at the end of the merge window when I've done most of my merges, so let's just get this done once and for all. This patch was mostly done with a sed-script, with manual fix-ups for the cases that weren't of the trivial 'access_ok(VERIFY_xyz' form. There were a couple of notable cases: - csky still had the old "verify_area()" name as an alias. - the iter_iov code had magical hardcoded knowledge of the actual values of VERIFY_{READ,WRITE} (not that they mattered, since nothing really used it) - microblaze used the type argument for a debug printout but other than those oddities this should be a total no-op patch. I tried to fix up all architectures, did fairly extensive grepping for access_ok() uses, and the changes are trivial, but I may have missed something. Any missed conversion should be trivially fixable, though. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2019-01-03 21:57:57 -05:00
if (access_ok(__gu_ptr, size) && __gu_ptr) \
__get_user_size(x, __gu_ptr, size, __gu_err); \
__gu_err; \
})
#define __get_user_size(x, ptr, size, retval) \
do { \
switch (size) { \
case 1: \
__get_user_asm_common((x), ptr, "ldb", retval); \
break; \
case 2: \
__get_user_asm_common((x), ptr, "ldh", retval); \
break; \
case 4: \
__get_user_asm_common((x), ptr, "ldw", retval); \
break; \
default: \
x = 0; \
(retval) = __get_user_bad(); \
} \
} while (0)
#define __get_user_asm_common(x, ptr, ins, err) \
do { \
int errcode; \
asm volatile( \
"1: " ins " %1, (%4,0) \n" \
" br 3f \n" \
/* Fix up codes */ \
"2: mov %0, %2 \n" \
" movi %1, 0 \n" \
" br 3f \n" \
".section __ex_table,\"a\" \n" \
".align 2 \n" \
".long 1b, 2b \n" \
".previous \n" \
"3: \n" \
: "=r"(err), "=r"(x), "=r"(errcode) \
: "0"(0), "r"(ptr), "2"(-EFAULT) \
: "memory"); \
} while (0)
extern int __get_user_bad(void);
csky: Fixup raw_copy_from_user() [ Upstream commit 51bb38cb78363fdad1f89e87357b7bc73e39ba88 ] If raw_copy_from_user(to, from, N) returns K, callers expect the first N - K bytes starting at to to have been replaced with the contents of corresponding area starting at from and the last K bytes of destination *left* *unmodified*. What arch/sky/lib/usercopy.c is doing is broken - it can lead to e.g. data corruption on write(2). raw_copy_to_user() is inaccurate about return value, which is a bug, but consequences are less drastic than for raw_copy_from_user(). And just what are those access_ok() doing in there? I mean, look into linux/uaccess.h; that's where we do that check (as well as zero tail on failure in the callers that need zeroing). AFAICS, all of that shouldn't be hard to fix; something like a patch below might make a useful starting point. I would suggest moving these macros into usercopy.c (they are never used anywhere else) and possibly expanding them there; if you leave them alive, please at least rename __copy_user_zeroing(). Again, it must not zero anything on failed read. Said that, I'm not sure we won't be better off simply turning usercopy.c into usercopy.S - all that is left there is a couple of functions, each consisting only of inline asm. Guo Ren reply: Yes, raw_copy_from_user is wrong, it's no need zeroing code. unsigned long _copy_from_user(void *to, const void __user *from, unsigned long n) { unsigned long res = n; might_fault(); if (likely(access_ok(from, n))) { kasan_check_write(to, n); res = raw_copy_from_user(to, from, n); } if (unlikely(res)) memset(to + (n - res), 0, res); return res; } EXPORT_SYMBOL(_copy_from_user); You are right and access_ok() should be removed. but, how about: do { ... "2: stw %3, (%1, 0) \n" \ + " subi %0, 4 \n" \ "9: stw %4, (%1, 4) \n" \ + " subi %0, 4 \n" \ "10: stw %5, (%1, 8) \n" \ + " subi %0, 4 \n" \ "11: stw %6, (%1, 12) \n" \ + " subi %0, 4 \n" \ " addi %2, 16 \n" \ " addi %1, 16 \n" \ Don't expand __ex_table AI Viro reply: Hey, I've no idea about the instruction scheduling on csky - if that doesn't slow the things down, all the better. It's just that copy_to_user() and friends are on fairly hot codepaths, and in quite a few situations they will dominate the speed of e.g. read(2). So I tried to keep the fast path unchanged. Up to the architecture maintainers, obviously. Which would be you... As for the fixups size increase (__ex_table size is unchanged)... You have each of those macros expanded exactly once. So the size is not a serious argument, IMO - useless complexity would be, if it is, in fact, useless; the size... not really, especially since those extra subi will at least offset it. Again, up to you - asm optimizations of (essentially) memcpy()-style loops are tricky and can depend upon the fairly subtle details of architecture. So even on something I know reasonably well I would resort to direct experiments if I can't pass the buck to architecture maintainers. It *is* worth optimizing - this is where read() from a file that is already in page cache spends most of the time, etc. Guo Ren reply: Thx, after fixup some typo “sub %0, 4”, apply the patch. TODO: - user copy/from codes are still need optimizing. Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: Guo Ren <guoren@linux.alibaba.com> Signed-off-by: Sasha Levin <sashal@kernel.org>
2020-04-06 21:40:11 -04:00
#define ___copy_to_user(to, from, n) \
do { \
int w0, w1, w2, w3; \
asm volatile( \
"0: cmpnei %1, 0 \n" \
" bf 8f \n" \
" mov %3, %1 \n" \
" or %3, %2 \n" \
" andi %3, 3 \n" \
" cmpnei %3, 0 \n" \
" bf 1f \n" \
" br 5f \n" \
"1: cmplti %0, 16 \n" /* 4W */ \
" bt 3f \n" \
" ldw %3, (%2, 0) \n" \
" ldw %4, (%2, 4) \n" \
" ldw %5, (%2, 8) \n" \
" ldw %6, (%2, 12) \n" \
"2: stw %3, (%1, 0) \n" \
"9: stw %4, (%1, 4) \n" \
"10: stw %5, (%1, 8) \n" \
"11: stw %6, (%1, 12) \n" \
" addi %2, 16 \n" \
" addi %1, 16 \n" \
" subi %0, 16 \n" \
" br 1b \n" \
"3: cmplti %0, 4 \n" /* 1W */ \
" bt 5f \n" \
" ldw %3, (%2, 0) \n" \
"4: stw %3, (%1, 0) \n" \
" addi %2, 4 \n" \
" addi %1, 4 \n" \
" subi %0, 4 \n" \
" br 3b \n" \
"5: cmpnei %0, 0 \n" /* 1B */ \
csky: Fixup raw_copy_from_user() [ Upstream commit 51bb38cb78363fdad1f89e87357b7bc73e39ba88 ] If raw_copy_from_user(to, from, N) returns K, callers expect the first N - K bytes starting at to to have been replaced with the contents of corresponding area starting at from and the last K bytes of destination *left* *unmodified*. What arch/sky/lib/usercopy.c is doing is broken - it can lead to e.g. data corruption on write(2). raw_copy_to_user() is inaccurate about return value, which is a bug, but consequences are less drastic than for raw_copy_from_user(). And just what are those access_ok() doing in there? I mean, look into linux/uaccess.h; that's where we do that check (as well as zero tail on failure in the callers that need zeroing). AFAICS, all of that shouldn't be hard to fix; something like a patch below might make a useful starting point. I would suggest moving these macros into usercopy.c (they are never used anywhere else) and possibly expanding them there; if you leave them alive, please at least rename __copy_user_zeroing(). Again, it must not zero anything on failed read. Said that, I'm not sure we won't be better off simply turning usercopy.c into usercopy.S - all that is left there is a couple of functions, each consisting only of inline asm. Guo Ren reply: Yes, raw_copy_from_user is wrong, it's no need zeroing code. unsigned long _copy_from_user(void *to, const void __user *from, unsigned long n) { unsigned long res = n; might_fault(); if (likely(access_ok(from, n))) { kasan_check_write(to, n); res = raw_copy_from_user(to, from, n); } if (unlikely(res)) memset(to + (n - res), 0, res); return res; } EXPORT_SYMBOL(_copy_from_user); You are right and access_ok() should be removed. but, how about: do { ... "2: stw %3, (%1, 0) \n" \ + " subi %0, 4 \n" \ "9: stw %4, (%1, 4) \n" \ + " subi %0, 4 \n" \ "10: stw %5, (%1, 8) \n" \ + " subi %0, 4 \n" \ "11: stw %6, (%1, 12) \n" \ + " subi %0, 4 \n" \ " addi %2, 16 \n" \ " addi %1, 16 \n" \ Don't expand __ex_table AI Viro reply: Hey, I've no idea about the instruction scheduling on csky - if that doesn't slow the things down, all the better. It's just that copy_to_user() and friends are on fairly hot codepaths, and in quite a few situations they will dominate the speed of e.g. read(2). So I tried to keep the fast path unchanged. Up to the architecture maintainers, obviously. Which would be you... As for the fixups size increase (__ex_table size is unchanged)... You have each of those macros expanded exactly once. So the size is not a serious argument, IMO - useless complexity would be, if it is, in fact, useless; the size... not really, especially since those extra subi will at least offset it. Again, up to you - asm optimizations of (essentially) memcpy()-style loops are tricky and can depend upon the fairly subtle details of architecture. So even on something I know reasonably well I would resort to direct experiments if I can't pass the buck to architecture maintainers. It *is* worth optimizing - this is where read() from a file that is already in page cache spends most of the time, etc. Guo Ren reply: Thx, after fixup some typo “sub %0, 4”, apply the patch. TODO: - user copy/from codes are still need optimizing. Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: Guo Ren <guoren@linux.alibaba.com> Signed-off-by: Sasha Levin <sashal@kernel.org>
2020-04-06 21:40:11 -04:00
" bf 13f \n" \
" ldb %3, (%2, 0) \n" \
"6: stb %3, (%1, 0) \n" \
" addi %2, 1 \n" \
" addi %1, 1 \n" \
" subi %0, 1 \n" \
" br 5b \n" \
csky: Fixup raw_copy_from_user() [ Upstream commit 51bb38cb78363fdad1f89e87357b7bc73e39ba88 ] If raw_copy_from_user(to, from, N) returns K, callers expect the first N - K bytes starting at to to have been replaced with the contents of corresponding area starting at from and the last K bytes of destination *left* *unmodified*. What arch/sky/lib/usercopy.c is doing is broken - it can lead to e.g. data corruption on write(2). raw_copy_to_user() is inaccurate about return value, which is a bug, but consequences are less drastic than for raw_copy_from_user(). And just what are those access_ok() doing in there? I mean, look into linux/uaccess.h; that's where we do that check (as well as zero tail on failure in the callers that need zeroing). AFAICS, all of that shouldn't be hard to fix; something like a patch below might make a useful starting point. I would suggest moving these macros into usercopy.c (they are never used anywhere else) and possibly expanding them there; if you leave them alive, please at least rename __copy_user_zeroing(). Again, it must not zero anything on failed read. Said that, I'm not sure we won't be better off simply turning usercopy.c into usercopy.S - all that is left there is a couple of functions, each consisting only of inline asm. Guo Ren reply: Yes, raw_copy_from_user is wrong, it's no need zeroing code. unsigned long _copy_from_user(void *to, const void __user *from, unsigned long n) { unsigned long res = n; might_fault(); if (likely(access_ok(from, n))) { kasan_check_write(to, n); res = raw_copy_from_user(to, from, n); } if (unlikely(res)) memset(to + (n - res), 0, res); return res; } EXPORT_SYMBOL(_copy_from_user); You are right and access_ok() should be removed. but, how about: do { ... "2: stw %3, (%1, 0) \n" \ + " subi %0, 4 \n" \ "9: stw %4, (%1, 4) \n" \ + " subi %0, 4 \n" \ "10: stw %5, (%1, 8) \n" \ + " subi %0, 4 \n" \ "11: stw %6, (%1, 12) \n" \ + " subi %0, 4 \n" \ " addi %2, 16 \n" \ " addi %1, 16 \n" \ Don't expand __ex_table AI Viro reply: Hey, I've no idea about the instruction scheduling on csky - if that doesn't slow the things down, all the better. It's just that copy_to_user() and friends are on fairly hot codepaths, and in quite a few situations they will dominate the speed of e.g. read(2). So I tried to keep the fast path unchanged. Up to the architecture maintainers, obviously. Which would be you... As for the fixups size increase (__ex_table size is unchanged)... You have each of those macros expanded exactly once. So the size is not a serious argument, IMO - useless complexity would be, if it is, in fact, useless; the size... not really, especially since those extra subi will at least offset it. Again, up to you - asm optimizations of (essentially) memcpy()-style loops are tricky and can depend upon the fairly subtle details of architecture. So even on something I know reasonably well I would resort to direct experiments if I can't pass the buck to architecture maintainers. It *is* worth optimizing - this is where read() from a file that is already in page cache spends most of the time, etc. Guo Ren reply: Thx, after fixup some typo “sub %0, 4”, apply the patch. TODO: - user copy/from codes are still need optimizing. Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: Guo Ren <guoren@linux.alibaba.com> Signed-off-by: Sasha Levin <sashal@kernel.org>
2020-04-06 21:40:11 -04:00
"7: subi %0, 4 \n" \
"8: subi %0, 4 \n" \
"12: subi %0, 4 \n" \
" br 13f \n" \
".section __ex_table, \"a\" \n" \
".align 2 \n" \
csky: Fixup raw_copy_from_user() [ Upstream commit 51bb38cb78363fdad1f89e87357b7bc73e39ba88 ] If raw_copy_from_user(to, from, N) returns K, callers expect the first N - K bytes starting at to to have been replaced with the contents of corresponding area starting at from and the last K bytes of destination *left* *unmodified*. What arch/sky/lib/usercopy.c is doing is broken - it can lead to e.g. data corruption on write(2). raw_copy_to_user() is inaccurate about return value, which is a bug, but consequences are less drastic than for raw_copy_from_user(). And just what are those access_ok() doing in there? I mean, look into linux/uaccess.h; that's where we do that check (as well as zero tail on failure in the callers that need zeroing). AFAICS, all of that shouldn't be hard to fix; something like a patch below might make a useful starting point. I would suggest moving these macros into usercopy.c (they are never used anywhere else) and possibly expanding them there; if you leave them alive, please at least rename __copy_user_zeroing(). Again, it must not zero anything on failed read. Said that, I'm not sure we won't be better off simply turning usercopy.c into usercopy.S - all that is left there is a couple of functions, each consisting only of inline asm. Guo Ren reply: Yes, raw_copy_from_user is wrong, it's no need zeroing code. unsigned long _copy_from_user(void *to, const void __user *from, unsigned long n) { unsigned long res = n; might_fault(); if (likely(access_ok(from, n))) { kasan_check_write(to, n); res = raw_copy_from_user(to, from, n); } if (unlikely(res)) memset(to + (n - res), 0, res); return res; } EXPORT_SYMBOL(_copy_from_user); You are right and access_ok() should be removed. but, how about: do { ... "2: stw %3, (%1, 0) \n" \ + " subi %0, 4 \n" \ "9: stw %4, (%1, 4) \n" \ + " subi %0, 4 \n" \ "10: stw %5, (%1, 8) \n" \ + " subi %0, 4 \n" \ "11: stw %6, (%1, 12) \n" \ + " subi %0, 4 \n" \ " addi %2, 16 \n" \ " addi %1, 16 \n" \ Don't expand __ex_table AI Viro reply: Hey, I've no idea about the instruction scheduling on csky - if that doesn't slow the things down, all the better. It's just that copy_to_user() and friends are on fairly hot codepaths, and in quite a few situations they will dominate the speed of e.g. read(2). So I tried to keep the fast path unchanged. Up to the architecture maintainers, obviously. Which would be you... As for the fixups size increase (__ex_table size is unchanged)... You have each of those macros expanded exactly once. So the size is not a serious argument, IMO - useless complexity would be, if it is, in fact, useless; the size... not really, especially since those extra subi will at least offset it. Again, up to you - asm optimizations of (essentially) memcpy()-style loops are tricky and can depend upon the fairly subtle details of architecture. So even on something I know reasonably well I would resort to direct experiments if I can't pass the buck to architecture maintainers. It *is* worth optimizing - this is where read() from a file that is already in page cache spends most of the time, etc. Guo Ren reply: Thx, after fixup some typo “sub %0, 4”, apply the patch. TODO: - user copy/from codes are still need optimizing. Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: Guo Ren <guoren@linux.alibaba.com> Signed-off-by: Sasha Levin <sashal@kernel.org>
2020-04-06 21:40:11 -04:00
".long 2b, 13f \n" \
".long 4b, 13f \n" \
".long 6b, 13f \n" \
".long 9b, 12b \n" \
".long 10b, 8b \n" \
".long 11b, 7b \n" \
".previous \n" \
csky: Fixup raw_copy_from_user() [ Upstream commit 51bb38cb78363fdad1f89e87357b7bc73e39ba88 ] If raw_copy_from_user(to, from, N) returns K, callers expect the first N - K bytes starting at to to have been replaced with the contents of corresponding area starting at from and the last K bytes of destination *left* *unmodified*. What arch/sky/lib/usercopy.c is doing is broken - it can lead to e.g. data corruption on write(2). raw_copy_to_user() is inaccurate about return value, which is a bug, but consequences are less drastic than for raw_copy_from_user(). And just what are those access_ok() doing in there? I mean, look into linux/uaccess.h; that's where we do that check (as well as zero tail on failure in the callers that need zeroing). AFAICS, all of that shouldn't be hard to fix; something like a patch below might make a useful starting point. I would suggest moving these macros into usercopy.c (they are never used anywhere else) and possibly expanding them there; if you leave them alive, please at least rename __copy_user_zeroing(). Again, it must not zero anything on failed read. Said that, I'm not sure we won't be better off simply turning usercopy.c into usercopy.S - all that is left there is a couple of functions, each consisting only of inline asm. Guo Ren reply: Yes, raw_copy_from_user is wrong, it's no need zeroing code. unsigned long _copy_from_user(void *to, const void __user *from, unsigned long n) { unsigned long res = n; might_fault(); if (likely(access_ok(from, n))) { kasan_check_write(to, n); res = raw_copy_from_user(to, from, n); } if (unlikely(res)) memset(to + (n - res), 0, res); return res; } EXPORT_SYMBOL(_copy_from_user); You are right and access_ok() should be removed. but, how about: do { ... "2: stw %3, (%1, 0) \n" \ + " subi %0, 4 \n" \ "9: stw %4, (%1, 4) \n" \ + " subi %0, 4 \n" \ "10: stw %5, (%1, 8) \n" \ + " subi %0, 4 \n" \ "11: stw %6, (%1, 12) \n" \ + " subi %0, 4 \n" \ " addi %2, 16 \n" \ " addi %1, 16 \n" \ Don't expand __ex_table AI Viro reply: Hey, I've no idea about the instruction scheduling on csky - if that doesn't slow the things down, all the better. It's just that copy_to_user() and friends are on fairly hot codepaths, and in quite a few situations they will dominate the speed of e.g. read(2). So I tried to keep the fast path unchanged. Up to the architecture maintainers, obviously. Which would be you... As for the fixups size increase (__ex_table size is unchanged)... You have each of those macros expanded exactly once. So the size is not a serious argument, IMO - useless complexity would be, if it is, in fact, useless; the size... not really, especially since those extra subi will at least offset it. Again, up to you - asm optimizations of (essentially) memcpy()-style loops are tricky and can depend upon the fairly subtle details of architecture. So even on something I know reasonably well I would resort to direct experiments if I can't pass the buck to architecture maintainers. It *is* worth optimizing - this is where read() from a file that is already in page cache spends most of the time, etc. Guo Ren reply: Thx, after fixup some typo “sub %0, 4”, apply the patch. TODO: - user copy/from codes are still need optimizing. Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: Guo Ren <guoren@linux.alibaba.com> Signed-off-by: Sasha Levin <sashal@kernel.org>
2020-04-06 21:40:11 -04:00
"13: \n" \
: "=r"(n), "=r"(to), "=r"(from), "=r"(w0), \
"=r"(w1), "=r"(w2), "=r"(w3) \
: "0"(n), "1"(to), "2"(from) \
: "memory"); \
} while (0)
csky: Fixup raw_copy_from_user() [ Upstream commit 51bb38cb78363fdad1f89e87357b7bc73e39ba88 ] If raw_copy_from_user(to, from, N) returns K, callers expect the first N - K bytes starting at to to have been replaced with the contents of corresponding area starting at from and the last K bytes of destination *left* *unmodified*. What arch/sky/lib/usercopy.c is doing is broken - it can lead to e.g. data corruption on write(2). raw_copy_to_user() is inaccurate about return value, which is a bug, but consequences are less drastic than for raw_copy_from_user(). And just what are those access_ok() doing in there? I mean, look into linux/uaccess.h; that's where we do that check (as well as zero tail on failure in the callers that need zeroing). AFAICS, all of that shouldn't be hard to fix; something like a patch below might make a useful starting point. I would suggest moving these macros into usercopy.c (they are never used anywhere else) and possibly expanding them there; if you leave them alive, please at least rename __copy_user_zeroing(). Again, it must not zero anything on failed read. Said that, I'm not sure we won't be better off simply turning usercopy.c into usercopy.S - all that is left there is a couple of functions, each consisting only of inline asm. Guo Ren reply: Yes, raw_copy_from_user is wrong, it's no need zeroing code. unsigned long _copy_from_user(void *to, const void __user *from, unsigned long n) { unsigned long res = n; might_fault(); if (likely(access_ok(from, n))) { kasan_check_write(to, n); res = raw_copy_from_user(to, from, n); } if (unlikely(res)) memset(to + (n - res), 0, res); return res; } EXPORT_SYMBOL(_copy_from_user); You are right and access_ok() should be removed. but, how about: do { ... "2: stw %3, (%1, 0) \n" \ + " subi %0, 4 \n" \ "9: stw %4, (%1, 4) \n" \ + " subi %0, 4 \n" \ "10: stw %5, (%1, 8) \n" \ + " subi %0, 4 \n" \ "11: stw %6, (%1, 12) \n" \ + " subi %0, 4 \n" \ " addi %2, 16 \n" \ " addi %1, 16 \n" \ Don't expand __ex_table AI Viro reply: Hey, I've no idea about the instruction scheduling on csky - if that doesn't slow the things down, all the better. It's just that copy_to_user() and friends are on fairly hot codepaths, and in quite a few situations they will dominate the speed of e.g. read(2). So I tried to keep the fast path unchanged. Up to the architecture maintainers, obviously. Which would be you... As for the fixups size increase (__ex_table size is unchanged)... You have each of those macros expanded exactly once. So the size is not a serious argument, IMO - useless complexity would be, if it is, in fact, useless; the size... not really, especially since those extra subi will at least offset it. Again, up to you - asm optimizations of (essentially) memcpy()-style loops are tricky and can depend upon the fairly subtle details of architecture. So even on something I know reasonably well I would resort to direct experiments if I can't pass the buck to architecture maintainers. It *is* worth optimizing - this is where read() from a file that is already in page cache spends most of the time, etc. Guo Ren reply: Thx, after fixup some typo “sub %0, 4”, apply the patch. TODO: - user copy/from codes are still need optimizing. Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: Guo Ren <guoren@linux.alibaba.com> Signed-off-by: Sasha Levin <sashal@kernel.org>
2020-04-06 21:40:11 -04:00
#define ___copy_from_user(to, from, n) \
do { \
int tmp; \
int nsave; \
asm volatile( \
"0: cmpnei %1, 0 \n" \
" bf 7f \n" \
" mov %3, %1 \n" \
" or %3, %2 \n" \
" andi %3, 3 \n" \
" cmpnei %3, 0 \n" \
" bf 1f \n" \
" br 5f \n" \
"1: cmplti %0, 16 \n" \
" bt 3f \n" \
"2: ldw %3, (%2, 0) \n" \
"10: ldw %4, (%2, 4) \n" \
" stw %3, (%1, 0) \n" \
" stw %4, (%1, 4) \n" \
"11: ldw %3, (%2, 8) \n" \
"12: ldw %4, (%2, 12) \n" \
" stw %3, (%1, 8) \n" \
" stw %4, (%1, 12) \n" \
" addi %2, 16 \n" \
" addi %1, 16 \n" \
" subi %0, 16 \n" \
" br 1b \n" \
"3: cmplti %0, 4 \n" \
" bt 5f \n" \
"4: ldw %3, (%2, 0) \n" \
" stw %3, (%1, 0) \n" \
" addi %2, 4 \n" \
" addi %1, 4 \n" \
" subi %0, 4 \n" \
" br 3b \n" \
"5: cmpnei %0, 0 \n" \
" bf 7f \n" \
"6: ldb %3, (%2, 0) \n" \
" stb %3, (%1, 0) \n" \
" addi %2, 1 \n" \
" addi %1, 1 \n" \
" subi %0, 1 \n" \
" br 5b \n" \
csky: Fixup raw_copy_from_user() [ Upstream commit 51bb38cb78363fdad1f89e87357b7bc73e39ba88 ] If raw_copy_from_user(to, from, N) returns K, callers expect the first N - K bytes starting at to to have been replaced with the contents of corresponding area starting at from and the last K bytes of destination *left* *unmodified*. What arch/sky/lib/usercopy.c is doing is broken - it can lead to e.g. data corruption on write(2). raw_copy_to_user() is inaccurate about return value, which is a bug, but consequences are less drastic than for raw_copy_from_user(). And just what are those access_ok() doing in there? I mean, look into linux/uaccess.h; that's where we do that check (as well as zero tail on failure in the callers that need zeroing). AFAICS, all of that shouldn't be hard to fix; something like a patch below might make a useful starting point. I would suggest moving these macros into usercopy.c (they are never used anywhere else) and possibly expanding them there; if you leave them alive, please at least rename __copy_user_zeroing(). Again, it must not zero anything on failed read. Said that, I'm not sure we won't be better off simply turning usercopy.c into usercopy.S - all that is left there is a couple of functions, each consisting only of inline asm. Guo Ren reply: Yes, raw_copy_from_user is wrong, it's no need zeroing code. unsigned long _copy_from_user(void *to, const void __user *from, unsigned long n) { unsigned long res = n; might_fault(); if (likely(access_ok(from, n))) { kasan_check_write(to, n); res = raw_copy_from_user(to, from, n); } if (unlikely(res)) memset(to + (n - res), 0, res); return res; } EXPORT_SYMBOL(_copy_from_user); You are right and access_ok() should be removed. but, how about: do { ... "2: stw %3, (%1, 0) \n" \ + " subi %0, 4 \n" \ "9: stw %4, (%1, 4) \n" \ + " subi %0, 4 \n" \ "10: stw %5, (%1, 8) \n" \ + " subi %0, 4 \n" \ "11: stw %6, (%1, 12) \n" \ + " subi %0, 4 \n" \ " addi %2, 16 \n" \ " addi %1, 16 \n" \ Don't expand __ex_table AI Viro reply: Hey, I've no idea about the instruction scheduling on csky - if that doesn't slow the things down, all the better. It's just that copy_to_user() and friends are on fairly hot codepaths, and in quite a few situations they will dominate the speed of e.g. read(2). So I tried to keep the fast path unchanged. Up to the architecture maintainers, obviously. Which would be you... As for the fixups size increase (__ex_table size is unchanged)... You have each of those macros expanded exactly once. So the size is not a serious argument, IMO - useless complexity would be, if it is, in fact, useless; the size... not really, especially since those extra subi will at least offset it. Again, up to you - asm optimizations of (essentially) memcpy()-style loops are tricky and can depend upon the fairly subtle details of architecture. So even on something I know reasonably well I would resort to direct experiments if I can't pass the buck to architecture maintainers. It *is* worth optimizing - this is where read() from a file that is already in page cache spends most of the time, etc. Guo Ren reply: Thx, after fixup some typo “sub %0, 4”, apply the patch. TODO: - user copy/from codes are still need optimizing. Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: Guo Ren <guoren@linux.alibaba.com> Signed-off-by: Sasha Levin <sashal@kernel.org>
2020-04-06 21:40:11 -04:00
"8: stw %3, (%1, 0) \n" \
" subi %0, 4 \n" \
" bf 7f \n" \
"9: subi %0, 8 \n" \
" bf 7f \n" \
"13: stw %3, (%1, 8) \n" \
" subi %0, 12 \n" \
" bf 7f \n" \
".section __ex_table, \"a\" \n" \
".align 2 \n" \
csky: Fixup raw_copy_from_user() [ Upstream commit 51bb38cb78363fdad1f89e87357b7bc73e39ba88 ] If raw_copy_from_user(to, from, N) returns K, callers expect the first N - K bytes starting at to to have been replaced with the contents of corresponding area starting at from and the last K bytes of destination *left* *unmodified*. What arch/sky/lib/usercopy.c is doing is broken - it can lead to e.g. data corruption on write(2). raw_copy_to_user() is inaccurate about return value, which is a bug, but consequences are less drastic than for raw_copy_from_user(). And just what are those access_ok() doing in there? I mean, look into linux/uaccess.h; that's where we do that check (as well as zero tail on failure in the callers that need zeroing). AFAICS, all of that shouldn't be hard to fix; something like a patch below might make a useful starting point. I would suggest moving these macros into usercopy.c (they are never used anywhere else) and possibly expanding them there; if you leave them alive, please at least rename __copy_user_zeroing(). Again, it must not zero anything on failed read. Said that, I'm not sure we won't be better off simply turning usercopy.c into usercopy.S - all that is left there is a couple of functions, each consisting only of inline asm. Guo Ren reply: Yes, raw_copy_from_user is wrong, it's no need zeroing code. unsigned long _copy_from_user(void *to, const void __user *from, unsigned long n) { unsigned long res = n; might_fault(); if (likely(access_ok(from, n))) { kasan_check_write(to, n); res = raw_copy_from_user(to, from, n); } if (unlikely(res)) memset(to + (n - res), 0, res); return res; } EXPORT_SYMBOL(_copy_from_user); You are right and access_ok() should be removed. but, how about: do { ... "2: stw %3, (%1, 0) \n" \ + " subi %0, 4 \n" \ "9: stw %4, (%1, 4) \n" \ + " subi %0, 4 \n" \ "10: stw %5, (%1, 8) \n" \ + " subi %0, 4 \n" \ "11: stw %6, (%1, 12) \n" \ + " subi %0, 4 \n" \ " addi %2, 16 \n" \ " addi %1, 16 \n" \ Don't expand __ex_table AI Viro reply: Hey, I've no idea about the instruction scheduling on csky - if that doesn't slow the things down, all the better. It's just that copy_to_user() and friends are on fairly hot codepaths, and in quite a few situations they will dominate the speed of e.g. read(2). So I tried to keep the fast path unchanged. Up to the architecture maintainers, obviously. Which would be you... As for the fixups size increase (__ex_table size is unchanged)... You have each of those macros expanded exactly once. So the size is not a serious argument, IMO - useless complexity would be, if it is, in fact, useless; the size... not really, especially since those extra subi will at least offset it. Again, up to you - asm optimizations of (essentially) memcpy()-style loops are tricky and can depend upon the fairly subtle details of architecture. So even on something I know reasonably well I would resort to direct experiments if I can't pass the buck to architecture maintainers. It *is* worth optimizing - this is where read() from a file that is already in page cache spends most of the time, etc. Guo Ren reply: Thx, after fixup some typo “sub %0, 4”, apply the patch. TODO: - user copy/from codes are still need optimizing. Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: Guo Ren <guoren@linux.alibaba.com> Signed-off-by: Sasha Levin <sashal@kernel.org>
2020-04-06 21:40:11 -04:00
".long 2b, 7f \n" \
".long 4b, 7f \n" \
".long 6b, 7f \n" \
".long 10b, 8b \n" \
csky: Fixup raw_copy_from_user() [ Upstream commit 51bb38cb78363fdad1f89e87357b7bc73e39ba88 ] If raw_copy_from_user(to, from, N) returns K, callers expect the first N - K bytes starting at to to have been replaced with the contents of corresponding area starting at from and the last K bytes of destination *left* *unmodified*. What arch/sky/lib/usercopy.c is doing is broken - it can lead to e.g. data corruption on write(2). raw_copy_to_user() is inaccurate about return value, which is a bug, but consequences are less drastic than for raw_copy_from_user(). And just what are those access_ok() doing in there? I mean, look into linux/uaccess.h; that's where we do that check (as well as zero tail on failure in the callers that need zeroing). AFAICS, all of that shouldn't be hard to fix; something like a patch below might make a useful starting point. I would suggest moving these macros into usercopy.c (they are never used anywhere else) and possibly expanding them there; if you leave them alive, please at least rename __copy_user_zeroing(). Again, it must not zero anything on failed read. Said that, I'm not sure we won't be better off simply turning usercopy.c into usercopy.S - all that is left there is a couple of functions, each consisting only of inline asm. Guo Ren reply: Yes, raw_copy_from_user is wrong, it's no need zeroing code. unsigned long _copy_from_user(void *to, const void __user *from, unsigned long n) { unsigned long res = n; might_fault(); if (likely(access_ok(from, n))) { kasan_check_write(to, n); res = raw_copy_from_user(to, from, n); } if (unlikely(res)) memset(to + (n - res), 0, res); return res; } EXPORT_SYMBOL(_copy_from_user); You are right and access_ok() should be removed. but, how about: do { ... "2: stw %3, (%1, 0) \n" \ + " subi %0, 4 \n" \ "9: stw %4, (%1, 4) \n" \ + " subi %0, 4 \n" \ "10: stw %5, (%1, 8) \n" \ + " subi %0, 4 \n" \ "11: stw %6, (%1, 12) \n" \ + " subi %0, 4 \n" \ " addi %2, 16 \n" \ " addi %1, 16 \n" \ Don't expand __ex_table AI Viro reply: Hey, I've no idea about the instruction scheduling on csky - if that doesn't slow the things down, all the better. It's just that copy_to_user() and friends are on fairly hot codepaths, and in quite a few situations they will dominate the speed of e.g. read(2). So I tried to keep the fast path unchanged. Up to the architecture maintainers, obviously. Which would be you... As for the fixups size increase (__ex_table size is unchanged)... You have each of those macros expanded exactly once. So the size is not a serious argument, IMO - useless complexity would be, if it is, in fact, useless; the size... not really, especially since those extra subi will at least offset it. Again, up to you - asm optimizations of (essentially) memcpy()-style loops are tricky and can depend upon the fairly subtle details of architecture. So even on something I know reasonably well I would resort to direct experiments if I can't pass the buck to architecture maintainers. It *is* worth optimizing - this is where read() from a file that is already in page cache spends most of the time, etc. Guo Ren reply: Thx, after fixup some typo “sub %0, 4”, apply the patch. TODO: - user copy/from codes are still need optimizing. Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: Guo Ren <guoren@linux.alibaba.com> Signed-off-by: Sasha Levin <sashal@kernel.org>
2020-04-06 21:40:11 -04:00
".long 11b, 9b \n" \
".long 12b,13b \n" \
".previous \n" \
"7: \n" \
: "=r"(n), "=r"(to), "=r"(from), "=r"(nsave), \
"=r"(tmp) \
: "0"(n), "1"(to), "2"(from) \
: "memory"); \
} while (0)
unsigned long raw_copy_from_user(void *to, const void *from, unsigned long n);
unsigned long raw_copy_to_user(void *to, const void *from, unsigned long n);
unsigned long clear_user(void *to, unsigned long n);
unsigned long __clear_user(void __user *to, unsigned long n);
long strncpy_from_user(char *dst, const char *src, long count);
long __strncpy_from_user(char *dst, const char *src, long count);
/*
* Return the size of a string (including the ending 0)
*
* Return 0 on exception, a value greater than N if too long
*/
long strnlen_user(const char *src, long n);
#define strlen_user(str) strnlen_user(str, 32767)
struct exception_table_entry {
unsigned long insn;
unsigned long nextinsn;
};
extern int fixup_exception(struct pt_regs *regs);
#endif /* __ASM_CSKY_UACCESS_H */