efa4d2fb04
Use temporary page tables for the kernel text mapping during hibernation restore on x86_64. Without the patch, the original boot kernel's page tables that represent the kernel text mapping are used while the core of the image kernel is being restored. However, in principle, if the boot kernel is not identical to the image kernel, the location of these page tables in the image kernel need not be the same, so we should create a safe copy of the kernel text mapping prior to restoring the core of the image kernel. Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl> Acked-by: Pavel Machek <pavel@ucw.cz> Cc: Andi Kleen <ak@suse.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
327 lines
8.1 KiB
C
327 lines
8.1 KiB
C
/*
|
|
* Suspend support specific for i386.
|
|
*
|
|
* Distribute under GPLv2
|
|
*
|
|
* Copyright (c) 2002 Pavel Machek <pavel@suse.cz>
|
|
* Copyright (c) 2001 Patrick Mochel <mochel@osdl.org>
|
|
*/
|
|
|
|
#include <linux/smp.h>
|
|
#include <linux/suspend.h>
|
|
#include <asm/proto.h>
|
|
#include <asm/page.h>
|
|
#include <asm/pgtable.h>
|
|
#include <asm/mtrr.h>
|
|
|
|
/* References to section boundaries */
|
|
extern const void __nosave_begin, __nosave_end;
|
|
|
|
struct saved_context saved_context;
|
|
|
|
unsigned long saved_context_eax, saved_context_ebx, saved_context_ecx, saved_context_edx;
|
|
unsigned long saved_context_esp, saved_context_ebp, saved_context_esi, saved_context_edi;
|
|
unsigned long saved_context_r08, saved_context_r09, saved_context_r10, saved_context_r11;
|
|
unsigned long saved_context_r12, saved_context_r13, saved_context_r14, saved_context_r15;
|
|
unsigned long saved_context_eflags;
|
|
|
|
void __save_processor_state(struct saved_context *ctxt)
|
|
{
|
|
kernel_fpu_begin();
|
|
|
|
/*
|
|
* descriptor tables
|
|
*/
|
|
asm volatile ("sgdt %0" : "=m" (ctxt->gdt_limit));
|
|
asm volatile ("sidt %0" : "=m" (ctxt->idt_limit));
|
|
asm volatile ("str %0" : "=m" (ctxt->tr));
|
|
|
|
/* XMM0..XMM15 should be handled by kernel_fpu_begin(). */
|
|
/*
|
|
* segment registers
|
|
*/
|
|
asm volatile ("movw %%ds, %0" : "=m" (ctxt->ds));
|
|
asm volatile ("movw %%es, %0" : "=m" (ctxt->es));
|
|
asm volatile ("movw %%fs, %0" : "=m" (ctxt->fs));
|
|
asm volatile ("movw %%gs, %0" : "=m" (ctxt->gs));
|
|
asm volatile ("movw %%ss, %0" : "=m" (ctxt->ss));
|
|
|
|
rdmsrl(MSR_FS_BASE, ctxt->fs_base);
|
|
rdmsrl(MSR_GS_BASE, ctxt->gs_base);
|
|
rdmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base);
|
|
mtrr_save_fixed_ranges(NULL);
|
|
|
|
/*
|
|
* control registers
|
|
*/
|
|
rdmsrl(MSR_EFER, ctxt->efer);
|
|
ctxt->cr0 = read_cr0();
|
|
ctxt->cr2 = read_cr2();
|
|
ctxt->cr3 = read_cr3();
|
|
ctxt->cr4 = read_cr4();
|
|
ctxt->cr8 = read_cr8();
|
|
}
|
|
|
|
void save_processor_state(void)
|
|
{
|
|
__save_processor_state(&saved_context);
|
|
}
|
|
|
|
static void do_fpu_end(void)
|
|
{
|
|
/*
|
|
* Restore FPU regs if necessary
|
|
*/
|
|
kernel_fpu_end();
|
|
}
|
|
|
|
void __restore_processor_state(struct saved_context *ctxt)
|
|
{
|
|
/*
|
|
* control registers
|
|
*/
|
|
wrmsrl(MSR_EFER, ctxt->efer);
|
|
write_cr8(ctxt->cr8);
|
|
write_cr4(ctxt->cr4);
|
|
write_cr3(ctxt->cr3);
|
|
write_cr2(ctxt->cr2);
|
|
write_cr0(ctxt->cr0);
|
|
|
|
/*
|
|
* now restore the descriptor tables to their proper values
|
|
* ltr is done i fix_processor_context().
|
|
*/
|
|
asm volatile ("lgdt %0" :: "m" (ctxt->gdt_limit));
|
|
asm volatile ("lidt %0" :: "m" (ctxt->idt_limit));
|
|
|
|
/*
|
|
* segment registers
|
|
*/
|
|
asm volatile ("movw %0, %%ds" :: "r" (ctxt->ds));
|
|
asm volatile ("movw %0, %%es" :: "r" (ctxt->es));
|
|
asm volatile ("movw %0, %%fs" :: "r" (ctxt->fs));
|
|
load_gs_index(ctxt->gs);
|
|
asm volatile ("movw %0, %%ss" :: "r" (ctxt->ss));
|
|
|
|
wrmsrl(MSR_FS_BASE, ctxt->fs_base);
|
|
wrmsrl(MSR_GS_BASE, ctxt->gs_base);
|
|
wrmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base);
|
|
|
|
fix_processor_context();
|
|
|
|
do_fpu_end();
|
|
mtrr_ap_init();
|
|
}
|
|
|
|
void restore_processor_state(void)
|
|
{
|
|
__restore_processor_state(&saved_context);
|
|
}
|
|
|
|
void fix_processor_context(void)
|
|
{
|
|
int cpu = smp_processor_id();
|
|
struct tss_struct *t = &per_cpu(init_tss, cpu);
|
|
|
|
set_tss_desc(cpu,t); /* This just modifies memory; should not be neccessary. But... This is neccessary, because 386 hardware has concept of busy TSS or some similar stupidity. */
|
|
|
|
cpu_gdt(cpu)[GDT_ENTRY_TSS].type = 9;
|
|
|
|
syscall_init(); /* This sets MSR_*STAR and related */
|
|
load_TR_desc(); /* This does ltr */
|
|
load_LDT(¤t->active_mm->context); /* This does lldt */
|
|
|
|
/*
|
|
* Now maybe reload the debug registers
|
|
*/
|
|
if (current->thread.debugreg7){
|
|
loaddebug(¤t->thread, 0);
|
|
loaddebug(¤t->thread, 1);
|
|
loaddebug(¤t->thread, 2);
|
|
loaddebug(¤t->thread, 3);
|
|
/* no 4 and 5 */
|
|
loaddebug(¤t->thread, 6);
|
|
loaddebug(¤t->thread, 7);
|
|
}
|
|
|
|
}
|
|
|
|
#ifdef CONFIG_HIBERNATION
|
|
/* Defined in arch/x86_64/kernel/suspend_asm.S */
|
|
extern int restore_image(void);
|
|
|
|
/*
|
|
* Address to jump to in the last phase of restore in order to get to the image
|
|
* kernel's text (this value is passed in the image header).
|
|
*/
|
|
unsigned long restore_jump_address;
|
|
|
|
/*
|
|
* Value of the cr3 register from before the hibernation (this value is passed
|
|
* in the image header).
|
|
*/
|
|
unsigned long restore_cr3;
|
|
|
|
pgd_t *temp_level4_pgt;
|
|
|
|
void *relocated_restore_code;
|
|
|
|
static int res_phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
|
|
{
|
|
long i, j;
|
|
|
|
i = pud_index(address);
|
|
pud = pud + i;
|
|
for (; i < PTRS_PER_PUD; pud++, i++) {
|
|
unsigned long paddr;
|
|
pmd_t *pmd;
|
|
|
|
paddr = address + i*PUD_SIZE;
|
|
if (paddr >= end)
|
|
break;
|
|
|
|
pmd = (pmd_t *)get_safe_page(GFP_ATOMIC);
|
|
if (!pmd)
|
|
return -ENOMEM;
|
|
set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
|
|
for (j = 0; j < PTRS_PER_PMD; pmd++, j++, paddr += PMD_SIZE) {
|
|
unsigned long pe;
|
|
|
|
if (paddr >= end)
|
|
break;
|
|
pe = __PAGE_KERNEL_LARGE_EXEC | paddr;
|
|
pe &= __supported_pte_mask;
|
|
set_pmd(pmd, __pmd(pe));
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static int res_kernel_text_pud_init(pud_t *pud, unsigned long start)
|
|
{
|
|
pmd_t *pmd;
|
|
unsigned long paddr;
|
|
|
|
pmd = (pmd_t *)get_safe_page(GFP_ATOMIC);
|
|
if (!pmd)
|
|
return -ENOMEM;
|
|
set_pud(pud + pud_index(start), __pud(__pa(pmd) | _KERNPG_TABLE));
|
|
for (paddr = 0; paddr < KERNEL_TEXT_SIZE; pmd++, paddr += PMD_SIZE) {
|
|
unsigned long pe;
|
|
|
|
pe = __PAGE_KERNEL_LARGE_EXEC | _PAGE_GLOBAL | paddr;
|
|
pe &= __supported_pte_mask;
|
|
set_pmd(pmd, __pmd(pe));
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int set_up_temporary_mappings(void)
|
|
{
|
|
unsigned long start, end, next;
|
|
pud_t *pud;
|
|
int error;
|
|
|
|
temp_level4_pgt = (pgd_t *)get_safe_page(GFP_ATOMIC);
|
|
if (!temp_level4_pgt)
|
|
return -ENOMEM;
|
|
|
|
/* Set up the direct mapping from scratch */
|
|
start = (unsigned long)pfn_to_kaddr(0);
|
|
end = (unsigned long)pfn_to_kaddr(end_pfn);
|
|
|
|
for (; start < end; start = next) {
|
|
pud = (pud_t *)get_safe_page(GFP_ATOMIC);
|
|
if (!pud)
|
|
return -ENOMEM;
|
|
next = start + PGDIR_SIZE;
|
|
if (next > end)
|
|
next = end;
|
|
if ((error = res_phys_pud_init(pud, __pa(start), __pa(next))))
|
|
return error;
|
|
set_pgd(temp_level4_pgt + pgd_index(start),
|
|
mk_kernel_pgd(__pa(pud)));
|
|
}
|
|
|
|
/* Set up the kernel text mapping from scratch */
|
|
pud = (pud_t *)get_safe_page(GFP_ATOMIC);
|
|
if (!pud)
|
|
return -ENOMEM;
|
|
error = res_kernel_text_pud_init(pud, __START_KERNEL_map);
|
|
if (!error)
|
|
set_pgd(temp_level4_pgt + pgd_index(__START_KERNEL_map),
|
|
__pgd(__pa(pud) | _PAGE_TABLE));
|
|
|
|
return error;
|
|
}
|
|
|
|
int swsusp_arch_resume(void)
|
|
{
|
|
int error;
|
|
|
|
/* We have got enough memory and from now on we cannot recover */
|
|
if ((error = set_up_temporary_mappings()))
|
|
return error;
|
|
|
|
relocated_restore_code = (void *)get_safe_page(GFP_ATOMIC);
|
|
if (!relocated_restore_code)
|
|
return -ENOMEM;
|
|
memcpy(relocated_restore_code, &core_restore_code,
|
|
&restore_registers - &core_restore_code);
|
|
|
|
restore_image();
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* pfn_is_nosave - check if given pfn is in the 'nosave' section
|
|
*/
|
|
|
|
int pfn_is_nosave(unsigned long pfn)
|
|
{
|
|
unsigned long nosave_begin_pfn = __pa_symbol(&__nosave_begin) >> PAGE_SHIFT;
|
|
unsigned long nosave_end_pfn = PAGE_ALIGN(__pa_symbol(&__nosave_end)) >> PAGE_SHIFT;
|
|
return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn);
|
|
}
|
|
|
|
struct restore_data_record {
|
|
unsigned long jump_address;
|
|
unsigned long cr3;
|
|
unsigned long magic;
|
|
};
|
|
|
|
#define RESTORE_MAGIC 0x0123456789ABCDEFUL
|
|
|
|
/**
|
|
* arch_hibernation_header_save - populate the architecture specific part
|
|
* of a hibernation image header
|
|
* @addr: address to save the data at
|
|
*/
|
|
int arch_hibernation_header_save(void *addr, unsigned int max_size)
|
|
{
|
|
struct restore_data_record *rdr = addr;
|
|
|
|
if (max_size < sizeof(struct restore_data_record))
|
|
return -EOVERFLOW;
|
|
rdr->jump_address = restore_jump_address;
|
|
rdr->cr3 = restore_cr3;
|
|
rdr->magic = RESTORE_MAGIC;
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* arch_hibernation_header_restore - read the architecture specific data
|
|
* from the hibernation image header
|
|
* @addr: address to read the data from
|
|
*/
|
|
int arch_hibernation_header_restore(void *addr)
|
|
{
|
|
struct restore_data_record *rdr = addr;
|
|
|
|
restore_jump_address = rdr->jump_address;
|
|
restore_cr3 = rdr->cr3;
|
|
return (rdr->magic == RESTORE_MAGIC) ? 0 : -EINVAL;
|
|
}
|
|
#endif /* CONFIG_HIBERNATION */
|