2005-04-16 18:20:36 -04:00
|
|
|
/*
|
|
|
|
* Intel IO-APIC support for multi-Pentium hosts.
|
|
|
|
*
|
|
|
|
* Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo
|
|
|
|
*
|
|
|
|
* Many thanks to Stig Venaas for trying out countless experimental
|
|
|
|
* patches and reporting/debugging problems patiently!
|
|
|
|
*
|
|
|
|
* (c) 1999, Multiple IO-APIC support, developed by
|
|
|
|
* Ken-ichi Yaku <yaku@css1.kbnes.nec.co.jp> and
|
|
|
|
* Hidemi Kishimoto <kisimoto@css1.kbnes.nec.co.jp>,
|
|
|
|
* further tested and cleaned up by Zach Brown <zab@redhat.com>
|
|
|
|
* and Ingo Molnar <mingo@redhat.com>
|
|
|
|
*
|
|
|
|
* Fixes
|
|
|
|
* Maciej W. Rozycki : Bits for genuine 82489DX APICs;
|
|
|
|
* thanks to Eric Gilmore
|
|
|
|
* and Rolf G. Tews
|
|
|
|
* for testing these extensively
|
|
|
|
* Paul Diefenbaugh : Added full ACPI support
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <linux/mm.h>
|
|
|
|
#include <linux/interrupt.h>
|
|
|
|
#include <linux/init.h>
|
|
|
|
#include <linux/delay.h>
|
|
|
|
#include <linux/sched.h>
|
2006-10-04 05:16:42 -04:00
|
|
|
#include <linux/pci.h>
|
2005-04-16 18:20:36 -04:00
|
|
|
#include <linux/mc146818rtc.h>
|
|
|
|
#include <linux/acpi.h>
|
|
|
|
#include <linux/sysdev.h>
|
2006-10-04 05:16:59 -04:00
|
|
|
#include <linux/msi.h>
|
2006-10-04 05:17:01 -04:00
|
|
|
#include <linux/htirq.h>
|
2007-10-21 19:41:54 -04:00
|
|
|
#include <linux/dmar.h>
|
2008-01-30 07:32:19 -05:00
|
|
|
#include <linux/jiffies.h>
|
2006-02-16 17:42:04 -05:00
|
|
|
#ifdef CONFIG_ACPI
|
|
|
|
#include <acpi/acpi_bus.h>
|
|
|
|
#endif
|
2008-01-30 07:30:19 -05:00
|
|
|
#include <linux/bootmem.h>
|
x64, x2apic/intr-remap: IO-APIC support for interrupt-remapping
IO-APIC support in the presence of interrupt-remapping infrastructure.
IO-APIC RTE will be programmed with interrupt-remapping table entry(IRTE)
index and the IRTE will contain information about the vector, cpu destination,
trigger mode etc, which traditionally was present in the IO-APIC RTE.
Introduce a new irq_chip for cleaner irq migration (in the process
context as opposed to the current irq migration in the context of an interrupt.
interrupt-remapping infrastructure will help us achieve this cleanly).
For edge triggered, irq migration is a simple atomic update(of vector
and cpu destination) of IRTE and flush the hardware cache.
For level triggered, we need to modify the io-apic RTE aswell with the update
vector information, along with modifying IRTE with vector and cpu destination.
So irq migration for level triggered is little bit more complex compared to
edge triggered migration. But the good news is, we use the same algorithm
for level triggered migration as we have today, only difference being,
we now initiate the irq migration from process context instead of the
interrupt context.
In future, when we do a directed EOI (combined with cpu EOI broadcast
suppression) to the IO-APIC, level triggered irq migration will also be
as simple as edge triggered migration and we can do the irq migration
with a simple atomic update to IO-APIC RTE.
TBD: some tests/changes needed in the presence of fixup_irqs() for
level triggered irq migration.
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: akpm@linux-foundation.org
Cc: arjan@linux.intel.com
Cc: andi@firstfloor.org
Cc: ebiederm@xmission.com
Cc: jbarnes@virtuousgeek.org
Cc: steiner@sgi.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-07-10 14:16:56 -04:00
|
|
|
#include <linux/dmar.h>
|
2005-04-16 18:20:36 -04:00
|
|
|
|
2007-02-23 06:40:58 -05:00
|
|
|
#include <asm/idle.h>
|
2005-04-16 18:20:36 -04:00
|
|
|
#include <asm/io.h>
|
|
|
|
#include <asm/smp.h>
|
|
|
|
#include <asm/desc.h>
|
|
|
|
#include <asm/proto.h>
|
2005-05-31 17:39:26 -04:00
|
|
|
#include <asm/acpi.h>
|
2006-01-11 16:44:27 -05:00
|
|
|
#include <asm/dma.h>
|
2008-07-14 12:18:04 -04:00
|
|
|
#include <asm/i8259.h>
|
2006-06-26 07:57:01 -04:00
|
|
|
#include <asm/nmi.h>
|
2006-10-04 05:16:42 -04:00
|
|
|
#include <asm/msidef.h>
|
2006-10-04 05:16:55 -04:00
|
|
|
#include <asm/hypertransport.h>
|
x64, x2apic/intr-remap: IO-APIC support for interrupt-remapping
IO-APIC support in the presence of interrupt-remapping infrastructure.
IO-APIC RTE will be programmed with interrupt-remapping table entry(IRTE)
index and the IRTE will contain information about the vector, cpu destination,
trigger mode etc, which traditionally was present in the IO-APIC RTE.
Introduce a new irq_chip for cleaner irq migration (in the process
context as opposed to the current irq migration in the context of an interrupt.
interrupt-remapping infrastructure will help us achieve this cleanly).
For edge triggered, irq migration is a simple atomic update(of vector
and cpu destination) of IRTE and flush the hardware cache.
For level triggered, we need to modify the io-apic RTE aswell with the update
vector information, along with modifying IRTE with vector and cpu destination.
So irq migration for level triggered is little bit more complex compared to
edge triggered migration. But the good news is, we use the same algorithm
for level triggered migration as we have today, only difference being,
we now initiate the irq migration from process context instead of the
interrupt context.
In future, when we do a directed EOI (combined with cpu EOI broadcast
suppression) to the IO-APIC, level triggered irq migration will also be
as simple as edge triggered migration and we can do the irq migration
with a simple atomic update to IO-APIC RTE.
TBD: some tests/changes needed in the presence of fixup_irqs() for
level triggered irq migration.
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: akpm@linux-foundation.org
Cc: arjan@linux.intel.com
Cc: andi@firstfloor.org
Cc: ebiederm@xmission.com
Cc: jbarnes@virtuousgeek.org
Cc: steiner@sgi.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-07-10 14:16:56 -04:00
|
|
|
#include <asm/irq_remapping.h>
|
2005-04-16 18:20:36 -04:00
|
|
|
|
2008-03-25 12:28:56 -04:00
|
|
|
#include <mach_ipi.h>
|
2008-03-25 17:10:46 -04:00
|
|
|
#include <mach_apic.h>
|
2008-03-25 12:28:56 -04:00
|
|
|
|
2008-07-20 19:52:49 -04:00
|
|
|
#define __apicdebuginit(type) static type __init
|
|
|
|
|
2008-08-19 23:50:06 -04:00
|
|
|
struct irq_cfg;
|
2008-08-19 23:50:07 -04:00
|
|
|
struct irq_pin_list;
|
2007-02-23 06:32:47 -05:00
|
|
|
struct irq_cfg {
|
2008-08-19 23:50:06 -04:00
|
|
|
unsigned int irq;
|
|
|
|
struct irq_cfg *next;
|
2008-08-19 23:50:07 -04:00
|
|
|
struct irq_pin_list *irq_2_pin;
|
2007-02-23 06:32:47 -05:00
|
|
|
cpumask_t domain;
|
2007-02-23 06:40:58 -05:00
|
|
|
cpumask_t old_domain;
|
|
|
|
unsigned move_cleanup_count;
|
2007-02-23 06:32:47 -05:00
|
|
|
u8 vector;
|
2007-02-23 06:40:58 -05:00
|
|
|
u8 move_in_progress : 1;
|
2007-02-23 06:32:47 -05:00
|
|
|
};
|
|
|
|
|
|
|
|
/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
|
2008-08-19 23:50:02 -04:00
|
|
|
/*
 * Template for the 16 legacy IRQs: each one may be delivered to any CPU
 * and uses its fixed IRQn_VECTOR.  The helper macro only exists to build
 * this table and is undefined again right after it.
 */
#define IRQ_CFG_LEGACY_ENTRY(n)						\
	[n] = { .irq = n, .domain = CPU_MASK_ALL, .vector = IRQ##n##_VECTOR, }

static struct irq_cfg irq_cfg_legacy[] __initdata = {
	IRQ_CFG_LEGACY_ENTRY(0),
	IRQ_CFG_LEGACY_ENTRY(1),
	IRQ_CFG_LEGACY_ENTRY(2),
	IRQ_CFG_LEGACY_ENTRY(3),
	IRQ_CFG_LEGACY_ENTRY(4),
	IRQ_CFG_LEGACY_ENTRY(5),
	IRQ_CFG_LEGACY_ENTRY(6),
	IRQ_CFG_LEGACY_ENTRY(7),
	IRQ_CFG_LEGACY_ENTRY(8),
	IRQ_CFG_LEGACY_ENTRY(9),
	IRQ_CFG_LEGACY_ENTRY(10),
	IRQ_CFG_LEGACY_ENTRY(11),
	IRQ_CFG_LEGACY_ENTRY(12),
	IRQ_CFG_LEGACY_ENTRY(13),
	IRQ_CFG_LEGACY_ENTRY(14),
	IRQ_CFG_LEGACY_ENTRY(15),
};

#undef IRQ_CFG_LEGACY_ENTRY
|
|
|
|
|
2008-08-19 23:50:06 -04:00
|
|
|
static struct irq_cfg irq_cfg_init = { .irq = -1U, };
|
|
|
|
/* needs to be bigger than the size of irq_cfg_legacy */
|
|
|
|
static int nr_irq_cfg = 32;
|
|
|
|
|
|
|
|
/*
 * Early-param handler for "nr_irq_cfg=": sets the number of irq_cfg
 * records to pre-allocate.  Values below 32 are raised to 32.
 * Always returns 0.
 */
static int __init parse_nr_irq_cfg(char *arg)
{
	if (!arg)
		return 0;

	nr_irq_cfg = simple_strtoul(arg, NULL, 0);
	/* never go below the built-in minimum */
	if (nr_irq_cfg < 32)
		nr_irq_cfg = 32;

	return 0;
}
|
|
|
|
|
|
|
|
early_param("nr_irq_cfg", parse_nr_irq_cfg);
|
|
|
|
|
|
|
|
static void init_one_irq_cfg(struct irq_cfg *cfg)
|
|
|
|
{
|
|
|
|
memcpy(cfg, &irq_cfg_init, sizeof(struct irq_cfg));
|
|
|
|
}
|
2008-08-19 23:50:02 -04:00
|
|
|
|
|
|
|
/*
 * dyn_array init callback for the irq_cfg table.
 *
 * Copies the legacy IRQ template into the front of the array, marks every
 * remaining slot as unused, and chains all slots together through ->next.
 *
 * @data: the struct dyn_array describing the table (*da->name is the
 *        storage, *da->nr the number of slots).
 */
static void __init init_work(void *data)
{
	struct dyn_array *da = data;
	struct irq_cfg *table = *da->name;
	int idx;

	/* legacy IRQs occupy the first slots verbatim */
	memcpy(table, irq_cfg_legacy, sizeof(irq_cfg_legacy));

	/* everything after the legacy entries starts out unused */
	for (idx = sizeof(irq_cfg_legacy)/sizeof(irq_cfg_legacy[0]);
	     idx < *da->nr; idx++)
		init_one_irq_cfg(&table[idx]);

	/* link slot N to slot N+1; the last slot keeps its NULL ->next */
	for (idx = 0; idx + 1 < *da->nr; idx++)
		table[idx].next = &table[idx + 1];
}
|
|
|
|
|
2008-08-19 23:50:06 -04:00
|
|
|
static struct irq_cfg *irq_cfgx;
|
|
|
|
DEFINE_DYN_ARRAY(irq_cfgx, sizeof(struct irq_cfg), nr_irq_cfg, PAGE_SIZE, init_work);
|
|
|
|
|
|
|
|
static struct irq_cfg *irq_cfg(unsigned int irq)
|
|
|
|
{
|
|
|
|
struct irq_cfg *cfg;
|
|
|
|
|
|
|
|
BUG_ON(irq == -1U);
|
|
|
|
|
|
|
|
cfg = &irq_cfgx[0];
|
|
|
|
while (cfg) {
|
|
|
|
if (cfg->irq == irq)
|
|
|
|
return cfg;
|
|
|
|
|
|
|
|
if (cfg->irq == -1U)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
cfg = cfg->next;
|
|
|
|
}
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct irq_cfg *irq_cfg_alloc(unsigned int irq)
|
|
|
|
{
|
|
|
|
struct irq_cfg *cfg, *cfg_pri;
|
|
|
|
int i;
|
|
|
|
int count = 0;
|
|
|
|
|
|
|
|
BUG_ON(irq == -1U);
|
|
|
|
|
|
|
|
cfg_pri = cfg = &irq_cfgx[0];
|
|
|
|
while (cfg) {
|
|
|
|
if (cfg->irq == irq)
|
|
|
|
return cfg;
|
|
|
|
|
|
|
|
if (cfg->irq == -1U) {
|
|
|
|
cfg->irq = irq;
|
|
|
|
return cfg;
|
|
|
|
}
|
|
|
|
cfg_pri = cfg;
|
|
|
|
cfg = cfg->next;
|
|
|
|
count++;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* we run out of pre-allocate ones, allocate more
|
|
|
|
*/
|
|
|
|
printk(KERN_DEBUG "try to get more irq_cfg %d\n", nr_irq_cfg);
|
|
|
|
|
|
|
|
if (after_bootmem)
|
|
|
|
cfg = kzalloc(sizeof(struct irq_cfg)*nr_irq_cfg, GFP_ATOMIC);
|
|
|
|
else
|
|
|
|
cfg = __alloc_bootmem_nopanic(sizeof(struct irq_cfg)*nr_irq_cfg, PAGE_SIZE, 0);
|
|
|
|
|
|
|
|
if (!cfg)
|
|
|
|
panic("please boot with nr_irq_cfg= %d\n", count * 2);
|
|
|
|
|
|
|
|
for (i = 0; i < nr_irq_cfg; i++)
|
|
|
|
init_one_irq_cfg(&cfg[i]);
|
|
|
|
|
|
|
|
for (i = 1; i < nr_irq_cfg; i++)
|
|
|
|
cfg[i-1].next = &cfg[i];
|
|
|
|
|
|
|
|
cfg->irq = irq;
|
|
|
|
cfg_pri->next = cfg;
|
|
|
|
|
|
|
|
return cfg;
|
|
|
|
}
|
2008-08-19 23:50:02 -04:00
|
|
|
|
2007-02-23 06:35:05 -05:00
|
|
|
static int assign_irq_vector(int irq, cpumask_t mask);
|
2006-10-04 05:16:46 -04:00
|
|
|
|
2008-04-15 16:36:56 -04:00
|
|
|
int first_system_vector = 0xfe;
|
|
|
|
|
|
|
|
char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE};
|
|
|
|
|
2005-04-16 18:20:36 -04:00
|
|
|
int sis_apic_bug; /* not actually supported, dummy for compile */
|
|
|
|
|
2005-05-20 17:27:59 -04:00
|
|
|
static int no_timer_check;
|
|
|
|
|
2007-01-08 18:04:46 -05:00
|
|
|
static int disable_timer_pin_1 __initdata;
|
|
|
|
|
2008-05-21 17:10:22 -04:00
|
|
|
int timer_through_8259 __initdata;
|
2007-01-08 18:04:46 -05:00
|
|
|
|
2006-01-11 16:46:06 -05:00
|
|
|
/* Where, if anywhere, the i8259 is connected in external int mode */
|
|
|
|
static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
|
|
|
|
|
2005-04-16 18:20:36 -04:00
|
|
|
static DEFINE_SPINLOCK(ioapic_lock);
|
2008-08-09 18:09:02 -04:00
|
|
|
static DEFINE_SPINLOCK(vector_lock);
|
2005-04-16 18:20:36 -04:00
|
|
|
|
|
|
|
/*
|
|
|
|
* # of IRQ routing registers
|
|
|
|
*/
|
|
|
|
int nr_ioapic_registers[MAX_IO_APICS];
|
|
|
|
|
2008-07-10 14:16:47 -04:00
|
|
|
/* I/O APIC RTE contents at the OS boot up */
|
|
|
|
struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS];
|
|
|
|
|
2008-04-04 15:41:19 -04:00
|
|
|
/* I/O APIC entries */
|
2008-05-14 11:03:10 -04:00
|
|
|
struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
|
2008-04-04 15:41:19 -04:00
|
|
|
int nr_ioapics;
|
|
|
|
|
2008-04-04 15:41:38 -04:00
|
|
|
/* MP IRQ source entries */
|
2008-05-14 11:03:17 -04:00
|
|
|
struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
|
2008-04-04 15:41:38 -04:00
|
|
|
|
|
|
|
/* # of MP IRQ source entries */
|
|
|
|
int mp_irq_entries;
|
|
|
|
|
2008-05-19 11:47:16 -04:00
|
|
|
DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
|
|
|
|
|
2005-04-16 18:20:36 -04:00
|
|
|
/*
|
|
|
|
* Rough estimation of how many shared IRQs there are, can
|
|
|
|
* be changed anytime.
|
|
|
|
*/
|
|
|
|
|
2008-08-19 23:50:02 -04:00
|
|
|
int pin_map_size;
|
|
|
|
|
2005-04-16 18:20:36 -04:00
|
|
|
/*
|
|
|
|
* This is performance-critical, we want to do it O(1)
|
|
|
|
*
|
|
|
|
* the indexing order of this array favors 1:1 mappings
|
|
|
|
* between pins and IRQs.
|
|
|
|
*/
|
|
|
|
|
2008-08-19 23:50:07 -04:00
|
|
|
/*
 * One (IO-APIC, pin) pair.  Nodes are singly linked through ->next, both
 * on the free pool (irq_2_pin_ptr) and on a per-IRQ list hanging off
 * irq_cfg->irq_2_pin.
 */
struct irq_pin_list {
	int apic, pin;
	struct irq_pin_list *next;
};
|
|
|
|
|
|
|
|
static struct irq_pin_list *irq_2_pin_head;
|
|
|
|
/* fill one page ? */
|
|
|
|
static int nr_irq_2_pin = 0x100;
|
|
|
|
static struct irq_pin_list *irq_2_pin_ptr;
|
|
|
|
/*
 * dyn_array init callback for the irq_pin_list pool: chains all entries
 * of the array into one list and publishes it as the free pool.
 *
 * @data: the struct dyn_array describing the pool (*da->name is the
 *        storage, *da->nr the number of entries).
 */
static void __init irq_2_pin_init_work(void *data)
{
	struct dyn_array *da = data;
	struct irq_pin_list *pool = *da->name;
	int idx;

	/* entry N points at entry N+1; the last entry is left untouched */
	for (idx = 0; idx + 1 < *da->nr; idx++)
		pool[idx].next = &pool[idx + 1];

	irq_2_pin_ptr = pool;
}
|
|
|
|
DEFINE_DYN_ARRAY(irq_2_pin_head, sizeof(struct irq_pin_list), nr_irq_2_pin, PAGE_SIZE, irq_2_pin_init_work);
|
|
|
|
|
|
|
|
static struct irq_pin_list *get_one_free_irq_2_pin(void)
|
|
|
|
{
|
|
|
|
struct irq_pin_list *pin;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
pin = irq_2_pin_ptr;
|
|
|
|
|
|
|
|
if (pin) {
|
|
|
|
irq_2_pin_ptr = pin->next;
|
|
|
|
pin->next = NULL;
|
|
|
|
return pin;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* we run out of pre-allocate ones, allocate more
|
|
|
|
*/
|
|
|
|
printk(KERN_DEBUG "try to get more irq_2_pin %d\n", nr_irq_2_pin);
|
|
|
|
|
|
|
|
if (after_bootmem)
|
|
|
|
pin = kzalloc(sizeof(struct irq_pin_list)*nr_irq_2_pin,
|
|
|
|
GFP_ATOMIC);
|
|
|
|
else
|
|
|
|
pin = __alloc_bootmem_nopanic(sizeof(struct irq_pin_list) *
|
|
|
|
nr_irq_2_pin, PAGE_SIZE, 0);
|
|
|
|
|
|
|
|
if (!pin)
|
|
|
|
panic("can not get more irq_2_pin\n");
|
2008-08-19 23:50:02 -04:00
|
|
|
|
2008-08-19 23:50:07 -04:00
|
|
|
for (i = 1; i < nr_irq_2_pin; i++)
|
|
|
|
pin[i-1].next = &pin[i];
|
2008-08-19 23:50:02 -04:00
|
|
|
|
2008-08-19 23:50:07 -04:00
|
|
|
irq_2_pin_ptr = pin->next;
|
|
|
|
pin->next = NULL;
|
|
|
|
|
|
|
|
return pin;
|
|
|
|
}
|
2005-04-16 18:20:36 -04:00
|
|
|
|
2006-11-08 13:23:03 -05:00
|
|
|
/*
 * Memory-mapped IO-APIC register window.  A register is accessed by
 * writing its number to ->index and then reading or writing ->data
 * (see io_apic_read() / io_apic_write()).  The field order is the
 * hardware layout and must not change.
 */
struct io_apic {
	unsigned int index;	/* register select */
	unsigned int unused[3];	/* gap between the two registers */
	unsigned int data;	/* window onto the selected register */
};
|
|
|
|
|
|
|
|
/*
 * Return the mapped register window of IO-APIC number @idx: the fixmap
 * slot reserved for that IO-APIC plus the offset of its address within
 * the page.
 */
static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
{
	unsigned long in_page = mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK;

	return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
		+ in_page;
}
|
|
|
|
|
|
|
|
static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
|
|
|
|
{
|
|
|
|
struct io_apic __iomem *io_apic = io_apic_base(apic);
|
|
|
|
writel(reg, &io_apic->index);
|
|
|
|
return readl(&io_apic->data);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
|
|
|
|
{
|
|
|
|
struct io_apic __iomem *io_apic = io_apic_base(apic);
|
|
|
|
writel(reg, &io_apic->index);
|
|
|
|
writel(value, &io_apic->data);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Re-write a value: to be used for read-modify-write
|
|
|
|
* cycles where the read already set up the index register.
|
|
|
|
*/
|
|
|
|
static inline void io_apic_modify(unsigned int apic, unsigned int value)
|
|
|
|
{
|
|
|
|
struct io_apic __iomem *io_apic = io_apic_base(apic);
|
|
|
|
writel(value, &io_apic->data);
|
|
|
|
}
|
|
|
|
|
2008-04-05 09:39:04 -04:00
|
|
|
static bool io_apic_level_ack_pending(unsigned int irq)
|
2007-07-21 11:10:45 -04:00
|
|
|
{
|
|
|
|
struct irq_pin_list *entry;
|
|
|
|
unsigned long flags;
|
2008-08-19 23:50:07 -04:00
|
|
|
struct irq_cfg *cfg = irq_cfg(irq);
|
2007-07-21 11:10:45 -04:00
|
|
|
|
|
|
|
spin_lock_irqsave(&ioapic_lock, flags);
|
2008-08-19 23:50:07 -04:00
|
|
|
entry = cfg->irq_2_pin;
|
2007-07-21 11:10:45 -04:00
|
|
|
for (;;) {
|
|
|
|
unsigned int reg;
|
|
|
|
int pin;
|
|
|
|
|
2008-08-19 23:50:07 -04:00
|
|
|
if (!entry)
|
2007-07-21 11:10:45 -04:00
|
|
|
break;
|
2008-08-19 23:50:07 -04:00
|
|
|
pin = entry->pin;
|
2007-07-21 11:10:45 -04:00
|
|
|
reg = io_apic_read(entry->apic, 0x10 + pin*2);
|
|
|
|
/* Is the remote IRR bit set? */
|
2008-06-07 11:53:57 -04:00
|
|
|
if (reg & IO_APIC_REDIR_REMOTE_IRR) {
|
2008-04-05 09:39:04 -04:00
|
|
|
spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
|
|
return true;
|
|
|
|
}
|
2007-07-21 11:10:45 -04:00
|
|
|
if (!entry->next)
|
|
|
|
break;
|
2008-08-19 23:50:07 -04:00
|
|
|
entry = entry->next;
|
2007-07-21 11:10:45 -04:00
|
|
|
}
|
|
|
|
spin_unlock_irqrestore(&ioapic_lock, flags);
|
2008-04-05 09:39:04 -04:00
|
|
|
|
|
|
|
return false;
|
2007-07-21 11:10:45 -04:00
|
|
|
}
|
|
|
|
|
2006-11-08 13:23:03 -05:00
|
|
|
/*
|
|
|
|
* Synchronize the IO-APIC and the CPU by doing
|
|
|
|
* a dummy read from the IO-APIC
|
|
|
|
*/
|
|
|
|
static inline void io_apic_sync(unsigned int apic)
|
|
|
|
{
|
|
|
|
struct io_apic __iomem *io_apic = io_apic_base(apic);
|
|
|
|
readl(&io_apic->data);
|
|
|
|
}
|
|
|
|
|
[PATCH] x86/x86_64: deferred handling of writes to /proc/irqxx/smp_affinity
When handling writes to /proc/irq, current code is re-programming rte
entries directly. This is not recommended and could potentially cause
chipset's to lockup, or cause missing interrupts.
CONFIG_IRQ_BALANCE does this correctly, where it re-programs only when the
interrupt is pending. The same needs to be done for /proc/irq handling as well.
Otherwise user space irq balancers are really not doing the right thing.
- Changed pending_irq_balance_cpumask to pending_irq_migrate_cpumask for
lack of a generic name.
- added move_irq out of IRQ_BALANCE, and added this same to X86_64
- Added new proc handler for write, so we can do deferred write at irq
handling time.
- Display of /proc/irq/XX/smp_affinity used to display CPU_MASKALL, instead
it now shows only active cpu masks, or exactly what was set.
- Provided a common move_irq implementation, instead of duplicating
when using generic irq framework.
Tested on i386/x86_64 and ia64 with CONFIG_PCI_MSI turned on and off.
Tested UP builds as well.
MSI testing: tbd: I have cards, need to look for a x-over cable, although I
did test an earlier version of this patch. Will test in a couple days.
Signed-off-by: Ashok Raj <ashok.raj@intel.com>
Acked-by: Zwane Mwaikambo <zwane@holomorphy.com>
Grudgingly-acked-by: Andi Kleen <ak@muc.de>
Signed-off-by: Coywolf Qi Hunt <coywolf@lovecn.org>
Signed-off-by: Ashok Raj <ashok.raj@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-09-06 18:16:15 -04:00
|
|
|
/*
 * Function-body template: read-modify-write one word of the redirection
 * entry of every pin routed to `irq` (a variable the expanding function
 * must provide).
 *
 *   R      - 0 or 1, added to the register number to pick the entry word
 *   ACTION - operator and operand applied to `reg`, e.g. "|= mask"
 *   FINAL  - statement run after each write-back; may use `entry`
 *
 * The locals `pin`, `cfg`, `entry` and `reg` are visible to ACTION and
 * FINAL.
 */
#define __DO_ACTION(R, ACTION, FINAL)					\
									\
{									\
	int pin;							\
	struct irq_cfg *cfg;						\
	struct irq_pin_list *entry;					\
									\
	BUG_ON(irq >= nr_irqs);						\
	cfg = irq_cfg(irq);						\
	entry = cfg->irq_2_pin;						\
	while (entry) {							\
		unsigned int reg;					\
		pin = entry->pin;					\
		reg = io_apic_read(entry->apic, 0x10 + R + pin*2);	\
		reg ACTION;						\
		io_apic_modify(entry->apic, reg);			\
		FINAL;							\
		entry = entry->next;					\
	}								\
}
|
|
|
|
|
2006-09-26 04:52:30 -04:00
|
|
|
union entry_union {
|
|
|
|
struct { u32 w1, w2; };
|
|
|
|
struct IO_APIC_route_entry entry;
|
|
|
|
};
|
|
|
|
|
|
|
|
static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
|
|
|
|
{
|
|
|
|
union entry_union eu;
|
|
|
|
unsigned long flags;
|
|
|
|
spin_lock_irqsave(&ioapic_lock, flags);
|
|
|
|
eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
|
|
|
|
eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
|
|
|
|
spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
|
|
return eu.entry;
|
|
|
|
}
|
|
|
|
|
2006-11-08 13:27:54 -05:00
|
|
|
/*
|
|
|
|
* When we write a new IO APIC routing entry, we need to write the high
|
|
|
|
* word first! If the mask bit in the low word is clear, we will enable
|
|
|
|
* the interrupt, and we need to make sure the entry is fully populated
|
|
|
|
* before that happens.
|
|
|
|
*/
|
2006-12-06 20:14:07 -05:00
|
|
|
static void
|
|
|
|
__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
|
2006-09-26 04:52:30 -04:00
|
|
|
{
|
|
|
|
union entry_union eu;
|
|
|
|
eu.entry = e;
|
2006-11-08 13:27:54 -05:00
|
|
|
io_apic_write(apic, 0x11 + 2*pin, eu.w2);
|
|
|
|
io_apic_write(apic, 0x10 + 2*pin, eu.w1);
|
2006-12-06 20:14:07 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
|
|
|
|
{
|
|
|
|
unsigned long flags;
|
|
|
|
spin_lock_irqsave(&ioapic_lock, flags);
|
|
|
|
__ioapic_write_entry(apic, pin, e);
|
2006-11-08 13:27:54 -05:00
|
|
|
spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* When we mask an IO APIC routing entry, we need to write the low
|
|
|
|
* word first, in order to set the mask bit before we change the
|
|
|
|
* high bits!
|
|
|
|
*/
|
|
|
|
static void ioapic_mask_entry(int apic, int pin)
|
|
|
|
{
|
|
|
|
unsigned long flags;
|
|
|
|
union entry_union eu = { .entry.mask = 1 };
|
|
|
|
|
2006-09-26 04:52:30 -04:00
|
|
|
spin_lock_irqsave(&ioapic_lock, flags);
|
|
|
|
io_apic_write(apic, 0x10 + 2*pin, eu.w1);
|
|
|
|
io_apic_write(apic, 0x11 + 2*pin, eu.w2);
|
|
|
|
spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
|
|
}
|
|
|
|
|
[PATCH] x86/x86_64: deferred handling of writes to /proc/irqxx/smp_affinity
When handling writes to /proc/irq, the current code re-programs RTE
entries directly. This is not recommended and could potentially cause
chipsets to lock up, or cause missed interrupts.
CONFIG_IRQ_BALANCE does this correctly, where it re-programs only when the
interrupt is pending. The same needs to be done for /proc/irq handling as well.
Otherwise user space irq balancers are really not doing the right thing.
- Changed pending_irq_balance_cpumask to pending_irq_migrate_cpumask for
lack of a generic name.
- added move_irq out of IRQ_BALANCE, and added this same to X86_64
- Added new proc handler for write, so we can do deferred write at irq
handling time.
- Display of /proc/irq/XX/smp_affinity used to display CPU_MASKALL, instead
it now shows only active cpu masks, or exactly what was set.
- Provided a common move_irq implementation, instead of duplicating
when using generic irq framework.
Tested on i386/x86_64 and ia64 with CONFIG_PCI_MSI turned on and off.
Tested UP builds as well.
MSI testing: tbd: I have cards, need to look for a x-over cable, although I
did test an earlier version of this patch. Will test in a couple days.
Signed-off-by: Ashok Raj <ashok.raj@intel.com>
Acked-by: Zwane Mwaikambo <zwane@holomorphy.com>
Grudgingly-acked-by: Andi Kleen <ak@muc.de>
Signed-off-by: Coywolf Qi Hunt <coywolf@lovecn.org>
Signed-off-by: Ashok Raj <ashok.raj@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-09-06 18:16:15 -04:00
|
|
|
#ifdef CONFIG_SMP
|
2006-10-04 05:16:51 -04:00
|
|
|
/*
 * Point every IO-APIC pin attached to @irq at destination @dest and
 * vector @vector.
 *
 * Takes no lock itself; the caller visible in this file,
 * set_ioapic_affinity_irq(), holds ioapic_lock around the call.
 */
static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
{
	struct irq_pin_list *entry;
	struct irq_cfg *cfg;

	BUG_ON(irq >= nr_irqs);

	cfg = irq_cfg(irq);
	for (entry = cfg->irq_2_pin; entry; entry = entry->next) {
		int apic = entry->apic;
		int pin = entry->pin;
		unsigned int reg;

		/*
		 * With interrupt-remapping, destination information comes
		 * from interrupt-remapping table entry.
		 */
		if (!irq_remapped(irq))
			io_apic_write(apic, 0x11 + pin*2, dest);

		/* Replace only the vector field of the low word. */
		reg = io_apic_read(apic, 0x10 + pin*2);
		reg &= ~IO_APIC_REDIR_VECTOR_MASK;
		reg |= vector;
		io_apic_modify(apic, reg);
	}
}
|
|
|
|
|
[PATCH] x86/x86_64: deferred handling of writes to /proc/irqxx/smp_affinity
When handling writes to /proc/irq, the current code re-programs RTE
entries directly. This is not recommended and could potentially cause
chipsets to lock up, or cause missed interrupts.
CONFIG_IRQ_BALANCE does this correctly, where it re-programs only when the
interrupt is pending. The same needs to be done for /proc/irq handling as well.
Otherwise user space irq balancers are really not doing the right thing.
- Changed pending_irq_balance_cpumask to pending_irq_migrate_cpumask for
lack of a generic name.
- added move_irq out of IRQ_BALANCE, and added this same to X86_64
- Added new proc handler for write, so we can do deferred write at irq
handling time.
- Display of /proc/irq/XX/smp_affinity used to display CPU_MASKALL, instead
it now shows only active cpu masks, or exactly what was set.
- Provided a common move_irq implementation, instead of duplicating
when using generic irq framework.
Tested on i386/x86_64 and ia64 with CONFIG_PCI_MSI turned on and off.
Tested UP builds as well.
MSI testing: tbd: I have cards, need to look for a x-over cable, although I
did test an earlier version of this patch. Will test in a couple days.
Signed-off-by: Ashok Raj <ashok.raj@intel.com>
Acked-by: Zwane Mwaikambo <zwane@holomorphy.com>
Grudgingly-acked-by: Andi Kleen <ak@muc.de>
Signed-off-by: Coywolf Qi Hunt <coywolf@lovecn.org>
Signed-off-by: Ashok Raj <ashok.raj@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-09-06 18:16:15 -04:00
|
|
|
/*
 * Retarget @irq to the CPUs in @mask.
 *
 * Does nothing when @mask contains no online CPU or when
 * assign_irq_vector() reports failure. Otherwise the IO-APIC pins of
 * the irq are reprogrammed and the descriptor's affinity is set to
 * @mask, both under ioapic_lock.
 */
static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
{
	struct irq_cfg *cfg = irq_cfg(irq);
	struct irq_desc *desc;
	unsigned long irqflags;
	unsigned int dest;
	cpumask_t cpus;

	/* Refuse a mask that has no online CPU in it. */
	cpus_and(cpus, mask, cpu_online_map);
	if (cpus_empty(cpus))
		return;

	if (assign_irq_vector(irq, mask))
		return;

	/* Destination: the requested CPUs that lie in the irq's domain. */
	cpus_and(cpus, cfg->domain, mask);
	dest = cpu_mask_to_apicid(cpus);

	/*
	 * Only the high 8 bits are valid.
	 */
	dest = SET_APIC_LOGICAL_ID(dest);

	desc = irq_to_desc(irq);

	spin_lock_irqsave(&ioapic_lock, irqflags);
	__target_IO_APIC_irq(irq, dest, cfg->vector);
	desc->affinity = mask;
	spin_unlock_irqrestore(&ioapic_lock, irqflags);
}
|
|
|
|
#endif
|
|
|
|
|
2005-04-16 18:20:36 -04:00
|
|
|
/*
|
|
|
|
* The common case is 1:1 IRQ<->pin mappings. Sometimes there are
|
|
|
|
* shared ISA-space IRQs, so we have to support them. We are super
|
|
|
|
* fast in the common case, and fast for shared ISA-space IRQs.
|
|
|
|
*/
|
2008-08-19 23:50:02 -04:00
|
|
|
/*
 * NOTE(review): not referenced anywhere in the visible part of this
 * file; presumably a leftover allocation cursor for irq-to-pin
 * entries -- confirm whether it is still used.
 */
int first_free_entry;
|
2005-04-16 18:20:36 -04:00
|
|
|
static void add_pin_to_irq(unsigned int irq, int apic, int pin)
|
|
|
|
{
|
2008-08-19 23:50:07 -04:00
|
|
|
struct irq_cfg *cfg;
|
|
|
|
struct irq_pin_list *entry;
|
2005-04-16 18:20:36 -04:00
|
|
|
|
2008-08-19 23:49:48 -04:00
|
|
|
BUG_ON(irq >= nr_irqs);
|
2008-08-19 23:50:07 -04:00
|
|
|
/* first time to refer irq_cfg, so with new */
|
|
|
|
cfg = irq_cfg_alloc(irq);
|
|
|
|
entry = cfg->irq_2_pin;
|
|
|
|
if (!entry) {
|
|
|
|
entry = get_one_free_irq_2_pin();
|
|
|
|
cfg->irq_2_pin = entry;
|
|
|
|
entry->apic = apic;
|
|
|
|
entry->pin = pin;
|
|
|
|
printk(KERN_DEBUG " 0 add_pin_to_irq: irq %d --> apic %d pin %d\n", irq, apic, pin);
|
|
|
|
return;
|
|
|
|
}
|
2008-08-19 23:50:06 -04:00
|
|
|
|
2008-08-19 23:50:07 -04:00
|
|
|
while (entry->next) {
|
|
|
|
/* not again, please */
|
|
|
|
if (entry->apic == apic && entry->pin == pin)
|
|
|
|
return;
|
2005-04-16 18:20:36 -04:00
|
|
|
|
2008-08-19 23:50:07 -04:00
|
|
|
entry = entry->next;
|
2005-04-16 18:20:36 -04:00
|
|
|
}
|
2008-08-19 23:50:07 -04:00
|
|
|
|
|
|
|
entry->next = get_one_free_irq_2_pin();
|
|
|
|
entry = entry->next;
|
2005-04-16 18:20:36 -04:00
|
|
|
entry->apic = apic;
|
|
|
|
entry->pin = pin;
|
2008-08-19 23:50:07 -04:00
|
|
|
printk(KERN_DEBUG " x add_pin_to_irq: irq %d --> apic %d pin %d\n", irq, apic, pin);
|
2005-04-16 18:20:36 -04:00
|
|
|
}
|
|
|
|
|
2008-06-30 20:19:31 -04:00
|
|
|
/*
|
|
|
|
* Reroute an IRQ to a different pin.
|
|
|
|
*/
|
|
|
|
static void __init replace_pin_at_irq(unsigned int irq,
|
|
|
|
int oldapic, int oldpin,
|
|
|
|
int newapic, int newpin)
|
|
|
|
{
|
2008-08-19 23:50:07 -04:00
|
|
|
struct irq_cfg *cfg = irq_cfg(irq);
|
|
|
|
struct irq_pin_list *entry = cfg->irq_2_pin;
|
|
|
|
int replaced = 0;
|
2008-06-30 20:19:31 -04:00
|
|
|
|
2008-08-19 23:50:07 -04:00
|
|
|
while (entry) {
|
2008-06-30 20:19:31 -04:00
|
|
|
if (entry->apic == oldapic && entry->pin == oldpin) {
|
|
|
|
entry->apic = newapic;
|
|
|
|
entry->pin = newpin;
|
2008-08-19 23:50:07 -04:00
|
|
|
replaced = 1;
|
|
|
|
/* every one is different, right? */
|
2008-06-30 20:19:31 -04:00
|
|
|
break;
|
2008-08-19 23:50:07 -04:00
|
|
|
}
|
|
|
|
entry = entry->next;
|
2008-06-30 20:19:31 -04:00
|
|
|
}
|
2008-08-19 23:50:07 -04:00
|
|
|
|
|
|
|
/* why? call replace before add? */
|
|
|
|
if (!replaced)
|
|
|
|
add_pin_to_irq(irq, newapic, newpin);
|
2008-06-30 20:19:31 -04:00
|
|
|
}
|
|
|
|
|
2005-04-16 18:20:36 -04:00
|
|
|
|
|
|
|
#define DO_ACTION(name,R,ACTION, FINAL) \
|
|
|
|
\
|
|
|
|
static void name##_IO_APIC_irq (unsigned int irq) \
|
|
|
|
__DO_ACTION(R, ACTION, FINAL)
|
|
|
|
|
2008-06-07 11:53:57 -04:00
|
|
|
/* mask = 1 */
|
|
|
|
DO_ACTION(__mask, 0, |= IO_APIC_REDIR_MASKED, io_apic_sync(entry->apic))
|
|
|
|
|
|
|
|
/* mask = 0 */
|
|
|
|
DO_ACTION(__unmask, 0, &= ~IO_APIC_REDIR_MASKED, )
|
2005-04-16 18:20:36 -04:00
|
|
|
|
|
|
|
static void mask_IO_APIC_irq (unsigned int irq)
|
|
|
|
{
|
|
|
|
unsigned long flags;
|
|
|
|
|
|
|
|
spin_lock_irqsave(&ioapic_lock, flags);
|
|
|
|
__mask_IO_APIC_irq(irq);
|
|
|
|
spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void unmask_IO_APIC_irq (unsigned int irq)
|
|
|
|
{
|
|
|
|
unsigned long flags;
|
|
|
|
|
|
|
|
spin_lock_irqsave(&ioapic_lock, flags);
|
|
|
|
__unmask_IO_APIC_irq(irq);
|
|
|
|
spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
|
|
|
|
{
|
|
|
|
struct IO_APIC_route_entry entry;
|
|
|
|
|
|
|
|
/* Check delivery_mode to be sure we're not clearing an SMI pin */
|
2006-09-26 04:52:30 -04:00
|
|
|
entry = ioapic_read_entry(apic, pin);
|
2005-04-16 18:20:36 -04:00
|
|
|
if (entry.delivery_mode == dest_SMI)
|
|
|
|
return;
|
|
|
|
/*
|
|
|
|
* Disable it in the IO-APIC irq-routing table:
|
|
|
|
*/
|
2006-11-08 13:27:54 -05:00
|
|
|
ioapic_mask_entry(apic, pin);
|
2005-04-16 18:20:36 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
static void clear_IO_APIC (void)
|
|
|
|
{
|
|
|
|
int apic, pin;
|
|
|
|
|
|
|
|
for (apic = 0; apic < nr_ioapics; apic++)
|
|
|
|
for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
|
|
|
|
clear_IO_APIC_pin(apic, pin);
|
|
|
|
}
|
|
|
|
|
2008-07-10 14:16:47 -04:00
|
|
|
/*
|
|
|
|
* Saves and masks all the unmasked IO-APIC RTE's
|
|
|
|
*/
|
|
|
|
int save_mask_IO_APIC_setup(void)
|
|
|
|
{
|
|
|
|
union IO_APIC_reg_01 reg_01;
|
|
|
|
unsigned long flags;
|
|
|
|
int apic, pin;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The number of IO-APIC IRQ registers (== #pins):
|
|
|
|
*/
|
|
|
|
for (apic = 0; apic < nr_ioapics; apic++) {
|
|
|
|
spin_lock_irqsave(&ioapic_lock, flags);
|
|
|
|
reg_01.raw = io_apic_read(apic, 1);
|
|
|
|
spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
|
|
nr_ioapic_registers[apic] = reg_01.bits.entries+1;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (apic = 0; apic < nr_ioapics; apic++) {
|
|
|
|
early_ioapic_entries[apic] =
|
|
|
|
kzalloc(sizeof(struct IO_APIC_route_entry) *
|
|
|
|
nr_ioapic_registers[apic], GFP_KERNEL);
|
|
|
|
if (!early_ioapic_entries[apic])
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (apic = 0; apic < nr_ioapics; apic++)
|
|
|
|
for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
|
|
|
|
struct IO_APIC_route_entry entry;
|
|
|
|
|
|
|
|
entry = early_ioapic_entries[apic][pin] =
|
|
|
|
ioapic_read_entry(apic, pin);
|
|
|
|
if (!entry.mask) {
|
|
|
|
entry.mask = 1;
|
|
|
|
ioapic_write_entry(apic, pin, entry);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
void restore_IO_APIC_setup(void)
|
|
|
|
{
|
|
|
|
int apic, pin;
|
|
|
|
|
|
|
|
for (apic = 0; apic < nr_ioapics; apic++)
|
|
|
|
for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
|
|
|
|
ioapic_write_entry(apic, pin,
|
|
|
|
early_ioapic_entries[apic][pin]);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Re-initialise the IO-APICs after the interrupt-remapping decision.
 * @intr_remapping is currently ignored.
 */
void reinit_intr_remapped_IO_APIC(int intr_remapping)
{
	/*
	 * For now this is a plain restore of the previous settings.
	 *
	 * TBD: when the OS enables interrupt-remapping, the IO-APIC RTE's
	 * need to be set up to point to interrupt-remapping table entries.
	 * Until then, restore and leave the proper initialization to
	 * setup_IO_APIC_irqs().
	 */
	restore_IO_APIC_setup();
}
|
|
|
|
|
2005-04-16 18:20:36 -04:00
|
|
|
/*
 * NOTE(review): not written anywhere in the visible part of this file;
 * presumably set by disable_ioapic_setup() for "noapic" -- confirm.
 */
int skip_ioapic_setup;
|
|
|
|
/* NOTE(review): no user is visible in this part of the file -- confirm where it is set and read. */
int ioapic_force;
|
|
|
|
|
2007-08-16 03:34:22 -04:00
|
|
|
static int __init parse_noapic(char *str)
|
2005-04-16 18:20:36 -04:00
|
|
|
{
|
2007-08-16 03:34:22 -04:00
|
|
|
disable_ioapic_setup();
|
2006-09-26 04:52:32 -04:00
|
|
|
return 0;
|
2005-04-16 18:20:36 -04:00
|
|
|
}
|
2007-08-16 03:34:22 -04:00
|
|
|
early_param("noapic", parse_noapic);
|
2005-04-16 18:20:36 -04:00
|
|
|
|
2007-01-08 18:04:46 -05:00
|
|
|
/* Actually the next is obsolete, but keep it for paranoid reasons -AK */
|
|
|
|
static int __init disable_timer_pin_setup(char *arg)
|
|
|
|
{
|
|
|
|
disable_timer_pin_1 = 1;
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
__setup("disable_timer_pin_1", disable_timer_pin_setup);
|
|
|
|
|
|
|
|
|
2005-04-16 18:20:36 -04:00
|
|
|
/*
|
|
|
|
* Find the IRQ entry number of a certain pin.
|
|
|
|
*/
|
|
|
|
static int find_irq_entry(int apic, int pin, int type)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < mp_irq_entries; i++)
|
2008-05-14 11:03:17 -04:00
|
|
|
if (mp_irqs[i].mp_irqtype == type &&
|
|
|
|
(mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid ||
|
|
|
|
mp_irqs[i].mp_dstapic == MP_APIC_ALL) &&
|
|
|
|
mp_irqs[i].mp_dstirq == pin)
|
2005-04-16 18:20:36 -04:00
|
|
|
return i;
|
|
|
|
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Find the pin to which IRQ[irq] (ISA) is connected
|
|
|
|
*/
|
2006-01-11 16:46:06 -05:00
|
|
|
static int __init find_isa_irq_pin(int irq, int type)
|
2005-04-16 18:20:36 -04:00
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < mp_irq_entries; i++) {
|
2008-05-14 11:03:17 -04:00
|
|
|
int lbus = mp_irqs[i].mp_srcbus;
|
2005-04-16 18:20:36 -04:00
|
|
|
|
2006-09-26 04:52:30 -04:00
|
|
|
if (test_bit(lbus, mp_bus_not_pci) &&
|
2008-05-14 11:03:17 -04:00
|
|
|
(mp_irqs[i].mp_irqtype == type) &&
|
|
|
|
(mp_irqs[i].mp_srcbusirq == irq))
|
2005-04-16 18:20:36 -04:00
|
|
|
|
2008-05-14 11:03:17 -04:00
|
|
|
return mp_irqs[i].mp_dstirq;
|
2005-04-16 18:20:36 -04:00
|
|
|
}
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2006-01-11 16:46:06 -05:00
|
|
|
/*
 * Find the IO-APIC to which ISA IRQ @irq of interrupt type @type is
 * connected.
 *
 * Only the first matching mp_irqs[] entry (non-PCI bus, same type, same
 * source bus irq) is considered. Returns the index of the IO-APIC whose
 * APIC id equals that entry's destination, or -1 if there is no
 * matching entry or no such IO-APIC.
 */
static int __init find_isa_irq_apic(int irq, int type)
{
	int idx;

	for (idx = 0; idx < mp_irq_entries; idx++) {
		int src_bus = mp_irqs[idx].mp_srcbus;
		int ioapic_idx;

		if (!test_bit(src_bus, mp_bus_not_pci))
			continue;
		if (mp_irqs[idx].mp_irqtype != type)
			continue;
		if (mp_irqs[idx].mp_srcbusirq != irq)
			continue;

		/* First match found: resolve its destination APIC id. */
		for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) {
			if (mp_ioapics[ioapic_idx].mp_apicid ==
			    mp_irqs[idx].mp_dstapic)
				return ioapic_idx;
		}

		/* Later entries are not examined. */
		break;
	}

	return -1;
}
|
|
|
|
|
2005-04-16 18:20:36 -04:00
|
|
|
/*
|
|
|
|
* Find a specific PCI IRQ entry.
|
|
|
|
* Not an __init, possibly needed by modules
|
|
|
|
*/
|
|
|
|
static int pin_2_irq(int idx, int apic, int pin);
|
|
|
|
|
|
|
|
int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
|
|
|
|
{
|
|
|
|
int apic, i, best_guess = -1;
|
|
|
|
|
|
|
|
apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
|
|
|
|
bus, slot, pin);
|
2008-05-19 11:47:09 -04:00
|
|
|
if (test_bit(bus, mp_bus_not_pci)) {
|
2005-04-16 18:20:36 -04:00
|
|
|
apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
for (i = 0; i < mp_irq_entries; i++) {
|
2008-05-14 11:03:17 -04:00
|
|
|
int lbus = mp_irqs[i].mp_srcbus;
|
2005-04-16 18:20:36 -04:00
|
|
|
|
|
|
|
for (apic = 0; apic < nr_ioapics; apic++)
|
2008-05-14 11:03:17 -04:00
|
|
|
if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic ||
|
|
|
|
mp_irqs[i].mp_dstapic == MP_APIC_ALL)
|
2005-04-16 18:20:36 -04:00
|
|
|
break;
|
|
|
|
|
2006-09-26 04:52:30 -04:00
|
|
|
if (!test_bit(lbus, mp_bus_not_pci) &&
|
2008-05-14 11:03:17 -04:00
|
|
|
!mp_irqs[i].mp_irqtype &&
|
2005-04-16 18:20:36 -04:00
|
|
|
(bus == lbus) &&
|
2008-05-14 11:03:17 -04:00
|
|
|
(slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) {
|
|
|
|
int irq = pin_2_irq(i,apic,mp_irqs[i].mp_dstirq);
|
2005-04-16 18:20:36 -04:00
|
|
|
|
|
|
|
if (!(apic || IO_APIC_IRQ(irq)))
|
|
|
|
continue;
|
|
|
|
|
2008-05-14 11:03:17 -04:00
|
|
|
if (pin == (mp_irqs[i].mp_srcbusirq & 3))
|
2005-04-16 18:20:36 -04:00
|
|
|
return irq;
|
|
|
|
/*
|
|
|
|
* Use the first all-but-pin matching entry as a
|
|
|
|
* best-guess fuzzy result for broken mptables.
|
|
|
|
*/
|
|
|
|
if (best_guess < 0)
|
|
|
|
best_guess = irq;
|
|
|
|
}
|
|
|
|
}
|
2008-08-19 23:49:48 -04:00
|
|
|
BUG_ON(best_guess >= nr_irqs);
|
2005-04-16 18:20:36 -04:00
|
|
|
return best_guess;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Defaults used when an MP table entry is listed as "conforming".
 *
 * ISA interrupts: polarity zero, edge triggered.
 */
#define default_ISA_trigger(idx)	(0)
#define default_ISA_polarity(idx)	(0)

/*
 * PCI interrupts: polarity one, level triggered.
 */
#define default_PCI_trigger(idx)	(1)
#define default_PCI_polarity(idx)	(1)
|
|
|
|
|
2007-11-17 01:05:28 -05:00
|
|
|
static int MPBIOS_polarity(int idx)
|
2005-04-16 18:20:36 -04:00
|
|
|
{
|
2008-05-14 11:03:17 -04:00
|
|
|
int bus = mp_irqs[idx].mp_srcbus;
|
2005-04-16 18:20:36 -04:00
|
|
|
int polarity;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Determine IRQ line polarity (high active or low active):
|
|
|
|
*/
|
2008-05-14 11:03:17 -04:00
|
|
|
switch (mp_irqs[idx].mp_irqflag & 3)
|
2005-04-16 18:20:36 -04:00
|
|
|
{
|
|
|
|
case 0: /* conforms, ie. bus-type dependent polarity */
|
2006-09-26 04:52:30 -04:00
|
|
|
if (test_bit(bus, mp_bus_not_pci))
|
|
|
|
polarity = default_ISA_polarity(idx);
|
|
|
|
else
|
|
|
|
polarity = default_PCI_polarity(idx);
|
2005-04-16 18:20:36 -04:00
|
|
|
break;
|
|
|
|
case 1: /* high active */
|
|
|
|
{
|
|
|
|
polarity = 0;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case 2: /* reserved */
|
|
|
|
{
|
|
|
|
printk(KERN_WARNING "broken BIOS!!\n");
|
|
|
|
polarity = 1;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case 3: /* low active */
|
|
|
|
{
|
|
|
|
polarity = 1;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
default: /* invalid */
|
|
|
|
{
|
|
|
|
printk(KERN_WARNING "broken BIOS!!\n");
|
|
|
|
polarity = 1;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return polarity;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int MPBIOS_trigger(int idx)
|
|
|
|
{
|
2008-05-14 11:03:17 -04:00
|
|
|
int bus = mp_irqs[idx].mp_srcbus;
|
2005-04-16 18:20:36 -04:00
|
|
|
int trigger;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Determine IRQ trigger mode (edge or level sensitive):
|
|
|
|
*/
|
2008-05-14 11:03:17 -04:00
|
|
|
switch ((mp_irqs[idx].mp_irqflag>>2) & 3)
|
2005-04-16 18:20:36 -04:00
|
|
|
{
|
|
|
|
case 0: /* conforms, ie. bus-type dependent */
|
2006-09-26 04:52:30 -04:00
|
|
|
if (test_bit(bus, mp_bus_not_pci))
|
|
|
|
trigger = default_ISA_trigger(idx);
|
|
|
|
else
|
|
|
|
trigger = default_PCI_trigger(idx);
|
2005-04-16 18:20:36 -04:00
|
|
|
break;
|
|
|
|
case 1: /* edge */
|
|
|
|
{
|
|
|
|
trigger = 0;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case 2: /* reserved */
|
|
|
|
{
|
|
|
|
printk(KERN_WARNING "broken BIOS!!\n");
|
|
|
|
trigger = 1;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case 3: /* level */
|
|
|
|
{
|
|
|
|
trigger = 1;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
default: /* invalid */
|
|
|
|
{
|
|
|
|
printk(KERN_WARNING "broken BIOS!!\n");
|
|
|
|
trigger = 0;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return trigger;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Polarity of MP table interrupt entry @idx, as decided by MPBIOS_polarity(). */
static inline int irq_polarity(int idx)
{
	return MPBIOS_polarity(idx);
}
|
|
|
|
|
|
|
|
/* Trigger mode of MP table interrupt entry @idx, as decided by MPBIOS_trigger(). */
static inline int irq_trigger(int idx)
{
	return MPBIOS_trigger(idx);
}
|
|
|
|
|
|
|
|
static int pin_2_irq(int idx, int apic, int pin)
|
|
|
|
{
|
|
|
|
int irq, i;
|
2008-05-14 11:03:17 -04:00
|
|
|
int bus = mp_irqs[idx].mp_srcbus;
|
2005-04-16 18:20:36 -04:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Debugging check, we are in big trouble if this message pops up!
|
|
|
|
*/
|
2008-05-14 11:03:17 -04:00
|
|
|
if (mp_irqs[idx].mp_dstirq != pin)
|
2005-04-16 18:20:36 -04:00
|
|
|
printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
|
|
|
|
|
2006-09-26 04:52:30 -04:00
|
|
|
if (test_bit(bus, mp_bus_not_pci)) {
|
2008-05-14 11:03:17 -04:00
|
|
|
irq = mp_irqs[idx].mp_srcbusirq;
|
2006-09-26 04:52:30 -04:00
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* PCI IRQs are mapped in order
|
|
|
|
*/
|
|
|
|
i = irq = 0;
|
|
|
|
while (i < apic)
|
|
|
|
irq += nr_ioapic_registers[i++];
|
|
|
|
irq += pin;
|
2005-04-16 18:20:36 -04:00
|
|
|
}
|
2008-08-19 23:49:48 -04:00
|
|
|
BUG_ON(irq >= nr_irqs);
|
2005-04-16 18:20:36 -04:00
|
|
|
return irq;
|
|
|
|
}
|
|
|
|
|
2008-08-09 18:09:02 -04:00
|
|
|
void lock_vector_lock(void)
|
|
|
|
{
|
|
|
|
/* Used to the online set of cpus does not change
|
|
|
|
* during assign_irq_vector.
|
|
|
|
*/
|
|
|
|
spin_lock(&vector_lock);
|
|
|
|
}
|
|
|
|
|
|
|
|
void unlock_vector_lock(void)
|
|
|
|
{
|
|
|
|
spin_unlock(&vector_lock);
|
|
|
|
}
|
|
|
|
|
2007-02-23 06:35:05 -05:00
|
|
|
/*
 * Pick an interrupt vector for @irq on one of the online cpus in @mask.
 *
 * Returns 0 on success, including the case where the irq already has a
 * vector whose domain intersects the online part of @mask. Returns -EBUSY
 * while a previous move of this irq is still in progress or awaiting
 * cleanup, and -ENOSPC when no free vector was found on any candidate cpu.
 */
static int __assign_irq_vector(int irq, cpumask_t mask)
{
	/*
	 * NOTE! The local APIC isn't very good at handling
	 * multiple interrupts at the same interrupt level.
	 * As the interrupt level is determined by taking the
	 * vector number and shifting that right by 4, we
	 * want to spread these out a bit so that they don't
	 * all fall in the same interrupt level.
	 *
	 * Also, we've got to be careful not to trash gate
	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
	 */
	static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
	struct irq_cfg *cfg;
	unsigned int old_vector;
	int cpu;

	BUG_ON((unsigned)irq >= nr_irqs);
	cfg = irq_cfg(irq);

	/* Only try and allocate irqs on cpus that are online */
	cpus_and(mask, mask, cpu_online_map);

	if (cfg->move_in_progress || cfg->move_cleanup_count)
		return -EBUSY;

	old_vector = cfg->vector;
	if (old_vector) {
		cpumask_t overlap;

		/* the existing assignment already covers a requested cpu */
		cpus_and(overlap, cfg->domain, mask);
		if (!cpus_empty(overlap))
			return 0;
	}

	for_each_cpu_mask_nr(cpu, mask) {
		cpumask_t domain, new_mask;
		int new_cpu;
		int vector = current_vector;
		int offset = current_offset;

		domain = vector_allocation_domain(cpu);
		cpus_and(new_mask, domain, cpu_online_map);

		for (;;) {
			int in_use = 0;

			vector += 8;
			if (vector >= first_system_vector) {
				/* If we run out of vectors on large boxen, must share them. */
				offset = (offset + 1) % 8;
				vector = FIRST_DEVICE_VECTOR + offset;
			}

			/* back at the starting point: give up on this cpu */
			if (unlikely(current_vector == vector))
				break;

			if (vector == IA32_SYSCALL_VECTOR)
				continue;

			for_each_cpu_mask_nr(new_cpu, new_mask) {
				if (per_cpu(vector_irq, new_cpu)[vector] != -1) {
					in_use = 1;
					break;
				}
			}
			if (in_use)
				continue;

			/* Found one! */
			current_vector = vector;
			current_offset = offset;
			if (old_vector) {
				cfg->move_in_progress = 1;
				cfg->old_domain = cfg->domain;
			}
			for_each_cpu_mask_nr(new_cpu, new_mask)
				per_cpu(vector_irq, new_cpu)[vector] = irq;
			cfg->vector = vector;
			cfg->domain = domain;
			return 0;
		}
	}

	return -ENOSPC;
}
|
|
|
|
|
2007-02-23 06:35:05 -05:00
|
|
|
/*
 * Locked wrapper around __assign_irq_vector(): runs it with vector_lock
 * held and local interrupts saved/disabled, and passes its result through.
 */
static int assign_irq_vector(int irq, cpumask_t mask)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&vector_lock, flags);
	ret = __assign_irq_vector(irq, mask);
	spin_unlock_irqrestore(&vector_lock, flags);

	return ret;
}
|
|
|
|
|
2006-12-06 20:14:05 -05:00
|
|
|
static void __clear_irq_vector(int irq)
|
|
|
|
{
|
2007-02-23 06:32:47 -05:00
|
|
|
struct irq_cfg *cfg;
|
2006-12-06 20:14:05 -05:00
|
|
|
cpumask_t mask;
|
|
|
|
int cpu, vector;
|
|
|
|
|
2008-08-19 23:49:48 -04:00
|
|
|
BUG_ON((unsigned)irq >= nr_irqs);
|
2008-08-19 23:50:06 -04:00
|
|
|
cfg = irq_cfg(irq);
|
2007-02-23 06:32:47 -05:00
|
|
|
BUG_ON(!cfg->vector);
|
2006-12-06 20:14:05 -05:00
|
|
|
|
2007-02-23 06:32:47 -05:00
|
|
|
vector = cfg->vector;
|
|
|
|
cpus_and(mask, cfg->domain, cpu_online_map);
|
2008-05-12 15:21:13 -04:00
|
|
|
for_each_cpu_mask_nr(cpu, mask)
|
2006-12-06 20:14:05 -05:00
|
|
|
per_cpu(vector_irq, cpu)[vector] = -1;
|
|
|
|
|
2007-02-23 06:32:47 -05:00
|
|
|
cfg->vector = 0;
|
2008-04-04 21:11:12 -04:00
|
|
|
cpus_clear(cfg->domain);
|
2006-12-06 20:14:05 -05:00
|
|
|
}
|
|
|
|
|
2008-08-09 18:09:02 -04:00
|
|
|
void __setup_vector_irq(int cpu)
|
2006-10-24 19:00:23 -04:00
|
|
|
{
|
|
|
|
/* Initialize vector_irq on a new cpu */
|
|
|
|
/* This function must be called with vector_lock held */
|
|
|
|
int irq, vector;
|
|
|
|
|
|
|
|
/* Mark the inuse vectors */
|
2008-08-19 23:49:48 -04:00
|
|
|
for (irq = 0; irq < nr_irqs; ++irq) {
|
2008-08-19 23:50:06 -04:00
|
|
|
struct irq_cfg *cfg = irq_cfg(irq);
|
|
|
|
|
|
|
|
if (!cpu_isset(cpu, cfg->domain))
|
2006-10-24 19:00:23 -04:00
|
|
|
continue;
|
2008-08-19 23:50:06 -04:00
|
|
|
vector = cfg->vector;
|
2006-10-24 19:00:23 -04:00
|
|
|
per_cpu(vector_irq, cpu)[vector] = irq;
|
|
|
|
}
|
|
|
|
/* Mark the free vectors */
|
|
|
|
for (vector = 0; vector < NR_VECTORS; ++vector) {
|
2008-08-19 23:50:06 -04:00
|
|
|
struct irq_cfg *cfg;
|
|
|
|
|
2006-10-24 19:00:23 -04:00
|
|
|
irq = per_cpu(vector_irq, cpu)[vector];
|
|
|
|
if (irq < 0)
|
|
|
|
continue;
|
2008-08-19 23:50:06 -04:00
|
|
|
|
|
|
|
cfg = irq_cfg(irq);
|
|
|
|
if (!cpu_isset(cpu, cfg->domain))
|
2006-10-24 19:00:23 -04:00
|
|
|
per_cpu(vector_irq, cpu)[vector] = -1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2006-10-04 05:16:25 -04:00
|
|
|
static struct irq_chip ioapic_chip;
|
x64, x2apic/intr-remap: IO-APIC support for interrupt-remapping
IO-APIC support in the presence of interrupt-remapping infrastructure.
IO-APIC RTE will be programmed with interrupt-remapping table entry(IRTE)
index and the IRTE will contain information about the vector, cpu destination,
trigger mode etc, which traditionally was present in the IO-APIC RTE.
Introduce a new irq_chip for cleaner irq migration (in the process
context as opposed to the current irq migration in the context of an interrupt.
interrupt-remapping infrastructure will help us achieve this cleanly).
For edge triggered, irq migration is a simple atomic update(of vector
and cpu destination) of IRTE and flush the hardware cache.
For level triggered, we need to modify the io-apic RTE as well with the updated
vector information, along with modifying IRTE with vector and cpu destination.
So irq migration for level triggered is little bit more complex compared to
edge triggered migration. But the good news is, we use the same algorithm
for level triggered migration as we have today, only difference being,
we now initiate the irq migration from process context instead of the
interrupt context.
In future, when we do a directed EOI (combined with cpu EOI broadcast
suppression) to the IO-APIC, level triggered irq migration will also be
as simple as edge triggered migration and we can do the irq migration
with a simple atomic update to IO-APIC RTE.
TBD: some tests/changes needed in the presence of fixup_irqs() for
level triggered irq migration.
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: akpm@linux-foundation.org
Cc: arjan@linux.intel.com
Cc: andi@firstfloor.org
Cc: ebiederm@xmission.com
Cc: jbarnes@virtuousgeek.org
Cc: steiner@sgi.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-07-10 14:16:56 -04:00
|
|
|
#ifdef CONFIG_INTR_REMAP
|
|
|
|
static struct irq_chip ir_ioapic_chip;
|
|
|
|
#endif
|
2005-04-16 18:20:36 -04:00
|
|
|
|
2007-02-23 06:16:31 -05:00
|
|
|
static void ioapic_register_intr(int irq, unsigned long trigger)
|
2005-04-16 18:20:36 -04:00
|
|
|
{
|
2008-08-19 23:50:05 -04:00
|
|
|
struct irq_desc *desc;
|
|
|
|
|
|
|
|
desc = irq_to_desc(irq);
|
x64, x2apic/intr-remap: IO-APIC support for interrupt-remapping
IO-APIC support in the presence of interrupt-remapping infrastructure.
IO-APIC RTE will be programmed with interrupt-remapping table entry(IRTE)
index and the IRTE will contain information about the vector, cpu destination,
trigger mode etc, which traditionally was present in the IO-APIC RTE.
Introduce a new irq_chip for cleaner irq migration (in the process
context as opposed to the current irq migration in the context of an interrupt.
interrupt-remapping infrastructure will help us achieve this cleanly).
For edge triggered, irq migration is a simple atomic update(of vector
and cpu destination) of IRTE and flush the hardware cache.
For level triggered, we need to modify the io-apic RTE aswell with the update
vector information, along with modifying IRTE with vector and cpu destination.
So irq migration for level triggered is little bit more complex compared to
edge triggered migration. But the good news is, we use the same algorithm
for level triggered migration as we have today, only difference being,
we now initiate the irq migration from process context instead of the
interrupt context.
In future, when we do a directed EOI (combined with cpu EOI broadcast
suppression) to the IO-APIC, level triggered irq migration will also be
as simple as edge triggered migration and we can do the irq migration
with a simple atomic update to IO-APIC RTE.
TBD: some tests/changes needed in the presence of fixup_irqs() for
level triggered irq migration.
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: akpm@linux-foundation.org
Cc: arjan@linux.intel.com
Cc: andi@firstfloor.org
Cc: ebiederm@xmission.com
Cc: jbarnes@virtuousgeek.org
Cc: steiner@sgi.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-07-10 14:16:56 -04:00
|
|
|
if (trigger)
|
2008-08-19 23:50:05 -04:00
|
|
|
desc->status |= IRQ_LEVEL;
|
x64, x2apic/intr-remap: IO-APIC support for interrupt-remapping
IO-APIC support in the presence of interrupt-remapping infrastructure.
IO-APIC RTE will be programmed with interrupt-remapping table entry(IRTE)
index and the IRTE will contain information about the vector, cpu destination,
trigger mode etc, which traditionally was present in the IO-APIC RTE.
Introduce a new irq_chip for cleaner irq migration (in the process
context as opposed to the current irq migration in the context of an interrupt.
interrupt-remapping infrastructure will help us achieve this cleanly).
For edge triggered, irq migration is a simple atomic update(of vector
and cpu destination) of IRTE and flush the hardware cache.
For level triggered, we need to modify the io-apic RTE aswell with the update
vector information, along with modifying IRTE with vector and cpu destination.
So irq migration for level triggered is little bit more complex compared to
edge triggered migration. But the good news is, we use the same algorithm
for level triggered migration as we have today, only difference being,
we now initiate the irq migration from process context instead of the
interrupt context.
In future, when we do a directed EOI (combined with cpu EOI broadcast
suppression) to the IO-APIC, level triggered irq migration will also be
as simple as edge triggered migration and we can do the irq migration
with a simple atomic update to IO-APIC RTE.
TBD: some tests/changes needed in the presence of fixup_irqs() for
level triggered irq migration.
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: akpm@linux-foundation.org
Cc: arjan@linux.intel.com
Cc: andi@firstfloor.org
Cc: ebiederm@xmission.com
Cc: jbarnes@virtuousgeek.org
Cc: steiner@sgi.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-07-10 14:16:56 -04:00
|
|
|
else
|
2008-08-19 23:50:05 -04:00
|
|
|
desc->status &= ~IRQ_LEVEL;
|
x64, x2apic/intr-remap: IO-APIC support for interrupt-remapping
IO-APIC support in the presence of interrupt-remapping infrastructure.
IO-APIC RTE will be programmed with interrupt-remapping table entry(IRTE)
index and the IRTE will contain information about the vector, cpu destination,
trigger mode etc, which traditionally was present in the IO-APIC RTE.
Introduce a new irq_chip for cleaner irq migration (in the process
context as opposed to the current irq migration in the context of an interrupt.
interrupt-remapping infrastructure will help us achieve this cleanly).
For edge triggered, irq migration is a simple atomic update(of vector
and cpu destination) of IRTE and flush the hardware cache.
For level triggered, we need to modify the io-apic RTE aswell with the update
vector information, along with modifying IRTE with vector and cpu destination.
So irq migration for level triggered is little bit more complex compared to
edge triggered migration. But the good news is, we use the same algorithm
for level triggered migration as we have today, only difference being,
we now initiate the irq migration from process context instead of the
interrupt context.
In future, when we do a directed EOI (combined with cpu EOI broadcast
suppression) to the IO-APIC, level triggered irq migration will also be
as simple as edge triggered migration and we can do the irq migration
with a simple atomic update to IO-APIC RTE.
TBD: some tests/changes needed in the presence of fixup_irqs() for
level triggered irq migration.
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: akpm@linux-foundation.org
Cc: arjan@linux.intel.com
Cc: andi@firstfloor.org
Cc: ebiederm@xmission.com
Cc: jbarnes@virtuousgeek.org
Cc: steiner@sgi.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-07-10 14:16:56 -04:00
|
|
|
|
|
|
|
#ifdef CONFIG_INTR_REMAP
|
|
|
|
if (irq_remapped(irq)) {
|
2008-08-19 23:50:05 -04:00
|
|
|
desc->status |= IRQ_MOVE_PCNTXT;
|
x64, x2apic/intr-remap: IO-APIC support for interrupt-remapping
IO-APIC support in the presence of interrupt-remapping infrastructure.
IO-APIC RTE will be programmed with interrupt-remapping table entry(IRTE)
index and the IRTE will contain information about the vector, cpu destination,
trigger mode etc, which traditionally was present in the IO-APIC RTE.
Introduce a new irq_chip for cleaner irq migration (in the process
context as opposed to the current irq migration in the context of an interrupt.
interrupt-remapping infrastructure will help us achieve this cleanly).
For edge triggered, irq migration is a simple atomic update(of vector
and cpu destination) of IRTE and flush the hardware cache.
For level triggered, we need to modify the io-apic RTE aswell with the update
vector information, along with modifying IRTE with vector and cpu destination.
So irq migration for level triggered is little bit more complex compared to
edge triggered migration. But the good news is, we use the same algorithm
for level triggered migration as we have today, only difference being,
we now initiate the irq migration from process context instead of the
interrupt context.
In future, when we do a directed EOI (combined with cpu EOI broadcast
suppression) to the IO-APIC, level triggered irq migration will also be
as simple as edge triggered migration and we can do the irq migration
with a simple atomic update to IO-APIC RTE.
TBD: some tests/changes needed in the presence of fixup_irqs() for
level triggered irq migration.
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: akpm@linux-foundation.org
Cc: arjan@linux.intel.com
Cc: andi@firstfloor.org
Cc: ebiederm@xmission.com
Cc: jbarnes@virtuousgeek.org
Cc: steiner@sgi.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-07-10 14:16:56 -04:00
|
|
|
if (trigger)
|
|
|
|
set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
|
|
|
|
handle_fasteoi_irq,
|
|
|
|
"fasteoi");
|
|
|
|
else
|
|
|
|
set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
|
|
|
|
handle_edge_irq, "edge");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
if (trigger)
|
|
|
|
set_irq_chip_and_handler_name(irq, &ioapic_chip,
|
|
|
|
handle_fasteoi_irq,
|
|
|
|
"fasteoi");
|
|
|
|
else
|
2006-10-17 03:10:03 -04:00
|
|
|
set_irq_chip_and_handler_name(irq, &ioapic_chip,
|
|
|
|
handle_edge_irq, "edge");
|
x64, x2apic/intr-remap: IO-APIC support for interrupt-remapping
IO-APIC support in the presence of interrupt-remapping infrastructure.
IO-APIC RTE will be programmed with interrupt-remapping table entry(IRTE)
index and the IRTE will contain information about the vector, cpu destination,
trigger mode etc, which traditionally was present in the IO-APIC RTE.
Introduce a new irq_chip for cleaner irq migration (in the process
context as opposed to the current irq migration in the context of an interrupt.
interrupt-remapping infrastructure will help us achieve this cleanly).
For edge triggered, irq migration is a simple atomic update(of vector
and cpu destination) of IRTE and flush the hardware cache.
For level triggered, we need to modify the io-apic RTE aswell with the update
vector information, along with modifying IRTE with vector and cpu destination.
So irq migration for level triggered is little bit more complex compared to
edge triggered migration. But the good news is, we use the same algorithm
for level triggered migration as we have today, only difference being,
we now initiate the irq migration from process context instead of the
interrupt context.
In future, when we do a directed EOI (combined with cpu EOI broadcast
suppression) to the IO-APIC, level triggered irq migration will also be
as simple as edge triggered migration and we can do the irq migration
with a simple atomic update to IO-APIC RTE.
TBD: some tests/changes needed in the presence of fixup_irqs() for
level triggered irq migration.
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: akpm@linux-foundation.org
Cc: arjan@linux.intel.com
Cc: andi@firstfloor.org
Cc: ebiederm@xmission.com
Cc: jbarnes@virtuousgeek.org
Cc: steiner@sgi.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-07-10 14:16:56 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
static int setup_ioapic_entry(int apic, int irq,
|
|
|
|
struct IO_APIC_route_entry *entry,
|
|
|
|
unsigned int destination, int trigger,
|
|
|
|
int polarity, int vector)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* add it to the IO-APIC irq-routing table:
|
|
|
|
*/
|
|
|
|
memset(entry,0,sizeof(*entry));
|
|
|
|
|
|
|
|
#ifdef CONFIG_INTR_REMAP
|
|
|
|
if (intr_remapping_enabled) {
|
|
|
|
struct intel_iommu *iommu = map_ioapic_to_ir(apic);
|
|
|
|
struct irte irte;
|
|
|
|
struct IR_IO_APIC_route_entry *ir_entry =
|
|
|
|
(struct IR_IO_APIC_route_entry *) entry;
|
|
|
|
int index;
|
|
|
|
|
|
|
|
if (!iommu)
|
|
|
|
panic("No mapping iommu for ioapic %d\n", apic);
|
|
|
|
|
|
|
|
index = alloc_irte(iommu, irq, 1);
|
|
|
|
if (index < 0)
|
|
|
|
panic("Failed to allocate IRTE for ioapic %d\n", apic);
|
|
|
|
|
|
|
|
memset(&irte, 0, sizeof(irte));
|
|
|
|
|
|
|
|
irte.present = 1;
|
|
|
|
irte.dst_mode = INT_DEST_MODE;
|
|
|
|
irte.trigger_mode = trigger;
|
|
|
|
irte.dlvry_mode = INT_DELIVERY_MODE;
|
|
|
|
irte.vector = vector;
|
|
|
|
irte.dest_id = IRTE_DEST(destination);
|
|
|
|
|
|
|
|
modify_irte(irq, &irte);
|
|
|
|
|
|
|
|
ir_entry->index2 = (index >> 15) & 0x1;
|
|
|
|
ir_entry->zero = 0;
|
|
|
|
ir_entry->format = 1;
|
|
|
|
ir_entry->index = (index & 0x7fff);
|
|
|
|
} else
|
|
|
|
#endif
|
|
|
|
{
|
|
|
|
entry->delivery_mode = INT_DELIVERY_MODE;
|
|
|
|
entry->dest_mode = INT_DEST_MODE;
|
|
|
|
entry->dest = destination;
|
2007-08-12 11:46:36 -04:00
|
|
|
}
|
x64, x2apic/intr-remap: IO-APIC support for interrupt-remapping
IO-APIC support in the presence of interrupt-remapping infrastructure.
IO-APIC RTE will be programmed with interrupt-remapping table entry(IRTE)
index and the IRTE will contain information about the vector, cpu destination,
trigger mode etc, which traditionally was present in the IO-APIC RTE.
Introduce a new irq_chip for cleaner irq migration (in the process
context as opposed to the current irq migration in the context of an interrupt.
interrupt-remapping infrastructure will help us achieve this cleanly).
For edge triggered, irq migration is a simple atomic update(of vector
and cpu destination) of IRTE and flush the hardware cache.
For level triggered, we need to modify the io-apic RTE aswell with the update
vector information, along with modifying IRTE with vector and cpu destination.
So irq migration for level triggered is little bit more complex compared to
edge triggered migration. But the good news is, we use the same algorithm
for level triggered migration as we have today, only difference being,
we now initiate the irq migration from process context instead of the
interrupt context.
In future, when we do a directed EOI (combined with cpu EOI broadcast
suppression) to the IO-APIC, level triggered irq migration will also be
as simple as edge triggered migration and we can do the irq migration
with a simple atomic update to IO-APIC RTE.
TBD: some tests/changes needed in the presence of fixup_irqs() for
level triggered irq migration.
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: akpm@linux-foundation.org
Cc: arjan@linux.intel.com
Cc: andi@firstfloor.org
Cc: ebiederm@xmission.com
Cc: jbarnes@virtuousgeek.org
Cc: steiner@sgi.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-07-10 14:16:56 -04:00
|
|
|
|
|
|
|
entry->mask = 0; /* enable IRQ */
|
|
|
|
entry->trigger = trigger;
|
|
|
|
entry->polarity = polarity;
|
|
|
|
entry->vector = vector;
|
|
|
|
|
|
|
|
/* Mask level triggered irqs.
|
|
|
|
* Use IRQ_DELAYED_DISABLE for edge triggered irqs.
|
|
|
|
*/
|
|
|
|
if (trigger)
|
|
|
|
entry->mask = 1;
|
|
|
|
return 0;
|
2005-04-16 18:20:36 -04:00
|
|
|
}
|
2007-02-23 06:19:08 -05:00
|
|
|
|
|
|
|
static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
|
|
|
|
int trigger, int polarity)
|
2005-04-16 18:20:36 -04:00
|
|
|
{
|
2008-08-19 23:50:06 -04:00
|
|
|
struct irq_cfg *cfg;
|
2005-04-16 18:20:36 -04:00
|
|
|
struct IO_APIC_route_entry entry;
|
2007-02-23 06:19:08 -05:00
|
|
|
cpumask_t mask;
|
2005-04-16 18:20:36 -04:00
|
|
|
|
2007-02-23 06:19:08 -05:00
|
|
|
if (!IO_APIC_IRQ(irq))
|
|
|
|
return;
|
|
|
|
|
2008-08-19 23:50:06 -04:00
|
|
|
cfg = irq_cfg(irq);
|
|
|
|
|
2007-02-23 06:35:05 -05:00
|
|
|
mask = TARGET_CPUS;
|
|
|
|
if (assign_irq_vector(irq, mask))
|
2007-02-23 06:19:08 -05:00
|
|
|
return;
|
|
|
|
|
2007-02-23 06:35:05 -05:00
|
|
|
cpus_and(mask, cfg->domain, mask);
|
|
|
|
|
2007-02-23 06:19:08 -05:00
|
|
|
apic_printk(APIC_VERBOSE,KERN_DEBUG
|
|
|
|
"IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
|
|
|
|
"IRQ %d Mode:%i Active:%i)\n",
|
2008-05-14 11:03:10 -04:00
|
|
|
apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector,
|
2007-02-23 06:19:08 -05:00
|
|
|
irq, trigger, polarity);
|
2005-04-16 18:20:36 -04:00
|
|
|
|
|
|
|
|
x64, x2apic/intr-remap: IO-APIC support for interrupt-remapping
IO-APIC support in the presence of interrupt-remapping infrastructure.
IO-APIC RTE will be programmed with interrupt-remapping table entry(IRTE)
index and the IRTE will contain information about the vector, cpu destination,
trigger mode etc, which traditionally was present in the IO-APIC RTE.
Introduce a new irq_chip for cleaner irq migration (in the process
context as opposed to the current irq migration in the context of an interrupt.
interrupt-remapping infrastructure will help us achieve this cleanly).
For edge triggered, irq migration is a simple atomic update(of vector
and cpu destination) of IRTE and flush the hardware cache.
For level triggered, we need to modify the io-apic RTE aswell with the update
vector information, along with modifying IRTE with vector and cpu destination.
So irq migration for level triggered is little bit more complex compared to
edge triggered migration. But the good news is, we use the same algorithm
for level triggered migration as we have today, only difference being,
we now initiate the irq migration from process context instead of the
interrupt context.
In future, when we do a directed EOI (combined with cpu EOI broadcast
suppression) to the IO-APIC, level triggered irq migration will also be
as simple as edge triggered migration and we can do the irq migration
with a simple atomic update to IO-APIC RTE.
TBD: some tests/changes needed in the presence of fixup_irqs() for
level triggered irq migration.
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: akpm@linux-foundation.org
Cc: arjan@linux.intel.com
Cc: andi@firstfloor.org
Cc: ebiederm@xmission.com
Cc: jbarnes@virtuousgeek.org
Cc: steiner@sgi.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-07-10 14:16:56 -04:00
|
|
|
if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry,
|
|
|
|
cpu_mask_to_apicid(mask), trigger, polarity,
|
|
|
|
cfg->vector)) {
|
|
|
|
printk("Failed to setup ioapic entry for ioapic %d, pin %d\n",
|
|
|
|
mp_ioapics[apic].mp_apicid, pin);
|
|
|
|
__clear_irq_vector(irq);
|
|
|
|
return;
|
|
|
|
}
|
2006-12-06 20:14:19 -05:00
|
|
|
|
2007-02-23 06:19:08 -05:00
|
|
|
ioapic_register_intr(irq, trigger);
|
|
|
|
if (irq < 16)
|
|
|
|
disable_8259A_irq(irq);
|
2006-12-06 20:14:19 -05:00
|
|
|
|
|
|
|
ioapic_write_entry(apic, pin, entry);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Walk every pin of every IO-APIC and set up routing for each pin that
 * has an mp_INT entry in the MP table.
 *
 * Pins without an entry are only reported (at APIC_VERBOSE level) as a
 * comma-separated "apicid-pin" list terminated by " not connected.".
 */
static void __init setup_IO_APIC_irqs(void)
{
	int apic, pin, idx, irq;
	int list_open = 0;	/* a "not connected" list is being printed */

	apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");

	for (apic = 0; apic < nr_ioapics; apic++) {
		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
			idx = find_irq_entry(apic,pin,mp_INT);

			if (idx == -1) {
				if (list_open) {
					apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mp_apicid, pin);
				} else {
					apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mp_apicid, pin);
					list_open = 1;
				}
				continue;
			}

			/* a connected pin ends the current list, if any */
			if (list_open) {
				apic_printk(APIC_VERBOSE, " not connected.\n");
				list_open = 0;
			}

			irq = pin_2_irq(idx, apic, pin);
			add_pin_to_irq(irq, apic, pin);

			setup_IO_APIC_irq(apic, pin, irq,
					  irq_trigger(idx), irq_polarity(idx));
		}
	}

	if (list_open)
		apic_printk(APIC_VERBOSE, " not connected.\n");
}
|
|
|
|
|
|
|
|
/*
|
2008-05-27 16:19:34 -04:00
|
|
|
* Set up the timer pin, possibly with the 8259A-master behind.
|
2005-04-16 18:20:36 -04:00
|
|
|
*/
|
2008-05-27 16:19:34 -04:00
|
|
|
static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
|
|
|
|
int vector)
|
2005-04-16 18:20:36 -04:00
|
|
|
{
|
|
|
|
struct IO_APIC_route_entry entry;
|
|
|
|
|
x64, x2apic/intr-remap: IO-APIC support for interrupt-remapping
IO-APIC support in the presence of interrupt-remapping infrastructure.
IO-APIC RTE will be programmed with interrupt-remapping table entry(IRTE)
index and the IRTE will contain information about the vector, cpu destination,
trigger mode etc, which traditionally was present in the IO-APIC RTE.
Introduce a new irq_chip for cleaner irq migration (in the process
context as opposed to the current irq migration in the context of an interrupt.
interrupt-remapping infrastructure will help us achieve this cleanly).
For edge triggered, irq migration is a simple atomic update(of vector
and cpu destination) of IRTE and flush the hardware cache.
For level triggered, we need to modify the io-apic RTE aswell with the update
vector information, along with modifying IRTE with vector and cpu destination.
So irq migration for level triggered is little bit more complex compared to
edge triggered migration. But the good news is, we use the same algorithm
for level triggered migration as we have today, only difference being,
we now initiate the irq migration from process context instead of the
interrupt context.
In future, when we do a directed EOI (combined with cpu EOI broadcast
suppression) to the IO-APIC, level triggered irq migration will also be
as simple as edge triggered migration and we can do the irq migration
with a simple atomic update to IO-APIC RTE.
TBD: some tests/changes needed in the presence of fixup_irqs() for
level triggered irq migration.
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: akpm@linux-foundation.org
Cc: arjan@linux.intel.com
Cc: andi@firstfloor.org
Cc: ebiederm@xmission.com
Cc: jbarnes@virtuousgeek.org
Cc: steiner@sgi.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-07-10 14:16:56 -04:00
|
|
|
if (intr_remapping_enabled)
|
|
|
|
return;
|
|
|
|
|
2008-04-05 09:39:05 -04:00
|
|
|
memset(&entry, 0, sizeof(entry));
|
2005-04-16 18:20:36 -04:00
|
|
|
|
|
|
|
/*
|
|
|
|
* We use logical delivery to get the timer IRQ
|
|
|
|
* to the first CPU.
|
|
|
|
*/
|
|
|
|
entry.dest_mode = INT_DEST_MODE;
|
2008-05-27 16:19:45 -04:00
|
|
|
entry.mask = 1; /* mask IRQ now */
|
2007-02-13 07:26:25 -05:00
|
|
|
entry.dest = cpu_mask_to_apicid(TARGET_CPUS);
|
2005-04-16 18:20:36 -04:00
|
|
|
entry.delivery_mode = INT_DELIVERY_MODE;
|
|
|
|
entry.polarity = 0;
|
|
|
|
entry.trigger = 0;
|
|
|
|
entry.vector = vector;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The timer IRQ doesn't have to know that behind the
|
2008-05-27 16:19:34 -04:00
|
|
|
* scene we may have a 8259A-master in AEOI mode ...
|
2005-04-16 18:20:36 -04:00
|
|
|
*/
|
2006-10-17 03:10:03 -04:00
|
|
|
set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge");
|
2005-04-16 18:20:36 -04:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Add it to the IO-APIC irq-routing table:
|
|
|
|
*/
|
2008-04-05 09:39:05 -04:00
|
|
|
ioapic_write_entry(apic, pin, entry);
|
2005-04-16 18:20:36 -04:00
|
|
|
}
|
|
|
|
|
2008-07-20 19:52:49 -04:00
|
|
|
|
|
|
|
/*
 * Dump the state of every IO-APIC to the kernel log.
 *
 * For each IO-APIC this prints registers #00 and #01 (and #02 when the
 * reported version is >= 0x10), followed by every redirection table
 * entry.  Afterwards the IRQ -> (apic, pin) mappings recorded in each
 * irq_cfg's irq_2_pin list are printed.
 *
 * Does nothing when apic_verbosity is APIC_QUIET.
 */
__apicdebuginit(void) print_IO_APIC(void)
{
	int apic, i;
	union IO_APIC_reg_00 reg_00;
	union IO_APIC_reg_01 reg_01;
	union IO_APIC_reg_02 reg_02;
	unsigned long flags;

	if (apic_verbosity == APIC_QUIET)
		return;

	printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
	for (i = 0; i < nr_ioapics; i++)
		printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
		       mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]);

	/*
	 * We are a bit conservative about what we expect.  We have to
	 * know about every hardware change ASAP.
	 */
	printk(KERN_INFO "testing the IO APIC.......................\n");

	for (apic = 0; apic < nr_ioapics; apic++) {

		/* Snapshot the registers under the lock, print afterwards. */
		spin_lock_irqsave(&ioapic_lock, flags);
		reg_00.raw = io_apic_read(apic, 0);
		reg_01.raw = io_apic_read(apic, 1);
		if (reg_01.bits.version >= 0x10)
			reg_02.raw = io_apic_read(apic, 2);
		spin_unlock_irqrestore(&ioapic_lock, flags);

		printk("\n");
		printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid);
		printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
		printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID);

		/* Use the union's raw view, as for registers #00 and #02. */
		printk(KERN_DEBUG ".... register #01: %08X\n", reg_01.raw);
		printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries);

		printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ);
		printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version);

		/* reg_02 was only read above under this same condition. */
		if (reg_01.bits.version >= 0x10) {
			printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
			printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration);
		}

		printk(KERN_DEBUG ".... IRQ redirection table:\n");

		printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol"
			" Stat Dmod Deli Vect: \n");

		/* 'entries' holds the highest entry index, hence '<='. */
		for (i = 0; i <= reg_01.bits.entries; i++) {
			struct IO_APIC_route_entry entry;

			entry = ioapic_read_entry(apic, i);

			printk(KERN_DEBUG " %02x %03X ",
				i,
				entry.dest
			);

			printk("%1d %1d %1d %1d %1d %1d %1d %02X\n",
				entry.mask,
				entry.trigger,
				entry.irr,
				entry.polarity,
				entry.delivery_status,
				entry.dest_mode,
				entry.delivery_mode,
				entry.vector
			);
		}
	}
	printk(KERN_DEBUG "IRQ to pin mappings:\n");
	for (i = 0; i < nr_irqs; i++) {
		struct irq_cfg *cfg = irq_cfg(i);
		struct irq_pin_list *entry = cfg->irq_2_pin;

		/* IRQs without any pin attached are not listed. */
		if (!entry)
			continue;
		printk(KERN_DEBUG "IRQ%d ", i);
		for (;;) {
			printk("-> %d:%d", entry->apic, entry->pin);
			if (!entry->next)
				break;
			entry = entry->next;
		}
		printk("\n");
	}

	printk(KERN_INFO ".................................... done.\n");
}
|
|
|
|
|
2008-07-20 19:52:49 -04:00
|
|
|
/*
 * Print a 256-bit local APIC bitfield as eight rows of 32 '0'/'1'
 * characters, least significant bit first.
 *
 * @base: offset of the first 32-bit register of the field; the
 *        following registers are read at a stride of 0x10.
 *
 * Does nothing when apic_verbosity is APIC_QUIET.
 */
__apicdebuginit(void) print_APIC_bitfield(int base)
{
	unsigned int v;
	int i, j;

	if (apic_verbosity == APIC_QUIET)
		return;

	/* Column header; the trailing KERN_DEBUG prefixes the first row. */
	printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG);
	for (i = 0; i < 8; i++) {
		v = apic_read(base + i*0x10);
		for (j = 0; j < 32; j++) {
			/*
			 * Unsigned constant: shifting a signed 1 into bit 31
			 * would be undefined behaviour.
			 */
			if (v & (1U << j))
				printk("1");
			else
				printk("0");
		}
		printk("\n");
	}
}
|
|
|
|
|
2008-07-20 19:52:49 -04:00
|
|
|
/*
 * Dump the local APIC registers of the CPU this runs on to the kernel
 * log.
 *
 * @dummy: unused; present so the function can be handed to
 *         on_each_cpu() (see print_all_local_APICs()).
 *
 * Does nothing when apic_verbosity is APIC_QUIET.
 */
__apicdebuginit(void) print_local_APIC(void *dummy)
{
	unsigned int v, ver, maxlvt;
	/*
	 * 64 bits wide: the upper half is printed as ICR2 below, and a
	 * shift by 32 of a 32-bit 'unsigned long' would be undefined.
	 */
	u64 icr;

	if (apic_verbosity == APIC_QUIET)
		return;

	printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
		smp_processor_id(), hard_smp_processor_id());
	v = apic_read(APIC_ID);
	printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, read_apic_id());
	v = apic_read(APIC_LVR);
	printk(KERN_INFO "... APIC VERSION: %08x\n", v);
	ver = GET_APIC_VERSION(v);
	maxlvt = lapic_get_maxlvt();

	v = apic_read(APIC_TASKPRI);
	printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);

	v = apic_read(APIC_ARBPRI);
	printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
		v & APIC_ARBPRI_MASK);
	v = apic_read(APIC_PROCPRI);
	printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);

	v = apic_read(APIC_EOI);
	printk(KERN_DEBUG "... APIC EOI: %08x\n", v);
	v = apic_read(APIC_RRR);
	printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
	v = apic_read(APIC_LDR);
	printk(KERN_DEBUG "... APIC LDR: %08x\n", v);
	v = apic_read(APIC_DFR);
	printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
	v = apic_read(APIC_SPIV);
	printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);

	printk(KERN_DEBUG "... APIC ISR field:\n");
	print_APIC_bitfield(APIC_ISR);
	printk(KERN_DEBUG "... APIC TMR field:\n");
	print_APIC_bitfield(APIC_TMR);
	printk(KERN_DEBUG "... APIC IRR field:\n");
	print_APIC_bitfield(APIC_IRR);

	v = apic_read(APIC_ESR);
	printk(KERN_DEBUG "... APIC ESR: %08x\n", v);

	/* Low word is reported as ICR, high word as ICR2. */
	icr = apic_icr_read();
	printk(KERN_DEBUG "... APIC ICR: %08x\n", (u32)icr);
	printk(KERN_DEBUG "... APIC ICR2: %08x\n", (u32)(icr >> 32));

	v = apic_read(APIC_LVTT);
	printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);

	if (maxlvt > 3) {                       /* PC is LVT#4. */
		v = apic_read(APIC_LVTPC);
		printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v);
	}
	v = apic_read(APIC_LVT0);
	printk(KERN_DEBUG "... APIC LVT0: %08x\n", v);
	v = apic_read(APIC_LVT1);
	printk(KERN_DEBUG "... APIC LVT1: %08x\n", v);

	if (maxlvt > 2) {			/* ERR is LVT#3. */
		v = apic_read(APIC_LVTERR);
		printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v);
	}

	v = apic_read(APIC_TMICT);
	printk(KERN_DEBUG "... APIC TMICT: %08x\n", v);
	v = apic_read(APIC_TMCCT);
	printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v);
	v = apic_read(APIC_TDCR);
	printk(KERN_DEBUG "... APIC TDCR: %08x\n", v);
	printk("\n");
}
|
|
|
|
|
2008-07-20 19:52:49 -04:00
|
|
|
/*
 * Run print_local_APIC() via on_each_cpu(), passing no argument
 * (NULL) and 1 as the final parameter.
 */
__apicdebuginit(void) print_all_local_APICs(void)
{
	on_each_cpu(print_local_APIC, NULL, 1);
}
|
|
|
|
|
2008-07-20 19:52:49 -04:00
|
|
|
/*
 * Dump the legacy PIC pair to the kernel log: the values reported as
 * IMR, IRR and ISR are read while holding i8259A_lock, the ELCR is
 * read afterwards without it.
 *
 * Does nothing when apic_verbosity is APIC_QUIET.
 */
__apicdebuginit(void) print_PIC(void)
{
	unsigned int imr, irr, isr, elcr;
	unsigned long flags;

	if (apic_verbosity == APIC_QUIET)
		return;

	printk(KERN_DEBUG "\nprinting PIC contents\n");

	spin_lock_irqsave(&i8259A_lock, flags);

	/* High byte from the 0xa0-based chip, low byte from the 0x20 one. */
	imr = inb(0xa1) << 8 | inb(0x21);
	printk(KERN_DEBUG "... PIC IMR: %04x\n", imr);

	irr = inb(0xa0) << 8 | inb(0x20);
	printk(KERN_DEBUG "... PIC IRR: %04x\n", irr);

	/*
	 * Writing 0x0b to both command ports makes the next read return
	 * what is reported as ISR; 0x0a is written back afterwards
	 * (presumably re-selecting IRR readback - see the 8259A OCW3).
	 */
	outb(0x0b,0xa0);
	outb(0x0b,0x20);
	isr = inb(0xa0) << 8 | inb(0x20);
	outb(0x0a,0xa0);
	outb(0x0a,0x20);

	spin_unlock_irqrestore(&i8259A_lock, flags);

	printk(KERN_DEBUG "... PIC ISR: %04x\n", isr);

	elcr = inb(0x4d1) << 8 | inb(0x4d0);
	printk(KERN_DEBUG "... PIC ELCR: %04x\n", elcr);
}
|
|
|
|
|
2008-07-20 19:52:49 -04:00
|
|
|
__apicdebuginit(int) print_all_ICs(void)
|
|
|
|
{
|
|
|
|
print_PIC();
|
|
|
|
print_all_local_APICs();
|
|
|
|
print_IO_APIC();
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
fs_initcall(print_all_ICs);
|
|
|
|
|
2005-04-16 18:20:36 -04:00
|
|
|
|
2008-01-30 07:30:39 -05:00
|
|
|
/*
 * Boot-time IO-APIC preparation:
 *  - record the number of pins of every IO-APIC in nr_ioapic_registers[],
 *  - locate the pin the i8259 is wired to (an unmasked ExtINT entry),
 *    cross-checking the result against the MP table,
 *  - finally clear all IO-APICs.
 */
void __init enable_IO_APIC(void)
{
	union IO_APIC_reg_01 reg_01;
	int mp_apic, mp_pin;
	int apic;
	unsigned long flags;

	/*
	 * The number of IO-APIC IRQ registers (== #pins):
	 */
	for (apic = 0; apic < nr_ioapics; apic++) {
		spin_lock_irqsave(&ioapic_lock, flags);
		reg_01.raw = io_apic_read(apic, 1);
		spin_unlock_irqrestore(&ioapic_lock, flags);
		nr_ioapic_registers[apic] = reg_01.bits.entries + 1;
	}

	/* See if any of the pins is in ExtINT mode */
	for (apic = 0; apic < nr_ioapics; apic++) {
		int pin;

		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
			struct IO_APIC_route_entry entry;

			entry = ioapic_read_entry(apic, pin);

			/* Skip masked lines and anything not in ExtINT mode. */
			if (entry.mask != 0 || entry.delivery_mode != dest_ExtINT)
				continue;

			/* Enabled ExtINT line: this is where the i8259 sits. */
			ioapic_i8259.apic = apic;
			ioapic_i8259.pin  = pin;
			goto found_i8259;
		}
	}
 found_i8259:
	/* Check whether the MP table has reported the ExtINT */
	mp_pin  = find_isa_irq_pin(0, mp_ExtINT);
	mp_apic = find_isa_irq_apic(0, mp_ExtINT);

	/* Trust the MP table if nothing is setup in the hardware */
	if (ioapic_i8259.pin == -1 && mp_pin >= 0) {
		printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n");
		ioapic_i8259.pin  = mp_pin;
		ioapic_i8259.apic = mp_apic;
	}

	/* Complain if both sources name a pin and they disagree */
	if (mp_pin >= 0 && ioapic_i8259.pin >= 0 &&
	    (ioapic_i8259.apic != mp_apic || ioapic_i8259.pin != mp_pin))
		printk(KERN_WARNING "ExtINT in hardware and MP table differ\n");

	/*
	 * Do not trust the IO-APIC being empty at bootup
	 */
	clear_IO_APIC();
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Not an __init, needed by the reboot code
|
|
|
|
*/
|
|
|
|
void disable_IO_APIC(void)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* Clear the IO-APIC before rebooting:
|
|
|
|
*/
|
|
|
|
clear_IO_APIC();
|
|
|
|
|
2005-06-25 17:57:45 -04:00
|
|
|
/*
|
2005-09-09 06:59:04 -04:00
|
|
|
* If the i8259 is routed through an IOAPIC
|
2005-06-25 17:57:45 -04:00
|
|
|
* Put that IOAPIC in virtual wire mode
|
2005-09-09 06:59:04 -04:00
|
|
|
* so legacy interrupts can be delivered.
|
2005-06-25 17:57:45 -04:00
|
|
|
*/
|
2006-01-11 16:46:06 -05:00
|
|
|
if (ioapic_i8259.pin != -1) {
|
2005-06-25 17:57:45 -04:00
|
|
|
struct IO_APIC_route_entry entry;
|
|
|
|
|
|
|
|
memset(&entry, 0, sizeof(entry));
|
|
|
|
entry.mask = 0; /* Enabled */
|
|
|
|
entry.trigger = 0; /* Edge */
|
|
|
|
entry.irr = 0;
|
|
|
|
entry.polarity = 0; /* High */
|
|
|
|
entry.delivery_status = 0;
|
|
|
|
entry.dest_mode = 0; /* Physical */
|
2006-01-11 16:46:06 -05:00
|
|
|
entry.delivery_mode = dest_ExtINT; /* ExtInt */
|
2005-06-25 17:57:45 -04:00
|
|
|
entry.vector = 0;
|
2008-07-11 21:44:16 -04:00
|
|
|
entry.dest = read_apic_id();
|
2005-06-25 17:57:45 -04:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Add it to the IO-APIC irq-routing table:
|
|
|
|
*/
|
2006-09-26 04:52:30 -04:00
|
|
|
ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
|
2005-06-25 17:57:45 -04:00
|
|
|
}
|
|
|
|
|
2006-01-11 16:46:06 -05:00
|
|
|
disconnect_bsp_APIC(ioapic_i8259.pin != -1);
|
2005-04-16 18:20:36 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* There is a nasty bug in some older SMP boards, their mptable lies
|
|
|
|
* about the timer IRQ. We do the following to work around the situation:
|
|
|
|
*
|
|
|
|
* - timer IRQ defaults to IO-APIC IRQ
|
|
|
|
* - if this function detects that timer IRQs are defunct, then we fall
|
|
|
|
* back to ISA timer IRQs
|
|
|
|
*/
|
|
|
|
static int __init timer_irq_works(void)
|
|
|
|
{
|
|
|
|
unsigned long t1 = jiffies;
|
x86: fix "Kernel panic - not syncing: IO-APIC + timer doesn't work!"
this is the tale of a full day spent debugging an ancient but elusive bug.
after booting up thousands of random .config kernels, i finally happened
to generate a .config that produced the following rare bootup failure
on 32-bit x86:
| ..TIMER: vector=0x31 apic1=0 pin1=2 apic2=-1 pin2=-1
| ..MP-BIOS bug: 8254 timer not connected to IO-APIC
| ...trying to set up timer (IRQ0) through the 8259A ... failed.
| ...trying to set up timer as Virtual Wire IRQ... failed.
| ...trying to set up timer as ExtINT IRQ... failed :(.
| Kernel panic - not syncing: IO-APIC + timer doesn't work! Boot with apic=debug
| and send a report. Then try booting with the 'noapic' option
this bug has been reported many times during the years, but it was never
reproduced nor fixed.
the bug that i hit was extremely sensitive to .config details.
First i did a .config-bisection - suspecting some .config detail.
That led to CONFIG_X86_MCE: enabling X86_MCE magically made the bug disappear
and the system would boot up just fine.
Debugging my way through the MCE code ended up identifying two unlikely
candidates: the thing that made a real difference to the hang was that
X86_MCE did two printks:
Intel machine check architecture supported.
Intel machine check reporting enabled on CPU#1.
Adding the same printks to a !CONFIG_X86_MCE kernel made the bug go away!
this left timing as the main suspect: i experimented with adding various
udelay()s to the arch/x86/kernel/io_apic_32.c:check_timer() function, and
the race window turned out to be narrower than 30 microseconds (!).
That made debugging especially funny, debugging without having printk
ability before the bug hits is ... interesting ;-)
eventually i started suspecting IRQ activities - those are pretty much the
only thing that happen this early during bootup and have the timescale of
a few dozen microseconds. Also, check_timer() changes the IRQ hardware
in various creative ways, so the main candidate became IRQ0 interaction.
i've added a counter to track timer irqs (on which core they arrived, at
what exact time, etc.) and found that no timer IRQ would arrive after the
bug condition hits - even if we re-enable IRQ0 and re-initialize the i8259A,
but that we'd get a small number of timer irqs right around the time when we
call the check_timer() function.
Eventually i got the following backtrace triggered from debug code in the
timer interrupt:
...trying to set up timer as Virtual Wire IRQ... failed.
...trying to set up timer as ExtINT IRQ...
Pid: 1, comm: swapper Not tainted (2.6.24-rc5 #57)
EIP: 0060:[<c044d57e>] EFLAGS: 00000246 CPU: 0
EIP is at _spin_unlock_irqrestore+0x5/0x1c
EAX: c0634178 EBX: 00000000 ECX: c4947d63 EDX: 00000246
ESI: 00000002 EDI: 00010031 EBP: c04e0f2e ESP: f7c41df4
DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068
CR0: 8005003b CR2: ffe04000 CR3: 00630000 CR4: 000006d0
DR0: 00000000 DR1: 00000000 DR2: 00000000 DR3: 00000000
DR6: ffff0ff0 DR7: 00000400
[<c05f5784>] setup_IO_APIC+0x9c3/0xc5c
the spin_unlock() was called from init_8259A(). Wait ... we have an IRQ0
entry while we are in the middle of setting up the local APIC, the i8259A
and the PIT??
That is certainly not how it's supposed to work! check_timer() was supposed
to be called with irqs turned off - but this eroded away sometime in the
past. This code would still work most of the time because this code runs
very quickly, but just the right timing conditions are present and IRQ0
hits in this small, ~30 usecs window, timer irqs stop and the system does
not boot up. Also, given how early this is during bootup, the hang is
very deterministic - but it would only occur on certain machines (and
certain configs).
The fix was quite simple: disable/restore interrupts properly in this
function. With that in place the test-system now boots up just fine.
(64-bit x86 io_apic_64.c had the same bug.)
Phew! One down, only 1500 other kernel bugs are left ;-)
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2007-12-18 12:05:58 -05:00
|
|
|
unsigned long flags;
|
2005-04-16 18:20:36 -04:00
|
|
|
|
x86: fix "Kernel panic - not syncing: IO-APIC + timer doesn't work!"
this is the tale of a full day spent debugging an ancient but elusive bug.
after booting up thousands of random .config kernels, i finally happened
to generate a .config that produced the following rare bootup failure
on 32-bit x86:
| ..TIMER: vector=0x31 apic1=0 pin1=2 apic2=-1 pin2=-1
| ..MP-BIOS bug: 8254 timer not connected to IO-APIC
| ...trying to set up timer (IRQ0) through the 8259A ... failed.
| ...trying to set up timer as Virtual Wire IRQ... failed.
| ...trying to set up timer as ExtINT IRQ... failed :(.
| Kernel panic - not syncing: IO-APIC + timer doesn't work! Boot with apic=debug
| and send a report. Then try booting with the 'noapic' option
this bug has been reported many times during the years, but it was never
reproduced nor fixed.
the bug that i hit was extremely sensitive to .config details.
First i did a .config-bisection - suspecting some .config detail.
That led to CONFIG_X86_MCE: enabling X86_MCE magically made the bug disappear
and the system would boot up just fine.
Debugging my way through the MCE code ended up identifying two unlikely
candidates: the thing that made a real difference to the hang was that
X86_MCE did two printks:
Intel machine check architecture supported.
Intel machine check reporting enabled on CPU#1.
Adding the same printks to a !CONFIG_X86_MCE kernel made the bug go away!
this left timing as the main suspect: i experimented with adding various
udelay()s to the arch/x86/kernel/io_apic_32.c:check_timer() function, and
the race window turned out to be narrower than 30 microseconds (!).
That made debugging especially funny, debugging without having printk
ability before the bug hits is ... interesting ;-)
eventually i started suspecting IRQ activities - those are pretty much the
only thing that happen this early during bootup and have the timescale of
a few dozen microseconds. Also, check_timer() changes the IRQ hardware
in various creative ways, so the main candidate became IRQ0 interaction.
i've added a counter to track timer irqs (on which core they arrived, at
what exact time, etc.) and found that no timer IRQ would arrive after the
bug condition hits - even if we re-enable IRQ0 and re-initialize the i8259A,
but that we'd get a small number of timer irqs right around the time when we
call the check_timer() function.
Eventually i got the following backtrace triggered from debug code in the
timer interrupt:
...trying to set up timer as Virtual Wire IRQ... failed.
...trying to set up timer as ExtINT IRQ...
Pid: 1, comm: swapper Not tainted (2.6.24-rc5 #57)
EIP: 0060:[<c044d57e>] EFLAGS: 00000246 CPU: 0
EIP is at _spin_unlock_irqrestore+0x5/0x1c
EAX: c0634178 EBX: 00000000 ECX: c4947d63 EDX: 00000246
ESI: 00000002 EDI: 00010031 EBP: c04e0f2e ESP: f7c41df4
DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068
CR0: 8005003b CR2: ffe04000 CR3: 00630000 CR4: 000006d0
DR0: 00000000 DR1: 00000000 DR2: 00000000 DR3: 00000000
DR6: ffff0ff0 DR7: 00000400
[<c05f5784>] setup_IO_APIC+0x9c3/0xc5c
the spin_unlock() was called from init_8259A(). Wait ... we have an IRQ0
entry while we are in the middle of setting up the local APIC, the i8259A
and the PIT??
That is certainly not how it's supposed to work! check_timer() was supposed
to be called with irqs turned off - but this eroded away sometime in the
past. This code would still work most of the time because this code runs
very quickly, but just the right timing conditions are present and IRQ0
hits in this small, ~30 usecs window, timer irqs stop and the system does
not boot up. Also, given how early this is during bootup, the hang is
very deterministic - but it would only occur on certain machines (and
certain configs).
The fix was quite simple: disable/restore interrupts properly in this
function. With that in place the test-system now boots up just fine.
(64-bit x86 io_apic_64.c had the same bug.)
Phew! One down, only 1500 other kernel bugs are left ;-)
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2007-12-18 12:05:58 -05:00
|
|
|
local_save_flags(flags);
|
2005-04-16 18:20:36 -04:00
|
|
|
local_irq_enable();
|
|
|
|
/* Let ten ticks pass... */
|
|
|
|
mdelay((10 * 1000) / HZ);
|
x86: fix "Kernel panic - not syncing: IO-APIC + timer doesn't work!"
this is the tale of a full day spent debugging an ancient but elusive bug.
after booting up thousands of random .config kernels, i finally happened
to generate a .config that produced the following rare bootup failure
on 32-bit x86:
| ..TIMER: vector=0x31 apic1=0 pin1=2 apic2=-1 pin2=-1
| ..MP-BIOS bug: 8254 timer not connected to IO-APIC
| ...trying to set up timer (IRQ0) through the 8259A ... failed.
| ...trying to set up timer as Virtual Wire IRQ... failed.
| ...trying to set up timer as ExtINT IRQ... failed :(.
| Kernel panic - not syncing: IO-APIC + timer doesn't work! Boot with apic=debug
| and send a report. Then try booting with the 'noapic' option
this bug has been reported many times during the years, but it was never
reproduced nor fixed.
the bug that i hit was extremely sensitive to .config details.
First i did a .config-bisection - suspecting some .config detail.
That led to CONFIG_X86_MCE: enabling X86_MCE magically made the bug disappear
and the system would boot up just fine.
Debugging my way through the MCE code ended up identifying two unlikely
candidates: the thing that made a real difference to the hang was that
X86_MCE did two printks:
Intel machine check architecture supported.
Intel machine check reporting enabled on CPU#1.
Adding the same printks to a !CONFIG_X86_MCE kernel made the bug go away!
this left timing as the main suspect: i experimented with adding various
udelay()s to the arch/x86/kernel/io_apic_32.c:check_timer() function, and
the race window turned out to be narrower than 30 microseconds (!).
That made debugging especially funny, debugging without having printk
ability before the bug hits is ... interesting ;-)
eventually i started suspecting IRQ activities - those are pretty much the
only thing that happen this early during bootup and have the timescale of
a few dozen microseconds. Also, check_timer() changes the IRQ hardware
in various creative ways, so the main candidate became IRQ0 interaction.
i've added a counter to track timer irqs (on which core they arrived, at
what exact time, etc.) and found that no timer IRQ would arrive after the
bug condition hits - even if we re-enable IRQ0 and re-initialize the i8259A,
but that we'd get a small number of timer irqs right around the time when we
call the check_timer() function.
Eventually i got the following backtrace triggered from debug code in the
timer interrupt:
...trying to set up timer as Virtual Wire IRQ... failed.
...trying to set up timer as ExtINT IRQ...
Pid: 1, comm: swapper Not tainted (2.6.24-rc5 #57)
EIP: 0060:[<c044d57e>] EFLAGS: 00000246 CPU: 0
EIP is at _spin_unlock_irqrestore+0x5/0x1c
EAX: c0634178 EBX: 00000000 ECX: c4947d63 EDX: 00000246
ESI: 00000002 EDI: 00010031 EBP: c04e0f2e ESP: f7c41df4
DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068
CR0: 8005003b CR2: ffe04000 CR3: 00630000 CR4: 000006d0
DR0: 00000000 DR1: 00000000 DR2: 00000000 DR3: 00000000
DR6: ffff0ff0 DR7: 00000400
[<c05f5784>] setup_IO_APIC+0x9c3/0xc5c
the spin_unlock() was called from init_8259A(). Wait ... we have an IRQ0
entry while we are in the middle of setting up the local APIC, the i8259A
and the PIT??
That is certainly not how it's supposed to work! check_timer() was supposed
to be called with irqs turned off - but this eroded away sometime in the
past. This code would still work most of the time because this code runs
very quickly, but just the right timing conditions are present and IRQ0
hits in this small, ~30 usecs window, timer irqs stop and the system does
not boot up. Also, given how early this is during bootup, the hang is
very deterministic - but it would only occur on certain machines (and
certain configs).
The fix was quite simple: disable/restore interrupts properly in this
function. With that in place the test-system now boots up just fine.
(64-bit x86 io_apic_64.c had the same bug.)
Phew! One down, only 1500 other kernel bugs are left ;-)
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2007-12-18 12:05:58 -05:00
|
|
|
local_irq_restore(flags);
|
2005-04-16 18:20:36 -04:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Expect a few ticks at least, to be sure some possible
|
|
|
|
* glue logic does not lock up after one or two first
|
|
|
|
* ticks in a non-ExtINT mode. Also the local APIC
|
|
|
|
* might have cached one ExtINT interrupt. Finally, at
|
|
|
|
* least one tick may be lost due to delays.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/* jiffies wrap? */
|
2008-01-30 07:32:19 -05:00
|
|
|
if (time_after(jiffies, t1 + 4))
|
2005-04-16 18:20:36 -04:00
|
|
|
return 1;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* In the SMP+IOAPIC case it might happen that there are an unspecified
|
|
|
|
* number of pending IRQ events unhandled. These cases are very rare,
|
|
|
|
* so we 'resend' these IRQs via IPIs, to the same CPU. It's much
|
|
|
|
* better to do it this way as thus we do not have to be aware of
|
|
|
|
* 'pending' interrupts in the IRQ path, except at this point.
|
|
|
|
*/
|
|
|
|
/*
|
|
|
|
* Edge triggered needs to resend any interrupt
|
|
|
|
* that was delayed but this is now handled in the device
|
|
|
|
* independent code.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Starting up a edge-triggered IO-APIC interrupt is
|
|
|
|
* nasty - we need to make sure that we get the edge.
|
|
|
|
* If it is already asserted for some reason, we need
|
|
|
|
* return 1 to indicate that is was pending.
|
|
|
|
*
|
|
|
|
* This is not complete - we should be able to fake
|
|
|
|
* an edge even if it isn't on the 8259A...
|
|
|
|
*/
|
|
|
|
|
2006-10-04 05:16:25 -04:00
|
|
|
static unsigned int startup_ioapic_irq(unsigned int irq)
|
2005-04-16 18:20:36 -04:00
|
|
|
{
|
|
|
|
int was_pending = 0;
|
|
|
|
unsigned long flags;
|
|
|
|
|
|
|
|
spin_lock_irqsave(&ioapic_lock, flags);
|
|
|
|
if (irq < 16) {
|
|
|
|
disable_8259A_irq(irq);
|
|
|
|
if (i8259A_irq_pending(irq))
|
|
|
|
was_pending = 1;
|
|
|
|
}
|
|
|
|
__unmask_IO_APIC_irq(irq);
|
|
|
|
spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
|
|
|
|
|
|
return was_pending;
|
|
|
|
}
|
|
|
|
|
2006-10-04 05:16:46 -04:00
|
|
|
static int ioapic_retrigger_irq(unsigned int irq)
|
2006-06-29 05:24:44 -04:00
|
|
|
{
|
2008-08-19 23:50:06 -04:00
|
|
|
struct irq_cfg *cfg = irq_cfg(irq);
|
2006-10-21 12:37:02 -04:00
|
|
|
unsigned long flags;
|
2006-10-04 05:16:51 -04:00
|
|
|
|
2006-10-21 12:37:02 -04:00
|
|
|
spin_lock_irqsave(&vector_lock, flags);
|
2008-07-15 17:14:31 -04:00
|
|
|
send_IPI_mask(cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector);
|
2006-10-21 12:37:02 -04:00
|
|
|
spin_unlock_irqrestore(&vector_lock, flags);
|
2006-06-29 05:24:44 -04:00
|
|
|
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2005-04-16 18:20:36 -04:00
|
|
|
/*
|
|
|
|
* Level and edge triggered IO-APIC interrupts need different handling,
|
|
|
|
* so we use two separate IRQ descriptors. Edge triggered IRQs can be
|
|
|
|
* handled with the level-triggered descriptor, but that one has slightly
|
|
|
|
* more overhead. Level-triggered interrupts cannot be handled with the
|
|
|
|
* edge-triggered handler, without risking IRQ storms and other ugly
|
|
|
|
* races.
|
|
|
|
*/
|
|
|
|
|
2007-02-23 06:40:58 -05:00
|
|
|
#ifdef CONFIG_SMP
|
x64, x2apic/intr-remap: IO-APIC support for interrupt-remapping
IO-APIC support in the presence of interrupt-remapping infrastructure.
IO-APIC RTE will be programmed with interrupt-remapping table entry(IRTE)
index and the IRTE will contain information about the vector, cpu destination,
trigger mode etc, which traditionally was present in the IO-APIC RTE.
Introduce a new irq_chip for cleaner irq migration (in the process
context as opposed to the current irq migration in the context of an interrupt.
interrupt-remapping infrastructure will help us achieve this cleanly).
For edge triggered, irq migration is a simple atomic update(of vector
and cpu destination) of IRTE and flush the hardware cache.
For level triggered, we need to modify the io-apic RTE aswell with the update
vector information, along with modifying IRTE with vector and cpu destination.
So irq migration for level triggered is little bit more complex compared to
edge triggered migration. But the good news is, we use the same algorithm
for level triggered migration as we have today, only difference being,
we now initiate the irq migration from process context instead of the
interrupt context.
In future, when we do a directed EOI (combined with cpu EOI broadcast
suppression) to the IO-APIC, level triggered irq migration will also be
as simple as edge triggered migration and we can do the irq migration
with a simple atomic update to IO-APIC RTE.
TBD: some tests/changes needed in the presence of fixup_irqs() for
level triggered irq migration.
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: akpm@linux-foundation.org
Cc: arjan@linux.intel.com
Cc: andi@firstfloor.org
Cc: ebiederm@xmission.com
Cc: jbarnes@virtuousgeek.org
Cc: steiner@sgi.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-07-10 14:16:56 -04:00
|
|
|
|
|
|
|
#ifdef CONFIG_INTR_REMAP
|
|
|
|
static void ir_irq_migration(struct work_struct *work);
|
|
|
|
|
|
|
|
static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Migrate the IO-APIC irq in the presence of intr-remapping.
|
|
|
|
*
|
|
|
|
* For edge triggered, irq migration is a simple atomic update(of vector
|
|
|
|
* and cpu destination) of IRTE and flush the hardware cache.
|
|
|
|
*
|
|
|
|
* For level triggered, we need to modify the io-apic RTE as well with the updated
|
|
|
|
* vector information, along with modifying IRTE with vector and destination.
|
|
|
|
* So irq migration for level triggered is a little bit more complex compared to
|
|
|
|
* edge triggered migration. But the good news is, we use the same algorithm
|
|
|
|
* for level triggered migration as we have today, only difference being,
|
|
|
|
* we now initiate the irq migration from process context instead of the
|
|
|
|
* interrupt context.
|
|
|
|
*
|
|
|
|
* In future, when we do a directed EOI (combined with cpu EOI broadcast
|
|
|
|
* suppression) to the IO-APIC, level triggered irq migration will also be
|
|
|
|
* as simple as edge triggered migration and we can do the irq migration
|
|
|
|
* with a simple atomic update to IO-APIC RTE.
|
|
|
|
*/
|
|
|
|
static void migrate_ioapic_irq(int irq, cpumask_t mask)
{
	struct irq_desc *desc;
	struct irq_cfg *cfg;
	struct irte irte;
	cpumask_t target_cpus, stale_cpus;
	unsigned long flags;
	unsigned int dest;

	/* Bail out unless at least one of the requested cpus is online. */
	cpus_and(target_cpus, mask, cpu_online_map);
	if (cpus_empty(target_cpus))
		return;

	/* Read the current IRTE; only its vector and dest_id are changed below. */
	if (get_irte(irq, &irte))
		return;

	/* Give up quietly if no vector can be assigned for the new mask. */
	if (assign_irq_vector(irq, mask))
		return;

	cfg = irq_cfg(irq);
	cpus_and(target_cpus, cfg->domain, mask);
	dest = cpu_mask_to_apicid(target_cpus);

	/*
	 * A level triggered irq additionally needs the new vector written
	 * into the IO-APIC RTE, under ioapic_lock.
	 */
	desc = irq_to_desc(irq);
	if (desc->status & IRQ_LEVEL) {
		spin_lock_irqsave(&ioapic_lock, flags);
		__target_IO_APIC_irq(irq, dest, cfg->vector);
		spin_unlock_irqrestore(&ioapic_lock, flags);
	}

	irte.vector = cfg->vector;
	irte.dest_id = IRTE_DEST(dest);

	/*
	 * Modify the IRTE and flush the interrupt entry cache.
	 */
	modify_irte(irq, &irte);

	/*
	 * If a move is in progress, ask every online cpu of the old domain
	 * to clean up, and record how many of them have to respond.
	 */
	if (cfg->move_in_progress) {
		cpus_and(stale_cpus, cfg->old_domain, cpu_online_map);
		cfg->move_cleanup_count = cpus_weight(stale_cpus);
		send_IPI_mask(stale_cpus, IRQ_MOVE_CLEANUP_VECTOR);
		cfg->move_in_progress = 0;
	}

	desc->affinity = mask;
}
|
|
|
|
|
|
|
|
static int migrate_irq_remapped_level(int irq)
|
|
|
|
{
|
|
|
|
int ret = -1;
|
2008-08-19 23:50:05 -04:00
|
|
|
struct irq_desc *desc = irq_to_desc(irq);
|
x64, x2apic/intr-remap: IO-APIC support for interrupt-remapping
IO-APIC support in the presence of interrupt-remapping infrastructure.
IO-APIC RTE will be programmed with interrupt-remapping table entry(IRTE)
index and the IRTE will contain information about the vector, cpu destination,
trigger mode etc, which traditionally was present in the IO-APIC RTE.
Introduce a new irq_chip for cleaner irq migration (in the process
context as opposed to the current irq migration in the context of an interrupt.
interrupt-remapping infrastructure will help us achieve this cleanly).
For edge triggered, irq migration is a simple atomic update(of vector
and cpu destination) of IRTE and flush the hardware cache.
For level triggered, we need to modify the io-apic RTE aswell with the update
vector information, along with modifying IRTE with vector and cpu destination.
So irq migration for level triggered is little bit more complex compared to
edge triggered migration. But the good news is, we use the same algorithm
for level triggered migration as we have today, only difference being,
we now initiate the irq migration from process context instead of the
interrupt context.
In future, when we do a directed EOI (combined with cpu EOI broadcast
suppression) to the IO-APIC, level triggered irq migration will also be
as simple as edge triggered migration and we can do the irq migration
with a simple atomic update to IO-APIC RTE.
TBD: some tests/changes needed in the presence of fixup_irqs() for
level triggered irq migration.
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: akpm@linux-foundation.org
Cc: arjan@linux.intel.com
Cc: andi@firstfloor.org
Cc: ebiederm@xmission.com
Cc: jbarnes@virtuousgeek.org
Cc: steiner@sgi.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-07-10 14:16:56 -04:00
|
|
|
|
|
|
|
mask_IO_APIC_irq(irq);
|
|
|
|
|
|
|
|
if (io_apic_level_ack_pending(irq)) {
|
|
|
|
/*
|
|
|
|
* Interrupt in progress. Migrating irq now will change the
|
|
|
|
* vector information in the IO-APIC RTE and that will confuse
|
|
|
|
* the EOI broadcast performed by cpu.
|
|
|
|
* So, delay the irq migration to the next instance.
|
|
|
|
*/
|
|
|
|
schedule_delayed_work(&ir_migration_work, 1);
|
|
|
|
goto unmask;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* everthing is clear. we have right of way */
|
2008-08-19 23:50:05 -04:00
|
|
|
migrate_ioapic_irq(irq, desc->pending_mask);
|
x64, x2apic/intr-remap: IO-APIC support for interrupt-remapping
IO-APIC support in the presence of interrupt-remapping infrastructure.
IO-APIC RTE will be programmed with interrupt-remapping table entry(IRTE)
index and the IRTE will contain information about the vector, cpu destination,
trigger mode etc, which traditionally was present in the IO-APIC RTE.
Introduce a new irq_chip for cleaner irq migration (in the process
context as opposed to the current irq migration in the context of an interrupt.
interrupt-remapping infrastructure will help us achieve this cleanly).
For edge triggered, irq migration is a simple atomic update(of vector
and cpu destination) of IRTE and flush the hardware cache.
For level triggered, we need to modify the io-apic RTE aswell with the update
vector information, along with modifying IRTE with vector and cpu destination.
So irq migration for level triggered is little bit more complex compared to
edge triggered migration. But the good news is, we use the same algorithm
for level triggered migration as we have today, only difference being,
we now initiate the irq migration from process context instead of the
interrupt context.
In future, when we do a directed EOI (combined with cpu EOI broadcast
suppression) to the IO-APIC, level triggered irq migration will also be
as simple as edge triggered migration and we can do the irq migration
with a simple atomic update to IO-APIC RTE.
TBD: some tests/changes needed in the presence of fixup_irqs() for
level triggered irq migration.
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: akpm@linux-foundation.org
Cc: arjan@linux.intel.com
Cc: andi@firstfloor.org
Cc: ebiederm@xmission.com
Cc: jbarnes@virtuousgeek.org
Cc: steiner@sgi.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-07-10 14:16:56 -04:00
|
|
|
|
|
|
|
ret = 0;
|
2008-08-19 23:50:05 -04:00
|
|
|
desc->status &= ~IRQ_MOVE_PENDING;
|
|
|
|
cpus_clear(desc->pending_mask);
|
x64, x2apic/intr-remap: IO-APIC support for interrupt-remapping
IO-APIC support in the presence of interrupt-remapping infrastructure.
IO-APIC RTE will be programmed with interrupt-remapping table entry(IRTE)
index and the IRTE will contain information about the vector, cpu destination,
trigger mode etc, which traditionally was present in the IO-APIC RTE.
Introduce a new irq_chip for cleaner irq migration (in the process
context as opposed to the current irq migration in the context of an interrupt.
interrupt-remapping infrastructure will help us achieve this cleanly).
For edge triggered, irq migration is a simple atomic update(of vector
and cpu destination) of IRTE and flush the hardware cache.
For level triggered, we need to modify the io-apic RTE aswell with the update
vector information, along with modifying IRTE with vector and cpu destination.
So irq migration for level triggered is little bit more complex compared to
edge triggered migration. But the good news is, we use the same algorithm
for level triggered migration as we have today, only difference being,
we now initiate the irq migration from process context instead of the
interrupt context.
In future, when we do a directed EOI (combined with cpu EOI broadcast
suppression) to the IO-APIC, level triggered irq migration will also be
as simple as edge triggered migration and we can do the irq migration
with a simple atomic update to IO-APIC RTE.
TBD: some tests/changes needed in the presence of fixup_irqs() for
level triggered irq migration.
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: akpm@linux-foundation.org
Cc: arjan@linux.intel.com
Cc: andi@firstfloor.org
Cc: ebiederm@xmission.com
Cc: jbarnes@virtuousgeek.org
Cc: steiner@sgi.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-07-10 14:16:56 -04:00
|
|
|
|
|
|
|
unmask:
|
|
|
|
unmask_IO_APIC_irq(irq);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void ir_irq_migration(struct work_struct *work)
|
|
|
|
{
|
|
|
|
int irq;
|
|
|
|
|
2008-08-19 23:49:48 -04:00
|
|
|
for (irq = 0; irq < nr_irqs; irq++) {
|
2008-08-19 23:50:05 -04:00
|
|
|
struct irq_desc *desc = irq_to_desc(irq);
|
x64, x2apic/intr-remap: IO-APIC support for interrupt-remapping
IO-APIC support in the presence of interrupt-remapping infrastructure.
IO-APIC RTE will be programmed with interrupt-remapping table entry(IRTE)
index and the IRTE will contain information about the vector, cpu destination,
trigger mode etc, which traditionally was present in the IO-APIC RTE.
Introduce a new irq_chip for cleaner irq migration (in the process
context as opposed to the current irq migration in the context of an interrupt.
interrupt-remapping infrastructure will help us achieve this cleanly).
For edge triggered, irq migration is a simple atomic update(of vector
and cpu destination) of IRTE and flush the hardware cache.
For level triggered, we need to modify the io-apic RTE aswell with the update
vector information, along with modifying IRTE with vector and cpu destination.
So irq migration for level triggered is little bit more complex compared to
edge triggered migration. But the good news is, we use the same algorithm
for level triggered migration as we have today, only difference being,
we now initiate the irq migration from process context instead of the
interrupt context.
In future, when we do a directed EOI (combined with cpu EOI broadcast
suppression) to the IO-APIC, level triggered irq migration will also be
as simple as edge triggered migration and we can do the irq migration
with a simple atomic update to IO-APIC RTE.
TBD: some tests/changes needed in the presence of fixup_irqs() for
level triggered irq migration.
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: akpm@linux-foundation.org
Cc: arjan@linux.intel.com
Cc: andi@firstfloor.org
Cc: ebiederm@xmission.com
Cc: jbarnes@virtuousgeek.org
Cc: steiner@sgi.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-07-10 14:16:56 -04:00
|
|
|
if (desc->status & IRQ_MOVE_PENDING) {
|
|
|
|
unsigned long flags;
|
|
|
|
|
|
|
|
spin_lock_irqsave(&desc->lock, flags);
|
|
|
|
if (!desc->chip->set_affinity ||
|
|
|
|
!(desc->status & IRQ_MOVE_PENDING)) {
|
|
|
|
desc->status &= ~IRQ_MOVE_PENDING;
|
|
|
|
spin_unlock_irqrestore(&desc->lock, flags);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2008-08-19 23:50:05 -04:00
|
|
|
desc->chip->set_affinity(irq, desc->pending_mask);
|
x64, x2apic/intr-remap: IO-APIC support for interrupt-remapping
IO-APIC support in the presence of interrupt-remapping infrastructure.
IO-APIC RTE will be programmed with interrupt-remapping table entry(IRTE)
index and the IRTE will contain information about the vector, cpu destination,
trigger mode etc, which traditionally was present in the IO-APIC RTE.
Introduce a new irq_chip for cleaner irq migration (in the process
context as opposed to the current irq migration in the context of an interrupt.
interrupt-remapping infrastructure will help us achieve this cleanly).
For edge triggered, irq migration is a simple atomic update(of vector
and cpu destination) of IRTE and flush the hardware cache.
For level triggered, we need to modify the io-apic RTE aswell with the update
vector information, along with modifying IRTE with vector and cpu destination.
So irq migration for level triggered is little bit more complex compared to
edge triggered migration. But the good news is, we use the same algorithm
for level triggered migration as we have today, only difference being,
we now initiate the irq migration from process context instead of the
interrupt context.
In future, when we do a directed EOI (combined with cpu EOI broadcast
suppression) to the IO-APIC, level triggered irq migration will also be
as simple as edge triggered migration and we can do the irq migration
with a simple atomic update to IO-APIC RTE.
TBD: some tests/changes needed in the presence of fixup_irqs() for
level triggered irq migration.
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: akpm@linux-foundation.org
Cc: arjan@linux.intel.com
Cc: andi@firstfloor.org
Cc: ebiederm@xmission.com
Cc: jbarnes@virtuousgeek.org
Cc: steiner@sgi.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-07-10 14:16:56 -04:00
|
|
|
spin_unlock_irqrestore(&desc->lock, flags);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Migrates the IRQ destination in the process context.
|
|
|
|
*/
|
|
|
|
static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
{
	struct irq_desc *desc = irq_to_desc(irq);

	/* Anything that is not level triggered is migrated right away. */
	if (!(desc->status & IRQ_LEVEL)) {
		migrate_ioapic_irq(irq, mask);
		return;
	}

	/*
	 * Level triggered: record the request as pending first, so that
	 * ir_irq_migration() can pick it up again if this attempt has to
	 * be postponed.
	 */
	desc->status |= IRQ_MOVE_PENDING;
	desc->pending_mask = mask;
	migrate_irq_remapped_level(irq);
}
|
|
|
|
#endif
|
|
|
|
|
2007-02-23 06:40:58 -05:00
|
|
|
asmlinkage void smp_irq_move_cleanup_interrupt(void)
|
|
|
|
{
|
|
|
|
unsigned vector, me;
|
|
|
|
ack_APIC_irq();
|
|
|
|
exit_idle();
|
|
|
|
irq_enter();
|
|
|
|
|
|
|
|
me = smp_processor_id();
|
|
|
|
for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
|
|
|
|
unsigned int irq;
|
|
|
|
struct irq_desc *desc;
|
|
|
|
struct irq_cfg *cfg;
|
|
|
|
irq = __get_cpu_var(vector_irq)[vector];
|
2008-08-19 23:49:48 -04:00
|
|
|
if (irq >= nr_irqs)
|
2007-02-23 06:40:58 -05:00
|
|
|
continue;
|
|
|
|
|
2008-08-19 23:50:05 -04:00
|
|
|
desc = irq_to_desc(irq);
|
2008-08-19 23:50:06 -04:00
|
|
|
cfg = irq_cfg(irq);
|
2007-02-23 06:40:58 -05:00
|
|
|
spin_lock(&desc->lock);
|
|
|
|
if (!cfg->move_cleanup_count)
|
|
|
|
goto unlock;
|
|
|
|
|
|
|
|
if ((vector == cfg->vector) && cpu_isset(me, cfg->domain))
|
|
|
|
goto unlock;
|
|
|
|
|
|
|
|
__get_cpu_var(vector_irq)[vector] = -1;
|
|
|
|
cfg->move_cleanup_count--;
|
|
|
|
unlock:
|
|
|
|
spin_unlock(&desc->lock);
|
|
|
|
}
|
|
|
|
|
|
|
|
irq_exit();
|
|
|
|
}
|
|
|
|
|
|
|
|
static void irq_complete_move(unsigned int irq)
|
|
|
|
{
|
2008-08-19 23:50:06 -04:00
|
|
|
struct irq_cfg *cfg = irq_cfg(irq);
|
2007-02-23 06:40:58 -05:00
|
|
|
unsigned vector, me;
|
|
|
|
|
|
|
|
if (likely(!cfg->move_in_progress))
|
|
|
|
return;
|
|
|
|
|
2008-01-30 07:30:56 -05:00
|
|
|
vector = ~get_irq_regs()->orig_ax;
|
2007-02-23 06:40:58 -05:00
|
|
|
me = smp_processor_id();
|
2007-05-02 13:27:08 -04:00
|
|
|
if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) {
|
2007-02-23 06:40:58 -05:00
|
|
|
cpumask_t cleanup_mask;
|
|
|
|
|
|
|
|
cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
|
|
|
|
cfg->move_cleanup_count = cpus_weight(cleanup_mask);
|
|
|
|
send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
|
|
|
|
cfg->move_in_progress = 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#else
|
|
|
|
/* No-op variant of irq_complete_move() for the #else branch of the enclosing conditional. */
static inline void irq_complete_move(unsigned int irq) {}
|
|
|
|
#endif
|
x64, x2apic/intr-remap: IO-APIC support for interrupt-remapping
IO-APIC support in the presence of interrupt-remapping infrastructure.
IO-APIC RTE will be programmed with interrupt-remapping table entry(IRTE)
index and the IRTE will contain information about the vector, cpu destination,
trigger mode etc, which traditionally was present in the IO-APIC RTE.
Introduce a new irq_chip for cleaner irq migration (in the process
context as opposed to the current irq migration in the context of an interrupt.
interrupt-remapping infrastructure will help us achieve this cleanly).
For edge triggered, irq migration is a simple atomic update(of vector
and cpu destination) of IRTE and flush the hardware cache.
For level triggered, we need to modify the io-apic RTE aswell with the update
vector information, along with modifying IRTE with vector and cpu destination.
So irq migration for level triggered is little bit more complex compared to
edge triggered migration. But the good news is, we use the same algorithm
for level triggered migration as we have today, only difference being,
we now initiate the irq migration from process context instead of the
interrupt context.
In future, when we do a directed EOI (combined with cpu EOI broadcast
suppression) to the IO-APIC, level triggered irq migration will also be
as simple as edge triggered migration and we can do the irq migration
with a simple atomic update to IO-APIC RTE.
TBD: some tests/changes needed in the presence of fixup_irqs() for
level triggered irq migration.
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: akpm@linux-foundation.org
Cc: arjan@linux.intel.com
Cc: andi@firstfloor.org
Cc: ebiederm@xmission.com
Cc: jbarnes@virtuousgeek.org
Cc: steiner@sgi.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-07-10 14:16:56 -04:00
|
|
|
#ifdef CONFIG_INTR_REMAP
|
|
|
|
/* Acknowledge a level triggered irq via ack_x2APIC_irq(); @irq is unused. */
static void ack_x2apic_level(unsigned int irq)
{
	ack_x2APIC_irq();
}
|
|
|
|
|
|
|
|
/* Acknowledge an edge triggered irq via ack_x2APIC_irq(); @irq is unused. */
static void ack_x2apic_edge(unsigned int irq)
{
	ack_x2APIC_irq();
}
|
|
|
|
#endif
|
2007-02-23 06:40:58 -05:00
|
|
|
|
2006-10-04 05:16:30 -04:00
|
|
|
/*
 * Ack handler for edge triggered IO-APIC irqs (ioapic_chip.ack).
 */
static void ack_apic_edge(unsigned int irq)
{
	/* First finish any vector move that is still in progress ... */
	irq_complete_move(irq);
	/* ... then let move_native_irq() run for this irq ... */
	move_native_irq(irq);
	/* ... and finally acknowledge the interrupt at the local APIC. */
	ack_APIC_irq();
}
|
|
|
|
|
|
|
|
/*
 * EOI handler for level triggered IO-APIC irqs (ioapic_chip.eoi).
 */
static void ack_apic_level(unsigned int irq)
{
	int masked_for_move = 0;

	irq_complete_move(irq);

#ifdef CONFIG_GENERIC_PENDING_IRQ
	/* An irq that is about to be moved has to be masked first. */
	if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) {
		mask_IO_APIC_irq(irq);
		masked_for_move = 1;
	}
#endif

	/*
	 * The acknowledge must be issued before the irq is moved, otherwise
	 * it will not propagate properly.
	 */
	ack_APIC_irq();

	if (likely(!masked_for_move))
		return;

	/*
	 * Now the irq can be moved and re-enabled, but only migrate it if
	 * the ack has been received.
	 *
	 * On rare occasions the broadcast level triggered ack gets delayed
	 * on its way to the ioapics, and if the vector is reprogrammed while
	 * Remote IRR is still set the irq will never fire again.
	 *
	 * To prevent that, the Remote IRR bit of the ioapic is read.  This
	 * has two effects:
	 * - On any sane system the read of the ioapic flushes writes (and
	 *   acks) going to the ioapic from this cpu.
	 * - It shows whether the ACK has actually been delivered.
	 *
	 * Experiments with reprogramming the ioapic entry from outside of
	 * irq context (mask the entry, poll until Remote IRR is clear, then
	 * reprogram) failed, so the Remote IRR bit is not trusted to be
	 * completely accurate.
	 *
	 * There appears to be no other way to plug this race, though, so if
	 * an inaccurate Remote IRR bit causes problems it is a hardware bug
	 * to be taken up with the chipset vendor.
	 */
	if (!io_apic_level_ack_pending(irq))
		move_masked_irq(irq);
	unmask_IO_APIC_irq(irq);
}
|
|
|
|
|
2006-10-04 05:16:25 -04:00
|
|
|
/* irq_chip callbacks for irqs routed through the IO-APIC. */
static struct irq_chip ioapic_chip __read_mostly = {
	.name		= "IO-APIC",
	.startup	= startup_ioapic_irq,
	.mask		= mask_IO_APIC_irq,
	.unmask		= unmask_IO_APIC_irq,
	.ack		= ack_apic_edge,	/* edge triggered ack */
	.eoi		= ack_apic_level,	/* level triggered eoi */
#ifdef CONFIG_SMP
	/* Affinity changes are only wired up on SMP builds. */
	.set_affinity	= set_ioapic_affinity_irq,
#endif
	.retrigger	= ioapic_retrigger_irq,
};
|
|
|
|
|
x64, x2apic/intr-remap: IO-APIC support for interrupt-remapping
IO-APIC support in the presence of interrupt-remapping infrastructure.
IO-APIC RTE will be programmed with interrupt-remapping table entry(IRTE)
index and the IRTE will contain information about the vector, cpu destination,
trigger mode etc, which traditionally was present in the IO-APIC RTE.
Introduce a new irq_chip for cleaner irq migration (in the process
context as opposed to the current irq migration in the context of an interrupt.
interrupt-remapping infrastructure will help us achieve this cleanly).
For edge triggered, irq migration is a simple atomic update(of vector
and cpu destination) of IRTE and flush the hardware cache.
For level triggered, we need to modify the io-apic RTE as well with the updated
vector information, along with modifying IRTE with vector and cpu destination.
So irq migration for level triggered is little bit more complex compared to
edge triggered migration. But the good news is, we use the same algorithm
for level triggered migration as we have today, only difference being,
we now initiate the irq migration from process context instead of the
interrupt context.
In future, when we do a directed EOI (combined with cpu EOI broadcast
suppression) to the IO-APIC, level triggered irq migration will also be
as simple as edge triggered migration and we can do the irq migration
with a simple atomic update to IO-APIC RTE.
TBD: some tests/changes needed in the presence of fixup_irqs() for
level triggered irq migration.
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: akpm@linux-foundation.org
Cc: arjan@linux.intel.com
Cc: andi@firstfloor.org
Cc: ebiederm@xmission.com
Cc: jbarnes@virtuousgeek.org
Cc: steiner@sgi.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-07-10 14:16:56 -04:00
|
|
|
#ifdef CONFIG_INTR_REMAP
/*
 * irq_chip used for IO-APIC interrupts when interrupt remapping support is
 * built in.  Compared with ioapic_chip it installs the x2apic ack/eoi
 * handlers and (on SMP) set_ir_ioapic_affinity_irq as the affinity setter;
 * startup, mask, unmask and retrigger are shared.
 */
static struct irq_chip ir_ioapic_chip __read_mostly = {
	.name		= "IR-IO-APIC",
	.startup	= startup_ioapic_irq,
	.mask		= mask_IO_APIC_irq,
	.unmask		= unmask_IO_APIC_irq,
	.ack		= ack_x2apic_edge,
	.eoi		= ack_x2apic_level,
#ifdef CONFIG_SMP
	.set_affinity	= set_ir_ioapic_affinity_irq,
#endif
	.retrigger	= ioapic_retrigger_irq,
};
#endif
|
|
|
|
|
2005-04-16 18:20:36 -04:00
|
|
|
static inline void init_IO_APIC_traps(void)
|
|
|
|
{
|
|
|
|
int irq;
|
2008-08-19 23:50:05 -04:00
|
|
|
struct irq_desc *desc;
|
2005-04-16 18:20:36 -04:00
|
|
|
|
|
|
|
/*
|
|
|
|
* NOTE! The local APIC isn't very good at handling
|
|
|
|
* multiple interrupts at the same interrupt level.
|
|
|
|
* As the interrupt level is determined by taking the
|
|
|
|
* vector number and shifting that right by 4, we
|
|
|
|
* want to spread these out a bit so that they don't
|
|
|
|
* all fall in the same interrupt level.
|
|
|
|
*
|
|
|
|
* Also, we've got to be careful not to trash gate
|
|
|
|
* 0x80, because int 0x80 is hm, kind of importantish. ;)
|
|
|
|
*/
|
2008-08-19 23:49:48 -04:00
|
|
|
for (irq = 0; irq < nr_irqs ; irq++) {
|
2008-08-19 23:50:06 -04:00
|
|
|
struct irq_cfg *cfg;
|
|
|
|
|
|
|
|
cfg = irq_cfg(irq);
|
|
|
|
if (IO_APIC_IRQ(irq) && !cfg->vector) {
|
2005-04-16 18:20:36 -04:00
|
|
|
/*
|
|
|
|
* Hmm.. We don't have an entry for this,
|
|
|
|
* so default to an old-fashioned 8259
|
|
|
|
* interrupt if we can..
|
|
|
|
*/
|
|
|
|
if (irq < 16)
|
|
|
|
make_8259A_irq(irq);
|
2008-08-19 23:50:05 -04:00
|
|
|
else {
|
|
|
|
desc = irq_to_desc(irq);
|
2005-04-16 18:20:36 -04:00
|
|
|
/* Strange. Oh, well.. */
|
2008-08-19 23:50:05 -04:00
|
|
|
desc->chip = &no_irq_chip;
|
|
|
|
}
|
2005-04-16 18:20:36 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2008-07-11 14:35:17 -04:00
|
|
|
static void unmask_lapic_irq(unsigned int irq)
|
2005-04-16 18:20:36 -04:00
|
|
|
{
|
|
|
|
unsigned long v;
|
|
|
|
|
|
|
|
v = apic_read(APIC_LVT0);
|
2006-01-11 16:46:51 -05:00
|
|
|
apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
|
2005-04-16 18:20:36 -04:00
|
|
|
}
|
|
|
|
|
2008-07-11 14:35:17 -04:00
|
|
|
static void mask_lapic_irq(unsigned int irq)
|
2005-04-16 18:20:36 -04:00
|
|
|
{
|
|
|
|
unsigned long v;
|
|
|
|
|
|
|
|
v = apic_read(APIC_LVT0);
|
2006-01-11 16:46:51 -05:00
|
|
|
apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
|
2005-04-16 18:20:36 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * ->ack callback of lapic_chip: simply forwards to ack_APIC_irq().
 * The irq argument is not used.
 */
static void ack_lapic_irq(unsigned int irq)
{
	ack_APIC_irq();
}
|
|
|
|
|
2008-07-11 14:35:17 -04:00
|
|
|
/*
 * irq_chip for an interrupt handled via the local APIC's LVT0 entry;
 * only mask, unmask and ack callbacks are provided.
 */
static struct irq_chip lapic_chip __read_mostly = {
	.name		= "local-APIC",
	.mask		= mask_lapic_irq,
	.unmask		= unmask_lapic_irq,
	.ack		= ack_lapic_irq,
};
|
|
|
|
|
2008-07-11 14:35:17 -04:00
|
|
|
static void lapic_register_intr(int irq)
|
|
|
|
{
|
2008-08-19 23:50:05 -04:00
|
|
|
struct irq_desc *desc;
|
|
|
|
|
|
|
|
desc = irq_to_desc(irq);
|
|
|
|
desc->status &= ~IRQ_LEVEL;
|
2008-07-11 14:35:17 -04:00
|
|
|
set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
|
|
|
|
"edge");
|
|
|
|
}
|
|
|
|
|
2008-01-30 07:31:24 -05:00
|
|
|
/*
 * Dirty trick to enable the NMI watchdog: the 8259A master is put into
 * AEOI mode and LVT0 is unmasked as NMI on all local APICs.
 *
 * Using the 8259A in AEOI mode ('8259A Virtual Wire') is Maciej W.
 * Rozycki's idea; it means neither the NMI handler nor the timer interrupt
 * has to issue an EOI.
 *
 * This function itself only logs progress and calls
 * enable_NMI_through_LVT0().
 */
static void __init setup_nmi(void)
{
	printk(KERN_INFO "activating NMI Watchdog ...");

	enable_NMI_through_LVT0();

	printk(" done.\n");
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* This looks a bit hackish but it's about the only one way of sending
|
|
|
|
* a few INTA cycles to 8259As and any associated glue logic. ICR does
|
|
|
|
* not support the ExtINT mode, unfortunately. We need to send these
|
|
|
|
* cycles as some i82489DX-based boards have glue logic that keeps the
|
|
|
|
* 8259A interrupt line asserted until INTA. --macro
|
|
|
|
*/
|
2008-04-11 07:29:04 -04:00
|
|
|
static inline void __init unlock_ExtINT_logic(void)
{
	struct IO_APIC_route_entry saved_rte, extint_rte;
	unsigned char rtc_control, rtc_freq_select;
	int apic, pin, budget;

	/* Locate the IO-APIC pin wired to ISA IRQ 8; bail out if none. */
	pin = find_isa_irq_pin(8, mp_INT);
	apic = find_isa_irq_apic(8, mp_INT);
	if (pin == -1)
		return;

	/* Remember the current routing entry, then clear the pin. */
	saved_rte = ioapic_read_entry(apic, pin);
	clear_IO_APIC_pin(apic, pin);

	/*
	 * Temporary entry: unmasked ExtINT delivery in physical mode to the
	 * current CPU, reusing the original polarity.
	 */
	memset(&extint_rte, 0, sizeof(extint_rte));

	extint_rte.dest_mode = 0;			/* physical delivery */
	extint_rte.mask = 0;				/* unmask IRQ now */
	extint_rte.dest = hard_smp_processor_id();
	extint_rte.delivery_mode = dest_ExtINT;
	extint_rte.polarity = saved_rte.polarity;
	extint_rte.trigger = 0;
	extint_rte.vector = 0;

	ioapic_write_entry(apic, pin, extint_rte);

	/*
	 * Save the RTC control and frequency-select registers, then program
	 * rate-select 0x6 and set RTC_PIE in the control register.
	 */
	rtc_control = CMOS_READ(RTC_CONTROL);
	rtc_freq_select = CMOS_READ(RTC_FREQ_SELECT);
	CMOS_WRITE((rtc_freq_select & ~RTC_RATE_SELECT) | 0x6,
		   RTC_FREQ_SELECT);
	CMOS_WRITE(rtc_control | RTC_PIE, RTC_CONTROL);

	/*
	 * Poll in 10 ms steps with a budget of 100 iterations.  Every time
	 * RTC_PF is seen in the interrupt flags the budget shrinks by an
	 * extra 10, so the loop ends early once the flag keeps showing up.
	 */
	for (budget = 100; budget-- > 0; ) {
		mdelay(10);
		if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF)
			budget -= 10;
	}

	/* Put the RTC registers and the original routing entry back. */
	CMOS_WRITE(rtc_control, RTC_CONTROL);
	CMOS_WRITE(rtc_freq_select, RTC_FREQ_SELECT);
	clear_IO_APIC_pin(apic, pin);

	ioapic_write_entry(apic, pin, saved_rte);
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* This code may look a bit paranoid, but it's supposed to cooperate with
|
|
|
|
* a wide range of boards and BIOS bugs. Fortunately only the timer IRQ
|
|
|
|
* is so screwy. Thanks to Brian Perkins for testing/hacking this beast
|
|
|
|
* fanatically on his truly buggy board.
|
2007-01-08 18:04:46 -05:00
|
|
|
*
|
|
|
|
* FIXME: really need to revamp this for modern platforms only.
|
2005-04-16 18:20:36 -04:00
|
|
|
*/
|
2008-01-30 07:31:24 -05:00
|
|
|
static inline void __init check_timer(void)
|
2005-04-16 18:20:36 -04:00
|
|
|
{
|
2008-08-19 23:50:06 -04:00
|
|
|
struct irq_cfg *cfg = irq_cfg(0);
|
2006-01-11 16:46:06 -05:00
|
|
|
int apic1, pin1, apic2, pin2;
|
x86: fix "Kernel panic - not syncing: IO-APIC + timer doesn't work!"
this is the tale of a full day spent debugging an ancient but elusive bug.
after booting up thousands of random .config kernels, i finally happened
to generate a .config that produced the following rare bootup failure
on 32-bit x86:
| ..TIMER: vector=0x31 apic1=0 pin1=2 apic2=-1 pin2=-1
| ..MP-BIOS bug: 8254 timer not connected to IO-APIC
| ...trying to set up timer (IRQ0) through the 8259A ... failed.
| ...trying to set up timer as Virtual Wire IRQ... failed.
| ...trying to set up timer as ExtINT IRQ... failed :(.
| Kernel panic - not syncing: IO-APIC + timer doesn't work! Boot with apic=debug
| and send a report. Then try booting with the 'noapic' option
this bug has been reported many times during the years, but it was never
reproduced nor fixed.
the bug that i hit was extremely sensitive to .config details.
First i did a .config-bisection - suspecting some .config detail.
That led to CONFIG_X86_MCE: enabling X86_MCE magically made the bug disappear
and the system would boot up just fine.
Debugging my way through the MCE code ended up identifying two unlikely
candidates: the thing that made a real difference to the hang was that
X86_MCE did two printks:
Intel machine check architecture supported.
Intel machine check reporting enabled on CPU#1.
Adding the same printks to a !CONFIG_X86_MCE kernel made the bug go away!
this left timing as the main suspect: i experimented with adding various
udelay()s to the arch/x86/kernel/io_apic_32.c:check_timer() function, and
the race window turned out to be narrower than 30 microseconds (!).
That made debugging especially funny, debugging without having printk
ability before the bug hits is ... interesting ;-)
eventually i started suspecting IRQ activities - those are pretty much the
only thing that happen this early during bootup and have the timescale of
a few dozen microseconds. Also, check_timer() changes the IRQ hardware
in various creative ways, so the main candidate became IRQ0 interaction.
i've added a counter to track timer irqs (on which core they arrived, at
what exact time, etc.) and found that no timer IRQ would arrive after the
bug condition hits - even if we re-enable IRQ0 and re-initialize the i8259A,
but that we'd get a small number of timer irqs right around the time when we
call the check_timer() function.
Eventually i got the following backtrace triggered from debug code in the
timer interrupt:
...trying to set up timer as Virtual Wire IRQ... failed.
...trying to set up timer as ExtINT IRQ...
Pid: 1, comm: swapper Not tainted (2.6.24-rc5 #57)
EIP: 0060:[<c044d57e>] EFLAGS: 00000246 CPU: 0
EIP is at _spin_unlock_irqrestore+0x5/0x1c
EAX: c0634178 EBX: 00000000 ECX: c4947d63 EDX: 00000246
ESI: 00000002 EDI: 00010031 EBP: c04e0f2e ESP: f7c41df4
DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068
CR0: 8005003b CR2: ffe04000 CR3: 00630000 CR4: 000006d0
DR0: 00000000 DR1: 00000000 DR2: 00000000 DR3: 00000000
DR6: ffff0ff0 DR7: 00000400
[<c05f5784>] setup_IO_APIC+0x9c3/0xc5c
the spin_unlock() was called from init_8259A(). Wait ... we have an IRQ0
entry while we are in the middle of setting up the local APIC, the i8259A
and the PIT??
That is certainly not how it's supposed to work! check_timer() was supposed
to be called with irqs turned off - but this eroded away sometime in the
past. This code would still work most of the time because this code runs
very quickly, but if just the right timing conditions are present and IRQ0
hits in this small, ~30 usecs window, timer irqs stop and the system does
not boot up. Also, given how early this is during bootup, the hang is
very deterministic - but it would only occur on certain machines (and
certain configs).
The fix was quite simple: disable/restore interrupts properly in this
function. With that in place the test-system now boots up just fine.
(64-bit x86 io_apic_64.c had the same bug.)
Phew! One down, only 1500 other kernel bugs are left ;-)
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2007-12-18 12:05:58 -05:00
|
|
|
unsigned long flags;
|
2008-05-27 16:19:51 -04:00
|
|
|
int no_pin1 = 0;
|
x86: fix "Kernel panic - not syncing: IO-APIC + timer doesn't work!"
this is the tale of a full day spent debugging an ancient but elusive bug.
after booting up thousands of random .config kernels, i finally happened
to generate a .config that produced the following rare bootup failure
on 32-bit x86:
| ..TIMER: vector=0x31 apic1=0 pin1=2 apic2=-1 pin2=-1
| ..MP-BIOS bug: 8254 timer not connected to IO-APIC
| ...trying to set up timer (IRQ0) through the 8259A ... failed.
| ...trying to set up timer as Virtual Wire IRQ... failed.
| ...trying to set up timer as ExtINT IRQ... failed :(.
| Kernel panic - not syncing: IO-APIC + timer doesn't work! Boot with apic=debug
| and send a report. Then try booting with the 'noapic' option
this bug has been reported many times during the years, but it was never
reproduced nor fixed.
the bug that i hit was extremely sensitive to .config details.
First i did a .config-bisection - suspecting some .config detail.
That led to CONFIG_X86_MCE: enabling X86_MCE magically made the bug disappear
and the system would boot up just fine.
Debugging my way through the MCE code ended up identifying two unlikely
candidates: the thing that made a real difference to the hang was that
X86_MCE did two printks:
Intel machine check architecture supported.
Intel machine check reporting enabled on CPU#1.
Adding the same printks to a !CONFIG_X86_MCE kernel made the bug go away!
this left timing as the main suspect: i experimented with adding various
udelay()s to the arch/x86/kernel/io_apic_32.c:check_timer() function, and
the race window turned out to be narrower than 30 microseconds (!).
That made debugging especially funny, debugging without having printk
ability before the bug hits is ... interesting ;-)
eventually i started suspecting IRQ activities - those are pretty much the
only thing that happen this early during bootup and have the timescale of
a few dozen microseconds. Also, check_timer() changes the IRQ hardware
in various creative ways, so the main candidate became IRQ0 interaction.
i've added a counter to track timer irqs (on which core they arrived, at
what exact time, etc.) and found that no timer IRQ would arrive after the
bug condition hits - even if we re-enable IRQ0 and re-initialize the i8259A,
but that we'd get a small number of timer irqs right around the time when we
call the check_timer() function.
Eventually i got the following backtrace triggered from debug code in the
timer interrupt:
...trying to set up timer as Virtual Wire IRQ... failed.
...trying to set up timer as ExtINT IRQ...
Pid: 1, comm: swapper Not tainted (2.6.24-rc5 #57)
EIP: 0060:[<c044d57e>] EFLAGS: 00000246 CPU: 0
EIP is at _spin_unlock_irqrestore+0x5/0x1c
EAX: c0634178 EBX: 00000000 ECX: c4947d63 EDX: 00000246
ESI: 00000002 EDI: 00010031 EBP: c04e0f2e ESP: f7c41df4
DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068
CR0: 8005003b CR2: ffe04000 CR3: 00630000 CR4: 000006d0
DR0: 00000000 DR1: 00000000 DR2: 00000000 DR3: 00000000
DR6: ffff0ff0 DR7: 00000400
[<c05f5784>] setup_IO_APIC+0x9c3/0xc5c
the spin_unlock() was called from init_8259A(). Wait ... we have an IRQ0
entry while we are in the middle of setting up the local APIC, the i8259A
and the PIT??
That is certainly not how it's supposed to work! check_timer() was supposed
to be called with irqs turned off - but this eroded away sometime in the
past. This code would still work most of the time because this code runs
very quickly, but if just the right timing conditions are present and IRQ0
hits in this small, ~30 usecs window, timer irqs stop and the system does
not boot up. Also, given how early this is during bootup, the hang is
very deterministic - but it would only occur on certain machines (and
certain configs).
The fix was quite simple: disable/restore interrupts properly in this
function. With that in place the test-system now boots up just fine.
(64-bit x86 io_apic_64.c had the same bug.)
Phew! One down, only 1500 other kernel bugs are left ;-)
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2007-12-18 12:05:58 -05:00
|
|
|
|
|
|
|
local_irq_save(flags);
|
2005-04-16 18:20:36 -04:00
|
|
|
|
|
|
|
/*
|
|
|
|
* get/set the timer IRQ vector:
|
|
|
|
*/
|
|
|
|
disable_8259A_irq(0);
|
2007-02-23 06:35:05 -05:00
|
|
|
assign_irq_vector(0, TARGET_CPUS);
|
2005-04-16 18:20:36 -04:00
|
|
|
|
|
|
|
/*
|
2008-05-21 17:09:11 -04:00
|
|
|
* As IRQ0 is to be enabled in the 8259A, the virtual
|
|
|
|
* wire has to be disabled in the local APIC.
|
2005-04-16 18:20:36 -04:00
|
|
|
*/
|
2006-01-11 16:46:51 -05:00
|
|
|
apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
|
2005-04-16 18:20:36 -04:00
|
|
|
init_8259A(1);
|
|
|
|
|
2006-01-11 16:46:06 -05:00
|
|
|
pin1 = find_isa_irq_pin(0, mp_INT);
|
|
|
|
apic1 = find_isa_irq_apic(0, mp_INT);
|
|
|
|
pin2 = ioapic_i8259.pin;
|
|
|
|
apic2 = ioapic_i8259.apic;
|
2005-04-16 18:20:36 -04:00
|
|
|
|
2008-07-14 14:08:13 -04:00
|
|
|
apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X "
|
|
|
|
"apic1=%d pin1=%d apic2=%d pin2=%d\n",
|
|
|
|
cfg->vector, apic1, pin1, apic2, pin2);
|
2006-12-06 20:14:06 -05:00
|
|
|
|
2008-05-27 16:19:51 -04:00
|
|
|
/*
|
|
|
|
* Some BIOS writers are clueless and report the ExtINTA
|
|
|
|
* I/O APIC input from the cascaded 8259A as the timer
|
|
|
|
* interrupt input. So just in case, if only one pin
|
|
|
|
* was found above, try it both directly and through the
|
|
|
|
* 8259A.
|
|
|
|
*/
|
|
|
|
if (pin1 == -1) {
|
x64, x2apic/intr-remap: IO-APIC support for interrupt-remapping
IO-APIC support in the presence of interrupt-remapping infrastructure.
IO-APIC RTE will be programmed with interrupt-remapping table entry(IRTE)
index and the IRTE will contain information about the vector, cpu destination,
trigger mode etc, which traditionally was present in the IO-APIC RTE.
Introduce a new irq_chip for cleaner irq migration (in the process
context as opposed to the current irq migration in the context of an interrupt.
interrupt-remapping infrastructure will help us achieve this cleanly).
For edge triggered, irq migration is a simple atomic update(of vector
and cpu destination) of IRTE and flush the hardware cache.
For level triggered, we need to modify the io-apic RTE as well with the updated
vector information, along with modifying IRTE with vector and cpu destination.
So irq migration for level triggered is little bit more complex compared to
edge triggered migration. But the good news is, we use the same algorithm
for level triggered migration as we have today, only difference being,
we now initiate the irq migration from process context instead of the
interrupt context.
In future, when we do a directed EOI (combined with cpu EOI broadcast
suppression) to the IO-APIC, level triggered irq migration will also be
as simple as edge triggered migration and we can do the irq migration
with a simple atomic update to IO-APIC RTE.
TBD: some tests/changes needed in the presence of fixup_irqs() for
level triggered irq migration.
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: akpm@linux-foundation.org
Cc: arjan@linux.intel.com
Cc: andi@firstfloor.org
Cc: ebiederm@xmission.com
Cc: jbarnes@virtuousgeek.org
Cc: steiner@sgi.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-07-10 14:16:56 -04:00
|
|
|
if (intr_remapping_enabled)
|
|
|
|
panic("BIOS bug: timer not connected to IO-APIC");
|
2008-05-27 16:19:51 -04:00
|
|
|
pin1 = pin2;
|
|
|
|
apic1 = apic2;
|
|
|
|
no_pin1 = 1;
|
|
|
|
} else if (pin2 == -1) {
|
|
|
|
pin2 = pin1;
|
|
|
|
apic2 = apic1;
|
|
|
|
}
|
|
|
|
|
2007-01-08 18:04:46 -05:00
|
|
|
if (pin1 != -1) {
|
|
|
|
/*
|
|
|
|
* Ok, does IRQ0 through the IOAPIC work?
|
|
|
|
*/
|
2008-05-27 16:19:51 -04:00
|
|
|
if (no_pin1) {
|
|
|
|
add_pin_to_irq(0, apic1, pin1);
|
2008-05-31 06:20:10 -04:00
|
|
|
setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
|
2008-05-27 16:19:51 -04:00
|
|
|
}
|
2007-01-08 18:04:46 -05:00
|
|
|
unmask_IO_APIC_irq(0);
|
|
|
|
if (!no_timer_check && timer_irq_works()) {
|
|
|
|
if (nmi_watchdog == NMI_IO_APIC) {
|
|
|
|
setup_nmi();
|
|
|
|
enable_8259A_irq(0);
|
|
|
|
}
|
|
|
|
if (disable_timer_pin_1 > 0)
|
|
|
|
clear_IO_APIC_pin(0, pin1);
|
x86: fix "Kernel panic - not syncing: IO-APIC + timer doesn't work!"
this is the tale of a full day spent debugging an ancient but elusive bug.
after booting up thousands of random .config kernels, i finally happened
to generate a .config that produced the following rare bootup failure
on 32-bit x86:
| ..TIMER: vector=0x31 apic1=0 pin1=2 apic2=-1 pin2=-1
| ..MP-BIOS bug: 8254 timer not connected to IO-APIC
| ...trying to set up timer (IRQ0) through the 8259A ... failed.
| ...trying to set up timer as Virtual Wire IRQ... failed.
| ...trying to set up timer as ExtINT IRQ... failed :(.
| Kernel panic - not syncing: IO-APIC + timer doesn't work! Boot with apic=debug
| and send a report. Then try booting with the 'noapic' option
this bug has been reported many times during the years, but it was never
reproduced nor fixed.
the bug that i hit was extremely sensitive to .config details.
First i did a .config-bisection - suspecting some .config detail.
That led to CONFIG_X86_MCE: enabling X86_MCE magically made the bug disappear
and the system would boot up just fine.
Debugging my way through the MCE code ended up identifying two unlikely
candidates: the thing that made a real difference to the hang was that
X86_MCE did two printks:
Intel machine check architecture supported.
Intel machine check reporting enabled on CPU#1.
Adding the same printks to a !CONFIG_X86_MCE kernel made the bug go away!
this left timing as the main suspect: i experimented with adding various
udelay()s to the arch/x86/kernel/io_apic_32.c:check_timer() function, and
the race window turned out to be narrower than 30 microseconds (!).
That made debugging especially funny, debugging without having printk
ability before the bug hits is ... interesting ;-)
eventually i started suspecting IRQ activities - those are pretty much the
only thing that happen this early during bootup and have the timescale of
a few dozen microseconds. Also, check_timer() changes the IRQ hardware
in various creative ways, so the main candidate became IRQ0 interaction.
i've added a counter to track timer irqs (on which core they arrived, at
what exact time, etc.) and found that no timer IRQ would arrive after the
bug condition hits - even if we re-enable IRQ0 and re-initialize the i8259A,
but that we'd get a small number of timer irqs right around the time when we
call the check_timer() function.
Eventually i got the following backtrace triggered from debug code in the
timer interrupt:
...trying to set up timer as Virtual Wire IRQ... failed.
...trying to set up timer as ExtINT IRQ...
Pid: 1, comm: swapper Not tainted (2.6.24-rc5 #57)
EIP: 0060:[<c044d57e>] EFLAGS: 00000246 CPU: 0
EIP is at _spin_unlock_irqrestore+0x5/0x1c
EAX: c0634178 EBX: 00000000 ECX: c4947d63 EDX: 00000246
ESI: 00000002 EDI: 00010031 EBP: c04e0f2e ESP: f7c41df4
DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068
CR0: 8005003b CR2: ffe04000 CR3: 00630000 CR4: 000006d0
DR0: 00000000 DR1: 00000000 DR2: 00000000 DR3: 00000000
DR6: ffff0ff0 DR7: 00000400
[<c05f5784>] setup_IO_APIC+0x9c3/0xc5c
the spin_unlock() was called from init_8259A(). Wait ... we have an IRQ0
entry while we are in the middle of setting up the local APIC, the i8259A
and the PIT??
That is certainly not how it's supposed to work! check_timer() was supposed
to be called with irqs turned off - but this eroded away sometime in the
past. This code would still work most of the time because this code runs
very quickly, but if just the right timing conditions are present and IRQ0
hits in this small, ~30 usecs window, timer irqs stop and the system does
not boot up. Also, given how early this is during bootup, the hang is
very deterministic - but it would only occur on certain machines (and
certain configs).
The fix was quite simple: disable/restore interrupts properly in this
function. With that in place the test-system now boots up just fine.
(64-bit x86 io_apic_64.c had the same bug.)
Phew! One down, only 1500 other kernel bugs are left ;-)
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2007-12-18 12:05:58 -05:00
|
|
|
goto out;
|
2007-01-08 18:04:46 -05:00
|
|
|
}
|
x64, x2apic/intr-remap: IO-APIC support for interrupt-remapping
IO-APIC support in the presence of interrupt-remapping infrastructure.
IO-APIC RTE will be programmed with interrupt-remapping table entry(IRTE)
index and the IRTE will contain information about the vector, cpu destination,
trigger mode etc, which traditionally was present in the IO-APIC RTE.
Introduce a new irq_chip for cleaner irq migration (in the process
context as opposed to the current irq migration in the context of an interrupt.
interrupt-remapping infrastructure will help us achieve this cleanly).
For edge triggered, irq migration is a simple atomic update(of vector
and cpu destination) of IRTE and flush the hardware cache.
For level triggered, we need to modify the io-apic RTE as well with the updated
vector information, along with modifying IRTE with vector and cpu destination.
So irq migration for level triggered is little bit more complex compared to
edge triggered migration. But the good news is, we use the same algorithm
for level triggered migration as we have today, only difference being,
we now initiate the irq migration from process context instead of the
interrupt context.
In future, when we do a directed EOI (combined with cpu EOI broadcast
suppression) to the IO-APIC, level triggered irq migration will also be
as simple as edge triggered migration and we can do the irq migration
with a simple atomic update to IO-APIC RTE.
TBD: some tests/changes needed in the presence of fixup_irqs() for
level triggered irq migration.
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: akpm@linux-foundation.org
Cc: arjan@linux.intel.com
Cc: andi@firstfloor.org
Cc: ebiederm@xmission.com
Cc: jbarnes@virtuousgeek.org
Cc: steiner@sgi.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-07-10 14:16:56 -04:00
|
|
|
if (intr_remapping_enabled)
|
|
|
|
panic("timer doesn't work through Interrupt-remapped IO-APIC");
|
2007-01-08 18:04:46 -05:00
|
|
|
clear_IO_APIC_pin(apic1, pin1);
|
2008-05-27 16:19:51 -04:00
|
|
|
if (!no_pin1)
|
2008-07-14 14:08:13 -04:00
|
|
|
apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
|
2008-05-27 16:19:51 -04:00
|
|
|
"8254 timer not connected to IO-APIC\n");
|
2005-04-16 18:20:36 -04:00
|
|
|
|
2008-07-14 14:08:13 -04:00
|
|
|
apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer "
|
|
|
|
"(IRQ0) through the 8259A ...\n");
|
|
|
|
apic_printk(APIC_QUIET, KERN_INFO
|
|
|
|
"..... (found apic %d pin %d) ...\n", apic2, pin2);
|
2007-01-08 18:04:46 -05:00
|
|
|
/*
|
|
|
|
* legacy devices should be connected to IO APIC #0
|
|
|
|
*/
|
2008-06-30 20:19:31 -04:00
|
|
|
replace_pin_at_irq(0, apic1, pin1, apic2, pin2);
|
2008-05-27 16:19:34 -04:00
|
|
|
setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
|
2008-05-27 16:19:40 -04:00
|
|
|
unmask_IO_APIC_irq(0);
|
2008-05-21 17:09:19 -04:00
|
|
|
enable_8259A_irq(0);
|
2007-01-08 18:04:46 -05:00
|
|
|
if (timer_irq_works()) {
|
2008-07-14 14:08:13 -04:00
|
|
|
apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
|
2008-05-21 17:10:22 -04:00
|
|
|
timer_through_8259 = 1;
|
2007-01-08 18:04:46 -05:00
|
|
|
if (nmi_watchdog == NMI_IO_APIC) {
|
2008-05-21 17:09:34 -04:00
|
|
|
disable_8259A_irq(0);
|
2007-01-08 18:04:46 -05:00
|
|
|
setup_nmi();
|
2008-05-21 17:09:34 -04:00
|
|
|
enable_8259A_irq(0);
|
2007-01-08 18:04:46 -05:00
|
|
|
}
|
x86: fix "Kernel panic - not syncing: IO-APIC + timer doesn't work!"
this is the tale of a full day spent debugging an ancient but elusive bug.
after booting up thousands of random .config kernels, i finally happened
to generate a .config that produced the following rare bootup failure
on 32-bit x86:
| ..TIMER: vector=0x31 apic1=0 pin1=2 apic2=-1 pin2=-1
| ..MP-BIOS bug: 8254 timer not connected to IO-APIC
| ...trying to set up timer (IRQ0) through the 8259A ... failed.
| ...trying to set up timer as Virtual Wire IRQ... failed.
| ...trying to set up timer as ExtINT IRQ... failed :(.
| Kernel panic - not syncing: IO-APIC + timer doesn't work! Boot with apic=debug
| and send a report. Then try booting with the 'noapic' option
this bug has been reported many times during the years, but it was never
reproduced nor fixed.
the bug that i hit was extremely sensitive to .config details.
First i did a .config-bisection - suspecting some .config detail.
That led to CONFIG_X86_MCE: enabling X86_MCE magically made the bug disappear
and the system would boot up just fine.
Debugging my way through the MCE code ended up identifying two unlikely
candidates: the thing that made a real difference to the hang was that
X86_MCE did two printks:
Intel machine check architecture supported.
Intel machine check reporting enabled on CPU#1.
Adding the same printks to a !CONFIG_X86_MCE kernel made the bug go away!
this left timing as the main suspect: i experimented with adding various
udelay()s to the arch/x86/kernel/io_apic_32.c:check_timer() function, and
the race window turned out to be narrower than 30 microseconds (!).
That made debugging especially funny, debugging without having printk
ability before the bug hits is ... interesting ;-)
eventually i started suspecting IRQ activities - those are pretty much the
only thing that happen this early during bootup and have the timescale of
a few dozen microseconds. Also, check_timer() changes the IRQ hardware
in various creative ways, so the main candidate became IRQ0 interaction.
i've added a counter to track timer irqs (on which core they arrived, at
what exact time, etc.) and found that no timer IRQ would arrive after the
bug condition hits - even if we re-enable IRQ0 and re-initialize the i8259A,
but that we'd get a small number of timer irqs right around the time when we
call the check_timer() function.
Eventually i got the following backtrace triggered from debug code in the
timer interrupt:
...trying to set up timer as Virtual Wire IRQ... failed.
...trying to set up timer as ExtINT IRQ...
Pid: 1, comm: swapper Not tainted (2.6.24-rc5 #57)
EIP: 0060:[<c044d57e>] EFLAGS: 00000246 CPU: 0
EIP is at _spin_unlock_irqrestore+0x5/0x1c
EAX: c0634178 EBX: 00000000 ECX: c4947d63 EDX: 00000246
ESI: 00000002 EDI: 00010031 EBP: c04e0f2e ESP: f7c41df4
DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068
CR0: 8005003b CR2: ffe04000 CR3: 00630000 CR4: 000006d0
DR0: 00000000 DR1: 00000000 DR2: 00000000 DR3: 00000000
DR6: ffff0ff0 DR7: 00000400
[<c05f5784>] setup_IO_APIC+0x9c3/0xc5c
the spin_unlock() was called from init_8259A(). Wait ... we have an IRQ0
entry while we are in the middle of setting up the local APIC, the i8259A
and the PIT??
That is certainly not how it's supposed to work! check_timer() was supposed
to be called with irqs turned off - but this eroded away sometime in the
past. This code would still work most of the time because this code runs
very quickly, but if just the right timing conditions are present and IRQ0
hits in this small, ~30 usecs window, timer irqs stop and the system does
not boot up. Also, given how early this is during bootup, the hang is
very deterministic - but it would only occur on certain machines (and
certain configs).
The fix was quite simple: disable/restore interrupts properly in this
function. With that in place the test-system now boots up just fine.
(64-bit x86 io_apic_64.c had the same bug.)
Phew! One down, only 1500 other kernel bugs are left ;-)
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2007-12-18 12:05:58 -05:00
|
|
|
goto out;
|
2007-01-08 18:04:46 -05:00
|
|
|
}
|
|
|
|
/*
|
|
|
|
* Cleanup, just in case ...
|
|
|
|
*/
|
2008-05-21 17:09:19 -04:00
|
|
|
disable_8259A_irq(0);
|
2007-01-08 18:04:46 -05:00
|
|
|
clear_IO_APIC_pin(apic2, pin2);
|
2008-07-14 14:08:13 -04:00
|
|
|
apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
|
2005-04-16 18:20:36 -04:00
|
|
|
}
|
|
|
|
|
2006-02-25 22:18:40 -05:00
|
|
|
if (nmi_watchdog == NMI_IO_APIC) {
|
2008-07-14 14:08:13 -04:00
|
|
|
apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work "
|
|
|
|
"through the IO-APIC - disabling NMI Watchdog!\n");
|
2008-05-29 14:32:30 -04:00
|
|
|
nmi_watchdog = NMI_NONE;
|
2005-04-16 18:20:36 -04:00
|
|
|
}
|
|
|
|
|
2008-07-14 14:08:13 -04:00
|
|
|
apic_printk(APIC_QUIET, KERN_INFO
|
|
|
|
"...trying to set up timer as Virtual Wire IRQ...\n");
|
2005-04-16 18:20:36 -04:00
|
|
|
|
2008-07-11 14:35:17 -04:00
|
|
|
lapic_register_intr(0);
|
2007-02-23 06:35:05 -05:00
|
|
|
apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */
|
2005-04-16 18:20:36 -04:00
|
|
|
enable_8259A_irq(0);
|
|
|
|
|
|
|
|
if (timer_irq_works()) {
|
2008-07-14 14:08:13 -04:00
|
|
|
apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
|
x86: fix "Kernel panic - not syncing: IO-APIC + timer doesn't work!"
this is the tale of a full day spent debugging an ancient but elusive bug.
after booting up thousands of random .config kernels, i finally happened
to generate a .config that produced the following rare bootup failure
on 32-bit x86:
| ..TIMER: vector=0x31 apic1=0 pin1=2 apic2=-1 pin2=-1
| ..MP-BIOS bug: 8254 timer not connected to IO-APIC
| ...trying to set up timer (IRQ0) through the 8259A ... failed.
| ...trying to set up timer as Virtual Wire IRQ... failed.
| ...trying to set up timer as ExtINT IRQ... failed :(.
| Kernel panic - not syncing: IO-APIC + timer doesn't work! Boot with apic=debug
| and send a report. Then try booting with the 'noapic' option
this bug has been reported many times during the years, but it was never
reproduced nor fixed.
the bug that i hit was extremely sensitive to .config details.
First i did a .config-bisection - suspecting some .config detail.
That led to CONFIG_X86_MCE: enabling X86_MCE magically made the bug disappear
and the system would boot up just fine.
Debugging my way through the MCE code ended up identifying two unlikely
candidates: the thing that made a real difference to the hang was that
X86_MCE did two printks:
Intel machine check architecture supported.
Intel machine check reporting enabled on CPU#1.
Adding the same printks to a !CONFIG_X86_MCE kernel made the bug go away!
this left timing as the main suspect: i experimented with adding various
udelay()s to the arch/x86/kernel/io_apic_32.c:check_timer() function, and
the race window turned out to be narrower than 30 microseconds (!).
That made debugging especially funny, debugging without having printk
ability before the bug hits is ... interesting ;-)
eventually i started suspecting IRQ activities - those are pretty much the
only thing that happen this early during bootup and have the timescale of
a few dozen microseconds. Also, check_timer() changes the IRQ hardware
in various creative ways, so the main candidate became IRQ0 interaction.
i've added a counter to track timer irqs (on which core they arrived, at
what exact time, etc.) and found that no timer IRQ would arrive after the
bug condition hits - even if we re-enable IRQ0 and re-initialize the i8259A,
but that we'd get a small number of timer irqs right around the time when we
call the check_timer() function.
Eventually i got the following backtrace triggered from debug code in the
timer interrupt:
...trying to set up timer as Virtual Wire IRQ... failed.
...trying to set up timer as ExtINT IRQ...
Pid: 1, comm: swapper Not tainted (2.6.24-rc5 #57)
EIP: 0060:[<c044d57e>] EFLAGS: 00000246 CPU: 0
EIP is at _spin_unlock_irqrestore+0x5/0x1c
EAX: c0634178 EBX: 00000000 ECX: c4947d63 EDX: 00000246
ESI: 00000002 EDI: 00010031 EBP: c04e0f2e ESP: f7c41df4
DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068
CR0: 8005003b CR2: ffe04000 CR3: 00630000 CR4: 000006d0
DR0: 00000000 DR1: 00000000 DR2: 00000000 DR3: 00000000
DR6: ffff0ff0 DR7: 00000400
[<c05f5784>] setup_IO_APIC+0x9c3/0xc5c
the spin_unlock() was called from init_8259A(). Wait ... we have an IRQ0
entry while we are in the middle of setting up the local APIC, the i8259A
and the PIT??
That is certainly not how it's supposed to work! check_timer() was supposed
to be called with irqs turned off - but this eroded away sometime in the
past. This code would still work most of the time because this code runs
very quickly, but if just the right timing conditions are present and IRQ0
hits in this small, ~30 usecs window, timer irqs stop and the system does
not boot up. Also, given how early this is during bootup, the hang is
very deterministic - but it would only occur on certain machines (and
certain configs).
The fix was quite simple: disable/restore interrupts properly in this
function. With that in place the test-system now boots up just fine.
(64-bit x86 io_apic_64.c had the same bug.)
Phew! One down, only 1500 other kernel bugs are left ;-)
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2007-12-18 12:05:58 -05:00
|
|
|
goto out;
|
2005-04-16 18:20:36 -04:00
|
|
|
}
|
2008-05-21 17:09:26 -04:00
|
|
|
disable_8259A_irq(0);
|
2007-02-23 06:35:05 -05:00
|
|
|
apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
|
2008-07-14 14:08:13 -04:00
|
|
|
apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n");
|
2005-04-16 18:20:36 -04:00
|
|
|
|
2008-07-14 14:08:13 -04:00
|
|
|
apic_printk(APIC_QUIET, KERN_INFO
|
|
|
|
"...trying to set up timer as ExtINT IRQ...\n");
|
2005-04-16 18:20:36 -04:00
|
|
|
|
|
|
|
init_8259A(0);
|
|
|
|
make_8259A_irq(0);
|
2006-01-11 16:46:51 -05:00
|
|
|
apic_write(APIC_LVT0, APIC_DM_EXTINT);
|
2005-04-16 18:20:36 -04:00
|
|
|
|
|
|
|
unlock_ExtINT_logic();
|
|
|
|
|
|
|
|
if (timer_irq_works()) {
|
2008-07-14 14:08:13 -04:00
|
|
|
apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
|
x86: fix "Kernel panic - not syncing: IO-APIC + timer doesn't work!"
this is the tale of a full day spent debugging an ancient but elusive bug.
after booting up thousands of random .config kernels, i finally happened
to generate a .config that produced the following rare bootup failure
on 32-bit x86:
| ..TIMER: vector=0x31 apic1=0 pin1=2 apic2=-1 pin2=-1
| ..MP-BIOS bug: 8254 timer not connected to IO-APIC
| ...trying to set up timer (IRQ0) through the 8259A ... failed.
| ...trying to set up timer as Virtual Wire IRQ... failed.
| ...trying to set up timer as ExtINT IRQ... failed :(.
| Kernel panic - not syncing: IO-APIC + timer doesn't work! Boot with apic=debug
| and send a report. Then try booting with the 'noapic' option
this bug has been reported many times during the years, but it was never
reproduced nor fixed.
the bug that i hit was extremely sensitive to .config details.
First i did a .config-bisection - suspecting some .config detail.
That led to CONFIG_X86_MCE: enabling X86_MCE magically made the bug disappear
and the system would boot up just fine.
Debugging my way through the MCE code ended up identifying two unlikely
candidates: the thing that made a real difference to the hang was that
X86_MCE did two printks:
Intel machine check architecture supported.
Intel machine check reporting enabled on CPU#1.
Adding the same printks to a !CONFIG_X86_MCE kernel made the bug go away!
this left timing as the main suspect: i experimented with adding various
udelay()s to the arch/x86/kernel/io_apic_32.c:check_timer() function, and
the race window turned out to be narrower than 30 microseconds (!).
That made debugging especially funny, debugging without having printk
ability before the bug hits is ... interesting ;-)
eventually i started suspecting IRQ activities - those are pretty much the
only thing that happen this early during bootup and have the timescale of
a few dozen microseconds. Also, check_timer() changes the IRQ hardware
in various creative ways, so the main candidate became IRQ0 interaction.
i've added a counter to track timer irqs (on which core they arrived, at
what exact time, etc.) and found that no timer IRQ would arrive after the
bug condition hits - even if we re-enable IRQ0 and re-initialize the i8259A,
but that we'd get a small number of timer irqs right around the time when we
call the check_timer() function.
Eventually i got the following backtrace triggered from debug code in the
timer interrupt:
...trying to set up timer as Virtual Wire IRQ... failed.
...trying to set up timer as ExtINT IRQ...
Pid: 1, comm: swapper Not tainted (2.6.24-rc5 #57)
EIP: 0060:[<c044d57e>] EFLAGS: 00000246 CPU: 0
EIP is at _spin_unlock_irqrestore+0x5/0x1c
EAX: c0634178 EBX: 00000000 ECX: c4947d63 EDX: 00000246
ESI: 00000002 EDI: 00010031 EBP: c04e0f2e ESP: f7c41df4
DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068
CR0: 8005003b CR2: ffe04000 CR3: 00630000 CR4: 000006d0
DR0: 00000000 DR1: 00000000 DR2: 00000000 DR3: 00000000
DR6: ffff0ff0 DR7: 00000400
[<c05f5784>] setup_IO_APIC+0x9c3/0xc5c
the spin_unlock() was called from init_8259A(). Wait ... we have an IRQ0
entry while we are in the middle of setting up the local APIC, the i8259A
and the PIT??
That is certainly not how it's supposed to work! check_timer() was supposed
to be called with irqs turned off - but this eroded away sometime in the
past. This code would still work most of the time because this code runs
very quickly, but if just the right timing conditions are present and IRQ0
hits in this small, ~30 usecs window, timer irqs stop and the system does
not boot up. Also, given how early this is during bootup, the hang is
very deterministic - but it would only occur on certain machines (and
certain configs).
The fix was quite simple: disable/restore interrupts properly in this
function. With that in place the test-system now boots up just fine.
(64-bit x86 io_apic_64.c had the same bug.)
Phew! One down, only 1500 other kernel bugs are left ;-)
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2007-12-18 12:05:58 -05:00
|
|
|
goto out;
|
2005-04-16 18:20:36 -04:00
|
|
|
}
|
2008-07-14 14:08:13 -04:00
|
|
|
apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n");
|
|
|
|
panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a "
|
|
|
|
"report. Then try booting with the 'noapic' option.\n");
|
x86: fix "Kernel panic - not syncing: IO-APIC + timer doesn't work!"
this is the tale of a full day spent debugging an ancient but elusive bug.
after booting up thousands of random .config kernels, i finally happened
to generate a .config that produced the following rare bootup failure
on 32-bit x86:
| ..TIMER: vector=0x31 apic1=0 pin1=2 apic2=-1 pin2=-1
| ..MP-BIOS bug: 8254 timer not connected to IO-APIC
| ...trying to set up timer (IRQ0) through the 8259A ... failed.
| ...trying to set up timer as Virtual Wire IRQ... failed.
| ...trying to set up timer as ExtINT IRQ... failed :(.
| Kernel panic - not syncing: IO-APIC + timer doesn't work! Boot with apic=debug
| and send a report. Then try booting with the 'noapic' option
this bug has been reported many times during the years, but it was never
reproduced nor fixed.
the bug that i hit was extremely sensitive to .config details.
First i did a .config-bisection - suspecting some .config detail.
That led to CONFIG_X86_MCE: enabling X86_MCE magically made the bug disappear
and the system would boot up just fine.
Debugging my way through the MCE code ended up identifying two unlikely
candidates: the thing that made a real difference to the hang was that
X86_MCE did two printks:
Intel machine check architecture supported.
Intel machine check reporting enabled on CPU#1.
Adding the same printks to a !CONFIG_X86_MCE kernel made the bug go away!
this left timing as the main suspect: i experimented with adding various
udelay()s to the arch/x86/kernel/io_apic_32.c:check_timer() function, and
the race window turned out to be narrower than 30 microseconds (!).
That made debugging especially funny, debugging without having printk
ability before the bug hits is ... interesting ;-)
eventually i started suspecting IRQ activities - those are pretty much the
only thing that happen this early during bootup and have the timescale of
a few dozen microseconds. Also, check_timer() changes the IRQ hardware
in various creative ways, so the main candidate became IRQ0 interaction.
i've added a counter to track timer irqs (on which core they arrived, at
what exact time, etc.) and found that no timer IRQ would arrive after the
bug condition hits - even if we re-enable IRQ0 and re-initialize the i8259A,
but that we'd get a small number of timer irqs right around the time when we
call the check_timer() function.
Eventually i got the following backtrace triggered from debug code in the
timer interrupt:
...trying to set up timer as Virtual Wire IRQ... failed.
...trying to set up timer as ExtINT IRQ...
Pid: 1, comm: swapper Not tainted (2.6.24-rc5 #57)
EIP: 0060:[<c044d57e>] EFLAGS: 00000246 CPU: 0
EIP is at _spin_unlock_irqrestore+0x5/0x1c
EAX: c0634178 EBX: 00000000 ECX: c4947d63 EDX: 00000246
ESI: 00000002 EDI: 00010031 EBP: c04e0f2e ESP: f7c41df4
DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068
CR0: 8005003b CR2: ffe04000 CR3: 00630000 CR4: 000006d0
DR0: 00000000 DR1: 00000000 DR2: 00000000 DR3: 00000000
DR6: ffff0ff0 DR7: 00000400
[<c05f5784>] setup_IO_APIC+0x9c3/0xc5c
the spin_unlock() was called from init_8259A(). Wait ... we have an IRQ0
entry while we are in the middle of setting up the local APIC, the i8259A
and the PIT??
That is certainly not how it's supposed to work! check_timer() was supposed
to be called with irqs turned off - but this eroded away sometime in the
past. This code would still work most of the time because this code runs
very quickly, but if just the right timing conditions are present and IRQ0
hits in this small, ~30 usecs window, timer irqs stop and the system does
not boot up. Also, given how early this is during bootup, the hang is
very deterministic - but it would only occur on certain machines (and
certain configs).
The fix was quite simple: disable/restore interrupts properly in this
function. With that in place the test-system now boots up just fine.
(64-bit x86 io_apic_64.c had the same bug.)
Phew! One down, only 1500 other kernel bugs are left ;-)
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2007-12-18 12:05:58 -05:00
|
|
|
out:
|
|
|
|
local_irq_restore(flags);
|
2005-04-16 18:20:36 -04:00
|
|
|
}
|
|
|
|
|
2005-05-20 17:27:59 -04:00
|
|
|
static int __init notimercheck(char *s)
|
|
|
|
{
|
|
|
|
no_timer_check = 1;
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
__setup("no_timer_check", notimercheck);
|
|
|
|
|
2005-04-16 18:20:36 -04:00
|
|
|
/*
|
x86: I/O APIC: Never configure IRQ2
There is no such entity as ISA IRQ2. The ACPI spec does not make it
explicitly clear, but does not preclude it either -- all it says is ISA
legacy interrupts are identity mapped by default (subject to overrides),
but it does not state whether IRQ2 exists or not. As a result if there is
no IRQ0 override, then IRQ2 is normally initialised as an ISA interrupt,
which implies an edge-triggered line, which is unmasked by default as this
is what we do for edge-triggered I/O APIC interrupts so as not to miss an
edge.
To the best of my knowledge it is useless, as IRQ2 has not been in use
since the PC/AT as back then it was taken by the 8259A cascade interrupt
to the slave, with the line position in the slot rerouted to newly-created
IRQ9. No device could thus make use of this line with the pair of 8259A
chips. Now in theory INTIN2 of the I/O APIC may be usable, but the
interrupt of the device wired to it would not be available in the PIC mode
at all, so I seriously doubt if anybody decided to reuse it for a regular
device.
However there are two common uses of INTIN2. One is for IRQ0, with an
ACPI interrupt override (or its equivalent in the MP table). But in this
case IRQ2 is gone entirely with INTIN0 left vacant. The other one is for
an 8259A ExtINTA cascade. In this case IRQ0 goes to INTIN0 and if ACPI is
used INTIN2 is assumed to be IRQ2 (there is no override and ACPI has no
way to report ExtINTA interrupts). This is where a problem happens.
The problem is INTIN2 is configured as a native APIC interrupt, with a
vector assigned and the mask cleared. And the line may indeed get active
and inject interrupts if the master 8259A has its timer interrupt enabled
(it might happen for other interrupts too, but they are normally masked in
the process of rerouting them to the I/O APIC). There are two cases where
it will happen:
* When the I/O APIC NMI watchdog is enabled. This is actually a misnomer
as the watchdog pulses are delivered through the 8259A to the LINT0
inputs of all the local APICs in the system. The implication is the
output of the master 8259A goes high and low repeatedly, signalling
interrupts to INTIN2 which is enabled too!
[The origin of the name is I think for a brief period during the
development we had a capability in our code to configure the watchdog to
use an I/O APIC input; that would be INTIN2 in this scenario.]
* When the native route of IRQ0 via INTIN0 fails for whatever reason -- as
it happens with the system considered here. In this scenario the timer
pulse is delivered through the 8259A to LINT0 input of the local APIC of
the bootstrap processor, quite similarly to how is done for the watchdog
described above. The result is, again, INTIN2 receives these pulses
too. Rafael's system used to escape this scenario, because an incorrect
IRQ0 override would occupy INTIN2 and prevent it from being unmasked.
My conclusion is IRQ2 should be excluded from configuration in all the
cases and the current exception for ACPI systems should be lifted. The
reason being the exception not only being useless, but harmful as well.
Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org>
Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
Cc: Matthew Garrett <mjg59@srcf.ucam.org>
Cc: Andreas Herrmann <andreas.herrmann3@amd.com>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-07-11 14:35:23 -04:00
|
|
|
* Traditionally ISA IRQ2 is the cascade IRQ, and is not available
|
|
|
|
* to devices. However there may be an I/O APIC pin available for
|
|
|
|
* this interrupt regardless. The pin may be left unconnected, but
|
|
|
|
* typically it will be reused as an ExtINT cascade interrupt for
|
|
|
|
* the master 8259A. In the MPS case such a pin will normally be
|
|
|
|
* reported as an ExtINT interrupt in the MP table. With ACPI
|
|
|
|
* there is no provision for ExtINT interrupts, and in the absence
|
|
|
|
* of an override it would be treated as an ordinary ISA I/O APIC
|
|
|
|
* interrupt, that is edge-triggered and unmasked by default. We
|
|
|
|
* used to do this, but it caused problems on some systems because
|
|
|
|
* of the NMI watchdog and sometimes IRQ0 of the 8254 timer using
|
|
|
|
* the same ExtINT cascade interrupt to drive the local APIC of the
|
|
|
|
* bootstrap processor. Therefore we refrain from routing IRQ2 to
|
|
|
|
* the I/O APIC in all cases now. No actual device should request
|
|
|
|
* it anyway. --macro
|
2005-04-16 18:20:36 -04:00
|
|
|
*/
|
|
|
|
/* Bitmask with only bit 2 (IRQ2) set. */
#define PIC_IRQS	(1 << 2)
|
|
|
|
|
|
|
|
/*
 * Bring up the I/O APIC interrupt routing at boot.
 *
 * enable_IO_APIC() is not called from here; for the boot processor
 * that call was moved to setup_local_APIC.
 */
void __init setup_IO_APIC(void)
{
	/* Every IRQ except the ones in PIC_IRQS is handled by the I/O APIC. */
	io_apic_irqs = ~PIC_IRQS;

	apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");

	/* The order of the following steps is kept exactly as it was. */
	sync_Arb_IDs();
	setup_IO_APIC_irqs();
	init_IO_APIC_traps();
	check_timer();
}
|
|
|
|
|
|
|
|
struct sysfs_ioapic_data {
|
|
|
|
struct sys_device dev;
|
|
|
|
struct IO_APIC_route_entry entry[0];
|
|
|
|
};
|
|
|
|
static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS];
|
|
|
|
|
2005-04-16 18:25:31 -04:00
|
|
|
/*
 * Suspend callback: read every route entry of the I/O APIC identified
 * by dev->id and store it in the entry[] array that trails @dev.
 * @state is unused. Always returns 0.
 */
static int ioapic_suspend(struct sys_device *dev, pm_message_t state)
{
	struct sysfs_ioapic_data *data;
	int pin;

	data = container_of(dev, struct sysfs_ioapic_data, dev);

	for (pin = 0; pin < nr_ioapic_registers[dev->id]; pin++)
		data->entry[pin] = ioapic_read_entry(dev->id, pin);

	return 0;
}
|
|
|
|
|
|
|
|
static int ioapic_resume(struct sys_device *dev)
|
|
|
|
{
|
|
|
|
struct IO_APIC_route_entry *entry;
|
|
|
|
struct sysfs_ioapic_data *data;
|
|
|
|
unsigned long flags;
|
|
|
|
union IO_APIC_reg_00 reg_00;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
data = container_of(dev, struct sysfs_ioapic_data, dev);
|
|
|
|
entry = data->entry;
|
|
|
|
|
|
|
|
spin_lock_irqsave(&ioapic_lock, flags);
|
|
|
|
reg_00.raw = io_apic_read(dev->id, 0);
|
2008-05-14 11:03:10 -04:00
|
|
|
if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) {
|
|
|
|
reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid;
|
2005-04-16 18:20:36 -04:00
|
|
|
io_apic_write(dev->id, 0, reg_00.raw);
|
|
|
|
}
|
|
|
|
spin_unlock_irqrestore(&ioapic_lock, flags);
|
2006-09-26 04:52:30 -04:00
|
|
|
for (i = 0; i < nr_ioapic_registers[dev->id]; i++)
|
|
|
|
ioapic_write_entry(dev->id, i, entry[i]);
|
2005-04-16 18:20:36 -04:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct sysdev_class ioapic_sysdev_class = {
|
2007-12-19 20:09:39 -05:00
|
|
|
.name = "ioapic",
|
2005-04-16 18:20:36 -04:00
|
|
|
.suspend = ioapic_suspend,
|
|
|
|
.resume = ioapic_resume,
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Register the "ioapic" sysdev class and one sys_device per I/O APIC.
 *
 * For each I/O APIC a sysfs_ioapic_data is allocated with room for
 * nr_ioapic_registers[i] trailing route entries. A failed allocation or
 * a failed sysdev_register() is logged and that I/O APIC is skipped; its
 * mp_ioapic_data[] slot is left NULL.
 *
 * Returns the error from sysdev_class_register() if the class cannot be
 * registered, otherwise 0 (per-device failures are not propagated).
 */
static int __init ioapic_init_sysfs(void)
{
	struct sys_device *dev;
	int i, size, error;

	error = sysdev_class_register(&ioapic_sysdev_class);
	if (error)
		return error;

	for (i = 0; i < nr_ioapics; i++) {
		/*
		 * Size the allocation from the container structure, not from
		 * the embedded sys_device, so any padding in front of the
		 * trailing entry[] array is accounted for.
		 */
		size = sizeof(*mp_ioapic_data[i])
			+ nr_ioapic_registers[i]
			* sizeof(struct IO_APIC_route_entry);
		mp_ioapic_data[i] = kzalloc(size, GFP_KERNEL);
		if (!mp_ioapic_data[i]) {
			printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
			continue;
		}

		dev = &mp_ioapic_data[i]->dev;
		dev->id = i;
		dev->cls = &ioapic_sysdev_class;

		error = sysdev_register(dev);
		if (error) {
			/* Drop the allocation and clear the slot. */
			kfree(mp_ioapic_data[i]);
			mp_ioapic_data[i] = NULL;
			printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
			continue;
		}
	}

	return 0;
}

device_initcall(ioapic_init_sysfs);
|
|
|
|
|
2006-10-04 05:16:40 -04:00
|
|
|
/*
|
2006-10-04 05:16:46 -04:00
|
|
|
* Dynamic irq allocate and deallocation
|
2006-10-04 05:16:40 -04:00
|
|
|
*/
|
|
|
|
int create_irq(void)
|
|
|
|
{
|
2006-10-04 05:16:46 -04:00
|
|
|
/* Allocate an unused irq */
|
|
|
|
int irq;
|
|
|
|
int new;
|
2006-10-04 05:16:40 -04:00
|
|
|
unsigned long flags;
|
2008-08-19 23:50:06 -04:00
|
|
|
struct irq_cfg *cfg_new;
|
2006-10-04 05:16:40 -04:00
|
|
|
|
2006-10-04 05:16:46 -04:00
|
|
|
irq = -ENOSPC;
|
|
|
|
spin_lock_irqsave(&vector_lock, flags);
|
2008-08-19 23:49:48 -04:00
|
|
|
for (new = (nr_irqs - 1); new >= 0; new--) {
|
2006-10-04 05:16:46 -04:00
|
|
|
if (platform_legacy_irq(new))
|
|
|
|
continue;
|
2008-08-19 23:50:06 -04:00
|
|
|
cfg_new = irq_cfg(new);
|
|
|
|
if (cfg_new && cfg_new->vector != 0)
|
2006-10-04 05:16:46 -04:00
|
|
|
continue;
|
2008-08-19 23:50:06 -04:00
|
|
|
/* check if need to create one */
|
|
|
|
if (!cfg_new)
|
|
|
|
cfg_new = irq_cfg_alloc(new);
|
2007-02-23 06:35:05 -05:00
|
|
|
if (__assign_irq_vector(new, TARGET_CPUS) == 0)
|
2006-10-04 05:16:46 -04:00
|
|
|
irq = new;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
spin_unlock_irqrestore(&vector_lock, flags);
|
2006-10-04 05:16:40 -04:00
|
|
|
|
2006-10-04 05:16:46 -04:00
|
|
|
if (irq >= 0) {
|
2006-10-04 05:16:40 -04:00
|
|
|
dynamic_irq_init(irq);
|
|
|
|
}
|
|
|
|
return irq;
|
|
|
|
}
|
|
|
|
|
|
|
|
void destroy_irq(unsigned int irq)
|
|
|
|
{
|
|
|
|
unsigned long flags;
|
|
|
|
|
|
|
|
dynamic_irq_cleanup(irq);
|
|
|
|
|
2008-07-10 14:16:57 -04:00
|
|
|
#ifdef CONFIG_INTR_REMAP
|
|
|
|
free_irte(irq);
|
|
|
|
#endif
|
2006-10-04 05:16:40 -04:00
|
|
|
spin_lock_irqsave(&vector_lock, flags);
|
2006-12-06 20:14:05 -05:00
|
|
|
__clear_irq_vector(irq);
|
2006-10-04 05:16:40 -04:00
|
|
|
spin_unlock_irqrestore(&vector_lock, flags);
|
|
|
|
}
|
|
|
|
|
2006-10-04 05:16:42 -04:00
|
|
|
/*
|
2007-10-19 19:25:36 -04:00
|
|
|
* MSI message composition
|
2006-10-04 05:16:42 -04:00
|
|
|
*/
|
|
|
|
#ifdef CONFIG_PCI_MSI
|
2006-10-04 05:16:59 -04:00
|
|
|
static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
|
2006-10-04 05:16:42 -04:00
|
|
|
{
|
2008-08-19 23:50:06 -04:00
|
|
|
struct irq_cfg *cfg;
|
2007-02-23 06:35:05 -05:00
|
|
|
int err;
|
2006-10-04 05:16:42 -04:00
|
|
|
unsigned dest;
|
2006-10-08 09:47:55 -04:00
|
|
|
cpumask_t tmp;
|
2006-10-04 05:16:42 -04:00
|
|
|
|
2007-02-23 06:35:05 -05:00
|
|
|
tmp = TARGET_CPUS;
|
|
|
|
err = assign_irq_vector(irq, tmp);
|
2008-07-10 14:16:57 -04:00
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
2008-08-19 23:50:06 -04:00
|
|
|
cfg = irq_cfg(irq);
|
2008-07-10 14:16:57 -04:00
|
|
|
cpus_and(tmp, cfg->domain, tmp);
|
|
|
|
dest = cpu_mask_to_apicid(tmp);
|
|
|
|
|
|
|
|
#ifdef CONFIG_INTR_REMAP
|
|
|
|
if (irq_remapped(irq)) {
|
|
|
|
struct irte irte;
|
|
|
|
int ir_index;
|
|
|
|
u16 sub_handle;
|
|
|
|
|
|
|
|
ir_index = map_irq_to_irte_handle(irq, &sub_handle);
|
|
|
|
BUG_ON(ir_index == -1);
|
|
|
|
|
|
|
|
memset (&irte, 0, sizeof(irte));
|
|
|
|
|
|
|
|
irte.present = 1;
|
|
|
|
irte.dst_mode = INT_DEST_MODE;
|
|
|
|
irte.trigger_mode = 0; /* edge */
|
|
|
|
irte.dlvry_mode = INT_DELIVERY_MODE;
|
|
|
|
irte.vector = cfg->vector;
|
|
|
|
irte.dest_id = IRTE_DEST(dest);
|
|
|
|
|
|
|
|
modify_irte(irq, &irte);
|
2006-10-04 05:16:42 -04:00
|
|
|
|
2008-07-10 14:16:57 -04:00
|
|
|
msg->address_hi = MSI_ADDR_BASE_HI;
|
|
|
|
msg->data = sub_handle;
|
|
|
|
msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT |
|
|
|
|
MSI_ADDR_IR_SHV |
|
|
|
|
MSI_ADDR_IR_INDEX1(ir_index) |
|
|
|
|
MSI_ADDR_IR_INDEX2(ir_index);
|
|
|
|
} else
|
|
|
|
#endif
|
|
|
|
{
|
2006-10-04 05:16:42 -04:00
|
|
|
msg->address_hi = MSI_ADDR_BASE_HI;
|
|
|
|
msg->address_lo =
|
|
|
|
MSI_ADDR_BASE_LO |
|
|
|
|
((INT_DEST_MODE == 0) ?
|
|
|
|
MSI_ADDR_DEST_MODE_PHYSICAL:
|
|
|
|
MSI_ADDR_DEST_MODE_LOGICAL) |
|
|
|
|
((INT_DELIVERY_MODE != dest_LowestPrio) ?
|
|
|
|
MSI_ADDR_REDIRECTION_CPU:
|
|
|
|
MSI_ADDR_REDIRECTION_LOWPRI) |
|
|
|
|
MSI_ADDR_DEST_ID(dest);
|
|
|
|
|
|
|
|
msg->data =
|
|
|
|
MSI_DATA_TRIGGER_EDGE |
|
|
|
|
MSI_DATA_LEVEL_ASSERT |
|
|
|
|
((INT_DELIVERY_MODE != dest_LowestPrio) ?
|
|
|
|
MSI_DATA_DELIVERY_FIXED:
|
|
|
|
MSI_DATA_DELIVERY_LOWPRI) |
|
2007-02-23 06:35:05 -05:00
|
|
|
MSI_DATA_VECTOR(cfg->vector);
|
2006-10-04 05:16:42 -04:00
|
|
|
}
|
2007-02-23 06:35:05 -05:00
|
|
|
return err;
|
2006-10-04 05:16:42 -04:00
|
|
|
}
|
|
|
|
|
2006-10-04 05:16:59 -04:00
|
|
|
#ifdef CONFIG_SMP
|
|
|
|
/*
 * Retarget an MSI irq at @mask: assign a vector for the new mask, patch
 * the vector and destination id into the MSI message, and record the
 * new affinity in the irq descriptor.  Does nothing when @mask contains
 * no online cpu or when no vector can be assigned.
 */
static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
{
	struct irq_cfg *cfg;
	struct msi_msg msg;
	cpumask_t cpus;
	unsigned int apicid;

	cpus_and(cpus, mask, cpu_online_map);
	if (cpus_empty(cpus) || assign_irq_vector(irq, mask))
		return;

	cfg = irq_cfg(irq);
	cpus_and(cpus, cfg->domain, mask);
	apicid = cpu_mask_to_apicid(cpus);

	read_msi_msg(irq, &msg);

	msg.data = (msg.data & ~MSI_DATA_VECTOR_MASK) |
		   MSI_DATA_VECTOR(cfg->vector);
	msg.address_lo = (msg.address_lo & ~MSI_ADDR_DEST_ID_MASK) |
			 MSI_ADDR_DEST_ID(apicid);

	write_msi_msg(irq, &msg);
	irq_to_desc(irq)->affinity = mask;
}
|
2008-07-10 14:16:57 -04:00
|
|
|
|
|
|
|
#ifdef CONFIG_INTR_REMAP
|
|
|
|
/*
|
|
|
|
* Migrate the MSI irq to another cpumask. This migration is
|
|
|
|
* done in the process context using interrupt-remapping hardware.
|
|
|
|
*/
|
|
|
|
static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
|
|
|
|
{
|
2008-08-19 23:50:06 -04:00
|
|
|
struct irq_cfg *cfg;
|
2008-07-10 14:16:57 -04:00
|
|
|
unsigned int dest;
|
|
|
|
cpumask_t tmp, cleanup_mask;
|
|
|
|
struct irte irte;
|
2008-08-19 23:50:05 -04:00
|
|
|
struct irq_desc *desc;
|
2008-07-10 14:16:57 -04:00
|
|
|
|
|
|
|
cpus_and(tmp, mask, cpu_online_map);
|
|
|
|
if (cpus_empty(tmp))
|
|
|
|
return;
|
|
|
|
|
|
|
|
if (get_irte(irq, &irte))
|
|
|
|
return;
|
|
|
|
|
|
|
|
if (assign_irq_vector(irq, mask))
|
|
|
|
return;
|
|
|
|
|
2008-08-19 23:50:06 -04:00
|
|
|
cfg = irq_cfg(irq);
|
2008-07-10 14:16:57 -04:00
|
|
|
cpus_and(tmp, cfg->domain, mask);
|
|
|
|
dest = cpu_mask_to_apicid(tmp);
|
|
|
|
|
|
|
|
irte.vector = cfg->vector;
|
|
|
|
irte.dest_id = IRTE_DEST(dest);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* atomically update the IRTE with the new destination and vector.
|
|
|
|
*/
|
|
|
|
modify_irte(irq, &irte);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* After this point, all the interrupts will start arriving
|
|
|
|
* at the new destination. So, time to cleanup the previous
|
|
|
|
* vector allocation.
|
|
|
|
*/
|
|
|
|
if (cfg->move_in_progress) {
|
|
|
|
cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
|
|
|
|
cfg->move_cleanup_count = cpus_weight(cleanup_mask);
|
|
|
|
send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
|
|
|
|
cfg->move_in_progress = 0;
|
|
|
|
}
|
|
|
|
|
2008-08-19 23:50:05 -04:00
|
|
|
desc = irq_to_desc(irq);
|
|
|
|
desc->affinity = mask;
|
2008-07-10 14:16:57 -04:00
|
|
|
}
|
|
|
|
#endif
|
2006-10-04 05:16:59 -04:00
|
|
|
#endif /* CONFIG_SMP */
|
2006-10-04 05:16:42 -04:00
|
|
|
|
2006-10-04 05:16:59 -04:00
|
|
|
/*
|
|
|
|
* IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
|
|
|
|
* which implement the MSI or MSI-X Capability Structure.
|
|
|
|
*/
|
|
|
|
static struct irq_chip msi_chip = {
|
|
|
|
.name = "PCI-MSI",
|
|
|
|
.unmask = unmask_msi_irq,
|
|
|
|
.mask = mask_msi_irq,
|
|
|
|
.ack = ack_apic_edge,
|
|
|
|
#ifdef CONFIG_SMP
|
|
|
|
.set_affinity = set_msi_irq_affinity,
|
|
|
|
#endif
|
|
|
|
.retrigger = ioapic_retrigger_irq,
|
2006-10-04 05:16:42 -04:00
|
|
|
};
|
|
|
|
|
2008-07-10 14:16:57 -04:00
|
|
|
#ifdef CONFIG_INTR_REMAP
|
|
|
|
static struct irq_chip msi_ir_chip = {
|
|
|
|
.name = "IR-PCI-MSI",
|
|
|
|
.unmask = unmask_msi_irq,
|
|
|
|
.mask = mask_msi_irq,
|
|
|
|
.ack = ack_x2apic_edge,
|
|
|
|
#ifdef CONFIG_SMP
|
|
|
|
.set_affinity = ir_set_msi_irq_affinity,
|
|
|
|
#endif
|
|
|
|
.retrigger = ioapic_retrigger_irq,
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Map the PCI dev to the corresponding remapping hardware unit
|
|
|
|
* and allocate 'nvec' consecutive interrupt-remapping table entries
|
|
|
|
* in it.
|
|
|
|
*/
|
|
|
|
static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
|
2006-10-04 05:16:59 -04:00
|
|
|
{
|
2008-07-10 14:16:57 -04:00
|
|
|
struct intel_iommu *iommu;
|
|
|
|
int index;
|
|
|
|
|
|
|
|
iommu = map_dev_to_ir(dev);
|
|
|
|
if (!iommu) {
|
|
|
|
printk(KERN_ERR
|
|
|
|
"Unable to map PCI %s to iommu\n", pci_name(dev));
|
|
|
|
return -ENOENT;
|
|
|
|
}
|
|
|
|
|
|
|
|
index = alloc_irte(iommu, irq, nvec);
|
|
|
|
if (index < 0) {
|
|
|
|
printk(KERN_ERR
|
|
|
|
"Unable to allocate %d IRTE for PCI %s\n", nvec,
|
|
|
|
pci_name(dev));
|
|
|
|
return -ENOSPC;
|
|
|
|
}
|
|
|
|
return index;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
|
|
|
|
{
|
|
|
|
int ret;
|
2006-10-04 05:16:59 -04:00
|
|
|
struct msi_msg msg;
|
2008-07-10 14:16:57 -04:00
|
|
|
|
|
|
|
ret = msi_compose_msg(dev, irq, &msg);
|
|
|
|
if (ret < 0)
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
set_irq_msi(irq, desc);
|
|
|
|
write_msi_msg(irq, &msg);
|
|
|
|
|
|
|
|
#ifdef CONFIG_INTR_REMAP
|
|
|
|
if (irq_remapped(irq)) {
|
2008-08-19 23:50:05 -04:00
|
|
|
struct irq_desc *desc = irq_to_desc(irq);
|
2008-07-10 14:16:57 -04:00
|
|
|
/*
|
|
|
|
* irq migration in process context
|
|
|
|
*/
|
|
|
|
desc->status |= IRQ_MOVE_PCNTXT;
|
|
|
|
set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge");
|
|
|
|
} else
|
|
|
|
#endif
|
|
|
|
set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Create an irq for a single MSI, allocate one IRTE for it when
 * interrupt remapping is enabled, and set it up.  The irq is destroyed
 * again if any step after its creation fails.
 *
 * Returns 0 on success or a negative error code.
 */
int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
{
	int irq = create_irq();
	int ret;

	if (irq < 0)
		return irq;

#ifdef CONFIG_INTR_REMAP
	if (intr_remapping_enabled) {
		ret = msi_alloc_irte(dev, irq, 1);
		if (ret < 0) {
			destroy_irq(irq);
			return ret;
		}
	}
#endif

	ret = setup_msi_irq(dev, desc, irq);
	if (ret < 0) {
		destroy_irq(irq);
		return ret;
	}

	return 0;
}
|
2006-10-04 05:16:59 -04:00
|
|
|
|
2008-07-10 14:16:57 -04:00
|
|
|
int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
|
|
|
|
{
|
|
|
|
int irq, ret, sub_handle;
|
|
|
|
struct msi_desc *desc;
|
|
|
|
#ifdef CONFIG_INTR_REMAP
|
|
|
|
struct intel_iommu *iommu = 0;
|
|
|
|
int index = 0;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
sub_handle = 0;
|
|
|
|
list_for_each_entry(desc, &dev->msi_list, list) {
|
|
|
|
irq = create_irq();
|
|
|
|
if (irq < 0)
|
|
|
|
return irq;
|
|
|
|
#ifdef CONFIG_INTR_REMAP
|
|
|
|
if (!intr_remapping_enabled)
|
|
|
|
goto no_ir;
|
2006-10-04 05:16:59 -04:00
|
|
|
|
2008-07-10 14:16:57 -04:00
|
|
|
if (!sub_handle) {
|
|
|
|
/*
|
|
|
|
* allocate the consecutive block of IRTE's
|
|
|
|
* for 'nvec'
|
|
|
|
*/
|
|
|
|
index = msi_alloc_irte(dev, irq, nvec);
|
|
|
|
if (index < 0) {
|
|
|
|
ret = index;
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
iommu = map_dev_to_ir(dev);
|
|
|
|
if (!iommu) {
|
|
|
|
ret = -ENOENT;
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
* setup the mapping between the irq and the IRTE
|
|
|
|
* base index, the sub_handle pointing to the
|
|
|
|
* appropriate interrupt remap table entry.
|
|
|
|
*/
|
|
|
|
set_irte_irq(irq, iommu, index, sub_handle);
|
|
|
|
}
|
|
|
|
no_ir:
|
|
|
|
#endif
|
|
|
|
ret = setup_msi_irq(dev, desc, irq);
|
|
|
|
if (ret < 0)
|
|
|
|
goto error;
|
|
|
|
sub_handle++;
|
|
|
|
}
|
2007-04-18 05:39:21 -04:00
|
|
|
return 0;
|
2008-07-10 14:16:57 -04:00
|
|
|
|
|
|
|
error:
|
|
|
|
destroy_irq(irq);
|
|
|
|
return ret;
|
2006-10-04 05:16:59 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Arch hook for MSI teardown: simply destroys @irq. */
void arch_teardown_msi_irq(unsigned int irq)
{
	destroy_irq(irq);
}
|
|
|
|
|
2007-10-21 19:41:54 -04:00
|
|
|
#ifdef CONFIG_DMAR
|
|
|
|
#ifdef CONFIG_SMP
|
|
|
|
/*
 * Retarget the DMAR MSI irq at @mask: assign a vector for the new mask,
 * patch vector and destination id into the DMAR MSI message and record
 * the new affinity.  Does nothing when @mask contains no online cpu or
 * when no vector can be assigned.
 */
static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
{
	struct irq_cfg *cfg;
	struct msi_msg msg;
	cpumask_t cpus;
	unsigned int apicid;

	cpus_and(cpus, mask, cpu_online_map);
	if (cpus_empty(cpus) || assign_irq_vector(irq, mask))
		return;

	cfg = irq_cfg(irq);
	cpus_and(cpus, cfg->domain, mask);
	apicid = cpu_mask_to_apicid(cpus);

	dmar_msi_read(irq, &msg);

	msg.data = (msg.data & ~MSI_DATA_VECTOR_MASK) |
		   MSI_DATA_VECTOR(cfg->vector);
	msg.address_lo = (msg.address_lo & ~MSI_ADDR_DEST_ID_MASK) |
			 MSI_ADDR_DEST_ID(apicid);

	dmar_msi_write(irq, &msg);
	irq_to_desc(irq)->affinity = mask;
}
|
|
|
|
#endif /* CONFIG_SMP */
|
|
|
|
|
|
|
|
struct irq_chip dmar_msi_type = {
|
|
|
|
.name = "DMAR_MSI",
|
|
|
|
.unmask = dmar_msi_unmask,
|
|
|
|
.mask = dmar_msi_mask,
|
|
|
|
.ack = ack_apic_edge,
|
|
|
|
#ifdef CONFIG_SMP
|
|
|
|
.set_affinity = dmar_msi_set_affinity,
|
|
|
|
#endif
|
|
|
|
.retrigger = ioapic_retrigger_irq,
|
|
|
|
};
|
|
|
|
|
|
|
|
int arch_setup_dmar_msi(unsigned int irq)
|
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
struct msi_msg msg;
|
|
|
|
|
|
|
|
ret = msi_compose_msg(NULL, irq, &msg);
|
|
|
|
if (ret < 0)
|
|
|
|
return ret;
|
|
|
|
dmar_msi_write(irq, &msg);
|
|
|
|
set_irq_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq,
|
|
|
|
"edge");
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
#endif
|
2006-10-04 05:16:42 -04:00
|
|
|
|
2007-10-21 19:41:54 -04:00
|
|
|
#endif /* CONFIG_PCI_MSI */
|
2006-10-04 05:16:55 -04:00
|
|
|
/*
|
|
|
|
* Hypertransport interrupt support
|
|
|
|
*/
|
|
|
|
#ifdef CONFIG_HT_IRQ
|
|
|
|
|
|
|
|
#ifdef CONFIG_SMP
|
|
|
|
|
|
|
|
/*
 * Rewrite the HyperTransport irq message of @irq so that it carries
 * @vector and the destination id @dest; all other message bits are
 * preserved.
 */
static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
{
	struct ht_irq_msg msg;

	fetch_ht_irq_msg(irq, &msg);

	msg.address_lo = (msg.address_lo &
			  ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK)) |
			 HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest);
	msg.address_hi = (msg.address_hi & ~(HT_IRQ_HIGH_DEST_ID_MASK)) |
			 HT_IRQ_HIGH_DEST_ID(dest);

	write_ht_irq_msg(irq, &msg);
}
|
|
|
|
|
|
|
|
/*
 * Retarget a HyperTransport irq at @mask: assign a vector for the new
 * mask, reprogram the HT message via target_ht_irq() and record the new
 * affinity.  Does nothing when @mask contains no online cpu or when no
 * vector can be assigned.
 */
static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
{
	struct irq_cfg *cfg;
	cpumask_t cpus;
	unsigned int apicid;

	cpus_and(cpus, mask, cpu_online_map);
	if (cpus_empty(cpus) || assign_irq_vector(irq, mask))
		return;

	cfg = irq_cfg(irq);
	cpus_and(cpus, cfg->domain, mask);
	apicid = cpu_mask_to_apicid(cpus);

	target_ht_irq(irq, apicid, cfg->vector);
	irq_to_desc(irq)->affinity = mask;
}
|
|
|
|
#endif
|
|
|
|
|
2006-10-11 04:20:43 -04:00
|
|
|
static struct irq_chip ht_irq_chip = {
|
2006-10-04 05:16:55 -04:00
|
|
|
.name = "PCI-HT",
|
|
|
|
.mask = mask_ht_irq,
|
|
|
|
.unmask = unmask_ht_irq,
|
|
|
|
.ack = ack_apic_edge,
|
|
|
|
#ifdef CONFIG_SMP
|
|
|
|
.set_affinity = set_ht_irq_affinity,
|
|
|
|
#endif
|
|
|
|
.retrigger = ioapic_retrigger_irq,
|
|
|
|
};
|
|
|
|
|
|
|
|
int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
|
|
|
|
{
|
2008-08-19 23:50:06 -04:00
|
|
|
struct irq_cfg *cfg;
|
2007-02-23 06:35:05 -05:00
|
|
|
int err;
|
2006-10-08 09:47:55 -04:00
|
|
|
cpumask_t tmp;
|
2006-10-04 05:16:55 -04:00
|
|
|
|
2007-02-23 06:35:05 -05:00
|
|
|
tmp = TARGET_CPUS;
|
|
|
|
err = assign_irq_vector(irq, tmp);
|
|
|
|
if (!err) {
|
2006-11-08 20:44:57 -05:00
|
|
|
struct ht_irq_msg msg;
|
2006-10-04 05:16:55 -04:00
|
|
|
unsigned dest;
|
|
|
|
|
2008-08-19 23:50:06 -04:00
|
|
|
cfg = irq_cfg(irq);
|
2007-02-23 06:35:05 -05:00
|
|
|
cpus_and(tmp, cfg->domain, tmp);
|
2006-10-04 05:16:55 -04:00
|
|
|
dest = cpu_mask_to_apicid(tmp);
|
|
|
|
|
2006-11-08 20:44:57 -05:00
|
|
|
msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
|
2006-10-04 05:16:55 -04:00
|
|
|
|
2006-11-08 20:44:57 -05:00
|
|
|
msg.address_lo =
|
|
|
|
HT_IRQ_LOW_BASE |
|
2006-10-04 05:16:55 -04:00
|
|
|
HT_IRQ_LOW_DEST_ID(dest) |
|
2007-02-23 06:35:05 -05:00
|
|
|
HT_IRQ_LOW_VECTOR(cfg->vector) |
|
2006-10-04 05:16:55 -04:00
|
|
|
((INT_DEST_MODE == 0) ?
|
|
|
|
HT_IRQ_LOW_DM_PHYSICAL :
|
|
|
|
HT_IRQ_LOW_DM_LOGICAL) |
|
|
|
|
HT_IRQ_LOW_RQEOI_EDGE |
|
|
|
|
((INT_DELIVERY_MODE != dest_LowestPrio) ?
|
|
|
|
HT_IRQ_LOW_MT_FIXED :
|
2006-11-08 20:44:57 -05:00
|
|
|
HT_IRQ_LOW_MT_ARBITRATED) |
|
|
|
|
HT_IRQ_LOW_IRQ_MASKED;
|
2006-10-04 05:16:55 -04:00
|
|
|
|
2006-11-08 20:44:57 -05:00
|
|
|
write_ht_irq_msg(irq, &msg);
|
2006-10-04 05:16:55 -04:00
|
|
|
|
2006-10-17 03:10:03 -04:00
|
|
|
set_irq_chip_and_handler_name(irq, &ht_irq_chip,
|
|
|
|
handle_edge_irq, "edge");
|
2006-10-04 05:16:55 -04:00
|
|
|
}
|
2007-02-23 06:35:05 -05:00
|
|
|
return err;
|
2006-10-04 05:16:55 -04:00
|
|
|
}
|
|
|
|
#endif /* CONFIG_HT_IRQ */
|
|
|
|
|
2005-04-16 18:20:36 -04:00
|
|
|
/* --------------------------------------------------------------------------
|
|
|
|
ACPI-based IOAPIC Configuration
|
|
|
|
-------------------------------------------------------------------------- */
|
|
|
|
|
2005-08-24 12:07:20 -04:00
|
|
|
#ifdef CONFIG_ACPI
|
2005-04-16 18:20:36 -04:00
|
|
|
|
|
|
|
#define IO_APIC_MAX_ID 0xFE
|
|
|
|
|
|
|
|
/*
 * Read IO-APIC register 1 of @ioapic under ioapic_lock and return its
 * 'entries' field.
 */
int __init io_apic_get_redir_entries (int ioapic)
{
	union IO_APIC_reg_01 version_reg;
	unsigned long irqflags;

	spin_lock_irqsave(&ioapic_lock, irqflags);
	version_reg.raw = io_apic_read(ioapic, 1);
	spin_unlock_irqrestore(&ioapic_lock, irqflags);

	return version_reg.bits.entries;
}
|
|
|
|
|
|
|
|
|
[ACPI] ACPICA 20050930
Completed a major overhaul of the Resource Manager code -
specifically, optimizations in the area of the AML/internal
resource conversion code. The code has been optimized to
simplify and eliminate duplicated code, CPU stack use has
been decreased by optimizing function parameters and local
variables, and naming conventions across the manager have
been standardized for clarity and ease of maintenance (this
includes function, parameter, variable, and struct/typedef
names.)
All Resource Manager dispatch and information tables have
been moved to a single location for clarity and ease of
maintenance. One new file was created, named "rsinfo.c".
The ACPI return macros (return_ACPI_STATUS, etc.) have
been modified to guarantee that the argument is
not evaluated twice, making them less prone to macro
side-effects. However, since there exists the possibility
of additional stack use if a particular compiler cannot
optimize them (such as in the debug generation case),
the original macros are optionally available. Note that
some invocations of the return_VALUE macro may now cause
size mismatch warnings; the return_UINT8 and return_UINT32
macros are provided to eliminate these. (From Randy Dunlap)
Implemented a new mechanism to enable debug tracing for
individual control methods. A new external interface,
acpi_debug_trace(), is provided to enable this mechanism. The
intent is to allow the host OS to easily enable and disable
tracing for problematic control methods. This interface
can be easily exposed to a user or debugger interface if
desired. See the file psxface.c for details.
acpi_ut_callocate() will now return a valid pointer if a
length of zero is specified - a length of one is used
and a warning is issued. This matches the behavior of
acpi_ut_allocate().
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
2005-09-30 19:03:00 -04:00
|
|
|
int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity)
|
2005-04-16 18:20:36 -04:00
|
|
|
{
|
|
|
|
if (!IO_APIC_IRQ(irq)) {
|
|
|
|
apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
|
|
|
|
ioapic);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2006-10-04 05:16:51 -04:00
|
|
|
/*
|
|
|
|
* IRQs < 16 are already in the irq_2_pin[] map
|
|
|
|
*/
|
|
|
|
if (irq >= 16)
|
|
|
|
add_pin_to_irq(irq, ioapic, pin);
|
|
|
|
|
2007-02-23 06:19:08 -05:00
|
|
|
setup_IO_APIC_irq(ioapic, pin, irq, triggering, polarity);
|
2005-04-16 18:20:36 -04:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2007-11-17 01:05:28 -05:00
|
|
|
int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
if (skip_ioapic_setup)
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
for (i = 0; i < mp_irq_entries; i++)
|
2008-05-14 11:03:17 -04:00
|
|
|
if (mp_irqs[i].mp_irqtype == mp_INT &&
|
|
|
|
mp_irqs[i].mp_srcbusirq == bus_irq)
|
2007-11-17 01:05:28 -05:00
|
|
|
break;
|
|
|
|
if (i >= mp_irq_entries)
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
*trigger = irq_trigger(i);
|
|
|
|
*polarity = irq_polarity(i);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif /* CONFIG_ACPI */
|
2005-04-16 18:20:36 -04:00
|
|
|
|
|
|
|
/*
|
|
|
|
* This function currently is only a helper for the i386 smp boot process where
|
|
|
|
* we need to reprogram the ioredtbls to cater for the cpus which have come online
|
|
|
|
* so mask in all cases should simply be TARGET_CPUS
|
|
|
|
*/
|
[PATCH] x86/x86_64: deferred handling of writes to /proc/irqxx/smp_affinity
When handling writes to /proc/irq, current code is re-programming rte
entries directly. This is not recommended and could potentially cause
chipset's to lockup, or cause missing interrupts.
CONFIG_IRQ_BALANCE does this correctly, where it re-programs only when the
interrupt is pending. The same needs to be done for /proc/irq handling as well.
Otherwise user space irq balancers are really not doing the right thing.
- Changed pending_irq_balance_cpumask to pending_irq_migrate_cpumask for
lack of a generic name.
- added move_irq out of IRQ_BALANCE, and added this same to X86_64
- Added new proc handler for write, so we can do deferred write at irq
handling time.
- Display of /proc/irq/XX/smp_affinity used to display CPU_MASKALL, instead
it now shows only active cpu masks, or exactly what was set.
- Provided a common move_irq implementation, instead of duplicating
when using generic irq framework.
Tested on i386/x86_64 and ia64 with CONFIG_PCI_MSI turned on and off.
Tested UP builds as well.
MSI testing: tbd: I have cards, need to look for a x-over cable, although I
did test an earlier version of this patch. Will test in a couple days.
Signed-off-by: Ashok Raj <ashok.raj@intel.com>
Acked-by: Zwane Mwaikambo <zwane@holomorphy.com>
Grudgingly-acked-by: Andi Kleen <ak@muc.de>
Signed-off-by: Coywolf Qi Hunt <coywolf@lovecn.org>
Signed-off-by: Ashok Raj <ashok.raj@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-09-06 18:16:15 -04:00
|
|
|
#ifdef CONFIG_SMP
|
2005-04-16 18:20:36 -04:00
|
|
|
/*
 * Walk every pin of every IO-APIC that has an mp_INT entry and either
 * set the irq up (when it has no vector yet) or point its affinity at
 * TARGET_CPUS.
 */
void __init setup_ioapic_dest(void)
{
	struct irq_cfg *cfg;
	int apic_id, pin_nr, entry, irq;

	if (skip_ioapic_setup == 1)
		return;

	for (apic_id = 0; apic_id < nr_ioapics; apic_id++) {
		for (pin_nr = 0; pin_nr < nr_ioapic_registers[apic_id]; pin_nr++) {
			entry = find_irq_entry(apic_id, pin_nr, mp_INT);
			if (entry == -1)
				continue;
			irq = pin_2_irq(entry, apic_id, pin_nr);

			/*
			 * setup_IO_APIC_irqs may have failed to get a vector
			 * for some devices when there are too many of them,
			 * because only the boot cpu was online at that time.
			 */
			cfg = irq_cfg(irq);
			if (!cfg->vector) {
				setup_IO_APIC_irq(apic_id, pin_nr, irq,
						  irq_trigger(entry),
						  irq_polarity(entry));
				continue;
			}
#ifdef CONFIG_INTR_REMAP
			if (intr_remapping_enabled) {
				set_ir_ioapic_affinity_irq(irq, TARGET_CPUS);
				continue;
			}
#endif
			set_ioapic_affinity_irq(irq, TARGET_CPUS);
		}
	}
}
|
[PATCH] x86/x86_64: deferred handling of writes to /proc/irqxx/smp_affinity
When handling writes to /proc/irq, current code is re-programming rte
entries directly. This is not recommended and could potentially cause
chipset's to lockup, or cause missing interrupts.
CONFIG_IRQ_BALANCE does this correctly, where it re-programs only when the
interrupt is pending. The same needs to be done for /proc/irq handling as well.
Otherwise user space irq balancers are really not doing the right thing.
- Changed pending_irq_balance_cpumask to pending_irq_migrate_cpumask for
lack of a generic name.
- added move_irq out of IRQ_BALANCE, and added this same to X86_64
- Added new proc handler for write, so we can do deferred write at irq
handling time.
- Display of /proc/irq/XX/smp_affinity used to display CPU_MASKALL, instead
it now shows only active cpu masks, or exactly what was set.
- Provided a common move_irq implementation, instead of duplicating
when using generic irq framework.
Tested on i386/x86_64 and ia64 with CONFIG_PCI_MSI turned on and off.
Tested UP builds as well.
MSI testing: tbd: I have cards, need to look for a x-over cable, although I
did test an earlier version of this patch. Will test in a couple days.
Signed-off-by: Ashok Raj <ashok.raj@intel.com>
Acked-by: Zwane Mwaikambo <zwane@holomorphy.com>
Grudgingly-acked-by: Andi Kleen <ak@muc.de>
Signed-off-by: Coywolf Qi Hunt <coywolf@lovecn.org>
Signed-off-by: Ashok Raj <ashok.raj@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-09-06 18:16:15 -04:00
|
|
|
#endif
|
2007-11-17 01:05:28 -05:00
|
|
|
|
2008-01-30 07:30:19 -05:00
|
|
|
#define IOAPIC_RESOURCE_NAME_SIZE 11
|
|
|
|
|
|
|
|
static struct resource *ioapic_resources;
|
|
|
|
|
|
|
|
/*
 * Allocate and pre-fill the resource descriptors for all IO-APICs.
 *
 * A single bootmem chunk holds nr_ioapics struct resource entries
 * followed by nr_ioapics name buffers of IOAPIC_RESOURCE_NAME_SIZE bytes
 * each.  Every entry gets its name ("IOAPIC <n>") and flags set here;
 * start/end are left for the caller to fill in.
 *
 * The result is also stored in ioapic_resources.  Returns NULL when
 * there are no IO-APICs (ioapic_resources is not touched in that case).
 */
static struct resource * __init ioapic_setup_resources(void)
{
	unsigned long n;
	struct resource *res;
	char *mem;
	int i;

	if (nr_ioapics <= 0)
		return NULL;

	n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource);
	n *= nr_ioapics;

	mem = alloc_bootmem(n);
	res = (void *)mem;

	if (mem != NULL) {
		/* the name buffers start right behind the resource array */
		mem += sizeof(struct resource) * nr_ioapics;

		for (i = 0; i < nr_ioapics; i++) {
			res[i].name = mem;
			res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
			/*
			 * Bound the write to this entry's name slot so an
			 * unexpectedly wide index can never spill into the
			 * next slot or past the allocation.
			 */
			snprintf(mem, IOAPIC_RESOURCE_NAME_SIZE,
				 "IOAPIC %u", i);
			mem += IOAPIC_RESOURCE_NAME_SIZE;
		}
	}

	ioapic_resources = res;

	return res;
}
|
|
|
|
|
|
|
|
/*
 * Map every IO-APIC into consecutive fixmap slots starting at
 * FIX_IO_APIC_BASE_0 and record its 4K physical range in the resource
 * array returned by ioapic_setup_resources().  Without an MP config a
 * freshly allocated bootmem page is mapped instead of the address from
 * mp_ioapics[].
 */
void __init ioapic_init_mappings(void)
{
	unsigned long phys;
	unsigned long idx = FIX_IO_APIC_BASE_0;
	struct resource *res_tbl;
	int i;

	res_tbl = ioapic_setup_resources();

	for (i = 0; i < nr_ioapics; i++, idx++) {
		if (smp_found_config)
			phys = mp_ioapics[i].mp_apicaddr;
		else
			phys = __pa((unsigned long)
				    alloc_bootmem_pages(PAGE_SIZE));

		set_fixmap_nocache(idx, phys);
		apic_printk(APIC_VERBOSE,
			    "mapped IOAPIC to %016lx (%016lx)\n",
			    __fix_to_virt(idx), phys);

		if (res_tbl == NULL)
			continue;

		res_tbl[i].start = phys;
		res_tbl[i].end = phys + (4 * 1024) - 1;
	}
}
|
|
|
|
|
|
|
|
/*
 * Insert the IO-APIC resources recorded in ioapic_resources into
 * iomem_resource, one per IO-APIC.
 *
 * Returns 0 on success, -1 (after logging an error) when the resource
 * array was never set up.
 */
static int __init ioapic_insert_resources(void)
{
	int i;
	struct resource *r = ioapic_resources;

	if (!r) {
		printk(KERN_ERR
		       "IO APIC resources could not be allocated.\n");
		return -1;
	}

	for (i = 0; i < nr_ioapics; i++) {
		insert_resource(&iomem_resource, r);
		r++;
	}

	return 0;
}
|
|
|
|
|
|
|
|
/* Insert the IO APIC resources after PCI initialization has occurred to handle
|
|
|
|
* IO APICS that are mapped in on a BAR in PCI space. */
|
|
|
|
late_initcall(ioapic_insert_resources);
|
|
|
|
|