override platform-specific driver.
See also Documentation/acpi-hotkey.txt.
+ enable_timer_pin_1 [i386,x86-64]
+ Enable PIN 1 of APIC timer
+ Can be useful to work around chipset bugs (in particular on some ATI chipsets)
+ The kernel tries to set a reasonable default.
+
+ disable_timer_pin_1 [i386,x86-64]
+ Disable PIN 1 of APIC timer
+ Can be useful to work around chipset bugs.
+
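(Illustrative usage; bootloader syntax varies. The flag is simply appended to the kernel command line, e.g.:

	kernel /boot/vmlinuz root=/dev/sda1 disable_timer_pin_1

where the image path and root device are placeholders.)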
ad1816= [HW,OSS]
Format: <io>,<irq>,<dma>,<dma2>
See also Documentation/sound/oss/AD1816.
If your BIOS doesn't do that, it's a good idea to enable it anyway
to make sure you log even machine check events that result
in a reboot.
+ mce=tolerancelevel (number)
+ 0: always panic, 1: panic if deadlock possible,
+ 2: try to avoid panic, 3: never panic or exit (for testing)
+ default is 1
+ Can also be set using sysfs, which is preferable.
nomce (for compatibility with i386): same as mce=off
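(Usage sketch for the sysfs route, assuming the 2.6-era layout where each CPU's machine check device exposes a "tolerant" attribute:

	echo 2 > /sys/devices/system/machinecheck/machinecheck0/tolerant

which sets tolerance level 2 on CPU 0 at runtime, equivalent to booting with mce=2.)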
#include <linux/pci.h>
#include <asm/pci-direct.h>
#include <asm/acpi.h>
+#include <asm/apic.h>
static int __init check_bridge(int vendor, int device)
{
if (vendor == PCI_VENDOR_ID_NVIDIA) {
acpi_skip_timer_override = 1;
}
+#ifdef CONFIG_X86_LOCAL_APIC
+ /*
+ * ATI IXP chipsets get double timer interrupts.
+ * For now just do this for all ATI chipsets.
+ * FIXME: this needs to be checked for the non-ACPI case too.
+ */
+ if (vendor == PCI_VENDOR_ID_ATI)
+ disable_timer_pin_1 = 1;
+#endif
return 0;
}
*/
int nr_ioapic_registers[MAX_IO_APICS];
+int disable_timer_pin_1 __initdata;
+
/*
* Rough estimation of how many shared IRQs there are, can
* be changed anytime.
setup_nmi();
enable_8259A_irq(0);
}
+ if (disable_timer_pin_1 > 0)
+ clear_IO_APIC_pin(0, pin1);
return;
}
clear_IO_APIC_pin(0, pin1);
#endif
#ifdef CONFIG_X86_LOCAL_APIC
+ if (!memcmp(from, "disable_timer_pin_1", 19))
+ disable_timer_pin_1 = 1;
+ if (!memcmp(from, "enable_timer_pin_1", 18))
+ disable_timer_pin_1 = -1;
+
/* disable IO-APIC */
else if (!memcmp(from, "noapic", 6))
disable_ioapic_setup();
node_end_pfn[nid] = memory_chunk->end_pfn;
}
+static u8 pxm_to_nid_map[MAX_PXM_DOMAINS];/* _PXM to logical node ID map */
+
+int pxm_to_node(int pxm)
+{
+ return pxm_to_nid_map[pxm];
+}
+
/* Parse the ACPI Static Resource Affinity Table */
static int __init acpi20_parse_srat(struct acpi_table_srat *sratp)
{
u8 *start, *end, *p;
int i, j, nid;
- u8 pxm_to_nid_map[MAX_PXM_DOMAINS];/* _PXM to logical node ID map */
u8 nid_to_pxm_map[MAX_NUMNODES];/* logical node ID to _PXM map */
start = (u8 *)(&(sratp->reserved) + 1); /* skip header */
#include <linux/init.h>
#include <linux/irq.h>
#include <asm/hw_irq.h>
+#include <asm/numa.h>
#include "pci.h"
struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int domain, int busnum)
{
+ struct pci_bus *bus;
+
if (domain != 0) {
printk(KERN_WARNING "PCI: Multiple domains not supported\n");
return NULL;
}
- return pcibios_scan_root(busnum);
+ bus = pcibios_scan_root(busnum);
+#ifdef CONFIG_ACPI_NUMA
+ if (bus != NULL) {
+ int pxm = acpi_get_pxm(device->handle);
+ if (pxm >= 0) {
+ bus->sysdata = (void *)(unsigned long)pxm_to_node(pxm);
+ printk("bus %d -> pxm %d -> node %ld\n",
+ busnum, pxm, (long)(bus->sysdata));
+ }
+ }
+#endif
+
+ return bus;
}
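The node id stashed in bus->sysdata above is what pcibus_to_node() later reads back. A minimal sketch of the consumer side, mirroring the dma_alloc_coherent hunk further down in this patch (the helper name here is illustrative, not part of the patch):

	/* Sketch: recover the node that pci_acpi_scan_root() recorded. */
	static struct page *alloc_near_device(struct device *dev,
					      unsigned gfp, unsigned order)
	{
		int node;
		if (dev->bus == &pci_bus_type)
			node = pcibus_to_node(to_pci_dev(dev)->bus);
		else
			node = numa_node_id();
		return alloc_pages_node(node, gfp, order);
	}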
extern int pci_routeirq;
(pci_mmcfg_config[0].base_address == 0))
goto out;
- /* Kludge for now. Don't use mmconfig on AMD systems because
- those have some busses where mmconfig doesn't work,
- and we don't parse ACPI MCFG well enough to handle that.
- Remove when proper handling is added. */
- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
- goto out;
-
printk(KERN_INFO "PCI: Using MMCONFIG\n");
raw_pci_ops = &pci_mmcfg;
pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF;
ret = sys_sendfile(out_fd, in_fd, offset ? (off_t __user *) &of : NULL, count);
set_fs(old_fs);
- if (!ret && offset && put_user(of, offset))
+ if (offset && put_user(of, offset))
return -EFAULT;
return ret;
ret = sys_sendfile(out_fd, in_fd, offset ? &of : NULL, count);
set_fs(old_fs);
- if (!ret && offset && put_user(of, offset))
+ if (offset && put_user(of, offset))
return -EFAULT;
return ret;
$(obj)/bzImage: $(obj)/bootsect $(obj)/setup \
$(obj)/vmlinux.bin $(obj)/tools/build FORCE
$(call if_changed,image)
- @echo 'Kernel: $@ is ready'
+ @echo 'Kernel: $@ is ready' ' (#'`cat .version`')'
$(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE
$(call if_changed,objcopy)
#endif
#define SCREEN_INFO (*(struct screen_info *)(real_mode+0))
-extern char input_data[];
+extern unsigned char input_data[];
extern int input_len;
static long bytes_out = 0;
#else
if ((ALT_MEM_K > EXT_MEM_K ? ALT_MEM_K : EXT_MEM_K) < 1024) error("Less than 2MB of memory");
#endif
- output_data = (char *)__PHYSICAL_START; /* Normally Points to 1M */
+ output_data = (unsigned char *)__PHYSICAL_START; /* Normally Points to 1M */
free_mem_end_ptr = (long)real_mode;
}
#else
if ((ALT_MEM_K > EXT_MEM_K ? ALT_MEM_K : EXT_MEM_K) < (3*1024)) error("Less than 4MB of memory");
#endif
- mv->low_buffer_start = output_data = (char *)LOW_BUFFER_START;
+ mv->low_buffer_start = output_data = (unsigned char *)LOW_BUFFER_START;
low_buffer_end = ((unsigned int)real_mode > LOW_BUFFER_MAX
? LOW_BUFFER_MAX : (unsigned int)real_mode) & ~0xfff;
low_buffer_size = low_buffer_end - LOW_BUFFER_START;
#
# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.13-rc6-git3
-# Fri Aug 12 16:40:34 2005
+# Linux kernel version: 2.6.13-git11
+# Mon Sep 12 16:16:16 2005
#
CONFIG_X86_64=y
CONFIG_64BIT=y
CONFIG_X86=y
+CONFIG_SEMAPHORE_SLEEPERS=y
CONFIG_MMU=y
CONFIG_RWSEM_GENERIC_SPINLOCK=y
CONFIG_GENERIC_CALIBRATE_DELAY=y
CONFIG_EARLY_PRINTK=y
CONFIG_GENERIC_ISA_DMA=y
CONFIG_GENERIC_IOMAP=y
+CONFIG_ARCH_MAY_HAVE_PC_FDC=y
#
# Code maturity level options
# General setup
#
CONFIG_LOCALVERSION=""
+CONFIG_LOCALVERSION_AUTO=y
CONFIG_SWAP=y
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
# CONFIG_CPUSETS is not set
+CONFIG_INITRAMFS_SOURCE=""
# CONFIG_EMBEDDED is not set
CONFIG_KALLSYMS=y
CONFIG_KALLSYMS_ALL=y
CONFIG_DISCONTIGMEM=y
CONFIG_FLAT_NODE_MEM_MAP=y
CONFIG_NEED_MULTIPLE_NODES=y
+# CONFIG_SPARSEMEM_STATIC is not set
CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y
CONFIG_HAVE_DEC_LOCK=y
CONFIG_NR_CPUS=32
CONFIG_GENERIC_HARDIRQS=y
CONFIG_GENERIC_IRQ_PROBE=y
CONFIG_ISA_DMA_API=y
+CONFIG_GENERIC_PENDING_IRQ=y
#
# Power management options
# CONFIG_PCIEPORTBUS is not set
CONFIG_PCI_MSI=y
# CONFIG_PCI_LEGACY_PROC is not set
-# CONFIG_PCI_NAMES is not set
# CONFIG_PCI_DEBUG is not set
#
CONFIG_IP_MULTICAST=y
# CONFIG_IP_ADVANCED_ROUTER is not set
CONFIG_IP_FIB_HASH=y
-# CONFIG_IP_PNP is not set
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+# CONFIG_IP_PNP_BOOTP is not set
+# CONFIG_IP_PNP_RARP is not set
# CONFIG_NET_IPIP is not set
# CONFIG_NET_IPGRE is not set
# CONFIG_IP_MROUTE is not set
# CONFIG_INET_ESP is not set
# CONFIG_INET_IPCOMP is not set
# CONFIG_INET_TUNNEL is not set
-CONFIG_IP_TCPDIAG=y
-CONFIG_IP_TCPDIAG_IPV6=y
+CONFIG_INET_DIAG=y
+CONFIG_INET_TCP_DIAG=y
# CONFIG_TCP_CONG_ADVANCED is not set
CONFIG_TCP_CONG_BIC=y
CONFIG_IPV6=y
# CONFIG_IPV6_TUNNEL is not set
# CONFIG_NETFILTER is not set
+#
+# DCCP Configuration (EXPERIMENTAL)
+#
+# CONFIG_IP_DCCP is not set
+
#
# SCTP Configuration (EXPERIMENTAL)
#
# Network testing
#
# CONFIG_NET_PKTGEN is not set
+# CONFIG_NETFILTER_NETLINK is not set
# CONFIG_HAMRADIO is not set
# CONFIG_IRDA is not set
# CONFIG_BT is not set
+# CONFIG_IEEE80211 is not set
#
# Device Drivers
CONFIG_BLK_DEV_RAM_COUNT=16
CONFIG_BLK_DEV_RAM_SIZE=4096
CONFIG_BLK_DEV_INITRD=y
-CONFIG_INITRAMFS_SOURCE=""
CONFIG_LBD=y
# CONFIG_CDROM_PKTCDVD is not set
#
# SCSI device support
#
+# CONFIG_RAID_ATTRS is not set
CONFIG_SCSI=y
# CONFIG_SCSI_PROC_FS is not set
#
# SCSI Transport Attributes
#
-# CONFIG_SCSI_SPI_ATTRS is not set
+CONFIG_SCSI_SPI_ATTRS=y
# CONFIG_SCSI_FC_ATTRS is not set
# CONFIG_SCSI_ISCSI_ATTRS is not set
# CONFIG_SCSI_SATA_AHCI is not set
# CONFIG_SCSI_SATA_SVW is not set
CONFIG_SCSI_ATA_PIIX=y
+# CONFIG_SCSI_SATA_MV is not set
# CONFIG_SCSI_SATA_NV is not set
# CONFIG_SCSI_SATA_PROMISE is not set
# CONFIG_SCSI_SATA_QSTOR is not set
#
# CONFIG_ARCNET is not set
+#
+# PHY device support
+#
+# CONFIG_PHYLIB is not set
+
#
# Ethernet (10 or 100Mbit)
#
# CONFIG_HAMACHI is not set
# CONFIG_YELLOWFIN is not set
# CONFIG_R8169 is not set
+# CONFIG_SIS190 is not set
# CONFIG_SKGE is not set
# CONFIG_SK98LIN is not set
# CONFIG_VIA_VELOCITY is not set
#
# Ethernet (10000 Mbit)
#
+# CONFIG_CHELSIO_T1 is not set
# CONFIG_IXGB is not set
CONFIG_S2IO=m
# CONFIG_S2IO_NAPI is not set
# I2C support
#
# CONFIG_I2C is not set
-# CONFIG_I2C_SENSOR is not set
#
# Dallas's 1-wire bus
# Hardware Monitoring support
#
CONFIG_HWMON=y
+# CONFIG_HWMON_VID is not set
# CONFIG_HWMON_DEBUG_CHIP is not set
#
#
# CONFIG_IBM_ASM is not set
+#
+# Multimedia Capabilities Port drivers
+#
+
#
# Multimedia devices
#
#
# USB Device Class drivers
#
-# CONFIG_USB_AUDIO is not set
+# CONFIG_OBSOLETE_OSS_USB_DRIVER is not set
# CONFIG_USB_BLUETOOTH_TTY is not set
-# CONFIG_USB_MIDI is not set
# CONFIG_USB_ACM is not set
CONFIG_USB_PRINTER=y
# CONFIG_USB_STORAGE_SDDR09 is not set
# CONFIG_USB_STORAGE_SDDR55 is not set
# CONFIG_USB_STORAGE_JUMPSHOT is not set
+# CONFIG_USB_STORAGE_ONETOUCH is not set
#
# USB Input Devices
# CONFIG_USB_MTOUCH is not set
# CONFIG_USB_ITMTOUCH is not set
# CONFIG_USB_EGALAX is not set
+# CONFIG_USB_YEALINK is not set
# CONFIG_USB_XPAD is not set
# CONFIG_USB_ATI_REMOTE is not set
# CONFIG_USB_KEYSPAN_REMOTE is not set
# Firmware Drivers
#
# CONFIG_EDD is not set
+# CONFIG_DELL_RBU is not set
+CONFIG_DCDBAS=m
#
# File systems
# CONFIG_REISERFS_FS_SECURITY is not set
# CONFIG_JFS_FS is not set
CONFIG_FS_POSIX_ACL=y
-
-#
-# XFS support
-#
# CONFIG_XFS_FS is not set
# CONFIG_MINIX_FS is not set
# CONFIG_ROMFS_FS is not set
CONFIG_DNOTIFY=y
CONFIG_AUTOFS_FS=y
# CONFIG_AUTOFS4_FS is not set
+# CONFIG_FUSE_FS is not set
#
# CD-ROM/DVD Filesystems
CONFIG_PROC_FS=y
CONFIG_PROC_KCORE=y
CONFIG_SYSFS=y
-# CONFIG_DEVPTS_FS_XATTR is not set
CONFIG_TMPFS=y
-# CONFIG_TMPFS_XATTR is not set
CONFIG_HUGETLBFS=y
CONFIG_HUGETLB_PAGE=y
CONFIG_RAMFS=y
+# CONFIG_RELAYFS_FS is not set
#
# Miscellaneous filesystems
# CONFIG_NFSD_V3_ACL is not set
# CONFIG_NFSD_V4 is not set
CONFIG_NFSD_TCP=y
+CONFIG_ROOT_NFS=y
CONFIG_LOCKD=y
CONFIG_LOCKD_V4=y
CONFIG_EXPORTFS=y
# CONFIG_NCP_FS is not set
# CONFIG_CODA_FS is not set
# CONFIG_AFS_FS is not set
+# CONFIG_9P_FS is not set
#
# Partition Types
CONFIG_DEBUG_KERNEL=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_LOG_BUF_SHIFT=18
+CONFIG_DETECT_SOFTLOCKUP=y
# CONFIG_SCHEDSTATS is not set
# CONFIG_DEBUG_SLAB is not set
# CONFIG_DEBUG_SPINLOCK is not set
# CONFIG_DEBUG_KOBJECT is not set
# CONFIG_DEBUG_INFO is not set
CONFIG_DEBUG_FS=y
+# CONFIG_FRAME_POINTER is not set
CONFIG_INIT_DEBUG=y
# CONFIG_IOMMU_DEBUG is not set
CONFIG_KPROBES=y
# Library routines
#
# CONFIG_CRC_CCITT is not set
+# CONFIG_CRC16 is not set
CONFIG_CRC32=y
# CONFIG_LIBCRC32C is not set
* with the int 0x80 path.
*/
ENTRY(ia32_sysenter_target)
- CFI_STARTPROC
+ CFI_STARTPROC simple
+ CFI_DEF_CFA rsp,0
+ CFI_REGISTER rsp,rbp
swapgs
movq %gs:pda_kernelstack, %rsp
addq $(PDA_STACKOFFSET),%rsp
sti
movl %ebp,%ebp /* zero extension */
pushq $__USER32_DS
+ CFI_ADJUST_CFA_OFFSET 8
+ /*CFI_REL_OFFSET ss,0*/
pushq %rbp
+ CFI_ADJUST_CFA_OFFSET 8
+ CFI_REL_OFFSET rsp,0
pushfq
+ CFI_ADJUST_CFA_OFFSET 8
+ /*CFI_REL_OFFSET rflags,0*/
movl $VSYSCALL32_SYSEXIT, %r10d
+ CFI_REGISTER rip,r10
pushq $__USER32_CS
+ CFI_ADJUST_CFA_OFFSET 8
+ /*CFI_REL_OFFSET cs,0*/
movl %eax, %eax
pushq %r10
+ CFI_ADJUST_CFA_OFFSET 8
+ CFI_REL_OFFSET rip,0
pushq %rax
+ CFI_ADJUST_CFA_OFFSET 8
cld
SAVE_ARGS 0,0,1
/* no need to do an access_ok check here because rbp has been
.previous
GET_THREAD_INFO(%r10)
testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10)
+ CFI_REMEMBER_STATE
jnz sysenter_tracesys
sysenter_do_call:
cmpl $(IA32_NR_syscalls),%eax
andl $~0x200,EFLAGS-R11(%rsp)
RESTORE_ARGS 1,24,1,1,1,1
popfq
+ CFI_ADJUST_CFA_OFFSET -8
+ /*CFI_RESTORE rflags*/
popq %rcx /* User %esp */
+ CFI_ADJUST_CFA_OFFSET -8
+ CFI_REGISTER rsp,rcx
movl $VSYSCALL32_SYSEXIT,%edx /* User %eip */
+ CFI_REGISTER rip,rdx
swapgs
sti /* sti only takes effect after the next instruction */
/* sysexit */
.byte 0xf, 0x35
sysenter_tracesys:
+ CFI_RESTORE_STATE
SAVE_REST
CLEAR_RREGS
movq $-ENOSYS,RAX(%rsp) /* really needed? */
* with the int 0x80 path.
*/
ENTRY(ia32_cstar_target)
- CFI_STARTPROC
+ CFI_STARTPROC simple
+ CFI_DEF_CFA rsp,0
+ CFI_REGISTER rip,rcx
+ /*CFI_REGISTER rflags,r11*/
swapgs
movl %esp,%r8d
+ CFI_REGISTER rsp,r8
movq %gs:pda_kernelstack,%rsp
sti
SAVE_ARGS 8,1,1
movl %eax,%eax /* zero extension */
movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
movq %rcx,RIP-ARGOFFSET(%rsp)
+ CFI_REL_OFFSET rip,RIP-ARGOFFSET
movq %rbp,RCX-ARGOFFSET(%rsp) /* this lies slightly to ptrace */
movl %ebp,%ecx
movq $__USER32_CS,CS-ARGOFFSET(%rsp)
movq $__USER32_DS,SS-ARGOFFSET(%rsp)
movq %r11,EFLAGS-ARGOFFSET(%rsp)
+ /*CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/
movq %r8,RSP-ARGOFFSET(%rsp)
+ CFI_REL_OFFSET rsp,RSP-ARGOFFSET
/* no need to do an access_ok check here because r8 has been
32bit zero extended */
/* hardware stack frame is complete now */
.previous
GET_THREAD_INFO(%r10)
testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10)
+ CFI_REMEMBER_STATE
jnz cstar_tracesys
cstar_do_call:
cmpl $IA32_NR_syscalls,%eax
jnz int_ret_from_sys_call
RESTORE_ARGS 1,-ARG_SKIP,1,1,1
movl RIP-ARGOFFSET(%rsp),%ecx
+ CFI_REGISTER rip,rcx
movl EFLAGS-ARGOFFSET(%rsp),%r11d
+ /*CFI_REGISTER rflags,r11*/
movl RSP-ARGOFFSET(%rsp),%esp
+ CFI_RESTORE rsp
swapgs
sysretl
cstar_tracesys:
+ CFI_RESTORE_STATE
SAVE_REST
CLEAR_RREGS
movq $-ENOSYS,RAX(%rsp) /* really needed? */
*/
ENTRY(ia32_syscall)
- CFI_STARTPROC
+ CFI_STARTPROC simple
+ CFI_DEF_CFA rsp,SS+8-RIP
+ /*CFI_REL_OFFSET ss,SS-RIP*/
+ CFI_REL_OFFSET rsp,RSP-RIP
+ /*CFI_REL_OFFSET rflags,EFLAGS-RIP*/
+ /*CFI_REL_OFFSET cs,CS-RIP*/
+ CFI_REL_OFFSET rip,RIP-RIP
swapgs
sti
movl %eax,%eax
pushq %rax
+ CFI_ADJUST_CFA_OFFSET 8
cld
/* note the registers are not zero extended to the sf.
this could be a problem. */
jmp ia32_ptregs_common
.endm
+ CFI_STARTPROC
+
PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn, %rdi
PTREGSCALL stub32_sigreturn, sys32_sigreturn, %rdi
PTREGSCALL stub32_sigaltstack, sys32_sigaltstack, %rdx
PTREGSCALL stub32_rt_sigsuspend, sys_rt_sigsuspend, %rdx
ENTRY(ia32_ptregs_common)
- CFI_STARTPROC
popq %r11
+ CFI_ADJUST_CFA_OFFSET -8
+ CFI_REGISTER rip, r11
SAVE_REST
call *%rax
RESTORE_REST
ret = sys_sendfile(out_fd, in_fd, offset ? &of : NULL, count);
set_fs(old_fs);
- if (!ret && offset && put_user(of, offset))
+ if (offset && put_user(of, offset))
return -EFAULT;
return ret;
intel_cacheinfo-y += ../../i386/kernel/cpu/intel_cacheinfo.o
quirks-y += ../../i386/kernel/quirks.o
i8237-y += ../../i386/kernel/i8237.o
+msr-$(subst m,y,$(CONFIG_X86_MSR)) += ../../i386/kernel/msr.o
#include <linux/slab.h>
#include <linux/pci.h>
#include <linux/bootmem.h>
-#include <linux/irq.h>
#include <linux/acpi.h>
#include <asm/mpspec.h>
#include <asm/io.h>
if (aper_alloc) {
/* Got the aperture from the AGP bridge */
+ } else if (swiotlb && !valid_agp) {
+ /* Do nothing */
} else if ((!no_iommu && end_pfn >= 0xffffffff>>PAGE_SHIFT) ||
force_iommu ||
valid_agp ||
#include <linux/init.h>
#include <linux/mm.h>
-#include <linux/irq.h>
#include <linux/delay.h>
#include <linux/bootmem.h>
#include <linux/smp_lock.h>
if (maxlvt >= 4)
apic_write_around(APIC_LVTPC, APIC_LVT_MASKED);
v = GET_APIC_VERSION(apic_read(APIC_LVR));
- if (APIC_INTEGRATED(v)) { /* !82489DX */
- if (maxlvt > 3) /* Due to Pentium errata 3AP and 11AP. */
- apic_write(APIC_ESR, 0);
- apic_read(APIC_ESR);
- }
+ apic_write(APIC_ESR, 0);
+ apic_read(APIC_ESR);
}
void __init connect_bsp_APIC(void)
*/
apic_write_around(APIC_LVT0, APIC_DM_EXTINT);
value = APIC_DM_NMI;
- if (!APIC_INTEGRATED(ver)) /* 82489DX */
- value |= APIC_LVT_LEVEL_TRIGGER;
apic_write_around(APIC_LVT1, value);
}
{
unsigned int value, ver, maxlvt;
- /* Pound the ESR really hard over the head with a big hammer - mbligh */
- if (esr_disable) {
- apic_write(APIC_ESR, 0);
- apic_write(APIC_ESR, 0);
- apic_write(APIC_ESR, 0);
- apic_write(APIC_ESR, 0);
- }
-
value = apic_read(APIC_LVR);
ver = GET_APIC_VERSION(value);
value = APIC_DM_NMI;
else
value = APIC_DM_NMI | APIC_LVT_MASKED;
- if (!APIC_INTEGRATED(ver)) /* 82489DX */
- value |= APIC_LVT_LEVEL_TRIGGER;
apic_write_around(APIC_LVT1, value);
- if (APIC_INTEGRATED(ver) && !esr_disable) { /* !82489DX */
+ {
unsigned oldvalue;
maxlvt = get_maxlvt();
- if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
- apic_write(APIC_ESR, 0);
oldvalue = apic_read(APIC_ESR);
value = ERROR_APIC_VECTOR; // enables sending errors
apic_write_around(APIC_LVTERR, value);
apic_printk(APIC_VERBOSE,
"ESR value after enabling vector: %08x, after %08x\n",
oldvalue, value);
- } else {
- if (esr_disable)
- /*
- * Something untraceble is creating bad interrupts on
- * secondary quads ... for the moment, just leave the
- * ESR disabled - we can't do anything useful with the
- * errors anyway - mbligh
- */
- apic_printk(APIC_DEBUG, "Leaving ESR disabled.\n");
- else
- apic_printk(APIC_DEBUG, "No ESR for 82489DX.\n");
}
nmi_watchdog_default();
* Fetch the APIC ID of the BSP in case we have a
* default configuration (or the MP table is broken).
*/
- if (boot_cpu_id == -1U)
- boot_cpu_id = GET_APIC_ID(apic_read(APIC_ID));
+ boot_cpu_id = GET_APIC_ID(apic_read(APIC_ID));
#ifdef CONFIG_X86_IO_APIC
{
ver = GET_APIC_VERSION(apic_read(APIC_LVR));
lvtt_value = APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR;
- if (!APIC_INTEGRATED(ver))
- lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV);
apic_write_around(APIC_LVTT, lvtt_value);
/*
connect_bsp_APIC();
- phys_cpu_present_map = physid_mask_of_physid(0);
+ phys_cpu_present_map = physid_mask_of_physid(boot_cpu_id);
apic_write_around(APIC_ID, boot_cpu_id);
setup_local_APIC();
ENTRY(kernelstack);
ENTRY(oldrsp);
ENTRY(pcurrent);
- ENTRY(irqrsp);
ENTRY(irqcount);
ENTRY(cpunumber);
ENTRY(irqstackptr);
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/smp.h>
-#include <linux/irq.h>
#include <linux/reboot.h>
#include <linux/kexec.h>
if (ei->type != E820_RAM ||
ei->addr+ei->size <= start ||
- ei->addr > end)
+ ei->addr >= end)
continue;
addr = round_up(ei->addr, PAGE_SIZE);
#include <linux/tty.h>
#include <asm/io.h>
#include <asm/processor.h>
+#include <asm/fcntl.h>
/* Simple VGA output */
.index = -1,
};
+/* Console interface to a host file on AMD's SimNow! */
+
+static int simnow_fd;
+
+enum {
+ MAGIC1 = 0xBACCD00A,
+ MAGIC2 = 0xCA110000,
+ XOPEN = 5,
+ XWRITE = 4,
+};
+
+static noinline long simnow(long cmd, long a, long b, long c)
+{
+ long ret;
+ asm volatile("cpuid" :
+ "=a" (ret) :
+ "b" (a), "c" (b), "d" (c), "0" (MAGIC1), "D" (cmd + MAGIC2));
+ return ret;
+}
+
+void __init simnow_init(char *str)
+{
+ char *fn = "klog";
+ if (*str == '=')
+ fn = ++str;
+ /* error ignored */
+ simnow_fd = simnow(XOPEN, (unsigned long)fn, O_WRONLY|O_APPEND|O_CREAT, 0644);
+}
+
+static void simnow_write(struct console *con, const char *s, unsigned n)
+{
+ simnow(XWRITE, simnow_fd, (unsigned long)s, n);
+}
+
+static struct console simnow_console = {
+ .name = "simnow",
+ .write = simnow_write,
+ .flags = CON_PRINTBUFFER,
+ .index = -1,
+};
+
/* Direct interface for emergencies */
struct console *early_console = &early_vga_console;
static int early_console_initialized = 0;
max_xpos = SCREEN_INFO.orig_video_cols;
max_ypos = SCREEN_INFO.orig_video_lines;
early_console = &early_vga_console;
+ } else if (!strncmp(buf, "simnow", 6)) {
+ simnow_init(buf + 6);
+ early_console = &simnow_console;
+ keep_early = 1;
}
early_console_initialized = 1;
register_console(early_console);
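Given the parsing above (the "simnow" prefix is stripped, and simnow_init() treats a leading '=' as a host filename), usage on the kernel command line would look like:

	earlyprintk=simnow		(log to the default host file "klog")
	earlyprintk=simnow=boot.log	(log to a named host file; name illustrative)

The write path is just the magic-CPUID backdoor in simnow() above, so this only works inside the AMD SimNow! simulator.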
xorl %eax, %eax
pushq %rax /* ss */
CFI_ADJUST_CFA_OFFSET 8
+ /*CFI_REL_OFFSET ss,0*/
pushq %rax /* rsp */
CFI_ADJUST_CFA_OFFSET 8
- CFI_OFFSET rip,0
+ CFI_REL_OFFSET rsp,0
pushq $(1<<9) /* eflags - interrupts on */
CFI_ADJUST_CFA_OFFSET 8
+ /*CFI_REL_OFFSET rflags,0*/
pushq $__KERNEL_CS /* cs */
CFI_ADJUST_CFA_OFFSET 8
+ /*CFI_REL_OFFSET cs,0*/
pushq \child_rip /* rip */
CFI_ADJUST_CFA_OFFSET 8
- CFI_OFFSET rip,0
+ CFI_REL_OFFSET rip,0
pushq %rax /* orig rax */
CFI_ADJUST_CFA_OFFSET 8
.endm
CFI_ADJUST_CFA_OFFSET -(6*8)
.endm
- .macro CFI_DEFAULT_STACK
- CFI_ADJUST_CFA_OFFSET (SS)
- CFI_OFFSET r15,R15-SS
- CFI_OFFSET r14,R14-SS
- CFI_OFFSET r13,R13-SS
- CFI_OFFSET r12,R12-SS
- CFI_OFFSET rbp,RBP-SS
- CFI_OFFSET rbx,RBX-SS
- CFI_OFFSET r11,R11-SS
- CFI_OFFSET r10,R10-SS
- CFI_OFFSET r9,R9-SS
- CFI_OFFSET r8,R8-SS
- CFI_OFFSET rax,RAX-SS
- CFI_OFFSET rcx,RCX-SS
- CFI_OFFSET rdx,RDX-SS
- CFI_OFFSET rsi,RSI-SS
- CFI_OFFSET rdi,RDI-SS
- CFI_OFFSET rsp,RSP-SS
- CFI_OFFSET rip,RIP-SS
+ .macro CFI_DEFAULT_STACK start=1
+ .if \start
+ CFI_STARTPROC simple
+ CFI_DEF_CFA rsp,SS+8
+ .else
+ CFI_DEF_CFA_OFFSET SS+8
+ .endif
+ CFI_REL_OFFSET r15,R15
+ CFI_REL_OFFSET r14,R14
+ CFI_REL_OFFSET r13,R13
+ CFI_REL_OFFSET r12,R12
+ CFI_REL_OFFSET rbp,RBP
+ CFI_REL_OFFSET rbx,RBX
+ CFI_REL_OFFSET r11,R11
+ CFI_REL_OFFSET r10,R10
+ CFI_REL_OFFSET r9,R9
+ CFI_REL_OFFSET r8,R8
+ CFI_REL_OFFSET rax,RAX
+ CFI_REL_OFFSET rcx,RCX
+ CFI_REL_OFFSET rdx,RDX
+ CFI_REL_OFFSET rsi,RSI
+ CFI_REL_OFFSET rdi,RDI
+ CFI_REL_OFFSET rip,RIP
+ /*CFI_REL_OFFSET cs,CS*/
+ /*CFI_REL_OFFSET rflags,EFLAGS*/
+ CFI_REL_OFFSET rsp,RSP
+ /*CFI_REL_OFFSET ss,SS*/
.endm
/*
* A newly forked process directly context switches into this.
*/
/* rdi: prev */
ENTRY(ret_from_fork)
- CFI_STARTPROC
CFI_DEFAULT_STACK
call schedule_tail
GET_THREAD_INFO(%rcx)
*/
ENTRY(system_call)
- CFI_STARTPROC
+ CFI_STARTPROC simple
+ CFI_DEF_CFA rsp,0
+ CFI_REGISTER rip,rcx
+ /*CFI_REGISTER rflags,r11*/
swapgs
movq %rsp,%gs:pda_oldrsp
movq %gs:pda_kernelstack,%rsp
sti
SAVE_ARGS 8,1
movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
- movq %rcx,RIP-ARGOFFSET(%rsp)
+ movq %rcx,RIP-ARGOFFSET(%rsp)
+ CFI_REL_OFFSET rip,RIP-ARGOFFSET
GET_THREAD_INFO(%rcx)
testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
+ CFI_REMEMBER_STATE
jnz tracesys
cmpq $__NR_syscall_max,%rax
ja badsys
cli
movl threadinfo_flags(%rcx),%edx
andl %edi,%edx
+ CFI_REMEMBER_STATE
jnz sysret_careful
movq RIP-ARGOFFSET(%rsp),%rcx
+ CFI_REGISTER rip,rcx
RESTORE_ARGS 0,-ARG_SKIP,1
+ /*CFI_REGISTER rflags,r11*/
movq %gs:pda_oldrsp,%rsp
swapgs
sysretq
/* Handle reschedules */
/* edx: work, edi: workmask */
sysret_careful:
+ CFI_RESTORE_STATE
bt $TIF_NEED_RESCHED,%edx
jnc sysret_signal
sti
pushq %rdi
+ CFI_ADJUST_CFA_OFFSET 8
call schedule
popq %rdi
+ CFI_ADJUST_CFA_OFFSET -8
jmp sysret_check
/* Handle a signal */
1: movl $_TIF_NEED_RESCHED,%edi
jmp sysret_check
+badsys:
+ movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
+ jmp ret_from_sys_call
+
/* Do syscall tracing */
tracesys:
+ CFI_RESTORE_STATE
SAVE_REST
movq $-ENOSYS,RAX(%rsp)
FIXUP_TOP_OF_STACK %rdi
RESTORE_TOP_OF_STACK %rbx
RESTORE_REST
jmp ret_from_sys_call
+ CFI_ENDPROC
-badsys:
- movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
- jmp ret_from_sys_call
-
/*
* Syscall return path ending with IRET.
* Has correct top of stack, but partial stack frame.
*/
-ENTRY(int_ret_from_sys_call)
+ENTRY(int_ret_from_sys_call)
+ CFI_STARTPROC simple
+ CFI_DEF_CFA rsp,SS+8-ARGOFFSET
+ /*CFI_REL_OFFSET ss,SS-ARGOFFSET*/
+ CFI_REL_OFFSET rsp,RSP-ARGOFFSET
+ /*CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/
+ /*CFI_REL_OFFSET cs,CS-ARGOFFSET*/
+ CFI_REL_OFFSET rip,RIP-ARGOFFSET
+ CFI_REL_OFFSET rdx,RDX-ARGOFFSET
+ CFI_REL_OFFSET rcx,RCX-ARGOFFSET
+ CFI_REL_OFFSET rax,RAX-ARGOFFSET
+ CFI_REL_OFFSET rdi,RDI-ARGOFFSET
+ CFI_REL_OFFSET rsi,RSI-ARGOFFSET
+ CFI_REL_OFFSET r8,R8-ARGOFFSET
+ CFI_REL_OFFSET r9,R9-ARGOFFSET
+ CFI_REL_OFFSET r10,R10-ARGOFFSET
+ CFI_REL_OFFSET r11,R11-ARGOFFSET
cli
testl $3,CS-ARGOFFSET(%rsp)
je retint_restore_args
jnc int_very_careful
sti
pushq %rdi
+ CFI_ADJUST_CFA_OFFSET 8
call schedule
popq %rdi
+ CFI_ADJUST_CFA_OFFSET -8
cli
jmp int_with_check
testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
jz int_signal
pushq %rdi
+ CFI_ADJUST_CFA_OFFSET 8
leaq 8(%rsp),%rdi # &ptregs -> arg1
call syscall_trace_leave
popq %rdi
+ CFI_ADJUST_CFA_OFFSET -8
andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
cli
jmp int_restore_rest
jmp ptregscall_common
.endm
+ CFI_STARTPROC
+
PTREGSCALL stub_clone, sys_clone, %r8
PTREGSCALL stub_fork, sys_fork, %rdi
PTREGSCALL stub_vfork, sys_vfork, %rdi
PTREGSCALL stub_iopl, sys_iopl, %rsi
ENTRY(ptregscall_common)
- CFI_STARTPROC
popq %r11
- CFI_ADJUST_CFA_OFFSET -8
+ CFI_ADJUST_CFA_OFFSET -8
+ CFI_REGISTER rip, r11
SAVE_REST
movq %r11, %r15
+ CFI_REGISTER rip, r15
FIXUP_TOP_OF_STACK %r11
call *%rax
RESTORE_TOP_OF_STACK %r11
movq %r15, %r11
+ CFI_REGISTER rip, r11
RESTORE_REST
pushq %r11
- CFI_ADJUST_CFA_OFFSET 8
+ CFI_ADJUST_CFA_OFFSET 8
+ CFI_REL_OFFSET rip, 0
ret
CFI_ENDPROC
ENTRY(stub_execve)
CFI_STARTPROC
popq %r11
- CFI_ADJUST_CFA_OFFSET -8
+ CFI_ADJUST_CFA_OFFSET -8
+ CFI_REGISTER rip, r11
SAVE_REST
movq %r11, %r15
+ CFI_REGISTER rip, r15
FIXUP_TOP_OF_STACK %r11
call sys_execve
GET_THREAD_INFO(%rcx)
bt $TIF_IA32,threadinfo_flags(%rcx)
+ CFI_REMEMBER_STATE
jc exec_32bit
RESTORE_TOP_OF_STACK %r11
movq %r15, %r11
+ CFI_REGISTER rip, r11
RESTORE_REST
- push %r11
+ pushq %r11
+ CFI_ADJUST_CFA_OFFSET 8
+ CFI_REL_OFFSET rip, 0
ret
exec_32bit:
- CFI_ADJUST_CFA_OFFSET REST_SKIP
+ CFI_RESTORE_STATE
movq %rax,RAX(%rsp)
RESTORE_REST
jmp int_ret_from_sys_call
*/
ENTRY(stub_rt_sigreturn)
CFI_STARTPROC
- addq $8, %rsp
+ addq $8, %rsp
+ CFI_ADJUST_CFA_OFFSET -8
SAVE_REST
movq %rsp,%rdi
FIXUP_TOP_OF_STACK %r11
jmp int_ret_from_sys_call
CFI_ENDPROC
+/*
+ * initial frame state for interrupts and exceptions
+ */
+ .macro _frame ref
+ CFI_STARTPROC simple
+ CFI_DEF_CFA rsp,SS+8-\ref
+ /*CFI_REL_OFFSET ss,SS-\ref*/
+ CFI_REL_OFFSET rsp,RSP-\ref
+ /*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
+ /*CFI_REL_OFFSET cs,CS-\ref*/
+ CFI_REL_OFFSET rip,RIP-\ref
+ .endm
+
+/* initial frame state for interrupts (and exceptions without error code) */
+#define INTR_FRAME _frame RIP
+/* initial frame state for exceptions with error code (and interrupts with
+ vector already pushed) */
+#define XCPT_FRAME _frame ORIG_RAX
+
/*
* Interrupt entry/exit.
*
/* 0(%rsp): interrupt number */
.macro interrupt func
- CFI_STARTPROC simple
- CFI_DEF_CFA rsp,(SS-RDI)
- CFI_REL_OFFSET rsp,(RSP-ORIG_RAX)
- CFI_REL_OFFSET rip,(RIP-ORIG_RAX)
cld
#ifdef CONFIG_DEBUG_INFO
SAVE_ALL
swapgs
1: incl %gs:pda_irqcount # RED-PEN should check preempt count
movq %gs:pda_irqstackptr,%rax
- cmoveq %rax,%rsp
+ cmoveq %rax,%rsp /* TODO: this needs CFI annotation! */
pushq %rdi # save old stack
+ CFI_ADJUST_CFA_OFFSET 8
call \func
.endm
ENTRY(common_interrupt)
+ XCPT_FRAME
interrupt do_IRQ
/* 0(%rsp): oldrsp-ARGOFFSET */
-ret_from_intr:
+ret_from_intr:
popq %rdi
+ CFI_ADJUST_CFA_OFFSET -8
cli
decl %gs:pda_irqcount
#ifdef CONFIG_DEBUG_INFO
movq RBP(%rdi),%rbp
+ CFI_DEF_CFA_REGISTER rsp
#endif
- leaq ARGOFFSET(%rdi),%rsp
-exit_intr:
+ leaq ARGOFFSET(%rdi),%rsp /* TODO: this needs CFI annotation! */
+exit_intr:
GET_THREAD_INFO(%rcx)
testl $3,CS-ARGOFFSET(%rsp)
je retint_kernel
*/
retint_with_reschedule:
movl $_TIF_WORK_MASK,%edi
-retint_check:
+retint_check:
movl threadinfo_flags(%rcx),%edx
andl %edi,%edx
+ CFI_REMEMBER_STATE
jnz retint_careful
retint_swapgs:
swapgs
jmp do_exit
.previous
- /* edi: workmask, edx: work */
+ /* edi: workmask, edx: work */
retint_careful:
+ CFI_RESTORE_STATE
bt $TIF_NEED_RESCHED,%edx
jnc retint_signal
sti
pushq %rdi
+ CFI_ADJUST_CFA_OFFSET 8
call schedule
popq %rdi
+ CFI_ADJUST_CFA_OFFSET -8
GET_THREAD_INFO(%rcx)
cli
jmp retint_check
* APIC interrupts.
*/
.macro apicinterrupt num,func
+ INTR_FRAME
pushq $\num-256
+ CFI_ADJUST_CFA_OFFSET 8
interrupt \func
jmp ret_from_intr
CFI_ENDPROC
ENTRY(reschedule_interrupt)
apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
-ENTRY(invalidate_interrupt)
- apicinterrupt INVALIDATE_TLB_VECTOR,smp_invalidate_interrupt
+ .macro INVALIDATE_ENTRY num
+ENTRY(invalidate_interrupt\num)
+ apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
+ .endm
+
+ INVALIDATE_ENTRY 0
+ INVALIDATE_ENTRY 1
+ INVALIDATE_ENTRY 2
+ INVALIDATE_ENTRY 3
+ INVALIDATE_ENTRY 4
+ INVALIDATE_ENTRY 5
+ INVALIDATE_ENTRY 6
+ INVALIDATE_ENTRY 7
ENTRY(call_function_interrupt)
apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
* Exception entry points.
*/
.macro zeroentry sym
+ INTR_FRAME
pushq $0 /* push error code/oldrax */
+ CFI_ADJUST_CFA_OFFSET 8
pushq %rax /* push real oldrax to the rdi slot */
+ CFI_ADJUST_CFA_OFFSET 8
leaq \sym(%rip),%rax
jmp error_entry
+ CFI_ENDPROC
.endm
.macro errorentry sym
+ XCPT_FRAME
pushq %rax
+ CFI_ADJUST_CFA_OFFSET 8
leaq \sym(%rip),%rax
jmp error_entry
+ CFI_ENDPROC
.endm
/* error code is on the stack already */
* and the exception handler in %rax.
*/
ENTRY(error_entry)
- CFI_STARTPROC simple
- CFI_DEF_CFA rsp,(SS-RDI)
- CFI_REL_OFFSET rsp,(RSP-RDI)
- CFI_REL_OFFSET rip,(RIP-RDI)
+ _frame RDI
/* rdi slot contains rax, oldrax contains error code */
cld
subq $14*8,%rsp
/* Reload gs selector with exception handling */
/* edi: new selector */
ENTRY(load_gs_index)
+ CFI_STARTPROC
pushf
+ CFI_ADJUST_CFA_OFFSET 8
cli
swapgs
gs_change:
2: mfence /* workaround */
swapgs
popf
+ CFI_ADJUST_CFA_OFFSET -8
ret
+ CFI_ENDPROC
.section __ex_table,"a"
.align 8
/* runs on exception stack */
KPROBE_ENTRY(debug)
- CFI_STARTPROC
+ INTR_FRAME
pushq $0
CFI_ADJUST_CFA_OFFSET 8
paranoidentry do_debug
/* runs on exception stack */
ENTRY(nmi)
- CFI_STARTPROC
+ INTR_FRAME
pushq $-1
- CFI_ADJUST_CFA_OFFSET 8
+ CFI_ADJUST_CFA_OFFSET 8
paranoidentry do_nmi
/*
* "Paranoid" exit path from exception stack.
/* runs on exception stack */
ENTRY(double_fault)
- CFI_STARTPROC
+ XCPT_FRAME
paranoidentry do_double_fault
jmp paranoid_exit
CFI_ENDPROC
/* runs on exception stack */
ENTRY(stack_segment)
- CFI_STARTPROC
+ XCPT_FRAME
paranoidentry do_stack_segment
jmp paranoid_exit
CFI_ENDPROC
#ifdef CONFIG_X86_MCE
/* runs on exception stack */
ENTRY(machine_check)
- CFI_STARTPROC
+ INTR_FRAME
pushq $0
CFI_ADJUST_CFA_OFFSET 8
paranoidentry do_machine_check
zeroentry do_call_debug
ENTRY(call_softirq)
+ CFI_STARTPROC
movq %gs:pda_irqstackptr,%rax
pushq %r15
+ CFI_ADJUST_CFA_OFFSET 8
movq %rsp,%r15
+ CFI_DEF_CFA_REGISTER r15
incl %gs:pda_irqcount
cmove %rax,%rsp
call __do_softirq
movq %r15,%rsp
+ CFI_DEF_CFA_REGISTER rsp
decl %gs:pda_irqcount
popq %r15
+ CFI_ADJUST_CFA_OFFSET -8
ret
-
+ CFI_ENDPROC
u8 clusters, max_cluster;
u8 id;
u8 cluster_cnt[NUM_APIC_CLUSTERS];
- int num_cpus = 0;
+ int max_apic = 0;
#if defined(CONFIG_ACPI)
/*
id = bios_cpu_apicid[i];
if (id == BAD_APICID)
continue;
- num_cpus++;
+ if (id > max_apic)
+ max_apic = id;
cluster_cnt[APIC_CLUSTERID(id)]++;
}
/* Don't use clustered mode on AMD platforms. */
if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
genapic = &apic_physflat;
-#ifndef CONFIG_CPU_HOTPLUG
+#ifndef CONFIG_HOTPLUG_CPU
/* In the CPU hotplug case we cannot use broadcast mode
because that opens a race when a CPU is removed.
Stay at physflat mode in this case.
we have ACPI platform support for CPU hotplug
we should detect hotplug capablity from ACPI tables and
only do this when really needed. -AK */
- if (num_cpus <= 8)
+ if (max_apic <= 8)
genapic = &apic_flat;
#endif
goto print;
* (We don't use lowest priority delivery + HW APIC IRQ steering, so
* can ignore the clustered logical case and go straight to physical.)
*/
- if (clusters <= 1 && max_cluster <= 8 && cluster_cnt[0] == max_cluster)
+ if (clusters <= 1 && max_cluster <= 8 && cluster_cnt[0] == max_cluster) {
+#ifdef CONFIG_HOTPLUG_CPU
+ /* Don't use APIC shortcuts in CPU hotplug to avoid races */
+ genapic = &apic_physflat;
+#else
genapic = &apic_flat;
- else
+#endif
+ } else
genapic = &apic_cluster;
print:
count = 3;
id = my_cluster | (1UL << count);
x86_cpu_to_log_apicid[smp_processor_id()] = id;
- apic_write_around(APIC_DFR, APIC_DFR_CLUSTER);
+ apic_write(APIC_DFR, APIC_DFR_CLUSTER);
val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
val |= SET_APIC_LOGICAL_ID(id);
- apic_write_around(APIC_LDR, val);
+ apic_write(APIC_LDR, val);
}
/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
num = smp_processor_id();
id = 1UL << num;
x86_cpu_to_log_apicid[num] = id;
- apic_write_around(APIC_DFR, APIC_DFR_FLAT);
+ apic_write(APIC_DFR, APIC_DFR_FLAT);
val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
val |= SET_APIC_LOGICAL_ID(id);
- apic_write_around(APIC_LDR, val);
+ apic_write(APIC_LDR, val);
}
static void flat_send_IPI_mask(cpumask_t cpumask, int vector)
* prepare target chip field
*/
cfg = __prepare_ICR2(mask);
- apic_write_around(APIC_ICR2, cfg);
+ apic_write(APIC_ICR2, cfg);
/*
* program the ICR
/*
* Send the IPI. The write to APIC_ICR fires this off.
*/
- apic_write_around(APIC_ICR, cfg);
+ apic_write(APIC_ICR, cfg);
local_irq_restore(flags);
}
struct genapic apic_physflat = {
.name = "physical flat",
- .int_delivery_mode = dest_LowestPrio,
+ .int_delivery_mode = dest_Fixed,
.int_dest_mode = (APIC_DEST_PHYSICAL != 0),
- .int_delivery_dest = APIC_DEST_PHYSICAL | APIC_DM_LOWEST,
+ .int_delivery_dest = APIC_DEST_PHYSICAL | APIC_DM_FIXED,
.target_cpus = physflat_target_cpus,
.apic_id_registered = flat_apic_id_registered,
.init_apic_ldr = flat_init_apic_ldr,/*not needed, but shouldn't hurt*/
.org 0x4000
ENTRY(level2_ident_pgt)
/* 40MB for bootup. */
- .quad 0x0000000000000283
+ .quad 0x0000000000000183
.quad 0x0000000000200183
.quad 0x0000000000400183
.quad 0x0000000000600183
#include <asm/atomic.h>
#include <asm/system.h>
#include <asm/io.h>
-#include <asm/irq.h>
#include <asm/hw_irq.h>
#include <asm/pgtable.h>
#include <asm/delay.h>
#include <asm/desc.h>
#include <asm/apic.h>
-#include <linux/irq.h>
-
/*
* Common place to define all x86 IRQ vectors
*
void error_interrupt(void);
void reschedule_interrupt(void);
void call_function_interrupt(void);
-void invalidate_interrupt(void);
+void invalidate_interrupt0(void);
+void invalidate_interrupt1(void);
+void invalidate_interrupt2(void);
+void invalidate_interrupt3(void);
+void invalidate_interrupt4(void);
+void invalidate_interrupt5(void);
+void invalidate_interrupt6(void);
+void invalidate_interrupt7(void);
void thermal_interrupt(void);
void i8254_timer_resume(void);
*/
set_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
- /* IPI for invalidation */
- set_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
+ /* IPIs for invalidation */
+ set_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0);
+ set_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1);
+ set_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2);
+ set_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3);
+ set_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4);
+ set_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5);
+ set_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6);
+ set_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7);
/* IPI for generic function call */
set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
* section. Since TSS's are completely CPU-local, we want them
* on exact cacheline boundaries, to eliminate cacheline ping-pong.
*/
-DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_maxaligned_in_smp;
+DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_maxaligned_in_smp = INIT_TSS;
#define ALIGN_TO_4K __attribute__((section(".data.init_task")))
*/
#include <linux/mm.h>
-#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/delay.h>
static int no_timer_check;
+int disable_timer_pin_1 __initdata;
+
static DEFINE_SPINLOCK(ioapic_lock);
/*
#endif
/* RED-PEN skip them on mptables too? */
return;
+ case PCI_VENDOR_ID_ATI:
+ /* All timer interrupts on atiixp
+ are doubled. Disable one. */
+ if (disable_timer_pin_1 == 0) {
+ disable_timer_pin_1 = 1;
+ printk(KERN_INFO
+ "ATI board detected. Disabling timer pin 1.\n");
+ }
+ return;
}
/* No multi-function device? */
v = apic_read(APIC_TASKPRI);
printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
- if (APIC_INTEGRATED(ver)) { /* !82489DX */
- v = apic_read(APIC_ARBPRI);
- printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
- v & APIC_ARBPRI_MASK);
- v = apic_read(APIC_PROCPRI);
- printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
- }
+ v = apic_read(APIC_ARBPRI);
+ printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
+ v & APIC_ARBPRI_MASK);
+ v = apic_read(APIC_PROCPRI);
+ printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
v = apic_read(APIC_EOI);
printk(KERN_DEBUG "... APIC EOI: %08x\n", v);
printk(KERN_DEBUG "... APIC IRR field:\n");
print_APIC_bitfield(APIC_IRR);
- if (APIC_INTEGRATED(ver)) { /* !82489DX */
- if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
- apic_write(APIC_ESR, 0);
- v = apic_read(APIC_ESR);
- printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
- }
+ v = apic_read(APIC_ESR);
+ printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
v = apic_read(APIC_ICR);
printk(KERN_DEBUG "... APIC ICR: %08x\n", v);
setup_nmi();
enable_8259A_irq(0);
}
+ if (disable_timer_pin_1 > 0)
+ clear_IO_APIC_pin(0, pin1);
return;
}
clear_IO_APIC_pin(0, pin1);
unsigned irq = regs->orig_rax & 0xff;
irq_enter();
- BUG_ON(irq > 256);
__do_IRQ(irq, regs);
irq_exit();
#include <linux/fs.h>
#include <linux/cpu.h>
#include <linux/percpu.h>
+#include <linux/ctype.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/mce.h>
smp_wmb();
for (;;) {
entry = rcu_dereference(mcelog.next);
- /* When the buffer fills up discard new entries. Assume
- that the earlier errors are the more interesting. */
- if (entry >= MCE_LOG_LEN) {
- set_bit(MCE_OVERFLOW, &mcelog.flags);
- return;
+ for (;;) {
+ /* When the buffer fills up discard new entries. Assume
+ that the earlier errors are the more interesting. */
+ if (entry >= MCE_LOG_LEN) {
+ set_bit(MCE_OVERFLOW, &mcelog.flags);
+ return;
+ }
+ /* Old left over entry. Skip. */
+ if (mcelog.entry[entry].finished) {
+ entry++;
+ continue;
+ }
+ break;
}
- /* Old left over entry. Skip. */
- if (mcelog.entry[entry].finished)
- continue;
smp_rmb();
next = entry + 1;
if (cmpxchg(&mcelog.next, entry, next) == entry)
}
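The cmpxchg loop above is a simple lock-free slot reservation: each logger snapshots mcelog.next, skips finished entries, and claims a slot only if no other CPU advanced next in the meantime. A toy userspace model of the idea (a sketch; GCC's __sync builtin stands in for the kernel's cmpxchg):

	#include <stdio.h>

	#define LOG_LEN 32
	static volatile unsigned log_next;

	static int claim_slot(void)
	{
		for (;;) {
			unsigned entry = log_next;
			if (entry >= LOG_LEN)
				return -1;	/* buffer full: discard */
			if (__sync_bool_compare_and_swap(&log_next,
							 entry, entry + 1))
				return entry;	/* slot is exclusively ours */
		}
	}

	int main(void)
	{
		printf("claimed slot %d\n", claim_slot());
		return 0;
	}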
err = 0;
- for (i = 0; i < next; i++) {
- if (!mcelog.entry[i].finished)
- continue;
+ for (i = 0; i < next; i++) {
+ unsigned long start = jiffies;
+ while (!mcelog.entry[i].finished) {
+ if (!time_before(jiffies, start + 2)) {
+ memset(mcelog.entry + i, 0, sizeof(struct mce));
+ break;
+ }
+ cpu_relax();
+ }
smp_rmb();
err |= copy_to_user(buf, mcelog.entry + i, sizeof(struct mce));
buf += sizeof(struct mce);
/* mce=off disables machine check. Note you can reenable it later
using sysfs.
+ mce=TOLERANCELEVEL (number, see above)
mce=bootlog Log MCEs from before booting. Disabled by default to work
around buggy BIOS that leave bogus MCEs. */
static int __init mcheck_enable(char *str)
mce_dont_init = 1;
else if (!strcmp(str, "bootlog"))
mce_bootlog = 1;
+ else if (isdigit(str[0]))
+ get_option(&str, &tolerant);
else
printk("mce= argument %s ignored. Please use /sys", str);
return 0;
* Sysfs support
*/
-/* On resume clear all MCE state. Don't want to see leftovers from the BIOS. */
+/* On resume clear all MCE state. Don't want to see leftovers from the BIOS.
+ Only one CPU is active at this time, the others get re-added later using
+ CPU hotplug. */
static int mce_resume(struct sys_device *dev)
{
- on_each_cpu(mce_init, NULL, 1, 1);
+ mce_init(NULL);
return 0;
}
*/
#include <linux/mm.h>
-#include <linux/irq.h>
#include <linux/init.h>
#include <linux/delay.h>
#include <linux/config.h>
int apic_version [MAX_APICS];
unsigned char mp_bus_id_to_type [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
-unsigned char pci_bus_to_node [256];
-EXPORT_SYMBOL(pci_bus_to_node);
static int mp_current_pci_id = 0;
/* I/O APIC entries */
processor.mpc_type = MP_PROCESSOR;
processor.mpc_apicid = id;
- processor.mpc_apicver = 0x10; /* TBD: lapic version */
+ processor.mpc_apicver = GET_APIC_VERSION(apic_read(APIC_LVR));
processor.mpc_cpuflag = (enabled ? CPU_ENABLED : 0);
processor.mpc_cpuflag |= (boot_cpu ? CPU_BOOTPROCESSOR : 0);
processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
+++ /dev/null
-/* ----------------------------------------------------------------------- *
- *
- * Copyright 2000 H. Peter Anvin - All Rights Reserved
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
- * USA; either version 2 of the License, or (at your option) any later
- * version; incorporated herein by reference.
- *
- * ----------------------------------------------------------------------- */
-
-/*
- * msr.c
- *
- * x86 MSR access device
- *
- * This device is accessed by lseek() to the appropriate register number
- * and then read/write in chunks of 8 bytes. A larger size means multiple
- * reads or writes of the same register.
- *
- * This driver uses /dev/cpu/%d/msr where %d is the minor number, and on
- * an SMP box will direct the access to CPU %d.
- */
-
-#include <linux/module.h>
-#include <linux/config.h>
-
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <linux/fcntl.h>
-#include <linux/init.h>
-#include <linux/poll.h>
-#include <linux/smp.h>
-#include <linux/smp_lock.h>
-#include <linux/major.h>
-#include <linux/fs.h>
-
-#include <asm/processor.h>
-#include <asm/msr.h>
-#include <asm/uaccess.h>
-#include <asm/system.h>
-
-/* Note: "err" is handled in a funny way below. Otherwise one version
- of gcc or another breaks. */
-
-static inline int wrmsr_eio(u32 reg, u32 eax, u32 edx)
-{
- int err;
-
- asm volatile ("1: wrmsr\n"
- "2:\n"
- ".section .fixup,\"ax\"\n"
- "3: movl %4,%0\n"
- " jmp 2b\n"
- ".previous\n"
- ".section __ex_table,\"a\"\n"
- " .align 8\n" " .quad 1b,3b\n" ".previous":"=&bDS" (err)
- :"a"(eax), "d"(edx), "c"(reg), "i"(-EIO), "0"(0));
-
- return err;
-}
-
-static inline int rdmsr_eio(u32 reg, u32 *eax, u32 *edx)
-{
- int err;
-
- asm volatile ("1: rdmsr\n"
- "2:\n"
- ".section .fixup,\"ax\"\n"
- "3: movl %4,%0\n"
- " jmp 2b\n"
- ".previous\n"
- ".section __ex_table,\"a\"\n"
- " .align 8\n"
- " .quad 1b,3b\n"
- ".previous":"=&bDS" (err), "=a"(*eax), "=d"(*edx)
- :"c"(reg), "i"(-EIO), "0"(0));
-
- return err;
-}
-
-#ifdef CONFIG_SMP
-
-struct msr_command {
- int cpu;
- int err;
- u32 reg;
- u32 data[2];
-};
-
-static void msr_smp_wrmsr(void *cmd_block)
-{
- struct msr_command *cmd = (struct msr_command *)cmd_block;
-
- if (cmd->cpu == smp_processor_id())
- cmd->err = wrmsr_eio(cmd->reg, cmd->data[0], cmd->data[1]);
-}
-
-static void msr_smp_rdmsr(void *cmd_block)
-{
- struct msr_command *cmd = (struct msr_command *)cmd_block;
-
- if (cmd->cpu == smp_processor_id())
- cmd->err = rdmsr_eio(cmd->reg, &cmd->data[0], &cmd->data[1]);
-}
-
-static inline int do_wrmsr(int cpu, u32 reg, u32 eax, u32 edx)
-{
- struct msr_command cmd;
- int ret;
-
- preempt_disable();
- if (cpu == smp_processor_id()) {
- ret = wrmsr_eio(reg, eax, edx);
- } else {
- cmd.cpu = cpu;
- cmd.reg = reg;
- cmd.data[0] = eax;
- cmd.data[1] = edx;
-
- smp_call_function(msr_smp_wrmsr, &cmd, 1, 1);
- ret = cmd.err;
- }
- preempt_enable();
- return ret;
-}
-
-static inline int do_rdmsr(int cpu, u32 reg, u32 * eax, u32 * edx)
-{
- struct msr_command cmd;
- int ret;
-
- preempt_disable();
- if (cpu == smp_processor_id()) {
- ret = rdmsr_eio(reg, eax, edx);
- } else {
- cmd.cpu = cpu;
- cmd.reg = reg;
-
- smp_call_function(msr_smp_rdmsr, &cmd, 1, 1);
-
- *eax = cmd.data[0];
- *edx = cmd.data[1];
-
- ret = cmd.err;
- }
- preempt_enable();
- return ret;
-}
-
-#else /* ! CONFIG_SMP */
-
-static inline int do_wrmsr(int cpu, u32 reg, u32 eax, u32 edx)
-{
- return wrmsr_eio(reg, eax, edx);
-}
-
-static inline int do_rdmsr(int cpu, u32 reg, u32 *eax, u32 *edx)
-{
- return rdmsr_eio(reg, eax, edx);
-}
-
-#endif /* ! CONFIG_SMP */
-
-static loff_t msr_seek(struct file *file, loff_t offset, int orig)
-{
- loff_t ret = -EINVAL;
-
- lock_kernel();
- switch (orig) {
- case 0:
- file->f_pos = offset;
- ret = file->f_pos;
- break;
- case 1:
- file->f_pos += offset;
- ret = file->f_pos;
- }
- unlock_kernel();
- return ret;
-}
-
-static ssize_t msr_read(struct file *file, char __user * buf,
- size_t count, loff_t * ppos)
-{
- u32 __user *tmp = (u32 __user *) buf;
- u32 data[2];
- size_t rv;
- u32 reg = *ppos;
- int cpu = iminor(file->f_dentry->d_inode);
- int err;
-
- if (count % 8)
- return -EINVAL; /* Invalid chunk size */
-
- for (rv = 0; count; count -= 8) {
- err = do_rdmsr(cpu, reg, &data[0], &data[1]);
- if (err)
- return err;
- if (copy_to_user(tmp, &data, 8))
- return -EFAULT;
- tmp += 2;
- }
-
- return ((char __user *)tmp) - buf;
-}
-
-static ssize_t msr_write(struct file *file, const char __user *buf,
- size_t count, loff_t *ppos)
-{
- const u32 __user *tmp = (const u32 __user *)buf;
- u32 data[2];
- size_t rv;
- u32 reg = *ppos;
- int cpu = iminor(file->f_dentry->d_inode);
- int err;
-
- if (count % 8)
- return -EINVAL; /* Invalid chunk size */
-
- for (rv = 0; count; count -= 8) {
- if (copy_from_user(&data, tmp, 8))
- return -EFAULT;
- err = do_wrmsr(cpu, reg, data[0], data[1]);
- if (err)
- return err;
- tmp += 2;
- }
-
- return ((char __user *)tmp) - buf;
-}
-
-static int msr_open(struct inode *inode, struct file *file)
-{
- unsigned int cpu = iminor(file->f_dentry->d_inode);
- struct cpuinfo_x86 *c = &(cpu_data)[cpu];
-
- if (cpu >= NR_CPUS || !cpu_online(cpu))
- return -ENXIO; /* No such CPU */
- if (!cpu_has(c, X86_FEATURE_MSR))
- return -EIO; /* MSR not supported */
-
- return 0;
-}
-
-/*
- * File operations we support
- */
-static struct file_operations msr_fops = {
- .owner = THIS_MODULE,
- .llseek = msr_seek,
- .read = msr_read,
- .write = msr_write,
- .open = msr_open,
-};
-
-static int __init msr_init(void)
-{
- if (register_chrdev(MSR_MAJOR, "cpu/msr", &msr_fops)) {
- printk(KERN_ERR "msr: unable to get major %d for msr\n",
- MSR_MAJOR);
- return -EBUSY;
- }
-
- return 0;
-}
-
-static void __exit msr_exit(void)
-{
- unregister_chrdev(MSR_MAJOR, "cpu/msr");
-}
-
-module_init(msr_init);
-module_exit(msr_exit)
-
-MODULE_AUTHOR("H. Peter Anvin <hpa@zytor.com>");
-MODULE_DESCRIPTION("x86 generic MSR driver");
-MODULE_LICENSE("GPL");
#include <linux/config.h>
#include <linux/mm.h>
-#include <linux/irq.h>
#include <linux/delay.h>
#include <linux/bootmem.h>
#include <linux/smp_lock.h>
== NOTIFY_STOP) {
local_set(&__get_cpu_var(alert_counter), 0);
return;
- }
- die_nmi("NMI Watchdog detected LOCKUP on CPU%d", regs);
+ }
+ die_nmi("NMI Watchdog detected LOCKUP on CPU %d\n", regs);
}
} else {
__get_cpu_var(last_irq_sum) = sum;
{
struct page *page;
int node;
- if (dev->bus == &pci_bus_type) {
- cpumask_t mask;
- mask = pcibus_to_cpumask(to_pci_dev(dev)->bus);
- node = cpu_to_node(first_cpu(mask));
- } else
+ if (dev->bus == &pci_bus_type)
+ node = pcibus_to_node(to_pci_dev(dev)->bus);
+ else
node = numa_node_id();
page = alloc_pages_node(node, gfp, order);
return page ? page_address(page) : NULL;
#include <linux/a.out.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
-#include <linux/irq.h>
#include <linux/ptrace.h>
#include <linux/utsname.h>
#include <linux/random.h>
: :
"i" (_TIF_NEED_RESCHED),
"m" (current_thread_info()->flags));
+ clear_thread_flag(TIF_POLLING_NRFLAG);
} else {
set_need_resched();
}
printk("\n");
print_modules();
- printk("Pid: %d, comm: %.20s %s %s\n",
- current->pid, current->comm, print_tainted(), system_utsname.release);
+ printk("Pid: %d, comm: %.20s %s %s %.*s\n",
+ current->pid, current->comm, print_tainted(),
+ system_utsname.release,
+ (int)strcspn(system_utsname.version, " "),
+ system_utsname.version);
printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
printk_address(regs->rip);
printk("\nRSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp, regs->eflags);
return err;
}
-/*
- * This function selects if the context switch from prev to next
- * has to tweak the TSC disable bit in the cr4.
- */
-static inline void disable_tsc(struct task_struct *prev_p,
- struct task_struct *next_p)
-{
- struct thread_info *prev, *next;
-
- /*
- * gcc should eliminate the ->thread_info dereference if
- * has_secure_computing returns 0 at compile time (SECCOMP=n).
- */
- prev = prev_p->thread_info;
- next = next_p->thread_info;
-
- if (has_secure_computing(prev) || has_secure_computing(next)) {
- /* slow path here */
- if (has_secure_computing(prev) &&
- !has_secure_computing(next)) {
- write_cr4(read_cr4() & ~X86_CR4_TSD);
- } else if (!has_secure_computing(prev) &&
- has_secure_computing(next))
- write_cr4(read_cr4() | X86_CR4_TSD);
- }
-}
-
/*
* This special macro can be used to load a debugging register
*/
}
}
- disable_tsc(prev_p, next_p);
-
return prev_p;
}
#endif
#endif
+ if (!memcmp(from, "disable_timer_pin_1", 19))
+ disable_timer_pin_1 = 1;
+ if (!memcmp(from, "enable_timer_pin_1", 18))
+ disable_timer_pin_1 = -1;
+
if (!memcmp(from, "nolapic", 7) ||
!memcmp(from, "disableapic", 11))
disable_apic = 1;
}
}
+#ifdef CONFIG_NUMA
+static int nearby_node(int apicid)
+{
+ int i;
+ for (i = apicid - 1; i >= 0; i--) {
+ int node = apicid_to_node[i];
+ if (node != NUMA_NO_NODE && node_online(node))
+ return node;
+ }
+ for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) {
+ int node = apicid_to_node[i];
+ if (node != NUMA_NO_NODE && node_online(node))
+ return node;
+ }
+ return first_node(node_online_map); /* Shouldn't happen */
+}
+#endif
+
/*
* On an AMD dual-core setup the lower bits of the APIC id distinguish the cores.
* Assumes number of cores is a power of two.
{
#ifdef CONFIG_SMP
int cpu = smp_processor_id();
- int node = 0;
unsigned bits;
+#ifdef CONFIG_NUMA
+ int node = 0;
+ unsigned apicid = phys_proc_id[cpu];
+#endif
bits = 0;
while ((1 << bits) < c->x86_num_cores)
phys_proc_id[cpu] >>= bits;
#ifdef CONFIG_NUMA
- /* When an ACPI SRAT table is available use the mappings from SRAT
- instead. */
- if (acpi_numa <= 0) {
- node = phys_proc_id[cpu];
- if (!node_online(node))
- node = first_node(node_online_map);
- cpu_to_node[cpu] = node;
- } else {
- node = cpu_to_node[cpu];
- }
+ node = phys_proc_id[cpu];
+ if (apicid_to_node[apicid] != NUMA_NO_NODE)
+ node = apicid_to_node[apicid];
+ if (!node_online(node)) {
+ /* Two possibilities here:
+ - The CPU is missing memory and no node was created.
+ In that case try picking one from a nearby CPU
+ - The APIC IDs differ from the HyperTransport node IDs
+ which the K8 northbridge parsing fills in.
+ Assume they are all increased by a constant offset,
+ but in the same order as the HT nodeids.
+ If that doesn't result in a usable node fall back to the
+ path for the previous case. */
+ int ht_nodeid = apicid - (phys_proc_id[0] << bits);
+ if (ht_nodeid >= 0 &&
+ apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
+ node = apicid_to_node[ht_nodeid];
+ /* Pick a nearby node */
+ if (!node_online(node))
+ node = nearby_node(apicid);
+ }
+ cpu_to_node[cpu] = node;
+
+ printk(KERN_INFO "CPU %d(%d) -> Node %d -> Core %d\n",
+ cpu, c->x86_num_cores, node, cpu_core_id[cpu]);
#endif
-
- printk(KERN_INFO "CPU %d(%d) -> Node %d -> Core %d\n",
- cpu, c->x86_num_cores, node, cpu_core_id[cpu]);
#endif
}
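To make the constant-offset assumption concrete, a worked example with hypothetical numbers (not from the patch):

	/* Dual-core K8, so bits == 1; suppose the BSP has APIC id 0x10,
	   giving phys_proc_id[0] == 0x10 >> 1 == 0x08 after the shift. */
	unsigned bits = 1;
	unsigned apicid = 0x12;				/* this CPU */
	int ht_nodeid = apicid - (0x08 << bits);	/* 0x12 - 0x10 = 2 */
	/* apicid_to_node[2] is then consulted, and nearby_node() remains
	   the fallback if that node is offline. */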
return 1;
}
+static void srat_detect_node(void)
+{
+#ifdef CONFIG_NUMA
+ unsigned apicid, node;
+ int cpu = smp_processor_id();
+
+ /* Don't do the funky fallback heuristics the AMD version employs
+ for now. */
+ apicid = phys_proc_id[cpu];
+ node = apicid_to_node[apicid];
+ if (node == NUMA_NO_NODE)
+ node = 0;
+ cpu_to_node[cpu] = node;
+
+ if (acpi_numa > 0)
+ printk(KERN_INFO "CPU %d -> Node %d\n", cpu, node);
+#endif
+}
+
static void __cpuinit init_intel(struct cpuinfo_x86 *c)
{
/* Cache sizes */
if (c->x86 >= 15)
set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
c->x86_num_cores = intel_num_cpu_cores(c);
+
+ srat_detect_node();
}
static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
size = PERCPU_ENOUGH_ROOM;
#endif
- for (i = 0; i < NR_CPUS; i++) {
+ for_each_cpu_mask (i, cpu_possible_map) {
char *ptr;
if (!NODE_DATA(cpu_to_node(i))) {
asm volatile("movl %0,%%fs ; movl %0,%%gs" :: "r" (0));
wrmsrl(MSR_GS_BASE, cpu_pda + cpu);
- pda->me = pda;
pda->cpunumber = cpu;
pda->irqcount = -1;
pda->kernelstack =
#include <linux/init.h>
#include <linux/mm.h>
-#include <linux/irq.h>
#include <linux/delay.h>
#include <linux/spinlock.h>
#include <linux/smp_lock.h>
#include <asm/proto.h>
#include <asm/apicdef.h>
+#define __cpuinit __init
+
/*
* Smarter SMP flushing macros.
* c/o Linus Torvalds.
* writing to user space from interrupts. (It's not allowed anyway).
*
* Optimizations Manfred Spraul <manfred@colorfullife.com>
+ *
+ * More scalable flush, from Andi Kleen
+ *
+ * To avoid global state use 8 different call vectors.
+ * Each CPU uses a specific vector to trigger flushes on other
+ * CPUs. Depending on the received vector the target CPUs look into
+ * the right per cpu variable for the flush data.
+ *
+ * With more than 8 CPUs they are hashed to the 8 available
+ * vectors. The limited global vector space forces us to this right now.
+ * In future when interrupts are split into per CPU domains this could be
+ * fixed, at the cost of triggering multiple IPIs in some cases.
*/
-static cpumask_t flush_cpumask;
-static struct mm_struct * flush_mm;
-static unsigned long flush_va;
-static DEFINE_SPINLOCK(tlbstate_lock);
+union smp_flush_state {
+ struct {
+ cpumask_t flush_cpumask;
+ struct mm_struct *flush_mm;
+ unsigned long flush_va;
#define FLUSH_ALL -1ULL
+ spinlock_t tlbstate_lock;
+ };
+ char pad[SMP_CACHE_BYTES];
+} ____cacheline_aligned;
+
+/* State is put into the per CPU data section, but padded
+ to a full cache line because other CPUs can access it and we don't
+ want false sharing in the per cpu data segment. */
+static DEFINE_PER_CPU(union smp_flush_state, flush_state);
/*
* We cannot call mmdrop() because we are in interrupt context,
* instead update mm->cpu_vm_mask.
*/
-static inline void leave_mm (unsigned long cpu)
+static inline void leave_mm(int cpu)
{
if (read_pda(mmu_state) == TLBSTATE_OK)
BUG();
*
* 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
* 2) Leave the mm if we are in the lazy tlb mode.
+ *
+ * Interrupts are disabled.
*/
-asmlinkage void smp_invalidate_interrupt (void)
+asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
{
- unsigned long cpu;
+ int cpu;
+ int sender;
+ union smp_flush_state *f;
- cpu = get_cpu();
+ cpu = smp_processor_id();
+ /*
+ * orig_rax contains the interrupt vector - 256.
+ * Use that to determine where the sender put the data.
+ */
+ sender = regs->orig_rax + 256 - INVALIDATE_TLB_VECTOR_START;
+ f = &per_cpu(flush_state, sender);
- if (!cpu_isset(cpu, flush_cpumask))
+ if (!cpu_isset(cpu, f->flush_cpumask))
goto out;
/*
* This was a BUG() but until someone can quote me the
* BUG();
*/
- if (flush_mm == read_pda(active_mm)) {
+ if (f->flush_mm == read_pda(active_mm)) {
if (read_pda(mmu_state) == TLBSTATE_OK) {
- if (flush_va == FLUSH_ALL)
+ if (f->flush_va == FLUSH_ALL)
local_flush_tlb();
else
- __flush_tlb_one(flush_va);
+ __flush_tlb_one(f->flush_va);
} else
leave_mm(cpu);
}
out:
ack_APIC_irq();
- cpu_clear(cpu, flush_cpumask);
- put_cpu_no_resched();
+ cpu_clear(cpu, f->flush_cpumask);
}
static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
unsigned long va)
{
- cpumask_t tmp;
- /*
- * A couple of (to be removed) sanity checks:
- *
- * - we do not send IPIs to not-yet booted CPUs.
- * - current CPU must not be in mask
- * - mask must exist :)
- */
- BUG_ON(cpus_empty(cpumask));
- cpus_and(tmp, cpumask, cpu_online_map);
- BUG_ON(!cpus_equal(tmp, cpumask));
- BUG_ON(cpu_isset(smp_processor_id(), cpumask));
- if (!mm)
- BUG();
+ int sender;
+ union smp_flush_state *f;
- /*
- * I'm not happy about this global shared spinlock in the
- * MM hot path, but we'll see how contended it is.
- * Temporarily this turns IRQs off, so that lockups are
- * detected by the NMI watchdog.
- */
- spin_lock(&tlbstate_lock);
-
- flush_mm = mm;
- flush_va = va;
- cpus_or(flush_cpumask, cpumask, flush_cpumask);
+ /* Caller has disabled preemption */
+ sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS;
+ f = &per_cpu(flush_state, sender);
+
+ /* Could avoid this lock when
+ num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is
+ probably not worth checking this for a cache-hot lock. */
+ spin_lock(&f->tlbstate_lock);
+
+ f->flush_mm = mm;
+ f->flush_va = va;
+ cpus_or(f->flush_cpumask, cpumask, f->flush_cpumask);
/*
* We have to send the IPI only to
* CPUs affected.
*/
- send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR);
+ send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR_START + sender);
+
+ while (!cpus_empty(f->flush_cpumask))
+ cpu_relax();
- while (!cpus_empty(flush_cpumask))
- mb(); /* nothing. lockup detection does not belong here */;
+ f->flush_mm = NULL;
+ f->flush_va = 0;
+ spin_unlock(&f->tlbstate_lock);
+}
- flush_mm = NULL;
- flush_va = 0;
- spin_unlock(&tlbstate_lock);
+int __cpuinit init_smp_flush(void)
+{
+ int i;
+ for_each_cpu_mask(i, cpu_possible_map) {
+ spin_lock_init(&per_cpu(flush_state, i).tlbstate_lock);
+ }
+ return 0;
}
+
+core_initcall(init_smp_flush);
void flush_tlb_current_task(void)
{
/*
* this function sends a 'generic call function' IPI to one other CPU
* in the system.
+ *
+ * cpu is a standard Linux logical CPU number.
*/
-static void __smp_call_function_single (int cpu, void (*func) (void *info), void *info,
+static void
+__smp_call_function_single(int cpu, void (*func) (void *info), void *info,
int nonatomic, int wait)
{
struct call_data_struct data;
#include <linux/mm.h>
#include <linux/kernel_stat.h>
#include <linux/smp_lock.h>
-#include <linux/irq.h>
#include <linux/bootmem.h>
#include <linux/thread_info.h>
#include <linux/module.h>
#include <asm/tlbflush.h>
#include <asm/proto.h>
#include <asm/nmi.h>
+#include <asm/irq.h>
+#include <asm/hw_irq.h>
/* Number of siblings per CPU package */
int smp_num_siblings = 1;
/*
* Get our bogomips.
+ *
+ * Need to enable IRQs because the calibration can take long enough
+ * that the NMI watchdog would otherwise kill us.
*/
+ local_irq_enable();
calibrate_delay();
+ local_irq_disable();
Dprintk("Stack at about %p\n",&cpuid);
disable_APIC_timer();
*/
apic_wait_icr_idle();
- apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
- apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]);
+ apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
+ apic_write(APIC_ICR, APIC_DM_REMRD | regs[i]);
timeout = 0;
do {
/*
* Turn INIT on target chip
*/
- apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
+ apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
/*
* Send IPI
*/
- apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
+ apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
| APIC_DM_INIT);
Dprintk("Waiting for send to finish...\n");
Dprintk("Deasserting INIT.\n");
/* Target chip */
- apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
+ apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
/* Send IPI */
- apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
+ apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
Dprintk("Waiting for send to finish...\n");
timeout = 0;
atomic_set(&init_deasserted, 1);
- /*
- * Should we send STARTUP IPIs ?
- *
- * Determine this based on the APIC version.
- * If we don't have an integrated APIC, don't send the STARTUP IPIs.
- */
- if (APIC_INTEGRATED(apic_version[phys_apicid]))
- num_starts = 2;
- else
- num_starts = 0;
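+ /* x86-64 local APICs are always integrated, so the STARTUP IPIs
+    are always needed. */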
+ num_starts = 2;
/*
* Run STARTUP IPI loop.
*/
/* Target chip */
- apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
+ apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
/* Boot on the stack */
/* Kick the second */
- apic_write_around(APIC_ICR, APIC_DM_STARTUP
- | (start_rip >> 12));
+ apic_write(APIC_ICR, APIC_DM_STARTUP | (start_rip >> 12));
/*
* Give the other CPU some time to accept the IPI.
* Reset trampoline flag
*/
*((volatile int *) phys_to_virt(0x467)) = 0;
-
-#ifndef CONFIG_HOTPLUG_CPU
- /*
- * Free pages reserved for SMP bootup.
- * When you add hotplug CPU support later remove this
- * Note there is more work to be done for later CPU bootup.
- */
-
- free_page((unsigned long) __va(PAGE_SIZE));
- free_page((unsigned long) __va(SMP_TRAMPOLINE_BASE));
-#endif
}
/*
*/
#include <linux/config.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/spinlock.h>
-#include <linux/poll.h>
-#include <linux/delay.h>
-#include <linux/sysrq.h>
-#include <linux/proc_fs.h>
-#include <linux/irq.h>
-#include <linux/pm.h>
-#include <linux/device.h>
+#include <linux/smp.h>
#include <linux/suspend.h>
-#include <asm/uaccess.h>
-#include <asm/acpi.h>
-#include <asm/tlbflush.h>
-#include <asm/io.h>
#include <asm/proto.h>
struct saved_context saved_context;
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/mc146818rtc.h>
-#include <linux/irq.h>
#include <linux/time.h>
#include <linux/ioport.h>
#include <linux/module.h>
vxtime.mode = VXTIME_TSC;
vxtime.quot = (1000000L << 32) / vxtime_hz;
vxtime.tsc_quot = (1000L << 32) / cpu_khz;
- vxtime.hz = vxtime_hz;
rdtscll_sync(&vxtime.last_tsc);
setup_irq(0, &irq0);
#include <asm/proto.h>
#include <asm/nmi.h>
-#include <linux/irq.h>
-
-
extern struct gate_struct idt_table[256];
asmlinkage void divide_error(void);
if (__copy_from_user(&f, (struct bug_frame *) regs->rip,
sizeof(struct bug_frame)))
return;
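+ /* A kernel string pointer truncated to 32 bits is negative on
+    x86-64, so a non-negative filename cannot be valid. */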
- if ((unsigned long)f.filename < __PAGE_OFFSET ||
+ if (f.filename >= 0 ||
f.ud2[0] != 0x0f || f.ud2[1] != 0x0b)
return;
- if (__get_user(tmp, f.filename))
- f.filename = "unmapped filename";
+ if (__get_user(tmp, (char *)(long)f.filename))
+ f.filename = (int)(long)"unmapped filename";
printk("----------- [cut here ] --------- [please bite here ] ---------\n");
- printk(KERN_ALERT "Kernel BUG at %.50s:%d\n", f.filename, f.line);
+ printk(KERN_ALERT "Kernel BUG at %.50s:%d\n", (char *)(long)f.filename, f.line);
}
#ifdef CONFIG_BUG
static DEFINE_SPINLOCK(die_lock);
static int die_owner = -1;
-void oops_begin(void)
+unsigned long oops_begin(void)
{
- int cpu = safe_smp_processor_id();
- /* racy, but better than risking deadlock. */
- local_irq_disable();
+ int cpu = safe_smp_processor_id();
+ unsigned long flags;
+
+ /* racy, but better than risking deadlock. */
+ local_irq_save(flags);
if (!spin_trylock(&die_lock)) {
if (cpu == die_owner)
/* nested oops. should stop eventually */;
else
- spin_lock(&die_lock);
+ spin_lock(&die_lock);
}
- die_owner = cpu;
+ die_owner = cpu;
console_verbose();
- bust_spinlocks(1);
+ bust_spinlocks(1);
+ return flags;
}
-void oops_end(void)
+void oops_end(unsigned long flags)
{
die_owner = -1;
- bust_spinlocks(0);
- spin_unlock(&die_lock);
+ bust_spinlocks(0);
+ spin_unlock_irqrestore(&die_lock, flags);
if (panic_on_oops)
- panic("Oops");
-}
+ panic("Oops");
+}
void __die(const char * str, struct pt_regs * regs, long err)
{
void die(const char * str, struct pt_regs * regs, long err)
{
- oops_begin();
+ unsigned long flags = oops_begin();
+
handle_BUG(regs);
__die(str, regs, err);
- oops_end();
+ oops_end(flags);
do_exit(SIGSEGV);
}
static inline void die_if_kernel(const char * str, struct pt_regs * regs, long err)
void die_nmi(char *str, struct pt_regs *regs)
{
- oops_begin();
+ unsigned long flags = oops_begin();
+
/*
 * We are in trouble anyway, let's at least try
* to get a message out.
if (panic_on_timeout || panic_on_oops)
panic("nmi watchdog");
printk("console shuts up ...\n");
- oops_end();
+ oops_end(flags);
do_exit(SIGSEGV);
}
*/
cwd = get_fpu_cwd(task);
swd = get_fpu_swd(task);
- switch (((~cwd) & swd & 0x3f) | (swd & 0x240)) {
+ switch (swd & ~cwd & 0x3f) {
case 0x000:
default:
break;
case 0x001: /* Invalid Op */
- case 0x041: /* Stack Fault */
- case 0x241: /* Stack Fault | Direction */
+ /*
+ * swd & 0x240 == 0x040: Stack Underflow
+ * swd & 0x240 == 0x240: Stack Overflow
+ * User must clear the SF bit (0x40) if set
+ */
info.si_code = FPE_FLTINV;
break;
case 0x002: /* Denormalize */
return secs;
}
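+/* Not static: these are reached only through their fixed vsyscall
+   addresses, and gcc could otherwise optimize the seemingly
+   unreferenced static functions away. */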
-static int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz)
+int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz)
{
if (unlikely(!__sysctl_vsyscall))
return gettimeofday(tv,tz);
/* This will break when the xtime seconds get inaccurate, but that is
* unlikely */
-static time_t __vsyscall(1) vtime(time_t *t)
+time_t __vsyscall(1) vtime(time_t *t)
{
if (unlikely(!__sysctl_vsyscall))
return time_syscall(t);
return __xtime.tv_sec;
}
-static long __vsyscall(2) venosys_0(void)
+long __vsyscall(2) venosys_0(void)
{
return -ENOSYS;
}
-static long __vsyscall(3) venosys_1(void)
+long __vsyscall(3) venosys_1(void)
{
return -ENOSYS;
}
static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
unsigned long error_code)
{
- oops_begin();
+ unsigned long flags = oops_begin();
+
printk(KERN_ALERT "%s: Corrupted page table at address %lx\n",
current->comm, address);
dump_pagetable(address);
__die("Bad pagetable", regs, error_code);
- oops_end();
+ oops_end(flags);
do_exit(SIGKILL);
}
unsigned long address;
const struct exception_table_entry *fixup;
int write;
+ unsigned long flags;
siginfo_t info;
#ifdef CONFIG_CHECKING
* terminate things with extreme prejudice.
*/
- oops_begin();
+ flags = oops_begin();
if (address < PAGE_SIZE)
printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
__die("Oops", regs, error_code);
/* Executive summary in case the body of the oops scrolled away */
printk(KERN_EMERG "CR2: %016lx\n", address);
- oops_end();
+ oops_end(flags);
do_exit(SIGKILL);
/*
void show_mem(void)
{
- int i, total = 0, reserved = 0;
- int shared = 0, cached = 0;
+ long i, total = 0, reserved = 0;
+ long shared = 0, cached = 0;
pg_data_t *pgdat;
struct page *page;
- printk("Mem-info:\n");
+ printk(KERN_INFO "Mem-info:\n");
show_free_areas();
- printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
+ printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
for_each_pgdat(pgdat) {
for (i = 0; i < pgdat->node_spanned_pages; ++i) {
page = pfn_to_page(pgdat->node_start_pfn + i);
total++;
- if (PageReserved(page))
- reserved++;
- else if (PageSwapCache(page))
- cached++;
- else if (page_count(page))
- shared += page_count(page) - 1;
+ if (PageReserved(page))
+ reserved++;
+ else if (PageSwapCache(page))
+ cached++;
+ else if (page_count(page))
+ shared += page_count(page) - 1;
}
}
- printk("%d pages of RAM\n", total);
- printk("%d reserved pages\n",reserved);
- printk("%d pages shared\n",shared);
- printk("%d pages swap cached\n",cached);
+ printk(KERN_INFO "%lu pages of RAM\n", total);
+ printk(KERN_INFO "%lu reserved pages\n",reserved);
+ printk(KERN_INFO "%lu pages shared\n",shared);
+ printk(KERN_INFO "%lu pages swap cached\n",cached);
}
/* References to section boundaries */
__flush_tlb_all();
}
-static inline int page_is_ram (unsigned long pagenr)
-{
- int i;
-
- for (i = 0; i < e820.nr_map; i++) {
- unsigned long addr, end;
-
- if (e820.map[i].type != E820_RAM) /* not usable memory */
- continue;
- /*
- * !!!FIXME!!! Some BIOSen report areas as RAM that
- * are not. Notably the 640->1Mb area. We need a sanity
- * check here.
- */
- addr = (e820.map[i].addr+PAGE_SIZE-1) >> PAGE_SHIFT;
- end = (e820.map[i].addr+e820.map[i].size) >> PAGE_SHIFT;
- if ((pagenr >= addr) && (pagenr < end))
- return 1;
- }
- return 0;
-}
-
-extern int swiotlb_force;
-
static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules,
kcore_vsyscall;
void __init mem_init(void)
{
- int codesize, reservedpages, datasize, initsize;
- int tmp;
+ long codesize, reservedpages, datasize, initsize;
#ifdef CONFIG_SWIOTLB
- if (swiotlb_force)
- swiotlb = 1;
if (!iommu_aperture &&
(end_pfn >= 0xffffffff>>PAGE_SHIFT || force_iommu))
swiotlb = 1;
/* this will put all low memory onto the freelists */
#ifdef CONFIG_NUMA
- totalram_pages += numa_free_all_bootmem();
- tmp = 0;
- /* should count reserved pages here for all nodes */
+ totalram_pages = numa_free_all_bootmem();
#else
-
-#ifdef CONFIG_FLATMEM
- max_mapnr = end_pfn;
- if (!mem_map) BUG();
-#endif
-
- totalram_pages += free_all_bootmem();
-
- for (tmp = 0; tmp < end_pfn; tmp++)
- /*
- * Only count reserved RAM pages
- */
- if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp)))
- reservedpages++;
+ totalram_pages = free_all_bootmem();
#endif
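+ /* Reserved = pages below end_pfn that are neither free nor
+    inside an e820 hole. */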
+ reservedpages = end_pfn - totalram_pages - e820_hole_size(0, end_pfn);
after_bootmem = 1;
kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START,
VSYSCALL_END - VSYSCALL_START);
- printk("Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init)\n",
+ printk("Memory: %luk/%luk available (%ldk kernel code, %ldk reserved, %ldk data, %ldk init)\n",
(unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
end_pfn << (PAGE_SHIFT-10),
codesize >> 10,
unsigned long prevbase;
struct node nodes[8];
int nodeid, i, nb;
+ unsigned char nodeids[8];
int found = 0;
u32 reg;
unsigned numnodes;
nodemask_t nodes_parsed;
+ unsigned dualcore = 0;
nodes_clear(nodes_parsed);
prevbase = 0;
for (i = 0; i < 8; i++) {
unsigned long base,limit;
-
+ u32 nodeid;
+
+ /* Undefined before E stepping, but hopefully 0 */
+ dualcore |= ((read_pci_config(0, nb, 3, 0xe8) >> 12) & 3) == 1;
base = read_pci_config(0, nb, 1, 0x40 + i*8);
limit = read_pci_config(0, nb, 1, 0x44 + i*8);
nodeid = limit & 7;
+ nodeids[i] = nodeid;
if ((base & 3) == 0) {
if (i < numnodes)
printk("Skipping disabled node %d\n", i);
for (i = 0; i < 8; i++) {
if (nodes[i].start != nodes[i].end) {
- /* assume 1:1 NODE:CPU */
- cpu_to_node[i] = i;
+ nodeid = nodeids[i];
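+ /* On dual core nodes the local APIC ids are nodeid*2 and
+    nodeid*2+1; map both cores to this node. */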
+ apicid_to_node[nodeid << dualcore] = i;
+ apicid_to_node[(nodeid << dualcore) + dualcore] = i;
setup_node_bootmem(i, nodes[i].start, nodes[i].end);
}
}
int memnode_shift;
u8 memnodemap[NODEMAPSIZE];
-unsigned char cpu_to_node[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = NUMA_NO_NODE };
-cpumask_t node_to_cpumask[MAX_NUMNODES] __read_mostly;
+unsigned char cpu_to_node[NR_CPUS] __read_mostly = {
+ [0 ... NR_CPUS-1] = NUMA_NO_NODE
+};
+unsigned char apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
+ [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
+};
+cpumask_t node_to_cpumask[MAX_NUMNODES] __read_mostly;
int numa_off __initdata;
static struct acpi_table_slit *acpi_slit;
-/* Internal processor count */
-static unsigned int __initdata num_processors = 0;
-
static nodemask_t nodes_parsed __initdata;
static nodemask_t nodes_found __initdata;
static struct node nodes[MAX_NUMNODES] __initdata;
static __u8 pxm2node[256] = { [0 ... 255] = 0xff };
+static int node_to_pxm(int n);
+
+int pxm_to_node(int pxm)
+{
+ if ((unsigned)pxm >= 256)
+ return 0;
+ return pxm2node[pxm];
+}
+
static __init int setup_node(int pxm)
{
unsigned node = pxm2node[pxm];
static __init int conflicting_nodes(unsigned long start, unsigned long end)
{
int i;
- for_each_online_node(i) {
+ for_each_node_mask(i, nodes_parsed) {
struct node *nd = &nodes[i];
if (nd->start == nd->end)
continue;
if (nd->end > start && nd->start < end)
- return 1;
+ return i;
if (nd->end == end && nd->start == start)
- return 1;
+ return i;
}
return -1;
}
static __init void bad_srat(void)
{
+ int i;
printk(KERN_ERR "SRAT: SRAT not used.\n");
acpi_numa = -1;
+ for (i = 0; i < MAX_LOCAL_APIC; i++)
+ apicid_to_node[i] = NUMA_NO_NODE;
}
static __init inline int srat_disabled(void)
bad_srat();
return;
}
- if (num_processors >= NR_CPUS) {
- printk(KERN_ERR "SRAT: Processor #%d (lapic %u) INVALID. (Max ID: %d).\n",
- num_processors, pa->apic_id, NR_CPUS);
- bad_srat();
- return;
- }
- cpu_to_node[num_processors] = node;
+ apicid_to_node[pa->apic_id] = node;
acpi_numa = 1;
- printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> CPU %u -> Node %u\n",
- pxm, pa->apic_id, num_processors, node);
-
- num_processors++;
+ printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n",
+ pxm, pa->apic_id, node);
}
/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
printk(KERN_INFO "SRAT: hot plug zone found %lx - %lx \n",
start, end);
i = conflicting_nodes(start, end);
- if (i >= 0) {
+ if (i == node) {
+ printk(KERN_WARNING
+ "SRAT: Warning: PXM %d (%lx-%lx) overlaps with itself (%Lx-%Lx)\n",
+ pxm, start, end, nodes[i].start, nodes[i].end);
+ } else if (i >= 0) {
printk(KERN_ERR
- "SRAT: pxm %d overlap %lx-%lx with node %d(%Lx-%Lx)\n",
- pxm, start, end, i, nodes[i].start, nodes[i].end);
+ "SRAT: PXM %d (%lx-%lx) overlaps with PXM %d (%Lx-%Lx)\n",
+ pxm, start, end, node_to_pxm(i),
+ nodes[i].start, nodes[i].end);
bad_srat();
return;
}
int i;
if (acpi_numa <= 0)
return -1;
+
+ /* First clean up the node list */
+ for_each_node_mask(i, nodes_parsed) {
+ cutoff_node(i, start, end);
+ if (nodes[i].start == nodes[i].end)
+ node_clear(i, nodes_parsed);
+ }
+
memnode_shift = compute_hash_shift(nodes, nodes_weight(nodes_parsed));
if (memnode_shift < 0) {
printk(KERN_ERR
bad_srat();
return -1;
}
- for (i = 0; i < MAX_NUMNODES; i++) {
- if (!node_isset(i, nodes_parsed))
- continue;
- cutoff_node(i, start, end);
- if (nodes[i].start == nodes[i].end) {
- node_clear(i, nodes_parsed);
- continue;
- }
+
+ /* Finally register nodes */
+ for_each_node_mask(i, nodes_parsed)
setup_node_bootmem(i, nodes[i].start, nodes[i].end);
- }
for (i = 0; i < NR_CPUS; i++) {
if (cpu_to_node[i] == NUMA_NO_NODE)
continue;
return 0;
}
-int node_to_pxm(int n)
+static int node_to_pxm(int n)
{
int i;
if (pxm2node[n] == n)
for (j = SECONDARY_LDT_BUS_NUMBER(ldtbus);
j <= SUBORDINATE_LDT_BUS_NUMBER(ldtbus);
j++) {
- int node = NODE_ID(nid);
+ struct pci_bus *bus;
+ long node = NODE_ID(nid);
+ /* The algorithm is a bit dumb, but
+    it shouldn't matter here */
+ bus = pci_find_bus(0, j);
+ if (!bus)
+ continue;
if (!node_online(node))
node = 0;
- pci_bus_to_node[j] = node;
+ bus->sysdata = (void *)node;
}
}
}
(pci_mmcfg_config[0].base_address == 0))
return 0;
- /* Kludge for now. Don't use mmconfig on AMD systems because
- those have some busses where mmconfig doesn't work,
- and we don't parse ACPI MCFG well enough to handle that.
- Remove when proper handling is added. */
- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
- return 0;
-
/* RED-PEN i386 doesn't do _nocache right now */
pci_mmcfg_virt = kmalloc(sizeof(*pci_mmcfg_virt) * pci_mmcfg_config_num, GFP_KERNEL);
if (pci_mmcfg_virt == NULL) {
* and page free order so much..
*/
#ifdef CONFIG_SMP
- #define FREE_PTE_NR 506
+ #ifdef ARCH_FREE_PTE_NR
+ #define FREE_PTE_NR ARCH_FREE_PTE_NR
+ #else
+ #define FREE_PTE_NR 506
+ #endif
#define tlb_fast_mode(tlb) ((tlb)->nr == ~0U)
#else
#define FREE_PTE_NR 1
#define NMI_LOCAL_APIC 2
#define NMI_INVALID 3
+extern int disable_timer_pin_1;
+
#else /* !CONFIG_X86_LOCAL_APIC */
static inline void lapic_shutdown(void) { }
+
+int pxm_to_node(int pxm);
+
return first_cpu(mask);
}
-#define pcibus_to_node(bus) mp_bus_id_to_node[(bus)->number]
+#define pcibus_to_node(bus) ((long) (bus)->sysdata)
#define pcibus_to_cpumask(bus) node_to_cpumask(pcibus_to_node(bus))
/* sched_domains SD_NODE_INIT for NUMAQ machines */
long __res; \
__asm__ volatile ("int $0x80" \
: "=a" (__res) \
- : "0" (__NR_##name),"b" ((long)(arg1))); \
+ : "0" (__NR_##name),"b" ((long)(arg1)) : "memory"); \
__syscall_return(type,__res); \
}
long __res; \
__asm__ volatile ("int $0x80" \
: "=a" (__res) \
- : "0" (__NR_##name),"b" ((long)(arg1)),"c" ((long)(arg2))); \
+ : "0" (__NR_##name),"b" ((long)(arg1)),"c" ((long)(arg2)) : "memory"); \
__syscall_return(type,__res); \
}
__asm__ volatile ("int $0x80" \
: "=a" (__res) \
: "0" (__NR_##name),"b" ((long)(arg1)),"c" ((long)(arg2)), \
- "d" ((long)(arg3))); \
+ "d" ((long)(arg3)) : "memory"); \
__syscall_return(type,__res); \
}
__asm__ volatile ("int $0x80" \
: "=a" (__res) \
: "0" (__NR_##name),"b" ((long)(arg1)),"c" ((long)(arg2)), \
- "d" ((long)(arg3)),"S" ((long)(arg4))); \
+ "d" ((long)(arg3)),"S" ((long)(arg4)) : "memory"); \
__syscall_return(type,__res); \
}
__asm__ volatile ("int $0x80" \
: "=a" (__res) \
: "0" (__NR_##name),"b" ((long)(arg1)),"c" ((long)(arg2)), \
- "d" ((long)(arg3)),"S" ((long)(arg4)),"D" ((long)(arg5))); \
+ "d" ((long)(arg3)),"S" ((long)(arg4)),"D" ((long)(arg5)) : "memory"); \
__syscall_return(type,__res); \
}
: "=a" (__res) \
: "i" (__NR_##name),"b" ((long)(arg1)),"c" ((long)(arg2)), \
"d" ((long)(arg3)),"S" ((long)(arg4)),"D" ((long)(arg5)), \
- "0" ((long)(arg6))); \
+ "0" ((long)(arg6)) : "memory"); \
__syscall_return(type,__res); \
}
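+/* The "memory" clobber keeps gcc from caching memory values across
+   the int $0x80, since the kernel may read or write memory through
+   the pointer arguments. */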
#define NMI_LOCAL_APIC 2
#define NMI_INVALID 3
+extern int disable_timer_pin_1;
+
#endif /* CONFIG_X86_LOCAL_APIC */
-#define esr_disable 0
extern unsigned boot_cpu_id;
#endif /* __ASM_APIC_H */
#define APIC_BASE (fix_to_virt(FIX_APIC_BASE))
#define MAX_IO_APICS 128
+#define MAX_LOCAL_APIC 256
/*
* All x86-64 systems are xAPIC compatible.
*/
struct bug_frame {
unsigned char ud2[2];
- unsigned char mov;
- /* should use 32bit offset instead, but the assembler doesn't
- like it */
- char *filename;
+ unsigned char push;
+ signed int filename;
unsigned char ret;
unsigned short line;
} __attribute__((packed));
This generates ud2 ; pushq $32bitimm ; ret $offset. */
#define BUG() \
asm volatile( \
- "ud2 ; .byte 0xa3 ; .quad %c1 ; .byte 0xc2 ; .short %c0" :: \
- "i"(__LINE__), "i" (__stringify(__FILE__)))
+ "ud2 ; pushq $%c1 ; ret $%c0" :: \
+ "i"(__LINE__), "i" (__FILE__))
void out_of_line_bug(void);
#else
static inline void out_of_line_bug(void) { }
.if \skipr11
.else
movq (%rsp),%r11
+ CFI_RESTORE r11
.endif
.if \skipr8910
.else
movq 1*8(%rsp),%r10
+ CFI_RESTORE r10
movq 2*8(%rsp),%r9
+ CFI_RESTORE r9
movq 3*8(%rsp),%r8
+ CFI_RESTORE r8
.endif
.if \skiprax
.else
movq 4*8(%rsp),%rax
+ CFI_RESTORE rax
.endif
.if \skiprcx
.else
movq 5*8(%rsp),%rcx
+ CFI_RESTORE rcx
.endif
.if \skiprdx
.else
movq 6*8(%rsp),%rdx
+ CFI_RESTORE rdx
.endif
movq 7*8(%rsp),%rsi
+ CFI_RESTORE rsi
movq 8*8(%rsp),%rdi
+ CFI_RESTORE rdi
.if ARG_SKIP+\addskip > 0
addq $ARG_SKIP+\addskip,%rsp
CFI_ADJUST_CFA_OFFSET -(ARG_SKIP+\addskip)
.macro RESTORE_REST
movq (%rsp),%r15
+ CFI_RESTORE r15
movq 1*8(%rsp),%r14
+ CFI_RESTORE r14
movq 2*8(%rsp),%r13
+ CFI_RESTORE r13
movq 3*8(%rsp),%r12
+ CFI_RESTORE r12
movq 4*8(%rsp),%rbp
+ CFI_RESTORE rbp
movq 5*8(%rsp),%rbx
+ CFI_RESTORE rbx
addq $REST_SKIP,%rsp
CFI_ADJUST_CFA_OFFSET -(REST_SKIP)
.endm
.macro icebp
.byte 0xf1
.endm
-
-#ifdef CONFIG_FRAME_POINTER
-#define ENTER enter
-#define LEAVE leave
-#else
-#define ENTER
-#define LEAVE
-#endif
/*
* load one particular LDT into the current CPU
*/
-extern inline void load_LDT_nolock (mm_context_t *pc, int cpu)
+static inline void load_LDT_nolock (mm_context_t *pc, int cpu)
{
int count = pc->size;
flush_write_buffers();
}
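+/* Sub-range syncs fall back to syncing size bytes from the start of
+   the mapping; the offset argument is ignored. */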
+#define dma_sync_single_range_for_cpu(dev, dma_handle, offset, size, dir) \
+ dma_sync_single_for_cpu(dev, dma_handle, size, dir)
+#define dma_sync_single_range_for_device(dev, dma_handle, offset, size, dir) \
+ dma_sync_single_for_device(dev, dma_handle, size, dir)
+
static inline void dma_sync_sg_for_cpu(struct device *hwdev,
struct scatterlist *sg,
int nelems, int direction)
#define CFI_ADJUST_CFA_OFFSET .cfi_adjust_cfa_offset
#define CFI_OFFSET .cfi_offset
#define CFI_REL_OFFSET .cfi_rel_offset
+#define CFI_REGISTER .cfi_register
+#define CFI_RESTORE .cfi_restore
+#define CFI_REMEMBER_STATE .cfi_remember_state
+#define CFI_RESTORE_STATE .cfi_restore_state
#else
#define CFI_ADJUST_CFA_OFFSET #
#define CFI_OFFSET #
#define CFI_REL_OFFSET #
+#define CFI_REGISTER #
+#define CFI_RESTORE #
+#define CFI_REMEMBER_STATE #
+#define CFI_RESTORE_STATE #
#endif
* directly without translation, we catch the bug with a NULL-deference
* kernel oops. Illegal ranges of incoming indices are caught too.
*/
-extern inline unsigned long fix_to_virt(const unsigned int idx)
+static inline unsigned long fix_to_virt(const unsigned int idx)
{
/*
* this branch gets completely eliminated after inlining,
#define __ARCH_IRQ_STAT 1
-/* Generate a lvalue for a pda member. Should fix softirq.c instead to use
- special access macros. This would generate better code. */
-#define __IRQ_STAT(cpu,member) (read_pda(me)->member)
+#define local_softirq_pending() read_pda(__softirq_pending)
-#include <linux/irq_cpustat.h> /* Standard mappings for irq_cpustat_t above */
+#define __ARCH_SET_SOFTIRQ_PENDING 1
+
+#define set_softirq_pending(x) write_pda(__softirq_pending, (x))
+#define or_softirq_pending(x) or_pda(__softirq_pending, (x))
/*
* 'what should we do if we get a hw irq event on an illegal vector'.
struct hw_interrupt_type;
#endif
+#define NMI_VECTOR 0x02
/*
* IDT vectors usable for external interrupt sources start
* at 0x20:
*/
#define SPURIOUS_APIC_VECTOR 0xff
#define ERROR_APIC_VECTOR 0xfe
-#define INVALIDATE_TLB_VECTOR 0xfd
-#define RESCHEDULE_VECTOR 0xfc
-#define TASK_MIGRATION_VECTOR 0xfb
-#define CALL_FUNCTION_VECTOR 0xfa
-#define KDB_VECTOR 0xf9
-
-#define THERMAL_APIC_VECTOR 0xf0
-
+#define RESCHEDULE_VECTOR 0xfd
+#define CALL_FUNCTION_VECTOR 0xfc
+#define KDB_VECTOR 0xfb /* reserved for KDB */
+#define THERMAL_APIC_VECTOR 0xfa
+/* 0xf9 free */
+#define INVALIDATE_TLB_VECTOR_END 0xf8
+#define INVALIDATE_TLB_VECTOR_START 0xf0 /* f0-f8 used for TLB flush */
+
+#define NUM_INVALIDATE_TLB_VECTORS 8
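+/* One flush state slot exists per vector; a sender uses slot
+   cpu % NUM_INVALIDATE_TLB_VECTORS. */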
/*
* Local APIC timer IRQ vector is on a different priority level,
* Talk about misusing macros..
*/
#define __OUT1(s,x) \
-extern inline void out##s(unsigned x value, unsigned short port) {
+static inline void out##s(unsigned x value, unsigned short port) {
#define __OUT2(s,s1,s2) \
__asm__ __volatile__ ("out" #s " %" s1 "0,%" s2 "1"
__OUT1(s##_p,x) __OUT2(s,s1,"w") __FULL_SLOW_DOWN_IO : : "a" (value), "Nd" (port));} \
#define __IN1(s) \
-extern inline RETURN_TYPE in##s(unsigned short port) { RETURN_TYPE _v;
+static inline RETURN_TYPE in##s(unsigned short port) { RETURN_TYPE _v;
#define __IN2(s,s1,s2) \
__asm__ __volatile__ ("in" #s " %" s2 "1,%" s1 "0"
__IN1(s##_p) __IN2(s,s1,"w") __FULL_SLOW_DOWN_IO : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \
#define __INS(s) \
-extern inline void ins##s(unsigned short port, void * addr, unsigned long count) \
+static inline void ins##s(unsigned short port, void * addr, unsigned long count) \
{ __asm__ __volatile__ ("rep ; ins" #s \
: "=D" (addr), "=c" (count) : "d" (port),"0" (addr),"1" (count)); }
#define __OUTS(s) \
-extern inline void outs##s(unsigned short port, const void * addr, unsigned long count) \
+static inline void outs##s(unsigned short port, const void * addr, unsigned long count) \
{ __asm__ __volatile__ ("rep ; outs" #s \
: "=S" (addr), "=c" (count) : "d" (port),"0" (addr),"1" (count)); }
* Change virtual addresses to physical addresses and vv.
* These are pretty trivial
*/
-extern inline unsigned long virt_to_phys(volatile void * address)
+static inline unsigned long virt_to_phys(volatile void * address)
{
return __pa(address);
}
-extern inline void * phys_to_virt(unsigned long address)
+static inline void * phys_to_virt(unsigned long address)
{
return __va(address);
}
extern void __iomem *__ioremap(unsigned long offset, unsigned long size, unsigned long flags);
-extern inline void __iomem * ioremap (unsigned long offset, unsigned long size)
+static inline void __iomem * ioremap (unsigned long offset, unsigned long size)
{
return __ioremap(offset, size, 0);
}
static inline unsigned int __prepare_ICR (unsigned int shortcut, int vector, unsigned int dest)
{
- unsigned int icr = APIC_DM_FIXED | shortcut | vector | dest;
- if (vector == KDB_VECTOR)
- icr = (icr & (~APIC_VECTOR_MASK)) | APIC_DM_NMI;
+ unsigned int icr = shortcut | dest;
+
+ switch (vector) {
+ default:
+ icr |= APIC_DM_FIXED | vector;
+ break;
+ case NMI_VECTOR:
+ /*
+ * Set up the KDB IPI to be delivered as an NMI
+ */
+ case KDB_VECTOR:
+ icr |= APIC_DM_NMI;
+ break;
+ }
return icr;
}
/*
* Send the IPI. The write to APIC_ICR fires this off.
*/
- apic_write_around(APIC_ICR, cfg);
+ apic_write(APIC_ICR, cfg);
}
* prepare target chip field
*/
cfg = __prepare_ICR2(x86_cpu_to_apicid[query_cpu]);
- apic_write_around(APIC_ICR2, cfg);
+ apic_write(APIC_ICR2, cfg);
/*
* program the ICR
/*
* Send the IPI. The write to APIC_ICR fires this off.
*/
- apic_write_around(APIC_ICR, cfg);
+ apic_write(APIC_ICR, cfg);
}
local_irq_restore(flags);
}
extern void __die(const char *,struct pt_regs *,long);
extern void show_registers(struct pt_regs *regs);
extern void dump_pagetable(unsigned long);
-extern void oops_begin(void);
-extern void oops_end(void);
+extern unsigned long oops_begin(void);
+extern void oops_end(unsigned long);
#endif
:"m" (v->counter));
}
-static __inline__ void local_add(unsigned long i, local_t *v)
+static __inline__ void local_add(unsigned int i, local_t *v)
{
__asm__ __volatile__(
"addl %1,%0"
:"ir" (i), "m" (v->counter));
}
-static __inline__ void local_sub(unsigned long i, local_t *v)
+static __inline__ void local_sub(unsigned int i, local_t *v)
{
__asm__ __volatile__(
"subl %1,%0"
#include <asm/smp.h>
-#define NODEMAPSIZE 0xff
+#define NODEMAPSIZE 0xfff
/* Simple perfect hash to map physical addresses to node numbers */
extern int memnode_shift;
#define pfn_valid(pfn) ((pfn) >= num_physpages ? 0 : \
({ u8 nid__ = pfn_to_nid(pfn); \
- nid__ != 0xff && (pfn) >= node_start_pfn(nid__) && (pfn) <= node_end_pfn(nid__); }))
+ nid__ != 0xff && (pfn) >= node_start_pfn(nid__) && (pfn) < node_end_pfn(nid__); }))
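+/* node_end_pfn() is exclusive, so a valid pfn must lie strictly below it. */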
#endif
#define local_mapnr(kvaddr) \
#define wrmsrl(msr,val) wrmsr(msr,(__u32)((__u64)(val)),((__u64)(val))>>32)
/* wrmsr with exception handling */
-#define wrmsr_safe(msr,a,b) ({ int ret__; \
- asm volatile("2: wrmsr ; xorl %0,%0\n" \
- "1:\n\t" \
- ".section .fixup,\"ax\"\n\t" \
- "3: movl %4,%0 ; jmp 1b\n\t" \
- ".previous\n\t" \
- ".section __ex_table,\"a\"\n" \
- " .align 8\n\t" \
- " .quad 2b,3b\n\t" \
- ".previous" \
- : "=a" (ret__) \
- : "c" (msr), "0" (a), "d" (b), "i" (-EFAULT));\
+#define wrmsr_safe(msr,a,b) ({ int ret__; \
+ asm volatile("2: wrmsr ; xorl %0,%0\n" \
+ "1:\n\t" \
+ ".section .fixup,\"ax\"\n\t" \
+ "3: movl %4,%0 ; jmp 1b\n\t" \
+ ".previous\n\t" \
+ ".section __ex_table,\"a\"\n" \
+ " .align 8\n\t" \
+ " .quad 2b,3b\n\t" \
+ ".previous" \
+ : "=a" (ret__) \
+ : "c" (msr), "0" (a), "d" (b), "i" (-EFAULT)); \
ret__; })
#define checking_wrmsrl(msr,val) wrmsr_safe(msr,(u32)(val),(u32)((val)>>32))
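+/* rdmsr with exception handling */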
+#define rdmsr_safe(msr,a,b) \
+ ({ int ret__; \
+ asm volatile ("1: rdmsr\n" \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3: movl %4,%0\n" \
+ " jmp 2b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n" \
+ " .align 8\n" \
+ " .quad 1b,3b\n" \
+ ".previous":"=&bDS" (ret__), "=a"(a), "=d"(b)\
+ :"c"(msr), "i"(-EIO), "0"(0)); \
+ ret__; })
+
#define rdtsc(low,high) \
__asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high))
: "=a" (low), "=d" (high) \
: "c" (counter))
-extern inline void cpuid(int op, unsigned int *eax, unsigned int *ebx,
+static inline void cpuid(int op, unsigned int *eax, unsigned int *ebx,
unsigned int *ecx, unsigned int *edx)
{
__asm__("cpuid"
/*
* CPUID functions returning a single datum
*/
-extern inline unsigned int cpuid_eax(unsigned int op)
+static inline unsigned int cpuid_eax(unsigned int op)
{
unsigned int eax;
: "bx", "cx", "dx");
return eax;
}
-extern inline unsigned int cpuid_ebx(unsigned int op)
+static inline unsigned int cpuid_ebx(unsigned int op)
{
unsigned int eax, ebx;
: "cx", "dx" );
return ebx;
}
-extern inline unsigned int cpuid_ecx(unsigned int op)
+static inline unsigned int cpuid_ecx(unsigned int op)
{
unsigned int eax, ecx;
: "bx", "dx" );
return ecx;
}
-extern inline unsigned int cpuid_edx(unsigned int op)
+static inline unsigned int cpuid_edx(unsigned int op)
{
unsigned int eax, edx;
};
extern int compute_hash_shift(struct node *nodes, int numnodes);
+extern int pxm_to_node(int nid);
#define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT))
extern void numa_init_array(void);
extern int numa_off;
+extern unsigned char apicid_to_node[256];
+
#define NUMA_NO_NODE 0xff
#endif
#ifdef __KERNEL__
#ifndef __ASSEMBLY__
+extern unsigned long end_pfn;
+
void clear_page(void *);
void copy_page(void *, void *);
#ifdef CONFIG_FLATMEM
#define pfn_to_page(pfn) (mem_map + (pfn))
#define page_to_pfn(page) ((unsigned long)((page) - mem_map))
-#define pfn_valid(pfn) ((pfn) < max_mapnr)
+#define pfn_valid(pfn) ((pfn) < end_pfn)
#endif
#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
* address space. The networking and block device layers use
* this boolean for bounce buffer decisions
*
- * On AMD64 it mostly equals, but we set it to zero to tell some subsystems
- * that an IOMMU is available.
+ * On x86-64 this mostly holds, but we set it to zero to tell some subsystems
+ * that a hard or soft IOMMU is available.
*/
-#define PCI_DMA_BUS_IS_PHYS (no_iommu ? 1 : 0)
+#define PCI_DMA_BUS_IS_PHYS 0
/*
* x86-64 always supports DAC, but sometimes it is useful to force
struct x8664_pda {
struct task_struct *pcurrent; /* Current process */
unsigned long data_offset; /* Per cpu data offset from linker address */
- struct x8664_pda *me; /* Pointer to itself */
unsigned long kernelstack; /* top of kernel stack for current */
unsigned long oldrsp; /* user rsp for system call */
- unsigned long irqrsp; /* Old rsp for interrupts. */
int irqcount; /* Irq nesting counter. Starts with -1 */
int cpunumber; /* Logical CPU number */
char *irqstackptr; /* top of irqstack */
struct mm_struct *active_mm;
int mmu_state;
unsigned apic_timer_irqs;
-} ____cacheline_aligned;
+} ____cacheline_aligned_in_smp;
#define IRQSTACK_ORDER 2
#define pda_offset(field) offsetof(struct x8664_pda, field)
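+/* Casting val through the field's own type lets gcc use either a
+   register or a correctly sized immediate for the "ri" constraint. */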
#define pda_to_op(op,field,val) do { \
+ typedef typeof_field(struct x8664_pda, field) T__; \
switch (sizeof_field(struct x8664_pda, field)) { \
case 2: \
-asm volatile(op "w %0,%%gs:%P1"::"r" (val),"i"(pda_offset(field)):"memory"); break; \
+asm volatile(op "w %0,%%gs:%P1"::"ri" ((T__)val),"i"(pda_offset(field)):"memory"); break; \
case 4: \
-asm volatile(op "l %0,%%gs:%P1"::"r" (val),"i"(pda_offset(field)):"memory"); break; \
+asm volatile(op "l %0,%%gs:%P1"::"ri" ((T__)val),"i"(pda_offset(field)):"memory"); break; \
case 8: \
-asm volatile(op "q %0,%%gs:%P1"::"r" (val),"i"(pda_offset(field)):"memory"); break; \
+asm volatile(op "q %0,%%gs:%P1"::"ri" ((T__)val),"i"(pda_offset(field)):"memory"); break; \
default: __bad_pda_field(); \
} \
} while (0)
* Unfortunately removing them causes all hell to break lose currently.
*/
#define pda_from_op(op,field) ({ \
- typedef typeof_field(struct x8664_pda, field) T__; T__ ret__; \
+ typeof_field(struct x8664_pda, field) ret__; \
switch (sizeof_field(struct x8664_pda, field)) { \
case 2: \
asm volatile(op "w %%gs:%P1,%0":"=r" (ret__):"i"(pda_offset(field)):"memory"); break;\
#define write_pda(field,val) pda_to_op("mov",field,val)
#define add_pda(field,val) pda_to_op("add",field,val)
#define sub_pda(field,val) pda_to_op("sub",field,val)
+#define or_pda(field,val) pda_to_op("or",field,val)
#endif
set_pmd(pmd, __pmd(_PAGE_TABLE | (page_to_pfn(pte) << PAGE_SHIFT)));
}
-extern __inline__ pmd_t *get_pmd(void)
+static inline pmd_t *get_pmd(void)
{
return (pmd_t *)get_zeroed_page(GFP_KERNEL);
}
-extern __inline__ void pmd_free(pmd_t *pmd)
+static inline void pmd_free(pmd_t *pmd)
{
BUG_ON((unsigned long)pmd & (PAGE_SIZE-1));
free_page((unsigned long)pmd);
/* Should really implement gc for free page table pages. This could be
done with a reference count in struct page. */
-extern __inline__ void pte_free_kernel(pte_t *pte)
+static inline void pte_free_kernel(pte_t *pte)
{
BUG_ON((unsigned long)pte & (PAGE_SIZE-1));
free_page((unsigned long)pte);
}
-extern inline void pte_free(struct page *pte)
+static inline void pte_free(struct page *pte)
{
__free_page(pte);
}
pud_val(*dst) = pud_val(val);
}
-extern inline void pud_clear (pud_t *pud)
+static inline void pud_clear (pud_t *pud)
{
set_pud(pud, __pud(0));
}
pgd_val(*dst) = pgd_val(val);
}
-extern inline void pgd_clear (pgd_t * pgd)
+static inline void pgd_clear (pgd_t * pgd)
{
set_pgd(pgd, __pgd(0));
}
}
/* Change flags of a PTE */
-extern inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{
pte_val(pte) &= _PAGE_CHG_MASK;
pte_val(pte) |= pgprot_val(newprot);
u64 tls_array[GDT_ENTRY_TLS_ENTRIES];
} __attribute__((aligned(16)));
-#define INIT_THREAD {}
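+/* Start rsp0 at the top of init_task's kernel stack. */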
+#define INIT_THREAD { \
+ .rsp0 = (unsigned long)&init_stack + sizeof(init_stack) \
+}
+
+#define INIT_TSS { \
+ .rsp0 = (unsigned long)&init_stack + sizeof(init_stack) \
+}
#define INIT_MMAP \
{ &init_mm, 0, 0, NULL, PAGE_SHARED, VM_READ | VM_WRITE | VM_EXEC, 1, NULL, NULL }
#define ASM_NOP_MAX 8
/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
-extern inline void rep_nop(void)
+static inline void rep_nop(void)
{
__asm__ __volatile__("rep;nop": : :"memory");
}
/* Stop speculative execution */
-extern inline void sync_core(void)
+static inline void sync_core(void)
{
int tmp;
asm volatile("cpuid" : "=a" (tmp) : "0" (1) : "ebx","ecx","edx","memory");
extern void swap_low_mappings(void);
-extern void oops_begin(void);
-extern void die(const char *,struct pt_regs *,long);
-extern void __die(const char * str, struct pt_regs * regs, long err);
extern void __show_regs(struct pt_regs * regs);
extern void show_regs(struct pt_regs * regs);
extern void select_idle_routine(const struct cpuinfo_x86 *c);
extern void swiotlb_init(void);
-extern unsigned long max_mapnr;
-extern unsigned long end_pfn;
extern unsigned long table_start, table_end;
extern int exception_trace;
#undef __HAVE_ARCH_SIG_BITOPS
#if 0
-extern __inline__ void sigaddset(sigset_t *set, int _sig)
+static inline void sigaddset(sigset_t *set, int _sig)
{
__asm__("btsq %1,%0" : "=m"(*set) : "Ir"(_sig - 1) : "cc");
}
-extern __inline__ void sigdelset(sigset_t *set, int _sig)
+static inline void sigdelset(sigset_t *set, int _sig)
{
__asm__("btrq %1,%0" : "=m"(*set) : "Ir"(_sig - 1) : "cc");
}
-extern __inline__ int __const_sigismember(sigset_t *set, int _sig)
+static inline int __const_sigismember(sigset_t *set, int _sig)
{
unsigned long sig = _sig - 1;
return 1 & (set->sig[sig / _NSIG_BPW] >> (sig & ~(_NSIG_BPW-1)));
}
-extern __inline__ int __gen_sigismember(sigset_t *set, int _sig)
+static inline int __gen_sigismember(sigset_t *set, int _sig)
{
int ret;
__asm__("btq %2,%1\n\tsbbq %0,%0"
__const_sigismember((set),(sig)) : \
__gen_sigismember((set),(sig)))
-extern __inline__ int sigfindinword(unsigned long word)
+static inline int sigfindinword(unsigned long word)
{
__asm__("bsfq %1,%0" : "=r"(word) : "rm"(word) : "cc");
return word;
#define raw_smp_processor_id() read_pda(cpunumber)
-extern __inline int hard_smp_processor_id(void)
+static inline int hard_smp_processor_id(void)
{
/* we don't want to mark this access volatile - bad code generation */
return GET_APIC_ID(*(unsigned int *)(APIC_BASE+APIC_ID));
#define __xg(x) ((volatile long *)(x))
-extern inline void set_64bit(volatile unsigned long *ptr, unsigned long val)
+static inline void set_64bit(volatile unsigned long *ptr, unsigned long val)
{
*ptr = val;
}
case 2:
__asm__ __volatile__(LOCK_PREFIX "cmpxchgw %w1,%2"
: "=a"(prev)
- : "q"(new), "m"(*__xg(ptr)), "0"(old)
+ : "r"(new), "m"(*__xg(ptr)), "0"(old)
: "memory");
return prev;
case 4:
__asm__ __volatile__(LOCK_PREFIX "cmpxchgl %k1,%2"
: "=a"(prev)
- : "q"(new), "m"(*__xg(ptr)), "0"(old)
+ : "r"(new), "m"(*__xg(ptr)), "0"(old)
: "memory");
return prev;
case 8:
__asm__ __volatile__(LOCK_PREFIX "cmpxchgq %1,%2"
: "=a"(prev)
- : "q"(new), "m"(*__xg(ptr)), "0"(old)
+ : "r"(new), "m"(*__xg(ptr)), "0"(old)
: "memory");
return prev;
}
#ifndef _ASMx8664_TIMEX_H
#define _ASMx8664_TIMEX_H
-#include <linux/config.h>
#include <asm/8253pit.h>
#include <asm/msr.h>
#include <asm/vsyscall.h>
#define TLBSTATE_OK 1
#define TLBSTATE_LAZY 2
+/* Roughly an IPI every 20MB with 4k pages for freeing page table
+ ranges. Cost is about 42k of memory for each CPU. */
+#define ARCH_FREE_PTE_NR 5350
+
#endif
#define flush_tlb_kernel_range(start, end) flush_tlb_all()
extern cpumask_t cpu_online_map;
extern unsigned char cpu_to_node[];
-extern unsigned char pci_bus_to_node[];
extern cpumask_t node_to_cpumask[];
#ifdef CONFIG_ACPI_NUMA
#define parent_node(node) (node)
#define node_to_first_cpu(node) (__ffs(node_to_cpumask[node]))
#define node_to_cpumask(node) (node_to_cpumask[node])
-#define pcibus_to_node(bus) pci_bus_to_node[(bus)->number]
+#define pcibus_to_node(bus) ((long)((bus)->sysdata))
#define pcibus_to_cpumask(bus) node_to_cpumask(pcibus_to_node(bus))
/* sched_domains SD_NODE_INIT for x86_64 machines */
struct vxtime_data {
long hpet_address; /* HPET base address */
- unsigned long hz; /* HPET clocks / sec */
int last;
unsigned long last_tsc;
long quot;
static inline int dmi_check_system(struct dmi_system_id *list) { return 0; }
static inline char * dmi_get_system_info(int field) { return NULL; }
-static struct dmi_device * dmi_find_device(int type, const char *name,
+static inline struct dmi_device * dmi_find_device(int type, const char *name,
struct dmi_device *from) { return NULL; }
#endif
extern void enable_irq(unsigned int irq);
#endif
+#ifndef __ARCH_SET_SOFTIRQ_PENDING
+#define set_softirq_pending(x) (local_softirq_pending() = (x))
+#define or_softirq_pending(x) (local_softirq_pending() |= (x))
+#endif
+
/*
* Temporary defines for UP kernels, until all code gets fixed.
*/
asmlinkage void do_softirq(void);
extern void open_softirq(int nr, void (*action)(struct softirq_action*), void *data);
extern void softirq_init(void);
-#define __raise_softirq_irqoff(nr) do { local_softirq_pending() |= 1UL << (nr); } while (0)
+#define __raise_softirq_irqoff(nr) do { or_softirq_pending(1UL << (nr)); } while (0)
extern void FASTCALL(raise_softirq_irqoff(unsigned int nr));
extern void FASTCALL(raise_softirq(unsigned int nr));
short il_next;
#endif
#ifdef CONFIG_CPUSETS
- short cpuset_sem_nest_depth;
struct cpuset *cpuset;
nodemask_t mems_allowed;
int cpuset_mems_generation;
*/
static DECLARE_MUTEX(cpuset_sem);
+static struct task_struct *cpuset_sem_owner;
+static int cpuset_sem_depth;
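+/* cpuset_sem may be taken recursively by the task that owns it:
+   track the owner and the nesting depth instead of a per-task count. */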
/*
* The global cpuset semaphore cpuset_sem can be needed by the
static inline void cpuset_down(struct semaphore *psem)
{
- if (current->cpuset_sem_nest_depth == 0)
+ if (cpuset_sem_owner != current) {
down(psem);
- current->cpuset_sem_nest_depth++;
+ cpuset_sem_owner = current;
+ }
+ cpuset_sem_depth++;
}
static inline void cpuset_up(struct semaphore *psem)
{
- current->cpuset_sem_nest_depth--;
- if (current->cpuset_sem_nest_depth == 0)
+ if (--cpuset_sem_depth == 0) {
+ cpuset_sem_owner = NULL;
up(psem);
+ }
}
/*
cpu = smp_processor_id();
restart:
/* Reset the pending bitmask before enabling irqs */
- local_softirq_pending() = 0;
+ set_softirq_pending(0);
local_irq_enable();
config FRAME_POINTER
bool "Compile the kernel with frame pointers"
- depends on DEBUG_KERNEL && ((X86 && !X86_64) || CRIS || M68K || M68KNOMMU || FRV || UML)
+ depends on DEBUG_KERNEL && (X86 || CRIS || M68K || M68KNOMMU || FRV || UML)
default y if DEBUG_INFO && UML
help
If you say Y here the resulting kernel image will be slightly larger
- and slower, but it will give very useful debugging information.
- If you don't debug the kernel, you can say N, but we may not be able
- to solve problems without frame pointers.
+ and slower, but it might give very useful debugging information
+ on some architectures or if you use external debuggers.
+ If you don't debug the kernel, you can say N.
{
bootmem_data_t *bdata = pgdat->bdata;
unsigned long mapsize = ((end - start)+7)/8;
-
- pgdat->pgdat_next = pgdat_list;
- pgdat_list = pgdat;
+ static struct pglist_data *pgdat_last;
+
+ pgdat->pgdat_next = NULL;
+ /* Add new nodes last so that bootmem always starts
+ searching in the first nodes, not the last ones */
+ if (pgdat_last)
+ pgdat_last->pgdat_next = pgdat;
+ else {
+ pgdat_list = pgdat;
+ pgdat_last = pgdat;
+ }
mapsize = ALIGN(mapsize, sizeof(long));
bdata->node_bootmem_map = phys_to_virt(mapstart << PAGE_SHIFT);