Merge branch 'master' of master.kernel.org:/pub/scm/linux/kernel/git/torvalds/linux-2.6
author    David Woodhouse <David.Woodhouse@intel.com>
          Tue, 21 Oct 2008 18:42:20 +0000 (19:42 +0100)
committer David Woodhouse <David.Woodhouse@intel.com>
          Tue, 21 Oct 2008 18:42:20 +0000 (19:42 +0100)
Conflicts:

drivers/pci/dmar.c

471 files changed:
CREDITS
Documentation/DocBook/kernel-hacking.tmpl
Documentation/MSI-HOWTO.txt
Documentation/PCI/pci.txt
Documentation/PCI/pcieaer-howto.txt
Documentation/kernel-parameters.txt
Documentation/markers.txt
Documentation/sysrq.txt
Documentation/tracepoints.txt [new file with mode: 0644]
Documentation/tracers/mmiotrace.txt
MAINTAINERS
arch/alpha/kernel/sys_sable.c
arch/arm/mach-iop13xx/include/mach/time.h
arch/arm/mach-ixp2000/ixdp2x00.c
arch/arm/mach-omap2/irq.c
arch/arm/mach-pxa/include/mach/zylonite.h
arch/arm/mach-sa1100/include/mach/ide.h [deleted file]
arch/avr32/mach-at32ap/extint.c
arch/ia64/include/asm/pci.h
arch/ia64/pci/pci.c
arch/m32r/kernel/smpboot.c
arch/parisc/Kconfig
arch/parisc/include/asm/Kbuild [moved from include/asm-parisc/Kbuild with 100% similarity]
arch/parisc/include/asm/agp.h [moved from include/asm-parisc/agp.h with 100% similarity]
arch/parisc/include/asm/asmregs.h [moved from include/asm-parisc/asmregs.h with 100% similarity]
arch/parisc/include/asm/assembly.h [moved from include/asm-parisc/assembly.h with 100% similarity]
arch/parisc/include/asm/atomic.h [moved from include/asm-parisc/atomic.h with 100% similarity]
arch/parisc/include/asm/auxvec.h [moved from include/asm-parisc/auxvec.h with 100% similarity]
arch/parisc/include/asm/bitops.h [moved from include/asm-parisc/bitops.h with 100% similarity]
arch/parisc/include/asm/bug.h [moved from include/asm-parisc/bug.h with 100% similarity]
arch/parisc/include/asm/bugs.h [moved from include/asm-parisc/bugs.h with 100% similarity]
arch/parisc/include/asm/byteorder.h [moved from include/asm-parisc/byteorder.h with 100% similarity]
arch/parisc/include/asm/cache.h [moved from include/asm-parisc/cache.h with 100% similarity]
arch/parisc/include/asm/cacheflush.h [moved from include/asm-parisc/cacheflush.h with 100% similarity]
arch/parisc/include/asm/checksum.h [moved from include/asm-parisc/checksum.h with 100% similarity]
arch/parisc/include/asm/compat.h [moved from include/asm-parisc/compat.h with 100% similarity]
arch/parisc/include/asm/compat_rt_sigframe.h [moved from include/asm-parisc/compat_rt_sigframe.h with 100% similarity]
arch/parisc/include/asm/compat_signal.h [moved from include/asm-parisc/compat_signal.h with 100% similarity]
arch/parisc/include/asm/compat_ucontext.h [moved from include/asm-parisc/compat_ucontext.h with 100% similarity]
arch/parisc/include/asm/cputime.h [moved from include/asm-parisc/cputime.h with 100% similarity]
arch/parisc/include/asm/current.h [moved from include/asm-parisc/current.h with 100% similarity]
arch/parisc/include/asm/delay.h [moved from include/asm-parisc/delay.h with 100% similarity]
arch/parisc/include/asm/device.h [moved from include/asm-parisc/device.h with 100% similarity]
arch/parisc/include/asm/div64.h [moved from include/asm-parisc/div64.h with 100% similarity]
arch/parisc/include/asm/dma-mapping.h [moved from include/asm-parisc/dma-mapping.h with 100% similarity]
arch/parisc/include/asm/dma.h [moved from include/asm-parisc/dma.h with 100% similarity]
arch/parisc/include/asm/eisa_bus.h [moved from include/asm-parisc/eisa_bus.h with 100% similarity]
arch/parisc/include/asm/eisa_eeprom.h [moved from include/asm-parisc/eisa_eeprom.h with 100% similarity]
arch/parisc/include/asm/elf.h [moved from include/asm-parisc/elf.h with 100% similarity]
arch/parisc/include/asm/emergency-restart.h [moved from include/asm-parisc/emergency-restart.h with 100% similarity]
arch/parisc/include/asm/errno.h [moved from include/asm-parisc/errno.h with 100% similarity]
arch/parisc/include/asm/fb.h [moved from include/asm-parisc/fb.h with 100% similarity]
arch/parisc/include/asm/fcntl.h [moved from include/asm-parisc/fcntl.h with 100% similarity]
arch/parisc/include/asm/fixmap.h [moved from include/asm-parisc/fixmap.h with 100% similarity]
arch/parisc/include/asm/floppy.h [moved from include/asm-parisc/floppy.h with 100% similarity]
arch/parisc/include/asm/futex.h [moved from include/asm-parisc/futex.h with 100% similarity]
arch/parisc/include/asm/grfioctl.h [moved from include/asm-parisc/grfioctl.h with 100% similarity]
arch/parisc/include/asm/hardirq.h [moved from include/asm-parisc/hardirq.h with 100% similarity]
arch/parisc/include/asm/hardware.h [moved from include/asm-parisc/hardware.h with 100% similarity]
arch/parisc/include/asm/hw_irq.h [moved from include/asm-parisc/hw_irq.h with 100% similarity]
arch/parisc/include/asm/ide.h [moved from include/asm-parisc/ide.h with 77% similarity]
arch/parisc/include/asm/io.h [moved from include/asm-parisc/io.h with 100% similarity]
arch/parisc/include/asm/ioctl.h [moved from include/asm-parisc/ioctl.h with 100% similarity]
arch/parisc/include/asm/ioctls.h [moved from include/asm-parisc/ioctls.h with 100% similarity]
arch/parisc/include/asm/ipcbuf.h [moved from include/asm-parisc/ipcbuf.h with 100% similarity]
arch/parisc/include/asm/irq.h [moved from include/asm-parisc/irq.h with 100% similarity]
arch/parisc/include/asm/irq_regs.h [moved from include/asm-parisc/irq_regs.h with 100% similarity]
arch/parisc/include/asm/kdebug.h [moved from include/asm-parisc/kdebug.h with 100% similarity]
arch/parisc/include/asm/kmap_types.h [moved from include/asm-parisc/kmap_types.h with 100% similarity]
arch/parisc/include/asm/led.h [moved from include/asm-parisc/led.h with 100% similarity]
arch/parisc/include/asm/linkage.h [moved from include/asm-parisc/linkage.h with 100% similarity]
arch/parisc/include/asm/local.h [moved from include/asm-parisc/local.h with 100% similarity]
arch/parisc/include/asm/machdep.h [moved from include/asm-parisc/machdep.h with 100% similarity]
arch/parisc/include/asm/mc146818rtc.h [moved from include/asm-parisc/mc146818rtc.h with 100% similarity]
arch/parisc/include/asm/mckinley.h [moved from include/asm-parisc/mckinley.h with 100% similarity]
arch/parisc/include/asm/mman.h [moved from include/asm-parisc/mman.h with 100% similarity]
arch/parisc/include/asm/mmu.h [moved from include/asm-parisc/mmu.h with 100% similarity]
arch/parisc/include/asm/mmu_context.h [moved from include/asm-parisc/mmu_context.h with 100% similarity]
arch/parisc/include/asm/mmzone.h [moved from include/asm-parisc/mmzone.h with 100% similarity]
arch/parisc/include/asm/module.h [moved from include/asm-parisc/module.h with 100% similarity]
arch/parisc/include/asm/msgbuf.h [moved from include/asm-parisc/msgbuf.h with 100% similarity]
arch/parisc/include/asm/mutex.h [moved from include/asm-parisc/mutex.h with 100% similarity]
arch/parisc/include/asm/page.h [moved from include/asm-parisc/page.h with 100% similarity]
arch/parisc/include/asm/param.h [moved from include/asm-parisc/param.h with 100% similarity]
arch/parisc/include/asm/parisc-device.h [moved from include/asm-parisc/parisc-device.h with 100% similarity]
arch/parisc/include/asm/parport.h [moved from include/asm-parisc/parport.h with 100% similarity]
arch/parisc/include/asm/pci.h [moved from include/asm-parisc/pci.h with 100% similarity]
arch/parisc/include/asm/pdc.h [moved from include/asm-parisc/pdc.h with 99% similarity]
arch/parisc/include/asm/pdc_chassis.h [moved from include/asm-parisc/pdc_chassis.h with 100% similarity]
arch/parisc/include/asm/pdcpat.h [moved from include/asm-parisc/pdcpat.h with 100% similarity]
arch/parisc/include/asm/percpu.h [moved from include/asm-parisc/percpu.h with 100% similarity]
arch/parisc/include/asm/perf.h [moved from include/asm-parisc/perf.h with 100% similarity]
arch/parisc/include/asm/pgalloc.h [moved from include/asm-parisc/pgalloc.h with 100% similarity]
arch/parisc/include/asm/pgtable.h [moved from include/asm-parisc/pgtable.h with 100% similarity]
arch/parisc/include/asm/poll.h [moved from include/asm-parisc/poll.h with 100% similarity]
arch/parisc/include/asm/posix_types.h [moved from include/asm-parisc/posix_types.h with 100% similarity]
arch/parisc/include/asm/prefetch.h [moved from include/asm-parisc/prefetch.h with 100% similarity]
arch/parisc/include/asm/processor.h [moved from include/asm-parisc/processor.h with 100% similarity]
arch/parisc/include/asm/psw.h [moved from include/asm-parisc/psw.h with 100% similarity]
arch/parisc/include/asm/ptrace.h [moved from include/asm-parisc/ptrace.h with 85% similarity]
arch/parisc/include/asm/real.h [moved from include/asm-parisc/real.h with 100% similarity]
arch/parisc/include/asm/resource.h [moved from include/asm-parisc/resource.h with 100% similarity]
arch/parisc/include/asm/ropes.h [moved from include/asm-parisc/ropes.h with 99% similarity]
arch/parisc/include/asm/rt_sigframe.h [moved from include/asm-parisc/rt_sigframe.h with 100% similarity]
arch/parisc/include/asm/rtc.h [moved from include/asm-parisc/rtc.h with 100% similarity]
arch/parisc/include/asm/runway.h [moved from include/asm-parisc/runway.h with 100% similarity]
arch/parisc/include/asm/scatterlist.h [moved from include/asm-parisc/scatterlist.h with 100% similarity]
arch/parisc/include/asm/sections.h [moved from include/asm-parisc/sections.h with 100% similarity]
arch/parisc/include/asm/segment.h [moved from include/asm-parisc/segment.h with 100% similarity]
arch/parisc/include/asm/sembuf.h [moved from include/asm-parisc/sembuf.h with 100% similarity]
arch/parisc/include/asm/serial.h [moved from include/asm-parisc/serial.h with 100% similarity]
arch/parisc/include/asm/setup.h [moved from include/asm-parisc/setup.h with 100% similarity]
arch/parisc/include/asm/shmbuf.h [moved from include/asm-parisc/shmbuf.h with 100% similarity]
arch/parisc/include/asm/shmparam.h [moved from include/asm-parisc/shmparam.h with 100% similarity]
arch/parisc/include/asm/sigcontext.h [moved from include/asm-parisc/sigcontext.h with 100% similarity]
arch/parisc/include/asm/siginfo.h [moved from include/asm-parisc/siginfo.h with 100% similarity]
arch/parisc/include/asm/signal.h [moved from include/asm-parisc/signal.h with 100% similarity]
arch/parisc/include/asm/smp.h [moved from include/asm-parisc/smp.h with 100% similarity]
arch/parisc/include/asm/socket.h [moved from include/asm-parisc/socket.h with 100% similarity]
arch/parisc/include/asm/sockios.h [moved from include/asm-parisc/sockios.h with 100% similarity]
arch/parisc/include/asm/spinlock.h [moved from include/asm-parisc/spinlock.h with 100% similarity]
arch/parisc/include/asm/spinlock_types.h [moved from include/asm-parisc/spinlock_types.h with 100% similarity]
arch/parisc/include/asm/stat.h [moved from include/asm-parisc/stat.h with 100% similarity]
arch/parisc/include/asm/statfs.h [moved from include/asm-parisc/statfs.h with 100% similarity]
arch/parisc/include/asm/string.h [moved from include/asm-parisc/string.h with 100% similarity]
arch/parisc/include/asm/superio.h [moved from include/asm-parisc/superio.h with 100% similarity]
arch/parisc/include/asm/system.h [moved from include/asm-parisc/system.h with 100% similarity]
arch/parisc/include/asm/termbits.h [moved from include/asm-parisc/termbits.h with 100% similarity]
arch/parisc/include/asm/termios.h [moved from include/asm-parisc/termios.h with 100% similarity]
arch/parisc/include/asm/thread_info.h [moved from include/asm-parisc/thread_info.h with 100% similarity]
arch/parisc/include/asm/timex.h [moved from include/asm-parisc/timex.h with 100% similarity]
arch/parisc/include/asm/tlb.h [moved from include/asm-parisc/tlb.h with 100% similarity]
arch/parisc/include/asm/tlbflush.h [moved from include/asm-parisc/tlbflush.h with 100% similarity]
arch/parisc/include/asm/topology.h [moved from include/asm-parisc/topology.h with 100% similarity]
arch/parisc/include/asm/traps.h [moved from include/asm-parisc/traps.h with 100% similarity]
arch/parisc/include/asm/types.h [moved from include/asm-parisc/types.h with 100% similarity]
arch/parisc/include/asm/uaccess.h [moved from include/asm-parisc/uaccess.h with 100% similarity]
arch/parisc/include/asm/ucontext.h [moved from include/asm-parisc/ucontext.h with 100% similarity]
arch/parisc/include/asm/unaligned.h [moved from include/asm-parisc/unaligned.h with 100% similarity]
arch/parisc/include/asm/unistd.h [moved from include/asm-parisc/unistd.h with 99% similarity]
arch/parisc/include/asm/unwind.h [moved from include/asm-parisc/unwind.h with 99% similarity]
arch/parisc/include/asm/user.h [moved from include/asm-parisc/user.h with 100% similarity]
arch/parisc/include/asm/vga.h [moved from include/asm-parisc/vga.h with 100% similarity]
arch/parisc/include/asm/xor.h [moved from include/asm-parisc/xor.h with 100% similarity]
arch/parisc/kernel/.gitignore [new file with mode: 0644]
arch/parisc/kernel/asm-offsets.c
arch/parisc/kernel/firmware.c
arch/parisc/kernel/head.S
arch/parisc/kernel/ptrace.c
arch/parisc/kernel/real2.S
arch/parisc/kernel/setup.c
arch/parisc/kernel/syscall_table.S
arch/parisc/kernel/time.c
arch/parisc/kernel/unwind.c
arch/powerpc/include/asm/page.h
arch/powerpc/include/asm/pci-bridge.h
arch/powerpc/include/asm/pci.h
arch/powerpc/include/asm/ptrace.h
arch/powerpc/kernel/pci-common.c
arch/powerpc/platforms/cell/spufs/sputrace.c
arch/x86/Kconfig
arch/x86/configs/i386_defconfig
arch/x86/kernel/Makefile
arch/x86/kernel/acpi/boot.c
arch/x86/kernel/acpi/sleep.c
arch/x86/kernel/apic.c [moved from arch/x86/kernel/apic_32.c with 79% similarity]
arch/x86/kernel/apic_64.c [deleted file]
arch/x86/kernel/bios_uv.c
arch/x86/kernel/cpu/amd.c
arch/x86/kernel/cpu/cpufreq/longhaul.c
arch/x86/kernel/cpu/cpufreq/powernow-k6.c
arch/x86/kernel/cpu/cpufreq/powernow-k7.c
arch/x86/kernel/cpu/cpufreq/powernow-k8.c
arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
arch/x86/kernel/cpu/intel.c
arch/x86/kernel/cpu/mcheck/k7.c
arch/x86/kernel/cpu/mcheck/mce_32.c
arch/x86/kernel/cpu/mcheck/non-fatal.c
arch/x86/kernel/cpu/perfctr-watchdog.c
arch/x86/kernel/efi.c
arch/x86/kernel/entry_32.S
arch/x86/kernel/entry_64.S
arch/x86/kernel/ftrace.c
arch/x86/kernel/genapic_flat_64.c
arch/x86/kernel/genx2apic_uv_x.c
arch/x86/kernel/hpet.c
arch/x86/kernel/io_apic.c [moved from arch/x86/kernel/io_apic_64.c with 68% similarity]
arch/x86/kernel/io_apic_32.c [deleted file]
arch/x86/kernel/irq.c [new file with mode: 0644]
arch/x86/kernel/irq_32.c
arch/x86/kernel/irq_64.c
arch/x86/kernel/irqinit_32.c
arch/x86/kernel/irqinit_64.c
arch/x86/kernel/quirks.c
arch/x86/kernel/setup.c
arch/x86/kernel/setup_percpu.c
arch/x86/kernel/smpboot.c
arch/x86/kernel/uv_irq.c [new file with mode: 0644]
arch/x86/kernel/uv_sysfs.c [new file with mode: 0644]
arch/x86/kernel/visws_quirks.c
arch/x86/kernel/vmiclock_32.c
arch/x86/lguest/boot.c
arch/x86/mach-generic/bigsmp.c
arch/x86/mach-generic/es7000.c
arch/x86/mach-generic/numaq.c
arch/x86/mach-generic/summit.c
arch/x86/mach-voyager/voyager_smp.c
arch/x86/mm/mmio-mod.c
arch/x86/mm/pf_in.c
arch/x86/mm/testmmiotrace.c
arch/x86/pci/irq.c
arch/x86/xen/irq.c
arch/x86/xen/spinlock.c
crypto/async_tx/async_tx.c
drivers/char/agp/ali-agp.c
drivers/char/agp/amd64-agp.c
drivers/char/agp/ati-agp.c
drivers/char/agp/backend.c
drivers/char/agp/intel-agp.c
drivers/char/agp/nvidia-agp.c
drivers/char/agp/parisc-agp.c
drivers/char/agp/via-agp.c
drivers/char/hpet.c
drivers/char/random.c
drivers/char/sysrq.c
drivers/char/vr41xx_giu.c
drivers/clocksource/acpi_pm.c
drivers/dma/Kconfig
drivers/dma/dmatest.c
drivers/dma/fsldma.c
drivers/dma/fsldma.h
drivers/dma/ioat_dma.c
drivers/gpio/gpiolib.c
drivers/gpu/drm/Kconfig
drivers/gpu/drm/drm_proc.c
drivers/gpu/drm/i915/i915_gem.c
drivers/i2c/busses/i2c-amd756.c
drivers/i2c/busses/i2c-viapro.c
drivers/ide/Kconfig
drivers/ide/Makefile
drivers/ide/ide-atapi.c
drivers/ide/ide-cd.c
drivers/ide/ide-cd_ioctl.c
drivers/ide/ide-disk.c
drivers/ide/ide-disk.h
drivers/ide/ide-disk_ioctl.c
drivers/ide/ide-disk_proc.c
drivers/ide/ide-dma-sff.c
drivers/ide/ide-floppy.c
drivers/ide/ide-floppy.h
drivers/ide/ide-floppy_ioctl.c
drivers/ide/ide-floppy_proc.c
drivers/ide/ide-gd.c [new file with mode: 0644]
drivers/ide/ide-gd.h [new file with mode: 0644]
drivers/ide/ide-iops.c
drivers/ide/ide-probe.c
drivers/ide/ide-proc.c
drivers/ide/ide-tape.c
drivers/ide/pci/Makefile
drivers/ide/pci/delkin_cb.c
drivers/ide/pci/hpt34x.c [deleted file]
drivers/ide/pci/hpt366.c
drivers/ide/pci/scc_pata.c
drivers/ide/pci/sgiioc4.c
drivers/leds/Kconfig
drivers/mfd/asic3.c
drivers/mfd/htc-egpio.c
drivers/net/3c59x.c
drivers/net/hamradio/baycom_ser_fdx.c
drivers/net/hamradio/scc.c
drivers/net/usb/pegasus.c
drivers/net/wan/sbni.c
drivers/parisc/ccio-dma.c
drivers/parisc/dino.c
drivers/parisc/eisa.c
drivers/parisc/gsc.c
drivers/parisc/iosapic.c
drivers/parisc/superio.c
drivers/pci/bus.c
drivers/pci/dmar.c
drivers/pci/hotplug/ibmphp_ebda.c
drivers/pci/hotplug/pci_hotplug_core.c
drivers/pci/hotplug/pciehp.h
drivers/pci/hotplug/pciehp_core.c
drivers/pci/hotplug/pciehp_ctrl.c
drivers/pci/hotplug/pciehp_hpc.c
drivers/pci/hotplug/pciehp_pci.c
drivers/pci/hotplug/rpaphp.h
drivers/pci/hotplug/rpaphp_core.c
drivers/pci/hotplug/rpaphp_pci.c
drivers/pci/htirq.c
drivers/pci/intr_remapping.c
drivers/pci/msi.c
drivers/pci/pci-driver.c
drivers/pci/pci-sysfs.c
drivers/pci/pci.c
drivers/pci/pci.h
drivers/pci/pcie/aer/aerdrv.c
drivers/pci/pcie/aer/aerdrv_core.c
drivers/pci/pcie/aspm.c
drivers/pci/pcie/portdrv.h
drivers/pci/pcie/portdrv_core.c
drivers/pci/pcie/portdrv_pci.c
drivers/pci/probe.c
drivers/pci/quirks.c
drivers/pci/remove.c
drivers/pci/setup-bus.c
drivers/pci/setup-res.c
drivers/pci/slot.c
drivers/pcmcia/at91_cf.c
drivers/pcmcia/hd64465_ss.c
drivers/pcmcia/vrc4171_card.c
drivers/rtc/Kconfig
drivers/rtc/Makefile
drivers/rtc/rtc-parisc.c [new file with mode: 0644]
drivers/rtc/rtc-vr41xx.c
drivers/scsi/aha152x.c
drivers/scsi/ide-scsi.c
drivers/scsi/ipr.c
drivers/scsi/qla2xxx/qla_def.h
drivers/scsi/qla2xxx/qla_os.c
drivers/serial/68328serial.c
drivers/serial/8250.c
drivers/serial/amba-pl010.c
drivers/serial/amba-pl011.c
drivers/serial/cpm_uart/cpm_uart_core.c
drivers/serial/m32r_sio.c
drivers/serial/serial_core.c
drivers/serial/serial_lh7a40x.c
drivers/serial/sh-sci.c
drivers/serial/ucc_uart.c
drivers/uio/uio.c
drivers/usb/host/ehci-hcd.c
drivers/watchdog/ib700wdt.c
drivers/xen/events.c
fs/Kconfig
fs/binfmt_elf.c
fs/binfmt_elf_fdpic.c
fs/fuse/file.c
fs/fuse/fuse_i.h
fs/fuse/inode.c
fs/proc/array.c
fs/proc/proc_misc.c
include/asm-frv/ide.h
include/asm-generic/bug.h
include/asm-generic/vmlinux.lds.h
include/asm-m68k/ide.h
include/asm-x86/apic.h
include/asm-x86/bigsmp/apic.h
include/asm-x86/efi.h
include/asm-x86/es7000/apic.h
include/asm-x86/ftrace.h
include/asm-x86/genapic_32.h
include/asm-x86/hpet.h
include/asm-x86/hw_irq.h
include/asm-x86/io_apic.h
include/asm-x86/irq_vectors.h
include/asm-x86/mach-default/entry_arch.h
include/asm-x86/mach-default/mach_apic.h
include/asm-x86/mach-generic/irq_vectors_limits.h [deleted file]
include/asm-x86/mach-generic/mach_apic.h
include/asm-x86/numaq/apic.h
include/asm-x86/summit/apic.h
include/asm-x86/summit/irq_vectors_limits.h [deleted file]
include/asm-x86/uv/bios.h
include/asm-x86/uv/uv_irq.h [new file with mode: 0644]
include/linux/aer.h
include/linux/clocksource.h
include/linux/compiler.h
include/linux/dmar.h
include/linux/efi.h
include/linux/ftrace.h
include/linux/fuse.h
include/linux/hrtimer.h
include/linux/ide.h
include/linux/init.h
include/linux/interrupt.h
include/linux/irq.h
include/linux/irqnr.h [new file with mode: 0644]
include/linux/kernel.h
include/linux/kernel_stat.h
include/linux/kprobes.h
include/linux/linkage.h
include/linux/marker.h
include/linux/mmiotrace.h
include/linux/module.h
include/linux/pci.h
include/linux/pci_ids.h
include/linux/pci_regs.h
include/linux/posix-timers.h
include/linux/ring_buffer.h [new file with mode: 0644]
include/linux/sched.h
include/linux/tick.h
include/linux/time.h
include/linux/timex.h
include/linux/tracepoint.h [new file with mode: 0644]
include/trace/sched.h [new file with mode: 0644]
init/Kconfig
init/main.c
kernel/Makefile
kernel/compat.c
kernel/exit.c
kernel/fork.c
kernel/hrtimer.c
kernel/irq/autoprobe.c
kernel/irq/chip.c
kernel/irq/handle.c
kernel/irq/internals.h
kernel/irq/manage.c
kernel/irq/migration.c
kernel/irq/proc.c
kernel/irq/resend.c
kernel/irq/spurious.c
kernel/itimer.c
kernel/kexec.c
kernel/kthread.c
kernel/marker.c
kernel/module.c
kernel/notifier.c
kernel/posix-cpu-timers.c
kernel/posix-timers.c
kernel/rcutorture.c
kernel/sched.c
kernel/sched_fair.c
kernel/sched_rt.c
kernel/sched_stats.h
kernel/signal.c
kernel/softirq.c
kernel/sys.c
kernel/time/clocksource.c
kernel/time/jiffies.c
kernel/time/ntp.c
kernel/time/tick-broadcast.c
kernel/time/tick-internal.h
kernel/time/tick-sched.c
kernel/time/timekeeping.c
kernel/time/timer_list.c
kernel/timer.c
kernel/trace/Kconfig
kernel/trace/Makefile
kernel/trace/ftrace.c
kernel/trace/ring_buffer.c [new file with mode: 0644]
kernel/trace/trace.c
kernel/trace/trace.h
kernel/trace/trace_boot.c [new file with mode: 0644]
kernel/trace/trace_functions.c
kernel/trace/trace_irqsoff.c
kernel/trace/trace_mmiotrace.c
kernel/trace/trace_nop.c [new file with mode: 0644]
kernel/trace/trace_sched_switch.c
kernel/trace/trace_sched_wakeup.c
kernel/trace/trace_selftest.c
kernel/trace/trace_stack.c [new file with mode: 0644]
kernel/trace/trace_sysprof.c
kernel/tracepoint.c [new file with mode: 0644]
mm/memory.c
mm/tiny-shmem.c
mm/vmalloc.c
samples/Kconfig
samples/Makefile
samples/markers/probe-example.c
samples/tracepoints/Makefile [new file with mode: 0644]
samples/tracepoints/tp-samples-trace.h [new file with mode: 0644]
samples/tracepoints/tracepoint-probe-sample.c [new file with mode: 0644]
samples/tracepoints/tracepoint-probe-sample2.c [new file with mode: 0644]
samples/tracepoints/tracepoint-sample.c [new file with mode: 0644]
scripts/Makefile.build
scripts/bootgraph.pl
scripts/checkpatch.pl
scripts/recordmcount.pl [new file with mode: 0755]
security/selinux/hooks.c

diff --git a/CREDITS b/CREDITS
index c62dcb3b7e2621d918815a656f2682fda044afcb..2358846f06be53807ccbd54b2e3615eb5814c4dc 100644 (file)
--- a/CREDITS
+++ b/CREDITS
@@ -1653,14 +1653,14 @@ S: Chapel Hill, North Carolina 27514-4818
 S: USA
 
 N: Dave Jones
-E: davej@codemonkey.org.uk
+E: davej@redhat.com
 W: http://www.codemonkey.org.uk
-D: x86 errata/setup maintenance.
-D: AGPGART driver.
+D: Assorted VIA x86 support.
+D: 2.5 AGPGART overhaul.
 D: CPUFREQ maintenance.
-D: Backport/Forwardport merge monkey.
-D: Various Janitor work.
-S: United Kingdom
+D: Fedora kernel maintenance.
+D: Misc/Other.
+S: 314 Littleton Rd, Westford, MA 01886, USA
 
 N: Martin Josfsson
 E: gandalf@wlug.westbo.se
diff --git a/Documentation/DocBook/kernel-hacking.tmpl b/Documentation/DocBook/kernel-hacking.tmpl
index 4c63e5864160f159fd073039ba80efefb161ff68..ae15d55350ec8bf93e2eb79dac2ee730cd6de054 100644 (file)
--- a/Documentation/DocBook/kernel-hacking.tmpl
+++ b/Documentation/DocBook/kernel-hacking.tmpl
@@ -1105,7 +1105,7 @@ static struct block_device_operations opt_fops = {
     </listitem>
     <listitem>
      <para>
-      Function names as strings (__FUNCTION__).
+      Function names as strings (__func__).
      </para>
     </listitem>
     <listitem>
diff --git a/Documentation/MSI-HOWTO.txt b/Documentation/MSI-HOWTO.txt
index a51f693c15419e9b2da87df025b69d898f648b51..256defd7e1742be45497e1bcc8912d493241c5af 100644 (file)
--- a/Documentation/MSI-HOWTO.txt
+++ b/Documentation/MSI-HOWTO.txt
@@ -236,10 +236,8 @@ software system can set different pages for controlling accesses to the
 MSI-X structure. The implementation of MSI support requires the PCI
 subsystem, not a device driver, to maintain full control of the MSI-X
 table/MSI-X PBA (Pending Bit Array) and MMIO address space of the MSI-X
-table/MSI-X PBA.  A device driver is prohibited from requesting the MMIO
-address space of the MSI-X table/MSI-X PBA. Otherwise, the PCI subsystem
-will fail enabling MSI-X on its hardware device when it calls the function
-pci_enable_msix().
+table/MSI-X PBA.  A device driver should not access the MMIO address
+space of the MSI-X table/MSI-X PBA.
 
 5.3.2 API pci_enable_msix
 
diff --git a/Documentation/PCI/pci.txt b/Documentation/PCI/pci.txt
index 8d4dc6250c582821ccca5b89127833e2b14bd4ee..fd4907a2968cb3fab71e92286c24da0244dcfcae 100644 (file)
--- a/Documentation/PCI/pci.txt
+++ b/Documentation/PCI/pci.txt
@@ -163,6 +163,10 @@ need pass only as many optional fields as necessary:
        o class and classmask fields default to 0
        o driver_data defaults to 0UL.
 
+Note that driver_data must match the value used by any of the pci_device_id
+entries defined in the driver. This makes the driver_data field mandatory
+if all the pci_device_id entries have a non-zero driver_data value.
+
 Once added, the driver probe routine will be invoked for any unclaimed
 PCI devices listed in its (newly updated) pci_ids list.
 
diff --git a/Documentation/PCI/pcieaer-howto.txt b/Documentation/PCI/pcieaer-howto.txt
index 16c251230c82398a1ad26a3754762e49fc819922..ddeb14beacc8fe8592334ba11303ef6fcd2a7404 100644 (file)
--- a/Documentation/PCI/pcieaer-howto.txt
+++ b/Documentation/PCI/pcieaer-howto.txt
@@ -203,22 +203,17 @@ to mmio_enabled.
 
 3.3 helper functions
 
-3.3.1 int pci_find_aer_capability(struct pci_dev *dev);
-pci_find_aer_capability locates the PCI Express AER capability
-in the device configuration space. If the device doesn't support
-PCI-Express AER, the function returns 0.
-
-3.3.2 int pci_enable_pcie_error_reporting(struct pci_dev *dev);
+3.3.1 int pci_enable_pcie_error_reporting(struct pci_dev *dev);
 pci_enable_pcie_error_reporting enables the device to send error
 messages to root port when an error is detected. Note that devices
 don't enable the error reporting by default, so device drivers need
 call this function to enable it.
 
-3.3.3 int pci_disable_pcie_error_reporting(struct pci_dev *dev);
+3.3.2 int pci_disable_pcie_error_reporting(struct pci_dev *dev);
 pci_disable_pcie_error_reporting disables the device to send error
 messages to root port when an error is detected.
 
-3.3.4 int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev);
+3.3.3 int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev);
 pci_cleanup_aer_uncorrect_error_status cleanups the uncorrectable
 error status register.
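As a hedged illustration of how a driver consumes these helpers, consider the
sketch below; the mydrv_* names and callbacks are hypothetical, and only the
three pci_*_error_* helper calls are taken from the text above.

/* Hypothetical driver skeleton; only the AER helper calls are real. */
#include <linux/pci.h>
#include <linux/aer.h>

static int mydrv_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
	/* Ask the root port to report this device's PCIe errors. */
	return pci_enable_pcie_error_reporting(pdev);
}

static pci_ers_result_t mydrv_slot_reset(struct pci_dev *pdev)
{
	/* After a link/slot reset, clear any stale uncorrectable-error
	 * status left in the device's AER registers. */
	pci_cleanup_aer_uncorrect_error_status(pdev);
	return PCI_ERS_RESULT_RECOVERED;
}

static void mydrv_remove(struct pci_dev *pdev)
{
	pci_disable_pcie_error_reporting(pdev);
}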
 
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 0f1544f67400b4288887649bf73b9fb09449b92c..53ba7c7d82b342d50053f6458225b29123bee747 100644 (file)
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -101,6 +101,7 @@ parameter is applicable:
        X86-64  X86-64 architecture is enabled.
                        More X86-64 boot options can be found in
                        Documentation/x86_64/boot-options.txt .
+       X86     Either 32bit or 64bit x86 (same as X86-32+X86-64)
 
 In addition, the following text indicates that the option:
 
@@ -1588,7 +1589,7 @@ and is between 256 and 4096 characters. It is defined in the file
                        See also Documentation/paride.txt.
 
        pci=option[,option...]  [PCI] various PCI subsystem options:
-               off             [X86-32] don't probe for the PCI bus
+               off             [X86] don't probe for the PCI bus
                bios            [X86-32] force use of PCI BIOS, don't access
                                the hardware directly. Use this if your machine
                                has a non-standard PCI host bridge.
@@ -1596,9 +1597,9 @@ and is between 256 and 4096 characters. It is defined in the file
                                hardware access methods are allowed. Use this
                                if you experience crashes upon bootup and you
                                suspect they are caused by the BIOS.
-               conf1           [X86-32] Force use of PCI Configuration
+               conf1           [X86] Force use of PCI Configuration
                                Mechanism 1.
-               conf2           [X86-32] Force use of PCI Configuration
+               conf2           [X86] Force use of PCI Configuration
                                Mechanism 2.
                noaer           [PCIE] If the PCIEAER kernel config parameter is
                                enabled, this kernel boot option can be used to
@@ -1618,37 +1619,37 @@ and is between 256 and 4096 characters. It is defined in the file
                                this option if the kernel is unable to allocate
                                IRQs or discover secondary PCI buses on your
                                motherboard.
-               rom             [X86-32] Assign address space to expansion ROMs.
+               rom             [X86] Assign address space to expansion ROMs.
                                Use with caution as certain devices share
                                address decoders between ROMs and other
                                resources.
-               norom           [X86-32,X86_64] Do not assign address space to
+               norom           [X86] Do not assign address space to
                                expansion ROMs that do not already have
                                BIOS assigned address ranges.
-               irqmask=0xMMMM  [X86-32] Set a bit mask of IRQs allowed to be
+               irqmask=0xMMMM  [X86] Set a bit mask of IRQs allowed to be
                                assigned automatically to PCI devices. You can
                                make the kernel exclude IRQs of your ISA cards
                                this way.
-               pirqaddr=0xAAAAA        [X86-32] Specify the physical address
+               pirqaddr=0xAAAAA        [X86] Specify the physical address
                                of the PIRQ table (normally generated
                                by the BIOS) if it is outside the
                                F0000h-100000h range.
-               lastbus=N       [X86-32] Scan all buses thru bus #N. Can be
+               lastbus=N       [X86] Scan all buses thru bus #N. Can be
                                useful if the kernel is unable to find your
                                secondary buses and you want to tell it
                                explicitly which ones they are.
-               assign-busses   [X86-32] Always assign all PCI bus
+               assign-busses   [X86] Always assign all PCI bus
                                numbers ourselves, overriding
                                whatever the firmware may have done.
-               usepirqmask     [X86-32] Honor the possible IRQ mask stored
+               usepirqmask     [X86] Honor the possible IRQ mask stored
                                in the BIOS $PIR table. This is needed on
                                some systems with broken BIOSes, notably
                                some HP Pavilion N5400 and Omnibook XE3
                                notebooks. This will have no effect if ACPI
                                IRQ routing is enabled.
-               noacpi          [X86-32] Do not use ACPI for IRQ routing
+               noacpi          [X86] Do not use ACPI for IRQ routing
                                or for PCI scanning.
-               use_crs         [X86-32] Use _CRS for PCI resource
+               use_crs         [X86] Use _CRS for PCI resource
                                allocation.
                routeirq        Do IRQ routing for all PCI devices.
                                This is normally done in pci_enable_device(),
@@ -1677,6 +1678,12 @@ and is between 256 and 4096 characters. It is defined in the file
                                reserved for the CardBus bridge's memory
                                window. The default value is 64 megabytes.
 
+       pcie_aspm=      [PCIE] Forcibly enable or disable PCIe Active State Power
+                       Management.
+               off     Disable ASPM.
+               force   Enable ASPM even on devices that claim not to support it.
+                       WARNING: Forcing ASPM on may cause system lockups.
+
        pcmv=           [HW,PCMCIA] BadgePAD 4
 
        pd.             [PARIDE]
diff --git a/Documentation/markers.txt b/Documentation/markers.txt
index d9f50a19fa0c48185968e875aa2c9163bae3e6fa..089f6138fcd94249a6444ca3a932a50c263098e1 100644 (file)
--- a/Documentation/markers.txt
+++ b/Documentation/markers.txt
@@ -50,10 +50,12 @@ Connecting a function (probe) to a marker is done by providing a probe (function
 to call) for the specific marker through marker_probe_register() and can be
 activated by calling marker_arm(). Marker deactivation can be done by calling
 marker_disarm() as many times as marker_arm() has been called. Removing a probe
-is done through marker_probe_unregister(); it will disarm the probe and make
-sure there is no caller left using the probe when it returns. Probe removal is
-preempt-safe because preemption is disabled around the probe call. See the
-"Probe example" section below for a sample probe module.
+is done through marker_probe_unregister(); it will disarm the probe.
+marker_synchronize_unregister() must be called before the end of the module exit
+function to make sure there is no caller left using the probe. This, and the
+fact that preemption is disabled around the probe call, make sure that probe
+removal and module unload are safe. See the "Probe example" section below for a
+sample probe module.
 
 The marker mechanism supports inserting multiple instances of the same marker.
 Markers can be put in inline functions, inlined static functions, and
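A sketch of the module-exit ordering the revised paragraph requires, modelled
loosely on samples/markers/probe-example.c; the marker name, format string and
probe body here are illustrative, not taken from this patch.

/* Illustrative probe module; only the marker_* API calls are real. */
#include <linux/module.h>
#include <linux/marker.h>

static void probe_subsystem_event(void *probe_data, void *call_data,
				  const char *format, va_list *args)
{
	/* Runs with preemption disabled; consume the marker arguments. */
}

static int __init probe_init(void)
{
	return marker_probe_register("subsystem_event",
				     "integer %d string %s",
				     probe_subsystem_event, NULL);
}

static void __exit probe_fini(void)
{
	marker_probe_unregister("subsystem_event",
				probe_subsystem_event, NULL);
	/* Must complete before the module text is freed, so that no CPU
	 * is still executing probe_subsystem_event(). */
	marker_synchronize_unregister();
}

module_init(probe_init);
module_exit(probe_fini);
MODULE_LICENSE("GPL");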
diff --git a/Documentation/sysrq.txt b/Documentation/sysrq.txt
index 49378a9f2b5f276c4a050e76b0898c25a74303e0..10a0263ebb3f01e832c7827cc75d7fe54b341a6f 100644 (file)
--- a/Documentation/sysrq.txt
+++ b/Documentation/sysrq.txt
@@ -95,8 +95,9 @@ On all -  write a character to /proc/sysrq-trigger.  e.g.:
 
 'p'     - Will dump the current registers and flags to your console.
 
-'q'     - Will dump a list of all running hrtimers.
-         WARNING: Does not cover any other timers
+'q'     - Will dump per CPU lists of all armed hrtimers (but NOT regular
+          timer_list timers) and detailed information about all
+          clockevent devices.
 
 'r'     - Turns off keyboard raw mode and sets it to XLATE.
 
diff --git a/Documentation/tracepoints.txt b/Documentation/tracepoints.txt
new file mode 100644 (file)
index 0000000..5d354e1
--- /dev/null
+++ b/Documentation/tracepoints.txt
@@ -0,0 +1,101 @@
+                    Using the Linux Kernel Tracepoints
+
+                           Mathieu Desnoyers
+
+
+This document introduces Linux Kernel Tracepoints and their use. It shows
+how to insert tracepoints in the kernel and connect probe functions to
+them, and gives some examples of probe functions.
+
+
+* Purpose of tracepoints
+
+A tracepoint placed in code provides a hook to call a function (probe) that you
+can provide at runtime. A tracepoint can be "on" (a probe is connected to it) or
+"off" (no probe is attached). When a tracepoint is "off" it has no effect,
+except for adding a tiny time penalty (checking a condition for a branch) and
+space penalty (adding a few bytes for the function call at the end of the
+instrumented function and adds a data structure in a separate section).  When a
+tracepoint is "on", the function you provide is called each time the tracepoint
+is executed, in the execution context of the caller. When the function provided
+ends its execution, it returns to the caller (continuing from the tracepoint
+site).
+
+You can put tracepoints at important locations in the code. They are
+lightweight hooks that can pass an arbitrary number of parameters, whose
+prototypes are described in a tracepoint declaration placed in a header
+file.
+
+They can be used for tracing and performance accounting.
+
+
+* Usage
+
+Two elements are required for tracepoints:
+
+- A tracepoint definition, placed in a header file.
+- The tracepoint statement, in C code.
+
+In order to use tracepoints, you should include linux/tracepoint.h.
+
+In include/trace/subsys.h:
+
+#include <linux/tracepoint.h>
+
+DEFINE_TRACE(subsys_eventname,
+       TPPROTO(int firstarg, struct task_struct *p),
+       TPARGS(firstarg, p));
+
+In subsys/file.c (where the tracing statement must be added):
+
+#include <trace/subsys.h>
+
+void somefct(void)
+{
+       ...
+       trace_subsys_eventname(arg, task);
+       ...
+}
+
+Where:
+- subsys_eventname is an identifier unique to your event
+    - subsys is the name of your subsystem.
+    - eventname is the name of the event to trace.
+- TPPROTO(int firstarg, struct task_struct *p) is the prototype of the function
+  called by this tracepoint.
+- TPARGS(firstarg, p) are the parameter names, as found in the prototype.
+
+Connecting a function (probe) to a tracepoint is done by providing a probe
+(function to call) for the specific tracepoint through
+register_trace_subsys_eventname().  Removing a probe is done through
+unregister_trace_subsys_eventname(); it will remove the probe and make sure
+there is no caller left using the probe when it returns. Probe removal is
+preempt-safe because preemption is disabled around the probe call. See the
+"Probe example" section below for a sample probe module.
+
+The tracepoint mechanism supports inserting multiple instances of the same
+tracepoint, but a given tracepoint name must have a single definition across
+the whole kernel to make sure no type conflict will occur. Name mangling of
+the tracepoints is done using the prototypes to make sure typing is correct.
+Verification of probe type correctness is done at the registration site by
+the compiler. Tracepoints can be put in inline functions, inlined static
+functions, and unrolled loops as well as regular functions.
+
+The naming scheme "subsys_event" is suggested here as a convention intended
+to limit collisions. Tracepoint names are global to the kernel: they are
+considered the same whether they are in the core kernel image or in
+modules.
+
+
+* Probe / tracepoint example
+
+See the examples provided in samples/tracepoints/
+
+Compile them with your kernel.
+
+Run, as root:
+modprobe tracepoint-example (insmod order is not important)
+modprobe tracepoint-probe-example
+cat /proc/tracepoint-example (returns an expected error)
+rmmod tracepoint-example tracepoint-probe-example
+dmesg
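To make the register/unregister flow concrete, here is a sketch of a probe
module for the subsys_eventname tracepoint declared in the new document
above; everything outside the generated register/unregister helpers and the
declared prototype is assumed for illustration.

/* Illustrative probe module for the subsys_eventname example above. */
#include <linux/module.h>
#include <linux/sched.h>
#include <trace/subsys.h>

/* Must match the TPPROTO() given in the DEFINE_TRACE() above. */
static void probe_subsys_eventname(int firstarg, struct task_struct *p)
{
	/* Runs in the caller's context each time the tracepoint fires. */
	printk(KERN_INFO "subsys_eventname: %d from %s\n",
	       firstarg, p->comm);
}

static int __init tp_probe_init(void)
{
	return register_trace_subsys_eventname(probe_subsys_eventname);
}

static void __exit tp_probe_fini(void)
{
	unregister_trace_subsys_eventname(probe_subsys_eventname);
}

module_init(tp_probe_init);
module_exit(tp_probe_fini);
MODULE_LICENSE("GPL");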
diff --git a/Documentation/tracers/mmiotrace.txt b/Documentation/tracers/mmiotrace.txt
index a4afb560a45bfa9c7429f18d54d0f8b4c194b116..5bbbe2096223f69bc1f0d7007f6d07a3385e4fd4 100644 (file)
--- a/Documentation/tracers/mmiotrace.txt
+++ b/Documentation/tracers/mmiotrace.txt
@@ -36,7 +36,7 @@ $ mount -t debugfs debugfs /debug
 $ echo mmiotrace > /debug/tracing/current_tracer
 $ cat /debug/tracing/trace_pipe > mydump.txt &
 Start X or whatever.
-$ echo "X is up" > /debug/tracing/marker
+$ echo "X is up" > /debug/tracing/trace_marker
 $ echo none > /debug/tracing/current_tracer
 Check for lost events.
 
@@ -59,9 +59,8 @@ The 'cat' process should stay running (sleeping) in the background.
 Load the driver you want to trace and use it. Mmiotrace will only catch MMIO
 accesses to areas that are ioremapped while mmiotrace is active.
 
-[Unimplemented feature:]
 During tracing you can place comments (markers) into the trace by
-$ echo "X is up" > /debug/tracing/marker
+$ echo "X is up" > /debug/tracing/trace_marker
 This makes it easier to see which part of the (huge) trace corresponds to
 which action. It is recommended to place descriptive markers about what you
 do.
diff --git a/MAINTAINERS b/MAINTAINERS
index 22303e5fe4ce41986bc14a6b2a68cc20b418f845..6d51f00dcdc0ba8349469d04005bb8435463f8e2 100644 (file)
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1198,7 +1198,7 @@ S:        Maintained
 
 CPU FREQUENCY DRIVERS
 P:     Dave Jones
-M:     davej@codemonkey.org.uk
+M:     davej@redhat.com
 L:     cpufreq@vger.kernel.org
 W:     http://www.codemonkey.org.uk/projects/cpufreq/
 T:     git kernel.org/pub/scm/linux/kernel/git/davej/cpufreq.git
diff --git a/arch/alpha/kernel/sys_sable.c b/arch/alpha/kernel/sys_sable.c
index 99a7f19da13aae935ae8b25ca9a590e8d5e02272..a4555f497639fa0c68e7da3e0284ce221005cee9 100644 (file)
--- a/arch/alpha/kernel/sys_sable.c
+++ b/arch/alpha/kernel/sys_sable.c
@@ -47,7 +47,7 @@ typedef struct irq_swizzle_struct
 
 static irq_swizzle_t *sable_lynx_irq_swizzle;
 
-static void sable_lynx_init_irq(int nr_irqs);
+static void sable_lynx_init_irq(int nr_of_irqs);
 
 #if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_SABLE)
 
@@ -530,11 +530,11 @@ sable_lynx_srm_device_interrupt(unsigned long vector)
 }
 
 static void __init
-sable_lynx_init_irq(int nr_irqs)
+sable_lynx_init_irq(int nr_of_irqs)
 {
        long i;
 
-       for (i = 0; i < nr_irqs; ++i) {
+       for (i = 0; i < nr_of_irqs; ++i) {
                irq_desc[i].status = IRQ_DISABLED | IRQ_LEVEL;
                irq_desc[i].chip = &sable_lynx_irq_type;
        }
diff --git a/arch/arm/mach-iop13xx/include/mach/time.h b/arch/arm/mach-iop13xx/include/mach/time.h
index 49213d9d7cad5c6538f4999de573fe2d50e2b82d..d6d52527589dc7c6ad1f45053b8f2a08e7d3dfc6 100644 (file)
--- a/arch/arm/mach-iop13xx/include/mach/time.h
+++ b/arch/arm/mach-iop13xx/include/mach/time.h
@@ -41,7 +41,7 @@ static inline unsigned long iop13xx_core_freq(void)
                return 1200000000;
        default:
                printk("%s: warning unknown frequency, defaulting to 800Mhz\n",
-                       __FUNCTION__);
+                       __func__);
        }
 
        return 800000000;
@@ -60,7 +60,7 @@ static inline unsigned long iop13xx_xsi_bus_ratio(void)
                return 4;
        default:
                printk("%s: warning unknown ratio, defaulting to 2\n",
-                       __FUNCTION__);
+                       __func__);
        }
 
        return 2;
diff --git a/arch/arm/mach-ixp2000/ixdp2x00.c b/arch/arm/mach-ixp2000/ixdp2x00.c
index b0653a87159a89b36b67da7674b8163e80439f0e..30451300751beb360ca41cf6e6f46814db1215f9 100644 (file)
--- a/arch/arm/mach-ixp2000/ixdp2x00.c
+++ b/arch/arm/mach-ixp2000/ixdp2x00.c
@@ -143,7 +143,7 @@ static struct irq_chip ixdp2x00_cpld_irq_chip = {
        .unmask = ixdp2x00_irq_unmask
 };
 
-void __init ixdp2x00_init_irq(volatile unsigned long *stat_reg, volatile unsigned long *mask_reg, unsigned long nr_irqs)
+void __init ixdp2x00_init_irq(volatile unsigned long *stat_reg, volatile unsigned long *mask_reg, unsigned long nr_of_irqs)
 {
        unsigned int irq;
 
@@ -154,7 +154,7 @@ void __init ixdp2x00_init_irq(volatile unsigned long *stat_reg, volatile unsigne
 
        board_irq_stat = stat_reg;
        board_irq_mask = mask_reg;
-       board_irq_count = nr_irqs;
+       board_irq_count = nr_of_irqs;
 
        *board_irq_mask = 0xffffffff;
 
diff --git a/arch/arm/mach-omap2/irq.c b/arch/arm/mach-omap2/irq.c
index d354e0fe4477ad450668b16e462258e26b2b4853..c40fc378a251244bce6e6c0ad9f0e177c7326a75 100644 (file)
--- a/arch/arm/mach-omap2/irq.c
+++ b/arch/arm/mach-omap2/irq.c
@@ -119,7 +119,7 @@ static void __init omap_irq_bank_init_one(struct omap_irq_bank *bank)
 
 void __init omap_init_irq(void)
 {
-       unsigned long nr_irqs = 0;
+       unsigned long nr_of_irqs = 0;
        unsigned int nr_banks = 0;
        int i;
 
@@ -133,14 +133,14 @@ void __init omap_init_irq(void)
 
                omap_irq_bank_init_one(bank);
 
-               nr_irqs += bank->nr_irqs;
+               nr_of_irqs += bank->nr_irqs;
                nr_banks++;
        }
 
        printk(KERN_INFO "Total of %ld interrupts on %d active controller%s\n",
-              nr_irqs, nr_banks, nr_banks > 1 ? "s" : "");
+              nr_of_irqs, nr_banks, nr_banks > 1 ? "s" : "");
 
-       for (i = 0; i < nr_irqs; i++) {
+       for (i = 0; i < nr_of_irqs; i++) {
                set_irq_chip(i, &omap_irq_chip);
                set_irq_handler(i, handle_level_irq);
                set_irq_flags(i, IRQF_VALID);
diff --git a/arch/arm/mach-pxa/include/mach/zylonite.h b/arch/arm/mach-pxa/include/mach/zylonite.h
index 0d35ca04731e485fa655c5dccd087c59fd58fb0d..bf6785adccf45a02e725422994d17d77138d2bf9 100644 (file)
--- a/arch/arm/mach-pxa/include/mach/zylonite.h
+++ b/arch/arm/mach-pxa/include/mach/zylonite.h
@@ -30,7 +30,7 @@ extern void zylonite_pxa300_init(void);
 static inline void zylonite_pxa300_init(void)
 {
        if (cpu_is_pxa300() || cpu_is_pxa310())
-               panic("%s: PXA300/PXA310 not supported\n", __FUNCTION__);
+               panic("%s: PXA300/PXA310 not supported\n", __func__);
 }
 #endif
 
@@ -40,7 +40,7 @@ extern void zylonite_pxa320_init(void);
 static inline void zylonite_pxa320_init(void)
 {
        if (cpu_is_pxa320())
-               panic("%s: PXA320 not supported\n", __FUNCTION__);
+               panic("%s: PXA320 not supported\n", __func__);
 }
 #endif
 
diff --git a/arch/arm/mach-sa1100/include/mach/ide.h b/arch/arm/mach-sa1100/include/mach/ide.h
deleted file mode 100644 (file)
index 4c99c8f..0000000
--- a/arch/arm/mach-sa1100/include/mach/ide.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * arch/arm/mach-sa1100/include/mach/ide.h
- *
- * Copyright (c) 1998 Hugo Fiennes & Nicolas Pitre
- *
- * 18-aug-2000: Cleanup by Erik Mouw (J.A.K.Mouw@its.tudelft.nl)
- *              Get rid of the special ide_init_hwif_ports() functions
- *              and make a generalised function that can be used by all
- *              architectures.
- */
-
-#include <asm/irq.h>
-#include <mach/hardware.h>
-#include <asm/mach-types.h>
-
-#error "This code is broken and needs update to match with current ide support"
-
-
-/*
- * Set up a hw structure for a specified data port, control port and IRQ.
- * This should follow whatever the default interface uses.
- */
-static inline void ide_init_hwif_ports(hw_regs_t *hw, unsigned long data_port,
-                                      unsigned long ctrl_port, int *irq)
-{
-       unsigned long reg = data_port;
-       int i;
-       int regincr = 1;
-
-       /* The Empeg board has the first two address lines unused */
-       if (machine_is_empeg())
-               regincr = 1 << 2;
-
-       /* The LART doesn't use A0 for IDE */
-       if (machine_is_lart())
-               regincr = 1 << 1;
-
-       memset(hw, 0, sizeof(*hw));
-
-       for (i = 0; i <= 7; i++) {
-               hw->io_ports_array[i] = reg;
-               reg += regincr;
-       }
-
-       hw->io_ports.ctl_addr = ctrl_port;
-
-       if (irq)
-               *irq = 0;
-}
-
-/*
- * This registers the standard ports for this architecture with the IDE
- * driver.
- */
-static __inline__ void
-ide_init_default_hwifs(void)
-{
-    if (machine_is_lart()) {
-#ifdef CONFIG_SA1100_LART
-        hw_regs_t hw;
-
-        /* Enable GPIO as interrupt line */
-        GPDR &= ~LART_GPIO_IDE;
-       set_irq_type(LART_IRQ_IDE, IRQ_TYPE_EDGE_RISING);
-
-        /* set PCMCIA interface timing */
-        MECR = 0x00060006;
-
-        /* init the interface */
-       ide_init_hwif_ports(&hw, PCMCIA_IO_0_BASE + 0x0000, PCMCIA_IO_0_BASE + 0x1000, NULL);
-        hw.irq = LART_IRQ_IDE;
-        ide_register_hw(&hw);
-#endif
-    }
-}
diff --git a/arch/avr32/mach-at32ap/extint.c b/arch/avr32/mach-at32ap/extint.c
index c36a6d59d6f0785e900a7e905189e577bd4693bc..310477ba1bbf170a47af4e68bfc4bfa26d836a8d 100644 (file)
--- a/arch/avr32/mach-at32ap/extint.c
+++ b/arch/avr32/mach-at32ap/extint.c
@@ -191,7 +191,7 @@ static int __init eic_probe(struct platform_device *pdev)
        struct eic *eic;
        struct resource *regs;
        unsigned int i;
-       unsigned int nr_irqs;
+       unsigned int nr_of_irqs;
        unsigned int int_irq;
        int ret;
        u32 pattern;
@@ -224,7 +224,7 @@ static int __init eic_probe(struct platform_device *pdev)
        eic_writel(eic, IDR, ~0UL);
        eic_writel(eic, MODE, ~0UL);
        pattern = eic_readl(eic, MODE);
-       nr_irqs = fls(pattern);
+       nr_of_irqs = fls(pattern);
 
        /* Trigger on low level unless overridden by driver */
        eic_writel(eic, EDGE, 0UL);
@@ -232,7 +232,7 @@ static int __init eic_probe(struct platform_device *pdev)
 
        eic->chip = &eic_chip;
 
-       for (i = 0; i < nr_irqs; i++) {
+       for (i = 0; i < nr_of_irqs; i++) {
                set_irq_chip_and_handler(eic->first_irq + i, &eic_chip,
                                         handle_level_irq);
                set_irq_chip_data(eic->first_irq + i, eic);
@@ -256,7 +256,7 @@ static int __init eic_probe(struct platform_device *pdev)
                 eic->regs, int_irq);
        dev_info(&pdev->dev,
                 "Handling %u external IRQs, starting with IRQ %u\n",
-                nr_irqs, eic->first_irq);
+                nr_of_irqs, eic->first_irq);
 
        return 0;
 
diff --git a/arch/ia64/include/asm/pci.h b/arch/ia64/include/asm/pci.h
index 0149097b736d70bd5e4d2f73801b5805ffe8a88a..ce342fb74246240d14c789a8cc4f40ad182552c9 100644 (file)
--- a/arch/ia64/include/asm/pci.h
+++ b/arch/ia64/include/asm/pci.h
@@ -95,16 +95,8 @@ extern int pci_mmap_page_range (struct pci_dev *dev, struct vm_area_struct *vma,
                                enum pci_mmap_state mmap_state, int write_combine);
 #define HAVE_PCI_LEGACY
 extern int pci_mmap_legacy_page_range(struct pci_bus *bus,
-                                     struct vm_area_struct *vma);
-extern ssize_t pci_read_legacy_io(struct kobject *kobj,
-                                 struct bin_attribute *bin_attr,
-                                 char *buf, loff_t off, size_t count);
-extern ssize_t pci_write_legacy_io(struct kobject *kobj,
-                                  struct bin_attribute *bin_attr,
-                                  char *buf, loff_t off, size_t count);
-extern int pci_mmap_legacy_mem(struct kobject *kobj,
-                              struct bin_attribute *attr,
-                              struct vm_area_struct *vma);
+                                     struct vm_area_struct *vma,
+                                     enum pci_mmap_state mmap_state);
 
 #define pci_get_legacy_mem platform_pci_get_legacy_mem
 #define pci_legacy_read platform_pci_legacy_read
diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c
index 7545037a86254f1f5da925f9b96f80fafcfc1a33..211fcfd115f91f1e6ec169e8abd9849c257b975f 100644 (file)
--- a/arch/ia64/pci/pci.c
+++ b/arch/ia64/pci/pci.c
@@ -614,12 +614,17 @@ char *ia64_pci_get_legacy_mem(struct pci_bus *bus)
  * vector to get the base address.
  */
 int
-pci_mmap_legacy_page_range(struct pci_bus *bus, struct vm_area_struct *vma)
+pci_mmap_legacy_page_range(struct pci_bus *bus, struct vm_area_struct *vma,
+                          enum pci_mmap_state mmap_state)
 {
        unsigned long size = vma->vm_end - vma->vm_start;
        pgprot_t prot;
        char *addr;
 
+       /* We only support mmap'ing of legacy memory space */
+       if (mmap_state != pci_mmap_mem)
+               return -ENOSYS;
+
        /*
         * Avoid attribute aliasing.  See Documentation/ia64/aliasing.txt
         * for more details.
diff --git a/arch/m32r/kernel/smpboot.c b/arch/m32r/kernel/smpboot.c
index fc2994811f150c991986b6294538ca5b9c6a64ab..39cb6da72dcbef3358d04e2b419a150f88c299ba 100644 (file)
--- a/arch/m32r/kernel/smpboot.c
+++ b/arch/m32r/kernel/smpboot.c
@@ -40,6 +40,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/cpu.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 2bd1f6ef5db0c6bd45269701b36b4713c0a6def3..644a70b1b04e4fd9ecf16d2e73d6e22ea5227115 100644 (file)
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -9,6 +9,8 @@ config PARISC
        def_bool y
        select HAVE_IDE
        select HAVE_OPROFILE
+       select RTC_CLASS
+       select RTC_DRV_PARISC
        help
          The PA-RISC microprocessor is designed by Hewlett-Packard and used
          in many of their workstations & servers (HP9000 700 and 800 series,
diff --git a/include/asm-parisc/ide.h b/arch/parisc/include/asm/ide.h
similarity index 77%
rename from include/asm-parisc/ide.h
rename to arch/parisc/include/asm/ide.h
index c246ef75017db7fc048699577c0603121bb19f51..81700a2321cff3926bbf60320a2bed1cba248118 100644 (file)
 
 #ifdef __KERNEL__
 
-#define ide_request_irq(irq,hand,flg,dev,id)   request_irq((irq),(hand),(flg),(dev),(id))
-#define ide_free_irq(irq,dev_id)               free_irq((irq), (dev_id))
-#define ide_request_region(from,extent,name)   request_region((from), (extent), (name))
-#define ide_release_region(from,extent)                release_region((from), (extent))
 /* Generic I/O and MEMIO string operations.  */
 
 #define __ide_insw     insw
diff --git a/include/asm-parisc/pdc.h b/arch/parisc/include/asm/pdc.h
similarity index 99%
rename from include/asm-parisc/pdc.h
rename to arch/parisc/include/asm/pdc.h
index 9eaa794c3e4a3f1de70716e450896e1cddf8d570..c584b00c6074af9419ac31badf1e432f38262815 100644 (file)
 #define BOOT_CONSOLE_SPA_OFFSET  0x3c4
 #define BOOT_CONSOLE_PATH_OFFSET 0x3a8
 
+/* size of the pdc_result buffer for firmware.c */
+#define NUM_PDC_RESULT 32
+
 #if !defined(__ASSEMBLY__)
 #ifdef __KERNEL__
 
@@ -600,6 +603,7 @@ int pdc_chassis_info(struct pdc_chassis_info *chassis_info, void *led_info, unsi
 int pdc_chassis_disp(unsigned long disp);
 int pdc_chassis_warn(unsigned long *warn);
 int pdc_coproc_cfg(struct pdc_coproc_cfg *pdc_coproc_info);
+int pdc_coproc_cfg_unlocked(struct pdc_coproc_cfg *pdc_coproc_info);
 int pdc_iodc_read(unsigned long *actcnt, unsigned long hpa, unsigned int index,
                  void *iodc_data, unsigned int iodc_data_size);
 int pdc_system_map_find_mods(struct pdc_system_map_mod_info *pdc_mod_info,
@@ -638,6 +642,7 @@ int pdc_mem_mem_table(struct pdc_memory_table_raddr *r_addr,
 #endif
 
 void set_firmware_width(void);
+void set_firmware_width_unlocked(void);
 int pdc_do_firm_test_reset(unsigned long ftc_bitmap);
 int pdc_do_reset(void);
 int pdc_soft_power_info(unsigned long *power_reg);
diff --git a/include/asm-parisc/ptrace.h b/arch/parisc/include/asm/ptrace.h
similarity index 85%
rename from include/asm-parisc/ptrace.h
rename to arch/parisc/include/asm/ptrace.h
index 3e94c5d85ff5715a3d95af8e6444b27ae081a1fd..afa5333187b4519126d2556b008e9fdfaba0fde5 100644 (file)
@@ -47,6 +47,16 @@ struct pt_regs {
 
 #define task_regs(task) ((struct pt_regs *) ((char *)(task) + TASK_REGS))
 
+#define __ARCH_WANT_COMPAT_SYS_PTRACE
+
+struct task_struct;
+#define arch_has_single_step() 1
+void user_disable_single_step(struct task_struct *task);
+void user_enable_single_step(struct task_struct *task);
+
+#define arch_has_block_step()  1
+void user_enable_block_step(struct task_struct *task);
+
 /* XXX should we use iaoq[1] or iaoq[0] ? */
 #define user_mode(regs)                        (((regs)->iaoq[0] & 3) ? 1 : 0)
 #define user_space(regs)               (((regs)->iasq[1] != 0) ? 1 : 0)
diff --git a/include/asm-parisc/ropes.h b/arch/parisc/include/asm/ropes.h
similarity index 99%
rename from include/asm-parisc/ropes.h
rename to arch/parisc/include/asm/ropes.h
index 007a880615eb80d766863f8e0b3708750c2ebbef..09f51d5ab57c254d78693b633d9077fd26454856 100644 (file)
@@ -1,7 +1,7 @@
 #ifndef _ASM_PARISC_ROPES_H_
 #define _ASM_PARISC_ROPES_H_
 
-#include <asm-parisc/parisc-device.h>
+#include <asm/parisc-device.h>
 
 #ifdef CONFIG_64BIT
 /* "low end" PA8800 machines use ZX1 chipset: PAT PDC and only run 64-bit */
diff --git a/include/asm-parisc/unistd.h b/arch/parisc/include/asm/unistd.h
similarity index 99%
rename from include/asm-parisc/unistd.h
rename to arch/parisc/include/asm/unistd.h
index a7d857f0e4f404963c86f2baeb240f80d7a6f110..ef26b009dc5da1f323a7fc77d329e6c520b5e40f 100644 (file)
 #define __NR_timerfd_create    (__NR_Linux + 306)
 #define __NR_timerfd_settime   (__NR_Linux + 307)
 #define __NR_timerfd_gettime   (__NR_Linux + 308)
-
-#define __NR_Linux_syscalls    (__NR_timerfd_gettime + 1)
+#define __NR_signalfd4         (__NR_Linux + 309)
+#define __NR_eventfd2          (__NR_Linux + 310)
+#define __NR_epoll_create1     (__NR_Linux + 311)
+#define __NR_dup3              (__NR_Linux + 312)
+#define __NR_pipe2             (__NR_Linux + 313)
+#define __NR_inotify_init1     (__NR_Linux + 314)
+
+#define __NR_Linux_syscalls    (__NR_inotify_init1 + 1)
 
 
 #define __IGNORE_select                /* newselect */
diff --git a/include/asm-parisc/unwind.h b/arch/parisc/include/asm/unwind.h
similarity index 99%
rename from include/asm-parisc/unwind.h
rename to arch/parisc/include/asm/unwind.h
index 2f7e6e50a1580de34d23216018cc015d1bf7780f..52482e4fc20d1f11407243915968523160839fbb 100644 (file)
@@ -74,4 +74,6 @@ void unwind_frame_init_running(struct unwind_frame_info *info, struct pt_regs *r
 int unwind_once(struct unwind_frame_info *info);
 int unwind_to_user(struct unwind_frame_info *info);
 
+int unwind_init(void);
+
 #endif
diff --git a/arch/parisc/kernel/.gitignore b/arch/parisc/kernel/.gitignore
new file mode 100644 (file)
index 0000000..c5f676c
--- /dev/null
@@ -0,0 +1 @@
+vmlinux.lds
diff --git a/arch/parisc/kernel/asm-offsets.c b/arch/parisc/kernel/asm-offsets.c
index 3efc0b73e4ff9ce71efd7bf93550af12f1710394..699cf8ef211816576c5ee10d29c7bc67257580fc 100644 (file)
--- a/arch/parisc/kernel/asm-offsets.c
+++ b/arch/parisc/kernel/asm-offsets.c
@@ -290,5 +290,8 @@ int main(void)
        DEFINE(EXCDATA_IP, offsetof(struct exception_data, fault_ip));
        DEFINE(EXCDATA_SPACE, offsetof(struct exception_data, fault_space));
        DEFINE(EXCDATA_ADDR, offsetof(struct exception_data, fault_addr));
+       BLANK();
+       DEFINE(ASM_PDC_RESULT_SIZE, NUM_PDC_RESULT * sizeof(unsigned long));
+       BLANK();
        return 0;
 }
diff --git a/arch/parisc/kernel/firmware.c b/arch/parisc/kernel/firmware.c
index 7177a6cd1b7f58b0fcc83f60ea5c11cf1bd96e1a..03f26bd75bd8ebcb301beb7a7e90c07454d08412 100644 (file)
--- a/arch/parisc/kernel/firmware.c
+++ b/arch/parisc/kernel/firmware.c
@@ -71,8 +71,8 @@
 #include <asm/processor.h>     /* for boot_cpu_data */
 
 static DEFINE_SPINLOCK(pdc_lock);
-static unsigned long pdc_result[32] __attribute__ ((aligned (8)));
-static unsigned long pdc_result2[32] __attribute__ ((aligned (8)));
+extern unsigned long pdc_result[NUM_PDC_RESULT];
+extern unsigned long pdc_result2[NUM_PDC_RESULT];
 
 #ifdef CONFIG_64BIT
 #define WIDE_FIRMWARE 0x1
@@ -150,26 +150,40 @@ static void convert_to_wide(unsigned long *addr)
 #endif
 }
 
+#ifdef CONFIG_64BIT
+void __init set_firmware_width_unlocked(void)
+{
+       int ret;
+
+       ret = mem_pdc_call(PDC_MODEL, PDC_MODEL_CAPABILITIES,
+               __pa(pdc_result), 0);
+       convert_to_wide(pdc_result);
+       if (pdc_result[0] != NARROW_FIRMWARE)
+               parisc_narrow_firmware = 0;
+}
+       
 /**
  * set_firmware_width - Determine if the firmware is wide or narrow.
  * 
- * This function must be called before any pdc_* function that uses the convert_to_wide
- * function.
+ * This function must be called before any pdc_* function that uses the
+ * convert_to_wide function.
  */
 void __init set_firmware_width(void)
 {
-#ifdef CONFIG_64BIT
-       int retval;
        unsigned long flags;
+       spin_lock_irqsave(&pdc_lock, flags);
+       set_firmware_width_unlocked();
+       spin_unlock_irqrestore(&pdc_lock, flags);
+}
+#else
+void __init set_firmware_width_unlocked(void) {
+       return;
+}
 
-        spin_lock_irqsave(&pdc_lock, flags);
-       retval = mem_pdc_call(PDC_MODEL, PDC_MODEL_CAPABILITIES, __pa(pdc_result), 0);
-       convert_to_wide(pdc_result);
-       if(pdc_result[0] != NARROW_FIRMWARE)
-               parisc_narrow_firmware = 0;
-        spin_unlock_irqrestore(&pdc_lock, flags);
-#endif
+void __init set_firmware_width(void) {
+       return;
 }
+#endif /*CONFIG_64BIT*/
 
 /**
  * pdc_emergency_unlock - Unlock the linux pdc lock
@@ -288,6 +302,20 @@ int pdc_chassis_warn(unsigned long *warn)
        return retval;
 }
 
+int __init pdc_coproc_cfg_unlocked(struct pdc_coproc_cfg *pdc_coproc_info)
+{
+       int ret;
+
+       ret = mem_pdc_call(PDC_COPROC, PDC_COPROC_CFG, __pa(pdc_result));
+       convert_to_wide(pdc_result);
+       pdc_coproc_info->ccr_functional = pdc_result[0];
+       pdc_coproc_info->ccr_present = pdc_result[1];
+       pdc_coproc_info->revision = pdc_result[17];
+       pdc_coproc_info->model = pdc_result[18];
+
+       return ret;
+}
+
 /**
  * pdc_coproc_cfg - To identify coprocessors attached to the processor.
  * @pdc_coproc_info: Return buffer address.
@@ -297,19 +325,14 @@ int pdc_chassis_warn(unsigned long *warn)
  */
 int __init pdc_coproc_cfg(struct pdc_coproc_cfg *pdc_coproc_info)
 {
-        int retval;
+       int ret;
        unsigned long flags;
 
-        spin_lock_irqsave(&pdc_lock, flags);
-        retval = mem_pdc_call(PDC_COPROC, PDC_COPROC_CFG, __pa(pdc_result));
-        convert_to_wide(pdc_result);
-        pdc_coproc_info->ccr_functional = pdc_result[0];
-        pdc_coproc_info->ccr_present = pdc_result[1];
-        pdc_coproc_info->revision = pdc_result[17];
-        pdc_coproc_info->model = pdc_result[18];
-        spin_unlock_irqrestore(&pdc_lock, flags);
+       spin_lock_irqsave(&pdc_lock, flags);
+       ret = pdc_coproc_cfg_unlocked(pdc_coproc_info);
+       spin_unlock_irqrestore(&pdc_lock, flags);
 
-        return retval;
+       return ret;
 }
 
 /**
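
The shape introduced above (an _unlocked helper that does the real work, plus a thin wrapper that takes pdc_lock) lets very early boot code, see start_parisc further down in this commit, issue PDC calls before spinlocks are meaningful, while ordinary callers keep the old locking. A minimal sketch of the pattern with illustrative names, not code from this commit:

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(example_lock);

/* does the real work; caller holds example_lock, or is the only
 * thread of execution at early boot */
static int example_query_unlocked(unsigned long *out)
{
        *out = 42;      /* stand-in for the firmware call */
        return 0;
}

/* public entry point: the same work wrapped in the lock */
int example_query(unsigned long *out)
{
        unsigned long flags;
        int ret;

        spin_lock_irqsave(&example_lock, flags);
        ret = example_query_unlocked(out);
        spin_unlock_irqrestore(&example_lock, flags);

        return ret;
}
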
index a84e31e828768943a7ac288ccb26aa49499b275c..0e3d9f9b9e33e97680190e3752cea063fce9c251 100644 (file)
@@ -121,7 +121,7 @@ $pgt_fill_loop:
        copy            %r0,%r2
 
        /* And the RFI Target address too */
-       load32          start_kernel,%r11
+       load32          start_parisc,%r11
 
        /* And the initial task pointer */
        load32          init_thread_union,%r6
index 49c637970789ba81da5741a75817ef1274e3b1ac..90904f9dfc504fb1e0337abb196b1617006ce514 100644 (file)
@@ -4,6 +4,7 @@
  * Copyright (C) 2000 Hewlett-Packard Co, Linuxcare Inc.
  * Copyright (C) 2000 Matthew Wilcox <matthew@wil.cx>
  * Copyright (C) 2000 David Huggins-Daines <dhd@debian.org>
+ * Copyright (C) 2008 Helge Deller <deller@gmx.de>
  */
 
 #include <linux/kernel.h>
 /* PSW bits we allow the debugger to modify */
 #define USER_PSW_BITS  (PSW_N | PSW_V | PSW_CB)
 
-#undef DEBUG_PTRACE
+/*
+ * Called by kernel/ptrace.c when detaching..
+ *
+ * Make sure single step bits etc are not set.
+ */
+void ptrace_disable(struct task_struct *task)
+{
+       task->ptrace &= ~(PT_SINGLESTEP|PT_BLOCKSTEP);
 
-#ifdef DEBUG_PTRACE
-#define DBG(x...)      printk(x)
-#else
-#define DBG(x...)
-#endif
+       /* make sure the trap bits are not set */
+       pa_psw(task)->r = 0;
+       pa_psw(task)->t = 0;
+       pa_psw(task)->h = 0;
+       pa_psw(task)->l = 0;
+}
+
+/*
+ * The following functions are called by ptrace_resume() when
+ * enabling or disabling single/block tracing.
+ */
+void user_disable_single_step(struct task_struct *task)
+{
+       ptrace_disable(task);
+}
+
+void user_enable_single_step(struct task_struct *task)
+{
+       task->ptrace &= ~PT_BLOCKSTEP;
+       task->ptrace |= PT_SINGLESTEP;
+
+       if (pa_psw(task)->n) {
+               struct siginfo si;
+
+               /* Nullified, just crank over the queue. */
+               task_regs(task)->iaoq[0] = task_regs(task)->iaoq[1];
+               task_regs(task)->iasq[0] = task_regs(task)->iasq[1];
+               task_regs(task)->iaoq[1] = task_regs(task)->iaoq[0] + 4;
+               pa_psw(task)->n = 0;
+               pa_psw(task)->x = 0;
+               pa_psw(task)->y = 0;
+               pa_psw(task)->z = 0;
+               pa_psw(task)->b = 0;
+               ptrace_disable(task);
+               /* Don't wake up the task, but let the
+                  parent know something happened. */
+               si.si_code = TRAP_TRACE;
+               si.si_addr = (void __user *) (task_regs(task)->iaoq[0] & ~3);
+               si.si_signo = SIGTRAP;
+               si.si_errno = 0;
+               force_sig_info(SIGTRAP, &si, task);
+               /* notify_parent(task, SIGCHLD); */
+               return;
+       }
+
+       /* Enable recovery counter traps.  The recovery counter
+        * itself will be set to zero on a task switch.  If the
+        * task is suspended on a syscall then the syscall return
+        * path will overwrite the recovery counter with a suitable
+        * value such that it traps once back in user space.  We
+        * disable interrupts in the task's PSW here also, to avoid
+        * interrupts while the recovery counter is decrementing.
+        */
+       pa_psw(task)->r = 1;
+       pa_psw(task)->t = 0;
+       pa_psw(task)->h = 0;
+       pa_psw(task)->l = 0;
+}
+
+void user_enable_block_step(struct task_struct *task)
+{
+       task->ptrace &= ~PT_SINGLESTEP;
+       task->ptrace |= PT_BLOCKSTEP;
+
+       /* Enable taken branch trap. */
+       pa_psw(task)->r = 0;
+       pa_psw(task)->t = 1;
+       pa_psw(task)->h = 0;
+       pa_psw(task)->l = 0;
+}
+
+long arch_ptrace(struct task_struct *child, long request, long addr, long data)
+{
+       unsigned long tmp;
+       long ret = -EIO;
 
-#ifdef CONFIG_64BIT
+       switch (request) {
+
+       /* Read the word at location addr in the USER area.  For ptraced
+          processes, the kernel saves all regs on a syscall. */
+       case PTRACE_PEEKUSR:
+               if ((addr & (sizeof(long)-1)) ||
+                   (unsigned long) addr >= sizeof(struct pt_regs))
+                       break;
+               tmp = *(unsigned long *) ((char *) task_regs(child) + addr);
+               ret = put_user(tmp, (unsigned long *) data);
+               break;
+
+       /* Write the word at location addr in the USER area.  This will need
+          to change when the kernel no longer saves all regs on a syscall.
+          FIXME.  There is a problem at the moment in that r3-r18 are only
+          saved if the process is ptraced on syscall entry, and even then
+          those values are overwritten by actual register values on syscall
+          exit. */
+       case PTRACE_POKEUSR:
+               /* Some register values written here may be ignored in
+                * entry.S:syscall_restore_rfi; e.g. iaoq is written with
+                * r31/r31+4, and not with the values in pt_regs.
+                */
+               if (addr == PT_PSW) {
+                       /* Allow writing to Nullify, Divide-step-correction,
+                        * and carry/borrow bits.
+                        * BEWARE, if you set N, and then single step, it won't
+                        * stop on the nullified instruction.
+                        */
+                       data &= USER_PSW_BITS;
+                       task_regs(child)->gr[0] &= ~USER_PSW_BITS;
+                       task_regs(child)->gr[0] |= data;
+                       ret = 0;
+                       break;
+               }
+
+               if ((addr & (sizeof(long)-1)) ||
+                   (unsigned long) addr >= sizeof(struct pt_regs))
+                       break;
+               if ((addr >= PT_GR1 && addr <= PT_GR31) ||
+                               addr == PT_IAOQ0 || addr == PT_IAOQ1 ||
+                               (addr >= PT_FR0 && addr <= PT_FR31 + 4) ||
+                               addr == PT_SAR) {
+                       *(unsigned long *) ((char *) task_regs(child) + addr) = data;
+                       ret = 0;
+               }
+               break;
+
+       default:
+               ret = ptrace_request(child, request, addr, data);
+               break;
+       }
+
+       return ret;
+}
+
+
+#ifdef CONFIG_COMPAT
 
 /* This function is needed to translate 32 bit pt_regs offsets in to
  * 64 bit pt_regs offsets.  For example, a 32 bit gdb under a 64 bit kernel
@@ -61,106 +196,25 @@ static long translate_usr_offset(long offset)
        else
                return -1;
 }
-#endif
 
-/*
- * Called by kernel/ptrace.c when detaching..
- *
- * Make sure single step bits etc are not set.
- */
-void ptrace_disable(struct task_struct *child)
+long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
+                       compat_ulong_t addr, compat_ulong_t data)
 {
-       /* make sure the trap bits are not set */
-       pa_psw(child)->r = 0;
-       pa_psw(child)->t = 0;
-       pa_psw(child)->h = 0;
-       pa_psw(child)->l = 0;
-}
-
-long arch_ptrace(struct task_struct *child, long request, long addr, long data)
-{
-       long ret;
-#ifdef DEBUG_PTRACE
-       long oaddr=addr, odata=data;
-#endif
+       compat_uint_t tmp;
+       long ret = -EIO;
 
        switch (request) {
-       case PTRACE_PEEKTEXT: /* read word at location addr. */ 
-       case PTRACE_PEEKDATA: {
-#ifdef CONFIG_64BIT
-               if (__is_compat_task(child)) {
-                       int copied;
-                       unsigned int tmp;
-
-                       addr &= 0xffffffffL;
-                       copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0);
-                       ret = -EIO;
-                       if (copied != sizeof(tmp))
-                               goto out_tsk;
-                       ret = put_user(tmp,(unsigned int *) data);
-                       DBG("sys_ptrace(PEEK%s, %d, %lx, %lx) returning %ld, data %x\n",
-                               request == PTRACE_PEEKTEXT ? "TEXT" : "DATA",
-                               pid, oaddr, odata, ret, tmp);
-               }
-               else
-#endif
-                       ret = generic_ptrace_peekdata(child, addr, data);
-               goto out_tsk;
-       }
 
-       /* when I and D space are separate, this will have to be fixed. */
-       case PTRACE_POKETEXT: /* write the word at location addr. */
-       case PTRACE_POKEDATA:
-               ret = 0;
-#ifdef CONFIG_64BIT
-               if (__is_compat_task(child)) {
-                       unsigned int tmp = (unsigned int)data;
-                       DBG("sys_ptrace(POKE%s, %d, %lx, %lx)\n",
-                               request == PTRACE_POKETEXT ? "TEXT" : "DATA",
-                               pid, oaddr, odata);
-                       addr &= 0xffffffffL;
-                       if (access_process_vm(child, addr, &tmp, sizeof(tmp), 1) == sizeof(tmp))
-                               goto out_tsk;
-               }
-               else
-#endif
-               {
-                       if (access_process_vm(child, addr, &data, sizeof(data), 1) == sizeof(data))
-                               goto out_tsk;
-               }
-               ret = -EIO;
-               goto out_tsk;
-
-       /* Read the word at location addr in the USER area.  For ptraced
-          processes, the kernel saves all regs on a syscall. */
-       case PTRACE_PEEKUSR: {
-               ret = -EIO;
-#ifdef CONFIG_64BIT
-               if (__is_compat_task(child)) {
-                       unsigned int tmp;
-
-                       if (addr & (sizeof(int)-1))
-                               goto out_tsk;
-                       if ((addr = translate_usr_offset(addr)) < 0)
-                               goto out_tsk;
-
-                       tmp = *(unsigned int *) ((char *) task_regs(child) + addr);
-                       ret = put_user(tmp, (unsigned int *) data);
-                       DBG("sys_ptrace(PEEKUSR, %d, %lx, %lx) returning %ld, addr %lx, data %x\n",
-                               pid, oaddr, odata, ret, addr, tmp);
-               }
-               else
-#endif
-               {
-                       unsigned long tmp;
+       case PTRACE_PEEKUSR:
+               if (addr & (sizeof(compat_uint_t)-1))
+                       break;
+               addr = translate_usr_offset(addr);
+               if (addr < 0)
+                       break;
 
-                       if ((addr & (sizeof(long)-1)) || (unsigned long) addr >= sizeof(struct pt_regs))
-                               goto out_tsk;
-                       tmp = *(unsigned long *) ((char *) task_regs(child) + addr);
-                       ret = put_user(tmp, (unsigned long *) data);
-               }
-               goto out_tsk;
-       }
+               tmp = *(compat_uint_t *) ((char *) task_regs(child) + addr);
+               ret = put_user(tmp, (compat_uint_t *) (unsigned long) data);
+               break;
 
        /* Write the word at location addr in the USER area.  This will need
           to change when the kernel no longer saves all regs on a syscall.
@@ -169,185 +223,46 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
           those values are overwritten by actual register values on syscall
           exit. */
        case PTRACE_POKEUSR:
-               ret = -EIO;
                /* Some register values written here may be ignored in
                 * entry.S:syscall_restore_rfi; e.g. iaoq is written with
                 * r31/r31+4, and not with the values in pt_regs.
                 */
-                /* PT_PSW=0, so this is valid for 32 bit processes under 64
-                * bit kernels.
-                */
                if (addr == PT_PSW) {
-                       /* PT_PSW=0, so this is valid for 32 bit processes
-                        * under 64 bit kernels.
-                        *
-                        * Allow writing to Nullify, Divide-step-correction,
-                        * and carry/borrow bits.
-                        * BEWARE, if you set N, and then single step, it won't
-                        * stop on the nullified instruction.
+                       /* Since PT_PSW==0, it is valid for 32 bit processes
+                        * under 64 bit kernels as well.
                         */
-                       DBG("sys_ptrace(POKEUSR, %d, %lx, %lx)\n",
-                               pid, oaddr, odata);
-                       data &= USER_PSW_BITS;
-                       task_regs(child)->gr[0] &= ~USER_PSW_BITS;
-                       task_regs(child)->gr[0] |= data;
-                       ret = 0;
-                       goto out_tsk;
-               }
-#ifdef CONFIG_64BIT
-               if (__is_compat_task(child)) {
-                       if (addr & (sizeof(int)-1))
-                               goto out_tsk;
-                       if ((addr = translate_usr_offset(addr)) < 0)
-                               goto out_tsk;
-                       DBG("sys_ptrace(POKEUSR, %d, %lx, %lx) addr %lx\n",
-                               pid, oaddr, odata, addr);
+                       ret = arch_ptrace(child, request, addr, data);
+               } else {
+                       if (addr & (sizeof(compat_uint_t)-1))
+                               break;
+                       addr = translate_usr_offset(addr);
+                       if (addr < 0)
+                               break;
                        if (addr >= PT_FR0 && addr <= PT_FR31 + 4) {
                                /* Special case, fp regs are 64 bits anyway */
-                               *(unsigned int *) ((char *) task_regs(child) + addr) = data;
+                               *(__u64 *) ((char *) task_regs(child) + addr) = data;
                                ret = 0;
                        }
                        else if ((addr >= PT_GR1+4 && addr <= PT_GR31+4) ||
                                        addr == PT_IAOQ0+4 || addr == PT_IAOQ1+4 ||
                                        addr == PT_SAR+4) {
                                /* Zero the top 32 bits */
-                               *(unsigned int *) ((char *) task_regs(child) + addr - 4) = 0;
-                               *(unsigned int *) ((char *) task_regs(child) + addr) = data;
+                               *(__u32 *) ((char *) task_regs(child) + addr - 4) = 0;
+                               *(__u32 *) ((char *) task_regs(child) + addr) = data;
                                ret = 0;
                        }
-                       goto out_tsk;
                }
-               else
-#endif
-               {
-                       if ((addr & (sizeof(long)-1)) || (unsigned long) addr >= sizeof(struct pt_regs))
-                               goto out_tsk;
-                       if ((addr >= PT_GR1 && addr <= PT_GR31) ||
-                                       addr == PT_IAOQ0 || addr == PT_IAOQ1 ||
-                                       (addr >= PT_FR0 && addr <= PT_FR31 + 4) ||
-                                       addr == PT_SAR) {
-                               *(unsigned long *) ((char *) task_regs(child) + addr) = data;
-                               ret = 0;
-                       }
-                       goto out_tsk;
-               }
-
-       case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */
-       case PTRACE_CONT:
-               ret = -EIO;
-               DBG("sys_ptrace(%s)\n",
-                       request == PTRACE_SYSCALL ? "SYSCALL" : "CONT");
-               if (!valid_signal(data))
-                       goto out_tsk;
-               child->ptrace &= ~(PT_SINGLESTEP|PT_BLOCKSTEP);
-               if (request == PTRACE_SYSCALL) {
-                       set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-               } else {
-                       clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-               }               
-               child->exit_code = data;
-               goto out_wake_notrap;
-
-       case PTRACE_KILL:
-               /*
-                * make the child exit.  Best I can do is send it a
-                * sigkill.  perhaps it should be put in the status
-                * that it wants to exit.
-                */
-               ret = 0;
-               DBG("sys_ptrace(KILL)\n");
-               if (child->exit_state == EXIT_ZOMBIE)   /* already dead */
-                       goto out_tsk;
-               child->exit_code = SIGKILL;
-               goto out_wake_notrap;
-
-       case PTRACE_SINGLEBLOCK:
-               DBG("sys_ptrace(SINGLEBLOCK)\n");
-               ret = -EIO;
-               if (!valid_signal(data))
-                       goto out_tsk;
-               clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-               child->ptrace &= ~PT_SINGLESTEP;
-               child->ptrace |= PT_BLOCKSTEP;
-               child->exit_code = data;
-
-               /* Enable taken branch trap. */
-               pa_psw(child)->r = 0;
-               pa_psw(child)->t = 1;
-               pa_psw(child)->h = 0;
-               pa_psw(child)->l = 0;
-               goto out_wake;
-
-       case PTRACE_SINGLESTEP:
-               DBG("sys_ptrace(SINGLESTEP)\n");
-               ret = -EIO;
-               if (!valid_signal(data))
-                       goto out_tsk;
-
-               clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-               child->ptrace &= ~PT_BLOCKSTEP;
-               child->ptrace |= PT_SINGLESTEP;
-               child->exit_code = data;
-
-               if (pa_psw(child)->n) {
-                       struct siginfo si;
-
-                       /* Nullified, just crank over the queue. */
-                       task_regs(child)->iaoq[0] = task_regs(child)->iaoq[1];
-                       task_regs(child)->iasq[0] = task_regs(child)->iasq[1];
-                       task_regs(child)->iaoq[1] = task_regs(child)->iaoq[0] + 4;
-                       pa_psw(child)->n = 0;
-                       pa_psw(child)->x = 0;
-                       pa_psw(child)->y = 0;
-                       pa_psw(child)->z = 0;
-                       pa_psw(child)->b = 0;
-                       ptrace_disable(child);
-                       /* Don't wake up the child, but let the
-                          parent know something happened. */
-                       si.si_code = TRAP_TRACE;
-                       si.si_addr = (void __user *) (task_regs(child)->iaoq[0] & ~3);
-                       si.si_signo = SIGTRAP;
-                       si.si_errno = 0;
-                       force_sig_info(SIGTRAP, &si, child);
-                       //notify_parent(child, SIGCHLD);
-                       //ret = 0;
-                       goto out_wake;
-               }
-
-               /* Enable recovery counter traps.  The recovery counter
-                * itself will be set to zero on a task switch.  If the
-                * task is suspended on a syscall then the syscall return
-                * path will overwrite the recovery counter with a suitable
-                * value such that it traps once back in user space.  We
-                * disable interrupts in the childs PSW here also, to avoid
-                * interrupts while the recovery counter is decrementing.
-                */
-               pa_psw(child)->r = 1;
-               pa_psw(child)->t = 0;
-               pa_psw(child)->h = 0;
-               pa_psw(child)->l = 0;
-               /* give it a chance to run. */
-               goto out_wake;
-
-       case PTRACE_GETEVENTMSG:
-                ret = put_user(child->ptrace_message, (unsigned int __user *) data);
-               goto out_tsk;
+               break;
 
        default:
-               ret = ptrace_request(child, request, addr, data);
-               goto out_tsk;
+               ret = compat_ptrace_request(child, request, addr, data);
+               break;
        }
 
-out_wake_notrap:
-       ptrace_disable(child);
-out_wake:
-       wake_up_process(child);
-       ret = 0;
-out_tsk:
-       DBG("arch_ptrace(%ld, %d, %lx, %lx) returning %ld\n",
-               request, pid, oaddr, odata, ret);
        return ret;
 }
+#endif
+
 
 void syscall_trace(void)
 {
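
For context, the single-step machinery above is what a debugger drives through ptrace(2): each PTRACE_SINGLESTEP request ends up in user_enable_single_step(), which on parisc arms the PSW recovery-counter trap. A hedged user-space sketch, not part of this commit, with error handling trimmed:

#include <signal.h>
#include <stdio.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
        int status, steps = 0;
        pid_t pid = fork();

        if (pid == 0) {                 /* child: ask to be traced */
                ptrace(PTRACE_TRACEME, 0, NULL, NULL);
                execl("/bin/true", "true", (char *)NULL);
                _exit(1);
        }

        waitpid(pid, &status, 0);       /* child stops at execve */

        while (steps < 100) {           /* step a bounded number of times */
                if (ptrace(PTRACE_SINGLESTEP, pid, NULL, NULL) < 0)
                        break;
                waitpid(pid, &status, 0);
                if (WIFEXITED(status))
                        break;
                steps++;
        }

        printf("stepped %d instructions\n", steps);
        kill(pid, SIGKILL);
        return 0;
}
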
index 7a92695d95a6bf814a72803da7d02403953eacd4..5f3d3a1f9037c7438b980c3e5f8d23589c4a5461 100644 (file)
@@ -8,12 +8,24 @@
  *
  */
 
+#include <asm/pdc.h>
 #include <asm/psw.h>
 #include <asm/assembly.h>
+#include <asm/asm-offsets.h>
 
 #include <linux/linkage.h>
 
+
        .section        .bss
+
+       .export pdc_result
+       .export pdc_result2
+       .align 8
+pdc_result:
+       .block  ASM_PDC_RESULT_SIZE
+pdc_result2:
+       .block  ASM_PDC_RESULT_SIZE
+
        .export real_stack
        .export real32_stack
        .export real64_stack
index 39e7c5a5946a8981deee1afa1296efc7cff9eadc..7d27853ff8c8b6233ec4a0e1f2fe3747fbbf88aa 100644 (file)
@@ -44,6 +44,7 @@
 #include <asm/pdc_chassis.h>
 #include <asm/io.h>
 #include <asm/setup.h>
+#include <asm/unwind.h>
 
 static char __initdata command_line[COMMAND_LINE_SIZE];
 
@@ -123,6 +124,7 @@ void __init setup_arch(char **cmdline_p)
 #ifdef CONFIG_64BIT
        extern int parisc_narrow_firmware;
 #endif
+       unwind_init();
 
        init_per_cpu(smp_processor_id());       /* Set Modes & Enable FP */
 
@@ -368,6 +370,31 @@ static int __init parisc_init(void)
 
        return 0;
 }
-
 arch_initcall(parisc_init);
 
+void start_parisc(void)
+{
+       extern void start_kernel(void);
+
+       int ret, cpunum;
+       struct pdc_coproc_cfg coproc_cfg;
+
+       cpunum = smp_processor_id();
+
+       set_firmware_width_unlocked();
+
+       ret = pdc_coproc_cfg_unlocked(&coproc_cfg);
+       if (ret >= 0 && coproc_cfg.ccr_functional) {
+               mtctl(coproc_cfg.ccr_functional, 10);
+
+               cpu_data[cpunum].fp_rev = coproc_cfg.revision;
+               cpu_data[cpunum].fp_model = coproc_cfg.model;
+
+               asm volatile ("fstd     %fr0,8(%sp)");
+       } else {
+               panic("must have an fpu to boot linux");
+       }
+
+       start_kernel();
+       // not reached
+}
index c7e59f548817fb36554954d663bfedcdf5e3b2f6..303d2b647e418daab682f586565ade19e5420325 100644 (file)
@@ -87,7 +87,7 @@
        ENTRY_SAME(setuid)
        ENTRY_SAME(getuid)
        ENTRY_COMP(stime)               /* 25 */
-       ENTRY_SAME(ptrace)
+       ENTRY_COMP(ptrace)
        ENTRY_SAME(alarm)
        /* see stat comment */
        ENTRY_COMP(newfstat)
        ENTRY_SAME(timerfd_create)
        ENTRY_COMP(timerfd_settime)
        ENTRY_COMP(timerfd_gettime)
+       ENTRY_COMP(signalfd4)
+       ENTRY_SAME(eventfd2)            /* 310 */
+       ENTRY_SAME(epoll_create1)
+       ENTRY_SAME(dup3)
+       ENTRY_SAME(pipe2)
+       ENTRY_SAME(inotify_init1)
 
        /* Nothing yet */
 
index 24be86bba94d6bdf93618109bb490bf6fbe68205..4d09203bc69307275afcb92f5f3a53d2bdff5057 100644 (file)
@@ -23,6 +23,7 @@
 #include <linux/smp.h>
 #include <linux/profile.h>
 #include <linux/clocksource.h>
+#include <linux/platform_device.h>
 
 #include <asm/uaccess.h>
 #include <asm/io.h>
@@ -215,6 +216,24 @@ void __init start_cpu_itimer(void)
        cpu_data[cpu].it_value = next_tick;
 }
 
+struct platform_device rtc_parisc_dev = {
+       .name = "rtc-parisc",
+       .id = -1,
+};
+
+static int __init rtc_init(void)
+{
+       int ret;
+
+       ret = platform_device_register(&rtc_parisc_dev);
+       if (ret < 0)
+               printk(KERN_ERR "unable to register rtc device...\n");
+
+       /* not necessarily an error */
+       return 0;
+}
+module_init(rtc_init);
+
 void __init time_init(void)
 {
        static struct pdc_tod tod_data;
@@ -245,4 +264,3 @@ void __init time_init(void)
                xtime.tv_nsec = 0;
        }
 }
-
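
The "rtc-parisc" platform device registered above only becomes useful once a platform driver of the same name binds to it. A hedged skeleton of the driver side; the real one lives in drivers/rtc/rtc-parisc.c, and the probe body here is purely illustrative:

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/platform_device.h>

static int __devinit rtc_parisc_probe(struct platform_device *pdev)
{
        /* a real driver would call rtc_device_register() here */
        dev_info(&pdev->dev, "probed\n");
        return 0;
}

static struct platform_driver rtc_parisc_driver = {
        .probe  = rtc_parisc_probe,
        .driver = {
                .name  = "rtc-parisc",  /* must match the device name */
                .owner = THIS_MODULE,
        },
};

static int __init rtc_parisc_driver_init(void)
{
        return platform_driver_register(&rtc_parisc_driver);
}
module_init(rtc_parisc_driver_init);
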
index 701b2d2d88823f55fb2d18a2c03c1e91b2b3f2a9..6773c582e457a15b3e9ddb5006e462754bddd06e 100644 (file)
@@ -170,7 +170,7 @@ void unwind_table_remove(struct unwind_table *table)
 }
 
 /* Called from setup_arch to import the kernel unwind info */
-static int unwind_init(void)
+int unwind_init(void)
 {
        long start, stop;
        register unsigned long gp __asm__ ("r27");
@@ -417,5 +417,3 @@ int unwind_to_user(struct unwind_frame_info *info)
 
        return ret;
 }
-
-module_init(unwind_init);
index 64e144505f653e0ae1d4f190690ef458e9aad5e7..5ac51e6efc1d860ea0d72702d60e63fa42a20c81 100644 (file)
  * 2 of the License, or (at your option) any later version.
  */
 
+#ifndef __ASSEMBLY__
+#include <linux/types.h>
+#else
+#include <asm/types.h>
+#endif
 #include <asm/asm-compat.h>
 #include <asm/kdump.h>
-#include <asm/types.h>
 
 /*
  * On PPC32 page size is 4K. For PPC64 we support either 4K or 64K software
index ae2ea803a0f2502daff6e2ffcd24359c1f602675..9047af7baa697afad42e89b19f2cfdd893ad7430 100644 (file)
@@ -74,6 +74,13 @@ struct pci_controller {
        unsigned long pci_io_size;
 #endif
 
+       /* Some machines have a special region to forward the ISA
+        * "memory" cycles such as VGA memory regions. Left to 0
+        * if unsupported
+        */
+       resource_size_t isa_mem_phys;
+       resource_size_t isa_mem_size;
+
        struct pci_ops *ops;
        unsigned int __iomem *cfg_addr;
        void __iomem *cfg_data;
index 0e52c7828ea498d48c34a37155dba0f57b39a349..39d547fde956a66f9dc3cb5d26914c3e01cb387b 100644 (file)
@@ -123,6 +123,16 @@ int pci_mmap_page_range(struct pci_dev *pdev, struct vm_area_struct *vma,
 /* Tell drivers/pci/proc.c that we have pci_mmap_page_range() */
 #define HAVE_PCI_MMAP  1
 
+extern int pci_legacy_read(struct pci_bus *bus, loff_t port, u32 *val,
+                          size_t count);
+extern int pci_legacy_write(struct pci_bus *bus, loff_t port, u32 val,
+                          size_t count);
+extern int pci_mmap_legacy_page_range(struct pci_bus *bus,
+                                     struct vm_area_struct *vma,
+                                     enum pci_mmap_state mmap_state);
+
+#define HAVE_PCI_LEGACY        1
+
 #if defined(CONFIG_PPC64) || defined(CONFIG_NOT_COHERENT_CACHE)
 /*
  * For 64-bit kernels, pci_unmap_{single,page} is not a nop.
@@ -226,5 +236,6 @@ extern void pci_resource_to_user(const struct pci_dev *dev, int bar,
 extern void pcibios_do_bus_setup(struct pci_bus *bus);
 extern void pcibios_fixup_of_probed_bus(struct pci_bus *bus);
 
+
 #endif /* __KERNEL__ */
 #endif /* __ASM_POWERPC_PCI_H */
index 734e0754fb9bec39a0d8533d77c30ad1c02d1103..280a90cc9894c6ed3f6a2e2611b9d53623218a2f 100644 (file)
@@ -129,7 +129,7 @@ extern int ptrace_put_reg(struct task_struct *task, int regno,
 #define CHECK_FULL_REGS(regs)                                                \
 do {                                                                         \
        if ((regs)->trap & 1)                                                 \
-               printk(KERN_CRIT "%s: partial register set\n", __FUNCTION__); \
+               printk(KERN_CRIT "%s: partial register set\n", __func__); \
 } while (0)
 #endif /* __powerpc64__ */
 
index 01ce8c38bae635334b6b3b9d104f0bb26d5861fe..3815d84a1ef4b8beb71dab7c8a26d90834320ee9 100644 (file)
@@ -451,7 +451,8 @@ pgprot_t pci_phys_mem_access_prot(struct file *file,
                pci_dev_put(pdev);
        }
 
-       DBG("non-PCI map for %lx, prot: %lx\n", offset, prot);
+       DBG("non-PCI map for %llx, prot: %lx\n",
+           (unsigned long long)offset, prot);
 
        return __pgprot(prot);
 }
@@ -490,6 +491,131 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
        return ret;
 }
 
+/* This provides legacy IO read access on a bus */
+int pci_legacy_read(struct pci_bus *bus, loff_t port, u32 *val, size_t size)
+{
+       unsigned long offset;
+       struct pci_controller *hose = pci_bus_to_host(bus);
+       struct resource *rp = &hose->io_resource;
+       void __iomem *addr;
+
+       /* Check if port can be supported by that bus. We only check
+        * the ranges of the PHB though, not the bus itself as the rules
+        * for forwarding legacy cycles down bridges are not our problem
+        * here. So if the host bridge supports it, we do it.
+        */
+       offset = (unsigned long)hose->io_base_virt - _IO_BASE;
+       offset += port;
+
+       if (!(rp->flags & IORESOURCE_IO))
+               return -ENXIO;
+       if (offset < rp->start || (offset + size) > rp->end)
+               return -ENXIO;
+       addr = hose->io_base_virt + port;
+
+       switch(size) {
+       case 1:
+               *((u8 *)val) = in_8(addr);
+               return 1;
+       case 2:
+               if (port & 1)
+                       return -EINVAL;
+               *((u16 *)val) = in_le16(addr);
+               return 2;
+       case 4:
+               if (port & 3)
+                       return -EINVAL;
+               *((u32 *)val) = in_le32(addr);
+               return 4;
+       }
+       return -EINVAL;
+}
+
+/* This provides legacy IO write access on a bus */
+int pci_legacy_write(struct pci_bus *bus, loff_t port, u32 val, size_t size)
+{
+       unsigned long offset;
+       struct pci_controller *hose = pci_bus_to_host(bus);
+       struct resource *rp = &hose->io_resource;
+       void __iomem *addr;
+
+       /* Check if port can be supported by that bus. We only check
+        * the ranges of the PHB though, not the bus itself as the rules
+        * for forwarding legacy cycles down bridges are not our problem
+        * here. So if the host bridge supports it, we do it.
+        */
+       offset = (unsigned long)hose->io_base_virt - _IO_BASE;
+       offset += port;
+
+       if (!(rp->flags & IORESOURCE_IO))
+               return -ENXIO;
+       if (offset < rp->start || (offset + size) > rp->end)
+               return -ENXIO;
+       addr = hose->io_base_virt + port;
+
+       /* WARNING: The generic code is idiotic. It gets passed a pointer
+        * to what can be a 1, 2 or 4 byte quantity and always reads that
+        * as a u32, which means that we have to correct the location of
+        * the data read within those 32 bits for size 1 and 2
+        */
+       switch(size) {
+       case 1:
+               out_8(addr, val >> 24);
+               return 1;
+       case 2:
+               if (port & 1)
+                       return -EINVAL;
+               out_le16(addr, val >> 16);
+               return 2;
+       case 4:
+               if (port & 3)
+                       return -EINVAL;
+               out_le32(addr, val);
+               return 4;
+       }
+       return -EINVAL;
+}
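
These two hooks back the per-bus legacy_io file that the PCI core exports through sysfs once HAVE_PCI_LEGACY is defined. A hedged user-space sketch of a read; the bus path and port number are illustrative, and a 1-byte pread() lands in pci_legacy_read() above with size == 1:

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        uint8_t val;
        int fd = open("/sys/class/pci_bus/0000:00/legacy_io", O_RDONLY);

        if (fd < 0)
                return 1;
        /* the file offset is the port number; 0x3da is the VGA
         * input status register, used here only as an example */
        if (pread(fd, &val, 1, 0x3da) == 1)
                printf("port 0x3da = 0x%02x\n", val);
        close(fd);
        return 0;
}
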
+
+/* This provides legacy IO or memory mmap access on a bus */
+int pci_mmap_legacy_page_range(struct pci_bus *bus,
+                              struct vm_area_struct *vma,
+                              enum pci_mmap_state mmap_state)
+{
+       struct pci_controller *hose = pci_bus_to_host(bus);
+       resource_size_t offset =
+               ((resource_size_t)vma->vm_pgoff) << PAGE_SHIFT;
+       resource_size_t size = vma->vm_end - vma->vm_start;
+       struct resource *rp;
+
+       pr_debug("pci_mmap_legacy_page_range(%04x:%02x, %s @%llx..%llx)\n",
+                pci_domain_nr(bus), bus->number,
+                mmap_state == pci_mmap_mem ? "MEM" : "IO",
+                (unsigned long long)offset,
+                (unsigned long long)(offset + size - 1));
+
+       if (mmap_state == pci_mmap_mem) {
+               if ((offset + size) > hose->isa_mem_size)
+                       return -ENXIO;
+               offset += hose->isa_mem_phys;
+       } else {
+               unsigned long io_offset = (unsigned long)hose->io_base_virt - _IO_BASE;
+               unsigned long roffset = offset + io_offset;
+               rp = &hose->io_resource;
+               if (!(rp->flags & IORESOURCE_IO))
+                       return -ENXIO;
+               if (roffset < rp->start || (roffset + size) > rp->end)
+                       return -ENXIO;
+               offset += hose->io_base_phys;
+       }
+       pr_debug(" -> mapping phys %llx\n", (unsigned long long)offset);
+
+       vma->vm_pgoff = offset >> PAGE_SHIFT;
+       vma->vm_page_prot |= _PAGE_NO_CACHE | _PAGE_GUARDED;
+       return remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
+                              vma->vm_end - vma->vm_start,
+                              vma->vm_page_prot);
+}
+
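
Similarly, pci_mmap_legacy_page_range() serves mmap() on the per-bus legacy_mem sysfs file, using the isa_mem_phys/isa_mem_size window added earlier in this series. A hedged companion sketch that maps the VGA frame buffer region; it assumes a host bridge that actually forwards ISA memory cycles, and the bus path is illustrative:

#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
        volatile unsigned char *vga;
        int fd = open("/sys/class/pci_bus/0000:00/legacy_mem", O_RDWR);

        if (fd < 0)
                return 1;
        /* 0xa0000 is the classic VGA window inside the ISA hole */
        vga = mmap(NULL, 0x10000, PROT_READ | PROT_WRITE,
                   MAP_SHARED, fd, 0xa0000);
        if (vga == MAP_FAILED) {
                close(fd);
                return 1;
        }
        printf("first VGA byte: 0x%02x\n", vga[0]);
        munmap((void *)vga, 0x10000);
        close(fd);
        return 0;
}
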
 void pci_resource_to_user(const struct pci_dev *dev, int bar,
                          const struct resource *rsrc,
                          resource_size_t *start, resource_size_t *end)
@@ -592,6 +718,12 @@ void __devinit pci_process_bridge_OF_ranges(struct pci_controller *hose,
                cpu_addr = of_translate_address(dev, ranges + 3);
                size = of_read_number(ranges + pna + 3, 2);
                ranges += np;
+
+               /* Skip the range if we failed translation or got a
+                * zero-sized region (some firmware tries to feed us
+                * nonsensical zero-sized regions, such as power3, which
+                * look like an attempt at exposing the VGA memory hole)
+                */
                if (cpu_addr == OF_BAD_ADDR || size == 0)
                        continue;
 
@@ -665,6 +797,8 @@ void __devinit pci_process_bridge_OF_ranges(struct pci_controller *hose,
                                isa_hole = memno;
                                if (primary || isa_mem_base == 0)
                                        isa_mem_base = cpu_addr;
+                               hose->isa_mem_phys = cpu_addr;
+                               hose->isa_mem_size = size;
                        }
 
                        /* We get the PCI/Mem offset from the first range or
index 92d20e993ede097d5e732a2bf0b751f9931eb7d1..2ece399f2862361aaa0789f08a49a4bae169043e 100644 (file)
@@ -232,6 +232,7 @@ static void __exit sputrace_exit(void)
 
        remove_proc_entry("sputrace", NULL);
        kfree(sputrace_log);
+       marker_synchronize_unregister();
 }
 
 module_init(sputrace_init);
index 49349ba77d80b81141deb6065510e54fc74eb077..5b9b12321ad132ed1987ae7e5a9bdf278cc4be7a 100644 (file)
@@ -26,6 +26,7 @@ config X86
        select HAVE_KPROBES
        select ARCH_WANT_OPTIONAL_GPIOLIB
        select HAVE_KRETPROBES
+       select HAVE_FTRACE_MCOUNT_RECORD
        select HAVE_DYNAMIC_FTRACE
        select HAVE_FTRACE
        select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
@@ -1242,14 +1243,6 @@ config EFI
        resultant kernel should continue to boot on existing non-EFI
        platforms.
 
-config IRQBALANCE
-       def_bool y
-       prompt "Enable kernel irq balancing"
-       depends on X86_32 && SMP && X86_IO_APIC
-       help
-         The default yes will allow the kernel to do irq load balancing.
-         Saying no will keep the kernel from doing irq load balancing.
-
 config SECCOMP
        def_bool y
        prompt "Enable seccomp to safely compute untrusted bytecode"
index 52d0359719d7dda3c15ec870be577e653d829896..13b8c86ae98570f67375b505e9d106374e24b574 100644 (file)
@@ -287,7 +287,6 @@ CONFIG_MTRR=y
 # CONFIG_MTRR_SANITIZER is not set
 CONFIG_X86_PAT=y
 CONFIG_EFI=y
-# CONFIG_IRQBALANCE is not set
 CONFIG_SECCOMP=y
 # CONFIG_HZ_100 is not set
 # CONFIG_HZ_250 is not set
index 0d41f0343dc0753e5be4c97fd8b36460bc0ab2a5..d7e5a58ee22f376c13caab451ac1dcf6d4e33b08 100644 (file)
@@ -23,7 +23,7 @@ CFLAGS_hpet.o         := $(nostackp)
 CFLAGS_tsc.o           := $(nostackp)
 
 obj-y                  := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o
-obj-y                  += traps.o irq_$(BITS).o dumpstack_$(BITS).o
+obj-y                  += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
 obj-y                  += time_$(BITS).o ioport.o ldt.o
 obj-y                  += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o
 obj-$(CONFIG_X86_VISWS)        += visws_quirks.o
@@ -60,8 +60,8 @@ obj-$(CONFIG_X86_32_SMP)      += smpcommon.o
 obj-$(CONFIG_X86_64_SMP)       += tsc_sync.o smpcommon.o
 obj-$(CONFIG_X86_TRAMPOLINE)   += trampoline_$(BITS).o
 obj-$(CONFIG_X86_MPPARSE)      += mpparse.o
-obj-$(CONFIG_X86_LOCAL_APIC)   += apic_$(BITS).o nmi.o
-obj-$(CONFIG_X86_IO_APIC)      += io_apic_$(BITS).o
+obj-$(CONFIG_X86_LOCAL_APIC)   += apic.o nmi.o
+obj-$(CONFIG_X86_IO_APIC)      += io_apic.o
 obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o
 obj-$(CONFIG_DYNAMIC_FTRACE)   += ftrace.o
 obj-$(CONFIG_KEXEC)            += machine_kexec_$(BITS).o
@@ -108,7 +108,7 @@ obj-$(CONFIG_MICROCODE)                     += microcode.o
 # 64 bit specific files
 ifeq ($(CONFIG_X86_64),y)
         obj-y                          += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o
-       obj-y                           += bios_uv.o
+       obj-y                           += bios_uv.o uv_irq.o uv_sysfs.o
         obj-y                          += genx2apic_cluster.o
         obj-y                          += genx2apic_phys.o
         obj-$(CONFIG_X86_PM_TIMER)     += pmtimer_64.o
index eb875cdc7367c3aee3fcb285606c1e993cc808af..0d1c26a583c59fb3329503b0f44d15fe148ec122 100644 (file)
@@ -1256,7 +1256,7 @@ static int __init acpi_parse_madt_ioapic_entries(void)
 
        count =
            acpi_table_parse_madt(ACPI_MADT_TYPE_INTERRUPT_OVERRIDE, acpi_parse_int_src_ovr,
-                                 NR_IRQ_VECTORS);
+                                 nr_irqs);
        if (count < 0) {
                printk(KERN_ERR PREFIX
                       "Error parsing interrupt source overrides entry\n");
@@ -1276,7 +1276,7 @@ static int __init acpi_parse_madt_ioapic_entries(void)
 
        count =
            acpi_table_parse_madt(ACPI_MADT_TYPE_NMI_SOURCE, acpi_parse_nmi_src,
-                                 NR_IRQ_VECTORS);
+                                 nr_irqs);
        if (count < 0) {
                printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n");
                /* TBD: Cleanup to allow fallback to MPS */
index 426e5d91b63a55d9b49d3387e2c40bb56306f104..c44cd6dbfa1414187213854db2ce3a1129298c76 100644 (file)
@@ -10,6 +10,7 @@
 #include <linux/dmi.h>
 #include <linux/cpumask.h>
 #include <asm/segment.h>
+#include <asm/desc.h>
 
 #include "realmode/wakeup.h"
 #include "sleep.h"
@@ -98,6 +99,8 @@ int acpi_save_state_mem(void)
        header->trampoline_segment = setup_trampoline() >> 4;
 #ifdef CONFIG_SMP
        stack_start.sp = temp_stack + 4096;
+       early_gdt_descr.address =
+                       (unsigned long)get_cpu_gdt_table(smp_processor_id());
 #endif
        initial_code = (unsigned long)wakeup_long64;
        saved_magic = 0x123456789abcdef0;
similarity index 79%
rename from arch/x86/kernel/apic_32.c
rename to arch/x86/kernel/apic.c
index 21c831d96af3d8f8ef63e6403f56fd0e71f5730c..04a7f960bbc0a2408907a959af527382dae477a5 100644 (file)
 #include <linux/mc146818rtc.h>
 #include <linux/kernel_stat.h>
 #include <linux/sysdev.h>
+#include <linux/ioport.h>
 #include <linux/cpu.h>
 #include <linux/clockchips.h>
 #include <linux/acpi_pmtmr.h>
 #include <linux/module.h>
 #include <linux/dmi.h>
+#include <linux/dmar.h>
 
 #include <asm/atomic.h>
 #include <asm/smp.h>
 #include <asm/desc.h>
 #include <asm/arch_hooks.h>
 #include <asm/hpet.h>
+#include <asm/pgalloc.h>
 #include <asm/i8253.h>
 #include <asm/nmi.h>
+#include <asm/idle.h>
+#include <asm/proto.h>
+#include <asm/timex.h>
+#include <asm/apic.h>
+#include <asm/i8259.h>
 
 #include <mach_apic.h>
 #include <mach_apicdef.h>
 # error SPURIOUS_APIC_VECTOR definition error
 #endif
 
-unsigned long mp_lapic_addr;
-
+#ifdef CONFIG_X86_32
 /*
  * Knob to control our willingness to enable the local APIC.
  *
  * +1=force-enable
  */
 static int force_enable_local_apic;
-int disable_apic;
+/*
+ * APIC command line parameters
+ */
+static int __init parse_lapic(char *arg)
+{
+       force_enable_local_apic = 1;
+       return 0;
+}
+early_param("lapic", parse_lapic);
+/* Local APIC was disabled by the BIOS and enabled by the kernel */
+static int enabled_via_apicbase;
+
+#endif
+
+#ifdef CONFIG_X86_64
+static int apic_calibrate_pmtmr __initdata;
+static __init int setup_apicpmtimer(char *s)
+{
+       apic_calibrate_pmtmr = 1;
+       notsc_setup(NULL);
+       return 0;
+}
+__setup("apicpmtimer", setup_apicpmtimer);
+#endif
+
+#ifdef CONFIG_X86_64
+#define HAVE_X2APIC
+#endif
+
+#ifdef HAVE_X2APIC
+int x2apic;
+/* x2apic enabled before OS handover */
+int x2apic_preenabled;
+int disable_x2apic;
+static __init int setup_nox2apic(char *str)
+{
+       disable_x2apic = 1;
+       setup_clear_cpu_cap(X86_FEATURE_X2APIC);
+       return 0;
+}
+early_param("nox2apic", setup_nox2apic);
+#endif
 
+unsigned long mp_lapic_addr;
+int disable_apic;
 /* Disable local APIC timer from the kernel commandline or via dmi quirk */
 static int disable_apic_timer __cpuinitdata;
 /* Local APIC timer works in C2 */
@@ -110,9 +160,6 @@ static struct clock_event_device lapic_clockevent = {
 };
 static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
 
-/* Local APIC was disabled by the BIOS and enabled by the kernel */
-static int enabled_via_apicbase;
-
 static unsigned long apic_phys;
 
 /*
@@ -202,6 +249,42 @@ static struct apic_ops xapic_ops = {
 struct apic_ops __read_mostly *apic_ops = &xapic_ops;
 EXPORT_SYMBOL_GPL(apic_ops);
 
+#ifdef HAVE_X2APIC
+static void x2apic_wait_icr_idle(void)
+{
+       /* no need to wait for icr idle in x2apic */
+       return;
+}
+
+static u32 safe_x2apic_wait_icr_idle(void)
+{
+       /* no need to wait for icr idle in x2apic */
+       return 0;
+}
+
+void x2apic_icr_write(u32 low, u32 id)
+{
+       wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low);
+}
+
+u64 x2apic_icr_read(void)
+{
+       unsigned long val;
+
+       rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val);
+       return val;
+}
+
+static struct apic_ops x2apic_ops = {
+       .read = native_apic_msr_read,
+       .write = native_apic_msr_write,
+       .icr_read = x2apic_icr_read,
+       .icr_write = x2apic_icr_write,
+       .wait_icr_idle = x2apic_wait_icr_idle,
+       .safe_wait_icr_idle = safe_x2apic_wait_icr_idle,
+};
+#endif
+
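
In x2apic mode the ICR is a single 64-bit MSR (APIC_BASE_MSR 0x800 plus APIC_ICR 0x300 shifted right by 4, i.e. MSR 0x830), so the destination id rides in the high half and the idle-wait before a send disappears, as the no-op wait_icr_idle ops above show. A hedged fragment, kernel context assumed and the helper name and vector purely illustrative:

static void example_send_fixed_ipi(u32 apicid, u32 vector)
{
        /* one MSR write replaces the two MMIO stores plus the
         * ICR-idle polling needed in xapic mode */
        x2apic_icr_write(APIC_DM_FIXED | vector, apicid);
}
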
 /**
  * enable_NMI_through_LVT0 - enable NMI through local vector table 0
  */
@@ -219,6 +302,7 @@ void __cpuinit enable_NMI_through_LVT0(void)
        apic_write(APIC_LVT0, v);
 }
 
+#ifdef CONFIG_X86_32
 /**
  * get_physical_broadcast - Get number of physical broadcast IDs
  */
@@ -226,6 +310,7 @@ int get_physical_broadcast(void)
 {
        return modern_apic() ? 0xff : 0xf;
 }
+#endif
 
 /**
  * lapic_get_maxlvt - get the maximum number of local vector table entries
@@ -247,11 +332,7 @@ int lapic_get_maxlvt(void)
  */
 
 /* Clock divisor */
-#ifdef CONFG_X86_64
-#define APIC_DIVISOR 1
-#else
 #define APIC_DIVISOR 16
-#endif
 
 /*
  * This function sets up the local APIC timer, with a timeout of
@@ -383,7 +464,7 @@ static void lapic_timer_broadcast(cpumask_t mask)
  * Set up the local APIC timer for this CPU. Copy the initialized values
  * of the boot CPU and register the clock event in the framework.
  */
-static void __devinit setup_APIC_timer(void)
+static void __cpuinit setup_APIC_timer(void)
 {
        struct clock_event_device *levt = &__get_cpu_var(lapic_events);
 
@@ -453,14 +534,51 @@ static void __init lapic_cal_handler(struct clock_event_device *dev)
        }
 }
 
+static int __init calibrate_by_pmtimer(long deltapm, long *delta)
+{
+       const long pm_100ms = PMTMR_TICKS_PER_SEC / 10;
+       const long pm_thresh = pm_100ms / 100;
+       unsigned long mult;
+       u64 res;
+
+#ifndef CONFIG_X86_PM_TIMER
+       return -1;
+#endif
+
+       apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm);
+
+       /* Check, if the PM timer is available */
+       if (!deltapm)
+               return -1;
+
+       mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22);
+
+       if (deltapm > (pm_100ms - pm_thresh) &&
+           deltapm < (pm_100ms + pm_thresh)) {
+               apic_printk(APIC_VERBOSE, "... PM timer result ok\n");
+       } else {
+               res = (((u64)deltapm) *  mult) >> 22;
+               do_div(res, 1000000);
+               printk(KERN_WARNING "APIC calibration not consistent "
+                       "with PM Timer: %ldms instead of 100ms\n",
+                       (long)res);
+               /* Correct the lapic counter value */
+               res = (((u64)(*delta)) * pm_100ms);
+               do_div(res, deltapm);
+               printk(KERN_INFO "APIC delta adjusted to PM-Timer: "
+                       "%lu (%ld)\n", (unsigned long)res, *delta);
+               *delta = (long)res;
+       }
+
+       return 0;
+}
+
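
The correction in calibrate_by_pmtimer() is simply delta * pm_100ms / deltapm. A worked example with illustrative numbers:

/* PMTMR_TICKS_PER_SEC = 3579545, so pm_100ms = 357954 ticks and
 * pm_thresh (1%) = 3579 ticks.  Suppose the 100ms jiffies window
 * measured delta = 1000000 lapic ticks while the PM timer counted
 * deltapm = 322159 ticks (about 90ms, well outside pm_thresh):
 *
 *     delta' = 1000000 * 357954 / 322159  ~=  1111110
 *
 * so the lapic really ticks about 11% faster than first measured,
 * and the clockevent mult is derived from the corrected delta.
 */
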
 static int __init calibrate_APIC_clock(void)
 {
        struct clock_event_device *levt = &__get_cpu_var(lapic_events);
-       const long pm_100ms = PMTMR_TICKS_PER_SEC/10;
-       const long pm_thresh = pm_100ms/100;
        void (*real_handler)(struct clock_event_device *dev);
        unsigned long deltaj;
-       long delta, deltapm;
+       long delta;
        int pm_referenced = 0;
 
        local_irq_disable();
@@ -470,10 +588,10 @@ static int __init calibrate_APIC_clock(void)
        global_clock_event->event_handler = lapic_cal_handler;
 
        /*
-        * Setup the APIC counter to 1e9. There is no way the lapic
+        * Setup the APIC counter to maximum. There is no way the lapic
         * can underflow in the 100ms detection time frame
         */
-       __setup_APIC_LVTT(1000000000, 0, 0);
+       __setup_APIC_LVTT(0xffffffff, 0, 0);
 
        /* Let the interrupts run */
        local_irq_enable();
@@ -490,34 +608,9 @@ static int __init calibrate_APIC_clock(void)
        delta = lapic_cal_t1 - lapic_cal_t2;
        apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta);
 
-       /* Check, if the PM timer is available */
-       deltapm = lapic_cal_pm2 - lapic_cal_pm1;
-       apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm);
-
-       if (deltapm) {
-               unsigned long mult;
-               u64 res;
-
-               mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22);
-
-               if (deltapm > (pm_100ms - pm_thresh) &&
-                   deltapm < (pm_100ms + pm_thresh)) {
-                       apic_printk(APIC_VERBOSE, "... PM timer result ok\n");
-               } else {
-                       res = (((u64) deltapm) *  mult) >> 22;
-                       do_div(res, 1000000);
-                       printk(KERN_WARNING "APIC calibration not consistent "
-                              "with PM Timer: %ldms instead of 100ms\n",
-                              (long)res);
-                       /* Correct the lapic counter value */
-                       res = (((u64) delta) * pm_100ms);
-                       do_div(res, deltapm);
-                       printk(KERN_INFO "APIC delta adjusted to PM-Timer: "
-                              "%lu (%ld)\n", (unsigned long) res, delta);
-                       delta = (long) res;
-               }
-               pm_referenced = 1;
-       }
+       /* we trust the PM based calibration if possible */
+       pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1,
+                                       &delta);
 
        /* Calculate the scaled math multiplication factor */
        lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS,
@@ -559,7 +652,10 @@ static int __init calibrate_APIC_clock(void)
 
        levt->features &= ~CLOCK_EVT_FEAT_DUMMY;
 
-       /* We trust the pm timer based calibration */
+       /*
+        * PM timer calibration failed or was not turned on,
+        * so let's try APIC timer based calibration
+        */
        if (!pm_referenced) {
                apic_printk(APIC_VERBOSE, "... verify APIC timer\n");
 
@@ -652,7 +748,7 @@ void __init setup_boot_APIC_clock(void)
        setup_APIC_timer();
 }
 
-void __devinit setup_secondary_APIC_clock(void)
+void __cpuinit setup_secondary_APIC_clock(void)
 {
        setup_APIC_timer();
 }
@@ -718,6 +814,9 @@ void smp_apic_timer_interrupt(struct pt_regs *regs)
         * Besides, if we don't, timer interrupts ignore the global
         * interrupt lock, which is the WrongThing (tm) to do.
         */
+#ifdef CONFIG_X86_64
+       exit_idle();
+#endif
        irq_enter();
        local_apic_timer_interrupt();
        irq_exit();
@@ -991,40 +1090,43 @@ void __init init_bsp_APIC(void)
 
 static void __cpuinit lapic_setup_esr(void)
 {
-       unsigned long oldvalue, value, maxlvt;
-       if (lapic_is_integrated() && !esr_disable) {
-               if (esr_disable) {
-                       /*
-                        * Something untraceable is creating bad interrupts on
-                        * secondary quads ... for the moment, just leave the
-                        * ESR disabled - we can't do anything useful with the
-                        * errors anyway - mbligh
-                        */
-                       printk(KERN_INFO "Leaving ESR disabled.\n");
-                       return;
-               }
-               /* !82489DX */
-               maxlvt = lapic_get_maxlvt();
-               if (maxlvt > 3)         /* Due to the Pentium erratum 3AP. */
-                       apic_write(APIC_ESR, 0);
-               oldvalue = apic_read(APIC_ESR);
+       unsigned int oldvalue, value, maxlvt;
+
+       if (!lapic_is_integrated()) {
+               printk(KERN_INFO "No ESR for 82489DX.\n");
+               return;
+       }
 
-               /* enables sending errors */
-               value = ERROR_APIC_VECTOR;
-               apic_write(APIC_LVTERR, value);
+       if (esr_disable) {
                /*
-                * spec says clear errors after enabling vector.
+                * Something untraceable is creating bad interrupts on
+                * secondary quads ... for the moment, just leave the
+                * ESR disabled - we can't do anything useful with the
+                * errors anyway - mbligh
                 */
-               if (maxlvt > 3)
-                       apic_write(APIC_ESR, 0);
-               value = apic_read(APIC_ESR);
-               if (value != oldvalue)
-                       apic_printk(APIC_VERBOSE, "ESR value before enabling "
-                               "vector: 0x%08lx  after: 0x%08lx\n",
-                               oldvalue, value);
-       } else {
-               printk(KERN_INFO "No ESR for 82489DX.\n");
+               printk(KERN_INFO "Leaving ESR disabled.\n");
+               return;
        }
+
+       maxlvt = lapic_get_maxlvt();
+       if (maxlvt > 3)         /* Due to the Pentium erratum 3AP. */
+               apic_write(APIC_ESR, 0);
+       oldvalue = apic_read(APIC_ESR);
+
+       /* enables sending errors */
+       value = ERROR_APIC_VECTOR;
+       apic_write(APIC_LVTERR, value);
+
+       /*
+        * spec says clear errors after enabling vector.
+        */
+       if (maxlvt > 3)
+               apic_write(APIC_ESR, 0);
+       value = apic_read(APIC_ESR);
+       if (value != oldvalue)
+               apic_printk(APIC_VERBOSE, "ESR value before enabling "
+                       "vector: 0x%08x  after: 0x%08x\n",
+                       oldvalue, value);
 }
 
 
@@ -1033,24 +1135,27 @@ static void __cpuinit lapic_setup_esr(void)
  */
 void __cpuinit setup_local_APIC(void)
 {
-       unsigned long value, integrated;
+       unsigned int value;
        int i, j;
 
+#ifdef CONFIG_X86_32
        /* Pound the ESR really hard over the head with a big hammer - mbligh */
-       if (esr_disable) {
+       if (lapic_is_integrated() && esr_disable) {
                apic_write(APIC_ESR, 0);
                apic_write(APIC_ESR, 0);
                apic_write(APIC_ESR, 0);
                apic_write(APIC_ESR, 0);
        }
+#endif
 
-       integrated = lapic_is_integrated();
+       preempt_disable();
 
        /*
         * Double-check whether this APIC is really registered.
+        * This is meaningless in clustered apic mode, so we skip it.
         */
        if (!apic_id_registered())
-               WARN_ON_ONCE(1);
+               BUG();
 
        /*
         * Intel recommends to set DFR, LDR and TPR before enabling
@@ -1096,6 +1201,7 @@ void __cpuinit setup_local_APIC(void)
         */
        value |= APIC_SPIV_APIC_ENABLED;
 
+#ifdef CONFIG_X86_32
        /*
         * Some unknown Intel IO/APIC (or APIC) errata is biting us with
         * certain networking cards. If high frequency interrupts are
@@ -1116,8 +1222,13 @@ void __cpuinit setup_local_APIC(void)
         * See also the comment in end_level_ioapic_irq().  --macro
         */
 
-       /* Enable focus processor (bit==0) */
+       /*
+        * - enable focus processor (bit==0)
+        * - 64bit mode always use processor focus
+        *   so no need to set it
+        */
        value &= ~APIC_SPIV_FOCUS_DISABLED;
+#endif
 
        /*
         * Set spurious IRQ vector
@@ -1154,9 +1265,11 @@ void __cpuinit setup_local_APIC(void)
                value = APIC_DM_NMI;
        else
                value = APIC_DM_NMI | APIC_LVT_MASKED;
-       if (!integrated)                /* 82489DX */
+       if (!lapic_is_integrated())             /* 82489DX */
                value |= APIC_LVT_LEVEL_TRIGGER;
        apic_write(APIC_LVT1, value);
+
+       preempt_enable();
 }
 
 void __cpuinit end_local_APIC_setup(void)
@@ -1177,6 +1290,153 @@ void __cpuinit end_local_APIC_setup(void)
        apic_pm_activate();
 }
 
+#ifdef HAVE_X2APIC
+void check_x2apic(void)
+{
+       int msr, msr2;
+
+       rdmsr(MSR_IA32_APICBASE, msr, msr2);
+
+       if (msr & X2APIC_ENABLE) {
+               printk("x2apic enabled by BIOS, switching to x2apic ops\n");
+               x2apic_preenabled = x2apic = 1;
+               apic_ops = &x2apic_ops;
+       }
+}
+
+void enable_x2apic(void)
+{
+       int msr, msr2;
+
+       rdmsr(MSR_IA32_APICBASE, msr, msr2);
+       if (!(msr & X2APIC_ENABLE)) {
+               printk("Enabling x2apic\n");
+               wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0);
+       }
+}
+
+void enable_IR_x2apic(void)
+{
+#ifdef CONFIG_INTR_REMAP
+       int ret;
+       unsigned long flags;
+
+       if (!cpu_has_x2apic)
+               return;
+
+       if (!x2apic_preenabled && disable_x2apic) {
+               printk(KERN_INFO
+                      "Skipped enabling x2apic and Interrupt-remapping "
+                      "because of nox2apic\n");
+               return;
+       }
+
+       if (x2apic_preenabled && disable_x2apic)
+               panic("Bios already enabled x2apic, can't enforce nox2apic");
+
+       if (!x2apic_preenabled && skip_ioapic_setup) {
+               printk(KERN_INFO
+                      "Skipped enabling x2apic and Interrupt-remapping "
+                      "because of skipping io-apic setup\n");
+               return;
+       }
+
+       ret = dmar_table_init();
+       if (ret) {
+               printk(KERN_INFO
+                      "dmar_table_init() failed with %d:\n", ret);
+
+               if (x2apic_preenabled)
+                       panic("x2apic enabled by bios. But IR enabling failed");
+               else
+                       printk(KERN_INFO
+                              "Not enabling x2apic,Intr-remapping\n");
+               return;
+       }
+
+       local_irq_save(flags);
+       mask_8259A();
+
+       ret = save_mask_IO_APIC_setup();
+       if (ret) {
+               printk(KERN_INFO "Saving IO-APIC state failed: %d\n", ret);
+               goto end;
+       }
+
+       ret = enable_intr_remapping(1);
+
+       if (ret && x2apic_preenabled) {
+               local_irq_restore(flags);
+               panic("x2apic enabled by bios. But IR enabling failed");
+       }
+
+       if (ret)
+               goto end_restore;
+
+       if (!x2apic) {
+               x2apic = 1;
+               apic_ops = &x2apic_ops;
+               enable_x2apic();
+       }
+
+end_restore:
+       if (ret)
+               /*
+                * IR enabling failed
+                */
+               restore_IO_APIC_setup();
+       else
+               reinit_intr_remapped_IO_APIC(x2apic_preenabled);
+
+end:
+       unmask_8259A();
+       local_irq_restore(flags);
+
+       if (!ret) {
+               if (!x2apic_preenabled)
+                       printk(KERN_INFO
+                              "Enabled x2apic and interrupt-remapping\n");
+               else
+                       printk(KERN_INFO
+                              "Enabled Interrupt-remapping\n");
+       } else
+               printk(KERN_ERR
+                      "Failed to enable Interrupt-remapping and x2apic\n");
+#else
+       if (!cpu_has_x2apic)
+               return;
+
+       if (x2apic_preenabled)
+               panic("x2apic enabled prior OS handover,"
+                     " enable CONFIG_INTR_REMAP");
+
+       printk(KERN_INFO "Enable CONFIG_INTR_REMAP for enabling intr-remapping "
+              " and x2apic\n");
+#endif
+
+       return;
+}
+#endif /* HAVE_X2APIC */
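
enable_IR_x2apic() is structured as a step-by-step unwind: the 8259 is masked and the IO-APIC state saved before enable_intr_remapping() is attempted, and the end_restore/end labels undo exactly the steps that succeeded once a later one fails. A minimal self-contained sketch of that goto-unwind pattern, with hypothetical step names standing in for the real calls:

#include <stdio.h>

static int step_a(void) { return 0; }   /* e.g. save_mask_IO_APIC_setup() */
static void undo_a(void) { }            /* e.g. restore_IO_APIC_setup() */
static int step_b(void) { return -1; }  /* e.g. enable_intr_remapping() */

static int do_setup(void)
{
        int ret;

        ret = step_a();
        if (ret)
                goto out;               /* nothing to undo yet */
        ret = step_b();
        if (ret)
                goto undo_a;            /* unwind only what succeeded */
        return 0;

undo_a:
        undo_a();
out:
        return ret;
}

int main(void)
{
        printf("do_setup() = %d\n", do_setup());        /* prints -1 */
        return 0;
}
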
+
+#ifdef CONFIG_X86_64
+/*
+ * Detect and enable local APICs on non-SMP boards.
+ * Original code written by Keir Fraser.
+ * On AMD64 we trust the BIOS - if it says no APIC it is likely
+ * not correctly set up (usually the APIC timer won't work etc.)
+ */
+static int __init detect_init_APIC(void)
+{
+       if (!cpu_has_apic) {
+               printk(KERN_INFO "No local APIC present\n");
+               return -1;
+       }
+
+       mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
+       boot_cpu_physical_apicid = 0;
+       return 0;
+}
+#else
 /*
  * Detect and initialize APIC
  */
@@ -1255,12 +1515,46 @@ no_apic:
        printk(KERN_INFO "No local APIC present or hardware disabled\n");
        return -1;
 }
+#endif
+
+#ifdef CONFIG_X86_64
+void __init early_init_lapic_mapping(void)
+{
+       unsigned long phys_addr;
+
+       /*
+        * If no local APIC can be found then bail out:
+        * it means there is neither an MP table nor a MADT
+        */
+       if (!smp_found_config)
+               return;
+
+       phys_addr = mp_lapic_addr;
+
+       set_fixmap_nocache(FIX_APIC_BASE, phys_addr);
+       apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
+                   APIC_BASE, phys_addr);
+
+       /*
+        * Fetch the APIC ID of the BSP in case we have a
+        * default configuration (or the MP table is broken).
+        */
+       boot_cpu_physical_apicid = read_apic_id();
+}
+#endif
 
 /**
  * init_apic_mappings - initialize APIC mappings
  */
 void __init init_apic_mappings(void)
 {
+#ifdef HAVE_X2APIC
+       if (x2apic) {
+               boot_cpu_physical_apicid = read_apic_id();
+               return;
+       }
+#endif
+
        /*
         * If no local APIC can be found then set up a fake all
         * zeroes page to simulate the local APIC and another
@@ -1273,8 +1567,8 @@ void __init init_apic_mappings(void)
                apic_phys = mp_lapic_addr;
 
        set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
-       printk(KERN_DEBUG "mapped APIC to %08lx (%08lx)\n", APIC_BASE,
-              apic_phys);
+       apic_printk(APIC_VERBOSE, "mapped APIC to %08lx (%08lx)\n",
+                               APIC_BASE, apic_phys);
 
        /*
         * Fetch the APIC ID of the BSP in case we have a
@@ -1282,18 +1576,27 @@ void __init init_apic_mappings(void)
         */
        if (boot_cpu_physical_apicid == -1U)
                boot_cpu_physical_apicid = read_apic_id();
-
 }
 
 /*
  * This initializes the IO-APIC and APIC hardware if this is
  * a UP kernel.
  */
-
 int apic_version[MAX_APICS];
 
 int __init APIC_init_uniprocessor(void)
 {
+#ifdef CONFIG_X86_64
+       if (disable_apic) {
+               printk(KERN_INFO "Apic disabled\n");
+               return -1;
+       }
+       if (!cpu_has_apic) {
+               disable_apic = 1;
+               printk(KERN_INFO "Apic disabled by BIOS\n");
+               return -1;
+       }
+#else
        if (!smp_found_config && !cpu_has_apic)
                return -1;
 
@@ -1302,39 +1605,68 @@ int __init APIC_init_uniprocessor(void)
         */
        if (!cpu_has_apic &&
            APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
-               printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
+               printk(KERN_ERR "BIOS bug, local APIC 0x%x not detected!...\n",
                       boot_cpu_physical_apicid);
                clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
                return -1;
        }
+#endif
 
-       verify_local_APIC();
+#ifdef HAVE_X2APIC
+       enable_IR_x2apic();
+#endif
+#ifdef CONFIG_X86_64
+       setup_apic_routing();
+#endif
 
+       verify_local_APIC();
        connect_bsp_APIC();
 
+#ifdef CONFIG_X86_64
+       apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid));
+#else
        /*
         * Hack: In case of kdump, after a crash, kernel might be booting
         * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid
         * might be zero if read from MP tables. Get it from LAPIC.
         */
-#ifdef CONFIG_CRASH_DUMP
+# ifdef CONFIG_CRASH_DUMP
        boot_cpu_physical_apicid = read_apic_id();
+# endif
 #endif
        physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
-
        setup_local_APIC();
 
+#ifdef CONFIG_X86_64
+       /*
+        * Now enable IO-APICs, actually call clear_IO_APIC
+        * We need clear_IO_APIC before enabling vector on BP
+        */
+       if (!skip_ioapic_setup && nr_ioapics)
+               enable_IO_APIC();
+#endif
+
 #ifdef CONFIG_X86_IO_APIC
        if (!smp_found_config || skip_ioapic_setup || !nr_ioapics)
 #endif
                localise_nmi_watchdog();
        end_local_APIC_setup();
+
 #ifdef CONFIG_X86_IO_APIC
-       if (smp_found_config)
-               if (!skip_ioapic_setup && nr_ioapics)
-                       setup_IO_APIC();
+       if (smp_found_config && !skip_ioapic_setup && nr_ioapics)
+               setup_IO_APIC();
+# ifdef CONFIG_X86_64
+       else
+               nr_ioapics = 0;
+# endif
 #endif
+
+#ifdef CONFIG_X86_64
+       setup_boot_APIC_clock();
+       check_nmi_watchdog();
+#else
        setup_boot_clock();
+#endif
 
        return 0;
 }
@@ -1348,8 +1680,11 @@ int __init APIC_init_uniprocessor(void)
  */
 void smp_spurious_interrupt(struct pt_regs *regs)
 {
-       unsigned long v;
+       u32 v;
 
+#ifdef CONFIG_X86_64
+       exit_idle();
+#endif
        irq_enter();
        /*
         * Check if this really is a spurious interrupt and ACK it
@@ -1360,10 +1695,14 @@ void smp_spurious_interrupt(struct pt_regs *regs)
        if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
                ack_APIC_irq();
 
+#ifdef CONFIG_X86_64
+       add_pda(irq_spurious_count, 1);
+#else
        /* see sw-dev-man vol 3, chapter 7.4.13.5 */
        printk(KERN_INFO "spurious APIC interrupt on CPU#%d, "
               "should never happen.\n", smp_processor_id());
        __get_cpu_var(irq_stat).irq_spurious_count++;
+#endif
        irq_exit();
 }
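
The ISR read above indexes an array: the in-service register is eight 32-bit words spaced 0x10 apart, so vector v lives in word v >> 5 at bit v & 0x1f, and (v & ~0x1f) >> 1 is just (v >> 5) * 0x10. A small standalone check of that arithmetic, with APIC_ISR = 0x100 as in apicdef.h:

#include <assert.h>
#include <stdint.h>

#define APIC_ISR 0x100

/* register offset and bit position of vector v in the ISR array */
static uint32_t isr_reg(uint8_t v) { return APIC_ISR + ((v & ~0x1fU) >> 1); }
static uint32_t isr_bit(uint8_t v) { return 1U << (v & 0x1f); }

int main(void)
{
        /* a spurious vector of 0xff lands in the last ISR word, top bit */
        assert(isr_reg(0xff) == APIC_ISR + 7 * 0x10);
        assert(isr_bit(0xff) == 1U << 31);
        return 0;
}
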
 
@@ -1372,8 +1711,11 @@ void smp_spurious_interrupt(struct pt_regs *regs)
  */
 void smp_error_interrupt(struct pt_regs *regs)
 {
-       unsigned long v, v1;
+       u32 v, v1;
 
+#ifdef CONFIG_X86_64
+       exit_idle();
+#endif
        irq_enter();
        /* First tickle the hardware, only then report what went on. -- REW */
        v = apic_read(APIC_ESR);
@@ -1392,7 +1734,7 @@ void smp_error_interrupt(struct pt_regs *regs)
           6: Received illegal vector
           7: Illegal register address
        */
-       printk(KERN_DEBUG "APIC error on CPU%d: %02lx(%02lx)\n",
+       printk(KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n",
                smp_processor_id(), v, v1);
        irq_exit();
 }
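
The bit meanings in the comment above translate directly into a decoder. A standalone, illustrative sketch that prints each set ESR bit from a sample value:

#include <stdio.h>

static const char *esr_bits[8] = {
        "Send CS error", "Receive CS error",
        "Send accept error", "Receive accept error",
        "Reserved", "Send illegal vector",
        "Received illegal vector", "Illegal register address",
};

static void decode_esr(unsigned int v)
{
        int i;

        for (i = 0; i < 8; i++)
                if (v & (1U << i))
                        printf("  ESR[%d]: %s\n", i, esr_bits[i]);
}

int main(void)
{
        decode_esr(0x44);       /* sample value: bits 2 and 6 set */
        return 0;
}
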
@@ -1565,6 +1907,13 @@ void __cpuinit generic_processor_info(int apicid, int version)
        cpu_set(cpu, cpu_present_map);
 }
 
+#ifdef CONFIG_X86_64
+int hard_smp_processor_id(void)
+{
+       return read_apic_id();
+}
+#endif
+
 /*
  * Power management
  */
@@ -1640,7 +1989,7 @@ static int lapic_resume(struct sys_device *dev)
 
        local_irq_save(flags);
 
-#ifdef CONFIG_X86_64
+#ifdef HAVE_X2APIC
        if (x2apic)
                enable_x2apic();
        else
@@ -1702,7 +2051,7 @@ static struct sys_device device_lapic = {
        .cls    = &lapic_sysclass,
 };
 
-static void __devinit apic_pm_activate(void)
+static void __cpuinit apic_pm_activate(void)
 {
        apic_pm_state.active = 1;
 }
@@ -1728,16 +2077,87 @@ static void apic_pm_activate(void) { }
 
 #endif /* CONFIG_PM */
 
+#ifdef CONFIG_X86_64
 /*
- * APIC command line parameters
+ * apic_is_clustered_box() -- Check if we can expect good TSC
+ *
+ * Thus far, the major user of this is IBM's Summit2 series:
+ *
+ * Clustered boxes may have unsynced TSC problems if they are
+ * multi-chassis. Use available data to take a good guess.
+ * If in doubt, go HPET.
  */
-static int __init parse_lapic(char *arg)
+__cpuinit int apic_is_clustered_box(void)
 {
-       force_enable_local_apic = 1;
-       return 0;
+       int i, clusters, zeros;
+       unsigned id;
+       u16 *bios_cpu_apicid;
+       DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS);
+
+       /*
+        * There is no such box with AMD CPUs yet. Some AMD boxes
+        * with quad-core CPUs and 8 sockets will have APIC IDs of
+        * [4, 0x23] or [8, 0x27] and could be mistaken for vSMP
+        * boxes, so this still needs checking...
+        */
+       if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box())
+               return 0;
+
+       bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
+       bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
+
+       for (i = 0; i < NR_CPUS; i++) {
+               /* are we being called early in kernel startup? */
+               if (bios_cpu_apicid) {
+                       id = bios_cpu_apicid[i];
+               } else if (i < nr_cpu_ids) {
+                       if (cpu_present(i))
+                               id = per_cpu(x86_bios_cpu_apicid, i);
+                       else
+                               continue;
+               } else
+                       break;
+
+               if (id != BAD_APICID)
+                       __set_bit(APIC_CLUSTERID(id), clustermap);
+       }
+
+       /* Problem:  Partially populated chassis may not have CPUs in some of
+        * the APIC clusters they have been allocated.  Only present CPUs have
+        * x86_bios_cpu_apicid entries, thus causing zeroes in the bitmap.
+        * Since clusters are allocated sequentially, count zeros only if
+        * they are bounded by ones.
+        */
+       clusters = 0;
+       zeros = 0;
+       for (i = 0; i < NUM_APIC_CLUSTERS; i++) {
+               if (test_bit(i, clustermap)) {
+                       clusters += 1 + zeros;
+                       zeros = 0;
+               } else
+                       ++zeros;
+       }
+
+       /* ScaleMP vSMPowered boxes have one cluster per board and TSCs are
+        * not guaranteed to be synced between boards
+        */
+       if (is_vsmp_box() && clusters > 1)
+               return 1;
+
+       /*
+        * If clusters > 2, then should be multi-chassis.
+        * May have to revisit this when multi-core + hyperthreaded CPUs come
+        * out, but AFAIK this will work even for them.
+        */
+       return (clusters > 2);
 }
-early_param("lapic", parse_lapic);
+#endif
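
The counting loop above credits a run of zero bits only once a later set bit bounds it, so gaps inside the allocated cluster range count as clusters while trailing empty ones do not. A standalone rendering with a small example:

#include <stdio.h>

static int count_clusters(const int *map, int n)
{
        int i, clusters = 0, zeros = 0;

        for (i = 0; i < n; i++) {
                if (map[i]) {
                        clusters += 1 + zeros; /* bounded gap counts */
                        zeros = 0;
                } else
                        ++zeros;               /* pending, maybe unbounded */
        }
        return clusters;
}

int main(void)
{
        int map[] = { 1, 0, 0, 1, 0 }; /* interior gap bounded, tail not */

        printf("%d\n", count_clusters(map, 5)); /* prints 4 */
        return 0;
}
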
 
+/*
+ * APIC command line parameters
+ */
 static int __init setup_disableapic(char *arg)
 {
        disable_apic = 1;
@@ -1779,7 +2199,6 @@ static int __init apic_set_verbosity(char *arg)
        if (!arg)  {
 #ifdef CONFIG_X86_64
                skip_ioapic_setup = 0;
-               ioapic_force = 1;
                return 0;
 #endif
                return -EINVAL;
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
deleted file mode 100644 (file)
index 94ddb69..0000000
+++ /dev/null
@@ -1,1848 +0,0 @@
-/*
- *     Local APIC handling, local APIC timers
- *
- *     (c) 1999, 2000 Ingo Molnar <mingo@redhat.com>
- *
- *     Fixes
- *     Maciej W. Rozycki       :       Bits for genuine 82489DX APICs;
- *                                     thanks to Eric Gilmore
- *                                     and Rolf G. Tews
- *                                     for testing these extensively.
- *     Maciej W. Rozycki       :       Various updates and fixes.
- *     Mikael Pettersson       :       Power Management for UP-APIC.
- *     Pavel Machek and
- *     Mikael Pettersson       :       PM converted to driver model.
- */
-
-#include <linux/init.h>
-
-#include <linux/mm.h>
-#include <linux/delay.h>
-#include <linux/bootmem.h>
-#include <linux/interrupt.h>
-#include <linux/mc146818rtc.h>
-#include <linux/kernel_stat.h>
-#include <linux/sysdev.h>
-#include <linux/ioport.h>
-#include <linux/clockchips.h>
-#include <linux/acpi_pmtmr.h>
-#include <linux/module.h>
-#include <linux/dmar.h>
-
-#include <asm/atomic.h>
-#include <asm/smp.h>
-#include <asm/mtrr.h>
-#include <asm/mpspec.h>
-#include <asm/hpet.h>
-#include <asm/pgalloc.h>
-#include <asm/nmi.h>
-#include <asm/idle.h>
-#include <asm/proto.h>
-#include <asm/timex.h>
-#include <asm/apic.h>
-#include <asm/i8259.h>
-
-#include <mach_ipi.h>
-#include <mach_apic.h>
-
-/* Disable local APIC timer from the kernel commandline or via dmi quirk */
-static int disable_apic_timer __cpuinitdata;
-static int apic_calibrate_pmtmr __initdata;
-int disable_apic;
-int disable_x2apic;
-int x2apic;
-
-/* x2apic enabled before OS handover */
-int x2apic_preenabled;
-
-/* Local APIC timer works in C2 */
-int local_apic_timer_c2_ok;
-EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
-
-/*
- * Debug level, exported for io_apic.c
- */
-unsigned int apic_verbosity;
-
-/* Have we found an MP table */
-int smp_found_config;
-
-static struct resource lapic_resource = {
-       .name = "Local APIC",
-       .flags = IORESOURCE_MEM | IORESOURCE_BUSY,
-};
-
-static unsigned int calibration_result;
-
-static int lapic_next_event(unsigned long delta,
-                           struct clock_event_device *evt);
-static void lapic_timer_setup(enum clock_event_mode mode,
-                             struct clock_event_device *evt);
-static void lapic_timer_broadcast(cpumask_t mask);
-static void apic_pm_activate(void);
-
-/*
- * The local apic timer can be used for any function which is CPU local.
- */
-static struct clock_event_device lapic_clockevent = {
-       .name           = "lapic",
-       .features       = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT
-                       | CLOCK_EVT_FEAT_C3STOP | CLOCK_EVT_FEAT_DUMMY,
-       .shift          = 32,
-       .set_mode       = lapic_timer_setup,
-       .set_next_event = lapic_next_event,
-       .broadcast      = lapic_timer_broadcast,
-       .rating         = 100,
-       .irq            = -1,
-};
-static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
-
-static unsigned long apic_phys;
-
-unsigned long mp_lapic_addr;
-
-/*
- * Get the LAPIC version
- */
-static inline int lapic_get_version(void)
-{
-       return GET_APIC_VERSION(apic_read(APIC_LVR));
-}
-
-/*
- * Check, if the APIC is integrated or a separate chip
- */
-static inline int lapic_is_integrated(void)
-{
-#ifdef CONFIG_X86_64
-       return 1;
-#else
-       return APIC_INTEGRATED(lapic_get_version());
-#endif
-}
-
-/*
- * Check, whether this is a modern or a first generation APIC
- */
-static int modern_apic(void)
-{
-       /* AMD systems use old APIC versions, so check the CPU */
-       if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
-           boot_cpu_data.x86 >= 0xf)
-               return 1;
-       return lapic_get_version() >= 0x14;
-}
-
-/*
- * Paravirt kernels also might be using these below ops. So we still
- * use generic apic_read()/apic_write(), which might be pointing to different
- * ops in PARAVIRT case.
- */
-void xapic_wait_icr_idle(void)
-{
-       while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
-               cpu_relax();
-}
-
-u32 safe_xapic_wait_icr_idle(void)
-{
-       u32 send_status;
-       int timeout;
-
-       timeout = 0;
-       do {
-               send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
-               if (!send_status)
-                       break;
-               udelay(100);
-       } while (timeout++ < 1000);
-
-       return send_status;
-}
-
-void xapic_icr_write(u32 low, u32 id)
-{
-       apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id));
-       apic_write(APIC_ICR, low);
-}
-
-u64 xapic_icr_read(void)
-{
-       u32 icr1, icr2;
-
-       icr2 = apic_read(APIC_ICR2);
-       icr1 = apic_read(APIC_ICR);
-
-       return icr1 | ((u64)icr2 << 32);
-}
-
-static struct apic_ops xapic_ops = {
-       .read = native_apic_mem_read,
-       .write = native_apic_mem_write,
-       .icr_read = xapic_icr_read,
-       .icr_write = xapic_icr_write,
-       .wait_icr_idle = xapic_wait_icr_idle,
-       .safe_wait_icr_idle = safe_xapic_wait_icr_idle,
-};
-
-struct apic_ops __read_mostly *apic_ops = &xapic_ops;
-EXPORT_SYMBOL_GPL(apic_ops);
-
-static void x2apic_wait_icr_idle(void)
-{
-       /* no need to wait for icr idle in x2apic */
-       return;
-}
-
-static u32 safe_x2apic_wait_icr_idle(void)
-{
-       /* no need to wait for icr idle in x2apic */
-       return 0;
-}
-
-void x2apic_icr_write(u32 low, u32 id)
-{
-       wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low);
-}
-
-u64 x2apic_icr_read(void)
-{
-       unsigned long val;
-
-       rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val);
-       return val;
-}
-
-static struct apic_ops x2apic_ops = {
-       .read = native_apic_msr_read,
-       .write = native_apic_msr_write,
-       .icr_read = x2apic_icr_read,
-       .icr_write = x2apic_icr_write,
-       .wait_icr_idle = x2apic_wait_icr_idle,
-       .safe_wait_icr_idle = safe_x2apic_wait_icr_idle,
-};
-
-/**
- * enable_NMI_through_LVT0 - enable NMI through local vector table 0
- */
-void __cpuinit enable_NMI_through_LVT0(void)
-{
-       unsigned int v;
-
-       /* unmask and set to NMI */
-       v = APIC_DM_NMI;
-
-       /* Level triggered for 82489DX (32bit mode) */
-       if (!lapic_is_integrated())
-               v |= APIC_LVT_LEVEL_TRIGGER;
-
-       apic_write(APIC_LVT0, v);
-}
-
-/**
- * lapic_get_maxlvt - get the maximum number of local vector table entries
- */
-int lapic_get_maxlvt(void)
-{
-       unsigned int v;
-
-       v = apic_read(APIC_LVR);
-       /*
-        * - we always have the APIC integrated in 64-bit mode
-        * - 82489DXs do not report # of LVT entries
-        */
-       return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2;
-}
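
GET_APIC_VERSION() and GET_APIC_MAXLVT() are plain field extractions from the version register: bits 7:0 hold the version and bits 23:16 the index of the highest LVT entry. A worked standalone example; the LVR value 0x50014 is a plausible sample for an integrated APIC, not read from hardware:

#include <assert.h>
#include <stdint.h>

#define GET_APIC_VERSION(x)     ((x) & 0xff)
#define GET_APIC_MAXLVT(x)      (((x) >> 16) & 0xff)

int main(void)
{
        uint32_t lvr = 0x50014;

        assert(GET_APIC_VERSION(lvr) == 0x14);  /* integrated (>= 0x10) */
        assert(GET_APIC_MAXLVT(lvr) == 5);      /* LVT entries 0..5 */
        return 0;
}
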
-
-/*
- * Local APIC timer
- */
-
-/* Clock divisor */
-#ifdef CONFIG_X86_64
-#define APIC_DIVISOR 1
-#else
-#define APIC_DIVISOR 16
-#endif
-
-/*
- * This function sets up the local APIC timer, with a timeout of
- * 'clocks' APIC bus clock. During calibration we actually call
- * this function twice on the boot CPU, once with a bogus timeout
- * value, second time for real. The other (noncalibrating) CPUs
- * call this function only once, with the real, calibrated value.
- *
- * We do reads before writes even if unnecessary, to get around the
- * P5 APIC double write bug.
- */
-static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
-{
-       unsigned int lvtt_value, tmp_value;
-
-       lvtt_value = LOCAL_TIMER_VECTOR;
-       if (!oneshot)
-               lvtt_value |= APIC_LVT_TIMER_PERIODIC;
-       if (!lapic_is_integrated())
-               lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV);
-
-       if (!irqen)
-               lvtt_value |= APIC_LVT_MASKED;
-
-       apic_write(APIC_LVTT, lvtt_value);
-
-       /*
-        * Divide PICLK by 16
-        */
-       tmp_value = apic_read(APIC_TDCR);
-       apic_write(APIC_TDCR,
-               (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) |
-               APIC_TDR_DIV_16);
-
-       if (!oneshot)
-               apic_write(APIC_TMICT, clocks / APIC_DIVISOR);
-}
-
-/*
- * Setup extended LVT, AMD specific (K8, family 10h)
- *
- * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and
- * MCE interrupts are supported. Thus MCE offset must be set to 0.
- *
- * If mask=1, the LVT entry does not generate interrupts while mask=0
- * enables the vector. See also the BKDGs.
- */
-
-#define APIC_EILVT_LVTOFF_MCE 0
-#define APIC_EILVT_LVTOFF_IBS 1
-
-static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask)
-{
-       unsigned long reg = (lvt_off << 4) + APIC_EILVT0;
-       unsigned int  v   = (mask << 16) | (msg_type << 8) | vector;
-
-       apic_write(reg, v);
-}
-
-u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask)
-{
-       setup_APIC_eilvt(APIC_EILVT_LVTOFF_MCE, vector, msg_type, mask);
-       return APIC_EILVT_LVTOFF_MCE;
-}
-
-u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask)
-{
-       setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask);
-       return APIC_EILVT_LVTOFF_IBS;
-}
-EXPORT_SYMBOL_GPL(setup_APIC_eilvt_ibs);
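
setup_APIC_eilvt() above packs mask, message type and vector into one 32-bit extended-LVT value and selects the register by offset; the extended LVT registers start at 0x500 and are 0x10 apart. A pure-arithmetic sketch of both computations (the sample vector 0xf9 is hypothetical):

#include <assert.h>
#include <stdint.h>

#define APIC_EILVT0 0x500       /* first extended-LVT register */

static uint32_t eilvt_reg(uint8_t lvt_off)
{
        return ((uint32_t)lvt_off << 4) + APIC_EILVT0;
}

static uint32_t eilvt_val(uint8_t vector, uint8_t msg_type, uint8_t mask)
{
        return ((uint32_t)mask << 16) | ((uint32_t)msg_type << 8) | vector;
}

int main(void)
{
        assert(eilvt_reg(1) == 0x510);            /* the IBS offset */
        assert(eilvt_val(0xf9, 0, 1) == 0x100f9); /* masked, fixed mode */
        return 0;
}
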
-
-/*
- * Program the next event, relative to now
- */
-static int lapic_next_event(unsigned long delta,
-                           struct clock_event_device *evt)
-{
-       apic_write(APIC_TMICT, delta);
-       return 0;
-}
-
-/*
- * Setup the lapic timer in periodic or oneshot mode
- */
-static void lapic_timer_setup(enum clock_event_mode mode,
-                             struct clock_event_device *evt)
-{
-       unsigned long flags;
-       unsigned int v;
-
-       /* Lapic used as dummy for broadcast ? */
-       if (evt->features & CLOCK_EVT_FEAT_DUMMY)
-               return;
-
-       local_irq_save(flags);
-
-       switch (mode) {
-       case CLOCK_EVT_MODE_PERIODIC:
-       case CLOCK_EVT_MODE_ONESHOT:
-               __setup_APIC_LVTT(calibration_result,
-                                 mode != CLOCK_EVT_MODE_PERIODIC, 1);
-               break;
-       case CLOCK_EVT_MODE_UNUSED:
-       case CLOCK_EVT_MODE_SHUTDOWN:
-               v = apic_read(APIC_LVTT);
-               v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
-               apic_write(APIC_LVTT, v);
-               break;
-       case CLOCK_EVT_MODE_RESUME:
-               /* Nothing to do here */
-               break;
-       }
-
-       local_irq_restore(flags);
-}
-
-/*
- * Local APIC timer broadcast function
- */
-static void lapic_timer_broadcast(cpumask_t mask)
-{
-#ifdef CONFIG_SMP
-       send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
-#endif
-}
-
-/*
- * Setup the local APIC timer for this CPU. Copy the initialized values
- * of the boot CPU and register the clock event in the framework.
- */
-static void setup_APIC_timer(void)
-{
-       struct clock_event_device *levt = &__get_cpu_var(lapic_events);
-
-       memcpy(levt, &lapic_clockevent, sizeof(*levt));
-       levt->cpumask = cpumask_of_cpu(smp_processor_id());
-
-       clockevents_register_device(levt);
-}
-
-/*
- * In this function we calibrate APIC bus clocks to the external
- * timer. Unfortunately we cannot use jiffies and the timer irq
- * to calibrate, since some later bootup code depends on getting
- * the first irq? Ugh.
- *
- * We want to do the calibration only once since we
- * want to have local timer irqs in sync. CPUs connected
- * by the same APIC bus have the very same bus frequency.
- * And we want to have irqs off anyways, no accidental
- * APIC irq that way.
- */
-
-#define TICK_COUNT 100000000
-
-static int __init calibrate_APIC_clock(void)
-{
-       unsigned apic, apic_start;
-       unsigned long tsc, tsc_start;
-       int result;
-
-       local_irq_disable();
-
-       /*
-        * Put whatever arbitrary (but long enough) timeout
-        * value into the APIC clock; we just want to get the
-        * counter running for calibration.
-        *
-        * No interrupt enable !
-        */
-       __setup_APIC_LVTT(250000000, 0, 0);
-
-       apic_start = apic_read(APIC_TMCCT);
-#ifdef CONFIG_X86_PM_TIMER
-       if (apic_calibrate_pmtmr && pmtmr_ioport) {
-               pmtimer_wait(5000);  /* 5ms wait */
-               apic = apic_read(APIC_TMCCT);
-               result = (apic_start - apic) * 1000L / 5;
-       } else
-#endif
-       {
-               rdtscll(tsc_start);
-
-               do {
-                       apic = apic_read(APIC_TMCCT);
-                       rdtscll(tsc);
-               } while ((tsc - tsc_start) < TICK_COUNT &&
-                               (apic_start - apic) < TICK_COUNT);
-
-               result = (apic_start - apic) * 1000L * tsc_khz /
-                                       (tsc - tsc_start);
-       }
-
-       local_irq_enable();
-
-       printk(KERN_DEBUG "APIC timer calibration result %d\n", result);
-
-       printk(KERN_INFO "Detected %d.%03d MHz APIC timer.\n",
-               result / 1000 / 1000, result / 1000 % 1000);
-
-       /* Calculate the scaled math multiplication factor */
-       lapic_clockevent.mult = div_sc(result, NSEC_PER_SEC,
-                                      lapic_clockevent.shift);
-       lapic_clockevent.max_delta_ns =
-               clockevent_delta2ns(0x7FFFFF, &lapic_clockevent);
-       lapic_clockevent.min_delta_ns =
-               clockevent_delta2ns(0xF, &lapic_clockevent);
-
-       calibration_result = (result * APIC_DIVISOR) / HZ;
-
-       /*
-        * Do a sanity check on the APIC calibration result
-        */
-       if (calibration_result < (1000000 / HZ)) {
-               printk(KERN_WARNING
-                       "APIC frequency too slow, disabling apic timer\n");
-               return -1;
-       }
-
-       return 0;
-}
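
The calibration arithmetic reduces to counts-per-second: the APIC-timer delta is scaled by the TSC frequency over the TSC delta to get the timer clock in Hz, which is then divided down to an initial-count value per kernel tick. A worked standalone example with made-up round numbers (2 GHz TSC, HZ=250):

#include <stdio.h>

#define APIC_DIVISOR 1
#define HZ 250

int main(void)
{
        unsigned long long tsc_khz    = 2000000;        /* 2 GHz TSC */
        unsigned long long tsc_delta  = 100000000;      /* TSC cycles seen */
        unsigned long long apic_delta = 10000000;       /* timer counts seen */
        unsigned long long result, calibration_result;

        /* timer frequency in Hz: counts * tsc-Hz / tsc-cycles */
        result = apic_delta * 1000 * tsc_khz / tsc_delta;
        /* initial count that makes the timer expire once per tick */
        calibration_result = result * APIC_DIVISOR / HZ;

        /* prints: 200000000 Hz -> TMICT 800000 */
        printf("%llu Hz -> TMICT %llu\n", result, calibration_result);
        return 0;
}
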
-
-/*
- * Setup the boot APIC
- *
- * Calibrate and verify the result.
- */
-void __init setup_boot_APIC_clock(void)
-{
-       /*
-        * The local apic timer can be disabled via the kernel
-        * commandline or from the CPU detection code. Register the lapic
-        * timer as a dummy clock event source on SMP systems, so the
-        * broadcast mechanism is used. On UP systems simply ignore it.
-        */
-       if (disable_apic_timer) {
-               printk(KERN_INFO "Disabling APIC timer\n");
-               /* No broadcast on UP ! */
-               if (num_possible_cpus() > 1) {
-                       lapic_clockevent.mult = 1;
-                       setup_APIC_timer();
-               }
-               return;
-       }
-
-       apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
-                   "calibrating APIC timer ...\n");
-
-       if (calibrate_APIC_clock()) {
-               /* No broadcast on UP ! */
-               if (num_possible_cpus() > 1)
-                       setup_APIC_timer();
-               return;
-       }
-
-       /*
-        * If nmi_watchdog is set to IO_APIC, we need the
-        * PIT/HPET going.  Otherwise register lapic as a dummy
-        * device.
-        */
-       if (nmi_watchdog != NMI_IO_APIC)
-               lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
-       else
-               printk(KERN_WARNING "APIC timer registered as dummy,"
-                       " due to nmi_watchdog=%d!\n", nmi_watchdog);
-
-       /* Setup the lapic or request the broadcast */
-       setup_APIC_timer();
-}
-
-void __cpuinit setup_secondary_APIC_clock(void)
-{
-       setup_APIC_timer();
-}
-
-/*
- * The guts of the apic timer interrupt
- */
-static void local_apic_timer_interrupt(void)
-{
-       int cpu = smp_processor_id();
-       struct clock_event_device *evt = &per_cpu(lapic_events, cpu);
-
-       /*
-        * Normally we should not be here till LAPIC has been initialized but
-        * in some cases like kdump, it's possible that a pending LAPIC timer
-        * interrupt from the previous kernel's context is delivered in the
-        * new kernel the moment interrupts are enabled.
-        *
-        * Interrupts are enabled early and the LAPIC is set up much later,
-        * hence it's possible that when we get here evt->event_handler is NULL.
-        * Check for event_handler being NULL and discard the interrupt as
-        * spurious.
-        */
-       if (!evt->event_handler) {
-               printk(KERN_WARNING
-                      "Spurious LAPIC timer interrupt on cpu %d\n", cpu);
-               /* Switch it off */
-               lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt);
-               return;
-       }
-
-       /*
-        * the NMI deadlock-detector uses this.
-        */
-#ifdef CONFIG_X86_64
-       add_pda(apic_timer_irqs, 1);
-#else
-       per_cpu(irq_stat, cpu).apic_timer_irqs++;
-#endif
-
-       evt->event_handler(evt);
-}
-
-/*
- * Local APIC timer interrupt. This is the most natural way for doing
- * local interrupts, but local timer interrupts can be emulated by
- * broadcast interrupts too. [in case the hw doesn't support APIC timers]
- *
- * [ if a single-CPU system runs an SMP kernel then we call the local
- *   interrupt as well. Thus we cannot inline the local irq ... ]
- */
-void smp_apic_timer_interrupt(struct pt_regs *regs)
-{
-       struct pt_regs *old_regs = set_irq_regs(regs);
-
-       /*
-        * NOTE! We'd better ACK the irq immediately,
-        * because timer handling can be slow.
-        */
-       ack_APIC_irq();
-       /*
-        * update_process_times() expects us to have done irq_enter().
-        * Besides, if we don't, timer interrupts ignore the global
-        * interrupt lock, which is the WrongThing (tm) to do.
-        */
-       exit_idle();
-       irq_enter();
-       local_apic_timer_interrupt();
-       irq_exit();
-
-       set_irq_regs(old_regs);
-}
-
-int setup_profiling_timer(unsigned int multiplier)
-{
-       return -EINVAL;
-}
-
-
-/*
- * Local APIC start and shutdown
- */
-
-/**
- * clear_local_APIC - shutdown the local APIC
- *
- * This is called, when a CPU is disabled and before rebooting, so the state of
- * the local APIC has no dangling leftovers. Also used to clean out any BIOS
- * leftovers during boot.
- */
-void clear_local_APIC(void)
-{
-       int maxlvt;
-       u32 v;
-
-       /* APIC hasn't been mapped yet */
-       if (!apic_phys)
-               return;
-
-       maxlvt = lapic_get_maxlvt();
-       /*
-        * Masking an LVT entry can trigger a local APIC error
-        * if the vector is zero. Mask LVTERR first to prevent this.
-        */
-       if (maxlvt >= 3) {
-               v = ERROR_APIC_VECTOR; /* any non-zero vector will do */
-               apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
-       }
-       /*
-        * Careful: we have to set masks only first to deassert
-        * any level-triggered sources.
-        */
-       v = apic_read(APIC_LVTT);
-       apic_write(APIC_LVTT, v | APIC_LVT_MASKED);
-       v = apic_read(APIC_LVT0);
-       apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
-       v = apic_read(APIC_LVT1);
-       apic_write(APIC_LVT1, v | APIC_LVT_MASKED);
-       if (maxlvt >= 4) {
-               v = apic_read(APIC_LVTPC);
-               apic_write(APIC_LVTPC, v | APIC_LVT_MASKED);
-       }
-
-       /* let's not touch this if we didn't frob it */
-#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
-       if (maxlvt >= 5) {
-               v = apic_read(APIC_LVTTHMR);
-               apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED);
-       }
-#endif
-       /*
-        * Clean APIC state for other OSs:
-        */
-       apic_write(APIC_LVTT, APIC_LVT_MASKED);
-       apic_write(APIC_LVT0, APIC_LVT_MASKED);
-       apic_write(APIC_LVT1, APIC_LVT_MASKED);
-       if (maxlvt >= 3)
-               apic_write(APIC_LVTERR, APIC_LVT_MASKED);
-       if (maxlvt >= 4)
-               apic_write(APIC_LVTPC, APIC_LVT_MASKED);
-
-       /* Integrated APIC (!82489DX) ? */
-       if (lapic_is_integrated()) {
-               if (maxlvt > 3)
-                       /* Clear ESR due to Pentium errata 3AP and 11AP */
-                       apic_write(APIC_ESR, 0);
-               apic_read(APIC_ESR);
-       }
-}
-
-/**
- * disable_local_APIC - clear and disable the local APIC
- */
-void disable_local_APIC(void)
-{
-       unsigned int value;
-
-       clear_local_APIC();
-
-       /*
-        * Disable APIC (implies clearing of registers
-        * for 82489DX!).
-        */
-       value = apic_read(APIC_SPIV);
-       value &= ~APIC_SPIV_APIC_ENABLED;
-       apic_write(APIC_SPIV, value);
-
-#ifdef CONFIG_X86_32
-       /*
-        * When LAPIC was disabled by the BIOS and enabled by the kernel,
-        * restore the disabled state.
-        */
-       if (enabled_via_apicbase) {
-               unsigned int l, h;
-
-               rdmsr(MSR_IA32_APICBASE, l, h);
-               l &= ~MSR_IA32_APICBASE_ENABLE;
-               wrmsr(MSR_IA32_APICBASE, l, h);
-       }
-#endif
-}
-
-/*
- * If Linux enabled the LAPIC against the BIOS default, disable it before
- * re-entering the BIOS on shutdown.  Otherwise the BIOS may get confused and
- * not power-off.  Additionally clear all LVT entries before disable_local_APIC
- * for the case where Linux didn't enable the LAPIC.
- */
-void lapic_shutdown(void)
-{
-       unsigned long flags;
-
-       if (!cpu_has_apic)
-               return;
-
-       local_irq_save(flags);
-
-#ifdef CONFIG_X86_32
-       if (!enabled_via_apicbase)
-               clear_local_APIC();
-       else
-#endif
-               disable_local_APIC();
-
-
-       local_irq_restore(flags);
-}
-
-/*
- * This is to verify that we're looking at a real local APIC.
- * Check these against your board if the CPUs aren't getting
- * started for no apparent reason.
- */
-int __init verify_local_APIC(void)
-{
-       unsigned int reg0, reg1;
-
-       /*
-        * The version register is read-only in a real APIC.
-        */
-       reg0 = apic_read(APIC_LVR);
-       apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg0);
-       apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK);
-       reg1 = apic_read(APIC_LVR);
-       apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg1);
-
-       /*
-        * The two version reads above should print the same
-        * numbers.  If the second one is different, then we
-        * poke at a non-APIC.
-        */
-       if (reg1 != reg0)
-               return 0;
-
-       /*
-        * Check if the version looks reasonable.
-        */
-       reg1 = GET_APIC_VERSION(reg0);
-       if (reg1 == 0x00 || reg1 == 0xff)
-               return 0;
-       reg1 = lapic_get_maxlvt();
-       if (reg1 < 0x02 || reg1 == 0xff)
-               return 0;
-
-       /*
-        * The ID register is read/write in a real APIC.
-        */
-       reg0 = apic_read(APIC_ID);
-       apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0);
-       apic_write(APIC_ID, reg0 ^ APIC_ID_MASK);
-       reg1 = apic_read(APIC_ID);
-       apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1);
-       apic_write(APIC_ID, reg0);
-       if (reg1 != (reg0 ^ APIC_ID_MASK))
-               return 0;
-
-       /*
-        * The next two are just to see if we have sane values.
-        * They're only really relevant if we're in Virtual Wire
-        * compatibility mode, but most boxes are these days.
-        */
-       reg0 = apic_read(APIC_LVT0);
-       apic_printk(APIC_DEBUG, "Getting LVT0: %x\n", reg0);
-       reg1 = apic_read(APIC_LVT1);
-       apic_printk(APIC_DEBUG, "Getting LVT1: %x\n", reg1);
-
-       return 1;
-}
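
verify_local_APIC() probes with inverting writes: the version register must ignore a write (it is read-only on a real APIC) while the ID register must accept one. A simulated, standalone sketch of those two checks; the fake register file and the 0xff000000 ID mask are for illustration only:

#include <assert.h>
#include <stdint.h>

struct fake_apic { uint32_t lvr, id; };

static uint32_t rd(const uint32_t *r) { return *r; }
static void wr_ro(uint32_t *r, uint32_t v) { (void)r; (void)v; } /* LVR */
static void wr_rw(uint32_t *r, uint32_t v) { *r = v; }           /* ID */

int main(void)
{
        struct fake_apic a = { 0x50014, 0x01000000 };
        uint32_t reg0, reg1;

        /* read-only check: the value must survive an inverting write */
        reg0 = rd(&a.lvr);
        wr_ro(&a.lvr, reg0 ^ 0xffffffff);
        reg1 = rd(&a.lvr);
        assert(reg1 == reg0);

        /* read/write check: the inverted ID bits must stick */
        reg0 = rd(&a.id);
        wr_rw(&a.id, reg0 ^ 0xff000000);
        reg1 = rd(&a.id);
        assert(reg1 == (reg0 ^ 0xff000000));
        return 0;
}
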
-
-/**
- * sync_Arb_IDs - synchronize APIC bus arbitration IDs
- */
-void __init sync_Arb_IDs(void)
-{
-       /*
-        * Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1. And not
-        * needed on AMD.
-        */
-       if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
-               return;
-
-       /*
-        * Wait for idle.
-        */
-       apic_wait_icr_idle();
-
-       apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n");
-       apic_write(APIC_ICR, APIC_DEST_ALLINC |
-                       APIC_INT_LEVELTRIG | APIC_DM_INIT);
-}
-
-/*
- * An initial setup of the virtual wire mode.
- */
-void __init init_bsp_APIC(void)
-{
-       unsigned int value;
-
-       /*
-        * Don't do the setup now if we have an SMP BIOS as the
-        * through-I/O-APIC virtual wire mode might be active.
-        */
-       if (smp_found_config || !cpu_has_apic)
-               return;
-
-       /*
-        * Do not trust the local APIC being empty at bootup.
-        */
-       clear_local_APIC();
-
-       /*
-        * Enable APIC.
-        */
-       value = apic_read(APIC_SPIV);
-       value &= ~APIC_VECTOR_MASK;
-       value |= APIC_SPIV_APIC_ENABLED;
-
-#ifdef CONFIG_X86_32
-       /* This bit is reserved on P4/Xeon and should be cleared */
-       if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
-           (boot_cpu_data.x86 == 15))
-               value &= ~APIC_SPIV_FOCUS_DISABLED;
-       else
-#endif
-               value |= APIC_SPIV_FOCUS_DISABLED;
-       value |= SPURIOUS_APIC_VECTOR;
-       apic_write(APIC_SPIV, value);
-
-       /*
-        * Set up the virtual wire mode.
-        */
-       apic_write(APIC_LVT0, APIC_DM_EXTINT);
-       value = APIC_DM_NMI;
-       if (!lapic_is_integrated())             /* 82489DX */
-               value |= APIC_LVT_LEVEL_TRIGGER;
-       apic_write(APIC_LVT1, value);
-}
-
-static void __cpuinit lapic_setup_esr(void)
-{
-       unsigned long oldvalue, value, maxlvt;
-       if (lapic_is_integrated() && !esr_disable) {
-               if (esr_disable) {
-                       /*
-                        * Something untraceable is creating bad interrupts on
-                        * secondary quads ... for the moment, just leave the
-                        * ESR disabled - we can't do anything useful with the
-                        * errors anyway - mbligh
-                        */
-                       printk(KERN_INFO "Leaving ESR disabled.\n");
-                       return;
-               }
-               /* !82489DX */
-               maxlvt = lapic_get_maxlvt();
-               if (maxlvt > 3)         /* Due to the Pentium erratum 3AP. */
-                       apic_write(APIC_ESR, 0);
-               oldvalue = apic_read(APIC_ESR);
-
-               /* enables sending errors */
-               value = ERROR_APIC_VECTOR;
-               apic_write(APIC_LVTERR, value);
-               /*
-                * spec says clear errors after enabling vector.
-                */
-               if (maxlvt > 3)
-                       apic_write(APIC_ESR, 0);
-               value = apic_read(APIC_ESR);
-               if (value != oldvalue)
-                       apic_printk(APIC_VERBOSE, "ESR value before enabling "
-                               "vector: 0x%08lx  after: 0x%08lx\n",
-                               oldvalue, value);
-       } else {
-               printk(KERN_INFO "No ESR for 82489DX.\n");
-       }
-}
-
-
-/**
- * setup_local_APIC - setup the local APIC
- */
-void __cpuinit setup_local_APIC(void)
-{
-       unsigned int value;
-       int i, j;
-
-       preempt_disable();
-       value = apic_read(APIC_LVR);
-
-       BUILD_BUG_ON((SPURIOUS_APIC_VECTOR & 0x0f) != 0x0f);
-
-       /*
-        * Double-check whether this APIC is really registered.
-        * This is meaningless in clustered apic mode, so we skip it.
-        */
-       if (!apic_id_registered())
-               BUG();
-
-       /*
-        * Intel recommends to set DFR, LDR and TPR before enabling
-        * an APIC.  See e.g. "AP-388 82489DX User's Manual" (Intel
-        * document number 292116).  So here it goes...
-        */
-       init_apic_ldr();
-
-       /*
-        * Set Task Priority to 'accept all'. We never change this
-        * later on.
-        */
-       value = apic_read(APIC_TASKPRI);
-       value &= ~APIC_TPRI_MASK;
-       apic_write(APIC_TASKPRI, value);
-
-       /*
-        * After a crash, we no longer service the interrupts and a pending
-        * interrupt from the previous kernel might still have its ISR bit set.
-        *
-        * Most probably by now the CPU has serviced that pending interrupt
-        * but might not have done the ack_APIC_irq() because it thought the
-        * interrupt came from the i8259 as ExtInt. The LAPIC did not get an
-        * EOI, so it does not clear the ISR bit and the CPU thinks it has
-        * already serviced the interrupt. Hence a vector might get locked.
-        * It was noticed for the timer irq (vector 0x31). Issue an extra
-        * EOI to clear the ISR.
-        */
-       for (i = APIC_ISR_NR - 1; i >= 0; i--) {
-               value = apic_read(APIC_ISR + i*0x10);
-               for (j = 31; j >= 0; j--) {
-                       if (value & (1<<j))
-                               ack_APIC_irq();
-               }
-       }
-
-       /*
-        * Now that we are all set up, enable the APIC
-        */
-       value = apic_read(APIC_SPIV);
-       value &= ~APIC_VECTOR_MASK;
-       /*
-        * Enable APIC
-        */
-       value |= APIC_SPIV_APIC_ENABLED;
-
-       /* We always use processor focus */
-
-       /*
-        * Set spurious IRQ vector
-        */
-       value |= SPURIOUS_APIC_VECTOR;
-       apic_write(APIC_SPIV, value);
-
-       /*
-        * Set up LVT0, LVT1:
-        *
-        * set up through-local-APIC on the BP's LINT0. This is not
-        * strictly necessary in pure symmetric-IO mode, but sometimes
-        * we delegate interrupts to the 8259A.
-        */
-       /*
-        * TODO: set up through-local-APIC from through-I/O-APIC? --macro
-        */
-       value = apic_read(APIC_LVT0) & APIC_LVT_MASKED;
-       if (!smp_processor_id() && !value) {
-               value = APIC_DM_EXTINT;
-               apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n",
-                           smp_processor_id());
-       } else {
-               value = APIC_DM_EXTINT | APIC_LVT_MASKED;
-               apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n",
-                           smp_processor_id());
-       }
-       apic_write(APIC_LVT0, value);
-
-       /*
-        * only the BP should see the LINT1 NMI signal, obviously.
-        */
-       if (!smp_processor_id())
-               value = APIC_DM_NMI;
-       else
-               value = APIC_DM_NMI | APIC_LVT_MASKED;
-       apic_write(APIC_LVT1, value);
-       preempt_enable();
-}
-
-void __cpuinit end_local_APIC_setup(void)
-{
-       lapic_setup_esr();
-
-#ifdef CONFIG_X86_32
-       {
-               unsigned int value;
-               /* Disable the local apic timer */
-               value = apic_read(APIC_LVTT);
-               value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
-               apic_write(APIC_LVTT, value);
-       }
-#endif
-
-       setup_apic_nmi_watchdog(NULL);
-       apic_pm_activate();
-}
-
-void check_x2apic(void)
-{
-       int msr, msr2;
-
-       rdmsr(MSR_IA32_APICBASE, msr, msr2);
-
-       if (msr & X2APIC_ENABLE) {
-               printk("x2apic enabled by BIOS, switching to x2apic ops\n");
-               x2apic_preenabled = x2apic = 1;
-               apic_ops = &x2apic_ops;
-       }
-}
-
-void enable_x2apic(void)
-{
-       int msr, msr2;
-
-       rdmsr(MSR_IA32_APICBASE, msr, msr2);
-       if (!(msr & X2APIC_ENABLE)) {
-               printk("Enabling x2apic\n");
-               wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0);
-       }
-}
-
-void enable_IR_x2apic(void)
-{
-#ifdef CONFIG_INTR_REMAP
-       int ret;
-       unsigned long flags;
-
-       if (!cpu_has_x2apic)
-               return;
-
-       if (!x2apic_preenabled && disable_x2apic) {
-               printk(KERN_INFO
-                      "Skipped enabling x2apic and Interrupt-remapping "
-                      "because of nox2apic\n");
-               return;
-       }
-
-       if (x2apic_preenabled && disable_x2apic)
-               panic("Bios already enabled x2apic, can't enforce nox2apic");
-
-       if (!x2apic_preenabled && skip_ioapic_setup) {
-               printk(KERN_INFO
-                      "Skipped enabling x2apic and Interrupt-remapping "
-                      "because of skipping io-apic setup\n");
-               return;
-       }
-
-       ret = dmar_table_init();
-       if (ret) {
-               printk(KERN_INFO
-                      "dmar_table_init() failed with %d\n", ret);
-
-               if (x2apic_preenabled)
-                       panic("x2apic enabled by BIOS, but IR enabling failed");
-               else
-                       printk(KERN_INFO
-                              "Not enabling x2apic, Intr-remapping\n");
-               return;
-       }
-
-       local_irq_save(flags);
-       mask_8259A();
-       save_mask_IO_APIC_setup();
-
-       ret = enable_intr_remapping(1);
-
-       if (ret && x2apic_preenabled) {
-               local_irq_restore(flags);
-               panic("x2apic enabled by bios. But IR enabling failed");
-       }
-
-       if (ret)
-               goto end;
-
-       if (!x2apic) {
-               x2apic = 1;
-               apic_ops = &x2apic_ops;
-               enable_x2apic();
-       }
-end:
-       if (ret)
-               /*
-                * IR enabling failed
-                */
-               restore_IO_APIC_setup();
-       else
-               reinit_intr_remapped_IO_APIC(x2apic_preenabled);
-
-       unmask_8259A();
-       local_irq_restore(flags);
-
-       if (!ret) {
-               if (!x2apic_preenabled)
-                       printk(KERN_INFO
-                              "Enabled x2apic and interrupt-remapping\n");
-               else
-                       printk(KERN_INFO
-                              "Enabled Interrupt-remapping\n");
-       } else
-               printk(KERN_ERR
-                      "Failed to enable Interrupt-remapping and x2apic\n");
-#else
-       if (!cpu_has_x2apic)
-               return;
-
-       if (x2apic_preenabled)
-               panic("x2apic enabled prior OS handover,"
-                     " enable CONFIG_INTR_REMAP");
-
-       printk(KERN_INFO "Enable CONFIG_INTR_REMAP for enabling intr-remapping "
-              " and x2apic\n");
-#endif
-
-       return;
-}
-
-/*
- * Detect and enable local APICs on non-SMP boards.
- * Original code written by Keir Fraser.
- * On AMD64 we trust the BIOS - if it says no APIC it is likely
- * not correctly set up (usually the APIC timer won't work etc.)
- */
-static int __init detect_init_APIC(void)
-{
-       if (!cpu_has_apic) {
-               printk(KERN_INFO "No local APIC present\n");
-               return -1;
-       }
-
-       mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
-       boot_cpu_physical_apicid = 0;
-       return 0;
-}
-
-void __init early_init_lapic_mapping(void)
-{
-       unsigned long phys_addr;
-
-       /*
-        * If no local APIC can be found then bail out:
-        * it means there is neither an MP table nor a MADT
-        */
-       if (!smp_found_config)
-               return;
-
-       phys_addr = mp_lapic_addr;
-
-       set_fixmap_nocache(FIX_APIC_BASE, phys_addr);
-       apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
-                   APIC_BASE, phys_addr);
-
-       /*
-        * Fetch the APIC ID of the BSP in case we have a
-        * default configuration (or the MP table is broken).
-        */
-       boot_cpu_physical_apicid = read_apic_id();
-}
-
-/**
- * init_apic_mappings - initialize APIC mappings
- */
-void __init init_apic_mappings(void)
-{
-       if (x2apic) {
-               boot_cpu_physical_apicid = read_apic_id();
-               return;
-       }
-
-       /*
-        * If no local APIC can be found then set up a fake all
-        * zeroes page to simulate the local APIC and another
-        * one for the IO-APIC.
-        */
-       if (!smp_found_config && detect_init_APIC()) {
-               apic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE);
-               apic_phys = __pa(apic_phys);
-       } else
-               apic_phys = mp_lapic_addr;
-
-       set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
-       apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
-                               APIC_BASE, apic_phys);
-
-       /*
-        * Fetch the APIC ID of the BSP in case we have a
-        * default configuration (or the MP table is broken).
-        */
-       boot_cpu_physical_apicid = read_apic_id();
-}
-
-/*
- * This initializes the IO-APIC and APIC hardware if this is
- * a UP kernel.
- */
-int apic_version[MAX_APICS];
-
-int __init APIC_init_uniprocessor(void)
-{
-       if (disable_apic) {
-               printk(KERN_INFO "Apic disabled\n");
-               return -1;
-       }
-       if (!cpu_has_apic) {
-               disable_apic = 1;
-               printk(KERN_INFO "Apic disabled by BIOS\n");
-               return -1;
-       }
-
-       enable_IR_x2apic();
-       setup_apic_routing();
-
-       verify_local_APIC();
-
-       connect_bsp_APIC();
-
-       physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
-       apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid));
-
-       setup_local_APIC();
-
-       /*
-        * Now enable IO-APICs, actually call clear_IO_APIC
-        * We need clear_IO_APIC before enabling vector on BP
-        */
-       if (!skip_ioapic_setup && nr_ioapics)
-               enable_IO_APIC();
-
-       if (!smp_found_config || skip_ioapic_setup || !nr_ioapics)
-               localise_nmi_watchdog();
-       end_local_APIC_setup();
-
-       if (smp_found_config && !skip_ioapic_setup && nr_ioapics)
-               setup_IO_APIC();
-       else
-               nr_ioapics = 0;
-       setup_boot_APIC_clock();
-       check_nmi_watchdog();
-       return 0;
-}
-
-/*
- * Local APIC interrupts
- */
-
-/*
- * This interrupt should _never_ happen with our APIC/SMP architecture
- */
-asmlinkage void smp_spurious_interrupt(void)
-{
-       unsigned int v;
-       exit_idle();
-       irq_enter();
-       /*
-        * Check if this really is a spurious interrupt and ACK it
-        * if it is a vectored one.  Just in case...
-        * Spurious interrupts should not be ACKed.
-        */
-       v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1));
-       if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
-               ack_APIC_irq();
-
-       add_pda(irq_spurious_count, 1);
-       irq_exit();
-}
-
-/*
- * This interrupt should never happen with our APIC/SMP architecture
- */
-asmlinkage void smp_error_interrupt(void)
-{
-       unsigned int v, v1;
-
-       exit_idle();
-       irq_enter();
-       /* First tickle the hardware, only then report what went on. -- REW */
-       v = apic_read(APIC_ESR);
-       apic_write(APIC_ESR, 0);
-       v1 = apic_read(APIC_ESR);
-       ack_APIC_irq();
-       atomic_inc(&irq_err_count);
-
-       /* Here is what the APIC error bits mean:
-          0: Send CS error
-          1: Receive CS error
-          2: Send accept error
-          3: Receive accept error
-          4: Reserved
-          5: Send illegal vector
-          6: Received illegal vector
-          7: Illegal register address
-       */
-       printk(KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n",
-               smp_processor_id(), v, v1);
-       irq_exit();
-}
-
-/**
- * connect_bsp_APIC - attach the APIC to the interrupt system
- */
-void __init connect_bsp_APIC(void)
-{
-#ifdef CONFIG_X86_32
-       if (pic_mode) {
-               /*
-                * Do not trust the local APIC being empty at bootup.
-                */
-               clear_local_APIC();
-               /*
-                * PIC mode, enable APIC mode in the IMCR, i.e.  connect BSP's
-                * local APIC to INT and NMI lines.
-                */
-               apic_printk(APIC_VERBOSE, "leaving PIC mode, "
-                               "enabling APIC mode.\n");
-               outb(0x70, 0x22);
-               outb(0x01, 0x23);
-       }
-#endif
-       enable_apic_mode();
-}
-
-/**
- * disconnect_bsp_APIC - detach the APIC from the interrupt system
- * @virt_wire_setup:   indicates, whether virtual wire mode is selected
- *
- * Virtual wire mode is necessary to deliver legacy interrupts even when the
- * APIC is disabled.
- */
-void disconnect_bsp_APIC(int virt_wire_setup)
-{
-       unsigned int value;
-
-#ifdef CONFIG_X86_32
-       if (pic_mode) {
-               /*
-                * Put the board back into PIC mode (has an effect only on
-                * certain older boards).  Note that APIC interrupts, including
-                * IPIs, won't work beyond this point!  The only exception are
-                * INIT IPIs.
-                */
-               apic_printk(APIC_VERBOSE, "disabling APIC mode, "
-                               "entering PIC mode.\n");
-               outb(0x70, 0x22);
-               outb(0x00, 0x23);
-               return;
-       }
-#endif
-
-       /* Go back to Virtual Wire compatibility mode */
-
-       /* For the spurious interrupt use vector F, and enable it */
-       value = apic_read(APIC_SPIV);
-       value &= ~APIC_VECTOR_MASK;
-       value |= APIC_SPIV_APIC_ENABLED;
-       value |= 0xf;
-       apic_write(APIC_SPIV, value);
-
-       if (!virt_wire_setup) {
-               /*
-                * For LVT0 make it edge triggered, active high,
-                * external and enabled
-                */
-               value = apic_read(APIC_LVT0);
-               value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
-                       APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
-                       APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
-               value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
-               value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
-               apic_write(APIC_LVT0, value);
-       } else {
-               /* Disable LVT0 */
-               apic_write(APIC_LVT0, APIC_LVT_MASKED);
-       }
-
-       /*
-        * For LVT1 make it edge triggered, active high,
-        * nmi and enabled
-        */
-       value = apic_read(APIC_LVT1);
-       value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
-                       APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
-                       APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
-       value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
-       value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI);
-       apic_write(APIC_LVT1, value);
-}
-
-void __cpuinit generic_processor_info(int apicid, int version)
-{
-       int cpu;
-       cpumask_t tmp_map;
-
-       /*
-        * Validate version
-        */
-       if (version == 0x0) {
-               printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! "
-                               "fixing up to 0x10. (tell your hw vendor)\n",
-                               apicid);
-               version = 0x10;
-       }
-       apic_version[apicid] = version;
-
-       if (num_processors >= NR_CPUS) {
-               printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
-                       "  Processor ignored.\n", NR_CPUS);
-               return;
-       }
-
-       num_processors++;
-       cpus_complement(tmp_map, cpu_present_map);
-       cpu = first_cpu(tmp_map);
-
-       physid_set(apicid, phys_cpu_present_map);
-       if (apicid == boot_cpu_physical_apicid) {
-               /*
-                * x86_bios_cpu_apicid is required to have processors listed
-                * in same order as logical cpu numbers. Hence the first
-                * entry is BSP, and so on.
-                */
-               cpu = 0;
-       }
-       if (apicid > max_physical_apicid)
-               max_physical_apicid = apicid;
-
-#ifdef CONFIG_X86_32
-       /*
-        * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y
-        * but we need to work other dependencies like SMP_SUSPEND etc
-        * before this can be done without some confusion.
-        * if (CPU_HOTPLUG_ENABLED || num_processors > 8)
-        *       - Ashok Raj <ashok.raj@intel.com>
-        */
-       if (max_physical_apicid >= 8) {
-               switch (boot_cpu_data.x86_vendor) {
-               case X86_VENDOR_INTEL:
-                       if (!APIC_XAPIC(version)) {
-                               def_to_bigsmp = 0;
-                               break;
-                       }
-                       /* If P4 and above fall through */
-               case X86_VENDOR_AMD:
-                       def_to_bigsmp = 1;
-               }
-       }
-#endif
-
-#if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64)
-       /* are we being called early in kernel startup? */
-       if (early_per_cpu_ptr(x86_cpu_to_apicid)) {
-               u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
-               u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
-
-               cpu_to_apicid[cpu] = apicid;
-               bios_cpu_apicid[cpu] = apicid;
-       } else {
-               per_cpu(x86_cpu_to_apicid, cpu) = apicid;
-               per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
-       }
-#endif
-
-       cpu_set(cpu, cpu_possible_map);
-       cpu_set(cpu, cpu_present_map);
-}
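Aside from the boot CPU, which is pinned to logical slot 0, the logical CPU
number chosen above is simply the first clear bit in cpu_present_map (the
cpus_complement/first_cpu pair). A minimal user-space model of that slot
selection, using a plain bitmask instead of cpumask_t (names here are
illustrative, not kernel API):

#include <stdio.h>

/* Model: pick the lowest clear bit, i.e. first_cpu(~present). */
static int first_free_slot(unsigned long present)
{
        int i;

        for (i = 0; i < 8 * (int)sizeof(present); i++)
                if (!(present & (1UL << i)))
                        return i;
        return -1;
}

int main(void)
{
        unsigned long present = 0x0bUL; /* CPUs 0, 1 and 3 already present */

        printf("next logical cpu = %d\n", first_free_slot(present)); /* 2 */
        return 0;
}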
-
-int hard_smp_processor_id(void)
-{
-       return read_apic_id();
-}
-
-/*
- * Power management
- */
-#ifdef CONFIG_PM
-
-static struct {
-       /*
-        * 'active' is true if the local APIC was enabled by us and
-        * not the BIOS; this signifies that we are also responsible
-        * for disabling it before entering apm/acpi suspend
-        */
-       int active;
-       /* r/w apic fields */
-       unsigned int apic_id;
-       unsigned int apic_taskpri;
-       unsigned int apic_ldr;
-       unsigned int apic_dfr;
-       unsigned int apic_spiv;
-       unsigned int apic_lvtt;
-       unsigned int apic_lvtpc;
-       unsigned int apic_lvt0;
-       unsigned int apic_lvt1;
-       unsigned int apic_lvterr;
-       unsigned int apic_tmict;
-       unsigned int apic_tdcr;
-       unsigned int apic_thmr;
-} apic_pm_state;
-
-static int lapic_suspend(struct sys_device *dev, pm_message_t state)
-{
-       unsigned long flags;
-       int maxlvt;
-
-       if (!apic_pm_state.active)
-               return 0;
-
-       maxlvt = lapic_get_maxlvt();
-
-       apic_pm_state.apic_id = apic_read(APIC_ID);
-       apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
-       apic_pm_state.apic_ldr = apic_read(APIC_LDR);
-       apic_pm_state.apic_dfr = apic_read(APIC_DFR);
-       apic_pm_state.apic_spiv = apic_read(APIC_SPIV);
-       apic_pm_state.apic_lvtt = apic_read(APIC_LVTT);
-       if (maxlvt >= 4)
-               apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC);
-       apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0);
-       apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1);
-       apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
-       apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
-       apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
-#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
-       if (maxlvt >= 5)
-               apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
-#endif
-
-       local_irq_save(flags);
-       disable_local_APIC();
-       local_irq_restore(flags);
-       return 0;
-}
-
-static int lapic_resume(struct sys_device *dev)
-{
-       unsigned int l, h;
-       unsigned long flags;
-       int maxlvt;
-
-       if (!apic_pm_state.active)
-               return 0;
-
-       maxlvt = lapic_get_maxlvt();
-
-       local_irq_save(flags);
-
-#ifdef CONFIG_X86_64
-       if (x2apic)
-               enable_x2apic();
-       else
-#endif
-       {
-               /*
-                * Make sure the APICBASE points to the right address
-                *
-                * FIXME! This will be wrong if we ever support suspend on
-                * SMP! We'll need to do this as part of the CPU restore!
-                */
-               rdmsr(MSR_IA32_APICBASE, l, h);
-               l &= ~MSR_IA32_APICBASE_BASE;
-               l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
-               wrmsr(MSR_IA32_APICBASE, l, h);
-       }
-
-       apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
-       apic_write(APIC_ID, apic_pm_state.apic_id);
-       apic_write(APIC_DFR, apic_pm_state.apic_dfr);
-       apic_write(APIC_LDR, apic_pm_state.apic_ldr);
-       apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri);
-       apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
-       apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
-       apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
-#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
-       if (maxlvt >= 5)
-               apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
-#endif
-       if (maxlvt >= 4)
-               apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
-       apic_write(APIC_LVTT, apic_pm_state.apic_lvtt);
-       apic_write(APIC_TDCR, apic_pm_state.apic_tdcr);
-       apic_write(APIC_TMICT, apic_pm_state.apic_tmict);
-       apic_write(APIC_ESR, 0);
-       apic_read(APIC_ESR);
-       apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr);
-       apic_write(APIC_ESR, 0);
-       apic_read(APIC_ESR);
-
-       local_irq_restore(flags);
-
-       return 0;
-}
-
-/*
- * This device has no shutdown method - fully functioning local APICs
- * are needed on every CPU up until machine_halt/restart/poweroff.
- */
-
-static struct sysdev_class lapic_sysclass = {
-       .name           = "lapic",
-       .resume         = lapic_resume,
-       .suspend        = lapic_suspend,
-};
-
-static struct sys_device device_lapic = {
-       .id     = 0,
-       .cls    = &lapic_sysclass,
-};
-
-static void __cpuinit apic_pm_activate(void)
-{
-       apic_pm_state.active = 1;
-}
-
-static int __init init_lapic_sysfs(void)
-{
-       int error;
-
-       if (!cpu_has_apic)
-               return 0;
-       /* XXX: remove suspend/resume procs if !apic_pm_state.active? */
-
-       error = sysdev_class_register(&lapic_sysclass);
-       if (!error)
-               error = sysdev_register(&device_lapic);
-       return error;
-}
-device_initcall(init_lapic_sysfs);
-
-#else  /* CONFIG_PM */
-
-static void apic_pm_activate(void) { }
-
-#endif /* CONFIG_PM */
-
-/*
- * apic_is_clustered_box() -- Check if we can expect good TSC
- *
- * Thus far, the major user of this is IBM's Summit2 series:
- *
- * Clustered boxes may have unsynced TSC problems if they are
- * multi-chassis. Use available data to take a good guess.
- * If in doubt, go HPET.
- */
-__cpuinit int apic_is_clustered_box(void)
-{
-       int i, clusters, zeros;
-       unsigned id;
-       u16 *bios_cpu_apicid;
-       DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS);
-
-       /*
-        * There are no clustered boxes with AMD CPUs yet. Some AMD boxes
-        * with quad-core CPUs and 8 sockets have APIC IDs in [4, 0x23] or
-        * [8, 0x27] and could be mistaken for vSMP boxes, so those still
-        * need checking...
-        */
-       if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box())
-               return 0;
-
-       bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
-       bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
-
-       for (i = 0; i < NR_CPUS; i++) {
-               /* are we being called early in kernel startup? */
-               if (bios_cpu_apicid) {
-                       id = bios_cpu_apicid[i];
-               }
-               else if (i < nr_cpu_ids) {
-                       if (cpu_present(i))
-                               id = per_cpu(x86_bios_cpu_apicid, i);
-                       else
-                               continue;
-               }
-               else
-                       break;
-
-               if (id != BAD_APICID)
-                       __set_bit(APIC_CLUSTERID(id), clustermap);
-       }
-
-       /* Problem:  Partially populated chassis may not have CPUs in some of
-        * the APIC clusters they have been allocated.  Only present CPUs have
-        * x86_bios_cpu_apicid entries, thus causing zeroes in the bitmap.
-        * Since clusters are allocated sequentially, count zeros only if
-        * they are bounded by ones.
-        */
-       clusters = 0;
-       zeros = 0;
-       for (i = 0; i < NUM_APIC_CLUSTERS; i++) {
-               if (test_bit(i, clustermap)) {
-                       clusters += 1 + zeros;
-                       zeros = 0;
-               } else
-                       ++zeros;
-       }
-
-       /* ScaleMP vSMPowered boxes have one cluster per board and TSCs are
-        * not guaranteed to be synced between boards
-        */
-       if (is_vsmp_box() && clusters > 1)
-               return 1;
-
-       /*
-        * If clusters > 2, then should be multi-chassis.
-        * May have to revisit this when multi-core + hyperthreaded CPUs come
-        * out, but AFAIK this will work even for them.
-        */
-       return (clusters > 2);
-}
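The zero-counting rule in the loop above ("count zeros only if they are
bounded by ones") is easiest to see on a concrete bitmap. A stand-alone
sketch of the same counting logic, with a made-up clustermap:

#include <stdio.h>

int main(void)
{
        /* clustermap: 1 = cluster has a present CPU, 0 = gap */
        int map[8] = { 1, 0, 0, 1, 1, 0, 0, 0 };
        int i, clusters = 0, zeros = 0;

        for (i = 0; i < 8; i++) {
                if (map[i]) {
                        clusters += 1 + zeros;  /* interior gaps count */
                        zeros = 0;
                } else
                        ++zeros;
        }
        /* trailing zeros are never added: clusters == 5 here */
        printf("clusters = %d\n", clusters);
        return 0;
}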
-
-static __init int setup_nox2apic(char *str)
-{
-       disable_x2apic = 1;
-       clear_cpu_cap(&boot_cpu_data, X86_FEATURE_X2APIC);
-       return 0;
-}
-early_param("nox2apic", setup_nox2apic);
-
-
-/*
- * APIC command line parameters
- */
-static int __init setup_disableapic(char *arg)
-{
-       disable_apic = 1;
-       setup_clear_cpu_cap(X86_FEATURE_APIC);
-       return 0;
-}
-early_param("disableapic", setup_disableapic);
-
-/* same as disableapic, for compatibility */
-static int __init setup_nolapic(char *arg)
-{
-       return setup_disableapic(arg);
-}
-early_param("nolapic", setup_nolapic);
-
-static int __init parse_lapic_timer_c2_ok(char *arg)
-{
-       local_apic_timer_c2_ok = 1;
-       return 0;
-}
-early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok);
-
-static int __init parse_disable_apic_timer(char *arg)
-{
-       disable_apic_timer = 1;
-       return 0;
-}
-early_param("noapictimer", parse_disable_apic_timer);
-
-static int __init parse_nolapic_timer(char *arg)
-{
-       disable_apic_timer = 1;
-       return 0;
-}
-early_param("nolapic_timer", parse_nolapic_timer);
-
-static __init int setup_apicpmtimer(char *s)
-{
-       apic_calibrate_pmtmr = 1;
-       notsc_setup(NULL);
-       return 0;
-}
-__setup("apicpmtimer", setup_apicpmtimer);
-
-static int __init apic_set_verbosity(char *arg)
-{
-       if (!arg)  {
-#ifdef CONFIG_X86_64
-               skip_ioapic_setup = 0;
-               ioapic_force = 1;
-               return 0;
-#endif
-               return -EINVAL;
-       }
-
-       if (strcmp("debug", arg) == 0)
-               apic_verbosity = APIC_DEBUG;
-       else if (strcmp("verbose", arg) == 0)
-               apic_verbosity = APIC_VERBOSE;
-       else {
-               printk(KERN_WARNING "APIC Verbosity level %s not recognised"
-                       " use apic=verbose or apic=debug\n", arg);
-               return -EINVAL;
-       }
-
-       return 0;
-}
-early_param("apic", apic_set_verbosity);
-
-static int __init lapic_insert_resource(void)
-{
-       if (!apic_phys)
-               return -1;
-
-       /* Put local APIC into the resource map. */
-       lapic_resource.start = apic_phys;
-       lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1;
-       insert_resource(&iomem_resource, &lapic_resource);
-
-       return 0;
-}
-
-/*
- * This must be called after e820_reserve_resources(),
- * which uses request_resource().
- */
-late_initcall(lapic_insert_resource);
index fdd585f9c53dd371d41392b4e955278d913979f0..f0dfe6f17e7eabbe41f90a8afd75c54766081fa2 100644 (file)
@@ -1,8 +1,6 @@
 /*
  * BIOS run time interface routines.
  *
- *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
- *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
  *  the Free Software Foundation; either version 2 of the License, or
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, write to the Free Software
  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ *
+ *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ *  Copyright (c) Russ Anderson
  */
 
+#include <linux/efi.h>
+#include <asm/efi.h>
+#include <linux/io.h>
 #include <asm/uv/bios.h>
+#include <asm/uv/uv_hub.h>
+
+struct uv_systab uv_systab;
 
-const char *
-x86_bios_strerror(long status)
+s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5)
 {
-       const char *str;
-       switch (status) {
-       case  0: str = "Call completed without error";  break;
-       case -1: str = "Not implemented";               break;
-       case -2: str = "Invalid argument";              break;
-       case -3: str = "Call completed with error";     break;
-       default: str = "Unknown BIOS status code";      break;
-       }
-       return str;
+       struct uv_systab *tab = &uv_systab;
+
+       if (!tab->function)
+               /*
+                * BIOS does not support UV systab
+                */
+               return BIOS_STATUS_UNIMPLEMENTED;
+
+       return efi_call6((void *)__va(tab->function),
+                                       (u64)which, a1, a2, a3, a4, a5);
 }
 
-long
-x86_bios_freq_base(unsigned long which, unsigned long *ticks_per_second,
-                  unsigned long *drift_info)
+s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
+                                       u64 a4, u64 a5)
 {
-       struct uv_bios_retval isrv;
+       unsigned long bios_flags;
+       s64 ret;
 
-       BIOS_CALL(isrv, BIOS_FREQ_BASE, which, 0, 0, 0, 0, 0, 0);
-       *ticks_per_second = isrv.v0;
-       *drift_info = isrv.v1;
-       return isrv.status;
+       local_irq_save(bios_flags);
+       ret = uv_bios_call(which, a1, a2, a3, a4, a5);
+       local_irq_restore(bios_flags);
+
+       return ret;
 }
-EXPORT_SYMBOL_GPL(x86_bios_freq_base);
+
+s64 uv_bios_call_reentrant(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
+                                       u64 a4, u64 a5)
+{
+       s64 ret;
+
+       preempt_disable();
+       ret = uv_bios_call(which, a1, a2, a3, a4, a5);
+       preempt_enable();
+
+       return ret;
+}
+
+
+long sn_partition_id;
+EXPORT_SYMBOL_GPL(sn_partition_id);
+long uv_coherency_id;
+EXPORT_SYMBOL_GPL(uv_coherency_id);
+long uv_region_size;
+EXPORT_SYMBOL_GPL(uv_region_size);
+int uv_type;
+
+
+s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher,
+               long *region)
+{
+       s64 ret;
+       u64 v0, v1;
+       union partition_info_u part;
+
+       ret = uv_bios_call_irqsave(UV_BIOS_GET_SN_INFO, fc,
+                               (u64)(&v0), (u64)(&v1), 0, 0);
+       if (ret != BIOS_STATUS_SUCCESS)
+               return ret;
+
+       part.val = v0;
+       if (uvtype)
+               *uvtype = part.hub_version;
+       if (partid)
+               *partid = part.partition_id;
+       if (coher)
+               *coher = part.coherence_id;
+       if (region)
+               *region = part.region_size;
+       return ret;
+}
+
+
+s64 uv_bios_freq_base(u64 clock_type, u64 *ticks_per_second)
+{
+       return uv_bios_call(UV_BIOS_FREQ_BASE, clock_type,
+                          (u64)ticks_per_second, 0, 0, 0);
+}
+EXPORT_SYMBOL_GPL(uv_bios_freq_base);
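For callers, each query now collapses to a single status-returning call;
uv_rtc_init() uses exactly this shape. A hedged fragment of the intended
usage (the zero fallback is illustrative, not part of this file):

        u64 ticks_per_sec;
        s64 status;

        status = uv_bios_freq_base(BIOS_FREQ_BASE_REALTIME_CLOCK,
                                   &ticks_per_sec);
        if (status != BIOS_STATUS_SUCCESS)
                ticks_per_sec = 0;      /* substitute a guessed frequency */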
+
+
+#ifdef CONFIG_EFI
+void uv_bios_init(void)
+{
+       struct uv_systab *tab;
+
+       if ((efi.uv_systab == EFI_INVALID_TABLE_ADDR) ||
+           (efi.uv_systab == (unsigned long)NULL)) {
+               printk(KERN_CRIT "No EFI UV System Table.\n");
+               uv_systab.function = (unsigned long)NULL;
+               return;
+       }
+
+       tab = (struct uv_systab *)ioremap(efi.uv_systab,
+                                       sizeof(struct uv_systab));
+       if (strncmp(tab->signature, "UVST", 4) != 0)
+               printk(KERN_ERR "bad signature in UV system table!");
+
+       /*
+        * Copy table to permanent spot for later use.
+        */
+       memcpy(&uv_systab, tab, sizeof(struct uv_systab));
+       iounmap(tab);
+
+       printk(KERN_INFO "EFI UV System Table Revision %d\n", tab->revision);
+}
+#else  /* !CONFIG_EFI */
+
+void uv_bios_init(void) { }
+#endif
+
index 32e73520adf7540525c033bca414bde159dc5536..8f1e31db2ad56d4f178881fff4d86b777349173c 100644 (file)
@@ -249,7 +249,7 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
        }
        numa_set_node(cpu, node);
 
-       printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
+       printk(KERN_INFO "CPU %d/0x%x -> Node %d\n", cpu, apicid, node);
 #endif
 }
 
index 06fcce516d51d44a97349f9140507f82b6d20ef2..b0461856acfb1ca4b236bd355c4cd94604b36503 100644 (file)
@@ -1,5 +1,5 @@
 /*
- *  (C) 2001-2004  Dave Jones. <davej@codemonkey.org.uk>
+ *  (C) 2001-2004  Dave Jones. <davej@redhat.com>
  *  (C) 2002  Padraig Brady. <padraig@antefacto.com>
  *
  *  Licensed under the terms of the GNU GPL License version 2.
@@ -1019,7 +1019,7 @@ MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor");
 module_param(revid_errata, int, 0644);
 MODULE_PARM_DESC(revid_errata, "Ignore CPU Revision ID");
 
-MODULE_AUTHOR ("Dave Jones <davej@codemonkey.org.uk>");
+MODULE_AUTHOR ("Dave Jones <davej@redhat.com>");
 MODULE_DESCRIPTION ("Longhaul driver for VIA Cyrix processors.");
 MODULE_LICENSE ("GPL");
 
index b5ced806a316d66b1c4bb1fcc6120f855617a266..c1ac5790c63e34ec65cfe062f6fbf34c6d3607ce 100644 (file)
@@ -246,7 +246,7 @@ static void __exit powernow_k6_exit(void)
 }
 
 
-MODULE_AUTHOR("Arjan van de Ven <arjanv@redhat.com>, Dave Jones <davej@codemonkey.org.uk>, Dominik Brodowski <linux@brodo.de>");
+MODULE_AUTHOR("Arjan van de Ven, Dave Jones <davej@redhat.com>, Dominik Brodowski <linux@brodo.de>");
 MODULE_DESCRIPTION("PowerNow! driver for AMD K6-2+ / K6-3+ processors.");
 MODULE_LICENSE("GPL");
 
index 0a61159d7b71389c8e8bcdbae7b913463b0f318c..7c7d56b43136a6a416085fb7cc4b27f25839ec3c 100644 (file)
@@ -1,6 +1,6 @@
 /*
  *  AMD K7 Powernow driver.
- *  (C) 2003 Dave Jones <davej@codemonkey.org.uk> on behalf of SuSE Labs.
+ *  (C) 2003 Dave Jones on behalf of SuSE Labs.
  *  (C) 2003-2004 Dave Jones <davej@redhat.com>
  *
  *  Licensed under the terms of the GNU GPL License version 2.
@@ -692,7 +692,7 @@ static void __exit powernow_exit (void)
 module_param(acpi_force,  int, 0444);
 MODULE_PARM_DESC(acpi_force, "Force ACPI to be used.");
 
-MODULE_AUTHOR ("Dave Jones <davej@codemonkey.org.uk>");
+MODULE_AUTHOR ("Dave Jones <davej@redhat.com>");
 MODULE_DESCRIPTION ("Powernow driver for AMD K7 processors.");
 MODULE_LICENSE ("GPL");
 
index 84bb395038d879823d5beb4e4bf2e181b8bd6b91..008d23ba491bff850222e0435006220c724ead22 100644 (file)
@@ -7,7 +7,7 @@
  *  Support : mark.langsdorf@amd.com
  *
  *  Based on the powernow-k7.c module written by Dave Jones.
- *  (C) 2003 Dave Jones <davej@codemonkey.org.uk> on behalf of SuSE Labs
+ *  (C) 2003 Dave Jones on behalf of SuSE Labs
  *  (C) 2004 Dominik Brodowski <linux@brodo.de>
  *  (C) 2004 Pavel Machek <pavel@suse.cz>
  *  Licensed under the terms of the GNU GPL License version 2.
index 191f7263c61dce1b42864cd4709e249773339243..04d0376b64b0dfa9a292a0784afddc07de475720 100644 (file)
@@ -431,7 +431,7 @@ static void __exit speedstep_exit(void)
 }
 
 
-MODULE_AUTHOR ("Dave Jones <davej@codemonkey.org.uk>, Dominik Brodowski <linux@brodo.de>");
+MODULE_AUTHOR ("Dave Jones <davej@redhat.com>, Dominik Brodowski <linux@brodo.de>");
 MODULE_DESCRIPTION ("Speedstep driver for Intel mobile processors on chipsets with ICH-M southbridges.");
 MODULE_LICENSE ("GPL");
 
index 99468dbd08da3edbb2c97b5cbb803918c85329d6..cce0b6118d550e015a34c5e1b0f9bc60de1f8b3d 100644 (file)
@@ -174,7 +174,7 @@ static void __cpuinit srat_detect_node(void)
                node = first_node(node_online_map);
        numa_set_node(cpu, node);
 
-       printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
+       printk(KERN_INFO "CPU %d/0x%x -> Node %d\n", cpu, apicid, node);
 #endif
 }
 
index f390c9f66351ae850ca169b5e399b32d555bd266..dd3af6e7b39a078835abd589feeaeee0b6184907 100644 (file)
@@ -1,6 +1,6 @@
 /*
- * Athlon/Hammer specific Machine Check Exception Reporting
- * (C) Copyright 2002 Dave Jones <davej@codemonkey.org.uk>
+ * Athlon specific Machine Check Exception Reporting
+ * (C) Copyright 2002 Dave Jones <davej@redhat.com>
  */
 
 #include <linux/init.h>
index 774d87cfd8cd8fe0524bb9d1bc4c4a076f8c306d..0ebf3fc6a6108e8c9eedb239f2aba01a76e5902d 100644 (file)
@@ -1,6 +1,6 @@
 /*
  * mce.c - x86 Machine Check Exception Reporting
- * (c) 2002 Alan Cox <alan@redhat.com>, Dave Jones <davej@codemonkey.org.uk>
+ * (c) 2002 Alan Cox <alan@redhat.com>, Dave Jones <davej@redhat.com>
  */
 
 #include <linux/init.h>
index cc1fccdd31e08ce56b6538692004475a7f44bf66..a74af128efc917384909d44b51834aaf40282647 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * Non Fatal Machine Check Exception Reporting
  *
- * (C) Copyright 2002 Dave Jones. <davej@codemonkey.org.uk>
+ * (C) Copyright 2002 Dave Jones. <davej@redhat.com>
  *
  * This file contains routines to check for non-fatal MCEs every 15s
  *
index 6bff382094f58a2a40b0adebdf5fb264905614ba..9abd48b2267413a4212b18d0cb627325703b90de 100644 (file)
@@ -17,6 +17,8 @@
 #include <linux/bitops.h>
 #include <linux/smp.h>
 #include <linux/nmi.h>
+#include <linux/kprobes.h>
+
 #include <asm/apic.h>
 #include <asm/intel_arch_perfmon.h>
 
@@ -336,7 +338,8 @@ static void single_msr_unreserve(void)
        release_perfctr_nmi(wd_ops->perfctr);
 }
 
-static void single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
+static void __kprobes
+single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
 {
        /* start the cycle over again */
        write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
@@ -401,7 +404,7 @@ static int setup_p6_watchdog(unsigned nmi_hz)
        return 1;
 }
 
-static void p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
+static void __kprobes p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
 {
        /*
         * P6 based Pentium M need to re-unmask
@@ -605,7 +608,7 @@ static void p4_unreserve(void)
        release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
 }
 
-static void p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
+static void __kprobes p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
 {
        unsigned dummy;
        /*
@@ -784,7 +787,7 @@ unsigned lapic_adjust_nmi_hz(unsigned hz)
        return hz;
 }
 
-int lapic_wd_event(unsigned nmi_hz)
+int __kprobes lapic_wd_event(unsigned nmi_hz)
 {
        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
        u64 ctr;
index 945a31cdd81f5493d0de00bf39214f0df371b976..1119d247fe11d87aceaa2e42f770bd773a9c1ea2 100644 (file)
@@ -366,6 +366,10 @@ void __init efi_init(void)
                                        SMBIOS_TABLE_GUID)) {
                        efi.smbios = config_tables[i].table;
                        printk(" SMBIOS=0x%lx ", config_tables[i].table);
+               } else if (!efi_guidcmp(config_tables[i].guid,
+                                       UV_SYSTEM_TABLE_GUID)) {
+                       efi.uv_systab = config_tables[i].table;
+                       printk(" UVsystab=0x%lx ", config_tables[i].table);
                } else if (!efi_guidcmp(config_tables[i].guid,
                                        HCDP_TABLE_GUID)) {
                        efi.hcdp = config_tables[i].table;
index b21fbfaffe391a974d10d7417a9f00c99ab05228..c356423a6026fd62123a13d6229bfa4d9a224df3 100644 (file)
@@ -629,7 +629,7 @@ ENTRY(interrupt)
 ENTRY(irq_entries_start)
        RING0_INT_FRAME
 vector=0
-.rept NR_IRQS
+.rept NR_VECTORS
        ALIGN
  .if vector
        CFI_ADJUST_CFA_OFFSET -4
@@ -1153,20 +1153,6 @@ ENDPROC(xen_failsafe_callback)
 #ifdef CONFIG_DYNAMIC_FTRACE
 
 ENTRY(mcount)
-       pushl %eax
-       pushl %ecx
-       pushl %edx
-       movl 0xc(%esp), %eax
-       subl $MCOUNT_INSN_SIZE, %eax
-
-.globl mcount_call
-mcount_call:
-       call ftrace_stub
-
-       popl %edx
-       popl %ecx
-       popl %eax
-
        ret
 END(mcount)
 
index 1db6ce4314e19325a740e15fc1ec60db3de8cf38..09e7145484c5ba971f72af1f7411c286f41ee5ef 100644 (file)
 #ifdef CONFIG_FTRACE
 #ifdef CONFIG_DYNAMIC_FTRACE
 ENTRY(mcount)
-
-       subq $0x38, %rsp
-       movq %rax, (%rsp)
-       movq %rcx, 8(%rsp)
-       movq %rdx, 16(%rsp)
-       movq %rsi, 24(%rsp)
-       movq %rdi, 32(%rsp)
-       movq %r8, 40(%rsp)
-       movq %r9, 48(%rsp)
-
-       movq 0x38(%rsp), %rdi
-       subq $MCOUNT_INSN_SIZE, %rdi
-
-.globl mcount_call
-mcount_call:
-       call ftrace_stub
-
-       movq 48(%rsp), %r9
-       movq 40(%rsp), %r8
-       movq 32(%rsp), %rdi
-       movq 24(%rsp), %rsi
-       movq 16(%rsp), %rdx
-       movq 8(%rsp), %rcx
-       movq (%rsp), %rax
-       addq $0x38, %rsp
-
        retq
 END(mcount)
 
index ab115cd15fdfbb2cc06182385b4a60b8521f617c..d073d981a730306f970aabdf5f7174453c24d226 100644 (file)
 
 #include <linux/spinlock.h>
 #include <linux/hardirq.h>
+#include <linux/uaccess.h>
 #include <linux/ftrace.h>
 #include <linux/percpu.h>
 #include <linux/init.h>
 #include <linux/list.h>
 
-#include <asm/alternative.h>
 #include <asm/ftrace.h>
+#include <asm/nops.h>
 
 
 /* Long is fine, even if it is only 4 bytes ;-) */
-static long *ftrace_nop;
+static unsigned long *ftrace_nop;
 
 union ftrace_code_union {
        char code[MCOUNT_INSN_SIZE];
@@ -60,11 +61,7 @@ notrace int
 ftrace_modify_code(unsigned long ip, unsigned char *old_code,
                   unsigned char *new_code)
 {
-       unsigned replaced;
-       unsigned old = *(unsigned *)old_code; /* 4 bytes */
-       unsigned new = *(unsigned *)new_code; /* 4 bytes */
-       unsigned char newch = new_code[4];
-       int faulted = 0;
+       unsigned char replaced[MCOUNT_INSN_SIZE];
 
        /*
         * Note: Due to modules and __init, code can
@@ -72,29 +69,20 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code,
         *  as well as code changing.
         *
         * No real locking needed, this code is run through
-        * kstop_machine.
+        * kstop_machine, or before SMP starts.
         */
-       asm volatile (
-               "1: lock\n"
-               "   cmpxchg %3, (%2)\n"
-               "   jnz 2f\n"
-               "   movb %b4, 4(%2)\n"
-               "2:\n"
-               ".section .fixup, \"ax\"\n"
-               "3:     movl $1, %0\n"
-               "       jmp 2b\n"
-               ".previous\n"
-               _ASM_EXTABLE(1b, 3b)
-               : "=r"(faulted), "=a"(replaced)
-               : "r"(ip), "r"(new), "c"(newch),
-                 "0"(faulted), "a"(old)
-               : "memory");
-       sync_core();
+       if (__copy_from_user_inatomic(replaced, (char __user *)ip, MCOUNT_INSN_SIZE))
+               return 1;
+
+       if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
+               return 2;
 
-       if (replaced != old && replaced != new)
-               faulted = 2;
+       WARN_ON_ONCE(__copy_to_user_inatomic((char __user *)ip, new_code,
+                                   MCOUNT_INSN_SIZE));
 
-       return faulted;
+       sync_core();
+
+       return 0;
 }
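The rewritten ftrace_modify_code() is a probe/verify/patch sequence. A
user-space model of the same three steps on an ordinary buffer (memcpy
stands in for the fault-safe __copy_*_inatomic helpers; the sync_core()
serialization is omitted):

#include <stdio.h>
#include <string.h>

#define INSN_SIZE 5

static int modify_code(unsigned char *ip, const unsigned char *old,
                       const unsigned char *new)
{
        unsigned char replaced[INSN_SIZE];

        memcpy(replaced, ip, INSN_SIZE);        /* 1. probe live bytes */
        if (memcmp(replaced, old, INSN_SIZE))   /* 2. verify expectation */
                return 2;
        memcpy(ip, new, INSN_SIZE);             /* 3. patch in new bytes */
        return 0;
}

int main(void)
{
        unsigned char text[INSN_SIZE]   = { 0xe8, 1, 2, 3, 4 }; /* "call" */
        unsigned char expect[INSN_SIZE] = { 0xe8, 1, 2, 3, 4 };
        unsigned char nop[INSN_SIZE]    = { 0x0f, 0x1f, 0x44, 0x00, 0x00 };

        printf("rc = %d\n", modify_code(text, expect, nop)); /* 0: patched */
        printf("rc = %d\n", modify_code(text, expect, nop)); /* 2: mismatch */
        return 0;
}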
 
 notrace int ftrace_update_ftrace_func(ftrace_func_t func)
@@ -112,30 +100,76 @@ notrace int ftrace_update_ftrace_func(ftrace_func_t func)
 
 notrace int ftrace_mcount_set(unsigned long *data)
 {
-       unsigned long ip = (long)(&mcount_call);
-       unsigned long *addr = data;
-       unsigned char old[MCOUNT_INSN_SIZE], *new;
-
-       /*
-        * Replace the mcount stub with a pointer to the
-        * ip recorder function.
-        */
-       memcpy(old, &mcount_call, MCOUNT_INSN_SIZE);
-       new = ftrace_call_replace(ip, *addr);
-       *addr = ftrace_modify_code(ip, old, new);
-
+       /* mcount is initialized as a nop */
+       *data = 0;
        return 0;
 }
 
 int __init ftrace_dyn_arch_init(void *data)
 {
-       const unsigned char *const *noptable = find_nop_table();
-
-       /* This is running in kstop_machine */
-
-       ftrace_mcount_set(data);
+       extern const unsigned char ftrace_test_p6nop[];
+       extern const unsigned char ftrace_test_nop5[];
+       extern const unsigned char ftrace_test_jmp[];
+       int faulted = 0;
 
-       ftrace_nop = (unsigned long *)noptable[MCOUNT_INSN_SIZE];
+       /*
+        * There is no single good nop for all x86 archs. We default to
+        * P6_NOP5, but first test that this nop actually works on the
+        * current CPU. If it faults, we fall back to a less efficient
+        * 5-byte nop; if that also fails, we use a jmp as our nop.
+        * That is not the most efficient nop, but we cannot use a
+        * multi-part nop, since we would risk being preempted in the
+        * middle of it, and if tracing were enabled at that point it
+        * could crash the system.
+        *
+        * TODO: check the cpuid to determine the best nop.
+        */
+       asm volatile (
+               "jmp ftrace_test_jmp\n"
+               /* This code needs to stay around */
+               ".section .text, \"ax\"\n"
+               "ftrace_test_jmp:"
+               "jmp ftrace_test_p6nop\n"
+               "nop\n"
+               "nop\n"
+               "nop\n"  /* 2 byte jmp + 3 bytes */
+               "ftrace_test_p6nop:"
+               P6_NOP5
+               "jmp 1f\n"
+               "ftrace_test_nop5:"
+               ".byte 0x66,0x66,0x66,0x66,0x90\n"
+               "jmp 1f\n"
+               ".previous\n"
+               "1:"
+               ".section .fixup, \"ax\"\n"
+               "2:     movl $1, %0\n"
+               "       jmp ftrace_test_nop5\n"
+               "3:     movl $2, %0\n"
+               "       jmp 1b\n"
+               ".previous\n"
+               _ASM_EXTABLE(ftrace_test_p6nop, 2b)
+               _ASM_EXTABLE(ftrace_test_nop5, 3b)
+               : "=r"(faulted) : "0" (faulted));
+
+       switch (faulted) {
+       case 0:
+               pr_info("ftrace: converting mcount calls to 0f 1f 44 00 00\n");
+               ftrace_nop = (unsigned long *)ftrace_test_p6nop;
+               break;
+       case 1:
+               pr_info("ftrace: converting mcount calls to 66 66 66 66 90\n");
+               ftrace_nop = (unsigned long *)ftrace_test_nop5;
+               break;
+       case 2:
+               pr_info("ftrace: converting mcount calls to jmp . + 5\n");
+               ftrace_nop = (unsigned long *)ftrace_test_jmp;
+               break;
+       }
+
+       /* The return code is returned via data */
+       *(unsigned long *)data = 0;
 
        return 0;
 }
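For reference, the three 5-byte nop candidates probed above, with byte
values taken from the pr_info messages; the "jmp . + 5" variant is laid
out as a 2-byte short jmp over 3 filler bytes, per the comment in the asm:

#include <stdio.h>

static const unsigned char p6_nop5[5]  = { 0x0f, 0x1f, 0x44, 0x00, 0x00 };
static const unsigned char nop5_alt[5] = { 0x66, 0x66, 0x66, 0x66, 0x90 };
static const unsigned char jmp5[5]     = { 0xeb, 0x03, 0x90, 0x90, 0x90 };

int main(void)
{
        int i;

        for (i = 0; i < 5; i++)
                printf("%02x %02x %02x\n", p6_nop5[i], nop5_alt[i], jmp5[i]);
        return 0;
}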
index 9eca5ba7a6b1dcfe543fc644ab049e40b9139a9f..2ec2de8d8c46525a13d91a731366d5cfbce4b37b 100644 (file)
@@ -179,8 +179,10 @@ static int __init physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
         * is an example).
         */
        if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID &&
-               (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL))
+               (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) {
+               printk(KERN_DEBUG "system APIC only can use physical flat");
                return 1;
+       }
 #endif
 
        return 0;
index 33581d94a90e5bebea90a9a26d413524947c7340..bfd532843df68f46b43ac822b581534bb23771ac 100644 (file)
@@ -341,12 +341,12 @@ static __init void map_mmioh_high(int max_pnode)
 
 static __init void uv_rtc_init(void)
 {
-       long status, ticks_per_sec, drift;
+       long status;
+       u64 ticks_per_sec;
 
-       status =
-           x86_bios_freq_base(BIOS_FREQ_BASE_REALTIME_CLOCK, &ticks_per_sec,
-                                       &drift);
-       if (status != 0 || ticks_per_sec < 100000) {
+       status = uv_bios_freq_base(BIOS_FREQ_BASE_REALTIME_CLOCK,
+                                       &ticks_per_sec);
+       if (status != BIOS_STATUS_SUCCESS || ticks_per_sec < 100000) {
                printk(KERN_WARNING
                        "unable to determine platform RTC clock frequency, "
                        "guessing.\n");
@@ -356,7 +356,22 @@ static __init void uv_rtc_init(void)
                sn_rtc_cycles_per_second = ticks_per_sec;
 }
 
-static bool uv_system_inited;
+/*
+ * Called on each cpu to initialize the per_cpu UV data area.
+ *     ZZZ hotplug not supported yet
+ */
+void __cpuinit uv_cpu_init(void)
+{
+       /* CPU 0 initialization will be done via uv_system_init. */
+       if (!uv_blade_info)
+               return;
+
+       uv_blade_info[uv_numa_blade_id()].nr_online_cpus++;
+
+       if (get_uv_system_type() == UV_NON_UNIQUE_APIC)
+               set_x2apic_extra_bits(uv_hub_info->pnode);
+}
+
 
 void __init uv_system_init(void)
 {
@@ -412,6 +427,9 @@ void __init uv_system_init(void)
        gnode_upper = (((unsigned long)node_id.s.node_id) &
                       ~((1 << n_val) - 1)) << m_val;
 
+       uv_bios_init();
+       uv_bios_get_sn_info(0, &uv_type, &sn_partition_id,
+                           &uv_coherency_id, &uv_region_size);
        uv_rtc_init();
 
        for_each_present_cpu(cpu) {
@@ -433,7 +451,7 @@ void __init uv_system_init(void)
                uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1;
                uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper;
                uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base;
-               uv_cpu_hub_info(cpu)->coherency_domain_number = 0;/* ZZZ */
+               uv_cpu_hub_info(cpu)->coherency_domain_number = uv_coherency_id;
                uv_node_to_blade[nid] = blade;
                uv_cpu_to_blade[cpu] = blade;
                max_pnode = max(pnode, max_pnode);
@@ -448,21 +466,6 @@ void __init uv_system_init(void)
        map_mmr_high(max_pnode);
        map_config_high(max_pnode);
        map_mmioh_high(max_pnode);
-       uv_system_inited = true;
-}
 
-/*
- * Called on each cpu to initialize the per_cpu UV data area.
- *     ZZZ hotplug not supported yet
- */
-void __cpuinit uv_cpu_init(void)
-{
-       BUG_ON(!uv_system_inited);
-
-       uv_blade_info[uv_numa_blade_id()].nr_online_cpus++;
-
-       if (get_uv_system_type() == UV_NON_UNIQUE_APIC)
-               set_x2apic_extra_bits(uv_hub_info->pnode);
+       uv_cpu_init();
 }
-
-
index acf62fc233da6c0ee8196d18c53a9a55d31bd10f..77017e834cf7178b4fceb957a7e706ec4f2d220a 100644 (file)
@@ -1,29 +1,49 @@
 #include <linux/clocksource.h>
 #include <linux/clockchips.h>
+#include <linux/interrupt.h>
+#include <linux/sysdev.h>
 #include <linux/delay.h>
 #include <linux/errno.h>
 #include <linux/hpet.h>
 #include <linux/init.h>
-#include <linux/sysdev.h>
+#include <linux/cpu.h>
 #include <linux/pm.h>
+#include <linux/io.h>
 
 #include <asm/fixmap.h>
-#include <asm/hpet.h>
 #include <asm/i8253.h>
-#include <asm/io.h>
+#include <asm/hpet.h>
 
-#define HPET_MASK      CLOCKSOURCE_MASK(32)
-#define HPET_SHIFT     22
+#define HPET_MASK                      CLOCKSOURCE_MASK(32)
+#define HPET_SHIFT                     22
 
 /* FSEC = 10^-15
    NSEC = 10^-9 */
-#define FSEC_PER_NSEC  1000000L
+#define FSEC_PER_NSEC                  1000000L
+
+#define HPET_DEV_USED_BIT              2
+#define HPET_DEV_USED                  (1 << HPET_DEV_USED_BIT)
+#define HPET_DEV_VALID                 0x8
+#define HPET_DEV_FSB_CAP               0x1000
+#define HPET_DEV_PERI_CAP              0x2000
+
+#define EVT_TO_HPET_DEV(evt) container_of(evt, struct hpet_dev, evt)
 
 /*
  * HPET address is set in acpi/boot.c, when an ACPI entry exists
  */
-unsigned long hpet_address;
-static void __iomem *hpet_virt_address;
+unsigned long                          hpet_address;
+unsigned long                          hpet_num_timers;
+static void __iomem                    *hpet_virt_address;
+
+struct hpet_dev {
+       struct clock_event_device       evt;
+       unsigned int                    num;
+       int                             cpu;
+       unsigned int                    irq;
+       unsigned int                    flags;
+       char                            name[10];
+};
 
 unsigned long hpet_readl(unsigned long a)
 {
@@ -59,7 +79,7 @@ static inline void hpet_clear_mapping(void)
 static int boot_hpet_disable;
 int hpet_force_user;
 
-static int __init hpet_setup(char* str)
+static int __init hpet_setup(char *str)
 {
        if (str) {
                if (!strncmp("disable", str, 7))
@@ -80,7 +100,7 @@ __setup("nohpet", disable_hpet);
 
 static inline int is_hpet_capable(void)
 {
-       return (!boot_hpet_disable && hpet_address);
+       return !boot_hpet_disable && hpet_address;
 }
 
 /*
@@ -102,6 +122,9 @@ EXPORT_SYMBOL_GPL(is_hpet_enabled);
  * timer 0 and timer 1 in case of RTC emulation.
  */
 #ifdef CONFIG_HPET
+
+static void hpet_reserve_msi_timers(struct hpet_data *hd);
+
 static void hpet_reserve_platform_timers(unsigned long id)
 {
        struct hpet __iomem *hpet = hpet_virt_address;
@@ -111,10 +134,10 @@ static void hpet_reserve_platform_timers(unsigned long id)
 
        nrtimers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1;
 
-       memset(&hd, 0, sizeof (hd));
-       hd.hd_phys_address = hpet_address;
-       hd.hd_address = hpet;
-       hd.hd_nirqs = nrtimers;
+       memset(&hd, 0, sizeof(hd));
+       hd.hd_phys_address      = hpet_address;
+       hd.hd_address           = hpet;
+       hd.hd_nirqs             = nrtimers;
        hpet_reserve_timer(&hd, 0);
 
 #ifdef CONFIG_HPET_EMULATE_RTC
@@ -130,10 +153,12 @@ static void hpet_reserve_platform_timers(unsigned long id)
        hd.hd_irq[1] = HPET_LEGACY_RTC;
 
        for (i = 2; i < nrtimers; timer++, i++) {
-               hd.hd_irq[i] = (readl(&timer->hpet_config) & Tn_INT_ROUTE_CNF_MASK) >>
-                       Tn_INT_ROUTE_CNF_SHIFT;
+               hd.hd_irq[i] = (readl(&timer->hpet_config) &
+                       Tn_INT_ROUTE_CNF_MASK) >> Tn_INT_ROUTE_CNF_SHIFT;
        }
 
+       hpet_reserve_msi_timers(&hd);
+
        hpet_alloc(&hd);
 
 }
@@ -227,60 +252,70 @@ static void hpet_legacy_clockevent_register(void)
        printk(KERN_DEBUG "hpet clockevent registered\n");
 }
 
-static void hpet_legacy_set_mode(enum clock_event_mode mode,
-                         struct clock_event_device *evt)
+static int hpet_setup_msi_irq(unsigned int irq);
+
+static void hpet_set_mode(enum clock_event_mode mode,
+                         struct clock_event_device *evt, int timer)
 {
        unsigned long cfg, cmp, now;
        uint64_t delta;
 
-       switch(mode) {
+       switch (mode) {
        case CLOCK_EVT_MODE_PERIODIC:
-               delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * hpet_clockevent.mult;
-               delta >>= hpet_clockevent.shift;
+               delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * evt->mult;
+               delta >>= evt->shift;
                now = hpet_readl(HPET_COUNTER);
                cmp = now + (unsigned long) delta;
-               cfg = hpet_readl(HPET_T0_CFG);
+               cfg = hpet_readl(HPET_Tn_CFG(timer));
                cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC |
                       HPET_TN_SETVAL | HPET_TN_32BIT;
-               hpet_writel(cfg, HPET_T0_CFG);
+               hpet_writel(cfg, HPET_Tn_CFG(timer));
                /*
                 * The first write after writing TN_SETVAL to the
                 * config register sets the counter value, the second
                 * write sets the period.
                 */
-               hpet_writel(cmp, HPET_T0_CMP);
+               hpet_writel(cmp, HPET_Tn_CMP(timer));
                udelay(1);
-               hpet_writel((unsigned long) delta, HPET_T0_CMP);
+               hpet_writel((unsigned long) delta, HPET_Tn_CMP(timer));
                break;
 
        case CLOCK_EVT_MODE_ONESHOT:
-               cfg = hpet_readl(HPET_T0_CFG);
+               cfg = hpet_readl(HPET_Tn_CFG(timer));
                cfg &= ~HPET_TN_PERIODIC;
                cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
-               hpet_writel(cfg, HPET_T0_CFG);
+               hpet_writel(cfg, HPET_Tn_CFG(timer));
                break;
 
        case CLOCK_EVT_MODE_UNUSED:
        case CLOCK_EVT_MODE_SHUTDOWN:
-               cfg = hpet_readl(HPET_T0_CFG);
+               cfg = hpet_readl(HPET_Tn_CFG(timer));
                cfg &= ~HPET_TN_ENABLE;
-               hpet_writel(cfg, HPET_T0_CFG);
+               hpet_writel(cfg, HPET_Tn_CFG(timer));
                break;
 
        case CLOCK_EVT_MODE_RESUME:
-               hpet_enable_legacy_int();
+               if (timer == 0) {
+                       hpet_enable_legacy_int();
+               } else {
+                       struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
+                       hpet_setup_msi_irq(hdev->irq);
+                       disable_irq(hdev->irq);
+                       irq_set_affinity(hdev->irq, cpumask_of_cpu(hdev->cpu));
+                       enable_irq(hdev->irq);
+               }
                break;
        }
 }
 
-static int hpet_legacy_next_event(unsigned long delta,
-                                 struct clock_event_device *evt)
+static int hpet_next_event(unsigned long delta,
+                          struct clock_event_device *evt, int timer)
 {
        u32 cnt;
 
        cnt = hpet_readl(HPET_COUNTER);
        cnt += (u32) delta;
-       hpet_writel(cnt, HPET_T0_CMP);
+       hpet_writel(cnt, HPET_Tn_CMP(timer));
 
        /*
         * We need to read back the CMP register to make sure that
@@ -292,6 +327,347 @@ static int hpet_legacy_next_event(unsigned long delta,
        return (s32)((u32)hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0;
 }
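The (s32)((u32)counter - cnt) >= 0 test above is a wraparound-safe check
for "did the counter already pass the comparator?". A small demonstration
of why the signed cast of the unsigned difference matters:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint32_t counter = 0x00000005;  /* HPET counter wrapped past 0 */
        uint32_t cnt     = 0xfffffff0;  /* comparator set just before wrap */

        /* a naive compare misfires after the wrap: counter < cnt */
        printf("naive:  %d\n", counter >= cnt);                 /* 0 */
        /* signed difference handles it: 5 - 0xfffffff0 == +21 */
        printf("signed: %d\n", (int32_t)(counter - cnt) >= 0);  /* 1 */
        return 0;
}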
 
+static void hpet_legacy_set_mode(enum clock_event_mode mode,
+                       struct clock_event_device *evt)
+{
+       hpet_set_mode(mode, evt, 0);
+}
+
+static int hpet_legacy_next_event(unsigned long delta,
+                       struct clock_event_device *evt)
+{
+       return hpet_next_event(delta, evt, 0);
+}
+
+/*
+ * HPET MSI Support
+ */
+#ifdef CONFIG_PCI_MSI
+
+static DEFINE_PER_CPU(struct hpet_dev *, cpu_hpet_dev);
+static struct hpet_dev *hpet_devs;
+
+void hpet_msi_unmask(unsigned int irq)
+{
+       struct hpet_dev *hdev = get_irq_data(irq);
+       unsigned long cfg;
+
+       /* unmask it */
+       cfg = hpet_readl(HPET_Tn_CFG(hdev->num));
+       cfg |= HPET_TN_FSB;
+       hpet_writel(cfg, HPET_Tn_CFG(hdev->num));
+}
+
+void hpet_msi_mask(unsigned int irq)
+{
+       unsigned long cfg;
+       struct hpet_dev *hdev = get_irq_data(irq);
+
+       /* mask it */
+       cfg = hpet_readl(HPET_Tn_CFG(hdev->num));
+       cfg &= ~HPET_TN_FSB;
+       hpet_writel(cfg, HPET_Tn_CFG(hdev->num));
+}
+
+void hpet_msi_write(unsigned int irq, struct msi_msg *msg)
+{
+       struct hpet_dev *hdev = get_irq_data(irq);
+
+       hpet_writel(msg->data, HPET_Tn_ROUTE(hdev->num));
+       hpet_writel(msg->address_lo, HPET_Tn_ROUTE(hdev->num) + 4);
+}
+
+void hpet_msi_read(unsigned int irq, struct msi_msg *msg)
+{
+       struct hpet_dev *hdev = get_irq_data(irq);
+
+       msg->data = hpet_readl(HPET_Tn_ROUTE(hdev->num));
+       msg->address_lo = hpet_readl(HPET_Tn_ROUTE(hdev->num) + 4);
+       msg->address_hi = 0;
+}
+
+static void hpet_msi_set_mode(enum clock_event_mode mode,
+                               struct clock_event_device *evt)
+{
+       struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
+       hpet_set_mode(mode, evt, hdev->num);
+}
+
+static int hpet_msi_next_event(unsigned long delta,
+                               struct clock_event_device *evt)
+{
+       struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
+       return hpet_next_event(delta, evt, hdev->num);
+}
+
+static int hpet_setup_msi_irq(unsigned int irq)
+{
+       if (arch_setup_hpet_msi(irq)) {
+               destroy_irq(irq);
+               return -EINVAL;
+       }
+       return 0;
+}
+
+static int hpet_assign_irq(struct hpet_dev *dev)
+{
+       unsigned int irq;
+
+       irq = create_irq();
+       if (!irq)
+               return -EINVAL;
+
+       set_irq_data(irq, dev);
+
+       if (hpet_setup_msi_irq(irq))
+               return -EINVAL;
+
+       dev->irq = irq;
+       return 0;
+}
+
+static irqreturn_t hpet_interrupt_handler(int irq, void *data)
+{
+       struct hpet_dev *dev = (struct hpet_dev *)data;
+       struct clock_event_device *hevt = &dev->evt;
+
+       if (!hevt->event_handler) {
+               printk(KERN_INFO "Spurious HPET timer interrupt on HPET timer %d\n",
+                               dev->num);
+               return IRQ_HANDLED;
+       }
+
+       hevt->event_handler(hevt);
+       return IRQ_HANDLED;
+}
+
+static int hpet_setup_irq(struct hpet_dev *dev)
+{
+
+       if (request_irq(dev->irq, hpet_interrupt_handler,
+                       IRQF_SHARED|IRQF_NOBALANCING, dev->name, dev))
+               return -1;
+
+       disable_irq(dev->irq);
+       irq_set_affinity(dev->irq, cpumask_of_cpu(dev->cpu));
+       enable_irq(dev->irq);
+
+       printk(KERN_DEBUG "hpet: %s irq %d for MSI\n",
+                        dev->name, dev->irq);
+
+       return 0;
+}
+
+/* This must be called on the specified @cpu */
+static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu)
+{
+       struct clock_event_device *evt = &hdev->evt;
+       uint64_t hpet_freq;
+
+       WARN_ON(cpu != smp_processor_id());
+       if (!(hdev->flags & HPET_DEV_VALID))
+               return;
+
+       if (hpet_setup_msi_irq(hdev->irq))
+               return;
+
+       hdev->cpu = cpu;
+       per_cpu(cpu_hpet_dev, cpu) = hdev;
+       evt->name = hdev->name;
+       hpet_setup_irq(hdev);
+       evt->irq = hdev->irq;
+
+       evt->rating = 110;
+       evt->features = CLOCK_EVT_FEAT_ONESHOT;
+       if (hdev->flags & HPET_DEV_PERI_CAP)
+               evt->features |= CLOCK_EVT_FEAT_PERIODIC;
+
+       evt->set_mode = hpet_msi_set_mode;
+       evt->set_next_event = hpet_msi_next_event;
+       evt->shift = 32;
+
+       /*
+        * The period is a femto seconds value. We need to calculate the
+        * scaled math multiplication factor for nanosecond to hpet tick
+        * conversion.
+        */
+       hpet_freq = 1000000000000000ULL;
+       do_div(hpet_freq, hpet_period);
+       evt->mult = div_sc((unsigned long) hpet_freq,
+                                     NSEC_PER_SEC, evt->shift);
+       /* Calculate the max delta */
+       evt->max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, evt);
+       /* 5 usec minimum reprogramming delta. */
+       evt->min_delta_ns = 5000;
+
+       evt->cpumask = cpumask_of_cpu(hdev->cpu);
+       clockevents_register_device(evt);
+}
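The mult/shift setup above is the usual clockevents fixed-point
conversion: hpet_freq = 10^15 / hpet_period ticks per second, then
mult = hpet_freq * 2^shift / NSEC_PER_SEC, which is what div_sc()
computes. A quick stand-alone check of the arithmetic; the period is an
example value for a 14.318 MHz HPET, real hardware reports its own:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint64_t hpet_period = 69841279ULL;     /* femtoseconds per tick */
        uint64_t hpet_freq = 1000000000000000ULL / hpet_period;
        unsigned int shift = 32;
        /* mult such that (ns * mult) >> shift gives hpet ticks */
        uint64_t mult = (hpet_freq << shift) / 1000000000ULL;

        printf("freq = %llu Hz, mult = %llu\n",
               (unsigned long long)hpet_freq, (unsigned long long)mult);
        return 0;
}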
+
+#ifdef CONFIG_HPET
+/* Reserve at least one timer for userspace (/dev/hpet) */
+#define RESERVE_TIMERS 1
+#else
+#define RESERVE_TIMERS 0
+#endif
+
+static void hpet_msi_capability_lookup(unsigned int start_timer)
+{
+       unsigned int id;
+       unsigned int num_timers;
+       unsigned int num_timers_used = 0;
+       int i;
+
+       id = hpet_readl(HPET_ID);
+
+       num_timers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT);
+       num_timers++; /* Value read out starts from 0 */
+
+       hpet_devs = kzalloc(sizeof(struct hpet_dev) * num_timers, GFP_KERNEL);
+       if (!hpet_devs)
+               return;
+
+       hpet_num_timers = num_timers;
+
+       for (i = start_timer; i < num_timers - RESERVE_TIMERS; i++) {
+               struct hpet_dev *hdev = &hpet_devs[num_timers_used];
+               unsigned long cfg = hpet_readl(HPET_Tn_CFG(i));
+
+               /* Only consider HPET timer with MSI support */
+               if (!(cfg & HPET_TN_FSB_CAP))
+                       continue;
+
+               hdev->flags = 0;
+               if (cfg & HPET_TN_PERIODIC_CAP)
+                       hdev->flags |= HPET_DEV_PERI_CAP;
+               hdev->num = i;
+
+               sprintf(hdev->name, "hpet%d", i);
+               if (hpet_assign_irq(hdev))
+                       continue;
+
+               hdev->flags |= HPET_DEV_FSB_CAP;
+               hdev->flags |= HPET_DEV_VALID;
+               num_timers_used++;
+               if (num_timers_used == num_possible_cpus())
+                       break;
+       }
+
+       printk(KERN_INFO "HPET: %d timers in total, %d timers will be used for per-cpu timer\n",
+               num_timers, num_timers_used);
+}
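How many timers end up as per-cpu clockevents follows from the loop
bounds: FSB-capable timers in [start_timer, num_timers - RESERVE_TIMERS),
capped at num_possible_cpus(). A toy calculation that assumes every
candidate timer is FSB-capable (all counts are examples):

#include <stdio.h>

int main(void)
{
        int num_timers = 8, start_timer = 2; /* legacy ints took 0 and 1 */
        int reserve = 1;                     /* RESERVE_TIMERS, /dev/hpet */
        int possible_cpus = 4;

        int candidates = num_timers - reserve - start_timer;    /* 5 */
        int used = candidates < possible_cpus ? candidates : possible_cpus;

        printf("%d timers in total, %d used as per-cpu timers\n",
               num_timers, used);      /* 8 in total, 4 used */
        return 0;
}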
+
+#ifdef CONFIG_HPET
+static void hpet_reserve_msi_timers(struct hpet_data *hd)
+{
+       int i;
+
+       if (!hpet_devs)
+               return;
+
+       for (i = 0; i < hpet_num_timers; i++) {
+               struct hpet_dev *hdev = &hpet_devs[i];
+
+               if (!(hdev->flags & HPET_DEV_VALID))
+                       continue;
+
+               hd->hd_irq[hdev->num] = hdev->irq;
+               hpet_reserve_timer(hd, hdev->num);
+       }
+}
+#endif
+
+static struct hpet_dev *hpet_get_unused_timer(void)
+{
+       int i;
+
+       if (!hpet_devs)
+               return NULL;
+
+       for (i = 0; i < hpet_num_timers; i++) {
+               struct hpet_dev *hdev = &hpet_devs[i];
+
+               if (!(hdev->flags & HPET_DEV_VALID))
+                       continue;
+               if (test_and_set_bit(HPET_DEV_USED_BIT,
+                       (unsigned long *)&hdev->flags))
+                       continue;
+               return hdev;
+       }
+       return NULL;
+}
+
+struct hpet_work_struct {
+       struct delayed_work work;
+       struct completion complete;
+};
+
+static void hpet_work(struct work_struct *w)
+{
+       struct hpet_dev *hdev;
+       int cpu = smp_processor_id();
+       struct hpet_work_struct *hpet_work;
+
+       hpet_work = container_of(w, struct hpet_work_struct, work.work);
+
+       hdev = hpet_get_unused_timer();
+       if (hdev)
+               init_one_hpet_msi_clockevent(hdev, cpu);
+
+       complete(&hpet_work->complete);
+}
+
+static int hpet_cpuhp_notify(struct notifier_block *n,
+               unsigned long action, void *hcpu)
+{
+       unsigned long cpu = (unsigned long)hcpu;
+       struct hpet_work_struct work;
+       struct hpet_dev *hdev = per_cpu(cpu_hpet_dev, cpu);
+
+       switch (action & 0xf) {
+       case CPU_ONLINE:
+               INIT_DELAYED_WORK(&work.work, hpet_work);
+               init_completion(&work.complete);
+               /* FIXME: add schedule_work_on() */
+               schedule_delayed_work_on(cpu, &work.work, 0);
+               wait_for_completion(&work.complete);
+               break;
+       case CPU_DEAD:
+               if (hdev) {
+                       free_irq(hdev->irq, hdev);
+                       hdev->flags &= ~HPET_DEV_USED;
+                       per_cpu(cpu_hpet_dev, cpu) = NULL;
+               }
+               break;
+       }
+       return NOTIFY_OK;
+}
+#else
+
+static int hpet_setup_msi_irq(unsigned int irq)
+{
+       return 0;
+}
+static void hpet_msi_capability_lookup(unsigned int start_timer)
+{
+       return;
+}
+
+#ifdef CONFIG_HPET
+static void hpet_reserve_msi_timers(struct hpet_data *hd)
+{
+       return;
+}
+#endif
+
+static int hpet_cpuhp_notify(struct notifier_block *n,
+               unsigned long action, void *hcpu)
+{
+       return NOTIFY_OK;
+}
+
+#endif
+
 /*
  * Clock source related code
  */
@@ -427,8 +803,10 @@ int __init hpet_enable(void)
 
        if (id & HPET_ID_LEGSUP) {
                hpet_legacy_clockevent_register();
+               hpet_msi_capability_lookup(2);
                return 1;
        }
+       hpet_msi_capability_lookup(0);
        return 0;
 
 out_nohpet:
@@ -445,6 +823,8 @@ out_nohpet:
  */
 static __init int hpet_late_init(void)
 {
+       int cpu;
+
        if (boot_hpet_disable)
                return -ENODEV;
 
@@ -460,6 +840,13 @@ static __init int hpet_late_init(void)
 
        hpet_reserve_platform_timers(hpet_readl(HPET_ID));
 
+       for_each_online_cpu(cpu) {
+               hpet_cpuhp_notify(NULL, CPU_ONLINE, (void *)(long)cpu);
+       }
+
+       /* This notifier should be called after workqueue is ready */
+       hotcpu_notifier(hpet_cpuhp_notify, -20);
+
        return 0;
 }
 fs_initcall(hpet_late_init);
similarity index 68%
rename from arch/x86/kernel/io_apic_64.c
rename to arch/x86/kernel/io_apic.c
index 02063ae042f72ae979a13b57035bd0d91f1dcb59..b764d7429c6168a4eb70b151dc3b07cd553f8afd 100644 (file)
 #include <linux/sched.h>
 #include <linux/pci.h>
 #include <linux/mc146818rtc.h>
+#include <linux/compiler.h>
 #include <linux/acpi.h>
+#include <linux/module.h>
 #include <linux/sysdev.h>
 #include <linux/msi.h>
 #include <linux/htirq.h>
-#include <linux/dmar.h>
-#include <linux/jiffies.h>
+#include <linux/freezer.h>
+#include <linux/kthread.h>
+#include <linux/jiffies.h>     /* time_after() */
 #ifdef CONFIG_ACPI
 #include <acpi/acpi_bus.h>
 #endif
 #include <linux/bootmem.h>
 #include <linux/dmar.h>
+#include <linux/hpet.h>
 
 #include <asm/idle.h>
 #include <asm/io.h>
 #include <asm/proto.h>
 #include <asm/acpi.h>
 #include <asm/dma.h>
+#include <asm/timer.h>
 #include <asm/i8259.h>
 #include <asm/nmi.h>
 #include <asm/msidef.h>
 #include <asm/hypertransport.h>
+#include <asm/setup.h>
 #include <asm/irq_remapping.h>
+#include <asm/hpet.h>
+#include <asm/uv/uv_hub.h>
+#include <asm/uv/uv_irq.h>
 
 #include <mach_ipi.h>
 #include <mach_apic.h>
+#include <mach_apicdef.h>
 
 #define __apicdebuginit(type) static type __init
 
-struct irq_cfg {
-       cpumask_t domain;
-       cpumask_t old_domain;
-       unsigned move_cleanup_count;
-       u8 vector;
-       u8 move_in_progress : 1;
-};
-
-/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
-static struct irq_cfg irq_cfg[NR_IRQS] __read_mostly = {
-       [0]  = { .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR,  },
-       [1]  = { .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR,  },
-       [2]  = { .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR,  },
-       [3]  = { .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR,  },
-       [4]  = { .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR,  },
-       [5]  = { .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR,  },
-       [6]  = { .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR,  },
-       [7]  = { .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR,  },
-       [8]  = { .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR,  },
-       [9]  = { .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR,  },
-       [10] = { .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, },
-       [11] = { .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, },
-       [12] = { .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, },
-       [13] = { .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, },
-       [14] = { .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, },
-       [15] = { .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
-};
-
-static int assign_irq_vector(int irq, cpumask_t mask);
-
-int first_system_vector = 0xfe;
-
-char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE};
-
-int sis_apic_bug; /* not actually supported, dummy for compile */
-
-static int no_timer_check;
-
-static int disable_timer_pin_1 __initdata;
-
-int timer_through_8259 __initdata;
-
-/* Where if anywhere is the i8259 connect in external int mode */
-static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
+/*
+ *      Is the SiS APIC rmw bug present ?
+ *      -1 = don't know, 0 = no, 1 = yes
+ */
+int sis_apic_bug = -1;
 
 static DEFINE_SPINLOCK(ioapic_lock);
 static DEFINE_SPINLOCK(vector_lock);
@@ -110,9 +81,6 @@ static DEFINE_SPINLOCK(vector_lock);
  */
 int nr_ioapic_registers[MAX_IO_APICS];
 
-/* I/O APIC RTE contents at the OS boot up */
-struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS];
-
 /* I/O APIC entries */
 struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
 int nr_ioapics;
@@ -123,11 +91,69 @@ struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
 /* # of MP IRQ source entries */
 int mp_irq_entries;
 
+#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
+int mp_bus_id_to_type[MAX_MP_BUSSES];
+#endif
+
 DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
 
+int skip_ioapic_setup;
+
+static int __init parse_noapic(char *str)
+{
+       /* disable IO-APIC */
+       disable_ioapic_setup();
+       return 0;
+}
+early_param("noapic", parse_noapic);
+
+struct irq_pin_list;
+struct irq_cfg {
+       unsigned int irq;
+       struct irq_pin_list *irq_2_pin;
+       cpumask_t domain;
+       cpumask_t old_domain;
+       unsigned move_cleanup_count;
+       u8 vector;
+       u8 move_in_progress : 1;
+};
+
+/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
+static struct irq_cfg irq_cfgx[NR_IRQS] = {
+       [0]  = { .irq =  0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR,  },
+       [1]  = { .irq =  1, .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR,  },
+       [2]  = { .irq =  2, .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR,  },
+       [3]  = { .irq =  3, .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR,  },
+       [4]  = { .irq =  4, .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR,  },
+       [5]  = { .irq =  5, .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR,  },
+       [6]  = { .irq =  6, .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR,  },
+       [7]  = { .irq =  7, .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR,  },
+       [8]  = { .irq =  8, .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR,  },
+       [9]  = { .irq =  9, .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR,  },
+       [10] = { .irq = 10, .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, },
+       [11] = { .irq = 11, .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, },
+       [12] = { .irq = 12, .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, },
+       [13] = { .irq = 13, .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, },
+       [14] = { .irq = 14, .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, },
+       [15] = { .irq = 15, .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
+};
+
+#define for_each_irq_cfg(irq, cfg)             \
+       for (irq = 0, cfg = irq_cfgx; irq < nr_irqs; irq++, cfg++)
+
+static struct irq_cfg *irq_cfg(unsigned int irq)
+{
+       return irq < nr_irqs ? irq_cfgx + irq : NULL;
+}
+
+static struct irq_cfg *irq_cfg_alloc(unsigned int irq)
+{
+       return irq_cfg(irq);
+}
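+
+/*
+ * An illustrative usage sketch (not part of the patch): with the static
+ * table and accessor above, per-IRQ data is a bounds-checked array
+ * lookup, e.g.
+ *
+ *	struct irq_cfg *cfg = irq_cfg(0);
+ *	if (cfg)
+ *		printk(KERN_DEBUG "IRQ0 vector: 0x%x\n", cfg->vector);
+ *
+ * irq_cfg_alloc() is a plain lookup for now; it presumably exists as a
+ * separate entry point for when irq_cfg entries become dynamically
+ * allocated.
+ */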
+
 /*
- * Rough estimation of how many shared IRQs there are, can
- * be changed anytime.
+ * Rough estimation of how many shared IRQs there are; it can be changed
+ * anytime.
  */
 #define MAX_PLUS_SHARED_IRQS NR_IRQS
 #define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
@@ -139,9 +165,36 @@ DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
  * between pins and IRQs.
  */
 
-static struct irq_pin_list {
-       short apic, pin, next;
-} irq_2_pin[PIN_MAP_SIZE];
+struct irq_pin_list {
+       int apic, pin;
+       struct irq_pin_list *next;
+};
+
+static struct irq_pin_list irq_2_pin_head[PIN_MAP_SIZE];
+static struct irq_pin_list *irq_2_pin_ptr;
+
+static void __init irq_2_pin_init(void)
+{
+       struct irq_pin_list *pin = irq_2_pin_head;
+       int i;
+
+       for (i = 1; i < PIN_MAP_SIZE; i++)
+               pin[i-1].next = &pin[i];
+
+       irq_2_pin_ptr = &pin[0];
+}
+
+static struct irq_pin_list *get_one_free_irq_2_pin(void)
+{
+       struct irq_pin_list *pin = irq_2_pin_ptr;
+
+       if (!pin)
+               panic("cannot get more irq_2_pin\n");
+
+       irq_2_pin_ptr = pin->next;
+       pin->next = NULL;
+       return pin;
+}
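+
+/*
+ * Sketch of the pool mechanics: irq_2_pin_init() chains all PIN_MAP_SIZE
+ * entries into one free list and each get_one_free_irq_2_pin() call pops
+ * the head, e.g.
+ *
+ *	entry = get_one_free_irq_2_pin();
+ *	entry->apic = apic;
+ *	entry->pin  = pin;
+ *
+ * as add_pin_to_irq() does below.  Entries are never handed back, so
+ * exhausting the pool is fatal.
+ */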
 
 struct io_apic {
        unsigned int index;
@@ -172,10 +225,15 @@ static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned i
 /*
  * Re-write a value: to be used for read-modify-write
  * cycles where the read already set up the index register.
+ *
+ * Older SiS APICs require that we rewrite the index register.
  */
-static inline void io_apic_modify(unsigned int apic, unsigned int value)
+static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
 {
        struct io_apic __iomem *io_apic = io_apic_base(apic);
+
+       if (sis_apic_bug)
+               writel(reg, &io_apic->index);
        writel(value, &io_apic->data);
 }
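 
 /*
  * Illustrative read-modify-write cycle using the new three-argument
  * signature (a sketch of what callers such as io_apic_modify_irq() do
  * under ioapic_lock):
  *
  *	reg = io_apic_read(apic, 0x10 + pin * 2);
  *	reg |= IO_APIC_REDIR_MASKED;
  *	io_apic_modify(apic, 0x10 + pin * 2, reg);
  *
  * io_apic_read() sets up the index register; only buggy SiS parts need
  * it rewritten before the data write, hence the sis_apic_bug test.
  */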
 
@@ -183,16 +241,17 @@ static bool io_apic_level_ack_pending(unsigned int irq)
 {
        struct irq_pin_list *entry;
        unsigned long flags;
+       struct irq_cfg *cfg = irq_cfg(irq);
 
        spin_lock_irqsave(&ioapic_lock, flags);
-       entry = irq_2_pin + irq;
+       entry = cfg->irq_2_pin;
        for (;;) {
                unsigned int reg;
                int pin;
 
-               pin = entry->pin;
-               if (pin == -1)
+               if (!entry)
                        break;
+               pin = entry->pin;
                reg = io_apic_read(entry->apic, 0x10 + pin*2);
                /* Is the remote IRR bit set? */
                if (reg & IO_APIC_REDIR_REMOTE_IRR) {
@@ -201,45 +260,13 @@ static bool io_apic_level_ack_pending(unsigned int irq)
                }
                if (!entry->next)
                        break;
-               entry = irq_2_pin + entry->next;
+               entry = entry->next;
        }
        spin_unlock_irqrestore(&ioapic_lock, flags);
 
        return false;
 }
 
-/*
- * Synchronize the IO-APIC and the CPU by doing
- * a dummy read from the IO-APIC
- */
-static inline void io_apic_sync(unsigned int apic)
-{
-       struct io_apic __iomem *io_apic = io_apic_base(apic);
-       readl(&io_apic->data);
-}
-
-#define __DO_ACTION(R, ACTION, FINAL)                                  \
-                                                                       \
-{                                                                      \
-       int pin;                                                        \
-       struct irq_pin_list *entry = irq_2_pin + irq;                   \
-                                                                       \
-       BUG_ON(irq >= NR_IRQS);                                         \
-       for (;;) {                                                      \
-               unsigned int reg;                                       \
-               pin = entry->pin;                                       \
-               if (pin == -1)                                          \
-                       break;                                          \
-               reg = io_apic_read(entry->apic, 0x10 + R + pin*2);      \
-               reg ACTION;                                             \
-               io_apic_modify(entry->apic, reg);                       \
-               FINAL;                                                  \
-               if (!entry->next)                                       \
-                       break;                                          \
-               entry = irq_2_pin + entry->next;                        \
-       }                                                               \
-}
-
 union entry_union {
        struct { u32 w1, w2; };
        struct IO_APIC_route_entry entry;
@@ -299,59 +326,71 @@ static void ioapic_mask_entry(int apic, int pin)
 static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
 {
        int apic, pin;
-       struct irq_pin_list *entry = irq_2_pin + irq;
+       struct irq_cfg *cfg;
+       struct irq_pin_list *entry;
 
-       BUG_ON(irq >= NR_IRQS);
+       cfg = irq_cfg(irq);
+       entry = cfg->irq_2_pin;
        for (;;) {
                unsigned int reg;
+
+               if (!entry)
+                       break;
+
                apic = entry->apic;
                pin = entry->pin;
-               if (pin == -1)
-                       break;
+#ifdef CONFIG_INTR_REMAP
                /*
                 * With interrupt-remapping, destination information comes
                 * from interrupt-remapping table entry.
                 */
                if (!irq_remapped(irq))
                        io_apic_write(apic, 0x11 + pin*2, dest);
+#else
+               io_apic_write(apic, 0x11 + pin*2, dest);
+#endif
                reg = io_apic_read(apic, 0x10 + pin*2);
                reg &= ~IO_APIC_REDIR_VECTOR_MASK;
                reg |= vector;
-               io_apic_modify(apic, reg);
+               io_apic_modify(apic, 0x10 + pin*2, reg);
                if (!entry->next)
                        break;
-               entry = irq_2_pin + entry->next;
+               entry = entry->next;
        }
 }
 
+static int assign_irq_vector(int irq, cpumask_t mask);
+
 static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
 {
-       struct irq_cfg *cfg = irq_cfg + irq;
+       struct irq_cfg *cfg;
        unsigned long flags;
        unsigned int dest;
        cpumask_t tmp;
+       struct irq_desc *desc;
 
        cpus_and(tmp, mask, cpu_online_map);
        if (cpus_empty(tmp))
                return;
 
+       cfg = irq_cfg(irq);
        if (assign_irq_vector(irq, mask))
                return;
 
        cpus_and(tmp, cfg->domain, mask);
        dest = cpu_mask_to_apicid(tmp);
-
        /*
         * Only the high 8 bits are valid.
         */
        dest = SET_APIC_LOGICAL_ID(dest);
 
+       desc = irq_to_desc(irq);
        spin_lock_irqsave(&ioapic_lock, flags);
        __target_IO_APIC_irq(irq, dest, cfg->vector);
-       irq_desc[irq].affinity = mask;
+       desc->affinity = mask;
        spin_unlock_irqrestore(&ioapic_lock, flags);
 }
-#endif
+#endif /* CONFIG_SMP */
 
 /*
  * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
@@ -360,19 +399,30 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
  */
 static void add_pin_to_irq(unsigned int irq, int apic, int pin)
 {
-       static int first_free_entry = NR_IRQS;
-       struct irq_pin_list *entry = irq_2_pin + irq;
+       struct irq_cfg *cfg;
+       struct irq_pin_list *entry;
 
-       BUG_ON(irq >= NR_IRQS);
-       while (entry->next)
-               entry = irq_2_pin + entry->next;
+       /* first time this irq's cfg is referenced, so use the allocating lookup */
+       cfg = irq_cfg_alloc(irq);
+       entry = cfg->irq_2_pin;
+       if (!entry) {
+               entry = get_one_free_irq_2_pin();
+               cfg->irq_2_pin = entry;
+               entry->apic = apic;
+               entry->pin = pin;
+               return;
+       }
+
+       while (entry->next) {
+               /* not again, please */
+               if (entry->apic == apic && entry->pin == pin)
+                       return;
 
-       if (entry->pin != -1) {
-               entry->next = first_free_entry;
-               entry = irq_2_pin + entry->next;
-               if (++first_free_entry >= PIN_MAP_SIZE)
-                       panic("io_apic.c: ran out of irq_2_pin entries!");
+               entry = entry->next;
        }
+
+       entry->next = get_one_free_irq_2_pin();
+       entry = entry->next;
        entry->apic = apic;
        entry->pin = pin;
 }
@@ -384,30 +434,86 @@ static void __init replace_pin_at_irq(unsigned int irq,
                                      int oldapic, int oldpin,
                                      int newapic, int newpin)
 {
-       struct irq_pin_list *entry = irq_2_pin + irq;
+       struct irq_cfg *cfg = irq_cfg(irq);
+       struct irq_pin_list *entry = cfg->irq_2_pin;
+       int replaced = 0;
 
-       while (1) {
+       while (entry) {
                if (entry->apic == oldapic && entry->pin == oldpin) {
                        entry->apic = newapic;
                        entry->pin = newpin;
-               }
-               if (!entry->next)
+                       replaced = 1;
+                       /* every one is different, right? */
                        break;
-               entry = irq_2_pin + entry->next;
+               }
+               entry = entry->next;
        }
+
+       /* no match found: replace was called before add, so add the pin now */
+       if (!replaced)
+               add_pin_to_irq(irq, newapic, newpin);
 }
 
+static inline void io_apic_modify_irq(unsigned int irq,
+                               int mask_and, int mask_or,
+                               void (*final)(struct irq_pin_list *entry))
+{
+       int pin;
+       struct irq_cfg *cfg;
+       struct irq_pin_list *entry;
 
-#define DO_ACTION(name,R,ACTION, FINAL)                                        \
-                                                                       \
-       static void name##_IO_APIC_irq (unsigned int irq)               \
-       __DO_ACTION(R, ACTION, FINAL)
+       cfg = irq_cfg(irq);
+       for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) {
+               unsigned int reg;
+               pin = entry->pin;
+               reg = io_apic_read(entry->apic, 0x10 + pin * 2);
+               reg &= mask_and;
+               reg |= mask_or;
+               io_apic_modify(entry->apic, 0x10 + pin * 2, reg);
+               if (final)
+                       final(entry);
+       }
+}
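+
+/*
+ * The helper above reduces every mask/unmask variant to an (and, or,
+ * final) triple: masking is (~0, IO_APIC_REDIR_MASKED, sync-or-NULL),
+ * i.e. keep every bit and set the mask bit, while unmasking is
+ * (~IO_APIC_REDIR_MASKED, 0, NULL), as the wrappers below spell out.
+ */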
 
-/* mask = 1 */
-DO_ACTION(__mask,      0, |= IO_APIC_REDIR_MASKED, io_apic_sync(entry->apic))
+static void __unmask_IO_APIC_irq(unsigned int irq)
+{
+       io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED, 0, NULL);
+}
 
-/* mask = 0 */
-DO_ACTION(__unmask,    0, &= ~IO_APIC_REDIR_MASKED, )
+#ifdef CONFIG_X86_64
+void io_apic_sync(struct irq_pin_list *entry)
+{
+       /*
+        * Synchronize the IO-APIC and the CPU by doing
+        * a dummy read from the IO-APIC
+        */
+       struct io_apic __iomem *io_apic;
+       io_apic = io_apic_base(entry->apic);
+       readl(&io_apic->data);
+}
+
+static void __mask_IO_APIC_irq(unsigned int irq)
+{
+       io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
+}
+#else /* CONFIG_X86_32 */
+static void __mask_IO_APIC_irq(unsigned int irq)
+{
+       io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, NULL);
+}
+
+static void __mask_and_edge_IO_APIC_irq(unsigned int irq)
+{
+       io_apic_modify_irq(irq, ~IO_APIC_REDIR_LEVEL_TRIGGER,
+                       IO_APIC_REDIR_MASKED, NULL);
+}
+
+static void __unmask_and_level_IO_APIC_irq(unsigned int irq)
+{
+       io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED,
+                       IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
+}
+#endif /* CONFIG_X86_32 */
 
 static void mask_IO_APIC_irq (unsigned int irq)
 {
@@ -450,6 +556,68 @@ static void clear_IO_APIC (void)
                        clear_IO_APIC_pin(apic, pin);
 }
 
+#if !defined(CONFIG_SMP) && defined(CONFIG_X86_32)
+void send_IPI_self(int vector)
+{
+       unsigned int cfg;
+
+       /*
+        * Wait for idle.
+        */
+       apic_wait_icr_idle();
+       cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL;
+       /*
+        * Send the IPI. The write to APIC_ICR fires this off.
+        */
+       apic_write(APIC_ICR, cfg);
+}
+#endif /* !CONFIG_SMP && CONFIG_X86_32 */
+
+#ifdef CONFIG_X86_32
+/*
+ * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
+ * specific CPU-side IRQs.
+ */
+
+#define MAX_PIRQS 8
+static int pirq_entries [MAX_PIRQS];
+static int pirqs_enabled;
+
+static int __init ioapic_pirq_setup(char *str)
+{
+       int i, max;
+       int ints[MAX_PIRQS+1];
+
+       get_options(str, ARRAY_SIZE(ints), ints);
+
+       for (i = 0; i < MAX_PIRQS; i++)
+               pirq_entries[i] = -1;
+
+       pirqs_enabled = 1;
+       apic_printk(APIC_VERBOSE, KERN_INFO
+                       "PIRQ redirection, working around broken MP-BIOS.\n");
+       max = MAX_PIRQS;
+       if (ints[0] < MAX_PIRQS)
+               max = ints[0];
+
+       for (i = 0; i < max; i++) {
+               apic_printk(APIC_VERBOSE, KERN_DEBUG
+                               "... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
+               /*
+                * PIRQs are mapped upside down, usually.
+                */
+               pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
+       }
+       return 1;
+}
+
+__setup("pirq=", ioapic_pirq_setup);
+#endif /* CONFIG_X86_32 */
+
+#ifdef CONFIG_INTR_REMAP
+/* I/O APIC RTE contents at the OS boot up */
+static struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS];
+
 /*
  * Saves and masks all the unmasked IO-APIC RTE's
  */
@@ -474,7 +642,7 @@ int save_mask_IO_APIC_setup(void)
                        kzalloc(sizeof(struct IO_APIC_route_entry) *
                                nr_ioapic_registers[apic], GFP_KERNEL);
                if (!early_ioapic_entries[apic])
-                       return -ENOMEM;
+                       goto nomem;
        }
 
        for (apic = 0; apic < nr_ioapics; apic++)
@@ -488,17 +656,31 @@ int save_mask_IO_APIC_setup(void)
                                ioapic_write_entry(apic, pin, entry);
                        }
                }
+
        return 0;
+
+nomem:
+       while (apic >= 0)
+               kfree(early_ioapic_entries[apic--]);
+       memset(early_ioapic_entries, 0,
+               sizeof(early_ioapic_entries));
+
+       return -ENOMEM;
 }
 
 void restore_IO_APIC_setup(void)
 {
        int apic, pin;
 
-       for (apic = 0; apic < nr_ioapics; apic++)
+       for (apic = 0; apic < nr_ioapics; apic++) {
+               if (!early_ioapic_entries[apic])
+                       break;
                for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
                        ioapic_write_entry(apic, pin,
                                           early_ioapic_entries[apic][pin]);
+               kfree(early_ioapic_entries[apic]);
+               early_ioapic_entries[apic] = NULL;
+       }
 }
 
 void reinit_intr_remapped_IO_APIC(int intr_remapping)
@@ -512,25 +694,7 @@ void reinit_intr_remapped_IO_APIC(int intr_remapping)
         */
        restore_IO_APIC_setup();
 }
-
-int skip_ioapic_setup;
-int ioapic_force;
-
-static int __init parse_noapic(char *str)
-{
-       disable_ioapic_setup();
-       return 0;
-}
-early_param("noapic", parse_noapic);
-
-/* Actually the next is obsolete, but keep it for paranoid reasons -AK */
-static int __init disable_timer_pin_setup(char *arg)
-{
-       disable_timer_pin_1 = 1;
-       return 1;
-}
-__setup("disable_timer_pin_1", disable_timer_pin_setup);
-
+#endif /* CONFIG_INTR_REMAP */
 
 /*
  * Find the IRQ entry number of a certain pin.
@@ -634,22 +798,54 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
                                best_guess = irq;
                }
        }
-       BUG_ON(best_guess >= NR_IRQS);
        return best_guess;
 }
 
+EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
+
+#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
+/*
+ * EISA Edge/Level control register, ELCR
+ */
+static int EISA_ELCR(unsigned int irq)
+{
+       if (irq < 16) {
+               unsigned int port = 0x4d0 + (irq >> 3);
+               return (inb(port) >> (irq & 7)) & 1;
+       }
+       apic_printk(APIC_VERBOSE, KERN_INFO
+                       "Broken MPtable reports ISA irq %d\n", irq);
+       return 0;
+}
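+
+/*
+ * Worked example: for irq 9, port = 0x4d0 + (9 >> 3) = 0x4d1 and the
+ * return value is bit (9 & 7) = 1 of that register; a set bit means
+ * level triggered, a clear bit edge triggered.
+ */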
+
+#endif
+
 /* ISA interrupts are always polarity zero edge triggered,
  * when listed as conforming in the MP table. */
 
 #define default_ISA_trigger(idx)       (0)
 #define default_ISA_polarity(idx)      (0)
 
+/* EISA interrupts are always polarity zero and can be edge or level
+ * trigger depending on the ELCR value.  If an interrupt is listed as
+ * EISA conforming in the MP table, that means its trigger type must
+ * be read in from the ELCR */
+
+#define default_EISA_trigger(idx)      (EISA_ELCR(mp_irqs[idx].mp_srcbusirq))
+#define default_EISA_polarity(idx)     default_ISA_polarity(idx)
+
 /* PCI interrupts are always polarity one level triggered,
  * when listed as conforming in the MP table. */
 
 #define default_PCI_trigger(idx)       (1)
 #define default_PCI_polarity(idx)      (1)
 
+/* MCA interrupts are always polarity zero level triggered,
+ * when listed as conforming in the MP table. */
+
+#define default_MCA_trigger(idx)       (1)
+#define default_MCA_polarity(idx)      default_ISA_polarity(idx)
+
 static int MPBIOS_polarity(int idx)
 {
        int bus = mp_irqs[idx].mp_srcbus;
@@ -707,6 +903,36 @@ static int MPBIOS_trigger(int idx)
                                trigger = default_ISA_trigger(idx);
                        else
                                trigger = default_PCI_trigger(idx);
+#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
+                       switch (mp_bus_id_to_type[bus]) {
+                               case MP_BUS_ISA: /* ISA pin */
+                               {
+                                       /* set before the switch */
+                                       break;
+                               }
+                               case MP_BUS_EISA: /* EISA pin */
+                               {
+                                       trigger = default_EISA_trigger(idx);
+                                       break;
+                               }
+                               case MP_BUS_PCI: /* PCI pin */
+                               {
+                                       /* set before the switch */
+                                       break;
+                               }
+                               case MP_BUS_MCA: /* MCA pin */
+                               {
+                                       trigger = default_MCA_trigger(idx);
+                                       break;
+                               }
+                               default:
+                               {
+                                       printk(KERN_WARNING "broken BIOS!!\n");
+                                       trigger = 1;
+                                       break;
+                               }
+                       }
+#endif
                        break;
                case 1: /* edge */
                {
@@ -744,6 +970,7 @@ static inline int irq_trigger(int idx)
        return MPBIOS_trigger(idx);
 }
 
+int (*ioapic_renumber_irq)(int ioapic, int irq);
 static int pin_2_irq(int idx, int apic, int pin)
 {
        int irq, i;
@@ -765,8 +992,32 @@ static int pin_2_irq(int idx, int apic, int pin)
                while (i < apic)
                        irq += nr_ioapic_registers[i++];
                irq += pin;
+               /*
+                * For MPS mode, so far only needed by ES7000 platform
+                */
+               if (ioapic_renumber_irq)
+                       irq = ioapic_renumber_irq(apic, irq);
+       }
+
+#ifdef CONFIG_X86_32
+       /*
+        * PCI IRQ command line redirection. Yes, limits are hardcoded.
+        */
+       if ((pin >= 16) && (pin <= 23)) {
+               if (pirq_entries[pin-16] != -1) {
+                       if (!pirq_entries[pin-16]) {
+                               apic_printk(APIC_VERBOSE, KERN_DEBUG
+                                               "disabling PIRQ%d\n", pin-16);
+                       } else {
+                               irq = pirq_entries[pin-16];
+                               apic_printk(APIC_VERBOSE, KERN_DEBUG
+                                               "using PIRQ%d -> IRQ %d\n",
+                                               pin-16, irq);
+                       }
+               }
        }
-       BUG_ON(irq >= NR_IRQS);
+#endif
+
        return irq;
 }
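 
 /*
  * Worked example (assumed sizes): with two I/O APICs of 24 redirection
  * entries each, a PCI interrupt on pin 5 of apic 1 maps to
  * irq = 24 + 5 = 29, before the optional ES7000 renumbering and the
  * PIRQ redirection above are applied.
  */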
 
@@ -801,8 +1052,7 @@ static int __assign_irq_vector(int irq, cpumask_t mask)
        int cpu;
        struct irq_cfg *cfg;
 
-       BUG_ON((unsigned)irq >= NR_IRQS);
-       cfg = &irq_cfg[irq];
+       cfg = irq_cfg(irq);
 
        /* Only try and allocate irqs on cpus that are present */
        cpus_and(mask, mask, cpu_online_map);
@@ -837,8 +1087,13 @@ next:
                }
                if (unlikely(current_vector == vector))
                        continue;
+#ifdef CONFIG_X86_64
                if (vector == IA32_SYSCALL_VECTOR)
                        goto next;
+#else
+               if (vector == SYSCALL_VECTOR)
+                       goto next;
+#endif
                for_each_cpu_mask_nr(new_cpu, new_mask)
                        if (per_cpu(vector_irq, new_cpu)[vector] != -1)
                                goto next;
@@ -875,8 +1130,7 @@ static void __clear_irq_vector(int irq)
        cpumask_t mask;
        int cpu, vector;
 
-       BUG_ON((unsigned)irq >= NR_IRQS);
-       cfg = &irq_cfg[irq];
+       cfg = irq_cfg(irq);
        BUG_ON(!cfg->vector);
 
        vector = cfg->vector;
@@ -893,12 +1147,13 @@ void __setup_vector_irq(int cpu)
        /* Initialize vector_irq on a new cpu */
        /* This function must be called with vector_lock held */
        int irq, vector;
+       struct irq_cfg *cfg;
 
        /* Mark the inuse vectors */
-       for (irq = 0; irq < NR_IRQS; ++irq) {
-               if (!cpu_isset(cpu, irq_cfg[irq].domain))
+       for_each_irq_cfg(irq, cfg) {
+               if (!cpu_isset(cpu, cfg->domain))
                        continue;
-               vector = irq_cfg[irq].vector;
+               vector = cfg->vector;
                per_cpu(vector_irq, cpu)[vector] = irq;
        }
        /* Mark the free vectors */
@@ -906,7 +1161,9 @@ void __setup_vector_irq(int cpu)
                irq = per_cpu(vector_irq, cpu)[vector];
                if (irq < 0)
                        continue;
-               if (!cpu_isset(cpu, irq_cfg[irq].domain))
+
+               cfg = irq_cfg(irq);
+               if (!cpu_isset(cpu, cfg->domain))
                        per_cpu(vector_irq, cpu)[vector] = -1;
        }
 }
@@ -916,16 +1173,49 @@ static struct irq_chip ioapic_chip;
 static struct irq_chip ir_ioapic_chip;
 #endif
 
+#define IOAPIC_AUTO     -1
+#define IOAPIC_EDGE     0
+#define IOAPIC_LEVEL    1
+
+#ifdef CONFIG_X86_32
+static inline int IO_APIC_irq_trigger(int irq)
+{
+       int apic, idx, pin;
+
+       for (apic = 0; apic < nr_ioapics; apic++) {
+               for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+                       idx = find_irq_entry(apic, pin, mp_INT);
+                       if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin)))
+                               return irq_trigger(idx);
+               }
+       }
+       /*
+        * nonexistent IRQs are edge default
+         */
+       return 0;
+}
+#else
+static inline int IO_APIC_irq_trigger(int irq)
+{
+       return 1;
+}
+#endif
+
 static void ioapic_register_intr(int irq, unsigned long trigger)
 {
-       if (trigger)
-               irq_desc[irq].status |= IRQ_LEVEL;
+       struct irq_desc *desc;
+
+       desc = irq_to_desc(irq);
+
+       if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
+           trigger == IOAPIC_LEVEL)
+               desc->status |= IRQ_LEVEL;
        else
-               irq_desc[irq].status &= ~IRQ_LEVEL;
+               desc->status &= ~IRQ_LEVEL;
 
 #ifdef CONFIG_INTR_REMAP
        if (irq_remapped(irq)) {
-               irq_desc[irq].status |= IRQ_MOVE_PCNTXT;
+               desc->status |= IRQ_MOVE_PCNTXT;
                if (trigger)
                        set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
                                                      handle_fasteoi_irq,
@@ -936,7 +1226,8 @@ static void ioapic_register_intr(int irq, unsigned long trigger)
                return;
        }
 #endif
-       if (trigger)
+       if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
+           trigger == IOAPIC_LEVEL)
                set_irq_chip_and_handler_name(irq, &ioapic_chip,
                                              handle_fasteoi_irq,
                                              "fasteoi");
@@ -1009,13 +1300,15 @@ static int setup_ioapic_entry(int apic, int irq,
 static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
                              int trigger, int polarity)
 {
-       struct irq_cfg *cfg = irq_cfg + irq;
+       struct irq_cfg *cfg;
        struct IO_APIC_route_entry entry;
        cpumask_t mask;
 
        if (!IO_APIC_IRQ(irq))
                return;
 
+       cfg = irq_cfg(irq);
+
        mask = TARGET_CPUS;
        if (assign_irq_vector(irq, mask))
                return;
@@ -1047,37 +1340,49 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
 
 static void __init setup_IO_APIC_irqs(void)
 {
-       int apic, pin, idx, irq, first_notcon = 1;
+       int apic, pin, idx, irq;
+       int notcon = 0;
 
        apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
 
        for (apic = 0; apic < nr_ioapics; apic++) {
-       for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
-
-               idx = find_irq_entry(apic,pin,mp_INT);
-               if (idx == -1) {
-                       if (first_notcon) {
-                               apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mp_apicid, pin);
-                               first_notcon = 0;
-                       } else
-                               apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mp_apicid, pin);
-                       continue;
-               }
-               if (!first_notcon) {
-                       apic_printk(APIC_VERBOSE, " not connected.\n");
-                       first_notcon = 1;
-               }
+               for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
 
-               irq = pin_2_irq(idx, apic, pin);
-               add_pin_to_irq(irq, apic, pin);
+                       idx = find_irq_entry(apic, pin, mp_INT);
+                       if (idx == -1) {
+                               if (!notcon) {
+                                       notcon = 1;
+                                       apic_printk(APIC_VERBOSE,
+                                               KERN_DEBUG " %d-%d",
+                                               mp_ioapics[apic].mp_apicid,
+                                               pin);
+                               } else
+                                       apic_printk(APIC_VERBOSE, " %d-%d",
+                                               mp_ioapics[apic].mp_apicid,
+                                               pin);
+                               continue;
+                       }
+                       if (notcon) {
+                               apic_printk(APIC_VERBOSE,
+                                       " (apicid-pin) not connected\n");
+                               notcon = 0;
+                       }
 
-               setup_IO_APIC_irq(apic, pin, irq,
-                                 irq_trigger(idx), irq_polarity(idx));
-       }
+                       irq = pin_2_irq(idx, apic, pin);
+#ifdef CONFIG_X86_32
+                       if (multi_timer_check(apic, irq))
+                               continue;
+#endif
+                       add_pin_to_irq(irq, apic, pin);
+
+                       setup_IO_APIC_irq(apic, pin, irq,
+                                       irq_trigger(idx), irq_polarity(idx));
+               }
        }
 
-       if (!first_notcon)
-               apic_printk(APIC_VERBOSE, " not connected.\n");
+       if (notcon)
+               apic_printk(APIC_VERBOSE,
+                       " (apicid-pin) not connected\n");
 }
 
 /*
@@ -1088,8 +1393,10 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
 {
        struct IO_APIC_route_entry entry;
 
+#ifdef CONFIG_INTR_REMAP
        if (intr_remapping_enabled)
                return;
+#endif
 
        memset(&entry, 0, sizeof(entry));
 
@@ -1124,7 +1431,10 @@ __apicdebuginit(void) print_IO_APIC(void)
        union IO_APIC_reg_00 reg_00;
        union IO_APIC_reg_01 reg_01;
        union IO_APIC_reg_02 reg_02;
+       union IO_APIC_reg_03 reg_03;
        unsigned long flags;
+       struct irq_cfg *cfg;
+       unsigned int irq;
 
        if (apic_verbosity == APIC_QUIET)
                return;
@@ -1147,12 +1457,16 @@ __apicdebuginit(void) print_IO_APIC(void)
        reg_01.raw = io_apic_read(apic, 1);
        if (reg_01.bits.version >= 0x10)
                reg_02.raw = io_apic_read(apic, 2);
+       if (reg_01.bits.version >= 0x20)
+               reg_03.raw = io_apic_read(apic, 3);
        spin_unlock_irqrestore(&ioapic_lock, flags);
 
        printk("\n");
        printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid);
        printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
        printk(KERN_DEBUG ".......    : physical APIC id: %02X\n", reg_00.bits.ID);
+       printk(KERN_DEBUG ".......    : Delivery Type: %X\n", reg_00.bits.delivery_type);
+       printk(KERN_DEBUG ".......    : LTS          : %X\n", reg_00.bits.LTS);
 
        printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)&reg_01);
        printk(KERN_DEBUG ".......     : max redirection entries: %04X\n", reg_01.bits.entries);
@@ -1160,11 +1474,27 @@ __apicdebuginit(void) print_IO_APIC(void)
        printk(KERN_DEBUG ".......     : PRQ implemented: %X\n", reg_01.bits.PRQ);
        printk(KERN_DEBUG ".......     : IO APIC version: %04X\n", reg_01.bits.version);
 
-       if (reg_01.bits.version >= 0x10) {
+       /*
+        * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02,
+        * but the value of reg_02 is read as the previous read register
+        * value, so ignore it if reg_02 == reg_01.
+        */
+       if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) {
                printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
                printk(KERN_DEBUG ".......     : arbitration: %02X\n", reg_02.bits.arbitration);
        }
 
+       /*
+        * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02
+        * or reg_03, but the value of reg_0[23] is read as the previous read
+        * register value, so ignore it if reg_03 == reg_0[12].
+        */
+       if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw &&
+           reg_03.raw != reg_01.raw) {
+               printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw);
+               printk(KERN_DEBUG ".......     : Boot DT    : %X\n", reg_03.bits.boot_DT);
+       }
+
        printk(KERN_DEBUG ".... IRQ redirection table:\n");
 
        printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol"
@@ -1193,16 +1523,16 @@ __apicdebuginit(void) print_IO_APIC(void)
        }
        }
        printk(KERN_DEBUG "IRQ to pin mappings:\n");
-       for (i = 0; i < NR_IRQS; i++) {
-               struct irq_pin_list *entry = irq_2_pin + i;
-               if (entry->pin < 0)
+       for_each_irq_cfg(irq, cfg) {
+               struct irq_pin_list *entry = cfg->irq_2_pin;
+               if (!entry)
                        continue;
-               printk(KERN_DEBUG "IRQ%d ", i);
+               printk(KERN_DEBUG "IRQ%d ", irq);
                for (;;) {
                        printk("-> %d:%d", entry->apic, entry->pin);
                        if (!entry->next)
                                break;
-                       entry = irq_2_pin + entry->next;
+                       entry = entry->next;
                }
                printk("\n");
        }
@@ -1236,7 +1566,7 @@ __apicdebuginit(void) print_APIC_bitfield(int base)
 __apicdebuginit(void) print_local_APIC(void *dummy)
 {
        unsigned int v, ver, maxlvt;
-       unsigned long icr;
+       u64 icr;
 
        if (apic_verbosity == APIC_QUIET)
                return;
@@ -1253,20 +1583,31 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
        v = apic_read(APIC_TASKPRI);
        printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
 
-       v = apic_read(APIC_ARBPRI);
-       printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
-               v & APIC_ARBPRI_MASK);
-       v = apic_read(APIC_PROCPRI);
-       printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
+       if (APIC_INTEGRATED(ver)) {                     /* !82489DX */
+               if (!APIC_XAPIC(ver)) {
+                       v = apic_read(APIC_ARBPRI);
+                       printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
+                              v & APIC_ARBPRI_MASK);
+               }
+               v = apic_read(APIC_PROCPRI);
+               printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
+       }
+
+       /*
+        * Remote read supported only in the 82489DX and local APIC for
+        * Pentium processors.
+        */
+       if (!APIC_INTEGRATED(ver) || maxlvt == 3) {
+               v = apic_read(APIC_RRR);
+               printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
+       }
 
-       v = apic_read(APIC_EOI);
-       printk(KERN_DEBUG "... APIC EOI: %08x\n", v);
-       v = apic_read(APIC_RRR);
-       printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
        v = apic_read(APIC_LDR);
        printk(KERN_DEBUG "... APIC LDR: %08x\n", v);
-       v = apic_read(APIC_DFR);
-       printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
+       if (!x2apic_enabled()) {
+               v = apic_read(APIC_DFR);
+               printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
+       }
        v = apic_read(APIC_SPIV);
        printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
 
@@ -1277,8 +1618,13 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
        printk(KERN_DEBUG "... APIC IRR field:\n");
        print_APIC_bitfield(APIC_IRR);
 
-       v = apic_read(APIC_ESR);
-       printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
+       if (APIC_INTEGRATED(ver)) {             /* !82489DX */
+               if (maxlvt > 3)         /* Due to the Pentium erratum 3AP. */
+                       apic_write(APIC_ESR, 0);
+
+               v = apic_read(APIC_ESR);
+               printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
+       }
 
        icr = apic_icr_read();
        printk(KERN_DEBUG "... APIC ICR: %08x\n", (u32)icr);
@@ -1312,7 +1658,12 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
 
 __apicdebuginit(void) print_all_local_APICs(void)
 {
-       on_each_cpu(print_local_APIC, NULL, 1);
+       int cpu;
+
+       preempt_disable();
+       for_each_online_cpu(cpu)
+               smp_call_function_single(cpu, print_local_APIC, NULL, 1);
+       preempt_enable();
 }
 
 __apicdebuginit(void) print_PIC(void)
@@ -1359,17 +1710,22 @@ __apicdebuginit(int) print_all_ICs(void)
 fs_initcall(print_all_ICs);
 
 
+/* Where, if anywhere, is the i8259 connected in external int mode */
+static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
+
 void __init enable_IO_APIC(void)
 {
        union IO_APIC_reg_01 reg_01;
        int i8259_apic, i8259_pin;
-       int i, apic;
+       int apic;
        unsigned long flags;
 
-       for (i = 0; i < PIN_MAP_SIZE; i++) {
-               irq_2_pin[i].pin = -1;
-               irq_2_pin[i].next = 0;
-       }
+#ifdef CONFIG_X86_32
+       int i;
+       if (!pirqs_enabled)
+               for (i = 0; i < MAX_PIRQS; i++)
+                       pirq_entries[i] = -1;
+#endif
 
        /*
         * The number of IO-APIC IRQ registers (== #pins):
@@ -1399,6 +1755,10 @@ void __init enable_IO_APIC(void)
        }
  found_i8259:
        /* Look to see what if the MP table has reported the ExtINT */
+       /* If we could not find the appropriate pin by looking at the ioapic
+        * the i8259 probably is not connected the ioapic but give the
+        * mptable a chance anyway.
+        */
        i8259_pin  = find_isa_irq_pin(0, mp_ExtINT);
        i8259_apic = find_isa_irq_apic(0, mp_ExtINT);
        /* Trust the MP table if nothing is setup in the hardware */
@@ -1458,6 +1818,133 @@ void disable_IO_APIC(void)
        disconnect_bsp_APIC(ioapic_i8259.pin != -1);
 }
 
+#ifdef CONFIG_X86_32
+/*
+ * function to set the IO-APIC physical IDs based on the
+ * values stored in the MPC table.
+ *
+ * by Matt Domsch <Matt_Domsch@dell.com>  Tue Dec 21 12:25:05 CST 1999
+ */
+
+static void __init setup_ioapic_ids_from_mpc(void)
+{
+       union IO_APIC_reg_00 reg_00;
+       physid_mask_t phys_id_present_map;
+       int apic;
+       int i;
+       unsigned char old_id;
+       unsigned long flags;
+
+       if (x86_quirks->setup_ioapic_ids && x86_quirks->setup_ioapic_ids())
+               return;
+
+       /*
+        * Don't check I/O APIC IDs for xAPIC systems.  They have
+        * no meaning without the serial APIC bus.
+        */
+       if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+               || APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
+               return;
+       /*
+        * This is broken; anything with a real cpu count has to
+        * circumvent this idiocy regardless.
+        */
+       phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map);
+
+       /*
+        * Set the IOAPIC ID to the value stored in the MPC table.
+        */
+       for (apic = 0; apic < nr_ioapics; apic++) {
+
+               /* Read the register 0 value */
+               spin_lock_irqsave(&ioapic_lock, flags);
+               reg_00.raw = io_apic_read(apic, 0);
+               spin_unlock_irqrestore(&ioapic_lock, flags);
+
+               old_id = mp_ioapics[apic].mp_apicid;
+
+               if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) {
+                       printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
+                               apic, mp_ioapics[apic].mp_apicid);
+                       printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
+                               reg_00.bits.ID);
+                       mp_ioapics[apic].mp_apicid = reg_00.bits.ID;
+               }
+
+               /*
+                * Sanity check, is the ID really free? Every APIC in a
+                * system must have a unique ID or we get lots of nice
+                * 'stuck on smp_invalidate_needed IPI wait' messages.
+                */
+               if (check_apicid_used(phys_id_present_map,
+                                       mp_ioapics[apic].mp_apicid)) {
+                       printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
+                               apic, mp_ioapics[apic].mp_apicid);
+                       for (i = 0; i < get_physical_broadcast(); i++)
+                               if (!physid_isset(i, phys_id_present_map))
+                                       break;
+                       if (i >= get_physical_broadcast())
+                               panic("Max APIC ID exceeded!\n");
+                       printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
+                               i);
+                       physid_set(i, phys_id_present_map);
+                       mp_ioapics[apic].mp_apicid = i;
+               } else {
+                       physid_mask_t tmp;
+                       tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid);
+                       apic_printk(APIC_VERBOSE, "Setting %d in the "
+                                       "phys_id_present_map\n",
+                                       mp_ioapics[apic].mp_apicid);
+                       physids_or(phys_id_present_map, phys_id_present_map, tmp);
+               }
+
+
+               /*
+                * We need to adjust the IRQ routing table
+                * if the ID changed.
+                */
+               if (old_id != mp_ioapics[apic].mp_apicid)
+                       for (i = 0; i < mp_irq_entries; i++)
+                               if (mp_irqs[i].mp_dstapic == old_id)
+                                       mp_irqs[i].mp_dstapic
+                                               = mp_ioapics[apic].mp_apicid;
+
+               /*
+                * Read the right value from the MPC table and
+                * write it into the ID register.
+                */
+               apic_printk(APIC_VERBOSE, KERN_INFO
+                       "...changing IO-APIC physical APIC ID to %d ...",
+                       mp_ioapics[apic].mp_apicid);
+
+               reg_00.bits.ID = mp_ioapics[apic].mp_apicid;
+               spin_lock_irqsave(&ioapic_lock, flags);
+               io_apic_write(apic, 0, reg_00.raw);
+               spin_unlock_irqrestore(&ioapic_lock, flags);
+
+               /*
+                * Sanity check
+                */
+               spin_lock_irqsave(&ioapic_lock, flags);
+               reg_00.raw = io_apic_read(apic, 0);
+               spin_unlock_irqrestore(&ioapic_lock, flags);
+               if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid)
+                       printk("could not set ID!\n");
+               else
+                       apic_printk(APIC_VERBOSE, " ok.\n");
+       }
+}
+#endif /* CONFIG_X86_32 */
+
+int no_timer_check __initdata;
+
+static int __init notimercheck(char *s)
+{
+       no_timer_check = 1;
+       return 1;
+}
+__setup("no_timer_check", notimercheck);
+
 /*
  * There is a nasty bug in some older SMP boards, their mptable lies
  * about the timer IRQ. We do the following to work around the situation:
@@ -1471,6 +1958,9 @@ static int __init timer_irq_works(void)
        unsigned long t1 = jiffies;
        unsigned long flags;
 
+       if (no_timer_check)
+               return 1;
+
        local_save_flags(flags);
        local_irq_enable();
        /* Let ten ticks pass... */
@@ -1531,9 +2021,11 @@ static unsigned int startup_ioapic_irq(unsigned int irq)
        return was_pending;
 }
 
+#ifdef CONFIG_X86_64
 static int ioapic_retrigger_irq(unsigned int irq)
 {
-       struct irq_cfg *cfg = &irq_cfg[irq];
+
+       struct irq_cfg *cfg = irq_cfg(irq);
        unsigned long flags;
 
        spin_lock_irqsave(&vector_lock, flags);
@@ -1542,6 +2034,14 @@ static int ioapic_retrigger_irq(unsigned int irq)
 
        return 1;
 }
+#else
+static int ioapic_retrigger_irq(unsigned int irq)
+{
+       send_IPI_self(irq_cfg(irq)->vector);
+
+       return 1;
+}
+#endif
 
 /*
  * Level and edge triggered IO-APIC interrupts need different handling,
@@ -1580,11 +2080,11 @@ static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
  */
 static void migrate_ioapic_irq(int irq, cpumask_t mask)
 {
-       struct irq_cfg *cfg = irq_cfg + irq;
-       struct irq_desc *desc = irq_desc + irq;
+       struct irq_cfg *cfg;
+       struct irq_desc *desc;
        cpumask_t tmp, cleanup_mask;
        struct irte irte;
-       int modify_ioapic_rte = desc->status & IRQ_LEVEL;
+       int modify_ioapic_rte;
        unsigned int dest;
        unsigned long flags;
 
@@ -1598,9 +2098,12 @@ static void migrate_ioapic_irq(int irq, cpumask_t mask)
        if (assign_irq_vector(irq, mask))
                return;
 
+       cfg = irq_cfg(irq);
        cpus_and(tmp, cfg->domain, mask);
        dest = cpu_mask_to_apicid(tmp);
 
+       desc = irq_to_desc(irq);
+       modify_ioapic_rte = desc->status & IRQ_LEVEL;
        if (modify_ioapic_rte) {
                spin_lock_irqsave(&ioapic_lock, flags);
                __target_IO_APIC_irq(irq, dest, cfg->vector);
@@ -1622,18 +2125,19 @@ static void migrate_ioapic_irq(int irq, cpumask_t mask)
                cfg->move_in_progress = 0;
        }
 
-       irq_desc[irq].affinity = mask;
+       desc->affinity = mask;
 }
 
 static int migrate_irq_remapped_level(int irq)
 {
        int ret = -1;
+       struct irq_desc *desc = irq_to_desc(irq);
 
        mask_IO_APIC_irq(irq);
 
        if (io_apic_level_ack_pending(irq)) {
                /*
-                * Interrupt in progress. Migrating irq now will change the
+                * Interrupt in progress. Migrating irq now will change the
                 * vector information in the IO-APIC RTE and that will confuse
                 * the EOI broadcast performed by cpu.
                 * So, delay the irq migration to the next instance.
@@ -1643,11 +2147,11 @@ static int migrate_irq_remapped_level(int irq)
        }
 
        /* everything is clear. we have right of way */
-       migrate_ioapic_irq(irq, irq_desc[irq].pending_mask);
+       migrate_ioapic_irq(irq, desc->pending_mask);
 
        ret = 0;
-       irq_desc[irq].status &= ~IRQ_MOVE_PENDING;
-       cpus_clear(irq_desc[irq].pending_mask);
+       desc->status &= ~IRQ_MOVE_PENDING;
+       cpus_clear(desc->pending_mask);
 
 unmask:
        unmask_IO_APIC_irq(irq);
@@ -1656,10 +2160,10 @@ unmask:
 
 static void ir_irq_migration(struct work_struct *work)
 {
-       int irq;
+       unsigned int irq;
+       struct irq_desc *desc;
 
-       for (irq = 0; irq < NR_IRQS; irq++) {
-               struct irq_desc *desc = irq_desc + irq;
+       for_each_irq_desc(irq, desc) {
                if (desc->status & IRQ_MOVE_PENDING) {
                        unsigned long flags;
 
@@ -1671,8 +2175,7 @@ static void ir_irq_migration(struct work_struct *work)
                                continue;
                        }
 
-                       desc->chip->set_affinity(irq,
-                                                irq_desc[irq].pending_mask);
+                       desc->chip->set_affinity(irq, desc->pending_mask);
                        spin_unlock_irqrestore(&desc->lock, flags);
                }
        }
@@ -1683,9 +2186,11 @@ static void ir_irq_migration(struct work_struct *work)
  */
 static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
 {
-       if (irq_desc[irq].status & IRQ_LEVEL) {
-               irq_desc[irq].status |= IRQ_MOVE_PENDING;
-               irq_desc[irq].pending_mask = mask;
+       struct irq_desc *desc = irq_to_desc(irq);
+
+       if (desc->status & IRQ_LEVEL) {
+               desc->status |= IRQ_MOVE_PENDING;
+               desc->pending_mask = mask;
                migrate_irq_remapped_level(irq);
                return;
        }
@@ -1698,7 +2203,9 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
 {
        unsigned vector, me;
        ack_APIC_irq();
+#ifdef CONFIG_X86_64
        exit_idle();
+#endif
        irq_enter();
 
        me = smp_processor_id();
@@ -1707,11 +2214,12 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
                struct irq_desc *desc;
                struct irq_cfg *cfg;
                irq = __get_cpu_var(vector_irq)[vector];
-               if (irq >= NR_IRQS)
+
+               desc = irq_to_desc(irq);
+               if (!desc)
                        continue;
 
-               desc = irq_desc + irq;
-               cfg = irq_cfg + irq;
+               cfg = irq_cfg(irq);
                spin_lock(&desc->lock);
                if (!cfg->move_cleanup_count)
                        goto unlock;
@@ -1730,7 +2238,7 @@ unlock:
 
 static void irq_complete_move(unsigned int irq)
 {
-       struct irq_cfg *cfg = irq_cfg + irq;
+       struct irq_cfg *cfg = irq_cfg(irq);
        unsigned vector, me;
 
        if (likely(!cfg->move_in_progress))
@@ -1769,19 +2277,50 @@ static void ack_apic_edge(unsigned int irq)
        ack_APIC_irq();
 }
 
+atomic_t irq_mis_count;
+
 static void ack_apic_level(unsigned int irq)
 {
+#ifdef CONFIG_X86_32
+       unsigned long v;
+       int i;
+#endif
        int do_unmask_irq = 0;
 
        irq_complete_move(irq);
 #ifdef CONFIG_GENERIC_PENDING_IRQ
        /* If we are moving the irq we need to mask it */
-       if (unlikely(irq_desc[irq].status & IRQ_MOVE_PENDING)) {
+       if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) {
                do_unmask_irq = 1;
                mask_IO_APIC_irq(irq);
        }
 #endif
 
+#ifdef CONFIG_X86_32
+       /*
+        * It appears there is an erratum which affects at least version 0x11
+        * of I/O APIC (that's the 82093AA and cores integrated into various
+        * chipsets).  Under certain conditions a level-triggered interrupt is
+        * erroneously delivered as an edge-triggered one but the respective
+        * IRR bit gets set nevertheless.  As a result the I/O unit expects an
+        * EOI message but it will never arrive and further interrupts are
+        * blocked from the source.  The exact reason is so far unknown, but
+        * the phenomenon was observed when two consecutive interrupt requests
+        * from a given source get delivered to the same CPU and the source is
+        * temporarily disabled in between.
+        *
+        * A workaround is to simulate an EOI message manually.  We achieve it
+        * by setting the trigger mode to edge and then to level when the edge
+        * trigger mode gets detected in the TMR of a local APIC for a
+        * level-triggered interrupt.  We mask the source for the time of the
+        * operation to prevent an edge-triggered interrupt escaping meanwhile.
+        * The idea is from Manfred Spraul.  --macro
+        */
+       i = irq_cfg(irq)->vector;
+
+       v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
+#endif
+
        /*
         * We must acknowledge the irq before we move it or the acknowledge will
         * not propagate properly.
@@ -1820,31 +2359,41 @@ static void ack_apic_level(unsigned int irq)
                        move_masked_irq(irq);
                unmask_IO_APIC_irq(irq);
        }
+
+#ifdef CONFIG_X86_32
+       if (!(v & (1 << (i & 0x1f)))) {
+               atomic_inc(&irq_mis_count);
+               spin_lock(&ioapic_lock);
+               __mask_and_edge_IO_APIC_irq(irq);
+               __unmask_and_level_IO_APIC_irq(irq);
+               spin_unlock(&ioapic_lock);
+       }
+#endif
 }
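 
 /*
  * Worked example for the TMR probe above (vector chosen purely for
  * illustration): for vector 0x31, (0x31 & ~0x1f) >> 1 = 0x10, so v is
  * read from APIC_TMR + 0x10 and bit (0x31 & 0x1f) = 0x11 of it is the
  * trigger-mode bit tested at the end of ack_apic_level().
  */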
 
 static struct irq_chip ioapic_chip __read_mostly = {
-       .name           = "IO-APIC",
-       .startup        = startup_ioapic_irq,
-       .mask           = mask_IO_APIC_irq,
-       .unmask         = unmask_IO_APIC_irq,
-       .ack            = ack_apic_edge,
-       .eoi            = ack_apic_level,
+       .name           = "IO-APIC",
+       .startup        = startup_ioapic_irq,
+       .mask           = mask_IO_APIC_irq,
+       .unmask         = unmask_IO_APIC_irq,
+       .ack            = ack_apic_edge,
+       .eoi            = ack_apic_level,
 #ifdef CONFIG_SMP
-       .set_affinity   = set_ioapic_affinity_irq,
+       .set_affinity   = set_ioapic_affinity_irq,
 #endif
        .retrigger      = ioapic_retrigger_irq,
 };
 
 #ifdef CONFIG_INTR_REMAP
 static struct irq_chip ir_ioapic_chip __read_mostly = {
-       .name           = "IR-IO-APIC",
-       .startup        = startup_ioapic_irq,
-       .mask           = mask_IO_APIC_irq,
-       .unmask         = unmask_IO_APIC_irq,
-       .ack            = ack_x2apic_edge,
-       .eoi            = ack_x2apic_level,
+       .name           = "IR-IO-APIC",
+       .startup        = startup_ioapic_irq,
+       .mask           = mask_IO_APIC_irq,
+       .unmask         = unmask_IO_APIC_irq,
+       .ack            = ack_x2apic_edge,
+       .eoi            = ack_x2apic_level,
 #ifdef CONFIG_SMP
-       .set_affinity   = set_ir_ioapic_affinity_irq,
+       .set_affinity   = set_ir_ioapic_affinity_irq,
 #endif
        .retrigger      = ioapic_retrigger_irq,
 };
@@ -1853,6 +2402,8 @@ static struct irq_chip ir_ioapic_chip __read_mostly = {
 static inline void init_IO_APIC_traps(void)
 {
        int irq;
+       struct irq_desc *desc;
+       struct irq_cfg *cfg;
 
        /*
         * NOTE! The local APIC isn't very good at handling
@@ -1865,8 +2416,8 @@ static inline void init_IO_APIC_traps(void)
         * Also, we've got to be careful not to trash gate
         * 0x80, because int 0x80 is hm, kind of importantish. ;)
         */
-       for (irq = 0; irq < NR_IRQS ; irq++) {
-               if (IO_APIC_IRQ(irq) && !irq_cfg[irq].vector) {
+       for_each_irq_cfg(irq, cfg) {
+               if (IO_APIC_IRQ(irq) && !cfg->vector) {
                        /*
                         * Hmm.. We don't have an entry for this,
                         * so default to an old-fashioned 8259
@@ -1874,27 +2425,33 @@ static inline void init_IO_APIC_traps(void)
                         */
                        if (irq < 16)
                                make_8259A_irq(irq);
-                       else
+                       else {
+                               desc = irq_to_desc(irq);
                                /* Strange. Oh, well.. */
-                               irq_desc[irq].chip = &no_irq_chip;
+                               desc->chip = &no_irq_chip;
+                       }
                }
        }
 }
 
-static void unmask_lapic_irq(unsigned int irq)
+/*
+ * The local APIC irq-chip implementation:
+ */
+
+static void mask_lapic_irq(unsigned int irq)
 {
        unsigned long v;
 
        v = apic_read(APIC_LVT0);
-       apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
+       apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
 }
 
-static void mask_lapic_irq(unsigned int irq)
+static void unmask_lapic_irq(unsigned int irq)
 {
        unsigned long v;
 
        v = apic_read(APIC_LVT0);
-       apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
+       apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
 }
 
 static void ack_lapic_irq (unsigned int irq)
@@ -1911,7 +2468,10 @@ static struct irq_chip lapic_chip __read_mostly = {
 
 static void lapic_register_intr(int irq)
 {
-       irq_desc[irq].status &= ~IRQ_LEVEL;
+       struct irq_desc *desc;
+
+       desc = irq_to_desc(irq);
+       desc->status &= ~IRQ_LEVEL;
        set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
                                      "edge");
 }
@@ -1919,19 +2479,19 @@ static void lapic_register_intr(int irq)
 static void __init setup_nmi(void)
 {
        /*
-        * Dirty trick to enable the NMI watchdog ...
+        * Dirty trick to enable the NMI watchdog ...
         * We put the 8259A master into AEOI mode and
         * unmask on all local APICs LVT0 as NMI.
         *
         * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
         * is from Maciej W. Rozycki - so we do not have to EOI from
         * the NMI handler or the timer interrupt.
-        */ 
-       printk(KERN_INFO "activating NMI Watchdog ...");
+        */
+       apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ...");
 
        enable_NMI_through_LVT0();
 
-       printk(" done.\n");
+       apic_printk(APIC_VERBOSE, " done.\n");
 }
 
 /*
@@ -1948,12 +2508,17 @@ static inline void __init unlock_ExtINT_logic(void)
        unsigned char save_control, save_freq_select;
 
        pin  = find_isa_irq_pin(8, mp_INT);
+       if (pin == -1) {
+               WARN_ON_ONCE(1);
+               return;
+       }
        apic = find_isa_irq_apic(8, mp_INT);
-       if (pin == -1)
+       if (apic == -1) {
+               WARN_ON_ONCE(1);
                return;
+       }
 
        entry0 = ioapic_read_entry(apic, pin);
-
        clear_IO_APIC_pin(apic, pin);
 
        memset(&entry1, 0, sizeof(entry1));
@@ -1988,23 +2553,38 @@ static inline void __init unlock_ExtINT_logic(void)
        ioapic_write_entry(apic, pin, entry0);
 }
 
+static int disable_timer_pin_1 __initdata;
+/* Actually the next is obsolete, but keep it for paranoid reasons -AK */
+static int __init disable_timer_pin_setup(char *arg)
+{
+       disable_timer_pin_1 = 1;
+       return 0;
+}
+early_param("disable_timer_pin_1", disable_timer_pin_setup);
+
+int timer_through_8259 __initdata;
+
 /*
  * This code may look a bit paranoid, but it's supposed to cooperate with
  * a wide range of boards and BIOS bugs.  Fortunately only the timer IRQ
  * is so screwy.  Thanks to Brian Perkins for testing/hacking this beast
  * fanatically on his truly buggy board.
  *
- * FIXME: really need to revamp this for modern platforms only.
+ * FIXME: really need to revamp this for all platforms.
  */
 static inline void __init check_timer(void)
 {
-       struct irq_cfg *cfg = irq_cfg + 0;
+       struct irq_cfg *cfg = irq_cfg(0);
        int apic1, pin1, apic2, pin2;
        unsigned long flags;
+       unsigned int ver;
        int no_pin1 = 0;
 
        local_irq_save(flags);
 
+       ver = apic_read(APIC_LVR);
+       ver = GET_APIC_VERSION(ver);
+
        /*
         * get/set the timer IRQ vector:
         */
@@ -2013,10 +2593,18 @@ static inline void __init check_timer(void)
 
        /*
         * As IRQ0 is to be enabled in the 8259A, the virtual
-        * wire has to be disabled in the local APIC.
+        * wire has to be disabled in the local APIC.  Also
+        * timer interrupts need to be acknowledged manually in
+        * the 8259A for the i82489DX when using the NMI
+        * watchdog as that APIC treats NMIs as level-triggered.
+        * The AEOI mode will finish them in the 8259A
+        * automatically.
         */
        apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
        init_8259A(1);
+#ifdef CONFIG_X86_32
+       timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
+#endif
 
        pin1  = find_isa_irq_pin(0, mp_INT);
        apic1 = find_isa_irq_apic(0, mp_INT);
@@ -2035,8 +2623,10 @@ static inline void __init check_timer(void)
         * 8259A.
         */
        if (pin1 == -1) {
+#ifdef CONFIG_INTR_REMAP
                if (intr_remapping_enabled)
                        panic("BIOS bug: timer not connected to IO-APIC");
+#endif
                pin1 = pin2;
                apic1 = apic2;
                no_pin1 = 1;
@@ -2054,7 +2644,7 @@ static inline void __init check_timer(void)
                        setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
                }
                unmask_IO_APIC_irq(0);
-               if (!no_timer_check && timer_irq_works()) {
+               if (timer_irq_works()) {
                        if (nmi_watchdog == NMI_IO_APIC) {
                                setup_nmi();
                                enable_8259A_irq(0);
@@ -2063,8 +2653,10 @@ static inline void __init check_timer(void)
                                clear_IO_APIC_pin(0, pin1);
                        goto out;
                }
+#ifdef CONFIG_INTR_REMAP
                if (intr_remapping_enabled)
                        panic("timer doesn't work through Interrupt-remapped IO-APIC");
+#endif
                clear_IO_APIC_pin(apic1, pin1);
                if (!no_pin1)
                        apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
@@ -2104,6 +2696,9 @@ static inline void __init check_timer(void)
                            "through the IO-APIC - disabling NMI Watchdog!\n");
                nmi_watchdog = NMI_NONE;
        }
+#ifdef CONFIG_X86_32
+       timer_ack = 0;
+#endif
 
        apic_printk(APIC_QUIET, KERN_INFO
                    "...trying to set up timer as Virtual Wire IRQ...\n");
@@ -2140,13 +2735,6 @@ out:
        local_irq_restore(flags);
 }
 
-static int __init notimercheck(char *s)
-{
-       no_timer_check = 1;
-       return 1;
-}
-__setup("no_timer_check", notimercheck);
-
 /*
  * Traditionally ISA IRQ2 is the cascade IRQ, and is not available
  * to devices.  However there may be an I/O APIC pin available for
@@ -2164,25 +2752,49 @@ __setup("no_timer_check", notimercheck);
  * the I/O APIC in all cases now.  No actual device should request
  * it anyway.  --macro
  */
-#define PIC_IRQS       (1<<2)
+#define PIC_IRQS       (1 << PIC_CASCADE_IR)
 
 void __init setup_IO_APIC(void)
 {
 
+#ifdef CONFIG_X86_32
+       enable_IO_APIC();
+#else
        /*
         * calling enable_IO_APIC() is moved to setup_local_APIC for BP
         */
+#endif
 
        io_apic_irqs = ~PIC_IRQS;
 
        apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
-
+       /*
+        * Set up IO-APIC IRQ routing.
+        */
+#ifdef CONFIG_X86_32
+       if (!acpi_ioapic)
+               setup_ioapic_ids_from_mpc();
+#endif
        sync_Arb_IDs();
        setup_IO_APIC_irqs();
        init_IO_APIC_traps();
        check_timer();
 }
 
+/*
+ *      Called after all the initialization is done. If we didn't find any
+ *      APIC bugs then we can allow the modify fast path.
+ */
+
+static int __init io_apic_bug_finalize(void)
+{
+       if (sis_apic_bug == -1)
+               sis_apic_bug = 0;
+       return 0;
+}
+
+late_initcall(io_apic_bug_finalize);
+
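late_initcall() runs after all device initcalls, so if nothing has flagged the SiS read-modify-write bug by then, sis_apic_bug is still -1 ("don't know") and can safely be cleared to 0. The flag gates the slow path in io_apic_modify(), visible in the deleted 32-bit file below:

        if (sis_apic_bug)
                writel(reg, &io_apic->index);   /* affected chipsets lose the index */
        writel(value, &io_apic->data);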
 struct sysfs_ioapic_data {
        struct sys_device dev;
        struct IO_APIC_route_entry entry[0];
@@ -2270,32 +2882,51 @@ device_initcall(ioapic_init_sysfs);
 /*
  * Dynamic irq allocate and deallocation
  */
-int create_irq(void)
+unsigned int create_irq_nr(unsigned int irq_want)
 {
        /* Allocate an unused irq */
-       int irq;
-       int new;
+       unsigned int irq;
+       unsigned int new;
        unsigned long flags;
+       struct irq_cfg *cfg_new;
+
+       irq_want = nr_irqs - 1;
 
-       irq = -ENOSPC;
+       irq = 0;
        spin_lock_irqsave(&vector_lock, flags);
-       for (new = (NR_IRQS - 1); new >= 0; new--) {
+       for (new = irq_want; new > 0; new--) {
                if (platform_legacy_irq(new))
                        continue;
-               if (irq_cfg[new].vector != 0)
+               cfg_new = irq_cfg(new);
+               if (cfg_new && cfg_new->vector != 0)
                        continue;
+               /* check if need to create one */
+               if (!cfg_new)
+                       cfg_new = irq_cfg_alloc(new);
                if (__assign_irq_vector(new, TARGET_CPUS) == 0)
                        irq = new;
                break;
        }
        spin_unlock_irqrestore(&vector_lock, flags);
 
-       if (irq >= 0) {
+       if (irq > 0) {
                dynamic_irq_init(irq);
        }
        return irq;
 }
 
+int create_irq(void)
+{
+       int irq;
+
+       irq = create_irq_nr(nr_irqs - 1);
+
+       if (irq == 0)
+               irq = -1;
+
+       return irq;
+}
+
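Two conventions change here: create_irq_nr() returns 0 on failure instead of -ENOSPC (IRQ 0 is never handed out dynamically), and, as merged, it immediately overwrites its irq_want argument with nr_irqs - 1, so the caller's hint does not yet affect where the downward scan starts. Caller-side handling looks like the create_irq() wrapper just above, or:

        unsigned int irq = create_irq_nr(irq_want);
        if (irq == 0)           /* no free descriptor/vector */
                return -1;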
 void destroy_irq(unsigned int irq)
 {
        unsigned long flags;
@@ -2316,7 +2947,7 @@ void destroy_irq(unsigned int irq)
 #ifdef CONFIG_PCI_MSI
 static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
 {
-       struct irq_cfg *cfg = irq_cfg + irq;
+       struct irq_cfg *cfg;
        int err;
        unsigned dest;
        cpumask_t tmp;
@@ -2326,6 +2957,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
        if (err)
                return err;
 
+       cfg = irq_cfg(irq);
        cpus_and(tmp, cfg->domain, tmp);
        dest = cpu_mask_to_apicid(tmp);
 
@@ -2383,10 +3015,11 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
 #ifdef CONFIG_SMP
 static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 {
-       struct irq_cfg *cfg = irq_cfg + irq;
+       struct irq_cfg *cfg;
        struct msi_msg msg;
        unsigned int dest;
        cpumask_t tmp;
+       struct irq_desc *desc;
 
        cpus_and(tmp, mask, cpu_online_map);
        if (cpus_empty(tmp))
@@ -2395,6 +3028,7 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
        if (assign_irq_vector(irq, mask))
                return;
 
+       cfg = irq_cfg(irq);
        cpus_and(tmp, cfg->domain, mask);
        dest = cpu_mask_to_apicid(tmp);
 
@@ -2406,7 +3040,8 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
        msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
        write_msi_msg(irq, &msg);
-       irq_desc[irq].affinity = mask;
+       desc = irq_to_desc(irq);
+       desc->affinity = mask;
 }
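set_msi_irq_affinity() is the first of several near-identical affinity helpers in this file (MSI, IR-MSI, DMAR, HPET, HT). All of them follow the same skeleton, sketched once here so the later hunks can be skimmed:

        cpus_and(tmp, mask, cpu_online_map);
        if (cpus_empty(tmp))
                return;                 /* no online CPU in the requested mask */
        if (assign_irq_vector(irq, mask))
                return;                 /* vector allocation failed */
        cfg = irq_cfg(irq);             /* vector/domain chosen above */
        /* ... rewrite the MSI message or routing entry with cfg->vector
           and the new destination APIC ID ... */
        desc = irq_to_desc(irq);
        desc->affinity = mask;          /* record the new mask */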
 
 #ifdef CONFIG_INTR_REMAP
@@ -2416,10 +3051,11 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
  */
 static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 {
-       struct irq_cfg *cfg = irq_cfg + irq;
+       struct irq_cfg *cfg;
        unsigned int dest;
        cpumask_t tmp, cleanup_mask;
        struct irte irte;
+       struct irq_desc *desc;
 
        cpus_and(tmp, mask, cpu_online_map);
        if (cpus_empty(tmp))
@@ -2431,6 +3067,7 @@ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
        if (assign_irq_vector(irq, mask))
                return;
 
+       cfg = irq_cfg(irq);
        cpus_and(tmp, cfg->domain, mask);
        dest = cpu_mask_to_apicid(tmp);
 
@@ -2454,7 +3091,8 @@ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
                cfg->move_in_progress = 0;
        }
 
-       irq_desc[irq].affinity = mask;
+       desc = irq_to_desc(irq);
+       desc->affinity = mask;
 }
 #endif
 #endif /* CONFIG_SMP */
@@ -2507,7 +3145,7 @@ static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
        if (index < 0) {
                printk(KERN_ERR
                       "Unable to allocate %d IRTE for PCI %s\n", nvec,
-                       pci_name(dev));
+                      pci_name(dev));
                return -ENOSPC;
        }
        return index;
@@ -2528,7 +3166,7 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
 
 #ifdef CONFIG_INTR_REMAP
        if (irq_remapped(irq)) {
-               struct irq_desc *desc = irq_desc + irq;
+               struct irq_desc *desc = irq_to_desc(irq);
                /*
                 * irq migration in process context
                 */
@@ -2538,16 +3176,34 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
 #endif
                set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
 
+       dev_printk(KERN_DEBUG, &dev->dev, "irq %d for MSI/MSI-X\n", irq);
+
        return 0;
 }
 
+static unsigned int build_irq_for_pci_dev(struct pci_dev *dev)
+{
+       unsigned int irq;
+
+       irq = dev->bus->number;
+       irq <<= 8;
+       irq |= dev->devfn;
+       irq <<= 12;
+
+       return irq;
+}
+
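build_irq_for_pci_dev() packs bus and devfn into the upper bits of an IRQ hint: irq = ((bus << 8) | devfn) << 12, i.e. (bus << 20) | (devfn << 12). Worked example:

        /* bus 0x02, devfn 0x18 (device 3, function 0): */
        irq = ((0x02 << 8) | 0x18) << 12;       /* = 0x218000 */
        /* arch_setup_msi_irq() below adds 0x100: irq_want = 0x218100 */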
 int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
 {
-       int irq, ret;
+       unsigned int irq;
+       int ret;
+       unsigned int irq_want;
 
-       irq = create_irq();
-       if (irq < 0)
-               return irq;
+       irq_want = build_irq_for_pci_dev(dev) + 0x100;
+
+       irq = create_irq_nr(irq_want);
+       if (irq == 0)
+               return -1;
 
 #ifdef CONFIG_INTR_REMAP
        if (!intr_remapping_enabled)
@@ -2574,18 +3230,22 @@ error:
 
 int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 {
-       int irq, ret, sub_handle;
+       unsigned int irq;
+       int ret, sub_handle;
        struct msi_desc *desc;
+       unsigned int irq_want;
+
 #ifdef CONFIG_INTR_REMAP
        struct intel_iommu *iommu = 0;
        int index = 0;
 #endif
 
+       irq_want = build_irq_for_pci_dev(dev) + 0x100;
        sub_handle = 0;
        list_for_each_entry(desc, &dev->msi_list, list) {
-               irq = create_irq();
-               if (irq < 0)
-                       return irq;
+               irq = create_irq_nr(irq_want--);
+               if (irq == 0)
+                       return -1;
 #ifdef CONFIG_INTR_REMAP
                if (!intr_remapping_enabled)
                        goto no_ir;
@@ -2636,10 +3296,11 @@ void arch_teardown_msi_irq(unsigned int irq)
 #ifdef CONFIG_SMP
 static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
 {
-       struct irq_cfg *cfg = irq_cfg + irq;
+       struct irq_cfg *cfg;
        struct msi_msg msg;
        unsigned int dest;
        cpumask_t tmp;
+       struct irq_desc *desc;
 
        cpus_and(tmp, mask, cpu_online_map);
        if (cpus_empty(tmp))
@@ -2648,6 +3309,7 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
        if (assign_irq_vector(irq, mask))
                return;
 
+       cfg = irq_cfg(irq);
        cpus_and(tmp, cfg->domain, mask);
        dest = cpu_mask_to_apicid(tmp);
 
@@ -2659,7 +3321,8 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
        msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
        dmar_msi_write(irq, &msg);
-       irq_desc[irq].affinity = mask;
+       desc = irq_to_desc(irq);
+       desc->affinity = mask;
 }
 #endif /* CONFIG_SMP */
 
@@ -2689,6 +3352,69 @@ int arch_setup_dmar_msi(unsigned int irq)
 }
 #endif
 
+#ifdef CONFIG_HPET_TIMER
+
+#ifdef CONFIG_SMP
+static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask)
+{
+       struct irq_cfg *cfg;
+       struct irq_desc *desc;
+       struct msi_msg msg;
+       unsigned int dest;
+       cpumask_t tmp;
+
+       cpus_and(tmp, mask, cpu_online_map);
+       if (cpus_empty(tmp))
+               return;
+
+       if (assign_irq_vector(irq, mask))
+               return;
+
+       cfg = irq_cfg(irq);
+       cpus_and(tmp, cfg->domain, mask);
+       dest = cpu_mask_to_apicid(tmp);
+
+       hpet_msi_read(irq, &msg);
+
+       msg.data &= ~MSI_DATA_VECTOR_MASK;
+       msg.data |= MSI_DATA_VECTOR(cfg->vector);
+       msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
+       msg.address_lo |= MSI_ADDR_DEST_ID(dest);
+
+       hpet_msi_write(irq, &msg);
+       desc = irq_to_desc(irq);
+       desc->affinity = mask;
+}
+#endif /* CONFIG_SMP */
+
+struct irq_chip hpet_msi_type = {
+       .name = "HPET_MSI",
+       .unmask = hpet_msi_unmask,
+       .mask = hpet_msi_mask,
+       .ack = ack_apic_edge,
+#ifdef CONFIG_SMP
+       .set_affinity = hpet_msi_set_affinity,
+#endif
+       .retrigger = ioapic_retrigger_irq,
+};
+
+int arch_setup_hpet_msi(unsigned int irq)
+{
+       int ret;
+       struct msi_msg msg;
+
+       ret = msi_compose_msg(NULL, irq, &msg);
+       if (ret < 0)
+               return ret;
+
+       hpet_msi_write(irq, &msg);
+       set_irq_chip_and_handler_name(irq, &hpet_msi_type, handle_edge_irq,
+               "edge");
+
+       return 0;
+}
+#endif
+
 #endif /* CONFIG_PCI_MSI */
 /*
  * Hypertransport interrupt support
@@ -2713,9 +3439,10 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
 
 static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
 {
-       struct irq_cfg *cfg = irq_cfg + irq;
+       struct irq_cfg *cfg;
        unsigned int dest;
        cpumask_t tmp;
+       struct irq_desc *desc;
 
        cpus_and(tmp, mask, cpu_online_map);
        if (cpus_empty(tmp))
@@ -2724,11 +3451,13 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
        if (assign_irq_vector(irq, mask))
                return;
 
+       cfg = irq_cfg(irq);
        cpus_and(tmp, cfg->domain, mask);
        dest = cpu_mask_to_apicid(tmp);
 
        target_ht_irq(irq, dest, cfg->vector);
-       irq_desc[irq].affinity = mask;
+       desc = irq_to_desc(irq);
+       desc->affinity = mask;
 }
 #endif
 
@@ -2745,7 +3474,7 @@ static struct irq_chip ht_irq_chip = {
 
 int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 {
-       struct irq_cfg *cfg = irq_cfg + irq;
+       struct irq_cfg *cfg;
        int err;
        cpumask_t tmp;
 
@@ -2755,6 +3484,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
                struct ht_irq_msg msg;
                unsigned dest;
 
+               cfg = irq_cfg(irq);
                cpus_and(tmp, cfg->domain, tmp);
                dest = cpu_mask_to_apicid(tmp);
 
@@ -2777,20 +3507,196 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 
                set_irq_chip_and_handler_name(irq, &ht_irq_chip,
                                              handle_edge_irq, "edge");
+
+               dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq);
        }
        return err;
 }
 #endif /* CONFIG_HT_IRQ */
 
+#ifdef CONFIG_X86_64
+/*
+ * Re-target the irq to the specified CPU and enable the specified MMR located
+ * on the specified blade to allow the sending of MSIs to the specified CPU.
+ */
+int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
+                      unsigned long mmr_offset)
+{
+       const cpumask_t *eligible_cpu = get_cpu_mask(cpu);
+       struct irq_cfg *cfg;
+       int mmr_pnode;
+       unsigned long mmr_value;
+       struct uv_IO_APIC_route_entry *entry;
+       unsigned long flags;
+       int err;
+
+       err = assign_irq_vector(irq, *eligible_cpu);
+       if (err != 0)
+               return err;
+
+       spin_lock_irqsave(&vector_lock, flags);
+       set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq,
+                                     irq_name);
+       spin_unlock_irqrestore(&vector_lock, flags);
+
+       cfg = irq_cfg(irq);
+
+       mmr_value = 0;
+       entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
+       BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
+
+       entry->vector = cfg->vector;
+       entry->delivery_mode = INT_DELIVERY_MODE;
+       entry->dest_mode = INT_DEST_MODE;
+       entry->polarity = 0;
+       entry->trigger = 0;
+       entry->mask = 0;
+       entry->dest = cpu_mask_to_apicid(*eligible_cpu);
+
+       mmr_pnode = uv_blade_to_pnode(mmr_blade);
+       uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
+
+       return irq;
+}
+
+/*
+ * Disable the specified MMR located on the specified blade so that MSIs are
+ * no longer allowed to be sent.
+ */
+void arch_disable_uv_irq(int mmr_blade, unsigned long mmr_offset)
+{
+       unsigned long mmr_value;
+       struct uv_IO_APIC_route_entry *entry;
+       int mmr_pnode;
+
+       mmr_value = 0;
+       entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
+       BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
+
+       entry->mask = 1;
+
+       mmr_pnode = uv_blade_to_pnode(mmr_blade);
+       uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
+}
+#endif /* CONFIG_X86_64 */
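Both UV helpers build the 64-bit MMR image by overlaying struct uv_IO_APIC_route_entry on a plain unsigned long and writing fields through the pointer; the BUG_ON documents the size assumption that keeps the overlay safe. The idiom in isolation, with hypothetical struct and register names:

        unsigned long raw = 0;
        struct hw_fields *f = (struct hw_fields *)&raw; /* overlay */
        BUG_ON(sizeof(*f) != sizeof(raw));      /* layouts must match */
        f->mask = 1;                            /* set bits via the struct */
        write_reg64(raw);                       /* one 64-bit store to the MMR */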
+
+int __init io_apic_get_redir_entries (int ioapic)
+{
+       union IO_APIC_reg_01    reg_01;
+       unsigned long flags;
+
+       spin_lock_irqsave(&ioapic_lock, flags);
+       reg_01.raw = io_apic_read(ioapic, 1);
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+
+       return reg_01.bits.entries;
+}
+
+int __init probe_nr_irqs(void)
+{
+       int idx;
+       int nr = 0;
+#ifndef CONFIG_XEN
+       int nr_min = 32;
+#else
+       int nr_min = NR_IRQS;
+#endif
+
+       for (idx = 0; idx < nr_ioapics; idx++)
+               nr += io_apic_get_redir_entries(idx) + 1;
+
+       /* double it for hotplug and msi and nmi */
+       nr <<= 1;
+
+       /* something wrong ? */
+       if (nr < nr_min)
+               nr = nr_min;
+
+       return nr;
+}
+
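A quick worked example of probe_nr_irqs(): io_apic_get_redir_entries() returns the highest redirection-entry index, so a 24-pin IO-APIC contributes 23 + 1 = 24. With two such IO-APICs:

        /* nr  = (23 + 1) + (23 + 1) = 48
         * nr <<= 1                  -> 96  (hotplug/MSI/NMI headroom)
         * 96 >= nr_min (32 without Xen), so probe_nr_irqs() returns 96
         */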
 /* --------------------------------------------------------------------------
                           ACPI-based IOAPIC Configuration
    -------------------------------------------------------------------------- */
 
 #ifdef CONFIG_ACPI
 
-#define IO_APIC_MAX_ID         0xFE
+#ifdef CONFIG_X86_32
+int __init io_apic_get_unique_id(int ioapic, int apic_id)
+{
+       union IO_APIC_reg_00 reg_00;
+       static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
+       physid_mask_t tmp;
+       unsigned long flags;
+       int i = 0;
+
+       /*
+        * The P4 platform supports up to 256 APIC IDs on two separate APIC
+        * buses (one for LAPICs, one for IOAPICs), whereas its predecessors only
+        * supported up to 16 on one shared APIC bus.
+        *
+        * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full
+        *      advantage of new APIC bus architecture.
+        */
+
+       if (physids_empty(apic_id_map))
+               apic_id_map = ioapic_phys_id_map(phys_cpu_present_map);
 
-int __init io_apic_get_redir_entries (int ioapic)
+       spin_lock_irqsave(&ioapic_lock, flags);
+       reg_00.raw = io_apic_read(ioapic, 0);
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+
+       if (apic_id >= get_physical_broadcast()) {
+               printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
+                       "%d\n", ioapic, apic_id, reg_00.bits.ID);
+               apic_id = reg_00.bits.ID;
+       }
+
+       /*
+        * Every APIC in a system must have a unique ID or we get lots of nice
+        * 'stuck on smp_invalidate_needed IPI wait' messages.
+        */
+       if (check_apicid_used(apic_id_map, apic_id)) {
+
+               for (i = 0; i < get_physical_broadcast(); i++) {
+                       if (!check_apicid_used(apic_id_map, i))
+                               break;
+               }
+
+               if (i == get_physical_broadcast())
+                       panic("Max apic_id exceeded!\n");
+
+               printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, "
+                       "trying %d\n", ioapic, apic_id, i);
+
+               apic_id = i;
+       }
+
+       tmp = apicid_to_cpu_present(apic_id);
+       physids_or(apic_id_map, apic_id_map, tmp);
+
+       if (reg_00.bits.ID != apic_id) {
+               reg_00.bits.ID = apic_id;
+
+               spin_lock_irqsave(&ioapic_lock, flags);
+               io_apic_write(ioapic, 0, reg_00.raw);
+               reg_00.raw = io_apic_read(ioapic, 0);
+               spin_unlock_irqrestore(&ioapic_lock, flags);
+
+               /* Sanity check */
+               if (reg_00.bits.ID != apic_id) {
+                       printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic);
+                       return -1;
+               }
+       }
+
+       apic_printk(APIC_VERBOSE, KERN_INFO
+                       "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
+
+       return apic_id;
+}
+
+int __init io_apic_get_version(int ioapic)
 {
        union IO_APIC_reg_01    reg_01;
        unsigned long flags;
@@ -2799,9 +3705,9 @@ int __init io_apic_get_redir_entries (int ioapic)
        reg_01.raw = io_apic_read(ioapic, 1);
        spin_unlock_irqrestore(&ioapic_lock, flags);
 
-       return reg_01.bits.entries;
+       return reg_01.bits.version;
 }
-
+#endif
 
 int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity)
 {
@@ -2853,6 +3759,7 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
 void __init setup_ioapic_dest(void)
 {
        int pin, ioapic, irq, irq_entry;
+       struct irq_cfg *cfg;
 
        if (skip_ioapic_setup == 1)
                return;
@@ -2868,7 +3775,8 @@ void __init setup_ioapic_dest(void)
                         * when you have too many devices, because at that time only boot
                         * cpu is online.
                         */
-                       if (!irq_cfg[irq].vector)
+                       cfg = irq_cfg(irq);
+                       if (!cfg->vector)
                                setup_IO_APIC_irq(ioapic, pin, irq,
                                                  irq_trigger(irq_entry),
                                                  irq_polarity(irq_entry));
@@ -2926,18 +3834,33 @@ void __init ioapic_init_mappings(void)
        struct resource *ioapic_res;
        int i;
 
+       irq_2_pin_init();
        ioapic_res = ioapic_setup_resources();
        for (i = 0; i < nr_ioapics; i++) {
                if (smp_found_config) {
                        ioapic_phys = mp_ioapics[i].mp_apicaddr;
+#ifdef CONFIG_X86_32
+                       if (!ioapic_phys) {
+                               printk(KERN_ERR
+                                      "WARNING: bogus zero IO-APIC "
+                                      "address found in MPTABLE, "
+                                      "disabling IO/APIC support!\n");
+                               smp_found_config = 0;
+                               skip_ioapic_setup = 1;
+                               goto fake_ioapic_page;
+                       }
+#endif
                } else {
+#ifdef CONFIG_X86_32
+fake_ioapic_page:
+#endif
                        ioapic_phys = (unsigned long)
                                alloc_bootmem_pages(PAGE_SIZE);
                        ioapic_phys = __pa(ioapic_phys);
                }
                set_fixmap_nocache(idx, ioapic_phys);
                apic_printk(APIC_VERBOSE,
-                           "mapped IOAPIC to %016lx (%016lx)\n",
+                           "mapped IOAPIC to %08lx (%08lx)\n",
                            __fix_to_virt(idx), ioapic_phys);
                idx++;
 
@@ -2971,4 +3894,3 @@ static int __init ioapic_insert_resources(void)
 /* Insert the IO APIC resources after PCI initialization has occurred to handle
  * IO APICS that are mapped in on a BAR in PCI space. */
 late_initcall(ioapic_insert_resources);
-
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
deleted file mode 100644 (file)
index e710289..0000000
+++ /dev/null
@@ -1,2908 +0,0 @@
-/*
- *     Intel IO-APIC support for multi-Pentium hosts.
- *
- *     Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo
- *
- *     Many thanks to Stig Venaas for trying out countless experimental
- *     patches and reporting/debugging problems patiently!
- *
- *     (c) 1999, Multiple IO-APIC support, developed by
- *     Ken-ichi Yaku <yaku@css1.kbnes.nec.co.jp> and
- *      Hidemi Kishimoto <kisimoto@css1.kbnes.nec.co.jp>,
- *     further tested and cleaned up by Zach Brown <zab@redhat.com>
- *     and Ingo Molnar <mingo@redhat.com>
- *
- *     Fixes
- *     Maciej W. Rozycki       :       Bits for genuine 82489DX APICs;
- *                                     thanks to Eric Gilmore
- *                                     and Rolf G. Tews
- *                                     for testing these extensively
- *     Paul Diefenbaugh        :       Added full ACPI support
- */
-
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/sched.h>
-#include <linux/bootmem.h>
-#include <linux/mc146818rtc.h>
-#include <linux/compiler.h>
-#include <linux/acpi.h>
-#include <linux/module.h>
-#include <linux/sysdev.h>
-#include <linux/pci.h>
-#include <linux/msi.h>
-#include <linux/htirq.h>
-#include <linux/freezer.h>
-#include <linux/kthread.h>
-#include <linux/jiffies.h>     /* time_after() */
-
-#include <asm/io.h>
-#include <asm/smp.h>
-#include <asm/desc.h>
-#include <asm/timer.h>
-#include <asm/i8259.h>
-#include <asm/nmi.h>
-#include <asm/msidef.h>
-#include <asm/hypertransport.h>
-#include <asm/setup.h>
-
-#include <mach_apic.h>
-#include <mach_apicdef.h>
-
-#define __apicdebuginit(type) static type __init
-
-int (*ioapic_renumber_irq)(int ioapic, int irq);
-atomic_t irq_mis_count;
-
-/* Where, if anywhere, the i8259 is connected in external int mode */
-static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
-
-static DEFINE_SPINLOCK(ioapic_lock);
-DEFINE_SPINLOCK(vector_lock);
-
-int timer_through_8259 __initdata;
-
-/*
- *     Is the SiS APIC rmw bug present ?
- *     -1 = don't know, 0 = no, 1 = yes
- */
-int sis_apic_bug = -1;
-
-/*
- * # of IRQ routing registers
- */
-int nr_ioapic_registers[MAX_IO_APICS];
-
-/* I/O APIC entries */
-struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
-int nr_ioapics;
-
-/* MP IRQ source entries */
-struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
-
-/* # of MP IRQ source entries */
-int mp_irq_entries;
-
-#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
-int mp_bus_id_to_type[MAX_MP_BUSSES];
-#endif
-
-DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
-
-static int disable_timer_pin_1 __initdata;
-
-/*
- * Rough estimation of how many shared IRQs there are, can
- * be changed anytime.
- */
-#define MAX_PLUS_SHARED_IRQS NR_IRQS
-#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
-
-/*
- * This is performance-critical, we want to do it O(1)
- *
- * the indexing order of this array favors 1:1 mappings
- * between pins and IRQs.
- */
-
-static struct irq_pin_list {
-       int apic, pin, next;
-} irq_2_pin[PIN_MAP_SIZE];
-
-struct io_apic {
-       unsigned int index;
-       unsigned int unused[3];
-       unsigned int data;
-};
-
-static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
-{
-       return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
-               + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK);
-}
-
-static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
-{
-       struct io_apic __iomem *io_apic = io_apic_base(apic);
-       writel(reg, &io_apic->index);
-       return readl(&io_apic->data);
-}
-
-static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
-{
-       struct io_apic __iomem *io_apic = io_apic_base(apic);
-       writel(reg, &io_apic->index);
-       writel(value, &io_apic->data);
-}
-
-/*
- * Re-write a value: to be used for read-modify-write
- * cycles where the read already set up the index register.
- *
- * Older SiS APICs require that we rewrite the index register.
- */
-static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
-{
-       volatile struct io_apic __iomem *io_apic = io_apic_base(apic);
-       if (sis_apic_bug)
-               writel(reg, &io_apic->index);
-       writel(value, &io_apic->data);
-}
-
-union entry_union {
-       struct { u32 w1, w2; };
-       struct IO_APIC_route_entry entry;
-};
-
-static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
-{
-       union entry_union eu;
-       unsigned long flags;
-       spin_lock_irqsave(&ioapic_lock, flags);
-       eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
-       eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
-       spin_unlock_irqrestore(&ioapic_lock, flags);
-       return eu.entry;
-}
-
-/*
- * When we write a new IO APIC routing entry, we need to write the high
- * word first! If the mask bit in the low word is clear, we will enable
- * the interrupt, and we need to make sure the entry is fully populated
- * before that happens.
- */
-static void
-__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
-{
-       union entry_union eu;
-       eu.entry = e;
-       io_apic_write(apic, 0x11 + 2*pin, eu.w2);
-       io_apic_write(apic, 0x10 + 2*pin, eu.w1);
-}
-
-static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
-{
-       unsigned long flags;
-       spin_lock_irqsave(&ioapic_lock, flags);
-       __ioapic_write_entry(apic, pin, e);
-       spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-/*
- * When we mask an IO APIC routing entry, we need to write the low
- * word first, in order to set the mask bit before we change the
- * high bits!
- */
-static void ioapic_mask_entry(int apic, int pin)
-{
-       unsigned long flags;
-       union entry_union eu = { .entry.mask = 1 };
-
-       spin_lock_irqsave(&ioapic_lock, flags);
-       io_apic_write(apic, 0x10 + 2*pin, eu.w1);
-       io_apic_write(apic, 0x11 + 2*pin, eu.w2);
-       spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-/*
- * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
- * shared ISA-space IRQs, so we have to support them. We are super
- * fast in the common case, and fast for shared ISA-space IRQs.
- */
-static void add_pin_to_irq(unsigned int irq, int apic, int pin)
-{
-       static int first_free_entry = NR_IRQS;
-       struct irq_pin_list *entry = irq_2_pin + irq;
-
-       while (entry->next)
-               entry = irq_2_pin + entry->next;
-
-       if (entry->pin != -1) {
-               entry->next = first_free_entry;
-               entry = irq_2_pin + entry->next;
-               if (++first_free_entry >= PIN_MAP_SIZE)
-                       panic("io_apic.c: whoops");
-       }
-       entry->apic = apic;
-       entry->pin = pin;
-}
-
-/*
- * Reroute an IRQ to a different pin.
- */
-static void __init replace_pin_at_irq(unsigned int irq,
-                                     int oldapic, int oldpin,
-                                     int newapic, int newpin)
-{
-       struct irq_pin_list *entry = irq_2_pin + irq;
-
-       while (1) {
-               if (entry->apic == oldapic && entry->pin == oldpin) {
-                       entry->apic = newapic;
-                       entry->pin = newpin;
-               }
-               if (!entry->next)
-                       break;
-               entry = irq_2_pin + entry->next;
-       }
-}
-
-static void __modify_IO_APIC_irq(unsigned int irq, unsigned long enable, unsigned long disable)
-{
-       struct irq_pin_list *entry = irq_2_pin + irq;
-       unsigned int pin, reg;
-
-       for (;;) {
-               pin = entry->pin;
-               if (pin == -1)
-                       break;
-               reg = io_apic_read(entry->apic, 0x10 + pin*2);
-               reg &= ~disable;
-               reg |= enable;
-               io_apic_modify(entry->apic, 0x10 + pin*2, reg);
-               if (!entry->next)
-                       break;
-               entry = irq_2_pin + entry->next;
-       }
-}
-
-/* mask = 1 */
-static void __mask_IO_APIC_irq(unsigned int irq)
-{
-       __modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED, 0);
-}
-
-/* mask = 0 */
-static void __unmask_IO_APIC_irq(unsigned int irq)
-{
-       __modify_IO_APIC_irq(irq, 0, IO_APIC_REDIR_MASKED);
-}
-
-/* mask = 1, trigger = 0 */
-static void __mask_and_edge_IO_APIC_irq(unsigned int irq)
-{
-       __modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED,
-                               IO_APIC_REDIR_LEVEL_TRIGGER);
-}
-
-/* mask = 0, trigger = 1 */
-static void __unmask_and_level_IO_APIC_irq(unsigned int irq)
-{
-       __modify_IO_APIC_irq(irq, IO_APIC_REDIR_LEVEL_TRIGGER,
-                               IO_APIC_REDIR_MASKED);
-}
-
-static void mask_IO_APIC_irq(unsigned int irq)
-{
-       unsigned long flags;
-
-       spin_lock_irqsave(&ioapic_lock, flags);
-       __mask_IO_APIC_irq(irq);
-       spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-static void unmask_IO_APIC_irq(unsigned int irq)
-{
-       unsigned long flags;
-
-       spin_lock_irqsave(&ioapic_lock, flags);
-       __unmask_IO_APIC_irq(irq);
-       spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
-{
-       struct IO_APIC_route_entry entry;
-
-       /* Check delivery_mode to be sure we're not clearing an SMI pin */
-       entry = ioapic_read_entry(apic, pin);
-       if (entry.delivery_mode == dest_SMI)
-               return;
-
-       /*
-        * Disable it in the IO-APIC irq-routing table:
-        */
-       ioapic_mask_entry(apic, pin);
-}
-
-static void clear_IO_APIC(void)
-{
-       int apic, pin;
-
-       for (apic = 0; apic < nr_ioapics; apic++)
-               for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
-                       clear_IO_APIC_pin(apic, pin);
-}
-
-#ifdef CONFIG_SMP
-static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
-{
-       unsigned long flags;
-       int pin;
-       struct irq_pin_list *entry = irq_2_pin + irq;
-       unsigned int apicid_value;
-       cpumask_t tmp;
-
-       cpus_and(tmp, cpumask, cpu_online_map);
-       if (cpus_empty(tmp))
-               tmp = TARGET_CPUS;
-
-       cpus_and(cpumask, tmp, CPU_MASK_ALL);
-
-       apicid_value = cpu_mask_to_apicid(cpumask);
-       /* Prepare to do the io_apic_write */
-       apicid_value = apicid_value << 24;
-       spin_lock_irqsave(&ioapic_lock, flags);
-       for (;;) {
-               pin = entry->pin;
-               if (pin == -1)
-                       break;
-               io_apic_write(entry->apic, 0x10 + 1 + pin*2, apicid_value);
-               if (!entry->next)
-                       break;
-               entry = irq_2_pin + entry->next;
-       }
-       irq_desc[irq].affinity = cpumask;
-       spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-#if defined(CONFIG_IRQBALANCE)
-# include <asm/processor.h>    /* kernel_thread() */
-# include <linux/kernel_stat.h>        /* kstat */
-# include <linux/slab.h>               /* kmalloc() */
-# include <linux/timer.h>
-
-#define IRQBALANCE_CHECK_ARCH -999
-#define MAX_BALANCED_IRQ_INTERVAL      (5*HZ)
-#define MIN_BALANCED_IRQ_INTERVAL      (HZ/2)
-#define BALANCED_IRQ_MORE_DELTA                (HZ/10)
-#define BALANCED_IRQ_LESS_DELTA                (HZ)
-
-static int irqbalance_disabled __read_mostly = IRQBALANCE_CHECK_ARCH;
-static int physical_balance __read_mostly;
-static long balanced_irq_interval __read_mostly = MAX_BALANCED_IRQ_INTERVAL;
-
-static struct irq_cpu_info {
-       unsigned long *last_irq;
-       unsigned long *irq_delta;
-       unsigned long irq;
-} irq_cpu_data[NR_CPUS];
-
-#define CPU_IRQ(cpu)           (irq_cpu_data[cpu].irq)
-#define LAST_CPU_IRQ(cpu, irq)   (irq_cpu_data[cpu].last_irq[irq])
-#define IRQ_DELTA(cpu, irq)    (irq_cpu_data[cpu].irq_delta[irq])
-
-#define IDLE_ENOUGH(cpu,now) \
-       (idle_cpu(cpu) && ((now) - per_cpu(irq_stat, (cpu)).idle_timestamp > 1))
-
-#define IRQ_ALLOWED(cpu, allowed_mask) cpu_isset(cpu, allowed_mask)
-
-#define CPU_TO_PACKAGEINDEX(i) (first_cpu(per_cpu(cpu_sibling_map, i)))
-
-static cpumask_t balance_irq_affinity[NR_IRQS] = {
-       [0 ... NR_IRQS-1] = CPU_MASK_ALL
-};
-
-void set_balance_irq_affinity(unsigned int irq, cpumask_t mask)
-{
-       balance_irq_affinity[irq] = mask;
-}
-
-static unsigned long move(int curr_cpu, cpumask_t allowed_mask,
-                       unsigned long now, int direction)
-{
-       int search_idle = 1;
-       int cpu = curr_cpu;
-
-       goto inside;
-
-       do {
-               if (unlikely(cpu == curr_cpu))
-                       search_idle = 0;
-inside:
-               if (direction == 1) {
-                       cpu++;
-                       if (cpu >= NR_CPUS)
-                               cpu = 0;
-               } else {
-                       cpu--;
-                       if (cpu == -1)
-                               cpu = NR_CPUS-1;
-               }
-       } while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu, allowed_mask) ||
-                       (search_idle && !IDLE_ENOUGH(cpu, now)));
-
-       return cpu;
-}
-
-static inline void balance_irq(int cpu, int irq)
-{
-       unsigned long now = jiffies;
-       cpumask_t allowed_mask;
-       unsigned int new_cpu;
-
-       if (irqbalance_disabled)
-               return;
-
-       cpus_and(allowed_mask, cpu_online_map, balance_irq_affinity[irq]);
-       new_cpu = move(cpu, allowed_mask, now, 1);
-       if (cpu != new_cpu)
-               set_pending_irq(irq, cpumask_of_cpu(new_cpu));
-}
-
-static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold)
-{
-       int i, j;
-
-       for_each_online_cpu(i) {
-               for (j = 0; j < NR_IRQS; j++) {
-                       if (!irq_desc[j].action)
-                               continue;
-                       /* Is it a significant load ?  */
-                       if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i), j) <
-                                               useful_load_threshold)
-                               continue;
-                       balance_irq(i, j);
-               }
-       }
-       balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
-               balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);
-       return;
-}
-
-static void do_irq_balance(void)
-{
-       int i, j;
-       unsigned long max_cpu_irq = 0, min_cpu_irq = (~0);
-       unsigned long move_this_load = 0;
-       int max_loaded = 0, min_loaded = 0;
-       int load;
-       unsigned long useful_load_threshold = balanced_irq_interval + 10;
-       int selected_irq;
-       int tmp_loaded, first_attempt = 1;
-       unsigned long tmp_cpu_irq;
-       unsigned long imbalance = 0;
-       cpumask_t allowed_mask, target_cpu_mask, tmp;
-
-       for_each_possible_cpu(i) {
-               int package_index;
-               CPU_IRQ(i) = 0;
-               if (!cpu_online(i))
-                       continue;
-               package_index = CPU_TO_PACKAGEINDEX(i);
-               for (j = 0; j < NR_IRQS; j++) {
-                       unsigned long value_now, delta;
-                       /* Is this an active IRQ or balancing disabled ? */
-                       if (!irq_desc[j].action || irq_balancing_disabled(j))
-                               continue;
-                       if (package_index == i)
-                               IRQ_DELTA(package_index, j) = 0;
-                       /* Determine the total count per processor per IRQ */
-                       value_now = (unsigned long) kstat_cpu(i).irqs[j];
-
-                       /* Determine the activity per processor per IRQ */
-                       delta = value_now - LAST_CPU_IRQ(i, j);
-
-                       /* Update last_cpu_irq[][] for the next time */
-                       LAST_CPU_IRQ(i, j) = value_now;
-
-                       /* Ignore IRQs whose rate is less than the clock */
-                       if (delta < useful_load_threshold)
-                               continue;
-                       /* update the load for the processor or package total */
-                       IRQ_DELTA(package_index, j) += delta;
-
-                       /* Keep track of the higher numbered sibling as well */
-                       if (i != package_index)
-                               CPU_IRQ(i) += delta;
-                       /*
-                        * We have sibling A and sibling B in the package
-                        *
-                        * cpu_irq[A] = load for cpu A + load for cpu B
-                        * cpu_irq[B] = load for cpu B
-                        */
-                       CPU_IRQ(package_index) += delta;
-               }
-       }
-       /* Find the least loaded processor package */
-       for_each_online_cpu(i) {
-               if (i != CPU_TO_PACKAGEINDEX(i))
-                       continue;
-               if (min_cpu_irq > CPU_IRQ(i)) {
-                       min_cpu_irq = CPU_IRQ(i);
-                       min_loaded = i;
-               }
-       }
-       max_cpu_irq = ULONG_MAX;
-
-tryanothercpu:
-       /*
-        * Look for heaviest loaded processor.
-        * We may come back to get the next heaviest loaded processor.
-        * Skip processors with trivial loads.
-        */
-       tmp_cpu_irq = 0;
-       tmp_loaded = -1;
-       for_each_online_cpu(i) {
-               if (i != CPU_TO_PACKAGEINDEX(i))
-                       continue;
-               if (max_cpu_irq <= CPU_IRQ(i))
-                       continue;
-               if (tmp_cpu_irq < CPU_IRQ(i)) {
-                       tmp_cpu_irq = CPU_IRQ(i);
-                       tmp_loaded = i;
-               }
-       }
-
-       if (tmp_loaded == -1) {
-        /*
-         * In the case of a small number of heavy interrupt sources
-         * loading some of the cpus too much, we use Ingo's original
-         * approach to rotate them around.
-         */
-               if (!first_attempt && imbalance >= useful_load_threshold) {
-                       rotate_irqs_among_cpus(useful_load_threshold);
-                       return;
-               }
-               goto not_worth_the_effort;
-       }
-
-       first_attempt = 0;              /* heaviest search */
-       max_cpu_irq = tmp_cpu_irq;      /* load */
-       max_loaded = tmp_loaded;        /* processor */
-       imbalance = (max_cpu_irq - min_cpu_irq) / 2;
-
-       /*
-        * if imbalance is less than approx 10% of max load, then
-        * observe diminishing returns action. - quit
-        */
-       if (imbalance < (max_cpu_irq >> 3))
-               goto not_worth_the_effort;
-
-tryanotherirq:
-       /* if we select an IRQ to move that can't go where we want, then
-        * see if there is another one to try.
-        */
-       move_this_load = 0;
-       selected_irq = -1;
-       for (j = 0; j < NR_IRQS; j++) {
-               /* Is this an active IRQ? */
-               if (!irq_desc[j].action)
-                       continue;
-               if (imbalance <= IRQ_DELTA(max_loaded, j))
-                       continue;
-               /* Try to find the IRQ that is closest to the imbalance
-                * without going over.
-                */
-               if (move_this_load < IRQ_DELTA(max_loaded, j)) {
-                       move_this_load = IRQ_DELTA(max_loaded, j);
-                       selected_irq = j;
-               }
-       }
-       if (selected_irq == -1)
-               goto tryanothercpu;
-
-       imbalance = move_this_load;
-
-       /* For physical_balance case, we accumulated both load
-        * values in the one of the siblings cpu_irq[],
-        * to use the same code for physical and logical processors
-        * as much as possible.
-        *
-        * NOTE: the cpu_irq[] array holds the sum of the load for
-        * sibling A and sibling B in the slot for the lowest numbered
-        * sibling (A), _AND_ the load for sibling B in the slot for
-        * the higher numbered sibling.
-        *
-        * We seek the least loaded sibling by making the comparison
-        * (A+B)/2 vs B
-        */
-       load = CPU_IRQ(min_loaded) >> 1;
-       for_each_cpu_mask(j, per_cpu(cpu_sibling_map, min_loaded)) {
-               if (load > CPU_IRQ(j)) {
-                       /* This won't change cpu_sibling_map[min_loaded] */
-                       load = CPU_IRQ(j);
-                       min_loaded = j;
-               }
-       }
-
-       cpus_and(allowed_mask,
-               cpu_online_map,
-               balance_irq_affinity[selected_irq]);
-       target_cpu_mask = cpumask_of_cpu(min_loaded);
-       cpus_and(tmp, target_cpu_mask, allowed_mask);
-
-       if (!cpus_empty(tmp)) {
-               /* mark for change destination */
-               set_pending_irq(selected_irq, cpumask_of_cpu(min_loaded));
-
-               /* Since we made a change, come back sooner to
-                * check for more variation.
-                */
-               balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
-                       balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);
-               return;
-       }
-       goto tryanotherirq;
-
-not_worth_the_effort:
-       /*
-        * if we did not find an IRQ to move, then adjust the time interval
-        * upward
-        */
-       balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL,
-               balanced_irq_interval + BALANCED_IRQ_MORE_DELTA);
-       return;
-}
-
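The sibling bookkeeping in do_irq_balance() is easiest to check with numbers. If the package slot holds CPU_IRQ(A) = load(A) + load(B) = 100 and CPU_IRQ(B) = 30, then:

        /* load = CPU_IRQ(min_loaded) >> 1 = 50;  the (A+B)/2 vs B comparison:
         * 50 > 30, so min_loaded moves to sibling B, the less loaded one
         */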
-static int balanced_irq(void *unused)
-{
-       int i;
-       unsigned long prev_balance_time = jiffies;
-       long time_remaining = balanced_irq_interval;
-
-       /* push everything to CPU 0 to give us a starting point.  */
-       for (i = 0 ; i < NR_IRQS ; i++) {
-               irq_desc[i].pending_mask = cpumask_of_cpu(0);
-               set_pending_irq(i, cpumask_of_cpu(0));
-       }
-
-       set_freezable();
-       for ( ; ; ) {
-               time_remaining = schedule_timeout_interruptible(time_remaining);
-               try_to_freeze();
-               if (time_after(jiffies,
-                               prev_balance_time+balanced_irq_interval)) {
-                       preempt_disable();
-                       do_irq_balance();
-                       prev_balance_time = jiffies;
-                       time_remaining = balanced_irq_interval;
-                       preempt_enable();
-               }
-       }
-       return 0;
-}
-
-static int __init balanced_irq_init(void)
-{
-       int i;
-       struct cpuinfo_x86 *c;
-       cpumask_t tmp;
-
-       cpus_shift_right(tmp, cpu_online_map, 2);
-       c = &boot_cpu_data;
-       /* When not overwritten by the command line ask subarchitecture. */
-       if (irqbalance_disabled == IRQBALANCE_CHECK_ARCH)
-               irqbalance_disabled = NO_BALANCE_IRQ;
-       if (irqbalance_disabled)
-               return 0;
-
-        /* disable irqbalance completely if there is only one processor online */
-       if (num_online_cpus() < 2) {
-               irqbalance_disabled = 1;
-               return 0;
-       }
-       /*
-        * Enable physical balance only if more than 1 physical processor
-        * is present
-        */
-       if (smp_num_siblings > 1 && !cpus_empty(tmp))
-               physical_balance = 1;
-
-       for_each_online_cpu(i) {
-               irq_cpu_data[i].irq_delta = kzalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
-               irq_cpu_data[i].last_irq = kzalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
-               if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) {
-                       printk(KERN_ERR "balanced_irq_init: out of memory");
-                       goto failed;
-               }
-       }
-
-       printk(KERN_INFO "Starting balanced_irq\n");
-       if (!IS_ERR(kthread_run(balanced_irq, NULL, "kirqd")))
-               return 0;
-       printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq");
-failed:
-       for_each_possible_cpu(i) {
-               kfree(irq_cpu_data[i].irq_delta);
-               irq_cpu_data[i].irq_delta = NULL;
-               kfree(irq_cpu_data[i].last_irq);
-               irq_cpu_data[i].last_irq = NULL;
-       }
-       return 0;
-}
-
-int __devinit irqbalance_disable(char *str)
-{
-       irqbalance_disabled = 1;
-       return 1;
-}
-
-__setup("noirqbalance", irqbalance_disable);
-
-late_initcall(balanced_irq_init);
-#endif /* CONFIG_IRQBALANCE */
-#endif /* CONFIG_SMP */
-
-#ifndef CONFIG_SMP
-void send_IPI_self(int vector)
-{
-       unsigned int cfg;
-
-       /*
-        * Wait for idle.
-        */
-       apic_wait_icr_idle();
-       cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL;
-       /*
-        * Send the IPI. The write to APIC_ICR fires this off.
-        */
-       apic_write(APIC_ICR, cfg);
-}
-#endif /* !CONFIG_SMP */
-
-
-/*
- * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
- * specific CPU-side IRQs.
- */
-
-#define MAX_PIRQS 8
-static int pirq_entries [MAX_PIRQS];
-static int pirqs_enabled;
-int skip_ioapic_setup;
-
-static int __init ioapic_pirq_setup(char *str)
-{
-       int i, max;
-       int ints[MAX_PIRQS+1];
-
-       get_options(str, ARRAY_SIZE(ints), ints);
-
-       for (i = 0; i < MAX_PIRQS; i++)
-               pirq_entries[i] = -1;
-
-       pirqs_enabled = 1;
-       apic_printk(APIC_VERBOSE, KERN_INFO
-                       "PIRQ redirection, working around broken MP-BIOS.\n");
-       max = MAX_PIRQS;
-       if (ints[0] < MAX_PIRQS)
-               max = ints[0];
-
-       for (i = 0; i < max; i++) {
-               apic_printk(APIC_VERBOSE, KERN_DEBUG
-                               "... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
-               /*
-                * PIRQs are mapped upside down, usually.
-                */
-               pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
-       }
-       return 1;
-}
-
-__setup("pirq=", ioapic_pirq_setup);
-
-/*
- * Find the IRQ entry number of a certain pin.
- */
-static int find_irq_entry(int apic, int pin, int type)
-{
-       int i;
-
-       for (i = 0; i < mp_irq_entries; i++)
-               if (mp_irqs[i].mp_irqtype == type &&
-                   (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid ||
-                    mp_irqs[i].mp_dstapic == MP_APIC_ALL) &&
-                   mp_irqs[i].mp_dstirq == pin)
-                       return i;
-
-       return -1;
-}
-
-/*
- * Find the pin to which IRQ[irq] (ISA) is connected
- */
-static int __init find_isa_irq_pin(int irq, int type)
-{
-       int i;
-
-       for (i = 0; i < mp_irq_entries; i++) {
-               int lbus = mp_irqs[i].mp_srcbus;
-
-               if (test_bit(lbus, mp_bus_not_pci) &&
-                   (mp_irqs[i].mp_irqtype == type) &&
-                   (mp_irqs[i].mp_srcbusirq == irq))
-
-                       return mp_irqs[i].mp_dstirq;
-       }
-       return -1;
-}
-
-static int __init find_isa_irq_apic(int irq, int type)
-{
-       int i;
-
-       for (i = 0; i < mp_irq_entries; i++) {
-               int lbus = mp_irqs[i].mp_srcbus;
-
-               if (test_bit(lbus, mp_bus_not_pci) &&
-                   (mp_irqs[i].mp_irqtype == type) &&
-                   (mp_irqs[i].mp_srcbusirq == irq))
-                       break;
-       }
-       if (i < mp_irq_entries) {
-               int apic;
-               for (apic = 0; apic < nr_ioapics; apic++) {
-                       if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic)
-                               return apic;
-               }
-       }
-
-       return -1;
-}
-
-/*
- * Find a specific PCI IRQ entry.
- * Not an __init, possibly needed by modules
- */
-static int pin_2_irq(int idx, int apic, int pin);
-
-int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
-{
-       int apic, i, best_guess = -1;
-
-       apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, "
-               "slot:%d, pin:%d.\n", bus, slot, pin);
-       if (test_bit(bus, mp_bus_not_pci)) {
-               printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
-               return -1;
-       }
-       for (i = 0; i < mp_irq_entries; i++) {
-               int lbus = mp_irqs[i].mp_srcbus;
-
-               for (apic = 0; apic < nr_ioapics; apic++)
-                       if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic ||
-                           mp_irqs[i].mp_dstapic == MP_APIC_ALL)
-                               break;
-
-               if (!test_bit(lbus, mp_bus_not_pci) &&
-                   !mp_irqs[i].mp_irqtype &&
-                   (bus == lbus) &&
-                   (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) {
-                       int irq = pin_2_irq(i, apic, mp_irqs[i].mp_dstirq);
-
-                       if (!(apic || IO_APIC_IRQ(irq)))
-                               continue;
-
-                       if (pin == (mp_irqs[i].mp_srcbusirq & 3))
-                               return irq;
-                       /*
-                        * Use the first all-but-pin matching entry as a
-                        * best-guess fuzzy result for broken mptables.
-                        */
-                       if (best_guess < 0)
-                               best_guess = irq;
-               }
-       }
-       return best_guess;
-}
-EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
-
-/*
- * This function currently is only a helper for the i386 smp boot process, where
- * we need to reprogram the ioredtbls to cater for the cpus which have come
- * online, so the mask in all cases should simply be TARGET_CPUS.
- */
-#ifdef CONFIG_SMP
-void __init setup_ioapic_dest(void)
-{
-       int pin, ioapic, irq, irq_entry;
-
-       if (skip_ioapic_setup == 1)
-               return;
-
-       for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
-               for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
-                       irq_entry = find_irq_entry(ioapic, pin, mp_INT);
-                       if (irq_entry == -1)
-                               continue;
-                       irq = pin_2_irq(irq_entry, ioapic, pin);
-                       set_ioapic_affinity_irq(irq, TARGET_CPUS);
-               }
-
-       }
-}
-#endif
-
-#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
-/*
- * EISA Edge/Level control register, ELCR
- */
-static int EISA_ELCR(unsigned int irq)
-{
-       if (irq < 16) {
-               unsigned int port = 0x4d0 + (irq >> 3);
-               return (inb(port) >> (irq & 7)) & 1;
-       }
-       apic_printk(APIC_VERBOSE, KERN_INFO
-                       "Broken MPtable reports ISA irq %d\n", irq);
-       return 0;
-}
-#endif
-
-/* ISA interrupts are always polarity zero edge triggered,
- * when listed as conforming in the MP table. */
-
-#define default_ISA_trigger(idx)       (0)
-#define default_ISA_polarity(idx)      (0)
-
-/* EISA interrupts are always polarity zero and can be edge or level
- * trigger depending on the ELCR value.  If an interrupt is listed as
- * EISA conforming in the MP table, that means its trigger type must
- * be read in from the ELCR */
-
-#define default_EISA_trigger(idx)      (EISA_ELCR(mp_irqs[idx].mp_srcbusirq))
-#define default_EISA_polarity(idx)     default_ISA_polarity(idx)
-
-/* PCI interrupts are always polarity one level triggered,
- * when listed as conforming in the MP table. */
-
-#define default_PCI_trigger(idx)       (1)
-#define default_PCI_polarity(idx)      (1)
-
-/* MCA interrupts are always polarity zero level triggered,
- * when listed as conforming in the MP table. */
-
-#define default_MCA_trigger(idx)       (1)
-#define default_MCA_polarity(idx)      default_ISA_polarity(idx)
-
-static int MPBIOS_polarity(int idx)
-{
-       int bus = mp_irqs[idx].mp_srcbus;
-       int polarity;
-
-       /*
-        * Determine IRQ line polarity (high active or low active):
-        */
-       switch (mp_irqs[idx].mp_irqflag & 3) {
-       case 0: /* conforms, ie. bus-type dependent polarity */
-       {
-               polarity = test_bit(bus, mp_bus_not_pci)?
-                       default_ISA_polarity(idx):
-                       default_PCI_polarity(idx);
-               break;
-       }
-       case 1: /* high active */
-       {
-               polarity = 0;
-               break;
-       }
-       case 2: /* reserved */
-       {
-               printk(KERN_WARNING "broken BIOS!!\n");
-               polarity = 1;
-               break;
-       }
-       case 3: /* low active */
-       {
-               polarity = 1;
-               break;
-       }
-       default: /* invalid */
-       {
-               printk(KERN_WARNING "broken BIOS!!\n");
-               polarity = 1;
-               break;
-       }
-       }
-       return polarity;
-}
-
-static int MPBIOS_trigger(int idx)
-{
-       int bus = mp_irqs[idx].mp_srcbus;
-       int trigger;
-
-       /*
-        * Determine IRQ trigger mode (edge or level sensitive):
-        */
-       switch ((mp_irqs[idx].mp_irqflag>>2) & 3) {
-       case 0: /* conforms, ie. bus-type dependent */
-       {
-               trigger = test_bit(bus, mp_bus_not_pci)?
-                               default_ISA_trigger(idx):
-                               default_PCI_trigger(idx);
-#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
-               switch (mp_bus_id_to_type[bus]) {
-               case MP_BUS_ISA: /* ISA pin */
-               {
-                       /* set before the switch */
-                       break;
-               }
-               case MP_BUS_EISA: /* EISA pin */
-               {
-                       trigger = default_EISA_trigger(idx);
-                       break;
-               }
-               case MP_BUS_PCI: /* PCI pin */
-               {
-                       /* set before the switch */
-                       break;
-               }
-               case MP_BUS_MCA: /* MCA pin */
-               {
-                       trigger = default_MCA_trigger(idx);
-                       break;
-               }
-               default:
-               {
-                       printk(KERN_WARNING "broken BIOS!!\n");
-                       trigger = 1;
-                       break;
-               }
-               }
-#endif
-               break;
-       }
-       case 1: /* edge */
-       {
-               trigger = 0;
-               break;
-       }
-       case 2: /* reserved */
-       {
-               printk(KERN_WARNING "broken BIOS!!\n");
-               trigger = 1;
-               break;
-       }
-       case 3: /* level */
-       {
-               trigger = 1;
-               break;
-       }
-       default: /* invalid */
-       {
-               printk(KERN_WARNING "broken BIOS!!\n");
-               trigger = 0;
-               break;
-       }
-       }
-       return trigger;
-}
-
-static inline int irq_polarity(int idx)
-{
-       return MPBIOS_polarity(idx);
-}
-
-static inline int irq_trigger(int idx)
-{
-       return MPBIOS_trigger(idx);
-}
-
-static int pin_2_irq(int idx, int apic, int pin)
-{
-       int irq, i;
-       int bus = mp_irqs[idx].mp_srcbus;
-
-       /*
-        * Debugging check, we are in big trouble if this message pops up!
-        */
-       if (mp_irqs[idx].mp_dstirq != pin)
-               printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
-
-       if (test_bit(bus, mp_bus_not_pci))
-               irq = mp_irqs[idx].mp_srcbusirq;
-       else {
-               /*
-                * PCI IRQs are mapped in order
-                */
-               i = irq = 0;
-               while (i < apic)
-                       irq += nr_ioapic_registers[i++];
-               irq += pin;
-
-               /*
-                * For MPS mode, so far only needed by ES7000 platform
-                */
-               if (ioapic_renumber_irq)
-                       irq = ioapic_renumber_irq(apic, irq);
-       }
-
-       /*
-        * PCI IRQ command line redirection. Yes, limits are hardcoded.
-        */
-       if ((pin >= 16) && (pin <= 23)) {
-               if (pirq_entries[pin-16] != -1) {
-                       if (!pirq_entries[pin-16]) {
-                               apic_printk(APIC_VERBOSE, KERN_DEBUG
-                                               "disabling PIRQ%d\n", pin-16);
-                       } else {
-                               irq = pirq_entries[pin-16];
-                               apic_printk(APIC_VERBOSE, KERN_DEBUG
-                                               "using PIRQ%d -> IRQ %d\n",
-                                               pin-16, irq);
-                       }
-               }
-       }
-       return irq;
-}
-
-static inline int IO_APIC_irq_trigger(int irq)
-{
-       int apic, idx, pin;
-
-       for (apic = 0; apic < nr_ioapics; apic++) {
-               for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
-                       idx = find_irq_entry(apic, pin, mp_INT);
-                       if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin)))
-                               return irq_trigger(idx);
-               }
-       }
-       /*
-        * nonexistent IRQs are edge default
-        */
-       return 0;
-}
-
-/* irq_vector[] is indexed by IRQ number; NR_IRQ_VECTORS covers the sum of
- * all RTEs in all I/O APICs. */
-static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR, 0 };
-
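-/*
- * Vectors are handed out in steps of 8 so that consecutive IRQs fall
- * into different priority classes (the class is the vector shifted
- * right by 4); when the search reaches first_system_vector it wraps
- * around to the next offset above FIRST_DEVICE_VECTOR.
- */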
-static int __assign_irq_vector(int irq)
-{
-       static int current_vector = FIRST_DEVICE_VECTOR, current_offset;
-       int vector, offset;
-
-       BUG_ON((unsigned)irq >= NR_IRQ_VECTORS);
-
-       if (irq_vector[irq] > 0)
-               return irq_vector[irq];
-
-       vector = current_vector;
-       offset = current_offset;
-next:
-       vector += 8;
-       if (vector >= first_system_vector) {
-               offset = (offset + 1) % 8;
-               vector = FIRST_DEVICE_VECTOR + offset;
-       }
-       if (vector == current_vector)
-               return -ENOSPC;
-       if (test_and_set_bit(vector, used_vectors))
-               goto next;
-
-       current_vector = vector;
-       current_offset = offset;
-       irq_vector[irq] = vector;
-
-       return vector;
-}
-
-static int assign_irq_vector(int irq)
-{
-       unsigned long flags;
-       int vector;
-
-       spin_lock_irqsave(&vector_lock, flags);
-       vector = __assign_irq_vector(irq);
-       spin_unlock_irqrestore(&vector_lock, flags);
-
-       return vector;
-}
-
-static struct irq_chip ioapic_chip;
-
-#define IOAPIC_AUTO    -1
-#define IOAPIC_EDGE    0
-#define IOAPIC_LEVEL   1
-
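-/*
- * Level-triggered interrupts get the fasteoi flow (a single EOI once
- * the handler has run), edge-triggered ones get the edge flow;
- * IOAPIC_AUTO defers the decision to the MP table via
- * IO_APIC_irq_trigger().
- */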
-static void ioapic_register_intr(int irq, int vector, unsigned long trigger)
-{
-       if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
-           trigger == IOAPIC_LEVEL) {
-               irq_desc[irq].status |= IRQ_LEVEL;
-               set_irq_chip_and_handler_name(irq, &ioapic_chip,
-                                        handle_fasteoi_irq, "fasteoi");
-       } else {
-               irq_desc[irq].status &= ~IRQ_LEVEL;
-               set_irq_chip_and_handler_name(irq, &ioapic_chip,
-                                        handle_edge_irq, "edge");
-       }
-       set_intr_gate(vector, interrupt[irq]);
-}
-
-static void __init setup_IO_APIC_irqs(void)
-{
-       struct IO_APIC_route_entry entry;
-       int apic, pin, idx, irq, first_notcon = 1, vector;
-
-       apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
-
-       for (apic = 0; apic < nr_ioapics; apic++) {
-       for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
-
-               /*
-                * add it to the IO-APIC irq-routing table:
-                */
-               memset(&entry, 0, sizeof(entry));
-
-               entry.delivery_mode = INT_DELIVERY_MODE;
-               entry.dest_mode = INT_DEST_MODE;
-               entry.mask = 0;                         /* enable IRQ */
-               entry.dest.logical.logical_dest =
-                                       cpu_mask_to_apicid(TARGET_CPUS);
-
-               idx = find_irq_entry(apic, pin, mp_INT);
-               if (idx == -1) {
-                       if (first_notcon) {
-                               apic_printk(APIC_VERBOSE, KERN_DEBUG
-                                               " IO-APIC (apicid-pin) %d-%d",
-                                               mp_ioapics[apic].mp_apicid,
-                                               pin);
-                               first_notcon = 0;
-                       } else
-                               apic_printk(APIC_VERBOSE, ", %d-%d",
-                                       mp_ioapics[apic].mp_apicid, pin);
-                       continue;
-               }
-
-               if (!first_notcon) {
-                       apic_printk(APIC_VERBOSE, " not connected.\n");
-                       first_notcon = 1;
-               }
-
-               entry.trigger = irq_trigger(idx);
-               entry.polarity = irq_polarity(idx);
-
-               if (irq_trigger(idx)) {
-                       entry.trigger = 1;
-                       entry.mask = 1;
-               }
-
-               irq = pin_2_irq(idx, apic, pin);
-               /*
-                * skip adding the timer int on secondary nodes, which causes
-                * a small but painful rift in the time-space continuum
-                */
-               if (multi_timer_check(apic, irq))
-                       continue;
-               else
-                       add_pin_to_irq(irq, apic, pin);
-
-               if (!apic && !IO_APIC_IRQ(irq))
-                       continue;
-
-               if (IO_APIC_IRQ(irq)) {
-                       vector = assign_irq_vector(irq);
-                       entry.vector = vector;
-                       ioapic_register_intr(irq, vector, IOAPIC_AUTO);
-
-                       if (!apic && (irq < 16))
-                               disable_8259A_irq(irq);
-               }
-               ioapic_write_entry(apic, pin, entry);
-       }
-       }
-
-       if (!first_notcon)
-               apic_printk(APIC_VERBOSE, " not connected.\n");
-}
-
-/*
- * Set up the timer pin, possibly with the 8259A-master behind.
- */
-static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
-                                       int vector)
-{
-       struct IO_APIC_route_entry entry;
-
-       memset(&entry, 0, sizeof(entry));
-
-       /*
-        * We use logical delivery to get the timer IRQ
-        * to the first CPU.
-        */
-       entry.dest_mode = INT_DEST_MODE;
-       entry.mask = 1;                                 /* mask IRQ now */
-       entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
-       entry.delivery_mode = INT_DELIVERY_MODE;
-       entry.polarity = 0;
-       entry.trigger = 0;
-       entry.vector = vector;
-
-       /*
-        * The timer IRQ doesn't have to know that behind the
-        * scene we may have a 8259A-master in AEOI mode ...
-        */
-       ioapic_register_intr(0, vector, IOAPIC_EDGE);
-
-       /*
-        * Add it to the IO-APIC irq-routing table:
-        */
-       ioapic_write_entry(apic, pin, entry);
-}
-
-
-__apicdebuginit(void) print_IO_APIC(void)
-{
-       int apic, i;
-       union IO_APIC_reg_00 reg_00;
-       union IO_APIC_reg_01 reg_01;
-       union IO_APIC_reg_02 reg_02;
-       union IO_APIC_reg_03 reg_03;
-       unsigned long flags;
-
-       if (apic_verbosity == APIC_QUIET)
-               return;
-
-       printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
-       for (i = 0; i < nr_ioapics; i++)
-               printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
-                      mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]);
-
-       /*
-        * We are a bit conservative about what we expect.  We have to
-        * know about every hardware change ASAP.
-        */
-       printk(KERN_INFO "testing the IO APIC.......................\n");
-
-       for (apic = 0; apic < nr_ioapics; apic++) {
-
-       spin_lock_irqsave(&ioapic_lock, flags);
-       reg_00.raw = io_apic_read(apic, 0);
-       reg_01.raw = io_apic_read(apic, 1);
-       if (reg_01.bits.version >= 0x10)
-               reg_02.raw = io_apic_read(apic, 2);
-       if (reg_01.bits.version >= 0x20)
-               reg_03.raw = io_apic_read(apic, 3);
-       spin_unlock_irqrestore(&ioapic_lock, flags);
-
-       printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid);
-       printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
-       printk(KERN_DEBUG ".......    : physical APIC id: %02X\n", reg_00.bits.ID);
-       printk(KERN_DEBUG ".......    : Delivery Type: %X\n", reg_00.bits.delivery_type);
-       printk(KERN_DEBUG ".......    : LTS          : %X\n", reg_00.bits.LTS);
-
-       printk(KERN_DEBUG ".... register #01: %08X\n", reg_01.raw);
-       printk(KERN_DEBUG ".......     : max redirection entries: %04X\n", reg_01.bits.entries);
-
-       printk(KERN_DEBUG ".......     : PRQ implemented: %X\n", reg_01.bits.PRQ);
-       printk(KERN_DEBUG ".......     : IO APIC version: %04X\n", reg_01.bits.version);
-
-       /*
-        * Some Intel chipsets with an IO APIC VERSION of 0x1? don't have
-        * reg_02; a read of reg_02 returns the value of the previously read
-        * register instead, so ignore it if reg_02 == reg_01.
-        */
-       if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) {
-               printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
-               printk(KERN_DEBUG ".......     : arbitration: %02X\n", reg_02.bits.arbitration);
-       }
-
-       /*
-        * Some Intel chipsets with an IO APIC VERSION of 0x2? don't have
-        * reg_02 or reg_03; a read of reg_0[23] returns the value of the
-        * previously read register instead, so ignore it if reg_03 == reg_0[12].
-        */
-       if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw &&
-           reg_03.raw != reg_01.raw) {
-               printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw);
-               printk(KERN_DEBUG ".......     : Boot DT    : %X\n", reg_03.bits.boot_DT);
-       }
-
-       printk(KERN_DEBUG ".... IRQ redirection table:\n");
-
-       printk(KERN_DEBUG " NR Log Phy Mask Trig IRR Pol"
-                         " Stat Dest Deli Vect:   \n");
-
-       for (i = 0; i <= reg_01.bits.entries; i++) {
-               struct IO_APIC_route_entry entry;
-
-               entry = ioapic_read_entry(apic, i);
-
-               printk(KERN_DEBUG " %02x %03X %02X  ",
-                       i,
-                       entry.dest.logical.logical_dest,
-                       entry.dest.physical.physical_dest
-               );
-
-               printk("%1d    %1d    %1d   %1d   %1d    %1d    %1d    %02X\n",
-                       entry.mask,
-                       entry.trigger,
-                       entry.irr,
-                       entry.polarity,
-                       entry.delivery_status,
-                       entry.dest_mode,
-                       entry.delivery_mode,
-                       entry.vector
-               );
-       }
-       }
-       printk(KERN_DEBUG "IRQ to pin mappings:\n");
-       for (i = 0; i < NR_IRQS; i++) {
-               struct irq_pin_list *entry = irq_2_pin + i;
-               if (entry->pin < 0)
-                       continue;
-               printk(KERN_DEBUG "IRQ%d ", i);
-               for (;;) {
-                       printk("-> %d:%d", entry->apic, entry->pin);
-                       if (!entry->next)
-                               break;
-                       entry = irq_2_pin + entry->next;
-               }
-               printk("\n");
-       }
-
-       printk(KERN_INFO ".................................... done.\n");
-
-       return;
-}
-
-__apicdebuginit(void) print_APIC_bitfield(int base)
-{
-       unsigned int v;
-       int i, j;
-
-       if (apic_verbosity == APIC_QUIET)
-               return;
-
-       printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG);
-       for (i = 0; i < 8; i++) {
-               v = apic_read(base + i*0x10);
-               for (j = 0; j < 32; j++) {
-                       if (v & (1<<j))
-                               printk("1");
-                       else
-                               printk("0");
-               }
-               printk("\n");
-       }
-}
-
-__apicdebuginit(void) print_local_APIC(void *dummy)
-{
-       unsigned int v, ver, maxlvt;
-       u64 icr;
-
-       if (apic_verbosity == APIC_QUIET)
-               return;
-
-       printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
-               smp_processor_id(), hard_smp_processor_id());
-       v = apic_read(APIC_ID);
-       printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v,
-                       GET_APIC_ID(v));
-       v = apic_read(APIC_LVR);
-       printk(KERN_INFO "... APIC VERSION: %08x\n", v);
-       ver = GET_APIC_VERSION(v);
-       maxlvt = lapic_get_maxlvt();
-
-       v = apic_read(APIC_TASKPRI);
-       printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
-
-       if (APIC_INTEGRATED(ver)) {                     /* !82489DX */
-               v = apic_read(APIC_ARBPRI);
-               printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
-                       v & APIC_ARBPRI_MASK);
-               v = apic_read(APIC_PROCPRI);
-               printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
-       }
-
-       v = apic_read(APIC_EOI);
-       printk(KERN_DEBUG "... APIC EOI: %08x\n", v);
-       v = apic_read(APIC_RRR);
-       printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
-       v = apic_read(APIC_LDR);
-       printk(KERN_DEBUG "... APIC LDR: %08x\n", v);
-       v = apic_read(APIC_DFR);
-       printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
-       v = apic_read(APIC_SPIV);
-       printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
-
-       printk(KERN_DEBUG "... APIC ISR field:\n");
-       print_APIC_bitfield(APIC_ISR);
-       printk(KERN_DEBUG "... APIC TMR field:\n");
-       print_APIC_bitfield(APIC_TMR);
-       printk(KERN_DEBUG "... APIC IRR field:\n");
-       print_APIC_bitfield(APIC_IRR);
-
-       if (APIC_INTEGRATED(ver)) {             /* !82489DX */
-               if (maxlvt > 3)         /* Due to the Pentium erratum 3AP. */
-                       apic_write(APIC_ESR, 0);
-               v = apic_read(APIC_ESR);
-               printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
-       }
-
-       icr = apic_icr_read();
-       printk(KERN_DEBUG "... APIC ICR: %08x\n", (u32)icr);
-       printk(KERN_DEBUG "... APIC ICR2: %08x\n", (u32)(icr >> 32));
-
-       v = apic_read(APIC_LVTT);
-       printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
-
-       if (maxlvt > 3) {                       /* PC is LVT#4. */
-               v = apic_read(APIC_LVTPC);
-               printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v);
-       }
-       v = apic_read(APIC_LVT0);
-       printk(KERN_DEBUG "... APIC LVT0: %08x\n", v);
-       v = apic_read(APIC_LVT1);
-       printk(KERN_DEBUG "... APIC LVT1: %08x\n", v);
-
-       if (maxlvt > 2) {                       /* ERR is LVT#3. */
-               v = apic_read(APIC_LVTERR);
-               printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v);
-       }
-
-       v = apic_read(APIC_TMICT);
-       printk(KERN_DEBUG "... APIC TMICT: %08x\n", v);
-       v = apic_read(APIC_TMCCT);
-       printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v);
-       v = apic_read(APIC_TDCR);
-       printk(KERN_DEBUG "... APIC TDCR: %08x\n", v);
-       printk("\n");
-}
-
-__apicdebuginit(void) print_all_local_APICs(void)
-{
-       on_each_cpu(print_local_APIC, NULL, 1);
-}
-
-__apicdebuginit(void) print_PIC(void)
-{
-       unsigned int v;
-       unsigned long flags;
-
-       if (apic_verbosity == APIC_QUIET)
-               return;
-
-       printk(KERN_DEBUG "\nprinting PIC contents\n");
-
-       spin_lock_irqsave(&i8259A_lock, flags);
-
-       v = inb(0xa1) << 8 | inb(0x21);
-       printk(KERN_DEBUG "... PIC  IMR: %04x\n", v);
-
-       v = inb(0xa0) << 8 | inb(0x20);
-       printk(KERN_DEBUG "... PIC  IRR: %04x\n", v);
-
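-       /* OCW3 0x0b selects the ISR for the next read from each 8259A;
-        * OCW3 0x0a switches the read-back register to the IRR again. */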
-       outb(0x0b, 0xa0);
-       outb(0x0b, 0x20);
-       v = inb(0xa0) << 8 | inb(0x20);
-       outb(0x0a, 0xa0);
-       outb(0x0a, 0x20);
-
-       spin_unlock_irqrestore(&i8259A_lock, flags);
-
-       printk(KERN_DEBUG "... PIC  ISR: %04x\n", v);
-
-       v = inb(0x4d1) << 8 | inb(0x4d0);
-       printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
-}
-
-__apicdebuginit(int) print_all_ICs(void)
-{
-       print_PIC();
-       print_all_local_APICs();
-       print_IO_APIC();
-
-       return 0;
-}
-
-fs_initcall(print_all_ICs);
-
-
-static void __init enable_IO_APIC(void)
-{
-       union IO_APIC_reg_01 reg_01;
-       int i8259_apic, i8259_pin;
-       int i, apic;
-       unsigned long flags;
-
-       for (i = 0; i < PIN_MAP_SIZE; i++) {
-               irq_2_pin[i].pin = -1;
-               irq_2_pin[i].next = 0;
-       }
-       if (!pirqs_enabled)
-               for (i = 0; i < MAX_PIRQS; i++)
-                       pirq_entries[i] = -1;
-
-       /*
-        * The number of IO-APIC IRQ registers (== #pins):
-        */
-       for (apic = 0; apic < nr_ioapics; apic++) {
-               spin_lock_irqsave(&ioapic_lock, flags);
-               reg_01.raw = io_apic_read(apic, 1);
-               spin_unlock_irqrestore(&ioapic_lock, flags);
-               nr_ioapic_registers[apic] = reg_01.bits.entries+1;
-       }
-       for (apic = 0; apic < nr_ioapics; apic++) {
-               int pin;
-               /* See if any of the pins is in ExtINT mode */
-               for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
-                       struct IO_APIC_route_entry entry;
-                       entry = ioapic_read_entry(apic, pin);
-
-
-                       /* If the interrupt line is enabled and in ExtInt mode
-                        * I have found the pin where the i8259 is connected.
-                        */
-                       if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) {
-                               ioapic_i8259.apic = apic;
-                               ioapic_i8259.pin  = pin;
-                               goto found_i8259;
-                       }
-               }
-       }
- found_i8259:
-       /* Look to see if the MP table has reported the ExtINT */
-       /* If we could not find the appropriate pin by looking at the ioapic,
-        * the i8259 is probably not connected to the ioapic, but give the
-        * mptable a chance anyway.
-        */
-       i8259_pin  = find_isa_irq_pin(0, mp_ExtINT);
-       i8259_apic = find_isa_irq_apic(0, mp_ExtINT);
-       /* Trust the MP table if nothing is setup in the hardware */
-       if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) {
-               printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n");
-               ioapic_i8259.pin  = i8259_pin;
-               ioapic_i8259.apic = i8259_apic;
-       }
-       /* Complain if the MP table and the hardware disagree */
-       if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) &&
-               (i8259_pin >= 0) && (ioapic_i8259.pin >= 0))
-       {
-               printk(KERN_WARNING "ExtINT in hardware and MP table differ\n");
-       }
-
-       /*
-        * Do not trust the IO-APIC being empty at bootup
-        */
-       clear_IO_APIC();
-}
-
-/*
- * Not an __init, needed by the reboot code
- */
-void disable_IO_APIC(void)
-{
-       /*
-        * Clear the IO-APIC before rebooting:
-        */
-       clear_IO_APIC();
-
-       /*
-        * If the i8259 is routed through an IOAPIC
-        * Put that IOAPIC in virtual wire mode
-        * so legacy interrupts can be delivered.
-        */
-       if (ioapic_i8259.pin != -1) {
-               struct IO_APIC_route_entry entry;
-
-               memset(&entry, 0, sizeof(entry));
-               entry.mask            = 0; /* Enabled */
-               entry.trigger         = 0; /* Edge */
-               entry.irr             = 0;
-               entry.polarity        = 0; /* High */
-               entry.delivery_status = 0;
-               entry.dest_mode       = 0; /* Physical */
-               entry.delivery_mode   = dest_ExtINT; /* ExtInt */
-               entry.vector          = 0;
-               entry.dest.physical.physical_dest = read_apic_id();
-
-               /*
-                * Add it to the IO-APIC irq-routing table:
-                */
-               ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
-       }
-       disconnect_bsp_APIC(ioapic_i8259.pin != -1);
-}
-
-/*
- * function to set the IO-APIC physical IDs based on the
- * values stored in the MPC table.
- *
- * by Matt Domsch <Matt_Domsch@dell.com>  Tue Dec 21 12:25:05 CST 1999
- */
-
-static void __init setup_ioapic_ids_from_mpc(void)
-{
-       union IO_APIC_reg_00 reg_00;
-       physid_mask_t phys_id_present_map;
-       int apic;
-       int i;
-       unsigned char old_id;
-       unsigned long flags;
-
-       if (x86_quirks->setup_ioapic_ids && x86_quirks->setup_ioapic_ids())
-               return;
-
-       /*
-        * Don't check I/O APIC IDs for xAPIC systems.  They have
-        * no meaning without the serial APIC bus.
-        */
-       if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
-               || APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
-               return;
-       /*
-        * This is broken; anything with a real cpu count has to
-        * circumvent this idiocy regardless.
-        */
-       phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map);
-
-       /*
-        * Set the IOAPIC ID to the value stored in the MPC table.
-        */
-       for (apic = 0; apic < nr_ioapics; apic++) {
-
-               /* Read the register 0 value */
-               spin_lock_irqsave(&ioapic_lock, flags);
-               reg_00.raw = io_apic_read(apic, 0);
-               spin_unlock_irqrestore(&ioapic_lock, flags);
-
-               old_id = mp_ioapics[apic].mp_apicid;
-
-               if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) {
-                       printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
-                               apic, mp_ioapics[apic].mp_apicid);
-                       printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
-                               reg_00.bits.ID);
-                       mp_ioapics[apic].mp_apicid = reg_00.bits.ID;
-               }
-
-               /*
-                * Sanity check, is the ID really free? Every APIC in a
-                * system must have a unique ID or we get lots of nice
-                * 'stuck on smp_invalidate_needed IPI wait' messages.
-                */
-               if (check_apicid_used(phys_id_present_map,
-                                       mp_ioapics[apic].mp_apicid)) {
-                       printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
-                               apic, mp_ioapics[apic].mp_apicid);
-                       for (i = 0; i < get_physical_broadcast(); i++)
-                               if (!physid_isset(i, phys_id_present_map))
-                                       break;
-                       if (i >= get_physical_broadcast())
-                               panic("Max APIC ID exceeded!\n");
-                       printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
-                               i);
-                       physid_set(i, phys_id_present_map);
-                       mp_ioapics[apic].mp_apicid = i;
-               } else {
-                       physid_mask_t tmp;
-                       tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid);
-                       apic_printk(APIC_VERBOSE, "Setting %d in the "
-                                       "phys_id_present_map\n",
-                                       mp_ioapics[apic].mp_apicid);
-                       physids_or(phys_id_present_map, phys_id_present_map, tmp);
-               }
-
-
-               /*
-                * We need to adjust the IRQ routing table
-                * if the ID changed.
-                */
-               if (old_id != mp_ioapics[apic].mp_apicid)
-                       for (i = 0; i < mp_irq_entries; i++)
-                               if (mp_irqs[i].mp_dstapic == old_id)
-                                       mp_irqs[i].mp_dstapic
-                                               = mp_ioapics[apic].mp_apicid;
-
-               /*
-                * Read the right value from the MPC table and
-                * write it into the ID register.
-                */
-               apic_printk(APIC_VERBOSE, KERN_INFO
-                       "...changing IO-APIC physical APIC ID to %d ...",
-                       mp_ioapics[apic].mp_apicid);
-
-               reg_00.bits.ID = mp_ioapics[apic].mp_apicid;
-               spin_lock_irqsave(&ioapic_lock, flags);
-               io_apic_write(apic, 0, reg_00.raw);
-               spin_unlock_irqrestore(&ioapic_lock, flags);
-
-               /*
-                * Sanity check
-                */
-               spin_lock_irqsave(&ioapic_lock, flags);
-               reg_00.raw = io_apic_read(apic, 0);
-               spin_unlock_irqrestore(&ioapic_lock, flags);
-               if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid)
-                       printk("could not set ID!\n");
-               else
-                       apic_printk(APIC_VERBOSE, " ok.\n");
-       }
-}
-
-int no_timer_check __initdata;
-
-static int __init notimercheck(char *s)
-{
-       no_timer_check = 1;
-       return 1;
-}
-__setup("no_timer_check", notimercheck);
-
-/*
- * There is a nasty bug in some older SMP boards, their mptable lies
- * about the timer IRQ. We do the following to work around the situation:
- *
- *     - timer IRQ defaults to IO-APIC IRQ
- *     - if this function detects that timer IRQs are defunct, then we fall
- *       back to ISA timer IRQs
- */
-static int __init timer_irq_works(void)
-{
-       unsigned long t1 = jiffies;
-       unsigned long flags;
-
-       if (no_timer_check)
-               return 1;
-
-       local_save_flags(flags);
-       local_irq_enable();
-       /* Let ten ticks pass... */
-       mdelay((10 * 1000) / HZ);
-       local_irq_restore(flags);
-
-       /*
-        * Expect a few ticks at least, to be sure some possible
-        * glue logic does not lock up after one or two first
-        * ticks in a non-ExtINT mode.  Also the local APIC
-        * might have cached one ExtINT interrupt.  Finally, at
-        * least one tick may be lost due to delays.
-        */
-       if (time_after(jiffies, t1 + 4))
-               return 1;
-
-       return 0;
-}
-
-/*
- * In the SMP+IOAPIC case it might happen that an unspecified number
- * of pending IRQ events goes unhandled. These cases are very rare,
- * so we 'resend' these IRQs via IPIs to the same CPU. It's much
- * better to do it this way, as we then do not have to be aware of
- * 'pending' interrupts in the IRQ path, except at this point.
- */
-/*
- * Edge triggered needs to resend any interrupt
- * that was delayed but this is now handled in the device
- * independent code.
- */
-
-/*
- * Startup quirk:
- *
- * Starting up an edge-triggered IO-APIC interrupt is
- * nasty - we need to make sure that we get the edge.
- * If it is already asserted for some reason, we need to
- * return 1 to indicate that it was pending.
- *
- * This is not complete - we should be able to fake
- * an edge even if it isn't on the 8259A...
- *
- * (We do this for level-triggered IRQs too - it cannot hurt.)
- */
-static unsigned int startup_ioapic_irq(unsigned int irq)
-{
-       int was_pending = 0;
-       unsigned long flags;
-
-       spin_lock_irqsave(&ioapic_lock, flags);
-       if (irq < 16) {
-               disable_8259A_irq(irq);
-               if (i8259A_irq_pending(irq))
-                       was_pending = 1;
-       }
-       __unmask_IO_APIC_irq(irq);
-       spin_unlock_irqrestore(&ioapic_lock, flags);
-
-       return was_pending;
-}
-
-static void ack_ioapic_irq(unsigned int irq)
-{
-       move_native_irq(irq);
-       ack_APIC_irq();
-}
-
-static void ack_ioapic_quirk_irq(unsigned int irq)
-{
-       unsigned long v;
-       int i;
-
-       move_native_irq(irq);
-/*
- * It appears there is an erratum which affects at least version 0x11
- * of I/O APIC (that's the 82093AA and cores integrated into various
- * chipsets).  Under certain conditions a level-triggered interrupt is
- * erroneously delivered as edge-triggered one but the respective IRR
- * bit gets set nevertheless.  As a result the I/O unit expects an EOI
- * message but it will never arrive and further interrupts are blocked
- * from the source.  The exact reason is so far unknown, but the
- * phenomenon was observed when two consecutive interrupt requests
- * from a given source get delivered to the same CPU and the source is
- * temporarily disabled in between.
- *
- * A workaround is to simulate an EOI message manually.  We achieve it
- * by setting the trigger mode to edge and then to level when the edge
- * trigger mode gets detected in the TMR of a local APIC for a
- * level-triggered interrupt.  We mask the source for the time of the
- * operation to prevent an edge-triggered interrupt escaping meanwhile.
- * The idea is from Manfred Spraul.  --macro
- */
-       i = irq_vector[irq];
-
-       v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
-
-       ack_APIC_irq();
-
-       if (!(v & (1 << (i & 0x1f)))) {
-               atomic_inc(&irq_mis_count);
-               spin_lock(&ioapic_lock);
-               __mask_and_edge_IO_APIC_irq(irq);
-               __unmask_and_level_IO_APIC_irq(irq);
-               spin_unlock(&ioapic_lock);
-       }
-}
-
-static int ioapic_retrigger_irq(unsigned int irq)
-{
-       send_IPI_self(irq_vector[irq]);
-
-       return 1;
-}
-
-static struct irq_chip ioapic_chip __read_mostly = {
-       .name           = "IO-APIC",
-       .startup        = startup_ioapic_irq,
-       .mask           = mask_IO_APIC_irq,
-       .unmask         = unmask_IO_APIC_irq,
-       .ack            = ack_ioapic_irq,
-       .eoi            = ack_ioapic_quirk_irq,
-#ifdef CONFIG_SMP
-       .set_affinity   = set_ioapic_affinity_irq,
-#endif
-       .retrigger      = ioapic_retrigger_irq,
-};
-
-
-static inline void init_IO_APIC_traps(void)
-{
-       int irq;
-
-       /*
-        * NOTE! The local APIC isn't very good at handling
-        * multiple interrupts at the same interrupt level.
-        * As the interrupt level is determined by taking the
-        * vector number and shifting that right by 4, we
-        * want to spread these out a bit so that they don't
-        * all fall in the same interrupt level.
-        *
-        * Also, we've got to be careful not to trash gate
-        * 0x80, because int 0x80 is hm, kind of importantish. ;)
-        */
-       for (irq = 0; irq < NR_IRQS ; irq++) {
-               if (IO_APIC_IRQ(irq) && !irq_vector[irq]) {
-                       /*
-                        * Hmm.. We don't have an entry for this,
-                        * so default to an old-fashioned 8259
-                        * interrupt if we can..
-                        */
-                       if (irq < 16)
-                               make_8259A_irq(irq);
-                       else
-                               /* Strange. Oh, well.. */
-                               irq_desc[irq].chip = &no_irq_chip;
-               }
-       }
-}
-
-/*
- * The local APIC irq-chip implementation:
- */
-
-static void ack_lapic_irq(unsigned int irq)
-{
-       ack_APIC_irq();
-}
-
-static void mask_lapic_irq(unsigned int irq)
-{
-       unsigned long v;
-
-       v = apic_read(APIC_LVT0);
-       apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
-}
-
-static void unmask_lapic_irq(unsigned int irq)
-{
-       unsigned long v;
-
-       v = apic_read(APIC_LVT0);
-       apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
-}
-
-static struct irq_chip lapic_chip __read_mostly = {
-       .name           = "local-APIC",
-       .mask           = mask_lapic_irq,
-       .unmask         = unmask_lapic_irq,
-       .ack            = ack_lapic_irq,
-};
-
-static void lapic_register_intr(int irq, int vector)
-{
-       irq_desc[irq].status &= ~IRQ_LEVEL;
-       set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
-                                     "edge");
-       set_intr_gate(vector, interrupt[irq]);
-}
-
-static void __init setup_nmi(void)
-{
-       /*
-        * Dirty trick to enable the NMI watchdog ...
-        * We put the 8259A master into AEOI mode and
-        * unmask LVT0 as NMI on all local APICs.
-        *
-        * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
-        * is from Maciej W. Rozycki - so we do not have to EOI from
-        * the NMI handler or the timer interrupt.
-        */
-       apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ...");
-
-       enable_NMI_through_LVT0();
-
-       apic_printk(APIC_VERBOSE, " done.\n");
-}
-
-/*
- * This looks a bit hackish but it's about the only way of sending
- * a few INTA cycles to 8259As and any associated glue logic.  ICR does
- * not support the ExtINT mode, unfortunately.  We need to send these
- * cycles as some i82489DX-based boards have glue logic that keeps the
- * 8259A interrupt line asserted until INTA.  --macro
- */
-static inline void __init unlock_ExtINT_logic(void)
-{
-       int apic, pin, i;
-       struct IO_APIC_route_entry entry0, entry1;
-       unsigned char save_control, save_freq_select;
-
-       pin  = find_isa_irq_pin(8, mp_INT);
-       if (pin == -1) {
-               WARN_ON_ONCE(1);
-               return;
-       }
-       apic = find_isa_irq_apic(8, mp_INT);
-       if (apic == -1) {
-               WARN_ON_ONCE(1);
-               return;
-       }
-
-       entry0 = ioapic_read_entry(apic, pin);
-       clear_IO_APIC_pin(apic, pin);
-
-       memset(&entry1, 0, sizeof(entry1));
-
-       entry1.dest_mode = 0;                   /* physical delivery */
-       entry1.mask = 0;                        /* unmask IRQ now */
-       entry1.dest.physical.physical_dest = hard_smp_processor_id();
-       entry1.delivery_mode = dest_ExtINT;
-       entry1.polarity = entry0.polarity;
-       entry1.trigger = 0;
-       entry1.vector = 0;
-
-       ioapic_write_entry(apic, pin, entry1);
-
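-       /* Program the RTC for periodic interrupts (rate select 0x6 is
-        * 1024 Hz on the MC146818) and let the loop below wait until a
-        * burst of them has produced the INTA cycles we are after. */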
-       save_control = CMOS_READ(RTC_CONTROL);
-       save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
-       CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6,
-                  RTC_FREQ_SELECT);
-       CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL);
-
-       i = 100;
-       while (i-- > 0) {
-               mdelay(10);
-               if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF)
-                       i -= 10;
-       }
-
-       CMOS_WRITE(save_control, RTC_CONTROL);
-       CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
-       clear_IO_APIC_pin(apic, pin);
-
-       ioapic_write_entry(apic, pin, entry0);
-}
-
-/*
- * This code may look a bit paranoid, but it's supposed to cooperate with
- * a wide range of boards and BIOS bugs.  Fortunately only the timer IRQ
- * is so screwy.  Thanks to Brian Perkins for testing/hacking this beast
- * fanatically on his truly buggy board.
- */
-static inline void __init check_timer(void)
-{
-       int apic1, pin1, apic2, pin2;
-       int no_pin1 = 0;
-       int vector;
-       unsigned int ver;
-       unsigned long flags;
-
-       local_irq_save(flags);
-
-       ver = apic_read(APIC_LVR);
-       ver = GET_APIC_VERSION(ver);
-
-       /*
-        * get/set the timer IRQ vector:
-        */
-       disable_8259A_irq(0);
-       vector = assign_irq_vector(0);
-       set_intr_gate(vector, interrupt[0]);
-
-       /*
-        * As IRQ0 is to be enabled in the 8259A, the virtual
-        * wire has to be disabled in the local APIC.  Also
-        * timer interrupts need to be acknowledged manually in
-        * the 8259A for the i82489DX when using the NMI
-        * watchdog as that APIC treats NMIs as level-triggered.
-        * The AEOI mode will finish them in the 8259A
-        * automatically.
-        */
-       apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
-       init_8259A(1);
-       timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
-
-       pin1  = find_isa_irq_pin(0, mp_INT);
-       apic1 = find_isa_irq_apic(0, mp_INT);
-       pin2  = ioapic_i8259.pin;
-       apic2 = ioapic_i8259.apic;
-
-       apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X "
-                   "apic1=%d pin1=%d apic2=%d pin2=%d\n",
-                   vector, apic1, pin1, apic2, pin2);
-
-       /*
-        * Some BIOS writers are clueless and report the ExtINTA
-        * I/O APIC input from the cascaded 8259A as the timer
-        * interrupt input.  So just in case, if only one pin
-        * was found above, try it both directly and through the
-        * 8259A.
-        */
-       if (pin1 == -1) {
-               pin1 = pin2;
-               apic1 = apic2;
-               no_pin1 = 1;
-       } else if (pin2 == -1) {
-               pin2 = pin1;
-               apic2 = apic1;
-       }
-
-       if (pin1 != -1) {
-               /*
-                * Ok, does IRQ0 through the IOAPIC work?
-                */
-               if (no_pin1) {
-                       add_pin_to_irq(0, apic1, pin1);
-                       setup_timer_IRQ0_pin(apic1, pin1, vector);
-               }
-               unmask_IO_APIC_irq(0);
-               if (timer_irq_works()) {
-                       if (nmi_watchdog == NMI_IO_APIC) {
-                               setup_nmi();
-                               enable_8259A_irq(0);
-                       }
-                       if (disable_timer_pin_1 > 0)
-                               clear_IO_APIC_pin(0, pin1);
-                       goto out;
-               }
-               clear_IO_APIC_pin(apic1, pin1);
-               if (!no_pin1)
-                       apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
-                                   "8254 timer not connected to IO-APIC\n");
-
-               apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer "
-                           "(IRQ0) through the 8259A ...\n");
-               apic_printk(APIC_QUIET, KERN_INFO
-                           "..... (found apic %d pin %d) ...\n", apic2, pin2);
-               /*
-                * legacy devices should be connected to IO APIC #0
-                */
-               replace_pin_at_irq(0, apic1, pin1, apic2, pin2);
-               setup_timer_IRQ0_pin(apic2, pin2, vector);
-               unmask_IO_APIC_irq(0);
-               enable_8259A_irq(0);
-               if (timer_irq_works()) {
-                       apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
-                       timer_through_8259 = 1;
-                       if (nmi_watchdog == NMI_IO_APIC) {
-                               disable_8259A_irq(0);
-                               setup_nmi();
-                               enable_8259A_irq(0);
-                       }
-                       goto out;
-               }
-               /*
-                * Cleanup, just in case ...
-                */
-               disable_8259A_irq(0);
-               clear_IO_APIC_pin(apic2, pin2);
-               apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
-       }
-
-       if (nmi_watchdog == NMI_IO_APIC) {
-               apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work "
-                           "through the IO-APIC - disabling NMI Watchdog!\n");
-               nmi_watchdog = NMI_NONE;
-       }
-       timer_ack = 0;
-
-       apic_printk(APIC_QUIET, KERN_INFO
-                   "...trying to set up timer as Virtual Wire IRQ...\n");
-
-       lapic_register_intr(0, vector);
-       apic_write(APIC_LVT0, APIC_DM_FIXED | vector);  /* Fixed mode */
-       enable_8259A_irq(0);
-
-       if (timer_irq_works()) {
-               apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
-               goto out;
-       }
-       disable_8259A_irq(0);
-       apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector);
-       apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n");
-
-       apic_printk(APIC_QUIET, KERN_INFO
-                   "...trying to set up timer as ExtINT IRQ...\n");
-
-       init_8259A(0);
-       make_8259A_irq(0);
-       apic_write(APIC_LVT0, APIC_DM_EXTINT);
-
-       unlock_ExtINT_logic();
-
-       if (timer_irq_works()) {
-               apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
-               goto out;
-       }
-       apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n");
-       panic("IO-APIC + timer doesn't work!  Boot with apic=debug and send a "
-               "report.  Then try booting with the 'noapic' option.\n");
-out:
-       local_irq_restore(flags);
-}
-
-/*
- * Traditionally ISA IRQ2 is the cascade IRQ, and is not available
- * to devices.  However there may be an I/O APIC pin available for
- * this interrupt regardless.  The pin may be left unconnected, but
- * typically it will be reused as an ExtINT cascade interrupt for
- * the master 8259A.  In the MPS case such a pin will normally be
- * reported as an ExtINT interrupt in the MP table.  With ACPI
- * there is no provision for ExtINT interrupts, and in the absence
- * of an override it would be treated as an ordinary ISA I/O APIC
- * interrupt, that is edge-triggered and unmasked by default.  We
- * used to do this, but it caused problems on some systems because
- * of the NMI watchdog and sometimes IRQ0 of the 8254 timer using
- * the same ExtINT cascade interrupt to drive the local APIC of the
- * bootstrap processor.  Therefore we refrain from routing IRQ2 to
- * the I/O APIC in all cases now.  No actual device should request
- * it anyway.  --macro
- */
-#define PIC_IRQS       (1 << PIC_CASCADE_IR)
-
-void __init setup_IO_APIC(void)
-{
-       int i;
-
-       /* Reserve all the system vectors. */
-       for (i = first_system_vector; i < NR_VECTORS; i++)
-               set_bit(i, used_vectors);
-
-       enable_IO_APIC();
-
-       io_apic_irqs = ~PIC_IRQS;
-
-       printk("ENABLING IO-APIC IRQs\n");
-
-       /*
-        * Set up IO-APIC IRQ routing.
-        */
-       if (!acpi_ioapic)
-               setup_ioapic_ids_from_mpc();
-       sync_Arb_IDs();
-       setup_IO_APIC_irqs();
-       init_IO_APIC_traps();
-       check_timer();
-}
-
-/*
- *     Called after all the initialization is done. If we didn't find any
- *     APIC bugs then we can allow the modify fast path.
- */
-
-static int __init io_apic_bug_finalize(void)
-{
-       if (sis_apic_bug == -1)
-               sis_apic_bug = 0;
-       return 0;
-}
-
-late_initcall(io_apic_bug_finalize);
-
-struct sysfs_ioapic_data {
-       struct sys_device dev;
-       struct IO_APIC_route_entry entry[0];
-};
-static struct sysfs_ioapic_data *mp_ioapic_data[MAX_IO_APICS];
-
-static int ioapic_suspend(struct sys_device *dev, pm_message_t state)
-{
-       struct IO_APIC_route_entry *entry;
-       struct sysfs_ioapic_data *data;
-       int i;
-
-       data = container_of(dev, struct sysfs_ioapic_data, dev);
-       entry = data->entry;
-       for (i = 0; i < nr_ioapic_registers[dev->id]; i++)
-               entry[i] = ioapic_read_entry(dev->id, i);
-
-       return 0;
-}
-
-static int ioapic_resume(struct sys_device *dev)
-{
-       struct IO_APIC_route_entry *entry;
-       struct sysfs_ioapic_data *data;
-       unsigned long flags;
-       union IO_APIC_reg_00 reg_00;
-       int i;
-
-       data = container_of(dev, struct sysfs_ioapic_data, dev);
-       entry = data->entry;
-
-       spin_lock_irqsave(&ioapic_lock, flags);
-       reg_00.raw = io_apic_read(dev->id, 0);
-       if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) {
-               reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid;
-               io_apic_write(dev->id, 0, reg_00.raw);
-       }
-       spin_unlock_irqrestore(&ioapic_lock, flags);
-       for (i = 0; i < nr_ioapic_registers[dev->id]; i++)
-               ioapic_write_entry(dev->id, i, entry[i]);
-
-       return 0;
-}
-
-static struct sysdev_class ioapic_sysdev_class = {
-       .name = "ioapic",
-       .suspend = ioapic_suspend,
-       .resume = ioapic_resume,
-};
-
-static int __init ioapic_init_sysfs(void)
-{
-       struct sys_device *dev;
-       int i, size, error = 0;
-
-       error = sysdev_class_register(&ioapic_sysdev_class);
-       if (error)
-               return error;
-
-       for (i = 0; i < nr_ioapics; i++) {
-               size = sizeof(struct sys_device) + nr_ioapic_registers[i]
-                       * sizeof(struct IO_APIC_route_entry);
-               mp_ioapic_data[i] = kzalloc(size, GFP_KERNEL);
-               if (!mp_ioapic_data[i]) {
-                       printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
-                       continue;
-               }
-               dev = &mp_ioapic_data[i]->dev;
-               dev->id = i;
-               dev->cls = &ioapic_sysdev_class;
-               error = sysdev_register(dev);
-               if (error) {
-                       kfree(mp_ioapic_data[i]);
-                       mp_ioapic_data[i] = NULL;
-                       printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
-                       continue;
-               }
-       }
-
-       return 0;
-}
-
-device_initcall(ioapic_init_sysfs);
-
-/*
- * Dynamic irq allocate and deallocation
- */
-int create_irq(void)
-{
-       /* Allocate an unused irq */
-       int irq, new, vector = 0;
-       unsigned long flags;
-
-       irq = -ENOSPC;
-       spin_lock_irqsave(&vector_lock, flags);
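-       /* Scan downwards from the top of the IRQ space for a non-legacy
-        * IRQ that has no vector assigned yet. */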
-       for (new = (NR_IRQS - 1); new >= 0; new--) {
-               if (platform_legacy_irq(new))
-                       continue;
-               if (irq_vector[new] != 0)
-                       continue;
-               vector = __assign_irq_vector(new);
-               if (likely(vector > 0))
-                       irq = new;
-               break;
-       }
-       spin_unlock_irqrestore(&vector_lock, flags);
-
-       if (irq >= 0) {
-               set_intr_gate(vector, interrupt[irq]);
-               dynamic_irq_init(irq);
-       }
-       return irq;
-}
-
-void destroy_irq(unsigned int irq)
-{
-       unsigned long flags;
-
-       dynamic_irq_cleanup(irq);
-
-       spin_lock_irqsave(&vector_lock, flags);
-       clear_bit(irq_vector[irq], used_vectors);
-       irq_vector[irq] = 0;
-       spin_unlock_irqrestore(&vector_lock, flags);
-}
-
-/*
- * MSI message composition
- */
-#ifdef CONFIG_PCI_MSI
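-/*
- * The composed MSI address encodes the destination APIC ID and the
- * destination/redirection mode; the data word carries the vector and
- * the trigger/delivery mode, following the x86 MSI register layout.
- */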
-static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
-{
-       int vector;
-       unsigned dest;
-
-       vector = assign_irq_vector(irq);
-       if (vector >= 0) {
-               dest = cpu_mask_to_apicid(TARGET_CPUS);
-
-               msg->address_hi = MSI_ADDR_BASE_HI;
-               msg->address_lo =
-                       MSI_ADDR_BASE_LO |
-                       ((INT_DEST_MODE == 0) ?
-                               MSI_ADDR_DEST_MODE_PHYSICAL:
-                               MSI_ADDR_DEST_MODE_LOGICAL) |
-                       ((INT_DELIVERY_MODE != dest_LowestPrio) ?
-                               MSI_ADDR_REDIRECTION_CPU:
-                               MSI_ADDR_REDIRECTION_LOWPRI) |
-                       MSI_ADDR_DEST_ID(dest);
-
-               msg->data =
-                       MSI_DATA_TRIGGER_EDGE |
-                       MSI_DATA_LEVEL_ASSERT |
-                       ((INT_DELIVERY_MODE != dest_LowestPrio) ?
-                               MSI_DATA_DELIVERY_FIXED:
-                               MSI_DATA_DELIVERY_LOWPRI) |
-                       MSI_DATA_VECTOR(vector);
-       }
-       return vector;
-}
-
-#ifdef CONFIG_SMP
-static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
-{
-       struct msi_msg msg;
-       unsigned int dest;
-       cpumask_t tmp;
-       int vector;
-
-       cpus_and(tmp, mask, cpu_online_map);
-       if (cpus_empty(tmp))
-               tmp = TARGET_CPUS;
-
-       vector = assign_irq_vector(irq);
-       if (vector < 0)
-               return;
-
-       dest = cpu_mask_to_apicid(mask);
-
-       read_msi_msg(irq, &msg);
-
-       msg.data &= ~MSI_DATA_VECTOR_MASK;
-       msg.data |= MSI_DATA_VECTOR(vector);
-       msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
-       msg.address_lo |= MSI_ADDR_DEST_ID(dest);
-
-       write_msi_msg(irq, &msg);
-       irq_desc[irq].affinity = mask;
-}
-#endif /* CONFIG_SMP */
-
-/*
- * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
- * which implement the MSI or MSI-X Capability Structure.
- */
-static struct irq_chip msi_chip = {
-       .name           = "PCI-MSI",
-       .unmask         = unmask_msi_irq,
-       .mask           = mask_msi_irq,
-       .ack            = ack_ioapic_irq,
-#ifdef CONFIG_SMP
-       .set_affinity   = set_msi_irq_affinity,
-#endif
-       .retrigger      = ioapic_retrigger_irq,
-};
-
-int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
-{
-       struct msi_msg msg;
-       int irq, ret;
-       irq = create_irq();
-       if (irq < 0)
-               return irq;
-
-       ret = msi_compose_msg(dev, irq, &msg);
-       if (ret < 0) {
-               destroy_irq(irq);
-               return ret;
-       }
-
-       set_irq_msi(irq, desc);
-       write_msi_msg(irq, &msg);
-
-       set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq,
-                                     "edge");
-
-       return 0;
-}
-
-void arch_teardown_msi_irq(unsigned int irq)
-{
-       destroy_irq(irq);
-}
-
-#endif /* CONFIG_PCI_MSI */
-
-/*
- * HyperTransport interrupt support
- */
-#ifdef CONFIG_HT_IRQ
-
-#ifdef CONFIG_SMP
-
-static void target_ht_irq(unsigned int irq, unsigned int dest)
-{
-       struct ht_irq_msg msg;
-       fetch_ht_irq_msg(irq, &msg);
-
-       msg.address_lo &= ~(HT_IRQ_LOW_DEST_ID_MASK);
-       msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
-
-       msg.address_lo |= HT_IRQ_LOW_DEST_ID(dest);
-       msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest);
-
-       write_ht_irq_msg(irq, &msg);
-}
-
-static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
-{
-       unsigned int dest;
-       cpumask_t tmp;
-
-       cpus_and(tmp, mask, cpu_online_map);
-       if (cpus_empty(tmp))
-               tmp = TARGET_CPUS;
-
-       cpus_and(mask, tmp, CPU_MASK_ALL);
-
-       dest = cpu_mask_to_apicid(mask);
-
-       target_ht_irq(irq, dest);
-       irq_desc[irq].affinity = mask;
-}
-#endif
-
-static struct irq_chip ht_irq_chip = {
-       .name           = "PCI-HT",
-       .mask           = mask_ht_irq,
-       .unmask         = unmask_ht_irq,
-       .ack            = ack_ioapic_irq,
-#ifdef CONFIG_SMP
-       .set_affinity   = set_ht_irq_affinity,
-#endif
-       .retrigger      = ioapic_retrigger_irq,
-};
-
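-/*
- * Note that assign_irq_vector() returns values below 256 here, so
- * vector >> 8 is always 0 and the message initially targets CPU#0;
- * set_ht_irq_affinity() can retarget it later.
- */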
-int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
-{
-       int vector;
-
-       vector = assign_irq_vector(irq);
-       if (vector >= 0) {
-               struct ht_irq_msg msg;
-               unsigned dest;
-               cpumask_t tmp;
-
-               cpus_clear(tmp);
-               cpu_set(vector >> 8, tmp);
-               dest = cpu_mask_to_apicid(tmp);
-
-               msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
-
-               msg.address_lo =
-                       HT_IRQ_LOW_BASE |
-                       HT_IRQ_LOW_DEST_ID(dest) |
-                       HT_IRQ_LOW_VECTOR(vector) |
-                       ((INT_DEST_MODE == 0) ?
-                               HT_IRQ_LOW_DM_PHYSICAL :
-                               HT_IRQ_LOW_DM_LOGICAL) |
-                       HT_IRQ_LOW_RQEOI_EDGE |
-                       ((INT_DELIVERY_MODE != dest_LowestPrio) ?
-                               HT_IRQ_LOW_MT_FIXED :
-                               HT_IRQ_LOW_MT_ARBITRATED) |
-                       HT_IRQ_LOW_IRQ_MASKED;
-
-               write_ht_irq_msg(irq, &msg);
-
-               set_irq_chip_and_handler_name(irq, &ht_irq_chip,
-                                             handle_edge_irq, "edge");
-       }
-       return vector;
-}
-#endif /* CONFIG_HT_IRQ */
-
-/* --------------------------------------------------------------------------
-                       ACPI-based IOAPIC Configuration
-   -------------------------------------------------------------------------- */
-
-#ifdef CONFIG_ACPI
-
-int __init io_apic_get_unique_id(int ioapic, int apic_id)
-{
-       union IO_APIC_reg_00 reg_00;
-       static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
-       physid_mask_t tmp;
-       unsigned long flags;
-       int i = 0;
-
-       /*
-        * The P4 platform supports up to 256 APIC IDs on two separate APIC
-        * buses (one for LAPICs, one for IOAPICs), whereas its predecessors
-        * supported only up to 16 IDs on one shared APIC bus.
-        *
-        * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full
-        *      advantage of new APIC bus architecture.
-        */
-
-       if (physids_empty(apic_id_map))
-               apic_id_map = ioapic_phys_id_map(phys_cpu_present_map);
-
-       spin_lock_irqsave(&ioapic_lock, flags);
-       reg_00.raw = io_apic_read(ioapic, 0);
-       spin_unlock_irqrestore(&ioapic_lock, flags);
-
-       if (apic_id >= get_physical_broadcast()) {
-               printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
-                       "%d\n", ioapic, apic_id, reg_00.bits.ID);
-               apic_id = reg_00.bits.ID;
-       }
-
-       /*
-        * Every APIC in a system must have a unique ID or we get lots of nice
-        * 'stuck on smp_invalidate_needed IPI wait' messages.
-        */
-       if (check_apicid_used(apic_id_map, apic_id)) {
-
-               for (i = 0; i < get_physical_broadcast(); i++) {
-                       if (!check_apicid_used(apic_id_map, i))
-                               break;
-               }
-
-               if (i == get_physical_broadcast())
-                       panic("Max apic_id exceeded!\n");
-
-               printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, "
-                       "trying %d\n", ioapic, apic_id, i);
-
-               apic_id = i;
-       }
-
-       tmp = apicid_to_cpu_present(apic_id);
-       physids_or(apic_id_map, apic_id_map, tmp);
-
-       if (reg_00.bits.ID != apic_id) {
-               reg_00.bits.ID = apic_id;
-
-               spin_lock_irqsave(&ioapic_lock, flags);
-               io_apic_write(ioapic, 0, reg_00.raw);
-               reg_00.raw = io_apic_read(ioapic, 0);
-               spin_unlock_irqrestore(&ioapic_lock, flags);
-
-               /* Sanity check */
-               if (reg_00.bits.ID != apic_id) {
-                       printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic);
-                       return -1;
-               }
-       }
-
-       apic_printk(APIC_VERBOSE, KERN_INFO
-                       "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
-
-       return apic_id;
-}
-
-
-int __init io_apic_get_version(int ioapic)
-{
-       union IO_APIC_reg_01    reg_01;
-       unsigned long flags;
-
-       spin_lock_irqsave(&ioapic_lock, flags);
-       reg_01.raw = io_apic_read(ioapic, 1);
-       spin_unlock_irqrestore(&ioapic_lock, flags);
-
-       return reg_01.bits.version;
-}
-
-
-int __init io_apic_get_redir_entries(int ioapic)
-{
-       union IO_APIC_reg_01    reg_01;
-       unsigned long flags;
-
-       spin_lock_irqsave(&ioapic_lock, flags);
-       reg_01.raw = io_apic_read(ioapic, 1);
-       spin_unlock_irqrestore(&ioapic_lock, flags);
-
-       return reg_01.bits.entries;
-}
-
-
-int io_apic_set_pci_routing(int ioapic, int pin, int irq, int edge_level, int active_high_low)
-{
-       struct IO_APIC_route_entry entry;
-
-       if (!IO_APIC_IRQ(irq)) {
-               printk(KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
-                       ioapic);
-               return -EINVAL;
-       }
-
-       /*
-        * Generate a PCI IRQ routing entry and program the IOAPIC accordingly.
-        * Note that we mask (disable) IRQs now -- these get enabled when the
-        * corresponding device driver registers for this IRQ.
-        */
-
-       memset(&entry, 0, sizeof(entry));
-
-       entry.delivery_mode = INT_DELIVERY_MODE;
-       entry.dest_mode = INT_DEST_MODE;
-       entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
-       entry.trigger = edge_level;
-       entry.polarity = active_high_low;
-       entry.mask  = 1;
-
-       /*
-        * IRQs < 16 are already in the irq_2_pin[] map
-        */
-       if (irq >= 16)
-               add_pin_to_irq(irq, ioapic, pin);
-
-       entry.vector = assign_irq_vector(irq);
-
-       apic_printk(APIC_DEBUG, KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry "
-               "(%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i)\n", ioapic,
-               mp_ioapics[ioapic].mp_apicid, pin, entry.vector, irq,
-               edge_level, active_high_low);
-
-       ioapic_register_intr(irq, entry.vector, edge_level);
-
-       if (!ioapic && (irq < 16))
-               disable_8259A_irq(irq);
-
-       ioapic_write_entry(ioapic, pin, entry);
-
-       return 0;
-}
-
-int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
-{
-       int i;
-
-       if (skip_ioapic_setup)
-               return -1;
-
-       for (i = 0; i < mp_irq_entries; i++)
-               if (mp_irqs[i].mp_irqtype == mp_INT &&
-                   mp_irqs[i].mp_srcbusirq == bus_irq)
-                       break;
-       if (i >= mp_irq_entries)
-               return -1;
-
-       *trigger = irq_trigger(i);
-       *polarity = irq_polarity(i);
-       return 0;
-}
-
-#endif /* CONFIG_ACPI */
-
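
acpi_get_override_irq() serves callers elsewhere in the tree that need the MP-table view of a legacy interrupt before programming a device. A sketch of the calling pattern; example_configure_irq() is a hypothetical helper:

static void example_isa_setup(void)
{
	int trigger, polarity;

	/* Ask the MP tables how ISA irq 4 is wired; -1 means no entry
	 * was found (or the io-apic is disabled). */
	if (acpi_get_override_irq(4, &trigger, &polarity) == 0)
		example_configure_irq(4, trigger, polarity); /* hypothetical */
}
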
-static int __init parse_disable_timer_pin_1(char *arg)
-{
-       disable_timer_pin_1 = 1;
-       return 0;
-}
-early_param("disable_timer_pin_1", parse_disable_timer_pin_1);
-
-static int __init parse_enable_timer_pin_1(char *arg)
-{
-       disable_timer_pin_1 = -1;
-       return 0;
-}
-early_param("enable_timer_pin_1", parse_enable_timer_pin_1);
-
-static int __init parse_noapic(char *arg)
-{
-       /* disable IO-APIC */
-       disable_ioapic_setup();
-       return 0;
-}
-early_param("noapic", parse_noapic);
-
-void __init ioapic_init_mappings(void)
-{
-       unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
-       int i;
-
-       for (i = 0; i < nr_ioapics; i++) {
-               if (smp_found_config) {
-                       ioapic_phys = mp_ioapics[i].mp_apicaddr;
-                       if (!ioapic_phys) {
-                               printk(KERN_ERR
-                                      "WARNING: bogus zero IO-APIC "
-                                      "address found in MPTABLE, "
-                                      "disabling IO/APIC support!\n");
-                               smp_found_config = 0;
-                               skip_ioapic_setup = 1;
-                               goto fake_ioapic_page;
-                       }
-               } else {
-fake_ioapic_page:
-                       ioapic_phys = (unsigned long)
-                                     alloc_bootmem_pages(PAGE_SIZE);
-                       ioapic_phys = __pa(ioapic_phys);
-               }
-               set_fixmap_nocache(idx, ioapic_phys);
-               printk(KERN_DEBUG "mapped IOAPIC to %08lx (%08lx)\n",
-                      __fix_to_virt(idx), ioapic_phys);
-               idx++;
-       }
-}
-
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
new file mode 100644
index 0000000..ccf6c50
--- /dev/null
@@ -0,0 +1,189 @@
+/*
+ * Common interrupt code for 32 and 64 bit
+ */
+#include <linux/cpu.h>
+#include <linux/interrupt.h>
+#include <linux/kernel_stat.h>
+#include <linux/seq_file.h>
+
+#include <asm/apic.h>
+#include <asm/io_apic.h>
+#include <asm/smp.h>
+
+atomic_t irq_err_count;
+
+/*
+ * 'What should we do if we get a hw irq event on an illegal vector?'
+ * Each architecture has to answer this itself.
+ */
+void ack_bad_irq(unsigned int irq)
+{
+       printk(KERN_ERR "unexpected IRQ trap at vector %02x\n", irq);
+
+#ifdef CONFIG_X86_LOCAL_APIC
+       /*
+        * Currently unexpected vectors happen only on SMP and APIC.
+        * We _must_ ack these because every local APIC has only N
+        * irq slots per priority level, and a 'hanging, unacked' IRQ
+        * holds up an irq slot - in excessive cases (when multiple
+        * unexpected vectors occur) that might lock up the APIC
+        * completely.
+        * But only ack when the APIC is enabled -AK
+        */
+       if (cpu_has_apic)
+               ack_APIC_irq();
+#endif
+}
+
+#ifdef CONFIG_X86_32
+# define irq_stats(x)          (&per_cpu(irq_stat, x))
+#else
+# define irq_stats(x)          cpu_pda(x)
+#endif
+/*
+ * /proc/interrupts printing:
+ */
+static int show_other_interrupts(struct seq_file *p)
+{
+       int j;
+
+       seq_printf(p, "NMI: ");
+       for_each_online_cpu(j)
+               seq_printf(p, "%10u ", irq_stats(j)->__nmi_count);
+       seq_printf(p, "  Non-maskable interrupts\n");
+#ifdef CONFIG_X86_LOCAL_APIC
+       seq_printf(p, "LOC: ");
+       for_each_online_cpu(j)
+               seq_printf(p, "%10u ", irq_stats(j)->apic_timer_irqs);
+       seq_printf(p, "  Local timer interrupts\n");
+#endif
+#ifdef CONFIG_SMP
+       seq_printf(p, "RES: ");
+       for_each_online_cpu(j)
+               seq_printf(p, "%10u ", irq_stats(j)->irq_resched_count);
+       seq_printf(p, "  Rescheduling interrupts\n");
+       seq_printf(p, "CAL: ");
+       for_each_online_cpu(j)
+               seq_printf(p, "%10u ", irq_stats(j)->irq_call_count);
+       seq_printf(p, "  Function call interrupts\n");
+       seq_printf(p, "TLB: ");
+       for_each_online_cpu(j)
+               seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count);
+       seq_printf(p, "  TLB shootdowns\n");
+#endif
+#ifdef CONFIG_X86_MCE
+       seq_printf(p, "TRM: ");
+       for_each_online_cpu(j)
+               seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count);
+       seq_printf(p, "  Thermal event interrupts\n");
+# ifdef CONFIG_X86_64
+       seq_printf(p, "THR: ");
+       for_each_online_cpu(j)
+               seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count);
+       seq_printf(p, "  Threshold APIC interrupts\n");
+# endif
+#endif
+#ifdef CONFIG_X86_LOCAL_APIC
+       seq_printf(p, "SPU: ");
+       for_each_online_cpu(j)
+               seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count);
+       seq_printf(p, "  Spurious interrupts\n");
+#endif
+       seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
+#if defined(CONFIG_X86_IO_APIC)
+       seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
+#endif
+       return 0;
+}
+
+int show_interrupts(struct seq_file *p, void *v)
+{
+       unsigned long flags, any_count = 0;
+       int i = *(loff_t *) v, j;
+       struct irqaction *action;
+       struct irq_desc *desc;
+
+       if (i > nr_irqs)
+               return 0;
+
+       if (i == nr_irqs)
+               return show_other_interrupts(p);
+
+       /* print header */
+       if (i == 0) {
+               seq_printf(p, "           ");
+               for_each_online_cpu(j)
+                       seq_printf(p, "CPU%-8d", j);
+               seq_putc(p, '\n');
+       }
+
+       desc = irq_to_desc(i);
+       spin_lock_irqsave(&desc->lock, flags);
+#ifndef CONFIG_SMP
+       any_count = kstat_irqs(i);
+#else
+       for_each_online_cpu(j)
+               any_count |= kstat_irqs_cpu(i, j);
+#endif
+       action = desc->action;
+       if (!action && !any_count)
+               goto out;
+
+       seq_printf(p, "%3d: ", i);
+#ifndef CONFIG_SMP
+       seq_printf(p, "%10u ", kstat_irqs(i));
+#else
+       for_each_online_cpu(j)
+               seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
+#endif
+       seq_printf(p, " %8s", desc->chip->name);
+       seq_printf(p, "-%-8s", desc->name);
+
+       if (action) {
+               seq_printf(p, "  %s", action->name);
+               while ((action = action->next) != NULL)
+                       seq_printf(p, ", %s", action->name);
+       }
+
+       seq_putc(p, '\n');
+out:
+       spin_unlock_irqrestore(&desc->lock, flags);
+       return 0;
+}
+
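
These format strings, plus the per-irq rows printed by show_interrupts() just below, yield /proc/interrupts output of the familiar shape (illustrative values for a two-CPU machine):

            CPU0       CPU1
  0:         42          0   IO-APIC-edge      timer
NMI:          0          0   Non-maskable interrupts
LOC:      55210      54873   Local timer interrupts
ERR:          0
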
+/*
+ * /proc/stat helpers
+ */
+u64 arch_irq_stat_cpu(unsigned int cpu)
+{
+       u64 sum = irq_stats(cpu)->__nmi_count;
+
+#ifdef CONFIG_X86_LOCAL_APIC
+       sum += irq_stats(cpu)->apic_timer_irqs;
+#endif
+#ifdef CONFIG_SMP
+       sum += irq_stats(cpu)->irq_resched_count;
+       sum += irq_stats(cpu)->irq_call_count;
+       sum += irq_stats(cpu)->irq_tlb_count;
+#endif
+#ifdef CONFIG_X86_MCE
+       sum += irq_stats(cpu)->irq_thermal_count;
+# ifdef CONFIG_X86_64
+       sum += irq_stats(cpu)->irq_threshold_count;
+#endif
+#endif
+#ifdef CONFIG_X86_LOCAL_APIC
+       sum += irq_stats(cpu)->irq_spurious_count;
+#endif
+       return sum;
+}
+
+u64 arch_irq_stat(void)
+{
+       u64 sum = atomic_read(&irq_err_count);
+
+#ifdef CONFIG_X86_IO_APIC
+       sum += atomic_read(&irq_mis_count);
+#endif
+       return sum;
+}
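
Both helpers feed the "intr" line of /proc/stat; the generic reader combines them roughly as follows (a paraphrase for illustration, not a verbatim quote of fs/proc/stat.c):

u64 sum = 0;
unsigned int cpu, irq;

for_each_possible_cpu(cpu)
	sum += arch_irq_stat_cpu(cpu);	/* NMI, LOC, RES, CAL, TLB, ... */
sum += arch_irq_stat();			/* global ERR/MIS counters */
for (irq = 0; irq < nr_irqs; irq++)
	sum += kstat_irqs(irq);		/* plus every device interrupt */
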
index b71e02d42f4fd288a21a51f256c422b6d4d74d83..a51382672de0c5e5fb291bd6ff6bce13396a8f73 100644
@@ -25,29 +25,6 @@ EXPORT_PER_CPU_SYMBOL(irq_stat);
 DEFINE_PER_CPU(struct pt_regs *, irq_regs);
 EXPORT_PER_CPU_SYMBOL(irq_regs);
 
-/*
- * 'What should we do if we get a hw irq event on an illegal vector?'
- * Each architecture has to answer this itself.
- */
-void ack_bad_irq(unsigned int irq)
-{
-       printk(KERN_ERR "unexpected IRQ trap at vector %02x\n", irq);
-
-#ifdef CONFIG_X86_LOCAL_APIC
-       /*
-        * Currently unexpected vectors happen only on SMP and APIC.
-        * We _must_ ack these because every local APIC has only N
-        * irq slots per priority level, and a 'hanging, unacked' IRQ
-        * holds up an irq slot - in excessive cases (when multiple
-        * unexpected vectors occur) that might lock up the APIC
-        * completely.
-        * But only ack when the APIC is enabled -AK
-        */
-       if (cpu_has_apic)
-               ack_APIC_irq();
-#endif
-}
-
 #ifdef CONFIG_DEBUG_STACKOVERFLOW
 /* Debugging check for stack overflow: is there less than 1KB free? */
 static int check_stack_overflow(void)
@@ -223,20 +200,25 @@ unsigned int do_IRQ(struct pt_regs *regs)
 {
        struct pt_regs *old_regs;
        /* high bit used in ret_from_ code */
-       int overflow, irq = ~regs->orig_ax;
-       struct irq_desc *desc = irq_desc + irq;
+       int overflow;
+       unsigned vector = ~regs->orig_ax;
+       struct irq_desc *desc;
+       unsigned irq;
 
-       if (unlikely((unsigned)irq >= NR_IRQS)) {
-               printk(KERN_EMERG "%s: cannot handle IRQ %d\n",
-                                       __func__, irq);
-               BUG();
-       }
 
        old_regs = set_irq_regs(regs);
        irq_enter();
+       irq = __get_cpu_var(vector_irq)[vector];
 
        overflow = check_stack_overflow();
 
+       desc = irq_to_desc(irq);
+       if (unlikely(!desc)) {
+               printk(KERN_EMERG "%s: cannot handle IRQ %d vector %#x cpu %d\n",
+                                       __func__, irq, vector, smp_processor_id());
+               BUG();
+       }
+
        if (!execute_on_irq_stack(overflow, desc, irq)) {
                if (unlikely(overflow))
                        print_stack_overflow();
@@ -248,146 +230,6 @@ unsigned int do_IRQ(struct pt_regs *regs)
        return 1;
 }
 
-/*
- * Interrupt statistics:
- */
-
-atomic_t irq_err_count;
-
-/*
- * /proc/interrupts printing:
- */
-
-int show_interrupts(struct seq_file *p, void *v)
-{
-       int i = *(loff_t *) v, j;
-       struct irqaction * action;
-       unsigned long flags;
-
-       if (i == 0) {
-               seq_printf(p, "           ");
-               for_each_online_cpu(j)
-                       seq_printf(p, "CPU%-8d",j);
-               seq_putc(p, '\n');
-       }
-
-       if (i < NR_IRQS) {
-               unsigned any_count = 0;
-
-               spin_lock_irqsave(&irq_desc[i].lock, flags);
-#ifndef CONFIG_SMP
-               any_count = kstat_irqs(i);
-#else
-               for_each_online_cpu(j)
-                       any_count |= kstat_cpu(j).irqs[i];
-#endif
-               action = irq_desc[i].action;
-               if (!action && !any_count)
-                       goto skip;
-               seq_printf(p, "%3d: ",i);
-#ifndef CONFIG_SMP
-               seq_printf(p, "%10u ", kstat_irqs(i));
-#else
-               for_each_online_cpu(j)
-                       seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
-#endif
-               seq_printf(p, " %8s", irq_desc[i].chip->name);
-               seq_printf(p, "-%-8s", irq_desc[i].name);
-
-               if (action) {
-                       seq_printf(p, "  %s", action->name);
-                       while ((action = action->next) != NULL)
-                               seq_printf(p, ", %s", action->name);
-               }
-
-               seq_putc(p, '\n');
-skip:
-               spin_unlock_irqrestore(&irq_desc[i].lock, flags);
-       } else if (i == NR_IRQS) {
-               seq_printf(p, "NMI: ");
-               for_each_online_cpu(j)
-                       seq_printf(p, "%10u ", nmi_count(j));
-               seq_printf(p, "  Non-maskable interrupts\n");
-#ifdef CONFIG_X86_LOCAL_APIC
-               seq_printf(p, "LOC: ");
-               for_each_online_cpu(j)
-                       seq_printf(p, "%10u ",
-                               per_cpu(irq_stat,j).apic_timer_irqs);
-               seq_printf(p, "  Local timer interrupts\n");
-#endif
-#ifdef CONFIG_SMP
-               seq_printf(p, "RES: ");
-               for_each_online_cpu(j)
-                       seq_printf(p, "%10u ",
-                               per_cpu(irq_stat,j).irq_resched_count);
-               seq_printf(p, "  Rescheduling interrupts\n");
-               seq_printf(p, "CAL: ");
-               for_each_online_cpu(j)
-                       seq_printf(p, "%10u ",
-                               per_cpu(irq_stat,j).irq_call_count);
-               seq_printf(p, "  Function call interrupts\n");
-               seq_printf(p, "TLB: ");
-               for_each_online_cpu(j)
-                       seq_printf(p, "%10u ",
-                               per_cpu(irq_stat,j).irq_tlb_count);
-               seq_printf(p, "  TLB shootdowns\n");
-#endif
-#ifdef CONFIG_X86_MCE
-               seq_printf(p, "TRM: ");
-               for_each_online_cpu(j)
-                       seq_printf(p, "%10u ",
-                               per_cpu(irq_stat,j).irq_thermal_count);
-               seq_printf(p, "  Thermal event interrupts\n");
-#endif
-#ifdef CONFIG_X86_LOCAL_APIC
-               seq_printf(p, "SPU: ");
-               for_each_online_cpu(j)
-                       seq_printf(p, "%10u ",
-                               per_cpu(irq_stat,j).irq_spurious_count);
-               seq_printf(p, "  Spurious interrupts\n");
-#endif
-               seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
-#if defined(CONFIG_X86_IO_APIC)
-               seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
-#endif
-       }
-       return 0;
-}
-
-/*
- * /proc/stat helpers
- */
-u64 arch_irq_stat_cpu(unsigned int cpu)
-{
-       u64 sum = nmi_count(cpu);
-
-#ifdef CONFIG_X86_LOCAL_APIC
-       sum += per_cpu(irq_stat, cpu).apic_timer_irqs;
-#endif
-#ifdef CONFIG_SMP
-       sum += per_cpu(irq_stat, cpu).irq_resched_count;
-       sum += per_cpu(irq_stat, cpu).irq_call_count;
-       sum += per_cpu(irq_stat, cpu).irq_tlb_count;
-#endif
-#ifdef CONFIG_X86_MCE
-       sum += per_cpu(irq_stat, cpu).irq_thermal_count;
-#endif
-#ifdef CONFIG_X86_LOCAL_APIC
-       sum += per_cpu(irq_stat, cpu).irq_spurious_count;
-#endif
-       return sum;
-}
-
-u64 arch_irq_stat(void)
-{
-       u64 sum = atomic_read(&irq_err_count);
-
-#ifdef CONFIG_X86_IO_APIC
-       sum += atomic_read(&irq_mis_count);
-#endif
-       return sum;
-}
-
 #ifdef CONFIG_HOTPLUG_CPU
 #include <mach_apic.h>
 
@@ -395,20 +237,22 @@ void fixup_irqs(cpumask_t map)
 {
        unsigned int irq;
        static int warned;
+       struct irq_desc *desc;
 
-       for (irq = 0; irq < NR_IRQS; irq++) {
+       for_each_irq_desc(irq, desc) {
                cpumask_t mask;
+
                if (irq == 2)
                        continue;
 
-               cpus_and(mask, irq_desc[irq].affinity, map);
+               cpus_and(mask, desc->affinity, map);
                if (any_online_cpu(mask) == NR_CPUS) {
                        printk("Breaking affinity for irq %i\n", irq);
                        mask = map;
                }
-               if (irq_desc[irq].chip->set_affinity)
-                       irq_desc[irq].chip->set_affinity(irq, mask);
-               else if (irq_desc[irq].action && !(warned++))
+               if (desc->chip->set_affinity)
+                       desc->chip->set_affinity(irq, mask);
+               else if (desc->action && !(warned++))
                        printk("Cannot set affinity for irq %i\n", irq);
        }
 
index f065fe9071b9f65dcaf1d519c0063065c43cce62..60eb84eb77a0a34232be8aafbe8a7d1e040f454f 100644
 #include <asm/idle.h>
 #include <asm/smp.h>
 
-atomic_t irq_err_count;
-
-/*
- * 'What should we do if we get a hw irq event on an illegal vector?'
- * Each architecture has to answer this itself.
- */
-void ack_bad_irq(unsigned int irq)
-{
-       printk(KERN_WARNING "unexpected IRQ trap at vector %02x\n", irq);
-       /*
-        * Currently unexpected vectors happen only on SMP and APIC.
-        * We _must_ ack these because every local APIC has only N
-        * irq slots per priority level, and a 'hanging, unacked' IRQ
-        * holds up an irq slot - in excessive cases (when multiple
-        * unexpected vectors occur) that might lock up the APIC
-        * completely.
-        * But don't ack when the APIC is disabled. -AK
-        */
-       if (!disable_apic)
-               ack_APIC_irq();
-}
-
 #ifdef CONFIG_DEBUG_STACKOVERFLOW
 /*
  * Probabilistic stack overflow check:
@@ -64,122 +42,6 @@ static inline void stack_overflow_check(struct pt_regs *regs)
 }
 #endif
 
-/*
- * Generic, controller-independent functions:
- */
-
-int show_interrupts(struct seq_file *p, void *v)
-{
-       int i = *(loff_t *) v, j;
-       struct irqaction * action;
-       unsigned long flags;
-
-       if (i == 0) {
-               seq_printf(p, "           ");
-               for_each_online_cpu(j)
-                       seq_printf(p, "CPU%-8d",j);
-               seq_putc(p, '\n');
-       }
-
-       if (i < NR_IRQS) {
-               unsigned any_count = 0;
-
-               spin_lock_irqsave(&irq_desc[i].lock, flags);
-#ifndef CONFIG_SMP
-               any_count = kstat_irqs(i);
-#else
-               for_each_online_cpu(j)
-                       any_count |= kstat_cpu(j).irqs[i];
-#endif
-               action = irq_desc[i].action;
-               if (!action && !any_count)
-                       goto skip;
-               seq_printf(p, "%3d: ",i);
-#ifndef CONFIG_SMP
-               seq_printf(p, "%10u ", kstat_irqs(i));
-#else
-               for_each_online_cpu(j)
-                       seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
-#endif
-               seq_printf(p, " %8s", irq_desc[i].chip->name);
-               seq_printf(p, "-%-8s", irq_desc[i].name);
-
-               if (action) {
-                       seq_printf(p, "  %s", action->name);
-                       while ((action = action->next) != NULL)
-                               seq_printf(p, ", %s", action->name);
-               }
-               seq_putc(p, '\n');
-skip:
-               spin_unlock_irqrestore(&irq_desc[i].lock, flags);
-       } else if (i == NR_IRQS) {
-               seq_printf(p, "NMI: ");
-               for_each_online_cpu(j)
-                       seq_printf(p, "%10u ", cpu_pda(j)->__nmi_count);
-               seq_printf(p, "  Non-maskable interrupts\n");
-               seq_printf(p, "LOC: ");
-               for_each_online_cpu(j)
-                       seq_printf(p, "%10u ", cpu_pda(j)->apic_timer_irqs);
-               seq_printf(p, "  Local timer interrupts\n");
-#ifdef CONFIG_SMP
-               seq_printf(p, "RES: ");
-               for_each_online_cpu(j)
-                       seq_printf(p, "%10u ", cpu_pda(j)->irq_resched_count);
-               seq_printf(p, "  Rescheduling interrupts\n");
-               seq_printf(p, "CAL: ");
-               for_each_online_cpu(j)
-                       seq_printf(p, "%10u ", cpu_pda(j)->irq_call_count);
-               seq_printf(p, "  Function call interrupts\n");
-               seq_printf(p, "TLB: ");
-               for_each_online_cpu(j)
-                       seq_printf(p, "%10u ", cpu_pda(j)->irq_tlb_count);
-               seq_printf(p, "  TLB shootdowns\n");
-#endif
-#ifdef CONFIG_X86_MCE
-               seq_printf(p, "TRM: ");
-               for_each_online_cpu(j)
-                       seq_printf(p, "%10u ", cpu_pda(j)->irq_thermal_count);
-               seq_printf(p, "  Thermal event interrupts\n");
-               seq_printf(p, "THR: ");
-               for_each_online_cpu(j)
-                       seq_printf(p, "%10u ", cpu_pda(j)->irq_threshold_count);
-               seq_printf(p, "  Threshold APIC interrupts\n");
-#endif
-               seq_printf(p, "SPU: ");
-               for_each_online_cpu(j)
-                       seq_printf(p, "%10u ", cpu_pda(j)->irq_spurious_count);
-               seq_printf(p, "  Spurious interrupts\n");
-               seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
-       }
-       return 0;
-}
-
-/*
- * /proc/stat helpers
- */
-u64 arch_irq_stat_cpu(unsigned int cpu)
-{
-       u64 sum = cpu_pda(cpu)->__nmi_count;
-
-       sum += cpu_pda(cpu)->apic_timer_irqs;
-#ifdef CONFIG_SMP
-       sum += cpu_pda(cpu)->irq_resched_count;
-       sum += cpu_pda(cpu)->irq_call_count;
-       sum += cpu_pda(cpu)->irq_tlb_count;
-#endif
-#ifdef CONFIG_X86_MCE
-       sum += cpu_pda(cpu)->irq_thermal_count;
-       sum += cpu_pda(cpu)->irq_threshold_count;
-#endif
-       sum += cpu_pda(cpu)->irq_spurious_count;
-       return sum;
-}
-
-u64 arch_irq_stat(void)
-{
-       return atomic_read(&irq_err_count);
-}
-
 /*
  * do_IRQ handles all normal device IRQ's (the special
  * SMP cross-CPU interrupts have their own specific
@@ -188,6 +50,7 @@ u64 arch_irq_stat(void)
 asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
 {
        struct pt_regs *old_regs = set_irq_regs(regs);
+       struct irq_desc *desc;
 
        /* high bit used in ret_from_ code  */
        unsigned vector = ~regs->orig_ax;
@@ -201,8 +64,9 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
        stack_overflow_check(regs);
 #endif
 
-       if (likely(irq < NR_IRQS))
-               generic_handle_irq(irq);
+       desc = irq_to_desc(irq);
+       if (likely(desc))
+               generic_handle_irq_desc(irq, desc);
        else {
                if (!disable_apic)
                        ack_APIC_irq();
@@ -223,8 +87,9 @@ void fixup_irqs(cpumask_t map)
 {
        unsigned int irq;
        static int warned;
+       struct irq_desc *desc;
 
-       for (irq = 0; irq < NR_IRQS; irq++) {
+       for_each_irq_desc(irq, desc) {
                cpumask_t mask;
                int break_affinity = 0;
                int set_affinity = 1;
@@ -233,32 +98,32 @@ void fixup_irqs(cpumask_t map)
                        continue;
 
                /* interrupts are disabled at this point */
-               spin_lock(&irq_desc[irq].lock);
+               spin_lock(&desc->lock);
 
                if (!irq_has_action(irq) ||
-                   cpus_equal(irq_desc[irq].affinity, map)) {
-                       spin_unlock(&irq_desc[irq].lock);
+                   cpus_equal(desc->affinity, map)) {
+                       spin_unlock(&desc->lock);
                        continue;
                }
 
-               cpus_and(mask, irq_desc[irq].affinity, map);
+               cpus_and(mask, desc->affinity, map);
                if (cpus_empty(mask)) {
                        break_affinity = 1;
                        mask = map;
                }
 
-               if (irq_desc[irq].chip->mask)
-                       irq_desc[irq].chip->mask(irq);
+               if (desc->chip->mask)
+                       desc->chip->mask(irq);
 
-               if (irq_desc[irq].chip->set_affinity)
-                       irq_desc[irq].chip->set_affinity(irq, mask);
+               if (desc->chip->set_affinity)
+                       desc->chip->set_affinity(irq, mask);
                else if (!(warned++))
                        set_affinity = 0;
 
-               if (irq_desc[irq].chip->unmask)
-                       irq_desc[irq].chip->unmask(irq);
+               if (desc->chip->unmask)
+                       desc->chip->unmask(irq);
 
-               spin_unlock(&irq_desc[irq].lock);
+               spin_unlock(&desc->lock);
 
                if (break_affinity && set_affinity)
                        printk("Broke affinity for irq %i\n", irq);
index 9200a1e2752dd04846cff1205d4731a8965d4b78..845aa9803e804edd7cdc6ef5bb61e2db72f1a911 100644
@@ -69,6 +69,13 @@ void __init init_ISA_irqs (void)
         * 16 old-style INTA-cycle interrupts:
         */
        for (i = 0; i < 16; i++) {
+               /* first use of this irq_desc: initialize it */
+               struct irq_desc *desc = irq_to_desc(i);
+
+               desc->status = IRQ_DISABLED;
+               desc->action = NULL;
+               desc->depth = 1;
+
                set_irq_chip_and_handler_name(i, &i8259A_chip,
                                              handle_level_irq, "XT");
        }
@@ -83,6 +90,27 @@ static struct irqaction irq2 = {
        .name = "cascade",
 };
 
+DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
+       [0 ... IRQ0_VECTOR - 1] = -1,
+       [IRQ0_VECTOR] = 0,
+       [IRQ1_VECTOR] = 1,
+       [IRQ2_VECTOR] = 2,
+       [IRQ3_VECTOR] = 3,
+       [IRQ4_VECTOR] = 4,
+       [IRQ5_VECTOR] = 5,
+       [IRQ6_VECTOR] = 6,
+       [IRQ7_VECTOR] = 7,
+       [IRQ8_VECTOR] = 8,
+       [IRQ9_VECTOR] = 9,
+       [IRQ10_VECTOR] = 10,
+       [IRQ11_VECTOR] = 11,
+       [IRQ12_VECTOR] = 12,
+       [IRQ13_VECTOR] = 13,
+       [IRQ14_VECTOR] = 14,
+       [IRQ15_VECTOR] = 15,
+       [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1
+};
+
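
This table is the per-cpu vector-to-irq inverse mapping that the reworked do_IRQ() (see the irq_32.c hunk above) now consults instead of trusting the raw number; conceptually the lookup is just:

/* Sketch: vector (pushed by the interrupt stub) -> Linux irq number.
 * A -1 slot means nothing is bound to that vector on this cpu. */
unsigned vector = ~regs->orig_ax;
int irq = __get_cpu_var(vector_irq)[vector];
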
 /* Overridden in paravirt.c */
 void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
 
@@ -98,22 +126,14 @@ void __init native_init_IRQ(void)
         * us. (some of these will be overridden and become
         * 'special' SMP interrupts)
         */
-       for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) {
-               int vector = FIRST_EXTERNAL_VECTOR + i;
-               if (i >= NR_IRQS)
-                       break;
+       for (i =  FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) {
                /* SYSCALL_VECTOR was reserved in trap_init. */
-               if (!test_bit(vector, used_vectors))
-                       set_intr_gate(vector, interrupt[i]);
+               if (i != SYSCALL_VECTOR)
+                       set_intr_gate(i, interrupt[i]);
        }
 
-#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_SMP)
-       /*
-        * IRQ0 must be given a fixed assignment and initialized,
-        * because it's used before the IO-APIC is set up.
-        */
-       set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]);
 
+#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_SMP)
        /*
         * The reschedule interrupt is a CPU-to-CPU reschedule-helper
         * IPI, driven by wakeup.
@@ -128,6 +148,9 @@ void __init native_init_IRQ(void)
 
        /* IPI for single call function */
        set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, call_function_single_interrupt);
+
+       /* Low priority IPI to clean up after moving an irq */
+       set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
 #endif
 
 #ifdef CONFIG_X86_LOCAL_APIC
index 5b5be9d43c2a865fdcbe1ac0f689e750f5e6f5de..ff0235391285a6dc388e1d175f04a6425972d90f 100644
@@ -142,23 +142,19 @@ void __init init_ISA_irqs(void)
        init_bsp_APIC();
        init_8259A(0);
 
-       for (i = 0; i < NR_IRQS; i++) {
-               irq_desc[i].status = IRQ_DISABLED;
-               irq_desc[i].action = NULL;
-               irq_desc[i].depth = 1;
-
-               if (i < 16) {
-                       /*
-                        * 16 old-style INTA-cycle interrupts:
-                        */
-                       set_irq_chip_and_handler_name(i, &i8259A_chip,
+       for (i = 0; i < 16; i++) {
+               /* first use of this irq_desc: initialize it */
+               struct irq_desc *desc = irq_to_desc(i);
+
+               desc->status = IRQ_DISABLED;
+               desc->action = NULL;
+               desc->depth = 1;
+
+               /*
+                * 16 old-style INTA-cycle interrupts:
+                */
+               set_irq_chip_and_handler_name(i, &i8259A_chip,
                                                      handle_level_irq, "XT");
-               } else {
-                       /*
-                        * 'high' PCI IRQs filled in on demand
-                        */
-                       irq_desc[i].chip = &no_irq_chip;
-               }
        }
 }
 
index f6a11b9b1f9887f8979e67c982c5c5a39b3e1915..67465ed8931088b52d9b521cb3a116511cef9e42 100644
@@ -35,9 +35,6 @@ static void __devinit quirk_intel_irqbalance(struct pci_dev *dev)
        if (!(word & (1 << 13))) {
                dev_info(&dev->dev, "Intel E7520/7320/7525 detected; "
                        "disabling irq balancing and affinity\n");
-#ifdef CONFIG_IRQBALANCE
-               irqbalance_disable("");
-#endif
                noirqdebug_setup("");
 #ifdef CONFIG_PROC_FS
                no_irq_affinity = 1;
index b2c97874ec0f95f7cfbea51b7b7695a2939c1b01..0fa6790c1dd37d76e257de661ba3ed9312de89e0 100644
@@ -1073,6 +1073,7 @@ void __init setup_arch(char **cmdline_p)
 #endif
 
        prefill_possible_map();
+
 #ifdef CONFIG_X86_64
        init_cpu_to_node();
 #endif
@@ -1080,6 +1081,9 @@ void __init setup_arch(char **cmdline_p)
        init_apic_mappings();
        ioapic_init_mappings();
 
+       /* must wait until the io_apic is mapped */
+       nr_irqs = probe_nr_irqs();
+
        kvm_guest_init();
 
        e820_reserve_resources();
index 0e67f72d931683413160a007d554c44a438c2c28..410c88f0bfeba4c544b3f49e5db459ceb3911515 100644
@@ -140,25 +140,30 @@ static void __init setup_cpu_pda_map(void)
  */
 void __init setup_per_cpu_areas(void)
 {
-       ssize_t size = PERCPU_ENOUGH_ROOM;
+       ssize_t size, old_size;
        char *ptr;
        int cpu;
+       unsigned long align = 1;
 
        /* Setup cpu_pda map */
        setup_cpu_pda_map();
 
        /* Copy section for each CPU (we discard the original) */
-       size = PERCPU_ENOUGH_ROOM;
+       old_size = PERCPU_ENOUGH_ROOM;
+       align = max_t(unsigned long, PAGE_SIZE, align);
+       size = roundup(old_size, align);
        printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n",
                          size);
 
        for_each_possible_cpu(cpu) {
 #ifndef CONFIG_NEED_MULTIPLE_NODES
-               ptr = alloc_bootmem_pages(size);
+               ptr = __alloc_bootmem(size, align,
+                                __pa(MAX_DMA_ADDRESS));
 #else
                int node = early_cpu_to_node(cpu);
                if (!node_online(node) || !NODE_DATA(node)) {
-                       ptr = alloc_bootmem_pages(size);
+                       ptr = __alloc_bootmem(size, align,
+                                        __pa(MAX_DMA_ADDRESS));
                        printk(KERN_INFO
                               "cpu %d has no node %d or node-local memory\n",
                                cpu, node);
@@ -167,7 +172,8 @@ void __init setup_per_cpu_areas(void)
                                         cpu, __pa(ptr));
                }
                else {
-                       ptr = alloc_bootmem_pages_node(NODE_DATA(node), size);
+                       ptr = __alloc_bootmem_node(NODE_DATA(node), size, align,
+                                                       __pa(MAX_DMA_ADDRESS));
                        if (ptr)
                                printk(KERN_DEBUG "per cpu data for cpu%d on node%d at %016lx\n",
                                         cpu, node, __pa(ptr));
@@ -175,7 +181,6 @@ void __init setup_per_cpu_areas(void)
 #endif
                per_cpu_offset(cpu) = ptr - __per_cpu_start;
                memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
-
        }
 
        printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids %d\n",
index 7ed9e070a6e930d97e2d55c8750a50f57a9baef1..7ece815ea637497355a63ba90b218ef5536d57a0 100644
@@ -543,10 +543,10 @@ static inline void __inquire_remote_apic(int apicid)
        int timeout;
        u32 status;
 
-       printk(KERN_INFO "Inquiring remote APIC #%d...\n", apicid);
+       printk(KERN_INFO "Inquiring remote APIC 0x%x...\n", apicid);
 
        for (i = 0; i < ARRAY_SIZE(regs); i++) {
-               printk(KERN_INFO "... APIC #%d %s: ", apicid, names[i]);
+               printk(KERN_INFO "... APIC 0x%x %s: ", apicid, names[i]);
 
                /*
                 * Wait for idle.
@@ -874,7 +874,7 @@ do_rest:
        start_ip = setup_trampoline();
 
        /* So we see what's up   */
-       printk(KERN_INFO "Booting processor %d/%d ip %lx\n",
+       printk(KERN_INFO "Booting processor %d APIC 0x%x ip 0x%lx\n",
                          cpu, apicid, start_ip);
 
        /*
diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/kernel/uv_irq.c
new file mode 100644
index 0000000..aeef529
--- /dev/null
@@ -0,0 +1,79 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * SGI UV IRQ functions
+ *
+ * Copyright (C) 2008 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#include <linux/module.h>
+#include <linux/irq.h>
+
+#include <asm/apic.h>
+#include <asm/uv/uv_irq.h>
+
+static void uv_noop(unsigned int irq)
+{
+}
+
+static unsigned int uv_noop_ret(unsigned int irq)
+{
+       return 0;
+}
+
+static void uv_ack_apic(unsigned int irq)
+{
+       ack_APIC_irq();
+}
+
+struct irq_chip uv_irq_chip = {
+       .name           = "UV-CORE",
+       .startup        = uv_noop_ret,
+       .shutdown       = uv_noop,
+       .enable         = uv_noop,
+       .disable        = uv_noop,
+       .ack            = uv_noop,
+       .mask           = uv_noop,
+       .unmask         = uv_noop,
+       .eoi            = uv_ack_apic,
+       .end            = uv_noop,
+};
+
+/*
+ * Set up a mapping of an available irq and vector, and enable the specified
+ * MMR that defines the MSI that is to be sent to the specified CPU when an
+ * interrupt is raised.
+ */
+int uv_setup_irq(char *irq_name, int cpu, int mmr_blade,
+                unsigned long mmr_offset)
+{
+       int irq;
+       int ret;
+
+       irq = create_irq();
+       if (irq <= 0)
+               return -EBUSY;
+
+       ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset);
+       if (ret != irq)
+               destroy_irq(irq);
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(uv_setup_irq);
+
+/*
+ * Tear down a mapping of an irq and vector, and disable the specified MMR that
+ * defined the MSI that was to be sent to the specified CPU when an interrupt
+ * was raised.
+ *
+ * Set mmr_blade and mmr_offset to what was passed in on uv_setup_irq().
+ */
+void uv_teardown_irq(unsigned int irq, int mmr_blade, unsigned long mmr_offset)
+{
+       arch_disable_uv_irq(mmr_blade, mmr_offset);
+       destroy_irq(irq);
+}
+EXPORT_SYMBOL_GPL(uv_teardown_irq);
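
A hedged sketch of pairing the two helpers from a hypothetical caller (example_handler is assumed; mmr_blade/mmr_offset must match between setup and teardown, as the comment above notes):

static int example_uv_attach(int cpu, int mmr_blade, unsigned long mmr_offset)
{
	int irq = uv_setup_irq("example_uv", cpu, mmr_blade, mmr_offset);

	if (irq < 0)
		return irq;
	if (request_irq(irq, example_handler, 0, "example_uv", NULL)) {
		uv_teardown_irq(irq, mmr_blade, mmr_offset);
		return -EBUSY;
	}
	return irq;
}
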
diff --git a/arch/x86/kernel/uv_sysfs.c b/arch/x86/kernel/uv_sysfs.c
new file mode 100644
index 0000000..67f9b9d
--- /dev/null
@@ -0,0 +1,72 @@
+/*
+ * This file supports the /sys/firmware/sgi_uv interfaces for SGI UV.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ *
+ *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ *  Copyright (c) Russ Anderson
+ */
+
+#include <linux/sysdev.h>
+#include <asm/uv/bios.h>
+
+struct kobject *sgi_uv_kobj;
+
+static ssize_t partition_id_show(struct kobject *kobj,
+                       struct kobj_attribute *attr, char *buf)
+{
+       return snprintf(buf, PAGE_SIZE, "%ld\n", sn_partition_id);
+}
+
+static ssize_t coherence_id_show(struct kobject *kobj,
+                       struct kobj_attribute *attr, char *buf)
+{
+       return snprintf(buf, PAGE_SIZE, "%ld\n", partition_coherence_id());
+}
+
+static struct kobj_attribute partition_id_attr =
+       __ATTR(partition_id, S_IRUGO, partition_id_show, NULL);
+
+static struct kobj_attribute coherence_id_attr =
+       __ATTR(coherence_id, S_IRUGO, coherence_id_show, NULL);
+
+
+static int __init sgi_uv_sysfs_init(void)
+{
+       int ret;
+
+       if (!sgi_uv_kobj)
+               sgi_uv_kobj = kobject_create_and_add("sgi_uv", firmware_kobj);
+       if (!sgi_uv_kobj) {
+               printk(KERN_WARNING "kobject_create_and_add sgi_uv failed\n");
+               return -EINVAL;
+       }
+
+       ret = sysfs_create_file(sgi_uv_kobj, &partition_id_attr.attr);
+       if (ret) {
+               printk(KERN_WARNING "sysfs_create_file partition_id failed\n");
+               return ret;
+       }
+
+       ret = sysfs_create_file(sgi_uv_kobj, &coherence_id_attr.attr);
+       if (ret) {
+               printk(KERN_WARNING "sysfs_create_file coherence_id failed\n");
+               return ret;
+       }
+
+       return 0;
+}
+
+device_initcall(sgi_uv_sysfs_init);
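
Both attributes surface as plain decimal text under /sys/firmware/sgi_uv/. A minimal userspace read, for illustration:

#include <stdio.h>

int main(void)
{
	long id;
	FILE *f = fopen("/sys/firmware/sgi_uv/partition_id", "r");

	if (f && fscanf(f, "%ld", &id) == 1)
		printf("partition_id = %ld\n", id);
	if (f)
		fclose(f);
	return 0;
}
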
index 61a97e616f7034e252fe1e23e8e1fc847fd476f3..0c9667f0752ad7c8b98a1b762d7c6e10dcd07b11 100644
@@ -484,10 +484,11 @@ static void disable_cobalt_irq(unsigned int irq)
 static unsigned int startup_cobalt_irq(unsigned int irq)
 {
        unsigned long flags;
+       struct irq_desc *desc = irq_to_desc(irq);
 
        spin_lock_irqsave(&cobalt_lock, flags);
-       if ((irq_desc[irq].status & (IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING)))
-               irq_desc[irq].status &= ~(IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING);
+       if ((desc->status & (IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING)))
+               desc->status &= ~(IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING);
        enable_cobalt_irq(irq);
        spin_unlock_irqrestore(&cobalt_lock, flags);
        return 0;
@@ -506,9 +507,10 @@ static void ack_cobalt_irq(unsigned int irq)
 static void end_cobalt_irq(unsigned int irq)
 {
        unsigned long flags;
+       struct irq_desc *desc = irq_to_desc(irq);
 
        spin_lock_irqsave(&cobalt_lock, flags);
-       if (!(irq_desc[irq].status & (IRQ_DISABLED | IRQ_INPROGRESS)))
+       if (!(desc->status & (IRQ_DISABLED | IRQ_INPROGRESS)))
                enable_cobalt_irq(irq);
        spin_unlock_irqrestore(&cobalt_lock, flags);
 }
@@ -626,12 +628,12 @@ static irqreturn_t piix4_master_intr(int irq, void *dev_id)
 
        spin_unlock_irqrestore(&i8259A_lock, flags);
 
-       desc = irq_desc + realirq;
+       desc = irq_to_desc(realirq);
 
        /*
         * handle this 'virtual interrupt' as a Cobalt one now.
         */
-       kstat_cpu(smp_processor_id()).irqs[realirq]++;
+       kstat_incr_irqs_this_cpu(realirq, desc);
 
        if (likely(desc->action != NULL))
                handle_IRQ_event(realirq, desc->action);
@@ -662,27 +664,29 @@ void init_VISWS_APIC_irqs(void)
        int i;
 
        for (i = 0; i < CO_IRQ_APIC0 + CO_APIC_LAST + 1; i++) {
-               irq_desc[i].status = IRQ_DISABLED;
-               irq_desc[i].action = 0;
-               irq_desc[i].depth = 1;
+               struct irq_desc *desc = irq_to_desc(i);
+
+               desc->status = IRQ_DISABLED;
+               desc->action = 0;
+               desc->depth = 1;
 
                if (i == 0) {
-                       irq_desc[i].chip = &cobalt_irq_type;
+                       desc->chip = &cobalt_irq_type;
                }
                else if (i == CO_IRQ_IDE0) {
-                       irq_desc[i].chip = &cobalt_irq_type;
+                       desc->chip = &cobalt_irq_type;
                }
                else if (i == CO_IRQ_IDE1) {
-                       irq_desc[i].chip = &cobalt_irq_type;
+                       desc->chip = &cobalt_irq_type;
                }
                else if (i == CO_IRQ_8259) {
-                       irq_desc[i].chip = &piix4_master_irq_type;
+                       desc->chip = &piix4_master_irq_type;
                }
                else if (i < CO_IRQ_APIC0) {
-                       irq_desc[i].chip = &piix4_virtual_irq_type;
+                       desc->chip = &piix4_virtual_irq_type;
                }
                else if (IS_CO_APIC(i)) {
-                       irq_desc[i].chip = &cobalt_irq_type;
+                       desc->chip = &cobalt_irq_type;
                }
        }
 
index 6953859fe28965bc77a7db5955a0b3a0d143127e..254ee07f8635007262591bbdf595f819caa1358d 100644
@@ -235,11 +235,14 @@ static void __devinit vmi_time_init_clockevent(void)
 
 void __init vmi_time_init(void)
 {
+       unsigned int cpu;
        /* Disable PIT: BIOSes start PIT CH0 with 18.2Hz periodic. */
        outb_pit(0x3a, PIT_MODE); /* binary, mode 5, LSB/MSB, ch 0 */
 
        vmi_time_init_clockevent();
        setup_irq(0, &vmi_clock_action);
+       for_each_possible_cpu(cpu)
+               per_cpu(vector_irq, cpu)[vmi_get_timer_vector()] = 0;
 }
 
 #ifdef CONFIG_X86_LOCAL_APIC
index 65f0b8a47bed0dd2ccdd31e507cdfd70846c4d48..48ee4f9435f418c45439ab02a1926a1c054cb071 100644
@@ -582,7 +582,7 @@ static void __init lguest_init_IRQ(void)
        for (i = 0; i < LGUEST_IRQS; i++) {
                int vector = FIRST_EXTERNAL_VECTOR + i;
                if (vector != SYSCALL_VECTOR) {
-                       set_intr_gate(vector, interrupt[i]);
+                       set_intr_gate(vector, interrupt[vector]);
                        set_irq_chip_and_handler_name(i, &lguest_irq_controller,
                                                      handle_level_irq,
                                                      "level");
index df37fc9d6a2612d0ddf1105b8510a320b3445923..3c3b471ea496225e5a53029b92cfd5eeee658e43 100644
@@ -41,6 +41,10 @@ static const struct dmi_system_id bigsmp_dmi_table[] = {
         { }
 };
 
+static cpumask_t vector_allocation_domain(int cpu)
+{
+        return cpumask_of_cpu(cpu);
+}
 
 static int probe_bigsmp(void)
 {
index 6513d41ea21eb54d84aa3c7720afaf7671f27d8e..28459cab3ddb5fdae0d560e5f766b141412e00bb 100644
@@ -75,4 +75,18 @@ static int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 }
 #endif
 
+static cpumask_t vector_allocation_domain(int cpu)
+{
+       /* Careful. Some cpus do not strictly honor the set of cpus
+        * specified in the interrupt destination when using lowest
+        * priority interrupt delivery mode.
+        *
+        * In particular there was a hyperthreading cpu observed to
+        * deliver interrupts to the wrong hyperthread when only one
+        * hyperthread was specified in the interrupt destination.
+        */
+       cpumask_t domain = { { [0] = APIC_ALL_CPUS, } };
+       return domain;
+}
+
 struct genapic __initdata_refok apic_es7000 = APIC_INIT("es7000", probe_es7000);
index 8cf58394975e199ac7aaa0f70c2bc0de6547ccf2..71a309b122e672ef948fbe125f3637588ecf1149 100644
@@ -38,4 +38,18 @@ static int acpi_madt_oem_check(char *oem_id, char *oem_table_id)
        return 0;
 }
 
+static cpumask_t vector_allocation_domain(int cpu)
+{
+       /* Careful. Some cpus do not strictly honor the set of cpus
+        * specified in the interrupt destination when using lowest
+        * priority interrupt delivery mode.
+        *
+        * In particular there was a hyperthreading cpu observed to
+        * deliver interrupts to the wrong hyperthread when only one
+        * hyperthread was specified in the interrupt destination.
+        */
+       cpumask_t domain = { { [0] = APIC_ALL_CPUS, } };
+       return domain;
+}
+
 struct genapic apic_numaq = APIC_INIT("NUMAQ", probe_numaq);
index 6ad6b67a723de01768c0373cac3023a43cd289b3..6272b5e69da62b28f660105d9a8788e4944432b3 100644
@@ -23,4 +23,18 @@ static int probe_summit(void)
        return 0;
 }
 
+static cpumask_t vector_allocation_domain(int cpu)
+{
+       /* Careful. Some cpus do not strictly honor the set of cpus
+        * specified in the interrupt destination when using lowest
+        * priority interrupt delivery mode.
+        *
+        * In particular there was a hyperthreading cpu observed to
+        * deliver interrupts to the wrong hyperthread when only one
+        * hyperthread was specified in the interrupt destination.
+        */
+       cpumask_t domain = { { [0] = APIC_ALL_CPUS, } };
+       return domain;
+}
+
 struct genapic apic_summit = APIC_INIT("summit", probe_summit);
index 199a5f4a873c76b33728fbfaaf186a0fb6404530..0f6e8a6523ae1dfb6aed3031bbce9ae1341b6adc 100644
@@ -1483,7 +1483,7 @@ static void disable_local_vic_irq(unsigned int irq)
  * the interrupt off to another CPU */
 static void before_handle_vic_irq(unsigned int irq)
 {
-       irq_desc_t *desc = irq_desc + irq;
+       irq_desc_t *desc = irq_to_desc(irq);
        __u8 cpu = smp_processor_id();
 
        _raw_spin_lock(&vic_irq_lock);
@@ -1518,7 +1518,7 @@ static void before_handle_vic_irq(unsigned int irq)
 /* Finish the VIC interrupt: basically mask */
 static void after_handle_vic_irq(unsigned int irq)
 {
-       irq_desc_t *desc = irq_desc + irq;
+       irq_desc_t *desc = irq_to_desc(irq);
 
        _raw_spin_lock(&vic_irq_lock);
        {
index 635b50e85581de29bc4a9d32ffbd82da6612838c..2c4baa88f2cb7a377e456a147e3c139dbe2f3dff 100644
@@ -56,13 +56,6 @@ struct remap_trace {
 static DEFINE_PER_CPU(struct trap_reason, pf_reason);
 static DEFINE_PER_CPU(struct mmiotrace_rw, cpu_trace);
 
-#if 0 /* XXX: no way to gather this info anymore */
-/* Access to this is not per-cpu. */
-static DEFINE_PER_CPU(atomic_t, dropped);
-#endif
-
-static struct dentry *marker_file;
-
 static DEFINE_MUTEX(mmiotrace_mutex);
 static DEFINE_SPINLOCK(trace_lock);
 static atomic_t mmiotrace_enabled;
@@ -75,7 +68,7 @@ static LIST_HEAD(trace_list);         /* struct remap_trace */
  *   and trace_lock.
  * - Routines depending on is_enabled() must take trace_lock.
  * - trace_list users must hold trace_lock.
- * - is_enabled() guarantees that mmio_trace_record is allowed.
+ * - is_enabled() guarantees that mmio_trace_{rw,mapping} are allowed.
  * - pre/post callbacks assume the effect of is_enabled() being true.
  */
 
@@ -97,44 +90,6 @@ static bool is_enabled(void)
        return atomic_read(&mmiotrace_enabled);
 }
 
-#if 0 /* XXX: needs rewrite */
-/*
- * Write callback for the debugfs entry:
- * Read a marker and write it to the mmio trace log
- */
-static ssize_t write_marker(struct file *file, const char __user *buffer,
-                                               size_t count, loff_t *ppos)
-{
-       char *event = NULL;
-       struct mm_io_header *headp;
-       ssize_t len = (count > 65535) ? 65535 : count;
-
-       event = kzalloc(sizeof(*headp) + len, GFP_KERNEL);
-       if (!event)
-               return -ENOMEM;
-
-       headp = (struct mm_io_header *)event;
-       headp->type = MMIO_MAGIC | (MMIO_MARKER << MMIO_OPCODE_SHIFT);
-       headp->data_len = len;
-
-       if (copy_from_user(event + sizeof(*headp), buffer, len)) {
-               kfree(event);
-               return -EFAULT;
-       }
-
-       spin_lock_irq(&trace_lock);
-#if 0 /* XXX: convert this to use tracing */
-       if (is_enabled())
-               relay_write(chan, event, sizeof(*headp) + len);
-       else
-#endif
-               len = -EINVAL;
-       spin_unlock_irq(&trace_lock);
-       kfree(event);
-       return len;
-}
-#endif
-
 static void print_pte(unsigned long address)
 {
        unsigned int level;
@@ -307,8 +262,10 @@ static void ioremap_trace_core(resource_size_t offset, unsigned long size,
        map.map_id = trace->id;
 
        spin_lock_irq(&trace_lock);
-       if (!is_enabled())
+       if (!is_enabled()) {
+               kfree(trace);
                goto not_enabled;
+       }
 
        mmio_trace_mapping(&map);
        list_add_tail(&trace->list, &trace_list);
@@ -377,6 +334,23 @@ void mmiotrace_iounmap(volatile void __iomem *addr)
                iounmap_trace_core(addr);
 }
 
+int mmiotrace_printk(const char *fmt, ...)
+{
+       int ret = 0;
+       va_list args;
+       unsigned long flags;
+       va_start(args, fmt);
+
+       spin_lock_irqsave(&trace_lock, flags);
+       if (is_enabled())
+               ret = mmio_trace_printk(fmt, args);
+       spin_unlock_irqrestore(&trace_lock, flags);
+
+       va_end(args);
+       return ret;
+}
+EXPORT_SYMBOL(mmiotrace_printk);
+
 static void clear_trace_list(void)
 {
        struct remap_trace *trace;
@@ -462,26 +436,12 @@ static void leave_uniprocessor(void)
 }
 #endif
 
-#if 0 /* XXX: out of order */
-static struct file_operations fops_marker = {
-       .owner =        THIS_MODULE,
-       .write =        write_marker
-};
-#endif
-
 void enable_mmiotrace(void)
 {
        mutex_lock(&mmiotrace_mutex);
        if (is_enabled())
                goto out;
 
-#if 0 /* XXX: tracing does not support text entries */
-       marker_file = debugfs_create_file("marker", 0660, dir, NULL,
-                                                               &fops_marker);
-       if (!marker_file)
-               pr_err(NAME "marker file creation failed.\n");
-#endif
-
        if (nommiotrace)
                pr_info(NAME "MMIO tracing disabled.\n");
        enter_uniprocessor();
@@ -506,11 +466,6 @@ void disable_mmiotrace(void)
 
        clear_trace_list(); /* guarantees: no more kmmio callbacks */
        leave_uniprocessor();
-       if (marker_file) {
-               debugfs_remove(marker_file);
-               marker_file = NULL;
-       }
-
        pr_info(NAME "disabled.\n");
 out:
        mutex_unlock(&mmiotrace_mutex);
index efa1911e20cad5e708d14cad403617de50d643c7..df3d5c861cdad6e36680613b4db8df5776e6c094 100644
@@ -79,25 +79,34 @@ static unsigned int mw32[] = { 0xC7 };
 static unsigned int mw64[] = { 0x89, 0x8B };
 #endif /* not __i386__ */
 
-static int skip_prefix(unsigned char *addr, int *shorted, int *enlarged,
-                                                               int *rexr)
+struct prefix_bits {
+       unsigned shorted:1;
+       unsigned enlarged:1;
+       unsigned rexr:1;
+       unsigned rex:1;
+};
+
+static int skip_prefix(unsigned char *addr, struct prefix_bits *prf)
 {
        int i;
        unsigned char *p = addr;
-       *shorted = 0;
-       *enlarged = 0;
-       *rexr = 0;
+       prf->shorted = 0;
+       prf->enlarged = 0;
+       prf->rexr = 0;
+       prf->rex = 0;
 
 restart:
        for (i = 0; i < ARRAY_SIZE(prefix_codes); i++) {
                if (*p == prefix_codes[i]) {
                        if (*p == 0x66)
-                               *shorted = 1;
+                               prf->shorted = 1;
 #ifdef __amd64__
                        if ((*p & 0xf8) == 0x48)
-                               *enlarged = 1;
+                               prf->enlarged = 1;
                        if ((*p & 0xf4) == 0x44)
-                               *rexr = 1;
+                               prf->rexr = 1;
+                       if ((*p & 0xf0) == 0x40)
+                               prf->rex = 1;
 #endif
                        p++;
                        goto restart;
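
The three amd64 tests above decode the REX prefix byte (0100WRXB in binary, i.e. 0x40-0x4f): masking with 0xf8 against 0x48 catches REX.W ('enlarged', 64-bit operand size), 0xf4 against 0x44 catches REX.R, and 0xf0 against 0x40 catches any REX at all, which is what get_reg_w8() later needs to tell the SIL/DIL-style byte registers apart from AH/BH. A standalone illustration for one byte:

/* Sketch: what skip_prefix()'s tests compute for the byte 0x4c (REX.WR). */
unsigned char b = 0x4c;
struct prefix_bits prf = { 0 };

if ((b & 0xf8) == 0x48)
	prf.enlarged = 1;	/* REX.W: 64-bit operand size */
if ((b & 0xf4) == 0x44)
	prf.rexr = 1;		/* REX.R: ModRM reg field extended */
if ((b & 0xf0) == 0x40)
	prf.rex = 1;		/* any REX: low-byte SIL/DIL/BPL/SPL access */
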
@@ -135,12 +144,12 @@ enum reason_type get_ins_type(unsigned long ins_addr)
 {
        unsigned int opcode;
        unsigned char *p;
-       int shorted, enlarged, rexr;
+       struct prefix_bits prf;
        int i;
        enum reason_type rv = OTHERS;
 
        p = (unsigned char *)ins_addr;
-       p += skip_prefix(p, &shorted, &enlarged, &rexr);
+       p += skip_prefix(p, &prf);
        p += get_opcode(p, &opcode);
 
        CHECK_OP_TYPE(opcode, reg_rop, REG_READ);
@@ -156,10 +165,11 @@ static unsigned int get_ins_reg_width(unsigned long ins_addr)
 {
        unsigned int opcode;
        unsigned char *p;
-       int i, shorted, enlarged, rexr;
+       struct prefix_bits prf;
+       int i;
 
        p = (unsigned char *)ins_addr;
-       p += skip_prefix(p, &shorted, &enlarged, &rexr);
+       p += skip_prefix(p, &prf);
        p += get_opcode(p, &opcode);
 
        for (i = 0; i < ARRAY_SIZE(rw8); i++)
@@ -168,7 +178,7 @@ static unsigned int get_ins_reg_width(unsigned long ins_addr)
 
        for (i = 0; i < ARRAY_SIZE(rw32); i++)
                if (rw32[i] == opcode)
-                       return (shorted ? 2 : (enlarged ? 8 : 4));
+                       return prf.shorted ? 2 : (prf.enlarged ? 8 : 4);
 
        printk(KERN_ERR "mmiotrace: Unknown opcode 0x%02x\n", opcode);
        return 0;
@@ -178,10 +188,11 @@ unsigned int get_ins_mem_width(unsigned long ins_addr)
 {
        unsigned int opcode;
        unsigned char *p;
-       int i, shorted, enlarged, rexr;
+       struct prefix_bits prf;
+       int i;
 
        p = (unsigned char *)ins_addr;
-       p += skip_prefix(p, &shorted, &enlarged, &rexr);
+       p += skip_prefix(p, &prf);
        p += get_opcode(p, &opcode);
 
        for (i = 0; i < ARRAY_SIZE(mw8); i++)
@@ -194,11 +205,11 @@ unsigned int get_ins_mem_width(unsigned long ins_addr)
 
        for (i = 0; i < ARRAY_SIZE(mw32); i++)
                if (mw32[i] == opcode)
-                       return shorted ? 2 : 4;
+                       return prf.shorted ? 2 : 4;
 
        for (i = 0; i < ARRAY_SIZE(mw64); i++)
                if (mw64[i] == opcode)
-                       return shorted ? 2 : (enlarged ? 8 : 4);
+                       return prf.shorted ? 2 : (prf.enlarged ? 8 : 4);
 
        printk(KERN_ERR "mmiotrace: Unknown opcode 0x%02x\n", opcode);
        return 0;
@@ -238,7 +249,7 @@ enum {
 #endif
 };
 
-static unsigned char *get_reg_w8(int no, struct pt_regs *regs)
+static unsigned char *get_reg_w8(int no, int rex, struct pt_regs *regs)
 {
        unsigned char *rv = NULL;
 
@@ -255,18 +266,6 @@ static unsigned char *get_reg_w8(int no, struct pt_regs *regs)
        case arg_DL:
                rv = (unsigned char *)&regs->dx;
                break;
-       case arg_AH:
-               rv = 1 + (unsigned char *)&regs->ax;
-               break;
-       case arg_BH:
-               rv = 1 + (unsigned char *)&regs->bx;
-               break;
-       case arg_CH:
-               rv = 1 + (unsigned char *)&regs->cx;
-               break;
-       case arg_DH:
-               rv = 1 + (unsigned char *)&regs->dx;
-               break;
 #ifdef __amd64__
        case arg_R8:
                rv = (unsigned char *)&regs->r8;
@@ -294,9 +293,55 @@ static unsigned char *get_reg_w8(int no, struct pt_regs *regs)
                break;
 #endif
        default:
-               printk(KERN_ERR "mmiotrace: Error reg no# %d\n", no);
                break;
        }
+
+       if (rv)
+               return rv;
+
+       if (rex) {
+               /*
+        * If any REX prefix is present, reg numbers 4-7 select the low
+        * bytes of SP/BP/SI/DI (SPL-DIL) instead of AH/CH/DH/BH.
+                */
+               switch (no) {
+               case arg_SI:
+                       rv = (unsigned char *)&regs->si;
+                       break;
+               case arg_DI:
+                       rv = (unsigned char *)&regs->di;
+                       break;
+               case arg_BP:
+                       rv = (unsigned char *)&regs->bp;
+                       break;
+               case arg_SP:
+                       rv = (unsigned char *)&regs->sp;
+                       break;
+               default:
+                       break;
+               }
+       } else {
+               switch (no) {
+               case arg_AH:
+                       rv = 1 + (unsigned char *)&regs->ax;
+                       break;
+               case arg_BH:
+                       rv = 1 + (unsigned char *)&regs->bx;
+                       break;
+               case arg_CH:
+                       rv = 1 + (unsigned char *)&regs->cx;
+                       break;
+               case arg_DH:
+                       rv = 1 + (unsigned char *)&regs->dx;
+                       break;
+               default:
+                       break;
+               }
+       }
+
+       if (!rv)
+               printk(KERN_ERR "mmiotrace: Error reg no# %d\n", no);
+
        return rv;
 }
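The reshuffled get_reg_w8() above encodes an x86 rule: for byte-width operands, reg numbers 4-7 normally name AH/CH/DH/BH, but the mere presence of any REX prefix remaps them to SPL/BPL/SIL/DIL. A userspace sketch of that mapping (names only; the kernel function returns pointers into pt_regs instead):

    #include <stdio.h>

    static const char *reg_w8_name(int no, int rex)
    {
            static const char *low[]   = { "al", "cl", "dl", "bl" };
            static const char *high[]  = { "ah", "ch", "dh", "bh" };
            static const char *rexlo[] = { "spl", "bpl", "sil", "dil" };

            if (no < 4)
                    return low[no];
            if (no < 8)
                    return rex ? rexlo[no - 4] : high[no - 4];
            return "r8b-r15b"; /* numbers 8-15 need REX.R */
    }

    int main(void)
    {
            int no;

            for (no = 4; no < 8; no++)
                    printf("reg %d: no-rex=%s rex=%s\n",
                           no, reg_w8_name(no, 0), reg_w8_name(no, 1));
            return 0;
    }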
 
@@ -368,11 +413,12 @@ unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs)
        unsigned char mod_rm;
        int reg;
        unsigned char *p;
-       int i, shorted, enlarged, rexr;
+       struct prefix_bits prf;
+       int i;
        unsigned long rv;
 
        p = (unsigned char *)ins_addr;
-       p += skip_prefix(p, &shorted, &enlarged, &rexr);
+       p += skip_prefix(p, &prf);
        p += get_opcode(p, &opcode);
        for (i = 0; i < ARRAY_SIZE(reg_rop); i++)
                if (reg_rop[i] == opcode) {
@@ -392,10 +438,10 @@ unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs)
 
 do_work:
        mod_rm = *p;
-       reg = ((mod_rm >> 3) & 0x7) | (rexr << 3);
+       reg = ((mod_rm >> 3) & 0x7) | (prf.rexr << 3);
        switch (get_ins_reg_width(ins_addr)) {
        case 1:
-               return *get_reg_w8(reg, regs);
+               return *get_reg_w8(reg, prf.rex, regs);
 
        case 2:
                return *(unsigned short *)get_reg_w32(reg, regs);
@@ -422,11 +468,12 @@ unsigned long get_ins_imm_val(unsigned long ins_addr)
        unsigned char mod_rm;
        unsigned char mod;
        unsigned char *p;
-       int i, shorted, enlarged, rexr;
+       struct prefix_bits prf;
+       int i;
        unsigned long rv;
 
        p = (unsigned char *)ins_addr;
-       p += skip_prefix(p, &shorted, &enlarged, &rexr);
+       p += skip_prefix(p, &prf);
        p += get_opcode(p, &opcode);
        for (i = 0; i < ARRAY_SIZE(imm_wop); i++)
                if (imm_wop[i] == opcode) {
index d877c5b423efbacd13c018ab4f4555f0f2c7cfb2..ab50a8d7402c8c7f4320f0fd803a22a2d9abf84a 100644 (file)
@@ -3,6 +3,7 @@
  */
 #include <linux/module.h>
 #include <linux/io.h>
+#include <linux/mmiotrace.h>
 
 #define MODULE_NAME "testmmiotrace"
 
@@ -13,6 +14,7 @@ MODULE_PARM_DESC(mmio_address, "Start address of the mapping of 16 kB.");
 static void do_write_test(void __iomem *p)
 {
        unsigned int i;
+       mmiotrace_printk("Write test.\n");
        for (i = 0; i < 256; i++)
                iowrite8(i, p + i);
        for (i = 1024; i < (5 * 1024); i += 2)
@@ -24,6 +26,7 @@ static void do_write_test(void __iomem *p)
 static void do_read_test(void __iomem *p)
 {
        unsigned int i;
+       mmiotrace_printk("Read test.\n");
        for (i = 0; i < 256; i++)
                ioread8(p + i);
        for (i = 1024; i < (5 * 1024); i += 2)
@@ -39,6 +42,7 @@ static void do_test(void)
                pr_err(MODULE_NAME ": could not ioremap, aborting.\n");
                return;
        }
+       mmiotrace_printk("ioremap returned %p.\n", p);
        do_write_test(p);
        do_read_test(p);
        iounmap(p);
index 006599db0dc7024a9bc9cde36bab97b2dc114b0f..bf69dbe08bff66b19b8e9a7558d152fd4fce62f7 100644 (file)
@@ -493,7 +493,7 @@ static int pirq_amd756_get(struct pci_dev *router, struct pci_dev *dev, int pirq
        if (pirq <= 4)
                irq = read_config_nybble(router, 0x56, pirq - 1);
        dev_info(&dev->dev,
-                "AMD756: dev [%04x/%04x], router PIRQ %d get IRQ %d\n",
+                "AMD756: dev [%04x:%04x], router PIRQ %d get IRQ %d\n",
                 dev->vendor, dev->device, pirq, irq);
        return irq;
 }
@@ -501,7 +501,7 @@ static int pirq_amd756_get(struct pci_dev *router, struct pci_dev *dev, int pirq
 static int pirq_amd756_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
 {
        dev_info(&dev->dev,
-                "AMD756: dev [%04x/%04x], router PIRQ %d set IRQ %d\n",
+                "AMD756: dev [%04x:%04x], router PIRQ %d set IRQ %d\n",
                 dev->vendor, dev->device, pirq, irq);
        if (pirq <= 4)
                write_config_nybble(router, 0x56, pirq - 1, irq);
@@ -590,13 +590,20 @@ static __init int intel_router_probe(struct irq_router *r, struct pci_dev *route
        case PCI_DEVICE_ID_INTEL_ICH10_1:
        case PCI_DEVICE_ID_INTEL_ICH10_2:
        case PCI_DEVICE_ID_INTEL_ICH10_3:
-       case PCI_DEVICE_ID_INTEL_PCH_0:
-       case PCI_DEVICE_ID_INTEL_PCH_1:
                r->name = "PIIX/ICH";
                r->get = pirq_piix_get;
                r->set = pirq_piix_set;
                return 1;
        }
+
+       if ((device >= PCI_DEVICE_ID_INTEL_PCH_LPC_MIN) &&
+           (device <= PCI_DEVICE_ID_INTEL_PCH_LPC_MAX)) {
+               r->name = "PIIX/ICH";
+               r->get = pirq_piix_get;
+               r->set = pirq_piix_set;
+               return 1;
+       }
+
        return 0;
 }
 
@@ -823,7 +830,7 @@ static void __init pirq_find_router(struct irq_router *r)
        r->get = NULL;
        r->set = NULL;
 
-       DBG(KERN_DEBUG "PCI: Attempting to find IRQ router for %04x:%04x\n",
+       DBG(KERN_DEBUG "PCI: Attempting to find IRQ router for [%04x:%04x]\n",
            rt->rtr_vendor, rt->rtr_device);
 
        pirq_router_dev = pci_get_bus_and_slot(rt->rtr_bus, rt->rtr_devfn);
@@ -843,7 +850,7 @@ static void __init pirq_find_router(struct irq_router *r)
                        h->probe(r, pirq_router_dev, pirq_router_dev->device))
                        break;
        }
-       dev_info(&pirq_router_dev->dev, "%s IRQ router [%04x/%04x]\n",
+       dev_info(&pirq_router_dev->dev, "%s IRQ router [%04x:%04x]\n",
                 pirq_router.name,
                 pirq_router_dev->vendor, pirq_router_dev->device);
 
index 28b85ab8422eb17d883ed031da6021f70259ae9d..bb042608c6023fd8214139d64befd8217797ba43 100644 (file)
@@ -21,7 +21,6 @@ void xen_force_evtchn_callback(void)
 
 static void __init __xen_init_IRQ(void)
 {
-#ifdef CONFIG_X86_64
        int i;
 
        /* Create identity vector->irq map */
@@ -31,7 +30,6 @@ static void __init __xen_init_IRQ(void)
                for_each_possible_cpu(cpu)
                        per_cpu(vector_irq, cpu)[i] = i;
        }
-#endif /* CONFIG_X86_64 */
 
        xen_init_IRQ();
 }
index dd71e3a021cd9ba5b5e5433f2031bad45d0ce7a5..5601506f2dd96d49031873216d7e8aa12c689a91 100644 (file)
@@ -241,7 +241,7 @@ static noinline int xen_spin_lock_slow(struct raw_spinlock *lock, bool irq_enabl
                ADD_STATS(taken_slow_spurious, !xen_test_irq_pending(irq));
        } while (!xen_test_irq_pending(irq)); /* check for spurious wakeups */
 
-       kstat_this_cpu.irqs[irq]++;
+       kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq));
 
 out:
        raw_local_irq_restore(flags);
index e8362c1efa309f8e0242a255235650d953aeca51..dcbf1be149f3486e26f38428499d2a07d0ff604e 100644 (file)
@@ -115,34 +115,32 @@ EXPORT_SYMBOL_GPL(dma_wait_for_async_tx);
  *     (start) dependent operations on their target channel
  * @tx: transaction with dependencies
  */
-void
-async_tx_run_dependencies(struct dma_async_tx_descriptor *tx)
+void async_tx_run_dependencies(struct dma_async_tx_descriptor *tx)
 {
-       struct dma_async_tx_descriptor *next = tx->next;
+       struct dma_async_tx_descriptor *dep = tx->next;
+       struct dma_async_tx_descriptor *dep_next;
        struct dma_chan *chan;
 
-       if (!next)
+       if (!dep)
                return;
 
-       tx->next = NULL;
-       chan = next->chan;
+       chan = dep->chan;
 
        /* keep submitting up until a channel switch is detected
         * in that case we will be called again as a result of
         * processing the interrupt from async_tx_channel_switch
         */
-       while (next && next->chan == chan) {
-               struct dma_async_tx_descriptor *_next;
-
-               spin_lock_bh(&next->lock);
-               next->parent = NULL;
-               _next = next->next;
-               if (_next && _next->chan == chan)
-                       next->next = NULL;
-               spin_unlock_bh(&next->lock);
-
-               next->tx_submit(next);
-               next = _next;
+       for (; dep; dep = dep_next) {
+               spin_lock_bh(&dep->lock);
+               dep->parent = NULL;
+               dep_next = dep->next;
+               if (dep_next && dep_next->chan == chan)
+                       dep->next = NULL; /* ->next will be submitted */
+               else
+                       dep_next = NULL; /* submit current dep and terminate */
+               spin_unlock_bh(&dep->lock);
+
+               dep->tx_submit(dep);
        }
 
        chan->device->device_issue_pending(chan);
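A userspace sketch of the traversal the rewrite adopts (names illustrative, not the dmaengine API): walk the dependency chain, submitting descriptors until the channel changes, at which point the channel-switch interrupt re-enters this logic:

    #include <stdio.h>
    #include <stddef.h>

    struct desc {
            int chan;
            struct desc *next;
    };

    static void submit(struct desc *d)
    {
            printf("submit on chan %d\n", d->chan);
    }

    static void run_dependencies(struct desc *tx)
    {
            struct desc *dep = tx->next, *dep_next;
            int chan;

            if (!dep)
                    return;
            chan = dep->chan;

            for (; dep; dep = dep_next) {
                    dep_next = dep->next;
                    if (!(dep_next && dep_next->chan == chan))
                            dep_next = NULL; /* stop at a channel switch */
                    submit(dep);
            }
    }

    int main(void)
    {
            struct desc c = { 2, NULL }, b = { 1, &c }, a = { 1, &b };
            struct desc tx = { 1, &a };

            run_dependencies(&tx); /* submits a and b; c waits on chan 2 */
            return 0;
    }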
index 31dcd9142d54416f623f5a53e587c2c0ed33fafc..dc8d1a90971f78134015c7dadf811717bac10a16 100644 (file)
@@ -417,6 +417,6 @@ static void __exit agp_ali_cleanup(void)
 module_init(agp_ali_init);
 module_exit(agp_ali_cleanup);
 
-MODULE_AUTHOR("Dave Jones <davej@codemonkey.org.uk>");
+MODULE_AUTHOR("Dave Jones <davej@redhat.com>");
 MODULE_LICENSE("GPL and additional rights");
 
index 2812ee2b165a407ff802d737695c4818f8ea917e..52f4361eb6e41fde34a1e6fa53d981725a3485bd 100644 (file)
@@ -772,6 +772,6 @@ module_init(agp_amd64_init);
 module_exit(agp_amd64_cleanup);
 #endif
 
-MODULE_AUTHOR("Dave Jones <davej@codemonkey.org.uk>, Andi Kleen");
+MODULE_AUTHOR("Dave Jones <davej@redhat.com>, Andi Kleen");
 module_param(agp_try_unsupported, bool, 0);
 MODULE_LICENSE("GPL");
index ae2791b926b9fe705a80e7540c4482675ad25de0..f1537eece07f7977af412097ed564a54ec2f812a 100644 (file)
@@ -561,6 +561,6 @@ static void __exit agp_ati_cleanup(void)
 module_init(agp_ati_init);
 module_exit(agp_ati_cleanup);
 
-MODULE_AUTHOR("Dave Jones <davej@codemonkey.org.uk>");
+MODULE_AUTHOR("Dave Jones <davej@redhat.com>");
 MODULE_LICENSE("GPL and additional rights");
 
index 3a3cc03d401ce3d3d1484f681bfaae6ba6d88e28..8c617ad7497fef739a7e5f1e24d0d629bba61455 100644 (file)
@@ -349,7 +349,7 @@ static __init int agp_setup(char *s)
 __setup("agp=", agp_setup);
 #endif
 
-MODULE_AUTHOR("Dave Jones <davej@codemonkey.org.uk>");
+MODULE_AUTHOR("Dave Jones <davej@redhat.com>");
 MODULE_DESCRIPTION("AGP GART driver");
 MODULE_LICENSE("GPL and additional rights");
 MODULE_ALIAS_MISCDEV(AGPGART_MINOR);
index 1108665913e207da892459750377400db809c961..9cf6e9bb017e6dca0b537bb04b70e5c49dbf705b 100644 (file)
@@ -2390,5 +2390,5 @@ static void __exit agp_intel_cleanup(void)
 module_init(agp_intel_init);
 module_exit(agp_intel_cleanup);
 
-MODULE_AUTHOR("Dave Jones <davej@codemonkey.org.uk>");
+MODULE_AUTHOR("Dave Jones <davej@redhat.com>");
 MODULE_LICENSE("GPL and additional rights");
index 5bbed3d79db95f9d62ec32132817d645a961c7d2..16acee2de11790546599695eeaf83df1338d0501 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * Nvidia AGPGART routines.
  * Based upon a 2.4 agpgart diff by the folks from NVIDIA, and hacked up
- * to work in 2.5 by Dave Jones <davej@codemonkey.org.uk>
+ * to work in 2.5 by Dave Jones <davej@redhat.com>
  */
 
 #include <linux/module.h>
index f2492ecf082410da192b140d9a0615624ca79f3b..db60539bf67a1ed81216b35f29819cadfc43a83c 100644 (file)
@@ -20,8 +20,8 @@
 #include <linux/agp_backend.h>
 #include <linux/log2.h>
 
-#include <asm-parisc/parisc-device.h>
-#include <asm-parisc/ropes.h>
+#include <asm/parisc-device.h>
+#include <asm/ropes.h>
 
 #include "agp.h"
 
index 9f4d49e1b59a909249ac6297aacd0a3aebfd3669..d3bd243867fc51248a16d101eeb5bbd9c8385079 100644 (file)
@@ -595,4 +595,4 @@ module_init(agp_via_init);
 module_exit(agp_via_cleanup);
 
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Dave Jones <davej@codemonkey.org.uk>");
+MODULE_AUTHOR("Dave Jones <davej@redhat.com>");
index f3cfb4c761259d5b8f6d331c24ea1108fde307fc..408f5f92cb4e1f56959fae99936b34d030f67d70 100644 (file)
@@ -219,7 +219,7 @@ static void hpet_timer_set_irq(struct hpet_dev *devp)
        for (irq = find_first_bit(&v, HPET_MAX_IRQ); irq < HPET_MAX_IRQ;
                irq = find_next_bit(&v, HPET_MAX_IRQ, 1 + irq)) {
 
-               if (irq >= NR_IRQS) {
+               if (irq >= nr_irqs) {
                        irq = HPET_MAX_IRQ;
                        break;
                }
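This hunk is one instance of a pattern repeated through this merge (hpet, random, gpiolib, vr41xx giu): compile-time NR_IRQS bounds become runtime nr_irqs checks ahead of sparse IRQ support. A toy userspace sketch of the gap being closed (example values only):

    #include <stdio.h>

    #define NR_IRQS 256                  /* stale compile-time bound */
    static unsigned int nr_irqs = 64;    /* runtime IRQ count        */

    static int ok_old(unsigned int irq) { return irq < NR_IRQS; }
    static int ok_new(unsigned int irq) { return irq < nr_irqs; }

    int main(void)
    {
            /* irq 100 passes the stale static check but not the
             * runtime one -- exactly the gap these hunks close */
            printf("old=%d new=%d\n", ok_old(100), ok_new(100));
            return 0;
    }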
index c8752eaad483eea52b4dbc8c8af87eaf931ad189..705a839f1796125b257925dc8c3aac722d992018 100644 (file)
@@ -558,9 +558,26 @@ struct timer_rand_state {
        unsigned dont_count_entropy:1;
 };
 
-static struct timer_rand_state input_timer_state;
 static struct timer_rand_state *irq_timer_state[NR_IRQS];
 
+static struct timer_rand_state *get_timer_rand_state(unsigned int irq)
+{
+       if (irq >= nr_irqs)
+               return NULL;
+
+       return irq_timer_state[irq];
+}
+
+static void set_timer_rand_state(unsigned int irq, struct timer_rand_state *state)
+{
+       if (irq >= nr_irqs)
+               return;
+
+       irq_timer_state[irq] = state;
+}
+
+static struct timer_rand_state input_timer_state;
+
 /*
  * This function adds entropy to the entropy "pool" by using timing
  * delays.  It uses the timer_rand_state structure to make an estimate
@@ -648,11 +665,15 @@ EXPORT_SYMBOL_GPL(add_input_randomness);
 
 void add_interrupt_randomness(int irq)
 {
-       if (irq >= NR_IRQS || irq_timer_state[irq] == NULL)
+       struct timer_rand_state *state;
+
+       state = get_timer_rand_state(irq);
+
+       if (state == NULL)
                return;
 
        DEBUG_ENT("irq event %d\n", irq);
-       add_timer_randomness(irq_timer_state[irq], 0x100 + irq);
+       add_timer_randomness(state, 0x100 + irq);
 }
 
 #ifdef CONFIG_BLOCK
@@ -912,7 +933,12 @@ void rand_initialize_irq(int irq)
 {
        struct timer_rand_state *state;
 
-       if (irq >= NR_IRQS || irq_timer_state[irq])
+       if (irq >= nr_irqs)
+               return;
+
+       state = get_timer_rand_state(irq);
+
+       if (state)
                return;
 
        /*
@@ -921,7 +947,7 @@ void rand_initialize_irq(int irq)
         */
        state = kzalloc(sizeof(struct timer_rand_state), GFP_KERNEL);
        if (state)
-               irq_timer_state[irq] = state;
+               set_timer_rand_state(irq, state);
 }
 
 #ifdef CONFIG_BLOCK
index d0c0d64ed366ce9e961c7dbbfca03a9764e29321..ce0d9da52a8ab808a24e8d30bff27d631b474713 100644 (file)
@@ -168,7 +168,7 @@ static void sysrq_handle_show_timers(int key, struct tty_struct *tty)
 static struct sysrq_key_op sysrq_show_timers_op = {
        .handler        = sysrq_handle_show_timers,
        .help_msg       = "show-all-timers(Q)",
-       .action_msg     = "Show pending hrtimers (no others)",
+       .action_msg     = "Show clockevent devices & pending hrtimers (no others)",
 };
 
 static void sysrq_handle_mountro(int key, struct tty_struct *tty)
index ffe9b4e3072e50b02e57c2fa0ac2f58240c85a45..54c837288d19453ff2baf3756d28b8cbc5b1092e 100644 (file)
@@ -641,7 +641,7 @@ static int __devinit giu_probe(struct platform_device *dev)
        }
 
        irq = platform_get_irq(dev, 0);
-       if (irq < 0 || irq >= NR_IRQS)
+       if (irq < 0 || irq >= nr_irqs)
                return -EBUSY;
 
        return cascade_irq(irq, giu_get_irq);
index 71d2ac4e3f46cc0e33410a2d8f7889ac1ae8de09..c20171078d1d6f475f04a23fc0be9e63101d5d76 100644 (file)
@@ -237,9 +237,12 @@ static int __init parse_pmtmr(char *arg)
 
        if (strict_strtoul(arg, 16, &base))
                return -EINVAL;
-
+#ifdef CONFIG_X86_64
+       if (base > UINT_MAX)
+               return -ERANGE;
+#endif
        printk(KERN_INFO "PMTMR IOPort override: 0x%04x -> 0x%04lx\n",
-              (unsigned int)pmtmr_ioport, base);
+              pmtmr_ioport, base);
        pmtmr_ioport = base;
 
        return 1;
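The new range check closes a truncation hazard: on x86-64, strict_strtoul() fills an unsigned long that is wider than the u32 pmtmr_ioport, so an oversized override would silently wrap. A userspace sketch (assumes an LP64 host):

    #include <stdio.h>
    #include <stdint.h>
    #include <limits.h>

    int main(void)
    {
            unsigned long base = 0x100000000UL; /* 2^32, out of range */
            uint32_t port;

            if (base > UINT_MAX) {              /* the check the patch adds */
                    fprintf(stderr, "rejecting out-of-range override\n");
                    return 1;
            }
            port = (uint32_t)base;              /* would silently become 0 */
            printf("port = 0x%04x\n", port);
            return 0;
    }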
index cd303901eb5b20c13b1636ec32614d3ac8800a60..904e57558bb5ea9f34dae3d3df84f5070b36744f 100644 (file)
@@ -48,13 +48,13 @@ config DW_DMAC
          can be integrated in chips such as the Atmel AT32ap7000.
 
 config FSL_DMA
-       bool "Freescale MPC85xx/MPC83xx DMA support"
-       depends on PPC
+       tristate "Freescale Elo and Elo Plus DMA support"
+       depends on FSL_SOC
        select DMA_ENGINE
        ---help---
-         Enable support for the Freescale DMA engine. Now, it support
-         MPC8560/40, MPC8555, MPC8548 and MPC8641 processors.
-         The MPC8349, MPC8360 is also supported.
+         Enable support for the Freescale Elo and Elo Plus DMA controllers.
+         The Elo is the DMA controller on some 82xx and 83xx parts, and the
+         Elo Plus is the DMA controller on 85xx and 86xx parts.
 
 config MV_XOR
        bool "Marvell XOR engine support"
index a08d1970474362d7d8514ca3a74b8cbb2ffe6519..d1e381e35a9e1887e79667d3df734d3dd37ad5b9 100644 (file)
@@ -325,7 +325,12 @@ static enum dma_state_client dmatest_add_channel(struct dma_chan *chan)
        struct dmatest_thread   *thread;
        unsigned int            i;
 
-       dtc = kmalloc(sizeof(struct dmatest_chan), GFP_ATOMIC);
+       /* Have we already been told about this channel? */
+       list_for_each_entry(dtc, &dmatest_channels, node)
+               if (dtc->chan == chan)
+                       return DMA_DUP;
+
+       dtc = kmalloc(sizeof(struct dmatest_chan), GFP_KERNEL);
        if (!dtc) {
                pr_warning("dmatest: No memory for %s\n", chan->dev.bus_id);
                return DMA_NAK;
index c0059ca5834075e70f3fc59512d9ff69617bee29..0b95dcce447e91baef611d769026386ac7c252c3 100644 (file)
@@ -370,7 +370,10 @@ static int fsl_dma_alloc_chan_resources(struct dma_chan *chan,
                                        struct dma_client *client)
 {
        struct fsl_dma_chan *fsl_chan = to_fsl_chan(chan);
-       LIST_HEAD(tmp_list);
+
+       /* Has this channel already been allocated? */
+       if (fsl_chan->desc_pool)
+               return 1;
 
        /* We need the descriptor to be aligned to 32bytes
         * for meeting FSL DMA specification requirement.
@@ -410,6 +413,8 @@ static void fsl_dma_free_chan_resources(struct dma_chan *chan)
        }
        spin_unlock_irqrestore(&fsl_chan->desc_lock, flags);
        dma_pool_destroy(fsl_chan->desc_pool);
+
+       fsl_chan->desc_pool = NULL;
 }
 
 static struct dma_async_tx_descriptor *
@@ -786,159 +791,29 @@ static void dma_do_tasklet(unsigned long data)
        fsl_chan_ld_cleanup(fsl_chan);
 }
 
-static void fsl_dma_callback_test(void *param)
-{
-       struct fsl_dma_chan *fsl_chan = param;
-       if (fsl_chan)
-               dev_dbg(fsl_chan->dev, "selftest: callback is ok!\n");
-}
-
-static int fsl_dma_self_test(struct fsl_dma_chan *fsl_chan)
-{
-       struct dma_chan *chan;
-       int err = 0;
-       dma_addr_t dma_dest, dma_src;
-       dma_cookie_t cookie;
-       u8 *src, *dest;
-       int i;
-       size_t test_size;
-       struct dma_async_tx_descriptor *tx1, *tx2, *tx3;
-
-       test_size = 4096;
-
-       src = kmalloc(test_size * 2, GFP_KERNEL);
-       if (!src) {
-               dev_err(fsl_chan->dev,
-                               "selftest: Cannot alloc memory for test!\n");
-               return -ENOMEM;
-       }
-
-       dest = src + test_size;
-
-       for (i = 0; i < test_size; i++)
-               src[i] = (u8) i;
-
-       chan = &fsl_chan->common;
-
-       if (fsl_dma_alloc_chan_resources(chan, NULL) < 1) {
-               dev_err(fsl_chan->dev,
-                               "selftest: Cannot alloc resources for DMA\n");
-               err = -ENODEV;
-               goto out;
-       }
-
-       /* TX 1 */
-       dma_src = dma_map_single(fsl_chan->dev, src, test_size / 2,
-                                DMA_TO_DEVICE);
-       dma_dest = dma_map_single(fsl_chan->dev, dest, test_size / 2,
-                                 DMA_FROM_DEVICE);
-       tx1 = fsl_dma_prep_memcpy(chan, dma_dest, dma_src, test_size / 2, 0);
-       async_tx_ack(tx1);
-
-       cookie = fsl_dma_tx_submit(tx1);
-       fsl_dma_memcpy_issue_pending(chan);
-       msleep(2);
-
-       if (fsl_dma_is_complete(chan, cookie, NULL, NULL) != DMA_SUCCESS) {
-               dev_err(fsl_chan->dev, "selftest: Time out!\n");
-               err = -ENODEV;
-               goto free_resources;
-       }
-
-       /* Test free and re-alloc channel resources */
-       fsl_dma_free_chan_resources(chan);
-
-       if (fsl_dma_alloc_chan_resources(chan, NULL) < 1) {
-               dev_err(fsl_chan->dev,
-                               "selftest: Cannot alloc resources for DMA\n");
-               err = -ENODEV;
-               goto free_resources;
-       }
-
-       /* Continue to test
-        * TX 2
-        */
-       dma_src = dma_map_single(fsl_chan->dev, src + test_size / 2,
-                                       test_size / 4, DMA_TO_DEVICE);
-       dma_dest = dma_map_single(fsl_chan->dev, dest + test_size / 2,
-                                       test_size / 4, DMA_FROM_DEVICE);
-       tx2 = fsl_dma_prep_memcpy(chan, dma_dest, dma_src, test_size / 4, 0);
-       async_tx_ack(tx2);
-
-       /* TX 3 */
-       dma_src = dma_map_single(fsl_chan->dev, src + test_size * 3 / 4,
-                                       test_size / 4, DMA_TO_DEVICE);
-       dma_dest = dma_map_single(fsl_chan->dev, dest + test_size * 3 / 4,
-                                       test_size / 4, DMA_FROM_DEVICE);
-       tx3 = fsl_dma_prep_memcpy(chan, dma_dest, dma_src, test_size / 4, 0);
-       async_tx_ack(tx3);
-
-       /* Interrupt tx test */
-       tx1 = fsl_dma_prep_interrupt(chan, 0);
-       async_tx_ack(tx1);
-       cookie = fsl_dma_tx_submit(tx1);
-
-       /* Test exchanging the prepared tx sort */
-       cookie = fsl_dma_tx_submit(tx3);
-       cookie = fsl_dma_tx_submit(tx2);
-
-       if (dma_has_cap(DMA_INTERRUPT, ((struct fsl_dma_device *)
-           dev_get_drvdata(fsl_chan->dev->parent))->common.cap_mask)) {
-               tx3->callback = fsl_dma_callback_test;
-               tx3->callback_param = fsl_chan;
-       }
-       fsl_dma_memcpy_issue_pending(chan);
-       msleep(2);
-
-       if (fsl_dma_is_complete(chan, cookie, NULL, NULL) != DMA_SUCCESS) {
-               dev_err(fsl_chan->dev, "selftest: Time out!\n");
-               err = -ENODEV;
-               goto free_resources;
-       }
-
-       err = memcmp(src, dest, test_size);
-       if (err) {
-               for (i = 0; (*(src + i) == *(dest + i)) && (i < test_size);
-                               i++);
-               dev_err(fsl_chan->dev, "selftest: Test failed, data %d/%ld is "
-                               "error! src 0x%x, dest 0x%x\n",
-                               i, (long)test_size, *(src + i), *(dest + i));
-       }
-
-free_resources:
-       fsl_dma_free_chan_resources(chan);
-out:
-       kfree(src);
-       return err;
-}
-
-static int __devinit of_fsl_dma_chan_probe(struct of_device *dev,
-                       const struct of_device_id *match)
+static int __devinit fsl_dma_chan_probe(struct fsl_dma_device *fdev,
+       struct device_node *node, u32 feature, const char *compatible)
 {
-       struct fsl_dma_device *fdev;
        struct fsl_dma_chan *new_fsl_chan;
        int err;
 
-       fdev = dev_get_drvdata(dev->dev.parent);
-       BUG_ON(!fdev);
-
        /* alloc channel */
        new_fsl_chan = kzalloc(sizeof(struct fsl_dma_chan), GFP_KERNEL);
        if (!new_fsl_chan) {
-               dev_err(&dev->dev, "No free memory for allocating "
+               dev_err(fdev->dev, "No free memory for allocating "
                                "dma channels!\n");
                return -ENOMEM;
        }
 
        /* get dma channel register base */
-       err = of_address_to_resource(dev->node, 0, &new_fsl_chan->reg);
+       err = of_address_to_resource(node, 0, &new_fsl_chan->reg);
        if (err) {
-               dev_err(&dev->dev, "Can't get %s property 'reg'\n",
-                               dev->node->full_name);
+               dev_err(fdev->dev, "Can't get %s property 'reg'\n",
+                               node->full_name);
                goto err_no_reg;
        }
 
-       new_fsl_chan->feature = *(u32 *)match->data;
+       new_fsl_chan->feature = feature;
 
        if (!fdev->feature)
                fdev->feature = new_fsl_chan->feature;
@@ -948,13 +823,13 @@ static int __devinit of_fsl_dma_chan_probe(struct of_device *dev,
         */
        WARN_ON(fdev->feature != new_fsl_chan->feature);
 
-       new_fsl_chan->dev = &dev->dev;
+       new_fsl_chan->dev = &new_fsl_chan->common.dev;
        new_fsl_chan->reg_base = ioremap(new_fsl_chan->reg.start,
                        new_fsl_chan->reg.end - new_fsl_chan->reg.start + 1);
 
        new_fsl_chan->id = ((new_fsl_chan->reg.start - 0x100) & 0xfff) >> 7;
        if (new_fsl_chan->id > FSL_DMA_MAX_CHANS_PER_DEVICE) {
-               dev_err(&dev->dev, "There is no %d channel!\n",
+               dev_err(fdev->dev, "There is no %d channel!\n",
                                new_fsl_chan->id);
                err = -EINVAL;
                goto err_no_chan;
@@ -988,29 +863,23 @@ static int __devinit of_fsl_dma_chan_probe(struct of_device *dev,
                        &fdev->common.channels);
        fdev->common.chancnt++;
 
-       new_fsl_chan->irq = irq_of_parse_and_map(dev->node, 0);
+       new_fsl_chan->irq = irq_of_parse_and_map(node, 0);
        if (new_fsl_chan->irq != NO_IRQ) {
                err = request_irq(new_fsl_chan->irq,
                                        &fsl_dma_chan_do_interrupt, IRQF_SHARED,
                                        "fsldma-channel", new_fsl_chan);
                if (err) {
-                       dev_err(&dev->dev, "DMA channel %s request_irq error "
-                               "with return %d\n", dev->node->full_name, err);
+                       dev_err(fdev->dev, "DMA channel %s request_irq error "
+                               "with return %d\n", node->full_name, err);
                        goto err_no_irq;
                }
        }
 
-       err = fsl_dma_self_test(new_fsl_chan);
-       if (err)
-               goto err_self_test;
-
-       dev_info(&dev->dev, "#%d (%s), irq %d\n", new_fsl_chan->id,
-                               match->compatible, new_fsl_chan->irq);
+       dev_info(fdev->dev, "#%d (%s), irq %d\n", new_fsl_chan->id,
+                               compatible, new_fsl_chan->irq);
 
        return 0;
 
-err_self_test:
-       free_irq(new_fsl_chan->irq, new_fsl_chan);
 err_no_irq:
        list_del(&new_fsl_chan->common.device_node);
 err_no_chan:
@@ -1020,38 +889,20 @@ err_no_reg:
        return err;
 }
 
-const u32 mpc8540_dma_ip_feature = FSL_DMA_IP_85XX | FSL_DMA_BIG_ENDIAN;
-const u32 mpc8349_dma_ip_feature = FSL_DMA_IP_83XX | FSL_DMA_LITTLE_ENDIAN;
-
-static struct of_device_id of_fsl_dma_chan_ids[] = {
-       {
-               .compatible = "fsl,eloplus-dma-channel",
-               .data = (void *)&mpc8540_dma_ip_feature,
-       },
-       {
-               .compatible = "fsl,elo-dma-channel",
-               .data = (void *)&mpc8349_dma_ip_feature,
-       },
-       {}
-};
-
-static struct of_platform_driver of_fsl_dma_chan_driver = {
-       .name = "of-fsl-dma-channel",
-       .match_table = of_fsl_dma_chan_ids,
-       .probe = of_fsl_dma_chan_probe,
-};
-
-static __init int of_fsl_dma_chan_init(void)
+static void fsl_dma_chan_remove(struct fsl_dma_chan *fchan)
 {
-       return of_register_platform_driver(&of_fsl_dma_chan_driver);
+       free_irq(fchan->irq, fchan);
+       list_del(&fchan->common.device_node);
+       iounmap(fchan->reg_base);
+       kfree(fchan);
 }
 
 static int __devinit of_fsl_dma_probe(struct of_device *dev,
                        const struct of_device_id *match)
 {
        int err;
-       unsigned int irq;
        struct fsl_dma_device *fdev;
+       struct device_node *child;
 
        fdev = kzalloc(sizeof(struct fsl_dma_device), GFP_KERNEL);
        if (!fdev) {
@@ -1085,9 +936,9 @@ static int __devinit of_fsl_dma_probe(struct of_device *dev,
        fdev->common.device_issue_pending = fsl_dma_memcpy_issue_pending;
        fdev->common.dev = &dev->dev;
 
-       irq = irq_of_parse_and_map(dev->node, 0);
-       if (irq != NO_IRQ) {
-               err = request_irq(irq, &fsl_dma_do_interrupt, IRQF_SHARED,
+       fdev->irq = irq_of_parse_and_map(dev->node, 0);
+       if (fdev->irq != NO_IRQ) {
+               err = request_irq(fdev->irq, &fsl_dma_do_interrupt, IRQF_SHARED,
                                        "fsldma-device", fdev);
                if (err) {
                        dev_err(&dev->dev, "DMA device request_irq error "
@@ -1097,7 +948,21 @@ static int __devinit of_fsl_dma_probe(struct of_device *dev,
        }
 
        dev_set_drvdata(&(dev->dev), fdev);
-       of_platform_bus_probe(dev->node, of_fsl_dma_chan_ids, &dev->dev);
+
+       /* We cannot use of_platform_bus_probe() because there is no
+        * of_platform_bus_remove.  Instead, we manually instantiate every DMA
+        * channel object.
+        */
+       for_each_child_of_node(dev->node, child) {
+               if (of_device_is_compatible(child, "fsl,eloplus-dma-channel"))
+                       fsl_dma_chan_probe(fdev, child,
+                               FSL_DMA_IP_85XX | FSL_DMA_BIG_ENDIAN,
+                               "fsl,eloplus-dma-channel");
+               if (of_device_is_compatible(child, "fsl,elo-dma-channel"))
+                       fsl_dma_chan_probe(fdev, child,
+                               FSL_DMA_IP_83XX | FSL_DMA_LITTLE_ENDIAN,
+                               "fsl,elo-dma-channel");
+       }
 
        dma_async_device_register(&fdev->common);
        return 0;
@@ -1109,6 +974,30 @@ err_no_reg:
        return err;
 }
 
+static int of_fsl_dma_remove(struct of_device *of_dev)
+{
+       struct fsl_dma_device *fdev;
+       unsigned int i;
+
+       fdev = dev_get_drvdata(&of_dev->dev);
+
+       dma_async_device_unregister(&fdev->common);
+
+       for (i = 0; i < FSL_DMA_MAX_CHANS_PER_DEVICE; i++)
+               if (fdev->chan[i])
+                       fsl_dma_chan_remove(fdev->chan[i]);
+
+       if (fdev->irq != NO_IRQ)
+               free_irq(fdev->irq, fdev);
+
+       iounmap(fdev->reg_base);
+
+       kfree(fdev);
+       dev_set_drvdata(&of_dev->dev, NULL);
+
+       return 0;
+}
+
 static struct of_device_id of_fsl_dma_ids[] = {
        { .compatible = "fsl,eloplus-dma", },
        { .compatible = "fsl,elo-dma", },
@@ -1116,15 +1005,32 @@ static struct of_device_id of_fsl_dma_ids[] = {
 };
 
 static struct of_platform_driver of_fsl_dma_driver = {
-       .name = "of-fsl-dma",
+       .name = "fsl-elo-dma",
        .match_table = of_fsl_dma_ids,
        .probe = of_fsl_dma_probe,
+       .remove = of_fsl_dma_remove,
 };
 
 static __init int of_fsl_dma_init(void)
 {
-       return of_register_platform_driver(&of_fsl_dma_driver);
+       int ret;
+
+       pr_info("Freescale Elo / Elo Plus DMA driver\n");
+
+       ret = of_register_platform_driver(&of_fsl_dma_driver);
+       if (ret)
+               pr_err("fsldma: failed to register platform driver\n");
+
+       return ret;
+}
+
+static void __exit of_fsl_dma_exit(void)
+{
+       of_unregister_platform_driver(&of_fsl_dma_driver);
 }
 
-subsys_initcall(of_fsl_dma_chan_init);
 subsys_initcall(of_fsl_dma_init);
+module_exit(of_fsl_dma_exit);
+
+MODULE_DESCRIPTION("Freescale Elo / Elo Plus DMA driver");
+MODULE_LICENSE("GPL");
index 6faf07ba0d0eb4d095f1968dee4bdd8d5ea803c0..4f21a512d84852cf2074084ffc8f7035228a2fe7 100644 (file)
@@ -114,6 +114,7 @@ struct fsl_dma_device {
        struct dma_device common;
        struct fsl_dma_chan *chan[FSL_DMA_MAX_CHANS_PER_DEVICE];
        u32 feature;            /* The same as DMA channels */
+       int irq;                /* Controller IRQ */
 };
 
 /* Define macros for fsl_dma_chan->feature property */
index bc8c6e3470ca258f6646c0ad2cd9278425296ec2..1ef68b3156570487094e704eaceb9ffdb03db4e6 100644 (file)
@@ -971,11 +971,9 @@ static struct ioat_desc_sw *ioat_dma_get_next_descriptor(
        switch (ioat_chan->device->version) {
        case IOAT_VER_1_2:
                return ioat1_dma_get_next_descriptor(ioat_chan);
-               break;
        case IOAT_VER_2_0:
        case IOAT_VER_3_0:
                return ioat2_dma_get_next_descriptor(ioat_chan);
-               break;
        }
        return NULL;
 }
index 22edc4273ef68cd46eceabe2ec249cca93faf0a5..faa1cc66e9cf43a527beb4ce824d966dc407b494 100644 (file)
@@ -1143,7 +1143,7 @@ static void gpiolib_dbg_show(struct seq_file *s, struct gpio_chip *chip)
 
                if (!is_out) {
                        int             irq = gpio_to_irq(gpio);
-                       struct irq_desc *desc = irq_desc + irq;
+                       struct irq_desc *desc = irq_to_desc(irq);
 
                        /* This races with request_irq(), set_irq_type(),
                         * and set_irq_wake() ... but those are "rare".
index 9097500de5f466249417c13cb4a4b804679726a5..a8b33c2ec8d2fa951a4b81509cf944eb0e4759af 100644 (file)
@@ -6,7 +6,7 @@
 #
 menuconfig DRM
        tristate "Direct Rendering Manager (XFree86 4.1.0 and higher DRI support)"
-       depends on (AGP || AGP=n) && PCI && !EMULATED_CMPXCHG && SHMEM
+       depends on (AGP || AGP=n) && PCI && !EMULATED_CMPXCHG && MMU
        help
          Kernel-level support for the Direct Rendering Infrastructure (DRI)
          introduced in XFree86 4.0. If you say Y here, you need to select
index d490db4c0de06a11ea3de7579681f3b76177c2a5..ae73b7f7249ad36059dde96219fe2fb33022b082 100644 (file)
@@ -522,12 +522,12 @@ static int drm_gem_one_name_info(int id, void *ptr, void *data)
        struct drm_gem_object *obj = ptr;
        struct drm_gem_name_info_data   *nid = data;
 
-       DRM_INFO("name %d size %d\n", obj->name, obj->size);
+       DRM_INFO("name %d size %zd\n", obj->name, obj->size);
        if (nid->eof)
                return 0;
 
        nid->len += sprintf(&nid->buf[nid->len],
-                           "%6d%9d%8d%9d\n",
+                           "%6d %8zd %7d %8d\n",
                            obj->name, obj->size,
                            atomic_read(&obj->handlecount.refcount),
                            atomic_read(&obj->refcount.refcount));
index 9ac73dd1b422db554758061d7d8786403bf64b9e..dc2e6fdb6ca33b7b6930f049d41495da0c8b17ef 100644 (file)
@@ -171,6 +171,37 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
        return 0;
 }
 
+/*
+ * Try to write quickly with an atomic kmap. Return true on success.
+ *
+ * If this fails (which includes a partial write), we'll redo the whole
+ * thing with the slow version.
+ *
+ * This is a workaround for the low performance of iounmap (approximately
+ * 10% CPU cost on normal 3D workloads).  kmap_atomic on HIGHMEM kernels
+ * happens to let us map card memory without taking IPIs.  When the vmap
+ * rework lands we should be able to dump this hack.
+ */
+static inline int fast_user_write(unsigned long pfn, char __user *user_data,
+                                 int l, int o)
+{
+#ifdef CONFIG_HIGHMEM
+       unsigned long unwritten;
+       char *vaddr_atomic;
+
+       vaddr_atomic = kmap_atomic_pfn(pfn, KM_USER0);
+#if WATCH_PWRITE
+       DRM_INFO("pwrite i %d o %d l %d pfn %ld vaddr %p\n",
+                i, o, l, pfn, vaddr_atomic);
+#endif
+       unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + o, user_data, l);
+       kunmap_atomic(vaddr_atomic, KM_USER0);
+       return !unwritten;
+#else
+       return 0;
+#endif
+}
+
 static int
 i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
                    struct drm_i915_gem_pwrite *args,
@@ -180,12 +211,7 @@ i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
        ssize_t remain;
        loff_t offset;
        char __user *user_data;
-       char __iomem *vaddr;
-       char *vaddr_atomic;
-       int i, o, l;
        int ret = 0;
-       unsigned long pfn;
-       unsigned long unwritten;
 
        user_data = (char __user *) (uintptr_t) args->data_ptr;
        remain = args->size;
@@ -209,6 +235,9 @@ i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
        obj_priv->dirty = 1;
 
        while (remain > 0) {
+               unsigned long pfn;
+               int i, o, l;
+
                /* Operation in this page
                 *
                 * i = page number
@@ -223,25 +252,10 @@ i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
 
                pfn = (dev->agp->base >> PAGE_SHIFT) + i;
 
-#ifdef CONFIG_HIGHMEM
-               /* This is a workaround for the low performance of iounmap
-                * (approximate 10% cpu cost on normal 3D workloads).
-                * kmap_atomic on HIGHMEM kernels happens to let us map card
-                * memory without taking IPIs.  When the vmap rework lands
-                * we should be able to dump this hack.
-                */
-               vaddr_atomic = kmap_atomic_pfn(pfn, KM_USER0);
-#if WATCH_PWRITE
-               DRM_INFO("pwrite i %d o %d l %d pfn %ld vaddr %p\n",
-                        i, o, l, pfn, vaddr_atomic);
-#endif
-               unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + o,
-                                                             user_data, l);
-               kunmap_atomic(vaddr_atomic, KM_USER0);
+               if (!fast_user_write(pfn, user_data, l, o)) {
+                       unsigned long unwritten;
+                       char __iomem *vaddr;
 
-               if (unwritten)
-#endif /* CONFIG_HIGHMEM */
-               {
                        vaddr = ioremap_wc(pfn << PAGE_SHIFT, PAGE_SIZE);
 #if WATCH_PWRITE
                        DRM_INFO("pwrite slow i %d o %d l %d "
index 1ea39254dac6e41dbd66fb0252d9016f1b95195d..424dad6f18d83a13488cfb0b8653db8631859ea1 100644 (file)
@@ -332,10 +332,6 @@ static int __devinit amd756_probe(struct pci_dev *pdev,
        int error;
        u8 temp;
        
-       /* driver_data might come from user-space, so check it */
-       if (id->driver_data >= ARRAY_SIZE(chipname))
-               return -EINVAL;
-
        if (amd756_ioport) {
                dev_err(&pdev->dev, "Only one device supported "
                       "(you have a strange motherboard, btw)\n");
@@ -412,7 +408,6 @@ static struct pci_driver amd756_driver = {
        .id_table       = amd756_ids,
        .probe          = amd756_probe,
        .remove         = __devexit_p(amd756_remove),
-       .dynids.use_driver_data = 1,
 };
 
 static int __init amd756_init(void)
index 73dc52e114eb00009d2b87e43250ce03d233ac17..9f194d9efd91b1d39e1cdd7ed874ed871d81d7d5 100644 (file)
@@ -332,10 +332,6 @@ static int __devinit vt596_probe(struct pci_dev *pdev,
        unsigned char temp;
        int error = -ENODEV;
 
-       /* driver_data might come from user-space, so check it */
-       if (id->driver_data & 1 || id->driver_data > 0xff)
-               return -EINVAL;
-
        /* Determine the address of the SMBus areas */
        if (force_addr) {
                vt596_smba = force_addr & 0xfff0;
@@ -483,7 +479,6 @@ static struct pci_driver vt596_driver = {
        .name           = "vt596_smbus",
        .id_table       = vt596_ids,
        .probe          = vt596_probe,
-       .dynids.use_driver_data = 1,
 };
 
 static int __init i2c_vt596_init(void)
index 74a369a6116fc7fbeb7bea0c68c629ec4cfb1102..a820ca6fc32770a94410b24a3a126a6f68564432 100644 (file)
@@ -84,21 +84,40 @@ config BLK_DEV_IDE_SATA
 
          If unsure, say N.
 
-config BLK_DEV_IDEDISK
-       tristate "Include IDE/ATA-2 DISK support"
-       ---help---
-         This will include enhanced support for MFM/RLL/IDE hard disks.  If
-         you have a MFM/RLL/IDE disk, and there is no special reason to use
-         the old hard disk driver instead, say Y.  If you have an SCSI-only
-         system, you can say N here.
+config IDE_GD
+       tristate "generic ATA/ATAPI disk support"
+       default y
+       help
+         Support for ATA/ATAPI disks (including ATAPI floppy drives).
 
-         To compile this driver as a module, choose M here: the
-         module will be called ide-disk.
-         Do not compile this driver as a module if your root file system
-         (the one containing the directory /) is located on the IDE disk.
+         To compile this driver as a module, choose M here.
+         The module will be called ide-gd_mod.
+
+         If unsure, say Y.
+
+config IDE_GD_ATA
+       bool "ATA disk support"
+       depends on IDE_GD
+       default y
+       help
+         This will include support for ATA hard disks.
 
          If unsure, say Y.
 
+config IDE_GD_ATAPI
+       bool "ATAPI floppy support"
+       depends on IDE_GD
+       select IDE_ATAPI
+       help
+         This will include support for ATAPI floppy drives
+         (e.g. Iomega ZIP or MKE LS-120).
+
+         For information about jumper settings and the question
+         of when a ZIP drive uses a partition table, see
+         <http://www.win.tue.nl/~aeb/linux/zip/zip-1.html>.
+
+         If unsure, say N.
+
 config BLK_DEV_IDECS
        tristate "PCMCIA IDE support"
        depends on PCMCIA
@@ -163,29 +182,6 @@ config BLK_DEV_IDETAPE
          To compile this driver as a module, choose M here: the
          module will be called ide-tape.
 
-config BLK_DEV_IDEFLOPPY
-       tristate "Include IDE/ATAPI FLOPPY support"
-       select IDE_ATAPI
-       ---help---
-         If you have an IDE floppy drive which uses the ATAPI protocol,
-         answer Y.  ATAPI is a newer protocol used by IDE CD-ROM/tape/floppy
-         drives, similar to the SCSI protocol.
-
-         The LS-120 and the IDE/ATAPI Iomega ZIP drive are also supported by
-         this driver. For information about jumper settings and the question
-         of when a ZIP drive uses a partition table, see
-         <http://www.win.tue.nl/~aeb/linux/zip/zip-1.html>.
-         (ATAPI PD-CD/CDR drives are not supported by this driver; support
-         for PD-CD/CDR drives is available if you answer Y to
-         "SCSI emulation support", below).
-
-         If you say Y here, the FLOPPY drive will be identified along with
-         other IDE devices, as "hdb" or "hdc", or something similar (check
-         the boot messages with dmesg).
-
-         To compile this driver as a module, choose M here: the
-         module will be called ide-floppy.
-
 config BLK_DEV_IDESCSI
        tristate "SCSI emulation support (DEPRECATED)"
        depends on SCSI
@@ -332,7 +328,7 @@ config IDEPCI_PCIBUS_ORDER
 # TODO: split it on per host driver config options (or module parameters)
 config BLK_DEV_OFFBOARD
        bool "Boot off-board chipsets first support (DEPRECATED)"
-       depends on BLK_DEV_IDEPCI && (BLK_DEV_AEC62XX || BLK_DEV_GENERIC || BLK_DEV_HPT34X || BLK_DEV_HPT366 || BLK_DEV_PDC202XX_NEW || BLK_DEV_PDC202XX_OLD || BLK_DEV_TC86C001)
+       depends on BLK_DEV_IDEPCI && (BLK_DEV_AEC62XX || BLK_DEV_GENERIC || BLK_DEV_HPT366 || BLK_DEV_PDC202XX_NEW || BLK_DEV_PDC202XX_OLD || BLK_DEV_TC86C001)
        help
          Normally, IDE controllers built into the motherboard (on-board
          controllers) are assigned to ide0 and ide1 while those on add-in PCI
@@ -482,28 +478,6 @@ config BLK_DEV_CS5535
 
          It is safe to say Y to this question.
 
-config BLK_DEV_HPT34X
-       tristate "HPT34X chipset support"
-       depends on BROKEN
-       select BLK_DEV_IDEDMA_PCI
-       help
-         This driver adds up to 4 more EIDE devices sharing a single
-         interrupt. The HPT343 chipset in its current form is a non-bootable
-         controller; the HPT345/HPT363 chipset is a bootable (needs BIOS FIX)
-         PCI UDMA controllers. This driver requires dynamic tuning of the
-         chipset during the ide-probe at boot time. It is reported to support
-         DVD II drives, by the manufacturer.
-
-config HPT34X_AUTODMA
-       bool "HPT34X AUTODMA support (EXPERIMENTAL)"
-       depends on BLK_DEV_HPT34X && EXPERIMENTAL
-       help
-         This is a dangerous thing to attempt currently! Please read the
-         comments at the top of <file:drivers/ide/pci/hpt34x.c>.  If you say Y
-         here, then say Y to "Use DMA by default when available" as well.
-
-         If unsure, say N.
-
 config BLK_DEV_HPT366
        tristate "HPT36X/37X chipset support"
        select BLK_DEV_IDEDMA_PCI
index ceaf779054eac07246702bf789b0ffbc6a97b7b3..093d3248ca898359b1e874fd9a9d4153eb21eed7 100644 (file)
@@ -37,18 +37,25 @@ obj-$(CONFIG_IDE_H8300)                     += h8300/
 obj-$(CONFIG_IDE_GENERIC)              += ide-generic.o
 obj-$(CONFIG_BLK_DEV_IDEPNP)           += ide-pnp.o
 
-ide-disk_mod-y += ide-disk.o ide-disk_ioctl.o
+ide-gd_mod-y += ide-gd.o
 ide-cd_mod-y += ide-cd.o ide-cd_ioctl.o ide-cd_verbose.o
-ide-floppy_mod-y += ide-floppy.o ide-floppy_ioctl.o
 
+ifeq ($(CONFIG_IDE_GD_ATA), y)
+       ide-gd_mod-y += ide-disk.o ide-disk_ioctl.o
 ifeq ($(CONFIG_IDE_PROC_FS), y)
-       ide-disk_mod-y += ide-disk_proc.o
-       ide-floppy_mod-y += ide-floppy_proc.o
+       ide-gd_mod-y += ide-disk_proc.o
+endif
+endif
+
+ifeq ($(CONFIG_IDE_GD_ATAPI), y)
+       ide-gd_mod-y += ide-floppy.o ide-floppy_ioctl.o
+ifeq ($(CONFIG_IDE_PROC_FS), y)
+       ide-gd_mod-y += ide-floppy_proc.o
+endif
 endif
 
-obj-$(CONFIG_BLK_DEV_IDEDISK)          += ide-disk_mod.o
+obj-$(CONFIG_IDE_GD)                   += ide-gd_mod.o
 obj-$(CONFIG_BLK_DEV_IDECD)            += ide-cd_mod.o
-obj-$(CONFIG_BLK_DEV_IDEFLOPPY)                += ide-floppy_mod.o
 obj-$(CONFIG_BLK_DEV_IDETAPE)          += ide-tape.o
 
 ifeq ($(CONFIG_BLK_DEV_IDECS), y)
index 2e305714c209e8d9573a2aee1f490ece032b2580..4e58b9e7a58a01592eb3e130aee781a41e4891d1 100644 (file)
@@ -191,7 +191,7 @@ int ide_set_media_lock(ide_drive_t *drive, struct gendisk *disk, int on)
 {
        struct ide_atapi_pc pc;
 
-       if (drive->atapi_flags & IDE_AFLAG_NO_DOORLOCK)
+       if ((drive->dev_flags & IDE_DFLAG_DOORLOCKING) == 0)
                return 0;
 
        ide_init_pc(&pc);
index 3308b1cd3a335de2cf4da4867ec7ca2597649f5a..13265a8827da9b7cfa15d209c46634ec6a939486 100644 (file)
@@ -99,7 +99,7 @@ static void ide_cd_put(struct cdrom_info *cd)
 /* Mark that we've seen a media change and invalidate our internal buffers. */
 static void cdrom_saw_media_change(ide_drive_t *drive)
 {
-       drive->atapi_flags |= IDE_AFLAG_MEDIA_CHANGED;
+       drive->dev_flags |= IDE_DFLAG_MEDIA_CHANGED;
        drive->atapi_flags &= ~IDE_AFLAG_TOC_VALID;
 }
 
@@ -340,8 +340,8 @@ static int cdrom_decode_status(ide_drive_t *drive, int good_stat, int *stat_ret)
        }
 
        ide_debug_log(IDE_DBG_RQ, "%s: stat: 0x%x, good_stat: 0x%x, "
-                     "rq->cmd_type: 0x%x, err: 0x%x\n", __func__, stat,
-                     good_stat, rq->cmd_type, err);
+                     "rq->cmd[0]: 0x%x, rq->cmd_type: 0x%x, err: 0x%x\n",
+                     __func__, stat, good_stat, rq->cmd[0], rq->cmd_type, err);
 
        if (blk_sense_request(rq)) {
                /*
@@ -843,13 +843,10 @@ static void ide_cd_restore_request(ide_drive_t *drive, struct request *rq)
        rq->q->prep_rq_fn(rq->q, rq);
 }
 
-/*
- * All other packet commands.
- */
 static void ide_cd_request_sense_fixup(ide_drive_t *drive, struct request *rq)
 {
-
-       ide_debug_log(IDE_DBG_FUNC, "Call %s\n", __func__);
+       ide_debug_log(IDE_DBG_FUNC, "Call %s, rq->cmd[0]: 0x%x\n",
+                     __func__, rq->cmd[0]);
 
        /*
         * Some of the trailing request sense fields are optional,
@@ -876,7 +873,7 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd,
        if (!sense)
                sense = &local_sense;
 
-       ide_debug_log(IDE_DBG_PC, "Call %s, rq->cmd[0]: 0x%x, write: 0x%x, "
+       ide_debug_log(IDE_DBG_PC, "Call %s, cmd[0]: 0x%x, write: 0x%x, "
                      "timeout: %d, cmd_flags: 0x%x\n", __func__, cmd[0], write,
                      timeout, cmd_flags);
 
@@ -1177,8 +1174,9 @@ static ide_startstop_t cdrom_start_rw(ide_drive_t *drive, struct request *rq)
        unsigned short sectors_per_frame =
                queue_hardsect_size(drive->queue) >> SECTOR_BITS;
 
-       ide_debug_log(IDE_DBG_RQ, "Call %s, write: 0x%x, secs_per_frame: %u\n",
-                     __func__, write, sectors_per_frame);
+       ide_debug_log(IDE_DBG_RQ, "Call %s, rq->cmd[0]: 0x%x, write: 0x%x, "
+                     "secs_per_frame: %u\n",
+                     __func__, rq->cmd[0], write, sectors_per_frame);
 
        if (write) {
                /* disk has become write protected */
@@ -1221,7 +1219,8 @@ static ide_startstop_t cdrom_do_newpc_cont(ide_drive_t *drive)
 static void cdrom_do_block_pc(ide_drive_t *drive, struct request *rq)
 {
 
-       ide_debug_log(IDE_DBG_PC, "Call %s, rq->cmd_type: 0x%x\n", __func__,
+       ide_debug_log(IDE_DBG_PC, "Call %s, rq->cmd[0]: 0x%x, "
+                     "rq->cmd_type: 0x%x\n", __func__, rq->cmd[0],
                      rq->cmd_type);
 
        if (blk_pc_request(rq))
@@ -1257,9 +1256,6 @@ static void cdrom_do_block_pc(ide_drive_t *drive, struct request *rq)
        }
 }
 
-/*
- * cdrom driver request routine.
- */
 static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq,
                                        sector_t block)
 {
@@ -1267,8 +1263,10 @@ static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq,
        ide_handler_t *fn;
        int xferlen;
 
-       ide_debug_log(IDE_DBG_RQ, "Call %s, rq->cmd_type: 0x%x, block: %llu\n",
-                     __func__, rq->cmd_type, (unsigned long long)block);
+       ide_debug_log(IDE_DBG_RQ, "Call %s, rq->cmd[0]: 0x%x, "
+                     "rq->cmd_type: 0x%x, block: %llu\n",
+                     __func__, rq->cmd[0], rq->cmd_type,
+                     (unsigned long long)block);
 
        if (blk_fs_request(rq)) {
                if (drive->atapi_flags & IDE_AFLAG_SEEKING) {
@@ -1412,6 +1410,10 @@ static int cdrom_read_capacity(ide_drive_t *drive, unsigned long *capacity,
 
        *capacity = 1 + be32_to_cpu(capbuf.lba);
        *sectors_per_frame = blocklen >> SECTOR_BITS;
+
+       ide_debug_log(IDE_DBG_PROBE, "%s: cap: %lu, sectors_per_frame: %lu\n",
+                     __func__, *capacity, *sectors_per_frame);
+
        return 0;
 }
 
@@ -1643,6 +1645,9 @@ void ide_cdrom_update_speed(ide_drive_t *drive, u8 *buf)
                maxspeed = be16_to_cpup((__be16 *)&buf[8 + 8]);
        }
 
+       ide_debug_log(IDE_DBG_PROBE, "%s: curspeed: %u, maxspeed: %u\n",
+                     __func__, curspeed, maxspeed);
+
        cd->current_speed = (curspeed + (176/2)) / 176;
        cd->max_speed = (maxspeed + (176/2)) / 176;
 }
@@ -1732,7 +1737,7 @@ static int ide_cdrom_probe_capabilities(ide_drive_t *drive)
                return 0;
 
        if ((buf[8 + 6] & 0x01) == 0)
-               drive->atapi_flags |= IDE_AFLAG_NO_DOORLOCK;
+               drive->dev_flags &= ~IDE_DFLAG_DOORLOCKING;
        if (buf[8 + 6] & 0x08)
                drive->atapi_flags &= ~IDE_AFLAG_NO_EJECT;
        if (buf[8 + 3] & 0x01)
@@ -1777,7 +1782,7 @@ static int ide_cdrom_probe_capabilities(ide_drive_t *drive)
        if ((cdi->mask & CDC_DVD_R) == 0 || (cdi->mask & CDC_DVD_RAM) == 0)
                printk(KERN_CONT " DVD%s%s",
                                 (cdi->mask & CDC_DVD_R) ? "" : "-R",
-                                (cdi->mask & CDC_DVD_RAM) ? "" : "-RAM");
+                                (cdi->mask & CDC_DVD_RAM) ? "" : "/RAM");
 
        if ((cdi->mask & CDC_CD_R) == 0 || (cdi->mask & CDC_CD_RW) == 0)
                printk(KERN_CONT " CD%s%s",
@@ -1908,6 +1913,16 @@ static const struct ide_proc_devset idecd_settings[] = {
        IDE_PROC_DEVSET(dsc_overlap, 0, 1),
        { 0 },
 };
+
+static ide_proc_entry_t *ide_cd_proc_entries(ide_drive_t *drive)
+{
+       return idecd_proc;
+}
+
+static const struct ide_proc_devset *ide_cd_proc_devsets(ide_drive_t *drive)
+{
+       return idecd_settings;
+}
 #endif
 
 static const struct cd_list_entry ide_cd_quirks_list[] = {
@@ -1986,8 +2001,8 @@ static int ide_cdrom_setup(ide_drive_t *drive)
        if (!drive->queue->unplug_delay)
                drive->queue->unplug_delay = 1;
 
-       drive->atapi_flags = IDE_AFLAG_MEDIA_CHANGED | IDE_AFLAG_NO_EJECT |
-                      ide_cd_flags(id);
+       drive->dev_flags |= IDE_DFLAG_MEDIA_CHANGED;
+       drive->atapi_flags = IDE_AFLAG_NO_EJECT | ide_cd_flags(id);
 
        if ((drive->atapi_flags & IDE_AFLAG_VERTOS_300_SSD) &&
            fw_rev[4] == '1' && fw_rev[6] <= '2')
@@ -2069,8 +2084,8 @@ static ide_driver_t ide_cdrom_driver = {
        .end_request            = ide_end_request,
        .error                  = __ide_error,
 #ifdef CONFIG_IDE_PROC_FS
-       .proc                   = idecd_proc,
-       .settings               = idecd_settings,
+       .proc_entries           = ide_cd_proc_entries,
+       .proc_devsets           = ide_cd_proc_devsets,
 #endif
 };
 
index 74231b41f611b5c63a2e60198ab9b742405deb33..df3df0041eb61b8b0050703bbeb5585500bbc8fd 100644 (file)
@@ -86,8 +86,8 @@ int ide_cdrom_check_media_change_real(struct cdrom_device_info *cdi,
 
        if (slot_nr == CDSL_CURRENT) {
                (void) cdrom_check_status(drive, NULL);
-               retval = (drive->atapi_flags & IDE_AFLAG_MEDIA_CHANGED) ? 1 : 0;
-               drive->atapi_flags &= ~IDE_AFLAG_MEDIA_CHANGED;
+               retval = (drive->dev_flags & IDE_DFLAG_MEDIA_CHANGED) ? 1 : 0;
+               drive->dev_flags &= ~IDE_DFLAG_MEDIA_CHANGED;
                return retval;
        } else {
                return -EINVAL;
@@ -136,7 +136,7 @@ int ide_cd_lockdoor(ide_drive_t *drive, int lockflag,
                sense = &my_sense;
 
        /* If the drive cannot lock the door, just pretend. */
-       if (drive->atapi_flags & IDE_AFLAG_NO_DOORLOCK) {
+       if ((drive->dev_flags & IDE_DFLAG_DOORLOCKING) == 0) {
                stat = 0;
        } else {
                unsigned char cmd[BLK_MAX_CDB];
@@ -157,7 +157,7 @@ int ide_cd_lockdoor(ide_drive_t *drive, int lockflag,
            (sense->asc == 0x24 || sense->asc == 0x20)) {
                printk(KERN_ERR "%s: door locking not supported\n",
                        drive->name);
-               drive->atapi_flags |= IDE_AFLAG_NO_DOORLOCK;
+               drive->dev_flags &= ~IDE_DFLAG_DOORLOCKING;
                stat = 0;
        }
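A polarity flip runs through these door-lock hunks: the negative ATAPI flag IDE_AFLAG_NO_DOORLOCK ("cannot lock") gives way to the positive device flag IDE_DFLAG_DOORLOCKING ("can lock"), so every |= of the old flag becomes an &= ~ of the new one, and every set-test becomes a clear-test. A later hunk in the probe code sets IDE_DFLAG_DOORLOCKING by default for ATAPI devices, so drivers only ever clear it. A standalone demo of the equivalence (bit positions made up for illustration; the real values live in <linux/ide.h> and are not shown in this diff):

    #include <stdio.h>

    #define IDE_AFLAG_NO_DOORLOCK  (1u << 3)   /* assumed bit positions */
    #define IDE_DFLAG_DOORLOCKING  (1u << 7)

    int main(void)
    {
            unsigned long atapi_flags = 0;
            unsigned long dev_flags = IDE_DFLAG_DOORLOCKING; /* default-on now */

            /* old scheme: setting the bit means "cannot lock the door" */
            atapi_flags |= IDE_AFLAG_NO_DOORLOCK;
            printf("old: door locking %ssupported\n",
                   (atapi_flags & IDE_AFLAG_NO_DOORLOCK) ? "not " : "");

            /* new scheme: the same transition clears the positive bit */
            dev_flags &= ~IDE_DFLAG_DOORLOCKING;
            printf("new: door locking %ssupported\n",
                   (dev_flags & IDE_DFLAG_DOORLOCKING) ? "" : "not ");
            return 0;
    }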
 
index 3853bde8eedc3c291b8b676529563afccdcc2f0d..223750c1b5a6a170bccfcd0b6171b022df0c5caa 100644 (file)
@@ -14,9 +14,6 @@
  * This is the IDE/ATA disk driver, as evolved from hd.c and ide.c.
  */
 
-#define IDEDISK_VERSION        "1.18"
-
-#include <linux/module.h>
 #include <linux/types.h>
 #include <linux/string.h>
 #include <linux/kernel.h>
 #include <asm/io.h>
 #include <asm/div64.h>
 
-#if !defined(CONFIG_DEBUG_BLOCK_EXT_DEVT)
-#define IDE_DISK_MINORS                (1 << PARTN_BITS)
-#else
-#define IDE_DISK_MINORS                0
-#endif
-
 #include "ide-disk.h"
 
-static DEFINE_MUTEX(idedisk_ref_mutex);
-
-#define to_ide_disk(obj) container_of(obj, struct ide_disk_obj, kref)
-
-static void ide_disk_release(struct kref *);
-
-static struct ide_disk_obj *ide_disk_get(struct gendisk *disk)
-{
-       struct ide_disk_obj *idkp = NULL;
-
-       mutex_lock(&idedisk_ref_mutex);
-       idkp = ide_disk_g(disk);
-       if (idkp) {
-               if (ide_device_get(idkp->drive))
-                       idkp = NULL;
-               else
-                       kref_get(&idkp->kref);
-       }
-       mutex_unlock(&idedisk_ref_mutex);
-       return idkp;
-}
-
-static void ide_disk_put(struct ide_disk_obj *idkp)
-{
-       ide_drive_t *drive = idkp->drive;
-
-       mutex_lock(&idedisk_ref_mutex);
-       kref_put(&idkp->kref, ide_disk_release);
-       ide_device_put(drive);
-       mutex_unlock(&idedisk_ref_mutex);
-}
-
 static const u8 ide_rw_cmds[] = {
        ATA_CMD_READ_MULTI,
        ATA_CMD_WRITE_MULTI,
@@ -374,7 +333,7 @@ static void idedisk_check_hpa(ide_drive_t *drive)
        }
 }
 
-static void init_idedisk_capacity(ide_drive_t *drive)
+static int ide_disk_get_capacity(ide_drive_t *drive)
 {
        u16 *id = drive->id;
        int lba;
@@ -403,11 +362,28 @@ static void init_idedisk_capacity(ide_drive_t *drive)
                if (ata_id_hpa_enabled(id))
                        idedisk_check_hpa(drive);
        }
-}
 
-sector_t ide_disk_capacity(ide_drive_t *drive)
-{
-       return drive->capacity64;
+       /* limit drive capacity to 137GB if LBA48 cannot be used */
+       if ((drive->dev_flags & IDE_DFLAG_LBA48) == 0 &&
+           drive->capacity64 > 1ULL << 28) {
+               printk(KERN_WARNING "%s: cannot use LBA48 - full capacity "
+                      "%llu sectors (%llu MB)\n",
+                      drive->name, (unsigned long long)drive->capacity64,
+                      sectors_to_MB(drive->capacity64));
+               drive->capacity64 = 1ULL << 28;
+       }
+
+       if ((drive->hwif->host_flags & IDE_HFLAG_NO_LBA48_DMA) &&
+           (drive->dev_flags & IDE_DFLAG_LBA48)) {
+               if (drive->capacity64 > 1ULL << 28) {
+                       printk(KERN_INFO "%s: cannot use LBA48 DMA - PIO mode"
+                                        " will be used for accessing sectors "
+                                        "> %u\n", drive->name, 1 << 28);
+               } else
+                       drive->dev_flags &= ~IDE_DFLAG_LBA48;
+       }
+
+       return 0;
 }
 
 static void idedisk_prepare_flush(struct request_queue *q, struct request *rq)
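The capacity clamp that moves into ide_disk_get_capacity() encodes the LBA28 addressing limit: 2^28 sectors of 512 bytes each is 137,438,953,472 bytes, the "137GB" of the warning text. A quick standalone check of the arithmetic:

    #include <stdio.h>

    int main(void)
    {
            unsigned long long sectors = 1ULL << 28;   /* LBA28 address space */
            unsigned long long bytes   = sectors * 512;

            printf("%llu sectors = %llu bytes = %llu GB (decimal)\n",
                   sectors, bytes, bytes / 1000000000ULL);
            /* prints: 268435456 sectors = 137438953472 bytes = 137 GB */
            return 0;
    }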
@@ -508,7 +484,7 @@ static void update_ordered(ide_drive_t *drive)
                 * time we have trimmed the drive capacity if LBA48 is
                 * not available so we don't need to recheck that.
                 */
-               capacity = ide_disk_capacity(drive);
+               capacity = ide_gd_capacity(drive);
                barrier = ata_id_flush_enabled(id) &&
                        (drive->dev_flags & IDE_DFLAG_NOFLUSH) == 0 &&
                        ((drive->dev_flags & IDE_DFLAG_LBA48) == 0 ||
@@ -616,7 +592,12 @@ ide_ext_devset_rw(wcache, wcache);
 
 ide_ext_devset_rw_sync(nowerr, nowerr);
 
-static void idedisk_setup(ide_drive_t *drive)
+static int ide_disk_check(ide_drive_t *drive, const char *s)
+{
+       return 1;
+}
+
+static void ide_disk_setup(ide_drive_t *drive)
 {
        struct ide_disk_obj *idkp = drive->driver_data;
        ide_hwif_t *hwif = drive->hwif;
@@ -652,33 +633,13 @@ static void idedisk_setup(ide_drive_t *drive)
                         drive->queue->max_sectors / 2);
 
        /* calculate drive capacity, and select LBA if possible */
-       init_idedisk_capacity(drive);
-
-       /* limit drive capacity to 137GB if LBA48 cannot be used */
-       if ((drive->dev_flags & IDE_DFLAG_LBA48) == 0 &&
-           drive->capacity64 > 1ULL << 28) {
-               printk(KERN_WARNING "%s: cannot use LBA48 - full capacity "
-                      "%llu sectors (%llu MB)\n",
-                      drive->name, (unsigned long long)drive->capacity64,
-                      sectors_to_MB(drive->capacity64));
-               drive->capacity64 = 1ULL << 28;
-       }
-
-       if ((hwif->host_flags & IDE_HFLAG_NO_LBA48_DMA) &&
-           (drive->dev_flags & IDE_DFLAG_LBA48)) {
-               if (drive->capacity64 > 1ULL << 28) {
-                       printk(KERN_INFO "%s: cannot use LBA48 DMA - PIO mode"
-                                        " will be used for accessing sectors "
-                                        "> %u\n", drive->name, 1 << 28);
-               } else
-                       drive->dev_flags &= ~IDE_DFLAG_LBA48;
-       }
+       ide_disk_get_capacity(drive);
 
        /*
         * if possible, give fdisk access to more of the drive,
         * by correcting bios_cyls:
         */
-       capacity = ide_disk_capacity(drive);
+       capacity = ide_gd_capacity(drive);
 
        if ((drive->dev_flags & IDE_DFLAG_FORCED_GEOM) == 0) {
                if (ata_id_lba48_enabled(drive->id)) {
@@ -718,9 +679,17 @@ static void idedisk_setup(ide_drive_t *drive)
                drive->dev_flags |= IDE_DFLAG_WCACHE;
 
        set_wcache(drive, 1);
+
+       if ((drive->dev_flags & IDE_DFLAG_LBA) == 0 &&
+           (drive->head == 0 || drive->head > 16)) {
+               printk(KERN_ERR "%s: invalid geometry: %d physical heads?\n",
+                       drive->name, drive->head);
+               drive->dev_flags &= ~IDE_DFLAG_ATTACH;
+       } else
+               drive->dev_flags |= IDE_DFLAG_ATTACH;
 }
 
-static void ide_cacheflush_p(ide_drive_t *drive)
+static void ide_disk_flush(ide_drive_t *drive)
 {
        if (ata_id_flush_enabled(drive->id) == 0 ||
            (drive->dev_flags & IDE_DFLAG_WCACHE) == 0)
@@ -730,267 +699,40 @@ static void ide_cacheflush_p(ide_drive_t *drive)
                printk(KERN_INFO "%s: wcache flush failed!\n", drive->name);
 }
 
-static void ide_disk_remove(ide_drive_t *drive)
-{
-       struct ide_disk_obj *idkp = drive->driver_data;
-       struct gendisk *g = idkp->disk;
-
-       ide_proc_unregister_driver(drive, idkp->driver);
-
-       del_gendisk(g);
-
-       ide_cacheflush_p(drive);
-
-       ide_disk_put(idkp);
-}
-
-static void ide_disk_release(struct kref *kref)
-{
-       struct ide_disk_obj *idkp = to_ide_disk(kref);
-       ide_drive_t *drive = idkp->drive;
-       struct gendisk *g = idkp->disk;
-
-       drive->driver_data = NULL;
-       g->private_data = NULL;
-       put_disk(g);
-       kfree(idkp);
-}
-
-static int ide_disk_probe(ide_drive_t *drive);
-
-/*
- * On HPA drives the capacity needs to be
- * reinitilized on resume otherwise the disk
- * can not be used and a hard reset is required
- */
-static void ide_disk_resume(ide_drive_t *drive)
+static int ide_disk_init_media(ide_drive_t *drive, struct gendisk *disk)
 {
-       if (ata_id_hpa_enabled(drive->id))
-               init_idedisk_capacity(drive);
-}
-
-static void ide_device_shutdown(ide_drive_t *drive)
-{
-#ifdef CONFIG_ALPHA
-       /* On Alpha, halt(8) doesn't actually turn the machine off,
-          it puts you into the sort of firmware monitor. Typically,
-          it's used to boot another kernel image, so it's not much
-          different from reboot(8). Therefore, we don't need to
-          spin down the disk in this case, especially since Alpha
-          firmware doesn't handle disks in standby mode properly.
-          On the other hand, it's reasonably safe to turn the power
-          off when the shutdown process reaches the firmware prompt,
-          as the firmware initialization takes rather long time -
-          at least 10 seconds, which should be sufficient for
-          the disk to expire its write cache. */
-       if (system_state != SYSTEM_POWER_OFF) {
-#else
-       if (system_state == SYSTEM_RESTART) {
-#endif
-               ide_cacheflush_p(drive);
-               return;
-       }
-
-       printk(KERN_INFO "Shutdown: %s\n", drive->name);
-
-       drive->gendev.bus->suspend(&drive->gendev, PMSG_SUSPEND);
+       return 0;
 }
 
-static ide_driver_t idedisk_driver = {
-       .gen_driver = {
-               .owner          = THIS_MODULE,
-               .name           = "ide-disk",
-               .bus            = &ide_bus_type,
-       },
-       .probe                  = ide_disk_probe,
-       .remove                 = ide_disk_remove,
-       .resume                 = ide_disk_resume,
-       .shutdown               = ide_device_shutdown,
-       .version                = IDEDISK_VERSION,
-       .do_request             = ide_do_rw_disk,
-       .end_request            = ide_end_request,
-       .error                  = __ide_error,
-#ifdef CONFIG_IDE_PROC_FS
-       .proc                   = ide_disk_proc,
-       .settings               = ide_disk_settings,
-#endif
-};
-
-static int idedisk_set_doorlock(ide_drive_t *drive, int on)
+static int ide_disk_set_doorlock(ide_drive_t *drive, struct gendisk *disk,
+                                int on)
 {
        ide_task_t task;
+       int ret;
+
+       if ((drive->dev_flags & IDE_DFLAG_DOORLOCKING) == 0)
+               return 0;
 
        memset(&task, 0, sizeof(task));
        task.tf.command = on ? ATA_CMD_MEDIA_LOCK : ATA_CMD_MEDIA_UNLOCK;
        task.tf_flags = IDE_TFLAG_TF | IDE_TFLAG_DEVICE;
 
-       return ide_no_data_taskfile(drive, &task);
-}
-
-static int idedisk_open(struct inode *inode, struct file *filp)
-{
-       struct gendisk *disk = inode->i_bdev->bd_disk;
-       struct ide_disk_obj *idkp;
-       ide_drive_t *drive;
-
-       idkp = ide_disk_get(disk);
-       if (idkp == NULL)
-               return -ENXIO;
-
-       drive = idkp->drive;
-
-       idkp->openers++;
-
-       if ((drive->dev_flags & IDE_DFLAG_REMOVABLE) && idkp->openers == 1) {
-               check_disk_change(inode->i_bdev);
-               /*
-                * Ignore the return code from door_lock,
-                * since the open() has already succeeded,
-                * and the door_lock is irrelevant at this point.
-                */
-               if ((drive->dev_flags & IDE_DFLAG_DOORLOCKING) &&
-                   idedisk_set_doorlock(drive, 1))
-                       drive->dev_flags &= ~IDE_DFLAG_DOORLOCKING;
-       }
-       return 0;
-}
-
-static int idedisk_release(struct inode *inode, struct file *filp)
-{
-       struct gendisk *disk = inode->i_bdev->bd_disk;
-       struct ide_disk_obj *idkp = ide_disk_g(disk);
-       ide_drive_t *drive = idkp->drive;
-
-       if (idkp->openers == 1)
-               ide_cacheflush_p(drive);
-
-       if ((drive->dev_flags & IDE_DFLAG_REMOVABLE) && idkp->openers == 1) {
-               if ((drive->dev_flags & IDE_DFLAG_DOORLOCKING) &&
-                   idedisk_set_doorlock(drive, 0))
-                       drive->dev_flags &= ~IDE_DFLAG_DOORLOCKING;
-       }
+       ret = ide_no_data_taskfile(drive, &task);
 
-       idkp->openers--;
+       if (ret)
+               drive->dev_flags &= ~IDE_DFLAG_DOORLOCKING;
 
-       ide_disk_put(idkp);
-
-       return 0;
-}
-
-static int idedisk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
-{
-       struct ide_disk_obj *idkp = ide_disk_g(bdev->bd_disk);
-       ide_drive_t *drive = idkp->drive;
-
-       geo->heads = drive->bios_head;
-       geo->sectors = drive->bios_sect;
-       geo->cylinders = (u16)drive->bios_cyl; /* truncate */
-       return 0;
+       return ret;
 }
 
-static int idedisk_media_changed(struct gendisk *disk)
-{
-       struct ide_disk_obj *idkp = ide_disk_g(disk);
-       ide_drive_t *drive = idkp->drive;
-
-       /* do not scan partitions twice if this is a removable device */
-       if (drive->dev_flags & IDE_DFLAG_ATTACH) {
-               drive->dev_flags &= ~IDE_DFLAG_ATTACH;
-               return 0;
-       }
-
-       /* if removable, always assume it was changed */
-       return !!(drive->dev_flags & IDE_DFLAG_REMOVABLE);
-}
-
-static int idedisk_revalidate_disk(struct gendisk *disk)
-{
-       struct ide_disk_obj *idkp = ide_disk_g(disk);
-       set_capacity(disk, ide_disk_capacity(idkp->drive));
-       return 0;
-}
-
-static struct block_device_operations idedisk_ops = {
-       .owner                  = THIS_MODULE,
-       .open                   = idedisk_open,
-       .release                = idedisk_release,
-       .ioctl                  = ide_disk_ioctl,
-       .getgeo                 = idedisk_getgeo,
-       .media_changed          = idedisk_media_changed,
-       .revalidate_disk        = idedisk_revalidate_disk
+const struct ide_disk_ops ide_ata_disk_ops = {
+       .check          = ide_disk_check,
+       .get_capacity   = ide_disk_get_capacity,
+       .setup          = ide_disk_setup,
+       .flush          = ide_disk_flush,
+       .init_media     = ide_disk_init_media,
+       .set_doorlock   = ide_disk_set_doorlock,
+       .do_request     = ide_do_rw_disk,
+       .end_request    = ide_end_request,
+       .ioctl          = ide_disk_ioctl,
 };
-
-MODULE_DESCRIPTION("ATA DISK Driver");
-
-static int ide_disk_probe(ide_drive_t *drive)
-{
-       struct ide_disk_obj *idkp;
-       struct gendisk *g;
-
-       /* strstr("foo", "") is non-NULL */
-       if (!strstr("ide-disk", drive->driver_req))
-               goto failed;
-
-       if (drive->media != ide_disk)
-               goto failed;
-
-       idkp = kzalloc(sizeof(*idkp), GFP_KERNEL);
-       if (!idkp)
-               goto failed;
-
-       g = alloc_disk_node(IDE_DISK_MINORS, hwif_to_node(drive->hwif));
-       if (!g)
-               goto out_free_idkp;
-
-       ide_init_disk(g, drive);
-
-       kref_init(&idkp->kref);
-
-       idkp->drive = drive;
-       idkp->driver = &idedisk_driver;
-       idkp->disk = g;
-
-       g->private_data = &idkp->driver;
-
-       drive->driver_data = idkp;
-
-       idedisk_setup(drive);
-       if ((drive->dev_flags & IDE_DFLAG_LBA) == 0 &&
-           (drive->head == 0 || drive->head > 16)) {
-               printk(KERN_ERR "%s: INVALID GEOMETRY: %d PHYSICAL HEADS?\n",
-                       drive->name, drive->head);
-               drive->dev_flags &= ~IDE_DFLAG_ATTACH;
-       } else
-               drive->dev_flags |= IDE_DFLAG_ATTACH;
-
-       g->minors = IDE_DISK_MINORS;
-       g->driverfs_dev = &drive->gendev;
-       g->flags |= GENHD_FL_EXT_DEVT;
-       if (drive->dev_flags & IDE_DFLAG_REMOVABLE)
-               g->flags = GENHD_FL_REMOVABLE;
-       set_capacity(g, ide_disk_capacity(drive));
-       g->fops = &idedisk_ops;
-       add_disk(g);
-       return 0;
-
-out_free_idkp:
-       kfree(idkp);
-failed:
-       return -ENODEV;
-}
-
-static void __exit idedisk_exit(void)
-{
-       driver_unregister(&idedisk_driver.gen_driver);
-}
-
-static int __init idedisk_init(void)
-{
-       return driver_register(&idedisk_driver.gen_driver);
-}
-
-MODULE_ALIAS("ide:*m-disk*");
-MODULE_ALIAS("ide-disk");
-module_init(idedisk_init);
-module_exit(idedisk_exit);
-MODULE_LICENSE("GPL");
index a82fa4355665caed6710050a9bc8c64af0b33684..b234b0feaf7b2f03c9a180891c97d7f8734f997d 100644 (file)
@@ -1,19 +1,11 @@
 #ifndef __IDE_DISK_H
 #define __IDE_DISK_H
 
-struct ide_disk_obj {
-       ide_drive_t     *drive;
-       ide_driver_t    *driver;
-       struct gendisk  *disk;
-       struct kref     kref;
-       unsigned int    openers;        /* protected by BKL for now */
-};
-
-#define ide_disk_g(disk) \
-       container_of((disk)->private_data, struct ide_disk_obj, driver)
+#include "ide-gd.h"
 
+#ifdef CONFIG_IDE_GD_ATA
 /* ide-disk.c */
-sector_t ide_disk_capacity(ide_drive_t *);
+extern const struct ide_disk_ops ide_ata_disk_ops;
 ide_decl_devset(address);
 ide_decl_devset(multcount);
 ide_decl_devset(nowerr);
@@ -21,12 +13,17 @@ ide_decl_devset(wcache);
 ide_decl_devset(acoustic);
 
 /* ide-disk_ioctl.c */
-int ide_disk_ioctl(struct inode *, struct file *, unsigned int, unsigned long);
+int ide_disk_ioctl(ide_drive_t *, struct inode *, struct file *, unsigned int,
+                  unsigned long);
 
 #ifdef CONFIG_IDE_PROC_FS
 /* ide-disk_proc.c */
 extern ide_proc_entry_t ide_disk_proc[];
 extern const struct ide_proc_devset ide_disk_settings[];
 #endif
+#else
+#define ide_disk_proc          NULL
+#define ide_disk_settings      NULL
+#endif
 
 #endif /* __IDE_DISK_H */
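The #else branch added here degrades the proc tables to NULL when CONFIG_IDE_GD_ATA is not set, so the shared ide-gd code can reference ide_disk_proc and ide_disk_settings unconditionally. The same config-stub trick in a standalone form (names are illustrative):

    #include <stdio.h>

    #define CONFIG_FEATURE 0   /* flip to 1 to compile the table in */

    #if CONFIG_FEATURE
    static const char *feature_table[] = { "entry", 0 };
    #define feature_proc feature_table
    #else
    #define feature_proc NULL
    #endif

    int main(void)
    {
            /* shared code can test the pointer without any #ifdef */
            printf("table %savailable\n", feature_proc ? "" : "not ");
            return 0;
    }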
index a6cf1a03a806e9ca1ac607777ea35eefc7512dc0..a49698bcf9667e097ae7ab3899aae29efa5b3136 100644 (file)
@@ -13,12 +13,10 @@ static const struct ide_ioctl_devset ide_disk_ioctl_settings[] = {
 { 0 }
 };
 
-int ide_disk_ioctl(struct inode *inode, struct file *file,
+int ide_disk_ioctl(ide_drive_t *drive, struct inode *inode, struct file *file,
                   unsigned int cmd, unsigned long arg)
 {
        struct block_device *bdev = inode->i_bdev;
-       struct ide_disk_obj *idkp = ide_disk_g(bdev->bd_disk);
-       ide_drive_t *drive = idkp->drive;
        int err;
 
        err = ide_setting_ioctl(drive, bdev, cmd, arg, ide_disk_ioctl_settings);
index 4724976afe716f68d11383018632650fd746582a..1146f4204c6e4499ac93f75bfc409d14642c25a1 100644 (file)
@@ -56,7 +56,7 @@ static int proc_idedisk_read_capacity
        ide_drive_t*drive = (ide_drive_t *)data;
        int len;
 
-       len = sprintf(page, "%llu\n", (long long)ide_disk_capacity(drive));
+       len = sprintf(page, "%llu\n", (long long)ide_gd_capacity(drive));
 
        PROC_IDE_READ_RETURN(page, start, off, count, eof, len);
 }
index 0903782689e9753db304ea9d63fc6be61a386cc3..cac431f0df1769eda7e4f2d7fe28d7f8914602db 100644 (file)
@@ -130,7 +130,7 @@ int ide_build_dmatable(ide_drive_t *drive, struct request *rq)
                        xcount = bcount & 0xffff;
                        if (is_trm290)
                                xcount = ((xcount >> 2) - 1) << 16;
-                       if (xcount == 0x0000) {
+                       else if (xcount == 0x0000) {
                                if (count++ >= PRD_ENTRIES)
                                        goto use_pio_instead;
                                *table++ = cpu_to_le32(0x8000);
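The one-word fix above, "if" becoming "else if", is easy to miss: the xcount == 0 special case splits a 64 KiB chunk into two 32 KiB PRD entries, which only makes sense for the plain SFF-8038i table format where a zero byte count denotes 64 KiB. After the TRM290 re-encoding on the preceding line, a zero xcount no longer carries that meaning, so the split is now confined to the non-TRM290 path. A userspace sketch of the corrected control flow (semantics as read from the hunk, not checked against TRM290 documentation):

    #include <stdio.h>

    static const char *classify(int is_trm290, unsigned int bcount)
    {
            unsigned int xcount = bcount & 0xffff;

            if (is_trm290)
                    return "TRM290 encoding, no split";
            else if (xcount == 0x0000)
                    return "64 KiB chunk: split into two 32 KiB PRD entries";
            return "ordinary PRD entry";
    }

    int main(void)
    {
            printf("sff,    64K: %s\n", classify(0, 0x10000));
            printf("trm290, 64K: %s\n", classify(1, 0x10000));
            return 0;
    }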
index cf0aa25470ee87368291dbf8614d2b67020315e9..aeb1ad782f54e5fbc9d39d3cda3e6eeabfc1bef4 100644 (file)
  * Documentation/ide/ChangeLog.ide-floppy.1996-2002
  */
 
-#define DRV_NAME "ide-floppy"
-#define PFX DRV_NAME ": "
-
-#define IDEFLOPPY_VERSION "1.00"
-
-#include <linux/module.h>
 #include <linux/types.h>
 #include <linux/string.h>
 #include <linux/kernel.h>
 
 #include "ide-floppy.h"
 
-/* module parameters */
-static unsigned long debug_mask;
-module_param(debug_mask, ulong, 0644);
-
-/* define to see debug info */
-#define IDEFLOPPY_DEBUG_LOG    0
-
-#if IDEFLOPPY_DEBUG_LOG
-#define ide_debug_log(lvl, fmt, args...) __ide_debug_log(lvl, fmt, args)
-#else
-#define ide_debug_log(lvl, fmt, args...) do {} while (0)
-#endif
-
 /*
  * After each failed packet command we issue a request sense command and retry
  * the packet command IDEFLOPPY_MAX_PC_RETRIES times.
@@ -83,43 +64,13 @@ module_param(debug_mask, ulong, 0644);
 /* Error code returned in rq->errors to the higher part of the driver. */
 #define        IDEFLOPPY_ERROR_GENERAL         101
 
-static DEFINE_MUTEX(idefloppy_ref_mutex);
-
-static void idefloppy_cleanup_obj(struct kref *);
-
-static struct ide_floppy_obj *ide_floppy_get(struct gendisk *disk)
-{
-       struct ide_floppy_obj *floppy = NULL;
-
-       mutex_lock(&idefloppy_ref_mutex);
-       floppy = ide_drv_g(disk, ide_floppy_obj);
-       if (floppy) {
-               if (ide_device_get(floppy->drive))
-                       floppy = NULL;
-               else
-                       kref_get(&floppy->kref);
-       }
-       mutex_unlock(&idefloppy_ref_mutex);
-       return floppy;
-}
-
-static void ide_floppy_put(struct ide_floppy_obj *floppy)
-{
-       ide_drive_t *drive = floppy->drive;
-
-       mutex_lock(&idefloppy_ref_mutex);
-       kref_put(&floppy->kref, idefloppy_cleanup_obj);
-       ide_device_put(drive);
-       mutex_unlock(&idefloppy_ref_mutex);
-}
-
 /*
  * Used to finish servicing a request. For read/write requests, we will call
  * ide_end_request to pass to the next buffer.
  */
-static int idefloppy_end_request(ide_drive_t *drive, int uptodate, int nsecs)
+static int ide_floppy_end_request(ide_drive_t *drive, int uptodate, int nsecs)
 {
-       idefloppy_floppy_t *floppy = drive->driver_data;
+       struct ide_disk_obj *floppy = drive->driver_data;
        struct request *rq = HWGROUP(drive)->rq;
        int error;
 
@@ -161,12 +112,12 @@ static void idefloppy_update_buffers(ide_drive_t *drive,
        struct bio *bio = rq->bio;
 
        while ((bio = rq->bio) != NULL)
-               idefloppy_end_request(drive, 1, 0);
+               ide_floppy_end_request(drive, 1, 0);
 }
 
 static void ide_floppy_callback(ide_drive_t *drive, int dsc)
 {
-       idefloppy_floppy_t *floppy = drive->driver_data;
+       struct ide_disk_obj *floppy = drive->driver_data;
        struct ide_atapi_pc *pc = drive->pc;
        int uptodate = pc->error ? 0 : 1;
 
@@ -200,10 +151,10 @@ static void ide_floppy_callback(ide_drive_t *drive, int dsc)
                               "Aborting request!\n");
        }
 
-       idefloppy_end_request(drive, uptodate, 0);
+       ide_floppy_end_request(drive, uptodate, 0);
 }
 
-static void ide_floppy_report_error(idefloppy_floppy_t *floppy,
+static void ide_floppy_report_error(struct ide_disk_obj *floppy,
                                    struct ide_atapi_pc *pc)
 {
        /* suppress error messages resulting from Medium not present */
@@ -222,7 +173,7 @@ static void ide_floppy_report_error(idefloppy_floppy_t *floppy,
 static ide_startstop_t idefloppy_issue_pc(ide_drive_t *drive,
                struct ide_atapi_pc *pc)
 {
-       idefloppy_floppy_t *floppy = drive->driver_data;
+       struct ide_disk_obj *floppy = drive->driver_data;
 
        if (floppy->failed_pc == NULL &&
            pc->c[0] != GPCMD_REQUEST_SENSE)
@@ -286,7 +237,7 @@ static void idefloppy_create_rw_cmd(ide_drive_t *drive,
                                    struct ide_atapi_pc *pc, struct request *rq,
                                    unsigned long sector)
 {
-       idefloppy_floppy_t *floppy = drive->driver_data;
+       struct ide_disk_obj *floppy = drive->driver_data;
        int block = sector / floppy->bs_factor;
        int blocks = rq->nr_sectors / floppy->bs_factor;
        int cmd = rq_data_dir(rq);
@@ -310,7 +261,7 @@ static void idefloppy_create_rw_cmd(ide_drive_t *drive,
        pc->flags |= PC_FLAG_DMA_OK;
 }
 
-static void idefloppy_blockpc_cmd(idefloppy_floppy_t *floppy,
+static void idefloppy_blockpc_cmd(struct ide_disk_obj *floppy,
                struct ide_atapi_pc *pc, struct request *rq)
 {
        ide_init_pc(pc);
@@ -329,13 +280,12 @@ static void idefloppy_blockpc_cmd(idefloppy_floppy_t *floppy,
        pc->req_xfer = pc->buf_size = rq->data_len;
 }
 
-static ide_startstop_t idefloppy_do_request(ide_drive_t *drive,
-               struct request *rq, sector_t block_s)
+static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive,
+                                            struct request *rq, sector_t block)
 {
-       idefloppy_floppy_t *floppy = drive->driver_data;
+       struct ide_disk_obj *floppy = drive->driver_data;
        ide_hwif_t *hwif = drive->hwif;
        struct ide_atapi_pc *pc;
-       unsigned long block = (unsigned long)block_s;
 
        ide_debug_log(IDE_DBG_FUNC, "%s: dev: %s, cmd: 0x%x, cmd_type: %x, "
                      "errors: %d\n",
@@ -353,7 +303,7 @@ static ide_startstop_t idefloppy_do_request(ide_drive_t *drive,
                else
                        printk(KERN_ERR PFX "%s: I/O error\n", drive->name);
 
-               idefloppy_end_request(drive, 0, 0);
+               ide_floppy_end_request(drive, 0, 0);
                return ide_stopped;
        }
        if (blk_fs_request(rq)) {
@@ -361,11 +311,11 @@ static ide_startstop_t idefloppy_do_request(ide_drive_t *drive,
                    (rq->nr_sectors % floppy->bs_factor)) {
                        printk(KERN_ERR PFX "%s: unsupported r/w rq size\n",
                                drive->name);
-                       idefloppy_end_request(drive, 0, 0);
+                       ide_floppy_end_request(drive, 0, 0);
                        return ide_stopped;
                }
                pc = &floppy->queued_pc;
-               idefloppy_create_rw_cmd(drive, pc, rq, block);
+               idefloppy_create_rw_cmd(drive, pc, rq, (unsigned long)block);
        } else if (blk_special_request(rq)) {
                pc = (struct ide_atapi_pc *) rq->buffer;
        } else if (blk_pc_request(rq)) {
@@ -373,7 +323,7 @@ static ide_startstop_t idefloppy_do_request(ide_drive_t *drive,
                idefloppy_blockpc_cmd(floppy, pc, rq);
        } else {
                blk_dump_rq_flags(rq, PFX "unsupported command in queue");
-               idefloppy_end_request(drive, 0, 0);
+               ide_floppy_end_request(drive, 0, 0);
                return ide_stopped;
        }
 
@@ -394,7 +344,7 @@ static ide_startstop_t idefloppy_do_request(ide_drive_t *drive,
  */
 static int ide_floppy_get_flexible_disk_page(ide_drive_t *drive)
 {
-       idefloppy_floppy_t *floppy = drive->driver_data;
+       struct ide_disk_obj *floppy = drive->driver_data;
        struct gendisk *disk = floppy->disk;
        struct ide_atapi_pc pc;
        u8 *page;
@@ -410,11 +360,11 @@ static int ide_floppy_get_flexible_disk_page(ide_drive_t *drive)
        }
 
        if (pc.buf[3] & 0x80)
-               drive->atapi_flags |= IDE_AFLAG_WP;
+               drive->dev_flags |= IDE_DFLAG_WP;
        else
-               drive->atapi_flags &= ~IDE_AFLAG_WP;
+               drive->dev_flags &= ~IDE_DFLAG_WP;
 
-       set_disk_ro(disk, !!(drive->atapi_flags & IDE_AFLAG_WP));
+       set_disk_ro(disk, !!(drive->dev_flags & IDE_DFLAG_WP));
 
        page = &pc.buf[8];
 
@@ -445,7 +395,9 @@ static int ide_floppy_get_flexible_disk_page(ide_drive_t *drive)
                        drive->name, lba_capacity, capacity);
                floppy->blocks = floppy->block_size ?
                        capacity / floppy->block_size : 0;
+               drive->capacity64 = floppy->blocks * floppy->bs_factor;
        }
+
        return 0;
 }
 
@@ -455,7 +407,7 @@ static int ide_floppy_get_flexible_disk_page(ide_drive_t *drive)
  */
 static int ide_floppy_get_capacity(ide_drive_t *drive)
 {
-       idefloppy_floppy_t *floppy = drive->driver_data;
+       struct ide_disk_obj *floppy = drive->driver_data;
        struct gendisk *disk = floppy->disk;
        struct ide_atapi_pc pc;
        u8 *cap_desc;
@@ -466,7 +418,7 @@ static int ide_floppy_get_capacity(ide_drive_t *drive)
        drive->bios_head = drive->bios_sect = 0;
        floppy->blocks = 0;
        floppy->bs_factor = 1;
-       set_capacity(floppy->disk, 0);
+       drive->capacity64 = 0;
 
        ide_floppy_create_read_capacity_cmd(&pc);
        if (ide_queue_pc_tail(drive, disk, &pc)) {
@@ -523,6 +475,8 @@ static int ide_floppy_get_capacity(ide_drive_t *drive)
                                               "non 512 bytes block size not "
                                               "fully supported\n",
                                               drive->name);
+                               drive->capacity64 =
+                                       floppy->blocks * floppy->bs_factor;
                                rc = 0;
                        }
                        break;
@@ -547,21 +501,12 @@ static int ide_floppy_get_capacity(ide_drive_t *drive)
        if (!(drive->atapi_flags & IDE_AFLAG_CLIK_DRIVE))
                (void) ide_floppy_get_flexible_disk_page(drive);
 
-       set_capacity(disk, floppy->blocks * floppy->bs_factor);
-
        return rc;
 }
 
-sector_t ide_floppy_capacity(ide_drive_t *drive)
-{
-       idefloppy_floppy_t *floppy = drive->driver_data;
-       unsigned long capacity = floppy->blocks * floppy->bs_factor;
-
-       return capacity;
-}
-
-static void idefloppy_setup(ide_drive_t *drive, idefloppy_floppy_t *floppy)
+static void ide_floppy_setup(ide_drive_t *drive)
 {
+       struct ide_disk_obj *floppy = drive->driver_data;
        u16 *id = drive->id;
 
        drive->pc_callback       = ide_floppy_callback;
@@ -592,252 +537,42 @@ static void idefloppy_setup(ide_drive_t *drive, idefloppy_floppy_t *floppy)
                blk_queue_max_sectors(drive->queue, 64);
                drive->atapi_flags |= IDE_AFLAG_CLIK_DRIVE;
                /* IOMEGA Clik! drives do not support lock/unlock commands */
-               drive->atapi_flags |= IDE_AFLAG_NO_DOORLOCK;
+               drive->dev_flags &= ~IDE_DFLAG_DOORLOCKING;
        }
 
        (void) ide_floppy_get_capacity(drive);
 
        ide_proc_register_driver(drive, floppy->driver);
-}
 
-static void ide_floppy_remove(ide_drive_t *drive)
-{
-       idefloppy_floppy_t *floppy = drive->driver_data;
-       struct gendisk *g = floppy->disk;
-
-       ide_proc_unregister_driver(drive, floppy->driver);
-
-       del_gendisk(g);
-
-       ide_floppy_put(floppy);
+       drive->dev_flags |= IDE_DFLAG_ATTACH;
 }
 
-static void idefloppy_cleanup_obj(struct kref *kref)
+static void ide_floppy_flush(ide_drive_t *drive)
 {
-       struct ide_floppy_obj *floppy = to_ide_drv(kref, ide_floppy_obj);
-       ide_drive_t *drive = floppy->drive;
-       struct gendisk *g = floppy->disk;
-
-       drive->driver_data = NULL;
-       g->private_data = NULL;
-       put_disk(g);
-       kfree(floppy);
 }
 
-static int ide_floppy_probe(ide_drive_t *);
-
-static ide_driver_t idefloppy_driver = {
-       .gen_driver = {
-               .owner          = THIS_MODULE,
-               .name           = "ide-floppy",
-               .bus            = &ide_bus_type,
-       },
-       .probe                  = ide_floppy_probe,
-       .remove                 = ide_floppy_remove,
-       .version                = IDEFLOPPY_VERSION,
-       .do_request             = idefloppy_do_request,
-       .end_request            = idefloppy_end_request,
-       .error                  = __ide_error,
-#ifdef CONFIG_IDE_PROC_FS
-       .proc                   = ide_floppy_proc,
-       .settings               = ide_floppy_settings,
-#endif
-};
-
-static int idefloppy_open(struct inode *inode, struct file *filp)
+static int ide_floppy_init_media(ide_drive_t *drive, struct gendisk *disk)
 {
-       struct gendisk *disk = inode->i_bdev->bd_disk;
-       struct ide_floppy_obj *floppy;
-       ide_drive_t *drive;
        int ret = 0;
 
-       floppy = ide_floppy_get(disk);
-       if (!floppy)
-               return -ENXIO;
-
-       drive = floppy->drive;
-
-       ide_debug_log(IDE_DBG_FUNC, "Call %s\n", __func__);
-
-       floppy->openers++;
-
-       if (floppy->openers == 1) {
-               drive->atapi_flags &= ~IDE_AFLAG_FORMAT_IN_PROGRESS;
-               /* Just in case */
-
-               if (ide_do_test_unit_ready(drive, disk))
-                       ide_do_start_stop(drive, disk, 1);
-
-               if (ide_floppy_get_capacity(drive)
-                  && (filp->f_flags & O_NDELAY) == 0
-                   /*
-                    * Allow O_NDELAY to open a drive without a disk, or with an
-                    * unreadable disk, so that we can get the format capacity
-                    * of the drive or begin the format - Sam
-                    */
-                   ) {
-                       ret = -EIO;
-                       goto out_put_floppy;
-               }
-
-               if ((drive->atapi_flags & IDE_AFLAG_WP) && (filp->f_mode & 2)) {
-                       ret = -EROFS;
-                       goto out_put_floppy;
-               }
-
-               drive->atapi_flags |= IDE_AFLAG_MEDIA_CHANGED;
-               ide_set_media_lock(drive, disk, 1);
-               check_disk_change(inode->i_bdev);
-       } else if (drive->atapi_flags & IDE_AFLAG_FORMAT_IN_PROGRESS) {
-               ret = -EBUSY;
-               goto out_put_floppy;
-       }
-       return 0;
-
-out_put_floppy:
-       floppy->openers--;
-       ide_floppy_put(floppy);
-       return ret;
-}
-
-static int idefloppy_release(struct inode *inode, struct file *filp)
-{
-       struct gendisk *disk = inode->i_bdev->bd_disk;
-       struct ide_floppy_obj *floppy = ide_drv_g(disk, ide_floppy_obj);
-       ide_drive_t *drive = floppy->drive;
-
-       ide_debug_log(IDE_DBG_FUNC, "Call %s\n", __func__);
-
-       if (floppy->openers == 1) {
-               ide_set_media_lock(drive, disk, 0);
-               drive->atapi_flags &= ~IDE_AFLAG_FORMAT_IN_PROGRESS;
-       }
-
-       floppy->openers--;
-
-       ide_floppy_put(floppy);
-
-       return 0;
-}
-
-static int idefloppy_getgeo(struct block_device *bdev, struct hd_geometry *geo)
-{
-       struct ide_floppy_obj *floppy = ide_drv_g(bdev->bd_disk,
-                                                    ide_floppy_obj);
-       ide_drive_t *drive = floppy->drive;
+       if (ide_do_test_unit_ready(drive, disk))
+               ide_do_start_stop(drive, disk, 1);
 
-       geo->heads = drive->bios_head;
-       geo->sectors = drive->bios_sect;
-       geo->cylinders = (u16)drive->bios_cyl; /* truncate */
-       return 0;
-}
+       ret = ide_floppy_get_capacity(drive);
 
-static int idefloppy_media_changed(struct gendisk *disk)
-{
-       struct ide_floppy_obj *floppy = ide_drv_g(disk, ide_floppy_obj);
-       ide_drive_t *drive = floppy->drive;
-       int ret;
+       set_capacity(disk, ide_gd_capacity(drive));
 
-       /* do not scan partitions twice if this is a removable device */
-       if (drive->dev_flags & IDE_DFLAG_ATTACH) {
-               drive->dev_flags &= ~IDE_DFLAG_ATTACH;
-               return 0;
-       }
-       ret = !!(drive->atapi_flags & IDE_AFLAG_MEDIA_CHANGED);
-       drive->atapi_flags &= ~IDE_AFLAG_MEDIA_CHANGED;
        return ret;
 }
 
-static int idefloppy_revalidate_disk(struct gendisk *disk)
-{
-       struct ide_floppy_obj *floppy = ide_drv_g(disk, ide_floppy_obj);
-       set_capacity(disk, ide_floppy_capacity(floppy->drive));
-       return 0;
-}
-
-static struct block_device_operations idefloppy_ops = {
-       .owner                  = THIS_MODULE,
-       .open                   = idefloppy_open,
-       .release                = idefloppy_release,
-       .ioctl                  = ide_floppy_ioctl,
-       .getgeo                 = idefloppy_getgeo,
-       .media_changed          = idefloppy_media_changed,
-       .revalidate_disk        = idefloppy_revalidate_disk
+const struct ide_disk_ops ide_atapi_disk_ops = {
+       .check          = ide_check_atapi_device,
+       .get_capacity   = ide_floppy_get_capacity,
+       .setup          = ide_floppy_setup,
+       .flush          = ide_floppy_flush,
+       .init_media     = ide_floppy_init_media,
+       .set_doorlock   = ide_set_media_lock,
+       .do_request     = ide_floppy_do_request,
+       .end_request    = ide_floppy_end_request,
+       .ioctl          = ide_floppy_ioctl,
 };
-
-static int ide_floppy_probe(ide_drive_t *drive)
-{
-       idefloppy_floppy_t *floppy;
-       struct gendisk *g;
-
-       if (!strstr("ide-floppy", drive->driver_req))
-               goto failed;
-
-       if (drive->media != ide_floppy)
-               goto failed;
-
-       if (!ide_check_atapi_device(drive, DRV_NAME)) {
-               printk(KERN_ERR PFX "%s: not supported by this version of "
-                      DRV_NAME "\n", drive->name);
-               goto failed;
-       }
-       floppy = kzalloc(sizeof(idefloppy_floppy_t), GFP_KERNEL);
-       if (!floppy) {
-               printk(KERN_ERR PFX "%s: Can't allocate a floppy structure\n",
-                      drive->name);
-               goto failed;
-       }
-
-       g = alloc_disk(1 << PARTN_BITS);
-       if (!g)
-               goto out_free_floppy;
-
-       ide_init_disk(g, drive);
-
-       kref_init(&floppy->kref);
-
-       floppy->drive = drive;
-       floppy->driver = &idefloppy_driver;
-       floppy->disk = g;
-
-       g->private_data = &floppy->driver;
-
-       drive->driver_data = floppy;
-
-       drive->debug_mask = debug_mask;
-
-       idefloppy_setup(drive, floppy);
-       drive->dev_flags |= IDE_DFLAG_ATTACH;
-
-       g->minors = 1 << PARTN_BITS;
-       g->driverfs_dev = &drive->gendev;
-       if (drive->dev_flags & IDE_DFLAG_REMOVABLE)
-               g->flags = GENHD_FL_REMOVABLE;
-       g->fops = &idefloppy_ops;
-       add_disk(g);
-       return 0;
-
-out_free_floppy:
-       kfree(floppy);
-failed:
-       return -ENODEV;
-}
-
-static void __exit idefloppy_exit(void)
-{
-       driver_unregister(&idefloppy_driver.gen_driver);
-}
-
-static int __init idefloppy_init(void)
-{
-       printk(KERN_INFO DRV_NAME " driver " IDEFLOPPY_VERSION "\n");
-       return driver_register(&idefloppy_driver.gen_driver);
-}
-
-MODULE_ALIAS("ide:*m-floppy*");
-MODULE_ALIAS("ide-floppy");
-module_init(idefloppy_init);
-module_exit(idefloppy_exit);
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("ATAPI FLOPPY Driver");
-
index 17cf865e583d28edd7890b3e874bcd0cdec561ce..c17124dd6079e6b7edfe1bfd3651a167678c5925 100644 (file)
@@ -1,37 +1,9 @@
 #ifndef __IDE_FLOPPY_H
 #define __IDE_FLOPPY_H
 
-/*
- * Most of our global data which we need to save even as we leave the driver
- * due to an interrupt or a timer event is stored in a variable of type
- * idefloppy_floppy_t, defined below.
- */
-typedef struct ide_floppy_obj {
-       ide_drive_t     *drive;
-       ide_driver_t    *driver;
-       struct gendisk  *disk;
-       struct kref     kref;
-       unsigned int    openers;        /* protected by BKL for now */
-
-       /* Last failed packet command */
-       struct ide_atapi_pc *failed_pc;
-       /* used for blk_{fs,pc}_request() requests */
-       struct ide_atapi_pc queued_pc;
-
-       /* Last error information */
-       u8 sense_key, asc, ascq;
-
-       int progress_indication;
-
-       /* Device information */
-       /* Current format */
-       int blocks, block_size, bs_factor;
-       /* Last format capacity descriptor */
-       u8 cap_desc[8];
-       /* Copy of the flexible disk page */
-       u8 flexible_disk_page[32];
-} idefloppy_floppy_t;
+#include "ide-gd.h"
 
+#ifdef CONFIG_IDE_GD_ATAPI
 /*
  * Pages of the SELECT SENSE / MODE SENSE packet commands.
  * See SFF-8070i spec.
@@ -46,17 +18,22 @@ typedef struct ide_floppy_obj {
 #define IDEFLOPPY_IOCTL_FORMAT_GET_PROGRESS    0x4603
 
 /* ide-floppy.c */
+extern const struct ide_disk_ops ide_atapi_disk_ops;
 void ide_floppy_create_mode_sense_cmd(struct ide_atapi_pc *, u8);
 void ide_floppy_create_read_capacity_cmd(struct ide_atapi_pc *);
-sector_t ide_floppy_capacity(ide_drive_t *);
 
 /* ide-floppy_ioctl.c */
-int ide_floppy_ioctl(struct inode *, struct file *, unsigned, unsigned long);
+int ide_floppy_ioctl(ide_drive_t *, struct inode *, struct file *, unsigned int,
+                    unsigned long);
 
 #ifdef CONFIG_IDE_PROC_FS
 /* ide-floppy_proc.c */
 extern ide_proc_entry_t ide_floppy_proc[];
 extern const struct ide_proc_devset ide_floppy_settings[];
 #endif
+#else
+#define ide_floppy_proc                NULL
+#define ide_floppy_settings    NULL
+#endif
 
 #endif /*__IDE_FLOPPY_H */
index a3a7a0809e2bdf5edba01c3c93c0f2b6cd467d8a..409e4c15f9b71b6c2d67d2aa302bd4fe524ccf3d 100644 (file)
@@ -33,7 +33,7 @@
 
 static int ide_floppy_get_format_capacities(ide_drive_t *drive, int __user *arg)
 {
-       struct ide_floppy_obj *floppy = drive->driver_data;
+       struct ide_disk_obj *floppy = drive->driver_data;
        struct ide_atapi_pc pc;
        u8 header_len, desc_cnt;
        int i, blocks, length, u_array_size, u_index;
@@ -113,7 +113,7 @@ static void ide_floppy_create_format_unit_cmd(struct ide_atapi_pc *pc, int b,
 
 static int ide_floppy_get_sfrp_bit(ide_drive_t *drive)
 {
-       idefloppy_floppy_t *floppy = drive->driver_data;
+       struct ide_disk_obj *floppy = drive->driver_data;
        struct ide_atapi_pc pc;
 
        drive->atapi_flags &= ~IDE_AFLAG_SRFP;
@@ -132,17 +132,17 @@ static int ide_floppy_get_sfrp_bit(ide_drive_t *drive)
 
 static int ide_floppy_format_unit(ide_drive_t *drive, int __user *arg)
 {
-       idefloppy_floppy_t *floppy = drive->driver_data;
+       struct ide_disk_obj *floppy = drive->driver_data;
        struct ide_atapi_pc pc;
        int blocks, length, flags, err = 0;
 
        if (floppy->openers > 1) {
                /* Don't format if someone is using the disk */
-               drive->atapi_flags &= ~IDE_AFLAG_FORMAT_IN_PROGRESS;
+               drive->dev_flags &= ~IDE_DFLAG_FORMAT_IN_PROGRESS;
                return -EBUSY;
        }
 
-       drive->atapi_flags |= IDE_AFLAG_FORMAT_IN_PROGRESS;
+       drive->dev_flags |= IDE_DFLAG_FORMAT_IN_PROGRESS;
 
        /*
         * Send ATAPI_FORMAT_UNIT to the drive.
@@ -174,7 +174,7 @@ static int ide_floppy_format_unit(ide_drive_t *drive, int __user *arg)
 
 out:
        if (err)
-               drive->atapi_flags &= ~IDE_AFLAG_FORMAT_IN_PROGRESS;
+               drive->dev_flags &= ~IDE_DFLAG_FORMAT_IN_PROGRESS;
        return err;
 }
 
@@ -190,7 +190,7 @@ out:
 
 static int ide_floppy_get_format_progress(ide_drive_t *drive, int __user *arg)
 {
-       idefloppy_floppy_t *floppy = drive->driver_data;
+       struct ide_disk_obj *floppy = drive->driver_data;
        struct ide_atapi_pc pc;
        int progress_indication = 0x10000;
 
@@ -226,7 +226,7 @@ static int ide_floppy_get_format_progress(ide_drive_t *drive, int __user *arg)
 static int ide_floppy_lockdoor(ide_drive_t *drive, struct ide_atapi_pc *pc,
                               unsigned long arg, unsigned int cmd)
 {
-       idefloppy_floppy_t *floppy = drive->driver_data;
+       struct ide_disk_obj *floppy = drive->driver_data;
        struct gendisk *disk = floppy->disk;
        int prevent = (arg && cmd != CDROMEJECT) ? 1 : 0;
 
@@ -260,13 +260,10 @@ static int ide_floppy_format_ioctl(ide_drive_t *drive, struct file *file,
        }
 }
 
-int ide_floppy_ioctl(struct inode *inode, struct file *file,
-                   unsigned int cmd, unsigned long arg)
+int ide_floppy_ioctl(ide_drive_t *drive, struct inode *inode,
+                    struct file *file, unsigned int cmd, unsigned long arg)
 {
        struct block_device *bdev = inode->i_bdev;
-       struct ide_floppy_obj *floppy = ide_drv_g(bdev->bd_disk,
-                                                    ide_floppy_obj);
-       ide_drive_t *drive = floppy->drive;
        struct ide_atapi_pc pc;
        void __user *argp = (void __user *)arg;
        int err;
index 76f0c6c4eca3c4652e69dde32c77059220663294..3ec762cb60abfcceae5e0f5ede250adbc2ec2f7c 100644 (file)
@@ -9,7 +9,7 @@ static int proc_idefloppy_read_capacity(char *page, char **start, off_t off,
        ide_drive_t*drive = (ide_drive_t *)data;
        int len;
 
-       len = sprintf(page, "%llu\n", (long long)ide_floppy_capacity(drive));
+       len = sprintf(page, "%llu\n", (long long)ide_gd_capacity(drive));
        PROC_IDE_READ_RETURN(page, start, off, count, eof, len);
 }
 
diff --git a/drivers/ide/ide-gd.c b/drivers/ide/ide-gd.c
new file mode 100644 (file)
index 0000000..d44898f
--- /dev/null
@@ -0,0 +1,398 @@
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/genhd.h>
+#include <linux/mutex.h>
+#include <linux/ide.h>
+#include <linux/hdreg.h>
+
+#if !defined(CONFIG_DEBUG_BLOCK_EXT_DEVT)
+#define IDE_DISK_MINORS                (1 << PARTN_BITS)
+#else
+#define IDE_DISK_MINORS                0
+#endif
+
+#include "ide-disk.h"
+#include "ide-floppy.h"
+
+#define IDE_GD_VERSION "1.18"
+
+/* module parameters */
+static unsigned long debug_mask;
+module_param(debug_mask, ulong, 0644);
+
+static DEFINE_MUTEX(ide_disk_ref_mutex);
+
+static void ide_disk_release(struct kref *);
+
+static struct ide_disk_obj *ide_disk_get(struct gendisk *disk)
+{
+       struct ide_disk_obj *idkp = NULL;
+
+       mutex_lock(&ide_disk_ref_mutex);
+       idkp = ide_drv_g(disk, ide_disk_obj);
+       if (idkp) {
+               if (ide_device_get(idkp->drive))
+                       idkp = NULL;
+               else
+                       kref_get(&idkp->kref);
+       }
+       mutex_unlock(&ide_disk_ref_mutex);
+       return idkp;
+}
+
+static void ide_disk_put(struct ide_disk_obj *idkp)
+{
+       ide_drive_t *drive = idkp->drive;
+
+       mutex_lock(&ide_disk_ref_mutex);
+       kref_put(&idkp->kref, ide_disk_release);
+       ide_device_put(drive);
+       mutex_unlock(&ide_disk_ref_mutex);
+}
+
+sector_t ide_gd_capacity(ide_drive_t *drive)
+{
+       return drive->capacity64;
+}
+
+static int ide_gd_probe(ide_drive_t *);
+
+static void ide_gd_remove(ide_drive_t *drive)
+{
+       struct ide_disk_obj *idkp = drive->driver_data;
+       struct gendisk *g = idkp->disk;
+
+       ide_proc_unregister_driver(drive, idkp->driver);
+
+       del_gendisk(g);
+
+       drive->disk_ops->flush(drive);
+
+       ide_disk_put(idkp);
+}
+
+static void ide_disk_release(struct kref *kref)
+{
+       struct ide_disk_obj *idkp = to_ide_drv(kref, ide_disk_obj);
+       ide_drive_t *drive = idkp->drive;
+       struct gendisk *g = idkp->disk;
+
+       drive->disk_ops = NULL;
+       drive->driver_data = NULL;
+       g->private_data = NULL;
+       put_disk(g);
+       kfree(idkp);
+}
+
+/*
+ * On HPA drives the capacity needs to be
+ * reinitialized on resume, otherwise the disk
+ * cannot be used and a hard reset is required
+ */
+static void ide_gd_resume(ide_drive_t *drive)
+{
+       if (ata_id_hpa_enabled(drive->id))
+               (void)drive->disk_ops->get_capacity(drive);
+}
+
+static void ide_gd_shutdown(ide_drive_t *drive)
+{
+#ifdef CONFIG_ALPHA
+       /* On Alpha, halt(8) doesn't actually turn the machine off,
+          it puts you into a sort of firmware monitor. Typically,
+          it's used to boot another kernel image, so it's not much
+          different from reboot(8). Therefore, we don't need to
+          spin down the disk in this case, especially since Alpha
+          firmware doesn't handle disks in standby mode properly.
+          On the other hand, it's reasonably safe to turn the power
+          off when the shutdown process reaches the firmware prompt,
+          as the firmware initialization takes a rather long time -
+          at least 10 seconds, which should be sufficient for
+          the disk to expire its write cache. */
+       if (system_state != SYSTEM_POWER_OFF) {
+#else
+       if (system_state == SYSTEM_RESTART) {
+#endif
+               drive->disk_ops->flush(drive);
+               return;
+       }
+
+       printk(KERN_INFO "Shutdown: %s\n", drive->name);
+
+       drive->gendev.bus->suspend(&drive->gendev, PMSG_SUSPEND);
+}
+
+#ifdef CONFIG_IDE_PROC_FS
+static ide_proc_entry_t *ide_disk_proc_entries(ide_drive_t *drive)
+{
+       return (drive->media == ide_disk) ? ide_disk_proc : ide_floppy_proc;
+}
+
+static const struct ide_proc_devset *ide_disk_proc_devsets(ide_drive_t *drive)
+{
+       return (drive->media == ide_disk) ? ide_disk_settings
+                                         : ide_floppy_settings;
+}
+#endif
+
+static ide_startstop_t ide_gd_do_request(ide_drive_t *drive,
+                                        struct request *rq, sector_t sector)
+{
+       return drive->disk_ops->do_request(drive, rq, sector);
+}
+
+static int ide_gd_end_request(ide_drive_t *drive, int uptodate, int nrsecs)
+{
+       return drive->disk_ops->end_request(drive, uptodate, nrsecs);
+}
+
+static ide_driver_t ide_gd_driver = {
+       .gen_driver = {
+               .owner          = THIS_MODULE,
+               .name           = "ide-gd",
+               .bus            = &ide_bus_type,
+       },
+       .probe                  = ide_gd_probe,
+       .remove                 = ide_gd_remove,
+       .resume                 = ide_gd_resume,
+       .shutdown               = ide_gd_shutdown,
+       .version                = IDE_GD_VERSION,
+       .do_request             = ide_gd_do_request,
+       .end_request            = ide_gd_end_request,
+       .error                  = __ide_error,
+#ifdef CONFIG_IDE_PROC_FS
+       .proc_entries           = ide_disk_proc_entries,
+       .proc_devsets           = ide_disk_proc_devsets,
+#endif
+};
+
+static int ide_gd_open(struct inode *inode, struct file *filp)
+{
+       struct gendisk *disk = inode->i_bdev->bd_disk;
+       struct ide_disk_obj *idkp;
+       ide_drive_t *drive;
+       int ret = 0;
+
+       idkp = ide_disk_get(disk);
+       if (idkp == NULL)
+               return -ENXIO;
+
+       drive = idkp->drive;
+
+       ide_debug_log(IDE_DBG_FUNC, "Call %s\n", __func__);
+
+       idkp->openers++;
+
+       if ((drive->dev_flags & IDE_DFLAG_REMOVABLE) && idkp->openers == 1) {
+               drive->dev_flags &= ~IDE_DFLAG_FORMAT_IN_PROGRESS;
+               /* Just in case */
+
+               ret = drive->disk_ops->init_media(drive, disk);
+
+               /*
+                * Allow O_NDELAY to open a drive without a disk, or with an
+                * unreadable disk, so that we can get the format capacity
+                * of the drive or begin the format - Sam
+                */
+               if (ret && (filp->f_flags & O_NDELAY) == 0) {
+                       ret = -EIO;
+                       goto out_put_idkp;
+               }
+
+               if ((drive->dev_flags & IDE_DFLAG_WP) && (filp->f_mode & 2)) {
+                       ret = -EROFS;
+                       goto out_put_idkp;
+               }
+
+               /*
+                * Ignore the return code from door_lock,
+                * since the open() has already succeeded,
+                * and the door_lock is irrelevant at this point.
+                */
+               drive->disk_ops->set_doorlock(drive, disk, 1);
+               drive->dev_flags |= IDE_DFLAG_MEDIA_CHANGED;
+               check_disk_change(inode->i_bdev);
+       } else if (drive->dev_flags & IDE_DFLAG_FORMAT_IN_PROGRESS) {
+               ret = -EBUSY;
+               goto out_put_idkp;
+       }
+       return 0;
+
+out_put_idkp:
+       idkp->openers--;
+       ide_disk_put(idkp);
+       return ret;
+}
+
+static int ide_gd_release(struct inode *inode, struct file *filp)
+{
+       struct gendisk *disk = inode->i_bdev->bd_disk;
+       struct ide_disk_obj *idkp = ide_drv_g(disk, ide_disk_obj);
+       ide_drive_t *drive = idkp->drive;
+
+       ide_debug_log(IDE_DBG_FUNC, "Call %s\n", __func__);
+
+       if (idkp->openers == 1)
+               drive->disk_ops->flush(drive);
+
+       if ((drive->dev_flags & IDE_DFLAG_REMOVABLE) && idkp->openers == 1) {
+               drive->disk_ops->set_doorlock(drive, disk, 0);
+               drive->dev_flags &= ~IDE_DFLAG_FORMAT_IN_PROGRESS;
+       }
+
+       idkp->openers--;
+
+       ide_disk_put(idkp);
+
+       return 0;
+}
+
+static int ide_gd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+{
+       struct ide_disk_obj *idkp = ide_drv_g(bdev->bd_disk, ide_disk_obj);
+       ide_drive_t *drive = idkp->drive;
+
+       geo->heads = drive->bios_head;
+       geo->sectors = drive->bios_sect;
+       geo->cylinders = (u16)drive->bios_cyl; /* truncate */
+       return 0;
+}
+
+static int ide_gd_media_changed(struct gendisk *disk)
+{
+       struct ide_disk_obj *idkp = ide_drv_g(disk, ide_disk_obj);
+       ide_drive_t *drive = idkp->drive;
+       int ret;
+
+       /* do not scan partitions twice if this is a removable device */
+       if (drive->dev_flags & IDE_DFLAG_ATTACH) {
+               drive->dev_flags &= ~IDE_DFLAG_ATTACH;
+               return 0;
+       }
+
+       ret = !!(drive->dev_flags & IDE_DFLAG_MEDIA_CHANGED);
+       drive->dev_flags &= ~IDE_DFLAG_MEDIA_CHANGED;
+
+       return ret;
+}
+
+static int ide_gd_revalidate_disk(struct gendisk *disk)
+{
+       struct ide_disk_obj *idkp = ide_drv_g(disk, ide_disk_obj);
+       set_capacity(disk, ide_gd_capacity(idkp->drive));
+       return 0;
+}
+
+static int ide_gd_ioctl(struct inode *inode, struct file *file,
+                            unsigned int cmd, unsigned long arg)
+{
+       struct block_device *bdev = inode->i_bdev;
+       struct ide_disk_obj *idkp = ide_drv_g(bdev->bd_disk, ide_disk_obj);
+       ide_drive_t *drive = idkp->drive;
+
+       return drive->disk_ops->ioctl(drive, inode, file, cmd, arg);
+}
+
+static struct block_device_operations ide_gd_ops = {
+       .owner                  = THIS_MODULE,
+       .open                   = ide_gd_open,
+       .release                = ide_gd_release,
+       .ioctl                  = ide_gd_ioctl,
+       .getgeo                 = ide_gd_getgeo,
+       .media_changed          = ide_gd_media_changed,
+       .revalidate_disk        = ide_gd_revalidate_disk
+};
+
+static int ide_gd_probe(ide_drive_t *drive)
+{
+       const struct ide_disk_ops *disk_ops = NULL;
+       struct ide_disk_obj *idkp;
+       struct gendisk *g;
+
+       /* strstr("foo", "") is non-NULL */
+       if (!strstr("ide-gd", drive->driver_req))
+               goto failed;
+
+#ifdef CONFIG_IDE_GD_ATA
+       if (drive->media == ide_disk)
+               disk_ops = &ide_ata_disk_ops;
+#endif
+#ifdef CONFIG_IDE_GD_ATAPI
+       if (drive->media == ide_floppy)
+               disk_ops = &ide_atapi_disk_ops;
+#endif
+       if (disk_ops == NULL)
+               goto failed;
+
+       if (disk_ops->check(drive, DRV_NAME) == 0) {
+               printk(KERN_ERR PFX "%s: not supported by this driver\n",
+                       drive->name);
+               goto failed;
+       }
+
+       idkp = kzalloc(sizeof(*idkp), GFP_KERNEL);
+       if (!idkp) {
+               printk(KERN_ERR PFX "%s: can't allocate a disk structure\n",
+                       drive->name);
+               goto failed;
+       }
+
+       g = alloc_disk_node(IDE_DISK_MINORS, hwif_to_node(drive->hwif));
+       if (!g)
+               goto out_free_idkp;
+
+       ide_init_disk(g, drive);
+
+       kref_init(&idkp->kref);
+
+       idkp->drive = drive;
+       idkp->driver = &ide_gd_driver;
+       idkp->disk = g;
+
+       g->private_data = &idkp->driver;
+
+       drive->driver_data = idkp;
+       drive->debug_mask = debug_mask;
+       drive->disk_ops = disk_ops;
+
+       disk_ops->setup(drive);
+
+       set_capacity(g, ide_gd_capacity(drive));
+
+       g->minors = IDE_DISK_MINORS;
+       g->driverfs_dev = &drive->gendev;
+       g->flags |= GENHD_FL_EXT_DEVT;
+       if (drive->dev_flags & IDE_DFLAG_REMOVABLE)
+               g->flags = GENHD_FL_REMOVABLE;
+       g->fops = &ide_gd_ops;
+       add_disk(g);
+       return 0;
+
+out_free_idkp:
+       kfree(idkp);
+failed:
+       return -ENODEV;
+}
+
+static int __init ide_gd_init(void)
+{
+       printk(KERN_INFO DRV_NAME " driver " IDE_GD_VERSION "\n");
+       return driver_register(&ide_gd_driver.gen_driver);
+}
+
+static void __exit ide_gd_exit(void)
+{
+       driver_unregister(&ide_gd_driver.gen_driver);
+}
+
+MODULE_ALIAS("ide:*m-disk*");
+MODULE_ALIAS("ide-disk");
+MODULE_ALIAS("ide:*m-floppy*");
+MODULE_ALIAS("ide-floppy");
+module_init(ide_gd_init);
+module_exit(ide_gd_exit);
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("generic ATA/ATAPI disk driver");
diff --git a/drivers/ide/ide-gd.h b/drivers/ide/ide-gd.h
new file mode 100644 (file)
index 0000000..7d3d101
--- /dev/null
@@ -0,0 +1,44 @@
+#ifndef __IDE_GD_H
+#define __IDE_GD_H
+
+#define DRV_NAME "ide-gd"
+#define PFX DRV_NAME ": "
+
+/* define to see debug info */
+#define IDE_GD_DEBUG_LOG       0
+
+#if IDE_GD_DEBUG_LOG
+#define ide_debug_log(lvl, fmt, args...) __ide_debug_log(lvl, fmt, args)
+#else
+#define ide_debug_log(lvl, fmt, args...) do {} while (0)
+#endif
+
+struct ide_disk_obj {
+       ide_drive_t     *drive;
+       ide_driver_t    *driver;
+       struct gendisk  *disk;
+       struct kref     kref;
+       unsigned int    openers;        /* protected by BKL for now */
+
+       /* Last failed packet command */
+       struct ide_atapi_pc *failed_pc;
+       /* used for blk_{fs,pc}_request() requests */
+       struct ide_atapi_pc queued_pc;
+
+       /* Last error information */
+       u8 sense_key, asc, ascq;
+
+       int progress_indication;
+
+       /* Device information */
+       /* Current format */
+       int blocks, block_size, bs_factor;
+       /* Last format capacity descriptor */
+       u8 cap_desc[8];
+       /* Copy of the flexible disk page */
+       u8 flexible_disk_page[32];
+};
+
+sector_t ide_gd_capacity(ide_drive_t *);
+
+#endif /* __IDE_GD_H */
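
Editor's illustration: the media_changed path in ide-gd.c implements a read-and-clear contract, where IDE_DFLAG_ATTACH swallows the first query after probe so partitions are not scanned twice, and IDE_DFLAG_MEDIA_CHANGED reports at most once per change. A minimal sketch of the same contract with invented names (not patch code):

    #include <stdbool.h>

    struct media_state {
            bool attach_pending;    /* set at probe time; eats the first query */
            bool media_changed;     /* set by the media-change detection path */
    };

    /* returns 1 at most once per change, mirroring ide_gd_media_changed() */
    static int media_changed(struct media_state *s)
    {
            if (s->attach_pending) {        /* avoid scanning partitions twice */
                    s->attach_pending = false;
                    return 0;
            }
            int ret = s->media_changed ? 1 : 0;
            s->media_changed = false;
            return ret;
    }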
index b762deb2dacb34f408ff55d0104581593f117e5d..bb7a1ed8094e399b1860113ecbaee3ced4b5c96a 100644 (file)
@@ -755,7 +755,7 @@ int ide_config_drive_speed(ide_drive_t *drive, u8 speed)
         
        udelay(1);
        SELECT_DRIVE(drive);
-       SELECT_MASK(drive, 0);
+       SELECT_MASK(drive, 1);
        udelay(1);
        tp_ops->set_irq(hwif, 0);
 
index 19f8c7770a25d7449486b3df07b56cf536ee5b76..1649ea54f76ce7c194f7ec8f5016f132aec79d0f 100644 (file)
@@ -208,6 +208,7 @@ static inline void do_identify (ide_drive_t *drive, u8 cmd)
                drive->ready_stat = 0;
                if (ata_id_cdb_intr(id))
                        drive->atapi_flags |= IDE_AFLAG_DRQ_INTERRUPT;
+               drive->dev_flags |= IDE_DFLAG_DOORLOCKING;
                /* we don't do head unloading on ATAPI devices */
                drive->dev_flags |= IDE_DFLAG_NO_UNLOAD;
                return;
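
Editor's illustration: together with the ide-tape hunk further down, this line inverts the door-locking flag's polarity. Instead of ATAPI drives opting out via IDE_AFLAG_NO_DOORLOCK, IDE_DFLAG_DOORLOCKING is now set by default at identify time and cleared where the capabilities page reports no lock support. The flip, reduced to a sketch with invented names:

    #define DFLAG_DOORLOCKING       (1u << 0)

    /* identify: assume locking is supported by default */
    static unsigned int identify_flags(unsigned int dev_flags)
    {
            return dev_flags | DFLAG_DOORLOCKING;
    }

    /* capabilities page: clear the flag where the device lacks a lock */
    static unsigned int apply_caps(unsigned int dev_flags, unsigned char cap6)
    {
            if ((cap6 & 1) == 0)
                    dev_flags &= ~DFLAG_DOORLOCKING;
            return dev_flags;
    }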
index b26926487cc03f907e8e1798cdf6f0323027391e..c31d0dd7a5322aa56355462db58a213c3a3d9a63 100644 (file)
@@ -567,10 +567,10 @@ static void ide_remove_proc_entries(struct proc_dir_entry *dir, ide_proc_entry_t
 void ide_proc_register_driver(ide_drive_t *drive, ide_driver_t *driver)
 {
        mutex_lock(&ide_setting_mtx);
-       drive->settings = driver->settings;
+       drive->settings = driver->proc_devsets(drive);
        mutex_unlock(&ide_setting_mtx);
 
-       ide_add_proc_entries(drive->proc, driver->proc, drive);
+       ide_add_proc_entries(drive->proc, driver->proc_entries(drive), drive);
 }
 
 EXPORT_SYMBOL(ide_proc_register_driver);
@@ -591,7 +591,7 @@ void ide_proc_unregister_driver(ide_drive_t *drive, ide_driver_t *driver)
 {
        unsigned long flags;
 
-       ide_remove_proc_entries(drive->proc, driver->proc);
+       ide_remove_proc_entries(drive->proc, driver->proc_entries(drive));
 
        mutex_lock(&ide_setting_mtx);
        spin_lock_irqsave(&ide_lock, flags);
index d879c7797cde3a6ab961088f65ac17ca441f4120..b2b2e5e8d38ebf499ec78ffd48df02d2b88496f9 100644 (file)
@@ -2108,7 +2108,7 @@ static void idetape_get_mode_sense_results(ide_drive_t *drive)
 
        /* device lacks locking support according to capabilities page */
        if ((caps[6] & 1) == 0)
-               drive->atapi_flags |= IDE_AFLAG_NO_DOORLOCK;
+               drive->dev_flags &= ~IDE_DFLAG_DOORLOCKING;
 
        if (caps[7] & 0x02)
                tape->blk_size = 512;
@@ -2298,6 +2298,16 @@ static ide_proc_entry_t idetape_proc[] = {
        { "name",       S_IFREG|S_IRUGO,        proc_idetape_read_name, NULL },
        { NULL, 0, NULL, NULL }
 };
+
+static ide_proc_entry_t *ide_tape_proc_entries(ide_drive_t *drive)
+{
+       return idetape_proc;
+}
+
+static const struct ide_proc_devset *ide_tape_proc_devsets(ide_drive_t *drive)
+{
+       return idetape_settings;
+}
 #endif
 
 static int ide_tape_probe(ide_drive_t *);
@@ -2315,8 +2325,8 @@ static ide_driver_t idetape_driver = {
        .end_request            = idetape_end_request,
        .error                  = __ide_error,
 #ifdef CONFIG_IDE_PROC_FS
-       .proc                   = idetape_proc,
-       .settings               = idetape_settings,
+       .proc_entries           = ide_tape_proc_entries,
+       .proc_devsets           = ide_tape_proc_devsets,
 #endif
 };
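
Editor's illustration: the ide-tape hunk above replaces the driver's static .proc/.settings fields with proc_entries()/proc_devsets() callbacks, so a driver can pick the table per drive at run time; the unified ide-gd driver needs that to serve both disks and floppies. The shape of the interface, sketched with invented types (the real ones live in <linux/ide.h>):

    #include <stddef.h>

    struct proc_entry { const char *name; };

    static const struct proc_entry disk_entries[]   = { { "capacity" }, { NULL } };
    static const struct proc_entry floppy_entries[] = { { "geometry" }, { NULL } };

    /* a callback chooses the table at runtime instead of a fixed field */
    static const struct proc_entry *gd_proc_entries(int is_floppy)
    {
            return is_floppy ? floppy_entries : disk_entries;
    }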
 
index 02e6ee7d751defaad1fe96d081a881495ecceca5..ab44a1f5f5a9340ac684d677a5eb161dab05406f 100644 (file)
@@ -11,7 +11,6 @@ obj-$(CONFIG_BLK_DEV_CS5535)          += cs5535.o
 obj-$(CONFIG_BLK_DEV_SC1200)           += sc1200.o
 obj-$(CONFIG_BLK_DEV_CY82C693)         += cy82c693.o
 obj-$(CONFIG_BLK_DEV_DELKIN)           += delkin_cb.o
-obj-$(CONFIG_BLK_DEV_HPT34X)           += hpt34x.o
 obj-$(CONFIG_BLK_DEV_HPT366)           += hpt366.o
 obj-$(CONFIG_BLK_DEV_IT8213)           += it8213.o
 obj-$(CONFIG_BLK_DEV_IT821X)           += it821x.o
index 8689a706f537186e46549d2253b51c5c66c3b3a6..8f1b2d9f05138631c541d71c2cb195987de726a5 100644 (file)
@@ -46,10 +46,27 @@ static const struct ide_port_ops delkin_cb_port_ops = {
        .quirkproc              = ide_undecoded_slave,
 };
 
+static unsigned int delkin_cb_init_chipset(struct pci_dev *dev)
+{
+       unsigned long base = pci_resource_start(dev, 0);
+       int i;
+
+       outb(0x02, base + 0x1e);        /* set nIEN to block interrupts */
+       inb(base + 0x17);               /* read status to clear interrupts */
+
+       for (i = 0; i < sizeof(setup); ++i) {
+               if (setup[i])
+                       outb(setup[i], base + i);
+       }
+
+       return 0;
+}
+
 static const struct ide_port_info delkin_cb_port_info = {
        .port_ops               = &delkin_cb_port_ops,
        .host_flags             = IDE_HFLAG_IO_32BIT | IDE_HFLAG_UNMASK_IRQS |
                                  IDE_HFLAG_NO_DMA,
+       .init_chipset           = delkin_cb_init_chipset,
 };
 
 static int __devinit
@@ -57,7 +74,7 @@ delkin_cb_probe (struct pci_dev *dev, const struct pci_device_id *id)
 {
        struct ide_host *host;
        unsigned long base;
-       int i, rc;
+       int rc;
        hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL };
 
        rc = pci_enable_device(dev);
@@ -72,12 +89,8 @@ delkin_cb_probe (struct pci_dev *dev, const struct pci_device_id *id)
                return rc;
        }
        base = pci_resource_start(dev, 0);
-       outb(0x02, base + 0x1e);        /* set nIEN to block interrupts */
-       inb(base + 0x17);               /* read status to clear interrupts */
-       for (i = 0; i < sizeof(setup); ++i) {
-               if (setup[i])
-                       outb(setup[i], base + i);
-       }
+
+       delkin_cb_init_chipset(dev);
 
        memset(&hw, 0, sizeof(hw));
        ide_std_init_ports(&hw, base + 0x10, base + 0x1e);
@@ -110,6 +123,40 @@ delkin_cb_remove (struct pci_dev *dev)
        pci_disable_device(dev);
 }
 
+#ifdef CONFIG_PM
+static int delkin_cb_suspend(struct pci_dev *dev, pm_message_t state)
+{
+       pci_save_state(dev);
+       pci_disable_device(dev);
+       pci_set_power_state(dev, pci_choose_state(dev, state));
+
+       return 0;
+}
+
+static int delkin_cb_resume(struct pci_dev *dev)
+{
+       struct ide_host *host = pci_get_drvdata(dev);
+       int rc;
+
+       pci_set_power_state(dev, PCI_D0);
+
+       rc = pci_enable_device(dev);
+       if (rc)
+               return rc;
+
+       pci_restore_state(dev);
+       pci_set_master(dev);
+
+       if (host->init_chipset)
+               host->init_chipset(dev);
+
+       return 0;
+}
+#else
+#define delkin_cb_suspend NULL
+#define delkin_cb_resume NULL
+#endif
+
 static struct pci_device_id delkin_cb_pci_tbl[] __devinitdata = {
        { 0x1145, 0xf021, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
        { 0x1145, 0xf024, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
@@ -122,6 +169,8 @@ static struct pci_driver delkin_cb_pci_driver = {
        .id_table       = delkin_cb_pci_tbl,
        .probe          = delkin_cb_probe,
        .remove         = delkin_cb_remove,
+       .suspend        = delkin_cb_suspend,
+       .resume         = delkin_cb_resume,
 };
 
 static int __init delkin_cb_init(void)
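
Editor's illustration: factoring the register writes into delkin_cb_init_chipset() is what makes the new suspend/resume pair possible, since probe and resume now share one initialization routine instead of duplicating it. A generic sketch of that PM pattern, using only standard PCI PM calls (the my_* names are illustrative):

    #include <linux/pci.h>

    static void my_init_chipset(struct pci_dev *dev);  /* shared with probe() */

    static int my_resume(struct pci_dev *dev)
    {
            int rc;

            pci_set_power_state(dev, PCI_D0);
            rc = pci_enable_device(dev);
            if (rc)
                    return rc;
            pci_restore_state(dev);
            pci_set_master(dev);
            my_init_chipset(dev);   /* same routine the probe path used */
            return 0;
    }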
diff --git a/drivers/ide/pci/hpt34x.c b/drivers/ide/pci/hpt34x.c
deleted file mode 100644 (file)
index fb1a3aa..0000000
+++ /dev/null
@@ -1,193 +0,0 @@
-/*
- * Copyright (C) 1998-2000     Andre Hedrick <andre@linux-ide.org>
- *
- * May be copied or modified under the terms of the GNU General Public License
- *
- *
- * 00:12.0 Unknown mass storage controller:
- * Triones Technologies, Inc.
- * Unknown device 0003 (rev 01)
- *
- * hde: UDMA 2 (0x0000 0x0002) (0x0000 0x0010)
- * hdf: UDMA 2 (0x0002 0x0012) (0x0010 0x0030)
- * hde: DMA 2  (0x0000 0x0002) (0x0000 0x0010)
- * hdf: DMA 2  (0x0002 0x0012) (0x0010 0x0030)
- * hdg: DMA 1  (0x0012 0x0052) (0x0030 0x0070)
- * hdh: DMA 1  (0x0052 0x0252) (0x0070 0x00f0)
- *
- * ide-pci.c reference
- *
- * Since there are two cards that report almost identically,
- * the only discernable difference is the values reported in pcicmd.
- * Booting-BIOS card or HPT363 :: pcicmd == 0x07
- * Non-bootable card or HPT343 :: pcicmd == 0x05
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/ioport.h>
-#include <linux/interrupt.h>
-#include <linux/pci.h>
-#include <linux/init.h>
-#include <linux/ide.h>
-
-#define DRV_NAME "hpt34x"
-
-#define HPT343_DEBUG_DRIVE_INFO                0
-
-static void hpt34x_set_mode(ide_drive_t *drive, const u8 speed)
-{
-       struct pci_dev *dev = to_pci_dev(drive->hwif->dev);
-       u32 reg1= 0, tmp1 = 0, reg2 = 0, tmp2 = 0;
-       u8                      hi_speed, lo_speed;
-
-       hi_speed = speed >> 4;
-       lo_speed = speed & 0x0f;
-
-       if (hi_speed & 7) {
-               hi_speed = (hi_speed & 4) ? 0x01 : 0x10;
-       } else {
-               lo_speed <<= 5;
-               lo_speed >>= 5;
-       }
-
-       pci_read_config_dword(dev, 0x44, &reg1);
-       pci_read_config_dword(dev, 0x48, &reg2);
-       tmp1 = ((lo_speed << (3*drive->dn)) | (reg1 & ~(7 << (3*drive->dn))));
-       tmp2 = ((hi_speed << drive->dn) | (reg2 & ~(0x11 << drive->dn)));
-       pci_write_config_dword(dev, 0x44, tmp1);
-       pci_write_config_dword(dev, 0x48, tmp2);
-
-#if HPT343_DEBUG_DRIVE_INFO
-       printk("%s: %s drive%d (0x%04x 0x%04x) (0x%04x 0x%04x)" \
-               " (0x%02x 0x%02x)\n",
-               drive->name, ide_xfer_verbose(speed),
-               drive->dn, reg1, tmp1, reg2, tmp2,
-               hi_speed, lo_speed);
-#endif /* HPT343_DEBUG_DRIVE_INFO */
-}
-
-static void hpt34x_set_pio_mode(ide_drive_t *drive, const u8 pio)
-{
-       hpt34x_set_mode(drive, XFER_PIO_0 + pio);
-}
-
-/*
- * If the BIOS does not set the IO base address to XX00, 343 will fail.
- */
-#define        HPT34X_PCI_INIT_REG             0x80
-
-static unsigned int init_chipset_hpt34x(struct pci_dev *dev)
-{
-       int i = 0;
-       unsigned long hpt34xIoBase = pci_resource_start(dev, 4);
-       unsigned long hpt_addr[4] = { 0x20, 0x34, 0x28, 0x3c };
-       unsigned long hpt_addr_len[4] = { 7, 3, 7, 3 };
-       u16 cmd;
-       unsigned long flags;
-
-       local_irq_save(flags);
-
-       pci_write_config_byte(dev, HPT34X_PCI_INIT_REG, 0x00);
-       pci_read_config_word(dev, PCI_COMMAND, &cmd);
-
-       if (cmd & PCI_COMMAND_MEMORY)
-               pci_write_config_byte(dev, PCI_LATENCY_TIMER, 0xF0);
-       else
-               pci_write_config_byte(dev, PCI_LATENCY_TIMER, 0x20);
-
-       /*
-        * Since 20-23 can be assigned and are R/W, we correct them.
-        */
-       pci_write_config_word(dev, PCI_COMMAND, cmd & ~PCI_COMMAND_IO);
-       for(i=0; i<4; i++) {
-               dev->resource[i].start = (hpt34xIoBase + hpt_addr[i]);
-               dev->resource[i].end = dev->resource[i].start + hpt_addr_len[i];
-               dev->resource[i].flags = IORESOURCE_IO;
-               pci_write_config_dword(dev,
-                               (PCI_BASE_ADDRESS_0 + (i * 4)),
-                               dev->resource[i].start);
-       }
-       pci_write_config_word(dev, PCI_COMMAND, cmd);
-
-       local_irq_restore(flags);
-
-       return dev->irq;
-}
-
-static const struct ide_port_ops hpt34x_port_ops = {
-       .set_pio_mode           = hpt34x_set_pio_mode,
-       .set_dma_mode           = hpt34x_set_mode,
-};
-
-#define IDE_HFLAGS_HPT34X \
-       (IDE_HFLAG_NO_ATAPI_DMA | \
-        IDE_HFLAG_NO_DSC | \
-        IDE_HFLAG_NO_AUTODMA)
-
-static const struct ide_port_info hpt34x_chipsets[] __devinitdata = {
-       { /* 0: HPT343 */
-               .name           = DRV_NAME,
-               .init_chipset   = init_chipset_hpt34x,
-               .port_ops       = &hpt34x_port_ops,
-               .host_flags     = IDE_HFLAGS_HPT34X | IDE_HFLAG_NON_BOOTABLE,
-               .pio_mask       = ATA_PIO5,
-       },
-       { /* 1: HPT345 */
-               .name           = DRV_NAME,
-               .init_chipset   = init_chipset_hpt34x,
-               .port_ops       = &hpt34x_port_ops,
-               .host_flags     = IDE_HFLAGS_HPT34X | IDE_HFLAG_OFF_BOARD,
-               .pio_mask       = ATA_PIO5,
-#ifdef CONFIG_HPT34X_AUTODMA
-               .swdma_mask     = ATA_SWDMA2,
-               .mwdma_mask     = ATA_MWDMA2,
-               .udma_mask      = ATA_UDMA2,
-#endif
-       }
-};
-
-static int __devinit hpt34x_init_one(struct pci_dev *dev, const struct pci_device_id *id)
-{
-       const struct ide_port_info *d;
-       u16 pcicmd = 0;
-
-       pci_read_config_word(dev, PCI_COMMAND, &pcicmd);
-
-       d = &hpt34x_chipsets[(pcicmd & PCI_COMMAND_MEMORY) ? 1 : 0];
-
-       return ide_pci_init_one(dev, d, NULL);
-}
-
-static const struct pci_device_id hpt34x_pci_tbl[] = {
-       { PCI_VDEVICE(TTI, PCI_DEVICE_ID_TTI_HPT343), 0 },
-       { 0, },
-};
-MODULE_DEVICE_TABLE(pci, hpt34x_pci_tbl);
-
-static struct pci_driver hpt34x_pci_driver = {
-       .name           = "HPT34x_IDE",
-       .id_table       = hpt34x_pci_tbl,
-       .probe          = hpt34x_init_one,
-       .remove         = ide_pci_remove,
-       .suspend        = ide_pci_suspend,
-       .resume         = ide_pci_resume,
-};
-
-static int __init hpt34x_ide_init(void)
-{
-       return ide_pci_register_driver(&hpt34x_pci_driver);
-}
-
-static void __exit hpt34x_ide_exit(void)
-{
-       pci_unregister_driver(&hpt34x_pci_driver);
-}
-
-module_init(hpt34x_ide_init);
-module_exit(hpt34x_ide_exit);
-
-MODULE_AUTHOR("Andre Hedrick");
-MODULE_DESCRIPTION("PCI driver module for Highpoint 34x IDE");
-MODULE_LICENSE("GPL");
index 9cf171cb9376b5265e07f87dfe88cb3a346bdc3f..a7909e9c720e5741d402ceeee78b1d83322dabbe 100644 (file)
@@ -3,7 +3,7 @@
  * Portions Copyright (C) 2001         Sun Microsystems, Inc.
  * Portions Copyright (C) 2003         Red Hat Inc
  * Portions Copyright (C) 2007         Bartlomiej Zolnierkiewicz
- * Portions Copyright (C) 2005-2007    MontaVista Software, Inc.
+ * Portions Copyright (C) 2005-2008    MontaVista Software, Inc.
  *
  * Thanks to HighPoint Technologies for their assistance, and hardware.
  * Special Thanks to Jon Burchmore in SanDiego for the deep pockets, his
@@ -748,26 +748,24 @@ static void hpt3xx_maskproc(ide_drive_t *drive, int mask)
        struct pci_dev  *dev    = to_pci_dev(hwif->dev);
        struct hpt_info *info   = hpt3xx_get_info(hwif->dev);
 
-       if (drive->quirk_list) {
-               if (info->chip_type >= HPT370) {
-                       u8 scr1 = 0;
-
-                       pci_read_config_byte(dev, 0x5a, &scr1);
-                       if (((scr1 & 0x10) >> 4) != mask) {
-                               if (mask)
-                                       scr1 |=  0x10;
-                               else
-                                       scr1 &= ~0x10;
-                               pci_write_config_byte(dev, 0x5a, scr1);
-                       }
-               } else {
+       if (drive->quirk_list == 0)
+               return;
+
+       if (info->chip_type >= HPT370) {
+               u8 scr1 = 0;
+
+               pci_read_config_byte(dev, 0x5a, &scr1);
+               if (((scr1 & 0x10) >> 4) != mask) {
                        if (mask)
-                               disable_irq(hwif->irq);
+                               scr1 |=  0x10;
                        else
-                               enable_irq (hwif->irq);
+                               scr1 &= ~0x10;
+                       pci_write_config_byte(dev, 0x5a, scr1);
                }
-       } else
-               outb(ATA_DEVCTL_OBS | (mask ? 2 : 0), hwif->io_ports.ctl_addr);
+       } else if (mask)
+               disable_irq(hwif->irq);
+       else
+               enable_irq(hwif->irq);
 }
 
 /*
@@ -1289,7 +1287,6 @@ static u8 hpt3xx_cable_detect(ide_hwif_t *hwif)
 
 static void __devinit init_hwif_hpt366(ide_hwif_t *hwif)
 {
-       struct pci_dev *dev     = to_pci_dev(hwif->dev);
        struct hpt_info *info   = hpt3xx_get_info(hwif->dev);
        int serialize           = HPT_SERIALIZE_IO;
        u8  chip_type           = info->chip_type;
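
Editor's illustration: the hpt3xx_maskproc() rewrite above is mostly a de-nesting. Non-quirky drives now return early (their outb() fallback is dropped here), and the quirky-drive branches move one level shallower. The guard-clause shape in isolation:

    static void do_a(void) { }
    static void do_b(void) { }

    /* nested form */
    static void f_nested(int cond, int a)
    {
            if (cond) {
                    if (a)
                            do_a();
                    else
                            do_b();
            }
    }

    /* guard-clause form, as in the rewritten hpt3xx_maskproc() */
    static void f_guard(int cond, int a)
    {
            if (!cond)
                    return;
            if (a)
                    do_a();
            else
                    do_b();
    }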
index 9ce1d8059921932de41938cc874412088b75b4ee..49f163aa51e3197773c0ac626ad094cca96d6630 100644 (file)
@@ -617,7 +617,6 @@ static int __devinit init_setup_scc(struct pci_dev *dev,
        unsigned long intmask_port;
        unsigned long mode_port;
        unsigned long ecmode_port;
-       unsigned long dma_status_port;
        u32 reg = 0;
        struct scc_ports *ports;
        int rc;
@@ -637,7 +636,6 @@ static int __devinit init_setup_scc(struct pci_dev *dev,
        intmask_port = dma_base + 0x010;
        mode_port = ctl_base + 0x024;
        ecmode_port = ctl_base + 0xf00;
-       dma_status_port = dma_base + 0x004;
 
        /* controller initialization */
        reg = 0;
@@ -843,8 +841,6 @@ static u8 scc_cable_detect(ide_hwif_t *hwif)
 
 static void __devinit init_hwif_scc(ide_hwif_t *hwif)
 {
-       struct scc_ports *ports = ide_get_hwifdata(hwif);
-
        /* PTERADD */
        out_be32((void __iomem *)(hwif->dma_base + 0x018), hwif->dmatable_dma);
 
index dd634541ce361a62a4789484cdf89fe6a85584f3..8af9b23499fdd59d481e1dc03d0d9c44a2525606 100644 (file)
@@ -101,18 +101,8 @@ sgiioc4_init_hwif_ports(hw_regs_t * hw, unsigned long data_port,
        for (i = 0; i <= 7; i++)
                hw->io_ports_array[i] = reg + i * 4;
 
-       if (ctrl_port)
-               hw->io_ports.ctl_addr = ctrl_port;
-
-       if (irq_port)
-               hw->io_ports.irq_addr = irq_port;
-}
-
-static void
-sgiioc4_maskproc(ide_drive_t * drive, int mask)
-{
-       writeb(ATA_DEVCTL_OBS | (mask ? 2 : 0),
-              (void __iomem *)drive->hwif->io_ports.ctl_addr);
+       hw->io_ports.ctl_addr = ctrl_port;
+       hw->io_ports.irq_addr = irq_port;
 }
 
 static int
@@ -310,16 +300,14 @@ static u8 sgiioc4_read_status(ide_hwif_t *hwif)
        unsigned long port = hwif->io_ports.status_addr;
        u8 reg = (u8) readb((void __iomem *) port);
 
-       if ((port & 0xFFF) == 0x11C) {  /* Status register of IOC4 */
-               if (!(reg & ATA_BUSY)) { /* Not busy... check for interrupt */
-                       unsigned long other_ir = port - 0x110;
-                       unsigned int intr_reg = (u32) readl((void __iomem *) other_ir);
+       if (!(reg & ATA_BUSY)) {        /* Not busy... check for interrupt */
+               unsigned long other_ir = port - 0x110;
+               unsigned int intr_reg = (u32) readl((void __iomem *) other_ir);
 
-                       /* Clear the Interrupt, Error bits on the IOC4 */
-                       if (intr_reg & 0x03) {
-                               writel(0x03, (void __iomem *) other_ir);
-                               intr_reg = (u32) readl((void __iomem *) other_ir);
-                       }
+               /* Clear the Interrupt, Error bits on the IOC4 */
+               if (intr_reg & 0x03) {
+                       writel(0x03, (void __iomem *) other_ir);
+                       intr_reg = (u32) readl((void __iomem *) other_ir);
                }
        }
 
@@ -332,13 +320,9 @@ ide_dma_sgiioc4(ide_hwif_t *hwif, const struct ide_port_info *d)
 {
        struct pci_dev *dev = to_pci_dev(hwif->dev);
        unsigned long dma_base = pci_resource_start(dev, 0) + IOC4_DMA_OFFSET;
-       void __iomem *virt_dma_base;
        int num_ports = sizeof (ioc4_dma_regs_t);
        void *pad;
 
-       if (dma_base == 0)
-               return -1;
-
        printk(KERN_INFO "    %s: MMIO-DMA\n", hwif->name);
 
        if (request_mem_region(dma_base, num_ports, hwif->name) == NULL) {
@@ -348,14 +332,8 @@ ide_dma_sgiioc4(ide_hwif_t *hwif, const struct ide_port_info *d)
                return -1;
        }
 
-       virt_dma_base = ioremap(dma_base, num_ports);
-       if (virt_dma_base == NULL) {
-               printk(KERN_ERR "%s(%s) -- ERROR: unable to map addresses "
-                      "0x%lx to 0x%lx\n", __func__, hwif->name,
-                      dma_base, dma_base + num_ports - 1);
-               goto dma_remap_failure;
-       }
-       hwif->dma_base = (unsigned long) virt_dma_base;
+       hwif->dma_base = (unsigned long)hwif->io_ports.irq_addr +
+                        IOC4_DMA_OFFSET;
 
        hwif->sg_max_nents = IOC4_PRD_ENTRIES;
 
@@ -379,9 +357,6 @@ ide_dma_sgiioc4(ide_hwif_t *hwif, const struct ide_port_info *d)
        printk(KERN_INFO "%s: changing from DMA to PIO mode", hwif->name);
 
 dma_pci_alloc_failure:
-       iounmap(virt_dma_base);
-
-dma_remap_failure:
        release_mem_region(dma_base, num_ports);
 
        return -1;
@@ -563,8 +538,6 @@ static const struct ide_port_ops sgiioc4_port_ops = {
        .set_dma_mode           = sgiioc4_set_dma_mode,
        /* reset DMA engine, clear IRQs */
        .resetproc              = sgiioc4_resetproc,
-       /* mask on/off NIEN register */
-       .maskproc               = sgiioc4_maskproc,
 };
 
 static const struct ide_dma_ops sgiioc4_dma_ops = {
index e3e40427e00e8f9081d1a5326785c021727cb158..c7ff1e11ea853c11dc70b25079f9ccc8b943280a 100644 (file)
@@ -179,7 +179,7 @@ config LEDS_TRIGGER_TIMER
 
 config LEDS_TRIGGER_IDE_DISK
        bool "LED IDE Disk Trigger"
-       depends on LEDS_TRIGGERS && BLK_DEV_IDEDISK
+       depends on LEDS_TRIGGERS && IDE_GD_ATA
        help
          This allows LEDs to be controlled by IDE disk activity.
          If unsure, say Y.
index ba5aa2008273bebe869b12403d5f57ce8be0a2bb..e4c0db4dc7b199cfad30f9f4e318c54bc31d0e61 100644 (file)
@@ -123,7 +123,7 @@ static void asic3_irq_demux(unsigned int irq, struct irq_desc *desc)
                                        irqnr = asic->irq_base +
                                                (ASIC3_GPIOS_PER_BANK * bank)
                                                + i;
-                                       desc = irq_desc + irqnr;
+                                       desc = irq_to_desc(irqnr);
                                        desc->handle_irq(irqnr, desc);
                                        if (asic->irq_bothedge[bank] & bit)
                                                asic3_irq_flip_edge(asic, base,
@@ -136,7 +136,7 @@ static void asic3_irq_demux(unsigned int irq, struct irq_desc *desc)
                for (i = ASIC3_NUM_GPIOS; i < ASIC3_NR_IRQS; i++) {
                        /* They start at bit 4 and go up */
                        if (status & (1 << (i - ASIC3_NUM_GPIOS + 4))) {
-                               desc = irq_desc + asic->irq_base + i;
+                               desc = irq_to_desc(asic->irq_base + i);
                                desc->handle_irq(asic->irq_base + i,
                                                 desc);
                        }
index 50dff6e0088d919fae79f7769f412dbf5cc297ed..1a4d04664d6dca96426c965b0ad80062f3042a95 100644 (file)
@@ -112,7 +112,7 @@ static void egpio_handler(unsigned int irq, struct irq_desc *desc)
                /* Run irq handler */
                pr_debug("got IRQ %d\n", irqpin);
                irq = ei->irq_start + irqpin;
-               desc = &irq_desc[irq];
+               desc = irq_to_desc(irq);
                desc->handle_irq(irq, desc);
        }
 }
index 491ee16da5c13e27097d1dd58606ad15933af4bb..9ba295d9dd973b5713f7d49150a3807899ff3a77 100644 (file)
@@ -90,7 +90,7 @@ static int vortex_debug = 1;
 #include <linux/eisa.h>
 #include <linux/bitops.h>
 #include <linux/jiffies.h>
-#include <asm/irq.h>                   /* For NR_IRQS only. */
+#include <asm/irq.h>                   /* For nr_irqs only. */
 #include <asm/io.h>
 #include <asm/uaccess.h>
 
@@ -1221,7 +1221,7 @@ static int __devinit vortex_probe1(struct device *gendev,
        if (print_info)
                printk(", IRQ %d\n", dev->irq);
        /* Tell them about an invalid IRQ. */
-       if (dev->irq <= 0 || dev->irq >= NR_IRQS)
+       if (dev->irq <= 0 || dev->irq >= nr_irqs)
                printk(KERN_WARNING " *** Warning: IRQ %d is unlikely to work! ***\n",
                           dev->irq);
 
index 17ac6975d70db9d0d1fdbe35487c03a2a1d1fa38..b6a816e60c0f74593b5e24d4cdded1115a6cc38b 100644 (file)
@@ -416,10 +416,10 @@ static int ser12_open(struct net_device *dev)
        if (!dev || !bc)
                return -ENXIO;
        if (!dev->base_addr || dev->base_addr > 0xffff-SER12_EXTENT ||
-           dev->irq < 2 || dev->irq > NR_IRQS) {
+           dev->irq < 2 || dev->irq > nr_irqs) {
                printk(KERN_INFO "baycom_ser_fdx: invalid portnumber (max %u) "
                                "or irq (2 <= irq <= %d)\n",
-                               0xffff-SER12_EXTENT, NR_IRQS);
+                               0xffff-SER12_EXTENT, nr_irqs);
                return -ENXIO;
        }
        if (bc->baud < 300 || bc->baud > 4800) {
index 45ae9d1191d7cb13db4ffb236c12595038f354f0..c17e39bc546007cda47673ead55580e4e6b22357 100644 (file)
@@ -1465,7 +1465,7 @@ static void z8530_init(void)
        printk(KERN_INFO "Init Z8530 driver: %u channels, IRQ", Nchips*2);
        
        flag=" ";
-       for (k = 0; k < NR_IRQS; k++)
+       for (k = 0; k < nr_irqs; k++)
                if (Ivec[k].used) 
                {
                        printk("%s%d", flag, k);
@@ -1728,7 +1728,7 @@ static int scc_net_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 
                        if (hwcfg.irq == 2) hwcfg.irq = 9;
 
-                       if (hwcfg.irq < 0 || hwcfg.irq >= NR_IRQS)
+                       if (hwcfg.irq < 0 || hwcfg.irq >= nr_irqs)
                                return -EINVAL;
                                
                        if (!Ivec[hwcfg.irq].used && hwcfg.irq)
@@ -2148,7 +2148,7 @@ static void __exit scc_cleanup_driver(void)
                }
                
        /* To unload the port must be closed so no real IRQ pending */
-       for (k=0; k < NR_IRQS ; k++)
+       for (k = 0; k < nr_irqs ; k++)
                if (Ivec[k].used) free_irq(k, NULL);
                
        local_irq_enable();
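
Editor's illustration: the hamradio and WAN hunks in this merge replace the compile-time NR_IRQS bound with the runtime nr_irqs variable; with sparse IRQ support the real descriptor count is only known at boot, so range checks must read it at run time. The validation idiom, standalone:

    /* stand-in for the kernel's runtime nr_irqs; illustrative only */
    static int nr_irqs_rt = 256;

    static int irq_in_range(int irq)
    {
            return irq > 0 && irq < nr_irqs_rt;
    }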
index 38b90e7a7ed30b52d04fe46f64411a2991c3b5b0..7914867110ed74ef3e0f17f4dcba685ea4450d73 100644 (file)
@@ -168,7 +168,7 @@ static int get_registers(pegasus_t * pegasus, __u16 indx, __u16 size,
                        netif_device_detach(pegasus->net);
                if (netif_msg_drv(pegasus) && printk_ratelimit())
                        dev_err(&pegasus->intf->dev, "%s, status %d\n",
-                                       __FUNCTION__, ret);
+                                       __func__, ret);
                goto out;
        }
 
@@ -192,7 +192,7 @@ static int set_registers(pegasus_t * pegasus, __u16 indx, __u16 size,
        if (!buffer) {
                if (netif_msg_drv(pegasus))
                        dev_warn(&pegasus->intf->dev, "out of memory in %s\n",
-                                       __FUNCTION__);
+                                       __func__);
                return -ENOMEM;
        }
        memcpy(buffer, data, size);
index f972fef87c98839a9d3c8e1fc4f7c2516ed01199..ee51b6a5e60569be9d955e2bb0259a5341d0f6f5 100644 (file)
@@ -318,7 +318,7 @@ sbni_pci_probe( struct net_device  *dev )
                                continue;
                }
 
-               if( pci_irq_line <= 0  ||  pci_irq_line >= NR_IRQS )
+               if (pci_irq_line <= 0 || pci_irq_line >= nr_irqs)
                        printk( KERN_WARNING "  WARNING: The PCI BIOS assigned "
                                "this PCI card to IRQ %d, which is unlikely "
                                "to work!.\n"
index b30e38f3a50d736d95d591594c04f9aeae7c62ea..dcc1e9958d2f9bb0b0fd3625c43ed9eaf806bfd2 100644 (file)
 #undef DEBUG_CCIO_RUN_SG
 
 #ifdef CONFIG_PROC_FS
-/*
- * CCIO_SEARCH_TIME can help measure how fast the bitmap search is.
- * impacts performance though - ditch it if you don't use it.
- */
-#define CCIO_SEARCH_TIME
-#undef CCIO_MAP_STATS
-#else
-#undef CCIO_SEARCH_TIME
-#undef CCIO_MAP_STATS
+/* depends on procfs support, but costs CPU performance; off by default */
+#undef CCIO_COLLECT_STATS
 #endif
 
 #include <linux/proc_fs.h>
@@ -239,12 +232,10 @@ struct ioc {
        u32 res_size;                   /* size of resource map in bytes */
        spinlock_t res_lock;
 
-#ifdef CCIO_SEARCH_TIME
+#ifdef CCIO_COLLECT_STATS
 #define CCIO_SEARCH_SAMPLE 0x100
        unsigned long avg_search[CCIO_SEARCH_SAMPLE];
        unsigned long avg_idx;            /* current index into avg_search */
-#endif
-#ifdef CCIO_MAP_STATS
        unsigned long used_pages;
        unsigned long msingle_calls;
        unsigned long msingle_pages;
@@ -351,7 +342,7 @@ ccio_alloc_range(struct ioc *ioc, struct device *dev, size_t size)
        unsigned int pages_needed = size >> IOVP_SHIFT;
        unsigned int res_idx;
        unsigned long boundary_size;
-#ifdef CCIO_SEARCH_TIME
+#ifdef CCIO_COLLECT_STATS
        unsigned long cr_start = mfctl(16);
 #endif
        
@@ -406,7 +397,7 @@ resource_found:
        DBG_RES("%s() res_idx %d res_hint: %d\n",
                __func__, res_idx, ioc->res_hint);
 
-#ifdef CCIO_SEARCH_TIME
+#ifdef CCIO_COLLECT_STATS
        {
                unsigned long cr_end = mfctl(16);
                unsigned long tmp = cr_end - cr_start;
@@ -416,7 +407,7 @@ resource_found:
        ioc->avg_search[ioc->avg_idx++] = cr_start;
        ioc->avg_idx &= CCIO_SEARCH_SAMPLE - 1;
 #endif
-#ifdef CCIO_MAP_STATS
+#ifdef CCIO_COLLECT_STATS
        ioc->used_pages += pages_needed;
 #endif
        /* 
@@ -452,7 +443,7 @@ ccio_free_range(struct ioc *ioc, dma_addr_t iova, unsigned long pages_mapped)
        DBG_RES("%s():  res_idx: %d pages_mapped %d\n", 
                __func__, res_idx, pages_mapped);
 
-#ifdef CCIO_MAP_STATS
+#ifdef CCIO_COLLECT_STATS
        ioc->used_pages -= pages_mapped;
 #endif
 
@@ -764,7 +755,7 @@ ccio_map_single(struct device *dev, void *addr, size_t size,
        size = ALIGN(size + offset, IOVP_SIZE);
        spin_lock_irqsave(&ioc->res_lock, flags);
 
-#ifdef CCIO_MAP_STATS
+#ifdef CCIO_COLLECT_STATS
        ioc->msingle_calls++;
        ioc->msingle_pages += size >> IOVP_SHIFT;
 #endif
@@ -828,7 +819,7 @@ ccio_unmap_single(struct device *dev, dma_addr_t iova, size_t size,
 
        spin_lock_irqsave(&ioc->res_lock, flags);
 
-#ifdef CCIO_MAP_STATS
+#ifdef CCIO_COLLECT_STATS
        ioc->usingle_calls++;
        ioc->usingle_pages += size >> IOVP_SHIFT;
 #endif
@@ -894,7 +885,7 @@ ccio_free_consistent(struct device *dev, size_t size, void *cpu_addr,
 */
 #define PIDE_FLAG 0x80000000UL
 
-#ifdef CCIO_MAP_STATS
+#ifdef CCIO_COLLECT_STATS
 #define IOMMU_MAP_STATS
 #endif
 #include "iommu-helpers.h"
@@ -938,7 +929,7 @@ ccio_map_sg(struct device *dev, struct scatterlist *sglist, int nents,
        
        spin_lock_irqsave(&ioc->res_lock, flags);
 
-#ifdef CCIO_MAP_STATS
+#ifdef CCIO_COLLECT_STATS
        ioc->msg_calls++;
 #endif
 
@@ -997,13 +988,13 @@ ccio_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents,
        DBG_RUN_SG("%s() START %d entries,  %08lx,%x\n",
                __func__, nents, sg_virt_addr(sglist), sglist->length);
 
-#ifdef CCIO_MAP_STATS
+#ifdef CCIO_COLLECT_STATS
        ioc->usg_calls++;
 #endif
 
        while(sg_dma_len(sglist) && nents--) {
 
-#ifdef CCIO_MAP_STATS
+#ifdef CCIO_COLLECT_STATS
                ioc->usg_pages += sg_dma_len(sglist) >> PAGE_SHIFT;
 #endif
                ccio_unmap_single(dev, sg_dma_address(sglist),
@@ -1048,7 +1039,7 @@ static int ccio_proc_info(struct seq_file *m, void *p)
                len += seq_printf(m, "IO PDIR size    : %d bytes (%d entries)\n",
                               total_pages * 8, total_pages);
 
-#ifdef CCIO_MAP_STATS
+#ifdef CCIO_COLLECT_STATS
                len += seq_printf(m, "IO PDIR entries : %ld free  %ld used (%d%%)\n",
                                  total_pages - ioc->used_pages, ioc->used_pages,
                                  (int)(ioc->used_pages * 100 / total_pages));
@@ -1057,7 +1048,7 @@ static int ccio_proc_info(struct seq_file *m, void *p)
                len += seq_printf(m, "Resource bitmap : %d bytes (%d pages)\n", 
                                  ioc->res_size, total_pages);
 
-#ifdef CCIO_SEARCH_TIME
+#ifdef CCIO_COLLECT_STATS
                min = max = ioc->avg_search[0];
                for(j = 0; j < CCIO_SEARCH_SAMPLE; ++j) {
                        avg += ioc->avg_search[j];
@@ -1070,7 +1061,7 @@ static int ccio_proc_info(struct seq_file *m, void *p)
                len += seq_printf(m, "  Bitmap search : %ld/%ld/%ld (min/avg/max CPU Cycles)\n",
                                  min, avg, max);
 #endif
-#ifdef CCIO_MAP_STATS
+#ifdef CCIO_COLLECT_STATS
                len += seq_printf(m, "pci_map_single(): %8ld calls  %8ld pages (avg %d/1000)\n",
                                  ioc->msingle_calls, ioc->msingle_pages,
                                  (int)((ioc->msingle_pages * 1000)/ioc->msingle_calls));
@@ -1088,7 +1079,7 @@ static int ccio_proc_info(struct seq_file *m, void *p)
                len += seq_printf(m, "pci_unmap_sg()  : %8ld calls  %8ld pages (avg %d/1000)\n\n\n",
                                  ioc->usg_calls, ioc->usg_pages,
                                  (int)((ioc->usg_pages * 1000)/ioc->usg_calls));
-#endif /* CCIO_MAP_STATS */
+#endif /* CCIO_COLLECT_STATS */
 
                ioc = ioc->next;
        }
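
Editor's illustration: the ccio-dma change folds the old CCIO_SEARCH_TIME and CCIO_MAP_STATS switches into a single CCIO_COLLECT_STATS knob, so one #ifdef now guards all of the statistics bookkeeping and the procfs reporting stays in sync with it. The consolidated pattern, sketched:

    #ifdef CONFIG_PROC_FS
    /* single knob: define to pay the bookkeeping cost, undef to skip it */
    #undef COLLECT_STATS
    #endif

    struct ioc_stats {
    #ifdef COLLECT_STATS
            unsigned long used_pages;       /* map/unmap accounting */
            unsigned long search_cycles;    /* bitmap-search timing */
    #endif
            int res_size;                   /* always present */
    };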
index fd56128525d1b11e01bc71624025258957da15dd..3bc54b30c3a18af9733718015e5370b7a3a79038 100644 (file)
@@ -298,7 +298,8 @@ struct pci_port_ops dino_port_ops = {
 
 static void dino_disable_irq(unsigned int irq)
 {
-       struct dino_device *dino_dev = irq_desc[irq].chip_data;
+       struct irq_desc *desc = irq_to_desc(irq);
+       struct dino_device *dino_dev = desc->chip_data;
        int local_irq = gsc_find_local_irq(irq, dino_dev->global_irq, DINO_LOCAL_IRQS);
 
        DBG(KERN_WARNING "%s(0x%p, %d)\n", __func__, dino_dev, irq);
@@ -310,7 +311,8 @@ static void dino_disable_irq(unsigned int irq)
 
 static void dino_enable_irq(unsigned int irq)
 {
-       struct dino_device *dino_dev = irq_desc[irq].chip_data;
+       struct irq_desc *desc = irq_to_desc(irq);
+       struct dino_device *dino_dev = desc->chip_data;
        int local_irq = gsc_find_local_irq(irq, dino_dev->global_irq, DINO_LOCAL_IRQS);
        u32 tmp;
 
index 771cef592542fd75182b69a1e6d324c8fdc63d7b..7891db50c483bc6e6def22133ae78e17cc97da50 100644 (file)
@@ -346,10 +346,10 @@ static int __init eisa_probe(struct parisc_device *dev)
        }
        
        /* Reserve IRQ2 */
-       irq_desc[2].action = &irq2_action;
+       irq_to_desc(2)->action = &irq2_action;
        
        for (i = 0; i < 16; i++) {
-               irq_desc[i].chip = &eisa_interrupt_type;
+               irq_to_desc(i)->chip = &eisa_interrupt_type;
        }
        
        EISA_bus = 1;
index f7d088b897ee384f849081e92851ac08fd4ea164..e76db9e4d504968fd774ca6fc877d3b462dda33a 100644 (file)
@@ -108,7 +108,8 @@ int gsc_find_local_irq(unsigned int irq, int *global_irqs, int limit)
 
 static void gsc_asic_disable_irq(unsigned int irq)
 {
-       struct gsc_asic *irq_dev = irq_desc[irq].chip_data;
+       struct irq_desc *desc = irq_to_desc(irq);
+       struct gsc_asic *irq_dev = desc->chip_data;
        int local_irq = gsc_find_local_irq(irq, irq_dev->global_irq, 32);
        u32 imr;
 
@@ -123,7 +124,8 @@ static void gsc_asic_disable_irq(unsigned int irq)
 
 static void gsc_asic_enable_irq(unsigned int irq)
 {
-       struct gsc_asic *irq_dev = irq_desc[irq].chip_data;
+       struct irq_desc *desc = irq_to_desc(irq);
+       struct gsc_asic *irq_dev = desc->chip_data;
        int local_irq = gsc_find_local_irq(irq, irq_dev->global_irq, 32);
        u32 imr;
 
@@ -159,12 +161,14 @@ static struct hw_interrupt_type gsc_asic_interrupt_type = {
 int gsc_assign_irq(struct hw_interrupt_type *type, void *data)
 {
        static int irq = GSC_IRQ_BASE;
+       struct irq_desc *desc;
 
        if (irq > GSC_IRQ_MAX)
                return NO_IRQ;
 
-       irq_desc[irq].chip = type;
-       irq_desc[irq].chip_data = data;
+       desc = irq_to_desc(irq);
+       desc->chip = type;
+       desc->chip_data = data;
        return irq++;
 }
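
Editor's illustration: the parisc interrupt hunks (asic3, egpio, dino, eisa, gsc, iosapic, superio) all make the same conversion: direct irq_desc[irq] array indexing becomes the irq_to_desc() accessor, which keeps working once the descriptor table can be sparse or dynamically allocated. A schematic of the converted shape (irq_to_desc() is the real genirq accessor; the surrounding function is illustrative):

    #include <linux/irq.h>

    static void attach_chip(unsigned int irq, struct irq_chip *chip, void *data)
    {
            struct irq_desc *desc = irq_to_desc(irq);   /* not irq_desc[irq] */

            desc->chip = chip;
            desc->chip_data = data;
    }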
 
index 6fb3f7979f21b89043bb3cc90d16240d110c590d..7beffcab274548545c15673025cbd639ce563094 100644 (file)
@@ -619,7 +619,9 @@ iosapic_set_irt_data( struct vector_info *vi, u32 *dp0, u32 *dp1)
 
 static struct vector_info *iosapic_get_vector(unsigned int irq)
 {
-       return irq_desc[irq].chip_data;
+       struct irq_desc *desc = irq_to_desc(irq);
+
+       return desc->chip_data;
 }
 
 static void iosapic_disable_irq(unsigned int irq)
index 1e8d2d17f04c63d921fe827402ded98252affa87..1e93c837514f11fdf065ce80703ba57651276a2f 100644 (file)
@@ -363,7 +363,9 @@ int superio_fixup_irq(struct pci_dev *pcidev)
 #endif
 
        for (i = 0; i < 16; i++) {
-               irq_desc[i].chip = &superio_interrupt_type;
+               struct irq_desc *desc = irq_to_desc(i);
+
+               desc->chip = &superio_interrupt_type;
        }
 
        /*
index 529d9d7727b01d1ea857388cb0514f2703c8f5b3..999cc4088b5952beb76e018e573afff4b53fba04 100644 (file)
@@ -151,6 +151,13 @@ void pci_bus_add_devices(struct pci_bus *bus)
                        if (retval)
                                dev_err(&dev->dev, "Error creating cpuaffinity"
                                        " file, continuing...\n");
+
+                       retval = device_create_file(&child_bus->dev,
+                                               &dev_attr_cpulistaffinity);
+                       if (retval)
+                               dev_err(&dev->dev,
+                                       "Error creating cpulistaffinity"
+                                       " file, continuing...\n");
                }
        }
 }
index 7b3751136e63734054d3f53a84ec52f256cb463c..691b3adeb87057799841f112d8b77797e729e4ce 100644 (file)
@@ -211,7 +211,7 @@ static int __init dmar_parse_dev(struct dmar_drhd_unit *dmaru)
                include_all = 1;
        }
 
-       if (ret || (dmaru->devices_cnt == 0 && !dmaru->include_all)) {
+       if (ret) {
                list_del(&dmaru->list);
                kfree(dmaru);
        }
@@ -289,6 +289,24 @@ dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
        }
 }
 
+/**
+ * dmar_table_detect - checks to see if the platform supports DMAR devices
+ */
+static int __init dmar_table_detect(void)
+{
+       acpi_status status = AE_OK;
+
+       /* if we could find DMAR table, then there are DMAR devices */
+       status = acpi_get_table(ACPI_SIG_DMAR, 0,
+                               (struct acpi_table_header **)&dmar_tbl);
+
+       if (ACPI_SUCCESS(status) && !dmar_tbl) {
+               printk (KERN_WARNING PREFIX "Unable to map DMAR\n");
+               status = AE_NOT_FOUND;
+       }
+
+       return (ACPI_SUCCESS(status) ? 1 : 0);
+}
 
 /**
  * parse_dmar_table - parses the DMA reporting table
@@ -300,6 +318,12 @@ parse_dmar_table(void)
        struct acpi_dmar_header *entry_header;
        int ret = 0;
 
+       /*
+        * Map the table again: the earlier dmar_tbl mapping from
+        * dmar_table_detect() may have used the early fixed map and
+        * can be stale by now.
+        */
+       dmar_table_detect();
+
        dmar = (struct acpi_table_dmar *)dmar_tbl;
        if (!dmar)
                return -ENODEV;
@@ -373,10 +397,10 @@ dmar_find_matched_drhd_unit(struct pci_dev *dev)
 
 int __init dmar_dev_scope_init(void)
 {
-       struct dmar_drhd_unit *drhd;
+       struct dmar_drhd_unit *drhd, *drhd_n;
        int ret = -ENODEV;
 
-       for_each_drhd_unit(drhd) {
+       list_for_each_entry_safe(drhd, drhd_n, &dmar_drhd_units, list) {
                ret = dmar_parse_dev(drhd);
                if (ret)
                        return ret;
@@ -384,8 +408,8 @@ int __init dmar_dev_scope_init(void)
 
 #ifdef CONFIG_DMAR
        {
-               struct dmar_rmrr_unit *rmrr;
-               for_each_rmrr_units(rmrr) {
+               struct dmar_rmrr_unit *rmrr, *rmrr_n;
+               list_for_each_entry_safe(rmrr, rmrr_n, &dmar_rmrr_units, list) {
                        ret = rmrr_parse_dev(rmrr);
                        if (ret)
                                return ret;
@@ -430,30 +454,11 @@ int __init dmar_table_init(void)
        return 0;
 }
 
-/**
- * early_dmar_detect - checks to see if the platform supports DMAR devices
- */
-int __init early_dmar_detect(void)
-{
-       acpi_status status = AE_OK;
-
-       /* if we could find DMAR table, then there are DMAR devices */
-       status = acpi_get_table(ACPI_SIG_DMAR, 0,
-                               (struct acpi_table_header **)&dmar_tbl);
-
-       if (ACPI_SUCCESS(status) && !dmar_tbl) {
-               printk (KERN_WARNING PREFIX "Unable to map DMAR\n");
-               status = AE_NOT_FOUND;
-       }
-
-       return (ACPI_SUCCESS(status) ? 1 : 0);
-}
-
 void __init detect_intel_iommu(void)
 {
        int ret;
 
-       ret = early_dmar_detect();
+       ret = dmar_table_detect();
 
        {
 #ifdef CONFIG_INTR_REMAP
@@ -470,13 +475,13 @@ void __init detect_intel_iommu(void)
                               "Queued invalidation will be enabled to support "
                               "x2apic and Intr-remapping.\n");
 #endif
-
 #ifdef CONFIG_DMAR
                if (ret && !no_iommu && !iommu_detected && !swiotlb &&
                    !dmar_disabled)
                        iommu_detected = 1;
 #endif
        }
+       dmar_tbl = NULL;
 }
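
Editor's illustration: the dmar.c rework renames early_dmar_detect() to dmar_table_detect() and calls it a second time from parse_dmar_table(), because the first, early call may have mapped the ACPI table through a fixed early mapping that is stale by parse time; that is also why detect_intel_iommu() now clears dmar_tbl on exit. The two-phase flow, reduced to a sketch (the acpi_* calls are real; the wrappers are schematic):

    static struct acpi_table_header *dmar_tbl;

    static int dmar_table_detect(void)
    {
            acpi_status status = acpi_get_table(ACPI_SIG_DMAR, 0, &dmar_tbl);

            return ACPI_SUCCESS(status) && dmar_tbl != NULL;    /* 1 if present */
    }

    static int parse_dmar_table(void)
    {
            dmar_table_detect();    /* remap: the early mapping may be gone */
            if (!dmar_tbl)
                    return -ENODEV;
            /* ... walk the DMA remapping structures ... */
            return 0;
    }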
 
 
index 7d27631e6e627bba3202633d68ff5578dd84e663..8cfd1c4926c8c4b4c1e36ff1a39266df8a8a62e0 100644 (file)
@@ -123,10 +123,8 @@ static struct ebda_pci_rsrc *alloc_ebda_pci_rsrc (void)
 static void __init print_bus_info (void)
 {
        struct bus_info *ptr;
-       struct list_head *ptr1;
        
-       list_for_each (ptr1, &bus_info_head) {
-               ptr = list_entry (ptr1, struct bus_info, bus_info_list);
+       list_for_each_entry(ptr, &bus_info_head, bus_info_list) {
                debug ("%s - slot_min = %x\n", __func__, ptr->slot_min);
                debug ("%s - slot_max = %x\n", __func__, ptr->slot_max);
                debug ("%s - slot_count = %x\n", __func__, ptr->slot_count);
@@ -146,10 +144,8 @@ static void __init print_bus_info (void)
 static void print_lo_info (void)
 {
        struct rio_detail *ptr;
-       struct list_head *ptr1;
        debug ("print_lo_info ----\n"); 
-       list_for_each (ptr1, &rio_lo_head) {
-               ptr = list_entry (ptr1, struct rio_detail, rio_detail_list);
+       list_for_each_entry(ptr, &rio_lo_head, rio_detail_list) {
                debug ("%s - rio_node_id = %x\n", __func__, ptr->rio_node_id);
                debug ("%s - rio_type = %x\n", __func__, ptr->rio_type);
                debug ("%s - owner_id = %x\n", __func__, ptr->owner_id);
@@ -163,10 +159,8 @@ static void print_lo_info (void)
 static void print_vg_info (void)
 {
        struct rio_detail *ptr;
-       struct list_head *ptr1;
        debug ("%s ---\n", __func__);
-       list_for_each (ptr1, &rio_vg_head) {
-               ptr = list_entry (ptr1, struct rio_detail, rio_detail_list);
+       list_for_each_entry(ptr, &rio_vg_head, rio_detail_list) {
                debug ("%s - rio_node_id = %x\n", __func__, ptr->rio_node_id);
                debug ("%s - rio_type = %x\n", __func__, ptr->rio_type);
                debug ("%s - owner_id = %x\n", __func__, ptr->owner_id);
@@ -180,10 +174,8 @@ static void print_vg_info (void)
 static void __init print_ebda_pci_rsrc (void)
 {
        struct ebda_pci_rsrc *ptr;
-       struct list_head *ptr1;
 
-       list_for_each (ptr1, &ibmphp_ebda_pci_rsrc_head) {
-               ptr = list_entry (ptr1, struct ebda_pci_rsrc, ebda_pci_rsrc_list);
+       list_for_each_entry(ptr, &ibmphp_ebda_pci_rsrc_head, ebda_pci_rsrc_list) {
                debug ("%s - rsrc type: %x bus#: %x dev_func: %x start addr: %x end addr: %x\n", 
                        __func__, ptr->rsrc_type ,ptr->bus_num, ptr->dev_fun,ptr->start_addr, ptr->end_addr);
        }
@@ -192,10 +184,8 @@ static void __init print_ebda_pci_rsrc (void)
 static void __init print_ibm_slot (void)
 {
        struct slot *ptr;
-       struct list_head *ptr1;
 
-       list_for_each (ptr1, &ibmphp_slot_head) {
-               ptr = list_entry (ptr1, struct slot, ibm_slot_list);
+       list_for_each_entry(ptr, &ibmphp_slot_head, ibm_slot_list) {
                debug ("%s - slot_number: %x\n", __func__, ptr->number);
        }
 }
@@ -203,10 +193,8 @@ static void __init print_ibm_slot (void)
 static void __init print_opt_vg (void)
 {
        struct opt_rio *ptr;
-       struct list_head *ptr1;
        debug ("%s ---\n", __func__);
-       list_for_each (ptr1, &opt_vg_head) {
-               ptr = list_entry (ptr1, struct opt_rio, opt_rio_list);
+       list_for_each_entry(ptr, &opt_vg_head, opt_rio_list) {
                debug ("%s - rio_type %x\n", __func__, ptr->rio_type);
                debug ("%s - chassis_num: %x\n", __func__, ptr->chassis_num);
                debug ("%s - first_slot_num: %x\n", __func__, ptr->first_slot_num);
@@ -217,13 +205,9 @@ static void __init print_opt_vg (void)
 static void __init print_ebda_hpc (void)
 {
        struct controller *hpc_ptr;
-       struct list_head *ptr1;
        u16 index;
 
-       list_for_each (ptr1, &ebda_hpc_head) {
-
-               hpc_ptr = list_entry (ptr1, struct controller, ebda_hpc_list); 
-
+       list_for_each_entry(hpc_ptr, &ebda_hpc_head, ebda_hpc_list) {
                for (index = 0; index < hpc_ptr->slot_count; index++) {
                        debug ("%s - physical slot#: %x\n", __func__, hpc_ptr->slots[index].slot_num);
                        debug ("%s - pci bus# of the slot: %x\n", __func__, hpc_ptr->slots[index].slot_bus_num);
@@ -460,9 +444,7 @@ static int __init ebda_rio_table (void)
 static struct opt_rio *search_opt_vg (u8 chassis_num)
 {
        struct opt_rio *ptr;
-       struct list_head *ptr1;
-       list_for_each (ptr1, &opt_vg_head) {
-               ptr = list_entry (ptr1, struct opt_rio, opt_rio_list);
+       list_for_each_entry(ptr, &opt_vg_head, opt_rio_list) {
                if (ptr->chassis_num == chassis_num)
                        return ptr;
        }               
@@ -473,10 +455,8 @@ static int __init combine_wpg_for_chassis (void)
 {
        struct opt_rio *opt_rio_ptr = NULL;
        struct rio_detail *rio_detail_ptr = NULL;
-       struct list_head *list_head_ptr = NULL;
        
-       list_for_each (list_head_ptr, &rio_vg_head) {
-               rio_detail_ptr = list_entry (list_head_ptr, struct rio_detail, rio_detail_list);
+       list_for_each_entry(rio_detail_ptr, &rio_vg_head, rio_detail_list) {
                opt_rio_ptr = search_opt_vg (rio_detail_ptr->chassis_num);
                if (!opt_rio_ptr) {
                        opt_rio_ptr = kzalloc(sizeof(struct opt_rio), GFP_KERNEL);
@@ -497,14 +477,12 @@ static int __init combine_wpg_for_chassis (void)
 }      
 
 /*
- * reorgnizing linked list of expansion box     
+ * reorganizing linked list of expansion box
  */
 static struct opt_rio_lo *search_opt_lo (u8 chassis_num)
 {
        struct opt_rio_lo *ptr;
-       struct list_head *ptr1;
-       list_for_each (ptr1, &opt_lo_head) {
-               ptr = list_entry (ptr1, struct opt_rio_lo, opt_rio_lo_list);
+       list_for_each_entry(ptr, &opt_lo_head, opt_rio_lo_list) {
                if (ptr->chassis_num == chassis_num)
                        return ptr;
        }               
@@ -515,10 +493,8 @@ static int combine_wpg_for_expansion (void)
 {
        struct opt_rio_lo *opt_rio_lo_ptr = NULL;
        struct rio_detail *rio_detail_ptr = NULL;
-       struct list_head *list_head_ptr = NULL;
        
-       list_for_each (list_head_ptr, &rio_lo_head) {
-               rio_detail_ptr = list_entry (list_head_ptr, struct rio_detail, rio_detail_list);
+       list_for_each_entry(rio_detail_ptr, &rio_lo_head, rio_detail_list) {
                opt_rio_lo_ptr = search_opt_lo (rio_detail_ptr->chassis_num);
                if (!opt_rio_lo_ptr) {
                        opt_rio_lo_ptr = kzalloc(sizeof(struct opt_rio_lo), GFP_KERNEL);
@@ -550,20 +526,17 @@ static int first_slot_num (u8 slot_num, u8 first_slot, u8 var)
 {
        struct opt_rio *opt_vg_ptr = NULL;
        struct opt_rio_lo *opt_lo_ptr = NULL;
-       struct list_head *ptr = NULL;
        int rc = 0;
 
        if (!var) {
-               list_for_each (ptr, &opt_vg_head) {
-                       opt_vg_ptr = list_entry (ptr, struct opt_rio, opt_rio_list);
+               list_for_each_entry(opt_vg_ptr, &opt_vg_head, opt_rio_list) {
                        if ((first_slot < opt_vg_ptr->first_slot_num) && (slot_num >= opt_vg_ptr->first_slot_num)) { 
                                rc = -ENODEV;
                                break;
                        }
                }
        } else {
-               list_for_each (ptr, &opt_lo_head) {
-                       opt_lo_ptr = list_entry (ptr, struct opt_rio_lo, opt_rio_lo_list);
+               list_for_each_entry(opt_lo_ptr, &opt_lo_head, opt_rio_lo_list) {
                        if ((first_slot < opt_lo_ptr->first_slot_num) && (slot_num >= opt_lo_ptr->first_slot_num)) {
                                rc = -ENODEV;
                                break;
@@ -576,10 +549,8 @@ static int first_slot_num (u8 slot_num, u8 first_slot, u8 var)
 static struct opt_rio_lo * find_rxe_num (u8 slot_num)
 {
        struct opt_rio_lo *opt_lo_ptr;
-       struct list_head *ptr;
 
-       list_for_each (ptr, &opt_lo_head) {
-               opt_lo_ptr = list_entry (ptr, struct opt_rio_lo, opt_rio_lo_list);
+       list_for_each_entry(opt_lo_ptr, &opt_lo_head, opt_rio_lo_list) {
                //check to see if this slot_num belongs to expansion box
                if ((slot_num >= opt_lo_ptr->first_slot_num) && (!first_slot_num (slot_num, opt_lo_ptr->first_slot_num, 1))) 
                        return opt_lo_ptr;
@@ -590,10 +561,8 @@ static struct opt_rio_lo * find_rxe_num (u8 slot_num)
 static struct opt_rio * find_chassis_num (u8 slot_num)
 {
        struct opt_rio *opt_vg_ptr;
-       struct list_head *ptr;
 
-       list_for_each (ptr, &opt_vg_head) {
-               opt_vg_ptr = list_entry (ptr, struct opt_rio, opt_rio_list);
+       list_for_each_entry(opt_vg_ptr, &opt_vg_head, opt_rio_list) {
                //check to see if this slot_num belongs to chassis 
                if ((slot_num >= opt_vg_ptr->first_slot_num) && (!first_slot_num (slot_num, opt_vg_ptr->first_slot_num, 0))) 
                        return opt_vg_ptr;
@@ -607,11 +576,9 @@ static struct opt_rio * find_chassis_num (u8 slot_num)
 static u8 calculate_first_slot (u8 slot_num)
 {
        u8 first_slot = 1;
-       struct list_head * list;
        struct slot * slot_cur;
        
-       list_for_each (list, &ibmphp_slot_head) {
-               slot_cur = list_entry (list, struct slot, ibm_slot_list);
+       list_for_each_entry(slot_cur, &ibmphp_slot_head, ibm_slot_list) {
                if (slot_cur->ctrl) {
                        if ((slot_cur->ctrl->ctlr_type != 4) && (slot_cur->ctrl->ending_slot_num > first_slot) && (slot_num > slot_cur->ctrl->ending_slot_num)) 
                                first_slot = slot_cur->ctrl->ending_slot_num;
@@ -767,7 +734,6 @@ static int __init ebda_rsrc_controller (void)
        struct bus_info *bus_info_ptr1, *bus_info_ptr2;
        int rc;
        struct slot *tmp_slot;
-       struct list_head *list;
 
        addr = hpc_list_ptr->phys_addr;
        for (ctlr = 0; ctlr < hpc_list_ptr->num_ctlrs; ctlr++) {
@@ -997,9 +963,7 @@ static int __init ebda_rsrc_controller (void)
 
        }                       /* each hpc  */
 
-       list_for_each (list, &ibmphp_slot_head) {
-               tmp_slot = list_entry (list, struct slot, ibm_slot_list);
-
+       list_for_each_entry(tmp_slot, &ibmphp_slot_head, ibm_slot_list) {
                snprintf (tmp_slot->hotplug_slot->name, 30, "%s", create_file_name (tmp_slot));
                pci_hp_register(tmp_slot->hotplug_slot,
                        pci_find_bus(0, tmp_slot->bus), tmp_slot->device);
@@ -1101,10 +1065,8 @@ u16 ibmphp_get_total_controllers (void)
 struct slot *ibmphp_get_slot_from_physical_num (u8 physical_num)
 {
        struct slot *slot;
-       struct list_head *list;
 
-       list_for_each (list, &ibmphp_slot_head) {
-               slot = list_entry (list, struct slot, ibm_slot_list);
+       list_for_each_entry(slot, &ibmphp_slot_head, ibm_slot_list) {
                if (slot->number == physical_num)
                        return slot;
        }
@@ -1120,10 +1082,8 @@ struct slot *ibmphp_get_slot_from_physical_num (u8 physical_num)
 struct bus_info *ibmphp_find_same_bus_num (u32 num)
 {
        struct bus_info *ptr;
-       struct list_head  *ptr1;
 
-       list_for_each (ptr1, &bus_info_head) {
-               ptr = list_entry (ptr1, struct bus_info, bus_info_list); 
+       list_for_each_entry(ptr, &bus_info_head, bus_info_list) {
                if (ptr->busno == num) 
                         return ptr;
        }
@@ -1136,10 +1096,8 @@ struct bus_info *ibmphp_find_same_bus_num (u32 num)
 int ibmphp_get_bus_index (u8 num)
 {
        struct bus_info *ptr;
-       struct list_head  *ptr1;
 
-       list_for_each (ptr1, &bus_info_head) {
-               ptr = list_entry (ptr1, struct bus_info, bus_info_list);
+       list_for_each_entry(ptr, &bus_info_head, bus_info_list) {
                if (ptr->busno == num)  
                        return ptr->index;
        }
@@ -1212,11 +1170,9 @@ static struct pci_driver ibmphp_driver = {
 int ibmphp_register_pci (void)
 {
        struct controller *ctrl;
-       struct list_head *tmp;
        int rc = 0;
 
-       list_for_each (tmp, &ebda_hpc_head) {
-               ctrl = list_entry (tmp, struct controller, ebda_hpc_list);
+       list_for_each_entry(ctrl, &ebda_hpc_head, ebda_hpc_list) {
                if (ctrl->ctlr_type == 1) {
                        rc = pci_register_driver(&ibmphp_driver);
                        break;
@@ -1227,12 +1183,10 @@ int ibmphp_register_pci (void)
 static int ibmphp_probe (struct pci_dev * dev, const struct pci_device_id *ids)
 {
        struct controller *ctrl;
-       struct list_head *tmp;
 
        debug ("inside ibmphp_probe\n");
        
-       list_for_each (tmp, &ebda_hpc_head) {
-               ctrl = list_entry (tmp, struct controller, ebda_hpc_list);
+       list_for_each_entry(ctrl, &ebda_hpc_head, ebda_hpc_list) {
                if (ctrl->ctlr_type == 1) {
                        if ((dev->devfn == ctrl->u.pci_ctlr.dev_fun) && (dev->bus->number == ctrl->u.pci_ctlr.bus)) {
                                ctrl->ctrl_dev = dev;
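
Editor's illustration: the long ibmphp_ebda.c hunk is one mechanical conversion applied throughout. list_for_each() plus a list_entry() in the body becomes list_for_each_entry(), which folds the container lookup into the iterator and lets every function drop its scratch struct list_head pointer. The idiom in isolation, using the real <linux/list.h> macros over a struct shaped like the file's bus_info:

    #include <linux/list.h>

    struct bus_info {
            int busno;
            struct list_head bus_info_list;
    };

    static LIST_HEAD(bus_info_head);

    static struct bus_info *find_bus(int num)
    {
            struct bus_info *ptr;

            /* old form needed: struct list_head *p;
             * list_for_each(p, &bus_info_head)
             *         ptr = list_entry(p, struct bus_info, bus_info_list); */
            list_for_each_entry(ptr, &bus_info_head, bus_info_list) {
                    if (ptr->busno == num)
                            return ptr;
            }
            return NULL;
    }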
index 5f85b1b120e3ddc12c350e2b821c18946ec90e27..2e6c4474644eb9e27f3f61773280838a5e351580 100644 (file)
@@ -102,13 +102,13 @@ static int get_##name (struct hotplug_slot *slot, type *value)            \
 {                                                                      \
        struct hotplug_slot_ops *ops = slot->ops;                       \
        int retval = 0;                                                 \
-       if (try_module_get(ops->owner)) {                               \
-               if (ops->get_##name)                                    \
-                       retval = ops->get_##name(slot, value);          \
-               else                                                    \
-                       *value = slot->info->name;                      \
-               module_put(ops->owner);                                 \
-       }                                                               \
+       if (!try_module_get(ops->owner))                                \
+               return -ENODEV;                                         \
+       if (ops->get_##name)                                            \
+               retval = ops->get_##name(slot, value);                  \
+       else                                                            \
+               *value = slot->info->name;                              \
+       module_put(ops->owner);                                         \
        return retval;                                                  \
 }
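
Besides flattening the nesting with an early return, this hunk changes behaviour: when try_module_get() fails because the backing driver is unloading, the generated accessor used to fall through and return 0 with *value untouched; it now reports -ENODEV to the caller. The same guard-clause shape in plain C, with hypothetical try_get()/put() standing in for the module refcount helpers:

#include <errno.h>
#include <stdio.h>

static int alive;			/* pretend the owner is unloading */

static int try_get(void) { return alive; }
static void put(void) { }

static int get_value(int *value)
{
	int retval = 0;

	if (!try_get())
		return -ENODEV;		/* was: bogus success, *value stale */
	*value = 42;			/* the driver caches slot->info here */
	put();
	return retval;
}

int main(void)
{
	int v;

	printf("get_value() = %d\n", get_value(&v));
	return 0;
}
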
 
index 9e6cec67e1cc33d62690e8a3ebcf462c706956ca..c367978bd7feb21e57580f4fcc8ed4e23ff8d4dc 100644 (file)
@@ -57,6 +57,19 @@ extern struct workqueue_struct *pciehp_wq;
 #define warn(format, arg...)                                           \
        printk(KERN_WARNING "%s: " format, MY_NAME , ## arg)
 
+#define ctrl_dbg(ctrl, format, arg...)                                 \
+       do {                                                            \
+               if (pciehp_debug)                                       \
+                       dev_printk(KERN_DEBUG, &ctrl->pcie->device,     \
+                                       format, ## arg);                \
+       } while (0)
+#define ctrl_err(ctrl, format, arg...)                                 \
+       dev_err(&ctrl->pcie->device, format, ## arg)
+#define ctrl_info(ctrl, format, arg...)                                        \
+       dev_info(&ctrl->pcie->device, format, ## arg)
+#define ctrl_warn(ctrl, format, arg...)                                        \
+       dev_warn(&ctrl->pcie->device, format, ## arg)
+
 #define SLOT_NAME_SIZE 10
 struct slot {
        u8 bus;
@@ -87,6 +100,7 @@ struct controller {
        int num_slots;                  /* Number of slots on ctlr */
        int slot_num_inc;               /* 1 or -1 */
        struct pci_dev *pci_dev;
+       struct pcie_device *pcie;       /* PCI Express port service */
        struct list_head slot_list;
        struct hpc_ops *hpc_ops;
        wait_queue_head_t queue;        /* sleep & wake process */
@@ -170,7 +184,7 @@ static inline struct slot *pciehp_find_slot(struct controller *ctrl, u8 device)
                        return slot;
        }
 
-       err("%s: slot (device=0x%x) not found\n", __func__, device);
+       ctrl_err(ctrl, "%s: slot (device=0x%x) not found\n", __func__, device);
        return NULL;
 }
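
The new ctrl_dbg/ctrl_err/ctrl_info/ctrl_warn wrappers route every message through dev_printk() and friends, so each line carries the PCIe port device's name instead of the bare MY_NAME prefix; the struct pcie_device pointer added to struct controller is what gives the macros a struct device to print against. A userspace analogue of the pattern (fprintf standing in for dev_printk; ##__VA_ARGS__ is the GNU extension that mirrors the kernel's "## arg"):

#include <stdio.h>

struct controller { const char *dev_name; int debug; };

#define ctrl_log(ctrl, level, fmt, ...) \
	fprintf(stderr, "%s %s: " fmt, level, (ctrl)->dev_name, ##__VA_ARGS__)

#define ctrl_dbg(ctrl, fmt, ...)					\
	do {								\
		if ((ctrl)->debug)					\
			ctrl_log(ctrl, "DEBUG", fmt, ##__VA_ARGS__);	\
	} while (0)
#define ctrl_err(ctrl, fmt, ...)  ctrl_log(ctrl, "ERROR", fmt, ##__VA_ARGS__)
#define ctrl_info(ctrl, fmt, ...) ctrl_log(ctrl, "INFO",  fmt, ##__VA_ARGS__)

int main(void)
{
	struct controller c = { "0000:00:1c.0", 1 };

	ctrl_dbg(&c, "slot (device=0x%x) not found\n", 3);
	ctrl_err(&c, "controller initialization failed\n");
	return 0;
}
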
 
index 4fd5355bc3b55a6c957f2d0b031b2a5dd9d2d7d3..c748a19db89d5dd44f8a4217d7f5e574db85ed17 100644 (file)
@@ -144,9 +144,10 @@ set_lock_exit:
  * sysfs interface which allows the user to toggle the Electro Mechanical
  * Interlock.  Valid values are either 0 or 1.  0 == unlock, 1 == lock
  */
-static ssize_t lock_write_file(struct hotplug_slot *slot, const char *buf,
-               size_t count)
+static ssize_t lock_write_file(struct hotplug_slot *hotplug_slot,
+               const char *buf, size_t count)
 {
+       struct slot *slot = hotplug_slot->private;
        unsigned long llock;
        u8 lock;
        int retval = 0;
@@ -157,10 +158,11 @@ static ssize_t lock_write_file(struct hotplug_slot *slot, const char *buf,
        switch (lock) {
                case 0:
                case 1:
-                       retval = set_lock_status(slot, lock);
+                       retval = set_lock_status(hotplug_slot, lock);
                        break;
                default:
-                       err ("%d is an invalid lock value\n", lock);
+                       ctrl_err(slot->ctrl, "%d is an invalid lock value\n",
+                                lock);
                        retval = -EINVAL;
        }
        if (retval)
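
A recurring shape in this hunk and the ones that follow: the hotplug core invokes callbacks with the generic struct hotplug_slot, and the driver recovers its own per-slot state from the ->private back-pointer before doing anything else (here, to reach slot->ctrl for the ctrl_* messages). The pattern in isolation, with hypothetical types:

#include <stdio.h>

struct hotplug_slot { const char *name; void *private; };
struct slot { struct hotplug_slot *hotplug_slot; int ctrl_id; };

static int enable_slot(struct hotplug_slot *hotplug_slot)
{
	struct slot *slot = hotplug_slot->private;	/* recover driver state */

	printf("ctrl %d: enabling %s\n", slot->ctrl_id, hotplug_slot->name);
	return 0;
}

int main(void)
{
	struct hotplug_slot hs = { "slot1", NULL };
	struct slot s = { &hs, 7 };

	hs.private = &s;
	return enable_slot(&hs);
}
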
@@ -180,7 +182,10 @@ static struct hotplug_slot_attribute hotplug_slot_attr_lock = {
  */
 static void release_slot(struct hotplug_slot *hotplug_slot)
 {
-       dbg("%s - physical_slot = %s\n", __func__, hotplug_slot->name);
+       struct slot *slot = hotplug_slot->private;
+
+       ctrl_dbg(slot->ctrl, "%s - physical_slot = %s\n",
+                __func__, hotplug_slot->name);
 
        kfree(hotplug_slot->info);
        kfree(hotplug_slot);
@@ -215,9 +220,9 @@ static int init_slots(struct controller *ctrl)
                get_adapter_status(hotplug_slot, &info->adapter_status);
                slot->hotplug_slot = hotplug_slot;
 
-               dbg("Registering bus=%x dev=%x hp_slot=%x sun=%x "
-                   "slot_device_offset=%x\n", slot->bus, slot->device,
-                   slot->hp_slot, slot->number, ctrl->slot_device_offset);
+               ctrl_dbg(ctrl, "Registering bus=%x dev=%x hp_slot=%x sun=%x "
+                        "slot_device_offset=%x\n", slot->bus, slot->device,
+                        slot->hp_slot, slot->number, ctrl->slot_device_offset);
 duplicate_name:
                retval = pci_hp_register(hotplug_slot,
                                         ctrl->pci_dev->subordinate,
@@ -233,9 +238,11 @@ duplicate_name:
                                if (len < SLOT_NAME_SIZE)
                                        goto duplicate_name;
                                else
-                                       err("duplicate slot name overflow\n");
+                                       ctrl_err(ctrl, "duplicate slot name "
+                                                "overflow\n");
                        }
-                       err("pci_hp_register failed with error %d\n", retval);
+                       ctrl_err(ctrl, "pci_hp_register failed with error %d\n",
+                                retval);
                        goto error_info;
                }
                /* create additional sysfs entries */
@@ -244,7 +251,8 @@ duplicate_name:
                                &hotplug_slot_attr_lock.attr);
                        if (retval) {
                                pci_hp_deregister(hotplug_slot);
-                               err("cannot create additional sysfs entries\n");
+                               ctrl_err(ctrl, "cannot create additional sysfs "
+                                        "entries\n");
                                goto error_info;
                        }
                }
@@ -278,7 +286,8 @@ static int set_attention_status(struct hotplug_slot *hotplug_slot, u8 status)
 {
        struct slot *slot = hotplug_slot->private;
 
-       dbg("%s - physical_slot = %s\n", __func__, hotplug_slot->name);
+       ctrl_dbg(slot->ctrl, "%s - physical_slot = %s\n",
+                 __func__, hotplug_slot->name);
 
        hotplug_slot->info->attention_status = status;
 
@@ -293,7 +302,8 @@ static int enable_slot(struct hotplug_slot *hotplug_slot)
 {
        struct slot *slot = hotplug_slot->private;
 
-       dbg("%s - physical_slot = %s\n", __func__, hotplug_slot->name);
+       ctrl_dbg(slot->ctrl, "%s - physical_slot = %s\n",
+                __func__, hotplug_slot->name);
 
        return pciehp_sysfs_enable_slot(slot);
 }
@@ -303,7 +313,8 @@ static int disable_slot(struct hotplug_slot *hotplug_slot)
 {
        struct slot *slot = hotplug_slot->private;
 
-       dbg("%s - physical_slot = %s\n", __func__, hotplug_slot->name);
+       ctrl_dbg(slot->ctrl, "%s - physical_slot = %s\n",
+                 __func__, hotplug_slot->name);
 
        return pciehp_sysfs_disable_slot(slot);
 }
@@ -313,7 +324,8 @@ static int get_power_status(struct hotplug_slot *hotplug_slot, u8 *value)
        struct slot *slot = hotplug_slot->private;
        int retval;
 
-       dbg("%s - physical_slot = %s\n", __func__, hotplug_slot->name);
+       ctrl_dbg(slot->ctrl, "%s - physical_slot = %s\n",
+                 __func__, hotplug_slot->name);
 
        retval = slot->hpc_ops->get_power_status(slot, value);
        if (retval < 0)
@@ -327,7 +339,8 @@ static int get_attention_status(struct hotplug_slot *hotplug_slot, u8 *value)
        struct slot *slot = hotplug_slot->private;
        int retval;
 
-       dbg("%s - physical_slot = %s\n", __func__, hotplug_slot->name);
+       ctrl_dbg(slot->ctrl, "%s - physical_slot = %s\n",
+                 __func__, hotplug_slot->name);
 
        retval = slot->hpc_ops->get_attention_status(slot, value);
        if (retval < 0)
@@ -341,7 +354,8 @@ static int get_latch_status(struct hotplug_slot *hotplug_slot, u8 *value)
        struct slot *slot = hotplug_slot->private;
        int retval;
 
-       dbg("%s - physical_slot = %s\n", __func__, hotplug_slot->name);
+       ctrl_dbg(slot->ctrl, "%s - physical_slot = %s\n",
+                __func__, hotplug_slot->name);
 
        retval = slot->hpc_ops->get_latch_status(slot, value);
        if (retval < 0)
@@ -355,7 +369,8 @@ static int get_adapter_status(struct hotplug_slot *hotplug_slot, u8 *value)
        struct slot *slot = hotplug_slot->private;
        int retval;
 
-       dbg("%s - physical_slot = %s\n", __func__, hotplug_slot->name);
+       ctrl_dbg(slot->ctrl, "%s - physical_slot = %s\n",
+                __func__, hotplug_slot->name);
 
        retval = slot->hpc_ops->get_adapter_status(slot, value);
        if (retval < 0)
@@ -370,7 +385,8 @@ static int get_max_bus_speed(struct hotplug_slot *hotplug_slot,
        struct slot *slot = hotplug_slot->private;
        int retval;
 
-       dbg("%s - physical_slot = %s\n", __func__, hotplug_slot->name);
+       ctrl_dbg(slot->ctrl, "%s - physical_slot = %s\n",
+                __func__, hotplug_slot->name);
 
        retval = slot->hpc_ops->get_max_bus_speed(slot, value);
        if (retval < 0)
@@ -384,7 +400,8 @@ static int get_cur_bus_speed(struct hotplug_slot *hotplug_slot, enum pci_bus_spe
        struct slot *slot = hotplug_slot->private;
        int retval;
 
-       dbg("%s - physical_slot = %s\n", __func__, hotplug_slot->name);
+       ctrl_dbg(slot->ctrl, "%s - physical_slot = %s\n",
+                __func__, hotplug_slot->name);
 
        retval = slot->hpc_ops->get_cur_bus_speed(slot, value);
        if (retval < 0)
@@ -402,14 +419,15 @@ static int pciehp_probe(struct pcie_device *dev, const struct pcie_port_service_
        struct pci_dev *pdev = dev->port;
 
        if (pciehp_force)
-               dbg("Bypassing BIOS check for pciehp use on %s\n",
-                   pci_name(pdev));
+               dev_info(&dev->device,
+                        "Bypassing BIOS check for pciehp use on %s\n",
+                        pci_name(pdev));
        else if (pciehp_get_hp_hw_control_from_firmware(pdev))
                goto err_out_none;
 
        ctrl = pcie_init(dev);
        if (!ctrl) {
-               dbg("%s: controller initialization failed\n", PCIE_MODULE_NAME);
+               dev_err(&dev->device, "controller initialization failed\n");
                goto err_out_none;
        }
        set_service_data(dev, ctrl);
@@ -418,11 +436,10 @@ static int pciehp_probe(struct pcie_device *dev, const struct pcie_port_service_
        rc = init_slots(ctrl);
        if (rc) {
                if (rc == -EBUSY)
-                       warn("%s: slot already registered by another "
-                               "hotplug driver\n", PCIE_MODULE_NAME);
+                       ctrl_warn(ctrl, "slot already registered by another "
+                                 "hotplug driver\n");
                else
-                       err("%s: slot initialization failed\n",
-                               PCIE_MODULE_NAME);
+                       ctrl_err(ctrl, "slot initialization failed\n");
                goto err_out_release_ctlr;
        }
 
@@ -461,13 +478,13 @@ static void pciehp_remove (struct pcie_device *dev)
 #ifdef CONFIG_PM
 static int pciehp_suspend (struct pcie_device *dev, pm_message_t state)
 {
-       printk("%s ENTRY\n", __func__);
+       dev_info(&dev->device, "%s ENTRY\n", __func__);
        return 0;
 }
 
 static int pciehp_resume (struct pcie_device *dev)
 {
-       printk("%s ENTRY\n", __func__);
+       dev_info(&dev->device, "%s ENTRY\n", __func__);
        if (pciehp_force) {
                struct controller *ctrl = get_service_data(dev);
                struct slot *t_slot;
@@ -497,10 +514,9 @@ static struct pcie_port_service_id port_pci_ids[] = { {
        .driver_data =  0,
        }, { /* end: all zeroes */ }
 };
-static const char device_name[] = "hpdriver";
 
 static struct pcie_port_service_driver hpdriver_portdrv = {
-       .name           = (char *)device_name,
+       .name           = PCIE_MODULE_NAME,
        .id_table       = &port_pci_ids[0],
 
        .probe          = pciehp_probe,
index 96a5d55a49835e43caadb3b2b0cd52210fd2fce9..acb7f9efd182e8820a9f772ab667b3a5e0b2c18c 100644 (file)
@@ -58,14 +58,15 @@ static int queue_interrupt_event(struct slot *p_slot, u32 event_type)
 u8 pciehp_handle_attention_button(struct slot *p_slot)
 {
        u32 event_type;
+       struct controller *ctrl = p_slot->ctrl;
 
        /* Attention Button Change */
-       dbg("pciehp:  Attention button interrupt received.\n");
+       ctrl_dbg(ctrl, "Attention button interrupt received.\n");
 
        /*
         *  Button pressed - See if need to TAKE ACTION!!!
         */
-       info("Button pressed on Slot(%s)\n", p_slot->name);
+       ctrl_info(ctrl, "Button pressed on Slot(%s)\n", p_slot->name);
        event_type = INT_BUTTON_PRESS;
 
        queue_interrupt_event(p_slot, event_type);
@@ -77,22 +78,23 @@ u8 pciehp_handle_switch_change(struct slot *p_slot)
 {
        u8 getstatus;
        u32 event_type;
+       struct controller *ctrl = p_slot->ctrl;
 
        /* Switch Change */
-       dbg("pciehp:  Switch interrupt received.\n");
+       ctrl_dbg(ctrl, "Switch interrupt received.\n");
 
        p_slot->hpc_ops->get_latch_status(p_slot, &getstatus);
        if (getstatus) {
                /*
                 * Switch opened
                 */
-               info("Latch open on Slot(%s)\n", p_slot->name);
+               ctrl_info(ctrl, "Latch open on Slot(%s)\n", p_slot->name);
                event_type = INT_SWITCH_OPEN;
        } else {
                /*
                 *  Switch closed
                 */
-               info("Latch close on Slot(%s)\n", p_slot->name);
+               ctrl_info(ctrl, "Latch close on Slot(%s)\n", p_slot->name);
                event_type = INT_SWITCH_CLOSE;
        }
 
@@ -105,9 +107,10 @@ u8 pciehp_handle_presence_change(struct slot *p_slot)
 {
        u32 event_type;
        u8 presence_save;
+       struct controller *ctrl = p_slot->ctrl;
 
        /* Presence Change */
-       dbg("pciehp:  Presence/Notify input change.\n");
+       ctrl_dbg(ctrl, "Presence/Notify input change.\n");
 
        /* Switch is open, assume a presence change
         * Save the presence state
@@ -117,13 +120,13 @@ u8 pciehp_handle_presence_change(struct slot *p_slot)
                /*
                 * Card Present
                 */
-               info("Card present on Slot(%s)\n", p_slot->name);
+               ctrl_info(ctrl, "Card present on Slot(%s)\n", p_slot->name);
                event_type = INT_PRESENCE_ON;
        } else {
                /*
                 * Not Present
                 */
-               info("Card not present on Slot(%s)\n", p_slot->name);
+               ctrl_info(ctrl, "Card not present on Slot(%s)\n", p_slot->name);
                event_type = INT_PRESENCE_OFF;
        }
 
@@ -135,23 +138,25 @@ u8 pciehp_handle_presence_change(struct slot *p_slot)
 u8 pciehp_handle_power_fault(struct slot *p_slot)
 {
        u32 event_type;
+       struct controller *ctrl = p_slot->ctrl;
 
        /* power fault */
-       dbg("pciehp:  Power fault interrupt received.\n");
+       ctrl_dbg(ctrl, "Power fault interrupt received.\n");
 
        if ( !(p_slot->hpc_ops->query_power_fault(p_slot))) {
                /*
                 * power fault Cleared
                 */
-               info("Power fault cleared on Slot(%s)\n", p_slot->name);
+               ctrl_info(ctrl, "Power fault cleared on Slot(%s)\n",
+                         p_slot->name);
                event_type = INT_POWER_FAULT_CLEAR;
        } else {
                /*
                 *   power fault
                 */
-               info("Power fault on Slot(%s)\n", p_slot->name);
+               ctrl_info(ctrl, "Power fault on Slot(%s)\n", p_slot->name);
                event_type = INT_POWER_FAULT;
-               info("power fault bit %x set\n", 0);
+               ctrl_info(ctrl, "power fault bit %x set\n", 0);
        }
 
        queue_interrupt_event(p_slot, event_type);
@@ -168,8 +173,9 @@ static void set_slot_off(struct controller *ctrl, struct slot * pslot)
        /* turn off slot, turn on Amber LED, turn off Green LED if supported*/
        if (POWER_CTRL(ctrl)) {
                if (pslot->hpc_ops->power_off_slot(pslot)) {
-                       err("%s: Issue of Slot Power Off command failed\n",
-                           __func__);
+                       ctrl_err(ctrl,
+                                "%s: Issue of Slot Power Off command failed\n",
+                                __func__);
                        return;
                }
        }
@@ -186,8 +192,8 @@ static void set_slot_off(struct controller *ctrl, struct slot * pslot)
 
        if (ATTN_LED(ctrl)) {
                if (pslot->hpc_ops->set_attention_status(pslot, 1)) {
-                       err("%s: Issue of Set Attention Led command failed\n",
-                           __func__);
+                       ctrl_err(ctrl, "%s: Issue of Set Attention "
+                                "Led command failed\n", __func__);
                        return;
                }
        }
@@ -205,9 +211,9 @@ static int board_added(struct slot *p_slot)
        int retval = 0;
        struct controller *ctrl = p_slot->ctrl;
 
-       dbg("%s: slot device, slot offset, hp slot = %d, %d ,%d\n",
-                       __func__, p_slot->device,
-                       ctrl->slot_device_offset, p_slot->hp_slot);
+       ctrl_dbg(ctrl, "%s: slot device, slot offset, hp slot = %d, %d ,%d\n",
+                __func__, p_slot->device, ctrl->slot_device_offset,
+                p_slot->hp_slot);
 
        if (POWER_CTRL(ctrl)) {
                /* Power on slot */
@@ -225,22 +231,22 @@ static int board_added(struct slot *p_slot)
        /* Check link training status */
        retval = p_slot->hpc_ops->check_lnk_status(ctrl);
        if (retval) {
-               err("%s: Failed to check link status\n", __func__);
+               ctrl_err(ctrl, "%s: Failed to check link status\n", __func__);
                set_slot_off(ctrl, p_slot);
                return retval;
        }
 
        /* Check for a power fault */
        if (p_slot->hpc_ops->query_power_fault(p_slot)) {
-               dbg("%s: power fault detected\n", __func__);
+               ctrl_dbg(ctrl, "%s: power fault detected\n", __func__);
                retval = POWER_FAILURE;
                goto err_exit;
        }
 
        retval = pciehp_configure_device(p_slot);
        if (retval) {
-               err("Cannot add device 0x%x:%x\n", p_slot->bus,
-                   p_slot->device);
+               ctrl_err(ctrl, "Cannot add device 0x%x:%x\n",
+                        p_slot->bus, p_slot->device);
                goto err_exit;
        }
 
@@ -272,14 +278,14 @@ static int remove_board(struct slot *p_slot)
        if (retval)
                return retval;
 
-       dbg("In %s, hp_slot = %d\n", __func__, p_slot->hp_slot);
+       ctrl_dbg(ctrl, "In %s, hp_slot = %d\n", __func__, p_slot->hp_slot);
 
        if (POWER_CTRL(ctrl)) {
                /* power off slot */
                retval = p_slot->hpc_ops->power_off_slot(p_slot);
                if (retval) {
-                       err("%s: Issue of Slot Disable command failed\n",
-                           __func__);
+                       ctrl_err(ctrl, "%s: Issue of Slot Disable command "
+                                "failed\n", __func__);
                        return retval;
                }
        }
@@ -320,8 +326,8 @@ static void pciehp_power_thread(struct work_struct *work)
        switch (p_slot->state) {
        case POWEROFF_STATE:
                mutex_unlock(&p_slot->lock);
-               dbg("%s: disabling bus:device(%x:%x)\n",
-                   __func__, p_slot->bus, p_slot->device);
+               ctrl_dbg(p_slot->ctrl, "%s: disabling bus:device(%x:%x)\n",
+                        __func__, p_slot->bus, p_slot->device);
                pciehp_disable_slot(p_slot);
                mutex_lock(&p_slot->lock);
                p_slot->state = STATIC_STATE;
@@ -349,7 +355,8 @@ void pciehp_queue_pushbutton_work(struct work_struct *work)
 
        info = kmalloc(sizeof(*info), GFP_KERNEL);
        if (!info) {
-               err("%s: Cannot allocate memory\n", __func__);
+               ctrl_err(p_slot->ctrl, "%s: Cannot allocate memory\n",
+                        __func__);
                return;
        }
        info->p_slot = p_slot;
@@ -403,12 +410,14 @@ static void handle_button_press_event(struct slot *p_slot)
                p_slot->hpc_ops->get_power_status(p_slot, &getstatus);
                if (getstatus) {
                        p_slot->state = BLINKINGOFF_STATE;
-                       info("PCI slot #%s - powering off due to button "
-                            "press.\n", p_slot->name);
+                       ctrl_info(ctrl,
+                                 "PCI slot #%s - powering off due to button "
+                                 "press.\n", p_slot->name);
                } else {
                        p_slot->state = BLINKINGON_STATE;
-                       info("PCI slot #%s - powering on due to button "
-                            "press.\n", p_slot->name);
+                       ctrl_info(ctrl,
+                                 "PCI slot #%s - powering on due to button "
+                                 "press.\n", p_slot->name);
                }
                /* blink green LED and turn off amber */
                if (PWR_LED(ctrl))
@@ -425,8 +434,8 @@ static void handle_button_press_event(struct slot *p_slot)
                 * press the attention again before the 5 sec. limit
                 * expires to cancel hot-add or hot-remove
                 */
-               info("Button cancel on Slot(%s)\n", p_slot->name);
-               dbg("%s: button cancel\n", __func__);
+               ctrl_info(ctrl, "Button cancel on Slot(%s)\n", p_slot->name);
+               ctrl_dbg(ctrl, "%s: button cancel\n", __func__);
                cancel_delayed_work(&p_slot->work);
                if (p_slot->state == BLINKINGOFF_STATE) {
                        if (PWR_LED(ctrl))
@@ -437,8 +446,8 @@ static void handle_button_press_event(struct slot *p_slot)
                }
                if (ATTN_LED(ctrl))
                        p_slot->hpc_ops->set_attention_status(p_slot, 0);
-               info("PCI slot #%s - action canceled due to button press\n",
-                    p_slot->name);
+               ctrl_info(ctrl, "PCI slot #%s - action canceled "
+                         "due to button press\n", p_slot->name);
                p_slot->state = STATIC_STATE;
                break;
        case POWEROFF_STATE:
@@ -448,11 +457,11 @@ static void handle_button_press_event(struct slot *p_slot)
                 * this means that the previous attention button action
                 * to hot-add or hot-remove is undergoing
                 */
-               info("Button ignore on Slot(%s)\n", p_slot->name);
+               ctrl_info(ctrl, "Button ignore on Slot(%s)\n", p_slot->name);
                update_slot_info(p_slot);
                break;
        default:
-               warn("Not a valid state\n");
+               ctrl_warn(ctrl, "Not a valid state\n");
                break;
        }
 }
@@ -467,7 +476,8 @@ static void handle_surprise_event(struct slot *p_slot)
 
        info = kmalloc(sizeof(*info), GFP_KERNEL);
        if (!info) {
-               err("%s: Cannot allocate memory\n", __func__);
+               ctrl_err(p_slot->ctrl, "%s: Cannot allocate memory\n",
+                        __func__);
                return;
        }
        info->p_slot = p_slot;
@@ -505,7 +515,7 @@ static void interrupt_event_handler(struct work_struct *work)
        case INT_PRESENCE_OFF:
                if (!HP_SUPR_RM(ctrl))
                        break;
-               dbg("Surprise Removal\n");
+               ctrl_dbg(ctrl, "Surprise Removal\n");
                update_slot_info(p_slot);
                handle_surprise_event(p_slot);
                break;
@@ -522,22 +532,23 @@ int pciehp_enable_slot(struct slot *p_slot)
 {
        u8 getstatus = 0;
        int rc;
+       struct controller *ctrl = p_slot->ctrl;
 
        /* Check to see if (latch closed, card present, power off) */
        mutex_lock(&p_slot->ctrl->crit_sect);
 
        rc = p_slot->hpc_ops->get_adapter_status(p_slot, &getstatus);
        if (rc || !getstatus) {
-               info("%s: no adapter on slot(%s)\n", __func__,
-                    p_slot->name);
+               ctrl_info(ctrl, "%s: no adapter on slot(%s)\n",
+                         __func__, p_slot->name);
                mutex_unlock(&p_slot->ctrl->crit_sect);
                return -ENODEV;
        }
        if (MRL_SENS(p_slot->ctrl)) {
                rc = p_slot->hpc_ops->get_latch_status(p_slot, &getstatus);
                if (rc || getstatus) {
-                       info("%s: latch open on slot(%s)\n", __func__,
-                            p_slot->name);
+                       ctrl_info(ctrl, "%s: latch open on slot(%s)\n",
+                                 __func__, p_slot->name);
                        mutex_unlock(&p_slot->ctrl->crit_sect);
                        return -ENODEV;
                }
@@ -546,8 +557,8 @@ int pciehp_enable_slot(struct slot *p_slot)
        if (POWER_CTRL(p_slot->ctrl)) {
                rc = p_slot->hpc_ops->get_power_status(p_slot, &getstatus);
                if (rc || getstatus) {
-                       info("%s: already enabled on slot(%s)\n", __func__,
-                            p_slot->name);
+                       ctrl_info(ctrl, "%s: already enabled on slot(%s)\n",
+                                 __func__, p_slot->name);
                        mutex_unlock(&p_slot->ctrl->crit_sect);
                        return -EINVAL;
                }
@@ -571,6 +582,7 @@ int pciehp_disable_slot(struct slot *p_slot)
 {
        u8 getstatus = 0;
        int ret = 0;
+       struct controller *ctrl = p_slot->ctrl;
 
        if (!p_slot->ctrl)
                return 1;
@@ -581,8 +593,8 @@ int pciehp_disable_slot(struct slot *p_slot)
        if (!HP_SUPR_RM(p_slot->ctrl)) {
                ret = p_slot->hpc_ops->get_adapter_status(p_slot, &getstatus);
                if (ret || !getstatus) {
-                       info("%s: no adapter on slot(%s)\n", __func__,
-                            p_slot->name);
+                       ctrl_info(ctrl, "%s: no adapter on slot(%s)\n",
+                                 __func__, p_slot->name);
                        mutex_unlock(&p_slot->ctrl->crit_sect);
                        return -ENODEV;
                }
@@ -591,8 +603,8 @@ int pciehp_disable_slot(struct slot *p_slot)
        if (MRL_SENS(p_slot->ctrl)) {
                ret = p_slot->hpc_ops->get_latch_status(p_slot, &getstatus);
                if (ret || getstatus) {
-                       info("%s: latch open on slot(%s)\n", __func__,
-                            p_slot->name);
+                       ctrl_info(ctrl, "%s: latch open on slot(%s)\n",
+                                 __func__, p_slot->name);
                        mutex_unlock(&p_slot->ctrl->crit_sect);
                        return -ENODEV;
                }
@@ -601,8 +613,8 @@ int pciehp_disable_slot(struct slot *p_slot)
        if (POWER_CTRL(p_slot->ctrl)) {
                ret = p_slot->hpc_ops->get_power_status(p_slot, &getstatus);
                if (ret || !getstatus) {
-                       info("%s: already disabled slot(%s)\n", __func__,
-                            p_slot->name);
+                       ctrl_info(ctrl, "%s: already disabled slot(%s)\n",
+                                 __func__, p_slot->name);
                        mutex_unlock(&p_slot->ctrl->crit_sect);
                        return -EINVAL;
                }
@@ -618,6 +630,7 @@ int pciehp_disable_slot(struct slot *p_slot)
 int pciehp_sysfs_enable_slot(struct slot *p_slot)
 {
        int retval = -ENODEV;
+       struct controller *ctrl = p_slot->ctrl;
 
        mutex_lock(&p_slot->lock);
        switch (p_slot->state) {
@@ -631,15 +644,15 @@ int pciehp_sysfs_enable_slot(struct slot *p_slot)
                p_slot->state = STATIC_STATE;
                break;
        case POWERON_STATE:
-               info("Slot %s is already in powering on state\n",
-                    p_slot->name);
+               ctrl_info(ctrl, "Slot %s is already in powering on state\n",
+                         p_slot->name);
                break;
        case BLINKINGOFF_STATE:
        case POWEROFF_STATE:
-               info("Already enabled on slot %s\n", p_slot->name);
+               ctrl_info(ctrl, "Already enabled on slot %s\n", p_slot->name);
                break;
        default:
-               err("Not a valid state on slot %s\n", p_slot->name);
+               ctrl_err(ctrl, "Not a valid state on slot %s\n", p_slot->name);
                break;
        }
        mutex_unlock(&p_slot->lock);
@@ -650,6 +663,7 @@ int pciehp_sysfs_enable_slot(struct slot *p_slot)
 int pciehp_sysfs_disable_slot(struct slot *p_slot)
 {
        int retval = -ENODEV;
+       struct controller *ctrl = p_slot->ctrl;
 
        mutex_lock(&p_slot->lock);
        switch (p_slot->state) {
@@ -663,15 +677,15 @@ int pciehp_sysfs_disable_slot(struct slot *p_slot)
                p_slot->state = STATIC_STATE;
                break;
        case POWEROFF_STATE:
-               info("Slot %s is already in powering off state\n",
-                    p_slot->name);
+               ctrl_info(ctrl, "Slot %s is already in powering off state\n",
+                         p_slot->name);
                break;
        case BLINKINGON_STATE:
        case POWERON_STATE:
-               info("Already disabled on slot %s\n", p_slot->name);
+               ctrl_info(ctrl, "Already disabled on slot %s\n", p_slot->name);
                break;
        default:
-               err("Not a valid state on slot %s\n", p_slot->name);
+               ctrl_err(ctrl, "Not a valid state on slot %s\n", p_slot->name);
                break;
        }
        mutex_unlock(&p_slot->lock);
index 9d934ddee95661b76c133864591c545bdba571cc..8e9530c4c36dfd55ae1d2fb60b7c8b4c67f84181 100644 (file)
@@ -223,7 +223,7 @@ static void start_int_poll_timer(struct controller *ctrl, int sec)
 
 static inline int pciehp_request_irq(struct controller *ctrl)
 {
-       int retval, irq = ctrl->pci_dev->irq;
+       int retval, irq = ctrl->pcie->irq;
 
        /* Install interrupt polling timer. Start with 10 sec delay */
        if (pciehp_poll_mode) {
@@ -235,7 +235,8 @@ static inline int pciehp_request_irq(struct controller *ctrl)
        /* Installs the interrupt handler */
        retval = request_irq(irq, pcie_isr, IRQF_SHARED, MY_NAME, ctrl);
        if (retval)
-               err("Cannot get irq %d for the hotplug controller\n", irq);
+               ctrl_err(ctrl, "Cannot get irq %d for the hotplug controller\n",
+                        irq);
        return retval;
 }
 
@@ -244,7 +245,7 @@ static inline void pciehp_free_irq(struct controller *ctrl)
        if (pciehp_poll_mode)
                del_timer_sync(&ctrl->poll_timer);
        else
-               free_irq(ctrl->pci_dev->irq, ctrl);
+               free_irq(ctrl->pcie->irq, ctrl);
 }
 
 static int pcie_poll_cmd(struct controller *ctrl)
@@ -282,7 +283,7 @@ static void pcie_wait_cmd(struct controller *ctrl, int poll)
        else
                rc = wait_event_timeout(ctrl->queue, !ctrl->cmd_busy, timeout);
        if (!rc)
-               dbg("Command not completed in 1000 msec\n");
+               ctrl_dbg(ctrl, "Command not completed in 1000 msec\n");
 }
 
 /**
@@ -301,7 +302,8 @@ static int pcie_write_cmd(struct controller *ctrl, u16 cmd, u16 mask)
 
        retval = pciehp_readw(ctrl, SLOTSTATUS, &slot_status);
        if (retval) {
-               err("%s: Cannot read SLOTSTATUS register\n", __func__);
+               ctrl_err(ctrl, "%s: Cannot read SLOTSTATUS register\n",
+                        __func__);
                goto out;
        }
 
@@ -312,26 +314,28 @@ static int pcie_write_cmd(struct controller *ctrl, u16 cmd, u16 mask)
                         * proceed forward to issue the next command according
                         * to spec. Just print out the error message.
                         */
-                       dbg("%s: CMD_COMPLETED not clear after 1 sec.\n",
-                           __func__);
+                       ctrl_dbg(ctrl,
+                                "%s: CMD_COMPLETED not clear after 1 sec.\n",
+                                __func__);
                } else if (!NO_CMD_CMPL(ctrl)) {
                        /*
                         * This controller seems to notify of command completed
                         * event even though it supports none of power
                         * controller, attention led, power led and EMI.
                         */
-                       dbg("%s: Unexpected CMD_COMPLETED. Need to wait for "
-                           "command completed event.\n", __func__);
+                       ctrl_dbg(ctrl, "%s: Unexpected CMD_COMPLETED. Need to "
+                                "wait for command completed event.\n",
+                                __func__);
                        ctrl->no_cmd_complete = 0;
                } else {
-                       dbg("%s: Unexpected CMD_COMPLETED. Maybe the "
-                           "controller is broken.\n", __func__);
+                       ctrl_dbg(ctrl, "%s: Unexpected CMD_COMPLETED. Maybe "
+                                "the controller is broken.\n", __func__);
                }
        }
 
        retval = pciehp_readw(ctrl, SLOTCTRL, &slot_ctrl);
        if (retval) {
-               err("%s: Cannot read SLOTCTRL register\n", __func__);
+               ctrl_err(ctrl, "%s: Cannot read SLOTCTRL register\n", __func__);
                goto out;
        }
 
@@ -341,7 +345,8 @@ static int pcie_write_cmd(struct controller *ctrl, u16 cmd, u16 mask)
        smp_mb();
        retval = pciehp_writew(ctrl, SLOTCTRL, slot_ctrl);
        if (retval)
-               err("%s: Cannot write to SLOTCTRL register\n", __func__);
+               ctrl_err(ctrl, "%s: Cannot write to SLOTCTRL register\n",
+                        __func__);
 
        /*
         * Wait for command completion.
@@ -370,14 +375,15 @@ static int hpc_check_lnk_status(struct controller *ctrl)
 
        retval = pciehp_readw(ctrl, LNKSTATUS, &lnk_status);
        if (retval) {
-               err("%s: Cannot read LNKSTATUS register\n", __func__);
+               ctrl_err(ctrl, "%s: Cannot read LNKSTATUS register\n",
+                        __func__);
                return retval;
        }
 
-       dbg("%s: lnk_status = %x\n", __func__, lnk_status);
+       ctrl_dbg(ctrl, "%s: lnk_status = %x\n", __func__, lnk_status);
        if ( (lnk_status & LNK_TRN) || (lnk_status & LNK_TRN_ERR) ||
                !(lnk_status & NEG_LINK_WD)) {
-               err("%s : Link Training Error occurs \n", __func__);
+               ctrl_err(ctrl, "%s : Link Training Error occurs \n", __func__);
                retval = -1;
                return retval;
        }
@@ -394,12 +400,12 @@ static int hpc_get_attention_status(struct slot *slot, u8 *status)
 
        retval = pciehp_readw(ctrl, SLOTCTRL, &slot_ctrl);
        if (retval) {
-               err("%s: Cannot read SLOTCTRL register\n", __func__);
+               ctrl_err(ctrl, "%s: Cannot read SLOTCTRL register\n", __func__);
                return retval;
        }
 
-       dbg("%s: SLOTCTRL %x, value read %x\n",
-           __func__, ctrl->cap_base + SLOTCTRL, slot_ctrl);
+       ctrl_dbg(ctrl, "%s: SLOTCTRL %x, value read %x\n",
+                __func__, ctrl->cap_base + SLOTCTRL, slot_ctrl);
 
        atten_led_state = (slot_ctrl & ATTN_LED_CTRL) >> 6;
 
@@ -433,11 +439,11 @@ static int hpc_get_power_status(struct slot *slot, u8 *status)
 
        retval = pciehp_readw(ctrl, SLOTCTRL, &slot_ctrl);
        if (retval) {
-               err("%s: Cannot read SLOTCTRL register\n", __func__);
+               ctrl_err(ctrl, "%s: Cannot read SLOTCTRL register\n", __func__);
                return retval;
        }
-       dbg("%s: SLOTCTRL %x value read %x\n",
-           __func__, ctrl->cap_base + SLOTCTRL, slot_ctrl);
+       ctrl_dbg(ctrl, "%s: SLOTCTRL %x value read %x\n",
+                __func__, ctrl->cap_base + SLOTCTRL, slot_ctrl);
 
        pwr_state = (slot_ctrl & PWR_CTRL) >> 10;
 
@@ -464,7 +470,8 @@ static int hpc_get_latch_status(struct slot *slot, u8 *status)
 
        retval = pciehp_readw(ctrl, SLOTSTATUS, &slot_status);
        if (retval) {
-               err("%s: Cannot read SLOTSTATUS register\n", __func__);
+               ctrl_err(ctrl, "%s: Cannot read SLOTSTATUS register\n",
+                        __func__);
                return retval;
        }
 
@@ -482,7 +489,8 @@ static int hpc_get_adapter_status(struct slot *slot, u8 *status)
 
        retval = pciehp_readw(ctrl, SLOTSTATUS, &slot_status);
        if (retval) {
-               err("%s: Cannot read SLOTSTATUS register\n", __func__);
+               ctrl_err(ctrl, "%s: Cannot read SLOTSTATUS register\n",
+                        __func__);
                return retval;
        }
        card_state = (u8)((slot_status & PRSN_STATE) >> 6);
@@ -500,7 +508,7 @@ static int hpc_query_power_fault(struct slot *slot)
 
        retval = pciehp_readw(ctrl, SLOTSTATUS, &slot_status);
        if (retval) {
-               err("%s: Cannot check for power fault\n", __func__);
+               ctrl_err(ctrl, "%s: Cannot check for power fault\n", __func__);
                return retval;
        }
        pwr_fault = (u8)((slot_status & PWR_FAULT_DETECTED) >> 1);
@@ -516,7 +524,7 @@ static int hpc_get_emi_status(struct slot *slot, u8 *status)
 
        retval = pciehp_readw(ctrl, SLOTSTATUS, &slot_status);
        if (retval) {
-               err("%s : Cannot check EMI status\n", __func__);
+               ctrl_err(ctrl, "%s : Cannot check EMI status\n", __func__);
                return retval;
        }
        *status = (slot_status & EMI_STATE) >> EMI_STATUS_BIT;
@@ -560,8 +568,8 @@ static int hpc_set_attention_status(struct slot *slot, u8 value)
                        return -1;
        }
        rc = pcie_write_cmd(ctrl, slot_cmd, cmd_mask);
-       dbg("%s: SLOTCTRL %x write cmd %x\n",
-           __func__, ctrl->cap_base + SLOTCTRL, slot_cmd);
+       ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n",
+                __func__, ctrl->cap_base + SLOTCTRL, slot_cmd);
 
        return rc;
 }
@@ -575,8 +583,8 @@ static void hpc_set_green_led_on(struct slot *slot)
        slot_cmd = 0x0100;
        cmd_mask = PWR_LED_CTRL;
        pcie_write_cmd(ctrl, slot_cmd, cmd_mask);
-       dbg("%s: SLOTCTRL %x write cmd %x\n",
-           __func__, ctrl->cap_base + SLOTCTRL, slot_cmd);
+       ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n",
+                __func__, ctrl->cap_base + SLOTCTRL, slot_cmd);
 }
 
 static void hpc_set_green_led_off(struct slot *slot)
@@ -588,8 +596,8 @@ static void hpc_set_green_led_off(struct slot *slot)
        slot_cmd = 0x0300;
        cmd_mask = PWR_LED_CTRL;
        pcie_write_cmd(ctrl, slot_cmd, cmd_mask);
-       dbg("%s: SLOTCTRL %x write cmd %x\n",
-           __func__, ctrl->cap_base + SLOTCTRL, slot_cmd);
+       ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n",
+                __func__, ctrl->cap_base + SLOTCTRL, slot_cmd);
 }
 
 static void hpc_set_green_led_blink(struct slot *slot)
@@ -601,8 +609,8 @@ static void hpc_set_green_led_blink(struct slot *slot)
        slot_cmd = 0x0200;
        cmd_mask = PWR_LED_CTRL;
        pcie_write_cmd(ctrl, slot_cmd, cmd_mask);
-       dbg("%s: SLOTCTRL %x write cmd %x\n",
-           __func__, ctrl->cap_base + SLOTCTRL, slot_cmd);
+       ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n",
+                __func__, ctrl->cap_base + SLOTCTRL, slot_cmd);
 }
 
 static int hpc_power_on_slot(struct slot * slot)
@@ -613,20 +621,22 @@ static int hpc_power_on_slot(struct slot * slot)
        u16 slot_status;
        int retval = 0;
 
-       dbg("%s: slot->hp_slot %x\n", __func__, slot->hp_slot);
+       ctrl_dbg(ctrl, "%s: slot->hp_slot %x\n", __func__, slot->hp_slot);
 
        /* Clear sticky power-fault bit from previous power failures */
        retval = pciehp_readw(ctrl, SLOTSTATUS, &slot_status);
        if (retval) {
-               err("%s: Cannot read SLOTSTATUS register\n", __func__);
+               ctrl_err(ctrl, "%s: Cannot read SLOTSTATUS register\n",
+                        __func__);
                return retval;
        }
        slot_status &= PWR_FAULT_DETECTED;
        if (slot_status) {
                retval = pciehp_writew(ctrl, SLOTSTATUS, slot_status);
                if (retval) {
-                       err("%s: Cannot write to SLOTSTATUS register\n",
-                           __func__);
+                       ctrl_err(ctrl,
+                                "%s: Cannot write to SLOTSTATUS register\n",
+                                __func__);
                        return retval;
                }
        }
@@ -644,11 +654,12 @@ static int hpc_power_on_slot(struct slot * slot)
        retval = pcie_write_cmd(ctrl, slot_cmd, cmd_mask);
 
        if (retval) {
-               err("%s: Write %x command failed!\n", __func__, slot_cmd);
+               ctrl_err(ctrl, "%s: Write %x command failed!\n",
+                        __func__, slot_cmd);
                return -1;
        }
-       dbg("%s: SLOTCTRL %x write cmd %x\n",
-           __func__, ctrl->cap_base + SLOTCTRL, slot_cmd);
+       ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n",
+                __func__, ctrl->cap_base + SLOTCTRL, slot_cmd);
 
        return retval;
 }
@@ -694,7 +705,7 @@ static int hpc_power_off_slot(struct slot * slot)
        int retval = 0;
        int changed;
 
-       dbg("%s: slot->hp_slot %x\n", __func__, slot->hp_slot);
+       ctrl_dbg(ctrl, "%s: slot->hp_slot %x\n", __func__, slot->hp_slot);
 
        /*
         * Set Bad DLLP Mask bit in Correctable Error Mask
@@ -722,12 +733,12 @@ static int hpc_power_off_slot(struct slot * slot)
 
        retval = pcie_write_cmd(ctrl, slot_cmd, cmd_mask);
        if (retval) {
-               err("%s: Write command failed!\n", __func__);
+               ctrl_err(ctrl, "%s: Write command failed!\n", __func__);
                retval = -1;
                goto out;
        }
-       dbg("%s: SLOTCTRL %x write cmd %x\n",
-           __func__, ctrl->cap_base + SLOTCTRL, slot_cmd);
+       ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n",
+                __func__, ctrl->cap_base + SLOTCTRL, slot_cmd);
  out:
        if (changed)
                pcie_unmask_bad_dllp(ctrl);
@@ -749,7 +760,8 @@ static irqreturn_t pcie_isr(int irq, void *dev_id)
        intr_loc = 0;
        do {
                if (pciehp_readw(ctrl, SLOTSTATUS, &detected)) {
-                       err("%s: Cannot read SLOTSTATUS\n", __func__);
+                       ctrl_err(ctrl, "%s: Cannot read SLOTSTATUS\n",
+                                __func__);
                        return IRQ_NONE;
                }
 
@@ -760,12 +772,13 @@ static irqreturn_t pcie_isr(int irq, void *dev_id)
                if (!intr_loc)
                        return IRQ_NONE;
                if (detected && pciehp_writew(ctrl, SLOTSTATUS, detected)) {
-                       err("%s: Cannot write to SLOTSTATUS\n", __func__);
+                       ctrl_err(ctrl, "%s: Cannot write to SLOTSTATUS\n",
+                                __func__);
                        return IRQ_NONE;
                }
        } while (detected);
 
-       dbg("%s: intr_loc %x\n", __FUNCTION__, intr_loc);
+       ctrl_dbg(ctrl, "%s: intr_loc %x\n", __func__, intr_loc);
 
        /* Check Command Complete Interrupt Pending */
        if (intr_loc & CMD_COMPLETED) {
@@ -807,7 +820,7 @@ static int hpc_get_max_lnk_speed(struct slot *slot, enum pci_bus_speed *value)
 
        retval = pciehp_readl(ctrl, LNKCAP, &lnk_cap);
        if (retval) {
-               err("%s: Cannot read LNKCAP register\n", __func__);
+               ctrl_err(ctrl, "%s: Cannot read LNKCAP register\n", __func__);
                return retval;
        }
 
@@ -821,7 +834,7 @@ static int hpc_get_max_lnk_speed(struct slot *slot, enum pci_bus_speed *value)
        }
 
        *value = lnk_speed;
-       dbg("Max link speed = %d\n", lnk_speed);
+       ctrl_dbg(ctrl, "Max link speed = %d\n", lnk_speed);
 
        return retval;
 }
@@ -836,7 +849,7 @@ static int hpc_get_max_lnk_width(struct slot *slot,
 
        retval = pciehp_readl(ctrl, LNKCAP, &lnk_cap);
        if (retval) {
-               err("%s: Cannot read LNKCAP register\n", __func__);
+               ctrl_err(ctrl, "%s: Cannot read LNKCAP register\n", __func__);
                return retval;
        }
 
@@ -871,7 +884,7 @@ static int hpc_get_max_lnk_width(struct slot *slot,
        }
 
        *value = lnk_wdth;
-       dbg("Max link width = %d\n", lnk_wdth);
+       ctrl_dbg(ctrl, "Max link width = %d\n", lnk_wdth);
 
        return retval;
 }
@@ -885,7 +898,8 @@ static int hpc_get_cur_lnk_speed(struct slot *slot, enum pci_bus_speed *value)
 
        retval = pciehp_readw(ctrl, LNKSTATUS, &lnk_status);
        if (retval) {
-               err("%s: Cannot read LNKSTATUS register\n", __func__);
+               ctrl_err(ctrl, "%s: Cannot read LNKSTATUS register\n",
+                        __func__);
                return retval;
        }
 
@@ -899,7 +913,7 @@ static int hpc_get_cur_lnk_speed(struct slot *slot, enum pci_bus_speed *value)
        }
 
        *value = lnk_speed;
-       dbg("Current link speed = %d\n", lnk_speed);
+       ctrl_dbg(ctrl, "Current link speed = %d\n", lnk_speed);
 
        return retval;
 }
@@ -914,7 +928,8 @@ static int hpc_get_cur_lnk_width(struct slot *slot,
 
        retval = pciehp_readw(ctrl, LNKSTATUS, &lnk_status);
        if (retval) {
-               err("%s: Cannot read LNKSTATUS register\n", __func__);
+               ctrl_err(ctrl, "%s: Cannot read LNKSTATUS register\n",
+                        __func__);
                return retval;
        }
 
@@ -949,7 +964,7 @@ static int hpc_get_cur_lnk_width(struct slot *slot,
        }
 
        *value = lnk_wdth;
-       dbg("Current link width = %d\n", lnk_wdth);
+       ctrl_dbg(ctrl, "Current link width = %d\n", lnk_wdth);
 
        return retval;
 }
@@ -998,7 +1013,8 @@ int pcie_enable_notification(struct controller *ctrl)
               PWR_FAULT_DETECT_ENABLE | HP_INTR_ENABLE | CMD_CMPL_INTR_ENABLE;
 
        if (pcie_write_cmd(ctrl, cmd, mask)) {
-               err("%s: Cannot enable software notification\n", __func__);
+               ctrl_err(ctrl, "%s: Cannot enable software notification\n",
+                        __func__);
                return -1;
        }
        return 0;
@@ -1010,7 +1026,8 @@ static void pcie_disable_notification(struct controller *ctrl)
        mask = PRSN_DETECT_ENABLE | ATTN_BUTTN_ENABLE | MRL_DETECT_ENABLE |
               PWR_FAULT_DETECT_ENABLE | HP_INTR_ENABLE | CMD_CMPL_INTR_ENABLE;
        if (pcie_write_cmd(ctrl, 0, mask))
-               warn("%s: Cannot disable software notification\n", __func__);
+               ctrl_warn(ctrl, "%s: Cannot disable software notification\n",
+                         __func__);
 }
 
 static int pcie_init_notification(struct controller *ctrl)
@@ -1071,34 +1088,45 @@ static inline void dbg_ctrl(struct controller *ctrl)
        if (!pciehp_debug)
                return;
 
-       dbg("Hotplug Controller:\n");
-       dbg("  Seg/Bus/Dev/Func/IRQ : %s IRQ %d\n", pci_name(pdev), pdev->irq);
-       dbg("  Vendor ID            : 0x%04x\n", pdev->vendor);
-       dbg("  Device ID            : 0x%04x\n", pdev->device);
-       dbg("  Subsystem ID         : 0x%04x\n", pdev->subsystem_device);
-       dbg("  Subsystem Vendor ID  : 0x%04x\n", pdev->subsystem_vendor);
-       dbg("  PCIe Cap offset      : 0x%02x\n", ctrl->cap_base);
+       ctrl_info(ctrl, "Hotplug Controller:\n");
+       ctrl_info(ctrl, "  Seg/Bus/Dev/Func/IRQ : %s IRQ %d\n",
+                 pci_name(pdev), pdev->irq);
+       ctrl_info(ctrl, "  Vendor ID            : 0x%04x\n", pdev->vendor);
+       ctrl_info(ctrl, "  Device ID            : 0x%04x\n", pdev->device);
+       ctrl_info(ctrl, "  Subsystem ID         : 0x%04x\n",
+                 pdev->subsystem_device);
+       ctrl_info(ctrl, "  Subsystem Vendor ID  : 0x%04x\n",
+                 pdev->subsystem_vendor);
+       ctrl_info(ctrl, "  PCIe Cap offset      : 0x%02x\n", ctrl->cap_base);
        for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
                if (!pci_resource_len(pdev, i))
                        continue;
-               dbg("  PCI resource [%d]     : 0x%llx@0x%llx\n", i,
-                   (unsigned long long)pci_resource_len(pdev, i),
-                   (unsigned long long)pci_resource_start(pdev, i));
+               ctrl_info(ctrl, "  PCI resource [%d]     : 0x%llx@0x%llx\n",
+                         i, (unsigned long long)pci_resource_len(pdev, i),
+                         (unsigned long long)pci_resource_start(pdev, i));
        }
-       dbg("Slot Capabilities      : 0x%08x\n", ctrl->slot_cap);
-       dbg("  Physical Slot Number : %d\n", ctrl->first_slot);
-       dbg("  Attention Button     : %3s\n", ATTN_BUTTN(ctrl) ? "yes" : "no");
-       dbg("  Power Controller     : %3s\n", POWER_CTRL(ctrl) ? "yes" : "no");
-       dbg("  MRL Sensor           : %3s\n", MRL_SENS(ctrl)   ? "yes" : "no");
-       dbg("  Attention Indicator  : %3s\n", ATTN_LED(ctrl)   ? "yes" : "no");
-       dbg("  Power Indicator      : %3s\n", PWR_LED(ctrl)    ? "yes" : "no");
-       dbg("  Hot-Plug Surprise    : %3s\n", HP_SUPR_RM(ctrl) ? "yes" : "no");
-       dbg("  EMI Present          : %3s\n", EMI(ctrl)        ? "yes" : "no");
-       dbg("  Command Completed    : %3s\n", NO_CMD_CMPL(ctrl)? "no" : "yes");
+       ctrl_info(ctrl, "Slot Capabilities      : 0x%08x\n", ctrl->slot_cap);
+       ctrl_info(ctrl, "  Physical Slot Number : %d\n", ctrl->first_slot);
+       ctrl_info(ctrl, "  Attention Button     : %3s\n",
+                 ATTN_BUTTN(ctrl) ? "yes" : "no");
+       ctrl_info(ctrl, "  Power Controller     : %3s\n",
+                 POWER_CTRL(ctrl) ? "yes" : "no");
+       ctrl_info(ctrl, "  MRL Sensor           : %3s\n",
+                 MRL_SENS(ctrl)   ? "yes" : "no");
+       ctrl_info(ctrl, "  Attention Indicator  : %3s\n",
+                 ATTN_LED(ctrl)   ? "yes" : "no");
+       ctrl_info(ctrl, "  Power Indicator      : %3s\n",
+                 PWR_LED(ctrl)    ? "yes" : "no");
+       ctrl_info(ctrl, "  Hot-Plug Surprise    : %3s\n",
+                 HP_SUPR_RM(ctrl) ? "yes" : "no");
+       ctrl_info(ctrl, "  EMI Present          : %3s\n",
+                 EMI(ctrl)        ? "yes" : "no");
+       ctrl_info(ctrl, "  Command Completed    : %3s\n",
+                 NO_CMD_CMPL(ctrl) ? "no" : "yes");
        pciehp_readw(ctrl, SLOTSTATUS, &reg16);
-       dbg("Slot Status            : 0x%04x\n", reg16);
+       ctrl_info(ctrl, "Slot Status            : 0x%04x\n", reg16);
        pciehp_readw(ctrl, SLOTCTRL, &reg16);
-       dbg("Slot Control           : 0x%04x\n", reg16);
+       ctrl_info(ctrl, "Slot Control           : 0x%04x\n", reg16);
 }
 
 struct controller *pcie_init(struct pcie_device *dev)
@@ -1109,19 +1137,21 @@ struct controller *pcie_init(struct pcie_device *dev)
 
        ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
        if (!ctrl) {
-               err("%s : out of memory\n", __func__);
+               dev_err(&dev->device, "%s : out of memory\n", __func__);
                goto abort;
        }
        INIT_LIST_HEAD(&ctrl->slot_list);
 
+       ctrl->pcie = dev;
        ctrl->pci_dev = pdev;
        ctrl->cap_base = pci_find_capability(pdev, PCI_CAP_ID_EXP);
        if (!ctrl->cap_base) {
-               err("%s: Cannot find PCI Express capability\n", __func__);
+               ctrl_err(ctrl, "%s: Cannot find PCI Express capability\n",
+                        __func__);
                goto abort;
        }
        if (pciehp_readl(ctrl, SLOTCAP, &slot_cap)) {
-               err("%s: Cannot read SLOTCAP register\n", __func__);
+               ctrl_err(ctrl, "%s: Cannot read SLOTCAP register\n", __func__);
                goto abort;
        }
 
@@ -1161,9 +1191,9 @@ struct controller *pcie_init(struct pcie_device *dev)
                        goto abort_ctrl;
        }
 
-       info("HPC vendor_id %x device_id %x ss_vid %x ss_did %x\n",
-            pdev->vendor, pdev->device,
-            pdev->subsystem_vendor, pdev->subsystem_device);
+       ctrl_info(ctrl, "HPC vendor_id %x device_id %x ss_vid %x ss_did %x\n",
+                 pdev->vendor, pdev->device, pdev->subsystem_vendor,
+                 pdev->subsystem_device);
 
        if (pcie_init_slot(ctrl))
                goto abort_ctrl;
index 6040dcceb256388285e1db7a10d64b62fd3925e5..ffd11148fbe21416149b5e600834f4ff5f8c054b 100644 (file)
@@ -198,18 +198,20 @@ int pciehp_configure_device(struct slot *p_slot)
        struct pci_dev *dev;
        struct pci_bus *parent = p_slot->ctrl->pci_dev->subordinate;
        int num, fn;
+       struct controller *ctrl = p_slot->ctrl;
 
        dev = pci_get_slot(parent, PCI_DEVFN(p_slot->device, 0));
        if (dev) {
-               err("Device %s already exists at %x:%x, cannot hot-add\n",
-                               pci_name(dev), p_slot->bus, p_slot->device);
+               ctrl_err(ctrl,
+                        "Device %s already exists at %x:%x, cannot hot-add\n",
+                        pci_name(dev), p_slot->bus, p_slot->device);
                pci_dev_put(dev);
                return -EINVAL;
        }
 
        num = pci_scan_slot(parent, PCI_DEVFN(p_slot->device, 0));
        if (num == 0) {
-               err("No new device found\n");
+               ctrl_err(ctrl, "No new device found\n");
                return -ENODEV;
        }
 
@@ -218,8 +220,8 @@ int pciehp_configure_device(struct slot *p_slot)
                if (!dev)
                        continue;
                if ((dev->class >> 16) == PCI_BASE_CLASS_DISPLAY) {
-                       err("Cannot hot-add display device %s\n",
-                                       pci_name(dev));
+                       ctrl_err(ctrl, "Cannot hot-add display device %s\n",
+                                pci_name(dev));
                        pci_dev_put(dev);
                        continue;
                }
@@ -244,9 +246,10 @@ int pciehp_unconfigure_device(struct slot *p_slot)
        u8 presence = 0;
        struct pci_bus *parent = p_slot->ctrl->pci_dev->subordinate;
        u16 command;
+       struct controller *ctrl = p_slot->ctrl;
 
-       dbg("%s: bus/dev = %x/%x\n", __func__, p_slot->bus,
-                               p_slot->device);
+       ctrl_dbg(ctrl, "%s: bus/dev = %x/%x\n", __func__,
+                p_slot->bus, p_slot->device);
        ret = p_slot->hpc_ops->get_adapter_status(p_slot, &presence);
        if (ret)
                presence = 0;
@@ -257,16 +260,17 @@ int pciehp_unconfigure_device(struct slot *p_slot)
                if (!temp)
                        continue;
                if ((temp->class >> 16) == PCI_BASE_CLASS_DISPLAY) {
-                       err("Cannot remove display device %s\n",
-                                       pci_name(temp));
+                       ctrl_err(ctrl, "Cannot remove display device %s\n",
+                                pci_name(temp));
                        pci_dev_put(temp);
                        continue;
                }
                if (temp->hdr_type == PCI_HEADER_TYPE_BRIDGE && presence) {
                        pci_read_config_byte(temp, PCI_BRIDGE_CONTROL, &bctl);
                        if (bctl & PCI_BRIDGE_CTL_VGA) {
-                               err("Cannot remove display device %s\n",
-                                   pci_name(temp));
+                               ctrl_err(ctrl,
+                                        "Cannot remove display device %s\n",
+                                        pci_name(temp));
                                pci_dev_put(temp);
                                continue;
                        }
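
The hunks above replace the driver's bare err()/dbg() macros with ctrl_err()/ctrl_dbg(), so every message identifies the hotplug controller it came from. A minimal sketch of the wrapper idea in plain C, assuming only that the macro prefixes a per-controller name (the struct and macro here are illustrative, not the pciehp definitions):

    #include <stdio.h>

    struct controller { const char *name; };

    /* Hypothetical ctrl_err(): stamp each message with the owning
     * controller, in the spirit of the pciehp conversion above. */
    #define ctrl_err(ctrl, fmt, ...) \
            fprintf(stderr, "%s: " fmt, (ctrl)->name, ##__VA_ARGS__)

    int main(void)
    {
            struct controller c = { "pciehp0" };

            ctrl_err(&c, "No new device found\n");
            return 0;
    }
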
index 7d5921b1ee7820960afcc6b2b0ae9d711172ea49..419919a87b0fa1d923898728964de310ae4bedb2 100644 (file)
 #define PRESENT         1      /* Card in slot */
 
 #define MY_NAME "rpaphp"
-extern int debug;
+extern int rpaphp_debug;
 #define dbg(format, arg...)                                    \
        do {                                                    \
-               if (debug)                                      \
+               if (rpaphp_debug)                                       \
                        printk(KERN_DEBUG "%s: " format,        \
                                MY_NAME , ## arg);              \
        } while (0)
index 1f84f402acdbdfbffad7ac13cb56597690a0561f..95d02a08fdc7f1a1558a96583043133da5f5071c 100644 (file)
@@ -37,7 +37,7 @@
                                /* and pci_do_scan_bus */
 #include "rpaphp.h"
 
-int debug;
+int rpaphp_debug;
 LIST_HEAD(rpaphp_slot_head);
 
 #define DRIVER_VERSION "0.1"
@@ -50,7 +50,7 @@ MODULE_AUTHOR(DRIVER_AUTHOR);
 MODULE_DESCRIPTION(DRIVER_DESC);
 MODULE_LICENSE("GPL");
 
-module_param(debug, bool, 0644);
+module_param_named(debug, rpaphp_debug, bool, 0644);
 
 /**
  * set_attention_status - set attention LED
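
Renaming the global from debug to rpaphp_debug avoids a symbol clash, while module_param_named() preserves the user-visible name, so rpaphp.debug=1 on the kernel command line and /sys/module/rpaphp/parameters/debug keep working. A minimal sketch of the pattern for a hypothetical module (all names illustrative):

    #include <linux/module.h>
    #include <linux/moduleparam.h>

    static int foo_debug;   /* namespaced symbol, safe from collisions */

    /* From user space the parameter is still just "debug". */
    module_param_named(debug, foo_debug, bool, 0644);
    MODULE_PARM_DESC(debug, "Enable debug output");
    MODULE_LICENSE("GPL");
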
index 5acfd4f3d4cb81fb48eb5c4897aa6d840c5b77a6..513e1e2823914dddf71b7ecc7f5d58ac13010459 100644 (file)
@@ -123,7 +123,7 @@ int rpaphp_enable_slot(struct slot *slot)
                        slot->state = CONFIGURED;
                }
 
-               if (debug) {
+               if (rpaphp_debug) {
                        struct pci_dev *dev;
                        dbg("%s: pci_devs of slot[%s]\n", __func__, slot->dn->full_name);
                        list_for_each_entry (dev, &bus->devices, bus_list)
index 279c940a00397444a8d49e03e1ec68386f78fc3c..bf7d6ce9bbb3e764621caa96db426509003e2d46 100644 (file)
@@ -126,7 +126,8 @@ int __ht_create_irq(struct pci_dev *dev, int idx, ht_irq_update_t *update)
        cfg->msg.address_hi = 0xffffffff;
 
        irq = create_irq();
-       if (irq < 0) {
+
+       if (irq <= 0) {
                kfree(cfg);
                return -EBUSY;
        }
index 738d4c89581cc7a41f5c2746add5dca767ad81be..2de5a3238c947be89213119d9cf468d2fa59e11d 100644 (file)
@@ -1,3 +1,4 @@
+#include <linux/interrupt.h>
 #include <linux/dmar.h>
 #include <linux/spinlock.h>
 #include <linux/jiffies.h>
@@ -11,41 +12,64 @@ static struct ioapic_scope ir_ioapic[MAX_IO_APICS];
 static int ir_ioapic_num;
 int intr_remapping_enabled;
 
-static struct {
+struct irq_2_iommu {
        struct intel_iommu *iommu;
        u16 irte_index;
        u16 sub_handle;
        u8  irte_mask;
-} irq_2_iommu[NR_IRQS];
+};
+
+static struct irq_2_iommu irq_2_iommuX[NR_IRQS];
+
+static struct irq_2_iommu *irq_2_iommu(unsigned int irq)
+{
+       return (irq < nr_irqs) ? irq_2_iommuX + irq : NULL;
+}
+
+static struct irq_2_iommu *irq_2_iommu_alloc(unsigned int irq)
+{
+       return irq_2_iommu(irq);
+}
 
 static DEFINE_SPINLOCK(irq_2_ir_lock);
 
-int irq_remapped(int irq)
+static struct irq_2_iommu *valid_irq_2_iommu(unsigned int irq)
 {
-       if (irq > NR_IRQS)
-               return 0;
+       struct irq_2_iommu *irq_iommu;
+
+       irq_iommu = irq_2_iommu(irq);
+
+       if (!irq_iommu)
+               return NULL;
+
+       if (!irq_iommu->iommu)
+               return NULL;
 
-       if (!irq_2_iommu[irq].iommu)
-               return 0;
+       return irq_iommu;
+}
 
-       return 1;
+int irq_remapped(int irq)
+{
+       return valid_irq_2_iommu(irq) != NULL;
 }
 
 int get_irte(int irq, struct irte *entry)
 {
        int index;
+       struct irq_2_iommu *irq_iommu;
 
-       if (!entry || irq > NR_IRQS)
+       if (!entry)
                return -1;
 
        spin_lock(&irq_2_ir_lock);
-       if (!irq_2_iommu[irq].iommu) {
+       irq_iommu = valid_irq_2_iommu(irq);
+       if (!irq_iommu) {
                spin_unlock(&irq_2_ir_lock);
                return -1;
        }
 
-       index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle;
-       *entry = *(irq_2_iommu[irq].iommu->ir_table->base + index);
+       index = irq_iommu->irte_index + irq_iommu->sub_handle;
+       *entry = *(irq_iommu->iommu->ir_table->base + index);
 
        spin_unlock(&irq_2_ir_lock);
        return 0;
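
The per-IRQ state is now reached only through irq_2_iommu(), irq_2_iommu_alloc() and valid_irq_2_iommu(), so the nr_irqs bounds check lives in one place and the static backing store can later be replaced (for sparse IRQ numbering) without touching callers; in this commit irq_2_iommu_alloc() is still a trivial lookup. A plain-C sketch of the accessor pattern, with illustrative names:

    #include <stddef.h>

    #define MAX_ENTRIES 64

    struct mapping {
            void *owner;
            unsigned short index;
    };

    static struct mapping table[MAX_ENTRIES];
    static size_t nr_entries = MAX_ENTRIES;  /* could become dynamic later */

    /* Every bounds check funnels through this one helper. */
    static struct mapping *mapping_get(unsigned int id)
    {
            return (id < nr_entries) ? &table[id] : NULL;
    }

    /* "Valid" means in range and actually claimed by an owner. */
    static struct mapping *mapping_valid(unsigned int id)
    {
            struct mapping *m = mapping_get(id);

            return (m && m->owner) ? m : NULL;
    }

    int main(void)
    {
            table[3].owner = &table[3];     /* mark one entry in use */
            return (mapping_valid(3) && !mapping_valid(5)) ? 0 : 1;
    }
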
@@ -54,6 +78,7 @@ int get_irte(int irq, struct irte *entry)
 int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
 {
        struct ir_table *table = iommu->ir_table;
+       struct irq_2_iommu *irq_iommu;
        u16 index, start_index;
        unsigned int mask = 0;
        int i;
@@ -61,6 +86,10 @@ int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
        if (!count)
                return -1;
 
+       /* protect irq_2_iommu_alloc later */
+       if (irq >= nr_irqs)
+               return -1;
+
        /*
         * start the IRTE search from index 0.
         */
@@ -100,10 +129,11 @@ int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
        for (i = index; i < index + count; i++)
                table->base[i].present = 1;
 
-       irq_2_iommu[irq].iommu = iommu;
-       irq_2_iommu[irq].irte_index =  index;
-       irq_2_iommu[irq].sub_handle = 0;
-       irq_2_iommu[irq].irte_mask = mask;
+       irq_iommu = irq_2_iommu_alloc(irq);
+       irq_iommu->iommu = iommu;
+       irq_iommu->irte_index =  index;
+       irq_iommu->sub_handle = 0;
+       irq_iommu->irte_mask = mask;
 
        spin_unlock(&irq_2_ir_lock);
 
@@ -124,31 +154,33 @@ static void qi_flush_iec(struct intel_iommu *iommu, int index, int mask)
 int map_irq_to_irte_handle(int irq, u16 *sub_handle)
 {
        int index;
+       struct irq_2_iommu *irq_iommu;
 
        spin_lock(&irq_2_ir_lock);
-       if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) {
+       irq_iommu = valid_irq_2_iommu(irq);
+       if (!irq_iommu) {
                spin_unlock(&irq_2_ir_lock);
                return -1;
        }
 
-       *sub_handle = irq_2_iommu[irq].sub_handle;
-       index = irq_2_iommu[irq].irte_index;
+       *sub_handle = irq_iommu->sub_handle;
+       index = irq_iommu->irte_index;
        spin_unlock(&irq_2_ir_lock);
        return index;
 }
 
 int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle)
 {
+       struct irq_2_iommu *irq_iommu;
+
        spin_lock(&irq_2_ir_lock);
-       if (irq >= NR_IRQS || irq_2_iommu[irq].iommu) {
-               spin_unlock(&irq_2_ir_lock);
-               return -1;
-       }
 
-       irq_2_iommu[irq].iommu = iommu;
-       irq_2_iommu[irq].irte_index = index;
-       irq_2_iommu[irq].sub_handle = subhandle;
-       irq_2_iommu[irq].irte_mask = 0;
+       irq_iommu = irq_2_iommu_alloc(irq);
+
+       irq_iommu->iommu = iommu;
+       irq_iommu->irte_index = index;
+       irq_iommu->sub_handle = subhandle;
+       irq_iommu->irte_mask = 0;
 
        spin_unlock(&irq_2_ir_lock);
 
@@ -157,16 +189,19 @@ int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle)
 
 int clear_irte_irq(int irq, struct intel_iommu *iommu, u16 index)
 {
+       struct irq_2_iommu *irq_iommu;
+
        spin_lock(&irq_2_ir_lock);
-       if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) {
+       irq_iommu = valid_irq_2_iommu(irq);
+       if (!irq_iommu) {
                spin_unlock(&irq_2_ir_lock);
                return -1;
        }
 
-       irq_2_iommu[irq].iommu = NULL;
-       irq_2_iommu[irq].irte_index = 0;
-       irq_2_iommu[irq].sub_handle = 0;
-       irq_2_iommu[irq].irte_mask = 0;
+       irq_iommu->iommu = NULL;
+       irq_iommu->irte_index = 0;
+       irq_iommu->sub_handle = 0;
+       irq_iommu->irte_mask = 0;
 
        spin_unlock(&irq_2_ir_lock);
 
@@ -178,16 +213,18 @@ int modify_irte(int irq, struct irte *irte_modified)
        int index;
        struct irte *irte;
        struct intel_iommu *iommu;
+       struct irq_2_iommu *irq_iommu;
 
        spin_lock(&irq_2_ir_lock);
-       if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) {
+       irq_iommu = valid_irq_2_iommu(irq);
+       if (!irq_iommu) {
                spin_unlock(&irq_2_ir_lock);
                return -1;
        }
 
-       iommu = irq_2_iommu[irq].iommu;
+       iommu = irq_iommu->iommu;
 
-       index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle;
+       index = irq_iommu->irte_index + irq_iommu->sub_handle;
        irte = &iommu->ir_table->base[index];
 
        set_64bit((unsigned long *)irte, irte_modified->low | (1 << 1));
@@ -203,18 +240,20 @@ int flush_irte(int irq)
 {
        int index;
        struct intel_iommu *iommu;
+       struct irq_2_iommu *irq_iommu;
 
        spin_lock(&irq_2_ir_lock);
-       if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) {
+       irq_iommu = valid_irq_2_iommu(irq);
+       if (!irq_iommu) {
                spin_unlock(&irq_2_ir_lock);
                return -1;
        }
 
-       iommu = irq_2_iommu[irq].iommu;
+       iommu = irq_iommu->iommu;
 
-       index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle;
+       index = irq_iommu->irte_index + irq_iommu->sub_handle;
 
-       qi_flush_iec(iommu, index, irq_2_iommu[irq].irte_mask);
+       qi_flush_iec(iommu, index, irq_iommu->irte_mask);
        spin_unlock(&irq_2_ir_lock);
 
        return 0;
@@ -246,28 +285,30 @@ int free_irte(int irq)
        int index, i;
        struct irte *irte;
        struct intel_iommu *iommu;
+       struct irq_2_iommu *irq_iommu;
 
        spin_lock(&irq_2_ir_lock);
-       if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) {
+       irq_iommu = valid_irq_2_iommu(irq);
+       if (!irq_iommu) {
                spin_unlock(&irq_2_ir_lock);
                return -1;
        }
 
-       iommu = irq_2_iommu[irq].iommu;
+       iommu = irq_iommu->iommu;
 
-       index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle;
+       index = irq_iommu->irte_index + irq_iommu->sub_handle;
        irte = &iommu->ir_table->base[index];
 
-       if (!irq_2_iommu[irq].sub_handle) {
-               for (i = 0; i < (1 << irq_2_iommu[irq].irte_mask); i++)
+       if (!irq_iommu->sub_handle) {
+               for (i = 0; i < (1 << irq_iommu->irte_mask); i++)
                        set_64bit((unsigned long *)irte, 0);
-               qi_flush_iec(iommu, index, irq_2_iommu[irq].irte_mask);
+               qi_flush_iec(iommu, index, irq_iommu->irte_mask);
        }
 
-       irq_2_iommu[irq].iommu = NULL;
-       irq_2_iommu[irq].irte_index = 0;
-       irq_2_iommu[irq].sub_handle = 0;
-       irq_2_iommu[irq].irte_mask = 0;
+       irq_iommu->iommu = NULL;
+       irq_iommu->irte_index = 0;
+       irq_iommu->sub_handle = 0;
+       irq_iommu->irte_mask = 0;
 
        spin_unlock(&irq_2_ir_lock);
 
index 4a10b5624f728f49c0cc8977cb48a4bbf47b2bd2..d2812013fd2293e0f5520aaf4e1cd649b715f535 100644 (file)
@@ -378,23 +378,21 @@ static int msi_capability_init(struct pci_dev *dev)
        entry->msi_attrib.masked = 1;
        entry->msi_attrib.default_irq = dev->irq;       /* Save IOAPIC IRQ */
        entry->msi_attrib.pos = pos;
-       if (is_mask_bit_support(control)) {
+       if (entry->msi_attrib.maskbit) {
                entry->mask_base = (void __iomem *)(long)msi_mask_bits_reg(pos,
-                               is_64bit_address(control));
+                               entry->msi_attrib.is_64);
        }
        entry->dev = dev;
        if (entry->msi_attrib.maskbit) {
                unsigned int maskbits, temp;
                /* All MSIs are unmasked by default, Mask them all */
                pci_read_config_dword(dev,
-                       msi_mask_bits_reg(pos, is_64bit_address(control)),
+                       msi_mask_bits_reg(pos, entry->msi_attrib.is_64),
                        &maskbits);
                temp = (1 << multi_msi_capable(control));
                temp = ((temp - 1) & ~temp);
                maskbits |= temp;
-               pci_write_config_dword(dev,
-                       msi_mask_bits_reg(pos, is_64bit_address(control)),
-                       maskbits);
+               pci_write_config_dword(dev,
+                       msi_mask_bits_reg(pos, entry->msi_attrib.is_64),
+                       maskbits);
                entry->msi_attrib.maskbits_mask = temp;
        }
        list_add_tail(&entry->list, &dev->msi_list);
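
The hunk above also caches control-register properties in entry->msi_attrib instead of re-deriving them with is_mask_bit_support()/is_64bit_address(). The mask arithmetic deserves a note: assuming multi_msi_capable() yields the number of supported vectors (a power of two), the two-step expression reduces to a mask of the low bits, one per vector. A quick stand-alone check:

    #include <assert.h>

    int main(void)
    {
            unsigned int vectors = 4;           /* e.g. 4 MSI vectors */
            unsigned int temp = 1u << vectors;  /* 0b10000 */

            /* (temp - 1) shares no bit with temp, so ANDing with ~temp
             * is a no-op; the result is simply 2^vectors - 1. */
            temp = (temp - 1) & ~temp;
            assert(temp == 0xf);                /* one mask bit per vector */
            return 0;
    }
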
index a13f53486114fe27e665bfc7451933d823465ca1..b4cdd690ae71ca9d05811a6ce6cd9bcd0de15b20 100644 (file)
@@ -43,18 +43,32 @@ store_new_id(struct device_driver *driver, const char *buf, size_t count)
 {
        struct pci_dynid *dynid;
        struct pci_driver *pdrv = to_pci_driver(driver);
+       const struct pci_device_id *ids = pdrv->id_table;
        __u32 vendor, device, subvendor=PCI_ANY_ID,
                subdevice=PCI_ANY_ID, class=0, class_mask=0;
        unsigned long driver_data=0;
        int fields=0;
-       int retval = 0;
+       int retval;
 
-       fields = sscanf(buf, "%x %x %x %x %x %x %lux",
+       fields = sscanf(buf, "%x %x %x %x %x %x %lx",
                        &vendor, &device, &subvendor, &subdevice,
                        &class, &class_mask, &driver_data);
        if (fields < 2)
                return -EINVAL;
 
+       /* Only accept driver_data values that match an existing id_table
+          entry */
+       retval = -EINVAL;
+       while (ids->vendor || ids->subvendor || ids->class_mask) {
+               if (driver_data == ids->driver_data) {
+                       retval = 0;
+                       break;
+               }
+               ids++;
+       }
+       if (retval)     /* No match */
+               return retval;
+
        dynid = kzalloc(sizeof(*dynid), GFP_KERNEL);
        if (!dynid)
                return -ENOMEM;
@@ -65,8 +79,7 @@ store_new_id(struct device_driver *driver, const char *buf, size_t count)
        dynid->id.subdevice = subdevice;
        dynid->id.class = class;
        dynid->id.class_mask = class_mask;
-       dynid->id.driver_data = pdrv->dynids.use_driver_data ?
-               driver_data : 0UL;
+       dynid->id.driver_data = driver_data;
 
        spin_lock(&pdrv->dynids.lock);
        list_add_tail(&dynid->node, &pdrv->dynids.list);
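
The new loop walks pdrv->id_table up to its all-zero terminator and accepts a user-supplied driver_data only if some static entry already uses that value, replacing the coarser use_driver_data opt-in removed below. A self-contained sketch of the sentinel-terminated validation (types and values illustrative):

    #include <stdbool.h>
    #include <stdio.h>

    struct id { unsigned int vendor; unsigned long driver_data; };

    static const struct id table[] = {
            { 0x8086, 1 },
            { 0x10de, 2 },
            { 0, 0 },       /* all-zero sentinel ends the table */
    };

    static bool driver_data_ok(unsigned long value)
    {
            const struct id *ids;

            for (ids = table; ids->vendor; ids++)
                    if (ids->driver_data == value)
                            return true;
            return false;
    }

    int main(void)
    {
            printf("%d %d\n", driver_data_ok(2), driver_data_ok(7)); /* 1 0 */
            return 0;
    }
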
index 77baff022f71b85259e1e064a06531b03a9bb727..110022d7868976a5bf2885737f5bdddf175aa372 100644 (file)
@@ -423,7 +423,7 @@ pci_write_vpd(struct kobject *kobj, struct bin_attribute *bin_attr,
  * Reads 1, 2, or 4 bytes from legacy I/O port space using an arch specific
  * callback routine (pci_legacy_read).
  */
-ssize_t
+static ssize_t
 pci_read_legacy_io(struct kobject *kobj, struct bin_attribute *bin_attr,
                   char *buf, loff_t off, size_t count)
 {
@@ -448,7 +448,7 @@ pci_read_legacy_io(struct kobject *kobj, struct bin_attribute *bin_attr,
  * Writes 1, 2, or 4 bytes from legacy I/O port space using an arch specific
  * callback routine (pci_legacy_write).
  */
-ssize_t
+static ssize_t
 pci_write_legacy_io(struct kobject *kobj, struct bin_attribute *bin_attr,
                    char *buf, loff_t off, size_t count)
 {
@@ -468,11 +468,11 @@ pci_write_legacy_io(struct kobject *kobj, struct bin_attribute *bin_attr,
  * @attr: struct bin_attribute for this file
  * @vma: struct vm_area_struct passed to mmap
  *
- * Uses an arch specific callback, pci_mmap_legacy_page_range, to mmap
+ * Uses an arch specific callback, pci_mmap_legacy_mem_page_range, to mmap
  * legacy memory space (first meg of bus space) into application virtual
  * memory space.
  */
-int
+static int
 pci_mmap_legacy_mem(struct kobject *kobj, struct bin_attribute *attr,
                     struct vm_area_struct *vma)
 {
@@ -480,7 +480,90 @@ pci_mmap_legacy_mem(struct kobject *kobj, struct bin_attribute *attr,
                                                       struct device,
                                                      kobj));
 
-        return pci_mmap_legacy_page_range(bus, vma);
+        return pci_mmap_legacy_page_range(bus, vma, pci_mmap_mem);
+}
+
+/**
+ * pci_mmap_legacy_io - map legacy PCI IO into user memory space
+ * @kobj: kobject corresponding to device to be mapped
+ * @attr: struct bin_attribute for this file
+ * @vma: struct vm_area_struct passed to mmap
+ *
+ * Uses an arch specific callback, pci_mmap_legacy_io_page_range, to mmap
+ * legacy IO space (first meg of bus space) into application virtual
+ * memory space. Returns -ENOSYS if the operation isn't supported
+ */
+static int
+pci_mmap_legacy_io(struct kobject *kobj, struct bin_attribute *attr,
+                  struct vm_area_struct *vma)
+{
+        struct pci_bus *bus = to_pci_bus(container_of(kobj,
+                                                      struct device,
+                                                     kobj));
+
+        return pci_mmap_legacy_page_range(bus, vma, pci_mmap_io);
+}
+
+/**
+ * pci_create_legacy_files - create legacy I/O port and memory files
+ * @b: bus to create files under
+ *
+ * Some platforms allow access to legacy I/O port and ISA memory space on
+ * a per-bus basis.  This routine creates the files and ties them into
+ * their associated read, write and mmap files from pci-sysfs.c
+ *
+ * On error unwind, but don't propagate the error to the caller
+ * as it is ok to set up the PCI bus without these files.
+ */
+void pci_create_legacy_files(struct pci_bus *b)
+{
+       int error;
+
+       b->legacy_io = kzalloc(sizeof(struct bin_attribute) * 2,
+                              GFP_ATOMIC);
+       if (!b->legacy_io)
+               goto kzalloc_err;
+
+       b->legacy_io->attr.name = "legacy_io";
+       b->legacy_io->size = 0xffff;
+       b->legacy_io->attr.mode = S_IRUSR | S_IWUSR;
+       b->legacy_io->read = pci_read_legacy_io;
+       b->legacy_io->write = pci_write_legacy_io;
+       b->legacy_io->mmap = pci_mmap_legacy_io;
+       error = device_create_bin_file(&b->dev, b->legacy_io);
+       if (error)
+               goto legacy_io_err;
+
+       /* Allocated above after the legacy_io struct */
+       b->legacy_mem = b->legacy_io + 1;
+       b->legacy_mem->attr.name = "legacy_mem";
+       b->legacy_mem->size = 1024*1024;
+       b->legacy_mem->attr.mode = S_IRUSR | S_IWUSR;
+       b->legacy_mem->mmap = pci_mmap_legacy_mem;
+       error = device_create_bin_file(&b->dev, b->legacy_mem);
+       if (error)
+               goto legacy_mem_err;
+
+       return;
+
+legacy_mem_err:
+       device_remove_bin_file(&b->dev, b->legacy_io);
+legacy_io_err:
+       kfree(b->legacy_io);
+       b->legacy_io = NULL;
+kzalloc_err:
+       printk(KERN_WARNING "pci: warning: could not create legacy I/O port "
+              "and ISA memory resources to sysfs\n");
+       return;
+}
+
+void pci_remove_legacy_files(struct pci_bus *b)
+{
+       if (b->legacy_io) {
+               device_remove_bin_file(&b->dev, b->legacy_io);
+               device_remove_bin_file(&b->dev, b->legacy_mem);
+               kfree(b->legacy_io); /* both are allocated here */
+       }
 }
 #endif /* HAVE_PCI_LEGACY */
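
pci_create_legacy_files(), moved into this file above, draws both bin_attributes from a single two-element kzalloc() and points legacy_mem one element past legacy_io, so teardown needs exactly one kfree(). A plain-C sketch of the paired-allocation trick (structs illustrative):

    #include <stdlib.h>

    struct bin_attr { const char *name; size_t size; };

    struct bus {
            struct bin_attr *legacy_io;
            struct bin_attr *legacy_mem;
    };

    static int create_pair(struct bus *b)
    {
            b->legacy_io = calloc(2, sizeof(*b->legacy_io));
            if (!b->legacy_io)
                    return -1;

            b->legacy_mem = b->legacy_io + 1;  /* same allocation */
            b->legacy_io->name  = "legacy_io";
            b->legacy_mem->name = "legacy_mem";
            return 0;
    }

    static void destroy_pair(struct bus *b)
    {
            free(b->legacy_io);                /* frees both entries */
            b->legacy_io = NULL;
            b->legacy_mem = NULL;
    }

    int main(void)
    {
            struct bus b;

            if (create_pair(&b))
                    return 1;
            destroy_pair(&b);
            return 0;
    }
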
 
@@ -715,7 +798,7 @@ static struct bin_attribute pci_config_attr = {
                .name = "config",
                .mode = S_IRUGO | S_IWUSR,
        },
-       .size = 256,
+       .size = PCI_CFG_SPACE_SIZE,
        .read = pci_read_config,
        .write = pci_write_config,
 };
@@ -725,7 +808,7 @@ static struct bin_attribute pcie_config_attr = {
                .name = "config",
                .mode = S_IRUGO | S_IWUSR,
        },
-       .size = 4096,
+       .size = PCI_CFG_SPACE_EXP_SIZE,
        .read = pci_read_config,
        .write = pci_write_config,
 };
@@ -735,86 +818,103 @@ int __attribute__ ((weak)) pcibios_add_platform_entries(struct pci_dev *dev)
        return 0;
 }
 
+static int pci_create_capabilities_sysfs(struct pci_dev *dev)
+{
+       int retval;
+       struct bin_attribute *attr;
+
+       /* If the device has VPD, try to expose it in sysfs. */
+       if (dev->vpd) {
+               attr = kzalloc(sizeof(*attr), GFP_ATOMIC);
+               if (!attr)
+                       return -ENOMEM;
+
+               attr->size = dev->vpd->len;
+               attr->attr.name = "vpd";
+               attr->attr.mode = S_IRUSR | S_IWUSR;
+               attr->read = pci_read_vpd;
+               attr->write = pci_write_vpd;
+               retval = sysfs_create_bin_file(&dev->dev.kobj, attr);
+               if (retval) {
+                       kfree(dev->vpd->attr);
+                       return retval;
+               }
+               dev->vpd->attr = attr;
+       }
+
+       /* Active State Power Management */
+       pcie_aspm_create_sysfs_dev_files(dev);
+
+       return 0;
+}
+
 int __must_check pci_create_sysfs_dev_files (struct pci_dev *pdev)
 {
-       struct bin_attribute *attr = NULL;
        int retval;
+       int rom_size = 0;
+       struct bin_attribute *attr;
 
        if (!sysfs_initialized)
                return -EACCES;
 
-       if (pdev->cfg_size < 4096)
+       if (pdev->cfg_size < PCI_CFG_SPACE_EXP_SIZE)
                retval = sysfs_create_bin_file(&pdev->dev.kobj, &pci_config_attr);
        else
                retval = sysfs_create_bin_file(&pdev->dev.kobj, &pcie_config_attr);
        if (retval)
                goto err;
 
-       /* If the device has VPD, try to expose it in sysfs. */
-       if (pdev->vpd) {
-               attr = kzalloc(sizeof(*attr), GFP_ATOMIC);
-               if (attr) {
-                       pdev->vpd->attr = attr;
-                       attr->size = pdev->vpd->len;
-                       attr->attr.name = "vpd";
-                       attr->attr.mode = S_IRUSR | S_IWUSR;
-                       attr->read = pci_read_vpd;
-                       attr->write = pci_write_vpd;
-                       retval = sysfs_create_bin_file(&pdev->dev.kobj, attr);
-                       if (retval)
-                               goto err_vpd;
-               } else {
-                       retval = -ENOMEM;
-                       goto err_config_file;
-               }
-       }
-
        retval = pci_create_resource_files(pdev);
        if (retval)
-               goto err_vpd_file;
+               goto err_config_file;
+
+       if (pci_resource_len(pdev, PCI_ROM_RESOURCE))
+               rom_size = pci_resource_len(pdev, PCI_ROM_RESOURCE);
+       else if (pdev->resource[PCI_ROM_RESOURCE].flags & IORESOURCE_ROM_SHADOW)
+               rom_size = 0x20000;
 
        /* If the device has a ROM, try to expose it in sysfs. */
-       if (pci_resource_len(pdev, PCI_ROM_RESOURCE) ||
-           (pdev->resource[PCI_ROM_RESOURCE].flags & IORESOURCE_ROM_SHADOW)) {
+       if (rom_size) {
                attr = kzalloc(sizeof(*attr), GFP_ATOMIC);
-               if (attr) {
-                       pdev->rom_attr = attr;
-                       attr->size = pci_resource_len(pdev, PCI_ROM_RESOURCE);
-                       attr->attr.name = "rom";
-                       attr->attr.mode = S_IRUSR;
-                       attr->read = pci_read_rom;
-                       attr->write = pci_write_rom;
-                       retval = sysfs_create_bin_file(&pdev->dev.kobj, attr);
-                       if (retval)
-                               goto err_rom;
-               } else {
+               if (!attr) {
                        retval = -ENOMEM;
                        goto err_resource_files;
                }
+               attr->size = rom_size;
+               attr->attr.name = "rom";
+               attr->attr.mode = S_IRUSR;
+               attr->read = pci_read_rom;
+               attr->write = pci_write_rom;
+               retval = sysfs_create_bin_file(&pdev->dev.kobj, attr);
+               if (retval) {
+                       kfree(attr);
+                       goto err_resource_files;
+               }
+               pdev->rom_attr = attr;
        }
+
        /* add platform-specific attributes */
-       if (pcibios_add_platform_entries(pdev))
+       retval = pcibios_add_platform_entries(pdev);
+       if (retval)
                goto err_rom_file;
 
-       pcie_aspm_create_sysfs_dev_files(pdev);
+       /* add sysfs entries for various capabilities */
+       retval = pci_create_capabilities_sysfs(pdev);
+       if (retval)
+               goto err_rom_file;
 
        return 0;
 
 err_rom_file:
-       if (pci_resource_len(pdev, PCI_ROM_RESOURCE))
+       if (rom_size) {
                sysfs_remove_bin_file(&pdev->dev.kobj, pdev->rom_attr);
-err_rom:
-       kfree(pdev->rom_attr);
+               kfree(pdev->rom_attr);
+               pdev->rom_attr = NULL;
+       }
 err_resource_files:
        pci_remove_resource_files(pdev);
-err_vpd_file:
-       if (pdev->vpd) {
-               sysfs_remove_bin_file(&pdev->dev.kobj, pdev->vpd->attr);
-err_vpd:
-               kfree(pdev->vpd->attr);
-       }
 err_config_file:
-       if (pdev->cfg_size < 4096)
+       if (pdev->cfg_size < PCI_CFG_SPACE_EXP_SIZE)
                sysfs_remove_bin_file(&pdev->dev.kobj, &pci_config_attr);
        else
                sysfs_remove_bin_file(&pdev->dev.kobj, &pcie_config_attr);
@@ -822,6 +922,16 @@ err:
        return retval;
 }
 
+static void pci_remove_capabilities_sysfs(struct pci_dev *dev)
+{
+       if (dev->vpd && dev->vpd->attr) {
+               sysfs_remove_bin_file(&dev->dev.kobj, dev->vpd->attr);
+               kfree(dev->vpd->attr);
+       }
+
+       pcie_aspm_remove_sysfs_dev_files(dev);
+}
+
 /**
  * pci_remove_sysfs_dev_files - cleanup PCI specific sysfs files
  * @pdev: device whose entries we should free
@@ -830,27 +940,28 @@ err:
  */
 void pci_remove_sysfs_dev_files(struct pci_dev *pdev)
 {
+       int rom_size = 0;
+
        if (!sysfs_initialized)
                return;
 
-       pcie_aspm_remove_sysfs_dev_files(pdev);
+       pci_remove_capabilities_sysfs(pdev);
 
-       if (pdev->vpd) {
-               sysfs_remove_bin_file(&pdev->dev.kobj, pdev->vpd->attr);
-               kfree(pdev->vpd->attr);
-       }
-       if (pdev->cfg_size < 4096)
+       if (pdev->cfg_size < PCI_CFG_SPACE_EXP_SIZE)
                sysfs_remove_bin_file(&pdev->dev.kobj, &pci_config_attr);
        else
                sysfs_remove_bin_file(&pdev->dev.kobj, &pcie_config_attr);
 
        pci_remove_resource_files(pdev);
 
-       if (pci_resource_len(pdev, PCI_ROM_RESOURCE)) {
-               if (pdev->rom_attr) {
-                       sysfs_remove_bin_file(&pdev->dev.kobj, pdev->rom_attr);
-                       kfree(pdev->rom_attr);
-               }
+       if (pci_resource_len(pdev, PCI_ROM_RESOURCE))
+               rom_size = pci_resource_len(pdev, PCI_ROM_RESOURCE);
+       else if (pdev->resource[PCI_ROM_RESOURCE].flags & IORESOURCE_ROM_SHADOW)
+               rom_size = 0x20000;
+
+       if (rom_size && pdev->rom_attr) {
+               sysfs_remove_bin_file(&pdev->dev.kobj, pdev->rom_attr);
+               kfree(pdev->rom_attr);
        }
 }
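
The reworked pci_create_sysfs_dev_files() above keeps the kernel's usual goto-unwind shape: each failure jumps to a label that tears down exactly what earlier steps built, in reverse order. A minimal stand-alone illustration of the idiom:

    #include <stdlib.h>

    static int setup(void)
    {
            char *a, *b;

            a = malloc(16);
            if (!a)
                    goto err;
            b = malloc(16);
            if (!b)
                    goto err_free_a;

            /* ... use a and b ... */
            free(b);
            free(a);
            return 0;

    err_free_a:
            free(a);        /* undo only what succeeded */
    err:
            return -1;
    }

    int main(void)
    {
            return setup() ? 1 : 0;
    }
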
 
index dbe9f39f44363b3eb50d9cc7bcac0a50bd701315..4db261e13e69eb397ffce8dc3eeb95e9509cbf00 100644 (file)
@@ -213,10 +213,13 @@ int pci_bus_find_capability(struct pci_bus *bus, unsigned int devfn, int cap)
 int pci_find_ext_capability(struct pci_dev *dev, int cap)
 {
        u32 header;
-       int ttl = 480; /* 3840 bytes, minimum 8 bytes per capability */
-       int pos = 0x100;
+       int ttl;
+       int pos = PCI_CFG_SPACE_SIZE;
 
-       if (dev->cfg_size <= 256)
+       /* minimum 8 bytes per capability */
+       ttl = (PCI_CFG_SPACE_EXP_SIZE - PCI_CFG_SPACE_SIZE) / 8;
+
+       if (dev->cfg_size <= PCI_CFG_SPACE_SIZE)
                return 0;
 
        if (pci_read_config_dword(dev, pos, &header) != PCIBIOS_SUCCESSFUL)
@@ -234,7 +237,7 @@ int pci_find_ext_capability(struct pci_dev *dev, int cap)
                        return pos;
 
                pos = PCI_EXT_CAP_NEXT(header);
-               if (pos < 0x100)
+               if (pos < PCI_CFG_SPACE_SIZE)
                        break;
 
                if (pci_read_config_dword(dev, pos, &header) != PCIBIOS_SUCCESSFUL)
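
Deriving ttl from the named constants keeps it equal to the old literal 480: extended capabilities occupy the 4096 - 256 bytes above standard config space, at a minimum of 8 bytes apiece, and the counter bounds the walk so a malformed capability chain cannot loop forever. The arithmetic, spelled out:

    #include <assert.h>

    #define PCI_CFG_SPACE_SIZE     256
    #define PCI_CFG_SPACE_EXP_SIZE 4096

    int main(void)
    {
            int ttl = (PCI_CFG_SPACE_EXP_SIZE - PCI_CFG_SPACE_SIZE) / 8;

            assert(ttl == 480);     /* matches the old hard-coded value */
            return 0;
    }
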
@@ -1126,6 +1129,27 @@ int pci_enable_wake(struct pci_dev *dev, pci_power_t state, int enable)
        return pme_done ? 0 : error;
 }
 
+/**
+ * pci_wake_from_d3 - enable/disable device to wake up from D3_hot or D3_cold
+ * @dev: PCI device to prepare
+ * @enable: True to enable wake-up event generation; false to disable
+ *
+ * Many drivers want the device to wake up the system from D3_hot or D3_cold
+ * and this function allows them to set that up cleanly - pci_enable_wake()
+ * should not be called twice in a row to enable wake-up due to PCI PM vs ACPI
+ * ordering constraints.
+ *
+ * This function only returns an error code if the device is not capable of
+ * generating PME# from both D3_hot and D3_cold, and the platform is unable to
+ * enable wake-up power for it.
+ */
+int pci_wake_from_d3(struct pci_dev *dev, bool enable)
+{
+       return pci_pme_capable(dev, PCI_D3cold) ?
+                       pci_enable_wake(dev, PCI_D3cold, enable) :
+                       pci_enable_wake(dev, PCI_D3hot, enable);
+}
+
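
A usage sketch for the new helper, as it might appear in a hypothetical driver's legacy suspend hook; the surrounding calls are ordinary suspend boilerplate, not something this patch prescribes:

    #include <linux/pci.h>

    static int foo_suspend(struct pci_dev *pdev, pm_message_t state)
    {
            pci_save_state(pdev);
            pci_wake_from_d3(pdev, true);   /* one call covers D3hot/D3cold */
            pci_set_power_state(pdev, pci_choose_state(pdev, state));
            return 0;
    }
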
 /**
  * pci_target_state - find an appropriate low power state for a given PCI dev
  * @dev: PCI device
@@ -1242,25 +1266,25 @@ void pci_pm_init(struct pci_dev *dev)
        dev->d1_support = false;
        dev->d2_support = false;
        if (!pci_no_d1d2(dev)) {
-               if (pmc & PCI_PM_CAP_D1) {
-                       dev_printk(KERN_DEBUG, &dev->dev, "supports D1\n");
+               if (pmc & PCI_PM_CAP_D1)
                        dev->d1_support = true;
-               }
-               if (pmc & PCI_PM_CAP_D2) {
-                       dev_printk(KERN_DEBUG, &dev->dev, "supports D2\n");
+               if (pmc & PCI_PM_CAP_D2)
                        dev->d2_support = true;
-               }
+
+               if (dev->d1_support || dev->d2_support)
+                       dev_printk(KERN_DEBUG, &dev->dev, "supports%s%s\n",
+                                  dev->d1_support ? " D1" : "",
+                                  dev->d2_support ? " D2" : "");
        }
 
        pmc &= PCI_PM_CAP_PME_MASK;
        if (pmc) {
-               dev_printk(KERN_INFO, &dev->dev,
-                       "PME# supported from%s%s%s%s%s\n",
-                       (pmc & PCI_PM_CAP_PME_D0) ? " D0" : "",
-                       (pmc & PCI_PM_CAP_PME_D1) ? " D1" : "",
-                       (pmc & PCI_PM_CAP_PME_D2) ? " D2" : "",
-                       (pmc & PCI_PM_CAP_PME_D3) ? " D3hot" : "",
-                       (pmc & PCI_PM_CAP_PME_D3cold) ? " D3cold" : "");
+               dev_info(&dev->dev, "PME# supported from%s%s%s%s%s\n",
+                        (pmc & PCI_PM_CAP_PME_D0) ? " D0" : "",
+                        (pmc & PCI_PM_CAP_PME_D1) ? " D1" : "",
+                        (pmc & PCI_PM_CAP_PME_D2) ? " D2" : "",
+                        (pmc & PCI_PM_CAP_PME_D3) ? " D3hot" : "",
+                        (pmc & PCI_PM_CAP_PME_D3cold) ? " D3cold" : "");
                dev->pme_support = pmc >> PCI_PM_CAP_PME_SHIFT;
                /*
                 * Make device's PM flags reflect the wake-up capability, but
@@ -1275,6 +1299,38 @@ void pci_pm_init(struct pci_dev *dev)
        }
 }
 
+/**
+ * pci_enable_ari - enable ARI forwarding if hardware supports it
+ * @dev: the PCI device
+ */
+void pci_enable_ari(struct pci_dev *dev)
+{
+       int pos;
+       u32 cap;
+       u16 ctrl;
+
+       if (!dev->is_pcie)
+               return;
+
+       if (dev->pcie_type != PCI_EXP_TYPE_ROOT_PORT &&
+           dev->pcie_type != PCI_EXP_TYPE_DOWNSTREAM)
+               return;
+
+       pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
+       if (!pos)
+               return;
+
+       pci_read_config_dword(dev, pos + PCI_EXP_DEVCAP2, &cap);
+       if (!(cap & PCI_EXP_DEVCAP2_ARI))
+               return;
+
+       pci_read_config_word(dev, pos + PCI_EXP_DEVCTL2, &ctrl);
+       ctrl |= PCI_EXP_DEVCTL2_ARI;
+       pci_write_config_word(dev, pos + PCI_EXP_DEVCTL2, ctrl);
+
+       dev->ari_enabled = 1;
+}
+
 int
 pci_get_interrupt_pin(struct pci_dev *dev, struct pci_dev **bridge)
 {
@@ -1942,6 +1998,7 @@ EXPORT_SYMBOL(pci_restore_state);
 EXPORT_SYMBOL(pci_pme_capable);
 EXPORT_SYMBOL(pci_pme_active);
 EXPORT_SYMBOL(pci_enable_wake);
+EXPORT_SYMBOL(pci_wake_from_d3);
 EXPORT_SYMBOL(pci_target_state);
 EXPORT_SYMBOL(pci_prepare_to_sleep);
 EXPORT_SYMBOL(pci_back_from_sleep);
index d807cd786f20a18419105df88d8cf816019cab48..b205ab866a1df72c4c659052fe2742a43d99124e 100644 (file)
@@ -1,3 +1,9 @@
+#ifndef DRIVERS_PCI_H
+#define DRIVERS_PCI_H
+
+#define PCI_CFG_SPACE_SIZE     256
+#define PCI_CFG_SPACE_EXP_SIZE 4096
+
 /* Functions internal to the PCI core code */
 
 extern int pci_uevent(struct device *dev, struct kobj_uevent_env *env);
@@ -76,7 +82,13 @@ static inline int pci_proc_detach_bus(struct pci_bus *bus) { return 0; }
 /* Functions for PCI Hotplug drivers to use */
 extern unsigned int pci_do_scan_bus(struct pci_bus *bus);
 
+#ifdef HAVE_PCI_LEGACY
+extern void pci_create_legacy_files(struct pci_bus *bus);
 extern void pci_remove_legacy_files(struct pci_bus *bus);
+#else
+static inline void pci_create_legacy_files(struct pci_bus *bus) { return; }
+static inline void pci_remove_legacy_files(struct pci_bus *bus) { return; }
+#endif
 
 /* Lock for read/write access to pci device and bus lists */
 extern struct rw_semaphore pci_bus_sem;
@@ -109,6 +121,7 @@ static inline int pci_no_d1d2(struct pci_dev *dev)
 extern int pcie_mch_quirk;
 extern struct device_attribute pci_dev_attrs[];
 extern struct device_attribute dev_attr_cpuaffinity;
+extern struct device_attribute dev_attr_cpulistaffinity;
 
 /**
  * pci_match_one_device - Tell if a PCI device structure has a matching
@@ -144,3 +157,16 @@ struct pci_slot_attribute {
 };
 #define to_pci_slot_attr(s) container_of(s, struct pci_slot_attribute, attr)
 
+extern void pci_enable_ari(struct pci_dev *dev);
+/**
+ * pci_ari_enabled - query ARI forwarding status
+ * @dev: the PCI device
+ *
+ * Returns 1 if ARI forwarding is enabled, or 0 if not enabled.
+ */
+static inline int pci_ari_enabled(struct pci_dev *dev)
+{
+       return dev->ari_enabled;
+}
+
+#endif /* DRIVERS_PCI_H */
index 77036f46acfe22e1cfdebeb3613cb6f3527b6df2..e390707661dde323e8beafc4bf96e651a843dd06 100644 (file)
@@ -105,7 +105,7 @@ static irqreturn_t aer_irq(int irq, void *context)
        unsigned long flags;
        int pos;
 
-       pos = pci_find_aer_capability(pdev->port);
+       pos = pci_find_ext_capability(pdev->port, PCI_EXT_CAP_ID_ERR);
        /*
         * Must lock access to Root Error Status Reg, Root Error ID Reg,
         * and Root error producer/consumer index
@@ -252,7 +252,7 @@ static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
        u32 status;
        int pos;
 
-       pos = pci_find_aer_capability(dev);
+       pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
 
        /* Disable Root's interrupt in response to error messages */
        pci_write_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, 0);
@@ -316,7 +316,7 @@ static void aer_error_resume(struct pci_dev *dev)
        pci_write_config_word(dev, pos + PCI_EXP_DEVSTA, reg16);
 
        /* Clean AER Root Error Status */
-       pos = pci_find_aer_capability(dev);
+       pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
        pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
        pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &mask);
        if (dev->error_state == pci_channel_io_normal)
index ee5e7b5176d0f5d0986fbcb04050e37ff8b324ee..dfc63d01f20a0a7c072f9a65039f2776537b3e8e 100644 (file)
 static int forceload;
 module_param(forceload, bool, 0);
 
-#define PCI_CFG_SPACE_SIZE     (0x100)
-int pci_find_aer_capability(struct pci_dev *dev)
-{
-       int pos;
-       u32 reg32 = 0;
-
-       /* Check if it's a pci-express device */
-       pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
-       if (!pos)
-               return 0;
-
-       /* Check if it supports pci-express AER */
-       pos = PCI_CFG_SPACE_SIZE;
-       while (pos) {
-               if (pci_read_config_dword(dev, pos, &reg32))
-                       return 0;
-
-               /* some broken boards return ~0 */
-               if (reg32 == 0xffffffff)
-                       return 0;
-
-               if (PCI_EXT_CAP_ID(reg32) == PCI_EXT_CAP_ID_ERR)
-                       break;
-
-               pos = reg32 >> 20;
-       }
-
-       return pos;
-}
-
 int pci_enable_pcie_error_reporting(struct pci_dev *dev)
 {
        u16 reg16 = 0;
        int pos;
 
+       pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
+       if (!pos)
+               return -EIO;
+
        pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
        if (!pos)
                return -EIO;
@@ -102,7 +76,7 @@ int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev)
        int pos;
        u32 status, mask;
 
-       pos = pci_find_aer_capability(dev);
+       pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
        if (!pos)
                return -EIO;
 
@@ -123,7 +97,7 @@ int pci_cleanup_aer_correct_error_status(struct pci_dev *dev)
        int pos;
        u32 status;
 
-       pos = pci_find_aer_capability(dev);
+       pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
        if (!pos)
                return -EIO;
 
@@ -502,7 +476,7 @@ static void handle_error_source(struct pcie_device * aerdev,
                 * Correctable error does not need software intervention.
                 * No need to go through error recovery process.
                 */
-               pos = pci_find_aer_capability(dev);
+               pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
                if (pos)
                        pci_write_config_dword(dev, pos + PCI_ERR_COR_STATUS,
                                        info.status);
@@ -542,7 +516,7 @@ void aer_enable_rootport(struct aer_rpc *rpc)
        reg16 &= ~(SYSTEM_ERROR_INTR_ON_MESG_MASK);
        pci_write_config_word(pdev, pos + PCI_EXP_RTCTL, reg16);
 
-       aer_pos = pci_find_aer_capability(pdev);
+       aer_pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ERR);
        /* Clear error status */
        pci_read_config_dword(pdev, aer_pos + PCI_ERR_ROOT_STATUS, &reg32);
        pci_write_config_dword(pdev, aer_pos + PCI_ERR_ROOT_STATUS, reg32);
@@ -579,7 +553,7 @@ static void disable_root_aer(struct aer_rpc *rpc)
        u32 reg32;
        int pos;
 
-       pos = pci_find_aer_capability(pdev);
+       pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ERR);
        /* Disable Root's interrupt in response to error messages */
        pci_write_config_dword(pdev, pos + PCI_ERR_ROOT_COMMAND, 0);
 
@@ -618,7 +592,7 @@ static int get_device_error_info(struct pci_dev *dev, struct aer_err_info *info)
 {
        int pos;
 
-       pos = pci_find_aer_capability(dev);
+       pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
 
        /* The device might not support AER */
        if (!pos)
@@ -755,7 +729,6 @@ int aer_init(struct pcie_device *dev)
        return AER_SUCCESS;
 }
 
-EXPORT_SYMBOL_GPL(pci_find_aer_capability);
 EXPORT_SYMBOL_GPL(pci_enable_pcie_error_reporting);
 EXPORT_SYMBOL_GPL(pci_disable_pcie_error_reporting);
 EXPORT_SYMBOL_GPL(pci_cleanup_aer_uncorrect_error_status);
index 851f5b83cdbc3f62f352627d0ee91848fc9923c0..8f63f4c6b85f64e3f4d91d76ca7da90ac142c51e 100644 (file)
@@ -528,9 +528,9 @@ static int pcie_aspm_sanity_check(struct pci_dev *pdev)
                pci_read_config_dword(child_dev, child_pos + PCI_EXP_DEVCAP,
                        &reg32);
                if (!(reg32 & PCI_EXP_DEVCAP_RBER) && !aspm_force) {
-                       printk("Pre-1.1 PCIe device detected, "
-                               "disable ASPM for %s. It can be enabled forcedly"
-                               " with 'pcie_aspm=force'\n", pci_name(pdev));
+                       dev_printk(KERN_INFO, &child_dev->dev, "disabling ASPM"
+                               " on pre-1.1 PCIe device.  You can enable it"
+                               " with 'pcie_aspm=force'\n");
                        return -EINVAL;
                }
        }
index 3656e0349dd1d6ee4c6917203298b0d3b635d486..2529f3f2ea5a246ad3134307eb0e50d4c73d00a6 100644 (file)
@@ -25,7 +25,6 @@
 #define PCIE_CAPABILITIES_REG          0x2
 #define PCIE_SLOT_CAPABILITIES_REG     0x14
 #define PCIE_PORT_DEVICE_MAXSERVICES   4
-#define PCI_CFG_SPACE_SIZE             256
 
 #define get_descriptor_id(type, service) (((type - 4) << 4) | service)
 
index 890f0d2b370af7003625326075073f7641e549ce..2e091e014829e61366ec69b1267d8d9f05e58111 100644 (file)
@@ -195,24 +195,11 @@ static int get_port_device_capability(struct pci_dev *dev)
        /* PME Capable - root port capability */
        if (((reg16 >> 4) & PORT_TYPE_MASK) == PCIE_RC_PORT)
                services |= PCIE_PORT_SERVICE_PME;
-       
-       pos = PCI_CFG_SPACE_SIZE;
-       while (pos) {
-               pci_read_config_dword(dev, pos, &reg32);
-               switch (reg32 & 0xffff) {
-               case PCI_EXT_CAP_ID_ERR:
-                       services |= PCIE_PORT_SERVICE_AER;
-                       pos = reg32 >> 20;
-                       break;
-               case PCI_EXT_CAP_ID_VC:
-                       services |= PCIE_PORT_SERVICE_VC;
-                       pos = reg32 >> 20;
-                       break;
-               default:
-                       pos = 0;
-                       break;
-               }
-       }
+
+       if (pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR))
+               services |= PCIE_PORT_SERVICE_AER;
+       if (pci_find_ext_capability(dev, PCI_EXT_CAP_ID_VC))
+               services |= PCIE_PORT_SERVICE_VC;
 
        return services;
 }
index 367c9c20000dd59dfdd8e1130be182ed2b3fde51..584422da8d8b3e013934edfd1b5b255f2b2775a2 100644 (file)
@@ -91,7 +91,7 @@ static int __devinit pcie_portdrv_probe (struct pci_dev *dev,
        
        pci_set_master(dev);
         if (!dev->irq && dev->pin) {
-               dev_warn(&dev->dev, "device [%04x/%04x] has invalid IRQ; "
+               dev_warn(&dev->dev, "device [%04x:%04x] has invalid IRQ; "
                         "check vendor BIOS\n", dev->vendor, dev->device);
        }
        if (pcie_port_device_register(dev)) {
index d3db8b24972995750eef5043b2ca700e779dedeb..aaaf0a1fed223364b82bd4e79d9fd29f2c178788 100644 (file)
@@ -14,8 +14,6 @@
 
 #define CARDBUS_LATENCY_TIMER  176     /* secondary latency timer */
 #define CARDBUS_RESERVE_BUSNR  3
-#define PCI_CFG_SPACE_SIZE     256
-#define PCI_CFG_SPACE_EXP_SIZE 4096
 
 /* Ugh.  Need to stop exporting this to modules. */
 LIST_HEAD(pci_root_buses);
@@ -44,72 +42,6 @@ int no_pci_devices(void)
 }
 EXPORT_SYMBOL(no_pci_devices);
 
-#ifdef HAVE_PCI_LEGACY
-/**
- * pci_create_legacy_files - create legacy I/O port and memory files
- * @b: bus to create files under
- *
- * Some platforms allow access to legacy I/O port and ISA memory space on
- * a per-bus basis.  This routine creates the files and ties them into
- * their associated read, write and mmap files from pci-sysfs.c
- *
- * On error unwind, but don't propogate the error to the caller
- * as it is ok to set up the PCI bus without these files.
- */
-static void pci_create_legacy_files(struct pci_bus *b)
-{
-       int error;
-
-       b->legacy_io = kzalloc(sizeof(struct bin_attribute) * 2,
-                              GFP_ATOMIC);
-       if (!b->legacy_io)
-               goto kzalloc_err;
-
-       b->legacy_io->attr.name = "legacy_io";
-       b->legacy_io->size = 0xffff;
-       b->legacy_io->attr.mode = S_IRUSR | S_IWUSR;
-       b->legacy_io->read = pci_read_legacy_io;
-       b->legacy_io->write = pci_write_legacy_io;
-       error = device_create_bin_file(&b->dev, b->legacy_io);
-       if (error)
-               goto legacy_io_err;
-
-       /* Allocated above after the legacy_io struct */
-       b->legacy_mem = b->legacy_io + 1;
-       b->legacy_mem->attr.name = "legacy_mem";
-       b->legacy_mem->size = 1024*1024;
-       b->legacy_mem->attr.mode = S_IRUSR | S_IWUSR;
-       b->legacy_mem->mmap = pci_mmap_legacy_mem;
-       error = device_create_bin_file(&b->dev, b->legacy_mem);
-       if (error)
-               goto legacy_mem_err;
-
-       return;
-
-legacy_mem_err:
-       device_remove_bin_file(&b->dev, b->legacy_io);
-legacy_io_err:
-       kfree(b->legacy_io);
-       b->legacy_io = NULL;
-kzalloc_err:
-       printk(KERN_WARNING "pci: warning: could not create legacy I/O port "
-              "and ISA memory resources to sysfs\n");
-       return;
-}
-
-void pci_remove_legacy_files(struct pci_bus *b)
-{
-       if (b->legacy_io) {
-               device_remove_bin_file(&b->dev, b->legacy_io);
-               device_remove_bin_file(&b->dev, b->legacy_mem);
-               kfree(b->legacy_io); /* both are allocated here */
-       }
-}
-#else /* !HAVE_PCI_LEGACY */
-static inline void pci_create_legacy_files(struct pci_bus *bus) { return; }
-void pci_remove_legacy_files(struct pci_bus *bus) { return; }
-#endif /* HAVE_PCI_LEGACY */
-
 /*
  * PCI Bus Class Devices
  */
@@ -219,7 +151,7 @@ static inline enum pci_bar_type decode_bar(struct resource *res, u32 bar)
 
        res->flags = bar & ~PCI_BASE_ADDRESS_MEM_MASK;
 
-       if (res->flags == PCI_BASE_ADDRESS_MEM_TYPE_64)
+       if (res->flags & PCI_BASE_ADDRESS_MEM_TYPE_64)
                return pci_bar_mem64;
        return pci_bar_mem32;
 }
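
The switch from == to & in decode_bar() matters because res->flags keeps all of the BAR's low flag bits: a prefetchable 64-bit memory BAR carries both the 64-bit type code (0x4) and the prefetch bit (0x8), so the old equality test misclassified it as 32-bit. A quick stand-alone demonstration:

    #include <assert.h>

    #define PCI_BASE_ADDRESS_MEM_TYPE_64   0x04
    #define PCI_BASE_ADDRESS_MEM_PREFETCH  0x08

    int main(void)
    {
            unsigned int flags = PCI_BASE_ADDRESS_MEM_TYPE_64 |
                                 PCI_BASE_ADDRESS_MEM_PREFETCH;  /* 0x0c */

            assert(flags != PCI_BASE_ADDRESS_MEM_TYPE_64);  /* == test fails */
            assert(flags & PCI_BASE_ADDRESS_MEM_TYPE_64);   /* & test works */
            return 0;
    }
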
@@ -304,8 +236,8 @@ static int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type,
                } else {
                        res->start = l64;
                        res->end = l64 + sz64;
-                       printk(KERN_DEBUG "PCI: %s reg %x 64bit mmio: %pR\n",
-                               pci_name(dev), pos, res);
+                       dev_printk(KERN_DEBUG, &dev->dev,
+                               "reg %x 64bit mmio: %pR\n", pos, res);
                }
        } else {
                sz = pci_size(l, sz, mask);
@@ -315,10 +247,10 @@ static int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type,
 
                res->start = l;
                res->end = l + sz;
-               printk(KERN_DEBUG "PCI: %s reg %x %s: %pR\n",
-                      pci_name(dev), pos,
-                      (res->flags & IORESOURCE_IO) ? "io port":"32bit mmio",
-                      res);
+
+               dev_printk(KERN_DEBUG, &dev->dev, "reg %x %s: %pR\n", pos,
+                       (res->flags & IORESOURCE_IO) ? "io port" : "32bit mmio",
+                       res);
        }
 
  out:
@@ -389,8 +321,7 @@ void __devinit pci_read_bridge_bases(struct pci_bus *child)
                        res->start = base;
                if (!res->end)
                        res->end = limit + 0xfff;
-               printk(KERN_DEBUG "PCI: bridge %s io port: %pR\n",
-                      pci_name(dev), res);
+               dev_printk(KERN_DEBUG, &dev->dev, "bridge io port: %pR\n", res);
        }
 
        res = child->resource[1];
@@ -402,8 +333,8 @@ void __devinit pci_read_bridge_bases(struct pci_bus *child)
                res->flags = (mem_base_lo & PCI_MEMORY_RANGE_TYPE_MASK) | IORESOURCE_MEM;
                res->start = base;
                res->end = limit + 0xfffff;
-               printk(KERN_DEBUG "PCI: bridge %s 32bit mmio: %pR\n",
-                      pci_name(dev), res);
+               dev_printk(KERN_DEBUG, &dev->dev, "bridge 32bit mmio: %pR\n",
+                       res);
        }
 
        res = child->resource[2];
@@ -439,9 +370,9 @@ void __devinit pci_read_bridge_bases(struct pci_bus *child)
                res->flags = (mem_base_lo & PCI_MEMORY_RANGE_TYPE_MASK) | IORESOURCE_MEM | IORESOURCE_PREFETCH;
                res->start = base;
                res->end = limit + 0xfffff;
-               printk(KERN_DEBUG "PCI: bridge %s %sbit mmio pref: %pR\n",
-                      pci_name(dev),
-                      (res->flags & PCI_PREF_RANGE_TYPE_64) ? "64":"32", res);
+               dev_printk(KERN_DEBUG, &dev->dev, "bridge %sbit mmio pref: %pR\n",
+                       (res->flags & PCI_PREF_RANGE_TYPE_64) ? "64" : "32",
+                       res);
        }
 }
 
@@ -762,7 +693,7 @@ static int pci_setup_device(struct pci_dev * dev)
        dev->class = class;
        class >>= 8;
 
-       dev_dbg(&dev->dev, "found [%04x/%04x] class %06x header type %02x\n",
+       dev_dbg(&dev->dev, "found [%04x:%04x] class %06x header type %02x\n",
                 dev->vendor, dev->device, class, dev->hdr_type);
 
        /* "Unknown power state" */
@@ -844,6 +775,11 @@ static int pci_setup_device(struct pci_dev * dev)
        return 0;
 }
 
+static void pci_release_capabilities(struct pci_dev *dev)
+{
+       pci_vpd_release(dev);
+}
+
 /**
  * pci_release_dev - free a pci device structure when all users of it are finished.
  * @dev: device that's been disconnected
@@ -856,7 +792,7 @@ static void pci_release_dev(struct device *dev)
        struct pci_dev *pci_dev;
 
        pci_dev = to_pci_dev(dev);
-       pci_vpd_release(pci_dev);
+       pci_release_capabilities(pci_dev);
        kfree(pci_dev);
 }
 
@@ -887,8 +823,9 @@ static void set_pcie_port_type(struct pci_dev *pdev)
 int pci_cfg_space_size_ext(struct pci_dev *dev)
 {
        u32 status;
+       int pos = PCI_CFG_SPACE_SIZE;
 
-       if (pci_read_config_dword(dev, 256, &status) != PCIBIOS_SUCCESSFUL)
+       if (pci_read_config_dword(dev, pos, &status) != PCIBIOS_SUCCESSFUL)
                goto fail;
        if (status == 0xffffffff)
                goto fail;
@@ -936,8 +873,6 @@ struct pci_dev *alloc_pci_dev(void)
 
        INIT_LIST_HEAD(&dev->bus_list);
 
-       pci_msi_init_pci_dev(dev);
-
        return dev;
 }
 EXPORT_SYMBOL(alloc_pci_dev);
@@ -949,6 +884,7 @@ EXPORT_SYMBOL(alloc_pci_dev);
 static struct pci_dev *pci_scan_device(struct pci_bus *bus, int devfn)
 {
        struct pci_dev *dev;
+       struct pci_slot *slot;
        u32 l;
        u8 hdr_type;
        int delay = 1;
@@ -997,6 +933,10 @@ static struct pci_dev *pci_scan_device(struct pci_bus *bus, int devfn)
        dev->error_state = pci_channel_io_normal;
        set_pcie_port_type(dev);
 
+       list_for_each_entry(slot, &bus->slots, list)
+               if (PCI_SLOT(devfn) == slot->number)
+                       dev->slot = slot;
+
        /* Assume 32-bit PCI; let 64-bit PCI cards (which are far rarer)
           set this higher, assuming the system even supports it.  */
        dev->dma_mask = 0xffffffff;
@@ -1005,9 +945,22 @@ static struct pci_dev *pci_scan_device(struct pci_bus *bus, int devfn)
                return NULL;
        }
 
+       return dev;
+}
+
+static void pci_init_capabilities(struct pci_dev *dev)
+{
+       /* MSI/MSI-X list */
+       pci_msi_init_pci_dev(dev);
+
+       /* Power Management */
+       pci_pm_init(dev);
+
+       /* Vital Product Data */
        pci_vpd_pci22_init(dev);
 
-       return dev;
+       /* Alternative Routing-ID Forwarding */
+       pci_enable_ari(dev);
 }
 
 void pci_device_add(struct pci_dev *dev, struct pci_bus *bus)
@@ -1026,8 +979,8 @@ void pci_device_add(struct pci_dev *dev, struct pci_bus *bus)
        /* Fix up broken headers */
        pci_fixup_device(pci_fixup_header, dev);
 
-       /* Initialize power management of the device */
-       pci_pm_init(dev);
+       /* Initialize various capabilities */
+       pci_init_capabilities(dev);
 
        /*
         * Add the device to our list of discovered devices
index 832175d9ca2505e7daa4b4bef302cff1cb239efa..96cf8ecd04ce7b40ea60fc9e1a077607a8c46d77 100644 (file)
 #include <linux/kallsyms.h>
 #include "pci.h"
 
+int isa_dma_bridge_buggy;
+EXPORT_SYMBOL(isa_dma_bridge_buggy);
+int pci_pci_problems;
+EXPORT_SYMBOL(pci_pci_problems);
+int pcie_mch_quirk;
+EXPORT_SYMBOL(pcie_mch_quirk);
+
+#ifdef CONFIG_PCI_QUIRKS
 /* The Mellanox Tavor device gives false positive parity errors
  * Mark this device with a broken_parity_status, to allow
  * PCI scanning code to "skip" this now blacklisted device.
@@ -76,8 +84,6 @@ DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82441,      quirk_p
     
     This appears to be BIOS not version dependent. So presumably there is a 
     chipset level fix */
-int isa_dma_bridge_buggy;
-EXPORT_SYMBOL(isa_dma_bridge_buggy);
     
 static void __devinit quirk_isa_dma_hangs(struct pci_dev *dev)
 {
@@ -98,9 +104,6 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NEC,   PCI_DEVICE_ID_NEC_CBUS_1,       quirk_isa_d
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NEC,     PCI_DEVICE_ID_NEC_CBUS_2,       quirk_isa_dma_hangs);
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NEC,     PCI_DEVICE_ID_NEC_CBUS_3,       quirk_isa_dma_hangs);
 
-int pci_pci_problems;
-EXPORT_SYMBOL(pci_pci_problems);
-
 /*
  *     Chipsets where PCI->PCI transfers vanish or hang
  */
@@ -1376,9 +1379,6 @@ static void __init quirk_alder_ioapic(struct pci_dev *pdev)
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,  PCI_DEVICE_ID_INTEL_EESSC,      quirk_alder_ioapic);
 #endif
 
-int pcie_mch_quirk;
-EXPORT_SYMBOL(pcie_mch_quirk);
-
 static void __devinit quirk_pcie_mch(struct pci_dev *pdev)
 {
        pcie_mch_quirk = 1;
@@ -1569,84 +1569,6 @@ static void __devinit fixup_rev1_53c810(struct pci_dev* dev)
 }
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NCR, PCI_DEVICE_ID_NCR_53C810, fixup_rev1_53c810);
 
-static void pci_do_fixups(struct pci_dev *dev, struct pci_fixup *f, struct pci_fixup *end)
-{
-       while (f < end) {
-               if ((f->vendor == dev->vendor || f->vendor == (u16) PCI_ANY_ID) &&
-                   (f->device == dev->device || f->device == (u16) PCI_ANY_ID)) {
-#ifdef DEBUG
-                       dev_dbg(&dev->dev, "calling %pF\n", f->hook);
-#endif
-                       f->hook(dev);
-               }
-               f++;
-       }
-}
-
-extern struct pci_fixup __start_pci_fixups_early[];
-extern struct pci_fixup __end_pci_fixups_early[];
-extern struct pci_fixup __start_pci_fixups_header[];
-extern struct pci_fixup __end_pci_fixups_header[];
-extern struct pci_fixup __start_pci_fixups_final[];
-extern struct pci_fixup __end_pci_fixups_final[];
-extern struct pci_fixup __start_pci_fixups_enable[];
-extern struct pci_fixup __end_pci_fixups_enable[];
-extern struct pci_fixup __start_pci_fixups_resume[];
-extern struct pci_fixup __end_pci_fixups_resume[];
-extern struct pci_fixup __start_pci_fixups_resume_early[];
-extern struct pci_fixup __end_pci_fixups_resume_early[];
-extern struct pci_fixup __start_pci_fixups_suspend[];
-extern struct pci_fixup __end_pci_fixups_suspend[];
-
-
-void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev)
-{
-       struct pci_fixup *start, *end;
-
-       switch(pass) {
-       case pci_fixup_early:
-               start = __start_pci_fixups_early;
-               end = __end_pci_fixups_early;
-               break;
-
-       case pci_fixup_header:
-               start = __start_pci_fixups_header;
-               end = __end_pci_fixups_header;
-               break;
-
-       case pci_fixup_final:
-               start = __start_pci_fixups_final;
-               end = __end_pci_fixups_final;
-               break;
-
-       case pci_fixup_enable:
-               start = __start_pci_fixups_enable;
-               end = __end_pci_fixups_enable;
-               break;
-
-       case pci_fixup_resume:
-               start = __start_pci_fixups_resume;
-               end = __end_pci_fixups_resume;
-               break;
-
-       case pci_fixup_resume_early:
-               start = __start_pci_fixups_resume_early;
-               end = __end_pci_fixups_resume_early;
-               break;
-
-       case pci_fixup_suspend:
-               start = __start_pci_fixups_suspend;
-               end = __end_pci_fixups_suspend;
-               break;
-
-       default:
-               /* stupid compiler warning, you would think with an enum... */
-               return;
-       }
-       pci_do_fixups(dev, start, end);
-}
-EXPORT_SYMBOL(pci_fixup_device);
-
 /* Enable 1k I/O space granularity on the Intel P64H2 */
 static void __devinit quirk_p64h2_1k_io(struct pci_dev *dev)
 {
@@ -2020,3 +1942,82 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x4375,
                        quirk_msi_intx_disable_bug);
 
 #endif /* CONFIG_PCI_MSI */
+
+static void pci_do_fixups(struct pci_dev *dev, struct pci_fixup *f, struct pci_fixup *end)
+{
+       while (f < end) {
+               if ((f->vendor == dev->vendor || f->vendor == (u16) PCI_ANY_ID) &&
+                   (f->device == dev->device || f->device == (u16) PCI_ANY_ID)) {
+                       dev_dbg(&dev->dev, "calling %pF\n", f->hook);
+                       f->hook(dev);
+               }
+               f++;
+       }
+}
+
+extern struct pci_fixup __start_pci_fixups_early[];
+extern struct pci_fixup __end_pci_fixups_early[];
+extern struct pci_fixup __start_pci_fixups_header[];
+extern struct pci_fixup __end_pci_fixups_header[];
+extern struct pci_fixup __start_pci_fixups_final[];
+extern struct pci_fixup __end_pci_fixups_final[];
+extern struct pci_fixup __start_pci_fixups_enable[];
+extern struct pci_fixup __end_pci_fixups_enable[];
+extern struct pci_fixup __start_pci_fixups_resume[];
+extern struct pci_fixup __end_pci_fixups_resume[];
+extern struct pci_fixup __start_pci_fixups_resume_early[];
+extern struct pci_fixup __end_pci_fixups_resume_early[];
+extern struct pci_fixup __start_pci_fixups_suspend[];
+extern struct pci_fixup __end_pci_fixups_suspend[];
+
+
+void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev)
+{
+       struct pci_fixup *start, *end;
+
+       switch(pass) {
+       case pci_fixup_early:
+               start = __start_pci_fixups_early;
+               end = __end_pci_fixups_early;
+               break;
+
+       case pci_fixup_header:
+               start = __start_pci_fixups_header;
+               end = __end_pci_fixups_header;
+               break;
+
+       case pci_fixup_final:
+               start = __start_pci_fixups_final;
+               end = __end_pci_fixups_final;
+               break;
+
+       case pci_fixup_enable:
+               start = __start_pci_fixups_enable;
+               end = __end_pci_fixups_enable;
+               break;
+
+       case pci_fixup_resume:
+               start = __start_pci_fixups_resume;
+               end = __end_pci_fixups_resume;
+               break;
+
+       case pci_fixup_resume_early:
+               start = __start_pci_fixups_resume_early;
+               end = __end_pci_fixups_resume_early;
+               break;
+
+       case pci_fixup_suspend:
+               start = __start_pci_fixups_suspend;
+               end = __end_pci_fixups_suspend;
+               break;
+
+       default:
+               /* silence a "may be used uninitialized" warning; with an enum the switch is exhaustive */
+               return;
+       }
+       pci_do_fixups(dev, start, end);
+}
+#else
+void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev) {}
+#endif
+EXPORT_SYMBOL(pci_fixup_device);
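
For context: the fixup tables walked by pci_do_fixups() are assembled at link time. Quirks register themselves with the DECLARE_PCI_FIXUP_* macros from <linux/pci.h>, which drop a struct pci_fixup into the linker section matching the pass. A minimal sketch of a final-pass quirk — the vendor/device IDs here are placeholders, not from this patch:

	static void __devinit quirk_example(struct pci_dev *dev)
	{
		/* runs once for each matching device during the pci_fixup_final pass */
		dev_info(&dev->dev, "example quirk applied\n");
	}
	DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, quirk_example);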
index bdc2a44d68e1dbc4859c7d1628a24411ca0802ee..042e08924421bbd8d3c773a1ee40dfb50d9a2a1e 100644 (file)
@@ -73,6 +73,7 @@ void pci_remove_bus(struct pci_bus *pci_bus)
        up_write(&pci_bus_sem);
        pci_remove_legacy_files(pci_bus);
        device_remove_file(&pci_bus->dev, &dev_attr_cpuaffinity);
+       device_remove_file(&pci_bus->dev, &dev_attr_cpulistaffinity);
        device_unregister(&pci_bus->dev);
 }
 EXPORT_SYMBOL(pci_remove_bus);
@@ -114,13 +115,9 @@ void pci_remove_behind_bridge(struct pci_dev *dev)
 {
        struct list_head *l, *n;
 
-       if (dev->subordinate) {
-               list_for_each_safe(l, n, &dev->subordinate->devices) {
-                       struct pci_dev *dev = pci_dev_b(l);
-
-                       pci_remove_bus_device(dev);
-               }
-       }
+       if (dev->subordinate)
+               list_for_each_safe(l, n, &dev->subordinate->devices)
+                       pci_remove_bus_device(pci_dev_b(l));
 }
 
 static void pci_stop_bus_devices(struct pci_bus *bus)
index 471a429d7a20fec7b116537ed5e90c912da8b7b7..ea979f2bc6db0ecdc6f6c355e9926c59f908cfd7 100644 (file)
@@ -299,7 +299,7 @@ static void pbus_size_io(struct pci_bus *bus)
 
                        if (r->parent || !(r->flags & IORESOURCE_IO))
                                continue;
-                       r_size = r->end - r->start + 1;
+                       r_size = resource_size(r);
 
                        if (r_size < 0x400)
                                /* Might be re-aligned for ISA */
@@ -350,7 +350,7 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, unsigned long
 
                        if (r->parent || (r->flags & mask) != type)
                                continue;
-                       r_size = r->end - r->start + 1;
+                       r_size = resource_size(r);
                        /* For bridges size != alignment */
                        align = resource_alignment(r);
                        order = __ffs(align) - 20;
index d4b5c690eaa776112c46cc8f9ca993a49e233d50..2dbd96cce2d8469e82198ec3d29ae3604ef104de 100644 (file)
@@ -129,7 +129,7 @@ int pci_assign_resource(struct pci_dev *dev, int resno)
        resource_size_t size, min, align;
        int ret;
 
-       size = res->end - res->start + 1;
+       size = resource_size(res);
        min = (res->flags & IORESOURCE_IO) ? PCIBIOS_MIN_IO : PCIBIOS_MIN_MEM;
 
        align = resource_alignment(res);
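
The hunks above swap the open-coded `r->end - r->start + 1` for the resource_size() helper. As defined in <linux/ioport.h> at this point, it is simply the inclusive-range size:

	static inline resource_size_t resource_size(const struct resource *res)
	{
		return res->end - res->start + 1;	/* struct resource ranges are inclusive */
	}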
index 7e5b85cbd9488936b9ecbd10928dde86618f842c..0c6db03698eaea6829c96aa03617f063e118daf6 100644 (file)
@@ -49,11 +49,16 @@ static ssize_t address_read_file(struct pci_slot *slot, char *buf)
 
 static void pci_slot_release(struct kobject *kobj)
 {
+       struct pci_dev *dev;
        struct pci_slot *slot = to_pci_slot(kobj);
 
        pr_debug("%s: releasing pci_slot on %x:%d\n", __func__,
                 slot->bus->number, slot->number);
 
+       list_for_each_entry(dev, &slot->bus->devices, bus_list)
+               if (PCI_SLOT(dev->devfn) == slot->number)
+                       dev->slot = NULL;
+
        list_del(&slot->list);
 
        kfree(slot);
@@ -108,6 +113,7 @@ static struct kobj_type pci_slot_ktype = {
 struct pci_slot *pci_create_slot(struct pci_bus *parent, int slot_nr,
                                 const char *name)
 {
+       struct pci_dev *dev;
        struct pci_slot *slot;
        int err;
 
@@ -150,6 +156,10 @@ placeholder:
        INIT_LIST_HEAD(&slot->list);
        list_add(&slot->list, &parent->slots);
 
+       list_for_each_entry(dev, &parent->devices, bus_list)
+               if (PCI_SLOT(dev->devfn) == slot_nr)
+                       dev->slot = slot;
+
        /* Don't care if debug printk has a -1 for slot_nr */
        pr_debug("%s: created pci_slot on %04x:%02x:%02x\n",
                 __func__, pci_domain_nr(parent), parent->number, slot_nr);
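
The two list walks added to pci_slot_release() and pci_create_slot() keep the new pci_dev::slot back-pointer in step with the slot's lifetime. The match is on the slot half of the 8-bit devfn encoding; as a reminder of the split:

	unsigned int slot = PCI_SLOT(dev->devfn);	/* device number, bits 7:3 */
	unsigned int func = PCI_FUNC(dev->devfn);	/* function number, bits 2:0 */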
index a0ffb8ebfe00765cdf6fcf84e94f5dd5662844ee..9e1140f085fdfb70adca24d75e33d2a5b007b8e7 100644 (file)
@@ -273,7 +273,7 @@ static int __init at91_cf_probe(struct platform_device *pdev)
                        goto fail0d;
                cf->socket.pci_irq = board->irq_pin;
        } else
-               cf->socket.pci_irq = NR_IRQS + 1;
+               cf->socket.pci_irq = nr_irqs + 1;
 
        /* pcmcia layer only remaps "real" memory not iospace */
        cf->socket.io_offset = (unsigned long)
index 117dc12ab4380060889814b4ca0b1d8fffb317d0..9ef69cdb3183b1af1a2c40ddf53ef497fdcf7cb0 100644 (file)
@@ -233,15 +233,18 @@ static struct hw_interrupt_type hd64465_ss_irq_type = {
  */
 static void hs_map_irq(hs_socket_t *sp, unsigned int irq)
 {
+       struct irq_desc *desc;
+
        DPRINTK("hs_map_irq(sock=%d irq=%d)\n", sp->number, irq);
        
        if (irq >= HS_NUM_MAPPED_IRQS)
            return;
 
+       desc = irq_to_desc(irq);
        hs_mapped_irq[irq].sock = sp;
        /* insert ourselves as the irq controller */
-       hs_mapped_irq[irq].old_handler = irq_desc[irq].chip;
-       irq_desc[irq].chip = &hd64465_ss_irq_type;
+       hs_mapped_irq[irq].old_handler = desc->chip;
+       desc->chip = &hd64465_ss_irq_type;
 }
 
 
@@ -250,13 +253,16 @@ static void hs_map_irq(hs_socket_t *sp, unsigned int irq)
  */
 static void hs_unmap_irq(hs_socket_t *sp, unsigned int irq)
 {
+       struct irq_desc *desc;
+
        DPRINTK("hs_unmap_irq(sock=%d irq=%d)\n", sp->number, irq);
        
        if (irq >= HS_NUM_MAPPED_IRQS)
            return;
                
+       desc = irq_to_desc(irq);
        /* restore the original irq controller */
-       irq_desc[irq].chip = hs_mapped_irq[irq].old_handler;
+       desc->chip = hs_mapped_irq[irq].old_handler;
 }
 
 /*============================================================*/
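
These hunks, like many below, belong to the sparse-IRQ conversion that runs through this merge: the compile-time NR_IRQS bound gives way to the runtime nr_irqs count, and direct irq_desc[] indexing gives way to the irq_to_desc() accessor. The recurring before/after shape, roughly (my_irq_chip is a stand-in name):

	/* before: fixed array, compile-time bound */
	irq_desc[irq].chip = &my_irq_chip;

	/* after: descriptor lookup against the runtime bound */
	struct irq_desc *desc = irq_to_desc(irq);
	if (desc)
		desc->chip = &my_irq_chip;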
index eee2f1cb213c76e35726b6c09df0eed9d6db060e..b2c412419059f9f5d03117305323378e784fb266 100644 (file)
@@ -639,7 +639,7 @@ static int __devinit vrc4171_card_setup(char *options)
                int irq;
                options += 4;
                irq = simple_strtoul(options, &options, 0);
-               if (irq >= 0 && irq < NR_IRQS)
+               if (irq >= 0 && irq < nr_irqs)
                        vrc4171_irq = irq;
 
                if (*options != ',')
index f660ef3e5b29e801f205eb06358cf54f26490e25..814f49fde530ea3bf49210222a4e6d2f23ce8617 100644 (file)
@@ -610,6 +610,14 @@ config RTC_DRV_RS5C313
        help
          If you say yes here you get support for the Ricoh RS5C313 RTC chips.
 
+config RTC_DRV_PARISC
+       tristate "PA-RISC firmware RTC support"
+       depends on PARISC
+       help
+         Say Y or M here to enable RTC support on PA-RISC systems using
+         firmware calls. If you do not know what you are doing, you should
+         just say Y.
+
 config RTC_DRV_PPC
        tristate "PowerPC machine dependent RTC support"
        depends on PPC
index d05928b3ca9466b7402a2392f555e32f3051ad31..d6a9ac7176eabed994db8af97d70794e45c1a143 100644 (file)
@@ -51,6 +51,7 @@ obj-$(CONFIG_RTC_DRV_PCF8563) += rtc-pcf8563.o
 obj-$(CONFIG_RTC_DRV_PCF8583)  += rtc-pcf8583.o
 obj-$(CONFIG_RTC_DRV_PL030)    += rtc-pl030.o
 obj-$(CONFIG_RTC_DRV_PL031)    += rtc-pl031.o
+obj-$(CONFIG_RTC_DRV_PARISC)   += rtc-parisc.o
 obj-$(CONFIG_RTC_DRV_PPC)      += rtc-ppc.o
 obj-$(CONFIG_RTC_DRV_R9701)    += rtc-r9701.o
 obj-$(CONFIG_RTC_DRV_RS5C313)  += rtc-rs5c313.o
diff --git a/drivers/rtc/rtc-parisc.c b/drivers/rtc/rtc-parisc.c
new file mode 100644 (file)
index 0000000..346d633
--- /dev/null
@@ -0,0 +1,111 @@
+/* rtc-parisc: RTC for HP PA-RISC firmware
+ *
+ * Copyright (C) 2008 Kyle McMartin <kyle@mcmartin.ca>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/time.h>
+#include <linux/platform_device.h>
+
+#include <asm/rtc.h>
+
+/* as simple as can be, and no simpler. */
+struct parisc_rtc {
+       struct rtc_device *rtc;
+       spinlock_t lock;
+};
+
+static int parisc_get_time(struct device *dev, struct rtc_time *tm)
+{
+       struct parisc_rtc *p = dev_get_drvdata(dev);
+       unsigned long flags, ret;
+
+       spin_lock_irqsave(&p->lock, flags);
+       ret = get_rtc_time(tm);
+       spin_unlock_irqrestore(&p->lock, flags);
+
+       if (ret & RTC_BATT_BAD)
+               return -EOPNOTSUPP;
+
+       return 0;
+}
+
+static int parisc_set_time(struct device *dev, struct rtc_time *tm)
+{
+       struct parisc_rtc *p = dev_get_drvdata(dev);
+       unsigned long flags, ret;
+
+       spin_lock_irqsave(&p->lock, flags);
+       ret = set_rtc_time(tm);
+       spin_unlock_irqrestore(&p->lock, flags);
+
+       if (ret < 0)
+               return -EOPNOTSUPP;
+
+       return 0;
+}
+
+static const struct rtc_class_ops parisc_rtc_ops = {
+       .read_time = parisc_get_time,
+       .set_time = parisc_set_time,
+};
+
+static int __devinit parisc_rtc_probe(struct platform_device *dev)
+{
+       struct parisc_rtc *p;
+
+       p = kzalloc(sizeof (*p), GFP_KERNEL);
+       if (!p)
+               return -ENOMEM;
+
+       spin_lock_init(&p->lock);
+
+       p->rtc = rtc_device_register("rtc-parisc", &dev->dev, &parisc_rtc_ops,
+                                       THIS_MODULE);
+       if (IS_ERR(p->rtc)) {
+               int err = PTR_ERR(p->rtc);
+               kfree(p);
+               return err;
+       }
+
+       platform_set_drvdata(dev, p);
+
+       return 0;
+}
+
+static int __devexit parisc_rtc_remove(struct platform_device *dev)
+{
+       struct parisc_rtc *p = platform_get_drvdata(dev);
+
+       rtc_device_unregister(p->rtc);
+       kfree(p);
+
+       return 0;
+}
+
+static struct platform_driver parisc_rtc_driver = {
+       .driver = {
+               .name = "rtc-parisc",
+               .owner = THIS_MODULE,
+       },
+       .probe = parisc_rtc_probe,
+       .remove = __devexit_p(parisc_rtc_remove),
+};
+
+static int __init parisc_rtc_init(void)
+{
+       return platform_driver_register(&parisc_rtc_driver);
+}
+
+static void __exit parisc_rtc_fini(void)
+{
+       platform_driver_unregister(&parisc_rtc_driver);
+}
+
+module_init(parisc_rtc_init);
+module_exit(parisc_rtc_fini);
+
+MODULE_AUTHOR("Kyle McMartin <kyle@mcmartin.ca>");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("HP PA-RISC RTC driver");
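
The new driver binds by name, so it only probes where arch code registers a matching platform device. A hypothetical registration sketch (not part of this patch):

	static struct platform_device parisc_rtc_device = {
		.name	= "rtc-parisc",		/* must match parisc_rtc_driver.driver.name */
		.id	= -1,
	};

	/* somewhere in PA-RISC platform setup */
	platform_device_register(&parisc_rtc_device);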
index 884b635f028b792ee9b9afef8954345b4375e24a..834dcc6d785f899a7a4d43985388da3724da6afe 100644 (file)
@@ -360,7 +360,7 @@ static int __devinit rtc_probe(struct platform_device *pdev)
        spin_unlock_irq(&rtc_lock);
 
        aie_irq = platform_get_irq(pdev, 0);
-       if (aie_irq < 0 || aie_irq >= NR_IRQS) {
+       if (aie_irq < 0 || aie_irq >= nr_irqs) {
                retval = -EBUSY;
                goto err_device_unregister;
        }
@@ -371,7 +371,7 @@ static int __devinit rtc_probe(struct platform_device *pdev)
                goto err_device_unregister;
 
        pie_irq = platform_get_irq(pdev, 1);
-       if (pie_irq < 0 || pie_irq >= NR_IRQS)
+       if (pie_irq < 0 || pie_irq >= nr_irqs)
                goto err_free_irq;
 
        retval = request_irq(pie_irq, rtclong1_interrupt, IRQF_DISABLED,
index b5a868d85eb49760ac1f512195b29cb88c43578b..1e5478abd90ee8ae6c2371ff7f043d6f4f9122bf 100644 (file)
@@ -337,7 +337,7 @@ CMD_INC_RESID(struct scsi_cmnd *cmd, int inc)
 #else
 #define IRQ_MIN 9
 #if defined(__PPC)
-#define IRQ_MAX (NR_IRQS-1)
+#define IRQ_MAX (nr_irqs-1)
 #else
 #define IRQ_MAX 12
 #endif
index 740bad435995268112883ff77c61af58e93ce8b6..afc96e844a25d84cc45dafa7175cea1dc7b86101 100644 (file)
@@ -343,6 +343,11 @@ static ide_startstop_t idescsi_do_request (ide_drive_t *drive, struct request *r
 }
 
 #ifdef CONFIG_IDE_PROC_FS
+static ide_proc_entry_t idescsi_proc[] = {
+       { "capacity", S_IFREG|S_IRUGO, proc_ide_read_capacity, NULL },
+       { NULL, 0, NULL, NULL }
+};
+
 #define ide_scsi_devset_get(name, field) \
 static int get_##name(ide_drive_t *drive) \
 { \
@@ -378,6 +383,16 @@ static const struct ide_proc_devset idescsi_settings[] = {
        IDE_PROC_DEVSET(transform, 0,    3),
        { 0 },
 };
+
+static ide_proc_entry_t *ide_scsi_proc_entries(ide_drive_t *drive)
+{
+       return idescsi_proc;
+}
+
+static const struct ide_proc_devset *ide_scsi_proc_devsets(ide_drive_t *drive)
+{
+       return idescsi_settings;
+}
 #endif
 
 /*
@@ -419,13 +434,6 @@ static void ide_scsi_remove(ide_drive_t *drive)
 
 static int ide_scsi_probe(ide_drive_t *);
 
-#ifdef CONFIG_IDE_PROC_FS
-static ide_proc_entry_t idescsi_proc[] = {
-       { "capacity", S_IFREG|S_IRUGO, proc_ide_read_capacity, NULL },
-       { NULL, 0, NULL, NULL }
-};
-#endif
-
 static ide_driver_t idescsi_driver = {
        .gen_driver = {
                .owner          = THIS_MODULE,
@@ -439,8 +447,8 @@ static ide_driver_t idescsi_driver = {
        .end_request            = idescsi_end_request,
        .error                  = idescsi_atapi_error,
 #ifdef CONFIG_IDE_PROC_FS
-       .proc                   = idescsi_proc,
-       .settings               = idescsi_settings,
+       .proc_entries           = ide_scsi_proc_entries,
+       .proc_devsets           = ide_scsi_proc_devsets,
 #endif
 };
 
index d30eb7ba018e6dde266ac53253b95c8687dde8e8..098739deb02e1394dc466f51061284a484245cf8 100644 (file)
@@ -7859,7 +7859,6 @@ static struct pci_driver ipr_driver = {
        .remove = ipr_remove,
        .shutdown = ipr_shutdown,
        .err_handler = &ipr_err_handler,
-       .dynids.use_driver_data = 1
 };
 
 /**
index 83c819216771a5c9c10d886789e3fa9350d75cc0..f25f41a499e5ed7be7ebd3dc109d4d4b14c9494a 100644 (file)
@@ -2108,7 +2108,7 @@ struct scsi_qla_host;
 
 struct qla_msix_entry {
        int have_irq;
-       uint16_t msix_vector;
+       uint32_t msix_vector;
        uint16_t msix_entry;
 };
 
index 2aed4721c0d043821a5044483c4081c77adec0f9..21dd182ad512214c7646299a1af4ad4f2e9d9e88 100644 (file)
@@ -1566,9 +1566,8 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
                        goto probe_out;
        }
 
-       if (pci_find_aer_capability(pdev))
-               if (pci_enable_pcie_error_reporting(pdev))
-                       goto probe_out;
+       /* This may fail but that's ok */
+       pci_enable_pcie_error_reporting(pdev);
 
        host = scsi_host_alloc(sht, sizeof(scsi_qla_host_t));
        if (host == NULL) {
index 381b12ac20e0d249522b0811caf665c0448fba3d..d935b2d04f93345c9cc577373dd9a89e4357188c 100644 (file)
@@ -66,7 +66,6 @@
 #endif
 
 static struct m68k_serial m68k_soft[NR_PORTS];
-struct m68k_serial *IRQ_ports[NR_IRQS];
 
 static unsigned int uart_irqs[NR_PORTS] = UART_IRQ_DEFNS;
 
@@ -375,15 +374,11 @@ clear_and_return:
  */
 irqreturn_t rs_interrupt(int irq, void *dev_id)
 {
-       struct m68k_serial * info;
+       struct m68k_serial *info = dev_id;
        m68328_uart *uart;
        unsigned short rx;
        unsigned short tx;
 
-       info = IRQ_ports[irq];
-       if(!info)
-           return IRQ_NONE;
-
        uart = &uart_addr[info->line];
        rx = uart->urx.w;
 
@@ -1383,8 +1378,6 @@ rs68328_init(void)
                   info->port, info->irq);
            printk(" is a builtin MC68328 UART\n");
            
-           IRQ_ports[info->irq] = info;        /* waste of space */
-
 #ifdef CONFIG_M68VZ328
                if (i > 0 )
                        PJSEL &= 0xCF;  /* PSW enable second port output */
@@ -1393,7 +1386,7 @@ rs68328_init(void)
            if (request_irq(uart_irqs[i],
                            rs_interrupt,
                            IRQF_DISABLED,
-                           "M68328_UART", NULL))
+                           "M68328_UART", info))
                 panic("Unable to attach 68328 serial interrupt\n");
        }
        local_irq_restore(flags);
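
Retiring the IRQ_ports[] reverse map follows the standard dev_id idiom: whatever pointer is handed to request_irq() comes back as the handler's second argument, so per-port state travels with the registration instead of through a global table. Condensed:

	/* registration passes the port ... */
	request_irq(uart_irqs[i], rs_interrupt, IRQF_DISABLED, "M68328_UART", info);

	/* ... and the handler receives the same pointer back */
	irqreturn_t rs_interrupt(int irq, void *dev_id)
	{
		struct m68k_serial *info = dev_id;
		/* service info->line with no global lookup */
		return IRQ_HANDLED;
	}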
index 1528de23a6504987f78038db7be1e933e4762364..303272af386ef4ac307517d6df654759cbc23d43 100644 (file)
@@ -156,11 +156,15 @@ struct uart_8250_port {
 };
 
 struct irq_info {
-       spinlock_t              lock;
+       struct                  hlist_node node;
+       int                     irq;
+       spinlock_t              lock;   /* protects the list, not the hash */
        struct list_head        *head;
 };
 
-static struct irq_info irq_lists[NR_IRQS];
+#define NR_IRQ_HASH            32      /* Can be adjusted later */
+static struct hlist_head irq_lists[NR_IRQ_HASH];
+static DEFINE_MUTEX(hash_mutex);       /* Used to walk the hash */
 
 /*
  * Here we define the default xmit fifo size used for each type of UART.
@@ -1545,15 +1549,43 @@ static void serial_do_unlink(struct irq_info *i, struct uart_8250_port *up)
                BUG_ON(i->head != &up->list);
                i->head = NULL;
        }
-
        spin_unlock_irq(&i->lock);
+       /* List empty so throw away the hash node */
+       if (i->head == NULL) {
+               hlist_del(&i->node);
+               kfree(i);
+       }
 }
 
 static int serial_link_irq_chain(struct uart_8250_port *up)
 {
-       struct irq_info *i = irq_lists + up->port.irq;
+       struct hlist_head *h;
+       struct hlist_node *n;
+       struct irq_info *i;
        int ret, irq_flags = up->port.flags & UPF_SHARE_IRQ ? IRQF_SHARED : 0;
 
+       mutex_lock(&hash_mutex);
+
+       h = &irq_lists[up->port.irq % NR_IRQ_HASH];
+
+       hlist_for_each(n, h) {
+               i = hlist_entry(n, struct irq_info, node);
+               if (i->irq == up->port.irq)
+                       break;
+       }
+
+       if (n == NULL) {
+               i = kzalloc(sizeof(struct irq_info), GFP_KERNEL);
+               if (i == NULL) {
+                       mutex_unlock(&hash_mutex);
+                       return -ENOMEM;
+               }
+               spin_lock_init(&i->lock);
+               i->irq = up->port.irq;
+               hlist_add_head(&i->node, h);
+       }
+       mutex_unlock(&hash_mutex);
+
        spin_lock_irq(&i->lock);
 
        if (i->head) {
@@ -1577,14 +1609,28 @@ static int serial_link_irq_chain(struct uart_8250_port *up)
 
 static void serial_unlink_irq_chain(struct uart_8250_port *up)
 {
-       struct irq_info *i = irq_lists + up->port.irq;
+       struct irq_info *i;
+       struct hlist_node *n;
+       struct hlist_head *h;
 
+       mutex_lock(&hash_mutex);
+
+       h = &irq_lists[up->port.irq % NR_IRQ_HASH];
+
+       hlist_for_each(n, h) {
+               i = hlist_entry(n, struct irq_info, node);
+               if (i->irq == up->port.irq)
+                       break;
+       }
+
+       BUG_ON(n == NULL);
        BUG_ON(i->head == NULL);
 
        if (list_empty(i->head))
                free_irq(up->port.irq, i);
 
        serial_do_unlink(i, up);
+       mutex_unlock(&hash_mutex);
 }
 
 /* Base timer interval for polling */
@@ -2447,7 +2493,7 @@ static void serial8250_config_port(struct uart_port *port, int flags)
 static int
 serial8250_verify_port(struct uart_port *port, struct serial_struct *ser)
 {
-       if (ser->irq >= NR_IRQS || ser->irq < 0 ||
+       if (ser->irq >= nr_irqs || ser->irq < 0 ||
            ser->baud_base < 9600 || ser->type < PORT_UNKNOWN ||
            ser->type >= ARRAY_SIZE(uart_config) || ser->type == PORT_CIRRUS ||
            ser->type == PORT_STARTECH)
@@ -2967,7 +3013,7 @@ EXPORT_SYMBOL(serial8250_unregister_port);
 
 static int __init serial8250_init(void)
 {
-       int ret, i;
+       int ret;
 
        if (nr_uarts > UART_NR)
                nr_uarts = UART_NR;
@@ -2976,9 +3022,6 @@ static int __init serial8250_init(void)
                "%d ports, IRQ sharing %sabled\n", nr_uarts,
                share_irqs ? "en" : "dis");
 
-       for (i = 0; i < NR_IRQS; i++)
-               spin_lock_init(&irq_lists[i].lock);
-
 #ifdef CONFIG_SPARC
        ret = sunserial_register_minors(&serial8250_reg, UART_NR);
 #else
@@ -3006,15 +3049,15 @@ static int __init serial8250_init(void)
                goto out;
 
        platform_device_del(serial8250_isa_devs);
- put_dev:
+put_dev:
        platform_device_put(serial8250_isa_devs);
- unreg_uart_drv:
+unreg_uart_drv:
 #ifdef CONFIG_SPARC
        sunserial_unregister_minors(&serial8250_reg, UART_NR);
 #else
        uart_unregister_driver(&serial8250_reg);
 #endif
- out:
+out:
        return ret;
 }
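
The 8250 bookkeeping above replaces an NR_IRQS-sized static array with a 32-bucket hash of lazily allocated irq_info nodes, so memory now scales with the IRQs ports actually claim rather than with the platform's IRQ ceiling. The find-or-create walk that both link and unlink perform, condensed into one illustrative helper (not in the patch; the caller would hold hash_mutex):

	static struct irq_info *find_or_create_irq_info(int irq)
	{
		struct hlist_head *h = &irq_lists[irq % NR_IRQ_HASH];
		struct hlist_node *n;
		struct irq_info *i;

		hlist_for_each(n, h) {
			i = hlist_entry(n, struct irq_info, node);
			if (i->irq == irq)
				return i;
		}
		i = kzalloc(sizeof(*i), GFP_KERNEL);
		if (i) {
			spin_lock_init(&i->lock);
			i->irq = irq;
			hlist_add_head(&i->node, h);
		}
		return i;
	}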
 
index 90b56c2c31e20f7cc5d2ea0fccb73a2a1f4b8d98..71562689116ff4e45f326dbfc6e8cbb10732ad31 100644 (file)
@@ -512,7 +512,7 @@ static int pl010_verify_port(struct uart_port *port, struct serial_struct *ser)
        int ret = 0;
        if (ser->type != PORT_UNKNOWN && ser->type != PORT_AMBA)
                ret = -EINVAL;
-       if (ser->irq < 0 || ser->irq >= NR_IRQS)
+       if (ser->irq < 0 || ser->irq >= nr_irqs)
                ret = -EINVAL;
        if (ser->baud_base < 9600)
                ret = -EINVAL;
index 9d08f27208a187db0823fee6dcabf22ba3ac0668..b7180046f8dba082f02767e07775fae72aafc53d 100644 (file)
@@ -572,7 +572,7 @@ static int pl010_verify_port(struct uart_port *port, struct serial_struct *ser)
        int ret = 0;
        if (ser->type != PORT_UNKNOWN && ser->type != PORT_AMBA)
                ret = -EINVAL;
-       if (ser->irq < 0 || ser->irq >= NR_IRQS)
+       if (ser->irq < 0 || ser->irq >= nr_irqs)
                ret = -EINVAL;
        if (ser->baud_base < 9600)
                ret = -EINVAL;
index a6c4d744495e3b694894c35453bbac4a0baff512..bde4b4b0b80f08efe1f2c5db4da2fb7e0e61b9b5 100644 (file)
@@ -623,7 +623,7 @@ static int cpm_uart_verify_port(struct uart_port *port,
 
        if (ser->type != PORT_UNKNOWN && ser->type != PORT_CPM)
                ret = -EINVAL;
-       if (ser->irq < 0 || ser->irq >= NR_IRQS)
+       if (ser->irq < 0 || ser->irq >= nr_irqs)
                ret = -EINVAL;
        if (ser->baud_base < 9600)
                ret = -EINVAL;
index 23d0305110195353e8d74146fe477d1efb365608..611c97a15654d546a4941fe6e1b1008cef3c237f 100644 (file)
@@ -922,7 +922,7 @@ static void m32r_sio_config_port(struct uart_port *port, int flags)
 static int
 m32r_sio_verify_port(struct uart_port *port, struct serial_struct *ser)
 {
-       if (ser->irq >= NR_IRQS || ser->irq < 0 ||
+       if (ser->irq >= nr_irqs || ser->irq < 0 ||
            ser->baud_base < 9600 || ser->type < PORT_UNKNOWN ||
            ser->type >= ARRAY_SIZE(uart_config))
                return -EINVAL;
@@ -1162,7 +1162,7 @@ static int __init m32r_sio_init(void)
 
        printk(KERN_INFO "Serial: M32R SIO driver\n");
 
-       for (i = 0; i < NR_IRQS; i++)
+       for (i = 0; i < nr_irqs; i++)
                spin_lock_init(&irq_lists[i].lock);
 
        ret = uart_register_driver(&m32r_sio_reg);
index 6bdf3362e3b1e5f743ae8bad1f0bc8c9f682bf08..874786a11fe9977b98d4de8c540669111170e7ee 100644 (file)
@@ -741,7 +741,7 @@ static int uart_set_info(struct uart_state *state,
        if (port->ops->verify_port)
                retval = port->ops->verify_port(port, &new_serial);
 
-       if ((new_serial.irq >= NR_IRQS) || (new_serial.irq < 0) ||
+       if ((new_serial.irq >= nr_irqs) || (new_serial.irq < 0) ||
            (new_serial.baud_base < 9600))
                retval = -EINVAL;
 
index cb49a5ac022f880d38008c65ae4b844880c92fc3..61dc8b3daa26ce2c5159febbc05d71f89746ce54 100644 (file)
@@ -460,7 +460,7 @@ static int lh7a40xuart_verify_port (struct uart_port* port,
 
        if (ser->type != PORT_UNKNOWN && ser->type != PORT_LH7A40X)
                ret = -EINVAL;
-       if (ser->irq < 0 || ser->irq >= NR_IRQS)
+       if (ser->irq < 0 || ser->irq >= nr_irqs)
                ret = -EINVAL;
        if (ser->baud_base < 9600) /* *** FIXME: is this true? */
                ret = -EINVAL;
index 3b9d2d83b59008df447dfcff84bb1fc33e5f4425..f0658d2c45b20e2aa462f3d4b3db29e27fc0773f 100644 (file)
@@ -1149,7 +1149,7 @@ static int sci_verify_port(struct uart_port *port, struct serial_struct *ser)
 {
        struct sci_port *s = &sci_ports[port->line];
 
-       if (ser->irq != s->irqs[SCIx_TXI_IRQ] || ser->irq > NR_IRQS)
+       if (ser->irq != s->irqs[SCIx_TXI_IRQ] || ser->irq > nr_irqs)
                return -EINVAL;
        if (ser->baud_base < 2400)
                /* No paper tape reader for Mitch.. */
index 539c933b335f306b2dc92182785ab09cf271cec6..315a9333ca3cd1b7723197c74d0ebec1fe6fcdf3 100644 (file)
@@ -1066,7 +1066,7 @@ static int qe_uart_verify_port(struct uart_port *port,
        if (ser->type != PORT_UNKNOWN && ser->type != PORT_CPM)
                return -EINVAL;
 
-       if (ser->irq < 0 || ser->irq >= NR_IRQS)
+       if (ser->irq < 0 || ser->irq >= nr_irqs)
                return -EINVAL;
 
        if (ser->baud_base < 9600)
index 5dccf057a7dd41a5f31f69ec4d5d7982615655ec..f9b4647255aa041fa816612bdcd618bc4ecbabcc 100644 (file)
@@ -47,6 +47,9 @@ static struct uio_class {
        struct class *class;
 } *uio_class;
 
+/* Protect idr accesses */
+static DEFINE_MUTEX(minor_lock);
+
 /*
  * attributes
  */
@@ -239,7 +242,6 @@ static void uio_dev_del_attributes(struct uio_device *idev)
 
 static int uio_get_minor(struct uio_device *idev)
 {
-       static DEFINE_MUTEX(minor_lock);
        int retval = -ENOMEM;
        int id;
 
@@ -261,7 +263,9 @@ exit:
 
 static void uio_free_minor(struct uio_device *idev)
 {
+       mutex_lock(&minor_lock);
        idr_remove(&uio_idr, idev->minor);
+       mutex_unlock(&minor_lock);
 }
 
 /**
@@ -305,8 +309,9 @@ static int uio_open(struct inode *inode, struct file *filep)
        struct uio_listener *listener;
        int ret = 0;
 
-       lock_kernel();
+       mutex_lock(&minor_lock);
        idev = idr_find(&uio_idr, iminor(inode));
+       mutex_unlock(&minor_lock);
        if (!idev) {
                ret = -ENODEV;
                goto out;
@@ -332,18 +337,15 @@ static int uio_open(struct inode *inode, struct file *filep)
                if (ret)
                        goto err_infoopen;
        }
-       unlock_kernel();
        return 0;
 
 err_infoopen:
-
        kfree(listener);
-err_alloc_listener:
 
+err_alloc_listener:
        module_put(idev->owner);
 
 out:
-       unlock_kernel();
        return ret;
 }
 
index d343afacb0b03215822d5d1b0043fb1bfbb234ba..15a803b206b8ea72c4774ba6edb504a0fd5e964c 100644 (file)
@@ -1111,8 +1111,8 @@ clean0:
 #ifdef DEBUG
        debugfs_remove(ehci_debug_root);
        ehci_debug_root = NULL;
-#endif
 err_debug:
+#endif
        clear_bit(USB_EHCI_LOADED, &usb_hcds_loaded);
        return retval;
 }
index 05a28106e8eb7f8fdef5dfb35f0b61d6cd34226e..8782ec1f5aa05f3ef1713992ef09788d78b3fd6c 100644 (file)
@@ -154,7 +154,7 @@ static int ibwdt_set_heartbeat(int t)
                return -EINVAL;
 
        for (i = 0x0F; i > -1; i--)
-               if (wd_times[i] > t)
+               if (wd_times[i] >= t)
                        break;
        wd_margin = i;
        return 0;
index c3290bc186a0e62c36db12cd8ef44c58f0a4f4b0..9ce1ab6c268d7e0c23d2223cc09b74ababae1b7f 100644 (file)
@@ -125,7 +125,7 @@ static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)
 
        BUG_ON(irq == -1);
 #ifdef CONFIG_SMP
-       irq_desc[irq].affinity = cpumask_of_cpu(cpu);
+       irq_to_desc(irq)->affinity = cpumask_of_cpu(cpu);
 #endif
 
        __clear_bit(chn, cpu_evtchn_mask[cpu_evtchn[chn]]);
@@ -137,10 +137,12 @@ static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)
 static void init_evtchn_cpu_bindings(void)
 {
 #ifdef CONFIG_SMP
+       struct irq_desc *desc;
        int i;
+
        /* By default all event channels notify CPU#0. */
-       for (i = 0; i < NR_IRQS; i++)
-               irq_desc[i].affinity = cpumask_of_cpu(0);
+       for_each_irq_desc(i, desc)
+               desc->affinity = cpumask_of_cpu(0);
 #endif
 
        memset(cpu_evtchn, 0, sizeof(cpu_evtchn));
@@ -229,12 +231,12 @@ static int find_unbound_irq(void)
        int irq;
 
        /* Only allocate from dynirq range */
-       for (irq = 0; irq < NR_IRQS; irq++)
+       for_each_irq_nr(irq)
                if (irq_bindcount[irq] == 0)
                        break;
 
-       if (irq == NR_IRQS)
-               panic("No available IRQ to bind to: increase NR_IRQS!\n");
+       if (irq == nr_irqs)
+               panic("No available IRQ to bind to: increase nr_irqs!\n");
 
        return irq;
 }
@@ -790,7 +792,7 @@ void xen_irq_resume(void)
                mask_evtchn(evtchn);
 
        /* No IRQ <-> event-channel mappings. */
-       for (irq = 0; irq < NR_IRQS; irq++)
+       for_each_irq_nr(irq)
                irq_info[irq].evtchn = 0; /* zap event-channel binding */
 
        for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++)
@@ -822,7 +824,7 @@ void __init xen_init_IRQ(void)
                mask_evtchn(i);
 
        /* Dynamic IRQ space is currently unbound. Zero the refcnts. */
-       for (i = 0; i < NR_IRQS; i++)
+       for_each_irq_nr(i)
                irq_bindcount[i] = 0;
 
        irq_ctx_init(smp_processor_id());
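
The for_each_irq_nr() iterator used in these loops walks every IRQ number up to the runtime count; in <linux/irqnr.h> of this vintage it reduces to:

	#define for_each_irq_nr(irq)			\
		for (irq = 0; irq < nr_irqs; irq++)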
index e282002b94d23f5e5025bf3344d8f5e0b35f812d..e46297f020c1a5e443192f7da4ed3e9a0718e8be 100644 (file)
@@ -403,7 +403,7 @@ config AUTOFS4_FS
          N here.
 
 config FUSE_FS
-       tristate "Filesystem in Userspace support"
+       tristate "FUSE (Filesystem in Userspace) support"
        help
          With FUSE it is possible to implement a fully functional filesystem
          in a userspace program.
index e2159063198a072ef75545130ced4eb21e56accf..8fcfa398d35075e1e149b70d62226a6095524ad3 100644 (file)
@@ -1341,20 +1341,15 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
        prstatus->pr_pgrp = task_pgrp_vnr(p);
        prstatus->pr_sid = task_session_vnr(p);
        if (thread_group_leader(p)) {
+               struct task_cputime cputime;
+
                /*
-                * This is the record for the group leader.  Add in the
-                * cumulative times of previous dead threads.  This total
-                * won't include the time of each live thread whose state
-                * is included in the core dump.  The final total reported
-                * to our parent process when it calls wait4 will include
-                * those sums as well as the little bit more time it takes
-                * this and each other thread to finish dying after the
-                * core dump synchronization phase.
+                * This is the record for the group leader.  It shows the
+                * group-wide total, not its individual thread total.
                 */
-               cputime_to_timeval(cputime_add(p->utime, p->signal->utime),
-                                  &prstatus->pr_utime);
-               cputime_to_timeval(cputime_add(p->stime, p->signal->stime),
-                                  &prstatus->pr_stime);
+               thread_group_cputime(p, &cputime);
+               cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
+               cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
        } else {
                cputime_to_timeval(p->utime, &prstatus->pr_utime);
                cputime_to_timeval(p->stime, &prstatus->pr_stime);
index 0e8367c546248987418191c6ab7b14c5be61bec9..5b5424cb339151db85754685f00e2c168ff37908 100644 (file)
@@ -1390,20 +1390,15 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
        prstatus->pr_pgrp = task_pgrp_vnr(p);
        prstatus->pr_sid = task_session_vnr(p);
        if (thread_group_leader(p)) {
+               struct task_cputime cputime;
+
                /*
-                * This is the record for the group leader.  Add in the
-                * cumulative times of previous dead threads.  This total
-                * won't include the time of each live thread whose state
-                * is included in the core dump.  The final total reported
-                * to our parent process when it calls wait4 will include
-                * those sums as well as the little bit more time it takes
-                * this and each other thread to finish dying after the
-                * core dump synchronization phase.
+                * This is the record for the group leader.  It shows the
+                * group-wide total, not its individual thread total.
                 */
-               cputime_to_timeval(cputime_add(p->utime, p->signal->utime),
-                                  &prstatus->pr_utime);
-               cputime_to_timeval(cputime_add(p->stime, p->signal->stime),
-                                  &prstatus->pr_stime);
+               thread_group_cputime(p, &cputime);
+               cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
+               cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
        } else {
                cputime_to_timeval(p->utime, &prstatus->pr_utime);
                cputime_to_timeval(p->stime, &prstatus->pr_stime);
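
Both ELF core dumpers (and do_task_stat() below) now read group-wide totals from the new thread_group_cputime() accounting rather than hand-summing signal_struct fields. The structure it fills is, roughly:

	struct task_cputime {
		cputime_t		utime;		/* user time, all threads, live and dead */
		cputime_t		stime;		/* system time, likewise */
		unsigned long long	sum_exec_runtime;
	};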
index 2bada6bbc317dbeab3887fbcd26ed9fe630b6148..34930a964b8258067ec957f9e5ff2a974c09cc58 100644 (file)
@@ -101,6 +101,8 @@ void fuse_finish_open(struct inode *inode, struct file *file,
                file->f_op = &fuse_direct_io_file_operations;
        if (!(outarg->open_flags & FOPEN_KEEP_CACHE))
                invalidate_inode_pages2(inode->i_mapping);
+       if (outarg->open_flags & FOPEN_NONSEEKABLE)
+               nonseekable_open(inode, file);
        ff->fh = outarg->fh;
        file->private_data = fuse_file_get(ff);
 }
@@ -1448,6 +1450,9 @@ static loff_t fuse_file_llseek(struct file *file, loff_t offset, int origin)
        mutex_lock(&inode->i_mutex);
        switch (origin) {
        case SEEK_END:
+               retval = fuse_update_attributes(inode, NULL, file, NULL);
+               if (retval)
+                       return retval;
                offset += i_size_read(inode);
                break;
        case SEEK_CUR:
index 3a876076bdd1392c93b6a838af53eef104f8461d..35accfdd747f9fde9c561c4fc081939efadd5497 100644 (file)
@@ -6,6 +6,9 @@
   See the file COPYING.
 */
 
+#ifndef _FS_FUSE_I_H
+#define _FS_FUSE_I_H
+
 #include <linux/fuse.h>
 #include <linux/fs.h>
 #include <linux/mount.h>
@@ -655,3 +658,5 @@ void fuse_set_nowrite(struct inode *inode);
 void fuse_release_nowrite(struct inode *inode);
 
 u64 fuse_get_attr_version(struct fuse_conn *fc);
+
+#endif /* _FS_FUSE_I_H */
index 6a84388cacff6f3e0cf5a5d9e307bad71610d5df..54b1f0e1ef58325b2a6de0b52e1a0532fdecd801 100644 (file)
@@ -865,7 +865,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
        if (is_bdev) {
                fc->destroy_req = fuse_request_alloc();
                if (!fc->destroy_req)
-                       goto err_put_root;
+                       goto err_free_init_req;
        }
 
        mutex_lock(&fuse_mutex);
@@ -895,6 +895,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
 
  err_unlock:
        mutex_unlock(&fuse_mutex);
+ err_free_init_req:
        fuse_request_free(init_req);
  err_put_root:
        dput(root_dentry);
index f4bc0e789539f413e080324ab8209575ba349c42..bb9f4b05703de9b587a1b2cbd36ab651d2613da8 100644 (file)
@@ -388,20 +388,20 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 
                /* add up live thread stats at the group level */
                if (whole) {
+                       struct task_cputime cputime;
                        struct task_struct *t = task;
                        do {
                                min_flt += t->min_flt;
                                maj_flt += t->maj_flt;
-                               utime = cputime_add(utime, task_utime(t));
-                               stime = cputime_add(stime, task_stime(t));
                                gtime = cputime_add(gtime, task_gtime(t));
                                t = next_thread(t);
                        } while (t != task);
 
                        min_flt += sig->min_flt;
                        maj_flt += sig->maj_flt;
-                       utime = cputime_add(utime, sig->utime);
-                       stime = cputime_add(stime, sig->stime);
+                       thread_group_cputime(task, &cputime);
+                       utime = cputime.utime;
+                       stime = cputime.stime;
                        gtime = cputime_add(gtime, sig->gtime);
                }
 
index 61b25f4eabe6635bdf013e4879dd8e725f10f01d..7ea52c79b2da6bf713d2da2598b647dbbb450c9e 100644 (file)
@@ -30,6 +30,7 @@
 #include <linux/mm.h>
 #include <linux/mmzone.h>
 #include <linux/pagemap.h>
+#include <linux/irq.h>
 #include <linux/interrupt.h>
 #include <linux/swap.h>
 #include <linux/slab.h>
@@ -521,17 +522,13 @@ static const struct file_operations proc_vmalloc_operations = {
 
 static int show_stat(struct seq_file *p, void *v)
 {
-       int i;
+       int i, j;
        unsigned long jif;
        cputime64_t user, nice, system, idle, iowait, irq, softirq, steal;
        cputime64_t guest;
        u64 sum = 0;
        struct timespec boottime;
-       unsigned int *per_irq_sum;
-
-       per_irq_sum = kzalloc(sizeof(unsigned int)*NR_IRQS, GFP_KERNEL);
-       if (!per_irq_sum)
-               return -ENOMEM;
+       unsigned int per_irq_sum;
 
        user = nice = system = idle = iowait =
                irq = softirq = steal = cputime64_zero;
@@ -540,8 +537,6 @@ static int show_stat(struct seq_file *p, void *v)
        jif = boottime.tv_sec;
 
        for_each_possible_cpu(i) {
-               int j;
-
                user = cputime64_add(user, kstat_cpu(i).cpustat.user);
                nice = cputime64_add(nice, kstat_cpu(i).cpustat.nice);
                system = cputime64_add(system, kstat_cpu(i).cpustat.system);
@@ -551,11 +546,10 @@ static int show_stat(struct seq_file *p, void *v)
                softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq);
                steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal);
                guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest);
-               for (j = 0; j < NR_IRQS; j++) {
-                       unsigned int temp = kstat_cpu(i).irqs[j];
-                       sum += temp;
-                       per_irq_sum[j] += temp;
-               }
+
+               for_each_irq_nr(j)
+                       sum += kstat_irqs_cpu(j, i);
+
                sum += arch_irq_stat_cpu(i);
        }
        sum += arch_irq_stat();
@@ -597,8 +591,15 @@ static int show_stat(struct seq_file *p, void *v)
        }
        seq_printf(p, "intr %llu", (unsigned long long)sum);
 
-       for (i = 0; i < NR_IRQS; i++)
-               seq_printf(p, " %u", per_irq_sum[i]);
+       /* sum again: the per-cpu counters may have been updated meanwhile */
+       for_each_irq_nr(j) {
+               per_irq_sum = 0;
+
+               for_each_possible_cpu(i)
+                       per_irq_sum += kstat_irqs_cpu(j, i);
+
+               seq_printf(p, " %u", per_irq_sum);
+       }
 
        seq_printf(p,
                "\nctxt %llu\n"
@@ -612,7 +613,6 @@ static int show_stat(struct seq_file *p, void *v)
                nr_running(),
                nr_iowait());
 
-       kfree(per_irq_sum);
        return 0;
 }
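
With the scratch array gone, show_stat() recomputes each per-IRQ total on demand from the per-cpu counters; per IRQ that is one pass over the possible CPUs:

	unsigned int per_irq_sum = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		per_irq_sum += kstat_irqs_cpu(irq, cpu);	/* new per-cpu accessor */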
 
@@ -651,15 +651,14 @@ static const struct file_operations proc_stat_operations = {
  */
 static void *int_seq_start(struct seq_file *f, loff_t *pos)
 {
-       return (*pos <= NR_IRQS) ? pos : NULL;
+       return (*pos <= nr_irqs) ? pos : NULL;
 }
 
+
 static void *int_seq_next(struct seq_file *f, void *v, loff_t *pos)
 {
        (*pos)++;
-       if (*pos > NR_IRQS)
-               return NULL;
-       return pos;
+       return (*pos <= nr_irqs) ? pos : NULL;
 }
 
 static void int_seq_stop(struct seq_file *f, void *v)
@@ -667,7 +666,6 @@ static void int_seq_stop(struct seq_file *f, void *v)
        /* Nothing to do */
 }
 
-
 static const struct seq_operations int_seq_ops = {
        .start = int_seq_start,
        .next  = int_seq_next,
index 7ebcc56a22291edb6388332d16f84686c1542e61..361076611855ad1fb1afac6c5cc2aeb2454720cb 100644 (file)
 #include <asm/io.h>
 #include <asm/irq.h>
 
-/****************************************************************************/
-/*
- * some bits needed for parts of the IDE subsystem to compile
- */
-#define __ide_mm_insw(port, addr, n)   insw((unsigned long) (port), addr, n)
-#define __ide_mm_insl(port, addr, n)   insl((unsigned long) (port), addr, n)
-#define __ide_mm_outsw(port, addr, n)  outsw((unsigned long) (port), addr, n)
-#define __ide_mm_outsl(port, addr, n)  outsl((unsigned long) (port), addr, n)
-
+#include <asm-generic/ide_iops.h>
 
 #endif /* __KERNEL__ */
 #endif /* _ASM_IDE_H */
index 0f6dabd4b5175488081fc8395a30b9a5d5e5f217..12c07c1866b2072f9c2529829c27dac5077c96a3 100644 (file)
@@ -41,7 +41,7 @@ extern void warn_slowpath(const char *file, const int line,
 #define __WARN() warn_on_slowpath(__FILE__, __LINE__)
 #define __WARN_printf(arg...) warn_slowpath(__FILE__, __LINE__, arg)
 #else
-#define __WARN_printf(arg...) __WARN()
+#define __WARN_printf(arg...) do { printk(arg); __WARN(); } while (0)
 #endif
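
With the fallback fixed, WARN(condition, fmt, ...) now emits the caller's message even on configurations without warn_slowpath(), instead of silently dropping the arguments. A typical call shape:

	WARN(ret < 0, "device init failed (%d)\n", ret);	/* message printed, then the usual WARN splat */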
 
 #ifndef WARN_ON
index 74c5faf26c053a137768d5f4224d466945bc3253..80744606bad172b57d2ab52dc01bb0bf33af293e 100644 (file)
 #define MEM_DISCARD(sec) *(.mem##sec)
 #endif
 
+#ifdef CONFIG_FTRACE_MCOUNT_RECORD
+#define MCOUNT_REC()   VMLINUX_SYMBOL(__start_mcount_loc) = .; \
+                       *(__mcount_loc)                         \
+                       VMLINUX_SYMBOL(__stop_mcount_loc) = .;
+#else
+#define MCOUNT_REC()
+#endif
 
 /* .data section */
 #define DATA_DATA                                                      \
        . = ALIGN(8);                                                   \
        VMLINUX_SYMBOL(__start___markers) = .;                          \
        *(__markers)                                                    \
-       VMLINUX_SYMBOL(__stop___markers) = .;
+       VMLINUX_SYMBOL(__stop___markers) = .;                           \
+       VMLINUX_SYMBOL(__start___tracepoints) = .;                      \
+       *(__tracepoints)                                                \
+       VMLINUX_SYMBOL(__stop___tracepoints) = .;
 
 #define RO_DATA(align)                                                 \
        . = ALIGN((align));                                             \
@@ -61,6 +71,7 @@
                *(.rodata) *(.rodata.*)                                 \
                *(__vermagic)           /* Kernel version magic */      \
                *(__markers_strings)    /* Markers: strings */          \
+               *(__tracepoints_strings)/* Tracepoints: strings */      \
        }                                                               \
                                                                        \
        .rodata1          : AT(ADDR(.rodata1) - LOAD_OFFSET) {          \
        /* __*init sections */                                          \
        __init_rodata : AT(ADDR(__init_rodata) - LOAD_OFFSET) {         \
                *(.ref.rodata)                                          \
+               MCOUNT_REC()                                            \
                DEV_KEEP(init.rodata)                                   \
                DEV_KEEP(exit.rodata)                                   \
                CPU_KEEP(init.rodata)                                   \
index 1daf6cbdd9f0de3d9b3ae515dd939e887b7dd1e7..b996a3c8cff54ae4a4857c1a5b955af6b6a391d4 100644 (file)
 #define outsw_swapw(port, addr, n)     raw_outsw_swapw((u16 *)port, addr, n)
 #endif
 
-
-/* Q40 and Atari have byteswapped IDE busses and since many interesting
- * values in the identification string are text, chars and words they
- * happened to be almost correct without swapping.. However *_capacity
- * is needed for drives over 8 GB. RZ */
-#if defined(CONFIG_Q40) || defined(CONFIG_ATARI)
-#define M68K_IDE_SWAPW  (MACH_IS_Q40 || MACH_IS_ATARI)
-#endif
-
 #ifdef CONFIG_BLK_DEV_FALCON_IDE
 #define IDE_ARCH_LOCK
 
index d76a0839abe932c789738f0b0ec5b81c79f96b6c..ef1d72dbdfe02b74d389c2067af12373db2cd3de 100644 (file)
@@ -40,8 +40,6 @@ extern void generic_apic_probe(void);
 extern unsigned int apic_verbosity;
 extern int local_apic_timer_c2_ok;
 
-extern int ioapic_force;
-
 extern int disable_apic;
 /*
  * Basic functions accessing APICs.
@@ -100,6 +98,20 @@ extern void check_x2apic(void);
 extern void enable_x2apic(void);
 extern void enable_IR_x2apic(void);
 extern void x2apic_icr_write(u32 low, u32 id);
+static inline int x2apic_enabled(void)
+{
+       int msr, msr2;
+
+       if (!cpu_has_x2apic)
+               return 0;
+
+       rdmsr(MSR_IA32_APICBASE, msr, msr2);
+       if (msr & X2APIC_ENABLE)
+               return 1;
+       return 0;
+}
+#else
+#define x2apic_enabled()       0
 #endif
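
x2apic_enabled() keys off the EXTD bit in the local APIC base MSR. For reference, the bit it tests is defined next to this code as:

	#define X2APIC_ENABLE	(1UL << 10)	/* IA32_APIC_BASE.EXTD: x2APIC mode */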
 
 struct apic_ops {
index 0a9cd7c5ca0c8813ddb6899f1ad2a776cb9a85e1..1d9543b9d35824c259a1110005866be3d1f54a4e 100644 (file)
@@ -9,22 +9,17 @@ static inline int apic_id_registered(void)
        return (1);
 }
 
-/* Round robin the irqs among the online cpus */
 static inline cpumask_t target_cpus(void)
 {
-       static unsigned long cpu = NR_CPUS;
-       do {
-               if (cpu >= NR_CPUS)
-                       cpu = first_cpu(cpu_online_map);
-               else
-                       cpu = next_cpu(cpu, cpu_online_map);
-       } while (cpu >= NR_CPUS);
-       return cpumask_of_cpu(cpu);
+#ifdef CONFIG_SMP
+        return cpu_online_map;
+#else
+        return cpumask_of_cpu(0);
+#endif
 }
 
 #undef APIC_DEST_LOGICAL
 #define APIC_DEST_LOGICAL      0
-#define TARGET_CPUS            (target_cpus())
 #define APIC_DFR_VALUE         (APIC_DFR_FLAT)
 #define INT_DELIVERY_MODE      (dest_Fixed)
 #define INT_DEST_MODE          (0)    /* phys delivery to target proc */
index ed2de22e87050089bb8a8c02fb85bc0a204d2141..313438e633488ae874c4476002ba7ba0378cd42a 100644 (file)
@@ -94,4 +94,17 @@ extern void efi_reserve_early(void);
 extern void efi_call_phys_prelog(void);
 extern void efi_call_phys_epilog(void);
 
+#ifndef CONFIG_EFI
+/*
+ * If EFI is not configured, have the EFI calls return -ENOSYS.
+ */
+#define efi_call0(_f)                                  (-ENOSYS)
+#define efi_call1(_f, _a1)                             (-ENOSYS)
+#define efi_call2(_f, _a1, _a2)                                (-ENOSYS)
+#define efi_call3(_f, _a1, _a2, _a3)                   (-ENOSYS)
+#define efi_call4(_f, _a1, _a2, _a3, _a4)              (-ENOSYS)
+#define efi_call5(_f, _a1, _a2, _a3, _a4, _a5)         (-ENOSYS)
+#define efi_call6(_f, _a1, _a2, _a3, _a4, _a5, _a6)    (-ENOSYS)
+#endif /* CONFIG_EFI */
+
 #endif /* ASM_X86__EFI_H */
index aae50c2fb303db35d3bab0f2cfdd1a854f1b4fb2..380f0b4f17edfcf5c1e1440baed7231e07cdb684 100644 (file)
@@ -17,7 +17,6 @@ static inline cpumask_t target_cpus(void)
        return cpumask_of_cpu(smp_processor_id());
 #endif
 }
-#define TARGET_CPUS    (target_cpus())
 
 #if defined CONFIG_ES7000_CLUSTERED_APIC
 #define APIC_DFR_VALUE         (APIC_DFR_CLUSTER)
@@ -81,7 +80,7 @@ static inline void setup_apic_routing(void)
        int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id());
        printk("Enabling APIC mode:  %s.  Using %d I/O APICs, target cpus %lx\n",
                (apic_version[apic] == 0x14) ?
-               "Physical Cluster" : "Logical Cluster", nr_ioapics, cpus_addr(TARGET_CPUS)[0]);
+               "Physical Cluster" : "Logical Cluster", nr_ioapics, cpus_addr(target_cpus())[0]);
 }
 
 static inline int multi_timer_check(int apic, int irq)
index be0e004ad148123fc5f4b85e2c5a6c2ce8569aea..1bb6f9bbe1ab99319d4df113086e46ec17e9c7e7 100644 (file)
@@ -7,6 +7,16 @@
 
 #ifndef __ASSEMBLY__
 extern void mcount(void);
+
+static inline unsigned long ftrace_call_adjust(unsigned long addr)
+{
+       /*
+        * call mcount is "e8 <4 byte offset>"
+        * The addr points to the 4 byte offset and the caller of this
+        * function wants the pointer to e8. Simply subtract one.
+        */
+       return addr - 1;
+}
 #endif
 
 #endif /* CONFIG_FTRACE */
index 34280f027664a57267d915bda709183b745ea9a5..6fe4f81bfcf97ef96a0cde26f378ae143ce8092e 100644 (file)
@@ -57,6 +57,7 @@ struct genapic {
        unsigned (*get_apic_id)(unsigned long x);
        unsigned long apic_id_mask;
        unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask);
+       cpumask_t (*vector_allocation_domain)(int cpu);
 
 #ifdef CONFIG_SMP
        /* ipi */
@@ -104,6 +105,7 @@ struct genapic {
        APICFUNC(get_apic_id)                           \
        .apic_id_mask = APIC_ID_MASK,                   \
        APICFUNC(cpu_mask_to_apicid)                    \
+       APICFUNC(vector_allocation_domain)                      \
        APICFUNC(acpi_madt_oem_check)                   \
        IPIFUNC(send_IPI_mask)                          \
        IPIFUNC(send_IPI_allbutself)                    \
index cbbbb6d4dd32bb838462864da48b26b1becbcd2d..58b273f6ef07a92713eb74b6c2d54eb367e34b74 100644 (file)
@@ -1,6 +1,8 @@
 #ifndef ASM_X86__HPET_H
 #define ASM_X86__HPET_H
 
+#include <linux/msi.h>
+
 #ifdef CONFIG_HPET_TIMER
 
 #define HPET_MMAP_SIZE         1024
 #define HPET_CFG               0x010
 #define HPET_STATUS            0x020
 #define HPET_COUNTER           0x0f0
+
+#define HPET_Tn_CFG(n)         (0x100 + 0x20 * n)
+#define HPET_Tn_CMP(n)         (0x108 + 0x20 * n)
+#define HPET_Tn_ROUTE(n)       (0x110 + 0x20 * n)
+
 #define HPET_T0_CFG            0x100
 #define HPET_T0_CMP            0x108
 #define HPET_T0_ROUTE          0x110
@@ -65,6 +72,20 @@ extern void hpet_disable(void);
 extern unsigned long hpet_readl(unsigned long a);
 extern void force_hpet_resume(void);
 
+extern void hpet_msi_unmask(unsigned int irq);
+extern void hpet_msi_mask(unsigned int irq);
+extern void hpet_msi_write(unsigned int irq, struct msi_msg *msg);
+extern void hpet_msi_read(unsigned int irq, struct msi_msg *msg);
+
+#ifdef CONFIG_PCI_MSI
+extern int arch_setup_hpet_msi(unsigned int irq);
+#else
+static inline int arch_setup_hpet_msi(unsigned int irq)
+{
+       return -EINVAL;
+}
+#endif
+
 #ifdef CONFIG_HPET_EMULATE_RTC
 
 #include <linux/interrupt.h>
index 50f6e0316b5029c4c2e5801bcd256119c3f037aa..749d042f055614e03f7b732743b80a8ac1ea7fd8 100644 (file)
@@ -96,13 +96,8 @@ extern asmlinkage void qic_call_function_interrupt(void);
 
 /* SMP */
 extern void smp_apic_timer_interrupt(struct pt_regs *);
-#ifdef CONFIG_X86_32
 extern void smp_spurious_interrupt(struct pt_regs *);
 extern void smp_error_interrupt(struct pt_regs *);
-#else
-extern asmlinkage void smp_spurious_interrupt(void);
-extern asmlinkage void smp_error_interrupt(void);
-#endif
 #ifdef CONFIG_X86_SMP
 extern void smp_reschedule_interrupt(struct pt_regs *);
 extern void smp_call_function_interrupt(struct pt_regs *);
@@ -115,13 +110,13 @@ extern asmlinkage void smp_invalidate_interrupt(struct pt_regs *);
 #endif
 
 #ifdef CONFIG_X86_32
-extern void (*const interrupt[NR_IRQS])(void);
-#else
+extern void (*const interrupt[NR_VECTORS])(void);
+#endif
+
 typedef int vector_irq_t[NR_VECTORS];
 DECLARE_PER_CPU(vector_irq_t, vector_irq);
-#endif
 
-#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_X86_64)
+#ifdef CONFIG_X86_IO_APIC
 extern void lock_vector_lock(void);
 extern void unlock_vector_lock(void);
 extern void __setup_vector_irq(int cpu);
index 8ec68a50cf103b3779da806b391978fb27fcdc2c..d35cbd7aa5873ff182902dbcfba38e52387c0af1 100644 (file)
@@ -4,6 +4,7 @@
 #include <linux/types.h>
 #include <asm/mpspec.h>
 #include <asm/apicdef.h>
+#include <asm/irq_vectors.h>
 
 /*
  * Intel IO-APIC support for SMP and UP systems.
@@ -87,24 +88,8 @@ struct IO_APIC_route_entry {
                mask            :  1,   /* 0: enabled, 1: disabled */
                __reserved_2    : 15;
 
-#ifdef CONFIG_X86_32
-       union {
-               struct {
-                       __u32   __reserved_1    : 24,
-                               physical_dest   :  4,
-                               __reserved_2    :  4;
-               } physical;
-
-               struct {
-                       __u32   __reserved_1    : 24,
-                               logical_dest    :  8;
-               } logical;
-       } dest;
-#else
        __u32   __reserved_3    : 24,
                dest            :  8;
-#endif
-
 } __attribute__ ((packed));
 
 struct IR_IO_APIC_route_entry {
@@ -203,10 +188,17 @@ extern void restore_IO_APIC_setup(void);
 extern void reinit_intr_remapped_IO_APIC(int);
 #endif
 
+extern int probe_nr_irqs(void);
+
 #else  /* !CONFIG_X86_IO_APIC */
 #define io_apic_assign_pci_irqs 0
 static const int timer_through_8259 = 0;
 static inline void ioapic_init_mappings(void) { }
+
+static inline int probe_nr_irqs(void)
+{
+       return NR_IRQS;
+}
 #endif
 
 #endif /* ASM_X86__IO_APIC_H */
index c5d2d767a1f356366e7ee57716ffd057ff3deb4a..a8d065d85f5770de38b0290e3bd2866ceb105583 100644 (file)
 
 /*
  * Reserve the lowest usable priority level 0x20 - 0x2f for triggering
- * cleanup after irq migration on 64 bit.
+ * cleanup after irq migration.
  */
 #define IRQ_MOVE_CLEANUP_VECTOR        FIRST_EXTERNAL_VECTOR
 
 /*
- * Vectors 0x20-0x2f are used for ISA interrupts on 32 bit.
- * Vectors 0x30-0x3f are used for ISA interrupts on 64 bit.
+ * Vectors 0x30-0x3f are used for ISA interrupts.
  */
-#ifdef CONFIG_X86_32
-#define IRQ0_VECTOR            (FIRST_EXTERNAL_VECTOR)
-#else
 #define IRQ0_VECTOR            (FIRST_EXTERNAL_VECTOR + 0x10)
-#endif
 #define IRQ1_VECTOR            (IRQ0_VECTOR + 1)
 #define IRQ2_VECTOR            (IRQ0_VECTOR + 2)
 #define IRQ3_VECTOR            (IRQ0_VECTOR + 3)
  * start at 0x31(0x41) to spread out vectors evenly between priority
  * levels. (0x80 is the syscall vector)
  */
-#ifdef CONFIG_X86_32
-# define FIRST_DEVICE_VECTOR   0x31
-#else
-# define FIRST_DEVICE_VECTOR   (IRQ15_VECTOR + 2)
-#endif
+#define FIRST_DEVICE_VECTOR    (IRQ15_VECTOR + 2)
 
 #define NR_VECTORS             256
 
 # else
 #  define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS))
 # endif
-# define NR_IRQ_VECTORS NR_IRQS
 
 #elif !defined(CONFIG_X86_VOYAGER)
 
 
 #  define NR_IRQS              224
 
-#  if (224 >= 32 * NR_CPUS)
-#   define NR_IRQ_VECTORS      NR_IRQS
-#  else
-#   define NR_IRQ_VECTORS      (32 * NR_CPUS)
-#  endif
-
 # else /* IO_APIC || PARAVIRT */
 
 #  define NR_IRQS              16
-#  define NR_IRQ_VECTORS       NR_IRQS
 
 # endif
 
 #else /* !VISWS && !VOYAGER */
 
 # define NR_IRQS               224
-# define NR_IRQ_VECTORS                NR_IRQS
 
 #endif /* VISWS */
 
index 9283b60a1dd2530a47f541e71296bd60d21fcfc1..6b1add8e31dde5a22e4e8f21c08eac11875c63c9 100644 (file)
@@ -14,6 +14,7 @@ BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR)
 BUILD_INTERRUPT(invalidate_interrupt,INVALIDATE_TLB_VECTOR)
 BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
 BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR)
+BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR)
 #endif
 
 /*
index 2a330a41b3dd703fcafc3d254e3aa3f664302c0d..3c66f2cdaec17e6c7921a0e2094eda3100658a33 100644 (file)
@@ -85,6 +85,20 @@ static inline int apicid_to_node(int logical_apicid)
        return 0;
 #endif
 }
+
+static inline cpumask_t vector_allocation_domain(int cpu)
+{
+        /* Careful. Some cpus do not strictly honor the set of cpus
+         * specified in the interrupt destination when using lowest
+         * priority interrupt delivery mode.
+         *
+         * In particular there was a hyperthreading cpu observed to
+         * deliver interrupts to the wrong hyperthread when only one
+         * hyperthread was specified in the interrupt destination.
+         */
+        cpumask_t domain = { { [0] = APIC_ALL_CPUS, } };
+        return domain;
+}
 #endif
 
 static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid)
@@ -138,6 +152,5 @@ static inline int check_phys_apicid_present(int boot_cpu_physical_apicid)
 static inline void enable_apic_mode(void)
 {
 }
-
 #endif /* CONFIG_X86_LOCAL_APIC */
 #endif /* ASM_X86__MACH_DEFAULT__MACH_APIC_H */
diff --git a/include/asm-x86/mach-generic/irq_vectors_limits.h b/include/asm-x86/mach-generic/irq_vectors_limits.h
deleted file mode 100644 (file)
index f7870e1..0000000
+++ /dev/null
@@ -1,14 +0,0 @@
-#ifndef ASM_X86__MACH_GENERIC__IRQ_VECTORS_LIMITS_H
-#define ASM_X86__MACH_GENERIC__IRQ_VECTORS_LIMITS_H
-
-/*
- * For Summit or generic (i.e. installer) kernels, we have lots of I/O APICs,
- * even with uni-proc kernels, so use a big array.
- *
- * This value should be the same in both the generic and summit subarches.
- * Change one, change 'em both.
- */
-#define NR_IRQS        224
-#define NR_IRQ_VECTORS 1024
-
-#endif /* ASM_X86__MACH_GENERIC__IRQ_VECTORS_LIMITS_H */
index 5d010c6881dddc51bb9ad2a59503c79ad2837b14..5085b52da301152853d0f6bed29a8342b981b01b 100644 (file)
@@ -24,6 +24,7 @@
 #define check_phys_apicid_present (genapic->check_phys_apicid_present)
 #define check_apicid_used (genapic->check_apicid_used)
 #define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid)
+#define vector_allocation_domain (genapic->vector_allocation_domain)
 #define enable_apic_mode (genapic->enable_apic_mode)
 #define phys_pkg_id (genapic->phys_pkg_id)
 
index a8344ba6ea15064cd4898423c9b3fcd8183b25e1..0bf2a06b7a4e66a33629cc930f6d59c9c9907c92 100644 (file)
@@ -12,8 +12,6 @@ static inline cpumask_t target_cpus(void)
        return CPU_MASK_ALL;
 }
 
-#define TARGET_CPUS (target_cpus())
-
 #define NO_BALANCE_IRQ (1)
 #define esr_disable (1)
 
index 394b00bb5e72542b3bfd4ecc2efbee58157cf3a9..9b3070f1c2ac6fb80f0b53edf9b33b080af0f081 100644 (file)
@@ -22,7 +22,6 @@ static inline cpumask_t target_cpus(void)
         */
        return cpumask_of_cpu(0);
 }
-#define TARGET_CPUS    (target_cpus())
 
 #define INT_DELIVERY_MODE (dest_LowestPrio)
 #define INT_DEST_MODE 1     /* logical delivery broadcast to all procs */
diff --git a/include/asm-x86/summit/irq_vectors_limits.h b/include/asm-x86/summit/irq_vectors_limits.h
deleted file mode 100644 (file)
index 890ce3f..0000000
+++ /dev/null
@@ -1,14 +0,0 @@
-#ifndef _ASM_IRQ_VECTORS_LIMITS_H
-#define _ASM_IRQ_VECTORS_LIMITS_H
-
-/*
- * For Summit or generic (i.e. installer) kernels, we have lots of I/O APICs,
- * even with uni-proc kernels, so use a big array.
- *
- * This value should be the same in both the generic and summit subarches.
- * Change one, change 'em both.
- */
-#define NR_IRQS        224
-#define NR_IRQ_VECTORS 1024
-
-#endif /* _ASM_IRQ_VECTORS_LIMITS_H */
index 7cd6d7ec1308fe523811d462865951a8667d7ab0..215f1969c266c55b72722c64efc971ab56a49b5b 100644 (file)
@@ -2,9 +2,7 @@
 #define ASM_X86__UV__BIOS_H
 
 /*
- * BIOS layer definitions.
- *
- *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ * UV BIOS layer definitions.
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, write to the Free Software
  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ *
+ *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ *  Copyright (c) Russ Anderson
  */
 
 #include <linux/rtc.h>
 
-#define BIOS_FREQ_BASE                 0x01000001
+/*
+ * Values for the BIOS calls.  It is passed as the first argument in the
+ * BIOS call.  Passing any other value in the first argument will result
+ * in a BIOS_STATUS_UNIMPLEMENTED return status.
+ */
+enum uv_bios_cmd {
+       UV_BIOS_COMMON,
+       UV_BIOS_GET_SN_INFO,
+       UV_BIOS_FREQ_BASE
+};
 
+/*
+ * Status values returned from a BIOS call.
+ */
 enum {
-       BIOS_FREQ_BASE_PLATFORM = 0,
-       BIOS_FREQ_BASE_INTERVAL_TIMER = 1,
-       BIOS_FREQ_BASE_REALTIME_CLOCK = 2
+       BIOS_STATUS_SUCCESS             =  0,
+       BIOS_STATUS_UNIMPLEMENTED       = -ENOSYS,
+       BIOS_STATUS_EINVAL              = -EINVAL,
+       BIOS_STATUS_UNAVAIL             = -EBUSY
 };
 
-# define BIOS_CALL(result, a0, a1, a2, a3, a4, a5, a6, a7)             \
-       do {                                                            \
-               /* XXX - the real call goes here */                     \
-               result.status = BIOS_STATUS_UNIMPLEMENTED;              \
-               isrv.v0 = 0;                                            \
-               isrv.v1 = 0;                                            \
-       } while (0)
+/*
+ * The UV system table describes specific firmware
+ * capabilities available to the Linux kernel at runtime.
+ */
+struct uv_systab {
+       char signature[4];      /* must be "UVST" */
+       u32 revision;           /* distinguish different firmware revs */
+       u64 function;           /* BIOS runtime callback function ptr */
+};
 
 enum {
-       BIOS_STATUS_SUCCESS             =  0,
-       BIOS_STATUS_UNIMPLEMENTED       = -1,
-       BIOS_STATUS_EINVAL              = -2,
-       BIOS_STATUS_ERROR               = -3
+       BIOS_FREQ_BASE_PLATFORM = 0,
+       BIOS_FREQ_BASE_INTERVAL_TIMER = 1,
+       BIOS_FREQ_BASE_REALTIME_CLOCK = 2
 };
 
-struct uv_bios_retval {
-       /*
-        * A zero status value indicates call completed without error.
-        * A negative status value indicates reason of call failure.
-        * A positive status value indicates success but an
-        * informational value should be printed (e.g., "reboot for
-        * change to take effect").
-        */
-       s64 status;
-       u64 v0;
-       u64 v1;
-       u64 v2;
+union partition_info_u {
+       u64     val;
+       struct {
+               u64     hub_version     :  8,
+                       partition_id    : 16,
+                       coherence_id    : 16,
+                       region_size     : 24;
+       };
 };
 
-extern long
-x86_bios_freq_base(unsigned long which, unsigned long *ticks_per_second,
-                  unsigned long *drift_info);
-extern const char *x86_bios_strerror(long status);
+/*
+ * BIOS calls have 6 parameters
+ */
+extern s64 uv_bios_call(enum uv_bios_cmd, u64, u64, u64, u64, u64);
+extern s64 uv_bios_call_irqsave(enum uv_bios_cmd, u64, u64, u64, u64, u64);
+extern s64 uv_bios_call_reentrant(enum uv_bios_cmd, u64, u64, u64, u64, u64);
+
+extern s64 uv_bios_get_sn_info(int, int *, long *, long *, long *);
+extern s64 uv_bios_freq_base(u64, u64 *);
+
+extern void uv_bios_init(void);
+
+extern int uv_type;
+extern long sn_partition_id;
+extern long uv_coherency_id;
+extern long uv_region_size;
+#define partition_coherence_id()       (uv_coherency_id)
+
+extern struct kobject *sgi_uv_kobj;    /* /sys/firmware/sgi_uv */
 
 #endif /* ASM_X86__UV__BIOS_H */
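
A hedged sketch (not part of the patch) of how a caller might combine
uv_bios_call() with the partition_info_u bitfields above; the argument
layout after the command word is an assumption, and the helper name is
hypothetical:

    static s64 example_partition_query(void)
    {
            union partition_info_u part;
            s64 ret;

            /* the first argument selects the service; anything else
             * comes back as BIOS_STATUS_UNIMPLEMENTED */
            ret = uv_bios_call(UV_BIOS_GET_SN_INFO, 0, (u64)&part.val,
                               0, 0, 0);
            if (ret != BIOS_STATUS_SUCCESS)
                    return ret;

            /* decode the packed 64-bit descriptor via the bitfields */
            pr_info("hub v%u, partition %u, coherence %u, region %u\n",
                    (unsigned int)part.hub_version,
                    (unsigned int)part.partition_id,
                    (unsigned int)part.coherence_id,
                    (unsigned int)part.region_size);
            return 0;
    }
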
diff --git a/include/asm-x86/uv/uv_irq.h b/include/asm-x86/uv/uv_irq.h
new file mode 100644 (file)
index 0000000..8bf5f32
--- /dev/null
@@ -0,0 +1,36 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * SGI UV IRQ definitions
+ *
+ * Copyright (C) 2008 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#ifndef ASM_X86__UV__UV_IRQ_H
+#define ASM_X86__UV__UV_IRQ_H
+
+/* If a generic version of this structure gets defined, eliminate this one. */
+struct uv_IO_APIC_route_entry {
+       __u64   vector          :  8,
+               delivery_mode   :  3,
+               dest_mode       :  1,
+               delivery_status :  1,
+               polarity        :  1,
+               __reserved_1    :  1,
+               trigger         :  1,
+               mask            :  1,
+               __reserved_2    : 15,
+               dest            : 32;
+};
+
+extern struct irq_chip uv_irq_chip;
+
+extern int arch_enable_uv_irq(char *, unsigned int, int, int, unsigned long);
+extern void arch_disable_uv_irq(int, unsigned long);
+
+extern int uv_setup_irq(char *, int, int, unsigned long);
+extern void uv_teardown_irq(unsigned int, int, unsigned long);
+
+#endif /* ASM_X86__UV__UV_IRQ_H */
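
A usage sketch for the new file's API; this header spells out only the
types, so the parameter meanings (name, cpu, MMR blade, MMR offset) are
assumptions and the wrapper below is hypothetical:

    static int example_uv_irq(int cpu, int mmr_blade, unsigned long mmr_offset)
    {
            int irq = uv_setup_irq("example", cpu, mmr_blade, mmr_offset);

            if (irq <= 0)
                    return -EBUSY;
            /* ... drive the device through this irq ... */
            uv_teardown_irq(irq, mmr_blade, mmr_offset);
            return 0;
    }
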
index f2518141de88fbc8f89ababec557f37cd32a86fc..f7df1eefc1071ac71a985650343ec1ca2bfbf9ac 100644 (file)
@@ -10,7 +10,6 @@
 #if defined(CONFIG_PCIEAER)
 /* pci-e port driver needs this function to enable aer */
 extern int pci_enable_pcie_error_reporting(struct pci_dev *dev);
-extern int pci_find_aer_capability(struct pci_dev *dev);
 extern int pci_disable_pcie_error_reporting(struct pci_dev *dev);
 extern int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev);
 #else
@@ -18,10 +17,6 @@ static inline int pci_enable_pcie_error_reporting(struct pci_dev *dev)
 {
        return -EINVAL;
 }
-static inline int pci_find_aer_capability(struct pci_dev *dev)
-{
-       return 0;
-}
 static inline int pci_disable_pcie_error_reporting(struct pci_dev *dev)
 {
        return -EINVAL;
index 55e434feec993d9656ccf6bcd31964005daa9667..f88d32f8ff7c8a0d242763328c8e14f39445006d 100644 (file)
@@ -45,7 +45,8 @@ struct clocksource;
  * @read:              returns a cycle value
  * @mask:              bitmask for two's complement
  *                     subtraction of non 64 bit counters
- * @mult:              cycle to nanosecond multiplier
+ * @mult:              cycle to nanosecond multiplier (adjusted by NTP)
+ * @mult_orig:         cycle to nanosecond multiplier (unadjusted by NTP)
  * @shift:             cycle to nanosecond divisor (power of two)
  * @flags:             flags describing special properties
  * @vread:             vsyscall based read
@@ -63,6 +64,7 @@ struct clocksource {
        cycle_t (*read)(void);
        cycle_t mask;
        u32 mult;
+       u32 mult_orig;
        u32 shift;
        unsigned long flags;
        cycle_t (*vread)(void);
@@ -77,6 +79,7 @@ struct clocksource {
        /* timekeeping specific data, ignore */
        cycle_t cycle_interval;
        u64     xtime_interval;
+       u32     raw_interval;
        /*
         * Second part is written at each timer interrupt
         * Keep it in a different cache line to dirty no
@@ -85,6 +88,7 @@ struct clocksource {
        cycle_t cycle_last ____cacheline_aligned_in_smp;
        u64 xtime_nsec;
        s64 error;
+       struct timespec raw_time;
 
 #ifdef CONFIG_CLOCKSOURCE_WATCHDOG
        /* Watchdog related data, used by the framework */
@@ -201,17 +205,19 @@ static inline void clocksource_calculate_interval(struct clocksource *c,
 {
        u64 tmp;
 
-       /* XXX - All of this could use a whole lot of optimization */
+       /* Do the ns -> cycle conversion first, using original mult */
        tmp = length_nsec;
        tmp <<= c->shift;
-       tmp += c->mult/2;
-       do_div(tmp, c->mult);
+       tmp += c->mult_orig/2;
+       do_div(tmp, c->mult_orig);
 
        c->cycle_interval = (cycle_t)tmp;
        if (c->cycle_interval == 0)
                c->cycle_interval = 1;
 
+       /* Go back from cycles -> shifted ns, this time using the NTP-adjusted mult */
        c->xtime_interval = (u64)c->cycle_interval * c->mult;
+       c->raw_interval = ((u64)c->cycle_interval * c->mult_orig) >> c->shift;
 }
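
A worked example may make the mult/mult_orig split clearer (illustrative
numbers, not from the patch): take shift = 10, mult_orig = 1024 (exactly
1 ns per cycle) and an NTP-steered mult = 1025, over a 1 ms interval:

    length_nsec    = 1000000;                          /* 1 ms */
    cycle_interval = ((1000000 << 10) + 512) / 1024;   /* = 1000000 cycles */
    xtime_interval = 1000000 * 1025;           /* NTP-steered, shifted ns */
    raw_interval   = (1000000 * 1024) >> 10;   /* = 1000000 ns, unsteered */

xtime_interval carries the NTP steering (about 1000976 ns once shifted
down), while raw_interval advances the new raw_time field at the pure
hardware rate.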
 
 
index 8322141ee480c802ee6919d0f13a86218e45bfc4..98115d9d04daa6c8008b528bee3014a8cee11078 100644 (file)
@@ -44,6 +44,8 @@ extern void __chk_io_ptr(const volatile void __iomem *);
 # error Sorry, your compiler is too old/not recognized.
 #endif
 
+#define notrace __attribute__((no_instrument_function))
+
 /* Intel compiler defines __GNUC__. So we will overwrite implementations
  * coming from above header files here
  */
index c360c558e59eb5b58952955e3172658ed86f8848..f1984fc3e06d54e21646c79c591169c5a3f84aeb 100644 (file)
@@ -45,7 +45,6 @@ extern struct list_head dmar_drhd_units;
        list_for_each_entry(drhd, &dmar_drhd_units, list)
 
 extern int dmar_table_init(void);
-extern int early_dmar_detect(void);
 extern int dmar_dev_scope_init(void);
 
 /* Intel IOMMU detection */
index 807373d467f7485a03dfa1417f16a62097108e17..bb66feb164bd86596b527cce7398070343b8e26f 100644 (file)
@@ -208,6 +208,9 @@ typedef efi_status_t efi_set_virtual_address_map_t (unsigned long memory_map_siz
 #define EFI_GLOBAL_VARIABLE_GUID \
     EFI_GUID(  0x8be4df61, 0x93ca, 0x11d2, 0xaa, 0x0d, 0x00, 0xe0, 0x98, 0x03, 0x2b, 0x8c )
 
+#define UV_SYSTEM_TABLE_GUID \
+    EFI_GUID(  0x3b13a7d4, 0x633e, 0x11dd, 0x93, 0xec, 0xda, 0x25, 0x56, 0xd8, 0x95, 0x93 )
+
 typedef struct {
        efi_guid_t guid;
        unsigned long table;
@@ -255,6 +258,7 @@ extern struct efi {
        unsigned long boot_info;        /* boot info table */
        unsigned long hcdp;             /* HCDP table */
        unsigned long uga;              /* UGA table */
+       unsigned long uv_systab;        /* UV system table */
        efi_get_time_t *get_time;
        efi_set_time_t *set_time;
        efi_get_wakeup_time_t *get_wakeup_time;
index bb384068272e1ef5ef9f4aa4a04572e48dcbfca5..a3d46151be195cec1d701b7058ec98662860e65b 100644 (file)
@@ -1,10 +1,14 @@
 #ifndef _LINUX_FTRACE_H
 #define _LINUX_FTRACE_H
 
-#ifdef CONFIG_FTRACE
-
 #include <linux/linkage.h>
 #include <linux/fs.h>
+#include <linux/ktime.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/kallsyms.h>
+
+#ifdef CONFIG_FTRACE
 
 extern int ftrace_enabled;
 extern int
@@ -36,6 +40,7 @@ extern void ftrace_stub(unsigned long a0, unsigned long a1);
 # define register_ftrace_function(ops) do { } while (0)
 # define unregister_ftrace_function(ops) do { } while (0)
 # define clear_ftrace_function(ops) do { } while (0)
+static inline void ftrace_kill_atomic(void) { }
 #endif /* CONFIG_FTRACE */
 
 #ifdef CONFIG_DYNAMIC_FTRACE
@@ -76,8 +81,10 @@ extern void mcount_call(void);
 
 extern int skip_trace(unsigned long ip);
 
-void ftrace_disable_daemon(void);
-void ftrace_enable_daemon(void);
+extern void ftrace_release(void *start, unsigned long size);
+
+extern void ftrace_disable_daemon(void);
+extern void ftrace_enable_daemon(void);
 
 #else
 # define skip_trace(ip)                                ({ 0; })
@@ -85,6 +92,7 @@ void ftrace_enable_daemon(void);
 # define ftrace_set_filter(buf, len, reset)    do { } while (0)
 # define ftrace_disable_daemon()               do { } while (0)
 # define ftrace_enable_daemon()                        do { } while (0)
+static inline void ftrace_release(void *start, unsigned long size) { }
 #endif /* CONFIG_DYNAMIC_FTRACE */
 
 /* totally disable ftrace - can not re-enable after this */
@@ -98,9 +106,11 @@ static inline void tracer_disable(void)
 #endif
 }
 
-/* Ftrace disable/restore without lock. Some synchronization mechanism
+/*
+ * Ftrace disable/restore without lock. Some synchronization mechanism
  * must be used to prevent ftrace_enabled to be changed between
- * disable/restore. */
+ * disable/restore.
+ */
 static inline int __ftrace_enabled_save(void)
 {
 #ifdef CONFIG_FTRACE
@@ -157,9 +167,71 @@ static inline void __ftrace_enabled_restore(int enabled)
 #ifdef CONFIG_TRACING
 extern void
 ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3);
+
+/**
+ * ftrace_printk - printf formatting in the ftrace buffer
+ * @fmt: the printf format for printing
+ *
+ * Note: __ftrace_printk is an internal function for ftrace_printk and
+ *       the @ip is passed in via the ftrace_printk macro.
+ *
+ * This function allows a kernel developer to debug fast path sections
+ * that printk is not appropriate for. By scattering printk-like
+ * trace statements through the code, a developer can quickly see
+ * where problems are occurring.
+ *
+ * This is intended as a debugging tool for the developer only.
+ * Please refrain from leaving ftrace_printks scattered around in
+ * your code.
+ */
+# define ftrace_printk(fmt...) __ftrace_printk(_THIS_IP_, fmt)
+extern int
+__ftrace_printk(unsigned long ip, const char *fmt, ...)
+       __attribute__ ((format (printf, 2, 3)));
+extern void ftrace_dump(void);
 #else
 static inline void
 ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { }
+static inline int
+ftrace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 0)));
+
+static inline int
+ftrace_printk(const char *fmt, ...)
+{
+       return 0;
+}
+static inline void ftrace_dump(void) { }
 #endif
 
+#ifdef CONFIG_FTRACE_MCOUNT_RECORD
+extern void ftrace_init(void);
+extern void ftrace_init_module(unsigned long *start, unsigned long *end);
+#else
+static inline void ftrace_init(void) { }
+static inline void
+ftrace_init_module(unsigned long *start, unsigned long *end) { }
+#endif
+
+
+struct boot_trace {
+       pid_t                   caller;
+       char                    func[KSYM_NAME_LEN];
+       int                     result;
+       unsigned long long      duration;               /* usecs */
+       ktime_t                 calltime;
+       ktime_t                 rettime;
+};
+
+#ifdef CONFIG_BOOT_TRACER
+extern void trace_boot(struct boot_trace *it, initcall_t fn);
+extern void start_boot_trace(void);
+extern void stop_boot_trace(void);
+#else
+static inline void trace_boot(struct boot_trace *it, initcall_t fn) { }
+static inline void start_boot_trace(void) { }
+static inline void stop_boot_trace(void) { }
+#endif
+
+
+
 #endif /* _LINUX_FTRACE_H */
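
A debug-only sketch of the usage the kerneldoc above intends (names here
are hypothetical; remove such calls before submitting code upstream):

    static void example_fast_path(int cpu, int error)
    {
            ftrace_printk("entering fast path on cpu %d\n", cpu);
            /* ... hot code that printk would perturb ... */
            if (error)
                    ftrace_dump();  /* spill the trace buffer to the console */
    }
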
index 265635dc990812a319dcd632131e8c9f79051f1d..350fe9767bbc77d755718afe43513e6c8513a4e8 100644 (file)
  *  - add lock_owner field to fuse_setattr_in, fuse_read_in and fuse_write_in
  *  - add blksize field to fuse_attr
  *  - add file flags field to fuse_read_in and fuse_write_in
+ *
+ * 7.10
+ *  - add nonseekable open flag
  */
 
+#ifndef _LINUX_FUSE_H
+#define _LINUX_FUSE_H
+
 #include <asm/types.h>
 #include <linux/major.h>
 
@@ -26,7 +32,7 @@
 #define FUSE_KERNEL_VERSION 7
 
 /** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 9
+#define FUSE_KERNEL_MINOR_VERSION 10
 
 /** The node ID of the root inode */
 #define FUSE_ROOT_ID 1
@@ -98,9 +104,11 @@ struct fuse_file_lock {
  *
  * FOPEN_DIRECT_IO: bypass page cache for this open file
  * FOPEN_KEEP_CACHE: don't invalidate the data cache on open
+ * FOPEN_NONSEEKABLE: the file is not seekable
  */
 #define FOPEN_DIRECT_IO                (1 << 0)
 #define FOPEN_KEEP_CACHE       (1 << 1)
+#define FOPEN_NONSEEKABLE      (1 << 2)
 
 /**
  * INIT request/reply flags
@@ -409,3 +417,5 @@ struct fuse_dirent {
 #define FUSE_DIRENT_ALIGN(x) (((x) + sizeof(__u64) - 1) & ~(sizeof(__u64) - 1))
 #define FUSE_DIRENT_SIZE(d) \
        FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + (d)->namelen)
+
+#endif /* _LINUX_FUSE_H */
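
A userspace-facing sketch of the new flag (assuming the open reply is
built in a fuse_open_out, whose open_flags field carries these bits; the
helper is hypothetical):

    static void example_fill_open_reply(struct fuse_open_out *out, __u64 fh)
    {
            out->fh = fh;   /* filesystem-chosen handle */
            /* bypass the page cache and, new in 7.10, forbid seeking */
            out->open_flags = FOPEN_DIRECT_IO | FOPEN_NONSEEKABLE;
    }
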
index 2f245fe63bda5611ad909c1452aa8a79c4f29eb4..9a4e35cd5f79d80e52bf3a1509e0438bb6a77ec4 100644 (file)
@@ -125,12 +125,12 @@ struct hrtimer {
        enum hrtimer_restart            (*function)(struct hrtimer *);
        struct hrtimer_clock_base       *base;
        unsigned long                   state;
-       enum hrtimer_cb_mode            cb_mode;
        struct list_head                cb_entry;
+       enum hrtimer_cb_mode            cb_mode;
 #ifdef CONFIG_TIMER_STATS
+       int                             start_pid;
        void                            *start_site;
        char                            start_comm[16];
-       int                             start_pid;
 #endif
 };
 
@@ -155,10 +155,8 @@ struct hrtimer_sleeper {
  * @first:             pointer to the timer node which expires first
  * @resolution:                the resolution of the clock, in nanoseconds
  * @get_time:          function to retrieve the current time of the clock
- * @get_softirq_time:  function to retrieve the current time from the softirq
  * @softirq_time:      the time when running the hrtimer queue in the softirq
  * @offset:            offset of this clock to the monotonic base
- * @reprogram:         function to reprogram the timer event
  */
 struct hrtimer_clock_base {
        struct hrtimer_cpu_base *cpu_base;
@@ -167,13 +165,9 @@ struct hrtimer_clock_base {
        struct rb_node          *first;
        ktime_t                 resolution;
        ktime_t                 (*get_time)(void);
-       ktime_t                 (*get_softirq_time)(void);
        ktime_t                 softirq_time;
 #ifdef CONFIG_HIGH_RES_TIMERS
        ktime_t                 offset;
-       int                     (*reprogram)(struct hrtimer *t,
-                                            struct hrtimer_clock_base *b,
-                                            ktime_t n);
 #endif
 };
 
index c47e371554c1ccb122945ab087eb4c9914480010..89e53cfbc787fdcb60635764ce9bae5d48239287 100644 (file)
@@ -461,12 +461,26 @@ struct ide_acpi_drive_link;
 struct ide_acpi_hwif_link;
 #endif
 
+struct ide_drive_s;
+
+struct ide_disk_ops {
+       int             (*check)(struct ide_drive_s *, const char *);
+       int             (*get_capacity)(struct ide_drive_s *);
+       void            (*setup)(struct ide_drive_s *);
+       void            (*flush)(struct ide_drive_s *);
+       int             (*init_media)(struct ide_drive_s *, struct gendisk *);
+       int             (*set_doorlock)(struct ide_drive_s *, struct gendisk *,
+                                       int);
+       ide_startstop_t (*do_request)(struct ide_drive_s *, struct request *,
+                                     sector_t);
+       int             (*end_request)(struct ide_drive_s *, int, int);
+       int             (*ioctl)(struct ide_drive_s *, struct inode *,
+                                struct file *, unsigned int, unsigned long);
+};
+
 /* ATAPI device flags */
 enum {
        IDE_AFLAG_DRQ_INTERRUPT         = (1 << 0),
-       IDE_AFLAG_MEDIA_CHANGED         = (1 << 1),
-       /* Drive cannot lock the door. */
-       IDE_AFLAG_NO_DOORLOCK           = (1 << 2),
 
        /* ide-cd */
        /* Drive cannot eject the disc. */
@@ -498,14 +512,10 @@ enum {
        IDE_AFLAG_LE_SPEED_FIELDS       = (1 << 17),
 
        /* ide-floppy */
-       /* Format in progress */
-       IDE_AFLAG_FORMAT_IN_PROGRESS    = (1 << 18),
        /* Avoid commands not supported in Clik drive */
        IDE_AFLAG_CLIK_DRIVE            = (1 << 19),
        /* Requires BH algorithm for packets */
        IDE_AFLAG_ZIP_DRIVE             = (1 << 20),
-       /* Write protect */
-       IDE_AFLAG_WP                    = (1 << 21),
        /* Supports format progress report */
        IDE_AFLAG_SRFP                  = (1 << 22),
 
@@ -578,7 +588,11 @@ enum {
        /* don't unload heads */
        IDE_DFLAG_NO_UNLOAD             = (1 << 27),
        /* heads unloaded, please don't reset port */
-       IDE_DFLAG_PARKED                = (1 << 28)
+       IDE_DFLAG_PARKED                = (1 << 28),
+       IDE_DFLAG_MEDIA_CHANGED         = (1 << 29),
+       /* write protect */
+       IDE_DFLAG_WP                    = (1 << 30),
+       IDE_DFLAG_FORMAT_IN_PROGRESS    = (1 << 31),
 };
 
 struct ide_drive_s {
@@ -597,6 +611,8 @@ struct ide_drive_s {
 #endif
        struct hwif_s           *hwif;  /* actually (ide_hwif_t *) */
 
+       const struct ide_disk_ops *disk_ops;
+
        unsigned long dev_flags;
 
        unsigned long sleep;            /* sleep until this time */
@@ -1123,8 +1139,8 @@ struct ide_driver_s {
        void            (*resume)(ide_drive_t *);
        void            (*shutdown)(ide_drive_t *);
 #ifdef CONFIG_IDE_PROC_FS
-       ide_proc_entry_t                *proc;
-       const struct ide_proc_devset    *settings;
+       ide_proc_entry_t *              (*proc_entries)(ide_drive_t *);
+       const struct ide_proc_devset *  (*proc_devsets)(ide_drive_t *);
 #endif
 };
 
index ad63824460e34d2838d0a77833f21121814251cd..0c1264668be0ccb32ffa98a304a706dd1009b6f3 100644 (file)
@@ -40,7 +40,7 @@
 
 /* These are for everybody (although not all archs will actually
    discard it in modules) */
-#define __init         __section(.init.text) __cold
+#define __init         __section(.init.text) __cold notrace
 #define __initdata     __section(.init.data)
 #define __initconst    __section(.init.rodata)
 #define __exitdata     __section(.exit.data)
index 35a61dc60d51ac9878ea9ba6fad94b579403f652..f58a0cf8929a81fb14025ab8683ab32bf8ab539c 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/preempt.h>
 #include <linux/cpumask.h>
 #include <linux/irqreturn.h>
+#include <linux/irqnr.h>
 #include <linux/hardirq.h>
 #include <linux/sched.h>
 #include <linux/irqflags.h>
index 8d9411bc60f6f9356e0237cc601330576283d22a..d058c57be02d3508e27382e63980bfb593658649 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/spinlock.h>
 #include <linux/cpumask.h>
 #include <linux/irqreturn.h>
+#include <linux/irqnr.h>
 #include <linux/errno.h>
 
 #include <asm/irq.h>
@@ -152,6 +153,7 @@ struct irq_chip {
  * @name:              flow handler name for /proc/interrupts output
  */
 struct irq_desc {
+       unsigned int            irq;
        irq_flow_handler_t      handle_irq;
        struct irq_chip         *chip;
        struct msi_desc         *msi_desc;
@@ -170,7 +172,7 @@ struct irq_desc {
        cpumask_t               affinity;
        unsigned int            cpu;
 #endif
-#if defined(CONFIG_GENERIC_PENDING_IRQ) || defined(CONFIG_IRQBALANCE)
+#ifdef CONFIG_GENERIC_PENDING_IRQ
        cpumask_t               pending_mask;
 #endif
 #ifdef CONFIG_PROC_FS
@@ -179,8 +181,14 @@ struct irq_desc {
        const char              *name;
 } ____cacheline_internodealigned_in_smp;
 
+
 extern struct irq_desc irq_desc[NR_IRQS];
 
+static inline struct irq_desc *irq_to_desc(unsigned int irq)
+{
+       return (irq < nr_irqs) ? irq_desc + irq : NULL;
+}
+
 /*
 * Migration helpers for obsolete names; they will go away:
  */
@@ -198,19 +206,15 @@ extern int setup_irq(unsigned int irq, struct irqaction *new);
 
 #ifdef CONFIG_GENERIC_HARDIRQS
 
-#ifndef handle_dynamic_tick
-# define handle_dynamic_tick(a)                do { } while (0)
-#endif
-
 #ifdef CONFIG_SMP
 
-#if defined(CONFIG_GENERIC_PENDING_IRQ) || defined(CONFIG_IRQBALANCE)
+#ifdef CONFIG_GENERIC_PENDING_IRQ
 
 void set_pending_irq(unsigned int irq, cpumask_t mask);
 void move_native_irq(int irq);
 void move_masked_irq(int irq);
 
-#else /* CONFIG_GENERIC_PENDING_IRQ || CONFIG_IRQBALANCE */
+#else /* CONFIG_GENERIC_PENDING_IRQ */
 
 static inline void move_irq(int irq)
 {
@@ -237,19 +241,14 @@ static inline void set_pending_irq(unsigned int irq, cpumask_t mask)
 
 #endif /* CONFIG_SMP */
 
-#ifdef CONFIG_IRQBALANCE
-extern void set_balance_irq_affinity(unsigned int irq, cpumask_t mask);
-#else
-static inline void set_balance_irq_affinity(unsigned int irq, cpumask_t mask)
-{
-}
-#endif
-
 extern int no_irq_affinity;
 
 static inline int irq_balancing_disabled(unsigned int irq)
 {
-       return irq_desc[irq].status & IRQ_NO_BALANCING_MASK;
+       struct irq_desc *desc;
+
+       desc = irq_to_desc(irq);
+       return desc->status & IRQ_NO_BALANCING_MASK;
 }
 
 /* Handle irq action chains: */
@@ -279,10 +278,8 @@ extern unsigned int __do_IRQ(unsigned int irq);
  * irqchip-style controller then we call the ->handle_irq() handler,
  * and it calls __do_IRQ() if it's attached to an irqtype-style controller.
  */
-static inline void generic_handle_irq(unsigned int irq)
+static inline void generic_handle_irq_desc(unsigned int irq, struct irq_desc *desc)
 {
-       struct irq_desc *desc = irq_desc + irq;
-
 #ifdef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ
        desc->handle_irq(irq, desc);
 #else
@@ -293,6 +290,11 @@ static inline void generic_handle_irq(unsigned int irq)
 #endif
 }
 
+static inline void generic_handle_irq(unsigned int irq)
+{
+       generic_handle_irq_desc(irq, irq_to_desc(irq));
+}
+
 /* Handling of unhandled and spurious interrupts: */
 extern void note_interrupt(unsigned int irq, struct irq_desc *desc,
                           int action_ret);
@@ -325,7 +327,10 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
 static inline void __set_irq_handler_unlocked(int irq,
                                              irq_flow_handler_t handler)
 {
-       irq_desc[irq].handle_irq = handler;
+       struct irq_desc *desc;
+
+       desc = irq_to_desc(irq);
+       desc->handle_irq = handler;
 }
 
 /*
@@ -353,13 +358,14 @@ extern void set_irq_noprobe(unsigned int irq);
 extern void set_irq_probe(unsigned int irq);
 
 /* Handle dynamic irq creation and destruction */
+extern unsigned int create_irq_nr(unsigned int irq_want);
 extern int create_irq(void);
 extern void destroy_irq(unsigned int irq);
 
 /* Test to see if a driver has successfully requested an irq */
 static inline int irq_has_action(unsigned int irq)
 {
-       struct irq_desc *desc = irq_desc + irq;
+       struct irq_desc *desc = irq_to_desc(irq);
        return desc->action != NULL;
 }
 
@@ -374,10 +380,10 @@ extern int set_irq_chip_data(unsigned int irq, void *data);
 extern int set_irq_type(unsigned int irq, unsigned int type);
 extern int set_irq_msi(unsigned int irq, struct msi_desc *entry);
 
-#define get_irq_chip(irq)      (irq_desc[irq].chip)
-#define get_irq_chip_data(irq) (irq_desc[irq].chip_data)
-#define get_irq_data(irq)      (irq_desc[irq].handler_data)
-#define get_irq_msi(irq)       (irq_desc[irq].msi_desc)
+#define get_irq_chip(irq)      (irq_to_desc(irq)->chip)
+#define get_irq_chip_data(irq) (irq_to_desc(irq)->chip_data)
+#define get_irq_data(irq)      (irq_to_desc(irq)->handler_data)
+#define get_irq_msi(irq)       (irq_to_desc(irq)->msi_desc)
 
 #endif /* CONFIG_GENERIC_HARDIRQS */
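
The conversion pattern this hunk encodes, as a sketch (the NULL check is
what irq_to_desc() adds over raw array indexing):

    static int example_set_handler(unsigned int irq, irq_flow_handler_t handle)
    {
            struct irq_desc *desc = irq_to_desc(irq);  /* was: irq_desc + irq */

            if (!desc)      /* out-of-range irq now fails cleanly */
                    return -EINVAL;
            desc->handle_irq = handle;
            return 0;
    }
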
 
diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h
new file mode 100644 (file)
index 0000000..3171ddc
--- /dev/null
@@ -0,0 +1,24 @@
+#ifndef _LINUX_IRQNR_H
+#define _LINUX_IRQNR_H
+
+#ifndef CONFIG_GENERIC_HARDIRQS
+#include <asm/irq.h>
+# define nr_irqs               NR_IRQS
+
+# define for_each_irq_desc(irq, desc)          \
+       for (irq = 0; irq < nr_irqs; irq++)
+#else
+extern int nr_irqs;
+
+# define for_each_irq_desc(irq, desc)          \
+       for (irq = 0, desc = irq_desc; irq < nr_irqs; irq++, desc++)
+
+# define for_each_irq_desc_reverse(irq, desc)                  \
+       for (irq = nr_irqs - 1, desc = irq_desc + (nr_irqs - 1);       \
+            irq >= 0; irq--, desc--)
+#endif
+
+#define for_each_irq_nr(irq)                   \
+       for (irq = 0; irq < nr_irqs; irq++)
+
+#endif
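
A sketch of the new iterator (assuming CONFIG_GENERIC_HARDIRQS, where the
macro advances desc alongside irq):

    static unsigned int example_count_wired_irqs(void)
    {
            struct irq_desc *desc;
            unsigned int count = 0;
            int irq;

            for_each_irq_desc(irq, desc) {
                    if (desc->action)
                            count++;
            }
            return count;
    }
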
index 5a566b705ca9c4afe9720fc5f4c665617352f627..94d17ff64c5a3b48c0f6716d29b9832d6a896551 100644 (file)
@@ -496,4 +496,9 @@ struct sysinfo {
 #define NUMA_BUILD 0
 #endif
 
+/* Rebuild everything on CONFIG_FTRACE_MCOUNT_RECORD */
+#ifdef CONFIG_FTRACE_MCOUNT_RECORD
+# define REBUILD_DUE_TO_FTRACE_MCOUNT_RECORD
+#endif
+
 #endif
index cf9f40a91c9c79837753956b135e49c9ec984630..4a145caeee075d3209fa4e0d324dfed7f9a15fe7 100644 (file)
@@ -39,19 +39,34 @@ DECLARE_PER_CPU(struct kernel_stat, kstat);
 
 extern unsigned long long nr_context_switches(void);
 
+struct irq_desc;
+
+static inline void kstat_incr_irqs_this_cpu(unsigned int irq,
+                                           struct irq_desc *desc)
+{
+       kstat_this_cpu.irqs[irq]++;
+}
+
+static inline unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
+{
+       return kstat_cpu(cpu).irqs[irq];
+}
+
 /*
  * Number of interrupts per specific IRQ source, since bootup
  */
-static inline int kstat_irqs(int irq)
+static inline unsigned int kstat_irqs(unsigned int irq)
 {
-       int cpu, sum = 0;
+       unsigned int sum = 0;
+       int cpu;
 
        for_each_possible_cpu(cpu)
-               sum += kstat_cpu(cpu).irqs[irq];
+               sum += kstat_irqs_cpu(irq, cpu);
 
        return sum;
 }
 
+extern unsigned long long task_delta_exec(struct task_struct *);
 extern void account_user_time(struct task_struct *, cputime_t);
 extern void account_user_time_scaled(struct task_struct *, cputime_t);
 extern void account_system_time(struct task_struct *, int, cputime_t);
index 0be7795655fab4279d3b9b4a6fa110b01a4fc070..497b1d1f7a05421ed88c66496848715e9b84811f 100644 (file)
@@ -29,6 +29,7 @@
  *             <jkenisto@us.ibm.com>  and Prasanna S Panchamukhi
  *             <prasanna@in.ibm.com> added function-return probes.
  */
+#include <linux/linkage.h>
 #include <linux/list.h>
 #include <linux/notifier.h>
 #include <linux/smp.h>
@@ -47,7 +48,7 @@
 #define KPROBE_HIT_SSDONE      0x00000008
 
 /* Attach to insert probes on any functions which should be ignored*/
-#define __kprobes      __attribute__((__section__(".kprobes.text")))
+#define __kprobes      __attribute__((__section__(".kprobes.text"))) notrace
 
 struct kprobe;
 struct pt_regs;
@@ -256,7 +257,7 @@ void recycle_rp_inst(struct kretprobe_instance *ri, struct hlist_head *head);
 
 #else /* CONFIG_KPROBES */
 
-#define __kprobes      /**/
+#define __kprobes      notrace
 struct jprobe;
 struct kretprobe;
 
index 56ba37394656c7f211eaebdadf5481eda844acca..9fd1f859021b5018baffdb440261379dab4a55b3 100644 (file)
@@ -4,8 +4,6 @@
 #include <linux/compiler.h>
 #include <asm/linkage.h>
 
-#define notrace __attribute__((no_instrument_function))
-
 #ifdef __cplusplus
 #define CPP_ASMLINKAGE extern "C"
 #else
index 1290653f924181333f6699d37b161238370b1515..889196c7fbb1e77cc5b4561e2b0b7f937b864434 100644 (file)
@@ -160,4 +160,11 @@ extern int marker_probe_unregister_private_data(marker_probe_func *probe,
 extern void *marker_get_private_data(const char *name, marker_probe_func *probe,
        int num);
 
+/*
+ * marker_synchronize_unregister must be called between the last marker probe
+ * unregistration and the end of module exit to make sure there is no caller
+ * executing a probe when it is freed.
+ */
+#define marker_synchronize_unregister() synchronize_sched()
+
 #endif
index 61d19e1b7a0b0684ec6799b1d2351e167f10a1ae..139d7c88d9c91ddebe065b1898916f17d2dc27f1 100644 (file)
@@ -34,11 +34,15 @@ extern void unregister_kmmio_probe(struct kmmio_probe *p);
 /* Called from page fault handler. */
 extern int kmmio_handler(struct pt_regs *regs, unsigned long addr);
 
-/* Called from ioremap.c */
 #ifdef CONFIG_MMIOTRACE
+/* Called from ioremap.c */
 extern void mmiotrace_ioremap(resource_size_t offset, unsigned long size,
                                                        void __iomem *addr);
 extern void mmiotrace_iounmap(volatile void __iomem *addr);
+
+/* For anyone to insert markers. Remember trailing newline. */
+extern int mmiotrace_printk(const char *fmt, ...)
+                               __attribute__ ((format (printf, 1, 2)));
 #else
 static inline void mmiotrace_ioremap(resource_size_t offset,
                                        unsigned long size, void __iomem *addr)
@@ -48,15 +52,22 @@ static inline void mmiotrace_ioremap(resource_size_t offset,
 static inline void mmiotrace_iounmap(volatile void __iomem *addr)
 {
 }
-#endif /* CONFIG_MMIOTRACE_HOOKS */
+
+static inline int mmiotrace_printk(const char *fmt, ...)
+                               __attribute__ ((format (printf, 1, 0)));
+
+static inline int mmiotrace_printk(const char *fmt, ...)
+{
+       return 0;
+}
+#endif /* CONFIG_MMIOTRACE */
 
 enum mm_io_opcode {
        MMIO_READ = 0x1,     /* struct mmiotrace_rw */
        MMIO_WRITE = 0x2,    /* struct mmiotrace_rw */
        MMIO_PROBE = 0x3,    /* struct mmiotrace_map */
        MMIO_UNPROBE = 0x4,  /* struct mmiotrace_map */
-       MMIO_MARKER = 0x5,   /* raw char data */
-       MMIO_UNKNOWN_OP = 0x6, /* struct mmiotrace_rw */
+       MMIO_UNKNOWN_OP = 0x5, /* struct mmiotrace_rw */
 };
 
 struct mmiotrace_rw {
@@ -81,5 +92,6 @@ extern void enable_mmiotrace(void);
 extern void disable_mmiotrace(void);
 extern void mmio_trace_rw(struct mmiotrace_rw *rw);
 extern void mmio_trace_mapping(struct mmiotrace_map *map);
+extern int mmio_trace_printk(const char *fmt, va_list args);
 
 #endif /* MMIOTRACE_H */
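
A marker sketch per the "trailing newline" comment above (the register
and value are hypothetical):

    static void example_marked_write(void __iomem *reg, u32 val)
    {
            mmiotrace_printk("writing 0x%08x to ctrl reg\n", val);
            writel(val, reg);
    }
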
index a41555cbe00ae071a02dab3c88b3c57bf6c38d4b..5d2970cdce932733237921b1d9ec4a347ff287ea 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/kobject.h>
 #include <linux/moduleparam.h>
 #include <linux/marker.h>
+#include <linux/tracepoint.h>
 #include <asm/local.h>
 
 #include <asm/module.h>
@@ -331,6 +332,10 @@ struct module
        struct marker *markers;
        unsigned int num_markers;
 #endif
+#ifdef CONFIG_TRACEPOINTS
+       struct tracepoint *tracepoints;
+       unsigned int num_tracepoints;
+#endif
 
 #ifdef CONFIG_MODULE_UNLOAD
        /* What modules depend on me? */
@@ -453,6 +458,9 @@ extern void print_modules(void);
 
 extern void module_update_markers(void);
 
+extern void module_update_tracepoints(void);
+extern int module_get_iter_tracepoints(struct tracepoint_iter *iter);
+
 #else /* !CONFIG_MODULES... */
 #define EXPORT_SYMBOL(sym)
 #define EXPORT_SYMBOL_GPL(sym)
@@ -557,6 +565,15 @@ static inline void module_update_markers(void)
 {
 }
 
+static inline void module_update_tracepoints(void)
+{
+}
+
+static inline int module_get_iter_tracepoints(struct tracepoint_iter *iter)
+{
+       return 0;
+}
+
 #endif /* CONFIG_MODULES */
 
 struct device_driver;
index acf8f24037cd0cd261772e23cbf8d86386318f2d..085187be29c76953fd5258438d60575b6d8372c3 100644 (file)
@@ -214,6 +214,7 @@ struct pci_dev {
        unsigned int    broken_parity_status:1; /* Device generates false positive parity */
        unsigned int    msi_enabled:1;
        unsigned int    msix_enabled:1;
+       unsigned int    ari_enabled:1;  /* ARI forwarding */
        unsigned int    is_managed:1;
        unsigned int    is_pcie:1;
        pci_dev_flags_t dev_flags;
@@ -347,7 +348,6 @@ struct pci_bus_region {
 struct pci_dynids {
        spinlock_t lock;            /* protects list, index */
        struct list_head list;      /* for IDs added at runtime */
-       unsigned int use_driver_data:1; /* pci_device_id->driver_data is used */
 };
 
 /* ---------------------------------------------------------------- */
@@ -456,8 +456,8 @@ struct pci_driver {
 
 /**
  * PCI_VDEVICE - macro used to describe a specific pci device in short form
- * @vend: the vendor name
- * @dev: the 16 bit PCI Device ID
+ * @vendor: the vendor name
+ * @device: the 16 bit PCI Device ID
  *
  * This macro is used to create a struct pci_device_id that matches a
  * specific PCI device.  The subvendor, and subdevice fields will be set
@@ -645,6 +645,7 @@ pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state);
 bool pci_pme_capable(struct pci_dev *dev, pci_power_t state);
 void pci_pme_active(struct pci_dev *dev, bool enable);
 int pci_enable_wake(struct pci_dev *dev, pci_power_t state, int enable);
+int pci_wake_from_d3(struct pci_dev *dev, bool enable);
 pci_power_t pci_target_state(struct pci_dev *dev);
 int pci_prepare_to_sleep(struct pci_dev *dev);
 int pci_back_from_sleep(struct pci_dev *dev);
@@ -725,7 +726,7 @@ enum pci_dma_burst_strategy {
 };
 
 struct msix_entry {
-       u16     vector; /* kernel uses to write allocated vector */
+       u32     vector; /* kernel uses to write allocated vector */
        u16     entry;  /* driver uses to specify entry, OS writes */
 };
 
@@ -1118,5 +1119,20 @@ static inline void pci_mmcfg_early_init(void) { }
 static inline void pci_mmcfg_late_init(void) { }
 #endif
 
+#ifdef CONFIG_HAS_IOMEM
+static inline void __iomem *pci_ioremap_bar(struct pci_dev *pdev, int bar)
+{
+       /*
+        * Make sure the BAR is actually a memory resource, not an IO resource
+        */
+       if (!(pci_resource_flags(pdev, bar) & IORESOURCE_MEM)) {
+               WARN_ON(1);
+               return NULL;
+       }
+       return ioremap_nocache(pci_resource_start(pdev, bar),
+                                    pci_resource_len(pdev, bar));
+}
+#endif
+
 #endif /* __KERNEL__ */
 #endif /* LINUX_PCI_H */
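
Typical probe-path use of the new helper, as a sketch (BAR 0 is
illustrative):

    static int example_probe(struct pci_dev *pdev)
    {
            void __iomem *regs = pci_ioremap_bar(pdev, 0);

            if (!regs)      /* NULL also covers a non-memory BAR */
                    return -ENOMEM;
            /* ... program the device ... */
            iounmap(regs);
            return 0;
    }
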
index 8edddc240e4f61c133308d9af0af7da179b9c56b..e5d344bfcb7e584d91c6f8fc553900799ea9bbad 100644 (file)
 #define PCI_DEVICE_ID_INTEL_ICH10_3    0x3a1a
 #define PCI_DEVICE_ID_INTEL_ICH10_4    0x3a30
 #define PCI_DEVICE_ID_INTEL_ICH10_5    0x3a60
-#define PCI_DEVICE_ID_INTEL_PCH_0      0x3b10
-#define PCI_DEVICE_ID_INTEL_PCH_1      0x3b11
-#define PCI_DEVICE_ID_INTEL_PCH_2      0x3b30
+#define PCI_DEVICE_ID_INTEL_PCH_LPC_MIN        0x3b00
+#define PCI_DEVICE_ID_INTEL_PCH_LPC_MAX        0x3b1f
+#define PCI_DEVICE_ID_INTEL_PCH_SMBUS  0x3b30
 #define PCI_DEVICE_ID_INTEL_IOAT_SNB   0x402f
 #define PCI_DEVICE_ID_INTEL_5100_16    0x65f0
 #define PCI_DEVICE_ID_INTEL_5100_21    0x65f5
index 450684f7eaacd41e08d57b08b0698dadffab1281..eb6686b88f9aad52ab3afafef38a4b74b1945304 100644 (file)
 #define  PCI_EXP_RTCTL_CRSSVE  0x10    /* CRS Software Visibility Enable */
 #define PCI_EXP_RTCAP          30      /* Root Capabilities */
 #define PCI_EXP_RTSTA          32      /* Root Status */
+#define PCI_EXP_DEVCAP2                36      /* Device Capabilities 2 */
+#define  PCI_EXP_DEVCAP2_ARI   0x20    /* Alternative Routing-ID */
+#define PCI_EXP_DEVCTL2                40      /* Device Control 2 */
+#define  PCI_EXP_DEVCTL2_ARI   0x20    /* Alternative Routing-ID */
 
 /* Extended Capabilities (PCI-X 2.0 and Express) */
 #define PCI_EXT_CAP_ID(header)         (header & 0x0000ffff)
 #define PCI_EXT_CAP_ID_VC      2
 #define PCI_EXT_CAP_ID_DSN     3
 #define PCI_EXT_CAP_ID_PWR     4
+#define PCI_EXT_CAP_ID_ARI     14
 
 /* Advanced Error Reporting */
 #define PCI_ERR_UNCOR_STATUS   4       /* Uncorrectable Error Status */
 #define HT_CAPTYPE_GEN3                0xD0    /* Generation 3 hypertransport configuration */
 #define HT_CAPTYPE_PM          0xE0    /* Hypertransport powermanagement configuration */
 
+/* Alternative Routing-ID Interpretation */
+#define PCI_ARI_CAP            0x04    /* ARI Capability Register */
+#define  PCI_ARI_CAP_MFVC      0x0001  /* MFVC Function Groups Capability */
+#define  PCI_ARI_CAP_ACS       0x0002  /* ACS Function Groups Capability */
+#define  PCI_ARI_CAP_NFN(x)    (((x) >> 8) & 0xff) /* Next Function Number */
+#define PCI_ARI_CTRL           0x06    /* ARI Control Register */
+#define  PCI_ARI_CTRL_MFVC     0x0001  /* MFVC Function Groups Enable */
+#define  PCI_ARI_CTRL_ACS      0x0002  /* ACS Function Groups Enable */
+#define  PCI_ARI_CTRL_FG(x)    (((x) >> 4) & 7) /* Function Group */
 
 #endif /* LINUX_PCI_REGS_H */
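
A sketch of walking an ARI-capable device's functions; `pos` is assumed
to hold the offset of the ARI extended capability (PCI_EXT_CAP_ID_ARI):

    static u8 example_next_fn(struct pci_dev *dev, int pos)
    {
            u16 cap;

            pci_read_config_word(dev, pos + PCI_ARI_CAP, &cap);
            /* 0 means there is no higher-numbered function */
            return PCI_ARI_CAP_NFN(cap);
    }
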
index a7dd38f30ade61d1cf6fba16d7f7f60b3e8cc944..a7c7213555492520e649a4920d9fc671e88460cd 100644 (file)
@@ -45,8 +45,6 @@ struct k_itimer {
        int it_requeue_pending;         /* waiting to requeue this timer */
 #define REQUEUE_PENDING 1
        int it_sigev_notify;            /* notify word of sigevent struct */
-       int it_sigev_signo;             /* signo word of sigevent struct */
-       sigval_t it_sigev_value;        /* value word of sigevent struct */
        struct task_struct *it_process; /* process to send signal to */
        struct sigqueue *sigq;          /* signal queue entry. */
        union {
@@ -115,4 +113,6 @@ void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx,
 
 long clock_nanosleep_restart(struct restart_block *restart_block);
 
+void update_rlimit_cpu(unsigned long rlim_new);
+
 #endif
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
new file mode 100644 (file)
index 0000000..536b0ca
--- /dev/null
@@ -0,0 +1,127 @@
+#ifndef _LINUX_RING_BUFFER_H
+#define _LINUX_RING_BUFFER_H
+
+#include <linux/mm.h>
+#include <linux/seq_file.h>
+
+struct ring_buffer;
+struct ring_buffer_iter;
+
+/*
+ * Don't reference this struct directly, use functions below.
+ */
+struct ring_buffer_event {
+       u32             type:2, len:3, time_delta:27;
+       u32             array[];
+};
+
+/**
+ * enum ring_buffer_type - internal ring buffer types
+ *
+ * @RINGBUF_TYPE_PADDING:      Left over page padding
+ *                              array is ignored
+ *                              size is variable depending on how much
+ *                               padding is needed
+ *
+ * @RINGBUF_TYPE_TIME_EXTEND:  Extend the time delta
+ *                              array[0] = time delta (28 .. 59)
+ *                              size = 8 bytes
+ *
+ * @RINGBUF_TYPE_TIME_STAMP:   Sync time stamp with external clock
+ *                              array[0] = tv_nsec
+ *                              array[1] = tv_sec
+ *                              size = 16 bytes
+ *
+ * @RINGBUF_TYPE_DATA:         Data record
+ *                              If len is zero:
+ *                               array[0] holds the actual length
+ *                               array[1..(length+3)/4-1] holds data
+ *                              else
+ *                               length = len << 2
+ *                               array[0..(length+3)/4] holds data
+ */
+enum ring_buffer_type {
+       RINGBUF_TYPE_PADDING,
+       RINGBUF_TYPE_TIME_EXTEND,
+       /* FIXME: RINGBUF_TYPE_TIME_STAMP not implemented */
+       RINGBUF_TYPE_TIME_STAMP,
+       RINGBUF_TYPE_DATA,
+};
+
+unsigned ring_buffer_event_length(struct ring_buffer_event *event);
+void *ring_buffer_event_data(struct ring_buffer_event *event);
+
+/**
+ * ring_buffer_event_time_delta - return the delta timestamp of the event
+ * @event: the event to get the delta timestamp of
+ *
+ * The delta timestamp is the 27 bit timestamp since the last event.
+ */
+static inline unsigned
+ring_buffer_event_time_delta(struct ring_buffer_event *event)
+{
+       return event->time_delta;
+}
+
+/*
+ * size is in bytes for each per CPU buffer.
+ */
+struct ring_buffer *
+ring_buffer_alloc(unsigned long size, unsigned flags);
+void ring_buffer_free(struct ring_buffer *buffer);
+
+int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size);
+
+struct ring_buffer_event *
+ring_buffer_lock_reserve(struct ring_buffer *buffer,
+                        unsigned long length,
+                        unsigned long *flags);
+int ring_buffer_unlock_commit(struct ring_buffer *buffer,
+                             struct ring_buffer_event *event,
+                             unsigned long flags);
+int ring_buffer_write(struct ring_buffer *buffer,
+                     unsigned long length, void *data);
+
+struct ring_buffer_event *
+ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts);
+struct ring_buffer_event *
+ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts);
+
+struct ring_buffer_iter *
+ring_buffer_read_start(struct ring_buffer *buffer, int cpu);
+void ring_buffer_read_finish(struct ring_buffer_iter *iter);
+
+struct ring_buffer_event *
+ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts);
+struct ring_buffer_event *
+ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts);
+void ring_buffer_iter_reset(struct ring_buffer_iter *iter);
+int ring_buffer_iter_empty(struct ring_buffer_iter *iter);
+
+unsigned long ring_buffer_size(struct ring_buffer *buffer);
+
+void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu);
+void ring_buffer_reset(struct ring_buffer *buffer);
+
+int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
+                        struct ring_buffer *buffer_b, int cpu);
+
+int ring_buffer_empty(struct ring_buffer *buffer);
+int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu);
+
+void ring_buffer_record_disable(struct ring_buffer *buffer);
+void ring_buffer_record_enable(struct ring_buffer *buffer);
+void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu);
+void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu);
+
+unsigned long ring_buffer_entries(struct ring_buffer *buffer);
+unsigned long ring_buffer_overruns(struct ring_buffer *buffer);
+
+u64 ring_buffer_time_stamp(int cpu);
+void ring_buffer_normalize_time_stamp(int cpu, u64 *ts);
+
+enum ring_buffer_flags {
+       RB_FL_OVERWRITE         = 1 << 0,
+};
+
+#endif /* _LINUX_RING_BUFFER_H */
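
A producer sketch using the reserve/commit pair declared above (`buffer`
is assumed to come from ring_buffer_alloc()):

    static int example_write_event(struct ring_buffer *buffer, int value)
    {
            struct ring_buffer_event *event;
            unsigned long flags;
            int *payload;

            event = ring_buffer_lock_reserve(buffer, sizeof(*payload), &flags);
            if (!event)
                    return -EBUSY;
            payload = ring_buffer_event_data(event);
            *payload = value;
            return ring_buffer_unlock_commit(buffer, event, flags);
    }
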
index f52dbd3587a78c4c031759f6934b5d03685b666a..5c38db536e07f8b7b0d6e5b0dc4f31a9e3cb07c1 100644 (file)
@@ -434,6 +434,39 @@ struct pacct_struct {
        unsigned long           ac_minflt, ac_majflt;
 };
 
+/**
+ * struct task_cputime - collected CPU time counts
+ * @utime:             time spent in user mode, in &cputime_t units
+ * @stime:             time spent in kernel mode, in &cputime_t units
+ * @sum_exec_runtime:  total time spent on the CPU, in nanoseconds
+ *
+ * This structure groups together three kinds of CPU time that are
+ * tracked for threads and thread groups.  Most things considering
+ * CPU time want to group these counts together and treat all three
+ * of them in parallel.
+ */
+struct task_cputime {
+       cputime_t utime;
+       cputime_t stime;
+       unsigned long long sum_exec_runtime;
+};
+/* Alternate field names when used to cache expirations. */
+#define prof_exp       stime
+#define virt_exp       utime
+#define sched_exp      sum_exec_runtime
+
+/**
+ * struct thread_group_cputime - thread group interval timer counts
+ * @totals:            thread group interval timers; substructure for
+ *                     uniprocessor kernel, per-cpu for SMP kernel.
+ *
+ * This structure contains the version of task_cputime, above, that is
+ * used for thread group CPU clock calculations.
+ */
+struct thread_group_cputime {
+       struct task_cputime *totals;
+};
+
 /*
 * NOTE! "signal_struct" does not have its own
  * locking, because a shared signal_struct always
@@ -479,6 +512,17 @@ struct signal_struct {
        cputime_t it_prof_expires, it_virt_expires;
        cputime_t it_prof_incr, it_virt_incr;
 
+       /*
+        * Thread group totals for process CPU clocks.
+        * See thread_group_cputime(), et al, for details.
+        */
+       struct thread_group_cputime cputime;
+
+       /* Earliest-expiration cache. */
+       struct task_cputime cputime_expires;
+
+       struct list_head cpu_timers[3];
+
        /* job control IDs */
 
        /*
@@ -509,7 +553,7 @@ struct signal_struct {
         * Live threads maintain their own counters and add to these
         * in __exit_signal, except for the group leader.
         */
-       cputime_t utime, stime, cutime, cstime;
+       cputime_t cutime, cstime;
        cputime_t gtime;
        cputime_t cgtime;
        unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
@@ -517,14 +561,6 @@ struct signal_struct {
        unsigned long inblock, oublock, cinblock, coublock;
        struct task_io_accounting ioac;
 
-       /*
-        * Cumulative ns of scheduled CPU time for dead threads in the
-        * group, not including a zombie group leader.  (This only differs
-        * from jiffies_to_ns(utime + stime) if sched_clock uses something
-        * other than jiffies.)
-        */
-       unsigned long long sum_sched_runtime;
-
        /*
         * We don't bother to synchronize most readers of this at all,
         * because there is no reader checking a limit that actually needs
@@ -536,8 +572,6 @@ struct signal_struct {
         */
        struct rlimit rlim[RLIM_NLIMITS];
 
-       struct list_head cpu_timers[3];
-
        /* keep the process-shared keyrings here so that they do the right
         * thing in threads created with CLONE_THREAD */
 #ifdef CONFIG_KEYS
@@ -1146,8 +1180,7 @@ struct task_struct {
 /* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
        unsigned long min_flt, maj_flt;
 
-       cputime_t it_prof_expires, it_virt_expires;
-       unsigned long long it_sched_expires;
+       struct task_cputime cputime_expires;
        struct list_head cpu_timers[3];
 
 /* process credentials */
@@ -1597,6 +1630,7 @@ extern unsigned long long cpu_clock(int cpu);
 
 extern unsigned long long
 task_sched_runtime(struct task_struct *task);
+extern unsigned long long thread_group_sched_runtime(struct task_struct *task);
 
 /* sched_exec is called by processes performing an exec */
 #ifdef CONFIG_SMP
@@ -2093,6 +2127,30 @@ static inline int spin_needbreak(spinlock_t *lock)
 #endif
 }
 
+/*
+ * Thread group CPU time accounting.
+ */
+
+extern int thread_group_cputime_alloc(struct task_struct *);
+extern void thread_group_cputime(struct task_struct *, struct task_cputime *);
+
+static inline void thread_group_cputime_init(struct signal_struct *sig)
+{
+       sig->cputime.totals = NULL;
+}
+
+static inline int thread_group_cputime_clone_thread(struct task_struct *curr)
+{
+       if (curr->signal->cputime.totals)
+               return 0;
+       return thread_group_cputime_alloc(curr);
+}
+
+static inline void thread_group_cputime_free(struct signal_struct *sig)
+{
+       free_percpu(sig->cputime.totals);
+}
+
 /*
  * Reevaluate whether the task has signals pending delivery.
  * Wake the task if so.
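
A sketch of sampling the new group-wide counters (cputime_t is printed as
unsigned long purely for illustration):

    static void example_report_group_time(struct task_struct *tsk)
    {
            struct task_cputime times;

            thread_group_cputime(tsk, &times);
            printk(KERN_INFO "utime %lu stime %lu runtime %llu ns\n",
                   (unsigned long)times.utime, (unsigned long)times.stime,
                   times.sum_exec_runtime);
    }
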
index 98921a3e1aa8db33b7e743e6711abb671485ea12..b6ec8189ac0c16673689f5327cde1d78b3ab49d6 100644 (file)
@@ -96,9 +96,11 @@ extern cpumask_t *tick_get_broadcast_oneshot_mask(void);
 extern void tick_clock_notify(void);
 extern int tick_check_oneshot_change(int allow_nohz);
 extern struct tick_sched *tick_get_tick_sched(int cpu);
+extern void tick_check_idle(int cpu);
 # else
 static inline void tick_clock_notify(void) { }
 static inline int tick_check_oneshot_change(int allow_nohz) { return 0; }
+static inline void tick_check_idle(int cpu) { }
 # endif
 
 #else /* CONFIG_GENERIC_CLOCKEVENTS */
@@ -106,26 +108,23 @@ static inline void tick_init(void) { }
 static inline void tick_cancel_sched_timer(int cpu) { }
 static inline void tick_clock_notify(void) { }
 static inline int tick_check_oneshot_change(int allow_nohz) { return 0; }
+static inline void tick_check_idle(int cpu) { }
 #endif /* !CONFIG_GENERIC_CLOCKEVENTS */
 
 # ifdef CONFIG_NO_HZ
 extern void tick_nohz_stop_sched_tick(int inidle);
 extern void tick_nohz_restart_sched_tick(void);
-extern void tick_nohz_update_jiffies(void);
 extern ktime_t tick_nohz_get_sleep_length(void);
-extern void tick_nohz_stop_idle(int cpu);
 extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time);
 # else
 static inline void tick_nohz_stop_sched_tick(int inidle) { }
 static inline void tick_nohz_restart_sched_tick(void) { }
-static inline void tick_nohz_update_jiffies(void) { }
 static inline ktime_t tick_nohz_get_sleep_length(void)
 {
        ktime_t len = { .tv64 = NSEC_PER_SEC/HZ };
 
        return len;
 }
-static inline void tick_nohz_stop_idle(int cpu) { }
 static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; }
 # endif /* !NO_HZ */
 
index 51e883df0fa51fe598832747477533fc4303e30a..4f1c9db577079ed3cc688def99e476219b5aa9b0 100644 (file)
@@ -119,6 +119,7 @@ extern int do_setitimer(int which, struct itimerval *value,
 extern unsigned int alarm_setitimer(unsigned int seconds);
 extern int do_getitimer(int which, struct itimerval *value);
 extern void getnstimeofday(struct timespec *tv);
+extern void getrawmonotonic(struct timespec *ts);
 extern void getboottime(struct timespec *ts);
 extern void monotonic_to_bootbased(struct timespec *ts);
 
@@ -127,6 +128,9 @@ extern int timekeeping_valid_for_hres(void);
 extern void update_wall_time(void);
 extern void update_xtime_cache(u64 nsec);
 
+struct tms;
+extern void do_sys_times(struct tms *);
+
 /**
  * timespec_to_ns - Convert timespec to nanoseconds
  * @ts:                pointer to the timespec variable to be converted
@@ -216,6 +220,7 @@ struct itimerval {
 #define CLOCK_MONOTONIC                        1
 #define CLOCK_PROCESS_CPUTIME_ID       2
 #define CLOCK_THREAD_CPUTIME_ID                3
+#define CLOCK_MONOTONIC_RAW            4
 
 /*
  * The IDs of various hardware clocks:
index fc6035d29d568a018e5c43f6575e5b860379fa96..9007313b5b7168ae03ed85e63d80ce6afe0ce438 100644 (file)
@@ -82,7 +82,7 @@
  */
 #define SHIFT_USEC 16          /* frequency offset scale (shift) */
 #define PPM_SCALE (NSEC_PER_USEC << (NTP_SCALE_SHIFT - SHIFT_USEC))
-#define PPM_SCALE_INV_SHIFT 20
+#define PPM_SCALE_INV_SHIFT 19
 #define PPM_SCALE_INV ((1ll << (PPM_SCALE_INV_SHIFT + NTP_SCALE_SHIFT)) / \
                       PPM_SCALE + 1)
 
@@ -141,8 +141,15 @@ struct timex {
 #define ADJ_MICRO              0x1000  /* select microsecond resolution */
 #define ADJ_NANO               0x2000  /* select nanosecond resolution */
 #define ADJ_TICK               0x4000  /* tick value */
+
+#ifdef __KERNEL__
+#define ADJ_ADJTIME            0x8000  /* switch between adjtime/adjtimex modes */
+#define ADJ_OFFSET_SINGLESHOT  0x0001  /* old-fashioned adjtime */
+#define ADJ_OFFSET_READONLY    0x2000  /* read-only adjtime */
+#else
 #define ADJ_OFFSET_SINGLESHOT  0x8001  /* old-fashioned adjtime */
-#define ADJ_OFFSET_SS_READ     0xa001  /* read-only adjtime */
+#define ADJ_OFFSET_SS_READ     0xa001  /* read-only adjtime */
+#endif
 
 /* xntp 3.4 compatibility names */
 #define MOD_OFFSET     ADJ_OFFSET
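
Note: the #ifdef __KERNEL__ split above preserves the ABI: userspace keeps
passing the historical composite values, which the kernel can now take apart
bit-wise, since 0x8001 == ADJ_ADJTIME | ADJ_OFFSET_SINGLESHOT and
0xa001 == ADJ_ADJTIME | ADJ_OFFSET_READONLY | ADJ_OFFSET_SINGLESHOT. A hedged
sketch of the kernel-side test this enables (the helper name is illustrative,
not from this patch):

    /* illustrative: was this an old-style adjtime() request? */
    static inline int modes_use_adjtime(unsigned int modes)
    {
            return modes & ADJ_ADJTIME;     /* set in 0x8001 and 0xa001 */
    }
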
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
new file mode 100644
index 0000000..c5bb39c
--- /dev/null
@@ -0,0 +1,137 @@
+#ifndef _LINUX_TRACEPOINT_H
+#define _LINUX_TRACEPOINT_H
+
+/*
+ * Kernel Tracepoint API.
+ *
+ * See Documentation/tracepoints.txt.
+ *
+ * (C) Copyright 2008 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * Heavily inspired from the Linux Kernel Markers.
+ *
+ * This file is released under the GPLv2.
+ * See the file COPYING for more details.
+ */
+
+#include <linux/types.h>
+#include <linux/rcupdate.h>
+
+struct module;
+struct tracepoint;
+
+struct tracepoint {
+       const char *name;               /* Tracepoint name */
+       int state;                      /* State. */
+       void **funcs;
+} __attribute__((aligned(8)));
+
+
+#define TPPROTO(args...)       args
+#define TPARGS(args...)                args
+
+#ifdef CONFIG_TRACEPOINTS
+
+/*
+ * it_func[0] is never NULL because there is at least one element in the array
+ * when the array itself is non-NULL.
+ */
+#define __DO_TRACE(tp, proto, args)                                    \
+       do {                                                            \
+               void **it_func;                                         \
+                                                                       \
+               rcu_read_lock_sched();                                  \
+               it_func = rcu_dereference((tp)->funcs);                 \
+               if (it_func) {                                          \
+                       do {                                            \
+                               ((void(*)(proto))(*it_func))(args);     \
+                       } while (*(++it_func));                         \
+               }                                                       \
+               rcu_read_unlock_sched();                                \
+       } while (0)
+
+/*
+ * Make sure the alignment of the structure in the __tracepoints section will
+ * not add unwanted padding between the beginning of the section and the
+ * structure. Force alignment to the same alignment as the section start.
+ */
+#define DEFINE_TRACE(name, proto, args)                                        \
+       static inline void trace_##name(proto)                          \
+       {                                                               \
+               static const char __tpstrtab_##name[]                   \
+               __attribute__((section("__tracepoints_strings")))       \
+               = #name ":" #proto;                                     \
+               static struct tracepoint __tracepoint_##name            \
+               __attribute__((section("__tracepoints"), aligned(8))) = \
+               { __tpstrtab_##name, 0, NULL };                         \
+               if (unlikely(__tracepoint_##name.state))                \
+                       __DO_TRACE(&__tracepoint_##name,                \
+                               TPPROTO(proto), TPARGS(args));          \
+       }                                                               \
+       static inline int register_trace_##name(void (*probe)(proto))   \
+       {                                                               \
+               return tracepoint_probe_register(#name ":" #proto,      \
+                       (void *)probe);                                 \
+       }                                                               \
+       static inline void unregister_trace_##name(void (*probe)(proto))\
+       {                                                               \
+               tracepoint_probe_unregister(#name ":" #proto,           \
+                       (void *)probe);                                 \
+       }
+
+extern void tracepoint_update_probe_range(struct tracepoint *begin,
+       struct tracepoint *end);
+
+#else /* !CONFIG_TRACEPOINTS */
+#define DEFINE_TRACE(name, proto, args)                        \
+       static inline void _do_trace_##name(struct tracepoint *tp, proto) \
+       { }                                                             \
+       static inline void trace_##name(proto)                          \
+       { }                                                             \
+       static inline int register_trace_##name(void (*probe)(proto))   \
+       {                                                               \
+               return -ENOSYS;                                         \
+       }                                                               \
+       static inline void unregister_trace_##name(void (*probe)(proto))\
+       { }
+
+static inline void tracepoint_update_probe_range(struct tracepoint *begin,
+       struct tracepoint *end)
+{ }
+#endif /* CONFIG_TRACEPOINTS */
+
+/*
+ * Connect a probe to a tracepoint.
+ * Internal API, should not be used directly.
+ */
+extern int tracepoint_probe_register(const char *name, void *probe);
+
+/*
+ * Disconnect a probe from a tracepoint.
+ * Internal API, should not be used directly.
+ */
+extern int tracepoint_probe_unregister(const char *name, void *probe);
+
+struct tracepoint_iter {
+       struct module *module;
+       struct tracepoint *tracepoint;
+};
+
+extern void tracepoint_iter_start(struct tracepoint_iter *iter);
+extern void tracepoint_iter_next(struct tracepoint_iter *iter);
+extern void tracepoint_iter_stop(struct tracepoint_iter *iter);
+extern void tracepoint_iter_reset(struct tracepoint_iter *iter);
+extern int tracepoint_get_iter_range(struct tracepoint **tracepoint,
+       struct tracepoint *begin, struct tracepoint *end);
+
+/*
+ * tracepoint_synchronize_unregister must be called between the last tracepoint
+ * probe unregistration and the end of module exit to make sure there is no
+ * caller executing a probe when it is freed.
+ */
+static inline void tracepoint_synchronize_unregister(void)
+{
+       synchronize_sched();
+}
+
+#endif
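
To make the API above concrete, a hedged sketch of declaring one tracepoint
and attaching a probe to it (my_event and my_probe are illustrative names,
not part of this patch):

    /* in a shared header: declares trace_my_event() and the
     * register/unregister pair for it
     */
    DEFINE_TRACE(my_event,
            TPPROTO(int value),
                    TPARGS(value));

    /* in the probe module */
    static void my_probe(int value)
    {
            /* runs each time trace_my_event(value) fires */
    }

    static int __init my_probe_init(void)
    {
            return register_trace_my_event(my_probe);
    }

    static void __exit my_probe_exit(void)
    {
            unregister_trace_my_event(my_probe);
            /* no CPU may still be executing the probe afterwards */
            tracepoint_synchronize_unregister();
    }
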
diff --git a/include/trace/sched.h b/include/trace/sched.h
new file mode 100644
index 0000000..ad47369
--- /dev/null
@@ -0,0 +1,56 @@
+#ifndef _TRACE_SCHED_H
+#define _TRACE_SCHED_H
+
+#include <linux/sched.h>
+#include <linux/tracepoint.h>
+
+DEFINE_TRACE(sched_kthread_stop,
+       TPPROTO(struct task_struct *t),
+               TPARGS(t));
+
+DEFINE_TRACE(sched_kthread_stop_ret,
+       TPPROTO(int ret),
+               TPARGS(ret));
+
+DEFINE_TRACE(sched_wait_task,
+       TPPROTO(struct rq *rq, struct task_struct *p),
+               TPARGS(rq, p));
+
+DEFINE_TRACE(sched_wakeup,
+       TPPROTO(struct rq *rq, struct task_struct *p),
+               TPARGS(rq, p));
+
+DEFINE_TRACE(sched_wakeup_new,
+       TPPROTO(struct rq *rq, struct task_struct *p),
+               TPARGS(rq, p));
+
+DEFINE_TRACE(sched_switch,
+       TPPROTO(struct rq *rq, struct task_struct *prev,
+               struct task_struct *next),
+               TPARGS(rq, prev, next));
+
+DEFINE_TRACE(sched_migrate_task,
+       TPPROTO(struct rq *rq, struct task_struct *p, int dest_cpu),
+               TPARGS(rq, p, dest_cpu));
+
+DEFINE_TRACE(sched_process_free,
+       TPPROTO(struct task_struct *p),
+               TPARGS(p));
+
+DEFINE_TRACE(sched_process_exit,
+       TPPROTO(struct task_struct *p),
+               TPARGS(p));
+
+DEFINE_TRACE(sched_process_wait,
+       TPPROTO(struct pid *pid),
+               TPARGS(pid));
+
+DEFINE_TRACE(sched_process_fork,
+       TPPROTO(struct task_struct *parent, struct task_struct *child),
+               TPARGS(parent, child));
+
+DEFINE_TRACE(sched_signal_send,
+       TPPROTO(int sig, struct task_struct *p),
+               TPARGS(sig, p));
+
+#endif
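
For example, a scheduler probe hooks sched_switch through the pair that
DEFINE_TRACE generates (sketch; the probe body is illustrative):

    static void probe_sched_switch(struct rq *rq, struct task_struct *prev,
                    struct task_struct *next)
    {
            /* called on every context switch, under rcu_read_lock_sched() */
    }

    /* module init: register_trace_sched_switch(probe_sched_switch);
     * module exit: unregister_trace_sched_switch(probe_sched_switch),
     *              followed by tracepoint_synchronize_unregister()
     */
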
index 8828ed0b2051f16df1d3a491419d3e868ac8d014..113c74c07da411eefafbe0e03dbbb24dc527ec33 100644
@@ -737,6 +737,14 @@ config VM_EVENT_COUNTERS
          on EMBEDDED systems.  /proc/vmstat will only show page counts
          if VM event counters are disabled.
 
+config PCI_QUIRKS
+       default y
+       bool "Enable PCI quirk workarounds" if EMBEDDED && PCI
+       help
+         This enables workarounds for various PCI chipset
+         bugs/quirks. Disable this only if your target machine is
+         unaffected by PCI quirks.
+
 config SLUB_DEBUG
        default y
        bool "Enable SLUB debugging support" if EMBEDDED
@@ -786,6 +794,13 @@ config PROFILING
          Say Y here to enable the extended profiling support mechanisms used
          by profilers such as OProfile.
 
+#
+# Place an empty function call at each tracepoint site. It can be
+# dynamically exchanged for a call to a probe function.
+#
+config TRACEPOINTS
+       bool
+
 config MARKERS
        bool "Activate markers"
        help
index 4371d11721f618f45ccfc65b799fbbdfebbf8140..3e17a3bafe60cf2ccf85fc5bc83fd393c76a6785 100644
@@ -61,6 +61,7 @@
 #include <linux/sched.h>
 #include <linux/signal.h>
 #include <linux/idr.h>
+#include <linux/ftrace.h>
 
 #include <asm/io.h>
 #include <asm/bugs.h>
@@ -689,6 +690,8 @@ asmlinkage void __init start_kernel(void)
 
        acpi_early_init(); /* before LAPIC and SMP init */
 
+       ftrace_init();
+
        /* Do the rest non-__init'ed, we're now alive */
        rest_init();
 }
@@ -705,30 +708,31 @@ __setup("initcall_debug", initcall_debug_setup);
 int do_one_initcall(initcall_t fn)
 {
        int count = preempt_count();
-       ktime_t t0, t1, delta;
+       ktime_t delta;
        char msgbuf[64];
-       int result;
+       struct boot_trace it;
 
        if (initcall_debug) {
-               printk("calling  %pF @ %i\n", fn, task_pid_nr(current));
-               t0 = ktime_get();
+               it.caller = task_pid_nr(current);
+               printk("calling  %pF @ %i\n", fn, it.caller);
+               it.calltime = ktime_get();
        }
 
-       result = fn();
+       it.result = fn();
 
        if (initcall_debug) {
-               t1 = ktime_get();
-               delta = ktime_sub(t1, t0);
-
-               printk("initcall %pF returned %d after %Ld msecs\n",
-                       fn, result,
-                       (unsigned long long) delta.tv64 >> 20);
+               it.rettime = ktime_get();
+               delta = ktime_sub(it.rettime, it.calltime);
+               it.duration = (unsigned long long) delta.tv64 >> 10;
+               printk("initcall %pF returned %d after %Ld usecs\n", fn,
+                       it.result, it.duration);
+               trace_boot(&it, fn);
        }
 
        msgbuf[0] = 0;
 
-       if (result && result != -ENODEV && initcall_debug)
-               sprintf(msgbuf, "error code %d ", result);
+       if (it.result && it.result != -ENODEV && initcall_debug)
+               sprintf(msgbuf, "error code %d ", it.result);
 
        if (preempt_count() != count) {
                strlcat(msgbuf, "preemption imbalance ", sizeof(msgbuf));
@@ -742,7 +746,7 @@ int do_one_initcall(initcall_t fn)
                printk("initcall %pF returned with %s\n", fn, msgbuf);
        }
 
-       return result;
+       return it.result;
 }
 
 
@@ -857,6 +861,7 @@ static int __init kernel_init(void * unused)
        smp_prepare_cpus(setup_max_cpus);
 
        do_pre_smp_initcalls();
+       start_boot_trace();
 
        smp_init();
        sched_init_smp();
@@ -883,6 +888,7 @@ static int __init kernel_init(void * unused)
         * we're essentially up and running. Get rid of the
         * initmem segments and start the user-mode stuff..
         */
+       stop_boot_trace();
        init_post();
        return 0;
 }
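
One detail in do_one_initcall() worth noting: the ktime delta is in
nanoseconds and the duration is derived with a shift, so the printed "usecs"
are approximate. A sketch of the arithmetic:

    /* >> 10 divides by 1024, about 2.3% below a true ns/1000; close
     * enough for boot tracing, and it avoids a 64-bit division
     */
    static inline unsigned long long ns_to_approx_us(unsigned long long ns)
    {
            return ns >> 10;
    }
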
index 066550aa61c5018a32dff60fcd75e6ffdebbdedd..305f11dbef216cd9cb5efba14483492b59a26354 100644
@@ -85,6 +85,7 @@ obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
 obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
 obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
 obj-$(CONFIG_MARKERS) += marker.o
+obj-$(CONFIG_TRACEPOINTS) += tracepoint.o
 obj-$(CONFIG_LATENCYTOP) += latencytop.o
 obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o
 obj-$(CONFIG_FTRACE) += trace/
index 143990e48cb9aab2af2f04d93a372d63dc570882..8eafe3eb50d9feb76dce5b289a4a7278321e4d8a 100644
@@ -23,6 +23,7 @@
 #include <linux/timex.h>
 #include <linux/migrate.h>
 #include <linux/posix-timers.h>
+#include <linux/times.h>
 
 #include <asm/uaccess.h>
 
@@ -208,49 +209,23 @@ asmlinkage long compat_sys_setitimer(int which,
        return 0;
 }
 
+static compat_clock_t clock_t_to_compat_clock_t(clock_t x)
+{
+       return compat_jiffies_to_clock_t(clock_t_to_jiffies(x));
+}
+
 asmlinkage long compat_sys_times(struct compat_tms __user *tbuf)
 {
-       /*
-        *      In the SMP world we might just be unlucky and have one of
-        *      the times increment as we use it. Since the value is an
-        *      atomically safe type this is just fine. Conceptually its
-        *      as if the syscall took an instant longer to occur.
-        */
        if (tbuf) {
+               struct tms tms;
                struct compat_tms tmp;
-               struct task_struct *tsk = current;
-               struct task_struct *t;
-               cputime_t utime, stime, cutime, cstime;
-
-               read_lock(&tasklist_lock);
-               utime = tsk->signal->utime;
-               stime = tsk->signal->stime;
-               t = tsk;
-               do {
-                       utime = cputime_add(utime, t->utime);
-                       stime = cputime_add(stime, t->stime);
-                       t = next_thread(t);
-               } while (t != tsk);
-
-               /*
-                * While we have tasklist_lock read-locked, no dying thread
-                * can be updating current->signal->[us]time.  Instead,
-                * we got their counts included in the live thread loop.
-                * However, another thread can come in right now and
-                * do a wait call that updates current->signal->c[us]time.
-                * To make sure we always see that pair updated atomically,
-                * we take the siglock around fetching them.
-                */
-               spin_lock_irq(&tsk->sighand->siglock);
-               cutime = tsk->signal->cutime;
-               cstime = tsk->signal->cstime;
-               spin_unlock_irq(&tsk->sighand->siglock);
-               read_unlock(&tasklist_lock);
-
-               tmp.tms_utime = compat_jiffies_to_clock_t(cputime_to_jiffies(utime));
-               tmp.tms_stime = compat_jiffies_to_clock_t(cputime_to_jiffies(stime));
-               tmp.tms_cutime = compat_jiffies_to_clock_t(cputime_to_jiffies(cutime));
-               tmp.tms_cstime = compat_jiffies_to_clock_t(cputime_to_jiffies(cstime));
+
+               do_sys_times(&tms);
+               /* Convert our struct tms to the compat version. */
+               tmp.tms_utime = clock_t_to_compat_clock_t(tms.tms_utime);
+               tmp.tms_stime = clock_t_to_compat_clock_t(tms.tms_stime);
+               tmp.tms_cutime = clock_t_to_compat_clock_t(tms.tms_cutime);
+               tmp.tms_cstime = clock_t_to_compat_clock_t(tms.tms_cstime);
                if (copy_to_user(tbuf, &tmp, sizeof(tmp)))
                        return -EFAULT;
        }
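
do_sys_times() is the shared helper (declared in linux/time.h above) that
both sys_times() and this compat path now call. Roughly, with locking
omitted, it reduces to the following (sketch, not the exact body from this
series):

    void do_sys_times(struct tms *tms)
    {
            struct task_cputime cputime;

            /* one group-wide readout replaces the open-coded thread walk */
            thread_group_cputime(current, &cputime);
            tms->tms_utime = cputime_to_clock_t(cputime.utime);
            tms->tms_stime = cputime_to_clock_t(cputime.stime);
            tms->tms_cutime = cputime_to_clock_t(current->signal->cutime);
            tms->tms_cstime = cputime_to_clock_t(current->signal->cstime);
    }
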
index 0ef4673e351bddd4e738f88941511efa02f062fa..80137a5d9467811ba4dab35c6e95790002a5f12a 100644
@@ -47,6 +47,7 @@
 #include <linux/blkdev.h>
 #include <linux/task_io_accounting_ops.h>
 #include <linux/tracehook.h>
+#include <trace/sched.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -112,8 +113,6 @@ static void __exit_signal(struct task_struct *tsk)
                 * We won't ever get here for the group leader, since it
                 * will have been the last reference on the signal_struct.
                 */
-               sig->utime = cputime_add(sig->utime, task_utime(tsk));
-               sig->stime = cputime_add(sig->stime, task_stime(tsk));
                sig->gtime = cputime_add(sig->gtime, task_gtime(tsk));
                sig->min_flt += tsk->min_flt;
                sig->maj_flt += tsk->maj_flt;
@@ -122,7 +121,6 @@ static void __exit_signal(struct task_struct *tsk)
                sig->inblock += task_io_get_inblock(tsk);
                sig->oublock += task_io_get_oublock(tsk);
                task_io_accounting_add(&sig->ioac, &tsk->ioac);
-               sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
                sig = NULL; /* Marker for below. */
        }
 
@@ -149,7 +147,10 @@ static void __exit_signal(struct task_struct *tsk)
 
 static void delayed_put_task_struct(struct rcu_head *rhp)
 {
-       put_task_struct(container_of(rhp, struct task_struct, rcu));
+       struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
+
+       trace_sched_process_free(tsk);
+       put_task_struct(tsk);
 }
 
 
@@ -1073,6 +1074,8 @@ NORET_TYPE void do_exit(long code)
 
        if (group_dead)
                acct_process();
+       trace_sched_process_exit(tsk);
+
        exit_sem(tsk);
        exit_files(tsk);
        exit_fs(tsk);
@@ -1301,6 +1304,7 @@ static int wait_task_zombie(struct task_struct *p, int options,
        if (likely(!traced)) {
                struct signal_struct *psig;
                struct signal_struct *sig;
+               struct task_cputime cputime;
 
                /*
                 * The resource counters for the group leader are in its
@@ -1316,20 +1320,23 @@ static int wait_task_zombie(struct task_struct *p, int options,
                 * need to protect the access to p->parent->signal fields,
                 * as other threads in the parent group can be right
                 * here reaping other children at the same time.
+                *
+                * We use thread_group_cputime() to get times for the thread
+                * group, which consolidates times for all threads in the
+                * group including the group leader.
                 */
                spin_lock_irq(&p->parent->sighand->siglock);
                psig = p->parent->signal;
                sig = p->signal;
+               thread_group_cputime(p, &cputime);
                psig->cutime =
                        cputime_add(psig->cutime,
-                       cputime_add(p->utime,
-                       cputime_add(sig->utime,
-                                   sig->cutime)));
+                       cputime_add(cputime.utime,
+                                   sig->cutime));
                psig->cstime =
                        cputime_add(psig->cstime,
-                       cputime_add(p->stime,
-                       cputime_add(sig->stime,
-                                   sig->cstime)));
+                       cputime_add(cputime.stime,
+                                   sig->cstime));
                psig->cgtime =
                        cputime_add(psig->cgtime,
                        cputime_add(p->gtime,
@@ -1674,6 +1681,8 @@ static long do_wait(enum pid_type type, struct pid *pid, int options,
        struct task_struct *tsk;
        int retval;
 
+       trace_sched_process_wait(pid);
+
        add_wait_queue(&current->signal->wait_chldexit,&wait);
 repeat:
        /*
index 30de644a40c4d4d9617d650589f4c90da1e977a2..4d093552dd6e79aea8553b32e1ff465cd0439116 100644
@@ -58,6 +58,7 @@
 #include <linux/tty.h>
 #include <linux/proc_fs.h>
 #include <linux/blkdev.h>
+#include <trace/sched.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -759,15 +760,44 @@ void __cleanup_sighand(struct sighand_struct *sighand)
                kmem_cache_free(sighand_cachep, sighand);
 }
 
+
+/*
+ * Initialize POSIX timer handling for a thread group.
+ */
+static void posix_cpu_timers_init_group(struct signal_struct *sig)
+{
+       /* Thread group counters. */
+       thread_group_cputime_init(sig);
+
+       /* Expiration times and increments. */
+       sig->it_virt_expires = cputime_zero;
+       sig->it_virt_incr = cputime_zero;
+       sig->it_prof_expires = cputime_zero;
+       sig->it_prof_incr = cputime_zero;
+
+       /* Cached expiration times. */
+       sig->cputime_expires.prof_exp = cputime_zero;
+       sig->cputime_expires.virt_exp = cputime_zero;
+       sig->cputime_expires.sched_exp = 0;
+
+       /* The timer lists. */
+       INIT_LIST_HEAD(&sig->cpu_timers[0]);
+       INIT_LIST_HEAD(&sig->cpu_timers[1]);
+       INIT_LIST_HEAD(&sig->cpu_timers[2]);
+}
+
 static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 {
        struct signal_struct *sig;
        int ret;
 
        if (clone_flags & CLONE_THREAD) {
-               atomic_inc(&current->signal->count);
-               atomic_inc(&current->signal->live);
-               return 0;
+               ret = thread_group_cputime_clone_thread(current);
+               if (likely(!ret)) {
+                       atomic_inc(&current->signal->count);
+                       atomic_inc(&current->signal->live);
+               }
+               return ret;
        }
        sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
        tsk->signal = sig;
@@ -795,40 +825,25 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
        sig->it_real_incr.tv64 = 0;
        sig->real_timer.function = it_real_fn;
 
-       sig->it_virt_expires = cputime_zero;
-       sig->it_virt_incr = cputime_zero;
-       sig->it_prof_expires = cputime_zero;
-       sig->it_prof_incr = cputime_zero;
-
        sig->leader = 0;        /* session leadership doesn't inherit */
        sig->tty_old_pgrp = NULL;
        sig->tty = NULL;
 
-       sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero;
+       sig->cutime = sig->cstime = cputime_zero;
        sig->gtime = cputime_zero;
        sig->cgtime = cputime_zero;
        sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
        sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
        sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
        task_io_accounting_init(&sig->ioac);
-       sig->sum_sched_runtime = 0;
-       INIT_LIST_HEAD(&sig->cpu_timers[0]);
-       INIT_LIST_HEAD(&sig->cpu_timers[1]);
-       INIT_LIST_HEAD(&sig->cpu_timers[2]);
        taskstats_tgid_init(sig);
 
        task_lock(current->group_leader);
        memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
        task_unlock(current->group_leader);
 
-       if (sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) {
-               /*
-                * New sole thread in the process gets an expiry time
-                * of the whole CPU time limit.
-                */
-               tsk->it_prof_expires =
-                       secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur);
-       }
+       posix_cpu_timers_init_group(sig);
+
        acct_init_pacct(&sig->pacct);
 
        tty_audit_fork(sig);
@@ -838,6 +853,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 
 void __cleanup_signal(struct signal_struct *sig)
 {
+       thread_group_cputime_free(sig);
        exit_thread_group_keys(sig);
        tty_kref_put(sig->tty);
        kmem_cache_free(signal_cachep, sig);
@@ -887,6 +903,19 @@ void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
 }
 #endif /* CONFIG_MM_OWNER */
 
+/*
+ * Initialize POSIX timer handling for a single task.
+ */
+static void posix_cpu_timers_init(struct task_struct *tsk)
+{
+       tsk->cputime_expires.prof_exp = cputime_zero;
+       tsk->cputime_expires.virt_exp = cputime_zero;
+       tsk->cputime_expires.sched_exp = 0;
+       INIT_LIST_HEAD(&tsk->cpu_timers[0]);
+       INIT_LIST_HEAD(&tsk->cpu_timers[1]);
+       INIT_LIST_HEAD(&tsk->cpu_timers[2]);
+}
+
 /*
  * This creates a new process as a copy of the old one,
  * but does not actually start it yet.
@@ -997,12 +1026,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        task_io_accounting_init(&p->ioac);
        acct_clear_integrals(p);
 
-       p->it_virt_expires = cputime_zero;
-       p->it_prof_expires = cputime_zero;
-       p->it_sched_expires = 0;
-       INIT_LIST_HEAD(&p->cpu_timers[0]);
-       INIT_LIST_HEAD(&p->cpu_timers[1]);
-       INIT_LIST_HEAD(&p->cpu_timers[2]);
+       posix_cpu_timers_init(p);
 
        p->lock_depth = -1;             /* -1 = no lock */
        do_posix_clock_monotonic_gettime(&p->start_time);
@@ -1203,21 +1227,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        if (clone_flags & CLONE_THREAD) {
                p->group_leader = current->group_leader;
                list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group);
-
-               if (!cputime_eq(current->signal->it_virt_expires,
-                               cputime_zero) ||
-                   !cputime_eq(current->signal->it_prof_expires,
-                               cputime_zero) ||
-                   current->signal->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY ||
-                   !list_empty(&current->signal->cpu_timers[0]) ||
-                   !list_empty(&current->signal->cpu_timers[1]) ||
-                   !list_empty(&current->signal->cpu_timers[2])) {
-                       /*
-                        * Have child wake up on its first tick to check
-                        * for process CPU timers.
-                        */
-                       p->it_prof_expires = jiffies_to_cputime(1);
-               }
        }
 
        if (likely(p->pid)) {
@@ -1364,6 +1373,8 @@ long do_fork(unsigned long clone_flags,
        if (!IS_ERR(p)) {
                struct completion vfork;
 
+               trace_sched_process_fork(current, p);
+
                nr = task_pid_vnr(p);
 
                if (clone_flags & CLONE_PARENT_SETTID)
index cdec83e722fa1b80ee0af0f828d8e47532431a20..95978f48e039fcbd7e7e233224a7e3f4a0b2f884 100644
@@ -1403,9 +1403,7 @@ void hrtimer_run_queues(void)
                if (!base->first)
                        continue;
 
-               if (base->get_softirq_time)
-                       base->softirq_time = base->get_softirq_time();
-               else if (gettime) {
+               if (gettime) {
                        hrtimer_get_softirq_time(cpu_base);
                        gettime = 0;
                }
@@ -1688,9 +1686,11 @@ static void migrate_hrtimers(int cpu)
        new_base = &get_cpu_var(hrtimer_bases);
 
        tick_cancel_sched_timer(cpu);
-
-       local_irq_disable();
-       spin_lock(&new_base->lock);
+       /*
+        * The caller is globally serialized and nobody else
+        * takes two locks at once, so deadlock is not possible.
+        */
+       spin_lock_irq(&new_base->lock);
        spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
 
        for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
@@ -1703,8 +1703,7 @@ static void migrate_hrtimers(int cpu)
                raise = 1;
 
        spin_unlock(&old_base->lock);
-       spin_unlock(&new_base->lock);
-       local_irq_enable();
+       spin_unlock_irq(&new_base->lock);
        put_cpu_var(hrtimer_bases);
 
        if (raise)
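
The double-lock in migrate_hrtimers() is safe only because CPU hotplug is
globally serialized; spin_lock_nested() with SINGLE_DEPTH_NESTING tells
lockdep that the second lock of the same class is intentional. The pattern
in isolation (sketch):

    spin_lock_irq(&new_base->lock);
    /* annotate the second same-class lock for lockdep */
    spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
    /* ... move timers from old_base to new_base ... */
    spin_unlock(&old_base->lock);
    spin_unlock_irq(&new_base->lock);
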
index 533068cfb607fad10202210b916909efd314cbb7..cc0f7321b8cede4192a4ceb9ffc97311ec9cc7d0 100644
@@ -30,17 +30,16 @@ static DEFINE_MUTEX(probing_active);
 unsigned long probe_irq_on(void)
 {
        struct irq_desc *desc;
-       unsigned long mask;
-       unsigned int i;
+       unsigned long mask = 0;
+       unsigned int status;
+       int i;
 
        mutex_lock(&probing_active);
        /*
         * something may have generated an irq long ago and we want to
         * flush such a longstanding irq before considering it as spurious.
         */
-       for (i = NR_IRQS-1; i > 0; i--) {
-               desc = irq_desc + i;
-
+       for_each_irq_desc_reverse(i, desc) {
                spin_lock_irq(&desc->lock);
                if (!desc->action && !(desc->status & IRQ_NOPROBE)) {
                        /*
@@ -68,9 +67,7 @@ unsigned long probe_irq_on(void)
         * (we must startup again here because if a longstanding irq
         * happened in the previous stage, it may have masked itself)
         */
-       for (i = NR_IRQS-1; i > 0; i--) {
-               desc = irq_desc + i;
-
+       for_each_irq_desc_reverse(i, desc) {
                spin_lock_irq(&desc->lock);
                if (!desc->action && !(desc->status & IRQ_NOPROBE)) {
                        desc->status |= IRQ_AUTODETECT | IRQ_WAITING;
@@ -88,11 +85,7 @@ unsigned long probe_irq_on(void)
        /*
         * Now filter out any obviously spurious interrupts
         */
-       mask = 0;
-       for (i = 0; i < NR_IRQS; i++) {
-               unsigned int status;
-
-               desc = irq_desc + i;
+       for_each_irq_desc(i, desc) {
                spin_lock_irq(&desc->lock);
                status = desc->status;
 
@@ -126,14 +119,11 @@ EXPORT_SYMBOL(probe_irq_on);
  */
 unsigned int probe_irq_mask(unsigned long val)
 {
-       unsigned int mask;
+       unsigned int status, mask = 0;
+       struct irq_desc *desc;
        int i;
 
-       mask = 0;
-       for (i = 0; i < NR_IRQS; i++) {
-               struct irq_desc *desc = irq_desc + i;
-               unsigned int status;
-
+       for_each_irq_desc(i, desc) {
                spin_lock_irq(&desc->lock);
                status = desc->status;
 
@@ -171,20 +161,19 @@ EXPORT_SYMBOL(probe_irq_mask);
  */
 int probe_irq_off(unsigned long val)
 {
-       int i, irq_found = 0, nr_irqs = 0;
-
-       for (i = 0; i < NR_IRQS; i++) {
-               struct irq_desc *desc = irq_desc + i;
-               unsigned int status;
+       int i, irq_found = 0, nr_of_irqs = 0;
+       struct irq_desc *desc;
+       unsigned int status;
 
+       for_each_irq_desc(i, desc) {
                spin_lock_irq(&desc->lock);
                status = desc->status;
 
                if (status & IRQ_AUTODETECT) {
                        if (!(status & IRQ_WAITING)) {
-                               if (!nr_irqs)
+                               if (!nr_of_irqs)
                                        irq_found = i;
-                               nr_irqs++;
+                               nr_of_irqs++;
                        }
                        desc->status = status & ~IRQ_AUTODETECT;
                        desc->chip->shutdown(i);
@@ -193,7 +182,7 @@ int probe_irq_off(unsigned long val)
        }
        mutex_unlock(&probing_active);
 
-       if (nr_irqs > 1)
+       if (nr_of_irqs > 1)
                irq_found = -irq_found;
 
        return irq_found;
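
For reference, the driver-side contract of this probing API is unchanged; a
typical autoprobe sequence looks like (sketch):

    unsigned long mask;
    int irq;

    mask = probe_irq_on();
    /* make the device raise one interrupt here */
    irq = probe_irq_off(mask);  /* > 0: irq found, 0: none, < 0: several */
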
index 3cd441ebf5d2178fcd2d9bdd49573706d6c78b51..4895fde4eb933c25fe50fbbfabc3fb0a6cd9c742 100644
  */
 void dynamic_irq_init(unsigned int irq)
 {
-       struct irq_desc *desc;
+       struct irq_desc *desc = irq_to_desc(irq);
        unsigned long flags;
 
-       if (irq >= NR_IRQS) {
+       if (!desc) {
                WARN(1, KERN_ERR "Trying to initialize invalid IRQ%d\n", irq);
                return;
        }
 
        /* Ensure we don't have left over values from a previous use of this irq */
-       desc = irq_desc + irq;
        spin_lock_irqsave(&desc->lock, flags);
        desc->status = IRQ_DISABLED;
        desc->chip = &no_irq_chip;
@@ -57,15 +56,14 @@ void dynamic_irq_init(unsigned int irq)
  */
 void dynamic_irq_cleanup(unsigned int irq)
 {
-       struct irq_desc *desc;
+       struct irq_desc *desc = irq_to_desc(irq);
        unsigned long flags;
 
-       if (irq >= NR_IRQS) {
+       if (!desc) {
                WARN(1, KERN_ERR "Trying to cleanup invalid IRQ%d\n", irq);
                return;
        }
 
-       desc = irq_desc + irq;
        spin_lock_irqsave(&desc->lock, flags);
        if (desc->action) {
                spin_unlock_irqrestore(&desc->lock, flags);
@@ -89,10 +87,10 @@ void dynamic_irq_cleanup(unsigned int irq)
  */
 int set_irq_chip(unsigned int irq, struct irq_chip *chip)
 {
-       struct irq_desc *desc;
+       struct irq_desc *desc = irq_to_desc(irq);
        unsigned long flags;
 
-       if (irq >= NR_IRQS) {
+       if (!desc) {
                WARN(1, KERN_ERR "Trying to install chip for IRQ%d\n", irq);
                return -EINVAL;
        }
@@ -100,7 +98,6 @@ int set_irq_chip(unsigned int irq, struct irq_chip *chip)
        if (!chip)
                chip = &no_irq_chip;
 
-       desc = irq_desc + irq;
        spin_lock_irqsave(&desc->lock, flags);
        irq_chip_set_defaults(chip);
        desc->chip = chip;
@@ -111,27 +108,27 @@ int set_irq_chip(unsigned int irq, struct irq_chip *chip)
 EXPORT_SYMBOL(set_irq_chip);
 
 /**
- *     set_irq_type - set the irq type for an irq
+ *     set_irq_type - set the irq trigger type for an irq
  *     @irq:   irq number
- *     @type:  interrupt type - see include/linux/interrupt.h
+ *     @type:  IRQ_TYPE_{LEVEL,EDGE}_* value - see include/linux/irq.h
  */
 int set_irq_type(unsigned int irq, unsigned int type)
 {
-       struct irq_desc *desc;
+       struct irq_desc *desc = irq_to_desc(irq);
        unsigned long flags;
        int ret = -ENXIO;
 
-       if (irq >= NR_IRQS) {
+       if (!desc) {
                printk(KERN_ERR "Trying to set irq type for IRQ%d\n", irq);
                return -ENODEV;
        }
 
-       desc = irq_desc + irq;
-       if (desc->chip->set_type) {
-               spin_lock_irqsave(&desc->lock, flags);
-               ret = desc->chip->set_type(irq, type);
-               spin_unlock_irqrestore(&desc->lock, flags);
-       }
+       if (type == IRQ_TYPE_NONE)
+               return 0;
+
+       spin_lock_irqsave(&desc->lock, flags);
+       ret = __irq_set_trigger(desc, irq, flags);
+       spin_unlock_irqrestore(&desc->lock, flags);
        return ret;
 }
 EXPORT_SYMBOL(set_irq_type);
@@ -145,16 +142,15 @@ EXPORT_SYMBOL(set_irq_type);
  */
 int set_irq_data(unsigned int irq, void *data)
 {
-       struct irq_desc *desc;
+       struct irq_desc *desc = irq_to_desc(irq);
        unsigned long flags;
 
-       if (irq >= NR_IRQS) {
+       if (!desc) {
                printk(KERN_ERR
                       "Trying to install controller data for IRQ%d\n", irq);
                return -EINVAL;
        }
 
-       desc = irq_desc + irq;
        spin_lock_irqsave(&desc->lock, flags);
        desc->handler_data = data;
        spin_unlock_irqrestore(&desc->lock, flags);
@@ -171,15 +167,15 @@ EXPORT_SYMBOL(set_irq_data);
  */
 int set_irq_msi(unsigned int irq, struct msi_desc *entry)
 {
-       struct irq_desc *desc;
+       struct irq_desc *desc = irq_to_desc(irq);
        unsigned long flags;
 
-       if (irq >= NR_IRQS) {
+       if (!desc) {
                printk(KERN_ERR
                       "Trying to install msi data for IRQ%d\n", irq);
                return -EINVAL;
        }
-       desc = irq_desc + irq;
+
        spin_lock_irqsave(&desc->lock, flags);
        desc->msi_desc = entry;
        if (entry)
@@ -197,10 +193,16 @@ int set_irq_msi(unsigned int irq, struct msi_desc *entry)
  */
 int set_irq_chip_data(unsigned int irq, void *data)
 {
-       struct irq_desc *desc = irq_desc + irq;
+       struct irq_desc *desc = irq_to_desc(irq);
        unsigned long flags;
 
-       if (irq >= NR_IRQS || !desc->chip) {
+       if (!desc) {
+               printk(KERN_ERR
+                      "Trying to install chip data for IRQ%d\n", irq);
+               return -EINVAL;
+       }
+
+       if (!desc->chip) {
                printk(KERN_ERR "BUG: bad set_irq_chip_data(IRQ#%d)\n", irq);
                return -EINVAL;
        }
@@ -218,7 +220,7 @@ EXPORT_SYMBOL(set_irq_chip_data);
  */
 static void default_enable(unsigned int irq)
 {
-       struct irq_desc *desc = irq_desc + irq;
+       struct irq_desc *desc = irq_to_desc(irq);
 
        desc->chip->unmask(irq);
        desc->status &= ~IRQ_MASKED;
@@ -236,8 +238,9 @@ static void default_disable(unsigned int irq)
  */
 static unsigned int default_startup(unsigned int irq)
 {
-       irq_desc[irq].chip->enable(irq);
+       struct irq_desc *desc = irq_to_desc(irq);
 
+       desc->chip->enable(irq);
        return 0;
 }
 
@@ -246,7 +249,7 @@ static unsigned int default_startup(unsigned int irq)
  */
 static void default_shutdown(unsigned int irq)
 {
-       struct irq_desc *desc = irq_desc + irq;
+       struct irq_desc *desc = irq_to_desc(irq);
 
        desc->chip->mask(irq);
        desc->status |= IRQ_MASKED;
@@ -305,14 +308,13 @@ handle_simple_irq(unsigned int irq, struct irq_desc *desc)
 {
        struct irqaction *action;
        irqreturn_t action_ret;
-       const unsigned int cpu = smp_processor_id();
 
        spin_lock(&desc->lock);
 
        if (unlikely(desc->status & IRQ_INPROGRESS))
                goto out_unlock;
        desc->status &= ~(IRQ_REPLAY | IRQ_WAITING);
-       kstat_cpu(cpu).irqs[irq]++;
+       kstat_incr_irqs_this_cpu(irq, desc);
 
        action = desc->action;
        if (unlikely(!action || (desc->status & IRQ_DISABLED)))
@@ -344,7 +346,6 @@ out_unlock:
 void
 handle_level_irq(unsigned int irq, struct irq_desc *desc)
 {
-       unsigned int cpu = smp_processor_id();
        struct irqaction *action;
        irqreturn_t action_ret;
 
@@ -354,7 +355,7 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc)
        if (unlikely(desc->status & IRQ_INPROGRESS))
                goto out_unlock;
        desc->status &= ~(IRQ_REPLAY | IRQ_WAITING);
-       kstat_cpu(cpu).irqs[irq]++;
+       kstat_incr_irqs_this_cpu(irq, desc);
 
        /*
         * If it's disabled or no action is available
@@ -392,7 +393,6 @@ out_unlock:
 void
 handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc)
 {
-       unsigned int cpu = smp_processor_id();
        struct irqaction *action;
        irqreturn_t action_ret;
 
@@ -402,7 +402,7 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc)
                goto out;
 
        desc->status &= ~(IRQ_REPLAY | IRQ_WAITING);
-       kstat_cpu(cpu).irqs[irq]++;
+       kstat_incr_irqs_this_cpu(irq, desc);
 
        /*
         * If it's disabled or no action is available
@@ -451,8 +451,6 @@ out:
 void
 handle_edge_irq(unsigned int irq, struct irq_desc *desc)
 {
-       const unsigned int cpu = smp_processor_id();
-
        spin_lock(&desc->lock);
 
        desc->status &= ~(IRQ_REPLAY | IRQ_WAITING);
@@ -468,8 +466,7 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc)
                mask_ack_irq(desc, irq);
                goto out_unlock;
        }
-
-       kstat_cpu(cpu).irqs[irq]++;
+       kstat_incr_irqs_this_cpu(irq, desc);
 
        /* Start handling the irq */
        desc->chip->ack(irq);
@@ -524,7 +521,7 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc)
 {
        irqreturn_t action_ret;
 
-       kstat_this_cpu.irqs[irq]++;
+       kstat_incr_irqs_this_cpu(irq, desc);
 
        if (desc->chip->ack)
                desc->chip->ack(irq);
@@ -541,17 +538,15 @@ void
 __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
                  const char *name)
 {
-       struct irq_desc *desc;
+       struct irq_desc *desc = irq_to_desc(irq);
        unsigned long flags;
 
-       if (irq >= NR_IRQS) {
+       if (!desc) {
                printk(KERN_ERR
                       "Trying to install type control for IRQ%d\n", irq);
                return;
        }
 
-       desc = irq_desc + irq;
-
        if (!handle)
                handle = handle_bad_irq;
        else if (desc->chip == &no_irq_chip) {
@@ -583,7 +578,7 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
                desc->status &= ~IRQ_DISABLED;
                desc->status |= IRQ_NOREQUEST | IRQ_NOPROBE;
                desc->depth = 0;
-               desc->chip->unmask(irq);
+               desc->chip->startup(irq);
        }
        spin_unlock_irqrestore(&desc->lock, flags);
 }
@@ -606,17 +601,14 @@ set_irq_chip_and_handler_name(unsigned int irq, struct irq_chip *chip,
 
 void __init set_irq_noprobe(unsigned int irq)
 {
-       struct irq_desc *desc;
+       struct irq_desc *desc = irq_to_desc(irq);
        unsigned long flags;
 
-       if (irq >= NR_IRQS) {
+       if (!desc) {
                printk(KERN_ERR "Trying to mark IRQ%d non-probeable\n", irq);
-
                return;
        }
 
-       desc = irq_desc + irq;
-
        spin_lock_irqsave(&desc->lock, flags);
        desc->status |= IRQ_NOPROBE;
        spin_unlock_irqrestore(&desc->lock, flags);
@@ -624,17 +616,14 @@ void __init set_irq_noprobe(unsigned int irq)
 
 void __init set_irq_probe(unsigned int irq)
 {
-       struct irq_desc *desc;
+       struct irq_desc *desc = irq_to_desc(irq);
        unsigned long flags;
 
-       if (irq >= NR_IRQS) {
+       if (!desc) {
                printk(KERN_ERR "Trying to mark IRQ%d probeable\n", irq);
-
                return;
        }
 
-       desc = irq_desc + irq;
-
        spin_lock_irqsave(&desc->lock, flags);
        desc->status &= ~IRQ_NOPROBE;
        spin_unlock_irqrestore(&desc->lock, flags);
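
The transformation repeated through this file swaps direct irq_desc[]
arithmetic for irq_to_desc(), whose result must be NULL-checked; that lookup
is the hook later sparse-IRQ work can reimplement. With today's flat array
it presumably reduces to a bounds check (sketch):

    static inline struct irq_desc *irq_to_desc(unsigned int irq)
    {
            return (irq < nr_irqs) ? irq_desc + irq : NULL;
    }
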
index 5fa6198e91394a935bcb8f51d70cdef9222c261f..c815b42d0f5bf12baed5e25eb92c7f2d5e4b4472 100644
  *
 * Handles spurious and unhandled IRQs. It also prints a debug message.
  */
-void
-handle_bad_irq(unsigned int irq, struct irq_desc *desc)
+void handle_bad_irq(unsigned int irq, struct irq_desc *desc)
 {
        print_irq_desc(irq, desc);
-       kstat_this_cpu.irqs[irq]++;
+       kstat_incr_irqs_this_cpu(irq, desc);
        ack_bad_irq(irq);
 }
 
@@ -47,6 +46,9 @@ handle_bad_irq(unsigned int irq, struct irq_desc *desc)
  *
  * Controller mappings for all interrupt sources:
  */
+int nr_irqs = NR_IRQS;
+EXPORT_SYMBOL_GPL(nr_irqs);
+
 struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
        [0 ... NR_IRQS-1] = {
                .status = IRQ_DISABLED,
@@ -66,7 +68,9 @@ struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
  */
 static void ack_bad(unsigned int irq)
 {
-       print_irq_desc(irq, irq_desc + irq);
+       struct irq_desc *desc = irq_to_desc(irq);
+
+       print_irq_desc(irq, desc);
        ack_bad_irq(irq);
 }
 
@@ -131,8 +135,6 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
        irqreturn_t ret, retval = IRQ_NONE;
        unsigned int status = 0;
 
-       handle_dynamic_tick(action);
-
        if (!(action->flags & IRQF_DISABLED))
                local_irq_enable_in_hardirq();
 
@@ -165,11 +167,12 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
  */
 unsigned int __do_IRQ(unsigned int irq)
 {
-       struct irq_desc *desc = irq_desc + irq;
+       struct irq_desc *desc = irq_to_desc(irq);
        struct irqaction *action;
        unsigned int status;
 
-       kstat_this_cpu.irqs[irq]++;
+       kstat_incr_irqs_this_cpu(irq, desc);
+
        if (CHECK_IRQ_PER_CPU(desc->status)) {
                irqreturn_t action_ret;
 
@@ -256,8 +259,8 @@ out:
 }
 #endif
 
-#ifdef CONFIG_TRACE_IRQFLAGS
 
+#ifdef CONFIG_TRACE_IRQFLAGS
 /*
  * lockdep: we want to handle all irq_desc locks as a single lock-class:
  */
@@ -265,10 +268,10 @@ static struct lock_class_key irq_desc_lock_class;
 
 void early_init_irq_lock_class(void)
 {
+       struct irq_desc *desc;
        int i;
 
-       for (i = 0; i < NR_IRQS; i++)
-               lockdep_set_class(&irq_desc[i].lock, &irq_desc_lock_class);
+       for_each_irq_desc(i, desc)
+               lockdep_set_class(&desc->lock, &irq_desc_lock_class);
 }
-
 #endif
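
kstat_incr_irqs_this_cpu() abstracts the old kstat_cpu(cpu).irqs[irq]++ so
that the counter's home can later move into the descriptor itself; on this
tree it is presumably equivalent to (sketch):

    static inline void
    kstat_incr_irqs_this_cpu(unsigned int irq, struct irq_desc *desc)
    {
            kstat_this_cpu.irqs[irq]++;
    }
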
index 08a849a224475102084ca807de6861b6ab7286b5..c9767e641980e1b2ae91454fe723088ad9cc5f83 100644
@@ -10,12 +10,15 @@ extern void irq_chip_set_defaults(struct irq_chip *chip);
 /* Set default handler: */
 extern void compat_irq_chip_set_default_handler(struct irq_desc *desc);
 
+extern int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
+               unsigned long flags);
+
 #ifdef CONFIG_PROC_FS
-extern void register_irq_proc(unsigned int irq);
+extern void register_irq_proc(unsigned int irq, struct irq_desc *desc);
 extern void register_handler_proc(unsigned int irq, struct irqaction *action);
 extern void unregister_handler_proc(unsigned int irq, struct irqaction *action);
 #else
-static inline void register_irq_proc(unsigned int irq) { }
+static inline void register_irq_proc(unsigned int irq, struct irq_desc *desc) { }
 static inline void register_handler_proc(unsigned int irq,
                                         struct irqaction *action) { }
 static inline void unregister_handler_proc(unsigned int irq,
index 60c49e324390bca07f9109fc2bb09d0ecf18f5c6..c498a1b8c621e02894974b7f0e0cda8e96d81011 100644
@@ -31,10 +31,10 @@ cpumask_t irq_default_affinity = CPU_MASK_ALL;
  */
 void synchronize_irq(unsigned int irq)
 {
-       struct irq_desc *desc = irq_desc + irq;
+       struct irq_desc *desc = irq_to_desc(irq);
        unsigned int status;
 
-       if (irq >= NR_IRQS)
+       if (!desc)
                return;
 
        do {
@@ -64,7 +64,7 @@ EXPORT_SYMBOL(synchronize_irq);
  */
 int irq_can_set_affinity(unsigned int irq)
 {
-       struct irq_desc *desc = irq_desc + irq;
+       struct irq_desc *desc = irq_to_desc(irq);
 
        if (CHECK_IRQ_PER_CPU(desc->status) || !desc->chip ||
            !desc->chip->set_affinity)
@@ -81,18 +81,17 @@ int irq_can_set_affinity(unsigned int irq)
  */
 int irq_set_affinity(unsigned int irq, cpumask_t cpumask)
 {
-       struct irq_desc *desc = irq_desc + irq;
+       struct irq_desc *desc = irq_to_desc(irq);
 
        if (!desc->chip->set_affinity)
                return -EINVAL;
 
-       set_balance_irq_affinity(irq, cpumask);
-
 #ifdef CONFIG_GENERIC_PENDING_IRQ
-       if (desc->status & IRQ_MOVE_PCNTXT) {
+       if (desc->status & IRQ_MOVE_PCNTXT || desc->status & IRQ_DISABLED) {
                unsigned long flags;
 
                spin_lock_irqsave(&desc->lock, flags);
+               desc->affinity = cpumask;
                desc->chip->set_affinity(irq, cpumask);
                spin_unlock_irqrestore(&desc->lock, flags);
        } else
@@ -111,16 +110,17 @@ int irq_set_affinity(unsigned int irq, cpumask_t cpumask)
 int irq_select_affinity(unsigned int irq)
 {
        cpumask_t mask;
+       struct irq_desc *desc;
 
        if (!irq_can_set_affinity(irq))
                return 0;
 
        cpus_and(mask, cpu_online_map, irq_default_affinity);
 
-       irq_desc[irq].affinity = mask;
-       irq_desc[irq].chip->set_affinity(irq, mask);
+       desc = irq_to_desc(irq);
+       desc->affinity = mask;
+       desc->chip->set_affinity(irq, mask);
 
-       set_balance_irq_affinity(irq, mask);
        return 0;
 }
 #endif
@@ -140,10 +140,10 @@ int irq_select_affinity(unsigned int irq)
  */
 void disable_irq_nosync(unsigned int irq)
 {
-       struct irq_desc *desc = irq_desc + irq;
+       struct irq_desc *desc = irq_to_desc(irq);
        unsigned long flags;
 
-       if (irq >= NR_IRQS)
+       if (!desc)
                return;
 
        spin_lock_irqsave(&desc->lock, flags);
@@ -169,9 +169,9 @@ EXPORT_SYMBOL(disable_irq_nosync);
  */
 void disable_irq(unsigned int irq)
 {
-       struct irq_desc *desc = irq_desc + irq;
+       struct irq_desc *desc = irq_to_desc(irq);
 
-       if (irq >= NR_IRQS)
+       if (!desc)
                return;
 
        disable_irq_nosync(irq);
@@ -211,10 +211,10 @@ static void __enable_irq(struct irq_desc *desc, unsigned int irq)
  */
 void enable_irq(unsigned int irq)
 {
-       struct irq_desc *desc = irq_desc + irq;
+       struct irq_desc *desc = irq_to_desc(irq);
        unsigned long flags;
 
-       if (irq >= NR_IRQS)
+       if (!desc)
                return;
 
        spin_lock_irqsave(&desc->lock, flags);
@@ -223,9 +223,9 @@ void enable_irq(unsigned int irq)
 }
 EXPORT_SYMBOL(enable_irq);
 
-int set_irq_wake_real(unsigned int irq, unsigned int on)
+static int set_irq_wake_real(unsigned int irq, unsigned int on)
 {
-       struct irq_desc *desc = irq_desc + irq;
+       struct irq_desc *desc = irq_to_desc(irq);
        int ret = -ENXIO;
 
        if (desc->chip->set_wake)
@@ -248,7 +248,7 @@ int set_irq_wake_real(unsigned int irq, unsigned int on)
  */
 int set_irq_wake(unsigned int irq, unsigned int on)
 {
-       struct irq_desc *desc = irq_desc + irq;
+       struct irq_desc *desc = irq_to_desc(irq);
        unsigned long flags;
        int ret = 0;
 
@@ -288,12 +288,16 @@ EXPORT_SYMBOL(set_irq_wake);
  */
 int can_request_irq(unsigned int irq, unsigned long irqflags)
 {
+       struct irq_desc *desc = irq_to_desc(irq);
        struct irqaction *action;
 
-       if (irq >= NR_IRQS || irq_desc[irq].status & IRQ_NOREQUEST)
+       if (!desc)
+               return 0;
+
+       if (desc->status & IRQ_NOREQUEST)
                return 0;
 
-       action = irq_desc[irq].action;
+       action = desc->action;
        if (action)
                if (irqflags & action->flags & IRQF_SHARED)
                        action = NULL;
@@ -312,10 +316,11 @@ void compat_irq_chip_set_default_handler(struct irq_desc *desc)
                desc->handle_irq = NULL;
 }
 
-static int __irq_set_trigger(struct irq_chip *chip, unsigned int irq,
+int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
                unsigned long flags)
 {
        int ret;
+       struct irq_chip *chip = desc->chip;
 
        if (!chip || !chip->set_type) {
                /*
@@ -333,6 +338,11 @@ static int __irq_set_trigger(struct irq_chip *chip, unsigned int irq,
                pr_err("setting trigger mode %d for irq %u failed (%pF)\n",
                                (int)(flags & IRQF_TRIGGER_MASK),
                                irq, chip->set_type);
+       else {
+               /* note that IRQF_TRIGGER_MASK == IRQ_TYPE_SENSE_MASK */
+               desc->status &= ~IRQ_TYPE_SENSE_MASK;
+               desc->status |= flags & IRQ_TYPE_SENSE_MASK;
+       }
 
        return ret;
 }
@@ -341,16 +351,16 @@ static int __irq_set_trigger(struct irq_chip *chip, unsigned int irq,
  * Internal function to register an irqaction - typically used to
  * allocate special interrupts that are part of the architecture.
  */
-int setup_irq(unsigned int irq, struct irqaction *new)
+static int
+__setup_irq(unsigned int irq, struct irq_desc * desc, struct irqaction *new)
 {
-       struct irq_desc *desc = irq_desc + irq;
        struct irqaction *old, **p;
        const char *old_name = NULL;
        unsigned long flags;
        int shared = 0;
        int ret;
 
-       if (irq >= NR_IRQS)
+       if (!desc)
                return -EINVAL;
 
        if (desc->chip == &no_irq_chip)
@@ -411,7 +421,7 @@ int setup_irq(unsigned int irq, struct irqaction *new)
 
                /* Setup the type (level, edge polarity) if configured: */
                if (new->flags & IRQF_TRIGGER_MASK) {
-                       ret = __irq_set_trigger(desc->chip, irq, new->flags);
+                       ret = __irq_set_trigger(desc, irq, new->flags);
 
                        if (ret) {
                                spin_unlock_irqrestore(&desc->lock, flags);
@@ -430,16 +440,21 @@ int setup_irq(unsigned int irq, struct irqaction *new)
                if (!(desc->status & IRQ_NOAUTOEN)) {
                        desc->depth = 0;
                        desc->status &= ~IRQ_DISABLED;
-                       if (desc->chip->startup)
-                               desc->chip->startup(irq);
-                       else
-                               desc->chip->enable(irq);
+                       desc->chip->startup(irq);
                } else
                        /* Undo nested disables: */
                        desc->depth = 1;
 
                /* Set default affinity mask once everything is setup */
                irq_select_affinity(irq);
+
+       } else if ((new->flags & IRQF_TRIGGER_MASK)
+                       && (new->flags & IRQF_TRIGGER_MASK)
+                               != (desc->status & IRQ_TYPE_SENSE_MASK)) {
+               /* hope the handler works with the actual trigger mode... */
+               pr_warning("IRQ %d uses trigger mode %d; requested %d\n",
+                               irq, (int)(desc->status & IRQ_TYPE_SENSE_MASK),
+                               (int)(new->flags & IRQF_TRIGGER_MASK));
        }
 
        *p = new;
@@ -464,7 +479,7 @@ int setup_irq(unsigned int irq, struct irqaction *new)
        spin_unlock_irqrestore(&desc->lock, flags);
 
        new->irq = irq;
-       register_irq_proc(irq);
+       register_irq_proc(irq, desc);
        new->dir = NULL;
        register_handler_proc(irq, new);
 
@@ -483,6 +498,20 @@ mismatch:
        return -EBUSY;
 }
 
+/**
+ *     setup_irq - setup an interrupt
+ *     @irq: Interrupt line to setup
+ *     @act: irqaction for the interrupt
+ *
+ * Used to statically setup interrupts in the early boot process.
+ */
+int setup_irq(unsigned int irq, struct irqaction *act)
+{
+       struct irq_desc *desc = irq_to_desc(irq);
+
+       return __setup_irq(irq, desc, act);
+}
+
 /**
  *     free_irq - free an interrupt
  *     @irq: Interrupt line to free
@@ -499,15 +528,15 @@ mismatch:
  */
 void free_irq(unsigned int irq, void *dev_id)
 {
-       struct irq_desc *desc;
+       struct irq_desc *desc = irq_to_desc(irq);
        struct irqaction **p;
        unsigned long flags;
 
        WARN_ON(in_interrupt());
-       if (irq >= NR_IRQS)
+
+       if (!desc)
                return;
 
-       desc = irq_desc + irq;
        spin_lock_irqsave(&desc->lock, flags);
        p = &desc->action;
        for (;;) {
@@ -596,12 +625,14 @@ EXPORT_SYMBOL(free_irq);
  *     IRQF_SHARED             Interrupt is shared
  *     IRQF_DISABLED   Disable local interrupts while processing
  *     IRQF_SAMPLE_RANDOM      The interrupt can be used for entropy
+ *     IRQF_TRIGGER_*          Specify active edge(s) or level
  *
  */
 int request_irq(unsigned int irq, irq_handler_t handler,
                unsigned long irqflags, const char *devname, void *dev_id)
 {
        struct irqaction *action;
+       struct irq_desc *desc;
        int retval;
 
 #ifdef CONFIG_LOCKDEP
@@ -618,9 +649,12 @@ int request_irq(unsigned int irq, irq_handler_t handler,
         */
        if ((irqflags & IRQF_SHARED) && !dev_id)
                return -EINVAL;
-       if (irq >= NR_IRQS)
+
+       desc = irq_to_desc(irq);
+       if (!desc)
                return -EINVAL;
-       if (irq_desc[irq].status & IRQ_NOREQUEST)
+
+       if (desc->status & IRQ_NOREQUEST)
                return -EINVAL;
        if (!handler)
                return -EINVAL;
@@ -636,26 +670,29 @@ int request_irq(unsigned int irq, irq_handler_t handler,
        action->next = NULL;
        action->dev_id = dev_id;
 
+       retval = __setup_irq(irq, desc, action);
+       if (retval)
+               kfree(action);
+
 #ifdef CONFIG_DEBUG_SHIRQ
        if (irqflags & IRQF_SHARED) {
                /*
                 * It's a shared IRQ -- the driver ought to be prepared for it
                 * to happen immediately, so let's make sure....
-                * We do this before actually registering it, to make sure that
-                * a 'real' IRQ doesn't run in parallel with our fake
+                * We disable the irq to make sure that a 'real' IRQ doesn't
+                * run in parallel with our fake.
                 */
                unsigned long flags;
 
+               disable_irq(irq);
                local_irq_save(flags);
+
                handler(irq, dev_id);
+
                local_irq_restore(flags);
+               enable_irq(irq);
        }
 #endif
-
-       retval = setup_irq(irq, action);
-       if (retval)
-               kfree(action);
-
        return retval;
 }
 EXPORT_SYMBOL(request_irq);
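
A driver-side example of the IRQF_TRIGGER_* usage now mentioned in the
request_irq() kerneldoc (sketch; the mydev names are illustrative):

    static irqreturn_t mydev_interrupt(int irq, void *dev_id)
    {
            /* quiesce the device here */
            return IRQ_HANDLED;
    }

    /* request a shared, rising-edge triggered line */
    ret = request_irq(irq, mydev_interrupt,
                    IRQF_TRIGGER_RISING | IRQF_SHARED,
                    "mydev", mydev);
    if (ret)
            return ret;
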
index 77b7acc875c5df6755954161736b72918fff49ad..90b920d3f52b3ca29f69cc204ab2481f4d95d4f6 100644
@@ -3,18 +3,18 @@
 
 void set_pending_irq(unsigned int irq, cpumask_t mask)
 {
-       struct irq_desc *desc = irq_desc + irq;
+       struct irq_desc *desc = irq_to_desc(irq);
        unsigned long flags;
 
        spin_lock_irqsave(&desc->lock, flags);
        desc->status |= IRQ_MOVE_PENDING;
-       irq_desc[irq].pending_mask = mask;
+       desc->pending_mask = mask;
        spin_unlock_irqrestore(&desc->lock, flags);
 }
 
 void move_masked_irq(int irq)
 {
-       struct irq_desc *desc = irq_desc + irq;
+       struct irq_desc *desc = irq_to_desc(irq);
        cpumask_t tmp;
 
        if (likely(!(desc->status & IRQ_MOVE_PENDING)))
@@ -30,7 +30,7 @@ void move_masked_irq(int irq)
 
        desc->status &= ~IRQ_MOVE_PENDING;
 
-       if (unlikely(cpus_empty(irq_desc[irq].pending_mask)))
+       if (unlikely(cpus_empty(desc->pending_mask)))
                return;
 
        if (!desc->chip->set_affinity)
@@ -38,7 +38,7 @@ void move_masked_irq(int irq)
 
        assert_spin_locked(&desc->lock);
 
-       cpus_and(tmp, irq_desc[irq].pending_mask, cpu_online_map);
+       cpus_and(tmp, desc->pending_mask, cpu_online_map);
 
        /*
         * If there was a valid mask to work with, please
@@ -55,12 +55,12 @@ void move_masked_irq(int irq)
        if (likely(!cpus_empty(tmp))) {
                desc->chip->set_affinity(irq,tmp);
        }
-       cpus_clear(irq_desc[irq].pending_mask);
+       cpus_clear(desc->pending_mask);
 }
 
 void move_native_irq(int irq)
 {
-       struct irq_desc *desc = irq_desc + irq;
+       struct irq_desc *desc = irq_to_desc(irq);
 
        if (likely(!(desc->status & IRQ_MOVE_PENDING)))
                return;
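
Here only the descriptor lookup changes; the deferred-affinity handshake itself is untouched. set_pending_irq() parks the request in desc->pending_mask under IRQ_MOVE_PENDING, and move_masked_irq() later intersects it with cpu_online_map and hands it to chip->set_affinity() while the line is masked. Condensed to its first step (a sketch, names as above):

    /* Sketch: queue an affinity change; it is applied on a later
     * masked pass, not immediately. */
    static void queue_affinity_change(struct irq_desc *desc, cpumask_t mask)
    {
            unsigned long flags;

            spin_lock_irqsave(&desc->lock, flags);
            desc->status |= IRQ_MOVE_PENDING;   /* move on next mask */
            desc->pending_mask = mask;
            spin_unlock_irqrestore(&desc->lock, flags);
    }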
index a09dd29c2fd748dea1dd83fd00b73751aa649316..fac014a81b244f232409e5c717a2e1c76f7ac1ba 100644 (file)
@@ -19,7 +19,7 @@ static struct proc_dir_entry *root_irq_dir;
 
 static int irq_affinity_proc_show(struct seq_file *m, void *v)
 {
-       struct irq_desc *desc = irq_desc + (long)m->private;
+       struct irq_desc *desc = irq_to_desc((long)m->private);
        cpumask_t *mask = &desc->affinity;
 
 #ifdef CONFIG_GENERIC_PENDING_IRQ
@@ -43,7 +43,7 @@ static ssize_t irq_affinity_proc_write(struct file *file,
        cpumask_t new_value;
        int err;
 
-       if (!irq_desc[irq].chip->set_affinity || no_irq_affinity ||
+       if (!irq_to_desc(irq)->chip->set_affinity || no_irq_affinity ||
            irq_balancing_disabled(irq))
                return -EIO;
 
@@ -132,20 +132,20 @@ static const struct file_operations default_affinity_proc_fops = {
 static int irq_spurious_read(char *page, char **start, off_t off,
                                  int count, int *eof, void *data)
 {
-       struct irq_desc *d = &irq_desc[(long) data];
+       struct irq_desc *desc = irq_to_desc((long) data);
        return sprintf(page, "count %u\n"
                             "unhandled %u\n"
                             "last_unhandled %u ms\n",
-                       d->irq_count,
-                       d->irqs_unhandled,
-                       jiffies_to_msecs(d->last_unhandled));
+                       desc->irq_count,
+                       desc->irqs_unhandled,
+                       jiffies_to_msecs(desc->last_unhandled));
 }
 
 #define MAX_NAMELEN 128
 
 static int name_unique(unsigned int irq, struct irqaction *new_action)
 {
-       struct irq_desc *desc = irq_desc + irq;
+       struct irq_desc *desc = irq_to_desc(irq);
        struct irqaction *action;
        unsigned long flags;
        int ret = 1;
@@ -165,8 +165,9 @@ static int name_unique(unsigned int irq, struct irqaction *new_action)
 void register_handler_proc(unsigned int irq, struct irqaction *action)
 {
        char name [MAX_NAMELEN];
+       struct irq_desc *desc = irq_to_desc(irq);
 
-       if (!irq_desc[irq].dir || action->dir || !action->name ||
+       if (!desc->dir || action->dir || !action->name ||
                                        !name_unique(irq, action))
                return;
 
@@ -174,36 +175,34 @@ void register_handler_proc(unsigned int irq, struct irqaction *action)
        snprintf(name, MAX_NAMELEN, "%s", action->name);
 
        /* create /proc/irq/1234/handler/ */
-       action->dir = proc_mkdir(name, irq_desc[irq].dir);
+       action->dir = proc_mkdir(name, desc->dir);
 }
 
 #undef MAX_NAMELEN
 
 #define MAX_NAMELEN 10
 
-void register_irq_proc(unsigned int irq)
+void register_irq_proc(unsigned int irq, struct irq_desc *desc)
 {
        char name [MAX_NAMELEN];
        struct proc_dir_entry *entry;
 
-       if (!root_irq_dir ||
-               (irq_desc[irq].chip == &no_irq_chip) ||
-                       irq_desc[irq].dir)
+       if (!root_irq_dir || (desc->chip == &no_irq_chip) || desc->dir)
                return;
 
        memset(name, 0, MAX_NAMELEN);
        sprintf(name, "%d", irq);
 
        /* create /proc/irq/1234 */
-       irq_desc[irq].dir = proc_mkdir(name, root_irq_dir);
+       desc->dir = proc_mkdir(name, root_irq_dir);
 
 #ifdef CONFIG_SMP
        /* create /proc/irq/<irq>/smp_affinity */
-       proc_create_data("smp_affinity", 0600, irq_desc[irq].dir,
+       proc_create_data("smp_affinity", 0600, desc->dir,
                         &irq_affinity_proc_fops, (void *)(long)irq);
 #endif
 
-       entry = create_proc_entry("spurious", 0444, irq_desc[irq].dir);
+       entry = create_proc_entry("spurious", 0444, desc->dir);
        if (entry) {
                entry->data = (void *)(long)irq;
                entry->read_proc = irq_spurious_read;
@@ -214,8 +213,11 @@ void register_irq_proc(unsigned int irq)
 
 void unregister_handler_proc(unsigned int irq, struct irqaction *action)
 {
-       if (action->dir)
-               remove_proc_entry(action->dir->name, irq_desc[irq].dir);
+       if (action->dir) {
+               struct irq_desc *desc = irq_to_desc(irq);
+
+               remove_proc_entry(action->dir->name, desc->dir);
+       }
 }
 
 void register_default_affinity_proc(void)
@@ -228,7 +230,8 @@ void register_default_affinity_proc(void)
 
 void init_irq_proc(void)
 {
-       int i;
+       unsigned int irq;
+       struct irq_desc *desc;
 
        /* create /proc/irq */
        root_irq_dir = proc_mkdir("irq", NULL);
@@ -240,7 +243,7 @@ void init_irq_proc(void)
        /*
         * Create entries for all existing IRQs.
         */
-       for (i = 0; i < NR_IRQS; i++)
-               register_irq_proc(i);
+       for_each_irq_desc(irq, desc)
+               register_irq_proc(irq, desc);
 }
 
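With register_irq_proc() taking the descriptor directly, the init loop can walk allocated descriptors instead of counting to NR_IRQS; IRQ numbers without a descriptor are never visited. The iterator idiom in isolation (a sketch):

    #include <linux/irq.h>

    static void walk_irq_descs(void)
    {
            struct irq_desc *desc;
            unsigned int irq;

            /* visits only irqs that actually have a descriptor */
            for_each_irq_desc(irq, desc)
                    pr_debug("irq %u: depth %u\n", irq, desc->depth);
    }
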
index a8046791ba2d3fde84853ac7a7bfbb0bbca07b84..89c7117acf2beffa3a1ca189b3ad721fd4ee047f 100644 (file)
@@ -33,10 +33,10 @@ static void resend_irqs(unsigned long arg)
        struct irq_desc *desc;
        int irq;
 
-       while (!bitmap_empty(irqs_resend, NR_IRQS)) {
-               irq = find_first_bit(irqs_resend, NR_IRQS);
+       while (!bitmap_empty(irqs_resend, nr_irqs)) {
+               irq = find_first_bit(irqs_resend, nr_irqs);
                clear_bit(irq, irqs_resend);
-               desc = irq_desc + irq;
+               desc = irq_to_desc(irq);
                local_irq_disable();
                desc->handle_irq(irq, desc);
                local_irq_enable();
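
resend_irqs() makes the matching change for bitmap scans: the bound is the runtime nr_irqs rather than the compile-time NR_IRQS, so only the range that actually exists is walked. The drain idiom by itself (a sketch):

    #include <linux/bitmap.h>

    static void drain_pending(unsigned long *pending)
    {
            while (!bitmap_empty(pending, nr_irqs)) {
                    int irq = find_first_bit(pending, nr_irqs);

                    clear_bit(irq, pending);
                    /* ... service irq ... */
            }
    }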
index c66d3f10e85326ab1041a29202047734769b9b10..dd364c11e56e0f82e0e923fcc3b60d17524e991d 100644 (file)
 #include <linux/kallsyms.h>
 #include <linux/interrupt.h>
 #include <linux/moduleparam.h>
+#include <linux/timer.h>
 
 static int irqfixup __read_mostly;
 
+#define POLL_SPURIOUS_IRQ_INTERVAL (HZ/10)
+static void poll_spurious_irqs(unsigned long dummy);
+static DEFINE_TIMER(poll_spurious_irq_timer, poll_spurious_irqs, 0, 0);
+
 /*
  * Recovery handler for misrouted interrupts.
  */
-static int misrouted_irq(int irq)
+static int try_one_irq(int irq, struct irq_desc *desc)
 {
-       int i;
-       int ok = 0;
-       int work = 0;   /* Did we do work for a real IRQ */
-
-       for (i = 1; i < NR_IRQS; i++) {
-               struct irq_desc *desc = irq_desc + i;
-               struct irqaction *action;
-
-               if (i == irq)   /* Already tried */
-                       continue;
+       struct irqaction *action;
+       int ok = 0, work = 0;
 
-               spin_lock(&desc->lock);
-               /* Already running on another processor */
-               if (desc->status & IRQ_INPROGRESS) {
-                       /*
-                        * Already running: If it is shared get the other
-                        * CPU to go looking for our mystery interrupt too
-                        */
-                       if (desc->action && (desc->action->flags & IRQF_SHARED))
-                               desc->status |= IRQ_PENDING;
-                       spin_unlock(&desc->lock);
-                       continue;
-               }
-               /* Honour the normal IRQ locking */
-               desc->status |= IRQ_INPROGRESS;
-               action = desc->action;
+       spin_lock(&desc->lock);
+       /* Already running on another processor */
+       if (desc->status & IRQ_INPROGRESS) {
+               /*
+                * Already running: If it is shared get the other
+                * CPU to go looking for our mystery interrupt too
+                */
+               if (desc->action && (desc->action->flags & IRQF_SHARED))
+                       desc->status |= IRQ_PENDING;
                spin_unlock(&desc->lock);
+               return ok;
+       }
+       /* Honour the normal IRQ locking */
+       desc->status |= IRQ_INPROGRESS;
+       action = desc->action;
+       spin_unlock(&desc->lock);
 
-               while (action) {
-                       /* Only shared IRQ handlers are safe to call */
-                       if (action->flags & IRQF_SHARED) {
-                               if (action->handler(i, action->dev_id) ==
-                                               IRQ_HANDLED)
-                                       ok = 1;
-                       }
-                       action = action->next;
+       while (action) {
+               /* Only shared IRQ handlers are safe to call */
+               if (action->flags & IRQF_SHARED) {
+                       if (action->handler(irq, action->dev_id) ==
+                               IRQ_HANDLED)
+                               ok = 1;
                }
-               local_irq_disable();
-               /* Now clean up the flags */
-               spin_lock(&desc->lock);
-               action = desc->action;
+               action = action->next;
+       }
+       local_irq_disable();
+       /* Now clean up the flags */
+       spin_lock(&desc->lock);
+       action = desc->action;
 
+       /*
+        * While we were looking for a fixup someone queued a real
+        * IRQ clashing with our walk:
+        */
+       while ((desc->status & IRQ_PENDING) && action) {
                /*
-                * While we were looking for a fixup someone queued a real
-                * IRQ clashing with our walk:
-                */
-               while ((desc->status & IRQ_PENDING) && action) {
-                       /*
-                        * Perform real IRQ processing for the IRQ we deferred
-                        */
-                       work = 1;
-                       spin_unlock(&desc->lock);
-                       handle_IRQ_event(i, action);
-                       spin_lock(&desc->lock);
-                       desc->status &= ~IRQ_PENDING;
-               }
-               desc->status &= ~IRQ_INPROGRESS;
-               /*
-                * If we did actual work for the real IRQ line we must let the
-                * IRQ controller clean up too
+                * Perform real IRQ processing for the IRQ we deferred
                 */
-               if (work && desc->chip && desc->chip->end)
-                       desc->chip->end(i);
+               work = 1;
                spin_unlock(&desc->lock);
+               handle_IRQ_event(irq, action);
+               spin_lock(&desc->lock);
+               desc->status &= ~IRQ_PENDING;
+       }
+       desc->status &= ~IRQ_INPROGRESS;
+       /*
+        * If we did actual work for the real IRQ line we must let the
+        * IRQ controller clean up too
+        */
+       if (work && desc->chip && desc->chip->end)
+               desc->chip->end(irq);
+       spin_unlock(&desc->lock);
+
+       return ok;
+}
+
+static int misrouted_irq(int irq)
+{
+       struct irq_desc *desc;
+       int i, ok = 0;
+
+       for_each_irq_desc(i, desc) {
+               if (!i)
+                       continue;
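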
+
+               if (i == irq)   /* Already tried */
+                       continue;
+
+               if (try_one_irq(i, desc))
+                       ok = 1;
        }
        /* So the caller can adjust the irq error counts */
        return ok;
 }
 
+static void poll_spurious_irqs(unsigned long dummy)
+{
+       struct irq_desc *desc;
+       int i;
+
+       for_each_irq_desc(i, desc) {
+               unsigned int status;
+
+               if (!i)
+                       continue;
+
+               /* Racy but it doesn't matter */
+               status = desc->status;
+               barrier();
+               if (!(status & IRQ_SPURIOUS_DISABLED))
+                       continue;
+
+               try_one_irq(i, desc);
+       }
+
+       mod_timer(&poll_spurious_irq_timer,
+                 jiffies + POLL_SPURIOUS_IRQ_INTERVAL);
+}
+
 /*
  * If 99,900 of the previous 100,000 interrupts have not been handled
  * then assume that the IRQ is stuck in some manner. Drop a diagnostic
@@ -137,7 +176,9 @@ report_bad_irq(unsigned int irq, struct irq_desc *desc, irqreturn_t action_ret)
        }
 }
 
-static inline int try_misrouted_irq(unsigned int irq, struct irq_desc *desc, irqreturn_t action_ret)
+static inline int
+try_misrouted_irq(unsigned int irq, struct irq_desc *desc,
+                 irqreturn_t action_ret)
 {
        struct irqaction *action;
 
@@ -212,6 +253,9 @@ void note_interrupt(unsigned int irq, struct irq_desc *desc,
                desc->status |= IRQ_DISABLED | IRQ_SPURIOUS_DISABLED;
                desc->depth++;
                desc->chip->disable(irq);
+
+               mod_timer(&poll_spurious_irq_timer,
+                         jiffies + POLL_SPURIOUS_IRQ_INTERVAL);
        }
        desc->irqs_unhandled = 0;
 }
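
note_interrupt() no longer just parks a stuck line: alongside IRQ_DISABLED | IRQ_SPURIOUS_DISABLED it now arms poll_spurious_irq_timer, so the disabled line keeps being serviced by try_one_irq() every HZ/10 ticks (poll_spurious_irqs() re-arms itself, as added above). The self-rearming timer pattern in isolation (a sketch; the real callback walks the descriptors):

    #include <linux/timer.h>
    #include <linux/jiffies.h>

    #define POLL_INTERVAL (HZ / 10)

    static void poll_fn(unsigned long dummy);
    static DEFINE_TIMER(poll_timer, poll_fn, 0, 0);

    static void poll_fn(unsigned long dummy)
    {
            /* ... poll lines flagged IRQ_SPURIOUS_DISABLED ... */
            mod_timer(&poll_timer, jiffies + POLL_INTERVAL); /* re-arm */
    }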
@@ -241,7 +285,7 @@ static int __init irqfixup_setup(char *str)
 
 __setup("irqfixup", irqfixup_setup);
 module_param(irqfixup, int, 0644);
-MODULE_PARM_DESC("irqfixup", "0: No fixup, 1: irqfixup mode 2: irqpoll mode");
+MODULE_PARM_DESC("irqfixup", "0: No fixup, 1: irqfixup mode, 2: irqpoll mode");
 
 static int __init irqpoll_setup(char *str)
 {
index ab982747d9bd8121c19b3327b96c7760adb7577e..db7c358b9a02f1cc50954517124c8272b5eed463 100644 (file)
@@ -55,17 +55,15 @@ int do_getitimer(int which, struct itimerval *value)
                spin_unlock_irq(&tsk->sighand->siglock);
                break;
        case ITIMER_VIRTUAL:
-               read_lock(&tasklist_lock);
                spin_lock_irq(&tsk->sighand->siglock);
                cval = tsk->signal->it_virt_expires;
                cinterval = tsk->signal->it_virt_incr;
                if (!cputime_eq(cval, cputime_zero)) {
-                       struct task_struct *t = tsk;
-                       cputime_t utime = tsk->signal->utime;
-                       do {
-                               utime = cputime_add(utime, t->utime);
-                               t = next_thread(t);
-                       } while (t != tsk);
+                       struct task_cputime cputime;
+                       cputime_t utime;
+
+                       thread_group_cputime(tsk, &cputime);
+                       utime = cputime.utime;
                        if (cputime_le(cval, utime)) { /* about to fire */
                                cval = jiffies_to_cputime(1);
                        } else {
@@ -73,25 +71,19 @@ int do_getitimer(int which, struct itimerval *value)
                        }
                }
                spin_unlock_irq(&tsk->sighand->siglock);
-               read_unlock(&tasklist_lock);
                cputime_to_timeval(cval, &value->it_value);
                cputime_to_timeval(cinterval, &value->it_interval);
                break;
        case ITIMER_PROF:
-               read_lock(&tasklist_lock);
                spin_lock_irq(&tsk->sighand->siglock);
                cval = tsk->signal->it_prof_expires;
                cinterval = tsk->signal->it_prof_incr;
                if (!cputime_eq(cval, cputime_zero)) {
-                       struct task_struct *t = tsk;
-                       cputime_t ptime = cputime_add(tsk->signal->utime,
-                                                     tsk->signal->stime);
-                       do {
-                               ptime = cputime_add(ptime,
-                                                   cputime_add(t->utime,
-                                                               t->stime));
-                               t = next_thread(t);
-                       } while (t != tsk);
+                       struct task_cputime times;
+                       cputime_t ptime;
+
+                       thread_group_cputime(tsk, &times);
+                       ptime = cputime_add(times.utime, times.stime);
                        if (cputime_le(cval, ptime)) { /* about to fire */
                                cval = jiffies_to_cputime(1);
                        } else {
@@ -99,7 +91,6 @@ int do_getitimer(int which, struct itimerval *value)
                        }
                }
                spin_unlock_irq(&tsk->sighand->siglock);
-               read_unlock(&tasklist_lock);
                cputime_to_timeval(cval, &value->it_value);
                cputime_to_timeval(cinterval, &value->it_interval);
                break;
@@ -185,7 +176,6 @@ again:
        case ITIMER_VIRTUAL:
                nval = timeval_to_cputime(&value->it_value);
                ninterval = timeval_to_cputime(&value->it_interval);
-               read_lock(&tasklist_lock);
                spin_lock_irq(&tsk->sighand->siglock);
                cval = tsk->signal->it_virt_expires;
                cinterval = tsk->signal->it_virt_incr;
@@ -200,7 +190,6 @@ again:
                tsk->signal->it_virt_expires = nval;
                tsk->signal->it_virt_incr = ninterval;
                spin_unlock_irq(&tsk->sighand->siglock);
-               read_unlock(&tasklist_lock);
                if (ovalue) {
                        cputime_to_timeval(cval, &ovalue->it_value);
                        cputime_to_timeval(cinterval, &ovalue->it_interval);
@@ -209,7 +198,6 @@ again:
        case ITIMER_PROF:
                nval = timeval_to_cputime(&value->it_value);
                ninterval = timeval_to_cputime(&value->it_interval);
-               read_lock(&tasklist_lock);
                spin_lock_irq(&tsk->sighand->siglock);
                cval = tsk->signal->it_prof_expires;
                cinterval = tsk->signal->it_prof_incr;
@@ -224,7 +212,6 @@ again:
                tsk->signal->it_prof_expires = nval;
                tsk->signal->it_prof_incr = ninterval;
                spin_unlock_irq(&tsk->sighand->siglock);
-               read_unlock(&tasklist_lock);
                if (ovalue) {
                        cputime_to_timeval(cval, &ovalue->it_value);
                        cputime_to_timeval(cinterval, &ovalue->it_interval);
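
tasklist_lock can be dropped from all of these call sites because thread_group_cputime() (added to kernel/posix-cpu-timers.c later in this diff) no longer iterates the thread list; it sums per-CPU totals and needs no lock beyond what the caller already holds. What the ITIMER_PROF branch reduces to, roughly:

    static cputime_t group_prof_time(struct task_struct *tsk)
    {
            struct task_cputime times;

            /* O(nr_cpus), no tasklist_lock */
            thread_group_cputime(tsk, &times);
            return cputime_add(times.utime, times.stime);
    }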
index 777ac458ac993b8195a3c3cb7042a941ba611339..ac0fde7b54d082ee9ee327a0e57a7664f5cb72e7 100644 (file)
@@ -30,6 +30,7 @@
 #include <linux/pm.h>
 #include <linux/cpu.h>
 #include <linux/console.h>
+#include <linux/vmalloc.h>
 
 #include <asm/page.h>
 #include <asm/uaccess.h>
index 14ec64fe175ad5a742aac30718362a1f6875e9af..8e7a7ce3ed0a642f99dc7f73c220722c936e88ac 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/file.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
+#include <trace/sched.h>
 
 #define KTHREAD_NICE_LEVEL (-5)
 
@@ -205,6 +206,8 @@ int kthread_stop(struct task_struct *k)
        /* It could exit after stop_info.k set, but before wake_up_process. */
        get_task_struct(k);
 
+       trace_sched_kthread_stop(k);
+
        /* Must init completion *before* thread sees kthread_stop_info.k */
        init_completion(&kthread_stop_info.done);
        smp_wmb();
@@ -220,6 +223,8 @@ int kthread_stop(struct task_struct *k)
        ret = kthread_stop_info.err;
        mutex_unlock(&kthread_stop_lock);
 
+       trace_sched_kthread_stop_ret(ret);
+
        return ret;
 }
 EXPORT_SYMBOL(kthread_stop);
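
The two trace_sched_kthread_stop*() calls are static tracepoints from the new <trace/sched.h>; they cost almost nothing until a probe attaches. A hedged sketch of the consumer side, assuming the register_trace_<name>() helper that DECLARE_TRACE generates in this kernel (the probe function here is hypothetical):

    #include <trace/sched.h>

    /* Hypothetical probe; its signature must match the tracepoint's
     * TPPROTO declaration. */
    static void probe_kthread_stop(struct task_struct *t)
    {
            printk(KERN_INFO "kthread_stop(%d)\n", t->pid);
    }

    static int __init attach_probe(void)
    {
            return register_trace_sched_kthread_stop(probe_kthread_stop);
    }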
index 7d1faecd7a51bd643cc1e10f0f2048765136bf16..e9c6b2bc9400627cf183382ee55933333f0ee83b 100644 (file)
@@ -62,7 +62,7 @@ struct marker_entry {
        int refcount;   /* Number of times armed. 0 if disarmed. */
        struct rcu_head rcu;
        void *oldptr;
-       unsigned char rcu_pending:1;
+       int rcu_pending;
        unsigned char ptype:1;
        char name[0];   /* Contains name'\0'format'\0' */
 };
@@ -103,11 +103,11 @@ void marker_probe_cb(const struct marker *mdata, void *call_private, ...)
        char ptype;
 
        /*
-        * preempt_disable does two things : disabling preemption to make sure
-        * the teardown of the callbacks can be done correctly when they are in
-        * modules and they insure RCU read coherency.
+        * rcu_read_lock_sched does two things: it disables preemption to make
+        * sure that the teardown of the callbacks can be done correctly when
+        * they are in modules, and it ensures RCU read coherency.
         */
-       preempt_disable();
+       rcu_read_lock_sched();
        ptype = mdata->ptype;
        if (likely(!ptype)) {
                marker_probe_func *func;
@@ -145,7 +145,7 @@ void marker_probe_cb(const struct marker *mdata, void *call_private, ...)
                        va_end(args);
                }
        }
-       preempt_enable();
+       rcu_read_unlock_sched();
 }
 EXPORT_SYMBOL_GPL(marker_probe_cb);
 
@@ -162,7 +162,7 @@ void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...)
        va_list args;   /* not initialized */
        char ptype;
 
-       preempt_disable();
+       rcu_read_lock_sched();
        ptype = mdata->ptype;
        if (likely(!ptype)) {
                marker_probe_func *func;
@@ -195,7 +195,7 @@ void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...)
                        multi[i].func(multi[i].probe_private, call_private,
                                mdata->format, &args);
        }
-       preempt_enable();
+       rcu_read_unlock_sched();
 }
 EXPORT_SYMBOL_GPL(marker_probe_cb_noarg);
 
@@ -560,7 +560,7 @@ static int set_marker(struct marker_entry **entry, struct marker *elem,
  * Disable a marker and its probe callback.
  * Note: only waiting an RCU period after setting elem->call to the empty
 * function ensures that the original callback is not used anymore. This is ensured
- * by preempt_disable around the call site.
+ * by rcu_read_lock_sched around the call site.
  */
 static void disable_marker(struct marker *elem)
 {
@@ -653,11 +653,17 @@ int marker_probe_register(const char *name, const char *format,
        entry = get_marker(name);
        if (!entry) {
                entry = add_marker(name, format);
-               if (IS_ERR(entry)) {
+               if (IS_ERR(entry))
                        ret = PTR_ERR(entry);
-                       goto end;
-               }
+       } else if (format) {
+               if (!entry->format)
+                       ret = marker_set_format(&entry, format);
+               else if (strcmp(entry->format, format))
+                       ret = -EPERM;
        }
+       if (ret)
+               goto end;
+
        /*
         * If we detect that a call_rcu is pending for this marker,
         * make sure it's executed now.
@@ -674,6 +680,8 @@ int marker_probe_register(const char *name, const char *format,
        mutex_lock(&markers_mutex);
        entry = get_marker(name);
        WARN_ON(!entry);
+       if (entry->rcu_pending)
+               rcu_barrier_sched();
        entry->oldptr = old;
        entry->rcu_pending = 1;
        /* write rcu_pending before calling the RCU callback */
@@ -717,6 +725,8 @@ int marker_probe_unregister(const char *name,
        entry = get_marker(name);
        if (!entry)
                goto end;
+       if (entry->rcu_pending)
+               rcu_barrier_sched();
        entry->oldptr = old;
        entry->rcu_pending = 1;
        /* write rcu_pending before calling the RCU callback */
@@ -795,6 +805,8 @@ int marker_probe_unregister_private_data(marker_probe_func *probe,
        mutex_lock(&markers_mutex);
        entry = get_marker_from_private_data(probe, probe_private);
        WARN_ON(!entry);
+       if (entry->rcu_pending)
+               rcu_barrier_sched();
        entry->oldptr = old;
        entry->rcu_pending = 1;
        /* write rcu_pending before calling the RCU callback */
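
Two things change for markers: the fast path pairs its preemption-disable with sched-RCU read-side semantics (rcu_read_lock_sched()), and every update path flushes a still-pending call_rcu callback with rcu_barrier_sched() before reusing entry->oldptr; rcu_pending also widens from a 1-bit bitfield to an int so the flag cannot be clobbered by a racing write to the adjacent ptype bit. The read/update discipline in miniature (a generic sketch, not the marker code itself):

    #include <linux/rcupdate.h>

    static void (*probe)(void);          /* published callback pointer */

    static void reader(void)
    {
            void (*func)(void);

            rcu_read_lock_sched();       /* disables preemption too */
            func = rcu_dereference(probe);
            if (func)
                    func();
            rcu_read_unlock_sched();
    }

    static void replace_probe(void (*new_func)(void))
    {
            rcu_assign_pointer(probe, new_func);
            synchronize_sched();         /* all sched readers done */
            /* the old callback can no longer run; safe to tear down */
    }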
index 25bc9ac9e226ae02dce8ff39a7c1d914684b4e3e..0d8d21ee792c4e5812694162d241b7975ffd99bf 100644 (file)
@@ -46,6 +46,8 @@
 #include <asm/cacheflush.h>
 #include <linux/license.h>
 #include <asm/sections.h>
+#include <linux/tracepoint.h>
+#include <linux/ftrace.h>
 
 #if 0
 #define DEBUGP printk
@@ -1430,6 +1432,9 @@ static void free_module(struct module *mod)
        /* Module unload stuff */
        module_unload_free(mod);
 
+       /* release any pointers to mcount in this module */
+       ftrace_release(mod->module_core, mod->core_size);
+
        /* This may be NULL, but that's OK */
        module_free(mod, mod->module_init);
        kfree(mod->args);
@@ -1861,9 +1866,13 @@ static noinline struct module *load_module(void __user *umod,
        unsigned int markersindex;
        unsigned int markersstringsindex;
        unsigned int verboseindex;
+       unsigned int tracepointsindex;
+       unsigned int tracepointsstringsindex;
+       unsigned int mcountindex;
        struct module *mod;
        long err = 0;
        void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
+       void *mseg;
        struct exception_table_entry *extable;
        mm_segment_t old_fs;
 
@@ -2156,6 +2165,12 @@ static noinline struct module *load_module(void __user *umod,
        markersstringsindex = find_sec(hdr, sechdrs, secstrings,
                                        "__markers_strings");
        verboseindex = find_sec(hdr, sechdrs, secstrings, "__verbose");
+       tracepointsindex = find_sec(hdr, sechdrs, secstrings, "__tracepoints");
+       tracepointsstringsindex = find_sec(hdr, sechdrs, secstrings,
+                                       "__tracepoints_strings");
+
+       mcountindex = find_sec(hdr, sechdrs, secstrings,
+                              "__mcount_loc");
 
        /* Now do relocations. */
        for (i = 1; i < hdr->e_shnum; i++) {
@@ -2183,6 +2198,12 @@ static noinline struct module *load_module(void __user *umod,
        mod->num_markers =
                sechdrs[markersindex].sh_size / sizeof(*mod->markers);
 #endif
+#ifdef CONFIG_TRACEPOINTS
+       mod->tracepoints = (void *)sechdrs[tracepointsindex].sh_addr;
+       mod->num_tracepoints =
+               sechdrs[tracepointsindex].sh_size / sizeof(*mod->tracepoints);
+#endif
+
 
         /* Find duplicate symbols */
        err = verify_export_symbols(mod);
@@ -2201,12 +2222,22 @@ static noinline struct module *load_module(void __user *umod,
 
        add_kallsyms(mod, sechdrs, symindex, strindex, secstrings);
 
+       if (!mod->taints) {
 #ifdef CONFIG_MARKERS
-       if (!mod->taints)
                marker_update_probe_range(mod->markers,
                        mod->markers + mod->num_markers);
 #endif
        dynamic_printk_setup(sechdrs, verboseindex);
+#ifdef CONFIG_TRACEPOINTS
+               tracepoint_update_probe_range(mod->tracepoints,
+                       mod->tracepoints + mod->num_tracepoints);
+#endif
+       }
+
+       /* sechdrs[0].sh_size is always zero */
+       mseg = (void *)sechdrs[mcountindex].sh_addr;
+       ftrace_init_module(mseg, mseg + sechdrs[mcountindex].sh_size);
+
        err = module_finalize(hdr, sechdrs, mod);
        if (err < 0)
                goto cleanup;
@@ -2276,6 +2307,7 @@ static noinline struct module *load_module(void __user *umod,
  cleanup:
        kobject_del(&mod->mkobj.kobj);
        kobject_put(&mod->mkobj.kobj);
+       ftrace_release(mod->module_core, mod->core_size);
  free_unload:
        module_unload_free(mod);
        module_free(mod, mod->module_init);
@@ -2759,3 +2791,50 @@ void module_update_markers(void)
        mutex_unlock(&module_mutex);
 }
 #endif
+
+#ifdef CONFIG_TRACEPOINTS
+void module_update_tracepoints(void)
+{
+       struct module *mod;
+
+       mutex_lock(&module_mutex);
+       list_for_each_entry(mod, &modules, list)
+               if (!mod->taints)
+                       tracepoint_update_probe_range(mod->tracepoints,
+                               mod->tracepoints + mod->num_tracepoints);
+       mutex_unlock(&module_mutex);
+}
+
+/*
+ * Returns 0 if current not found.
+ * Returns 1 if current found.
+ */
+int module_get_iter_tracepoints(struct tracepoint_iter *iter)
+{
+       struct module *iter_mod;
+       int found = 0;
+
+       mutex_lock(&module_mutex);
+       list_for_each_entry(iter_mod, &modules, list) {
+               if (!iter_mod->taints) {
+                       /*
+                        * Sorted module list
+                        */
+                       if (iter_mod < iter->module)
+                               continue;
+                       else if (iter_mod > iter->module)
+                               iter->tracepoint = NULL;
+                       found = tracepoint_get_iter_range(&iter->tracepoint,
+                               iter_mod->tracepoints,
+                               iter_mod->tracepoints
+                                       + iter_mod->num_tracepoints);
+                       if (found) {
+                               iter->module = iter_mod;
+                               break;
+                       }
+               }
+       }
+       mutex_unlock(&module_mutex);
+       return found;
+}
+#endif
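
Module load now mirrors the boot-time handling of vmlinux: the __tracepoints section is recorded on struct module for the iterator above, and __mcount_loc (the table of mcount call sites the build emits) is handed to ftrace_init_module() so those sites can be patched; ftrace_release() undoes that on both the error path and in free_module(). Walking a module's tracepoint range, as tracepoint_get_iter_range() does, looks roughly like:

    #include <linux/tracepoint.h>

    static void list_tracepoints(struct tracepoint *begin,
                                 struct tracepoint *end)
    {
            struct tracepoint *tp;

            for (tp = begin; tp < end; tp++)
                    printk(KERN_DEBUG "%s: %s\n", tp->name,
                           tp->state ? "enabled" : "disabled");
    }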
index 823be11584efef8ef1d344f484cbf8c3d4f9617e..4282c0a40a57ada651b86c7dcce2389abf489448 100644 (file)
@@ -550,7 +550,7 @@ EXPORT_SYMBOL(unregister_reboot_notifier);
 
 static ATOMIC_NOTIFIER_HEAD(die_chain);
 
-int notify_die(enum die_val val, const char *str,
+int notrace notify_die(enum die_val val, const char *str,
               struct pt_regs *regs, long err, int trap, int sig)
 {
        struct die_args args = {
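
notify_die() gains notrace because it sits on exception paths the function tracer itself may take; letting mcount fire inside it could recurse. The attribute disables the -pg instrumentation for just that function (a trivial sketch; in this kernel the macro lives in linux/linkage.h, if memory serves):

    #include <linux/linkage.h>   /* notrace */

    static int notrace on_tracer_path(int v)
    {
            return v * 2;        /* compiled without an mcount call */
    }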
index c42a03aef36f07fd326eba959a90aa7bbe45dbd8..153dcb2639c3df614c7862041a9d485b7dbe6614 100644 (file)
@@ -7,6 +7,93 @@
 #include <linux/errno.h>
 #include <linux/math64.h>
 #include <asm/uaccess.h>
+#include <linux/kernel_stat.h>
+
+/*
+ * Allocate the thread_group_cputime structure appropriately and fill in the
+ * current values of the fields.  Called from copy_signal() via
+ * thread_group_cputime_clone_thread() when adding a second or subsequent
+ * thread to a thread group.  Assumes interrupts are enabled when called.
+ */
+int thread_group_cputime_alloc(struct task_struct *tsk)
+{
+       struct signal_struct *sig = tsk->signal;
+       struct task_cputime *cputime;
+
+       /*
+        * If we have multiple threads and we don't already have a
+        * per-CPU task_cputime struct (checked in the caller), allocate
+        * one and fill it in with the times accumulated so far.  We may
+        * race with another thread so recheck after we pick up the sighand
+        * lock.
+        */
+       cputime = alloc_percpu(struct task_cputime);
+       if (cputime == NULL)
+               return -ENOMEM;
+       spin_lock_irq(&tsk->sighand->siglock);
+       if (sig->cputime.totals) {
+               spin_unlock_irq(&tsk->sighand->siglock);
+               free_percpu(cputime);
+               return 0;
+       }
+       sig->cputime.totals = cputime;
+       cputime = per_cpu_ptr(sig->cputime.totals, smp_processor_id());
+       cputime->utime = tsk->utime;
+       cputime->stime = tsk->stime;
+       cputime->sum_exec_runtime = tsk->se.sum_exec_runtime;
+       spin_unlock_irq(&tsk->sighand->siglock);
+       return 0;
+}
+
+/**
+ * thread_group_cputime - Sum the thread group time fields across all CPUs.
+ *
+ * @tsk:       The task we use to identify the thread group.
+ * @times:     task_cputime structure in which we return the summed fields.
+ *
+ * Walk the list of CPUs to sum the per-CPU time fields in the thread group
+ * time structure.
+ */
+void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
+{
+       struct signal_struct *sig;
+       int i;
+       struct task_cputime *tot;
+
+       sig = tsk->signal;
+       if (unlikely(!sig) || !sig->cputime.totals) {
+               times->utime = tsk->utime;
+               times->stime = tsk->stime;
+               times->sum_exec_runtime = tsk->se.sum_exec_runtime;
+               return;
+       }
+       times->stime = times->utime = cputime_zero;
+       times->sum_exec_runtime = 0;
+       for_each_possible_cpu(i) {
+               tot = per_cpu_ptr(tsk->signal->cputime.totals, i);
+               times->utime = cputime_add(times->utime, tot->utime);
+               times->stime = cputime_add(times->stime, tot->stime);
+               times->sum_exec_runtime += tot->sum_exec_runtime;
+       }
+}
+
+/*
+ * Called after updating RLIMIT_CPU to set timer expiration if necessary.
+ */
+void update_rlimit_cpu(unsigned long rlim_new)
+{
+       cputime_t cputime;
+
+       cputime = secs_to_cputime(rlim_new);
+       if (cputime_eq(current->signal->it_prof_expires, cputime_zero) ||
+           cputime_lt(current->signal->it_prof_expires, cputime)) {
+               spin_lock_irq(&current->sighand->siglock);
+               set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL);
+               spin_unlock_irq(&current->sighand->siglock);
+       }
+}
 
 static int check_clock(const clockid_t which_clock)
 {
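
The additions above are the core of this file's rewrite: group CPU time lives in per-CPU buckets (sig->cputime.totals, one struct task_cputime per CPU), which the scheduler tick updates locklessly on its own CPU and readers sum on demand, trading the old O(nr_threads) walk under tasklist_lock for an O(nr_cpus) loop. The generic shape of that pattern (a sketch):

    #include <linux/percpu.h>
    #include <linux/smp.h>

    struct bucket { u64 ns; };
    static struct bucket *totals;        /* from alloc_percpu(struct bucket) */

    static void tick_account(u64 delta)  /* hot path: this CPU only */
    {
            per_cpu_ptr(totals, smp_processor_id())->ns += delta;
    }

    static u64 read_total(void)          /* slow path: O(nr_cpus) */
    {
            u64 sum = 0;
            int cpu;

            for_each_possible_cpu(cpu)
                    sum += per_cpu_ptr(totals, cpu)->ns;
            return sum;
    }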
@@ -158,10 +245,6 @@ static inline cputime_t virt_ticks(struct task_struct *p)
 {
        return p->utime;
 }
-static inline unsigned long long sched_ns(struct task_struct *p)
-{
-       return task_sched_runtime(p);
-}
 
 int posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp)
 {
@@ -211,7 +294,7 @@ static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
                cpu->cpu = virt_ticks(p);
                break;
        case CPUCLOCK_SCHED:
-               cpu->sched = sched_ns(p);
+               cpu->sched = p->se.sum_exec_runtime + task_delta_exec(p);
                break;
        }
        return 0;
@@ -220,59 +303,30 @@ static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
 /*
  * Sample a process (thread group) clock for the given group_leader task.
  * Must be called with tasklist_lock held for reading.
- * Must be called with tasklist_lock held for reading, and p->sighand->siglock.
  */
-static int cpu_clock_sample_group_locked(unsigned int clock_idx,
-                                        struct task_struct *p,
-                                        union cpu_time_count *cpu)
+static int cpu_clock_sample_group(const clockid_t which_clock,
+                                 struct task_struct *p,
+                                 union cpu_time_count *cpu)
 {
-       struct task_struct *t = p;
-       switch (clock_idx) {
+       struct task_cputime cputime;
+
+       thread_group_cputime(p, &cputime);
+       switch (which_clock) {
        default:
                return -EINVAL;
        case CPUCLOCK_PROF:
-               cpu->cpu = cputime_add(p->signal->utime, p->signal->stime);
-               do {
-                       cpu->cpu = cputime_add(cpu->cpu, prof_ticks(t));
-                       t = next_thread(t);
-               } while (t != p);
+               cpu->cpu = cputime_add(cputime.utime, cputime.stime);
                break;
        case CPUCLOCK_VIRT:
-               cpu->cpu = p->signal->utime;
-               do {
-                       cpu->cpu = cputime_add(cpu->cpu, virt_ticks(t));
-                       t = next_thread(t);
-               } while (t != p);
+               cpu->cpu = cputime.utime;
                break;
        case CPUCLOCK_SCHED:
-               cpu->sched = p->signal->sum_sched_runtime;
-               /* Add in each other live thread.  */
-               while ((t = next_thread(t)) != p) {
-                       cpu->sched += t->se.sum_exec_runtime;
-               }
-               cpu->sched += sched_ns(p);
+               cpu->sched = cputime.sum_exec_runtime + task_delta_exec(p);
                break;
        }
        return 0;
 }
 
-/*
- * Sample a process (thread group) clock for the given group_leader task.
- * Must be called with tasklist_lock held for reading.
- */
-static int cpu_clock_sample_group(const clockid_t which_clock,
-                                 struct task_struct *p,
-                                 union cpu_time_count *cpu)
-{
-       int ret;
-       unsigned long flags;
-       spin_lock_irqsave(&p->sighand->siglock, flags);
-       ret = cpu_clock_sample_group_locked(CPUCLOCK_WHICH(which_clock), p,
-                                           cpu);
-       spin_unlock_irqrestore(&p->sighand->siglock, flags);
-       return ret;
-}
-
 
 int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp)
 {
@@ -471,80 +525,11 @@ void posix_cpu_timers_exit(struct task_struct *tsk)
 }
 void posix_cpu_timers_exit_group(struct task_struct *tsk)
 {
-       cleanup_timers(tsk->signal->cpu_timers,
-                      cputime_add(tsk->utime, tsk->signal->utime),
-                      cputime_add(tsk->stime, tsk->signal->stime),
-                    tsk->se.sum_exec_runtime + tsk->signal->sum_sched_runtime);
-}
+       struct task_cputime cputime;
 
-
-/*
- * Set the expiry times of all the threads in the process so one of them
- * will go off before the process cumulative expiry total is reached.
- */
-static void process_timer_rebalance(struct task_struct *p,
-                                   unsigned int clock_idx,
-                                   union cpu_time_count expires,
-                                   union cpu_time_count val)
-{
-       cputime_t ticks, left;
-       unsigned long long ns, nsleft;
-       struct task_struct *t = p;
-       unsigned int nthreads = atomic_read(&p->signal->live);
-
-       if (!nthreads)
-               return;
-
-       switch (clock_idx) {
-       default:
-               BUG();
-               break;
-       case CPUCLOCK_PROF:
-               left = cputime_div_non_zero(cputime_sub(expires.cpu, val.cpu),
-                                      nthreads);
-               do {
-                       if (likely(!(t->flags & PF_EXITING))) {
-                               ticks = cputime_add(prof_ticks(t), left);
-                               if (cputime_eq(t->it_prof_expires,
-                                              cputime_zero) ||
-                                   cputime_gt(t->it_prof_expires, ticks)) {
-                                       t->it_prof_expires = ticks;
-                               }
-                       }
-                       t = next_thread(t);
-               } while (t != p);
-               break;
-       case CPUCLOCK_VIRT:
-               left = cputime_div_non_zero(cputime_sub(expires.cpu, val.cpu),
-                                      nthreads);
-               do {
-                       if (likely(!(t->flags & PF_EXITING))) {
-                               ticks = cputime_add(virt_ticks(t), left);
-                               if (cputime_eq(t->it_virt_expires,
-                                              cputime_zero) ||
-                                   cputime_gt(t->it_virt_expires, ticks)) {
-                                       t->it_virt_expires = ticks;
-                               }
-                       }
-                       t = next_thread(t);
-               } while (t != p);
-               break;
-       case CPUCLOCK_SCHED:
-               nsleft = expires.sched - val.sched;
-               do_div(nsleft, nthreads);
-               nsleft = max_t(unsigned long long, nsleft, 1);
-               do {
-                       if (likely(!(t->flags & PF_EXITING))) {
-                               ns = t->se.sum_exec_runtime + nsleft;
-                               if (t->it_sched_expires == 0 ||
-                                   t->it_sched_expires > ns) {
-                                       t->it_sched_expires = ns;
-                               }
-                       }
-                       t = next_thread(t);
-               } while (t != p);
-               break;
-       }
+       thread_group_cputime(tsk, &cputime);
+       cleanup_timers(tsk->signal->cpu_timers,
+                      cputime.utime, cputime.stime, cputime.sum_exec_runtime);
 }
 
 static void clear_dead_task(struct k_itimer *timer, union cpu_time_count now)
@@ -608,29 +593,32 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now)
                        default:
                                BUG();
                        case CPUCLOCK_PROF:
-                               if (cputime_eq(p->it_prof_expires,
+                               if (cputime_eq(p->cputime_expires.prof_exp,
                                               cputime_zero) ||
-                                   cputime_gt(p->it_prof_expires,
+                                   cputime_gt(p->cputime_expires.prof_exp,
                                               nt->expires.cpu))
-                                       p->it_prof_expires = nt->expires.cpu;
+                                       p->cputime_expires.prof_exp =
+                                               nt->expires.cpu;
                                break;
                        case CPUCLOCK_VIRT:
-                               if (cputime_eq(p->it_virt_expires,
+                               if (cputime_eq(p->cputime_expires.virt_exp,
                                               cputime_zero) ||
-                                   cputime_gt(p->it_virt_expires,
+                                   cputime_gt(p->cputime_expires.virt_exp,
                                               nt->expires.cpu))
-                                       p->it_virt_expires = nt->expires.cpu;
+                                       p->cputime_expires.virt_exp =
+                                               nt->expires.cpu;
                                break;
                        case CPUCLOCK_SCHED:
-                               if (p->it_sched_expires == 0 ||
-                                   p->it_sched_expires > nt->expires.sched)
-                                       p->it_sched_expires = nt->expires.sched;
+                               if (p->cputime_expires.sched_exp == 0 ||
+                                   p->cputime_expires.sched_exp >
+                                                       nt->expires.sched)
+                                       p->cputime_expires.sched_exp =
+                                               nt->expires.sched;
                                break;
                        }
                } else {
                        /*
-                        * For a process timer, we must balance
-                        * all the live threads' expirations.
+                        * For a process timer, set the cached expiration time.
                         */
                        switch (CPUCLOCK_WHICH(timer->it_clock)) {
                        default:
@@ -641,7 +629,9 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now)
                                    cputime_lt(p->signal->it_virt_expires,
                                               timer->it.cpu.expires.cpu))
                                        break;
-                               goto rebalance;
+                               p->signal->cputime_expires.virt_exp =
+                                       timer->it.cpu.expires.cpu;
+                               break;
                        case CPUCLOCK_PROF:
                                if (!cputime_eq(p->signal->it_prof_expires,
                                                cputime_zero) &&
@@ -652,13 +642,12 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now)
                                if (i != RLIM_INFINITY &&
                                    i <= cputime_to_secs(timer->it.cpu.expires.cpu))
                                        break;
-                               goto rebalance;
+                               p->signal->cputime_expires.prof_exp =
+                                       timer->it.cpu.expires.cpu;
+                               break;
                        case CPUCLOCK_SCHED:
-                       rebalance:
-                               process_timer_rebalance(
-                                       timer->it.cpu.task,
-                                       CPUCLOCK_WHICH(timer->it_clock),
-                                       timer->it.cpu.expires, now);
+                               p->signal->cputime_expires.sched_exp =
+                                       timer->it.cpu.expires.sched;
                                break;
                        }
                }
@@ -969,13 +958,13 @@ static void check_thread_timers(struct task_struct *tsk,
        struct signal_struct *const sig = tsk->signal;
 
        maxfire = 20;
-       tsk->it_prof_expires = cputime_zero;
+       tsk->cputime_expires.prof_exp = cputime_zero;
        while (!list_empty(timers)) {
                struct cpu_timer_list *t = list_first_entry(timers,
                                                      struct cpu_timer_list,
                                                      entry);
                if (!--maxfire || cputime_lt(prof_ticks(tsk), t->expires.cpu)) {
-                       tsk->it_prof_expires = t->expires.cpu;
+                       tsk->cputime_expires.prof_exp = t->expires.cpu;
                        break;
                }
                t->firing = 1;
@@ -984,13 +973,13 @@ static void check_thread_timers(struct task_struct *tsk,
 
        ++timers;
        maxfire = 20;
-       tsk->it_virt_expires = cputime_zero;
+       tsk->cputime_expires.virt_exp = cputime_zero;
        while (!list_empty(timers)) {
                struct cpu_timer_list *t = list_first_entry(timers,
                                                      struct cpu_timer_list,
                                                      entry);
                if (!--maxfire || cputime_lt(virt_ticks(tsk), t->expires.cpu)) {
-                       tsk->it_virt_expires = t->expires.cpu;
+                       tsk->cputime_expires.virt_exp = t->expires.cpu;
                        break;
                }
                t->firing = 1;
@@ -999,13 +988,13 @@ static void check_thread_timers(struct task_struct *tsk,
 
        ++timers;
        maxfire = 20;
-       tsk->it_sched_expires = 0;
+       tsk->cputime_expires.sched_exp = 0;
        while (!list_empty(timers)) {
                struct cpu_timer_list *t = list_first_entry(timers,
                                                      struct cpu_timer_list,
                                                      entry);
                if (!--maxfire || tsk->se.sum_exec_runtime < t->expires.sched) {
-                       tsk->it_sched_expires = t->expires.sched;
+                       tsk->cputime_expires.sched_exp = t->expires.sched;
                        break;
                }
                t->firing = 1;
@@ -1055,10 +1044,10 @@ static void check_process_timers(struct task_struct *tsk,
 {
        int maxfire;
        struct signal_struct *const sig = tsk->signal;
-       cputime_t utime, stime, ptime, virt_expires, prof_expires;
+       cputime_t utime, ptime, virt_expires, prof_expires;
        unsigned long long sum_sched_runtime, sched_expires;
-       struct task_struct *t;
        struct list_head *timers = sig->cpu_timers;
+       struct task_cputime cputime;
 
        /*
         * Don't sample the current process CPU clocks if there are no timers.
@@ -1074,18 +1063,10 @@ static void check_process_timers(struct task_struct *tsk,
        /*
         * Collect the current process totals.
         */
-       utime = sig->utime;
-       stime = sig->stime;
-       sum_sched_runtime = sig->sum_sched_runtime;
-       t = tsk;
-       do {
-               utime = cputime_add(utime, t->utime);
-               stime = cputime_add(stime, t->stime);
-               sum_sched_runtime += t->se.sum_exec_runtime;
-               t = next_thread(t);
-       } while (t != tsk);
-       ptime = cputime_add(utime, stime);
-
+       thread_group_cputime(tsk, &cputime);
+       utime = cputime.utime;
+       ptime = cputime_add(utime, cputime.stime);
+       sum_sched_runtime = cputime.sum_exec_runtime;
        maxfire = 20;
        prof_expires = cputime_zero;
        while (!list_empty(timers)) {
@@ -1193,60 +1174,18 @@ static void check_process_timers(struct task_struct *tsk,
                }
        }
 
-       if (!cputime_eq(prof_expires, cputime_zero) ||
-           !cputime_eq(virt_expires, cputime_zero) ||
-           sched_expires != 0) {
-               /*
-                * Rebalance the threads' expiry times for the remaining
-                * process CPU timers.
-                */
-
-               cputime_t prof_left, virt_left, ticks;
-               unsigned long long sched_left, sched;
-               const unsigned int nthreads = atomic_read(&sig->live);
-
-               if (!nthreads)
-                       return;
-
-               prof_left = cputime_sub(prof_expires, utime);
-               prof_left = cputime_sub(prof_left, stime);
-               prof_left = cputime_div_non_zero(prof_left, nthreads);
-               virt_left = cputime_sub(virt_expires, utime);
-               virt_left = cputime_div_non_zero(virt_left, nthreads);
-               if (sched_expires) {
-                       sched_left = sched_expires - sum_sched_runtime;
-                       do_div(sched_left, nthreads);
-                       sched_left = max_t(unsigned long long, sched_left, 1);
-               } else {
-                       sched_left = 0;
-               }
-               t = tsk;
-               do {
-                       if (unlikely(t->flags & PF_EXITING))
-                               continue;
-
-                       ticks = cputime_add(cputime_add(t->utime, t->stime),
-                                           prof_left);
-                       if (!cputime_eq(prof_expires, cputime_zero) &&
-                           (cputime_eq(t->it_prof_expires, cputime_zero) ||
-                            cputime_gt(t->it_prof_expires, ticks))) {
-                               t->it_prof_expires = ticks;
-                       }
-
-                       ticks = cputime_add(t->utime, virt_left);
-                       if (!cputime_eq(virt_expires, cputime_zero) &&
-                           (cputime_eq(t->it_virt_expires, cputime_zero) ||
-                            cputime_gt(t->it_virt_expires, ticks))) {
-                               t->it_virt_expires = ticks;
-                       }
-
-                       sched = t->se.sum_exec_runtime + sched_left;
-                       if (sched_expires && (t->it_sched_expires == 0 ||
-                                             t->it_sched_expires > sched)) {
-                               t->it_sched_expires = sched;
-                       }
-               } while ((t = next_thread(t)) != tsk);
-       }
+       if (!cputime_eq(prof_expires, cputime_zero) &&
+           (cputime_eq(sig->cputime_expires.prof_exp, cputime_zero) ||
+            cputime_gt(sig->cputime_expires.prof_exp, prof_expires)))
+               sig->cputime_expires.prof_exp = prof_expires;
+       if (!cputime_eq(virt_expires, cputime_zero) &&
+           (cputime_eq(sig->cputime_expires.virt_exp, cputime_zero) ||
+            cputime_gt(sig->cputime_expires.virt_exp, virt_expires)))
+               sig->cputime_expires.virt_exp = virt_expires;
+       if (sched_expires != 0 &&
+           (sig->cputime_expires.sched_exp == 0 ||
+            sig->cputime_expires.sched_exp > sched_expires))
+               sig->cputime_expires.sched_exp = sched_expires;
 }
 
 /*
@@ -1314,6 +1253,86 @@ out:
        ++timer->it_requeue_pending;
 }
 
+/**
+ * task_cputime_zero - Check a task_cputime struct for all zero fields.
+ *
+ * @cputime:   The struct to check.
+ *
+ * Checks @cputime to see if all fields are zero.  Returns true if all fields
+ * are zero, false if any field is nonzero.
+ */
+static inline int task_cputime_zero(const struct task_cputime *cputime)
+{
+       if (cputime_eq(cputime->utime, cputime_zero) &&
+           cputime_eq(cputime->stime, cputime_zero) &&
+           cputime->sum_exec_runtime == 0)
+               return 1;
+       return 0;
+}
+
+/**
+ * task_cputime_expired - Compare two task_cputime entities.
+ *
+ * @sample:    The task_cputime structure to be checked for expiration.
+ * @expires:   Expiration times, against which @sample will be checked.
+ *
+ * Checks @sample against @expires to see if any field of @sample has expired.
+ * Returns true if any set field of @expires has been reached or passed by the
+ * corresponding field of @sample.  Otherwise returns false.
+ */
+static inline int task_cputime_expired(const struct task_cputime *sample,
+                                       const struct task_cputime *expires)
+{
+       if (!cputime_eq(expires->utime, cputime_zero) &&
+           cputime_ge(sample->utime, expires->utime))
+               return 1;
+       if (!cputime_eq(expires->stime, cputime_zero) &&
+           cputime_ge(cputime_add(sample->utime, sample->stime),
+                      expires->stime))
+               return 1;
+       if (expires->sum_exec_runtime != 0 &&
+           sample->sum_exec_runtime >= expires->sum_exec_runtime)
+               return 1;
+       return 0;
+}
+
+/**
+ * fastpath_timer_check - POSIX CPU timers fast path.
+ *
+ * @tsk:       The task (thread) being checked.
+ *
+ * Check the task and thread group timers.  If both are zero (there are no
+ * timers set) return false.  Otherwise snapshot the task and thread group
+ * timers and compare them with the corresponding expiration times.  Return
+ * true if a timer has expired, else return false.
+ */
+static inline int fastpath_timer_check(struct task_struct *tsk)
+{
+       struct signal_struct *sig = tsk->signal;
+
+       if (unlikely(!sig))
+               return 0;
+
+       if (!task_cputime_zero(&tsk->cputime_expires)) {
+               struct task_cputime task_sample = {
+                       .utime = tsk->utime,
+                       .stime = tsk->stime,
+                       .sum_exec_runtime = tsk->se.sum_exec_runtime
+               };
+
+               if (task_cputime_expired(&task_sample, &tsk->cputime_expires))
+                       return 1;
+       }
+       if (!task_cputime_zero(&sig->cputime_expires)) {
+               struct task_cputime group_sample;
+
+               thread_group_cputime(tsk, &group_sample);
+               if (task_cputime_expired(&group_sample, &sig->cputime_expires))
+                       return 1;
+       }
+       return 0;
+}
+
 /*
  * This is called from the timer interrupt handler.  The irq handler has
  * already updated our counts.  We need to check if any timers fire now.
@@ -1326,42 +1345,31 @@ void run_posix_cpu_timers(struct task_struct *tsk)
 
        BUG_ON(!irqs_disabled());
 
-#define UNEXPIRED(clock) \
-               (cputime_eq(tsk->it_##clock##_expires, cputime_zero) || \
-                cputime_lt(clock##_ticks(tsk), tsk->it_##clock##_expires))
-
-       if (UNEXPIRED(prof) && UNEXPIRED(virt) &&
-           (tsk->it_sched_expires == 0 ||
-            tsk->se.sum_exec_runtime < tsk->it_sched_expires))
+       /*
+        * The fast path checks that there are no expired thread or thread
+        * group timers.  If that's so, just return.
+        */
+       if (!fastpath_timer_check(tsk))
                return;
 
-#undef UNEXPIRED
-
+       spin_lock(&tsk->sighand->siglock);
        /*
-        * Double-check with locks held.
+        * Here we take off tsk->signal->cpu_timers[N] and
+        * tsk->cpu_timers[N] all the timers that are firing, and
+        * put them on the firing list.
         */
-       read_lock(&tasklist_lock);
-       if (likely(tsk->signal != NULL)) {
-               spin_lock(&tsk->sighand->siglock);
+       check_thread_timers(tsk, &firing);
+       check_process_timers(tsk, &firing);
 
-               /*
-                * Here we take off tsk->cpu_timers[N] and tsk->signal->cpu_timers[N]
-                * all the timers that are firing, and put them on the firing list.
-                */
-               check_thread_timers(tsk, &firing);
-               check_process_timers(tsk, &firing);
-
-               /*
-                * We must release these locks before taking any timer's lock.
-                * There is a potential race with timer deletion here, as the
-                * siglock now protects our private firing list.  We have set
-                * the firing flag in each timer, so that a deletion attempt
-                * that gets the timer lock before we do will give it up and
-                * spin until we've taken care of that timer below.
-                */
-               spin_unlock(&tsk->sighand->siglock);
-       }
-       read_unlock(&tasklist_lock);
+       /*
+        * We must release these locks before taking any timer's lock.
+        * There is a potential race with timer deletion here, as the
+        * siglock now protects our private firing list.  We have set
+        * the firing flag in each timer, so that a deletion attempt
+        * that gets the timer lock before we do will give it up and
+        * spin until we've taken care of that timer below.
+        */
+       spin_unlock(&tsk->sighand->siglock);
 
        /*
         * Now that all the timers on our list have the firing flag,
@@ -1389,10 +1397,9 @@ void run_posix_cpu_timers(struct task_struct *tsk)
 
 /*
  * Set one of the process-wide special case CPU timers.
- * The tasklist_lock and tsk->sighand->siglock must be held by the caller.
- * The oldval argument is null for the RLIMIT_CPU timer, where *newval is
- * absolute; non-null for ITIMER_*, where *newval is relative and we update
- * it to be absolute, *oldval is absolute and we update it to be relative.
+ * The tsk->sighand->siglock must be held by the caller.
+ * The *newval argument is relative and we update it to be absolute;
+ * *oldval is absolute and we update it to be relative.
  */
 void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
                           cputime_t *newval, cputime_t *oldval)
@@ -1401,7 +1408,7 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
        struct list_head *head;
 
        BUG_ON(clock_idx == CPUCLOCK_SCHED);
-       cpu_clock_sample_group_locked(clock_idx, tsk, &now);
+       cpu_clock_sample_group(clock_idx, tsk, &now);
 
        if (oldval) {
                if (!cputime_eq(*oldval, cputime_zero)) {
@@ -1435,13 +1442,14 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
            cputime_ge(list_first_entry(head,
                                  struct cpu_timer_list, entry)->expires.cpu,
                       *newval)) {
-               /*
-                * Rejigger each thread's expiry time so that one will
-                * notice before we hit the process-cumulative expiry time.
-                */
-               union cpu_time_count expires = { .sched = 0 };
-               expires.cpu = *newval;
-               process_timer_rebalance(tsk, clock_idx, expires, now);
+               switch (clock_idx) {
+               case CPUCLOCK_PROF:
+                       tsk->signal->cputime_expires.prof_exp = *newval;
+                       break;
+               case CPUCLOCK_VIRT:
+                       tsk->signal->cputime_expires.virt_exp = *newval;
+                       break;
+               }
        }
 }
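
From userspace, the timers set through this path are RLIMIT_CPU and the ITIMER_PROF/ITIMER_VIRTUAL interval timers. A small sketch (the one-second and half-second values are arbitrary):

    #include <signal.h>
    #include <stdio.h>
    #include <sys/resource.h>
    #include <sys/time.h>
    #include <unistd.h>

    static void on_vtalrm(int sig)
    {
            (void)sig;
            write(STDOUT_FILENO, "SIGVTALRM\n", 10);
    }

    int main(void)
    {
            /* RLIMIT_CPU arms the CPUCLOCK_PROF process-wide timer:
             * SIGXCPU (fatal by default) after 1s of CPU time. */
            struct rlimit rl = { .rlim_cur = 1, .rlim_max = 2 };
            /* ITIMER_VIRTUAL takes the CPUCLOCK_VIRT branch of the
             * switch above: SIGVTALRM after ~0.5s of user time. */
            struct itimerval it = { .it_value = { .tv_usec = 500000 } };

            signal(SIGVTALRM, on_vtalrm);
            if (setrlimit(RLIMIT_CPU, &rl) || setitimer(ITIMER_VIRTUAL, &it, NULL))
                    perror("setup");

            for (;;)
                    ;       /* burn CPU until SIGXCPU terminates us */
    }
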
 
index 5131e5471169226ef8db42f20792c8ffdac6d12b..b931d7cedbfa9fd70a47d07535d3b791661ec39a 100644 (file)
@@ -222,6 +222,15 @@ static int posix_ktime_get_ts(clockid_t which_clock, struct timespec *tp)
        return 0;
 }
 
+/*
+ * Get monotonic time for posix timers
+ */
+static int posix_get_monotonic_raw(clockid_t which_clock, struct timespec *tp)
+{
+       getrawmonotonic(tp);
+       return 0;
+}
+
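
Userspace reaches the new clock as CLOCK_MONOTONIC_RAW via clock_gettime(). A minimal consumer, assuming a libc that already knows the constant (older glibc needs -lrt and the fallback define):

    #include <stdio.h>
    #include <time.h>

    #ifndef CLOCK_MONOTONIC_RAW
    #define CLOCK_MONOTONIC_RAW 4   /* Linux clockid of this patch era */
    #endif

    int main(void)
    {
            struct timespec raw, mono;

            /* CLOCK_MONOTONIC_RAW is hardware time, untouched by NTP
             * slewing; CLOCK_MONOTONIC is the adjusted variant. */
            clock_gettime(CLOCK_MONOTONIC_RAW, &raw);
            clock_gettime(CLOCK_MONOTONIC, &mono);

            printf("raw:  %ld.%09ld\n", (long)raw.tv_sec, raw.tv_nsec);
            printf("mono: %ld.%09ld\n", (long)mono.tv_sec, mono.tv_nsec);
            return 0;
    }

Over time the two readings drift apart by exactly the slew NTP has applied to CLOCK_MONOTONIC.
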
 /*
  * Initialize everything, well, just everything in Posix clocks/timers ;)
  */
@@ -235,9 +244,15 @@ static __init int init_posix_timers(void)
                .clock_get = posix_ktime_get_ts,
                .clock_set = do_posix_clock_nosettime,
        };
+       struct k_clock clock_monotonic_raw = {
+               .clock_getres = hrtimer_get_res,
+               .clock_get = posix_get_monotonic_raw,
+               .clock_set = do_posix_clock_nosettime,
+       };
 
        register_posix_clock(CLOCK_REALTIME, &clock_realtime);
        register_posix_clock(CLOCK_MONOTONIC, &clock_monotonic);
+       register_posix_clock(CLOCK_MONOTONIC_RAW, &clock_monotonic_raw);
 
        posix_timers_cache = kmem_cache_create("posix_timers_cache",
                                        sizeof (struct k_itimer), 0, SLAB_PANIC,
@@ -298,6 +313,7 @@ void do_schedule_next_timer(struct siginfo *info)
 
 int posix_timer_event(struct k_itimer *timr, int si_private)
 {
+       int shared, ret;
        /*
         * FIXME: if ->sigq is queued we can race with
         * dequeue_signal()->do_schedule_next_timer().
@@ -311,25 +327,10 @@ int posix_timer_event(struct k_itimer *timr, int si_private)
         */
        timr->sigq->info.si_sys_private = si_private;
 
-       timr->sigq->info.si_signo = timr->it_sigev_signo;
-       timr->sigq->info.si_code = SI_TIMER;
-       timr->sigq->info.si_tid = timr->it_id;
-       timr->sigq->info.si_value = timr->it_sigev_value;
-
-       if (timr->it_sigev_notify & SIGEV_THREAD_ID) {
-               struct task_struct *leader;
-               int ret = send_sigqueue(timr->sigq, timr->it_process, 0);
-
-               if (likely(ret >= 0))
-                       return ret;
-
-               timr->it_sigev_notify = SIGEV_SIGNAL;
-               leader = timr->it_process->group_leader;
-               put_task_struct(timr->it_process);
-               timr->it_process = leader;
-       }
-
-       return send_sigqueue(timr->sigq, timr->it_process, 1);
+       shared = !(timr->it_sigev_notify & SIGEV_THREAD_ID);
+       ret = send_sigqueue(timr->sigq, timr->it_process, shared);
+       /* If we failed to send the signal, the timer stops. */
+       return ret > 0;
 }
 EXPORT_SYMBOL_GPL(posix_timer_event);
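
The shared flag here encodes who the signal is aimed at: SIGEV_THREAD_ID means one specific thread, everything else means the whole thread group. A hedged sketch of the thread-directed case; the fallback defines cover older libc headers, and SYS_gettid is Linux-specific:

    #define _GNU_SOURCE
    #include <signal.h>
    #include <stdio.h>
    #include <time.h>
    #include <unistd.h>
    #include <sys/syscall.h>

    #ifndef SIGEV_THREAD_ID
    #define SIGEV_THREAD_ID 4
    #endif
    #ifndef sigev_notify_thread_id
    #define sigev_notify_thread_id _sigev_un._tid
    #endif

    int main(void)
    {
            timer_t tid;
            struct sigevent sev = { .sigev_notify = SIGEV_THREAD_ID };

            /* Deliver to this thread only: shared == 0 in the kernel. */
            sev.sigev_signo = SIGRTMIN;
            sev.sigev_notify_thread_id = syscall(SYS_gettid);

            if (timer_create(CLOCK_MONOTONIC, &sev, &tid))
                    perror("timer_create");
            return 0;
    }
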
 
@@ -468,11 +469,9 @@ sys_timer_create(const clockid_t which_clock,
                 struct sigevent __user *timer_event_spec,
                 timer_t __user * created_timer_id)
 {
-       int error = 0;
-       struct k_itimer *new_timer = NULL;
-       int new_timer_id;
-       struct task_struct *process = NULL;
-       unsigned long flags;
+       struct k_itimer *new_timer;
+       int error, new_timer_id;
+       struct task_struct *process;
        sigevent_t event;
        int it_id_set = IT_ID_NOT_SET;
 
@@ -490,12 +489,11 @@ sys_timer_create(const clockid_t which_clock,
                goto out;
        }
        spin_lock_irq(&idr_lock);
-       error = idr_get_new(&posix_timers_id, (void *) new_timer,
-                           &new_timer_id);
+       error = idr_get_new(&posix_timers_id, new_timer, &new_timer_id);
        spin_unlock_irq(&idr_lock);
-       if (error == -EAGAIN)
-               goto retry;
-       else if (error) {
+       if (error) {
+               if (error == -EAGAIN)
+                       goto retry;
                /*
                 * Weird looking, but we return EAGAIN if the IDR is
                 * full (proper POSIX return value for this)
@@ -526,67 +524,43 @@ sys_timer_create(const clockid_t which_clock,
                        error = -EFAULT;
                        goto out;
                }
-               new_timer->it_sigev_notify = event.sigev_notify;
-               new_timer->it_sigev_signo = event.sigev_signo;
-               new_timer->it_sigev_value = event.sigev_value;
-
-               read_lock(&tasklist_lock);
-               if ((process = good_sigevent(&event))) {
-                       /*
-                        * We may be setting up this process for another
-                        * thread.  It may be exiting.  To catch this
-                        * case we check the PF_EXITING flag.  If
-                        * the flag is not set, the siglock will catch
-                        * him before it is too late (in exit_itimers).
-                        *
-                        * The exec case is a bit more involved but easy
-                        * to code.  If the process is in our thread
-                        * group (and it must be or we would not allow
-                        * it here) and is doing an exec, it will cause
-                        * us to be killed.  In this case it will wait
-                        * for us to die which means we can finish this
-                        * linkage with our last gasp. I.e. no code :)
-                        */
-                       spin_lock_irqsave(&process->sighand->siglock, flags);
-                       if (!(process->flags & PF_EXITING)) {
-                               new_timer->it_process = process;
-                               list_add(&new_timer->list,
-                                        &process->signal->posix_timers);
-                               if (new_timer->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID))
-                                       get_task_struct(process);
-                               spin_unlock_irqrestore(&process->sighand->siglock, flags);
-                       } else {
-                               spin_unlock_irqrestore(&process->sighand->siglock, flags);
-                               process = NULL;
-                       }
-               }
-               read_unlock(&tasklist_lock);
+               rcu_read_lock();
+               process = good_sigevent(&event);
+               if (process)
+                       get_task_struct(process);
+               rcu_read_unlock();
                if (!process) {
                        error = -EINVAL;
                        goto out;
                }
        } else {
-               new_timer->it_sigev_notify = SIGEV_SIGNAL;
-               new_timer->it_sigev_signo = SIGALRM;
-               new_timer->it_sigev_value.sival_int = new_timer->it_id;
+               event.sigev_notify = SIGEV_SIGNAL;
+               event.sigev_signo = SIGALRM;
+               event.sigev_value.sival_int = new_timer->it_id;
                process = current->group_leader;
-               spin_lock_irqsave(&process->sighand->siglock, flags);
-               new_timer->it_process = process;
-               list_add(&new_timer->list, &process->signal->posix_timers);
-               spin_unlock_irqrestore(&process->sighand->siglock, flags);
+               get_task_struct(process);
        }
 
+       new_timer->it_sigev_notify     = event.sigev_notify;
+       new_timer->sigq->info.si_signo = event.sigev_signo;
+       new_timer->sigq->info.si_value = event.sigev_value;
+       new_timer->sigq->info.si_tid   = new_timer->it_id;
+       new_timer->sigq->info.si_code  = SI_TIMER;
+
+       spin_lock_irq(&current->sighand->siglock);
+       new_timer->it_process = process;
+       list_add(&new_timer->list, &current->signal->posix_timers);
+       spin_unlock_irq(&current->sighand->siglock);
+
+       return 0;
        /*
         * In the case of the timer belonging to another task, after
         * the task is unlocked, the timer is owned by the other task
         * and may cease to exist at any time.  Don't use or modify
         * new_timer after the unlock call.
         */
-
 out:
-       if (error)
-               release_posix_timer(new_timer, it_id_set);
-
+       release_posix_timer(new_timer, it_id_set);
        return error;
 }
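
The else-branch is what a NULL sigevent from userspace becomes: SIGEV_SIGNAL with SIGALRM and the timer id in si_value. For instance (error handling trimmed; link with -lrt on older glibc):

    #include <signal.h>
    #include <stdio.h>
    #include <time.h>

    int main(void)
    {
            timer_t tid;
            sigset_t set;
            siginfo_t info;
            struct itimerspec its = { .it_value = { .tv_sec = 1 } };

            sigemptyset(&set);
            sigaddset(&set, SIGALRM);
            sigprocmask(SIG_BLOCK, &set, NULL);

            /* NULL sigevent: the kernel fills in SIGALRM + timer id. */
            timer_create(CLOCK_REALTIME, NULL, &tid);
            timer_settime(tid, 0, &its, NULL);

            sigwaitinfo(&set, &info);
            printf("si_code=%d (SI_TIMER=%d) value=%d\n",
                   info.si_code, SI_TIMER, info.si_value.sival_int);
            return 0;
    }
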
 
@@ -597,7 +571,7 @@ out:
  * the find to the timer lock.  To avoid a deadlock, the timer id MUST
  * be released without holding the timer lock.
  */
-static struct k_itimer * lock_timer(timer_t timer_id, unsigned long *flags)
+static struct k_itimer *lock_timer(timer_t timer_id, unsigned long *flags)
 {
        struct k_itimer *timr;
        /*
@@ -605,23 +579,20 @@ static struct k_itimer * lock_timer(timer_t timer_id, unsigned long *flags)
         * flags part over to the timer lock.  Must not let interrupts in
         * while we are moving the lock.
         */
-
        spin_lock_irqsave(&idr_lock, *flags);
-       timr = (struct k_itimer *) idr_find(&posix_timers_id, (int) timer_id);
+       timr = idr_find(&posix_timers_id, (int)timer_id);
        if (timr) {
                spin_lock(&timr->it_lock);
-
-               if ((timr->it_id != timer_id) || !(timr->it_process) ||
-                               !same_thread_group(timr->it_process, current)) {
-                       spin_unlock(&timr->it_lock);
-                       spin_unlock_irqrestore(&idr_lock, *flags);
-                       timr = NULL;
-               } else
+               if (timr->it_process &&
+                   same_thread_group(timr->it_process, current)) {
                        spin_unlock(&idr_lock);
-       } else
-               spin_unlock_irqrestore(&idr_lock, *flags);
+                       return timr;
+               }
+               spin_unlock(&timr->it_lock);
+       }
+       spin_unlock_irqrestore(&idr_lock, *flags);
 
-       return timr;
+       return NULL;
 }
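
The comment describes a generic lock handoff, worth seeing outside the kernel. A standalone pthread sketch of the same pattern; the table and object types are invented for illustration:

    #include <pthread.h>
    #include <stddef.h>

    #define TABLE_SIZE 64

    struct obj {
            pthread_mutex_t lock;
            int id;
            int live;
    };

    static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;
    static struct obj *table[TABLE_SIZE];

    /* Hand-over-hand lookup in the style of lock_timer(): the table
     * lock pins the object just long enough to take its private lock;
     * the id can later be freed without the object lock held. */
    static struct obj *lookup_and_lock(int id)
    {
            struct obj *o = NULL;

            pthread_mutex_lock(&table_lock);
            if (id >= 0 && id < TABLE_SIZE && table[id]) {
                    o = table[id];
                    pthread_mutex_lock(&o->lock);
                    if (!o->live) {                 /* raced with deletion */
                            pthread_mutex_unlock(&o->lock);
                            o = NULL;
                    }
            }
            pthread_mutex_unlock(&table_lock);
            return o;                               /* o->lock held if non-NULL */
    }

    int main(void)
    {
            static struct obj o = { PTHREAD_MUTEX_INITIALIZER, 7, 1 };
            struct obj *p;

            table[7] = &o;
            p = lookup_and_lock(7);
            if (p)
                    pthread_mutex_unlock(&p->lock);
            return p == &o ? 0 : 1;
    }

A concurrent delete either misses the object entirely or blocks on its lock until the caller releases it.
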
 
 /*
@@ -862,8 +833,7 @@ retry_delete:
         * This keeps any tasks waiting on the spin lock from thinking
         * they got something (see the lock code above).
         */
-       if (timer->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID))
-               put_task_struct(timer->it_process);
+       put_task_struct(timer->it_process);
        timer->it_process = NULL;
 
        unlock_timer(timer, flags);
@@ -890,8 +860,7 @@ retry_delete:
         * This keeps any tasks waiting on the spin lock from thinking
         * they got something (see the lock code above).
         */
-       if (timer->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID))
-               put_task_struct(timer->it_process);
+       put_task_struct(timer->it_process);
        timer->it_process = NULL;
 
        unlock_timer(timer, flags);
index 90b5b123f7a1ee2814686ed791a4d1e45c95935b..85cb90588a55ca54348f00a46cdd9199c6bba2bf 100644 (file)
 #include <linux/freezer.h>
 #include <linux/cpu.h>
 #include <linux/delay.h>
-#include <linux/byteorder/swabb.h>
 #include <linux/stat.h>
 #include <linux/srcu.h>
 #include <linux/slab.h>
+#include <asm/byteorder.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and "
index 6f230596bd0c1d21a2c68ffbff8207e93dcd65b5..d906f72b42d23ae1d8c2355d9b605e5fd0761eaa 100644 (file)
@@ -71,6 +71,7 @@
 #include <linux/debugfs.h>
 #include <linux/ctype.h>
 #include <linux/ftrace.h>
+#include <trace/sched.h>
 
 #include <asm/tlb.h>
 #include <asm/irq_regs.h>
@@ -1936,6 +1937,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
                 * just go back and repeat.
                 */
                rq = task_rq_lock(p, &flags);
+               trace_sched_wait_task(rq, p);
                running = task_running(rq, p);
                on_rq = p->se.on_rq;
                ncsw = 0;
@@ -2297,9 +2299,7 @@ out_activate:
        success = 1;
 
 out_running:
-       trace_mark(kernel_sched_wakeup,
-               "pid %d state %ld ## rq %p task %p rq->curr %p",
-               p->pid, p->state, rq, p, rq->curr);
+       trace_sched_wakeup(rq, p);
        check_preempt_curr(rq, p, sync);
 
        p->state = TASK_RUNNING;
@@ -2432,9 +2432,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
                p->sched_class->task_new(rq, p);
                inc_nr_running(rq);
        }
-       trace_mark(kernel_sched_wakeup_new,
-               "pid %d state %ld ## rq %p task %p rq->curr %p",
-               p->pid, p->state, rq, p, rq->curr);
+       trace_sched_wakeup_new(rq, p);
        check_preempt_curr(rq, p, 0);
 #ifdef CONFIG_SMP
        if (p->sched_class->task_wake_up)
@@ -2607,11 +2605,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
        struct mm_struct *mm, *oldmm;
 
        prepare_task_switch(rq, prev, next);
-       trace_mark(kernel_sched_schedule,
-               "prev_pid %d next_pid %d prev_state %ld "
-               "## rq %p prev %p next %p",
-               prev->pid, next->pid, prev->state,
-               rq, prev, next);
+       trace_sched_switch(rq, prev, next);
        mm = next->mm;
        oldmm = prev->active_mm;
        /*
@@ -2851,6 +2845,7 @@ static void sched_migrate_task(struct task_struct *p, int dest_cpu)
            || unlikely(!cpu_active(dest_cpu)))
                goto out;
 
+       trace_sched_migrate_task(rq, p, dest_cpu);
        /* force the process onto the specified CPU */
        if (migrate_task(p, dest_cpu, &req)) {
                /* Need to wait for migration thread (might exit: take ref). */
@@ -4052,23 +4047,26 @@ DEFINE_PER_CPU(struct kernel_stat, kstat);
 EXPORT_PER_CPU_SYMBOL(kstat);
 
 /*
- * Return p->sum_exec_runtime plus any more ns on the sched_clock
- * that have not yet been banked in case the task is currently running.
+ * Return any ns on the sched_clock that have not yet been banked
+ * for @p, in case that task is currently running.
  */
-unsigned long long task_sched_runtime(struct task_struct *p)
+unsigned long long task_delta_exec(struct task_struct *p)
 {
        unsigned long flags;
-       u64 ns, delta_exec;
        struct rq *rq;
+       u64 ns = 0;
 
        rq = task_rq_lock(p, &flags);
-       ns = p->se.sum_exec_runtime;
+
        if (task_current(rq, p)) {
+               u64 delta_exec;
+
                update_rq_clock(rq);
                delta_exec = rq->clock - p->se.exec_start;
                if ((s64)delta_exec > 0)
-                       ns += delta_exec;
+                       ns = delta_exec;
        }
+
        task_rq_unlock(rq, &flags);
 
        return ns;
@@ -4085,6 +4083,7 @@ void account_user_time(struct task_struct *p, cputime_t cputime)
        cputime64_t tmp;
 
        p->utime = cputime_add(p->utime, cputime);
+       account_group_user_time(p, cputime);
 
        /* Add user time to cpustat. */
        tmp = cputime_to_cputime64(cputime);
@@ -4109,6 +4108,7 @@ static void account_guest_time(struct task_struct *p, cputime_t cputime)
        tmp = cputime_to_cputime64(cputime);
 
        p->utime = cputime_add(p->utime, cputime);
+       account_group_user_time(p, cputime);
        p->gtime = cputime_add(p->gtime, cputime);
 
        cpustat->user = cputime64_add(cpustat->user, tmp);
@@ -4144,6 +4144,7 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
        }
 
        p->stime = cputime_add(p->stime, cputime);
+       account_group_system_time(p, cputime);
 
        /* Add system time to cpustat. */
        tmp = cputime_to_cputime64(cputime);
@@ -4185,6 +4186,7 @@ void account_steal_time(struct task_struct *p, cputime_t steal)
 
        if (p == rq->idle) {
                p->stime = cputime_add(p->stime, steal);
+               account_group_system_time(p, steal);
                if (atomic_read(&rq->nr_iowait) > 0)
                        cpustat->iowait = cputime64_add(cpustat->iowait, tmp);
                else
index 18fd17172eb66bb567ca4bcc47ca6c0cea923462..f604dae71316264445e63b4d09f26a483d61113e 100644 (file)
@@ -449,6 +449,7 @@ static void update_curr(struct cfs_rq *cfs_rq)
                struct task_struct *curtask = task_of(curr);
 
                cpuacct_charge(curtask, delta_exec);
+               account_group_exec_runtime(curtask, delta_exec);
        }
 }
 
index cdf5740ab03e8133c0a2b7713d6c77d2be1f07bf..b446dc87494fd681fd0a7d7e265402ac09936773 100644 (file)
@@ -526,6 +526,8 @@ static void update_curr_rt(struct rq *rq)
        schedstat_set(curr->se.exec_max, max(curr->se.exec_max, delta_exec));
 
        curr->se.sum_exec_runtime += delta_exec;
+       account_group_exec_runtime(curr, delta_exec);
+
        curr->se.exec_start = rq->clock;
        cpuacct_charge(curr, delta_exec);
 
@@ -1458,7 +1460,7 @@ static void watchdog(struct rq *rq, struct task_struct *p)
                p->rt.timeout++;
                next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ);
                if (p->rt.timeout > next)
-                       p->it_sched_expires = p->se.sum_exec_runtime;
+                       p->cputime_expires.sched_exp = p->se.sum_exec_runtime;
        }
 }
 
index 8385d43987e29b3b6a4775947d9a52fd22e8554c..b8c156979cf2ad600b0f9b59f9a935cec86aab85 100644 (file)
@@ -270,3 +270,89 @@ sched_info_switch(struct task_struct *prev, struct task_struct *next)
 #define sched_info_switch(t, next)             do { } while (0)
 #endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */
 
+/*
+ * The following are functions that support scheduler-internal time accounting.
+ * These functions are generally called at the timer tick.  None of this depends
+ * on CONFIG_SCHEDSTATS.
+ */
+
+/**
+ * account_group_user_time - Maintain utime for a thread group.
+ *
+ * @tsk:       Pointer to task structure.
+ * @cputime:   Time value by which to increment the utime field of the
+ *             thread_group_cputime structure.
+ *
+ * If thread group time is being maintained, get the structure for the
+ * running CPU and update the utime field there.
+ */
+static inline void account_group_user_time(struct task_struct *tsk,
+                                          cputime_t cputime)
+{
+       struct signal_struct *sig;
+
+       sig = tsk->signal;
+       if (unlikely(!sig))
+               return;
+       if (sig->cputime.totals) {
+               struct task_cputime *times;
+
+               times = per_cpu_ptr(sig->cputime.totals, get_cpu());
+               times->utime = cputime_add(times->utime, cputime);
+               put_cpu_no_resched();
+       }
+}
+
+/**
+ * account_group_system_time - Maintain stime for a thread group.
+ *
+ * @tsk:       Pointer to task structure.
+ * @cputime:   Time value by which to increment the stime field of the
+ *             thread_group_cputime structure.
+ *
+ * If thread group time is being maintained, get the structure for the
+ * running CPU and update the stime field there.
+ */
+static inline void account_group_system_time(struct task_struct *tsk,
+                                            cputime_t cputime)
+{
+       struct signal_struct *sig;
+
+       sig = tsk->signal;
+       if (unlikely(!sig))
+               return;
+       if (sig->cputime.totals) {
+               struct task_cputime *times;
+
+               times = per_cpu_ptr(sig->cputime.totals, get_cpu());
+               times->stime = cputime_add(times->stime, cputime);
+               put_cpu_no_resched();
+       }
+}
+
+/**
+ * account_group_exec_runtime - Maintain exec runtime for a thread group.
+ *
+ * @tsk:       Pointer to task structure.
+ * @ns:                Time value by which to increment the sum_exec_runtime field
+ *             of the thread_group_cputime structure.
+ *
+ * If thread group time is being maintained, get the structure for the
+ * running CPU and update the sum_exec_runtime field there.
+ */
+static inline void account_group_exec_runtime(struct task_struct *tsk,
+                                             unsigned long long ns)
+{
+       struct signal_struct *sig;
+
+       sig = tsk->signal;
+       if (unlikely(!sig))
+               return;
+       if (sig->cputime.totals) {
+               struct task_cputime *times;
+
+               times = per_cpu_ptr(sig->cputime.totals, get_cpu());
+               times->sum_exec_runtime += ns;
+               put_cpu_no_resched();
+       }
+}
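
All three helpers share one shape: if group accounting is enabled (totals non-NULL), bump this CPU's slot and leave. A userspace model with C11 atomics, using fixed slots in place of the kernel's per-CPU allocation; NSLOTS and every name here are illustrative:

    #include <stdatomic.h>
    #include <stdint.h>

    #define NSLOTS 8        /* per-CPU in the kernel; fixed slots here */

    struct group_cputime {
            /* NULL when group accounting is off (sig->cputime.totals) */
            _Atomic uint64_t (*totals)[NSLOTS];
    };

    static void account_runtime(struct group_cputime *g, int slot, uint64_t ns)
    {
            if (!g->totals)
                    return;
            atomic_fetch_add_explicit(&(*g->totals)[slot], ns,
                                      memory_order_relaxed);
    }

    /* thread_group_cputime() analogue: sum every slot. */
    static uint64_t group_runtime(const struct group_cputime *g)
    {
            uint64_t sum = 0;

            for (int i = 0; g->totals && i < NSLOTS; i++)
                    sum += atomic_load_explicit(&(*g->totals)[i],
                                                memory_order_relaxed);
            return sum;
    }

    int main(void)
    {
            _Atomic uint64_t slots[NSLOTS] = { 0 };
            struct group_cputime g = { &slots };

            account_runtime(&g, 3, 1000);
            account_runtime(&g, 5, 500);
            return group_runtime(&g) == 1500 ? 0 : 1;
    }

Writers never share a slot, so the tick-time update stays contention-free; the cost moves to readers, which must sum all slots, the same trade-off thread_group_cputime() makes.
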
index e661b01d340f06a17afb6cfef13a712ae0338ff9..105217da5c82eeed7fef7d7c66cf8ec59c6cc2dc 100644 (file)
@@ -27,6 +27,7 @@
 #include <linux/freezer.h>
 #include <linux/pid_namespace.h>
 #include <linux/nsproxy.h>
+#include <trace/sched.h>
 
 #include <asm/param.h>
 #include <asm/uaccess.h>
@@ -803,6 +804,8 @@ static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
        struct sigpending *pending;
        struct sigqueue *q;
 
+       trace_sched_signal_send(sig, t);
+
        assert_spin_locked(&t->sighand->siglock);
        if (!prepare_signal(sig, t))
                return 0;
@@ -1338,6 +1341,7 @@ int do_notify_parent(struct task_struct *tsk, int sig)
        struct siginfo info;
        unsigned long flags;
        struct sighand_struct *psig;
+       struct task_cputime cputime;
        int ret = sig;
 
        BUG_ON(sig == -1);
@@ -1368,10 +1372,9 @@ int do_notify_parent(struct task_struct *tsk, int sig)
 
        info.si_uid = tsk->uid;
 
-       info.si_utime = cputime_to_clock_t(cputime_add(tsk->utime,
-                                                      tsk->signal->utime));
-       info.si_stime = cputime_to_clock_t(cputime_add(tsk->stime,
-                                                      tsk->signal->stime));
+       thread_group_cputime(tsk, &cputime);
+       info.si_utime = cputime_to_jiffies(cputime.utime);
+       info.si_stime = cputime_to_jiffies(cputime.stime);
 
        info.si_status = tsk->exit_code & 0x7f;
        if (tsk->exit_code & 0x80)
index 83ba21a13bd470cea2815d6792e8ff24af43e727..7110daeb9a90b2b585d97cf0fda94b7eb0b1869f 100644 (file)
@@ -267,16 +267,12 @@ asmlinkage void do_softirq(void)
  */
 void irq_enter(void)
 {
-#ifdef CONFIG_NO_HZ
        int cpu = smp_processor_id();
+
        if (idle_cpu(cpu) && !in_interrupt())
-               tick_nohz_stop_idle(cpu);
-#endif
+               tick_check_idle(cpu);
+
        __irq_enter();
-#ifdef CONFIG_NO_HZ
-       if (idle_cpu(cpu))
-               tick_nohz_update_jiffies();
-#endif
 }
 
 #ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
index 0bc8fa3c2288110b49fad4e9eaab2326f52c69f7..53879cdae483b6371543bdeb94818292e91f07bb 100644 (file)
@@ -853,38 +853,28 @@ asmlinkage long sys_setfsgid(gid_t gid)
        return old_fsgid;
 }
 
+void do_sys_times(struct tms *tms)
+{
+       struct task_cputime cputime;
+       cputime_t cutime, cstime;
+
+       spin_lock_irq(&current->sighand->siglock);
+       thread_group_cputime(current, &cputime);
+       cutime = current->signal->cutime;
+       cstime = current->signal->cstime;
+       spin_unlock_irq(&current->sighand->siglock);
+       tms->tms_utime = cputime_to_clock_t(cputime.utime);
+       tms->tms_stime = cputime_to_clock_t(cputime.stime);
+       tms->tms_cutime = cputime_to_clock_t(cutime);
+       tms->tms_cstime = cputime_to_clock_t(cstime);
+}
+
 asmlinkage long sys_times(struct tms __user * tbuf)
 {
-       /*
-        *      In the SMP world we might just be unlucky and have one of
-        *      the times increment as we use it. Since the value is an
-        *      atomically safe type this is just fine. Conceptually its
-        *      as if the syscall took an instant longer to occur.
-        */
        if (tbuf) {
                struct tms tmp;
-               struct task_struct *tsk = current;
-               struct task_struct *t;
-               cputime_t utime, stime, cutime, cstime;
-
-               spin_lock_irq(&tsk->sighand->siglock);
-               utime = tsk->signal->utime;
-               stime = tsk->signal->stime;
-               t = tsk;
-               do {
-                       utime = cputime_add(utime, t->utime);
-                       stime = cputime_add(stime, t->stime);
-                       t = next_thread(t);
-               } while (t != tsk);
-
-               cutime = tsk->signal->cutime;
-               cstime = tsk->signal->cstime;
-               spin_unlock_irq(&tsk->sighand->siglock);
-
-               tmp.tms_utime = cputime_to_clock_t(utime);
-               tmp.tms_stime = cputime_to_clock_t(stime);
-               tmp.tms_cutime = cputime_to_clock_t(cutime);
-               tmp.tms_cstime = cputime_to_clock_t(cstime);
+
+               do_sys_times(&tmp);
                if (copy_to_user(tbuf, &tmp, sizeof(struct tms)))
                        return -EFAULT;
        }
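
The userspace face of this path is times(2). A minimal caller:

    #include <stdio.h>
    #include <sys/times.h>
    #include <unistd.h>

    int main(void)
    {
            struct tms t;
            long hz = sysconf(_SC_CLK_TCK);

            times(&t);      /* served by do_sys_times() above */
            printf("utime  %.2fs  stime  %.2fs\n",
                   (double)t.tms_utime / hz, (double)t.tms_stime / hz);
            printf("cutime %.2fs  cstime %.2fs\n",
                   (double)t.tms_cutime / hz, (double)t.tms_cstime / hz);
            return 0;
    }
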
@@ -1449,7 +1439,6 @@ asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit __user *r
 asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim)
 {
        struct rlimit new_rlim, *old_rlim;
-       unsigned long it_prof_secs;
        int retval;
 
        if (resource >= RLIM_NLIMITS)
@@ -1503,18 +1492,7 @@ asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim)
        if (new_rlim.rlim_cur == RLIM_INFINITY)
                goto out;
 
-       it_prof_secs = cputime_to_secs(current->signal->it_prof_expires);
-       if (it_prof_secs == 0 || new_rlim.rlim_cur <= it_prof_secs) {
-               unsigned long rlim_cur = new_rlim.rlim_cur;
-               cputime_t cputime;
-
-               cputime = secs_to_cputime(rlim_cur);
-               read_lock(&tasklist_lock);
-               spin_lock_irq(&current->sighand->siglock);
-               set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL);
-               spin_unlock_irq(&current->sighand->siglock);
-               read_unlock(&tasklist_lock);
-       }
+       update_rlimit_cpu(new_rlim.rlim_cur);
 out:
        return 0;
 }
@@ -1552,11 +1530,8 @@ out:
  *
  */
 
-static void accumulate_thread_rusage(struct task_struct *t, struct rusage *r,
-                                    cputime_t *utimep, cputime_t *stimep)
+static void accumulate_thread_rusage(struct task_struct *t, struct rusage *r)
 {
-       *utimep = cputime_add(*utimep, t->utime);
-       *stimep = cputime_add(*stimep, t->stime);
        r->ru_nvcsw += t->nvcsw;
        r->ru_nivcsw += t->nivcsw;
        r->ru_minflt += t->min_flt;
@@ -1570,12 +1545,13 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
        struct task_struct *t;
        unsigned long flags;
        cputime_t utime, stime;
+       struct task_cputime cputime;
 
        memset((char *) r, 0, sizeof *r);
        utime = stime = cputime_zero;
 
        if (who == RUSAGE_THREAD) {
-               accumulate_thread_rusage(p, r, &utime, &stime);
+               accumulate_thread_rusage(p, r);
                goto out;
        }
 
@@ -1598,8 +1574,9 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
                                break;
 
                case RUSAGE_SELF:
-                       utime = cputime_add(utime, p->signal->utime);
-                       stime = cputime_add(stime, p->signal->stime);
+                       thread_group_cputime(p, &cputime);
+                       utime = cputime_add(utime, cputime.utime);
+                       stime = cputime_add(stime, cputime.stime);
                        r->ru_nvcsw += p->signal->nvcsw;
                        r->ru_nivcsw += p->signal->nivcsw;
                        r->ru_minflt += p->signal->min_flt;
@@ -1608,7 +1585,7 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
                        r->ru_oublock += p->signal->oublock;
                        t = p;
                        do {
-                               accumulate_thread_rusage(t, r, &utime, &stime);
+                               accumulate_thread_rusage(t, r);
                                t = next_thread(t);
                        } while (t != p);
                        break;
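
A quick way to exercise this branch is getrusage(2): RUSAGE_SELF now takes utime/stime from thread_group_cputime() while still walking the threads for the counters. A minimal caller:

    #include <stdio.h>
    #include <sys/resource.h>

    int main(void)
    {
            struct rusage self;

            /* RUSAGE_SELF: utime/stime for the whole thread group. */
            getrusage(RUSAGE_SELF, &self);
            printf("user %ld.%06lds, %ld voluntary context switches\n",
                   (long)self.ru_utime.tv_sec,
                   (long)self.ru_utime.tv_usec, self.ru_nvcsw);
            return 0;
    }
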
index 093d4acf993b73fde0d575a4e29b06db88935942..9ed2eec97526546e26c4fb640702a8907b3530a9 100644 (file)
@@ -325,6 +325,9 @@ int clocksource_register(struct clocksource *c)
        unsigned long flags;
        int ret;
 
+       /* save mult_orig on registration */
+       c->mult_orig = c->mult;
+
        spin_lock_irqsave(&clocksource_lock, flags);
        ret = clocksource_enqueue(c);
        if (!ret)
index 4c256fdb8875b54f19e1c8062371c4b362cc5773..1ca99557e929261da5825325dafba49edde8be12 100644 (file)
@@ -61,6 +61,7 @@ struct clocksource clocksource_jiffies = {
        .read           = jiffies_read,
        .mask           = 0xffffffff, /*32bits*/
        .mult           = NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */
+       .mult_orig      = NSEC_PER_JIFFY << JIFFIES_SHIFT,
        .shift          = JIFFIES_SHIFT,
 };
 
index 1ad46f3df6e76cd8994403b1c1ca72c14ec3553b..1a20715bfd6e4854e96e96eb1541f792767aeaa5 100644 (file)
 
 #include <linux/mm.h>
 #include <linux/time.h>
-#include <linux/timer.h>
 #include <linux/timex.h>
 #include <linux/jiffies.h>
 #include <linux/hrtimer.h>
 #include <linux/capability.h>
 #include <linux/math64.h>
 #include <linux/clocksource.h>
+#include <linux/workqueue.h>
 #include <asm/timex.h>
 
 /*
@@ -218,11 +218,11 @@ void second_overflow(void)
 /* Disable the cmos update - used by virtualization and embedded */
 int no_sync_cmos_clock  __read_mostly;
 
-static void sync_cmos_clock(unsigned long dummy);
+static void sync_cmos_clock(struct work_struct *work);
 
-static DEFINE_TIMER(sync_cmos_timer, sync_cmos_clock, 0, 0);
+static DECLARE_DELAYED_WORK(sync_cmos_work, sync_cmos_clock);
 
-static void sync_cmos_clock(unsigned long dummy)
+static void sync_cmos_clock(struct work_struct *work)
 {
        struct timespec now, next;
        int fail = 1;
@@ -258,13 +258,13 @@ static void sync_cmos_clock(unsigned long dummy)
                next.tv_sec++;
                next.tv_nsec -= NSEC_PER_SEC;
        }
-       mod_timer(&sync_cmos_timer, jiffies + timespec_to_jiffies(&next));
+       schedule_delayed_work(&sync_cmos_work, timespec_to_jiffies(&next));
 }
 
 static void notify_cmos_timer(void)
 {
        if (!no_sync_cmos_clock)
-               mod_timer(&sync_cmos_timer, jiffies + 1);
+               schedule_delayed_work(&sync_cmos_work, 0);
 }
 
 #else
@@ -277,38 +277,50 @@ static inline void notify_cmos_timer(void) { }
 int do_adjtimex(struct timex *txc)
 {
        struct timespec ts;
-       long save_adjust, sec;
        int result;
 
-       /* In order to modify anything, you gotta be super-user! */
-       if (txc->modes && !capable(CAP_SYS_TIME))
-               return -EPERM;
-
-       /* Now we validate the data before disabling interrupts */
-
-       if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT) {
+       /* Validate the data before disabling interrupts */
+       if (txc->modes & ADJ_ADJTIME) {
                /* singleshot must not be used with any other mode bits */
-               if (txc->modes & ~ADJ_OFFSET_SS_READ)
+               if (!(txc->modes & ADJ_OFFSET_SINGLESHOT))
                        return -EINVAL;
+               if (!(txc->modes & ADJ_OFFSET_READONLY) &&
+                   !capable(CAP_SYS_TIME))
+                       return -EPERM;
+       } else {
+               /* In order to modify anything, you gotta be super-user! */
+               if (txc->modes && !capable(CAP_SYS_TIME))
+                       return -EPERM;
+
+               /* if the quartz is off by more than 10% something is VERY wrong! */
+               if (txc->modes & ADJ_TICK &&
+                   (txc->tick <  900000/USER_HZ ||
+                    txc->tick > 1100000/USER_HZ))
+                       return -EINVAL;
+
+               if (txc->modes & ADJ_STATUS && time_state != TIME_OK)
+                       hrtimer_cancel(&leap_timer);
        }
 
-       /* if the quartz is off by more than 10% something is VERY wrong ! */
-       if (txc->modes & ADJ_TICK)
-               if (txc->tick <  900000/USER_HZ ||
-                   txc->tick > 1100000/USER_HZ)
-                       return -EINVAL;
-
-       if (time_state != TIME_OK && txc->modes & ADJ_STATUS)
-               hrtimer_cancel(&leap_timer);
        getnstimeofday(&ts);
 
        write_seqlock_irq(&xtime_lock);
 
-       /* Save for later - semantics of adjtime is to return old value */
-       save_adjust = time_adjust;
-
        /* If there are input parameters, then process them */
+       if (txc->modes & ADJ_ADJTIME) {
+               long save_adjust = time_adjust;
+
+               if (!(txc->modes & ADJ_OFFSET_READONLY)) {
+                       /* adjtime() is independent from ntp_adjtime() */
+                       time_adjust = txc->offset;
+                       ntp_update_frequency();
+               }
+               txc->offset = save_adjust;
+               goto adj_done;
+       }
        if (txc->modes) {
+               long sec;
+
                if (txc->modes & ADJ_STATUS) {
                        if ((time_status & STA_PLL) &&
                            !(txc->status & STA_PLL)) {
@@ -375,13 +387,8 @@ int do_adjtimex(struct timex *txc)
                if (txc->modes & ADJ_TAI && txc->constant > 0)
                        time_tai = txc->constant;
 
-               if (txc->modes & ADJ_OFFSET) {
-                       if (txc->modes == ADJ_OFFSET_SINGLESHOT)
-                               /* adjtime() is independent from ntp_adjtime() */
-                               time_adjust = txc->offset;
-                       else
-                               ntp_update_offset(txc->offset);
-               }
+               if (txc->modes & ADJ_OFFSET)
+                       ntp_update_offset(txc->offset);
                if (txc->modes & ADJ_TICK)
                        tick_usec = txc->tick;
 
@@ -389,22 +396,18 @@ int do_adjtimex(struct timex *txc)
                        ntp_update_frequency();
        }
 
+       txc->offset = shift_right(time_offset * NTP_INTERVAL_FREQ,
+                                 NTP_SCALE_SHIFT);
+       if (!(time_status & STA_NANO))
+               txc->offset /= NSEC_PER_USEC;
+
+adj_done:
        result = time_state;    /* mostly `TIME_OK' */
        if (time_status & (STA_UNSYNC|STA_CLOCKERR))
                result = TIME_ERROR;
 
-       if ((txc->modes == ADJ_OFFSET_SINGLESHOT) ||
-           (txc->modes == ADJ_OFFSET_SS_READ))
-               txc->offset = save_adjust;
-       else {
-               txc->offset = shift_right(time_offset * NTP_INTERVAL_FREQ,
-                                         NTP_SCALE_SHIFT);
-               if (!(time_status & STA_NANO))
-                       txc->offset /= NSEC_PER_USEC;
-       }
-       txc->freq          = shift_right((s32)(time_freq >> PPM_SCALE_INV_SHIFT) *
-                                        (s64)PPM_SCALE_INV,
-                                        NTP_SCALE_SHIFT);
+       txc->freq          = shift_right((time_freq >> PPM_SCALE_INV_SHIFT) *
+                                        (s64)PPM_SCALE_INV, NTP_SCALE_SHIFT);
        txc->maxerror      = time_maxerror;
        txc->esterror      = time_esterror;
        txc->status        = time_status;
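
The split is visible through adjtimex(2): adjtime()-compatible calls (ADJ_OFFSET_SINGLESHOT) and ntp_adjtime()-style calls now take disjoint paths. A read-only probe, which needs no CAP_SYS_TIME (the fallback define covers older headers):

    #include <stdio.h>
    #include <sys/timex.h>

    #ifndef ADJ_OFFSET_SS_READ
    #define ADJ_OFFSET_SS_READ 0xa001
    #endif

    int main(void)
    {
            struct timex tx = { .modes = 0 };       /* pure read, unprivileged */
            int state = adjtimex(&tx);

            printf("state=%d freq=%ld ppm offset=%ld\n",
                   state, tx.freq >> 16, tx.offset);

            /* The ADJ_OFFSET_SS_READ path above: report the pending
             * adjtime() slew without modifying it. */
            tx.modes = ADJ_OFFSET_SS_READ;
            if (adjtimex(&tx) != -1)
                    printf("pending adjtime slew: %ld us\n", tx.offset);
            return 0;
    }
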
index cb01cd8f919b2752cfb7d97b4dc9ea4e87834a97..f98a1b7b16e942018ffe9e5998756405695da9a4 100644 (file)
@@ -383,6 +383,19 @@ int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
        return 0;
 }
 
+/*
+ * Called from irq_enter() when idle was interrupted, to reenable the
+ * per-CPU tick device.
+ */
+void tick_check_oneshot_broadcast(int cpu)
+{
+       if (cpu_isset(cpu, tick_broadcast_oneshot_mask)) {
+               struct tick_device *td = &per_cpu(tick_cpu_device, cpu);
+
+               clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_ONESHOT);
+       }
+}
+
 /*
  * Handle oneshot mode broadcasting
  */
index 469248782c2355c0c2d31cf731dc8c692023268e..b1c05bf75ee0ce36e0664d3e2e71d00ac0e09edd 100644 (file)
@@ -36,6 +36,7 @@ extern void tick_broadcast_switch_to_oneshot(void);
 extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup);
 extern int tick_resume_broadcast_oneshot(struct clock_event_device *bc);
 extern int tick_broadcast_oneshot_active(void);
+extern void tick_check_oneshot_broadcast(int cpu);
 # else /* BROADCAST */
 static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
 {
@@ -45,6 +46,7 @@ static inline void tick_broadcast_oneshot_control(unsigned long reason) { }
 static inline void tick_broadcast_switch_to_oneshot(void) { }
 static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { }
 static inline int tick_broadcast_oneshot_active(void) { return 0; }
+static inline void tick_check_oneshot_broadcast(int cpu) { }
 # endif /* !BROADCAST */
 
 #else /* !ONESHOT */
index b711ffcb106c906be19eab01140ec1e8cafd0063..0581c11fe6c6765121a84f531475e0e3c78809b3 100644 (file)
@@ -155,7 +155,7 @@ void tick_nohz_update_jiffies(void)
        touch_softlockup_watchdog();
 }
 
-void tick_nohz_stop_idle(int cpu)
+static void tick_nohz_stop_idle(int cpu)
 {
        struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
 
@@ -377,6 +377,32 @@ ktime_t tick_nohz_get_sleep_length(void)
        return ts->sleep_length;
 }
 
+static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
+{
+       hrtimer_cancel(&ts->sched_timer);
+       ts->sched_timer.expires = ts->idle_tick;
+
+       while (1) {
+               /* Forward the time to expire in the future */
+               hrtimer_forward(&ts->sched_timer, now, tick_period);
+
+               if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
+                       hrtimer_start(&ts->sched_timer,
+                                     ts->sched_timer.expires,
+                                     HRTIMER_MODE_ABS);
+                       /* Check whether the timer was already in the past */
+                       if (hrtimer_active(&ts->sched_timer))
+                               break;
+               } else {
+                       if (!tick_program_event(ts->sched_timer.expires, 0))
+                               break;
+               }
+               /* Update jiffies and reread time */
+               tick_do_update_jiffies64(now);
+               now = ktime_get();
+       }
+}
+
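
The loop leans on hrtimer_forward() to push a stale expiry past now by whole periods. In miniature, with plain integer nanoseconds and invented names:

    #include <stdint.h>

    /* hrtimer_forward() in miniature: push an expiry that may lie in
     * the past forward by whole periods until it is strictly after
     * "now".  Returns how many periods were skipped (lost ticks). */
    static uint64_t forward_expiry(uint64_t *expires, uint64_t now,
                                   uint64_t period)
    {
            uint64_t missed;

            if (*expires > now)
                    return 0;
            missed = (now - *expires) / period + 1;
            *expires += missed * period;
            return missed;
    }

    int main(void)
    {
            uint64_t expires = 1000, missed;

            missed = forward_expiry(&expires, 4600, 1000);
            /* expires is now 5000, missed == 4 */
            return !(expires == 5000 && missed == 4);
    }

The +1 keeps the result strictly in the future even when now lands exactly on a period boundary.
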
 /**
  * tick_nohz_restart_sched_tick - restart the idle tick from the idle task
  *
@@ -430,28 +456,7 @@ void tick_nohz_restart_sched_tick(void)
         */
        ts->tick_stopped  = 0;
        ts->idle_exittime = now;
-       hrtimer_cancel(&ts->sched_timer);
-       ts->sched_timer.expires = ts->idle_tick;
-
-       while (1) {
-               /* Forward the time to expire in the future */
-               hrtimer_forward(&ts->sched_timer, now, tick_period);
-
-               if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
-                       hrtimer_start(&ts->sched_timer,
-                                     ts->sched_timer.expires,
-                                     HRTIMER_MODE_ABS);
-                       /* Check, if the timer was already in the past */
-                       if (hrtimer_active(&ts->sched_timer))
-                               break;
-               } else {
-                       if (!tick_program_event(ts->sched_timer.expires, 0))
-                               break;
-               }
-               /* Update jiffies and reread time */
-               tick_do_update_jiffies64(now);
-               now = ktime_get();
-       }
+       tick_nohz_restart(ts, now);
        local_irq_enable();
 }
 
@@ -503,10 +508,6 @@ static void tick_nohz_handler(struct clock_event_device *dev)
        update_process_times(user_mode(regs));
        profile_tick(CPU_PROFILING);
 
-       /* Do not restart, when we are in the idle loop */
-       if (ts->tick_stopped)
-               return;
-
        while (tick_nohz_reprogram(ts, now)) {
                now = ktime_get();
                tick_do_update_jiffies64(now);
@@ -552,12 +553,46 @@ static void tick_nohz_switch_to_nohz(void)
               smp_processor_id());
 }
 
+/*
+ * When NOHZ is enabled and the tick is stopped, we need to kick the
+ * tick timer from irq_enter() so that the jiffies update is kept
+ * alive during long-running softirqs. That's ugly as hell, but
+ * correctness comes first, even though the offending softirq should
+ * be fixed in its own right.
+ *
+ * Note, this is different from tick_nohz_restart. We just kick the
+ * timer and do not touch the other magic bits which need to be done
+ * when idle is left.
+ */
+static void tick_nohz_kick_tick(int cpu)
+{
+       struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
+
+       if (!ts->tick_stopped)
+               return;
+
+       tick_nohz_restart(ts, ktime_get());
+}
+
 #else
 
 static inline void tick_nohz_switch_to_nohz(void) { }
 
 #endif /* NO_HZ */
 
+/*
+ * Called from irq_enter to notify about the possible interruption of idle()
+ */
+void tick_check_idle(int cpu)
+{
+       tick_check_oneshot_broadcast(cpu);
+#ifdef CONFIG_NO_HZ
+       tick_nohz_stop_idle(cpu);
+       tick_nohz_update_jiffies();
+       tick_nohz_kick_tick(cpu);
+#endif
+}
+
 /*
  * High resolution timer specific code
  */
@@ -611,10 +646,6 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
                profile_tick(CPU_PROFILING);
        }
 
-       /* Do not restart, when we are in the idle loop */
-       if (ts->tick_stopped)
-               return HRTIMER_NORESTART;
-
        hrtimer_forward(timer, now, tick_period);
 
        return HRTIMER_RESTART;
index e91c29f961c900d7739c0dc2f27b81c480cdb55c..e7acfb482a680ea248f5268fbb7f158868938e56 100644 (file)
@@ -58,27 +58,26 @@ struct clocksource *clock;
 
 #ifdef CONFIG_GENERIC_TIME
 /**
- * __get_nsec_offset - Returns nanoseconds since last call to periodic_hook
+ * clocksource_forward_now - update clock to the current time
  *
- * private function, must hold xtime_lock lock when being
- * called. Returns the number of nanoseconds since the
- * last call to update_wall_time() (adjusted by NTP scaling)
+ * Forward the current clock to update its state since the last call to
+ * update_wall_time(). This is useful before significant clock changes,
+ * as it avoids having to deal with this time offset explicitly.
  */
-static inline s64 __get_nsec_offset(void)
+static void clocksource_forward_now(void)
 {
        cycle_t cycle_now, cycle_delta;
-       s64 ns_offset;
+       s64 nsec;
 
-       /* read clocksource: */
        cycle_now = clocksource_read(clock);
-
-       /* calculate the delta since the last update_wall_time: */
        cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
+       clock->cycle_last = cycle_now;
 
-       /* convert to nanoseconds: */
-       ns_offset = cyc2ns(clock, cycle_delta);
+       nsec = cyc2ns(clock, cycle_delta);
+       timespec_add_ns(&xtime, nsec);
 
-       return ns_offset;
+       nsec = ((s64)cycle_delta * clock->mult_orig) >> clock->shift;
+       clock->raw_time.tv_nsec += nsec;
 }
 
 /**
@@ -89,6 +88,7 @@ static inline s64 __get_nsec_offset(void)
  */
 void getnstimeofday(struct timespec *ts)
 {
+       cycle_t cycle_now, cycle_delta;
        unsigned long seq;
        s64 nsecs;
 
@@ -96,7 +96,15 @@ void getnstimeofday(struct timespec *ts)
                seq = read_seqbegin(&xtime_lock);
 
                *ts = xtime;
-               nsecs = __get_nsec_offset();
+
+               /* read clocksource: */
+               cycle_now = clocksource_read(clock);
+
+               /* calculate the delta since the last update_wall_time: */
+               cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
+
+               /* convert to nanoseconds: */
+               nsecs = cyc2ns(clock, cycle_delta);
 
        } while (read_seqretry(&xtime_lock, seq));
 
@@ -129,22 +137,22 @@ EXPORT_SYMBOL(do_gettimeofday);
  */
 int do_settimeofday(struct timespec *tv)
 {
+       struct timespec ts_delta;
        unsigned long flags;
-       time_t wtm_sec, sec = tv->tv_sec;
-       long wtm_nsec, nsec = tv->tv_nsec;
 
        if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
                return -EINVAL;
 
        write_seqlock_irqsave(&xtime_lock, flags);
 
-       nsec -= __get_nsec_offset();
+       clocksource_forward_now();
+
+       ts_delta.tv_sec = tv->tv_sec - xtime.tv_sec;
+       ts_delta.tv_nsec = tv->tv_nsec - xtime.tv_nsec;
+       wall_to_monotonic = timespec_sub(wall_to_monotonic, ts_delta);
 
-       wtm_sec  = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
-       wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
+       xtime = *tv;
 
-       set_normalized_timespec(&xtime, sec, nsec);
-       set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
        update_xtime_cache(0);
 
        clock->error = 0;
@@ -170,22 +178,19 @@ EXPORT_SYMBOL(do_settimeofday);
 static void change_clocksource(void)
 {
        struct clocksource *new;
-       cycle_t now;
-       u64 nsec;
 
        new = clocksource_get_next();
 
        if (clock == new)
                return;
 
-       new->cycle_last = 0;
-       now = clocksource_read(new);
-       nsec =  __get_nsec_offset();
-       timespec_add_ns(&xtime, nsec);
+       clocksource_forward_now();
 
-       clock = new;
-       clock->cycle_last = now;
+       new->raw_time = clock->raw_time;
 
+       clock = new;
+       clock->cycle_last = 0;
+       clock->cycle_last = clocksource_read(new);
        clock->error = 0;
        clock->xtime_nsec = 0;
        clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH);
@@ -200,10 +205,43 @@ static void change_clocksource(void)
         */
 }
 #else
+static inline void clocksource_forward_now(void) { }
 static inline void change_clocksource(void) { }
-static inline s64 __get_nsec_offset(void) { return 0; }
 #endif
 
+/**
+ * getrawmonotonic - Returns the raw monotonic time in a timespec
+ * @ts:                pointer to the timespec to be set
+ *
+ * Returns the raw monotonic time (completely un-modified by ntp)
+ */
+void getrawmonotonic(struct timespec *ts)
+{
+       unsigned long seq;
+       s64 nsecs;
+       cycle_t cycle_now, cycle_delta;
+
+       do {
+               seq = read_seqbegin(&xtime_lock);
+
+               /* read clocksource: */
+               cycle_now = clocksource_read(clock);
+
+               /* calculate the delta since the last update_wall_time: */
+               cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
+
+               /* convert to nanoseconds: */
+               nsecs = ((s64)cycle_delta * clock->mult_orig) >> clock->shift;
+
+               *ts = clock->raw_time;
+
+       } while (read_seqretry(&xtime_lock, seq));
+
+       timespec_add_ns(ts, nsecs);
+}
+EXPORT_SYMBOL(getrawmonotonic);
+
+
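
getrawmonotonic() uses the same seqlock read loop as getnstimeofday(). A compressed userspace model of that pattern in C11 atomics; the fencing is deliberately simplified, and a production seqlock needs stronger barriers around the data accesses:

    #include <stdatomic.h>
    #include <stdint.h>

    /* xtime_lock in miniature: the writer makes the sequence odd while
     * updating; readers retry if they saw an odd count or the count
     * changed under them. */
    struct seq_clock {
            _Atomic unsigned int seq;
            uint64_t sec, nsec;
    };

    static void writer_update(struct seq_clock *c, uint64_t s, uint64_t ns)
    {
            atomic_fetch_add_explicit(&c->seq, 1, memory_order_acq_rel); /* odd */
            c->sec = s;
            c->nsec = ns;
            atomic_fetch_add_explicit(&c->seq, 1, memory_order_acq_rel); /* even */
    }

    static void reader_get(struct seq_clock *c, uint64_t *s, uint64_t *ns)
    {
            unsigned int start;

            do {
                    start = atomic_load_explicit(&c->seq, memory_order_acquire);
                    *s = c->sec;
                    *ns = c->nsec;
            } while ((start & 1) ||
                     start != atomic_load_explicit(&c->seq, memory_order_acquire));
    }

    int main(void)
    {
            struct seq_clock c = { 0 };
            uint64_t s, ns;

            writer_update(&c, 1224614540, 123456789);
            reader_get(&c, &s, &ns);
            return !(s == 1224614540 && ns == 123456789);
    }
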
 /**
  * timekeeping_valid_for_hres - Check if timekeeping is suitable for hres
  */
@@ -265,8 +303,6 @@ void __init timekeeping_init(void)
 static int timekeeping_suspended;
 /* time in seconds when suspend began */
 static unsigned long timekeeping_suspend_time;
-/* xtime offset when we went into suspend */
-static s64 timekeeping_suspend_nsecs;
 
 /**
  * timekeeping_resume - Resumes the generic timekeeping subsystem.
@@ -292,8 +328,6 @@ static int timekeeping_resume(struct sys_device *dev)
                wall_to_monotonic.tv_sec -= sleep_length;
                total_sleep_time += sleep_length;
        }
-       /* Make sure that we have the correct xtime reference */
-       timespec_add_ns(&xtime, timekeeping_suspend_nsecs);
        update_xtime_cache(0);
        /* re-base the last cycle value */
        clock->cycle_last = 0;
@@ -319,8 +353,7 @@ static int timekeeping_suspend(struct sys_device *dev, pm_message_t state)
        timekeeping_suspend_time = read_persistent_clock();
 
        write_seqlock_irqsave(&xtime_lock, flags);
-       /* Get the current xtime offset */
-       timekeeping_suspend_nsecs = __get_nsec_offset();
+       clocksource_forward_now();
        timekeeping_suspended = 1;
        write_sequnlock_irqrestore(&xtime_lock, flags);
 
@@ -454,23 +487,29 @@ void update_wall_time(void)
 #else
        offset = clock->cycle_interval;
 #endif
-       clock->xtime_nsec += (s64)xtime.tv_nsec << clock->shift;
+       clock->xtime_nsec = (s64)xtime.tv_nsec << clock->shift;
 
        /* normally this loop will run just once, however in the
         * case of lost or late ticks, it will accumulate correctly.
         */
        while (offset >= clock->cycle_interval) {
                /* accumulate one interval */
-               clock->xtime_nsec += clock->xtime_interval;
-               clock->cycle_last += clock->cycle_interval;
                offset -= clock->cycle_interval;
+               clock->cycle_last += clock->cycle_interval;
 
+               clock->xtime_nsec += clock->xtime_interval;
                if (clock->xtime_nsec >= (u64)NSEC_PER_SEC << clock->shift) {
                        clock->xtime_nsec -= (u64)NSEC_PER_SEC << clock->shift;
                        xtime.tv_sec++;
                        second_overflow();
                }
 
+               clock->raw_time.tv_nsec += clock->raw_interval;
+               if (clock->raw_time.tv_nsec >= NSEC_PER_SEC) {
+                       clock->raw_time.tv_nsec -= NSEC_PER_SEC;
+                       clock->raw_time.tv_sec++;
+               }
+
                /* accumulate error between NTP and clock interval */
                clock->error += tick_length;
                clock->error -= clock->xtime_interval << (NTP_SCALE_SHIFT - clock->shift);
@@ -479,9 +518,12 @@ void update_wall_time(void)
        /* correct the clock when NTP error is too big */
        clocksource_adjust(offset);
 
-       /* store full nanoseconds into xtime */
-       xtime.tv_nsec = (s64)clock->xtime_nsec >> clock->shift;
+       /* store full nanoseconds into xtime after rounding it up and
+        * add the remainder to the error difference.
+        */
+       xtime.tv_nsec = ((s64)clock->xtime_nsec >> clock->shift) + 1;
        clock->xtime_nsec -= (s64)xtime.tv_nsec << clock->shift;
+       clock->error += clock->xtime_nsec << (NTP_SCALE_SHIFT - clock->shift);
 
        update_xtime_cache(cyc2ns(clock, offset));
 
index a40e20fd00010d000a3dfc2406888b2f81146db6..f6426911e35a6da3a711e8b8e2cb602a5057a2dc 100644 (file)
@@ -47,13 +47,14 @@ static void print_name_offset(struct seq_file *m, void *sym)
 }
 
 static void
-print_timer(struct seq_file *m, struct hrtimer *timer, int idx, u64 now)
+print_timer(struct seq_file *m, struct hrtimer *taddr, struct hrtimer *timer,
+           int idx, u64 now)
 {
 #ifdef CONFIG_TIMER_STATS
        char tmp[TASK_COMM_LEN + 1];
 #endif
        SEQ_printf(m, " #%d: ", idx);
-       print_name_offset(m, timer);
+       print_name_offset(m, taddr);
        SEQ_printf(m, ", ");
        print_name_offset(m, timer->function);
        SEQ_printf(m, ", S:%02lx", timer->state);
@@ -99,7 +100,7 @@ next_one:
                tmp = *timer;
                spin_unlock_irqrestore(&base->cpu_base->lock, flags);
 
-               print_timer(m, &tmp, i, now);
+               print_timer(m, timer, &tmp, i, now);
                next++;
                goto next_one;
        }
@@ -109,6 +110,7 @@ next_one:
 static void
 print_base(struct seq_file *m, struct hrtimer_clock_base *base, u64 now)
 {
+       SEQ_printf(m, "  .base:       %p\n", base);
        SEQ_printf(m, "  .index:      %d\n",
                        base->index);
        SEQ_printf(m, "  .resolution: %Lu nsecs\n",
@@ -183,12 +185,16 @@ static void print_cpu(struct seq_file *m, int cpu, u64 now)
 
 #ifdef CONFIG_GENERIC_CLOCKEVENTS
 static void
-print_tickdevice(struct seq_file *m, struct tick_device *td)
+print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu)
 {
        struct clock_event_device *dev = td->evtdev;
 
        SEQ_printf(m, "\n");
        SEQ_printf(m, "Tick Device: mode:     %d\n", td->mode);
+       if (cpu < 0)
+               SEQ_printf(m, "Broadcast device\n");
+       else
+               SEQ_printf(m, "Per CPU device: %d\n", cpu);
 
        SEQ_printf(m, "Clock Event Device: ");
        if (!dev) {
@@ -222,7 +228,7 @@ static void timer_list_show_tickdevices(struct seq_file *m)
        int cpu;
 
 #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
-       print_tickdevice(m, tick_get_broadcast_device());
+       print_tickdevice(m, tick_get_broadcast_device(), -1);
        SEQ_printf(m, "tick_broadcast_mask: %08lx\n",
                   tick_get_broadcast_mask()->bits[0]);
 #ifdef CONFIG_TICK_ONESHOT
@@ -232,7 +238,7 @@ static void timer_list_show_tickdevices(struct seq_file *m)
        SEQ_printf(m, "\n");
 #endif
        for_each_online_cpu(cpu)
-                  print_tickdevice(m, tick_get_device(cpu));
+               print_tickdevice(m, tick_get_device(cpu), cpu);
        SEQ_printf(m, "\n");
 }
 #else
@@ -244,7 +250,7 @@ static int timer_list_show(struct seq_file *m, void *v)
        u64 now = ktime_to_ns(ktime_get());
        int cpu;
 
-       SEQ_printf(m, "Timer List Version: v0.3\n");
+       SEQ_printf(m, "Timer List Version: v0.4\n");
        SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES);
        SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now);
 
index 510fe69351ca2ec19700802b59ac3dbe9d142019..56becf373c589ba90c36a5b6e23df0527b227663 100644 (file)
@@ -1436,9 +1436,11 @@ static void __cpuinit migrate_timers(int cpu)
        BUG_ON(cpu_online(cpu));
        old_base = per_cpu(tvec_bases, cpu);
        new_base = get_cpu_var(tvec_bases);
-
-       local_irq_disable();
-       spin_lock(&new_base->lock);
+       /*
+        * The caller is globally serialized and nobody else
+        * takes two locks at once, so deadlock is not possible.
+        */
+       spin_lock_irq(&new_base->lock);
        spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
 
        BUG_ON(old_base->running_timer);
@@ -1453,8 +1455,7 @@ static void __cpuinit migrate_timers(int cpu)
        }
 
        spin_unlock(&old_base->lock);
-       spin_unlock(&new_base->lock);
-       local_irq_enable();
+       spin_unlock_irq(&new_base->lock);
        put_cpu_var(tvec_bases);
 }
 #endif /* CONFIG_HOTPLUG_CPU */
index 263e9e6bbd60912132ba18077c0fa0d66d7e1ffb..1cb3e1f616af5fcd36a6efceebb8892ef26f4324 100644 (file)
@@ -1,23 +1,37 @@
 #
 # Architectures that offer an FTRACE implementation should select HAVE_FTRACE:
 #
+
+config NOP_TRACER
+       bool
+
 config HAVE_FTRACE
        bool
+       select NOP_TRACER
 
 config HAVE_DYNAMIC_FTRACE
        bool
 
+config HAVE_FTRACE_MCOUNT_RECORD
+       bool
+
 config TRACER_MAX_TRACE
        bool
 
+config RING_BUFFER
+       bool
+
 config TRACING
        bool
        select DEBUG_FS
+       select RING_BUFFER
        select STACKTRACE
+       select TRACEPOINTS
 
 config FTRACE
        bool "Kernel Function Tracer"
        depends on HAVE_FTRACE
+       depends on DEBUG_KERNEL
        select FRAME_POINTER
        select TRACING
        select CONTEXT_SWITCH_TRACER
@@ -36,6 +50,7 @@ config IRQSOFF_TRACER
        depends on TRACE_IRQFLAGS_SUPPORT
        depends on GENERIC_TIME
        depends on HAVE_FTRACE
+       depends on DEBUG_KERNEL
        select TRACE_IRQFLAGS
        select TRACING
        select TRACER_MAX_TRACE
@@ -59,6 +74,7 @@ config PREEMPT_TRACER
        depends on GENERIC_TIME
        depends on PREEMPT
        depends on HAVE_FTRACE
+       depends on DEBUG_KERNEL
        select TRACING
        select TRACER_MAX_TRACE
        help
@@ -86,6 +102,7 @@ config SYSPROF_TRACER
 config SCHED_TRACER
        bool "Scheduling Latency Tracer"
        depends on HAVE_FTRACE
+       depends on DEBUG_KERNEL
        select TRACING
        select CONTEXT_SWITCH_TRACER
        select TRACER_MAX_TRACE
@@ -96,16 +113,56 @@ config SCHED_TRACER
 config CONTEXT_SWITCH_TRACER
        bool "Trace process context switches"
        depends on HAVE_FTRACE
+       depends on DEBUG_KERNEL
        select TRACING
        select MARKERS
        help
          This tracer gets called from the context switch and records
          all task switches.
 
+config BOOT_TRACER
+       bool "Trace boot initcalls"
+       depends on HAVE_FTRACE
+       depends on DEBUG_KERNEL
+       select TRACING
+       help
+         This tracer helps developers optimize boot times: it records
+         the timings of the initcalls, and traces the key events and
+         the identity of tasks that can cause boot delays, such as
+         context switches.
+
+         Its aim is to be parsed by the scripts/bootgraph.pl tool to
+         produce pretty graphics about boot inefficiencies, giving a visual
+         representation of the delays during initcalls - but the raw
+         /debug/tracing/trace text output is readable too.
+
+         ( Note that tracing self tests can't be enabled if this tracer is
+           selected, because the self-tests are an initcall as well and that
+           would invalidate the boot trace. )
+
+config STACK_TRACER
+       bool "Trace max stack"
+       depends on HAVE_FTRACE
+       depends on DEBUG_KERNEL
+       select FTRACE
+       select STACKTRACE
+       help
+         This special tracer records the maximum stack footprint of the
+         kernel and displays it in debugfs/tracing/stack_trace.
+
+         This tracer works by hooking into every function call that the
+         kernel executes, and keeping a maximum stack depth value and
+         stack-trace saved. Because this logic has to execute in every
+         kernel function, all the time, this option can slow down the
+         kernel measurably and is generally intended for kernel
+         developers only.
+
+         Say N if unsure.
+
 config DYNAMIC_FTRACE
        bool "enable/disable ftrace tracepoints dynamically"
        depends on FTRACE
        depends on HAVE_DYNAMIC_FTRACE
+       depends on DEBUG_KERNEL
        default y
        help
          This option will modify all the calls to ftrace dynamically
@@ -121,12 +178,17 @@ config DYNAMIC_FTRACE
        were made. If so, it runs stop_machine (stops all CPUs)
         and modifies the code to jump over the call to ftrace.
 
+config FTRACE_MCOUNT_RECORD
+       def_bool y
+       depends on DYNAMIC_FTRACE
+       depends on HAVE_FTRACE_MCOUNT_RECORD
+
 config FTRACE_SELFTEST
        bool
 
 config FTRACE_STARTUP_TEST
        bool "Perform a startup test on ftrace"
-       depends on TRACING
+       depends on TRACING && DEBUG_KERNEL && !BOOT_TRACER
        select FTRACE_SELFTEST
        help
          This option performs a series of startup tests on ftrace. On bootup
index 71d17de172886b5841a246b2505555cd13d9a981..a85dfba88ba0402d5614d015c5faed60fe4abc55 100644 (file)
@@ -11,6 +11,7 @@ obj-y += trace_selftest_dynamic.o
 endif
 
 obj-$(CONFIG_FTRACE) += libftrace.o
+obj-$(CONFIG_RING_BUFFER) += ring_buffer.o
 
 obj-$(CONFIG_TRACING) += trace.o
 obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
@@ -19,6 +20,9 @@ obj-$(CONFIG_FTRACE) += trace_functions.o
 obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o
 obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o
 obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o
+obj-$(CONFIG_NOP_TRACER) += trace_nop.o
+obj-$(CONFIG_STACK_TRACER) += trace_stack.o
 obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
+obj-$(CONFIG_BOOT_TRACER) += trace_boot.o
 
 libftrace-y := ftrace.o
index f6e3af31b403d8eaa77128712fdb594ca48be0c0..4dda4f60a2a9262770e3195b894e48b8178296a3 100644 (file)
@@ -81,7 +81,7 @@ void clear_ftrace_function(void)
 
 static int __register_ftrace_function(struct ftrace_ops *ops)
 {
-       /* Should never be called by interrupts */
+       /* should not be called from interrupt context */
        spin_lock(&ftrace_lock);
 
        ops->next = ftrace_list;
@@ -115,6 +115,7 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
        struct ftrace_ops **p;
        int ret = 0;
 
+       /* should not be called from interrupt context */
        spin_lock(&ftrace_lock);
 
        /*
@@ -153,6 +154,30 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 
+#ifndef CONFIG_FTRACE_MCOUNT_RECORD
+/*
+ * The hash lock is only needed when the recording of the mcount
+ * callers is dynamic, i.e. done by the callers themselves at run
+ * time rather than recorded at compile time.
+ */
+static DEFINE_SPINLOCK(ftrace_hash_lock);
+#define ftrace_hash_lock(flags)          spin_lock_irqsave(&ftrace_hash_lock, flags)
+#define ftrace_hash_unlock(flags) \
+                       spin_unlock_irqrestore(&ftrace_hash_lock, flags)
+#else
+/* This is protected via the ftrace_lock with MCOUNT_RECORD. */
+#define ftrace_hash_lock(flags)   do { (void)(flags); } while (0)
+#define ftrace_hash_unlock(flags) do { } while (0)
+#endif
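Either way the call sites stay identical; under FTRACE_MCOUNT_RECORD the macros compile away and ftrace_lock protects the hash instead. A sketch of the calling pattern (ftrace_record_ip() below uses exactly this):

	unsigned long flags;

	ftrace_hash_lock(flags);	/* spin_lock_irqsave(), or a no-op */
	/* ... probe or extend ftrace_hash ... */
	ftrace_hash_unlock(flags);	/* matching unlock, or a no-op */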
+
+/*
+ * Since MCOUNT_ADDR may point to mcount itself, we do not want
+ * to confuse it with a reference read from the code while we
+ * are parsing the objcopy output of the text section. Use a
+ * variable for it instead.
+ */
+static unsigned long mcount_addr = MCOUNT_ADDR;
+
 static struct task_struct *ftraced_task;
 
 enum {
@@ -171,7 +196,6 @@ static struct hlist_head ftrace_hash[FTRACE_HASHSIZE];
 
 static DEFINE_PER_CPU(int, ftrace_shutdown_disable_cpu);
 
-static DEFINE_SPINLOCK(ftrace_shutdown_lock);
 static DEFINE_MUTEX(ftraced_lock);
 static DEFINE_MUTEX(ftrace_regex_lock);
 
@@ -294,13 +318,37 @@ static inline void ftrace_del_hash(struct dyn_ftrace *node)
 
 static void ftrace_free_rec(struct dyn_ftrace *rec)
 {
-       /* no locking, only called from kstop_machine */
-
        rec->ip = (unsigned long)ftrace_free_records;
        ftrace_free_records = rec;
        rec->flags |= FTRACE_FL_FREE;
 }
 
+void ftrace_release(void *start, unsigned long size)
+{
+       struct dyn_ftrace *rec;
+       struct ftrace_page *pg;
+       unsigned long s = (unsigned long)start;
+       unsigned long e = s + size;
+       int i;
+
+       if (ftrace_disabled || !start)
+               return;
+
+       /* should not be called from interrupt context */
+       spin_lock(&ftrace_lock);
+
+       for (pg = ftrace_pages_start; pg; pg = pg->next) {
+               for (i = 0; i < pg->index; i++) {
+                       rec = &pg->records[i];
+
+                       if ((rec->ip >= s) && (rec->ip < e))
+                               ftrace_free_rec(rec);
+               }
+       }
+       spin_unlock(&ftrace_lock);
+}
+
 static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
 {
        struct dyn_ftrace *rec;
@@ -338,7 +386,6 @@ ftrace_record_ip(unsigned long ip)
        unsigned long flags;
        unsigned long key;
        int resched;
-       int atomic;
        int cpu;
 
        if (!ftrace_enabled || ftrace_disabled)
@@ -368,9 +415,7 @@ ftrace_record_ip(unsigned long ip)
        if (ftrace_ip_in_hash(ip, key))
                goto out;
 
-       atomic = irqs_disabled();
-
-       spin_lock_irqsave(&ftrace_shutdown_lock, flags);
+       ftrace_hash_lock(flags);
 
        /* This ip may have hit the hash before the lock */
        if (ftrace_ip_in_hash(ip, key))
@@ -387,7 +432,7 @@ ftrace_record_ip(unsigned long ip)
        ftraced_trigger = 1;
 
  out_unlock:
-       spin_unlock_irqrestore(&ftrace_shutdown_lock, flags);
+       ftrace_hash_unlock(flags);
  out:
        per_cpu(ftrace_shutdown_disable_cpu, cpu)--;
 
@@ -531,6 +576,16 @@ static void ftrace_shutdown_replenish(void)
        ftrace_pages->next = (void *)get_zeroed_page(GFP_KERNEL);
 }
 
+static void print_ip_ins(const char *fmt, unsigned char *p)
+{
+       int i;
+
+       printk(KERN_CONT "%s", fmt);
+
+       for (i = 0; i < MCOUNT_INSN_SIZE; i++)
+               printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]);
+}
+
 static int
 ftrace_code_disable(struct dyn_ftrace *rec)
 {
@@ -541,10 +596,27 @@ ftrace_code_disable(struct dyn_ftrace *rec)
        ip = rec->ip;
 
        nop = ftrace_nop_replace();
-       call = ftrace_call_replace(ip, MCOUNT_ADDR);
+       call = ftrace_call_replace(ip, mcount_addr);
 
        failed = ftrace_modify_code(ip, call, nop);
        if (failed) {
+               switch (failed) {
+               case 1:
+                       WARN_ON_ONCE(1);
+                       pr_info("ftrace faulted on modifying ");
+                       print_ip_sym(ip);
+                       break;
+               case 2:
+                       WARN_ON_ONCE(1);
+                       pr_info("ftrace failed to modify ");
+                       print_ip_sym(ip);
+                       print_ip_ins(" expected: ", call);
+                       print_ip_ins(" actual: ", (unsigned char *)ip);
+                       print_ip_ins(" replace: ", nop);
+                       printk(KERN_CONT "\n");
+                       break;
+               }
+
                rec->flags |= FTRACE_FL_FAILED;
                return 0;
        }
@@ -792,47 +864,7 @@ static int ftrace_update_code(void)
        return 1;
 }
 
-static int ftraced(void *ignore)
-{
-       unsigned long usecs;
-
-       while (!kthread_should_stop()) {
-
-               set_current_state(TASK_INTERRUPTIBLE);
-
-               /* check once a second */
-               schedule_timeout(HZ);
-
-               if (unlikely(ftrace_disabled))
-                       continue;
-
-               mutex_lock(&ftrace_sysctl_lock);
-               mutex_lock(&ftraced_lock);
-               if (!ftraced_suspend && !ftraced_stop &&
-                   ftrace_update_code()) {
-                       usecs = nsecs_to_usecs(ftrace_update_time);
-                       if (ftrace_update_tot_cnt > 100000) {
-                               ftrace_update_tot_cnt = 0;
-                               pr_info("hm, dftrace overflow: %lu change%s"
-                                       " (%lu total) in %lu usec%s\n",
-                                       ftrace_update_cnt,
-                                       ftrace_update_cnt != 1 ? "s" : "",
-                                       ftrace_update_tot_cnt,
-                                       usecs, usecs != 1 ? "s" : "");
-                               ftrace_disabled = 1;
-                               WARN_ON_ONCE(1);
-                       }
-               }
-               mutex_unlock(&ftraced_lock);
-               mutex_unlock(&ftrace_sysctl_lock);
-
-               ftrace_shutdown_replenish();
-       }
-       __set_current_state(TASK_RUNNING);
-       return 0;
-}
-
-static int __init ftrace_dyn_table_alloc(void)
+static int __init ftrace_dyn_table_alloc(unsigned long num_to_init)
 {
        struct ftrace_page *pg;
        int cnt;
@@ -859,7 +891,9 @@ static int __init ftrace_dyn_table_alloc(void)
 
        pg = ftrace_pages = ftrace_pages_start;
 
-       cnt = NR_TO_INIT / ENTRIES_PER_PAGE;
+       cnt = num_to_init / ENTRIES_PER_PAGE;
+       pr_info("ftrace: allocating %lu hash entries in %d pages\n",
+               num_to_init, cnt);
 
        for (i = 0; i < cnt; i++) {
                pg->next = (void *)get_zeroed_page(GFP_KERNEL);
@@ -901,6 +935,8 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
 
        (*pos)++;
 
+       /* should not be called from interrupt context */
+       spin_lock(&ftrace_lock);
  retry:
        if (iter->idx >= iter->pg->index) {
                if (iter->pg->next) {
@@ -910,15 +946,13 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
                }
        } else {
                rec = &iter->pg->records[iter->idx++];
-               if ((!(iter->flags & FTRACE_ITER_FAILURES) &&
+               if ((rec->flags & FTRACE_FL_FREE) ||
+
+                   (!(iter->flags & FTRACE_ITER_FAILURES) &&
                     (rec->flags & FTRACE_FL_FAILED)) ||
 
                    ((iter->flags & FTRACE_ITER_FAILURES) &&
-                    (!(rec->flags & FTRACE_FL_FAILED) ||
-                     (rec->flags & FTRACE_FL_FREE))) ||
-
-                   ((iter->flags & FTRACE_ITER_FILTER) &&
-                    !(rec->flags & FTRACE_FL_FILTER)) ||
+                    !(rec->flags & FTRACE_FL_FAILED)) ||
 
                    ((iter->flags & FTRACE_ITER_NOTRACE) &&
                     !(rec->flags & FTRACE_FL_NOTRACE))) {
@@ -926,6 +960,7 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
                        goto retry;
                }
        }
+       spin_unlock(&ftrace_lock);
 
        iter->pos = *pos;
 
@@ -1039,8 +1074,8 @@ static void ftrace_filter_reset(int enable)
        unsigned long type = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
        unsigned i;
 
-       /* keep kstop machine from running */
-       preempt_disable();
+       /* should not be called from interrupt context */
+       spin_lock(&ftrace_lock);
        if (enable)
                ftrace_filtered = 0;
        pg = ftrace_pages_start;
@@ -1053,7 +1088,7 @@ static void ftrace_filter_reset(int enable)
                }
                pg = pg->next;
        }
-       preempt_enable();
+       spin_unlock(&ftrace_lock);
 }
 
 static int
@@ -1165,8 +1200,8 @@ ftrace_match(unsigned char *buff, int len, int enable)
                }
        }
 
-       /* keep kstop machine from running */
-       preempt_disable();
+       /* should not be called from interrupt context */
+       spin_lock(&ftrace_lock);
        if (enable)
                ftrace_filtered = 1;
        pg = ftrace_pages_start;
@@ -1203,7 +1238,7 @@ ftrace_match(unsigned char *buff, int len, int enable)
                }
                pg = pg->next;
        }
-       preempt_enable();
+       spin_unlock(&ftrace_lock);
 }
 
 static ssize_t
@@ -1556,6 +1591,114 @@ static __init int ftrace_init_debugfs(void)
 
 fs_initcall(ftrace_init_debugfs);
 
+#ifdef CONFIG_FTRACE_MCOUNT_RECORD
+static int ftrace_convert_nops(unsigned long *start,
+                              unsigned long *end)
+{
+       unsigned long *p;
+       unsigned long addr;
+       unsigned long flags;
+
+       p = start;
+       while (p < end) {
+               addr = ftrace_call_adjust(*p++);
+               /* should not be called from interrupt context */
+               spin_lock(&ftrace_lock);
+               ftrace_record_ip(addr);
+               spin_unlock(&ftrace_lock);
+               ftrace_shutdown_replenish();
+       }
+
+       /* p is ignored */
+       local_irq_save(flags);
+       __ftrace_update_code(p);
+       local_irq_restore(flags);
+
+       return 0;
+}
+
+void ftrace_init_module(unsigned long *start, unsigned long *end)
+{
+       if (ftrace_disabled || start == end)
+               return;
+       ftrace_convert_nops(start, end);
+}
+
+extern unsigned long __start_mcount_loc[];
+extern unsigned long __stop_mcount_loc[];
+
+void __init ftrace_init(void)
+{
+       unsigned long count, addr, flags;
+       int ret;
+
+       /* Keep the ftrace pointer to the stub */
+       addr = (unsigned long)ftrace_stub;
+
+       local_irq_save(flags);
+       ftrace_dyn_arch_init(&addr);
+       local_irq_restore(flags);
+
+       /* ftrace_dyn_arch_init places the return code in addr */
+       if (addr)
+               goto failed;
+
+       count = __stop_mcount_loc - __start_mcount_loc;
+
+       ret = ftrace_dyn_table_alloc(count);
+       if (ret)
+               goto failed;
+
+       last_ftrace_enabled = ftrace_enabled = 1;
+
+       ret = ftrace_convert_nops(__start_mcount_loc,
+                                 __stop_mcount_loc);
+
+       return;
+ failed:
+       ftrace_disabled = 1;
+}
+#else /* CONFIG_FTRACE_MCOUNT_RECORD */
+static int ftraced(void *ignore)
+{
+       unsigned long usecs;
+
+       while (!kthread_should_stop()) {
+
+               set_current_state(TASK_INTERRUPTIBLE);
+
+               /* check once a second */
+               schedule_timeout(HZ);
+
+               if (unlikely(ftrace_disabled))
+                       continue;
+
+               mutex_lock(&ftrace_sysctl_lock);
+               mutex_lock(&ftraced_lock);
+               if (!ftraced_suspend && !ftraced_stop &&
+                   ftrace_update_code()) {
+                       usecs = nsecs_to_usecs(ftrace_update_time);
+                       if (ftrace_update_tot_cnt > 100000) {
+                               ftrace_update_tot_cnt = 0;
+                               pr_info("hm, dftrace overflow: %lu change%s"
+                                       " (%lu total) in %lu usec%s\n",
+                                       ftrace_update_cnt,
+                                       ftrace_update_cnt != 1 ? "s" : "",
+                                       ftrace_update_tot_cnt,
+                                       usecs, usecs != 1 ? "s" : "");
+                               ftrace_disabled = 1;
+                               WARN_ON_ONCE(1);
+                       }
+               }
+               mutex_unlock(&ftraced_lock);
+               mutex_unlock(&ftrace_sysctl_lock);
+
+               ftrace_shutdown_replenish();
+       }
+       __set_current_state(TASK_RUNNING);
+       return 0;
+}
+
 static int __init ftrace_dynamic_init(void)
 {
        struct task_struct *p;
@@ -1572,7 +1715,7 @@ static int __init ftrace_dynamic_init(void)
                goto failed;
        }
 
-       ret = ftrace_dyn_table_alloc();
+       ret = ftrace_dyn_table_alloc(NR_TO_INIT);
        if (ret)
                goto failed;
 
@@ -1593,6 +1736,8 @@ static int __init ftrace_dynamic_init(void)
 }
 
 core_initcall(ftrace_dynamic_init);
+#endif /* CONFIG_FTRACE_MCOUNT_RECORD */
+
 #else
 # define ftrace_startup()              do { } while (0)
 # define ftrace_shutdown()             do { } while (0)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
new file mode 100644 (file)
index 0000000..94af1fe
--- /dev/null
@@ -0,0 +1,2014 @@
+/*
+ * Generic ring buffer
+ *
+ * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
+ */
+#include <linux/ring_buffer.h>
+#include <linux/spinlock.h>
+#include <linux/debugfs.h>
+#include <linux/uaccess.h>
+#include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/mutex.h>
+#include <linux/sched.h>       /* used for sched_clock() (for now) */
+#include <linux/init.h>
+#include <linux/hash.h>
+#include <linux/list.h>
+#include <linux/fs.h>
+
+/* Up this if you want to test the TIME_EXTENTS and normalization */
+#define DEBUG_SHIFT 0
+
+/* FIXME!!! */
+u64 ring_buffer_time_stamp(int cpu)
+{
+       /* shift to debug/test normalization and TIME_EXTENTS */
+       return sched_clock() << DEBUG_SHIFT;
+}
+
+void ring_buffer_normalize_time_stamp(int cpu, u64 *ts)
+{
+       /* Just for testing the normalize function and deltas */
+       *ts >>= DEBUG_SHIFT;
+}
+
+#define RB_EVNT_HDR_SIZE (sizeof(struct ring_buffer_event))
+#define RB_ALIGNMENT_SHIFT     2
+#define RB_ALIGNMENT           (1 << RB_ALIGNMENT_SHIFT)
+#define RB_MAX_SMALL_DATA      28
+
+enum {
+       RB_LEN_TIME_EXTEND = 8,
+       RB_LEN_TIME_STAMP = 16,
+};
+
+/* inline for ring buffer fast paths */
+static inline unsigned
+rb_event_length(struct ring_buffer_event *event)
+{
+       unsigned length;
+
+       switch (event->type) {
+       case RINGBUF_TYPE_PADDING:
+               /* undefined */
+               return -1;
+
+       case RINGBUF_TYPE_TIME_EXTEND:
+               return RB_LEN_TIME_EXTEND;
+
+       case RINGBUF_TYPE_TIME_STAMP:
+               return RB_LEN_TIME_STAMP;
+
+       case RINGBUF_TYPE_DATA:
+               if (event->len)
+                       length = event->len << RB_ALIGNMENT_SHIFT;
+               else
+                       length = event->array[0];
+               return length + RB_EVNT_HDR_SIZE;
+       default:
+               BUG();
+       }
+       /* not hit */
+       return 0;
+}
+
+/**
+ * ring_buffer_event_length - return the length of the event
+ * @event: the event to get the length of
+ */
+unsigned ring_buffer_event_length(struct ring_buffer_event *event)
+{
+       return rb_event_length(event);
+}
+
+/* inline for ring buffer fast paths */
+static inline void *
+rb_event_data(struct ring_buffer_event *event)
+{
+       BUG_ON(event->type != RINGBUF_TYPE_DATA);
+       /* If length is in len field, then array[0] has the data */
+       if (event->len)
+               return (void *)&event->array[0];
+       /* Otherwise length is in array[0] and array[1] has the data */
+       return (void *)&event->array[1];
+}
+
+/**
+ * ring_buffer_event_data - return the data of the event
+ * @event: the event to get the data from
+ */
+void *ring_buffer_event_data(struct ring_buffer_event *event)
+{
+       return rb_event_data(event);
+}
+
+#define for_each_buffer_cpu(buffer, cpu)               \
+       for_each_cpu_mask(cpu, buffer->cpumask)
+
+#define TS_SHIFT       27
+#define TS_MASK                ((1ULL << TS_SHIFT) - 1)
+#define TS_DELTA_TEST  (~TS_MASK)
+
+/*
+ * This hack stolen from mm/slob.c.
+ * We can store per page timing information in the page frame of the page.
+ * Thanks to Peter Zijlstra for suggesting this idea.
+ */
+struct buffer_page {
+       u64              time_stamp;    /* page time stamp */
+       local_t          write;         /* index for next write */
+       local_t          commit;        /* write committed index */
+       unsigned         read;          /* index for next read */
+       struct list_head list;          /* list of free pages */
+       void *page;                     /* Actual data page */
+};
+
+/*
+ * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing
+ * this issue out.
+ */
+static inline void free_buffer_page(struct buffer_page *bpage)
+{
+       if (bpage->page)
+               __free_page(bpage->page);
+       kfree(bpage);
+}
+
+/*
+ * We need to fit the time_stamp delta into 27 bits.
+ */
+static inline int test_time_stamp(u64 delta)
+{
+       if (delta & TS_DELTA_TEST)
+               return 1;
+       return 0;
+}
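Some quick arithmetic on that limit, assuming the sched_clock()-based stamps above are in nanoseconds: a 27-bit delta covers up to 2^27 - 1 ns, i.e. roughly 134 ms between events on a CPU; anything larger forces a RINGBUF_TYPE_TIME_EXTEND event. Illustrative only (TS_MAX_DELTA is not a name used by the code):

	#define TS_MAX_DELTA	((1ULL << TS_SHIFT) - 1)	/* 134217727 ns, ~134 ms */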
+
+#define BUF_PAGE_SIZE PAGE_SIZE
+
+/*
+ * head_page == tail_page && head == tail then buffer is empty.
+ */
+struct ring_buffer_per_cpu {
+       int                             cpu;
+       struct ring_buffer              *buffer;
+       spinlock_t                      lock;
+       struct lock_class_key           lock_key;
+       struct list_head                pages;
+       struct buffer_page              *head_page;     /* read from head */
+       struct buffer_page              *tail_page;     /* write to tail */
+       struct buffer_page              *commit_page;   /* committed pages */
+       struct buffer_page              *reader_page;
+       unsigned long                   overrun;
+       unsigned long                   entries;
+       u64                             write_stamp;
+       u64                             read_stamp;
+       atomic_t                        record_disabled;
+};
+
+struct ring_buffer {
+       unsigned long                   size;
+       unsigned                        pages;
+       unsigned                        flags;
+       int                             cpus;
+       cpumask_t                       cpumask;
+       atomic_t                        record_disabled;
+
+       struct mutex                    mutex;
+
+       struct ring_buffer_per_cpu      **buffers;
+};
+
+struct ring_buffer_iter {
+       struct ring_buffer_per_cpu      *cpu_buffer;
+       unsigned long                   head;
+       struct buffer_page              *head_page;
+       u64                             read_stamp;
+};
+
+#define RB_WARN_ON(buffer, cond)                               \
+       do {                                                    \
+               if (unlikely(cond)) {                           \
+                       atomic_inc(&buffer->record_disabled);   \
+                       WARN_ON(1);                             \
+               }                                               \
+       } while (0)
+
+#define RB_WARN_ON_RET(buffer, cond)                           \
+       do {                                                    \
+               if (unlikely(cond)) {                           \
+                       atomic_inc(&buffer->record_disabled);   \
+                       WARN_ON(1);                             \
+                       return -1;                              \
+               }                                               \
+       } while (0)
+
+#define RB_WARN_ON_ONCE(buffer, cond)                          \
+       do {                                                    \
+               static int once;                                \
+               if (unlikely(cond) && !once) {                  \
+                       once++;                                 \
+                       atomic_inc(&buffer->record_disabled);   \
+                       WARN_ON(1);                             \
+               }                                               \
+       } while (0)
+
+/**
+ * check_pages - integrity check of buffer pages
+ * @cpu_buffer: CPU buffer with pages to test
+ *
+ * As a safety measure we check to make sure the data pages have not
+ * been corrupted.
+ */
+static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
+{
+       struct list_head *head = &cpu_buffer->pages;
+       struct buffer_page *page, *tmp;
+
+       RB_WARN_ON_RET(cpu_buffer, head->next->prev != head);
+       RB_WARN_ON_RET(cpu_buffer, head->prev->next != head);
+
+       list_for_each_entry_safe(page, tmp, head, list) {
+               RB_WARN_ON_RET(cpu_buffer,
+                              page->list.next->prev != &page->list);
+               RB_WARN_ON_RET(cpu_buffer,
+                              page->list.prev->next != &page->list);
+       }
+
+       return 0;
+}
+
+static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
+                            unsigned nr_pages)
+{
+       struct list_head *head = &cpu_buffer->pages;
+       struct buffer_page *page, *tmp;
+       unsigned long addr;
+       LIST_HEAD(pages);
+       unsigned i;
+
+       for (i = 0; i < nr_pages; i++) {
+               page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()),
+                                   GFP_KERNEL, cpu_to_node(cpu_buffer->cpu));
+               if (!page)
+                       goto free_pages;
+               list_add(&page->list, &pages);
+
+               addr = __get_free_page(GFP_KERNEL);
+               if (!addr)
+                       goto free_pages;
+               page->page = (void *)addr;
+       }
+
+       list_splice(&pages, head);
+
+       rb_check_pages(cpu_buffer);
+
+       return 0;
+
+ free_pages:
+       list_for_each_entry_safe(page, tmp, &pages, list) {
+               list_del_init(&page->list);
+               free_buffer_page(page);
+       }
+       return -ENOMEM;
+}
+
+static struct ring_buffer_per_cpu *
+rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
+{
+       struct ring_buffer_per_cpu *cpu_buffer;
+       struct buffer_page *page;
+       unsigned long addr;
+       int ret;
+
+       cpu_buffer = kzalloc_node(ALIGN(sizeof(*cpu_buffer), cache_line_size()),
+                                 GFP_KERNEL, cpu_to_node(cpu));
+       if (!cpu_buffer)
+               return NULL;
+
+       cpu_buffer->cpu = cpu;
+       cpu_buffer->buffer = buffer;
+       spin_lock_init(&cpu_buffer->lock);
+       INIT_LIST_HEAD(&cpu_buffer->pages);
+
+       page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()),
+                           GFP_KERNEL, cpu_to_node(cpu));
+       if (!page)
+               goto fail_free_buffer;
+
+       cpu_buffer->reader_page = page;
+       addr = __get_free_page(GFP_KERNEL);
+       if (!addr)
+               goto fail_free_reader;
+       page->page = (void *)addr;
+
+       INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
+
+       ret = rb_allocate_pages(cpu_buffer, buffer->pages);
+       if (ret < 0)
+               goto fail_free_reader;
+
+       cpu_buffer->head_page
+               = list_entry(cpu_buffer->pages.next, struct buffer_page, list);
+       cpu_buffer->tail_page = cpu_buffer->commit_page = cpu_buffer->head_page;
+
+       return cpu_buffer;
+
+ fail_free_reader:
+       free_buffer_page(cpu_buffer->reader_page);
+
+ fail_free_buffer:
+       kfree(cpu_buffer);
+       return NULL;
+}
+
+static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
+{
+       struct list_head *head = &cpu_buffer->pages;
+       struct buffer_page *page, *tmp;
+
+       list_del_init(&cpu_buffer->reader_page->list);
+       free_buffer_page(cpu_buffer->reader_page);
+
+       list_for_each_entry_safe(page, tmp, head, list) {
+               list_del_init(&page->list);
+               free_buffer_page(page);
+       }
+       kfree(cpu_buffer);
+}
+
+/*
+ * Causes compile errors if the struct buffer_page gets bigger
+ * than the struct page.
+ */
+extern int ring_buffer_page_too_big(void);
+
+/**
+ * ring_buffer_alloc - allocate a new ring_buffer
+ * @size: the size in bytes that is needed.
+ * @flags: attributes to set for the ring buffer.
+ *
+ * Currently the only flag that is available is the RB_FL_OVERWRITE
+ * flag. This flag means that the buffer will overwrite old data
+ * when the buffer wraps. If this flag is not set, the buffer will
+ * drop data when the tail hits the head.
+ */
+struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags)
+{
+       struct ring_buffer *buffer;
+       int bsize;
+       int cpu;
+
+       /* Paranoid! Optimizes out when all is well */
+       if (sizeof(struct buffer_page) > sizeof(struct page))
+               ring_buffer_page_too_big();
+
+
+       /* keep it in its own cache line */
+       buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()),
+                        GFP_KERNEL);
+       if (!buffer)
+               return NULL;
+
+       buffer->pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
+       buffer->flags = flags;
+
+       /* need at least two pages */
+       if (buffer->pages == 1)
+               buffer->pages++;
+
+       buffer->cpumask = cpu_possible_map;
+       buffer->cpus = nr_cpu_ids;
+
+       bsize = sizeof(void *) * nr_cpu_ids;
+       buffer->buffers = kzalloc(ALIGN(bsize, cache_line_size()),
+                                 GFP_KERNEL);
+       if (!buffer->buffers)
+               goto fail_free_buffer;
+
+       for_each_buffer_cpu(buffer, cpu) {
+               buffer->buffers[cpu] =
+                       rb_allocate_cpu_buffer(buffer, cpu);
+               if (!buffer->buffers[cpu])
+                       goto fail_free_buffers;
+       }
+
+       mutex_init(&buffer->mutex);
+
+       return buffer;
+
+ fail_free_buffers:
+       for_each_buffer_cpu(buffer, cpu) {
+               if (buffer->buffers[cpu])
+                       rb_free_cpu_buffer(buffer->buffers[cpu]);
+       }
+       kfree(buffer->buffers);
+
+ fail_free_buffer:
+       kfree(buffer);
+       return NULL;
+}
+
+/**
+ * ring_buffer_free - free a ring buffer.
+ * @buffer: the buffer to free.
+ */
+void
+ring_buffer_free(struct ring_buffer *buffer)
+{
+       int cpu;
+
+       for_each_buffer_cpu(buffer, cpu)
+               rb_free_cpu_buffer(buffer->buffers[cpu]);
+
+       kfree(buffer);
+}
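A minimal usage sketch for the pair of calls above, assuming a caller that wants a ~1 MB overwriting buffer (the size is rounded up to whole pages internally):

	struct ring_buffer *rb;

	rb = ring_buffer_alloc(1024 * 1024, RB_FL_OVERWRITE);
	if (!rb)
		return -ENOMEM;
	/* ... produce and consume events ... */
	ring_buffer_free(rb);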
+
+static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer);
+
+static void
+rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
+{
+       struct buffer_page *page;
+       struct list_head *p;
+       unsigned i;
+
+       atomic_inc(&cpu_buffer->record_disabled);
+       synchronize_sched();
+
+       for (i = 0; i < nr_pages; i++) {
+               BUG_ON(list_empty(&cpu_buffer->pages));
+               p = cpu_buffer->pages.next;
+               page = list_entry(p, struct buffer_page, list);
+               list_del_init(&page->list);
+               free_buffer_page(page);
+       }
+       BUG_ON(list_empty(&cpu_buffer->pages));
+
+       rb_reset_cpu(cpu_buffer);
+
+       rb_check_pages(cpu_buffer);
+
+       atomic_dec(&cpu_buffer->record_disabled);
+
+}
+
+static void
+rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
+               struct list_head *pages, unsigned nr_pages)
+{
+       struct buffer_page *page;
+       struct list_head *p;
+       unsigned i;
+
+       atomic_inc(&cpu_buffer->record_disabled);
+       synchronize_sched();
+
+       for (i = 0; i < nr_pages; i++) {
+               BUG_ON(list_empty(pages));
+               p = pages->next;
+               page = list_entry(p, struct buffer_page, list);
+               list_del_init(&page->list);
+               list_add_tail(&page->list, &cpu_buffer->pages);
+       }
+       rb_reset_cpu(cpu_buffer);
+
+       rb_check_pages(cpu_buffer);
+
+       atomic_dec(&cpu_buffer->record_disabled);
+}
+
+/**
+ * ring_buffer_resize - resize the ring buffer
+ * @buffer: the buffer to resize.
+ * @size: the new size.
+ *
+ * The tracer is responsible for making sure that the buffer is
+ * not being used while changing the size.
+ * Note: We may be able to change the above requirement by using
+ *  RCU synchronizations.
+ *
+ * Minimum size is 2 * BUF_PAGE_SIZE.
+ *
+ * Returns -ENOMEM on failure.
+ */
+int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
+{
+       struct ring_buffer_per_cpu *cpu_buffer;
+       unsigned nr_pages, rm_pages, new_pages;
+       struct buffer_page *page, *tmp;
+       unsigned long buffer_size;
+       unsigned long addr;
+       LIST_HEAD(pages);
+       int i, cpu;
+
+       size = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
+       size *= BUF_PAGE_SIZE;
+       buffer_size = buffer->pages * BUF_PAGE_SIZE;
+
+       /* we need a minimum of two pages */
+       if (size < BUF_PAGE_SIZE * 2)
+               size = BUF_PAGE_SIZE * 2;
+
+       if (size == buffer_size)
+               return size;
+
+       mutex_lock(&buffer->mutex);
+
+       nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
+
+       if (size < buffer_size) {
+
+               /* easy case, just free pages */
+               BUG_ON(nr_pages >= buffer->pages);
+
+               rm_pages = buffer->pages - nr_pages;
+
+               for_each_buffer_cpu(buffer, cpu) {
+                       cpu_buffer = buffer->buffers[cpu];
+                       rb_remove_pages(cpu_buffer, rm_pages);
+               }
+               goto out;
+       }
+
+       /*
+        * This is a bit more difficult. We only want to add pages
+        * when we can allocate enough for all CPUs. We do this
+        * by allocating all the pages and storing them on a local
+        * linked list. If we succeed in our allocation, then we
+        * add these pages to the cpu_buffers. Otherwise we just free
+        * them all and return -ENOMEM;
+        */
+       BUG_ON(nr_pages <= buffer->pages);
+       new_pages = nr_pages - buffer->pages;
+
+       for_each_buffer_cpu(buffer, cpu) {
+               for (i = 0; i < new_pages; i++) {
+                       page = kzalloc_node(ALIGN(sizeof(*page),
+                                                 cache_line_size()),
+                                           GFP_KERNEL, cpu_to_node(cpu));
+                       if (!page)
+                               goto free_pages;
+                       list_add(&page->list, &pages);
+                       addr = __get_free_page(GFP_KERNEL);
+                       if (!addr)
+                               goto free_pages;
+                       page->page = (void *)addr;
+               }
+       }
+
+       for_each_buffer_cpu(buffer, cpu) {
+               cpu_buffer = buffer->buffers[cpu];
+               rb_insert_pages(cpu_buffer, &pages, new_pages);
+       }
+
+       BUG_ON(!list_empty(&pages));
+
+ out:
+       buffer->pages = nr_pages;
+       mutex_unlock(&buffer->mutex);
+
+       return size;
+
+ free_pages:
+       list_for_each_entry_safe(page, tmp, &pages, list) {
+               list_del_init(&page->list);
+               free_buffer_page(page);
+       }
+       return -ENOMEM;
+}
+
+static inline int rb_null_event(struct ring_buffer_event *event)
+{
+       return event->type == RINGBUF_TYPE_PADDING;
+}
+
+static inline void *__rb_page_index(struct buffer_page *page, unsigned index)
+{
+       return page->page + index;
+}
+
+static inline struct ring_buffer_event *
+rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer)
+{
+       return __rb_page_index(cpu_buffer->reader_page,
+                              cpu_buffer->reader_page->read);
+}
+
+static inline struct ring_buffer_event *
+rb_head_event(struct ring_buffer_per_cpu *cpu_buffer)
+{
+       return __rb_page_index(cpu_buffer->head_page,
+                              cpu_buffer->head_page->read);
+}
+
+static inline struct ring_buffer_event *
+rb_iter_head_event(struct ring_buffer_iter *iter)
+{
+       return __rb_page_index(iter->head_page, iter->head);
+}
+
+static inline unsigned rb_page_write(struct buffer_page *bpage)
+{
+       return local_read(&bpage->write);
+}
+
+static inline unsigned rb_page_commit(struct buffer_page *bpage)
+{
+       return local_read(&bpage->commit);
+}
+
+/* Size is determined by what has been committed */
+static inline unsigned rb_page_size(struct buffer_page *bpage)
+{
+       return rb_page_commit(bpage);
+}
+
+static inline unsigned
+rb_commit_index(struct ring_buffer_per_cpu *cpu_buffer)
+{
+       return rb_page_commit(cpu_buffer->commit_page);
+}
+
+static inline unsigned rb_head_size(struct ring_buffer_per_cpu *cpu_buffer)
+{
+       return rb_page_commit(cpu_buffer->head_page);
+}
+
+/*
+ * When the tail hits the head and the buffer is in overwrite mode,
+ * the head jumps to the next page and all content on the previous
+ * page is discarded. But before doing so, we update the overrun
+ * variable of the buffer.
+ */
+static void rb_update_overflow(struct ring_buffer_per_cpu *cpu_buffer)
+{
+       struct ring_buffer_event *event;
+       unsigned long head;
+
+       for (head = 0; head < rb_head_size(cpu_buffer);
+            head += rb_event_length(event)) {
+
+               event = __rb_page_index(cpu_buffer->head_page, head);
+               BUG_ON(rb_null_event(event));
+               /* Only count data entries */
+               if (event->type != RINGBUF_TYPE_DATA)
+                       continue;
+               cpu_buffer->overrun++;
+               cpu_buffer->entries--;
+       }
+}
+
+static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer,
+                              struct buffer_page **page)
+{
+       struct list_head *p = (*page)->list.next;
+
+       if (p == &cpu_buffer->pages)
+               p = p->next;
+
+       *page = list_entry(p, struct buffer_page, list);
+}
+
+static inline unsigned
+rb_event_index(struct ring_buffer_event *event)
+{
+       unsigned long addr = (unsigned long)event;
+
+       return (addr & ~PAGE_MASK) - (PAGE_SIZE - BUF_PAGE_SIZE);
+}
+
+static inline int
+rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
+            struct ring_buffer_event *event)
+{
+       unsigned long addr = (unsigned long)event;
+       unsigned long index;
+
+       index = rb_event_index(event);
+       addr &= PAGE_MASK;
+
+       return cpu_buffer->commit_page->page == (void *)addr &&
+               rb_commit_index(cpu_buffer) == index;
+}
+
+static inline void
+rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer,
+                   struct ring_buffer_event *event)
+{
+       unsigned long addr = (unsigned long)event;
+       unsigned long index;
+
+       index = rb_event_index(event);
+       addr &= PAGE_MASK;
+
+       while (cpu_buffer->commit_page->page != (void *)addr) {
+               RB_WARN_ON(cpu_buffer,
+                          cpu_buffer->commit_page == cpu_buffer->tail_page);
+               cpu_buffer->commit_page->commit =
+                       cpu_buffer->commit_page->write;
+               rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
+               cpu_buffer->write_stamp = cpu_buffer->commit_page->time_stamp;
+       }
+
+       /* Now set the commit to the event's index */
+       local_set(&cpu_buffer->commit_page->commit, index);
+}
+
+static inline void
+rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
+{
+       /*
+        * We only race with interrupts and NMIs on this CPU.
+        * If we own the commit event, then we can commit
+        * all others that interrupted us, since the interruptions
+        * are in stack format (they finish before they come
+        * back to us). This allows us to do a simple loop to
+        * assign the commit to the tail.
+        */
+       while (cpu_buffer->commit_page != cpu_buffer->tail_page) {
+               cpu_buffer->commit_page->commit =
+                       cpu_buffer->commit_page->write;
+               rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
+               cpu_buffer->write_stamp = cpu_buffer->commit_page->time_stamp;
+               /* add barrier to keep gcc from optimizing too much */
+               barrier();
+       }
+       while (rb_commit_index(cpu_buffer) !=
+              rb_page_write(cpu_buffer->commit_page)) {
+               cpu_buffer->commit_page->commit =
+                       cpu_buffer->commit_page->write;
+               barrier();
+       }
+}
+
+static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
+{
+       cpu_buffer->read_stamp = cpu_buffer->reader_page->time_stamp;
+       cpu_buffer->reader_page->read = 0;
+}
+
+static inline void rb_inc_iter(struct ring_buffer_iter *iter)
+{
+       struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
+
+       /*
+        * The iterator could be on the reader page (it starts there).
+        * But the head could have moved, since the reader was
+        * found. Check for this case and assign the iterator
+        * to the head page instead of next.
+        */
+       if (iter->head_page == cpu_buffer->reader_page)
+               iter->head_page = cpu_buffer->head_page;
+       else
+               rb_inc_page(cpu_buffer, &iter->head_page);
+
+       iter->read_stamp = iter->head_page->time_stamp;
+       iter->head = 0;
+}
+
+/**
+ * ring_buffer_update_event - update event type and data
+ * @event: the event to update
+ * @type: the type of event
+ * @length: the size of the event field in the ring buffer
+ *
+ * Update the type and data fields of the event. The length
+ * is the actual size that is written to the ring buffer,
+ * and with this, we can determine what to place into the
+ * data field.
+ */
+static inline void
+rb_update_event(struct ring_buffer_event *event,
+                        unsigned type, unsigned length)
+{
+       event->type = type;
+
+       switch (type) {
+
+       case RINGBUF_TYPE_PADDING:
+               break;
+
+       case RINGBUF_TYPE_TIME_EXTEND:
+               event->len =
+                       (RB_LEN_TIME_EXTEND + (RB_ALIGNMENT-1))
+                       >> RB_ALIGNMENT_SHIFT;
+               break;
+
+       case RINGBUF_TYPE_TIME_STAMP:
+               event->len =
+                       (RB_LEN_TIME_STAMP + (RB_ALIGNMENT-1))
+                       >> RB_ALIGNMENT_SHIFT;
+               break;
+
+       case RINGBUF_TYPE_DATA:
+               length -= RB_EVNT_HDR_SIZE;
+               if (length > RB_MAX_SMALL_DATA) {
+                       event->len = 0;
+                       event->array[0] = length;
+               } else
+                       event->len =
+                               (length + (RB_ALIGNMENT-1))
+                               >> RB_ALIGNMENT_SHIFT;
+               break;
+       default:
+               BUG();
+       }
+}
+
+static inline unsigned rb_calculate_event_length(unsigned length)
+{
+       struct ring_buffer_event event; /* Used only for sizeof array */
+
+       /* zero length can cause confusion */
+       if (!length)
+               length = 1;
+
+       if (length > RB_MAX_SMALL_DATA)
+               length += sizeof(event.array[0]);
+
+       length += RB_EVNT_HDR_SIZE;
+       length = ALIGN(length, RB_ALIGNMENT);
+
+       return length;
+}
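A worked example of the length encoding above, assuming RB_EVNT_HDR_SIZE works out to 4 bytes on the target:

	/*
	 * rb_calculate_event_length(10):
	 *   10 <= RB_MAX_SMALL_DATA, so no array[0] slot is needed;
	 *   10 + 4 = 14, ALIGN(14, 4) = 16 bytes reserved in the page.
	 * rb_update_event(event, RINGBUF_TYPE_DATA, 16) then stores
	 *   event->len = (16 - 4 + 3) >> 2 = 3 (payload in 4-byte words),
	 * and rb_event_length() recovers (3 << 2) + 4 = 16.
	 */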
+
+static struct ring_buffer_event *
+__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
+                 unsigned type, unsigned long length, u64 *ts)
+{
+       struct buffer_page *tail_page, *head_page, *reader_page;
+       unsigned long tail, write;
+       struct ring_buffer *buffer = cpu_buffer->buffer;
+       struct ring_buffer_event *event;
+       unsigned long flags;
+
+       tail_page = cpu_buffer->tail_page;
+       write = local_add_return(length, &tail_page->write);
+       tail = write - length;
+
+       /* See if we shot past the end of this buffer page */
+       if (write > BUF_PAGE_SIZE) {
+               struct buffer_page *next_page = tail_page;
+
+               spin_lock_irqsave(&cpu_buffer->lock, flags);
+
+               rb_inc_page(cpu_buffer, &next_page);
+
+               head_page = cpu_buffer->head_page;
+               reader_page = cpu_buffer->reader_page;
+
+               /* we grabbed the lock before incrementing */
+               RB_WARN_ON(cpu_buffer, next_page == reader_page);
+
+               /*
+                * If, for some reason, we had an interrupt storm that made
+                * it all the way around the buffer, bail and warn
+                * about it.
+                */
+               if (unlikely(next_page == cpu_buffer->commit_page)) {
+                       WARN_ON_ONCE(1);
+                       goto out_unlock;
+               }
+
+               if (next_page == head_page) {
+                       if (!(buffer->flags & RB_FL_OVERWRITE)) {
+                               /* reset write */
+                               if (tail <= BUF_PAGE_SIZE)
+                                       local_set(&tail_page->write, tail);
+                               goto out_unlock;
+                       }
+
+                       /* tail_page has not moved yet? */
+                       if (tail_page == cpu_buffer->tail_page) {
+                               /* count overflows */
+                               rb_update_overflow(cpu_buffer);
+
+                               rb_inc_page(cpu_buffer, &head_page);
+                               cpu_buffer->head_page = head_page;
+                               cpu_buffer->head_page->read = 0;
+                       }
+               }
+
+               /*
+                * If the tail page is still the same as what we think
+                * it is, then it is up to us to update the tail
+                * pointer.
+                */
+               if (tail_page == cpu_buffer->tail_page) {
+                       local_set(&next_page->write, 0);
+                       local_set(&next_page->commit, 0);
+                       cpu_buffer->tail_page = next_page;
+
+                       /* reread the time stamp */
+                       *ts = ring_buffer_time_stamp(cpu_buffer->cpu);
+                       cpu_buffer->tail_page->time_stamp = *ts;
+               }
+
+               /*
+                * The actual tail page has moved forward.
+                */
+               if (tail < BUF_PAGE_SIZE) {
+                       /* Mark the rest of the page with padding */
+                       event = __rb_page_index(tail_page, tail);
+                       event->type = RINGBUF_TYPE_PADDING;
+               }
+
+               if (tail <= BUF_PAGE_SIZE)
+                       /* Set the write back to the previous setting */
+                       local_set(&tail_page->write, tail);
+
+               /*
+                * If this was a commit entry that failed,
+                * increment that too
+                */
+               if (tail_page == cpu_buffer->commit_page &&
+                   tail == rb_commit_index(cpu_buffer)) {
+                       rb_set_commit_to_write(cpu_buffer);
+               }
+
+               spin_unlock_irqrestore(&cpu_buffer->lock, flags);
+
+               /* fail and let the caller try again */
+               return ERR_PTR(-EAGAIN);
+       }
+
+       /* We reserved something on the buffer */
+
+       BUG_ON(write > BUF_PAGE_SIZE);
+
+       event = __rb_page_index(tail_page, tail);
+       rb_update_event(event, type, length);
+
+       /*
+        * If this is a commit and the tail is zero, then update
+        * this page's time stamp.
+        */
+       if (!tail && rb_is_commit(cpu_buffer, event))
+               cpu_buffer->commit_page->time_stamp = *ts;
+
+       return event;
+
+ out_unlock:
+       spin_unlock_irqrestore(&cpu_buffer->lock, flags);
+       return NULL;
+}
+
+static int
+rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
+                 u64 *ts, u64 *delta)
+{
+       struct ring_buffer_event *event;
+       static int once;
+       int ret;
+
+       if (unlikely(*delta > (1ULL << 59) && !once++)) {
+               printk(KERN_WARNING "Delta way too big! %llu"
+                      " ts=%llu write stamp = %llu\n",
+                      *delta, *ts, cpu_buffer->write_stamp);
+               WARN_ON(1);
+       }
+
+       /*
+        * The delta is too big; we need to add a
+        * new timestamp.
+        */
+       event = __rb_reserve_next(cpu_buffer,
+                                 RINGBUF_TYPE_TIME_EXTEND,
+                                 RB_LEN_TIME_EXTEND,
+                                 ts);
+       if (!event)
+               return -EBUSY;
+
+       if (PTR_ERR(event) == -EAGAIN)
+               return -EAGAIN;
+
+       /* Only a committed time event can update the write stamp */
+       if (rb_is_commit(cpu_buffer, event)) {
+               /*
+                * If this is the first on the page, then we need to
+                * update the page itself, and just put in a zero.
+                */
+               if (rb_event_index(event)) {
+                       event->time_delta = *delta & TS_MASK;
+                       event->array[0] = *delta >> TS_SHIFT;
+               } else {
+                       cpu_buffer->commit_page->time_stamp = *ts;
+                       event->time_delta = 0;
+                       event->array[0] = 0;
+               }
+               cpu_buffer->write_stamp = *ts;
+               /* let the caller know this was the commit */
+               ret = 1;
+       } else {
+               /* Darn, this is just wasted space */
+               event->time_delta = 0;
+               event->array[0] = 0;
+               ret = 0;
+       }
+
+       *delta = 0;
+
+       return ret;
+}
+
+static struct ring_buffer_event *
+rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
+                     unsigned type, unsigned long length)
+{
+       struct ring_buffer_event *event;
+       u64 ts, delta;
+       int commit = 0;
+
+ again:
+       ts = ring_buffer_time_stamp(cpu_buffer->cpu);
+
+       /*
+        * Only the first commit can update the timestamp.
+        * Yes there is a race here. If an interrupt comes in
+        * just after the conditional and it traces too, then it
+        * will also check the deltas. More than one timestamp may
+        * also be made. But only the entry that did the actual
+        * commit will be something other than zero.
+        */
+       if (cpu_buffer->tail_page == cpu_buffer->commit_page &&
+           rb_page_write(cpu_buffer->tail_page) ==
+           rb_commit_index(cpu_buffer)) {
+
+               delta = ts - cpu_buffer->write_stamp;
+
+               /* make sure this delta is calculated here */
+               barrier();
+
+               /* Did the write stamp get updated already? */
+               if (unlikely(ts < cpu_buffer->write_stamp))
+                       goto again;
+
+               if (test_time_stamp(delta)) {
+
+                       commit = rb_add_time_stamp(cpu_buffer, &ts, &delta);
+
+                       if (commit == -EBUSY)
+                               return NULL;
+
+                       if (commit == -EAGAIN)
+                               goto again;
+
+                       RB_WARN_ON(cpu_buffer, commit < 0);
+               }
+       } else
+               /* Non-commit events have zero deltas */
+               delta = 0;
+
+       event = __rb_reserve_next(cpu_buffer, type, length, &ts);
+       if (PTR_ERR(event) == -EAGAIN)
+               goto again;
+
+       if (!event) {
+               if (unlikely(commit))
+                       /*
+                        * Ouch! We needed a timestamp and it was committed. But
+                        * we didn't get our event reserved.
+                        */
+                       rb_set_commit_to_write(cpu_buffer);
+               return NULL;
+       }
+
+       /*
+        * If the timestamp was committed, make the commit our entry
+        * now so that we will update it when needed.
+        */
+       if (commit)
+               rb_set_commit_event(cpu_buffer, event);
+       else if (!rb_is_commit(cpu_buffer, event))
+               delta = 0;
+
+       event->time_delta = delta;
+
+       return event;
+}
+
+static DEFINE_PER_CPU(int, rb_need_resched);
+
+/**
+ * ring_buffer_lock_reserve - reserve a part of the buffer
+ * @buffer: the ring buffer to reserve from
+ * @length: the length of the data to reserve (excluding event header)
+ * @flags: a pointer to save the interrupt flags
+ *
+ * Returns a reserved event on the ring buffer to copy directly to.
+ * The user of this interface can obtain the body to write into
+ * with the ring_buffer_event_data() interface.
+ *
+ * The length is the length of the data needed, not the event length
+ * which also includes the event header.
+ *
+ * Must be paired with ring_buffer_unlock_commit, unless NULL is returned.
+ * If NULL is returned, then nothing has been allocated or locked.
+ */
+struct ring_buffer_event *
+ring_buffer_lock_reserve(struct ring_buffer *buffer,
+                        unsigned long length,
+                        unsigned long *flags)
+{
+       struct ring_buffer_per_cpu *cpu_buffer;
+       struct ring_buffer_event *event;
+       int cpu, resched;
+
+       if (atomic_read(&buffer->record_disabled))
+               return NULL;
+
+       /* If we are tracing schedule, we don't want to recurse */
+       resched = need_resched();
+       preempt_disable_notrace();
+
+       cpu = raw_smp_processor_id();
+
+       if (!cpu_isset(cpu, buffer->cpumask))
+               goto out;
+
+       cpu_buffer = buffer->buffers[cpu];
+
+       if (atomic_read(&cpu_buffer->record_disabled))
+               goto out;
+
+       length = rb_calculate_event_length(length);
+       if (length > BUF_PAGE_SIZE)
+               goto out;
+
+       event = rb_reserve_next_event(cpu_buffer, RINGBUF_TYPE_DATA, length);
+       if (!event)
+               goto out;
+
+       /*
+        * Need to store resched state on this cpu.
+        * Only the first (outermost) call needs to.
+        */
+
+       if (preempt_count() == 1)
+               per_cpu(rb_need_resched, cpu) = resched;
+
+       return event;
+
+ out:
+       if (resched)
+               preempt_enable_no_resched_notrace();
+       else
+               preempt_enable_notrace();
+       return NULL;
+}
+
+static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
+                     struct ring_buffer_event *event)
+{
+       cpu_buffer->entries++;
+
+       /* Only process further if we own the commit */
+       if (!rb_is_commit(cpu_buffer, event))
+               return;
+
+       cpu_buffer->write_stamp += event->time_delta;
+
+       rb_set_commit_to_write(cpu_buffer);
+}
+
+/**
+ * ring_buffer_unlock_commit - commit a reserved event
+ * @buffer: The buffer to commit to
+ * @event: The event pointer to commit.
+ * @flags: the interrupt flags received from ring_buffer_lock_reserve.
+ *
+ * This commits the data to the ring buffer, and releases any locks held.
+ *
+ * Must be paired with ring_buffer_lock_reserve.
+ */
+int ring_buffer_unlock_commit(struct ring_buffer *buffer,
+                             struct ring_buffer_event *event,
+                             unsigned long flags)
+{
+       struct ring_buffer_per_cpu *cpu_buffer;
+       int cpu = raw_smp_processor_id();
+
+       cpu_buffer = buffer->buffers[cpu];
+
+       rb_commit(cpu_buffer, event);
+
+       /*
+        * Only the outermost preemption level needs to restore preemption.
+        */
+       if (preempt_count() == 1) {
+               if (per_cpu(rb_need_resched, cpu))
+                       preempt_enable_no_resched_notrace();
+               else
+                       preempt_enable_notrace();
+       } else
+               preempt_enable_no_resched_notrace();
+
+       return 0;
+}
+
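As a usage sketch of the reserve/commit pair above (assuming a buffer created earlier with ring_buffer_alloc(); the payload struct is hypothetical, not part of the patch):

	struct my_payload { unsigned long ip; };

	static void write_sample(struct ring_buffer *buffer, unsigned long ip)
	{
		struct ring_buffer_event *event;
		struct my_payload *entry;
		unsigned long flags;

		event = ring_buffer_lock_reserve(buffer, sizeof(*entry), &flags);
		if (!event)
			return;	/* recording disabled or event too large */
		entry = ring_buffer_event_data(event);
		entry->ip = ip;
		ring_buffer_unlock_commit(buffer, event, flags);
	}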
+/**
+ * ring_buffer_write - write data to the buffer without reserving
+ * @buffer: The ring buffer to write to.
+ * @length: The length of the data being written (excluding the event header)
+ * @data: The data to write to the buffer.
+ *
+ * This is like ring_buffer_lock_reserve and ring_buffer_unlock_commit as
+ * one function. If you already have the data to write to the buffer, it
+ * may be easier to simply call this function.
+ *
+ * Note, like ring_buffer_lock_reserve, the length is the length of the data
+ * and not the length of the event which would hold the header.
+ */
+int ring_buffer_write(struct ring_buffer *buffer,
+                       unsigned long length,
+                       void *data)
+{
+       struct ring_buffer_per_cpu *cpu_buffer;
+       struct ring_buffer_event *event;
+       unsigned long event_length;
+       void *body;
+       int ret = -EBUSY;
+       int cpu, resched;
+
+       if (atomic_read(&buffer->record_disabled))
+               return -EBUSY;
+
+       resched = need_resched();
+       preempt_disable_notrace();
+
+       cpu = raw_smp_processor_id();
+
+       if (!cpu_isset(cpu, buffer->cpumask))
+               goto out;
+
+       cpu_buffer = buffer->buffers[cpu];
+
+       if (atomic_read(&cpu_buffer->record_disabled))
+               goto out;
+
+       event_length = rb_calculate_event_length(length);
+       event = rb_reserve_next_event(cpu_buffer,
+                                     RINGBUF_TYPE_DATA, event_length);
+       if (!event)
+               goto out;
+
+       body = rb_event_data(event);
+
+       memcpy(body, data, length);
+
+       rb_commit(cpu_buffer, event);
+
+       ret = 0;
+ out:
+       if (resched)
+               preempt_enable_no_resched_notrace();
+       else
+               preempt_enable_notrace();
+
+       return ret;
+}
+
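When the payload already exists in memory, this one-shot path replaces the explicit reserve/commit dance. A sketch, reusing the hypothetical struct from the previous example:

	struct my_payload data = { .ip = ip };

	if (ring_buffer_write(buffer, sizeof(data), &data) == -EBUSY)
		;	/* recording disabled, or no event could be reserved */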
+static inline int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
+{
+       struct buffer_page *reader = cpu_buffer->reader_page;
+       struct buffer_page *head = cpu_buffer->head_page;
+       struct buffer_page *commit = cpu_buffer->commit_page;
+
+       return reader->read == rb_page_commit(reader) &&
+               (commit == reader ||
+                (commit == head &&
+                 head->read == rb_page_commit(commit)));
+}
+
+/**
+ * ring_buffer_record_disable - stop all writes into the buffer
+ * @buffer: The ring buffer to stop writes to.
+ *
+ * This prevents all writes to the buffer. Any attempt to write
+ * to the buffer after this will fail and return NULL.
+ *
+ * The caller should call synchronize_sched() after this.
+ */
+void ring_buffer_record_disable(struct ring_buffer *buffer)
+{
+       atomic_inc(&buffer->record_disabled);
+}
+
+/**
+ * ring_buffer_record_enable - enable writes to the buffer
+ * @buffer: The ring buffer to enable writes to
+ *
+ * Note, multiple disables will need the same number of enables
+ * to truly enable the writing (much like preempt_disable).
+ */
+void ring_buffer_record_enable(struct ring_buffer *buffer)
+{
+       atomic_dec(&buffer->record_disabled);
+}
+
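The disable/enable pair nests like preempt_disable()/preempt_enable(). A sketch of pausing all writers around a safe read, assuming the caller may sleep:

	ring_buffer_record_disable(buffer);
	synchronize_sched();	/* wait for writers already in flight */
	/* ... read the buffer without racing producers ... */
	ring_buffer_record_enable(buffer);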
+/**
+ * ring_buffer_record_disable_cpu - stop all writes into the cpu_buffer
+ * @buffer: The ring buffer to stop writes to.
+ * @cpu: The CPU buffer to stop
+ *
+ * This prevents all writes to the buffer. Any attempt to write
+ * to the buffer after this will fail and return NULL.
+ *
+ * The caller should call synchronize_sched() after this.
+ */
+void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu)
+{
+       struct ring_buffer_per_cpu *cpu_buffer;
+
+       if (!cpu_isset(cpu, buffer->cpumask))
+               return;
+
+       cpu_buffer = buffer->buffers[cpu];
+       atomic_inc(&cpu_buffer->record_disabled);
+}
+
+/**
+ * ring_buffer_record_enable_cpu - enable writes to the buffer
+ * @buffer: The ring buffer to enable writes to
+ * @cpu: The CPU to enable.
+ *
+ * Note, multiple disables will need the same number of enables
+ * to truly enable the writing (much like preempt_disable).
+ */
+void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu)
+{
+       struct ring_buffer_per_cpu *cpu_buffer;
+
+       if (!cpu_isset(cpu, buffer->cpumask))
+               return;
+
+       cpu_buffer = buffer->buffers[cpu];
+       atomic_dec(&cpu_buffer->record_disabled);
+}
+
+/**
+ * ring_buffer_entries_cpu - get the number of entries in a cpu buffer
+ * @buffer: The ring buffer
+ * @cpu: The per CPU buffer to get the entries from.
+ */
+unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu)
+{
+       struct ring_buffer_per_cpu *cpu_buffer;
+
+       if (!cpu_isset(cpu, buffer->cpumask))
+               return 0;
+
+       cpu_buffer = buffer->buffers[cpu];
+       return cpu_buffer->entries;
+}
+
+/**
+ * ring_buffer_overrun_cpu - get the number of overruns in a cpu_buffer
+ * @buffer: The ring buffer
+ * @cpu: The per CPU buffer to get the number of overruns from
+ */
+unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu)
+{
+       struct ring_buffer_per_cpu *cpu_buffer;
+
+       if (!cpu_isset(cpu, buffer->cpumask))
+               return 0;
+
+       cpu_buffer = buffer->buffers[cpu];
+       return cpu_buffer->overrun;
+}
+
+/**
+ * ring_buffer_entries - get the number of entries in a buffer
+ * @buffer: The ring buffer
+ *
+ * Returns the total number of entries in the ring buffer
+ * (all CPU entries)
+ */
+unsigned long ring_buffer_entries(struct ring_buffer *buffer)
+{
+       struct ring_buffer_per_cpu *cpu_buffer;
+       unsigned long entries = 0;
+       int cpu;
+
+       /* if you care about this being correct, lock the buffer */
+       for_each_buffer_cpu(buffer, cpu) {
+               cpu_buffer = buffer->buffers[cpu];
+               entries += cpu_buffer->entries;
+       }
+
+       return entries;
+}
+
+/**
+ * ring_buffer_overruns - get the total number of overruns in the buffer
+ * @buffer: The ring buffer
+ *
+ * Returns the total number of overruns in the ring buffer
+ * (all CPU entries)
+ */
+unsigned long ring_buffer_overruns(struct ring_buffer *buffer)
+{
+       struct ring_buffer_per_cpu *cpu_buffer;
+       unsigned long overruns = 0;
+       int cpu;
+
+       /* if you care about this being correct, lock the buffer */
+       for_each_buffer_cpu(buffer, cpu) {
+               cpu_buffer = buffer->buffers[cpu];
+               overruns += cpu_buffer->overrun;
+       }
+
+       return overruns;
+}
+
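A sketch of polling these aggregate counters, e.g. to report how much data was overwritten while a consumer lagged:

	unsigned long entries  = ring_buffer_entries(buffer);
	unsigned long overruns = ring_buffer_overruns(buffer);

	if (overruns)
		printk(KERN_INFO "ring buffer: %lu entries, %lu overwritten\n",
		       entries, overruns);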
+/**
+ * ring_buffer_iter_reset - reset an iterator
+ * @iter: The iterator to reset
+ *
+ * Resets the iterator, so that it will start from the beginning
+ * again.
+ */
+void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
+{
+       struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
+
+       /* Iterator usage is expected to have record disabled */
+       if (list_empty(&cpu_buffer->reader_page->list)) {
+               iter->head_page = cpu_buffer->head_page;
+               iter->head = cpu_buffer->head_page->read;
+       } else {
+               iter->head_page = cpu_buffer->reader_page;
+               iter->head = cpu_buffer->reader_page->read;
+       }
+       if (iter->head)
+               iter->read_stamp = cpu_buffer->read_stamp;
+       else
+               iter->read_stamp = iter->head_page->time_stamp;
+}
+
+/**
+ * ring_buffer_iter_empty - check if an iterator has no more to read
+ * @iter: The iterator to check
+ */
+int ring_buffer_iter_empty(struct ring_buffer_iter *iter)
+{
+       struct ring_buffer_per_cpu *cpu_buffer;
+
+       cpu_buffer = iter->cpu_buffer;
+
+       return iter->head_page == cpu_buffer->commit_page &&
+               iter->head == rb_commit_index(cpu_buffer);
+}
+
+static void
+rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer,
+                    struct ring_buffer_event *event)
+{
+       u64 delta;
+
+       switch (event->type) {
+       case RINGBUF_TYPE_PADDING:
+               return;
+
+       case RINGBUF_TYPE_TIME_EXTEND:
+               delta = event->array[0];
+               delta <<= TS_SHIFT;
+               delta += event->time_delta;
+               cpu_buffer->read_stamp += delta;
+               return;
+
+       case RINGBUF_TYPE_TIME_STAMP:
+               /* FIXME: not implemented */
+               return;
+
+       case RINGBUF_TYPE_DATA:
+               cpu_buffer->read_stamp += event->time_delta;
+               return;
+
+       default:
+               BUG();
+       }
+       return;
+}
+
+static void
+rb_update_iter_read_stamp(struct ring_buffer_iter *iter,
+                         struct ring_buffer_event *event)
+{
+       u64 delta;
+
+       switch (event->type) {
+       case RINGBUF_TYPE_PADDING:
+               return;
+
+       case RINGBUF_TYPE_TIME_EXTEND:
+               delta = event->array[0];
+               delta <<= TS_SHIFT;
+               delta += event->time_delta;
+               iter->read_stamp += delta;
+               return;
+
+       case RINGBUF_TYPE_TIME_STAMP:
+               /* FIXME: not implemented */
+               return;
+
+       case RINGBUF_TYPE_DATA:
+               iter->read_stamp += event->time_delta;
+               return;
+
+       default:
+               BUG();
+       }
+       return;
+}
+
+static struct buffer_page *
+rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
+{
+       struct buffer_page *reader = NULL;
+       unsigned long flags;
+
+       spin_lock_irqsave(&cpu_buffer->lock, flags);
+
+ again:
+       reader = cpu_buffer->reader_page;
+
+       /* If there's more to read, return this page */
+       if (cpu_buffer->reader_page->read < rb_page_size(reader))
+               goto out;
+
+       /* Never should we have an index greater than the size */
+       RB_WARN_ON(cpu_buffer,
+                  cpu_buffer->reader_page->read > rb_page_size(reader));
+
+       /* check if we caught up to the tail */
+       reader = NULL;
+       if (cpu_buffer->commit_page == cpu_buffer->reader_page)
+               goto out;
+
+       /*
+        * Splice the empty reader page into the list around the head.
+        * Reset the reader page to size zero.
+        */
+
+       reader = cpu_buffer->head_page;
+       cpu_buffer->reader_page->list.next = reader->list.next;
+       cpu_buffer->reader_page->list.prev = reader->list.prev;
+
+       local_set(&cpu_buffer->reader_page->write, 0);
+       local_set(&cpu_buffer->reader_page->commit, 0);
+
+       /* Make the reader page now replace the head */
+       reader->list.prev->next = &cpu_buffer->reader_page->list;
+       reader->list.next->prev = &cpu_buffer->reader_page->list;
+
+       /*
+        * If the tail is on the reader page, then we must set the head
+        * to the inserted page, otherwise we advance the head one page
+        * past it.
+        */
+       cpu_buffer->head_page = cpu_buffer->reader_page;
+
+       if (cpu_buffer->commit_page != reader)
+               rb_inc_page(cpu_buffer, &cpu_buffer->head_page);
+
+       /* Finally update the reader page to the new head */
+       cpu_buffer->reader_page = reader;
+       rb_reset_reader_page(cpu_buffer);
+
+       goto again;
+
+ out:
+       spin_unlock_irqrestore(&cpu_buffer->lock, flags);
+
+       return reader;
+}
+
+static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
+{
+       struct ring_buffer_event *event;
+       struct buffer_page *reader;
+       unsigned length;
+
+       reader = rb_get_reader_page(cpu_buffer);
+
+       /* This function should not be called when buffer is empty */
+       BUG_ON(!reader);
+
+       event = rb_reader_event(cpu_buffer);
+
+       if (event->type == RINGBUF_TYPE_DATA)
+               cpu_buffer->entries--;
+
+       rb_update_read_stamp(cpu_buffer, event);
+
+       length = rb_event_length(event);
+       cpu_buffer->reader_page->read += length;
+}
+
+static void rb_advance_iter(struct ring_buffer_iter *iter)
+{
+       struct ring_buffer *buffer;
+       struct ring_buffer_per_cpu *cpu_buffer;
+       struct ring_buffer_event *event;
+       unsigned length;
+
+       cpu_buffer = iter->cpu_buffer;
+       buffer = cpu_buffer->buffer;
+
+       /*
+        * Check if we are at the end of the buffer.
+        */
+       if (iter->head >= rb_page_size(iter->head_page)) {
+               BUG_ON(iter->head_page == cpu_buffer->commit_page);
+               rb_inc_iter(iter);
+               return;
+       }
+
+       event = rb_iter_head_event(iter);
+
+       length = rb_event_length(event);
+
+       /*
+        * This should not be called to advance the head if we are
+        * at the tail of the buffer.
+        */
+       BUG_ON((iter->head_page == cpu_buffer->commit_page) &&
+              (iter->head + length > rb_commit_index(cpu_buffer)));
+
+       rb_update_iter_read_stamp(iter, event);
+
+       iter->head += length;
+
+       /* check for end of page padding */
+       if ((iter->head >= rb_page_size(iter->head_page)) &&
+           (iter->head_page != cpu_buffer->commit_page))
+               rb_advance_iter(iter);
+}
+
+/**
+ * ring_buffer_peek - peek at the next event to be read
+ * @buffer: The ring buffer to read
+ * @cpu: The cpu to peek at
+ * @ts: The timestamp counter of this event.
+ *
+ * This will return the event that will be read next, but does
+ * not consume the data.
+ */
+struct ring_buffer_event *
+ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
+{
+       struct ring_buffer_per_cpu *cpu_buffer;
+       struct ring_buffer_event *event;
+       struct buffer_page *reader;
+
+       if (!cpu_isset(cpu, buffer->cpumask))
+               return NULL;
+
+       cpu_buffer = buffer->buffers[cpu];
+
+ again:
+       reader = rb_get_reader_page(cpu_buffer);
+       if (!reader)
+               return NULL;
+
+       event = rb_reader_event(cpu_buffer);
+
+       switch (event->type) {
+       case RINGBUF_TYPE_PADDING:
+               RB_WARN_ON(cpu_buffer, 1);
+               rb_advance_reader(cpu_buffer);
+               return NULL;
+
+       case RINGBUF_TYPE_TIME_EXTEND:
+               /* Internal data, OK to advance */
+               rb_advance_reader(cpu_buffer);
+               goto again;
+
+       case RINGBUF_TYPE_TIME_STAMP:
+               /* FIXME: not implemented */
+               rb_advance_reader(cpu_buffer);
+               goto again;
+
+       case RINGBUF_TYPE_DATA:
+               if (ts) {
+                       *ts = cpu_buffer->read_stamp + event->time_delta;
+                       ring_buffer_normalize_time_stamp(cpu_buffer->cpu, ts);
+               }
+               return event;
+
+       default:
+               BUG();
+       }
+
+       return NULL;
+}
+
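A hedged sketch of a reader inspecting the next event without removing it (the event stays in the buffer, so a later consume or read will return it again):

	u64 ts;
	struct ring_buffer_event *event;

	event = ring_buffer_peek(buffer, cpu, &ts);
	if (event)
		printk(KERN_DEBUG "next event stamped %llu\n",
		       (unsigned long long)ts);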
+/**
+ * ring_buffer_iter_peek - peek at the next event to be read
+ * @iter: The ring buffer iterator
+ * @ts: The timestamp counter of this event.
+ *
+ * This will return the event that will be read next, but does
+ * not increment the iterator.
+ */
+struct ring_buffer_event *
+ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
+{
+       struct ring_buffer *buffer;
+       struct ring_buffer_per_cpu *cpu_buffer;
+       struct ring_buffer_event *event;
+
+       if (ring_buffer_iter_empty(iter))
+               return NULL;
+
+       cpu_buffer = iter->cpu_buffer;
+       buffer = cpu_buffer->buffer;
+
+ again:
+       if (rb_per_cpu_empty(cpu_buffer))
+               return NULL;
+
+       event = rb_iter_head_event(iter);
+
+       switch (event->type) {
+       case RINGBUF_TYPE_PADDING:
+               rb_inc_iter(iter);
+               goto again;
+
+       case RINGBUF_TYPE_TIME_EXTEND:
+               /* Internal data, OK to advance */
+               rb_advance_iter(iter);
+               goto again;
+
+       case RINGBUF_TYPE_TIME_STAMP:
+               /* FIXME: not implemented */
+               rb_advance_iter(iter);
+               goto again;
+
+       case RINGBUF_TYPE_DATA:
+               if (ts) {
+                       *ts = iter->read_stamp + event->time_delta;
+                       ring_buffer_normalize_time_stamp(cpu_buffer->cpu, ts);
+               }
+               return event;
+
+       default:
+               BUG();
+       }
+
+       return NULL;
+}
+
+/**
+ * ring_buffer_consume - return an event and consume it
+ * @buffer: The ring buffer to get the next event from
+ * @cpu: The per CPU buffer to get the event from
+ * @ts: The timestamp of the consumed event
+ *
+ * Returns the next event in the ring buffer, and that event is consumed.
+ * Meaning that sequential reads will keep returning a different event,
+ * and eventually empty the ring buffer if the producer is slower.
+ */
+struct ring_buffer_event *
+ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
+{
+       struct ring_buffer_per_cpu *cpu_buffer;
+       struct ring_buffer_event *event;
+
+       if (!cpu_isset(cpu, buffer->cpumask))
+               return NULL;
+
+       event = ring_buffer_peek(buffer, cpu, ts);
+       if (!event)
+               return NULL;
+
+       cpu_buffer = buffer->buffers[cpu];
+       rb_advance_reader(cpu_buffer);
+
+       return event;
+}
+
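A consuming reader drains one cpu buffer in timestamp order; process() here is a hypothetical consumer:

	u64 ts;
	struct ring_buffer_event *event;

	while ((event = ring_buffer_consume(buffer, cpu, &ts)) != NULL)
		process(ring_buffer_event_data(event), ts);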
+/**
+ * ring_buffer_read_start - start a non consuming read of the buffer
+ * @buffer: The ring buffer to read from
+ * @cpu: The cpu buffer to iterate over
+ *
+ * This starts up an iteration through the buffer. It also disables
+ * the recording to the buffer until the reading is finished.
+ * This prevents the reading from being corrupted. This is not
+ * a consuming read, so a producer is not expected.
+ *
+ * Must be paired with ring_buffer_read_finish.
+ */
+struct ring_buffer_iter *
+ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
+{
+       struct ring_buffer_per_cpu *cpu_buffer;
+       struct ring_buffer_iter *iter;
+       unsigned long flags;
+
+       if (!cpu_isset(cpu, buffer->cpumask))
+               return NULL;
+
+       iter = kmalloc(sizeof(*iter), GFP_KERNEL);
+       if (!iter)
+               return NULL;
+
+       cpu_buffer = buffer->buffers[cpu];
+
+       iter->cpu_buffer = cpu_buffer;
+
+       atomic_inc(&cpu_buffer->record_disabled);
+       synchronize_sched();
+
+       spin_lock_irqsave(&cpu_buffer->lock, flags);
+       ring_buffer_iter_reset(iter);
+       spin_unlock_irqrestore(&cpu_buffer->lock, flags);
+
+       return iter;
+}
+
+/**
+ * ring_buffer_read_finish - finish reading the iterator of the buffer
+ * @iter: The iterator retrieved by ring_buffer_read_start
+ *
+ * This re-enables the recording to the buffer, and frees the
+ * iterator.
+ */
+void
+ring_buffer_read_finish(struct ring_buffer_iter *iter)
+{
+       struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
+
+       atomic_dec(&cpu_buffer->record_disabled);
+       kfree(iter);
+}
+
+/**
+ * ring_buffer_read - read the next item in the ring buffer by the iterator
+ * @iter: The ring buffer iterator
+ * @ts: The time stamp of the event read.
+ *
+ * This reads the next event in the ring buffer and increments the iterator.
+ */
+struct ring_buffer_event *
+ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
+{
+       struct ring_buffer_event *event;
+
+       event = ring_buffer_iter_peek(iter, ts);
+       if (!event)
+               return NULL;
+
+       rb_advance_iter(iter);
+
+       return event;
+}
+
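Putting the iterator API together: a non-consuming pass over one cpu buffer, during which recording to that cpu stays disabled. process() is again a hypothetical consumer:

	struct ring_buffer_iter *iter;
	struct ring_buffer_event *event;
	u64 ts;

	iter = ring_buffer_read_start(buffer, cpu);
	if (!iter)
		return;
	while ((event = ring_buffer_read(iter, &ts)) != NULL)
		process(ring_buffer_event_data(event), ts);
	ring_buffer_read_finish(iter);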
+/**
+ * ring_buffer_size - return the size of the ring buffer (in bytes)
+ * @buffer: The ring buffer.
+ */
+unsigned long ring_buffer_size(struct ring_buffer *buffer)
+{
+       return BUF_PAGE_SIZE * buffer->pages;
+}
+
+static void
+rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
+{
+       cpu_buffer->head_page
+               = list_entry(cpu_buffer->pages.next, struct buffer_page, list);
+       local_set(&cpu_buffer->head_page->write, 0);
+       local_set(&cpu_buffer->head_page->commit, 0);
+
+       cpu_buffer->head_page->read = 0;
+
+       cpu_buffer->tail_page = cpu_buffer->head_page;
+       cpu_buffer->commit_page = cpu_buffer->head_page;
+
+       INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
+       local_set(&cpu_buffer->reader_page->write, 0);
+       local_set(&cpu_buffer->reader_page->commit, 0);
+       cpu_buffer->reader_page->read = 0;
+
+       cpu_buffer->overrun = 0;
+       cpu_buffer->entries = 0;
+}
+
+/**
+ * ring_buffer_reset_cpu - reset a ring buffer per CPU buffer
+ * @buffer: The ring buffer to reset a per cpu buffer of
+ * @cpu: The CPU buffer to be reset
+ */
+void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
+{
+       struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
+       unsigned long flags;
+
+       if (!cpu_isset(cpu, buffer->cpumask))
+               return;
+
+       spin_lock_irqsave(&cpu_buffer->lock, flags);
+
+       rb_reset_cpu(cpu_buffer);
+
+       spin_unlock_irqrestore(&cpu_buffer->lock, flags);
+}
+
+/**
+ * ring_buffer_reset - reset a ring buffer
+ * @buffer: The ring buffer to reset all cpu buffers
+ */
+void ring_buffer_reset(struct ring_buffer *buffer)
+{
+       int cpu;
+
+       for_each_buffer_cpu(buffer, cpu)
+               ring_buffer_reset_cpu(buffer, cpu);
+}
+
+/**
+ * ring_buffer_empty - is the ring buffer empty?
+ * @buffer: The ring buffer to test
+ */
+int ring_buffer_empty(struct ring_buffer *buffer)
+{
+       struct ring_buffer_per_cpu *cpu_buffer;
+       int cpu;
+
+       /* yes this is racy, but if you don't like the race, lock the buffer */
+       for_each_buffer_cpu(buffer, cpu) {
+               cpu_buffer = buffer->buffers[cpu];
+               if (!rb_per_cpu_empty(cpu_buffer))
+                       return 0;
+       }
+       return 1;
+}
+
+/**
+ * ring_buffer_empty_cpu - is a cpu buffer of a ring buffer empty?
+ * @buffer: The ring buffer
+ * @cpu: The CPU buffer to test
+ */
+int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu)
+{
+       struct ring_buffer_per_cpu *cpu_buffer;
+
+       if (!cpu_isset(cpu, buffer->cpumask))
+               return 1;
+
+       cpu_buffer = buffer->buffers[cpu];
+       return rb_per_cpu_empty(cpu_buffer);
+}
+
+/**
+ * ring_buffer_swap_cpu - swap a CPU buffer between two ring buffers
+ * @buffer_a: One buffer to swap with
+ * @buffer_b: The other buffer to swap with
+ * @cpu: The CPU buffer to swap
+ *
+ * This function is useful for tracers that want to take a "snapshot"
+ * of a CPU buffer and have another backup buffer lying around.
+ * It is expected that the tracer handles the cpu buffer not being
+ * used at the moment.
+ */
+int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
+                        struct ring_buffer *buffer_b, int cpu)
+{
+       struct ring_buffer_per_cpu *cpu_buffer_a;
+       struct ring_buffer_per_cpu *cpu_buffer_b;
+
+       if (!cpu_isset(cpu, buffer_a->cpumask) ||
+           !cpu_isset(cpu, buffer_b->cpumask))
+               return -EINVAL;
+
+       /* At least make sure the two buffers are somewhat the same */
+       if (buffer_a->size != buffer_b->size ||
+           buffer_a->pages != buffer_b->pages)
+               return -EINVAL;
+
+       cpu_buffer_a = buffer_a->buffers[cpu];
+       cpu_buffer_b = buffer_b->buffers[cpu];
+
+       /*
+        * We can't do a synchronize_sched here because this
+        * function can be called in atomic context.
+        * Normally this will be called from the same CPU as cpu.
+        * If not it's up to the caller to protect this.
+        */
+       atomic_inc(&cpu_buffer_a->record_disabled);
+       atomic_inc(&cpu_buffer_b->record_disabled);
+
+       buffer_a->buffers[cpu] = cpu_buffer_b;
+       buffer_b->buffers[cpu] = cpu_buffer_a;
+
+       cpu_buffer_b->buffer = buffer_a;
+       cpu_buffer_a->buffer = buffer_b;
+
+       atomic_dec(&cpu_buffer_a->record_disabled);
+       atomic_dec(&cpu_buffer_b->record_disabled);
+
+       return 0;
+}
+
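This is the primitive behind the max-latency snapshot in trace.c below: reset a same-sized spare buffer, then swap the live cpu buffer into it. A sketch mirroring update_max_tr_single(), where "snap" and "live" are two ring buffers allocated with the same size:

	ring_buffer_reset(snap);	/* discard the old snapshot */
	if (ring_buffer_swap_cpu(snap, live, cpu))
		;	/* -EINVAL: sizes differ or cpu not in both masks */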
index 8f3fb3db61c39306bdbffd36303cfed7dab02420..d345d649d073a39d6e572b88047e726754e8da1f 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/utsrelease.h>
 #include <linux/kallsyms.h>
 #include <linux/seq_file.h>
+#include <linux/notifier.h>
 #include <linux/debugfs.h>
 #include <linux/pagemap.h>
 #include <linux/hardirq.h>
@@ -22,6 +23,7 @@
 #include <linux/ftrace.h>
 #include <linux/module.h>
 #include <linux/percpu.h>
+#include <linux/kdebug.h>
 #include <linux/ctype.h>
 #include <linux/init.h>
 #include <linux/poll.h>
 #include <linux/writeback.h>
 
 #include <linux/stacktrace.h>
+#include <linux/ring_buffer.h>
 
 #include "trace.h"
 
+#define TRACE_BUFFER_FLAGS     (RB_FL_OVERWRITE)
+
 unsigned long __read_mostly    tracing_max_latency = (cycle_t)ULONG_MAX;
 unsigned long __read_mostly    tracing_thresh;
 
-static unsigned long __read_mostly     tracing_nr_buffers;
+static DEFINE_PER_CPU(local_t, ftrace_cpu_disabled);
+
+static inline void ftrace_disable_cpu(void)
+{
+       preempt_disable();
+       local_inc(&__get_cpu_var(ftrace_cpu_disabled));
+}
+
+static inline void ftrace_enable_cpu(void)
+{
+       local_dec(&__get_cpu_var(ftrace_cpu_disabled));
+       preempt_enable();
+}
+
 static cpumask_t __read_mostly         tracing_buffer_mask;
 
 #define for_each_tracing_cpu(cpu)      \
        for_each_cpu_mask(cpu, tracing_buffer_mask)
 
-static int trace_alloc_page(void);
-static int trace_free_page(void);
-
 static int tracing_disabled = 1;
 
-static unsigned long tracing_pages_allocated;
-
 long
 ns2usecs(cycle_t nsec)
 {
@@ -60,7 +73,9 @@ ns2usecs(cycle_t nsec)
 
 cycle_t ftrace_now(int cpu)
 {
-       return cpu_clock(cpu);
+       u64 ts = ring_buffer_time_stamp(cpu);
+       ring_buffer_normalize_time_stamp(cpu, &ts);
+       return ts;
 }
 
 /*
@@ -100,11 +115,18 @@ static int                        tracer_enabled = 1;
 int                            ftrace_function_enabled;
 
 /*
- * trace_nr_entries is the number of entries that is allocated
- * for a buffer. Note, the number of entries is always rounded
- * to ENTRIES_PER_PAGE.
+ * trace_buf_size is the size in bytes that is allocated
+ * for a buffer. Note, the number of bytes is always rounded
+ * to page size.
+ *
+ * The default is purposely set to a low number of entries (16384).
+ * If a dump on oops happens, it is much appreciated not to have
+ * to wait for all that output. In any case, this is configurable
+ * at both boot time and run time.
  */
-static unsigned long           trace_nr_entries = 65536UL;
+#define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */
+
+static unsigned long           trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
 
 /* trace_types holds a link list of available tracers. */
 static struct tracer           *trace_types __read_mostly;
@@ -133,24 +155,6 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
 /* trace_flags holds iter_ctrl options */
 unsigned long trace_flags = TRACE_ITER_PRINT_PARENT;
 
-static notrace void no_trace_init(struct trace_array *tr)
-{
-       int cpu;
-
-       ftrace_function_enabled = 0;
-       if(tr->ctrl)
-               for_each_online_cpu(cpu)
-                       tracing_reset(tr->data[cpu]);
-       tracer_enabled = 0;
-}
-
-/* dummy trace to disable tracing */
-static struct tracer no_tracer __read_mostly = {
-       .name           = "none",
-       .init           = no_trace_init
-};
-
-
 /**
  * trace_wake_up - wake up tasks waiting for trace input
  *
@@ -167,44 +171,27 @@ void trace_wake_up(void)
                wake_up(&trace_wait);
 }
 
-#define ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(struct trace_entry))
-
-static int __init set_nr_entries(char *str)
+static int __init set_buf_size(char *str)
 {
-       unsigned long nr_entries;
+       unsigned long buf_size;
        int ret;
 
        if (!str)
                return 0;
-       ret = strict_strtoul(str, 0, &nr_entries);
+       ret = strict_strtoul(str, 0, &buf_size);
-       /* nr_entries can not be zero */
-       if (ret < 0 || nr_entries == 0)
+       /* buf_size can not be zero */
+       if (ret < 0 || buf_size == 0)
                return 0;
-       trace_nr_entries = nr_entries;
+       trace_buf_size = buf_size;
        return 1;
 }
-__setup("trace_entries=", set_nr_entries);
+__setup("trace_buf_size=", set_buf_size);
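With the rename, the buffer size is now given in bytes on the kernel command line, for example:

	trace_buf_size=1441792

strict_strtoul() is called with base 0, so hex (0x160000) works as well.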
 
 unsigned long nsecs_to_usecs(unsigned long nsecs)
 {
        return nsecs / 1000;
 }
 
-/*
- * trace_flag_type is an enumeration that holds different
- * states when a trace occurs. These are:
- *  IRQS_OFF   - interrupts were disabled
- *  NEED_RESCED - reschedule is requested
- *  HARDIRQ    - inside an interrupt handler
- *  SOFTIRQ    - inside a softirq handler
- */
-enum trace_flag_type {
-       TRACE_FLAG_IRQS_OFF             = 0x01,
-       TRACE_FLAG_NEED_RESCHED         = 0x02,
-       TRACE_FLAG_HARDIRQ              = 0x04,
-       TRACE_FLAG_SOFTIRQ              = 0x08,
-};
-
 /*
  * TRACE_ITER_SYM_MASK masks the options in trace_flags that
  * control the output of kernel symbols.
@@ -224,6 +211,7 @@ static const char *trace_options[] = {
        "block",
        "stacktrace",
        "sched-tree",
+       "ftrace_printk",
        NULL
 };
 
@@ -266,54 +254,6 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
        tracing_record_cmdline(current);
 }
 
-#define CHECK_COND(cond)                       \
-       if (unlikely(cond)) {                   \
-               tracing_disabled = 1;           \
-               WARN_ON(1);                     \
-               return -1;                      \
-       }
-
-/**
- * check_pages - integrity check of trace buffers
- *
- * As a safty measure we check to make sure the data pages have not
- * been corrupted.
- */
-int check_pages(struct trace_array_cpu *data)
-{
-       struct page *page, *tmp;
-
-       CHECK_COND(data->trace_pages.next->prev != &data->trace_pages);
-       CHECK_COND(data->trace_pages.prev->next != &data->trace_pages);
-
-       list_for_each_entry_safe(page, tmp, &data->trace_pages, lru) {
-               CHECK_COND(page->lru.next->prev != &page->lru);
-               CHECK_COND(page->lru.prev->next != &page->lru);
-       }
-
-       return 0;
-}
-
-/**
- * head_page - page address of the first page in per_cpu buffer.
- *
- * head_page returns the page address of the first page in
- * a per_cpu buffer. This also preforms various consistency
- * checks to make sure the buffer has not been corrupted.
- */
-void *head_page(struct trace_array_cpu *data)
-{
-       struct page *page;
-
-       if (list_empty(&data->trace_pages))
-               return NULL;
-
-       page = list_entry(data->trace_pages.next, struct page, lru);
-       BUG_ON(&page->lru == &data->trace_pages);
-
-       return page_address(page);
-}
-
 /**
  * trace_seq_printf - sequence printing of trace information
  * @s: trace sequence descriptor
@@ -395,28 +335,23 @@ trace_seq_putmem(struct trace_seq *s, void *mem, size_t len)
        return len;
 }
 
-#define HEX_CHARS 17
-static const char hex2asc[] = "0123456789abcdef";
+#define MAX_MEMHEX_BYTES       8
+#define HEX_CHARS              (MAX_MEMHEX_BYTES*2 + 1)
 
 static int
 trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len)
 {
        unsigned char hex[HEX_CHARS];
        unsigned char *data = mem;
-       unsigned char byte;
        int i, j;
 
-       BUG_ON(len >= HEX_CHARS);
-
 #ifdef __BIG_ENDIAN
        for (i = 0, j = 0; i < len; i++) {
 #else
        for (i = len-1, j = 0; i >= 0; i--) {
 #endif
-               byte = data[i];
-
-               hex[j++] = hex2asc[byte & 0x0f];
-               hex[j++] = hex2asc[byte >> 4];
+               hex[j++] = hex_asc_hi(data[i]);
+               hex[j++] = hex_asc_lo(data[i]);
        }
        hex[j++] = ' ';
 
@@ -460,34 +395,6 @@ trace_print_seq(struct seq_file *m, struct trace_seq *s)
        trace_seq_reset(s);
 }
 
-/*
- * flip the trace buffers between two trace descriptors.
- * This usually is the buffers between the global_trace and
- * the max_tr to record a snapshot of a current trace.
- *
- * The ftrace_max_lock must be held.
- */
-static void
-flip_trace(struct trace_array_cpu *tr1, struct trace_array_cpu *tr2)
-{
-       struct list_head flip_pages;
-
-       INIT_LIST_HEAD(&flip_pages);
-
-       memcpy(&tr1->trace_head_idx, &tr2->trace_head_idx,
-               sizeof(struct trace_array_cpu) -
-               offsetof(struct trace_array_cpu, trace_head_idx));
-
-       check_pages(tr1);
-       check_pages(tr2);
-       list_splice_init(&tr1->trace_pages, &flip_pages);
-       list_splice_init(&tr2->trace_pages, &tr1->trace_pages);
-       list_splice_init(&flip_pages, &tr2->trace_pages);
-       BUG_ON(!list_empty(&flip_pages));
-       check_pages(tr1);
-       check_pages(tr2);
-}
-
 /**
  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
  * @tr: tracer
@@ -500,17 +407,17 @@ flip_trace(struct trace_array_cpu *tr1, struct trace_array_cpu *tr2)
 void
 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
 {
-       struct trace_array_cpu *data;
-       int i;
+       struct ring_buffer *buf = tr->buffer;
 
        WARN_ON_ONCE(!irqs_disabled());
        __raw_spin_lock(&ftrace_max_lock);
-       /* clear out all the previous traces */
-       for_each_tracing_cpu(i) {
-               data = tr->data[i];
-               flip_trace(max_tr.data[i], data);
-               tracing_reset(data);
-       }
+
+       tr->buffer = max_tr.buffer;
+       max_tr.buffer = buf;
+
+       ftrace_disable_cpu();
+       ring_buffer_reset(tr->buffer);
+       ftrace_enable_cpu();
 
        __update_max_tr(tr, tsk, cpu);
        __raw_spin_unlock(&ftrace_max_lock);
@@ -527,16 +434,19 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
 void
 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
 {
-       struct trace_array_cpu *data = tr->data[cpu];
-       int i;
+       int ret;
 
        WARN_ON_ONCE(!irqs_disabled());
        __raw_spin_lock(&ftrace_max_lock);
-       for_each_tracing_cpu(i)
-               tracing_reset(max_tr.data[i]);
 
-       flip_trace(max_tr.data[cpu], data);
-       tracing_reset(data);
+       ftrace_disable_cpu();
+
+       ring_buffer_reset(max_tr.buffer);
+       ret = ring_buffer_swap_cpu(max_tr.buffer, tr->buffer, cpu);
+
+       ftrace_enable_cpu();
+
+       WARN_ON_ONCE(ret);
 
        __update_max_tr(tr, tsk, cpu);
        __raw_spin_unlock(&ftrace_max_lock);
@@ -573,7 +483,6 @@ int register_tracer(struct tracer *type)
 #ifdef CONFIG_FTRACE_STARTUP_TEST
        if (type->selftest) {
                struct tracer *saved_tracer = current_trace;
-               struct trace_array_cpu *data;
                struct trace_array *tr = &global_trace;
                int saved_ctrl = tr->ctrl;
                int i;
@@ -585,10 +494,7 @@ int register_tracer(struct tracer *type)
                 * If we fail, we do not register this tracer.
                 */
                for_each_tracing_cpu(i) {
-                       data = tr->data[i];
-                       if (!head_page(data))
-                               continue;
-                       tracing_reset(data);
+                       tracing_reset(tr, i);
                }
                current_trace = type;
                tr->ctrl = 0;
@@ -604,10 +510,7 @@ int register_tracer(struct tracer *type)
                }
                /* Only reset on passing, to avoid touching corrupted buffers */
                for_each_tracing_cpu(i) {
-                       data = tr->data[i];
-                       if (!head_page(data))
-                               continue;
-                       tracing_reset(data);
+                       tracing_reset(tr, i);
                }
                printk(KERN_CONT "PASSED\n");
        }
@@ -653,13 +556,11 @@ void unregister_tracer(struct tracer *type)
        mutex_unlock(&trace_types_lock);
 }
 
-void tracing_reset(struct trace_array_cpu *data)
+void tracing_reset(struct trace_array *tr, int cpu)
 {
-       data->trace_idx = 0;
-       data->overrun = 0;
-       data->trace_head = data->trace_tail = head_page(data);
-       data->trace_head_idx = 0;
-       data->trace_tail_idx = 0;
+       ftrace_disable_cpu();
+       ring_buffer_reset_cpu(tr->buffer, cpu);
+       ftrace_enable_cpu();
 }
 
 #define SAVED_CMDLINES 128
@@ -745,82 +646,16 @@ void tracing_record_cmdline(struct task_struct *tsk)
        trace_save_cmdline(tsk);
 }
 
-static inline struct list_head *
-trace_next_list(struct trace_array_cpu *data, struct list_head *next)
-{
-       /*
-        * Roundrobin - but skip the head (which is not a real page):
-        */
-       next = next->next;
-       if (unlikely(next == &data->trace_pages))
-               next = next->next;
-       BUG_ON(next == &data->trace_pages);
-
-       return next;
-}
-
-static inline void *
-trace_next_page(struct trace_array_cpu *data, void *addr)
-{
-       struct list_head *next;
-       struct page *page;
-
-       page = virt_to_page(addr);
-
-       next = trace_next_list(data, &page->lru);
-       page = list_entry(next, struct page, lru);
-
-       return page_address(page);
-}
-
-static inline struct trace_entry *
-tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data)
-{
-       unsigned long idx, idx_next;
-       struct trace_entry *entry;
-
-       data->trace_idx++;
-       idx = data->trace_head_idx;
-       idx_next = idx + 1;
-
-       BUG_ON(idx * TRACE_ENTRY_SIZE >= PAGE_SIZE);
-
-       entry = data->trace_head + idx * TRACE_ENTRY_SIZE;
-
-       if (unlikely(idx_next >= ENTRIES_PER_PAGE)) {
-               data->trace_head = trace_next_page(data, data->trace_head);
-               idx_next = 0;
-       }
-
-       if (data->trace_head == data->trace_tail &&
-           idx_next == data->trace_tail_idx) {
-               /* overrun */
-               data->overrun++;
-               data->trace_tail_idx++;
-               if (data->trace_tail_idx >= ENTRIES_PER_PAGE) {
-                       data->trace_tail =
-                               trace_next_page(data, data->trace_tail);
-                       data->trace_tail_idx = 0;
-               }
-       }
-
-       data->trace_head_idx = idx_next;
-
-       return entry;
-}
-
-static inline void
-tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags)
+void
+tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
+                            int pc)
 {
        struct task_struct *tsk = current;
-       unsigned long pc;
-
-       pc = preempt_count();
 
-       entry->preempt_count    = pc & 0xff;
-       entry->pid              = (tsk) ? tsk->pid : 0;
-       entry->t                = ftrace_now(raw_smp_processor_id());
-       entry->flags = (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
+       entry->preempt_count            = pc & 0xff;
+       entry->pid                      = (tsk) ? tsk->pid : 0;
+       entry->flags =
+               (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
                ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
                ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
                (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
@@ -828,145 +663,139 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags)
 
 void
 trace_function(struct trace_array *tr, struct trace_array_cpu *data,
-              unsigned long ip, unsigned long parent_ip, unsigned long flags)
+              unsigned long ip, unsigned long parent_ip, unsigned long flags,
+              int pc)
 {
-       struct trace_entry *entry;
+       struct ring_buffer_event *event;
+       struct ftrace_entry *entry;
        unsigned long irq_flags;
 
-       raw_local_irq_save(irq_flags);
-       __raw_spin_lock(&data->lock);
-       entry                   = tracing_get_trace_entry(tr, data);
-       tracing_generic_entry_update(entry, flags);
-       entry->type             = TRACE_FN;
-       entry->fn.ip            = ip;
-       entry->fn.parent_ip     = parent_ip;
-       __raw_spin_unlock(&data->lock);
-       raw_local_irq_restore(irq_flags);
+       /* If we are reading the ring buffer, don't trace */
+       if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
+               return;
+
+       event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+                                        &irq_flags);
+       if (!event)
+               return;
+       entry   = ring_buffer_event_data(event);
+       tracing_generic_entry_update(&entry->ent, flags, pc);
+       entry->ent.type                 = TRACE_FN;
+       entry->ip                       = ip;
+       entry->parent_ip                = parent_ip;
+       ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
 }
 
 void
 ftrace(struct trace_array *tr, struct trace_array_cpu *data,
-       unsigned long ip, unsigned long parent_ip, unsigned long flags)
+       unsigned long ip, unsigned long parent_ip, unsigned long flags,
+       int pc)
 {
        if (likely(!atomic_read(&data->disabled)))
-               trace_function(tr, data, ip, parent_ip, flags);
+               trace_function(tr, data, ip, parent_ip, flags, pc);
 }
 
-#ifdef CONFIG_MMIOTRACE
-void __trace_mmiotrace_rw(struct trace_array *tr, struct trace_array_cpu *data,
-                                               struct mmiotrace_rw *rw)
+static void ftrace_trace_stack(struct trace_array *tr,
+                              struct trace_array_cpu *data,
+                              unsigned long flags,
+                              int skip, int pc)
 {
-       struct trace_entry *entry;
+       struct ring_buffer_event *event;
+       struct stack_entry *entry;
+       struct stack_trace trace;
        unsigned long irq_flags;
 
-       raw_local_irq_save(irq_flags);
-       __raw_spin_lock(&data->lock);
-
-       entry                   = tracing_get_trace_entry(tr, data);
-       tracing_generic_entry_update(entry, 0);
-       entry->type             = TRACE_MMIO_RW;
-       entry->mmiorw           = *rw;
-
-       __raw_spin_unlock(&data->lock);
-       raw_local_irq_restore(irq_flags);
-
-       trace_wake_up();
-}
-
-void __trace_mmiotrace_map(struct trace_array *tr, struct trace_array_cpu *data,
-                                               struct mmiotrace_map *map)
-{
-       struct trace_entry *entry;
-       unsigned long irq_flags;
+       if (!(trace_flags & TRACE_ITER_STACKTRACE))
+               return;
 
-       raw_local_irq_save(irq_flags);
-       __raw_spin_lock(&data->lock);
+       event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+                                        &irq_flags);
+       if (!event)
+               return;
+       entry   = ring_buffer_event_data(event);
+       tracing_generic_entry_update(&entry->ent, flags, pc);
+       entry->ent.type         = TRACE_STACK;
 
-       entry                   = tracing_get_trace_entry(tr, data);
-       tracing_generic_entry_update(entry, 0);
-       entry->type             = TRACE_MMIO_MAP;
-       entry->mmiomap          = *map;
+       memset(&entry->caller, 0, sizeof(entry->caller));
 
-       __raw_spin_unlock(&data->lock);
-       raw_local_irq_restore(irq_flags);
+       trace.nr_entries        = 0;
+       trace.max_entries       = FTRACE_STACK_ENTRIES;
+       trace.skip              = skip;
+       trace.entries           = entry->caller;
 
-       trace_wake_up();
+       save_stack_trace(&trace);
+       ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
 }
-#endif
 
 void __trace_stack(struct trace_array *tr,
                   struct trace_array_cpu *data,
                   unsigned long flags,
                   int skip)
 {
-       struct trace_entry *entry;
-       struct stack_trace trace;
-
-       if (!(trace_flags & TRACE_ITER_STACKTRACE))
-               return;
-
-       entry                   = tracing_get_trace_entry(tr, data);
-       tracing_generic_entry_update(entry, flags);
-       entry->type             = TRACE_STACK;
-
-       memset(&entry->stack, 0, sizeof(entry->stack));
-
-       trace.nr_entries        = 0;
-       trace.max_entries       = FTRACE_STACK_ENTRIES;
-       trace.skip              = skip;
-       trace.entries           = entry->stack.caller;
-
-       save_stack_trace(&trace);
+       ftrace_trace_stack(tr, data, flags, skip, preempt_count());
 }
 
-void
-__trace_special(void *__tr, void *__data,
-               unsigned long arg1, unsigned long arg2, unsigned long arg3)
+static void
+ftrace_trace_special(void *__tr, void *__data,
+                    unsigned long arg1, unsigned long arg2, unsigned long arg3,
+                    int pc)
 {
+       struct ring_buffer_event *event;
        struct trace_array_cpu *data = __data;
        struct trace_array *tr = __tr;
-       struct trace_entry *entry;
+       struct special_entry *entry;
        unsigned long irq_flags;
 
-       raw_local_irq_save(irq_flags);
-       __raw_spin_lock(&data->lock);
-       entry                   = tracing_get_trace_entry(tr, data);
-       tracing_generic_entry_update(entry, 0);
-       entry->type             = TRACE_SPECIAL;
-       entry->special.arg1     = arg1;
-       entry->special.arg2     = arg2;
-       entry->special.arg3     = arg3;
-       __trace_stack(tr, data, irq_flags, 4);
-       __raw_spin_unlock(&data->lock);
-       raw_local_irq_restore(irq_flags);
+       event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+                                        &irq_flags);
+       if (!event)
+               return;
+       entry   = ring_buffer_event_data(event);
+       tracing_generic_entry_update(&entry->ent, 0, pc);
+       entry->ent.type                 = TRACE_SPECIAL;
+       entry->arg1                     = arg1;
+       entry->arg2                     = arg2;
+       entry->arg3                     = arg3;
+       ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+       ftrace_trace_stack(tr, data, irq_flags, 4, pc);
 
        trace_wake_up();
 }
 
+void
+__trace_special(void *__tr, void *__data,
+               unsigned long arg1, unsigned long arg2, unsigned long arg3)
+{
+       ftrace_trace_special(__tr, __data, arg1, arg2, arg3, preempt_count());
+}
+
 void
 tracing_sched_switch_trace(struct trace_array *tr,
                           struct trace_array_cpu *data,
                           struct task_struct *prev,
                           struct task_struct *next,
-                          unsigned long flags)
+                          unsigned long flags, int pc)
 {
-       struct trace_entry *entry;
+       struct ring_buffer_event *event;
+       struct ctx_switch_entry *entry;
        unsigned long irq_flags;
 
-       raw_local_irq_save(irq_flags);
-       __raw_spin_lock(&data->lock);
-       entry                   = tracing_get_trace_entry(tr, data);
-       tracing_generic_entry_update(entry, flags);
-       entry->type             = TRACE_CTX;
-       entry->ctx.prev_pid     = prev->pid;
-       entry->ctx.prev_prio    = prev->prio;
-       entry->ctx.prev_state   = prev->state;
-       entry->ctx.next_pid     = next->pid;
-       entry->ctx.next_prio    = next->prio;
-       entry->ctx.next_state   = next->state;
-       __trace_stack(tr, data, flags, 5);
-       __raw_spin_unlock(&data->lock);
-       raw_local_irq_restore(irq_flags);
+       event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+                                          &irq_flags);
+       if (!event)
+               return;
+       entry   = ring_buffer_event_data(event);
+       tracing_generic_entry_update(&entry->ent, flags, pc);
+       entry->ent.type                 = TRACE_CTX;
+       entry->prev_pid                 = prev->pid;
+       entry->prev_prio                = prev->prio;
+       entry->prev_state               = prev->state;
+       entry->next_pid                 = next->pid;
+       entry->next_prio                = next->prio;
+       entry->next_state               = next->state;
+       entry->next_cpu = task_cpu(next);
+       ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+       ftrace_trace_stack(tr, data, flags, 5, pc);
 }
 
 void
@@ -974,25 +803,28 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
                           struct trace_array_cpu *data,
                           struct task_struct *wakee,
                           struct task_struct *curr,
-                          unsigned long flags)
+                          unsigned long flags, int pc)
 {
-       struct trace_entry *entry;
+       struct ring_buffer_event *event;
+       struct ctx_switch_entry *entry;
        unsigned long irq_flags;
 
-       raw_local_irq_save(irq_flags);
-       __raw_spin_lock(&data->lock);
-       entry                   = tracing_get_trace_entry(tr, data);
-       tracing_generic_entry_update(entry, flags);
-       entry->type             = TRACE_WAKE;
-       entry->ctx.prev_pid     = curr->pid;
-       entry->ctx.prev_prio    = curr->prio;
-       entry->ctx.prev_state   = curr->state;
-       entry->ctx.next_pid     = wakee->pid;
-       entry->ctx.next_prio    = wakee->prio;
-       entry->ctx.next_state   = wakee->state;
-       __trace_stack(tr, data, flags, 6);
-       __raw_spin_unlock(&data->lock);
-       raw_local_irq_restore(irq_flags);
+       event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+                                          &irq_flags);
+       if (!event)
+               return;
+       entry   = ring_buffer_event_data(event);
+       tracing_generic_entry_update(&entry->ent, flags, pc);
+       entry->ent.type                 = TRACE_WAKE;
+       entry->prev_pid                 = curr->pid;
+       entry->prev_prio                = curr->prio;
+       entry->prev_state               = curr->state;
+       entry->next_pid                 = wakee->pid;
+       entry->next_prio                = wakee->prio;
+       entry->next_state               = wakee->state;
+       entry->next_cpu                 = task_cpu(wakee);
+       ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+       ftrace_trace_stack(tr, data, flags, 6, pc);
 
        trace_wake_up();
 }
@@ -1002,23 +834,21 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
 {
        struct trace_array *tr = &global_trace;
        struct trace_array_cpu *data;
-       unsigned long flags;
-       long disabled;
        int cpu;
+       int pc;
 
-       if (tracing_disabled || current_trace == &no_tracer || !tr->ctrl)
+       if (tracing_disabled || !tr->ctrl)
                return;
 
-       local_irq_save(flags);
+       pc = preempt_count();
+       preempt_disable_notrace();
        cpu = raw_smp_processor_id();
        data = tr->data[cpu];
-       disabled = atomic_inc_return(&data->disabled);
 
-       if (likely(disabled == 1))
-               __trace_special(tr, data, arg1, arg2, arg3);
+       if (likely(!atomic_read(&data->disabled)))
+               ftrace_trace_special(tr, data, arg1, arg2, arg3, pc);
 
-       atomic_dec(&data->disabled);
-       local_irq_restore(flags);
+       preempt_enable_notrace();
 }
 
 #ifdef CONFIG_FTRACE
@@ -1029,7 +859,8 @@ function_trace_call(unsigned long ip, unsigned long parent_ip)
        struct trace_array_cpu *data;
        unsigned long flags;
        long disabled;
-       int cpu;
+       int cpu, resched;
+       int pc;
 
        if (unlikely(!ftrace_function_enabled))
                return;
@@ -1037,16 +868,22 @@ function_trace_call(unsigned long ip, unsigned long parent_ip)
        if (skip_trace(ip))
                return;
 
-       local_irq_save(flags);
+       pc = preempt_count();
+       resched = need_resched();
+       preempt_disable_notrace();
+       local_save_flags(flags);
        cpu = raw_smp_processor_id();
        data = tr->data[cpu];
        disabled = atomic_inc_return(&data->disabled);
 
        if (likely(disabled == 1))
-               trace_function(tr, data, ip, parent_ip, flags);
+               trace_function(tr, data, ip, parent_ip, flags, pc);
 
        atomic_dec(&data->disabled);
-       local_irq_restore(flags);
+       if (resched)
+               preempt_enable_no_resched_notrace();
+       else
+               preempt_enable_notrace();
 }
 
 static struct ftrace_ops trace_ops __read_mostly =
@@ -1073,111 +910,96 @@ enum trace_file_type {
        TRACE_FILE_LAT_FMT      = 1,
 };
 
-static struct trace_entry *
-trace_entry_idx(struct trace_array *tr, struct trace_array_cpu *data,
-               struct trace_iterator *iter, int cpu)
+static void trace_iterator_increment(struct trace_iterator *iter, int cpu)
 {
-       struct page *page;
-       struct trace_entry *array;
+       /* Don't allow ftrace to trace into the ring buffers */
+       ftrace_disable_cpu();
 
-       if (iter->next_idx[cpu] >= tr->entries ||
-           iter->next_idx[cpu] >= data->trace_idx ||
-           (data->trace_head == data->trace_tail &&
-            data->trace_head_idx == data->trace_tail_idx))
-               return NULL;
+       iter->idx++;
+       if (iter->buffer_iter[iter->cpu])
+               ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
 
-       if (!iter->next_page[cpu]) {
-               /* Initialize the iterator for this cpu trace buffer */
-               WARN_ON(!data->trace_tail);
-               page = virt_to_page(data->trace_tail);
-               iter->next_page[cpu] = &page->lru;
-               iter->next_page_idx[cpu] = data->trace_tail_idx;
-       }
+       ftrace_enable_cpu();
+}
+
+static struct trace_entry *
+peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts)
+{
+       struct ring_buffer_event *event;
+       struct ring_buffer_iter *buf_iter = iter->buffer_iter[cpu];
 
-       page = list_entry(iter->next_page[cpu], struct page, lru);
-       BUG_ON(&data->trace_pages == &page->lru);
+       /* Don't allow ftrace to trace into the ring buffers */
+       ftrace_disable_cpu();
+
+       if (buf_iter)
+               event = ring_buffer_iter_peek(buf_iter, ts);
+       else
+               event = ring_buffer_peek(iter->tr->buffer, cpu, ts);
 
-       array = page_address(page);
+       ftrace_enable_cpu();
 
-       WARN_ON(iter->next_page_idx[cpu] >= ENTRIES_PER_PAGE);
-       return &array[iter->next_page_idx[cpu]];
+       return event ? ring_buffer_event_data(event) : NULL;
 }
 
 static struct trace_entry *
-find_next_entry(struct trace_iterator *iter, int *ent_cpu)
+__find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
 {
-       struct trace_array *tr = iter->tr;
+       struct ring_buffer *buffer = iter->tr->buffer;
        struct trace_entry *ent, *next = NULL;
+       u64 next_ts = 0, ts;
        int next_cpu = -1;
        int cpu;
 
        for_each_tracing_cpu(cpu) {
-               if (!head_page(tr->data[cpu]))
+
+               if (ring_buffer_empty_cpu(buffer, cpu))
                        continue;
-               ent = trace_entry_idx(tr, tr->data[cpu], iter, cpu);
+
+               ent = peek_next_entry(iter, cpu, &ts);
+
                /*
                 * Pick the entry with the smallest timestamp:
                 */
-               if (ent && (!next || ent->t < next->t)) {
+               if (ent && (!next || ts < next_ts)) {
                        next = ent;
                        next_cpu = cpu;
+                       next_ts = ts;
                }
        }
 
        if (ent_cpu)
                *ent_cpu = next_cpu;
 
+       if (ent_ts)
+               *ent_ts = next_ts;
+
        return next;
 }
 
-static void trace_iterator_increment(struct trace_iterator *iter)
+/* Find the next real entry, without updating the iterator itself */
+static struct trace_entry *
+find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
 {
-       iter->idx++;
-       iter->next_idx[iter->cpu]++;
-       iter->next_page_idx[iter->cpu]++;
-
-       if (iter->next_page_idx[iter->cpu] >= ENTRIES_PER_PAGE) {
-               struct trace_array_cpu *data = iter->tr->data[iter->cpu];
-
-               iter->next_page_idx[iter->cpu] = 0;
-               iter->next_page[iter->cpu] =
-                       trace_next_list(data, iter->next_page[iter->cpu]);
-       }
+       return __find_next_entry(iter, ent_cpu, ent_ts);
 }
 
-static void trace_consume(struct trace_iterator *iter)
+/* Find the next real entry, and increment the iterator to the next entry */
+static void *find_next_entry_inc(struct trace_iterator *iter)
 {
-       struct trace_array_cpu *data = iter->tr->data[iter->cpu];
+       iter->ent = __find_next_entry(iter, &iter->cpu, &iter->ts);
 
-       data->trace_tail_idx++;
-       if (data->trace_tail_idx >= ENTRIES_PER_PAGE) {
-               data->trace_tail = trace_next_page(data, data->trace_tail);
-               data->trace_tail_idx = 0;
-       }
+       if (iter->ent)
+               trace_iterator_increment(iter, iter->cpu);
 
-       /* Check if we empty it, then reset the index */
-       if (data->trace_head == data->trace_tail &&
-           data->trace_head_idx == data->trace_tail_idx)
-               data->trace_idx = 0;
+       return iter->ent ? iter : NULL;
 }
 
-static void *find_next_entry_inc(struct trace_iterator *iter)
+static void trace_consume(struct trace_iterator *iter)
 {
-       struct trace_entry *next;
-       int next_cpu = -1;
-
-       next = find_next_entry(iter, &next_cpu);
-
-       iter->prev_ent = iter->ent;
-       iter->prev_cpu = iter->cpu;
-
-       iter->ent = next;
-       iter->cpu = next_cpu;
-
-       if (next)
-               trace_iterator_increment(iter);
-
-       return next ? iter : NULL;
+       /* Don't allow ftrace to trace into the ring buffers */
+       ftrace_disable_cpu();
+       ring_buffer_consume(iter->tr->buffer, iter->cpu, &iter->ts);
+       ftrace_enable_cpu();
 }
 
 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
@@ -1210,7 +1032,7 @@ static void *s_start(struct seq_file *m, loff_t *pos)
        struct trace_iterator *iter = m->private;
        void *p = NULL;
        loff_t l = 0;
-       int i;
+       int cpu;
 
        mutex_lock(&trace_types_lock);
 
@@ -1229,14 +1051,15 @@ static void *s_start(struct seq_file *m, loff_t *pos)
                iter->ent = NULL;
                iter->cpu = 0;
                iter->idx = -1;
-               iter->prev_ent = NULL;
-               iter->prev_cpu = -1;
 
-               for_each_tracing_cpu(i) {
-                       iter->next_idx[i] = 0;
-                       iter->next_page[i] = NULL;
+               ftrace_disable_cpu();
+
+               for_each_tracing_cpu(cpu) {
+                       ring_buffer_iter_reset(iter->buffer_iter[cpu]);
                }
 
+               ftrace_enable_cpu();
+
                for (p = iter; p && l < *pos; p = s_next(m, p, &l))
                        ;
 
@@ -1330,21 +1153,21 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
 
 static void print_lat_help_header(struct seq_file *m)
 {
-       seq_puts(m, "#                _------=> CPU#            \n");
-       seq_puts(m, "#               / _-----=> irqs-off        \n");
-       seq_puts(m, "#              | / _----=> need-resched    \n");
-       seq_puts(m, "#              || / _---=> hardirq/softirq \n");
-       seq_puts(m, "#              ||| / _--=> preempt-depth   \n");
-       seq_puts(m, "#              |||| /                      \n");
-       seq_puts(m, "#              |||||     delay             \n");
-       seq_puts(m, "#  cmd     pid ||||| time  |   caller      \n");
-       seq_puts(m, "#     \\   /    |||||   \\   |   /           \n");
+       seq_puts(m, "#                  _------=> CPU#            \n");
+       seq_puts(m, "#                 / _-----=> irqs-off        \n");
+       seq_puts(m, "#                | / _----=> need-resched    \n");
+       seq_puts(m, "#                || / _---=> hardirq/softirq \n");
+       seq_puts(m, "#                ||| / _--=> preempt-depth   \n");
+       seq_puts(m, "#                |||| /                      \n");
+       seq_puts(m, "#                |||||     delay             \n");
+       seq_puts(m, "#  cmd     pid   ||||| time  |   caller      \n");
+       seq_puts(m, "#     \\   /      |||||   \\   |   /           \n");
 }
 
 static void print_func_help_header(struct seq_file *m)
 {
-       seq_puts(m, "#           TASK-PID   CPU#    TIMESTAMP  FUNCTION\n");
-       seq_puts(m, "#              | |      |          |         |\n");
+       seq_puts(m, "#           TASK-PID    CPU#    TIMESTAMP  FUNCTION\n");
+       seq_puts(m, "#              | |       |          |         |\n");
 }
 
 
@@ -1355,23 +1178,16 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter)
        struct trace_array *tr = iter->tr;
        struct trace_array_cpu *data = tr->data[tr->cpu];
        struct tracer *type = current_trace;
-       unsigned long total   = 0;
-       unsigned long entries = 0;
-       int cpu;
+       unsigned long total;
+       unsigned long entries;
        const char *name = "preemption";
 
        if (type)
                name = type->name;
 
-       for_each_tracing_cpu(cpu) {
-               if (head_page(tr->data[cpu])) {
-                       total += tr->data[cpu]->trace_idx;
-                       if (tr->data[cpu]->trace_idx > tr->entries)
-                               entries += tr->entries;
-                       else
-                               entries += tr->data[cpu]->trace_idx;
-               }
-       }
+       entries = ring_buffer_entries(iter->tr->buffer);
+       total = entries +
+               ring_buffer_overruns(iter->tr->buffer);
 
        seq_printf(m, "%s latency trace v1.1.5 on %s\n",
                   name, UTS_RELEASE);
@@ -1428,7 +1244,7 @@ lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
        comm = trace_find_cmdline(entry->pid);
 
        trace_seq_printf(s, "%8.8s-%-5d ", comm, entry->pid);
-       trace_seq_printf(s, "%d", cpu);
+       trace_seq_printf(s, "%3d", cpu);
        trace_seq_printf(s, "%c%c",
                        (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : '.',
                        ((entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.'));
@@ -1457,7 +1273,7 @@ lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
 unsigned long preempt_mark_thresh = 100;
 
 static void
-lat_print_timestamp(struct trace_seq *s, unsigned long long abs_usecs,
+lat_print_timestamp(struct trace_seq *s, u64 abs_usecs,
                    unsigned long rel_usecs)
 {
        trace_seq_printf(s, " %4lldus", abs_usecs);
@@ -1471,34 +1287,76 @@ lat_print_timestamp(struct trace_seq *s, unsigned long long abs_usecs,
 
 static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
 
-static int
+/*
+ * The message is supposed to contain an ending newline.
+ * If the printing stops prematurely, try to add a newline of our own.
+ */
+void trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter)
+{
+       struct trace_entry *ent;
+       struct trace_field_cont *cont;
+       bool ok = true;
+
+       ent = peek_next_entry(iter, iter->cpu, NULL);
+       if (!ent || ent->type != TRACE_CONT) {
+               trace_seq_putc(s, '\n');
+               return;
+       }
+
+       do {
+               cont = (struct trace_field_cont *)ent;
+               if (ok)
+                       ok = (trace_seq_printf(s, "%s", cont->buf) > 0);
+
+               ftrace_disable_cpu();
+
+               if (iter->buffer_iter[iter->cpu])
+                       ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
+               else
+                       ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL);
+
+               ftrace_enable_cpu();
+
+               ent = peek_next_entry(iter, iter->cpu, NULL);
+       } while (ent && ent->type == TRACE_CONT);
+
+       if (!ok)
+               trace_seq_putc(s, '\n');
+}
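
trace_seq_print_cont() stitches TRACE_CONT records onto the current line until a non-continuation entry appears, adding a newline if the message was cut short. The same stitching in miniature, over a fake record stream (illustrative types, not the kernel's):

#include <stdio.h>
#include <string.h>

enum { REC_PRINT, REC_CONT, REC_END };

struct rec { int type; const char *buf; };

static struct rec recs[] = {
        { REC_PRINT, "long messa" },
        { REC_CONT,  "ge split a" },
        { REC_CONT,  "cross records\n" },
        { REC_END,   NULL },
};

int main(void)
{
        char line[128] = "";
        int i = 0;

        strcat(line, recs[i++].buf);            /* the head record */
        while (recs[i].type == REC_CONT)        /* append continuations */
                strcat(line, recs[i++].buf);
        if (line[strlen(line) - 1] != '\n')     /* guarantee the newline */
                strcat(line, "\n");
        fputs(line, stdout);
        return 0;
}
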
+
+static enum print_line_t
 print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
 {
        struct trace_seq *s = &iter->seq;
        unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
-       struct trace_entry *next_entry = find_next_entry(iter, NULL);
+       struct trace_entry *next_entry;
        unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE);
        struct trace_entry *entry = iter->ent;
        unsigned long abs_usecs;
        unsigned long rel_usecs;
+       u64 next_ts;
        char *comm;
        int S, T;
        int i;
        unsigned state;
 
+       if (entry->type == TRACE_CONT)
+               return TRACE_TYPE_HANDLED;
+
+       next_entry = find_next_entry(iter, NULL, &next_ts);
        if (!next_entry)
-               next_entry = entry;
-       rel_usecs = ns2usecs(next_entry->t - entry->t);
-       abs_usecs = ns2usecs(entry->t - iter->tr->time_start);
+               next_ts = iter->ts;
+       rel_usecs = ns2usecs(next_ts - iter->ts);
+       abs_usecs = ns2usecs(iter->ts - iter->tr->time_start);
 
        if (verbose) {
                comm = trace_find_cmdline(entry->pid);
-               trace_seq_printf(s, "%16s %5d %d %d %08x %08x [%08lx]"
+               trace_seq_printf(s, "%16s %5d %3d %d %08x %08x [%08lx]"
                                 " %ld.%03ldms (+%ld.%03ldms): ",
                                 comm,
                                 entry->pid, cpu, entry->flags,
                                 entry->preempt_count, trace_idx,
-                                ns2usecs(entry->t),
+                                ns2usecs(iter->ts),
                                 abs_usecs/1000,
                                 abs_usecs % 1000, rel_usecs/1000,
                                 rel_usecs % 1000);
@@ -1507,52 +1365,85 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
                lat_print_timestamp(s, abs_usecs, rel_usecs);
        }
        switch (entry->type) {
-       case TRACE_FN:
-               seq_print_ip_sym(s, entry->fn.ip, sym_flags);
+       case TRACE_FN: {
+               struct ftrace_entry *field;
+
+               trace_assign_type(field, entry);
+
+               seq_print_ip_sym(s, field->ip, sym_flags);
                trace_seq_puts(s, " (");
-               if (kretprobed(entry->fn.parent_ip))
+               if (kretprobed(field->parent_ip))
                        trace_seq_puts(s, KRETPROBE_MSG);
                else
-                       seq_print_ip_sym(s, entry->fn.parent_ip, sym_flags);
+                       seq_print_ip_sym(s, field->parent_ip, sym_flags);
                trace_seq_puts(s, ")\n");
                break;
+       }
        case TRACE_CTX:
-       case TRACE_WAKE:
-               T = entry->ctx.next_state < sizeof(state_to_char) ?
-                       state_to_char[entry->ctx.next_state] : 'X';
+       case TRACE_WAKE: {
+               struct ctx_switch_entry *field;
+
+               trace_assign_type(field, entry);
 
-               state = entry->ctx.prev_state ? __ffs(entry->ctx.prev_state) + 1 : 0;
+               T = field->next_state < sizeof(state_to_char) ?
+                       state_to_char[field->next_state] : 'X';
+
+               state = field->prev_state ?
+                       __ffs(field->prev_state) + 1 : 0;
                S = state < sizeof(state_to_char) - 1 ? state_to_char[state] : 'X';
-               comm = trace_find_cmdline(entry->ctx.next_pid);
-               trace_seq_printf(s, " %5d:%3d:%c %s %5d:%3d:%c %s\n",
-                                entry->ctx.prev_pid,
-                                entry->ctx.prev_prio,
+               comm = trace_find_cmdline(field->next_pid);
+               trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n",
+                                field->prev_pid,
+                                field->prev_prio,
                                 S, entry->type == TRACE_CTX ? "==>" : "  +",
-                                entry->ctx.next_pid,
-                                entry->ctx.next_prio,
+                                field->next_cpu,
+                                field->next_pid,
+                                field->next_prio,
                                 T, comm);
                break;
-       case TRACE_SPECIAL:
+       }
+       case TRACE_SPECIAL: {
+               struct special_entry *field;
+
+               trace_assign_type(field, entry);
+
                trace_seq_printf(s, "# %ld %ld %ld\n",
-                                entry->special.arg1,
-                                entry->special.arg2,
-                                entry->special.arg3);
+                                field->arg1,
+                                field->arg2,
+                                field->arg3);
                break;
-       case TRACE_STACK:
+       }
+       case TRACE_STACK: {
+               struct stack_entry *field;
+
+               trace_assign_type(field, entry);
+
                for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
                        if (i)
                                trace_seq_puts(s, " <= ");
-                       seq_print_ip_sym(s, entry->stack.caller[i], sym_flags);
+                       seq_print_ip_sym(s, field->caller[i], sym_flags);
                }
                trace_seq_puts(s, "\n");
                break;
+       }
+       case TRACE_PRINT: {
+               struct print_entry *field;
+
+               trace_assign_type(field, entry);
+
+               seq_print_ip_sym(s, field->ip, sym_flags);
+               trace_seq_printf(s, ": %s", field->buf);
+               if (entry->flags & TRACE_FLAG_CONT)
+                       trace_seq_print_cont(s, iter);
+               break;
+       }
        default:
                trace_seq_printf(s, "Unknown type %d\n", entry->type);
        }
-       return 1;
+       return TRACE_TYPE_HANDLED;
 }
 
-static int print_trace_fmt(struct trace_iterator *iter)
+static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
 {
        struct trace_seq *s = &iter->seq;
        unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
@@ -1567,90 +1458,126 @@ static int print_trace_fmt(struct trace_iterator *iter)
 
        entry = iter->ent;
 
+       if (entry->type == TRACE_CONT)
+               return TRACE_TYPE_HANDLED;
+
        comm = trace_find_cmdline(iter->ent->pid);
 
-       t = ns2usecs(entry->t);
+       t = ns2usecs(iter->ts);
        usec_rem = do_div(t, 1000000ULL);
        secs = (unsigned long)t;
 
        ret = trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid);
        if (!ret)
-               return 0;
-       ret = trace_seq_printf(s, "[%02d] ", iter->cpu);
+               return TRACE_TYPE_PARTIAL_LINE;
+       ret = trace_seq_printf(s, "[%03d] ", iter->cpu);
        if (!ret)
-               return 0;
+               return TRACE_TYPE_PARTIAL_LINE;
        ret = trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem);
        if (!ret)
-               return 0;
+               return TRACE_TYPE_PARTIAL_LINE;
 
        switch (entry->type) {
-       case TRACE_FN:
-               ret = seq_print_ip_sym(s, entry->fn.ip, sym_flags);
+       case TRACE_FN: {
+               struct ftrace_entry *field;
+
+               trace_assign_type(field, entry);
+
+               ret = seq_print_ip_sym(s, field->ip, sym_flags);
                if (!ret)
-                       return 0;
+                       return TRACE_TYPE_PARTIAL_LINE;
                if ((sym_flags & TRACE_ITER_PRINT_PARENT) &&
-                                               entry->fn.parent_ip) {
+                                               field->parent_ip) {
                        ret = trace_seq_printf(s, " <-");
                        if (!ret)
-                               return 0;
-                       if (kretprobed(entry->fn.parent_ip))
+                               return TRACE_TYPE_PARTIAL_LINE;
+                       if (kretprobed(field->parent_ip))
                                ret = trace_seq_puts(s, KRETPROBE_MSG);
                        else
-                               ret = seq_print_ip_sym(s, entry->fn.parent_ip,
+                               ret = seq_print_ip_sym(s,
+                                                      field->parent_ip,
                                                       sym_flags);
                        if (!ret)
-                               return 0;
+                               return TRACE_TYPE_PARTIAL_LINE;
                }
                ret = trace_seq_printf(s, "\n");
                if (!ret)
-                       return 0;
+                       return TRACE_TYPE_PARTIAL_LINE;
                break;
+       }
        case TRACE_CTX:
-       case TRACE_WAKE:
-               S = entry->ctx.prev_state < sizeof(state_to_char) ?
-                       state_to_char[entry->ctx.prev_state] : 'X';
-               T = entry->ctx.next_state < sizeof(state_to_char) ?
-                       state_to_char[entry->ctx.next_state] : 'X';
-               ret = trace_seq_printf(s, " %5d:%3d:%c %s %5d:%3d:%c\n",
-                                      entry->ctx.prev_pid,
-                                      entry->ctx.prev_prio,
+       case TRACE_WAKE: {
+               struct ctx_switch_entry *field;
+
+               trace_assign_type(field, entry);
+
+               S = field->prev_state < sizeof(state_to_char) ?
+                       state_to_char[field->prev_state] : 'X';
+               T = field->next_state < sizeof(state_to_char) ?
+                       state_to_char[field->next_state] : 'X';
+               ret = trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c\n",
+                                      field->prev_pid,
+                                      field->prev_prio,
                                       S,
                                       entry->type == TRACE_CTX ? "==>" : "  +",
-                                      entry->ctx.next_pid,
-                                      entry->ctx.next_prio,
+                                      field->next_cpu,
+                                      field->next_pid,
+                                      field->next_prio,
                                       T);
                if (!ret)
-                       return 0;
+                       return TRACE_TYPE_PARTIAL_LINE;
                break;
-       case TRACE_SPECIAL:
+       }
+       case TRACE_SPECIAL: {
+               struct special_entry *field;
+
+               trace_assign_type(field, entry);
+
                ret = trace_seq_printf(s, "# %ld %ld %ld\n",
-                                entry->special.arg1,
-                                entry->special.arg2,
-                                entry->special.arg3);
+                                field->arg1,
+                                field->arg2,
+                                field->arg3);
                if (!ret)
-                       return 0;
+                       return TRACE_TYPE_PARTIAL_LINE;
                break;
-       case TRACE_STACK:
-               for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
+       }
+       case TRACE_STACK: {
+               struct stack_entry *field;
+
+               trace_assign_type(field, entry);
+
+               for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
                        if (i) {
                                ret = trace_seq_puts(s, " <= ");
                                if (!ret)
-                                       return 0;
+                                       return TRACE_TYPE_PARTIAL_LINE;
                        }
-                       ret = seq_print_ip_sym(s, entry->stack.caller[i],
+                       ret = seq_print_ip_sym(s, field->caller[i],
                                               sym_flags);
                        if (!ret)
-                               return 0;
+                               return TRACE_TYPE_PARTIAL_LINE;
                }
                ret = trace_seq_puts(s, "\n");
                if (!ret)
-                       return 0;
+                       return TRACE_TYPE_PARTIAL_LINE;
                break;
        }
-       return 1;
+       case TRACE_PRINT: {
+               struct print_entry *field;
+
+               trace_assign_type(field, entry);
+
+               seq_print_ip_sym(s, field->ip, sym_flags);
+               trace_seq_printf(s, ": %s", field->buf);
+               if (entry->flags & TRACE_FLAG_CONT)
+                       trace_seq_print_cont(s, iter);
+               break;
+       }
+       }
+       return TRACE_TYPE_HANDLED;
 }
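
The "%5lu.%06lu" timestamps above come from splitting a nanosecond counter: ns2usecs() divides by 1000 and do_div() then peels off the sub-second microseconds. A userspace equivalent of that arithmetic (do_div() is kernel-only; plain 64-bit division stands in):

#include <stdio.h>

int main(void)
{
        unsigned long long ts_ns = 1234567890123ULL;    /* sample timestamp */

        unsigned long long t = ts_ns / 1000;            /* ns -> us */
        unsigned long usec_rem = (unsigned long)(t % 1000000ULL);
        unsigned long secs = (unsigned long)(t / 1000000ULL);

        /* same shape as the trace output: " 1234.567890" */
        printf("%5lu.%06lu\n", secs, usec_rem);
        return 0;
}
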
 
-static int print_raw_fmt(struct trace_iterator *iter)
+static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
 {
        struct trace_seq *s = &iter->seq;
        struct trace_entry *entry;
@@ -1659,47 +1586,77 @@ static int print_raw_fmt(struct trace_iterator *iter)
 
        entry = iter->ent;
 
+       if (entry->type == TRACE_CONT)
+               return TRACE_TYPE_HANDLED;
+
        ret = trace_seq_printf(s, "%d %d %llu ",
-               entry->pid, iter->cpu, entry->t);
+               entry->pid, iter->cpu, iter->ts);
        if (!ret)
-               return 0;
+               return TRACE_TYPE_PARTIAL_LINE;
 
        switch (entry->type) {
-       case TRACE_FN:
+       case TRACE_FN: {
+               struct ftrace_entry *field;
+
+               trace_assign_type(field, entry);
+
                ret = trace_seq_printf(s, "%x %x\n",
-                                       entry->fn.ip, entry->fn.parent_ip);
+                                       field->ip,
+                                       field->parent_ip);
                if (!ret)
-                       return 0;
+                       return TRACE_TYPE_PARTIAL_LINE;
                break;
+       }
        case TRACE_CTX:
-       case TRACE_WAKE:
-               S = entry->ctx.prev_state < sizeof(state_to_char) ?
-                       state_to_char[entry->ctx.prev_state] : 'X';
-               T = entry->ctx.next_state < sizeof(state_to_char) ?
-                       state_to_char[entry->ctx.next_state] : 'X';
+       case TRACE_WAKE: {
+               struct ctx_switch_entry *field;
+
+               trace_assign_type(field, entry);
+
+               S = field->prev_state < sizeof(state_to_char) ?
+                       state_to_char[field->prev_state] : 'X';
+               T = field->next_state < sizeof(state_to_char) ?
+                       state_to_char[field->next_state] : 'X';
                if (entry->type == TRACE_WAKE)
                        S = '+';
-               ret = trace_seq_printf(s, "%d %d %c %d %d %c\n",
-                                      entry->ctx.prev_pid,
-                                      entry->ctx.prev_prio,
+               ret = trace_seq_printf(s, "%d %d %c %d %d %d %c\n",
+                                      field->prev_pid,
+                                      field->prev_prio,
                                       S,
-                                      entry->ctx.next_pid,
-                                      entry->ctx.next_prio,
+                                      field->next_cpu,
+                                      field->next_pid,
+                                      field->next_prio,
                                       T);
                if (!ret)
-                       return 0;
+                       return TRACE_TYPE_PARTIAL_LINE;
                break;
+       }
        case TRACE_SPECIAL:
-       case TRACE_STACK:
+       case TRACE_STACK: {
+               struct special_entry *field;
+
+               trace_assign_type(field, entry);
+
                ret = trace_seq_printf(s, "# %ld %ld %ld\n",
-                                entry->special.arg1,
-                                entry->special.arg2,
-                                entry->special.arg3);
+                                field->arg1,
+                                field->arg2,
+                                field->arg3);
                if (!ret)
-                       return 0;
+                       return TRACE_TYPE_PARTIAL_LINE;
                break;
        }
-       return 1;
+       case TRACE_PRINT: {
+               struct print_entry *field;
+
+               trace_assign_type(field, entry);
+
+               trace_seq_printf(s, "# %lx %s", field->ip, field->buf);
+               if (entry->flags & TRACE_FLAG_CONT)
+                       trace_seq_print_cont(s, iter);
+               break;
+       }
+       }
+       return TRACE_TYPE_HANDLED;
 }
 
 #define SEQ_PUT_FIELD_RET(s, x)                                \
@@ -1710,11 +1667,12 @@ do {                                                    \
 
 #define SEQ_PUT_HEX_FIELD_RET(s, x)                    \
 do {                                                   \
+       BUILD_BUG_ON(sizeof(x) > MAX_MEMHEX_BYTES);     \
        if (!trace_seq_putmem_hex(s, &(x), sizeof(x)))  \
                return 0;                               \
 } while (0)
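
The BUILD_BUG_ON() added to SEQ_PUT_HEX_FIELD_RET rejects oversized fields at compile time, since the hex formatter only has MAX_MEMHEX_BYTES of scratch space. A standalone sketch of the negative-array-size trick it relies on (the limit value below is assumed for illustration):

#include <stdio.h>

/* Compile-time assertion: the array size goes negative when cond is
 * true, so the build fails instead of misbehaving at runtime. */
#define BUILD_BUG_ON(cond) ((void)sizeof(char[1 - 2 * !!(cond)]))

#define MAX_MEMHEX_BYTES 8      /* illustrative limit */

int main(void)
{
        unsigned long long field = 42;

        BUILD_BUG_ON(sizeof(field) > MAX_MEMHEX_BYTES); /* ok: 8 <= 8 */
        /* BUILD_BUG_ON(sizeof(long double) > MAX_MEMHEX_BYTES);
         *         would refuse to compile */
        printf("%llx\n", field);
        return 0;
}
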
 
-static int print_hex_fmt(struct trace_iterator *iter)
+static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
 {
        struct trace_seq *s = &iter->seq;
        unsigned char newline = '\n';
@@ -1723,97 +1681,139 @@ static int print_hex_fmt(struct trace_iterator *iter)
 
        entry = iter->ent;
 
+       if (entry->type == TRACE_CONT)
+               return TRACE_TYPE_HANDLED;
+
        SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
        SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
-       SEQ_PUT_HEX_FIELD_RET(s, entry->t);
+       SEQ_PUT_HEX_FIELD_RET(s, iter->ts);
 
        switch (entry->type) {
-       case TRACE_FN:
-               SEQ_PUT_HEX_FIELD_RET(s, entry->fn.ip);
-               SEQ_PUT_HEX_FIELD_RET(s, entry->fn.parent_ip);
+       case TRACE_FN: {
+               struct ftrace_entry *field;
+
+               trace_assign_type(field, entry);
+
+               SEQ_PUT_HEX_FIELD_RET(s, field->ip);
+               SEQ_PUT_HEX_FIELD_RET(s, field->parent_ip);
                break;
+       }
        case TRACE_CTX:
-       case TRACE_WAKE:
-               S = entry->ctx.prev_state < sizeof(state_to_char) ?
-                       state_to_char[entry->ctx.prev_state] : 'X';
-               T = entry->ctx.next_state < sizeof(state_to_char) ?
-                       state_to_char[entry->ctx.next_state] : 'X';
+       case TRACE_WAKE: {
+               struct ctx_switch_entry *field;
+
+               trace_assign_type(field, entry);
+
+               S = field->prev_state < sizeof(state_to_char) ?
+                       state_to_char[field->prev_state] : 'X';
+               T = field->next_state < sizeof(state_to_char) ?
+                       state_to_char[field->next_state] : 'X';
                if (entry->type == TRACE_WAKE)
                        S = '+';
-               SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.prev_pid);
-               SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.prev_prio);
+               SEQ_PUT_HEX_FIELD_RET(s, field->prev_pid);
+               SEQ_PUT_HEX_FIELD_RET(s, field->prev_prio);
                SEQ_PUT_HEX_FIELD_RET(s, S);
-               SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.next_pid);
-               SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.next_prio);
-               SEQ_PUT_HEX_FIELD_RET(s, entry->fn.parent_ip);
+               SEQ_PUT_HEX_FIELD_RET(s, field->next_cpu);
+               SEQ_PUT_HEX_FIELD_RET(s, field->next_pid);
+               SEQ_PUT_HEX_FIELD_RET(s, field->next_prio);
                SEQ_PUT_HEX_FIELD_RET(s, T);
                break;
+       }
        case TRACE_SPECIAL:
-       case TRACE_STACK:
-               SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg1);
-               SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg2);
-               SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg3);
+       case TRACE_STACK: {
+               struct special_entry *field;
+
+               trace_assign_type(field, entry);
+
+               SEQ_PUT_HEX_FIELD_RET(s, field->arg1);
+               SEQ_PUT_HEX_FIELD_RET(s, field->arg2);
+               SEQ_PUT_HEX_FIELD_RET(s, field->arg3);
                break;
        }
+       }
        SEQ_PUT_FIELD_RET(s, newline);
 
-       return 1;
+       return TRACE_TYPE_HANDLED;
 }
 
-static int print_bin_fmt(struct trace_iterator *iter)
+static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
 {
        struct trace_seq *s = &iter->seq;
        struct trace_entry *entry;
 
        entry = iter->ent;
 
+       if (entry->type == TRACE_CONT)
+               return TRACE_TYPE_HANDLED;
+
        SEQ_PUT_FIELD_RET(s, entry->pid);
-       SEQ_PUT_FIELD_RET(s, entry->cpu);
-       SEQ_PUT_FIELD_RET(s, entry->t);
+       SEQ_PUT_FIELD_RET(s, iter->cpu);
+       SEQ_PUT_FIELD_RET(s, iter->ts);
 
        switch (entry->type) {
-       case TRACE_FN:
-               SEQ_PUT_FIELD_RET(s, entry->fn.ip);
-               SEQ_PUT_FIELD_RET(s, entry->fn.parent_ip);
+       case TRACE_FN: {
+               struct ftrace_entry *field;
+
+               trace_assign_type(field, entry);
+
+               SEQ_PUT_FIELD_RET(s, field->ip);
+               SEQ_PUT_FIELD_RET(s, field->parent_ip);
                break;
-       case TRACE_CTX:
-               SEQ_PUT_FIELD_RET(s, entry->ctx.prev_pid);
-               SEQ_PUT_FIELD_RET(s, entry->ctx.prev_prio);
-               SEQ_PUT_FIELD_RET(s, entry->ctx.prev_state);
-               SEQ_PUT_FIELD_RET(s, entry->ctx.next_pid);
-               SEQ_PUT_FIELD_RET(s, entry->ctx.next_prio);
-               SEQ_PUT_FIELD_RET(s, entry->ctx.next_state);
+       }
+       case TRACE_CTX: {
+               struct ctx_switch_entry *field;
+
+               trace_assign_type(field, entry);
+
+               SEQ_PUT_FIELD_RET(s, field->prev_pid);
+               SEQ_PUT_FIELD_RET(s, field->prev_prio);
+               SEQ_PUT_FIELD_RET(s, field->prev_state);
+               SEQ_PUT_FIELD_RET(s, field->next_pid);
+               SEQ_PUT_FIELD_RET(s, field->next_prio);
+               SEQ_PUT_FIELD_RET(s, field->next_state);
                break;
+       }
        case TRACE_SPECIAL:
-       case TRACE_STACK:
-               SEQ_PUT_FIELD_RET(s, entry->special.arg1);
-               SEQ_PUT_FIELD_RET(s, entry->special.arg2);
-               SEQ_PUT_FIELD_RET(s, entry->special.arg3);
+       case TRACE_STACK: {
+               struct special_entry *field;
+
+               trace_assign_type(field, entry);
+
+               SEQ_PUT_FIELD_RET(s, field->arg1);
+               SEQ_PUT_FIELD_RET(s, field->arg2);
+               SEQ_PUT_FIELD_RET(s, field->arg3);
                break;
        }
+       }
        return 1;
 }
 
 static int trace_empty(struct trace_iterator *iter)
 {
-       struct trace_array_cpu *data;
        int cpu;
 
        for_each_tracing_cpu(cpu) {
-               data = iter->tr->data[cpu];
-
-               if (head_page(data) && data->trace_idx &&
-                   (data->trace_tail != data->trace_head ||
-                    data->trace_tail_idx != data->trace_head_idx))
-                       return 0;
+               if (iter->buffer_iter[cpu]) {
+                       if (!ring_buffer_iter_empty(iter->buffer_iter[cpu]))
+                               return 0;
+               } else {
+                       if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu))
+                               return 0;
+               }
        }
+
        return 1;
 }
 
-static int print_trace_line(struct trace_iterator *iter)
+static enum print_line_t print_trace_line(struct trace_iterator *iter)
 {
-       if (iter->trace && iter->trace->print_line)
-               return iter->trace->print_line(iter);
+       enum print_line_t ret;
+
+       if (iter->trace && iter->trace->print_line) {
+               ret = iter->trace->print_line(iter);
+               if (ret != TRACE_TYPE_UNHANDLED)
+                       return ret;
+       }
 
        if (trace_flags & TRACE_ITER_BIN)
                return print_bin_fmt(iter);
@@ -1869,6 +1869,8 @@ static struct trace_iterator *
 __tracing_open(struct inode *inode, struct file *file, int *ret)
 {
        struct trace_iterator *iter;
+       struct seq_file *m;
+       int cpu;
 
        if (tracing_disabled) {
                *ret = -ENODEV;
@@ -1889,28 +1891,45 @@ __tracing_open(struct inode *inode, struct file *file, int *ret)
        iter->trace = current_trace;
        iter->pos = -1;
 
+       for_each_tracing_cpu(cpu) {
+
+               iter->buffer_iter[cpu] =
+                       ring_buffer_read_start(iter->tr->buffer, cpu);
+
+               if (!iter->buffer_iter[cpu])
+                       goto fail_buffer;
+       }
+
        /* TODO stop tracer */
        *ret = seq_open(file, &tracer_seq_ops);
-       if (!*ret) {
-               struct seq_file *m = file->private_data;
-               m->private = iter;
+       if (*ret)
+               goto fail_buffer;
 
-               /* stop the trace while dumping */
-               if (iter->tr->ctrl) {
-                       tracer_enabled = 0;
-                       ftrace_function_enabled = 0;
-               }
+       m = file->private_data;
+       m->private = iter;
 
-               if (iter->trace && iter->trace->open)
-                       iter->trace->open(iter);
-       } else {
-               kfree(iter);
-               iter = NULL;
+       /* stop the trace while dumping */
+       if (iter->tr->ctrl) {
+               tracer_enabled = 0;
+               ftrace_function_enabled = 0;
        }
+
+       if (iter->trace && iter->trace->open)
+               iter->trace->open(iter);
+
        mutex_unlock(&trace_types_lock);
 
  out:
        return iter;
+
+ fail_buffer:
+       for_each_tracing_cpu(cpu) {
+               if (iter->buffer_iter[cpu])
+                       ring_buffer_read_finish(iter->buffer_iter[cpu]);
+       }
+       mutex_unlock(&trace_types_lock);
+
+       return ERR_PTR(-ENOMEM);
 }
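
__tracing_open() now unwinds through the fail_buffer label so that every per-CPU iterator acquired so far is released when a later acquisition fails. The same cleanup idiom in a small userspace form (resource names invented):

#include <stdio.h>
#include <stdlib.h>

#define NR 4

int main(void)
{
        void *buf[NR] = { NULL };
        int i;

        for (i = 0; i < NR; i++) {
                buf[i] = malloc(1024);
                if (!buf[i])
                        goto fail;      /* partial init: unwind below */
        }
        puts("all buffers acquired");
        /* ... use the buffers ... */
        for (i = 0; i < NR; i++)
                free(buf[i]);
        return 0;

fail:
        /* free only what was actually allocated, like fail_buffer */
        for (i = 0; i < NR; i++)
                if (buf[i])
                        free(buf[i]);
        return 1;
}
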
 
 int tracing_open_generic(struct inode *inode, struct file *filp)
@@ -1926,8 +1945,14 @@ int tracing_release(struct inode *inode, struct file *file)
 {
        struct seq_file *m = (struct seq_file *)file->private_data;
        struct trace_iterator *iter = m->private;
+       int cpu;
 
        mutex_lock(&trace_types_lock);
+       for_each_tracing_cpu(cpu) {
+               if (iter->buffer_iter[cpu])
+                       ring_buffer_read_finish(iter->buffer_iter[cpu]);
+       }
+
        if (iter->trace && iter->trace->close)
                iter->trace->close(iter);
 
@@ -2352,9 +2377,11 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf,
        struct tracer *t;
        char buf[max_tracer_type_len+1];
        int i;
+       size_t ret;
 
        if (cnt > max_tracer_type_len)
                cnt = max_tracer_type_len;
+       ret = cnt;
 
        if (copy_from_user(&buf, ubuf, cnt))
                return -EFAULT;
@@ -2370,7 +2397,11 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf,
                if (strcmp(t->name, buf) == 0)
                        break;
        }
-       if (!t || t == current_trace)
+       if (!t) {
+               ret = -EINVAL;
+               goto out;
+       }
+       if (t == current_trace)
                goto out;
 
        if (current_trace && current_trace->reset)
@@ -2383,9 +2414,10 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf,
  out:
        mutex_unlock(&trace_types_lock);
 
-       filp->f_pos += cnt;
+       if (ret == cnt)
+               filp->f_pos += cnt;
 
-       return cnt;
+       return ret;
 }
 
 static ssize_t
@@ -2500,20 +2532,12 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
                  size_t cnt, loff_t *ppos)
 {
        struct trace_iterator *iter = filp->private_data;
-       struct trace_array_cpu *data;
-       static cpumask_t mask;
-       unsigned long flags;
-#ifdef CONFIG_FTRACE
-       int ftrace_save;
-#endif
-       int cpu;
        ssize_t sret;
 
        /* return any leftover data */
        sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
        if (sret != -EBUSY)
                return sret;
-       sret = 0;
 
        trace_seq_reset(&iter->seq);
 
@@ -2524,6 +2548,8 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
                        goto out;
        }
 
+waitagain:
+       sret = 0;
        while (trace_empty(iter)) {
 
                if ((filp->f_flags & O_NONBLOCK)) {
@@ -2588,46 +2614,12 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
               offsetof(struct trace_iterator, seq));
        iter->pos = -1;
 
-       /*
-        * We need to stop all tracing on all CPUS to read the
-        * the next buffer. This is a bit expensive, but is
-        * not done often. We fill all what we can read,
-        * and then release the locks again.
-        */
-
-       cpus_clear(mask);
-       local_irq_save(flags);
-#ifdef CONFIG_FTRACE
-       ftrace_save = ftrace_enabled;
-       ftrace_enabled = 0;
-#endif
-       smp_wmb();
-       for_each_tracing_cpu(cpu) {
-               data = iter->tr->data[cpu];
-
-               if (!head_page(data) || !data->trace_idx)
-                       continue;
-
-               atomic_inc(&data->disabled);
-               cpu_set(cpu, mask);
-       }
-
-       for_each_cpu_mask(cpu, mask) {
-               data = iter->tr->data[cpu];
-               __raw_spin_lock(&data->lock);
-
-               if (data->overrun > iter->last_overrun[cpu])
-                       iter->overrun[cpu] +=
-                               data->overrun - iter->last_overrun[cpu];
-               iter->last_overrun[cpu] = data->overrun;
-       }
-
        while (find_next_entry_inc(iter) != NULL) {
-               int ret;
+               enum print_line_t ret;
                int len = iter->seq.len;
 
                ret = print_trace_line(iter);
-               if (!ret) {
+               if (ret == TRACE_TYPE_PARTIAL_LINE) {
                        /* don't print partial lines */
                        iter->seq.len = len;
                        break;
@@ -2639,26 +2631,17 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
                        break;
        }
 
-       for_each_cpu_mask(cpu, mask) {
-               data = iter->tr->data[cpu];
-               __raw_spin_unlock(&data->lock);
-       }
-
-       for_each_cpu_mask(cpu, mask) {
-               data = iter->tr->data[cpu];
-               atomic_dec(&data->disabled);
-       }
-#ifdef CONFIG_FTRACE
-       ftrace_enabled = ftrace_save;
-#endif
-       local_irq_restore(flags);
-
        /* Now copy what we have to the user */
        sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
        if (iter->seq.readpos >= iter->seq.len)
                trace_seq_reset(&iter->seq);
+
+       /*
+        * If there was nothing to send to the user, in spite of consuming
+        * trace entries, go back and wait for more entries.
+        */
        if (sret == -EBUSY)
-               sret = 0;
+               goto waitagain;
 
 out:
        mutex_unlock(&trace_types_lock);
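
The new waitagain label covers a subtle case: a read may consume entries (for example, records that print nothing on their own) yet hand zero bytes to the user; instead of returning 0 and looking like EOF, the read loops back for more data. A toy sketch of that retry shape (the stream functions are stand-ins):

#include <stdio.h>
#include <string.h>

/* Stand-in input: empty "formatted" chunks simulate consumed-but-
 * unprintable entries; NULL simulates a real end of stream. */
static const char *chunks[] = { "", "", "hello\n", NULL };
static int idx;

static const char *next_chunk(void)
{
        return chunks[idx] ? chunks[idx++] : NULL;
}

static int read_pipe(char *ubuf, size_t cnt)
{
        const char *c;

waitagain:
        c = next_chunk();
        if (!c)
                return 0;               /* genuine EOF */
        if (!*c)
                goto waitagain;         /* consumed entries, nothing printable */
        strncpy(ubuf, c, cnt - 1);
        ubuf[cnt - 1] = '\0';
        return (int)strlen(ubuf);
}

int main(void)
{
        char buf[64];
        int n = read_pipe(buf, sizeof(buf));

        printf("got %d bytes: %s", n, buf);
        return 0;
}
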
@@ -2684,7 +2667,8 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
 {
        unsigned long val;
        char buf[64];
-       int i, ret;
+       int ret;
+       struct trace_array *tr = filp->private_data;
 
        if (cnt >= sizeof(buf))
                return -EINVAL;
@@ -2704,59 +2688,38 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
 
        mutex_lock(&trace_types_lock);
 
-       if (current_trace != &no_tracer) {
+       if (tr->ctrl) {
                cnt = -EBUSY;
-               pr_info("ftrace: set current_tracer to none"
+               pr_info("ftrace: please disable tracing"
                        " before modifying buffer size\n");
                goto out;
        }
 
-       if (val > global_trace.entries) {
-               long pages_requested;
-               unsigned long freeable_pages;
-
-               /* make sure we have enough memory before mapping */
-               pages_requested =
-                       (val + (ENTRIES_PER_PAGE-1)) / ENTRIES_PER_PAGE;
-
-               /* account for each buffer (and max_tr) */
-               pages_requested *= tracing_nr_buffers * 2;
-
-               /* Check for overflow */
-               if (pages_requested < 0) {
-                       cnt = -ENOMEM;
+       if (val != global_trace.entries) {
+               ret = ring_buffer_resize(global_trace.buffer, val);
+               if (ret < 0) {
+                       cnt = ret;
                        goto out;
                }
 
-               freeable_pages = determine_dirtyable_memory();
-
-               /* we only allow to request 1/4 of useable memory */
-               if (pages_requested >
-                   ((freeable_pages + tracing_pages_allocated) / 4)) {
-                       cnt = -ENOMEM;
-                       goto out;
-               }
-
-               while (global_trace.entries < val) {
-                       if (trace_alloc_page()) {
-                               cnt = -ENOMEM;
-                               goto out;
+               ret = ring_buffer_resize(max_tr.buffer, val);
+               if (ret < 0) {
+                       int r;
+                       cnt = ret;
+                       r = ring_buffer_resize(global_trace.buffer,
+                                              global_trace.entries);
+                       if (r < 0) {
+                               /* AARGH! We are left with a
+                                * different-sized max buffer! */
+                               WARN_ON(1);
+                               tracing_disabled = 1;
                        }
-                       /* double check that we don't go over the known pages */
-                       if (tracing_pages_allocated > pages_requested)
-                               break;
+                       goto out;
                }
 
-       } else {
-               /* include the number of entries in val (inc of page entries) */
-               while (global_trace.entries > val + (ENTRIES_PER_PAGE - 1))
-                       trace_free_page();
+               global_trace.entries = val;
        }
 
-       /* check integrity */
-       for_each_tracing_cpu(i)
-               check_pages(global_trace.data[i]);
-
        filp->f_pos += cnt;
 
        /* If check pages failed, return ENOMEM */
@@ -2769,6 +2732,52 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
        return cnt;
 }
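
Resizing now comes with a rollback: if max_tr's buffer cannot be resized, global_trace's buffer is restored to its old size so the pair never ends up mismatched. A sketch of that two-phase update using realloc() (names invented):

#include <stdio.h>
#include <stdlib.h>

struct buf { void *mem; size_t size; };

static int buf_resize(struct buf *b, size_t size)
{
        void *m = realloc(b->mem, size);

        if (!m)
                return -1;
        b->mem = m;
        b->size = size;
        return 0;
}

int main(void)
{
        struct buf a = { malloc(64), 64 }, b = { malloc(64), 64 };
        size_t oldsize = a.size, val = 4096;

        if (buf_resize(&a, val) < 0)
                return 1;               /* nothing changed yet */
        if (buf_resize(&b, val) < 0) {
                /* second resize failed: roll the first one back so
                 * the pair never has mismatched sizes */
                if (buf_resize(&a, oldsize) < 0)
                        fprintf(stderr, "rollback failed, disabling\n");
                return 1;
        }
        printf("both buffers now %zu bytes\n", a.size);
        free(a.mem);
        free(b.mem);
        return 0;
}
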
 
+static int mark_printk(const char *fmt, ...)
+{
+       int ret;
+       va_list args;
+       va_start(args, fmt);
+       ret = trace_vprintk(0, fmt, args);
+       va_end(args);
+       return ret;
+}
+
+static ssize_t
+tracing_mark_write(struct file *filp, const char __user *ubuf,
+                                       size_t cnt, loff_t *fpos)
+{
+       char *buf;
+       char *end;
+       struct trace_array *tr = &global_trace;
+
+       if (!tr->ctrl || tracing_disabled)
+               return -EINVAL;
+
+       if (cnt > TRACE_BUF_SIZE)
+               cnt = TRACE_BUF_SIZE;
+
+       buf = kmalloc(cnt + 1, GFP_KERNEL);
+       if (buf == NULL)
+               return -ENOMEM;
+
+       if (copy_from_user(buf, ubuf, cnt)) {
+               kfree(buf);
+               return -EFAULT;
+       }
+
+       /* Cut at the first NUL or newline. */
+       buf[cnt] = '\0';
+       end = strchr(buf, '\n');
+       if (end)
+               *end = '\0';
+
+       cnt = mark_printk("%s\n", buf);
+       kfree(buf);
+       *fpos += cnt;
+
+       return cnt;
+}
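
tracing_mark_write() copies at most TRACE_BUF_SIZE bytes, forces a terminating NUL, and truncates at the first newline before injecting the string into the trace. The same sanitizing steps in plain C (the buffer size is illustrative):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define BUF_SIZE 64     /* stands in for TRACE_BUF_SIZE */

int main(void)
{
        const char user_input[] = "first line\nsecond line";
        size_t cnt = sizeof(user_input) - 1;
        char *buf, *end;

        if (cnt > BUF_SIZE)
                cnt = BUF_SIZE;

        buf = malloc(cnt + 1);
        if (!buf)
                return 1;
        memcpy(buf, user_input, cnt);   /* copy_from_user() analog */

        buf[cnt] = '\0';                /* always NUL-terminate */
        end = strchr(buf, '\n');        /* cut at the first newline */
        if (end)
                *end = '\0';

        printf("marker: %s\n", buf);    /* -> "first line" only */
        free(buf);
        return 0;
}
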
+
 static struct file_operations tracing_max_lat_fops = {
        .open           = tracing_open_generic,
        .read           = tracing_max_lat_read,
@@ -2800,6 +2809,11 @@ static struct file_operations tracing_entries_fops = {
        .write          = tracing_entries_write,
 };
 
+static struct file_operations tracing_mark_fops = {
+       .open           = tracing_open_generic,
+       .write          = tracing_mark_write,
+};
+
 #ifdef CONFIG_DYNAMIC_FTRACE
 
 static ssize_t
@@ -2846,7 +2860,7 @@ struct dentry *tracing_init_dentry(void)
 #include "trace_selftest.c"
 #endif
 
-static __init void tracer_init_debugfs(void)
+static __init int tracer_init_debugfs(void)
 {
        struct dentry *d_tracer;
        struct dentry *entry;
@@ -2881,12 +2895,12 @@ static __init void tracer_init_debugfs(void)
        entry = debugfs_create_file("available_tracers", 0444, d_tracer,
                                    &global_trace, &show_traces_fops);
        if (!entry)
-               pr_warning("Could not create debugfs 'trace' entry\n");
+               pr_warning("Could not create debugfs 'available_tracers' entry\n");
 
        entry = debugfs_create_file("current_tracer", 0444, d_tracer,
                                    &global_trace, &set_tracer_fops);
        if (!entry)
-               pr_warning("Could not create debugfs 'trace' entry\n");
+               pr_warning("Could not create debugfs 'current_tracer' entry\n");
 
        entry = debugfs_create_file("tracing_max_latency", 0644, d_tracer,
                                    &tracing_max_latency,
@@ -2899,7 +2913,7 @@ static __init void tracer_init_debugfs(void)
                                    &tracing_thresh, &tracing_max_lat_fops);
        if (!entry)
                pr_warning("Could not create debugfs "
-                          "'tracing_threash' entry\n");
+                          "'tracing_thresh' entry\n");
        entry = debugfs_create_file("README", 0644, d_tracer,
                                    NULL, &tracing_readme_fops);
        if (!entry)
@@ -2909,13 +2923,19 @@ static __init void tracer_init_debugfs(void)
                                    NULL, &tracing_pipe_fops);
        if (!entry)
                pr_warning("Could not create debugfs "
-                          "'tracing_threash' entry\n");
+                          "'trace_pipe' entry\n");
 
        entry = debugfs_create_file("trace_entries", 0644, d_tracer,
                                    &global_trace, &tracing_entries_fops);
        if (!entry)
                pr_warning("Could not create debugfs "
-                          "'tracing_threash' entry\n");
+                          "'trace_entries' entry\n");
+
+       entry = debugfs_create_file("trace_marker", 0220, d_tracer,
+                                   NULL, &tracing_mark_fops);
+       if (!entry)
+               pr_warning("Could not create debugfs "
+                          "'trace_marker' entry\n");
 
 #ifdef CONFIG_DYNAMIC_FTRACE
        entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer,
@@ -2928,230 +2948,263 @@ static __init void tracer_init_debugfs(void)
 #ifdef CONFIG_SYSPROF_TRACER
        init_tracer_sysprof_debugfs(d_tracer);
 #endif
+       return 0;
 }
 
-static int trace_alloc_page(void)
+int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
 {
+       static DEFINE_SPINLOCK(trace_buf_lock);
+       static char trace_buf[TRACE_BUF_SIZE];
+
+       struct ring_buffer_event *event;
+       struct trace_array *tr = &global_trace;
        struct trace_array_cpu *data;
-       struct page *page, *tmp;
-       LIST_HEAD(pages);
-       void *array;
-       unsigned pages_allocated = 0;
-       int i;
+       struct print_entry *entry;
+       unsigned long flags, irq_flags;
+       int cpu, len = 0, size, pc;
 
-       /* first allocate a page for each CPU */
-       for_each_tracing_cpu(i) {
-               array = (void *)__get_free_page(GFP_KERNEL);
-               if (array == NULL) {
-                       printk(KERN_ERR "tracer: failed to allocate page"
-                              "for trace buffer!\n");
-                       goto free_pages;
-               }
+       if (!tr->ctrl || tracing_disabled)
+               return 0;
 
-               pages_allocated++;
-               page = virt_to_page(array);
-               list_add(&page->lru, &pages);
+       pc = preempt_count();
+       preempt_disable_notrace();
+       cpu = raw_smp_processor_id();
+       data = tr->data[cpu];
 
-/* Only allocate if we are actually using the max trace */
-#ifdef CONFIG_TRACER_MAX_TRACE
-               array = (void *)__get_free_page(GFP_KERNEL);
-               if (array == NULL) {
-                       printk(KERN_ERR "tracer: failed to allocate page"
-                              "for trace buffer!\n");
-                       goto free_pages;
-               }
-               pages_allocated++;
-               page = virt_to_page(array);
-               list_add(&page->lru, &pages);
-#endif
-       }
+       if (unlikely(atomic_read(&data->disabled)))
+               goto out;
 
-       /* Now that we successfully allocate a page per CPU, add them */
-       for_each_tracing_cpu(i) {
-               data = global_trace.data[i];
-               page = list_entry(pages.next, struct page, lru);
-               list_del_init(&page->lru);
-               list_add_tail(&page->lru, &data->trace_pages);
-               ClearPageLRU(page);
+       spin_lock_irqsave(&trace_buf_lock, flags);
+       len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
 
-#ifdef CONFIG_TRACER_MAX_TRACE
-               data = max_tr.data[i];
-               page = list_entry(pages.next, struct page, lru);
-               list_del_init(&page->lru);
-               list_add_tail(&page->lru, &data->trace_pages);
-               SetPageLRU(page);
-#endif
-       }
-       tracing_pages_allocated += pages_allocated;
-       global_trace.entries += ENTRIES_PER_PAGE;
+       len = min(len, TRACE_BUF_SIZE - 1);
+       trace_buf[len] = 0;
 
-       return 0;
+       size = sizeof(*entry) + len + 1;
+       event = ring_buffer_lock_reserve(tr->buffer, size, &irq_flags);
+       if (!event)
+               goto out_unlock;
+       entry = ring_buffer_event_data(event);
+       tracing_generic_entry_update(&entry->ent, flags, pc);
+       entry->ent.type                 = TRACE_PRINT;
+       entry->ip                       = ip;
 
- free_pages:
-       list_for_each_entry_safe(page, tmp, &pages, lru) {
-               list_del_init(&page->lru);
-               __free_page(page);
-       }
-       return -ENOMEM;
+       memcpy(&entry->buf, trace_buf, len);
+       entry->buf[len] = 0;
+       ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+
+ out_unlock:
+       spin_unlock_irqrestore(&trace_buf_lock, flags);
+
+ out:
+       preempt_enable_notrace();
+
+       return len;
 }
+EXPORT_SYMBOL_GPL(trace_vprintk);
 
-static int trace_free_page(void)
+int __ftrace_printk(unsigned long ip, const char *fmt, ...)
 {
-       struct trace_array_cpu *data;
-       struct page *page;
-       struct list_head *p;
-       int i;
-       int ret = 0;
+       int ret;
+       va_list ap;
 
-       /* free one page from each buffer */
-       for_each_tracing_cpu(i) {
-               data = global_trace.data[i];
-               p = data->trace_pages.next;
-               if (p == &data->trace_pages) {
-                       /* should never happen */
-                       WARN_ON(1);
-                       tracing_disabled = 1;
-                       ret = -1;
-                       break;
-               }
-               page = list_entry(p, struct page, lru);
-               ClearPageLRU(page);
-               list_del(&page->lru);
-               tracing_pages_allocated--;
-               tracing_pages_allocated--;
-               __free_page(page);
+       if (!(trace_flags & TRACE_ITER_PRINTK))
+               return 0;
 
-               tracing_reset(data);
+       va_start(ap, fmt);
+       ret = trace_vprintk(ip, fmt, ap);
+       va_end(ap);
+       return ret;
+}
+EXPORT_SYMBOL_GPL(__ftrace_printk);
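
__ftrace_printk() is a thin varargs shim: it checks the PRINTK flag and forwards its va_list to trace_vprintk(), which does the real vsnprintf() into a bounded buffer. The forwarding pattern on its own, in userspace form (function names are the sketch's):

#include <stdarg.h>
#include <stdio.h>

static int my_vlog(const char *fmt, va_list args)
{
        char buf[256];
        /* format once into a bounded buffer, like trace_vprintk() */
        int len = vsnprintf(buf, sizeof(buf), fmt, args);

        fputs(buf, stdout);
        return len;
}

/* Varargs front end: capture ..., hand the va_list to the worker. */
static int my_log(const char *fmt, ...)
{
        va_list ap;
        int ret;

        va_start(ap, fmt);
        ret = my_vlog(fmt, ap);
        va_end(ap);
        return ret;
}

int main(void)
{
        my_log("pid=%d comm=%s\n", 42, "demo");
        return 0;
}
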
 
-#ifdef CONFIG_TRACER_MAX_TRACE
-               data = max_tr.data[i];
-               p = data->trace_pages.next;
-               if (p == &data->trace_pages) {
-                       /* should never happen */
-                       WARN_ON(1);
-                       tracing_disabled = 1;
-                       ret = -1;
-                       break;
-               }
-               page = list_entry(p, struct page, lru);
-               ClearPageLRU(page);
-               list_del(&page->lru);
-               __free_page(page);
+static int trace_panic_handler(struct notifier_block *this,
+                              unsigned long event, void *unused)
+{
+       ftrace_dump();
+       return NOTIFY_OK;
+}
 
-               tracing_reset(data);
-#endif
-       }
-       global_trace.entries -= ENTRIES_PER_PAGE;
+static struct notifier_block trace_panic_notifier = {
+       .notifier_call  = trace_panic_handler,
+       .next           = NULL,
+       .priority       = 150   /* priority: INT_MAX >= x >= 0 */
+};
 
-       return ret;
+static int trace_die_handler(struct notifier_block *self,
+                            unsigned long val,
+                            void *data)
+{
+       switch (val) {
+       case DIE_OOPS:
+               ftrace_dump();
+               break;
+       default:
+               break;
+       }
+       return NOTIFY_OK;
 }
 
-__init static int tracer_alloc_buffers(void)
+static struct notifier_block trace_die_notifier = {
+       .notifier_call = trace_die_handler,
+       .priority = 200
+};
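
The panic and die notifiers hook ftrace_dump() into existing notification chains, with higher-priority callbacks running earlier. A toy callback chain showing that register-and-notify idea (the structure below is a simplification, not the kernel's notifier API):

#include <stdio.h>

struct notifier {
        int (*call)(unsigned long event);
        int priority;
        struct notifier *next;
};

static struct notifier *chain;

/* Insert sorted by descending priority, so higher runs first. */
static void notifier_register(struct notifier *n)
{
        struct notifier **p = &chain;

        while (*p && (*p)->priority >= n->priority)
                p = &(*p)->next;
        n->next = *p;
        *p = n;
}

static void notify(unsigned long event)
{
        for (struct notifier *n = chain; n; n = n->next)
                n->call(event);
}

static int dump_cb(unsigned long e)  { printf("dump on %lu\n", e);  return 0; }
static int other_cb(unsigned long e) { printf("other on %lu\n", e); return 0; }

static struct notifier dump_n  = { dump_cb,  150 };
static struct notifier other_n = { other_cb, 100 };

int main(void)
{
        notifier_register(&other_n);
        notifier_register(&dump_n);     /* higher priority: runs first */
        notify(1);
        return 0;
}
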
+
+/*
+ * printk is limited to 1024 bytes; we really don't need it that big.
+ * Nothing should be printing 1000 characters anyway.
+ */
+#define TRACE_MAX_PRINT                1000
+
+/*
+ * Define KERN_TRACE here so that we have one place to modify
+ * it if we decide to change what log level the ftrace dump
+ * should be at.
+ */
+#define KERN_TRACE             KERN_INFO
+
+static void
+trace_printk_seq(struct trace_seq *s)
 {
-       struct trace_array_cpu *data;
-       void *array;
-       struct page *page;
-       int pages = 0;
-       int ret = -ENOMEM;
-       int i;
+       /* Probably should print a warning here. */
+       if (s->len >= TRACE_MAX_PRINT)
+               s->len = TRACE_MAX_PRINT;
 
-       /* TODO: make the number of buffers hot pluggable with CPUS */
-       tracing_nr_buffers = num_possible_cpus();
-       tracing_buffer_mask = cpu_possible_map;
+       /* should be NUL-terminated already, but we are paranoid. */
+       s->buffer[s->len] = 0;
 
-       /* Allocate the first page for all buffers */
-       for_each_tracing_cpu(i) {
-               data = global_trace.data[i] = &per_cpu(global_trace_cpu, i);
-               max_tr.data[i] = &per_cpu(max_data, i);
+       printk(KERN_TRACE "%s", s->buffer);
 
-               array = (void *)__get_free_page(GFP_KERNEL);
-               if (array == NULL) {
-                       printk(KERN_ERR "tracer: failed to allocate page"
-                              "for trace buffer!\n");
-                       goto free_buffers;
-               }
+       trace_seq_reset(s);
+}
 
-               /* set the array to the list */
-               INIT_LIST_HEAD(&data->trace_pages);
-               page = virt_to_page(array);
-               list_add(&page->lru, &data->trace_pages);
-               /* use the LRU flag to differentiate the two buffers */
-               ClearPageLRU(page);
 
-               data->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
-               max_tr.data[i]->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+void ftrace_dump(void)
+{
+       static DEFINE_SPINLOCK(ftrace_dump_lock);
+       /* use static because iter can be a bit big for the stack */
+       static struct trace_iterator iter;
+       static cpumask_t mask;
+       static int dump_ran;
+       unsigned long flags;
+       int cnt = 0, cpu;
 
-/* Only allocate if we are actually using the max trace */
-#ifdef CONFIG_TRACER_MAX_TRACE
-               array = (void *)__get_free_page(GFP_KERNEL);
-               if (array == NULL) {
-                       printk(KERN_ERR "tracer: failed to allocate page"
-                              "for trace buffer!\n");
-                       goto free_buffers;
-               }
+       /* only one dump */
+       spin_lock_irqsave(&ftrace_dump_lock, flags);
+       if (dump_ran)
+               goto out;
 
-               INIT_LIST_HEAD(&max_tr.data[i]->trace_pages);
-               page = virt_to_page(array);
-               list_add(&page->lru, &max_tr.data[i]->trace_pages);
-               SetPageLRU(page);
-#endif
+       dump_ran = 1;
+
+       /* No turning back! */
+       ftrace_kill_atomic();
+
+       for_each_tracing_cpu(cpu) {
+               atomic_inc(&global_trace.data[cpu]->disabled);
        }
 
+       printk(KERN_TRACE "Dumping ftrace buffer:\n");
+
+       iter.tr = &global_trace;
+       iter.trace = current_trace;
+
        /*
-        * Since we allocate by orders of pages, we may be able to
-        * round up a bit.
+        * We need to stop all tracing on all CPUs to read
+        * the next buffer. This is a bit expensive, but is
+        * not done often. We fill all we can read,
+        * and then release the locks again.
         */
-       global_trace.entries = ENTRIES_PER_PAGE;
-       pages++;
 
-       while (global_trace.entries < trace_nr_entries) {
-               if (trace_alloc_page())
-                       break;
-               pages++;
+       cpus_clear(mask);
+
+       while (!trace_empty(&iter)) {
+
+               if (!cnt)
+                       printk(KERN_TRACE "---------------------------------\n");
+
+               cnt++;
+
+               /* reset all but tr, trace, and overruns */
+               memset(&iter.seq, 0,
+                      sizeof(struct trace_iterator) -
+                      offsetof(struct trace_iterator, seq));
+               iter.iter_flags |= TRACE_FILE_LAT_FMT;
+               iter.pos = -1;
+
+               if (find_next_entry_inc(&iter) != NULL) {
+                       print_trace_line(&iter);
+                       trace_consume(&iter);
+               }
+
+               trace_printk_seq(&iter.seq);
        }
-       max_tr.entries = global_trace.entries;
 
-       pr_info("tracer: %d pages allocated for %ld entries of %ld bytes\n",
-               pages, trace_nr_entries, (long)TRACE_ENTRY_SIZE);
-       pr_info("   actual entries %ld\n", global_trace.entries);
+       if (!cnt)
+               printk(KERN_TRACE "   (ftrace buffer empty)\n");
+       else
+               printk(KERN_TRACE "---------------------------------\n");
+
+ out:
+       spin_unlock_irqrestore(&ftrace_dump_lock, flags);
+}
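/*
 * The memset()/offsetof() pair in ftrace_dump() above clears only
 * the tail of the iterator while keeping the leading fields.  A
 * minimal, self-contained sketch of the same idiom (the struct and
 * its fields below are illustrative, not kernel types):
 */
#include <stddef.h>
#include <string.h>

struct iter_like {
	void *tr;		/* preserved across resets */
	void *trace;		/* preserved across resets */
	char  seq[64];		/* zeroed from here to the end */
	long  pos;
	int   idx;
};

static void reset_tail(struct iter_like *it)
{
	/* Zero everything from 'seq' onward, leaving tr/trace intact. */
	memset(&it->seq, 0,
	       sizeof(struct iter_like) - offsetof(struct iter_like, seq));
}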
+
+__init static int tracer_alloc_buffers(void)
+{
+       struct trace_array_cpu *data;
+       int i;
+
+       /* TODO: make the number of buffers hot pluggable with CPUS */
+       tracing_buffer_mask = cpu_possible_map;
+
+       global_trace.buffer = ring_buffer_alloc(trace_buf_size,
+                                                  TRACE_BUFFER_FLAGS);
+       if (!global_trace.buffer) {
+               printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
+               WARN_ON(1);
+               return 0;
+       }
+       global_trace.entries = ring_buffer_size(global_trace.buffer);
+
+#ifdef CONFIG_TRACER_MAX_TRACE
+       max_tr.buffer = ring_buffer_alloc(trace_buf_size,
+                                            TRACE_BUFFER_FLAGS);
+       if (!max_tr.buffer) {
+               printk(KERN_ERR "tracer: failed to allocate max ring buffer!\n");
+               WARN_ON(1);
+               ring_buffer_free(global_trace.buffer);
+               return 0;
+       }
+       max_tr.entries = ring_buffer_size(max_tr.buffer);
+       WARN_ON(max_tr.entries != global_trace.entries);
+#endif
 
-       tracer_init_debugfs();
+       /* Allocate the first page for all buffers */
+       for_each_tracing_cpu(i) {
+               data = global_trace.data[i] = &per_cpu(global_trace_cpu, i);
+               max_tr.data[i] = &per_cpu(max_data, i);
+       }
 
        trace_init_cmdlines();
 
-       register_tracer(&no_tracer);
-       current_trace = &no_tracer;
+       register_tracer(&nop_trace);
+#ifdef CONFIG_BOOT_TRACER
+       register_tracer(&boot_tracer);
+       current_trace = &boot_tracer;
+       current_trace->init(&global_trace);
+#else
+       current_trace = &nop_trace;
+#endif
 
        /* All seems OK, enable tracing */
        global_trace.ctrl = tracer_enabled;
        tracing_disabled = 0;
 
-       return 0;
+       atomic_notifier_chain_register(&panic_notifier_list,
+                                      &trace_panic_notifier);
 
- free_buffers:
-       for (i-- ; i >= 0; i--) {
-               struct page *page, *tmp;
-               struct trace_array_cpu *data = global_trace.data[i];
+       register_die_notifier(&trace_die_notifier);
 
-               if (data) {
-                       list_for_each_entry_safe(page, tmp,
-                                                &data->trace_pages, lru) {
-                               list_del_init(&page->lru);
-                               __free_page(page);
-                       }
-               }
-
-#ifdef CONFIG_TRACER_MAX_TRACE
-               data = max_tr.data[i];
-               if (data) {
-                       list_for_each_entry_safe(page, tmp,
-                                                &data->trace_pages, lru) {
-                               list_del_init(&page->lru);
-                               __free_page(page);
-                       }
-               }
-#endif
-       }
-       return ret;
+       return 0;
 }
-fs_initcall(tracer_alloc_buffers);
+early_initcall(tracer_alloc_buffers);
+fs_initcall(tracer_init_debugfs);
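Splitting the old single fs_initcall() into early_initcall(tracer_alloc_buffers) plus fs_initcall(tracer_init_debugfs) lets the ring buffers exist before the boot tracer's first events, while the debugfs files still wait until the VFS is usable. A hedged sketch of the same staging (the function names below are illustrative):

static int __init my_buffers_init(void)
{
	return 0;	/* allocate core state as early as possible */
}

static int __init my_debugfs_init(void)
{
	return 0;	/* expose control files once debugfs is up */
}

early_initcall(my_buffers_init);	/* run from do_pre_smp_initcalls() */
fs_initcall(my_debugfs_init);		/* run later in the boot sequence */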
index f69f86788c2bd12a2df18e4dabe9e66046a2e71f..f1f99572cde7dee758217125925e2690a45b38d1 100644 (file)
@@ -5,7 +5,9 @@
 #include <asm/atomic.h>
 #include <linux/sched.h>
 #include <linux/clocksource.h>
+#include <linux/ring_buffer.h>
 #include <linux/mmiotrace.h>
+#include <linux/ftrace.h>
 
 enum trace_type {
        __TRACE_FIRST_TYPE = 0,
@@ -13,38 +15,60 @@ enum trace_type {
        TRACE_FN,
        TRACE_CTX,
        TRACE_WAKE,
+       TRACE_CONT,
        TRACE_STACK,
+       TRACE_PRINT,
        TRACE_SPECIAL,
        TRACE_MMIO_RW,
        TRACE_MMIO_MAP,
+       TRACE_BOOT,
 
        __TRACE_LAST_TYPE
 };
 
+/*
+ * The trace entry - the most basic unit of tracing. This is what
+ * is printed in the end as a single line in the trace output, such as:
+ *
+ *     bash-15816 [01]   235.197585: idle_cpu <- irq_enter
+ */
+struct trace_entry {
+       unsigned char           type;
+       unsigned char           cpu;
+       unsigned char           flags;
+       unsigned char           preempt_count;
+       int                     pid;
+};
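/*
 * Every record type below embeds struct trace_entry as its first
 * member, so a generic pointer pulled from the ring buffer can be
 * cast to the concrete type once ->type has been checked.  A
 * self-contained sketch of the first-member idiom (the types here
 * are illustrative, not kernel code):
 */
#include <stdio.h>

struct hdr { unsigned char type; };

struct fn_rec {
	struct hdr	ent;	/* must come first, as in ftrace_entry */
	unsigned long	ip;
};

enum { REC_FN = 1 };

static void show(struct hdr *h)
{
	if (h->type == REC_FN) {
		/* A struct pointer also points at its first member. */
		struct fn_rec *fn = (struct fn_rec *)h;
		printf("ip=%#lx\n", fn->ip);
	}
}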
+
 /*
 * Function trace entry - function address and parent function address:
  */
 struct ftrace_entry {
+       struct trace_entry      ent;
        unsigned long           ip;
        unsigned long           parent_ip;
 };
+extern struct tracer boot_tracer;
 
 /*
  * Context switch trace entry - which task (and prio) we switched from/to:
  */
 struct ctx_switch_entry {
+       struct trace_entry      ent;
        unsigned int            prev_pid;
        unsigned char           prev_prio;
        unsigned char           prev_state;
        unsigned int            next_pid;
        unsigned char           next_prio;
        unsigned char           next_state;
+       unsigned int            next_cpu;
 };
 
 /*
  * Special (free-form) trace entry:
  */
 struct special_entry {
+       struct trace_entry      ent;
        unsigned long           arg1;
        unsigned long           arg2;
        unsigned long           arg3;
@@ -57,33 +81,60 @@ struct special_entry {
 #define FTRACE_STACK_ENTRIES   8
 
 struct stack_entry {
+       struct trace_entry      ent;
        unsigned long           caller[FTRACE_STACK_ENTRIES];
 };
 
 /*
- * The trace entry - the most basic unit of tracing. This is what
- * is printed in the end as a single line in the trace output, such as:
- *
- *     bash-15816 [01]   235.197585: idle_cpu <- irq_enter
+ * ftrace_printk entry:
  */
-struct trace_entry {
-       char                    type;
-       char                    cpu;
-       char                    flags;
-       char                    preempt_count;
-       int                     pid;
-       cycle_t                 t;
-       union {
-               struct ftrace_entry             fn;
-               struct ctx_switch_entry         ctx;
-               struct special_entry            special;
-               struct stack_entry              stack;
-               struct mmiotrace_rw             mmiorw;
-               struct mmiotrace_map            mmiomap;
-       };
+struct print_entry {
+       struct trace_entry      ent;
+       unsigned long           ip;
+       char                    buf[];
+};
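/*
 * print_entry stores its text in a C99 flexible array member, so a
 * record is allocated with header and payload contiguous.  An
 * illustrative userspace sketch of that allocation pattern (not
 * the kernel's own code):
 */
#include <stdlib.h>
#include <string.h>

struct msg {
	unsigned long	ip;
	char		buf[];	/* flexible array, as in print_entry */
};

static struct msg *make_msg(unsigned long ip, const char *text)
{
	struct msg *m = malloc(sizeof(*m) + strlen(text) + 1);

	if (m) {
		m->ip = ip;
		strcpy(m->buf, text);
	}
	return m;
}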
+
+#define TRACE_OLD_SIZE         88
+
+struct trace_field_cont {
+       unsigned char           type;
+       /* Temporary till we get rid of this completely */
+       char                    buf[TRACE_OLD_SIZE - 1];
+};
+
+struct trace_mmiotrace_rw {
+       struct trace_entry      ent;
+       struct mmiotrace_rw     rw;
 };
 
-#define TRACE_ENTRY_SIZE       sizeof(struct trace_entry)
+struct trace_mmiotrace_map {
+       struct trace_entry      ent;
+       struct mmiotrace_map    map;
+};
+
+struct trace_boot {
+       struct trace_entry      ent;
+       struct boot_trace       initcall;
+};
+
+/*
+ * trace_flag_type is an enumeration that holds different
+ * states when a trace occurs. These are:
+ *  IRQS_OFF   - interrupts were disabled
+ *  NEED_RESCHED - reschedule is requested
+ *  HARDIRQ    - inside an interrupt handler
+ *  SOFTIRQ    - inside a softirq handler
+ *  CONT       - multiple entries hold the trace item
+ */
+enum trace_flag_type {
+       TRACE_FLAG_IRQS_OFF             = 0x01,
+       TRACE_FLAG_NEED_RESCHED         = 0x02,
+       TRACE_FLAG_HARDIRQ              = 0x04,
+       TRACE_FLAG_SOFTIRQ              = 0x08,
+       TRACE_FLAG_CONT                 = 0x10,
+};
+
+#define TRACE_BUF_SIZE         1024
 
 /*
  * The CPU trace array - it consists of thousands of trace entries
@@ -91,16 +142,9 @@ struct trace_entry {
  * the trace, etc.)
  */
 struct trace_array_cpu {
-       struct list_head        trace_pages;
        atomic_t                disabled;
-       raw_spinlock_t          lock;
-       struct lock_class_key   lock_key;
 
        /* these fields get copied into max-trace: */
-       unsigned                trace_head_idx;
-       unsigned                trace_tail_idx;
-       void                    *trace_head; /* producer */
-       void                    *trace_tail; /* consumer */
        unsigned long           trace_idx;
        unsigned long           overrun;
        unsigned long           saved_latency;
@@ -124,6 +168,7 @@ struct trace_iterator;
  * They have on/off state as well:
  */
 struct trace_array {
+       struct ring_buffer      *buffer;
        unsigned long           entries;
        long                    ctrl;
        int                     cpu;
@@ -132,6 +177,56 @@ struct trace_array {
        struct trace_array_cpu  *data[NR_CPUS];
 };
 
+#define FTRACE_CMP_TYPE(var, type) \
+       __builtin_types_compatible_p(typeof(var), type *)
+
+#undef IF_ASSIGN
+#define IF_ASSIGN(var, entry, etype, id)               \
+       if (FTRACE_CMP_TYPE(var, etype)) {              \
+               var = (typeof(var))(entry);             \
+               WARN_ON(id && (entry)->type != id);     \
+               break;                                  \
+       }
+
+/* Will cause compile errors if type is not found. */
+extern void __ftrace_bad_type(void);
+
+/*
+ * The trace_assign_type() macro verifies that the entry type is
+ * the same as the type being assigned. To add new types, simply
+ * add a line with the following format:
+ *
+ * IF_ASSIGN(var, ent, type, id);
+ *
+ *  Where "type" is the trace type that includes the trace_entry
+ *  as the "ent" item, and "id" is the trace identifier that is
+ *  used in the trace_type enum.
+ *
+ *  If the type can have more than one id, use zero.
+ */
+#define trace_assign_type(var, ent)                                    \
+       do {                                                            \
+               IF_ASSIGN(var, ent, struct ftrace_entry, TRACE_FN);     \
+               IF_ASSIGN(var, ent, struct ctx_switch_entry, 0);        \
+               IF_ASSIGN(var, ent, struct trace_field_cont, TRACE_CONT); \
+               IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK);   \
+               IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT);   \
+               IF_ASSIGN(var, ent, struct special_entry, 0);           \
+               IF_ASSIGN(var, ent, struct trace_mmiotrace_rw,          \
+                         TRACE_MMIO_RW);                               \
+               IF_ASSIGN(var, ent, struct trace_mmiotrace_map,         \
+                         TRACE_MMIO_MAP);                              \
+               IF_ASSIGN(var, ent, struct trace_boot, TRACE_BOOT);     \
+               __ftrace_bad_type();                                    \
+       } while (0)
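/*
 * Typical use, as in mmio_print_rw() further below.  If the
 * variable's type matches none of the IF_ASSIGN() lines, the call
 * to the undefined __ftrace_bad_type() is left standing and the
 * build fails at link time:
 */
struct trace_mmiotrace_rw *field;

trace_assign_type(field, iter->ent);	/* WARN()s on an id mismatch */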
+
+/* Return values for print_line callback */
+enum print_line_t {
+       TRACE_TYPE_PARTIAL_LINE = 0,    /* Retry after flushing the seq */
+       TRACE_TYPE_HANDLED      = 1,
+       TRACE_TYPE_UNHANDLED    = 2     /* Relay to other output functions */
+};
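/*
 * Skeleton of a print_line callback honoring these return values
 * (MY_TRACE_TYPE is a placeholder, not a real trace type):
 */
static enum print_line_t my_print_line(struct trace_iterator *iter)
{
	struct trace_seq *s = &iter->seq;

	if (iter->ent->type != MY_TRACE_TYPE)
		return TRACE_TYPE_UNHANDLED;	/* let the core format it */

	if (!trace_seq_printf(s, "my entry\n"))
		return TRACE_TYPE_PARTIAL_LINE;	/* seq full: flush, retry */

	return TRACE_TYPE_HANDLED;
}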
+
 /*
  * A specific tracer, represented by methods that operate on a trace array:
  */
@@ -152,7 +247,7 @@ struct tracer {
        int                     (*selftest)(struct tracer *trace,
                                            struct trace_array *tr);
 #endif
-       int                     (*print_line)(struct trace_iterator *iter);
+       enum print_line_t       (*print_line)(struct trace_iterator *iter);
        struct tracer           *next;
        int                     print_max;
 };
@@ -171,57 +266,58 @@ struct trace_iterator {
        struct trace_array      *tr;
        struct tracer           *trace;
        void                    *private;
-       long                    last_overrun[NR_CPUS];
-       long                    overrun[NR_CPUS];
+       struct ring_buffer_iter *buffer_iter[NR_CPUS];
 
        /* The below is zeroed out in pipe_read */
        struct trace_seq        seq;
        struct trace_entry      *ent;
        int                     cpu;
-
-       struct trace_entry      *prev_ent;
-       int                     prev_cpu;
+       u64                     ts;
 
        unsigned long           iter_flags;
        loff_t                  pos;
-       unsigned long           next_idx[NR_CPUS];
-       struct list_head        *next_page[NR_CPUS];
-       unsigned                next_page_idx[NR_CPUS];
        long                    idx;
 };
 
-void tracing_reset(struct trace_array_cpu *data);
+void trace_wake_up(void);
+void tracing_reset(struct trace_array *tr, int cpu);
 int tracing_open_generic(struct inode *inode, struct file *filp);
 struct dentry *tracing_init_dentry(void);
 void init_tracer_sysprof_debugfs(struct dentry *d_tracer);
 
+struct trace_entry *tracing_get_trace_entry(struct trace_array *tr,
+                                               struct trace_array_cpu *data);
+void tracing_generic_entry_update(struct trace_entry *entry,
+                                 unsigned long flags,
+                                 int pc);
+
 void ftrace(struct trace_array *tr,
                            struct trace_array_cpu *data,
                            unsigned long ip,
                            unsigned long parent_ip,
-                           unsigned long flags);
+                           unsigned long flags, int pc);
 void tracing_sched_switch_trace(struct trace_array *tr,
                                struct trace_array_cpu *data,
                                struct task_struct *prev,
                                struct task_struct *next,
-                               unsigned long flags);
+                               unsigned long flags, int pc);
 void tracing_record_cmdline(struct task_struct *tsk);
 
 void tracing_sched_wakeup_trace(struct trace_array *tr,
                                struct trace_array_cpu *data,
                                struct task_struct *wakee,
                                struct task_struct *cur,
-                               unsigned long flags);
+                               unsigned long flags, int pc);
 void trace_special(struct trace_array *tr,
                   struct trace_array_cpu *data,
                   unsigned long arg1,
                   unsigned long arg2,
-                  unsigned long arg3);
+                  unsigned long arg3, int pc);
 void trace_function(struct trace_array *tr,
                    struct trace_array_cpu *data,
                    unsigned long ip,
                    unsigned long parent_ip,
-                   unsigned long flags);
+                   unsigned long flags, int pc);
 
 void tracing_start_cmdline_record(void);
 void tracing_stop_cmdline_record(void);
@@ -268,51 +364,33 @@ extern unsigned long ftrace_update_tot_cnt;
 extern int DYN_FTRACE_TEST_NAME(void);
 #endif
 
-#ifdef CONFIG_MMIOTRACE
-extern void __trace_mmiotrace_rw(struct trace_array *tr,
-                               struct trace_array_cpu *data,
-                               struct mmiotrace_rw *rw);
-extern void __trace_mmiotrace_map(struct trace_array *tr,
-                               struct trace_array_cpu *data,
-                               struct mmiotrace_map *map);
-#endif
-
 #ifdef CONFIG_FTRACE_STARTUP_TEST
-#ifdef CONFIG_FTRACE
 extern int trace_selftest_startup_function(struct tracer *trace,
                                           struct trace_array *tr);
-#endif
-#ifdef CONFIG_IRQSOFF_TRACER
 extern int trace_selftest_startup_irqsoff(struct tracer *trace,
                                          struct trace_array *tr);
-#endif
-#ifdef CONFIG_PREEMPT_TRACER
 extern int trace_selftest_startup_preemptoff(struct tracer *trace,
                                             struct trace_array *tr);
-#endif
-#if defined(CONFIG_IRQSOFF_TRACER) && defined(CONFIG_PREEMPT_TRACER)
 extern int trace_selftest_startup_preemptirqsoff(struct tracer *trace,
                                                 struct trace_array *tr);
-#endif
-#ifdef CONFIG_SCHED_TRACER
 extern int trace_selftest_startup_wakeup(struct tracer *trace,
                                         struct trace_array *tr);
-#endif
-#ifdef CONFIG_CONTEXT_SWITCH_TRACER
+extern int trace_selftest_startup_nop(struct tracer *trace,
+                                        struct trace_array *tr);
 extern int trace_selftest_startup_sched_switch(struct tracer *trace,
                                               struct trace_array *tr);
-#endif
-#ifdef CONFIG_SYSPROF_TRACER
 extern int trace_selftest_startup_sysprof(struct tracer *trace,
                                               struct trace_array *tr);
-#endif
 #endif /* CONFIG_FTRACE_STARTUP_TEST */
 
 extern void *head_page(struct trace_array_cpu *data);
 extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...);
+extern void trace_seq_print_cont(struct trace_seq *s,
+                                struct trace_iterator *iter);
 extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
                                 size_t cnt);
 extern long ns2usecs(cycle_t nsec);
+extern int trace_vprintk(unsigned long ip, const char *fmt, va_list args);
 
 extern unsigned long trace_flags;
 
@@ -334,6 +412,9 @@ enum trace_iterator_flags {
        TRACE_ITER_BLOCK                = 0x80,
        TRACE_ITER_STACKTRACE           = 0x100,
        TRACE_ITER_SCHED_TREE           = 0x200,
+       TRACE_ITER_PRINTK               = 0x400,
 };
 
+extern struct tracer nop_trace;
+
 #endif /* _LINUX_KERNEL_TRACE_H */
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
new file mode 100644 (file)
index 0000000..d0a5e50
--- /dev/null
@@ -0,0 +1,126 @@
+/*
+ * ring buffer based initcalls tracer
+ *
+ * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/debugfs.h>
+#include <linux/ftrace.h>
+#include <linux/kallsyms.h>
+
+#include "trace.h"
+
+static struct trace_array *boot_trace;
+static int trace_boot_enabled;
+
+
+/* Should be started after do_pre_smp_initcalls() in init/main.c */
+void start_boot_trace(void)
+{
+       trace_boot_enabled = 1;
+}
+
+void stop_boot_trace(void)
+{
+       trace_boot_enabled = 0;
+}
+
+void reset_boot_trace(struct trace_array *tr)
+{
+       stop_boot_trace();
+}
+
+static void boot_trace_init(struct trace_array *tr)
+{
+       int cpu;
+       boot_trace = tr;
+
+       trace_boot_enabled = 0;
+
+       for_each_cpu_mask(cpu, cpu_possible_map)
+               tracing_reset(tr, cpu);
+}
+
+static void boot_trace_ctrl_update(struct trace_array *tr)
+{
+       if (tr->ctrl)
+               start_boot_trace();
+       else
+               stop_boot_trace();
+}
+
+static enum print_line_t initcall_print_line(struct trace_iterator *iter)
+{
+       int ret;
+       struct trace_entry *entry = iter->ent;
+       struct trace_boot *field = (struct trace_boot *)entry;
+       struct boot_trace *it = &field->initcall;
+       struct trace_seq *s = &iter->seq;
+       struct timespec calltime = ktime_to_timespec(it->calltime);
+       struct timespec rettime = ktime_to_timespec(it->rettime);
+
+       if (entry->type == TRACE_BOOT) {
+               ret = trace_seq_printf(s, "[%5ld.%09ld] calling  %s @ %i\n",
+                                         calltime.tv_sec,
+                                         calltime.tv_nsec,
+                                         it->func, it->caller);
+               if (!ret)
+                       return TRACE_TYPE_PARTIAL_LINE;
+
+               ret = trace_seq_printf(s, "[%5ld.%09ld] initcall %s "
+                                         "returned %d after %lld msecs\n",
+                                         rettime.tv_sec,
+                                         rettime.tv_nsec,
+                                         it->func, it->result, it->duration);
+
+               if (!ret)
+                       return TRACE_TYPE_PARTIAL_LINE;
+               return TRACE_TYPE_HANDLED;
+       }
+       return TRACE_TYPE_UNHANDLED;
+}
+
+struct tracer boot_tracer __read_mostly =
+{
+       .name           = "initcall",
+       .init           = boot_trace_init,
+       .reset          = reset_boot_trace,
+       .ctrl_update    = boot_trace_ctrl_update,
+       .print_line     = initcall_print_line,
+};
+
+void trace_boot(struct boot_trace *it, initcall_t fn)
+{
+       struct ring_buffer_event *event;
+       struct trace_boot *entry;
+       struct trace_array_cpu *data;
+       unsigned long irq_flags;
+       struct trace_array *tr = boot_trace;
+
+       if (!trace_boot_enabled)
+               return;
+
+       /*
+        * Get its name now since this function could
+        * disappear because it is in the .init section.
+        */
+       sprint_symbol(it->func, (unsigned long)fn);
+       preempt_disable();
+       data = tr->data[smp_processor_id()];
+
+       event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+                                        &irq_flags);
+       if (!event)
+               goto out;
+       entry   = ring_buffer_event_data(event);
+       tracing_generic_entry_update(&entry->ent, 0, 0);
+       entry->ent.type = TRACE_BOOT;
+       entry->initcall = *it;
+       ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+
+       trace_wake_up();
+
+ out:
+       preempt_enable();
+}
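trace_boot() shows the write path every tracer in this patch now uses: reserve an event in the ring buffer, fill it in, commit it. The same sequence reappears in __trace_mmiotrace_rw() and __trace_mmiotrace_map() below; a condensed sketch of the pattern (the entry struct and type id are placeholders):

struct ring_buffer_event *event;
struct my_entry *entry;		/* any record embedding trace_entry */
unsigned long irq_flags;

event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), &irq_flags);
if (!event)
	return;			/* buffer full or writing disabled */
entry = ring_buffer_event_data(event);
tracing_generic_entry_update(&entry->ent, 0, preempt_count());
entry->ent.type = TRACE_MY_TYPE;	/* placeholder id */
/* ... fill in the payload ... */
ring_buffer_unlock_commit(tr->buffer, event, irq_flags);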
index 312144897970b18c7d9fd596bbf033182f68822b..e90eb0c2c56ca78d917f4e353dd099fd51c29172 100644 (file)
@@ -23,7 +23,7 @@ static void function_reset(struct trace_array *tr)
        tr->time_start = ftrace_now(tr->cpu);
 
        for_each_online_cpu(cpu)
-               tracing_reset(tr->data[cpu]);
+               tracing_reset(tr, cpu);
 }
 
 static void start_function_trace(struct trace_array *tr)
index ece6cfb649fa52823f33988fd6792dad2d0d1fae..a7db7f040ae03bc3b3556b3f77e47f032be62c1d 100644 (file)
@@ -95,7 +95,7 @@ irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip)
        disabled = atomic_inc_return(&data->disabled);
 
        if (likely(disabled == 1))
-               trace_function(tr, data, ip, parent_ip, flags);
+               trace_function(tr, data, ip, parent_ip, flags, preempt_count());
 
        atomic_dec(&data->disabled);
 }
@@ -130,6 +130,7 @@ check_critical_timing(struct trace_array *tr,
        unsigned long latency, t0, t1;
        cycle_t T0, T1, delta;
        unsigned long flags;
+       int pc;
 
        /*
         * usecs conversion is slow so we try to delay the conversion
@@ -141,6 +142,8 @@ check_critical_timing(struct trace_array *tr,
 
        local_save_flags(flags);
 
+       pc = preempt_count();
+
        if (!report_latency(delta))
                goto out;
 
@@ -150,7 +153,7 @@ check_critical_timing(struct trace_array *tr,
        if (!report_latency(delta))
                goto out_unlock;
 
-       trace_function(tr, data, CALLER_ADDR0, parent_ip, flags);
+       trace_function(tr, data, CALLER_ADDR0, parent_ip, flags, pc);
 
        latency = nsecs_to_usecs(delta);
 
@@ -173,8 +176,8 @@ out_unlock:
 out:
        data->critical_sequence = max_sequence;
        data->preempt_timestamp = ftrace_now(cpu);
-       tracing_reset(data);
-       trace_function(tr, data, CALLER_ADDR0, parent_ip, flags);
+       tracing_reset(tr, cpu);
+       trace_function(tr, data, CALLER_ADDR0, parent_ip, flags, pc);
 }
 
 static inline void
@@ -203,11 +206,11 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip)
        data->critical_sequence = max_sequence;
        data->preempt_timestamp = ftrace_now(cpu);
        data->critical_start = parent_ip ? : ip;
-       tracing_reset(data);
+       tracing_reset(tr, cpu);
 
        local_save_flags(flags);
 
-       trace_function(tr, data, ip, parent_ip, flags);
+       trace_function(tr, data, ip, parent_ip, flags, preempt_count());
 
        per_cpu(tracing_cpu, cpu) = 1;
 
@@ -234,14 +237,14 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip)
 
        data = tr->data[cpu];
 
-       if (unlikely(!data) || unlikely(!head_page(data)) ||
+       if (unlikely(!data) ||
            !data->critical_start || atomic_read(&data->disabled))
                return;
 
        atomic_inc(&data->disabled);
 
        local_save_flags(flags);
-       trace_function(tr, data, ip, parent_ip, flags);
+       trace_function(tr, data, ip, parent_ip, flags, preempt_count());
        check_critical_timing(tr, data, parent_ip ? : ip, cpu);
        data->critical_start = 0;
        atomic_dec(&data->disabled);
index b13dc19dcbb4691e51bd802238cfbbda15782a73..f28484618ff0de99b0c9d5062f23fe8eec25070a 100644 (file)
@@ -27,7 +27,7 @@ static void mmio_reset_data(struct trace_array *tr)
        tr->time_start = ftrace_now(tr->cpu);
 
        for_each_online_cpu(cpu)
-               tracing_reset(tr->data[cpu]);
+               tracing_reset(tr, cpu);
 }
 
 static void mmio_trace_init(struct trace_array *tr)
@@ -130,10 +130,14 @@ static unsigned long count_overruns(struct trace_iterator *iter)
 {
        int cpu;
        unsigned long cnt = 0;
+/* FIXME: */
+#if 0
        for_each_online_cpu(cpu) {
                cnt += iter->overrun[cpu];
                iter->overrun[cpu] = 0;
        }
+#endif
+       (void)cpu;
        return cnt;
 }
 
@@ -171,17 +175,21 @@ print_out:
        return (ret == -EBUSY) ? 0 : ret;
 }
 
-static int mmio_print_rw(struct trace_iterator *iter)
+static enum print_line_t mmio_print_rw(struct trace_iterator *iter)
 {
        struct trace_entry *entry = iter->ent;
-       struct mmiotrace_rw *rw = &entry->mmiorw;
+       struct trace_mmiotrace_rw *field;
+       struct mmiotrace_rw *rw;
        struct trace_seq *s     = &iter->seq;
-       unsigned long long t    = ns2usecs(entry->t);
+       unsigned long long t    = ns2usecs(iter->ts);
        unsigned long usec_rem  = do_div(t, 1000000ULL);
        unsigned secs           = (unsigned long)t;
        int ret = 1;
 
-       switch (entry->mmiorw.opcode) {
+       trace_assign_type(field, entry);
+       rw = &field->rw;
+
+       switch (rw->opcode) {
        case MMIO_READ:
                ret = trace_seq_printf(s,
                        "R %d %lu.%06lu %d 0x%llx 0x%lx 0x%lx %d\n",
@@ -209,21 +217,25 @@ static int mmio_print_rw(struct trace_iterator *iter)
                break;
        }
        if (ret)
-               return 1;
-       return 0;
+               return TRACE_TYPE_HANDLED;
+       return TRACE_TYPE_PARTIAL_LINE;
 }
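/*
 * The timestamp split above relies on do_div(), which divides its
 * first argument in place and returns the remainder (64-bit
 * division that is safe on 32-bit hosts):
 */
unsigned long long t = ns2usecs(iter->ts);	/* ns -> us */
unsigned long usec_rem = do_div(t, 1000000ULL);	/* t is now whole secs */
unsigned long secs = (unsigned long)t;

trace_seq_printf(s, "%lu.%06lu ", secs, usec_rem);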
 
-static int mmio_print_map(struct trace_iterator *iter)
+static enum print_line_t mmio_print_map(struct trace_iterator *iter)
 {
        struct trace_entry *entry = iter->ent;
-       struct mmiotrace_map *m = &entry->mmiomap;
+       struct trace_mmiotrace_map *field;
+       struct mmiotrace_map *m;
        struct trace_seq *s     = &iter->seq;
-       unsigned long long t    = ns2usecs(entry->t);
+       unsigned long long t    = ns2usecs(iter->ts);
        unsigned long usec_rem  = do_div(t, 1000000ULL);
        unsigned secs           = (unsigned long)t;
-       int ret = 1;
+       int ret;
 
-       switch (entry->mmiorw.opcode) {
+       trace_assign_type(field, entry);
+       m = &field->map;
+
+       switch (m->opcode) {
        case MMIO_PROBE:
                ret = trace_seq_printf(s,
                        "MAP %lu.%06lu %d 0x%llx 0x%lx 0x%lx 0x%lx %d\n",
@@ -241,20 +253,43 @@ static int mmio_print_map(struct trace_iterator *iter)
                break;
        }
        if (ret)
-               return 1;
-       return 0;
+               return TRACE_TYPE_HANDLED;
+       return TRACE_TYPE_PARTIAL_LINE;
+}
+
+static enum print_line_t mmio_print_mark(struct trace_iterator *iter)
+{
+       struct trace_entry *entry = iter->ent;
+       struct print_entry *print = (struct print_entry *)entry;
+       const char *msg         = print->buf;
+       struct trace_seq *s     = &iter->seq;
+       unsigned long long t    = ns2usecs(iter->ts);
+       unsigned long usec_rem  = do_div(t, 1000000ULL);
+       unsigned secs           = (unsigned long)t;
+       int ret;
+
+       /* The trailing newline must be in the message. */
+       ret = trace_seq_printf(s, "MARK %lu.%06lu %s", secs, usec_rem, msg);
+       if (!ret)
+               return TRACE_TYPE_PARTIAL_LINE;
+
+       if (entry->flags & TRACE_FLAG_CONT)
+               trace_seq_print_cont(s, iter);
+
+       return TRACE_TYPE_HANDLED;
 }
 
-/* return 0 to abort printing without consuming current entry in pipe mode */
-static int mmio_print_line(struct trace_iterator *iter)
+static enum print_line_t mmio_print_line(struct trace_iterator *iter)
 {
        switch (iter->ent->type) {
        case TRACE_MMIO_RW:
                return mmio_print_rw(iter);
        case TRACE_MMIO_MAP:
                return mmio_print_map(iter);
+       case TRACE_PRINT:
+               return mmio_print_mark(iter);
        default:
-               return 1; /* ignore unknown entries */
+               return TRACE_TYPE_HANDLED; /* ignore unknown entries */
        }
 }
 
@@ -276,6 +311,27 @@ __init static int init_mmio_trace(void)
 }
 device_initcall(init_mmio_trace);
 
+static void __trace_mmiotrace_rw(struct trace_array *tr,
+                               struct trace_array_cpu *data,
+                               struct mmiotrace_rw *rw)
+{
+       struct ring_buffer_event *event;
+       struct trace_mmiotrace_rw *entry;
+       unsigned long irq_flags;
+
+       event   = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+                                          &irq_flags);
+       if (!event)
+               return;
+       entry   = ring_buffer_event_data(event);
+       tracing_generic_entry_update(&entry->ent, 0, preempt_count());
+       entry->ent.type                 = TRACE_MMIO_RW;
+       entry->rw                       = *rw;
+       ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+
+       trace_wake_up();
+}
+
 void mmio_trace_rw(struct mmiotrace_rw *rw)
 {
        struct trace_array *tr = mmio_trace_array;
@@ -283,6 +339,27 @@ void mmio_trace_rw(struct mmiotrace_rw *rw)
        __trace_mmiotrace_rw(tr, data, rw);
 }
 
+static void __trace_mmiotrace_map(struct trace_array *tr,
+                               struct trace_array_cpu *data,
+                               struct mmiotrace_map *map)
+{
+       struct ring_buffer_event *event;
+       struct trace_mmiotrace_map *entry;
+       unsigned long irq_flags;
+
+       event   = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+                                          &irq_flags);
+       if (!event)
+               return;
+       entry   = ring_buffer_event_data(event);
+       tracing_generic_entry_update(&entry->ent, 0, preempt_count());
+       entry->ent.type                 = TRACE_MMIO_MAP;
+       entry->map                      = *map;
+       ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+
+       trace_wake_up();
+}
+
 void mmio_trace_mapping(struct mmiotrace_map *map)
 {
        struct trace_array *tr = mmio_trace_array;
@@ -293,3 +370,8 @@ void mmio_trace_mapping(struct mmiotrace_map *map)
        __trace_mmiotrace_map(tr, data, map);
        preempt_enable();
 }
+
+int mmio_trace_printk(const char *fmt, va_list args)
+{
+       return trace_vprintk(0, fmt, args);
+}
diff --git a/kernel/trace/trace_nop.c b/kernel/trace/trace_nop.c
new file mode 100644 (file)
index 0000000..4592b48
--- /dev/null
@@ -0,0 +1,64 @@
+/*
+ * nop tracer
+ *
+ * Copyright (C) 2008 Steven Noonan <steven@uplinklabs.net>
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/debugfs.h>
+#include <linux/ftrace.h>
+
+#include "trace.h"
+
+static struct trace_array      *ctx_trace;
+
+static void start_nop_trace(struct trace_array *tr)
+{
+       /* Nothing to do! */
+}
+
+static void stop_nop_trace(struct trace_array *tr)
+{
+       /* Nothing to do! */
+}
+
+static void nop_trace_init(struct trace_array *tr)
+{
+       int cpu;
+       ctx_trace = tr;
+
+       for_each_online_cpu(cpu)
+               tracing_reset(tr, cpu);
+
+       if (tr->ctrl)
+               start_nop_trace(tr);
+}
+
+static void nop_trace_reset(struct trace_array *tr)
+{
+       if (tr->ctrl)
+               stop_nop_trace(tr);
+}
+
+static void nop_trace_ctrl_update(struct trace_array *tr)
+{
+       /* When starting a new trace, reset the buffers */
+       if (tr->ctrl)
+               start_nop_trace(tr);
+       else
+               stop_nop_trace(tr);
+}
+
+struct tracer nop_trace __read_mostly =
+{
+       .name           = "nop",
+       .init           = nop_trace_init,
+       .reset          = nop_trace_reset,
+       .ctrl_update    = nop_trace_ctrl_update,
+#ifdef CONFIG_FTRACE_SELFTEST
+       .selftest       = trace_selftest_startup_nop,
+#endif
+};
+
index cb817a209aa005d9b151b79e2ef52b48178dffd4..b8f56beb1a621d5ff527a93aee383f0e02fd30dd 100644 (file)
@@ -9,8 +9,8 @@
 #include <linux/debugfs.h>
 #include <linux/kallsyms.h>
 #include <linux/uaccess.h>
-#include <linux/marker.h>
 #include <linux/ftrace.h>
+#include <trace/sched.h>
 
 #include "trace.h"
 
@@ -19,15 +19,16 @@ static int __read_mostly    tracer_enabled;
 static atomic_t                        sched_ref;
 
 static void
-sched_switch_func(void *private, void *__rq, struct task_struct *prev,
+probe_sched_switch(struct rq *__rq, struct task_struct *prev,
                        struct task_struct *next)
 {
-       struct trace_array **ptr = private;
-       struct trace_array *tr = *ptr;
        struct trace_array_cpu *data;
        unsigned long flags;
-       long disabled;
        int cpu;
+       int pc;
+
+       if (!atomic_read(&sched_ref))
+               return;
 
        tracing_record_cmdline(prev);
        tracing_record_cmdline(next);
@@ -35,97 +36,41 @@ sched_switch_func(void *private, void *__rq, struct task_struct *prev,
        if (!tracer_enabled)
                return;
 
+       pc = preempt_count();
        local_irq_save(flags);
        cpu = raw_smp_processor_id();
-       data = tr->data[cpu];
-       disabled = atomic_inc_return(&data->disabled);
+       data = ctx_trace->data[cpu];
 
-       if (likely(disabled == 1))
-               tracing_sched_switch_trace(tr, data, prev, next, flags);
+       if (likely(!atomic_read(&data->disabled)))
+               tracing_sched_switch_trace(ctx_trace, data, prev, next, flags, pc);
 
-       atomic_dec(&data->disabled);
        local_irq_restore(flags);
 }
 
-static notrace void
-sched_switch_callback(void *probe_data, void *call_data,
-                     const char *format, va_list *args)
-{
-       struct task_struct *prev;
-       struct task_struct *next;
-       struct rq *__rq;
-
-       if (!atomic_read(&sched_ref))
-               return;
-
-       /* skip prev_pid %d next_pid %d prev_state %ld */
-       (void)va_arg(*args, int);
-       (void)va_arg(*args, int);
-       (void)va_arg(*args, long);
-       __rq = va_arg(*args, typeof(__rq));
-       prev = va_arg(*args, typeof(prev));
-       next = va_arg(*args, typeof(next));
-
-       /*
-        * If tracer_switch_func only points to the local
-        * switch func, it still needs the ptr passed to it.
-        */
-       sched_switch_func(probe_data, __rq, prev, next);
-}
-
 static void
-wakeup_func(void *private, void *__rq, struct task_struct *wakee, struct
-                       task_struct *curr)
+probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee)
 {
-       struct trace_array **ptr = private;
-       struct trace_array *tr = *ptr;
        struct trace_array_cpu *data;
        unsigned long flags;
-       long disabled;
-       int cpu;
+       int cpu, pc;
 
-       if (!tracer_enabled)
+       if (unlikely(!tracer_enabled))
                return;
 
-       tracing_record_cmdline(curr);
+       pc = preempt_count();
+       tracing_record_cmdline(current);
 
        local_irq_save(flags);
        cpu = raw_smp_processor_id();
-       data = tr->data[cpu];
-       disabled = atomic_inc_return(&data->disabled);
+       data = ctx_trace->data[cpu];
 
-       if (likely(disabled == 1))
-               tracing_sched_wakeup_trace(tr, data, wakee, curr, flags);
+       if (likely(!atomic_read(&data->disabled)))
+               tracing_sched_wakeup_trace(ctx_trace, data, wakee, current,
+                                          flags, pc);
 
-       atomic_dec(&data->disabled);
        local_irq_restore(flags);
 }
 
-static notrace void
-wake_up_callback(void *probe_data, void *call_data,
-                const char *format, va_list *args)
-{
-       struct task_struct *curr;
-       struct task_struct *task;
-       struct rq *__rq;
-
-       if (likely(!tracer_enabled))
-               return;
-
-       /* Skip pid %d state %ld */
-       (void)va_arg(*args, int);
-       (void)va_arg(*args, long);
-       /* now get the meat: "rq %p task %p rq->curr %p" */
-       __rq = va_arg(*args, typeof(__rq));
-       task = va_arg(*args, typeof(task));
-       curr = va_arg(*args, typeof(curr));
-
-       tracing_record_cmdline(task);
-       tracing_record_cmdline(curr);
-
-       wakeup_func(probe_data, __rq, task, curr);
-}
-
 static void sched_switch_reset(struct trace_array *tr)
 {
        int cpu;
@@ -133,67 +78,47 @@ static void sched_switch_reset(struct trace_array *tr)
        tr->time_start = ftrace_now(tr->cpu);
 
        for_each_online_cpu(cpu)
-               tracing_reset(tr->data[cpu]);
+               tracing_reset(tr, cpu);
 }
 
 static int tracing_sched_register(void)
 {
        int ret;
 
-       ret = marker_probe_register("kernel_sched_wakeup",
-                       "pid %d state %ld ## rq %p task %p rq->curr %p",
-                       wake_up_callback,
-                       &ctx_trace);
+       ret = register_trace_sched_wakeup(probe_sched_wakeup);
        if (ret) {
-               pr_info("wakeup trace: Couldn't add marker"
+               pr_info("wakeup trace: Couldn't activate tracepoint"
                        " probe to kernel_sched_wakeup\n");
                return ret;
        }
 
-       ret = marker_probe_register("kernel_sched_wakeup_new",
-                       "pid %d state %ld ## rq %p task %p rq->curr %p",
-                       wake_up_callback,
-                       &ctx_trace);
+       ret = register_trace_sched_wakeup_new(probe_sched_wakeup);
        if (ret) {
-               pr_info("wakeup trace: Couldn't add marker"
+               pr_info("wakeup trace: Couldn't activate tracepoint"
                        " probe to kernel_sched_wakeup_new\n");
                goto fail_deprobe;
        }
 
-       ret = marker_probe_register("kernel_sched_schedule",
-               "prev_pid %d next_pid %d prev_state %ld "
-               "## rq %p prev %p next %p",
-               sched_switch_callback,
-               &ctx_trace);
+       ret = register_trace_sched_switch(probe_sched_switch);
        if (ret) {
-               pr_info("sched trace: Couldn't add marker"
+               pr_info("sched trace: Couldn't activate tracepoint"
                        " probe to kernel_sched_schedule\n");
                goto fail_deprobe_wake_new;
        }
 
        return ret;
 fail_deprobe_wake_new:
-       marker_probe_unregister("kernel_sched_wakeup_new",
-                               wake_up_callback,
-                               &ctx_trace);
+       unregister_trace_sched_wakeup_new(probe_sched_wakeup);
 fail_deprobe:
-       marker_probe_unregister("kernel_sched_wakeup",
-                               wake_up_callback,
-                               &ctx_trace);
+       unregister_trace_sched_wakeup(probe_sched_wakeup);
        return ret;
 }
 
 static void tracing_sched_unregister(void)
 {
-       marker_probe_unregister("kernel_sched_schedule",
-                               sched_switch_callback,
-                               &ctx_trace);
-       marker_probe_unregister("kernel_sched_wakeup_new",
-                               wake_up_callback,
-                               &ctx_trace);
-       marker_probe_unregister("kernel_sched_wakeup",
-                               wake_up_callback,
-                               &ctx_trace);
+       unregister_trace_sched_switch(probe_sched_switch);
+       unregister_trace_sched_wakeup_new(probe_sched_wakeup);
+       unregister_trace_sched_wakeup(probe_sched_wakeup);
 }
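/*
 * A tracepoint probe is an ordinary C function whose signature
 * matches the tracepoint's typed arguments - no format string or
 * va_list unpacking as with the old markers.  Shape of the pairing
 * used in tracing_sched_register() above (my_probe is a placeholder
 * name):
 */
static void my_probe(struct rq *__rq, struct task_struct *prev,
		     struct task_struct *next)
{
	/* called from the scheduler path: keep it short, never sleep */
}

/*
 * register_trace_sched_switch(my_probe) attaches it;
 * unregister_trace_sched_switch(my_probe) detaches it.
 */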
 
 static void tracing_start_sched_switch(void)
index e303ccb62cdfb1284847864821d63a72694ca6af..fe4a252c236384bb1ac5eff3ac441522d90ce0d4 100644 (file)
@@ -15,7 +15,7 @@
 #include <linux/kallsyms.h>
 #include <linux/uaccess.h>
 #include <linux/ftrace.h>
-#include <linux/marker.h>
+#include <trace/sched.h>
 
 #include "trace.h"
 
@@ -44,10 +44,12 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
        long disabled;
        int resched;
        int cpu;
+       int pc;
 
        if (likely(!wakeup_task))
                return;
 
+       pc = preempt_count();
        resched = need_resched();
        preempt_disable_notrace();
 
@@ -70,7 +72,7 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
        if (task_cpu(wakeup_task) != cpu)
                goto unlock;
 
-       trace_function(tr, data, ip, parent_ip, flags);
+       trace_function(tr, data, ip, parent_ip, flags, pc);
 
  unlock:
        __raw_spin_unlock(&wakeup_lock);
@@ -112,17 +114,18 @@ static int report_latency(cycle_t delta)
 }
 
 static void notrace
-wakeup_sched_switch(void *private, void *rq, struct task_struct *prev,
+probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
        struct task_struct *next)
 {
        unsigned long latency = 0, t0 = 0, t1 = 0;
-       struct trace_array **ptr = private;
-       struct trace_array *tr = *ptr;
        struct trace_array_cpu *data;
        cycle_t T0, T1, delta;
        unsigned long flags;
        long disabled;
        int cpu;
+       int pc;
+
+       tracing_record_cmdline(prev);
 
        if (unlikely(!tracer_enabled))
                return;
@@ -139,12 +142,14 @@ wakeup_sched_switch(void *private, void *rq, struct task_struct *prev,
        if (next != wakeup_task)
                return;
 
+       pc = preempt_count();
+
        /* The task we are waiting for is waking up */
-       data = tr->data[wakeup_cpu];
+       data = wakeup_trace->data[wakeup_cpu];
 
        /* disable local data, not wakeup_cpu data */
        cpu = raw_smp_processor_id();
-       disabled = atomic_inc_return(&tr->data[cpu]->disabled);
+       disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled);
        if (likely(disabled != 1))
                goto out;
 
@@ -155,7 +160,7 @@ wakeup_sched_switch(void *private, void *rq, struct task_struct *prev,
        if (unlikely(!tracer_enabled || next != wakeup_task))
                goto out_unlock;
 
-       trace_function(tr, data, CALLER_ADDR1, CALLER_ADDR2, flags);
+       trace_function(wakeup_trace, data, CALLER_ADDR1, CALLER_ADDR2, flags, pc);
 
        /*
         * usecs conversion is slow so we try to delay the conversion
@@ -174,39 +179,14 @@ wakeup_sched_switch(void *private, void *rq, struct task_struct *prev,
        t0 = nsecs_to_usecs(T0);
        t1 = nsecs_to_usecs(T1);
 
-       update_max_tr(tr, wakeup_task, wakeup_cpu);
+       update_max_tr(wakeup_trace, wakeup_task, wakeup_cpu);
 
 out_unlock:
-       __wakeup_reset(tr);
+       __wakeup_reset(wakeup_trace);
        __raw_spin_unlock(&wakeup_lock);
        local_irq_restore(flags);
 out:
-       atomic_dec(&tr->data[cpu]->disabled);
-}
-
-static notrace void
-sched_switch_callback(void *probe_data, void *call_data,
-                     const char *format, va_list *args)
-{
-       struct task_struct *prev;
-       struct task_struct *next;
-       struct rq *__rq;
-
-       /* skip prev_pid %d next_pid %d prev_state %ld */
-       (void)va_arg(*args, int);
-       (void)va_arg(*args, int);
-       (void)va_arg(*args, long);
-       __rq = va_arg(*args, typeof(__rq));
-       prev = va_arg(*args, typeof(prev));
-       next = va_arg(*args, typeof(next));
-
-       tracing_record_cmdline(prev);
-
-       /*
-        * If tracer_switch_func only points to the local
-        * switch func, it still needs the ptr passed to it.
-        */
-       wakeup_sched_switch(probe_data, __rq, prev, next);
+       atomic_dec(&wakeup_trace->data[cpu]->disabled);
 }
 
 static void __wakeup_reset(struct trace_array *tr)
@@ -216,7 +196,7 @@ static void __wakeup_reset(struct trace_array *tr)
 
        for_each_possible_cpu(cpu) {
                data = tr->data[cpu];
-               tracing_reset(data);
+               tracing_reset(tr, cpu);
        }
 
        wakeup_cpu = -1;
@@ -240,19 +220,26 @@ static void wakeup_reset(struct trace_array *tr)
 }
 
 static void
-wakeup_check_start(struct trace_array *tr, struct task_struct *p,
-                  struct task_struct *curr)
+probe_wakeup(struct rq *rq, struct task_struct *p)
 {
        int cpu = smp_processor_id();
        unsigned long flags;
        long disabled;
+       int pc;
+
+       if (likely(!tracer_enabled))
+               return;
+
+       tracing_record_cmdline(p);
+       tracing_record_cmdline(current);
 
        if (likely(!rt_task(p)) ||
                        p->prio >= wakeup_prio ||
-                       p->prio >= curr->prio)
+                       p->prio >= current->prio)
                return;
 
-       disabled = atomic_inc_return(&tr->data[cpu]->disabled);
+       pc = preempt_count();
+       disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled);
        if (unlikely(disabled != 1))
                goto out;
 
@@ -264,7 +251,7 @@ wakeup_check_start(struct trace_array *tr, struct task_struct *p,
                goto out_locked;
 
        /* reset the trace */
-       __wakeup_reset(tr);
+       __wakeup_reset(wakeup_trace);
 
        wakeup_cpu = task_cpu(p);
        wakeup_prio = p->prio;
@@ -274,74 +261,37 @@ wakeup_check_start(struct trace_array *tr, struct task_struct *p,
 
        local_save_flags(flags);
 
-       tr->data[wakeup_cpu]->preempt_timestamp = ftrace_now(cpu);
-       trace_function(tr, tr->data[wakeup_cpu],
-                      CALLER_ADDR1, CALLER_ADDR2, flags);
+       wakeup_trace->data[wakeup_cpu]->preempt_timestamp = ftrace_now(cpu);
+       trace_function(wakeup_trace, wakeup_trace->data[wakeup_cpu],
+                      CALLER_ADDR1, CALLER_ADDR2, flags, pc);
 
 out_locked:
        __raw_spin_unlock(&wakeup_lock);
 out:
-       atomic_dec(&tr->data[cpu]->disabled);
-}
-
-static notrace void
-wake_up_callback(void *probe_data, void *call_data,
-                const char *format, va_list *args)
-{
-       struct trace_array **ptr = probe_data;
-       struct trace_array *tr = *ptr;
-       struct task_struct *curr;
-       struct task_struct *task;
-       struct rq *__rq;
-
-       if (likely(!tracer_enabled))
-               return;
-
-       /* Skip pid %d state %ld */
-       (void)va_arg(*args, int);
-       (void)va_arg(*args, long);
-       /* now get the meat: "rq %p task %p rq->curr %p" */
-       __rq = va_arg(*args, typeof(__rq));
-       task = va_arg(*args, typeof(task));
-       curr = va_arg(*args, typeof(curr));
-
-       tracing_record_cmdline(task);
-       tracing_record_cmdline(curr);
-
-       wakeup_check_start(tr, task, curr);
+       atomic_dec(&wakeup_trace->data[cpu]->disabled);
 }
 
 static void start_wakeup_tracer(struct trace_array *tr)
 {
        int ret;
 
-       ret = marker_probe_register("kernel_sched_wakeup",
-                       "pid %d state %ld ## rq %p task %p rq->curr %p",
-                       wake_up_callback,
-                       &wakeup_trace);
+       ret = register_trace_sched_wakeup(probe_wakeup);
        if (ret) {
-               pr_info("wakeup trace: Couldn't add marker"
+               pr_info("wakeup trace: Couldn't activate tracepoint"
                        " probe to kernel_sched_wakeup\n");
                return;
        }
 
-       ret = marker_probe_register("kernel_sched_wakeup_new",
-                       "pid %d state %ld ## rq %p task %p rq->curr %p",
-                       wake_up_callback,
-                       &wakeup_trace);
+       ret = register_trace_sched_wakeup_new(probe_wakeup);
        if (ret) {
-               pr_info("wakeup trace: Couldn't add marker"
+               pr_info("wakeup trace: Couldn't activate tracepoint"
                        " probe to kernel_sched_wakeup_new\n");
                goto fail_deprobe;
        }
 
-       ret = marker_probe_register("kernel_sched_schedule",
-               "prev_pid %d next_pid %d prev_state %ld "
-               "## rq %p prev %p next %p",
-               sched_switch_callback,
-               &wakeup_trace);
+       ret = register_trace_sched_switch(probe_wakeup_sched_switch);
        if (ret) {
-               pr_info("sched trace: Couldn't add marker"
+               pr_info("sched trace: Couldn't activate tracepoint"
                        " probe to kernel_sched_schedule\n");
                goto fail_deprobe_wake_new;
        }
@@ -363,28 +313,18 @@ static void start_wakeup_tracer(struct trace_array *tr)
 
        return;
 fail_deprobe_wake_new:
-       marker_probe_unregister("kernel_sched_wakeup_new",
-                               wake_up_callback,
-                               &wakeup_trace);
+       unregister_trace_sched_wakeup_new(probe_wakeup);
 fail_deprobe:
-       marker_probe_unregister("kernel_sched_wakeup",
-                               wake_up_callback,
-                               &wakeup_trace);
+       unregister_trace_sched_wakeup(probe_wakeup);
 }
 
 static void stop_wakeup_tracer(struct trace_array *tr)
 {
        tracer_enabled = 0;
        unregister_ftrace_function(&trace_ops);
-       marker_probe_unregister("kernel_sched_schedule",
-                               sched_switch_callback,
-                               &wakeup_trace);
-       marker_probe_unregister("kernel_sched_wakeup_new",
-                               wake_up_callback,
-                               &wakeup_trace);
-       marker_probe_unregister("kernel_sched_wakeup",
-                               wake_up_callback,
-                               &wakeup_trace);
+       unregister_trace_sched_switch(probe_wakeup_sched_switch);
+       unregister_trace_sched_wakeup_new(probe_wakeup);
+       unregister_trace_sched_wakeup(probe_wakeup);
 }
 
 static void wakeup_tracer_init(struct trace_array *tr)
index 0911b7e073bf197b021ba77c75f6777a03910aa7..09cf230d7ecae8ecd25d5f24dee21c4e2fd7b41d 100644 (file)
@@ -9,65 +9,29 @@ static inline int trace_valid_entry(struct trace_entry *entry)
        case TRACE_FN:
        case TRACE_CTX:
        case TRACE_WAKE:
+       case TRACE_CONT:
        case TRACE_STACK:
+       case TRACE_PRINT:
        case TRACE_SPECIAL:
                return 1;
        }
        return 0;
 }
 
-static int
-trace_test_buffer_cpu(struct trace_array *tr, struct trace_array_cpu *data)
+static int trace_test_buffer_cpu(struct trace_array *tr, int cpu)
 {
-       struct trace_entry *entries;
-       struct page *page;
-       int idx = 0;
-       int i;
+       struct ring_buffer_event *event;
+       struct trace_entry *entry;
 
-       BUG_ON(list_empty(&data->trace_pages));
-       page = list_entry(data->trace_pages.next, struct page, lru);
-       entries = page_address(page);
+       while ((event = ring_buffer_consume(tr->buffer, cpu, NULL))) {
+               entry = ring_buffer_event_data(event);
 
-       check_pages(data);
-       if (head_page(data) != entries)
-               goto failed;
-
-       /*
-        * The starting trace buffer always has valid elements,
-        * if any element exists.
-        */
-       entries = head_page(data);
-
-       for (i = 0; i < tr->entries; i++) {
-
-               if (i < data->trace_idx && !trace_valid_entry(&entries[idx])) {
+               if (!trace_valid_entry(entry)) {
                        printk(KERN_CONT ".. invalid entry %d ",
-                               entries[idx].type);
+                               entry->type);
                        goto failed;
                }
-
-               idx++;
-               if (idx >= ENTRIES_PER_PAGE) {
-                       page = virt_to_page(entries);
-                       if (page->lru.next == &data->trace_pages) {
-                               if (i != tr->entries - 1) {
-                                       printk(KERN_CONT ".. entries buffer mismatch");
-                                       goto failed;
-                               }
-                       } else {
-                               page = list_entry(page->lru.next, struct page, lru);
-                               entries = page_address(page);
-                       }
-                       idx = 0;
-               }
        }
-
-       page = virt_to_page(entries);
-       if (page->lru.next != &data->trace_pages) {
-               printk(KERN_CONT ".. too many entries");
-               goto failed;
-       }
-
        return 0;
 
  failed:
@@ -89,13 +53,11 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
        /* Don't allow flipping of max traces now */
        raw_local_irq_save(flags);
        __raw_spin_lock(&ftrace_max_lock);
-       for_each_possible_cpu(cpu) {
-               if (!head_page(tr->data[cpu]))
-                       continue;
 
-               cnt += tr->data[cpu]->trace_idx;
+       cnt = ring_buffer_entries(tr->buffer);
 
-               ret = trace_test_buffer_cpu(tr, tr->data[cpu]);
+       for_each_possible_cpu(cpu) {
+               ret = trace_test_buffer_cpu(tr, cpu);
                if (ret)
                        break;
        }
@@ -120,11 +82,11 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
                                           struct trace_array *tr,
                                           int (*func)(void))
 {
-       unsigned long count;
-       int ret;
        int save_ftrace_enabled = ftrace_enabled;
        int save_tracer_enabled = tracer_enabled;
+       unsigned long count;
        char *func_name;
+       int ret;
 
        /* The ftrace test PASSED */
        printk(KERN_CONT "PASSED\n");
@@ -157,6 +119,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
        /* enable tracing */
        tr->ctrl = 1;
        trace->init(tr);
+
        /* Sleep for a 1/10 of a second */
        msleep(100);
 
@@ -212,10 +175,10 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
 int
 trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
 {
-       unsigned long count;
-       int ret;
        int save_ftrace_enabled = ftrace_enabled;
        int save_tracer_enabled = tracer_enabled;
+       unsigned long count;
+       int ret;
 
        /* make sure msleep has been recorded */
        msleep(1);
@@ -415,6 +378,15 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
 }
 #endif /* CONFIG_IRQSOFF_TRACER && CONFIG_PREEMPT_TRACER */
 
+#ifdef CONFIG_NOP_TRACER
+int
+trace_selftest_startup_nop(struct tracer *trace, struct trace_array *tr)
+{
+       /* What could possibly go wrong? */
+       return 0;
+}
+#endif
+
 #ifdef CONFIG_SCHED_TRACER
 static int trace_wakeup_test_thread(void *data)
 {
@@ -486,6 +458,9 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
 
        wake_up_process(p);
 
+       /* give a little time to let the thread wake up */
+       msleep(100);
+
        /* stop the tracing. */
        tr->ctrl = 0;
        trace->ctrl_update(tr);
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
new file mode 100644 (file)
index 0000000..74c5d9a
--- /dev/null
@@ -0,0 +1,310 @@
+/*
+ * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+#include <linux/stacktrace.h>
+#include <linux/kallsyms.h>
+#include <linux/seq_file.h>
+#include <linux/spinlock.h>
+#include <linux/uaccess.h>
+#include <linux/debugfs.h>
+#include <linux/ftrace.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/fs.h>
+#include "trace.h"
+
+#define STACK_TRACE_ENTRIES 500
+
+static unsigned long stack_dump_trace[STACK_TRACE_ENTRIES+1] =
+        { [0 ... (STACK_TRACE_ENTRIES)] = ULONG_MAX };
+static unsigned stack_dump_index[STACK_TRACE_ENTRIES];
+
+static struct stack_trace max_stack_trace = {
+       .max_entries            = STACK_TRACE_ENTRIES,
+       .entries                = stack_dump_trace,
+};
+
+static unsigned long max_stack_size;
+static raw_spinlock_t max_stack_lock =
+       (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+
+static int stack_trace_disabled __read_mostly;
+static DEFINE_PER_CPU(int, trace_active);
+
+static inline void check_stack(void)
+{
+       unsigned long this_size, flags;
+       unsigned long *p, *top, *start;
+       int i;
+
+       this_size = ((unsigned long)&this_size) & (THREAD_SIZE-1);
+       this_size = THREAD_SIZE - this_size;
+
+       if (this_size <= max_stack_size)
+               return;
+
+       raw_local_irq_save(flags);
+       __raw_spin_lock(&max_stack_lock);
+
+       /* a race could have already updated it */
+       if (this_size <= max_stack_size)
+               goto out;
+
+       max_stack_size = this_size;
+
+       max_stack_trace.nr_entries      = 0;
+       max_stack_trace.skip            = 3;
+
+       save_stack_trace(&max_stack_trace);
+
+       /*
+        * Now find where in the stack these are.
+        */
+       i = 0;
+       start = &this_size;
+       top = (unsigned long *)
+               (((unsigned long)start & ~(THREAD_SIZE-1)) + THREAD_SIZE);
+
+       /*
+        * Loop through all the entries. One of the entries may,
+        * for some reason, be missing from the stack, so we may
+        * have to account for that. If they are all there, this
+        * loop will only run once. This code only runs on a new
+        * max, so it is far from a fast path.
+        */
+       while (i < max_stack_trace.nr_entries) {
+
+               stack_dump_index[i] = this_size;
+               p = start;
+
+               for (; p < top && i < max_stack_trace.nr_entries; p++) {
+                       if (*p == stack_dump_trace[i]) {
+                               this_size = stack_dump_index[i++] =
+                                       (top - p) * sizeof(unsigned long);
+                               /* Start the search from here */
+                               start = p + 1;
+                       }
+               }
+
+               i++;
+       }
+
+ out:
+       __raw_spin_unlock(&max_stack_lock);
+       raw_local_irq_restore(flags);
+}
+
+static void
+stack_trace_call(unsigned long ip, unsigned long parent_ip)
+{
+       int cpu, resched;
+
+       if (unlikely(!ftrace_enabled || stack_trace_disabled))
+               return;
+
+       resched = need_resched();
+       preempt_disable_notrace();
+
+       cpu = raw_smp_processor_id();
+       /* no atomic needed; this variable is only modified by this CPU */
+       if (per_cpu(trace_active, cpu)++ != 0)
+               goto out;
+
+       check_stack();
+
+ out:
+       per_cpu(trace_active, cpu)--;
+       /* prevent recursion in schedule */
+       if (resched)
+               preempt_enable_no_resched_notrace();
+       else
+               preempt_enable_notrace();
+}
+
+static struct ftrace_ops trace_ops __read_mostly =
+{
+       .func = stack_trace_call,
+};
+
+static ssize_t
+stack_max_size_read(struct file *filp, char __user *ubuf,
+                   size_t count, loff_t *ppos)
+{
+       unsigned long *ptr = filp->private_data;
+       char buf[64];
+       int r;
+
+       r = snprintf(buf, sizeof(buf), "%ld\n", *ptr);
+       if (r > sizeof(buf))
+               r = sizeof(buf);
+       return simple_read_from_buffer(ubuf, count, ppos, buf, r);
+}
+
+static ssize_t
+stack_max_size_write(struct file *filp, const char __user *ubuf,
+                    size_t count, loff_t *ppos)
+{
+       long *ptr = filp->private_data;
+       unsigned long val, flags;
+       char buf[64];
+       int ret;
+
+       if (count >= sizeof(buf))
+               return -EINVAL;
+
+       if (copy_from_user(&buf, ubuf, count))
+               return -EFAULT;
+
+       buf[count] = 0;
+
+       ret = strict_strtoul(buf, 10, &val);
+       if (ret < 0)
+               return ret;
+
+       raw_local_irq_save(flags);
+       __raw_spin_lock(&max_stack_lock);
+       *ptr = val;
+       __raw_spin_unlock(&max_stack_lock);
+       raw_local_irq_restore(flags);
+
+       return count;
+}
+
+static struct file_operations stack_max_size_fops = {
+       .open           = tracing_open_generic,
+       .read           = stack_max_size_read,
+       .write          = stack_max_size_write,
+};
+
+static void *
+t_next(struct seq_file *m, void *v, loff_t *pos)
+{
+       long i = (long)m->private;
+
+       (*pos)++;
+
+       i++;
+
+       if (i >= max_stack_trace.nr_entries ||
+           stack_dump_trace[i] == ULONG_MAX)
+               return NULL;
+
+       m->private = (void *)i;
+
+       return &m->private;
+}
+
+static void *t_start(struct seq_file *m, loff_t *pos)
+{
+       void *t = &m->private;
+       loff_t l = 0;
+
+       local_irq_disable();
+       __raw_spin_lock(&max_stack_lock);
+
+       for (; t && l < *pos; t = t_next(m, t, &l))
+               ;
+
+       return t;
+}
+
+static void t_stop(struct seq_file *m, void *p)
+{
+       __raw_spin_unlock(&max_stack_lock);
+       local_irq_enable();
+}
+
+static int trace_lookup_stack(struct seq_file *m, long i)
+{
+       unsigned long addr = stack_dump_trace[i];
+#ifdef CONFIG_KALLSYMS
+       char str[KSYM_SYMBOL_LEN];
+
+       sprint_symbol(str, addr);
+
+       return seq_printf(m, "%s\n", str);
+#else
+       return seq_printf(m, "%p\n", (void*)addr);
+#endif
+}
+
+static int t_show(struct seq_file *m, void *v)
+{
+       long i = *(long *)v;
+       int size;
+
+       if (i < 0) {
+               seq_printf(m, "        Depth   Size      Location"
+                          "    (%d entries)\n"
+                          "        -----   ----      --------\n",
+                          max_stack_trace.nr_entries);
+               return 0;
+       }
+
+       if (i >= max_stack_trace.nr_entries ||
+           stack_dump_trace[i] == ULONG_MAX)
+               return 0;
+
+       if (i+1 == max_stack_trace.nr_entries ||
+           stack_dump_trace[i+1] == ULONG_MAX)
+               size = stack_dump_index[i];
+       else
+               size = stack_dump_index[i] - stack_dump_index[i+1];
+
+       seq_printf(m, "%3ld) %8d   %5d   ", i, stack_dump_index[i], size);
+
+       trace_lookup_stack(m, i);
+
+       return 0;
+}
+
+static struct seq_operations stack_trace_seq_ops = {
+       .start          = t_start,
+       .next           = t_next,
+       .stop           = t_stop,
+       .show           = t_show,
+};
+
+static int stack_trace_open(struct inode *inode, struct file *file)
+{
+       int ret;
+
+       ret = seq_open(file, &stack_trace_seq_ops);
+       if (!ret) {
+               struct seq_file *m = file->private_data;
+               m->private = (void *)-1;
+       }
+
+       return ret;
+}
+
+static struct file_operations stack_trace_fops = {
+       .open           = stack_trace_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+};
+
+static __init int stack_trace_init(void)
+{
+       struct dentry *d_tracer;
+       struct dentry *entry;
+
+       d_tracer = tracing_init_dentry();
+
+       entry = debugfs_create_file("stack_max_size", 0644, d_tracer,
+                                   &max_stack_size, &stack_max_size_fops);
+       if (!entry)
+               pr_warning("Could not create debugfs 'stack_max_size' entry\n");
+
+       entry = debugfs_create_file("stack_trace", 0444, d_tracer,
+                                   NULL, &stack_trace_fops);
+       if (!entry)
+               pr_warning("Could not create debugfs 'stack_trace' entry\n");
+
+       register_ftrace_function(&trace_ops);
+
+       return 0;
+}
+
+device_initcall(stack_trace_init);
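
The measurement in check_stack() above relies on kernel thread stacks being THREAD_SIZE-aligned: masking the address of any local variable yields the offset into the current stack, and the distance to the top is the usage. A minimal sketch of that computation in plain C (THREAD_SIZE is an assumed value here; outside an aligned kernel thread stack the mask is only illustrative):

#include <stdio.h>

#define THREAD_SIZE 8192	/* assumed stack size/alignment */

/*
 * Sketch: on a THREAD_SIZE-aligned stack, the address of a local
 * variable masked by (THREAD_SIZE - 1) is the offset from the
 * stack's base. Since stacks grow down, the usage is the distance
 * from the top.
 */
static unsigned long stack_in_use(void)
{
	unsigned long marker;
	unsigned long offset = (unsigned long)&marker & (THREAD_SIZE - 1);

	return THREAD_SIZE - offset;
}

int main(void)
{
	printf("approx stack in use: %lu bytes\n", stack_in_use());
	return 0;
}
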
index db58fb66a135bbbbc71b16f949bcbf47300fbb4c..9587d3bcba556761de49854c95676a03a6dcecbf 100644 (file)
@@ -241,7 +241,7 @@ static void stack_reset(struct trace_array *tr)
        tr->time_start = ftrace_now(tr->cpu);
 
        for_each_online_cpu(cpu)
-               tracing_reset(tr->data[cpu]);
+               tracing_reset(tr, cpu);
 }
 
 static void start_stack_trace(struct trace_array *tr)
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
new file mode 100644 (file)
index 0000000..f2b7c28
--- /dev/null
@@ -0,0 +1,477 @@
+/*
+ * Copyright (C) 2008 Mathieu Desnoyers
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/types.h>
+#include <linux/jhash.h>
+#include <linux/list.h>
+#include <linux/rcupdate.h>
+#include <linux/tracepoint.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+
+extern struct tracepoint __start___tracepoints[];
+extern struct tracepoint __stop___tracepoints[];
+
+/* Set to 1 to enable tracepoint debug output */
+static const int tracepoint_debug;
+
+/*
+ * tracepoints_mutex nests inside module_mutex. Tracepoints mutex protects the
+ * builtin and module tracepoints and the hash table.
+ */
+static DEFINE_MUTEX(tracepoints_mutex);
+
+/*
+ * Tracepoint hash table, containing the active tracepoints.
+ * Protected by tracepoints_mutex.
+ */
+#define TRACEPOINT_HASH_BITS 6
+#define TRACEPOINT_TABLE_SIZE (1 << TRACEPOINT_HASH_BITS)
+
+/*
+ * Note about RCU:
+ * It is used to delay the freeing of old probe arrays until a quiescent
+ * state is reached.
+ * Modifications of tracepoint entries are protected by tracepoints_mutex.
+ */
+struct tracepoint_entry {
+       struct hlist_node hlist;
+       void **funcs;
+       int refcount;   /* Number of times armed. 0 if disarmed. */
+       struct rcu_head rcu;
+       void *oldptr;
+       unsigned char rcu_pending:1;
+       char name[0];
+};
+
+static struct hlist_head tracepoint_table[TRACEPOINT_TABLE_SIZE];
+
+static void free_old_closure(struct rcu_head *head)
+{
+       struct tracepoint_entry *entry = container_of(head,
+               struct tracepoint_entry, rcu);
+       kfree(entry->oldptr);
+       /* Make sure we free the data before setting the pending flag to 0 */
+       smp_wmb();
+       entry->rcu_pending = 0;
+}
+
+static void tracepoint_entry_free_old(struct tracepoint_entry *entry, void *old)
+{
+       if (!old)
+               return;
+       entry->oldptr = old;
+       entry->rcu_pending = 1;
+       /* write rcu_pending before calling the RCU callback */
+       smp_wmb();
+       call_rcu_sched(&entry->rcu, free_old_closure);
+}
+
+static void debug_print_probes(struct tracepoint_entry *entry)
+{
+       int i;
+
+       if (!tracepoint_debug)
+               return;
+
+       for (i = 0; entry->funcs[i]; i++)
+               printk(KERN_DEBUG "Probe %d : %p\n", i, entry->funcs[i]);
+}
+
+static void *
+tracepoint_entry_add_probe(struct tracepoint_entry *entry, void *probe)
+{
+       int nr_probes = 0;
+       void **old, **new;
+
+       WARN_ON(!probe);
+
+       debug_print_probes(entry);
+       old = entry->funcs;
+       if (old) {
+               /* (N -> N+1), (N != 0, 1) probes */
+               for (nr_probes = 0; old[nr_probes]; nr_probes++)
+                       if (old[nr_probes] == probe)
+                               return ERR_PTR(-EEXIST);
+       }
+       /* + 2 : one for new probe, one for NULL func */
+       new = kzalloc((nr_probes + 2) * sizeof(void *), GFP_KERNEL);
+       if (new == NULL)
+               return ERR_PTR(-ENOMEM);
+       if (old)
+               memcpy(new, old, nr_probes * sizeof(void *));
+       new[nr_probes] = probe;
+       entry->refcount = nr_probes + 1;
+       entry->funcs = new;
+       debug_print_probes(entry);
+       return old;
+}
+
+static void *
+tracepoint_entry_remove_probe(struct tracepoint_entry *entry, void *probe)
+{
+       int nr_probes = 0, nr_del = 0, i;
+       void **old, **new;
+
+       old = entry->funcs;
+
+       debug_print_probes(entry);
+       /* (N -> M), (N > 1, M >= 0) probes */
+       for (nr_probes = 0; old[nr_probes]; nr_probes++) {
+               if ((!probe || old[nr_probes] == probe))
+                       nr_del++;
+       }
+
+       if (nr_probes - nr_del == 0) {
+               /* N -> 0, (N > 1) */
+               entry->funcs = NULL;
+               entry->refcount = 0;
+               debug_print_probes(entry);
+               return old;
+       } else {
+               int j = 0;
+               /* N -> M, (N > 1, M > 0) */
+               /* + 1 for NULL */
+               new = kzalloc((nr_probes - nr_del + 1)
+                       * sizeof(void *), GFP_KERNEL);
+               if (new == NULL)
+                       return ERR_PTR(-ENOMEM);
+               for (i = 0; old[i]; i++)
+                       if ((probe && old[i] != probe))
+                               new[j++] = old[i];
+               entry->refcount = nr_probes - nr_del;
+               entry->funcs = new;
+       }
+       debug_print_probes(entry);
+       return old;
+}
+
+/*
+ * Get tracepoint if the tracepoint is present in the tracepoint hash table.
+ * Must be called with tracepoints_mutex held.
+ * Returns NULL if not present.
+ */
+static struct tracepoint_entry *get_tracepoint(const char *name)
+{
+       struct hlist_head *head;
+       struct hlist_node *node;
+       struct tracepoint_entry *e;
+       u32 hash = jhash(name, strlen(name), 0);
+
+       head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)];
+       hlist_for_each_entry(e, node, head, hlist) {
+               if (!strcmp(name, e->name))
+                       return e;
+       }
+       return NULL;
+}
+
+/*
+ * Add the tracepoint to the tracepoint hash table. Must be called with
+ * tracepoints_mutex held.
+ */
+static struct tracepoint_entry *add_tracepoint(const char *name)
+{
+       struct hlist_head *head;
+       struct hlist_node *node;
+       struct tracepoint_entry *e;
+       size_t name_len = strlen(name) + 1;
+       u32 hash = jhash(name, name_len-1, 0);
+
+       head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)];
+       hlist_for_each_entry(e, node, head, hlist) {
+               if (!strcmp(name, e->name)) {
+                       printk(KERN_NOTICE
+                               "tracepoint %s busy\n", name);
+                       return ERR_PTR(-EEXIST);        /* Already there */
+               }
+       }
+       /*
+        * Using kmalloc here to allocate a variable length element. Could
+        * cause some memory fragmentation if overused.
+        */
+       e = kmalloc(sizeof(struct tracepoint_entry) + name_len, GFP_KERNEL);
+       if (!e)
+               return ERR_PTR(-ENOMEM);
+       memcpy(&e->name[0], name, name_len);
+       e->funcs = NULL;
+       e->refcount = 0;
+       e->rcu_pending = 0;
+       hlist_add_head(&e->hlist, head);
+       return e;
+}
+
+/*
+ * Remove the tracepoint from the tracepoint hash table. Must be called with
+ * tracepoints_mutex held.
+ */
+static int remove_tracepoint(const char *name)
+{
+       struct hlist_head *head;
+       struct hlist_node *node;
+       struct tracepoint_entry *e;
+       int found = 0;
+       size_t len = strlen(name) + 1;
+       u32 hash = jhash(name, len-1, 0);
+
+       head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)];
+       hlist_for_each_entry(e, node, head, hlist) {
+               if (!strcmp(name, e->name)) {
+                       found = 1;
+                       break;
+               }
+       }
+       if (!found)
+               return -ENOENT;
+       if (e->refcount)
+               return -EBUSY;
+       hlist_del(&e->hlist);
+       /* Make sure the call_rcu_sched has been executed */
+       if (e->rcu_pending)
+               rcu_barrier_sched();
+       kfree(e);
+       return 0;
+}
+
+/*
+ * Sets the probe callback corresponding to one tracepoint.
+ */
+static void set_tracepoint(struct tracepoint_entry **entry,
+       struct tracepoint *elem, int active)
+{
+       WARN_ON(strcmp((*entry)->name, elem->name) != 0);
+
+       /*
+        * rcu_assign_pointer has a smp_wmb() which makes sure that the new
+        * probe callbacks array is consistent before setting a pointer to it.
+        * This array is referenced by __DO_TRACE from
+        * include/linux/tracepoint.h. A matching smp_read_barrier_depends()
+        * is used on the read side.
+        */
+       rcu_assign_pointer(elem->funcs, (*entry)->funcs);
+       elem->state = active;
+}
+
+/*
+ * Disable a tracepoint and its probe callback.
+ * Note: only waiting for an RCU period after setting elem->call to the empty
+ * function ensures that the original callback is not used anymore. This is
+ * ensured by the preempt_disable around the call site.
+ */
+static void disable_tracepoint(struct tracepoint *elem)
+{
+       elem->state = 0;
+}
+
+/**
+ * tracepoint_update_probe_range - Update a probe range
+ * @begin: beginning of the range
+ * @end: end of the range
+ *
+ * Updates the probe callback corresponding to a range of tracepoints.
+ */
+void tracepoint_update_probe_range(struct tracepoint *begin,
+       struct tracepoint *end)
+{
+       struct tracepoint *iter;
+       struct tracepoint_entry *mark_entry;
+
+       mutex_lock(&tracepoints_mutex);
+       for (iter = begin; iter < end; iter++) {
+               mark_entry = get_tracepoint(iter->name);
+               if (mark_entry) {
+                       set_tracepoint(&mark_entry, iter,
+                                       !!mark_entry->refcount);
+               } else {
+                       disable_tracepoint(iter);
+               }
+       }
+       mutex_unlock(&tracepoints_mutex);
+}
+
+/*
+ * Update probes, removing the faulty probes.
+ */
+static void tracepoint_update_probes(void)
+{
+       /* Core kernel tracepoints */
+       tracepoint_update_probe_range(__start___tracepoints,
+               __stop___tracepoints);
+       /* tracepoints in modules. */
+       module_update_tracepoints();
+}
+
+/**
+ * tracepoint_probe_register -  Connect a probe to a tracepoint
+ * @name: tracepoint name
+ * @probe: probe handler
+ *
+ * Returns 0 if ok, error value on error.
+ * The probe address must at least be aligned on the architecture pointer size.
+ */
+int tracepoint_probe_register(const char *name, void *probe)
+{
+       struct tracepoint_entry *entry;
+       int ret = 0;
+       void *old;
+
+       mutex_lock(&tracepoints_mutex);
+       entry = get_tracepoint(name);
+       if (!entry) {
+               entry = add_tracepoint(name);
+               if (IS_ERR(entry)) {
+                       ret = PTR_ERR(entry);
+                       goto end;
+               }
+       }
+       /*
+        * If we detect that a call_rcu_sched is pending for this tracepoint,
+        * make sure it's executed now.
+        */
+       if (entry->rcu_pending)
+               rcu_barrier_sched();
+       old = tracepoint_entry_add_probe(entry, probe);
+       if (IS_ERR(old)) {
+               ret = PTR_ERR(old);
+               goto end;
+       }
+       mutex_unlock(&tracepoints_mutex);
+       tracepoint_update_probes();             /* may update entry */
+       mutex_lock(&tracepoints_mutex);
+       entry = get_tracepoint(name);
+       WARN_ON(!entry);
+       if (entry->rcu_pending)
+               rcu_barrier_sched();
+       tracepoint_entry_free_old(entry, old);
+end:
+       mutex_unlock(&tracepoints_mutex);
+       return ret;
+}
+EXPORT_SYMBOL_GPL(tracepoint_probe_register);
+
+/**
+ * tracepoint_probe_unregister -  Disconnect a probe from a tracepoint
+ * @name: tracepoint name
+ * @probe: probe function pointer
+ *
+ * We do not need to call synchronize_sched() to make sure the probes have
+ * finished running before doing a module unload, because the module unload
+ * itself uses stop_machine(), which ensures that every preempt-disabled
+ * section has finished.
+ */
+int tracepoint_probe_unregister(const char *name, void *probe)
+{
+       struct tracepoint_entry *entry;
+       void *old;
+       int ret = -ENOENT;
+
+       mutex_lock(&tracepoints_mutex);
+       entry = get_tracepoint(name);
+       if (!entry)
+               goto end;
+       if (entry->rcu_pending)
+               rcu_barrier_sched();
+       old = tracepoint_entry_remove_probe(entry, probe);
+       mutex_unlock(&tracepoints_mutex);
+       tracepoint_update_probes();             /* may update entry */
+       mutex_lock(&tracepoints_mutex);
+       entry = get_tracepoint(name);
+       if (!entry)
+               goto end;
+       if (entry->rcu_pending)
+               rcu_barrier_sched();
+       tracepoint_entry_free_old(entry, old);
+       remove_tracepoint(name);        /* Ignore busy error message */
+       ret = 0;
+end:
+       mutex_unlock(&tracepoints_mutex);
+       return ret;
+}
+EXPORT_SYMBOL_GPL(tracepoint_probe_unregister);
+
+/**
+ * tracepoint_get_iter_range - Get the next tracepoint in a range
+ * @tracepoint: current tracepoint (in), next tracepoint (out)
+ * @begin: beginning of the range
+ * @end: end of the range
+ *
+ * Returns whether a next tracepoint has been found (1) or not (0).
+ * Will return the first tracepoint in the range if the input tracepoint is
+ * NULL.
+ */
+int tracepoint_get_iter_range(struct tracepoint **tracepoint,
+       struct tracepoint *begin, struct tracepoint *end)
+{
+       if (!*tracepoint && begin != end) {
+               *tracepoint = begin;
+               return 1;
+       }
+       if (*tracepoint >= begin && *tracepoint < end)
+               return 1;
+       return 0;
+}
+EXPORT_SYMBOL_GPL(tracepoint_get_iter_range);
+
+static void tracepoint_get_iter(struct tracepoint_iter *iter)
+{
+       int found = 0;
+
+       /* Core kernel tracepoints */
+       if (!iter->module) {
+               found = tracepoint_get_iter_range(&iter->tracepoint,
+                               __start___tracepoints, __stop___tracepoints);
+               if (found)
+                       goto end;
+       }
+       /* tracepoints in modules. */
+       found = module_get_iter_tracepoints(iter);
+end:
+       if (!found)
+               tracepoint_iter_reset(iter);
+}
+
+void tracepoint_iter_start(struct tracepoint_iter *iter)
+{
+       tracepoint_get_iter(iter);
+}
+EXPORT_SYMBOL_GPL(tracepoint_iter_start);
+
+void tracepoint_iter_next(struct tracepoint_iter *iter)
+{
+       iter->tracepoint++;
+       /*
+        * iter->tracepoint may be invalid because we blindly incremented it.
+        * Make sure it is valid by checking it against the tracepoint
+        * ranges, moving on to the tracepoints of following modules if
+        * necessary.
+        */
+       tracepoint_get_iter(iter);
+}
+EXPORT_SYMBOL_GPL(tracepoint_iter_next);
+
+void tracepoint_iter_stop(struct tracepoint_iter *iter)
+{
+}
+EXPORT_SYMBOL_GPL(tracepoint_iter_stop);
+
+void tracepoint_iter_reset(struct tracepoint_iter *iter)
+{
+       iter->module = NULL;
+       iter->tracepoint = NULL;
+}
+EXPORT_SYMBOL_GPL(tracepoint_iter_reset);
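
The probe add/remove helpers above never modify a live probe array: they build a new NULL-terminated array, publish it with rcu_assign_pointer(), and free the old one only after a grace period. A minimal sketch of just the copy-and-grow step in plain C (names are illustrative, not from the patch):

#include <stdlib.h>
#include <string.h>

/* Sketch: return a new NULL-terminated probe array with one more entry. */
static void **probe_array_add(void **old, void *probe)
{
	int nr = 0;
	void **new;

	if (old)
		while (old[nr])		/* count the existing probes */
			nr++;

	/* + 2: one slot for the new probe, one for the NULL terminator */
	new = calloc(nr + 2, sizeof(void *));
	if (!new)
		return NULL;
	if (old)
		memcpy(new, old, nr * sizeof(void *));
	new[nr] = probe;

	/*
	 * The caller publishes 'new' and defers freeing 'old' until a
	 * quiescent state, so concurrent readers never see a torn array.
	 */
	return new;
}
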
index 3a6c4a6583256584303c4ac7c8813938abc49ed0..164951c473058a25c081d5e47260d872068cdbb7 100644 (file)
@@ -64,8 +64,6 @@
 
 #include "internal.h"
 
-#include "internal.h"
-
 #ifndef CONFIG_NEED_MULTIPLE_NODES
 /* use the per-pgdat data instead for discontigmem - mbligh */
 unsigned long max_mapnr;
index 8d7a27a6335c4c6076b38fc2ab990a82081ada5d..3e67d575ee6e64821c43ea2b7d181a3d36d9805d 100644 (file)
@@ -95,6 +95,7 @@ put_dentry:
 put_memory:
        return ERR_PTR(error);
 }
+EXPORT_SYMBOL_GPL(shmem_file_setup);
 
 /**
  * shmem_zero_setup - setup a shared anonymous mapping
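
Exporting shmem_file_setup() lets modules create unlinked, shmem-backed files directly. A hedged usage sketch (the name and size are arbitrary placeholders; error handling is trimmed):

#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/err.h>

/* Sketch: allocate a scratch file backed by shmem from a module. */
static struct file *make_scratch_file(void)
{
	struct file *filp;

	filp = shmem_file_setup("module-scratch", 16 * PAGE_SIZE, 0);
	if (IS_ERR(filp))
		return NULL;	/* setup of the backing inode failed */

	return filp;
}
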
index 712ae47af0bf5f23a4b32ea0497dd2f6e8272085..65ae576030da559356c30199cecb2fb6fbe19b0e 100644 (file)
@@ -17,7 +17,6 @@
 #include <linux/interrupt.h>
 #include <linux/seq_file.h>
 #include <linux/debugobjects.h>
-#include <linux/vmalloc.h>
 #include <linux/kallsyms.h>
 #include <linux/list.h>
 #include <linux/rbtree.h>
@@ -175,6 +174,21 @@ static int vmap_page_range(unsigned long addr, unsigned long end,
        return nr;
 }
 
+static inline int is_vmalloc_or_module_addr(const void *x)
+{
+       /*
+        * x86-64 and sparc64 put modules in a special place,
+        * and fall back on vmalloc() if that fails. Others
+        * just put modules in the vmalloc space.
+        */
+#if defined(CONFIG_MODULES) && defined(MODULES_VADDR)
+       unsigned long addr = (unsigned long)x;
+       if (addr >= MODULES_VADDR && addr < MODULES_END)
+               return 1;
+#endif
+       return is_vmalloc_addr(x);
+}
+
 /*
  * Walk a vmap address to the struct page it maps.
  */
@@ -188,8 +202,7 @@ struct page *vmalloc_to_page(const void *vmalloc_addr)
         * XXX we might need to change this if we add VIRTUAL_BUG_ON for
         * architectures that do not vmalloc module space
         */
-       VIRTUAL_BUG_ON(!is_vmalloc_addr(vmalloc_addr) &&
-                       !is_module_address(addr));
+       VIRTUAL_BUG_ON(!is_vmalloc_or_module_addr(vmalloc_addr));
 
        if (!pgd_none(*pgd)) {
                pud_t *pud = pud_offset(pgd, addr);
index e1fb471cc50182e5e907bef3caa566f8ede7e6d4..4b02f5a0e6560f4c886d413409d5507ffe6b0356 100644 (file)
@@ -13,6 +13,12 @@ config SAMPLE_MARKERS
        help
          This build markers example modules.
 
+config SAMPLE_TRACEPOINTS
+       tristate "Build tracepoints examples -- loadable modules only"
+       depends on TRACEPOINTS && m
+       help
+         This builds the tracepoints example modules.
+
 config SAMPLE_KOBJECT
        tristate "Build kobject examples"
        help
index 2e02575f779441bf1babe686f50f0b7fb1d3e03e..10eaca89fe17913875f90cf0354a9478978db018 100644 (file)
@@ -1,3 +1,3 @@
 # Makefile for Linux samples code
 
-obj-$(CONFIG_SAMPLES)  += markers/ kobject/ kprobes/
+obj-$(CONFIG_SAMPLES)  += markers/ kobject/ kprobes/ tracepoints/
index c8e099d4d1fdd7d5cec4a7148d259ee6281878cc..2dfb3b32937e03e7621c30212bdc12880f4c7467 100644 (file)
@@ -81,6 +81,7 @@ static void __exit probe_fini(void)
                        probe_array[i].probe_func, &probe_array[i]);
        printk(KERN_INFO "Number of event b : %u\n",
                        atomic_read(&eventb_count));
+       marker_synchronize_unregister();
 }
 
 module_init(probe_init);
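
The added marker_synchronize_unregister() call closes a teardown race: a probe may still be running on another CPU when unregistration returns, so probe-private data must not be freed until all such probes have finished. A hedged sketch of the safe pattern (my_probe and probe_data are illustrative, not from the sample):

#include <linux/marker.h>
#include <linux/slab.h>

/* Illustrative probe, assumed registered earlier via marker_probe_register(). */
static void my_probe(void *probe_private, void *call_private,
		     const char *fmt, va_list *args)
{
}

static char *probe_data;

static void my_teardown(void)
{
	marker_probe_unregister("subsystem_event", my_probe, probe_data);

	/*
	 * Wait until every CPU has finished running probes that may
	 * still reference probe_data; only then is freeing it safe.
	 */
	marker_synchronize_unregister();

	kfree(probe_data);
	probe_data = NULL;
}
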
diff --git a/samples/tracepoints/Makefile b/samples/tracepoints/Makefile
new file mode 100644 (file)
index 0000000..36479ad
--- /dev/null
@@ -0,0 +1,6 @@
+# builds the tracepoint example kernel modules;
+# then to use one (as root):  insmod <module_name.ko>
+
+obj-$(CONFIG_SAMPLE_TRACEPOINTS) += tracepoint-sample.o
+obj-$(CONFIG_SAMPLE_TRACEPOINTS) += tracepoint-probe-sample.o
+obj-$(CONFIG_SAMPLE_TRACEPOINTS) += tracepoint-probe-sample2.o
diff --git a/samples/tracepoints/tp-samples-trace.h b/samples/tracepoints/tp-samples-trace.h
new file mode 100644 (file)
index 0000000..0216b55
--- /dev/null
@@ -0,0 +1,13 @@
+#ifndef _TP_SAMPLES_TRACE_H
+#define _TP_SAMPLES_TRACE_H
+
+#include <linux/proc_fs.h>     /* for struct inode and struct file */
+#include <linux/tracepoint.h>
+
+DEFINE_TRACE(subsys_event,
+       TPPROTO(struct inode *inode, struct file *file),
+       TPARGS(inode, file));
+DEFINE_TRACE(subsys_eventb,
+       TPPROTO(void),
+       TPARGS());
+#endif
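
Each DEFINE_TRACE above gives call sites an inline trace_subsys_event()/trace_subsys_eventb() that is a no-op while the tracepoint is disarmed and otherwise walks the RCU-published probe array. A rough sketch of that dispatch shape (simplified; the real expansion lives in include/linux/tracepoint.h):

struct inode;
struct file;

/* Sketch of the per-tracepoint state the macro generates. */
struct sketch_tracepoint {
	int state;		/* non-zero once a probe is armed */
	void **funcs;		/* NULL-terminated probe array (RCU) */
};

typedef void (*subsys_event_fn)(struct inode *inode, struct file *file);

static inline void sketch_trace_subsys_event(struct sketch_tracepoint *tp,
					     struct inode *inode,
					     struct file *file)
{
	void **it;

	if (!tp->state)		/* fast path: tracepoint disabled */
		return;
	for (it = tp->funcs; it && *it; it++)
		((subsys_event_fn)*it)(inode, file);
}
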
diff --git a/samples/tracepoints/tracepoint-probe-sample.c b/samples/tracepoints/tracepoint-probe-sample.c
new file mode 100644 (file)
index 0000000..55abfdd
--- /dev/null
@@ -0,0 +1,55 @@
+/*
+ * tracepoint-probe-sample.c
+ *
+ * sample tracepoint probes.
+ */
+
+#include <linux/module.h>
+#include <linux/file.h>
+#include <linux/dcache.h>
+#include "tp-samples-trace.h"
+
+/*
+ * Here the caller only guarantees locking for struct file and struct inode.
+ * Locking must therefore be done in the probe to use the dentry.
+ */
+static void probe_subsys_event(struct inode *inode, struct file *file)
+{
+       path_get(&file->f_path);
+       dget(file->f_path.dentry);
+       printk(KERN_INFO "Event is encountered with filename %s\n",
+               file->f_path.dentry->d_name.name);
+       dput(file->f_path.dentry);
+       path_put(&file->f_path);
+}
+
+static void probe_subsys_eventb(void)
+{
+       printk(KERN_INFO "Event B is encountered\n");
+}
+
+int __init tp_sample_trace_init(void)
+{
+       int ret;
+
+       ret = register_trace_subsys_event(probe_subsys_event);
+       WARN_ON(ret);
+       ret = register_trace_subsys_eventb(probe_subsys_eventb);
+       WARN_ON(ret);
+
+       return 0;
+}
+
+module_init(tp_sample_trace_init);
+
+void __exit tp_sample_trace_exit(void)
+{
+       unregister_trace_subsys_eventb(probe_subsys_eventb);
+       unregister_trace_subsys_event(probe_subsys_event);
+}
+
+module_exit(tp_sample_trace_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mathieu Desnoyers");
+MODULE_DESCRIPTION("Tracepoint Probes Samples");
diff --git a/samples/tracepoints/tracepoint-probe-sample2.c b/samples/tracepoints/tracepoint-probe-sample2.c
new file mode 100644 (file)
index 0000000..5e9fcf4
--- /dev/null
@@ -0,0 +1,42 @@
+/*
+ * tracepoint-probe-sample2.c
+ *
+ * 2nd sample tracepoint probes.
+ */
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include "tp-samples-trace.h"
+
+/*
+ * Here the caller only guarantees locking for struct file and struct inode.
+ * Locking must therefore be done in the probe to use the dentry.
+ */
+static void probe_subsys_event(struct inode *inode, struct file *file)
+{
+       printk(KERN_INFO "Event is encountered with inode number %lu\n",
+               inode->i_ino);
+}
+
+int __init tp_sample_trace_init(void)
+{
+       int ret;
+
+       ret = register_trace_subsys_event(probe_subsys_event);
+       WARN_ON(ret);
+
+       return 0;
+}
+
+module_init(tp_sample_trace_init);
+
+void __exit tp_sample_trace_exit(void)
+{
+       unregister_trace_subsys_event(probe_subsys_event);
+}
+
+module_exit(tp_sample_trace_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mathieu Desnoyers");
+MODULE_DESCRIPTION("Tracepoint Probes Samples");
diff --git a/samples/tracepoints/tracepoint-sample.c b/samples/tracepoints/tracepoint-sample.c
new file mode 100644 (file)
index 0000000..4ae4b7f
--- /dev/null
@@ -0,0 +1,53 @@
+/* tracepoint-sample.c
+ *
+ * Executes a tracepoint when /proc/tracepoint-example is opened.
+ *
+ * (C) Copyright 2007 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * This file is released under the GPLv2.
+ * See the file COPYING for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/proc_fs.h>
+#include "tp-samples-trace.h"
+
+struct proc_dir_entry *pentry_example;
+
+static int my_open(struct inode *inode, struct file *file)
+{
+       int i;
+
+       trace_subsys_event(inode, file);
+       for (i = 0; i < 10; i++)
+               trace_subsys_eventb();
+       return -EPERM;
+}
+
+static struct file_operations mark_ops = {
+       .open = my_open,
+};
+
+static int example_init(void)
+{
+       printk(KERN_ALERT "example init\n");
+       pentry_example = proc_create("tracepoint-example", 0444, NULL,
+               &mark_ops);
+       if (!pentry_example)
+               return -EPERM;
+       return 0;
+}
+
+static void example_exit(void)
+{
+       printk(KERN_ALERT "example exit\n");
+       remove_proc_entry("tracepoint-example", NULL);
+}
+
+module_init(example_init)
+module_exit(example_exit)
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mathieu Desnoyers");
+MODULE_DESCRIPTION("Tracepoint example");
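
To exercise the sample (module names from the Makefile above): load tracepoint-sample.ko together with one or both probe modules, then open /proc/tracepoint-example, e.g. with cat. The open handler intentionally returns -EPERM after firing the tracepoints, so the read fails, but the probe output appears in dmesg.
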
index 277cfe0b71001e69bc67d3eda6407250d07d7ab8..5ed4cbf1e0e1f412346431a870f1300a2d77a50b 100644 (file)
@@ -198,10 +198,17 @@ cmd_modversions =                                                 \
        fi;
 endif
 
+ifdef CONFIG_FTRACE_MCOUNT_RECORD
+cmd_record_mcount = perl $(srctree)/scripts/recordmcount.pl \
+       "$(ARCH)" "$(OBJDUMP)" "$(OBJCOPY)" "$(CC)" "$(LD)" "$(NM)" "$(RM)" \
+       "$(MV)" "$(@)";
+endif
+
 define rule_cc_o_c
        $(call echo-cmd,checksrc) $(cmd_checksrc)                         \
        $(call echo-cmd,cc_o_c) $(cmd_cc_o_c);                            \
        $(cmd_modversions)                                                \
+       $(cmd_record_mcount)                                              \
        scripts/basic/fixdep $(depfile) $@ '$(call make-cmd,cc_o_c)' >    \
                                                      $(dot-target).tmp;  \
        rm -f $(depfile);                                                 \
index 2243353fe55dc00058cc9b96613a7a092e26226b..5e7316e5aa395a4c5da260c1d2890336b4c70039 100644 (file)
 #      dmesg | perl scripts/bootgraph.pl > output.svg
 #
 
-my @rows;
-my %start, %end, %row;
+my %start, %end;
 my $done = 0;
-my $rowcount = 0;
 my $maxtime = 0;
 my $firsttime = 100;
 my $count = 0;
+my %pids;
+
 while (<>) {
        my $line = $_;
        if ($line =~ /([0-9\.]+)\] calling  ([a-zA-Z0-9\_]+)\+/) {
@@ -54,14 +54,8 @@ while (<>) {
                                $firsttime = $1;
                        }
                }
-               $row{$func} = 1;
                if ($line =~ /\@ ([0-9]+)/) {
-                       my $pid = $1;
-                       if (!defined($rows[$pid])) {
-                               $rowcount = $rowcount + 1;
-                               $rows[$pid] = $rowcount;
-                       }
-                       $row{$func} = $rows[$pid];
+                       $pids{$func} = $1;
                }
                $count = $count + 1;
        }
@@ -109,17 +103,25 @@ $styles[11] = "fill:rgb(128,255,255);fill-opacity:0.5;stroke-width:1;stroke:rgb(
 my $mult = 950.0 / ($maxtime - $firsttime);
 my $threshold = ($maxtime - $firsttime) / 60.0;
 my $stylecounter = 0;
+my %rows;
+my $rowscount = 1;
 while (($key,$value) = each %start) {
        my $duration = $end{$key} - $start{$key};
 
        if ($duration >= $threshold) {
                my $s, $s2, $e, $y;
+               $pid = $pids{$key};
+
+               if (!defined($rows{$pid})) {
+                       $rows{$pid} = $rowscount;
+                       $rowscount = $rowscount + 1;
+               }
                $s = ($value - $firsttime) * $mult;
                $s2 = $s + 6;
                $e = ($end{$key} - $firsttime) * $mult;
                $w = $e - $s;
 
-               $y = $row{$key} * 150;
+               $y = $rows{$pid} * 150;
                $y2 = $y + 4;
 
                $style = $styles[$stylecounter];
index e30bac141b21ee1894f62135b8727efa276fe38e..f88bb3e21cda9c9a29470004f9e964907a1a17bb 100755 (executable)
@@ -1,5 +1,5 @@
 #!/usr/bin/perl -w
-# (c) 2001, Dave Jones. <davej@codemonkey.org.uk> (the file handling bit)
+# (c) 2001, Dave Jones. <davej@redhat.com> (the file handling bit)
 # (c) 2005, Joel Schopp <jschopp@austin.ibm.com> (the ugly bit)
 # (c) 2007, Andy Whitcroft <apw@uk.ibm.com> (new conditions, test suite, etc)
 # Licensed under the terms of the GNU GPL License version 2
diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
new file mode 100755 (executable)
index 0000000..f56d760
--- /dev/null
@@ -0,0 +1,395 @@
+#!/usr/bin/perl -w
+# (c) 2008, Steven Rostedt <srostedt@redhat.com>
+# Licensed under the terms of the GNU GPL License version 2
+#
+# recordmcount.pl - makes a section called __mcount_loc that holds
+#                   all the offsets to the calls to mcount.
+#
+#
+# What we want to end up with is a section in vmlinux called
+# __mcount_loc that contains a list of pointers to all the
+# call sites in the kernel that call mcount. Later, on boot up, the kernel
+# will read this list, save the locations, and turn them into nops.
+# When tracing or profiling is later enabled, these locations will then
+# be converted back to pointers to some function.
+#
+# This is no easy feat. This script is called just after the original
+# object is compiled and before it is linked.
+#
+# The references to the call sites are offsets from the section of text
+# that the call site is in. Hence, all functions in a section that
+# has a call site to mcount will have their offsets from the beginning of
+# the section, not the beginning of the function.
+#
+# The trick is to find a way to record the beginning of the section.
+# We do this by looking at the first function in the section, whose
+# address will also be the address of the section after the final link.
+# e.g.
+#
+#  .section ".text.sched"
+#  .globl my_func
+#  my_func:
+#        [...]
+#        call mcount  (offset: 0x5)
+#        [...]
+#        ret
+#  other_func:
+#        [...]
+#        call mcount (offset: 0x1b)
+#        [...]
+#
+# Both relocation offsets for the mcounts in the above example will be
+# offset from .text.sched. If we make another file called tmp.s with:
+#
+#  .section __mcount_loc
+#  .quad  my_func + 0x5
+#  .quad  my_func + 0x1b
+#
+# We can then compile this tmp.s into tmp.o, and link it to the original
+# object.
+#
+# But this gets hard if my_func is not global (a static function).
+# In such a case we have:
+#
+#  .section ".text.sched"
+#  my_func:
+#        [...]
+#        call mcount  (offset: 0x5)
+#        [...]
+#        ret
+#  .globl my_func
+#  other_func:
+#        [...]
+#        call mcount (offset: 0x1b)
+#        [...]
+#
+# If we make the tmp.s the same as above, when we link together with
+# the original object, we will end up with two symbols for my_func:
+# one local, one global.  After final compile, we will end up with
+# an undefined reference to my_func.
+#
+# Since local objects can reference local variables, we need to find
+# a way to make tmp.o reference the local objects of the original object
+# file after it is linked together. To do this, we convert my_func
+# into a global symbol before linking tmp.o. Then, after we link tmp.o,
+# we will have only a single, global symbol for my_func.
+# We can then convert my_func back into a local symbol and we are done.
+#
+# Here are the steps we take:
+#
+# 1) Record all the local symbols by using 'nm'
+# 2) Use objdump to find all the call site offsets and sections for
+#    mcount.
+# 3) Compile the list into its own object.
+# 4) Do we have to deal with local functions? If not, go to step 8.
+# 5) Make an object that converts these local functions to global symbols
+#    with objcopy.
+# 6) Link together this new object with the list object.
+# 7) Convert the local functions back to local symbols and rename
+#    the result as the original object.
+#    End.
+# 8) Link the object with the list object.
+# 9) Move the result back to the original object.
+#    End.
+#
+
+use strict;
+
+my $P = $0;
+$P =~ s@.*/@@g;
+
+my $V = '0.1';
+
+if ($#ARGV < 6) {
+       print "usage: $P arch objdump objcopy cc ld nm rm mv inputfile\n";
+       print "version: $V\n";
+       exit(1);
+}
+
+my ($arch, $objdump, $objcopy, $cc, $ld, $nm, $rm, $mv, $inputfile) = @ARGV;
+
+$objdump = "objdump" if ((length $objdump) == 0);
+$objcopy = "objcopy" if ((length $objcopy) == 0);
+$cc = "gcc" if ((length $cc) == 0);
+$ld = "ld" if ((length $ld) == 0);
+$nm = "nm" if ((length $nm) == 0);
+$rm = "rm" if ((length $rm) == 0);
+$mv = "mv" if ((length $mv) == 0);
+
+#print STDERR "running: $P '$arch' '$objdump' '$objcopy' '$cc' '$ld' " .
+#    "'$nm' '$rm' '$mv' '$inputfile'\n";
+
+my %locals;            # List of local (static) functions
+my %weak;              # List of weak functions
+my %convert;           # List of local functions used that needs conversion
+
+my $type;
+my $section_regex;     # Find the start of a section
+my $function_regex;    # Find the name of a function
+                       #    (return offset and func name)
+my $mcount_regex;      # Find the call site to mcount (return offset)
+
+if ($arch eq "x86_64") {
+    $section_regex = "Disassembly of section";
+    $function_regex = "^([0-9a-fA-F]+)\\s+<(.*?)>:";
+    $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\smcount([+-]0x[0-9a-zA-Z]+)?\$";
+    $type = ".quad";
+
+    # force flags for this arch
+    $ld .= " -m elf_x86_64";
+    $objdump .= " -M x86-64";
+    $objcopy .= " -O elf64-x86-64";
+    $cc .= " -m64";
+
+} elsif ($arch eq "i386") {
+    $section_regex = "Disassembly of section";
+    $function_regex = "^([0-9a-fA-F]+)\\s+<(.*?)>:";
+    $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\smcount\$";
+    $type = ".long";
+
+    # force flags for this arch
+    $ld .= " -m elf_i386";
+    $objdump .= " -M i386";
+    $objcopy .= " -O elf32-i386";
+    $cc .= " -m32";
+
+} else {
+    die "Arch $arch is not supported with CONFIG_FTRACE_MCOUNT_RECORD";
+}
+
+my $text_found = 0;
+my $read_function = 0;
+my $opened = 0;
+my $mcount_section = "__mcount_loc";
+
+my $dirname;
+my $filename;
+my $prefix;
+my $ext;
+
+if ($inputfile =~ m,^(.*)/([^/]*)$,) {
+    $dirname = $1;
+    $filename = $2;
+} else {
+    $dirname = ".";
+    $filename = $inputfile;
+}
+
+if ($filename =~ m,^(.*)(\.\S),) {
+    $prefix = $1;
+    $ext = $2;
+} else {
+    $prefix = $filename;
+    $ext = "";
+}
+
+my $mcount_s = $dirname . "/.tmp_mc_" . $prefix . ".s";
+my $mcount_o = $dirname . "/.tmp_mc_" . $prefix . ".o";
+
+#
+# --globalize-symbol came out in binutils 2.17; we must test the
+# version of objcopy, and if it is less than 2.17 we cannot
+# record local functions.
my $use_locals = 1;
+my $local_warn_once = 0;
+my $found_version = 0;
+
+open (IN, "$objcopy --version |") || die "error running $objcopy";
+while (<IN>) {
+    if (/objcopy.*\s(\d+)\.(\d+)/) {
+       my $major = $1;
+       my $minor = $2;
+
+       $found_version = 1;
+       if ($major < 2 ||
+           ($major == 2 && $minor < 17)) {
+           $use_locals = 0;
+       }
+       last;
+    }
+}
+close (IN);
+
+if (!$found_version) {
+    print STDERR "WARNING: could not find objcopy version.\n" .
+       "\tDisabling local function references.\n";
+}
+
+
+#
+# Step 1: find all the local (static functions) and weak symbols.
+#        't' is local, 'w/W' is weak (we never use a weak function)
+#
+open (IN, "$nm $inputfile|") || die "error running $nm";
+while (<IN>) {
+    if (/^[0-9a-fA-F]+\s+t\s+(\S+)/) {
+       $locals{$1} = 1;
+    } elsif (/^[0-9a-fA-F]+\s+([wW])\s+(\S+)/) {
+       $weak{$2} = $1;
+    }
+}
+close(IN);
+
+my @offsets;           # Array of offsets of mcount callers
+my $ref_func;          # reference function to use for offsets
+my $offset = 0;                # offset of ref_func to section beginning
+
+##
+# update_funcs - print out the current mcount callers
+#
+#  Go through the list of offsets to callers and write them to
+#  the output file in a format that can be read by an assembler.
+#
+sub update_funcs
+{
+    return if ($#offsets < 0);
+
+    defined($ref_func) || die "No function to reference";
+
+    # A section may have only a weak function to represent it.
+    # Unfortunately, a weak function may be overridden by another
+    # function of the same name, making all these offsets incorrect.
+    # To be safe, we simply print a warning and bail.
+    if (defined $weak{$ref_func}) {
+       print STDERR
+           "$inputfile: WARNING: referencing weak function" .
+           " $ref_func for mcount\n";
+       return;
+    }
+
+    # is this function static? If so, note this fact.
+    if (defined $locals{$ref_func}) {
+
+       # only use locals if objcopy supports globalize-symbols
+       if (!$use_locals) {
+           return;
+       }
+       $convert{$ref_func} = 1;
+    }
+
+    # Loop through all the mcount caller offsets and print a reference
+    # to the caller based from the ref_func.
+    for (my $i=0; $i <= $#offsets; $i++) {
+       if (!$opened) {
+           open(FILE, ">$mcount_s") || die "can't create $mcount_s\n";
+           $opened = 1;
+           print FILE "\t.section $mcount_section,\"a\",\@progbits\n";
+       }
+       printf FILE "\t%s %s + %d\n", $type, $ref_func, $offsets[$i] - $offset;
+    }
+}
+
+#
+# Step 2: find the sections and mcount call sites
+#
+open(IN, "$objdump -dr $inputfile|") || die "error running $objdump";
+
+my $text;
+
+while (<IN>) {
+    # is it a section?
+    if (/$section_regex/) {
+       $read_function = 1;
+       # print out any recorded offsets
+       update_funcs() if ($text_found);
+
+       # reset all markers and arrays
+       $text_found = 0;
+       undef($ref_func);
+       undef(@offsets);
+
+    # section found, now is this a start of a function?
+    } elsif ($read_function && /$function_regex/) {
+       $text_found = 1;
+       $offset = hex $1;
+       $text = $2;
+
+       # if this is either a local function or a weak function
+       # keep looking for functions that are global that
+       # we can use safely.
+       if (!defined($locals{$text}) && !defined($weak{$text})) {
+           $ref_func = $text;
+           $read_function = 0;
+       } else {
+           # if we already have a function, and this is weak, skip it
+           if (!defined($ref_func) || !defined($weak{$text})) {
+               $ref_func = $text;
+           }
+       }
+    }
+
+    # is this a call site to mcount? If so, record it to print later
+    if ($text_found && /$mcount_regex/) {
+       $offsets[$#offsets + 1] = hex $1;
+    }
+}
+
+# dump out any more offsets that may have been found
+update_funcs() if ($text_found);
+
+# If we did not find any mcount callers, we are done (do nothing).
+if (!$opened) {
+    exit(0);
+}
+
+close(FILE);
+
+#
+# Step 3: Compile the file that holds the list of call sites to mcount.
+#
+`$cc -o $mcount_o -c $mcount_s`;
+
+my @converts = keys %convert;
+
+#
+# Step 4: Do we have sections that started with local functions?
+#
+if ($#converts >= 0) {
+    my $globallist = "";
+    my $locallist = "";
+
+    foreach my $con (@converts) {
+       $globallist .= " --globalize-symbol $con";
+       $locallist .= " --localize-symbol $con";
+    }
+
+    my $globalobj = $dirname . "/.tmp_gl_" . $filename;
+    my $globalmix = $dirname . "/.tmp_mx_" . $filename;
+
+    #
+    # Step 5: set up each local function as a global
+    #
+    `$objcopy $globallist $inputfile $globalobj`;
+
+    #
+    # Step 6: Link the global version to our list.
+    #
+    `$ld -r $globalobj $mcount_o -o $globalmix`;
+
+    #
+    # Step 7: Convert the local functions back into local symbols
+    #
+    `$objcopy $locallist $globalmix $inputfile`;
+
+    # Remove the temp files
+    `$rm $globalobj $globalmix`;
+
+} else {
+
+    my $mix = $dirname . "/.tmp_mx_" . $filename;
+
+    #
+    # Step 8: Link the object with our list of call sites object.
+    #
+    `$ld -r $inputfile $mcount_o -o $mix`;
+
+    #
+    # Step 9: Move the result back to the original object.
+    #
+    `$mv $mix $inputfile`;
+}
+
+# Clean up the temp files
+`$rm $mcount_o $mcount_s`;
+
+exit(0);
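
Once every object file carries a __mcount_loc section, the final link concatenates them, and the accumulated list can be walked between linker-provided start/stop symbols. A hedged sketch of that consumer side (the __start/__stop names follow the usual linker convention for named sections; the real consumer is the ftrace boot code):

/* Provided by the linker for the accumulated __mcount_loc section. */
extern unsigned long __start_mcount_loc[];
extern unsigned long __stop_mcount_loc[];

/* Sketch: visit every recorded mcount call site once at boot. */
static void walk_mcount_callers(void (*handle)(unsigned long ip))
{
	unsigned long *p;

	for (p = __start_mcount_loc; p < __stop_mcount_loc; p++)
		handle(*p);	/* *p is the address of one call to mcount */
}
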
index 576e511990794eacfad7ad525722c4fa077ae852..3e3fde7c1d2bf2a48af6d19d72e47f3adc95514b 100644 (file)
@@ -75,6 +75,7 @@
 #include <linux/string.h>
 #include <linux/selinux.h>
 #include <linux/mutex.h>
+#include <linux/posix-timers.h>
 
 #include "avc.h"
 #include "objsec.h"
@@ -2322,13 +2323,7 @@ static void selinux_bprm_post_apply_creds(struct linux_binprm *bprm)
                        initrlim = init_task.signal->rlim+i;
                        rlim->rlim_cur = min(rlim->rlim_max, initrlim->rlim_cur);
                }
-               if (current->signal->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) {
-                       /*
-                        * This will cause RLIMIT_CPU calculations
-                        * to be refigured.
-                        */
-                       current->it_prof_expires = jiffies_to_cputime(1);
-               }
+               update_rlimit_cpu(rlim->rlim_cur);
        }
 
        /* Wake up the parent if it is waiting so that it can