linux-2.6-omap-h63xx.git - commitdiff
Merge branches 'x86/urgent', 'x86/amd-iommu', 'x86/apic', 'x86/cleanups', 'x86/core...
author     Ingo Molnar <mingo@elte.hu>    Mon, 21 Jul 2008 14:37:17 +0000 (16:37 +0200)
committer  Ingo Molnar <mingo@elte.hu>    Mon, 21 Jul 2008 14:37:17 +0000 (16:37 +0200)
26 files changed:
Documentation/kernel-parameters.txt
arch/x86/Kconfig.debug
arch/x86/ia32/ia32entry.S
arch/x86/kernel/amd_iommu.c
arch/x86/kernel/amd_iommu_init.c
arch/x86/kernel/apic_32.c
arch/x86/kernel/apic_64.c
arch/x86/kernel/cpu/common_64.c
arch/x86/kernel/early-quirks.c
arch/x86/kernel/entry_32.S
arch/x86/kernel/entry_64.S
arch/x86/kernel/nmi.c
arch/x86/kernel/paravirt.c
arch/x86/kernel/pci-dma.c
arch/x86/kernel/pci-gart_64.c
arch/x86/kernel/process.c
arch/x86/kernel/setup.c
arch/x86/kernel/signal_32.c
arch/x86/kernel/smpboot.c
arch/x86/mm/init_32.c
arch/x86/mm/pat.c
arch/x86/pci/pci.h
arch/x86/xen/enlighten.c
drivers/pci/intel-iommu.c
include/asm-x86/paravirt.h
include/asm-x86/setup.h

diff --combined Documentation/kernel-parameters.txt
index 09ad7450647bc81dff32a3eaf7ea3c0858f4a896,09ad7450647bc81dff32a3eaf7ea3c0858f4a896,09ad7450647bc81dff32a3eaf7ea3c0858f4a896,09ad7450647bc81dff32a3eaf7ea3c0858f4a896,09ad7450647bc81dff32a3eaf7ea3c0858f4a896,09ad7450647bc81dff32a3eaf7ea3c0858f4a896,b3a5aad7e6291e0607d061e3563e2528b2bc79c1,795c487af8e4dbbcdb0cedf5d048e7036bbb5ee0,312fe77764a48cba9fb04e000fc2dffeba7fa978,70624ddd1df35f97e359fb63f783696c7c26b7ee,e07c432c731ff9a516fe8b23228af0fe6fa1d29a,06fbb3aa288cc538559a112a769151ee1e3c26da,b3a5aad7e6291e0607d061e3563e2528b2bc79c1,312fe77764a48cba9fb04e000fc2dffeba7fa978,09ad7450647bc81dff32a3eaf7ea3c0858f4a896,09ad7450647bc81dff32a3eaf7ea3c0858f4a896,795c487af8e4dbbcdb0cedf5d048e7036bbb5ee0,09ad7450647bc81dff32a3eaf7ea3c0858f4a896,09ad7450647bc81dff32a3eaf7ea3c0858f4a896,09ad7450647bc81dff32a3eaf7ea3c0858f4a896..25e88cf5d84ececd222ed309a95115fc2c6d3c16
@@@@@@@@@@@@@@@@@@@@@ -147,14 -147,14 -147,14 -147,14 -147,14 -147,14 -147,10 -147,10 -147,10 -147,14 -147,10 -147,14 -147,10 -147,10 -147,14 -147,14 -147,10 -147,14 -147,14 -147,14 +147,14 @@@@@@@@@@@@@@@@@@@@@ and is between 256 and 4096 characters
                                        default: 0
                    
                        acpi_sleep=     [HW,ACPI] Sleep options
      --- - --  -                       Format: { s3_bios, s3_mode, s3_beep }
      +++ + ++  +                       Format: { s3_bios, s3_mode, s3_beep, old_ordering }
                                        See Documentation/power/video.txt for s3_bios and s3_mode.
                                        s3_beep is for debugging; it makes the PC's speaker beep
                                        as soon as the kernel's real-mode entry point is called.
      +++ + ++  +                       old_ordering causes the ACPI 1.0 ordering of the _PTS
      +++ + ++  +                       control method, wrt putting devices into low power
      +++ + ++  +                       states, to be enforced (the ACPI 2.0 ordering of _PTS is
      +++ + ++  +                       used by default).
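                    [ Illustration, not part of the patch: the listed flags can be combined,
                      e.g. booting with "acpi_sleep=s3_bios,old_ordering" selects both the
                      s3_bios workaround and the ACPI 1.0 _PTS ordering. ]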
                    
                        acpi_sci=       [HW,ACPI] ACPI System Control Interrupt trigger mode
                                        Format: { level | edge | high | low }
                        aic79xx=        [HW,SCSI]
                                        See Documentation/scsi/aic79xx.txt.
                    
           +             amd_iommu=      [HW,X86-64]
          +                             Pass parameters to the AMD IOMMU driver in the system.
          +                             Possible values are:
          +                             isolate - enable device isolation (each device, as far
          +                                       as possible, will get its own protection
          +                                       domain)
          +             amd_iommu_size= [HW,X86-64]
          +                             Define the size of the aperture for the AMD IOMMU
          +                             driver. Possible values are:
          +                             '32M', '64M' (default), '128M', '256M', '512M', '1G'
          +         
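                    [ Illustration, not part of the patch: a boot line such as
                      "amd_iommu=isolate amd_iommu_size=128M" enables per-device protection
                      domains together with a 128M aperture; the values are examples only. ]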
                        amijoy.map=     [HW,JOY] Amiga joystick support
                                        Map of devices attached to JOY0DAT and JOY1DAT
                                        Format: <a>,<b>
                                        when initialising the APIC and IO-APIC components.
                    
                        apm=            [APM] Advanced Power Management
          -                             See header of arch/i386/kernel/apm.c.
          +                             See header of arch/x86/kernel/apm_32.c.
                    
                        arcrimi=        [HW,NET] ARCnet - "RIM I" (entirely mem-mapped) cards
                                        Format: <io>,<irq>,<nodeID>
                    
                        debug_objects   [KNL] Enable object debugging
                    
      ++  + +   +       debugpat        [X86] Enable PAT debugging
      ++  + +   +   
                        decnet.addr=    [HW,NET]
                                        Format: <area>[,<node>]
                                        See also Documentation/networking/decnet.txt.
                                        See drivers/char/README.epca and
                                        Documentation/digiepca.txt.
                    
          +             disable_mtrr_cleanup [X86]
          +             enable_mtrr_cleanup [X86]
          +                             The kernel tries to adjust MTRR layout from continuous
           +                             to discrete, to make the X server driver able to add a WB
          +                             entry later. This parameter enables/disables that.
          +         
          +             mtrr_chunk_size=nn[KMG] [X86]
           +                             Used for mtrr cleanup. It is the largest contiguous chunk
           +                             that could hold holes (aka UC entries).
          +         
          +             mtrr_gran_size=nn[KMG] [X86]
           +                             Used for mtrr cleanup. It is the granularity of an mtrr block.
           +                             Default is 1.
           +                             A large value could prevent small-alignment regions from
           +                             using up MTRRs.
          +         
          +             mtrr_spare_reg_nr=n [X86]
          +                             Format: <integer>
          +                             Range: 0,7 : spare reg number
          +                             Default : 1
           +                             Used for mtrr cleanup. It is the number of spare mtrr entries.
           +                             Set to 2 or more if your graphics card needs more.
          +         
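                    [ Illustration, not part of the patch: these knobs are combined on the
                      kernel command line, e.g. "enable_mtrr_cleanup mtrr_chunk_size=256M
                      mtrr_gran_size=64M mtrr_spare_reg_nr=2"; the sizes shown are example
                      values, not recommendations. ]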
                        disable_mtrr_trim [X86, Intel and AMD only]
                                        By default the kernel will trim any uncacheable
                                        memory out of your available memory pool based on
                    
                        elanfreq=       [X86-32]
                                        See comment before function elanfreq_setup() in
          -                             arch/i386/kernel/cpu/cpufreq/elanfreq.c.
          +                             arch/x86/kernel/cpu/cpufreq/elanfreq.c.
                    
                        elevator=       [IOSCHED]
                                        Format: {"anticipatory" | "cfq" | "deadline" | "noop"}
                        hd=             [EIDE] (E)IDE hard drive subsystem geometry
                                        Format: <cyl>,<head>,<sect>
                    
      --  - -   -       hd?=            [HW] (E)IDE subsystem
      --  - -   -       hd?lun=         See Documentation/ide/ide.txt.
      --  - -   -   
                        highmem=nn[KMG] [KNL,BOOT] forces the highmem zone to have an exact
                                        size of <nn>. This works even on boxes that have no
                                        highmem otherwise. This also works to reduce highmem
                                        See Documentation/ide/ide.txt.
                    
                        idle=           [X86]
      --- - --  -                       Format: idle=poll or idle=mwait
      +++ + ++  +                       Format: idle=poll or idle=mwait, idle=halt, idle=nomwait
                                        Poll forces a polling idle loop that can slightly improve the performance
                                        of waking up an idle CPU, but will use a lot of power and make the system
                                        run hot. Not recommended.
                                        to not use it because it doesn't save as much power as a normal idle
                                        loop use the MONITOR/MWAIT idle loop anyways. Performance should be the same
                                        as idle=poll.
      +++ + ++  +                       idle=halt. Halt is forced to be used for CPU idle.
      +++ + ++  +                       In such case C2/C3 won't be used again.
      +++ + ++  +                       idle=nomwait. Disable mwait for CPU C-states
                    
                        ide-pci-generic.all-generic-ide [HW] (E)IDE subsystem
                                        Claim all unknown PCI IDE storage controllers.
                                                 or
                                                 memmap=0x10000$0x18690000
                    
--------- ----------    memtest=        [KNL,X86_64] Enable memtest
+++++++++ ++++++++++    memtest=        [KNL,X86] Enable memtest
                                        Format: <integer>
                                        range: 0,4 : pattern number
                                        default : 0 <disable>
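                    [ Illustration, not part of the patch: since the default of 0 disables the
                      test, booting with any non-zero pattern number in the documented range,
                      e.g. "memtest=4", enables the early memory test. ]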
                        mtdparts=       [MTD]
                                        See drivers/mtd/cmdlinepart.c.
                    
       +  +     +       mtdset=         [ARM]
       +  +     +                       ARM/S3C2412 JIVE boot control
       +  +     +   
       +  +     +                       See arch/arm/mach-s3c2412/mach-jive.c
       +  +     +   
                        mtouchusb.raw_coordinates=
                                        [HW] Make the MicroTouch USB driver use raw coordinates
                                        ('y', default) or cooked coordinates ('n')
                                                Use with caution as certain devices share
                                                address decoders between ROMs and other
                                                resources.
      +++ + ++  +               norom           [X86-32,X86_64] Do not assign address space to
      +++ + ++  +                               expansion ROMs that do not already have
      +++ + ++  +                               BIOS assigned address ranges.
                                irqmask=0xMMMM  [X86-32] Set a bit mask of IRQs allowed to be
                                                assigned automatically to PCI devices. You can
                                                make the kernel exclude IRQs of your ISA cards
                                        Format: { parport<nr> | timid | 0 }
                                        See also Documentation/parport.txt.
                    
      ++  + +   +       pmtmr=          [X86] Manual setup of pmtmr I/O Port. 
      ++  + +   +                       Override pmtimer IOPort with a hex value.
      ++  + +   +                       e.g. pmtmr=0x508
      ++  + +   +   
                        pnpacpi=        [ACPI]
                                        { off }
                    
                                        Format: <reboot_mode>[,<reboot_mode2>[,...]]
                                        See arch/*/kernel/reboot.c or arch/*/kernel/process.c                   
                    
          +             relax_domain_level=
          +                             [KNL, SMP] Set scheduler's default relax_domain_level.
          +                             See Documentation/cpusets.txt.
          +         
                        reserve=        [KNL,BUGS] Force the kernel to ignore some iomem area
                    
                        reservetop=     [X86-32]
                                        Note that genuine overcurrent events won't be
                                        reported either.
                    
+++++++++++ ++++++++    unknown_nmi_panic
+++++++++++ ++++++++                    [X86-32,X86-64]
+++++++++++ ++++++++                    Set unknown_nmi_panic=1 early on boot.
+++++++++++ ++++++++
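                    [ Illustration, not part of the patch: this is the boot-time counterpart of
                      the kernel.unknown_nmi_panic sysctl, i.e. roughly equivalent to running
                      "sysctl -w kernel.unknown_nmi_panic=1" after boot; the sysctl name is
                      stated here as background, not taken from the patch. ]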
                        usbcore.autosuspend=
                                        [USB] The autosuspend time delay (in seconds) used
                                        for newly-detected USB devices (default 2).  This
                        usbhid.mousepoll=
                                        [USBHID] The interval which mice are to be polled at.
                    
          +             add_efi_memmap  [EFI; x86-32,X86-64] Include EFI memory map in
          +                             kernel's map of available physical RAM.
          +         
                        vdso=           [X86-32,SH,x86-64]
                                        vdso=2: enable compat VDSO (default with COMPAT_VDSO)
                                        vdso=1: enable VDSO (default)
diff --combined arch/x86/Kconfig.debug
index 51c8214779513e3c1e1df6b56cda8907d7a9a636,ae36bfa814e5b389d706b9cc3fa1d879f16abe8b,ae36bfa814e5b389d706b9cc3fa1d879f16abe8b,ae36bfa814e5b389d706b9cc3fa1d879f16abe8b,ffd5913b35d16f57307cd9f9f50a19c82c06d5cc,ae36bfa814e5b389d706b9cc3fa1d879f16abe8b,ae36bfa814e5b389d706b9cc3fa1d879f16abe8b,acc0271920f2e3d80be6a50bb882066727e297e5,ae36bfa814e5b389d706b9cc3fa1d879f16abe8b,ae36bfa814e5b389d706b9cc3fa1d879f16abe8b,18363374d51a9a57b39b6fb8d3f87a054b4b4aa5,ae36bfa814e5b389d706b9cc3fa1d879f16abe8b,ae36bfa814e5b389d706b9cc3fa1d879f16abe8b,ae36bfa814e5b389d706b9cc3fa1d879f16abe8b,ae36bfa814e5b389d706b9cc3fa1d879f16abe8b,ae36bfa814e5b389d706b9cc3fa1d879f16abe8b,acc0271920f2e3d80be6a50bb882066727e297e5,ae36bfa814e5b389d706b9cc3fa1d879f16abe8b,ae36bfa814e5b389d706b9cc3fa1d879f16abe8b,ae36bfa814e5b389d706b9cc3fa1d879f16abe8b..85a87d2ac0c09940c9d0d6654345375417e3d378
@@@@@@@@@@@@@@@@@@@@@ -5,15 -5,13 -5,13 -5,13 -5,13 -5,13 -5,13 -5,13 -5,13 -5,13 -5,13 -5,13 -5,13 -5,13 -5,13 -5,13 -5,13 -5,13 -5,13 -5,13 +5,15 @@@@@@@@@@@@@@@@@@@@@ config TRACE_IRQFLAGS_SUPPOR
                    
                    source "lib/Kconfig.debug"
                    
 -------------------config NONPROMISC_DEVMEM
 +++++++++++++++++++config STRICT_DEVMEM
                        bool "Filter access to /dev/mem"
                        help
 -------------------      If this option is left off, you allow userspace access to all
 +++++++++++++++++++      If this option is left off, you allow userspace (root) access to all
                          of memory, including kernel and userspace memory. Accidental
                          access to this is obviously disastrous, but specific access can
 -------------------      be used by people debugging the kernel.
 +++++++++++++++++++      be used by people debugging the kernel. Note that with PAT support
 +++++++++++++++++++      enabled, even in this case there are restrictions on /dev/mem
 +++++++++++++++++++      use due to the cache aliasing requirements.
                    
                          If this option is switched on, the /dev/mem file only allows
                          userspace access to PCI space and the BIOS code and data regions.
                    
                          If in doubt, say Y.
                    
          +         config X86_VERBOSE_BOOTUP
          +             bool "Enable verbose x86 bootup info messages"
          +             default y
          +             help
          +               Enables the informational output from the decompression stage
          +               (e.g. bzImage) of the boot. If you disable this you will still
          +               see errors. Disable this if you want silent bootup.
          +         
                    config EARLY_PRINTK
                        bool "Early printk" if EMBEDDED
                        default y
@@@@@@@@@@@@@@@@@@@@@ -70,7 -68,7 -68,7 -68,7 -68,7 -68,7 -68,7 -68,7 -68,7 -68,7 -60,7 -68,7 -68,7 -68,7 -68,7 -68,7 -68,7 -68,7 -68,7 -68,7 +70,7 @@@@@@@@@@@@@@@@@@@@@ config DEBUG_PAGEALLO
                    config DEBUG_PER_CPU_MAPS
                        bool "Debug access to per_cpu maps"
                        depends on DEBUG_KERNEL
          -             depends on X86_64_SMP
          +             depends on X86_SMP
                        default n
                        help
                          Say Y to verify that the per_cpu map being accessed has
                          on the VM subsystem for higher order allocations. This option
                          will also use IRQ stacks to compensate for the reduced stackspace.
                    
          -         config X86_FIND_SMP_CONFIG
          -             def_bool y
          -             depends on X86_LOCAL_APIC || X86_VOYAGER
          -             depends on X86_32
          -         
          -         config X86_MPPARSE
          -             def_bool y
          -             depends on (X86_32 && (X86_LOCAL_APIC && !X86_VISWS)) || X86_64
          -         
                    config DOUBLEFAULT
                        default y
                        bool "Enable doublefault exception handler" if EMBEDDED
                          Add a simple leak tracer to the IOMMU code. This is useful when you
                          are debugging a buggy device driver that leaks IOMMU mappings.
                    
       +  +     +   config MMIOTRACE_HOOKS
       +  +     +       bool
       +  +     +   
       +  +     +   config MMIOTRACE
       +  +     +       bool "Memory mapped IO tracing"
       +  +     +       depends on DEBUG_KERNEL && PCI
       +  +     +       select TRACING
       +  +     +       select MMIOTRACE_HOOKS
       +  +     +       help
       +  +     +         Mmiotrace traces Memory Mapped I/O access and is meant for
       +  +     +         debugging and reverse engineering. It is called from the ioremap
       +  +     +         implementation and works via page faults. Tracing is disabled by
       +  +     +         default and can be enabled at run-time.
       +  +     +   
       +  +     +         See Documentation/tracers/mmiotrace.txt.
       +  +     +         If you are not helping to develop drivers, say N.
       +  +     +   
       +  +     +   config MMIOTRACE_TEST
       +  +     +       tristate "Test module for mmiotrace"
       +  +     +       depends on MMIOTRACE && m
       +  +     +       help
       +  +     +         This is a dumb module for testing mmiotrace. It is very dangerous
       +  +     +         as it will write garbage to IO memory starting at a given address.
        +  +     +         However, it should be safe to use on, e.g., an unused portion of VRAM.
       +  +     +   
       +  +     +         Say N, unless you absolutely know what you are doing.
       +  +     +   
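        [ Illustration, not part of the patch: per the referenced
          Documentation/tracers/mmiotrace.txt, tracing is switched on at run time by writing
          the tracer name to the ftrace interface, roughly
          "echo mmiotrace > <debugfs>/tracing/current_tracer", where <debugfs> is wherever
          debugfs is mounted; the exact path is an assumption here. ]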
                    #
                    # IO delay types:
                    #
                    
                    config OPTIMIZE_INLINING
                        bool "Allow gcc to uninline functions marked 'inline'"
---- ---------------    depends on BROKEN
                        help
                          This option determines if the kernel forces gcc to inline the functions
                          developers have marked 'inline'. Doing so takes away freedom from gcc to
                          become the default in the future, until then this option is there to
                          test gcc for this.
                    
++++ +++++++++++++++      If unsure, say N.
++++ +++++++++++++++
                    endmenu
                    
diff --combined arch/x86/ia32/ia32entry.S
index 20371d0635e44975850ea37b5a8a03a2f52f0d58,20371d0635e44975850ea37b5a8a03a2f52f0d58,20371d0635e44975850ea37b5a8a03a2f52f0d58,20371d0635e44975850ea37b5a8a03a2f52f0d58,20371d0635e44975850ea37b5a8a03a2f52f0d58,20371d0635e44975850ea37b5a8a03a2f52f0d58,20371d0635e44975850ea37b5a8a03a2f52f0d58,20371d0635e44975850ea37b5a8a03a2f52f0d58,20371d0635e44975850ea37b5a8a03a2f52f0d58,20371d0635e44975850ea37b5a8a03a2f52f0d58,b5e329da166cfa2a48a74339a47539739ec939ac,20371d0635e44975850ea37b5a8a03a2f52f0d58,20371d0635e44975850ea37b5a8a03a2f52f0d58,20371d0635e44975850ea37b5a8a03a2f52f0d58,20371d0635e44975850ea37b5a8a03a2f52f0d58,8796d1905255116eef6740a73be8dc33a95886c8,20371d0635e44975850ea37b5a8a03a2f52f0d58,20371d0635e44975850ea37b5a8a03a2f52f0d58,20371d0635e44975850ea37b5a8a03a2f52f0d58,0ae1e77eae50857629ecc64f2c41f2ee24cc249e..23d146ce676bc0e1b8c6b65ead1e2971479e1708
                        movq    %rax,R8(%rsp)
                        .endm
                    
+++++++++++++++ ++++    /*
+++++++++++++++ ++++     * Reload arg registers from stack in case ptrace changed them.
+++++++++++++++ ++++     * We don't reload %eax because syscall_trace_enter() returned
+++++++++++++++ ++++     * the value it wants us to use in the table lookup.
+++++++++++++++ ++++     */
                        .macro LOAD_ARGS32 offset
                        movl \offset(%rsp),%r11d
                        movl \offset+8(%rsp),%r10d
                        movl \offset+48(%rsp),%edx
                        movl \offset+56(%rsp),%esi
                        movl \offset+64(%rsp),%edi
--------------- ----    movl \offset+72(%rsp),%eax
                        .endm
                        
                        .macro CFI_STARTPROC32 simple
                        CFI_UNDEFINED   r15
                        .endm
                    
          +         #ifdef CONFIG_PARAVIRT
          +         ENTRY(native_usergs_sysret32)
          +             swapgs
          +             sysretl
          +         ENDPROC(native_usergs_sysret32)
          +         
          +         ENTRY(native_irq_enable_sysexit)
          +             swapgs
          +             sti
          +             sysexit
          +         ENDPROC(native_irq_enable_sysexit)
          +         #endif
          +         
                    /*
                     * 32bit SYSENTER instruction entry.
                     *
@@@@@@@@@@@@@@@@@@@@@ -98,14 -98,14 -98,14 -98,14 -98,14 -98,14 -98,14 -98,14 -98,14 -98,14 -85,14 -98,14 -98,14 -98,14 -98,14 -102,14 -98,14 -98,14 -98,14 -98,14 +102,14 @@@@@@@@@@@@@@@@@@@@@ ENTRY(ia32_sysenter_target
                        CFI_SIGNAL_FRAME
                        CFI_DEF_CFA     rsp,0
                        CFI_REGISTER    rsp,rbp
          -             swapgs
          +             SWAPGS_UNSAFE_STACK
                        movq    %gs:pda_kernelstack, %rsp
                        addq    $(PDA_STACKOFFSET),%rsp 
                        /*
                         * No need to follow this irqs on/off section: the syscall
                         * disabled irqs, here we enable it straight after entry:
                         */
          -             sti     
          +             ENABLE_INTERRUPTS(CLBR_NONE)
                        movl    %ebp,%ebp               /* zero extension */
                        pushq   $__USER32_DS
                        CFI_ADJUST_CFA_OFFSET 8
                        pushfq
                        CFI_ADJUST_CFA_OFFSET 8
                        /*CFI_REL_OFFSET rflags,0*/
          -             movl    8*3-THREAD_SIZE+threadinfo_sysenter_return(%rsp), %r10d
          +             movl    8*3-THREAD_SIZE+TI_sysenter_return(%rsp), %r10d
                        CFI_REGISTER rip,r10
                        pushq   $__USER32_CS
                        CFI_ADJUST_CFA_OFFSET 8
                        .quad 1b,ia32_badarg
                        .previous       
                        GET_THREAD_INFO(%r10)
          -             orl    $TS_COMPAT,threadinfo_status(%r10)
          -             testl  $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10)
          +             orl    $TS_COMPAT,TI_status(%r10)
---------- ---- ----    testl  $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \
---------- ---- ----             TI_flags(%r10)
+++++++++++++++ ++++    testl  $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
                        CFI_REMEMBER_STATE
                        jnz  sysenter_tracesys
--------------- ----sysenter_do_call:   
                        cmpl    $(IA32_NR_syscalls-1),%eax
                        ja      ia32_badsys
+++++++++++++++ ++++sysenter_do_call:
                        IA32_ARG_FIXUP 1
                        call    *ia32_sys_call_table(,%rax,8)
                        movq    %rax,RAX-ARGOFFSET(%rsp)
                        GET_THREAD_INFO(%r10)
          -             cli
          +             DISABLE_INTERRUPTS(CLBR_NONE)
                        TRACE_IRQS_OFF
          -             testl   $_TIF_ALLWORK_MASK,threadinfo_flags(%r10)
          +             testl   $_TIF_ALLWORK_MASK,TI_flags(%r10)
                        jnz     int_ret_from_sys_call
          -             andl    $~TS_COMPAT,threadinfo_status(%r10)
          +             andl    $~TS_COMPAT,TI_status(%r10)
                        /* clear IF, that popfq doesn't enable interrupts early */
                        andl  $~0x200,EFLAGS-R11(%rsp) 
                        movl    RIP-R11(%rsp),%edx              /* User %eip */
                        CFI_ADJUST_CFA_OFFSET -8
                        CFI_REGISTER rsp,rcx
                        TRACE_IRQS_ON
          -             swapgs
          -             sti             /* sti only takes effect after the next instruction */
          -             /* sysexit */
          -             .byte   0xf, 0x35
          +             ENABLE_INTERRUPTS_SYSEXIT32
                    
                    sysenter_tracesys:
                        CFI_RESTORE_STATE
@@@@@@@@@@@@@@@@@@@@@ -211,7 -211,7 -211,7 -211,7 -211,7 -211,7 -211,7 -211,7 -211,7 -211,7 -200,7 -211,7 -211,7 -211,7 -211,7 -214,7 -211,7 -211,7 -211,7 -211,7 +214,7 @@@@@@@@@@@@@@@@@@@@@ ENTRY(ia32_cstar_target
                        CFI_DEF_CFA     rsp,PDA_STACKOFFSET
                        CFI_REGISTER    rip,rcx
                        /*CFI_REGISTER  rflags,r11*/
          -             swapgs
          +             SWAPGS_UNSAFE_STACK
                        movl    %esp,%r8d
                        CFI_REGISTER    rsp,r8
                        movq    %gs:pda_kernelstack,%rsp
                         * No need to follow this irqs on/off section: the syscall
                         * disabled irqs and here we enable it straight after entry:
                         */
          -             sti
          +             ENABLE_INTERRUPTS(CLBR_NONE)
                        SAVE_ARGS 8,1,1
                        movl    %eax,%eax       /* zero extension */
                        movq    %rax,ORIG_RAX-ARGOFFSET(%rsp)
                        .quad 1b,ia32_badarg
                        .previous       
                        GET_THREAD_INFO(%r10)
          -             orl   $TS_COMPAT,threadinfo_status(%r10)
          -             testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10)
          +             orl   $TS_COMPAT,TI_status(%r10)
---------- ---- ----    testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \
---------- ---- ----            TI_flags(%r10)
+++++++++++++++ ++++    testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
                        CFI_REMEMBER_STATE
                        jnz   cstar_tracesys
                    cstar_do_call:      
                        call *ia32_sys_call_table(,%rax,8)
                        movq %rax,RAX-ARGOFFSET(%rsp)
                        GET_THREAD_INFO(%r10)
          -             cli
          +             DISABLE_INTERRUPTS(CLBR_NONE)
                        TRACE_IRQS_OFF
          -             testl $_TIF_ALLWORK_MASK,threadinfo_flags(%r10)
          +             testl $_TIF_ALLWORK_MASK,TI_flags(%r10)
                        jnz  int_ret_from_sys_call
          -             andl $~TS_COMPAT,threadinfo_status(%r10)
          +             andl $~TS_COMPAT,TI_status(%r10)
                        RESTORE_ARGS 1,-ARG_SKIP,1,1,1
                        movl RIP-ARGOFFSET(%rsp),%ecx
                        CFI_REGISTER rip,rcx
                        TRACE_IRQS_ON
                        movl RSP-ARGOFFSET(%rsp),%esp
                        CFI_RESTORE rsp
          -             swapgs
          -             sysretl
          +             USERGS_SYSRET32
                        
                    cstar_tracesys:     
                        CFI_RESTORE_STATE
                        /*CFI_REL_OFFSET        rflags,EFLAGS-RIP*/
                        /*CFI_REL_OFFSET        cs,CS-RIP*/
                        CFI_REL_OFFSET  rip,RIP-RIP
          -             swapgs
+++++++++++++++++++     PARAVIRT_ADJUST_EXCEPTION_FRAME
          +             SWAPGS
                        /*
                         * No need to follow this irqs on/off section: the syscall
                         * disabled irqs and here we enable it straight after entry:
                         */
          -             sti
          +             ENABLE_INTERRUPTS(CLBR_NONE)
                        movl %eax,%eax
                        pushq %rax
                        CFI_ADJUST_CFA_OFFSET 8
                           this could be a problem. */
                        SAVE_ARGS 0,0,1
                        GET_THREAD_INFO(%r10)
          -             orl   $TS_COMPAT,threadinfo_status(%r10)
          -             testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10)
          +             orl   $TS_COMPAT,TI_status(%r10)
---------- ---- ----    testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \
---------- ---- ----            TI_flags(%r10)
+++++++++++++++ ++++    testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
                        jnz ia32_tracesys
                    ia32_do_syscall:    
                        cmpl $(IA32_NR_syscalls-1),%eax
                        PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn, %rdi
                        PTREGSCALL stub32_sigreturn, sys32_sigreturn, %rdi
                        PTREGSCALL stub32_sigaltstack, sys32_sigaltstack, %rdx
          -             PTREGSCALL stub32_sigsuspend, sys32_sigsuspend, %rcx
                        PTREGSCALL stub32_execve, sys32_execve, %rcx
                        PTREGSCALL stub32_fork, sys_fork, %rdi
                        PTREGSCALL stub32_clone, sys32_clone, %rdx
                        PTREGSCALL stub32_vfork, sys_vfork, %rdi
                        PTREGSCALL stub32_iopl, sys_iopl, %rsi
          -             PTREGSCALL stub32_rt_sigsuspend, sys_rt_sigsuspend, %rdx
                    
                    ENTRY(ia32_ptregs_common)
                        popq %r11
@@@@@@@@@@@@@@@@@@@@@ -486,7 -486,7 -486,7 -486,7 -486,7 -486,7 -486,7 -486,7 -486,7 -486,7 -476,7 -486,7 -486,7 -486,7 -486,7 -487,7 -486,7 -486,7 -486,7 -487,7 +488,7 @@@@@@@@@@@@@@@@@@@@@ ia32_sys_call_table
                        .quad sys_ssetmask
                        .quad sys_setreuid16    /* 70 */
                        .quad sys_setregid16
          -             .quad stub32_sigsuspend
          +             .quad sys32_sigsuspend
                        .quad compat_sys_sigpending
                        .quad sys_sethostname
                        .quad compat_sys_setrlimit      /* 75 */
                        .quad sys32_rt_sigpending
                        .quad compat_sys_rt_sigtimedwait
                        .quad sys32_rt_sigqueueinfo
          -             .quad stub32_rt_sigsuspend
          +             .quad sys_rt_sigsuspend
                        .quad sys32_pread               /* 180 */
                        .quad sys32_pwrite
                        .quad sys_chown16
diff --combined arch/x86/kernel/amd_iommu.c
index f2766d84c7a00c78c4f24951a7f2912f550ef475,8c3deb027d3acf66408732bb998a62c7a0eb9f48,f2766d84c7a00c78c4f24951a7f2912f550ef475,f2766d84c7a00c78c4f24951a7f2912f550ef475,f2766d84c7a00c78c4f24951a7f2912f550ef475,f2766d84c7a00c78c4f24951a7f2912f550ef475,f2766d84c7a00c78c4f24951a7f2912f550ef475,cf2f74bcde53aa94bec66dc935d1b87e655dbd02,f2766d84c7a00c78c4f24951a7f2912f550ef475,f2766d84c7a00c78c4f24951a7f2912f550ef475,0000000000000000000000000000000000000000,f2766d84c7a00c78c4f24951a7f2912f550ef475,f2766d84c7a00c78c4f24951a7f2912f550ef475,f2766d84c7a00c78c4f24951a7f2912f550ef475,f2766d84c7a00c78c4f24951a7f2912f550ef475,f2766d84c7a00c78c4f24951a7f2912f550ef475,f2766d84c7a00c78c4f24951a7f2912f550ef475,f2766d84c7a00c78c4f24951a7f2912f550ef475,f2766d84c7a00c78c4f24951a7f2912f550ef475,f2766d84c7a00c78c4f24951a7f2912f550ef475..c25210e6ac888e94224b460e6eb82f3c556d7616
mode 100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,000000,100644,100644,100644,100644,100644,100644,100644,100644,100644..100644
--- /dev/null
------- -- ---------#include <asm/gart.h>
          +         /*
          +          * Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
          +          * Author: Joerg Roedel <joerg.roedel@amd.com>
          +          *         Leo Duran <leo.duran@amd.com>
          +          *
          +          * This program is free software; you can redistribute it and/or modify it
          +          * under the terms of the GNU General Public License version 2 as published
          +          * by the Free Software Foundation.
          +          *
          +          * This program is distributed in the hope that it will be useful,
          +          * but WITHOUT ANY WARRANTY; without even the implied warranty of
          +          * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
          +          * GNU General Public License for more details.
          +          *
          +          * You should have received a copy of the GNU General Public License
          +          * along with this program; if not, write to the Free Software
          +          * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
          +          */
          +         
          +         #include <linux/pci.h>
          +         #include <linux/gfp.h>
          +         #include <linux/bitops.h>
          +         #include <linux/scatterlist.h>
          +         #include <linux/iommu-helper.h>
          +         #include <asm/proto.h>
- -------- ---------struct command {
+++++++ ++++++++++++#include <asm/iommu.h>
          +         #include <asm/amd_iommu_types.h>
          +         #include <asm/amd_iommu.h>
          +         
          +         #define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28))
          +         
          +         #define to_pages(addr, size) \
          +              (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT)
          +         
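           [ Illustration, not part of the patch: with 4K pages, to_pages(0x1100, 0x2000)
             evaluates round_up(0x100 + 0x2000, PAGE_SIZE) >> PAGE_SHIFT = 3, i.e. an 8K
             buffer starting at page offset 0x100 straddles three 4K pages. ]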
+ ++++++++++++++++++#define EXIT_LOOP_COUNT 10000000
+ ++++++++++++++++++
          +         static DEFINE_RWLOCK(amd_iommu_devtable_lock);
          +         
- -------- ---------static int __iommu_queue_command(struct amd_iommu *iommu, struct command *cmd)
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * general struct to manage commands sent to an IOMMU
+ ++++++++++++++++++ */
+ ++++++++++++++++++struct iommu_cmd {
          +             u32 data[4];
          +         };
          +         
          +         static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
          +                                  struct unity_map_entry *e);
          +         
+ ++++++++++++++++++/* returns !0 if the IOMMU is caching non-present entries in its TLB */
          +         static int iommu_has_npcache(struct amd_iommu *iommu)
          +         {
          +             return iommu->cap & IOMMU_CAP_NPCACHE;
          +         }
          +         
- -------- ---------static int iommu_queue_command(struct amd_iommu *iommu, struct command *cmd)
+ ++++++++++++++++++/****************************************************************************
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * IOMMU command queuing functions
+ ++++++++++++++++++ *
+ ++++++++++++++++++ ****************************************************************************/
+ ++++++++++++++++++
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * Writes the command to the IOMMU's command buffer and informs the
+ ++++++++++++++++++ * hardware about the new command. Must be called with iommu->lock held.
+ ++++++++++++++++++ */
+ ++++++++++++++++++static int __iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
          +         {
          +             u32 tail, head;
          +             u8 *target;
          +         
          +             tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
          +             target = (iommu->cmd_buf + tail);
          +             memcpy_toio(target, cmd, sizeof(*cmd));
          +             tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size;
          +             head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
          +             if (tail == head)
          +                     return -ENOMEM;
          +             writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
          +         
          +             return 0;
          +         }
          +         
- -------- ---------    struct command cmd;
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * General queuing function for commands. Takes iommu->lock and calls
+ ++++++++++++++++++ * __iommu_queue_command().
+ ++++++++++++++++++ */
+ ++++++++++++++++++static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
          +         {
          +             unsigned long flags;
          +             int ret;
          +         
          +             spin_lock_irqsave(&iommu->lock, flags);
          +             ret = __iommu_queue_command(iommu, cmd);
          +             spin_unlock_irqrestore(&iommu->lock, flags);
          +         
          +             return ret;
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * This function is called whenever we need to ensure that the IOMMU has
+ ++++++++++++++++++ * completed execution of all commands we sent. It sends a
+ ++++++++++++++++++ * COMPLETION_WAIT command and waits for it to finish. The IOMMU informs
+ ++++++++++++++++++ * us about that by writing a value to a physical address we pass with
+ ++++++++++++++++++ * the command.
+ ++++++++++++++++++ */
          +         static int iommu_completion_wait(struct amd_iommu *iommu)
          +         {
          +             int ret;
- -------- ---------    cmd.data[1] = HIGH_U32(ready_phys);
+ ++++++++++++++++++    struct iommu_cmd cmd;
          +             volatile u64 ready = 0;
          +             unsigned long ready_phys = virt_to_phys(&ready);
+ ++++++++++++++++++    unsigned long i = 0;
          +         
          +             memset(&cmd, 0, sizeof(cmd));
          +             cmd.data[0] = LOW_U32(ready_phys) | CMD_COMPL_WAIT_STORE_MASK;
- -------- ---------    while (!ready)
+ ++++++++++++++++++    cmd.data[1] = upper_32_bits(ready_phys);
          +             cmd.data[2] = 1; /* value written to 'ready' */
          +             CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT);
          +         
          +             iommu->need_sync = 0;
          +         
          +             ret = iommu_queue_command(iommu, &cmd);
          +         
          +             if (ret)
          +                     return ret;
          +         
- -------- ---------    struct command cmd;
+ ++++++++++++++++++    while (!ready && (i < EXIT_LOOP_COUNT)) {
+ ++++++++++++++++++            ++i;
          +                     cpu_relax();
+ ++++++++++++++++++    }
+ ++++++++++++++++++
+ ++++++++++++++++++    if (unlikely((i == EXIT_LOOP_COUNT) && printk_ratelimit()))
+ ++++++++++++++++++            printk(KERN_WARNING "AMD IOMMU: Completion wait loop failed\n");
          +         
          +             return 0;
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * Command send function for invalidating a device table entry
+ ++++++++++++++++++ */
          +         static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid)
          +         {
- -------- ---------    struct command cmd;
+ ++++++++++++++++++    struct iommu_cmd cmd;
          +         
          +             BUG_ON(iommu == NULL);
          +         
          +             memset(&cmd, 0, sizeof(cmd));
          +             CMD_SET_TYPE(&cmd, CMD_INV_DEV_ENTRY);
          +             cmd.data[0] = devid;
          +         
          +             iommu->need_sync = 1;
          +         
          +             return iommu_queue_command(iommu, &cmd);
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * Generic command send function for invalidating TLB entries
+ ++++++++++++++++++ */
          +         static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
          +                     u64 address, u16 domid, int pde, int s)
          +         {
- -------- ---------    cmd.data[3] = HIGH_U32(address);
- -------- ---------    if (s)
+ ++++++++++++++++++    struct iommu_cmd cmd;
          +         
          +             memset(&cmd, 0, sizeof(cmd));
          +             address &= PAGE_MASK;
          +             CMD_SET_TYPE(&cmd, CMD_INV_IOMMU_PAGES);
          +             cmd.data[1] |= domid;
          +             cmd.data[2] = LOW_U32(address);
- -------- ---------    if (pde)
+ ++++++++++++++++++    cmd.data[3] = upper_32_bits(address);
+ ++++++++++++++++++    if (s) /* size bit - we flush more than one 4kb page */
          +                     cmd.data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
- -------- ---------    _bdf = (pcidev->bus->number << 8) | pcidev->devfn;
+ ++++++++++++++++++    if (pde) /* PDE bit - we want to flush everything, not only the PTEs */
          +                     cmd.data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
          +         
          +             iommu->need_sync = 1;
          +         
          +             return iommu_queue_command(iommu, &cmd);
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * TLB invalidation function which is called from the mapping functions.
+ ++++++++++++++++++ * It invalidates a single PTE if the range to flush is within a single
+ ++++++++++++++++++ * page. Otherwise it flushes the whole TLB of the IOMMU.
+ ++++++++++++++++++ */
          +         static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid,
          +                     u64 address, size_t size)
          +         {
          +             int s = 0;
          +             unsigned pages = to_pages(address, size);
          +         
          +             address &= PAGE_MASK;
          +         
          +             if (pages > 1) {
          +                     /*
          +                      * If we have to flush more than one page, flush all
          +                      * TLB entries for this domain
          +                      */
          +                     address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
          +                     s = 1;
          +             }
          +         
          +             iommu_queue_inv_iommu_pages(iommu, address, domid, 0, s);
          +         
          +             return 0;
          +         }
          +         
+ ++++++++++++++++++/****************************************************************************
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * The functions below are used to create the page table mappings for
+ ++++++++++++++++++ * unity mapped regions.
+ ++++++++++++++++++ *
+ ++++++++++++++++++ ****************************************************************************/
+ ++++++++++++++++++
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * Generic mapping function. It maps a physical address into a DMA
+ ++++++++++++++++++ * address space. It allocates the page table pages if necessary.
+ ++++++++++++++++++ * In the future it can be extended to a generic mapping function
+ ++++++++++++++++++ * supporting all features of AMD IOMMU page tables like level skipping
+ ++++++++++++++++++ * and full 64 bit address spaces.
+ ++++++++++++++++++ */
          +         static int iommu_map(struct protection_domain *dom,
          +                          unsigned long bus_addr,
          +                          unsigned long phys_addr,
          +                          int prot)
          +         {
          +             u64 __pte, *pte, *page;
          +         
          +             bus_addr  = PAGE_ALIGN(bus_addr);
           +             phys_addr = PAGE_ALIGN(phys_addr);
          +         
          +             /* only support 512GB address spaces for now */
          +             if (bus_addr > IOMMU_MAP_SIZE_L3 || !(prot & IOMMU_PROT_MASK))
          +                     return -EINVAL;
          +         
          +             pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(bus_addr)];
          +         
          +             if (!IOMMU_PTE_PRESENT(*pte)) {
          +                     page = (u64 *)get_zeroed_page(GFP_KERNEL);
          +                     if (!page)
          +                             return -ENOMEM;
          +                     *pte = IOMMU_L2_PDE(virt_to_phys(page));
          +             }
          +         
          +             pte = IOMMU_PTE_PAGE(*pte);
          +             pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)];
          +         
          +             if (!IOMMU_PTE_PRESENT(*pte)) {
          +                     page = (u64 *)get_zeroed_page(GFP_KERNEL);
          +                     if (!page)
          +                             return -ENOMEM;
          +                     *pte = IOMMU_L1_PDE(virt_to_phys(page));
          +             }
          +         
          +             pte = IOMMU_PTE_PAGE(*pte);
          +             pte = &pte[IOMMU_PTE_L0_INDEX(bus_addr)];
          +         
          +             if (IOMMU_PTE_PRESENT(*pte))
          +                     return -EBUSY;
          +         
          +             __pte = phys_addr | IOMMU_PTE_P;
          +             if (prot & IOMMU_PROT_IR)
          +                     __pte |= IOMMU_PTE_IR;
          +             if (prot & IOMMU_PROT_IW)
          +                     __pte |= IOMMU_PTE_IW;
          +         
          +             *pte = __pte;
          +         
          +             return 0;
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * This function checks if a specific unity mapping entry is needed for
+ ++++++++++++++++++ * this specific IOMMU.
+ ++++++++++++++++++ */
          +         static int iommu_for_unity_map(struct amd_iommu *iommu,
          +                                    struct unity_map_entry *entry)
          +         {
          +             u16 bdf, i;
          +         
          +             for (i = entry->devid_start; i <= entry->devid_end; ++i) {
          +                     bdf = amd_iommu_alias_table[i];
          +                     if (amd_iommu_rlookup_table[bdf] == iommu)
          +                             return 1;
          +             }
          +         
          +             return 0;
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * Init the unity mappings for a specific IOMMU in the system
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * Basically iterates over all unity mapping entries and applies them to
+ ++++++++++++++++++ * the default DMA domain of that IOMMU if necessary.
+ ++++++++++++++++++ */
          +         static int iommu_init_unity_mappings(struct amd_iommu *iommu)
          +         {
          +             struct unity_map_entry *entry;
          +             int ret;
          +         
          +             list_for_each_entry(entry, &amd_iommu_unity_map, list) {
          +                     if (!iommu_for_unity_map(iommu, entry))
          +                             continue;
          +                     ret = dma_ops_unity_map(iommu->default_dom, entry);
          +                     if (ret)
          +                             return ret;
          +             }
          +         
          +             return 0;
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * This function actually applies the mapping to the page table of the
+ ++++++++++++++++++ * dma_ops domain.
+ ++++++++++++++++++ */
          +         static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
          +                                  struct unity_map_entry *e)
          +         {
          +             u64 addr;
          +             int ret;
          +         
          +             for (addr = e->address_start; addr < e->address_end;
          +                  addr += PAGE_SIZE) {
          +                     ret = iommu_map(&dma_dom->domain, addr, addr, e->prot);
          +                     if (ret)
          +                             return ret;
          +                     /*
          +                      * if unity mapping is in aperture range mark the page
          +                      * as allocated in the aperture
          +                      */
          +                     if (addr < dma_dom->aperture_size)
          +                             __set_bit(addr >> PAGE_SHIFT, dma_dom->bitmap);
          +             }
          +         
          +             return 0;
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * Inits the unity mappings required for a specific device
+ ++++++++++++++++++ */
          +         static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom,
          +                                               u16 devid)
          +         {
          +             struct unity_map_entry *e;
          +             int ret;
          +         
          +             list_for_each_entry(e, &amd_iommu_unity_map, list) {
          +                     if (!(devid >= e->devid_start && devid <= e->devid_end))
          +                             continue;
          +                     ret = dma_ops_unity_map(dma_dom, e);
          +                     if (ret)
          +                             return ret;
          +             }
          +         
          +             return 0;
          +         }
          +         
+ ++++++++++++++++++/****************************************************************************
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * The next functions belong to the address allocator for the dma_ops
+ ++++++++++++++++++ * interface functions. They work like the allocators in the other IOMMU
+ ++++++++++++++++++ * drivers. It's basically a bitmap which marks the allocated pages in
+ ++++++++++++++++++ * the aperture. Maybe it could be enhanced in the future to a more
+ ++++++++++++++++++ * efficient allocator.
+ ++++++++++++++++++ *
+ ++++++++++++++++++ ****************************************************************************/
          +         static unsigned long dma_mask_to_pages(unsigned long mask)
          +         {
          +             return (mask >> PAGE_SHIFT) +
          +                     (PAGE_ALIGN(mask & ~PAGE_MASK) >> PAGE_SHIFT);
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * The address allocator core function.
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * called with domain->lock held
+ ++++++++++++++++++ */
          +         static unsigned long dma_ops_alloc_addresses(struct device *dev,
          +                                                  struct dma_ops_domain *dom,
          +                                                  unsigned int pages)
          +         {
          +             unsigned long limit = dma_mask_to_pages(*dev->dma_mask);
          +             unsigned long address;
          +             unsigned long size = dom->aperture_size >> PAGE_SHIFT;
          +             unsigned long boundary_size;
          +         
          +             boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
          +                             PAGE_SIZE) >> PAGE_SHIFT;
          +             limit = limit < size ? limit : size;
          +         
          +             if (dom->next_bit >= limit)
          +                     dom->next_bit = 0;
          +         
          +             address = iommu_area_alloc(dom->bitmap, limit, dom->next_bit, pages,
          +                             0 , boundary_size, 0);
          +             if (address == -1)
          +                     address = iommu_area_alloc(dom->bitmap, limit, 0, pages,
          +                                     0, boundary_size, 0);
          +         
          +             if (likely(address != -1)) {
          +                     dom->next_bit = address + pages;
          +                     address <<= PAGE_SHIFT;
          +             } else
          +                     address = bad_dma_address;
          +         
          +             WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size);
          +         
          +             return address;
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * The address free function.
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * called with domain->lock held
+ ++++++++++++++++++ */
          +         static void dma_ops_free_addresses(struct dma_ops_domain *dom,
          +                                        unsigned long address,
          +                                        unsigned int pages)
          +         {
          +             address >>= PAGE_SHIFT;
          +             iommu_area_free(dom->bitmap, address, pages);
          +         }
          +         
+ ++++++++++++++++++/****************************************************************************
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * The next functions belong to the domain allocation. A domain is
+ ++++++++++++++++++ * allocated for every IOMMU as the default domain. If device isolation
+ ++++++++++++++++++ * is enabled, every device gets its own domain. The most important thing
+ ++++++++++++++++++ * about domains is the page table mapping the DMA address space they
+ ++++++++++++++++++ * contain.
+ ++++++++++++++++++ *
+ ++++++++++++++++++ ****************************************************************************/
+ ++++++++++++++++++
          +         static u16 domain_id_alloc(void)
          +         {
          +             unsigned long flags;
          +             int id;
          +         
          +             write_lock_irqsave(&amd_iommu_devtable_lock, flags);
          +             id = find_first_zero_bit(amd_iommu_pd_alloc_bitmap, MAX_DOMAIN_ID);
          +             BUG_ON(id == 0);
          +             if (id > 0 && id < MAX_DOMAIN_ID)
          +                     __set_bit(id, amd_iommu_pd_alloc_bitmap);
          +             else
          +                     id = 0;
          +             write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
          +         
          +             return id;
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * Used to reserve address ranges in the aperture (e.g. for exclusion
+ ++++++++++++++++++ * ranges).
+ ++++++++++++++++++ */
          +         static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
          +                                           unsigned long start_page,
          +                                           unsigned int pages)
          +         {
          +             unsigned int last_page = dom->aperture_size >> PAGE_SHIFT;
          +         
          +             if (start_page + pages > last_page)
          +                     pages = last_page - start_page;
          +         
          +             set_bit_string(dom->bitmap, start_page, pages);
          +         }
          +         
          +         static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom)
          +         {
          +             int i, j;
          +             u64 *p1, *p2, *p3;
          +         
          +             p1 = dma_dom->domain.pt_root;
          +         
          +             if (!p1)
          +                     return;
          +         
          +             for (i = 0; i < 512; ++i) {
          +                     if (!IOMMU_PTE_PRESENT(p1[i]))
          +                             continue;
          +         
          +                     p2 = IOMMU_PTE_PAGE(p1[i]);
          +                     for (j = 0; j < 512; ++j) {
          +                             if (!IOMMU_PTE_PRESENT(p2[j]))
          +                                     continue;
          +                             p3 = IOMMU_PTE_PAGE(p2[j]);
          +                             free_page((unsigned long)p3);
          +                     }
          +         
          +                     free_page((unsigned long)p2);
          +             }
          +         
          +             free_page((unsigned long)p1);
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * Free a domain, only used if something went wrong in the
+ ++++++++++++++++++ * allocation path and we need to free an already allocated page table
+ ++++++++++++++++++ */
          +         static void dma_ops_domain_free(struct dma_ops_domain *dom)
          +         {
          +             if (!dom)
          +                     return;
          +         
          +             dma_ops_free_pagetable(dom);
          +         
          +             kfree(dom->pte_pages);
          +         
          +             kfree(dom->bitmap);
          +         
          +             kfree(dom);
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * Allocates a new protection domain usable for the dma_ops functions.
+ ++++++++++++++++++ * It also initializes the page table and the address allocator data
+ ++++++++++++++++++ * structures required for the dma_ops interface
+ ++++++++++++++++++ */
          +         static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
          +                                                        unsigned order)
          +         {
          +             struct dma_ops_domain *dma_dom;
          +             unsigned i, num_pte_pages;
          +             u64 *l2_pde;
          +             u64 address;
          +         
          +             /*
          +              * Currently the DMA aperture must be between 32 MB and 1GB in size
          +              */
          +             if ((order < 25) || (order > 30))
          +                     return NULL;
          +         
          +             dma_dom = kzalloc(sizeof(struct dma_ops_domain), GFP_KERNEL);
          +             if (!dma_dom)
          +                     return NULL;
          +         
          +             spin_lock_init(&dma_dom->domain.lock);
          +         
          +             dma_dom->domain.id = domain_id_alloc();
          +             if (dma_dom->domain.id == 0)
          +                     goto free_dma_dom;
          +             dma_dom->domain.mode = PAGE_MODE_3_LEVEL;
          +             dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL);
          +             dma_dom->domain.priv = dma_dom;
          +             if (!dma_dom->domain.pt_root)
          +                     goto free_dma_dom;
          +             dma_dom->aperture_size = (1ULL << order);
          +             dma_dom->bitmap = kzalloc(dma_dom->aperture_size / (PAGE_SIZE * 8),
          +                                       GFP_KERNEL);
          +             if (!dma_dom->bitmap)
          +                     goto free_dma_dom;
          +             /*
          +              * Mark the first page as allocated so we never return 0 as
          +              * a valid dma-address; this lets us use 0 as an error value.
          +              */
          +             dma_dom->bitmap[0] = 1;
          +             dma_dom->next_bit = 0;
          +         
+ ++++++++++++++++++    /* Initialize the exclusion range if necessary */
          +             if (iommu->exclusion_start &&
          +                 iommu->exclusion_start < dma_dom->aperture_size) {
          +                     unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT;
          +                     int pages = to_pages(iommu->exclusion_start,
          +                                     iommu->exclusion_length);
          +                     dma_ops_reserve_addresses(dma_dom, startpage, pages);
          +             }
          +         
+ ++++++++++++++++++    /*
+ ++++++++++++++++++     * As the last step, build the page tables so we don't need to
+ ++++++++++++++++++     * allocate page table pages in the dma_ops mapping/unmapping
+ ++++++++++++++++++     * path.
+ ++++++++++++++++++     */
          +             num_pte_pages = dma_dom->aperture_size / (PAGE_SIZE * 512);
          +             dma_dom->pte_pages = kzalloc(num_pte_pages * sizeof(void *),
          +                             GFP_KERNEL);
          +             if (!dma_dom->pte_pages)
          +                     goto free_dma_dom;
          +         
          +             l2_pde = (u64 *)get_zeroed_page(GFP_KERNEL);
          +             if (l2_pde == NULL)
          +                     goto free_dma_dom;
          +         
          +             dma_dom->domain.pt_root[0] = IOMMU_L2_PDE(virt_to_phys(l2_pde));
          +         
          +             for (i = 0; i < num_pte_pages; ++i) {
          +                     dma_dom->pte_pages[i] = (u64 *)get_zeroed_page(GFP_KERNEL);
          +                     if (!dma_dom->pte_pages[i])
          +                             goto free_dma_dom;
          +                     address = virt_to_phys(dma_dom->pte_pages[i]);
          +                     l2_pde[i] = IOMMU_L1_PDE(address);
          +             }
          +         
          +             return dma_dom;
          +         
          +         free_dma_dom:
          +             dma_ops_domain_free(dma_dom);
          +         
          +             return NULL;
          +         }
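
The sizing arithmetic in dma_ops_domain_alloc() is easy to sanity-check: with 4 KB pages, order 25 gives a 32 MB aperture, one bitmap bit per page costs 1 KB of bitmap, and each pre-allocated leaf page-table page covers 512 pages (2 MB) of DMA space. A small user-space check, assuming a 4 KB page size:

#include <stdio.h>

int main(void)
{
        unsigned long page_size = 4096;                 /* assumed PAGE_SIZE */
        unsigned order = 25;                            /* minimum order accepted above */
        unsigned long long aperture = 1ULL << order;    /* 32 MB */

        /* one bitmap bit per aperture page, eight bits per byte */
        unsigned long bitmap_bytes = aperture / (page_size * 8);
        /* one pre-allocated leaf page holds 512 PTEs */
        unsigned long num_pte_pages = aperture / (page_size * 512);

        printf("aperture=%llu MB bitmap=%lu bytes pte_pages=%lu\n",
               aperture >> 20, bitmap_bytes, num_pte_pages);
        /* prints: aperture=32 MB bitmap=1024 bytes pte_pages=16 */
        return 0;
}
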
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * Find out the protection domain structure for a given PCI device. This
+ ++++++++++++++++++ * will give us the pointer to the page table root for example.
+ ++++++++++++++++++ */
          +         static struct protection_domain *domain_for_device(u16 devid)
          +         {
          +             struct protection_domain *dom;
          +             unsigned long flags;
          +         
          +             read_lock_irqsave(&amd_iommu_devtable_lock, flags);
          +             dom = amd_iommu_pd_table[devid];
          +             read_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
          +         
          +             return dom;
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * If a device is not yet associated with a domain, this function assigns
+ ++++++++++++++++++ * it to one and makes that assignment visible to the hardware
+ ++++++++++++++++++ */
          +         static void set_device_domain(struct amd_iommu *iommu,
          +                                   struct protection_domain *domain,
          +                                   u16 devid)
          +         {
          +             unsigned long flags;
          +         
          +             u64 pte_root = virt_to_phys(domain->pt_root);
          +         
          +             pte_root |= (domain->mode & 0x07) << 9;
          +             pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | 2;
          +         
          +             write_lock_irqsave(&amd_iommu_devtable_lock, flags);
          +             amd_iommu_dev_table[devid].data[0] = pte_root;
          +             amd_iommu_dev_table[devid].data[1] = pte_root >> 32;
          +             amd_iommu_dev_table[devid].data[2] = domain->id;
          +         
          +             amd_iommu_pd_table[devid] = domain;
          +             write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
          +         
          +             iommu_queue_inv_dev_entry(iommu, devid);
          +         
          +             iommu->need_sync = 1;
          +         }
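
set_device_domain() builds a single 64-bit word from the page-table root, the paging mode and the permission/valid flags, then splits it across the 32-bit data[] words of the device table entry. The sketch below mirrors only that packing; the TOY_* bit positions and the sample values are placeholders, not the real IOMMU_PTE_* definitions.

#include <stdio.h>
#include <stdint.h>

#define TOY_PTE_P  (1ULL << 0)                  /* placeholder for IOMMU_PTE_P  */
#define TOY_PTE_IR (1ULL << 61)                 /* placeholder for IOMMU_PTE_IR */
#define TOY_PTE_IW (1ULL << 62)                 /* placeholder for IOMMU_PTE_IW */

int main(void)
{
        uint64_t pt_root_phys = 0x12345000ULL;  /* page-aligned page table root */
        uint64_t mode = 3;                      /* 3-level page table            */
        uint32_t data[3];

        uint64_t pte_root = pt_root_phys | ((mode & 0x07) << 9) |
                            TOY_PTE_IR | TOY_PTE_IW | TOY_PTE_P | 2;

        data[0] = (uint32_t)pte_root;           /* low 32 bits of the root word  */
        data[1] = (uint32_t)(pte_root >> 32);   /* high 32 bits of the root word */
        data[2] = 42;                           /* domain id                     */

        printf("data[0]=%#x data[1]=%#x data[2]=%u\n",
               (unsigned)data[0], (unsigned)data[1], (unsigned)data[2]);
        return 0;
}
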
          +         
+ ++++++++++++++++++/*****************************************************************************
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * The next functions belong to the dma_ops mapping/unmapping code.
+ ++++++++++++++++++ *
+ ++++++++++++++++++ *****************************************************************************/
+ ++++++++++++++++++
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * In the dma_ops path we only have the struct device. This function
+ ++++++++++++++++++ * finds the corresponding IOMMU, the protection domain and the
+ ++++++++++++++++++ * requestor id for a given device.
+ ++++++++++++++++++ * If the device is not yet associated with a domain this is also done
+ ++++++++++++++++++ * in this function.
+ ++++++++++++++++++ */
          +         static int get_device_resources(struct device *dev,
          +                                     struct amd_iommu **iommu,
          +                                     struct protection_domain **domain,
          +                                     u16 *bdf)
          +         {
          +             struct dma_ops_domain *dma_dom;
          +             struct pci_dev *pcidev;
          +             u16 _bdf;
          +         
          +             BUG_ON(!dev || dev->bus != &pci_bus_type || !dev->dma_mask);
          +         
          +             pcidev = to_pci_dev(dev);
+ ++++++++++++++++++    _bdf = calc_devid(pcidev->bus->number, pcidev->devfn);
          +         
+ ++++++++++++++++++    /* device not translated by any IOMMU in the system? */
          +             if (_bdf >= amd_iommu_last_bdf) {
          +                     *iommu = NULL;
          +                     *domain = NULL;
          +                     *bdf = 0xffff;
          +                     return 0;
          +             }
          +         
          +             *bdf = amd_iommu_alias_table[_bdf];
          +         
          +             *iommu = amd_iommu_rlookup_table[*bdf];
          +             if (*iommu == NULL)
          +                     return 0;
          +             dma_dom = (*iommu)->default_dom;
          +             *domain = domain_for_device(*bdf);
          +             if (*domain == NULL) {
          +                     *domain = &dma_dom->domain;
          +                     set_device_domain(*iommu, *domain, *bdf);
          +                     printk(KERN_INFO "AMD IOMMU: Using protection domain %d for "
          +                                     "device ", (*domain)->id);
          +                     print_devid(_bdf, 1);
          +             }
          +         
          +             return 1;
          +         }
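
The lookup chain get_device_resources() walks is: a requestor id computed from bus/devfn, possibly redirected through the alias table, then used to index the rlookup table (which IOMMU handles the device) and the protection domain table. A toy sketch of that chain; all table contents and names here are made-up assumptions for illustration.

#include <stdio.h>

#define MAX_DEVID 8

static unsigned short alias_table[MAX_DEVID] = { 0, 1, 2, 2, 4, 5, 6, 7 };
static const char *rlookup_table[MAX_DEVID]  = { 0, "iommu0", "iommu0", "iommu0",
                                                 "iommu1", 0, 0, 0 };
static const char *pd_table[MAX_DEVID]       = { 0, "domain A", 0, 0, 0, 0, 0, 0 };

int main(void)
{
        unsigned bus = 0, devfn = 3;
        unsigned devid = (bus << 8) | devfn;    /* same idea as calc_devid()          */
        unsigned bdf   = alias_table[devid];    /* devices 2 and 3 share requestor id */

        printf("devid=%u bdf=%u iommu=%s domain=%s\n", devid, bdf,
               rlookup_table[bdf] ? rlookup_table[bdf] : "none",
               pd_table[bdf] ? pd_table[bdf] : "default (assigned on first use)");
        return 0;
}
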
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * This is the generic map function. It maps one 4kb page at paddr to
+ ++++++++++++++++++ * the given address in the DMA address space for the domain.
+ ++++++++++++++++++ */
          +         static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu,
          +                                          struct dma_ops_domain *dom,
          +                                          unsigned long address,
          +                                          phys_addr_t paddr,
          +                                          int direction)
          +         {
          +             u64 *pte, __pte;
          +         
          +             WARN_ON(address > dom->aperture_size);
          +         
          +             paddr &= PAGE_MASK;
          +         
          +             pte  = dom->pte_pages[IOMMU_PTE_L1_INDEX(address)];
          +             pte += IOMMU_PTE_L0_INDEX(address);
          +         
          +             __pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC;
          +         
          +             if (direction == DMA_TO_DEVICE)
          +                     __pte |= IOMMU_PTE_IR;
          +             else if (direction == DMA_FROM_DEVICE)
          +                     __pte |= IOMMU_PTE_IW;
          +             else if (direction == DMA_BIDIRECTIONAL)
          +                     __pte |= IOMMU_PTE_IR | IOMMU_PTE_IW;
          +         
          +             WARN_ON(*pte);
          +         
          +             *pte = __pte;
          +         
          +             return (dma_addr_t)address;
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * The generic unmapping function for one page in the DMA address space.
+ ++++++++++++++++++ */
          +         static void dma_ops_domain_unmap(struct amd_iommu *iommu,
          +                                      struct dma_ops_domain *dom,
          +                                      unsigned long address)
          +         {
          +             u64 *pte;
          +         
          +             if (address >= dom->aperture_size)
          +                     return;
          +         
          +             WARN_ON(address & 0xfffULL || address > dom->aperture_size);
          +         
          +             pte  = dom->pte_pages[IOMMU_PTE_L1_INDEX(address)];
          +             pte += IOMMU_PTE_L0_INDEX(address);
          +         
          +             WARN_ON(!*pte);
          +         
          +             *pte = 0ULL;
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * This function contains common code for mapping of a physically
+ ++++++++++++++++++ * contiguous memory region into DMA address space. It is used by all
+ ++++++++++++++++++ * mapping functions provided by this IOMMU driver.
+ ++++++++++++++++++ * Must be called with the domain lock held.
+ ++++++++++++++++++ */
          +         static dma_addr_t __map_single(struct device *dev,
          +                                    struct amd_iommu *iommu,
          +                                    struct dma_ops_domain *dma_dom,
          +                                    phys_addr_t paddr,
          +                                    size_t size,
          +                                    int dir)
          +         {
          +             dma_addr_t offset = paddr & ~PAGE_MASK;
          +             dma_addr_t address, start;
          +             unsigned int pages;
          +             int i;
          +         
          +             pages = to_pages(paddr, size);
          +             paddr &= PAGE_MASK;
          +         
          +             address = dma_ops_alloc_addresses(dev, dma_dom, pages);
          +             if (unlikely(address == bad_dma_address))
          +                     goto out;
          +         
          +             start = address;
          +             for (i = 0; i < pages; ++i) {
          +                     dma_ops_domain_map(iommu, dma_dom, start, paddr, dir);
          +                     paddr += PAGE_SIZE;
          +                     start += PAGE_SIZE;
          +             }
          +             address += offset;
          +         
          +         out:
          +             return address;
          +         }
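
A buffer that is not page aligned can span one page more than size/PAGE_SIZE suggests, which is why __map_single() splits paddr into a page-aligned part plus an offset that is added back to the returned DMA address. A worked user-space example, assuming 4 KB pages; the body of toy_to_pages() is an assumption about what the to_pages() helper computes.

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

#define TOY_PAGE_SHIFT 12
#define TOY_PAGE_SIZE  (1UL << TOY_PAGE_SHIFT)
#define TOY_PAGE_MASK  (~(TOY_PAGE_SIZE - 1))

/* assumed equivalent of the to_pages() helper used above */
static unsigned int toy_to_pages(uint64_t paddr, size_t size)
{
        return ((paddr & ~TOY_PAGE_MASK) + size + TOY_PAGE_SIZE - 1)
                >> TOY_PAGE_SHIFT;
}

int main(void)
{
        uint64_t paddr = 0x12340567ULL;         /* not page aligned */
        size_t size = 0x2000;                   /* 8 KB */

        uint64_t offset = paddr & ~TOY_PAGE_MASK;
        unsigned int pages = toy_to_pages(paddr, size);

        printf("offset=%#llx pages=%u\n", (unsigned long long)offset, pages);
        /* prints offset=0x567 pages=3: the unaligned start costs an extra page */
        return 0;
}
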
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * Does the reverse of the __map_single function. Must be called with
+ ++++++++++++++++++ * the domain lock held too
+ ++++++++++++++++++ */
          +         static void __unmap_single(struct amd_iommu *iommu,
          +                                struct dma_ops_domain *dma_dom,
          +                                dma_addr_t dma_addr,
          +                                size_t size,
          +                                int dir)
          +         {
          +             dma_addr_t i, start;
          +             unsigned int pages;
          +         
          +             if ((dma_addr == 0) || (dma_addr + size > dma_dom->aperture_size))
          +                     return;
          +         
          +             pages = to_pages(dma_addr, size);
          +             dma_addr &= PAGE_MASK;
          +             start = dma_addr;
          +         
          +             for (i = 0; i < pages; ++i) {
          +                     dma_ops_domain_unmap(iommu, dma_dom, start);
          +                     start += PAGE_SIZE;
          +             }
          +         
          +             dma_ops_free_addresses(dma_dom, dma_addr, pages);
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * The exported map_single function for dma_ops.
+ ++++++++++++++++++ */
          +         static dma_addr_t map_single(struct device *dev, phys_addr_t paddr,
          +                                  size_t size, int dir)
          +         {
          +             unsigned long flags;
          +             struct amd_iommu *iommu;
          +             struct protection_domain *domain;
          +             u16 devid;
          +             dma_addr_t addr;
          +         
          +             get_device_resources(dev, &iommu, &domain, &devid);
          +         
          +             if (iommu == NULL || domain == NULL)
+ ++++++++++++++++++            /* device not handled by any AMD IOMMU */
          +                     return (dma_addr_t)paddr;
          +         
          +             spin_lock_irqsave(&domain->lock, flags);
          +             addr = __map_single(dev, iommu, domain->priv, paddr, size, dir);
          +             if (addr == bad_dma_address)
          +                     goto out;
          +         
          +             if (iommu_has_npcache(iommu))
          +                     iommu_flush_pages(iommu, domain->id, addr, size);
          +         
          +             if (iommu->need_sync)
          +                     iommu_completion_wait(iommu);
          +         
          +         out:
          +             spin_unlock_irqrestore(&domain->lock, flags);
          +         
          +             return addr;
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * The exported unmap_single function for dma_ops.
+ ++++++++++++++++++ */
          +         static void unmap_single(struct device *dev, dma_addr_t dma_addr,
          +                              size_t size, int dir)
          +         {
          +             unsigned long flags;
          +             struct amd_iommu *iommu;
          +             struct protection_domain *domain;
          +             u16 devid;
          +         
          +             if (!get_device_resources(dev, &iommu, &domain, &devid))
+ ++++++++++++++++++            /* device not handled by any AMD IOMMU */
          +                     return;
          +         
          +             spin_lock_irqsave(&domain->lock, flags);
          +         
          +             __unmap_single(iommu, domain->priv, dma_addr, size, dir);
          +         
          +             iommu_flush_pages(iommu, domain->id, dma_addr, size);
          +         
          +             if (iommu->need_sync)
          +                     iommu_completion_wait(iommu);
          +         
          +             spin_unlock_irqrestore(&domain->lock, flags);
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * This is a special map_sg function which is used when we have to map a
+ ++++++++++++++++++ * device that is not handled by any AMD IOMMU in the system.
+ ++++++++++++++++++ */
          +         static int map_sg_no_iommu(struct device *dev, struct scatterlist *sglist,
          +                                int nelems, int dir)
          +         {
          +             struct scatterlist *s;
          +             int i;
          +         
          +             for_each_sg(sglist, s, nelems, i) {
          +                     s->dma_address = (dma_addr_t)sg_phys(s);
          +                     s->dma_length  = s->length;
          +             }
          +         
          +             return nelems;
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * The exported map_sg function for dma_ops (handles scatter-gather
+ ++++++++++++++++++ * lists).
+ ++++++++++++++++++ */
          +         static int map_sg(struct device *dev, struct scatterlist *sglist,
          +                       int nelems, int dir)
          +         {
          +             unsigned long flags;
          +             struct amd_iommu *iommu;
          +             struct protection_domain *domain;
          +             u16 devid;
          +             int i;
          +             struct scatterlist *s;
          +             phys_addr_t paddr;
          +             int mapped_elems = 0;
          +         
          +             get_device_resources(dev, &iommu, &domain, &devid);
          +         
          +             if (!iommu || !domain)
          +                     return map_sg_no_iommu(dev, sglist, nelems, dir);
          +         
          +             spin_lock_irqsave(&domain->lock, flags);
          +         
          +             for_each_sg(sglist, s, nelems, i) {
          +                     paddr = sg_phys(s);
          +         
          +                     s->dma_address = __map_single(dev, iommu, domain->priv,
          +                                                   paddr, s->length, dir);
          +         
          +                     if (s->dma_address) {
          +                             s->dma_length = s->length;
          +                             mapped_elems++;
          +                     } else
          +                             goto unmap;
          +                     if (iommu_has_npcache(iommu))
          +                             iommu_flush_pages(iommu, domain->id, s->dma_address,
          +                                               s->dma_length);
          +             }
          +         
          +             if (iommu->need_sync)
          +                     iommu_completion_wait(iommu);
          +         
          +         out:
          +             spin_unlock_irqrestore(&domain->lock, flags);
          +         
          +             return mapped_elems;
          +         unmap:
          +             for_each_sg(sglist, s, mapped_elems, i) {
          +                     if (s->dma_address)
          +                             __unmap_single(iommu, domain->priv, s->dma_address,
          +                                            s->dma_length, dir);
          +                     s->dma_address = s->dma_length = 0;
          +             }
          +         
          +             mapped_elems = 0;
          +         
          +             goto out;
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * The exported unmap_sg function for dma_ops (handles scatter-gather
+ ++++++++++++++++++ * lists).
+ ++++++++++++++++++ */
          +         static void unmap_sg(struct device *dev, struct scatterlist *sglist,
          +                          int nelems, int dir)
          +         {
          +             unsigned long flags;
          +             struct amd_iommu *iommu;
          +             struct protection_domain *domain;
          +             struct scatterlist *s;
          +             u16 devid;
          +             int i;
          +         
          +             if (!get_device_resources(dev, &iommu, &domain, &devid))
          +                     return;
          +         
          +             spin_lock_irqsave(&domain->lock, flags);
          +         
          +             for_each_sg(sglist, s, nelems, i) {
          +                     __unmap_single(iommu, domain->priv, s->dma_address,
          +                                    s->dma_length, dir);
          +                     iommu_flush_pages(iommu, domain->id, s->dma_address,
          +                                       s->dma_length);
          +                     s->dma_address = s->dma_length = 0;
          +             }
          +         
          +             if (iommu->need_sync)
          +                     iommu_completion_wait(iommu);
          +         
          +             spin_unlock_irqrestore(&domain->lock, flags);
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * The exported alloc_coherent function for dma_ops.
+ ++++++++++++++++++ */
          +         static void *alloc_coherent(struct device *dev, size_t size,
          +                                 dma_addr_t *dma_addr, gfp_t flag)
          +         {
          +             unsigned long flags;
          +             void *virt_addr;
          +             struct amd_iommu *iommu;
          +             struct protection_domain *domain;
          +             u16 devid;
          +             phys_addr_t paddr;
          +         
          +             virt_addr = (void *)__get_free_pages(flag, get_order(size));
          +             if (!virt_addr)
          +                     return 0;
          +         
          +             memset(virt_addr, 0, size);
          +             paddr = virt_to_phys(virt_addr);
          +         
          +             get_device_resources(dev, &iommu, &domain, &devid);
          +         
          +             if (!iommu || !domain) {
          +                     *dma_addr = (dma_addr_t)paddr;
          +                     return virt_addr;
          +             }
          +         
          +             spin_lock_irqsave(&domain->lock, flags);
          +         
          +             *dma_addr = __map_single(dev, iommu, domain->priv, paddr,
          +                                      size, DMA_BIDIRECTIONAL);
          +         
          +             if (*dma_addr == bad_dma_address) {
          +                     free_pages((unsigned long)virt_addr, get_order(size));
          +                     virt_addr = NULL;
          +                     goto out;
          +             }
          +         
          +             if (iommu_has_npcache(iommu))
          +                     iommu_flush_pages(iommu, domain->id, *dma_addr, size);
          +         
          +             if (iommu->need_sync)
          +                     iommu_completion_wait(iommu);
          +         
          +         out:
          +             spin_unlock_irqrestore(&domain->lock, flags);
          +         
          +             return virt_addr;
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * The exported free_coherent function for dma_ops.
+ ++++++++++++++++++ * FIXME: fix the generic x86 DMA layer so that it actually calls that
+ ++++++++++++++++++ *        function.
+ ++++++++++++++++++ */
          +         static void free_coherent(struct device *dev, size_t size,
          +                               void *virt_addr, dma_addr_t dma_addr)
          +         {
          +             unsigned long flags;
          +             struct amd_iommu *iommu;
          +             struct protection_domain *domain;
          +             u16 devid;
          +         
          +             get_device_resources(dev, &iommu, &domain, &devid);
          +         
          +             if (!iommu || !domain)
          +                     goto free_mem;
          +         
          +             spin_lock_irqsave(&domain->lock, flags);
          +         
          +             __unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL);
          +             iommu_flush_pages(iommu, domain->id, dma_addr, size);
          +         
          +             if (iommu->need_sync)
          +                     iommu_completion_wait(iommu);
          +         
          +             spin_unlock_irqrestore(&domain->lock, flags);
          +         
          +         free_mem:
          +             free_pages((unsigned long)virt_addr, get_order(size));
          +         }
          +         
          +         /*
+ ++++++++++++++++++ * The function for pre-allocating protection domains.
+ ++++++++++++++++++ *
          +          * Once the driver core informs the DMA layer when a driver grabs a device
          +          * we won't need to preallocate the protection domains anymore.
          +          * For now we have to.
          +          */
          +         void prealloc_protection_domains(void)
          +         {
          +             struct pci_dev *dev = NULL;
          +             struct dma_ops_domain *dma_dom;
          +             struct amd_iommu *iommu;
          +             int order = amd_iommu_aperture_order;
          +             u16 devid;
          +         
          +             while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
          +                     devid = (dev->bus->number << 8) | dev->devfn;
          +                     if (devid >= amd_iommu_last_bdf)
          +                             continue;
          +                     devid = amd_iommu_alias_table[devid];
          +                     if (domain_for_device(devid))
          +                             continue;
          +                     iommu = amd_iommu_rlookup_table[devid];
          +                     if (!iommu)
          +                             continue;
          +                     dma_dom = dma_ops_domain_alloc(iommu, order);
          +                     if (!dma_dom)
          +                             continue;
          +                     init_unity_mappings_for_device(dma_dom, devid);
          +                     set_device_domain(iommu, &dma_dom->domain, devid);
          +                     printk(KERN_INFO "AMD IOMMU: Allocated domain %d for device ",
          +                            dma_dom->domain.id);
          +                     print_devid(devid, 1);
          +             }
          +         }
          +         
          +         static struct dma_mapping_ops amd_iommu_dma_ops = {
          +             .alloc_coherent = alloc_coherent,
          +             .free_coherent = free_coherent,
          +             .map_single = map_single,
          +             .unmap_single = unmap_single,
          +             .map_sg = map_sg,
          +             .unmap_sg = unmap_sg,
          +         };
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * The function which glues the AMD IOMMU driver into dma_ops.
+ ++++++++++++++++++ */
          +         int __init amd_iommu_init_dma_ops(void)
          +         {
          +             struct amd_iommu *iommu;
          +             int order = amd_iommu_aperture_order;
          +             int ret;
          +         
+ ++++++++++++++++++    /*
+ ++++++++++++++++++     * first allocate a default protection domain for every IOMMU we
+ ++++++++++++++++++     * found in the system. Devices not assigned to any other
+ ++++++++++++++++++     * protection domain will be assigned to the default one.
+ ++++++++++++++++++     */
          +             list_for_each_entry(iommu, &amd_iommu_list, list) {
          +                     iommu->default_dom = dma_ops_domain_alloc(iommu, order);
          +                     if (iommu->default_dom == NULL)
          +                             return -ENOMEM;
          +                     ret = iommu_init_unity_mappings(iommu);
          +                     if (ret)
          +                             goto free_domains;
          +             }
          +         
+ ++++++++++++++++++    /*
+ ++++++++++++++++++     * If device isolation is enabled, pre-allocate the protection
+ ++++++++++++++++++     * domains for each device.
+ ++++++++++++++++++     */
          +             if (amd_iommu_isolate)
          +                     prealloc_protection_domains();
          +         
          +             iommu_detected = 1;
          +             force_iommu = 1;
          +             bad_dma_address = 0;
          +         #ifdef CONFIG_GART_IOMMU
          +             gart_iommu_aperture_disabled = 1;
          +             gart_iommu_aperture = 0;
          +         #endif
          +         
+ ++++++++++++++++++    /* Finally make our dma_ops visible to the device drivers */
          +             dma_ops = &amd_iommu_dma_ops;
          +         
          +             return 0;
          +         
          +         free_domains:
          +         
          +             list_for_each_entry(iommu, &amd_iommu_list, list) {
          +                     if (iommu->default_dom)
          +                             dma_ops_domain_free(iommu->default_dom);
          +             }
          +         
          +             return ret;
          +         }
index 2a13e430437dc5f1e05793aa995df4befcbc8938,7661b02d7208b3fe575958d50feb4af704187e7b,2a13e430437dc5f1e05793aa995df4befcbc8938,2a13e430437dc5f1e05793aa995df4befcbc8938,2a13e430437dc5f1e05793aa995df4befcbc8938,2a13e430437dc5f1e05793aa995df4befcbc8938,2a13e430437dc5f1e05793aa995df4befcbc8938,66438284c69956b5b2d1e436ed027151c6b834ef,2a13e430437dc5f1e05793aa995df4befcbc8938,2a13e430437dc5f1e05793aa995df4befcbc8938,0000000000000000000000000000000000000000,2a13e430437dc5f1e05793aa995df4befcbc8938,2a13e430437dc5f1e05793aa995df4befcbc8938,2a13e430437dc5f1e05793aa995df4befcbc8938,2a13e430437dc5f1e05793aa995df4befcbc8938,2a13e430437dc5f1e05793aa995df4befcbc8938,2a13e430437dc5f1e05793aa995df4befcbc8938,2a13e430437dc5f1e05793aa995df4befcbc8938,2a13e430437dc5f1e05793aa995df4befcbc8938,2a13e430437dc5f1e05793aa995df4befcbc8938..c9d8ff2eb130b3ed384bc525ef13a5c4784161d7
mode 100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,000000,100644,100644,100644,100644,100644,100644,100644,100644,100644..100644
--- /dev/null
------- -- ---------#include <asm/gart.h>
          +         /*
          +          * Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
          +          * Author: Joerg Roedel <joerg.roedel@amd.com>
          +          *         Leo Duran <leo.duran@amd.com>
          +          *
          +          * This program is free software; you can redistribute it and/or modify it
          +          * under the terms of the GNU General Public License version 2 as published
          +          * by the Free Software Foundation.
          +          *
          +          * This program is distributed in the hope that it will be useful,
          +          * but WITHOUT ANY WARRANTY; without even the implied warranty of
          +          * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
          +          * GNU General Public License for more details.
          +          *
          +          * You should have received a copy of the GNU General Public License
          +          * along with this program; if not, write to the Free Software
          +          * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
          +          */
          +         
          +         #include <linux/pci.h>
          +         #include <linux/acpi.h>
          +         #include <linux/gfp.h>
          +         #include <linux/list.h>
          +         #include <linux/sysdev.h>
          +         #include <asm/pci-direct.h>
          +         #include <asm/amd_iommu_types.h>
          +         #include <asm/amd_iommu.h>
- -------- ---------#define UPDATE_LAST_BDF(x) do {\
- -------- ---------    if ((x) > amd_iommu_last_bdf) \
- -------- ---------            amd_iommu_last_bdf = (x); \
- -------- ---------    } while (0);
- -------- ---------
- -------- ---------#define DEVID(bus, devfn) (((bus) << 8) | (devfn))
+++++++ ++++++++++++#include <asm/iommu.h>
          +         
          +         /*
          +          * definitions for the ACPI scanning code
          +          */
- -------- ---------#define TBL_SIZE(x) (1 << (PAGE_SHIFT + get_order(amd_iommu_last_bdf * (x))))
          +         #define PCI_BUS(x) (((x) >> 8) & 0xff)
          +         #define IVRS_HEADER_LENGTH 48
- -------- ---------u16 amd_iommu_last_bdf;
- -------- ---------struct list_head amd_iommu_unity_map;
- -------- ---------unsigned amd_iommu_aperture_order = 26;
- -------- ---------int amd_iommu_isolate;
          +         
          +         #define ACPI_IVHD_TYPE                  0x10
          +         #define ACPI_IVMD_TYPE_ALL              0x20
          +         #define ACPI_IVMD_TYPE                  0x21
          +         #define ACPI_IVMD_TYPE_RANGE            0x22
          +         
          +         #define IVHD_DEV_ALL                    0x01
          +         #define IVHD_DEV_SELECT                 0x02
          +         #define IVHD_DEV_SELECT_RANGE_START     0x03
          +         #define IVHD_DEV_RANGE_END              0x04
          +         #define IVHD_DEV_ALIAS                  0x42
          +         #define IVHD_DEV_ALIAS_RANGE            0x43
          +         #define IVHD_DEV_EXT_SELECT             0x46
          +         #define IVHD_DEV_EXT_SELECT_RANGE       0x47
          +         
          +         #define IVHD_FLAG_HT_TUN_EN             0x00
          +         #define IVHD_FLAG_PASSPW_EN             0x01
          +         #define IVHD_FLAG_RESPASSPW_EN          0x02
          +         #define IVHD_FLAG_ISOC_EN               0x03
          +         
          +         #define IVMD_FLAG_EXCL_RANGE            0x08
          +         #define IVMD_FLAG_UNITY_MAP             0x01
          +         
          +         #define ACPI_DEVFLAG_INITPASS           0x01
          +         #define ACPI_DEVFLAG_EXTINT             0x02
          +         #define ACPI_DEVFLAG_NMI                0x04
          +         #define ACPI_DEVFLAG_SYSMGT1            0x10
          +         #define ACPI_DEVFLAG_SYSMGT2            0x20
          +         #define ACPI_DEVFLAG_LINT0              0x40
          +         #define ACPI_DEVFLAG_LINT1              0x80
          +         #define ACPI_DEVFLAG_ATSDIS             0x10000000
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * ACPI table definitions
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * These data structures are laid over the table to parse the important values
+ ++++++++++++++++++ * out of it.
+ ++++++++++++++++++ */
+ ++++++++++++++++++
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * structure describing one IOMMU in the ACPI table. Typically followed by one
+ ++++++++++++++++++ * or more ivhd_entries.
+ ++++++++++++++++++ */
          +         struct ivhd_header {
          +             u8 type;
          +             u8 flags;
          +             u16 length;
          +             u16 devid;
          +             u16 cap_ptr;
          +             u64 mmio_phys;
          +             u16 pci_seg;
          +             u16 info;
          +             u32 reserved;
          +         } __attribute__((packed));
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * A device entry describing which devices a specific IOMMU translates and
+ ++++++++++++++++++ * which requestor ids they use.
+ ++++++++++++++++++ */
          +         struct ivhd_entry {
          +             u8 type;
          +             u16 devid;
          +             u8 flags;
          +             u32 ext;
          +         } __attribute__((packed));
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * An AMD IOMMU memory definition structure. It defines things like exclusion
+ ++++++++++++++++++ * ranges for devices and regions that should be unity mapped.
+ ++++++++++++++++++ */
          +         struct ivmd_header {
          +             u8 type;
          +             u8 flags;
          +             u16 length;
          +             u16 devid;
          +             u16 aux;
          +             u64 resv;
          +             u64 range_start;
          +             u64 range_length;
          +         } __attribute__((packed));
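
The __attribute__((packed)) on these structures matters: without it the compiler would insert padding and the struct would no longer match the byte layout of the ACPI table being parsed. A small illustrative check (the toy_* types mirror the ivhd_entry layout above; the unpacked size shown is typical for x86, not guaranteed):

#include <stdio.h>
#include <stdint.h>

struct toy_ivhd_entry {
        uint8_t  type;
        uint16_t devid;
        uint8_t  flags;
        uint32_t ext;
} __attribute__((packed));

struct toy_ivhd_entry_padded {                  /* same fields, default padding */
        uint8_t  type;
        uint16_t devid;
        uint8_t  flags;
        uint32_t ext;
};

int main(void)
{
        printf("packed: %zu bytes, unpacked: %zu bytes\n",
               sizeof(struct toy_ivhd_entry),
               sizeof(struct toy_ivhd_entry_padded));
        /* typically prints "packed: 8 bytes, unpacked: 12 bytes" */
        return 0;
}
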
          +         
          +         static int __initdata amd_iommu_detected;
          +         
- -------- ---------struct list_head amd_iommu_list;
+ ++++++++++++++++++u16 amd_iommu_last_bdf;                     /* largest PCI device id we have
+ ++++++++++++++++++                                       to handle */
+ ++++++++++++++++++LIST_HEAD(amd_iommu_unity_map);             /* a list of required unity mappings
+ ++++++++++++++++++                                       we find in ACPI */
+ ++++++++++++++++++unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */
+ ++++++++++++++++++int amd_iommu_isolate;                      /* if 1, device isolation is enabled */
+ ++++++++++++++++++
+ ++++++++++++++++++LIST_HEAD(amd_iommu_list);          /* list of all AMD IOMMUs in the
+ ++++++++++++++++++                                       system */
          +         
- -------- ---------static u32 dev_table_size;
- -------- ---------static u32 alias_table_size;
- -------- ---------static u32 rlookup_table_size;
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * Pointer to the device table which is shared by all AMD IOMMUs;
+ ++++++++++++++++++ * it is indexed by the PCI device id or the HT unit id and contains
+ ++++++++++++++++++ * information about the domain the device belongs to as well as the
+ ++++++++++++++++++ * page table root pointer.
+ ++++++++++++++++++ */
          +         struct dev_table_entry *amd_iommu_dev_table;
+ ++++++++++++++++++
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * The alias table is a driver specific data structure which contains the
+ ++++++++++++++++++ * mappings of the PCI device ids to the actual requestor ids on the IOMMU.
+ ++++++++++++++++++ * More than one device can share the same requestor id.
+ ++++++++++++++++++ */
          +         u16 *amd_iommu_alias_table;
+ ++++++++++++++++++
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * The rlookup table is used to find the IOMMU which is responsible
+ ++++++++++++++++++ * for a specific device. It is also indexed by the PCI device id.
+ ++++++++++++++++++ */
          +         struct amd_iommu **amd_iommu_rlookup_table;
+ ++++++++++++++++++
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * The pd table (protection domain table) is used to find the protection domain
+ ++++++++++++++++++ * data structure a device belongs to. Indexed with the PCI device id too.
+ ++++++++++++++++++ */
          +         struct protection_domain **amd_iommu_pd_table;
+ ++++++++++++++++++
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * AMD IOMMU allows up to 2^16 different protection domains. This is a bitmap
+ ++++++++++++++++++ * to know which ones are already in use.
+ ++++++++++++++++++ */
          +         unsigned long *amd_iommu_pd_alloc_bitmap;
          +         
- -------- ---------    UPDATE_LAST_BDF(DEVID(MMIO_GET_BUS(cap), MMIO_GET_LD(cap)));
+ ++++++++++++++++++static u32 dev_table_size;  /* size of the device table */
+ ++++++++++++++++++static u32 alias_table_size;        /* size of the alias table */
+ ++++++++++++++++++static u32 rlookup_table_size;      /* size of the rlookup table */
          +         
+ ++++++++++++++++++static inline void update_last_devid(u16 devid)
+ ++++++++++++++++++{
+ ++++++++++++++++++    if (devid > amd_iommu_last_bdf)
+ ++++++++++++++++++            amd_iommu_last_bdf = devid;
+ ++++++++++++++++++}
+ ++++++++++++++++++
+ ++++++++++++++++++static inline unsigned long tbl_size(int entry_size)
+ ++++++++++++++++++{
+ ++++++++++++++++++    unsigned shift = PAGE_SHIFT +
+ ++++++++++++++++++                     get_order(amd_iommu_last_bdf * entry_size);
+ ++++++++++++++++++
+ ++++++++++++++++++    return 1UL << shift;
+ ++++++++++++++++++}
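
tbl_size() rounds each table up to a power-of-two number of pages: PAGE_SHIFT plus the allocation order needed for last_bdf entries of the given size. A worked user-space example, assuming 4 KB pages, a highest device id of 0xffff and, for instance, a 32-byte device table entry; toy_get_order() is a stand-in with get_order()-like semantics.

#include <stdio.h>

static unsigned toy_get_order(unsigned long size)       /* stand-in for get_order() */
{
        unsigned order = 0;

        size = (size - 1) >> 12;                         /* PAGE_SHIFT == 12 assumed */
        while (size) {
                order++;
                size >>= 1;
        }
        return order;
}

int main(void)
{
        unsigned long last_bdf = 0xffff;                 /* highest device id        */
        unsigned long entry_size = 32;                   /* bytes per table entry    */
        unsigned long size = 1UL << (12 + toy_get_order(last_bdf * entry_size));

        printf("table size = %lu KB\n", size >> 10);     /* prints 2048 KB */
        return 0;
}
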
+ ++++++++++++++++++
+ ++++++++++++++++++/****************************************************************************
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * AMD IOMMU MMIO register space handling functions
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * These functions are used to program the IOMMU device registers in
+ ++++++++++++++++++ * MMIO space required for that driver.
+ ++++++++++++++++++ *
+ ++++++++++++++++++ ****************************************************************************/
+ ++++++++++++++++++
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * This function sets the exclusion range in the IOMMU. DMA accesses to the
+ ++++++++++++++++++ * exclusion range are passed through untranslated
+ ++++++++++++++++++ */
          +         static void __init iommu_set_exclusion_range(struct amd_iommu *iommu)
          +         {
          +             u64 start = iommu->exclusion_start & PAGE_MASK;
          +             u64 limit = (start + iommu->exclusion_length) & PAGE_MASK;
          +             u64 entry;
          +         
          +             if (!iommu->exclusion_start)
          +                     return;
          +         
          +             entry = start | MMIO_EXCL_ENABLE_MASK;
          +             memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET,
          +                             &entry, sizeof(entry));
          +         
          +             entry = limit;
          +             memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET,
          +                             &entry, sizeof(entry));
          +         }
          +         
+ ++++++++++++++++++/* Programs the physical address of the device table into the IOMMU hardware */
          +         static void __init iommu_set_device_table(struct amd_iommu *iommu)
          +         {
          +             u32 entry;
          +         
          +             BUG_ON(iommu->mmio_base == NULL);
          +         
          +             entry = virt_to_phys(amd_iommu_dev_table);
          +             entry |= (dev_table_size >> 12) - 1;
          +             memcpy_toio(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET,
          +                             &entry, sizeof(entry));
          +         }
          +         
+ ++++++++++++++++++/* Generic functions to enable/disable certain features of the IOMMU. */
          +         static void __init iommu_feature_enable(struct amd_iommu *iommu, u8 bit)
          +         {
          +             u32 ctrl;
          +         
          +             ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET);
          +             ctrl |= (1 << bit);
          +             writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
          +         }
          +         
          +         static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
          +         {
          +             u32 ctrl;
          +         
          +             ctrl = (u64)readl(iommu->mmio_base + MMIO_CONTROL_OFFSET);
          +             ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET);
          +             writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
          +         }
          +         
+ ++++++++++++++++++/* Function to enable the hardware */
          +         void __init iommu_enable(struct amd_iommu *iommu)
          +         {
          +             printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at ");
          +             print_devid(iommu->devid, 0);
          +             printk(" cap 0x%hx\n", iommu->cap_ptr);
          +         
          +             iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in
+ ++++++++++++++++++ * the system has one.
+ ++++++++++++++++++ */
          +         static u8 * __init iommu_map_mmio_space(u64 address)
          +         {
          +             u8 *ret;
          +         
          +             if (!request_mem_region(address, MMIO_REGION_LENGTH, "amd_iommu"))
          +                     return NULL;
          +         
          +             ret = ioremap_nocache(address, MMIO_REGION_LENGTH);
          +             if (ret != NULL)
          +                     return ret;
          +         
          +             release_mem_region(address, MMIO_REGION_LENGTH);
          +         
          +             return NULL;
          +         }
          +         
          +         static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu)
          +         {
          +             if (iommu->mmio_base)
          +                     iounmap(iommu->mmio_base);
          +             release_mem_region(iommu->mmio_phys, MMIO_REGION_LENGTH);
          +         }
          +         
+ ++++++++++++++++++/****************************************************************************
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * The functions below belong to the first pass of AMD IOMMU ACPI table
+ ++++++++++++++++++ * parsing. In this pass we try to find out the highest device id this
+ ++++++++++++++++++ * code has to handle. Based on this information the size of the shared data
+ ++++++++++++++++++ * structures is determined later.
+ ++++++++++++++++++ *
+ ++++++++++++++++++ ****************************************************************************/
+ ++++++++++++++++++
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * This function reads the last device id the IOMMU has to handle from the PCI
+ ++++++++++++++++++ * capability header for this IOMMU
+ ++++++++++++++++++ */
          +         static int __init find_last_devid_on_pci(int bus, int dev, int fn, int cap_ptr)
          +         {
          +             u32 cap;
          +         
          +             cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET);
- -------- ---------                    UPDATE_LAST_BDF(dev->devid);
+ ++++++++++++++++++    update_last_devid(calc_devid(MMIO_GET_BUS(cap), MMIO_GET_LD(cap)));
          +         
          +             return 0;
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * After reading the highest device id from the IOMMU PCI capability header
+ ++++++++++++++++++ * this function checks whether a higher device id is defined in the ACPI table
+ ++++++++++++++++++ */
          +         static int __init find_last_devid_from_ivhd(struct ivhd_header *h)
          +         {
          +             u8 *p = (void *)h, *end = (void *)h;
          +             struct ivhd_entry *dev;
          +         
          +             p += sizeof(*h);
          +             end += h->length;
          +         
          +             find_last_devid_on_pci(PCI_BUS(h->devid),
          +                             PCI_SLOT(h->devid),
          +                             PCI_FUNC(h->devid),
          +                             h->cap_ptr);
          +         
          +             while (p < end) {
          +                     dev = (struct ivhd_entry *)p;
          +                     switch (dev->type) {
          +                     case IVHD_DEV_SELECT:
          +                     case IVHD_DEV_RANGE_END:
          +                     case IVHD_DEV_ALIAS:
          +                     case IVHD_DEV_EXT_SELECT:
- -------- ---------    u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL,
+ ++++++++++++++++++                    /* all the above subfield types refer to device ids */
+ ++++++++++++++++++                    update_last_devid(dev->devid);
          +                             break;
          +                     default:
          +                             break;
          +                     }
          +                     p += 0x04 << (*p >> 6);
          +             }
          +         
          +             WARN_ON(p != end);
          +         
          +             return 0;
          +         }
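
The walk above advances with "p += 0x04 << (*p >> 6)": the top two bits of the IVHD entry type byte encode its length as 4, 8, 16 or 32 bytes. A tiny sketch of that decoding (the type bytes below are just example values):

#include <stdio.h>

int main(void)
{
        unsigned char types[] = { 0x02, 0x42, 0x46, 0xc0 };    /* example type bytes */
        unsigned i;

        for (i = 0; i < sizeof(types); i++)
                printf("type 0x%02x -> %d byte entry\n",
                       types[i], 0x04 << (types[i] >> 6));
        return 0;
}
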
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * Iterate over all IVHD entries in the ACPI table and find the highest device
+ ++++++++++++++++++ * id which we need to handle. This is the first of three functions which parse
+ ++++++++++++++++++ * the ACPI table. So we check the checksum here.
+ ++++++++++++++++++ */
          +         static int __init find_last_devid_acpi(struct acpi_table_header *table)
          +         {
          +             int i;
          +             u8 checksum = 0, *p = (u8 *)table, *end = (u8 *)table;
          +             struct ivhd_header *h;
          +         
          +             /*
          +              * Validate checksum here so we don't need to do it when
          +              * we actually parse the table
          +              */
          +             for (i = 0; i < table->length; ++i)
          +                     checksum += p[i];
          +             if (checksum != 0)
          +                     /* ACPI table corrupt */
          +                     return -ENODEV;
          +         
          +             p += IVRS_HEADER_LENGTH;
          +         
          +             end += table->length;
          +             while (p < end) {
          +                     h = (struct ivhd_header *)p;
          +                     switch (h->type) {
          +                     case ACPI_IVHD_TYPE:
          +                             find_last_devid_from_ivhd(h);
          +                             break;
          +                     default:
          +                             break;
          +                     }
          +                     p += h->length;
          +             }
          +             WARN_ON(p != end);
          +         
          +             return 0;
          +         }
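
The checksum rule used in find_last_devid_acpi() is the usual ACPI one: all bytes of the table, including the checksum byte itself, must sum to zero modulo 256. Shown below on a toy buffer (the buffer contents and the checksum byte's position are arbitrary assumptions for illustration):

#include <stdio.h>

int main(void)
{
        unsigned char table[8] = { 0x49, 0x56, 0x52, 0x53, 8, 0, 0, 0 };
        unsigned char sum = 0;
        unsigned i;

        /* pick the checksum byte so the whole table sums to 0 (mod 256) */
        for (i = 0; i < sizeof(table); i++)
                sum += table[i];
        table[6] = (unsigned char)(0x100 - sum);

        for (sum = 0, i = 0; i < sizeof(table); i++)
                sum += table[i];
        printf("checksum byte=%#x total=%u (must be 0)\n",
               (unsigned)table[6], (unsigned)sum);
        return 0;
}
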
          +         
+ ++++++++++++++++++/****************************************************************************
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * The following functions belong to the code path which parses the ACPI table
+ ++++++++++++++++++ * the second time. In this ACPI parsing iteration we allocate IOMMU specific
+ ++++++++++++++++++ * data structures, initialize the device/alias/rlookup table and also
+ ++++++++++++++++++ * basically initialize the hardware.
+ ++++++++++++++++++ *
+ ++++++++++++++++++ ****************************************************************************/
+ ++++++++++++++++++
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * Allocates the command buffer. This buffer is per AMD IOMMU. We can
+ ++++++++++++++++++ * write commands to that buffer later and the IOMMU will execute them
+ ++++++++++++++++++ * asynchronously
+ ++++++++++++++++++ */
          +         static u8 * __init alloc_command_buffer(struct amd_iommu *iommu)
          +         {
- -------- ---------    u64 entry = 0;
+ ++++++++++++++++++    u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
          +                             get_order(CMD_BUFFER_SIZE));
- -------- ---------    memset(cmd_buf, 0, CMD_BUFFER_SIZE);
- -------- ---------
+ ++++++++++++++++++    u64 entry;
          +         
          +             if (cmd_buf == NULL)
          +                     return NULL;
          +         
          +             iommu->cmd_buf_size = CMD_BUFFER_SIZE;
          +         
- -------- ---------    if (iommu->cmd_buf)
- -------- ---------            free_pages((unsigned long)iommu->cmd_buf,
- -------- ---------                            get_order(CMD_BUFFER_SIZE));
          +             entry = (u64)virt_to_phys(cmd_buf);
          +             entry |= MMIO_CMD_SIZE_512;
          +             memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET,
          +                             &entry, sizeof(entry));
          +         
          +             iommu_feature_enable(iommu, CONTROL_CMDBUF_EN);
          +         
          +             return cmd_buf;
          +         }
          +         
          +         static void __init free_command_buffer(struct amd_iommu *iommu)
          +         {
- -------- ---------static void __init set_dev_entry_from_acpi(u16 devid, u32 flags, u32 ext_flags)
+ ++++++++++++++++++    free_pages((unsigned long)iommu->cmd_buf, get_order(CMD_BUFFER_SIZE));
          +         }
          +         
+ ++++++++++++++++++/* sets a specific bit in the device table entry. */
          +         static void set_dev_entry_bit(u16 devid, u8 bit)
          +         {
          +             int i = (bit >> 5) & 0x07;
          +             int _bit = bit & 0x1f;
          +         
          +             amd_iommu_dev_table[devid].data[i] |= (1 << _bit);
          +         }
          +         
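A device table entry's data[] array is treated above as a flat bit field of 32-bit words: bit >> 5 selects the word and bit & 0x1f the position inside it. A small illustration of that index arithmetic (hypothetical helper and bit value, for demonstration only):

/* Illustration only: splitting a flat bit number into a 32-bit word
 * index and a bit position, exactly as set_dev_entry_bit() does.
 * For bit 37 this yields data[1] |= (1 << 5). */
static void show_dev_entry_bit_split(u16 devid, u8 bit)
{
	int word = (bit >> 5) & 0x07;	/* which 32-bit word of data[] */
	int pos  = bit & 0x1f;		/* which bit inside that word */

	printk(KERN_DEBUG "devid 0x%04x: bit %d -> data[%d] bit %d\n",
	       devid, bit, word, pos);
}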
- -------- ---------}
+ ++++++++++++++++++/* Writes the specific IOMMU for a device into the rlookup table */
+ ++++++++++++++++++static void __init set_iommu_for_device(struct amd_iommu *iommu, u16 devid)
+ ++++++++++++++++++{
+ ++++++++++++++++++    amd_iommu_rlookup_table[devid] = iommu;
+ ++++++++++++++++++}
+ ++++++++++++++++++
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * This function takes the device specific flags read from the ACPI
+ ++++++++++++++++++ * table and sets up the device table entry with that information
+ ++++++++++++++++++ */
+ ++++++++++++++++++static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu,
+ ++++++++++++++++++                                       u16 devid, u32 flags, u32 ext_flags)
          +         {
          +             if (flags & ACPI_DEVFLAG_INITPASS)
          +                     set_dev_entry_bit(devid, DEV_ENTRY_INIT_PASS);
          +             if (flags & ACPI_DEVFLAG_EXTINT)
          +                     set_dev_entry_bit(devid, DEV_ENTRY_EINT_PASS);
          +             if (flags & ACPI_DEVFLAG_NMI)
          +                     set_dev_entry_bit(devid, DEV_ENTRY_NMI_PASS);
          +             if (flags & ACPI_DEVFLAG_SYSMGT1)
          +                     set_dev_entry_bit(devid, DEV_ENTRY_SYSMGT1);
          +             if (flags & ACPI_DEVFLAG_SYSMGT2)
          +                     set_dev_entry_bit(devid, DEV_ENTRY_SYSMGT2);
          +             if (flags & ACPI_DEVFLAG_LINT0)
          +                     set_dev_entry_bit(devid, DEV_ENTRY_LINT0_PASS);
          +             if (flags & ACPI_DEVFLAG_LINT1)
          +                     set_dev_entry_bit(devid, DEV_ENTRY_LINT1_PASS);
- -------- ---------static void __init set_iommu_for_device(struct amd_iommu *iommu, u16 devid)
- -------- ---------{
- -------- ---------    amd_iommu_rlookup_table[devid] = iommu;
          +         
- -------- ---------    iommu->first_device = DEVID(MMIO_GET_BUS(range), MMIO_GET_FD(range));
- -------- ---------    iommu->last_device = DEVID(MMIO_GET_BUS(range), MMIO_GET_LD(range));
+ ++++++++++++++++++    set_iommu_for_device(iommu, devid);
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * Reads the device exclusion range from ACPI and initializes the IOMMU
+ ++++++++++++++++++ * with it
+ ++++++++++++++++++ */
          +         static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m)
          +         {
          +             struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
          +         
          +             if (!(m->flags & IVMD_FLAG_EXCL_RANGE))
          +                     return;
          +         
          +             if (iommu) {
+ ++++++++++++++++++            /*
+ ++++++++++++++++++             * We can only configure exclusion ranges per IOMMU, not
+ ++++++++++++++++++             * per device. But we can enable the exclusion range per
+ ++++++++++++++++++             * device. This is done here.
+ ++++++++++++++++++             */
          +                     set_dev_entry_bit(m->devid, DEV_ENTRY_EX);
          +                     iommu->exclusion_start = m->range_start;
          +                     iommu->exclusion_length = m->range_length;
          +             }
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * This function reads some important data from the IOMMU PCI space and
+ ++++++++++++++++++ * initializes the driver data structure with it. It reads the hardware
+ ++++++++++++++++++ * capabilities and the first/last device entries
+ ++++++++++++++++++ */
          +         static void __init init_iommu_from_pci(struct amd_iommu *iommu)
          +         {
          +             int bus = PCI_BUS(iommu->devid);
          +             int dev = PCI_SLOT(iommu->devid);
          +             int fn  = PCI_FUNC(iommu->devid);
          +             int cap_ptr = iommu->cap_ptr;
          +             u32 range;
          +         
          +             iommu->cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_CAP_HDR_OFFSET);
          +         
          +             range = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET);
- -------- ---------    bool alias = 0;
+ ++++++++++++++++++    iommu->first_device = calc_devid(MMIO_GET_BUS(range),
+ ++++++++++++++++++                                     MMIO_GET_FD(range));
+ ++++++++++++++++++    iommu->last_device = calc_devid(MMIO_GET_BUS(range),
+ ++++++++++++++++++                                    MMIO_GET_LD(range));
          +         }
          +         
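init_iommu_from_pci() above relies on calc_devid(), which is defined elsewhere in the driver headers. The sketch below is an assumption, not the driver's definition: it packs the bus number and devfn into the conventional 16-bit device id layout that PCI_BUS()/PCI_SLOT()/PCI_FUNC() undo.

/* Sketch under the assumption that a device id is simply the PCI bus
 * number in bits 15:8 and the devfn in bits 7:0. */
static inline u16 example_calc_devid(u8 bus, u8 devfn)
{
	return ((u16)bus << 8) | devfn;
}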
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * Takes a pointer to an AMD IOMMU entry in the ACPI table and
+ ++++++++++++++++++ * initializes the hardware and our data structures with it.
+ ++++++++++++++++++ */
          +         static void __init init_iommu_from_acpi(struct amd_iommu *iommu,
          +                                             struct ivhd_header *h)
          +         {
          +             u8 *p = (u8 *)h;
          +             u8 *end = p, flags = 0;
          +             u16 dev_i, devid = 0, devid_start = 0, devid_to = 0;
          +             u32 ext_flags = 0;
- -------- ---------                            set_dev_entry_from_acpi(dev_i, e->flags, 0);
+ ++++++++++++++++++    bool alias = false;
          +             struct ivhd_entry *e;
          +         
          +             /*
          +              * First set the recommended feature enable bits from ACPI
          +              * into the IOMMU control registers
          +              */
          +             h->flags & IVHD_FLAG_HT_TUN_EN ?
          +                     iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) :
          +                     iommu_feature_disable(iommu, CONTROL_HT_TUN_EN);
          +         
          +             h->flags & IVHD_FLAG_PASSPW_EN ?
          +                     iommu_feature_enable(iommu, CONTROL_PASSPW_EN) :
          +                     iommu_feature_disable(iommu, CONTROL_PASSPW_EN);
          +         
          +             h->flags & IVHD_FLAG_RESPASSPW_EN ?
          +                     iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) :
          +                     iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN);
          +         
          +             h->flags & IVHD_FLAG_ISOC_EN ?
          +                     iommu_feature_enable(iommu, CONTROL_ISOC_EN) :
          +                     iommu_feature_disable(iommu, CONTROL_ISOC_EN);
          +         
          +             /*
          +              * make IOMMU memory accesses cache coherent
          +              */
          +             iommu_feature_enable(iommu, CONTROL_COHERENT_EN);
          +         
          +             /*
          +              * Done. Now parse the device entries
          +              */
          +             p += sizeof(struct ivhd_header);
          +             end += h->length;
          +         
          +             while (p < end) {
          +                     e = (struct ivhd_entry *)p;
          +                     switch (e->type) {
          +                     case IVHD_DEV_ALL:
          +                             for (dev_i = iommu->first_device;
          +                                             dev_i <= iommu->last_device; ++dev_i)
- -------- ---------                    set_dev_entry_from_acpi(devid, e->flags, 0);
+ ++++++++++++++++++                            set_dev_entry_from_acpi(iommu, dev_i,
+ ++++++++++++++++++                                                    e->flags, 0);
          +                             break;
          +                     case IVHD_DEV_SELECT:
          +                             devid = e->devid;
- -------- ---------                    alias = 0;
+ ++++++++++++++++++                    set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
          +                             break;
          +                     case IVHD_DEV_SELECT_RANGE_START:
          +                             devid_start = e->devid;
          +                             flags = e->flags;
          +                             ext_flags = 0;
- -------- ---------                    set_dev_entry_from_acpi(devid, e->flags, 0);
+ ++++++++++++++++++                    alias = false;
          +                             break;
          +                     case IVHD_DEV_ALIAS:
          +                             devid = e->devid;
          +                             devid_to = e->ext >> 8;
- -------- ---------                    alias = 1;
+ ++++++++++++++++++                    set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
          +                             amd_iommu_alias_table[devid] = devid_to;
          +                             break;
          +                     case IVHD_DEV_ALIAS_RANGE:
          +                             devid_start = e->devid;
          +                             flags = e->flags;
          +                             devid_to = e->ext >> 8;
          +                             ext_flags = 0;
- -------- ---------                    set_dev_entry_from_acpi(devid, e->flags, e->ext);
+ ++++++++++++++++++                    alias = true;
          +                             break;
          +                     case IVHD_DEV_EXT_SELECT:
          +                             devid = e->devid;
- -------- ---------                    alias = 0;
+ ++++++++++++++++++                    set_dev_entry_from_acpi(iommu, devid, e->flags,
+ ++++++++++++++++++                                            e->ext);
          +                             break;
          +                     case IVHD_DEV_EXT_SELECT_RANGE:
          +                             devid_start = e->devid;
          +                             flags = e->flags;
          +                             ext_flags = e->ext;
- -------- ---------                            set_dev_entry_from_acpi(
+ ++++++++++++++++++                    alias = false;
          +                             break;
          +                     case IVHD_DEV_RANGE_END:
          +                             devid = e->devid;
          +                             for (dev_i = devid_start; dev_i <= devid; ++dev_i) {
          +                                     if (alias)
          +                                             amd_iommu_alias_table[dev_i] = devid_to;
- -------- ---------    INIT_LIST_HEAD(&amd_iommu_list);
- -------- ---------
+ ++++++++++++++++++                            set_dev_entry_from_acpi(iommu,
          +                                                     amd_iommu_alias_table[dev_i],
          +                                                     flags, ext_flags);
          +                             }
          +                             break;
          +                     default:
          +                             break;
          +                     }
          +         
          +                     p += 0x04 << (e->type >> 6);
          +             }
          +         }
          +         
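The cursor update at the bottom of the parsing loop above derives each device entry's size from its type field: the two high bits of e->type select a stride of 4, 8, 16 or 32 bytes (assuming the usual IVHD type encoding, in which e.g. the 8-byte alias entries have types in the 0x40 range). A tiny helper expressing the same formula:

/* Same stride formula as the loop above: entry size in bytes is
 * determined by the two most significant bits of the entry type. */
static inline unsigned int ivhd_entry_size(u8 type)
{
	return 0x04 << (type >> 6);	/* 4, 8, 16 or 32 bytes */
}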
+ ++++++++++++++++++/* Initializes the device->iommu mapping for the driver */
          +         static int __init init_iommu_devices(struct amd_iommu *iommu)
          +         {
          +             u16 i;
          +         
          +             for (i = iommu->first_device; i <= iommu->last_device; ++i)
          +                     set_iommu_for_device(iommu, i);
          +         
          +             return 0;
          +         }
          +         
          +         static void __init free_iommu_one(struct amd_iommu *iommu)
          +         {
          +             free_command_buffer(iommu);
          +             iommu_unmap_mmio_space(iommu);
          +         }
          +         
          +         static void __init free_iommu_all(void)
          +         {
          +             struct amd_iommu *iommu, *next;
          +         
          +             list_for_each_entry_safe(iommu, next, &amd_iommu_list, list) {
          +                     list_del(&iommu->list);
          +                     free_iommu_one(iommu);
          +                     kfree(iommu);
          +             }
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * This function glues the initialization of one IOMMU together. It
+ ++++++++++++++++++ * also allocates the command buffer and programs the hardware. It
+ ++++++++++++++++++ * does NOT enable the IOMMU. That is done afterwards.
+ ++++++++++++++++++ */
          +         static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
          +         {
          +             spin_lock_init(&iommu->lock);
          +             list_add_tail(&iommu->list, &amd_iommu_list);
          +         
          +             /*
          +              * Copy data from ACPI table entry to the iommu struct
          +              */
          +             iommu->devid = h->devid;
          +             iommu->cap_ptr = h->cap_ptr;
          +             iommu->mmio_phys = h->mmio_phys;
          +             iommu->mmio_base = iommu_map_mmio_space(h->mmio_phys);
          +             if (!iommu->mmio_base)
          +                     return -ENOMEM;
          +         
          +             iommu_set_device_table(iommu);
          +             iommu->cmd_buf = alloc_command_buffer(iommu);
          +             if (!iommu->cmd_buf)
          +                     return -ENOMEM;
          +         
          +             init_iommu_from_pci(iommu);
          +             init_iommu_from_acpi(iommu, h);
          +             init_iommu_devices(iommu);
          +         
          +             return 0;
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * Iterates over all IOMMU entries in the ACPI table, allocates the
+ ++++++++++++++++++ * IOMMU structure and initializes it with init_iommu_one()
+ ++++++++++++++++++ */
          +         static int __init init_iommu_all(struct acpi_table_header *table)
          +         {
          +             u8 *p = (u8 *)table, *end = (u8 *)table;
          +             struct ivhd_header *h;
          +             struct amd_iommu *iommu;
          +             int ret;
          +         
- -------- ---------    INIT_LIST_HEAD(&amd_iommu_unity_map);
- -------- ---------
          +             end += table->length;
          +             p += IVRS_HEADER_LENGTH;
          +         
          +             while (p < end) {
          +                     h = (struct ivhd_header *)p;
          +                     switch (*p) {
          +                     case ACPI_IVHD_TYPE:
          +                             iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL);
          +                             if (iommu == NULL)
          +                                     return -ENOMEM;
          +                             ret = init_iommu_one(iommu, h);
          +                             if (ret)
          +                                     return ret;
          +                             break;
          +                     default:
          +                             break;
          +                     }
          +                     p += h->length;
          +         
          +             }
          +             WARN_ON(p != end);
          +         
          +             return 0;
          +         }
          +         
+ ++++++++++++++++++/****************************************************************************
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * The next functions belong to the third pass of parsing the ACPI
+ ++++++++++++++++++ * table. In this last pass the memory mapping requirements are
+ ++++++++++++++++++ * gathered (like exclusion and unity mapping ranges).
+ ++++++++++++++++++ *
+ ++++++++++++++++++ ****************************************************************************/
+ ++++++++++++++++++
          +         static void __init free_unity_maps(void)
          +         {
          +             struct unity_map_entry *entry, *next;
          +         
          +             list_for_each_entry_safe(entry, next, &amd_iommu_unity_map, list) {
          +                     list_del(&entry->list);
          +                     kfree(entry);
          +             }
          +         }
          +         
+ ++++++++++++++++++/* called when we find an exclusion range definition in ACPI */
          +         static int __init init_exclusion_range(struct ivmd_header *m)
          +         {
          +             int i;
          +         
          +             switch (m->type) {
          +             case ACPI_IVMD_TYPE:
          +                     set_device_exclusion_range(m->devid, m);
          +                     break;
          +             case ACPI_IVMD_TYPE_ALL:
          +                     for (i = 0; i < amd_iommu_last_bdf; ++i)
          +                             set_device_exclusion_range(i, m);
          +                     break;
          +             case ACPI_IVMD_TYPE_RANGE:
          +                     for (i = m->devid; i <= m->aux; ++i)
          +                             set_device_exclusion_range(i, m);
          +                     break;
          +             default:
          +                     break;
          +             }
          +         
          +             return 0;
          +         }
          +         
+ ++++++++++++++++++/* called for unity map ACPI definition */
          +         static int __init init_unity_map_range(struct ivmd_header *m)
          +         {
          +             struct unity_map_entry *e = 0;
          +         
          +             e = kzalloc(sizeof(*e), GFP_KERNEL);
          +             if (e == NULL)
          +                     return -ENOMEM;
          +         
          +             switch (m->type) {
          +             default:
          +             case ACPI_IVMD_TYPE:
          +                     e->devid_start = e->devid_end = m->devid;
          +                     break;
          +             case ACPI_IVMD_TYPE_ALL:
          +                     e->devid_start = 0;
          +                     e->devid_end = amd_iommu_last_bdf;
          +                     break;
          +             case ACPI_IVMD_TYPE_RANGE:
          +                     e->devid_start = m->devid;
          +                     e->devid_end = m->aux;
          +                     break;
          +             }
          +             e->address_start = PAGE_ALIGN(m->range_start);
          +             e->address_end = e->address_start + PAGE_ALIGN(m->range_length);
          +             e->prot = m->flags >> 1;
          +         
          +             list_add_tail(&e->list, &amd_iommu_unity_map);
          +         
          +             return 0;
          +         }
          +         
+ ++++++++++++++++++/* iterates over all memory definitions we find in the ACPI table */
          +         static int __init init_memory_definitions(struct acpi_table_header *table)
          +         {
          +             u8 *p = (u8 *)table, *end = (u8 *)table;
          +             struct ivmd_header *m;
          +         
- -------- ---------    dev_table_size     = TBL_SIZE(DEV_TABLE_ENTRY_SIZE);
- -------- ---------    alias_table_size   = TBL_SIZE(ALIAS_TABLE_ENTRY_SIZE);
- -------- ---------    rlookup_table_size = TBL_SIZE(RLOOKUP_TABLE_ENTRY_SIZE);
          +             end += table->length;
          +             p += IVRS_HEADER_LENGTH;
          +         
          +             while (p < end) {
          +                     m = (struct ivmd_header *)p;
          +                     if (m->flags & IVMD_FLAG_EXCL_RANGE)
          +                             init_exclusion_range(m);
          +                     else if (m->flags & IVMD_FLAG_UNITY_MAP)
          +                             init_unity_map_range(m);
          +         
          +                     p += m->length;
          +             }
          +         
          +             return 0;
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * This function finally enables all IOMMUs found in the system after
+ ++++++++++++++++++ * they have been initialized
+ ++++++++++++++++++ */
          +         static void __init enable_iommus(void)
          +         {
          +             struct amd_iommu *iommu;
          +         
          +             list_for_each_entry(iommu, &amd_iommu_list, list) {
          +                     iommu_set_exclusion_range(iommu);
          +                     iommu_enable(iommu);
          +             }
          +         }
          +         
          +         /*
          +          * Suspend/Resume support
          +          * disable suspend until real resume is implemented
          +          */
          +         
          +         static int amd_iommu_resume(struct sys_device *dev)
          +         {
          +             return 0;
          +         }
          +         
          +         static int amd_iommu_suspend(struct sys_device *dev, pm_message_t state)
          +         {
          +             return -EINVAL;
          +         }
          +         
          +         static struct sysdev_class amd_iommu_sysdev_class = {
          +             .name = "amd_iommu",
          +             .suspend = amd_iommu_suspend,
          +             .resume = amd_iommu_resume,
          +         };
          +         
          +         static struct sys_device device_amd_iommu = {
          +             .id = 0,
          +             .cls = &amd_iommu_sysdev_class,
          +         };
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * This is the core init function for AMD IOMMU hardware in the system.
+ ++++++++++++++++++ * This function is called from the generic x86 DMA layer initialization
+ ++++++++++++++++++ * code.
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * This function basically parses the ACPI table for AMD IOMMU (IVRS)
+ ++++++++++++++++++ * three times:
+ ++++++++++++++++++ *
+ ++++++++++++++++++ *  Pass 1) Find the highest PCI device id the driver has to handle.
+ ++++++++++++++++++ *          Based on this information the size of the data structures
+ ++++++++++++++++++ *          that need to be allocated is determined.
+ ++++++++++++++++++ *
+ ++++++++++++++++++ *  Pass 2) Initialize the data structures just allocated with the
+ ++++++++++++++++++ *          information in the ACPI table about the available AMD
+ ++++++++++++++++++ *          IOMMUs in the system. It also maps the PCI devices in
+ ++++++++++++++++++ *          the system to specific IOMMUs.
+ ++++++++++++++++++ *
+ ++++++++++++++++++ *  Pass 3) After the basic data structures are allocated and
+ ++++++++++++++++++ *          initialized we update them with information about memory
+ ++++++++++++++++++ *          remapping requirements parsed out of the ACPI table in
+ ++++++++++++++++++ *          this last pass.
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * After that the hardware is initialized and ready to go. In the last
+ ++++++++++++++++++ * step we do some Linux-specific things like registering the driver in
+ ++++++++++++++++++ * the dma_ops interface and initializing the suspend/resume support
+ ++++++++++++++++++ * functions. Finally we print some information about the AMD IOMMUs
+ ++++++++++++++++++ * and the driver state and enable the hardware.
+ ++++++++++++++++++ */
          +         int __init amd_iommu_init(void)
          +         {
          +             int i, ret = 0;
          +         
          +         
          +             if (no_iommu) {
          +                     printk(KERN_INFO "AMD IOMMU disabled by kernel command line\n");
          +                     return 0;
          +             }
          +         
          +             if (!amd_iommu_detected)
          +                     return -ENODEV;
          +         
          +             /*
          +              * First parse ACPI tables to find the largest Bus/Dev/Func
          +              * we need to handle. Based on this information the shared data
          +              * structures for the IOMMUs in the system will be allocated
          +              */
          +             if (acpi_table_parse("IVRS", find_last_devid_acpi) != 0)
          +                     return -ENODEV;
          +         
- -------- ---------    amd_iommu_dev_table = (void *)__get_free_pages(GFP_KERNEL,
+ ++++++++++++++++++    dev_table_size     = tbl_size(DEV_TABLE_ENTRY_SIZE);
+ ++++++++++++++++++    alias_table_size   = tbl_size(ALIAS_TABLE_ENTRY_SIZE);
+ ++++++++++++++++++    rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE);
          +         
          +             ret = -ENOMEM;
          +         
          +             /* Device table - directly used by all IOMMUs */
- -------- ---------    amd_iommu_pd_table = (void *)__get_free_pages(GFP_KERNEL,
+ ++++++++++++++++++    amd_iommu_dev_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
          +                                           get_order(dev_table_size));
          +             if (amd_iommu_dev_table == NULL)
          +                     goto out;
          +         
          +             /*
          +              * Alias table - map PCI Bus/Dev/Func to Bus/Dev/Func the
          +              * IOMMU sees for that device
          +              */
          +             amd_iommu_alias_table = (void *)__get_free_pages(GFP_KERNEL,
          +                             get_order(alias_table_size));
          +             if (amd_iommu_alias_table == NULL)
          +                     goto free;
          +         
          +             /* IOMMU rlookup table - find the IOMMU for a specific device */
          +             amd_iommu_rlookup_table = (void *)__get_free_pages(GFP_KERNEL,
          +                             get_order(rlookup_table_size));
          +             if (amd_iommu_rlookup_table == NULL)
          +                     goto free;
          +         
          +             /*
          +              * Protection Domain table - maps devices to protection domains.
          +              * This table has the same size as the rlookup_table
          +              */
- -------- ---------    amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages(GFP_KERNEL,
+ ++++++++++++++++++    amd_iommu_pd_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
          +                                          get_order(rlookup_table_size));
          +             if (amd_iommu_pd_table == NULL)
          +                     goto free;
          +         
- -------- ---------     * memory is allocated now; initialize the device table with all zeroes
- -------- ---------     * and let all alias entries point to itself
+ ++++++++++++++++++    amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages(
+ ++++++++++++++++++                                        GFP_KERNEL | __GFP_ZERO,
          +                                                 get_order(MAX_DOMAIN_ID/8));
          +             if (amd_iommu_pd_alloc_bitmap == NULL)
          +                     goto free;
          +         
          +             /*
- -------- ---------    memset(amd_iommu_dev_table, 0, dev_table_size);
+ ++++++++++++++++++     * let all alias entries point to themselves
          +              */
- -------- ---------    memset(amd_iommu_pd_table, 0, rlookup_table_size);
- -------- ---------    memset(amd_iommu_pd_alloc_bitmap, 0, MAX_DOMAIN_ID / 8);
- -------- ---------
          +             for (i = 0; i < amd_iommu_last_bdf; ++i)
          +                     amd_iommu_alias_table[i] = i;
          +         
- -------- ---------    if (amd_iommu_pd_alloc_bitmap)
- -------- ---------            free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, 1);
          +             /*
          +              * never allocate domain 0 because it's used as the non-allocated and
          +              * error value placeholder
          +              */
          +             amd_iommu_pd_alloc_bitmap[0] = 1;
          +         
          +             /*
          +              * now that the data structures are allocated and basically
          +              * initialized we can start the real ACPI table scan
          +              */
          +             ret = -ENODEV;
          +             if (acpi_table_parse("IVRS", init_iommu_all) != 0)
          +                     goto free;
          +         
          +             if (acpi_table_parse("IVRS", init_memory_definitions) != 0)
          +                     goto free;
          +         
          +             ret = amd_iommu_init_dma_ops();
          +             if (ret)
          +                     goto free;
          +         
          +             ret = sysdev_class_register(&amd_iommu_sysdev_class);
          +             if (ret)
          +                     goto free;
          +         
          +             ret = sysdev_register(&device_amd_iommu);
          +             if (ret)
          +                     goto free;
          +         
          +             enable_iommus();
          +         
          +             printk(KERN_INFO "AMD IOMMU: aperture size is %d MB\n",
          +                             (1 << (amd_iommu_aperture_order-20)));
          +         
          +             printk(KERN_INFO "AMD IOMMU: device isolation ");
          +             if (amd_iommu_isolate)
          +                     printk("enabled\n");
          +             else
          +                     printk("disabled\n");
          +         
          +         out:
          +             return ret;
          +         
          +         free:
- -------- ---------    if (amd_iommu_pd_table)
- -------- ---------            free_pages((unsigned long)amd_iommu_pd_table,
- -------- ---------                            get_order(rlookup_table_size));
+ ++++++++++++++++++    free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, 1);
          +         
- -------- ---------    if (amd_iommu_rlookup_table)
- -------- ---------            free_pages((unsigned long)amd_iommu_rlookup_table,
- -------- ---------                            get_order(rlookup_table_size));
+ ++++++++++++++++++    free_pages((unsigned long)amd_iommu_pd_table,
+ ++++++++++++++++++               get_order(rlookup_table_size));
          +         
- -------- ---------    if (amd_iommu_alias_table)
- -------- ---------            free_pages((unsigned long)amd_iommu_alias_table,
- -------- ---------                            get_order(alias_table_size));
+ ++++++++++++++++++    free_pages((unsigned long)amd_iommu_rlookup_table,
+ ++++++++++++++++++               get_order(rlookup_table_size));
          +         
- -------- ---------    if (amd_iommu_dev_table)
- -------- ---------            free_pages((unsigned long)amd_iommu_dev_table,
- -------- ---------                            get_order(dev_table_size));
+ ++++++++++++++++++    free_pages((unsigned long)amd_iommu_alias_table,
+ ++++++++++++++++++               get_order(alias_table_size));
          +         
- -------- ---------    if (swiotlb || no_iommu || iommu_detected)
+ ++++++++++++++++++    free_pages((unsigned long)amd_iommu_dev_table,
+ ++++++++++++++++++               get_order(dev_table_size));
          +         
          +             free_iommu_all();
          +         
          +             free_unity_maps();
          +         
          +             goto out;
          +         }
          +         
+ ++++++++++++++++++/****************************************************************************
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * Early detect code. This code runs at IOMMU detection time in the DMA
+ ++++++++++++++++++ * layer. It just checks whether there is an IVRS ACPI table to detect
+ ++++++++++++++++++ * AMD IOMMUs
+ ++++++++++++++++++ *
+ ++++++++++++++++++ ****************************************************************************/
          +         static int __init early_amd_iommu_detect(struct acpi_table_header *table)
          +         {
          +             return 0;
          +         }
          +         
          +         void __init amd_iommu_detect(void)
          +         {
- -------- ---------    for (; *str; ++str) {
- -------- ---------            if (strcmp(str, "32M") == 0)
- -------- ---------                    amd_iommu_aperture_order = 25;
- -------- ---------            if (strcmp(str, "64M") == 0)
- -------- ---------                    amd_iommu_aperture_order = 26;
- -------- ---------            if (strcmp(str, "128M") == 0)
- -------- ---------                    amd_iommu_aperture_order = 27;
- -------- ---------            if (strcmp(str, "256M") == 0)
- -------- ---------                    amd_iommu_aperture_order = 28;
- -------- ---------            if (strcmp(str, "512M") == 0)
- -------- ---------                    amd_iommu_aperture_order = 29;
- -------- ---------            if (strcmp(str, "1G") == 0)
- -------- ---------                    amd_iommu_aperture_order = 30;
- -------- ---------    }
+ ++++++++++++++++++    if (swiotlb || no_iommu || (iommu_detected && !gart_iommu_aperture))
          +                     return;
          +         
          +             if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) {
          +                     iommu_detected = 1;
          +                     amd_iommu_detected = 1;
          +         #ifdef CONFIG_GART_IOMMU
          +                     gart_iommu_aperture_disabled = 1;
          +                     gart_iommu_aperture = 0;
          +         #endif
          +             }
          +         }
          +         
+ ++++++++++++++++++/****************************************************************************
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * Parsing functions for the AMD IOMMU specific kernel command line
+ ++++++++++++++++++ * options.
+ ++++++++++++++++++ *
+ ++++++++++++++++++ ****************************************************************************/
+ ++++++++++++++++++
          +         static int __init parse_amd_iommu_options(char *str)
          +         {
          +             for (; *str; ++str) {
          +                     if (strcmp(str, "isolate") == 0)
          +                             amd_iommu_isolate = 1;
          +             }
          +         
          +             return 1;
          +         }
          +         
          +         static int __init parse_amd_iommu_size_options(char *str)
          +         {
+ ++++++++++++++++++    unsigned order = PAGE_SHIFT + get_order(memparse(str, &str));
+ ++++++++++++++++++
+ ++++++++++++++++++    if ((order > 24) && (order < 31))
+ ++++++++++++++++++            amd_iommu_aperture_order = order;
          +         
          +             return 1;
          +         }
          +         
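parse_amd_iommu_size_options() above turns a human-readable size into a power-of-two order: memparse() accepts suffixes like M and G, get_order() converts the byte count into a page order, and adding PAGE_SHIFT gives the order in bytes. Assuming 4 KiB pages (PAGE_SHIFT == 12), "64M" yields 12 + 14 = 26 and "1G" yields 30; anything outside 25..30 (32M..1G) is silently ignored, which matches the fixed lookup table this code replaces. A standalone sketch of the same computation (hypothetical helper name):

/* Sketch of the order computation, assuming PAGE_SHIFT == 12:
 * "32M" -> 25, "64M" -> 26, ..., "1G" -> 30; 0 means rejected. */
static unsigned int example_aperture_order(char *str)
{
	unsigned int order = PAGE_SHIFT + get_order(memparse(str, &str));

	return (order > 24 && order < 31) ? order : 0;
}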
          +         __setup("amd_iommu=", parse_amd_iommu_options);
          +         __setup("amd_iommu_size=", parse_amd_iommu_size_options);
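For reference, the two __setup() hooks above make the corresponding boot parameters take effect; the size value goes through memparse(), so the usual suffixes work. Example kernel command line fragments:

	amd_iommu=isolate
	amd_iommu_size=64M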
index a437d027f20b6d8d7ba3dc88400220e796afe41e,a437d027f20b6d8d7ba3dc88400220e796afe41e,7f30c0f3dbe453d97feaa7e065bc8bc66a693152,a437d027f20b6d8d7ba3dc88400220e796afe41e,e9a00e5074b26ed68698fe03681d09937d718bed,a437d027f20b6d8d7ba3dc88400220e796afe41e,3e58b676d23b8fd96a87823e30fcc4566d60aad6,3e947208b9d96b781ef8ea36b91094e445bbaa11,a437d027f20b6d8d7ba3dc88400220e796afe41e,a437d027f20b6d8d7ba3dc88400220e796afe41e,4b99b1bdeb6cbb090e5868138922b259da7d98fe,a437d027f20b6d8d7ba3dc88400220e796afe41e,3e58b676d23b8fd96a87823e30fcc4566d60aad6,a437d027f20b6d8d7ba3dc88400220e796afe41e,a437d027f20b6d8d7ba3dc88400220e796afe41e,a437d027f20b6d8d7ba3dc88400220e796afe41e,3e58b676d23b8fd96a87823e30fcc4566d60aad6,a437d027f20b6d8d7ba3dc88400220e796afe41e,a437d027f20b6d8d7ba3dc88400220e796afe41e,a437d027f20b6d8d7ba3dc88400220e796afe41e..d6c8983583713d747790587861318a5fb58eb342
                    
                    unsigned long mp_lapic_addr;
                    
          -         DEFINE_PER_CPU(u16, x86_bios_cpu_apicid) = BAD_APICID;
          -         EXPORT_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
          -         
                    /*
                     * Knob to control our willingness to enable the local APIC.
                     *
          -          * -1=force-disable, +1=force-enable
          +          * +1=force-enable
                     */
          -         static int enable_local_apic __initdata;
          +         static int force_enable_local_apic;
          +         int disable_apic;
                    
                    /* Local APIC timer verification ok */
                    static int local_apic_timer_verify_ok;
          -         /* Disable local APIC timer from the kernel commandline or via dmi quirk
          -            or using CPU MSR check */
          -         int local_apic_timer_disabled;
          +         /* Disable local APIC timer from the kernel commandline or via dmi quirk */
          +         static int local_apic_timer_disabled;
                    /* Local APIC timer works in C2 */
                    int local_apic_timer_c2_ok;
                    EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
                    
          +         int first_system_vector = 0xfe;
          +         
          +         char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE};
          +         
                    /*
                     * Debug level, exported for io_apic.c
                     */
-- -----------------int apic_verbosity;
++ +++++++++++++++++unsigned int apic_verbosity;
          +         
          +         int pic_mode;
          +         
          +         /* Have we found an MP table */
          +         int smp_found_config;
          +         
          +         static struct resource lapic_resource = {
          +             .name = "Local APIC",
          +             .flags = IORESOURCE_MEM | IORESOURCE_BUSY,
          +         };
                    
                    static unsigned int calibration_result;
                    
@@@@@@@@@@@@@@@@@@@@@ -177,7 -177,7 -177,7 -177,7 -177,7 -177,7 -177,7 -177,7 -177,7 -177,7 -166,7 -177,7 -177,7 -177,7 -177,7 -177,7 -177,7 -177,7 -177,7 -177,7 +177,7 @@@@@@@@@@@@@@@@@@@@@ void __cpuinit enable_NMI_through_LVT0(
                        /* Level triggered for 82489DX */
                        if (!lapic_is_integrated())
                                v |= APIC_LVT_LEVEL_TRIGGER;
-- -----------------    apic_write_around(APIC_LVT0, v);
++ +++++++++++++++++    apic_write(APIC_LVT0, v);
                    }
                    
                    /**
@@@@@@@@@@@@@@@@@@@@@ -212,9 -212,9 -212,6 -212,9 -212,9 -212,9 -212,9 -212,9 -212,9 -212,9 -201,9 -212,9 -212,9 -212,9 -212,9 -212,9 -212,9 -212,9 -212,9 -212,9 +212,6 @@@@@@@@@@@@@@@@@@@@@ int lapic_get_maxlvt(void
                     * this function twice on the boot CPU, once with a bogus timeout
                     * value, second time for real. The other (noncalibrating) CPUs
                     * call this function only once, with the real, calibrated value.
-- ----------------- *
-- ----------------- * We do reads before writes even if unnecessary, to get around the
-- ----------------- * P5 APIC double write bug.
                     */
                    static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
                    {
                        if (!irqen)
                                lvtt_value |= APIC_LVT_MASKED;
                    
-- -----------------    apic_write_around(APIC_LVTT, lvtt_value);
++ +++++++++++++++++    apic_write(APIC_LVTT, lvtt_value);
                    
                        /*
                         * Divide PICLK by 16
                         */
                        tmp_value = apic_read(APIC_TDCR);
-- -----------------    apic_write_around(APIC_TDCR, (tmp_value
-- -----------------                            & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE))
-- -----------------                            | APIC_TDR_DIV_16);
++ +++++++++++++++++    apic_write(APIC_TDCR,
++ +++++++++++++++++               (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) |
++ +++++++++++++++++               APIC_TDR_DIV_16);
                    
                        if (!oneshot)
-- -----------------            apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR);
++ +++++++++++++++++            apic_write(APIC_TMICT, clocks / APIC_DIVISOR);
                    }
                    
                    /*
                    static int lapic_next_event(unsigned long delta,
                                            struct clock_event_device *evt)
                    {
-- -----------------    apic_write_around(APIC_TMICT, delta);
++ +++++++++++++++++    apic_write(APIC_TMICT, delta);
                        return 0;
                    }
                    
@@@@@@@@@@@@@@@@@@@@@ -278,7 -278,7 -275,7 -278,7 -278,7 -278,7 -278,7 -278,7 -278,7 -278,7 -267,7 -278,7 -278,7 -278,7 -278,7 -278,7 -278,7 -278,7 -278,7 -278,7 +275,7 @@@@@@@@@@@@@@@@@@@@@ static void lapic_timer_setup(enum cloc
                        case CLOCK_EVT_MODE_SHUTDOWN:
                                v = apic_read(APIC_LVTT);
                                v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
-- -----------------            apic_write_around(APIC_LVTT, v);
++ +++++++++++++++++            apic_write(APIC_LVTT, v);
                                break;
                        case CLOCK_EVT_MODE_RESUME:
                                /* Nothing to do here */
@@@@@@@@@@@@@@@@@@@@@ -372,12 -372,12 -369,7 -372,12 -372,12 -372,12 -372,12 -372,12 -372,12 -372,12 -361,12 -372,12 -372,12 -372,12 -372,12 -372,12 -372,12 -372,12 -372,12 -372,12 +369,7 @@@@@@@@@@@@@@@@@@@@@ static void __init lapic_cal_handler(st
                        }
                    }
                    
-- -----------------/*
-- ----------------- * Setup the boot APIC
-- ----------------- *
-- ----------------- * Calibrate and verify the result.
-- ----------------- */
-- -----------------void __init setup_boot_APIC_clock(void)
++ +++++++++++++++++static int __init calibrate_APIC_clock(void)
                    {
                        struct clock_event_device *levt = &__get_cpu_var(lapic_events);
                        const long pm_100ms = PMTMR_TICKS_PER_SEC/10;
                        long delta, deltapm;
                        int pm_referenced = 0;
                    
-- -----------------    /*
-- -----------------     * The local apic timer can be disabled via the kernel
-- -----------------     * commandline or from the CPU detection code. Register the lapic
-- -----------------     * timer as a dummy clock event source on SMP systems, so the
-- -----------------     * broadcast mechanism is used. On UP systems simply ignore it.
-- -----------------     */
-- -----------------    if (local_apic_timer_disabled) {
-- -----------------            /* No broadcast on UP ! */
-- -----------------            if (num_possible_cpus() > 1) {
-- -----------------                    lapic_clockevent.mult = 1;
-- -----------------                    setup_APIC_timer();
-- -----------------            }
-- -----------------            return;
-- -----------------    }
-- -----------------
-- -----------------    apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
-- -----------------                "calibrating APIC timer ...\n");
-- -----------------
                        local_irq_disable();
                    
                        /* Replace the global interrupt handler */
                                    calibration_result / (1000000 / HZ),
                                    calibration_result % (1000000 / HZ));
                    
-- -----------------    local_apic_timer_verify_ok = 1;
-- -----------------
                        /*
                         * Do a sanity check on the APIC calibration result
                         */
                                local_irq_enable();
                                printk(KERN_WARNING
                                       "APIC frequency too slow, disabling apic timer\n");
-- -----------------            /* No broadcast on UP ! */
-- -----------------            if (num_possible_cpus() > 1)
-- -----------------                    setup_APIC_timer();
-- -----------------            return;
++ +++++++++++++++++            return -1;
                        }
                    
++ +++++++++++++++++    local_apic_timer_verify_ok = 1;
++ +++++++++++++++++
                        /* We trust the pm timer based calibration */
                        if (!pm_referenced) {
                                apic_printk(APIC_VERBOSE, "... verify APIC timer\n");
                        if (!local_apic_timer_verify_ok) {
                                printk(KERN_WARNING
                                       "APIC timer disabled due to verification failure.\n");
++ +++++++++++++++++                    return -1;
++ +++++++++++++++++    }
++ +++++++++++++++++
++ +++++++++++++++++    return 0;
++ +++++++++++++++++}
++ +++++++++++++++++
++ +++++++++++++++++/*
++ +++++++++++++++++ * Setup the boot APIC
++ +++++++++++++++++ *
++ +++++++++++++++++ * Calibrate and verify the result.
++ +++++++++++++++++ */
++ +++++++++++++++++void __init setup_boot_APIC_clock(void)
++ +++++++++++++++++{
++ +++++++++++++++++    /*
++ +++++++++++++++++     * The local apic timer can be disabled via the kernel
++ +++++++++++++++++     * commandline or from the CPU detection code. Register the lapic
++ +++++++++++++++++     * timer as a dummy clock event source on SMP systems, so the
++ +++++++++++++++++     * broadcast mechanism is used. On UP systems simply ignore it.
++ +++++++++++++++++     */
++ +++++++++++++++++    if (local_apic_timer_disabled) {
                                /* No broadcast on UP ! */
-- -----------------            if (num_possible_cpus() == 1)
-- -----------------                    return;
-- -----------------    } else {
-- -----------------            /*
-- -----------------             * If nmi_watchdog is set to IO_APIC, we need the
-- -----------------             * PIT/HPET going.  Otherwise register lapic as a dummy
-- -----------------             * device.
-- -----------------             */
-- -----------------            if (nmi_watchdog != NMI_IO_APIC)
-- -----------------                    lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
-- -----------------            else
-- -----------------                    printk(KERN_WARNING "APIC timer registered as dummy,"
-- ------- ---------                            " due to nmi_watchdog=%d!\n", nmi_watchdog);
          -                                    " due to nmi_watchdog=1!\n");
++ +++++++++++++++++            if (num_possible_cpus() > 1) {
++ +++++++++++++++++                    lapic_clockevent.mult = 1;
++ +++++++++++++++++                    setup_APIC_timer();
++ +++++++++++++++++            }
++ +++++++++++++++++            return;
      ++  + +   +       }
      ++  + +   +   
++ +++++++++++++++++    apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
++ +++++++++++++++++                "calibrating APIC timer ...\n");
++ +++++++++++++++++
++ +++++++++++++++++    if (calibrate_APIC_clock()) {
++ +++++++++++++++++            /* No broadcast on UP ! */
++ +++++++++++++++++            if (num_possible_cpus() > 1)
++ +++++++++++++++++                    setup_APIC_timer();
++ +++++++++++++++++            return;
++ +++  ++ + +++ +++    }
++ +++  ++ + +++ +++
++ +++++++++++++++++    /*
++ +++++++++++++++++     * If nmi_watchdog is set to IO_APIC, we need the
++ +++++++++++++++++     * PIT/HPET going.  Otherwise register lapic as a dummy
++ +++++++++++++++++     * device.
++ +++++++++++++++++     */
++ +++++++++++++++++    if (nmi_watchdog != NMI_IO_APIC)
++ +++++++++++++++++            lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
++ +++++++++++++++++    else
++ +++++++++++++++++            printk(KERN_WARNING "APIC timer registered as dummy,"
++ +++++++++++++++++                    " due to nmi_watchdog=%d!\n", nmi_watchdog);
++ +++++++++++++++++
                        /* Setup the lapic or request the broadcast */
                        setup_APIC_timer();
                    }
                         */
                        if (maxlvt >= 3) {
                                v = ERROR_APIC_VECTOR; /* any non-zero vector will do */
-- -----------------            apic_write_around(APIC_LVTERR, v | APIC_LVT_MASKED);
++ +++++++++++++++++            apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
                        }
                        /*
                         * Careful: we have to set masks only first to deassert
                         * any level-triggered sources.
                         */
                        v = apic_read(APIC_LVTT);
-- -----------------    apic_write_around(APIC_LVTT, v | APIC_LVT_MASKED);
++ +++++++++++++++++    apic_write(APIC_LVTT, v | APIC_LVT_MASKED);
                        v = apic_read(APIC_LVT0);
-- -----------------    apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED);
++ +++++++++++++++++    apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
                        v = apic_read(APIC_LVT1);
-- -----------------    apic_write_around(APIC_LVT1, v | APIC_LVT_MASKED);
++ +++++++++++++++++    apic_write(APIC_LVT1, v | APIC_LVT_MASKED);
                        if (maxlvt >= 4) {
                                v = apic_read(APIC_LVTPC);
-- -----------------            apic_write_around(APIC_LVTPC, v | APIC_LVT_MASKED);
++ +++++++++++++++++            apic_write(APIC_LVTPC, v | APIC_LVT_MASKED);
                        }
                    
                        /* let's not touch this if we didn't frob it */
                    #ifdef CONFIG_X86_MCE_P4THERMAL
                        if (maxlvt >= 5) {
                                v = apic_read(APIC_LVTTHMR);
-- -----------------            apic_write_around(APIC_LVTTHMR, v | APIC_LVT_MASKED);
++ +++++++++++++++++            apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED);
                        }
                    #endif
                        /*
                         * Clean APIC state for other OSs:
                         */
-- -----------------    apic_write_around(APIC_LVTT, APIC_LVT_MASKED);
-- -----------------    apic_write_around(APIC_LVT0, APIC_LVT_MASKED);
-- -----------------    apic_write_around(APIC_LVT1, APIC_LVT_MASKED);
++ +++++++++++++++++    apic_write(APIC_LVTT, APIC_LVT_MASKED);
++ +++++++++++++++++    apic_write(APIC_LVT0, APIC_LVT_MASKED);
++ +++++++++++++++++    apic_write(APIC_LVT1, APIC_LVT_MASKED);
                        if (maxlvt >= 3)
-- -----------------            apic_write_around(APIC_LVTERR, APIC_LVT_MASKED);
++ +++++++++++++++++            apic_write(APIC_LVTERR, APIC_LVT_MASKED);
                        if (maxlvt >= 4)
-- -----------------            apic_write_around(APIC_LVTPC, APIC_LVT_MASKED);
++ +++++++++++++++++            apic_write(APIC_LVTPC, APIC_LVT_MASKED);
                    
                    #ifdef CONFIG_X86_MCE_P4THERMAL
                        if (maxlvt >= 5)
-- -----------------            apic_write_around(APIC_LVTTHMR, APIC_LVT_MASKED);
++ +++++++++++++++++            apic_write(APIC_LVTTHMR, APIC_LVT_MASKED);
                    #endif
                        /* Integrated APIC (!82489DX) ? */
                        if (lapic_is_integrated()) {
@@@@@@@@@@@@@@@@@@@@@ -756,7 -756,7 -760,7 -756,7 -756,7 -756,7 -756,7 -756,7 -756,7 -756,7 -745,7 -756,7 -756,7 -756,7 -756,7 -756,7 -756,7 -756,7 -756,7 -756,7 +760,7 @@@@@@@@@@@@@@@@@@@@@ void disable_local_APIC(void
                         */
                        value = apic_read(APIC_SPIV);
                        value &= ~APIC_SPIV_APIC_ENABLED;
-- -----------------    apic_write_around(APIC_SPIV, value);
++ +++++++++++++++++    apic_write(APIC_SPIV, value);
                    
                        /*
                         * When LAPIC was disabled by the BIOS and enabled by the kernel,
@@@@@@@@@@@@@@@@@@@@@ -865,8 -865,8 -869,8 -865,8 -865,8 -865,8 -865,8 -865,8 -865,8 -865,8 -854,8 -865,8 -865,8 -865,8 -865,8 -865,8 -865,8 -865,8 -865,8 -865,8 +869,8 @@@@@@@@@@@@@@@@@@@@@ void __init sync_Arb_IDs(void
                        apic_wait_icr_idle();
                    
                        apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n");
-- -----------------    apic_write_around(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG
-- -----------------                            | APIC_DM_INIT);
++ +++++++++++++++++    apic_write(APIC_ICR,
++ +++++++++++++++++               APIC_DEST_ALLINC | APIC_INT_LEVELTRIG | APIC_DM_INIT);
                    }
                    
                    /*
@@@@@@@@@@@@@@@@@@@@@ -902,16 -902,16 -906,16 -902,16 -902,16 -902,16 -902,16 -902,16 -902,16 -902,16 -891,16 -902,16 -902,16 -902,16 -902,16 -902,16 -902,16 -902,16 -902,16 -902,16 +906,16 @@@@@@@@@@@@@@@@@@@@@ void __init init_bsp_APIC(void
                        else
                                value |= APIC_SPIV_FOCUS_DISABLED;
                        value |= SPURIOUS_APIC_VECTOR;
-- -----------------    apic_write_around(APIC_SPIV, value);
++ +++++++++++++++++    apic_write(APIC_SPIV, value);
                    
                        /*
                         * Set up the virtual wire mode.
                         */
-- -----------------    apic_write_around(APIC_LVT0, APIC_DM_EXTINT);
++ +++++++++++++++++    apic_write(APIC_LVT0, APIC_DM_EXTINT);
                        value = APIC_DM_NMI;
                        if (!lapic_is_integrated())             /* 82489DX */
                                value |= APIC_LVT_LEVEL_TRIGGER;
-- -----------------    apic_write_around(APIC_LVT1, value);
++ +++++++++++++++++    apic_write(APIC_LVT1, value);
                    }
                    
                    static void __cpuinit lapic_setup_esr(void)
                    
                                /* enables sending errors */
                                value = ERROR_APIC_VECTOR;
-- -----------------            apic_write_around(APIC_LVTERR, value);
++ +++++++++++++++++            apic_write(APIC_LVTERR, value);
                                /*
                                 * spec says clear errors after enabling vector.
                                 */
@@@@@@@@@@@@@@@@@@@@@ -974,7 -974,7 -978,7 -974,7 -974,7 -974,7 -974,7 -974,7 -974,7 -974,7 -963,7 -974,7 -974,7 -974,7 -974,7 -974,7 -974,7 -974,7 -974,7 -974,7 +978,7 @@@@@@@@@@@@@@@@@@@@@ void __cpuinit setup_local_APIC(void
                         * Double-check whether this APIC is really registered.
                         */
                        if (!apic_id_registered())
       -  -                     BUG();
       +  +                     WARN_ON_ONCE(1);
                    
                        /*
                         * Intel recommends to set DFR, LDR and TPR before enabling
                         */
                        value = apic_read(APIC_TASKPRI);
                        value &= ~APIC_TPRI_MASK;
-- -----------------    apic_write_around(APIC_TASKPRI, value);
++ +++++++++++++++++    apic_write(APIC_TASKPRI, value);
                    
                        /*
                         * After a crash, we no longer service the interrupts and a pending
                         * Set spurious IRQ vector
                         */
                        value |= SPURIOUS_APIC_VECTOR;
-- -----------------    apic_write_around(APIC_SPIV, value);
++ +++++++++++++++++    apic_write(APIC_SPIV, value);
                    
                        /*
                         * Set up LVT0, LVT1:
                                apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n",
                                                smp_processor_id());
                        }
-- -----------------    apic_write_around(APIC_LVT0, value);
++ +++++++++++++++++    apic_write(APIC_LVT0, value);
                    
                        /*
                         * only the BP should see the LINT1 NMI signal, obviously.
                                value = APIC_DM_NMI | APIC_LVT_MASKED;
                        if (!integrated)                /* 82489DX */
                                value |= APIC_LVT_LEVEL_TRIGGER;
-- -----------------    apic_write_around(APIC_LVT1, value);
++ +++++++++++++++++    apic_write(APIC_LVT1, value);
                    }
                    
                    void __cpuinit end_local_APIC_setup(void)
                        /* Disable the local apic timer */
                        value = apic_read(APIC_LVTT);
                        value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
-- -----------------    apic_write_around(APIC_LVTT, value);
++ +++++++++++++++++    apic_write(APIC_LVTT, value);
                    
                        setup_apic_nmi_watchdog(NULL);
                        apic_pm_activate();
@@@@@@@@@@@@@@@@@@@@@ -1105,7 -1105,7 -1109,7 -1105,7 -1105,7 -1105,7 -1105,7 -1105,7 -1105,7 -1105,7 -1094,7 -1105,7 -1105,7 -1105,7 -1105,7 -1105,7 -1105,7 -1105,7 -1105,7 -1105,7 +1109,7 @@@@@@@@@@@@@@@@@@@@@ static int __init detect_init_APIC(void
                        u32 h, l, features;
                    
                        /* Disabled by kernel option? */
          -             if (enable_local_apic < 0)
          +             if (disable_apic)
                                return -1;
                    
                        switch (boot_cpu_data.x86_vendor) {
                                 * Over-ride BIOS and try to enable the local APIC only if
                                 * "lapic" specified.
                                 */
          -                     if (enable_local_apic <= 0) {
          +                     if (!force_enable_local_apic) {
                                        printk(KERN_INFO "Local APIC disabled by BIOS -- "
                                               "you can enable it with \"lapic\"\n");
                                        return -1;
                        if (l & MSR_IA32_APICBASE_ENABLE)
                                mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;
                    
          -             if (nmi_watchdog != NMI_NONE && nmi_watchdog != NMI_DISABLED)
          -                     nmi_watchdog = NMI_LOCAL_APIC;
          -         
                        printk(KERN_INFO "Found and enabled local APIC!\n");
                    
                        apic_pm_activate();
@@@@@@@@@@@@@@@@@@@@@ -1203,6 -1203,6 -1207,6 -1203,6 -1203,6 -1203,6 -1203,6 -1203,6 -1203,6 -1203,6 -1195,36 -1203,6 -1203,6 -1203,6 -1203,6 -1203,6 -1203,6 -1203,6 -1203,6 -1203,6 +1207,6 @@@@@@@@@@@@@@@@@@@@@ void __init init_apic_mappings(void
                        if (boot_cpu_physical_apicid == -1U)
                                boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
                    
          -         #ifdef CONFIG_X86_IO_APIC
          -             {
          -                     unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
          -                     int i;
          -         
          -                     for (i = 0; i < nr_ioapics; i++) {
          -                             if (smp_found_config) {
          -                                     ioapic_phys = mp_ioapics[i].mpc_apicaddr;
          -                                     if (!ioapic_phys) {
          -                                             printk(KERN_ERR
          -                                                    "WARNING: bogus zero IO-APIC "
          -                                                    "address found in MPTABLE, "
          -                                                    "disabling IO/APIC support!\n");
          -                                             smp_found_config = 0;
          -                                             skip_ioapic_setup = 1;
          -                                             goto fake_ioapic_page;
          -                                     }
          -                             } else {
          -         fake_ioapic_page:
          -                                     ioapic_phys = (unsigned long)
          -                                                   alloc_bootmem_pages(PAGE_SIZE);
          -                                     ioapic_phys = __pa(ioapic_phys);
          -                             }
          -                             set_fixmap_nocache(idx, ioapic_phys);
          -                             printk(KERN_DEBUG "mapped IOAPIC to %08lx (%08lx)\n",
          -                                    __fix_to_virt(idx), ioapic_phys);
          -                             idx++;
          -                     }
          -             }
          -         #endif
                    }
                    
                    /*
                    
                    int __init APIC_init_uniprocessor(void)
                    {
---- ----- ---------    if (disable_apic)
          -             if (enable_local_apic < 0)
---- ---------------            clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
---- ---------------
                        if (!smp_found_config && !cpu_has_apic)
                                return -1;
                    
                    #ifdef CONFIG_CRASH_DUMP
                        boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
                    #endif
          -             phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid);
          +             physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
                    
                        setup_local_APIC();
                    
          +         #ifdef CONFIG_X86_IO_APIC
          +             if (!smp_found_config || skip_ioapic_setup || !nr_ioapics)
          +         #endif
          +                     localise_nmi_watchdog();
                        end_local_APIC_setup();
                    #ifdef CONFIG_X86_IO_APIC
                        if (smp_found_config)
                         * The reschedule interrupt is a CPU-to-CPU reschedule-helper
                         * IPI, driven by wakeup.
                         */
          -             set_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
          +             alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
                    
                        /* IPI for invalidation */
          -             set_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
          +             alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
                    
                        /* IPI for generic function call */
          -             set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
          +             alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
      ++  + +   +   
      ++  + +   +       /* IPI for single call function */
      ++  + +   +       set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
      ++  + +   +                               call_function_single_interrupt);
                    }
                    #endif
                    
                        smp_intr_init();
                    #endif
                        /* self generated IPI for local APIC timer */
          -             set_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
          +             alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
                    
                        /* IPI vectors for APIC spurious and error interrupts */
          -             set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
          -             set_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
          +             alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
          +             alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
                    
                        /* thermal monitor LVT interrupt */
                    #ifdef CONFIG_X86_MCE_P4THERMAL
          -             set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
          +             alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
                    #endif
                    }
                    
@@@@@@@@@@@@@@@@@@@@@ -1419,7 -1419,7 -1423,7 -1419,7 -1416,7 -1419,7 -1415,7 -1415,7 -1419,7 -1419,7 -1433,7 -1419,7 -1415,7 -1419,7 -1419,7 -1419,7 -1415,7 -1419,7 -1419,7 -1419,7 +1420,7 @@@@@@@@@@@@@@@@@@@@@ void disconnect_bsp_APIC(int virt_wire_
                                value &= ~APIC_VECTOR_MASK;
                                value |= APIC_SPIV_APIC_ENABLED;
                                value |= 0xf;
-- -----------------            apic_write_around(APIC_SPIV, value);
++ +++++++++++++++++            apic_write(APIC_SPIV, value);
                    
                                if (!virt_wire_setup) {
                                        /*
                                                APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
                                        value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
                                        value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
-- -----------------                    apic_write_around(APIC_LVT0, value);
++ +++++++++++++++++                    apic_write(APIC_LVT0, value);
                                } else {
                                        /* Disable LVT0 */
-- -----------------                    apic_write_around(APIC_LVT0, APIC_LVT_MASKED);
++ +++++++++++++++++                    apic_write(APIC_LVT0, APIC_LVT_MASKED);
                                }
                    
                                /*
                                        APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
                                value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
                                value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI);
-- -----------------            apic_write_around(APIC_LVT1, value);
++ +++++++++++++++++            apic_write(APIC_LVT1, value);
                        }
                    }
                    
@@@@@@@@@@@@@@@@@@@@@ -1499,9 -1499,9 -1503,9 -1499,9 -1496,9 -1499,9 -1495,9 -1495,9 -1499,9 -1499,9 -1513,6 -1499,9 -1495,9 -1499,9 -1499,9 -1499,9 -1495,9 -1499,9 -1499,9 -1499,9 +1500,9 @@@@@@@@@@@@@@@@@@@@@ void __cpuinit generic_processor_info(i
                                 */
                                cpu = 0;
                    
          +             if (apicid > max_physical_apicid)
          +                     max_physical_apicid = apicid;
          +         
                        /*
                         * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y
                         * but we need to work other dependencies like SMP_SUSPEND etc
                         * if (CPU_HOTPLUG_ENABLED || num_processors > 8)
                         *       - Ashok Raj <ashok.raj@intel.com>
                         */
          -             if (num_processors > 8) {
          +             if (max_physical_apicid >= 8) {
                                switch (boot_cpu_data.x86_vendor) {
                                case X86_VENDOR_INTEL:
                                        if (!APIC_XAPIC(version)) {
                        }
                    #ifdef CONFIG_SMP
                        /* are we being called early in kernel startup? */
          -             if (x86_cpu_to_apicid_early_ptr) {
          -                     u16 *cpu_to_apicid = x86_cpu_to_apicid_early_ptr;
          -                     u16 *bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr;
          +             if (early_per_cpu_ptr(x86_cpu_to_apicid)) {
          +                     u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
          +                     u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
                    
                                cpu_to_apicid[cpu] = apicid;
                                bios_cpu_apicid[cpu] = apicid;
                     */
                    static int __init parse_lapic(char *arg)
                    {
          -             enable_local_apic = 1;
          +             force_enable_local_apic = 1;
                        return 0;
                    }
                    early_param("lapic", parse_lapic);
                    
                    static int __init parse_nolapic(char *arg)
                    {
          -             enable_local_apic = -1;
          -             clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
          +             disable_apic = 1;
---- ----- ---------    clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
++++ +++++++++++++++    setup_clear_cpu_cap(X86_FEATURE_APIC);
                        return 0;
                    }
                    early_param("nolapic", parse_nolapic);
                    }
                    __setup("apic=", apic_set_verbosity);
                    
          +         static int __init lapic_insert_resource(void)
          +         {
          +             if (!apic_phys)
          +                     return -1;
          +         
          +             /* Put local APIC into the resource map. */
          +             lapic_resource.start = apic_phys;
          +             lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1;
          +             insert_resource(&iomem_resource, &lapic_resource);
          +         
          +             return 0;
          +         }
          +         
           +         /*
           +          * This must run after e820_reserve_resources(), which
           +          * claims its ranges with request_resource().
           +          */
          +         late_initcall(lapic_insert_resource);
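
A minimal sketch of the ordering described in the comment above, assuming the usual resource-tree semantics (not taken from this commit; the example_ names and the base address are illustrative): e820_reserve_resources() claims its ranges with request_resource(), which rejects overlapping entries, while insert_resource() can still nest a new range into the already-populated tree, hence the late_initcall.

#include <linux/init.h>
#include <linux/ioport.h>

/* Illustrative only: mirrors the lapic_insert_resource() pattern above. */
static struct resource example_lapic_resource = {
	.name  = "Local APIC (example)",
	.flags = IORESOURCE_MEM | IORESOURCE_BUSY,
};

static int __init example_insert_lapic_resource(void)
{
	/* 0xfee00000 is the usual LAPIC base; the real code uses apic_phys. */
	example_lapic_resource.start = 0xfee00000;
	example_lapic_resource.end   = example_lapic_resource.start + 0xfff; /* one 4K page */

	/* Runs after e820_reserve_resources(), so nesting into the tree is the only option. */
	return insert_resource(&iomem_resource, &example_lapic_resource);
}
late_initcall(example_insert_lapic_resource);
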
index 1e3d32e27c14c23a8d48d1dc6bcf8b30faeefae7,1e3d32e27c14c23a8d48d1dc6bcf8b30faeefae7,98c70f044e19b12c1562056375cb865ce26343b0,1e3d32e27c14c23a8d48d1dc6bcf8b30faeefae7,16e586cacbdc306cef657b22c31ab9e2878668ca,1e3d32e27c14c23a8d48d1dc6bcf8b30faeefae7,1e3d32e27c14c23a8d48d1dc6bcf8b30faeefae7,1e3d32e27c14c23a8d48d1dc6bcf8b30faeefae7,1e3d32e27c14c23a8d48d1dc6bcf8b30faeefae7,1e3d32e27c14c23a8d48d1dc6bcf8b30faeefae7,0633cfd0dc291a1310f0632e1012cf663b5e89a9,1e3d32e27c14c23a8d48d1dc6bcf8b30faeefae7,1e3d32e27c14c23a8d48d1dc6bcf8b30faeefae7,1e3d32e27c14c23a8d48d1dc6bcf8b30faeefae7,1e3d32e27c14c23a8d48d1dc6bcf8b30faeefae7,1e3d32e27c14c23a8d48d1dc6bcf8b30faeefae7,1e3d32e27c14c23a8d48d1dc6bcf8b30faeefae7,1e3d32e27c14c23a8d48d1dc6bcf8b30faeefae7,1e3d32e27c14c23a8d48d1dc6bcf8b30faeefae7,1e3d32e27c14c23a8d48d1dc6bcf8b30faeefae7..7f1f030da7ee4c048990eecf9dc00229472028ca
                    #include <mach_ipi.h>
                    #include <mach_apic.h>
                    
          -         int disable_apic_timer __cpuinitdata;
          +         static int disable_apic_timer __cpuinitdata;
                    static int apic_calibrate_pmtmr __initdata;
                    int disable_apic;
                    
@@@@@@@@@@@@@@@@@@@@@ -54,10 -54,10 -54,10 -54,10 -54,10 -54,10 -54,10 -54,10 -54,10 -54,10 -54,7 -54,10 -54,10 -54,10 -54,10 -54,10 -54,10 -54,10 -54,10 -54,10 +54,10 @@@@@@@@@@@@@@@@@@@@@ EXPORT_SYMBOL_GPL(local_apic_timer_c2_o
                    /*
                     * Debug level, exported for io_apic.c
                     */
-- -----------------int apic_verbosity;
++ +++++++++++++++++unsigned int apic_verbosity;
          +         
          +         /* Have we found an MP table */
          +         int smp_found_config;
                    
                    static struct resource lapic_resource = {
                        .name = "Local APIC",
@@@@@@@@@@@@@@@@@@@@@ -90,6 -90,6 -90,6 -90,6 -90,6 -90,6 -90,6 -90,6 -90,6 -90,6 -87,9 -90,6 -90,6 -90,6 -90,6 -90,6 -90,6 -90,6 -90,6 -90,6 +90,6 @@@@@@@@@@@@@@@@@@@@@ static unsigned long apic_phys
                    
                    unsigned long mp_lapic_addr;
                    
          -         DEFINE_PER_CPU(u16, x86_bios_cpu_apicid) = BAD_APICID;
          -         EXPORT_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
          -         
                    unsigned int __cpuinitdata maxcpus = NR_CPUS;
                    /*
                     * Get the LAPIC version
@@@@@@@@@@@@@@@@@@@@@ -314,7 -314,7 -314,7 -314,7 -314,7 -314,7 -314,7 -314,7 -314,7 -314,7 -314,7 -314,7 -314,7 -314,7 -314,7 -314,7 -314,7 -314,7 -314,7 -314,7 +314,7 @@@@@@@@@@@@@@@@@@@@@ static void setup_APIC_timer(void
                    
                    #define TICK_COUNT 100000000
                    
-- -----------------static void __init calibrate_APIC_clock(void)
++ +++++++++++++++++static int __init calibrate_APIC_clock(void)
                    {
                        unsigned apic, apic_start;
                        unsigned long tsc, tsc_start;
                                clockevent_delta2ns(0xF, &lapic_clockevent);
                    
                        calibration_result = result / HZ;
++ +++++++++++++++++
++ +++++++++++++++++    /*
++ +++++++++++++++++     * Do a sanity check on the APIC calibration result
++ +++++++++++++++++     */
++ +++++++++++++++++    if (calibration_result < (1000000 / HZ)) {
++ +++++++++++++++++            printk(KERN_WARNING
++ +++++++++++++++++                    "APIC frequency too slow, disabling apic timer\n");
++ +++++++++++++++++            return -1;
++ +++++++++++++++++    }
++ +++++++++++++++++
++ +++++++++++++++++    return 0;
                    }
                    
                    /*
@@@@@@@@@@@@@@@@@@@@@ -394,14 -394,14 -405,7 -394,14 -394,14 -394,14 -394,14 -394,14 -394,14 -394,14 -394,14 -394,14 -394,14 -394,14 -394,14 -394,14 -394,14 -394,14 -394,14 -394,14 +405,7 @@@@@@@@@@@@@@@@@@@@@ void __init setup_boot_APIC_clock(void
                        }
                    
                        printk(KERN_INFO "Using local APIC timer interrupts.\n");
-- -----------------    calibrate_APIC_clock();
-- -----------------
-- -----------------    /*
-- -----------------     * Do a sanity check on the APIC calibration result
-- -----------------     */
-- -----------------    if (calibration_result < (1000000 / HZ)) {
-- -----------------            printk(KERN_WARNING
-- -----------------                   "APIC frequency too slow, disabling apic timer\n");
++ +++++++++++++++++    if (calibrate_APIC_clock()) {
                                /* No broadcast on UP ! */
                                if (num_possible_cpus() > 1)
                                        setup_APIC_timer();
                                lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
                        else
                                printk(KERN_WARNING "APIC timer registered as dummy,"
          -                            " due to nmi_watchdog=1!\n");
          +                             " due to nmi_watchdog=%d!\n", nmi_watchdog);
                    
                        setup_APIC_timer();
                    }
                    
          -         /*
          -          * AMD C1E enabled CPUs have a real nasty problem: Some BIOSes set the
          -          * C1E flag only in the secondary CPU, so when we detect the wreckage
          -          * we already have enabled the boot CPU local apic timer. Check, if
          -          * disable_apic_timer is set and the DUMMY flag is cleared. If yes,
          -          * set the DUMMY flag again and force the broadcast mode in the
          -          * clockevents layer.
          -          */
          -         static void __cpuinit check_boot_apic_timer_broadcast(void)
          -         {
          -             if (!disable_apic_timer ||
          -                 (lapic_clockevent.features & CLOCK_EVT_FEAT_DUMMY))
          -                     return;
          -         
          -             printk(KERN_INFO "AMD C1E detected late. Force timer broadcast.\n");
          -             lapic_clockevent.features |= CLOCK_EVT_FEAT_DUMMY;
          -         
          -             local_irq_enable();
          -             clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE,
          -                                &boot_cpu_physical_apicid);
          -             local_irq_disable();
          -         }
          -         
                    void __cpuinit setup_secondary_APIC_clock(void)
                    {
          -             check_boot_apic_timer_broadcast();
                        setup_APIC_timer();
                    }
                    
@@@@@@@@@@@@@@@@@@@@@ -826,6 -826,6 -830,6 -826,6 -826,6 -826,6 -826,6 -826,6 -826,6 -826,6 -850,7 -826,6 -826,6 -826,6 -826,6 -826,6 -826,6 -826,6 -826,6 -826,6 +830,6 @@@@@@@@@@@@@@@@@@@@@ static void __cpuinit lapic_setup_esr(v
                    void __cpuinit end_local_APIC_setup(void)
                    {
                        lapic_setup_esr();
          -             nmi_watchdog_default();
                        setup_apic_nmi_watchdog(NULL);
                        apic_pm_activate();
                    }
@@@@@@@@@@@@@@@@@@@@@ -850,7 -850,7 -854,7 -850,7 -850,7 -850,7 -850,7 -850,7 -850,7 -850,7 -875,7 -850,7 -850,7 -850,7 -850,7 -850,7 -850,7 -850,7 -850,7 -850,7 +854,7 @@@@@@@@@@@@@@@@@@@@@ static int __init detect_init_APIC(void
                    
                    void __init early_init_lapic_mapping(void)
                    {
          -             unsigned long apic_phys;
          +             unsigned long phys_addr;
                    
                        /*
                         * If no local APIC can be found then go out
                        if (!smp_found_config)
                                return;
                    
          -             apic_phys = mp_lapic_addr;
          +             phys_addr = mp_lapic_addr;
                    
          -             set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
          +             set_fixmap_nocache(FIX_APIC_BASE, phys_addr);
                        apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
          -                                      APIC_BASE, apic_phys);
          +                         APIC_BASE, phys_addr);
                    
                        /*
                         * Fetch the APIC ID of the BSP in case we have a
@@@@@@@@@@@@@@@@@@@@@ -917,9 -917,9 -921,9 -917,9 -917,9 -917,9 -917,9 -917,9 -917,9 -917,9 -942,7 -917,9 -917,9 -917,9 -917,9 -917,9 -917,9 -917,9 -917,9 -917,9 +921,9 @@@@@@@@@@@@@@@@@@@@@ int __init APIC_init_uniprocessor(void
                    
                        verify_local_APIC();
                    
          -             phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid);
          +             connect_bsp_APIC();
          +         
          +             physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
                        apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid));
                    
                        setup_local_APIC();
                        if (!skip_ioapic_setup && nr_ioapics)
                                enable_IO_APIC();
                    
          +             if (!smp_found_config || skip_ioapic_setup || !nr_ioapics)
          +                     localise_nmi_watchdog();
                        end_local_APIC_setup();
                    
                        if (smp_found_config && !skip_ioapic_setup && nr_ioapics)
                        irq_exit();
                    }
                    
           +         /**
           +          * connect_bsp_APIC - attach the APIC to the interrupt system
           +          */
          +         void __init connect_bsp_APIC(void)
          +         {
          +             enable_apic_mode();
          +         }
          +         
                    void disconnect_bsp_APIC(int virt_wire_setup)
                    {
                        /* Go back to Virtual Wire compatibility mode */
                                 */
                                cpu = 0;
                        }
          +             if (apicid > max_physical_apicid)
          +                     max_physical_apicid = apicid;
          +         
                        /* are we being called early in kernel startup? */
          -             if (x86_cpu_to_apicid_early_ptr) {
          -                     u16 *cpu_to_apicid = x86_cpu_to_apicid_early_ptr;
          -                     u16 *bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr;
          +             if (early_per_cpu_ptr(x86_cpu_to_apicid)) {
          +                     u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
          +                     u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
                    
                                cpu_to_apicid[cpu] = apicid;
                                bios_cpu_apicid[cpu] = apicid;
@@@@@@@@@@@@@@@@@@@@@ -1259,7 -1259,7 -1263,7 -1259,7 -1259,7 -1259,7 -1259,7 -1259,7 -1259,7 -1259,7 -1269,7 -1259,7 -1259,7 -1259,7 -1259,7 -1259,7 -1259,7 -1259,7 -1259,7 -1259,7 +1263,7 @@@@@@@@@@@@@@@@@@@@@ __cpuinit int apic_is_clustered_box(voi
                        if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box())
                                return 0;
                    
          -             bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr;
          +             bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
                        bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
                    
                        for (i = 0; i < NR_CPUS; i++) {
@@@@@@@@@@@@@@@@@@@@@ -1337,7 -1337,7 -1341,7 -1337,7 -1337,7 -1337,7 -1337,7 -1337,7 -1337,7 -1337,7 -1347,7 -1337,7 -1337,7 -1337,7 -1337,7 -1337,7 -1337,7 -1337,7 -1337,7 -1337,7 +1341,7 @@@@@@@@@@@@@@@@@@@@@ early_param("apic", apic_set_verbosity)
                    static __init int setup_disableapic(char *str)
                    {
                        disable_apic = 1;
---- ---------------    clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
++++ +++++++++++++++    setup_clear_cpu_cap(X86_FEATURE_APIC);
                        return 0;
                    }
                    early_param("disableapic", setup_disableapic);
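
The parse_nolapic() and setup_disableapic() hunks above switch from clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC) to setup_clear_cpu_cap(X86_FEATURE_APIC). A minimal sketch of the difference, paraphrasing the cpufeature helper of this kernel generation (the example_ prefix is illustrative, not from the commit): the setup_ variant also records the bit in cleared_cpu_caps[], which identify_cpu() in the common_64.c hunk further down masks out again for every CPU, so the boot option outlives the boot CPU.

/* Paraphrase of the era's helper; the real macro lives in asm-x86/cpufeature.h. */
#define example_setup_clear_cpu_cap(bit) do {				\
	clear_cpu_cap(&boot_cpu_data, (bit));				\
	set_bit((bit), (unsigned long *)cleared_cpu_caps);		\
} while (0)

/*
 * Later, per CPU, identify_cpu() applies the recorded mask
 * (see the "Clear all flags overridden by options" loop below):
 *
 *	for (i = 0; i < NCAPINTS; i++)
 *		c->x86_capability[i] &= ~cleared_cpu_caps[i];
 */
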
index 7b8cc72feb40e3ed8bfd02437fe3b6324b024f67,7b8cc72feb40e3ed8bfd02437fe3b6324b024f67,7b8cc72feb40e3ed8bfd02437fe3b6324b024f67,2a4475beea4a2335831b5a4b7d253d5fa51232b9,daee611f0140c211d26736991391f8d5f206f1e7,7b8cc72feb40e3ed8bfd02437fe3b6324b024f67,7b8cc72feb40e3ed8bfd02437fe3b6324b024f67,751850235291c2663aa930a33c5cbda2e123b1e0,7b8cc72feb40e3ed8bfd02437fe3b6324b024f67,7b8cc72feb40e3ed8bfd02437fe3b6324b024f67,0000000000000000000000000000000000000000,7b8cc72feb40e3ed8bfd02437fe3b6324b024f67,7b8cc72feb40e3ed8bfd02437fe3b6324b024f67,7b8cc72feb40e3ed8bfd02437fe3b6324b024f67,7b8cc72feb40e3ed8bfd02437fe3b6324b024f67,7b8cc72feb40e3ed8bfd02437fe3b6324b024f67,36537ab9e56ad7c36ed8abe9d402636d6aca9a5d,7b8cc72feb40e3ed8bfd02437fe3b6324b024f67,7b8cc72feb40e3ed8bfd02437fe3b6324b024f67,736f50fa433d4b54c29c56d3649bac8db8227539..dd6e3f15017eb87b04885fd5164681bbdd9a3073
mode 100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,000000,100644,100644,100644,100644,100644,100644,100644,100644,100644..100644
--- /dev/null
--- ------ ---------#include <linux/string.h>
          +         #include <linux/init.h>
          +         #include <linux/kernel.h>
          +         #include <linux/sched.h>
          +         #include <linux/string.h>
          +         #include <linux/bootmem.h>
          +         #include <linux/bitops.h>
          +         #include <linux/module.h>
          +         #include <linux/kgdb.h>
          +         #include <linux/topology.h>
--- ------ ---------#include <linux/module.h>
          +         #include <linux/delay.h>
          +         #include <linux/smp.h>
--- ------ ---------#include <asm/processor.h>
          +         #include <linux/percpu.h>
       -        -       unsigned int n, dummy, eax, ebx, ecx, edx;
          +         #include <asm/i387.h>
          +         #include <asm/msr.h>
          +         #include <asm/io.h>
+++++++++++++++++++ #include <asm/linkage.h>
          +         #include <asm/mmu_context.h>
          +         #include <asm/mtrr.h>
          +         #include <asm/mce.h>
          +         #include <asm/pat.h>
          +         #include <asm/numa.h>
          +         #ifdef CONFIG_X86_LOCAL_APIC
          +         #include <asm/mpspec.h>
          +         #include <asm/apic.h>
          +         #include <mach_apic.h>
          +         #endif
          +         #include <asm/pda.h>
          +         #include <asm/pgtable.h>
          +         #include <asm/processor.h>
          +         #include <asm/desc.h>
          +         #include <asm/atomic.h>
          +         #include <asm/proto.h>
          +         #include <asm/sections.h>
          +         #include <asm/setup.h>
          +         #include <asm/genapic.h>
          +         
          +         #include "cpu.h"
          +         
           +         /* We need valid kernel segments for data and code in long mode too;
           +          * IRET will check the segment types (kkeil 2000/10/28).
           +          * sysret also mandates a special GDT layout.
           +          */
          +         /* The TLS descriptors are currently at a different place compared to i386.
          +            Hopefully nobody expects them at a fixed place (Wine?) */
          +         DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = {
          +             [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } },
          +             [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } },
          +             [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } },
          +             [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } },
          +             [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } },
          +             [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } },
          +         } };
          +         EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
          +         
          +         __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata;
          +         
          +         /* Current gdt points %fs at the "master" per-cpu area: after this,
          +          * it's on the real one. */
          +         void switch_to_new_gdt(void)
          +         {
          +             struct desc_ptr gdt_descr;
          +         
          +             gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id());
          +             gdt_descr.size = GDT_SIZE - 1;
          +             load_gdt(&gdt_descr);
          +         }
          +         
          +         struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {};
          +         
          +         static void __cpuinit default_init(struct cpuinfo_x86 *c)
          +         {
          +             display_cacheinfo(c);
          +         }
          +         
          +         static struct cpu_dev __cpuinitdata default_cpu = {
          +             .c_init = default_init,
          +             .c_vendor = "Unknown",
          +         };
          +         static struct cpu_dev *this_cpu __cpuinitdata = &default_cpu;
          +         
          +         int __cpuinit get_model_name(struct cpuinfo_x86 *c)
          +         {
          +             unsigned int *v;
          +         
          +             if (c->extended_cpuid_level < 0x80000004)
          +                     return 0;
          +         
          +             v = (unsigned int *) c->x86_model_id;
          +             cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
          +             cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]);
          +             cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]);
          +             c->x86_model_id[48] = 0;
          +             return 1;
          +         }
          +         
          +         
          +         void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
          +         {
       -        -       if (n >= 0x80000008) {
       -        -               cpuid(0x80000008, &eax, &dummy, &dummy, &dummy);
       -        -               c->x86_virt_bits = (eax >> 8) & 0xff;
       -        -               c->x86_phys_bits = eax & 0xff;
       -        -       }
       +  +     +       unsigned int n, dummy, ebx, ecx, edx;
          +         
          +             n = c->extended_cpuid_level;
          +         
          +             if (n >= 0x80000005) {
          +                     cpuid(0x80000005, &dummy, &ebx, &ecx, &edx);
          +                     printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), "
          +                            "D cache %dK (%d bytes/line)\n",
          +                            edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
          +                     c->x86_cache_size = (ecx>>24) + (edx>>24);
          +                     /* On K8 L1 TLB is inclusive, so don't count it */
          +                     c->x86_tlbsize = 0;
          +             }
          +         
          +             if (n >= 0x80000006) {
          +                     cpuid(0x80000006, &dummy, &ebx, &ecx, &edx);
          +                     ecx = cpuid_ecx(0x80000006);
          +                     c->x86_cache_size = ecx >> 16;
          +                     c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff);
          +         
          +                     printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
          +                     c->x86_cache_size, ecx & 0xFF);
          +             }
---- ----- ---------    c->extended_cpuid_level = cpuid_eax(0x80000000);
          +         }
          +         
          +         void __cpuinit detect_ht(struct cpuinfo_x86 *c)
          +         {
          +         #ifdef CONFIG_SMP
          +             u32 eax, ebx, ecx, edx;
          +             int index_msb, core_bits;
          +         
          +             cpuid(1, &eax, &ebx, &ecx, &edx);
          +         
          +         
          +             if (!cpu_has(c, X86_FEATURE_HT))
          +                     return;
          +             if (cpu_has(c, X86_FEATURE_CMP_LEGACY))
          +                     goto out;
          +         
          +             smp_num_siblings = (ebx & 0xff0000) >> 16;
          +         
          +             if (smp_num_siblings == 1) {
          +                     printk(KERN_INFO  "CPU: Hyper-Threading is disabled\n");
          +             } else if (smp_num_siblings > 1) {
          +         
          +                     if (smp_num_siblings > NR_CPUS) {
          +                             printk(KERN_WARNING "CPU: Unsupported number of "
          +                                    "siblings %d", smp_num_siblings);
          +                             smp_num_siblings = 1;
          +                             return;
          +                     }
          +         
          +                     index_msb = get_count_order(smp_num_siblings);
          +                     c->phys_proc_id = phys_pkg_id(index_msb);
          +         
          +                     smp_num_siblings = smp_num_siblings / c->x86_max_cores;
          +         
          +                     index_msb = get_count_order(smp_num_siblings);
          +         
          +                     core_bits = get_count_order(c->x86_max_cores);
          +         
          +                     c->cpu_core_id = phys_pkg_id(index_msb) &
          +                                                    ((1 << core_bits) - 1);
          +             }
          +         out:
          +             if ((c->x86_max_cores * smp_num_siblings) > 1) {
          +                     printk(KERN_INFO  "CPU: Physical Processor ID: %d\n",
          +                            c->phys_proc_id);
          +                     printk(KERN_INFO  "CPU: Processor Core ID: %d\n",
          +                            c->cpu_core_id);
          +             }
          +         
          +         #endif
          +         }
          +         
          +         static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
          +         {
          +             char *v = c->x86_vendor_id;
          +             int i;
          +             static int printed;
          +         
          +             for (i = 0; i < X86_VENDOR_NUM; i++) {
          +                     if (cpu_devs[i]) {
          +                             if (!strcmp(v, cpu_devs[i]->c_ident[0]) ||
          +                                 (cpu_devs[i]->c_ident[1] &&
          +                                 !strcmp(v, cpu_devs[i]->c_ident[1]))) {
          +                                     c->x86_vendor = i;
          +                                     this_cpu = cpu_devs[i];
          +                                     return;
          +                             }
          +                     }
          +             }
          +             if (!printed) {
          +                     printed++;
          +                     printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n");
          +                     printk(KERN_ERR "CPU: Your system may be unstable.\n");
          +             }
          +             c->x86_vendor = X86_VENDOR_UNKNOWN;
          +         }
          +         
          +         static void __init early_cpu_support_print(void)
          +         {
          +             int i,j;
          +             struct cpu_dev *cpu_devx;
          +         
          +             printk("KERNEL supported cpus:\n");
          +             for (i = 0; i < X86_VENDOR_NUM; i++) {
          +                     cpu_devx = cpu_devs[i];
          +                     if (!cpu_devx)
          +                             continue;
          +                     for (j = 0; j < 2; j++) {
          +                             if (!cpu_devx->c_ident[j])
          +                                     continue;
          +                             printk("  %s %s\n", cpu_devx->c_vendor,
          +                                     cpu_devx->c_ident[j]);
          +                     }
          +             }
          +         }
          +         
          +         static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c);
          +         
          +         void __init early_cpu_init(void)
          +         {
          +                 struct cpu_vendor_dev *cvdev;
          +         
          +                 for (cvdev = __x86cpuvendor_start ;
          +                      cvdev < __x86cpuvendor_end   ;
          +                      cvdev++)
          +                         cpu_devs[cvdev->vendor] = cvdev->cpu_dev;
          +             early_cpu_support_print();
          +             early_identify_cpu(&boot_cpu_data);
          +         }
          +         
           +         /* Do some early cpuid on the boot CPU to get the parameters that are
           +            needed before check_bugs. Everything advanced is in identify_cpu
           +            below. */
          +         static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
          +         {
          +             u32 tfms, xlvl;
          +         
          +             c->loops_per_jiffy = loops_per_jiffy;
          +             c->x86_cache_size = -1;
          +             c->x86_vendor = X86_VENDOR_UNKNOWN;
          +             c->x86_model = c->x86_mask = 0; /* So far unknown... */
          +             c->x86_vendor_id[0] = '\0'; /* Unset */
          +             c->x86_model_id[0] = '\0';  /* Unset */
          +             c->x86_clflush_size = 64;
          +             c->x86_cache_alignment = c->x86_clflush_size;
          +             c->x86_max_cores = 1;
          +             c->x86_coreid_bits = 0;
          +             c->extended_cpuid_level = 0;
          +             memset(&c->x86_capability, 0, sizeof c->x86_capability);
          +         
          +             /* Get vendor name */
          +             cpuid(0x00000000, (unsigned int *)&c->cpuid_level,
          +                   (unsigned int *)&c->x86_vendor_id[0],
          +                   (unsigned int *)&c->x86_vendor_id[8],
          +                   (unsigned int *)&c->x86_vendor_id[4]);
          +         
          +             get_cpu_vendor(c);
          +         
          +             /* Initialize the standard set of capabilities */
          +             /* Note that the vendor-specific code below might override */
          +         
          +             /* Intel-defined flags: level 0x00000001 */
          +             if (c->cpuid_level >= 0x00000001) {
          +                     __u32 misc;
          +                     cpuid(0x00000001, &tfms, &misc, &c->x86_capability[4],
          +                           &c->x86_capability[0]);
          +                     c->x86 = (tfms >> 8) & 0xf;
          +                     c->x86_model = (tfms >> 4) & 0xf;
          +                     c->x86_mask = tfms & 0xf;
          +                     if (c->x86 == 0xf)
          +                             c->x86 += (tfms >> 20) & 0xff;
          +                     if (c->x86 >= 0x6)
          +                             c->x86_model += ((tfms >> 16) & 0xF) << 4;
          +                     if (test_cpu_cap(c, X86_FEATURE_CLFLSH))
          +                             c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
          +             } else {
          +                     /* Have CPUID level 0 only - unheard of */
          +                     c->x86 = 4;
          +             }
          +         
          +             c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xff;
          +         #ifdef CONFIG_SMP
          +             c->phys_proc_id = c->initial_apicid;
          +         #endif
          +             /* AMD-defined flags: level 0x80000001 */
          +             xlvl = cpuid_eax(0x80000000);
          +             c->extended_cpuid_level = xlvl;
          +             if ((xlvl & 0xffff0000) == 0x80000000) {
          +                     if (xlvl >= 0x80000001) {
          +                             c->x86_capability[1] = cpuid_edx(0x80000001);
          +                             c->x86_capability[6] = cpuid_ecx(0x80000001);
          +                     }
          +                     if (xlvl >= 0x80000004)
          +                             get_model_name(c); /* Default name */
          +             }
          +         
          +             /* Transmeta-defined flags: level 0x80860001 */
          +             xlvl = cpuid_eax(0x80860000);
          +             if ((xlvl & 0xffff0000) == 0x80860000) {
           +                     /* Don't set x86_cpuid_level here for now, to avoid confusion. */
          +                     if (xlvl >= 0x80860001)
          +                             c->x86_capability[2] = cpuid_edx(0x80860001);
          +             }
          +         
                -       /* Assume all 64-bit CPUs support 32-bit syscall */
                -       set_cpu_cap(c, X86_FEATURE_SYSCALL32);
          +             if (c->extended_cpuid_level >= 0x80000007)
          +                     c->x86_power = cpuid_edx(0x80000007);
          +         
------- -- ----- --     /* Assume all 64-bit CPUs support 32-bit syscall */
------- -- ----- --     set_cpu_cap(c, X86_FEATURE_SYSCALL32);
------- -- ----- -- 
       +  +     +       if (c->extended_cpuid_level >= 0x80000008) {
       +  +     +               u32 eax = cpuid_eax(0x80000008);
       +  +     +   
       +  +     +               c->x86_virt_bits = (eax >> 8) & 0xff;
       +  +     +               c->x86_phys_bits = eax & 0xff;
       +  +     +       }
       +  +         
---- ----- ---------
---- ----- ---------    /* early_param could clear that, but recall get it set again */
---- ----- ---------    if (disable_apic)
---- ----- ---------            clear_cpu_cap(c, X86_FEATURE_APIC);
          +             if (c->x86_vendor != X86_VENDOR_UNKNOWN &&
          +                 cpu_devs[c->x86_vendor]->c_early_init)
          +                     cpu_devs[c->x86_vendor]->c_early_init(c);
          +         
          +             validate_pat_support(c);
---------- --------                        DEBUG_STKSZ]
---------- -------- __attribute__((section(".bss.page_aligned")));
          +         }
          +         
          +         /*
          +          * This does the hard work of actually picking apart the CPU stuff...
          +          */
          +         static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
          +         {
          +             int i;
          +         
          +             early_identify_cpu(c);
          +         
          +             init_scattered_cpuid_features(c);
          +         
          +             c->apicid = phys_pkg_id(0);
          +         
           +             /*
           +              * Vendor-specific initialization.  In this section we
           +              * canonicalize the feature flags: if a CPU supports
           +              * features that CPUID doesn't report, if CPUID claims
           +              * incorrect flags, or if there are other bugs, we
           +              * handle them here.
           +              *
           +              * At the end of this section, c->x86_capability had better
           +              * indicate the features this CPU genuinely supports!
           +              */
          +             if (this_cpu->c_init)
          +                     this_cpu->c_init(c);
          +         
          +             detect_ht(c);
          +         
          +             /*
          +              * On SMP, boot_cpu_data holds the common feature set between
          +              * all CPUs; so make sure that we indicate which features are
          +              * common between the CPUs.  The first time this routine gets
          +              * executed, c == &boot_cpu_data.
          +              */
          +             if (c != &boot_cpu_data) {
          +                     /* AND the already accumulated flags with these */
          +                     for (i = 0; i < NCAPINTS; i++)
          +                             boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
          +             }
          +         
           +             /* Clear all flags overridden by options */
          +             for (i = 0; i < NCAPINTS; i++)
          +                     c->x86_capability[i] &= ~cleared_cpu_caps[i];
          +         
          +         #ifdef CONFIG_X86_MCE
          +             mcheck_init(c);
          +         #endif
          +             select_idle_routine(c);
          +         
          +         #ifdef CONFIG_NUMA
          +             numa_add_cpu(smp_processor_id());
          +         #endif
          +         
          +         }
          +         
          +         void __cpuinit identify_boot_cpu(void)
          +         {
          +             identify_cpu(&boot_cpu_data);
          +         }
          +         
          +         void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
          +         {
          +             BUG_ON(c == &boot_cpu_data);
          +             identify_cpu(c);
          +             mtrr_ap_init();
          +         }
          +         
          +         static __init int setup_noclflush(char *arg)
          +         {
          +             setup_clear_cpu_cap(X86_FEATURE_CLFLSH);
          +             return 1;
          +         }
          +         __setup("noclflush", setup_noclflush);
          +         
          +         void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
          +         {
          +             if (c->x86_model_id[0])
          +                     printk(KERN_CONT "%s", c->x86_model_id);
          +         
          +             if (c->x86_mask || c->cpuid_level >= 0)
          +                     printk(KERN_CONT " stepping %02x\n", c->x86_mask);
          +             else
          +                     printk(KERN_CONT "\n");
          +         }
          +         
          +         static __init int setup_disablecpuid(char *arg)
          +         {
          +             int bit;
          +             if (get_option(&arg, &bit) && bit < NCAPINTS*32)
          +                     setup_clear_cpu_cap(bit);
          +             else
          +                     return 0;
          +             return 1;
          +         }
          +         __setup("clearcpuid=", setup_disablecpuid);
          +         
          +         cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
          +         
          +         struct x8664_pda **_cpu_pda __read_mostly;
          +         EXPORT_SYMBOL(_cpu_pda);
          +         
          +         struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
          +         
          +         char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss;
          +         
          +         unsigned long __supported_pte_mask __read_mostly = ~0UL;
          +         EXPORT_SYMBOL_GPL(__supported_pte_mask);
          +         
          +         static int do_not_nx __cpuinitdata;
          +         
           +         /* noexec=on|off
           +         Control non-executable mappings for 64bit processes.
           +         
           +         on  Enable (default)
          +         off Disable
          +         */
          +         static int __init nonx_setup(char *str)
          +         {
          +             if (!str)
          +                     return -EINVAL;
          +             if (!strncmp(str, "on", 2)) {
          +                     __supported_pte_mask |= _PAGE_NX;
          +                     do_not_nx = 0;
          +             } else if (!strncmp(str, "off", 3)) {
          +                     do_not_nx = 1;
          +                     __supported_pte_mask &= ~_PAGE_NX;
          +             }
          +             return 0;
          +         }
          +         early_param("noexec", nonx_setup);
          +         
          +         int force_personality32;
          +         
           +         /* noexec32=on|off
           +         Control non-executable heap for 32bit processes.
           +         To control the stack too, use noexec=off
          +         
          +         on  PROT_READ does not imply PROT_EXEC for 32bit processes (default)
          +         off PROT_READ implies PROT_EXEC
          +         */
          +         static int __init nonx32_setup(char *str)
          +         {
          +             if (!strcmp(str, "on"))
          +                     force_personality32 &= ~READ_IMPLIES_EXEC;
          +             else if (!strcmp(str, "off"))
          +                     force_personality32 |= READ_IMPLIES_EXEC;
          +             return 1;
          +         }
          +         __setup("noexec32=", nonx32_setup);
          +         
          +         void pda_init(int cpu)
          +         {
          +             struct x8664_pda *pda = cpu_pda(cpu);
          +         
           +             /* Set up data that may be needed in __get_free_pages early */
          +             loadsegment(fs, 0);
          +             loadsegment(gs, 0);
          +             /* Memory clobbers used to order PDA accessed */
          +             mb();
          +             wrmsrl(MSR_GS_BASE, pda);
          +             mb();
          +         
          +             pda->cpunumber = cpu;
          +             pda->irqcount = -1;
          +             pda->kernelstack = (unsigned long)stack_thread_info() -
          +                                      PDA_STACKOFFSET + THREAD_SIZE;
          +             pda->active_mm = &init_mm;
          +             pda->mmu_state = 0;
          +         
          +             if (cpu == 0) {
          +                     /* others are initialized in smpboot.c */
          +                     pda->pcurrent = &init_task;
          +                     pda->irqstackptr = boot_cpu_stack;
          +             } else {
          +                     pda->irqstackptr = (char *)
          +                             __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
          +                     if (!pda->irqstackptr)
          +                             panic("cannot allocate irqstack for cpu %d", cpu);
          +         
          +                     if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE)
          +                             pda->nodenumber = cpu_to_node(cpu);
          +             }
          +         
          +             pda->irqstackptr += IRQSTACKSIZE-64;
          +         }
          +         
          +         char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ +
+++++++++++++++++++                        DEBUG_STKSZ] __page_aligned_bss;
          +         
          +         extern asmlinkage void ignore_sysret(void);
          +         
          +         /* May not be marked __init: used by software suspend */
          +         void syscall_init(void)
          +         {
           +             /*
           +              * LSTAR and STAR live in a somewhat strange symbiosis.
           +              * They both write to the same internal register. STAR allows
           +              * setting CS/DS, but only a 32bit target. LSTAR sets the 64bit rip.
           +              */
          +             wrmsrl(MSR_STAR,  ((u64)__USER32_CS)<<48  | ((u64)__KERNEL_CS)<<32);
          +             wrmsrl(MSR_LSTAR, system_call);
          +             wrmsrl(MSR_CSTAR, ignore_sysret);
          +         
          +         #ifdef CONFIG_IA32_EMULATION
          +             syscall32_cpu_init();
          +         #endif
          +         
          +             /* Flags to clear on syscall */
          +             wrmsrl(MSR_SYSCALL_MASK,
          +                    X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL);
          +         }
          +         
          +         void __cpuinit check_efer(void)
          +         {
          +             unsigned long efer;
          +         
          +             rdmsrl(MSR_EFER, efer);
          +             if (!(efer & EFER_NX) || do_not_nx)
          +                     __supported_pte_mask &= ~_PAGE_NX;
          +         }
          +         
          +         unsigned long kernel_eflags;
          +         
          +         /*
          +          * Copies of the original ist values from the tss are only accessed during
          +          * debugging, no special alignment required.
          +          */
          +         DEFINE_PER_CPU(struct orig_ist, orig_ist);
          +         
          +         /*
          +          * cpu_init() initializes state that is per-CPU. Some data is already
          +          * initialized (naturally) in the bootstrap process, such as the GDT
           +          * and IDT. We reload them nevertheless: this function acts as a
           +          * 'CPU state barrier', and nothing should get across it.
          +          * A lot of state is already set up in PDA init.
          +          */
          +         void __cpuinit cpu_init(void)
          +         {
          +             int cpu = stack_smp_processor_id();
          +             struct tss_struct *t = &per_cpu(init_tss, cpu);
          +             struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu);
          +             unsigned long v;
          +             char *estacks = NULL;
          +             struct task_struct *me;
          +             int i;
          +         
          +             /* CPU 0 is initialised in head64.c */
          +             if (cpu != 0)
          +                     pda_init(cpu);
          +             else
          +                     estacks = boot_exception_stacks;
          +         
          +             me = current;
          +         
          +             if (cpu_test_and_set(cpu, cpu_initialized))
          +                     panic("CPU#%d already initialized!\n", cpu);
          +         
          +             printk(KERN_INFO "Initializing CPU#%d\n", cpu);
          +         
          +             clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
          +         
          +             /*
          +              * Initialize the per-CPU GDT with the boot GDT,
          +              * and set up the GDT descriptor:
          +              */
          +         
          +             switch_to_new_gdt();
          +             load_idt((const struct desc_ptr *)&idt_descr);
          +         
          +             memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
          +             syscall_init();
          +         
          +             wrmsrl(MSR_FS_BASE, 0);
          +             wrmsrl(MSR_KERNEL_GS_BASE, 0);
          +             barrier();
          +         
          +             check_efer();
          +         
          +             /*
          +              * set up and load the per-CPU TSS
          +              */
          +             for (v = 0; v < N_EXCEPTION_STACKS; v++) {
          +                     static const unsigned int order[N_EXCEPTION_STACKS] = {
          +                             [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
          +                             [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
          +                     };
          +                     if (cpu) {
          +                             estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
          +                             if (!estacks)
          +                                     panic("Cannot allocate exception stack %ld %d\n",
          +                                           v, cpu);
          +                     }
          +                     estacks += PAGE_SIZE << order[v];
          +                     orig_ist->ist[v] = t->x86_tss.ist[v] = (unsigned long)estacks;
          +             }
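A side note on the IST loop just above: estacks is advanced by the allocation size before being stored, so ist[] ends up holding the top of each exception stack, and the debug vector gets a larger stack than the rest. The sketch below only illustrates the sizing; all constants in it are assumptions, not values from this commit.

/* Illustrative sizing of the per-vector exception stacks set up above.
 * All constants here are assumed for the demo. */
#include <stdio.h>

#define PAGE_SIZE              4096UL
#define N_EXCEPTION_STACKS     5    /* assumed */
#define EXCEPTION_STACK_ORDER  0    /* assumed */
#define DEBUG_STACK            4    /* assumed 1-based index of the debug IST */
#define DEBUG_STACK_ORDER      1    /* assumed */

int main(void)
{
        static const unsigned int order[N_EXCEPTION_STACKS] = {
                [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
                [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
        };
        int v;

        for (v = 0; v < N_EXCEPTION_STACKS; v++)
                printf("exception stack %d: %lu bytes, ist[%d] records its top\n",
                       v, PAGE_SIZE << order[v], v);
        return 0;
}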
          +         
          +             t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
          +             /*
          +              * <= is required because the CPU will access up to
          +              * 8 bits beyond the end of the IO permission bitmap.
          +              */
          +             for (i = 0; i <= IO_BITMAP_LONGS; i++)
          +                     t->io_bitmap[i] = ~0UL;
          +         
          +             atomic_inc(&init_mm.mm_count);
          +             me->active_mm = &init_mm;
          +             if (me->mm)
          +                     BUG();
          +             enter_lazy_tlb(&init_mm, me);
          +         
          +             load_sp0(t, &current->thread);
          +             set_tss_desc(cpu, t);
          +             load_TR_desc();
          +             load_LDT(&init_mm.context);
          +         
          +         #ifdef CONFIG_KGDB
          +             /*
          +              * If the kgdb is connected no debug regs should be altered.  This
          +              * is only applicable when KGDB and a KGDB I/O module are built
          +              * into the kernel and you are using early debugging with
          +              * kgdbwait. KGDB will control the kernel HW breakpoint registers.
          +              */
          +             if (kgdb_connected && arch_kgdb_ops.correct_hw_break)
          +                     arch_kgdb_ops.correct_hw_break();
          +             else {
          +         #endif
          +             /*
          +              * Clear all 6 debug registers:
          +              */
          +         
          +             set_debugreg(0UL, 0);
          +             set_debugreg(0UL, 1);
          +             set_debugreg(0UL, 2);
          +             set_debugreg(0UL, 3);
          +             set_debugreg(0UL, 6);
          +             set_debugreg(0UL, 7);
          +         #ifdef CONFIG_KGDB
          +             /* If the kgdb is connected no debug regs should be altered. */
          +             }
          +         #endif
          +         
          +             fpu_init();
          +         
          +             raw_local_save_flags(kernel_eflags);
          +         
          +             if (is_uv_system())
          +                     uv_cpu_init();
          +         }
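One detail of the cpu_init() hunk above is easy to miss: the I/O bitmap fill loop runs to <= IO_BITMAP_LONGS, writing one guard long past the bitmap because the CPU may read up to 8 bits beyond its end. A tiny sketch of that count follows; IO_BITMAP_BITS and the word size are assumptions for the example.

/* Sketch of the "i <= IO_BITMAP_LONGS" guard-long count in cpu_init() above.
 * The constants are assumed for the example. */
#include <stdio.h>

#define IO_BITMAP_BITS   65536                      /* one bit per I/O port, assumed */
#define BITS_PER_LONG    64                         /* assumed 64-bit build          */
#define IO_BITMAP_LONGS  (IO_BITMAP_BITS / BITS_PER_LONG)

int main(void)
{
        printf("bitmap longs: %d, longs written by the fill loop: %d\n",
               IO_BITMAP_LONGS, IO_BITMAP_LONGS + 1);
        return 0;
}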
index a0e11c0cc872f03b1ca9b85fd9521f19fa8a3269,a0e11c0cc872f03b1ca9b85fd9521f19fa8a3269,a0e11c0cc872f03b1ca9b85fd9521f19fa8a3269,a0e11c0cc872f03b1ca9b85fd9521f19fa8a3269,a0e11c0cc872f03b1ca9b85fd9521f19fa8a3269,a0e11c0cc872f03b1ca9b85fd9521f19fa8a3269,a4665f37cfc5dc9f3d3b7097c08166d061643de6,510b8e367732bffa2fa4804109dbc62f4755f2a5,a4665f37cfc5dc9f3d3b7097c08166d061643de6,a0e11c0cc872f03b1ca9b85fd9521f19fa8a3269,9f51e1ea9e8225e919ccb69ad6b4bc315c6f38cc,a0e11c0cc872f03b1ca9b85fd9521f19fa8a3269,a4665f37cfc5dc9f3d3b7097c08166d061643de6,a4665f37cfc5dc9f3d3b7097c08166d061643de6,a0e11c0cc872f03b1ca9b85fd9521f19fa8a3269,a0e11c0cc872f03b1ca9b85fd9521f19fa8a3269,a4665f37cfc5dc9f3d3b7097c08166d061643de6,a0e11c0cc872f03b1ca9b85fd9521f19fa8a3269,a0e11c0cc872f03b1ca9b85fd9521f19fa8a3269,a0e11c0cc872f03b1ca9b85fd9521f19fa8a3269..4353cf5e6fac8b4d329e18def887dadd3f55bbb8
                    #include <asm/dma.h>
                    #include <asm/io_apic.h>
                    #include <asm/apic.h>
------- ------------
------- ------------#ifdef CONFIG_GART_IOMMU
------- ------------#include <asm/gart.h>
------- ------------#endif
+++++++ ++++++++++++#include <asm/iommu.h>
                    
                    static void __init fix_hypertransport_config(int num, int slot, int func)
                    {
                    static void __init via_bugs(int  num, int slot, int func)
                    {
                    #ifdef CONFIG_GART_IOMMU
          -             if ((end_pfn > MAX_DMA32_PFN ||  force_iommu) &&
          +             if ((max_pfn > MAX_DMA32_PFN ||  force_iommu) &&
                            !gart_iommu_aperture_allowed) {
                                printk(KERN_INFO
                                       "Looks like a VIA chipset. Disabling IOMMU."
@@@@@@@@@@@@@@@@@@@@@ -98,6 -98,6 -98,6 -98,6 -98,6 -98,6 -98,6 -95,6 -98,6 -98,6 -98,17 -98,6 -98,6 -98,6 -98,6 -98,6 -98,6 -98,6 -98,6 -98,6 +95,6 @@@@@@@@@@@@@@@@@@@@@ static void __init nvidia_bugs(int num
                    
                    }
                    
          -         static void __init ati_bugs(int num, int slot, int func)
          -         {
          -         #ifdef CONFIG_X86_IO_APIC
          -             if (timer_over_8254 == 1) {
          -                     timer_over_8254 = 0;
          -                     printk(KERN_INFO
          -                     "ATI board detected. Disabling timer routing over 8254.\n");
          -             }
          -         #endif
          -         }
          -         
                    #define QFLAG_APPLY_ONCE    0x1
                    #define QFLAG_APPLIED               0x2
                    #define QFLAG_DONE          (QFLAG_APPLY_ONCE|QFLAG_APPLIED)
@@@@@@@@@@@@@@@@@@@@@ -115,23 -115,23 -115,23 -115,23 -115,23 -115,23 -115,12 -112,12 -115,12 -115,23 -126,14 -115,23 -115,12 -115,12 -115,23 -115,23 -115,12 -115,23 -115,23 -115,23 +112,23 @@@@@@@@@@@@@@@@@@@@@ static struct chipset early_qrk[] __ini
                          PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, nvidia_bugs },
                        { PCI_VENDOR_ID_VIA, PCI_ANY_ID,
                          PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, via_bugs },
          -             { PCI_VENDOR_ID_ATI, PCI_ANY_ID,
          -               PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, ati_bugs },
                        { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB,
                          PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, fix_hypertransport_config },
                        {}
                    };
                    
      --- - --  -   static void __init check_dev_quirk(int num, int slot, int func)
      +++ + ++  +   /**
      +++ + ++  +    * check_dev_quirk - apply early quirks to a given PCI device
      +++ + ++  +    * @num: bus number
      +++ + ++  +    * @slot: slot number
      +++ + ++  +    * @func: PCI function
      +++ + ++  +    *
      +++ + ++  +    * Check the vendor & device ID against the early quirks table.
      +++ + ++  +    *
      +++ + ++  +    * If the device is single function, let early_quirks() know so we don't
      +++ + ++  +    * poke at this device again.
      +++ + ++  +    */
      +++ + ++  +   static int __init check_dev_quirk(int num, int slot, int func)
                    {
                        u16 class;
                        u16 vendor;
                        class = read_pci_config_16(num, slot, func, PCI_CLASS_DEVICE);
                    
                        if (class == 0xffff)
      --- - --  -               return;
      +++ + ++  +               return -1; /* no class, treat as single function */
                    
                        vendor = read_pci_config_16(num, slot, func, PCI_VENDOR_ID);
                    
                        type = read_pci_config_byte(num, slot, func,
                                                    PCI_HEADER_TYPE);
                        if (!(type & 0x80))
      --- - --  -               return;
      +++ + ++  +               return -1;
      +++ + ++  +   
      +++ + ++  +       return 0;
                    }
                    
                    void __init early_quirks(void)
                        /* Poor man's PCI discovery */
                        for (num = 0; num < 32; num++)
                                for (slot = 0; slot < 32; slot++)
      --- - --  -                       for (func = 0; func < 8; func++)
      --- - --  -                               check_dev_quirk(num, slot, func);
      +++ + ++  +                       for (func = 0; func < 8; func++) {
      +++ + ++  +                               /* Only probe function 0 on single fn devices */
      +++ + ++  +                               if (check_dev_quirk(num, slot, func))
      +++ + ++  +                                       break;
      +++ + ++  +                       }
                    }
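The early_quirks() change above stops probing functions 1-7 of a slot once check_dev_quirk() reports a single-function device. That decision rests on bit 7 of the PCI header-type register; a small stand-alone sketch of the test follows (the helper name and sample values are assumptions, not kernel code).

/* Sketch of the multi-function test behind the "break" added above:
 * bit 7 of the PCI header type marks a multi-function device. */
#include <stdio.h>

static int is_multi_function(unsigned char header_type)
{
        return (header_type & 0x80) != 0;
}

int main(void)
{
        unsigned char samples[] = { 0x00, 0x80 };   /* made-up header type values */
        unsigned int i;

        for (i = 0; i < sizeof(samples); i++)
                printf("header type 0x%02x -> probe functions 1-7: %s\n",
                       (unsigned)samples[i],
                       is_multi_function(samples[i]) ? "yes" : "no");
        return 0;
}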
index 6bc07f0f1202eeb1eaac0b55064acad7f355371a,6bc07f0f1202eeb1eaac0b55064acad7f355371a,6bc07f0f1202eeb1eaac0b55064acad7f355371a,6bc07f0f1202eeb1eaac0b55064acad7f355371a,6bc07f0f1202eeb1eaac0b55064acad7f355371a,6bc07f0f1202eeb1eaac0b55064acad7f355371a,6bc07f0f1202eeb1eaac0b55064acad7f355371a,53393c306e11b553f1691c4ee607873f18c894af,6bc07f0f1202eeb1eaac0b55064acad7f355371a,6bc07f0f1202eeb1eaac0b55064acad7f355371a,c778e4fa55a2eacc79d34954c2fdcef1367d5732,6bc07f0f1202eeb1eaac0b55064acad7f355371a,6bc07f0f1202eeb1eaac0b55064acad7f355371a,6bc07f0f1202eeb1eaac0b55064acad7f355371a,6bc07f0f1202eeb1eaac0b55064acad7f355371a,cadf73f70d336f5716e0f464209492734fd6df58,cfe28a715434762352df73207da0fe422c76e713,6bc07f0f1202eeb1eaac0b55064acad7f355371a,ad5264c29e9b4b161a67c4d5e810795af838e819,6bc07f0f1202eeb1eaac0b55064acad7f355371a..cdfd94cc6b14e4fd1c06c058904e13c2f6575810
                    #include <asm/percpu.h>
                    #include <asm/dwarf2.h>
                    #include <asm/processor-flags.h>
          -         #include "irq_vectors.h"
       +  +     +   #include <asm/ftrace.h>
          +         #include <asm/irq_vectors.h>
                    
                    /*
                     * We use macros for low-level operations which need to be overridden
                     * for paravirtualization.  The following will never clobber any registers:
                     *   INTERRUPT_RETURN (aka. "iret")
                     *   GET_CR0_INTO_EAX (aka. "movl %cr0, %eax")
          -          *   ENABLE_INTERRUPTS_SYSCALL_RET (aka "sti; sysexit").
          +          *   ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit").
                     *
                     * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must
                     * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY).
                        GET_THREAD_INFO(%ebp)
                    
                        /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
--------------- ----    testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
+++++++++++++++ ++++    testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
                        jnz syscall_trace_entry
                        cmpl $(nr_syscalls), %eax
                        jae syscall_badsys
                        xorl %ebp,%ebp
                        TRACE_IRQS_ON
                    1:  mov  PT_FS(%esp), %fs
          -             ENABLE_INTERRUPTS_SYSCALL_RET
          +             ENABLE_INTERRUPTS_SYSEXIT
                        CFI_ENDPROC
                    .pushsection .fixup,"ax"
                    2:  movl $0,PT_FS(%esp)
                        GET_THREAD_INFO(%ebp)
                                                        # system call tracing in operation / emulation
                        /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
--------------- ----    testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
+++++++++++++++ ++++    testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
                        jnz syscall_trace_entry
                        cmpl $(nr_syscalls), %eax
                        jae syscall_badsys
                                                        # setting need_resched or sigpending
                                                        # between sampling and the iret
                        TRACE_IRQS_OFF
--------------- ----    testl $X86_EFLAGS_TF,PT_EFLAGS(%esp)    # If tracing set singlestep flag on exit
--------------- ----    jz no_singlestep
--------------- ----    orl $_TIF_SINGLESTEP,TI_flags(%ebp)
--------------- ----no_singlestep:
                        movl TI_flags(%ebp), %ecx
                        testw $_TIF_ALLWORK_MASK, %cx   # current->work
                        jne syscall_exit_work
                    syscall_trace_entry:
                        movl $-ENOSYS,PT_EAX(%esp)
                        movl %esp, %eax
--------------- ----    xorl %edx,%edx
--------------- ----    call do_syscall_trace
--------------- ----    cmpl $0, %eax
--------------- ----    jne resume_userspace            # ret != 0 -> running under PTRACE_SYSEMU,
--------------- ----                                    # so must skip actual syscall
--------------- ----    movl PT_ORIG_EAX(%esp), %eax
+++++++++++++++ ++++    call syscall_trace_enter
+++++++++++++++ ++++    /* What it returned is what we'll actually use.  */
                        cmpl $(nr_syscalls), %eax
                        jnae syscall_call
                        jmp syscall_exit
                        # perform syscall exit tracing
                        ALIGN
                    syscall_exit_work:
--------------- ----    testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl
+++++++++++++++ ++++    testb $_TIF_WORK_SYSCALL_EXIT, %cl
                        jz work_pending
                        TRACE_IRQS_ON
--------------- ----    ENABLE_INTERRUPTS(CLBR_ANY)     # could let do_syscall_trace() call
+++++++++++++++ ++++    ENABLE_INTERRUPTS(CLBR_ANY)     # could let syscall_trace_leave() call
                                                        # schedule() instead
                        movl %esp, %eax
--------------- ----    movl $1, %edx
--------------- ----    call do_syscall_trace
+++++++++++++++ ++++    call syscall_trace_leave
                        jmp resume_userspace
                    END(syscall_exit_work)
                        CFI_ENDPROC
                    .previous
                    END(native_iret)
                    
          -         ENTRY(native_irq_enable_syscall_ret)
          +         ENTRY(native_irq_enable_sysexit)
                        sti
                        sysexit
          -         END(native_irq_enable_syscall_ret)
          +         END(native_irq_enable_sysexit)
                    #endif
                    
                    KPROBE_ENTRY(int3)
                    ENTRY(xen_sysenter_target)
                        RING0_INT_FRAME
                        addl $5*4, %esp         /* remove xen-provided frame */
++++++++++++++++++ +    CFI_ADJUST_CFA_OFFSET -5*4
                        jmp sysenter_past_esp
       +  +             CFI_ENDPROC
                    
                    ENTRY(xen_hypervisor_callback)
                        CFI_STARTPROC
                    
                    #endif      /* CONFIG_XEN */
                    
       +  +     +   #ifdef CONFIG_FTRACE
       +  +     +   #ifdef CONFIG_DYNAMIC_FTRACE
       +  +     +   
       +  +     +   ENTRY(mcount)
       +  +     +       pushl %eax
       +  +     +       pushl %ecx
       +  +     +       pushl %edx
       +  +     +       movl 0xc(%esp), %eax
       +  +     +       subl $MCOUNT_INSN_SIZE, %eax
       +  +     +   
       +  +     +   .globl mcount_call
       +  +     +   mcount_call:
       +  +     +       call ftrace_stub
       +  +     +   
       +  +     +       popl %edx
       +  +     +       popl %ecx
       +  +     +       popl %eax
       +  +     +   
       +  +     +       ret
       +  +     +   END(mcount)
       +  +     +   
       +  +     +   ENTRY(ftrace_caller)
       +  +     +       pushl %eax
       +  +     +       pushl %ecx
       +  +     +       pushl %edx
       +  +     +       movl 0xc(%esp), %eax
       +  +     +       movl 0x4(%ebp), %edx
       +  +     +       subl $MCOUNT_INSN_SIZE, %eax
       +  +     +   
       +  +     +   .globl ftrace_call
       +  +     +   ftrace_call:
       +  +     +       call ftrace_stub
       +  +     +   
       +  +     +       popl %edx
       +  +     +       popl %ecx
       +  +     +       popl %eax
       +  +     +   
       +  +     +   .globl ftrace_stub
       +  +     +   ftrace_stub:
       +  +     +       ret
       +  +     +   END(ftrace_caller)
       +  +     +   
       +  +     +   #else /* ! CONFIG_DYNAMIC_FTRACE */
       +  +     +   
       +  +     +   ENTRY(mcount)
       +  +     +       cmpl $ftrace_stub, ftrace_trace_function
       +  +     +       jnz trace
       +  +     +   .globl ftrace_stub
       +  +     +   ftrace_stub:
       +  +     +       ret
       +  +     +   
       +  +     +       /* taken from glibc */
       +  +     +   trace:
       +  +     +       pushl %eax
       +  +     +       pushl %ecx
       +  +     +       pushl %edx
       +  +     +       movl 0xc(%esp), %eax
       +  +     +       movl 0x4(%ebp), %edx
       +  +     +       subl $MCOUNT_INSN_SIZE, %eax
       +  +     +   
       +  +     +       call *ftrace_trace_function
       +  +     +   
       +  +     +       popl %edx
       +  +     +       popl %ecx
       +  +     +       popl %eax
       +  +     +   
       +  +     +       jmp ftrace_stub
       +  +     +   END(mcount)
       +  +     +   #endif /* CONFIG_DYNAMIC_FTRACE */
       +  +     +   #endif /* CONFIG_FTRACE */
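For readers following the new mcount/ftrace_caller stubs above: with -pg the compiler inserts a call to mcount() in every function prologue, and the stub forwards the adjusted call-site address plus the parent's return address to ftrace_trace_function unless it still points at ftrace_stub. The C-level sketch below mirrors that flow; MCOUNT_INSN_SIZE and every name in it are assumptions used only for illustration.

/* Hedged C-level sketch of the mcount stubs above; not kernel code. */
#include <stdio.h>

#define MCOUNT_INSN_SIZE 5      /* size of the call instruction, assumed */

typedef void (*trace_fn)(unsigned long ip, unsigned long parent_ip);

static void ftrace_stub(unsigned long ip, unsigned long parent_ip)
{
        (void)ip;
        (void)parent_ip;        /* default: trace nothing */
}

static void demo_tracer(unsigned long ip, unsigned long parent_ip)
{
        printf("traced call site %#lx, called from %#lx\n", ip, parent_ip);
}

static trace_fn ftrace_trace_function = ftrace_stub;

static void mcount_sketch(unsigned long return_ip, unsigned long parent_ip)
{
        /* return_ip points just after the call to mcount; back up to the call site */
        if (ftrace_trace_function != ftrace_stub)
                ftrace_trace_function(return_ip - MCOUNT_INSN_SIZE, parent_ip);
}

int main(void)
{
        ftrace_trace_function = demo_tracer;
        mcount_sketch(0x1005, 0x2000);          /* made-up addresses */
        return 0;
}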
       +  +     +   
                    .section .rodata,"a"
                    #include "syscall_table_32.S"
                    
index ae63e584c340cbafd342af95e18bce946b474525,ae63e584c340cbafd342af95e18bce946b474525,ae63e584c340cbafd342af95e18bce946b474525,ae63e584c340cbafd342af95e18bce946b474525,ae63e584c340cbafd342af95e18bce946b474525,ae63e584c340cbafd342af95e18bce946b474525,ba41bf42748d7657548b8e39191d9496f86a9480,466b9284ed2f31a1aead9185bbf3d038196eac0f,ae63e584c340cbafd342af95e18bce946b474525,ae63e584c340cbafd342af95e18bce946b474525,556a8df522a7adcf9583164be4cad60e29b00a97,ae63e584c340cbafd342af95e18bce946b474525,ba41bf42748d7657548b8e39191d9496f86a9480,ae63e584c340cbafd342af95e18bce946b474525,ae63e584c340cbafd342af95e18bce946b474525,63001c6ecf6d0805afe72cfe78d906249d323f54,466b9284ed2f31a1aead9185bbf3d038196eac0f,ae63e584c340cbafd342af95e18bce946b474525,ae63e584c340cbafd342af95e18bce946b474525,80d5663db3bcecdc7ee5c6461478bda415635492..8410e26f418337d7fc37d77dba6a1f0e60e7f525
                    #include <asm/page.h>
                    #include <asm/irqflags.h>
                    #include <asm/paravirt.h>
       +  +     +   #include <asm/ftrace.h>
                    
                        .code64
                    
       +  +     +   #ifdef CONFIG_FTRACE
       +  +     +   #ifdef CONFIG_DYNAMIC_FTRACE
       +  +     +   ENTRY(mcount)
       +  +     +   
       +  +     +       subq $0x38, %rsp
       +  +     +       movq %rax, (%rsp)
       +  +     +       movq %rcx, 8(%rsp)
       +  +     +       movq %rdx, 16(%rsp)
       +  +     +       movq %rsi, 24(%rsp)
       +  +     +       movq %rdi, 32(%rsp)
       +  +     +       movq %r8, 40(%rsp)
       +  +     +       movq %r9, 48(%rsp)
       +  +     +   
       +  +     +       movq 0x38(%rsp), %rdi
       +  +     +       subq $MCOUNT_INSN_SIZE, %rdi
       +  +     +   
       +  +     +   .globl mcount_call
       +  +     +   mcount_call:
       +  +     +       call ftrace_stub
       +  +     +   
       +  +     +       movq 48(%rsp), %r9
       +  +     +       movq 40(%rsp), %r8
       +  +     +       movq 32(%rsp), %rdi
       +  +     +       movq 24(%rsp), %rsi
       +  +     +       movq 16(%rsp), %rdx
       +  +     +       movq 8(%rsp), %rcx
       +  +     +       movq (%rsp), %rax
       +  +     +       addq $0x38, %rsp
       +  +     +   
       +  +     +       retq
       +  +     +   END(mcount)
       +  +     +   
       +  +     +   ENTRY(ftrace_caller)
       +  +     +   
       +  +     +       /* taken from glibc */
       +  +     +       subq $0x38, %rsp
       +  +     +       movq %rax, (%rsp)
       +  +     +       movq %rcx, 8(%rsp)
       +  +     +       movq %rdx, 16(%rsp)
       +  +     +       movq %rsi, 24(%rsp)
       +  +     +       movq %rdi, 32(%rsp)
       +  +     +       movq %r8, 40(%rsp)
       +  +     +       movq %r9, 48(%rsp)
       +  +     +   
       +  +     +       movq 0x38(%rsp), %rdi
       +  +     +       movq 8(%rbp), %rsi
       +  +     +       subq $MCOUNT_INSN_SIZE, %rdi
       +  +     +   
       +  +     +   .globl ftrace_call
       +  +     +   ftrace_call:
       +  +     +       call ftrace_stub
       +  +     +   
       +  +     +       movq 48(%rsp), %r9
       +  +     +       movq 40(%rsp), %r8
       +  +     +       movq 32(%rsp), %rdi
       +  +     +       movq 24(%rsp), %rsi
       +  +     +       movq 16(%rsp), %rdx
       +  +     +       movq 8(%rsp), %rcx
       +  +     +       movq (%rsp), %rax
       +  +     +       addq $0x38, %rsp
       +  +     +   
       +  +     +   .globl ftrace_stub
       +  +     +   ftrace_stub:
       +  +     +       retq
       +  +     +   END(ftrace_caller)
       +  +     +   
       +  +     +   #else /* ! CONFIG_DYNAMIC_FTRACE */
       +  +     +   ENTRY(mcount)
       +  +     +       cmpq $ftrace_stub, ftrace_trace_function
       +  +     +       jnz trace
       +  +     +   .globl ftrace_stub
       +  +     +   ftrace_stub:
       +  +     +       retq
       +  +     +   
       +  +     +   trace:
       +  +     +       /* taken from glibc */
       +  +     +       subq $0x38, %rsp
       +  +     +       movq %rax, (%rsp)
       +  +     +       movq %rcx, 8(%rsp)
       +  +     +       movq %rdx, 16(%rsp)
       +  +     +       movq %rsi, 24(%rsp)
       +  +     +       movq %rdi, 32(%rsp)
       +  +     +       movq %r8, 40(%rsp)
       +  +     +       movq %r9, 48(%rsp)
       +  +     +   
       +  +     +       movq 0x38(%rsp), %rdi
       +  +     +       movq 8(%rbp), %rsi
       +  +     +       subq $MCOUNT_INSN_SIZE, %rdi
       +  +     +   
       +  +     +       call   *ftrace_trace_function
       +  +     +   
       +  +     +       movq 48(%rsp), %r9
       +  +     +       movq 40(%rsp), %r8
       +  +     +       movq 32(%rsp), %rdi
       +  +     +       movq 24(%rsp), %rsi
       +  +     +       movq 16(%rsp), %rdx
       +  +     +       movq 8(%rsp), %rcx
       +  +     +       movq (%rsp), %rax
       +  +     +       addq $0x38, %rsp
       +  +     +   
       +  +     +       jmp ftrace_stub
       +  +     +   END(mcount)
       +  +     +   #endif /* CONFIG_DYNAMIC_FTRACE */
       +  +     +   #endif /* CONFIG_FTRACE */
       +  +     +   
                    #ifndef CONFIG_PREEMPT
                    #define retint_kernel retint_restore_args
                    #endif      
                    
                    #ifdef CONFIG_PARAVIRT
          -         ENTRY(native_irq_enable_syscall_ret)
          -             movq    %gs:pda_oldrsp,%rsp
          +         ENTRY(native_usergs_sysret64)
                        swapgs
                        sysretq
                    #endif /* CONFIG_PARAVIRT */
                        .macro FAKE_STACK_FRAME child_rip
                        /* push in order ss, rsp, eflags, cs, rip */
                        xorl %eax, %eax
          -             pushq %rax /* ss */
          +             pushq $__KERNEL_DS /* ss */
                        CFI_ADJUST_CFA_OFFSET   8
                        /*CFI_REL_OFFSET        ss,0*/
                        pushq %rax /* rsp */
                        CFI_ADJUST_CFA_OFFSET -4
                        call schedule_tail
                        GET_THREAD_INFO(%rcx)
          -             testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
          +             testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
                        jnz rff_trace
                    rff_action: 
                        RESTORE_REST
                        testl $3,CS-ARGOFFSET(%rsp)     # from kernel_thread?
                        je   int_ret_from_sys_call
          -             testl $_TIF_IA32,threadinfo_flags(%rcx)
          +             testl $_TIF_IA32,TI_flags(%rcx)
                        jnz  int_ret_from_sys_call
                        RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
                        jmp ret_from_sys_call
@@@@@@@@@@@@@@@@@@@@@ -349,8 -349,8 -349,8 -349,8 -349,8 -349,8 -349,8 -243,8 -349,8 -349,8 -244,7 -349,8 -349,8 -349,8 -349,8 -349,7 -243,8 -349,8 -349,8 -349,8 +349,7 @@@@@@@@@@@@@@@@@@@@@ ENTRY(system_call_after_swapgs
                        movq  %rcx,RIP-ARGOFFSET(%rsp)
                        CFI_REL_OFFSET rip,RIP-ARGOFFSET
                        GET_THREAD_INFO(%rcx)
---------- ---- ----    testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \
---------- ---- ----            TI_flags(%rcx)
          -             testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
+++++++++++++++ ++++    testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx)
                        jnz tracesys
                        cmpq $__NR_syscall_max,%rax
                        ja badsys
                        GET_THREAD_INFO(%rcx)
                        DISABLE_INTERRUPTS(CLBR_NONE)
                        TRACE_IRQS_OFF
          -             movl threadinfo_flags(%rcx),%edx
          +             movl TI_flags(%rcx),%edx
                        andl %edi,%edx
                        jnz  sysret_careful 
                        CFI_REMEMBER_STATE
                        CFI_REGISTER    rip,rcx
                        RESTORE_ARGS 0,-ARG_SKIP,1
                        /*CFI_REGISTER  rflags,r11*/
          -             ENABLE_INTERRUPTS_SYSCALL_RET
          +             movq    %gs:pda_oldrsp, %rsp
          +             USERGS_SYSRET64
                    
                        CFI_RESTORE_STATE
                        /* Handle reschedules */
                        leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
                        xorl %esi,%esi # oldset -> arg2
                        call ptregscall_common
       -  -     -   1:  movl $_TIF_NEED_RESCHED,%edi
       +  +     +   1:  movl $_TIF_WORK_MASK,%edi
                        /* Use IRET because user could have changed frame. This
                           works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
                        DISABLE_INTERRUPTS(CLBR_NONE)
                        FIXUP_TOP_OF_STACK %rdi
                        movq %rsp,%rdi
                        call syscall_trace_enter
--------------- ----    LOAD_ARGS ARGOFFSET  /* reload args from stack in case ptrace changed it */
+++++++++++++++ ++++    /*
+++++++++++++++ ++++     * Reload arg registers from stack in case ptrace changed them.
+++++++++++++++ ++++     * We don't reload %rax because syscall_trace_enter() returned
+++++++++++++++ ++++     * the value it wants us to use in the table lookup.
+++++++++++++++ ++++     */
+++++++++++++++ ++++    LOAD_ARGS ARGOFFSET, 1
                        RESTORE_REST
                        cmpq $__NR_syscall_max,%rax
                        ja   int_ret_from_sys_call      /* RAX(%rsp) set to -ENOSYS above */
                    int_with_check:
                        LOCKDEP_SYS_EXIT_IRQ
                        GET_THREAD_INFO(%rcx)
          -             movl threadinfo_flags(%rcx),%edx
          +             movl TI_flags(%rcx),%edx
                        andl %edi,%edx
                        jnz   int_careful
          -             andl    $~TS_COMPAT,threadinfo_status(%rcx)
          +             andl    $~TS_COMPAT,TI_status(%rcx)
                        jmp   retint_swapgs
                    
                        /* Either reschedule or signal or syscall exit tracking needed. */
                        ENABLE_INTERRUPTS(CLBR_NONE)
                        SAVE_REST
                        /* Check for syscall exit trace */      
--------------- ----    testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
+++++++++++++++ ++++    testl $_TIF_WORK_SYSCALL_EXIT,%edx
                        jz int_signal
                        pushq %rdi
                        CFI_ADJUST_CFA_OFFSET 8
                        call syscall_trace_leave
                        popq %rdi
                        CFI_ADJUST_CFA_OFFSET -8
--------------- ----    andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
+++++++++++++++ ++++    andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi
                        jmp int_restore_rest
                        
                    int_signal:
                        movq %rsp,%rdi          # &ptregs -> arg1
                        xorl %esi,%esi          # oldset -> arg2
                        call do_notify_resume
       -  -     -   1:  movl $_TIF_NEED_RESCHED,%edi    
       +  +     +   1:  movl $_TIF_WORK_MASK,%edi
                    int_restore_rest:
                        RESTORE_REST
                        DISABLE_INTERRUPTS(CLBR_NONE)
                        PTREGSCALL stub_clone, sys_clone, %r8
                        PTREGSCALL stub_fork, sys_fork, %rdi
                        PTREGSCALL stub_vfork, sys_vfork, %rdi
          -             PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx
                        PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
                        PTREGSCALL stub_iopl, sys_iopl, %rsi
                    
@@@@@@@@@@@@@@@@@@@@@ -665,7 -665,7 -665,7 -665,7 -665,7 -665,7 -665,7 -559,7 -665,7 -665,7 -559,7 -665,7 -665,7 -665,7 -665,7 -669,7 -559,7 -665,7 -665,7 -665,7 +669,7 @@@@@@@@@@@@@@@@@@@@@ retint_with_reschedule
                        movl $_TIF_WORK_MASK,%edi
                    retint_check:
                        LOCKDEP_SYS_EXIT_IRQ
          -             movl threadinfo_flags(%rcx),%edx
          +             movl TI_flags(%rcx),%edx
                        andl %edi,%edx
                        CFI_REMEMBER_STATE
                        jnz  retint_careful
                        RESTORE_REST
                        DISABLE_INTERRUPTS(CLBR_NONE)
                        TRACE_IRQS_OFF
       -  -     -       movl $_TIF_NEED_RESCHED,%edi
                        GET_THREAD_INFO(%rcx)
       -  -     -       jmp retint_check
       +  +     +       jmp retint_with_reschedule
                    
                    #ifdef CONFIG_PREEMPT
                        /* Returning to kernel space. Check if we need preemption */
                        /* rcx:  threadinfo. interrupts off. */
                    ENTRY(retint_kernel)
          -             cmpl $0,threadinfo_preempt_count(%rcx)
          +             cmpl $0,TI_preempt_count(%rcx)
                        jnz  retint_restore_args
          -             bt  $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
          +             bt  $TIF_NEED_RESCHED,TI_flags(%rcx)
                        jnc  retint_restore_args
                        bt   $9,EFLAGS-ARGOFFSET(%rsp)  /* interrupts off? */
                        jnc  retint_restore_args
@@@@@@@@@@@@@@@@@@@@@ -816,9 -816,9 -816,9 -816,9 -816,9 -816,9 -816,6 -711,6 -816,9 -816,9 -711,6 -816,9 -816,6 -816,9 -816,9 -820,9 -711,6 -816,9 -816,9 -816,9 +820,9 @@@@@@@@@@@@@@@@@@@@@ END(invalidate_interrupt\num
                    ENTRY(call_function_interrupt)
                        apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
                    END(call_function_interrupt)
      ++  + +   +   ENTRY(call_function_single_interrupt)
      ++  + +   +       apicinterrupt CALL_FUNCTION_SINGLE_VECTOR,smp_call_function_single_interrupt
      ++  + +   +   END(call_function_single_interrupt)
                    ENTRY(irq_move_cleanup_interrupt)
                        apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
                    END(irq_move_cleanup_interrupt)
                        apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
                    END(apic_timer_interrupt)
                    
          +         ENTRY(uv_bau_message_intr1)
          +             apicinterrupt 220,uv_bau_message_interrupt
          +         END(uv_bau_message_intr1)
          +         
                    ENTRY(error_interrupt)
                        apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
                    END(error_interrupt)
@@@@@@@@@@@@@@@@@@@@@ -845,7 -845,7 -845,7 -845,7 -845,7 -845,7 -842,7 -737,7 -845,7 -845,7 -733,6 -845,7 -842,7 -845,7 -845,7 -849,7 -737,7 -845,7 -845,7 -845,7 +849,7 @@@@@@@@@@@@@@@@@@@@@ END(spurious_interrupt
                     */                 
                        .macro zeroentry sym
                        INTR_FRAME
          +             PARAVIRT_ADJUST_EXCEPTION_FRAME
                        pushq $0        /* push error code/oldrax */ 
                        CFI_ADJUST_CFA_OFFSET 8
                        pushq %rax      /* push real oldrax to the rdi slot */ 
                    
                        .macro errorentry sym
                        XCPT_FRAME
          +             PARAVIRT_ADJUST_EXCEPTION_FRAME
                        pushq %rax
                        CFI_ADJUST_CFA_OFFSET 8
                        CFI_REL_OFFSET rax,0
@@@@@@@@@@@@@@@@@@@@@ -928,7 -928,7 -928,7 -928,7 -928,7 -928,7 -925,7 -820,7 -928,7 -928,7 -814,7 -928,7 -925,7 -928,7 -928,7 -932,7 -820,7 -928,7 -928,7 -928,7 +932,7 @@@@@@@@@@@@@@@@@@@@@ paranoid_restore\trace
                        jmp irq_return
                    paranoid_userspace\trace:
                        GET_THREAD_INFO(%rcx)
          -             movl threadinfo_flags(%rcx),%ebx
          +             movl TI_flags(%rcx),%ebx
                        andl $_TIF_WORK_MASK,%ebx
                        jz paranoid_swapgs\trace
                        movq %rsp,%rdi                  /* &pt_regs */
                        testl %eax,%eax
                        jne  retint_kernel
                        LOCKDEP_SYS_EXIT_IRQ
          -             movl  threadinfo_flags(%rcx),%edx
          +             movl  TI_flags(%rcx),%edx
                        movl  $_TIF_WORK_MASK,%edi
                        andl  %edi,%edx
                        jnz  retint_careful
                           iret run with kernel gs again, so don't set the user space flag.
                           B stepping K8s sometimes report a truncated RIP for IRET 
                           exceptions returning to compat mode. Check for these here too. */
          -             leaq irq_return(%rip),%rbp
          -             cmpq %rbp,RIP(%rsp) 
          +             leaq irq_return(%rip),%rcx
          +             cmpq %rcx,RIP(%rsp)
                        je   error_swapgs
          -             movl %ebp,%ebp  /* zero extend */
          -             cmpq %rbp,RIP(%rsp) 
          +             movl %ecx,%ecx  /* zero extend */
          +             cmpq %rcx,RIP(%rsp)
                        je   error_swapgs
                        cmpq $gs_change,RIP(%rsp)
                            je   error_swapgs
                        
                           /* Reload gs selector with exception handling */
                           /* edi:  new selector */ 
          -         ENTRY(load_gs_index)
          +         ENTRY(native_load_gs_index)
                        CFI_STARTPROC
                        pushf
                        CFI_ADJUST_CFA_OFFSET 8
                        CFI_ADJUST_CFA_OFFSET -8
                            ret
                        CFI_ENDPROC
          -         ENDPROC(load_gs_index)
          +         ENDPROC(native_load_gs_index)
                           
                            .section __ex_table,"a"
                            .align 8
                        /* runs on exception stack */
                    KPROBE_ENTRY(debug)
                        INTR_FRAME
+++++++++++++++++++     PARAVIRT_ADJUST_EXCEPTION_FRAME
                        pushq $0
                        CFI_ADJUST_CFA_OFFSET 8         
                        paranoidentry do_debug, DEBUG_STACK
                        /* runs on exception stack */   
                    KPROBE_ENTRY(nmi)
                        INTR_FRAME
+++++++++++++++++++     PARAVIRT_ADJUST_EXCEPTION_FRAME
                        pushq $-1
                        CFI_ADJUST_CFA_OFFSET 8
                        paranoidentry do_nmi, 0, 0
                    
                    KPROBE_ENTRY(int3)
                        INTR_FRAME
+++++++++++++++++++     PARAVIRT_ADJUST_EXCEPTION_FRAME
                        pushq $0
                        CFI_ADJUST_CFA_OFFSET 8
                        paranoidentry do_int3, DEBUG_STACK
                        zeroentry do_coprocessor_segment_overrun
                    END(coprocessor_segment_overrun)
                    
          -         ENTRY(reserved)
          -             zeroentry do_reserved
          -         END(reserved)
          -         
                        /* runs on exception stack */
                    ENTRY(double_fault)
                        XCPT_FRAME
+++++++++++++++++++     PARAVIRT_ADJUST_EXCEPTION_FRAME
                        paranoidentry do_double_fault
                        jmp paranoid_exit1
                        CFI_ENDPROC
                        /* runs on exception stack */
                    ENTRY(stack_segment)
                        XCPT_FRAME
+++++++++++++++++++     PARAVIRT_ADJUST_EXCEPTION_FRAME
                        paranoidentry do_stack_segment
                        jmp paranoid_exit1
                        CFI_ENDPROC
                        /* runs on exception stack */
                    ENTRY(machine_check)
                        INTR_FRAME
+++++++++++++++++++     PARAVIRT_ADJUST_EXCEPTION_FRAME
                        pushq $0
                        CFI_ADJUST_CFA_OFFSET 8 
                        paranoidentry do_machine_check
                        sysret
                        CFI_ENDPROC
                    ENDPROC(ignore_sysret)
+++++++++++++++++++ 
+++++++++++++++++++ #ifdef CONFIG_XEN
+++++++++++++++++++ ENTRY(xen_hypervisor_callback)
+++++++++++++++++++     zeroentry xen_do_hypervisor_callback
+++++++++++++++++++ END(xen_hypervisor_callback)
+++++++++++++++++++ 
+++++++++++++++++++ /*
+++++++++++++++++++ # A note on the "critical region" in our callback handler.
+++++++++++++++++++ # We want to avoid stacking callback handlers due to events occurring
+++++++++++++++++++ # during handling of the last event. To do this, we keep events disabled
+++++++++++++++++++ # until we've done all processing. HOWEVER, we must enable events before
+++++++++++++++++++ # popping the stack frame (can't be done atomically) and so it would still
+++++++++++++++++++ # be possible to get enough handler activations to overflow the stack.
+++++++++++++++++++ # Although unlikely, bugs of that kind are hard to track down, so we'd
+++++++++++++++++++ # like to avoid the possibility.
+++++++++++++++++++ # So, on entry to the handler we detect whether we interrupted an
+++++++++++++++++++ # existing activation in its critical region -- if so, we pop the current
+++++++++++++++++++ # activation and restart the handler using the previous one.
+++++++++++++++++++ */
+++++++++++++++++++ ENTRY(xen_do_hypervisor_callback)   # do_hypervisor_callback(struct *pt_regs)
+++++++++++++++++++     CFI_STARTPROC
+++++++++++++++++++ /* Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will
+++++++++++++++++++    see the correct pointer to the pt_regs */
+++++++++++++++++++     movq %rdi, %rsp            # we don't return, adjust the stack frame
+++++++++++++++++++     CFI_ENDPROC
+++++++++++++++++++     CFI_DEFAULT_STACK
+++++++++++++++++++ 11: incl %gs:pda_irqcount
+++++++++++++++++++     movq %rsp,%rbp
+++++++++++++++++++     CFI_DEF_CFA_REGISTER rbp
+++++++++++++++++++     cmovzq %gs:pda_irqstackptr,%rsp
+++++++++++++++++++     pushq %rbp                      # backlink for old unwinder
+++++++++++++++++++     call xen_evtchn_do_upcall
+++++++++++++++++++     popq %rsp
+++++++++++++++++++     CFI_DEF_CFA_REGISTER rsp
+++++++++++++++++++     decl %gs:pda_irqcount
+++++++++++++++++++     jmp  error_exit
+++++++++++++++++++     CFI_ENDPROC
+++++++++++++++++++ END(do_hypervisor_callback)
+++++++++++++++++++ 
+++++++++++++++++++ /*
+++++++++++++++++++ # Hypervisor uses this for application faults while it executes.
+++++++++++++++++++ # We get here for two reasons:
+++++++++++++++++++ #  1. Fault while reloading DS, ES, FS or GS
+++++++++++++++++++ #  2. Fault while executing IRET
+++++++++++++++++++ # Category 1 we do not need to fix up as Xen has already reloaded all segment
+++++++++++++++++++ # registers that could be reloaded and zeroed the others.
+++++++++++++++++++ # Category 2 we fix up by killing the current process. We cannot use the
+++++++++++++++++++ # normal Linux return path in this case because if we use the IRET hypercall
+++++++++++++++++++ # to pop the stack frame we end up in an infinite loop of failsafe callbacks.
+++++++++++++++++++ # We distinguish between categories by comparing each saved segment register
+++++++++++++++++++ # with its current contents: any discrepancy means we are in category 1.
+++++++++++++++++++ */
+++++++++++++++++++ ENTRY(xen_failsafe_callback)
+++++++++++++++++++     framesz = (RIP-0x30)    /* workaround buggy gas */
+++++++++++++++++++     _frame framesz
+++++++++++++++++++     CFI_REL_OFFSET rcx, 0
+++++++++++++++++++     CFI_REL_OFFSET r11, 8
+++++++++++++++++++     movw %ds,%cx
+++++++++++++++++++     cmpw %cx,0x10(%rsp)
+++++++++++++++++++     CFI_REMEMBER_STATE
+++++++++++++++++++     jne 1f
+++++++++++++++++++     movw %es,%cx
+++++++++++++++++++     cmpw %cx,0x18(%rsp)
+++++++++++++++++++     jne 1f
+++++++++++++++++++     movw %fs,%cx
+++++++++++++++++++     cmpw %cx,0x20(%rsp)
+++++++++++++++++++     jne 1f
+++++++++++++++++++     movw %gs,%cx
+++++++++++++++++++     cmpw %cx,0x28(%rsp)
+++++++++++++++++++     jne 1f
+++++++++++++++++++     /* All segments match their saved values => Category 2 (Bad IRET). */
+++++++++++++++++++     movq (%rsp),%rcx
+++++++++++++++++++     CFI_RESTORE rcx
+++++++++++++++++++     movq 8(%rsp),%r11
+++++++++++++++++++     CFI_RESTORE r11
+++++++++++++++++++     addq $0x30,%rsp
+++++++++++++++++++     CFI_ADJUST_CFA_OFFSET -0x30
+++++++++++++++++++     pushq $0
+++++++++++++++++++     CFI_ADJUST_CFA_OFFSET 8
+++++++++++++++++++     pushq %r11
+++++++++++++++++++     CFI_ADJUST_CFA_OFFSET 8
+++++++++++++++++++     pushq %rcx
+++++++++++++++++++     CFI_ADJUST_CFA_OFFSET 8
+++++++++++++++++++     jmp general_protection
+++++++++++++++++++     CFI_RESTORE_STATE
+++++++++++++++++++ 1:  /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
+++++++++++++++++++     movq (%rsp),%rcx
+++++++++++++++++++     CFI_RESTORE rcx
+++++++++++++++++++     movq 8(%rsp),%r11
+++++++++++++++++++     CFI_RESTORE r11
+++++++++++++++++++     addq $0x30,%rsp
+++++++++++++++++++     CFI_ADJUST_CFA_OFFSET -0x30
+++++++++++++++++++     pushq $0
+++++++++++++++++++     CFI_ADJUST_CFA_OFFSET 8
+++++++++++++++++++     SAVE_ALL
+++++++++++++++++++     jmp error_exit
+++++++++++++++++++     CFI_ENDPROC
+++++++++++++++++++ END(xen_failsafe_callback)
+++++++++++++++++++ 
+++++++++++++++++++ #endif /* CONFIG_XEN */
diff --combined arch/x86/kernel/nmi.c
index ec024b3baad0764821c036d0aa2552397f76f017,ec024b3baad0764821c036d0aa2552397f76f017,384b49fed598033316fecaccd0f74b2f9f06e3ed,ec024b3baad0764821c036d0aa2552397f76f017,ec024b3baad0764821c036d0aa2552397f76f017,ec024b3baad0764821c036d0aa2552397f76f017,716b89284be02841cf74e81a73ad0d0a203cbfca,8dfe9db87a9e678233b7bfc593d2cebba262ca78,ec024b3baad0764821c036d0aa2552397f76f017,ec024b3baad0764821c036d0aa2552397f76f017,5a29ded994fa345fb67f92cc19b9fa9dd5606cf8,e0b44b7b717ace86322cb3b279d17efd4fc3b8fa,716b89284be02841cf74e81a73ad0d0a203cbfca,ec024b3baad0764821c036d0aa2552397f76f017,ec024b3baad0764821c036d0aa2552397f76f017,ec024b3baad0764821c036d0aa2552397f76f017,716b89284be02841cf74e81a73ad0d0a203cbfca,ec024b3baad0764821c036d0aa2552397f76f017,ec024b3baad0764821c036d0aa2552397f76f017,ec024b3baad0764821c036d0aa2552397f76f017..ac6d51222e7d3562abb1e7b5e8bd4a16a1c1c1f4
                     *  Fixes:
                     *  Mikael Pettersson       : AMD K7 support for local APIC NMI watchdog.
                     *  Mikael Pettersson       : Power Management for local APIC NMI watchdog.
          +          *  Mikael Pettersson       : Pentium 4 support for local APIC NMI watchdog.
                     *  Pavel Machek and
                     *  Mikael Pettersson       : PM converted to driver model. Disable/enable API.
                     */
                    
          +         #include <asm/apic.h>
          +         
                    #include <linux/nmi.h>
                    #include <linux/mm.h>
                    #include <linux/delay.h>
                    #include <linux/module.h>
                    #include <linux/sysdev.h>
                    #include <linux/sysctl.h>
          +         #include <linux/percpu.h>
                    #include <linux/kprobes.h>
                    #include <linux/cpumask.h>
          +         #include <linux/kernel_stat.h>
                    #include <linux/kdebug.h>
          +         #include <linux/smp.h>
                    
          +         #include <asm/i8259.h>
          +         #include <asm/io_apic.h>
                    #include <asm/smp.h>
                    #include <asm/nmi.h>
                    #include <asm/proto.h>
          +         #include <asm/timer.h>
          +         
                    #include <asm/mce.h>
                    
                    #include <mach_traps.h>
                    
                    int unknown_nmi_panic;
                    int nmi_watchdog_enabled;
          -         int panic_on_unrecovered_nmi;
                    
                    static cpumask_t backtrace_mask = CPU_MASK_NONE;
                    
                     *  0: the lapic NMI watchdog is disabled, but can be enabled
                     */
                    atomic_t nmi_active = ATOMIC_INIT(0);               /* oprofile uses this */
          +         EXPORT_SYMBOL(nmi_active);
          +         
          +         unsigned int nmi_watchdog = NMI_NONE;
          +         EXPORT_SYMBOL(nmi_watchdog);
          +         
                    static int panic_on_timeout;
                    
          -         unsigned int nmi_watchdog = NMI_DEFAULT;
                    static unsigned int nmi_hz = HZ;
          -         
                    static DEFINE_PER_CPU(short, wd_enabled);
          +         static int endflag __initdata;
                    
          -         /* Run after command line and cpu_init init, but before all other checks */
          -         void nmi_watchdog_default(void)
          +         static inline unsigned int get_nmi_count(int cpu)
                    {
          -             if (nmi_watchdog != NMI_DEFAULT)
          -                     return;
          -             nmi_watchdog = NMI_NONE;
          +         #ifdef CONFIG_X86_64
          +             return cpu_pda(cpu)->__nmi_count;
          +         #else
          +             return nmi_count(cpu);
          +         #endif
          +         }
          +         
          +         static inline int mce_in_progress(void)
          +         {
          +         #if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE)
          +             return atomic_read(&mce_entry) > 0;
          +         #endif
          +             return 0;
                    }
                    
          -         static int endflag __initdata = 0;
          +         /*
           +          * Take the local apic timer and PIT/HPET into account. We don't
           +          * know which one is active when highres/dyntick is on.
          +          */
          +         static inline unsigned int get_timer_irqs(int cpu)
          +         {
          +         #ifdef CONFIG_X86_64
          +             return read_pda(apic_timer_irqs) + read_pda(irq0_irqs);
          +         #else
          +             return per_cpu(irq_stat, cpu).apic_timer_irqs +
          +                     per_cpu(irq_stat, cpu).irq0_irqs;
          +         #endif
          +         }
                    
                    #ifdef CONFIG_SMP
          -         /* The performance counters used by NMI_LOCAL_APIC don't trigger when
          +         /*
          +          * The performance counters used by NMI_LOCAL_APIC don't trigger when
                     * the CPU is idle. To make sure the NMI watchdog really ticks on all
                     * CPUs during the test make them busy.
                     */
                    static __init void nmi_cpu_busy(void *data)
                    {
                        local_irq_enable_in_hardirq();
          -             /* Intentionally don't use cpu_relax here. This is
          -                to make sure that the performance counter really ticks,
          -                even if there is a simulator or similar that catches the
          -                pause instruction. On a real HT machine this is fine because
          -                all other CPUs are busy with "useless" delay loops and don't
          -                care if they get somewhat less cycles. */
          +             /*
          +              * Intentionally don't use cpu_relax here. This is
          +              * to make sure that the performance counter really ticks,
          +              * even if there is a simulator or similar that catches the
          +              * pause instruction. On a real HT machine this is fine because
          +              * all other CPUs are busy with "useless" delay loops and don't
          +              * care if they get somewhat less cycles.
          +              */
                        while (endflag == 0)
                                mb();
                    }
                    
                    int __init check_nmi_watchdog(void)
                    {
          -             int *prev_nmi_count;
          +             unsigned int *prev_nmi_count;
                        int cpu;
                    
          -             if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DISABLED))
          -                     return 0;
          -         
          -             if (!atomic_read(&nmi_active))
          +             if (!nmi_watchdog_active() || !atomic_read(&nmi_active))
                                return 0;
                    
          -             prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
          +             prev_nmi_count = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL);
                        if (!prev_nmi_count)
          -                     return -1;
          +                     goto error;
                    
                        printk(KERN_INFO "Testing NMI watchdog ... ");
                    
                    #ifdef CONFIG_SMP
                        if (nmi_watchdog == NMI_LOCAL_APIC)
      --  - -   -               smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0);
      ++  + +   +               smp_call_function(nmi_cpu_busy, (void *)&endflag, 0);
                    #endif
                    
          -             for (cpu = 0; cpu < NR_CPUS; cpu++)
          -                     prev_nmi_count[cpu] = cpu_pda(cpu)->__nmi_count;
          +             for_each_possible_cpu(cpu)
          +                     prev_nmi_count[cpu] = get_nmi_count(cpu);
                        local_irq_enable();
          -             mdelay((20*1000)/nmi_hz); // wait 20 ticks
          +             mdelay((20 * 1000) / nmi_hz); /* wait 20 ticks */
                    
                        for_each_online_cpu(cpu) {
                                if (!per_cpu(wd_enabled, cpu))
                                        continue;
          -                     if (cpu_pda(cpu)->__nmi_count - prev_nmi_count[cpu] <= 5) {
          +                     if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5) {
                                        printk(KERN_WARNING "WARNING: CPU#%d: NMI "
          -                                    "appears to be stuck (%d->%d)!\n",
          +                                     "appears to be stuck (%d->%d)!\n",
                                                cpu,
                                                prev_nmi_count[cpu],
          -                                     cpu_pda(cpu)->__nmi_count);
          +                                     get_nmi_count(cpu));
                                        per_cpu(wd_enabled, cpu) = 0;
                                        atomic_dec(&nmi_active);
                                }
                        if (!atomic_read(&nmi_active)) {
                                kfree(prev_nmi_count);
                                atomic_set(&nmi_active, -1);
          -                     return -1;
          +                     goto error;
                        }
                        printk("OK.\n");
                    
          -             /* now that we know it works we can reduce NMI frequency to
          -                something more reasonable; makes a difference in some configs */
          +             /*
          +              * now that we know it works we can reduce NMI frequency to
          +              * something more reasonable; makes a difference in some configs
          +              */
                        if (nmi_watchdog == NMI_LOCAL_APIC)
                                nmi_hz = lapic_adjust_nmi_hz(1);
                    
                        kfree(prev_nmi_count);
                        return 0;
          +         error:
          +             if (nmi_watchdog == NMI_IO_APIC && !timer_through_8259)
          +                     disable_8259A_irq(0);
       +  +         #ifdef CONFIG_X86_32
       +  +             timer_ack = 0;
       +  +         #endif
          +             return -1;
                    }
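For illustration, a minimal sketch of the self-test idea in check_nmi_watchdog() above: sample each CPU's NMI count, keep the other CPUs busy so the watchdog has something to interrupt, wait roughly 20 watchdog ticks, and flag any CPU whose count barely moved. The helper name watchdog_looks_stuck and the before/after values below are made-up stand-ins for the per-CPU __nmi_count samples; this is ordinary user-space C, not kernel code.

    /* illustrative only -- not kernel code */
    #include <stdio.h>

    /* fewer than ~5 NMIs over the sample window => watchdog looks stuck */
    static int watchdog_looks_stuck(unsigned int before, unsigned int after)
    {
            return (after - before) <= 5;
    }

    int main(void)
    {
            /* made-up stand-ins for per-CPU NMI counts taken ~20 ticks apart */
            unsigned int before[2] = { 100, 100 };
            unsigned int after[2]  = { 140, 102 };

            for (int cpu = 0; cpu < 2; cpu++)
                    if (watchdog_looks_stuck(before[cpu], after[cpu]))
                            printf("CPU#%d: NMI watchdog appears stuck (%u->%u)\n",
                                   cpu, before[cpu], after[cpu]);
            return 0;
    }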
                    
                    static int __init setup_nmi_watchdog(char *str)
                    {
          -             int nmi;
          +             unsigned int nmi;
                    
          -             if (!strncmp(str,"panic",5)) {
          +             if (!strncmp(str, "panic", 5)) {
                                panic_on_timeout = 1;
                                str = strchr(str, ',');
                                if (!str)
                    
                        get_option(&str, &nmi);
                    
          -             if ((nmi >= NMI_INVALID) || (nmi < NMI_NONE))
          +             if (nmi >= NMI_INVALID)
                                return 0;
                    
                        nmi_watchdog = nmi;
                        return 1;
                    }
          -         
                    __setup("nmi_watchdog=", setup_nmi_watchdog);
                    
          +         /*
          +          * Suspend/resume support
          +          */
                    #ifdef CONFIG_PM
                    
                    static int nmi_pm_active; /* nmi_active before suspend */
@@@@@@@@@@@@@@@@@@@@@ -240,8 -240,8 -240,8 -240,8 -240,8 -240,8 -240,8 -237,8 -240,8 -240,8 -195,7 -240,8 -240,8 -240,8 -240,8 -240,8 -240,8 -240,8 -240,8 -240,8 +240,8 @@@@@@@@@@@@@@@@@@@@@ static int __init init_lapic_nmi_sysfs(
                    {
                        int error;
                    
          -             /* should really be a BUG_ON but b/c this is an
          +             /*
          +              * should really be a BUG_ON but b/c this is an
                         * init call, it just doesn't work.  -dcz
                         */
                        if (nmi_watchdog != NMI_LOCAL_APIC)
                                error = sysdev_register(&device_lapic_nmi);
                        return error;
                    }
          +         
                    /* must come after the local APIC's device_initcall() */
                    late_initcall(init_lapic_nmi_sysfs);
                    
                    
                    static void __acpi_nmi_enable(void *__unused)
                    {
-- ------- ---------    apic_write_around(APIC_LVT0, APIC_DM_NMI);
++ +++++++ +++++++++    apic_write(APIC_LVT0, APIC_DM_NMI);
                    }
                    
                    /*
                    void acpi_nmi_enable(void)
                    {
                        if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
      --  - -   -               on_each_cpu(__acpi_nmi_enable, NULL, 0, 1);
      ++  + +   +               on_each_cpu(__acpi_nmi_enable, NULL, 1);
                    }
                    
                    static void __acpi_nmi_disable(void *__unused)
                    {
-- ------- ---------    apic_write_around(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
++ +++++++ +++++++++    apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
                    }
                    
                    /*
                    void acpi_nmi_disable(void)
                    {
                        if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
      --  - -   -               on_each_cpu(__acpi_nmi_disable, NULL, 0, 1);
      ++  + +   +               on_each_cpu(__acpi_nmi_disable, NULL, 1);
                    }
                    
                    void setup_apic_nmi_watchdog(void *unused)
                    
                        /* cheap hack to support suspend/resume */
                        /* if cpu0 is not active neither should the other cpus */
          -             if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0))
          +             if (smp_processor_id() != 0 && atomic_read(&nmi_active) <= 0)
                                return;
                    
                        switch (nmi_watchdog) {
                        case NMI_LOCAL_APIC:
           +                      /* enable it first to avoid a race with the handler */
                                __get_cpu_var(wd_enabled) = 1;
                                if (lapic_watchdog_init(nmi_hz) < 0) {
                                        __get_cpu_var(wd_enabled) = 0;
                    void stop_apic_nmi_watchdog(void *unused)
                    {
                        /* only support LOCAL and IO APICs for now */
          -             if ((nmi_watchdog != NMI_LOCAL_APIC) &&
          -                 (nmi_watchdog != NMI_IO_APIC))
          -                     return;
          +             if (!nmi_watchdog_active())
          +                     return;
                        if (__get_cpu_var(wd_enabled) == 0)
                                return;
                        if (nmi_watchdog == NMI_LOCAL_APIC)
                     *
                     * as these watchdog NMI IRQs are generated on every CPU, we only
                     * have to check the current processor.
          +          *
          +          * since NMIs don't listen to _any_ locks, we have to be extremely
          +          * careful not to rely on unsafe variables. The printk might lock
          +          * up though, so we have to break up any console locks first ...
          +          * [when there will be more tty-related locks, break them up here too!]
                     */
                    
                    static DEFINE_PER_CPU(unsigned, last_irq_sum);
@@@@@@@@@@@@@@@@@@@@@ -347,11 -347,11 -347,11 -347,11 -347,11 -347,11 -347,11 -344,11 -347,11 -347,11 -295,11 -347,11 -347,11 -347,11 -347,11 -347,11 -347,11 -347,11 -347,11 -347,11 +347,11 @@@@@@@@@@@@@@@@@@@@@ static DEFINE_PER_CPU(int, nmi_touch)
                    
                    void touch_nmi_watchdog(void)
                    {
          -             if (nmi_watchdog > 0) {
          +             if (nmi_watchdog_active()) {
                                unsigned cpu;
                    
                                /*
          -                      * Tell other CPUs to reset their alert counters. We cannot
          +                      * Tell other CPUs to reset their alert counters. We cannot
                                 * do it ourselves because the alert count increase is not
                                 * atomic.
                                 */
                                }
                        }
                    
          +             /*
          +              * Tickle the softlockup detector too:
          +              */
                        touch_softlockup_watchdog();
                    }
                    EXPORT_SYMBOL(touch_nmi_watchdog);
                    notrace __kprobes int
                    nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
                    {
          -             int sum;
          +             /*
           +              * Since current_thread_info() is always on the stack, and we
          +              * always switch the stack NMI-atomically, it's safe to use
          +              * smp_processor_id().
          +              */
          +             unsigned int sum;
                        int touched = 0;
                        int cpu = smp_processor_id();
                        int rc = 0;
                                touched = 1;
                        }
                    
          -             sum = read_pda(apic_timer_irqs) + read_pda(irq0_irqs);
          +             sum = get_timer_irqs(cpu);
          +         
                        if (__get_cpu_var(nmi_touch)) {
                                __get_cpu_var(nmi_touch) = 0;
                                touched = 1;
                                static DEFINE_SPINLOCK(lock);   /* Serialise the printks */
                    
                                spin_lock(&lock);
          -                     printk("NMI backtrace for cpu %d\n", cpu);
          +                     printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu);
                                dump_stack();
                                spin_unlock(&lock);
                                cpu_clear(cpu, backtrace_mask);
                        }
                    
          -         #ifdef CONFIG_X86_MCE
          -             /* Could check oops_in_progress here too, but it's safer
          -                not too */
          -             if (atomic_read(&mce_entry) > 0)
          +             /* Could check oops_in_progress here too, but it's safer not to */
          +             if (mce_in_progress())
                                touched = 1;
          -         #endif
          -             /* if the apic timer isn't firing, this cpu isn't doing much */
          +         
           +             /* if none of the timers is firing, this cpu isn't doing much */
                        if (!touched && __get_cpu_var(last_irq_sum) == sum) {
                                /*
                                 * Ayiee, looks like this CPU is stuck ...
                                 * wait a few IRQs (5 seconds) before doing the oops ...
                                 */
                                local_inc(&__get_cpu_var(alert_counter));
          -                     if (local_read(&__get_cpu_var(alert_counter)) == 5*nmi_hz)
          -                             die_nmi("NMI Watchdog detected LOCKUP on CPU %d\n", regs,
          -                                     panic_on_timeout);
          +                     if (local_read(&__get_cpu_var(alert_counter)) == 5 * nmi_hz)
          +                             /*
          +                              * die_nmi will return ONLY if NOTIFY_STOP happens..
          +                              */
          +                             die_nmi("BUG: NMI Watchdog detected LOCKUP",
          +                                     regs, panic_on_timeout);
                        } else {
                                __get_cpu_var(last_irq_sum) = sum;
                                local_set(&__get_cpu_var(alert_counter), 0);
                                rc |= lapic_wd_event(nmi_hz);
                                break;
                        case NMI_IO_APIC:
          -                     /* don't know how to accurately check for this.
          +                     /*
          +                      * don't know how to accurately check for this.
                                 * just assume it was a watchdog timer interrupt
                                 * This matches the old behaviour.
                                 */
                        return rc;
                    }
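A compact sketch of the lockup heuristic used by nmi_watchdog_tick() above: on each watchdog NMI, compare the CPU's timer-interrupt sum with the previous one; if it has not advanced for 5*nmi_hz consecutive NMIs (roughly five seconds), treat the CPU as stuck. The function nmi_tick, the NMI_HZ constant and the frozen counter value below are illustrative stand-ins, not the kernel's own names.

    /* illustrative only -- not kernel code */
    #include <stdio.h>

    #define NMI_HZ 1        /* stand-in for nmi_hz */

    static unsigned int last_irq_sum;
    static unsigned int alert_counter;

    /* called once per watchdog NMI with this CPU's current timer-IRQ count */
    static int nmi_tick(unsigned int irq_sum, int touched)
    {
            if (!touched && irq_sum == last_irq_sum) {
                    /* no timer interrupts since the previous NMI: maybe stuck */
                    if (++alert_counter == 5 * NMI_HZ)
                            return 1;       /* ~5 seconds without progress */
            } else {
                    last_irq_sum = irq_sum;
                    alert_counter = 0;
            }
            return 0;
    }

    int main(void)
    {
            /* simulate a CPU whose timer-IRQ count is frozen at 42 */
            for (int i = 0; i < 6; i++)
                    if (nmi_tick(42, 0))
                            printf("lockup detected after %d NMIs\n", i + 1);
            return 0;
    }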
                    
          -         static unsigned ignore_nmis;
          -         
          -         asmlinkage notrace __kprobes void
          -         do_nmi(struct pt_regs *regs, long error_code)
          -         {
          -             nmi_enter();
          -             add_pda(__nmi_count,1);
          -             if (!ignore_nmis)
          -                     default_do_nmi(regs);
          -             nmi_exit();
          -         }
          -         
          -         void stop_nmi(void)
          -         {
          -             acpi_nmi_disable();
          -             ignore_nmis++;
          -         }
          +         #ifdef CONFIG_SYSCTL
                    
          -         void restart_nmi(void)
+++++++++++ ++++++++static int __init setup_unknown_nmi_panic(char *str)
++++++++++  ++++++++{
          -             ignore_nmis--;
          -             acpi_nmi_enable();
+++++++++++ ++++++++    unknown_nmi_panic = 1;
+++++++++++ ++++++++    return 1;
++++++++++  ++++++++}
          -         
          -         #ifdef CONFIG_SYSCTL
+++++++++++ ++++++++__setup("unknown_nmi_panic", setup_unknown_nmi_panic);
++++++++++  ++++++++
                    static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
                    {
                        unsigned char reason = get_nmi_reason();
                        char buf[64];
                    
                        sprintf(buf, "NMI received for unknown reason %02x\n", reason);
          -             die_nmi(buf, regs, 1);  /* Always panic here */
          +             die_nmi(buf, regs, 1); /* Always panic here */
                        return 0;
                    }
                    
@@@@@@@@@@@@@@@@@@@@@ -472,26 -472,26 -472,26 -472,26 -472,26 -472,26 -472,26 -469,26 -472,26 -472,26 -433,28 -479,26 -472,26 -472,26 -472,26 -472,26 -472,26 -472,26 -472,26 -472,26 +479,26 @@@@@@@@@@@@@@@@@@@@@ int proc_nmi_enabled(struct ctl_table *
                        if (!!old_state == !!nmi_watchdog_enabled)
                                return 0;
                    
          -             if (atomic_read(&nmi_active) < 0 || nmi_watchdog == NMI_DISABLED) {
          -                     printk( KERN_WARNING "NMI watchdog is permanently disabled\n");
          +             if (atomic_read(&nmi_active) < 0 || !nmi_watchdog_active()) {
          +                     printk(KERN_WARNING
          +                             "NMI watchdog is permanently disabled\n");
                                return -EIO;
                        }
                    
          -             /* if nmi_watchdog is not set yet, then set it */
          -             nmi_watchdog_default();
          -         
                        if (nmi_watchdog == NMI_LOCAL_APIC) {
                                if (nmi_watchdog_enabled)
                                        enable_lapic_nmi_watchdog();
                                else
                                        disable_lapic_nmi_watchdog();
                        } else {
          -                     printk( KERN_WARNING
          +                     printk(KERN_WARNING
                                        "NMI watchdog doesn't know what hardware to touch\n");
                                return -EIO;
                        }
                        return 0;
                    }
                    
          -         #endif
          +         #endif /* CONFIG_SYSCTL */
                    
                    int do_nmi_callback(struct pt_regs *regs, int cpu)
                    {
@@@@@@@@@@@@@@@@@@@@@ -514,3 -514,3 -514,3 -514,3 -514,3 -514,3 -514,3 -511,3 -514,3 -514,3 -477,6 -521,3 -514,3 -514,3 -514,3 -514,3 -514,3 -514,3 -514,3 -514,3 +521,3 @@@@@@@@@@@@@@@@@@@@@ void __trigger_all_cpu_backtrace(void
                                mdelay(1);
                        }
                    }
          -         
          -         EXPORT_SYMBOL(nmi_active);
          -         EXPORT_SYMBOL(nmi_watchdog);
index e0f571d58c19c0bfa4eeee39d89972ed2239f55f,e0f571d58c19c0bfa4eeee39d89972ed2239f55f,5d7326a60b7c1eaf24705c909f3a5cfffad21367,e0f571d58c19c0bfa4eeee39d89972ed2239f55f,e0f571d58c19c0bfa4eeee39d89972ed2239f55f,e0f571d58c19c0bfa4eeee39d89972ed2239f55f,e0f571d58c19c0bfa4eeee39d89972ed2239f55f,e0f571d58c19c0bfa4eeee39d89972ed2239f55f,e0f571d58c19c0bfa4eeee39d89972ed2239f55f,e0f571d58c19c0bfa4eeee39d89972ed2239f55f,74f0c5ea2a0388af848faa058804cd9df86f4391,e0f571d58c19c0bfa4eeee39d89972ed2239f55f,e0f571d58c19c0bfa4eeee39d89972ed2239f55f,e0f571d58c19c0bfa4eeee39d89972ed2239f55f,e0f571d58c19c0bfa4eeee39d89972ed2239f55f,e0f571d58c19c0bfa4eeee39d89972ed2239f55f,e0f571d58c19c0bfa4eeee39d89972ed2239f55f,e0f571d58c19c0bfa4eeee39d89972ed2239f55f,e0f571d58c19c0bfa4eeee39d89972ed2239f55f,2963ab5d91eec94f7b9748f53e98004ed4d19663..b4564d089b43b91bdfcbe36eb432e0cd245fc593
                    #include <asm/desc.h>
                    #include <asm/setup.h>
                    #include <asm/arch_hooks.h>
+++++++++++++++++++ #include <asm/pgtable.h>
                    #include <asm/time.h>
          +         #include <asm/pgalloc.h>
                    #include <asm/irq.h>
                    #include <asm/delay.h>
                    #include <asm/fixmap.h>
@@@@@@@@@@@@@@@@@@@@@ -140,9 -140,9 -140,9 -140,9 -140,9 -140,9 -140,9 -140,9 -140,9 -140,9 -139,7 -140,9 -140,9 -140,9 -140,9 -140,9 -140,9 -140,9 -140,9 -141,9 +141,9 @@@@@@@@@@@@@@@@@@@@@ unsigned paravirt_patch_default(u8 type
                                /* If the operation is a nop, then nop the callsite */
                                ret = paravirt_patch_nop();
                        else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) ||
          -                      type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_syscall_ret))
          +                      type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) ||
          +                      type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret32) ||
          +                      type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret64))
                                /* If operation requires a jmp, then jmp */
                                ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len);
                        else
@@@@@@@@@@@@@@@@@@@@@ -193,9 -193,9 -193,9 -193,9 -193,9 -193,9 -193,9 -193,9 -193,9 -193,9 -190,7 -193,9 -193,9 -193,9 -193,9 -193,9 -193,9 -193,9 -193,9 -194,9 +194,9 @@@@@@@@@@@@@@@@@@@@@ static void native_flush_tlb_single(uns
                    
                    /* These are in entry.S */
                    extern void native_iret(void);
          -         extern void native_irq_enable_syscall_ret(void);
          +         extern void native_irq_enable_sysexit(void);
          +         extern void native_usergs_sysret32(void);
          +         extern void native_usergs_sysret64(void);
                    
                    static int __init print_banner(void)
                    {
@@@@@@@@@@@@@@@@@@@@@ -285,7 -285,7 -285,7 -285,7 -285,7 -285,7 -285,7 -285,7 -285,7 -285,7 -280,7 -285,7 -285,7 -285,7 -285,7 -285,7 -285,7 -285,7 -285,7 -286,7 +286,7 @@@@@@@@@@@@@@@@@@@@@ struct pv_time_ops pv_time_ops = 
                        .get_wallclock = native_get_wallclock,
                        .set_wallclock = native_set_wallclock,
                        .sched_clock = native_sched_clock,
          -             .get_cpu_khz = native_calculate_cpu_khz,
          +             .get_tsc_khz = native_calibrate_tsc,
                    };
                    
                    struct pv_irq_ops pv_irq_ops = {
                        .irq_enable = native_irq_enable,
                        .safe_halt = native_safe_halt,
                        .halt = native_halt,
          +         #ifdef CONFIG_X86_64
          +             .adjust_exception_frame = paravirt_nop,
          +         #endif
                    };
                    
                    struct pv_cpu_ops pv_cpu_ops = {
                        .store_idt = native_store_idt,
                        .store_tr = native_store_tr,
                        .load_tls = native_load_tls,
          +         #ifdef CONFIG_X86_64
          +             .load_gs_index = native_load_gs_index,
          +         #endif
                        .write_ldt_entry = native_write_ldt_entry,
                        .write_gdt_entry = native_write_gdt_entry,
                        .write_idt_entry = native_write_idt_entry,
                        .load_sp0 = native_load_sp0,
                    
          -             .irq_enable_syscall_ret = native_irq_enable_syscall_ret,
          +         #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
          +             .irq_enable_sysexit = native_irq_enable_sysexit,
          +         #endif
          +         #ifdef CONFIG_X86_64
          +         #ifdef CONFIG_IA32_EMULATION
          +             .usergs_sysret32 = native_usergs_sysret32,
          +         #endif
          +             .usergs_sysret64 = native_usergs_sysret64,
          +         #endif
                        .iret = native_iret,
                        .swapgs = native_swapgs,
                    
                    struct pv_apic_ops pv_apic_ops = {
                    #ifdef CONFIG_X86_LOCAL_APIC
                        .apic_write = native_apic_write,
-- -----------------    .apic_write_atomic = native_apic_write_atomic,
                        .apic_read = native_apic_read,
                        .setup_boot_clock = setup_boot_APIC_clock,
                        .setup_secondary_clock = setup_secondary_APIC_clock,
@@@@@@@@@@@@@@@@@@@@@ -373,6 -373,6 -372,6 -373,6 -373,6 -373,6 -373,6 -373,6 -373,6 -373,6 -354,6 -373,6 -373,6 -373,6 -373,6 -373,6 -373,6 -373,6 -373,6 -374,9 +373,9 @@@@@@@@@@@@@@@@@@@@@ struct pv_mmu_ops pv_mmu_ops = 
                    #ifndef CONFIG_X86_64
                        .pagetable_setup_start = native_pagetable_setup_start,
                        .pagetable_setup_done = native_pagetable_setup_done,
+++++++++++++++++++ #else
+++++++++++++++++++     .pagetable_setup_start = paravirt_nop,
+++++++++++++++++++     .pagetable_setup_done = paravirt_nop,
                    #endif
                    
                        .read_cr2 = native_read_cr2,
                        .flush_tlb_single = native_flush_tlb_single,
                        .flush_tlb_others = native_flush_tlb_others,
                    
          +             .pgd_alloc = __paravirt_pgd_alloc,
          +             .pgd_free = paravirt_nop,
          +         
                        .alloc_pte = paravirt_nop,
                        .alloc_pmd = paravirt_nop,
                        .alloc_pmd_clone = paravirt_nop,
                        .pte_update = paravirt_nop,
                        .pte_update_defer = paravirt_nop,
                    
          +             .ptep_modify_prot_start = __ptep_modify_prot_start,
          +             .ptep_modify_prot_commit = __ptep_modify_prot_commit,
          +         
                    #ifdef CONFIG_HIGHPTE
                        .kmap_atomic_pte = kmap_atomic,
                    #endif
                    #endif /* PAGETABLE_LEVELS >= 3 */
                    
                        .pte_val = native_pte_val,
          +             .pte_flags = native_pte_val,
                        .pgd_val = native_pgd_val,
                    
                        .make_pte = native_make_pte,
                                .enter = paravirt_nop,
                                .leave = paravirt_nop,
                        },
          +         
          +             .set_fixmap = native_set_fixmap,
                    };
                    
                    EXPORT_SYMBOL_GPL(pv_time_ops);
index 8467ec2320f178584afb402cfb2b48859a3eb48e,8467ec2320f178584afb402cfb2b48859a3eb48e,8467ec2320f178584afb402cfb2b48859a3eb48e,702714bd1511a755dfa84a4f63c1d5d08c1be5da,8467ec2320f178584afb402cfb2b48859a3eb48e,8467ec2320f178584afb402cfb2b48859a3eb48e,8467ec2320f178584afb402cfb2b48859a3eb48e,d12945de0565f7c993cd55a95df00c78ba78995c,8467ec2320f178584afb402cfb2b48859a3eb48e,8467ec2320f178584afb402cfb2b48859a3eb48e,dc00a1331acef73c204ed0979d8527450af5246f,8467ec2320f178584afb402cfb2b48859a3eb48e,8467ec2320f178584afb402cfb2b48859a3eb48e,8467ec2320f178584afb402cfb2b48859a3eb48e,8467ec2320f178584afb402cfb2b48859a3eb48e,8467ec2320f178584afb402cfb2b48859a3eb48e,8467ec2320f178584afb402cfb2b48859a3eb48e,8467ec2320f178584afb402cfb2b48859a3eb48e,8467ec2320f178584afb402cfb2b48859a3eb48e,8467ec2320f178584afb402cfb2b48859a3eb48e..a4213c00dffc355a6a8b8958c85399256a81be40
                    
                    #include <asm/proto.h>
                    #include <asm/dma.h>
------- ------------#include <asm/gart.h>
+++++++ ++++++++++++#include <asm/iommu.h>
                    #include <asm/calgary.h>
          +         #include <asm/amd_iommu.h>
                    
--- ----------------int forbid_dac __read_mostly;
--- ----------------EXPORT_SYMBOL(forbid_dac);
+++ ++++++++++++++++static int forbid_dac __read_mostly;
                    
                    const struct dma_mapping_ops *dma_ops;
                    EXPORT_SYMBOL(dma_ops);
@@@@@@@@@@@@@@@@@@@@@ -75,17 -75,17 -75,17 -74,17 -75,17 -75,17 -75,17 -75,17 -75,17 -75,17 -74,13 -75,17 -75,17 -75,17 -75,17 -75,17 -75,17 -75,17 -75,17 -75,17 +74,17 @@@@@@@@@@@@@@@@@@@@@ early_param("dma32_size", parse_dma32_s
                    void __init dma32_reserve_bootmem(void)
                    {
                        unsigned long size, align;
          -             if (end_pfn <= MAX_DMA32_PFN)
          +             if (max_pfn <= MAX_DMA32_PFN)
                                return;
                    
          +             /*
           +              * see allocate_aperture() in aperture_64.c for why 512M is
           +              * used as the goal
          +              */
                        align = 64ULL<<20;
                        size = round_up(dma32_bootmem_size, align);
                        dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align,
          -                                      __pa(MAX_DMA_ADDRESS));
          +                                      512ULL<<20);
                        if (dma32_bootmem_ptr)
                                dma32_bootmem_size = size;
                        else
                    }
                    static void __init dma32_free_bootmem(void)
                    {
          -             int node;
                    
          -             if (end_pfn <= MAX_DMA32_PFN)
          +             if (max_pfn <= MAX_DMA32_PFN)
                                return;
                    
                        if (!dma32_bootmem_ptr)
                                return;
                    
          -             for_each_online_node(node)
          -                     free_bootmem_node(NODE_DATA(node), __pa(dma32_bootmem_ptr),
          -                                       dma32_bootmem_size);
          +             free_bootmem(__pa(dma32_bootmem_ptr), dma32_bootmem_size);
                    
                        dma32_bootmem_ptr = NULL;
                        dma32_bootmem_size = 0;
@@@@@@@@@@@@@@@@@@@@@ -114,21 -114,21 -114,21 -113,21 -114,21 -114,21 -114,21 -114,15 -114,21 -114,21 -112,19 -114,21 -114,21 -114,21 -114,21 -114,21 -114,21 -114,21 -114,21 -114,21 +113,15 @@@@@@@@@@@@@@@@@@@@@ void __init pci_iommu_alloc(void
                         * The order of these functions is important for
                         * fall-back/fail-over reasons
                         */
------- ------------#ifdef CONFIG_GART_IOMMU
                        gart_iommu_hole_init();
------- ------------#endif
                    
------- ------------#ifdef CONFIG_CALGARY_IOMMU
                        detect_calgary();
------- ------------#endif
                    
                        detect_intel_iommu();
                    
          -         #ifdef CONFIG_SWIOTLB
          +             amd_iommu_detect();
          +         
------- -- ---------#ifdef CONFIG_SWIOTLB
                        pci_swiotlb_init();
------- ------------#endif
                    }
                    #endif
                    
@@@@@@@@@@@@@@@@@@@@@ -184,9 -184,9 -184,9 -183,9 -184,9 -184,9 -184,9 -178,7 -184,9 -184,9 -180,9 -184,9 -184,9 -184,9 -184,9 -184,9 -184,9 -184,9 -184,9 -184,9 +177,7 @@@@@@@@@@@@@@@@@@@@@ static __init int iommu_setup(char *p
                                        swiotlb = 1;
                    #endif
                    
------- ------------#ifdef CONFIG_GART_IOMMU
                                gart_parse_options(p);
------- ------------#endif
                    
                    #ifdef CONFIG_CALGARY_IOMMU
                                if (!strncmp(p, "calgary", 7))
@@@@@@@@@@@@@@@@@@@@@ -361,7 -361,7 -361,7 -360,7 -361,7 -361,7 -361,7 -353,7 -361,7 -361,7 -357,7 -361,7 -361,7 -361,7 -361,7 -361,7 -361,7 -361,7 -361,7 -361,7 +352,7 @@@@@@@@@@@@@@@@@@@@@ int dma_supported(struct device *dev, u
                    EXPORT_SYMBOL(dma_supported);
                    
                    /* Allocate DMA memory on node near device */
          -         noinline struct page *
          +         static noinline struct page *
                    dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order)
                    {
                        int node;
@@@@@@@@@@@@@@@@@@@@@ -500,17 -500,17 -500,17 -499,17 -500,17 -500,17 -500,17 -492,13 -500,17 -500,17 -496,15 -500,17 -500,17 -500,17 -500,17 -500,17 -500,17 -500,17 -500,17 -500,17 +491,13 @@@@@@@@@@@@@@@@@@@@@ EXPORT_SYMBOL(dma_free_coherent)
                    
                    static int __init pci_iommu_init(void)
                    {
------- ------------#ifdef CONFIG_CALGARY_IOMMU
                        calgary_iommu_init();
------- ------------#endif
                    
                        intel_iommu_init();
                    
          -         #ifdef CONFIG_GART_IOMMU
          +             amd_iommu_init();
          +         
------- -- ---------#ifdef CONFIG_GART_IOMMU
                        gart_iommu_init();
------- ------------#endif
                    
                        no_iommu_init();
                        return 0;
index c3fe78406d1897b40e380f0528cf4db832d0a191,c3fe78406d1897b40e380f0528cf4db832d0a191,c3fe78406d1897b40e380f0528cf4db832d0a191,c3fe78406d1897b40e380f0528cf4db832d0a191,c3fe78406d1897b40e380f0528cf4db832d0a191,c3fe78406d1897b40e380f0528cf4db832d0a191,c3fe78406d1897b40e380f0528cf4db832d0a191,949ca985deb061edd7ad05ba1cca2504002afb07,c3fe78406d1897b40e380f0528cf4db832d0a191,c3fe78406d1897b40e380f0528cf4db832d0a191,aa8ec928caa85ff43d4da3a0fe3e7d570b57f08a,c3fe78406d1897b40e380f0528cf4db832d0a191,c3fe78406d1897b40e380f0528cf4db832d0a191,c3fe78406d1897b40e380f0528cf4db832d0a191,c3fe78406d1897b40e380f0528cf4db832d0a191,c3fe78406d1897b40e380f0528cf4db832d0a191,d0d18db5d2a4809c72126d665d92843267147297,c3fe78406d1897b40e380f0528cf4db832d0a191,c3fe78406d1897b40e380f0528cf4db832d0a191,c3fe78406d1897b40e380f0528cf4db832d0a191..be60961f8695681b2b96dcda25b694933b5f0d47
                    #include <asm/mtrr.h>
                    #include <asm/pgtable.h>
                    #include <asm/proto.h>
+++++++ ++++++++++++#include <asm/iommu.h>
                    #include <asm/gart.h>
                    #include <asm/cacheflush.h>
                    #include <asm/swiotlb.h>
@@@@@@@@@@@@@@@@@@@@@ -104,6 -104,6 -104,6 -104,6 -104,6 -104,6 -104,6 -105,6 -104,6 -104,6 -104,7 -104,6 -104,6 -104,6 -104,6 -104,6 -104,6 -104,6 -104,6 -104,6 +105,6 @@@@@@@@@@@@@@@@@@@@@ static unsigned long alloc_iommu(struc
                                                          size, base_index, boundary_size, 0);
                        }
                        if (offset != -1) {
          -                     set_bit_string(iommu_gart_bitmap, offset, size);
                                next_bit = offset+size;
                                if (next_bit >= iommu_pages) {
                                        next_bit = 0;
@@@@@@@@@@@@@@@@@@@@@ -533,8 -533,8 -533,8 -533,8 -533,8 -533,8 -533,8 -534,8 -533,8 -533,8 -534,8 -533,8 -533,8 -533,8 -533,8 -533,8 -533,8 -533,8 -533,8 -533,8 +534,8 @@@@@@@@@@@@@@@@@@@@@ static __init unsigned read_aperture(st
                        unsigned aper_size = 0, aper_base_32, aper_order;
                        u64 aper_base;
                    
          -             pci_read_config_dword(dev, 0x94, &aper_base_32);
          -             pci_read_config_dword(dev, 0x90, &aper_order);
          +             pci_read_config_dword(dev, AMD64_GARTAPERTUREBASE, &aper_base_32);
          +             pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &aper_order);
                        aper_order = (aper_order >> 1) & 7;
                    
                        aper_base = aper_base_32 & 0x7fff;
                        return aper_base;
                    }
                    
          +         static void enable_gart_translations(void)
          +         {
          +             int i;
          +         
          +             for (i = 0; i < num_k8_northbridges; i++) {
          +                     struct pci_dev *dev = k8_northbridges[i];
          +         
          +                     enable_gart_translation(dev, __pa(agp_gatt_table));
          +             }
          +         }
          +         
          +         /*
          +          * If fix_up_north_bridges is set, the north bridges have to be fixed up on
          +          * resume in the same way as they are handled in gart_iommu_hole_init().
          +          */
          +         static bool fix_up_north_bridges;
          +         static u32 aperture_order;
          +         static u32 aperture_alloc;
          +         
          +         void set_up_gart_resume(u32 aper_order, u32 aper_alloc)
          +         {
          +             fix_up_north_bridges = true;
          +             aperture_order = aper_order;
          +             aperture_alloc = aper_alloc;
          +         }
          +         
                    static int gart_resume(struct sys_device *dev)
                    {
          +             printk(KERN_INFO "PCI-DMA: Resuming GART IOMMU\n");
          +         
          +             if (fix_up_north_bridges) {
          +                     int i;
          +         
          +                     printk(KERN_INFO "PCI-DMA: Restoring GART aperture settings\n");
          +         
          +                     for (i = 0; i < num_k8_northbridges; i++) {
          +                             struct pci_dev *dev = k8_northbridges[i];
          +         
          +                             /*
          +                              * Don't enable translations just yet.  That is the next
          +                              * step.  Restore the pre-suspend aperture settings.
          +                              */
          +                             pci_write_config_dword(dev, AMD64_GARTAPERTURECTL,
          +                                                     aperture_order << 1);
          +                             pci_write_config_dword(dev, AMD64_GARTAPERTUREBASE,
          +                                                     aperture_alloc >> 25);
          +                     }
          +             }
          +         
          +             enable_gart_translations();
          +         
                        return 0;
                    }
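The suspend/resume path above writes the aperture order and base back in the packed register formats that read_aperture() decodes. A small stand-alone sketch of that packing follows; the bit positions and the names ctl_reg/base_reg are inferred from the surrounding code for illustration, not quoted from AMD documentation.

    /* illustrative only -- bit layout inferred from the code above */
    #include <stdio.h>

    int main(void)
    {
            unsigned int aper_order = 1;            /* 32MB << 1 = 64MB aperture */
            unsigned int aper_alloc = 0x80000000u;  /* physical aperture base */

            /* what gart_resume() writes back */
            unsigned int ctl_reg  = aper_order << 1;        /* AMD64_GARTAPERTURECTL */
            unsigned int base_reg = aper_alloc >> 25;       /* AMD64_GARTAPERTUREBASE */

            /* what read_aperture() recovers from those registers */
            unsigned int order_back = (ctl_reg >> 1) & 7;
            unsigned long long base_back =
                    (unsigned long long)(base_reg & 0x7fff) << 25;

            printf("order=%u base=%#llx\n", order_back, base_back);
            return 0;
    }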
                    
                    static int gart_suspend(struct sys_device *dev, pm_message_t state)
                    {
          -             return -EINVAL;
          +             return 0;
                    }
                    
                    static struct sysdev_class gart_sysdev_class = {
@@@@@@@@@@@@@@@@@@@@@ -630,7 -630,7 -630,7 -630,7 -630,7 -630,7 -630,7 -631,6 -630,7 -630,7 -582,6 -630,7 -630,7 -630,7 -630,7 -630,7 -630,6 -630,7 -630,7 -630,7 +631,7 @@@@@@@@@@@@@@@@@@@@@ static __init int init_k8_gatt(struct a
                        struct pci_dev *dev;
                        void *gatt;
                        int i, error;
       +  +     +       unsigned long start_pfn, end_pfn;
                    
                        printk(KERN_INFO "PCI-DMA: Disabling AGP.\n");
                        aper_size = aper_base = info->aper_size = 0;
                        memset(gatt, 0, gatt_size);
                        agp_gatt_table = gatt;
                    
          -             for (i = 0; i < num_k8_northbridges; i++) {
          -                     u32 gatt_reg;
          -                     u32 ctl;
          -         
          -                     dev = k8_northbridges[i];
          -                     gatt_reg = __pa(gatt) >> 12;
          -                     gatt_reg <<= 4;
          -                     pci_write_config_dword(dev, 0x98, gatt_reg);
          -                     pci_read_config_dword(dev, 0x90, &ctl);
          -         
          -                     ctl |= 1;
          -                     ctl &= ~((1<<4) | (1<<5));
          -         
          -                     pci_write_config_dword(dev, 0x90, ctl);
          -             }
          +             enable_gart_translations();
                    
                        error = sysdev_class_register(&gart_sysdev_class);
                        if (!error)
                                error = sysdev_register(&device_gart);
                        if (error)
                                panic("Could not register gart_sysdev -- would corrupt data on next suspend");
          +         
                        flush_gart();
                    
                        printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n",
                               aper_base, aper_size>>10);
       +  +     +   
       +  +     +       /* need to map that range */
       +  +     +       end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT);
       +  +     +       if (end_pfn > max_low_pfn_mapped) {
       +  +     +               start_pfn = (aper_base>>PAGE_SHIFT);
       +  +     +               init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
       +  +     +       }
                        return 0;
                    
                     nommu:
                                u32 ctl;
                    
                                dev = k8_northbridges[i];
          -                     pci_read_config_dword(dev, 0x90, &ctl);
          +                     pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl);
                    
          -                     ctl &= ~1;
          +                     ctl &= ~GARTEN;
                    
          -                     pci_write_config_dword(dev, 0x90, ctl);
          +                     pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, ctl);
                        }
                    }
                    
@@@@@@@@@@@@@@@@@@@@@ -759,10 -759,10 -759,10 -759,10 -759,10 -759,10 -759,10 -752,10 -759,10 -759,10 -716,10 -759,10 -759,10 -759,10 -759,10 -759,10 -751,10 -759,10 -759,10 -759,10 +760,10 @@@@@@@@@@@@@@@@@@@@@ void __init gart_iommu_init(void
                                return;
                    
                        if (no_iommu ||
          -                 (!force_iommu && end_pfn <= MAX_DMA32_PFN) ||
          +                 (!force_iommu && max_pfn <= MAX_DMA32_PFN) ||
                            !gart_iommu_aperture ||
                            (no_agp && init_k8_gatt(&info) < 0)) {
          -                     if (end_pfn > MAX_DMA32_PFN) {
          +                     if (max_pfn > MAX_DMA32_PFN) {
                                        printk(KERN_WARNING "More than 4GB of memory "
                                                          "but GART IOMMU not available.\n"
                                               KERN_WARNING "falling back to iommu=soft.\n");
                        wbinvd();
                    
                        /*
          -              * Try to workaround a bug (thanks to BenH)
           +              * Try to work around a bug (thanks to BenH):
                         * Set unmapped entries to a scratch page instead of 0.
                          * Any prefetches that hit unmapped entries won't get a bus abort
          -              * then.
          +              * then. (P2P bridge may be prefetching on DMA reads).
                         */
                        scratch = get_zeroed_page(GFP_KERNEL);
                        if (!scratch)
index 4d629c62f4f8fbb993a49c0d5f12d7e88bbd94b5,4d629c62f4f8fbb993a49c0d5f12d7e88bbd94b5,4d629c62f4f8fbb993a49c0d5f12d7e88bbd94b5,9f94bb1c81170f8b047aaf7782291e13919fc15a,4d629c62f4f8fbb993a49c0d5f12d7e88bbd94b5,74f2d196adb4e70d14b357ac979b43fea9f7f81e,4061d63aabe74bb12f32f5d162c564240ae8d8a5,4061d63aabe74bb12f32f5d162c564240ae8d8a5,7dceea947232f5739b7844c10ba9c2b22f336e95,4d629c62f4f8fbb993a49c0d5f12d7e88bbd94b5,ba370dc8685bf8f9bf31e9100f78c4e800ea755e,4d629c62f4f8fbb993a49c0d5f12d7e88bbd94b5,4061d63aabe74bb12f32f5d162c564240ae8d8a5,7dceea947232f5739b7844c10ba9c2b22f336e95,4d629c62f4f8fbb993a49c0d5f12d7e88bbd94b5,4d629c62f4f8fbb993a49c0d5f12d7e88bbd94b5,4061d63aabe74bb12f32f5d162c564240ae8d8a5,4d629c62f4f8fbb993a49c0d5f12d7e88bbd94b5,4d629c62f4f8fbb993a49c0d5f12d7e88bbd94b5,4d629c62f4f8fbb993a49c0d5f12d7e88bbd94b5..7fc4d5b0a6a0f99a4d1d9c4df685a5a4d3a135bb
                    #include <linux/sched.h>
                    #include <linux/module.h>
                    #include <linux/pm.h>
          +         #include <linux/clockchips.h>
      +++ + ++  +   #include <asm/system.h>
      +++ + ++  +   
      +++ + ++  +   unsigned long idle_halt;
      +++ + ++  +   EXPORT_SYMBOL(idle_halt);
      +++ + ++  +   unsigned long idle_nomwait;
      +++ + ++  +   EXPORT_SYMBOL(idle_nomwait);
                    
                    struct kmem_cache *task_xstate_cachep;
+++++ ++++++++++++++static int force_mwait __cpuinitdata;
                    
                    int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
                    {
@@@@@@@@@@@@@@@@@@@@@ -52,76 -52,76 -52,76 -52,76 -52,76 -53,76 -46,76 -46,76 -46,76 -52,76 -45,6 -52,76 -46,76 -46,76 -52,76 -52,76 -46,76 -52,76 -52,76 -52,76 +53,76 @@@@@@@@@@@@@@@@@@@@@ void arch_task_cache_init(void
                                                  SLAB_PANIC, NULL);
                    }
                    
          +         /*
          +          * Idle related variables and functions
          +          */
          +         unsigned long boot_option_idle_override = 0;
          +         EXPORT_SYMBOL(boot_option_idle_override);
          +         
          +         /*
           +          * Power management idle function, if any..
          +          */
          +         void (*pm_idle)(void);
          +         EXPORT_SYMBOL(pm_idle);
          +         
          +         #ifdef CONFIG_X86_32
          +         /*
          +          * This halt magic was a workaround for ancient floppy DMA
          +          * wreckage. It should be safe to remove.
          +          */
          +         static int hlt_counter;
          +         void disable_hlt(void)
          +         {
          +             hlt_counter++;
          +         }
          +         EXPORT_SYMBOL(disable_hlt);
          +         
          +         void enable_hlt(void)
          +         {
          +             hlt_counter--;
          +         }
          +         EXPORT_SYMBOL(enable_hlt);
          +         
          +         static inline int hlt_use_halt(void)
          +         {
          +             return (!hlt_counter && boot_cpu_data.hlt_works_ok);
          +         }
          +         #else
          +         static inline int hlt_use_halt(void)
          +         {
          +             return 1;
          +         }
          +         #endif
          +         
          +         /*
          +          * We use this if we don't have any better
          +          * idle routine..
          +          */
          +         void default_idle(void)
          +         {
          +             if (hlt_use_halt()) {
          +                     current_thread_info()->status &= ~TS_POLLING;
          +                     /*
          +                      * TS_POLLING-cleared state must be visible before we
          +                      * test NEED_RESCHED:
          +                      */
          +                     smp_mb();
          +         
          +                     if (!need_resched())
          +                             safe_halt();    /* enables interrupts racelessly */
          +                     else
          +                             local_irq_enable();
          +                     current_thread_info()->status |= TS_POLLING;
          +             } else {
          +                     local_irq_enable();
          +                     /* loop is done by the caller */
          +                     cpu_relax();
          +             }
          +         }
          +         #ifdef CONFIG_APM_MODULE
          +         EXPORT_SYMBOL(default_idle);
          +         #endif
          +         
                    static void do_nothing(void *unused)
                    {
                    }
@@@@@@@@@@@@@@@@@@@@@ -138,7 -138,7 -138,7 -138,7 -138,7 -139,7 -132,7 -132,7 -132,7 -138,7 -61,7 -138,7 -132,7 -132,7 -138,7 -138,7 -132,7 -138,7 -138,7 -138,7 +139,7 @@@@@@@@@@@@@@@@@@@@@ void cpu_idle_wait(void
                    {
                        smp_mb();
                        /* kick all the CPUs so that they exit out of pm_idle */
      --  - -   -       smp_call_function(do_nothing, NULL, 0, 1);
      ++  + +   +       smp_call_function(do_nothing, NULL, 1);
                    }
                    EXPORT_SYMBOL_GPL(cpu_idle_wait);
                    
                     *
                     * idle=mwait overrides this decision and forces the usage of mwait.
                     */
+++ ++++++++++++++++static int __cpuinitdata force_mwait;
          +         
          +         #define MWAIT_INFO                  0x05
          +         #define MWAIT_ECX_EXTENDED_INFO             0x01
          +         #define MWAIT_EDX_C1                        0xf0
          +         
                    static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c)
                    {
          +             u32 eax, ebx, ecx, edx;
          +         
                        if (force_mwait)
                                return 1;
                    
          -             if (c->x86_vendor == X86_VENDOR_AMD) {
          -                     switch(c->x86) {
          -                     case 0x10:
          -                     case 0x11:
          -                             return 0;
          -                     }
          -             }
          +             if (c->cpuid_level < MWAIT_INFO)
          +                     return 0;
          +         
          +             cpuid(MWAIT_INFO, &eax, &ebx, &ecx, &edx);
           +             /* Check whether EDX has extended info about MWAIT */
          +             if (!(ecx & MWAIT_ECX_EXTENDED_INFO))
          +                     return 1;
          +         
          +             /*
           +              * EDX enumerates MONITOR/MWAIT extensions. Check whether
           +              * C1 supports MWAIT
          +              */
          +             return (edx & MWAIT_EDX_C1);
          +         }
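The same CPUID probe can be reproduced from user space. A small sketch using GCC's <cpuid.h> follows, mirroring mwait_usable() above: leaf 5, ECX bit 0 says the sub-state fields are valid, and EDX bits 7:4 count the MWAIT C1 sub-states. Nothing here is kernel API; it is a stand-alone x86/gcc program.

    /* user-space mirror of the CPUID leaf 5 probe; x86 + gcc only */
    #include <stdio.h>
    #include <cpuid.h>

    int main(void)
    {
            unsigned int eax, ebx, ecx, edx;

            if (!__get_cpuid(0x05, &eax, &ebx, &ecx, &edx)) {
                    printf("CPUID leaf 5 not supported\n");
                    return 1;
            }
            if (!(ecx & 0x01)) {            /* no extended MWAIT info in ECX/EDX */
                    printf("no extended MWAIT info; C1 assumed usable\n");
                    return 0;
            }
            printf("MWAIT C1 sub-states supported: %u\n", (edx & 0xf0) >> 4);
            return 0;
    }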
          +         
          +         /*
           +          * Check for AMD CPUs, which potentially have C1E support
          +          */
          +         static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c)
          +         {
          +             if (c->x86_vendor != X86_VENDOR_AMD)
          +                     return 0;
          +         
          +             if (c->x86 < 0x0F)
          +                     return 0;
          +         
          +             /* Family 0x0f models < rev F do not have C1E */
          +             if (c->x86 == 0x0f && c->x86_model < 0x40)
          +                     return 0;
          +         
                        return 1;
                    }
                    
          -         void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
          +         /*
          +          * C1E aware idle routine. We check for C1E active in the interrupt
          +          * pending message MSR. If we detect C1E, then we handle it the same
          +          * way as C3 power states (local apic timer and TSC stop)
          +          */
          +         static void c1e_idle(void)
                    {
          -             static int selected;
          +             static cpumask_t c1e_mask = CPU_MASK_NONE;
          +             static int c1e_detected;
                    
          -             if (selected)
          +             if (need_resched())
                                return;
          +         
          +             if (!c1e_detected) {
          +                     u32 lo, hi;
          +         
          +                     rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi);
          +                     if (lo & K8_INTP_C1E_ACTIVE_MASK) {
          +                             c1e_detected = 1;
          +                             mark_tsc_unstable("TSC halt in C1E");
          +                             printk(KERN_INFO "System has C1E enabled\n");
          +                     }
          +             }
          +         
          +             if (c1e_detected) {
          +                     int cpu = smp_processor_id();
          +         
          +                     if (!cpu_isset(cpu, c1e_mask)) {
          +                             cpu_set(cpu, c1e_mask);
          +                             /*
           +                              * Force broadcast so ACPI cannot interfere. Needs
           +                              * to run with interrupts enabled as it uses
           +                              * smp_call_function().
          +                              */
          +                             local_irq_enable();
          +                             clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE,
          +                                                &cpu);
          +                             printk(KERN_INFO "Switch to broadcast mode on CPU%d\n",
          +                                    cpu);
          +                             local_irq_disable();
          +                     }
          +                     clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);
          +         
          +                     default_idle();
          +         
          +                     /*
          +                      * The switch back from broadcast mode needs to be
          +                      * called with interrupts disabled.
          +                      */
          +                      local_irq_disable();
          +                      clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);
          +                      local_irq_enable();
          +             } else
          +                     default_idle();
          +         }
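For reference, the C1E-active test that c1e_idle() performs can be approximated from user space through the msr driver. A hedged sketch follows: the register address and mask are assumptions mirroring MSR_K8_INT_PENDING_MSG and K8_INTP_C1E_ACTIVE_MASK from the kernel headers of this period, and /dev/cpu/0/msr requires the msr module plus root privileges.

    /* user-space approximation of the C1E check; constants are assumptions
     * mirroring MSR_K8_INT_PENDING_MSG / K8_INTP_C1E_ACTIVE_MASK */
    #include <stdio.h>
    #include <stdint.h>
    #include <fcntl.h>
    #include <unistd.h>

    #define MSR_K8_INT_PENDING_MSG  0xc0010055      /* assumed value */
    #define K8_INTP_C1E_ACTIVE_MASK 0x18000000      /* assumed value */

    int main(void)
    {
            uint64_t val;
            int fd = open("/dev/cpu/0/msr", O_RDONLY);      /* needs the msr module */

            if (fd < 0) {
                    perror("open /dev/cpu/0/msr");
                    return 1;
            }
            if (pread(fd, &val, sizeof(val), MSR_K8_INT_PENDING_MSG) != sizeof(val)) {
                    perror("pread");
                    close(fd);
                    return 1;
            }
            printf("C1E %s\n", (val & K8_INTP_C1E_ACTIVE_MASK) ? "active" : "not active");
            close(fd);
            return 0;
    }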
          +         
          +         void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
          +         {
                    #ifdef CONFIG_X86_SMP
                        if (pm_idle == poll_idle && smp_num_siblings > 1) {
                                printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
                                        " performance may degrade.\n");
                        }
                    #endif
          +             if (pm_idle)
          +                     return;
          +         
                        if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
                                /*
          -                      * Skip, if setup has overridden idle.
                                  * One CPU supports mwait => all CPUs support mwait
                                 */
          -                     if (!pm_idle) {
          -                             printk(KERN_INFO "using mwait in idle threads.\n");
          -                             pm_idle = mwait_idle;
          -                     }
          -             }
          -             selected = 1;
          +                     printk(KERN_INFO "using mwait in idle threads.\n");
          +                     pm_idle = mwait_idle;
          +             } else if (check_c1e_idle(c)) {
          +                     printk(KERN_INFO "using C1E aware idle routine\n");
          +                     pm_idle = c1e_idle;
          +             } else
          +                     pm_idle = default_idle;
                    }
                    
                    static int __init idle_setup(char *str)
                    {
+++ ++++++++++++++++    if (!str)
+++ ++++++++++++++++            return -EINVAL;
+++ ++++++++++++++++
                        if (!strcmp(str, "poll")) {
                                printk("using polling idle threads.\n");
                                pm_idle = poll_idle;
                        } else if (!strcmp(str, "mwait"))
                                force_mwait = 1;
      --- - --  -       else
      +++ + ++  +       else if (!strcmp(str, "halt")) {
      +++ + ++  +               /*
       +++ + ++  +                * When the boot option idle=halt is given, halt is
       +++ + ++  +                * forced as the CPU idle routine, so the CPU C2/C3
       +++ + ++  +                * states won't be used.
       +++ + ++  +                * boot_option_idle_override is left untouched so the
       +++ + ++  +                * CPU idle driver can still be loaded.
      +++ + ++  +                */
      +++ + ++  +               pm_idle = default_idle;
      +++ + ++  +               idle_halt = 1;
      +++ + ++  +               return 0;
      +++ + ++  +       } else if (!strcmp(str, "nomwait")) {
      +++ + ++  +               /*
       +++ + ++  +                * If the boot option "idle=nomwait" is given, mwait is
       +++ + ++  +                * disabled for the CPU C2/C3 states.
       +++ + ++  +                * boot_option_idle_override is left untouched.
      +++ + ++  +                */
      +++ + ++  +               idle_nomwait = 1;
      +++ + ++  +               return 0;
      +++ + ++  +       } else
                                return -1;
                    
                        boot_option_idle_override = 1;
diff --combined arch/x86/kernel/setup.c
index 531b55b8e81a1de1827eac5691d5f8aef1d8d10c,531b55b8e81a1de1827eac5691d5f8aef1d8d10c,531b55b8e81a1de1827eac5691d5f8aef1d8d10c,4064616cfa8516abc81fa5d9d9597ea6cecea35e,531b55b8e81a1de1827eac5691d5f8aef1d8d10c,531b55b8e81a1de1827eac5691d5f8aef1d8d10c,36c540d4ac4b6164dbd02d6b23951ea1315f134f,e5d208934bfc153789aeedac922d40e3b04c879d,36c540d4ac4b6164dbd02d6b23951ea1315f134f,531b55b8e81a1de1827eac5691d5f8aef1d8d10c,6f80b852a1961a6b496bc2404c5194769557d97a,531b55b8e81a1de1827eac5691d5f8aef1d8d10c,36c540d4ac4b6164dbd02d6b23951ea1315f134f,36c540d4ac4b6164dbd02d6b23951ea1315f134f,531b55b8e81a1de1827eac5691d5f8aef1d8d10c,531b55b8e81a1de1827eac5691d5f8aef1d8d10c,987b6fde3a99c51b5acfd6c0acbf69b866337bb2,531b55b8e81a1de1827eac5691d5f8aef1d8d10c,531b55b8e81a1de1827eac5691d5f8aef1d8d10c,c9010f82141d93d16af0839838a3d18bb578d994..ec952aa5394a403a42308de114a370a656968a34
          -         #include <linux/kernel.h>
          +         /*
          +          *  Copyright (C) 1995  Linus Torvalds
          +          *
          +          *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
          +          *
          +          *  Memory region support
          +          *  David Parsons <orc@pell.chi.il.us>, July-August 1999
          +          *
          +          *  Added E820 sanitization routine (removes overlapping memory regions);
          +          *  Brian Moyle <bmoyle@mvista.com>, February 2001
          +          *
          +          * Moved CPU detection code to cpu/${cpu}.c
          +          *    Patrick Mochel <mochel@osdl.org>, March 2002
          +          *
          +          *  Provisions for empty E820 memory regions (reported by certain BIOSes).
          +          *  Alex Achenbach <xela@slit.de>, December 2002.
          +          *
          +          */
          +         
          +         /*
          +          * This file handles the architecture-dependent parts of initialization
          +          */
          +         
          +         #include <linux/sched.h>
          +         #include <linux/mm.h>
          +         #include <linux/mmzone.h>
          +         #include <linux/screen_info.h>
          +         #include <linux/ioport.h>
          +         #include <linux/acpi.h>
          +         #include <linux/apm_bios.h>
          +         #include <linux/initrd.h>
          +         #include <linux/bootmem.h>
          +         #include <linux/seq_file.h>
          +         #include <linux/console.h>
          +         #include <linux/mca.h>
          +         #include <linux/root_dev.h>
          +         #include <linux/highmem.h>
                    #include <linux/module.h>
          +         #include <linux/efi.h>
                    #include <linux/init.h>
          -         #include <linux/bootmem.h>
          +         #include <linux/edd.h>
          +         #include <linux/iscsi_ibft.h>
          +         #include <linux/nodemask.h>
          +         #include <linux/kexec.h>
          +         #include <linux/dmi.h>
          +         #include <linux/pfn.h>
          +         #include <linux/pci.h>
          +         #include <asm/pci-direct.h>
          +         #include <linux/init_ohci1394_dma.h>
          +         #include <linux/kvm_para.h>
          +         
          +         #include <linux/errno.h>
          +         #include <linux/kernel.h>
          +         #include <linux/stddef.h>
          +         #include <linux/unistd.h>
          +         #include <linux/ptrace.h>
          +         #include <linux/slab.h>
          +         #include <linux/user.h>
          +         #include <linux/delay.h>
--- ------ ---------#include <linux/highmem.h>
          +         
          +         #include <linux/kallsyms.h>
--- ------ ---------#include <linux/edd.h>
--- ------ ---------#include <linux/iscsi_ibft.h>
--- ------ ---------#include <linux/kexec.h>
          +         #include <linux/cpufreq.h>
          +         #include <linux/dma-mapping.h>
          +         #include <linux/ctype.h>
          +         #include <linux/uaccess.h>
          +         
                    #include <linux/percpu.h>
          -         #include <asm/smp.h>
          -         #include <asm/percpu.h>
          +         #include <linux/crash_dump.h>
          +         
          +         #include <video/edid.h>
          +         
          +         #include <asm/mtrr.h>
          +         #include <asm/apic.h>
          +         #include <asm/e820.h>
          +         #include <asm/mpspec.h>
          +         #include <asm/setup.h>
          +         #include <asm/arch_hooks.h>
          +         #include <asm/efi.h>
                    #include <asm/sections.h>
          +         #include <asm/dmi.h>
          +         #include <asm/io_apic.h>
          +         #include <asm/ist.h>
          +         #include <asm/vmi.h>
          +         #include <setup_arch.h>
          +         #include <asm/bios_ebda.h>
          +         #include <asm/cacheflush.h>
                    #include <asm/processor.h>
          -         #include <asm/setup.h>
          +         #include <asm/bugs.h>
          +         
          +         #include <asm/system.h>
          +         #include <asm/vsyscall.h>
          +         #include <asm/smp.h>
          +         #include <asm/desc.h>
          +         #include <asm/dma.h>
------- -- ---------#include <asm/gart.h>
+++++++ ++++++++++++#include <asm/iommu.h>
          +         #include <asm/mmu_context.h>
          +         #include <asm/proto.h>
          +         
          +         #include <mach_apic.h>
          +         #include <asm/paravirt.h>
          +         
          +         #include <asm/percpu.h>
--- ------ ---------#include <asm/sections.h>
                    #include <asm/topology.h>
          -         #include <asm/mpspec.h>
                    #include <asm/apicdef.h>
          +         #ifdef CONFIG_X86_64
          +         #include <asm/numa_64.h>
          +         #endif
                    
          -         #ifdef CONFIG_X86_LOCAL_APIC
          -         unsigned int num_processors;
          -         unsigned disabled_cpus __cpuinitdata;
          -         /* Processor that is doing the boot up */
          -         unsigned int boot_cpu_physical_apicid = -1U;
          -         EXPORT_SYMBOL(boot_cpu_physical_apicid);
          +         #ifndef ARCH_SETUP
          +         #define ARCH_SETUP
          +         #endif
                    
          -         DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID;
          -         EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid);
          +         #ifndef CONFIG_DEBUG_BOOT_PARAMS
          +         struct boot_params __initdata boot_params;
          +         #else
          +         struct boot_params boot_params;
          +         #endif
                    
          -         /* Bitmask of physically existing CPUs */
          -         physid_mask_t phys_cpu_present_map;
          +         /*
          +          * Machine setup..
          +          */
          +         static struct resource data_resource = {
          +             .name   = "Kernel data",
          +             .start  = 0,
          +             .end    = 0,
          +             .flags  = IORESOURCE_BUSY | IORESOURCE_MEM
          +         };
          +         
          +         static struct resource code_resource = {
          +             .name   = "Kernel code",
          +             .start  = 0,
          +             .end    = 0,
          +             .flags  = IORESOURCE_BUSY | IORESOURCE_MEM
          +         };
          +         
          +         static struct resource bss_resource = {
          +             .name   = "Kernel bss",
          +             .start  = 0,
          +             .end    = 0,
          +             .flags  = IORESOURCE_BUSY | IORESOURCE_MEM
          +         };
          +         
          +         
          +         #ifdef CONFIG_X86_32
          +         /* This value is set up by the early boot code to point to the value
          +            immediately after the boot time page tables.  It contains a *physical*
          +            address, and must not be in the .bss segment! */
          +         unsigned long init_pg_tables_start __initdata = ~0UL;
          +         unsigned long init_pg_tables_end __initdata = ~0UL;
          +         
          +         static struct resource video_ram_resource = {
          +             .name   = "Video RAM area",
          +             .start  = 0xa0000,
          +             .end    = 0xbffff,
          +             .flags  = IORESOURCE_BUSY | IORESOURCE_MEM
          +         };
          +         
          +         /* cpu data as detected by the assembly code in head.S */
          +         struct cpuinfo_x86 new_cpu_data __cpuinitdata = {0, 0, 0, 0, -1, 1, 0, 0, -1};
          +         /* common cpu data for all cpus */
          +         struct cpuinfo_x86 boot_cpu_data __read_mostly = {0, 0, 0, 0, -1, 1, 0, 0, -1};
          +         EXPORT_SYMBOL(boot_cpu_data);
          +         static void set_mca_bus(int x)
          +         {
          +         #ifdef CONFIG_MCA
          +             MCA_bus = x;
          +         #endif
          +         }
          +         
          +         unsigned int def_to_bigsmp;
          +         
          +         /* for MCA, but anyone else can use it if they want */
          +         unsigned int machine_id;
          +         unsigned int machine_submodel_id;
          +         unsigned int BIOS_revision;
          +         
          +         struct apm_info apm_info;
          +         EXPORT_SYMBOL(apm_info);
          +         
          +         #if defined(CONFIG_X86_SPEEDSTEP_SMI) || \
          +             defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE)
          +         struct ist_info ist_info;
          +         EXPORT_SYMBOL(ist_info);
          +         #else
          +         struct ist_info ist_info;
                    #endif
                    
          -         #if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_X86_SMP)
          +         #else
          +         struct cpuinfo_x86 boot_cpu_data __read_mostly;
          +         EXPORT_SYMBOL(boot_cpu_data);
          +         #endif
          +         
          +         
          +         #if !defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64)
          +         unsigned long mmu_cr4_features;
          +         #else
          +         unsigned long mmu_cr4_features = X86_CR4_PAE;
          +         #endif
          +         
          +         /* Boot loader ID as an integer, for the benefit of proc_dointvec */
          +         int bootloader_type;
          +         
                    /*
          -          * Copy data used in early init routines from the initial arrays to the
          -          * per cpu data areas.  These arrays then become expendable and the
          -          * *_early_ptr's are zeroed indicating that the static arrays are gone.
          +          * Early DMI memory
                     */
          -         static void __init setup_per_cpu_maps(void)
          +         int dmi_alloc_index;
          +         char dmi_alloc_data[DMI_MAX_DATA];
          +         
          +         /*
          +          * Setup options
          +          */
          +         struct screen_info screen_info;
          +         EXPORT_SYMBOL(screen_info);
          +         struct edid_info edid_info;
          +         EXPORT_SYMBOL_GPL(edid_info);
          +         
          +         extern int root_mountflags;
          +         
          +         unsigned long saved_video_mode;
          +         
          +         #define RAMDISK_IMAGE_START_MASK    0x07FF
          +         #define RAMDISK_PROMPT_FLAG         0x8000
          +         #define RAMDISK_LOAD_FLAG           0x4000
          +         
          +         static char __initdata command_line[COMMAND_LINE_SIZE];
          +         
          +         #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
          +         struct edd edd;
          +         #ifdef CONFIG_EDD_MODULE
          +         EXPORT_SYMBOL(edd);
          +         #endif
          +         /**
          +          * copy_edd() - Copy the BIOS EDD information
          +          *              from boot_params into a safe place.
          +          *
          +          */
          +         static inline void copy_edd(void)
          +         {
          +              memcpy(edd.mbr_signature, boot_params.edd_mbr_sig_buffer,
          +                 sizeof(edd.mbr_signature));
          +              memcpy(edd.edd_info, boot_params.eddbuf, sizeof(edd.edd_info));
          +              edd.mbr_signature_nr = boot_params.edd_mbr_sig_buf_entries;
          +              edd.edd_info_nr = boot_params.eddbuf_entries;
          +         }
          +         #else
          +         static inline void copy_edd(void)
          +         {
          +         }
          +         #endif
          +         
          +         #ifdef CONFIG_BLK_DEV_INITRD
          +         
          +         #ifdef CONFIG_X86_32
          +         
          +         #define MAX_MAP_CHUNK       (NR_FIX_BTMAPS << PAGE_SHIFT)
          +         static void __init relocate_initrd(void)
                    {
          -             int cpu;
                    
          -             for_each_possible_cpu(cpu) {
          -                     per_cpu(x86_cpu_to_apicid, cpu) = x86_cpu_to_apicid_init[cpu];
          -                     per_cpu(x86_bios_cpu_apicid, cpu) =
          -                                                     x86_bios_cpu_apicid_init[cpu];
          -         #ifdef CONFIG_NUMA
          -                     per_cpu(x86_cpu_to_node_map, cpu) =
          -                                                     x86_cpu_to_node_map_init[cpu];
          +             u64 ramdisk_image = boot_params.hdr.ramdisk_image;
          +             u64 ramdisk_size  = boot_params.hdr.ramdisk_size;
          +             u64 end_of_lowmem = max_low_pfn << PAGE_SHIFT;
          +             u64 ramdisk_here;
          +             unsigned long slop, clen, mapaddr;
          +             char *p, *q;
          +         
          +             /* We need to move the initrd down into lowmem */
          +             ramdisk_here = find_e820_area(0, end_of_lowmem, ramdisk_size,
          +                                              PAGE_SIZE);
          +         
          +             if (ramdisk_here == -1ULL)
          +                     panic("Cannot find place for new RAMDISK of size %lld\n",
          +                              ramdisk_size);
          +         
          +             /* Note: this includes all the lowmem currently occupied by
           +                the initrd; we rely on that fact to keep the data intact. */
          +             reserve_early(ramdisk_here, ramdisk_here + ramdisk_size,
          +                              "NEW RAMDISK");
          +             initrd_start = ramdisk_here + PAGE_OFFSET;
          +             initrd_end   = initrd_start + ramdisk_size;
          +             printk(KERN_INFO "Allocated new RAMDISK: %08llx - %08llx\n",
          +                              ramdisk_here, ramdisk_here + ramdisk_size);
          +         
          +             q = (char *)initrd_start;
          +         
          +             /* Copy any lowmem portion of the initrd */
          +             if (ramdisk_image < end_of_lowmem) {
          +                     clen = end_of_lowmem - ramdisk_image;
          +                     p = (char *)__va(ramdisk_image);
          +                     memcpy(q, p, clen);
          +                     q += clen;
          +                     ramdisk_image += clen;
          +                     ramdisk_size  -= clen;
          +             }
          +         
          +             /* Copy the highmem portion of the initrd */
          +             while (ramdisk_size) {
          +                     slop = ramdisk_image & ~PAGE_MASK;
          +                     clen = ramdisk_size;
          +                     if (clen > MAX_MAP_CHUNK-slop)
          +                             clen = MAX_MAP_CHUNK-slop;
          +                     mapaddr = ramdisk_image & PAGE_MASK;
          +                     p = early_ioremap(mapaddr, clen+slop);
          +                     memcpy(q, p+slop, clen);
          +                     early_iounmap(p, clen+slop);
          +                     q += clen;
          +                     ramdisk_image += clen;
          +                     ramdisk_size  -= clen;
          +             }
           +             /* high pages are not converted by early_res_to_bootmem */
          +             ramdisk_image = boot_params.hdr.ramdisk_image;
          +             ramdisk_size  = boot_params.hdr.ramdisk_size;
          +             printk(KERN_INFO "Move RAMDISK from %016llx - %016llx to"
          +                     " %08llx - %08llx\n",
          +                     ramdisk_image, ramdisk_image + ramdisk_size - 1,
          +                     ramdisk_here, ramdisk_here + ramdisk_size - 1);
          +         }
                    #endif
          +         
          +         static void __init reserve_initrd(void)
          +         {
          +             u64 ramdisk_image = boot_params.hdr.ramdisk_image;
          +             u64 ramdisk_size  = boot_params.hdr.ramdisk_size;
          +             u64 ramdisk_end   = ramdisk_image + ramdisk_size;
          +             u64 end_of_lowmem = max_low_pfn << PAGE_SHIFT;
          +         
          +             if (!boot_params.hdr.type_of_loader ||
          +                 !ramdisk_image || !ramdisk_size)
          +                     return;         /* No initrd provided by bootloader */
          +         
          +             initrd_start = 0;
          +         
          +             if (ramdisk_size >= (end_of_lowmem>>1)) {
          +                     free_early(ramdisk_image, ramdisk_end);
          +                     printk(KERN_ERR "initrd too large to handle, "
          +                            "disabling initrd\n");
          +                     return;
          +             }
          +         
          +             printk(KERN_INFO "RAMDISK: %08llx - %08llx\n", ramdisk_image,
          +                             ramdisk_end);
          +         
          +         
          +             if (ramdisk_end <= end_of_lowmem) {
          +                     /* All in lowmem, easy case */
          +                     /*
          +                      * don't need to reserve again, already reserved early
          +                      * in i386_start_kernel
          +                      */
          +                     initrd_start = ramdisk_image + PAGE_OFFSET;
          +                     initrd_end = initrd_start + ramdisk_size;
          +                     return;
                        }
                    
          -             /* indicate the early static arrays will soon be gone */
          -             x86_cpu_to_apicid_early_ptr = NULL;
          -             x86_bios_cpu_apicid_early_ptr = NULL;
          -         #ifdef CONFIG_NUMA
          -             x86_cpu_to_node_map_early_ptr = NULL;
          +         #ifdef CONFIG_X86_32
          +             relocate_initrd();
          +         #else
          +             printk(KERN_ERR "initrd extends beyond end of memory "
          +                    "(0x%08llx > 0x%08llx)\ndisabling initrd\n",
          +                    ramdisk_end, end_of_lowmem);
          +             initrd_start = 0;
                    #endif
          +             free_early(ramdisk_image, ramdisk_end);
                    }
          +         #else
          +         static void __init reserve_initrd(void)
          +         {
          +         }
          +         #endif /* CONFIG_BLK_DEV_INITRD */
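
A quick worked example of the size check in reserve_initrd() (numbers are illustrative): with 512 MB of lowmem, end_of_lowmem is 512 MB (0x20000000), so end_of_lowmem >> 1 is 256 MB. Any initrd of 256 MB or more is freed with free_early() and initrd loading is disabled; a smaller one is used in place when it already ends below end_of_lowmem, otherwise it is copied down by relocate_initrd() on 32-bit or rejected with the "extends beyond end of memory" message on 64-bit.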
          +         
          +         static void __init parse_setup_data(void)
          +         {
          +             struct setup_data *data;
          +             u64 pa_data;
          +         
          +             if (boot_params.hdr.version < 0x0209)
          +                     return;
          +             pa_data = boot_params.hdr.setup_data;
          +             while (pa_data) {
          +                     data = early_ioremap(pa_data, PAGE_SIZE);
          +                     switch (data->type) {
          +                     case SETUP_E820_EXT:
          +                             parse_e820_ext(data, pa_data);
          +                             break;
          +                     default:
          +                             break;
          +                     }
          +                     pa_data = data->next;
          +                     early_iounmap(data, PAGE_SIZE);
          +             }
          +         }
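
parse_setup_data() above, and the two reservation helpers that follow, walk a singly linked list of blobs whose physical-address chain starts at boot_params.hdr.setup_data (boot protocol 2.09 and later, hence the version check). For reference, the node layout they rely on, as defined by the x86 boot protocol in include/asm-x86/bootparam.h (the field comments here are descriptive only):

        struct setup_data {
                __u64 next;     /* physical address of the next node; 0 terminates the list */
                __u32 type;     /* e.g. SETUP_E820_EXT, handled in parse_setup_data() */
                __u32 len;      /* length of data[] in bytes */
                __u8  data[0];  /* payload */
        };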
          +         
          +         static void __init e820_reserve_setup_data(void)
          +         {
          +             struct setup_data *data;
          +             u64 pa_data;
          +             int found = 0;
          +         
          +             if (boot_params.hdr.version < 0x0209)
          +                     return;
          +             pa_data = boot_params.hdr.setup_data;
          +             while (pa_data) {
          +                     data = early_ioremap(pa_data, sizeof(*data));
          +                     e820_update_range(pa_data, sizeof(*data)+data->len,
          +                              E820_RAM, E820_RESERVED_KERN);
          +                     found = 1;
          +                     pa_data = data->next;
          +                     early_iounmap(data, sizeof(*data));
          +             }
          +             if (!found)
          +                     return;
                    
          -         #ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP
          -         cpumask_t *cpumask_of_cpu_map __read_mostly;
          -         EXPORT_SYMBOL(cpumask_of_cpu_map);
          +             sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
          +             memcpy(&e820_saved, &e820, sizeof(struct e820map));
          +             printk(KERN_INFO "extended physical RAM map:\n");
          +             e820_print_map("reserve setup_data");
          +         }
                    
          -         /* requires nr_cpu_ids to be initialized */
          -         static void __init setup_cpumask_of_cpu(void)
          +         static void __init reserve_early_setup_data(void)
                    {
          -             int i;
          +             struct setup_data *data;
          +             u64 pa_data;
          +             char buf[32];
          +         
          +             if (boot_params.hdr.version < 0x0209)
          +                     return;
          +             pa_data = boot_params.hdr.setup_data;
          +             while (pa_data) {
          +                     data = early_ioremap(pa_data, sizeof(*data));
          +                     sprintf(buf, "setup data %x", data->type);
          +                     reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf);
          +                     pa_data = data->next;
          +                     early_iounmap(data, sizeof(*data));
          +             }
          +         }
          +         
          +         /*
          +          * --------- Crashkernel reservation ------------------------------
          +          */
          +         
          +         #ifdef CONFIG_KEXEC
          +         
          +         /**
          +          * Reserve @size bytes of crashkernel memory at any suitable offset.
          +          *
          +          * @size: Size of the crashkernel memory to reserve.
          +          * Returns the base address on success, and -1ULL on failure.
          +          */
          +         unsigned long long find_and_reserve_crashkernel(unsigned long long size)
          +         {
          +             const unsigned long long alignment = 16<<20;    /* 16M */
          +             unsigned long long start = 0LL;
          +         
          +             while (1) {
          +                     int ret;
          +         
          +                     start = find_e820_area(start, ULONG_MAX, size, alignment);
          +                     if (start == -1ULL)
          +                             return start;
          +         
          +                     /* try to reserve it */
          +                     ret = reserve_bootmem_generic(start, size, BOOTMEM_EXCLUSIVE);
          +                     if (ret >= 0)
          +                             return start;
                    
          -             /* alloc_bootmem zeroes memory */
          -             cpumask_of_cpu_map = alloc_bootmem_low(sizeof(cpumask_t) * nr_cpu_ids);
          -             for (i = 0; i < nr_cpu_ids; i++)
          -                     cpu_set(i, cpumask_of_cpu_map[i]);
          +                     start += alignment;
          +             }
          +         }
          +         
          +         static inline unsigned long long get_total_mem(void)
          +         {
          +             unsigned long long total;
          +         
          +             total = max_low_pfn - min_low_pfn;
          +         #ifdef CONFIG_HIGHMEM
          +             total += highend_pfn - highstart_pfn;
          +         #endif
          +         
          +             return total << PAGE_SHIFT;
          +         }
          +         
          +         static void __init reserve_crashkernel(void)
          +         {
          +             unsigned long long total_mem;
          +             unsigned long long crash_size, crash_base;
          +             int ret;
          +         
          +             total_mem = get_total_mem();
          +         
          +             ret = parse_crashkernel(boot_command_line, total_mem,
          +                             &crash_size, &crash_base);
          +             if (ret != 0 || crash_size <= 0)
          +                     return;
          +         
          +             /* 0 means: find the address automatically */
          +             if (crash_base <= 0) {
          +                     crash_base = find_and_reserve_crashkernel(crash_size);
          +                     if (crash_base == -1ULL) {
          +                             pr_info("crashkernel reservation failed. "
          +                                     "No suitable area found.\n");
          +                             return;
          +                     }
          +             } else {
          +                     ret = reserve_bootmem_generic(crash_base, crash_size,
          +                                             BOOTMEM_EXCLUSIVE);
          +                     if (ret < 0) {
          +                             pr_info("crashkernel reservation failed - "
          +                                     "memory is in use\n");
          +                             return;
          +                     }
          +             }
          +         
          +             printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
          +                             "for crashkernel (System RAM: %ldMB)\n",
          +                             (unsigned long)(crash_size >> 20),
          +                             (unsigned long)(crash_base >> 20),
          +                             (unsigned long)(total_mem >> 20));
          +         
          +             crashk_res.start = crash_base;
          +             crashk_res.end   = crash_base + crash_size - 1;
          +             insert_resource(&iomem_resource, &crashk_res);
                    }
                    #else
          -         static inline void setup_cpumask_of_cpu(void) { }
          +         static void __init reserve_crashkernel(void)
          +         {
          +         }
                    #endif
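
reserve_crashkernel() takes the requested size and optional base from the crashkernel= boot parameter via parse_crashkernel(). Illustrative forms (sizes and addresses are examples only):

        crashkernel=128M        reserve 128 MB; base chosen by find_and_reserve_crashkernel()
        crashkernel=128M@16M    reserve 128 MB starting at the 16 MB boundary, via reserve_bootmem_generic()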
                    
          -         #ifdef CONFIG_X86_32
          -         /*
          -          * Great future not-so-futuristic plan: make i386 and x86_64 do it
          -          * the same way
          +         static struct resource standard_io_resources[] = {
          +             { .name = "dma1", .start = 0x00, .end = 0x1f,
          +                     .flags = IORESOURCE_BUSY | IORESOURCE_IO },
          +             { .name = "pic1", .start = 0x20, .end = 0x21,
          +                     .flags = IORESOURCE_BUSY | IORESOURCE_IO },
          +             { .name = "timer0", .start = 0x40, .end = 0x43,
          +                     .flags = IORESOURCE_BUSY | IORESOURCE_IO },
          +             { .name = "timer1", .start = 0x50, .end = 0x53,
          +                     .flags = IORESOURCE_BUSY | IORESOURCE_IO },
          +             { .name = "keyboard", .start = 0x60, .end = 0x60,
          +                     .flags = IORESOURCE_BUSY | IORESOURCE_IO },
          +             { .name = "keyboard", .start = 0x64, .end = 0x64,
          +                     .flags = IORESOURCE_BUSY | IORESOURCE_IO },
          +             { .name = "dma page reg", .start = 0x80, .end = 0x8f,
          +                     .flags = IORESOURCE_BUSY | IORESOURCE_IO },
          +             { .name = "pic2", .start = 0xa0, .end = 0xa1,
          +                     .flags = IORESOURCE_BUSY | IORESOURCE_IO },
          +             { .name = "dma2", .start = 0xc0, .end = 0xdf,
          +                     .flags = IORESOURCE_BUSY | IORESOURCE_IO },
          +             { .name = "fpu", .start = 0xf0, .end = 0xff,
          +                     .flags = IORESOURCE_BUSY | IORESOURCE_IO }
          +         };
          +         
          +         static void __init reserve_standard_io_resources(void)
          +         {
          +             int i;
          +         
          +             /* request I/O space for devices used on all i[345]86 PCs */
          +             for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++)
          +                     request_resource(&ioport_resource, &standard_io_resources[i]);
          +         
          +         }
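
The standard_io_resources[] table above describes the legacy PC I/O ports (the two DMA controllers, the two PICs, the PIT channels, the keyboard controller, the DMA page registers and the FPU error port). After reserve_standard_io_resources() has claimed them they appear under /proc/ioports; an illustrative excerpt (other entries vary by machine) would look like:

        0000-001f : dma1
        0020-0021 : pic1
        0040-0043 : timer0
        0050-0053 : timer1
        0060-0060 : keyboard
        0064-0064 : keyboard
        0080-008f : dma page reg
        00a0-00a1 : pic2
        00c0-00df : dma2
        00f0-00ff : fpu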
          +         
          +         #ifdef CONFIG_PROC_VMCORE
           +         /* elfcorehdr= specifies the location of the ELF core header
           +          * stored by the crashed kernel. This option is passed by the
           +          * kexec loader to the capture kernel.
                     */
          -         unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
          -         EXPORT_SYMBOL(__per_cpu_offset);
          +         static int __init setup_elfcorehdr(char *arg)
          +         {
          +             char *end;
          +             if (!arg)
          +                     return -EINVAL;
          +             elfcorehdr_addr = memparse(arg, &end);
          +             return end > arg ? 0 : -EINVAL;
          +         }
          +         early_param("elfcorehdr", setup_elfcorehdr);
                    #endif
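
setup_elfcorehdr() is only relevant to the capture kernel: the kexec loader appends this option to its command line with the physical address saved by the crashed kernel, for example elfcorehdr=0x2f000000 (an illustrative value); memparse() also accepts the usual K/M/G suffixes.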
                    
+++ ++++++++++++++++static struct x86_quirks default_x86_quirks __initdata;
+++ ++++++++++++++++
+++ ++++++++++++++++struct x86_quirks *x86_quirks __initdata = &default_x86_quirks;
+++ ++++++++++++++++
          +         /*
          +          * Determine if we were loaded by an EFI loader.  If so, then we have also been
          +          * passed the efi memmap, systab, etc., so we should use these data structures
          +          * for initialization.  Note, the efi init code path is determined by the
          +          * global efi_enabled. This allows the same kernel image to be used on existing
          +          * systems (with a traditional BIOS) as well as on EFI systems.
          +          */
                    /*
          -          * Great future plan:
          -          * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data.
          -          * Always point %gs to its beginning
          +          * setup_arch - architecture-specific boot-time initializations
          +          *
          +          * Note: On x86_64, fixmaps are ready for use even before this is called.
                     */
          -         void __init setup_per_cpu_areas(void)
          +         
          +         void __init setup_arch(char **cmdline_p)
                    {
          -             int i, highest_cpu = 0;
          -             unsigned long size;
          +         #ifdef CONFIG_X86_32
          +             memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
       +  +             visws_early_detect();
          +             pre_setup_arch_hook();
          +             early_cpu_init();
          +         #else
          +             printk(KERN_INFO "Command line: %s\n", boot_command_line);
          +         #endif
                    
          -         #ifdef CONFIG_HOTPLUG_CPU
          -             prefill_possible_map();
          +             early_ioremap_init();
          +         
          +             ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev);
          +             screen_info = boot_params.screen_info;
          +             edid_info = boot_params.edid_info;
          +         #ifdef CONFIG_X86_32
          +             apm_info.bios = boot_params.apm_bios_info;
          +             ist_info = boot_params.ist_info;
          +             if (boot_params.sys_desc_table.length != 0) {
          +                     set_mca_bus(boot_params.sys_desc_table.table[3] & 0x2);
          +                     machine_id = boot_params.sys_desc_table.table[0];
          +                     machine_submodel_id = boot_params.sys_desc_table.table[1];
          +                     BIOS_revision = boot_params.sys_desc_table.table[2];
          +             }
          +         #endif
          +             saved_video_mode = boot_params.hdr.vid_mode;
          +             bootloader_type = boot_params.hdr.type_of_loader;
          +         
          +         #ifdef CONFIG_BLK_DEV_RAM
          +             rd_image_start = boot_params.hdr.ram_size & RAMDISK_IMAGE_START_MASK;
          +             rd_prompt = ((boot_params.hdr.ram_size & RAMDISK_PROMPT_FLAG) != 0);
          +             rd_doload = ((boot_params.hdr.ram_size & RAMDISK_LOAD_FLAG) != 0);
          +         #endif
          +         #ifdef CONFIG_EFI
          +             if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
          +         #ifdef CONFIG_X86_32
          +                          "EL32",
          +         #else
          +                          "EL64",
                    #endif
          +              4)) {
          +                     efi_enabled = 1;
          +                     efi_reserve_early();
          +             }
          +         #endif
          +         
          +             ARCH_SETUP
          +         
          +             setup_memory_map();
          +             parse_setup_data();
          +             /* update the e820_saved too */
          +             e820_reserve_setup_data();
                    
          -             /* Copy section for each CPU (we discard the original) */
          -             size = PERCPU_ENOUGH_ROOM;
          -             printk(KERN_INFO "PERCPU: Allocating %lu bytes of per cpu data\n",
          -                               size);
          +             copy_edd();
                    
          -             for_each_possible_cpu(i) {
          -                     char *ptr;
          -         #ifndef CONFIG_NEED_MULTIPLE_NODES
          -                     ptr = alloc_bootmem_pages(size);
          +             if (!boot_params.hdr.root_flags)
          +                     root_mountflags &= ~MS_RDONLY;
          +             init_mm.start_code = (unsigned long) _text;
          +             init_mm.end_code = (unsigned long) _etext;
          +             init_mm.end_data = (unsigned long) _edata;
          +         #ifdef CONFIG_X86_32
          +             init_mm.brk = init_pg_tables_end + PAGE_OFFSET;
                    #else
          -                     int node = early_cpu_to_node(i);
          -                     if (!node_online(node) || !NODE_DATA(node)) {
          -                             ptr = alloc_bootmem_pages(size);
          -                             printk(KERN_INFO
          -                                    "cpu %d has no node or node-local memory\n", i);
          -                     }
          -                     else
          -                             ptr = alloc_bootmem_pages_node(NODE_DATA(node), size);
          +             init_mm.brk = (unsigned long) &_end;
                    #endif
          -                     if (!ptr)
          -                             panic("Cannot allocate cpu data for CPU %d\n", i);
          +         
          +             code_resource.start = virt_to_phys(_text);
          +             code_resource.end = virt_to_phys(_etext)-1;
          +             data_resource.start = virt_to_phys(_etext);
          +             data_resource.end = virt_to_phys(_edata)-1;
          +             bss_resource.start = virt_to_phys(&__bss_start);
          +             bss_resource.end = virt_to_phys(&__bss_stop)-1;
          +         
                    #ifdef CONFIG_X86_64
          -                     cpu_pda(i)->data_offset = ptr - __per_cpu_start;
          +             early_cpu_init();
          +         #endif
          +             strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
          +             *cmdline_p = command_line;
          +         
          +             parse_early_param();
          +         
          +             /* after early param, so could get panic from serial */
          +             reserve_early_setup_data();
          +         
          +             if (acpi_mps_check()) {
          +         #ifdef CONFIG_X86_LOCAL_APIC
          +                     disable_apic = 1;
          +         #endif
          +                     clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
          +             }
          +         
      +++ + ++  +   #ifdef CONFIG_PCI
      +++ + ++  +       if (pci_early_dump_regs)
      +++ + ++  +               early_dump_pci_devices();
      +++ + ++  +   #endif
      +++ + ++  +   
          +             finish_e820_parsing();
          +         
          +         #ifdef CONFIG_X86_32
          +             probe_roms();
          +         #endif
          +         
          +             /* after parse_early_param, so could debug it */
          +             insert_resource(&iomem_resource, &code_resource);
          +             insert_resource(&iomem_resource, &data_resource);
          +             insert_resource(&iomem_resource, &bss_resource);
          +         
          +             if (efi_enabled)
          +                     efi_init();
          +         
          +         #ifdef CONFIG_X86_32
          +             if (ppro_with_ram_bug()) {
          +                     e820_update_range(0x70000000ULL, 0x40000ULL, E820_RAM,
          +                                       E820_RESERVED);
          +                     sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
          +                     printk(KERN_INFO "fixed physical RAM map:\n");
          +                     e820_print_map("bad_ppro");
          +             }
          +         #else
          +             early_gart_iommu_check();
          +         #endif
          +         
          +             /*
          +              * partially used pages are not usable - thus
          +              * we are rounding upwards:
          +              */
          +             max_pfn = e820_end_of_ram_pfn();
          +         
          +             /* preallocate 4k for mptable mpc */
          +             early_reserve_e820_mpc_new();
          +             /* update e820 for memory not covered by WB MTRRs */
          +             mtrr_bp_init();
          +             if (mtrr_trim_uncached_memory(max_pfn))
          +                     max_pfn = e820_end_of_ram_pfn();
          +         
          +         #ifdef CONFIG_X86_32
           +             /* max_low_pfn gets updated here */
          +             find_low_pfn_range();
                    #else
          -                     __per_cpu_offset[i] = ptr - __per_cpu_start;
          +             num_physpages = max_pfn;
          +         
          +             check_efer();
          +         
          +             /* How many end-of-memory variables you have, grandma! */
          +             /* need this before calling reserve_initrd */
          +             if (max_pfn > (1UL<<(32 - PAGE_SHIFT)))
          +                     max_low_pfn = e820_end_of_low_ram_pfn();
          +             else
          +                     max_low_pfn = max_pfn;
          +         
          +             high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
                    #endif
          -                     memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
                    
          -                     highest_cpu = i;
          +             /* max_pfn_mapped is updated here */
          +             max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
          +             max_pfn_mapped = max_low_pfn_mapped;
          +         
          +         #ifdef CONFIG_X86_64
          +             if (max_pfn > max_low_pfn) {
          +                     max_pfn_mapped = init_memory_mapping(1UL<<32,
          +                                                          max_pfn<<PAGE_SHIFT);
           +                     /* can we preserve max_low_pfn? */
          +                     max_low_pfn = max_pfn;
                        }
          +         #endif
                    
          -             nr_cpu_ids = highest_cpu + 1;
          -             printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d\n", NR_CPUS, nr_cpu_ids);
          +             /*
          +              * NOTE: On x86-32, only from this point on, fixmaps are ready for use.
          +              */
                    
          -             /* Setup percpu data maps */
          -             setup_per_cpu_maps();
          +         #ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
          +             if (init_ohci1394_dma_early)
          +                     init_ohci1394_dma_on_all_controllers();
          +         #endif
                    
          -             /* Setup cpumask_of_cpu map */
          -             setup_cpumask_of_cpu();
          -         }
          +             reserve_initrd();
          +         
          +         #ifdef CONFIG_X86_64
          +             vsmp_init();
          +         #endif
          +         
          +             dmi_scan_machine();
          +         
          +             io_delay_init();
          +         
          +             /*
          +              * Parse the ACPI tables for possible boot-time SMP configuration.
          +              */
          +             acpi_boot_table_init();
          +         
          +         #ifdef CONFIG_ACPI_NUMA
          +             /*
          +              * Parse SRAT to discover nodes.
          +              */
          +             acpi_numa_init();
          +         #endif
          +         
          +             initmem_init(0, max_pfn);
          +         
          +         #ifdef CONFIG_X86_64
          +             dma32_reserve_bootmem();
          +         #endif
                    
          +         #ifdef CONFIG_ACPI_SLEEP
          +             /*
          +              * Reserve low memory region for sleep support.
          +              */
          +             acpi_reserve_bootmem();
                    #endif
--- --- -- ----- ---#ifdef CONFIG_X86_NUMAQ
--- --- -- ----- ---    /*
--- --- -- ----- ---     * need to check online nodes num, call it
--- --- -- ----- ---     * here before time_init/tsc_init
--- --- -- ----- ---     */
--- --- -- ----- ---    numaq_tsc_disable();
--- --- -- ----- ---#endif
--- --- -- ----- ---
          +         #ifdef CONFIG_X86_FIND_SMP_CONFIG
          +             /*
          +              * Find and reserve possible boot-time SMP configuration:
          +              */
          +             find_smp_config();
          +         #endif
          +             reserve_crashkernel();
          +         
          +             reserve_ibft_region();
          +         
          +         #ifdef CONFIG_KVM_CLOCK
          +             kvmclock_init();
          +         #endif
          +         
          +         #if defined(CONFIG_VMI) && defined(CONFIG_X86_32)
          +             /*
          +              * Must be after max_low_pfn is determined, and before kernel
          +              * pagetables are setup.
          +              */
          +             vmi_init();
          +         #endif
          +         
+++++++++++++++++++     paravirt_pagetable_setup_start(swapper_pg_dir);
          +             paging_init();
+++++++++++++++++++     paravirt_pagetable_setup_done(swapper_pg_dir);
+++++++++++++++++++     paravirt_post_allocator_init();
          +         
          +         #ifdef CONFIG_X86_64
          +             map_vsyscall();
          +         #endif
          +         
          +         #ifdef CONFIG_X86_GENERICARCH
          +             generic_apic_probe();
          +         #endif
          +         
          +             early_quirks();
          +         
          +             /*
          +              * Read APIC and some other early information from ACPI tables.
          +              */
          +             acpi_boot_init();
          +         
          +         #if defined(CONFIG_X86_MPPARSE) || defined(CONFIG_X86_VISWS)
          +             /*
          +              * get boot-time SMP configuration:
          +              */
          +             if (smp_found_config)
          +                     get_smp_config();
          +         #endif
          +         
          +             prefill_possible_map();
          +         #ifdef CONFIG_X86_64
          +             init_cpu_to_node();
          +         #endif
          +         
          +             init_apic_mappings();
          +             ioapic_init_mappings();
          +         
          +         #if defined(CONFIG_SMP) && defined(CONFIG_X86_PC) && defined(CONFIG_X86_32)
          +             if (def_to_bigsmp)
          +                     printk(KERN_WARNING "More than 8 CPUs detected and "
          +                             "CONFIG_X86_PC cannot handle it.\nUse "
          +                             "CONFIG_X86_GENERICARCH or CONFIG_X86_BIGSMP.\n");
          +         #endif
          +             kvm_guest_init();
          +         
          +             e820_reserve_resources();
          +             e820_mark_nosave_regions(max_low_pfn);
          +         
          +         #ifdef CONFIG_X86_32
          +             request_resource(&iomem_resource, &video_ram_resource);
          +         #endif
          +             reserve_standard_io_resources();
          +         
          +             e820_setup_gap();
          +         
          +         #ifdef CONFIG_VT
          +         #if defined(CONFIG_VGA_CONSOLE)
          +             if (!efi_enabled || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
          +                     conswitchp = &vga_con;
          +         #elif defined(CONFIG_DUMMY_CONSOLE)
          +             conswitchp = &dummy_con;
          +         #endif
          +         #endif
          +         }
index d92373630963f980fb5471a0d96d903b5d4b18bf,d92373630963f980fb5471a0d96d903b5d4b18bf,d92373630963f980fb5471a0d96d903b5d4b18bf,d633d801f85850e22070f59ac96702c612d72a23,d92373630963f980fb5471a0d96d903b5d4b18bf,d92373630963f980fb5471a0d96d903b5d4b18bf,d92373630963f980fb5471a0d96d903b5d4b18bf,d92373630963f980fb5471a0d96d903b5d4b18bf,d92373630963f980fb5471a0d96d903b5d4b18bf,d92373630963f980fb5471a0d96d903b5d4b18bf,d92373630963f980fb5471a0d96d903b5d4b18bf,d92373630963f980fb5471a0d96d903b5d4b18bf,d92373630963f980fb5471a0d96d903b5d4b18bf,d92373630963f980fb5471a0d96d903b5d4b18bf,d92373630963f980fb5471a0d96d903b5d4b18bf,295b5f5c9389d2d0a75bbe4e8e15d7c48638e313,d92373630963f980fb5471a0d96d903b5d4b18bf,d92373630963f980fb5471a0d96d903b5d4b18bf,d92373630963f980fb5471a0d96d903b5d4b18bf,d92373630963f980fb5471a0d96d903b5d4b18bf..07faaa5109cb78003c4b2e5bbbad15da7136ca34
@@@@@@@@@@@@@@@@@@@@@ -212,7 -212,7 -212,7 -212,7 -212,7 -212,7 -212,7 -212,7 -212,7 -212,7 -212,7 -212,7 -212,7 -212,7 -212,7 -212,7 -212,7 -212,7 -212,7 -212,7 +212,7 @@@@@@@@@@@@@@@@@@@@@ asmlinkage unsigned long sys_sigreturn(
                    
                    badframe:
                        if (show_unhandled_signals && printk_ratelimit()) {
--- ----------------            printk(KERN_INFO "%s%s[%d] bad frame in sigreturn frame:"
+++ ++++++++++++++++            printk("%s%s[%d] bad frame in sigreturn frame:"
                                        "%p ip:%lx sp:%lx oeax:%lx",
                                    task_pid_nr(current) > 1 ? KERN_INFO : KERN_EMERG,
                                    current->comm, task_pid_nr(current), frame, regs->ip,
@@@@@@@@@@@@@@@@@@@@@ -657,12 -657,12 -657,12 -657,12 -657,12 -657,12 -657,12 -657,12 -657,12 -657,12 -657,12 -657,12 -657,12 -657,12 -657,12 -657,6 -657,12 -657,12 -657,12 -657,12 +657,6 @@@@@@@@@@@@@@@@@@@@@ static void do_signal(struct pt_regs *r
                    void
                    do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
                    {
--------------- ----    /* Pending single-step? */
--------------- ----    if (thread_info_flags & _TIF_SINGLESTEP) {
--------------- ----            regs->flags |= X86_EFLAGS_TF;
--------------- ----            clear_thread_flag(TIF_SINGLESTEP);
--------------- ----    }
--------------- ----
                        /* deal with pending signal delivery */
                        if (thread_info_flags & _TIF_SIGPENDING)
                                do_signal(regs);
index 27456574f070eb4b22c1aca6e95feee5949bdf7a,687376ab07e82ece4ab1eeb609d738d76245d226,f251f5c38823118a03b126dab96f5912406e436a,a9ca7dadc85215f988fc68a75722833dbeb05f1d,687376ab07e82ece4ab1eeb609d738d76245d226,687376ab07e82ece4ab1eeb609d738d76245d226,f35c2d8016ac412c1c0433bf4382829c1d2cef31,f35c2d8016ac412c1c0433bf4382829c1d2cef31,687376ab07e82ece4ab1eeb609d738d76245d226,687376ab07e82ece4ab1eeb609d738d76245d226,56078d61c79315847ee3a2e761184846b9e1345f,687376ab07e82ece4ab1eeb609d738d76245d226,f35c2d8016ac412c1c0433bf4382829c1d2cef31,687376ab07e82ece4ab1eeb609d738d76245d226,687376ab07e82ece4ab1eeb609d738d76245d226,687376ab07e82ece4ab1eeb609d738d76245d226,f35c2d8016ac412c1c0433bf4382829c1d2cef31,687376ab07e82ece4ab1eeb609d738d76245d226,687376ab07e82ece4ab1eeb609d738d76245d226,1deb3b624a793a762f4a5561e0a32540fd2764ca..27640196eb7ccadea18f712cc065aab3800e21d0
                    #include <asm/pgtable.h>
                    #include <asm/tlbflush.h>
                    #include <asm/mtrr.h>
          -         #include <asm/nmi.h>
                    #include <asm/vmi.h>
                    #include <asm/genapic.h>
                    #include <linux/mc146818rtc.h>
                    #include <mach_wakecpu.h>
                    #include <smpboot_hooks.h>
                    
          -         /*
          -          * FIXME: For x86_64, those are defined in other files. But moving them here,
          -          * would make the setup areas dependent on smp, which is a loss. When we
          -          * integrate apic between arches, we can probably do a better job, but
          -          * right now, they'll stay here -- glommer
          -          */
          -         
          -         /* which logical CPU number maps to which CPU (physical APIC ID) */
          -         u16 x86_cpu_to_apicid_init[NR_CPUS] __initdata =
          -                             { [0 ... NR_CPUS-1] = BAD_APICID };
          -         void *x86_cpu_to_apicid_early_ptr;
          -         
          -         u16 x86_bios_cpu_apicid_init[NR_CPUS] __initdata
          -                                     = { [0 ... NR_CPUS-1] = BAD_APICID };
          -         void *x86_bios_cpu_apicid_early_ptr;
          -         
                    #ifdef CONFIG_X86_32
                    u8 apicid_2_node[MAX_APICID];
                    static int low_mappings;
@@@@@@@@@@@@@@@@@@@@@ -181,12 -181,12 -181,12 -181,12 -181,12 -181,12 -181,12 -181,12 -181,12 -181,12 -198,13 -181,12 -181,12 -181,12 -181,12 -181,12 -181,12 -181,12 -181,12 -181,12 +181,12 @@@@@@@@@@@@@@@@@@@@@ static void map_cpu_to_logical_apicid(v
                        map_cpu_to_node(cpu, node);
                    }
                    
          -         static void unmap_cpu_to_logical_apicid(int cpu)
          +         void numa_remove_cpu(int cpu)
                    {
                        cpu_2_logical_apicid[cpu] = BAD_APICID;
                        unmap_cpu_to_node(cpu);
                    }
                    #else
          -         #define unmap_cpu_to_logical_apicid(cpu) do {} while (0)
                    #define map_cpu_to_logical_apicid()  do {} while (0)
                    #endif
                    
@@@@@@@@@@@@@@@@@@@@@ -327,12 -327,12 -327,12 -327,12 -327,12 -327,12 -327,12 -327,12 -327,12 -327,12 -345,19 -327,12 -327,12 -327,12 -327,12 -327,12 -327,12 -327,12 -327,12 -327,12 +327,12 @@@@@@@@@@@@@@@@@@@@@ static void __cpuinit start_secondary(v
                         * lock helps us to not include this cpu in a currently in progress
                         * smp_call_function().
                         */
      --  - -   -       lock_ipi_call_lock();
          -         #ifdef CONFIG_X86_64
          -             spin_lock(&vector_lock);
          -         
          -             /* Setup the per cpu irq handling data structures */
          -             __setup_vector_irq(smp_processor_id());
          -             /*
          -              * Allow the master to continue.
          -              */
          -             spin_unlock(&vector_lock);
      ++  + +   +       ipi_call_lock_irq();
          +         #ifdef CONFIG_X86_IO_APIC
          +             setup_vector_irq(smp_processor_id());
                    #endif
                        cpu_set(smp_processor_id(), cpu_online_map);
      --  - -   -       unlock_ipi_call_lock();
      ++  + +   +       ipi_call_unlock_irq();
                        per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
                    
                        setup_secondary_clock();
                        cpu_idle();
                    }
                    
          -         #ifdef CONFIG_X86_32
          -         /*
          -          * Everything has been set up for the secondary
          -          * CPUs - they just need to reload everything
          -          * from the task structure
          -          * This function must not return.
          -          */
          -         void __devinit initialize_secondary(void)
          -         {
          -             /*
          -              * We don't actually need to load the full TSS,
          -              * basically just the stack pointer and the ip.
          -              */
          -         
          -             asm volatile(
          -                     "movl %0,%%esp\n\t"
          -                     "jmp *%1"
          -                     :
          -                     :"m" (current->thread.sp), "m" (current->thread.ip));
          -         }
          -         #endif
          -         
                    static void __cpuinit smp_apply_quirks(struct cpuinfo_x86 *c)
                    {
          -         #ifdef CONFIG_X86_32
                        /*
                         * Mask B, Pentium, but not Pentium MMX
                         */
                    
                    valid_k7:
                        ;
          -         #endif
                    }
                    
                    static void __cpuinit smp_checks(void)
@@@@@@@@@@@@@@@@@@@@@ -506,6 -506,6 -506,6 -506,6 -506,6 -506,6 -506,6 -506,6 -506,6 -506,6 -555,23 -506,6 -506,6 -506,6 -506,6 -506,6 -506,6 -506,6 -506,6 -506,6 +506,6 @@@@@@@@@@@@@@@@@@@@@ cpumask_t cpu_coregroup_map(int cpu
                                return c->llc_shared_map;
                    }
                    
          -         #ifdef CONFIG_X86_32
          -         /*
          -          * We are called very early to get the low memory for the
          -          * SMP bootup trampoline page.
          -          */
          -         void __init smp_alloc_memory(void)
          -         {
          -             trampoline_base = alloc_bootmem_low_pages(PAGE_SIZE);
          -             /*
          -              * Has to be in very low memory so we can execute
          -              * real-mode AP code.
          -              */
          -             if (__pa(trampoline_base) >= 0x9F000)
          -                     BUG();
          -         }
          -         #endif
          -         
                    static void impress_friends(void)
                    {
                        int cpu;
@@@@@@@@@@@@@@@@@@@@@ -546,8 -546,8 -546,8 -546,8 -546,8 -546,8 -546,8 -546,8 -546,8 -546,8 -612,8 -546,8 -546,8 -546,8 -546,8 -546,8 -546,8 -546,8 -546,8 -546,8 +546,8 @@@@@@@@@@@@@@@@@@@@@ static inline void __inquire_remote_api
                                        printk(KERN_CONT
                                               "a previous APIC delivery may have failed\n");
                    
-- -----------------            apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
-- -----------------            apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]);
++ +++++++++++++++++            apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
++ +++++++++++++++++            apic_write(APIC_ICR, APIC_DM_REMRD | regs[i]);
                    
                                timeout = 0;
                                do {
@@@@@@@@@@@@@@@@@@@@@ -579,11 -579,11 -579,11 -579,11 -579,11 -579,11 -579,11 -579,11 -579,11 -579,11 -645,11 -579,11 -579,11 -579,11 -579,11 -579,11 -579,11 -579,11 -579,11 -579,11 +579,11 @@@@@@@@@@@@@@@@@@@@@ wakeup_secondary_cpu(int logical_apicid
                        int maxlvt;
                    
                        /* Target chip */
-- -----------------    apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid));
++ +++++++++++++++++    apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid));
                    
                        /* Boot on the stack */
                        /* Kick the second */
-- -----------------    apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL);
++ +++++++++++++++++    apic_write(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL);
                    
                        Dprintk("Waiting for send to finish...\n");
                        send_status = safe_apic_wait_icr_idle();
                         * Give the other CPU some time to accept the IPI.
                         */
                        udelay(200);
-- -----------------    /*
-- -----------------     * Due to the Pentium erratum 3AP.
-- -----------------     */
                        maxlvt = lapic_get_maxlvt();
-- -----------------    if (maxlvt > 3) {
-- -----------------            apic_read_around(APIC_SPIV);
++ +++++++++++++++++    if (maxlvt > 3)                 /* Due to the Pentium erratum 3AP.  */
                                apic_write(APIC_ESR, 0);
-- -----------------    }
                        accept_status = (apic_read(APIC_ESR) & 0xEF);
                        Dprintk("NMI sent.\n");
                    
@@@@@@@@@@@@@@@@@@@@@ -625,12 -625,12 -620,14 -625,12 -625,12 -625,12 -625,12 -625,12 -625,12 -625,12 -691,12 -625,12 -625,12 -625,12 -625,12 -625,12 -625,12 -625,12 -625,12 -625,12 +620,14 @@@@@@@@@@@@@@@@@@@@@ wakeup_secondary_cpu(int phys_apicid, u
                                return send_status;
                        }
                    
++ +++++++++++++++++    maxlvt = lapic_get_maxlvt();
++ +++++++++++++++++
                        /*
                         * Be paranoid about clearing APIC errors.
                         */
                        if (APIC_INTEGRATED(apic_version[phys_apicid])) {
-- -----------------            apic_read_around(APIC_SPIV);
-- -----------------            apic_write(APIC_ESR, 0);
++ +++++++++++++++++            if (maxlvt > 3)         /* Due to the Pentium erratum 3AP.  */
++ +++++++++++++++++                    apic_write(APIC_ESR, 0);
                                apic_read(APIC_ESR);
                        }
                    
                        /*
                         * Turn INIT on target chip
                         */
-- -----------------    apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
++ +++++++++++++++++    apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
                    
                        /*
                         * Send IPI
                         */
-- -----------------    apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
-- -----------------                            | APIC_DM_INIT);
++ +++++++++++++++++    apic_write(APIC_ICR,
++ +++++++++++++++++               APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT);
                    
                        Dprintk("Waiting for send to finish...\n");
                        send_status = safe_apic_wait_icr_idle();
                        Dprintk("Deasserting INIT.\n");
                    
                        /* Target chip */
-- -----------------    apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
++ +++++++++++++++++    apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
                    
                        /* Send IPI */
-- -----------------    apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
++ +++++++++++++++++    apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
                    
                        Dprintk("Waiting for send to finish...\n");
                        send_status = safe_apic_wait_icr_idle();
                         * target processor state.
                         */
                        startup_ipi_hook(phys_apicid, (unsigned long) start_secondary,
          -         #ifdef CONFIG_X86_64
          -                              (unsigned long)init_rsp);
          -         #else
                                         (unsigned long)stack_start.sp);
          -         #endif
                    
                        /*
                         * Run STARTUP IPI loop.
                         */
                        Dprintk("#startup loops: %d.\n", num_starts);
                    
-- -----------------    maxlvt = lapic_get_maxlvt();
-- -----------------
                        for (j = 1; j <= num_starts; j++) {
                                Dprintk("Sending STARTUP #%d.\n", j);
-- -----------------            apic_read_around(APIC_SPIV);
-- -----------------            apic_write(APIC_ESR, 0);
++ +++++++++++++++++            if (maxlvt > 3)         /* Due to the Pentium erratum 3AP.  */
++ +++++++++++++++++                    apic_write(APIC_ESR, 0);
                                apic_read(APIC_ESR);
                                Dprintk("After apic_write.\n");
                    
                                 */
                    
                                /* Target chip */
-- -----------------            apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
++ +++++++++++++++++            apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
                    
                                /* Boot on the stack */
                                /* Kick the second */
-- -----------------            apic_write_around(APIC_ICR, APIC_DM_STARTUP
-- -----------------                                    | (start_eip >> 12));
++ +++++++++++++++++            apic_write(APIC_ICR, APIC_DM_STARTUP | (start_eip >> 12));
                    
                                /*
                                 * Give the other CPU some time to accept the IPI.
                                 * Give the other CPU some time to accept the IPI.
                                 */
                                udelay(200);
-- -----------------            /*
-- -----------------             * Due to the Pentium erratum 3AP.
-- -----------------             */
-- -----------------            if (maxlvt > 3) {
-- -----------------                    apic_read_around(APIC_SPIV);
++ +++++++++++++++++            if (maxlvt > 3)         /* Due to the Pentium erratum 3AP.  */
                                        apic_write(APIC_ESR, 0);
-- -----------------            }
                                accept_status = (apic_read(APIC_ESR) & 0xEF);
                                if (send_status || accept_status)
                                        break;
@@@@@@@@@@@@@@@@@@@@@ -762,45 -762,45 -751,45 -762,45 -762,45 -762,45 -762,45 -762,45 -762,45 -762,45 -832,6 -762,45 -762,45 -762,45 -762,45 -762,45 -762,45 -762,45 -762,45 -762,45 +751,45 @@@@@@@@@@@@@@@@@@@@@ static void __cpuinit do_fork_idle(stru
                        complete(&c_idle->done);
                    }
                    
---------- -------- static int __cpuinit get_local_pda(int cpu)
          +         #ifdef CONFIG_X86_64
          +         /*
          +          * Allocate node local memory for the AP pda.
          +          *
          +          * Must be called after the _cpu_pda pointer table is initialized.
          +          */
+++++++++++++++++++ int __cpuinit get_local_pda(int cpu)
          +         {
          +             struct x8664_pda *oldpda, *newpda;
          +             unsigned long size = sizeof(struct x8664_pda);
          +             int node = cpu_to_node(cpu);
          +         
          +             if (cpu_pda(cpu) && !cpu_pda(cpu)->in_bootmem)
          +                     return 0;
          +         
          +             oldpda = cpu_pda(cpu);
          +             newpda = kmalloc_node(size, GFP_ATOMIC, node);
          +             if (!newpda) {
          +                     printk(KERN_ERR "Could not allocate node local PDA "
          +                             "for CPU %d on node %d\n", cpu, node);
          +         
          +                     if (oldpda)
          +                             return 0;       /* have a usable pda */
          +                     else
          +                             return -1;
          +             }
          +         
          +             if (oldpda) {
          +                     memcpy(newpda, oldpda, size);
          +                     if (!after_bootmem)
          +                             free_bootmem((unsigned long)oldpda, size);
          +             }
          +         
          +             newpda->in_bootmem = 0;
          +             cpu_pda(cpu) = newpda;
          +             return 0;
          +         }
          +         #endif /* CONFIG_X86_64 */
          +         
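get_local_pda() above replaces a CPU's bootmem-allocated PDA with a node-local copy, keeping the old one usable if the new allocation fails. A minimal user-space sketch of that replace-with-copy fallback pattern, with plain malloc() standing in for kmalloc_node() and a placeholder struct instead of the real x8664_pda:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct pda {                            /* stand-in for struct x8664_pda */
	int cpu;
	int in_bootmem;
	char scratch[64];
};

/* Replace *slot with a freshly allocated copy; keep the old one on failure. */
static int replace_pda(struct pda **slot)
{
	struct pda *oldpda = *slot;
	struct pda *newpda;

	if (oldpda && !oldpda->in_bootmem)
		return 0;                       /* already a proper allocation */

	newpda = malloc(sizeof(*newpda));       /* kmalloc_node() in the kernel */
	if (!newpda)
		return oldpda ? 0 : -1;         /* old copy is still usable */

	if (oldpda)
		memcpy(newpda, oldpda, sizeof(*newpda));

	newpda->in_bootmem = 0;
	*slot = newpda;
	return 0;
}

int main(void)
{
	struct pda boot = { .cpu = 1, .in_bootmem = 1 };
	struct pda *slot = &boot;

	if (replace_pda(&slot) == 0)
		printf("cpu %d pda now %s bootmem copy\n",
		       slot->cpu, slot->in_bootmem ? "still the" : "off the");
	return 0;
}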
                    static int __cpuinit do_boot_cpu(int apicid, int cpu)
                    /*
                     * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
                                .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done),
                        };
                        INIT_WORK(&c_idle.work, do_fork_idle);
          -         #ifdef CONFIG_X86_64
          -             /* allocate memory for gdts of secondary cpus. Hotplug is considered */
          -             if (!cpu_gdt_descr[cpu].address &&
          -                     !(cpu_gdt_descr[cpu].address = get_zeroed_page(GFP_KERNEL))) {
          -                     printk(KERN_ERR "Failed to allocate GDT for CPU %d\n", cpu);
          -                     return -1;
          -             }
                    
          +         #ifdef CONFIG_X86_64
                        /* Allocate node local memory for AP pdas */
          -             if (cpu_pda(cpu) == &boot_cpu_pda[cpu]) {
          -                     struct x8664_pda *newpda, *pda;
          -                     int node = cpu_to_node(cpu);
          -                     pda = cpu_pda(cpu);
          -                     newpda = kmalloc_node(sizeof(struct x8664_pda), GFP_ATOMIC,
          -                                           node);
          -                     if (newpda) {
          -                             memcpy(newpda, pda, sizeof(struct x8664_pda));
          -                             cpu_pda(cpu) = newpda;
          -                     } else
          -                             printk(KERN_ERR
          -                     "Could not allocate node local PDA for CPU %d on node %d\n",
          -                                     cpu, node);
          +             if (cpu > 0) {
          +                     boot_error = get_local_pda(cpu);
          +                     if (boot_error)
          +                             goto restore_state;
          +                             /* if can't get pda memory, can't start cpu */
                        }
                    #endif
                    
                    #ifdef CONFIG_X86_32
                        per_cpu(current_task, cpu) = c_idle.idle;
                        init_gdt(cpu);
          -             early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
          -             c_idle.idle->thread.ip = (unsigned long) start_secondary;
                        /* Stack for startup_32 can be just as for start_secondary onwards */
          -             stack_start.sp = (void *) c_idle.idle->thread.sp;
                        irq_ctx_init(cpu);
                    #else
                        cpu_pda(cpu)->pcurrent = c_idle.idle;
          -             init_rsp = c_idle.idle->thread.sp;
          -             load_sp0(&per_cpu(init_tss, cpu), &c_idle.idle->thread);
          -             initial_code = (unsigned long)start_secondary;
                        clear_tsk_thread_flag(c_idle.idle, TIF_FORK);
                    #endif
          +             early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
          +             initial_code = (unsigned long)start_secondary;
          +             stack_start.sp = (void *) c_idle.idle->thread.sp;
                    
                        /* start_ip had better be page-aligned! */
                        start_ip = setup_trampoline();
                                                inquire_remote_apic(apicid);
                                }
                        }
          -         
          -             if (boot_error) {
          -                     /* Try to put things back the way they were before ... */
          -                     unmap_cpu_to_logical_apicid(cpu);
                    #ifdef CONFIG_X86_64
          -                     clear_node_cpumask(cpu); /* was set by numa_add_cpu */
          +         restore_state:
                    #endif
          +             if (boot_error) {
          +                     /* Try to put things back the way they were before ... */
          +                     numa_remove_cpu(cpu); /* was set by numa_add_cpu */
                                cpu_clear(cpu, cpu_callout_map); /* was set by do_boot_cpu() */
                                cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
          -                     cpu_clear(cpu, cpu_possible_map);
                                cpu_clear(cpu, cpu_present_map);
                                per_cpu(x86_cpu_to_apicid, cpu) = BAD_APICID;
                        }
                    {
                        cpu_present_map = cpumask_of_cpu(0);
                        cpu_possible_map = cpumask_of_cpu(0);
          -         #ifdef CONFIG_X86_32
                        smpboot_clear_io_apic_irqs();
          -         #endif
          +         
                        if (smp_found_config)
          -                     phys_cpu_present_map =
          -                                     physid_mask_of_physid(boot_cpu_physical_apicid);
          +                     physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
                        else
          -                     phys_cpu_present_map = physid_mask_of_physid(0);
          +                     physid_set_mask_of_physid(0, &phys_cpu_present_map);
                        map_cpu_to_logical_apicid();
                        cpu_set(0, per_cpu(cpu_sibling_map, 0));
                        cpu_set(0, per_cpu(cpu_core_map, 0));
                         * If SMP should be disabled, then really disable it!
                         */
                        if (!max_cpus) {
          -                     printk(KERN_INFO "SMP mode deactivated,"
          -                                      "forcing use of dummy APIC emulation.\n");
          +                     printk(KERN_INFO "SMP mode deactivated.\n");
                                smpboot_clear_io_apic();
          -         #ifdef CONFIG_X86_32
          +         
          +                     localise_nmi_watchdog();
          +         
                                connect_bsp_APIC();
          -         #endif
                                setup_local_APIC();
                                end_local_APIC_setup();
                                return -1;
@@@@@@@@@@@@@@@@@@@@@ -1139,6 -1139,6 -1128,6 -1139,6 -1139,6 -1139,6 -1139,6 -1139,6 -1139,6 -1139,6 -1191,7 -1139,6 -1139,6 -1139,6 -1139,6 -1139,6 -1139,6 -1139,6 -1139,6 -1139,6 +1128,6 @@@@@@@@@@@@@@@@@@@@@ static void __init smp_cpu_index_defaul
                    void __init native_smp_prepare_cpus(unsigned int max_cpus)
                    {
                        preempt_disable();
          -             nmi_watchdog_default();
                        smp_cpu_index_default();
                        current_cpu_data = boot_cpu_data;
                        cpu_callin_map = cpumask_of_cpu(0);
                        }
                        preempt_enable();
                    
          -         #ifdef CONFIG_X86_32
                        connect_bsp_APIC();
          -         #endif
          +         
                        /*
                         * Switch from PIC to APIC mode.
                         */
@@@@@@@@@@@@@@@@@@@@@ -1204,8 -1204,8 -1193,8 -1204,8 -1204,8 -1204,8 -1204,8 -1204,8 -1204,8 -1204,8 -1258,8 -1204,8 -1204,8 -1204,8 -1204,8 -1204,8 -1204,8 -1204,8 -1204,8 -1204,8 +1193,8 @@@@@@@@@@@@@@@@@@@@@ void __init native_smp_prepare_boot_cpu
                        int me = smp_processor_id();
                    #ifdef CONFIG_X86_32
                        init_gdt(me);
          -             switch_to_new_gdt();
                    #endif
          +             switch_to_new_gdt();
                        /* already set me in cpu_online_map in boot_cpu_init() */
                        cpu_set(me, cpu_callout_map);
                        per_cpu(cpu_state, me) = CPU_ONLINE;
@@@@@@@@@@@@@@@@@@@@@ -1225,6 -1225,6 -1214,6 -1225,6 -1225,6 -1225,6 -1225,6 -1225,6 -1225,6 -1225,6 -1279,23 -1225,6 -1225,6 -1225,6 -1225,6 -1225,6 -1225,6 -1225,6 -1225,6 -1225,6 +1214,6 @@@@@@@@@@@@@@@@@@@@@ void __init native_smp_cpus_done(unsign
                    
                    #ifdef CONFIG_HOTPLUG_CPU
                    
          -         #  ifdef CONFIG_X86_32
          -         void cpu_exit_clear(void)
          -         {
          -             int cpu = raw_smp_processor_id();
          -         
          -             idle_task_exit();
          -         
          -             cpu_uninit();
          -             irq_ctx_exit(cpu);
          -         
          -             cpu_clear(cpu, cpu_callout_map);
          -             cpu_clear(cpu, cpu_callin_map);
          -         
          -             unmap_cpu_to_logical_apicid(cpu);
          -         }
          -         #  endif /* CONFIG_X86_32 */
          -         
                    static void remove_siblinginfo(int cpu)
                    {
                        int sibling;
                        int i;
                        int possible;
                    
          +             /* no processor from mptable or madt */
          +             if (!num_processors)
          +                     num_processors = 1;
          +         
          +         #ifdef CONFIG_HOTPLUG_CPU
                        if (additional_cpus == -1) {
                                if (disabled_cpus > 0)
                                        additional_cpus = disabled_cpus;
                                else
                                        additional_cpus = 0;
                        }
          +         #else
          +             additional_cpus = 0;
          +         #endif
                        possible = num_processors + additional_cpus;
                        if (possible > NR_CPUS)
                                possible = NR_CPUS;
                    
                        for (i = 0; i < possible; i++)
                                cpu_set(i, cpu_possible_map);
          +         
          +             nr_cpu_ids = possible;
                    }
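The cpu_possible_map sizing above adds additional_cpus (defaulting to the number of disabled CPUs when CPU hotplug is enabled) to num_processors and clamps the result to NR_CPUS before recording it in nr_cpu_ids. A small sketch of that sizing rule with illustrative inputs; NR_CPUS here is an arbitrary value:

#include <stdio.h>

#define NR_CPUS 8	/* illustrative compile-time limit */

static int possible_cpus(int num_processors, int disabled_cpus,
			 int additional_cpus, int hotplug)
{
	int possible;

	if (!num_processors)		/* no processor from MP table or MADT */
		num_processors = 1;

	if (!hotplug)
		additional_cpus = 0;
	else if (additional_cpus == -1)
		additional_cpus = disabled_cpus > 0 ? disabled_cpus : 0;

	possible = num_processors + additional_cpus;
	if (possible > NR_CPUS)
		possible = NR_CPUS;
	return possible;
}

int main(void)
{
	printf("%d possible CPUs\n", possible_cpus(4, 2, -1, 1));  /* -> 6 */
	printf("%d possible CPUs\n", possible_cpus(6, 4, -1, 1));  /* -> 8, clamped */
	return 0;
}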
                    
                    static void __ref remove_cpu_from_maps(int cpu)
                    {
                        cpu_clear(cpu, cpu_online_map);
          -         #ifdef CONFIG_X86_64
                        cpu_clear(cpu, cpu_callout_map);
                        cpu_clear(cpu, cpu_callin_map);
                        /* was set by cpu_init() */
 -------------------    clear_bit(cpu, (unsigned long *)&cpu_initialized);
          -             clear_node_cpumask(cpu);
          -         #endif
 +++++++++++++++++++    cpu_clear(cpu, cpu_initialized);
          +             numa_remove_cpu(cpu);
                    }
                    
                    int __cpu_disable(void)
@@@@@@@@@@@@@@@@@@@@@ -1390,7 -1390,7 -1379,7 -1390,8 -1390,7 -1390,7 -1390,7 -1390,7 -1390,7 -1390,7 -1453,7 -1390,7 -1390,7 -1390,7 -1390,7 -1390,7 -1390,7 -1390,7 -1390,7 -1390,7 +1379,8 @@@@@@@@@@@@@@@@@@@@@ static int __init parse_maxcpus(char *a
                    {
                        extern unsigned int maxcpus;
                    
--- ----------------    maxcpus = simple_strtoul(arg, NULL, 0);
+++ ++++++++++++++++    if (arg)
+++ ++++++++++++++++            maxcpus = simple_strtoul(arg, NULL, 0);
                        return 0;
                    }
                    early_param("maxcpus", parse_maxcpus);
diff --combined arch/x86/mm/init_32.c
index 9689a5138e6472e33c6d0862b3ae56194ffcedb4,9689a5138e6472e33c6d0862b3ae56194ffcedb4,9689a5138e6472e33c6d0862b3ae56194ffcedb4,9689a5138e6472e33c6d0862b3ae56194ffcedb4,9689a5138e6472e33c6d0862b3ae56194ffcedb4,9689a5138e6472e33c6d0862b3ae56194ffcedb4,9689a5138e6472e33c6d0862b3ae56194ffcedb4,029e8cffca9e11cf1794b0cad1c74029eaefa815,9689a5138e6472e33c6d0862b3ae56194ffcedb4,3eeab6d0065ffa52d0945d349bb755fd4ce5f8ab,ec30d10154b657a63ab07b5e25b4298c10f4aabe,9689a5138e6472e33c6d0862b3ae56194ffcedb4,9689a5138e6472e33c6d0862b3ae56194ffcedb4,9689a5138e6472e33c6d0862b3ae56194ffcedb4,9689a5138e6472e33c6d0862b3ae56194ffcedb4,9689a5138e6472e33c6d0862b3ae56194ffcedb4,029e8cffca9e11cf1794b0cad1c74029eaefa815,9689a5138e6472e33c6d0862b3ae56194ffcedb4,9689a5138e6472e33c6d0862b3ae56194ffcedb4,7113acd8ac45c4d9a4af70588c12c411e5be313d..d37f29376b0ce455ae3907051a58779f4b995a25
                    
                    unsigned int __VMALLOC_RESERVE = 128 << 20;
                    
          +         unsigned long max_low_pfn_mapped;
                    unsigned long max_pfn_mapped;
                    
                    DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
@@@@@@@@@@@@@@@@@@@@@ -58,27 -58,27 -58,27 -58,27 -58,27 -58,27 -58,27 -58,27 -58,27 -58,27 -57,6 -58,27 -58,27 -58,27 -58,27 -58,27 -58,27 -58,27 -58,27 -58,27 +58,27 @@@@@@@@@@@@@@@@@@@@@ unsigned long highstart_pfn, highend_pf
                    
                    static noinline int do_test_wp_bit(void);
                    
          +         
          +         static unsigned long __initdata table_start;
          +         static unsigned long __meminitdata table_end;
          +         static unsigned long __meminitdata table_top;
          +         
          +         static int __initdata after_init_bootmem;
          +         
          +         static __init void *alloc_low_page(unsigned long *phys)
          +         {
          +             unsigned long pfn = table_end++;
          +             void *adr;
          +         
          +             if (pfn >= table_top)
          +                     panic("alloc_low_page: ran out of memory");
          +         
          +             adr = __va(pfn * PAGE_SIZE);
          +             memset(adr, 0, PAGE_SIZE);
          +             *phys  = pfn * PAGE_SIZE;
          +             return adr;
          +         }
          +         
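alloc_low_page() above is a bump allocator over the early page-table area reserved by find_early_table_space(): it hands out the next page (table_end++) and panics once table_top is reached. A minimal user-space sketch of the same bump allocation over a static arena; PAGE_SIZE and the arena size are illustrative:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define PAGE_SIZE 4096
#define NR_TABLE_PAGES 4		/* illustrative "table_top - table_start" */

static unsigned char arena[NR_TABLE_PAGES * PAGE_SIZE];
static unsigned long table_end;		/* next free page index */
static unsigned long table_top = NR_TABLE_PAGES;

static void *alloc_low_page(unsigned long *phys)
{
	unsigned long pfn = table_end++;
	void *adr;

	if (pfn >= table_top) {
		fprintf(stderr, "alloc_low_page: ran out of memory\n");
		exit(1);		/* panic() in the kernel */
	}

	adr = arena + pfn * PAGE_SIZE;	/* __va(pfn * PAGE_SIZE) in the kernel */
	memset(adr, 0, PAGE_SIZE);
	*phys = pfn * PAGE_SIZE;
	return adr;
}

int main(void)
{
	unsigned long phys;
	void *p = alloc_low_page(&phys);

	printf("page at %p, phys offset %lu\n", p, phys);
	return 0;
}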
                    /*
                     * Creates a middle page table and puts a pointer to it in the
                     * given global directory entry. This only returns the gd entry
@@@@@@@@@@@@@@@@@@@@@ -90,12 -90,12 -90,12 -90,12 -90,12 -90,12 -90,12 -90,12 -90,12 -90,12 -68,9 -90,12 -90,12 -90,12 -90,12 -90,12 -90,12 -90,12 -90,12 -90,12 +90,12 @@@@@@@@@@@@@@@@@@@@@ static pmd_t * __init one_md_table_init
                        pmd_t *pmd_table;
                    
                    #ifdef CONFIG_X86_PAE
          +             unsigned long phys;
                        if (!(pgd_val(*pgd) & _PAGE_PRESENT)) {
          -                     pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
          -         
          +                     if (after_init_bootmem)
          +                             pmd_table = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE);
          +                     else
          +                             pmd_table = (pmd_t *)alloc_low_page(&phys);
                                paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
                                set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
                                pud = pud_offset(pgd, 0);
@@@@@@@@@@@@@@@@@@@@@ -117,16 -117,16 -117,16 -117,16 -117,16 -117,16 -117,16 -117,16 -117,16 -117,16 -92,12 -117,16 -117,16 -117,16 -117,16 -117,16 -117,16 -117,16 -117,16 -117,16 +117,16 @@@@@@@@@@@@@@@@@@@@@ static pte_t * __init one_page_table_in
                        if (!(pmd_val(*pmd) & _PAGE_PRESENT)) {
                                pte_t *page_table = NULL;
                    
          +                     if (after_init_bootmem) {
                    #ifdef CONFIG_DEBUG_PAGEALLOC
          -                     page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE);
          +                             page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE);
                    #endif
          -                     if (!page_table) {
          -                             page_table =
          +                             if (!page_table)
          +                                     page_table =
                                                (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE);
          +                     } else {
          +                             unsigned long phys;
          +                             page_table = (pte_t *)alloc_low_page(&phys);
                                }
                    
                                paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT);
@@@@@@@@@@@@@@@@@@@@@ -184,44 -184,44 -184,44 -184,44 -184,44 -184,44 -184,44 -184,44 -184,44 -184,44 -155,38 -184,44 -184,44 -184,44 -184,44 -184,44 -184,44 -184,44 -184,44 -184,44 +184,44 @@@@@@@@@@@@@@@@@@@@@ static inline int is_kernel_text(unsign
                     * of max_low_pfn pages, by creating page tables starting from address
                     * PAGE_OFFSET:
                     */
          -         static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
          +         static void __init kernel_physical_mapping_init(pgd_t *pgd_base,
          +                                                     unsigned long start_pfn,
          +                                                     unsigned long end_pfn,
          +                                                     int use_pse)
                    {
                        int pgd_idx, pmd_idx, pte_ofs;
                        unsigned long pfn;
                        pgd_t *pgd;
                        pmd_t *pmd;
                        pte_t *pte;
          +             unsigned pages_2m = 0, pages_4k = 0;
                    
          -             pgd_idx = pgd_index(PAGE_OFFSET);
          -             pgd = pgd_base + pgd_idx;
          -             pfn = 0;
          +             if (!cpu_has_pse)
          +                     use_pse = 0;
                    
          +             pfn = start_pfn;
          +             pgd_idx = pgd_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET);
          +             pgd = pgd_base + pgd_idx;
                        for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) {
                                pmd = one_md_table_init(pgd);
          -                     if (pfn >= max_low_pfn)
          -                             continue;
                    
          -                     for (pmd_idx = 0;
          -                          pmd_idx < PTRS_PER_PMD && pfn < max_low_pfn;
          +                     if (pfn >= end_pfn)
          +                             continue;
          +         #ifdef CONFIG_X86_PAE
          +                     pmd_idx = pmd_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET);
          +                     pmd += pmd_idx;
          +         #else
          +                     pmd_idx = 0;
          +         #endif
          +                     for (; pmd_idx < PTRS_PER_PMD && pfn < end_pfn;
                                     pmd++, pmd_idx++) {
                                        unsigned int addr = pfn * PAGE_SIZE + PAGE_OFFSET;
                    
                                        /*
                                         * Map with big pages if possible, otherwise
                                         * create normal page tables:
          -                              *
          -                              * Don't use a large page for the first 2/4MB of memory
          -                              * because there are often fixed size MTRRs in there
          -                              * and overlapping MTRRs into large pages can cause
          -                              * slowdowns.
                                         */
          -                             if (cpu_has_pse && !(pgd_idx == 0 && pmd_idx == 0)) {
          +                             if (use_pse) {
                                                unsigned int addr2;
                                                pgprot_t prot = PAGE_KERNEL_LARGE;
                    
                                                    is_kernel_text(addr2))
                                                        prot = PAGE_KERNEL_LARGE_EXEC;
                    
          +                                     pages_2m++;
                                                set_pmd(pmd, pfn_pmd(pfn, prot));
                    
                                                pfn += PTRS_PER_PTE;
          -                                     max_pfn_mapped = pfn;
                                                continue;
                                        }
                                        pte = one_page_table_init(pmd);
                    
          -                             for (pte_ofs = 0;
          -                                  pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn;
          +                             pte_ofs = pte_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET);
          +                             pte += pte_ofs;
          +                             for (; pte_ofs < PTRS_PER_PTE && pfn < end_pfn;
                                             pte++, pfn++, pte_ofs++, addr += PAGE_SIZE) {
                                                pgprot_t prot = PAGE_KERNEL;
                    
                                                if (is_kernel_text(addr))
                                                        prot = PAGE_KERNEL_EXEC;
                    
          +                                     pages_4k++;
                                                set_pte(pte, pfn_pte(pfn, prot));
                                        }
          -                             max_pfn_mapped = pfn;
                                }
                        }
          -         }
          -         
          -         static inline int page_kills_ppro(unsigned long pagenr)
          -         {
          -             if (pagenr >= 0x70000 && pagenr <= 0x7003F)
          -                     return 1;
          -             return 0;
          +             update_page_count(PG_LEVEL_2M, pages_2m);
          +             update_page_count(PG_LEVEL_4K, pages_4k);
                    }
                    
                    /*
@@@@@@@@@@@@@@@@@@@@@ -318,62 -318,62 -318,62 -318,62 -318,62 -318,62 -318,62 -318,62 -318,62 -318,62 -287,29 -318,62 -318,62 -318,62 -318,62 -318,62 -318,62 -318,62 -318,62 -318,62 +318,62 @@@@@@@@@@@@@@@@@@@@@ static void __init permanent_kmaps_init
                        pkmap_page_table = pte;
                    }
                    
          -         void __init add_one_highpage_init(struct page *page, int pfn, int bad_ppro)
          +         static void __init add_one_highpage_init(struct page *page, int pfn)
                    {
          -             if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn))) {
          -                     ClearPageReserved(page);
          -                     init_page_count(page);
          -                     __free_page(page);
          -                     totalhigh_pages++;
          -             } else
          -                     SetPageReserved(page);
          +             ClearPageReserved(page);
          +             init_page_count(page);
          +             __free_page(page);
          +             totalhigh_pages++;
                    }
                    
          -         #ifndef CONFIG_NUMA
          -         static void __init set_highmem_pages_init(int bad_ppro)
          +         struct add_highpages_data {
          +             unsigned long start_pfn;
          +             unsigned long end_pfn;
          +         };
          +         
          +         static int __init add_highpages_work_fn(unsigned long start_pfn,
          +                                              unsigned long end_pfn, void *datax)
                    {
          -             int pfn;
          +             int node_pfn;
          +             struct page *page;
          +             unsigned long final_start_pfn, final_end_pfn;
          +             struct add_highpages_data *data;
                    
          -             for (pfn = highstart_pfn; pfn < highend_pfn; pfn++) {
          -                     /*
          -                      * Holes under sparsemem might not have no mem_map[]:
          -                      */
          -                     if (pfn_valid(pfn))
          -                             add_one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro);
          +             data = (struct add_highpages_data *)datax;
          +         
          +             final_start_pfn = max(start_pfn, data->start_pfn);
          +             final_end_pfn = min(end_pfn, data->end_pfn);
          +             if (final_start_pfn >= final_end_pfn)
          +                     return 0;
          +         
          +             for (node_pfn = final_start_pfn; node_pfn < final_end_pfn;
          +                  node_pfn++) {
          +                     if (!pfn_valid(node_pfn))
          +                             continue;
          +                     page = pfn_to_page(node_pfn);
          +                     add_one_highpage_init(page, node_pfn);
                        }
          +         
          +             return 0;
          +         
          +         }
          +         
          +         void __init add_highpages_with_active_regions(int nid, unsigned long start_pfn,
          +                                                   unsigned long end_pfn)
          +         {
          +             struct add_highpages_data data;
          +         
          +             data.start_pfn = start_pfn;
          +             data.end_pfn = end_pfn;
          +         
          +             work_with_active_regions(nid, add_highpages_work_fn, &data);
          +         }
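add_highpages_work_fn() above clamps each active region it is given to the requested [start_pfn, end_pfn) window, i.e. it takes the intersection of the two ranges and skips the region when the intersection is empty. A small sketch of just that clamping step:

#include <stdio.h>

/* Intersect an active region with the requested highmem window.
 * Returns 0 and fills *out_start/*out_end when the overlap is non-empty. */
static int clamp_to_window(unsigned long region_start, unsigned long region_end,
			   unsigned long win_start, unsigned long win_end,
			   unsigned long *out_start, unsigned long *out_end)
{
	unsigned long start = region_start > win_start ? region_start : win_start;
	unsigned long end = region_end < win_end ? region_end : win_end;

	if (start >= end)
		return -1;	/* region lies entirely outside the window */

	*out_start = start;
	*out_end = end;
	return 0;
}

int main(void)
{
	unsigned long s, e;

	/* region 0x30000-0x50000 against a highmem window starting at 0x38000 */
	if (!clamp_to_window(0x30000, 0x50000, 0x38000, 0x80000, &s, &e))
		printf("free pfns %#lx - %#lx into highmem\n", s, e);
	return 0;
}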
          +         
          +         #ifndef CONFIG_NUMA
          +         static void __init set_highmem_pages_init(void)
          +         {
          +             add_highpages_with_active_regions(0, highstart_pfn, highend_pfn);
          +         
                        totalram_pages += totalhigh_pages;
                    }
                    #endif /* !CONFIG_NUMA */
                    #else
                    # define kmap_init()                                do { } while (0)
                    # define permanent_kmaps_init(pgd_base)             do { } while (0)
          -         # define set_highmem_pages_init(bad_ppro)   do { } while (0)
          +         # define set_highmem_pages_init()   do { } while (0)
                    #endif /* CONFIG_HIGHMEM */
                    
          -         pteval_t __PAGE_KERNEL = _PAGE_KERNEL;
          -         EXPORT_SYMBOL(__PAGE_KERNEL);
          -         
          -         pteval_t __PAGE_KERNEL_EXEC = _PAGE_KERNEL_EXEC;
          -         
                    void __init native_pagetable_setup_start(pgd_t *base)
                    {
                        unsigned long pfn, va;
@@@@@@@@@@@@@@@@@@@@@ -439,10 -439,10 -439,10 -439,10 -439,10 -439,10 -439,10 -439,10 -439,10 -439,10 -380,27 -439,10 -439,10 -439,10 -439,10 -439,10 -439,10 -439,10 -439,10 -439,10 +439,10 @@@@@@@@@@@@@@@@@@@@@ void __init native_pagetable_setup_done
                     * be partially populated, and so it avoids stomping on any existing
                     * mappings.
                     */
          -         static void __init pagetable_init(void)
          +         static void __init early_ioremap_page_table_range_init(pgd_t *pgd_base)
                    {
          -             pgd_t *pgd_base = swapper_pg_dir;
                        unsigned long vaddr, end;
                    
          -             paravirt_pagetable_setup_start(pgd_base);
          -         
          -             /* Enable PSE if available */
          -             if (cpu_has_pse)
          -                     set_in_cr4(X86_CR4_PSE);
          -         
          -             /* Enable PGE if available */
          -             if (cpu_has_pge) {
          -                     set_in_cr4(X86_CR4_PGE);
          -                     __PAGE_KERNEL |= _PAGE_GLOBAL;
          -                     __PAGE_KERNEL_EXEC |= _PAGE_GLOBAL;
          -             }
          -         
          -             kernel_physical_mapping_init(pgd_base);
          -             remap_numa_kva();
          -         
                        /*
                         * Fixed mappings, only the page table structure has to be
                         * created - mappings will be set by set_fixmap():
                        end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK;
                        page_table_range_init(vaddr, end, pgd_base);
                        early_ioremap_reset();
          +         }
          +         
          +         static void __init pagetable_init(void)
          +         {
          +             pgd_t *pgd_base = swapper_pg_dir;
          +         
          +             paravirt_pagetable_setup_start(pgd_base);
                    
                        permanent_kmaps_init(pgd_base);
                    
@@@@@@@@@@@@@@@@@@@@@ -505,7 -505,7 -505,7 -505,7 -505,7 -505,7 -505,7 -505,7 -505,7 -505,7 -456,7 -505,7 -505,7 -505,7 -505,7 -505,7 -505,7 -505,7 -505,7 -505,7 +505,7 @@@@@@@@@@@@@@@@@@@@@ void zap_low_mappings(void
                    
                    int nx_enabled;
                    
          -         pteval_t __supported_pte_mask __read_mostly = ~_PAGE_NX;
          +         pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL);
                    EXPORT_SYMBOL_GPL(__supported_pte_mask);
                    
                    #ifdef CONFIG_X86_PAE
                    }
                    #endif
                    
          +         /* user-defined highmem size */
          +         static unsigned int highmem_pages = -1;
          +         
                    /*
          -          * paging_init() sets up the page tables - note that the first 8MB are
          -          * already mapped by head.S.
          -          *
          -          * This routines also unmaps the page at virtual kernel address 0, so
          -          * that we can trap those pesky NULL-reference errors in the kernel.
          +          * highmem=size forces highmem to be exactly 'size' bytes.
          +          * This works even on boxes that have no highmem otherwise.
          +          * This also works to reduce highmem size on bigger boxes.
                     */
          -         void __init paging_init(void)
          +         static int __init parse_highmem(char *arg)
          +         {
          +             if (!arg)
          +                     return -EINVAL;
          +         
          +             highmem_pages = memparse(arg, &arg) >> PAGE_SHIFT;
          +             return 0;
          +         }
          +         early_param("highmem", parse_highmem);
          +         
          +         /*
          +          * Determine low and high memory ranges:
          +          */
          +         void __init find_low_pfn_range(void)
          +         {
          +             /* it could update max_pfn */
          +         
          +             /* max_low_pfn is 0, we already have early_res support */
          +         
          +             max_low_pfn = max_pfn;
          +             if (max_low_pfn > MAXMEM_PFN) {
          +                     if (highmem_pages == -1)
          +                             highmem_pages = max_pfn - MAXMEM_PFN;
          +                     if (highmem_pages + MAXMEM_PFN < max_pfn)
          +                             max_pfn = MAXMEM_PFN + highmem_pages;
          +                     if (highmem_pages + MAXMEM_PFN > max_pfn) {
          +                             printk(KERN_WARNING "only %luMB highmem pages "
          +                                     "available, ignoring highmem size of %uMB.\n",
          +                                     pages_to_mb(max_pfn - MAXMEM_PFN),
          +                                     pages_to_mb(highmem_pages));
          +                             highmem_pages = 0;
          +                     }
          +                     max_low_pfn = MAXMEM_PFN;
          +         #ifndef CONFIG_HIGHMEM
          +                     /* Maximum memory usable is what is directly addressable */
          +                     printk(KERN_WARNING "Warning only %ldMB will be used.\n",
          +                                             MAXMEM>>20);
          +                     if (max_pfn > MAX_NONPAE_PFN)
          +                             printk(KERN_WARNING
          +                                      "Use a HIGHMEM64G enabled kernel.\n");
          +                     else
          +                             printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
          +                     max_pfn = MAXMEM_PFN;
          +         #else /* !CONFIG_HIGHMEM */
          +         #ifndef CONFIG_HIGHMEM64G
          +                     if (max_pfn > MAX_NONPAE_PFN) {
          +                             max_pfn = MAX_NONPAE_PFN;
          +                             printk(KERN_WARNING "Warning only 4GB will be used."
          +                                     "Use a HIGHMEM64G enabled kernel.\n");
          +                     }
          +         #endif /* !CONFIG_HIGHMEM64G */
          +         #endif /* !CONFIG_HIGHMEM */
          +             } else {
          +                     if (highmem_pages == -1)
          +                             highmem_pages = 0;
          +         #ifdef CONFIG_HIGHMEM
          +                     if (highmem_pages >= max_pfn) {
          +                             printk(KERN_ERR "highmem size specified (%uMB) is "
          +                                     "bigger than pages available (%luMB)!.\n",
          +                                     pages_to_mb(highmem_pages),
          +                                     pages_to_mb(max_pfn));
          +                             highmem_pages = 0;
          +                     }
          +                     if (highmem_pages) {
          +                             if (max_low_pfn - highmem_pages <
          +                                 64*1024*1024/PAGE_SIZE){
          +                                     printk(KERN_ERR "highmem size %uMB results in "
          +                                     "smaller than 64MB lowmem, ignoring it.\n"
          +                                             , pages_to_mb(highmem_pages));
          +                                     highmem_pages = 0;
          +                             }
          +                             max_low_pfn -= highmem_pages;
          +                     }
          +         #else
          +                     if (highmem_pages)
          +                             printk(KERN_ERR "ignoring highmem size on non-highmem"
          +                                             " kernel!\n");
          +         #endif
          +             }
          +         }
          +         
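find_low_pfn_range() above splits max_pfn into directly mapped lowmem (at most MAXMEM_PFN pages) and a highmem remainder, optionally steered by highmem= on the command line. A simplified sketch of that split; MAXMEM_PFN is an illustrative ~896MB value, and the real function additionally trims max_pfn and enforces a 64MB lowmem minimum:

#include <stdio.h>

#define MAXMEM_PFN  (896UL * 1024 * 1024 / 4096)  /* ~896MB of lowmem, illustrative */

/* Returns max_low_pfn for a given max_pfn and an optional highmem= override
 * (highmem_pages == -1UL means "not given on the command line"). */
static unsigned long find_low_pfn_range(unsigned long max_pfn,
					unsigned long highmem_pages)
{
	unsigned long max_low_pfn = max_pfn;

	if (max_low_pfn > MAXMEM_PFN) {
		if (highmem_pages == -1UL)
			highmem_pages = max_pfn - MAXMEM_PFN;
		if (highmem_pages + MAXMEM_PFN > max_pfn)
			highmem_pages = 0;	/* more highmem asked for than exists */
		max_low_pfn = MAXMEM_PFN;
	} else {
		if (highmem_pages == -1UL)
			highmem_pages = 0;
		if (highmem_pages >= max_pfn)
			highmem_pages = 0;	/* request bigger than all of RAM */
		max_low_pfn -= highmem_pages;
	}
	return max_low_pfn;
}

int main(void)
{
	/* 2GB box, no highmem= override: lowmem capped at MAXMEM_PFN */
	printf("max_low_pfn = %lu\n",
	       find_low_pfn_range(2UL << (30 - 12), -1UL));
	/* 512MB box, highmem=128M forced: lowmem shrinks by 32768 pages */
	printf("max_low_pfn = %lu\n",
	       find_low_pfn_range(512UL << (20 - 12), 128UL << (20 - 12)));
	return 0;
}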
          +         #ifndef CONFIG_NEED_MULTIPLE_NODES
          +         void __init initmem_init(unsigned long start_pfn,
          +                                       unsigned long end_pfn)
                    {
          +         #ifdef CONFIG_HIGHMEM
          +             highstart_pfn = highend_pfn = max_pfn;
          +             if (max_pfn > max_low_pfn)
          +                     highstart_pfn = max_low_pfn;
          +             memory_present(0, 0, highend_pfn);
          +             e820_register_active_regions(0, 0, highend_pfn);
          +             printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
          +                     pages_to_mb(highend_pfn - highstart_pfn));
          +             num_physpages = highend_pfn;
          +             high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
          +         #else
          +             memory_present(0, 0, max_low_pfn);
          +             e820_register_active_regions(0, 0, max_low_pfn);
          +             num_physpages = max_low_pfn;
          +             high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
          +         #endif
          +         #ifdef CONFIG_FLATMEM
          +             max_mapnr = num_physpages;
          +         #endif
          +             printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
          +                             pages_to_mb(max_low_pfn));
          +         
          +             setup_bootmem_allocator();
          +         }
          +         #endif /* !CONFIG_NEED_MULTIPLE_NODES */
          +         
          +         static void __init zone_sizes_init(void)
          +         {
          +             unsigned long max_zone_pfns[MAX_NR_ZONES];
          +             memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
          +             max_zone_pfns[ZONE_DMA] =
          +                     virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
          +             max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
          +         #ifdef CONFIG_HIGHMEM
          +             max_zone_pfns[ZONE_HIGHMEM] = highend_pfn;
          +         #endif
          +         
          +             free_area_init_nodes(max_zone_pfns);
          +         }
          +         
          +         void __init setup_bootmem_allocator(void)
          +         {
          +             int i;
          +             unsigned long bootmap_size, bootmap;
          +             /*
          +              * Initialize the boot-time allocator (with low memory only):
          +              */
          +             bootmap_size = bootmem_bootmap_pages(max_low_pfn)<<PAGE_SHIFT;
          +             bootmap = find_e820_area(min_low_pfn<<PAGE_SHIFT,
          +                                      max_pfn_mapped<<PAGE_SHIFT, bootmap_size,
          +                                      PAGE_SIZE);
          +             if (bootmap == -1L)
          +                     panic("Cannot find bootmem map of size %ld\n", bootmap_size);
          +             reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP");
          +         
          +             /* don't touch min_low_pfn */
          +             bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap >> PAGE_SHIFT,
          +                                              min_low_pfn, max_low_pfn);
          +             printk(KERN_INFO "  mapped low ram: 0 - %08lx\n",
          +                      max_pfn_mapped<<PAGE_SHIFT);
          +             printk(KERN_INFO "  low ram: %08lx - %08lx\n",
          +                      min_low_pfn<<PAGE_SHIFT, max_low_pfn<<PAGE_SHIFT);
          +             printk(KERN_INFO "  bootmap %08lx - %08lx\n",
          +                      bootmap, bootmap + bootmap_size);
          +             for_each_online_node(i)
          +                     free_bootmem_with_active_regions(i, max_low_pfn);
          +             early_res_to_bootmem(0, max_low_pfn<<PAGE_SHIFT);
          +         
          +             after_init_bootmem = 1;
          +         }
          +         
          +         static void __init find_early_table_space(unsigned long end)
          +         {
          +             unsigned long puds, pmds, ptes, tables, start;
          +         
          +             puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
          +             tables = PAGE_ALIGN(puds * sizeof(pud_t));
          +         
          +             pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
          +             tables += PAGE_ALIGN(pmds * sizeof(pmd_t));
          +         
          +             if (cpu_has_pse) {
          +                     unsigned long extra;
          +         
          +                     extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT);
          +                     extra += PMD_SIZE;
          +                     ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT;
          +             } else
          +                     ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
          +         
          +             tables += PAGE_ALIGN(ptes * sizeof(pte_t));
          +         
          +             /* for fixmap */
          +             tables += PAGE_SIZE * 2;
          +         
          +             /*
          +              * RED-PEN putting page tables only on node 0 could
          +              * cause a hotspot and fill up ZONE_DMA. The page tables
          +              * need roughly 0.5KB per GB.
          +              */
          +             start = 0x7000;
          +             table_start = find_e820_area(start, max_pfn_mapped<<PAGE_SHIFT,
          +                                             tables, PAGE_SIZE);
          +             if (table_start == -1UL)
          +                     panic("Cannot find space for the kernel page tables");
          +         
          +             table_start >>= PAGE_SHIFT;
          +             table_end = table_start;
          +             table_top = table_start + (tables>>PAGE_SHIFT);
          +         
          +             printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n",
          +                     end, table_start << PAGE_SHIFT,
          +                     (table_start << PAGE_SHIFT) + tables);
          +         }
          +         
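find_early_table_space() above sizes the early page-table allocation from the end address alone: one entry per PUD region, one per PMD region, and one pte per page unless PSE lets the bulk use large pages, each group rounded up to whole pages, plus two pages for the fixmap. A sketch of that arithmetic with illustrative 32-bit PAE constants (8-byte entries, 2MB PMDs):

#include <stdio.h>

/* Illustrative 32-bit PAE geometry */
#define PAGE_SIZE	4096UL
#define PMD_SIZE	(2UL * 1024 * 1024)		/* 2MB with PAE */
#define PUD_SIZE	(1UL * 1024 * 1024 * 1024)	/* 1GB */
#define PAGE_ALIGN(x)	(((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

/* entry sizes: 8 bytes each under PAE */
#define PUD_ENTRY	8UL
#define PMD_ENTRY	8UL
#define PTE_ENTRY	8UL

static unsigned long early_table_space(unsigned long end, int cpu_has_pse)
{
	unsigned long puds, pmds, ptes, tables;

	puds = (end + PUD_SIZE - 1) / PUD_SIZE;
	tables = PAGE_ALIGN(puds * PUD_ENTRY);

	pmds = (end + PMD_SIZE - 1) / PMD_SIZE;
	tables += PAGE_ALIGN(pmds * PMD_ENTRY);

	if (cpu_has_pse) {
		/* only the unaligned tail (plus one extra PMD) needs 4k ptes */
		unsigned long extra = end - (end / PMD_SIZE) * PMD_SIZE + PMD_SIZE;
		ptes = (extra + PAGE_SIZE - 1) / PAGE_SIZE;
	} else {
		ptes = (end + PAGE_SIZE - 1) / PAGE_SIZE;
	}
	tables += PAGE_ALIGN(ptes * PTE_ENTRY);

	tables += PAGE_SIZE * 2;	/* for the fixmap */
	return tables;
}

int main(void)
{
	unsigned long end = 512UL * 1024 * 1024;	/* map the first 512MB */

	printf("with PSE:    %lu bytes of tables\n", early_table_space(end, 1));
	printf("without PSE: %lu bytes of tables\n", early_table_space(end, 0));
	return 0;
}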
          +         unsigned long __init_refok init_memory_mapping(unsigned long start,
          +                                                     unsigned long end)
          +         {
          +             pgd_t *pgd_base = swapper_pg_dir;
          +             unsigned long start_pfn, end_pfn;
          +             unsigned long big_page_start;
          +         
          +             /*
          +              * Find space for the kernel direct mapping tables.
          +              */
          +             if (!after_init_bootmem)
          +                     find_early_table_space(end);
          +         
                    #ifdef CONFIG_X86_PAE
                        set_nx();
                        if (nx_enabled)
                                printk(KERN_INFO "NX (Execute Disable) protection: active\n");
                    #endif
          -             pagetable_init();
          +         
          +             /* Enable PSE if available */
          +             if (cpu_has_pse)
          +                     set_in_cr4(X86_CR4_PSE);
          +         
          +             /* Enable PGE if available */
          +             if (cpu_has_pge) {
          +                     set_in_cr4(X86_CR4_PGE);
          +                     __supported_pte_mask |= _PAGE_GLOBAL;
          +             }
          +         
          +             /*
          +              * Don't use a large page for the first 2/4MB of memory
          +              * because there are often fixed size MTRRs in there
          +              * and overlapping MTRRs into large pages can cause
          +              * slowdowns.
          +              */
          +             big_page_start = PMD_SIZE;
          +         
          +             if (start < big_page_start) {
          +                     start_pfn = start >> PAGE_SHIFT;
          +                     end_pfn = min(big_page_start>>PAGE_SHIFT, end>>PAGE_SHIFT);
          +             } else {
          +                     /* head is not big page alignment ? */
          +                     start_pfn = start >> PAGE_SHIFT;
          +                     end_pfn = ((start + (PMD_SIZE - 1))>>PMD_SHIFT)
          +                                      << (PMD_SHIFT - PAGE_SHIFT);
          +             }
          +             if (start_pfn < end_pfn)
          +                     kernel_physical_mapping_init(pgd_base, start_pfn, end_pfn, 0);
          +         
          +             /* big page range */
          +             start_pfn = ((start + (PMD_SIZE - 1))>>PMD_SHIFT)
          +                              << (PMD_SHIFT - PAGE_SHIFT);
          +             if (start_pfn < (big_page_start >> PAGE_SHIFT))
          +                     start_pfn =  big_page_start >> PAGE_SHIFT;
          +             end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
          +             if (start_pfn < end_pfn)
          +                     kernel_physical_mapping_init(pgd_base, start_pfn, end_pfn,
          +                                                     cpu_has_pse);
          +         
          +             /* tail is not big page alignment ? */
          +             start_pfn = end_pfn;
          +             if (start_pfn > (big_page_start>>PAGE_SHIFT)) {
          +                     end_pfn = end >> PAGE_SHIFT;
          +                     if (start_pfn < end_pfn)
          +                             kernel_physical_mapping_init(pgd_base, start_pfn,
          +                                                              end_pfn, 0);
          +             }
          +         
          +             early_ioremap_page_table_range_init(pgd_base);
                    
                        load_cr3(swapper_pg_dir);
                    
                        __flush_tlb_all();
                    
          +             if (!after_init_bootmem)
          +                     reserve_early(table_start << PAGE_SHIFT,
          +                                      table_end << PAGE_SHIFT, "PGTABLE");
          +         
+++++++++ ++++++++++    if (!after_init_bootmem)
+++++++++ ++++++++++            early_memtest(start, end);
+++++++++ ++++++++++
          +             return end >> PAGE_SHIFT;
          +         }
          +         
          +         
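init_memory_mapping() above splits [start, end) into an unaligned head mapped with 4k pages, a PMD-aligned middle mapped with large pages when PSE is available, and an unaligned tail mapped with 4k pages again, while keeping everything below big_page_start on 4k pages because of the fixed-size MTRRs noted in the comment. A sketch of just the range-splitting arithmetic; PMD_SIZE is the 4MB value of a non-PAE 32-bit kernel, and map_range() merely prints what the real code would hand to kernel_physical_mapping_init():

#include <stdio.h>

#define PAGE_SHIFT	12
#define PMD_SIZE	(4UL * 1024 * 1024)	/* 4MB large pages, non-PAE */
#define PMD_SHIFT	22

static void map_range(unsigned long start_pfn, unsigned long end_pfn, int pse)
{
	if (start_pfn < end_pfn)
		printf("  map pfn %7lu - %7lu with %s pages\n",
		       start_pfn, end_pfn, pse ? "large" : "4k");
}

static void init_memory_mapping(unsigned long start, unsigned long end, int cpu_has_pse)
{
	unsigned long start_pfn, end_pfn;
	unsigned long big_page_start = PMD_SIZE;	/* keep MTRR-covered low memory on 4k pages */

	/* head: below big_page_start, or up to the first PMD boundary */
	if (start < big_page_start) {
		start_pfn = start >> PAGE_SHIFT;
		end_pfn = big_page_start >> PAGE_SHIFT;
		if ((end >> PAGE_SHIFT) < end_pfn)
			end_pfn = end >> PAGE_SHIFT;
	} else {
		start_pfn = start >> PAGE_SHIFT;
		end_pfn = ((start + (PMD_SIZE - 1)) >> PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
	}
	map_range(start_pfn, end_pfn, 0);

	/* middle: whole PMD-sized chunks, large pages if the CPU has PSE */
	start_pfn = ((start + (PMD_SIZE - 1)) >> PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
	if (start_pfn < (big_page_start >> PAGE_SHIFT))
		start_pfn = big_page_start >> PAGE_SHIFT;
	end_pfn = (end >> PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
	map_range(start_pfn, end_pfn, cpu_has_pse);

	/* tail: whatever is left past the last PMD boundary */
	start_pfn = end_pfn;
	if (start_pfn > (big_page_start >> PAGE_SHIFT)) {
		end_pfn = end >> PAGE_SHIFT;
		map_range(start_pfn, end_pfn, 0);
	}
}

int main(void)
{
	/* map 0 .. 130MB on a PSE-capable CPU */
	init_memory_mapping(0, 130UL * 1024 * 1024, 1);
	return 0;
}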
          +         /*
          +          * paging_init() sets up the page tables - note that the first 8MB are
          +          * already mapped by head.S.
          +          *
          +          * This routines also unmaps the page at virtual kernel address 0, so
          +          * that we can trap those pesky NULL-reference errors in the kernel.
          +          */
          +         void __init paging_init(void)
          +         {
          +             pagetable_init();
          +         
          +             __flush_tlb_all();
          +         
                        kmap_init();
---------- -------- 
---------- --------     paravirt_post_allocator_init();
          +         
          +             /*
          +              * NOTE: at this point the bootmem allocator is fully available.
          +              */
          +             sparse_init();
          +             zone_sizes_init();
                    }
                    
                    /*
@@@@@@@@@@@@@@@@@@@@@ -904,10 -904,10 -904,10 -904,10 -904,10 -904,10 -904,10 -904,10 -904,10 -907,10 -564,23 -904,10 -904,10 -904,10 -904,10 -904,10 -904,10 -904,10 -904,10 -902,10 +905,10 @@@@@@@@@@@@@@@@@@@@@ static struct kcore_list kcore_mem, kco
                    void __init mem_init(void)
                    {
                        int codesize, reservedpages, datasize, initsize;
          -             int tmp, bad_ppro;
          +             int tmp;
                    
                    #ifdef CONFIG_FLATMEM
                        BUG_ON(!mem_map);
          -         #endif
          -             bad_ppro = ppro_with_ram_bug();
          -         
          -         #ifdef CONFIG_HIGHMEM
          -             /* check that fixmap and pkmap do not overlap */
          -             if (PKMAP_BASE + LAST_PKMAP*PAGE_SIZE >= FIXADDR_START) {
          -                     printk(KERN_ERR
          -                             "fixmap and kmap areas overlap - this will crash\n");
          -                     printk(KERN_ERR "pkstart: %lxh pkend: %lxh fixstart %lxh\n",
          -                                     PKMAP_BASE, PKMAP_BASE + LAST_PKMAP*PAGE_SIZE,
          -                                     FIXADDR_START);
          -                     BUG();
          -             }
                    #endif
                        /* this will put all low memory onto the freelists */
                        totalram_pages += free_all_bootmem();
                                if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp)))
                                        reservedpages++;
                    
          -             set_highmem_pages_init(bad_ppro);
          +             set_highmem_pages_init();
                    
                        codesize =  (unsigned long) &_etext - (unsigned long) &_text;
                        datasize =  (unsigned long) &_edata - (unsigned long) &_etext;
                                (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))
                               );
                    
          -         #if 1 /* double-sanity-check paranoia */
                        printk(KERN_INFO "virtual kernel memory layout:\n"
                                "    fixmap  : 0x%08lx - 0x%08lx   (%4ld kB)\n"
                    #ifdef CONFIG_HIGHMEM
                    #endif
                        BUG_ON(VMALLOC_START                            > VMALLOC_END);
                        BUG_ON((unsigned long)high_memory               > VMALLOC_START);
          -         #endif /* double-sanity-check paranoia */
                    
                        if (boot_cpu_data.wp_works_ok < 0)
                                test_wp_bit();
                        unsigned long start = PFN_ALIGN(_text);
                        unsigned long size = PFN_ALIGN(_etext) - start;
                    
       +  +     +   #ifndef CONFIG_DYNAMIC_FTRACE
       +  +     +       /* Dynamic tracing modifies the kernel text section */
                        set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
                        printk(KERN_INFO "Write protecting the kernel text: %luk\n",
                                size >> 10);
                        printk(KERN_INFO "Testing CPA: write protecting again\n");
                        set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT);
                    #endif
       +  +     +   #endif /* CONFIG_DYNAMIC_FTRACE */
       +  +     +   
                        start += size;
                        size = (unsigned long)__end_rodata - start;
                        set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
@@@@@@@@@@@@@@@@@@@@@ -1113,9 -1113,9 -1113,9 -1113,9 -1113,9 -1113,9 -1113,9 -1109,9 -1113,9 -1116,9 -784,3 -1113,9 -1113,9 -1113,9 -1113,9 -1113,9 -1109,9 -1113,9 -1113,9 -1111,9 +1114,9 @@@@@@@@@@@@@@@@@@@@@ void free_initrd_mem(unsigned long star
                        free_init_pages("initrd memory", start, end);
                    }
                    #endif
          +         
          +         int __init reserve_bootmem_generic(unsigned long phys, unsigned long len,
          +                                        int flags)
          +         {
          +             return reserve_bootmem(phys, len, flags);
          +         }
diff --combined arch/x86/mm/pat.c
index 6bb597f4d70133e4aefb9db3d8e4b1bec259c804,d4585077977a0bd1c6c7234b09051d671851a6aa,d4585077977a0bd1c6c7234b09051d671851a6aa,d4585077977a0bd1c6c7234b09051d671851a6aa,d4585077977a0bd1c6c7234b09051d671851a6aa,d4585077977a0bd1c6c7234b09051d671851a6aa,d4585077977a0bd1c6c7234b09051d671851a6aa,749766c3c5cd68eaac4c8b6c45646919cff68e00,d4585077977a0bd1c6c7234b09051d671851a6aa,d4585077977a0bd1c6c7234b09051d671851a6aa,06b7a1c90fb89e43eadd396592cbf6625080020b,d4585077977a0bd1c6c7234b09051d671851a6aa,0917a540a55e39374b09c74ab4026a07520ed121,d4585077977a0bd1c6c7234b09051d671851a6aa,d4585077977a0bd1c6c7234b09051d671851a6aa,d4585077977a0bd1c6c7234b09051d671851a6aa,749766c3c5cd68eaac4c8b6c45646919cff68e00,d4585077977a0bd1c6c7234b09051d671851a6aa,d4585077977a0bd1c6c7234b09051d671851a6aa,d4585077977a0bd1c6c7234b09051d671851a6aa..2fe30916d4b66aba6ea1121a9d5bfadb60f1c82f
                    #include <linux/gfp.h>
                    #include <linux/fs.h>
                    #include <linux/bootmem.h>
++++++++++++ +++++++#include <linux/debugfs.h>
++++++++++++ +++++++#include <linux/seq_file.h>
                    
                    #include <asm/msr.h>
                    #include <asm/tlbflush.h>
                    #include <asm/io.h>
                    
                    #ifdef CONFIG_X86_PAT
          -         int __read_mostly pat_wc_enabled = 1;
          +         int __read_mostly pat_enabled = 1;
                    
                    void __cpuinit pat_disable(char *reason)
                    {
          -             pat_wc_enabled = 0;
          +             pat_enabled = 0;
                        printk(KERN_INFO "%s\n", reason);
                    }
                    
@@@@@@@@@@@@@@@@@@@@@ -42,19 -42,19 -42,19 -42,19 -42,19 -42,19 -42,19 -42,19 -42,19 -42,19 -42,6 -42,19 -44,19 -42,19 -42,19 -42,19 -42,19 -42,19 -42,19 -42,19 +44,19 @@@@@@@@@@@@@@@@@@@@@ static int __init nopat(char *str
                    early_param("nopat", nopat);
                    #endif
                    
          +         
          +         static int debug_enable;
          +         static int __init pat_debug_setup(char *str)
          +         {
          +             debug_enable = 1;
          +             return 0;
          +         }
          +         __setup("debugpat", pat_debug_setup);
          +         
          +         #define dprintk(fmt, arg...) \
          +             do { if (debug_enable) printk(KERN_INFO fmt, ##arg); } while (0)
          +         
          +         
                    static u64 __read_mostly boot_pat_state;
                    
                    enum {
                         PAT_UC_MINUS = 7,       /* UC, but can be overridden by MTRR */
                    };
                    
          -         #define PAT(x,y)    ((u64)PAT_ ## y << ((x)*8))
          +         #define PAT(x, y)   ((u64)PAT_ ## y << ((x)*8))
                    
                    void pat_init(void)
                    {
                        u64 pat;
                    
          -             if (!pat_wc_enabled)
          +             if (!pat_enabled)
                                return;
                    
                        /* Paranoia check. */
          -             if (!cpu_has_pat) {
          -                     printk(KERN_ERR "PAT enabled, but CPU feature cleared\n");
          +             if (!cpu_has_pat && boot_pat_state) {
                                /*
          -                      * Panic if this happens on the secondary CPU, and we
          +                      * If this happens we are on a secondary CPU, but
                                 * switched to PAT on the boot CPU. We have no way to
                                 * undo PAT.
          -                     */
          -                     BUG_ON(boot_pat_state);
          +                      */
          +                     printk(KERN_ERR "PAT enabled, "
          +                            "but not supported by secondary CPU\n");
          +                     BUG();
                        }
                    
                        /* Set PWT to Write-Combining. All other bits stay the same */
                         *      011 UC          _PAGE_CACHE_UC
                         * PAT bit unused
                         */
          -             pat = PAT(0,WB) | PAT(1,WC) | PAT(2,UC_MINUS) | PAT(3,UC) |
          -                   PAT(4,WB) | PAT(5,WC) | PAT(6,UC_MINUS) | PAT(7,UC);
          +             pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) |
          +                   PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, UC);
                    
                        /* Boot CPU check */
                        if (!boot_pat_state)
                    static char *cattr_name(unsigned long flags)
                    {
                        switch (flags & _PAGE_CACHE_MASK) {
          -                     case _PAGE_CACHE_UC:            return "uncached";
          -                     case _PAGE_CACHE_UC_MINUS:      return "uncached-minus";
          -                     case _PAGE_CACHE_WB:            return "write-back";
          -                     case _PAGE_CACHE_WC:            return "write-combining";
          -                     default:                        return "broken";
          +             case _PAGE_CACHE_UC:            return "uncached";
          +             case _PAGE_CACHE_UC_MINUS:      return "uncached-minus";
          +             case _PAGE_CACHE_WB:            return "write-back";
          +             case _PAGE_CACHE_WC:            return "write-combining";
          +             default:                        return "broken";
                        }
                    }
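
(A worked illustration, not part of the patch: PAT(x, y) places one memory-type encoding in byte x of the 64-bit IA32_PAT MSR, so the value composed in pat_init() lays out entries 0..7 as WB, WC, UC-, UC, WB, WC, UC-, UC. Only PAT_UC_MINUS = 7 is visible in this hunk; the other PAT_* encodings below are the architectural values and are assumed here.)

#include <stdio.h>
#include <stdint.h>

#define PAT_UC       0ULL
#define PAT_WC       1ULL
#define PAT_WB       6ULL
#define PAT_UC_MINUS 7ULL
#define PAT(x, y)    ((uint64_t)PAT_ ## y << ((x) * 8))

int main(void)
{
        uint64_t pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) |
                       PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, UC);

        /* prints 0x0007010600070106: one type encoding per byte, entry 0 in the low byte */
        printf("IA32_PAT value: 0x%016llx\n", (unsigned long long)pat);
        return 0;
}
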
                    
@@@@@@@@@@@@@@@@@@@@@ -159,50 -159,50 -159,50 -159,50 -159,50 -159,50 -159,50 -159,50 -159,50 -159,50 -145,47 -159,50 -161,50 -159,50 -159,50 -159,50 -159,50 -159,50 -159,50 -159,50 +161,50 @@@@@@@@@@@@@@@@@@@@@ static DEFINE_SPINLOCK(memtype_lock);       
                     * The intersection is based on "Effective Memory Type" tables in IA-32
                     * SDM vol 3a
                     */
          -         static int pat_x_mtrr_type(u64 start, u64 end, unsigned long prot,
          -                                     unsigned long *ret_prot)
          +         static unsigned long pat_x_mtrr_type(u64 start, u64 end, unsigned long req_type)
                    {
          -             unsigned long pat_type;
          -             u8 mtrr_type;
          -         
          -             pat_type = prot & _PAGE_CACHE_MASK;
          -             prot &= (~_PAGE_CACHE_MASK);
          -         
          -             /*
          -              * We return the PAT request directly for types where PAT takes
          -              * precedence with respect to MTRR and for UC_MINUS.
          -              * Consistency checks with other PAT requests is done later
          -              * while going through memtype list.
          -              */
          -             if (pat_type == _PAGE_CACHE_WC) {
          -                     *ret_prot = prot | _PAGE_CACHE_WC;
          -                     return 0;
          -             } else if (pat_type == _PAGE_CACHE_UC_MINUS) {
          -                     *ret_prot = prot | _PAGE_CACHE_UC_MINUS;
          -                     return 0;
          -             } else if (pat_type == _PAGE_CACHE_UC) {
          -                     *ret_prot = prot | _PAGE_CACHE_UC;
          -                     return 0;
          -             }
          -         
                        /*
                         * Look for MTRR hint to get the effective type in case where PAT
                         * request is for WB.
                         */
          -             mtrr_type = mtrr_type_lookup(start, end);
          +             if (req_type == _PAGE_CACHE_WB) {
          +                     u8 mtrr_type;
          +         
          +                     mtrr_type = mtrr_type_lookup(start, end);
          +                     if (mtrr_type == MTRR_TYPE_UNCACHABLE)
          +                             return _PAGE_CACHE_UC;
          +                     if (mtrr_type == MTRR_TYPE_WRCOMB)
          +                             return _PAGE_CACHE_WC;
          +             }
                    
          -             if (mtrr_type == MTRR_TYPE_UNCACHABLE) {
          -                     *ret_prot = prot | _PAGE_CACHE_UC;
          -             } else if (mtrr_type == MTRR_TYPE_WRCOMB) {
          -                     *ret_prot = prot | _PAGE_CACHE_WC;
          -             } else {
          -                     *ret_prot = prot | _PAGE_CACHE_WB;
          +             return req_type;
          +         }
          +         
          +         static int chk_conflict(struct memtype *new, struct memtype *entry,
          +                             unsigned long *type)
          +         {
          +             if (new->type != entry->type) {
          +                     if (type) {
          +                             new->type = entry->type;
          +                             *type = entry->type;
          +                     } else
          +                             goto conflict;
                        }
                    
           +              /* check for overlaps with more than one entry in the list */
          +             list_for_each_entry_continue(entry, &memtype_list, nd) {
          +                     if (new->end <= entry->start)
          +                             break;
          +                     else if (new->type != entry->type)
          +                             goto conflict;
          +             }
                        return 0;
          +         
          +          conflict:
          +             printk(KERN_INFO "%s:%d conflicting memory types "
          +                    "%Lx-%Lx %s<->%s\n", current->comm, current->pid, new->start,
          +                    new->end, cattr_name(new->type), cattr_name(entry->type));
          +             return -EBUSY;
                    }
                    
                    /*
                      * req_type will have a special case value '-1', when the requester wants to inherit
                     * the memory type from mtrr (if WB), existing PAT, defaulting to UC_MINUS.
                     *
          -          * If ret_type is NULL, function will return an error if it cannot reserve the
          -          * region with req_type. If ret_type is non-null, function will return
          -          * available type in ret_type in case of no error. In case of any error
           +          * If new_type is NULL, the function will return an error if it cannot reserve
           +          * the region with req_type. If new_type is non-NULL, the function will return
           +          * the available type in new_type when there is no error. In case of any error
                      * it will return a negative value.
                     */
                    int reserve_memtype(u64 start, u64 end, unsigned long req_type,
          -                             unsigned long *ret_type)
          +                             unsigned long *new_type)
                    {
          -             struct memtype *new_entry = NULL;
          -             struct memtype *parse;
          +             struct memtype *new, *entry;
                        unsigned long actual_type;
          +             struct list_head *where;
                        int err = 0;
                    
          -             /* Only track when pat_wc_enabled */
          -             if (!pat_wc_enabled) {
          +             BUG_ON(start >= end); /* end is exclusive */
          +         
          +             if (!pat_enabled) {
                                /* This is identical to page table setting without PAT */
          -                     if (ret_type) {
          -                             if (req_type == -1) {
          -                                     *ret_type = _PAGE_CACHE_WB;
          -                             } else {
          -                                     *ret_type = req_type;
          -                             }
          +                     if (new_type) {
          +                             if (req_type == -1)
          +                                     *new_type = _PAGE_CACHE_WB;
          +                             else
          +                                     *new_type = req_type & _PAGE_CACHE_MASK;
                                }
                                return 0;
                        }
                    
                        /* Low ISA region is always mapped WB in page table. No need to track */
          -             if (start >= ISA_START_ADDRESS && (end - 1) <= ISA_END_ADDRESS) {
          -                     if (ret_type)
          -                             *ret_type = _PAGE_CACHE_WB;
          -         
          +             if (is_ISA_range(start, end - 1)) {
          +                     if (new_type)
          +                             *new_type = _PAGE_CACHE_WB;
                                return 0;
                        }
                    
                                 */
                                u8 mtrr_type = mtrr_type_lookup(start, end);
                    
          -                     if (mtrr_type == MTRR_TYPE_WRBACK) {
          -                             req_type = _PAGE_CACHE_WB;
          +                     if (mtrr_type == MTRR_TYPE_WRBACK)
                                        actual_type = _PAGE_CACHE_WB;
          -                     } else {
          -                             req_type = _PAGE_CACHE_UC_MINUS;
          +                     else
                                        actual_type = _PAGE_CACHE_UC_MINUS;
          -                     }
          -             } else {
          -                     req_type &= _PAGE_CACHE_MASK;
          -                     err = pat_x_mtrr_type(start, end, req_type, &actual_type);
          -             }
          -         
          -             if (err) {
          -                     if (ret_type)
          -                             *ret_type = actual_type;
          +             } else
          +                     actual_type = pat_x_mtrr_type(start, end,
          +                                                   req_type & _PAGE_CACHE_MASK);
                    
          -                     return -EINVAL;
          -             }
          -         
          -             new_entry  = kmalloc(sizeof(struct memtype), GFP_KERNEL);
          -             if (!new_entry)
          +             new  = kmalloc(sizeof(struct memtype), GFP_KERNEL);
          +             if (!new)
                                return -ENOMEM;
                    
          -             new_entry->start = start;
          -             new_entry->end = end;
          -             new_entry->type = actual_type;
          +             new->start = start;
          +             new->end = end;
          +             new->type = actual_type;
                    
          -             if (ret_type)
          -                     *ret_type = actual_type;
          +             if (new_type)
          +                     *new_type = actual_type;
                    
                        spin_lock(&memtype_lock);
                    
                        /* Search for existing mapping that overlaps the current range */
          -             list_for_each_entry(parse, &memtype_list, nd) {
          -                     struct memtype *saved_ptr;
          -         
          -                     if (parse->start >= end) {
          -                             pr_debug("New Entry\n");
          -                             list_add(&new_entry->nd, parse->nd.prev);
          -                             new_entry = NULL;
          +             where = NULL;
          +             list_for_each_entry(entry, &memtype_list, nd) {
          +                     if (end <= entry->start) {
          +                             where = entry->nd.prev;
                                        break;
          -                     }
          -         
          -                     if (start <= parse->start && end >= parse->start) {
          -                             if (actual_type != parse->type && ret_type) {
          -                                     actual_type = parse->type;
          -                                     *ret_type = actual_type;
          -                                     new_entry->type = actual_type;
          +                     } else if (start <= entry->start) { /* end > entry->start */
          +                             err = chk_conflict(new, entry, new_type);
          +                             if (!err) {
          +                                     dprintk("Overlap at 0x%Lx-0x%Lx\n",
          +                                             entry->start, entry->end);
          +                                     where = entry->nd.prev;
                                        }
          -         
          -                             if (actual_type != parse->type) {
          -                                     printk(
          -                     KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
          -                                             current->comm, current->pid,
          -                                             start, end,
          -                                             cattr_name(actual_type),
          -                                             cattr_name(parse->type));
          -                                     err = -EBUSY;
          -                                     break;
          -                             }
          -         
          -                             saved_ptr = parse;
          -                             /*
          -                              * Check to see whether the request overlaps more
          -                              * than one entry in the list
          -                              */
          -                             list_for_each_entry_continue(parse, &memtype_list, nd) {
          -                                     if (end <= parse->start) {
          -                                             break;
          -                                     }
          -         
          -                                     if (actual_type != parse->type) {
          -                                             printk(
          -                     KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
          -                                                     current->comm, current->pid,
          -                                                     start, end,
          -                                                     cattr_name(actual_type),
          -                                                     cattr_name(parse->type));
          -                                             err = -EBUSY;
          -                                             break;
          -                                     }
          -                             }
          -         
          -                             if (err) {
          -                                     break;
          -                             }
          -         
          -                             pr_debug("Overlap at 0x%Lx-0x%Lx\n",
          -                                    saved_ptr->start, saved_ptr->end);
          -                             /* No conflict. Go ahead and add this new entry */
          -                             list_add(&new_entry->nd, saved_ptr->nd.prev);
          -                             new_entry = NULL;
                                        break;
          -                     }
          -         
          -                     if (start < parse->end) {
          -                             if (actual_type != parse->type && ret_type) {
          -                                     actual_type = parse->type;
          -                                     *ret_type = actual_type;
          -                                     new_entry->type = actual_type;
          -                             }
          -         
          -                             if (actual_type != parse->type) {
          -                                     printk(
          -                     KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
          -                                             current->comm, current->pid,
          -                                             start, end,
          -                                             cattr_name(actual_type),
          -                                             cattr_name(parse->type));
          -                                     err = -EBUSY;
          -                                     break;
          -                             }
          -         
          -                             saved_ptr = parse;
          -                             /*
          -                              * Check to see whether the request overlaps more
          -                              * than one entry in the list
          -                              */
          -                             list_for_each_entry_continue(parse, &memtype_list, nd) {
          -                                     if (end <= parse->start) {
          -                                             break;
          -                                     }
          -         
          -                                     if (actual_type != parse->type) {
          -                                             printk(
          -                     KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
          -                                                     current->comm, current->pid,
          -                                                     start, end,
          -                                                     cattr_name(actual_type),
          -                                                     cattr_name(parse->type));
          -                                             err = -EBUSY;
          -                                             break;
          -                                     }
          +                     } else if (start < entry->end) { /* start > entry->start */
          +                             err = chk_conflict(new, entry, new_type);
          +                             if (!err) {
          +                                     dprintk("Overlap at 0x%Lx-0x%Lx\n",
          +                                             entry->start, entry->end);
          +                                     where = &entry->nd;
                                        }
          -         
          -                             if (err) {
          -                                     break;
          -                             }
          -         
          -                             pr_debug(KERN_INFO "Overlap at 0x%Lx-0x%Lx\n",
          -                                      saved_ptr->start, saved_ptr->end);
          -                             /* No conflict. Go ahead and add this new entry */
          -                             list_add(&new_entry->nd, &saved_ptr->nd);
          -                             new_entry = NULL;
                                        break;
                                }
                        }
                    
                        if (err) {
          -                     printk(KERN_INFO
          -             "reserve_memtype failed 0x%Lx-0x%Lx, track %s, req %s\n",
          -                             start, end, cattr_name(new_entry->type),
          -                             cattr_name(req_type));
          -                     kfree(new_entry);
          +                     printk(KERN_INFO "reserve_memtype failed 0x%Lx-0x%Lx, "
          +                            "track %s, req %s\n",
          +                            start, end, cattr_name(new->type), cattr_name(req_type));
          +                     kfree(new);
                                spin_unlock(&memtype_lock);
                                return err;
                        }
                    
          -             if (new_entry) {
          -                     /* No conflict. Not yet added to the list. Add to the tail */
          -                     list_add_tail(&new_entry->nd, &memtype_list);
          -                     pr_debug("New Entry\n");
          -             }
          -         
          -             if (ret_type) {
          -                     pr_debug(
          -             "reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n",
          -                             start, end, cattr_name(actual_type),
          -                             cattr_name(req_type), cattr_name(*ret_type));
          -             } else {
          -                     pr_debug(
          -             "reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s\n",
          -                             start, end, cattr_name(actual_type),
          -                             cattr_name(req_type));
          -             }
          +             if (where)
          +                     list_add(&new->nd, where);
          +             else
          +                     list_add_tail(&new->nd, &memtype_list);
                    
                        spin_unlock(&memtype_lock);
          +         
          +             dprintk("reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n",
          +                     start, end, cattr_name(new->type), cattr_name(req_type),
          +                     new_type ? cattr_name(*new_type) : "-");
          +         
                        return err;
                    }
                    
                    int free_memtype(u64 start, u64 end)
                    {
          -             struct memtype *ml;
          +             struct memtype *entry;
                        int err = -EINVAL;
                    
          -             /* Only track when pat_wc_enabled */
          -             if (!pat_wc_enabled) {
          +             if (!pat_enabled)
                                return 0;
          -             }
                    
                        /* Low ISA region is always mapped WB. No need to track */
          -             if (start >= ISA_START_ADDRESS && end <= ISA_END_ADDRESS) {
          +             if (is_ISA_range(start, end - 1))
                                return 0;
          -             }
                    
                        spin_lock(&memtype_lock);
          -             list_for_each_entry(ml, &memtype_list, nd) {
          -                     if (ml->start == start && ml->end == end) {
          -                             list_del(&ml->nd);
          -                             kfree(ml);
          +             list_for_each_entry(entry, &memtype_list, nd) {
          +                     if (entry->start == start && entry->end == end) {
          +                             list_del(&entry->nd);
          +                             kfree(entry);
                                        err = 0;
                                        break;
                                }
                                        current->comm, current->pid, start, end);
                        }
                    
          -             pr_debug("free_memtype request 0x%Lx-0x%Lx\n", start, end);
          +             dprintk("free_memtype request 0x%Lx-0x%Lx\n", start, end);
                        return err;
                    }
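
(A hypothetical caller fragment, not part of the patch: it shows the reserve_memtype()/free_memtype() pairing documented above. The function name, base address and size are invented; the point is that passing a non-NULL new_type lets the caller accept whatever type the tracker actually grants instead of failing on a type mismatch.)

/* illustration only -- invented caller, invented range */
static int example_reserve_wc_range(u64 base, u64 size)
{
        unsigned long got;

        if (reserve_memtype(base, base + size, _PAGE_CACHE_WC, &got))
                return -EBUSY;          /* incompatible entry already tracked */
        if (got != _PAGE_CACHE_WC)
                pr_info("PAT: WC not granted, fell back to %#lx\n", got);
        /* ... map and use the range with the granted type ... */
        free_memtype(base, base + size);
        return 0;
}
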
                    
@@@@@@@@@@@@@@@@@@@@@ -373,8 -373,8 -373,8 -373,8 -373,8 -373,8 -373,8 -373,8 -373,8 -373,8 -471,8 -373,8 -375,8 -373,8 -373,8 -373,8 -373,8 -373,8 -373,8 -373,8 +375,8 @@@@@@@@@@@@@@@@@@@@@ pgprot_t phys_mem_access_prot(struct fi
                        return vma_prot;
                    }
                    
 -------------------#ifdef CONFIG_NONPROMISC_DEVMEM
 -------------------/* This check is done in drivers/char/mem.c in case of NONPROMISC_DEVMEM*/
 +++++++++++++++++++#ifdef CONFIG_STRICT_DEVMEM
 +++++++++++++++++++/* This check is done in drivers/char/mem.c in case of STRICT_DEVMEM*/
                    static inline int range_is_allowed(unsigned long pfn, unsigned long size)
                    {
                        return 1;
@@@@@@@@@@@@@@@@@@@@@ -398,7 -398,7 -398,7 -398,7 -398,7 -398,7 -398,7 -398,7 -398,7 -398,7 -496,7 -398,7 -400,7 -398,7 -398,7 -398,7 -398,7 -398,7 -398,7 -398,7 +400,7 @@@@@@@@@@@@@@@@@@@@@ static inline int range_is_allowed(unsi
                        }
                        return 1;
                    }
 -------------------#endif /* CONFIG_NONPROMISC_DEVMEM */
 +++++++++++++++++++#endif /* CONFIG_STRICT_DEVMEM */
                    
                    int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
                                                unsigned long size, pgprot_t *vma_prot)
                         * caching for the high addresses through the KEN pin, but
                         * we maintain the tradition of paranoia in this code.
                         */
          -             if (!pat_wc_enabled &&
          -                 ! ( test_bit(X86_FEATURE_MTRR, boot_cpu_data.x86_capability) ||
          -                     test_bit(X86_FEATURE_K6_MTRR, boot_cpu_data.x86_capability) ||
          -                     test_bit(X86_FEATURE_CYRIX_ARR, boot_cpu_data.x86_capability) ||
          -                     test_bit(X86_FEATURE_CENTAUR_MCR, boot_cpu_data.x86_capability)) &&
          -                (pfn << PAGE_SHIFT) >= __pa(high_memory)) {
          +             if (!pat_enabled &&
          +                 !(boot_cpu_has(X86_FEATURE_MTRR) ||
          +                   boot_cpu_has(X86_FEATURE_K6_MTRR) ||
          +                   boot_cpu_has(X86_FEATURE_CYRIX_ARR) ||
          +                   boot_cpu_has(X86_FEATURE_CENTAUR_MCR)) &&
          +                 (pfn << PAGE_SHIFT) >= __pa(high_memory)) {
                                flags = _PAGE_CACHE_UC;
                        }
                    #endif
                        if (retval < 0)
                                return 0;
                    
       -        -       if (((pfn <= max_low_pfn_mapped) ||
       -        -            (pfn >= (1UL<<(32 - PAGE_SHIFT)) && pfn <= max_pfn_mapped)) &&
          -             if (pfn <= max_pfn_mapped &&
          -                     ioremap_change_attr((unsigned long)__va(offset), size, flags) < 0) {
       +  +     +       if (((pfn < max_low_pfn_mapped) ||
       +  +     +            (pfn >= (1UL<<(32 - PAGE_SHIFT)) && pfn < max_pfn_mapped)) &&
          +                 ioremap_change_attr((unsigned long)__va(offset), size, flags) < 0) {
                                free_memtype(offset, offset + size);
                                printk(KERN_INFO
                                "%s:%d /dev/mem ioremap_change_attr failed %s for %Lx-%Lx\n",
@@@@@@@@@@@@@@@@@@@@@ -489,3 -489,3 -489,3 -489,3 -489,3 -489,3 -489,3 -489,3 -489,3 -489,3 -586,4 -489,3 -491,89 -489,3 -489,3 -489,3 -489,3 -489,3 -489,3 -489,3 +491,89 @@@@@@@@@@@@@@@@@@@@@ void unmap_devmem(unsigned long pfn, un
                    
                        free_memtype(addr, addr + size);
                    }
++++++++++ + +++++++
++++++++++++ +++++++#if defined(CONFIG_DEBUG_FS)
++++++++++++ +++++++
++++++++++++ +++++++/* get Nth element of the linked list */
++++++++++++ +++++++static struct memtype *memtype_get_idx(loff_t pos)
++++++++++++ +++++++{
++++++++++++ +++++++    struct memtype *list_node, *print_entry;
++++++++++++ +++++++    int i = 1;
++++++++++++ +++++++
++++++++++++ +++++++    print_entry  = kmalloc(sizeof(struct memtype), GFP_KERNEL);
++++++++++++ +++++++    if (!print_entry)
++++++++++++ +++++++            return NULL;
++++++++++++ +++++++
++++++++++++ +++++++    spin_lock(&memtype_lock);
++++++++++++ +++++++    list_for_each_entry(list_node, &memtype_list, nd) {
++++++++++++ +++++++            if (pos == i) {
++++++++++++ +++++++                    *print_entry = *list_node;
++++++++++++ +++++++                    spin_unlock(&memtype_lock);
++++++++++++ +++++++                    return print_entry;
++++++++++++ +++++++            }
++++++++++++ +++++++            ++i;
++++++++++++ +++++++    }
++++++++++++ +++++++    spin_unlock(&memtype_lock);
++++++++++++ +++++++    kfree(print_entry);
++++++++++++ +++++++    return NULL;
++++++++++++ +++++++}
++++++++++++ +++++++
++++++++++++ +++++++static void *memtype_seq_start(struct seq_file *seq, loff_t *pos)
++++++++++++ +++++++{
++++++++++++ +++++++    if (*pos == 0) {
++++++++++++ +++++++            ++*pos;
++++++++++++ +++++++            seq_printf(seq, "PAT memtype list:\n");
++++++++++++ +++++++    }
++++++++++++ +++++++
++++++++++++ +++++++    return memtype_get_idx(*pos);
++++++++++++ +++++++}
++++++++++++ +++++++
++++++++++++ +++++++static void *memtype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
++++++++++++ +++++++{
++++++++++++ +++++++    ++*pos;
++++++++++++ +++++++    return memtype_get_idx(*pos);
++++++++++++ +++++++}
++++++++++++ +++++++
++++++++++++ +++++++static void memtype_seq_stop(struct seq_file *seq, void *v)
++++++++++++ +++++++{
++++++++++++ +++++++}
++++++++++++ +++++++
++++++++++++ +++++++static int memtype_seq_show(struct seq_file *seq, void *v)
++++++++++++ +++++++{
++++++++++++ +++++++    struct memtype *print_entry = (struct memtype *)v;
++++++++++++ +++++++
++++++++++++ +++++++    seq_printf(seq, "%s @ 0x%Lx-0x%Lx\n", cattr_name(print_entry->type),
++++++++++++ +++++++                    print_entry->start, print_entry->end);
++++++++++++ +++++++    kfree(print_entry);
++++++++++++ +++++++    return 0;
++++++++++++ +++++++}
++++++++++++ +++++++
++++++++++++ +++++++static struct seq_operations memtype_seq_ops = {
++++++++++++ +++++++    .start = memtype_seq_start,
++++++++++++ +++++++    .next  = memtype_seq_next,
++++++++++++ +++++++    .stop  = memtype_seq_stop,
++++++++++++ +++++++    .show  = memtype_seq_show,
++++++++++++ +++++++};
++++++++++++ +++++++
++++++++++++ +++++++static int memtype_seq_open(struct inode *inode, struct file *file)
++++++++++++ +++++++{
++++++++++++ +++++++    return seq_open(file, &memtype_seq_ops);
++++++++++++ +++++++}
++++++++++++ +++++++
++++++++++++ +++++++static const struct file_operations memtype_fops = {
++++++++++++ +++++++    .open    = memtype_seq_open,
++++++++++++ +++++++    .read    = seq_read,
++++++++++++ +++++++    .llseek  = seq_lseek,
++++++++++++ +++++++    .release = seq_release,
++++++++++++ +++++++};
++++++++++++ +++++++
++++++++++++ +++++++static int __init pat_memtype_list_init(void)
++++++++++++ +++++++{
++++++++++++ +++++++    debugfs_create_file("pat_memtype_list", S_IRUSR, arch_debugfs_dir,
++++++++++++ +++++++                            NULL, &memtype_fops);
++++++++++++ +++++++    return 0;
++++++++++++ +++++++}
++++++++++++ +++++++
++++++++++++ +++++++late_initcall(pat_memtype_list_init);
++++++++++++ +++++++
++++++++++++ +++++++#endif /* CONFIG_DEBUG_FS */
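
(A hypothetical user-space reader for the debugfs file registered above, not part of the patch. It assumes debugfs is mounted at /sys/kernel/debug and that arch_debugfs_dir corresponds to the x86 directory there; each output line is produced by memtype_seq_show().)

#include <stdio.h>

int main(void)
{
        char line[256];
        FILE *f = fopen("/sys/kernel/debug/x86/pat_memtype_list", "r");

        if (!f) {
                perror("pat_memtype_list");
                return 1;
        }
        while (fgets(line, sizeof(line), f))
                fputs(line, stdout);    /* e.g. "write-back @ 0xd0000000-0xd0100000" */
        fclose(f);
        return 0;
}
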
diff --combined arch/x86/pci/pci.h
index 3e25deb821ac9152feb484db2414565c541f18de,3e25deb821ac9152feb484db2414565c541f18de,3e25deb821ac9152feb484db2414565c541f18de,3e25deb821ac9152feb484db2414565c541f18de,3e25deb821ac9152feb484db2414565c541f18de,3e25deb821ac9152feb484db2414565c541f18de,b2270a55b0cf48ed934913d48a6c297cee38e1c8,58241748470f0e673d73b37444e670bff5f8383e,b2270a55b0cf48ed934913d48a6c297cee38e1c8,3e25deb821ac9152feb484db2414565c541f18de,720c4c55453462d78770d03506c26ae45d6d2edd,3e25deb821ac9152feb484db2414565c541f18de,b2270a55b0cf48ed934913d48a6c297cee38e1c8,b2270a55b0cf48ed934913d48a6c297cee38e1c8,3e25deb821ac9152feb484db2414565c541f18de,3e25deb821ac9152feb484db2414565c541f18de,a2c55ee98aff877b51057831ce6053e7c4b7bb4d,3e25deb821ac9152feb484db2414565c541f18de,3e25deb821ac9152feb484db2414565c541f18de,3e25deb821ac9152feb484db2414565c541f18de..15b9cf6be729c0c7cddee3ff54d1ce9809f32d58
                    #define PCI_CAN_SKIP_ISA_ALIGN      0x8000
                    #define PCI_USE__CRS                0x10000
                    #define PCI_CHECK_ENABLE_AMD_MMCONF 0x20000
          +         #define PCI_HAS_IO_ECS              0x40000
      +++ + ++  +   #define PCI_NOASSIGN_ROMS   0x80000
                    
                    extern unsigned int pci_probe;
                    extern unsigned long pirq_table_addr;
@@@@@@@@@@@@@@@@@@@@@ -40,6 -40,6 -40,6 -40,6 -40,6 -40,6 -39,6 -39,6 -39,6 -40,6 -38,9 -40,6 -39,6 -39,6 -40,6 -40,6 -39,6 -40,6 -40,6 -40,6 +40,6 @@@@@@@@@@@@@@@@@@@@@ enum pci_bf_sort_state 
                        pci_dmi_bf,
                    };
                    
          -         extern void __init dmi_check_pciprobe(void);
          -         extern void __init dmi_check_skip_isa_align(void);
          -         
                    /* pci-i386.c */
                    
                    extern unsigned int pcibios_max_latency;
@@@@@@@@@@@@@@@@@@@@@ -97,19 -97,19 -97,19 -97,19 -97,19 -97,19 -96,19 -96,18 -96,19 -97,19 -98,10 -97,19 -96,19 -96,19 -97,19 -97,19 -96,20 -97,19 -97,19 -97,19 +97,20 @@@@@@@@@@@@@@@@@@@@@ extern struct pci_raw_ops *raw_pci_ext_
                    
                    extern struct pci_raw_ops pci_direct_conf1;
                    
          +         /* arch_initcall level */
                    extern int pci_direct_probe(void);
                    extern void pci_direct_init(int type);
                    extern void pci_pcbios_init(void);
                    extern int pci_olpc_init(void);
------- -- ----- ---extern int __init pci_numa_init(void);
          +         extern void __init dmi_check_pciprobe(void);
          +         extern void __init dmi_check_skip_isa_align(void);
          +         
           +         /* some commonly used subsys_initcalls */
          +         extern int __init pci_acpi_init(void);
          +         extern int __init pcibios_irq_init(void);
++++++++++++++++ +++extern int __init pci_visws_init(void);
++++++++++++++++ +++extern int __init pci_numaq_init(void);
          +         extern int __init pcibios_init(void);
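
(A minimal, hypothetical sketch of how entry points at these initcall levels are usually wired up; the real call sites live elsewhere in arch/x86/pci and are outside this hunk, and the function name below is invented.)

/* hypothetical wiring sketch only */
static int __init example_pci_arch_init(void)
{
        int type = pci_direct_probe();  /* arch_initcall-level probe declared above */

        dmi_check_pciprobe();
        dmi_check_skip_isa_align();
        pci_direct_init(type);
        return 0;
}
arch_initcall(example_pci_arch_init);
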
                    
                    /* pci-mmconfig.c */
                    
diff --combined arch/x86/xen/enlighten.c
index bb508456ef523e1fa50f77a2993bf481b06f03f0,bb508456ef523e1fa50f77a2993bf481b06f03f0,7f26c37187777ebd55fe3b6fb42d56499b5a6bbe,bb508456ef523e1fa50f77a2993bf481b06f03f0,bb508456ef523e1fa50f77a2993bf481b06f03f0,bb508456ef523e1fa50f77a2993bf481b06f03f0,dcd4e51f2f16829e2e9519615606bc93e4a4cd1e,dcd4e51f2f16829e2e9519615606bc93e4a4cd1e,bb508456ef523e1fa50f77a2993bf481b06f03f0,bb508456ef523e1fa50f77a2993bf481b06f03f0,f09c1c69c37a1498da07524c477e98bfc397ebe6,bb508456ef523e1fa50f77a2993bf481b06f03f0,dcd4e51f2f16829e2e9519615606bc93e4a4cd1e,bb508456ef523e1fa50f77a2993bf481b06f03f0,bb508456ef523e1fa50f77a2993bf481b06f03f0,bb508456ef523e1fa50f77a2993bf481b06f03f0,dcd4e51f2f16829e2e9519615606bc93e4a4cd1e,bb508456ef523e1fa50f77a2993bf481b06f03f0,5328e46d9cf739e89ade6fc9c3cf5b18bf141066,3da6acb7eafcef8679f13d35dc25b16f3ccb6f4c..194bbd6e32410dd4c00c1aa360bca80b312d41b1
                    #include <xen/interface/sched.h>
                    #include <xen/features.h>
                    #include <xen/page.h>
+++++++++++++++++++ #include <xen/hvc-console.h>
                    
                    #include <asm/paravirt.h>
                    #include <asm/page.h>
                    #include <asm/xen/hypervisor.h>
                    #include <asm/fixmap.h>
                    #include <asm/processor.h>
+++++++++++++++++++ #include <asm/msr-index.h>
                    #include <asm/setup.h>
                    #include <asm/desc.h>
                    #include <asm/pgtable.h>
                    #include <asm/tlbflush.h>
                    #include <asm/reboot.h>
---------- -------- #include <asm/pgalloc.h>
                    
                    #include "xen-ops.h"
                    #include "mmu.h"
@@@@@@@@@@@@@@@@@@@@@ -56,6 -56,6 -56,6 -56,6 -56,6 -56,6 -56,6 -56,6 -56,6 -56,6 -55,6 -56,6 -56,6 -56,6 -56,6 -56,6 -56,6 -56,6 -56,6 -57,18 +57,18 @@@@@@@@@@@@@@@@@@@@@ EXPORT_SYMBOL_GPL(hypercall_page)
                    DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
                    DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);
                    
+++++++++++++++++++ /*
+++++++++++++++++++  * Identity map, in addition to plain kernel map.  This needs to be
+++++++++++++++++++  * large enough to allocate the page table pages needed to map the rest.
+++++++++++++++++++  * Each page can map 2MB.
+++++++++++++++++++  */
+++++++++++++++++++ static pte_t level1_ident_pgt[PTRS_PER_PTE * 4] __page_aligned_bss;
+++++++++++++++++++ 
+++++++++++++++++++ #ifdef CONFIG_X86_64
+++++++++++++++++++ /* l3 pud for userspace vsyscall mapping */
+++++++++++++++++++ static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss;
+++++++++++++++++++ #endif /* CONFIG_X86_64 */
+++++++++++++++++++ 
                    /*
                     * Note about cr3 (pagetable base) values:
                     *
@@@@@@@@@@@@@@@@@@@@@ -76,13 -76,13 -76,13 -76,13 -76,13 -76,13 -76,13 -76,13 -76,13 -76,13 -75,13 -76,13 -76,13 -76,13 -76,13 -76,13 -76,13 -76,13 -76,13 -89,13 +89,13 @@@@@@@@@@@@@@@@@@@@@ DEFINE_PER_CPU(unsigned long, xen_curre
                    struct start_info *xen_start_info;
                    EXPORT_SYMBOL_GPL(xen_start_info);
                    
          -         static /* __initdata */ struct shared_info dummy_shared_info;
          +         struct shared_info xen_dummy_shared_info;
                    
                    /*
                     * Point at some empty memory to start with. We map the real shared_info
                     * page as soon as fixmap is up and running.
                     */
          -         struct shared_info *HYPERVISOR_shared_info = (void *)&dummy_shared_info;
          +         struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info;
                    
                    /*
                     * Flag to determine whether vcpu info placement is available on all
                     */
                    static int have_vcpu_info_placement = 1;
                    
          -         static void __init xen_vcpu_setup(int cpu)
          +         static void xen_vcpu_setup(int cpu)
                    {
                        struct vcpu_register_vcpu_info info;
                        int err;
                        struct vcpu_info *vcpup;
                    
          -             BUG_ON(HYPERVISOR_shared_info == &dummy_shared_info);
          +             BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
                        per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
                    
                        if (!have_vcpu_info_placement)
                        }
                    }
                    
          +         /*
          +          * On restore, set the vcpu placement up again.
          +          * If it fails, then we're in a bad state, since
          +          * we can't back out from using it...
          +          */
          +         void xen_vcpu_restore(void)
          +         {
          +             if (have_vcpu_info_placement) {
          +                     int cpu;
          +         
          +                     for_each_online_cpu(cpu) {
          +                             bool other_cpu = (cpu != smp_processor_id());
          +         
          +                             if (other_cpu &&
          +                                 HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL))
          +                                     BUG();
          +         
          +                             xen_vcpu_setup(cpu);
          +         
          +                             if (other_cpu &&
          +                                 HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL))
          +                                     BUG();
          +                     }
          +         
          +                     BUG_ON(!have_vcpu_info_placement);
          +             }
          +         }
          +         
                    static void __init xen_banner(void)
                    {
++++++++++++++++++ +    unsigned version = HYPERVISOR_xen_version(XENVER_version, NULL);
++++++++++++++++++ +    struct xen_extraversion extra;
++++++++++++++++++ +    HYPERVISOR_xen_version(XENVER_extraversion, &extra);
++++++++++++++++++ +
                        printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
                               pv_info.name);
---------- ------- -    printk(KERN_INFO "Hypervisor signature: %s%s\n",
---------- ------- -           xen_start_info->magic,
          -             printk(KERN_INFO "Hypervisor signature: %s\n", xen_start_info->magic);
++++++++++++++++++ +    printk(KERN_INFO "Xen version: %d.%d%s%s\n",
++++++++++++++++++ +           version >> 16, version & 0xffff, extra.extraversion,
          +                    xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : "");
                    }
                    
                    static void xen_cpuid(unsigned int *ax, unsigned int *bx,
@@@@@@@@@@@@@@@@@@@@@ -266,13 -266,13 -266,13 -266,13 -266,13 -266,13 -266,13 -266,13 -266,13 -266,13 -235,13 -266,13 -266,13 -266,13 -266,13 -266,13 -266,13 -266,13 -270,13 -279,13 +283,13 @@@@@@@@@@@@@@@@@@@@@ static void xen_irq_enable(void
                    {
                        struct vcpu_info *vcpu;
                    
          -             /* There's a one instruction preempt window here.  We need to
          -                make sure we're don't switch CPUs between getting the vcpu
          -                pointer and updating the mask. */
          -             preempt_disable();
          +             /* We don't need to worry about being preempted here, since
          +                either a) interrupts are disabled, so no preemption, or b)
          +                the caller is confused and is trying to re-enable interrupts
          +                on an indeterminate processor. */
          +         
                        vcpu = x86_read_percpu(xen_vcpu);
                        vcpu->evtchn_upcall_mask = 0;
          -             preempt_enable_no_resched();
                    
                        /* Doesn't matter if we get preempted here, because any
                           pending event will get dealt with anyway. */
                    static void xen_safe_halt(void)
                    {
                        /* Blocking includes an implicit local_irq_enable(). */
          -             if (HYPERVISOR_sched_op(SCHEDOP_block, 0) != 0)
          +             if (HYPERVISOR_sched_op(SCHEDOP_block, NULL) != 0)
                                BUG();
                    }
                    
@@@@@@@@@@@@@@@@@@@@@ -363,14 -363,14 -363,14 -363,14 -363,14 -363,14 -363,14 -363,14 -363,14 -363,14 -332,14 -363,14 -363,14 -363,14 -363,14 -363,14 -363,14 -363,14 -367,14 -376,6 +380,6 @@@@@@@@@@@@@@@@@@@@@ static void load_TLS_descriptor(struct 
                    
                    static void xen_load_tls(struct thread_struct *t, unsigned int cpu)
                    {
-------------------     xen_mc_batch();
------------------- 
-------------------     load_TLS_descriptor(t, cpu, 0);
-------------------     load_TLS_descriptor(t, cpu, 1);
-------------------     load_TLS_descriptor(t, cpu, 2);
------------------- 
-------------------     xen_mc_issue(PARAVIRT_LAZY_CPU);
------------------- 
                        /*
                         * XXX sleazy hack: If we're being called in a lazy-cpu zone,
                         * it means we're in a context switch, and %gs has just been
                         * Either way, it has been saved, and the new value will get
                         * loaded properly.  This will go away as soon as Xen has been
                         * modified to not save/restore %gs for normal hypercalls.
+++++++++++++++++++      *
+++++++++++++++++++      * On x86_64, this hack is not used for %gs, because gs points
+++++++++++++++++++      * to KERNEL_GS_BASE (and uses it for PDA references), so we
+++++++++++++++++++      * must not zero %gs on x86_64
+++++++++++++++++++      *
+++++++++++++++++++      * For x86_64, we need to zero %fs, otherwise we may get an
+++++++++++++++++++      * exception between the new %fs descriptor being loaded and
+++++++++++++++++++      * %fs being effectively cleared at __switch_to().
                         */
-------------------     if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU)
+++++++++++++++++++     if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) {
+++++++++++++++++++ #ifdef CONFIG_X86_32
                                loadsegment(gs, 0);
+++++++++++++++++++ #else
+++++++++++++++++++             loadsegment(fs, 0);
+++++++++++++++++++ #endif
+++++++++++++++++++     }
+++++++++++++++++++ 
+++++++++++++++++++     xen_mc_batch();
+++++++++++++++++++ 
+++++++++++++++++++     load_TLS_descriptor(t, cpu, 0);
+++++++++++++++++++     load_TLS_descriptor(t, cpu, 1);
+++++++++++++++++++     load_TLS_descriptor(t, cpu, 2);
+++++++++++++++++++ 
+++++++++++++++++++     xen_mc_issue(PARAVIRT_LAZY_CPU);
+++++++++++++++++++ }
+++++++++++++++++++ 
+++++++++++++++++++ #ifdef CONFIG_X86_64
+++++++++++++++++++ static void xen_load_gs_index(unsigned int idx)
+++++++++++++++++++ {
+++++++++++++++++++     if (HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, idx))
+++++++++++++++++++             BUG();
                    }
+++++++++++++++++++ #endif
                    
                    static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
                                                const void *ptr)
                        preempt_enable();
                    }
                    
------------------- static int cvt_gate_to_trap(int vector, u32 low, u32 high,
+++++++++++++++++++ static int cvt_gate_to_trap(int vector, const gate_desc *val,
                                            struct trap_info *info)
                    {
-------------------     u8 type, dpl;
------------------- 
-------------------     type = (high >> 8) & 0x1f;
-------------------     dpl = (high >> 13) & 3;
------------------- 
-------------------     if (type != 0xf && type != 0xe)
+++++++++++++++++++     if (val->type != 0xf && val->type != 0xe)
                                return 0;
                    
                        info->vector = vector;
-------------------     info->address = (high & 0xffff0000) | (low & 0x0000ffff);
-------------------     info->cs = low >> 16;
-------------------     info->flags = dpl;
+++++++++++++++++++     info->address = gate_offset(*val);
+++++++++++++++++++     info->cs = gate_segment(*val);
+++++++++++++++++++     info->flags = val->dpl;
                        /* interrupt gates clear IF */
-------------------     if (type == 0xe)
+++++++++++++++++++     if (val->type == 0xe)
                                info->flags |= 4;
                    
                        return 1;
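
(For reference, a standalone sketch of the 32-bit IDT gate layout that cvt_gate_to_trap() used to decode by hand in the removed lines above and now reads through gate_desc. The struct and the example values are invented; the shifts and masks are taken from the removed code.)

#include <stdio.h>
#include <stdint.h>

/* hypothetical stand-in for a 32-bit gate descriptor: two 32-bit words */
struct gate32 {
        uint32_t a;     /* selector in bits 31:16, offset 15:0 in bits 15:0    */
        uint32_t b;     /* offset 31:16 in bits 31:16, P/DPL/type in bits 15:8 */
};

int main(void)
{
        struct gate32 g = { .a = 0x00600123, .b = 0xc010ee00 };

        uint32_t offset = (g.b & 0xffff0000) | (g.a & 0x0000ffff);
        uint32_t cs     = g.a >> 16;
        uint32_t type   = (g.b >> 8) & 0x1f;    /* 0xe interrupt gate, 0xf trap gate */
        uint32_t dpl    = (g.b >> 13) & 3;

        printf("offset=%#x cs=%#x type=%#x dpl=%u\n", offset, cs, type, dpl);
        return 0;
}
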
@@@@@@@@@@@@@@@@@@@@@ -443,11 -443,11 -443,11 -443,11 -443,11 -443,11 -443,11 -443,11 -443,11 -443,11 -412,11 -443,11 -443,11 -443,11 -443,11 -443,11 -443,11 -443,11 -447,11 -472,10 +476,10 @@@@@@@@@@@@@@@@@@@@@ static void xen_write_idt_entry(gate_de
                    
                        if (p >= start && (p + 8) <= end) {
                                struct trap_info info[2];
-------------------             u32 *desc = (u32 *)g;
                    
                                info[1].address = 0;
                    
-------------------             if (cvt_gate_to_trap(entrynum, desc[0], desc[1], &info[0]))
+++++++++++++++++++             if (cvt_gate_to_trap(entrynum, g, &info[0]))
                                        if (HYPERVISOR_set_trap_table(info))
                                                BUG();
                        }
@@@@@@@@@@@@@@@@@@@@@ -460,13 -460,13 -460,13 -460,13 -460,13 -460,13 -460,13 -460,13 -460,13 -460,13 -429,13 -460,13 -460,13 -460,13 -460,13 -460,13 -460,13 -460,13 -464,13 -488,13 +492,13 @@@@@@@@@@@@@@@@@@@@@ static void xen_convert_trap_info(cons
                    {
                        unsigned in, out, count;
                    
-------------------     count = (desc->size+1) / 8;
+++++++++++++++++++     count = (desc->size+1) / sizeof(gate_desc);
                        BUG_ON(count > 256);
                    
                        for (in = out = 0; in < count; in++) {
-------------------             const u32 *entry = (u32 *)(desc->address + in * 8);
+++++++++++++++++++             gate_desc *entry = (gate_desc*)(desc->address) + in;
                    
-------------------             if (cvt_gate_to_trap(in, entry[0], entry[1], &traps[out]))
+++++++++++++++++++             if (cvt_gate_to_trap(in, entry, &traps[out]))
                                        out++;
                        }
                        traps[out].address = 0;
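
The switch from a hard-coded 8 to sizeof(gate_desc) matters because the descriptor size is architecture dependent (8 bytes on 32-bit, 16 bytes on 64-bit). A minimal sketch of the count computation, assuming the same {size, address} descriptor-table pointer (struct desc_ptr) that the code above dereferences:

static unsigned idt_entry_count_sketch(const struct desc_ptr *desc)
{
        /* desc->size is the IDT limit in bytes minus one */
        return (desc->size + 1) / sizeof(gate_desc);
}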
@@@@@@@@@@@@@@@@@@@@@ -638,30 -638,30 -638,30 -638,30 -638,30 -638,30 -638,30 -638,30 -638,30 -638,30 -607,6 -638,30 -638,30 -638,30 -638,30 -638,30 -638,30 -638,30 -642,30 -666,30 +670,30 @@@@@@@@@@@@@@@@@@@@@ static void xen_flush_tlb_others(const 
                        xen_mc_issue(PARAVIRT_LAZY_MMU);
                    }
                    
          +         static void xen_clts(void)
          +         {
          +             struct multicall_space mcs;
          +         
          +             mcs = xen_mc_entry(0);
          +         
          +             MULTI_fpu_taskswitch(mcs.mc, 0);
          +         
          +             xen_mc_issue(PARAVIRT_LAZY_CPU);
          +         }
          +         
          +         static void xen_write_cr0(unsigned long cr0)
          +         {
          +             struct multicall_space mcs;
          +         
          +             /* Only pay attention to cr0.TS; everything else is
          +                ignored. */
          +             mcs = xen_mc_entry(0);
          +         
          +             MULTI_fpu_taskswitch(mcs.mc, (cr0 & X86_CR0_TS) != 0);
          +         
          +             xen_mc_issue(PARAVIRT_LAZY_CPU);
          +         }
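
Both helpers above end up in the same MULTI_fpu_taskswitch multicall because clts is nothing more than a cr0.TS clear, and Xen only lets the guest toggle TS. A minimal sketch of that equivalence, assuming read_cr0 stays routed to native_read_cr0 as in the ops table below; the helper name is illustrative only.

static void xen_clear_ts_sketch(void)
{
        /* same effect as xen_clts(): drop cr0.TS, leave everything else alone */
        xen_write_cr0(native_read_cr0() & ~X86_CR0_TS);
}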
          +         
                    static void xen_write_cr2(unsigned long cr2)
                    {
                        x86_read_percpu(xen_vcpu)->arch.cr2 = cr2;
@@@@@@@@@@@@@@@@@@@@@ -679,10 -679,10 -679,10 -679,10 -679,10 -679,10 -679,10 -679,10 -679,10 -679,10 -624,8 -679,10 -679,10 -679,10 -679,10 -679,10 -679,10 -679,10 -683,10 -707,10 +711,10 @@@@@@@@@@@@@@@@@@@@@ static unsigned long xen_read_cr2_direc
                    
                    static void xen_write_cr4(unsigned long cr4)
                    {
          -             /* Just ignore cr4 changes; Xen doesn't allow us to do
          -                anything anyway. */
          +             cr4 &= ~X86_CR4_PGE;
          +             cr4 &= ~X86_CR4_PSE;
          +         
          +             native_write_cr4(cr4);
                    }
                    
                    static unsigned long xen_read_cr3(void)
@@@@@@@@@@@@@@@@@@@@@ -695,33 -695,33 -695,33 -695,33 -695,33 -695,33 -695,33 -695,33 -695,33 -695,33 -638,33 -695,33 -695,33 -695,33 -695,33 -695,33 -695,33 -695,33 -699,33 -723,89 +727,89 @@@@@@@@@@@@@@@@@@@@@ static void set_current_cr3(void *v
                        x86_write_percpu(xen_current_cr3, (unsigned long)v);
                    }
                    
------------------- static void xen_write_cr3(unsigned long cr3)
+++++++++++++++++++ static void __xen_write_cr3(bool kernel, unsigned long cr3)
                    {
                        struct mmuext_op *op;
                        struct multicall_space mcs;
-------------------     unsigned long mfn = pfn_to_mfn(PFN_DOWN(cr3));
+++++++++++++++++++     unsigned long mfn;
                    
-------------------     BUG_ON(preemptible());
+++++++++++++++++++     if (cr3)
+++++++++++++++++++             mfn = pfn_to_mfn(PFN_DOWN(cr3));
+++++++++++++++++++     else
+++++++++++++++++++             mfn = 0;
                    
-------------------     mcs = xen_mc_entry(sizeof(*op));  /* disables interrupts */
+++++++++++++++++++     WARN_ON(mfn == 0 && kernel);
                    
-------------------     /* Update while interrupts are disabled, so its atomic with
-------------------        respect to ipis */
-------------------     x86_write_percpu(xen_cr3, cr3);
+++++++++++++++++++     mcs = __xen_mc_entry(sizeof(*op));
                    
                        op = mcs.args;
-------------------     op->cmd = MMUEXT_NEW_BASEPTR;
+++++++++++++++++++     op->cmd = kernel ? MMUEXT_NEW_BASEPTR : MMUEXT_NEW_USER_BASEPTR;
                        op->arg1.mfn = mfn;
                    
                        MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
                    
-------------------     /* Update xen_update_cr3 once the batch has actually
-------------------        been submitted. */
-------------------     xen_mc_callback(set_current_cr3, (void *)cr3);
+++++++++++++++++++     if (kernel) {
+++++++++++++++++++             x86_write_percpu(xen_cr3, cr3);
+++++++++++++++++++ 
+++++++++++++++++++             /* Update xen_current_cr3 once the batch has actually
+++++++++++++++++++                been submitted. */
+++++++++++++++++++             xen_mc_callback(set_current_cr3, (void *)cr3);
+++++++++++++++++++     }
+++++++++++++++++++ }
+++++++++++++++++++ 
+++++++++++++++++++ static void xen_write_cr3(unsigned long cr3)
+++++++++++++++++++ {
+++++++++++++++++++     BUG_ON(preemptible());
+++++++++++++++++++ 
+++++++++++++++++++     xen_mc_batch();  /* disables interrupts */
+++++++++++++++++++ 
+++++++++++++++++++     /* Update while interrupts are disabled, so it's atomic with
+++++++++++++++++++        respect to IPIs */
+++++++++++++++++++     x86_write_percpu(xen_cr3, cr3);
+++++++++++++++++++ 
+++++++++++++++++++     __xen_write_cr3(true, cr3);
+++++++++++++++++++ 
+++++++++++++++++++ #ifdef CONFIG_X86_64
+++++++++++++++++++     {
+++++++++++++++++++             pgd_t *user_pgd = xen_get_user_pgd(__va(cr3));
+++++++++++++++++++             if (user_pgd)
+++++++++++++++++++                     __xen_write_cr3(false, __pa(user_pgd));
+++++++++++++++++++             else
+++++++++++++++++++                     __xen_write_cr3(false, 0);
+++++++++++++++++++     }
+++++++++++++++++++ #endif
                    
                        xen_mc_issue(PARAVIRT_LAZY_CPU);  /* interrupts restored */
                    }
                    
+++++++++++++++++++ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
+++++++++++++++++++ {
+++++++++++++++++++     int ret;
+++++++++++++++++++ 
+++++++++++++++++++     ret = 0;
+++++++++++++++++++ 
+++++++++++++++++++     switch(msr) {
+++++++++++++++++++ #ifdef CONFIG_X86_64
+++++++++++++++++++             unsigned which;
+++++++++++++++++++             u64 base;
+++++++++++++++++++ 
+++++++++++++++++++     case MSR_FS_BASE:               which = SEGBASE_FS; goto set;
+++++++++++++++++++     case MSR_KERNEL_GS_BASE:        which = SEGBASE_GS_USER; goto set;
+++++++++++++++++++     case MSR_GS_BASE:               which = SEGBASE_GS_KERNEL; goto set;
+++++++++++++++++++ 
+++++++++++++++++++     set:
+++++++++++++++++++             base = ((u64)high << 32) | low;
+++++++++++++++++++             if (HYPERVISOR_set_segment_base(which, base) != 0)
+++++++++++++++++++                     ret = -EFAULT;
+++++++++++++++++++             break;
+++++++++++++++++++ #endif
+++++++++++++++++++     default:
+++++++++++++++++++             ret = native_write_msr_safe(msr, low, high);
+++++++++++++++++++     }
+++++++++++++++++++ 
+++++++++++++++++++     return ret;
+++++++++++++++++++ }
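
A minimal sketch of how a 64-bit segment-base write reaches the switch above: the generic wrmsr wrappers hand the value down as a low/high pair, which xen_write_msr_safe() reassembles before issuing HYPERVISOR_set_segment_base(). The helper below is illustrative only and assumes MSR_FS_BASE from <asm/msr-index.h>.

static int xen_set_fs_base_sketch(u64 base)
{
        /* the same split the paravirt wrmsr wrappers perform */
        unsigned low  = (unsigned)base;
        unsigned high = (unsigned)(base >> 32);

        return xen_write_msr_safe(MSR_FS_BASE, low, high);
}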
+++++++++++++++++++ 
                    /* Early in boot, while setting up the initial pagetable, assume
                       everything is pinned. */
                    static __init void xen_alloc_pte_init(struct mm_struct *mm, u32 pfn)
@@@@@@@@@@@@@@@@@@@@@ -778,6 -778,6 -778,6 -778,6 -778,6 -778,6 -778,6 -778,6 -778,6 -778,6 -721,6 -778,6 -778,6 -778,6 -778,6 -778,6 -778,6 -778,6 -782,6 -862,48 +866,48 @@@@@@@@@@@@@@@@@@@@@ static void xen_alloc_pmd(struct mm_str
                        xen_alloc_ptpage(mm, pfn, PT_PMD);
                    }
                    
+++++++++++++++++++ static int xen_pgd_alloc(struct mm_struct *mm)
+++++++++++++++++++ {
+++++++++++++++++++     pgd_t *pgd = mm->pgd;
+++++++++++++++++++     int ret = 0;
+++++++++++++++++++ 
+++++++++++++++++++     BUG_ON(PagePinned(virt_to_page(pgd)));
+++++++++++++++++++ 
+++++++++++++++++++ #ifdef CONFIG_X86_64
+++++++++++++++++++     {
+++++++++++++++++++             struct page *page = virt_to_page(pgd);
+++++++++++++++++++             pgd_t *user_pgd;
+++++++++++++++++++ 
+++++++++++++++++++             BUG_ON(page->private != 0);
+++++++++++++++++++ 
+++++++++++++++++++             ret = -ENOMEM;
+++++++++++++++++++ 
+++++++++++++++++++             user_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
+++++++++++++++++++             page->private = (unsigned long)user_pgd;
+++++++++++++++++++ 
+++++++++++++++++++             if (user_pgd != NULL) {
+++++++++++++++++++                     user_pgd[pgd_index(VSYSCALL_START)] =
+++++++++++++++++++                             __pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE);
+++++++++++++++++++                     ret = 0;
+++++++++++++++++++             }
+++++++++++++++++++ 
+++++++++++++++++++             BUG_ON(PagePinned(virt_to_page(xen_get_user_pgd(pgd))));
+++++++++++++++++++     }
+++++++++++++++++++ #endif
+++++++++++++++++++ 
+++++++++++++++++++     return ret;
+++++++++++++++++++ }
+++++++++++++++++++ 
+++++++++++++++++++ static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
+++++++++++++++++++ {
+++++++++++++++++++ #ifdef CONFIG_X86_64
+++++++++++++++++++     pgd_t *user_pgd = xen_get_user_pgd(pgd);
+++++++++++++++++++ 
+++++++++++++++++++     if (user_pgd)
+++++++++++++++++++             free_page((unsigned long)user_pgd);
+++++++++++++++++++ #endif
+++++++++++++++++++ }
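
xen_get_user_pgd() itself is defined outside this hunk; given that xen_pgd_alloc() above parks the user pgd in page->private, a plausible shape (an assumption, not the commit's actual helper) is:

static pgd_t *xen_get_user_pgd_sketch(pgd_t *pgd)
{
        /* assumes pgd is the page-aligned kernel pgd allocated above */
        return (pgd_t *)virt_to_page(pgd)->private;
}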
+++++++++++++++++++ 
                    /* This should never happen until we're OK to use struct page */
                    static void xen_release_ptpage(u32 pfn, unsigned level)
                    {
@@@@@@@@@@@@@@@@@@@@@ -803,6 -803,6 -803,6 -803,6 -803,6 -803,6 -803,6 -803,6 -803,6 -803,6 -746,6 -803,6 -803,6 -803,6 -803,6 -803,6 -803,6 -803,6 -807,6 -929,18 +933,18 @@@@@@@@@@@@@@@@@@@@@ static void xen_release_pmd(u32 pfn
                        xen_release_ptpage(pfn, PT_PMD);
                    }
                    
+++++++++++++++++++ #if PAGETABLE_LEVELS == 4
+++++++++++++++++++ static void xen_alloc_pud(struct mm_struct *mm, u32 pfn)
+++++++++++++++++++ {
+++++++++++++++++++     xen_alloc_ptpage(mm, pfn, PT_PUD);
+++++++++++++++++++ }
+++++++++++++++++++ 
+++++++++++++++++++ static void xen_release_pud(u32 pfn)
+++++++++++++++++++ {
+++++++++++++++++++     xen_release_ptpage(pfn, PT_PUD);
+++++++++++++++++++ }
+++++++++++++++++++ #endif
+++++++++++++++++++ 
                    #ifdef CONFIG_HIGHPTE
                    static void *xen_kmap_atomic_pte(struct page *page, enum km_type type)
                    {
@@@@@@@@@@@@@@@@@@@@@ -841,68 -841,68 -841,68 -841,68 -841,68 -841,68 -841,68 -841,68 -841,68 -841,68 -784,68 -841,68 -841,68 -841,68 -841,68 -841,68 -841,68 -841,68 -845,68 -979,16 +983,16 @@@@@@@@@@@@@@@@@@@@@ static __init void xen_set_pte_init(pte
                    
                    static __init void xen_pagetable_setup_start(pgd_t *base)
                    {
-------------------     pgd_t *xen_pgd = (pgd_t *)xen_start_info->pt_base;
-------------------     int i;
------------------- 
-------------------     /* special set_pte for pagetable initialization */
-------------------     pv_mmu_ops.set_pte = xen_set_pte_init;
------------------- 
-------------------     init_mm.pgd = base;
-------------------     /*
-------------------      * copy top-level of Xen-supplied pagetable into place.  This
-------------------      * is a stand-in while we copy the pmd pages.
-------------------      */
-------------------     memcpy(base, xen_pgd, PTRS_PER_PGD * sizeof(pgd_t));
------------------- 
-------------------     /*
-------------------      * For PAE, need to allocate new pmds, rather than
-------------------      * share Xen's, since Xen doesn't like pmd's being
-------------------      * shared between address spaces.
-------------------      */
-------------------     for (i = 0; i < PTRS_PER_PGD; i++) {
-------------------             if (pgd_val_ma(xen_pgd[i]) & _PAGE_PRESENT) {
-------------------                     pmd_t *pmd = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE);
------------------- 
-------------------                     memcpy(pmd, (void *)pgd_page_vaddr(xen_pgd[i]),
-------------------                            PAGE_SIZE);
------------------- 
-------------------                     make_lowmem_page_readonly(pmd);
------------------- 
-------------------                     set_pgd(&base[i], __pgd(1 + __pa(pmd)));
-------------------             } else
-------------------                     pgd_clear(&base[i]);
-------------------     }
------------------- 
-------------------     /* make sure zero_page is mapped RO so we can use it in pagetables */
-------------------     make_lowmem_page_readonly(empty_zero_page);
-------------------     make_lowmem_page_readonly(base);
-------------------     /*
-------------------      * Switch to new pagetable.  This is done before
-------------------      * pagetable_init has done anything so that the new pages
-------------------      * added to the table can be prepared properly for Xen.
-------------------      */
-------------------     xen_write_cr3(__pa(base));
------------------- 
-------------------     /* Unpin initial Xen pagetable */
-------------------     pin_pagetable_pfn(MMUEXT_UNPIN_TABLE,
-------------------                       PFN_DOWN(__pa(xen_start_info->pt_base)));
                    }
                    
          -         static __init void setup_shared_info(void)
          +         void xen_setup_shared_info(void)
                    {
                        if (!xen_feature(XENFEAT_auto_translated_physmap)) {
-------------------             unsigned long addr = fix_to_virt(FIX_PARAVIRT_BOOTMAP);
------------------- 
-------------------             /*
-------------------              * Create a mapping for the shared info page.
-------------------              * Should be set_fixmap(), but shared_info is a machine
-------------------              * address with no corresponding pseudo-phys address.
-------------------              */
-------------------             set_pte_mfn(addr,
-------------------                         PFN_DOWN(xen_start_info->shared_info),
-------------------                         PAGE_KERNEL);
------------------- 
-------------------             HYPERVISOR_shared_info = (struct shared_info *)addr;
+++++++++++++++++++             set_fixmap(FIX_PARAVIRT_BOOTMAP,
+++++++++++++++++++                        xen_start_info->shared_info);
+++++++++++++++++++ 
+++++++++++++++++++             HYPERVISOR_shared_info =
+++++++++++++++++++                     (struct shared_info *)fix_to_virt(FIX_PARAVIRT_BOOTMAP);
                        } else
                                HYPERVISOR_shared_info =
                                        (struct shared_info *)__va(xen_start_info->shared_info);
                        /* In UP this is as good a place as any to set up shared info */
                        xen_setup_vcpu_info_placement();
                    #endif
          +         
          +             xen_setup_mfn_list_list();
                    }
                    
                    static __init void xen_pagetable_setup_done(pgd_t *base)
                    {
---------- --------     /* This will work as long as patching hasn't happened yet
---------- --------        (which it hasn't) */
---------- --------     pv_mmu_ops.alloc_pte = xen_alloc_pte;
---------- --------     pv_mmu_ops.alloc_pmd = xen_alloc_pmd;
---------- --------     pv_mmu_ops.release_pte = xen_release_pte;
---------- --------     pv_mmu_ops.release_pmd = xen_release_pmd;
---------- --------     pv_mmu_ops.set_pte = xen_set_pte;
---------- -------- 
          +             xen_setup_shared_info();
---------- -------- 
---------- --------     /* Actually pin the pagetable down, but we can't set PG_pinned
---------- --------        yet because the page structures don't exist yet. */
---------- --------     pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(base)));
          +         }
          +         
          +         static __init void xen_post_allocator_init(void)
          +         {
+++++++++++++++++++     pv_mmu_ops.set_pte = xen_set_pte;
          +             pv_mmu_ops.set_pmd = xen_set_pmd;
          +             pv_mmu_ops.set_pud = xen_set_pud;
+++++++++++++++++++ #if PAGETABLE_LEVELS == 4
+++++++++++++++++++     pv_mmu_ops.set_pgd = xen_set_pgd;
+++++++++++++++++++ #endif
+++++++++++++++++++ 
++++++++++ ++++++++     /* This will work as long as patching hasn't happened yet
++++++++++ ++++++++        (which it hasn't) */
++++++++++ ++++++++     pv_mmu_ops.alloc_pte = xen_alloc_pte;
++++++++++ ++++++++     pv_mmu_ops.alloc_pmd = xen_alloc_pmd;
++++++++++ ++++++++     pv_mmu_ops.release_pte = xen_release_pte;
++++++++++ ++++++++     pv_mmu_ops.release_pmd = xen_release_pmd;
          -             pv_mmu_ops.set_pte = xen_set_pte;
          -         
          -             setup_shared_info();
+++++++++++++++++++ #if PAGETABLE_LEVELS == 4
+++++++++++++++++++     pv_mmu_ops.alloc_pud = xen_alloc_pud;
+++++++++++++++++++     pv_mmu_ops.release_pud = xen_release_pud;
+++++++++++++++++++ #endif
                    
          -             /* Actually pin the pagetable down, but we can't set PG_pinned
          -                yet because the page structures don't exist yet. */
          -             pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(base)));
+++++++++++++++++++ #ifdef CONFIG_X86_64
+++++++++++++++++++     SetPagePinned(virt_to_page(level3_user_vsyscall));
+++++++++++++++++++ #endif
          +             xen_mark_init_mm_pinned();
                    }
                    
                    /* This is called once we have the cpu_possible_map */
          -         void __init xen_setup_vcpu_info_placement(void)
          +         void xen_setup_vcpu_info_placement(void)
                    {
                        int cpu;
                    
                    
                        /* xen_vcpu_setup managed to place the vcpu_info within the
                           percpu area for all cpus, so make use of it */
+++++++++++++++++++ #ifdef CONFIG_X86_32
                        if (have_vcpu_info_placement) {
                                printk(KERN_INFO "Xen: using vcpu_info placement\n");
                    
                                pv_irq_ops.irq_enable = xen_irq_enable_direct;
                                pv_mmu_ops.read_cr2 = xen_read_cr2_direct;
                        }
+++++++++++++++++++ #endif
                    }
                    
                    static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf,
                        goto patch_site
                    
                        switch (type) {
+++++++++++++++++++ #ifdef CONFIG_X86_32
                                SITE(pv_irq_ops, irq_enable);
                                SITE(pv_irq_ops, irq_disable);
                                SITE(pv_irq_ops, save_fl);
                                SITE(pv_irq_ops, restore_fl);
+++++++++++++++++++ #endif /* CONFIG_X86_32 */
                    #undef SITE
                    
                        patch_site:
                        return ret;
                    }
                    
          +         static void xen_set_fixmap(unsigned idx, unsigned long phys, pgprot_t prot)
          +         {
          +             pte_t pte;
          +         
          +             phys >>= PAGE_SHIFT;
          +         
          +             switch (idx) {
          +             case FIX_BTMAP_END ... FIX_BTMAP_BEGIN:
          +         #ifdef CONFIG_X86_F00F_BUG
          +             case FIX_F00F_IDT:
          +         #endif
+++++++++++++++++++ #ifdef CONFIG_X86_32
          +             case FIX_WP_TEST:
          +             case FIX_VDSO:
+++++++++++++++++++ # ifdef CONFIG_HIGHMEM
+++++++++++++++++++     case FIX_KMAP_BEGIN ... FIX_KMAP_END:
+++++++++++++++++++ # endif
+++++++++++++++++++ #else
+++++++++++++++++++     case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE:
+++++++++++++++++++ #endif
          +         #ifdef CONFIG_X86_LOCAL_APIC
          +             case FIX_APIC_BASE:     /* maps dummy local APIC */
          +         #endif
          +                     pte = pfn_pte(phys, prot);
          +                     break;
          +         
          +             default:
          +                     pte = mfn_pte(phys, prot);
          +                     break;
          +             }
          +         
          +             __native_set_fixmap(idx, pte);
+++++++++++++++++++ 
+++++++++++++++++++ #ifdef CONFIG_X86_64
+++++++++++++++++++     /* Replicate changes to map the vsyscall page into the user
+++++++++++++++++++        pagetable vsyscall mapping. */
+++++++++++++++++++     if (idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) {
+++++++++++++++++++             unsigned long vaddr = __fix_to_virt(idx);
+++++++++++++++++++             set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte);
+++++++++++++++++++     }
+++++++++++++++++++ #endif
          +         }
          +         
                    static const struct pv_info xen_info __initdata = {
                        .paravirt_enabled = 1,
                        .shared_kernel_pmd = 0,
@@@@@@@@@@@@@@@@@@@@@ -1054,7 -1054,7 -1054,7 -1054,7 -1054,7 -1054,7 -1054,7 -1054,7 -1054,7 -1054,7 -960,7 -1054,7 -1054,7 -1054,7 -1054,7 -1054,7 -1054,7 -1054,7 -1058,7 -1166,7 +1170,7 @@@@@@@@@@@@@@@@@@@@@ static const struct pv_init_ops xen_ini
                        .banner = xen_banner,
                        .memory_setup = xen_memory_setup,
                        .arch_setup = xen_arch_setup,
          -             .post_allocator_init = xen_mark_init_mm_pinned,
          +             .post_allocator_init = xen_post_allocator_init,
                    };
                    
                    static const struct pv_time_ops xen_time_ops __initdata = {
                    
                        .set_wallclock = xen_set_wallclock,
                        .get_wallclock = xen_get_wallclock,
          -             .get_cpu_khz = xen_cpu_khz,
          +             .get_tsc_khz = xen_tsc_khz,
                        .sched_clock = xen_sched_clock,
                    };
                    
                        .set_debugreg = xen_set_debugreg,
                        .get_debugreg = xen_get_debugreg,
                    
          -             .clts = native_clts,
          +             .clts = xen_clts,
                    
                        .read_cr0 = native_read_cr0,
          -             .write_cr0 = native_write_cr0,
          +             .write_cr0 = xen_write_cr0,
                    
                        .read_cr4 = native_read_cr4,
                        .read_cr4_safe = native_read_cr4_safe,
                        .wbinvd = native_wbinvd,
                    
                        .read_msr = native_read_msr_safe,
-------------------     .write_msr = native_write_msr_safe,
+++++++++++++++++++     .write_msr = xen_write_msr_safe,
                        .read_tsc = native_read_tsc,
                        .read_pmc = native_read_pmc,
                    
                        .iret = xen_iret,
          -             .irq_enable_syscall_ret = xen_sysexit,
          +             .irq_enable_sysexit = xen_sysexit,
+++++++++++++++++++ #ifdef CONFIG_X86_64
+++++++++++++++++++     .usergs_sysret32 = xen_sysret32,
+++++++++++++++++++     .usergs_sysret64 = xen_sysret64,
+++++++++++++++++++ #endif
                    
                        .load_tr_desc = paravirt_nop,
                        .set_ldt = xen_set_ldt,
                        .load_gdt = xen_load_gdt,
                        .load_idt = xen_load_idt,
                        .load_tls = xen_load_tls,
+++++++++++++++++++ #ifdef CONFIG_X86_64
+++++++++++++++++++     .load_gs_index = xen_load_gs_index,
+++++++++++++++++++ #endif
                    
                        .store_gdt = native_store_gdt,
                        .store_idt = native_store_idt,
                        .set_iopl_mask = xen_set_iopl_mask,
                        .io_delay = xen_io_delay,
                    
+++++++++++++++++++     /* Xen takes care of %gs when switching to usermode for us */
+++++++++++++++++++     .swapgs = paravirt_nop,
+++++++++++++++++++ 
                        .lazy_mode = {
                                .enter = paravirt_enter_lazy_cpu,
                                .leave = xen_leave_lazy,
                        },
                    };
                    
+++++++++++++++++++ static void __init __xen_init_IRQ(void)
+++++++++++++++++++ {
+++++++++++++++++++ #ifdef CONFIG_X86_64
+++++++++++++++++++     int i;
+++++++++++++++++++ 
+++++++++++++++++++     /* Create identity vector->irq map */
+++++++++++++++++++     for(i = 0; i < NR_VECTORS; i++) {
+++++++++++++++++++             int cpu;
+++++++++++++++++++ 
+++++++++++++++++++             for_each_possible_cpu(cpu)
+++++++++++++++++++                     per_cpu(vector_irq, cpu)[i] = i;
+++++++++++++++++++     }
+++++++++++++++++++ #endif      /* CONFIG_X86_64 */
+++++++++++++++++++ 
+++++++++++++++++++     xen_init_IRQ();
+++++++++++++++++++ }
+++++++++++++++++++ 
                    static const struct pv_irq_ops xen_irq_ops __initdata = {
-------------------     .init_IRQ = xen_init_IRQ,
+++++++++++++++++++     .init_IRQ = __xen_init_IRQ,
                        .save_fl = xen_save_fl,
                        .restore_fl = xen_restore_fl,
                        .irq_disable = xen_irq_disable,
                        .irq_enable = xen_irq_enable,
                        .safe_halt = xen_safe_halt,
                        .halt = xen_halt,
---------- --------     .adjust_exception_frame = paravirt_nop,
          +         #ifdef CONFIG_X86_64
+++++++++++++++++++     .adjust_exception_frame = xen_adjust_exception_frame,
          +         #endif
                    };
                    
                    static const struct pv_apic_ops xen_apic_ops __initdata = {
                    #ifdef CONFIG_X86_LOCAL_APIC
                        .apic_write = xen_apic_write,
-- -----------------    .apic_write_atomic = xen_apic_write,
                        .apic_read = xen_apic_read,
                        .setup_boot_clock = paravirt_nop,
                        .setup_secondary_clock = paravirt_nop,
@@@@@@@@@@@@@@@@@@@@@ -1157,9 -1157,9 -1156,9 -1157,9 -1157,9 -1157,9 -1157,9 -1157,9 -1157,9 -1157,9 -1060,6 -1157,9 -1157,9 -1157,9 -1157,9 -1157,9 -1157,9 -1157,9 -1161,9 -1296,9 +1299,9 @@@@@@@@@@@@@@@@@@@@@ static const struct pv_mmu_ops xen_mmu_
                        .pte_update = paravirt_nop,
                        .pte_update_defer = paravirt_nop,
                    
---------- --------     .pgd_alloc = __paravirt_pgd_alloc,
---------- --------     .pgd_free = paravirt_nop,
+++++++++++++++++++     .pgd_alloc = xen_pgd_alloc,
+++++++++++++++++++     .pgd_free = xen_pgd_free,
          +         
                        .alloc_pte = xen_alloc_pte_init,
                        .release_pte = xen_release_pte_init,
                        .alloc_pmd = xen_alloc_pte_init,
                        .kmap_atomic_pte = xen_kmap_atomic_pte,
                    #endif
                    
-------------------     .set_pte = NULL,        /* see xen_pagetable_setup_* */
+++++++++++++++++++ #ifdef CONFIG_X86_64
+++++++++++++++++++     .set_pte = xen_set_pte,
+++++++++++++++++++ #else
+++++++++++++++++++     .set_pte = xen_set_pte_init,
+++++++++++++++++++ #endif
                        .set_pte_at = xen_set_pte_at,
          -             .set_pmd = xen_set_pmd,
          +             .set_pmd = xen_set_pmd_hyper,
          +         
          +             .ptep_modify_prot_start = __ptep_modify_prot_start,
          +             .ptep_modify_prot_commit = __ptep_modify_prot_commit,
                    
                        .pte_val = xen_pte_val,
          +             .pte_flags = native_pte_val,
                        .pgd_val = xen_pgd_val,
                    
                        .make_pte = xen_make_pte,
                        .make_pgd = xen_make_pgd,
                    
+++++++++++++++++++ #ifdef CONFIG_X86_PAE
                        .set_pte_atomic = xen_set_pte_atomic,
                        .set_pte_present = xen_set_pte_at,
---------- --------     .set_pud = xen_set_pud_hyper,
          -             .set_pud = xen_set_pud,
                        .pte_clear = xen_pte_clear,
                        .pmd_clear = xen_pmd_clear,
+++++++++++++++++++ #endif      /* CONFIG_X86_PAE */
+++++++++++++++++++     .set_pud = xen_set_pud_hyper,
                    
                        .make_pmd = xen_make_pmd,
                        .pmd_val = xen_pmd_val,
                    
+++++++++++++++++++ #if PAGETABLE_LEVELS == 4
+++++++++++++++++++     .pud_val = xen_pud_val,
+++++++++++++++++++     .make_pud = xen_make_pud,
+++++++++++++++++++     .set_pgd = xen_set_pgd_hyper,
+++++++++++++++++++ 
+++++++++++++++++++     .alloc_pud = xen_alloc_pte_init,
+++++++++++++++++++     .release_pud = xen_release_pte_init,
+++++++++++++++++++ #endif      /* PAGETABLE_LEVELS == 4 */
+++++++++++++++++++ 
                        .activate_mm = xen_activate_mm,
                        .dup_mmap = xen_dup_mmap,
                        .exit_mmap = xen_exit_mmap,
                                .enter = paravirt_enter_lazy_mmu,
                                .leave = xen_leave_lazy,
                        },
          -         };
                    
          -         #ifdef CONFIG_SMP
          -         static const struct smp_ops xen_smp_ops __initdata = {
          -             .smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu,
          -             .smp_prepare_cpus = xen_smp_prepare_cpus,
          -             .cpu_up = xen_cpu_up,
          -             .smp_cpus_done = xen_smp_cpus_done,
          -         
          -             .smp_send_stop = xen_smp_send_stop,
          -             .smp_send_reschedule = xen_smp_send_reschedule,
          -             .smp_call_function_mask = xen_smp_call_function_mask,
          +             .set_fixmap = xen_set_fixmap,
                    };
          -         #endif      /* CONFIG_SMP */
                    
---------- -------- #ifdef CONFIG_SMP
---------- -------- static const struct smp_ops xen_smp_ops __initdata = {
---------- --------     .smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu,
---------- --------     .smp_prepare_cpus = xen_smp_prepare_cpus,
---------- --------     .cpu_up = xen_cpu_up,
---------- --------     .smp_cpus_done = xen_smp_cpus_done,
---------- -------- 
---------- --------     .smp_send_stop = xen_smp_send_stop,
---------- --------     .smp_send_reschedule = xen_smp_send_reschedule,
------  -- - --- -- 
------  -- - --- --     .send_call_func_ipi = xen_smp_send_call_function_ipi,
------  -- - --- --     .send_call_func_single_ipi = xen_smp_send_call_function_single_ipi,
      --    -   -       .smp_call_function_mask = xen_smp_call_function_mask,
---------- -------- };
---------- -------- #endif      /* CONFIG_SMP */
---------- -------- 
                    static void xen_reboot(int reason)
                    {
          +             struct sched_shutdown r = { .reason = reason };
          +         
                    #ifdef CONFIG_SMP
                        smp_send_stop();
                    #endif
                    
          -             if (HYPERVISOR_sched_op(SCHEDOP_shutdown, reason))
          +             if (HYPERVISOR_sched_op(SCHEDOP_shutdown, &r))
                                BUG();
                    }
                    
@@@@@@@@@@@@@@@@@@@@@ -1264,6 -1264,6 -1263,6 -1264,6 -1264,6 -1264,6 -1262,6 -1262,6 -1264,6 -1264,6 -1154,6 -1264,6 -1262,6 -1264,6 -1264,6 -1264,6 -1262,6 -1264,6 -1268,6 -1403,7 +1406,7 @@@@@@@@@@@@@@@@@@@@@ static const struct machine_ops __initd
                    
                    static void __init xen_reserve_top(void)
                    {
+++++++++++++++++++ #ifdef CONFIG_X86_32
                        unsigned long top = HYPERVISOR_VIRT_START;
                        struct xen_platform_parameters pp;
                    
                                top = pp.virt_start;
                    
                        reserve_top_address(-top + 2 * PAGE_SIZE);
+++++++++++++++++++ #endif      /* CONFIG_X86_32 */
+++++++++++++++++++ }
+++++++++++++++++++ 
+++++++++++++++++++ /*
+++++++++++++++++++  * Like __va(), but returns the address in the kernel mapping (which is
+++++++++++++++++++  * all we have until the physical memory mapping has been set up).
+++++++++++++++++++  */
+++++++++++++++++++ static void *__ka(phys_addr_t paddr)
+++++++++++++++++++ {
+++++++++++++++++++ #ifdef CONFIG_X86_64
+++++++++++++++++++     return (void *)(paddr + __START_KERNEL_map);
+++++++++++++++++++ #else
+++++++++++++++++++     return __va(paddr);
+++++++++++++++++++ #endif
      ++  + +   + + }
      ++  + +   + + 
+++++++++++++++++++ /* Convert a machine address to physical address */
+++++++++++++++++++ static unsigned long m2p(phys_addr_t maddr)
+++++++++++++++++++ {
+++++++++++++++++++     phys_addr_t paddr;
+++++++++++++++++++ 
+++++++++++++++++++     maddr &= PTE_MASK;
+++++++++++++++++++     paddr = mfn_to_pfn(maddr >> PAGE_SHIFT) << PAGE_SHIFT;
+++++++++++++++++++ 
+++++++++++++++++++     return paddr;
++++++++++ ++++++++ }
++++++++++ ++++++++ 
+++++++++++++++++++ /* Convert a machine address to kernel virtual */
+++++++++++++++++++ static void *m2v(phys_addr_t maddr)
+++++++++++++++++++ {
+++++++++++++++++++     return __ka(m2p(maddr));
+++++++++++++++++++ }
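
These helpers run in the opposite direction of the pfn_to_mfn() use in __xen_write_cr3() above; a minimal round-trip sketch under the same PAGE_SHIFT convention, with an illustrative helper name:

static phys_addr_t p2m_sketch(phys_addr_t paddr)
{
        /* physical -> machine, the reverse of m2p() above */
        return (phys_addr_t)pfn_to_mfn(paddr >> PAGE_SHIFT) << PAGE_SHIFT;
}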
+++++++++++++++++++ 
+++++++++++++++++++ #ifdef CONFIG_X86_64
+++++++++++++++++++ static void walk(pgd_t *pgd, unsigned long addr)
+++++++++++++++++++ {
+++++++++++++++++++     unsigned l4idx = pgd_index(addr);
+++++++++++++++++++     unsigned l3idx = pud_index(addr);
+++++++++++++++++++     unsigned l2idx = pmd_index(addr);
+++++++++++++++++++     unsigned l1idx = pte_index(addr);
+++++++++++++++++++     pgd_t l4;
+++++++++++++++++++     pud_t l3;
+++++++++++++++++++     pmd_t l2;
+++++++++++++++++++     pte_t l1;
+++++++++++++++++++ 
+++++++++++++++++++     xen_raw_printk("walk %p, %lx -> %d %d %d %d\n",
+++++++++++++++++++                    pgd, addr, l4idx, l3idx, l2idx, l1idx);
+++++++++++++++++++ 
+++++++++++++++++++     l4 = pgd[l4idx];
+++++++++++++++++++     xen_raw_printk("  l4: %016lx\n", l4.pgd);
+++++++++++++++++++     xen_raw_printk("      %016lx\n", pgd_val(l4));
+++++++++++++++++++ 
+++++++++++++++++++     l3 = ((pud_t *)(m2v(l4.pgd)))[l3idx];
+++++++++++++++++++     xen_raw_printk("  l3: %016lx\n", l3.pud);
+++++++++++++++++++     xen_raw_printk("      %016lx\n", pud_val(l3));
+++++++++++++++++++ 
+++++++++++++++++++     l2 = ((pmd_t *)(m2v(l3.pud)))[l2idx];
+++++++++++++++++++     xen_raw_printk("  l2: %016lx\n", l2.pmd);
+++++++++++++++++++     xen_raw_printk("      %016lx\n", pmd_val(l2));
+++++++++++++++++++ 
+++++++++++++++++++     l1 = ((pte_t *)(m2v(l2.pmd)))[l1idx];
+++++++++++++++++++     xen_raw_printk("  l1: %016lx\n", l1.pte);
+++++++++++++++++++     xen_raw_printk("      %016lx\n", pte_val(l1));
+++++++++++++++++++ }
+++++++++++++++++++ #endif
+++++++++++++++++++ 
+++++++++++++++++++ static void set_page_prot(void *addr, pgprot_t prot)
+++++++++++++++++++ {
+++++++++++++++++++     unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
+++++++++++++++++++     pte_t pte = pfn_pte(pfn, prot);
+++++++++++++++++++ 
+++++++++++++++++++     xen_raw_printk("addr=%p pfn=%lx mfn=%lx prot=%016llx pte=%016llx\n",
+++++++++++++++++++                    addr, pfn, get_phys_to_machine(pfn),
+++++++++++++++++++                    pgprot_val(prot), pte.pte);
+++++++++++++++++++ 
+++++++++++++++++++     if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0))
+++++++++++++++++++             BUG();
+++++++++++++++++++ }
+++++++++++++++++++ 
+++++++++++++++++++ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
+++++++++++++++++++ {
+++++++++++++++++++     unsigned pmdidx, pteidx;
+++++++++++++++++++     unsigned ident_pte;
+++++++++++++++++++     unsigned long pfn;
+++++++++++++++++++ 
+++++++++++++++++++     ident_pte = 0;
+++++++++++++++++++     pfn = 0;
+++++++++++++++++++     for(pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) {
+++++++++++++++++++             pte_t *pte_page;
+++++++++++++++++++ 
+++++++++++++++++++             /* Reuse or allocate a page of ptes */
+++++++++++++++++++             if (pmd_present(pmd[pmdidx]))
+++++++++++++++++++                     pte_page = m2v(pmd[pmdidx].pmd);
+++++++++++++++++++             else {
+++++++++++++++++++                     /* Check for free pte pages */
+++++++++++++++++++                     if (ident_pte == ARRAY_SIZE(level1_ident_pgt))
+++++++++++++++++++                             break;
+++++++++++++++++++ 
+++++++++++++++++++                     pte_page = &level1_ident_pgt[ident_pte];
+++++++++++++++++++                     ident_pte += PTRS_PER_PTE;
+++++++++++++++++++ 
+++++++++++++++++++                     pmd[pmdidx] = __pmd(__pa(pte_page) | _PAGE_TABLE);
+++++++++++++++++++             }
+++++++++++++++++++ 
+++++++++++++++++++             /* Install mappings */
+++++++++++++++++++             for(pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) {
+++++++++++++++++++                     pte_t pte;
+++++++++++++++++++ 
+++++++++++++++++++                     if (pfn > max_pfn_mapped)
+++++++++++++++++++                             max_pfn_mapped = pfn;
+++++++++++++++++++ 
+++++++++++++++++++                     if (!pte_none(pte_page[pteidx]))
+++++++++++++++++++                             continue;
+++++++++++++++++++ 
+++++++++++++++++++                     pte = pfn_pte(pfn, PAGE_KERNEL_EXEC);
+++++++++++++++++++                     pte_page[pteidx] = pte;
+++++++++++++++++++             }
+++++++++++++++++++     }
+++++++++++++++++++ 
+++++++++++++++++++     for(pteidx = 0; pteidx < ident_pte; pteidx += PTRS_PER_PTE)
+++++++++++++++++++             set_page_prot(&level1_ident_pgt[pteidx], PAGE_KERNEL_RO);
+++++++++++++++++++ 
+++++++++++++++++++     set_page_prot(pmd, PAGE_KERNEL_RO);
+++++++++++++++++++ }
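
A small sizing sketch for the identity area built above: each pmd slot that is not already populated consumes one page of ptes (PTRS_PER_PTE entries) from level1_ident_pgt, so covering max_pfn pages needs at most the following number of pte pages (fewer when Xen already supplied pte pages); DIV_ROUND_UP comes from linux/kernel.h.

static unsigned long ident_pte_pages_needed_sketch(unsigned long max_pfn)
{
        /* one pte page maps PTRS_PER_PTE consecutive pfns */
        return DIV_ROUND_UP(max_pfn, PTRS_PER_PTE);
}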
+++++++++++++++++++ 
+++++++++++++++++++ #ifdef CONFIG_X86_64
+++++++++++++++++++ static void convert_pfn_mfn(void *v)
+++++++++++++++++++ {
+++++++++++++++++++     pte_t *pte = v;
+++++++++++++++++++     int i;
+++++++++++++++++++ 
+++++++++++++++++++     /* All levels are converted the same way, so just treat them
+++++++++++++++++++        as ptes. */
+++++++++++++++++++     for(i = 0; i < PTRS_PER_PTE; i++)
+++++++++++++++++++             pte[i] = xen_make_pte(pte[i].pte);
+++++++++++++++++++ }
+++++++++++++++++++ 
+++++++++++++++++++ /*
+++++++++++++++++++  * Set up the initial kernel pagetable.
+++++++++++++++++++  *
+++++++++++++++++++  * We can construct this by grafting the Xen provided pagetable into
+++++++++++++++++++  * head_64.S's preconstructed pagetables.  We copy the Xen L2's into
+++++++++++++++++++  * level2_ident_pgt, level2_kernel_pgt and level2_fixmap_pgt.  This
+++++++++++++++++++  * means that only the kernel has a physical mapping to start with -
+++++++++++++++++++  * but that's enough to get __va working.  We need to fill in the rest
+++++++++++++++++++  * of the physical mapping once some sort of allocator has been set
+++++++++++++++++++  * up.
+++++++++++++++++++  */
+++++++++++++++++++ static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
+++++++++++++++++++ {
+++++++++++++++++++     pud_t *l3;
+++++++++++++++++++     pmd_t *l2;
+++++++++++++++++++ 
+++++++++++++++++++     /* Zap identity mapping */
+++++++++++++++++++     init_level4_pgt[0] = __pgd(0);
+++++++++++++++++++ 
+++++++++++++++++++     /* Pre-constructed entries are in pfn, so convert to mfn */
+++++++++++++++++++     convert_pfn_mfn(init_level4_pgt);
+++++++++++++++++++     convert_pfn_mfn(level3_ident_pgt);
+++++++++++++++++++     convert_pfn_mfn(level3_kernel_pgt);
+++++++++++++++++++ 
+++++++++++++++++++     l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd);
+++++++++++++++++++     l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud);
+++++++++++++++++++ 
+++++++++++++++++++     memcpy(level2_ident_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
+++++++++++++++++++     memcpy(level2_kernel_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
+++++++++++++++++++ 
+++++++++++++++++++     l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd);
+++++++++++++++++++     l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud);
+++++++++++++++++++     memcpy(level2_fixmap_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
+++++++++++++++++++ 
+++++++++++++++++++     /* Set up identity map */
+++++++++++++++++++     xen_map_identity_early(level2_ident_pgt, max_pfn);
+++++++++++++++++++ 
+++++++++++++++++++     /* Make pagetable pieces RO */
+++++++++++++++++++     set_page_prot(init_level4_pgt, PAGE_KERNEL_RO);
+++++++++++++++++++     set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
+++++++++++++++++++     set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO);
+++++++++++++++++++     set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO);
+++++++++++++++++++     set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
+++++++++++++++++++     set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
+++++++++++++++++++ 
+++++++++++++++++++     /* Pin down new L4 */
+++++++++++++++++++     pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
+++++++++++++++++++                       PFN_DOWN(__pa_symbol(init_level4_pgt)));
+++++++++++++++++++ 
+++++++++++++++++++     /* Unpin Xen-provided one */
+++++++++++++++++++     pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
+++++++++++++++++++ 
+++++++++++++++++++     /* Switch over */
+++++++++++++++++++     pgd = init_level4_pgt;
+++++++++++++++++++ 
+++++++++++++++++++     /*
+++++++++++++++++++      * At this stage there can be no user pgd, and no page
+++++++++++++++++++      * structure to attach it to, so make sure we just set the kernel
+++++++++++++++++++      * pgd.
+++++++++++++++++++      */
+++++++++++++++++++     xen_mc_batch();
+++++++++++++++++++     __xen_write_cr3(true, __pa(pgd));
+++++++++++++++++++     xen_mc_issue(PARAVIRT_LAZY_CPU);
+++++++++++++++++++ 
+++++++++++++++++++     reserve_early(__pa(xen_start_info->pt_base),
+++++++++++++++++++                   __pa(xen_start_info->pt_base +
+++++++++++++++++++                        xen_start_info->nr_pt_frames * PAGE_SIZE),
+++++++++++++++++++                   "XEN PAGETABLES");
+++++++++++++++++++ 
+++++++++++++++++++     return pgd;
+++++++++++++++++++ }
+++++++++++++++++++ #else       /* !CONFIG_X86_64 */
+++++++++++++++++++ static pmd_t level2_kernel_pgt[PTRS_PER_PMD] __page_aligned_bss;
+++++++++++++++++++ 
+++++++++++++++++++ static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
+++++++++++++++++++ {
+++++++++++++++++++     pmd_t *kernel_pmd;
+++++++++++++++++++ 
+++++++++++++++++++     init_pg_tables_start = __pa(pgd);
+++++++++++++++++++     init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE;
+++++++++++++++++++     max_pfn_mapped = PFN_DOWN(init_pg_tables_end + 512*1024);
+++++++++++++++++++ 
+++++++++++++++++++     kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd);
+++++++++++++++++++     memcpy(level2_kernel_pgt, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD);
+++++++++++++++++++ 
+++++++++++++++++++     xen_map_identity_early(level2_kernel_pgt, max_pfn);
+++++++++++++++++++ 
+++++++++++++++++++     memcpy(swapper_pg_dir, pgd, sizeof(pgd_t) * PTRS_PER_PGD);
+++++++++++++++++++     set_pgd(&swapper_pg_dir[KERNEL_PGD_BOUNDARY],
+++++++++++++++++++                     __pgd(__pa(level2_kernel_pgt) | _PAGE_PRESENT));
+++++++++++++++++++ 
+++++++++++++++++++     set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
+++++++++++++++++++     set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO);
+++++++++++++++++++     set_page_prot(empty_zero_page, PAGE_KERNEL_RO);
+++++++++++++++++++ 
+++++++++++++++++++     pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
+++++++++++++++++++ 
+++++++++++++++++++     xen_write_cr3(__pa(swapper_pg_dir));
+++++++++++++++++++ 
+++++++++++++++++++     pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(swapper_pg_dir)));
+++++++++++++++++++ 
+++++++++++++++++++     return swapper_pg_dir;
++++++  ++++ +++ +  }
+++++++++++++++++++ #endif      /* CONFIG_X86_64 */
++++++  ++++ +++ +  
                    /* First C function to be called on Xen boot */
                    asmlinkage void __init xen_start_kernel(void)
                    {
                    
                        BUG_ON(memcmp(xen_start_info->magic, "xen-3", 5) != 0);
                    
          +             xen_setup_features();
          +         
                        /* Install Xen paravirt ops */
                        pv_info = xen_info;
                        pv_init_ops = xen_init_ops;
                        pv_apic_ops = xen_apic_ops;
                        pv_mmu_ops = xen_mmu_ops;
                    
          +             if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) {
          +                     pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start;
          +                     pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit;
          +             }
          +         
                        machine_ops = xen_machine_ops;
                    
------------------- #ifdef CONFIG_SMP
-------------------     smp_ops = xen_smp_ops;
+++++++++++++++++++ #ifdef CONFIG_X86_64
+++++++++++++++++++     /* Disable until direct per-cpu data access. */
+++++++++++++++++++     have_vcpu_info_placement = 0;
+++++++++++++++++++     x86_64_init_pda();
                    #endif
                    
          -             xen_setup_features();
+++++++++++++++++++     xen_smp_init();
++++++++++ ++++++++ 
                        /* Get mfn list */
                        if (!xen_feature(XENFEAT_auto_translated_physmap))
          -                     phys_to_machine_mapping = (unsigned long *)xen_start_info->mfn_list;
          +                     xen_build_dynamic_phys_to_machine();
                    
                        pgd = (pgd_t *)xen_start_info->pt_base;
                    
---------- --------     init_pg_tables_start = __pa(pgd);
-------------------     init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE;
---------- --------     max_pfn_mapped = (init_pg_tables_end + 512*1024) >> PAGE_SHIFT;
------------------- 
-------------------     init_mm.pgd = pgd; /* use the Xen pagetables to start */
------------------- 
-------------------     /* keep using Xen gdt for now; no urgent need to change it */
------------------- 
-------------------     x86_write_percpu(xen_cr3, __pa(pgd));
-------------------     x86_write_percpu(xen_current_cr3, __pa(pgd));
+++++++++++++++++++     /* Prevent unwanted bits from being set in PTEs. */
+++++++++++++++++++     __supported_pte_mask &= ~_PAGE_GLOBAL;
+++++++++++++++++++     if (!is_initial_xendomain())
+++++++++++++++++++             __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
                    
                        /* Don't do the full vcpu_info placement stuff until we have a
                           possible map and a non-dummy shared_info. */
                        per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
                    
+++++++++++++++++++     xen_raw_console_write("mapping kernel into physical memory\n");
+++++++++++++++++++     pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages);
+++++++++++++++++++ 
+++++++++++++++++++     init_mm.pgd = pgd;
+++++++++++++++++++ 
+++++++++++++++++++     /* keep using Xen gdt for now; no urgent need to change it */
+++++++++++++++++++ 
                        pv_info.kernel_rpl = 1;
                        if (xen_feature(XENFEAT_supervisor_mode_kernel))
                                pv_info.kernel_rpl = 0;
                    
-------------------     /* Prevent unwanted bits from being set in PTEs. */
-------------------     __supported_pte_mask &= ~_PAGE_GLOBAL;
-------------------     if (!is_initial_xendomain())
-------------------             __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
------------------- 
                        /* set the limit of our address space */
                        xen_reserve_top();
                    
+++++++++++++++++++ #ifdef CONFIG_X86_32
                        /* set up basic CPUID stuff */
                        cpu_detect(&new_cpu_data);
                        new_cpu_data.hard_math = 1;
                        new_cpu_data.x86_capability[0] = cpuid_edx(1);
+++++++++++++++++++ #endif
                    
                        /* Poke various useful things into boot_params */
                        boot_params.hdr.type_of_loader = (9 << 4) | 0;
                        boot_params.hdr.ramdisk_image = xen_start_info->mod_start
                                ? __pa(xen_start_info->mod_start) : 0;
                        boot_params.hdr.ramdisk_size = xen_start_info->mod_len;
+++++++++++++++++++     boot_params.hdr.cmd_line_ptr = __pa(xen_start_info->cmd_line);
                    
          -             if (!is_initial_xendomain())
          +             if (!is_initial_xendomain()) {
          +                     add_preferred_console("xenboot", 0, NULL);
          +                     add_preferred_console("tty", 0, NULL);
                                add_preferred_console("hvc", 0, NULL);
          +             }
          +         
+++++++++++++++++++     xen_raw_console_write("about to get started...\n");
+++++++++++++++++++ 
+++++++++++++++++++ #if 0
+++++++++++++++++++     xen_raw_printk("&boot_params=%p __pa(&boot_params)=%lx __va(__pa(&boot_params))=%lx\n",
+++++++++++++++++++                    &boot_params, __pa_symbol(&boot_params),
+++++++++++++++++++                    __va(__pa_symbol(&boot_params)));
+++++++++++++++++++ 
+++++++++++++++++++     walk(pgd, &boot_params);
+++++++++++++++++++     walk(pgd, __va(__pa(&boot_params)));
+++++++++++++++++++ #endif
++++++++++ ++++++++ 
                        /* Start the world */
          -             start_kernel();
+++++++++++++++++++ #ifdef CONFIG_X86_32
          +             i386_start_kernel();
+++++++++++++++++++ #else
+++++++++++++++++++     x86_64_start_reservations((char *)__pa_symbol(&boot_params));
+++++++++++++++++++ #endif
                    }
index 3f7b81c065d25188e17d82665b63575a91c92e19,3f7b81c065d25188e17d82665b63575a91c92e19,3f7b81c065d25188e17d82665b63575a91c92e19,3f7b81c065d25188e17d82665b63575a91c92e19,3f7b81c065d25188e17d82665b63575a91c92e19,3f7b81c065d25188e17d82665b63575a91c92e19,bb0642318a959453e9ef0c68190070ac7d9290a4,7868065f6f2d132f59096694366c039d5e851b2d,bb0642318a959453e9ef0c68190070ac7d9290a4,3f7b81c065d25188e17d82665b63575a91c92e19,66c0fd21894b155e9db690b65a86bfddffca29c4,3f7b81c065d25188e17d82665b63575a91c92e19,bb0642318a959453e9ef0c68190070ac7d9290a4,bb0642318a959453e9ef0c68190070ac7d9290a4,3f7b81c065d25188e17d82665b63575a91c92e19,3f7b81c065d25188e17d82665b63575a91c92e19,bb0642318a959453e9ef0c68190070ac7d9290a4,3f7b81c065d25188e17d82665b63575a91c92e19,3f7b81c065d25188e17d82665b63575a91c92e19,3f7b81c065d25188e17d82665b63575a91c92e19..8d0e60ac849cb5f34e609fdaf9215688091728a7
                    #include "intel-iommu.h"
                    #include <asm/proto.h> /* force_iommu in this header in x86-64*/
                    #include <asm/cacheflush.h>
------- ------------#include <asm/gart.h>
+++++++ ++++++++++++#include <asm/iommu.h>
                    #include "pci.h"
                    
                    #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
                    }
                    
                    #ifdef CONFIG_DMAR_GFX_WA
          -         extern int arch_get_ram_range(int slot, u64 *addr, u64 *size);
          +         struct iommu_prepare_data {
          +             struct pci_dev *pdev;
          +             int ret;
          +         };
          +         
          +         static int __init iommu_prepare_work_fn(unsigned long start_pfn,
          +                                              unsigned long end_pfn, void *datax)
          +         {
          +             struct iommu_prepare_data *data;
          +         
          +             data = (struct iommu_prepare_data *)datax;
          +         
          +             data->ret = iommu_prepare_identity_map(data->pdev,
          +                                     start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
          +             return data->ret;
          +         
          +         }
          +         
          +         static int __init iommu_prepare_with_active_regions(struct pci_dev *pdev)
          +         {
          +             int nid;
          +             struct iommu_prepare_data data;
          +         
          +             data.pdev = pdev;
          +             data.ret = 0;
          +         
          +             for_each_online_node(nid) {
          +                     work_with_active_regions(nid, iommu_prepare_work_fn, &data);
          +                     if (data.ret)
          +                             return data.ret;
          +             }
          +             return data.ret;
          +         }
          +         
                    static void __init iommu_prepare_gfx_mapping(void)
                    {
                        struct pci_dev *pdev = NULL;
          -             u64 base, size;
          -             int slot;
                        int ret;
                    
                        for_each_pci_dev(pdev) {
                                        continue;
                                printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
                                        pci_name(pdev));
          -                     slot = arch_get_ram_range(0, &base, &size);
          -                     while (slot >= 0) {
          -                             ret = iommu_prepare_identity_map(pdev,
          -                                             base, base + size);
          -                             if (ret)
          -                                     goto error;
          -                             slot = arch_get_ram_range(slot, &base, &size);
          -                     }
          -                     continue;
          -         error:
          -                     printk(KERN_ERR "IOMMU: mapping reserved region failed\n");
          +                     ret = iommu_prepare_with_active_regions(pdev);
          +                     if (ret)
          +                             printk(KERN_ERR "IOMMU: mapping reserved region failed\n");
                        }
                    }
                    #endif
                        deferred_flush = kzalloc(g_num_of_iommus *
                                sizeof(struct deferred_flush_tables), GFP_KERNEL);
                        if (!deferred_flush) {
      --- - --  -               kfree(g_iommus);
                                ret = -ENOMEM;
                                goto error;
                        }
diff --combined include/asm-x86/paravirt.h
index ef5e8ec6a6ab7431f0c799a2b56a4cd5f18fe5e9,ef5e8ec6a6ab7431f0c799a2b56a4cd5f18fe5e9,719d959d0bc48dab3b27df2226a5ae4444f94ef6,ef5e8ec6a6ab7431f0c799a2b56a4cd5f18fe5e9,ef5e8ec6a6ab7431f0c799a2b56a4cd5f18fe5e9,ef5e8ec6a6ab7431f0c799a2b56a4cd5f18fe5e9,ef5e8ec6a6ab7431f0c799a2b56a4cd5f18fe5e9,ef5e8ec6a6ab7431f0c799a2b56a4cd5f18fe5e9,ef5e8ec6a6ab7431f0c799a2b56a4cd5f18fe5e9,ef5e8ec6a6ab7431f0c799a2b56a4cd5f18fe5e9,0f13b945e2400323cb65dfd6f8a7cc5b035c3319,ef5e8ec6a6ab7431f0c799a2b56a4cd5f18fe5e9,ef5e8ec6a6ab7431f0c799a2b56a4cd5f18fe5e9,ef5e8ec6a6ab7431f0c799a2b56a4cd5f18fe5e9,ef5e8ec6a6ab7431f0c799a2b56a4cd5f18fe5e9,ef5e8ec6a6ab7431f0c799a2b56a4cd5f18fe5e9,ef5e8ec6a6ab7431f0c799a2b56a4cd5f18fe5e9,ef5e8ec6a6ab7431f0c799a2b56a4cd5f18fe5e9,ef5e8ec6a6ab7431f0c799a2b56a4cd5f18fe5e9,eef8095a09dcca01b0cb7ad4aa4c76ef52fe10f9..695ce9383f52620590c627ace65d63b2d47912fe
@@@@@@@@@@@@@@@@@@@@@ -84,7 -84,7 -84,7 -84,7 -84,7 -84,7 -84,7 -84,7 -84,7 -84,7 -84,7 -84,7 -84,7 -84,7 -84,7 -84,7 -84,7 -84,7 -84,7 -84,7 +84,7 @@@@@@@@@@@@@@@@@@@@@ struct pv_time_ops 
                        int (*set_wallclock)(unsigned long);
                    
                        unsigned long long (*sched_clock)(void);
          -             unsigned long (*get_cpu_khz)(void);
          +             unsigned long (*get_tsc_khz)(void);
                    };
                    
                    struct pv_cpu_ops {
                        void (*set_ldt)(const void *desc, unsigned entries);
                        unsigned long (*store_tr)(void);
                        void (*load_tls)(struct thread_struct *t, unsigned int cpu);
          +         #ifdef CONFIG_X86_64
          +             void (*load_gs_index)(unsigned int idx);
          +         #endif
                        void (*write_ldt_entry)(struct desc_struct *ldt, int entrynum,
                                                const void *desc);
                        void (*write_gdt_entry)(struct desc_struct *,
                        u64 (*read_pmc)(int counter);
                        unsigned long long (*read_tscp)(unsigned int *aux);
                    
          -             /* These two are jmp to, not actually called. */
          -             void (*irq_enable_syscall_ret)(void);
          +             /*
          +              * Atomically enable interrupts and return to userspace.  This
          +              * is only ever used to return to 32-bit processes; in a
          +              * 64-bit kernel, it's used for 32-on-64 compat processes, but
          +              * never native 64-bit processes.  (Jump, not call.)
          +              */
          +             void (*irq_enable_sysexit)(void);
          +         
          +             /*
          +              * Switch to usermode gs and return to 64-bit usermode using
          +              * sysret.  Only used in 64-bit kernels to return to 64-bit
          +              * processes.  Usermode register state, including %rsp, must
          +              * already be restored.
          +              */
          +             void (*usergs_sysret64)(void);
          +         
          +             /*
          +              * Switch to usermode gs and return to 32-bit usermode using
          +              * sysret.  Used to return to 32-on-64 compat processes.
          +              * Other usermode register state, including %esp, must already
          +              * be restored.
          +              */
          +             void (*usergs_sysret32)(void);
          +         
          +             /* Normal iret.  Jump to this with the standard iret stack
          +                frame set up. */
                        void (*iret)(void);
                    
                        void (*swapgs)(void);
                        void (*irq_enable)(void);
                        void (*safe_halt)(void);
                        void (*halt)(void);
          +         
          +         #ifdef CONFIG_X86_64
          +             void (*adjust_exception_frame)(void);
          +         #endif
                    };
                    
                    struct pv_apic_ops {
                         * these shouldn't be in this interface.
                         */
                        void (*apic_write)(unsigned long reg, u32 v);
-- -----------------    void (*apic_write_atomic)(unsigned long reg, u32 v);
                        u32 (*apic_read)(unsigned long reg);
                        void (*setup_boot_clock)(void);
                        void (*setup_secondary_clock)(void);
                        void (*flush_tlb_others)(const cpumask_t *cpus, struct mm_struct *mm,
                                                 unsigned long va);
                    
          -             /* Hooks for allocating/releasing pagetable pages */
          +             /* Hooks for allocating and freeing a pagetable top-level */
          +             int  (*pgd_alloc)(struct mm_struct *mm);
          +             void (*pgd_free)(struct mm_struct *mm, pgd_t *pgd);
          +         
          +             /*
          +              * Hooks for allocating/releasing pagetable pages when they're
          +              * attached to a pagetable
          +              */
                        void (*alloc_pte)(struct mm_struct *mm, u32 pfn);
                        void (*alloc_pmd)(struct mm_struct *mm, u32 pfn);
                        void (*alloc_pmd_clone)(u32 pfn, u32 clonepfn, u32 start, u32 count);
                        void (*pte_update_defer)(struct mm_struct *mm,
                                                 unsigned long addr, pte_t *ptep);
                    
          +             pte_t (*ptep_modify_prot_start)(struct mm_struct *mm, unsigned long addr,
          +                                             pte_t *ptep);
          +             void (*ptep_modify_prot_commit)(struct mm_struct *mm, unsigned long addr,
          +                                             pte_t *ptep, pte_t pte);
          +         
                        pteval_t (*pte_val)(pte_t);
          +             pteval_t (*pte_flags)(pte_t);
                        pte_t (*make_pte)(pteval_t pte);
                    
                        pgdval_t (*pgd_val)(pgd_t);
                    #endif
                    
                        struct pv_lazy_ops lazy_mode;
          +         
          +             /* dom0 ops */
          +         
           +             /* Sometimes the physical address is a pfn, and sometimes it's
          +                an mfn.  We can tell which is which from the index. */
          +             void (*set_fixmap)(unsigned /* enum fixed_addresses */ idx,
          +                                unsigned long phys, pgprot_t flags);
                    };
                    
                    /* This contains all the paravirt structures: we get a convenient
@@@@@@@@@@@@@@@@@@@@@ -490,17 -490,17 -489,17 -490,17 -490,17 -490,17 -490,17 -490,17 -490,17 -490,17 -439,10 -490,17 -490,17 -490,17 -490,17 -490,17 -490,17 -490,17 -490,17 -490,17 +489,17 @@@@@@@@@@@@@@@@@@@@@ int paravirt_disable_iospace(void)
                    #define VEXTRA_CLOBBERS      , "rax", "r8", "r9", "r10", "r11"
                    #endif
                    
          +         #ifdef CONFIG_PARAVIRT_DEBUG
          +         #define PVOP_TEST_NULL(op)  BUG_ON(op == NULL)
          +         #else
          +         #define PVOP_TEST_NULL(op)  ((void)op)
          +         #endif
          +         
                    #define __PVOP_CALL(rettype, op, pre, post, ...)                    \
                        ({                                                              \
                                rettype __ret;                                          \
                                PVOP_CALL_ARGS;                                 \
          +                     PVOP_TEST_NULL(op);                                     \
                                /* This is 32-bit specific, but is okay in 64-bit */    \
                                /* since this condition will never hold */              \
                                if (sizeof(rettype) > sizeof(unsigned long)) {          \
                    #define __PVOP_VCALL(op, pre, post, ...)                            \
                        ({                                                              \
                                PVOP_VCALL_ARGS;                                        \
          +                     PVOP_TEST_NULL(op);                                     \
                                asm volatile(pre                                        \
                                             paravirt_alt(PARAVIRT_CALL)                \
                                             post                                       \
@@@@@@@@@@@@@@@@@@@@@ -779,7 -779,7 -778,7 -779,7 -779,7 -779,7 -779,7 -779,7 -779,7 -779,7 -720,7 -779,7 -779,7 -779,7 -779,7 -779,7 -779,7 -779,7 -779,7 -779,7 +778,7 @@@@@@@@@@@@@@@@@@@@@ static inline unsigned long long paravi
                    {
                        return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock);
                    }
          -         #define calculate_cpu_khz() (pv_time_ops.get_cpu_khz())
          +         #define calibrate_tsc() (pv_time_ops.get_tsc_khz())
                    
                    static inline unsigned long long paravirt_read_pmc(int counter)
                    {
@@@@@@@@@@@@@@@@@@@@@ -848,13 -848,13 -847,13 -848,13 -848,13 -848,13 -848,13 -848,13 -848,13 -848,13 -789,6 -848,13 -848,13 -848,13 -848,13 -848,13 -848,13 -848,13 -848,13 -848,13 +847,13 @@@@@@@@@@@@@@@@@@@@@ static inline void load_TLS(struct thre
                        PVOP_VCALL2(pv_cpu_ops.load_tls, t, cpu);
                    }
                    
          +         #ifdef CONFIG_X86_64
          +         static inline void load_gs_index(unsigned int gs)
          +         {
          +             PVOP_VCALL1(pv_cpu_ops.load_gs_index, gs);
          +         }
          +         #endif
          +         
                    static inline void write_ldt_entry(struct desc_struct *dt, int entry,
                                                   const void *desc)
                    {
@@@@@@@@@@@@@@@@@@@@@ -896,11 -896,11 -895,6 -896,11 -896,11 -896,11 -896,11 -896,11 -896,11 -896,11 -830,11 -896,11 -896,11 -896,11 -896,11 -896,11 -896,11 -896,11 -896,11 -896,11 +895,6 @@@@@@@@@@@@@@@@@@@@@ static inline void apic_write(unsigned 
                        PVOP_VCALL2(pv_apic_ops.apic_write, reg, v);
                    }
                    
-- -----------------static inline void apic_write_atomic(unsigned long reg, u32 v)
-- -----------------{
-- -----------------    PVOP_VCALL2(pv_apic_ops.apic_write_atomic, reg, v);
-- -----------------}
-- -----------------
                    static inline u32 apic_read(unsigned long reg)
                    {
                        return PVOP_CALL1(unsigned long, pv_apic_ops.apic_read, reg);
@@@@@@@@@@@@@@@@@@@@@ -978,16 -978,16 -972,16 -978,16 -978,16 -978,16 -978,16 -978,16 -978,16 -978,16 -912,6 -978,16 -978,16 -978,16 -978,16 -978,16 -978,16 -978,16 -978,16 -978,16 +972,16 @@@@@@@@@@@@@@@@@@@@@ static inline void flush_tlb_others(cpu
                        PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, &cpumask, mm, va);
                    }
                    
          +         static inline int paravirt_pgd_alloc(struct mm_struct *mm)
          +         {
          +             return PVOP_CALL1(int, pv_mmu_ops.pgd_alloc, mm);
          +         }
          +         
          +         static inline void paravirt_pgd_free(struct mm_struct *mm, pgd_t *pgd)
          +         {
          +             PVOP_VCALL2(pv_mmu_ops.pgd_free, mm, pgd);
          +         }
          +         
                    static inline void paravirt_alloc_pte(struct mm_struct *mm, unsigned pfn)
                    {
                        PVOP_VCALL2(pv_mmu_ops.alloc_pte, mm, pfn);
                        return ret;
                    }
                    
          +         static inline pteval_t pte_flags(pte_t pte)
          +         {
          +             pteval_t ret;
          +         
          +             if (sizeof(pteval_t) > sizeof(long))
          +                     ret = PVOP_CALL2(pteval_t, pv_mmu_ops.pte_flags,
          +                                      pte.pte, (u64)pte.pte >> 32);
          +             else
          +                     ret = PVOP_CALL1(pteval_t, pv_mmu_ops.pte_flags,
          +                                      pte.pte);
          +         
          +             return ret;
          +         }
          +         
                    static inline pgd_t __pgd(pgdval_t val)
                    {
                        pgdval_t ret;
                        return ret;
                    }
                    
          +         #define  __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
          +         static inline pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr,
          +                                                pte_t *ptep)
          +         {
          +             pteval_t ret;
          +         
          +             ret = PVOP_CALL3(pteval_t, pv_mmu_ops.ptep_modify_prot_start,
          +                              mm, addr, ptep);
          +         
          +             return (pte_t) { .pte = ret };
          +         }
          +         
          +         static inline void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
          +                                                pte_t *ptep, pte_t pte)
          +         {
          +             if (sizeof(pteval_t) > sizeof(long))
          +                     /* 5 arg words */
          +                     pv_mmu_ops.ptep_modify_prot_commit(mm, addr, ptep, pte);
          +             else
          +                     PVOP_VCALL4(pv_mmu_ops.ptep_modify_prot_commit,
          +                                 mm, addr, ptep, pte.pte);
          +         }
          +         
                    static inline void set_pte(pte_t *ptep, pte_t pte)
                    {
                        if (sizeof(pteval_t) > sizeof(long))
                        }
                    }
                    
          +         static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx,
          +                                     unsigned long phys, pgprot_t flags)
          +         {
          +             pv_mmu_ops.set_fixmap(idx, phys, flags);
          +         }
          +         
                    void _paravirt_nop(void);
                    #define paravirt_nop        ((void *)_paravirt_nop)
                    
@@@@@@@@@@@@@@@@@@@@@ -1396,8 -1396,8 -1390,8 -1396,8 -1396,8 -1396,8 -1396,8 -1396,8 -1396,8 -1396,8 -1277,8 -1396,8 -1396,8 -1396,8 -1396,8 -1396,8 -1396,8 -1396,8 -1396,8 -1396,8 +1390,8 @@@@@@@@@@@@@@@@@@@@@ extern struct paravirt_patch_site __par
                     * caller saved registers but the argument parameter */
                    #define PV_SAVE_REGS "pushq %%rdi;"
                    #define PV_RESTORE_REGS "popq %%rdi;"
------------------- #define PV_EXTRA_CLOBBERS EXTRA_CLOBBERS, "rcx" , "rdx"
------------------- #define PV_VEXTRA_CLOBBERS EXTRA_CLOBBERS, "rdi", "rcx" , "rdx"
+++++++++++++++++++ #define PV_EXTRA_CLOBBERS EXTRA_CLOBBERS, "rcx" , "rdx", "rsi"
+++++++++++++++++++ #define PV_VEXTRA_CLOBBERS EXTRA_CLOBBERS, "rdi", "rcx" , "rdx", "rsi"
                    #define PV_FLAGS_ARG "D"
                    #endif
                    
                    
                    
                    #ifdef CONFIG_X86_64
------------------- #define PV_SAVE_REGS   pushq %rax; pushq %rdi; pushq %rcx; pushq %rdx
------------------- #define PV_RESTORE_REGS popq %rdx; popq %rcx; popq %rdi; popq %rax
+++++++++++++++++++ #define PV_SAVE_REGS                                \
+++++++++++++++++++     push %rax;                              \
+++++++++++++++++++     push %rcx;                              \
+++++++++++++++++++     push %rdx;                              \
+++++++++++++++++++     push %rsi;                              \
+++++++++++++++++++     push %rdi;                              \
+++++++++++++++++++     push %r8;                               \
+++++++++++++++++++     push %r9;                               \
+++++++++++++++++++     push %r10;                              \
+++++++++++++++++++     push %r11
+++++++++++++++++++ #define PV_RESTORE_REGS                             \
+++++++++++++++++++     pop %r11;                               \
+++++++++++++++++++     pop %r10;                               \
+++++++++++++++++++     pop %r9;                                \
+++++++++++++++++++     pop %r8;                                \
+++++++++++++++++++     pop %rdi;                               \
+++++++++++++++++++     pop %rsi;                               \
+++++++++++++++++++     pop %rdx;                               \
+++++++++++++++++++     pop %rcx;                               \
+++++++++++++++++++     pop %rax
                    #define PARA_PATCH(struct, off)        ((PARAVIRT_PATCH_##struct + (off)) / 8)
                    #define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .quad, 8)
          +         #define PARA_INDIRECT(addr) *addr(%rip)
                    #else
                    #define PV_SAVE_REGS   pushl %eax; pushl %edi; pushl %ecx; pushl %edx
                    #define PV_RESTORE_REGS popl %edx; popl %ecx; popl %edi; popl %eax
                    #define PARA_PATCH(struct, off)        ((PARAVIRT_PATCH_##struct + (off)) / 4)
                    #define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .long, 4)
          +         #define PARA_INDIRECT(addr) *%cs:addr
                    #endif
                    
                    #define INTERRUPT_RETURN                                            \
                        PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret), CLBR_NONE,       \
          -                       jmp *%cs:pv_cpu_ops+PV_CPU_iret)
          +                       jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret))
                    
                    #define DISABLE_INTERRUPTS(clobbers)                                        \
                        PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \
          -                       PV_SAVE_REGS;                 \
          -                       call *%cs:pv_irq_ops+PV_IRQ_irq_disable;              \
          +                       PV_SAVE_REGS;                                         \
          +                       call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_disable);    \
                                  PV_RESTORE_REGS;)                     \
                    
                    #define ENABLE_INTERRUPTS(clobbers)                                 \
                        PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers,  \
          -                       PV_SAVE_REGS;                 \
          -                       call *%cs:pv_irq_ops+PV_IRQ_irq_enable;               \
          +                       PV_SAVE_REGS;                                         \
          +                       call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable);     \
                                  PV_RESTORE_REGS;)
                    
          -         #define ENABLE_INTERRUPTS_SYSCALL_RET                                       \
          -             PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_syscall_ret),\
          +         #define USERGS_SYSRET32                                                     \
          +             PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret32),       \
                                  CLBR_NONE,                                            \
          -                       jmp *%cs:pv_cpu_ops+PV_CPU_irq_enable_syscall_ret)
          -         
          +                       jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret32))
                    
                    #ifdef CONFIG_X86_32
          -         #define GET_CR0_INTO_EAX                    \
          -             push %ecx; push %edx;                   \
          -             call *pv_cpu_ops+PV_CPU_read_cr0;       \
          +         #define GET_CR0_INTO_EAX                            \
          +             push %ecx; push %edx;                           \
          +             call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0); \
                        pop %edx; pop %ecx
          -         #else
          +         
          +         #define ENABLE_INTERRUPTS_SYSEXIT                                   \
          +             PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit),    \
          +                       CLBR_NONE,                                            \
          +                       jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit))
          +         
          +         
          +         #else       /* !CONFIG_X86_32 */
          +         
          +         /*
          +          * If swapgs is used while the userspace stack is still current,
          +          * there's no way to call a pvop.  The PV replacement *must* be
          +          * inlined, or the swapgs instruction must be trapped and emulated.
          +          */
          +         #define SWAPGS_UNSAFE_STACK                                         \
          +             PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE,     \
          +                       swapgs)
          +         
                    #define SWAPGS                                                              \
                        PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE,     \
                                  PV_SAVE_REGS;                                         \
          -                       call *pv_cpu_ops+PV_CPU_swapgs;                       \
          +                       call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs);         \
                                  PV_RESTORE_REGS                                       \
                                 )
                    
          -         #define GET_CR2_INTO_RCX                    \
          -             call *pv_mmu_ops+PV_MMU_read_cr2;       \
          -             movq %rax, %rcx;                        \
          +         #define GET_CR2_INTO_RCX                            \
          +             call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2); \
          +             movq %rax, %rcx;                                \
                        xorq %rax, %rax;
                    
          -         #endif
          +         #define PARAVIRT_ADJUST_EXCEPTION_FRAME                                     \
          +             PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_adjust_exception_frame), \
          +                       CLBR_NONE,                                            \
          +                       call PARA_INDIRECT(pv_irq_ops+PV_IRQ_adjust_exception_frame))
          +         
          +         #define USERGS_SYSRET64                                                     \
          +             PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64),       \
          +                       CLBR_NONE,                                            \
          +                       jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64))
          +         
          +         #define ENABLE_INTERRUPTS_SYSEXIT32                                 \
          +             PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit),    \
          +                       CLBR_NONE,                                            \
          +                       jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit))
          +         #endif      /* CONFIG_X86_32 */
                    
                    #endif /* __ASSEMBLY__ */
                    #endif /* CONFIG_PARAVIRT */
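
The ptep_modify_prot_start()/ptep_modify_prot_commit() pair added above forms a small transaction: read the PTE, modify the local copy, then write it back, so a paravirt backend can batch or trap the update instead of seeing a racy read-modify-write. A minimal sketch of the intended caller pattern (illustrative; example_change_prot() is made up, pte_modify() is the existing generic helper):

static void example_change_prot(struct mm_struct *mm, unsigned long addr,
                                pte_t *ptep, pgprot_t newprot)
{
        pte_t pte;

        pte = ptep_modify_prot_start(mm, addr, ptep);   /* open the transaction */
        pte = pte_modify(pte, newprot);                 /* adjust protections on the local copy */
        ptep_modify_prot_commit(mm, addr, ptep, pte);   /* publish the new PTE */
}
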
diff --combined include/asm-x86/setup.h
index 90ab2225e71bbefe9fb06c9d85127c555e8a8445,90ab2225e71bbefe9fb06c9d85127c555e8a8445,90ab2225e71bbefe9fb06c9d85127c555e8a8445,f003ceaad6af8337961cc02d3ebfff1585443a28,90ab2225e71bbefe9fb06c9d85127c555e8a8445,90ab2225e71bbefe9fb06c9d85127c555e8a8445,90ab2225e71bbefe9fb06c9d85127c555e8a8445,1d121c632d9e7ad57a61144161886508d5ccb664,90ab2225e71bbefe9fb06c9d85127c555e8a8445,90ab2225e71bbefe9fb06c9d85127c555e8a8445,fa6763af8d2686c8d1b52a023f72c70806ec5bc4,90ab2225e71bbefe9fb06c9d85127c555e8a8445,90ab2225e71bbefe9fb06c9d85127c555e8a8445,90ab2225e71bbefe9fb06c9d85127c555e8a8445,90ab2225e71bbefe9fb06c9d85127c555e8a8445,90ab2225e71bbefe9fb06c9d85127c555e8a8445,90ab2225e71bbefe9fb06c9d85127c555e8a8445,90ab2225e71bbefe9fb06c9d85127c555e8a8445,90ab2225e71bbefe9fb06c9d85127c555e8a8445,659492624e747f26a647bfd8640efd60332b4659..a07c6f1c01e15b9480f14fcca8dc13d875334fc9
                    /* Interrupt control for vSMPowered x86_64 systems */
                    void vsmp_init(void);
                    
          -         char *machine_specific_memory_setup(void);
       +  +         #ifdef CONFIG_X86_VISWS
       +  +         extern void visws_early_detect(void);
       +  +         extern int is_visws_box(void);
       +  +         #else
       +  +         static inline void visws_early_detect(void) { }
       +  +         static inline int is_visws_box(void) { return 0; }
       +  +         #endif
       +  +         
       +  +         /*
       +  +          * Any setup quirks to be performed?
       +  +          */
--- --- -- ---------extern int (*arch_time_init_quirk)(void);
--- --- -- ---------extern int (*arch_pre_intr_init_quirk)(void);
--- --- -- ---------extern int (*arch_intr_init_quirk)(void);
--- --- -- ---------extern int (*arch_trap_init_quirk)(void);
--- --- -- ---------extern char * (*arch_memory_setup_quirk)(void);
--- --- -- ---------extern int (*mach_get_smp_config_quirk)(unsigned int early);
--- --- -- ---------extern int (*mach_find_smp_config_quirk)(unsigned int reserve);
+++ ++++++++++++++++struct mpc_config_processor;
+++ ++++++++++++++++struct mpc_config_bus;
+++ ++++++++++++++++struct mp_config_oemtable;
+++ ++++++++++++++++struct x86_quirks {
+++ ++++++++++++++++    int (*arch_pre_time_init)(void);
+++ ++++++++++++++++    int (*arch_time_init)(void);
+++ ++++++++++++++++    int (*arch_pre_intr_init)(void);
+++ ++++++++++++++++    int (*arch_intr_init)(void);
+++ ++++++++++++++++    int (*arch_trap_init)(void);
+++ ++++++++++++++++    char * (*arch_memory_setup)(void);
+++ ++++++++++++++++    int (*mach_get_smp_config)(unsigned int early);
+++ ++++++++++++++++    int (*mach_find_smp_config)(unsigned int reserve);
+++ ++++++++++++++++
+++ ++++++++++++++++    int *mpc_record;
+++ ++++++++++++++++    int (*mpc_apic_id)(struct mpc_config_processor *m);
+++ ++++++++++++++++    void (*mpc_oem_bus_info)(struct mpc_config_bus *m, char *name);
+++ ++++++++++++++++    void (*mpc_oem_pci_bus)(struct mpc_config_bus *m);
+++ ++++++++++++++++    void (*smp_read_mpc_oem)(struct mp_config_oemtable *oemtable,
+++ ++++++++++++++++                                    unsigned short oemsize);
+++ ++++++++++++++++};
+++ ++++++++++++++++
+++ ++++++++++++++++extern struct x86_quirks *x86_quirks;
       +  +         
                    #ifndef CONFIG_PARAVIRT
                    #define paravirt_post_allocator_init()      do {} while (0)
                    #endif
                     */
                    extern struct boot_params boot_params;
                    
          -         #ifdef __i386__
                    /*
                     * Do NOT EVER look at the BIOS memory size location.
                     * It does not work on many machines.
                     */
                    #define LOWMEMSIZE()        (0x9f000)
                    
          -         struct e820entry;
          -         
          -         char * __init machine_specific_memory_setup(void);
          -         char *memory_setup(void);
          +         #ifdef __i386__
                    
          -         int __init copy_e820_map(struct e820entry *biosmap, int nr_map);
          -         int __init sanitize_e820_map(struct e820entry *biosmap, char *pnr_map);
          -         void __init add_memory_region(unsigned long long start,
          -                                   unsigned long long size, int type);
          +         void __init i386_start_kernel(void);
          +         extern void probe_roms(void);
                    
          +         extern unsigned long init_pg_tables_start;
                    extern unsigned long init_pg_tables_end;
                    
          -         
          +         #else
+++++++++++++++++++ void __init x86_64_init_pda(void);
          +         void __init x86_64_start_kernel(char *real_mode);
          +         void __init x86_64_start_reservations(char *real_mode_data);
                    
                    #endif /* __i386__ */
                    #endif /* _SETUP */
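
The x86_quirks table above gathers the former arch_*_quirk function pointers into one structure that a subarchitecture installs early in boot by pointing x86_quirks at its own instance. A minimal, purely illustrative registration sketch (the example_* names are made up; arch_memory_setup is assumed to return the label of the memory map, as machine_specific_memory_setup() did):

static char * __init example_memory_setup(void)
{
        /* Build the platform's e820 map here, then name its origin. */
        return "BIOS-example";
}

static struct x86_quirks example_quirks = {
        .arch_memory_setup = example_memory_setup,
        /* hooks left NULL are assumed to be skipped by the callers */
};

void __init example_early_detect(void)
{
        x86_quirks = &example_quirks;
}
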