Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6
author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 8 Jan 2009 22:25:41 +0000 (14:25 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 8 Jan 2009 22:25:41 +0000 (14:25 -0800)
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6: (84 commits)
  wimax: fix kernel-doc for debufs_dentry member of struct wimax_dev
  net: convert pegasus driver to net_device_ops
  bnx2x: Prevent eeprom set when driver is down
  net: switch kaweth driver to netdevops
  pcnet32: round off carrier watch timer
  i2400m/usb: wrap USB power saving in #ifdef CONFIG_PM
  wimax: testing for rfkill support should also test for CONFIG_RFKILL_MODULE
  wimax: fix kconfig interactions with rfkill and input layers
  wimax: fix '#ifndef CONFIG_BUG' layout to avoid warning
  r6040: bump release number to 0.20
  r6040: warn about MAC address being unset
  r6040: check PHY status when bringing interface up
  r6040: make printks consistent with DRV_NAME
  gianfar: Fixup use of BUS_ID_SIZE
  mlx4_en: Returning real Max in get_ringparam
  mlx4_en: Consider inline packets on completion
  netdev: bfin_mac: enable bfin_mac net dev driver for BF51x
  qeth: convert to net_device_ops
  vlan: add neigh_setup
  dm9601: warn on invalid mac address
  ...

279 files changed:
Documentation/cgroups/cgroups.txt
Documentation/controllers/memcg_test.txt [new file with mode: 0644]
Documentation/controllers/memory.txt
Documentation/hwmon/abituguru-datasheet
Documentation/kernel-parameters.txt
Documentation/powerpc/dts-bindings/fsl/board.txt
Documentation/scsi/scsi_fc_transport.txt
Documentation/w1/masters/00-INDEX
Documentation/w1/masters/mxc-w1 [new file with mode: 0644]
Documentation/w1/w1.netlink
MAINTAINERS
arch/arm/configs/clps7500_defconfig [deleted file]
arch/arm/kernel/isa.c
arch/arm/mach-at91/at91cap9.c
arch/arm/mach-at91/at91rm9200.c
arch/arm/mach-at91/at91sam9260.c
arch/arm/mach-at91/at91sam9261.c
arch/arm/mach-at91/at91sam9263.c
arch/arm/mach-at91/at91sam9rl.c
arch/arm/mach-at91/board-sam9rlek.c
arch/arm/mach-clps711x/edb7211-mm.c
arch/arm/mach-clps711x/fortunet.c
arch/arm/mach-davinci/devices.c
arch/arm/mach-davinci/include/mach/gpio.h
arch/arm/mach-footbridge/common.c
arch/arm/mach-footbridge/common.h
arch/arm/mach-footbridge/dc21285.c
arch/arm/mach-footbridge/isa-irq.c
arch/arm/mach-h720x/h7202-eval.c
arch/arm/mach-kirkwood/common.c
arch/arm/mach-kirkwood/pcie.c
arch/arm/mach-ks8695/devices.c
arch/arm/mach-msm/devices.c
arch/arm/mach-mv78xx0/pcie.c
arch/arm/mach-mx2/devices.c
arch/arm/mach-mx3/devices.c
arch/arm/mach-netx/fb.c
arch/arm/mach-netx/time.c
arch/arm/mach-netx/xc.c
arch/arm/mach-omap1/mcbsp.c
arch/arm/mach-omap2/mcbsp.c
arch/arm/mach-orion5x/pci.c
arch/arm/mach-pnx4008/gpio.c
arch/arm/mach-pnx4008/i2c.c
arch/arm/mach-pxa/e350.c
arch/arm/mach-pxa/e400.c
arch/arm/mach-pxa/e740.c
arch/arm/mach-pxa/e750.c
arch/arm/mach-pxa/e800.c
arch/arm/mach-pxa/include/mach/pxa3xx-regs.h
arch/arm/mach-realview/platsmp.c
arch/arm/mach-s3c2410/include/mach/gpio.h
arch/arm/mach-s3c2410/include/mach/irqs.h
arch/arm/mach-s3c2440/mach-at2440evb.c
arch/arm/mach-s3c6400/include/mach/irqs.h
arch/arm/plat-omap/i2c.c
arch/arm/plat-s3c/dev-fb.c
arch/arm/plat-s3c/dev-i2c0.c
arch/arm/plat-s3c/dev-i2c1.c
arch/arm/plat-s3c24xx/gpiolib.c
arch/arm/plat-s3c24xx/pwm.c
arch/arm/plat-s3c64xx/include/plat/irqs.h
arch/powerpc/Kconfig
arch/powerpc/boot/Makefile
arch/powerpc/boot/dts/mpc836x_mds.dts
arch/powerpc/boot/dts/mpc836x_rdk.dts
arch/powerpc/boot/dts/mpc8641_hpcn.dts
arch/powerpc/boot/install.sh
arch/powerpc/configs/85xx/mpc8572_ds_defconfig
arch/powerpc/include/asm/ioctls.h
arch/powerpc/include/asm/kexec.h
arch/powerpc/include/asm/ps3.h
arch/powerpc/include/asm/qe.h
arch/powerpc/include/asm/qe_ic.h
arch/powerpc/include/asm/spu.h
arch/powerpc/kernel/Makefile
arch/powerpc/kernel/cacheinfo.c [new file with mode: 0644]
arch/powerpc/kernel/cacheinfo.h [new file with mode: 0644]
arch/powerpc/kernel/pci-common.c
arch/powerpc/kernel/pci_64.c
arch/powerpc/kernel/ppc_ksyms.c
arch/powerpc/kernel/prom.c
arch/powerpc/kernel/prom_init.c
arch/powerpc/kernel/sysfs.c
arch/powerpc/mm/mmu_decl.h
arch/powerpc/mm/numa.c
arch/powerpc/mm/pgtable_32.c
arch/powerpc/mm/tlb_nohash.c
arch/powerpc/oprofile/cell/pr_util.h
arch/powerpc/platforms/52xx/mpc52xx_common.c
arch/powerpc/platforms/83xx/mpc831x_rdb.c
arch/powerpc/platforms/83xx/mpc832x_mds.c
arch/powerpc/platforms/83xx/mpc832x_rdb.c
arch/powerpc/platforms/83xx/mpc836x_mds.c
arch/powerpc/platforms/83xx/mpc836x_rdk.c
arch/powerpc/platforms/83xx/mpc837x_mds.c
arch/powerpc/platforms/83xx/mpc837x_rdb.c
arch/powerpc/platforms/83xx/mpc83xx.h
arch/powerpc/platforms/85xx/mpc85xx_ds.c
arch/powerpc/platforms/85xx/smp.c
arch/powerpc/platforms/Kconfig
arch/powerpc/platforms/Kconfig.cputype
arch/powerpc/platforms/cell/beat_htab.c
arch/powerpc/platforms/cell/beat_udbg.c
arch/powerpc/platforms/cell/cbe_cpufreq_pervasive.c
arch/powerpc/platforms/cell/interrupt.c
arch/powerpc/platforms/cell/io-workarounds.c
arch/powerpc/platforms/cell/iommu.c
arch/powerpc/platforms/cell/spufs/spufs.h
arch/powerpc/platforms/iseries/Kconfig
arch/powerpc/platforms/iseries/setup.c
arch/powerpc/platforms/pasemi/cpufreq.c
arch/powerpc/platforms/pasemi/dma_lib.c
arch/powerpc/platforms/powermac/pci.c
arch/powerpc/platforms/powermac/time.c
arch/powerpc/platforms/ps3/device-init.c
arch/powerpc/sysdev/Makefile
arch/powerpc/sysdev/fsl_pci.c
arch/powerpc/sysdev/fsl_soc.h
arch/powerpc/sysdev/qe_lib/Kconfig
arch/powerpc/sysdev/qe_lib/gpio.c
arch/powerpc/sysdev/simple_gpio.c [new file with mode: 0644]
arch/powerpc/sysdev/simple_gpio.h [new file with mode: 0644]
arch/sparc/kernel/sun4m_smp.c
arch/x86/kernel/cpu/cpufreq/longhaul.c
drivers/amba/bus.c
drivers/ata/ahci.c
drivers/ata/ata_piix.c
drivers/ata/libata-core.c
drivers/ata/libata-sff.c
drivers/ata/pata_ali.c
drivers/ata/pata_amd.c
drivers/ata/pata_hpt366.c
drivers/ata/pata_hpt3x3.c
drivers/ata/pata_mpiix.c
drivers/ata/pata_platform.c
drivers/ata/pata_sil680.c
drivers/ata/sata_sil24.c
drivers/atm/iphase.c
drivers/char/Kconfig
drivers/char/hvc_beat.c
drivers/char/pty.c
drivers/char/tpm/tpm_nsc.c
drivers/char/vt.c
drivers/firmware/dcdbas.c
drivers/firmware/dcdbas.h
drivers/firmware/memmap.c
drivers/infiniband/hw/nes/nes_cm.c
drivers/input/mouse/pxa930_trkball.c
drivers/isdn/hardware/eicon/debuglib.h
drivers/isdn/hardware/eicon/os_4bri.c
drivers/isdn/hardware/eicon/os_bri.c
drivers/isdn/hardware/eicon/os_pri.c
drivers/md/bitmap.c
drivers/md/faulty.c
drivers/md/linear.c
drivers/md/md.c
drivers/md/multipath.c
drivers/md/raid0.c
drivers/md/raid1.c
drivers/md/raid10.c
drivers/md/raid5.c
drivers/misc/Kconfig
drivers/misc/Makefile
drivers/misc/dell-laptop.c [new file with mode: 0644]
drivers/mtd/devices/Kconfig
drivers/mtd/devices/Makefile
drivers/mtd/devices/ps3vram.c [new file with mode: 0644]
drivers/mtd/ubi/kapi.c
drivers/net/wireless/ath5k/dma.c
drivers/net/wireless/zd1211rw/zd_mac.c
drivers/rtc/rtc-ds1307.c
drivers/s390/block/dasd_3990_erp.c
drivers/s390/block/dasd_int.h
drivers/s390/char/tape_3590.c
drivers/s390/cio/cio.c
drivers/s390/cio/qdio_main.c
drivers/scsi/Kconfig
drivers/serial/Kconfig
drivers/serial/Makefile
drivers/serial/nwpserial.c [new file with mode: 0644]
drivers/serial/of_serial.c
drivers/video/amba-clcd.c
drivers/w1/masters/Kconfig
drivers/w1/masters/Makefile
drivers/w1/masters/mxc_w1.c [new file with mode: 0644]
drivers/w1/w1.h
drivers/w1/w1_io.c
drivers/w1/w1_netlink.c
drivers/w1/w1_netlink.h
drivers/xen/Kconfig
drivers/xen/Makefile
drivers/xen/xenbus/xenbus_client.c
drivers/xen/xenbus/xenbus_probe.c
drivers/xen/xenbus/xenbus_xs.c
drivers/xen/xenfs/Makefile [new file with mode: 0644]
drivers/xen/xenfs/super.c [new file with mode: 0644]
drivers/xen/xenfs/xenbus.c [new file with mode: 0644]
drivers/xen/xenfs/xenfs.h [new file with mode: 0644]
fs/binfmt_elf.c
fs/block_dev.c
fs/coda/sysctl.c
fs/dcache.c
fs/dquot.c
fs/ext2/ialloc.c
fs/ext2/inode.c
fs/ext2/ioctl.c
fs/ext2/super.c
fs/ext3/ialloc.c
fs/ext3/ioctl.c
fs/ext3/namei.c
fs/ext3/super.c
fs/ext4/extents.c
fs/ext4/namei.c
fs/jbd/commit.c
fs/jbd/transaction.c
fs/ocfs2/alloc.c
fs/ocfs2/dlmglue.c
fs/ocfs2/file.c
fs/proc/vmcore.c
fs/romfs/inode.c
fs/splice.c
fs/super.c
include/linux/auxvec.h
include/linux/cgroup.h
include/linux/cpuset.h
include/linux/ext2_fs.h
include/linux/ext2_fs_sb.h
include/linux/ext3_fs.h
include/linux/ext3_fs_sb.h
include/linux/jbd.h
include/linux/kernel.h
include/linux/libata.h
include/linux/magic.h
include/linux/memcontrol.h
include/linux/mm_inline.h
include/linux/mmzone.h
include/linux/nwpserial.h [new file with mode: 0644]
include/linux/page_cgroup.h
include/linux/pid.h
include/linux/pid_namespace.h
include/linux/raid/md_k.h
include/linux/raid/md_p.h
include/linux/raid/raid0.h
include/linux/res_counter.h
include/linux/serial_core.h
include/linux/swap.h
include/xen/xenbus.h
init/Kconfig
ipc/mqueue.c
kernel/async.c
kernel/cgroup.c
kernel/cpuset.c
kernel/fork.c
kernel/ns_cgroup.c
kernel/pid.c
kernel/res_counter.c
kernel/sched_fair.c
lib/sort.c
mm/filemap.c
mm/memcontrol.c
mm/memory.c
mm/migrate.c
mm/oom_kill.c
mm/page_alloc.c
mm/page_cgroup.c
mm/shmem.c
mm/swap.c
mm/swap_state.c
mm/swapfile.c
mm/vmscan.c
net/ipv6/route.c
net/ipv6/sysctl_net_ipv6.c
net/sched/sch_sfq.c
net/sctp/auth.c
security/device_cgroup.c
security/smack/smackfs.c
sound/soc/au1x/dbdma2.c
sound/soc/davinci/davinci-pcm.c

index d9014aa0eb68b469836b29afe932532b168a63ee..e33ee74eee77000bc6a2df9ac437a0aa88632dcf 100644 (file)
@@ -227,7 +227,6 @@ Each cgroup is represented by a directory in the cgroup file system
 containing the following files describing that cgroup:
 
  - tasks: list of tasks (by pid) attached to that cgroup
- - releasable flag: cgroup currently removeable?
  - notify_on_release flag: run the release agent on exit?
  - release_agent: the path to use for release notifications (this file
    exists in the top cgroup only)
@@ -360,7 +359,7 @@ Now you want to do something with this cgroup.
 
 In this directory you can find several files:
 # ls
-notify_on_release releasable tasks
+notify_on_release tasks
 (plus whatever files added by the attached subsystems)
 
 Now attach your shell to this cgroup:
@@ -479,7 +478,6 @@ newly-created cgroup if an error occurs after this subsystem's
 create() method has been called for the new cgroup).
 
 void pre_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp);
-(cgroup_mutex held by caller)
 
 Called before checking the reference count on each subsystem. This may
 be useful for subsystems which have some extra references even if
@@ -498,6 +496,7 @@ remain valid while the caller holds cgroup_mutex.
 
 void attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
            struct cgroup *old_cgrp, struct task_struct *task)
+(cgroup_mutex held by caller)
 
 Called after the task has been attached to the cgroup, to allow any
 post-attachment activity that requires memory allocations or blocking.
@@ -511,6 +510,7 @@ void exit(struct cgroup_subsys *ss, struct task_struct *task)
 Called during task exit.
 
 int populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
+(cgroup_mutex held by caller)
 
 Called after creation of a cgroup to allow a subsystem to populate
 the cgroup directory with file entries.  The subsystem should make
@@ -520,6 +520,7 @@ method can return an error code, the error code is currently not
 always handled well.
 
 void post_clone(struct cgroup_subsys *ss, struct cgroup *cgrp)
+(cgroup_mutex held by caller)
 
 Called at the end of cgroup_clone() to do any parameter
 initialization which might be required before a task could attach.  For
@@ -527,7 +528,7 @@ example in cpusets, no task may attach before 'cpus' and 'mems' are set
 up.
 
 void bind(struct cgroup_subsys *ss, struct cgroup *root)
-(cgroup_mutex held by caller)
+(cgroup_mutex and ss->hierarchy_mutex held by caller)
 
 Called when a cgroup subsystem is rebound to a different hierarchy
 and root cgroup. Currently this will only involve movement between
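+
+As a rough illustration of how these callbacks fit together, here is a
+minimal sketch of a subsystem wiring up a few of them. This is only an
+illustration, not part of this commit: the callback signatures follow the
+descriptions above, while the "example" name and example_subsys_id are
+assumptions (a real subsystem is registered via
+include/linux/cgroup_subsys.h).
+
+	#include <linux/cgroup.h>
+	#include <linux/err.h>
+	#include <linux/slab.h>
+
+	struct example_state {
+		struct cgroup_subsys_state css;	/* must come first */
+	};
+
+	static struct cgroup_subsys_state *
+	example_create(struct cgroup_subsys *ss, struct cgroup *cgrp)
+	{
+		struct example_state *st = kzalloc(sizeof(*st), GFP_KERNEL);
+
+		if (!st)
+			return ERR_PTR(-ENOMEM);
+		return &st->css;	/* freed in ->destroy() below */
+	}
+
+	static void example_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
+	{
+		/* safe because css is the first member of example_state */
+		kfree(cgrp->subsys[example_subsys_id]);
+	}
+
+	static void example_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
+				   struct cgroup *old_cgrp, struct task_struct *task)
+	{
+		/* cgroup_mutex held by caller, as documented above */
+	}
+
+	struct cgroup_subsys example_subsys = {
+		.name		= "example",
+		.subsys_id	= example_subsys_id,
+		.create		= example_create,
+		.destroy	= example_destroy,
+		.attach		= example_attach,
+	};
+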
diff --git a/Documentation/controllers/memcg_test.txt b/Documentation/controllers/memcg_test.txt
new file mode 100644 (file)
index 0000000..08d4d3e
--- /dev/null
@@ -0,0 +1,342 @@
+Memory Resource Controller (Memcg) Implementation Memo.
+Last Updated: 2008/12/15
+Base Kernel Version: based on 2.6.28-rc8-mm.
+
+Because the VM is getting complex (one of the reasons is memcg...), memcg's
+behavior is complex too. This is a document about memcg's internal behavior.
+Please note that implementation details can change.
+
+(*) Topics on the API should be in Documentation/controllers/memory.txt
+
+0. How to record usage?
+   Two objects are used.
+
+   page_cgroup .... an object per page.
+       Allocated at boot or memory hotplug. Freed at memory hot removal.
+
+   swap_cgroup ... an entry per swp_entry.
+       Allocated at swapon(). Freed at swapoff().
+
+   The page_cgroup has a USED bit, and double counting against a page_cgroup
+   never occurs. swap_cgroup is used only when a charged page is swapped out.
+
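+   As a rough sketch (transcribed from this kernel's headers, so treat the
+   details as approximate rather than authoritative), the two objects look
+   like this:
+
+	/* include/linux/page_cgroup.h (abridged) */
+	struct page_cgroup {
+		unsigned long flags;		/* the USED bit lives here */
+		struct mem_cgroup *mem_cgroup;	/* owner of the charge */
+		struct page *page;
+		struct list_head lru;		/* per-memcg LRU list */
+	};
+
+	/* mm/page_cgroup.c (abridged): one entry per swp_entry */
+	struct swap_cgroup {
+		struct mem_cgroup *mem;		/* owner of the swap charge */
+	};
+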
+1. Charge
+
+   a page/swp_entry may be charged (usage += PAGE_SIZE) at
+
+       mem_cgroup_newpage_charge()
+         Called at new page fault and Copy-On-Write.
+
+       mem_cgroup_try_charge_swapin()
+         Called at do_swap_page() (page fault on swap entry) and swapoff.
+         Followed by charge-commit-cancel protocol. (With swap accounting)
+         At commit, a charge recorded in swap_cgroup is removed.
+
+       mem_cgroup_cache_charge()
+         Called at add_to_page_cache()
+
+       mem_cgroup_cache_charge_swapin()
+         Called at shmem's swapin.
+
+       mem_cgroup_prepare_migration()
+         Called before migration. "extra" charge is done and followed by
+         charge-commit-cancel protocol.
+         At commit, charge against oldpage or newpage will be committed.
+
+2. Uncharge
+  a page/swp_entry may be uncharged (usage -= PAGE_SIZE) by
+
+       mem_cgroup_uncharge_page()
+         Called when an anonymous page is fully unmapped. I.e., mapcount goes
+         to 0. If the page is SwapCache, uncharge is delayed until
+         mem_cgroup_uncharge_swapcache().
+
+       mem_cgroup_uncharge_cache_page()
+         Called when a page-cache is deleted from radix-tree. If the page is
+         SwapCache, uncharge is delayed until mem_cgroup_uncharge_swapcache().
+
+       mem_cgroup_uncharge_swapcache()
+         Called when SwapCache is removed from radix-tree. The charge itself
+         is moved to swap_cgroup. (If mem+swap controller is disabled, no
+         charge to swap occurs.)
+
+       mem_cgroup_uncharge_swap()
+         Called when swp_entry's refcnt goes down to 0. A charge against swap
+         disappears.
+
+	mem_cgroup_end_migration(old, new)
+	  On success of migration, old is uncharged (if necessary) and a charge
+	  to the new page is committed. On failure, a charge to the old page is
+	  committed.
+
+3. charge-commit-cancel
+	In some cases, we can't know whether a "charge" is valid at charge time
+	(because of races).
+	To handle such cases, there are charge-commit-cancel functions:
+		mem_cgroup_try_charge_XXX
+		mem_cgroup_commit_charge_XXX
+		mem_cgroup_cancel_charge_XXX
+	These are used in swap-in and migration.
+
+	At try_charge(), there are no flags to say "this page is charged";
+	at this point, usage += PAGE_SIZE.
+
+	At commit(), the function checks whether the page should be charged,
+	and either sets flags or avoids charging (usage -= PAGE_SIZE).
+
+	At cancel(), simply usage -= PAGE_SIZE.
+
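+	As a sketch of this calling pattern on the swap-in path (simplified,
+	not a verbatim copy of mm/memory.c; pte_still_matches stands in for
+	the pte-unchanged check):
+
+	struct mem_cgroup *ptr = NULL;
+
+	/* try: usage += PAGE_SIZE on success; may fail or reclaim */
+	if (mem_cgroup_try_charge_swapin(mm, page, GFP_KERNEL, &ptr))
+		goto out_fail;
+	...
+	if (pte_still_matches)
+		/* keep the charge; the swap_cgroup record is dropped */
+		mem_cgroup_commit_charge_swapin(page, ptr);
+	else
+		/* raced: simply usage -= PAGE_SIZE */
+		mem_cgroup_cancel_charge_swapin(ptr);
+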
+In the explanation below, we assume CONFIG_CGROUP_MEM_RES_CTLR_SWAP=y.
+
+4. Anonymous
+	An anonymous page is newly allocated at
+		  - a page fault into a MAP_ANONYMOUS mapping.
+		  - Copy-On-Write.
+	It is charged right after it's allocated, before doing any page-table
+	related operations. Of course, it's uncharged when another page is used
+	for the fault address.
+
+	When freeing an anonymous page (by exit() or munmap()), zap_pte() is
+	called and the pages for the ptes are freed one by one (see mm/memory.c).
+	Uncharges are done at page_remove_rmap() when page_mapcount() goes down
+	to 0.
+
+	Pages are also freed by page reclaim (vmscan.c), where anonymous
+	pages are swapped out. In this case, the page is marked as
+	PageSwapCache(). The uncharge() routine doesn't uncharge a page marked
+	as SwapCache(); it's delayed until __delete_from_swap_cache().
+
+       4.1 Swap-in.
+       At swap-in, the page is taken from swap-cache. There are 2 cases.
+
+       (a) If the SwapCache is newly allocated and read, it has no charges.
+       (b) If the SwapCache has been mapped by processes, it has been
+           charged already.
+
+	Swap-in is one of the most complicated pieces of work. In do_swap_page(),
+	the following events occur when the pte is unchanged.
+
+       (1) the page (SwapCache) is looked up.
+       (2) lock_page()
+       (3) try_charge_swapin()
+       (4) reuse_swap_page() (may call delete_swap_cache())
+       (5) commit_charge_swapin()
+       (6) swap_free().
+
+	Consider the following situations, for example.
+
+       (A) The page has not been charged before (2) and reuse_swap_page()
+           doesn't call delete_from_swap_cache().
+       (B) The page has not been charged before (2) and reuse_swap_page()
+           calls delete_from_swap_cache().
+       (C) The page has been charged before (2) and reuse_swap_page() doesn't
+           call delete_from_swap_cache().
+       (D) The page has been charged before (2) and reuse_swap_page() calls
+           delete_from_swap_cache().
+
+           memory.usage/memsw.usage changes to this page/swp_entry will be
+        Case          (A)      (B)       (C)     (D)
+         Event
+       Before (2)     0/ 1     0/ 1      1/ 1    1/ 1
+          ===========================================
+          (3)        +1/+1    +1/+1     +1/+1   +1/+1
+          (4)          -       0/ 0       -     -1/ 0
+          (5)         0/-1     0/ 0     -1/-1    0/ 0
+          (6)          -       0/-1       -      0/-1
+          ===========================================
+       Result         1/ 1     1/ 1      1/ 1    1/ 1
+
+	In all cases, the charges to this page end up as 1/ 1.
+
+       4.2 Swap-out.
+       At swap-out, typical state transition is below.
+
+       (a) add to swap cache. (marked as SwapCache)
+           swp_entry's refcnt += 1.
+       (b) fully unmapped.
+           swp_entry's refcnt += # of ptes.
+       (c) write back to swap.
+       (d) delete from swap cache. (remove from SwapCache)
+           swp_entry's refcnt -= 1.
+
+
+       At (b), the page is marked as SwapCache and not uncharged.
+       At (d), the page is removed from SwapCache and a charge in page_cgroup
+       is moved to swap_cgroup.
+
+       Finally, at task exit,
+       (e) zap_pte() is called and swp_entry's refcnt -=1 -> 0.
+       Here, a charge in swap_cgroup disappears.
+
+5. Page Cache
+       Page Cache is charged at
+       - add_to_page_cache_locked().
+
+       uncharged at
+       - __remove_from_page_cache().
+
+       The logic is very clear. (About migration, see below)
+       Note: __remove_from_page_cache() is called by remove_from_page_cache()
+       and __remove_mapping().
+
+6. Shmem(tmpfs) Page Cache
+	Memcg's charge/uncharge have special handlers for shmem. The best way
+	to understand shmem's page state transitions is to read mm/shmem.c,
+	but a brief explanation of memcg's behavior around shmem is helpful
+	for understanding the logic.
+
+       Shmem's page (just leaf page, not direct/indirect block) can be on
+               - radix-tree of shmem's inode.
+               - SwapCache.
+               - Both on radix-tree and SwapCache. This happens at swap-in
+		  and swap-out.
+
+       It's charged when...
+       - A new page is added to shmem's radix-tree.
+	- A swapped-out page is read. (This moves a charge from swap_cgroup
+	  to page_cgroup.)
+	It's uncharged when...
+	- A page is removed from the radix-tree and is not SwapCache.
+       - When SwapCache is removed, a charge is moved to swap_cgroup.
+       - When swp_entry's refcnt goes down to 0, a charge in swap_cgroup
+         disappears.
+
+7. Page Migration
+	One of the most complicated functions is the page-migration handler.
+	Memcg has 2 routines for it. Assume that we are migrating a page's
+	contents from OLDPAGE to NEWPAGE.
+
+       Usual migration logic is..
+       (a) remove the page from LRU.
+       (b) allocate NEWPAGE (migration target)
+       (c) lock by lock_page().
+       (d) unmap all mappings.
+       (e-1) If necessary, replace entry in radix-tree.
+       (e-2) move contents of a page.
+       (f) map all mappings again.
+       (g) pushback the page to LRU.
+       (-) OLDPAGE will be freed.
+
+       Before (g), memcg should complete all necessary charge/uncharge to
+       NEWPAGE/OLDPAGE.
+
+	The point is....
+	- If OLDPAGE is anonymous, all charges will be dropped at (d) because
+	  try_to_unmap() drops all mapcounts and the page will not be
+	  SwapCache.
+
+	- If OLDPAGE is SwapCache, charges will be kept at (g) because
+	  __delete_from_swap_cache() isn't called at (e-1).
+
+	- If OLDPAGE is page-cache, charges will be kept at (g) because
+	  remove_from_swap_cache() isn't called at (e-1).
+
+	memcg provides the following hooks.
+
+	- mem_cgroup_prepare_migration(OLDPAGE)
+	  Called after (b) to account a charge (usage += PAGE_SIZE) against
+	  the memcg which OLDPAGE belongs to.
+
+	- mem_cgroup_end_migration(OLDPAGE, NEWPAGE)
+	  Called after (f), before (g).
+	  If OLDPAGE is used, commit OLDPAGE again. If OLDPAGE is already
+	  charged, the charge by prepare_migration() is automatically canceled.
+	  If NEWPAGE is used, commit NEWPAGE and uncharge OLDPAGE.
+
+	  But zap_pte() (by exit or munmap) can be called during migration, so
+	  we have to check whether OLDPAGE/NEWPAGE is a valid page after commit().
+
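+	  A sketch of this hook pattern (simplified from the call sites in
+	  mm/migrate.c; error handling omitted, so treat the details as
+	  approximate):
+
+	struct mem_cgroup *mem = NULL;
+
+	/* after (b): extra charge against OLDPAGE's memcg */
+	if (mem_cgroup_prepare_migration(page, &mem))
+		goto fail;
+	...
+	/* after (f), before (g): commit to whichever page survived */
+	mem_cgroup_end_migration(mem, page, newpage);
+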
+8. LRU
+	Each memcg has its own private LRU. For now, its handling is under the
+	global VM's control (meaning that it's handled under the global
+	zone->lru_lock). Almost all routines around memcg's LRU are called by
+	the global LRU's list-management functions under zone->lru_lock.
+
+	A special function is mem_cgroup_isolate_pages(). This scans
+	memcg's private LRU and calls __isolate_lru_page() to extract a page
+	from the LRU.
+	(By __isolate_lru_page(), the page is removed from both the global and
+	 the private LRU.)
+
+
+9. Typical Tests.
+
+ Tests for racy cases.
+
+ 9.1 Small limit to memcg.
+	When testing racy cases, it's a good idea to set memcg's limit to be
+	very small, rather than in GB. Many races were found in tests under
+	limits of some KB or some tens of MB.
+	(Memory behavior under GB limits and memory behavior under MB limits
+	 show very different situations.)
+
+ 9.2 Shmem
+	Historically, memcg's shmem handling was poor and we saw a fair amount
+	of trouble here. This is because shmem is page-cache but can be
+	SwapCache. A test with shmem/tmpfs is always a good test.
+
+ 9.3 Migration
+	For NUMA, migration is another special case. For easy testing, cpusets
+	are useful. The following is a sample script to trigger migration.
+
+       mount -t cgroup -o cpuset none /opt/cpuset
+
+       mkdir /opt/cpuset/01
+       echo 1 > /opt/cpuset/01/cpuset.cpus
+       echo 0 > /opt/cpuset/01/cpuset.mems
+       echo 1 > /opt/cpuset/01/cpuset.memory_migrate
+       mkdir /opt/cpuset/02
+       echo 1 > /opt/cpuset/02/cpuset.cpus
+       echo 1 > /opt/cpuset/02/cpuset.mems
+       echo 1 > /opt/cpuset/02/cpuset.memory_migrate
+
+	In the above setup, when you move a task from 01 to 02, page migration
+	from node 0 to node 1 will occur. The following is a script to migrate
+	all tasks under a cpuset.
+       --
+	move_task()
+	{
+	for pid in $1
+	do
+		/bin/echo $pid > $2/tasks 2>/dev/null
+		echo -n "$pid "
+	done
+	echo END
+	}
+
+	G1=/opt/cpuset/01	# source group from the setup above
+	G2=/opt/cpuset/02	# destination group
+	G1_TASK=`cat ${G1}/tasks`
+	G2_TASK=`cat ${G2}/tasks`
+	move_task "${G1_TASK}" ${G2} &
+       --
+ 9.4 Memory hotplug.
+	The memory hotplug test is another good test.
+	To offline memory, do the following:
+	# echo offline > /sys/devices/system/memory/memoryXXX/state
+	(XXX is the memory section number.)
+	This is an easy way to test page migration, too.
+
+ 9.5 mkdir/rmdir
+	When using hierarchy, a mkdir/rmdir test should be done.
+	Use tests like the following:
+
+       echo 1 >/opt/cgroup/01/memory/use_hierarchy
+       mkdir /opt/cgroup/01/child_a
+       mkdir /opt/cgroup/01/child_b
+
+	set a limit on 01.
+	add a limit to 01/child_b.
+	run jobs under child_a and child_b.
+
+	Create/delete the following groups at random while the jobs are running:
+	/opt/cgroup/01/child_a/child_aa
+	/opt/cgroup/01/child_b/child_bb
+	/opt/cgroup/01/child_c
+
+	Running new jobs in a new group is also good.
+
+ 9.6 Mount with other subsystems.
+	Mounting with other subsystems is a good test because there are
+	races and lock dependencies with other cgroup subsystems.
+
+	example)
+	# mount -t cgroup none /cgroup -o cpuset,memory,cpu,devices
+
+	Then do task moves, mkdir, rmdir, etc. under this hierarchy.
index 1c07547d3f81f28a19edb9de8752f8865b44478d..e1501964df1e4ddb46c8a1e704d197de870e3d49 100644 (file)
@@ -137,7 +137,32 @@ behind this approach is that a cgroup that aggressively uses a shared
 page will eventually get charged for it (once it is uncharged from
 the cgroup that brought it in -- this will happen on memory pressure).
 
-2.4 Reclaim
+Exception: if CONFIG_CGROUP_MEM_RES_CTLR_SWAP is not used:
+when you do swapoff and force swapped-out pages of shmem (tmpfs) to
+be brought back into memory, charges for those pages are accounted against
+the caller of swapoff rather than against the users of shmem.
+
+
+2.4 Swap Extension (CONFIG_CGROUP_MEM_RES_CTLR_SWAP)
+The swap extension allows you to record charges for swap. A swapped-in page
+is charged back to its original owner if possible.
+
+When swap is accounted, the following files are added:
+ - memory.memsw.usage_in_bytes
+ - memory.memsw.limit_in_bytes
+
+The usage of mem+swap is limited by memsw.limit_in_bytes.
+
+Note: why 'mem+swap' rather than just swap?
+The global LRU (kswapd) can swap out arbitrary pages. Swapping a page out
+only moves its account from memory to swap; there is no change in the usage
+of mem+swap.
+
+In other words, when we want to limit the usage of swap without affecting
+the global LRU, a mem+swap limit is better than just a swap limit, from the
+OS point of view.
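+
+As a small userspace illustration of these files (the /cgroups/0 mount point
+is an assumption; adjust to wherever the memory controller is mounted):
+
+	#include <stdio.h>
+
+	int main(void)
+	{
+		unsigned long long usage;
+		FILE *f;
+
+		/* set a 100M mem+swap limit */
+		f = fopen("/cgroups/0/memory.memsw.limit_in_bytes", "w");
+		if (!f)
+			return 1;
+		fprintf(f, "%llu\n", 100ULL << 20);
+		fclose(f);
+
+		/* read back the current mem+swap usage */
+		f = fopen("/cgroups/0/memory.memsw.usage_in_bytes", "r");
+		if (!f)
+			return 1;
+		if (fscanf(f, "%llu", &usage) == 1)
+			printf("mem+swap usage: %llu bytes\n", usage);
+		fclose(f);
+		return 0;
+	}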
+
+2.5 Reclaim
 
 Each cgroup maintains a per cgroup LRU that consists of an active
 and inactive list. When a cgroup goes over its limit, we first try
@@ -207,12 +232,6 @@ exceeded.
 The memory.stat file gives accounting information. Now, the number of
 caches, RSS and Active pages/Inactive pages are shown.
 
-The memory.force_empty gives an interface to drop *all* charges by force.
-
-# echo 1 > memory.force_empty
-
-will drop all charges in cgroup. Currently, this is maintained for test.
-
 4. Testing
 
 Balbir posted lmbench, AIM9, LTP and vmmstress results [10] and [11].
@@ -242,10 +261,106 @@ reclaimed.
 
 A cgroup can be removed by rmdir, but as discussed in sections 4.1 and 4.2, a
 cgroup might have some charge associated with it, even though all
-tasks have migrated away from it. Such charges are automatically dropped at
-rmdir() if there are no tasks.
+tasks have migrated away from it.
+Such charges are freed (by default) or moved to the parent. When moved,
+both RSS and caches are moved to the parent.
+If both are busy, rmdir() returns -EBUSY. See also 5.1.
+
+Charges recorded in swap information are not updated at removal of a cgroup.
+The recorded information is discarded, and a cgroup which uses the swap
+(swapcache) will be charged as its new owner.
+
+
+5. Misc. interfaces.
+
+5.1 force_empty
+  The memory.force_empty interface is provided to make a cgroup's memory
+  usage empty. You can use this interface only when the cgroup has no tasks.
+  When anything is written to this file, e.g.
+
+  # echo 0 > memory.force_empty
+
+  almost all pages tracked by this memcg will be unmapped and freed. Some
+  pages cannot be freed because they are locked or in use. Such pages are
+  moved to the parent and this cgroup becomes empty. This may return -EBUSY
+  if the cgroup is too busy.
+
+  The typical use case for this interface is calling it before rmdir().
+  Because rmdir() moves all pages to the parent, some out-of-use page caches
+  could be moved there too. If you want to avoid that, force_empty is useful.
+
+5.2 stat file
+  The memory.stat file includes the following statistics (currently):
+	cache			- # of pages from page-cache and shmem
+	rss			- # of pages from anonymous memory
+	pgpgin			- # of charging events
+	pgpgout			- # of uncharging events
+	active_anon		- # of pages on the active LRU of anon, shmem
+	inactive_anon		- # of pages on the inactive LRU of anon, shmem
+	active_file		- # of pages on the active LRU of file-cache
+	inactive_file		- # of pages on the inactive LRU of file-cache
+	unevictable		- # of pages that cannot be reclaimed (mlocked etc.)
+
+	The entries below depend on CONFIG_DEBUG_VM.
+	inactive_ratio		- VM internal parameter. (see mm/page_alloc.c)
+	recent_rotated_anon	- VM internal parameter. (see mm/vmscan.c)
+	recent_rotated_file	- VM internal parameter. (see mm/vmscan.c)
+	recent_scanned_anon	- VM internal parameter. (see mm/vmscan.c)
+	recent_scanned_file	- VM internal parameter. (see mm/vmscan.c)
+
+  Memo:
+	recent_rotated means the recent frequency of LRU rotation.
+	recent_scanned means the recent # of scans of the LRU.
+	These are shown for easier debugging; please see the code for their
+	exact meanings.
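+
+  As a small parsing illustration (the /cgroups/0 mount point is an
+  assumption; the file is a sequence of "name value" lines as listed above):
+
+	#include <stdio.h>
+	#include <string.h>
+
+	int main(void)
+	{
+		char name[64];
+		unsigned long long val;
+		FILE *f = fopen("/cgroups/0/memory.stat", "r");
+
+		if (!f)
+			return 1;
+		while (fscanf(f, "%63s %llu", name, &val) == 2)
+			if (!strcmp(name, "cache") || !strcmp(name, "rss"))
+				printf("%s = %llu\n", name, val);
+		fclose(f);
+		return 0;
+	}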
+
+
+5.3 swappiness
+  Similar to /proc/sys/vm/swappiness, but affecting a hierarchy of groups only.
+
+  The swappiness of the following cgroups can't be changed:
+  - the root cgroup (it uses /proc/sys/vm/swappiness).
+  - a cgroup which uses hierarchy and has child cgroups.
+  - a cgroup which uses hierarchy and is not the root of the hierarchy.
+
+
+6. Hierarchy support
+
+The memory controller supports a deep hierarchy and hierarchical accounting.
+The hierarchy is created by creating the appropriate cgroups in the
+cgroup filesystem. Consider for example, the following cgroup filesystem
+hierarchy
+
+               root
+            /  |   \
+           /   |    \
+         a     b       c
+                       | \
+                       |  \
+                       d   e
+
+In the diagram above, with hierarchical accounting enabled, all memory
+usage of e is accounted to its ancestors up to the root (i.e. c and root)
+that have memory.use_hierarchy enabled. If one of the ancestors goes over its
+limit, the reclaim algorithm reclaims from the tasks in that ancestor and in
+the ancestor's children.
+
+6.1 Enabling hierarchical accounting and reclaim
+
+The memory controller disables the hierarchy feature by default. Support
+can be enabled by writing 1 to the memory.use_hierarchy file of the root cgroup:
+
+# echo 1 > memory.use_hierarchy
+
+The feature can be disabled by
+
+# echo 0 > memory.use_hierarchy
+
+NOTE1: Enabling/disabling will fail if the cgroup already has other
+cgroups created below it.
+
+NOTE2: This feature can be enabled/disabled per subtree.
 
-5. TODO
+7. TODO
 
 1. Add support for accounting huge pages (as a separate controller)
 2. Make per-cgroup scanner reclaim not-shared pages first
index 4d184f2db0ea668e0ce6440693a61b28c202f98d..d9251efdcec72a1a92cf7b4e3c75505e85caf4d1 100644 (file)
@@ -121,7 +121,7 @@ Once all bytes have been read data will hold 0x09, but there is no reason to
 test for this. Notice that the number of bytes is bank address dependent see
 above and below.
 
-After completing a successfull read it is advised to put the uGuru back in
+After completing a successful read it is advised to put the uGuru back in
 ready mode, so that it is ready for the next read / write cycle. This way
 if your program / driver is unloaded and later loaded again the detection
 algorithm described above will still work.
@@ -141,7 +141,7 @@ don't ask why this is the way it is.
 
 Once DATA holds 0x01 read CMD it should hold 0xAC now.
 
-After completing a successfull write it is advised to put the uGuru back in
+After completing a successful write it is advised to put the uGuru back in
 ready mode, so that it is ready for the next read / write cycle. This way
 if your program / driver is unloaded and later loaded again the detection
 algorithm described above will still work.
index 532eacbbed625bba1ac92acaedab31948ccc79de..fb849020aea9349c9c7c6a095b2526316d95529f 100644 (file)
@@ -1562,6 +1562,9 @@ and is between 256 and 4096 characters. It is defined in the file
 
        nosoftlockup    [KNL] Disable the soft-lockup detector.
 
+       noswapaccount   [KNL] Disable accounting of swap in memory resource
+                       controller. (See Documentation/controllers/memory.txt)
+
        nosync          [HW,M68K] Disables sync negotiation for all devices.
 
        notsc           [BUGS=X86-32] Disable Time Stamp Counter
index 81a917ef96e9afbeb609fd6984f8f0eaa1b316d2..6c974d28eeb404f03c63d3d0b79bfd5a5b00675b 100644 (file)
 These are the memory-mapped registers for the on-board FPGA.
 
 Required properties:
 - compatible : should be "fsl,fpga-pixis".
-- reg : should contain the address and the lenght of the FPPGA register
+- reg : should contain the address and the length of the FPGA register
   set.
 
 Example (MPC8610HPCD):
@@ -27,3 +27,33 @@ Example (MPC8610HPCD):
                compatible = "fsl,fpga-pixis";
                reg = <0xe8000000 32>;
        };
+
+* Freescale BCSR GPIO banks
+
+Some BCSR registers act as simple GPIO controllers; each such
+register can be represented by a gpio-controller node.
+
+Required properties:
+- compatible : Should be "fsl,<board>-bcsr-gpio".
+- reg : Should contain the address and the length of the GPIO bank
+  register.
+- #gpio-cells : Should be two. The first cell is the pin number and the
+  second cell is used to specify optional parameters (currently unused).
+- gpio-controller : Marks the port as a GPIO controller.
+
+Example:
+
+       bcsr@1,0 {
+               #address-cells = <1>;
+               #size-cells = <1>;
+               compatible = "fsl,mpc8360mds-bcsr";
+               reg = <1 0 0x8000>;
+               ranges = <0 1 0 0x8000>;
+
+               bcsr13: gpio-controller@d {
+                       #gpio-cells = <2>;
+                       compatible = "fsl,mpc8360mds-bcsr-gpio";
+                       reg = <0xd 1>;
+                       gpio-controller;
+               };
+       };
index 38d324d62b253be77166a40347583b11b1490754..e5b071d466196de262e4e79bf41d359f4260251a 100644 (file)
@@ -191,7 +191,7 @@ Vport States:
       This is equivalent to a driver "attach" on an adapter, which is
       independent of the adapter's link state.
     - Instantiation of the vport on the FC link via ELS traffic, etc.
-      This is equivalent to a "link up" and successfull link initialization.
+      This is equivalent to a "link up" and successful link initialization.
   Further information can be found in the interfaces section below for
   Vport Creation.
 
@@ -320,7 +320,7 @@ Vport Creation:
       This is equivalent to a driver "attach" on an adapter, which is
       independent of the adapter's link state.
     - Instantiation of the vport on the FC link via ELS traffic, etc.
-      This is equivalent to a "link up" and successfull link initialization.
+      This is equivalent to a "link up" and successful link initialization.
 
   The LLDD's vport_create() function will not synchronously wait for both
   parts to be fully completed before returning. It must validate that the
index 7b0ceaaad7af916ab0ed24d2e8ed05738bd830e0..d63fa024ac05901252a3ea9af825c85b866845e3 100644 (file)
@@ -4,5 +4,7 @@ ds2482
        - The Maxim/Dallas Semiconductor DS2482 provides 1-wire busses.
 ds2490
        - The Maxim/Dallas Semiconductor DS2490 builds USB <-> W1 bridges.
+mxc_w1
+       - W1 master controller driver found on Freescale MX2/MX3 SoCs
 w1-gpio
        - GPIO 1-wire bus master driver.
diff --git a/Documentation/w1/masters/mxc-w1 b/Documentation/w1/masters/mxc-w1
new file mode 100644 (file)
index 0000000..97f6199
--- /dev/null
@@ -0,0 +1,11 @@
+Kernel driver mxc_w1
+====================
+
+Supported chips:
+  * Freescale MX27, MX31 and probably other i.MX SoCs
+    Datasheets:
+        http://www.freescale.com/files/32bit/doc/data_sheet/MCIMX31.pdf?fpsp=1
+       http://www.freescale.com/files/dsp/MCIMX27.pdf?fpsp=1
+
+Author: Originally based on Freescale code, prepared for mainline by
+       Sascha Hauer <s.hauer@pengutronix.de>
index 3640c7c87d457683e249fe57df0c654b0c7d439f..804445f745ed4bcd900fb9ef8962fca6c4742415 100644 (file)
@@ -5,69 +5,157 @@ Message types.
 =============
 
 There are three types of messages between w1 core and userspace:
-1. Events. They are generated each time new master or slave device found
-       either due to automatic or requested search.
-2. Userspace commands. Includes read/write and search/alarm search comamnds.
+1. Events. They are generated each time a new master or slave device is
+	found, either due to an automatic or a requested search.
+2. Userspace commands.
 3. Replies to userspace commands.
 
 
 Protocol.
 ========
 
-[struct cn_msg] - connector header. It's length field is equal to size of the attached data.
+[struct cn_msg] - connector header.
+	Its length field is equal to the size of the attached data.
 [struct w1_netlink_msg] - w1 netlink header.
        __u8 type       - message type.
-                       W1_SLAVE_ADD/W1_SLAVE_REMOVE - slave add/remove events.
-                       W1_MASTER_ADD/W1_MASTER_REMOVE - master add/remove events.
-                       W1_MASTER_CMD - userspace command for bus master device (search/alarm search).
-                       W1_SLAVE_CMD - userspace command for slave device (read/write/ search/alarm search
-                                       for bus master device where given slave device found).
+                       W1_LIST_MASTERS
+                               list current bus masters
+                       W1_SLAVE_ADD/W1_SLAVE_REMOVE
+                               slave add/remove events
+                       W1_MASTER_ADD/W1_MASTER_REMOVE
+                               master add/remove events
+                       W1_MASTER_CMD
+                               userspace command for bus master
+                               device (search/alarm search)
+                       W1_SLAVE_CMD
+                               userspace command for slave device
+                               (read/write/touch)
        __u8 res        - reserved
-       __u16 len       - size of attached to this header data.
+	__u16 len	- size of the data attached to this header
        union {
-               __u8 id;                         - slave unique device id
+               __u8 id[8];                      - slave unique device id
                struct w1_mst {
-                       __u32           id;      - master's id.
+                       __u32           id;      - master's id
                        __u32           res;     - reserved
                } mst;
        } id;
 
-[strucrt w1_netlink_cmd] - command for gived master or slave device.
+[struct w1_netlink_cmd] - command for given master or slave device.
        __u8 cmd        - command opcode.
-                       W1_CMD_READ     - read command.
-                       W1_CMD_WRITE    - write command.
-                       W1_CMD_SEARCH   - search command.
-                       W1_CMD_ALARM_SEARCH - alarm search command.
+                       W1_CMD_READ     - read command
+                       W1_CMD_WRITE    - write command
+                       W1_CMD_TOUCH    - touch command
+                               (write and sample data back to userspace)
+                       W1_CMD_SEARCH   - search command
+                       W1_CMD_ALARM_SEARCH - alarm search command
        __u8 res        - reserved
-       __u16 len       - length of data for this command.
-                       For read command data must be allocated like for write command.
-       __u8 data[0]    - data for this command.
+	__u16 len	- length of the data for this command
+		For a read command, data must be allocated as for a write command
+       __u8 data[0]    - data for this command
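+
+Transcribed as C (matching the field lists above; the zero-length data
+members mark where the attached payload begins, and the authoritative
+definitions live in drivers/w1/w1_netlink.h):
+
+	struct w1_netlink_msg {
+		__u8	type;		/* W1_MASTER_CMD, W1_SLAVE_CMD, ... */
+		__u8	res;		/* reserved (carries status in replies) */
+		__u16	len;		/* size of the data behind the header */
+		union {
+			__u8	id[8];	/* slave unique id */
+			struct w1_mst {
+				__u32	id;	/* master id */
+				__u32	res;
+			} mst;
+		} id;
+		__u8	data[0];	/* w1_netlink_cmds follow here */
+	};
+
+	struct w1_netlink_cmd {
+		__u8	cmd;		/* W1_CMD_READ/WRITE/TOUCH/SEARCH/... */
+		__u8	res;
+		__u16	len;		/* length of data for this command */
+		__u8	data[0];
+	};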
 
 
-Each connector message can include one or more w1_netlink_msg with zero of more attached w1_netlink_cmd messages.
+Each connector message can include one or more w1_netlink_msg with
+zero or more attached w1_netlink_cmd messages.
 
-For event messages there are no w1_netlink_cmd embedded structures, only connector header
-and w1_netlink_msg strucutre with "len" field being zero and filled type (one of event types)
-and id - either 8 bytes of slave unique id in host order, or master's id, which is assigned
-to bus master device when it is added to w1 core.
+For event messages there are no embedded w1_netlink_cmd structures,
+only the connector header and a w1_netlink_msg structure with the "len"
+field being zero, the type filled in (one of the event types), and the id:
+either 8 bytes of the slave's unique id in host order,
+or the master's id, which is assigned to the bus master device
+when it is added to the w1 core.
+
+Currently, replies to userspace commands are only generated for read
+command requests. Exactly one reply is generated for each w1_netlink_cmd
+read request. Replies are not combined when sent - i.e. a typical reply
+message looks like the following:
 
-Currently replies to userspace commands are only generated for read command request.
-One reply is generated exactly for one w1_netlink_cmd read request.
-Replies are not combined when sent - i.e. typical reply messages looks like the following:
 [cn_msg][w1_netlink_msg][w1_netlink_cmd]
-cn_msg.len = sizeof(struct w1_netlink_msg) + sizeof(struct w1_netlink_cmd) + cmd->len;
+cn_msg.len = sizeof(struct w1_netlink_msg) +
+            sizeof(struct w1_netlink_cmd) +
+            cmd->len;
 w1_netlink_msg.len = sizeof(struct w1_netlink_cmd) + cmd->len;
 w1_netlink_cmd.len = cmd->len;
 
+Replies to W1_LIST_MASTERS send a message back to userspace
+which contains the list of all registered master ids in the following
+format:
+
+	cn_msg (CN_W1_IDX.CN_W1_VAL as id, len is equal to sizeof(struct
+	w1_netlink_msg) plus the number of masters multiplied by 4)
+	w1_netlink_msg (type: W1_LIST_MASTERS, len is equal to the
+		number of masters multiplied by 4 (u32 size))
+	id0 ... idN
+
+	Each message is at most 4k in size, so if the number of master devices
+	exceeds this, the reply will be split into several messages, and
+	cn.seq will be increased for each one.
+
+W1 search and alarm search commands.
+request:
+[cn_msg]
+  [w1_netlink_msg type = W1_MASTER_CMD
+       id is equal to the bus master id to use for searching]
+  [w1_netlink_cmd cmd = W1_CMD_SEARCH or W1_CMD_ALARM_SEARCH]
+
+reply:
+  [cn_msg, ack = 1 and increasing, 0 means the last message,
+       seq is equal to the request seq]
+  [w1_netlink_msg type = W1_MASTER_CMD]
+  [w1_netlink_cmd cmd = W1_CMD_SEARCH or W1_CMD_ALARM_SEARCH
+       len is equal to number of IDs multiplied by 8]
+  [64bit-id0 ... 64bit-idN]
+The length in each header corresponds to the size of the data behind it, so
+w1_netlink_cmd->len = N * 8, where N is the number of IDs in this message
+(it can be zero).
+w1_netlink_msg->len = sizeof(struct w1_netlink_cmd) + N * 8;
+cn_msg->len = sizeof(struct w1_netlink_msg) +
+             sizeof(struct w1_netlink_cmd) +
+             N*8;
+
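+As a userspace sketch of building such a request (struct layouts as above;
+the master id and socket setup are assumptions, and error handling is
+omitted):
+
+	#include <string.h>
+	#include <sys/socket.h>
+	#include <linux/netlink.h>
+	#include <linux/connector.h>
+	#include "w1_netlink.h"	/* struct w1_netlink_msg/_cmd, W1_* constants */
+
+	static int w1_send_search(int sock, __u32 master_id, __u32 seq)
+	{
+		char buf[NLMSG_SPACE(sizeof(struct cn_msg) +
+				     sizeof(struct w1_netlink_msg) +
+				     sizeof(struct w1_netlink_cmd))];
+		struct nlmsghdr *nlh = (struct nlmsghdr *)buf;
+		struct cn_msg *cn = NLMSG_DATA(nlh);
+		struct w1_netlink_msg *msg = (struct w1_netlink_msg *)(cn + 1);
+		struct w1_netlink_cmd *cmd = (struct w1_netlink_cmd *)(msg + 1);
+
+		memset(buf, 0, sizeof(buf));
+		nlh->nlmsg_len = sizeof(buf);
+		nlh->nlmsg_type = NLMSG_DONE;
+
+		cn->id.idx = CN_W1_IDX;
+		cn->id.val = CN_W1_VAL;
+		cn->seq = seq;			/* echoed back in the reply */
+		cn->len = sizeof(*msg) + sizeof(*cmd);
+
+		msg->type = W1_MASTER_CMD;	/* command for a bus master */
+		msg->len = sizeof(*cmd);
+		msg->id.mst.id = master_id;
+
+		cmd->cmd = W1_CMD_SEARCH;	/* reply carries 64-bit slave ids */
+		cmd->len = 0;
+
+		return send(sock, buf, sizeof(buf), 0);
+	}
+
+The socket here is assumed to be the usual PF_NETLINK/NETLINK_CONNECTOR
+socket bound to the CN_W1_IDX group.
+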
+W1 reset command.
+[cn_msg]
+  [w1_netlink_msg type = W1_MASTER_CMD
+       id is equal to the bus master id to use for searching]
+  [w1_netlink_cmd cmd = W1_CMD_RESET]
+
+
+Command status replies.
+======================
+
+Each command (either root, master or slave, with or without a w1_netlink_cmd
+structure) will be 'acked' by the w1 core. The format of the reply is the
+same as the request message, except that the length parameters do not account
+for the data requested by the user, i.e. read/write/touch IO requests will
+not contain data, so w1_netlink_cmd.len will be 0, w1_netlink_msg.len will be
+the size of the w1_netlink_cmd structure, and cn_msg.len will be equal to the
+sum of sizeof(struct w1_netlink_msg) and sizeof(struct w1_netlink_cmd).
+If a reply is generated for a master or root command (which does not have a
+w1_netlink_cmd attached), the reply will contain only the cn_msg and
+w1_netlink_msg structures.
+
+The w1_netlink_msg.status field carries a positive error value
+(EINVAL, for example) or zero in case of success.
+
+All other fields in every structure will mirror the same parameters in the
+request message (except lengths as described above).
+
+A status reply is generated for every w1_netlink_cmd embedded in the
+w1_netlink_msg; if there are no w1_netlink_cmd structures, a
+reply is generated for the w1_netlink_msg itself.
+
+All w1_netlink_cmd command structures in a w1_netlink_msg are handled,
+even if there were errors; only a length mismatch interrupts message
+processing.
+
 
 Operation steps in w1 core when new command is received.
 =======================================================
 
-When new message (w1_netlink_msg) is received w1 core detects if it is master of slave request,
-according to w1_netlink_msg.type field.
+When a new message (w1_netlink_msg) is received, the w1 core detects whether
+it is a master or slave request, according to the w1_netlink_msg.type field.
 Then master or slave device is searched for.
-When found, master device (requested or those one on where slave device is found) is locked.
-If slave command is requested, then reset/select procedure is started to select given device.
+When found, the master device (the requested one, or the one on which the
+slave device was found) is locked. If a slave command is requested, a
+reset/select procedure is started to select the given device.
 
 Then all operations requested in the w1_netlink_msg are performed one by one.
 If a command requires a reply (like a read command), it is sent on command completion.
@@ -82,8 +170,8 @@ Connector [1] specific documentation.
 Each connector message includes two u32 fields as "address".
 w1 uses CN_W1_IDX and CN_W1_VAL defined in include/linux/connector.h header.
 Each message also includes sequence and acknowledge numbers.
-Sequence number for event messages is appropriate bus master sequence number increased with
-each event message sent "through" this master.
+The sequence number for event messages is the corresponding bus master's
+sequence number, increased with each event message sent "through" this master.
 Sequence number for userspace requests is set by userspace application.
 Sequence number for reply is the same as was in request, and
 acknowledge number is set to seq+1.
@@ -93,6 +181,6 @@ Additional documantion, source code examples.
 ============================================
 
 1. Documentation/connector
-2. http://tservice.net.ru/~s0mbre/archive/w1
-This archive includes userspace application w1d.c which
-uses read/write/search commands for all master/slave devices found on the bus.
+2. http://www.ioremap.net/archive/w1
+This archive includes userspace application w1d.c which uses
+read/write/search commands for all master/slave devices found on the bus.
index a01884407fe45065b99891c82909dca5b5094564..57e0309243cc5c8c3ef1accd4c52987ef129ccc1 100644 (file)
@@ -1360,6 +1360,11 @@ P:       Maciej W. Rozycki
 M:     macro@linux-mips.org
 S:     Maintained
 
+DELL LAPTOP DRIVER
+P:     Matthew Garrett
+M:     mjg59@srcf.ucam.org
+S:     Maintained
+
 DELL LAPTOP SMM DRIVER
 P:     Massimo Dal Zotto
 M:     dz@debian.org
@@ -3484,6 +3489,12 @@ L:       linuxppc-dev@ozlabs.org
 L:     cbe-oss-dev@ozlabs.org
 S:     Supported
 
+PS3VRAM DRIVER
+P:     Jim Paris
+M:     jim@jtan.com
+L:     cbe-oss-dev@ozlabs.org
+S:     Maintained
+
 PVRUSB2 VIDEO4LINUX DRIVER
 P:     Mike Isely
 M:     isely@pobox.com
diff --git a/arch/arm/configs/clps7500_defconfig b/arch/arm/configs/clps7500_defconfig
deleted file mode 100644 (file)
index 49e9f9d..0000000
+++ /dev/null
@@ -1,801 +0,0 @@
-#
-# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.12-rc1-bk2
-# Sun Mar 27 17:20:48 2005
-#
-CONFIG_ARM=y
-CONFIG_MMU=y
-CONFIG_UID16=y
-CONFIG_RWSEM_GENERIC_SPINLOCK=y
-CONFIG_GENERIC_CALIBRATE_DELAY=y
-CONFIG_GENERIC_IOMAP=y
-
-#
-# Code maturity level options
-#
-CONFIG_EXPERIMENTAL=y
-CONFIG_CLEAN_COMPILE=y
-CONFIG_BROKEN_ON_SMP=y
-
-#
-# General setup
-#
-CONFIG_LOCALVERSION=""
-CONFIG_SWAP=y
-CONFIG_SYSVIPC=y
-# CONFIG_POSIX_MQUEUE is not set
-# CONFIG_BSD_PROCESS_ACCT is not set
-# CONFIG_SYSCTL is not set
-# CONFIG_AUDIT is not set
-# CONFIG_HOTPLUG is not set
-CONFIG_KOBJECT_UEVENT=y
-# CONFIG_IKCONFIG is not set
-CONFIG_EMBEDDED=y
-CONFIG_KALLSYMS=y
-# CONFIG_KALLSYMS_EXTRA_PASS is not set
-CONFIG_BASE_FULL=y
-CONFIG_FUTEX=y
-CONFIG_EPOLL=y
-CONFIG_CC_OPTIMIZE_FOR_SIZE=y
-CONFIG_SHMEM=y
-CONFIG_CC_ALIGN_FUNCTIONS=0
-CONFIG_CC_ALIGN_LABELS=0
-CONFIG_CC_ALIGN_LOOPS=0
-CONFIG_CC_ALIGN_JUMPS=0
-# CONFIG_TINY_SHMEM is not set
-CONFIG_BASE_SMALL=0
-
-#
-# Loadable module support
-#
-# CONFIG_MODULES is not set
-
-#
-# System Type
-#
-CONFIG_ARCH_CLPS7500=y
-# CONFIG_ARCH_CLPS711X is not set
-# CONFIG_ARCH_CO285 is not set
-# CONFIG_ARCH_EBSA110 is not set
-# CONFIG_ARCH_FOOTBRIDGE is not set
-# CONFIG_ARCH_INTEGRATOR is not set
-# CONFIG_ARCH_IOP3XX is not set
-# CONFIG_ARCH_IXP4XX is not set
-# CONFIG_ARCH_IXP2000 is not set
-# CONFIG_ARCH_L7200 is not set
-# CONFIG_ARCH_PXA is not set
-# CONFIG_ARCH_RPC is not set
-# CONFIG_ARCH_SA1100 is not set
-# CONFIG_ARCH_S3C2410 is not set
-# CONFIG_ARCH_SHARK is not set
-# CONFIG_ARCH_LH7A40X is not set
-# CONFIG_ARCH_OMAP is not set
-# CONFIG_ARCH_VERSATILE is not set
-# CONFIG_ARCH_IMX is not set
-# CONFIG_ARCH_H720X is not set
-
-#
-# Processor Type
-#
-CONFIG_CPU_32=y
-CONFIG_CPU_ARM710=y
-CONFIG_CPU_32v3=y
-CONFIG_CPU_CACHE_V3=y
-CONFIG_CPU_CACHE_VIVT=y
-CONFIG_CPU_COPY_V3=y
-CONFIG_CPU_TLB_V3=y
-
-#
-# Processor Features
-#
-CONFIG_TIMER_ACORN=y
-
-#
-# Bus support
-#
-CONFIG_ISA=y
-
-#
-# PCCARD (PCMCIA/CardBus) support
-#
-# CONFIG_PCCARD is not set
-
-#
-# Kernel Features
-#
-# CONFIG_PREEMPT is not set
-CONFIG_ALIGNMENT_TRAP=y
-
-#
-# Boot options
-#
-CONFIG_ZBOOT_ROM_TEXT=0x0
-CONFIG_ZBOOT_ROM_BSS=0x0
-CONFIG_CMDLINE="mem=16M root=nfs"
-# CONFIG_XIP_KERNEL is not set
-
-#
-# Floating point emulation
-#
-
-#
-# At least one emulation must be selected
-#
-# CONFIG_FPE_NWFPE is not set
-
-#
-# Userspace binary formats
-#
-CONFIG_BINFMT_ELF=y
-# CONFIG_BINFMT_AOUT is not set
-# CONFIG_BINFMT_MISC is not set
-# CONFIG_ARTHUR is not set
-
-#
-# Power management options
-#
-# CONFIG_PM is not set
-
-#
-# Device Drivers
-#
-
-#
-# Generic Driver Options
-#
-CONFIG_STANDALONE=y
-CONFIG_PREVENT_FIRMWARE_BUILD=y
-# CONFIG_FW_LOADER is not set
-
-#
-# Memory Technology Devices (MTD)
-#
-CONFIG_MTD=y
-# CONFIG_MTD_DEBUG is not set
-# CONFIG_MTD_CONCAT is not set
-# CONFIG_MTD_PARTITIONS is not set
-
-#
-# User Modules And Translation Layers
-#
-# CONFIG_MTD_CHAR is not set
-# CONFIG_MTD_BLOCK is not set
-# CONFIG_MTD_BLOCK_RO is not set
-# CONFIG_FTL is not set
-# CONFIG_NFTL is not set
-# CONFIG_INFTL is not set
-
-#
-# RAM/ROM/Flash chip drivers
-#
-# CONFIG_MTD_CFI is not set
-# CONFIG_MTD_JEDECPROBE is not set
-CONFIG_MTD_MAP_BANK_WIDTH_1=y
-CONFIG_MTD_MAP_BANK_WIDTH_2=y
-CONFIG_MTD_MAP_BANK_WIDTH_4=y
-# CONFIG_MTD_MAP_BANK_WIDTH_8 is not set
-# CONFIG_MTD_MAP_BANK_WIDTH_16 is not set
-# CONFIG_MTD_MAP_BANK_WIDTH_32 is not set
-CONFIG_MTD_CFI_I1=y
-CONFIG_MTD_CFI_I2=y
-# CONFIG_MTD_CFI_I4 is not set
-# CONFIG_MTD_CFI_I8 is not set
-# CONFIG_MTD_RAM is not set
-# CONFIG_MTD_ROM is not set
-# CONFIG_MTD_ABSENT is not set
-
-#
-# Mapping drivers for chip access
-#
-# CONFIG_MTD_COMPLEX_MAPPINGS is not set
-
-#
-# Self-contained MTD device drivers
-#
-# CONFIG_MTD_SLRAM is not set
-# CONFIG_MTD_PHRAM is not set
-# CONFIG_MTD_MTDRAM is not set
-# CONFIG_MTD_BLKMTD is not set
-# CONFIG_MTD_BLOCK2MTD is not set
-
-#
-# Disk-On-Chip Device Drivers
-#
-# CONFIG_MTD_DOC2000 is not set
-# CONFIG_MTD_DOC2001 is not set
-# CONFIG_MTD_DOC2001PLUS is not set
-
-#
-# NAND Flash Device Drivers
-#
-# CONFIG_MTD_NAND is not set
-
-#
-# Parallel port support
-#
-CONFIG_PARPORT=y
-CONFIG_PARPORT_PC=y
-CONFIG_PARPORT_PC_FIFO=y
-# CONFIG_PARPORT_PC_SUPERIO is not set
-# CONFIG_PARPORT_ARC is not set
-# CONFIG_PARPORT_GSC is not set
-CONFIG_PARPORT_1284=y
-
-#
-# Plug and Play support
-#
-# CONFIG_PNP is not set
-
-#
-# Block devices
-#
-# CONFIG_BLK_DEV_FD is not set
-# CONFIG_BLK_DEV_XD is not set
-# CONFIG_PARIDE is not set
-# CONFIG_BLK_DEV_COW_COMMON is not set
-# CONFIG_BLK_DEV_LOOP is not set
-CONFIG_BLK_DEV_NBD=y
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_COUNT=16
-CONFIG_BLK_DEV_RAM_SIZE=4096
-# CONFIG_BLK_DEV_INITRD is not set
-CONFIG_INITRAMFS_SOURCE=""
-# CONFIG_CDROM_PKTCDVD is not set
-
-#
-# IO Schedulers
-#
-CONFIG_IOSCHED_NOOP=y
-CONFIG_IOSCHED_AS=y
-CONFIG_IOSCHED_DEADLINE=y
-CONFIG_IOSCHED_CFQ=y
-# CONFIG_ATA_OVER_ETH is not set
-
-#
-# ATA/ATAPI/MFM/RLL support
-#
-# CONFIG_IDE is not set
-
-#
-# SCSI device support
-#
-# CONFIG_SCSI is not set
-
-#
-# Multi-device support (RAID and LVM)
-#
-# CONFIG_MD is not set
-
-#
-# Fusion MPT device support
-#
-
-#
-# IEEE 1394 (FireWire) support
-#
-
-#
-# I2O device support
-#
-
-#
-# Networking support
-#
-CONFIG_NET=y
-
-#
-# Networking options
-#
-# CONFIG_PACKET is not set
-# CONFIG_NETLINK_DEV is not set
-CONFIG_UNIX=y
-# CONFIG_NET_KEY is not set
-CONFIG_INET=y
-# CONFIG_IP_MULTICAST is not set
-# CONFIG_IP_ADVANCED_ROUTER is not set
-CONFIG_IP_PNP=y
-# CONFIG_IP_PNP_DHCP is not set
-CONFIG_IP_PNP_BOOTP=y
-# CONFIG_IP_PNP_RARP is not set
-# CONFIG_NET_IPIP is not set
-# CONFIG_NET_IPGRE is not set
-# CONFIG_ARPD is not set
-# CONFIG_SYN_COOKIES is not set
-# CONFIG_INET_AH is not set
-# CONFIG_INET_ESP is not set
-# CONFIG_INET_IPCOMP is not set
-# CONFIG_INET_TUNNEL is not set
-CONFIG_IP_TCPDIAG=y
-# CONFIG_IP_TCPDIAG_IPV6 is not set
-# CONFIG_IPV6 is not set
-# CONFIG_NETFILTER is not set
-
-#
-# SCTP Configuration (EXPERIMENTAL)
-#
-# CONFIG_IP_SCTP is not set
-# CONFIG_ATM is not set
-# CONFIG_BRIDGE is not set
-# CONFIG_VLAN_8021Q is not set
-# CONFIG_DECNET is not set
-# CONFIG_LLC2 is not set
-# CONFIG_IPX is not set
-# CONFIG_ATALK is not set
-# CONFIG_X25 is not set
-# CONFIG_LAPB is not set
-# CONFIG_NET_DIVERT is not set
-# CONFIG_ECONET is not set
-# CONFIG_WAN_ROUTER is not set
-
-#
-# QoS and/or fair queueing
-#
-# CONFIG_NET_SCHED is not set
-# CONFIG_NET_CLS_ROUTE is not set
-
-#
-# Network testing
-#
-# CONFIG_NET_PKTGEN is not set
-# CONFIG_NETPOLL is not set
-# CONFIG_NET_POLL_CONTROLLER is not set
-# CONFIG_HAMRADIO is not set
-# CONFIG_IRDA is not set
-# CONFIG_BT is not set
-CONFIG_NETDEVICES=y
-CONFIG_DUMMY=y
-# CONFIG_BONDING is not set
-# CONFIG_EQUALIZER is not set
-# CONFIG_TUN is not set
-
-#
-# ARCnet devices
-#
-# CONFIG_ARCNET is not set
-
-#
-# Ethernet (10 or 100Mbit)
-#
-CONFIG_NET_ETHERNET=y
-# CONFIG_MII is not set
-# CONFIG_NET_VENDOR_3COM is not set
-# CONFIG_LANCE is not set
-# CONFIG_NET_VENDOR_SMC is not set
-# CONFIG_SMC91X is not set
-# CONFIG_NET_VENDOR_RACAL is not set
-# CONFIG_AT1700 is not set
-# CONFIG_DEPCA is not set
-# CONFIG_HP100 is not set
-# CONFIG_NET_ISA is not set
-CONFIG_NET_PCI=y
-# CONFIG_AC3200 is not set
-# CONFIG_APRICOT is not set
-CONFIG_CS89x0=y
-# CONFIG_NET_POCKET is not set
-
-#
-# Ethernet (1000 Mbit)
-#
-
-#
-# Ethernet (10000 Mbit)
-#
-
-#
-# Token Ring devices
-#
-# CONFIG_TR is not set
-
-#
-# Wireless LAN (non-hamradio)
-#
-# CONFIG_NET_RADIO is not set
-
-#
-# Wan interfaces
-#
-# CONFIG_WAN is not set
-# CONFIG_PLIP is not set
-CONFIG_PPP=y
-# CONFIG_PPP_MULTILINK is not set
-# CONFIG_PPP_FILTER is not set
-# CONFIG_PPP_ASYNC is not set
-# CONFIG_PPP_SYNC_TTY is not set
-# CONFIG_PPP_DEFLATE is not set
-# CONFIG_PPP_BSDCOMP is not set
-# CONFIG_PPPOE is not set
-CONFIG_SLIP=y
-CONFIG_SLIP_COMPRESSED=y
-# CONFIG_SLIP_SMART is not set
-# CONFIG_SLIP_MODE_SLIP6 is not set
-# CONFIG_SHAPER is not set
-# CONFIG_NETCONSOLE is not set
-
-#
-# ISDN subsystem
-#
-# CONFIG_ISDN is not set
-
-#
-# Input device support
-#
-CONFIG_INPUT=y
-
-#
-# Userland interfaces
-#
-CONFIG_INPUT_MOUSEDEV=y
-CONFIG_INPUT_MOUSEDEV_PSAUX=y
-CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
-CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
-# CONFIG_INPUT_JOYDEV is not set
-# CONFIG_INPUT_TSDEV is not set
-# CONFIG_INPUT_EVDEV is not set
-# CONFIG_INPUT_EVBUG is not set
-
-#
-# Input Device Drivers
-#
-CONFIG_INPUT_KEYBOARD=y
-CONFIG_KEYBOARD_ATKBD=y
-# CONFIG_KEYBOARD_SUNKBD is not set
-# CONFIG_KEYBOARD_LKKBD is not set
-# CONFIG_KEYBOARD_XTKBD is not set
-# CONFIG_KEYBOARD_NEWTON is not set
-CONFIG_INPUT_MOUSE=y
-CONFIG_MOUSE_PS2=y
-# CONFIG_MOUSE_SERIAL is not set
-# CONFIG_MOUSE_INPORT is not set
-# CONFIG_MOUSE_LOGIBM is not set
-# CONFIG_MOUSE_PC110PAD is not set
-# CONFIG_MOUSE_VSXXXAA is not set
-# CONFIG_INPUT_JOYSTICK is not set
-# CONFIG_INPUT_TOUCHSCREEN is not set
-# CONFIG_INPUT_MISC is not set
-
-#
-# Hardware I/O ports
-#
-CONFIG_SERIO=y
-# CONFIG_SERIO_SERPORT is not set
-# CONFIG_SERIO_PARKBD is not set
-CONFIG_SERIO_RPCKBD=y
-CONFIG_SERIO_LIBPS2=y
-# CONFIG_SERIO_RAW is not set
-# CONFIG_GAMEPORT is not set
-CONFIG_SOUND_GAMEPORT=y
-
-#
-# Character devices
-#
-CONFIG_VT=y
-CONFIG_VT_CONSOLE=y
-CONFIG_HW_CONSOLE=y
-# CONFIG_SERIAL_NONSTANDARD is not set
-
-#
-# Serial drivers
-#
-CONFIG_SERIAL_8250=y
-CONFIG_SERIAL_8250_CONSOLE=y
-CONFIG_SERIAL_8250_NR_UARTS=4
-# CONFIG_SERIAL_8250_EXTENDED is not set
-
-#
-# Non-8250 serial port support
-#
-CONFIG_SERIAL_CORE=y
-CONFIG_SERIAL_CORE_CONSOLE=y
-CONFIG_UNIX98_PTYS=y
-CONFIG_LEGACY_PTYS=y
-CONFIG_LEGACY_PTY_COUNT=256
-CONFIG_PRINTER=y
-# CONFIG_LP_CONSOLE is not set
-# CONFIG_PPDEV is not set
-# CONFIG_TIPAR is not set
-
-#
-# IPMI
-#
-# CONFIG_IPMI_HANDLER is not set
-
-#
-# Watchdog Cards
-#
-# CONFIG_WATCHDOG is not set
-# CONFIG_NVRAM is not set
-# CONFIG_RTC is not set
-# CONFIG_DTLK is not set
-# CONFIG_R3964 is not set
-
-#
-# Ftape, the floppy tape device driver
-#
-# CONFIG_DRM is not set
-# CONFIG_RAW_DRIVER is not set
-
-#
-# TPM devices
-#
-# CONFIG_TCG_TPM is not set
-
-#
-# I2C support
-#
-CONFIG_I2C=y
-# CONFIG_I2C_CHARDEV is not set
-
-#
-# I2C Algorithms
-#
-CONFIG_I2C_ALGOBIT=y
-# CONFIG_I2C_ALGOPCF is not set
-# CONFIG_I2C_ALGOPCA is not set
-
-#
-# I2C Hardware Bus support
-#
-# CONFIG_I2C_ELEKTOR is not set
-# CONFIG_I2C_PARPORT is not set
-# CONFIG_I2C_PARPORT_LIGHT is not set
-# CONFIG_I2C_PCA_ISA is not set
-
-#
-# Hardware Sensors Chip support
-#
-# CONFIG_I2C_SENSOR is not set
-# CONFIG_SENSORS_ADM1021 is not set
-# CONFIG_SENSORS_ADM1025 is not set
-# CONFIG_SENSORS_ADM1026 is not set
-# CONFIG_SENSORS_ADM1031 is not set
-# CONFIG_SENSORS_ASB100 is not set
-# CONFIG_SENSORS_DS1621 is not set
-# CONFIG_SENSORS_FSCHER is not set
-# CONFIG_SENSORS_FSCPOS is not set
-# CONFIG_SENSORS_GL518SM is not set
-# CONFIG_SENSORS_GL520SM is not set
-# CONFIG_SENSORS_IT87 is not set
-# CONFIG_SENSORS_LM63 is not set
-# CONFIG_SENSORS_LM75 is not set
-# CONFIG_SENSORS_LM77 is not set
-# CONFIG_SENSORS_LM78 is not set
-# CONFIG_SENSORS_LM80 is not set
-# CONFIG_SENSORS_LM83 is not set
-# CONFIG_SENSORS_LM85 is not set
-# CONFIG_SENSORS_LM87 is not set
-# CONFIG_SENSORS_LM90 is not set
-# CONFIG_SENSORS_MAX1619 is not set
-# CONFIG_SENSORS_PC87360 is not set
-# CONFIG_SENSORS_SMSC47B397 is not set
-# CONFIG_SENSORS_SMSC47M1 is not set
-# CONFIG_SENSORS_W83781D is not set
-# CONFIG_SENSORS_W83L785TS is not set
-# CONFIG_SENSORS_W83627HF is not set
-
-#
-# Other I2C Chip support
-#
-# CONFIG_SENSORS_EEPROM is not set
-# CONFIG_SENSORS_PCF8574 is not set
-# CONFIG_SENSORS_PCF8591 is not set
-# CONFIG_SENSORS_RTC8564 is not set
-# CONFIG_I2C_DEBUG_CORE is not set
-# CONFIG_I2C_DEBUG_ALGO is not set
-# CONFIG_I2C_DEBUG_BUS is not set
-# CONFIG_I2C_DEBUG_CHIP is not set
-
-#
-# Misc devices
-#
-
-#
-# Multimedia devices
-#
-# CONFIG_VIDEO_DEV is not set
-
-#
-# Digital Video Broadcasting Devices
-#
-# CONFIG_DVB is not set
-
-#
-# Graphics support
-#
-CONFIG_FB=y
-CONFIG_FB_CFB_FILLRECT=y
-CONFIG_FB_CFB_COPYAREA=y
-CONFIG_FB_CFB_IMAGEBLIT=y
-CONFIG_FB_SOFT_CURSOR=y
-# CONFIG_FB_MODE_HELPERS is not set
-# CONFIG_FB_TILEBLITTING is not set
-CONFIG_FB_ACORN=y
-# CONFIG_FB_VIRTUAL is not set
-
-#
-# Console display driver support
-#
-# CONFIG_VGA_CONSOLE is not set
-# CONFIG_MDA_CONSOLE is not set
-CONFIG_DUMMY_CONSOLE=y
-CONFIG_FRAMEBUFFER_CONSOLE=y
-CONFIG_FONTS=y
-CONFIG_FONT_8x8=y
-CONFIG_FONT_8x16=y
-# CONFIG_FONT_6x11 is not set
-# CONFIG_FONT_PEARL_8x8 is not set
-# CONFIG_FONT_ACORN_8x8 is not set
-# CONFIG_FONT_MINI_4x6 is not set
-# CONFIG_FONT_SUN8x16 is not set
-# CONFIG_FONT_SUN12x22 is not set
-
-#
-# Logo configuration
-#
-CONFIG_LOGO=y
-CONFIG_LOGO_LINUX_MONO=y
-CONFIG_LOGO_LINUX_VGA16=y
-CONFIG_LOGO_LINUX_CLUT224=y
-# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
-
-#
-# Sound
-#
-# CONFIG_SOUND is not set
-
-#
-# USB support
-#
-CONFIG_USB_ARCH_HAS_HCD=y
-# CONFIG_USB_ARCH_HAS_OHCI is not set
-# CONFIG_USB is not set
-
-#
-# USB Gadget Support
-#
-# CONFIG_USB_GADGET is not set
-
-#
-# MMC/SD Card support
-#
-# CONFIG_MMC is not set
-
-#
-# File systems
-#
-CONFIG_EXT2_FS=y
-# CONFIG_EXT2_FS_XATTR is not set
-# CONFIG_EXT3_FS is not set
-# CONFIG_JBD is not set
-# CONFIG_REISERFS_FS is not set
-# CONFIG_JFS_FS is not set
-
-#
-# XFS support
-#
-# CONFIG_XFS_FS is not set
-CONFIG_MINIX_FS=y
-# CONFIG_ROMFS_FS is not set
-# CONFIG_QUOTA is not set
-CONFIG_DNOTIFY=y
-# CONFIG_AUTOFS_FS is not set
-# CONFIG_AUTOFS4_FS is not set
-
-#
-# CD-ROM/DVD Filesystems
-#
-# CONFIG_ISO9660_FS is not set
-# CONFIG_UDF_FS is not set
-
-#
-# DOS/FAT/NT Filesystems
-#
-# CONFIG_MSDOS_FS is not set
-# CONFIG_VFAT_FS is not set
-# CONFIG_NTFS_FS is not set
-
-#
-# Pseudo filesystems
-#
-CONFIG_PROC_FS=y
-CONFIG_SYSFS=y
-# CONFIG_DEVFS_FS is not set
-# CONFIG_DEVPTS_FS_XATTR is not set
-# CONFIG_TMPFS is not set
-# CONFIG_HUGETLB_PAGE is not set
-CONFIG_RAMFS=y
-
-#
-# Miscellaneous filesystems
-#
-# CONFIG_ADFS_FS is not set
-# CONFIG_AFFS_FS is not set
-# CONFIG_HFS_FS is not set
-# CONFIG_HFSPLUS_FS is not set
-# CONFIG_BEFS_FS is not set
-# CONFIG_BFS_FS is not set
-# CONFIG_EFS_FS is not set
-# CONFIG_JFFS_FS is not set
-# CONFIG_JFFS2_FS is not set
-# CONFIG_CRAMFS is not set
-# CONFIG_VXFS_FS is not set
-# CONFIG_HPFS_FS is not set
-# CONFIG_QNX4FS_FS is not set
-# CONFIG_SYSV_FS is not set
-# CONFIG_UFS_FS is not set
-
-#
-# Network File Systems
-#
-CONFIG_NFS_FS=y
-# CONFIG_NFS_V3 is not set
-# CONFIG_NFS_V4 is not set
-# CONFIG_NFS_DIRECTIO is not set
-# CONFIG_NFSD is not set
-CONFIG_ROOT_NFS=y
-CONFIG_LOCKD=y
-CONFIG_SUNRPC=y
-# CONFIG_RPCSEC_GSS_KRB5 is not set
-# CONFIG_RPCSEC_GSS_SPKM3 is not set
-# CONFIG_SMB_FS is not set
-# CONFIG_CIFS is not set
-# CONFIG_NCP_FS is not set
-# CONFIG_CODA_FS is not set
-# CONFIG_AFS_FS is not set
-
-#
-# Partition Types
-#
-CONFIG_PARTITION_ADVANCED=y
-# CONFIG_ACORN_PARTITION is not set
-# CONFIG_OSF_PARTITION is not set
-# CONFIG_AMIGA_PARTITION is not set
-# CONFIG_ATARI_PARTITION is not set
-# CONFIG_MAC_PARTITION is not set
-# CONFIG_MSDOS_PARTITION is not set
-# CONFIG_LDM_PARTITION is not set
-# CONFIG_SGI_PARTITION is not set
-# CONFIG_ULTRIX_PARTITION is not set
-# CONFIG_SUN_PARTITION is not set
-# CONFIG_EFI_PARTITION is not set
-
-#
-# Native Language Support
-#
-# CONFIG_NLS is not set
-
-#
-# Profiling support
-#
-# CONFIG_PROFILING is not set
-
-#
-# Kernel hacking
-#
-# CONFIG_PRINTK_TIME is not set
-# CONFIG_DEBUG_KERNEL is not set
-CONFIG_LOG_BUF_SHIFT=14
-# CONFIG_DEBUG_BUGVERBOSE is not set
-CONFIG_FRAME_POINTER=y
-# CONFIG_DEBUG_USER is not set
-
-#
-# Security options
-#
-# CONFIG_KEYS is not set
-# CONFIG_SECURITY is not set
-
-#
-# Cryptographic options
-#
-# CONFIG_CRYPTO is not set
-
-#
-# Hardware crypto devices
-#
-
-#
-# Library routines
-#
-# CONFIG_CRC_CCITT is not set
-CONFIG_CRC32=y
-# CONFIG_LIBCRC32C is not set
index 50a30bc91872652c5d8f68ad2d013ce9afed7503..8ac9b8424007457c72231f5f07942dff7fc3c76b 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/fs.h>
 #include <linux/sysctl.h>
 #include <linux/init.h>
+#include <linux/io.h>
 
 static unsigned int isa_membase, isa_portbase, isa_portshift;
 
index 0a38c69fdbc450f3d09ed7545ef60af14e45fb50..73376170fb914a692d04882a375d75b3a131de84 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/module.h>
 #include <linux/pm.h>
 
+#include <asm/irq.h>
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
 
index 28594fcc88e3a4dfc482c234d8c00924238a3a4a..2e9ecad97f3dc76367a842eba110a046f7e20766 100644 (file)
@@ -12,6 +12,7 @@
 
 #include <linux/module.h>
 
+#include <asm/irq.h>
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
 #include <mach/at91rm9200.h>
index accb69ec478e53aca7bffb3e0aedefae89a167b5..0894f1077be7db5368568fefd64b9831280d447c 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/module.h>
 #include <linux/pm.h>
 
+#include <asm/irq.h>
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
 #include <mach/cpu.h>
index 7b51a59ae8b30f3b35dfcc2f7c22d37a15e6ed40..3acd7d7e6a423116411805f7b546099ba674d447 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/module.h>
 #include <linux/pm.h>
 
+#include <asm/irq.h>
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
 #include <mach/at91sam9261.h>
index ada4b6769107569318f9c6e21009a90728f7e79e..942792d630d8c457ed6bf0051babba6dea36a83c 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/module.h>
 #include <linux/pm.h>
 
+#include <asm/irq.h>
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
 #include <mach/at91sam9263.h>
index 252e954b49fda7d1325a9eaffb697c9bf2c181bf..211c5c14a1e605d6d4835203c798b9bc47419c9f 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/module.h>
 #include <linux/pm.h>
 
+#include <asm/irq.h>
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
 #include <mach/cpu.h>
index 9b937ee4815a804cd8b92dc5e32aca82e9c04d29..35e12a49d1a684372f736fe374abed15763cd6bd 100644 (file)
@@ -29,6 +29,7 @@
 #include <mach/hardware.h>
 #include <mach/board.h>
 #include <mach/gpio.h>
+#include <mach/at91sam9_smc.h>
 #include <mach/at91_shdwc.h>
 
 #include "sam9_smc.h"
index c58e32ec4c5d822203c7e438d47ee499a7b75a8f..0bea1454ae03fff25ee689b61e3999a826559a36 100644 (file)
@@ -24,7 +24,6 @@
 
 #include <mach/hardware.h>
 #include <asm/page.h>
-#include <asm/pgtable.h>
 #include <asm/sizes.h>
  
 #include <asm/mach/map.h>
index 7122b3d2104347afb0abf0c963a476daa8da1e3d..7430e4049d87b1ca789b372f2641c4088547c31c 100644 (file)
@@ -24,7 +24,6 @@
 #include <linux/initrd.h>
 
 #include <mach/hardware.h>
-#include <asm/irq.h>
 #include <asm/setup.h>
 #include <asm/mach-types.h>
 
index 3d4b1de8f8981291176cff004db625eae5daf41b..808633f9f03c3da165f4d42b1e23bd531118fdfa 100644 (file)
@@ -20,6 +20,7 @@
 
 #include <mach/hardware.h>
 #include <mach/i2c.h>
+#include <mach/irqs.h>
 
 static struct resource i2c_resources[] = {
        {
index b3a2961f0f46f7780c21a2f78eceaabc3133dffd..b456f079f43ff4d2682e765cfd33ab681689c990 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/io.h>
 #include <asm-generic/gpio.h>
 #include <mach/hardware.h>
+#include <mach/irqs.h>
 
 /*
  * basic gpio routines
index 36ff06d4df15e30ba0272ee5645a811aa0f6b9cf..b97f529e58e8455016f675d965f2cc568fda1b2e 100644 (file)
 
 #include "common.h"
 
-extern void __init isa_init_irq(unsigned int irq);
-
 unsigned int mem_fclk_21285 = 50000000;
 
 EXPORT_SYMBOL(mem_fclk_21285);
 
+static void __init early_fclk(char **arg)
+{
+       mem_fclk_21285 = simple_strtoul(*arg, arg, 0);
+}
+
+__early_param("mem_fclk_21285=", early_fclk);
+
 static int __init parse_tag_memclk(const struct tag *tag)
 {
        mem_fclk_21285 = tag->u.memclk.fmemclk;
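
The early_fclk() hook added above lets the footbridge fclk be overridden from the kernel command line: __early_param() arranges for the function to receive the text following "mem_fclk_21285=", which simple_strtoul() then parses with an auto-detected base. A minimal user-space sketch of just that parsing step (the value string is hypothetical; strtoul stands in for the kernel's simple_strtoul):

    #include <stdio.h>
    #include <stdlib.h>

    int main(void)
    {
            /* the kernel hands early_fclk() the text after "mem_fclk_21285=" */
            char buf[] = "60000000";   /* hypothetical command-line value */
            char *end;

            /* base 0, like simple_strtoul(): decimal, 0x hex, or 0-prefix octal */
            unsigned long fclk = strtoul(buf, &end, 0);

            printf("mem_fclk_21285 = %lu Hz\n", fclk);
            return 0;
    }
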
index 580e31bbc711847599b6a5b8d385549c0ead619e..b05e662d21ad8162ec21542849116efb05fda0a2 100644 (file)
@@ -7,3 +7,4 @@ extern void isa_rtc_init(void);
 extern void footbridge_map_io(void);
 extern void footbridge_init_irq(void);
 
+extern void isa_init_irq(unsigned int irq);
index 133086019e3ea776b98c236a9bebc3a86146a9c0..3ffa54841ec55c9d1c2c900a1f469a796114c037 100644 (file)
@@ -287,6 +287,9 @@ struct pci_bus * __init dc21285_scan_bus(int nr, struct pci_sys_data *sys)
        return pci_scan_bus(0, &dc21285_ops, sys);
 }
 
+#define dc21285_request_irq(_a, _b, _c, _d, _e) \
+       WARN_ON(request_irq(_a, _b, _c, _d, _e) < 0)
+
 void __init dc21285_preinit(void)
 {
        unsigned int mem_size, mem_mask;
@@ -335,16 +338,16 @@ void __init dc21285_preinit(void)
        /*
         * We don't care if these fail.
         */
-       request_irq(IRQ_PCI_SERR, dc21285_serr_irq, IRQF_DISABLED,
-                   "PCI system error", &serr_timer);
-       request_irq(IRQ_PCI_PERR, dc21285_parity_irq, IRQF_DISABLED,
-                   "PCI parity error", &perr_timer);
-       request_irq(IRQ_PCI_ABORT, dc21285_abort_irq, IRQF_DISABLED,
-                   "PCI abort", NULL);
-       request_irq(IRQ_DISCARD_TIMER, dc21285_discard_irq, IRQF_DISABLED,
-                   "Discard timer", NULL);
-       request_irq(IRQ_PCI_DPERR, dc21285_dparity_irq, IRQF_DISABLED,
-                   "PCI data parity", NULL);
+       dc21285_request_irq(IRQ_PCI_SERR, dc21285_serr_irq, IRQF_DISABLED,
+                           "PCI system error", &serr_timer);
+       dc21285_request_irq(IRQ_PCI_PERR, dc21285_parity_irq, IRQF_DISABLED,
+                           "PCI parity error", &perr_timer);
+       dc21285_request_irq(IRQ_PCI_ABORT, dc21285_abort_irq, IRQF_DISABLED,
+                           "PCI abort", NULL);
+       dc21285_request_irq(IRQ_DISCARD_TIMER, dc21285_discard_irq, IRQF_DISABLED,
+                           "Discard timer", NULL);
+       dc21285_request_irq(IRQ_PCI_DPERR, dc21285_dparity_irq, IRQF_DISABLED,
+                           "PCI data parity", NULL);
 
        if (cfn_mode) {
                static struct resource csrio;
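
The dc21285_request_irq() wrapper introduced above keeps the "we don't care if these fail" behaviour while making sure a failed registration is no longer silently ignored: the return value of request_irq() is fed to WARN_ON(), so an error produces a warning but initialisation continues. A user-space sketch of the same pattern, with stand-in names (fake_request_irq and checked_request_irq are illustrations, not kernel APIs):

    #include <stdio.h>

    /* stand-in for the kernel's request_irq() */
    static int fake_request_irq(int irq)
    {
            return irq < 0 ? -22 /* -EINVAL */ : 0;
    }

    /* same shape as dc21285_request_irq(): the return value is consumed,
     * and a failure still produces a loud warning instead of vanishing */
    #define checked_request_irq(irq)                                       \
            do {                                                           \
                    if (fake_request_irq(irq) < 0)                         \
                            fprintf(stderr, "WARN: irq %d failed\n", irq); \
            } while (0)

    int main(void)
    {
            checked_request_irq(18);  /* succeeds quietly */
            checked_request_irq(-1);  /* warns, execution continues */
            return 0;
    }
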
index 9ee80a211d3cb670e71c8fcb58127193485dc392..8bfd06aeb64d3e5910bc21996e36df1241886fd0 100644 (file)
@@ -28,6 +28,8 @@
 #include <asm/irq.h>
 #include <asm/mach-types.h>
 
+#include "common.h"
+
 static void isa_mask_pic_lo_irq(unsigned int irq)
 {
        unsigned int mask = 1 << (irq & 7);
index 56161d55cf47e4ea4c6b3cd7743cf008f7eab566..8c0ba99d683fea6747cd9b39cb7ca6f4a68d796a 100644 (file)
@@ -25,6 +25,7 @@
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/mach/arch.h>
+#include <mach/irqs.h>
 #include <mach/hardware.h>
 #include "common.h"
 
index 7b8ef97fb5016a99127a34b3ce0cdcd5ae3a7574..b3404b7775b318d9beef5e617d9b3ac5e1cd405f 100644 (file)
@@ -698,6 +698,7 @@ void __init kirkwood_init(void)
        printk(KERN_INFO "Kirkwood: %s, TCLK=%d.\n",
                kirkwood_id(), kirkwood_tclk);
        kirkwood_ge00_shared_data.t_clk = kirkwood_tclk;
+       kirkwood_ge01_shared_data.t_clk = kirkwood_tclk;
        kirkwood_spi_plat_data.tclk = kirkwood_tclk;
        kirkwood_uart0_data[0].uartclk = kirkwood_tclk;
        kirkwood_uart1_data[0].uartclk = kirkwood_tclk;
index f6b08f207c894189d49bb52fec1de1b2bf65c457..73fccacd1a73d25a861b8da1fc18ff812b1e091e 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/kernel.h>
 #include <linux/pci.h>
 #include <linux/mbus.h>
+#include <asm/irq.h>
 #include <asm/mach/pci.h>
 #include <plat/pcie.h>
 #include "common.h"
index 36ab0fd3d9b687ace1c166a3d4231262d6dedb12..b89fb6d46cccc9efc5ed9f112875ef039da17595 100644 (file)
@@ -22,6 +22,7 @@
 
 #include <linux/platform_device.h>
 
+#include <mach/irqs.h>
 #include <mach/regs-wan.h>
 #include <mach/regs-lan.h>
 #include <mach/regs-hpna.h>
index f2a74b92a97fa59085a7fa7266711b99728a77b8..31b6b30e98bf1099d6f65b5e5453faa9be82fc50 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/kernel.h>
 #include <linux/platform_device.h>
 
+#include <mach/irqs.h>
 #include <mach/msm_iomap.h>
 #include "devices.h"
 
index 430ea84d587dfd2c2d87fbc079c10a6a43d98136..aad3a7a2f8307342ae8ce2d7cc2dd489efc12146 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/kernel.h>
 #include <linux/pci.h>
 #include <linux/mbus.h>
+#include <asm/irq.h>
 #include <asm/mach/pci.h>
 #include <plat/pcie.h>
 #include "common.h"
index af121f5ab710152dd359fb60ae390fdb1c41c93f..2f9240be1c769203fe15804fb9e3b3f441e04ef1 100644 (file)
@@ -32,6 +32,7 @@
 #include <linux/platform_device.h>
 #include <linux/gpio.h>
 
+#include <mach/irqs.h>
 #include <mach/hardware.h>
 
 /*
index 1d46cb4adf96a5b30f2d4710a73dd57a626e16f0..f8428800f28602c0e870590f32343c5128919f9f 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/serial.h>
 #include <linux/gpio.h>
 #include <mach/hardware.h>
+#include <mach/irqs.h>
 #include <mach/imx-uart.h>
 
 static struct resource uart0[] = {
index ea8fa8898fe8570455d499705f6cba8fd5c68c74..1d844e228ea92e6a674e92a6b0fcb3e2cb3eef42 100644 (file)
@@ -24,6 +24,8 @@
 #include <linux/amba/clcd.h>
 #include <linux/err.h>
 
+#include <asm/irq.h>
+
 #include <mach/netx-regs.h>
 #include <mach/hardware.h>
 
index d51d627ce7cfb081cef650b6ac54bd98f859532d..f201fddb594fb2a823097deb0061f2f96511e2cb 100644 (file)
@@ -163,7 +163,7 @@ static void __init netx_timer_init(void)
         * Adding some safety ... */
        netx_clockevent.min_delta_ns =
                clockevent_delta2ns(0xa00, &netx_clockevent);
-       netx_clockevent.cpumask = cpumask_of_cpu(0);
+       netx_clockevent.cpumask = cpumask_of(0);
        clockevents_register_device(&netx_clockevent);
 }
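
The one-line cpumask fix above tracks the clockevents API change in which the cpumask member became a pointer to a shared, read-only mask: cpumask_of(0) yields a const struct cpumask * for CPU 0, replacing the by-value cpumask_of_cpu(0). A toy illustration of the pointer-based style (struct names and values are simplified placeholders, not the real kernel definitions):

    #include <stdio.h>

    /* toy mask type; the real struct cpumask is a bitmap sized by NR_CPUS */
    struct cpumask { unsigned long bits; };

    /* the field used to hold a cpumask_t by value (cpumask_of_cpu(0));
     * it now holds a pointer to a shared read-only mask (cpumask_of(0)) */
    struct clock_event_sketch {
            const struct cpumask *cpumask;
    };

    static const struct cpumask cpu0_mask = { .bits = 1UL << 0 };

    int main(void)
    {
            struct clock_event_sketch dev;

            dev.cpumask = &cpu0_mask;  /* roughly what cpumask_of(0) yields */
            printf("mask bits = %#lx\n", dev.cpumask->bits);
            return 0;
    }
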
 
index 8fc6205dc3a5417fd3f0e12ea339db1d8932929a..181a78ba81654bdae53bda56c7ede38faad6063b 100644 (file)
@@ -24,6 +24,7 @@
 #include <linux/io.h>
 
 #include <mach/hardware.h>
+#include <mach/irqs.h>
 #include <mach/netx-regs.h>
 
 #include <mach/xc.h>
index 7de7c69155840f2e84fddc23574bb9d74546afe5..4474da7bc88a0f613f2e4a544491b44bf6e889dd 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/platform_device.h>
 
 #include <mach/dma.h>
+#include <mach/irqs.h>
 #include <mach/mux.h>
 #include <mach/cpu.h>
 #include <mach/mcbsp.h>
index cae3ebe249b3cdbacb18ece2b34e06aba82d17d5..acdc709901cd1ca55f231ac79be495e1e0e67dc7 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/platform_device.h>
 
 #include <mach/dma.h>
+#include <mach/irqs.h>
 #include <mach/mux.h>
 #include <mach/cpu.h>
 #include <mach/mcbsp.h>
index a7b7d77b1b09198d0385cc6df9920f967ab1d785..d0a785a3b8801ac04625f1469eccf44d1e4a63a4 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/kernel.h>
 #include <linux/pci.h>
 #include <linux/mbus.h>
+#include <asm/irq.h>
 #include <asm/mach/pci.h>
 #include <plat/pcie.h>
 #include "common.h"
index 015cc21d5f55ff382e3f28cf58afda8e04851529..f219914f5b291efdb9fdbe9391a26ac6c7307932 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/io.h>
+#include <mach/hardware.h>
 #include <mach/platform.h>
 #include <mach/gpio.h>
 
index 87c093286ff930eb079a0f1b9b5a1f88867ac1dc..f3fea29c00d3b3072008bf52f2c224370f544357 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/platform_device.h>
 #include <linux/err.h>
 #include <mach/platform.h>
+#include <mach/irqs.h>
 #include <mach/i2c.h>
 
 static int set_clock_run(struct platform_device *pdev)
index 251129391d7dbcde5b723346cc402a2e4b07b4df..edcd9d5ce5455b600e5ad5a693f14340f0d4d7f7 100644 (file)
@@ -20,6 +20,7 @@
 #include <asm/mach/arch.h>
 #include <asm/mach-types.h>
 
+#include <mach/irqs.h>
 #include <mach/mfp-pxa25x.h>
 #include <mach/pxa-regs.h>
 #include <mach/hardware.h>
index bed0336aca3d1c1366d4c3a5826bbefed4875c59..77bb8e2c48c0ad7291c0df847284504fa69268eb 100644 (file)
@@ -28,6 +28,7 @@
 #include <mach/eseries-gpio.h>
 #include <mach/pxafb.h>
 #include <mach/udc.h>
+#include <mach/irqs.h>
 
 #include "generic.h"
 #include "eseries.h"
index b00d670b2ea6cb65039acf903b0da0e1a99fab72..6d48e00f4f0b444913f49cdcd58c01900cddf10d 100644 (file)
@@ -30,6 +30,7 @@
 #include <mach/eseries-gpio.h>
 #include <mach/udc.h>
 #include <mach/irda.h>
+#include <mach/irqs.h>
 
 #include "generic.h"
 #include "eseries.h"
index 84d7c1aac58d713658a2f2a8de2278e02ad5f86c..be1ab8edb973eab021c7730c66246bdcac856b29 100644 (file)
@@ -29,6 +29,7 @@
 #include <mach/eseries-gpio.h>
 #include <mach/udc.h>
 #include <mach/irda.h>
+#include <mach/irqs.h>
 
 #include "generic.h"
 #include "eseries.h"
@@ -105,6 +106,57 @@ static struct platform_device e750_fb_device = {
        .resource       = e750_fb_resources,
 };
 
+/* -------------------- e750 MFP parameters -------------------- */
+
+static unsigned long e750_pin_config[] __initdata = {
+       /* Chip selects */
+       GPIO15_nCS_1,   /* CS1 - Flash */
+       GPIO79_nCS_3,   /* CS3 - IMAGEON */
+       GPIO80_nCS_4,   /* CS4 - TMIO */
+
+       /* Clocks */
+       GPIO11_3_6MHz,
+
+       /* BTUART */
+       GPIO42_BTUART_RXD,
+       GPIO43_BTUART_TXD,
+       GPIO44_BTUART_CTS,
+
+       /* TMIO controller */
+       GPIO19_GPIO, /* t7l66xb #PCLR */
+       GPIO45_GPIO, /* t7l66xb #SUSPEND (NOT BTUART!) */
+
+       /* UDC */
+       GPIO13_GPIO,
+       GPIO3_GPIO,
+
+       /* IrDA */
+       GPIO38_GPIO | MFP_LPM_DRIVE_HIGH,
+
+       /* PC Card */
+       GPIO8_GPIO,   /* CD0 */
+       GPIO44_GPIO,  /* CD1 */
+       GPIO11_GPIO,  /* IRQ0 */
+       GPIO6_GPIO,   /* IRQ1 */
+       GPIO27_GPIO,  /* RST0 */
+       GPIO24_GPIO,  /* RST1 */
+       GPIO20_GPIO,  /* PWR0 */
+       GPIO23_GPIO,  /* PWR1 */
+       GPIO48_nPOE,
+       GPIO49_nPWE,
+       GPIO50_nPIOR,
+       GPIO51_nPIOW,
+       GPIO52_nPCE_1,
+       GPIO53_nPCE_2,
+       GPIO54_nPSKTSEL,
+       GPIO55_nPREG,
+       GPIO56_nPWAIT,
+       GPIO57_nIOIS16,
+
+       /* wakeup */
+       GPIO0_GPIO | WAKEUP_ON_EDGE_RISE,
+};
+
 /* ----------------- e750 tc6393xb parameters ------------------ */
 
 static struct tc6393xb_platform_data e750_tc6393xb_info = {
@@ -137,6 +189,7 @@ static struct platform_device *devices[] __initdata = {
 
 static void __init e750_init(void)
 {
+       pxa2xx_mfp_config(ARRAY_AND_SIZE(e750_pin_config));
        clk_add_alias("CLK_CK3P6MI", &e750_tc6393xb_device.dev,
                        "GPIO11_CLK", NULL),
        eseries_get_tmio_gpios();
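
The new e750_init() line passes the pin table through ARRAY_AND_SIZE(), a small helper that expands one array argument into the pointer/count pair that pxa2xx_mfp_config() expects. A self-contained sketch of the macro (mfp_config_stub and the config words are placeholders, not real MFP encodings):

    #include <stdio.h>

    /* kernel helpers, reproduced here for illustration */
    #define ARRAY_SIZE(a)     (sizeof(a) / sizeof((a)[0]))
    #define ARRAY_AND_SIZE(a) (a), ARRAY_SIZE(a)

    /* stand-in for pxa2xx_mfp_config(unsigned long *cfg, int num) */
    static void mfp_config_stub(const unsigned long *cfg, unsigned long num)
    {
            for (unsigned long i = 0; i < num; i++)
                    printf("config word %lu: %#lx\n", i, cfg[i]);
    }

    int main(void)
    {
            static const unsigned long pin_config[] = { 0x1, 0x2, 0x3 };

            /* expands to: mfp_config_stub(pin_config, 3) */
            mfp_config_stub(ARRAY_AND_SIZE(pin_config));
            return 0;
    }
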
index 9a86a426f92408093485ddb9917b7a3857a1b81d..cc9b1293e8667b647349736b3f4584d9aba14687 100644 (file)
@@ -28,6 +28,7 @@
 #include <mach/hardware.h>
 #include <mach/eseries-gpio.h>
 #include <mach/udc.h>
+#include <mach/irqs.h>
 
 #include "generic.h"
 #include "eseries.h"
index b1fcd10ab6c6bfeeba709a4efae8f1d878b194cc..bcf3fb2c4b3a1f686fa9ec1e6a7d107d38518c5a 100644 (file)
 #define CKEN_MINI_IM   48      /* < Mini-IM */
 #define CKEN_MINI_LCD  49      /* < Mini LCD */
 
-#if defined(CONFIG_CPU_PXA310)
 #define CKEN_MMC3      5       /* < MMC3 Clock Enable */
 #define CKEN_MVED      43      /* < MVED clock enable */
-#endif
 
 /* Note: GCU clock enable bit differs on PXA300/PXA310 and PXA320 */
 #define PXA300_CKEN_GRAPHICS   42      /* Graphics controller clock enable */
index 8fce85f330332d6ffbb27d2a4186a8117c5bccaa..ea3c75595fa9a575d021a82c7d6c4e92e8510b6e 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/errno.h>
 #include <linux/delay.h>
 #include <linux/device.h>
+#include <linux/jiffies.h>
 #include <linux/smp.h>
 #include <linux/io.h>
 
index e0349af8a483f29be67a42ff302f155670792b94..00476a573bbe8c43aabce34e3f4ae3860da40ec1 100644 (file)
@@ -14,6 +14,7 @@
 #define gpio_get_value __gpio_get_value
 #define gpio_set_value __gpio_set_value
 #define gpio_cansleep  __gpio_cansleep
+#define gpio_to_irq    __gpio_to_irq
 
 /* some boards require extra gpio capacity to support external
  * devices that need GPIO.
index 9565903d490b971f864a8f9ae271fdc9c4f07215..49efce8cd4a723e6b6bdc03f6bae2dc8afa6bba1 100644 (file)
 #ifndef __ASM_ARCH_IRQS_H
 #define __ASM_ARCH_IRQS_H __FILE__
 
-#ifndef __ASM_ARM_IRQ_H
-#error "Do not include this directly, instead #include <asm/irq.h>"
-#endif
-
 /* we keep the first set of CPU IRQs out of the range of
  * the ISA space, so that the PC104 has them to itself
  * and we don't end up having to do horrible things to the
index 0a6d0a5d961b10a6ce76aae9dcd0945262910626..315c42e312784993a19c3be53604a0b646071b6e 100644 (file)
@@ -47,7 +47,7 @@
 #include <plat/clock.h>
 #include <plat/devs.h>
 #include <plat/cpu.h>
-#include <asm/plat-s3c24xx/mci.h>
+#include <plat/mci.h>
 
 static struct map_desc at2440evb_iodesc[] __initdata = {
        /* Nothing here */
index b38c47cffc28cd3481eddf6c05883168cfe615ff..4c97f9a4370b50c189e4d42284f9881adb1b99a5 100644 (file)
 #ifndef __ASM_ARCH_IRQS_H
 #define __ASM_ARCH_IRQS_H __FILE__
 
-#ifndef __ASM_ARM_IRQ_H
-#error "Do not include this directly, instead #include <asm/irq.h>"
-#endif
-
 #include <plat/irqs.h>
 
 #endif /* __ASM_ARCH_IRQ_H */
index 89a6ab0b7db81f21ad32ce1dee89c490bc582838..467531edefd3e0621cce0f5d7fca92d7b6fbe1dc 100644 (file)
@@ -26,6 +26,7 @@
 #include <linux/kernel.h>
 #include <linux/platform_device.h>
 #include <linux/i2c.h>
+#include <mach/irqs.h>
 #include <mach/mux.h>
 
 #define OMAP_I2C_SIZE          0x3f
index 0454b8ec02e2aa9aac62186780024cc49ef1ea60..a90198fc4b0f3d31312c3314e4ffaed4186bf3e4 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/platform_device.h>
 #include <linux/fb.h>
 
+#include <mach/irqs.h>
 #include <mach/map.h>
 #include <mach/regs-fb.h>
 
index 2c0128c77c6e98385a11fc9c157be8caca7f5b44..fe327074037ea05d4d996cbe52be7ab9a2deae78 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/string.h>
 #include <linux/platform_device.h>
 
+#include <mach/irqs.h>
 #include <mach/map.h>
 
 #include <plat/regs-iic.h>
index 9658fb0aec951dd70924fa0cbbf73754877e7d34..2387fbf57af6da984b49dce28fcf08a55a6afee5 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/string.h>
 #include <linux/platform_device.h>
 
+#include <mach/irqs.h>
 #include <mach/map.h>
 
 #include <plat/regs-iic.h>
index f95c6c9d9f1a95ad93d64bc17ca8f7f9b894b59f..94a341aaa4e4ce01c4a3a2e40109201824ac8671 100644 (file)
@@ -59,6 +59,22 @@ static int s3c24xx_gpiolib_banka_output(struct gpio_chip *chip,
        return 0;
 }
 
+static int s3c24xx_gpiolib_bankf_toirq(struct gpio_chip *chip, unsigned offset)
+{
+       if (offset < 4)
+               return IRQ_EINT0 + offset;
+       
+       if (offset < 8)
+               return IRQ_EINT4 + offset - 4;
+       
+       return -EINVAL;
+}
+
+static int s3c24xx_gpiolib_bankg_toirq(struct gpio_chip *chip, unsigned offset)
+{
+       return IRQ_EINT8 + offset;
+}
+
 struct s3c_gpio_chip s3c24xx_gpios[] = {
        [0] = {
                .base   = S3C24XX_GPIO_BASE(S3C2410_GPA0),
@@ -114,6 +130,7 @@ struct s3c_gpio_chip s3c24xx_gpios[] = {
                        .owner                  = THIS_MODULE,
                        .label                  = "GPIOF",
                        .ngpio                  = 8,
+                       .to_irq                 = s3c24xx_gpiolib_bankf_toirq,
                },
        },
        [6] = {
@@ -123,6 +140,7 @@ struct s3c_gpio_chip s3c24xx_gpios[] = {
                        .owner                  = THIS_MODULE,
                        .label                  = "GPIOG",
                        .ngpio                  = 10,
+                       .to_irq                 = s3c24xx_gpiolib_bankg_toirq,
                },
        },
 };
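
The two .to_irq callbacks added above give gpio_to_irq() a per-bank mapping: bank F offsets 0-3 map to the dedicated EINT0-3 lines, offsets 4-7 to the EINT4-7 block, and bank G maps linearly onto EINT8 and up. A runnable sketch of the bank F mapping (the IRQ numbers are placeholders, not the real <mach/irqs.h> values):

    #include <stdio.h>

    /* placeholder IRQ numbers; the real ones come from <mach/irqs.h> */
    enum { IRQ_EINT0 = 16, IRQ_EINT4 = 32 };

    /* same mapping as s3c24xx_gpiolib_bankf_toirq() above */
    static int bankf_to_irq(unsigned int offset)
    {
            if (offset < 4)
                    return IRQ_EINT0 + offset;
            if (offset < 8)
                    return IRQ_EINT4 + offset - 4;
            return -22;  /* -EINVAL: no interrupt for this offset */
    }

    int main(void)
    {
            for (unsigned int off = 0; off < 9; off++)
                    printf("GPF%u -> irq %d\n", off, bankf_to_irq(off));
            return 0;
    }
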
index ec56b88866c43d597b885290c7e3507c3cff9982..0120b760315ba3a881fdf1829739b015829479a4 100644 (file)
@@ -19,6 +19,8 @@
 #include <linux/io.h>
 #include <linux/pwm.h>
 
+#include <mach/irqs.h>
+
 #include <plat/devs.h>
 #include <plat/regs-timer.h>
 
index 02e8dd4c97d51bf2a33c1cd4d548b2c54ea5eb3c..2846f550b727792b043268cce7728a5ccd71b0c3 100644 (file)
 #define IRQ_EINT_GROUP8_BASE   (IRQ_EINT_GROUP7_BASE + IRQ_EINT_GROUP7_NR)
 #define IRQ_EINT_GROUP9_BASE   (IRQ_EINT_GROUP8_BASE + IRQ_EINT_GROUP8_NR)
 
-#define IRQ_EINT_GROUP(group, no)      (IRQ_EINT_GROUP##group##__BASE + (x))
+#define IRQ_EINT_GROUP(group, no)      (IRQ_EINT_GROUP##group##_BASE + (no))
 
 /* Set the default NR_IRQS */
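
The macro fix above corrects two bugs in one line: the doubled underscore in IRQ_EINT_GROUP##group##__BASE pasted a token that matched none of the *_BASE defines, and the body used an undefined x instead of the no parameter. A small demonstration of the corrected expansion (the base value is a placeholder):

    #include <stdio.h>

    #define IRQ_EINT_GROUP1_BASE 64  /* placeholder base value */

    /* fixed form: a single underscore before BASE so the pasted token
     * matches the *_BASE defines, and the second parameter (no) is
     * actually used */
    #define IRQ_EINT_GROUP(group, no) (IRQ_EINT_GROUP##group##_BASE + (no))

    int main(void)
    {
            /* expands to (IRQ_EINT_GROUP1_BASE + (3)) == 67 */
            printf("IRQ_EINT_GROUP(1, 3) = %d\n", IRQ_EINT_GROUP(1, 3));
            return 0;
    }
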
 
index 79f25cef32dfe72b0d230ce6642ce95c747f080d..84b861316ce78b65f90febaf44227d3ffed37c83 100644 (file)
@@ -108,6 +108,8 @@ config ARCH_NO_VIRT_TO_BUS
 config PPC
        bool
        default y
+       select HAVE_FTRACE_MCOUNT_RECORD
+       select HAVE_DYNAMIC_FTRACE
        select HAVE_FUNCTION_TRACER
        select ARCH_WANT_OPTIONAL_GPIOLIB
        select HAVE_IDE
@@ -326,7 +328,8 @@ config KEXEC
 
 config CRASH_DUMP
        bool "Build a kdump crash kernel"
-       depends on (PPC64 && RELOCATABLE) || 6xx
+       depends on PPC64 || 6xx
+       select RELOCATABLE if PPC64
        help
          Build a kernel suitable for use as a kdump capture kernel.
          The same kernel binary can be used as production kernel and dump
index ab6dda37243870f782572c1a5d2502413d0ea528..e84df338ea298762cb3f75c8393aef3dc14a3167 100644 (file)
@@ -356,7 +356,7 @@ $(obj)/zImage.initrd:       $(addprefix $(obj)/, $(initrd-y))
        @rm -f $@; ln $< $@
 
 install: $(CONFIGURE) $(addprefix $(obj)/, $(image-y))
-       sh -x $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" vmlinux System.map "$(INSTALL_PATH)" $<
+       sh -x $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" vmlinux System.map "$(INSTALL_PATH)" $^
 
 # anything not in $(targets)
 clean-files += $(image-) $(initrd-) cuImage.* dtbImage.* treeImage.* \
index 14534d04e4db1cf1e9709184754e9278011c0531..6e34f170fa62ba3b38a60eab064138b7b75e34f6 100644 (file)
                };
 
                bcsr@1,0 {
+                       #address-cells = <1>;
+                       #size-cells = <1>;
                        compatible = "fsl,mpc8360mds-bcsr";
                        reg = <1 0 0x8000>;
+                       ranges = <0 1 0 0x8000>;
+
+                       bcsr13: gpio-controller@d {
+                               #gpio-cells = <2>;
+                               compatible = "fsl,mpc8360mds-bcsr-gpio";
+                               reg = <0xd 1>;
+                               gpio-controller;
+                       };
                };
        };
 
                };
 
                par_io@1400 {
+                       #address-cells = <1>;
+                       #size-cells = <1>;
                        reg = <0x1400 0x100>;
+                       ranges = <0 0x1400 0x100>;
                        device_type = "par_io";
                        num-ports = <7>;
 
+                       qe_pio_b: gpio-controller@18 {
+                               #gpio-cells = <2>;
+                               compatible = "fsl,mpc8360-qe-pario-bank",
+                                            "fsl,mpc8323-qe-pario-bank";
+                               reg = <0x18 0x18>;
+                               gpio-controller;
+                       };
+
                        pio1: ucc_pin@01 {
                                pio-map = <
                        /* port  pin  dir  open_drain  assignment  has_irq */
                        };
                };
 
+               timer@440 {
+                       compatible = "fsl,mpc8360-qe-gtm",
+                                    "fsl,qe-gtm", "fsl,gtm";
+                       reg = <0x440 0x40>;
+                       clock-frequency = <132000000>;
+                       interrupts = <12 13 14 15>;
+                       interrupt-parent = <&qeic>;
+               };
+
                spi@4c0 {
                        cell-index = <0>;
                        compatible = "fsl,spi";
                };
 
                usb@6c0 {
-                       compatible = "qe_udc";
+                       compatible = "fsl,mpc8360-qe-usb",
+                                    "fsl,mpc8323-qe-usb";
                        reg = <0x6c0 0x40 0x8b00 0x100>;
                        interrupts = <11>;
                        interrupt-parent = <&qeic>;
-                       mode = "slave";
+                       fsl,fullspeed-clock = "clk21";
+                       fsl,lowspeed-clock = "brg9";
+                       gpios = <&qe_pio_b  2 0   /* USBOE */
+                                &qe_pio_b  3 0   /* USBTP */
+                                &qe_pio_b  8 0   /* USBTN */
+                                &qe_pio_b  9 0   /* USBRP */
+                                &qe_pio_b 11 0   /* USBRN */
+                                &bcsr13    5 0   /* SPEED */
+                                &bcsr13    4 1>; /* POWER */
                };
 
                enet0: ucc@2000 {
index decadf3d9e989e935faa898c3d56d5d0676531e1..37b789510d68bc160b191a53717a916e293ea361 100644 (file)
                                reg = <0x440 0x40>;
                                interrupts = <12 13 14 15>;
                                interrupt-parent = <&qeic>;
-                               /* filled by u-boot */
-                               clock-frequency = <0>;
+                               clock-frequency = <166666666>;
+                       };
+
+                       usb@6c0 {
+                               compatible = "fsl,mpc8360-qe-usb",
+                                            "fsl,mpc8323-qe-usb";
+                               reg = <0x6c0 0x40 0x8b00 0x100>;
+                               interrupts = <11>;
+                               interrupt-parent = <&qeic>;
+                               fsl,fullspeed-clock = "clk21";
+                               gpios = <&qe_pio_b  2 0 /* USBOE */
+                                        &qe_pio_b  3 0 /* USBTP */
+                                        &qe_pio_b  8 0 /* USBTN */
+                                        &qe_pio_b  9 0 /* USBRP */
+                                        &qe_pio_b 11 0 /* USBRN */
+                                        &qe_pio_e 20 0 /* SPEED */
+                                        &qe_pio_e 21 1 /* POWER */>;
                        };
 
                        spi@4c0 {
index 35d5e248ccd7939768ce2bd14e8a54cc5101353f..4481532cbe7751c95dcb6b49a816a8640ffb09d5 100644 (file)
                serial1 = &serial1;
                pci0 = &pci0;
                pci1 = &pci1;
-               rapidio0 = &rapidio0;
+/*
+ * Only one of Rapid IO or PCI can be present due to HW limitations and
+ * due to the fact that the 2 now share address space in the new memory
+ * map.  The most likely case is that we have PCI, so comment out the
+ * rapidio node.  Leave it here for reference.
+ */
+               /* rapidio0 = &rapidio0; */
        };
 
        cpus {
                reg = <0x00000000 0x40000000>;  // 1G at 0x0
        };
 
-       localbus@f8005000 {
+       localbus@ffe05000 {
                #address-cells = <2>;
                #size-cells = <1>;
                compatible = "fsl,mpc8641-localbus", "simple-bus";
-               reg = <0xf8005000 0x1000>;
+               reg = <0xffe05000 0x1000>;
                interrupts = <19 2>;
                interrupt-parent = <&mpic>;
 
-               ranges = <0 0 0xff800000 0x00800000
-                         1 0 0xfe000000 0x01000000
-                         2 0 0xf8200000 0x00100000
-                         3 0 0xf8100000 0x00100000>;
+               ranges = <0 0 0xef800000 0x00800000
+                         2 0 0xffdf8000 0x00008000
+                         3 0 0xffdf0000 0x00008000>;
 
                flash@0,0 {
                        compatible = "cfi-flash";
                };
        };
 
-       soc8641@f8000000 {
+       soc8641@ffe00000 {
                #address-cells = <1>;
                #size-cells = <1>;
                device_type = "soc";
                compatible = "simple-bus";
-               ranges = <0x00000000 0xf8000000 0x00100000>;
-               reg = <0xf8000000 0x00001000>;  // CCSRBAR
+               ranges = <0x00000000 0xffe00000 0x00100000>;
+               reg = <0xffe00000 0x00001000>;  // CCSRBAR
                bus-frequency = <0>;
 
                i2c@3000 {
                };
        };
 
-       pci0: pcie@f8008000 {
+       pci0: pcie@ffe08000 {
                cell-index = <0>;
                compatible = "fsl,mpc8641-pcie";
                device_type = "pci";
                #interrupt-cells = <1>;
                #size-cells = <2>;
                #address-cells = <3>;
-               reg = <0xf8008000 0x1000>;
+               reg = <0xffe08000 0x1000>;
                bus-range = <0x0 0xff>;
                ranges = <0x02000000 0x0 0x80000000 0x80000000 0x0 0x20000000
-                         0x01000000 0x0 0x00000000 0xe2000000 0x0 0x00100000>;
+                         0x01000000 0x0 0x00000000 0xffc00000 0x0 0x00010000>;
                clock-frequency = <33333333>;
                interrupt-parent = <&mpic>;
                interrupts = <24 2>;
 
                                  0x01000000 0x0 0x00000000
                                  0x01000000 0x0 0x00000000
-                                 0x0 0x00100000>;
+                                 0x0 0x00010000>;
                        uli1575@0 {
                                reg = <0 0 0 0 0>;
                                #size-cells = <2>;
                                          0x0 0x20000000
                                          0x01000000 0x0 0x00000000
                                          0x01000000 0x0 0x00000000
-                                         0x0 0x00100000>;
+                                         0x0 0x00010000>;
                                isa@1e {
                                        device_type = "isa";
                                        #interrupt-cells = <2>;
 
        };
 
-       pci1: pcie@f8009000 {
+       pci1: pcie@ffe09000 {
                cell-index = <1>;
                compatible = "fsl,mpc8641-pcie";
                device_type = "pci";
                #interrupt-cells = <1>;
                #size-cells = <2>;
                #address-cells = <3>;
-               reg = <0xf8009000 0x1000>;
+               reg = <0xffe09000 0x1000>;
                bus-range = <0 0xff>;
                ranges = <0x02000000 0x0 0xa0000000 0xa0000000 0x0 0x20000000
-                         0x01000000 0x0 0x00000000 0xe3000000 0x0 0x00100000>;
+                         0x01000000 0x0 0x00000000 0xffc10000 0x0 0x00010000>;
                clock-frequency = <33333333>;
                interrupt-parent = <&mpic>;
                interrupts = <25 2>;
 
                                  0x01000000 0x0 0x00000000
                                  0x01000000 0x0 0x00000000
-                                 0x0 0x00100000>;
+                                 0x0 0x00010000>;
                };
        };
-       rapidio0: rapidio@f80c0000 {
+/*
+       rapidio0: rapidio@ffec0000 {
                #address-cells = <2>;
                #size-cells = <2>;
                compatible = "fsl,rapidio-delta";
-               reg = <0xf80c0000 0x20000>;
-               ranges = <0 0 0xc0000000 0 0x20000000>;
+               reg = <0xffec0000 0x20000>;
+               ranges = <0 0 0x80000000 0 0x20000000>;
                interrupt-parent = <&mpic>;
-               /* err_irq bell_outb_irq bell_inb_irq
-                       msg1_tx_irq msg1_rx_irq msg2_tx_irq msg2_rx_irq */
+               // err_irq bell_outb_irq bell_inb_irq
+               //      msg1_tx_irq msg1_rx_irq msg2_tx_irq msg2_rx_irq
                interrupts = <48 2 49 2 50 2 53 2 54 2 55 2 56 2>;
        };
+*/
+
 };
index b002bfd56786bd4d174e38f62a9d8cd59bd5f2c0..51b2387bdba0e82dfd8b75de48b56a18cd4fd2ef 100644 (file)
@@ -15,7 +15,7 @@
 #   $2 - kernel image file
 #   $3 - kernel map file
 #   $4 - default install path (blank if root directory)
-#   $5 - kernel boot file, the zImage
+#   $5 and more - kernel boot files; zImage*, uImage, cuImage.*, etc.
 #
 
 # User may have a custom install script
@@ -38,3 +38,15 @@ fi
 
 cat $2 > $4/$image_name
 cp $3 $4/System.map
+
+# Copy all the bootable image files
+path=$4
+shift 4
+while [ $# -ne 0 ]; do
+       image_name=`basename $1`
+       if [ -f $path/$image_name ]; then
+               mv $path/$image_name $path/$image_name.old
+       fi
+       cat $1 > $path/$image_name
+       shift
+done;
index 635588319e0d47f65e1e9654163dbdd8f97dd3c8..32aeb79216f720beba44a5d196ce3db2a895c3c6 100644 (file)
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.28-rc3
-# Sat Nov  8 12:40:13 2008
+# Linux kernel version: 2.6.28-rc8
+# Tue Dec 30 11:17:46 2008
 #
 # CONFIG_PPC64 is not set
 
@@ -21,7 +21,10 @@ CONFIG_FSL_BOOKE=y
 CONFIG_FSL_EMB_PERFMON=y
 # CONFIG_PHYS_64BIT is not set
 CONFIG_SPE=y
+CONFIG_PPC_MMU_NOHASH=y
 # CONFIG_PPC_MM_SLICES is not set
+CONFIG_SMP=y
+CONFIG_NR_CPUS=2
 CONFIG_PPC32=y
 CONFIG_WORD_SIZE=32
 # CONFIG_ARCH_PHYS_ADDR_T_64BIT is not set
@@ -50,7 +53,7 @@ CONFIG_ARCH_MAY_HAVE_PC_FDC=y
 CONFIG_PPC_OF=y
 CONFIG_OF=y
 CONFIG_PPC_UDBG_16550=y
-# CONFIG_GENERIC_TBSYNC is not set
+CONFIG_GENERIC_TBSYNC=y
 CONFIG_AUDIT_ARCH=y
 CONFIG_GENERIC_BUG=y
 CONFIG_DEFAULT_UIMAGE=y
@@ -62,7 +65,7 @@ CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
 # General setup
 #
 CONFIG_EXPERIMENTAL=y
-CONFIG_BROKEN_ON_SMP=y
+CONFIG_LOCK_KERNEL=y
 CONFIG_INIT_ENV_ARG_LIMIT=32
 CONFIG_LOCALVERSION=""
 CONFIG_LOCALVERSION_AUTO=y
@@ -126,6 +129,7 @@ CONFIG_HAVE_IOREMAP_PROT=y
 CONFIG_HAVE_KPROBES=y
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
+CONFIG_USE_GENERIC_SMP_HELPERS=y
 # CONFIG_HAVE_GENERIC_DMA_COHERENT is not set
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -138,6 +142,7 @@ CONFIG_MODULE_FORCE_UNLOAD=y
 CONFIG_MODVERSIONS=y
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_KMOD=y
+CONFIG_STOP_MACHINE=y
 CONFIG_BLOCK=y
 CONFIG_LBD=y
 # CONFIG_BLK_DEV_IO_TRACE is not set
@@ -197,6 +202,7 @@ CONFIG_PPC_I8259=y
 # CONFIG_CPM2 is not set
 CONFIG_FSL_ULI1575=y
 # CONFIG_MPC8xxx_GPIO is not set
+# CONFIG_SIMPLE_GPIO is not set
 
 #
 # Kernel options
@@ -224,6 +230,7 @@ CONFIG_MATH_EMULATION=y
 CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y
 CONFIG_ARCH_HAS_WALK_MEMORY=y
 CONFIG_ARCH_ENABLE_MEMORY_HOTREMOVE=y
+# CONFIG_IRQ_ALL_CPUS is not set
 CONFIG_ARCH_FLATMEM_ENABLE=y
 CONFIG_ARCH_POPULATES_NODE_MAP=y
 CONFIG_SELECT_MEMORY_MODEL=y
@@ -241,6 +248,9 @@ CONFIG_ZONE_DMA_FLAG=1
 CONFIG_BOUNCE=y
 CONFIG_VIRT_TO_BUS=y
 CONFIG_UNEVICTABLE_LRU=y
+CONFIG_PPC_4K_PAGES=y
+# CONFIG_PPC_16K_PAGES is not set
+# CONFIG_PPC_64K_PAGES is not set
 CONFIG_FORCE_MAX_ZONEORDER=11
 CONFIG_PROC_DEVICETREE=y
 # CONFIG_CMDLINE_BOOL is not set
@@ -443,8 +453,10 @@ CONFIG_MISC_DEVICES=y
 # CONFIG_EEPROM_93CX6 is not set
 # CONFIG_SGI_IOC4 is not set
 # CONFIG_TIFM_CORE is not set
+# CONFIG_ICS932S401 is not set
 # CONFIG_ENCLOSURE_SERVICES is not set
 # CONFIG_HP_ILO is not set
+# CONFIG_C2PORT is not set
 CONFIG_HAVE_IDE=y
 # CONFIG_IDE is not set
 
@@ -784,6 +796,7 @@ CONFIG_SERIAL_CORE_CONSOLE=y
 CONFIG_UNIX98_PTYS=y
 CONFIG_LEGACY_PTYS=y
 CONFIG_LEGACY_PTY_COUNT=256
+# CONFIG_HVC_UDBG is not set
 # CONFIG_IPMI_HANDLER is not set
 CONFIG_HW_RANDOM=y
 CONFIG_NVRAM=y
@@ -869,11 +882,11 @@ CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y
 # CONFIG_THERMAL is not set
 # CONFIG_THERMAL_HWMON is not set
 # CONFIG_WATCHDOG is not set
+CONFIG_SSB_POSSIBLE=y
 
 #
 # Sonics Silicon Backplane
 #
-CONFIG_SSB_POSSIBLE=y
 # CONFIG_SSB is not set
 
 #
@@ -886,14 +899,7 @@ CONFIG_SSB_POSSIBLE=y
 # CONFIG_PMIC_DA903X is not set
 # CONFIG_MFD_WM8400 is not set
 # CONFIG_MFD_WM8350_I2C is not set
-
-#
-# Voltage and Current regulators
-#
 # CONFIG_REGULATOR is not set
-# CONFIG_REGULATOR_FIXED_VOLTAGE is not set
-# CONFIG_REGULATOR_VIRTUAL_CONSUMER is not set
-# CONFIG_REGULATOR_BQ24022 is not set
 
 #
 # Multimedia devices
@@ -1252,11 +1258,11 @@ CONFIG_USB_OHCI_LITTLE_ENDIAN=y
 # CONFIG_USB_TMC is not set
 
 #
-# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
+# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may also be needed;
 #
 
 #
-# may also be needed; see USB_STORAGE Help for more information
+# see USB_STORAGE Help for more information
 #
 CONFIG_USB_STORAGE=y
 # CONFIG_USB_STORAGE_DEBUG is not set
@@ -1348,6 +1354,7 @@ CONFIG_RTC_INTF_DEV=y
 # CONFIG_RTC_DRV_M41T80 is not set
 # CONFIG_RTC_DRV_S35390A is not set
 # CONFIG_RTC_DRV_FM3130 is not set
+# CONFIG_RTC_DRV_RX8581 is not set
 
 #
 # SPI RTC drivers
@@ -1624,6 +1631,7 @@ CONFIG_HAVE_FUNCTION_TRACER=y
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_KGDB is not set
+CONFIG_PRINT_STACK_DEPTH=64
 # CONFIG_DEBUG_STACKOVERFLOW is not set
 # CONFIG_DEBUG_STACK_USAGE is not set
 # CONFIG_DEBUG_PAGEALLOC is not set
@@ -1649,11 +1657,16 @@ CONFIG_CRYPTO=y
 #
 # CONFIG_CRYPTO_FIPS is not set
 CONFIG_CRYPTO_ALGAPI=y
+CONFIG_CRYPTO_ALGAPI2=y
 CONFIG_CRYPTO_AEAD=y
+CONFIG_CRYPTO_AEAD2=y
 CONFIG_CRYPTO_BLKCIPHER=y
+CONFIG_CRYPTO_BLKCIPHER2=y
 CONFIG_CRYPTO_HASH=y
-CONFIG_CRYPTO_RNG=y
+CONFIG_CRYPTO_HASH2=y
+CONFIG_CRYPTO_RNG2=y
 CONFIG_CRYPTO_MANAGER=y
+CONFIG_CRYPTO_MANAGER2=y
 # CONFIG_CRYPTO_GF128MUL is not set
 # CONFIG_CRYPTO_NULL is not set
 # CONFIG_CRYPTO_CRYPTD is not set
index 279a6229584b010d136036823cb00f02950e6c3b..1842186d872c33818d862cff49ecc882dd7347cb 100644 (file)
@@ -89,6 +89,8 @@
 #define TIOCSBRK       0x5427  /* BSD compatibility */
 #define TIOCCBRK       0x5428  /* BSD compatibility */
 #define TIOCGSID       0x5429  /* Return the session ID of FD */
+#define TIOCGRS485     0x542e
+#define TIOCSRS485     0x542f
 #define TIOCGPTN       _IOR('T',0x30, unsigned int) /* Get Pty Number (of pty-mux device) */
 #define TIOCSPTLCK     _IOW('T',0x31, int)  /* Lock/unlock Pty */
 
index 6dbffc9817024d684420ea7144d68e66f477d119..7e06b43720d3ce0ce439c50b4ba980f6e398fc01 100644 (file)
@@ -48,63 +48,8 @@ static inline void crash_setup_regs(struct pt_regs *newregs,
 {
        if (oldregs)
                memcpy(newregs, oldregs, sizeof(*newregs));
-#ifdef __powerpc64__
-       else {
-               /* FIXME Merge this with xmon_save_regs ?? */
-               unsigned long tmp1, tmp2;
-               __asm__ __volatile__ (
-                       "std    0,0(%2)\n"
-                       "std    1,8(%2)\n"
-                       "std    2,16(%2)\n"
-                       "std    3,24(%2)\n"
-                       "std    4,32(%2)\n"
-                       "std    5,40(%2)\n"
-                       "std    6,48(%2)\n"
-                       "std    7,56(%2)\n"
-                       "std    8,64(%2)\n"
-                       "std    9,72(%2)\n"
-                       "std    10,80(%2)\n"
-                       "std    11,88(%2)\n"
-                       "std    12,96(%2)\n"
-                       "std    13,104(%2)\n"
-                       "std    14,112(%2)\n"
-                       "std    15,120(%2)\n"
-                       "std    16,128(%2)\n"
-                       "std    17,136(%2)\n"
-                       "std    18,144(%2)\n"
-                       "std    19,152(%2)\n"
-                       "std    20,160(%2)\n"
-                       "std    21,168(%2)\n"
-                       "std    22,176(%2)\n"
-                       "std    23,184(%2)\n"
-                       "std    24,192(%2)\n"
-                       "std    25,200(%2)\n"
-                       "std    26,208(%2)\n"
-                       "std    27,216(%2)\n"
-                       "std    28,224(%2)\n"
-                       "std    29,232(%2)\n"
-                       "std    30,240(%2)\n"
-                       "std    31,248(%2)\n"
-                       "mfmsr  %0\n"
-                       "std    %0, 264(%2)\n"
-                       "mfctr  %0\n"
-                       "std    %0, 280(%2)\n"
-                       "mflr   %0\n"
-                       "std    %0, 288(%2)\n"
-                       "bl     1f\n"
-               "1:     mflr   %1\n"
-                       "std    %1, 256(%2)\n"
-                       "mtlr   %0\n"
-                       "mfxer  %0\n"
-                       "std    %0, 296(%2)\n"
-                       : "=&r" (tmp1), "=&r" (tmp2)
-                       : "b" (newregs)
-                       : "memory");
-       }
-#else
        else
                ppc_save_regs(newregs);
-#endif /* __powerpc64__ */
 }
 
 extern void kexec_smp_wait(void);      /* get and clear naca physid, wait for
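
The deleted #ifdef __powerpc64__ branch above hand-saved every GPR and special register with inline assembly; after the hunk, crash_setup_regs() has a single path and relies on ppc_save_regs() for both 32- and 64-bit builds. A toy sketch of the resulting control flow (the types and the register snapshot are stand-ins):

    #include <stdio.h>
    #include <string.h>

    struct regs_sketch { unsigned long gpr[4]; };

    /* stand-in for the arch-provided ppc_save_regs() */
    static void save_regs_stub(struct regs_sketch *r)
    {
            memset(r, 0, sizeof(*r));
            r->gpr[1] = 0xdead;  /* pretend this is the live stack pointer */
    }

    /* shape of crash_setup_regs() after the hunk: one code path */
    static void crash_setup_regs_sketch(struct regs_sketch *newregs,
                                        const struct regs_sketch *oldregs)
    {
            if (oldregs)
                    memcpy(newregs, oldregs, sizeof(*newregs));
            else
                    save_regs_stub(newregs);
    }

    int main(void)
    {
            struct regs_sketch regs;

            crash_setup_regs_sketch(&regs, NULL);
            printf("gpr1 = %#lx\n", regs.gpr[1]);
            return 0;
    }
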
index cff30c0ef1fffa12d29c1497cefa4150eabe3894..eead5c67197abb3478d4a3e5dd25815949e9e488 100644 (file)
@@ -320,6 +320,7 @@ enum ps3_match_id {
 
 enum ps3_match_sub_id {
        PS3_MATCH_SUB_ID_GPU_FB         = 1,
+       PS3_MATCH_SUB_ID_GPU_RAMDISK    = 2,
 };
 
 #define PS3_MODULE_ALIAS_EHCI          "ps3:1:0"
@@ -332,6 +333,7 @@ enum ps3_match_sub_id {
 #define PS3_MODULE_ALIAS_STOR_FLASH    "ps3:8:0"
 #define PS3_MODULE_ALIAS_SOUND         "ps3:9:0"
 #define PS3_MODULE_ALIAS_GPU_FB                "ps3:10:1"
+#define PS3_MODULE_ALIAS_GPU_RAMDISK   "ps3:10:2"
 #define PS3_MODULE_ALIAS_LPM           "ps3:11:0"
 
 enum ps3_system_bus_device_type {
index edee15d269eaaf285d64e1e650609310edd4c11e..a0a15311d0d82bc8117c8a064cecce1ca5bc09df 100644 (file)
@@ -17,6 +17,8 @@
 #ifdef __KERNEL__
 
 #include <linux/spinlock.h>
+#include <linux/errno.h>
+#include <linux/err.h>
 #include <asm/cpm.h>
 #include <asm/immap_qe.h>
 
@@ -84,7 +86,11 @@ static inline bool qe_clock_is_brg(enum qe_clock clk)
 extern spinlock_t cmxgcr_lock;
 
 /* Export QE common operations */
+#ifdef CONFIG_QUICC_ENGINE
 extern void __init qe_reset(void);
+#else
+static inline void qe_reset(void) {}
+#endif
 
 /* QE PIO */
 #define QE_PIO_PINS 32
@@ -101,16 +107,43 @@ struct qe_pio_regs {
 #endif
 };
 
-extern int par_io_init(struct device_node *np);
-extern int par_io_of_config(struct device_node *np);
 #define QE_PIO_DIR_IN  2
 #define QE_PIO_DIR_OUT 1
 extern void __par_io_config_pin(struct qe_pio_regs __iomem *par_io, u8 pin,
                                int dir, int open_drain, int assignment,
                                int has_irq);
+#ifdef CONFIG_QUICC_ENGINE
+extern int par_io_init(struct device_node *np);
+extern int par_io_of_config(struct device_node *np);
 extern int par_io_config_pin(u8 port, u8 pin, int dir, int open_drain,
                             int assignment, int has_irq);
 extern int par_io_data_set(u8 port, u8 pin, u8 val);
+#else
+static inline int par_io_init(struct device_node *np) { return -ENOSYS; }
+static inline int par_io_of_config(struct device_node *np) { return -ENOSYS; }
+static inline int par_io_config_pin(u8 port, u8 pin, int dir, int open_drain,
+               int assignment, int has_irq) { return -ENOSYS; }
+static inline int par_io_data_set(u8 port, u8 pin, u8 val) { return -ENOSYS; }
+#endif /* CONFIG_QUICC_ENGINE */
+
+/*
+ * Pin multiplexing functions.
+ */
+struct qe_pin;
+#ifdef CONFIG_QE_GPIO
+extern struct qe_pin *qe_pin_request(struct device_node *np, int index);
+extern void qe_pin_free(struct qe_pin *qe_pin);
+extern void qe_pin_set_gpio(struct qe_pin *qe_pin);
+extern void qe_pin_set_dedicated(struct qe_pin *pin);
+#else
+static inline struct qe_pin *qe_pin_request(struct device_node *np, int index)
+{
+       return ERR_PTR(-ENOSYS);
+}
+static inline void qe_pin_free(struct qe_pin *qe_pin) {}
+static inline void qe_pin_set_gpio(struct qe_pin *qe_pin) {}
+static inline void qe_pin_set_dedicated(struct qe_pin *pin) {}
+#endif /* CONFIG_QE_GPIO */
 
 /* QE internal API */
 int qe_issue_cmd(u32 cmd, u32 device, u8 mcn_protocol, u32 cmd_input);
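
The #ifdef CONFIG_QUICC_ENGINE and CONFIG_QE_GPIO blocks above follow the usual kernel header pattern: when the subsystem is configured out, static inline stubs returning -ENOSYS (or doing nothing) keep callers compiling without scattering #ifdefs through their code. A stand-alone sketch of the pattern, built around one of the functions above:

    #include <stdio.h>

    #define ENOSYS 38
    /* #define CONFIG_QUICC_ENGINE 1  -- define to get the real driver */

    #ifdef CONFIG_QUICC_ENGINE
    extern int par_io_data_set(unsigned char port, unsigned char pin,
                               unsigned char val);
    #else
    /* inline stub: callers compile and link even when the QE driver is
     * configured out, and see a clean "not implemented" error instead */
    static inline int par_io_data_set(unsigned char port, unsigned char pin,
                                      unsigned char val)
    {
            (void)port; (void)pin; (void)val;
            return -ENOSYS;
    }
    #endif

    int main(void)
    {
            printf("par_io_data_set() -> %d\n", par_io_data_set(0, 1, 1));
            return 0;
    }
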
index 56a7745ca343d438c1b4430186f1c1592fa0b5ad..cf519663a79183e7c776a580624fc78d5f343650 100644 (file)
@@ -17,6 +17,9 @@
 
 #include <linux/irq.h>
 
+struct device_node;
+struct qe_ic;
+
 #define NUM_OF_QE_IC_GROUPS    6
 
 /* Flags when we init the QE IC */
@@ -54,17 +57,27 @@ enum qe_ic_grp_id {
        QE_IC_GRP_RISCB         /* QE interrupt controller RISC group B */
 };
 
+#ifdef CONFIG_QUICC_ENGINE
 void qe_ic_init(struct device_node *node, unsigned int flags,
                void (*low_handler)(unsigned int irq, struct irq_desc *desc),
                void (*high_handler)(unsigned int irq, struct irq_desc *desc));
+unsigned int qe_ic_get_low_irq(struct qe_ic *qe_ic);
+unsigned int qe_ic_get_high_irq(struct qe_ic *qe_ic);
+#else
+static inline void qe_ic_init(struct device_node *node, unsigned int flags,
+               void (*low_handler)(unsigned int irq, struct irq_desc *desc),
+               void (*high_handler)(unsigned int irq, struct irq_desc *desc))
+{}
+static inline unsigned int qe_ic_get_low_irq(struct qe_ic *qe_ic)
+{ return 0; }
+static inline unsigned int qe_ic_get_high_irq(struct qe_ic *qe_ic)
+{ return 0; }
+#endif /* CONFIG_QUICC_ENGINE */
+
 void qe_ic_set_highest_priority(unsigned int virq, int high);
 int qe_ic_set_priority(unsigned int virq, unsigned int priority);
 int qe_ic_set_high_priority(unsigned int virq, unsigned int priority, int high);
 
-struct qe_ic;
-unsigned int qe_ic_get_low_irq(struct qe_ic *qe_ic);
-unsigned int qe_ic_get_high_irq(struct qe_ic *qe_ic);
-
 static inline void qe_ic_cascade_low_ipic(unsigned int irq,
                                          struct irq_desc *desc)
 {
index 8b2eb044270ad65b02f9a6bc145701df04c921ce..0ab8d869e3d61d83c8984c4492019e6d022e2e46 100644 (file)
@@ -128,7 +128,7 @@ struct spu {
        int number;
        unsigned int irqs[3];
        u32 node;
-       u64 flags;
+       unsigned long flags;
        u64 class_0_pending;
        u64 class_0_dar;
        u64 class_1_dar;
index 1308a86e9070814a1680d5f40e3ec00fe2b5c637..8d1a419df35d784a65168090a2dce93bb63ef1fc 100644 (file)
@@ -29,7 +29,7 @@ endif
 obj-y                          := cputable.o ptrace.o syscalls.o \
                                   irq.o align.o signal_32.o pmc.o vdso.o \
                                   init_task.o process.o systbl.o idle.o \
-                                  signal.o sysfs.o
+                                  signal.o sysfs.o cacheinfo.o
 obj-y                          += vdso32/
 obj-$(CONFIG_PPC64)            += setup_64.o sys_ppc32.o \
                                   signal_64.o ptrace32.o \
diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c
new file mode 100644 (file)
index 0000000..b33f041
--- /dev/null
@@ -0,0 +1,837 @@
+/*
+ * Processor cache information made available to userspace via sysfs;
+ * intended to be compatible with x86 intel_cacheinfo implementation.
+ *
+ * Copyright 2008 IBM Corporation
+ * Author: Nathan Lynch
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/kobject.h>
+#include <linux/list.h>
+#include <linux/notifier.h>
+#include <linux/of.h>
+#include <linux/percpu.h>
+#include <asm/prom.h>
+
+#include "cacheinfo.h"
+
+/* per-cpu object for tracking:
+ * - a "cache" kobject for the top-level directory
+ * - a list of "index" objects representing the cpu's local cache hierarchy
+ */
+struct cache_dir {
+       struct kobject *kobj; /* bare (not embedded) kobject for cache
+                              * directory */
+       struct cache_index_dir *index; /* list of index objects */
+};
+
+/* "index" object: each cpu's cache directory has an index
+ * subdirectory corresponding to a cache object associated with the
+ * cpu.  This object's lifetime is managed via the embedded kobject.
+ */
+struct cache_index_dir {
+       struct kobject kobj;
+       struct cache_index_dir *next; /* next index in parent directory */
+       struct cache *cache;
+};
+
+/* Template for determining which OF properties to query for a given
+ * cache type */
+struct cache_type_info {
+       const char *name;
+       const char *size_prop;
+
+       /* Allow for both [di]-cache-line-size and
+        * [di]-cache-block-size properties.  According to the PowerPC
+        * Processor binding, -line-size should be provided if it
+        * differs from the cache block size (that which is operated
+        * on by cache instructions), so we look for -line-size first.
+        * See cache_get_line_size(). */
+
+       const char *line_size_props[2];
+       const char *nr_sets_prop;
+};
+
+/* These are used to index the cache_type_info array. */
+#define CACHE_TYPE_UNIFIED     0
+#define CACHE_TYPE_INSTRUCTION 1
+#define CACHE_TYPE_DATA        2
+
+static const struct cache_type_info cache_type_info[] = {
+       {
+               /* PowerPC Processor binding says the [di]-cache-*
+                * must be equal on unified caches, so just use
+                * d-cache properties. */
+               .name            = "Unified",
+               .size_prop       = "d-cache-size",
+               .line_size_props = { "d-cache-line-size",
+                                    "d-cache-block-size", },
+               .nr_sets_prop    = "d-cache-sets",
+       },
+       {
+               .name            = "Instruction",
+               .size_prop       = "i-cache-size",
+               .line_size_props = { "i-cache-line-size",
+                                    "i-cache-block-size", },
+               .nr_sets_prop    = "i-cache-sets",
+       },
+       {
+               .name            = "Data",
+               .size_prop       = "d-cache-size",
+               .line_size_props = { "d-cache-line-size",
+                                    "d-cache-block-size", },
+               .nr_sets_prop    = "d-cache-sets",
+       },
+};
+
+/* Cache object: each instance of this corresponds to a distinct cache
+ * in the system.  There are separate objects for Harvard caches: one
+ * each for instruction and data, and each refers to the same OF node.
+ * The refcount of the OF node is elevated for the lifetime of the
+ * cache object.  A cache object is released when its shared_cpu_map
+ * is cleared (see cache_cpu_clear).
+ *
+ * A cache object is on two lists: an unsorted global list
+ * (cache_list) of cache objects; and a singly-linked list
+ * representing the local cache hierarchy, which is ordered by level
+ * (e.g. L1d -> L1i -> L2 -> L3).
+ */
+struct cache {
+       struct device_node *ofnode;    /* OF node for this cache, may be cpu */
+       struct cpumask shared_cpu_map; /* online CPUs using this cache */
+       int type;                      /* split cache disambiguation */
+       int level;                     /* level not explicit in device tree */
+       struct list_head list;         /* global list of cache objects */
+       struct cache *next_local;      /* next cache of >= level */
+};
+
+static DEFINE_PER_CPU(struct cache_dir *, cache_dir);
+
+/* traversal/modification of this list occurs only at cpu hotplug time;
+ * access is serialized by cpu hotplug locking
+ */
+static LIST_HEAD(cache_list);
+
+static struct cache_index_dir *kobj_to_cache_index_dir(struct kobject *k)
+{
+       return container_of(k, struct cache_index_dir, kobj);
+}
+
+static const char *cache_type_string(const struct cache *cache)
+{
+       return cache_type_info[cache->type].name;
+}
+
+static void __cpuinit cache_init(struct cache *cache, int type, int level, struct device_node *ofnode)
+{
+       cache->type = type;
+       cache->level = level;
+       cache->ofnode = of_node_get(ofnode);
+       INIT_LIST_HEAD(&cache->list);
+       list_add(&cache->list, &cache_list);
+}
+
+static struct cache *__cpuinit new_cache(int type, int level, struct device_node *ofnode)
+{
+       struct cache *cache;
+
+       cache = kzalloc(sizeof(*cache), GFP_KERNEL);
+       if (cache)
+               cache_init(cache, type, level, ofnode);
+
+       return cache;
+}
+
+static void release_cache_debugcheck(struct cache *cache)
+{
+       struct cache *iter;
+
+       list_for_each_entry(iter, &cache_list, list)
+               WARN_ONCE(iter->next_local == cache,
+                         "cache for %s(%s) refers to cache for %s(%s)\n",
+                         iter->ofnode->full_name,
+                         cache_type_string(iter),
+                         cache->ofnode->full_name,
+                         cache_type_string(cache));
+}
+
+static void release_cache(struct cache *cache)
+{
+       if (!cache)
+               return;
+
+       pr_debug("freeing L%d %s cache for %s\n", cache->level,
+                cache_type_string(cache), cache->ofnode->full_name);
+
+       release_cache_debugcheck(cache);
+       list_del(&cache->list);
+       of_node_put(cache->ofnode);
+       kfree(cache);
+}
+
+static void cache_cpu_set(struct cache *cache, int cpu)
+{
+       struct cache *next = cache;
+
+       while (next) {
+               WARN_ONCE(cpumask_test_cpu(cpu, &next->shared_cpu_map),
+                         "CPU %i already accounted in %s(%s)\n",
+                         cpu, next->ofnode->full_name,
+                         cache_type_string(next));
+               cpumask_set_cpu(cpu, &next->shared_cpu_map);
+               next = next->next_local;
+       }
+}
+
+static int cache_size(const struct cache *cache, unsigned int *ret)
+{
+       const char *propname;
+       const u32 *cache_size;
+
+       propname = cache_type_info[cache->type].size_prop;
+
+       cache_size = of_get_property(cache->ofnode, propname, NULL);
+       if (!cache_size)
+               return -ENODEV;
+
+       *ret = *cache_size;
+       return 0;
+}
+
+static int cache_size_kb(const struct cache *cache, unsigned int *ret)
+{
+       unsigned int size;
+
+       if (cache_size(cache, &size))
+               return -ENODEV;
+
+       *ret = size / 1024;
+       return 0;
+}
+
+/* not cache_line_size() because that's a macro in include/linux/cache.h */
+static int cache_get_line_size(const struct cache *cache, unsigned int *ret)
+{
+       const u32 *line_size;
+       int i, lim;
+
+       lim = ARRAY_SIZE(cache_type_info[cache->type].line_size_props);
+
+       for (i = 0; i < lim; i++) {
+               const char *propname;
+
+               propname = cache_type_info[cache->type].line_size_props[i];
+               line_size = of_get_property(cache->ofnode, propname, NULL);
+               if (line_size)
+                       break;
+       }
+
+       if (!line_size)
+               return -ENODEV;
+
+       *ret = *line_size;
+       return 0;
+}
+
+static int cache_nr_sets(const struct cache *cache, unsigned int *ret)
+{
+       const char *propname;
+       const u32 *nr_sets;
+
+       propname = cache_type_info[cache->type].nr_sets_prop;
+
+       nr_sets = of_get_property(cache->ofnode, propname, NULL);
+       if (!nr_sets)
+               return -ENODEV;
+
+       *ret = *nr_sets;
+       return 0;
+}
+
+static int cache_associativity(const struct cache *cache, unsigned int *ret)
+{
+       unsigned int line_size;
+       unsigned int nr_sets;
+       unsigned int size;
+
+       if (cache_nr_sets(cache, &nr_sets))
+               goto err;
+
+       /* If the cache is fully associative, there is no need to
+        * check the other properties.
+        */
+       if (nr_sets == 1) {
+               *ret = 0;
+               return 0;
+       }
+
+       if (cache_get_line_size(cache, &line_size))
+               goto err;
+       if (cache_size(cache, &size))
+               goto err;
+
+       if (!(nr_sets > 0 && size > 0 && line_size > 0))
+               goto err;
+
+       *ret = (size / nr_sets) / line_size;
+       return 0;
+err:
+       return -ENODEV;
+}
+
+/* helper for dealing with split caches */
+static struct cache *cache_find_first_sibling(struct cache *cache)
+{
+       struct cache *iter;
+
+       if (cache->type == CACHE_TYPE_UNIFIED)
+               return cache;
+
+       list_for_each_entry(iter, &cache_list, list)
+               if (iter->ofnode == cache->ofnode && iter->next_local == cache)
+                       return iter;
+
+       return cache;
+}
+
+/* return the first cache on a local list matching node */
+static struct cache *cache_lookup_by_node(const struct device_node *node)
+{
+       struct cache *cache = NULL;
+       struct cache *iter;
+
+       list_for_each_entry(iter, &cache_list, list) {
+               if (iter->ofnode != node)
+                       continue;
+               cache = cache_find_first_sibling(iter);
+               break;
+       }
+
+       return cache;
+}
+
+static bool cache_node_is_unified(const struct device_node *np)
+{
+       return of_get_property(np, "cache-unified", NULL);
+}
+
+static struct cache *__cpuinit cache_do_one_devnode_unified(struct device_node *node, int level)
+{
+       struct cache *cache;
+
+       pr_debug("creating L%d ucache for %s\n", level, node->full_name);
+
+       cache = new_cache(CACHE_TYPE_UNIFIED, level, node);
+
+       return cache;
+}
+
+static struct cache *__cpuinit cache_do_one_devnode_split(struct device_node *node, int level)
+{
+       struct cache *dcache, *icache;
+
+       pr_debug("creating L%d dcache and icache for %s\n", level,
+                node->full_name);
+
+       dcache = new_cache(CACHE_TYPE_DATA, level, node);
+       icache = new_cache(CACHE_TYPE_INSTRUCTION, level, node);
+
+       if (!dcache || !icache)
+               goto err;
+
+       dcache->next_local = icache;
+
+       return dcache;
+err:
+       release_cache(dcache);
+       release_cache(icache);
+       return NULL;
+}
+
+static struct cache *__cpuinit cache_do_one_devnode(struct device_node *node, int level)
+{
+       struct cache *cache;
+
+       if (cache_node_is_unified(node))
+               cache = cache_do_one_devnode_unified(node, level);
+       else
+               cache = cache_do_one_devnode_split(node, level);
+
+       return cache;
+}
+
+static struct cache *__cpuinit cache_lookup_or_instantiate(struct device_node *node, int level)
+{
+       struct cache *cache;
+
+       cache = cache_lookup_by_node(node);
+
+       WARN_ONCE(cache && cache->level != level,
+                 "cache level mismatch on lookup (got %d, expected %d)\n",
+                 cache->level, level);
+
+       if (!cache)
+               cache = cache_do_one_devnode(node, level);
+
+       return cache;
+}
+
+static void __cpuinit link_cache_lists(struct cache *smaller, struct cache *bigger)
+{
+       while (smaller->next_local) {
+               if (smaller->next_local == bigger)
+                       return; /* already linked */
+               smaller = smaller->next_local;
+       }
+
+       smaller->next_local = bigger;
+}
+
+static void __cpuinit do_subsidiary_caches_debugcheck(struct cache *cache)
+{
+       WARN_ON_ONCE(cache->level != 1);
+       WARN_ON_ONCE(strcmp(cache->ofnode->type, "cpu"));
+}
+
+static void __cpuinit do_subsidiary_caches(struct cache *cache)
+{
+       struct device_node *subcache_node;
+       int level = cache->level;
+
+       do_subsidiary_caches_debugcheck(cache);
+
+       while ((subcache_node = of_find_next_cache_node(cache->ofnode))) {
+               struct cache *subcache;
+
+               level++;
+               subcache = cache_lookup_or_instantiate(subcache_node, level);
+               of_node_put(subcache_node);
+               if (!subcache)
+                       break;
+
+               link_cache_lists(cache, subcache);
+               cache = subcache;
+       }
+}
+
+static struct cache *__cpuinit cache_chain_instantiate(unsigned int cpu_id)
+{
+       struct device_node *cpu_node;
+       struct cache *cpu_cache = NULL;
+
+       pr_debug("creating cache object(s) for CPU %i\n", cpu_id);
+
+       cpu_node = of_get_cpu_node(cpu_id, NULL);
+       WARN_ONCE(!cpu_node, "no OF node found for CPU %i\n", cpu_id);
+       if (!cpu_node)
+               goto out;
+
+       cpu_cache = cache_lookup_or_instantiate(cpu_node, 1);
+       if (!cpu_cache)
+               goto out;
+
+       do_subsidiary_caches(cpu_cache);
+
+       cache_cpu_set(cpu_cache, cpu_id);
+out:
+       of_node_put(cpu_node);
+
+       return cpu_cache;
+}
+
+static struct cache_dir *__cpuinit cacheinfo_create_cache_dir(unsigned int cpu_id)
+{
+       struct cache_dir *cache_dir;
+       struct sys_device *sysdev;
+       struct kobject *kobj = NULL;
+
+       sysdev = get_cpu_sysdev(cpu_id);
+       WARN_ONCE(!sysdev, "no sysdev for CPU %i\n", cpu_id);
+       if (!sysdev)
+               goto err;
+
+       kobj = kobject_create_and_add("cache", &sysdev->kobj);
+       if (!kobj)
+               goto err;
+
+       cache_dir = kzalloc(sizeof(*cache_dir), GFP_KERNEL);
+       if (!cache_dir)
+               goto err;
+
+       cache_dir->kobj = kobj;
+
+       WARN_ON_ONCE(per_cpu(cache_dir, cpu_id) != NULL);
+
+       per_cpu(cache_dir, cpu_id) = cache_dir;
+
+       return cache_dir;
+err:
+       kobject_put(kobj);
+       return NULL;
+}
+
+static void cache_index_release(struct kobject *kobj)
+{
+       struct cache_index_dir *index;
+
+       index = kobj_to_cache_index_dir(kobj);
+
+       pr_debug("freeing index directory for L%d %s cache\n",
+                index->cache->level, cache_type_string(index->cache));
+
+       kfree(index);
+}
+
+static ssize_t cache_index_show(struct kobject *k, struct attribute *attr, char *buf)
+{
+       struct kobj_attribute *kobj_attr;
+
+       kobj_attr = container_of(attr, struct kobj_attribute, attr);
+
+       return kobj_attr->show(k, kobj_attr, buf);
+}
+
+static struct cache *index_kobj_to_cache(struct kobject *k)
+{
+       struct cache_index_dir *index;
+
+       index = kobj_to_cache_index_dir(k);
+
+       return index->cache;
+}
+
+static ssize_t size_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
+{
+       unsigned int size_kb;
+       struct cache *cache;
+
+       cache = index_kobj_to_cache(k);
+
+       if (cache_size_kb(cache, &size_kb))
+               return -ENODEV;
+
+       return sprintf(buf, "%uK\n", size_kb);
+}
+
+static struct kobj_attribute cache_size_attr =
+       __ATTR(size, 0444, size_show, NULL);
+
+
+static ssize_t line_size_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
+{
+       unsigned int line_size;
+       struct cache *cache;
+
+       cache = index_kobj_to_cache(k);
+
+       if (cache_get_line_size(cache, &line_size))
+               return -ENODEV;
+
+       return sprintf(buf, "%u\n", line_size);
+}
+
+static struct kobj_attribute cache_line_size_attr =
+       __ATTR(coherency_line_size, 0444, line_size_show, NULL);
+
+static ssize_t nr_sets_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
+{
+       unsigned int nr_sets;
+       struct cache *cache;
+
+       cache = index_kobj_to_cache(k);
+
+       if (cache_nr_sets(cache, &nr_sets))
+               return -ENODEV;
+
+       return sprintf(buf, "%u\n", nr_sets);
+}
+
+static struct kobj_attribute cache_nr_sets_attr =
+       __ATTR(number_of_sets, 0444, nr_sets_show, NULL);
+
+static ssize_t associativity_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
+{
+       unsigned int associativity;
+       struct cache *cache;
+
+       cache = index_kobj_to_cache(k);
+
+       if (cache_associativity(cache, &associativity))
+               return -ENODEV;
+
+       return sprintf(buf, "%u\n", associativity);
+}
+
+static struct kobj_attribute cache_assoc_attr =
+       __ATTR(ways_of_associativity, 0444, associativity_show, NULL);
+
+static ssize_t type_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
+{
+       struct cache *cache;
+
+       cache = index_kobj_to_cache(k);
+
+       return sprintf(buf, "%s\n", cache_type_string(cache));
+}
+
+static struct kobj_attribute cache_type_attr =
+       __ATTR(type, 0444, type_show, NULL);
+
+static ssize_t level_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
+{
+       struct cache_index_dir *index;
+       struct cache *cache;
+
+       index = kobj_to_cache_index_dir(k);
+       cache = index->cache;
+
+       return sprintf(buf, "%d\n", cache->level);
+}
+
+static struct kobj_attribute cache_level_attr =
+       __ATTR(level, 0444, level_show, NULL);
+
+static ssize_t shared_cpu_map_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
+{
+       struct cache_index_dir *index;
+       struct cache *cache;
+       int len;
+       int n = 0;
+
+       index = kobj_to_cache_index_dir(k);
+       cache = index->cache;
+       len = PAGE_SIZE - 2;
+
+       if (len > 1) {
+               n = cpumask_scnprintf(buf, len, &cache->shared_cpu_map);
+               buf[n++] = '\n';
+               buf[n] = '\0';
+       }
+       return n;
+}
+
+static struct kobj_attribute cache_shared_cpu_map_attr =
+       __ATTR(shared_cpu_map, 0444, shared_cpu_map_show, NULL);
+
+/* Attributes which should always be created -- the kobject/sysfs core
+ * does this automatically via kobj_type->default_attrs.  This is the
+ * minimum data required to uniquely identify a cache.
+ */
+static struct attribute *cache_index_default_attrs[] = {
+       &cache_type_attr.attr,
+       &cache_level_attr.attr,
+       &cache_shared_cpu_map_attr.attr,
+       NULL,
+};
+
+/* Attributes which should be created if the cache device node has the
+ * right properties -- see cacheinfo_create_index_opt_attrs
+ */
+static struct kobj_attribute *cache_index_opt_attrs[] = {
+       &cache_size_attr,
+       &cache_line_size_attr,
+       &cache_nr_sets_attr,
+       &cache_assoc_attr,
+};
+
+static struct sysfs_ops cache_index_ops = {
+       .show = cache_index_show,
+};
+
+static struct kobj_type cache_index_type = {
+       .release = cache_index_release,
+       .sysfs_ops = &cache_index_ops,
+       .default_attrs = cache_index_default_attrs,
+};
+
+static void __cpuinit cacheinfo_create_index_opt_attrs(struct cache_index_dir *dir)
+{
+       const char *cache_name;
+       const char *cache_type;
+       struct cache *cache;
+       char *buf;
+       int i;
+
+       buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+       if (!buf)
+               return;
+
+       cache = dir->cache;
+       cache_name = cache->ofnode->full_name;
+       cache_type = cache_type_string(cache);
+
+       /* We don't want to create an attribute that can't provide a
+        * meaningful value.  Check the return value of each optional
+        * attribute's ->show method before registering the
+        * attribute.
+        */
+       for (i = 0; i < ARRAY_SIZE(cache_index_opt_attrs); i++) {
+               struct kobj_attribute *attr;
+               ssize_t rc;
+
+               attr = cache_index_opt_attrs[i];
+
+               rc = attr->show(&dir->kobj, attr, buf);
+               if (rc <= 0) {
+                       pr_debug("not creating %s attribute for "
+                                "%s(%s) (rc = %zd)\n",
+                                attr->attr.name, cache_name,
+                                cache_type, rc);
+                       continue;
+               }
+               if (sysfs_create_file(&dir->kobj, &attr->attr))
+                       pr_debug("could not create %s attribute for %s(%s)\n",
+                                attr->attr.name, cache_name, cache_type);
+       }
+
+       kfree(buf);
+}
+
+static void __cpuinit cacheinfo_create_index_dir(struct cache *cache, int index, struct cache_dir *cache_dir)
+{
+       struct cache_index_dir *index_dir;
+       int rc;
+
+       index_dir = kzalloc(sizeof(*index_dir), GFP_KERNEL);
+       if (!index_dir)
+               goto err;
+
+       index_dir->cache = cache;
+
+       rc = kobject_init_and_add(&index_dir->kobj, &cache_index_type,
+                                 cache_dir->kobj, "index%d", index);
+       if (rc)
+               goto err;
+
+       index_dir->next = cache_dir->index;
+       cache_dir->index = index_dir;
+
+       cacheinfo_create_index_opt_attrs(index_dir);
+
+       return;
+err:
+       kfree(index_dir);
+}
+
+static void __cpuinit cacheinfo_sysfs_populate(unsigned int cpu_id, struct cache *cache_list)
+{
+       struct cache_dir *cache_dir;
+       struct cache *cache;
+       int index = 0;
+
+       cache_dir = cacheinfo_create_cache_dir(cpu_id);
+       if (!cache_dir)
+               return;
+
+       cache = cache_list;
+       while (cache) {
+               cacheinfo_create_index_dir(cache, index, cache_dir);
+               index++;
+               cache = cache->next_local;
+       }
+}
+
+void __cpuinit cacheinfo_cpu_online(unsigned int cpu_id)
+{
+       struct cache *cache;
+
+       cache = cache_chain_instantiate(cpu_id);
+       if (!cache)
+               return;
+
+       cacheinfo_sysfs_populate(cpu_id, cache);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU /* functions needed for cpu offline */
+
+static struct cache *cache_lookup_by_cpu(unsigned int cpu_id)
+{
+       struct device_node *cpu_node;
+       struct cache *cache;
+
+       cpu_node = of_get_cpu_node(cpu_id, NULL);
+       WARN_ONCE(!cpu_node, "no OF node found for CPU %i\n", cpu_id);
+       if (!cpu_node)
+               return NULL;
+
+       cache = cache_lookup_by_node(cpu_node);
+       of_node_put(cpu_node);
+
+       return cache;
+}
+
+static void remove_index_dirs(struct cache_dir *cache_dir)
+{
+       struct cache_index_dir *index;
+
+       index = cache_dir->index;
+
+       while (index) {
+               struct cache_index_dir *next;
+
+               next = index->next;
+               kobject_put(&index->kobj);
+               index = next;
+       }
+}
+
+static void remove_cache_dir(struct cache_dir *cache_dir)
+{
+       remove_index_dirs(cache_dir);
+
+       kobject_put(cache_dir->kobj);
+
+       kfree(cache_dir);
+}
+
+static void cache_cpu_clear(struct cache *cache, int cpu)
+{
+       while (cache) {
+               struct cache *next = cache->next_local;
+
+               WARN_ONCE(!cpumask_test_cpu(cpu, &cache->shared_cpu_map),
+                         "CPU %i not accounted in %s(%s)\n",
+                         cpu, cache->ofnode->full_name,
+                         cache_type_string(cache));
+
+               cpumask_clear_cpu(cpu, &cache->shared_cpu_map);
+
+               /* Release the cache object if all the cpus using it
+                * are offline */
+               if (cpumask_empty(&cache->shared_cpu_map))
+                       release_cache(cache);
+
+               cache = next;
+       }
+}
+
+void cacheinfo_cpu_offline(unsigned int cpu_id)
+{
+       struct cache_dir *cache_dir;
+       struct cache *cache;
+
+       /* Prevent userspace from seeing inconsistent state - remove
+        * the sysfs hierarchy first */
+       cache_dir = per_cpu(cache_dir, cpu_id);
+
+       /* careful, sysfs population may have failed */
+       if (cache_dir)
+               remove_cache_dir(cache_dir);
+
+       per_cpu(cache_dir, cpu_id) = NULL;
+
+       /* clear the CPU's bit in its cache chain, possibly freeing
+        * cache objects */
+       cache = cache_lookup_by_cpu(cpu_id);
+       if (cache)
+               cache_cpu_clear(cache, cpu_id);
+}
+#endif /* CONFIG_HOTPLUG_CPU */
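The ways_of_associativity attribute is derived rather than read from the firmware: cache_associativity() computes ways = (size / nr_sets) / line_size, with nr_sets == 1 denoting a fully associative cache (reported as 0). A standalone sketch of the same arithmetic, using hypothetical property values rather than anything from a real device tree:

#include <stdio.h>

int main(void)
{
        /* hypothetical values, as if read from d-cache-* properties */
        unsigned int size = 32768;      /* d-cache-size, in bytes */
        unsigned int nr_sets = 64;      /* d-cache-sets */
        unsigned int line_size = 128;   /* d-cache-line-size, in bytes */
        unsigned int ways;

        if (nr_sets == 1)               /* fully associative */
                ways = 0;
        else
                ways = (size / nr_sets) / line_size;

        printf("ways_of_associativity = %u\n", ways);   /* prints 4 */
        return 0;
}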
diff --git a/arch/powerpc/kernel/cacheinfo.h b/arch/powerpc/kernel/cacheinfo.h
new file mode 100644 (file)
index 0000000..a7b74d3
--- /dev/null
+++ b/arch/powerpc/kernel/cacheinfo.h
@@ -0,0 +1,8 @@
+#ifndef _PPC_CACHEINFO_H
+#define _PPC_CACHEINFO_H
+
+/* These are just hooks for sysfs.c to use. */
+extern void cacheinfo_cpu_online(unsigned int cpu_id);
+extern void cacheinfo_cpu_offline(unsigned int cpu_id);
+
+#endif /* _PPC_CACHEINFO_H */
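On the consumer side, the hierarchy is meant to mirror the x86 intel_cacheinfo layout under /sys/devices/system/cpu/cpuN/cache/indexM/. A minimal userspace sketch that reads one attribute, assuming that layout and a kernel carrying this code:

#include <stdio.h>

int main(void)
{
        char line[64];
        FILE *f;

        f = fopen("/sys/devices/system/cpu/cpu0/cache/index0/type", "r");
        if (!f)
                return 1;       /* kernel without cacheinfo support */

        if (fgets(line, sizeof(line), f))
                printf("cpu0 index0 type: %s", line);   /* e.g. "Data" */

        fclose(f);
        return 0;
}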
index 2538030954d85102c3ac929b076ad3979d98a4c1..da5a3855a0c474c0619c8d97c07b4f494980f8b8 100644 (file)
@@ -16,7 +16,7 @@
  * 2 of the License, or (at your option) any later version.
  */
 
-#undef DEBUG
+#define DEBUG
 
 #include <linux/kernel.h>
 #include <linux/pci.h>
@@ -1356,6 +1356,63 @@ static void __init pcibios_allocate_resources(int pass)
        }
 }
 
+static void __init pcibios_reserve_legacy_regions(struct pci_bus *bus)
+{
+       struct pci_controller *hose = pci_bus_to_host(bus);
+       resource_size_t offset;
+       struct resource *res, *pres;
+       int i;
+
+       pr_debug("Reserving legacy ranges for domain %04x\n", pci_domain_nr(bus));
+
+       /* Check for IO */
+       if (!(hose->io_resource.flags & IORESOURCE_IO))
+               goto no_io;
+       offset = (unsigned long)hose->io_base_virt - _IO_BASE;
+       res = kzalloc(sizeof(struct resource), GFP_KERNEL);
+       BUG_ON(res == NULL);
+       res->name = "Legacy IO";
+       res->flags = IORESOURCE_IO;
+       res->start = offset;
+       res->end = (offset + 0xfff) & 0xfffffffful;
+       pr_debug("Candidate legacy IO: %pR\n", res);
+       if (request_resource(&hose->io_resource, res)) {
+               printk(KERN_DEBUG
+                      "PCI %04x:%02x Cannot reserve Legacy IO %pR\n",
+                      pci_domain_nr(bus), bus->number, res);
+               kfree(res);
+       }
+
+ no_io:
+       /* Check for memory */
+       offset = hose->pci_mem_offset;
+       pr_debug("hose mem offset: %016llx\n", (unsigned long long)offset);
+       for (i = 0; i < 3; i++) {
+               pres = &hose->mem_resources[i];
+               if (!(pres->flags & IORESOURCE_MEM))
+                       continue;
+               pr_debug("hose mem res: %pR\n", pres);
+               if ((pres->start - offset) <= 0xa0000 &&
+                   (pres->end - offset) >= 0xbffff)
+                       break;
+       }
+       if (i >= 3)
+               return;
+       res = kzalloc(sizeof(struct resource), GFP_KERNEL);
+       BUG_ON(res == NULL);
+       res->name = "Legacy VGA memory";
+       res->flags = IORESOURCE_MEM;
+       res->start = 0xa0000 + offset;
+       res->end = 0xbffff + offset;
+       pr_debug("Candidate VGA memory: %pR\n", res);
+       if (request_resource(pres, res)) {
+               printk(KERN_DEBUG
+                      "PCI %04x:%02x Cannot reserve VGA memory %pR\n",
+                      pci_domain_nr(bus), bus->number, res);
+               kfree(res);
+       }
+}
+
 void __init pcibios_resource_survey(void)
 {
        struct pci_bus *b;
@@ -1371,6 +1428,18 @@ void __init pcibios_resource_survey(void)
                pcibios_allocate_resources(1);
        }
 
+       /* Before we start assigning unassigned resources, we try to reserve
+        * the low IO area and the VGA memory area if they intersect the
+        * bus available resources to avoid allocating things on top of them
+        */
+       if (!(ppc_pci_flags & PPC_PCI_PROBE_ONLY)) {
+               list_for_each_entry(b, &pci_root_buses, node)
+                       pcibios_reserve_legacy_regions(b);
+       }
+
+       /* Now, if the platform didn't decide to blindly trust the firmware,
+        * we proceed to assigning things that were left unassigned
+        */
        if (!(ppc_pci_flags & PPC_PCI_PROBE_ONLY)) {
                pr_debug("PCI: Assigning unassigned resouces...\n");
                pci_assign_unassigned_resources();
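The VGA branch of pcibios_reserve_legacy_regions() above only carves out a "Legacy VGA memory" resource when a host-bridge memory window fully contains bus addresses 0xa0000-0xbffff. A standalone sketch of that containment test, with hypothetical window values:

#include <stdio.h>

int main(void)
{
        unsigned long offset = 0x80000000;  /* hose->pci_mem_offset, hypothetical */
        unsigned long start  = 0x80000000;  /* window start (CPU address) */
        unsigned long end    = 0x9fffffff;  /* window end (CPU address) */

        /* window covers the VGA range in bus-address terms? */
        if (start - offset <= 0xa0000 && end - offset >= 0xbffff)
                printf("reserve %#lx-%#lx\n",
                       0xa0000 + offset, 0xbffff + offset);
        else
                printf("no suitable window, nothing reserved\n");
        return 0;
}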
index 39fadc6e149219771985bfc1d697a47afe27f885..586962f65c2a033d59a240e5daf85ece91859532 100644 (file)
@@ -560,9 +560,14 @@ long sys_pciconfig_iobase(long which, unsigned long in_bus,
         * G5 machines... So when something asks for bus 0 io base
         * (bus 0 is HT root), we return the AGP one instead.
         */
-       if (machine_is_compatible("MacRISC4"))
-               if (in_bus == 0)
+       if (in_bus == 0 && machine_is_compatible("MacRISC4")) {
+               struct device_node *agp;
+
+               agp = of_find_compatible_node(NULL, NULL, "u3-agp");
+               if (agp)
                        in_bus = 0xf0;
+               of_node_put(agp);
+       }
 
        /* That syscall isn't quite compatible with PCI domains, but it's
         * used on pre-domains setup. We return the first match
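Beyond the G5 quirk itself, the hunk above shows the usual OF node refcounting pattern: a node returned by of_find_compatible_node() holds a reference, and of_node_put() accepts NULL as a no-op, so a single put after the conditional balances both outcomes. A kernel-style sketch (the compatible string is reused from the hunk; the surrounding function is illustrative):

#include <linux/kernel.h>
#include <linux/of.h>

static void example_agp_probe(void)
{
        struct device_node *agp;

        agp = of_find_compatible_node(NULL, NULL, "u3-agp");
        if (agp)
                pr_debug("found %s\n", agp->full_name);
        of_node_put(agp);       /* safe no-op when agp is NULL */
}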
index dcec1325d3404adf5f06df1a4b440ca07838a5d9..c8b27bb4dbdec45eba0de471c106728bb892fdee 100644 (file)
@@ -165,6 +165,7 @@ EXPORT_SYMBOL(timer_interrupt);
 EXPORT_SYMBOL(irq_desc);
 EXPORT_SYMBOL(tb_ticks_per_jiffy);
 EXPORT_SYMBOL(cacheable_memcpy);
+EXPORT_SYMBOL(cacheable_memzero);
 #endif
 
 #ifdef CONFIG_PPC32
index 6f73c739f1e2f443d988f3bb440dec4c648b44d9..c09cffafb6ee5f8f2697fd4f55a1cf991f5db9cf 100644 (file)
@@ -824,11 +824,11 @@ static int __init early_init_dt_scan_chosen(unsigned long node,
 #endif
 
 #ifdef CONFIG_KEXEC
-       lprop = (u64*)of_get_flat_dt_prop(node, "linux,crashkernel-base", NULL);
+       lprop = of_get_flat_dt_prop(node, "linux,crashkernel-base", NULL);
        if (lprop)
                crashk_res.start = *lprop;
 
-       lprop = (u64*)of_get_flat_dt_prop(node, "linux,crashkernel-size", NULL);
+       lprop = of_get_flat_dt_prop(node, "linux,crashkernel-size", NULL);
        if (lprop)
                crashk_res.end = crashk_res.start + *lprop - 1;
 #endif
@@ -893,12 +893,12 @@ static int __init early_init_dt_scan_drconf_memory(unsigned long node)
        u64 base, size, lmb_size;
        unsigned int is_kexec_kdump = 0, rngs;
 
-       ls = (cell_t *)of_get_flat_dt_prop(node, "ibm,lmb-size", &l);
+       ls = of_get_flat_dt_prop(node, "ibm,lmb-size", &l);
        if (ls == NULL || l < dt_root_size_cells * sizeof(cell_t))
                return 0;
        lmb_size = dt_mem_next_cell(dt_root_size_cells, &ls);
 
-       dm = (cell_t *)of_get_flat_dt_prop(node, "ibm,dynamic-memory", &l);
+       dm = of_get_flat_dt_prop(node, "ibm,dynamic-memory", &l);
        if (dm == NULL || l < sizeof(cell_t))
                return 0;
 
@@ -907,7 +907,7 @@ static int __init early_init_dt_scan_drconf_memory(unsigned long node)
                return 0;
 
        /* check if this is a kexec/kdump kernel. */
-       usm = (cell_t *)of_get_flat_dt_prop(node, "linux,drconf-usable-memory",
+       usm = of_get_flat_dt_prop(node, "linux,drconf-usable-memory",
                                                 &l);
        if (usm != NULL)
                is_kexec_kdump = 1;
@@ -981,9 +981,9 @@ static int __init early_init_dt_scan_memory(unsigned long node,
        } else if (strcmp(type, "memory") != 0)
                return 0;
 
-       reg = (cell_t *)of_get_flat_dt_prop(node, "linux,usable-memory", &l);
+       reg = of_get_flat_dt_prop(node, "linux,usable-memory", &l);
        if (reg == NULL)
-               reg = (cell_t *)of_get_flat_dt_prop(node, "reg", &l);
+               reg = of_get_flat_dt_prop(node, "reg", &l);
        if (reg == NULL)
                return 0;
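The casts dropped above were never needed: of_get_flat_dt_prop() returns void *, and in C a void pointer converts implicitly to any object pointer type. A standalone illustration of the rule:

#include <stdlib.h>

int main(void)
{
        void *prop = malloc(sizeof(unsigned long long));
        unsigned long long *lprop = prop;       /* no cast required in C */

        free(lprop);
        return 0;
}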
 
index 2445945d3761771612a6abe4e5c873492b0d5c3c..7f1b33d5e30d9baf51ddba748f1b0a0573daf754 100644 (file)
@@ -1210,7 +1210,7 @@ static void __init prom_initialize_tce_table(void)
                /* Initialize the table to have a one-to-one mapping
                 * over the allocated size.
                 */
-               tce_entryp = (unsigned long *)base;
+               tce_entryp = (u64 *)base;
                for (i = 0; i < (minsize >> 3) ;tce_entryp++, i++) {
                        tce_entry = (i << PAGE_SHIFT);
                        tce_entry |= 0x3;
index 0c64f10087b9833b871ac62b55e177a48f2f5af3..4a2ee08af6a7f6c2448a45f71e51cbb973238ebd 100644 (file)
@@ -18,6 +18,8 @@
 #include <asm/machdep.h>
 #include <asm/smp.h>
 
+#include "cacheinfo.h"
+
 #ifdef CONFIG_PPC64
 #include <asm/paca.h>
 #include <asm/lppaca.h>
@@ -25,8 +27,6 @@
 
 static DEFINE_PER_CPU(struct cpu, cpu_devices);
 
-static DEFINE_PER_CPU(struct kobject *, cache_toplevel);
-
 /*
  * SMT snooze delay stuff, 64-bit only for now
  */
@@ -343,283 +343,6 @@ static struct sysdev_attribute pa6t_attrs[] = {
 #endif /* HAS_PPC_PMC_PA6T */
 #endif /* HAS_PPC_PMC_CLASSIC */
 
-struct cache_desc {
-       struct kobject kobj;
-       struct cache_desc *next;
-       const char *type;       /* Instruction, Data, or Unified */
-       u32 size;               /* total cache size in KB */
-       u32 line_size;          /* in bytes */
-       u32 nr_sets;            /* number of sets */
-       u32 level;              /* e.g. 1, 2, 3... */
-       u32 associativity;      /* e.g. 8-way... 0 is fully associative */
-};
-
-DEFINE_PER_CPU(struct cache_desc *, cache_desc);
-
-static struct cache_desc *kobj_to_cache_desc(struct kobject *k)
-{
-       return container_of(k, struct cache_desc, kobj);
-}
-
-static void cache_desc_release(struct kobject *k)
-{
-       struct cache_desc *desc = kobj_to_cache_desc(k);
-
-       pr_debug("%s: releasing %s\n", __func__, kobject_name(k));
-
-       if (desc->next)
-               kobject_put(&desc->next->kobj);
-
-       kfree(kobj_to_cache_desc(k));
-}
-
-static ssize_t cache_desc_show(struct kobject *k, struct attribute *attr, char *buf)
-{
-       struct kobj_attribute *kobj_attr;
-
-       kobj_attr = container_of(attr, struct kobj_attribute, attr);
-
-       return kobj_attr->show(k, kobj_attr, buf);
-}
-
-static struct sysfs_ops cache_desc_sysfs_ops = {
-       .show = cache_desc_show,
-};
-
-static struct kobj_type cache_desc_type = {
-       .release = cache_desc_release,
-       .sysfs_ops = &cache_desc_sysfs_ops,
-};
-
-static ssize_t cache_size_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
-{
-       struct cache_desc *cache = kobj_to_cache_desc(k);
-
-       return sprintf(buf, "%uK\n", cache->size);
-}
-
-static struct kobj_attribute cache_size_attr =
-       __ATTR(size, 0444, cache_size_show, NULL);
-
-static ssize_t cache_line_size_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
-{
-       struct cache_desc *cache = kobj_to_cache_desc(k);
-
-       return sprintf(buf, "%u\n", cache->line_size);
-}
-
-static struct kobj_attribute cache_line_size_attr =
-       __ATTR(coherency_line_size, 0444, cache_line_size_show, NULL);
-
-static ssize_t cache_nr_sets_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
-{
-       struct cache_desc *cache = kobj_to_cache_desc(k);
-
-       return sprintf(buf, "%u\n", cache->nr_sets);
-}
-
-static struct kobj_attribute cache_nr_sets_attr =
-       __ATTR(number_of_sets, 0444, cache_nr_sets_show, NULL);
-
-static ssize_t cache_type_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
-{
-       struct cache_desc *cache = kobj_to_cache_desc(k);
-
-       return sprintf(buf, "%s\n", cache->type);
-}
-
-static struct kobj_attribute cache_type_attr =
-       __ATTR(type, 0444, cache_type_show, NULL);
-
-static ssize_t cache_level_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
-{
-       struct cache_desc *cache = kobj_to_cache_desc(k);
-
-       return sprintf(buf, "%u\n", cache->level);
-}
-
-static struct kobj_attribute cache_level_attr =
-       __ATTR(level, 0444, cache_level_show, NULL);
-
-static ssize_t cache_assoc_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
-{
-       struct cache_desc *cache = kobj_to_cache_desc(k);
-
-       return sprintf(buf, "%u\n", cache->associativity);
-}
-
-static struct kobj_attribute cache_assoc_attr =
-       __ATTR(ways_of_associativity, 0444, cache_assoc_show, NULL);
-
-struct cache_desc_info {
-       const char *type;
-       const char *size_prop;
-       const char *line_size_prop;
-       const char *nr_sets_prop;
-};
-
-/* PowerPC Processor binding says the [di]-cache-* must be equal on
- * unified caches, so just use d-cache properties. */
-static struct cache_desc_info ucache_info = {
-       .type = "Unified",
-       .size_prop = "d-cache-size",
-       .line_size_prop = "d-cache-line-size",
-       .nr_sets_prop = "d-cache-sets",
-};
-
-static struct cache_desc_info dcache_info = {
-       .type = "Data",
-       .size_prop = "d-cache-size",
-       .line_size_prop = "d-cache-line-size",
-       .nr_sets_prop = "d-cache-sets",
-};
-
-static struct cache_desc_info icache_info = {
-       .type = "Instruction",
-       .size_prop = "i-cache-size",
-       .line_size_prop = "i-cache-line-size",
-       .nr_sets_prop = "i-cache-sets",
-};
-
-static struct cache_desc * __cpuinit create_cache_desc(struct device_node *np, struct kobject *parent, int index, int level, struct cache_desc_info *info)
-{
-       const u32 *cache_line_size;
-       struct cache_desc *new;
-       const u32 *cache_size;
-       const u32 *nr_sets;
-       int rc;
-
-       new = kzalloc(sizeof(*new), GFP_KERNEL);
-       if (!new)
-               return NULL;
-
-       rc = kobject_init_and_add(&new->kobj, &cache_desc_type, parent,
-                                 "index%d", index);
-       if (rc)
-               goto err;
-
-       /* type */
-       new->type = info->type;
-       rc = sysfs_create_file(&new->kobj, &cache_type_attr.attr);
-       WARN_ON(rc);
-
-       /* level */
-       new->level = level;
-       rc = sysfs_create_file(&new->kobj, &cache_level_attr.attr);
-       WARN_ON(rc);
-
-       /* size */
-       cache_size = of_get_property(np, info->size_prop, NULL);
-       if (cache_size) {
-               new->size = *cache_size / 1024;
-               rc = sysfs_create_file(&new->kobj,
-                                      &cache_size_attr.attr);
-               WARN_ON(rc);
-       }
-
-       /* coherency_line_size */
-       cache_line_size = of_get_property(np, info->line_size_prop, NULL);
-       if (cache_line_size) {
-               new->line_size = *cache_line_size;
-               rc = sysfs_create_file(&new->kobj,
-                                      &cache_line_size_attr.attr);
-               WARN_ON(rc);
-       }
-
-       /* number_of_sets */
-       nr_sets = of_get_property(np, info->nr_sets_prop, NULL);
-       if (nr_sets) {
-               new->nr_sets = *nr_sets;
-               rc = sysfs_create_file(&new->kobj,
-                                      &cache_nr_sets_attr.attr);
-               WARN_ON(rc);
-       }
-
-       /* ways_of_associativity */
-       if (new->nr_sets == 1) {
-               /* fully associative */
-               new->associativity = 0;
-               goto create_assoc;
-       }
-
-       if (new->nr_sets && new->size && new->line_size) {
-               /* If we have values for all of these we can derive
-                * the associativity. */
-               new->associativity =
-                       ((new->size * 1024) / new->nr_sets) / new->line_size;
-create_assoc:
-               rc = sysfs_create_file(&new->kobj,
-                                      &cache_assoc_attr.attr);
-               WARN_ON(rc);
-       }
-
-       return new;
-err:
-       kfree(new);
-       return NULL;
-}
-
-static bool cache_is_unified(struct device_node *np)
-{
-       return of_get_property(np, "cache-unified", NULL);
-}
-
-static struct cache_desc * __cpuinit create_cache_index_info(struct device_node *np, struct kobject *parent, int index, int level)
-{
-       struct device_node *next_cache;
-       struct cache_desc *new, **end;
-
-       pr_debug("%s(node = %s, index = %d)\n", __func__, np->full_name, index);
-
-       if (cache_is_unified(np)) {
-               new = create_cache_desc(np, parent, index, level,
-                                       &ucache_info);
-       } else {
-               new = create_cache_desc(np, parent, index, level,
-                                       &dcache_info);
-               if (new) {
-                       index++;
-                       new->next = create_cache_desc(np, parent, index, level,
-                                                     &icache_info);
-               }
-       }
-       if (!new)
-               return NULL;
-
-       end = &new->next;
-       while (*end)
-               end = &(*end)->next;
-
-       next_cache = of_find_next_cache_node(np);
-       if (!next_cache)
-               goto out;
-
-       *end = create_cache_index_info(next_cache, parent, ++index, ++level);
-
-       of_node_put(next_cache);
-out:
-       return new;
-}
-
-static void __cpuinit create_cache_info(struct sys_device *sysdev)
-{
-       struct kobject *cache_toplevel;
-       struct device_node *np = NULL;
-       int cpu = sysdev->id;
-
-       cache_toplevel = kobject_create_and_add("cache", &sysdev->kobj);
-       if (!cache_toplevel)
-               return;
-       per_cpu(cache_toplevel, cpu) = cache_toplevel;
-       np = of_get_cpu_node(cpu, NULL);
-       if (np != NULL) {
-               per_cpu(cache_desc, cpu) =
-                       create_cache_index_info(np, cache_toplevel, 0, 1);
-               of_node_put(np);
-       }
-       return;
-}
-
 static void __cpuinit register_cpu_online(unsigned int cpu)
 {
        struct cpu *c = &per_cpu(cpu_devices, cpu);
@@ -684,25 +407,10 @@ static void __cpuinit register_cpu_online(unsigned int cpu)
                sysdev_create_file(s, &attr_dscr);
 #endif /* CONFIG_PPC64 */
 
-       create_cache_info(s);
+       cacheinfo_cpu_online(cpu);
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
-static void remove_cache_info(struct sys_device *sysdev)
-{
-       struct kobject *cache_toplevel;
-       struct cache_desc *cache_desc;
-       int cpu = sysdev->id;
-
-       cache_desc = per_cpu(cache_desc, cpu);
-       if (cache_desc != NULL)
-               kobject_put(&cache_desc->kobj);
-
-       cache_toplevel = per_cpu(cache_toplevel, cpu);
-       if (cache_toplevel != NULL)
-               kobject_put(cache_toplevel);
-}
-
 static void unregister_cpu_online(unsigned int cpu)
 {
        struct cpu *c = &per_cpu(cpu_devices, cpu);
@@ -769,7 +477,7 @@ static void unregister_cpu_online(unsigned int cpu)
                sysdev_remove_file(s, &attr_dscr);
 #endif /* CONFIG_PPC64 */
 
-       remove_cache_info(s);
+       cacheinfo_cpu_offline(cpu);
 }
 #endif /* CONFIG_HOTPLUG_CPU */
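The two hooks are invoked from the CPU online/offline paths above; in this era the plumbing behind register_cpu_online()/unregister_cpu_online() is a CPU notifier. A hedged sketch of how such a notifier would dispatch to the new hooks (the callback name is illustrative, not the one sysfs.c actually uses):

#include <linux/cpu.h>
#include <linux/notifier.h>

#include "cacheinfo.h"

static int __cpuinit example_cpu_callback(struct notifier_block *nb,
                                          unsigned long action, void *hcpu)
{
        unsigned int cpu = (unsigned long)hcpu;

        switch (action) {
        case CPU_ONLINE:
        case CPU_ONLINE_FROZEN:
                cacheinfo_cpu_online(cpu);      /* build sysfs hierarchy */
                break;
#ifdef CONFIG_HOTPLUG_CPU
        case CPU_DEAD:
        case CPU_DEAD_FROZEN:
                cacheinfo_cpu_offline(cpu);     /* tear it down */
                break;
#endif
        }
        return NOTIFY_OK;
}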
 
index 4314b39b6faf6c5c83f517c4bed95d00b5191123..ad123bced404a4cb56d467cdfbec04d761d9afef 100644 (file)
 #if defined(CONFIG_40x) || defined(CONFIG_8xx)
 static inline void _tlbil_all(void)
 {
-       asm volatile ("sync; tlbia; isync" : : : "memory")
+       asm volatile ("sync; tlbia; isync" : : : "memory");
 }
 static inline void _tlbil_pid(unsigned int pid)
 {
-       asm volatile ("sync; tlbia; isync" : : : "memory")
+       asm volatile ("sync; tlbia; isync" : : : "memory");
 }
 #else /* CONFIG_40x || CONFIG_8xx */
 extern void _tlbil_all(void);
@@ -47,7 +47,7 @@ extern void _tlbil_pid(unsigned int pid);
 #ifdef CONFIG_8xx
 static inline void _tlbil_va(unsigned long address, unsigned int pid)
 {
-       asm volatile ("tlbie %0; sync" : : "r" (address) : "memory")
+       asm volatile ("tlbie %0; sync" : : "r" (address) : "memory");
 }
 #else /* CONFIG_8xx */
 extern void _tlbil_va(unsigned long address, unsigned int pid);
index cf81049e1e51a0ce98d0ebdf9b7a74d5901de211..7393bd76d698406f85a70b423113c8e163afacae 100644 (file)
@@ -822,42 +822,50 @@ static void __init dump_numa_memory_topology(void)
  * required. nid is the preferred node and end is the physical address of
  * the highest address in the node.
  *
- * Returns the physical address of the memory.
+ * Returns the virtual address of the memory.
  */
-static void __init *careful_allocation(int nid, unsigned long size,
+static void __init *careful_zallocation(int nid, unsigned long size,
                                       unsigned long align,
                                       unsigned long end_pfn)
 {
+       void *ret;
        int new_nid;
-       unsigned long ret = __lmb_alloc_base(size, align, end_pfn << PAGE_SHIFT);
+       unsigned long ret_paddr;
+
+       ret_paddr = __lmb_alloc_base(size, align, end_pfn << PAGE_SHIFT);
 
        /* retry over all memory */
-       if (!ret)
-               ret = __lmb_alloc_base(size, align, lmb_end_of_DRAM());
+       if (!ret_paddr)
+               ret_paddr = __lmb_alloc_base(size, align, lmb_end_of_DRAM());
 
-       if (!ret)
-               panic("numa.c: cannot allocate %lu bytes on node %d",
+       if (!ret_paddr)
+               panic("numa.c: cannot allocate %lu bytes for node %d",
                      size, nid);
 
+       ret = __va(ret_paddr);
+
        /*
-        * If the memory came from a previously allocated node, we must
-        * retry with the bootmem allocator.
+        * We initialize the nodes in numeric order: 0, 1, 2...
+        * and hand over control from the LMB allocator to the
+        * bootmem allocator.  If this function is called for
+        * node 5, then we know that all nodes <5 are using the
+        * bootmem allocator instead of the LMB allocator.
+        *
+        * So, check the nid from which this allocation came
+        * and double check to see if we need to use bootmem
+        * instead of the LMB.  We don't free the LMB memory
+        * since it would be useless.
         */
-       new_nid = early_pfn_to_nid(ret >> PAGE_SHIFT);
+       new_nid = early_pfn_to_nid(ret_paddr >> PAGE_SHIFT);
        if (new_nid < nid) {
-               ret = (unsigned long)__alloc_bootmem_node(NODE_DATA(new_nid),
+               ret = __alloc_bootmem_node(NODE_DATA(new_nid),
                                size, align, 0);
 
-               if (!ret)
-                       panic("numa.c: cannot allocate %lu bytes on node %d",
-                             size, new_nid);
-
-               ret = __pa(ret);
-
-               dbg("alloc_bootmem %lx %lx\n", ret, size);
+               dbg("alloc_bootmem %p %lx\n", ret, size);
        }
 
-       return (void *)ret;
+       memset(ret, 0, size);
+       return ret;
 }
 
 static struct notifier_block __cpuinitdata ppc64_numa_nb = {
@@ -952,7 +960,7 @@ void __init do_init_bootmem(void)
 
        for_each_online_node(nid) {
                unsigned long start_pfn, end_pfn;
-               unsigned long bootmem_paddr;
+               void *bootmem_vaddr;
                unsigned long bootmap_pages;
 
                get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
@@ -964,11 +972,9 @@ void __init do_init_bootmem(void)
                 * previous nodes' bootmem to be initialized and have
                 * all reserved areas marked.
                 */
-               NODE_DATA(nid) = careful_allocation(nid,
+               NODE_DATA(nid) = careful_zallocation(nid,
                                        sizeof(struct pglist_data),
                                        SMP_CACHE_BYTES, end_pfn);
-               NODE_DATA(nid) = __va(NODE_DATA(nid));
-               memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
 
                dbg("node %d\n", nid);
                dbg("NODE_DATA() = %p\n", NODE_DATA(nid));
@@ -984,20 +990,20 @@ void __init do_init_bootmem(void)
                dbg("end_paddr = %lx\n", end_pfn << PAGE_SHIFT);
 
                bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
-               bootmem_paddr = (unsigned long)careful_allocation(nid,
+               bootmem_vaddr = careful_zallocation(nid,
                                        bootmap_pages << PAGE_SHIFT,
                                        PAGE_SIZE, end_pfn);
-               memset(__va(bootmem_paddr), 0, bootmap_pages << PAGE_SHIFT);
 
-               dbg("bootmap_paddr = %lx\n", bootmem_paddr);
+               dbg("bootmap_vaddr = %p\n", bootmem_vaddr);
 
-               init_bootmem_node(NODE_DATA(nid), bootmem_paddr >> PAGE_SHIFT,
+               init_bootmem_node(NODE_DATA(nid),
+                                 __pa(bootmem_vaddr) >> PAGE_SHIFT,
                                  start_pfn, end_pfn);
 
                free_bootmem_with_active_regions(nid, end_pfn);
                /*
                 * Be very careful about moving this around.  Future
-                * calls to careful_allocation() depend on this getting
+                * calls to careful_zallocation() depend on this getting
                 * done correctly.
                 */
                mark_reserved_regions_for_nid(nid);
index 38ff35f2142a5faa37aeb7947fb6f33ce34e9df5..22972cd83cc981b33cf8274f8e6e35e9b99989ad 100644 (file)
@@ -266,7 +266,8 @@ int map_page(unsigned long va, phys_addr_t pa, int flags)
                /* The PTE should never be already set nor present in the
                 * hash table
                 */
-               BUG_ON(pte_val(*pg) & (_PAGE_PRESENT | _PAGE_HASHPTE));
+               BUG_ON((pte_val(*pg) & (_PAGE_PRESENT | _PAGE_HASHPTE)) &&
+                      flags);
                set_pte_at(&init_mm, va, pg, pfn_pte(pa >> PAGE_SHIFT,
                                                     __pgprot(flags)));
        }
index 803a64c02b06ea3930d273a335db284d9b2f37f4..39ac22b13c73ddaa10a0379129e8b2c279f97c95 100644 (file)
@@ -189,8 +189,9 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
        smp_call_function(do_flush_tlb_mm_ipi, NULL, 1);
        _tlbil_pid(0);
        preempt_enable();
-#endif
+#else
        _tlbil_pid(0);
+#endif
 }
 EXPORT_SYMBOL(flush_tlb_kernel_range);
 
index 628009c01958fc261ed1d14b3ff6e1352fa9afa5..dfdbffa068182b8383590f67d789449ca4f73171 100644 (file)
@@ -79,7 +79,7 @@ struct spu_buffer {
  * the vma-to-fileoffset map.
  */
 struct vma_to_fileoffset_map *create_vma_map(const struct spu *spu,
-                                            u64 objectid);
+                                            unsigned long objectid);
 unsigned int vma_map_lookup(struct vma_to_fileoffset_map *map,
                            unsigned int vma, const struct spu *aSpu,
                            int *grd_val);
index ae7c34f37e1c2cca2d4b6885b61e24ddfa434386..98367a0255f34aee02c47b7d308ef7f03eaf6364 100644 (file)
@@ -42,7 +42,7 @@ static struct of_device_id mpc52xx_bus_ids[] __initdata = {
  * from interrupt context while node mapping (which calls ioremap())
  * cannot be used at such point.
  */
-static spinlock_t mpc52xx_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(mpc52xx_lock);
 static struct mpc52xx_gpt __iomem *mpc52xx_wdt;
 static struct mpc52xx_cdm __iomem *mpc52xx_cdm;
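SPIN_LOCK_UNLOCKED, replaced above, is deprecated because a shared static initializer cannot give each lock its own class under lock debugging; DEFINE_SPINLOCK() is the preferred form. A sketch of the pattern, with an illustrative lock name:

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(example_lock);   /* illustrative name */

static void example_use(void)
{
        unsigned long flags;

        spin_lock_irqsave(&example_lock, flags);
        /* ... touch state shared with interrupt context ... */
        spin_unlock_irqrestore(&example_lock, flags);
}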
 
index a428f8d1ac80c0d1a8b16964f99286c6b36a5397..5177bdd2c62a8f7014d0f3bc216da1eff397a3de 100644 (file)
@@ -42,7 +42,7 @@ static void __init mpc831x_rdb_setup_arch(void)
        mpc831x_usb_cfg();
 }
 
-void __init mpc831x_rdb_init_IRQ(void)
+static void __init mpc831x_rdb_init_IRQ(void)
 {
        struct device_node *np;
 
index ec43477caa63f66b63d84f125106c6997fb6d8b7..ec0b401bc9cf1b9afbc1d2527ed7a36ed7c0f817 100644 (file)
@@ -49,8 +49,6 @@
 #define DBG(fmt...)
 #endif
 
-static u8 *bcsr_regs = NULL;
-
 /* ************************************************************************
  *
  * Setup the architecture
@@ -59,13 +57,14 @@ static u8 *bcsr_regs = NULL;
 static void __init mpc832x_sys_setup_arch(void)
 {
        struct device_node *np;
+       u8 __iomem *bcsr_regs = NULL;
 
        if (ppc_md.progress)
                ppc_md.progress("mpc832x_sys_setup_arch()", 0);
 
        /* Map BCSR area */
        np = of_find_node_by_name(NULL, "bcsr");
-       if (np != 0) {
+       if (np) {
                struct resource res;
 
                of_address_to_resource(np, 0, &res);
@@ -93,9 +92,9 @@ static void __init mpc832x_sys_setup_arch(void)
                        != NULL){
                /* Reset the Ethernet PHYs */
 #define BCSR8_FETH_RST 0x50
-               bcsr_regs[8] &= ~BCSR8_FETH_RST;
+               clrbits8(&bcsr_regs[8], BCSR8_FETH_RST);
                udelay(1000);
-               bcsr_regs[8] |= BCSR8_FETH_RST;
+               setbits8(&bcsr_regs[8], BCSR8_FETH_RST);
                iounmap(bcsr_regs);
                of_node_put(np);
        }
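clrbits8() and setbits8(), introduced above in place of open-coded dereferences of an __iomem pointer, do the read-modify-write through the proper MMIO accessors. Rough equivalents, as a sketch (the real helpers live in asm/io.h):

#include <linux/types.h>
#include <asm/io.h>

static inline void sketch_clrbits8(volatile u8 __iomem *addr, u8 clear)
{
        out_8(addr, in_8(addr) & ~clear);
}

static inline void sketch_setbits8(volatile u8 __iomem *addr, u8 set)
{
        out_8(addr, in_8(addr) | set);
}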
index 0300268ce5b844708c66ba54a3c3d494b02db164..2a1295f1983254c7a4083b65b83694ef83bf6129 100644 (file)
@@ -38,6 +38,7 @@
 #define DBG(fmt...)
 #endif
 
+#ifdef CONFIG_QUICC_ENGINE
 static void mpc83xx_spi_activate_cs(u8 cs, u8 polarity)
 {
        pr_debug("%s %d %d\n", __func__, cs, polarity);
@@ -77,8 +78,8 @@ static int __init mpc832x_spi_init(void)
                            mpc83xx_spi_activate_cs,
                            mpc83xx_spi_deactivate_cs);
 }
-
 machine_device_initcall(mpc832x_rdb, mpc832x_spi_init);
+#endif /* CONFIG_QUICC_ENGINE */
 
 /* ************************************************************************
  *
@@ -130,7 +131,7 @@ static int __init mpc832x_declare_of_platform_devices(void)
 }
 machine_device_initcall(mpc832x_rdb, mpc832x_declare_of_platform_devices);
 
-void __init mpc832x_rdb_init_IRQ(void)
+static void __init mpc832x_rdb_init_IRQ(void)
 {
 
        struct device_node *np;
index 9d46e5bdd101539454f3b9134a4728bcab4c2a34..09e9d6fb74115327a98167104f136ecb8961d196 100644 (file)
@@ -18,6 +18,7 @@
 
 #include <linux/stddef.h>
 #include <linux/kernel.h>
+#include <linux/compiler.h>
 #include <linux/init.h>
 #include <linux/errno.h>
 #include <linux/reboot.h>
@@ -43,6 +44,7 @@
 #include <asm/udbg.h>
 #include <sysdev/fsl_soc.h>
 #include <sysdev/fsl_pci.h>
+#include <sysdev/simple_gpio.h>
 #include <asm/qe.h>
 #include <asm/qe_ic.h>
 
@@ -55,8 +57,6 @@
 #define DBG(fmt...)
 #endif
 
-static u8 *bcsr_regs = NULL;
-
 /* ************************************************************************
  *
  * Setup the architecture
@@ -65,13 +65,14 @@ static u8 *bcsr_regs = NULL;
 static void __init mpc836x_mds_setup_arch(void)
 {
        struct device_node *np;
+       u8 __iomem *bcsr_regs = NULL;
 
        if (ppc_md.progress)
                ppc_md.progress("mpc836x_mds_setup_arch()", 0);
 
        /* Map BCSR area */
        np = of_find_node_by_name(NULL, "bcsr");
-       if (np != 0) {
+       if (np) {
                struct resource res;
 
                of_address_to_resource(np, 0, &res);
@@ -93,6 +94,16 @@ static void __init mpc836x_mds_setup_arch(void)
 
                for (np = NULL; (np = of_find_node_by_name(np, "ucc")) != NULL;)
                        par_io_of_config(np);
+#ifdef CONFIG_QE_USB
+               /* Must fixup Par IO before QE GPIO chips are registered. */
+               par_io_config_pin(1,  2, 1, 0, 3, 0); /* USBOE  */
+               par_io_config_pin(1,  3, 1, 0, 3, 0); /* USBTP  */
+               par_io_config_pin(1,  8, 1, 0, 1, 0); /* USBTN  */
+               par_io_config_pin(1, 10, 2, 0, 3, 0); /* USBRXD */
+               par_io_config_pin(1,  9, 2, 1, 3, 0); /* USBRP  */
+               par_io_config_pin(1, 11, 2, 1, 3, 0); /* USBRN  */
+               par_io_config_pin(2, 20, 2, 0, 1, 0); /* CLK21  */
+#endif /* CONFIG_QE_USB */
        }
 
        if ((np = of_find_compatible_node(NULL, "network", "ucc_geth"))
@@ -151,6 +162,70 @@ static int __init mpc836x_declare_of_platform_devices(void)
 }
 machine_device_initcall(mpc836x_mds, mpc836x_declare_of_platform_devices);
 
+#ifdef CONFIG_QE_USB
+static int __init mpc836x_usb_cfg(void)
+{
+       u8 __iomem *bcsr;
+       struct device_node *np;
+       const char *mode;
+       int ret = 0;
+
+       np = of_find_compatible_node(NULL, NULL, "fsl,mpc8360mds-bcsr");
+       if (!np)
+               return -ENODEV;
+
+       bcsr = of_iomap(np, 0);
+       of_node_put(np);
+       if (!bcsr)
+               return -ENOMEM;
+
+       np = of_find_compatible_node(NULL, NULL, "fsl,mpc8323-qe-usb");
+       if (!np) {
+               ret = -ENODEV;
+               goto err;
+       }
+
+#define BCSR8_TSEC1M_MASK      (0x3 << 6)
+#define BCSR8_TSEC1M_RGMII     (0x0 << 6)
+#define BCSR8_TSEC2M_MASK      (0x3 << 4)
+#define BCSR8_TSEC2M_RGMII     (0x0 << 4)
+       /*
+        * Default is GMII (2), but we should set it to RGMII (0) if we use
+        * USB (Eth PHY is in RGMII mode anyway).
+        */
+       clrsetbits_8(&bcsr[8], BCSR8_TSEC1M_MASK | BCSR8_TSEC2M_MASK,
+                              BCSR8_TSEC1M_RGMII | BCSR8_TSEC2M_RGMII);
+
+#define BCSR13_USBMASK 0x0f
+#define BCSR13_nUSBEN  0x08 /* 1 - Disable, 0 - Enable                 */
+#define BCSR13_USBSPEED        0x04 /* 1 - Full, 0 - Low                       */
+#define BCSR13_USBMODE 0x02 /* 1 - Host, 0 - Function                  */
+#define BCSR13_nUSBVCC 0x01 /* 1 - gets VBUS, 0 - supplies VBUS        */
+
+       clrsetbits_8(&bcsr[13], BCSR13_USBMASK, BCSR13_USBSPEED);
+
+       mode = of_get_property(np, "mode", NULL);
+       if (mode && !strcmp(mode, "peripheral")) {
+               setbits8(&bcsr[13], BCSR13_nUSBVCC);
+               qe_usb_clock_set(QE_CLK21, 48000000);
+       } else {
+               setbits8(&bcsr[13], BCSR13_USBMODE);
+               /*
+                * The BCSR GPIOs are used to control power and
+                * speed of the USB transceiver. This is needed for
+                * the USB Host only.
+                */
+               simple_gpiochip_init("fsl,mpc8360mds-bcsr-gpio");
+       }
+
+       of_node_put(np);
+err:
+       iounmap(bcsr);
+       return ret;
+}
+machine_arch_initcall(mpc836x_mds, mpc836x_usb_cfg);
+#endif /* CONFIG_QE_USB */
+
 static void __init mpc836x_mds_init_IRQ(void)
 {
        struct device_node *np;
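clrsetbits_8(), used in the hunk above to move both TSEC pin groups to RGMII in one store, combines the clear and set steps into a single read-modify-write. A rough equivalent as a sketch (the real macro is in asm/io.h):

#include <linux/types.h>
#include <asm/io.h>

static inline void sketch_clrsetbits_8(volatile u8 __iomem *addr,
                                       u8 clear, u8 set)
{
        out_8(addr, (in_8(addr) & ~clear) | set);
}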
index a5273bb28e1bcaa23d5fd19b3f2342d5b056adbb..b0090aac9642283ba4c62a00789edd92baab8ce7 100644 (file)
@@ -51,8 +51,9 @@ static void __init mpc836x_rdk_setup_arch(void)
        for_each_compatible_node(np, "pci", "fsl,mpc8349-pci")
                mpc83xx_add_bridge(np);
 #endif
-
+#ifdef CONFIG_QUICC_ENGINE
        qe_reset();
+#endif
 }
 
 static void __init mpc836x_rdk_init_IRQ(void)
@@ -71,13 +72,14 @@ static void __init mpc836x_rdk_init_IRQ(void)
         */
        ipic_set_default_priority();
        of_node_put(np);
-
+#ifdef CONFIG_QUICC_ENGINE
        np = of_find_compatible_node(NULL, NULL, "fsl,qe-ic");
        if (!np)
                return;
 
        qe_ic_init(np, 0, qe_ic_cascade_low_ipic, qe_ic_cascade_high_ipic);
        of_node_put(np);
+#endif
 }
 
 /*
index 8bb13c807142c9155940da690e26cdc6b8de48cc..530ef990ca7c778ff72cb2bc2342498d594da407 100644 (file)
@@ -26,7 +26,6 @@
 #define BCSR12_USB_SER_MASK    0x8a
 #define BCSR12_USB_SER_PIN     0x80
 #define BCSR12_USB_SER_DEVICE  0x02
-extern int mpc837x_usb_cfg(void);
 
 static int mpc837xmds_usb_cfg(void)
 {
index da030afa2e2cd92039c82fe34f890cfb46f132c4..1d096545322b04ceecdbec07088affb82d92aa9c 100644 (file)
@@ -21,8 +21,6 @@
 
 #include "mpc83xx.h"
 
-extern int mpc837x_usb_cfg(void);
-
 /* ************************************************************************
  *
  * Setup the architecture
index 2a7cbabb410a89ead86b06b481eedd8b3673a8ba..83cfe51526ec26a5219ccb458dc0e56c1e996538 100644 (file)
@@ -61,6 +61,7 @@
 
 extern void mpc83xx_restart(char *cmd);
 extern long mpc83xx_time_init(void);
+extern int mpc837x_usb_cfg(void);
 extern int mpc834x_usb_cfg(void);
 extern int mpc831x_usb_cfg(void);
 
index a8301c8ad5376bb6487f7f968de2f70a3565f91a..7326d904202c1e8ec8bac7e6a52382a3e141b51d 100644 (file)
@@ -148,6 +148,9 @@ static int mpc85xx_exclude_device(struct pci_controller *hose,
 /*
  * Setup the architecture
  */
+#ifdef CONFIG_SMP
+extern void __init mpc85xx_smp_init(void);
+#endif
 static void __init mpc85xx_ds_setup_arch(void)
 {
 #ifdef CONFIG_PCI
@@ -173,6 +176,10 @@ static void __init mpc85xx_ds_setup_arch(void)
        ppc_md.pci_exclude_device = mpc85xx_exclude_device;
 #endif
 
+#ifdef CONFIG_SMP
+       mpc85xx_smp_init();
+#endif
+
        printk("MPC85xx DS board from Freescale Semiconductor\n");
 }
 
index d652c713f496040f1011d87be69351866bd2140f..79a0df17078bbb2e141d29035994d5cfa40fd600 100644 (file)
@@ -58,6 +58,7 @@ smp_85xx_kick_cpu(int nr)
 
        if (cpu_rel_addr == NULL) {
                printk(KERN_ERR "No cpu-release-addr for cpu %d\n", nr);
+               local_irq_restore(flags);
                return;
        }
 
index 47e956c871fe8cfc3aea613e5a1552771eccb9b4..47fe2bea9865db32befbbdff10e8a14d2325de19 100644 (file)
@@ -312,4 +312,15 @@ config MPC8xxx_GPIO
          Say Y here if you're going to use hardware that connects to the
          MPC831x/834x/837x/8572/8610 GPIOs.
 
+config SIMPLE_GPIO
+       bool "Support for simple, memory-mapped GPIO controllers"
+       depends on PPC
+       select GENERIC_GPIO
+       select ARCH_REQUIRE_GPIOLIB
+       help
+         Say Y here to support simple, memory-mapped GPIO controllers.
+         These are usually BCSRs used to control a board's switches, LEDs,
+         chip-selects, Ethernet/USB PHY power, and various other small
+         on-board peripherals.
+
 endmenu
index 3d0c776f888d83f96d666b789f7595f2ad6a2dbc..e868b5c50723d348cbf2357e8adb3c9ec28998b5 100644 (file)
@@ -231,7 +231,7 @@ config VIRT_CPU_ACCOUNTING
          If in doubt, say Y here.
 
 config SMP
-       depends on PPC_STD_MMU
+       depends on PPC_STD_MMU || FSL_BOOKE
        bool "Symmetric multi-processing support"
        ---help---
          This enables support for systems with more than one CPU. If you have
index 2e67bd840e011f6b10a284441c83ea2ac467dde7..35b1ec492715613becadbfad8680c6f2fc479117 100644 (file)
@@ -44,8 +44,8 @@ static DEFINE_SPINLOCK(beat_htab_lock);
 
 static inline unsigned int beat_read_mask(unsigned hpte_group)
 {
-       unsigned long hpte_v[5];
        unsigned long rmask = 0;
+       u64 hpte_v[5];
 
        beat_read_htab_entries(0, hpte_group + 0, hpte_v);
        if (!(hpte_v[0] & HPTE_V_BOLTED))
@@ -93,8 +93,7 @@ static long beat_lpar_hpte_insert(unsigned long hpte_group,
                                  int psize, int ssize)
 {
        unsigned long lpar_rc;
-       unsigned long slot;
-       unsigned long hpte_v, hpte_r;
+       u64 hpte_v, hpte_r, slot;
 
        /* same as iseries */
        if (vflags & HPTE_V_SECONDARY)
@@ -153,8 +152,9 @@ static long beat_lpar_hpte_remove(unsigned long hpte_group)
 
 static unsigned long beat_lpar_hpte_getword0(unsigned long slot)
 {
-       unsigned long dword0, dword[5];
+       unsigned long dword0;
        unsigned long lpar_rc;
+       u64 dword[5];
 
        lpar_rc = beat_read_htab_entries(0, slot & ~3UL, dword);
 
@@ -170,7 +170,7 @@ static void beat_lpar_hptab_clear(void)
        unsigned long size_bytes = 1UL << ppc64_pft_size;
        unsigned long hpte_count = size_bytes >> 4;
        int i;
-       unsigned long dummy0, dummy1;
+       u64 dummy0, dummy1;
 
        /* TODO: Use bulk call */
        for (i = 0; i < hpte_count; i++)
@@ -189,7 +189,8 @@ static long beat_lpar_hpte_updatepp(unsigned long slot,
                                    int psize, int ssize, int local)
 {
        unsigned long lpar_rc;
-       unsigned long dummy0, dummy1, want_v;
+       u64 dummy0, dummy1;
+       unsigned long want_v;
 
        want_v = hpte_encode_v(va, psize, MMU_SEGSIZE_256M);
 
@@ -255,7 +256,8 @@ static void beat_lpar_hpte_updateboltedpp(unsigned long newpp,
                                          unsigned long ea,
                                          int psize, int ssize)
 {
-       unsigned long lpar_rc, slot, vsid, va, dummy0, dummy1;
+       unsigned long lpar_rc, slot, vsid, va;
+       u64 dummy0, dummy1;
 
        vsid = get_kernel_vsid(ea, MMU_SEGSIZE_256M);
        va = (vsid << 28) | (ea & 0x0fffffff);
@@ -276,7 +278,7 @@ static void beat_lpar_hpte_invalidate(unsigned long slot, unsigned long va,
 {
        unsigned long want_v;
        unsigned long lpar_rc;
-       unsigned long dummy1, dummy2;
+       u64 dummy1, dummy2;
        unsigned long flags;
 
        DBG_LOW("    inval : slot=%lx, va=%016lx, psize: %d, local: %d\n",
@@ -315,8 +317,7 @@ static long beat_lpar_hpte_insert_v3(unsigned long hpte_group,
                                  int psize, int ssize)
 {
        unsigned long lpar_rc;
-       unsigned long slot;
-       unsigned long hpte_v, hpte_r;
+       u64 hpte_v, hpte_r, slot;
 
        /* same as iseries */
        if (vflags & HPTE_V_SECONDARY)
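
The switch from unsigned long to u64 in the beat_htab hunks above lines the buffers up with the hypercall wrappers, which (as these hunks suggest) traffic in u64; even on 64-bit powerpc, unsigned long * and u64 * are distinct pointer types to the compiler. A self-contained userspace illustration of the mismatch, with a hypothetical stand-in for the wrapper:

#include <stdint.h>
#include <stdio.h>

/* hypothetical stand-in for a u64-based hypercall wrapper */
static long hcall_read_entries(uint64_t *values)
{
        values[0] = 0;          /* pretend the hypervisor filled it in */
        return 0;
}

int main(void)
{
        uint64_t as_u64[5];

        /* Declaring the buffer as unsigned long[5] and passing it here
         * would draw an incompatible-pointer-type warning, even where
         * unsigned long happens to be 64 bits wide. */
        hcall_read_entries(as_u64);     /* clean: matches the declared ABI */
        printf("entry 0 = %llu\n", (unsigned long long)as_u64[0]);
        return 0;
}
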
index 6b418f6b61750ec6f94e8f4fc89606696f6ab1f9..350735bc88882fe7668d33841e2f354f7773d01b 100644 (file)
@@ -40,8 +40,8 @@ static void udbg_putc_beat(char c)
 }
 
 /* Buffered chars getc */
-static long inbuflen;
-static long inbuf[2];  /* must be 2 longs */
+static u64 inbuflen;
+static u64 inbuf[2];   /* must be 2 u64s */
 
 static int udbg_getc_poll_beat(void)
 {
index 70fa7aef5edd9a6404f0fdc7b58e57565903821e..20472e487b6f713f5cd6018420005e519ec3b3ba 100644 (file)
@@ -54,7 +54,7 @@ int cbe_cpufreq_set_pmode(int cpu, unsigned int pmode)
 {
        struct cbe_pmd_regs __iomem *pmd_regs;
        struct cbe_mic_tm_regs __iomem *mic_tm_regs;
-       u64 flags;
+       unsigned long flags;
        u64 value;
 #ifdef DEBUG
        long time;
index 2d5bb22d6c092419cd4582bdbdd8f364135db833..28c04dab263350110055b2883a916355c39deba6 100644 (file)
@@ -148,7 +148,7 @@ static unsigned int iic_get_irq(void)
 
        iic = &__get_cpu_var(iic);
        *(unsigned long *) &pending =
-               in_be64((unsigned long __iomem *) &iic->regs->pending_destr);
+               in_be64((u64 __iomem *) &iic->regs->pending_destr);
        if (!(pending.flags & CBE_IIC_IRQ_VALID))
                return NO_IRQ;
        virq = irq_linear_revmap(iic_host, iic_pending_to_hwnum(pending));
index b5f84e8f089990756daa13ca1101a9ccc4db74d6..059cad6c3f694864901ce067d2409fc33ce6d6f8 100644 (file)
@@ -130,14 +130,14 @@ static const struct ppc_pci_io __devinitconst iowa_pci_io = {
 
 };
 
-static void __iomem *iowa_ioremap(unsigned long addr, unsigned long size,
+static void __iomem *iowa_ioremap(phys_addr_t addr, unsigned long size,
                                                unsigned long flags)
 {
        struct iowa_bus *bus;
        void __iomem *res = __ioremap(addr, size, flags);
        int busno;
 
-       bus = iowa_pci_find(0, addr);
+       bus = iowa_pci_find(0, (unsigned long)addr);
        if (bus != NULL) {
                busno = bus - iowa_busses;
                PCI_SET_ADDR_TOKEN(res, busno + 1);
index 86db4dd170a0e42d89e26e0f7d20c7f388bf355d..88d94b59a7cb9ff7b4d4f8f301c1ef7c9739f548 100644 (file)
@@ -150,8 +150,8 @@ static int cbe_nr_iommus;
 static void invalidate_tce_cache(struct cbe_iommu *iommu, unsigned long *pte,
                long n_ptes)
 {
-       unsigned long __iomem *reg;
-       unsigned long val;
+       u64 __iomem *reg;
+       u64 val;
        long n;
 
        reg = iommu->xlate_regs + IOC_IOPT_CacheInvd;
index 15c62d3ca129f55d6ebfe56fe7b93fcb8939f42c..3bf908e2873adb9216f48df40e0481787fb4bab3 100644 (file)
@@ -314,7 +314,7 @@ extern char *isolated_loader;
  *     we need to call spu_release(ctx) before sleeping, and
  *     then spu_acquire(ctx) when awoken.
  *
- *     Returns with state_mutex re-acquired when successfull or
+ *     Returns with state_mutex re-acquired when successful or
  *     with -ERESTARTSYS and the state_mutex dropped when interrupted.
  */
 
index ed3753d8c1090b5108fd792844b0e91835135be0..7ddd0a2c80276e3538088dc6e290a881285e2788 100644 (file)
@@ -10,18 +10,21 @@ menu "iSeries device drivers"
 config VIODASD
        tristate "iSeries Virtual I/O disk support"
        depends on BLOCK
+       select VIOPATH
        help
          If you are running on an iSeries system and you want to use
          virtual disks created and managed by OS/400, say Y.
 
 config VIOCD
        tristate "iSeries Virtual I/O CD support"
+       select VIOPATH
        help
          If you are running Linux on an IBM iSeries system and you want to
          read a CD drive owned by OS/400, say Y here.
 
 config VIOTAPE
        tristate "iSeries Virtual Tape Support"
+       select VIOPATH
        help
          If you are running Linux on an iSeries system and you want Linux
          to read and/or write a tape drive owned by OS/400, say Y here.
@@ -30,5 +33,3 @@ endmenu
 
 config VIOPATH
        bool
-       depends on VIODASD || VIOCD || VIOTAPE || ISERIES_VETH
-       default y
index 70b688c1aefb9f7512c74d0d20d1583454328c70..24519b96d6ad07b22bf1d64cd419fb603076aa61 100644 (file)
@@ -23,6 +23,7 @@
 #include <linux/string.h>
 #include <linux/seq_file.h>
 #include <linux/kdev_t.h>
+#include <linux/kexec.h>
 #include <linux/major.h>
 #include <linux/root_dev.h>
 #include <linux/kernel.h>
@@ -638,6 +639,13 @@ static int __init iseries_probe(void)
        return 1;
 }
 
+#ifdef CONFIG_KEXEC
+static int iseries_kexec_prepare(struct kimage *image)
+{
+       return -ENOSYS;
+}
+#endif
+
 define_machine(iseries) {
        .name                   = "iSeries",
        .setup_arch             = iSeries_setup_arch,
@@ -658,6 +666,9 @@ define_machine(iseries) {
        .probe                  = iseries_probe,
        .ioremap                = iseries_ioremap,
        .iounmap                = iseries_iounmap,
+#ifdef CONFIG_KEXEC
+       .machine_kexec_prepare  = iseries_kexec_prepare,
+#endif
        /* XXX Implement enable_pmcs for iSeries */
 };
 
index 58556b028a4c142b9886fe78c48b69a752c8fd2c..86db47c1b6656571f9c9ce3af315fc88eb727085 100644 (file)
@@ -112,7 +112,7 @@ static int get_gizmo_latency(void)
 
 static void set_astate(int cpu, unsigned int astate)
 {
-       u64 flags;
+       unsigned long flags;
 
        /* Return if called before init has run */
        if (unlikely(!sdcasr_mapbase))
index 217af321b0ca4d95b1c9aa2a9a39afbe68d98c7f..a6152d92224304bc20da05b3e5f44550956e376a 100644 (file)
@@ -509,7 +509,7 @@ fallback:
  */
 int pasemi_dma_init(void)
 {
-       static spinlock_t init_lock = SPIN_LOCK_UNLOCKED;
+       static DEFINE_SPINLOCK(init_lock);
        struct pci_dev *iob_pdev;
        struct pci_dev *pdev;
        struct resource res;
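
The pasemi hunk above replaces the long-deprecated SPIN_LOCK_UNLOCKED initializer with DEFINE_SPINLOCK(), which declares and initializes the lock and gives it its own lockdep class instead of sharing one static initializer across every user. A minimal kernel-style sketch of the idiom, with a hypothetical lock name:

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(example_lock);   /* hypothetical lock name */

static void example_update(void)
{
        unsigned long flags;

        spin_lock_irqsave(&example_lock, flags);
        /* ... touch the state this lock protects ... */
        spin_unlock_irqrestore(&example_lock, flags);
}
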
index 54b7b76ed4f090242668a145ac8199afac7c80a5..04cdd32624d40b08ffac327f44e9d1124b58e41f 100644 (file)
@@ -661,6 +661,7 @@ static void __init init_second_ohare(void)
                        pci_find_hose_for_OF_device(np);
                if (!hose) {
                        printk(KERN_ERR "Can't find PCI hose for OHare2 !\n");
+                       of_node_put(np);
                        return;
                }
                early_read_config_word(hose, bus, devfn, PCI_COMMAND, &cmd);
@@ -669,6 +670,7 @@ static void __init init_second_ohare(void)
                early_write_config_word(hose, bus, devfn, PCI_COMMAND, cmd);
        }
        has_second_ohare = 1;
+       of_node_put(np);
 }
 
 /*
index 59eb840d8ce20782c7cf5809dc46356e2651244c..1810e4226e5615543999e7242a8c467991707907 100644 (file)
@@ -265,12 +265,15 @@ int __init via_calibrate_decr(void)
        struct resource rsrc;
 
        vias = of_find_node_by_name(NULL, "via-cuda");
-       if (vias == 0)
+       if (vias == NULL)
                vias = of_find_node_by_name(NULL, "via-pmu");
-       if (vias == 0)
+       if (vias == NULL)
                vias = of_find_node_by_name(NULL, "via");
-       if (vias == 0 || of_address_to_resource(vias, 0, &rsrc))
+       if (vias == NULL || of_address_to_resource(vias, 0, &rsrc)) {
+               of_node_put(vias);
                return 0;
+       }
+       of_node_put(vias);
        via = ioremap(rsrc.start, rsrc.end - rsrc.start + 1);
        if (via == NULL) {
                printk(KERN_ERR "Failed to map VIA for timer calibration !\n");
@@ -297,7 +300,7 @@ int __init via_calibrate_decr(void)
        ppc_tb_freq = (dstart - dend) * 100 / 6;
 
        iounmap(via);
-       
+
        return 1;
 }
 #endif
index dbc124e056461038c3620d8cf92a60ed3008bbce..ca71a12b764c3cdbf2b3834aac3354212301e262 100644 (file)
@@ -518,6 +518,41 @@ fail_device_register:
        return result;
 }
 
+static int __init ps3_register_ramdisk_device(void)
+{
+       int result;
+       struct layout {
+               struct ps3_system_bus_device dev;
+       } *p;
+
+       pr_debug(" -> %s:%d\n", __func__, __LINE__);
+
+       p = kzalloc(sizeof(struct layout), GFP_KERNEL);
+
+       if (!p)
+               return -ENOMEM;
+
+       p->dev.match_id = PS3_MATCH_ID_GPU;
+       p->dev.match_sub_id = PS3_MATCH_SUB_ID_GPU_RAMDISK;
+       p->dev.dev_type = PS3_DEVICE_TYPE_IOC0;
+
+       result = ps3_system_bus_device_register(&p->dev);
+
+       if (result) {
+               pr_debug("%s:%d ps3_system_bus_device_register failed\n",
+                       __func__, __LINE__);
+               goto fail_device_register;
+       }
+
+       pr_debug(" <- %s:%d\n", __func__, __LINE__);
+       return 0;
+
+fail_device_register:
+       kfree(p);
+       pr_debug(" <- %s:%d failed\n", __func__, __LINE__);
+       return result;
+}
+
 /**
  * ps3_setup_dynamic_device - Setup a dynamic device from the repository
  */
@@ -946,6 +981,8 @@ static int __init ps3_register_devices(void)
 
        ps3_register_lpm_devices();
 
+       ps3_register_ramdisk_device();
+
        pr_debug(" <- %s:%d\n", __func__, __LINE__);
        return 0;
 }
index 5afce115ab1f8533d25d7d3c3a8e4ff304ec51e0..b33b28a6fe1235d97c837cb9615cb7e4bf0464bf 100644 (file)
@@ -17,6 +17,7 @@ obj-$(CONFIG_FSL_PCI)         += fsl_pci.o $(fsl-msi-obj-y)
 obj-$(CONFIG_FSL_LBC)          += fsl_lbc.o
 obj-$(CONFIG_FSL_GTM)          += fsl_gtm.o
 obj-$(CONFIG_MPC8xxx_GPIO)     += mpc8xxx_gpio.o
+obj-$(CONFIG_SIMPLE_GPIO)      += simple_gpio.o
 obj-$(CONFIG_RAPIDIO)          += fsl_rio.o
 obj-$(CONFIG_TSI108_BRIDGE)    += tsi108_pci.o tsi108_dev.o
 obj-$(CONFIG_QUICC_ENGINE)     += qe_lib/
index d5f9ae0f1b75e7c4612a2beee8ff28174ac3be9b..f611d0369cc8ceb275cc45a94ca04451dbe84950 100644 (file)
@@ -29,7 +29,8 @@
 
 #if defined(CONFIG_PPC_85xx) || defined(CONFIG_PPC_86xx)
 /* atmu setup for fsl pci/pcie controller */
-void __init setup_pci_atmu(struct pci_controller *hose, struct resource *rsrc)
+static void __init setup_pci_atmu(struct pci_controller *hose,
+                                 struct resource *rsrc)
 {
        struct ccsr_pci __iomem *pci;
        int i;
@@ -86,7 +87,7 @@ void __init setup_pci_atmu(struct pci_controller *hose, struct resource *rsrc)
        out_be32(&pci->piw[2].piwar, PIWAR_2G);
 }
 
-void __init setup_pci_cmd(struct pci_controller *hose)
+static void __init setup_pci_cmd(struct pci_controller *hose)
 {
        u16 cmd;
        int cap_x;
@@ -130,7 +131,7 @@ static void __init quirk_fsl_pcie_header(struct pci_dev *dev)
        return ;
 }
 
-int __init fsl_pcie_check_link(struct pci_controller *hose)
+static int __init fsl_pcie_check_link(struct pci_controller *hose)
 {
        u32 val;
        early_read_config_dword(hose, 0, 0, PCIE_LTSSM, &val);
index 60f7f227327ce04dbb0fc81c82e424bfbfcdb2a8..9c744e4285a023c21fe7d7e12876e4443fc43a51 100644 (file)
@@ -5,8 +5,13 @@
 #include <asm/mmu.h>
 
 extern phys_addr_t get_immrbase(void);
+#if defined(CONFIG_CPM2) || defined(CONFIG_QUICC_ENGINE) || defined(CONFIG_8xx)
 extern u32 get_brgfreq(void);
 extern u32 get_baudrate(void);
+#else
+static inline u32 get_brgfreq(void) { return -1; }
+static inline u32 get_baudrate(void) { return -1; }
+#endif
 extern u32 fsl_get_sys_freq(void);
 
 struct spi_board_info;
index 76ffbc48d4b949f410e611b15d6550a647bdb30b..41ac3dfac98e1e85dabb16a01693984b7ba70414 100644 (file)
@@ -22,5 +22,6 @@ config UCC
 
 config QE_USB
        bool
+       default y if USB_GADGET_FSL_QE
        help
-         QE USB Host Controller support
+         QE USB Controller support
index 8e5a0bc36d0be40c1da806b91cb3a721348e84d6..3485288dce31bc6e0b4ad79aa96444ce57bf9afb 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/spinlock.h>
+#include <linux/err.h>
 #include <linux/io.h>
 #include <linux/of.h>
 #include <linux/of_gpio.h>
@@ -24,8 +25,14 @@ struct qe_gpio_chip {
        struct of_mm_gpio_chip mm_gc;
        spinlock_t lock;
 
+       unsigned long pin_flags[QE_PIO_PINS];
+#define QE_PIN_REQUESTED 0
+
        /* shadowed data register to clear/set bits safely */
        u32 cpdata;
+
+       /* saved_regs used to restore dedicated functions */
+       struct qe_pio_regs saved_regs;
 };
 
 static inline struct qe_gpio_chip *
@@ -40,6 +47,12 @@ static void qe_gpio_save_regs(struct of_mm_gpio_chip *mm_gc)
        struct qe_pio_regs __iomem *regs = mm_gc->regs;
 
        qe_gc->cpdata = in_be32(&regs->cpdata);
+       qe_gc->saved_regs.cpdata = qe_gc->cpdata;
+       qe_gc->saved_regs.cpdir1 = in_be32(&regs->cpdir1);
+       qe_gc->saved_regs.cpdir2 = in_be32(&regs->cpdir2);
+       qe_gc->saved_regs.cppar1 = in_be32(&regs->cppar1);
+       qe_gc->saved_regs.cppar2 = in_be32(&regs->cppar2);
+       qe_gc->saved_regs.cpodr = in_be32(&regs->cpodr);
 }
 
 static int qe_gpio_get(struct gpio_chip *gc, unsigned int gpio)
@@ -103,6 +116,188 @@ static int qe_gpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val)
        return 0;
 }
 
+struct qe_pin {
+       /*
+        * The qe_gpio_chip name is unfortunate, we should change that to
+        * something like qe_pio_controller. Someday.
+        */
+       struct qe_gpio_chip *controller;
+       int num;
+};
+
+/**
+ * qe_pin_request - Request a QE pin
+ * @np:                device node to get a pin from
+ * @index:     index of a pin in the device tree
+ * Context:    non-atomic
+ *
+ * This function returns a qe_pin so that you can use it with the rest of
+ * the QE Pin Multiplexing API.
+ */
+struct qe_pin *qe_pin_request(struct device_node *np, int index)
+{
+       struct qe_pin *qe_pin;
+       struct device_node *gc;
+       struct of_gpio_chip *of_gc = NULL;
+       struct of_mm_gpio_chip *mm_gc;
+       struct qe_gpio_chip *qe_gc;
+       int err;
+       int size;
+       const void *gpio_spec;
+       const u32 *gpio_cells;
+       unsigned long flags;
+
+       qe_pin = kzalloc(sizeof(*qe_pin), GFP_KERNEL);
+       if (!qe_pin) {
+               pr_debug("%s: can't allocate memory\n", __func__);
+               return ERR_PTR(-ENOMEM);
+       }
+
+       err = of_parse_phandles_with_args(np, "gpios", "#gpio-cells", index,
+                                         &gc, &gpio_spec);
+       if (err) {
+               pr_debug("%s: can't parse gpios property\n", __func__);
+               goto err0;
+       }
+
+       if (!of_device_is_compatible(gc, "fsl,mpc8323-qe-pario-bank")) {
+               pr_debug("%s: tried to get a non-qe pin\n", __func__);
+               err = -EINVAL;
+               goto err1;
+       }
+
+       of_gc = gc->data;
+       if (!of_gc) {
+               pr_debug("%s: gpio controller %s isn't registered\n",
+                        np->full_name, gc->full_name);
+               err = -ENODEV;
+               goto err1;
+       }
+
+       gpio_cells = of_get_property(gc, "#gpio-cells", &size);
+       if (!gpio_cells || size != sizeof(*gpio_cells) ||
+                       *gpio_cells != of_gc->gpio_cells) {
+               pr_debug("%s: wrong #gpio-cells for %s\n",
+                        np->full_name, gc->full_name);
+               err = -EINVAL;
+               goto err1;
+       }
+
+       err = of_gc->xlate(of_gc, np, gpio_spec, NULL);
+       if (err < 0)
+               goto err1;
+
+       mm_gc = to_of_mm_gpio_chip(&of_gc->gc);
+       qe_gc = to_qe_gpio_chip(mm_gc);
+
+       spin_lock_irqsave(&qe_gc->lock, flags);
+
+       if (test_and_set_bit(QE_PIN_REQUESTED, &qe_gc->pin_flags[err]) == 0) {
+               qe_pin->controller = qe_gc;
+               qe_pin->num = err;
+               err = 0;
+       } else {
+               err = -EBUSY;
+       }
+
+       spin_unlock_irqrestore(&qe_gc->lock, flags);
+
+       if (!err)
+               return qe_pin;
+err1:
+       of_node_put(gc);
+err0:
+       kfree(qe_pin);
+       pr_debug("%s failed with status %d\n", __func__, err);
+       return ERR_PTR(err);
+}
+EXPORT_SYMBOL(qe_pin_request);
+
+/**
+ * qe_pin_free - Free a pin
+ * @qe_pin:    pointer to the qe_pin structure
+ * Context:    any
+ *
+ * This function frees the qe_pin structure and makes a pin available
+ * for further qe_pin_request() calls.
+ */
+void qe_pin_free(struct qe_pin *qe_pin)
+{
+       struct qe_gpio_chip *qe_gc = qe_pin->controller;
+       unsigned long flags;
+       const int pin = qe_pin->num;
+
+       spin_lock_irqsave(&qe_gc->lock, flags);
+       test_and_clear_bit(QE_PIN_REQUESTED, &qe_gc->pin_flags[pin]);
+       spin_unlock_irqrestore(&qe_gc->lock, flags);
+
+       kfree(qe_pin);
+}
+EXPORT_SYMBOL(qe_pin_free);
+
+/**
+ * qe_pin_set_dedicated - Revert a pin to a dedicated peripheral function mode
+ * @qe_pin:    pointer to the qe_pin structure
+ * Context:    any
+ *
+ * This function resets a pin to a dedicated peripheral function that
+ * has been set up by the firmware.
+ */
+void qe_pin_set_dedicated(struct qe_pin *qe_pin)
+{
+       struct qe_gpio_chip *qe_gc = qe_pin->controller;
+       struct qe_pio_regs __iomem *regs = qe_gc->mm_gc.regs;
+       struct qe_pio_regs *sregs = &qe_gc->saved_regs;
+       int pin = qe_pin->num;
+       u32 mask1 = 1 << (QE_PIO_PINS - (pin + 1));
+       u32 mask2 = 0x3 << (QE_PIO_PINS - (pin % (QE_PIO_PINS / 2) + 1) * 2);
+       bool second_reg = pin > (QE_PIO_PINS / 2) - 1;
+       unsigned long flags;
+
+       spin_lock_irqsave(&qe_gc->lock, flags);
+
+       if (second_reg) {
+               clrsetbits_be32(&regs->cpdir2, mask2, sregs->cpdir2 & mask2);
+               clrsetbits_be32(&regs->cppar2, mask2, sregs->cppar2 & mask2);
+       } else {
+               clrsetbits_be32(&regs->cpdir1, mask2, sregs->cpdir1 & mask2);
+               clrsetbits_be32(&regs->cppar1, mask2, sregs->cppar1 & mask2);
+       }
+
+       if (sregs->cpdata & mask1)
+               qe_gc->cpdata |= mask1;
+       else
+               qe_gc->cpdata &= ~mask1;
+
+       out_be32(&regs->cpdata, qe_gc->cpdata);
+       clrsetbits_be32(&regs->cpodr, mask1, sregs->cpodr & mask1);
+
+       spin_unlock_irqrestore(&qe_gc->lock, flags);
+}
+EXPORT_SYMBOL(qe_pin_set_dedicated);
+
+/**
+ * qe_pin_set_gpio - Set a pin to the GPIO mode
+ * @qe_pin:    pointer to the qe_pin structure
+ * Context:    any
+ *
+ * This function sets a pin to the GPIO mode.
+ */
+void qe_pin_set_gpio(struct qe_pin *qe_pin)
+{
+       struct qe_gpio_chip *qe_gc = qe_pin->controller;
+       struct qe_pio_regs __iomem *regs = qe_gc->mm_gc.regs;
+       unsigned long flags;
+
+       spin_lock_irqsave(&qe_gc->lock, flags);
+
+       /* Let's make it an input by default; the GPIO API can change that. */
+       __par_io_config_pin(regs, qe_pin->num, QE_PIO_DIR_IN, 0, 0, 0);
+
+       spin_unlock_irqrestore(&qe_gc->lock, flags);
+}
+EXPORT_SYMBOL(qe_pin_set_gpio);
+
 static int __init qe_add_gpiochips(void)
 {
        struct device_node *np;
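
Taken together, the four exported functions above form the QE Pin Multiplexing API: request a pin from a "gpios" phandle, flip it between GPIO and dedicated-function mode, and free it. A hedged consumer sketch follows; the header location is assumed and the driver names are hypothetical.

#include <linux/err.h>
#include <linux/of.h>
#include <asm/qe.h>     /* assumed home of the qe_pin_* declarations */

static struct qe_pin *usb_power_pin;    /* hypothetical pin */

static int example_claim(struct device_node *np)
{
        usb_power_pin = qe_pin_request(np, 0);  /* first "gpios" entry */
        if (IS_ERR(usb_power_pin))
                return PTR_ERR(usb_power_pin);

        qe_pin_set_gpio(usb_power_pin);         /* hand the pin to gpiolib */
        return 0;
}

static void example_release(void)
{
        qe_pin_set_dedicated(usb_power_pin);    /* restore the firmware setup */
        qe_pin_free(usb_power_pin);
}
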
diff --git a/arch/powerpc/sysdev/simple_gpio.c b/arch/powerpc/sysdev/simple_gpio.c
new file mode 100644 (file)
index 0000000..43c4569
--- /dev/null
@@ -0,0 +1,155 @@
+/*
+ * Simple Memory-Mapped GPIOs
+ *
+ * Copyright (c) MontaVista Software, Inc. 2008.
+ *
+ * Author: Anton Vorontsov <avorontsov@ru.mvista.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include <linux/ioport.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_gpio.h>
+#include <linux/gpio.h>
+#include <asm/prom.h>
+#include "simple_gpio.h"
+
+struct u8_gpio_chip {
+       struct of_mm_gpio_chip mm_gc;
+       spinlock_t lock;
+
+       /* shadowed data register to clear/set bits safely */
+       u8 data;
+};
+
+static struct u8_gpio_chip *to_u8_gpio_chip(struct of_mm_gpio_chip *mm_gc)
+{
+       return container_of(mm_gc, struct u8_gpio_chip, mm_gc);
+}
+
+static u8 u8_pin2mask(unsigned int pin)
+{
+       return 1 << (8 - 1 - pin);
+}
+
+static int u8_gpio_get(struct gpio_chip *gc, unsigned int gpio)
+{
+       struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
+
+       return in_8(mm_gc->regs) & u8_pin2mask(gpio);
+}
+
+static void u8_gpio_set(struct gpio_chip *gc, unsigned int gpio, int val)
+{
+       struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
+       struct u8_gpio_chip *u8_gc = to_u8_gpio_chip(mm_gc);
+       unsigned long flags;
+
+       spin_lock_irqsave(&u8_gc->lock, flags);
+
+       if (val)
+               u8_gc->data |= u8_pin2mask(gpio);
+       else
+               u8_gc->data &= ~u8_pin2mask(gpio);
+
+       out_8(mm_gc->regs, u8_gc->data);
+
+       spin_unlock_irqrestore(&u8_gc->lock, flags);
+}
+
+static int u8_gpio_dir_in(struct gpio_chip *gc, unsigned int gpio)
+{
+       return 0;
+}
+
+static int u8_gpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val)
+{
+       u8_gpio_set(gc, gpio, val);
+       return 0;
+}
+
+static void u8_gpio_save_regs(struct of_mm_gpio_chip *mm_gc)
+{
+       struct u8_gpio_chip *u8_gc = to_u8_gpio_chip(mm_gc);
+
+       u8_gc->data = in_8(mm_gc->regs);
+}
+
+static int __init u8_simple_gpiochip_add(struct device_node *np)
+{
+       int ret;
+       struct u8_gpio_chip *u8_gc;
+       struct of_mm_gpio_chip *mm_gc;
+       struct of_gpio_chip *of_gc;
+       struct gpio_chip *gc;
+
+       u8_gc = kzalloc(sizeof(*u8_gc), GFP_KERNEL);
+       if (!u8_gc)
+               return -ENOMEM;
+
+       spin_lock_init(&u8_gc->lock);
+
+       mm_gc = &u8_gc->mm_gc;
+       of_gc = &mm_gc->of_gc;
+       gc = &of_gc->gc;
+
+       mm_gc->save_regs = u8_gpio_save_regs;
+       of_gc->gpio_cells = 2;
+       gc->ngpio = 8;
+       gc->direction_input = u8_gpio_dir_in;
+       gc->direction_output = u8_gpio_dir_out;
+       gc->get = u8_gpio_get;
+       gc->set = u8_gpio_set;
+
+       ret = of_mm_gpiochip_add(np, mm_gc);
+       if (ret)
+               goto err;
+       return 0;
+err:
+       kfree(u8_gc);
+       return ret;
+}
+
+void __init simple_gpiochip_init(const char *compatible)
+{
+       struct device_node *np;
+
+       for_each_compatible_node(np, NULL, compatible) {
+               int ret;
+               struct resource r;
+
+               ret = of_address_to_resource(np, 0, &r);
+               if (ret)
+                       goto err;
+
+               switch (resource_size(&r)) {
+               case 1:
+                       ret = u8_simple_gpiochip_add(np);
+                       if (ret)
+                               goto err;
+                       break;
+               default:
+                       /*
+                        * Whenever you need support for GPIO bank widths > 1,
+                        * please just turn the u8_ code into macros and
+                        * construct the needed uX_ variants from them.
+                        */
+                       ret = -ENOSYS;
+                       goto err;
+               }
+               continue;
+err:
+               pr_err("%s: registration failed, status %d\n",
+                      np->full_name, ret);
+       }
+}
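
The u8_pin2mask() helper above numbers pins MSB-first, so GPIO 0 maps to bit 7, matching the way BCSR bit diagrams are usually drawn. A self-contained userspace check of that mapping, with hypothetical names:

#include <assert.h>

static unsigned char pin2mask(unsigned int pin)
{
        return 1 << (8 - 1 - pin);      /* same arithmetic as u8_pin2mask() */
}

int main(void)
{
        assert(pin2mask(0) == 0x80);    /* pin 0 is the most significant bit */
        assert(pin2mask(7) == 0x01);    /* pin 7 is the least significant bit */
        return 0;
}
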
diff --git a/arch/powerpc/sysdev/simple_gpio.h b/arch/powerpc/sysdev/simple_gpio.h
new file mode 100644 (file)
index 0000000..3a7b0c5
--- /dev/null
@@ -0,0 +1,12 @@
+#ifndef __SYSDEV_SIMPLE_GPIO_H
+#define __SYSDEV_SIMPLE_GPIO_H
+
+#include <linux/errno.h>
+
+#ifdef CONFIG_SIMPLE_GPIO
+extern void simple_gpiochip_init(const char *compatible);
+#else
+static inline void simple_gpiochip_init(const char *compatible) {}
+#endif /* CONFIG_SIMPLE_GPIO */
+
+#endif /* __SYSDEV_SIMPLE_GPIO_H */
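
Because the header above stubs out simple_gpiochip_init() when CONFIG_SIMPLE_GPIO is off, board code can call it unconditionally, as mpc836x_usb_cfg() does earlier in this patch. A hedged sketch of such a call site; the compatible string is hypothetical:

#include <linux/init.h>
#include "simple_gpio.h"

static void __init example_board_setup(void)
{
        /* Compiles to a no-op when CONFIG_SIMPLE_GPIO=n, so no #ifdef
         * is needed at the call site. */
        simple_gpiochip_init("fsl,example-bcsr-gpio");
}
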
index 4f8d60586b076caa72349094d1baa5f4a98d555c..8040376c4890cc11c95e7c783f331abc7becb456 100644 (file)
@@ -54,7 +54,8 @@ extern int __smp4m_processor_id(void);
 #define SMP_PRINTK(x)
 #endif
 
-static inline unsigned long swap(volatile unsigned long *ptr, unsigned long val)
+static inline unsigned long
+swap_ulong(volatile unsigned long *ptr, unsigned long val)
 {
        __asm__ __volatile__("swap [%1], %0\n\t" :
                             "=&r" (val), "=&r" (ptr) :
@@ -90,7 +91,7 @@ void __cpuinit smp4m_callin(void)
         * to call the scheduler code.
         */
        /* Allow master to continue. */
-       swap(&cpu_callin_map[cpuid], 1);
+       swap_ulong(&cpu_callin_map[cpuid], 1);
 
        /* XXX: What's up with all the flushes? */
        local_flush_cache_all();
index b0461856acfb1ca4b236bd355c4cd94604b36503..a4cff5d6e380fa8b027eaf3413f1911fbe821599 100644 (file)
@@ -982,7 +982,7 @@ static int __init longhaul_init(void)
        case 10:
                printk(KERN_ERR PFX "Use acpi-cpufreq driver for VIA C7\n");
        default:
-               ;;
+               ;
        }
 
        return -ENODEV;
index 6b94fb7be5f280bbe2a8e5c0ddace6355cae0185..00c46e0b40e47eec54b2710f06cbe59d7308905f 100644 (file)
 #include <linux/device.h>
 #include <linux/string.h>
 #include <linux/slab.h>
+#include <linux/io.h>
 #include <linux/amba/bus.h>
 
-#include <asm/io.h>
+#include <asm/irq.h>
 #include <asm/sizes.h>
 
 #define to_amba_device(d)      container_of(d, struct amba_device, dev)
index 656448c7fef9df9965091d0d41c75d5db5778086..7f701cbe14ab83400992c0dd9344705db8f54e68 100644 (file)
@@ -105,7 +105,7 @@ enum {
        board_ahci_ign_iferr    = 2,
        board_ahci_sb600        = 3,
        board_ahci_mv           = 4,
-       board_ahci_sb700        = 5,
+       board_ahci_sb700        = 5, /* for SB700 and SB800 */
        board_ahci_mcp65        = 6,
        board_ahci_nopmp        = 7,
 
@@ -439,7 +439,7 @@ static const struct ata_port_info ahci_port_info[] = {
                .udma_mask      = ATA_UDMA6,
                .port_ops       = &ahci_ops,
        },
-       /* board_ahci_sb700 */
+       /* board_ahci_sb700, for SB700 and SB800 */
        {
                AHCI_HFLAGS     (AHCI_HFLAG_IGN_SERR_INTERNAL),
                .flags          = AHCI_FLAG_COMMON,
@@ -2446,6 +2446,8 @@ static void ahci_print_info(struct ata_host *host)
                speed_s = "1.5";
        else if (speed == 2)
                speed_s = "3";
+       else if (speed == 3)
+               speed_s = "6";
        else
                speed_s = "?";
 
@@ -2610,6 +2612,10 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
            (pdev->revision == 0xa1 || pdev->revision == 0xa2))
                hpriv->flags |= AHCI_HFLAG_NO_MSI;
 
+       /* SB800 does NOT need the workaround to ignore SERR_INTERNAL */
+       if (board_id == board_ahci_sb700 && pdev->revision >= 0x40)
+               hpriv->flags &= ~AHCI_HFLAG_IGN_SERR_INTERNAL;
+
        if ((hpriv->flags & AHCI_HFLAG_NO_MSI) || pci_enable_msi(pdev))
                pci_intx(pdev, 1);
 
index 5fdf1678d0ccc716c4fc73ec5d8b97328cdb5253..887d8f46a287312fd554d77a1c498f37a30cd307 100644 (file)
@@ -154,11 +154,13 @@ struct piix_map_db {
 
 struct piix_host_priv {
        const int *map;
+       u32 saved_iocfg;
        void __iomem *sidpr;
 };
 
 static int piix_init_one(struct pci_dev *pdev,
                         const struct pci_device_id *ent);
+static void piix_remove_one(struct pci_dev *pdev);
 static int piix_pata_prereset(struct ata_link *link, unsigned long deadline);
 static void piix_set_piomode(struct ata_port *ap, struct ata_device *adev);
 static void piix_set_dmamode(struct ata_port *ap, struct ata_device *adev);
@@ -296,7 +298,7 @@ static struct pci_driver piix_pci_driver = {
        .name                   = DRV_NAME,
        .id_table               = piix_pci_tbl,
        .probe                  = piix_init_one,
-       .remove                 = ata_pci_remove_one,
+       .remove                 = piix_remove_one,
 #ifdef CONFIG_PM
        .suspend                = piix_pci_device_suspend,
        .resume                 = piix_pci_device_resume,
@@ -308,7 +310,7 @@ static struct scsi_host_template piix_sht = {
 };
 
 static struct ata_port_operations piix_pata_ops = {
-       .inherits               = &ata_bmdma_port_ops,
+       .inherits               = &ata_bmdma32_port_ops,
        .cable_detect           = ata_cable_40wire,
        .set_piomode            = piix_set_piomode,
        .set_dmamode            = piix_set_dmamode,
@@ -610,8 +612,9 @@ static const struct ich_laptop ich_laptop[] = {
 static int ich_pata_cable_detect(struct ata_port *ap)
 {
        struct pci_dev *pdev = to_pci_dev(ap->host->dev);
+       struct piix_host_priv *hpriv = ap->host->private_data;
        const struct ich_laptop *lap = &ich_laptop[0];
-       u8 tmp, mask;
+       u8 mask;
 
        /* Check for specials - Acer Aspire 5602WLMi */
        while (lap->device) {
@@ -625,8 +628,7 @@ static int ich_pata_cable_detect(struct ata_port *ap)
 
        /* check BIOS cable detect results */
        mask = ap->port_no == 0 ? PIIX_80C_PRI : PIIX_80C_SEC;
-       pci_read_config_byte(pdev, PIIX_IOCFG, &tmp);
-       if ((tmp & mask) == 0)
+       if ((hpriv->saved_iocfg & mask) == 0)
                return ATA_CBL_PATA40;
        return ATA_CBL_PATA80;
 }
@@ -1350,7 +1352,7 @@ static int __devinit piix_init_sidpr(struct ata_host *host)
        return 0;
 }
 
-static void piix_iocfg_bit18_quirk(struct pci_dev *pdev)
+static void piix_iocfg_bit18_quirk(struct ata_host *host)
 {
        static const struct dmi_system_id sysids[] = {
                {
@@ -1367,7 +1369,8 @@ static void piix_iocfg_bit18_quirk(struct pci_dev *pdev)
 
                { }     /* terminate list */
        };
-       u32 iocfg;
+       struct pci_dev *pdev = to_pci_dev(host->dev);
+       struct piix_host_priv *hpriv = host->private_data;
 
        if (!dmi_check_system(sysids))
                return;
@@ -1376,12 +1379,11 @@ static void piix_iocfg_bit18_quirk(struct pci_dev *pdev)
         * seem to use it to disable a channel.  Clear the bit on the
         * affected systems.
         */
-       pci_read_config_dword(pdev, PIIX_IOCFG, &iocfg);
-       if (iocfg & (1 << 18)) {
+       if (hpriv->saved_iocfg & (1 << 18)) {
                dev_printk(KERN_INFO, &pdev->dev,
                           "applying IOCFG bit18 quirk\n");
-               iocfg &= ~(1 << 18);
-               pci_write_config_dword(pdev, PIIX_IOCFG, iocfg);
+               pci_write_config_dword(pdev, PIIX_IOCFG,
+                                      hpriv->saved_iocfg & ~(1 << 18));
        }
 }
 
@@ -1430,6 +1432,17 @@ static int __devinit piix_init_one(struct pci_dev *pdev,
        if (rc)
                return rc;
 
+       hpriv = devm_kzalloc(dev, sizeof(*hpriv), GFP_KERNEL);
+       if (!hpriv)
+               return -ENOMEM;
+
+       /* Save IOCFG, this will be used for cable detection, quirk
+        * detection and restoration on detach.  This is necessary
+        * because some ACPI implementations mess up cable related
+        * bits on _STM.  Reported on kernel bz#11879.
+        */
+       pci_read_config_dword(pdev, PIIX_IOCFG, &hpriv->saved_iocfg);
+
        /* ICH6R may be driven by either ata_piix or ahci driver
         * regardless of BIOS configuration.  Make sure AHCI mode is
         * off.
@@ -1441,10 +1454,6 @@ static int __devinit piix_init_one(struct pci_dev *pdev,
        }
 
        /* SATA map init can change port_info, do it before prepping host */
-       hpriv = devm_kzalloc(dev, sizeof(*hpriv), GFP_KERNEL);
-       if (!hpriv)
-               return -ENOMEM;
-
        if (port_flags & ATA_FLAG_SATA)
                hpriv->map = piix_init_sata_map(pdev, port_info,
                                        piix_map_db_table[ent->driver_data]);
@@ -1463,7 +1472,7 @@ static int __devinit piix_init_one(struct pci_dev *pdev,
        }
 
        /* apply IOCFG bit18 quirk */
-       piix_iocfg_bit18_quirk(pdev);
+       piix_iocfg_bit18_quirk(host);
 
        /* On ICH5, some BIOSen disable the interrupt using the
         * PCI_COMMAND_INTX_DISABLE bit added in PCI 2.3.
@@ -1488,6 +1497,16 @@ static int __devinit piix_init_one(struct pci_dev *pdev,
        return ata_pci_sff_activate_host(host, ata_sff_interrupt, &piix_sht);
 }
 
+static void piix_remove_one(struct pci_dev *pdev)
+{
+       struct ata_host *host = dev_get_drvdata(&pdev->dev);
+       struct piix_host_priv *hpriv = host->private_data;
+
+       pci_write_config_dword(pdev, PIIX_IOCFG, hpriv->saved_iocfg);
+
+       ata_pci_remove_one(pdev);
+}
+
 static int __init piix_init(void)
 {
        int rc;
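
piix_remove_one() above completes a save/restore bracket: the IOCFG dword is captured once in piix_init_one() and written back verbatim on detach, undoing anything ACPI's _STM scribbled over it in between. A minimal sketch of the pattern; the offset and names are hypothetical, not the driver's actual code:

#include <linux/pci.h>

#define EXAMPLE_IOCFG  0x54     /* hypothetical config-space offset */

static u32 example_saved_iocfg;

static void example_probe_save(struct pci_dev *pdev)
{
        pci_read_config_dword(pdev, EXAMPLE_IOCFG, &example_saved_iocfg);
}

static void example_remove_restore(struct pci_dev *pdev)
{
        pci_write_config_dword(pdev, EXAMPLE_IOCFG, example_saved_iocfg);
}
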
index f178a450ec08e6b56db21736fe9ed438ccf76aa3..175df54eb6646c595cc23f3410d80a8eaa61eab4 100644 (file)
@@ -1007,6 +1007,7 @@ static const char *sata_spd_string(unsigned int spd)
        static const char * const spd_str[] = {
                "1.5 Gbps",
                "3.0 Gbps",
+               "6.0 Gbps",
        };
 
        if (spd == 0 || (spd - 1) >= ARRAY_SIZE(spd_str))
@@ -2000,6 +2001,10 @@ unsigned int ata_pio_need_iordy(const struct ata_device *adev)
           as the caller should know this */
        if (adev->link->ap->flags & ATA_FLAG_NO_IORDY)
                return 0;
+       /* CF spec. r4.1 Table 22 says no iordy on PIO5 and PIO6.  */
+       if (ata_id_is_cfa(adev->id)
+           && (adev->pio_mode == XFER_PIO_5 || adev->pio_mode == XFER_PIO_6))
+               return 0;
        /* PIO3 and higher it is mandatory */
        if (adev->pio_mode > XFER_PIO_2)
                return 1;
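
A simplified userspace model of the IORDY decision after this hunk; the constants are stand-ins for the libata XFER_PIO_* values, and the real ata_pio_need_iordy() additionally consults the drive's IORDY ID bit for modes at or below PIO2:

#include <stdbool.h>
#include <stdio.h>

/* hypothetical stand-ins for the libata PIO mode constants */
enum { PIO_2 = 2, PIO_3, PIO_4, PIO_5, PIO_6 };

static bool pio_need_iordy(bool is_cfa, int pio_mode)
{
        if (is_cfa && (pio_mode == PIO_5 || pio_mode == PIO_6))
                return false;   /* CF spec r4.1 Table 22: no IORDY here */
        return pio_mode > PIO_2;        /* mandatory from PIO3 upward */
}

int main(void)
{
        printf("CFA PIO6: %d, ATA PIO4: %d\n",
               pio_need_iordy(true, PIO_6),     /* prints 0 */
               pio_need_iordy(false, PIO_4));   /* prints 1 */
        return 0;
}
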
index 9033d164c4ece13f876777fd7fbb549e4c27456b..c59ad76c84b1ebe2e3e28b7a08166ffb5f0313fd 100644 (file)
@@ -66,6 +66,7 @@ const struct ata_port_operations ata_sff_port_ops = {
 
        .port_start             = ata_sff_port_start,
 };
+EXPORT_SYMBOL_GPL(ata_sff_port_ops);
 
 const struct ata_port_operations ata_bmdma_port_ops = {
        .inherits               = &ata_sff_port_ops,
@@ -77,6 +78,14 @@ const struct ata_port_operations ata_bmdma_port_ops = {
        .bmdma_stop             = ata_bmdma_stop,
        .bmdma_status           = ata_bmdma_status,
 };
+EXPORT_SYMBOL_GPL(ata_bmdma_port_ops);
+
+const struct ata_port_operations ata_bmdma32_port_ops = {
+       .inherits               = &ata_bmdma_port_ops,
+
+       .sff_data_xfer          = ata_sff_data_xfer32,
+};
+EXPORT_SYMBOL_GPL(ata_bmdma32_port_ops);
 
 /**
  *     ata_fill_sg - Fill PCI IDE PRD table
@@ -166,8 +175,9 @@ static void ata_fill_sg_dumb(struct ata_queued_cmd *qc)
                        blen = len & 0xffff;
                        ap->prd[pi].addr = cpu_to_le32(addr);
                        if (blen == 0) {
-                          /* Some PATA chipsets like the CS5530 can't
-                             cope with 0x0000 meaning 64K as the spec says */
+                               /* Some PATA chipsets like the CS5530 can't
+                                  cope with 0x0000 meaning 64K as the spec
+                                  says */
                                ap->prd[pi].flags_len = cpu_to_le32(0x8000);
                                blen = 0x8000;
                                ap->prd[++pi].addr = cpu_to_le32(addr + 0x8000);
@@ -200,6 +210,7 @@ void ata_sff_qc_prep(struct ata_queued_cmd *qc)
 
        ata_fill_sg(qc);
 }
+EXPORT_SYMBOL_GPL(ata_sff_qc_prep);
 
 /**
  *     ata_sff_dumb_qc_prep - Prepare taskfile for submission
@@ -217,6 +228,7 @@ void ata_sff_dumb_qc_prep(struct ata_queued_cmd *qc)
 
        ata_fill_sg_dumb(qc);
 }
+EXPORT_SYMBOL_GPL(ata_sff_dumb_qc_prep);
 
 /**
  *     ata_sff_check_status - Read device status reg & clear interrupt
@@ -233,6 +245,7 @@ u8 ata_sff_check_status(struct ata_port *ap)
 {
        return ioread8(ap->ioaddr.status_addr);
 }
+EXPORT_SYMBOL_GPL(ata_sff_check_status);
 
 /**
  *     ata_sff_altstatus - Read device alternate status reg
@@ -275,7 +288,7 @@ static u8 ata_sff_irq_status(struct ata_port *ap)
                status = ata_sff_altstatus(ap);
                /* Not us: We are busy */
                if (status & ATA_BUSY)
-                       return status;
+                       return status;
        }
        /* Clear INTRQ latch */
        status = ap->ops->sff_check_status(ap);
@@ -319,6 +332,7 @@ void ata_sff_pause(struct ata_port *ap)
        ata_sff_sync(ap);
        ndelay(400);
 }
+EXPORT_SYMBOL_GPL(ata_sff_pause);
 
 /**
  *     ata_sff_dma_pause       -       Pause before commencing DMA
@@ -327,7 +341,7 @@ void ata_sff_pause(struct ata_port *ap)
  *     Perform I/O fencing and ensure sufficient cycle delays occur
  *     for the HDMA1:0 transition
  */
+
 void ata_sff_dma_pause(struct ata_port *ap)
 {
        if (ap->ops->sff_check_altstatus || ap->ioaddr.altstatus_addr) {
@@ -341,6 +355,7 @@ void ata_sff_dma_pause(struct ata_port *ap)
           corruption. */
        BUG();
 }
+EXPORT_SYMBOL_GPL(ata_sff_dma_pause);
 
 /**
  *     ata_sff_busy_sleep - sleep until BSY clears, or timeout
@@ -396,6 +411,7 @@ int ata_sff_busy_sleep(struct ata_port *ap,
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(ata_sff_busy_sleep);
 
 static int ata_sff_check_ready(struct ata_link *link)
 {
@@ -422,6 +438,7 @@ int ata_sff_wait_ready(struct ata_link *link, unsigned long deadline)
 {
        return ata_wait_ready(link, deadline, ata_sff_check_ready);
 }
+EXPORT_SYMBOL_GPL(ata_sff_wait_ready);
 
 /**
  *     ata_sff_dev_select - Select device 0/1 on ATA bus
@@ -449,6 +466,7 @@ void ata_sff_dev_select(struct ata_port *ap, unsigned int device)
        iowrite8(tmp, ap->ioaddr.device_addr);
        ata_sff_pause(ap);      /* needed; also flushes, for mmio */
 }
+EXPORT_SYMBOL_GPL(ata_sff_dev_select);
 
 /**
  *     ata_dev_select - Select device 0/1 on ATA bus
@@ -513,6 +531,7 @@ u8 ata_sff_irq_on(struct ata_port *ap)
 
        return tmp;
 }
+EXPORT_SYMBOL_GPL(ata_sff_irq_on);
 
 /**
  *     ata_sff_irq_clear - Clear PCI IDE BMDMA interrupt.
@@ -534,6 +553,7 @@ void ata_sff_irq_clear(struct ata_port *ap)
 
        iowrite8(ioread8(mmio + ATA_DMA_STATUS), mmio + ATA_DMA_STATUS);
 }
+EXPORT_SYMBOL_GPL(ata_sff_irq_clear);
 
 /**
  *     ata_sff_tf_load - send taskfile registers to host controller
@@ -593,6 +613,7 @@ void ata_sff_tf_load(struct ata_port *ap, const struct ata_taskfile *tf)
 
        ata_wait_idle(ap);
 }
+EXPORT_SYMBOL_GPL(ata_sff_tf_load);
 
 /**
  *     ata_sff_tf_read - input device's ATA taskfile shadow registers
@@ -633,6 +654,7 @@ void ata_sff_tf_read(struct ata_port *ap, struct ata_taskfile *tf)
                        WARN_ON(1);
        }
 }
+EXPORT_SYMBOL_GPL(ata_sff_tf_read);
 
 /**
  *     ata_sff_exec_command - issue ATA command to host controller
@@ -652,6 +674,7 @@ void ata_sff_exec_command(struct ata_port *ap, const struct ata_taskfile *tf)
        iowrite8(tf->command, ap->ioaddr.command_addr);
        ata_sff_pause(ap);
 }
+EXPORT_SYMBOL_GPL(ata_sff_exec_command);
 
 /**
  *     ata_tf_to_host - issue ATA taskfile to host controller
@@ -717,6 +740,53 @@ unsigned int ata_sff_data_xfer(struct ata_device *dev, unsigned char *buf,
 
        return words << 1;
 }
+EXPORT_SYMBOL_GPL(ata_sff_data_xfer);
+
+/**
+ *     ata_sff_data_xfer32 - Transfer data by PIO
+ *     @dev: device to target
+ *     @buf: data buffer
+ *     @buflen: buffer length
+ *     @rw: read/write
+ *
+ *     Transfer data from/to the device data register by PIO using 32bit
+ *     I/O operations.
+ *
+ *     LOCKING:
+ *     Inherited from caller.
+ *
+ *     RETURNS:
+ *     Bytes consumed.
+ */
+
+unsigned int ata_sff_data_xfer32(struct ata_device *dev, unsigned char *buf,
+                              unsigned int buflen, int rw)
+{
+       struct ata_port *ap = dev->link->ap;
+       void __iomem *data_addr = ap->ioaddr.data_addr;
+       unsigned int words = buflen >> 2;
+       int slop = buflen & 3;
+
+       /* Transfer multiple of 4 bytes */
+       if (rw == READ)
+               ioread32_rep(data_addr, buf, words);
+       else
+               iowrite32_rep(data_addr, buf, words);
+
+       if (unlikely(slop)) {
+               __le32 pad;
+               if (rw == READ) {
+                       pad = cpu_to_le32(ioread32(ap->ioaddr.data_addr));
+                       memcpy(buf + buflen - slop, &pad, slop);
+               } else {
+                       memcpy(&pad, buf + buflen - slop, slop);
+                       iowrite32(le32_to_cpu(pad), ap->ioaddr.data_addr);
+               }
+               words++;
+       }
+       return words << 2;
+}
+EXPORT_SYMBOL_GPL(ata_sff_data_xfer32);
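
A quick userspace model of the transfer-size split in ata_sff_data_xfer32() above: buflen >> 2 full dword cycles, plus one padded dword whenever 1 to 3 trailing "slop" bytes remain, with the return value rounded up to whole dwords. The buffer length here is an arbitrary example:

#include <stdio.h>

int main(void)
{
        unsigned int buflen = 517;              /* hypothetical transfer size */
        unsigned int words = buflen >> 2;       /* 129 full dwords */
        unsigned int slop = buflen & 3;         /* 1 trailing byte */

        if (slop)
                words++;                        /* one extra, padded dword */

        printf("%u bytes -> %u dword cycles, %u bytes consumed\n",
               buflen, words, words << 2);      /* 517 -> 130, 520 */
        return 0;
}
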
 
 /**
  *     ata_sff_data_xfer_noirq - Transfer data by PIO
@@ -746,6 +816,7 @@ unsigned int ata_sff_data_xfer_noirq(struct ata_device *dev, unsigned char *buf,
 
        return consumed;
 }
+EXPORT_SYMBOL_GPL(ata_sff_data_xfer_noirq);
 
 /**
  *     ata_pio_sector - Transfer a sector of data.
@@ -922,13 +993,15 @@ next_sg:
                buf = kmap_atomic(page, KM_IRQ0);
 
                /* do the actual data transfer */
-               consumed = ap->ops->sff_data_xfer(dev,  buf + offset, count, rw);
+               consumed = ap->ops->sff_data_xfer(dev,  buf + offset,
+                                                               count, rw);
 
                kunmap_atomic(buf, KM_IRQ0);
                local_irq_restore(flags);
        } else {
                buf = page_address(page);
-               consumed = ap->ops->sff_data_xfer(dev,  buf + offset, count, rw);
+               consumed = ap->ops->sff_data_xfer(dev,  buf + offset,
+                                                               count, rw);
        }
 
        bytes -= min(bytes, consumed);
@@ -1013,18 +1086,19 @@ static void atapi_pio_bytes(struct ata_queued_cmd *qc)
  *     RETURNS:
  *     1 if ok in workqueue, 0 otherwise.
  */
-static inline int ata_hsm_ok_in_wq(struct ata_port *ap, struct ata_queued_cmd *qc)
+static inline int ata_hsm_ok_in_wq(struct ata_port *ap,
+                                               struct ata_queued_cmd *qc)
 {
        if (qc->tf.flags & ATA_TFLAG_POLLING)
                return 1;
 
        if (ap->hsm_task_state == HSM_ST_FIRST) {
                if (qc->tf.protocol == ATA_PROT_PIO &&
-                   (qc->tf.flags & ATA_TFLAG_WRITE))
+                  (qc->tf.flags & ATA_TFLAG_WRITE))
                    return 1;
 
                if (ata_is_atapi(qc->tf.protocol) &&
-                   !(qc->dev->flags & ATA_DFLAG_CDB_INTR))
+                  !(qc->dev->flags & ATA_DFLAG_CDB_INTR))
                        return 1;
        }
 
@@ -1338,6 +1412,7 @@ fsm_start:
 
        return poll_next;
 }
+EXPORT_SYMBOL_GPL(ata_sff_hsm_move);
 
 void ata_pio_task(struct work_struct *work)
 {
@@ -1507,6 +1582,7 @@ unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc)
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(ata_sff_qc_issue);
 
 /**
  *     ata_sff_qc_fill_rtf - fill result TF using ->sff_tf_read
@@ -1526,6 +1602,7 @@ bool ata_sff_qc_fill_rtf(struct ata_queued_cmd *qc)
        qc->ap->ops->sff_tf_read(qc->ap, &qc->result_tf);
        return true;
 }
+EXPORT_SYMBOL_GPL(ata_sff_qc_fill_rtf);
 
 /**
  *     ata_sff_host_intr - Handle host interrupt for given (port, task)
@@ -1623,6 +1700,7 @@ idle_irq:
 #endif
        return 0;       /* irq not handled */
 }
+EXPORT_SYMBOL_GPL(ata_sff_host_intr);
 
 /**
  *     ata_sff_interrupt - Default ATA host interrupt handler
@@ -1667,6 +1745,7 @@ irqreturn_t ata_sff_interrupt(int irq, void *dev_instance)
 
        return IRQ_RETVAL(handled);
 }
+EXPORT_SYMBOL_GPL(ata_sff_interrupt);
 
 /**
  *     ata_sff_freeze - Freeze SFF controller port
@@ -1695,6 +1774,7 @@ void ata_sff_freeze(struct ata_port *ap)
 
        ap->ops->sff_irq_clear(ap);
 }
+EXPORT_SYMBOL_GPL(ata_sff_freeze);
 
 /**
  *     ata_sff_thaw - Thaw SFF controller port
@@ -1712,6 +1792,7 @@ void ata_sff_thaw(struct ata_port *ap)
        ap->ops->sff_irq_clear(ap);
        ap->ops->sff_irq_on(ap);
 }
+EXPORT_SYMBOL_GPL(ata_sff_thaw);
 
 /**
  *     ata_sff_prereset - prepare SFF link for reset
@@ -1753,6 +1834,7 @@ int ata_sff_prereset(struct ata_link *link, unsigned long deadline)
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(ata_sff_prereset);
 
 /**
  *     ata_devchk - PATA device presence detection
@@ -1865,6 +1947,7 @@ unsigned int ata_sff_dev_classify(struct ata_device *dev, int present,
 
        return class;
 }
+EXPORT_SYMBOL_GPL(ata_sff_dev_classify);
 
 /**
  *     ata_sff_wait_after_reset - wait for devices to become ready after reset
@@ -1941,6 +2024,7 @@ int ata_sff_wait_after_reset(struct ata_link *link, unsigned int devmask,
 
        return ret;
 }
+EXPORT_SYMBOL_GPL(ata_sff_wait_after_reset);
 
 static int ata_bus_softreset(struct ata_port *ap, unsigned int devmask,
                             unsigned long deadline)
@@ -2013,6 +2097,7 @@ int ata_sff_softreset(struct ata_link *link, unsigned int *classes,
        DPRINTK("EXIT, classes[0]=%u [1]=%u\n", classes[0], classes[1]);
        return 0;
 }
+EXPORT_SYMBOL_GPL(ata_sff_softreset);
 
 /**
  *     sata_sff_hardreset - reset host port via SATA phy reset
@@ -2045,6 +2130,7 @@ int sata_sff_hardreset(struct ata_link *link, unsigned int *class,
        DPRINTK("EXIT, class=%u\n", *class);
        return rc;
 }
+EXPORT_SYMBOL_GPL(sata_sff_hardreset);
 
 /**
  *     ata_sff_postreset - SFF postreset callback
@@ -2080,6 +2166,7 @@ void ata_sff_postreset(struct ata_link *link, unsigned int *classes)
        if (ap->ioaddr.ctl_addr)
                iowrite8(ap->ctl, ap->ioaddr.ctl_addr);
 }
+EXPORT_SYMBOL_GPL(ata_sff_postreset);
 
 /**
  *     ata_sff_error_handler - Stock error handler for BMDMA controller
@@ -2152,6 +2239,7 @@ void ata_sff_error_handler(struct ata_port *ap)
        ata_do_eh(ap, ap->ops->prereset, softreset, hardreset,
                  ap->ops->postreset);
 }
+EXPORT_SYMBOL_GPL(ata_sff_error_handler);
 
 /**
  *     ata_sff_post_internal_cmd - Stock post_internal_cmd for SFF controller
@@ -2174,6 +2262,7 @@ void ata_sff_post_internal_cmd(struct ata_queued_cmd *qc)
 
        spin_unlock_irqrestore(ap->lock, flags);
 }
+EXPORT_SYMBOL_GPL(ata_sff_post_internal_cmd);
 
 /**
  *     ata_sff_port_start - Set port up for dma.
@@ -2194,6 +2283,7 @@ int ata_sff_port_start(struct ata_port *ap)
                return ata_port_start(ap);
        return 0;
 }
+EXPORT_SYMBOL_GPL(ata_sff_port_start);
 
 /**
  *     ata_sff_std_ports - initialize ioaddr with standard port offsets.
@@ -2219,6 +2309,7 @@ void ata_sff_std_ports(struct ata_ioports *ioaddr)
        ioaddr->status_addr = ioaddr->cmd_addr + ATA_REG_STATUS;
        ioaddr->command_addr = ioaddr->cmd_addr + ATA_REG_CMD;
 }
+EXPORT_SYMBOL_GPL(ata_sff_std_ports);
 
 unsigned long ata_bmdma_mode_filter(struct ata_device *adev,
                                    unsigned long xfer_mask)
@@ -2230,6 +2321,7 @@ unsigned long ata_bmdma_mode_filter(struct ata_device *adev,
                xfer_mask &= ~(ATA_MASK_MWDMA | ATA_MASK_UDMA);
        return xfer_mask;
 }
+EXPORT_SYMBOL_GPL(ata_bmdma_mode_filter);
 
 /**
  *     ata_bmdma_setup - Set up PCI IDE BMDMA transaction
@@ -2258,6 +2350,7 @@ void ata_bmdma_setup(struct ata_queued_cmd *qc)
        /* issue r/w command */
        ap->ops->sff_exec_command(ap, &qc->tf);
 }
+EXPORT_SYMBOL_GPL(ata_bmdma_setup);
 
 /**
  *     ata_bmdma_start - Start a PCI IDE BMDMA transaction
@@ -2290,6 +2383,7 @@ void ata_bmdma_start(struct ata_queued_cmd *qc)
         * unnecessarily delayed for MMIO
         */
 }
+EXPORT_SYMBOL_GPL(ata_bmdma_start);
 
 /**
  *     ata_bmdma_stop - Stop PCI IDE BMDMA transfer
@@ -2314,6 +2408,7 @@ void ata_bmdma_stop(struct ata_queued_cmd *qc)
        /* one-PIO-cycle guaranteed wait, per spec, for HDMA1:0 transition */
        ata_sff_dma_pause(ap);
 }
+EXPORT_SYMBOL_GPL(ata_bmdma_stop);
 
 /**
  *     ata_bmdma_status - Read PCI IDE BMDMA status
@@ -2330,6 +2425,7 @@ u8 ata_bmdma_status(struct ata_port *ap)
 {
        return ioread8(ap->ioaddr.bmdma_addr + ATA_DMA_STATUS);
 }
+EXPORT_SYMBOL_GPL(ata_bmdma_status);
 
 /**
  *     ata_bus_reset - reset host port and associated ATA channel
@@ -2422,6 +2518,7 @@ err_out:
 
        DPRINTK("EXIT\n");
 }
+EXPORT_SYMBOL_GPL(ata_bus_reset);
 
 #ifdef CONFIG_PCI
 
@@ -2449,6 +2546,7 @@ int ata_pci_bmdma_clear_simplex(struct pci_dev *pdev)
                return -EOPNOTSUPP;
        return 0;
 }
+EXPORT_SYMBOL_GPL(ata_pci_bmdma_clear_simplex);
 
 /**
  *     ata_pci_bmdma_init - acquire PCI BMDMA resources and init ATA host
@@ -2501,11 +2599,12 @@ int ata_pci_bmdma_init(struct ata_host *host)
                        host->flags |= ATA_HOST_SIMPLEX;
 
                ata_port_desc(ap, "bmdma 0x%llx",
-                       (unsigned long long)pci_resource_start(pdev, 4) + 8 * i);
+                   (unsigned long long)pci_resource_start(pdev, 4) + 8 * i);
        }
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(ata_pci_bmdma_init);
 
 static int ata_resources_present(struct pci_dev *pdev, int port)
 {
@@ -2513,7 +2612,7 @@ static int ata_resources_present(struct pci_dev *pdev, int port)
 
        /* Check the PCI resources for this channel are enabled */
        port = port * 2;
-       for (i = 0; i < 2; i ++) {
+       for (i = 0; i < 2; i++) {
                if (pci_resource_start(pdev, port + i) == 0 ||
                    pci_resource_len(pdev, port + i) == 0)
                        return 0;
@@ -2598,6 +2697,7 @@ int ata_pci_sff_init_host(struct ata_host *host)
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(ata_pci_sff_init_host);
 
 /**
  *     ata_pci_sff_prepare_host - helper to prepare native PCI ATA host
@@ -2615,7 +2715,7 @@ int ata_pci_sff_init_host(struct ata_host *host)
  *     0 on success, -errno otherwise.
  */
 int ata_pci_sff_prepare_host(struct pci_dev *pdev,
-                            const struct ata_port_info * const * ppi,
+                            const struct ata_port_info * const *ppi,
                             struct ata_host **r_host)
 {
        struct ata_host *host;
@@ -2645,17 +2745,18 @@ int ata_pci_sff_prepare_host(struct pci_dev *pdev,
        *r_host = host;
        return 0;
 
- err_bmdma:
+err_bmdma:
        /* This is necessary because PCI and iomap resources are
         * merged and releasing the top group won't release the
         * acquired resources if some of those have been acquired
         * before entering this function.
         */
        pcim_iounmap_regions(pdev, 0xf);
- err_out:
+err_out:
        devres_release_group(&pdev->dev, NULL);
        return rc;
 }
+EXPORT_SYMBOL_GPL(ata_pci_sff_prepare_host);
 
 /**
  *     ata_pci_sff_activate_host - start SFF host, request IRQ and register it
@@ -2741,7 +2842,7 @@ int ata_pci_sff_activate_host(struct ata_host *host,
        }
 
        rc = ata_host_register(host, sht);
- out:
+out:
        if (rc == 0)
                devres_remove_group(dev, NULL);
        else
@@ -2749,6 +2850,7 @@ int ata_pci_sff_activate_host(struct ata_host *host,
 
        return rc;
 }
+EXPORT_SYMBOL_GPL(ata_pci_sff_activate_host);
 
 /**
  *     ata_pci_sff_init_one - Initialize/register PCI IDE host controller
@@ -2776,7 +2878,7 @@ int ata_pci_sff_activate_host(struct ata_host *host,
  *     Zero on success, negative on errno-based value on error.
  */
 int ata_pci_sff_init_one(struct pci_dev *pdev,
-                        const struct ata_port_info * const * ppi,
+                        const struct ata_port_info * const *ppi,
                         struct scsi_host_template *sht, void *host_priv)
 {
        struct device *dev = &pdev->dev;
@@ -2815,7 +2917,7 @@ int ata_pci_sff_init_one(struct pci_dev *pdev,
 
        pci_set_master(pdev);
        rc = ata_pci_sff_activate_host(host, ata_sff_interrupt, sht);
- out:
+out:
        if (rc == 0)
                devres_remove_group(&pdev->dev, NULL);
        else
@@ -2823,54 +2925,7 @@ int ata_pci_sff_init_one(struct pci_dev *pdev,
 
        return rc;
 }
+EXPORT_SYMBOL_GPL(ata_pci_sff_init_one);
 
 #endif /* CONFIG_PCI */
 
-EXPORT_SYMBOL_GPL(ata_sff_port_ops);
-EXPORT_SYMBOL_GPL(ata_bmdma_port_ops);
-EXPORT_SYMBOL_GPL(ata_sff_qc_prep);
-EXPORT_SYMBOL_GPL(ata_sff_dumb_qc_prep);
-EXPORT_SYMBOL_GPL(ata_sff_dev_select);
-EXPORT_SYMBOL_GPL(ata_sff_check_status);
-EXPORT_SYMBOL_GPL(ata_sff_dma_pause);
-EXPORT_SYMBOL_GPL(ata_sff_pause);
-EXPORT_SYMBOL_GPL(ata_sff_busy_sleep);
-EXPORT_SYMBOL_GPL(ata_sff_wait_ready);
-EXPORT_SYMBOL_GPL(ata_sff_tf_load);
-EXPORT_SYMBOL_GPL(ata_sff_tf_read);
-EXPORT_SYMBOL_GPL(ata_sff_exec_command);
-EXPORT_SYMBOL_GPL(ata_sff_data_xfer);
-EXPORT_SYMBOL_GPL(ata_sff_data_xfer_noirq);
-EXPORT_SYMBOL_GPL(ata_sff_irq_on);
-EXPORT_SYMBOL_GPL(ata_sff_irq_clear);
-EXPORT_SYMBOL_GPL(ata_sff_hsm_move);
-EXPORT_SYMBOL_GPL(ata_sff_qc_issue);
-EXPORT_SYMBOL_GPL(ata_sff_qc_fill_rtf);
-EXPORT_SYMBOL_GPL(ata_sff_host_intr);
-EXPORT_SYMBOL_GPL(ata_sff_interrupt);
-EXPORT_SYMBOL_GPL(ata_sff_freeze);
-EXPORT_SYMBOL_GPL(ata_sff_thaw);
-EXPORT_SYMBOL_GPL(ata_sff_prereset);
-EXPORT_SYMBOL_GPL(ata_sff_dev_classify);
-EXPORT_SYMBOL_GPL(ata_sff_wait_after_reset);
-EXPORT_SYMBOL_GPL(ata_sff_softreset);
-EXPORT_SYMBOL_GPL(sata_sff_hardreset);
-EXPORT_SYMBOL_GPL(ata_sff_postreset);
-EXPORT_SYMBOL_GPL(ata_sff_error_handler);
-EXPORT_SYMBOL_GPL(ata_sff_post_internal_cmd);
-EXPORT_SYMBOL_GPL(ata_sff_port_start);
-EXPORT_SYMBOL_GPL(ata_sff_std_ports);
-EXPORT_SYMBOL_GPL(ata_bmdma_mode_filter);
-EXPORT_SYMBOL_GPL(ata_bmdma_setup);
-EXPORT_SYMBOL_GPL(ata_bmdma_start);
-EXPORT_SYMBOL_GPL(ata_bmdma_stop);
-EXPORT_SYMBOL_GPL(ata_bmdma_status);
-EXPORT_SYMBOL_GPL(ata_bus_reset);
-#ifdef CONFIG_PCI
-EXPORT_SYMBOL_GPL(ata_pci_bmdma_clear_simplex);
-EXPORT_SYMBOL_GPL(ata_pci_bmdma_init);
-EXPORT_SYMBOL_GPL(ata_pci_sff_init_host);
-EXPORT_SYMBOL_GPL(ata_pci_sff_prepare_host);
-EXPORT_SYMBOL_GPL(ata_pci_sff_activate_host);
-EXPORT_SYMBOL_GPL(ata_pci_sff_init_one);
-#endif /* CONFIG_PCI */
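The block removed above held every export in one trailing list; after this patch each EXPORT_SYMBOL_GPL() sits directly under its function, as in this sketch (the function is hypothetical):

int ata_example_op(struct ata_port *ap)
{
	return 0;
}
EXPORT_SYMBOL_GPL(ata_example_op);	/* export immediately follows the definition */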
index 73c466e452ca3a4d2181fcd06e5015c301cfa8b1..a7999c19f0c92bc4185a4ae532ec5395c3eb3253 100644 (file)
@@ -19,7 +19,9 @@
  *
  *  TODO/CHECK
  *     Cannot have ATAPI on both master & slave for rev < c2 (???) but
- *     otherwise should do atapi DMA.
+ *     otherwise should do atapi DMA (for now, on older revisions we use
+ *     PIO only for ATAPI)
+ *     Review Sunblade workaround.
  */
 
 #include <linux/kernel.h>
 #include <linux/dmi.h>
 
 #define DRV_NAME "pata_ali"
-#define DRV_VERSION "0.7.5"
+#define DRV_VERSION "0.7.8"
 
 static int ali_atapi_dma = 0;
 module_param_named(atapi_dma, ali_atapi_dma, int, 0644);
 MODULE_PARM_DESC(atapi_dma, "Enable ATAPI DMA (0=disable, 1=enable)");
 
+static struct pci_dev *isa_bridge;
+
 /*
  *     Cable special cases
  */
@@ -147,8 +151,7 @@ static void ali_fifo_control(struct ata_port *ap, struct ata_device *adev, int o
 
        pci_read_config_byte(pdev, pio_fifo, &fifo);
        fifo &= ~(0x0F << shift);
-       if (on)
-               fifo |= (on << shift);
+       fifo |= (on << shift);
        pci_write_config_byte(pdev, pio_fifo, fifo);
 }
 
@@ -337,6 +340,23 @@ static int ali_check_atapi_dma(struct ata_queued_cmd *qc)
        return 0;
 }
 
+static void ali_c2_c3_postreset(struct ata_link *link, unsigned int *classes)
+{
+       u8 r;
+       int port_bit = 4 << link->ap->port_no;
+
+       /* If our bridge is an ALI 1533 then do the extra work */
+       if (isa_bridge) {
+               /* Tristate and re-enable the bus signals */
+               pci_read_config_byte(isa_bridge, 0x58, &r);
+               r &= ~port_bit;
+               pci_write_config_byte(isa_bridge, 0x58, r);
+               r |= port_bit;
+               pci_write_config_byte(isa_bridge, 0x58, r);
+       }
+       ata_sff_postreset(link, classes);
+}
+
 static struct scsi_host_template ali_sht = {
        ATA_BMDMA_SHT(DRV_NAME),
 };
@@ -349,10 +369,11 @@ static struct ata_port_operations ali_early_port_ops = {
        .inherits       = &ata_sff_port_ops,
        .cable_detect   = ata_cable_40wire,
        .set_piomode    = ali_set_piomode,
+       .sff_data_xfer  = ata_sff_data_xfer32,
 };
 
 static const struct ata_port_operations ali_dma_base_ops = {
-       .inherits       = &ata_bmdma_port_ops,
+       .inherits       = &ata_bmdma32_port_ops,
        .set_piomode    = ali_set_piomode,
        .set_dmamode    = ali_set_dmamode,
 };
@@ -377,6 +398,17 @@ static struct ata_port_operations ali_c2_port_ops = {
        .check_atapi_dma = ali_check_atapi_dma,
        .cable_detect   = ali_c2_cable_detect,
        .dev_config     = ali_lock_sectors,
+       .postreset      = ali_c2_c3_postreset,
+};
+
+/*
+ *     Port operations for DMA capable ALi with cable detect
+ */
+static struct ata_port_operations ali_c4_port_ops = {
+       .inherits       = &ali_dma_base_ops,
+       .check_atapi_dma = ali_check_atapi_dma,
+       .cable_detect   = ali_c2_cable_detect,
+       .dev_config     = ali_lock_sectors,
 };
 
 /*
@@ -401,52 +433,49 @@ static struct ata_port_operations ali_c5_port_ops = {
 static void ali_init_chipset(struct pci_dev *pdev)
 {
        u8 tmp;
-       struct pci_dev *north, *isa_bridge;
+       struct pci_dev *north;
 
        /*
         * The chipset revision selects the driver operations and
         * mode data.
         */
 
-       if (pdev->revision >= 0x20 && pdev->revision < 0xC2) {
-               /* 1543-E/F, 1543C-C, 1543C-D, 1543C-E */
-               pci_read_config_byte(pdev, 0x4B, &tmp);
-               /* Clear CD-ROM DMA write bit */
-               tmp &= 0x7F;
-               pci_write_config_byte(pdev, 0x4B, tmp);
-       } else if (pdev->revision >= 0xC2) {
-               /* Enable cable detection logic */
+       if (pdev->revision <= 0x20) {
+               pci_read_config_byte(pdev, 0x53, &tmp);
+               tmp |= 0x03;
+               pci_write_config_byte(pdev, 0x53, tmp);
+       } else {
+               pci_read_config_byte(pdev, 0x4a, &tmp);
+               pci_write_config_byte(pdev, 0x4a, tmp | 0x20);
                pci_read_config_byte(pdev, 0x4B, &tmp);
-               pci_write_config_byte(pdev, 0x4B, tmp | 0x08);
-       }
-       north = pci_get_bus_and_slot(0, PCI_DEVFN(0,0));
-       isa_bridge = pci_get_device(PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1533, NULL);
-
-       if (north && north->vendor == PCI_VENDOR_ID_AL && isa_bridge) {
-               /* Configure the ALi bridge logic. For non ALi rely on BIOS.
-                  Set the south bridge enable bit */
-               pci_read_config_byte(isa_bridge, 0x79, &tmp);
-               if (pdev->revision == 0xC2)
-                       pci_write_config_byte(isa_bridge, 0x79, tmp | 0x04);
-               else if (pdev->revision > 0xC2 && pdev->revision < 0xC5)
-                       pci_write_config_byte(isa_bridge, 0x79, tmp | 0x02);
-       }
-       if (pdev->revision >= 0x20) {
+               if (pdev->revision < 0xC2)
+                       /* 1543-E/F, 1543C-C, 1543C-D, 1543C-E */
+                       /* Clear CD-ROM DMA write bit */
+                       tmp &= 0x7F;
+               /* Cable and UDMA */
+               pci_write_config_byte(pdev, 0x4B, tmp | 0x09);
                /*
                 * CD_ROM DMA on (0x53 bit 0). Enable this even if we want
                 * to use PIO. 0x53 bit 1 (rev 20 only) - enable FIFO control
                 * via 0x54/55.
                 */
                pci_read_config_byte(pdev, 0x53, &tmp);
-               if (pdev->revision <= 0x20)
-                       tmp &= ~0x02;
                if (pdev->revision >= 0xc7)
                        tmp |= 0x03;
                else
                        tmp |= 0x01;    /* CD_ROM enable for DMA */
                pci_write_config_byte(pdev, 0x53, tmp);
        }
-       pci_dev_put(isa_bridge);
+       north = pci_get_bus_and_slot(0, PCI_DEVFN(0,0));
+       if (north && north->vendor == PCI_VENDOR_ID_AL && isa_bridge) {
+               /* Configure the ALi bridge logic. For non ALi rely on BIOS.
+                  Set the south bridge enable bit */
+               pci_read_config_byte(isa_bridge, 0x79, &tmp);
+               if (pdev->revision == 0xC2)
+                       pci_write_config_byte(isa_bridge, 0x79, tmp | 0x04);
+               else if (pdev->revision > 0xC2 && pdev->revision < 0xC5)
+                       pci_write_config_byte(isa_bridge, 0x79, tmp | 0x02);
+       }
        pci_dev_put(north);
        ata_pci_bmdma_clear_simplex(pdev);
 }
@@ -503,7 +532,7 @@ static int ali_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
                .pio_mask = 0x1f,
                .mwdma_mask = 0x07,
                .udma_mask = ATA_UDMA5,
-               .port_ops = &ali_c2_port_ops
+               .port_ops = &ali_c4_port_ops
        };
        /* Revision 0xC5 is UDMA133 with LBA48 DMA */
        static const struct ata_port_info info_c5 = {
@@ -516,7 +545,6 @@ static int ali_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 
        const struct ata_port_info *ppi[] = { NULL, NULL };
        u8 tmp;
-       struct pci_dev *isa_bridge;
        int rc;
 
        rc = pcim_enable_device(pdev);
@@ -543,14 +571,12 @@ static int ali_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 
        ali_init_chipset(pdev);
 
-       isa_bridge = pci_get_device(PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1533, NULL);
        if (isa_bridge && pdev->revision >= 0x20 && pdev->revision < 0xC2) {
                /* Are we paired with a UDMA capable chip */
                pci_read_config_byte(isa_bridge, 0x5E, &tmp);
                if ((tmp & 0x1E) == 0x12)
                        ppi[0] = &info_20_udma;
        }
-       pci_dev_put(isa_bridge);
 
        return ata_pci_sff_init_one(pdev, ppi, &ali_sht, NULL);
 }
@@ -590,13 +616,20 @@ static struct pci_driver ali_pci_driver = {
 
 static int __init ali_init(void)
 {
-       return pci_register_driver(&ali_pci_driver);
+       int ret;
+       isa_bridge = pci_get_device(PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1533, NULL);
+
+       ret = pci_register_driver(&ali_pci_driver);
+       if (ret < 0)
+               pci_dev_put(isa_bridge);
+       return ret;
 }
 
 
 static void __exit ali_exit(void)
 {
        pci_unregister_driver(&ali_pci_driver);
+       pci_dev_put(isa_bridge);
 }
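With isa_bridge promoted to file scope, pata_ali now holds a single reference on the M1533 for the module's lifetime instead of doing a lookup per probe; a minimal sketch of the get/put pairing (driver name hypothetical):

static struct pci_dev *bridge;	/* module-scope; may remain NULL */

static int __init example_init(void)
{
	int ret;

	/* pci_get_device() takes a reference held until module exit */
	bridge = pci_get_device(PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1533, NULL);
	ret = pci_register_driver(&example_pci_driver);
	if (ret < 0)
		pci_dev_put(bridge);	/* pci_dev_put(NULL) is a no-op */
	return ret;
}

static void __exit example_exit(void)
{
	pci_unregister_driver(&example_pci_driver);
	pci_dev_put(bridge);	/* drop the module-lifetime reference */
}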
 
 
index 0ec9c7d9fe9d607250a07bc47a6ce4d3c087041a..63719ab9ea4448839d64d81faa187250ef21637d 100644 (file)
@@ -24,7 +24,7 @@
 #include <linux/libata.h>
 
 #define DRV_NAME "pata_amd"
-#define DRV_VERSION "0.3.10"
+#define DRV_VERSION "0.3.11"
 
 /**
  *     timing_setup            -       shared timing computation and load
@@ -345,7 +345,7 @@ static struct scsi_host_template amd_sht = {
 };
 
 static const struct ata_port_operations amd_base_port_ops = {
-       .inherits       = &ata_bmdma_port_ops,
+       .inherits       = &ata_bmdma32_port_ops,
        .prereset       = amd_pre_reset,
 };
 
index e0c4f05d7d579a807becb30af2bf44b3a9ea7260..65c28e5a6cd7281b1a7e4493cb2cdbb1c38ab17a 100644 (file)
@@ -30,7 +30,7 @@
 #define DRV_VERSION    "0.6.2"
 
 struct hpt_clock {
-       u8      xfer_speed;
+       u8      xfer_mode;
        u32     timing;
 };
 
@@ -189,28 +189,6 @@ static unsigned long hpt366_filter(struct ata_device *adev, unsigned long mask)
        return ata_bmdma_mode_filter(adev, mask);
 }
 
-/**
- *     hpt36x_find_mode        -       reset the hpt36x bus
- *     @ap: ATA port
- *     @speed: transfer mode
- *
- *     Return the 32bit register programming information for this channel
- *     that matches the speed provided.
- */
-
-static u32 hpt36x_find_mode(struct ata_port *ap, int speed)
-{
-       struct hpt_clock *clocks = ap->host->private_data;
-
-       while(clocks->xfer_speed) {
-               if (clocks->xfer_speed == speed)
-                       return clocks->timing;
-               clocks++;
-       }
-       BUG();
-       return 0xffffffffU;     /* silence compiler warning */
-}
-
 static int hpt36x_cable_detect(struct ata_port *ap)
 {
        struct pci_dev *pdev = to_pci_dev(ap->host->dev);
@@ -226,25 +204,16 @@ static int hpt36x_cable_detect(struct ata_port *ap)
        return ATA_CBL_PATA80;
 }
 
-/**
- *     hpt366_set_piomode              -       PIO setup
- *     @ap: ATA interface
- *     @adev: device on the interface
- *
- *     Perform PIO mode setup.
- */
-
-static void hpt366_set_piomode(struct ata_port *ap, struct ata_device *adev)
+static void hpt366_set_mode(struct ata_port *ap, struct ata_device *adev,
+                           u8 mode)
 {
+       struct hpt_clock *clocks = ap->host->private_data;
        struct pci_dev *pdev = to_pci_dev(ap->host->dev);
-       u32 addr1, addr2;
-       u32 reg;
-       u32 mode;
+       u32 addr1 = 0x40 + 4 * (adev->devno + 2 * ap->port_no);
+       u32 addr2 = 0x51 + 4 * ap->port_no;
+       u32 mask, reg;
        u8 fast;
 
-       addr1 = 0x40 + 4 * (adev->devno + 2 * ap->port_no);
-       addr2 = 0x51 + 4 * ap->port_no;
-
        /* Fast interrupt prediction disable, hold off interrupt disable */
        pci_read_config_byte(pdev, addr2, &fast);
        if (fast & 0x80) {
@@ -252,12 +221,43 @@ static void hpt366_set_piomode(struct ata_port *ap, struct ata_device *adev)
                pci_write_config_byte(pdev, addr2, fast);
        }
 
+       /* determine timing mask and find matching clock entry */
+       if (mode < XFER_MW_DMA_0)
+               mask = 0xc1f8ffff;
+       else if (mode < XFER_UDMA_0)
+               mask = 0x303800ff;
+       else
+               mask = 0x30070000;
+
+       while (clocks->xfer_mode) {
+               if (clocks->xfer_mode == mode)
+                       break;
+               clocks++;
+       }
+       if (!clocks->xfer_mode)
+               BUG();
+
+       /*
+        * Combine new mode bits with old config bits and disable
+        * on-chip PIO FIFO/buffer (and PIO MST mode as well) to avoid
+        * problems handling I/O errors later.
+        */
        pci_read_config_dword(pdev, addr1, &reg);
-       mode = hpt36x_find_mode(ap, adev->pio_mode);
-       mode &= ~0x8000000;     /* No FIFO in PIO */
-       mode &= ~0x30070000;    /* Leave config bits alone */
-       reg &= 0x30070000;      /* Strip timing bits */
-       pci_write_config_dword(pdev, addr1, reg | mode);
+       reg = ((reg & ~mask) | (clocks->timing & mask)) & ~0xc0000000;
+       pci_write_config_dword(pdev, addr1, reg);
+}
+
+/**
+ *     hpt366_set_piomode              -       PIO setup
+ *     @ap: ATA interface
+ *     @adev: device on the interface
+ *
+ *     Perform PIO mode setup.
+ */
+
+static void hpt366_set_piomode(struct ata_port *ap, struct ata_device *adev)
+{
+       hpt366_set_mode(ap, adev, adev->pio_mode);
 }
 
 /**
@@ -271,28 +271,7 @@ static void hpt366_set_piomode(struct ata_port *ap, struct ata_device *adev)
 
 static void hpt366_set_dmamode(struct ata_port *ap, struct ata_device *adev)
 {
-       struct pci_dev *pdev = to_pci_dev(ap->host->dev);
-       u32 addr1, addr2;
-       u32 reg;
-       u32 mode;
-       u8 fast;
-
-       addr1 = 0x40 + 4 * (adev->devno + 2 * ap->port_no);
-       addr2 = 0x51 + 4 * ap->port_no;
-
-       /* Fast interrupt prediction disable, hold off interrupt disable */
-       pci_read_config_byte(pdev, addr2, &fast);
-       if (fast & 0x80) {
-               fast &= ~0x80;
-               pci_write_config_byte(pdev, addr2, fast);
-       }
-
-       pci_read_config_dword(pdev, addr1, &reg);
-       mode = hpt36x_find_mode(ap, adev->dma_mode);
-       mode |= 0x8000000;      /* FIFO in MWDMA or UDMA */
-       mode &= ~0xC0000000;    /* Leave config bits alone */
-       reg &= 0xC0000000;      /* Strip timing bits */
-       pci_write_config_dword(pdev, addr1, reg | mode);
+       hpt366_set_mode(ap, adev, adev->dma_mode);
 }
 
 static struct scsi_host_template hpt36x_sht = {
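Both set_piomode and set_dmamode now funnel through hpt366_set_mode(), which walks a clock table terminated by a zero xfer_mode; the lookup in isolation (helper name hypothetical):

static u32 hpt_find_timing(const struct hpt_clock *t, u8 mode)
{
	for (; t->xfer_mode; t++)	/* table ends at xfer_mode == 0 */
		if (t->xfer_mode == mode)
			return t->timing;
	return 0;			/* no entry; the caller BUG()s on this */
}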
index f11a320337c00c52415b453d8e7638e3f6bda447..f19cc645881adbb22652c4d8ee1add7d837f6d00 100644 (file)
@@ -23,7 +23,7 @@
 #include <linux/libata.h>
 
 #define DRV_NAME       "pata_hpt3x3"
-#define DRV_VERSION    "0.5.3"
+#define DRV_VERSION    "0.6.1"
 
 /**
  *     hpt3x3_set_piomode              -       PIO setup
@@ -80,14 +80,48 @@ static void hpt3x3_set_dmamode(struct ata_port *ap, struct ata_device *adev)
        r2 &= ~(0x11 << dn);    /* Clear MWDMA and UDMA bits */
 
        if (adev->dma_mode >= XFER_UDMA_0)
-               r2 |= (0x10 << dn);     /* Ultra mode */
+               r2 |= (0x01 << dn);     /* Ultra mode */
        else
-               r2 |= (0x01 << dn);     /* MWDMA */
+               r2 |= (0x10 << dn);     /* MWDMA */
 
        pci_write_config_dword(pdev, 0x44, r1);
        pci_write_config_dword(pdev, 0x48, r2);
 }
-#endif /* CONFIG_PATA_HPT3X3_DMA */
+
+/**
+ *     hpt3x3_freeze           -       DMA workaround
+ *     @ap: port to freeze
+ *
+ *     When freezing an HPT3x3 we must stop any pending DMA before
+ *     writing to the control register or the chip will hang
+ */
+
+static void hpt3x3_freeze(struct ata_port *ap)
+{
+       void __iomem *mmio = ap->ioaddr.bmdma_addr;
+
+       iowrite8(ioread8(mmio + ATA_DMA_CMD) & ~ATA_DMA_START,
+                       mmio + ATA_DMA_CMD);
+       ata_sff_dma_pause(ap);
+       ata_sff_freeze(ap);
+}
+
+/**
+ *     hpt3x3_bmdma_setup      -       DMA workaround
+ *     @qc: Queued command
+ *
+ *     When issuing BMDMA we must clean up the error/active bits in
+ *     software on this device
+ */
+
+static void hpt3x3_bmdma_setup(struct ata_queued_cmd *qc)
+{
+       struct ata_port *ap = qc->ap;
+       u8 r = ioread8(ap->ioaddr.bmdma_addr + ATA_DMA_STATUS);
+       r |= ATA_DMA_INTR | ATA_DMA_ERR;
+       iowrite8(r, ap->ioaddr.bmdma_addr + ATA_DMA_STATUS);
+       ata_bmdma_setup(qc);
+}
 
 /**
  *     hpt3x3_atapi_dma        -       ATAPI DMA check
@@ -101,18 +135,23 @@ static int hpt3x3_atapi_dma(struct ata_queued_cmd *qc)
        return 1;
 }
 
+#endif /* CONFIG_PATA_HPT3X3_DMA */
+
 static struct scsi_host_template hpt3x3_sht = {
        ATA_BMDMA_SHT(DRV_NAME),
 };
 
 static struct ata_port_operations hpt3x3_port_ops = {
        .inherits       = &ata_bmdma_port_ops,
-       .check_atapi_dma= hpt3x3_atapi_dma,
        .cable_detect   = ata_cable_40wire,
        .set_piomode    = hpt3x3_set_piomode,
 #if defined(CONFIG_PATA_HPT3X3_DMA)
        .set_dmamode    = hpt3x3_set_dmamode,
+       .bmdma_setup    = hpt3x3_bmdma_setup,
+       .check_atapi_dma= hpt3x3_atapi_dma,
+       .freeze         = hpt3x3_freeze,
 #endif
+
 };
 
 /**
index 7c8faa48b5f3b054c9017981ed8588bf339f2b37..aa576cac4d1756f7071d95fbf66ce6c579b26a8b 100644 (file)
@@ -35,7 +35,7 @@
 #include <linux/libata.h>
 
 #define DRV_NAME "pata_mpiix"
-#define DRV_VERSION "0.7.6"
+#define DRV_VERSION "0.7.7"
 
 enum {
        IDETIM = 0x6C,          /* IDE control register */
@@ -146,6 +146,7 @@ static struct ata_port_operations mpiix_port_ops = {
        .cable_detect   = ata_cable_40wire,
        .set_piomode    = mpiix_set_piomode,
        .prereset       = mpiix_pre_reset,
+       .sff_data_xfer  = ata_sff_data_xfer32,
 };
 
 static int mpiix_init_one(struct pci_dev *dev, const struct pci_device_id *id)
index 6afa07a37648ef7aac343f1d7bdb7e5d5de57161..d8d743af32250fe60cbf766ff77eb6a0079bde50 100644 (file)
@@ -186,7 +186,7 @@ EXPORT_SYMBOL_GPL(__pata_platform_probe);
  *     A platform bus ATA device has been unplugged. Perform the needed
  *     cleanup. Also called on module unload for any active devices.
  */
-int __devexit __pata_platform_remove(struct device *dev)
+int __pata_platform_remove(struct device *dev)
 {
        struct ata_host *host = dev_get_drvdata(dev);
 
index 83580a59db58bf7e4fdddd62dbd8da7d596fc3dc..9e764e5747e6c7d1ff273d6bd562d4fcdc77aaf8 100644 (file)
@@ -32,7 +32,7 @@
 #include <linux/libata.h>
 
 #define DRV_NAME "pata_sil680"
-#define DRV_VERSION "0.4.8"
+#define DRV_VERSION "0.4.9"
 
 #define SIL680_MMIO_BAR                5
 
@@ -195,7 +195,7 @@ static struct scsi_host_template sil680_sht = {
 };
 
 static struct ata_port_operations sil680_port_ops = {
-       .inherits       = &ata_bmdma_port_ops,
+       .inherits       = &ata_bmdma32_port_ops,
        .cable_detect   = sil680_cable_detect,
        .set_piomode    = sil680_set_piomode,
        .set_dmamode    = sil680_set_dmamode,
index ccee930f1e1225f8d2918cedd84793ac30b3c007..2590c2279fa79f685e5d8a81eeb99d216e87f2e8 100644 (file)
@@ -51,13 +51,6 @@ struct sil24_sge {
        __le32  flags;
 };
 
-/*
- * Port multiplier
- */
-struct sil24_port_multiplier {
-       __le32  diag;
-       __le32  sactive;
-};
 
 enum {
        SIL24_HOST_BAR          = 0,
index 088885ed51b9918ab6cfbd9d5f773552bd2086eb..e1c7611e9144cdcd0d6b50d0acf284da77a0a4d7 100644 (file)
@@ -64,7 +64,7 @@
 #include <linux/jiffies.h>
 #include "iphase.h"              
 #include "suni.h"                
-#define swap(x) (((x & 0xff) << 8) | ((x & 0xff00) >> 8))  
+#define swap_byte_order(x) (((x & 0xff) << 8) | ((x & 0xff00) >> 8))
 
 #define PRIV(dev) ((struct suni_priv *) dev->phy_data)
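Renaming the driver-local swap() presumably avoids a clash with the generic swap() helper appearing in common kernel headers around this time; on a 16-bit value the macro is just a byte swap, so a hedged equivalent using the standard helper (assuming linux/swab.h) would be:

	#include <linux/swab.h>

	/* swab16() performs the same 16-bit byte swap as swap_byte_order() */
	u16 length = swab16(trailer->length);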
 
@@ -1306,7 +1306,7 @@ static void rx_dle_intr(struct atm_dev *dev)
           // get real pkt length  pwang_test
           trailer = (struct cpcs_trailer*)((u_char *)skb->data +
                                  skb->len - sizeof(*trailer));
-          length =  swap(trailer->length);
+         length = swap_byte_order(trailer->length);
           if ((length > iadev->rx_buf_sz) || (length > 
                               (skb->len - sizeof(struct cpcs_trailer))))
           {
@@ -2995,7 +2995,7 @@ static int ia_pkt_tx (struct atm_vcc *vcc, struct sk_buff *skb) {
                skb->len, PCI_DMA_TODEVICE);
        wr_ptr->local_pkt_addr = (buf_desc_ptr->buf_start_hi << 16) | 
                                                   buf_desc_ptr->buf_start_lo;  
-       /* wr_ptr->bytes = swap(total_len);     didn't seem to affect ?? */  
+       /* wr_ptr->bytes = swap_byte_order(total_len); didn't seem to affect?? */
        wr_ptr->bytes = skb->len;  
 
         /* hw bug - DLEs of 0x2d, 0x2e, 0x2f cause DMA lockup */
index 35914b6e1d2aee32ddf29dc63b14a6d09e8298e8..f5be8081cd8187e87a5e3d85254abefc2be1e85b 100644 (file)
@@ -616,6 +616,7 @@ config HVC_ISERIES
        default y
        select HVC_DRIVER
        select HVC_IRQ
+       select VIOPATH
        help
          iSeries machines support a hypervisor virtual console.
 
index 91cdb35a920480f384fdff9efad6f28fb572021f..0afc8b82212e78dd17d7d2b254ec099b5642cfb7 100644 (file)
@@ -44,7 +44,7 @@ static int hvc_beat_get_chars(uint32_t vtermno, char *buf, int cnt)
        static unsigned char q[sizeof(unsigned long) * 2]
                __attribute__((aligned(sizeof(unsigned long))));
        static int qlen = 0;
-       unsigned long got;
+       u64 got;
 
 again:
        if (qlen) {
@@ -63,7 +63,7 @@ again:
                }
        }
        if (beat_get_term_char(vtermno, &got,
-               ((unsigned long *)q), ((unsigned long *)q) + 1) == 0) {
+               ((u64 *)q), ((u64 *)q) + 1) == 0) {
                qlen = got;
                goto again;
        }
index 112a6ba9a96fef2b3fec186bbaddce9944df752f..146c97613da0c2719f8084228d9cbca4a742814d 100644 (file)
@@ -32,7 +32,7 @@
 
 /* These are global because they are accessed in tty_io.c */
 #ifdef CONFIG_UNIX98_PTYS
-struct tty_driver *ptm_driver;
+static struct tty_driver *ptm_driver;
 static struct tty_driver *pts_driver;
 #endif
 
index ab18c1e7b115fd098ef56599241560d67fb19597..70efba2ee05321a8960d6391b9cc742eb953dae7 100644 (file)
@@ -273,12 +273,23 @@ static void tpm_nsc_remove(struct device *dev)
        }
 }
 
-static struct device_driver nsc_drv = {
-       .name = "tpm_nsc",
-       .bus = &platform_bus_type,
-       .owner = THIS_MODULE,
-       .suspend = tpm_pm_suspend,
-       .resume = tpm_pm_resume,
+static int tpm_nsc_suspend(struct platform_device *dev, pm_message_t msg)
+{
+       return tpm_pm_suspend(&dev->dev, msg);
+}
+
+static int tpm_nsc_resume(struct platform_device *dev)
+{
+       return tpm_pm_resume(&dev->dev);
+}
+
+static struct platform_driver nsc_drv = {
+       .suspend         = tpm_nsc_suspend,
+       .resume          = tpm_nsc_resume,
+       .driver          = {
+               .name    = "tpm_nsc",
+               .owner   = THIS_MODULE,
+       },
 };
 
 static int __init init_nsc(void)
@@ -297,7 +308,7 @@ static int __init init_nsc(void)
                        return -ENODEV;
        }
 
-       err = driver_register(&nsc_drv);
+       err = platform_driver_register(&nsc_drv);
        if (err)
                return err;
 
@@ -308,17 +319,15 @@ static int __init init_nsc(void)
        /* enable the DPM module */
        tpm_write_index(nscAddrBase, NSC_LDC_INDEX, 0x01);
 
-       pdev = kzalloc(sizeof(struct platform_device), GFP_KERNEL);
+       pdev = platform_device_alloc("tpm_nscl0", -1);
        if (!pdev) {
                rc = -ENOMEM;
                goto err_unreg_drv;
        }
 
-       pdev->name = "tpm_nscl0";
-       pdev->id = -1;
        pdev->num_resources = 0;
+       pdev->dev.driver = &nsc_drv.driver;
        pdev->dev.release = tpm_nsc_remove;
-       pdev->dev.driver = &nsc_drv;
 
        if ((rc = platform_device_register(pdev)) < 0)
                goto err_free_dev;
@@ -377,7 +386,7 @@ err_unreg_dev:
 err_free_dev:
        kfree(pdev);
 err_unreg_drv:
-       driver_unregister(&nsc_drv);
+       platform_driver_unregister(&nsc_drv);
        return rc;
 }
 
@@ -390,7 +399,7 @@ static void __exit cleanup_nsc(void)
                pdev = NULL;
        }
 
-       driver_unregister(&nsc_drv);
+       platform_driver_unregister(&nsc_drv);
 }
 
 module_init(init_nsc);
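The tpm_nsc hunks replace a bare struct device_driver on the platform bus with a struct platform_driver whose PM hooks wrap the shared dev-based callbacks; a minimal sketch of the wrapper shape, with hypothetical names:

static int foo_suspend(struct platform_device *pdev, pm_message_t msg)
{
	return shared_pm_suspend(&pdev->dev, msg);	/* hypothetical shared helper */
}

static int foo_resume(struct platform_device *pdev)
{
	return shared_pm_resume(&pdev->dev);		/* hypothetical shared helper */
}

static struct platform_driver foo_drv = {
	.suspend = foo_suspend,
	.resume  = foo_resume,
	.driver  = {
		.name  = "foo",
		.owner = THIS_MODULE,
	},
};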
index 80014213fb5397a7c7694e5b4c023b6e308bb197..7900bd63b36da74529eb76bf2923c165cf8d8a1f 100644 (file)
@@ -969,8 +969,7 @@ int vc_resize(struct vc_data *vc, unsigned int cols, unsigned int rows)
  *     Takes the console sem and the called methods then take the tty
  *     termios_mutex and the tty ctrl_lock in that order.
  */
-
-int vt_resize(struct tty_struct *tty, struct winsize *ws)
+static int vt_resize(struct tty_struct *tty, struct winsize *ws)
 {
        struct vc_data *vc = tty->driver_data;
        int ret;
index 50a071f1c945c0c3d8079ffdf713c577315a8415..777fba48d2d3ddbf69343b9abd0c3aa8351ab9d9 100644 (file)
@@ -238,11 +238,11 @@ static ssize_t host_control_on_shutdown_store(struct device *dev,
 }
 
 /**
- * smi_request: generate SMI request
+ * dcdbas_smi_request: generate SMI request
  *
  * Called with smi_data_lock.
  */
-static int smi_request(struct smi_cmd *smi_cmd)
+int dcdbas_smi_request(struct smi_cmd *smi_cmd)
 {
        cpumask_t old_mask;
        int ret = 0;
@@ -309,14 +309,14 @@ static ssize_t smi_request_store(struct device *dev,
        switch (val) {
        case 2:
                /* Raw SMI */
-               ret = smi_request(smi_cmd);
+               ret = dcdbas_smi_request(smi_cmd);
                if (!ret)
                        ret = count;
                break;
        case 1:
                /* Calling Interface SMI */
                smi_cmd->ebx = (u32) virt_to_phys(smi_cmd->command_buffer);
-               ret = smi_request(smi_cmd);
+               ret = dcdbas_smi_request(smi_cmd);
                if (!ret)
                        ret = count;
                break;
@@ -333,6 +333,7 @@ out:
        mutex_unlock(&smi_data_lock);
        return ret;
 }
+EXPORT_SYMBOL(dcdbas_smi_request);
 
 /**
  * host_control_smi: generate host control SMI
index 87bc3417de2745c62de30ddc88c9fc8cc6e5a8e1..ca3cb0a54ab67a3f50bd22b3d579942a5a1057ae 100644 (file)
@@ -101,5 +101,7 @@ struct apm_cmd {
        } __attribute__ ((packed)) parameters;
 } __attribute__ ((packed));
 
+int dcdbas_smi_request(struct smi_cmd *smi_cmd);
+
 #endif /* _DCDBAS_H_ */
 
index 3bf8ee120d42d9e64cb64246157e46ed0b430c6f..261b9aa3f248e6c2df0c67d5492b35586cf1bb96 100644 (file)
@@ -56,9 +56,9 @@ struct memmap_attribute {
        ssize_t (*show)(struct firmware_map_entry *entry, char *buf);
 };
 
-struct memmap_attribute memmap_start_attr = __ATTR_RO(start);
-struct memmap_attribute memmap_end_attr   = __ATTR_RO(end);
-struct memmap_attribute memmap_type_attr  = __ATTR_RO(type);
+static struct memmap_attribute memmap_start_attr = __ATTR_RO(start);
+static struct memmap_attribute memmap_end_attr   = __ATTR_RO(end);
+static struct memmap_attribute memmap_type_attr  = __ATTR_RO(type);
 
 /*
  * These are default attributes that are added for every memmap entry.
index a812db24347756a2e755988e590c416bc12dab60..6ba57e91d7ab742288208649cd707a845e672572 100644 (file)
@@ -2705,7 +2705,7 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
                        sizeof(struct ietf_mpa_frame));
 
 
-       /* notify OF layer that accept event was successfull */
+       /* notify OF layer that accept event was successful */
        cm_id->add_ref(cm_id);
 
        cm_event.event = IW_CM_EVENT_ESTABLISHED;
index a0f45c4fc198aef847dacae8d5184972f7d54e45..d297accf9a7feeb8077737c187d39d64c3f89c0b 100644 (file)
@@ -186,7 +186,7 @@ static int __devinit pxa930_trkball_probe(struct platform_device *pdev)
        error = request_irq(irq, pxa930_trkball_interrupt, IRQF_DISABLED,
                            pdev->name, trkball);
        if (error) {
-               dev_err(&pdev->dev, "failed to request irq: %d\n", ret);
+               dev_err(&pdev->dev, "failed to request irq: %d\n", error);
                goto failed_free_io;
        }
 
@@ -227,7 +227,7 @@ failed_free_io:
        iounmap(trkball->mmio_base);
 failed:
        kfree(trkball);
-       return ret;
+       return error;
 }
 
 static int __devexit pxa930_trkball_remove(struct platform_device *pdev)
index 016410cf227340e679e81a88f662e7fc8a1b14cb..8ea587783e149f886b2bc140683a884fcd3099fc 100644 (file)
@@ -235,7 +235,7 @@ typedef void ( * DbgOld) (unsigned short, char *, va_list) ;
 typedef void ( * DbgEv)  (unsigned short, unsigned long, va_list) ;
 typedef void ( * DbgIrq) (unsigned short, int, char *, va_list) ;
 typedef struct _DbgHandle_
-{ char    Registered ; /* driver successfull registered */
+{ char    Registered ; /* driver successfully registered */
 #define DBG_HANDLE_REG_NEW 0x01  /* this (new) structure    */
 #define DBG_HANDLE_REG_OLD 0x7f  /* old structure (see below)  */
  char    Version;  /* version of this structure  */
index 7b4ec3f60dbf6c61b5d0b24e584d6aa137f7a73f..c964b8d91ada568a4b6813409e2f29e852dc2495 100644 (file)
@@ -997,7 +997,7 @@ diva_4bri_start_adapter(PISDN_ADAPTER IoAdapter,
        diva_xdi_display_adapter_features(IoAdapter->ANum);
 
        for (i = 0; i < IoAdapter->tasks; i++) {
-               DBG_LOG(("A(%d) %s adapter successfull started",
+               DBG_LOG(("A(%d) %s adapter successfully started",
                         IoAdapter->QuadroList->QuadroAdapter[i]->ANum,
                         (IoAdapter->tasks == 1) ? "BRI 2.0" : "4BRI"))
                diva_xdi_didd_register_adapter(IoAdapter->QuadroList->QuadroAdapter[i]->ANum);
index f31bba5b16ffa7ae2afd25c52c6ece5b188e9a33..08f01993f46b4a91548d27f81e2047fc050d5bc9 100644 (file)
@@ -736,7 +736,7 @@ diva_bri_start_adapter(PISDN_ADAPTER IoAdapter,
 
        IoAdapter->Properties.Features = (word) features;
        diva_xdi_display_adapter_features(IoAdapter->ANum);
-       DBG_LOG(("A(%d) BRI adapter successfull started", IoAdapter->ANum))
+       DBG_LOG(("A(%d) BRI adapter successfully started", IoAdapter->ANum))
            /*
               Register with DIDD
             */
index 903356547b79c715a896253ecd0658e74f6b2c29..5d65405c75f466c3df2aa33faeb62d24dbbe1db4 100644 (file)
@@ -513,7 +513,7 @@ diva_pri_start_adapter(PISDN_ADAPTER IoAdapter,
 
        diva_xdi_display_adapter_features(IoAdapter->ANum);
 
-       DBG_LOG(("A(%d) PRI adapter successfull started", IoAdapter->ANum))
+       DBG_LOG(("A(%d) PRI adapter successfully started", IoAdapter->ANum))
        /*
           Register with DIDD
         */
index ab7c8e4a61f943c516ae5a3534e8775c33180f6b..719943763391263c7a4ab98ad0201244ce1d3d62 100644 (file)
@@ -215,7 +215,6 @@ static struct page *read_sb_page(mddev_t *mddev, long offset,
        /* choose a good rdev and read the page from there */
 
        mdk_rdev_t *rdev;
-       struct list_head *tmp;
        sector_t target;
 
        if (!page)
@@ -223,7 +222,7 @@ static struct page *read_sb_page(mddev_t *mddev, long offset,
        if (!page)
                return ERR_PTR(-ENOMEM);
 
-       rdev_for_each(rdev, tmp, mddev) {
+       list_for_each_entry(rdev, &mddev->disks, same_set) {
                if (! test_bit(In_sync, &rdev->flags)
                    || test_bit(Faulty, &rdev->flags))
                        continue;
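This is the first of many md hunks that swap the rdev_for_each() wrapper for a direct list walk, dropping the spare struct list_head cursor; the replacement idiom in isolation:

	mdk_rdev_t *rdev;

	list_for_each_entry(rdev, &mddev->disks, same_set) {
		/* visits every member; no struct list_head *tmp needed */
	}

The safe rdev_for_each() form survives only where entries may be unlinked mid-walk, as in export_array() and analyze_sbs() later in this series.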
@@ -964,9 +963,11 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
                                 */
                                page = bitmap->sb_page;
                                offset = sizeof(bitmap_super_t);
-                               read_sb_page(bitmap->mddev, bitmap->offset,
-                                            page,
-                                            index, count);
+                               if (!file)
+                                       read_sb_page(bitmap->mddev,
+                                                    bitmap->offset,
+                                                    page,
+                                                    index, count);
                        } else if (file) {
                                page = read_page(file, index, bitmap, count);
                                offset = 0;
index f26c1f9a475b8d9cbad5f885016e50723b580cd5..86d9adf90e791857efdf674117168398b40735a1 100644 (file)
@@ -283,7 +283,6 @@ static int reconfig(mddev_t *mddev, int layout, int chunk_size)
 static int run(mddev_t *mddev)
 {
        mdk_rdev_t *rdev;
-       struct list_head *tmp;
        int i;
 
        conf_t *conf = kmalloc(sizeof(*conf), GFP_KERNEL);
@@ -296,7 +295,7 @@ static int run(mddev_t *mddev)
        }
        conf->nfaults = 0;
 
-       rdev_for_each(rdev, tmp, mddev)
+       list_for_each_entry(rdev, &mddev->disks, same_set)
                conf->rdev = rdev;
 
        mddev->array_sectors = mddev->size * 2;
index 3b90c5c924ecc25a33ba4268d22d65b74142fece..1e3aea9eecf18009f4ff0bc2e4c474914e1bcf74 100644 (file)
@@ -105,7 +105,6 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
        int i, nb_zone, cnt;
        sector_t min_sectors;
        sector_t curr_sector;
-       struct list_head *tmp;
 
        conf = kzalloc (sizeof (*conf) + raid_disks*sizeof(dev_info_t),
                        GFP_KERNEL);
@@ -115,7 +114,7 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
        cnt = 0;
        conf->array_sectors = 0;
 
-       rdev_for_each(rdev, tmp, mddev) {
+       list_for_each_entry(rdev, &mddev->disks, same_set) {
                int j = rdev->raid_disk;
                dev_info_t *disk = conf->disks + j;
 
index 1b1d32694f6fc8eddf7a8c4cccc6d836b5e88054..41e2509bf896c9356f9f9bd55f93f811120d7639 100644 (file)
@@ -214,20 +214,33 @@ static inline mddev_t *mddev_get(mddev_t *mddev)
        return mddev;
 }
 
+static void mddev_delayed_delete(struct work_struct *ws)
+{
+       mddev_t *mddev = container_of(ws, mddev_t, del_work);
+       kobject_del(&mddev->kobj);
+       kobject_put(&mddev->kobj);
+}
+
 static void mddev_put(mddev_t *mddev)
 {
        if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
                return;
-       if (!mddev->raid_disks && list_empty(&mddev->disks)) {
+       if (!mddev->raid_disks && list_empty(&mddev->disks) &&
+           !mddev->hold_active) {
                list_del(&mddev->all_mddevs);
-               spin_unlock(&all_mddevs_lock);
-               blk_cleanup_queue(mddev->queue);
-               if (mddev->sysfs_state)
-                       sysfs_put(mddev->sysfs_state);
-               mddev->sysfs_state = NULL;
-               kobject_put(&mddev->kobj);
-       } else
-               spin_unlock(&all_mddevs_lock);
+               if (mddev->gendisk) {
+                       /* we did a probe so need to clean up.
+                        * Call schedule_work inside the spinlock
+                        * so that flush_scheduled_work() after
+                        * mddev_find will succeed in waiting for the
+                        * work to be done.
+                        */
+                       INIT_WORK(&mddev->del_work, mddev_delayed_delete);
+                       schedule_work(&mddev->del_work);
+               } else
+                       kfree(mddev);
+       }
+       spin_unlock(&all_mddevs_lock);
 }
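mddev_put() queues the delete while all_mddevs_lock is still held, which is what lets md_alloc() below use flush_scheduled_work() as a barrier; compressed, the handshake is:

	/* release side, still under all_mddevs_lock */
	INIT_WORK(&mddev->del_work, mddev_delayed_delete);
	schedule_work(&mddev->del_work);	/* queued before the lock drops */

	/* allocation side, before reusing the device */
	flush_scheduled_work();			/* waits out any pending delete */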
 
 static mddev_t * mddev_find(dev_t unit)
@@ -236,15 +249,50 @@ static mddev_t * mddev_find(dev_t unit)
 
  retry:
        spin_lock(&all_mddevs_lock);
-       list_for_each_entry(mddev, &all_mddevs, all_mddevs)
-               if (mddev->unit == unit) {
-                       mddev_get(mddev);
+
+       if (unit) {
+               list_for_each_entry(mddev, &all_mddevs, all_mddevs)
+                       if (mddev->unit == unit) {
+                               mddev_get(mddev);
+                               spin_unlock(&all_mddevs_lock);
+                               kfree(new);
+                               return mddev;
+                       }
+
+               if (new) {
+                       list_add(&new->all_mddevs, &all_mddevs);
                        spin_unlock(&all_mddevs_lock);
-                       kfree(new);
-                       return mddev;
+                       new->hold_active = UNTIL_IOCTL;
+                       return new;
                }
-
-       if (new) {
+       } else if (new) {
+               /* find an unused unit number */
+               static int next_minor = 512;
+               int start = next_minor;
+               int is_free = 0;
+               int dev = 0;
+               while (!is_free) {
+                       dev = MKDEV(MD_MAJOR, next_minor);
+                       next_minor++;
+                       if (next_minor > MINORMASK)
+                               next_minor = 0;
+                       if (next_minor == start) {
+                               /* Oh dear, all in use. */
+                               spin_unlock(&all_mddevs_lock);
+                               kfree(new);
+                               return NULL;
+                       }
+
+                       is_free = 1;
+                       list_for_each_entry(mddev, &all_mddevs, all_mddevs)
+                               if (mddev->unit == dev) {
+                                       is_free = 0;
+                                       break;
+                               }
+               }
+               new->unit = dev;
+               new->md_minor = MINOR(dev);
+               new->hold_active = UNTIL_STOP;
                list_add(&new->all_mddevs, &all_mddevs);
                spin_unlock(&all_mddevs_lock);
                return new;
@@ -275,16 +323,6 @@ static mddev_t * mddev_find(dev_t unit)
        new->resync_max = MaxSector;
        new->level = LEVEL_NONE;
 
-       new->queue = blk_alloc_queue(GFP_KERNEL);
-       if (!new->queue) {
-               kfree(new);
-               return NULL;
-       }
-       /* Can be unlocked because the queue is new: no concurrency */
-       queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, new->queue);
-
-       blk_queue_make_request(new->queue, md_fail_request);
-
        goto retry;
 }
 
@@ -307,25 +345,23 @@ static inline void mddev_unlock(mddev_t * mddev)
 
 static mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr)
 {
-       mdk_rdev_t * rdev;
-       struct list_head *tmp;
+       mdk_rdev_t *rdev;
 
-       rdev_for_each(rdev, tmp, mddev) {
+       list_for_each_entry(rdev, &mddev->disks, same_set)
                if (rdev->desc_nr == nr)
                        return rdev;
-       }
+
        return NULL;
 }
 
 static mdk_rdev_t * find_rdev(mddev_t * mddev, dev_t dev)
 {
-       struct list_head *tmp;
        mdk_rdev_t *rdev;
 
-       rdev_for_each(rdev, tmp, mddev) {
+       list_for_each_entry(rdev, &mddev->disks, same_set)
                if (rdev->bdev->bd_dev == dev)
                        return rdev;
-       }
+
        return NULL;
 }
 
@@ -861,7 +897,6 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
 static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
 {
        mdp_super_t *sb;
-       struct list_head *tmp;
        mdk_rdev_t *rdev2;
        int next_spare = mddev->raid_disks;
 
@@ -933,7 +968,7 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
                sb->state |= (1<<MD_SB_BITMAP_PRESENT);
 
        sb->disks[0].state = (1<<MD_DISK_REMOVED);
-       rdev_for_each(rdev2, tmp, mddev) {
+       list_for_each_entry(rdev2, &mddev->disks, same_set) {
                mdp_disk_t *d;
                int desc_nr;
                if (rdev2->raid_disk >= 0 && test_bit(In_sync, &rdev2->flags)
@@ -1259,7 +1294,6 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
 static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
 {
        struct mdp_superblock_1 *sb;
-       struct list_head *tmp;
        mdk_rdev_t *rdev2;
        int max_dev, i;
        /* make rdev->sb match mddev and rdev data. */
@@ -1307,7 +1341,7 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
        }
 
        max_dev = 0;
-       rdev_for_each(rdev2, tmp, mddev)
+       list_for_each_entry(rdev2, &mddev->disks, same_set)
                if (rdev2->desc_nr+1 > max_dev)
                        max_dev = rdev2->desc_nr+1;
 
@@ -1316,7 +1350,7 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
        for (i=0; i<max_dev;i++)
                sb->dev_roles[i] = cpu_to_le16(0xfffe);
        
-       rdev_for_each(rdev2, tmp, mddev) {
+       list_for_each_entry(rdev2, &mddev->disks, same_set) {
                i = rdev2->desc_nr;
                if (test_bit(Faulty, &rdev2->flags))
                        sb->dev_roles[i] = cpu_to_le16(0xfffe);
@@ -1466,6 +1500,9 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
 
        list_add_rcu(&rdev->same_set, &mddev->disks);
        bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk);
+
+       /* May as well allow recovery to be retried once */
+       mddev->recovery_disabled = 0;
        return 0;
 
  fail:
@@ -1571,8 +1608,7 @@ static void kick_rdev_from_array(mdk_rdev_t * rdev)
 
 static void export_array(mddev_t *mddev)
 {
-       struct list_head *tmp;
-       mdk_rdev_t *rdev;
+       mdk_rdev_t *rdev, *tmp;
 
        rdev_for_each(rdev, tmp, mddev) {
                if (!rdev->mddev) {
@@ -1593,7 +1629,7 @@ static void print_desc(mdp_disk_t *desc)
                desc->major,desc->minor,desc->raid_disk,desc->state);
 }
 
-static void print_sb(mdp_super_t *sb)
+static void print_sb_90(mdp_super_t *sb)
 {
        int i;
 
@@ -1624,10 +1660,57 @@ static void print_sb(mdp_super_t *sb)
        }
        printk(KERN_INFO "md:     THIS: ");
        print_desc(&sb->this_disk);
-
 }
 
-static void print_rdev(mdk_rdev_t *rdev)
+static void print_sb_1(struct mdp_superblock_1 *sb)
+{
+       __u8 *uuid;
+
+       uuid = sb->set_uuid;
+       printk(KERN_INFO "md:  SB: (V:%u) (F:0x%08x) Array-ID:<%02x%02x%02x%02x"
+                       ":%02x%02x:%02x%02x:%02x%02x:%02x%02x%02x%02x%02x%02x>\n"
+              KERN_INFO "md:    Name: \"%s\" CT:%llu\n",
+               le32_to_cpu(sb->major_version),
+               le32_to_cpu(sb->feature_map),
+               uuid[0], uuid[1], uuid[2], uuid[3],
+               uuid[4], uuid[5], uuid[6], uuid[7],
+               uuid[8], uuid[9], uuid[10], uuid[11],
+               uuid[12], uuid[13], uuid[14], uuid[15],
+               sb->set_name,
+               (unsigned long long)le64_to_cpu(sb->ctime)
+                      & MD_SUPERBLOCK_1_TIME_SEC_MASK);
+
+       uuid = sb->device_uuid;
+       printk(KERN_INFO "md:       L%u SZ%llu RD:%u LO:%u CS:%u DO:%llu DS:%llu SO:%llu"
+                       " RO:%llu\n"
+              KERN_INFO "md:     Dev:%08x UUID: %02x%02x%02x%02x:%02x%02x:%02x%02x:%02x%02x"
+                       ":%02x%02x%02x%02x%02x%02x\n"
+              KERN_INFO "md:       (F:0x%08x) UT:%llu Events:%llu ResyncOffset:%llu CSUM:0x%08x\n"
+              KERN_INFO "md:         (MaxDev:%u) \n",
+               le32_to_cpu(sb->level),
+               (unsigned long long)le64_to_cpu(sb->size),
+               le32_to_cpu(sb->raid_disks),
+               le32_to_cpu(sb->layout),
+               le32_to_cpu(sb->chunksize),
+               (unsigned long long)le64_to_cpu(sb->data_offset),
+               (unsigned long long)le64_to_cpu(sb->data_size),
+               (unsigned long long)le64_to_cpu(sb->super_offset),
+               (unsigned long long)le64_to_cpu(sb->recovery_offset),
+               le32_to_cpu(sb->dev_number),
+               uuid[0], uuid[1], uuid[2], uuid[3],
+               uuid[4], uuid[5], uuid[6], uuid[7],
+               uuid[8], uuid[9], uuid[10], uuid[11],
+               uuid[12], uuid[13], uuid[14], uuid[15],
+               sb->devflags,
+               (unsigned long long)le64_to_cpu(sb->utime) & MD_SUPERBLOCK_1_TIME_SEC_MASK,
+               (unsigned long long)le64_to_cpu(sb->events),
+               (unsigned long long)le64_to_cpu(sb->resync_offset),
+               le32_to_cpu(sb->sb_csum),
+               le32_to_cpu(sb->max_dev)
+               );
+}
+
+static void print_rdev(mdk_rdev_t *rdev, int major_version)
 {
        char b[BDEVNAME_SIZE];
        printk(KERN_INFO "md: rdev %s, SZ:%08llu F:%d S:%d DN:%u\n",
@@ -1635,15 +1718,22 @@ static void print_rdev(mdk_rdev_t *rdev)
                test_bit(Faulty, &rdev->flags), test_bit(In_sync, &rdev->flags),
                rdev->desc_nr);
        if (rdev->sb_loaded) {
-               printk(KERN_INFO "md: rdev superblock:\n");
-               print_sb((mdp_super_t*)page_address(rdev->sb_page));
+               printk(KERN_INFO "md: rdev superblock (MJ:%d):\n", major_version);
+               switch (major_version) {
+               case 0:
+                       print_sb_90((mdp_super_t*)page_address(rdev->sb_page));
+                       break;
+               case 1:
+                       print_sb_1((struct mdp_superblock_1 *)page_address(rdev->sb_page));
+                       break;
+               }
        } else
                printk(KERN_INFO "md: no rdev superblock!\n");
 }
 
 static void md_print_devices(void)
 {
-       struct list_head *tmp, *tmp2;
+       struct list_head *tmp;
        mdk_rdev_t *rdev;
        mddev_t *mddev;
        char b[BDEVNAME_SIZE];
@@ -1658,12 +1748,12 @@ static void md_print_devices(void)
                        bitmap_print_sb(mddev->bitmap);
                else
                        printk("%s: ", mdname(mddev));
-               rdev_for_each(rdev, tmp2, mddev)
+               list_for_each_entry(rdev, &mddev->disks, same_set)
                        printk("<%s>", bdevname(rdev->bdev,b));
                printk("\n");
 
-               rdev_for_each(rdev, tmp2, mddev)
-                       print_rdev(rdev);
+               list_for_each_entry(rdev, &mddev->disks, same_set)
+                       print_rdev(rdev, mddev->major_version);
        }
        printk("md:     **********************************\n");
        printk("\n");
@@ -1679,9 +1769,8 @@ static void sync_sbs(mddev_t * mddev, int nospares)
         * with the rest of the array)
         */
        mdk_rdev_t *rdev;
-       struct list_head *tmp;
 
-       rdev_for_each(rdev, tmp, mddev) {
+       list_for_each_entry(rdev, &mddev->disks, same_set) {
                if (rdev->sb_events == mddev->events ||
                    (nospares &&
                     rdev->raid_disk < 0 &&
@@ -1699,7 +1788,6 @@ static void sync_sbs(mddev_t * mddev, int nospares)
 
 static void md_update_sb(mddev_t * mddev, int force_change)
 {
-       struct list_head *tmp;
        mdk_rdev_t *rdev;
        int sync_req;
        int nospares = 0;
@@ -1790,7 +1878,7 @@ repeat:
                mdname(mddev),mddev->in_sync);
 
        bitmap_update_sb(mddev->bitmap);
-       rdev_for_each(rdev, tmp, mddev) {
+       list_for_each_entry(rdev, &mddev->disks, same_set) {
                char b[BDEVNAME_SIZE];
                dprintk(KERN_INFO "md: ");
                if (rdev->sb_loaded != 1)
@@ -1999,7 +2087,6 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len)
                md_wakeup_thread(rdev->mddev->thread);
        } else if (rdev->mddev->pers) {
                mdk_rdev_t *rdev2;
-               struct list_head *tmp;
                /* Activating a spare .. or possibly reactivating
                 * if we every get bitmaps working here.
                 */
@@ -2010,7 +2097,7 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len)
                if (rdev->mddev->pers->hot_add_disk == NULL)
                        return -EINVAL;
 
-               rdev_for_each(rdev2, tmp, rdev->mddev)
+               list_for_each_entry(rdev2, &rdev->mddev->disks, same_set)
                        if (rdev2->raid_disk == slot)
                                return -EEXIST;
 
@@ -2125,14 +2212,14 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len)
                 */
                mddev_t *mddev;
                int overlap = 0;
-               struct list_head *tmp, *tmp2;
+               struct list_head *tmp;
 
                mddev_unlock(my_mddev);
                for_each_mddev(mddev, tmp) {
                        mdk_rdev_t *rdev2;
 
                        mddev_lock(mddev);
-                       rdev_for_each(rdev2, tmp2, mddev)
+                       list_for_each_entry(rdev2, &mddev->disks, same_set)
                                if (test_bit(AllReserved, &rdev2->flags) ||
                                    (rdev->bdev == rdev2->bdev &&
                                     rdev != rdev2 &&
@@ -2328,8 +2415,7 @@ abort_free:
 static void analyze_sbs(mddev_t * mddev)
 {
        int i;
-       struct list_head *tmp;
-       mdk_rdev_t *rdev, *freshest;
+       mdk_rdev_t *rdev, *freshest, *tmp;
        char b[BDEVNAME_SIZE];
 
        freshest = NULL;
@@ -3046,7 +3132,7 @@ action_store(mddev_t *mddev, const char *page, size_t len)
        }
        set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
        md_wakeup_thread(mddev->thread);
-       sysfs_notify(&mddev->kobj, NULL, "sync_action");
+       sysfs_notify_dirent(mddev->sysfs_action);
        return len;
 }
 
@@ -3404,6 +3490,8 @@ md_attr_store(struct kobject *kobj, struct attribute *attr,
        if (!capable(CAP_SYS_ADMIN))
                return -EACCES;
        rv = mddev_lock(mddev);
+       if (mddev->hold_active == UNTIL_IOCTL)
+               mddev->hold_active = 0;
        if (!rv) {
                rv = entry->store(mddev, page, length);
                mddev_unlock(mddev);
@@ -3414,6 +3502,17 @@ md_attr_store(struct kobject *kobj, struct attribute *attr,
 static void md_free(struct kobject *ko)
 {
        mddev_t *mddev = container_of(ko, mddev_t, kobj);
+
+       if (mddev->sysfs_state)
+               sysfs_put(mddev->sysfs_state);
+
+       if (mddev->gendisk) {
+               del_gendisk(mddev->gendisk);
+               put_disk(mddev->gendisk);
+       }
+       if (mddev->queue)
+               blk_cleanup_queue(mddev->queue);
+
        kfree(mddev);
 }
 
@@ -3429,34 +3528,74 @@ static struct kobj_type md_ktype = {
 
 int mdp_major = 0;
 
-static struct kobject *md_probe(dev_t dev, int *part, void *data)
+static int md_alloc(dev_t dev, char *name)
 {
        static DEFINE_MUTEX(disks_mutex);
        mddev_t *mddev = mddev_find(dev);
        struct gendisk *disk;
-       int partitioned = (MAJOR(dev) != MD_MAJOR);
-       int shift = partitioned ? MdpMinorShift : 0;
-       int unit = MINOR(dev) >> shift;
+       int partitioned;
+       int shift;
+       int unit;
        int error;
 
        if (!mddev)
-               return NULL;
+               return -ENODEV;
+
+       partitioned = (MAJOR(mddev->unit) != MD_MAJOR);
+       shift = partitioned ? MdpMinorShift : 0;
+       unit = MINOR(mddev->unit) >> shift;
+
+       /* wait for any previous instance of this device to be
+        * completely removed (mddev_delayed_delete).
+        */
+       flush_scheduled_work();
 
        mutex_lock(&disks_mutex);
        if (mddev->gendisk) {
                mutex_unlock(&disks_mutex);
                mddev_put(mddev);
-               return NULL;
+               return -EEXIST;
+       }
+
+       if (name) {
+               /* Need to ensure that 'name' is not a duplicate.
+                */
+               mddev_t *mddev2;
+               spin_lock(&all_mddevs_lock);
+
+               list_for_each_entry(mddev2, &all_mddevs, all_mddevs)
+                       if (mddev2->gendisk &&
+                           strcmp(mddev2->gendisk->disk_name, name) == 0) {
+                               spin_unlock(&all_mddevs_lock);
+                               return -EEXIST;
+                       }
+               spin_unlock(&all_mddevs_lock);
+       }
+
+       mddev->queue = blk_alloc_queue(GFP_KERNEL);
+       if (!mddev->queue) {
+               mutex_unlock(&disks_mutex);
+               mddev_put(mddev);
+               return -ENOMEM;
        }
+       /* Can be unlocked because the queue is new: no concurrency */
+       queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, mddev->queue);
+
+       blk_queue_make_request(mddev->queue, md_fail_request);
+
        disk = alloc_disk(1 << shift);
        if (!disk) {
                mutex_unlock(&disks_mutex);
+               blk_cleanup_queue(mddev->queue);
+               mddev->queue = NULL;
                mddev_put(mddev);
-               return NULL;
+               return -ENOMEM;
        }
-       disk->major = MAJOR(dev);
+       disk->major = MAJOR(mddev->unit);
        disk->first_minor = unit << shift;
-       if (partitioned)
+       if (name)
+               strcpy(disk->disk_name, name);
+       else if (partitioned)
                sprintf(disk->disk_name, "md_d%d", unit);
        else
                sprintf(disk->disk_name, "md%d", unit);
@@ -3464,7 +3603,7 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data)
        disk->private_data = mddev;
        disk->queue = mddev->queue;
        /* Allow extended partitions.  This makes the
-        * 'mdp' device redundant, but we can really
+        * 'mdp' device redundant, but we can't really
         * remove it now.
         */
        disk->flags |= GENHD_FL_EXT_DEVT;
@@ -3480,9 +3619,35 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data)
                kobject_uevent(&mddev->kobj, KOBJ_ADD);
                mddev->sysfs_state = sysfs_get_dirent(mddev->kobj.sd, "array_state");
        }
+       mddev_put(mddev);
+       return 0;
+}
+
+static struct kobject *md_probe(dev_t dev, int *part, void *data)
+{
+       md_alloc(dev, NULL);
        return NULL;
 }
 
+static int add_named_array(const char *val, struct kernel_param *kp)
+{
+       /* val must be "md_*" where * is not all digits.
+        * We allocate an array with a large free minor number, and
+        * set the name to val.  val must not already be an active name.
+        */
+       int len = strlen(val);
+       char buf[DISK_NAME_LEN];
+
+       while (len && val[len-1] == '\n')
+               len--;
+       if (len >= DISK_NAME_LEN)
+               return -E2BIG;
+       strlcpy(buf, val, len+1);
+       if (strncmp(buf, "md_", 3) != 0)
+               return -EINVAL;
+       return md_alloc(0, buf);
+}
+
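add_named_array() is shaped like a module-parameter set handler; a hedged usage sketch, assuming it is wired up as a writable parameter elsewhere (the wiring is not part of these hunks):

/* assumed wiring, e.g. in md.c:
 *	module_param_call(new_array, add_named_array, NULL, NULL, S_IWUSR);
 *
 * then, from userspace:
 *	echo md_home > /sys/module/md_mod/parameters/new_array
 * allocates an array whose gendisk is named "md_home".
 */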
 static void md_safemode_timeout(unsigned long data)
 {
        mddev_t *mddev = (mddev_t *) data;
@@ -3501,7 +3666,6 @@ static int do_md_run(mddev_t * mddev)
 {
        int err;
        int chunk_size;
-       struct list_head *tmp;
        mdk_rdev_t *rdev;
        struct gendisk *disk;
        struct mdk_personality *pers;
@@ -3540,7 +3704,7 @@ static int do_md_run(mddev_t * mddev)
                }
 
                /* devices must have minimum size of one chunk */
-               rdev_for_each(rdev, tmp, mddev) {
+               list_for_each_entry(rdev, &mddev->disks, same_set) {
                        if (test_bit(Faulty, &rdev->flags))
                                continue;
                        if (rdev->size < chunk_size / 1024) {
@@ -3565,7 +3729,7 @@ static int do_md_run(mddev_t * mddev)
         * the only valid external interface is through the md
         * device.
         */
-       rdev_for_each(rdev, tmp, mddev) {
+       list_for_each_entry(rdev, &mddev->disks, same_set) {
                if (test_bit(Faulty, &rdev->flags))
                        continue;
                sync_blockdev(rdev->bdev);
@@ -3630,10 +3794,10 @@ static int do_md_run(mddev_t * mddev)
                 */
                char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
                mdk_rdev_t *rdev2;
-               struct list_head *tmp2;
                int warned = 0;
-               rdev_for_each(rdev, tmp, mddev) {
-                       rdev_for_each(rdev2, tmp2, mddev) {
+
+               list_for_each_entry(rdev, &mddev->disks, same_set)
+                       list_for_each_entry(rdev2, &mddev->disks, same_set) {
                                if (rdev < rdev2 &&
                                    rdev->bdev->bd_contains ==
                                    rdev2->bdev->bd_contains) {
@@ -3647,7 +3811,7 @@ static int do_md_run(mddev_t * mddev)
                                        warned = 1;
                                }
                        }
-               }
+
                if (warned)
                        printk(KERN_WARNING
                               "True protection against single-disk"
@@ -3684,6 +3848,7 @@ static int do_md_run(mddev_t * mddev)
                        printk(KERN_WARNING
                               "md: cannot register extra attributes for %s\n",
                               mdname(mddev));
+               mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, "sync_action");
        } else if (mddev->ro == 2) /* auto-readonly not meaningful */
                mddev->ro = 0;
 
@@ -3694,7 +3859,7 @@ static int do_md_run(mddev_t * mddev)
        mddev->safemode_delay = (200 * HZ)/1000 +1; /* 200 msec delay */
        mddev->in_sync = 1;
 
-       rdev_for_each(rdev, tmp, mddev)
+       list_for_each_entry(rdev, &mddev->disks, same_set)
                if (rdev->raid_disk >= 0) {
                        char nm[20];
                        sprintf(nm, "rd%d", rdev->raid_disk);
@@ -3725,9 +3890,8 @@ static int do_md_run(mddev_t * mddev)
         * it will remove the drives and not do the right thing
         */
        if (mddev->degraded && !mddev->sync_thread) {
-               struct list_head *rtmp;
                int spares = 0;
-               rdev_for_each(rdev, rtmp, mddev)
+               list_for_each_entry(rdev, &mddev->disks, same_set)
                        if (rdev->raid_disk >= 0 &&
                            !test_bit(In_sync, &rdev->flags) &&
                            !test_bit(Faulty, &rdev->flags))
@@ -3754,7 +3918,8 @@ static int do_md_run(mddev_t * mddev)
        mddev->changed = 1;
        md_new_event(mddev);
        sysfs_notify_dirent(mddev->sysfs_state);
-       sysfs_notify(&mddev->kobj, NULL, "sync_action");
+       if (mddev->sysfs_action)
+               sysfs_notify_dirent(mddev->sysfs_action);
        sysfs_notify(&mddev->kobj, NULL, "degraded");
        kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
        return 0;
@@ -3854,9 +4019,12 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
                        mddev->queue->merge_bvec_fn = NULL;
                        mddev->queue->unplug_fn = NULL;
                        mddev->queue->backing_dev_info.congested_fn = NULL;
-                       if (mddev->pers->sync_request)
+                       if (mddev->pers->sync_request) {
                                sysfs_remove_group(&mddev->kobj, &md_redundancy_group);
-
+                               if (mddev->sysfs_action)
+                                       sysfs_put(mddev->sysfs_action);
+                               mddev->sysfs_action = NULL;
+                       }
                        module_put(mddev->pers->owner);
                        mddev->pers = NULL;
                        /* tell userspace to handle 'inactive' */
@@ -3883,7 +4051,6 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
         */
        if (mode == 0) {
                mdk_rdev_t *rdev;
-               struct list_head *tmp;
 
                printk(KERN_INFO "md: %s stopped.\n", mdname(mddev));
 
@@ -3895,7 +4062,7 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
                }
                mddev->bitmap_offset = 0;
 
-               rdev_for_each(rdev, tmp, mddev)
+               list_for_each_entry(rdev, &mddev->disks, same_set)
                        if (rdev->raid_disk >= 0) {
                                char nm[20];
                                sprintf(nm, "rd%d", rdev->raid_disk);
@@ -3941,6 +4108,8 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
                mddev->barriers_work = 0;
                mddev->safemode = 0;
                kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
+               if (mddev->hold_active == UNTIL_STOP)
+                       mddev->hold_active = 0;
 
        } else if (mddev->pers)
                printk(KERN_INFO "md: %s switched to read-only mode.\n",
@@ -3956,7 +4125,6 @@ out:
 static void autorun_array(mddev_t *mddev)
 {
        mdk_rdev_t *rdev;
-       struct list_head *tmp;
        int err;
 
        if (list_empty(&mddev->disks))
@@ -3964,7 +4132,7 @@ static void autorun_array(mddev_t *mddev)
 
        printk(KERN_INFO "md: running: ");
 
-       rdev_for_each(rdev, tmp, mddev) {
+       list_for_each_entry(rdev, &mddev->disks, same_set) {
                char b[BDEVNAME_SIZE];
                printk("<%s>", bdevname(rdev->bdev,b));
        }
@@ -3991,8 +4159,7 @@ static void autorun_array(mddev_t *mddev)
  */
 static void autorun_devices(int part)
 {
-       struct list_head *tmp;
-       mdk_rdev_t *rdev0, *rdev;
+       mdk_rdev_t *rdev0, *rdev, *tmp;
        mddev_t *mddev;
        char b[BDEVNAME_SIZE];
 
@@ -4007,7 +4174,7 @@ static void autorun_devices(int part)
                printk(KERN_INFO "md: considering %s ...\n",
                        bdevname(rdev0->bdev,b));
                INIT_LIST_HEAD(&candidates);
-               rdev_for_each_list(rdev, tmp, pending_raid_disks)
+               rdev_for_each_list(rdev, tmp, &pending_raid_disks)
                        if (super_90_load(rdev, rdev0, 0) >= 0) {
                                printk(KERN_INFO "md:  adding %s ...\n",
                                        bdevname(rdev->bdev,b));
@@ -4053,7 +4220,7 @@ static void autorun_devices(int part)
                } else {
                        printk(KERN_INFO "md: created %s\n", mdname(mddev));
                        mddev->persistent = 1;
-                       rdev_for_each_list(rdev, tmp, candidates) {
+                       rdev_for_each_list(rdev, tmp, &candidates) {
                                list_del_init(&rdev->same_set);
                                if (bind_rdev_to_array(rdev, mddev))
                                        export_rdev(rdev);
@@ -4064,7 +4231,7 @@ static void autorun_devices(int part)
                /* on success, candidates will be empty, on error
                 * it won't...
                 */
-               rdev_for_each_list(rdev, tmp, candidates) {
+               rdev_for_each_list(rdev, tmp, &candidates) {
                        list_del_init(&rdev->same_set);
                        export_rdev(rdev);
                }
@@ -4093,10 +4260,9 @@ static int get_array_info(mddev_t * mddev, void __user * arg)
        mdu_array_info_t info;
        int nr,working,active,failed,spare;
        mdk_rdev_t *rdev;
-       struct list_head *tmp;
 
        nr=working=active=failed=spare=0;
-       rdev_for_each(rdev, tmp, mddev) {
+       list_for_each_entry(rdev, &mddev->disks, same_set) {
                nr++;
                if (test_bit(Faulty, &rdev->flags))
                        failed++;
@@ -4614,9 +4780,8 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info)
 
 static int update_size(mddev_t *mddev, sector_t num_sectors)
 {
-       mdk_rdev_t * rdev;
+       mdk_rdev_t *rdev;
        int rv;
-       struct list_head *tmp;
        int fit = (num_sectors == 0);
 
        if (mddev->pers->resize == NULL)
@@ -4638,7 +4803,7 @@ static int update_size(mddev_t *mddev, sector_t num_sectors)
                 * grow, and re-add.
                 */
                return -EBUSY;
-       rdev_for_each(rdev, tmp, mddev) {
+       list_for_each_entry(rdev, &mddev->disks, same_set) {
                sector_t avail;
                avail = rdev->size * 2;
 
@@ -5000,6 +5165,9 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
 
 done_unlock:
 abort_unlock:
+       if (mddev->hold_active == UNTIL_IOCTL &&
+           err != -EINVAL)
+               mddev->hold_active = 0;
        mddev_unlock(mddev);
 
        return err;
@@ -5016,14 +5184,25 @@ static int md_open(struct block_device *bdev, fmode_t mode)
         * Succeed if we can lock the mddev, which confirms that
         * it isn't being stopped right now.
         */
-       mddev_t *mddev = bdev->bd_disk->private_data;
+       mddev_t *mddev = mddev_find(bdev->bd_dev);
        int err;
 
+       if (mddev->gendisk != bdev->bd_disk) {
+               /* we are racing with mddev_put which is discarding this
+                * bd_disk.
+                */
+               mddev_put(mddev);
+               /* Wait until bdev->bd_disk is definitely gone */
+               flush_scheduled_work();
+               /* Then retry the open from the top */
+               return -ERESTARTSYS;
+       }
+       BUG_ON(mddev != bdev->bd_disk->private_data);
+
        if ((err = mutex_lock_interruptible_nested(&mddev->reconfig_mutex, 1)))
                goto out;
 
        err = 0;
-       mddev_get(mddev);
        atomic_inc(&mddev->openers);
        mddev_unlock(mddev);
 
@@ -5187,11 +5366,10 @@ static void status_unused(struct seq_file *seq)
 {
        int i = 0;
        mdk_rdev_t *rdev;
-       struct list_head *tmp;
 
        seq_printf(seq, "unused devices: ");
 
-       rdev_for_each_list(rdev, tmp, pending_raid_disks) {
+       list_for_each_entry(rdev, &pending_raid_disks, same_set) {
                char b[BDEVNAME_SIZE];
                i++;
                seq_printf(seq, "%s ",
@@ -5350,7 +5528,6 @@ static int md_seq_show(struct seq_file *seq, void *v)
 {
        mddev_t *mddev = v;
        sector_t size;
-       struct list_head *tmp2;
        mdk_rdev_t *rdev;
        struct mdstat_info *mi = seq->private;
        struct bitmap *bitmap;
@@ -5387,7 +5564,7 @@ static int md_seq_show(struct seq_file *seq, void *v)
                }
 
                size = 0;
-               rdev_for_each(rdev, tmp2, mddev) {
+               list_for_each_entry(rdev, &mddev->disks, same_set) {
                        char b[BDEVNAME_SIZE];
                        seq_printf(seq, " %s[%d]",
                                bdevname(rdev->bdev,b), rdev->desc_nr);
@@ -5694,7 +5871,6 @@ void md_do_sync(mddev_t *mddev)
        struct list_head *tmp;
        sector_t last_check;
        int skipped = 0;
-       struct list_head *rtmp;
        mdk_rdev_t *rdev;
        char *desc;
 
@@ -5799,7 +5975,7 @@ void md_do_sync(mddev_t *mddev)
                /* recovery follows the physical size of devices */
                max_sectors = mddev->size << 1;
                j = MaxSector;
-               rdev_for_each(rdev, rtmp, mddev)
+               list_for_each_entry(rdev, &mddev->disks, same_set)
                        if (rdev->raid_disk >= 0 &&
                            !test_bit(Faulty, &rdev->flags) &&
                            !test_bit(In_sync, &rdev->flags) &&
@@ -5949,7 +6125,7 @@ void md_do_sync(mddev_t *mddev)
                } else {
                        if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
                                mddev->curr_resync = MaxSector;
-                       rdev_for_each(rdev, rtmp, mddev)
+                       list_for_each_entry(rdev, &mddev->disks, same_set)
                                if (rdev->raid_disk >= 0 &&
                                    !test_bit(Faulty, &rdev->flags) &&
                                    !test_bit(In_sync, &rdev->flags) &&
@@ -5985,10 +6161,9 @@ EXPORT_SYMBOL_GPL(md_do_sync);
 static int remove_and_add_spares(mddev_t *mddev)
 {
        mdk_rdev_t *rdev;
-       struct list_head *rtmp;
        int spares = 0;
 
-       rdev_for_each(rdev, rtmp, mddev)
+       list_for_each_entry(rdev, &mddev->disks, same_set)
                if (rdev->raid_disk >= 0 &&
                    !test_bit(Blocked, &rdev->flags) &&
                    (test_bit(Faulty, &rdev->flags) ||
@@ -6003,8 +6178,8 @@ static int remove_and_add_spares(mddev_t *mddev)
                        }
                }
 
-       if (mddev->degraded && ! mddev->ro) {
-               rdev_for_each(rdev, rtmp, mddev) {
+       if (mddev->degraded && ! mddev->ro && !mddev->recovery_disabled) {
+               list_for_each_entry(rdev, &mddev->disks, same_set) {
                        if (rdev->raid_disk >= 0 &&
                            !test_bit(In_sync, &rdev->flags) &&
                            !test_bit(Blocked, &rdev->flags))
@@ -6056,7 +6231,6 @@ static int remove_and_add_spares(mddev_t *mddev)
 void md_check_recovery(mddev_t *mddev)
 {
        mdk_rdev_t *rdev;
-       struct list_head *rtmp;
 
 
        if (mddev->bitmap)
@@ -6120,7 +6294,7 @@ void md_check_recovery(mddev_t *mddev)
                if (mddev->flags)
                        md_update_sb(mddev, 0);
 
-               rdev_for_each(rdev, rtmp, mddev)
+               list_for_each_entry(rdev, &mddev->disks, same_set)
                        if (test_and_clear_bit(StateChanged, &rdev->flags))
                                sysfs_notify_dirent(rdev->sysfs_state);
 
@@ -6149,13 +6323,13 @@ void md_check_recovery(mddev_t *mddev)
                         * information must be scrapped
                         */
                        if (!mddev->degraded)
-                               rdev_for_each(rdev, rtmp, mddev)
+                               list_for_each_entry(rdev, &mddev->disks, same_set)
                                        rdev->saved_raid_disk = -1;
 
                        mddev->recovery = 0;
                        /* flag recovery needed just to double check */
                        set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
-                       sysfs_notify(&mddev->kobj, NULL, "sync_action");
+                       sysfs_notify_dirent(mddev->sysfs_action);
                        md_new_event(mddev);
                        goto unlock;
                }
@@ -6216,7 +6390,7 @@ void md_check_recovery(mddev_t *mddev)
                                mddev->recovery = 0;
                        } else
                                md_wakeup_thread(mddev->sync_thread);
-                       sysfs_notify(&mddev->kobj, NULL, "sync_action");
+                       sysfs_notify_dirent(mddev->sysfs_action);
                        md_new_event(mddev);
                }
        unlock:
@@ -6224,7 +6398,8 @@ void md_check_recovery(mddev_t *mddev)
                        clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
                        if (test_and_clear_bit(MD_RECOVERY_RECOVER,
                                               &mddev->recovery))
-                               sysfs_notify(&mddev->kobj, NULL, "sync_action");
+                               if (mddev->sysfs_action)
+                                       sysfs_notify_dirent(mddev->sysfs_action);
                }
                mddev_unlock(mddev);
        }
@@ -6386,14 +6561,8 @@ static __exit void md_exit(void)
        unregister_sysctl_table(raid_table_header);
        remove_proc_entry("mdstat", NULL);
        for_each_mddev(mddev, tmp) {
-               struct gendisk *disk = mddev->gendisk;
-               if (!disk)
-                       continue;
                export_array(mddev);
-               del_gendisk(disk);
-               put_disk(disk);
-               mddev->gendisk = NULL;
-               mddev_put(mddev);
+               mddev->hold_active = 0;
        }
 }
 
@@ -6418,6 +6587,7 @@ static int set_ro(const char *val, struct kernel_param *kp)
 module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR);
 module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR);
 
+module_param_call(new_array, add_named_array, NULL, NULL, S_IWUSR);
 
 EXPORT_SYMBOL(register_md_personality);
 EXPORT_SYMBOL(unregister_md_personality);
index d4ac47d112797f4f3343420175b4312397337de2..f6d08f2416716f7207fe49aba23d154a5eb41fae 100644 (file)
@@ -408,7 +408,6 @@ static int multipath_run (mddev_t *mddev)
        int disk_idx;
        struct multipath_info *disk;
        mdk_rdev_t *rdev;
-       struct list_head *tmp;
 
        if (mddev->level != LEVEL_MULTIPATH) {
                printk("multipath: %s: raid level not set to multipath IO (%d)\n",
@@ -441,7 +440,7 @@ static int multipath_run (mddev_t *mddev)
        }
 
        conf->working_disks = 0;
-       rdev_for_each(rdev, tmp, mddev) {
+       list_for_each_entry(rdev, &mddev->disks, same_set) {
                disk_idx = rdev->raid_disk;
                if (disk_idx < 0 ||
                    disk_idx >= mddev->raid_disks)
index 8ac6488ad0dc6c1a86463c4e732c1205a6cbb9a3..c605ba8055863d2d0ede52e9fe7e9c4e374bd9d3 100644 (file)
@@ -53,11 +53,10 @@ static int raid0_congested(void *data, int bits)
 static int create_strip_zones (mddev_t *mddev)
 {
        int i, c, j;
-       sector_t current_offset, curr_zone_offset;
+       sector_t current_start, curr_zone_start;
        sector_t min_spacing;
        raid0_conf_t *conf = mddev_to_conf(mddev);
        mdk_rdev_t *smallest, *rdev1, *rdev2, *rdev;
-       struct list_head *tmp1, *tmp2;
        struct strip_zone *zone;
        int cnt;
        char b[BDEVNAME_SIZE];
@@ -67,19 +66,19 @@ static int create_strip_zones (mddev_t *mddev)
         */
        conf->nr_strip_zones = 0;
  
-       rdev_for_each(rdev1, tmp1, mddev) {
-               printk("raid0: looking at %s\n",
+       list_for_each_entry(rdev1, &mddev->disks, same_set) {
+               printk(KERN_INFO "raid0: looking at %s\n",
                        bdevname(rdev1->bdev,b));
                c = 0;
-               rdev_for_each(rdev2, tmp2, mddev) {
-                       printk("raid0:   comparing %s(%llu)",
+               list_for_each_entry(rdev2, &mddev->disks, same_set) {
+                       printk(KERN_INFO "raid0:   comparing %s(%llu)",
                               bdevname(rdev1->bdev,b),
                               (unsigned long long)rdev1->size);
-                       printk(" with %s(%llu)\n",
+                       printk(KERN_INFO " with %s(%llu)\n",
                               bdevname(rdev2->bdev,b),
                               (unsigned long long)rdev2->size);
                        if (rdev2 == rdev1) {
-                               printk("raid0:   END\n");
+                               printk(KERN_INFO "raid0:   END\n");
                                break;
                        }
                        if (rdev2->size == rdev1->size)
@@ -88,19 +87,20 @@ static int create_strip_zones (mddev_t *mddev)
                                 * Not unique, don't count it as a new
                                 * group
                                 */
-                               printk("raid0:   EQUAL\n");
+                               printk(KERN_INFO "raid0:   EQUAL\n");
                                c = 1;
                                break;
                        }
-                       printk("raid0:   NOT EQUAL\n");
+                       printk(KERN_INFO "raid0:   NOT EQUAL\n");
                }
                if (!c) {
-                       printk("raid0:   ==> UNIQUE\n");
+                       printk(KERN_INFO "raid0:   ==> UNIQUE\n");
                        conf->nr_strip_zones++;
-                       printk("raid0: %d zones\n", conf->nr_strip_zones);
+                       printk(KERN_INFO "raid0: %d zones\n",
+                               conf->nr_strip_zones);
                }
        }
-       printk("raid0: FINAL %d zones\n", conf->nr_strip_zones);
+       printk(KERN_INFO "raid0: FINAL %d zones\n", conf->nr_strip_zones);
 
        conf->strip_zone = kzalloc(sizeof(struct strip_zone)*
                                conf->nr_strip_zones, GFP_KERNEL);
@@ -119,16 +119,17 @@ static int create_strip_zones (mddev_t *mddev)
        cnt = 0;
        smallest = NULL;
        zone->dev = conf->devlist;
-       rdev_for_each(rdev1, tmp1, mddev) {
+       list_for_each_entry(rdev1, &mddev->disks, same_set) {
                int j = rdev1->raid_disk;
 
                if (j < 0 || j >= mddev->raid_disks) {
-                       printk("raid0: bad disk number %d - aborting!\n", j);
+                       printk(KERN_ERR "raid0: bad disk number %d - "
+                               "aborting!\n", j);
                        goto abort;
                }
                if (zone->dev[j]) {
-                       printk("raid0: multiple devices for %d - aborting!\n",
-                               j);
+                       printk(KERN_ERR "raid0: multiple devices for %d - "
+                               "aborting!\n", j);
                        goto abort;
                }
                zone->dev[j] = rdev1;
@@ -149,16 +150,16 @@ static int create_strip_zones (mddev_t *mddev)
                cnt++;
        }
        if (cnt != mddev->raid_disks) {
-               printk("raid0: too few disks (%d of %d) - aborting!\n",
-                       cnt, mddev->raid_disks);
+               printk(KERN_ERR "raid0: too few disks (%d of %d) - "
+                       "aborting!\n", cnt, mddev->raid_disks);
                goto abort;
        }
        zone->nb_dev = cnt;
-       zone->size = smallest->size * cnt;
-       zone->zone_offset = 0;
+       zone->sectors = smallest->size * cnt * 2;
+       zone->zone_start = 0;
 
-       current_offset = smallest->size;
-       curr_zone_offset = zone->size;
+       current_start = smallest->size * 2;
+       curr_zone_start = zone->sectors;
 
        /* now do the other zones */
        for (i = 1; i < conf->nr_strip_zones; i++)
@@ -166,40 +167,41 @@ static int create_strip_zones (mddev_t *mddev)
                zone = conf->strip_zone + i;
                zone->dev = conf->strip_zone[i-1].dev + mddev->raid_disks;
 
-               printk("raid0: zone %d\n", i);
-               zone->dev_offset = current_offset;
+               printk(KERN_INFO "raid0: zone %d\n", i);
+               zone->dev_start = current_start;
                smallest = NULL;
                c = 0;
 
                for (j=0; j<cnt; j++) {
                        char b[BDEVNAME_SIZE];
                        rdev = conf->strip_zone[0].dev[j];
-                       printk("raid0: checking %s ...", bdevname(rdev->bdev,b));
-                       if (rdev->size > current_offset)
-                       {
-                               printk(" contained as device %d\n", c);
+                       printk(KERN_INFO "raid0: checking %s ...",
+                               bdevname(rdev->bdev, b));
+                       if (rdev->size > current_start / 2) {
+                               printk(KERN_INFO " contained as device %d\n",
+                                       c);
                                zone->dev[c] = rdev;
                                c++;
                                if (!smallest || (rdev->size <smallest->size)) {
                                        smallest = rdev;
-                                       printk("  (%llu) is smallest!.\n", 
+                                       printk(KERN_INFO "  (%llu) is smallest!.\n",
                                                (unsigned long long)rdev->size);
                                }
                        } else
-                               printk(" nope.\n");
+                               printk(KERN_INFO " nope.\n");
                }
 
                zone->nb_dev = c;
-               zone->size = (smallest->size - current_offset) * c;
-               printk("raid0: zone->nb_dev: %d, size: %llu\n",
-                       zone->nb_dev, (unsigned long long)zone->size);
+               zone->sectors = (smallest->size * 2 - current_start) * c;
+               printk(KERN_INFO "raid0: zone->nb_dev: %d, sectors: %llu\n",
+                       zone->nb_dev, (unsigned long long)zone->sectors);
 
-               zone->zone_offset = curr_zone_offset;
-               curr_zone_offset += zone->size;
+               zone->zone_start = curr_zone_start;
+               curr_zone_start += zone->sectors;
 
-               current_offset = smallest->size;
-               printk("raid0: current zone offset: %llu\n",
-                       (unsigned long long)current_offset);
+               current_start = smallest->size * 2;
+               printk(KERN_INFO "raid0: current zone start: %llu\n",
+                       (unsigned long long)current_start);
        }
 
        /* Now find appropriate hash spacing.
@@ -210,16 +212,16 @@ static int create_strip_zones (mddev_t *mddev)
         * strip though as its size has no bearing on the efficacy of the hash
         * table.
         */
-       conf->hash_spacing = curr_zone_offset;
-       min_spacing = curr_zone_offset;
+       conf->spacing = curr_zone_start;
+       min_spacing = curr_zone_start;
        sector_div(min_spacing, PAGE_SIZE/sizeof(struct strip_zone*));
        for (i=0; i < conf->nr_strip_zones-1; i++) {
-               sector_t sz = 0;
-               for (j=i; j<conf->nr_strip_zones-1 &&
-                            sz < min_spacing ; j++)
-                       sz += conf->strip_zone[j].size;
-               if (sz >= min_spacing && sz < conf->hash_spacing)
-                       conf->hash_spacing = sz;
+               sector_t s = 0;
+               for (j = i; j < conf->nr_strip_zones - 1 &&
+                               s < min_spacing; j++)
+                       s += conf->strip_zone[j].sectors;
+               if (s >= min_spacing && s < conf->spacing)
+                       conf->spacing = s;
        }
 
        mddev->queue->unplug_fn = raid0_unplug;
@@ -227,7 +229,7 @@ static int create_strip_zones (mddev_t *mddev)
        mddev->queue->backing_dev_info.congested_fn = raid0_congested;
        mddev->queue->backing_dev_info.congested_data = mddev;
 
-       printk("raid0: done.\n");
+       printk(KERN_INFO "raid0: done.\n");
        return 0;
  abort:
        return 1;
@@ -262,10 +264,9 @@ static int raid0_mergeable_bvec(struct request_queue *q,
 static int raid0_run (mddev_t *mddev)
 {
        unsigned  cur=0, i=0, nb_zone;
-       s64 size;
+       s64 sectors;
        raid0_conf_t *conf;
        mdk_rdev_t *rdev;
-       struct list_head *tmp;
 
        if (mddev->chunk_size == 0) {
                printk(KERN_ERR "md/raid0: non-zero chunk size required.\n");
@@ -291,54 +292,54 @@ static int raid0_run (mddev_t *mddev)
 
        /* calculate array device size */
        mddev->array_sectors = 0;
-       rdev_for_each(rdev, tmp, mddev)
+       list_for_each_entry(rdev, &mddev->disks, same_set)
                mddev->array_sectors += rdev->size * 2;
 
-       printk("raid0 : md_size is %llu blocks.\n", 
-               (unsigned long long)mddev->array_sectors / 2);
-       printk("raid0 : conf->hash_spacing is %llu blocks.\n",
-               (unsigned long long)conf->hash_spacing);
+       printk(KERN_INFO "raid0 : md_size is %llu sectors.\n",
+               (unsigned long long)mddev->array_sectors);
+       printk(KERN_INFO "raid0 : conf->spacing is %llu sectors.\n",
+               (unsigned long long)conf->spacing);
        {
-               sector_t s = mddev->array_sectors / 2;
-               sector_t space = conf->hash_spacing;
+               sector_t s = mddev->array_sectors;
+               sector_t space = conf->spacing;
                int round;
-               conf->preshift = 0;
+               conf->sector_shift = 0;
                if (sizeof(sector_t) > sizeof(u32)) {
                        /*shift down space and s so that sector_div will work */
                        while (space > (sector_t) (~(u32)0)) {
                                s >>= 1;
                                space >>= 1;
                                s += 1; /* force round-up */
-                               conf->preshift++;
+                               conf->sector_shift++;
                        }
                }
                round = sector_div(s, (u32)space) ? 1 : 0;
                nb_zone = s + round;
        }
-       printk("raid0 : nb_zone is %d.\n", nb_zone);
+       printk(KERN_INFO "raid0 : nb_zone is %d.\n", nb_zone);
 
-       printk("raid0 : Allocating %Zd bytes for hash.\n",
+       printk(KERN_INFO "raid0 : Allocating %zu bytes for hash.\n",
                                nb_zone*sizeof(struct strip_zone*));
        conf->hash_table = kmalloc (sizeof (struct strip_zone *)*nb_zone, GFP_KERNEL);
        if (!conf->hash_table)
                goto out_free_conf;
-       size = conf->strip_zone[cur].size;
+       sectors = conf->strip_zone[cur].sectors;
 
        conf->hash_table[0] = conf->strip_zone + cur;
        for (i=1; i< nb_zone; i++) {
-               while (size <= conf->hash_spacing) {
+               while (sectors <= conf->spacing) {
                        cur++;
-                       size += conf->strip_zone[cur].size;
+                       sectors += conf->strip_zone[cur].sectors;
                }
-               size -= conf->hash_spacing;
+               sectors -= conf->spacing;
                conf->hash_table[i] = conf->strip_zone + cur;
        }
-       if (conf->preshift) {
-               conf->hash_spacing >>= conf->preshift;
-               /* round hash_spacing up so when we divide by it, we
+       if (conf->sector_shift) {
+               conf->spacing >>= conf->sector_shift;
+               /* round spacing up so when we divide by it, we
                 * err on the side of too-low, which is safest
                 */
-               conf->hash_spacing++;
+               conf->spacing++;
        }
 
        /* calculate the max read-ahead size.
@@ -387,12 +388,12 @@ static int raid0_stop (mddev_t *mddev)
 static int raid0_make_request (struct request_queue *q, struct bio *bio)
 {
        mddev_t *mddev = q->queuedata;
-       unsigned int sect_in_chunk, chunksize_bits,  chunk_size, chunk_sects;
+       unsigned int sect_in_chunk, chunksect_bits, chunk_sects;
        raid0_conf_t *conf = mddev_to_conf(mddev);
        struct strip_zone *zone;
        mdk_rdev_t *tmp_dev;
        sector_t chunk;
-       sector_t block, rsect;
+       sector_t sector, rsect;
        const int rw = bio_data_dir(bio);
        int cpu;
 
@@ -407,11 +408,9 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio)
                      bio_sectors(bio));
        part_stat_unlock();
 
-       chunk_size = mddev->chunk_size >> 10;
        chunk_sects = mddev->chunk_size >> 9;
-       chunksize_bits = ffz(~chunk_size);
-       block = bio->bi_sector >> 1;
-       
+       chunksect_bits = ffz(~chunk_sects);
+       sector = bio->bi_sector;
 
        if (unlikely(chunk_sects < (bio->bi_sector & (chunk_sects - 1)) + (bio->bi_size >> 9))) {
                struct bio_pair *bp;
@@ -434,28 +433,27 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio)
  
 
        {
-               sector_t x = block >> conf->preshift;
-               sector_div(x, (u32)conf->hash_spacing);
+               sector_t x = sector >> conf->sector_shift;
+               sector_div(x, (u32)conf->spacing);
                zone = conf->hash_table[x];
        }
-       while (block >= (zone->zone_offset + zone->size)) 
+
+       while (sector >= zone->zone_start + zone->sectors)
                zone++;
-    
-       sect_in_chunk = bio->bi_sector & ((chunk_size<<1) -1);
+
+       sect_in_chunk = bio->bi_sector & (chunk_sects - 1);
 
 
        {
-               sector_t x =  (block - zone->zone_offset) >> chunksize_bits;
+               sector_t x = (sector - zone->zone_start) >> chunksect_bits;
 
                sector_div(x, zone->nb_dev);
                chunk = x;
 
-               x = block >> chunksize_bits;
+               x = sector >> chunksect_bits;
                tmp_dev = zone->dev[sector_div(x, zone->nb_dev)];
        }
-       rsect = (((chunk << chunksize_bits) + zone->dev_offset)<<1)
-               + sect_in_chunk;
+       rsect = (chunk << chunksect_bits) + zone->dev_start + sect_in_chunk;
  
        bio->bi_bdev = tmp_dev->bdev;
        bio->bi_sector = rsect + tmp_dev->data_offset;
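As a worked example of the remapping above (all values illustrative): zone_start, dev_start and rsect are in 512-byte sectors after this patch, while rdev->size stays in KiB, hence the * 2 conversions elsewhere, and chunksect_bits = ffz(~chunk_sects) is simply log2(chunk_sects) for a power-of-two chunk size. With 64 KiB chunks (chunk_sects = 128, chunksect_bits = 7) and a two-disk zone with zone_start = dev_start = 0, an incoming sector 1000 yields sect_in_chunk = 1000 & 127 = 104 and x = 1000 >> 7 = 7, so the access lands at chunk 7 / 2 = 3 on device 7 % 2 = 1, giving rsect = (3 << 7) + 0 + 104 = 488 on the second member disk.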
@@ -467,7 +465,7 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio)
 
 bad_map:
        printk("raid0_make_request bug: can't convert block across chunks"
-               " or bigger than %dk %llu %d\n", chunk_size, 
+               " or bigger than %dk %llu %d\n", chunk_sects / 2,
                (unsigned long long)bio->bi_sector, bio->bi_size >> 10);
 
        bio_io_error(bio);
@@ -492,10 +490,10 @@ static void raid0_status (struct seq_file *seq, mddev_t *mddev)
                        seq_printf(seq, "%s/", bdevname(
                                conf->strip_zone[j].dev[k]->bdev,b));
 
-               seq_printf(seq, "] zo=%d do=%d s=%d\n",
-                               conf->strip_zone[j].zone_offset,
-                               conf->strip_zone[j].dev_offset,
-                               conf->strip_zone[j].size);
+               seq_printf(seq, "] zs=%d ds=%d s=%d\n",
+                               conf->strip_zone[j].zone_start,
+                               conf->strip_zone[j].dev_start,
+                               conf->strip_zone[j].sectors);
        }
 #endif
        seq_printf(seq, " %dk chunks", mddev->chunk_size/1024);
index 9c788e2489b18934eadb62cac4d4b33b3c121620..7b4f5f7155d8726705c1f42757f68dd1de869624 100644 (file)
@@ -1016,12 +1016,16 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
         * else mark the drive as failed
         */
        if (test_bit(In_sync, &rdev->flags)
-           && (conf->raid_disks - mddev->degraded) == 1)
+           && (conf->raid_disks - mddev->degraded) == 1) {
                /*
                 * Don't fail the drive, act as though we were just a
-                * normal single drive
+                * normal single drive.
+                * However, don't try a recovery from this drive as
+                * it is very likely to fail.
                 */
+               mddev->recovery_disabled = 1;
                return;
+       }
        if (test_and_clear_bit(In_sync, &rdev->flags)) {
                unsigned long flags;
                spin_lock_irqsave(&conf->device_lock, flags);
@@ -1919,7 +1923,6 @@ static int run(mddev_t *mddev)
        int i, j, disk_idx;
        mirror_info_t *disk;
        mdk_rdev_t *rdev;
-       struct list_head *tmp;
 
        if (mddev->level != 1) {
                printk("raid1: %s: raid level not set to mirroring (%d)\n",
@@ -1964,7 +1967,7 @@ static int run(mddev_t *mddev)
        spin_lock_init(&conf->device_lock);
        mddev->queue->queue_lock = &conf->device_lock;
 
-       rdev_for_each(rdev, tmp, mddev) {
+       list_for_each_entry(rdev, &mddev->disks, same_set) {
                disk_idx = rdev->raid_disk;
                if (disk_idx >= mddev->raid_disks
                    || disk_idx < 0)
index 970a96ef9b1841badb7b43657d46263e67a9e722..6736d6dff981c8a1a5c12e773c3d788410a0f191 100644 (file)
@@ -2025,7 +2025,6 @@ static int run(mddev_t *mddev)
        int i, disk_idx;
        mirror_info_t *disk;
        mdk_rdev_t *rdev;
-       struct list_head *tmp;
        int nc, fc, fo;
        sector_t stride, size;
 
@@ -2108,7 +2107,7 @@ static int run(mddev_t *mddev)
        spin_lock_init(&conf->device_lock);
        mddev->queue->queue_lock = &conf->device_lock;
 
-       rdev_for_each(rdev, tmp, mddev) {
+       list_for_each_entry(rdev, &mddev->disks, same_set) {
                disk_idx = rdev->raid_disk;
                if (disk_idx >= mddev->raid_disks
                    || disk_idx < 0)
index a36a7435edf51bd29cc32681cb8b211b29a9f8cb..a5ba080d303b93bb3a4d764ea621f4bf9bc191d0 100644 (file)
@@ -3998,7 +3998,6 @@ static int run(mddev_t *mddev)
        int raid_disk, memory;
        mdk_rdev_t *rdev;
        struct disk_info *disk;
-       struct list_head *tmp;
        int working_disks = 0;
 
        if (mddev->level != 5 && mddev->level != 4 && mddev->level != 6) {
@@ -4108,7 +4107,7 @@ static int run(mddev_t *mddev)
 
        pr_debug("raid5: run(%s) called.\n", mdname(mddev));
 
-       rdev_for_each(rdev, tmp, mddev) {
+       list_for_each_entry(rdev, &mddev->disks, same_set) {
                raid_disk = rdev->raid_disk;
                if (raid_disk >= conf->raid_disks
                    || raid_disk < 0)
@@ -4533,7 +4532,6 @@ static int raid5_start_reshape(mddev_t *mddev)
 {
        raid5_conf_t *conf = mddev_to_conf(mddev);
        mdk_rdev_t *rdev;
-       struct list_head *rtmp;
        int spares = 0;
        int added_devices = 0;
        unsigned long flags;
@@ -4541,7 +4539,7 @@ static int raid5_start_reshape(mddev_t *mddev)
        if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
                return -EBUSY;
 
-       rdev_for_each(rdev, rtmp, mddev)
+       list_for_each_entry(rdev, &mddev->disks, same_set)
                if (rdev->raid_disk < 0 &&
                    !test_bit(Faulty, &rdev->flags))
                        spares++;
@@ -4563,7 +4561,7 @@ static int raid5_start_reshape(mddev_t *mddev)
        /* Add some new drives, as many as will fit.
         * We know there are enough to make the newly sized array work.
         */
-       rdev_for_each(rdev, rtmp, mddev)
+       list_for_each_entry(rdev, &mddev->disks, same_set)
                if (rdev->raid_disk < 0 &&
                    !test_bit(Faulty, &rdev->flags)) {
                        if (raid5_add_disk(mddev, rdev) == 0) {
index fee7304102af8158ca3882bccb854f0f6fcb2bbe..3949a1c7345136048fac097a8e90eddf24e476d9 100644 (file)
@@ -498,6 +498,18 @@ config SGI_GRU_DEBUG
        This option enables additional debugging code for the SGI GRU driver. If
        you are unsure, say N.
 
+config DELL_LAPTOP
+       tristate "Dell Laptop Extras (EXPERIMENTAL)"
+       depends on X86
+       depends on DCDBAS
+       depends on EXPERIMENTAL
+       depends on BACKLIGHT_CLASS_DEVICE
+       depends on RFKILL
+       default n
+       ---help---
+       This driver adds support for rfkill and backlight control to Dell
+       laptops.
+
 source "drivers/misc/c2port/Kconfig"
 
 endif # MISC_DEVICES
index 817f7f5ab3bd2c21476033c195dbba93bf58d05b..5de863a0e3956dae444215549e770bdb9a2049bc 100644 (file)
@@ -18,6 +18,7 @@ obj-$(CONFIG_ICS932S401)      += ics932s401.o
 obj-$(CONFIG_TC1100_WMI)       += tc1100-wmi.o
 obj-$(CONFIG_LKDTM)            += lkdtm.o
 obj-$(CONFIG_TIFM_CORE)        += tifm_core.o
+obj-$(CONFIG_DELL_LAPTOP)      += dell-laptop.o
 obj-$(CONFIG_TIFM_7XX1)        += tifm_7xx1.o
 obj-$(CONFIG_PHANTOM)          += phantom.o
 obj-$(CONFIG_SGI_IOC4)         += ioc4.o
diff --git a/drivers/misc/dell-laptop.c b/drivers/misc/dell-laptop.c
new file mode 100644 (file)
index 0000000..4d33a20
--- /dev/null
+++ b/drivers/misc/dell-laptop.c
@@ -0,0 +1,436 @@
+/*
+ *  Driver for Dell laptop extras
+ *
+ *  Copyright (c) Red Hat <mjg@redhat.com>
+ *
+ *  Based on documentation in the libsmbios package, Copyright (C) 2005 Dell
+ *  Inc.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/backlight.h>
+#include <linux/err.h>
+#include <linux/dmi.h>
+#include <linux/io.h>
+#include <linux/rfkill.h>
+#include <linux/power_supply.h>
+#include <linux/acpi.h>
+#include "../firmware/dcdbas.h"
+
+#define BRIGHTNESS_TOKEN 0x7d
+
+/* This structure will be modified by the firmware when we enter
+ * system management mode, hence the volatiles */
+
+struct calling_interface_buffer {
+       u16 class;
+       u16 select;
+       volatile u32 input[4];
+       volatile u32 output[4];
+} __packed;
+
+struct calling_interface_token {
+       u16 tokenID;
+       u16 location;
+       union {
+               u16 value;
+               u16 stringlength;
+       };
+};
+
+struct calling_interface_structure {
+       struct dmi_header header;
+       u16 cmdIOAddress;
+       u8 cmdIOCode;
+       u32 supportedCmds;
+       struct calling_interface_token tokens[];
+} __packed;
+
+static int da_command_address;
+static int da_command_code;
+static int da_num_tokens;
+static struct calling_interface_token *da_tokens;
+
+static struct backlight_device *dell_backlight_device;
+static struct rfkill *wifi_rfkill;
+static struct rfkill *bluetooth_rfkill;
+static struct rfkill *wwan_rfkill;
+
+static const struct dmi_system_id __initdata dell_device_table[] = {
+       {
+               .ident = "Dell laptop",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+                       DMI_MATCH(DMI_CHASSIS_TYPE, "8"),
+               },
+       },
+       { }
+};
+
+static void parse_da_table(const struct dmi_header *dm)
+{
+       /* Final token is a terminator, so we don't want to copy it */
+       int tokens = (dm->length-11)/sizeof(struct calling_interface_token)-1;
+       struct calling_interface_structure *table =
+               container_of(dm, struct calling_interface_structure, header);
+
+       /* 4 bytes of table header, plus 7 bytes of Dell header, plus at least
+          6 bytes of entry */
+
+       if (dm->length < 17)
+               return;
+
+       da_command_address = table->cmdIOAddress;
+       da_command_code = table->cmdIOCode;
+
+       da_tokens = krealloc(da_tokens, (da_num_tokens + tokens) *
+                            sizeof(struct calling_interface_token),
+                            GFP_KERNEL);
+
+       if (!da_tokens)
+               return;
+
+       memcpy(da_tokens+da_num_tokens, table->tokens,
+              sizeof(struct calling_interface_token) * tokens);
+
+       da_num_tokens += tokens;
+}
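The arithmetic above follows from the structures earlier in this file: struct dmi_header is 4 bytes, the fixed Dell fields (cmdIOAddress, cmdIOCode, supportedCmds) add another 7, and each calling_interface_token is three u16s, i.e. 6 bytes. So a table with dm->length = 35 (figure illustrative) holds (35 - 11) / 6 = 4 token entries, the last of which is the terminator that parse_da_table() deliberately drops, leaving 3 usable tokens.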
+
+static void find_tokens(const struct dmi_header *dm)
+{
+       switch (dm->type) {
+       case 0xd4: /* Indexed IO */
+               break;
+       case 0xd5: /* Protected Area Type 1 */
+               break;
+       case 0xd6: /* Protected Area Type 2 */
+               break;
+       case 0xda: /* Calling interface */
+               parse_da_table(dm);
+               break;
+       }
+}
+
+static int find_token_location(int tokenid)
+{
+       int i;
+       for (i = 0; i < da_num_tokens; i++) {
+               if (da_tokens[i].tokenID == tokenid)
+                       return da_tokens[i].location;
+       }
+
+       return -1;
+}
+
+static struct calling_interface_buffer *
+dell_send_request(struct calling_interface_buffer *buffer, int class,
+                 int select)
+{
+       struct smi_cmd command;
+
+       command.magic = SMI_CMD_MAGIC;
+       command.command_address = da_command_address;
+       command.command_code = da_command_code;
+       command.ebx = virt_to_phys(buffer);
+       command.ecx = 0x42534931;
+
+       buffer->class = class;
+       buffer->select = select;
+
+       dcdbas_smi_request(&command);
+
+       return buffer;
+}
+
+/* Derived from information in DellWirelessCtl.cpp:
+   Class 17, select 11 is radio control. It returns an array of 32-bit values.
+
+   result[0]: return code
+   result[1]:
+     Bit 0:      Hardware switch supported
+     Bit 1:      Wifi locator supported
+     Bit 2:      Wifi is supported
+     Bit 3:      Bluetooth is supported
+     Bit 4:      WWAN is supported
+     Bit 5:      Wireless keyboard supported
+     Bits 6-7:   Reserved
+     Bit 8:      Wifi is installed
+     Bit 9:      Bluetooth is installed
+     Bit 10:     WWAN is installed
+     Bits 11-15: Reserved
+     Bit 16:     Hardware switch is on
+     Bit 17:     Wifi is blocked
+     Bit 18:     Bluetooth is blocked
+     Bit 19:     WWAN is blocked
+     Bits 20-31: Reserved
+   result[2]: NVRAM size in bytes
+   result[3]: NVRAM format version number
+*/
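As an illustrative reading of this table: output[1] = 0x00010104 has bits 2, 8 and 16 set, meaning wifi is supported and installed and the hardware switch is on; bit 17 is clear, so dell_rfkill_get() below would report the wifi radio as RFKILL_STATE_UNBLOCKED.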
+
+static int dell_rfkill_set(int radio, enum rfkill_state state)
+{
+       struct calling_interface_buffer buffer;
+       int disable = (state == RFKILL_STATE_UNBLOCKED) ? 0 : 1;
+
+       memset(&buffer, 0, sizeof(struct calling_interface_buffer));
+       buffer.input[0] = (1 | (radio<<8) | (disable << 16));
+       dell_send_request(&buffer, 17, 11);
+
+       return 0;
+}
+
+static int dell_wifi_set(void *data, enum rfkill_state state)
+{
+       return dell_rfkill_set(1, state);
+}
+
+static int dell_bluetooth_set(void *data, enum rfkill_state state)
+{
+       return dell_rfkill_set(2, state);
+}
+
+static int dell_wwan_set(void *data, enum rfkill_state state)
+{
+       return dell_rfkill_set(3, state);
+}
+
+static int dell_rfkill_get(int bit, enum rfkill_state *state)
+{
+       struct calling_interface_buffer buffer;
+       int status;
+       int new_state = RFKILL_STATE_HARD_BLOCKED;
+
+       memset(&buffer, 0, sizeof(struct calling_interface_buffer));
+       dell_send_request(&buffer, 17, 11);
+       status = buffer.output[1];
+
+       if (status & (1<<16))
+               new_state = RFKILL_STATE_SOFT_BLOCKED;
+
+       if (status & (1<<bit))
+               *state = new_state;
+       else
+               *state = RFKILL_STATE_UNBLOCKED;
+
+       return 0;
+}
+
+static int dell_wifi_get(void *data, enum rfkill_state *state)
+{
+       return dell_rfkill_get(17, state);
+}
+
+static int dell_bluetooth_get(void *data, enum rfkill_state *state)
+{
+       return dell_rfkill_get(18, state);
+}
+
+static int dell_wwan_get(void *data, enum rfkill_state *state)
+{
+       return dell_rfkill_get(19, state);
+}
+
+static int dell_setup_rfkill(void)
+{
+       struct calling_interface_buffer buffer;
+       int status;
+       int ret;
+
+       memset(&buffer, 0, sizeof(struct calling_interface_buffer));
+       dell_send_request(&buffer, 17, 11);
+       status = buffer.output[1];
+
+       if ((status & (1<<2|1<<8)) == (1<<2|1<<8)) {
+               wifi_rfkill = rfkill_allocate(NULL, RFKILL_TYPE_WLAN);
+               if (!wifi_rfkill)
+                       goto err_wifi;
+               wifi_rfkill->name = "dell-wifi";
+               wifi_rfkill->toggle_radio = dell_wifi_set;
+               wifi_rfkill->get_state = dell_wifi_get;
+               ret = rfkill_register(wifi_rfkill);
+               if (ret)
+                       goto err_wifi;
+       }
+
+       if ((status & (1<<3|1<<9)) == (1<<3|1<<9)) {
+               bluetooth_rfkill = rfkill_allocate(NULL, RFKILL_TYPE_BLUETOOTH);
+               if (!bluetooth_rfkill)
+                       goto err_bluetooth;
+               bluetooth_rfkill->name = "dell-bluetooth";
+               bluetooth_rfkill->toggle_radio = dell_bluetooth_set;
+               bluetooth_rfkill->get_state = dell_bluetooth_get;
+               ret = rfkill_register(bluetooth_rfkill);
+               if (ret)
+                       goto err_bluetooth;
+       }
+
+       if ((status & (1<<4|1<<10)) == (1<<4|1<<10)) {
+               wwan_rfkill = rfkill_allocate(NULL, RFKILL_TYPE_WWAN);
+               if (!wwan_rfkill)
+                       goto err_wwan;
+               wwan_rfkill->name = "dell-wwan";
+               wwan_rfkill->toggle_radio = dell_wwan_set;
+               wwan_rfkill->get_state = dell_wwan_get;
+               ret = rfkill_register(wwan_rfkill);
+               if (ret)
+                       goto err_wwan;
+       }
+
+       return 0;
+err_wwan:
+       if (wwan_rfkill)
+               rfkill_free(wwan_rfkill);
+       if (bluetooth_rfkill) {
+               rfkill_unregister(bluetooth_rfkill);
+               bluetooth_rfkill = NULL;
+       }
+err_bluetooth:
+       if (bluetooth_rfkill)
+               rfkill_free(bluetooth_rfkill);
+       if (wifi_rfkill) {
+               rfkill_unregister(wifi_rfkill);
+               wifi_rfkill = NULL;
+       }
+err_wifi:
+       if (wifi_rfkill)
+               rfkill_free(wifi_rfkill);
+
+       return ret;
+}
+
+static int dell_send_intensity(struct backlight_device *bd)
+{
+       struct calling_interface_buffer buffer;
+
+       memset(&buffer, 0, sizeof(struct calling_interface_buffer));
+       buffer.input[0] = find_token_location(BRIGHTNESS_TOKEN);
+       buffer.input[1] = bd->props.brightness;
+
+       if (buffer.input[0] == -1)
+               return -ENODEV;
+
+       if (power_supply_is_system_supplied() > 0)
+               dell_send_request(&buffer, 1, 2);
+       else
+               dell_send_request(&buffer, 1, 1);
+
+       return 0;
+}
+
+static int dell_get_intensity(struct backlight_device *bd)
+{
+       struct calling_interface_buffer buffer;
+
+       memset(&buffer, 0, sizeof(struct calling_interface_buffer));
+       buffer.input[0] = find_token_location(BRIGHTNESS_TOKEN);
+
+       if (buffer.input[0] == -1)
+               return -ENODEV;
+
+       if (power_supply_is_system_supplied() > 0)
+               dell_send_request(&buffer, 0, 2);
+       else
+               dell_send_request(&buffer, 0, 1);
+
+       return buffer.output[1];
+}
+
+static struct backlight_ops dell_ops = {
+       .get_brightness = dell_get_intensity,
+       .update_status  = dell_send_intensity,
+};
+
+static int __init dell_init(void)
+{
+       struct calling_interface_buffer buffer;
+       int max_intensity = 0;
+       int ret;
+
+       if (!dmi_check_system(dell_device_table))
+               return -ENODEV;
+
+       dmi_walk(find_tokens);
+
+       if (!da_tokens)  {
+               printk(KERN_INFO "dell-laptop: Unable to find dmi tokens\n");
+               return -ENODEV;
+       }
+
+       ret = dell_setup_rfkill();
+
+       if (ret) {
+               printk(KERN_WARNING "dell-laptop: Unable to setup rfkill\n");
+               goto out;
+       }
+
+#ifdef CONFIG_ACPI
+       /* In the event of an ACPI backlight being available, don't
+        * register the platform controller.
+        */
+       if (acpi_video_backlight_support())
+               return 0;
+#endif
+
+       memset(&buffer, 0, sizeof(struct calling_interface_buffer));
+       buffer.input[0] = find_token_location(BRIGHTNESS_TOKEN);
+
+       if (buffer.input[0] != -1) {
+               dell_send_request(&buffer, 0, 2);
+               max_intensity = buffer.output[3];
+       }
+
+       if (max_intensity) {
+               dell_backlight_device = backlight_device_register(
+                       "dell_backlight",
+                       NULL, NULL,
+                       &dell_ops);
+
+               if (IS_ERR(dell_backlight_device)) {
+                       ret = PTR_ERR(dell_backlight_device);
+                       dell_backlight_device = NULL;
+                       goto out;
+               }
+
+               dell_backlight_device->props.max_brightness = max_intensity;
+               dell_backlight_device->props.brightness =
+                       dell_get_intensity(dell_backlight_device);
+               backlight_update_status(dell_backlight_device);
+       }
+
+       return 0;
+out:
+       if (wifi_rfkill)
+               rfkill_unregister(wifi_rfkill);
+       if (bluetooth_rfkill)
+               rfkill_unregister(bluetooth_rfkill);
+       if (wwan_rfkill)
+               rfkill_unregister(wwan_rfkill);
+       kfree(da_tokens);
+       return ret;
+}
+
+static void __exit dell_exit(void)
+{
+       backlight_device_unregister(dell_backlight_device);
+       if (wifi_rfkill)
+               rfkill_unregister(wifi_rfkill);
+       if (bluetooth_rfkill)
+               rfkill_unregister(bluetooth_rfkill);
+       if (wwan_rfkill)
+               rfkill_unregister(wwan_rfkill);
+}
+
+module_init(dell_init);
+module_exit(dell_exit);
+
+MODULE_AUTHOR("Matthew Garrett <mjg@redhat.com>");
+MODULE_DESCRIPTION("Dell laptop driver");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("dmi:*svnDellInc.:*:ct8:*");
index 6fde0a2e3567d589fb4757a12eb49929b1c6c463..bc33200535fc3857a474892eec8340dbfabfbc64 100644 (file)
@@ -120,6 +120,13 @@ config MTD_PHRAM
          doesn't have access to, memory beyond the mem=xxx limit, nvram,
          memory on the video card, etc...
 
+config MTD_PS3VRAM
+       tristate "PS3 video RAM"
+       depends on FB_PS3
+       help
+         This driver allows you to use excess PS3 video RAM as volatile
+         storage or system swap.
+
 config MTD_LART
        tristate "28F160xx flash driver for LART"
        depends on SA1100_LART
index 0993d5cf3923f3a4895a6579aa18759bb76a3c72..e51521df4e40278d08a098a3d6dcd7425f35cea8 100644 (file)
@@ -16,3 +16,4 @@ obj-$(CONFIG_MTD_LART)                += lart.o
 obj-$(CONFIG_MTD_BLOCK2MTD)    += block2mtd.o
 obj-$(CONFIG_MTD_DATAFLASH)    += mtd_dataflash.o
 obj-$(CONFIG_MTD_M25P80)       += m25p80.o
+obj-$(CONFIG_MTD_PS3VRAM)      += ps3vram.o
diff --git a/drivers/mtd/devices/ps3vram.c b/drivers/mtd/devices/ps3vram.c
new file mode 100644 (file)
index 0000000..d21e9be
--- /dev/null
+++ b/drivers/mtd/devices/ps3vram.c
@@ -0,0 +1,768 @@
+/**
+ * ps3vram - Use extra PS3 video RAM as an MTD block device.
+ *
+ * Copyright (c) 2007-2008 Jim Paris <jim@jtan.com>
+ * RSX DMA support added by Vivien Chappelier <vivien.chappelier@free.fr>
+ */
+
+#include <linux/io.h>
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/slab.h>
+#include <linux/version.h>
+#include <linux/gfp.h>
+#include <linux/delay.h>
+#include <linux/mtd/mtd.h>
+
+#include <asm/lv1call.h>
+#include <asm/ps3.h>
+
+#define DEVICE_NAME            "ps3vram"
+
+#define XDR_BUF_SIZE (2 * 1024 * 1024) /* XDR buffer (must be 1MiB aligned) */
+#define XDR_IOIF 0x0c000000
+
+#define FIFO_BASE XDR_IOIF
+#define FIFO_SIZE (64 * 1024)
+
+#define DMA_PAGE_SIZE (4 * 1024)
+
+#define CACHE_PAGE_SIZE (256 * 1024)
+#define CACHE_PAGE_COUNT ((XDR_BUF_SIZE - FIFO_SIZE) / CACHE_PAGE_SIZE)
+
+#define CACHE_OFFSET CACHE_PAGE_SIZE
+#define FIFO_OFFSET 0
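These constants fix the layout of the 2 MiB XDR buffer: the command FIFO takes the first 64 KiB at FIFO_OFFSET 0, and CACHE_PAGE_COUNT works out to (2 MiB - 64 KiB) / 256 KiB = 7 under integer division, so seven 256 KiB cache pages starting at CACHE_OFFSET (256 KiB) exactly fill the buffer out to the 2 MiB mark.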
+
+#define CTRL_PUT 0x10
+#define CTRL_GET 0x11
+#define CTRL_TOP 0x15
+
+#define UPLOAD_SUBCH   1
+#define DOWNLOAD_SUBCH 2
+
+#define NV_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN   0x0000030c
+#define NV_MEMORY_TO_MEMORY_FORMAT_NOTIFY      0x00000104
+
+#define L1GPU_CONTEXT_ATTRIBUTE_FB_BLIT 0x601
+
+struct mtd_info ps3vram_mtd;
+
+#define CACHE_PAGE_PRESENT 1
+#define CACHE_PAGE_DIRTY   2
+
+struct ps3vram_tag {
+       unsigned int address;
+       unsigned int flags;
+};
+
+struct ps3vram_cache {
+       unsigned int page_count;
+       unsigned int page_size;
+       struct ps3vram_tag *tags;
+};
+
+struct ps3vram_priv {
+       u64 memory_handle;
+       u64 context_handle;
+       u32 *ctrl;
+       u32 *reports;
+       u8 __iomem *ddr_base;
+       u8 *xdr_buf;
+
+       u32 *fifo_base;
+       u32 *fifo_ptr;
+
+       struct device *dev;
+       struct ps3vram_cache cache;
+
+       /* Used to serialize cache/DMA operations */
+       struct mutex lock;
+};
+
+#define DMA_NOTIFIER_HANDLE_BASE 0x66604200 /* first DMA notifier handle */
+#define DMA_NOTIFIER_OFFSET_BASE 0x1000     /* first DMA notifier offset */
+#define DMA_NOTIFIER_SIZE        0x40
+#define NOTIFIER 7     /* notifier used for completion report */
+
+/* A trailing '-' means to subtract off ps3fb_videomemory.size */
+char *size = "256M-";
+module_param(size, charp, 0);
+MODULE_PARM_DESC(size, "memory size");
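Per the comment above, a trailing '-' subtracts ps3fb_videomemory.size from the stated amount, so size=256M- with ps3fb holding 16 MiB (figure illustrative) would leave 240 MiB of video RAM for the MTD device.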
+
+static u32 *ps3vram_get_notifier(u32 *reports, int notifier)
+{
+       return (void *) reports +
+               DMA_NOTIFIER_OFFSET_BASE +
+               DMA_NOTIFIER_SIZE * notifier;
+}
+
+static void ps3vram_notifier_reset(struct mtd_info *mtd)
+{
+       int i;
+
+       struct ps3vram_priv *priv = mtd->priv;
+       u32 *notify = ps3vram_get_notifier(priv->reports, NOTIFIER);
+       for (i = 0; i < 4; i++)
+               notify[i] = 0xffffffff;
+}
+
+static int ps3vram_notifier_wait(struct mtd_info *mtd, unsigned int timeout_ms)
+{
+       struct ps3vram_priv *priv = mtd->priv;
+       u32 *notify = ps3vram_get_notifier(priv->reports, NOTIFIER);
+       unsigned long timeout = jiffies + msecs_to_jiffies(timeout_ms);
+
+       do {
+               if (!notify[3])
+                       return 0;
+               msleep(1);
+       } while (time_before(jiffies, timeout));
+
+       return -ETIMEDOUT;
+}
+
+static void ps3vram_init_ring(struct mtd_info *mtd)
+{
+       struct ps3vram_priv *priv = mtd->priv;
+
+       priv->ctrl[CTRL_PUT] = FIFO_BASE + FIFO_OFFSET;
+       priv->ctrl[CTRL_GET] = FIFO_BASE + FIFO_OFFSET;
+}
+
+static int ps3vram_wait_ring(struct mtd_info *mtd, unsigned int timeout_ms)
+{
+       struct ps3vram_priv *priv = mtd->priv;
+       unsigned long timeout = jiffies + msecs_to_jiffies(timeout_ms);
+
+       do {
+               if (priv->ctrl[CTRL_PUT] == priv->ctrl[CTRL_GET])
+                       return 0;
+               msleep(1);
+       } while (time_before(jiffies, timeout));
+
+       dev_dbg(priv->dev, "%s:%d: FIFO timeout (%08x/%08x/%08x)\n", __func__,
+               __LINE__, priv->ctrl[CTRL_PUT], priv->ctrl[CTRL_GET],
+               priv->ctrl[CTRL_TOP]);
+
+       return -ETIMEDOUT;
+}
+
+static void ps3vram_out_ring(struct ps3vram_priv *priv, u32 data)
+{
+       *(priv->fifo_ptr)++ = data;
+}
+
+static void ps3vram_begin_ring(struct ps3vram_priv *priv, u32 chan,
+                                     u32 tag, u32 size)
+{
+       ps3vram_out_ring(priv, (size << 18) | (chan << 13) | tag);
+}
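The word written by ps3vram_begin_ring() packs the data-word count above bit 18, the subchannel at bits 13-15, and the method offset in the low 13 bits (assuming each field fits its width). A worked example matching the ps3vram_bind() call below:

#include <stdio.h>

int main(void)
{
	unsigned int chan = 1;    /* UPLOAD_SUBCH */
	unsigned int tag = 0x180; /* method offset */
	unsigned int size = 3;    /* data words that follow */

	/* same packing as ps3vram_begin_ring() */
	printf("0x%08x\n", (size << 18) | (chan << 13) | tag); /* 0x000c2180 */
	return 0;
}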
+
+static void ps3vram_rewind_ring(struct mtd_info *mtd)
+{
+       struct ps3vram_priv *priv = mtd->priv;
+       u64 status;
+
+       ps3vram_out_ring(priv, 0x20000000 | (FIFO_BASE + FIFO_OFFSET));
+
+       priv->ctrl[CTRL_PUT] = FIFO_BASE + FIFO_OFFSET;
+
+       /* asking the HV for a blit will kick the fifo */
+       status = lv1_gpu_context_attribute(priv->context_handle,
+                                          L1GPU_CONTEXT_ATTRIBUTE_FB_BLIT,
+                                          0, 0, 0, 0);
+       if (status)
+               dev_err(priv->dev, "%s:%d: lv1_gpu_context_attribute failed\n",
+                       __func__, __LINE__);
+
+       priv->fifo_ptr = priv->fifo_base;
+}
+
+static void ps3vram_fire_ring(struct mtd_info *mtd)
+{
+       struct ps3vram_priv *priv = mtd->priv;
+       u64 status;
+
+       mutex_lock(&ps3_gpu_mutex);
+
+       priv->ctrl[CTRL_PUT] = FIFO_BASE + FIFO_OFFSET +
+               (priv->fifo_ptr - priv->fifo_base) * sizeof(u32);
+
+       /* asking the HV for a blit will kick the fifo */
+       status = lv1_gpu_context_attribute(priv->context_handle,
+                                          L1GPU_CONTEXT_ATTRIBUTE_FB_BLIT,
+                                          0, 0, 0, 0);
+       if (status)
+               dev_err(priv->dev, "%s:%d: lv1_gpu_context_attribute failed\n",
+                       __func__, __LINE__);
+
+       if ((priv->fifo_ptr - priv->fifo_base) * sizeof(u32) >
+               FIFO_SIZE - 1024) {
+               dev_dbg(priv->dev, "%s:%d: fifo full, rewinding\n", __func__,
+                       __LINE__);
+               ps3vram_wait_ring(mtd, 200);
+               ps3vram_rewind_ring(mtd);
+       }
+
+       mutex_unlock(&ps3_gpu_mutex);
+}
+
+static void ps3vram_bind(struct mtd_info *mtd)
+{
+       struct ps3vram_priv *priv = mtd->priv;
+
+       ps3vram_begin_ring(priv, UPLOAD_SUBCH, 0, 1);
+       ps3vram_out_ring(priv, 0x31337303);
+       ps3vram_begin_ring(priv, UPLOAD_SUBCH, 0x180, 3);
+       ps3vram_out_ring(priv, DMA_NOTIFIER_HANDLE_BASE + NOTIFIER);
+       ps3vram_out_ring(priv, 0xfeed0001);     /* DMA system RAM instance */
+       ps3vram_out_ring(priv, 0xfeed0000);     /* DMA video RAM instance */
+
+       ps3vram_begin_ring(priv, DOWNLOAD_SUBCH, 0, 1);
+       ps3vram_out_ring(priv, 0x3137c0de);
+       ps3vram_begin_ring(priv, DOWNLOAD_SUBCH, 0x180, 3);
+       ps3vram_out_ring(priv, DMA_NOTIFIER_HANDLE_BASE + NOTIFIER);
+       ps3vram_out_ring(priv, 0xfeed0000);     /* DMA video RAM instance */
+       ps3vram_out_ring(priv, 0xfeed0001);     /* DMA system RAM instance */
+
+       ps3vram_fire_ring(mtd);
+}
+
+static int ps3vram_upload(struct mtd_info *mtd, unsigned int src_offset,
+                         unsigned int dst_offset, int len, int count)
+{
+       struct ps3vram_priv *priv = mtd->priv;
+
+       ps3vram_begin_ring(priv, UPLOAD_SUBCH,
+                          NV_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 8);
+       ps3vram_out_ring(priv, XDR_IOIF + src_offset);
+       ps3vram_out_ring(priv, dst_offset);
+       ps3vram_out_ring(priv, len);
+       ps3vram_out_ring(priv, len);
+       ps3vram_out_ring(priv, len);
+       ps3vram_out_ring(priv, count);
+       ps3vram_out_ring(priv, (1 << 8) | 1);
+       ps3vram_out_ring(priv, 0);
+
+       ps3vram_notifier_reset(mtd);
+       ps3vram_begin_ring(priv, UPLOAD_SUBCH,
+                          NV_MEMORY_TO_MEMORY_FORMAT_NOTIFY, 1);
+       ps3vram_out_ring(priv, 0);
+       ps3vram_begin_ring(priv, UPLOAD_SUBCH, 0x100, 1);
+       ps3vram_out_ring(priv, 0);
+       ps3vram_fire_ring(mtd);
+       if (ps3vram_notifier_wait(mtd, 200) < 0) {
+               dev_dbg(priv->dev, "%s:%d: notifier timeout\n", __func__,
+                       __LINE__);
+               return -1;
+       }
+
+       return 0;
+}
+
+static int ps3vram_download(struct mtd_info *mtd, unsigned int src_offset,
+                           unsigned int dst_offset, int len, int count)
+{
+       struct ps3vram_priv *priv = mtd->priv;
+
+       ps3vram_begin_ring(priv, DOWNLOAD_SUBCH,
+                          NV_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 8);
+       ps3vram_out_ring(priv, src_offset);
+       ps3vram_out_ring(priv, XDR_IOIF + dst_offset);
+       ps3vram_out_ring(priv, len);
+       ps3vram_out_ring(priv, len);
+       ps3vram_out_ring(priv, len);
+       ps3vram_out_ring(priv, count);
+       ps3vram_out_ring(priv, (1 << 8) | 1);
+       ps3vram_out_ring(priv, 0);
+
+       ps3vram_notifier_reset(mtd);
+       ps3vram_begin_ring(priv, DOWNLOAD_SUBCH,
+                          NV_MEMORY_TO_MEMORY_FORMAT_NOTIFY, 1);
+       ps3vram_out_ring(priv, 0);
+       ps3vram_begin_ring(priv, DOWNLOAD_SUBCH, 0x100, 1);
+       ps3vram_out_ring(priv, 0);
+       ps3vram_fire_ring(mtd);
+       if (ps3vram_notifier_wait(mtd, 200) < 0) {
+               dev_dbg(priv->dev, "%s:%d: notifier timeout\n", __func__,
+                       __LINE__);
+               return -1;
+       }
+
+       return 0;
+}
+
+static void ps3vram_cache_evict(struct mtd_info *mtd, int entry)
+{
+       struct ps3vram_priv *priv = mtd->priv;
+       struct ps3vram_cache *cache = &priv->cache;
+
+       if (cache->tags[entry].flags & CACHE_PAGE_DIRTY) {
+               dev_dbg(priv->dev, "%s:%d: flushing %d : 0x%08x\n", __func__,
+                       __LINE__, entry, cache->tags[entry].address);
+               if (ps3vram_upload(mtd,
+                                  CACHE_OFFSET + entry * cache->page_size,
+                                  cache->tags[entry].address,
+                                  DMA_PAGE_SIZE,
+                                  cache->page_size / DMA_PAGE_SIZE) < 0) {
+                       dev_dbg(priv->dev, "%s:%d: failed to upload from "
+                               "0x%x to 0x%x size 0x%x\n", __func__, __LINE__,
+                               entry * cache->page_size,
+                               cache->tags[entry].address, cache->page_size);
+               }
+               cache->tags[entry].flags &= ~CACHE_PAGE_DIRTY;
+       }
+}
+
+static void ps3vram_cache_load(struct mtd_info *mtd, int entry,
+                              unsigned int address)
+{
+       struct ps3vram_priv *priv = mtd->priv;
+       struct ps3vram_cache *cache = &priv->cache;
+
+       dev_dbg(priv->dev, "%s:%d: fetching %d : 0x%08x\n", __func__, __LINE__,
+               entry, address);
+       if (ps3vram_download(mtd,
+                            address,
+                            CACHE_OFFSET + entry * cache->page_size,
+                            DMA_PAGE_SIZE,
+                            cache->page_size / DMA_PAGE_SIZE) < 0) {
+               dev_err(priv->dev, "%s:%d: failed to download from "
+                       "0x%x to 0x%x size 0x%x\n", __func__, __LINE__, address,
+                       entry * cache->page_size, cache->page_size);
+       }
+
+       cache->tags[entry].address = address;
+       cache->tags[entry].flags |= CACHE_PAGE_PRESENT;
+}
+
+
+static void ps3vram_cache_flush(struct mtd_info *mtd)
+{
+       struct ps3vram_priv *priv = mtd->priv;
+       struct ps3vram_cache *cache = &priv->cache;
+       int i;
+
+       dev_dbg(priv->dev, "%s:%d: FLUSH\n", __func__, __LINE__);
+       for (i = 0; i < cache->page_count; i++) {
+               ps3vram_cache_evict(mtd, i);
+               cache->tags[i].flags = 0;
+       }
+}
+
+static unsigned int ps3vram_cache_match(struct mtd_info *mtd, loff_t address)
+{
+       struct ps3vram_priv *priv = mtd->priv;
+       struct ps3vram_cache *cache = &priv->cache;
+       unsigned int base;
+       unsigned int offset;
+       int i;
+       static int counter;
+
+       offset = (unsigned int) (address & (cache->page_size - 1));
+       base = (unsigned int) (address - offset);
+
+       /* fully associative check */
+       for (i = 0; i < cache->page_count; i++) {
+               if ((cache->tags[i].flags & CACHE_PAGE_PRESENT) &&
+                   cache->tags[i].address == base) {
+                       dev_dbg(priv->dev, "%s:%d: found entry %d : 0x%08x\n",
+                               __func__, __LINE__, i, cache->tags[i].address);
+                       return i;
+               }
+       }
+
+       /* choose a random entry */
+       i = (jiffies + (counter++)) % cache->page_count;
+       dev_dbg(priv->dev, "%s:%d: using entry %d\n", __func__, __LINE__, i);
+
+       ps3vram_cache_evict(mtd, i);
+       ps3vram_cache_load(mtd, i, base);
+
+       return i;
+}
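A worked example of the split done above with this driver's 256 KiB pages: for address 0x12345678, offset = 0x12345678 & 0x3ffff = 0x5678 and base = 0x12340000. On a miss, (jiffies + counter++) % page_count picks a cheap pseudo-random victim rather than tracking LRU:

#include <stdio.h>

int main(void)
{
	unsigned long long address = 0x12345678ULL;
	unsigned int page_size = 256 * 1024;	/* CACHE_PAGE_SIZE */
	unsigned int offset = address & (page_size - 1);
	unsigned int base = address - offset;

	/* prints: offset=0x5678 base=0x12340000 */
	printf("offset=0x%x base=0x%x\n", offset, base);
	return 0;
}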
+
+static int ps3vram_cache_init(struct mtd_info *mtd)
+{
+       struct ps3vram_priv *priv = mtd->priv;
+
+       priv->cache.page_count = CACHE_PAGE_COUNT;
+       priv->cache.page_size = CACHE_PAGE_SIZE;
+       priv->cache.tags = kzalloc(sizeof(struct ps3vram_tag) *
+                                  CACHE_PAGE_COUNT, GFP_KERNEL);
+       if (priv->cache.tags == NULL) {
+               dev_err(priv->dev, "%s:%d: could not allocate cache tags\n",
+                       __func__, __LINE__);
+               return -ENOMEM;
+       }
+
+       dev_info(priv->dev, "created ram cache: %d entries, %d KiB each\n",
+               CACHE_PAGE_COUNT, CACHE_PAGE_SIZE / 1024);
+
+       return 0;
+}
+
+static void ps3vram_cache_cleanup(struct mtd_info *mtd)
+{
+       struct ps3vram_priv *priv = mtd->priv;
+
+       ps3vram_cache_flush(mtd);
+       kfree(priv->cache.tags);
+}
+
+static int ps3vram_erase(struct mtd_info *mtd, struct erase_info *instr)
+{
+       struct ps3vram_priv *priv = mtd->priv;
+
+       if (instr->addr + instr->len > mtd->size)
+               return -EINVAL;
+
+       mutex_lock(&priv->lock);
+
+       ps3vram_cache_flush(mtd);
+
+       /* Set bytes to 0xFF */
+       memset_io(priv->ddr_base + instr->addr, 0xFF, instr->len);
+
+       mutex_unlock(&priv->lock);
+
+       instr->state = MTD_ERASE_DONE;
+       mtd_erase_callback(instr);
+
+       return 0;
+}
+
+static int ps3vram_read(struct mtd_info *mtd, loff_t from, size_t len,
+                       size_t *retlen, u_char *buf)
+{
+       struct ps3vram_priv *priv = mtd->priv;
+       unsigned int cached, count;
+
+       dev_dbg(priv->dev, "%s:%d: from=0x%08x len=0x%zx\n", __func__, __LINE__,
+               (unsigned int)from, len);
+
+       if (from >= mtd->size)
+               return -EINVAL;
+
+       if (len > mtd->size - from)
+               len = mtd->size - from;
+
+       /* Copy from vram to buf */
+       count = len;
+       while (count) {
+               unsigned int offset, avail;
+               unsigned int entry;
+
+               offset = (unsigned int) (from & (priv->cache.page_size - 1));
+               avail  = priv->cache.page_size - offset;
+
+               mutex_lock(&priv->lock);
+
+               entry = ps3vram_cache_match(mtd, from);
+               cached = CACHE_OFFSET + entry * priv->cache.page_size + offset;
+
+               dev_dbg(priv->dev, "%s:%d: from=%08x cached=%08x offset=%08x "
+                       "avail=%08x count=%08x\n", __func__, __LINE__,
+                       (unsigned int)from, cached, offset, avail, count);
+
+               if (avail > count)
+                       avail = count;
+               memcpy(buf, priv->xdr_buf + cached, avail);
+
+               mutex_unlock(&priv->lock);
+
+               buf += avail;
+               count -= avail;
+               from += avail;
+       }
+
+       *retlen = len;
+       return 0;
+}
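A worked example of the loop above, assuming the 256 KiB (0x40000) cache pages: a read of len 0x2000 starting at from = 0x3f000 computes offset = 0x3f000 and avail = 0x1000, copies those 0x1000 bytes from the tail of one cached page, then iterates with from = 0x40000, offset = 0, and copies the remaining 0x1000 bytes from the start of the next page.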
+
+static int ps3vram_write(struct mtd_info *mtd, loff_t to, size_t len,
+                        size_t *retlen, const u_char *buf)
+{
+       struct ps3vram_priv *priv = mtd->priv;
+       unsigned int cached, count;
+
+       if (to >= mtd->size)
+               return -EINVAL;
+
+       if (len > mtd->size - to)
+               len = mtd->size - to;
+
+       /* Copy from buf to vram */
+       count = len;
+       while (count) {
+               unsigned int offset, avail;
+               unsigned int entry;
+
+               offset = (unsigned int) (to & (priv->cache.page_size - 1));
+               avail  = priv->cache.page_size - offset;
+
+               mutex_lock(&priv->lock);
+
+               entry = ps3vram_cache_match(mtd, to);
+               cached = CACHE_OFFSET + entry * priv->cache.page_size + offset;
+
+               dev_dbg(priv->dev, "%s:%d: to=%08x cached=%08x offset=%08x "
+                       "avail=%08x count=%08x\n", __func__, __LINE__,
+                       (unsigned int)to, cached, offset, avail, count);
+
+               if (avail > count)
+                       avail = count;
+               memcpy(priv->xdr_buf + cached, buf, avail);
+
+               priv->cache.tags[entry].flags |= CACHE_PAGE_DIRTY;
+
+               mutex_unlock(&priv->lock);
+
+               buf += avail;
+               count -= avail;
+               to += avail;
+       }
+
+       *retlen = len;
+       return 0;
+}
+
+static int __devinit ps3vram_probe(struct ps3_system_bus_device *dev)
+{
+       struct ps3vram_priv *priv;
+       int status;
+       u64 ddr_lpar;
+       u64 ctrl_lpar;
+       u64 info_lpar;
+       u64 reports_lpar;
+       u64 ddr_size;
+       u64 reports_size;
+       int ret = -ENOMEM;
+       char *rest;
+
+       ret = -EIO;
+       ps3vram_mtd.priv = kzalloc(sizeof(struct ps3vram_priv), GFP_KERNEL);
+       if (!ps3vram_mtd.priv)
+               goto out;
+       priv = ps3vram_mtd.priv;
+
+       mutex_init(&priv->lock);
+       priv->dev = &dev->core;
+
+       /* Allocate XDR buffer (1MiB aligned) */
+       priv->xdr_buf = (void *)__get_free_pages(GFP_KERNEL,
+               get_order(XDR_BUF_SIZE));
+       if (priv->xdr_buf == NULL) {
+               dev_dbg(&dev->core, "%s:%d: could not allocate XDR buffer\n",
+                       __func__, __LINE__);
+               ret = -ENOMEM;
+               goto out_free_priv;
+       }
+
+       /* Put FIFO at beginning of XDR buffer */
+       priv->fifo_base = (u32 *) (priv->xdr_buf + FIFO_OFFSET);
+       priv->fifo_ptr = priv->fifo_base;
+
+       /* XXX: Need to open GPU, in case ps3fb or snd_ps3 aren't loaded */
+       if (ps3_open_hv_device(dev)) {
+               dev_err(&dev->core, "%s:%d: ps3_open_hv_device failed\n",
+                       __func__, __LINE__);
+               ret = -EAGAIN;
+               goto out_close_gpu;
+       }
+
+       /* Request memory */
+       status = -1;
+       ddr_size = memparse(size, &rest);
+       if (*rest == '-')
+               ddr_size -= ps3fb_videomemory.size;
+       ddr_size = ALIGN(ddr_size, 1024*1024);
+       if (ddr_size <= 0) {
+               dev_err(&dev->core, "%s:%d: specified size is too small\n",
+                       __func__, __LINE__);
+               ret = -EINVAL;
+               goto out_close_gpu;
+       }
+
+       while (ddr_size > 0) {
+               status = lv1_gpu_memory_allocate(ddr_size, 0, 0, 0, 0,
+                                                &priv->memory_handle,
+                                                &ddr_lpar);
+               if (!status)
+                       break;
+               ddr_size -= 1024*1024;
+       }
+       if (status || ddr_size <= 0) {
+               dev_err(&dev->core, "%s:%d: lv1_gpu_memory_allocate failed\n",
+                       __func__, __LINE__);
+               ret = -ENOMEM;
+               goto out_free_xdr_buf;
+       }
+
+       /* Request context */
+       status = lv1_gpu_context_allocate(priv->memory_handle,
+                                         0,
+                                         &priv->context_handle,
+                                         &ctrl_lpar,
+                                         &info_lpar,
+                                         &reports_lpar,
+                                         &reports_size);
+       if (status) {
+               dev_err(&dev->core, "%s:%d: lv1_gpu_context_allocate failed\n",
+                       __func__, __LINE__);
+               ret = -ENOMEM;
+               goto out_free_memory;
+       }
+
+       /* Map XDR buffer to RSX */
+       status = lv1_gpu_context_iomap(priv->context_handle, XDR_IOIF,
+                                      ps3_mm_phys_to_lpar(__pa(priv->xdr_buf)),
+                                      XDR_BUF_SIZE, 0);
+       if (status) {
+               dev_err(&dev->core, "%s:%d: lv1_gpu_context_iomap failed\n",
+                       __func__, __LINE__);
+               ret = -ENOMEM;
+               goto out_free_context;
+       }
+
+       priv->ddr_base = ioremap_flags(ddr_lpar, ddr_size, _PAGE_NO_CACHE);
+
+       if (!priv->ddr_base) {
+               dev_err(&dev->core, "%s:%d: ioremap failed\n", __func__,
+                       __LINE__);
+               ret = -ENOMEM;
+               goto out_free_context;
+       }
+
+       priv->ctrl = ioremap(ctrl_lpar, 64 * 1024);
+       if (!priv->ctrl) {
+               dev_err(&dev->core, "%s:%d: ioremap failed\n", __func__,
+                       __LINE__);
+               ret = -ENOMEM;
+               goto out_unmap_vram;
+       }
+
+       priv->reports = ioremap(reports_lpar, reports_size);
+       if (!priv->reports) {
+               dev_err(&dev->core, "%s:%d: ioremap failed\n", __func__,
+                       __LINE__);
+               ret = -ENOMEM;
+               goto out_unmap_ctrl;
+       }
+
+       mutex_lock(&ps3_gpu_mutex);
+       ps3vram_init_ring(&ps3vram_mtd);
+       mutex_unlock(&ps3_gpu_mutex);
+
+       ps3vram_mtd.name = "ps3vram";
+       ps3vram_mtd.size = ddr_size;
+       ps3vram_mtd.flags = MTD_CAP_RAM;
+       ps3vram_mtd.erase = ps3vram_erase;
+       ps3vram_mtd.point = NULL;
+       ps3vram_mtd.unpoint = NULL;
+       ps3vram_mtd.read = ps3vram_read;
+       ps3vram_mtd.write = ps3vram_write;
+       ps3vram_mtd.owner = THIS_MODULE;
+       ps3vram_mtd.type = MTD_RAM;
+       ps3vram_mtd.erasesize = CACHE_PAGE_SIZE;
+       ps3vram_mtd.writesize = 1;
+
+       ps3vram_bind(&ps3vram_mtd);
+
+       mutex_lock(&ps3_gpu_mutex);
+       ret = ps3vram_wait_ring(&ps3vram_mtd, 100);
+       mutex_unlock(&ps3_gpu_mutex);
+       if (ret < 0) {
+               dev_err(&dev->core, "%s:%d: failed to initialize channels\n",
+                       __func__, __LINE__);
+               ret = -ETIMEDOUT;
+               goto out_unmap_reports;
+       }
+
+       ps3vram_cache_init(&ps3vram_mtd);
+
+       if (add_mtd_device(&ps3vram_mtd)) {
+               dev_err(&dev->core, "%s:%d: add_mtd_device failed\n",
+                       __func__, __LINE__);
+               ret = -EAGAIN;
+               goto out_cache_cleanup;
+       }
+
+       dev_info(&dev->core, "reserved %u MiB of gpu memory\n",
+               (unsigned int)(ddr_size / 1024 / 1024));
+
+       return 0;
+
+out_cache_cleanup:
+       ps3vram_cache_cleanup(&ps3vram_mtd);
+out_unmap_reports:
+       iounmap(priv->reports);
+out_unmap_ctrl:
+       iounmap(priv->ctrl);
+out_unmap_vram:
+       iounmap(priv->ddr_base);
+out_free_context:
+       lv1_gpu_context_free(priv->context_handle);
+out_free_memory:
+       lv1_gpu_memory_free(priv->memory_handle);
+out_close_gpu:
+       ps3_close_hv_device(dev);
+out_free_xdr_buf:
+       free_pages((unsigned long) priv->xdr_buf, get_order(XDR_BUF_SIZE));
+out_free_priv:
+       kfree(ps3vram_mtd.priv);
+       ps3vram_mtd.priv = NULL;
+out:
+       return ret;
+}
+
+static int ps3vram_shutdown(struct ps3_system_bus_device *dev)
+{
+       struct ps3vram_priv *priv;
+
+       priv = ps3vram_mtd.priv;
+
+       del_mtd_device(&ps3vram_mtd);
+       ps3vram_cache_cleanup(&ps3vram_mtd);
+       iounmap(priv->reports);
+       iounmap(priv->ctrl);
+       iounmap(priv->ddr_base);
+       lv1_gpu_context_free(priv->context_handle);
+       lv1_gpu_memory_free(priv->memory_handle);
+       ps3_close_hv_device(dev);
+       free_pages((unsigned long) priv->xdr_buf, get_order(XDR_BUF_SIZE));
+       kfree(priv);
+       return 0;
+}
+
+static struct ps3_system_bus_driver ps3vram_driver = {
+       .match_id       = PS3_MATCH_ID_GPU,
+       .match_sub_id   = PS3_MATCH_SUB_ID_GPU_RAMDISK,
+       .core.name      = DEVICE_NAME,
+       .core.owner     = THIS_MODULE,
+       .probe          = ps3vram_probe,
+       .remove         = ps3vram_shutdown,
+       .shutdown       = ps3vram_shutdown,
+};
+
+static int __init ps3vram_init(void)
+{
+       return ps3_system_bus_driver_register(&ps3vram_driver);
+}
+
+static void __exit ps3vram_exit(void)
+{
+       ps3_system_bus_driver_unregister(&ps3vram_driver);
+}
+
+module_init(ps3vram_init);
+module_exit(ps3vram_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jim Paris <jim@jtan.com>");
+MODULE_DESCRIPTION("MTD driver for PS3 video RAM");
+MODULE_ALIAS(PS3_MODULE_ALIAS_GPU_RAMDISK);
index 5d9bcf109c13be5e584b17bf345b6ddd016cc876..4abbe573fa40300e05b26526c030749bd6621759 100644 (file)
@@ -564,7 +564,7 @@ EXPORT_SYMBOL_GPL(ubi_leb_unmap);
  * @dtype: expected data type
  *
  * This function maps an un-mapped logical eraseblock @lnum to a physical
- * eraseblock. This means, that after a successfull invocation of this
+ * eraseblock. This means, that after a successful invocation of this
  * function the logical eraseblock @lnum will be empty (contain only %0xFF
  * bytes) and be mapped to a physical eraseblock, even if an unclean reboot
  * happens.
index 7e2b1a67e5da3c505cddea78fde4406ab80254fd..b65b4feb2d287559383a2913f8d2bc8bd7cb7252 100644 (file)
@@ -594,7 +594,7 @@ int ath5k_hw_get_isr(struct ath5k_hw *ah, enum ath5k_int *interrupt_mask)
                 * XXX: BMISS interrupts may occur after association.
                 * I found this on 5210 code but it needs testing. If this is
                 * true we should disable them before assoc and re-enable them
-                * after a successfull assoc + some jiffies.
+                * after a successful assoc + some jiffies.
                        interrupt_mask &= ~AR5K_INT_BMISS;
                 */
        }
index 9caa96a135866c8c79cbc240d63ebfadc9c5e699..a611ad8579832ad3451faec3dac6b96a4355fa6c 100644 (file)
@@ -287,7 +287,7 @@ static void zd_op_stop(struct ieee80211_hw *hw)
  * @skb - a sk-buffer
  * @flags: extra flags to set in the TX status info
  * @ackssi: ACK signal strength
- * @success - True for successfull transmission of the frame
+ * @success - True for successful transmission of the frame
  *
  * This information calls ieee80211_tx_status_irqsafe() if required by the
  * control information. It copies the control information into the status
index 162330b9d1dc0069ab94397e071d1d04e5c78865..7e5155e88ac75a19c9a4f6809298da344f020516 100644 (file)
@@ -86,13 +86,11 @@ enum ds_type {
 
 
 struct ds1307 {
-       u8                      reg_addr;
        u8                      regs[11];
        enum ds_type            type;
        unsigned long           flags;
 #define HAS_NVRAM      0               /* bit 0 == sysfs file active */
 #define HAS_ALARM      1               /* bit 1 == irq claimed */
-       struct i2c_msg          msg[2];
        struct i2c_client       *client;
        struct rtc_device       *rtc;
        struct work_struct      work;
@@ -204,13 +202,9 @@ static int ds1307_get_time(struct device *dev, struct rtc_time *t)
        int             tmp;
 
        /* read the RTC date and time registers all at once */
-       ds1307->reg_addr = 0;
-       ds1307->msg[1].flags = I2C_M_RD;
-       ds1307->msg[1].len = 7;
-
-       tmp = i2c_transfer(to_i2c_adapter(ds1307->client->dev.parent),
-                       ds1307->msg, 2);
-       if (tmp != 2) {
+       tmp = i2c_smbus_read_i2c_block_data(ds1307->client,
+               DS1307_REG_SECS, 7, ds1307->regs);
+       if (tmp != 7) {
                dev_err(dev, "%s error %d\n", "read", tmp);
                return -EIO;
        }
@@ -257,7 +251,6 @@ static int ds1307_set_time(struct device *dev, struct rtc_time *t)
                t->tm_hour, t->tm_mday,
                t->tm_mon, t->tm_year, t->tm_wday);
 
-       *buf++ = 0;             /* first register addr */
        buf[DS1307_REG_SECS] = bin2bcd(t->tm_sec);
        buf[DS1307_REG_MIN] = bin2bcd(t->tm_min);
        buf[DS1307_REG_HOUR] = bin2bcd(t->tm_hour);
@@ -282,23 +275,19 @@ static int ds1307_set_time(struct device *dev, struct rtc_time *t)
                break;
        }
 
-       ds1307->msg[1].flags = 0;
-       ds1307->msg[1].len = 8;
-
        dev_dbg(dev, "%s: %02x %02x %02x %02x %02x %02x %02x\n",
                "write", buf[0], buf[1], buf[2], buf[3],
                buf[4], buf[5], buf[6]);
 
-       result = i2c_transfer(to_i2c_adapter(ds1307->client->dev.parent),
-                       &ds1307->msg[1], 1);
-       if (result != 1) {
-               dev_err(dev, "%s error %d\n", "write", tmp);
-               return -EIO;
+       result = i2c_smbus_write_i2c_block_data(ds1307->client, 0, 7, buf);
+       if (result < 0) {
+               dev_err(dev, "%s error %d\n", "write", result);
+               return result;
        }
        return 0;
 }
 
-static int ds1307_read_alarm(struct device *dev, struct rtc_wkalrm *t)
+static int ds1337_read_alarm(struct device *dev, struct rtc_wkalrm *t)
 {
        struct i2c_client       *client = to_i2c_client(dev);
        struct ds1307           *ds1307 = i2c_get_clientdata(client);
@@ -308,13 +297,9 @@ static int ds1307_read_alarm(struct device *dev, struct rtc_wkalrm *t)
                return -EINVAL;
 
        /* read all ALARM1, ALARM2, and status registers at once */
-       ds1307->reg_addr = DS1339_REG_ALARM1_SECS;
-       ds1307->msg[1].flags = I2C_M_RD;
-       ds1307->msg[1].len = 9;
-
-       ret = i2c_transfer(to_i2c_adapter(client->dev.parent),
-                       ds1307->msg, 2);
-       if (ret != 2) {
+       ret = i2c_smbus_read_i2c_block_data(client,
+                       DS1339_REG_ALARM1_SECS, 9, ds1307->regs);
+       if (ret != 9) {
                dev_err(dev, "%s error %d\n", "alarm read", ret);
                return -EIO;
        }
@@ -353,7 +338,7 @@ static int ds1307_read_alarm(struct device *dev, struct rtc_wkalrm *t)
        return 0;
 }
 
-static int ds1307_set_alarm(struct device *dev, struct rtc_wkalrm *t)
+static int ds1337_set_alarm(struct device *dev, struct rtc_wkalrm *t)
 {
        struct i2c_client       *client = to_i2c_client(dev);
        struct ds1307           *ds1307 = i2c_get_clientdata(client);
@@ -371,13 +356,9 @@ static int ds1307_set_alarm(struct device *dev, struct rtc_wkalrm *t)
                t->enabled, t->pending);
 
        /* read current status of both alarms and the chip */
-       ds1307->reg_addr = DS1339_REG_ALARM1_SECS;
-       ds1307->msg[1].flags = I2C_M_RD;
-       ds1307->msg[1].len = 9;
-
-       ret = i2c_transfer(to_i2c_adapter(client->dev.parent),
-                       ds1307->msg, 2);
-       if (ret != 2) {
+       ret = i2c_smbus_read_i2c_block_data(client,
+                       DS1339_REG_ALARM1_SECS, 9, buf);
+       if (ret != 9) {
                dev_err(dev, "%s error %d\n", "alarm write", ret);
                return -EIO;
        }
@@ -392,7 +373,6 @@ static int ds1307_set_alarm(struct device *dev, struct rtc_wkalrm *t)
                        ds1307->regs[6], control, status);
 
        /* set ALARM1, using 24 hour and day-of-month modes */
-       *buf++ = DS1339_REG_ALARM1_SECS;        /* first register addr */
        buf[0] = bin2bcd(t->time.tm_sec);
        buf[1] = bin2bcd(t->time.tm_min);
        buf[2] = bin2bcd(t->time.tm_hour);
@@ -411,14 +391,11 @@ static int ds1307_set_alarm(struct device *dev, struct rtc_wkalrm *t)
        }
        buf[8] = status & ~(DS1337_BIT_A1I | DS1337_BIT_A2I);
 
-       ds1307->msg[1].flags = 0;
-       ds1307->msg[1].len = 10;
-
-       ret = i2c_transfer(to_i2c_adapter(client->dev.parent),
-                       &ds1307->msg[1], 1);
-       if (ret != 1) {
+       ret = i2c_smbus_write_i2c_block_data(client,
+                       DS1339_REG_ALARM1_SECS, 9, buf);
+       if (ret < 0) {
                dev_err(dev, "can't set alarm time\n");
-               return -EIO;
+               return ret;
        }
 
        return 0;
@@ -475,8 +452,8 @@ static int ds1307_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
 static const struct rtc_class_ops ds13xx_rtc_ops = {
        .read_time      = ds1307_get_time,
        .set_time       = ds1307_set_time,
-       .read_alarm     = ds1307_read_alarm,
-       .set_alarm      = ds1307_set_alarm,
+       .read_alarm     = ds1337_read_alarm,
+       .set_alarm      = ds1337_set_alarm,
        .ioctl          = ds1307_ioctl,
 };
 
@@ -490,7 +467,6 @@ ds1307_nvram_read(struct kobject *kobj, struct bin_attribute *attr,
 {
        struct i2c_client       *client;
        struct ds1307           *ds1307;
-       struct i2c_msg          msg[2];
        int                     result;
 
        client = kobj_to_i2c_client(kobj);
@@ -503,24 +479,10 @@ ds1307_nvram_read(struct kobject *kobj, struct bin_attribute *attr,
        if (unlikely(!count))
                return count;
 
-       msg[0].addr = client->addr;
-       msg[0].flags = 0;
-       msg[0].len = 1;
-       msg[0].buf = buf;
-
-       buf[0] = 8 + off;
-
-       msg[1].addr = client->addr;
-       msg[1].flags = I2C_M_RD;
-       msg[1].len = count;
-       msg[1].buf = buf;
-
-       result = i2c_transfer(to_i2c_adapter(client->dev.parent), msg, 2);
-       if (result != 2) {
+       result = i2c_smbus_read_i2c_block_data(client, 8 + off, count, buf);
+       if (result < 0)
                dev_err(&client->dev, "%s error %d\n", "nvram read", result);
-               return -EIO;
-       }
-       return count;
+       return result;
 }
 
 static ssize_t
@@ -528,8 +490,7 @@ ds1307_nvram_write(struct kobject *kobj, struct bin_attribute *attr,
                char *buf, loff_t off, size_t count)
 {
        struct i2c_client       *client;
-       u8                      buffer[NVRAM_SIZE + 1];
-       int                     ret;
+       int                     result;
 
        client = kobj_to_i2c_client(kobj);
 
@@ -540,11 +501,12 @@ ds1307_nvram_write(struct kobject *kobj, struct bin_attribute *attr,
        if (unlikely(!count))
                return count;
 
-       buffer[0] = 8 + off;
-       memcpy(buffer + 1, buf, count);
-
-       ret = i2c_master_send(client, buffer, count + 1);
-       return (ret < 0) ? ret : (ret - 1);
+       result = i2c_smbus_write_i2c_block_data(client, 8 + off, count, buf);
+       if (result < 0) {
+               dev_err(&client->dev, "%s error %d\n", "nvram write", result);
+               return result;
+       }
+       return count;
 }
 
 static struct bin_attribute nvram = {
@@ -571,9 +533,11 @@ static int __devinit ds1307_probe(struct i2c_client *client,
        const struct chip_desc  *chip = &chips[id->driver_data];
        struct i2c_adapter      *adapter = to_i2c_adapter(client->dev.parent);
        int                     want_irq = false;
+       unsigned char           *buf;
 
        if (!i2c_check_functionality(adapter,
-                       I2C_FUNC_I2C | I2C_FUNC_SMBUS_WRITE_BYTE_DATA))
+                       I2C_FUNC_SMBUS_WRITE_BYTE_DATA |
+                       I2C_FUNC_SMBUS_I2C_BLOCK))
                return -EIO;
 
        if (!(ds1307 = kzalloc(sizeof(struct ds1307), GFP_KERNEL)))
@@ -581,18 +545,8 @@ static int __devinit ds1307_probe(struct i2c_client *client,
 
        ds1307->client = client;
        i2c_set_clientdata(client, ds1307);
-
-       ds1307->msg[0].addr = client->addr;
-       ds1307->msg[0].flags = 0;
-       ds1307->msg[0].len = 1;
-       ds1307->msg[0].buf = &ds1307->reg_addr;
-
-       ds1307->msg[1].addr = client->addr;
-       ds1307->msg[1].flags = I2C_M_RD;
-       ds1307->msg[1].len = sizeof(ds1307->regs);
-       ds1307->msg[1].buf = ds1307->regs;
-
        ds1307->type = id->driver_data;
+       buf = ds1307->regs;
 
        switch (ds1307->type) {
        case ds_1337:
@@ -602,21 +556,15 @@ static int __devinit ds1307_probe(struct i2c_client *client,
                        INIT_WORK(&ds1307->work, ds1307_work);
                        want_irq = true;
                }
-
-               ds1307->reg_addr = DS1337_REG_CONTROL;
-               ds1307->msg[1].len = 2;
-
                /* get registers that the "rtc" read below won't read... */
-               tmp = i2c_transfer(adapter, ds1307->msg, 2);
+               tmp = i2c_smbus_read_i2c_block_data(ds1307->client,
+                               DS1337_REG_CONTROL, 2, buf);
                if (tmp != 2) {
                        pr_debug("read error %d\n", tmp);
                        err = -EIO;
                        goto exit_free;
                }
 
-               ds1307->reg_addr = 0;
-               ds1307->msg[1].len = sizeof(ds1307->regs);
-
                /* oscillator off?  turn it on, so clock can tick. */
                if (ds1307->regs[0] & DS1337_BIT_nEOSC)
                        ds1307->regs[0] &= ~DS1337_BIT_nEOSC;
@@ -647,9 +595,8 @@ static int __devinit ds1307_probe(struct i2c_client *client,
 
 read_rtc:
        /* read RTC registers */
-
-       tmp = i2c_transfer(adapter, ds1307->msg, 2);
-       if (tmp != 2) {
+       tmp = i2c_smbus_read_i2c_block_data(ds1307->client, 0, 8, buf);
+       if (tmp != 8) {
                pr_debug("read error %d\n", tmp);
                err = -EIO;
                goto exit_free;
@@ -707,22 +654,6 @@ read_rtc:
                break;
        }
 
-       tmp = ds1307->regs[DS1307_REG_SECS];
-       tmp = bcd2bin(tmp & 0x7f);
-       if (tmp > 60)
-               goto exit_bad;
-       tmp = bcd2bin(ds1307->regs[DS1307_REG_MIN] & 0x7f);
-       if (tmp > 60)
-               goto exit_bad;
-
-       tmp = bcd2bin(ds1307->regs[DS1307_REG_MDAY] & 0x3f);
-       if (tmp == 0 || tmp > 31)
-               goto exit_bad;
-
-       tmp = bcd2bin(ds1307->regs[DS1307_REG_MONTH] & 0x1f);
-       if (tmp == 0 || tmp > 12)
-               goto exit_bad;
-
        tmp = ds1307->regs[DS1307_REG_HOUR];
        switch (ds1307->type) {
        case ds_1340:
@@ -779,13 +710,6 @@ read_rtc:
 
        return 0;
 
-exit_bad:
-       dev_dbg(&client->dev, "%s: %02x %02x %02x %02x %02x %02x %02x\n",
-                       "bogus register",
-                       ds1307->regs[0], ds1307->regs[1],
-                       ds1307->regs[2], ds1307->regs[3],
-                       ds1307->regs[4], ds1307->regs[5],
-                       ds1307->regs[6]);
 exit_irq:
        if (ds1307->rtc)
                rtc_device_unregister(ds1307->rtc);
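The conversion above replaces hand-built i2c_msg pairs with the SMBus block helpers. A minimal sketch of the resulting read pattern; example_read_clock_regs() is a hypothetical helper, while i2c_smbus_read_i2c_block_data() is the real API the patch switches to:

#include <linux/i2c.h>
#include <linux/errno.h>

/* Read the seven clock registers starting at offset 0, as
 * ds1307_get_time() now does: the helper returns the number of
 * bytes read or a negative errno, so both cases need checking. */
static int example_read_clock_regs(struct i2c_client *client, u8 regs[7])
{
	int n = i2c_smbus_read_i2c_block_data(client, 0x00, 7, regs);

	if (n < 0)
		return n;	/* bus error, already an errno */
	if (n != 7)
		return -EIO;	/* short read */
	return 0;
}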
index b8f9c00633f3adfb6885daedbb04031c1b925877..d82aad5224f0c6854fdf96999fb19185b9801c5e 100644 (file)
@@ -2621,7 +2621,7 @@ dasd_3990_erp_action(struct dasd_ccw_req * cqr)
                }
        }
 
-       /* double-check if current erp/cqr was successfull */
+       /* double-check if current erp/cqr was successful */
        if ((cqr->irb.scsw.cmd.cstat == 0x00) &&
            (cqr->irb.scsw.cmd.dstat ==
             (DEV_STAT_CHN_END | DEV_STAT_DEV_END))) {
index 05a14536c369685985e39b225afc69b0e89e9930..4a39084d9c95cd4f47370903143484bc04bae10f 100644 (file)
@@ -199,7 +199,7 @@ struct dasd_ccw_req {
 #define DASD_CQR_ERROR         0x82    /* request is completed with error */
 #define DASD_CQR_CLEAR_PENDING 0x83    /* request is clear pending */
 #define DASD_CQR_CLEARED       0x84    /* request was cleared */
-#define DASD_CQR_SUCCESS       0x85    /* request was successfull */
+#define DASD_CQR_SUCCESS       0x85    /* request was successful */
 
 
 /* per dasd_ccw_req flags */
index 4005c44a404c47efbb33d8e177389ca4a0b01a28..71605a179d65e6740839413ad9ec99811a5fd248 100644 (file)
@@ -801,7 +801,7 @@ tape_3590_done(struct tape_device *device, struct tape_request *request)
 static inline int
 tape_3590_erp_succeded(struct tape_device *device, struct tape_request *request)
 {
-       DBF_EVENT(3, "Error Recovery successfull for %s\n",
+       DBF_EVENT(3, "Error Recovery successful for %s\n",
                  tape_op_verbose[request->op]);
        return tape_3590_done(device, request);
 }
index 06b71823f3991b7e23d6810d13be0f6c0f5cee20..659f8a791656f472a22c18a99c14e45211185c97 100644 (file)
@@ -379,7 +379,7 @@ int cio_commit_config(struct subchannel *sch)
                if (ccode < 0) /* -EIO if msch gets a program check. */
                        return ccode;
                switch (ccode) {
-               case 0: /* successfull */
+               case 0: /* successful */
                        if (stsch(sch->schid, &schib) ||
                            !css_sch_is_valid(&schib))
                                return -ENODEV;
index 744f928a59eac4c3df9c3564fef85bece0b88ff3..10cb0f8726e5a72a264e4b62a2cbb9c3e32b65b1 100644 (file)
@@ -114,7 +114,7 @@ static inline int qdio_check_ccq(struct qdio_q *q, unsigned int ccq)
  * @count: count of buffers to examine
  * @auto_ack: automatically acknowledge buffers
  *
- * Returns the number of successfull extracted equal buffer states.
+ * Returns the number of successfully extracted equal buffer states.
  * Stops processing if a state is different from the last buffers state.
  */
 static int qdio_do_eqbs(struct qdio_q *q, unsigned char *state,
index b7322976d2b7c7e3c131deecd577181209804d83..256c7bec7bd722d188830c6003d3dd83c593bab8 100644 (file)
@@ -884,6 +884,7 @@ config SCSI_IBMVSCSI
        tristate "IBM Virtual SCSI support"
        depends on PPC_PSERIES || PPC_ISERIES
        select SCSI_SRP_ATTRS
+       select VIOPATH if PPC_ISERIES
        help
          This is the IBM POWER Virtual SCSI Client
 
index b695ab3142d83e2249586623344d4a49ca0c84a9..3e525e38a5d998ffbd0331f0b51ebb65ec45ca3c 100644 (file)
@@ -457,7 +457,7 @@ config SERIAL_SAMSUNG
 
 config SERIAL_SAMSUNG_UARTS
        int
-       depends on SERIAL_SAMSUNG
+       depends on ARM && PLAT_S3C
        default 2 if ARCH_S3C2400
        default 4 if ARCH_S3C64XX || CPU_S3C2443
        default 3
@@ -1320,13 +1320,30 @@ config SERIAL_NETX_CONSOLE
 config SERIAL_OF_PLATFORM
        tristate "Serial port on Open Firmware platform bus"
        depends on PPC_OF
-       depends on SERIAL_8250
+       depends on SERIAL_8250 || SERIAL_OF_PLATFORM_NWPSERIAL
        help
          If you have a PowerPC based system that has serial ports
          on a platform specific bus, you should enable this option.
          Currently, only 8250 compatible ports are supported, but
          others can easily be added.
 
+config SERIAL_OF_PLATFORM_NWPSERIAL
+       tristate "NWP serial port driver"
+       depends on PPC_OF && PPC_DCR
+       select SERIAL_OF_PLATFORM
+       select SERIAL_CORE_CONSOLE
+       select SERIAL_CORE
+       help
+         This driver supports the Cell network processor NWP serial
+         device.
+
+config SERIAL_OF_PLATFORM_NWPSERIAL_CONSOLE
+       bool "Console on NWP serial port"
+       depends on SERIAL_OF_PLATFORM_NWPSERIAL=y
+       select SERIAL_CORE_CONSOLE
+       help
+         Support for a console on the NWP serial ports.
+
 config SERIAL_QE
        tristate "Freescale QUICC Engine serial port support"
        depends on QUICC_ENGINE
index dfe775ac45b227a6badb58c19b1a921d7003f85a..8844c0a039298f7f3f8d32576a19a97ff593d399 100644 (file)
@@ -72,6 +72,7 @@ obj-$(CONFIG_SERIAL_ATMEL) += atmel_serial.o
 obj-$(CONFIG_SERIAL_UARTLITE) += uartlite.o
 obj-$(CONFIG_SERIAL_NETX) += netx-serial.o
 obj-$(CONFIG_SERIAL_OF_PLATFORM) += of_serial.o
+obj-$(CONFIG_SERIAL_OF_PLATFORM_NWPSERIAL) += nwpserial.o
 obj-$(CONFIG_SERIAL_KS8695) += serial_ks8695.o
 obj-$(CONFIG_KGDB_SERIAL_CONSOLE) += kgdboc.o
 obj-$(CONFIG_SERIAL_QE) += ucc_uart.o
diff --git a/drivers/serial/nwpserial.c b/drivers/serial/nwpserial.c
new file mode 100644 (file)
index 0000000..32f3eaf
--- /dev/null
@@ -0,0 +1,475 @@
+/*
+ *  Serial Port driver for a NWP uart device
+ *
+ *    Copyright (C) 2008 IBM Corp., Benjamin Krill <ben@codiert.org>
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+#include <linux/init.h>
+#include <linux/console.h>
+#include <linux/serial.h>
+#include <linux/serial_reg.h>
+#include <linux/serial_core.h>
+#include <linux/tty.h>
+#include <linux/irqreturn.h>
+#include <linux/mutex.h>
+#include <linux/of_platform.h>
+#include <linux/of_device.h>
+#include <linux/nwpserial.h>
+#include <asm/prom.h>
+#include <asm/dcr.h>
+
+#define NWPSERIAL_NR               2
+
+#define NWPSERIAL_STATUS_RXVALID 0x1
+#define NWPSERIAL_STATUS_TXFULL  0x2
+
+struct nwpserial_port {
+       struct uart_port port;
+       dcr_host_t dcr_host;
+       unsigned int ier;
+       unsigned int mcr;
+};
+
+static DEFINE_MUTEX(nwpserial_mutex);
+static struct nwpserial_port nwpserial_ports[NWPSERIAL_NR];
+
+static void wait_for_bits(struct nwpserial_port *up, int bits)
+{
+       unsigned int status, tmout = 10000;
+
+       /* Wait up to 10ms for the character(s) to be sent. */
+       do {
+               status = dcr_read(up->dcr_host, UART_LSR);
+
+               if (--tmout == 0)
+                       break;
+               udelay(1);
+       } while ((status & bits) != bits);
+}
+
+#ifdef CONFIG_SERIAL_OF_PLATFORM_NWPSERIAL_CONSOLE
+static void nwpserial_console_putchar(struct uart_port *port, int c)
+{
+       struct nwpserial_port *up;
+       up = container_of(port, struct nwpserial_port, port);
+       /* check if tx buffer is full */
+       wait_for_bits(up, UART_LSR_THRE);
+       dcr_write(up->dcr_host, UART_TX, c);
+       up->port.icount.tx++;
+}
+
+static void
+nwpserial_console_write(struct console *co, const char *s, unsigned int count)
+{
+       struct nwpserial_port *up = &nwpserial_ports[co->index];
+       unsigned long flags;
+       int locked = 1;
+
+       if (oops_in_progress)
+               locked = spin_trylock_irqsave(&up->port.lock, flags);
+       else
+               spin_lock_irqsave(&up->port.lock, flags);
+
+       /* save and disable interrupt */
+       up->ier = dcr_read(up->dcr_host, UART_IER);
+       dcr_write(up->dcr_host, UART_IER, up->ier & ~UART_IER_RDI);
+
+       uart_console_write(&up->port, s, count, nwpserial_console_putchar);
+
+       /* wait for transmitter to become empty */
+       while ((dcr_read(up->dcr_host, UART_LSR) & UART_LSR_THRE) == 0)
+               cpu_relax();
+
+       /* restore interrupt state */
+       dcr_write(up->dcr_host, UART_IER, up->ier);
+
+       if (locked)
+               spin_unlock_irqrestore(&up->port.lock, flags);
+}
+
+static struct uart_driver nwpserial_reg;
+static struct console nwpserial_console = {
+       .name           = "ttySQ",
+       .write          = nwpserial_console_write,
+       .device         = uart_console_device,
+       .flags          = CON_PRINTBUFFER,
+       .index          = -1,
+       .data           = &nwpserial_reg,
+};
+#define NWPSERIAL_CONSOLE      (&nwpserial_console)
+#else
+#define NWPSERIAL_CONSOLE      NULL
+#endif /* CONFIG_SERIAL_OF_PLATFORM_NWPSERIAL_CONSOLE */
+
+/**************************************************************************/
+
+static int nwpserial_request_port(struct uart_port *port)
+{
+       return 0;
+}
+
+static void nwpserial_release_port(struct uart_port *port)
+{
+       /* N/A */
+}
+
+static void nwpserial_config_port(struct uart_port *port, int flags)
+{
+       port->type = PORT_NWPSERIAL;
+}
+
+static irqreturn_t nwpserial_interrupt(int irq, void *dev_id)
+{
+       struct nwpserial_port *up = dev_id;
+       struct tty_struct *tty = up->port.info->port.tty;
+       irqreturn_t ret;
+       unsigned int iir;
+       unsigned char ch;
+
+       spin_lock(&up->port.lock);
+
+       /* check if the uart was the interrupt source. */
+       iir = dcr_read(up->dcr_host, UART_IIR);
+       if (!iir) {
+               ret = IRQ_NONE;
+               goto out;
+       }
+
+       do {
+               up->port.icount.rx++;
+               ch = dcr_read(up->dcr_host, UART_RX);
+               if (up->port.ignore_status_mask != NWPSERIAL_STATUS_RXVALID)
+                       tty_insert_flip_char(tty, ch, TTY_NORMAL);
+       } while (dcr_read(up->dcr_host, UART_RX) & UART_LSR_DR);
+
+       tty_flip_buffer_push(tty);
+       ret = IRQ_HANDLED;
+
+out:
+       spin_unlock(&up->port.lock);
+       return ret;
+}
+
+static int nwpserial_startup(struct uart_port *port)
+{
+       struct nwpserial_port *up;
+       int err;
+
+       up = container_of(port, struct nwpserial_port, port);
+
+       /* disable flow control by default */
+       up->mcr = dcr_read(up->dcr_host, UART_MCR) & ~UART_MCR_AFE;
+       dcr_write(up->dcr_host, UART_MCR, up->mcr);
+
+       /* register interrupt handler */
+       err = request_irq(up->port.irq, nwpserial_interrupt,
+                       IRQF_SHARED, "nwpserial", up);
+       if (err)
+               return err;
+
+       /* enable interrupts */
+       up->ier = UART_IER_RDI;
+       dcr_write(up->dcr_host, UART_IER, up->ier);
+
+       /* enable receiving */
+       up->port.ignore_status_mask &= ~NWPSERIAL_STATUS_RXVALID;
+
+       return 0;
+}
+
+static void nwpserial_shutdown(struct uart_port *port)
+{
+       struct nwpserial_port *up;
+       up = container_of(port, struct nwpserial_port, port);
+
+       /* disable receiving */
+       up->port.ignore_status_mask |= NWPSERIAL_STATUS_RXVALID;
+
+       /* disable interrupts from this port */
+       up->ier = 0;
+       dcr_write(up->dcr_host, UART_IER, up->ier);
+
+       /* free irq */
+       free_irq(up->port.irq, port);
+}
+
+static int nwpserial_verify_port(struct uart_port *port,
+                       struct serial_struct *ser)
+{
+       return -EINVAL;
+}
+
+static const char *nwpserial_type(struct uart_port *port)
+{
+       return port->type == PORT_NWPSERIAL ? "nwpserial" : NULL;
+}
+
+static void nwpserial_set_termios(struct uart_port *port,
+                       struct ktermios *termios, struct ktermios *old)
+{
+       struct nwpserial_port *up;
+       up = container_of(port, struct nwpserial_port, port);
+
+       up->port.read_status_mask = NWPSERIAL_STATUS_RXVALID
+                               | NWPSERIAL_STATUS_TXFULL;
+
+       up->port.ignore_status_mask = 0;
+       /* ignore all characters if CREAD is not set */
+       if ((termios->c_cflag & CREAD) == 0)
+               up->port.ignore_status_mask |= NWPSERIAL_STATUS_RXVALID;
+
+       /* Copy back the old hardware settings */
+       if (old)
+               tty_termios_copy_hw(termios, old);
+}
+
+static void nwpserial_break_ctl(struct uart_port *port, int ctl)
+{
+       /* N/A */
+}
+
+static void nwpserial_enable_ms(struct uart_port *port)
+{
+       /* N/A */
+}
+
+static void nwpserial_stop_rx(struct uart_port *port)
+{
+       struct nwpserial_port *up;
+       up = container_of(port, struct nwpserial_port, port);
+       /* don't forward any more data (like !CREAD) */
+       up->port.ignore_status_mask = NWPSERIAL_STATUS_RXVALID;
+}
+
+static void nwpserial_putchar(struct nwpserial_port *up, unsigned char c)
+{
+       /* check if tx buffer is full */
+       wait_for_bits(up, UART_LSR_THRE);
+       dcr_write(up->dcr_host, UART_TX, c);
+       up->port.icount.tx++;
+}
+
+static void nwpserial_start_tx(struct uart_port *port)
+{
+       struct nwpserial_port *up;
+       struct circ_buf *xmit;
+       up = container_of(port, struct nwpserial_port, port);
+       xmit  = &up->port.info->xmit;
+
+       if (port->x_char) {
+               nwpserial_putchar(up, up->port.x_char);
+               port->x_char = 0;
+       }
+
+       while (!(uart_circ_empty(xmit) || uart_tx_stopped(&up->port))) {
+               nwpserial_putchar(up, xmit->buf[xmit->tail]);
+               xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE-1);
+       }
+}
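The tail update above relies on UART_XMIT_SIZE being a power of two, so masking wraps the ring index without a modulo; a standalone illustration under that assumption:

#include <stdio.h>

#define XMIT_SIZE 4096	/* power of two, like UART_XMIT_SIZE */

int main(void)
{
	unsigned int tail = XMIT_SIZE - 1;

	tail = (tail + 1) & (XMIT_SIZE - 1);	/* wraps 4095 -> 0 */
	printf("tail=%u\n", tail);
	return 0;
}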
+
+static unsigned int nwpserial_get_mctrl(struct uart_port *port)
+{
+       return 0;
+}
+
+static void nwpserial_set_mctrl(struct uart_port *port, unsigned int mctrl)
+{
+       /* N/A */
+}
+
+static void nwpserial_stop_tx(struct uart_port *port)
+{
+       /* N/A */
+}
+
+static unsigned int nwpserial_tx_empty(struct uart_port *port)
+{
+       struct nwpserial_port *up;
+       unsigned long flags;
+       int ret;
+       up = container_of(port, struct nwpserial_port, port);
+
+       spin_lock_irqsave(&up->port.lock, flags);
+       ret = dcr_read(up->dcr_host, UART_LSR);
+       spin_unlock_irqrestore(&up->port.lock, flags);
+
+       return ret & UART_LSR_TEMT ? TIOCSER_TEMT : 0;
+}
+
+static struct uart_ops nwpserial_pops = {
+       .tx_empty     = nwpserial_tx_empty,
+       .set_mctrl    = nwpserial_set_mctrl,
+       .get_mctrl    = nwpserial_get_mctrl,
+       .stop_tx      = nwpserial_stop_tx,
+       .start_tx     = nwpserial_start_tx,
+       .stop_rx      = nwpserial_stop_rx,
+       .enable_ms    = nwpserial_enable_ms,
+       .break_ctl    = nwpserial_break_ctl,
+       .startup      = nwpserial_startup,
+       .shutdown     = nwpserial_shutdown,
+       .set_termios  = nwpserial_set_termios,
+       .type         = nwpserial_type,
+       .release_port = nwpserial_release_port,
+       .request_port = nwpserial_request_port,
+       .config_port  = nwpserial_config_port,
+       .verify_port  = nwpserial_verify_port,
+};
+
+static struct uart_driver nwpserial_reg = {
+       .owner       = THIS_MODULE,
+       .driver_name = "nwpserial",
+       .dev_name    = "ttySQ",
+       .major       = TTY_MAJOR,
+       .minor       = 68,
+       .nr          = NWPSERIAL_NR,
+       .cons        = NWPSERIAL_CONSOLE,
+};
+
+int nwpserial_register_port(struct uart_port *port)
+{
+       struct nwpserial_port *up = NULL;
+       int ret = -1;
+       int i;
+       static int first = 1;
+       int dcr_len;
+       int dcr_base;
+       struct device_node *dn;
+
+       mutex_lock(&nwpserial_mutex);
+
+       dn = to_of_device(port->dev)->node;
+       if (dn == NULL)
+               goto out;
+
+       /* get dcr base. */
+       dcr_base = dcr_resource_start(dn, 0);
+
+       /* find matching entry */
+       for (i = 0; i < NWPSERIAL_NR; i++)
+               if (nwpserial_ports[i].port.iobase == dcr_base) {
+                       up = &nwpserial_ports[i];
+                       break;
+               }
+
+       /* we didn't find a matching entry, search for a free port */
+       if (up == NULL)
+               for (i = 0; i < NWPSERIAL_NR; i++)
+                       if (nwpserial_ports[i].port.type == PORT_UNKNOWN &&
+                               nwpserial_ports[i].port.iobase == 0) {
+                               up = &nwpserial_ports[i];
+                               break;
+                       }
+
+       if (up == NULL) {
+               ret = -EBUSY;
+               goto out;
+       }
+
+       if (first)
+               uart_register_driver(&nwpserial_reg);
+       first = 0;
+
+       up->port.membase      = port->membase;
+       up->port.irq          = port->irq;
+       up->port.uartclk      = port->uartclk;
+       up->port.fifosize     = port->fifosize;
+       up->port.regshift     = port->regshift;
+       up->port.iotype       = port->iotype;
+       up->port.flags        = port->flags;
+       up->port.mapbase      = port->mapbase;
+       up->port.private_data = port->private_data;
+
+       if (port->dev)
+               up->port.dev = port->dev;
+
+       if (up->port.iobase != dcr_base) {
+               up->port.ops          = &nwpserial_pops;
+               up->port.fifosize     = 16;
+
+               spin_lock_init(&up->port.lock);
+
+               up->port.iobase = dcr_base;
+               dcr_len = dcr_resource_len(dn, 0);
+
+               up->dcr_host = dcr_map(dn, dcr_base, dcr_len);
+               if (!DCR_MAP_OK(up->dcr_host)) {
+                       printk(KERN_ERR "Cannot map DCR resources for NWPSERIAL\n");
+                       goto out;
+               }
+       }
+
+       ret = uart_add_one_port(&nwpserial_reg, &up->port);
+       if (ret == 0)
+               ret = up->port.line;
+
+out:
+       mutex_unlock(&nwpserial_mutex);
+
+       return ret;
+}
+EXPORT_SYMBOL(nwpserial_register_port);
+
+void nwpserial_unregister_port(int line)
+{
+       struct nwpserial_port *up = &nwpserial_ports[line];
+       mutex_lock(&nwpserial_mutex);
+       uart_remove_one_port(&nwpserial_reg, &up->port);
+
+       up->port.type = PORT_UNKNOWN;
+
+       mutex_unlock(&nwpserial_mutex);
+}
+EXPORT_SYMBOL(nwpserial_unregister_port);
+
+#ifdef CONFIG_SERIAL_OF_PLATFORM_NWPSERIAL_CONSOLE
+static int __init nwpserial_console_init(void)
+{
+       struct nwpserial_port *up = NULL;
+       struct device_node *dn;
+       const char *name;
+       int dcr_base;
+       int dcr_len;
+       int i;
+
+       /* search for a free port */
+       for (i = 0; i < NWPSERIAL_NR; i++)
+               if (nwpserial_ports[i].port.type == PORT_UNKNOWN) {
+                       up = &nwpserial_ports[i];
+                       break;
+               }
+
+       if (up == NULL)
+               return -1;
+
+       name = of_get_property(of_chosen, "linux,stdout-path", NULL);
+       if (name == NULL)
+               return -1;
+
+       dn = of_find_node_by_path(name);
+       if (!dn)
+               return -1;
+
+       spin_lock_init(&up->port.lock);
+       up->port.ops = &nwpserial_pops;
+       up->port.type = PORT_NWPSERIAL;
+       up->port.fifosize = 16;
+
+       dcr_base = dcr_resource_start(dn, 0);
+       dcr_len = dcr_resource_len(dn, 0);
+       up->port.iobase = dcr_base;
+
+       up->dcr_host = dcr_map(dn, dcr_base, dcr_len);
+       if (!DCR_MAP_OK(up->dcr_host)) {
+               printk("Cannot map DCR resources for SERIAL");
+               return -1;
+       }
+       register_console(&nwpserial_console);
+       return 0;
+}
+console_initcall(nwpserial_console_init);
+#endif /* CONFIG_SERIAL_OF_PLATFORM_NWPSERIAL_CONSOLE */
index 8fa0ff561e9fbd901acb46787bff587b035ddd79..a821e3a3d664e93f18fa6dcb8e7e561ce4fc6076 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/serial_core.h>
 #include <linux/serial_8250.h>
 #include <linux/of_platform.h>
+#include <linux/nwpserial.h>
 
 #include <asm/prom.h>
 
@@ -99,9 +100,16 @@ static int __devinit of_platform_serial_probe(struct of_device *ofdev,
                goto out;
 
        switch (port_type) {
+#ifdef CONFIG_SERIAL_8250
        case PORT_8250 ... PORT_MAX_8250:
                ret = serial8250_register_port(&port);
                break;
+#endif
+#ifdef CONFIG_SERIAL_OF_PLATFORM_NWPSERIAL
+       case PORT_NWPSERIAL:
+               ret = nwpserial_register_port(&port);
+               break;
+#endif
        default:
                /* need to add code for these */
        case PORT_UNKNOWN:
@@ -129,9 +137,16 @@ static int of_platform_serial_remove(struct of_device *ofdev)
 {
        struct of_serial_info *info = ofdev->dev.driver_data;
        switch (info->type) {
+#ifdef CONFIG_SERIAL_8250
        case PORT_8250 ... PORT_MAX_8250:
                serial8250_unregister_port(info->line);
                break;
+#endif
+#ifdef CONFIG_SERIAL_OF_PLATFORM_NWPSERIAL
+       case PORT_NWPSERIAL:
+               nwpserial_unregister_port(info->line);
+               break;
+#endif
        default:
                /* need to add code for these */
                break;
@@ -148,6 +163,10 @@ static struct of_device_id __devinitdata of_platform_serial_table[] = {
        { .type = "serial", .compatible = "ns16450",  .data = (void *)PORT_16450, },
        { .type = "serial", .compatible = "ns16550",  .data = (void *)PORT_16550, },
        { .type = "serial", .compatible = "ns16750",  .data = (void *)PORT_16750, },
+#ifdef CONFIG_SERIAL_OF_PLATFORM_NWPSERIAL
+       { .type = "serial", .compatible = "ibm,qpace-nwp-serial",
+                                       .data = (void *)PORT_NWPSERIAL, },
+#endif
        { .type = "serial",                           .data = (void *)PORT_UNKNOWN, },
        { /* end of list */ },
 };
index 2ac52fd8cc11f67595a6368ea879bf40fde497fa..4e046fed1380e7b4f158b4fe4c576b825b575eca 100644 (file)
@@ -24,6 +24,7 @@
 #include <linux/amba/bus.h>
 #include <linux/amba/clcd.h>
 #include <linux/clk.h>
+#include <linux/hardirq.h>
 
 #include <asm/sizes.h>
 
index 90616822cd2021ea9b1f8a195baf5dbda6f5e28e..96d2f8e4c2756fdc50b4fc9e21aa496ec875f59d 100644 (file)
@@ -34,6 +34,12 @@ config W1_MASTER_DS2482
          This driver can also be built as a module.  If so, the module
          will be called ds2482.
 
+config W1_MASTER_MXC
+       tristate "Freescale MXC 1-wire busmaster"
+       depends on W1 && ARCH_MXC
+       help
+         Say Y here to enable the MXC 1-wire host.
+
 config W1_MASTER_DS1WM
        tristate "Maxim DS1WM 1-wire busmaster"
        depends on W1 && ARM && HAVE_CLK
index bc4714a75f3a0358ebcc7c51a78025dab7e1d594..c5a3e96fcbabf9bd952e119a89dfd9d40e1c54d9 100644 (file)
@@ -5,6 +5,8 @@
 obj-$(CONFIG_W1_MASTER_MATROX)         += matrox_w1.o
 obj-$(CONFIG_W1_MASTER_DS2490)         += ds2490.o
 obj-$(CONFIG_W1_MASTER_DS2482)         += ds2482.o
+obj-$(CONFIG_W1_MASTER_MXC)            += mxc_w1.o
+
 obj-$(CONFIG_W1_MASTER_DS1WM)          += ds1wm.o
 obj-$(CONFIG_W1_MASTER_GPIO)           += w1-gpio.o
 obj-$(CONFIG_HDQ_MASTER_OMAP)          += omap_hdq.o
diff --git a/drivers/w1/masters/mxc_w1.c b/drivers/w1/masters/mxc_w1.c
new file mode 100644 (file)
index 0000000..b9d74d0
--- /dev/null
@@ -0,0 +1,211 @@
+/*
+ * Copyright 2005-2008 Freescale Semiconductor, Inc. All Rights Reserved.
+ * Copyright 2008 Luotao Fu, kernel@pengutronix.de
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+
+#include "../w1.h"
+#include "../w1_int.h"
+#include "../w1_log.h"
+
+/* According to the mx27 datasheet the reset procedure should take at most
+ * about 1350us. We set the timeout to 500*100us = 50ms to be safe. */
+#define MXC_W1_RESET_TIMEOUT 500
+
+/*
+ * MXC W1 Register offsets
+ */
+#define MXC_W1_CONTROL          0x00
+#define MXC_W1_TIME_DIVIDER     0x02
+#define MXC_W1_RESET            0x04
+#define MXC_W1_COMMAND          0x06
+#define MXC_W1_TXRX             0x08
+#define MXC_W1_INTERRUPT        0x0A
+#define MXC_W1_INTERRUPT_EN     0x0C
+
+struct mxc_w1_device {
+       void __iomem *regs;
+       unsigned int clkdiv;
+       struct clk *clk;
+       struct w1_bus_master bus_master;
+};
+
+/*
+ * this is the low-level routine that issues
+ * a reset pulse on the 1-Wire bus
+ */
+static u8 mxc_w1_ds2_reset_bus(void *data)
+{
+       u8 reg_val;
+       unsigned int timeout_cnt = 0;
+       struct mxc_w1_device *dev = data;
+
+       __raw_writeb(0x80, (dev->regs + MXC_W1_CONTROL));
+
+       while (1) {
+               reg_val = __raw_readb(dev->regs + MXC_W1_CONTROL);
+
+               if (((reg_val >> 7) & 0x1) == 0 ||
+                   timeout_cnt > MXC_W1_RESET_TIMEOUT)
+                       break;
+               else
+                       timeout_cnt++;
+
+               udelay(100);
+       }
+       return (reg_val >> 7) & 0x1;
+}
+
+/*
+ * this is the low-level routine to read/write a bit on the 1-Wire
+ * bus. It writes a 0 slot if the bit parameter is 0; otherwise it
+ * performs a write-1/read slot.
+ */
+static u8 mxc_w1_ds2_touch_bit(void *data, u8 bit)
+{
+       struct mxc_w1_device *mdev = data;
+       void __iomem *ctrl_addr = mdev->regs + MXC_W1_CONTROL;
+       unsigned int timeout_cnt = 400; /* Takes max. 120us according to
+                                        * datasheet.
+                                        */
+
+       __raw_writeb((1 << (5 - bit)), ctrl_addr);
+
+       while (timeout_cnt--) {
+               if (!((__raw_readb(ctrl_addr) >> (5 - bit)) & 0x1))
+                       break;
+
+               udelay(1);
+       }
+
+       return ((__raw_readb(ctrl_addr)) >> 3) & 0x1;
+}
+
+static int __init mxc_w1_probe(struct platform_device *pdev)
+{
+       struct mxc_w1_device *mdev;
+       struct resource *res;
+       int err = 0;
+
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       if (!res)
+               return -ENODEV;
+
+       mdev = kzalloc(sizeof(struct mxc_w1_device), GFP_KERNEL);
+       if (!mdev)
+               return -ENOMEM;
+
+       mdev->clk = clk_get(&pdev->dev, "owire_clk");
+       if (IS_ERR(mdev->clk)) {
+               err = PTR_ERR(mdev->clk);
+               goto failed_clk;
+       }
+
+       mdev->clkdiv = (clk_get_rate(mdev->clk) / 1000000) - 1;
+
+       res = request_mem_region(res->start, resource_size(res),
+                               "mxc_w1");
+       if (!res) {
+               err = -EBUSY;
+               goto failed_req;
+       }
+
+       mdev->regs = ioremap(res->start, resource_size(res));
+       if (!mdev->regs) {
+               printk(KERN_ERR "Cannot map frame buffer registers\n");
+               goto failed_ioremap;
+       }
+
+       clk_enable(mdev->clk);
+       __raw_writeb(mdev->clkdiv, mdev->regs + MXC_W1_TIME_DIVIDER);
+
+       mdev->bus_master.data = mdev;
+       mdev->bus_master.reset_bus = mxc_w1_ds2_reset_bus;
+       mdev->bus_master.touch_bit = mxc_w1_ds2_touch_bit;
+
+       err = w1_add_master_device(&mdev->bus_master);
+
+       if (err)
+               goto failed_add;
+
+       platform_set_drvdata(pdev, mdev);
+       return 0;
+
+failed_add:
+       iounmap(mdev->regs);
+failed_ioremap:
+       release_mem_region(res->start, resource_size(res));
+failed_req:
+       clk_put(mdev->clk);
+failed_clk:
+       kfree(mdev);
+       return err;
+}
+
+/*
+ * disassociate the w1 device from the driver
+ */
+static int mxc_w1_remove(struct platform_device *pdev)
+{
+       struct mxc_w1_device *mdev = platform_get_drvdata(pdev);
+       struct resource *res;
+
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+
+       w1_remove_master_device(&mdev->bus_master);
+
+       iounmap(mdev->regs);
+       release_mem_region(res->start, resource_size(res));
+       clk_disable(mdev->clk);
+       clk_put(mdev->clk);
+
+       platform_set_drvdata(pdev, NULL);
+
+       return 0;
+}
+
+static struct platform_driver mxc_w1_driver = {
+       .driver = {
+                  .name = "mxc_w1",
+       },
+       .probe = mxc_w1_probe,
+       .remove = mxc_w1_remove,
+};
+
+static int __init mxc_w1_init(void)
+{
+       return platform_driver_register(&mxc_w1_driver);
+}
+
+static void mxc_w1_exit(void)
+{
+       platform_driver_unregister(&mxc_w1_driver);
+}
+
+module_init(mxc_w1_init);
+module_exit(mxc_w1_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Freescale Semiconductors Inc");
+MODULE_DESCRIPTION("Driver for One-Wire on MXC");
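
The two callbacks wired up in probe above are all the w1 core needs: every
higher-level transfer is composed from reset pulses and bit slots. A hedged
sketch (helper name illustrative, not part of this patch) of how a byte
write decomposes onto the touch_bit callback:

	static void w1_write_byte_sketch(struct w1_bus_master *bm, u8 byte)
	{
		int i;

		/* 1-Wire is LSB-first: one touch per bit time slot */
		for (i = 0; i < 8; i++)
			bm->touch_bit(bm->data, (byte >> i) & 0x1);
	}
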
index 97304bd83ec9674105bc837a4e9ecea6ba373cfd..d8a9709f3449159a2f8ada1c2ca328316414ed79 100644 (file)
@@ -210,6 +210,7 @@ u8 w1_read_8(struct w1_master *);
 int w1_reset_bus(struct w1_master *);
 u8 w1_calc_crc8(u8 *, int);
 void w1_write_block(struct w1_master *, const u8 *, int);
+void w1_touch_block(struct w1_master *, u8 *, int);
 u8 w1_read_block(struct w1_master *, u8 *, int);
 int w1_reset_select_slave(struct w1_slave *sl);
 void w1_next_pullup(struct w1_master *, int);
index 5139c25ca96288474cf976cf5577be12c6d6d30b..442bd8bbd4a5dcbed086cae48fed1350ba9e31b6 100644 (file)
@@ -238,7 +238,6 @@ EXPORT_SYMBOL_GPL(w1_read_8);
  * @param dev     the master device
  * @param buf     pointer to the data to write
  * @param len     the number of bytes to write
- * @return        the byte read
  */
 void w1_write_block(struct w1_master *dev, const u8 *buf, int len)
 {
@@ -255,6 +254,31 @@ void w1_write_block(struct w1_master *dev, const u8 *buf, int len)
 }
 EXPORT_SYMBOL_GPL(w1_write_block);
 
+/**
+ * Touches a series of bytes.
+ *
+ * @param dev     the master device
+ * @param buf     pointer to the data to write; on return it holds
+ *                the bits read back from the bus
+ * @param len     the number of bytes to touch
+ */
+void w1_touch_block(struct w1_master *dev, u8 *buf, int len)
+{
+       int i, j;
+       u8 tmp;
+
+       for (i = 0; i < len; ++i) {
+               tmp = 0;
+               for (j = 0; j < 8; ++j) {
+                       if (j == 7)
+                               w1_pre_write(dev);
+                       tmp |= w1_touch_bit(dev, (buf[i] >> j) & 0x1) << j;
+               }
+
+               buf[i] = tmp;
+       }
+}
+EXPORT_SYMBOL_GPL(w1_touch_block);
+
 /**
  * Reads a series of bytes.
  *
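
A touch is a combined write/read: a slot that writes a 1 also samples the
bus, so touching 0xff bytes amounts to a read, while touching other values
interleaves writes and reads. A hedged usage sketch of the new helper:

	u8 buf[4] = { 0xff, 0xff, 0xff, 0xff };

	/* all-ones slots: buf now holds the four bytes the slave drove */
	w1_touch_block(dev, buf, sizeof(buf));
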
index 65c5ebd0787e0ea4840ab73db3af612443d3d7b2..fdf72851c57451f70a135324241a62359e97ad63 100644 (file)
@@ -47,21 +47,56 @@ void w1_netlink_send(struct w1_master *dev, struct w1_netlink_msg *msg)
        cn_netlink_send(m, 0, GFP_KERNEL);
 }
 
-static int w1_process_command_master(struct w1_master *dev, struct cn_msg *msg,
-               struct w1_netlink_msg *hdr, struct w1_netlink_cmd *cmd)
+static void w1_send_slave(struct w1_master *dev, u64 rn)
+{
+       struct cn_msg *msg = dev->priv;
+       struct w1_netlink_msg *hdr = (struct w1_netlink_msg *)(msg + 1);
+       struct w1_netlink_cmd *cmd = (struct w1_netlink_cmd *)(hdr + 1);
+       int avail;
+
+       avail = dev->priv_size - cmd->len;
+
+       if (avail > 8) {
+               u64 *data = (void *)(cmd + 1) + cmd->len;
+
+               *data = rn;
+               cmd->len += 8;
+               hdr->len += 8;
+               msg->len += 8;
+               return;
+       }
+
+       msg->ack++;
+       cn_netlink_send(msg, 0, GFP_KERNEL);
+
+       msg->len = sizeof(struct w1_netlink_msg) + sizeof(struct w1_netlink_cmd);
+       hdr->len = sizeof(struct w1_netlink_cmd);
+       cmd->len = 0;
+}
+
+static int w1_process_search_command(struct w1_master *dev, struct cn_msg *msg,
+               unsigned int avail)
 {
-       dev_dbg(&dev->dev, "%s: %s: cmd=%02x, len=%u.\n",
-               __func__, dev->name, cmd->cmd, cmd->len);
+       struct w1_netlink_msg *hdr = (struct w1_netlink_msg *)(msg + 1);
+       struct w1_netlink_cmd *cmd = (struct w1_netlink_cmd *)(hdr + 1);
+       int search_type = (cmd->cmd == W1_CMD_ALARM_SEARCH) ?
+                       W1_ALARM_SEARCH : W1_SEARCH;
 
-       if (cmd->cmd != W1_CMD_SEARCH && cmd->cmd != W1_CMD_ALARM_SEARCH)
-               return -EINVAL;
+       dev->priv = msg;
+       dev->priv_size = avail;
+
+       w1_search_devices(dev, search_type, w1_send_slave);
+
+       msg->ack = 0;
+       cn_netlink_send(msg, 0, GFP_KERNEL);
+
+       dev->priv = NULL;
+       dev->priv_size = 0;
 
-       w1_search_process(dev, (cmd->cmd == W1_CMD_ALARM_SEARCH)?W1_ALARM_SEARCH:W1_SEARCH);
        return 0;
 }
 
-static int w1_send_read_reply(struct w1_slave *sl, struct cn_msg *msg,
-               struct w1_netlink_msg *hdr, struct w1_netlink_cmd *cmd)
+static int w1_send_read_reply(struct cn_msg *msg, struct w1_netlink_msg *hdr,
+               struct w1_netlink_cmd *cmd)
 {
        void *data;
        struct w1_netlink_msg *h;
@@ -85,7 +120,8 @@ static int w1_send_read_reply(struct w1_slave *sl, struct cn_msg *msg,
        memcpy(c, cmd, sizeof(struct w1_netlink_cmd));
 
        cm->ack = msg->seq+1;
-       cm->len = sizeof(struct w1_netlink_msg) + sizeof(struct w1_netlink_cmd) + cmd->len;
+       cm->len = sizeof(struct w1_netlink_msg) +
+               sizeof(struct w1_netlink_cmd) + cmd->len;
 
        h->len = sizeof(struct w1_netlink_cmd) + cmd->len;
 
@@ -98,36 +134,178 @@ static int w1_send_read_reply(struct w1_slave *sl, struct cn_msg *msg,
        return err;
 }
 
-static int w1_process_command_slave(struct w1_slave *sl, struct cn_msg *msg,
+static int w1_process_command_io(struct w1_master *dev, struct cn_msg *msg,
                struct w1_netlink_msg *hdr, struct w1_netlink_cmd *cmd)
 {
        int err = 0;
 
-       dev_dbg(&sl->master->dev, "%s: %02x.%012llx.%02x: cmd=%02x, len=%u.\n",
-               __func__, sl->reg_num.family, (unsigned long long)sl->reg_num.id, sl->reg_num.crc,
-               cmd->cmd, cmd->len);
+       switch (cmd->cmd) {
+       case W1_CMD_TOUCH:
+               w1_touch_block(dev, cmd->data, cmd->len);
+               w1_send_read_reply(msg, hdr, cmd);
+               break;
+       case W1_CMD_READ:
+               w1_read_block(dev, cmd->data, cmd->len);
+               w1_send_read_reply(msg, hdr, cmd);
+               break;
+       case W1_CMD_WRITE:
+               w1_write_block(dev, cmd->data, cmd->len);
+               break;
+       default:
+               err = -EINVAL;
+               break;
+       }
+
+       return err;
+}
+
+static int w1_process_command_master(struct w1_master *dev, struct cn_msg *req_msg,
+               struct w1_netlink_msg *req_hdr, struct w1_netlink_cmd *req_cmd)
+{
+       int err = -EINVAL;
+       struct cn_msg *msg;
+       struct w1_netlink_msg *hdr;
+       struct w1_netlink_cmd *cmd;
+
+       msg = kzalloc(PAGE_SIZE, GFP_KERNEL);
+       if (!msg)
+               return -ENOMEM;
+
+       msg->id = req_msg->id;
+       msg->seq = req_msg->seq;
+       msg->ack = 0;
+       msg->len = sizeof(struct w1_netlink_msg) + sizeof(struct w1_netlink_cmd);
+
+       hdr = (struct w1_netlink_msg *)(msg + 1);
+       cmd = (struct w1_netlink_cmd *)(hdr + 1);
+
+       hdr->type = W1_MASTER_CMD;
+       hdr->id = req_hdr->id;
+       hdr->len = sizeof(struct w1_netlink_cmd);
+
+       cmd->cmd = req_cmd->cmd;
+       cmd->len = 0;
 
        switch (cmd->cmd) {
-               case W1_CMD_READ:
-                       w1_read_block(sl->master, cmd->data, cmd->len);
-                       w1_send_read_reply(sl, msg, hdr, cmd);
-                       break;
-               case W1_CMD_WRITE:
-                       w1_write_block(sl->master, cmd->data, cmd->len);
-                       break;
-               case W1_CMD_SEARCH:
-               case W1_CMD_ALARM_SEARCH:
-                       w1_search_process(sl->master,
-                                       (cmd->cmd == W1_CMD_ALARM_SEARCH)?W1_ALARM_SEARCH:W1_SEARCH);
-                       break;
-               default:
-                       err = -1;
-                       break;
+       case W1_CMD_SEARCH:
+       case W1_CMD_ALARM_SEARCH:
+               err = w1_process_search_command(dev, msg,
+                               PAGE_SIZE - msg->len - sizeof(struct cn_msg));
+               break;
+       case W1_CMD_READ:
+       case W1_CMD_WRITE:
+       case W1_CMD_TOUCH:
+               err = w1_process_command_io(dev, req_msg, req_hdr, req_cmd);
+               break;
+       case W1_CMD_RESET:
+               err = w1_reset_bus(dev);
+               break;
+       default:
+               err = -EINVAL;
+               break;
        }
 
+       kfree(msg);
        return err;
 }
 
+static int w1_process_command_slave(struct w1_slave *sl, struct cn_msg *msg,
+               struct w1_netlink_msg *hdr, struct w1_netlink_cmd *cmd)
+{
+       dev_dbg(&sl->master->dev, "%s: %02x.%012llx.%02x: cmd=%02x, len=%u.\n",
+               __func__, sl->reg_num.family, (unsigned long long)sl->reg_num.id,
+               sl->reg_num.crc, cmd->cmd, cmd->len);
+
+       return w1_process_command_io(sl->master, msg, hdr, cmd);
+}
+
+static int w1_process_command_root(struct cn_msg *msg, struct w1_netlink_msg *mcmd)
+{
+       struct w1_master *m;
+       struct cn_msg *cn;
+       struct w1_netlink_msg *w;
+       u32 *id;
+
+       if (mcmd->type != W1_LIST_MASTERS) {
+               printk(KERN_NOTICE "%s: msg: %x.%x, wrong type: %u, len: %u.\n",
+                       __func__, msg->id.idx, msg->id.val, mcmd->type, mcmd->len);
+               return -EPROTO;
+       }
+
+       cn = kmalloc(PAGE_SIZE, GFP_KERNEL);
+       if (!cn)
+               return -ENOMEM;
+
+       cn->id.idx = CN_W1_IDX;
+       cn->id.val = CN_W1_VAL;
+
+       cn->seq = msg->seq;
+       cn->ack = 1;
+       cn->len = sizeof(struct w1_netlink_msg);
+       w = (struct w1_netlink_msg *)(cn + 1);
+
+       w->type = W1_LIST_MASTERS;
+       w->status = 0;
+       w->len = 0;
+       id = (u32 *)(w + 1);
+
+       mutex_lock(&w1_mlock);
+       list_for_each_entry(m, &w1_masters, w1_master_entry) {
+               if (cn->len + sizeof(*id) > PAGE_SIZE - sizeof(struct cn_msg)) {
+                       cn_netlink_send(cn, 0, GFP_KERNEL);
+                       cn->ack++;
+                       cn->len = sizeof(struct w1_netlink_msg);
+                       w->len = 0;
+                       id = (u32 *)(w + 1);
+               }
+
+               *id = m->id;
+               w->len += sizeof(*id);
+               cn->len += sizeof(*id);
+               id++;
+       }
+       cn->ack = 0;
+       cn_netlink_send(cn, 0, GFP_KERNEL);
+       mutex_unlock(&w1_mlock);
+
+       kfree(cn);
+       return 0;
+}
+
+static int w1_netlink_send_error(struct cn_msg *rcmsg, struct w1_netlink_msg *rmsg,
+               struct w1_netlink_cmd *rcmd, int error)
+{
+       struct cn_msg *cmsg;
+       struct w1_netlink_msg *msg;
+       struct w1_netlink_cmd *cmd;
+
+       cmsg = kzalloc(sizeof(*msg) + sizeof(*cmd) + sizeof(*cmsg), GFP_KERNEL);
+       if (!cmsg)
+               return -ENOMEM;
+
+       msg = (struct w1_netlink_msg *)(cmsg + 1);
+       cmd = (struct w1_netlink_cmd *)(msg + 1);
+
+       memcpy(cmsg, rcmsg, sizeof(*cmsg));
+       cmsg->len = sizeof(*msg);
+
+       memcpy(msg, rmsg, sizeof(*msg));
+       msg->len = 0;
+       msg->status = (short)-error;
+
+       if (rcmd) {
+               memcpy(cmd, rcmd, sizeof(*cmd));
+               cmd->len = 0;
+               msg->len += sizeof(*cmd);
+               cmsg->len += sizeof(*cmd);
+       }
+
+       error = cn_netlink_send(cmsg, 0, GFP_KERNEL);
+       kfree(cmsg);
+
+       return error;
+}
+
 static void w1_cn_callback(void *data)
 {
        struct cn_msg *msg = data;
@@ -144,6 +322,7 @@ static void w1_cn_callback(void *data)
 
                dev = NULL;
                sl = NULL;
+               cmd = NULL;
 
                memcpy(&id, m->id.id, sizeof(id));
 #if 0
@@ -155,15 +334,15 @@ static void w1_cn_callback(void *data)
                        break;
                }
 
-               if (!mlen)
-                       goto out_cont;
-
                if (m->type == W1_MASTER_CMD) {
                        dev = w1_search_master_id(m->id.mst.id);
                } else if (m->type == W1_SLAVE_CMD) {
                        sl = w1_search_slave(&id);
                        if (sl)
                                dev = sl->master;
+               } else {
+                       err = w1_process_command_root(msg, m);
+                       goto out_cont;
                }
 
                if (!dev) {
@@ -171,6 +350,10 @@ static void w1_cn_callback(void *data)
                        goto out_cont;
                }
 
+               err = 0;
+               if (!mlen)
+                       goto out_cont;
+
                mutex_lock(&dev->mutex);
 
                if (sl && w1_reset_select_slave(sl)) {
@@ -187,9 +370,12 @@ static void w1_cn_callback(void *data)
                        }
 
                        if (sl)
-                               w1_process_command_slave(sl, msg, m, cmd);
+                               err = w1_process_command_slave(sl, msg, m, cmd);
                        else
-                               w1_process_command_master(dev, msg, m, cmd);
+                               err = w1_process_command_master(dev, msg, m, cmd);
+
+                       w1_netlink_send_error(msg, m, cmd, err);
+                       err = 0;
 
                        cmd_data += cmd->len + sizeof(struct w1_netlink_cmd);
                        mlen -= cmd->len + sizeof(struct w1_netlink_cmd);
@@ -200,6 +386,8 @@ out_up:
                        atomic_dec(&sl->refcnt);
                mutex_unlock(&dev->mutex);
 out_cont:
+               if (!cmd || err)
+                       w1_netlink_send_error(msg, m, cmd, err);
                msg->len -= sizeof(struct w1_netlink_msg) + m->len;
                m = (struct w1_netlink_msg *)(((u8 *)m) + sizeof(struct w1_netlink_msg) + m->len);
 
@@ -209,11 +397,6 @@ out_cont:
                if (err == -ENODEV)
                        err = 0;
        }
-#if 0
-       if (err) {
-               printk("%s: malformed message. Dropping.\n", __func__);
-       }
-#endif
 }
 
 int w1_init_netlink(void)
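
The callback above walks a nested request: a cn_msg wrapping one or more
w1_netlink_msg headers, each carrying w1_netlink_cmd entries. A hedged
sketch of how userspace would lay out a W1_CMD_RESET request for a master
(struct definitions as in the w1_netlink.h hunk below; the NETLINK_CONNECTOR
socket plumbing and nlmsghdr wrapping are omitted, and the target master id
still has to be filled in):

	struct {
		struct cn_msg		cn;
		struct w1_netlink_msg	w1;
		struct w1_netlink_cmd	cmd;
	} __attribute__((packed)) req = {
		.cn  = { .id  = { .idx = CN_W1_IDX, .val = CN_W1_VAL },
			 .len = sizeof(req.w1) + sizeof(req.cmd) },
		.w1  = { .type = W1_MASTER_CMD, .len = sizeof(req.cmd) },
		.cmd = { .cmd = W1_CMD_RESET, .len = 0 },
	};
	/* req.w1.id.mst.id = <master id learned via W1_LIST_MASTERS> */
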
index 56122b9e9294b865f30da01730c16ac0a4728dc3..27e950f935b18040437ae3e64019dde69fc4e273 100644 (file)
@@ -34,12 +34,13 @@ enum w1_netlink_message_types {
        W1_MASTER_REMOVE,
        W1_MASTER_CMD,
        W1_SLAVE_CMD,
+       W1_LIST_MASTERS,
 };
 
 struct w1_netlink_msg
 {
        __u8                            type;
-       __u8                            reserved;
+       __u8                            status;
        __u16                           len;
        union {
                __u8                    id[8];
@@ -51,10 +52,15 @@ struct w1_netlink_msg
        __u8                            data[0];
 };
 
-#define W1_CMD_READ            0x0
-#define W1_CMD_WRITE           0x1
-#define W1_CMD_SEARCH          0x2
-#define W1_CMD_ALARM_SEARCH    0x3
+enum w1_commands {
+       W1_CMD_READ = 0,
+       W1_CMD_WRITE,
+       W1_CMD_SEARCH,
+       W1_CMD_ALARM_SEARCH,
+       W1_CMD_TOUCH,
+       W1_CMD_RESET,
+       W1_CMD_MAX,
+};
 
 struct w1_netlink_cmd
 {
index 4b75a16de0094b3ea36f38d4d2b157ac3a20641d..526187c8a12de73ccbab828eb815eac4c2abd52e 100644 (file)
@@ -17,3 +17,27 @@ config XEN_SCRUB_PAGES
          is not accidentally visible to other domains.  It is more
          secure, but slightly less efficient.
          If in doubt, say yes.
+
+config XENFS
+       tristate "Xen filesystem"
+       depends on XEN
+       default y
+       help
+         The xen filesystem provides a way for domains to share
+         information with each other and with the hypervisor.
+         For example, by reading and writing the "xenbus" file, guests
+         may pass arbitrary information to the initial domain.
+         If in doubt, say yes.
+
+config XEN_COMPAT_XENFS
+       bool "Create compatibility mount point /proc/xen"
+       depends on XENFS
+       default y
+       help
+         The old xenstore userspace tools expect to find "xenbus"
+         under /proc/xen, but "xenbus" is now found at the root of the
+         xenfs filesystem.  Selecting this causes the kernel to create
         the compatibility mount point /proc/xen if it is running on
+         a xen platform.
+         If in doubt, say yes.
+
index d2a8fdf0e1918c7ada49c7a1c5c369647ddbeafd..ff8accc9e103f9d2a4fc21dcbcc2e88d479a0ade 100644 (file)
@@ -1,5 +1,7 @@
 obj-y  += grant-table.o features.o events.o manage.o
 obj-y  += xenbus/
+
 obj-$(CONFIG_HOTPLUG_CPU)      += cpu_hotplug.o
 obj-$(CONFIG_XEN_XENCOMM)      += xencomm.o
 obj-$(CONFIG_XEN_BALLOON)      += balloon.o
+obj-$(CONFIG_XENFS)            += xenfs/
\ No newline at end of file
index 9678b3e98c635014bebc845d79552aadaacbc596..92a1ef80a288173a7ccae8d5fc10f484d1acd613 100644 (file)
@@ -136,7 +136,6 @@ EXPORT_SYMBOL_GPL(xenbus_watch_pathfmt);
 /**
  * xenbus_switch_state
  * @dev: xenbus device
- * @xbt: transaction handle
  * @state: new state
  *
  * Advertise in the store a change of the given driver to the given new_state.
@@ -267,7 +266,7 @@ EXPORT_SYMBOL_GPL(xenbus_dev_error);
  * @fmt: error message format
  *
  * Equivalent to xenbus_dev_error(dev, err, fmt, args), followed by
- * xenbus_switch_state(dev, NULL, XenbusStateClosing) to schedule an orderly
+ * xenbus_switch_state(dev, XenbusStateClosing) to schedule an orderly
  * closedown of this driver and its peer.
  */
 
index b2a03184a2461af2544bd686a0ed4148d76f3d17..773d1cf2328334b9c62cc52fdf90c7107df02778 100644 (file)
@@ -40,6 +40,7 @@
 #include <linux/ctype.h>
 #include <linux/fcntl.h>
 #include <linux/mm.h>
+#include <linux/proc_fs.h>
 #include <linux/notifier.h>
 #include <linux/kthread.h>
 #include <linux/mutex.h>
 #include "xenbus_comms.h"
 #include "xenbus_probe.h"
 
+
 int xen_store_evtchn;
+EXPORT_SYMBOL(xen_store_evtchn);
+
 struct xenstore_domain_interface *xen_store_interface;
 static unsigned long xen_store_mfn;
 
@@ -166,6 +170,9 @@ static int read_backend_details(struct xenbus_device *xendev)
        return read_otherend_details(xendev, "backend-id", "backend");
 }
 
+static struct device_attribute xenbus_dev_attrs[] = {
+       __ATTR_NULL
+};
 
 /* Bus type for frontend drivers. */
 static struct xen_bus_type xenbus_frontend = {
@@ -174,12 +181,13 @@ static struct xen_bus_type xenbus_frontend = {
        .get_bus_id = frontend_bus_id,
        .probe = xenbus_probe_frontend,
        .bus = {
-               .name     = "xen",
-               .match    = xenbus_match,
-               .uevent   = xenbus_uevent,
-               .probe    = xenbus_dev_probe,
-               .remove   = xenbus_dev_remove,
-               .shutdown = xenbus_dev_shutdown,
+               .name      = "xen",
+               .match     = xenbus_match,
+               .uevent    = xenbus_uevent,
+               .probe     = xenbus_dev_probe,
+               .remove    = xenbus_dev_remove,
+               .shutdown  = xenbus_dev_shutdown,
+               .dev_attrs = xenbus_dev_attrs,
        },
 };
 
@@ -852,6 +860,14 @@ static int __init xenbus_probe_init(void)
        if (!xen_initial_domain())
                xenbus_probe(NULL);
 
+#ifdef CONFIG_XEN_COMPAT_XENFS
+       /*
+        * Create xenfs mountpoint in /proc for compatibility with
+        * utilities that expect to find "xenbus" under "/proc/xen".
+        */
+       proc_mkdir("xen", NULL);
+#endif
+
        return 0;
 
   out_unreg_back:
index 7f2f91c0e11dc0c8e5a343c6400ec664faadd824..e325eab4724d8cb0fab667b26f8c5b104c965c05 100644 (file)
@@ -184,6 +184,7 @@ void *xenbus_dev_request_and_reply(struct xsd_sockmsg *msg)
 
        return ret;
 }
+EXPORT_SYMBOL(xenbus_dev_request_and_reply);
 
 /* Send message to xs, get kmalloc'ed reply.  ERR_PTR() on error. */
 static void *xs_talkv(struct xenbus_transaction t,
diff --git a/drivers/xen/xenfs/Makefile b/drivers/xen/xenfs/Makefile
new file mode 100644 (file)
index 0000000..25275c3
--- /dev/null
@@ -0,0 +1,3 @@
+obj-$(CONFIG_XENFS) += xenfs.o
+
+xenfs-objs = super.o xenbus.o
\ No newline at end of file
diff --git a/drivers/xen/xenfs/super.c b/drivers/xen/xenfs/super.c
new file mode 100644 (file)
index 0000000..515741a
--- /dev/null
@@ -0,0 +1,64 @@
+/*
+ *  xenfs.c - a filesystem for passing info between a domain and
+ *  the hypervisor.
+ *
+ * 2008-10-07  Alex Zeffertt    Replaced /proc/xen/xenbus with xenfs filesystem
+ *                              and /proc/xen compatibility mount point.
+ *                              Turned xenfs into a loadable module.
+ */
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/magic.h>
+
+#include "xenfs.h"
+
+#include <asm/xen/hypervisor.h>
+
+MODULE_DESCRIPTION("Xen filesystem");
+MODULE_LICENSE("GPL");
+
+static int xenfs_fill_super(struct super_block *sb, void *data, int silent)
+{
+       static struct tree_descr xenfs_files[] = {
+               [2] = {"xenbus", &xenbus_file_ops, S_IRUSR|S_IWUSR},
+               {""},
+       };
+
+       return simple_fill_super(sb, XENFS_SUPER_MAGIC, xenfs_files);
+}
+
+static int xenfs_get_sb(struct file_system_type *fs_type,
+                       int flags, const char *dev_name,
+                       void *data, struct vfsmount *mnt)
+{
+       return get_sb_single(fs_type, flags, data, xenfs_fill_super, mnt);
+}
+
+static struct file_system_type xenfs_type = {
+       .owner =        THIS_MODULE,
+       .name =         "xenfs",
+       .get_sb =       xenfs_get_sb,
+       .kill_sb =      kill_litter_super,
+};
+
+static int __init xenfs_init(void)
+{
+       if (xen_pv_domain())
+               return register_filesystem(&xenfs_type);
+
+       printk(KERN_INFO "XENFS: not registering filesystem on non-xen platform\n");
+       return 0;
+}
+
+static void __exit xenfs_exit(void)
+{
+       if (xen_pv_domain())
+               unregister_filesystem(&xenfs_type);
+}
+
+module_init(xenfs_init);
+module_exit(xenfs_exit);
+
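
A note on the tree_descr array above: simple_fill_super() uses the array
index as the inode number, and indices 0 and 1 are reserved, which is why
"xenbus" sits at slot [2]. Once the module is loaded in a Xen PV domain,
the filesystem is mounted in the usual way, e.g. "mount -t xenfs xenfs
/proc/xen"; the mount point is conventional, with the compatibility
/proc/xen directory itself created by the xenbus_probe hunk above.
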
diff --git a/drivers/xen/xenfs/xenbus.c b/drivers/xen/xenfs/xenbus.c
new file mode 100644 (file)
index 0000000..875a4c5
--- /dev/null
@@ -0,0 +1,593 @@
+/*
+ * Driver giving user-space access to the kernel's xenbus connection
+ * to xenstore.
+ *
+ * Copyright (c) 2005, Christian Limpach
+ * Copyright (c) 2005, Rusty Russell, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Changes:
+ * 2008-10-07  Alex Zeffertt    Replaced /proc/xen/xenbus with xenfs filesystem
+ *                              and /proc/xen compatibility mount point.
+ *                              Turned xenfs into a loadable module.
+ */
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/uio.h>
+#include <linux/notifier.h>
+#include <linux/wait.h>
+#include <linux/fs.h>
+#include <linux/poll.h>
+#include <linux/mutex.h>
+#include <linux/spinlock.h>
+#include <linux/mount.h>
+#include <linux/pagemap.h>
+#include <linux/uaccess.h>
+#include <linux/init.h>
+#include <linux/namei.h>
+#include <linux/string.h>
+
+#include "xenfs.h"
+#include "../xenbus/xenbus_comms.h"
+
+#include <xen/xenbus.h>
+#include <asm/xen/hypervisor.h>
+
+/*
+ * An element of a list of outstanding transactions, for which we're
+ * still awaiting a reply.
+ */
+struct xenbus_transaction_holder {
+       struct list_head list;
+       struct xenbus_transaction handle;
+};
+
+/*
+ * A buffer of data on the queue.
+ */
+struct read_buffer {
+       struct list_head list;
+       unsigned int cons;
+       unsigned int len;
+       char msg[];
+};
+
+struct xenbus_file_priv {
+       /*
+        * msgbuffer_mutex is held while partial requests are built up
+        * and complete requests are acted on.  It therefore protects
+        * the "transactions" and "watches" lists, and the partial
+        * request length and buffer.
+        *
+        * reply_mutex protects the reply being built up to return to
+        * usermode.  It nests inside msgbuffer_mutex but may be held
+        * alone during a watch callback.
+        */
+       struct mutex msgbuffer_mutex;
+
+       /* In-progress transactions */
+       struct list_head transactions;
+
+       /* Active watches. */
+       struct list_head watches;
+
+       /* Partial request. */
+       unsigned int len;
+       union {
+               struct xsd_sockmsg msg;
+               char buffer[PAGE_SIZE];
+       } u;
+
+       /* Response queue. */
+       struct mutex reply_mutex;
+       struct list_head read_buffers;
+       wait_queue_head_t read_waitq;
+
+};
+
+/* Read out any raw xenbus messages queued up. */
+static ssize_t xenbus_file_read(struct file *filp,
+                              char __user *ubuf,
+                              size_t len, loff_t *ppos)
+{
+       struct xenbus_file_priv *u = filp->private_data;
+       struct read_buffer *rb;
+       unsigned i;
+       int ret;
+
+       mutex_lock(&u->reply_mutex);
+       while (list_empty(&u->read_buffers)) {
+               mutex_unlock(&u->reply_mutex);
+               ret = wait_event_interruptible(u->read_waitq,
+                                              !list_empty(&u->read_buffers));
+               if (ret)
+                       return ret;
+               mutex_lock(&u->reply_mutex);
+       }
+
+       rb = list_entry(u->read_buffers.next, struct read_buffer, list);
+       i = 0;
+       while (i < len) {
+               unsigned sz = min((unsigned)len - i, rb->len - rb->cons);
+
+               ret = copy_to_user(ubuf + i, &rb->msg[rb->cons], sz);
+
+               i += sz - ret;
+               rb->cons += sz - ret;
+
+               if (ret != sz) {
+                       if (i == 0)
+                               i = -EFAULT;
+                       goto out;
+               }
+
+               /* Clear out buffer if it has been consumed */
+               if (rb->cons == rb->len) {
+                       list_del(&rb->list);
+                       kfree(rb);
+                       if (list_empty(&u->read_buffers))
+                               break;
+                       rb = list_entry(u->read_buffers.next,
+                                       struct read_buffer, list);
+               }
+       }
+
+out:
+       mutex_unlock(&u->reply_mutex);
+       return i;
+}
+
+/*
+ * Add a buffer to the queue.  Caller must hold the appropriate lock
+ * if the queue is not local.  (Commonly the caller will build up
+ * multiple queued buffers on a temporary local list, and then add them
+ * to the appropriate list under lock once all the buffers have been
+ * successfully allocated.)
+ */
+static int queue_reply(struct list_head *queue, const void *data, size_t len)
+{
+       struct read_buffer *rb;
+
+       if (len == 0)
+               return 0;
+
+       rb = kmalloc(sizeof(*rb) + len, GFP_KERNEL);
+       if (rb == NULL)
+               return -ENOMEM;
+
+       rb->cons = 0;
+       rb->len = len;
+
+       memcpy(rb->msg, data, len);
+
+       list_add_tail(&rb->list, queue);
+       return 0;
+}
+
+/*
+ * Free all the read_buffers on a list.
+ * Caller must have sole reference to list.
+ */
+static void queue_cleanup(struct list_head *list)
+{
+       struct read_buffer *rb;
+
+       while (!list_empty(list)) {
+               rb = list_entry(list->next, struct read_buffer, list);
+               list_del(list->next);
+               kfree(rb);
+       }
+}
+
+struct watch_adapter {
+       struct list_head list;
+       struct xenbus_watch watch;
+       struct xenbus_file_priv *dev_data;
+       char *token;
+};
+
+static void free_watch_adapter(struct watch_adapter *watch)
+{
+       kfree(watch->watch.node);
+       kfree(watch->token);
+       kfree(watch);
+}
+
+static struct watch_adapter *alloc_watch_adapter(const char *path,
+                                                const char *token)
+{
+       struct watch_adapter *watch;
+
+       watch = kzalloc(sizeof(*watch), GFP_KERNEL);
+       if (watch == NULL)
+               goto out_fail;
+
+       watch->watch.node = kstrdup(path, GFP_KERNEL);
+       if (watch->watch.node == NULL)
+               goto out_free;
+
+       watch->token = kstrdup(token, GFP_KERNEL);
+       if (watch->token == NULL)
+               goto out_free;
+
+       return watch;
+
+out_free:
+       free_watch_adapter(watch);
+
+out_fail:
+       return NULL;
+}
+
+static void watch_fired(struct xenbus_watch *watch,
+                       const char **vec,
+                       unsigned int len)
+{
+       struct watch_adapter *adap;
+       struct xsd_sockmsg hdr;
+       const char *path, *token;
+       int path_len, tok_len, body_len, data_len = 0;
+       int ret;
+       LIST_HEAD(staging_q);
+
+       adap = container_of(watch, struct watch_adapter, watch);
+
+       path = vec[XS_WATCH_PATH];
+       token = adap->token;
+
+       path_len = strlen(path) + 1;
+       tok_len = strlen(token) + 1;
+       if (len > 2)
+               data_len = vec[len] - vec[2] + 1;
+       body_len = path_len + tok_len + data_len;
+
+       hdr.type = XS_WATCH_EVENT;
+       hdr.len = body_len;
+
+       mutex_lock(&adap->dev_data->reply_mutex);
+
+       ret = queue_reply(&staging_q, &hdr, sizeof(hdr));
+       if (!ret)
+               ret = queue_reply(&staging_q, path, path_len);
+       if (!ret)
+               ret = queue_reply(&staging_q, token, tok_len);
+       if (!ret && len > 2)
+               ret = queue_reply(&staging_q, vec[2], data_len);
+
+       if (!ret) {
+               /* success: pass reply list onto watcher */
+               list_splice_tail(&staging_q, &adap->dev_data->read_buffers);
+               wake_up(&adap->dev_data->read_waitq);
+       } else
+               queue_cleanup(&staging_q);
+
+       mutex_unlock(&adap->dev_data->reply_mutex);
+}
+
+static int xenbus_write_transaction(unsigned msg_type,
+                                   struct xenbus_file_priv *u)
+{
+       int rc = 0, ret;
+       void *reply;
+       struct xenbus_transaction_holder *trans = NULL;
+       LIST_HEAD(staging_q);
+
+       if (msg_type == XS_TRANSACTION_START) {
+               trans = kmalloc(sizeof(*trans), GFP_KERNEL);
+               if (!trans) {
+                       rc = -ENOMEM;
+                       goto out;
+               }
+       }
+
+       reply = xenbus_dev_request_and_reply(&u->u.msg);
+       if (IS_ERR(reply)) {
+               kfree(trans);
+               rc = PTR_ERR(reply);
+               goto out;
+       }
+
+       if (msg_type == XS_TRANSACTION_START) {
+               trans->handle.id = simple_strtoul(reply, NULL, 0);
+
+               list_add(&trans->list, &u->transactions);
+       } else if (msg_type == XS_TRANSACTION_END) {
+               list_for_each_entry(trans, &u->transactions, list)
+                       if (trans->handle.id == u->u.msg.tx_id)
+                               break;
+               BUG_ON(&trans->list == &u->transactions);
+               list_del(&trans->list);
+
+               kfree(trans);
+       }
+
+       mutex_lock(&u->reply_mutex);
+       ret = queue_reply(&staging_q, &u->u.msg, sizeof(u->u.msg));
+       if (!ret)
+               ret = queue_reply(&staging_q, reply, u->u.msg.len);
+       if (!ret) {
+               list_splice_tail(&staging_q, &u->read_buffers);
+               wake_up(&u->read_waitq);
+       } else {
+               queue_cleanup(&staging_q);
+               rc = ret;
+       }
+       mutex_unlock(&u->reply_mutex);
+
+       kfree(reply);
+
+out:
+       return rc;
+}
+
+static int xenbus_write_watch(unsigned msg_type, struct xenbus_file_priv *u)
+{
+       struct watch_adapter *watch, *tmp_watch;
+       char *path, *token;
+       int err, rc;
+       LIST_HEAD(staging_q);
+
+       path = u->u.buffer + sizeof(u->u.msg);
+       token = memchr(path, 0, u->u.msg.len);
+       if (token == NULL) {
+               rc = -EILSEQ;
+               goto out;
+       }
+       token++;
+
+       if (msg_type == XS_WATCH) {
+               watch = alloc_watch_adapter(path, token);
+               if (watch == NULL) {
+                       rc = -ENOMEM;
+                       goto out;
+               }
+
+               watch->watch.callback = watch_fired;
+               watch->dev_data = u;
+
+               err = register_xenbus_watch(&watch->watch);
+               if (err) {
+                       free_watch_adapter(watch);
+                       rc = err;
+                       goto out;
+               }
+               list_add(&watch->list, &u->watches);
+       } else {
+               list_for_each_entry_safe(watch, tmp_watch, &u->watches, list) {
+                       if (!strcmp(watch->token, token) &&
+                           !strcmp(watch->watch.node, path)) {
+                               unregister_xenbus_watch(&watch->watch);
+                               list_del(&watch->list);
+                               free_watch_adapter(watch);
+                               break;
+                       }
+               }
+       }
+
+       /* Success.  Synthesize a reply to say all is OK. */
+       {
+               struct {
+                       struct xsd_sockmsg hdr;
+                       char body[3];
+               } __packed reply = {
+                       {
+                               .type = msg_type,
+                               .len = sizeof(reply.body)
+                       },
+                       "OK"
+               };
+
+               mutex_lock(&u->reply_mutex);
+               rc = queue_reply(&u->read_buffers, &reply, sizeof(reply));
+               mutex_unlock(&u->reply_mutex);
+       }
+
+out:
+       return rc;
+}
+
+static ssize_t xenbus_file_write(struct file *filp,
+                               const char __user *ubuf,
+                               size_t len, loff_t *ppos)
+{
+       struct xenbus_file_priv *u = filp->private_data;
+       uint32_t msg_type;
+       int rc = len;
+       int ret;
+       LIST_HEAD(staging_q);
+
+       /*
+        * We're expecting usermode to be writing properly formed
+        * xenbus messages.  If they write an incomplete message we
+        * buffer it up.  Once it is complete, we act on it.
+        */
+
+       /*
+        * Make sure concurrent writers can't stomp all over each
+        * other's messages and make a mess of our partial message
+        * buffer.  We don't make any attempt to stop multiple
+        * writers from making a mess of each other's incomplete
+        * messages; we're just trying to guarantee our own internal
+        * consistency and make sure that single writes are handled
+        * atomically.
+        */
+       mutex_lock(&u->msgbuffer_mutex);
+
+       /* Get this out of the way early to avoid confusion */
+       if (len == 0)
+               goto out;
+
+       /* Can't write a xenbus message larger than we can buffer */
+       if ((len + u->len) > sizeof(u->u.buffer)) {
+               /* On error, dump existing buffer */
+               u->len = 0;
+               rc = -EINVAL;
+               goto out;
+       }
+
+       ret = copy_from_user(u->u.buffer + u->len, ubuf, len);
+
+       if (ret == len) {
+               rc = -EFAULT;
+               goto out;
+       }
+
+       /* Deal with a partial copy. */
+       len -= ret;
+       rc = len;
+
+       u->len += len;
+
+       /* Return if we haven't got a full message yet */
+       if (u->len < sizeof(u->u.msg))
+               goto out;       /* not even the header yet */
+
+       /* If we're expecting a message that's larger than we can
+          possibly send, dump what we have and return an error. */
+       if ((sizeof(u->u.msg) + u->u.msg.len) > sizeof(u->u.buffer)) {
+               rc = -E2BIG;
+               u->len = 0;
+               goto out;
+       }
+
+       if (u->len < (sizeof(u->u.msg) + u->u.msg.len))
+               goto out;       /* incomplete data portion */
+
+       /*
+        * OK, now we have a complete message.  Do something with it.
+        */
+
+       msg_type = u->u.msg.type;
+
+       switch (msg_type) {
+       case XS_TRANSACTION_START:
+       case XS_TRANSACTION_END:
+       case XS_DIRECTORY:
+       case XS_READ:
+       case XS_GET_PERMS:
+       case XS_RELEASE:
+       case XS_GET_DOMAIN_PATH:
+       case XS_WRITE:
+       case XS_MKDIR:
+       case XS_RM:
+       case XS_SET_PERMS:
+               /* Send out a transaction */
+               ret = xenbus_write_transaction(msg_type, u);
+               break;
+
+       case XS_WATCH:
+       case XS_UNWATCH:
+               /* (Un)Ask for some path to be watched for changes */
+               ret = xenbus_write_watch(msg_type, u);
+               break;
+
+       default:
+               ret = -EINVAL;
+               break;
+       }
+       if (ret != 0)
+               rc = ret;
+
+       /* Buffered message consumed */
+       u->len = 0;
+
+ out:
+       mutex_unlock(&u->msgbuffer_mutex);
+       return rc;
+}
+
+static int xenbus_file_open(struct inode *inode, struct file *filp)
+{
+       struct xenbus_file_priv *u;
+
+       if (xen_store_evtchn == 0)
+               return -ENOENT;
+
+       nonseekable_open(inode, filp);
+
+       u = kzalloc(sizeof(*u), GFP_KERNEL);
+       if (u == NULL)
+               return -ENOMEM;
+
+       INIT_LIST_HEAD(&u->transactions);
+       INIT_LIST_HEAD(&u->watches);
+       INIT_LIST_HEAD(&u->read_buffers);
+       init_waitqueue_head(&u->read_waitq);
+
+       mutex_init(&u->reply_mutex);
+       mutex_init(&u->msgbuffer_mutex);
+
+       filp->private_data = u;
+
+       return 0;
+}
+
+static int xenbus_file_release(struct inode *inode, struct file *filp)
+{
+       struct xenbus_file_priv *u = filp->private_data;
+       struct xenbus_transaction_holder *trans, *tmp;
+       struct watch_adapter *watch, *tmp_watch;
+
+       /*
+        * No need for locking here because there are no other users,
+        * by definition.
+        */
+
+       list_for_each_entry_safe(trans, tmp, &u->transactions, list) {
+               xenbus_transaction_end(trans->handle, 1);
+               list_del(&trans->list);
+               kfree(trans);
+       }
+
+       list_for_each_entry_safe(watch, tmp_watch, &u->watches, list) {
+               unregister_xenbus_watch(&watch->watch);
+               list_del(&watch->list);
+               free_watch_adapter(watch);
+       }
+
+       kfree(u);
+
+       return 0;
+}
+
+static unsigned int xenbus_file_poll(struct file *file, poll_table *wait)
+{
+       struct xenbus_file_priv *u = file->private_data;
+
+       poll_wait(file, &u->read_waitq, wait);
+       if (!list_empty(&u->read_buffers))
+               return POLLIN | POLLRDNORM;
+       return 0;
+}
+
+const struct file_operations xenbus_file_ops = {
+       .read = xenbus_file_read,
+       .write = xenbus_file_write,
+       .open = xenbus_file_open,
+       .release = xenbus_file_release,
+       .poll = xenbus_file_poll,
+};
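
The read/write pair above speaks the plain xenstore wire protocol: writes
are buffered until a complete xsd_sockmsg arrives, then acted on, and the
reply is queued for read(). A hedged userspace sketch of an XS_READ round
trip (error handling omitted; the path assumes the compatibility mount
point, and xsd_sockmsg/XS_READ come from the Xen interface headers):

	int fd = open("/proc/xen/xenbus", O_RDWR);
	struct xsd_sockmsg hdr = {
		.type = XS_READ,
		.req_id = 1,
		.len = sizeof("domid"),		/* includes the NUL */
	};

	write(fd, &hdr, sizeof(hdr));	/* incomplete: buffered */
	write(fd, "domid", hdr.len);	/* complete: processed */
	read(fd, &hdr, sizeof(hdr));	/* reply header... */
	/* ...followed by hdr.len bytes of reply payload */
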
diff --git a/drivers/xen/xenfs/xenfs.h b/drivers/xen/xenfs/xenfs.h
new file mode 100644 (file)
index 0000000..51f08b2
--- /dev/null
@@ -0,0 +1,6 @@
+#ifndef _XENFS_XENBUS_H
+#define _XENFS_XENBUS_H
+
+extern const struct file_operations xenbus_file_ops;
+
+#endif /* _XENFS_XENBUS_H */
index c41fa2af76771b16abf72b8acb9e7de4ffa8d851..e3ff2b9e602fd8fc8d35b24ae5a6dad841bc05f1 100644 (file)
@@ -152,8 +152,10 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
        elf_addr_t __user *sp;
        elf_addr_t __user *u_platform;
        elf_addr_t __user *u_base_platform;
+       elf_addr_t __user *u_rand_bytes;
        const char *k_platform = ELF_PLATFORM;
        const char *k_base_platform = ELF_BASE_PLATFORM;
+       unsigned char k_rand_bytes[16];
        int items;
        elf_addr_t *elf_info;
        int ei_index = 0;
@@ -196,6 +198,15 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
                        return -EFAULT;
        }
 
+       /*
+        * Generate 16 random bytes for userspace PRNG seeding.
+        */
+       get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
+       u_rand_bytes = (elf_addr_t __user *)
+                      STACK_ALLOC(p, sizeof(k_rand_bytes));
+       if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
+               return -EFAULT;
+
        /* Create the ELF interpreter info */
        elf_info = (elf_addr_t *)current->mm->saved_auxv;
        /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
@@ -228,6 +239,7 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
        NEW_AUX_ENT(AT_GID, cred->gid);
        NEW_AUX_ENT(AT_EGID, cred->egid);
        NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
+       NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
        NEW_AUX_ENT(AT_EXECFN, bprm->exec);
        if (k_platform) {
                NEW_AUX_ENT(AT_PLATFORM,
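
AT_RANDOM hands every new process 16 kernel-generated random bytes through
the auxiliary vector; glibc, for instance, later used them to seed its
stack-protector canary and pointer-mangling guard. A hedged userspace
sketch (getauxval() is a later glibc addition, shown only for illustration):

	#include <sys/auxv.h>

	unsigned char *seed = (unsigned char *)getauxval(AT_RANDOM);
	/* seed points at the 16 bytes copied onto the stack above */
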
index b957717e25abbbe2dc16aa655084d0f779f6c5c6..8ebbfdf708c24c9d70bc4cc88e8266202d56ec06 100644 (file)
@@ -1005,6 +1005,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
        }
 
        lock_kernel();
+ restart:
 
        ret = -ENXIO;
        disk = get_gendisk(bdev->bd_dev, &partno);
@@ -1025,6 +1026,19 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 
                        if (disk->fops->open) {
                                ret = disk->fops->open(bdev, mode);
+                               if (ret == -ERESTARTSYS) {
+                                       /* Lost a race with 'disk' being
+                                        * deleted, try again.
+                                        * See md.c
+                                        */
+                                       disk_put_part(bdev->bd_part);
+                                       bdev->bd_part = NULL;
+                                       module_put(disk->fops->owner);
+                                       put_disk(disk);
+                                       bdev->bd_disk = NULL;
+                                       mutex_unlock(&bdev->bd_mutex);
+                                       goto restart;
+                               }
                                if (ret)
                                        goto out_clear;
                        }
index 81b7771c64653d7e3f18f6caff0288b8ca33c669..43c96ce29614e65fcdfdc3825d309ad0fe62dcfd 100644 (file)
@@ -11,7 +11,9 @@
 
 #include "coda_int.h"
 
+#ifdef CONFIG_SYSCTL
 static struct ctl_table_header *fs_table_header;
+#endif
 
 static ctl_table coda_table[] = {
        {
@@ -41,6 +43,7 @@ static ctl_table coda_table[] = {
        {}
 };
 
+#ifdef CONFIG_SYSCTL
 static ctl_table fs_table[] = {
        {
                .ctl_name       = CTL_UNNUMBERED,
@@ -50,7 +53,7 @@ static ctl_table fs_table[] = {
        },
        {}
 };
-
+#endif
 
 void coda_sysctl_init(void)
 {
index e88c23b85a32d4456bf7741a7f218315189153e1..4547f66884a07f62c4cde29c118ee808360dba38 100644 (file)
@@ -1567,10 +1567,6 @@ void d_rehash(struct dentry * entry)
        spin_unlock(&dcache_lock);
 }
 
-#define do_switch(x,y) do { \
-       __typeof__ (x) __tmp = x; \
-       x = y; y = __tmp; } while (0)
-
 /*
  * When switching names, the actual string doesn't strictly have to
  * be preserved in the target - because we're dropping the target
@@ -1589,7 +1585,7 @@ static void switch_names(struct dentry *dentry, struct dentry *target)
                        /*
                         * Both external: swap the pointers
                         */
-                       do_switch(target->d_name.name, dentry->d_name.name);
+                       swap(target->d_name.name, dentry->d_name.name);
                } else {
                        /*
                         * dentry:internal, target:external.  Steal target's
@@ -1620,7 +1616,7 @@ static void switch_names(struct dentry *dentry, struct dentry *target)
                        return;
                }
        }
-       do_switch(dentry->d_name.len, target->d_name.len);
+       swap(dentry->d_name.len, target->d_name.len);
 }
 
 /*
@@ -1680,7 +1676,7 @@ already_unhashed:
 
        /* Switch the names.. */
        switch_names(dentry, target);
-       do_switch(dentry->d_name.hash, target->d_name.hash);
+       swap(dentry->d_name.hash, target->d_name.hash);
 
        /* ... and switch the parents */
        if (IS_ROOT(dentry)) {
@@ -1688,7 +1684,7 @@ already_unhashed:
                target->d_parent = target;
                INIT_LIST_HEAD(&target->d_u.d_child);
        } else {
-               do_switch(dentry->d_parent, target->d_parent);
+               swap(dentry->d_parent, target->d_parent);
 
                /* And add them back to the (new) parent lists */
                list_add(&target->d_u.d_child, &target->d_parent->d_subdirs);
@@ -1789,7 +1785,7 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
        struct dentry *dparent, *aparent;
 
        switch_names(dentry, anon);
-       do_switch(dentry->d_name.hash, anon->d_name.hash);
+       swap(dentry->d_name.hash, anon->d_name.hash);
 
        dparent = dentry->d_parent;
        aparent = anon->d_parent;
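
The open-coded do_switch() macro is dropped in favour of the generic swap()
helper; for reference, a sketch of the helper these call sites now rely on,
matching the <linux/kernel.h> definition of this era:

	#define swap(a, b) \
		do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
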
index 61bfff64e5af1368a37801311ebe9aed39f42e1e..48c0571f831d5fc72d7724f54ffab6ea1d61849a 100644 (file)
@@ -2090,10 +2090,12 @@ static int do_set_dqblk(struct dquot *dquot, struct if_dqblk *di)
        }
        if (di->dqb_valid & QIF_BTIME) {
                dm->dqb_btime = di->dqb_btime;
+               check_blim = 1;
                __set_bit(DQ_LASTSET_B + QIF_BTIME_B, &dquot->dq_flags);
        }
        if (di->dqb_valid & QIF_ITIME) {
                dm->dqb_itime = di->dqb_itime;
+               check_ilim = 1;
                __set_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags);
        }
 
index c454d5db28a5aef07d2585d3a672bb5310c3fa4b..66321a877e7458795616e0568c58fac83e0437fd 100644 (file)
@@ -565,12 +565,8 @@ got:
        inode->i_blocks = 0;
        inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
        memset(ei->i_data, 0, sizeof(ei->i_data));
-       ei->i_flags = EXT2_I(dir)->i_flags & ~EXT2_BTREE_FL;
-       if (S_ISLNK(mode))
-               ei->i_flags &= ~(EXT2_IMMUTABLE_FL|EXT2_APPEND_FL);
-       /* dirsync is only applied to directories */
-       if (!S_ISDIR(mode))
-               ei->i_flags &= ~EXT2_DIRSYNC_FL;
+       ei->i_flags =
+               ext2_mask_flags(mode, EXT2_I(dir)->i_flags & EXT2_FL_INHERITED);
        ei->i_faddr = 0;
        ei->i_frag_no = 0;
        ei->i_frag_size = 0;
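
ext2_mask_flags() centralizes the per-file-type filtering that was
open-coded here. A hedged sketch of its shape (the FLMASK constants are
defined next to EXT2_FL_INHERITED in ext2_fs.h, introduced elsewhere in
this series):

	static inline __u32 ext2_mask_flags(umode_t mode, __u32 flags)
	{
		if (S_ISDIR(mode))
			return flags;		/* directories keep everything */
		else if (S_ISREG(mode))
			return flags & EXT2_REG_FLMASK;
		else				/* symlinks, devices, fifos */
			return flags & EXT2_OTHER_FLMASK;
	}
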
index 02b39a5deb74033d62deb7c5ccd86d96b6ddf9b9..23fff2f87783f7e8856a6ba3679e2e0057d94767 100644 (file)
@@ -498,8 +498,6 @@ static int ext2_alloc_branch(struct inode *inode,
  * ext2_splice_branch - splice the allocated branch onto inode.
  * @inode: owner
  * @block: (logical) number of block we are adding
- * @chain: chain of indirect blocks (with a missing link - see
- *     ext2_alloc_branch)
  * @where: location of missing link
  * @num:   number of indirect blocks we are adding
  * @blks:  number of direct blocks we are adding
index de876fa793e1d140386f8bd9693785943f706c09..7cb4badef927433ec90618b67c69cc08bec936f1 100644 (file)
@@ -50,8 +50,7 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                        goto setflags_out;
                }
 
-               if (!S_ISDIR(inode->i_mode))
-                       flags &= ~EXT2_DIRSYNC_FL;
+               flags = ext2_mask_flags(inode->i_mode, flags);
 
                mutex_lock(&inode->i_mutex);
                /* Is it quota file? Do not allow user to mess with it */
index 647cd888ac87c8933a168e3ff05ca6dbe201e786..da8bdeaa2e6d594aec9680bcb2bcd0aca17fdecf 100644 (file)
@@ -132,6 +132,7 @@ static void ext2_put_super (struct super_block * sb)
        percpu_counter_destroy(&sbi->s_dirs_counter);
        brelse (sbi->s_sbh);
        sb->s_fs_info = NULL;
+       kfree(sbi->s_blockgroup_lock);
        kfree(sbi);
 
        return;
@@ -756,6 +757,13 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
        sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
        if (!sbi)
                return -ENOMEM;
+
+       sbi->s_blockgroup_lock =
+               kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
+       if (!sbi->s_blockgroup_lock) {
+               kfree(sbi);
+               return -ENOMEM;
+       }
        sb->s_fs_info = sbi;
        sbi->s_sb_block = sb_block;
 
@@ -983,7 +991,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
                printk ("EXT2-fs: not enough memory\n");
                goto failed_mount;
        }
-       bgl_lock_init(&sbi->s_blockgroup_lock);
+       bgl_lock_init(sbi->s_blockgroup_lock);
        sbi->s_debts = kcalloc(sbi->s_groups_count, sizeof(*sbi->s_debts), GFP_KERNEL);
        if (!sbi->s_debts) {
                printk ("EXT2-fs: not enough memory\n");
index 5655fbcbd11f564b82c31eea45a6c8fe33b41f58..8de6c720e510910b0278c5c806e836494d783fd2 100644 (file)
@@ -559,12 +559,8 @@ got:
        ei->i_dir_start_lookup = 0;
        ei->i_disksize = 0;
 
-       ei->i_flags = EXT3_I(dir)->i_flags & ~EXT3_INDEX_FL;
-       if (S_ISLNK(mode))
-               ei->i_flags &= ~(EXT3_IMMUTABLE_FL|EXT3_APPEND_FL);
-       /* dirsync only applies to directories */
-       if (!S_ISDIR(mode))
-               ei->i_flags &= ~EXT3_DIRSYNC_FL;
+       ei->i_flags =
+               ext3_mask_flags(mode, EXT3_I(dir)->i_flags & EXT3_FL_INHERITED);
 #ifdef EXT3_FRAGMENTS
        ei->i_faddr = 0;
        ei->i_frag_no = 0;
index b7394d05ee8e9e8261d2a6e8be983b83781d2211..5e86ce9a86e05b20d3ffe297dd4b9191fee08994 100644 (file)
@@ -53,8 +53,7 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
                        goto flags_out;
                }
 
-               if (!S_ISDIR(inode->i_mode))
-                       flags &= ~EXT3_DIRSYNC_FL;
+               flags = ext3_mask_flags(inode->i_mode, flags);
 
                mutex_lock(&inode->i_mutex);
                /* Is it quota file? Do not allow user to mess with it */
index 1dd2abe6313e3b656f42af6e2ebd63e2ec02a98c..8d6f965e502cd9d56969fefcf347dbeec37a42c7 100644 (file)
@@ -74,10 +74,6 @@ static struct buffer_head *ext3_append(handle_t *handle,
 #define assert(test) J_ASSERT(test)
 #endif
 
-#ifndef swap
-#define swap(x, y) do { typeof(x) z = x; x = y; y = z; } while (0)
-#endif
-
 #ifdef DX_DEBUG
 #define dxtrace(command) command
 #else
index c22d01467bd18bf3b3d8f2c69508c53c6b38d07e..01c235bc2054422a0bf1134d6af7214ab459689a 100644 (file)
@@ -439,6 +439,7 @@ static void ext3_put_super (struct super_block * sb)
                ext3_blkdev_remove(sbi);
        }
        sb->s_fs_info = NULL;
+       kfree(sbi->s_blockgroup_lock);
        kfree(sbi);
        return;
 }
@@ -1546,6 +1547,13 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
        sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
        if (!sbi)
                return -ENOMEM;
+
+       sbi->s_blockgroup_lock =
+               kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
+       if (!sbi->s_blockgroup_lock) {
+               kfree(sbi);
+               return -ENOMEM;
+       }
        sb->s_fs_info = sbi;
        sbi->s_mount_opt = 0;
        sbi->s_resuid = EXT3_DEF_RESUID;
@@ -1786,7 +1794,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
                goto failed_mount;
        }
 
-       bgl_lock_init(&sbi->s_blockgroup_lock);
+       bgl_lock_init(sbi->s_blockgroup_lock);
 
        for (i = 0; i < db_count; i++) {
                block = descriptor_loc(sb, logic_sb_block, i);
index ea2ce3c0ae66ec47db52596ed6d5c22cf844ba65..3f54db31cdc233c4627a6956f2b47228ebffed2f 100644 (file)
@@ -2536,7 +2536,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
                 */
                newdepth = ext_depth(inode);
                /*
-                * update the extent length after successfull insert of the
+                * update the extent length after successful insert of the
                 * split extent
                 */
                orig_ex.ee_len = cpu_to_le16(ee_len -
index 9fd2a5e1be4dba61c58f44db91429b9ae14e79c8..4b8d431d7dff8b6763f6433db2fa4ce1d2f90235 100644 (file)
@@ -74,10 +74,6 @@ static struct buffer_head *ext4_append(handle_t *handle,
 #define assert(test) J_ASSERT(test)
 #endif
 
-#ifndef swap
-#define swap(x, y) do { typeof(x) z = x; x = y; y = z; } while (0)
-#endif
-
 #ifdef DX_DEBUG
 #define dxtrace(command) command
 #else
index 25719d902c5116a6ff50248b272e5603c196f0f7..3fbffb1ea7147ff079e5c18788231717a345f332 100644 (file)
@@ -306,6 +306,8 @@ void journal_commit_transaction(journal_t *journal)
        int flags;
        int err;
        unsigned long blocknr;
+       ktime_t start_time;
+       u64 commit_time;
        char *tagp = NULL;
        journal_header_t *header;
        journal_block_tag_t *tag = NULL;
@@ -418,6 +420,7 @@ void journal_commit_transaction(journal_t *journal)
        commit_transaction->t_state = T_FLUSH;
        journal->j_committing_transaction = commit_transaction;
        journal->j_running_transaction = NULL;
+       start_time = ktime_get();
        commit_transaction->t_log_start = journal->j_head;
        wake_up(&journal->j_wait_transaction_locked);
        spin_unlock(&journal->j_state_lock);
@@ -913,6 +916,18 @@ restart_loop:
        J_ASSERT(commit_transaction == journal->j_committing_transaction);
        journal->j_commit_sequence = commit_transaction->t_tid;
        journal->j_committing_transaction = NULL;
+       commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
+
+       /*
+        * weight the commit time higher than the average time so we don't
+        * react too strongly to vast changes in commit time
+        */
+       if (likely(journal->j_average_commit_time))
+               journal->j_average_commit_time = (commit_time*3 +
+                               journal->j_average_commit_time) / 4;
+       else
+               journal->j_average_commit_time = commit_time;
+
        spin_unlock(&journal->j_state_lock);
 
        if (commit_transaction->t_checkpoint_list == NULL &&
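
[Editor's note] The averaging above is an exponentially weighted moving average that weights the newest commit time 3:1 against the running average, so the estimate tracks current device behaviour quickly while still smoothing jitter (each sample moves the average 3/4 of the way toward itself). A standalone sketch of the same update rule follows; the helper name and sample values are illustrative, not from the kernel source.

#include <stdio.h>

typedef unsigned long long u64;

/* Same 3:1 weighting as journal_commit_transaction(); an average of 0
 * means "no samples yet" and is seeded with the first measurement. */
static u64 update_average_commit_time(u64 avg, u64 sample)
{
	return avg ? (sample * 3 + avg) / 4 : sample;
}

int main(void)
{
	u64 avg = 0;
	u64 samples[] = { 1000000, 1200000, 900000, 5000000 }; /* ns */

	for (int i = 0; i < 4; i++) {
		avg = update_average_commit_time(avg, samples[i]);
		printf("sample %llu ns -> avg %llu ns\n", samples[i], avg);
	}
	return 0;
}
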
index 60d4c32c880869719c7261a4b0370e5d52d590e9..e6a117431277129a9a3e23ca93916826e43f842a 100644 (file)
@@ -25,6 +25,7 @@
 #include <linux/timer.h>
 #include <linux/mm.h>
 #include <linux/highmem.h>
+#include <linux/hrtimer.h>
 
 static void __journal_temp_unlink_buffer(struct journal_head *jh);
 
@@ -49,6 +50,7 @@ get_transaction(journal_t *journal, transaction_t *transaction)
 {
        transaction->t_journal = journal;
        transaction->t_state = T_RUNNING;
+       transaction->t_start_time = ktime_get();
        transaction->t_tid = journal->j_transaction_sequence++;
        transaction->t_expires = jiffies + journal->j_commit_interval;
        spin_lock_init(&transaction->t_handle_lock);
@@ -752,7 +754,6 @@ out:
  * int journal_get_write_access() - notify intent to modify a buffer for metadata (not data) update.
  * @handle: transaction to add buffer modifications to
  * @bh:     bh to be used for metadata writes
- * @credits: variable that will receive credits for the buffer
  *
  * Returns an error code or 0 on success.
  *
@@ -1370,7 +1371,7 @@ int journal_stop(handle_t *handle)
 {
        transaction_t *transaction = handle->h_transaction;
        journal_t *journal = transaction->t_journal;
-       int old_handle_count, err;
+       int err;
        pid_t pid;
 
        J_ASSERT(journal_current_handle() == handle);
@@ -1399,6 +1400,17 @@ int journal_stop(handle_t *handle)
         * on IO anyway.  Speeds up many-threaded, many-dir operations
         * by 30x or more...
         *
+        * We try to optimize the sleep time against what the underlying disk
+        * can do, instead of having a static sleep time.  This is useful for
+        * the case where our storage is so fast that it is better to go
+        * ahead and force a flush and wait for the transaction to be committed
+        * than it is to wait for an arbitrary amount of time for new writers to
+        * join the transaction.  We achieve this by measuring how long it takes
+        * to commit a transaction, comparing it with how long this
+        * transaction has been running; if run time < commit time then we
+        * sleep for the delta and commit.  This greatly helps super fast disks
+        * that would otherwise see slowdowns as more threads started doing fsyncs.
+        *
         * But don't do this if this process was the most recent one to
         * perform a synchronous write.  We do this to detect the case where a
         * single process is doing a stream of sync writes.  No point in waiting
@@ -1406,11 +1418,26 @@ int journal_stop(handle_t *handle)
         */
        pid = current->pid;
        if (handle->h_sync && journal->j_last_sync_writer != pid) {
+               u64 commit_time, trans_time;
+
                journal->j_last_sync_writer = pid;
-               do {
-                       old_handle_count = transaction->t_handle_count;
-                       schedule_timeout_uninterruptible(1);
-               } while (old_handle_count != transaction->t_handle_count);
+
+               spin_lock(&journal->j_state_lock);
+               commit_time = journal->j_average_commit_time;
+               spin_unlock(&journal->j_state_lock);
+
+               trans_time = ktime_to_ns(ktime_sub(ktime_get(),
+                                                  transaction->t_start_time));
+
+               commit_time = min_t(u64, commit_time,
+                                   1000*jiffies_to_usecs(1));
+
+               if (trans_time < commit_time) {
+                       ktime_t expires = ktime_add_ns(ktime_get(),
+                                                      commit_time);
+                       set_current_state(TASK_UNINTERRUPTIBLE);
+                       schedule_hrtimeout(&expires, HRTIMER_MODE_ABS);
+               }
        }
 
        current->journal_info = NULL;
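
[Editor's note] Summarizing the new journal_stop() logic: the sleep is no longer a fixed tick-by-tick poll but is bounded by the measured average commit time, itself capped at one jiffy, and is skipped entirely once the transaction has been open longer than a commit would take. A hedged, kernel-context sketch of just that decision (the function name and unit conventions are illustrative):

/* Sketch of the new journal_stop() wait decision, assuming the caller
 * supplies all times in nanoseconds. Wait for more writers only while
 * the transaction is younger than one (capped) average commit. */
static int should_wait_for_more_writers(u64 trans_time_ns,
					u64 avg_commit_ns,
					u64 one_jiffy_ns)
{
	u64 commit_time = avg_commit_ns;

	if (commit_time > one_jiffy_ns)		/* never wait past a tick */
		commit_time = one_jiffy_ns;

	return trans_time_ns < commit_time;
}
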
index 54ff4c77aaa30c9eee1b001e2d7cd79fbff82cc1..d861096c9d81cbd643618377a7b7c9831c524cd9 100644 (file)
@@ -3868,7 +3868,7 @@ static void ocfs2_split_record(struct inode *inode,
        struct ocfs2_extent_list *left_el = NULL, *right_el, *insert_el, *el;
        struct ocfs2_extent_rec *rec, *tmprec;
 
-       right_el = path_leaf_el(right_path);;
+       right_el = path_leaf_el(right_path);
        if (left_path)
                left_el = path_leaf_el(left_path);
 
index f731ab4917951feae22d07b30d837a2ca7e0994b..b0c4cadd4c45181f7d8d02f1af11a182e2f380dd 100644 (file)
@@ -1324,7 +1324,7 @@ again:
                        goto out;
                }
 
-               mlog(0, "lock %s, successfull return from ocfs2_dlm_lock\n",
+               mlog(0, "lock %s, successful return from ocfs2_dlm_lock\n",
                     lockres->l_name);
 
                /* At this point we've gone inside the dlm and need to
@@ -2951,7 +2951,7 @@ static int ocfs2_drop_lock(struct ocfs2_super *osb,
                ocfs2_dlm_dump_lksb(&lockres->l_lksb);
                BUG();
        }
-       mlog(0, "lock %s, successfull return from ocfs2_dlm_unlock\n",
+       mlog(0, "lock %s, successful return from ocfs2_dlm_unlock\n",
             lockres->l_name);
 
        ocfs2_wait_on_busy_lock(lockres);
index e8f795f978aaae80c865d8ed55e694fcbcf35fa1..a5887df2cd8adce0f84a28a292a354a19c9fa4b5 100644 (file)
@@ -1605,7 +1605,7 @@ int ocfs2_change_file_space(struct file *file, unsigned int cmd,
                            struct ocfs2_space_resv *sr)
 {
        struct inode *inode = file->f_path.dentry->d_inode;
-       struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);;
+       struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 
        if ((cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64) &&
            !ocfs2_writes_unwritten_extents(osb))
index 03ec595049065f41b2513d9b94e76058a02ebfec..5edcc3f92ba746496da35c23964c19e5518810b4 100644 (file)
@@ -47,8 +47,6 @@ static ssize_t read_from_oldmem(char *buf, size_t count,
 
        offset = (unsigned long)(*ppos % PAGE_SIZE);
        pfn = (unsigned long)(*ppos / PAGE_SIZE);
-       if (pfn > saved_max_pfn)
-               return -EINVAL;
 
        do {
                if (count > (PAGE_SIZE - offset))
index c97d4c931715767bff385ece2600f1504d2483af..98a232f7196b942aa1c61ff0c80b772e0d957a12 100644 (file)
@@ -490,7 +490,7 @@ static mode_t romfs_modemap[] =
 static struct inode *
 romfs_iget(struct super_block *sb, unsigned long ino)
 {
-       int nextfh;
+       int nextfh, ret;
        struct romfs_inode ri;
        struct inode *i;
 
@@ -526,11 +526,11 @@ romfs_iget(struct super_block *sb, unsigned long ino)
        i->i_mtime.tv_nsec = i->i_atime.tv_nsec = i->i_ctime.tv_nsec = 0;
 
         /* Precalculate the data offset */
-        ino = romfs_strnlen(i, ino+ROMFH_SIZE, ROMFS_MAXFN);
-        if (ino >= 0)
-                ino = ((ROMFH_SIZE+ino+1+ROMFH_PAD)&ROMFH_MASK);
-        else
-                ino = 0;
+       ret = romfs_strnlen(i, ino + ROMFH_SIZE, ROMFS_MAXFN);
+       if (ret >= 0)
+               ino = (ROMFH_SIZE + ret + 1 + ROMFH_PAD) & ROMFH_MASK;
+       else
+               ino = 0;
 
         ROMFS_I(i)->i_metasize = ino;
         ROMFS_I(i)->i_dataoffset = ino+(i->i_ino&ROMFH_MASK);
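
[Editor's note] The point of this hunk is a signedness bug: ino is unsigned long, so the old `if (ino >= 0)` test was always true and a negative error return from romfs_strnlen() was silently used as a length. A minimal standalone demonstration of that bug class:

#include <stdio.h>

int main(void)
{
	unsigned long len = -1;	/* error code stored in an unsigned variable */

	if (len >= 0)		/* always true: unsigned is never negative */
		printf("error treated as huge length: %lu\n", len);
	return 0;
}
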
index 1abab5cee4ba8d552df8300e3561ff7e8a1375c3..a54b3e3f10a716cb63ab56311745716a70086d0f 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/file.h>
 #include <linux/pagemap.h>
 #include <linux/splice.h>
+#include <linux/memcontrol.h>
 #include <linux/mm_inline.h>
 #include <linux/swap.h>
 #include <linux/writeback.h>
index cb20744ec789e1dc83254f3b0912c4b1df12b49f..7d67387496cb3348d81795c694a84cb136d655aa 100644 (file)
@@ -458,7 +458,6 @@ void sync_filesystems(int wait)
                if (sb->s_flags & MS_RDONLY)
                        continue;
                sb->s_need_sync_fs = 1;
-               async_synchronize_full_special(&sb->s_async_list);
        }
 
 restart:
@@ -471,6 +470,7 @@ restart:
                sb->s_count++;
                spin_unlock(&sb_lock);
                down_read(&sb->s_umount);
+               async_synchronize_full_special(&sb->s_async_list);
                if (sb->s_root && (wait || sb->s_dirt))
                        sb->s_op->sync_fs(sb, wait);
                up_read(&sb->s_umount);
index d7afa9dd6635cc812a87d8b6b291c3fd2419f095..f3b5d4e3a2ac43eb1b529802122cb38c7898827b 100644 (file)
 #define AT_PLATFORM 15  /* string identifying CPU for optimizations */
 #define AT_HWCAP  16    /* arch dependent hints at CPU capabilities */
 #define AT_CLKTCK 17   /* frequency at which times() increments */
-
+/* AT_* values 18 through 22 are reserved */
 #define AT_SECURE 23   /* secure mode boolean */
-
 #define AT_BASE_PLATFORM 24    /* string identifying real platform, may
                                 * differ from AT_PLATFORM. */
+#define AT_RANDOM 25   /* address of 16 random bytes */
 
 #define AT_EXECFN  31  /* filename of program */
 
 #ifdef __KERNEL__
-#define AT_VECTOR_SIZE_BASE 18 /* NEW_AUX_ENT entries in auxiliary table */
+#define AT_VECTOR_SIZE_BASE 19 /* NEW_AUX_ENT entries in auxiliary table */
   /* number of "#define AT_.*" above, minus {AT_NULL, AT_IGNORE, AT_NOTELF} */
 #endif
 
index 08b78c09b09a111867b68b443225f53ece341093..e267e62827bbc4e564c930576fcee8b70a8c4a79 100644 (file)
@@ -52,9 +52,9 @@ struct cgroup_subsys_state {
         * hierarchy structure */
        struct cgroup *cgroup;
 
-       /* State maintained by the cgroup system to allow
-        * subsystems to be "busy". Should be accessed via css_get()
-        * and css_put() */
+       /* State maintained by the cgroup system to allow subsystems
+        * to be "busy". Should be accessed via css_get(),
+        * css_tryget() and css_put(). */
 
        atomic_t refcnt;
 
@@ -64,11 +64,14 @@ struct cgroup_subsys_state {
 /* bits in struct cgroup_subsys_state flags field */
 enum {
        CSS_ROOT, /* This CSS is the root of the subsystem */
+       CSS_REMOVED, /* This CSS is dead */
 };
 
 /*
- * Call css_get() to hold a reference on the cgroup;
- *
+ * Call css_get() to hold a reference on the css; it can be used
+ * for a reference obtained via:
+ * - an existing ref-counted reference to the css
+ * - task->cgroups for a locked task
  */
 
 static inline void css_get(struct cgroup_subsys_state *css)
@@ -77,9 +80,32 @@ static inline void css_get(struct cgroup_subsys_state *css)
        if (!test_bit(CSS_ROOT, &css->flags))
                atomic_inc(&css->refcnt);
 }
+
+static inline bool css_is_removed(struct cgroup_subsys_state *css)
+{
+       return test_bit(CSS_REMOVED, &css->flags);
+}
+
+/*
+ * Call css_tryget() to take a reference on a css if your existing
+ * (known-valid) reference isn't already ref-counted. Returns false if
+ * the css has been destroyed.
+ */
+
+static inline bool css_tryget(struct cgroup_subsys_state *css)
+{
+       if (test_bit(CSS_ROOT, &css->flags))
+               return true;
+       while (!atomic_inc_not_zero(&css->refcnt)) {
+               if (test_bit(CSS_REMOVED, &css->flags))
+                       return false;
+       }
+       return true;
+}
+
 /*
  * css_put() should be called to release a reference taken by
- * css_get()
+ * css_get() or css_tryget()
  */
 
 extern void __css_put(struct cgroup_subsys_state *css);
@@ -116,7 +142,7 @@ struct cgroup {
        struct list_head children;      /* my children */
 
        struct cgroup *parent;  /* my parent */
-       struct dentry *dentry;          /* cgroup fs entry */
+       struct dentry *dentry;          /* cgroup fs entry, RCU protected */
 
        /* Private pointers for each registered subsystem */
        struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];
@@ -145,6 +171,9 @@ struct cgroup {
        int pids_use_count;
        /* Length of the current tasks_pids array */
        int pids_length;
+
+       /* For RCU-protected deletion */
+       struct rcu_head rcu_head;
 };
 
 /* A css_set is a structure holding pointers to a set of
@@ -337,9 +366,23 @@ struct cgroup_subsys {
 #define MAX_CGROUP_TYPE_NAMELEN 32
        const char *name;
 
-       /* Protected by RCU */
-       struct cgroupfs_root *root;
+       /*
+        * Protects sibling/children links of cgroups in this
+        * hierarchy, plus protects which hierarchy (or none) the
+        * subsystem is a part of (i.e. root/sibling).  To avoid
+        * potential deadlocks, the following operations should not be
+        * undertaken while holding any hierarchy_mutex:
+        *
+        * - allocating memory
+        * - initiating hotplug events
+        */
+       struct mutex hierarchy_mutex;
 
+       /*
+        * Link to parent, and list entry in parent's children.
+        * Protected by this->hierarchy_mutex and cgroup_lock()
+        */
+       struct cgroupfs_root *root;
        struct list_head sibling;
 };
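
[Editor's note] A typical caller pattern for the new css_tryget(): upgrade a known-valid but not ref-counted css pointer into a real reference, and bail out cleanly if the cgroup is being destroyed. The function below is a hypothetical kernel-context sketch, not part of this commit:

/* Hypothetical user of css_tryget()/css_put(): pin a css reached via a
 * known-valid (e.g. RCU-protected) pointer for the duration of some
 * work. css_tryget() fails if the css was concurrently removed. */
static int do_something_with_css(struct cgroup_subsys_state *css)
{
	if (!css_tryget(css))
		return -ENOENT;		/* css already destroyed */

	/* ... css is pinned here; safe to dereference ... */

	css_put(css);
	return 0;
}
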
 
index 51ea2bdea0f93746fd5ed943791a97355add8b0a..90c6074a36cab42c47df2ac8f6239cfe2ae0ddfe 100644 (file)
@@ -20,8 +20,9 @@ extern int number_of_cpusets; /* How many cpusets are defined in system? */
 extern int cpuset_init_early(void);
 extern int cpuset_init(void);
 extern void cpuset_init_smp(void);
-extern void cpuset_cpus_allowed(struct task_struct *p, cpumask_t *mask);
-extern void cpuset_cpus_allowed_locked(struct task_struct *p, cpumask_t *mask);
+extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask);
+extern void cpuset_cpus_allowed_locked(struct task_struct *p,
+                                      struct cpumask *mask);
 extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
 #define cpuset_current_mems_allowed (current->mems_allowed)
 void cpuset_init_current_mems_allowed(void);
@@ -86,12 +87,13 @@ static inline int cpuset_init_early(void) { return 0; }
 static inline int cpuset_init(void) { return 0; }
 static inline void cpuset_init_smp(void) {}
 
-static inline void cpuset_cpus_allowed(struct task_struct *p, cpumask_t *mask)
+static inline void cpuset_cpus_allowed(struct task_struct *p,
+                                      struct cpumask *mask)
 {
        *mask = cpu_possible_map;
 }
 static inline void cpuset_cpus_allowed_locked(struct task_struct *p,
-                                                               cpumask_t *mask)
+                                             struct cpumask *mask)
 {
        *mask = cpu_possible_map;
 }
index 78c775a83f7cd041acaa872ae455676a06a0e096..121720d74e15babe70c9dab51eb4394cb0ca53b2 100644 (file)
@@ -194,6 +194,30 @@ struct ext2_group_desc
 #define EXT2_FL_USER_VISIBLE           FS_FL_USER_VISIBLE      /* User visible flags */
 #define EXT2_FL_USER_MODIFIABLE                FS_FL_USER_MODIFIABLE   /* User modifiable flags */
 
+/* Flags that should be inherited by new inodes from their parent. */
+#define EXT2_FL_INHERITED (EXT2_SECRM_FL | EXT2_UNRM_FL | EXT2_COMPR_FL |\
+                          EXT2_SYNC_FL | EXT2_IMMUTABLE_FL | EXT2_APPEND_FL |\
+                          EXT2_NODUMP_FL | EXT2_NOATIME_FL | EXT2_COMPRBLK_FL|\
+                          EXT2_NOCOMP_FL | EXT2_JOURNAL_DATA_FL |\
+                          EXT2_NOTAIL_FL | EXT2_DIRSYNC_FL)
+
+/* Flags that are appropriate for regular files (all but dir-specific ones). */
+#define EXT2_REG_FLMASK (~(EXT2_DIRSYNC_FL | EXT2_TOPDIR_FL))
+
+/* Flags that are appropriate for non-directories/regular files. */
+#define EXT2_OTHER_FLMASK (EXT2_NODUMP_FL | EXT2_NOATIME_FL)
+
+/* Mask out flags that are inappropriate for the given type of inode. */
+static inline __u32 ext2_mask_flags(umode_t mode, __u32 flags)
+{
+       if (S_ISDIR(mode))
+               return flags;
+       else if (S_ISREG(mode))
+               return flags & EXT2_REG_FLMASK;
+       else
+               return flags & EXT2_OTHER_FLMASK;
+}
+
 /*
  * ioctl commands
  */
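
[Editor's note] The inherit-then-mask sequence that ext2_new_inode() now uses reduces to a single expression; the helper below is a hypothetical illustration of the call shape, not code from this commit:

/* On inode creation: take the parent directory's inheritable flags,
 * then drop whatever is inappropriate for the new inode's type.
 * Directories keep everything, regular files lose dir-only flags such
 * as EXT2_DIRSYNC_FL, other types keep only NODUMP/NOATIME. */
static __u32 flags_for_new_inode(umode_t mode, __u32 parent_flags)
{
	return ext2_mask_flags(mode, parent_flags & EXT2_FL_INHERITED);
}
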
index dc541f3653d173fe45cbd9ba1e7a89192432d6bc..1cdb66367c9815cac59913f65cbbd0d56c6a99d7 100644 (file)
@@ -101,7 +101,7 @@ struct ext2_sb_info {
        struct percpu_counter s_freeblocks_counter;
        struct percpu_counter s_freeinodes_counter;
        struct percpu_counter s_dirs_counter;
-       struct blockgroup_lock s_blockgroup_lock;
+       struct blockgroup_lock *s_blockgroup_lock;
        /* root of the per fs reservation window tree */
        spinlock_t s_rsv_window_lock;
        struct rb_root s_rsv_window_root;
@@ -111,7 +111,7 @@ struct ext2_sb_info {
 static inline spinlock_t *
 sb_bgl_lock(struct ext2_sb_info *sbi, unsigned int block_group)
 {
-       return bgl_lock_ptr(&sbi->s_blockgroup_lock, block_group);
+       return bgl_lock_ptr(sbi->s_blockgroup_lock, block_group);
 }
 
 #endif /* _LINUX_EXT2_FS_SB */
index d14f029184832bd300ad7856c7cacbe467101c5d..d76800f6ecf0fb927bf16f144fa6516f5ac62111 100644 (file)
@@ -178,6 +178,30 @@ struct ext3_group_desc
 #define EXT3_FL_USER_VISIBLE           0x0003DFFF /* User visible flags */
 #define EXT3_FL_USER_MODIFIABLE                0x000380FF /* User modifiable flags */
 
+/* Flags that should be inherited by new inodes from their parent. */
+#define EXT3_FL_INHERITED (EXT3_SECRM_FL | EXT3_UNRM_FL | EXT3_COMPR_FL |\
+                          EXT3_SYNC_FL | EXT3_IMMUTABLE_FL | EXT3_APPEND_FL |\
+                          EXT3_NODUMP_FL | EXT3_NOATIME_FL | EXT3_COMPRBLK_FL|\
+                          EXT3_NOCOMPR_FL | EXT3_JOURNAL_DATA_FL |\
+                          EXT3_NOTAIL_FL | EXT3_DIRSYNC_FL)
+
+/* Flags that are appropriate for regular files (all but dir-specific ones). */
+#define EXT3_REG_FLMASK (~(EXT3_DIRSYNC_FL | EXT3_TOPDIR_FL))
+
+/* Flags that are appropriate for non-directories/regular files. */
+#define EXT3_OTHER_FLMASK (EXT3_NODUMP_FL | EXT3_NOATIME_FL)
+
+/* Mask out flags that are inappropriate for the given type of inode. */
+static inline __u32 ext3_mask_flags(umode_t mode, __u32 flags)
+{
+       if (S_ISDIR(mode))
+               return flags;
+       else if (S_ISREG(mode))
+               return flags & EXT3_REG_FLMASK;
+       else
+               return flags & EXT3_OTHER_FLMASK;
+}
+
 /*
  * Inode dynamic state flags
  */
index e024e38248ffc83e0bfc77bdb3713674d2188118..76fdc0f4b0287f3e3b0450a4357de6519d69f0c0 100644 (file)
@@ -60,7 +60,7 @@ struct ext3_sb_info {
        struct percpu_counter s_freeblocks_counter;
        struct percpu_counter s_freeinodes_counter;
        struct percpu_counter s_dirs_counter;
-       struct blockgroup_lock s_blockgroup_lock;
+       struct blockgroup_lock *s_blockgroup_lock;
 
        /* root of the per fs reservation window tree */
        spinlock_t s_rsv_window_lock;
@@ -86,7 +86,7 @@ struct ext3_sb_info {
 static inline spinlock_t *
 sb_bgl_lock(struct ext3_sb_info *sbi, unsigned int block_group)
 {
-       return bgl_lock_ptr(&sbi->s_blockgroup_lock, block_group);
+       return bgl_lock_ptr(sbi->s_blockgroup_lock, block_group);
 }
 
 #endif /* _LINUX_EXT3_FS_SB */
index 346e2b80be7d0ec26c7b7637f58af3d03b201297..6384b19efe64cc13dd671eceed9745f541295c8d 100644 (file)
@@ -542,6 +542,11 @@ struct transaction_s
         */
        unsigned long           t_expires;
 
+       /*
+        * When this transaction started, in nanoseconds [no locking]
+        */
+       ktime_t                 t_start_time;
+
        /*
         * How many handles used this transaction? [t_handle_lock]
         */
@@ -798,8 +803,18 @@ struct journal_s
        struct buffer_head      **j_wbuf;
        int                     j_wbufsize;
 
+       /*
+        * this is the pid of the last person to run a synchronous operation
+        * through the journal.
+        */
        pid_t                   j_last_sync_writer;
 
+       /*
+        * the average amount of time in nanoseconds it takes to commit a
+        * transaction to the disk.  [j_state_lock]
+        */
+       u64                     j_average_commit_time;
+
        /*
         * An opaque pointer to fs-private information.  ext3 puts its
         * superblock pointer here
index 6b8e2027165eb9e810978dfaaf363174ab767cf4..343df9ef2412393020e90c1ef5227135b3b5a07f 100644 (file)
@@ -476,6 +476,12 @@ static inline char *pack_hex_byte(char *buf, u8 byte)
        __val = __val < __min ? __min: __val;   \
        __val > __max ? __max: __val; })
 
+
+/*
+ * swap - swap value of @a and @b
+ */
+#define swap(a, b) ({ typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; })
+
 /**
  * container_of - cast a member of a structure out to the containing structure
  * @ptr:       the pointer to the member.
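
[Editor's note] With swap() available generically, the per-file copies deleted from ext3/ext4 namei.c above become unnecessary. Usage is the obvious one; the fragment below is illustrative:

int a = 1, b = 2;

swap(a, b);	/* now a == 2, b == 1; works for any assignable lvalue
		 * type thanks to typeof and the statement expression */
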
index 3449de597eff8810a1538672da316f33a6ad6cd6..4f7c8fb4d3fe6173b80788be019e5439efacdbbb 100644 (file)
@@ -1518,6 +1518,7 @@ extern void sata_pmp_error_handler(struct ata_port *ap);
 
 extern const struct ata_port_operations ata_sff_port_ops;
 extern const struct ata_port_operations ata_bmdma_port_ops;
+extern const struct ata_port_operations ata_bmdma32_port_ops;
 
 /* PIO only, sg_tablesize and dma_boundary limits can be removed */
 #define ATA_PIO_SHT(drv_name)                                  \
@@ -1545,6 +1546,8 @@ extern void ata_sff_exec_command(struct ata_port *ap,
                                 const struct ata_taskfile *tf);
 extern unsigned int ata_sff_data_xfer(struct ata_device *dev,
                        unsigned char *buf, unsigned int buflen, int rw);
+extern unsigned int ata_sff_data_xfer32(struct ata_device *dev,
+                       unsigned char *buf, unsigned int buflen, int rw);
 extern unsigned int ata_sff_data_xfer_noirq(struct ata_device *dev,
                        unsigned char *buf, unsigned int buflen, int rw);
 extern u8 ata_sff_irq_on(struct ata_port *ap);
index f7f3fdddbef0b5f4f599cd506c524e4c9bf0b911..439f6f3cb0c445a18d5ee0036f500f06f836bafe 100644 (file)
@@ -13,6 +13,7 @@
 #define EFS_SUPER_MAGIC                0x414A53
 #define EXT2_SUPER_MAGIC       0xEF53
 #define EXT3_SUPER_MAGIC       0xEF53
+#define XENFS_SUPER_MAGIC      0xabba1974
 #define EXT4_SUPER_MAGIC       0xEF53
 #define HPFS_SUPER_MAGIC       0xf995e849
 #define ISOFS_SUPER_MAGIC      0x9660
index 1fbe14d39521d728dd877d263b278210bd9d3ed5..326f45c86530302c6e94ba8a872971bf7474646c 100644 (file)
 
 #ifndef _LINUX_MEMCONTROL_H
 #define _LINUX_MEMCONTROL_H
-
+#include <linux/cgroup.h>
 struct mem_cgroup;
 struct page_cgroup;
 struct page;
 struct mm_struct;
 
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
+/*
+ * All "charge" functions with gfp_mask should use GFP_KERNEL or
+ * (gfp_mask & GFP_RECLAIM_MASK). In the current implementation, memcg doesn't
+ * allocate memory but reclaims it from all available zones, so the "where I
+ * want memory from" bits of gfp_mask have no meaning. Any bits of that field
+ * would do, but having a rule avoids ambiguity: charge functions' gfp_mask
+ * should be set to GFP_KERNEL or (gfp_mask & GFP_RECLAIM_MASK).
+ * (Of course, if memcg does memory allocation in the future, GFP_KERNEL is sane.)
+ */
 
-extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
+extern int mem_cgroup_newpage_charge(struct page *page, struct mm_struct *mm,
                                gfp_t gfp_mask);
+/* for swap handling */
+extern int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
+               struct page *page, gfp_t mask, struct mem_cgroup **ptr);
+extern void mem_cgroup_commit_charge_swapin(struct page *page,
+                                       struct mem_cgroup *ptr);
+extern void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *ptr);
+
 extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
                                        gfp_t gfp_mask);
-extern void mem_cgroup_move_lists(struct page *page, enum lru_list lru);
+extern void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru);
+extern void mem_cgroup_del_lru_list(struct page *page, enum lru_list lru);
+extern void mem_cgroup_rotate_lru_list(struct page *page, enum lru_list lru);
+extern void mem_cgroup_del_lru(struct page *page);
+extern void mem_cgroup_move_lists(struct page *page,
+                                 enum lru_list from, enum lru_list to);
 extern void mem_cgroup_uncharge_page(struct page *page);
 extern void mem_cgroup_uncharge_cache_page(struct page *page);
-extern int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask);
+extern int mem_cgroup_shrink_usage(struct page *page,
+                       struct mm_struct *mm, gfp_t gfp_mask);
 
 extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
                                        struct list_head *dst,
@@ -47,12 +70,20 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem);
 
 extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
 
-#define mm_match_cgroup(mm, cgroup)    \
-       ((cgroup) == mem_cgroup_from_task((mm)->owner))
+static inline
+int mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *cgroup)
+{
+       struct mem_cgroup *mem;
+       rcu_read_lock();
+       mem = mem_cgroup_from_task((mm)->owner);
+       rcu_read_unlock();
+       return cgroup == mem;
+}
 
 extern int
-mem_cgroup_prepare_migration(struct page *page, struct page *newpage);
-extern void mem_cgroup_end_migration(struct page *page);
+mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr);
+extern void mem_cgroup_end_migration(struct mem_cgroup *mem,
+       struct page *oldpage, struct page *newpage);
 
 /*
  * For memory reclaim.
@@ -65,13 +96,32 @@ extern void mem_cgroup_note_reclaim_priority(struct mem_cgroup *mem,
                                                        int priority);
 extern void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem,
                                                        int priority);
+int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg);
+unsigned long mem_cgroup_zone_nr_pages(struct mem_cgroup *memcg,
+                                      struct zone *zone,
+                                      enum lru_list lru);
+struct zone_reclaim_stat *mem_cgroup_get_reclaim_stat(struct mem_cgroup *memcg,
+                                                     struct zone *zone);
+struct zone_reclaim_stat*
+mem_cgroup_get_reclaim_stat_from_page(struct page *page);
 
-extern long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone,
-                                       int priority, enum lru_list lru);
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
+extern int do_swap_account;
+#endif
 
+static inline bool mem_cgroup_disabled(void)
+{
+       if (mem_cgroup_subsys.disabled)
+               return true;
+       return false;
+}
+
+extern bool mem_cgroup_oom_called(struct task_struct *task);
 
 #else /* CONFIG_CGROUP_MEM_RES_CTLR */
-static inline int mem_cgroup_charge(struct page *page,
+struct mem_cgroup;
+
+static inline int mem_cgroup_newpage_charge(struct page *page,
                                        struct mm_struct *mm, gfp_t gfp_mask)
 {
        return 0;
@@ -83,6 +133,21 @@ static inline int mem_cgroup_cache_charge(struct page *page,
        return 0;
 }
 
+static inline int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
+               struct page *page, gfp_t gfp_mask, struct mem_cgroup **ptr)
+{
+       return 0;
+}
+
+static inline void mem_cgroup_commit_charge_swapin(struct page *page,
+                                         struct mem_cgroup *ptr)
+{
+}
+
+static inline void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *ptr)
+{
+}
+
 static inline void mem_cgroup_uncharge_page(struct page *page)
 {
 }
@@ -91,12 +156,33 @@ static inline void mem_cgroup_uncharge_cache_page(struct page *page)
 {
 }
 
-static inline int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask)
+static inline int mem_cgroup_shrink_usage(struct page *page,
+                       struct mm_struct *mm, gfp_t gfp_mask)
 {
        return 0;
 }
 
-static inline void mem_cgroup_move_lists(struct page *page, bool active)
+static inline void mem_cgroup_add_lru_list(struct page *page, int lru)
+{
+}
+
+static inline void mem_cgroup_del_lru_list(struct page *page, int lru)
+{
+       return;
+}
+
+static inline void mem_cgroup_rotate_lru_list(struct page *page, int lru)
+{
+       return;
+}
+
+static inline void mem_cgroup_del_lru(struct page *page)
+{
+       return;
+}
+
+static inline void
+mem_cgroup_move_lists(struct page *page, enum lru_list from, enum lru_list to)
 {
 }
 
@@ -112,12 +198,14 @@ static inline int task_in_mem_cgroup(struct task_struct *task,
 }
 
 static inline int
-mem_cgroup_prepare_migration(struct page *page, struct page *newpage)
+mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr)
 {
        return 0;
 }
 
-static inline void mem_cgroup_end_migration(struct page *page)
+static inline void mem_cgroup_end_migration(struct mem_cgroup *mem,
+                                       struct page *oldpage,
+                                       struct page *newpage)
 {
 }
 
@@ -146,12 +234,42 @@ static inline void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem,
 {
 }
 
-static inline long mem_cgroup_calc_reclaim(struct mem_cgroup *mem,
-                                       struct zone *zone, int priority,
-                                       enum lru_list lru)
+static inline bool mem_cgroup_disabled(void)
+{
+       return true;
+}
+
+static inline bool mem_cgroup_oom_called(struct task_struct *task)
+{
+       return false;
+}
+
+static inline int
+mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg)
+{
+       return 1;
+}
+
+static inline unsigned long
+mem_cgroup_zone_nr_pages(struct mem_cgroup *memcg, struct zone *zone,
+                        enum lru_list lru)
 {
        return 0;
 }
+
+
+static inline struct zone_reclaim_stat*
+mem_cgroup_get_reclaim_stat(struct mem_cgroup *memcg, struct zone *zone)
+{
+       return NULL;
+}
+
+static inline struct zone_reclaim_stat*
+mem_cgroup_get_reclaim_stat_from_page(struct page *page)
+{
+       return NULL;
+}
+
 #endif /* CONFIG_CGROUP_MEM_CONT */
 
 #endif /* _LINUX_MEMCONTROL_H */
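
[Editor's note] A hypothetical call site honoring the gfp_mask rule documented at the top of this header; note that GFP_RECLAIM_MASK lives in mm-internal headers, so its visibility here is an assumption of the sketch:

/* Hypothetical caller: pass either GFP_KERNEL or only the
 * reclaim-relevant bits of an existing mask, since memcg reclaims
 * rather than allocating from particular zones. */
static int charge_new_page(struct page *page, struct mm_struct *mm,
			   gfp_t gfp_mask)
{
	return mem_cgroup_newpage_charge(page, mm,
					 gfp_mask & GFP_RECLAIM_MASK);
}
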
index c948350c378e93cb9144e9c17c52f3445e8f4a80..7fbb97267556531b0f13a112a5afc7e134234189 100644 (file)
@@ -28,6 +28,7 @@ add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list l)
 {
        list_add(&page->lru, &zone->lru[l].list);
        __inc_zone_state(zone, NR_LRU_BASE + l);
+       mem_cgroup_add_lru_list(page, l);
 }
 
 static inline void
@@ -35,6 +36,7 @@ del_page_from_lru_list(struct zone *zone, struct page *page, enum lru_list l)
 {
        list_del(&page->lru);
        __dec_zone_state(zone, NR_LRU_BASE + l);
+       mem_cgroup_del_lru_list(page, l);
 }
 
 static inline void
@@ -54,6 +56,7 @@ del_page_from_lru(struct zone *zone, struct page *page)
                l += page_is_file_cache(page);
        }
        __dec_zone_state(zone, NR_LRU_BASE + l);
+       mem_cgroup_del_lru_list(page, l);
 }
 
 /**
@@ -78,23 +81,4 @@ static inline enum lru_list page_lru(struct page *page)
        return lru;
 }
 
-/**
- * inactive_anon_is_low - check if anonymous pages need to be deactivated
- * @zone: zone to check
- *
- * Returns true if the zone does not have enough inactive anon pages,
- * meaning some active anon pages need to be deactivated.
- */
-static inline int inactive_anon_is_low(struct zone *zone)
-{
-       unsigned long active, inactive;
-
-       active = zone_page_state(zone, NR_ACTIVE_ANON);
-       inactive = zone_page_state(zone, NR_INACTIVE_ANON);
-
-       if (inactive * zone->inactive_ratio < active)
-               return 1;
-
-       return 0;
-}
 #endif
index 35a7b5e19465fc89154dd2097f0bd4913e7b2e21..09c14e213b63eb72f2f614695a886e67da7444d8 100644 (file)
@@ -263,6 +263,19 @@ enum zone_type {
 #error ZONES_SHIFT -- too many zones configured adjust calculation
 #endif
 
+struct zone_reclaim_stat {
+       /*
+        * The pageout code in vmscan.c keeps track of how many of the
+        * mem/swap backed and file backed pages are referenced.
+        * The higher the rotated/scanned ratio, the more valuable
+        * that cache is.
+        *
+        * The anon LRU stats live in [0], file LRU stats in [1]
+        */
+       unsigned long           recent_rotated[2];
+       unsigned long           recent_scanned[2];
+};
+
 struct zone {
        /* Fields commonly accessed by the page allocator */
        unsigned long           pages_min, pages_low, pages_high;
@@ -315,16 +328,7 @@ struct zone {
                unsigned long nr_scan;
        } lru[NR_LRU_LISTS];
 
-       /*
-        * The pageout code in vmscan.c keeps track of how many of the
-        * mem/swap backed and file backed pages are refeferenced.
-        * The higher the rotated/scanned ratio, the more valuable
-        * that cache is.
-        *
-        * The anon LRU stats live in [0], file LRU stats in [1]
-        */
-       unsigned long           recent_rotated[2];
-       unsigned long           recent_scanned[2];
+       struct zone_reclaim_stat reclaim_stat;
 
        unsigned long           pages_scanned;     /* since last reclaim */
        unsigned long           flags;             /* zone flags, see below */
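
[Editor's note] The rotated/scanned ratio the comment describes can be read straight off the new struct; the helper below is a hypothetical sketch (index 0 is the anon LRU, index 1 the file LRU):

/* Hypothetical: cache "value" as the percentage of scanned pages that
 * were referenced and rotated back onto the LRU list. */
static unsigned long reclaim_value_percent(struct zone_reclaim_stat *rs,
					   int file)
{
	if (!rs->recent_scanned[file])
		return 0;
	return rs->recent_rotated[file] * 100 / rs->recent_scanned[file];
}
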
diff --git a/include/linux/nwpserial.h b/include/linux/nwpserial.h
new file mode 100644 (file)
index 0000000..9acb215
--- /dev/null
@@ -0,0 +1,18 @@
+/*
+ *  Serial Port driver for a NWP uart device
+ *
+ *    Copyright (C) 2008 IBM Corp., Benjamin Krill <ben@codiert.org>
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+#ifndef _NWPSERIAL_H
+#define _NWPSERIAL_H
+
+int nwpserial_register_port(struct uart_port *port);
+void nwpserial_unregister_port(int line);
+
+#endif /* _NWPSERIAL_H */
index 1e6d34bfa0945b5cdf5a7f8904e9fa182a80efa0..602cc1fdee90c152dc458642256b623ce8cbfa78 100644 (file)
@@ -26,10 +26,6 @@ enum {
        PCG_LOCK,  /* page cgroup is locked */
        PCG_CACHE, /* charged as cache */
        PCG_USED, /* this object is in use. */
-       /* flags for LRU placement */
-       PCG_ACTIVE, /* page is active in this cgroup */
-       PCG_FILE, /* page is file system backed */
-       PCG_UNEVICTABLE, /* page is unevictableable */
 };
 
 #define TESTPCGFLAG(uname, lname)                      \
@@ -50,19 +46,6 @@ TESTPCGFLAG(Cache, CACHE)
 TESTPCGFLAG(Used, USED)
 CLEARPCGFLAG(Used, USED)
 
-/* LRU management flags (from global-lru definition) */
-TESTPCGFLAG(File, FILE)
-SETPCGFLAG(File, FILE)
-CLEARPCGFLAG(File, FILE)
-
-TESTPCGFLAG(Active, ACTIVE)
-SETPCGFLAG(Active, ACTIVE)
-CLEARPCGFLAG(Active, ACTIVE)
-
-TESTPCGFLAG(Unevictable, UNEVICTABLE)
-SETPCGFLAG(Unevictable, UNEVICTABLE)
-CLEARPCGFLAG(Unevictable, UNEVICTABLE)
-
 static inline int page_cgroup_nid(struct page_cgroup *pc)
 {
        return page_to_nid(pc->page);
@@ -104,5 +87,40 @@ static inline void page_cgroup_init(void)
 {
 }
 
+#endif
+
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
+#include <linux/swap.h>
+extern struct mem_cgroup *
+swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem);
+extern struct mem_cgroup *lookup_swap_cgroup(swp_entry_t ent);
+extern int swap_cgroup_swapon(int type, unsigned long max_pages);
+extern void swap_cgroup_swapoff(int type);
+#else
+#include <linux/swap.h>
+
+static inline
+struct mem_cgroup *swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem)
+{
+       return NULL;
+}
+
+static inline
+struct mem_cgroup *lookup_swap_cgroup(swp_entry_t ent)
+{
+       return NULL;
+}
+
+static inline int
+swap_cgroup_swapon(int type, unsigned long max_pages)
+{
+       return 0;
+}
+
+static inline void swap_cgroup_swapoff(int type)
+{
+       return;
+}
+
 #endif
 #endif
index bb206c56d1f0eb16877a0c664712edae4de9df1d..49f1c2f66e951bc7e226a5809450af8714c80473 100644 (file)
@@ -122,6 +122,24 @@ int next_pidmap(struct pid_namespace *pid_ns, int last);
 extern struct pid *alloc_pid(struct pid_namespace *ns);
 extern void free_pid(struct pid *pid);
 
+/*
+ * ns_of_pid() returns the pid namespace in which the specified pid was
+ * allocated.
+ *
+ * NOTE:
+ *     ns_of_pid() is expected to be called for a process (task) that has
+ *     an attached 'struct pid' (see attach_pid(), detach_pid()) i.e @pid
+ *     is expected to be non-NULL. If @pid is NULL, caller should handle
+ *     the resulting NULL pid-ns.
+ */
+static inline struct pid_namespace *ns_of_pid(struct pid *pid)
+{
+       struct pid_namespace *ns = NULL;
+       if (pid)
+               ns = pid->numbers[pid->level].ns;
+       return ns;
+}
+
 /*
  * the helpers to get the pid's id seen from different namespaces
  *
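
[Editor's note] The mqueue change later in this commit is the motivating caller: it reports the notifying task's tgid as seen from the pid namespace in which the receiver registered. A condensed, hypothetical sketch of that pattern:

/* Translate current's tgid into the namespace in which 'owner' was
 * allocated, as ipc/mqueue.c now does for SIGEV notification. The
 * NULL check follows the NOTE above ns_of_pid(). */
static pid_t tgid_as_seen_by(struct pid *owner)
{
	struct pid_namespace *ns = ns_of_pid(owner);

	return ns ? task_tgid_nr_ns(current, ns) : 0;
}
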
index d82fe825d62f9571fe6412700d854a326f5d8c4c..38d10326246afbbec371b5cddc2beaff37f1dd48 100644 (file)
@@ -79,11 +79,7 @@ static inline void zap_pid_ns_processes(struct pid_namespace *ns)
 }
 #endif /* CONFIG_PID_NS */
 
-static inline struct pid_namespace *task_active_pid_ns(struct task_struct *tsk)
-{
-       return tsk->nsproxy->pid_ns;
-}
-
+extern struct pid_namespace *task_active_pid_ns(struct task_struct *tsk);
 void pidhash_init(void);
 void pidmap_init(void);
 
index 8fc909ef6787738b983b45243fc1786d4867fc2a..9743e4dbc9188031b0c180584df4c5e6ba898513 100644 (file)
@@ -137,6 +137,9 @@ struct mddev_s
        struct gendisk                  *gendisk;
 
        struct kobject                  kobj;
+       int                             hold_active;
+#define        UNTIL_IOCTL     1
+#define        UNTIL_STOP      2
 
        /* Superblock information */
        int                             major_version,
@@ -215,6 +218,9 @@ struct mddev_s
 #define        MD_RECOVERY_FROZEN      9
 
        unsigned long                   recovery;
+       int                             recovery_disabled; /* if we detect that recovery
+                                                           * will always fail, set this
+                                                           * so we don't loop trying */
 
        int                             in_sync;        /* know to not need resync */
        struct mutex                    reconfig_mutex;
@@ -244,6 +250,9 @@ struct mddev_s
        struct sysfs_dirent             *sysfs_state;   /* handle for 'array_state'
                                                         * file in sysfs.
                                                         */
+       struct sysfs_dirent             *sysfs_action;  /* handle for 'sync_action' */
+
+       struct work_struct del_work;    /* used for delayed sysfs removal */
 
        spinlock_t                      write_lock;
        wait_queue_head_t               sb_wait;        /* for waiting on superblock updates */
@@ -334,17 +343,14 @@ static inline char * mdname (mddev_t * mddev)
  * iterates through some rdev ringlist. It's safe to remove the
  * current 'rdev'. Dont touch 'tmp' though.
  */
-#define rdev_for_each_list(rdev, tmp, list)                            \
-                                                                       \
-       for ((tmp) = (list).next;                                       \
-               (rdev) = (list_entry((tmp), mdk_rdev_t, same_set)),     \
-                       (tmp) = (tmp)->next, (tmp)->prev != &(list)     \
-               ; )
+#define rdev_for_each_list(rdev, tmp, head)                            \
+       list_for_each_entry_safe(rdev, tmp, head, same_set)
+
 /*
  * iterates through the 'same array disks' ringlist
  */
 #define rdev_for_each(rdev, tmp, mddev)                                \
-       rdev_for_each_list(rdev, tmp, (mddev)->disks)
+       list_for_each_entry_safe(rdev, tmp, &((mddev)->disks), same_set)
 
 #define rdev_for_each_rcu(rdev, mddev)                         \
        list_for_each_entry_rcu(rdev, &((mddev)->disks), same_set)
index 8b4de4a41ff14c15f33550a2a9ad99955861c31e..9491026afe6610bc5f4897fbe2f30c5b75484f2c 100644 (file)
@@ -194,6 +194,8 @@ static inline __u64 md_event(mdp_super_t *sb) {
        return (ev<<32)| sb->events_lo;
 }
 
+#define MD_SUPERBLOCK_1_TIME_SEC_MASK ((1ULL<<40) - 1)
+
 /*
  * The version-1 superblock :
  * All numeric fields are little-endian.
index 1b2dda035f8efcfeb5e979ed463d735e9f3f70b1..fd42aa87c39186791d1a06481fecb6ddd7c03326 100644 (file)
@@ -5,9 +5,9 @@
 
 struct strip_zone
 {
-       sector_t zone_offset;   /* Zone offset in md_dev */
-       sector_t dev_offset;    /* Zone offset in real dev */
-       sector_t size;          /* Zone size */
+       sector_t zone_start;    /* Zone offset in md_dev (in sectors) */
+       sector_t dev_start;     /* Zone offset in real dev (in sectors) */
+       sector_t sectors;       /* Zone size in sectors */
        int nb_dev;             /* # of devices attached to the zone */
        mdk_rdev_t **dev;       /* Devices attached to the zone */
 };
@@ -19,8 +19,8 @@ struct raid0_private_data
        mdk_rdev_t **devlist; /* lists of rdevs, pointed to by strip_zone->dev */
        int nr_strip_zones;
 
-       sector_t hash_spacing;
-       int preshift;                   /* shift this before divide by hash_spacing */
+       sector_t spacing;
+       int sector_shift; /* shift this before divide by spacing */
 };
 
 typedef struct raid0_private_data raid0_conf_t;
index 271c1c2c9f6f741016f0917c360469ae5ee872a5..dede0a2cfc45980a067e916ea1fabfeee273c7fb 100644 (file)
@@ -43,6 +43,10 @@ struct res_counter {
         * the routines below consider this to be IRQ-safe
         */
        spinlock_t lock;
+       /*
+        * Parent counter, used for hierarchical resource accounting
+        */
+       struct res_counter *parent;
 };
 
 /**
@@ -87,7 +91,7 @@ enum {
  * helpers for accounting
  */
 
-void res_counter_init(struct res_counter *counter);
+void res_counter_init(struct res_counter *counter, struct res_counter *parent);
 
 /*
  * charge - try to consume more resource.
@@ -103,7 +107,7 @@ void res_counter_init(struct res_counter *counter);
 int __must_check res_counter_charge_locked(struct res_counter *counter,
                unsigned long val);
 int __must_check res_counter_charge(struct res_counter *counter,
-               unsigned long val);
+               unsigned long val, struct res_counter **limit_fail_at);
 
 /*
  * uncharge - tell that some portion of the resource is released
index b4199841f1fc92e973e1b03f2a9472b8faaac32a..90bbbf0b1161bd592a19c1b2cc3f6803776c53df 100644 (file)
 
 #define PORT_S3C6400   84
 
+/* NWPSERIAL */
+#define PORT_NWPSERIAL 85
+
 #ifdef __KERNEL__
 
 #include <linux/compiler.h>
index 91dee50fe26081b150e3be9155f1f9d7b147f730..d30215578877e499e30cec085664c50619bc3056 100644 (file)
@@ -214,7 +214,8 @@ static inline void lru_cache_add_active_file(struct page *page)
 extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
                                        gfp_t gfp_mask);
 extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem,
-                                                       gfp_t gfp_mask);
+                                                 gfp_t gfp_mask, bool noswap,
+                                                 unsigned int swappiness);
 extern int __isolate_lru_page(struct page *page, int mode, int file);
 extern unsigned long shrink_all_memory(unsigned long nr_pages);
 extern int vm_swappiness;
@@ -333,6 +334,22 @@ static inline void disable_swap_token(void)
        put_swap_token(swap_token_mm);
 }
 
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+extern void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent);
+#else
+static inline void
+mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent)
+{
+}
+#endif
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
+extern void mem_cgroup_uncharge_swap(swp_entry_t ent);
+#else
+static inline void mem_cgroup_uncharge_swap(swp_entry_t ent)
+{
+}
+#endif
+
 #else /* CONFIG_SWAP */
 
 #define nr_swap_pages                          0L
@@ -409,6 +426,12 @@ static inline swp_entry_t get_swap_page(void)
 #define has_swap_token(x) 0
 #define disable_swap_token() do { } while(0)
 
+static inline int mem_cgroup_cache_charge_swapin(struct page *page,
+                       struct mm_struct *mm, gfp_t mask, bool locked)
+{
+       return 0;
+}
+
 #endif /* CONFIG_SWAP */
 #endif /* __KERNEL__*/
 #endif /* _LINUX_SWAP_H */
index 6369d89c25d5346a6e8dcc4948218c883d900f37..f87f9614844db69e7eeacb2784646f5458ed954a 100644 (file)
@@ -136,8 +136,6 @@ struct xenbus_transaction
 /* Nil transaction ID. */
 #define XBT_NIL ((struct xenbus_transaction) { 0 })
 
-int __init xenbus_dev_init(void);
-
 char **xenbus_directory(struct xenbus_transaction t,
                        const char *dir, const char *node, unsigned int *num);
 void *xenbus_read(struct xenbus_transaction t,
index e7893b1d3e425767b5f765b8c829c401d7a4dea7..a724a149bf3f232aba760c6b584db3f52dfd7e32 100644 (file)
@@ -271,59 +271,6 @@ config LOG_BUF_SHIFT
                     13 =>  8 KB
                     12 =>  4 KB
 
-config CGROUPS
-       bool "Control Group support"
-       help
-         This option will let you use process cgroup subsystems
-         such as Cpusets
-
-         Say N if unsure.
-
-config CGROUP_DEBUG
-       bool "Example debug cgroup subsystem"
-       depends on CGROUPS
-       default n
-       help
-         This option enables a simple cgroup subsystem that
-         exports useful debugging information about the cgroups
-         framework
-
-         Say N if unsure
-
-config CGROUP_NS
-        bool "Namespace cgroup subsystem"
-        depends on CGROUPS
-        help
-          Provides a simple namespace cgroup subsystem to
-          provide hierarchical naming of sets of namespaces,
-          for instance virtual servers and checkpoint/restart
-          jobs.
-
-config CGROUP_FREEZER
-        bool "control group freezer subsystem"
-        depends on CGROUPS
-        help
-          Provides a way to freeze and unfreeze all tasks in a
-         cgroup.
-
-config CGROUP_DEVICE
-       bool "Device controller for cgroups"
-       depends on CGROUPS && EXPERIMENTAL
-       help
-         Provides a cgroup implementing whitelists for devices which
-         a process in the cgroup can mknod or open.
-
-config CPUSETS
-       bool "Cpuset support"
-       depends on SMP && CGROUPS
-       help
-         This option will let you create and manage CPUSETs which
-         allow dynamically partitioning a system into sets of CPUs and
-         Memory Nodes and assigning tasks to run only within those sets.
-         This is primarily useful on large SMP or NUMA systems.
-
-         Say N if unsure.
-
 #
 # Architectures with an unreliable sched_clock() should select this:
 #
@@ -337,6 +284,8 @@ config GROUP_SCHED
        help
          This feature lets CPU scheduler recognize task groups and control CPU
          bandwidth allocation to such task groups.
+         In order to create a group from an arbitrary set of processes, use
+         CONFIG_CGROUPS. (See Control Group support.)
 
 config FAIR_GROUP_SCHED
        bool "Group scheduling for SCHED_OTHER"
@@ -379,6 +328,66 @@ config CGROUP_SCHED
 
 endchoice
 
+menu "Control Group support"
+config CGROUPS
+       bool "Control Group support"
+       help
+         This option adds support for grouping sets of processes together, for
+         use with process control subsystems such as Cpusets, CFS, memory
+         controls or device isolation.
+         See
+               - Documentation/cpusets.txt     (Cpusets)
+               - Documentation/scheduler/sched-design-CFS.txt  (CFS)
+               - Documentation/cgroups/ (features for grouping, isolation)
+               - Documentation/controllers/ (features for resource control)
+
+         Say N if unsure.
+
+config CGROUP_DEBUG
+       bool "Example debug cgroup subsystem"
+       depends on CGROUPS
+       default n
+       help
+         This option enables a simple cgroup subsystem that
+         exports useful debugging information about the cgroups
+         framework
+
+         Say N if unsure
+
+config CGROUP_NS
+        bool "Namespace cgroup subsystem"
+        depends on CGROUPS
+        help
+          Provides a simple namespace cgroup subsystem to
+          provide hierarchical naming of sets of namespaces,
+          for instance virtual servers and checkpoint/restart
+          jobs.
+
+config CGROUP_FREEZER
+        bool "control group freezer subsystem"
+        depends on CGROUPS
+        help
+          Provides a way to freeze and unfreeze all tasks in a
+         cgroup.
+
+config CGROUP_DEVICE
+       bool "Device controller for cgroups"
+       depends on CGROUPS && EXPERIMENTAL
+       help
+         Provides a cgroup implementing whitelists for devices which
+         a process in the cgroup can mknod or open.
+
+config CPUSETS
+       bool "Cpuset support"
+       depends on SMP && CGROUPS
+       help
+         This option will let you create and manage CPUSETs which
+         allow dynamically partitioning a system into sets of CPUs and
+         Memory Nodes and assigning tasks to run only within those sets.
+         This is primarily useful on large SMP or NUMA systems.
+
+         Say N if unsure.
+
 config CGROUP_CPUACCT
        bool "Simple CPU accounting cgroup subsystem"
        depends on CGROUPS
@@ -393,9 +402,6 @@ config RESOURCE_COUNTERS
           infrastructure that works with cgroups
        depends on CGROUPS
 
-config MM_OWNER
-       bool
-
 config CGROUP_MEM_RES_CTLR
        bool "Memory Resource Controller for Control Groups"
        depends on CGROUPS && RESOURCE_COUNTERS
@@ -414,11 +420,33 @@ config CGROUP_MEM_RES_CTLR
          sure you need the memory resource controller. Even when you enable
          this, you can set "cgroup_disable=memory" at your boot option to
          disable memory resource controller and you can avoid overheads.
-         (and lose benefits of memory resource contoller)
+         (and lose benefits of memory resource controller)
 
          This config option also selects MM_OWNER config option, which
          could in turn add some fork/exit overhead.
 
+config MM_OWNER
+       bool
+
+config CGROUP_MEM_RES_CTLR_SWAP
+       bool "Memory Resource Controller Swap Extension(EXPERIMENTAL)"
+       depends on CGROUP_MEM_RES_CTLR && SWAP && EXPERIMENTAL
+       help
+         Add swap management to the memory resource controller. When you
+         enable this, you can limit mem+swap usage per cgroup. In other
+         words, when you disable this, the memory resource controller does
+         not account for swap usage and a process can exhaust all of the
+         swap. This extension is useful when you want to avoid swap
+         exhaustion, but it adds overhead and consumes memory to track the
+         extra information, so be careful about enabling it on 32-bit or
+         small-memory systems. When the memory resource controller is
+         disabled by boot option, this will be automatically disabled too
+         and there will be no overhead from it. Even when you set this
+         config=y, if the boot option "noswapaccount" is set, swap will not
+         be accounted.
+
+endmenu
+
 config SYSFS_DEPRECATED
        bool
 
index eddb6247a55322be4e021083ef96ea279be8f07f..23fdb8492b8e56e431d46471359a69b298493b73 100644 (file)
@@ -505,7 +505,8 @@ static void __do_notify(struct mqueue_inode_info *info)
                        sig_i.si_errno = 0;
                        sig_i.si_code = SI_MESGQ;
                        sig_i.si_value = info->notify.sigev_value;
-                       sig_i.si_pid = task_tgid_vnr(current);
+                       sig_i.si_pid = task_tgid_nr_ns(current,
+                                               ns_of_pid(info->notify_owner));
                        sig_i.si_uid = current_uid();
 
                        kill_pid_info(info->notify.sigev_signo,
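The mqueue fix reports the sender's tgid in the pid namespace of the task that
registered the notification instead of the sender's own namespace. A sketch of
the distinction (variable names here are illustrative only):

        /* same task, two per-namespace views of its tgid */
        struct pid_namespace *owner_ns = ns_of_pid(info->notify_owner);
        pid_t own_view   = task_tgid_vnr(current);             /* sender's ns   */
        pid_t owner_view = task_tgid_nr_ns(current, owner_ns); /* receiver's ns */

diff --git a/kernel/async.c b/kernel/async.c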
index 97373380c9e7be42ad7126c6a5d436f7954cc0aa..64cc916299a5bb0f6558cc56c243f7d2b50bf9aa 100644 (file)
@@ -206,7 +206,9 @@ EXPORT_SYMBOL_GPL(async_schedule_special);
 
 void async_synchronize_full(void)
 {
-       async_synchronize_cookie(next_cookie);
+       do {
+               async_synchronize_cookie(next_cookie);
+       } while (!list_empty(&async_running) || !list_empty(&async_pending));
 }
 EXPORT_SYMBOL_GPL(async_synchronize_full);
 
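async_synchronize_full() now loops until both the running and the pending
lists drain, so work queued while the wait is already in progress is covered
too. A sketch of a typical caller of this API (my_probe_async is a
hypothetical function, not part of this patch):

static void my_probe_async(void *data, async_cookie_t cookie)
{
        /* slow device init, run concurrently with the rest of boot */
}

static int __init my_init(void)
{
        async_schedule(my_probe_async, NULL);
        async_synchronize_full();       /* wait for all outstanding work */
        return 0;
}

diff --git a/kernel/cgroup.c b/kernel/cgroup.c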
index f221446aa02da4d60b32bbc7a8a53354e155c11d..c29831076e7a2e156e584725453c51568682eb93 100644 (file)
@@ -84,7 +84,7 @@ struct cgroupfs_root {
        /* Tracks how many cgroups are currently defined in hierarchy.*/
        int number_of_cgroups;
 
-       /* A list running through the mounted hierarchies */
+       /* A list running through the active hierarchies */
        struct list_head root_list;
 
        /* Hierarchy-specific flags */
@@ -148,8 +148,8 @@ static int notify_on_release(const struct cgroup *cgrp)
 #define for_each_subsys(_root, _ss) \
 list_for_each_entry(_ss, &_root->subsys_list, sibling)
 
-/* for_each_root() allows you to iterate across the active hierarchies */
-#define for_each_root(_root) \
+/* for_each_active_root() allows you to iterate across the active hierarchies */
+#define for_each_active_root(_root) \
 list_for_each_entry(_root, &roots, root_list)
 
 /* the list of cgroups eligible for automatic release. Protected by
@@ -271,7 +271,7 @@ static void __put_css_set(struct css_set *cg, int taskexit)
 
        rcu_read_lock();
        for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
-               struct cgroup *cgrp = cg->subsys[i]->cgroup;
+               struct cgroup *cgrp = rcu_dereference(cg->subsys[i]->cgroup);
                if (atomic_dec_and_test(&cgrp->count) &&
                    notify_on_release(cgrp)) {
                        if (taskexit)
@@ -384,6 +384,25 @@ static int allocate_cg_links(int count, struct list_head *tmp)
        return 0;
 }
 
+/**
+ * link_css_set - a helper function to link a css_set to a cgroup
+ * @tmp_cg_links: cg_cgroup_link objects allocated by allocate_cg_links()
+ * @cg: the css_set to be linked
+ * @cgrp: the destination cgroup
+ */
+static void link_css_set(struct list_head *tmp_cg_links,
+                        struct css_set *cg, struct cgroup *cgrp)
+{
+       struct cg_cgroup_link *link;
+
+       BUG_ON(list_empty(tmp_cg_links));
+       link = list_first_entry(tmp_cg_links, struct cg_cgroup_link,
+                               cgrp_link_list);
+       link->cg = cg;
+       list_move(&link->cgrp_link_list, &cgrp->css_sets);
+       list_add(&link->cg_link_list, &cg->cg_links);
+}
+
 /*
  * find_css_set() takes an existing cgroup group and a
  * cgroup object, and returns a css_set object that's
@@ -399,7 +418,6 @@ static struct css_set *find_css_set(
        int i;
 
        struct list_head tmp_cg_links;
-       struct cg_cgroup_link *link;
 
        struct hlist_head *hhead;
 
@@ -444,26 +462,11 @@ static struct css_set *find_css_set(
                 * only do it for the first subsystem in each
                 * hierarchy
                 */
-               if (ss->root->subsys_list.next == &ss->sibling) {
-                       BUG_ON(list_empty(&tmp_cg_links));
-                       link = list_entry(tmp_cg_links.next,
-                                         struct cg_cgroup_link,
-                                         cgrp_link_list);
-                       list_del(&link->cgrp_link_list);
-                       list_add(&link->cgrp_link_list, &cgrp->css_sets);
-                       link->cg = res;
-                       list_add(&link->cg_link_list, &res->cg_links);
-               }
-       }
-       if (list_empty(&rootnode.subsys_list)) {
-               link = list_entry(tmp_cg_links.next,
-                                 struct cg_cgroup_link,
-                                 cgrp_link_list);
-               list_del(&link->cgrp_link_list);
-               list_add(&link->cgrp_link_list, &dummytop->css_sets);
-               link->cg = res;
-               list_add(&link->cg_link_list, &res->cg_links);
+               if (ss->root->subsys_list.next == &ss->sibling)
+                       link_css_set(&tmp_cg_links, res, cgrp);
        }
+       if (list_empty(&rootnode.subsys_list))
+               link_css_set(&tmp_cg_links, res, dummytop);
 
        BUG_ON(!list_empty(&tmp_cg_links));
 
@@ -586,11 +589,18 @@ static void cgroup_call_pre_destroy(struct cgroup *cgrp)
 {
        struct cgroup_subsys *ss;
        for_each_subsys(cgrp->root, ss)
-               if (ss->pre_destroy && cgrp->subsys[ss->subsys_id])
+               if (ss->pre_destroy)
                        ss->pre_destroy(ss, cgrp);
        return;
 }
 
+static void free_cgroup_rcu(struct rcu_head *obj)
+{
+       struct cgroup *cgrp = container_of(obj, struct cgroup, rcu_head);
+
+       kfree(cgrp);
+}
+
 static void cgroup_diput(struct dentry *dentry, struct inode *inode)
 {
        /* is dentry a directory ? if so, kfree() associated cgroup */
@@ -610,19 +620,19 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode)
                /*
                 * Release the subsystem state objects.
                 */
-               for_each_subsys(cgrp->root, ss) {
-                       if (cgrp->subsys[ss->subsys_id])
-                               ss->destroy(ss, cgrp);
-               }
+               for_each_subsys(cgrp->root, ss)
+                       ss->destroy(ss, cgrp);
 
                cgrp->root->number_of_cgroups--;
                mutex_unlock(&cgroup_mutex);
 
-               /* Drop the active superblock reference that we took when we
-                * created the cgroup */
+               /*
+                * Drop the active superblock reference that we took when we
+                * created the cgroup
+                */
                deactivate_super(cgrp->root->sb);
 
-               kfree(cgrp);
+               call_rcu(&cgrp->rcu_head, free_cgroup_rcu);
        }
        iput(inode);
 }
@@ -712,23 +722,26 @@ static int rebind_subsystems(struct cgroupfs_root *root,
                        BUG_ON(cgrp->subsys[i]);
                        BUG_ON(!dummytop->subsys[i]);
                        BUG_ON(dummytop->subsys[i]->cgroup != dummytop);
+                       mutex_lock(&ss->hierarchy_mutex);
                        cgrp->subsys[i] = dummytop->subsys[i];
                        cgrp->subsys[i]->cgroup = cgrp;
-                       list_add(&ss->sibling, &root->subsys_list);
-                       rcu_assign_pointer(ss->root, root);
+                       list_move(&ss->sibling, &root->subsys_list);
+                       ss->root = root;
                        if (ss->bind)
                                ss->bind(ss, cgrp);
-
+                       mutex_unlock(&ss->hierarchy_mutex);
                } else if (bit & removed_bits) {
                        /* We're removing this subsystem */
                        BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]);
                        BUG_ON(cgrp->subsys[i]->cgroup != cgrp);
+                       mutex_lock(&ss->hierarchy_mutex);
                        if (ss->bind)
                                ss->bind(ss, dummytop);
                        dummytop->subsys[i]->cgroup = dummytop;
                        cgrp->subsys[i] = NULL;
-                       rcu_assign_pointer(subsys[i]->root, &rootnode);
-                       list_del(&ss->sibling);
+                       subsys[i]->root = &rootnode;
+                       list_move(&ss->sibling, &rootnode.subsys_list);
+                       mutex_unlock(&ss->hierarchy_mutex);
                } else if (bit & final_bits) {
                        /* Subsystem state should already exist */
                        BUG_ON(!cgrp->subsys[i]);
@@ -990,7 +1003,7 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
                root = NULL;
        } else {
                /* New superblock */
-               struct cgroup *cgrp = &root->top_cgroup;
+               struct cgroup *root_cgrp = &root->top_cgroup;
                struct inode *inode;
                int i;
 
@@ -1031,7 +1044,7 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
                list_add(&root->root_list, &roots);
                root_count++;
 
-               sb->s_root->d_fsdata = &root->top_cgroup;
+               sb->s_root->d_fsdata = root_cgrp;
                root->top_cgroup.dentry = sb->s_root;
 
                /* Link the top cgroup in this hierarchy into all
@@ -1042,29 +1055,18 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
                        struct hlist_node *node;
                        struct css_set *cg;
 
-                       hlist_for_each_entry(cg, node, hhead, hlist) {
-                               struct cg_cgroup_link *link;
-
-                               BUG_ON(list_empty(&tmp_cg_links));
-                               link = list_entry(tmp_cg_links.next,
-                                                 struct cg_cgroup_link,
-                                                 cgrp_link_list);
-                               list_del(&link->cgrp_link_list);
-                               link->cg = cg;
-                               list_add(&link->cgrp_link_list,
-                                        &root->top_cgroup.css_sets);
-                               list_add(&link->cg_link_list, &cg->cg_links);
-                       }
+                       hlist_for_each_entry(cg, node, hhead, hlist)
+                               link_css_set(&tmp_cg_links, cg, root_cgrp);
                }
                write_unlock(&css_set_lock);
 
                free_cg_links(&tmp_cg_links);
 
-               BUG_ON(!list_empty(&cgrp->sibling));
-               BUG_ON(!list_empty(&cgrp->children));
+               BUG_ON(!list_empty(&root_cgrp->sibling));
+               BUG_ON(!list_empty(&root_cgrp->children));
                BUG_ON(root->number_of_cgroups != 1);
 
-               cgroup_populate_dir(cgrp);
+               cgroup_populate_dir(root_cgrp);
                mutex_unlock(&inode->i_mutex);
                mutex_unlock(&cgroup_mutex);
        }
@@ -1113,10 +1115,9 @@ static void cgroup_kill_sb(struct super_block *sb) {
        }
        write_unlock(&css_set_lock);
 
-       if (!list_empty(&root->root_list)) {
-               list_del(&root->root_list);
-               root_count--;
-       }
+       list_del(&root->root_list);
+       root_count--;
+
        mutex_unlock(&cgroup_mutex);
 
        kfree(root);
@@ -1145,14 +1146,16 @@ static inline struct cftype *__d_cft(struct dentry *dentry)
  * @buf: the buffer to write the path into
  * @buflen: the length of the buffer
  *
- * Called with cgroup_mutex held. Writes path of cgroup into buf.
- * Returns 0 on success, -errno on error.
+ * Called with cgroup_mutex held or else with an RCU-protected cgroup
+ * reference.  Writes path of cgroup into buf.  Returns 0 on success,
+ * -errno on error.
  */
 int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
 {
        char *start;
+       struct dentry *dentry = rcu_dereference(cgrp->dentry);
 
-       if (cgrp == dummytop) {
+       if (!dentry || cgrp == dummytop) {
                /*
                 * Inactive subsystems have no dentry for their root
                 * cgroup
@@ -1165,13 +1168,14 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
 
        *--start = '\0';
        for (;;) {
-               int len = cgrp->dentry->d_name.len;
+               int len = dentry->d_name.len;
                if ((start -= len) < buf)
                        return -ENAMETOOLONG;
                memcpy(start, cgrp->dentry->d_name.name, len);
                cgrp = cgrp->parent;
                if (!cgrp)
                        break;
+               dentry = rcu_dereference(cgrp->dentry);
                if (!cgrp->parent)
                        continue;
                if (--start < buf)
@@ -1216,7 +1220,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
        int retval = 0;
        struct cgroup_subsys *ss;
        struct cgroup *oldcgrp;
-       struct css_set *cg = tsk->cgroups;
+       struct css_set *cg;
        struct css_set *newcg;
        struct cgroupfs_root *root = cgrp->root;
        int subsys_id;
@@ -1236,11 +1240,16 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
                }
        }
 
+       task_lock(tsk);
+       cg = tsk->cgroups;
+       get_css_set(cg);
+       task_unlock(tsk);
        /*
         * Locate or allocate a new css_set for this task,
         * based on its final set of cgroups
         */
        newcg = find_css_set(cg, cgrp);
+       put_css_set(cg);
        if (!newcg)
                return -ENOMEM;
 
@@ -1445,7 +1454,7 @@ static ssize_t cgroup_file_write(struct file *file, const char __user *buf,
        struct cftype *cft = __d_cft(file->f_dentry);
        struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
 
-       if (!cft || cgroup_is_removed(cgrp))
+       if (cgroup_is_removed(cgrp))
                return -ENODEV;
        if (cft->write)
                return cft->write(cgrp, cft, file, buf, nbytes, ppos);
@@ -1490,7 +1499,7 @@ static ssize_t cgroup_file_read(struct file *file, char __user *buf,
        struct cftype *cft = __d_cft(file->f_dentry);
        struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
 
-       if (!cft || cgroup_is_removed(cgrp))
+       if (cgroup_is_removed(cgrp))
                return -ENODEV;
 
        if (cft->read)
@@ -1554,10 +1563,8 @@ static int cgroup_file_open(struct inode *inode, struct file *file)
        err = generic_file_open(inode, file);
        if (err)
                return err;
-
        cft = __d_cft(file->f_dentry);
-       if (!cft)
-               return -ENODEV;
+
        if (cft->read_map || cft->read_seq_string) {
                struct cgroup_seqfile_state *state =
                        kzalloc(sizeof(*state), GFP_USER);
@@ -1671,7 +1678,7 @@ static int cgroup_create_dir(struct cgroup *cgrp, struct dentry *dentry,
        if (!error) {
                dentry->d_fsdata = cgrp;
                inc_nlink(parent->d_inode);
-               cgrp->dentry = dentry;
+               rcu_assign_pointer(cgrp->dentry, dentry);
                dget(dentry);
        }
        dput(dentry);
@@ -1812,6 +1819,7 @@ struct task_struct *cgroup_iter_next(struct cgroup *cgrp,
 {
        struct task_struct *res;
        struct list_head *l = it->task;
+       struct cg_cgroup_link *link;
 
        /* If the iterator cg is NULL, we have no tasks */
        if (!it->cg_link)
@@ -1819,7 +1827,8 @@ struct task_struct *cgroup_iter_next(struct cgroup *cgrp,
        res = list_entry(l, struct task_struct, cg_list);
        /* Advance iterator to find next entry */
        l = l->next;
-       if (l == &res->cgroups->tasks) {
+       link = list_entry(it->cg_link, struct cg_cgroup_link, cgrp_link_list);
+       if (l == &link->cg->tasks) {
                /* We reached the end of this task list - move on to
                 * the next cg_cgroup_link */
                cgroup_advance_iter(cgrp, it);
@@ -2013,14 +2022,16 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan)
  */
 static int pid_array_load(pid_t *pidarray, int npids, struct cgroup *cgrp)
 {
-       int n = 0;
+       int n = 0, pid;
        struct cgroup_iter it;
        struct task_struct *tsk;
        cgroup_iter_start(cgrp, &it);
        while ((tsk = cgroup_iter_next(cgrp, &it))) {
                if (unlikely(n == npids))
                        break;
-               pidarray[n++] = task_pid_vnr(tsk);
+               pid = task_pid_vnr(tsk);
+               if (pid > 0)
+                       pidarray[n++] = pid;
        }
        cgroup_iter_end(cgrp, &it);
        return n;
@@ -2052,7 +2063,6 @@ int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
 
        ret = 0;
        cgrp = dentry->d_fsdata;
-       rcu_read_lock();
 
        cgroup_iter_start(cgrp, &it);
        while ((tsk = cgroup_iter_next(cgrp, &it))) {
@@ -2077,7 +2087,6 @@ int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
        }
        cgroup_iter_end(cgrp, &it);
 
-       rcu_read_unlock();
 err:
        return ret;
 }
@@ -2324,7 +2333,7 @@ static void init_cgroup_css(struct cgroup_subsys_state *css,
                               struct cgroup *cgrp)
 {
        css->cgroup = cgrp;
-       atomic_set(&css->refcnt, 0);
+       atomic_set(&css->refcnt, 1);
        css->flags = 0;
        if (cgrp == dummytop)
                set_bit(CSS_ROOT, &css->flags);
@@ -2332,6 +2341,29 @@ static void init_cgroup_css(struct cgroup_subsys_state *css,
        cgrp->subsys[ss->subsys_id] = css;
 }
 
+static void cgroup_lock_hierarchy(struct cgroupfs_root *root)
+{
+       /* We need to take each hierarchy_mutex in a consistent order */
+       int i;
+
+       for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+               struct cgroup_subsys *ss = subsys[i];
+               if (ss->root == root)
+                       mutex_lock_nested(&ss->hierarchy_mutex, i);
+       }
+}
+
+static void cgroup_unlock_hierarchy(struct cgroupfs_root *root)
+{
+       int i;
+
+       for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+               struct cgroup_subsys *ss = subsys[i];
+               if (ss->root == root)
+                       mutex_unlock(&ss->hierarchy_mutex);
+       }
+}
+
 /*
  * cgroup_create - create a cgroup
  * @parent: cgroup that will be parent of the new cgroup
@@ -2380,7 +2412,9 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
                init_cgroup_css(css, ss, cgrp);
        }
 
+       cgroup_lock_hierarchy(root);
        list_add(&cgrp->sibling, &cgrp->parent->children);
+       cgroup_unlock_hierarchy(root);
        root->number_of_cgroups++;
 
        err = cgroup_create_dir(cgrp, dentry, mode);
@@ -2431,7 +2465,7 @@ static int cgroup_has_css_refs(struct cgroup *cgrp)
 {
        /* Check the reference count on each subsystem. Since we
         * already established that there are no tasks in the
-        * cgroup, if the css refcount is also 0, then there should
+        * cgroup, if the css refcount is also 1, then there should
         * be no outstanding references, so the subsystem is safe to
         * destroy. We scan across all subsystems rather than using
         * the per-hierarchy linked list of mounted subsystems since
@@ -2452,19 +2486,67 @@ static int cgroup_has_css_refs(struct cgroup *cgrp)
                 * matter, since it can only happen if the cgroup
                 * has been deleted and hence no longer needs the
                 * release agent to be called anyway. */
-               if (css && atomic_read(&css->refcnt))
+               if (css && (atomic_read(&css->refcnt) > 1))
                        return 1;
        }
        return 0;
 }
 
+/*
+ * Atomically mark all (or else none) of the cgroup's CSS objects as
+ * CSS_REMOVED. Return true on success, or false if the cgroup has
+ * busy subsystems. Call with cgroup_mutex held.
+ */
+
+static int cgroup_clear_css_refs(struct cgroup *cgrp)
+{
+       struct cgroup_subsys *ss;
+       unsigned long flags;
+       bool failed = false;
+       local_irq_save(flags);
+       for_each_subsys(cgrp->root, ss) {
+               struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
+               int refcnt;
+               do {
+                       /* We can only remove a CSS with a refcnt==1 */
+                       refcnt = atomic_read(&css->refcnt);
+                       if (refcnt > 1) {
+                               failed = true;
+                               goto done;
+                       }
+                       BUG_ON(!refcnt);
+                       /*
+                        * Drop the refcnt to 0 while we check other
+                        * subsystems. This will cause any racing
+                        * css_tryget() to spin until we set the
+                        * CSS_REMOVED bits or abort
+                        */
+               } while (atomic_cmpxchg(&css->refcnt, refcnt, 0) != refcnt);
+       }
+ done:
+       for_each_subsys(cgrp->root, ss) {
+               struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
+               if (failed) {
+                       /*
+                        * Restore old refcnt if we previously managed
+                        * to clear it from 1 to 0
+                        */
+                       if (!atomic_read(&css->refcnt))
+                               atomic_set(&css->refcnt, 1);
+               } else {
+                       /* Commit the fact that the CSS is removed */
+                       set_bit(CSS_REMOVED, &css->flags);
+               }
+       }
+       local_irq_restore(flags);
+       return !failed;
+}
+
 static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
 {
        struct cgroup *cgrp = dentry->d_fsdata;
        struct dentry *d;
        struct cgroup *parent;
-       struct super_block *sb;
-       struct cgroupfs_root *root;
 
        /* the vfs holds both inode->i_mutex already */
 
@@ -2487,12 +2569,10 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
 
        mutex_lock(&cgroup_mutex);
        parent = cgrp->parent;
-       root = cgrp->root;
-       sb = root->sb;
 
        if (atomic_read(&cgrp->count)
            || !list_empty(&cgrp->children)
-           || cgroup_has_css_refs(cgrp)) {
+           || !cgroup_clear_css_refs(cgrp)) {
                mutex_unlock(&cgroup_mutex);
                return -EBUSY;
        }
@@ -2502,8 +2582,12 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
        if (!list_empty(&cgrp->release_list))
                list_del(&cgrp->release_list);
        spin_unlock(&release_list_lock);
-       /* delete my sibling from parent->children */
+
+       cgroup_lock_hierarchy(cgrp->root);
+       /* delete this cgroup from parent->children */
        list_del(&cgrp->sibling);
+       cgroup_unlock_hierarchy(cgrp->root);
+
        spin_lock(&cgrp->dentry->d_lock);
        d = dget(cgrp->dentry);
        spin_unlock(&d->d_lock);
@@ -2525,6 +2609,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
        printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name);
 
        /* Create the top cgroup state for this subsystem */
+       list_add(&ss->sibling, &rootnode.subsys_list);
        ss->root = &rootnode;
        css = ss->create(ss, dummytop);
        /* We don't handle early failures gracefully */
@@ -2544,6 +2629,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
         * need to invoke fork callbacks here. */
        BUG_ON(!list_empty(&init_task.tasks));
 
+       mutex_init(&ss->hierarchy_mutex);
        ss->active = 1;
 }
 
@@ -2562,7 +2648,6 @@ int __init cgroup_init_early(void)
        INIT_HLIST_NODE(&init_css_set.hlist);
        css_set_count = 1;
        init_cgroup_root(&rootnode);
-       list_add(&rootnode.root_list, &roots);
        root_count = 1;
        init_task.cgroups = &init_css_set;
 
@@ -2669,15 +2754,12 @@ static int proc_cgroup_show(struct seq_file *m, void *v)
 
        mutex_lock(&cgroup_mutex);
 
-       for_each_root(root) {
+       for_each_active_root(root) {
                struct cgroup_subsys *ss;
                struct cgroup *cgrp;
                int subsys_id;
                int count = 0;
 
-               /* Skip this hierarchy if it has no active subsystems */
-               if (!root->actual_subsys_bits)
-                       continue;
                seq_printf(m, "%lu:", root->subsys_bits);
                for_each_subsys(root, ss)
                        seq_printf(m, "%s%s", count++ ? "," : "", ss->name);
@@ -2800,8 +2882,10 @@ void cgroup_post_fork(struct task_struct *child)
 {
        if (use_task_css_set_links) {
                write_lock(&css_set_lock);
+               task_lock(child);
                if (list_empty(&child->cg_list))
                        list_add(&child->cg_list, &child->cgroups->tasks);
+               task_unlock(child);
                write_unlock(&css_set_lock);
        }
 }
@@ -2907,6 +2991,7 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
                mutex_unlock(&cgroup_mutex);
                return 0;
        }
+       task_lock(tsk);
        cg = tsk->cgroups;
        parent = task_cgroup(tsk, subsys->subsys_id);
 
@@ -2919,6 +3004,7 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
 
        /* Keep the cgroup alive */
        get_css_set(cg);
+       task_unlock(tsk);
        mutex_unlock(&cgroup_mutex);
 
        /* Now do the VFS work to create a cgroup */
@@ -2937,7 +3023,7 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
        }
 
        /* Create the cgroup directory, which also creates the cgroup */
-       ret = vfs_mkdir(inode, dentry, S_IFDIR | 0755);
+       ret = vfs_mkdir(inode, dentry, 0755);
        child = __d_cgrp(dentry);
        dput(dentry);
        if (ret) {
@@ -2947,13 +3033,6 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
                goto out_release;
        }
 
-       if (!child) {
-               printk(KERN_INFO
-                      "Couldn't find new cgroup %s\n", nodename);
-               ret = -ENOMEM;
-               goto out_release;
-       }
-
        /* The cgroup now exists. Retake cgroup_mutex and check
         * that we're still in the same state that we thought we
         * were. */
@@ -3049,7 +3128,8 @@ void __css_put(struct cgroup_subsys_state *css)
 {
        struct cgroup *cgrp = css->cgroup;
        rcu_read_lock();
-       if (atomic_dec_and_test(&css->refcnt) && notify_on_release(cgrp)) {
+       if ((atomic_dec_return(&css->refcnt) == 1) &&
+           notify_on_release(cgrp)) {
                set_bit(CGRP_RELEASABLE, &cgrp->flags);
                check_for_release(cgrp);
        }
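Several hunks above bias css->refcnt at 1 and let cgroup_rmdir() drop it to 0
only while it decides whether the cgroup is busy; the racing reader the
comments refer to is css_tryget(), which spins across that transient window.
A sketch of that counterpart helper (close to, but not guaranteed identical
to, the one introduced alongside these changes):

static inline bool css_tryget(struct cgroup_subsys_state *css)
{
        if (test_bit(CSS_ROOT, &css->flags))
                return true;            /* root css is never removed */
        while (!atomic_inc_not_zero(&css->refcnt)) {
                if (test_bit(CSS_REMOVED, &css->flags))
                        return false;   /* rmdir won the race */
                cpu_relax();            /* refcnt is transiently 0 */
        }
        return true;
}

diff --git a/kernel/cpuset.c b/kernel/cpuset.c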
index 345ace5117de9ebfdd886e1a959e4b01eefb2374..647c77a88fcb5f39969241988bcbf6c8edcfc876 100644 (file)
@@ -84,7 +84,7 @@ struct cpuset {
        struct cgroup_subsys_state css;
 
        unsigned long flags;            /* "unsigned long" so bitops work */
-       cpumask_t cpus_allowed;         /* CPUs allowed to tasks in cpuset */
+       cpumask_var_t cpus_allowed;     /* CPUs allowed to tasks in cpuset */
        nodemask_t mems_allowed;        /* Memory Nodes allowed to tasks */
 
        struct cpuset *parent;          /* my parent */
@@ -195,8 +195,6 @@ static int cpuset_mems_generation;
 
 static struct cpuset top_cpuset = {
        .flags = ((1 << CS_CPU_EXCLUSIVE) | (1 << CS_MEM_EXCLUSIVE)),
-       .cpus_allowed = CPU_MASK_ALL,
-       .mems_allowed = NODE_MASK_ALL,
 };
 
 /*
@@ -278,7 +276,7 @@ static struct file_system_type cpuset_fs_type = {
 };
 
 /*
- * Return in *pmask the portion of a cpusets's cpus_allowed that
+ * Return in pmask the portion of a cpuset's cpus_allowed that
  * are online.  If none are online, walk up the cpuset hierarchy
  * until we find one that does have some online cpus.  If we get
  * all the way to the top and still haven't found any online cpus,
@@ -291,15 +289,16 @@ static struct file_system_type cpuset_fs_type = {
  * Call with callback_mutex held.
  */
 
-static void guarantee_online_cpus(const struct cpuset *cs, cpumask_t *pmask)
+static void guarantee_online_cpus(const struct cpuset *cs,
+                                 struct cpumask *pmask)
 {
-       while (cs && !cpus_intersects(cs->cpus_allowed, cpu_online_map))
+       while (cs && !cpumask_intersects(cs->cpus_allowed, cpu_online_mask))
                cs = cs->parent;
        if (cs)
-               cpus_and(*pmask, cs->cpus_allowed, cpu_online_map);
+               cpumask_and(pmask, cs->cpus_allowed, cpu_online_mask);
        else
-               *pmask = cpu_online_map;
-       BUG_ON(!cpus_intersects(*pmask, cpu_online_map));
+               cpumask_copy(pmask, cpu_online_mask);
+       BUG_ON(!cpumask_intersects(pmask, cpu_online_mask));
 }
 
 /*
@@ -375,14 +374,9 @@ void cpuset_update_task_memory_state(void)
        struct task_struct *tsk = current;
        struct cpuset *cs;
 
-       if (task_cs(tsk) == &top_cpuset) {
-               /* Don't need rcu for top_cpuset.  It's never freed. */
-               my_cpusets_mem_gen = top_cpuset.mems_generation;
-       } else {
-               rcu_read_lock();
-               my_cpusets_mem_gen = task_cs(tsk)->mems_generation;
-               rcu_read_unlock();
-       }
+       rcu_read_lock();
+       my_cpusets_mem_gen = task_cs(tsk)->mems_generation;
+       rcu_read_unlock();
 
        if (my_cpusets_mem_gen != tsk->cpuset_mems_generation) {
                mutex_lock(&callback_mutex);
@@ -414,12 +408,43 @@ void cpuset_update_task_memory_state(void)
 
 static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q)
 {
-       return  cpus_subset(p->cpus_allowed, q->cpus_allowed) &&
+       return  cpumask_subset(p->cpus_allowed, q->cpus_allowed) &&
                nodes_subset(p->mems_allowed, q->mems_allowed) &&
                is_cpu_exclusive(p) <= is_cpu_exclusive(q) &&
                is_mem_exclusive(p) <= is_mem_exclusive(q);
 }
 
+/**
+ * alloc_trial_cpuset - allocate a trial cpuset
+ * @cs: the cpuset that the trial cpuset duplicates
+ */
+static struct cpuset *alloc_trial_cpuset(const struct cpuset *cs)
+{
+       struct cpuset *trial;
+
+       trial = kmemdup(cs, sizeof(*cs), GFP_KERNEL);
+       if (!trial)
+               return NULL;
+
+       if (!alloc_cpumask_var(&trial->cpus_allowed, GFP_KERNEL)) {
+               kfree(trial);
+               return NULL;
+       }
+       cpumask_copy(trial->cpus_allowed, cs->cpus_allowed);
+
+       return trial;
+}
+
+/**
+ * free_trial_cpuset - free the trial cpuset
+ * @trial: the trial cpuset to be freed
+ */
+static void free_trial_cpuset(struct cpuset *trial)
+{
+       free_cpumask_var(trial->cpus_allowed);
+       kfree(trial);
+}
+
 /*
  * validate_change() - Used to validate that any proposed cpuset change
  *                    follows the structural rules for cpusets.
@@ -469,7 +494,7 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
                c = cgroup_cs(cont);
                if ((is_cpu_exclusive(trial) || is_cpu_exclusive(c)) &&
                    c != cur &&
-                   cpus_intersects(trial->cpus_allowed, c->cpus_allowed))
+                   cpumask_intersects(trial->cpus_allowed, c->cpus_allowed))
                        return -EINVAL;
                if ((is_mem_exclusive(trial) || is_mem_exclusive(c)) &&
                    c != cur &&
@@ -479,7 +504,7 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
 
        /* Cpusets with tasks can't have empty cpus_allowed or mems_allowed */
        if (cgroup_task_count(cur->css.cgroup)) {
-               if (cpus_empty(trial->cpus_allowed) ||
+               if (cpumask_empty(trial->cpus_allowed) ||
                    nodes_empty(trial->mems_allowed)) {
                        return -ENOSPC;
                }
@@ -494,7 +519,7 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
  */
 static int cpusets_overlap(struct cpuset *a, struct cpuset *b)
 {
-       return cpus_intersects(a->cpus_allowed, b->cpus_allowed);
+       return cpumask_intersects(a->cpus_allowed, b->cpus_allowed);
 }
 
 static void
@@ -519,7 +544,7 @@ update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c)
                cp = list_first_entry(&q, struct cpuset, stack_list);
                list_del(q.next);
 
-               if (cpus_empty(cp->cpus_allowed))
+               if (cpumask_empty(cp->cpus_allowed))
                        continue;
 
                if (is_sched_load_balance(cp))
@@ -586,7 +611,8 @@ update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c)
  *     element of the partition (one sched domain) to be passed to
  *     partition_sched_domains().
  */
-static int generate_sched_domains(cpumask_t **domains,
+/* FIXME: see the FIXME in partition_sched_domains() */
+static int generate_sched_domains(struct cpumask **domains,
                        struct sched_domain_attr **attributes)
 {
        LIST_HEAD(q);           /* queue of cpusets to be scanned */
@@ -594,10 +620,10 @@ static int generate_sched_domains(cpumask_t **domains,
        struct cpuset **csa;    /* array of all cpuset ptrs */
        int csn;                /* how many cpuset ptrs in csa so far */
        int i, j, k;            /* indices for partition finding loops */
-       cpumask_t *doms;        /* resulting partition; i.e. sched domains */
+       struct cpumask *doms;   /* resulting partition; i.e. sched domains */
        struct sched_domain_attr *dattr;  /* attributes for custom domains */
        int ndoms = 0;          /* number of sched domains in result */
-       int nslot;              /* next empty doms[] cpumask_t slot */
+       int nslot;              /* next empty doms[] struct cpumask slot */
 
        doms = NULL;
        dattr = NULL;
@@ -605,7 +631,7 @@ static int generate_sched_domains(cpumask_t **domains,
 
        /* Special case for the 99% of systems with one, full, sched domain */
        if (is_sched_load_balance(&top_cpuset)) {
-               doms = kmalloc(sizeof(cpumask_t), GFP_KERNEL);
+               doms = kmalloc(cpumask_size(), GFP_KERNEL);
                if (!doms)
                        goto done;
 
@@ -614,7 +640,7 @@ static int generate_sched_domains(cpumask_t **domains,
                        *dattr = SD_ATTR_INIT;
                        update_domain_attr_tree(dattr, &top_cpuset);
                }
-               *doms = top_cpuset.cpus_allowed;
+               cpumask_copy(doms, top_cpuset.cpus_allowed);
 
                ndoms = 1;
                goto done;
@@ -633,7 +659,7 @@ static int generate_sched_domains(cpumask_t **domains,
                cp = list_first_entry(&q, struct cpuset, stack_list);
                list_del(q.next);
 
-               if (cpus_empty(cp->cpus_allowed))
+               if (cpumask_empty(cp->cpus_allowed))
                        continue;
 
                /*
@@ -684,7 +710,7 @@ restart:
         * Now we know how many domains to create.
         * Convert <csn, csa> to <ndoms, doms> and populate cpu masks.
         */
-       doms = kmalloc(ndoms * sizeof(cpumask_t), GFP_KERNEL);
+       doms = kmalloc(ndoms * cpumask_size(), GFP_KERNEL);
        if (!doms)
                goto done;
 
@@ -696,7 +722,7 @@ restart:
 
        for (nslot = 0, i = 0; i < csn; i++) {
                struct cpuset *a = csa[i];
-               cpumask_t *dp;
+               struct cpumask *dp;
                int apn = a->pn;
 
                if (apn < 0) {
@@ -719,14 +745,14 @@ restart:
                        continue;
                }
 
-               cpus_clear(*dp);
+               cpumask_clear(dp);
                if (dattr)
                        *(dattr + nslot) = SD_ATTR_INIT;
                for (j = i; j < csn; j++) {
                        struct cpuset *b = csa[j];
 
                        if (apn == b->pn) {
-                               cpus_or(*dp, *dp, b->cpus_allowed);
+                               cpumask_or(dp, dp, b->cpus_allowed);
                                if (dattr)
                                        update_domain_attr_tree(dattr + nslot, b);
 
@@ -766,7 +792,7 @@ done:
 static void do_rebuild_sched_domains(struct work_struct *unused)
 {
        struct sched_domain_attr *attr;
-       cpumask_t *doms;
+       struct cpumask *doms;
        int ndoms;
 
        get_online_cpus();
@@ -835,7 +861,7 @@ void rebuild_sched_domains(void)
 static int cpuset_test_cpumask(struct task_struct *tsk,
                               struct cgroup_scanner *scan)
 {
-       return !cpus_equal(tsk->cpus_allowed,
+       return !cpumask_equal(&tsk->cpus_allowed,
                        (cgroup_cs(scan->cg))->cpus_allowed);
 }
 
@@ -853,7 +879,7 @@ static int cpuset_test_cpumask(struct task_struct *tsk,
 static void cpuset_change_cpumask(struct task_struct *tsk,
                                  struct cgroup_scanner *scan)
 {
-       set_cpus_allowed_ptr(tsk, &((cgroup_cs(scan->cg))->cpus_allowed));
+       set_cpus_allowed_ptr(tsk, ((cgroup_cs(scan->cg))->cpus_allowed));
 }
 
 /**
@@ -885,10 +911,10 @@ static void update_tasks_cpumask(struct cpuset *cs, struct ptr_heap *heap)
  * @cs: the cpuset to consider
  * @buf: buffer of cpu numbers written to this cpuset
  */
-static int update_cpumask(struct cpuset *cs, const char *buf)
+static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
+                         const char *buf)
 {
        struct ptr_heap heap;
-       struct cpuset trialcs;
        int retval;
        int is_load_balanced;
 
@@ -896,8 +922,6 @@ static int update_cpumask(struct cpuset *cs, const char *buf)
        if (cs == &top_cpuset)
                return -EACCES;
 
-       trialcs = *cs;
-
        /*
         * An empty cpus_allowed is ok only if the cpuset has no tasks.
         * Since cpulist_parse() fails on an empty mask, we special case
@@ -905,31 +929,31 @@ static int update_cpumask(struct cpuset *cs, const char *buf)
         * with tasks have cpus.
         */
        if (!*buf) {
-               cpus_clear(trialcs.cpus_allowed);
+               cpumask_clear(trialcs->cpus_allowed);
        } else {
-               retval = cpulist_parse(buf, &trialcs.cpus_allowed);
+               retval = cpulist_parse(buf, trialcs->cpus_allowed);
                if (retval < 0)
                        return retval;
 
-               if (!cpus_subset(trialcs.cpus_allowed, cpu_online_map))
+               if (!cpumask_subset(trialcs->cpus_allowed, cpu_online_mask))
                        return -EINVAL;
        }
-       retval = validate_change(cs, &trialcs);
+       retval = validate_change(cs, trialcs);
        if (retval < 0)
                return retval;
 
        /* Nothing to do if the cpus didn't change */
-       if (cpus_equal(cs->cpus_allowed, trialcs.cpus_allowed))
+       if (cpumask_equal(cs->cpus_allowed, trialcs->cpus_allowed))
                return 0;
 
        retval = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, NULL);
        if (retval)
                return retval;
 
-       is_load_balanced = is_sched_load_balance(&trialcs);
+       is_load_balanced = is_sched_load_balance(trialcs);
 
        mutex_lock(&callback_mutex);
-       cs->cpus_allowed = trialcs.cpus_allowed;
+       cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed);
        mutex_unlock(&callback_mutex);
 
        /*
@@ -1017,7 +1041,7 @@ static int update_tasks_nodemask(struct cpuset *cs, const nodemask_t *oldmem)
        cpuset_being_rebound = cs;              /* causes mpol_dup() rebind */
 
        fudge = 10;                             /* spare mmarray[] slots */
-       fudge += cpus_weight(cs->cpus_allowed); /* imagine one fork-bomb/cpu */
+       fudge += cpumask_weight(cs->cpus_allowed);/* imagine 1 fork-bomb/cpu */
        retval = -ENOMEM;
 
        /*
@@ -1104,9 +1128,9 @@ done:
  * lock each such tasks mm->mmap_sem, scan its vma's and rebind
  * their mempolicies to the cpusets new mems_allowed.
  */
-static int update_nodemask(struct cpuset *cs, const char *buf)
+static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
+                          const char *buf)
 {
-       struct cpuset trialcs;
        nodemask_t oldmem;
        int retval;
 
@@ -1117,8 +1141,6 @@ static int update_nodemask(struct cpuset *cs, const char *buf)
        if (cs == &top_cpuset)
                return -EACCES;
 
-       trialcs = *cs;
-
        /*
         * An empty mems_allowed is ok iff there are no tasks in the cpuset.
         * Since nodelist_parse() fails on an empty mask, we special case
@@ -1126,27 +1148,27 @@ static int update_nodemask(struct cpuset *cs, const char *buf)
         * with tasks have memory.
         */
        if (!*buf) {
-               nodes_clear(trialcs.mems_allowed);
+               nodes_clear(trialcs->mems_allowed);
        } else {
-               retval = nodelist_parse(buf, trialcs.mems_allowed);
+               retval = nodelist_parse(buf, trialcs->mems_allowed);
                if (retval < 0)
                        goto done;
 
-               if (!nodes_subset(trialcs.mems_allowed,
+               if (!nodes_subset(trialcs->mems_allowed,
                                node_states[N_HIGH_MEMORY]))
                        return -EINVAL;
        }
        oldmem = cs->mems_allowed;
-       if (nodes_equal(oldmem, trialcs.mems_allowed)) {
+       if (nodes_equal(oldmem, trialcs->mems_allowed)) {
                retval = 0;             /* Too easy - nothing to do */
                goto done;
        }
-       retval = validate_change(cs, &trialcs);
+       retval = validate_change(cs, trialcs);
        if (retval < 0)
                goto done;
 
        mutex_lock(&callback_mutex);
-       cs->mems_allowed = trialcs.mems_allowed;
+       cs->mems_allowed = trialcs->mems_allowed;
        cs->mems_generation = cpuset_mems_generation++;
        mutex_unlock(&callback_mutex);
 
@@ -1167,7 +1189,8 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val)
 
        if (val != cs->relax_domain_level) {
                cs->relax_domain_level = val;
-               if (!cpus_empty(cs->cpus_allowed) && is_sched_load_balance(cs))
+               if (!cpumask_empty(cs->cpus_allowed) &&
+                   is_sched_load_balance(cs))
                        async_rebuild_sched_domains();
        }
 
@@ -1186,31 +1209,36 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val)
 static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
                       int turning_on)
 {
-       struct cpuset trialcs;
+       struct cpuset *trialcs;
        int err;
        int balance_flag_changed;
 
-       trialcs = *cs;
+       trialcs = alloc_trial_cpuset(cs);
+       if (!trialcs)
+               return -ENOMEM;
+
        if (turning_on)
-               set_bit(bit, &trialcs.flags);
+               set_bit(bit, &trialcs->flags);
        else
-               clear_bit(bit, &trialcs.flags);
+               clear_bit(bit, &trialcs->flags);
 
-       err = validate_change(cs, &trialcs);
+       err = validate_change(cs, trialcs);
        if (err < 0)
-               return err;
+               goto out;
 
        balance_flag_changed = (is_sched_load_balance(cs) !=
-                                       is_sched_load_balance(&trialcs));
+                               is_sched_load_balance(trialcs));
 
        mutex_lock(&callback_mutex);
-       cs->flags = trialcs.flags;
+       cs->flags = trialcs->flags;
        mutex_unlock(&callback_mutex);
 
-       if (!cpus_empty(trialcs.cpus_allowed) && balance_flag_changed)
+       if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed)
                async_rebuild_sched_domains();
 
-       return 0;
+out:
+       free_trial_cpuset(trialcs);
+       return err;
 }
 
 /*
@@ -1311,42 +1339,47 @@ static int fmeter_getrate(struct fmeter *fmp)
        return val;
 }
 
+/* Protected by cgroup_lock */
+static cpumask_var_t cpus_attach;
+
 /* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */
 static int cpuset_can_attach(struct cgroup_subsys *ss,
                             struct cgroup *cont, struct task_struct *tsk)
 {
        struct cpuset *cs = cgroup_cs(cont);
+       int ret = 0;
 
-       if (cpus_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))
+       if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))
                return -ENOSPC;
-       if (tsk->flags & PF_THREAD_BOUND) {
-               cpumask_t mask;
 
+       if (tsk->flags & PF_THREAD_BOUND) {
                mutex_lock(&callback_mutex);
-               mask = cs->cpus_allowed;
+               if (!cpumask_equal(&tsk->cpus_allowed, cs->cpus_allowed))
+                       ret = -EINVAL;
                mutex_unlock(&callback_mutex);
-               if (!cpus_equal(tsk->cpus_allowed, mask))
-                       return -EINVAL;
        }
 
-       return security_task_setscheduler(tsk, 0, NULL);
+       return ret < 0 ? ret : security_task_setscheduler(tsk, 0, NULL);
 }
 
 static void cpuset_attach(struct cgroup_subsys *ss,
                          struct cgroup *cont, struct cgroup *oldcont,
                          struct task_struct *tsk)
 {
-       cpumask_t cpus;
        nodemask_t from, to;
        struct mm_struct *mm;
        struct cpuset *cs = cgroup_cs(cont);
        struct cpuset *oldcs = cgroup_cs(oldcont);
        int err;
 
-       mutex_lock(&callback_mutex);
-       guarantee_online_cpus(cs, &cpus);
-       err = set_cpus_allowed_ptr(tsk, &cpus);
-       mutex_unlock(&callback_mutex);
+       if (cs == &top_cpuset) {
+               cpumask_copy(cpus_attach, cpu_possible_mask);
+       } else {
+               mutex_lock(&callback_mutex);
+               guarantee_online_cpus(cs, cpus_attach);
+               mutex_unlock(&callback_mutex);
+       }
+       err = set_cpus_allowed_ptr(tsk, cpus_attach);
        if (err)
                return;
 
@@ -1359,7 +1392,6 @@ static void cpuset_attach(struct cgroup_subsys *ss,
                        cpuset_migrate_mm(mm, &from, &to);
                mmput(mm);
        }
-
 }
 
 /* The various types of files and directories in a cpuset file system */
@@ -1454,21 +1486,29 @@ static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft,
                                const char *buf)
 {
        int retval = 0;
+       struct cpuset *cs = cgroup_cs(cgrp);
+       struct cpuset *trialcs;
 
        if (!cgroup_lock_live_group(cgrp))
                return -ENODEV;
 
+       trialcs = alloc_trial_cpuset(cs);
+       if (!trialcs)
+               return -ENOMEM;
+
        switch (cft->private) {
        case FILE_CPULIST:
-               retval = update_cpumask(cgroup_cs(cgrp), buf);
+               retval = update_cpumask(cs, trialcs, buf);
                break;
        case FILE_MEMLIST:
-               retval = update_nodemask(cgroup_cs(cgrp), buf);
+               retval = update_nodemask(cs, trialcs, buf);
                break;
        default:
                retval = -EINVAL;
                break;
        }
+
+       free_trial_cpuset(trialcs);
        cgroup_unlock();
        return retval;
 }
@@ -1487,13 +1527,13 @@ static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft,
 
 static int cpuset_sprintf_cpulist(char *page, struct cpuset *cs)
 {
-       cpumask_t mask;
+       int ret;
 
        mutex_lock(&callback_mutex);
-       mask = cs->cpus_allowed;
+       ret = cpulist_scnprintf(page, PAGE_SIZE, cs->cpus_allowed);
        mutex_unlock(&callback_mutex);
 
-       return cpulist_scnprintf(page, PAGE_SIZE, &mask);
+       return ret;
 }
 
 static int cpuset_sprintf_memlist(char *page, struct cpuset *cs)
@@ -1729,7 +1769,7 @@ static void cpuset_post_clone(struct cgroup_subsys *ss,
        parent_cs = cgroup_cs(parent);
 
        cs->mems_allowed = parent_cs->mems_allowed;
-       cs->cpus_allowed = parent_cs->cpus_allowed;
+       cpumask_copy(cs->cpus_allowed, parent_cs->cpus_allowed);
        return;
 }
 
@@ -1755,6 +1795,10 @@ static struct cgroup_subsys_state *cpuset_create(
        cs = kmalloc(sizeof(*cs), GFP_KERNEL);
        if (!cs)
                return ERR_PTR(-ENOMEM);
+       if (!alloc_cpumask_var(&cs->cpus_allowed, GFP_KERNEL)) {
+               kfree(cs);
+               return ERR_PTR(-ENOMEM);
+       }
 
        cpuset_update_task_memory_state();
        cs->flags = 0;
@@ -1763,7 +1807,7 @@ static struct cgroup_subsys_state *cpuset_create(
        if (is_spread_slab(parent))
                set_bit(CS_SPREAD_SLAB, &cs->flags);
        set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
-       cpus_clear(cs->cpus_allowed);
+       cpumask_clear(cs->cpus_allowed);
        nodes_clear(cs->mems_allowed);
        cs->mems_generation = cpuset_mems_generation++;
        fmeter_init(&cs->fmeter);
@@ -1790,6 +1834,7 @@ static void cpuset_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
                update_flag(CS_SCHED_LOAD_BALANCE, cs, 0);
 
        number_of_cpusets--;
+       free_cpumask_var(cs->cpus_allowed);
        kfree(cs);
 }
 
@@ -1813,6 +1858,8 @@ struct cgroup_subsys cpuset_subsys = {
 
 int __init cpuset_init_early(void)
 {
+       alloc_bootmem_cpumask_var(&top_cpuset.cpus_allowed);
+
        top_cpuset.mems_generation = cpuset_mems_generation++;
        return 0;
 }
@@ -1828,7 +1875,7 @@ int __init cpuset_init(void)
 {
        int err = 0;
 
-       cpus_setall(top_cpuset.cpus_allowed);
+       cpumask_setall(top_cpuset.cpus_allowed);
        nodes_setall(top_cpuset.mems_allowed);
 
        fmeter_init(&top_cpuset.fmeter);
@@ -1840,6 +1887,9 @@ int __init cpuset_init(void)
        if (err < 0)
                return err;
 
+       if (!alloc_cpumask_var(&cpus_attach, GFP_KERNEL))
+               BUG();
+
        number_of_cpusets = 1;
        return 0;
 }
@@ -1914,7 +1964,7 @@ static void remove_tasks_in_empty_cpuset(struct cpuset *cs)
         * has online cpus, so can't be empty).
         */
        parent = cs->parent;
-       while (cpus_empty(parent->cpus_allowed) ||
+       while (cpumask_empty(parent->cpus_allowed) ||
                        nodes_empty(parent->mems_allowed))
                parent = parent->parent;
 
@@ -1955,7 +2005,7 @@ static void scan_for_empty_cpusets(struct cpuset *root)
                }
 
                /* Continue past cpusets with all cpus, mems online */
-               if (cpus_subset(cp->cpus_allowed, cpu_online_map) &&
+               if (cpumask_subset(cp->cpus_allowed, cpu_online_mask) &&
                    nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY]))
                        continue;
 
@@ -1963,13 +2013,14 @@ static void scan_for_empty_cpusets(struct cpuset *root)
 
                /* Remove offline cpus and mems from this cpuset. */
                mutex_lock(&callback_mutex);
-               cpus_and(cp->cpus_allowed, cp->cpus_allowed, cpu_online_map);
+               cpumask_and(cp->cpus_allowed, cp->cpus_allowed,
+                           cpu_online_mask);
                nodes_and(cp->mems_allowed, cp->mems_allowed,
                                                node_states[N_HIGH_MEMORY]);
                mutex_unlock(&callback_mutex);
 
                /* Move tasks from the empty cpuset to a parent */
-               if (cpus_empty(cp->cpus_allowed) ||
+               if (cpumask_empty(cp->cpus_allowed) ||
                     nodes_empty(cp->mems_allowed))
                        remove_tasks_in_empty_cpuset(cp);
                else {
@@ -1995,7 +2046,7 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
                                unsigned long phase, void *unused_cpu)
 {
        struct sched_domain_attr *attr;
-       cpumask_t *doms;
+       struct cpumask *doms;
        int ndoms;
 
        switch (phase) {
@@ -2010,7 +2061,7 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
        }
 
        cgroup_lock();
-       top_cpuset.cpus_allowed = cpu_online_map;
+       cpumask_copy(top_cpuset.cpus_allowed, cpu_online_mask);
        scan_for_empty_cpusets(&top_cpuset);
        ndoms = generate_sched_domains(&doms, &attr);
        cgroup_unlock();
@@ -2055,7 +2106,7 @@ static int cpuset_track_online_nodes(struct notifier_block *self,
 
 void __init cpuset_init_smp(void)
 {
-       top_cpuset.cpus_allowed = cpu_online_map;
+       cpumask_copy(top_cpuset.cpus_allowed, cpu_online_mask);
        top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
 
        hotcpu_notifier(cpuset_track_online_cpus, 0);
@@ -2065,15 +2116,15 @@ void __init cpuset_init_smp(void)
 /**
  * cpuset_cpus_allowed - return cpus_allowed mask from a tasks cpuset.
  * @tsk: pointer to task_struct from which to obtain cpuset->cpus_allowed.
- * @pmask: pointer to cpumask_t variable to receive cpus_allowed set.
+ * @pmask: pointer to struct cpumask variable to receive cpus_allowed set.
  *
- * Description: Returns the cpumask_t cpus_allowed of the cpuset
+ * Description: Returns the cpumask_var_t cpus_allowed of the cpuset
  * attached to the specified @tsk.  Guaranteed to return some non-empty
  * subset of cpu_online_map, even if this means going outside the
  * tasks cpuset.
  **/
 
-void cpuset_cpus_allowed(struct task_struct *tsk, cpumask_t *pmask)
+void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
 {
        mutex_lock(&callback_mutex);
        cpuset_cpus_allowed_locked(tsk, pmask);
@@ -2084,7 +2135,7 @@ void cpuset_cpus_allowed(struct task_struct *tsk, cpumask_t *pmask)
  * cpuset_cpus_allowed_locked - return cpus_allowed mask from a tasks cpuset.
  * Must be called with callback_mutex held.
  **/
-void cpuset_cpus_allowed_locked(struct task_struct *tsk, cpumask_t *pmask)
+void cpuset_cpus_allowed_locked(struct task_struct *tsk, struct cpumask *pmask)
 {
        task_lock(tsk);
        guarantee_online_cpus(task_cs(tsk), pmask);
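The cpuset conversion from cpumask_t values to cpumask_var_t moves the masks
off the stack when CONFIG_CPUMASK_OFFSTACK is set. Every touched site now
follows the same allocate/copy/free pattern; a minimal sketch:

        cpumask_var_t mask;

        if (!alloc_cpumask_var(&mask, GFP_KERNEL)) /* may allocate off-stack */
                return -ENOMEM;
        cpumask_copy(mask, cpu_online_mask);
        /* ... operate on mask via the cpumask_*() helpers ... */
        free_cpumask_var(mask);

diff --git a/kernel/fork.c b/kernel/fork.c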
index 7b8f2a78be3db0820c479cba2d83ea8f2562be46..4018308048cf8f52db8202cf5969c14a919830d1 100644 (file)
@@ -1126,12 +1126,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 
        if (pid != &init_struct_pid) {
                retval = -ENOMEM;
-               pid = alloc_pid(task_active_pid_ns(p));
+               pid = alloc_pid(p->nsproxy->pid_ns);
                if (!pid)
                        goto bad_fork_cleanup_io;
 
                if (clone_flags & CLONE_NEWPID) {
-                       retval = pid_ns_prepare_proc(task_active_pid_ns(p));
+                       retval = pid_ns_prepare_proc(p->nsproxy->pid_ns);
                        if (retval < 0)
                                goto bad_fork_free_pid;
                }
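diff --git a/kernel/ns_cgroup.c b/kernel/ns_cgroup.c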
index 43c2111cd54de719917c0cdc9ace9e92445f4513..78bc3fdac0d262f019d1c1c4caee95e9c26aa147 100644 (file)
@@ -13,7 +13,6 @@
 
 struct ns_cgroup {
        struct cgroup_subsys_state css;
-       spinlock_t lock;
 };
 
 struct cgroup_subsys ns_subsys;
@@ -84,7 +83,6 @@ static struct cgroup_subsys_state *ns_create(struct cgroup_subsys *ss,
        ns_cgroup = kzalloc(sizeof(*ns_cgroup), GFP_KERNEL);
        if (!ns_cgroup)
                return ERR_PTR(-ENOMEM);
-       spin_lock_init(&ns_cgroup->lock);
        return &ns_cgroup->css;
 }
 
index af9224cdd6c0f4bf00f206879df9a4dcf5254d2c..1b3586fe753afc25a482f92e78b46483d90a369c 100644 (file)
@@ -474,6 +474,12 @@ pid_t task_session_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
 }
 EXPORT_SYMBOL(task_session_nr_ns);
 
+struct pid_namespace *task_active_pid_ns(struct task_struct *tsk)
+{
+       return ns_of_pid(task_pid(tsk));
+}
+EXPORT_SYMBOL_GPL(task_active_pid_ns);
+
 /*
  * Used by proc to find the first pid that is greater than or equal to nr.
  *
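With task_active_pid_ns() now derived from the task's attached struct pid,
the kernel/fork.c hunk above can no longer use it inside copy_process(): the
child has no struct pid attached at that point, so the helper would
dereference NULL. Hence the namespace is taken from nsproxy instead; sketched:

        /* inside copy_process(), before the child's pid is attached */
        pid = alloc_pid(p->nsproxy->pid_ns);    /* namespace via nsproxy */
        /* alloc_pid(task_active_pid_ns(p)) would use task_pid(p) == NULL */

diff --git a/kernel/res_counter.c b/kernel/res_counter.c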
index f275c8eca772c4dfe6bcdda6b1253a65fefe3e83..bf8e7534c803d4e7708ed6929ddd54f00d12064b 100644 (file)
 #include <linux/uaccess.h>
 #include <linux/mm.h>
 
-void res_counter_init(struct res_counter *counter)
+void res_counter_init(struct res_counter *counter, struct res_counter *parent)
 {
        spin_lock_init(&counter->lock);
        counter->limit = (unsigned long long)LLONG_MAX;
+       counter->parent = parent;
 }
 
 int res_counter_charge_locked(struct res_counter *counter, unsigned long val)
@@ -34,14 +35,34 @@ int res_counter_charge_locked(struct res_counter *counter, unsigned long val)
        return 0;
 }
 
-int res_counter_charge(struct res_counter *counter, unsigned long val)
+int res_counter_charge(struct res_counter *counter, unsigned long val,
+                       struct res_counter **limit_fail_at)
 {
        int ret;
        unsigned long flags;
-
-       spin_lock_irqsave(&counter->lock, flags);
-       ret = res_counter_charge_locked(counter, val);
-       spin_unlock_irqrestore(&counter->lock, flags);
+       struct res_counter *c, *u;
+
+       *limit_fail_at = NULL;
+       local_irq_save(flags);
+       for (c = counter; c != NULL; c = c->parent) {
+               spin_lock(&c->lock);
+               ret = res_counter_charge_locked(c, val);
+               spin_unlock(&c->lock);
+               if (ret < 0) {
+                       *limit_fail_at = c;
+                       goto undo;
+               }
+       }
+       ret = 0;
+       goto done;
+undo:
+       for (u = counter; u != c; u = u->parent) {
+               spin_lock(&u->lock);
+               res_counter_uncharge_locked(u, val);
+               spin_unlock(&u->lock);
+       }
+done:
+       local_irq_restore(flags);
        return ret;
 }
 
@@ -56,10 +77,15 @@ void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val)
 void res_counter_uncharge(struct res_counter *counter, unsigned long val)
 {
        unsigned long flags;
+       struct res_counter *c;
 
-       spin_lock_irqsave(&counter->lock, flags);
-       res_counter_uncharge_locked(counter, val);
-       spin_unlock_irqrestore(&counter->lock, flags);
+       local_irq_save(flags);
+       for (c = counter; c != NULL; c = c->parent) {
+               spin_lock(&c->lock);
+               res_counter_uncharge_locked(c, val);
+               spin_unlock(&c->lock);
+       }
+       local_irq_restore(flags);
 }
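
With the parent pointer in place, res_counter_charge() now walks up the
hierarchy, charging every level and rolling all of it back if any ancestor
is over its limit; *limit_fail_at tells the caller which level refused. A
hedged sketch of the calling pattern (mirroring what
__mem_cgroup_try_charge() does later in this diff):

    struct res_counter *fail;

    if (res_counter_charge(&mem->res, PAGE_SIZE, &fail) < 0) {
            /* fail points at whichever ancestor hit its limit, so
             * reclaim can be aimed at that group rather than at mem
             * itself (see mem_cgroup_from_res_counter() below). */
            struct mem_cgroup *over;

            over = mem_cgroup_from_res_counter(fail, res);
            mem_cgroup_hierarchical_reclaim(over, GFP_KERNEL, false);
    }
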
 
 
index e0c0b4bc3f08e822d976fe2d6c222300a61c6352..8e1352c75557308bf1e892bb260b39e9feffd10f 100644 (file)
@@ -1617,8 +1617,6 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
        }
 }
 
-#define swap(a, b) do { typeof(a) tmp = (a); (a) = (b); (b) = tmp; } while (0)
-
 /*
  * Share the fairness runtime between parent and child, thus the
  * total amount of pressure for CPU stays equal - new tasks
index 6abbaf3d5858e587705881a4ab6d45a65b8f4e89..926d00429ed23901c288bc7b620a616ae90c2eff 100644 (file)
@@ -32,11 +32,11 @@ static void generic_swap(void *a, void *b, int size)
  * @base: pointer to data to sort
  * @num: number of elements
  * @size: size of each element
- * @cmp: pointer to comparison function
- * @swap: pointer to swap function or NULL
+ * @cmp_func: pointer to comparison function
+ * @swap_func: pointer to swap function or NULL
  *
  * This function does a heapsort on the given array. You may provide a
- * swap function optimized to your element type.
+ * swap_func optimized to your element type.
  *
  * Sorting time is O(n log n) both on average and worst-case. While
  * qsort is about 20% faster on average, it suffers from exploitable
@@ -45,37 +45,39 @@ static void generic_swap(void *a, void *b, int size)
  */
 
 void sort(void *base, size_t num, size_t size,
-         int (*cmp)(const void *, const void *),
-         void (*swap)(void *, void *, int size))
+         int (*cmp_func)(const void *, const void *),
+         void (*swap_func)(void *, void *, int size))
 {
        /* pre-scale counters for performance */
        int i = (num/2 - 1) * size, n = num * size, c, r;
 
-       if (!swap)
-               swap = (size == 4 ? u32_swap : generic_swap);
+       if (!swap_func)
+               swap_func = (size == 4 ? u32_swap : generic_swap);
 
        /* heapify */
        for ( ; i >= 0; i -= size) {
                for (r = i; r * 2 + size < n; r  = c) {
                        c = r * 2 + size;
-                       if (c < n - size && cmp(base + c, base + c + size) < 0)
+                       if (c < n - size &&
+                                       cmp_func(base + c, base + c + size) < 0)
                                c += size;
-                       if (cmp(base + r, base + c) >= 0)
+                       if (cmp_func(base + r, base + c) >= 0)
                                break;
-                       swap(base + r, base + c, size);
+                       swap_func(base + r, base + c, size);
                }
        }
 
        /* sort */
        for (i = n - size; i > 0; i -= size) {
-               swap(base, base + i, size);
+               swap_func(base, base + i, size);
                for (r = 0; r * 2 + size < i; r = c) {
                        c = r * 2 + size;
-                       if (c < i - size && cmp(base + c, base + c + size) < 0)
+                       if (c < i - size &&
+                                       cmp_func(base + c, base + c + size) < 0)
                                c += size;
-                       if (cmp(base + r, base + c) >= 0)
+                       if (cmp_func(base + r, base + c) >= 0)
                                break;
-                       swap(base + r, base + c, size);
+                       swap_func(base + r, base + c, size);
                }
        }
 }
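
The cmp/swap parameters were presumably renamed to cmp_func/swap_func so
they no longer collide with a generic swap() macro (note the local swap()
#define being removed from sched_fair.c above); the call signature is
otherwise unchanged. A quick usage sketch:

    static int cmp_u32(const void *a, const void *b)
    {
            u32 x = *(const u32 *)a, y = *(const u32 *)b;

            return (x > y) - (x < y);
    }

    /* swap_func may be NULL: 4-byte elements then get the built-in
     * u32_swap, everything else falls back to generic_swap. */
    static void sort_example(u32 *buf, size_t n)
    {
            sort(buf, n, sizeof(*buf), cmp_u32, NULL);
    }
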
index 2f55a1e2baf75db8ea692a7e093aaded51147369..ceba0bd0366261740b27c8f19785ae8857eeb8c8 100644 (file)
@@ -460,7 +460,7 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
        VM_BUG_ON(!PageLocked(page));
 
        error = mem_cgroup_cache_charge(page, current->mm,
-                                       gfp_mask & ~__GFP_HIGHMEM);
+                                       gfp_mask & GFP_RECLAIM_MASK);
        if (error)
                goto out;
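
Where the old expression only stripped __GFP_HIGHMEM (a placement hint that
means nothing to the charge path), masking with GFP_RECLAIM_MASK hands memcg
just the flags that govern how reclaim may behave. GFP_RECLAIM_MASK lives in
mm/internal.h, which is why memcontrol.c below gains #include "internal.h".
A hypothetical helper showing the intent:

    static inline gfp_t memcg_charge_gfp(gfp_t gfp)
    {
            /* keep __GFP_WAIT/__GFP_IO/__GFP_FS-style reclaim bits,
             * drop placement hints such as __GFP_HIGHMEM */
            return gfp & GFP_RECLAIM_MASK;
    }
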
 
index 51ee965455798e63af02d392e44c09be8436d60d..e2996b80601f8fd20a3bb9d1216ebf3017d11afa 100644 (file)
 #include <linux/memcontrol.h>
 #include <linux/cgroup.h>
 #include <linux/mm.h>
+#include <linux/pagemap.h>
 #include <linux/smp.h>
 #include <linux/page-flags.h>
 #include <linux/backing-dev.h>
 #include <linux/bit_spinlock.h>
 #include <linux/rcupdate.h>
+#include <linux/mutex.h>
 #include <linux/slab.h>
 #include <linux/swap.h>
 #include <linux/spinlock.h>
 #include <linux/vmalloc.h>
 #include <linux/mm_inline.h>
 #include <linux/page_cgroup.h>
+#include "internal.h"
 
 #include <asm/uaccess.h>
 
 struct cgroup_subsys mem_cgroup_subsys __read_mostly;
 #define MEM_CGROUP_RECLAIM_RETRIES     5
 
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
+/* Turned on only when memory cgroup is enabled && really_do_swap_account != 0 */
+int do_swap_account __read_mostly;
+static int really_do_swap_account __initdata = 1; /* remembers the boot option */
+#else
+#define do_swap_account                (0)
+#endif
+
+static DEFINE_MUTEX(memcg_tasklist);   /* can be held under cgroup_mutex */
+
 /*
  * Statistics for memory cgroup.
  */
@@ -60,7 +73,7 @@ struct mem_cgroup_stat_cpu {
 } ____cacheline_aligned_in_smp;
 
 struct mem_cgroup_stat {
-       struct mem_cgroup_stat_cpu cpustat[NR_CPUS];
+       struct mem_cgroup_stat_cpu cpustat[0];
 };
 
 /*
@@ -89,9 +102,10 @@ struct mem_cgroup_per_zone {
        /*
         * spin_lock to protect the per cgroup LRU
         */
-       spinlock_t              lru_lock;
        struct list_head        lists[NR_LRU_LISTS];
        unsigned long           count[NR_LRU_LISTS];
+
+       struct zone_reclaim_stat reclaim_stat;
 };
 /* Macro for accessing counter */
 #define MEM_CGROUP_ZSTAT(mz, idx)      ((mz)->count[(idx)])
@@ -121,45 +135,74 @@ struct mem_cgroup {
         * the counter to account for memory usage
         */
        struct res_counter res;
+       /*
+        * the counter to account for mem+swap usage.
+        */
+       struct res_counter memsw;
        /*
         * Per cgroup active and inactive list, similar to the
         * per zone LRU lists.
         */
        struct mem_cgroup_lru_info info;
 
+       /*
+        * protects reclaim-related members.
+        */
+       spinlock_t reclaim_param_lock;
+
        int     prev_priority;  /* for recording reclaim priority */
+
        /*
-        * statistics.
+        * While reclaiming in a hierarchy, we cache the last child we
+        * reclaimed from. Protected by hierarchy_mutex
+        */
+       struct mem_cgroup *last_scanned_child;
+       /*
+        * Should the accounting and control be hierarchical, per subtree?
+        */
+       bool use_hierarchy;
+       unsigned long   last_oom_jiffies;
+       atomic_t        refcnt;
+
+       unsigned int    swappiness;
+
+       /*
+        * statistics. This must be placed at the end of memcg.
         */
        struct mem_cgroup_stat stat;
 };
-static struct mem_cgroup init_mem_cgroup;
 
 enum charge_type {
        MEM_CGROUP_CHARGE_TYPE_CACHE = 0,
        MEM_CGROUP_CHARGE_TYPE_MAPPED,
        MEM_CGROUP_CHARGE_TYPE_SHMEM,   /* used by page migration of shmem */
        MEM_CGROUP_CHARGE_TYPE_FORCE,   /* used by force_empty */
+       MEM_CGROUP_CHARGE_TYPE_SWAPOUT, /* for accounting swapcache */
        NR_CHARGE_TYPE,
 };
 
 /* only for here (for easy reading.) */
 #define PCGF_CACHE     (1UL << PCG_CACHE)
 #define PCGF_USED      (1UL << PCG_USED)
-#define PCGF_ACTIVE    (1UL << PCG_ACTIVE)
 #define PCGF_LOCK      (1UL << PCG_LOCK)
-#define PCGF_FILE      (1UL << PCG_FILE)
 static const unsigned long
 pcg_default_flags[NR_CHARGE_TYPE] = {
-       PCGF_CACHE | PCGF_FILE | PCGF_USED | PCGF_LOCK, /* File Cache */
-       PCGF_ACTIVE | PCGF_USED | PCGF_LOCK, /* Anon */
-       PCGF_ACTIVE | PCGF_CACHE | PCGF_USED | PCGF_LOCK, /* Shmem */
+       PCGF_CACHE | PCGF_USED | PCGF_LOCK, /* File Cache */
+       PCGF_USED | PCGF_LOCK, /* Anon */
+       PCGF_CACHE | PCGF_USED | PCGF_LOCK, /* Shmem */
        0, /* FORCE */
 };
 
-/*
- * Always modified under lru lock. Then, not necessary to preempt_disable()
- */
+/* for encoding cft->private value on file */
+#define _MEM                   (0)
+#define _MEMSWAP               (1)
+#define MEMFILE_PRIVATE(x, val)        (((x) << 16) | (val))
+#define MEMFILE_TYPE(val)      (((val) >> 16) & 0xffff)
+#define MEMFILE_ATTR(val)      ((val) & 0xffff)
+
+static void mem_cgroup_get(struct mem_cgroup *mem);
+static void mem_cgroup_put(struct mem_cgroup *mem);
+
 static void mem_cgroup_charge_statistics(struct mem_cgroup *mem,
                                         struct page_cgroup *pc,
                                         bool charge)
@@ -167,10 +210,9 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *mem,
        int val = (charge)? 1 : -1;
        struct mem_cgroup_stat *stat = &mem->stat;
        struct mem_cgroup_stat_cpu *cpustat;
+       int cpu = get_cpu();
 
-       VM_BUG_ON(!irqs_disabled());
-
-       cpustat = &stat->cpustat[smp_processor_id()];
+       cpustat = &stat->cpustat[cpu];
        if (PageCgroupCache(pc))
                __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_CACHE, val);
        else
@@ -182,6 +224,7 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *mem,
        else
                __mem_cgroup_stat_add_safe(cpustat,
                                MEM_CGROUP_STAT_PGPGOUT_COUNT, 1);
+       put_cpu();
 }
 
 static struct mem_cgroup_per_zone *
@@ -197,6 +240,9 @@ page_cgroup_zoneinfo(struct page_cgroup *pc)
        int nid = page_cgroup_nid(pc);
        int zid = page_cgroup_zid(pc);
 
+       if (!mem)
+               return NULL;
+
        return mem_cgroup_zoneinfo(mem, nid, zid);
 }
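
The statistics hunk above drops the assumption that interrupts are disabled:
instead of smp_processor_id(), it pins the task with get_cpu() for the
duration of the per-CPU update and releases it with put_cpu(). The pattern
in isolation (a generic sketch, not memcg-specific):

    static void percpu_bump(int *counters)
    {
            int cpu = get_cpu();    /* disables preemption */

            counters[cpu]++;        /* safe: no migration until put_cpu() */
            put_cpu();              /* re-enables preemption */
    }
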
 
@@ -236,77 +282,152 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
                                struct mem_cgroup, css);
 }
 
-static void __mem_cgroup_remove_list(struct mem_cgroup_per_zone *mz,
-                       struct page_cgroup *pc)
+static struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
 {
-       int lru = LRU_BASE;
+       struct mem_cgroup *mem = NULL;
+       /*
+        * Because we take no locks, mm->owner may be being moved to another
+        * cgroup. We use css_tryget() here even though it may look
+        * pessimistic (rather than adding locks here).
+        */
+       rcu_read_lock();
+       do {
+               mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
+               if (unlikely(!mem))
+                       break;
+       } while (!css_tryget(&mem->css));
+       rcu_read_unlock();
+       return mem;
+}
 
-       if (PageCgroupUnevictable(pc))
-               lru = LRU_UNEVICTABLE;
-       else {
-               if (PageCgroupActive(pc))
-                       lru += LRU_ACTIVE;
-               if (PageCgroupFile(pc))
-                       lru += LRU_FILE;
-       }
+static bool mem_cgroup_is_obsolete(struct mem_cgroup *mem)
+{
+       if (!mem)
+               return true;
+       return css_is_removed(&mem->css);
+}
 
-       MEM_CGROUP_ZSTAT(mz, lru) -= 1;
+/*
+ * Following LRU functions are allowed to be used without PCG_LOCK.
+ * Operations are called by routine of global LRU independently from memcg.
+ * What we have to take care of here is validness of pc->mem_cgroup.
+ *
+ * Changes to pc->mem_cgroup happens when
+ * 1. charge
+ * 2. moving account
+ * In the typical case, "charge" is done before add-to-lru. The exception is
+ * SwapCache, which is added to the LRU before being charged.
+ * If PCG_USED bit is not set, page_cgroup is not added to this private LRU.
+ * When moving account, the page is not on LRU. It's isolated.
+ */
 
-       mem_cgroup_charge_statistics(pc->mem_cgroup, pc, false);
-       list_del(&pc->lru);
+void mem_cgroup_del_lru_list(struct page *page, enum lru_list lru)
+{
+       struct page_cgroup *pc;
+       struct mem_cgroup *mem;
+       struct mem_cgroup_per_zone *mz;
+
+       if (mem_cgroup_disabled())
+               return;
+       pc = lookup_page_cgroup(page);
+       /* can happen while we handle swapcache. */
+       if (list_empty(&pc->lru) || !pc->mem_cgroup)
+               return;
+       /*
+        * We don't check PCG_USED bit. It's cleared when the "page" is finally
+        * removed from global LRU.
+        */
+       mz = page_cgroup_zoneinfo(pc);
+       mem = pc->mem_cgroup;
+       MEM_CGROUP_ZSTAT(mz, lru) -= 1;
+       list_del_init(&pc->lru);
+       return;
 }
 
-static void __mem_cgroup_add_list(struct mem_cgroup_per_zone *mz,
-                               struct page_cgroup *pc)
+void mem_cgroup_del_lru(struct page *page)
 {
-       int lru = LRU_BASE;
+       mem_cgroup_del_lru_list(page, page_lru(page));
+}
 
-       if (PageCgroupUnevictable(pc))
-               lru = LRU_UNEVICTABLE;
-       else {
-               if (PageCgroupActive(pc))
-                       lru += LRU_ACTIVE;
-               if (PageCgroupFile(pc))
-                       lru += LRU_FILE;
-       }
+void mem_cgroup_rotate_lru_list(struct page *page, enum lru_list lru)
+{
+       struct mem_cgroup_per_zone *mz;
+       struct page_cgroup *pc;
 
-       MEM_CGROUP_ZSTAT(mz, lru) += 1;
-       list_add(&pc->lru, &mz->lists[lru]);
+       if (mem_cgroup_disabled())
+               return;
 
-       mem_cgroup_charge_statistics(pc->mem_cgroup, pc, true);
+       pc = lookup_page_cgroup(page);
+       smp_rmb();
+       /* unused page is not rotated. */
+       if (!PageCgroupUsed(pc))
+               return;
+       mz = page_cgroup_zoneinfo(pc);
+       list_move(&pc->lru, &mz->lists[lru]);
 }
 
-static void __mem_cgroup_move_lists(struct page_cgroup *pc, enum lru_list lru)
+void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru)
 {
-       struct mem_cgroup_per_zone *mz = page_cgroup_zoneinfo(pc);
-       int active    = PageCgroupActive(pc);
-       int file      = PageCgroupFile(pc);
-       int unevictable = PageCgroupUnevictable(pc);
-       enum lru_list from = unevictable ? LRU_UNEVICTABLE :
-                               (LRU_FILE * !!file + !!active);
+       struct page_cgroup *pc;
+       struct mem_cgroup_per_zone *mz;
 
-       if (lru == from)
+       if (mem_cgroup_disabled())
+               return;
+       pc = lookup_page_cgroup(page);
+       /* barrier to sync with "charge" */
+       smp_rmb();
+       if (!PageCgroupUsed(pc))
                return;
 
-       MEM_CGROUP_ZSTAT(mz, from) -= 1;
+       mz = page_cgroup_zoneinfo(pc);
+       MEM_CGROUP_ZSTAT(mz, lru) += 1;
+       list_add(&pc->lru, &mz->lists[lru]);
+}
+
+/*
+ * When handling SwapCache, pc->mem_cgroup may change while the page is still
+ * linked to the LRU, because the page may be reused after it is fully
+ * uncharged (SwapCache behavior). To handle that, unlink the page_cgroup
+ * from the LRU before charging it again. This function is only used to
+ * charge SwapCache; it runs under lock_page(), and zone->lru_lock is
+ * expected never to be held.
+ */
+static void mem_cgroup_lru_del_before_commit_swapcache(struct page *page)
+{
+       unsigned long flags;
+       struct zone *zone = page_zone(page);
+       struct page_cgroup *pc = lookup_page_cgroup(page);
+
+       spin_lock_irqsave(&zone->lru_lock, flags);
        /*
-        * However this is done under mz->lru_lock, another flags, which
-        * are not related to LRU, will be modified from out-of-lock.
-        * We have to use atomic set/clear flags.
+        * Forget old LRU when this page_cgroup is *not* used. This Used bit
+        * is guarded by lock_page() because the page is SwapCache.
         */
-       if (is_unevictable_lru(lru)) {
-               ClearPageCgroupActive(pc);
-               SetPageCgroupUnevictable(pc);
-       } else {
-               if (is_active_lru(lru))
-                       SetPageCgroupActive(pc);
-               else
-                       ClearPageCgroupActive(pc);
-               ClearPageCgroupUnevictable(pc);
-       }
+       if (!PageCgroupUsed(pc))
+               mem_cgroup_del_lru_list(page, page_lru(page));
+       spin_unlock_irqrestore(&zone->lru_lock, flags);
+}
 
-       MEM_CGROUP_ZSTAT(mz, lru) += 1;
-       list_move(&pc->lru, &mz->lists[lru]);
+static void mem_cgroup_lru_add_after_commit_swapcache(struct page *page)
+{
+       unsigned long flags;
+       struct zone *zone = page_zone(page);
+       struct page_cgroup *pc = lookup_page_cgroup(page);
+
+       spin_lock_irqsave(&zone->lru_lock, flags);
+       /* link when the page is linked to LRU but page_cgroup isn't */
+       if (PageLRU(page) && list_empty(&pc->lru))
+               mem_cgroup_add_lru_list(page, page_lru(page));
+       spin_unlock_irqrestore(&zone->lru_lock, flags);
+}
+
+
+void mem_cgroup_move_lists(struct page *page,
+                          enum lru_list from, enum lru_list to)
+{
+       if (mem_cgroup_disabled())
+               return;
+       mem_cgroup_del_lru_list(page, from);
+       mem_cgroup_add_lru_list(page, to);
 }
 
 int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem)
@@ -319,37 +440,6 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem)
        return ret;
 }
 
-/*
- * This routine assumes that the appropriate zone's lru lock is already held
- */
-void mem_cgroup_move_lists(struct page *page, enum lru_list lru)
-{
-       struct page_cgroup *pc;
-       struct mem_cgroup_per_zone *mz;
-       unsigned long flags;
-
-       if (mem_cgroup_subsys.disabled)
-               return;
-
-       /*
-        * We cannot lock_page_cgroup while holding zone's lru_lock,
-        * because other holders of lock_page_cgroup can be interrupted
-        * with an attempt to rotate_reclaimable_page.  But we cannot
-        * safely get to page_cgroup without it, so just try_lock it:
-        * mem_cgroup_isolate_pages allows for page left on wrong list.
-        */
-       pc = lookup_page_cgroup(page);
-       if (!trylock_page_cgroup(pc))
-               return;
-       if (pc && PageCgroupUsed(pc)) {
-               mz = page_cgroup_zoneinfo(pc);
-               spin_lock_irqsave(&mz->lru_lock, flags);
-               __mem_cgroup_move_lists(pc, lru);
-               spin_unlock_irqrestore(&mz->lru_lock, flags);
-       }
-       unlock_page_cgroup(pc);
-}
-
 /*
  * Calculate mapped_ratio under memory controller. This will be used in
  * vmscan.c for determining whether we have to reclaim mapped pages.
@@ -372,39 +462,108 @@ int mem_cgroup_calc_mapped_ratio(struct mem_cgroup *mem)
  */
 int mem_cgroup_get_reclaim_priority(struct mem_cgroup *mem)
 {
-       return mem->prev_priority;
+       int prev_priority;
+
+       spin_lock(&mem->reclaim_param_lock);
+       prev_priority = mem->prev_priority;
+       spin_unlock(&mem->reclaim_param_lock);
+
+       return prev_priority;
 }
 
 void mem_cgroup_note_reclaim_priority(struct mem_cgroup *mem, int priority)
 {
+       spin_lock(&mem->reclaim_param_lock);
        if (priority < mem->prev_priority)
                mem->prev_priority = priority;
+       spin_unlock(&mem->reclaim_param_lock);
 }
 
 void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem, int priority)
 {
+       spin_lock(&mem->reclaim_param_lock);
        mem->prev_priority = priority;
+       spin_unlock(&mem->reclaim_param_lock);
 }
 
-/*
- * Calculate # of pages to be scanned in this priority/zone.
- * See also vmscan.c
- *
- * priority starts from "DEF_PRIORITY" and decremented in each loop.
- * (see include/linux/mmzone.h)
- */
+static int calc_inactive_ratio(struct mem_cgroup *memcg, unsigned long *present_pages)
+{
+       unsigned long active;
+       unsigned long inactive;
+       unsigned long gb;
+       unsigned long inactive_ratio;
+
+       inactive = mem_cgroup_get_all_zonestat(memcg, LRU_INACTIVE_ANON);
+       active = mem_cgroup_get_all_zonestat(memcg, LRU_ACTIVE_ANON);
+
+       gb = (inactive + active) >> (30 - PAGE_SHIFT);
+       if (gb)
+               inactive_ratio = int_sqrt(10 * gb);
+       else
+               inactive_ratio = 1;
+
+       if (present_pages) {
+               present_pages[0] = inactive;
+               present_pages[1] = active;
+       }
+
+       return inactive_ratio;
+}
 
-long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone,
-                                       int priority, enum lru_list lru)
+int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg)
+{
+       unsigned long active;
+       unsigned long inactive;
+       unsigned long present_pages[2];
+       unsigned long inactive_ratio;
+
+       inactive_ratio = calc_inactive_ratio(memcg, present_pages);
+
+       inactive = present_pages[0];
+       active = present_pages[1];
+
+       if (inactive * inactive_ratio < active)
+               return 1;
+
+       return 0;
+}
+
+unsigned long mem_cgroup_zone_nr_pages(struct mem_cgroup *memcg,
+                                      struct zone *zone,
+                                      enum lru_list lru)
 {
-       long nr_pages;
        int nid = zone->zone_pgdat->node_id;
        int zid = zone_idx(zone);
-       struct mem_cgroup_per_zone *mz = mem_cgroup_zoneinfo(mem, nid, zid);
+       struct mem_cgroup_per_zone *mz = mem_cgroup_zoneinfo(memcg, nid, zid);
+
+       return MEM_CGROUP_ZSTAT(mz, lru);
+}
 
-       nr_pages = MEM_CGROUP_ZSTAT(mz, lru);
+struct zone_reclaim_stat *mem_cgroup_get_reclaim_stat(struct mem_cgroup *memcg,
+                                                     struct zone *zone)
+{
+       int nid = zone->zone_pgdat->node_id;
+       int zid = zone_idx(zone);
+       struct mem_cgroup_per_zone *mz = mem_cgroup_zoneinfo(memcg, nid, zid);
 
-       return (nr_pages >> priority);
+       return &mz->reclaim_stat;
+}
+
+struct zone_reclaim_stat *
+mem_cgroup_get_reclaim_stat_from_page(struct page *page)
+{
+       struct page_cgroup *pc;
+       struct mem_cgroup_per_zone *mz;
+
+       if (mem_cgroup_disabled())
+               return NULL;
+
+       pc = lookup_page_cgroup(page);
+       mz = page_cgroup_zoneinfo(pc);
+       if (!mz)
+               return NULL;
+
+       return &mz->reclaim_stat;
 }
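
calc_inactive_ratio() above scales the target inactive:active anon balance
with the size of the group: the >> (30 - PAGE_SHIFT) turns a page count into
whole GiB, and the ratio is int_sqrt(10 * gb), clamped to 1 for groups under
a gigabyte. A worked example (assuming 4 KiB pages, PAGE_SHIFT == 12):

    static unsigned long inactive_ratio_example(unsigned long anon_pages)
    {
            /* 4 GiB of anon: gb = 4, ratio = int_sqrt(40) = 6, so
             * inactive anon counts as "low" once inactive * 6 < active.
             * Under 1 GiB: gb = 0 and the ratio is simply 1. */
            unsigned long gb = anon_pages >> (30 - 12);

            return gb ? int_sqrt(10 * gb) : 1;
    }
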
 
 unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
@@ -429,95 +588,281 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
        mz = mem_cgroup_zoneinfo(mem_cont, nid, zid);
        src = &mz->lists[lru];
 
-       spin_lock(&mz->lru_lock);
        scan = 0;
        list_for_each_entry_safe_reverse(pc, tmp, src, lru) {
                if (scan >= nr_to_scan)
                        break;
+
+               page = pc->page;
                if (unlikely(!PageCgroupUsed(pc)))
                        continue;
-               page = pc->page;
-
                if (unlikely(!PageLRU(page)))
                        continue;
 
-               /*
-                * TODO: play better with lumpy reclaim, grabbing anything.
-                */
-               if (PageUnevictable(page) ||
-                   (PageActive(page) && !active) ||
-                   (!PageActive(page) && active)) {
-                       __mem_cgroup_move_lists(pc, page_lru(page));
-                       continue;
-               }
-
                scan++;
-               list_move(&pc->lru, &pc_list);
-
                if (__isolate_lru_page(page, mode, file) == 0) {
                        list_move(&page->lru, dst);
                        nr_taken++;
                }
        }
 
-       list_splice(&pc_list, src);
-       spin_unlock(&mz->lru_lock);
-
        *scanned = scan;
        return nr_taken;
 }
 
+#define mem_cgroup_from_res_counter(counter, member)   \
+       container_of(counter, struct mem_cgroup, member)
+
 /*
- * Charge the memory controller for page usage.
- * Return
- * 0 if the charge was successful
- * < 0 if the cgroup is over its limit
+ * This routine finds the DFS walk successor. This routine should be
+ * called with hierarchy_mutex held.
  */
-static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
-                               gfp_t gfp_mask, enum charge_type ctype,
-                               struct mem_cgroup *memcg)
+static struct mem_cgroup *
+mem_cgroup_get_next_node(struct mem_cgroup *curr, struct mem_cgroup *root_mem)
+{
+       struct cgroup *cgroup, *curr_cgroup, *root_cgroup;
+
+       curr_cgroup = curr->css.cgroup;
+       root_cgroup = root_mem->css.cgroup;
+
+       if (!list_empty(&curr_cgroup->children)) {
+               /*
+                * Walk down to children
+                */
+               mem_cgroup_put(curr);
+               cgroup = list_entry(curr_cgroup->children.next,
+                                               struct cgroup, sibling);
+               curr = mem_cgroup_from_cont(cgroup);
+               mem_cgroup_get(curr);
+               goto done;
+       }
+
+visit_parent:
+       if (curr_cgroup == root_cgroup) {
+               mem_cgroup_put(curr);
+               curr = root_mem;
+               mem_cgroup_get(curr);
+               goto done;
+       }
+
+       /*
+        * Goto next sibling
+        */
+       if (curr_cgroup->sibling.next != &curr_cgroup->parent->children) {
+               mem_cgroup_put(curr);
+               cgroup = list_entry(curr_cgroup->sibling.next, struct cgroup,
+                                               sibling);
+               curr = mem_cgroup_from_cont(cgroup);
+               mem_cgroup_get(curr);
+               goto done;
+       }
+
+       /*
+        * Go up to next parent and next parent's sibling if need be
+        */
+       curr_cgroup = curr_cgroup->parent;
+       goto visit_parent;
+
+done:
+       root_mem->last_scanned_child = curr;
+       return curr;
+}
+
+/*
+ * Visit the first child (need not be the first child as per the ordering
+ * of the cgroup list, since we track last_scanned_child) of @mem and use
+ * that to reclaim free pages from.
+ */
+static struct mem_cgroup *
+mem_cgroup_get_first_node(struct mem_cgroup *root_mem)
+{
+       struct cgroup *cgroup;
+       struct mem_cgroup *ret;
+       bool obsolete;
+
+       obsolete = mem_cgroup_is_obsolete(root_mem->last_scanned_child);
+
+       /*
+        * Scan all children under the mem_cgroup mem
+        */
+       mutex_lock(&mem_cgroup_subsys.hierarchy_mutex);
+       if (list_empty(&root_mem->css.cgroup->children)) {
+               ret = root_mem;
+               goto done;
+       }
+
+       if (!root_mem->last_scanned_child || obsolete) {
+
+               if (obsolete && root_mem->last_scanned_child)
+                       mem_cgroup_put(root_mem->last_scanned_child);
+
+               cgroup = list_first_entry(&root_mem->css.cgroup->children,
+                               struct cgroup, sibling);
+               ret = mem_cgroup_from_cont(cgroup);
+               mem_cgroup_get(ret);
+       } else
+               ret = mem_cgroup_get_next_node(root_mem->last_scanned_child,
+                                               root_mem);
+
+done:
+       root_mem->last_scanned_child = ret;
+       mutex_unlock(&mem_cgroup_subsys.hierarchy_mutex);
+       return ret;
+}
+
+static bool mem_cgroup_check_under_limit(struct mem_cgroup *mem)
+{
+       if (do_swap_account) {
+               if (res_counter_check_under_limit(&mem->res) &&
+                       res_counter_check_under_limit(&mem->memsw))
+                       return true;
+       } else
+               if (res_counter_check_under_limit(&mem->res))
+                       return true;
+       return false;
+}
+
+static unsigned int get_swappiness(struct mem_cgroup *memcg)
+{
+       struct cgroup *cgrp = memcg->css.cgroup;
+       unsigned int swappiness;
+
+       /* root ? */
+       if (cgrp->parent == NULL)
+               return vm_swappiness;
+
+       spin_lock(&memcg->reclaim_param_lock);
+       swappiness = memcg->swappiness;
+       spin_unlock(&memcg->reclaim_param_lock);
+
+       return swappiness;
+}
+
+/*
+ * Dance down the hierarchy if needed to reclaim memory. We remember the
+ * last child we reclaimed from, so that we don't end up penalizing
+ * one child extensively based on its position in the children list.
+ *
+ * root_mem is the original ancestor that we've been reclaiming from.
+ */
+static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
+                                               gfp_t gfp_mask, bool noswap)
+{
+       struct mem_cgroup *next_mem;
+       int ret = 0;
+
+       /*
+        * Reclaim unconditionally and don't check for return value.
+        * We need to reclaim in the current group and down the tree.
+        * One might think about checking for children before reclaiming,
+        * but there might be left over accounting, even after children
+        * have left.
+        */
+       ret = try_to_free_mem_cgroup_pages(root_mem, gfp_mask, noswap,
+                                          get_swappiness(root_mem));
+       if (mem_cgroup_check_under_limit(root_mem))
+               return 0;
+       if (!root_mem->use_hierarchy)
+               return ret;
+
+       next_mem = mem_cgroup_get_first_node(root_mem);
+
+       while (next_mem != root_mem) {
+               if (mem_cgroup_is_obsolete(next_mem)) {
+                       mem_cgroup_put(next_mem);
+                       next_mem = mem_cgroup_get_first_node(root_mem);
+                       continue;
+               }
+               ret = try_to_free_mem_cgroup_pages(next_mem, gfp_mask, noswap,
+                                                  get_swappiness(next_mem));
+               if (mem_cgroup_check_under_limit(root_mem))
+                       return 0;
+               mutex_lock(&mem_cgroup_subsys.hierarchy_mutex);
+               next_mem = mem_cgroup_get_next_node(next_mem, root_mem);
+               mutex_unlock(&mem_cgroup_subsys.hierarchy_mutex);
+       }
+       return ret;
+}
+
+bool mem_cgroup_oom_called(struct task_struct *task)
 {
+       bool ret = false;
        struct mem_cgroup *mem;
-       struct page_cgroup *pc;
-       unsigned long nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
-       struct mem_cgroup_per_zone *mz;
-       unsigned long flags;
+       struct mm_struct *mm;
 
-       pc = lookup_page_cgroup(page);
-       /* can happen at boot */
-       if (unlikely(!pc))
+       rcu_read_lock();
+       mm = task->mm;
+       if (!mm)
+               mm = &init_mm;
+       mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
+       if (mem && time_before(jiffies, mem->last_oom_jiffies + HZ/10))
+               ret = true;
+       rcu_read_unlock();
+       return ret;
+}
+/*
+ * Unlike the exported interface, an "oom" parameter is added. If oom==true,
+ * the oom-killer can be invoked.
+ */
+static int __mem_cgroup_try_charge(struct mm_struct *mm,
+                       gfp_t gfp_mask, struct mem_cgroup **memcg,
+                       bool oom)
+{
+       struct mem_cgroup *mem, *mem_over_limit;
+       int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
+       struct res_counter *fail_res;
+
+       if (unlikely(test_thread_flag(TIF_MEMDIE))) {
+               /* Don't account this! */
+               *memcg = NULL;
                return 0;
-       prefetchw(pc);
+       }
+
        /*
         * We always charge the cgroup the mm_struct belongs to.
         * The mm_struct's mem_cgroup changes on task migration if the
         * thread group leader migrates. It's possible that mm is not
         * set, if so charge the init_mm (happens for pagecache usage).
         */
-
-       if (likely(!memcg)) {
-               rcu_read_lock();
-               mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
-               if (unlikely(!mem)) {
-                       rcu_read_unlock();
-                       return 0;
-               }
-               /*
-                * For every charge from the cgroup, increment reference count
-                */
-               css_get(&mem->css);
-               rcu_read_unlock();
+       mem = *memcg;
+       if (likely(!mem)) {
+               mem = try_get_mem_cgroup_from_mm(mm);
+               *memcg = mem;
        } else {
-               mem = memcg;
-               css_get(&memcg->css);
+               css_get(&mem->css);
        }
+       if (unlikely(!mem))
+               return 0;
+
+       VM_BUG_ON(mem_cgroup_is_obsolete(mem));
+
+       while (1) {
+               int ret;
+               bool noswap = false;
+
+               ret = res_counter_charge(&mem->res, PAGE_SIZE, &fail_res);
+               if (likely(!ret)) {
+                       if (!do_swap_account)
+                               break;
+                       ret = res_counter_charge(&mem->memsw, PAGE_SIZE,
+                                                       &fail_res);
+                       if (likely(!ret))
+                               break;
+                       /* mem+swap counter fails */
+                       res_counter_uncharge(&mem->res, PAGE_SIZE);
+                       noswap = true;
+                       mem_over_limit = mem_cgroup_from_res_counter(fail_res,
+                                                                       memsw);
+               } else
+                       /* mem counter fails */
+                       mem_over_limit = mem_cgroup_from_res_counter(fail_res,
+                                                                       res);
 
-       while (unlikely(res_counter_charge(&mem->res, PAGE_SIZE))) {
                if (!(gfp_mask & __GFP_WAIT))
-                       goto out;
+                       goto nomem;
 
-               if (try_to_free_mem_cgroup_pages(mem, gfp_mask))
-                       continue;
+               ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, gfp_mask,
+                                                       noswap);
 
                /*
                 * try_to_free_mem_cgroup_pages() might not give us a full
@@ -525,49 +870,214 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
                 * moved to swap cache or just unmapped from the cgroup.
                 * Check the limit again to see if the reclaim reduced the
                 * current usage of the cgroup before giving up
+                *
                 */
-               if (res_counter_check_under_limit(&mem->res))
+               if (mem_cgroup_check_under_limit(mem_over_limit))
                        continue;
 
                if (!nr_retries--) {
-                       mem_cgroup_out_of_memory(mem, gfp_mask);
-                       goto out;
+                       if (oom) {
+                               mutex_lock(&memcg_tasklist);
+                               mem_cgroup_out_of_memory(mem_over_limit, gfp_mask);
+                               mutex_unlock(&memcg_tasklist);
+                               mem_over_limit->last_oom_jiffies = jiffies;
+                       }
+                       goto nomem;
                }
        }
+       return 0;
+nomem:
+       css_put(&mem->css);
+       return -ENOMEM;
+}
 
+static struct mem_cgroup *try_get_mem_cgroup_from_swapcache(struct page *page)
+{
+       struct mem_cgroup *mem;
+       swp_entry_t ent;
+
+       if (!PageSwapCache(page))
+               return NULL;
+
+       ent.val = page_private(page);
+       mem = lookup_swap_cgroup(ent);
+       if (!mem)
+               return NULL;
+       if (!css_tryget(&mem->css))
+               return NULL;
+       return mem;
+}
+
+/*
+ * commit a charge got by __mem_cgroup_try_charge() and makes page_cgroup to be
+ * USED state. If already USED, uncharge and return.
+ */
+
+static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
+                                    struct page_cgroup *pc,
+                                    enum charge_type ctype)
+{
+       /* try_charge() can return NULL to *memcg; handle that case here. */
+       if (!mem)
+               return;
 
        lock_page_cgroup(pc);
        if (unlikely(PageCgroupUsed(pc))) {
                unlock_page_cgroup(pc);
                res_counter_uncharge(&mem->res, PAGE_SIZE);
+               if (do_swap_account)
+                       res_counter_uncharge(&mem->memsw, PAGE_SIZE);
                css_put(&mem->css);
-
-               goto done;
+               return;
        }
        pc->mem_cgroup = mem;
-       /*
-        * If a page is accounted as a page cache, insert to inactive list.
-        * If anon, insert to active list.
-        */
+       smp_wmb();
        pc->flags = pcg_default_flags[ctype];
 
-       mz = page_cgroup_zoneinfo(pc);
+       mem_cgroup_charge_statistics(mem, pc, true);
 
-       spin_lock_irqsave(&mz->lru_lock, flags);
-       __mem_cgroup_add_list(mz, pc);
-       spin_unlock_irqrestore(&mz->lru_lock, flags);
        unlock_page_cgroup(pc);
+}
 
-done:
-       return 0;
+/**
+ * mem_cgroup_move_account - move account of the page
+ * @pc:        page_cgroup of the page.
+ * @from: mem_cgroup which the page is moved from.
+ * @to:        mem_cgroup which the page is moved to. @from != @to.
+ *
+ * The caller must confirm following.
+ * - page is not on LRU (isolate_page() is useful.)
+ *
+ * returns 0 at success,
+ * returns -EBUSY when lock is busy or "pc" is unstable.
+ *
+ * This function does "uncharge" from old cgroup but doesn't do "charge" to
+ * new cgroup. It should be done by a caller.
+ */
+
+static int mem_cgroup_move_account(struct page_cgroup *pc,
+       struct mem_cgroup *from, struct mem_cgroup *to)
+{
+       struct mem_cgroup_per_zone *from_mz, *to_mz;
+       int nid, zid;
+       int ret = -EBUSY;
+
+       VM_BUG_ON(from == to);
+       VM_BUG_ON(PageLRU(pc->page));
+
+       nid = page_cgroup_nid(pc);
+       zid = page_cgroup_zid(pc);
+       from_mz =  mem_cgroup_zoneinfo(from, nid, zid);
+       to_mz =  mem_cgroup_zoneinfo(to, nid, zid);
+
+       if (!trylock_page_cgroup(pc))
+               return ret;
+
+       if (!PageCgroupUsed(pc))
+               goto out;
+
+       if (pc->mem_cgroup != from)
+               goto out;
+
+       css_put(&from->css);
+       res_counter_uncharge(&from->res, PAGE_SIZE);
+       mem_cgroup_charge_statistics(from, pc, false);
+       if (do_swap_account)
+               res_counter_uncharge(&from->memsw, PAGE_SIZE);
+       pc->mem_cgroup = to;
+       mem_cgroup_charge_statistics(to, pc, true);
+       css_get(&to->css);
+       ret = 0;
 out:
-       css_put(&mem->css);
-       return -ENOMEM;
+       unlock_page_cgroup(pc);
+       return ret;
+}
+
+/*
+ * move charges to its parent.
+ */
+
+static int mem_cgroup_move_parent(struct page_cgroup *pc,
+                                 struct mem_cgroup *child,
+                                 gfp_t gfp_mask)
+{
+       struct page *page = pc->page;
+       struct cgroup *cg = child->css.cgroup;
+       struct cgroup *pcg = cg->parent;
+       struct mem_cgroup *parent;
+       int ret;
+
+       /* Is ROOT ? */
+       if (!pcg)
+               return -EINVAL;
+
+       parent = mem_cgroup_from_cont(pcg);
+
+       ret = __mem_cgroup_try_charge(NULL, gfp_mask, &parent, false);
+       if (ret || !parent)
+               return ret;
+
+       if (!get_page_unless_zero(page))
+               return -EBUSY;
+
+       ret = isolate_lru_page(page);
+
+       if (ret)
+               goto cancel;
+
+       ret = mem_cgroup_move_account(pc, child, parent);
+
+       /* drop extra refcnt by try_charge() (move_account increment one) */
+       css_put(&parent->css);
+       putback_lru_page(page);
+       if (!ret) {
+               put_page(page);
+               return 0;
+       }
+       /* uncharge if move fails */
+cancel:
+       res_counter_uncharge(&parent->res, PAGE_SIZE);
+       if (do_swap_account)
+               res_counter_uncharge(&parent->memsw, PAGE_SIZE);
+       put_page(page);
+       return ret;
+}
+
+/*
+ * Charge the memory controller for page usage.
+ * Return
+ * 0 if the charge was successful
+ * < 0 if the cgroup is over its limit
+ */
+static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
+                               gfp_t gfp_mask, enum charge_type ctype,
+                               struct mem_cgroup *memcg)
+{
+       struct mem_cgroup *mem;
+       struct page_cgroup *pc;
+       int ret;
+
+       pc = lookup_page_cgroup(page);
+       /* can happen at boot */
+       if (unlikely(!pc))
+               return 0;
+       prefetchw(pc);
+
+       mem = memcg;
+       ret = __mem_cgroup_try_charge(mm, gfp_mask, &mem, true);
+       if (ret || !mem)
+               return ret;
+
+       __mem_cgroup_commit_charge(mem, pc, ctype);
+       return 0;
 }
 
-int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask)
+int mem_cgroup_newpage_charge(struct page *page,
+                             struct mm_struct *mm, gfp_t gfp_mask)
 {
-       if (mem_cgroup_subsys.disabled)
+       if (mem_cgroup_disabled())
                return 0;
        if (PageCompound(page))
                return 0;
@@ -589,7 +1099,10 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask)
 int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
                                gfp_t gfp_mask)
 {
-       if (mem_cgroup_subsys.disabled)
+       struct mem_cgroup *mem = NULL;
+       int ret;
+
+       if (mem_cgroup_disabled())
                return 0;
        if (PageCompound(page))
                return 0;
@@ -601,6 +1114,8 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
         * For GFP_NOWAIT case, the page may be pre-charged before calling
         * add_to_page_cache(). (See shmem.c) check it here and avoid to call
         * charge twice. (It works but has to pay a bit larger cost.)
+        * And when the page is SwapCache, it should take swap information
+        * into account. This is under lock_page() now.
         */
        if (!(gfp_mask & __GFP_WAIT)) {
                struct page_cgroup *pc;
@@ -617,58 +1132,198 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
                unlock_page_cgroup(pc);
        }
 
-       if (unlikely(!mm))
+       if (do_swap_account && PageSwapCache(page)) {
+               mem = try_get_mem_cgroup_from_swapcache(page);
+               if (mem)
+                       mm = NULL;
+               else
+                       mem = NULL;
+               /* The SwapCache may still be linked to the LRU now. */
+               mem_cgroup_lru_del_before_commit_swapcache(page);
+       }
+
+       if (unlikely(!mm && !mem))
                mm = &init_mm;
 
        if (page_is_file_cache(page))
                return mem_cgroup_charge_common(page, mm, gfp_mask,
                                MEM_CGROUP_CHARGE_TYPE_CACHE, NULL);
-       else
-               return mem_cgroup_charge_common(page, mm, gfp_mask,
-                               MEM_CGROUP_CHARGE_TYPE_SHMEM, NULL);
+
+       ret = mem_cgroup_charge_common(page, mm, gfp_mask,
+                               MEM_CGROUP_CHARGE_TYPE_SHMEM, mem);
+       if (mem)
+               css_put(&mem->css);
+       if (PageSwapCache(page))
+               mem_cgroup_lru_add_after_commit_swapcache(page);
+
+       if (do_swap_account && !ret && PageSwapCache(page)) {
+               swp_entry_t ent = {.val = page_private(page)};
+               /* avoid double counting */
+               mem = swap_cgroup_record(ent, NULL);
+               if (mem) {
+                       res_counter_uncharge(&mem->memsw, PAGE_SIZE);
+                       mem_cgroup_put(mem);
+               }
+       }
+       return ret;
 }
 
+/*
+ * While swap-in, try_charge -> commit or cancel, the page is locked.
+ * And when try_charge() successfully returns, one refcnt to the memcg,
+ * without a struct page_cgroup, is acquired. This refcnt will be consumed
+ * by "commit()" or released by "cancel()".
+ */
+int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
+                                struct page *page,
+                                gfp_t mask, struct mem_cgroup **ptr)
+{
+       struct mem_cgroup *mem;
+       int ret;
+
+       if (mem_cgroup_disabled())
+               return 0;
+
+       if (!do_swap_account)
+               goto charge_cur_mm;
+       /*
+        * A racing thread's fault, or swapoff, may have already updated
+        * the pte, and even removed page from swap cache: return success
+        * to go on to do_swap_page()'s pte_same() test, which should fail.
+        */
+       if (!PageSwapCache(page))
+               return 0;
+       mem = try_get_mem_cgroup_from_swapcache(page);
+       if (!mem)
+               goto charge_cur_mm;
+       *ptr = mem;
+       ret = __mem_cgroup_try_charge(NULL, mask, ptr, true);
+       /* drop extra refcnt from tryget */
+       css_put(&mem->css);
+       return ret;
+charge_cur_mm:
+       if (unlikely(!mm))
+               mm = &init_mm;
+       return __mem_cgroup_try_charge(mm, mask, ptr, true);
+}
+
+void mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr)
+{
+       struct page_cgroup *pc;
+
+       if (mem_cgroup_disabled())
+               return;
+       if (!ptr)
+               return;
+       pc = lookup_page_cgroup(page);
+       mem_cgroup_lru_del_before_commit_swapcache(page);
+       __mem_cgroup_commit_charge(ptr, pc, MEM_CGROUP_CHARGE_TYPE_MAPPED);
+       mem_cgroup_lru_add_after_commit_swapcache(page);
+       /*
+        * Now the swap is in memory. This means the page may be counted
+        * both as mem and as swap: a double count. Fix it by uncharging
+        * from memsw. Basically, this SwapCache is stable under
+        * lock_page(), but in do_swap_page() (memory.c), reuse_swap_page()
+        * may call delete_from_swap_cache() before we reach here.
+        */
+       if (do_swap_account && PageSwapCache(page)) {
+               swp_entry_t ent = {.val = page_private(page)};
+               struct mem_cgroup *memcg;
+               memcg = swap_cgroup_record(ent, NULL);
+               if (memcg) {
+                       res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
+                       mem_cgroup_put(memcg);
+               }
+
+       }
+       /* add this page (page_cgroup) to the LRU we want. */
+
+}
+
+void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *mem)
+{
+       if (mem_cgroup_disabled())
+               return;
+       if (!mem)
+               return;
+       res_counter_uncharge(&mem->res, PAGE_SIZE);
+       if (do_swap_account)
+               res_counter_uncharge(&mem->memsw, PAGE_SIZE);
+       css_put(&mem->css);
+}
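
The three functions above form a try/commit/cancel protocol for swap-in: the
caller charges before installing the pte, then either commits the charge or
cancels it depending on whether the fault completes. A hedged sketch of the
intended calling sequence (install_pte() stands in for the real fault path
and is hypothetical):

    static int swapin_charge_example(struct mm_struct *mm, struct page *page)
    {
            struct mem_cgroup *ptr = NULL;

            if (mem_cgroup_try_charge_swapin(mm, page, GFP_KERNEL, &ptr))
                    return -ENOMEM;         /* charge failed, back out */

            if (install_pte(mm, page)) {    /* hypothetical step */
                    mem_cgroup_cancel_charge_swapin(ptr);
                    return -EAGAIN;
            }

            mem_cgroup_commit_charge_swapin(page, ptr);
            return 0;
    }
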
+
+
 /*
  * uncharge if !page_mapped(page)
  */
-static void
+static struct mem_cgroup *
 __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 {
        struct page_cgroup *pc;
-       struct mem_cgroup *mem;
+       struct mem_cgroup *mem = NULL;
        struct mem_cgroup_per_zone *mz;
-       unsigned long flags;
 
-       if (mem_cgroup_subsys.disabled)
-               return;
+       if (mem_cgroup_disabled())
+               return NULL;
+
+       if (PageSwapCache(page))
+               return NULL;
 
        /*
         * Check if our page_cgroup is valid
         */
        pc = lookup_page_cgroup(page);
        if (unlikely(!pc || !PageCgroupUsed(pc)))
-               return;
+               return NULL;
 
        lock_page_cgroup(pc);
-       if ((ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED && page_mapped(page))
-            || !PageCgroupUsed(pc)) {
-               /* This happens at race in zap_pte_range() and do_swap_page()*/
-               unlock_page_cgroup(pc);
-               return;
+
+       mem = pc->mem_cgroup;
+
+       if (!PageCgroupUsed(pc))
+               goto unlock_out;
+
+       switch (ctype) {
+       case MEM_CGROUP_CHARGE_TYPE_MAPPED:
+               if (page_mapped(page))
+                       goto unlock_out;
+               break;
+       case MEM_CGROUP_CHARGE_TYPE_SWAPOUT:
+               if (!PageAnon(page)) {  /* Shared memory */
+                       if (page->mapping && !page_is_file_cache(page))
+                               goto unlock_out;
+               } else if (page_mapped(page)) /* Anon */
+                               goto unlock_out;
+               break;
+       default:
+               break;
        }
+
+       res_counter_uncharge(&mem->res, PAGE_SIZE);
+       if (do_swap_account && (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT))
+               res_counter_uncharge(&mem->memsw, PAGE_SIZE);
+
+       mem_cgroup_charge_statistics(mem, pc, false);
        ClearPageCgroupUsed(pc);
-       mem = pc->mem_cgroup;
+       /*
+        * pc->mem_cgroup is not cleared here. It will be accessed when the
+        * page is freed from the LRU. This is safe because an uncharged page
+        * is expected not to be reused (it is freed soon). The exception is
+        * SwapCache, which is handled by special functions.
+        */
 
        mz = page_cgroup_zoneinfo(pc);
-       spin_lock_irqsave(&mz->lru_lock, flags);
-       __mem_cgroup_remove_list(mz, pc);
-       spin_unlock_irqrestore(&mz->lru_lock, flags);
        unlock_page_cgroup(pc);
 
-       res_counter_uncharge(&mem->res, PAGE_SIZE);
-       css_put(&mem->css);
+       /* at swapout, this memcg will be accessed to record to swap */
+       if (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT)
+               css_put(&mem->css);
 
-       return;
+       return mem;
+
+unlock_out:
+       unlock_page_cgroup(pc);
+       return NULL;
 }
 
 void mem_cgroup_uncharge_page(struct page *page)
@@ -681,24 +1336,63 @@ void mem_cgroup_uncharge_page(struct page *page)
        __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_MAPPED);
 }
 
-void mem_cgroup_uncharge_cache_page(struct page *page)
-{
-       VM_BUG_ON(page_mapped(page));
-       VM_BUG_ON(page->mapping);
-       __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE);
+void mem_cgroup_uncharge_cache_page(struct page *page)
+{
+       VM_BUG_ON(page_mapped(page));
+       VM_BUG_ON(page->mapping);
+       __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE);
+}
+
+/*
+ * Called from __delete_from_swap_cache(); drops the "page" account.
+ * The memcg information is recorded in the swap_cgroup of "ent".
+ */
+void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent)
+{
+       struct mem_cgroup *memcg;
+
+       memcg = __mem_cgroup_uncharge_common(page,
+                                       MEM_CGROUP_CHARGE_TYPE_SWAPOUT);
+       /* record memcg information */
+       if (do_swap_account && memcg) {
+               swap_cgroup_record(ent, memcg);
+               mem_cgroup_get(memcg);
+       }
+       if (memcg)
+               css_put(&memcg->css);
+}
+
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
+/*
+ * Called from swap_entry_free(). Removes the record in swap_cgroup and
+ * uncharges the "memsw" account.
+ */
+void mem_cgroup_uncharge_swap(swp_entry_t ent)
+{
+       struct mem_cgroup *memcg;
+
+       if (!do_swap_account)
+               return;
+
+       memcg = swap_cgroup_record(ent, NULL);
+       if (memcg) {
+               res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
+               mem_cgroup_put(memcg);
+       }
 }
+#endif
 
 /*
- * Before starting migration, account against new page.
+ * Before starting migration, account PAGE_SIZE to mem_cgroup that the old
+ * page belongs to.
  */
-int mem_cgroup_prepare_migration(struct page *page, struct page *newpage)
+int mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr)
 {
        struct page_cgroup *pc;
        struct mem_cgroup *mem = NULL;
-       enum charge_type ctype = MEM_CGROUP_CHARGE_TYPE_MAPPED;
        int ret = 0;
 
-       if (mem_cgroup_subsys.disabled)
+       if (mem_cgroup_disabled())
                return 0;
 
        pc = lookup_page_cgroup(page);
@@ -706,41 +1400,67 @@ int mem_cgroup_prepare_migration(struct page *page, struct page *newpage)
        if (PageCgroupUsed(pc)) {
                mem = pc->mem_cgroup;
                css_get(&mem->css);
-               if (PageCgroupCache(pc)) {
-                       if (page_is_file_cache(page))
-                               ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
-                       else
-                               ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM;
-               }
        }
        unlock_page_cgroup(pc);
+
        if (mem) {
-               ret = mem_cgroup_charge_common(newpage, NULL, GFP_KERNEL,
-                       ctype, mem);
+               ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, &mem, false);
                css_put(&mem->css);
        }
+       *ptr = mem;
        return ret;
 }
 
 /* remove redundant charge if migration failed*/
-void mem_cgroup_end_migration(struct page *newpage)
+void mem_cgroup_end_migration(struct mem_cgroup *mem,
+               struct page *oldpage, struct page *newpage)
 {
+       struct page *target, *unused;
+       struct page_cgroup *pc;
+       enum charge_type ctype;
+
+       if (!mem)
+               return;
+
+       /* at migration success, oldpage->mapping is NULL. */
+       if (oldpage->mapping) {
+               target = oldpage;
+               unused = NULL;
+       } else {
+               target = newpage;
+               unused = oldpage;
+       }
+
+       if (PageAnon(target))
+               ctype = MEM_CGROUP_CHARGE_TYPE_MAPPED;
+       else if (page_is_file_cache(target))
+               ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
+       else
+               ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM;
+
+       /* unused page is not on radix-tree now. */
+       if (unused)
+               __mem_cgroup_uncharge_common(unused, ctype);
+
+       pc = lookup_page_cgroup(target);
+       /*
+        * __mem_cgroup_commit_charge() checks the PCG_USED bit of page_cgroup.
+        * So, double-counting is effectively avoided.
+        */
+       __mem_cgroup_commit_charge(mem, pc, ctype);
+
        /*
-        * At success, page->mapping is not NULL.
-        * special rollback care is necessary when
-        * 1. at migration failure. (newpage->mapping is cleared in this case)
-        * 2. the newpage was moved but not remapped again because the task
-        *    exits and the newpage is obsolete. In this case, the new page
-        *    may be a swapcache. So, we just call mem_cgroup_uncharge_page()
-        *    always for avoiding mess. The  page_cgroup will be removed if
-        *    unnecessary. File cache pages is still on radix-tree. Don't
-        *    care it.
+        * Both of oldpage and newpage are still under lock_page().
+        * Then, we don't have to care about race in radix-tree.
+        * But we have to be careful that this page is unmapped or not.
+        *
+        * There is a case for !page_mapped(). At the start of
+        * migration, oldpage was mapped. But now, it's zapped.
+        * But we know *target* page is not freed/reused under us.
+        * mem_cgroup_uncharge_page() does all necessary checks.
         */
-       if (!newpage->mapping)
-               __mem_cgroup_uncharge_common(newpage,
-                               MEM_CGROUP_CHARGE_TYPE_FORCE);
-       else if (PageAnon(newpage))
-               mem_cgroup_uncharge_page(newpage);
+       if (ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED)
+               mem_cgroup_uncharge_page(target);
 }
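
Migration now follows the same two-phase shape: prepare_migration() charges
the memcg of the old page and hands it back through *ptr, and
end_migration() later commits that charge against whichever of
oldpage/newpage survived (and uncharges the loser). A rough sketch of a
caller, with move_page_mapping() as a hypothetical stand-in for the actual
migration core:

    struct mem_cgroup *mem = NULL;
    int rc;

    if (mem_cgroup_prepare_migration(page, &mem))
            return -ENOMEM;

    rc = move_page_mapping(page, newpage);  /* hypothetical step */

    /* commits to newpage on success, back to page on failure */
    mem_cgroup_end_migration(mem, page, newpage);
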
 
 /*
@@ -748,29 +1468,26 @@ void mem_cgroup_end_migration(struct page *newpage)
  * This is typically used for page reclaiming for shmem for reducing side
  * effect of page allocation from shmem, which is used by some mem_cgroup.
  */
-int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask)
+int mem_cgroup_shrink_usage(struct page *page,
+                           struct mm_struct *mm,
+                           gfp_t gfp_mask)
 {
-       struct mem_cgroup *mem;
+       struct mem_cgroup *mem = NULL;
        int progress = 0;
        int retry = MEM_CGROUP_RECLAIM_RETRIES;
 
-       if (mem_cgroup_subsys.disabled)
+       if (mem_cgroup_disabled())
                return 0;
-       if (!mm)
-               return 0;
-
-       rcu_read_lock();
-       mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
-       if (unlikely(!mem)) {
-               rcu_read_unlock();
+       if (page)
+               mem = try_get_mem_cgroup_from_swapcache(page);
+       if (!mem && mm)
+               mem = try_get_mem_cgroup_from_mm(mm);
+       if (unlikely(!mem))
                return 0;
-       }
-       css_get(&mem->css);
-       rcu_read_unlock();
 
        do {
-               progress = try_to_free_mem_cgroup_pages(mem, gfp_mask);
-               progress += res_counter_check_under_limit(&mem->res);
+               progress = mem_cgroup_hierarchical_reclaim(mem, gfp_mask, true);
+               progress += mem_cgroup_check_under_limit(mem);
        } while (!progress && --retry);
 
        css_put(&mem->css);
@@ -779,117 +1496,295 @@ int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask)
        return 0;
 }
 
+static DEFINE_MUTEX(set_limit_mutex);
+
 static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
-                                  unsigned long long val)
+                               unsigned long long val)
 {
 
        int retry_count = MEM_CGROUP_RECLAIM_RETRIES;
        int progress;
+       u64 memswlimit;
        int ret = 0;
 
-       while (res_counter_set_limit(&memcg->res, val)) {
+       while (retry_count) {
                if (signal_pending(current)) {
                        ret = -EINTR;
                        break;
                }
-               if (!retry_count) {
-                       ret = -EBUSY;
+               /*
+                * Rather than hiding all this in some helper, do it open
+                * coded so that what really happens is visible here:
+                * we must guarantee mem->res.limit < mem->memsw.limit.
+                */
+               mutex_lock(&set_limit_mutex);
+               memswlimit = res_counter_read_u64(&memcg->memsw, RES_LIMIT);
+               if (memswlimit < val) {
+                       ret = -EINVAL;
+                       mutex_unlock(&set_limit_mutex);
                        break;
                }
-               progress = try_to_free_mem_cgroup_pages(memcg, GFP_KERNEL);
-               if (!progress)
-                       retry_count--;
+               ret = res_counter_set_limit(&memcg->res, val);
+               mutex_unlock(&set_limit_mutex);
+
+               if (!ret)
+                       break;
+
+               progress = mem_cgroup_hierarchical_reclaim(memcg, GFP_KERNEL,
+                                                          false);
+               if (!progress)
+                       retry_count--;
        }
+
        return ret;
 }
 
+int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
+                               unsigned long long val)
+{
+       int retry_count = MEM_CGROUP_RECLAIM_RETRIES;
+       u64 memlimit, oldusage, curusage;
+       int ret;
+
+       if (!do_swap_account)
+               return -EINVAL;
+
+       while (retry_count) {
+               if (signal_pending(current)) {
+                       ret = -EINTR;
+                       break;
+               }
+               /*
+                * Rather than hiding all this in some helper, do it open
+                * coded so that what really happens is visible here:
+                * we must guarantee mem->res.limit < mem->memsw.limit.
+                */
+               mutex_lock(&set_limit_mutex);
+               memlimit = res_counter_read_u64(&memcg->res, RES_LIMIT);
+               if (memlimit > val) {
+                       ret = -EINVAL;
+                       mutex_unlock(&set_limit_mutex);
+                       break;
+               }
+               ret = res_counter_set_limit(&memcg->memsw, val);
+               mutex_unlock(&set_limit_mutex);
+
+               if (!ret)
+                       break;
+
+               oldusage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
+               mem_cgroup_hierarchical_reclaim(memcg, GFP_KERNEL, true);
+               curusage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
+               if (curusage >= oldusage)
+                       retry_count--;
+       }
+       return ret;
+}
 
 /*
  * This routine traverses the page_cgroups on the given list and drops them
  * all. *And* it doesn't reclaim the pages themselves, it just removes the
  * page_cgroup entries.
  */
-#define FORCE_UNCHARGE_BATCH   (128)
-static void mem_cgroup_force_empty_list(struct mem_cgroup *mem,
-                           struct mem_cgroup_per_zone *mz,
-                           enum lru_list lru)
+static int mem_cgroup_force_empty_list(struct mem_cgroup *mem,
+                               int node, int zid, enum lru_list lru)
 {
-       struct page_cgroup *pc;
-       struct page *page;
-       int count = FORCE_UNCHARGE_BATCH;
-       unsigned long flags;
+       struct zone *zone;
+       struct mem_cgroup_per_zone *mz;
+       struct page_cgroup *pc, *busy;
+       unsigned long flags, loop;
        struct list_head *list;
+       int ret = 0;
 
+       zone = &NODE_DATA(node)->node_zones[zid];
+       mz = mem_cgroup_zoneinfo(mem, node, zid);
        list = &mz->lists[lru];
 
-       spin_lock_irqsave(&mz->lru_lock, flags);
-       while (!list_empty(list)) {
-               pc = list_entry(list->prev, struct page_cgroup, lru);
-               page = pc->page;
-               if (!PageCgroupUsed(pc))
-                       break;
-               get_page(page);
-               spin_unlock_irqrestore(&mz->lru_lock, flags);
-               /*
-                * Check if this page is on LRU. !LRU page can be found
-                * if it's under page migration.
-                */
-               if (PageLRU(page)) {
-                       __mem_cgroup_uncharge_common(page,
-                                       MEM_CGROUP_CHARGE_TYPE_FORCE);
-                       put_page(page);
-                       if (--count <= 0) {
-                               count = FORCE_UNCHARGE_BATCH;
-                               cond_resched();
-                       }
-               } else {
-                       spin_lock_irqsave(&mz->lru_lock, flags);
+       loop = MEM_CGROUP_ZSTAT(mz, lru);
+       /* give some margin against -EBUSY etc. */
+       loop += 256;
+       busy = NULL;
+       while (loop--) {
+               ret = 0;
+               spin_lock_irqsave(&zone->lru_lock, flags);
+               if (list_empty(list)) {
+                       spin_unlock_irqrestore(&zone->lru_lock, flags);
                        break;
                }
-               spin_lock_irqsave(&mz->lru_lock, flags);
+               pc = list_entry(list->prev, struct page_cgroup, lru);
+               if (busy == pc) {
+                       list_move(&pc->lru, list);
+                       busy = NULL;
+                       spin_unlock_irqrestore(&zone->lru_lock, flags);
+                       continue;
+               }
+               spin_unlock_irqrestore(&zone->lru_lock, flags);
+
+               ret = mem_cgroup_move_parent(pc, mem, GFP_KERNEL);
+               if (ret == -ENOMEM)
+                       break;
+
+               if (ret == -EBUSY || ret == -EINVAL) {
+                       /* found lock contention or "pc" is obsolete. */
+                       busy = pc;
+                       cond_resched();
+               } else
+                       busy = NULL;
        }
-       spin_unlock_irqrestore(&mz->lru_lock, flags);
+
+       if (!ret && !list_empty(list))
+               return -EBUSY;
+       return ret;
 }
 
 /*
  * make the mem_cgroup's charge 0 if there is no task.
  * This enables deleting this mem_cgroup.
  */
-static int mem_cgroup_force_empty(struct mem_cgroup *mem)
+static int mem_cgroup_force_empty(struct mem_cgroup *mem, bool free_all)
 {
-       int ret = -EBUSY;
-       int node, zid;
+       int ret;
+       int node, zid, shrink;
+       int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
+       struct cgroup *cgrp = mem->css.cgroup;
 
        css_get(&mem->css);
-       /*
-        * page reclaim code (kswapd etc..) will move pages between
-        * active_list <-> inactive_list while we don't take a lock.
-        * So, we have to do loop here until all lists are empty.
-        */
+
+       shrink = 0;
+       /* should free all ? */
+       if (free_all)
+               goto try_to_free;
+move_account:
        while (mem->res.usage > 0) {
-               if (atomic_read(&mem->css.cgroup->count) > 0)
+               ret = -EBUSY;
+               if (cgroup_task_count(cgrp) || !list_empty(&cgrp->children))
+                       goto out;
+               ret = -EINTR;
+               if (signal_pending(current))
                        goto out;
                /* This is for making all *used* pages to be on LRU. */
                lru_add_drain_all();
-               for_each_node_state(node, N_POSSIBLE)
-                       for (zid = 0; zid < MAX_NR_ZONES; zid++) {
-                               struct mem_cgroup_per_zone *mz;
+               ret = 0;
+               for_each_node_state(node, N_POSSIBLE) {
+                       for (zid = 0; !ret && zid < MAX_NR_ZONES; zid++) {
                                enum lru_list l;
-                               mz = mem_cgroup_zoneinfo(mem, node, zid);
-                               for_each_lru(l)
-                                       mem_cgroup_force_empty_list(mem, mz, l);
+                               for_each_lru(l) {
+                                       ret = mem_cgroup_force_empty_list(mem,
+                                                       node, zid, l);
+                                       if (ret)
+                                               break;
+                               }
                        }
+                       if (ret)
+                               break;
+               }
+               /* the parent cgroup doesn't seem to have enough memory */
+               if (ret == -ENOMEM)
+                       goto try_to_free;
                cond_resched();
        }
        ret = 0;
 out:
        css_put(&mem->css);
        return ret;
+
+try_to_free:
+       /* returns -EBUSY if there is a task or if we come here twice. */
+       if (cgroup_task_count(cgrp) || !list_empty(&cgrp->children) || shrink) {
+               ret = -EBUSY;
+               goto out;
+       }
+       /* call try-to-free pages to make this cgroup empty */
+       lru_add_drain_all();
+       /* try to free all pages in this cgroup */
+       shrink = 1;
+       while (nr_retries && mem->res.usage > 0) {
+               int progress;
+
+               if (signal_pending(current)) {
+                       ret = -EINTR;
+                       goto out;
+               }
+               progress = try_to_free_mem_cgroup_pages(mem, GFP_KERNEL,
+                                               false, get_swappiness(mem));
+               if (!progress) {
+                       nr_retries--;
+                       /* maybe some writeback is necessary */
+                       congestion_wait(WRITE, HZ/10);
+               }
+
+       }
+       lru_add_drain();
+       /* try move_account...there may be some *locked* pages. */
+       if (mem->res.usage)
+               goto move_account;
+       ret = 0;
+       goto out;
+}
+
+int mem_cgroup_force_empty_write(struct cgroup *cont, unsigned int event)
+{
+       return mem_cgroup_force_empty(mem_cgroup_from_cont(cont), true);
+}
+
+
+static u64 mem_cgroup_hierarchy_read(struct cgroup *cont, struct cftype *cft)
+{
+       return mem_cgroup_from_cont(cont)->use_hierarchy;
+}
+
+static int mem_cgroup_hierarchy_write(struct cgroup *cont, struct cftype *cft,
+                                       u64 val)
+{
+       int retval = 0;
+       struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
+       struct cgroup *parent = cont->parent;
+       struct mem_cgroup *parent_mem = NULL;
+
+       if (parent)
+               parent_mem = mem_cgroup_from_cont(parent);
+
+       cgroup_lock();
+       /*
+        * If the parent's use_hierarchy is set, we can't make any
+        * modifications in the child subtrees. If it is unset, then
+        * the change can occur, provided the current cgroup has no
+        * children.
+        *
+        * For the root cgroup, parent_mem is NULL; we allow the value
+        * to be set if there are no children.
+        */
+       if ((!parent_mem || !parent_mem->use_hierarchy) &&
+                               (val == 1 || val == 0)) {
+               if (list_empty(&cont->children))
+                       mem->use_hierarchy = val;
+               else
+                       retval = -EBUSY;
+       } else
+               retval = -EINVAL;
+       cgroup_unlock();
+
+       return retval;
 }
 
 static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft)
 {
-       return res_counter_read_u64(&mem_cgroup_from_cont(cont)->res,
-                                   cft->private);
+       struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
+       u64 val = 0;
+       int type, name;
+
+       type = MEMFILE_TYPE(cft->private);
+       name = MEMFILE_ATTR(cft->private);
+       switch (type) {
+       case _MEM:
+               val = res_counter_read_u64(&mem->res, name);
+               break;
+       case _MEMSWAP:
+               if (do_swap_account)
+                       val = res_counter_read_u64(&mem->memsw, name);
+               break;
+       default:
+               BUG();
+               break;
+       }
+       return val;
 }
 /*
  * The user of this function is...
@@ -899,15 +1794,22 @@ static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
                            const char *buffer)
 {
        struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
+       int type, name;
        unsigned long long val;
        int ret;
 
-       switch (cft->private) {
+       type = MEMFILE_TYPE(cft->private);
+       name = MEMFILE_ATTR(cft->private);
+       switch (name) {
        case RES_LIMIT:
                /* This function does all necessary parse...reuse it */
                ret = res_counter_memparse_write_strategy(buffer, &val);
-               if (!ret)
+               if (ret)
+                       break;
+               if (type == _MEM)
                        ret = mem_cgroup_resize_limit(memcg, val);
+               else
+                       ret = mem_cgroup_resize_memsw_limit(memcg, val);
                break;
        default:
                ret = -EINVAL; /* should be BUG() ? */
@@ -916,27 +1818,59 @@ static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
        return ret;
 }
 
+static void memcg_get_hierarchical_limit(struct mem_cgroup *memcg,
+               unsigned long long *mem_limit, unsigned long long *memsw_limit)
+{
+       struct cgroup *cgroup;
+       unsigned long long min_limit, min_memsw_limit, tmp;
+
+       min_limit = res_counter_read_u64(&memcg->res, RES_LIMIT);
+       min_memsw_limit = res_counter_read_u64(&memcg->memsw, RES_LIMIT);
+       cgroup = memcg->css.cgroup;
+       if (!memcg->use_hierarchy)
+               goto out;
+
+       while (cgroup->parent) {
+               cgroup = cgroup->parent;
+               memcg = mem_cgroup_from_cont(cgroup);
+               if (!memcg->use_hierarchy)
+                       break;
+               tmp = res_counter_read_u64(&memcg->res, RES_LIMIT);
+               min_limit = min(min_limit, tmp);
+               tmp = res_counter_read_u64(&memcg->memsw, RES_LIMIT);
+               min_memsw_limit = min(min_memsw_limit, tmp);
+       }
+out:
+       *mem_limit = min_limit;
+       *memsw_limit = min_memsw_limit;
+       return;
+}
+
 static int mem_cgroup_reset(struct cgroup *cont, unsigned int event)
 {
        struct mem_cgroup *mem;
+       int type, name;
 
        mem = mem_cgroup_from_cont(cont);
-       switch (event) {
+       type = MEMFILE_TYPE(event);
+       name = MEMFILE_ATTR(event);
+       switch (name) {
        case RES_MAX_USAGE:
-               res_counter_reset_max(&mem->res);
+               if (type == _MEM)
+                       res_counter_reset_max(&mem->res);
+               else
+                       res_counter_reset_max(&mem->memsw);
                break;
        case RES_FAILCNT:
-               res_counter_reset_failcnt(&mem->res);
+               if (type == _MEM)
+                       res_counter_reset_failcnt(&mem->res);
+               else
+                       res_counter_reset_failcnt(&mem->memsw);
                break;
        }
        return 0;
 }
 
-static int mem_force_empty_write(struct cgroup *cont, unsigned int event)
-{
-       return mem_cgroup_force_empty(mem_cgroup_from_cont(cont));
-}
-
 static const struct mem_cgroup_stat_desc {
        const char *msg;
        u64 unit;
@@ -985,43 +1919,163 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
                cb->fill(cb, "unevictable", unevictable * PAGE_SIZE);
 
        }
+       {
+               unsigned long long limit, memsw_limit;
+               memcg_get_hierarchical_limit(mem_cont, &limit, &memsw_limit);
+               cb->fill(cb, "hierarchical_memory_limit", limit);
+               if (do_swap_account)
+                       cb->fill(cb, "hierarchical_memsw_limit", memsw_limit);
+       }
+
+#ifdef CONFIG_DEBUG_VM
+       cb->fill(cb, "inactive_ratio", calc_inactive_ratio(mem_cont, NULL));
+
+       {
+               int nid, zid;
+               struct mem_cgroup_per_zone *mz;
+               unsigned long recent_rotated[2] = {0, 0};
+               unsigned long recent_scanned[2] = {0, 0};
+
+               for_each_online_node(nid)
+                       for (zid = 0; zid < MAX_NR_ZONES; zid++) {
+                               mz = mem_cgroup_zoneinfo(mem_cont, nid, zid);
+
+                               recent_rotated[0] +=
+                                       mz->reclaim_stat.recent_rotated[0];
+                               recent_rotated[1] +=
+                                       mz->reclaim_stat.recent_rotated[1];
+                               recent_scanned[0] +=
+                                       mz->reclaim_stat.recent_scanned[0];
+                               recent_scanned[1] +=
+                                       mz->reclaim_stat.recent_scanned[1];
+                       }
+               cb->fill(cb, "recent_rotated_anon", recent_rotated[0]);
+               cb->fill(cb, "recent_rotated_file", recent_rotated[1]);
+               cb->fill(cb, "recent_scanned_anon", recent_scanned[0]);
+               cb->fill(cb, "recent_scanned_file", recent_scanned[1]);
+       }
+#endif
+
+       return 0;
+}
+
+static u64 mem_cgroup_swappiness_read(struct cgroup *cgrp, struct cftype *cft)
+{
+       struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
+
+       return get_swappiness(memcg);
+}
+
+static int mem_cgroup_swappiness_write(struct cgroup *cgrp, struct cftype *cft,
+                                      u64 val)
+{
+       struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
+       struct mem_cgroup *parent;
+       if (val > 100)
+               return -EINVAL;
+
+       if (cgrp->parent == NULL)
+               return -EINVAL;
+
+       parent = mem_cgroup_from_cont(cgrp->parent);
+       /* If under hierarchy, only empty-root can set this value */
+       if ((parent->use_hierarchy) ||
+           (memcg->use_hierarchy && !list_empty(&cgrp->children)))
+               return -EINVAL;
+
+       spin_lock(&memcg->reclaim_param_lock);
+       memcg->swappiness = val;
+       spin_unlock(&memcg->reclaim_param_lock);
+
        return 0;
 }
 
+
 static struct cftype mem_cgroup_files[] = {
        {
                .name = "usage_in_bytes",
-               .private = RES_USAGE,
+               .private = MEMFILE_PRIVATE(_MEM, RES_USAGE),
                .read_u64 = mem_cgroup_read,
        },
        {
                .name = "max_usage_in_bytes",
-               .private = RES_MAX_USAGE,
+               .private = MEMFILE_PRIVATE(_MEM, RES_MAX_USAGE),
                .trigger = mem_cgroup_reset,
                .read_u64 = mem_cgroup_read,
        },
        {
                .name = "limit_in_bytes",
-               .private = RES_LIMIT,
+               .private = MEMFILE_PRIVATE(_MEM, RES_LIMIT),
                .write_string = mem_cgroup_write,
                .read_u64 = mem_cgroup_read,
        },
        {
                .name = "failcnt",
-               .private = RES_FAILCNT,
+               .private = MEMFILE_PRIVATE(_MEM, RES_FAILCNT),
                .trigger = mem_cgroup_reset,
                .read_u64 = mem_cgroup_read,
        },
+       {
+               .name = "stat",
+               .read_map = mem_control_stat_show,
+       },
        {
                .name = "force_empty",
-               .trigger = mem_force_empty_write,
+               .trigger = mem_cgroup_force_empty_write,
        },
        {
-               .name = "stat",
-               .read_map = mem_control_stat_show,
+               .name = "use_hierarchy",
+               .write_u64 = mem_cgroup_hierarchy_write,
+               .read_u64 = mem_cgroup_hierarchy_read,
+       },
+       {
+               .name = "swappiness",
+               .read_u64 = mem_cgroup_swappiness_read,
+               .write_u64 = mem_cgroup_swappiness_write,
+       },
+};
+
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
+static struct cftype memsw_cgroup_files[] = {
+       {
+               .name = "memsw.usage_in_bytes",
+               .private = MEMFILE_PRIVATE(_MEMSWAP, RES_USAGE),
+               .read_u64 = mem_cgroup_read,
+       },
+       {
+               .name = "memsw.max_usage_in_bytes",
+               .private = MEMFILE_PRIVATE(_MEMSWAP, RES_MAX_USAGE),
+               .trigger = mem_cgroup_reset,
+               .read_u64 = mem_cgroup_read,
+       },
+       {
+               .name = "memsw.limit_in_bytes",
+               .private = MEMFILE_PRIVATE(_MEMSWAP, RES_LIMIT),
+               .write_string = mem_cgroup_write,
+               .read_u64 = mem_cgroup_read,
+       },
+       {
+               .name = "memsw.failcnt",
+               .private = MEMFILE_PRIVATE(_MEMSWAP, RES_FAILCNT),
+               .trigger = mem_cgroup_reset,
+               .read_u64 = mem_cgroup_read,
        },
 };
 
+static int register_memsw_files(struct cgroup *cont, struct cgroup_subsys *ss)
+{
+       if (!do_swap_account)
+               return 0;
+       return cgroup_add_files(cont, ss, memsw_cgroup_files,
+                               ARRAY_SIZE(memsw_cgroup_files));
+}
+#else
+static int register_memsw_files(struct cgroup *cont, struct cgroup_subsys *ss)
+{
+       return 0;
+}
+#endif
+
 static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node)
 {
        struct mem_cgroup_per_node *pn;
@@ -1047,7 +2101,6 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node)
 
        for (zone = 0; zone < MAX_NR_ZONES; zone++) {
                mz = &pn->zoneinfo[zone];
-               spin_lock_init(&mz->lru_lock);
                for_each_lru(l)
                        INIT_LIST_HEAD(&mz->lists[l]);
        }
@@ -1059,55 +2112,113 @@ static void free_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node)
        kfree(mem->info.nodeinfo[node]);
 }
 
+static int mem_cgroup_size(void)
+{
+       int cpustat_size = nr_cpu_ids * sizeof(struct mem_cgroup_stat_cpu);
+       return sizeof(struct mem_cgroup) + cpustat_size;
+}
+
 static struct mem_cgroup *mem_cgroup_alloc(void)
 {
        struct mem_cgroup *mem;
+       int size = mem_cgroup_size();
 
-       if (sizeof(*mem) < PAGE_SIZE)
-               mem = kmalloc(sizeof(*mem), GFP_KERNEL);
+       if (size < PAGE_SIZE)
+               mem = kmalloc(size, GFP_KERNEL);
        else
-               mem = vmalloc(sizeof(*mem));
+               mem = vmalloc(size);
 
        if (mem)
-               memset(mem, 0, sizeof(*mem));
+               memset(mem, 0, size);
        return mem;
 }
 
-static void mem_cgroup_free(struct mem_cgroup *mem)
+/*
+ * When a mem_cgroup is destroyed, references from swap_cgroup can remain
+ * (scanning them all at force_empty would be too costly...).
+ *
+ * Instead of clearing all references at force_empty, we remember the
+ * number of references from swap_cgroup and free the mem_cgroup when
+ * that count goes down to 0.
+ *
+ * Removal of the cgroup itself succeeds regardless of refs from swap.
+ */
+
+static void __mem_cgroup_free(struct mem_cgroup *mem)
 {
-       if (sizeof(*mem) < PAGE_SIZE)
+       int node;
+
+       for_each_node_state(node, N_POSSIBLE)
+               free_mem_cgroup_per_zone_info(mem, node);
+
+       if (mem_cgroup_size() < PAGE_SIZE)
                kfree(mem);
        else
                vfree(mem);
 }
 
+static void mem_cgroup_get(struct mem_cgroup *mem)
+{
+       atomic_inc(&mem->refcnt);
+}
+
+static void mem_cgroup_put(struct mem_cgroup *mem)
+{
+       if (atomic_dec_and_test(&mem->refcnt))
+               __mem_cgroup_free(mem);
+}
+
+
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
+static void __init enable_swap_cgroup(void)
+{
+       if (!mem_cgroup_disabled() && really_do_swap_account)
+               do_swap_account = 1;
+}
+#else
+static void __init enable_swap_cgroup(void)
+{
+}
+#endif
 
 static struct cgroup_subsys_state *
 mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
 {
-       struct mem_cgroup *mem;
+       struct mem_cgroup *mem, *parent;
        int node;
 
-       if (unlikely((cont->parent) == NULL)) {
-               mem = &init_mem_cgroup;
-       } else {
-               mem = mem_cgroup_alloc();
-               if (!mem)
-                       return ERR_PTR(-ENOMEM);
-       }
-
-       res_counter_init(&mem->res);
+       mem = mem_cgroup_alloc();
+       if (!mem)
+               return ERR_PTR(-ENOMEM);
 
        for_each_node_state(node, N_POSSIBLE)
                if (alloc_mem_cgroup_per_zone_info(mem, node))
                        goto free_out;
+       /* root ? */
+       if (cont->parent == NULL) {
+               enable_swap_cgroup();
+               parent = NULL;
+       } else {
+               parent = mem_cgroup_from_cont(cont->parent);
+               mem->use_hierarchy = parent->use_hierarchy;
+       }
 
+       if (parent && parent->use_hierarchy) {
+               res_counter_init(&mem->res, &parent->res);
+               res_counter_init(&mem->memsw, &parent->memsw);
+       } else {
+               res_counter_init(&mem->res, NULL);
+               res_counter_init(&mem->memsw, NULL);
+       }
+       mem->last_scanned_child = NULL;
+       spin_lock_init(&mem->reclaim_param_lock);
+
+       if (parent)
+               mem->swappiness = get_swappiness(parent);
+       atomic_set(&mem->refcnt, 1);
        return &mem->css;
 free_out:
-       for_each_node_state(node, N_POSSIBLE)
-               free_mem_cgroup_per_zone_info(mem, node);
-       if (cont->parent != NULL)
-               mem_cgroup_free(mem);
+       __mem_cgroup_free(mem);
        return ERR_PTR(-ENOMEM);
 }
 
@@ -1115,26 +2226,26 @@ static void mem_cgroup_pre_destroy(struct cgroup_subsys *ss,
                                        struct cgroup *cont)
 {
        struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
-       mem_cgroup_force_empty(mem);
+       mem_cgroup_force_empty(mem, false);
 }
 
 static void mem_cgroup_destroy(struct cgroup_subsys *ss,
                                struct cgroup *cont)
 {
-       int node;
-       struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
-
-       for_each_node_state(node, N_POSSIBLE)
-               free_mem_cgroup_per_zone_info(mem, node);
-
-       mem_cgroup_free(mem_cgroup_from_cont(cont));
+       mem_cgroup_put(mem_cgroup_from_cont(cont));
 }
 
 static int mem_cgroup_populate(struct cgroup_subsys *ss,
                                struct cgroup *cont)
 {
-       return cgroup_add_files(cont, ss, mem_cgroup_files,
-                                       ARRAY_SIZE(mem_cgroup_files));
+       int ret;
+
+       ret = cgroup_add_files(cont, ss, mem_cgroup_files,
+                               ARRAY_SIZE(mem_cgroup_files));
+
+       if (!ret)
+               ret = register_memsw_files(cont, ss);
+       return ret;
 }
 
 static void mem_cgroup_move_task(struct cgroup_subsys *ss,
@@ -1142,25 +2253,12 @@ static void mem_cgroup_move_task(struct cgroup_subsys *ss,
                                struct cgroup *old_cont,
                                struct task_struct *p)
 {
-       struct mm_struct *mm;
-       struct mem_cgroup *mem, *old_mem;
-
-       mm = get_task_mm(p);
-       if (mm == NULL)
-               return;
-
-       mem = mem_cgroup_from_cont(cont);
-       old_mem = mem_cgroup_from_cont(old_cont);
-
+       mutex_lock(&memcg_tasklist);
        /*
-        * Only thread group leaders are allowed to migrate, the mm_struct is
-        * in effect owned by the leader
+        * FIXME: It would be better to move this process's charges from
+        * the old memcg to the new one, but that is still on the TODO list.
         */
-       if (!thread_group_leader(p))
-               goto out;
-
-out:
-       mmput(mm);
+       mutex_unlock(&memcg_tasklist);
 }
 
 struct cgroup_subsys mem_cgroup_subsys = {
@@ -1173,3 +2271,13 @@ struct cgroup_subsys mem_cgroup_subsys = {
        .attach = mem_cgroup_move_task,
        .early_init = 0,
 };
+
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
+
+static int __init disable_swap_account(char *s)
+{
+       really_do_swap_account = 0;
+       return 1;
+}
+__setup("noswapaccount", disable_swap_account);
+#endif
index 3f8fa06b963b281191ac2c1350af0b21b2dad7c5..e009ce8708597fe3c7542a92ec9201fe8d5f73b6 100644 (file)
@@ -2000,7 +2000,7 @@ gotten:
        cow_user_page(new_page, old_page, address, vma);
        __SetPageUptodate(new_page);
 
-       if (mem_cgroup_charge(new_page, mm, GFP_KERNEL))
+       if (mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))
                goto oom_free_new;
 
        /*
@@ -2392,6 +2392,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
        struct page *page;
        swp_entry_t entry;
        pte_t pte;
+       struct mem_cgroup *ptr = NULL;
        int ret = 0;
 
        if (!pte_unmap_same(mm, pmd, page_table, orig_pte))
@@ -2430,7 +2431,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
        lock_page(page);
        delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
 
-       if (mem_cgroup_charge(page, mm, GFP_KERNEL)) {
+       if (mem_cgroup_try_charge_swapin(mm, page, GFP_KERNEL, &ptr)) {
                ret = VM_FAULT_OOM;
                unlock_page(page);
                goto out;
@@ -2448,7 +2449,19 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
                goto out_nomap;
        }
 
-       /* The page isn't present yet, go ahead with the fault. */
+       /*
+        * The page isn't present yet, go ahead with the fault.
+        *
+        * Be careful about the sequence of operations here.
+        * To get its accounting right, reuse_swap_page() must be called
+        * while the page is counted on swap but not yet in mapcount, i.e.
+        * before page_add_anon_rmap() and swap_free(); try_to_free_swap()
+        * must be called after the swap_free(), or it will never succeed.
+        * Because delete_from_swap_cache() may be called by reuse_swap_page(),
+        * mem_cgroup_commit_charge_swapin() may not be able to find the
+        * swp_entry in page->private. In this case, a record in swap_cgroup
+        * is silently discarded at swap_free().
+        */
 
        inc_mm_counter(mm, anon_rss);
        pte = mk_pte(page, vma->vm_page_prot);
@@ -2456,10 +2469,11 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
                pte = maybe_mkwrite(pte_mkdirty(pte), vma);
                write_access = 0;
        }
-
        flush_icache_page(vma, page);
        set_pte_at(mm, address, page_table, pte);
        page_add_anon_rmap(page, vma, address);
+       /* It's better to commit the charge after the rmap is established */
+       mem_cgroup_commit_charge_swapin(page, ptr);
 
        swap_free(entry);
        if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page))
@@ -2480,7 +2494,7 @@ unlock:
 out:
        return ret;
 out_nomap:
-       mem_cgroup_uncharge_page(page);
+       mem_cgroup_cancel_charge_swapin(ptr);
        pte_unmap_unlock(page_table, ptl);
        unlock_page(page);
        page_cache_release(page);
@@ -2510,7 +2524,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
                goto oom;
        __SetPageUptodate(page);
 
-       if (mem_cgroup_charge(page, mm, GFP_KERNEL))
+       if (mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))
                goto oom_free_page;
 
        entry = mk_pte(page, vma->vm_page_prot);
@@ -2601,7 +2615,7 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
                                ret = VM_FAULT_OOM;
                                goto out;
                        }
-                       if (mem_cgroup_charge(page, mm, GFP_KERNEL)) {
+                       if (mem_cgroup_newpage_charge(page, mm, GFP_KERNEL)) {
                                ret = VM_FAULT_OOM;
                                page_cache_release(page);
                                goto out;
index 55373983c9c68410ebdb98883518e7f2b56728bd..a30ea5fcf9f1cab76172fdb4f7473e8df0880cac 100644 (file)
@@ -121,20 +121,6 @@ static void remove_migration_pte(struct vm_area_struct *vma,
        if (!is_migration_entry(entry) || migration_entry_to_page(entry) != old)
                goto out;
 
-       /*
-        * Yes, ignore the return value from a GFP_ATOMIC mem_cgroup_charge.
-        * Failure is not an option here: we're now expected to remove every
-        * migration pte, and will cause crashes otherwise.  Normally this
-        * is not an issue: mem_cgroup_prepare_migration bumped up the old
-        * page_cgroup count for safety, that's now attached to the new page,
-        * so this charge should just be another incrementation of the count,
-        * to keep in balance with rmap.c's mem_cgroup_uncharging.  But if
-        * there's been a force_empty, those reference counts may no longer
-        * be reliable, and this charge can actually fail: oh well, we don't
-        * make the situation any worse by proceeding as if it had succeeded.
-        */
-       mem_cgroup_charge(new, mm, GFP_ATOMIC);
-
        get_page(new);
        pte = pte_mkold(mk_pte(new, vma->vm_page_prot));
        if (is_write_migration_entry(entry))
@@ -378,9 +364,6 @@ static void migrate_page_copy(struct page *newpage, struct page *page)
        anon = PageAnon(page);
        page->mapping = NULL;
 
-       if (!anon) /* This page was removed from radix-tree. */
-               mem_cgroup_uncharge_cache_page(page);
-
        /*
         * If any waiters have accumulated on the new page then
         * wake them up.
@@ -614,6 +597,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
        struct page *newpage = get_new_page(page, private, &result);
        int rcu_locked = 0;
        int charge = 0;
+       struct mem_cgroup *mem;
 
        if (!newpage)
                return -ENOMEM;
@@ -623,24 +607,26 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
                goto move_newpage;
        }
 
-       charge = mem_cgroup_prepare_migration(page, newpage);
-       if (charge == -ENOMEM) {
-               rc = -ENOMEM;
-               goto move_newpage;
-       }
        /* prepare cgroup just returns 0 or -ENOMEM */
-       BUG_ON(charge);
-
        rc = -EAGAIN;
+
        if (!trylock_page(page)) {
                if (!force)
                        goto move_newpage;
                lock_page(page);
        }
 
+       /* charge against new page */
+       charge = mem_cgroup_prepare_migration(page, &mem);
+       if (charge == -ENOMEM) {
+               rc = -ENOMEM;
+               goto unlock;
+       }
+       BUG_ON(charge);
+
        if (PageWriteback(page)) {
                if (!force)
-                       goto unlock;
+                       goto uncharge;
                wait_on_page_writeback(page);
        }
        /*
@@ -693,7 +679,9 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 rcu_unlock:
        if (rcu_locked)
                rcu_read_unlock();
-
+uncharge:
+       if (!charge)
+               mem_cgroup_end_migration(mem, page, newpage);
 unlock:
        unlock_page(page);
 
@@ -709,8 +697,6 @@ unlock:
        }
 
 move_newpage:
-       if (!charge)
-               mem_cgroup_end_migration(newpage);
 
        /*
         * Move the new page to the LRU. If migration was not successful
index 6b9e758c98a59ebd46168ecf150db260988fd437..40ba05061a4fbc8a43d3eb046a58fe756748cd06 100644 (file)
@@ -429,7 +429,6 @@ void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask)
        unsigned long points = 0;
        struct task_struct *p;
 
-       cgroup_lock();
        read_lock(&tasklist_lock);
 retry:
        p = select_bad_process(&points, mem);
@@ -444,7 +443,6 @@ retry:
                goto retry;
 out:
        read_unlock(&tasklist_lock);
-       cgroup_unlock();
 }
 #endif
 
@@ -560,6 +558,13 @@ void pagefault_out_of_memory(void)
                /* Got some memory back in the last second. */
                return;
 
+       /*
+        * If this came from a memcg, the oom-killer has already been
+        * invoked, so it is not worth going system-wide OOM.
+        */
+       if (mem_cgroup_oom_called(current))
+               goto rest_and_return;
+
        if (sysctl_panic_on_oom)
                panic("out of memory from page fault. panic_on_oom is selected.\n");
 
@@ -571,6 +576,7 @@ void pagefault_out_of_memory(void)
         * Give "p" a good chance of killing itself before we
         * retry to allocate memory.
         */
+rest_and_return:
        if (!test_thread_flag(TIF_MEMDIE))
                schedule_timeout_uninterruptible(1);
 }
index 7bf22e04531888b9a3b1c18c540023040432913e..5675b30738546451c47771e5703f06830a63898a 100644 (file)
@@ -3523,10 +3523,10 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
                        INIT_LIST_HEAD(&zone->lru[l].list);
                        zone->lru[l].nr_scan = 0;
                }
-               zone->recent_rotated[0] = 0;
-               zone->recent_rotated[1] = 0;
-               zone->recent_scanned[0] = 0;
-               zone->recent_scanned[1] = 0;
+               zone->reclaim_stat.recent_rotated[0] = 0;
+               zone->reclaim_stat.recent_rotated[1] = 0;
+               zone->reclaim_stat.recent_scanned[0] = 0;
+               zone->reclaim_stat.recent_scanned[1] = 0;
                zap_zone_vm_stats(zone);
                zone->flags = 0;
                if (!size)
index d6507a660ed64bb1a15129b9336457a616bd9008..7006a11350c822b7e8e34c0c2813667ae357a839 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/memory.h>
 #include <linux/vmalloc.h>
 #include <linux/cgroup.h>
+#include <linux/swapops.h>
 
 static void __meminit
 __init_page_cgroup(struct page_cgroup *pc, unsigned long pfn)
@@ -15,6 +16,7 @@ __init_page_cgroup(struct page_cgroup *pc, unsigned long pfn)
        pc->flags = 0;
        pc->mem_cgroup = NULL;
        pc->page = pfn_to_page(pfn);
+       INIT_LIST_HEAD(&pc->lru);
 }
 static unsigned long total_usage;
 
@@ -72,7 +74,7 @@ void __init page_cgroup_init(void)
 
        int nid, fail;
 
-       if (mem_cgroup_subsys.disabled)
+       if (mem_cgroup_disabled())
                return;
 
        for_each_online_node(nid)  {
@@ -103,13 +105,11 @@ struct page_cgroup *lookup_page_cgroup(struct page *page)
 /* __alloc_bootmem...() is protected by !slab_available() */
 static int __init_refok init_section_page_cgroup(unsigned long pfn)
 {
-       struct mem_section *section;
+       struct mem_section *section = __pfn_to_section(pfn);
        struct page_cgroup *base, *pc;
        unsigned long table_size;
        int nid, index;
 
-       section = __pfn_to_section(pfn);
-
        if (!section->page_cgroup) {
                nid = page_to_nid(pfn_to_page(pfn));
                table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION;
@@ -145,7 +145,6 @@ static int __init_refok init_section_page_cgroup(unsigned long pfn)
                __init_page_cgroup(pc, pfn + index);
        }
 
-       section = __pfn_to_section(pfn);
        section->page_cgroup = base - pfn;
        total_usage += table_size;
        return 0;
@@ -248,7 +247,7 @@ void __init page_cgroup_init(void)
        unsigned long pfn;
        int fail = 0;
 
-       if (mem_cgroup_subsys.disabled)
+       if (mem_cgroup_disabled())
                return;
 
        for (pfn = 0; !fail && pfn < max_pfn; pfn += PAGES_PER_SECTION) {
@@ -273,3 +272,199 @@ void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat)
 }
 
 #endif
+
+
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
+
+static DEFINE_MUTEX(swap_cgroup_mutex);
+struct swap_cgroup_ctrl {
+       struct page **map;
+       unsigned long length;
+};
+
+struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES];
+
+/*
+ * These 8 bytes seem big... maybe we can reduce this once we can use an
+ * "id" for the cgroup rather than a pointer.
+ */
+struct swap_cgroup {
+       struct mem_cgroup       *val;
+};
+#define SC_PER_PAGE    (PAGE_SIZE/sizeof(struct swap_cgroup))
+#define SC_POS_MASK    (SC_PER_PAGE - 1)
+
+/*
+ * SwapCgroup implements "lookup" and "exchange" operations.
+ * In typical usage, swap_cgroup is accessed via memcg's charge/uncharge
+ * against SwapCache. At swap_free(), it is accessed directly from swap.
+ *
+ * This means:
+ *  - there is no race in "exchange" when we are accessed via SwapCache,
+ *    because the SwapCache (and its swp_entry) is under lock.
+ *  - when called via swap_free(), there is no user of the entry and hence
+ *    no race.
+ * Therefore, we don't need a lock around "exchange".
+ *
+ * TODO: we can push these buffers out to HIGHMEM.
+ */
+
+/*
+ * allocate buffer for swap_cgroup.
+ */
+static int swap_cgroup_prepare(int type)
+{
+       struct page *page;
+       struct swap_cgroup_ctrl *ctrl;
+       unsigned long idx, max;
+
+       if (!do_swap_account)
+               return 0;
+       ctrl = &swap_cgroup_ctrl[type];
+
+       for (idx = 0; idx < ctrl->length; idx++) {
+               page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+               if (!page)
+                       goto not_enough_page;
+               ctrl->map[idx] = page;
+       }
+       return 0;
+not_enough_page:
+       max = idx;
+       for (idx = 0; idx < max; idx++)
+               __free_page(ctrl->map[idx]);
+
+       return -ENOMEM;
+}
+
+/**
+ * swap_cgroup_record - record mem_cgroup for this swp_entry.
+ * @ent: swap entry to be recorded into
+ * @mem: mem_cgroup to be recorded
+ *
+ * Returns the old value on success, NULL on failure.
+ * (Of course, the old value itself can be NULL.)
+ */
+struct mem_cgroup *swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem)
+{
+       int type = swp_type(ent);
+       unsigned long offset = swp_offset(ent);
+       unsigned long idx = offset / SC_PER_PAGE;
+       unsigned long pos = offset & SC_POS_MASK;
+       struct swap_cgroup_ctrl *ctrl;
+       struct page *mappage;
+       struct swap_cgroup *sc;
+       struct mem_cgroup *old;
+
+       if (!do_swap_account)
+               return NULL;
+
+       ctrl = &swap_cgroup_ctrl[type];
+
+       mappage = ctrl->map[idx];
+       sc = page_address(mappage);
+       sc += pos;
+       old = sc->val;
+       sc->val = mem;
+
+       return old;
+}
+
+/**
+ * lookup_swap_cgroup - lookup mem_cgroup tied to swap entry
+ * @ent: swap entry to be looked up.
+ *
+ * Returns a pointer to the mem_cgroup on success, NULL on failure.
+ */
+struct mem_cgroup *lookup_swap_cgroup(swp_entry_t ent)
+{
+       int type = swp_type(ent);
+       unsigned long offset = swp_offset(ent);
+       unsigned long idx = offset / SC_PER_PAGE;
+       unsigned long pos = offset & SC_POS_MASK;
+       struct swap_cgroup_ctrl *ctrl;
+       struct page *mappage;
+       struct swap_cgroup *sc;
+       struct mem_cgroup *ret;
+
+       if (!do_swap_account)
+               return NULL;
+
+       ctrl = &swap_cgroup_ctrl[type];
+       mappage = ctrl->map[idx];
+       sc = page_address(mappage);
+       sc += pos;
+       ret = sc->val;
+       return ret;
+}
+
+int swap_cgroup_swapon(int type, unsigned long max_pages)
+{
+       void *array;
+       unsigned long array_size;
+       unsigned long length;
+       struct swap_cgroup_ctrl *ctrl;
+
+       if (!do_swap_account)
+               return 0;
+
+       length = ((max_pages/SC_PER_PAGE) + 1);
+       array_size = length * sizeof(void *);
+
+       array = vmalloc(array_size);
+       if (!array)
+               goto nomem;
+
+       memset(array, 0, array_size);
+       ctrl = &swap_cgroup_ctrl[type];
+       mutex_lock(&swap_cgroup_mutex);
+       ctrl->length = length;
+       ctrl->map = array;
+       if (swap_cgroup_prepare(type)) {
+               /* memory shortage */
+               ctrl->map = NULL;
+               ctrl->length = 0;
+               vfree(array);
+               mutex_unlock(&swap_cgroup_mutex);
+               goto nomem;
+       }
+       mutex_unlock(&swap_cgroup_mutex);
+
+       printk(KERN_INFO
+               "swap_cgroup: uses %ld bytes of vmalloc for pointer array space"
+               " and %ld bytes to hold mem_cgroup pointers on swap\n",
+               array_size, length * PAGE_SIZE);
+       printk(KERN_INFO
+       "swap_cgroup can be disabled by noswapaccount boot option.\n");
+
+       return 0;
+nomem:
+       printk(KERN_INFO "couldn't allocate enough memory for swap_cgroup.\n");
+       printk(KERN_INFO
+               "swap_cgroup can be disabled by noswapaccount boot option\n");
+       return -ENOMEM;
+}
+
+void swap_cgroup_swapoff(int type)
+{
+       int i;
+       struct swap_cgroup_ctrl *ctrl;
+
+       if (!do_swap_account)
+               return;
+
+       mutex_lock(&swap_cgroup_mutex);
+       ctrl = &swap_cgroup_ctrl[type];
+       if (ctrl->map) {
+               for (i = 0; i < ctrl->length; i++) {
+                       struct page *page = ctrl->map[i];
+                       if (page)
+                               __free_page(page);
+               }
+               vfree(ctrl->map);
+               ctrl->map = NULL;
+               ctrl->length = 0;
+       }
+       mutex_unlock(&swap_cgroup_mutex);
+}
+
+#endif
index 5941f980136367c8e0bc28c39b80f2acadee54c2..5d0de96c97897f463eaa0d469f86ad5f273664d5 100644 (file)
@@ -928,7 +928,11 @@ found:
        error = 1;
        if (!inode)
                goto out;
-       /* Precharge page using GFP_KERNEL while we can wait */
+       /*
+        * Charge the page using GFP_KERNEL while we can wait.
+        * It is charged back to the user (not to the caller) when swap
+        * accounting is in use. add_to_page_cache() will be called
+        * with GFP_NOWAIT.
+        */
        error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
        if (error)
                goto out;
@@ -1320,15 +1324,19 @@ repeat:
                } else {
                        shmem_swp_unmap(entry);
                        spin_unlock(&info->lock);
-                       unlock_page(swappage);
-                       page_cache_release(swappage);
                        if (error == -ENOMEM) {
                                /* allow reclaim from this memory cgroup */
-                               error = mem_cgroup_shrink_usage(current->mm,
+                               error = mem_cgroup_shrink_usage(swappage,
+                                                               current->mm,
                                                                gfp);
-                               if (error)
+                               if (error) {
+                                       unlock_page(swappage);
+                                       page_cache_release(swappage);
                                        goto failed;
+                               }
                        }
+                       unlock_page(swappage);
+                       page_cache_release(swappage);
                        goto repeat;
                }
        } else if (sgp == SGP_READ && !filepage) {
@@ -1379,7 +1387,7 @@ repeat:
 
                        /* Precharge page while we can wait, compensate after */
                        error = mem_cgroup_cache_charge(filepage, current->mm,
-                                                       gfp & ~__GFP_HIGHMEM);
+                                       GFP_KERNEL);
                        if (error) {
                                page_cache_release(filepage);
                                shmem_unacct_blocks(info->flags, 1);
index ba2c0e8b8b54387f1743ee60f3d97301820f1b28..8adb9feb61e10b7385e01d3410d8cb0f0a4807e1 100644 (file)
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -151,6 +151,26 @@ void  rotate_reclaimable_page(struct page *page)
        }
 }
 
+static void update_page_reclaim_stat(struct zone *zone, struct page *page,
+                                    int file, int rotated)
+{
+       struct zone_reclaim_stat *reclaim_stat = &zone->reclaim_stat;
+       struct zone_reclaim_stat *memcg_reclaim_stat;
+
+       memcg_reclaim_stat = mem_cgroup_get_reclaim_stat_from_page(page);
+
+       reclaim_stat->recent_scanned[file]++;
+       if (rotated)
+               reclaim_stat->recent_rotated[file]++;
+
+       if (!memcg_reclaim_stat)
+               return;
+
+       memcg_reclaim_stat->recent_scanned[file]++;
+       if (rotated)
+               memcg_reclaim_stat->recent_rotated[file]++;
+}
+
 /*
  * FIXME: speed this up?
  */
@@ -168,10 +188,8 @@ void activate_page(struct page *page)
                lru += LRU_ACTIVE;
                add_page_to_lru_list(zone, page, lru);
                __count_vm_event(PGACTIVATE);
-               mem_cgroup_move_lists(page, lru);
 
-               zone->recent_rotated[!!file]++;
-               zone->recent_scanned[!!file]++;
+               update_page_reclaim_stat(zone, page, !!file, 1);
        }
        spin_unlock_irq(&zone->lru_lock);
 }
@@ -386,12 +404,14 @@ void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru)
 {
        int i;
        struct zone *zone = NULL;
+
        VM_BUG_ON(is_unevictable_lru(lru));
 
        for (i = 0; i < pagevec_count(pvec); i++) {
                struct page *page = pvec->pages[i];
                struct zone *pagezone = page_zone(page);
                int file;
+               int active;
 
                if (pagezone != zone) {
                        if (zone)
@@ -403,12 +423,11 @@ void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru)
                VM_BUG_ON(PageUnevictable(page));
                VM_BUG_ON(PageLRU(page));
                SetPageLRU(page);
+               active = is_active_lru(lru);
                file = is_file_lru(lru);
-               zone->recent_scanned[file]++;
-               if (is_active_lru(lru)) {
+               if (active)
                        SetPageActive(page);
-                       zone->recent_rotated[file]++;
-               }
+               update_page_reclaim_stat(zone, page, file, active);
                add_page_to_lru_list(zone, page, lru);
        }
        if (zone)
index 81c825f67a7f5bebfad1750d660551e2e2f9f2c3..3ecea98ecb459e779e68ed0a68899599045de0f9 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/backing-dev.h>
 #include <linux/pagevec.h>
 #include <linux/migrate.h>
+#include <linux/page_cgroup.h>
 
 #include <asm/pgtable.h>
 
@@ -108,6 +109,8 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
  */
 void __delete_from_swap_cache(struct page *page)
 {
+       swp_entry_t ent = {.val = page_private(page)};
+
        VM_BUG_ON(!PageLocked(page));
        VM_BUG_ON(!PageSwapCache(page));
        VM_BUG_ON(PageWriteback(page));
@@ -118,6 +121,7 @@ void __delete_from_swap_cache(struct page *page)
        total_swapcache_pages--;
        __dec_zone_page_state(page, NR_FILE_PAGES);
        INC_CACHE_INFO(del_total);
+       mem_cgroup_uncharge_swapcache(page, ent);
 }
 
 /**
index eec5ca758a23b91c5707655dde0b191d7d4ada52..da422c47e2ee515bb68c131f0980c603b7d1b380 100644 (file)
@@ -33,6 +33,7 @@
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 #include <linux/swapops.h>
+#include <linux/page_cgroup.h>
 
 static DEFINE_SPINLOCK(swap_lock);
 static unsigned int nr_swapfiles;
@@ -470,8 +471,9 @@ out:
        return NULL;
 }
 
-static int swap_entry_free(struct swap_info_struct *p, unsigned long offset)
+static int swap_entry_free(struct swap_info_struct *p, swp_entry_t ent)
 {
+       unsigned long offset = swp_offset(ent);
        int count = p->swap_map[offset];
 
        if (count < SWAP_MAP_MAX) {
@@ -486,6 +488,7 @@ static int swap_entry_free(struct swap_info_struct *p, unsigned long offset)
                                swap_list.next = p - swap_info;
                        nr_swap_pages++;
                        p->inuse_pages--;
+                       mem_cgroup_uncharge_swap(ent);
                }
        }
        return count;
@@ -501,7 +504,7 @@ void swap_free(swp_entry_t entry)
 
        p = swap_info_get(entry);
        if (p) {
-               swap_entry_free(p, swp_offset(entry));
+               swap_entry_free(p, entry);
                spin_unlock(&swap_lock);
        }
 }
@@ -581,7 +584,7 @@ int free_swap_and_cache(swp_entry_t entry)
 
        p = swap_info_get(entry);
        if (p) {
-               if (swap_entry_free(p, swp_offset(entry)) == 1) {
+               if (swap_entry_free(p, entry) == 1) {
                        page = find_get_page(&swapper_space, entry.val);
                        if (page && !trylock_page(page)) {
                                page_cache_release(page);
@@ -690,17 +693,18 @@ unsigned int count_swap_pages(int type, int free)
 static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
                unsigned long addr, swp_entry_t entry, struct page *page)
 {
+       struct mem_cgroup *ptr = NULL;
        spinlock_t *ptl;
        pte_t *pte;
        int ret = 1;
 
-       if (mem_cgroup_charge(page, vma->vm_mm, GFP_KERNEL))
+       if (mem_cgroup_try_charge_swapin(vma->vm_mm, page, GFP_KERNEL, &ptr))
                ret = -ENOMEM;
 
        pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
        if (unlikely(!pte_same(*pte, swp_entry_to_pte(entry)))) {
                if (ret > 0)
-                       mem_cgroup_uncharge_page(page);
+                       mem_cgroup_cancel_charge_swapin(ptr);
                ret = 0;
                goto out;
        }
@@ -710,6 +714,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
        set_pte_at(vma->vm_mm, addr, pte,
                   pte_mkold(mk_pte(page, vma->vm_page_prot)));
        page_add_anon_rmap(page, vma, addr);
+       mem_cgroup_commit_charge_swapin(page, ptr);
        swap_free(entry);
        /*
         * Move the page to the active list so it is not
@@ -1492,6 +1497,9 @@ asmlinkage long sys_swapoff(const char __user * specialfile)
        spin_unlock(&swap_lock);
        mutex_unlock(&swapon_mutex);
        vfree(swap_map);
+       /* Destroy swap accounting information */
+       swap_cgroup_swapoff(type);
+
        inode = mapping->host;
        if (S_ISBLK(inode->i_mode)) {
                struct block_device *bdev = I_BDEV(inode);
@@ -1809,6 +1817,11 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
                }
                swap_map[page_nr] = SWAP_MAP_BAD;
        }
+
+       error = swap_cgroup_swapon(type, maxpages);
+       if (error)
+               goto bad_swap;
+
        nr_good_pages = swap_header->info.last_page -
                        swap_header->info.nr_badpages -
                        1 /* header page */;
@@ -1880,6 +1893,7 @@ bad_swap:
                bd_release(bdev);
        }
        destroy_swap_extents(p);
+       swap_cgroup_swapoff(type);
 bad_swap_2:
        spin_lock(&swap_lock);
        p->swap_file = NULL;
index b07c48b09a93224f27b54a490c5beb857c744564..9a27c44aa327f8f46ddbde07776df8644db82de5 100644 (file)
@@ -125,11 +125,30 @@ static LIST_HEAD(shrinker_list);
 static DECLARE_RWSEM(shrinker_rwsem);
 
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
-#define scan_global_lru(sc)    (!(sc)->mem_cgroup)
+#define scanning_global_lru(sc)        (!(sc)->mem_cgroup)
 #else
-#define scan_global_lru(sc)    (1)
+#define scanning_global_lru(sc)        (1)
 #endif
 
+static struct zone_reclaim_stat *get_reclaim_stat(struct zone *zone,
+                                                 struct scan_control *sc)
+{
+       if (!scanning_global_lru(sc))
+               return mem_cgroup_get_reclaim_stat(sc->mem_cgroup, zone);
+
+       return &zone->reclaim_stat;
+}
+
+static unsigned long zone_nr_pages(struct zone *zone, struct scan_control *sc,
+                                  enum lru_list lru)
+{
+       if (!scanning_global_lru(sc))
+               return mem_cgroup_zone_nr_pages(sc->mem_cgroup, zone, lru);
+
+       return zone_page_state(zone, NR_LRU_BASE + lru);
+}
+
+
 /*
  * Add a shrinker callback to be called from the vm
  */
@@ -512,7 +531,6 @@ redo:
                lru = LRU_UNEVICTABLE;
                add_page_to_unevictable_list(page);
        }
-       mem_cgroup_move_lists(page, lru);
 
        /*
         * page's status can change while we move it among lru. If an evictable
@@ -547,7 +565,6 @@ void putback_lru_page(struct page *page)
 
        lru = !!TestClearPageActive(page) + page_is_file_cache(page);
        lru_cache_add_lru(page, lru);
-       mem_cgroup_move_lists(page, lru);
        put_page(page);
 }
 #endif /* CONFIG_UNEVICTABLE_LRU */
@@ -813,6 +830,7 @@ int __isolate_lru_page(struct page *page, int mode, int file)
                return ret;
 
        ret = -EBUSY;
+
        if (likely(get_page_unless_zero(page))) {
                /*
                 * Be careful not to clear PageLRU until after we're
@@ -821,6 +839,7 @@ int __isolate_lru_page(struct page *page, int mode, int file)
                 */
                ClearPageLRU(page);
                ret = 0;
+               mem_cgroup_del_lru(page);
        }
 
        return ret;
@@ -1029,6 +1048,7 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
        struct pagevec pvec;
        unsigned long nr_scanned = 0;
        unsigned long nr_reclaimed = 0;
+       struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
 
        pagevec_init(&pvec, 1);
 
@@ -1070,13 +1090,14 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
                __mod_zone_page_state(zone, NR_INACTIVE_ANON,
                                                -count[LRU_INACTIVE_ANON]);
 
-               if (scan_global_lru(sc)) {
+               if (scanning_global_lru(sc))
                        zone->pages_scanned += nr_scan;
-                       zone->recent_scanned[0] += count[LRU_INACTIVE_ANON];
-                       zone->recent_scanned[0] += count[LRU_ACTIVE_ANON];
-                       zone->recent_scanned[1] += count[LRU_INACTIVE_FILE];
-                       zone->recent_scanned[1] += count[LRU_ACTIVE_FILE];
-               }
+
+               reclaim_stat->recent_scanned[0] += count[LRU_INACTIVE_ANON];
+               reclaim_stat->recent_scanned[0] += count[LRU_ACTIVE_ANON];
+               reclaim_stat->recent_scanned[1] += count[LRU_INACTIVE_FILE];
+               reclaim_stat->recent_scanned[1] += count[LRU_ACTIVE_FILE];
+
                spin_unlock_irq(&zone->lru_lock);
 
                nr_scanned += nr_scan;
@@ -1108,7 +1129,7 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
                if (current_is_kswapd()) {
                        __count_zone_vm_events(PGSCAN_KSWAPD, zone, nr_scan);
                        __count_vm_events(KSWAPD_STEAL, nr_freed);
-               } else if (scan_global_lru(sc))
+               } else if (scanning_global_lru(sc))
                        __count_zone_vm_events(PGSCAN_DIRECT, zone, nr_scan);
 
                __count_zone_vm_events(PGSTEAL, zone, nr_freed);
@@ -1134,10 +1155,9 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
                        SetPageLRU(page);
                        lru = page_lru(page);
                        add_page_to_lru_list(zone, page, lru);
-                       mem_cgroup_move_lists(page, lru);
-                       if (PageActive(page) && scan_global_lru(sc)) {
+                       if (PageActive(page)) {
                                int file = !!page_is_file_cache(page);
-                               zone->recent_rotated[file]++;
+                               reclaim_stat->recent_rotated[file]++;
                        }
                        if (!pagevec_add(&pvec, page)) {
                                spin_unlock_irq(&zone->lru_lock);
@@ -1197,6 +1217,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
        struct page *page;
        struct pagevec pvec;
        enum lru_list lru;
+       struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
 
        lru_add_drain();
        spin_lock_irq(&zone->lru_lock);
@@ -1207,10 +1228,10 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
         * zone->pages_scanned is used to detect a zone's oom
         * mem_cgroup remembers nr_scan by itself.
         */
-       if (scan_global_lru(sc)) {
+       if (scanning_global_lru(sc)) {
                zone->pages_scanned += pgscanned;
-               zone->recent_scanned[!!file] += pgmoved;
        }
+       reclaim_stat->recent_scanned[!!file] += pgmoved;
 
        if (file)
                __mod_zone_page_state(zone, NR_ACTIVE_FILE, -pgmoved);
@@ -1251,8 +1272,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
         * This helps balance scan pressure between file and anonymous
         * pages in get_scan_ratio.
         */
-       if (scan_global_lru(sc))
-               zone->recent_rotated[!!file] += pgmoved;
+       reclaim_stat->recent_rotated[!!file] += pgmoved;
 
        while (!list_empty(&l_inactive)) {
                page = lru_to_page(&l_inactive);
@@ -1263,7 +1283,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
                ClearPageActive(page);
 
                list_move(&page->lru, &zone->lru[lru].list);
-               mem_cgroup_move_lists(page, lru);
+               mem_cgroup_add_lru_list(page, lru);
                pgmoved++;
                if (!pagevec_add(&pvec, page)) {
                        __mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved);
@@ -1292,6 +1312,38 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
        pagevec_release(&pvec);
 }
 
+static int inactive_anon_is_low_global(struct zone *zone)
+{
+       unsigned long active, inactive;
+
+       active = zone_page_state(zone, NR_ACTIVE_ANON);
+       inactive = zone_page_state(zone, NR_INACTIVE_ANON);
+
+       if (inactive * zone->inactive_ratio < active)
+               return 1;
+
+       return 0;
+}
+
+/**
+ * inactive_anon_is_low - check if anonymous pages need to be deactivated
+ * @zone: zone to check
+ * @sc:   scan control of this context
+ *
+ * Returns true if the zone does not have enough inactive anon pages,
+ * meaning some active anon pages need to be deactivated.
+ */
+static int inactive_anon_is_low(struct zone *zone, struct scan_control *sc)
+{
+       int low;
+
+       if (scanning_global_lru(sc))
+               low = inactive_anon_is_low_global(zone);
+       else
+               low = mem_cgroup_inactive_anon_is_low(sc->mem_cgroup);
+       return low;
+}
+
 static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
        struct zone *zone, struct scan_control *sc, int priority)
 {
@@ -1302,8 +1354,7 @@ static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
                return 0;
        }
 
-       if (lru == LRU_ACTIVE_ANON &&
-           (!scan_global_lru(sc) || inactive_anon_is_low(zone))) {
+       if (lru == LRU_ACTIVE_ANON && inactive_anon_is_low(zone, sc)) {
                shrink_active_list(nr_to_scan, zone, sc, priority, file);
                return 0;
        }
@@ -1325,6 +1376,7 @@ static void get_scan_ratio(struct zone *zone, struct scan_control *sc,
        unsigned long anon, file, free;
        unsigned long anon_prio, file_prio;
        unsigned long ap, fp;
+       struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
 
        /* If we have no swap space, do not bother scanning anon pages. */
        if (nr_swap_pages <= 0) {
@@ -1333,17 +1385,20 @@ static void get_scan_ratio(struct zone *zone, struct scan_control *sc,
                return;
        }
 
-       anon  = zone_page_state(zone, NR_ACTIVE_ANON) +
-               zone_page_state(zone, NR_INACTIVE_ANON);
-       file  = zone_page_state(zone, NR_ACTIVE_FILE) +
-               zone_page_state(zone, NR_INACTIVE_FILE);
-       free  = zone_page_state(zone, NR_FREE_PAGES);
-
-       /* If we have very few page cache pages, force-scan anon pages. */
-       if (unlikely(file + free <= zone->pages_high)) {
-               percent[0] = 100;
-               percent[1] = 0;
-               return;
+       anon  = zone_nr_pages(zone, sc, LRU_ACTIVE_ANON) +
+               zone_nr_pages(zone, sc, LRU_INACTIVE_ANON);
+       file  = zone_nr_pages(zone, sc, LRU_ACTIVE_FILE) +
+               zone_nr_pages(zone, sc, LRU_INACTIVE_FILE);
+
+       if (scanning_global_lru(sc)) {
+               free  = zone_page_state(zone, NR_FREE_PAGES);
+               /* If we have very few page cache pages,
+                  force-scan anon pages. */
+               if (unlikely(file + free <= zone->pages_high)) {
+                       percent[0] = 100;
+                       percent[1] = 0;
+                       return;
+               }
        }
 
        /*
@@ -1357,17 +1412,17 @@ static void get_scan_ratio(struct zone *zone, struct scan_control *sc,
         *
         * anon in [0], file in [1]
         */
-       if (unlikely(zone->recent_scanned[0] > anon / 4)) {
+       if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) {
                spin_lock_irq(&zone->lru_lock);
-               zone->recent_scanned[0] /= 2;
-               zone->recent_rotated[0] /= 2;
+               reclaim_stat->recent_scanned[0] /= 2;
+               reclaim_stat->recent_rotated[0] /= 2;
                spin_unlock_irq(&zone->lru_lock);
        }
 
-       if (unlikely(zone->recent_scanned[1] > file / 4)) {
+       if (unlikely(reclaim_stat->recent_scanned[1] > file / 4)) {
                spin_lock_irq(&zone->lru_lock);
-               zone->recent_scanned[1] /= 2;
-               zone->recent_rotated[1] /= 2;
+               reclaim_stat->recent_scanned[1] /= 2;
+               reclaim_stat->recent_rotated[1] /= 2;
                spin_unlock_irq(&zone->lru_lock);
        }
 
@@ -1383,11 +1438,11 @@ static void get_scan_ratio(struct zone *zone, struct scan_control *sc,
         * proportional to the fraction of recently scanned pages on
         * each list that were recently referenced and in active use.
         */
-       ap = (anon_prio + 1) * (zone->recent_scanned[0] + 1);
-       ap /= zone->recent_rotated[0] + 1;
+       ap = (anon_prio + 1) * (reclaim_stat->recent_scanned[0] + 1);
+       ap /= reclaim_stat->recent_rotated[0] + 1;
 
-       fp = (file_prio + 1) * (zone->recent_scanned[1] + 1);
-       fp /= zone->recent_rotated[1] + 1;
+       fp = (file_prio + 1) * (reclaim_stat->recent_scanned[1] + 1);
+       fp /= reclaim_stat->recent_rotated[1] + 1;
 
        /* Normalize to percentages */
        percent[0] = 100 * ap / (ap + fp + 1);
@@ -1411,30 +1466,23 @@ static void shrink_zone(int priority, struct zone *zone,
        get_scan_ratio(zone, sc, percent);
 
        for_each_evictable_lru(l) {
-               if (scan_global_lru(sc)) {
-                       int file = is_file_lru(l);
-                       int scan;
-
-                       scan = zone_page_state(zone, NR_LRU_BASE + l);
-                       if (priority) {
-                               scan >>= priority;
-                               scan = (scan * percent[file]) / 100;
-                       }
+               int file = is_file_lru(l);
+               int scan;
+
+               scan = zone_page_state(zone, NR_LRU_BASE + l);
+               if (priority) {
+                       scan >>= priority;
+                       scan = (scan * percent[file]) / 100;
+               }
+               if (scanning_global_lru(sc)) {
                        zone->lru[l].nr_scan += scan;
                        nr[l] = zone->lru[l].nr_scan;
                        if (nr[l] >= swap_cluster_max)
                                zone->lru[l].nr_scan = 0;
                        else
                                nr[l] = 0;
-               } else {
-                       /*
-                        * This reclaim occurs not because zone memory shortage
-                        * but because memory controller hits its limit.
-                        * Don't modify zone reclaim related data.
-                        */
-                       nr[l] = mem_cgroup_calc_reclaim(sc->mem_cgroup, zone,
-                                                               priority, l);
-               }
+               } else
+                       nr[l] = scan;
        }
 
        while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
@@ -1467,9 +1515,7 @@ static void shrink_zone(int priority, struct zone *zone,
         * Even if we did not try to evict anon pages at all, we want to
         * rebalance the anon lru active/inactive ratio.
         */
-       if (!scan_global_lru(sc) || inactive_anon_is_low(zone))
-               shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0);
-       else if (!scan_global_lru(sc))
+       if (inactive_anon_is_low(zone, sc))
                shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0);
 
        throttle_vm_writeout(sc->gfp_mask);
@@ -1504,7 +1550,7 @@ static void shrink_zones(int priority, struct zonelist *zonelist,
                 * Take care memory controller reclaiming has small influence
                 * to global LRU.
                 */
-               if (scan_global_lru(sc)) {
+               if (scanning_global_lru(sc)) {
                        if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
                                continue;
                        note_zone_scanning_priority(zone, priority);
@@ -1557,12 +1603,12 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 
        delayacct_freepages_start();
 
-       if (scan_global_lru(sc))
+       if (scanning_global_lru(sc))
                count_vm_event(ALLOCSTALL);
        /*
         * mem_cgroup will not do shrink_slab.
         */
-       if (scan_global_lru(sc)) {
+       if (scanning_global_lru(sc)) {
                for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
 
                        if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
@@ -1581,7 +1627,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
                 * Don't shrink slabs when reclaiming memory from
                 * over limit cgroups
                 */
-               if (scan_global_lru(sc)) {
+               if (scanning_global_lru(sc)) {
                        shrink_slab(sc->nr_scanned, sc->gfp_mask, lru_pages);
                        if (reclaim_state) {
                                sc->nr_reclaimed += reclaim_state->reclaimed_slab;
@@ -1612,7 +1658,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
                        congestion_wait(WRITE, HZ/10);
        }
        /* top priority shrink_zones still had more to do? don't OOM, then */
-       if (!sc->all_unreclaimable && scan_global_lru(sc))
+       if (!sc->all_unreclaimable && scanning_global_lru(sc))
                ret = sc->nr_reclaimed;
 out:
        /*
@@ -1625,7 +1671,7 @@ out:
        if (priority < 0)
                priority = 0;
 
-       if (scan_global_lru(sc)) {
+       if (scanning_global_lru(sc)) {
                for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
 
                        if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
@@ -1661,19 +1707,24 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
 
 unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
-                                               gfp_t gfp_mask)
+                                          gfp_t gfp_mask,
+                                          bool noswap,
+                                          unsigned int swappiness)
 {
        struct scan_control sc = {
                .may_writepage = !laptop_mode,
                .may_swap = 1,
                .swap_cluster_max = SWAP_CLUSTER_MAX,
-               .swappiness = vm_swappiness,
+               .swappiness = swappiness,
                .order = 0,
                .mem_cgroup = mem_cont,
                .isolate_pages = mem_cgroup_isolate_pages,
        };
        struct zonelist *zonelist;
 
+       if (noswap)
+               sc.may_swap = 0;
+
        sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
                        (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
        zonelist = NODE_DATA(numa_node_id())->node_zonelists;
@@ -1761,7 +1812,7 @@ loop_again:
                         * Do some background aging of the anon list, to give
                         * pages a chance to be referenced before reclaiming.
                         */
-                       if (inactive_anon_is_low(zone))
+                       if (inactive_anon_is_low(zone, &sc))
                                shrink_active_list(SWAP_CLUSTER_MAX, zone,
                                                        &sc, priority, 0);
 
@@ -2404,6 +2455,7 @@ retry:
 
                __dec_zone_state(zone, NR_UNEVICTABLE);
                list_move(&page->lru, &zone->lru[l].list);
+               mem_cgroup_move_lists(page, LRU_UNEVICTABLE, l);
                __inc_zone_state(zone, NR_INACTIVE_ANON + l);
                __count_vm_event(UNEVICTABLE_PGRESCUED);
        } else {
@@ -2412,6 +2464,7 @@ retry:
                 */
                SetPageUnevictable(page);
                list_move(&page->lru, &zone->lru[LRU_UNEVICTABLE].list);
+               mem_cgroup_rotate_lru_list(page, LRU_UNEVICTABLE);
                if (page_evictable(page, NULL))
                        goto retry;
        }
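
The vmscan.c changes route the recent_scanned/recent_rotated counters through a per-LRU-set zone_reclaim_stat, so the same get_scan_ratio() arithmetic now serves both global and memcg reclaim. The split itself is the small ratio computation in the hunks above; here it is transcribed into a self-contained program and run on made-up counters. The priority values follow the usual swappiness convention (anon_prio = swappiness, file_prio = 200 - swappiness), which is assumed here since their assignment sits outside the hunks shown.

	/* get_scan_ratio()'s percentage split on sample numbers; only the
	 * counter values and the swappiness assignment are invented. */
	#include <stdio.h>

	int main(void)
	{
		unsigned long anon_prio = 60, file_prio = 140;    /* swappiness = 60 */
		unsigned long recent_scanned[2] = { 1000, 3000 }; /* [0]=anon [1]=file */
		unsigned long recent_rotated[2] = { 800, 300 };

		/* Pressure is proportional to pages scanned recently, damped by
		 * how many of them were rotated back (i.e. still in use). */
		unsigned long ap = (anon_prio + 1) * (recent_scanned[0] + 1);
		ap /= recent_rotated[0] + 1;
		unsigned long fp = (file_prio + 1) * (recent_scanned[1] + 1);
		fp /= recent_rotated[1] + 1;

		/* Normalize to percentages */
		unsigned long percent[2];
		percent[0] = 100 * ap / (ap + fp + 1);  /* anon */
		percent[1] = 100 - percent[0];          /* file */

		printf("scan anon %lu%%, file %lu%%\n", percent[0], percent[1]);
		return 0;
	}

With these sample counters most anon pages were rotated (recently referenced) while most file pages were not, so the split comes out heavily in favor of scanning the file LRU.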
index 76f06b94ab9f61a6e7ed6c844feeba21cae47e9a..c4a59824ac2ce45129508517ef0627e0f0fb96b1 100644 (file)
@@ -2752,7 +2752,7 @@ int __init ip6_route_init(void)
                kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
                                  SLAB_HWCACHE_ALIGN, NULL);
        if (!ip6_dst_ops_template.kmem_cachep)
-               goto out;;
+               goto out;
 
        ret = register_pernet_subsys(&ip6_route_net_ops);
        if (ret)
index 9048fe7e7ea76dd2a62baa8955da06e30577f2e1..a031034720b430d46428762fb68515dff57636dd 100644 (file)
@@ -128,7 +128,7 @@ static struct ctl_table_header *ip6_header;
 
 int ipv6_sysctl_register(void)
 {
-       int err = -ENOMEM;;
+       int err = -ENOMEM;
 
        ip6_header = register_net_sysctl_rotable(net_ipv6_ctl_path, ipv6_table);
        if (ip6_header == NULL)
index f3965df00559d970e4b46d871014ac9e08c8c5de..33133d27b5390673c6540b1c39eff35b460b19a9 100644 (file)
@@ -435,7 +435,7 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
        int i;
 
        q->perturb_timer.function = sfq_perturbation;
-       q->perturb_timer.data = (unsigned long)sch;;
+       q->perturb_timer.data = (unsigned long)sch;
        init_timer_deferrable(&q->perturb_timer);
 
        for (i = 0; i < SFQ_HASH_DIVISOR; i++)
index 20c576f530fa93ab03cfd93d50d561eac8bd831b..56935bbc1496f535e646d2e8ef03c9bddb3a2634 100644 (file)
@@ -489,7 +489,7 @@ int sctp_auth_init_hmacs(struct sctp_endpoint *ep, gfp_t gfp)
        return 0;
 
 out_err:
-       /* Clean up any successfull allocations */
+       /* Clean up any successful allocations */
        sctp_auth_destroy_hmacs(ep->auth_hmacs);
        return -ENOMEM;
 }
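
The sctp_auth_init_hmacs() hunk only corrects a comment typo, but the out_err: label it sits on is the standard goto-based unwind: on failure, tear down whatever was already allocated, then return the error. A generic, self-contained sketch of the idiom, with all names invented:

	/* goto-based error unwind: each failure jumps to one cleanup point
	 * that frees any successful allocations so far. */
	#include <stdlib.h>

	struct ctx { void *a, *b; };

	static int ctx_init(struct ctx *c)
	{
		c->a = c->b = NULL;
		c->a = malloc(64);
		if (!c->a)
			goto out_err;
		c->b = malloc(64);
		if (!c->b)
			goto out_err;
		return 0;

	out_err:
		/* Clean up any successful allocations (cf. the fixed comment). */
		free(c->a);
		free(c->b);          /* free(NULL) is a defined no-op */
		c->a = c->b = NULL;
		return -1;
	}

	int main(void)
	{
		struct ctx c;
		if (ctx_init(&c))
			return 1;
		free(c.a);
		free(c.b);
		return 0;
	}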
index 5ba78701adc3ab868079ee180b1fc2e4ffc3b95f..3aacd0fe7179b26a4e08bc7bce343dd7fbadc239 100644 (file)
@@ -513,11 +513,14 @@ int devcgroup_inode_mknod(int mode, dev_t dev)
        struct dev_cgroup *dev_cgroup;
        struct dev_whitelist_item *wh;
 
+       if (!S_ISBLK(mode) && !S_ISCHR(mode))
+               return 0;
+
        rcu_read_lock();
 
        dev_cgroup = task_devcgroup(current);
 
-       list_for_each_entry(wh, &dev_cgroup->whitelist, list) {
+       list_for_each_entry_rcu(wh, &dev_cgroup->whitelist, list) {
                if (wh->type & DEV_ALL)
                        goto acc_check;
                if ((wh->type & DEV_BLOCK) && !S_ISBLK(mode))
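
devcgroup_inode_mknod() gains an early exit for nodes that are not device special files, and the whitelist walk becomes an RCU-safe iteration; the list is traversed under rcu_read_lock() only (visible in the context above), so the plain list_for_each_entry() was unsafe against concurrent list updates. Below is a compressed userland model of just the matching logic, with hypothetical types, the major/minor and access checks omitted, and no attempt to model RCU itself.

	/* Hypothetical model of the whitelist type check: non-device nodes
	 * are unmediated, otherwise an entry must cover the device type. */
	#include <stdio.h>
	#include <sys/stat.h>

	#define DEV_BLOCK 1
	#define DEV_CHAR  2
	#define DEV_ALL   4

	struct item { int type; };

	static int mknod_allowed(mode_t mode, const struct item *wh, int n)
	{
		/* The new guard: FIFOs, sockets, regular files pass through. */
		if (!S_ISBLK(mode) && !S_ISCHR(mode))
			return 1;
		for (int i = 0; i < n; i++) {
			if (wh[i].type & DEV_ALL)
				return 1;
			if ((wh[i].type & DEV_BLOCK) && !S_ISBLK(mode))
				continue;
			if ((wh[i].type & DEV_CHAR) && !S_ISCHR(mode))
				continue;
			return 1;   /* type matches (major/minor checks omitted) */
		}
		return 0;
	}

	int main(void)
	{
		struct item wl[] = { { DEV_CHAR } };
		printf("fifo: %d\n", mknod_allowed(S_IFIFO, wl, 1)); /* 1: unmediated */
		printf("blk:  %d\n", mknod_allowed(S_IFBLK, wl, 1)); /* 0: no match  */
		return 0;
	}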
index bf107a389ac14c66b8c25df297bef6d56cf08c87..71e2b914363e2cf5490fe9698b7bd76fa23eb9c9 100644 (file)
@@ -569,7 +569,7 @@ static ssize_t smk_write_cipso(struct file *file, const char __user *buf,
        if (skp == NULL)
                goto out;
 
-       rule += SMK_LABELLEN;;
+       rule += SMK_LABELLEN;
        ret = sscanf(rule, "%d", &maplevel);
        if (ret != 1 || maplevel > SMACK_CIPSO_MAXLEVEL)
                goto out;
index 74c823d60f91b4662184dcf60b76eeb2eaae4303..bc8d654576c017194428fc75b3720abc291f1466 100644 (file)
@@ -187,7 +187,7 @@ static int au1x_pcm_dbdma_realloc(struct au1xpsc_audio_dmadata *pcd,
                                        au1x_pcm_dmatx_cb, (void *)pcd);
 
        if (!pcd->ddma_chan)
-               return -ENOMEM;;
+               return -ENOMEM;
 
        au1xxx_dbdma_set_devwidth(pcd->ddma_chan, msbits);
        au1xxx_dbdma_ring_alloc(pcd->ddma_chan, 2);
index 74abc9b4f1ccd6e9277166873047499bb39c8380..366049d8578c1f10611d1e211dc66358fcd2045f 100644 (file)
@@ -212,7 +212,7 @@ davinci_pcm_pointer(struct snd_pcm_substream *substream)
        if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
                count = src - runtime->dma_addr;
        else
-               count = dst - runtime->dma_addr;;
+               count = dst - runtime->dma_addr;
 
        spin_unlock(&prtd->lock);
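
davinci_pcm_pointer() derives the stream position by subtracting the runtime buffer's DMA base address from the controller's current source (playback) or destination (capture) address. A tiny, hypothetical model of that pointer arithmetic, with the wrap to the buffer size added for illustration:

	/* Ring-buffer position from a raw DMA address: current address minus
	 * buffer base, wrapped to the buffer length. Values are invented. */
	#include <stdio.h>

	static unsigned long dma_pointer(unsigned long cur, unsigned long base,
					 unsigned long bytes)
	{
		unsigned long count = cur - base;   /* as in the hunk above */
		return count % bytes;               /* stay inside the ring */
	}

	int main(void)
	{
		/* buffer at 0x80000000, 64 KiB long, DMA engine now at +0x1200 */
		printf("offset = %lu bytes\n",
		       dma_pointer(0x80001200ul, 0x80000000ul, 0x10000ul));
		return 0;
	}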