]> www.pilppa.org Git - linux-2.6-omap-h63xx.git/commitdiff
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg...
authorLinus Torvalds <torvalds@linux-foundation.org>
Tue, 15 Jul 2008 18:26:14 +0000 (11:26 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Tue, 15 Jul 2008 18:26:14 +0000 (11:26 -0700)
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/slab-2.6:
  slab: rename slab_destroy_objs
  slub: current is always valid
  slub: Add check for kfree() of non slab objects.

368 files changed:
Documentation/IRQ-affinity.txt
Documentation/cputopology.txt
Documentation/feature-removal-schedule.txt
Documentation/filesystems/ext4.txt
Documentation/filesystems/gfs2-glocks.txt [new file with mode: 0644]
Documentation/filesystems/proc.txt
Documentation/i2c/busses/i2c-i810 [deleted file]
Documentation/i2c/busses/i2c-prosavage [deleted file]
Documentation/i2c/busses/i2c-savage4 [deleted file]
Documentation/i2c/fault-codes [new file with mode: 0644]
Documentation/i2c/smbus-protocol
Documentation/i2c/writing-clients
Documentation/kernel-parameters.txt
MAINTAINERS
arch/alpha/kernel/irq.c
arch/arm/kernel/stacktrace.c
arch/avr32/kernel/stacktrace.c
arch/mips/kernel/stacktrace.c
arch/mips/sibyte/swarm/Makefile
arch/mips/sibyte/swarm/swarm-i2c.c [new file with mode: 0644]
arch/powerpc/kernel/stacktrace.c
arch/s390/kernel/stacktrace.c
arch/sh/kernel/stacktrace.c
arch/sparc64/kernel/stacktrace.c
arch/x86/Kconfig
arch/x86/kernel/acpi/sleep.c
arch/x86/kernel/quirks.c
arch/x86/kernel/stacktrace.c
block/blk-core.c
block/blk-exec.c
drivers/ata/ahci.c
drivers/ata/libata-core.c
drivers/ata/libata-eh.c
drivers/ata/libata-pmp.c
drivers/ata/libata-scsi.c
drivers/ata/libata-sff.c
drivers/ata/libata.h
drivers/ata/pata_bf54x.c
drivers/ata/pata_legacy.c
drivers/ata/pata_qdi.c
drivers/ata/pata_scc.c
drivers/ata/pata_winbond.c
drivers/ata/sata_svw.c
drivers/base/topology.c
drivers/block/paride/pd.c
drivers/clocksource/acpi_pm.c
drivers/gpu/drm/drm_memory.c
drivers/gpu/drm/radeon/radeon_cp.c
drivers/i2c/algos/i2c-algo-bit.c
drivers/i2c/algos/i2c-algo-pca.c
drivers/i2c/algos/i2c-algo-pcf.c
drivers/i2c/busses/Kconfig
drivers/i2c/busses/Makefile
drivers/i2c/busses/i2c-ali1535.c
drivers/i2c/busses/i2c-ali1563.c
drivers/i2c/busses/i2c-ali15x3.c
drivers/i2c/busses/i2c-amd756-s4882.c
drivers/i2c/busses/i2c-amd756.c
drivers/i2c/busses/i2c-amd8111.c
drivers/i2c/busses/i2c-au1550.c
drivers/i2c/busses/i2c-cpm.c [new file with mode: 0644]
drivers/i2c/busses/i2c-davinci.c
drivers/i2c/busses/i2c-elektor.c
drivers/i2c/busses/i2c-gpio.c
drivers/i2c/busses/i2c-hydra.c
drivers/i2c/busses/i2c-i801.c
drivers/i2c/busses/i2c-i810.c [deleted file]
drivers/i2c/busses/i2c-ibm_iic.c
drivers/i2c/busses/i2c-iop3xx.c
drivers/i2c/busses/i2c-isch.c [new file with mode: 0644]
drivers/i2c/busses/i2c-mpc.c
drivers/i2c/busses/i2c-mv64xxx.c
drivers/i2c/busses/i2c-nforce2-s4985.c [new file with mode: 0644]
drivers/i2c/busses/i2c-nforce2.c
drivers/i2c/busses/i2c-ocores.c
drivers/i2c/busses/i2c-pasemi.c
drivers/i2c/busses/i2c-pca-platform.c
drivers/i2c/busses/i2c-piix4.c
drivers/i2c/busses/i2c-pmcmsp.c
drivers/i2c/busses/i2c-prosavage.c [deleted file]
drivers/i2c/busses/i2c-pxa.c
drivers/i2c/busses/i2c-s3c2410.c
drivers/i2c/busses/i2c-savage4.c [deleted file]
drivers/i2c/busses/i2c-sibyte.c
drivers/i2c/busses/i2c-sis5595.c
drivers/i2c/busses/i2c-sis630.c
drivers/i2c/busses/i2c-sis96x.c
drivers/i2c/busses/i2c-stub.c
drivers/i2c/busses/i2c-taos-evm.c
drivers/i2c/busses/i2c-via.c
drivers/i2c/busses/i2c-viapro.c
drivers/i2c/busses/i2c-voodoo3.c
drivers/i2c/busses/scx200_acb.c
drivers/i2c/chips/Kconfig
drivers/i2c/chips/Makefile
drivers/i2c/chips/at24.c [new file with mode: 0644]
drivers/i2c/chips/eeprom.c
drivers/i2c/chips/max6875.c
drivers/i2c/chips/pca9539.c
drivers/i2c/chips/pcf8574.c
drivers/i2c/chips/pcf8591.c
drivers/i2c/i2c-core.c
drivers/i2c/i2c-dev.c
drivers/ide/Kconfig
drivers/ide/Makefile
drivers/ide/arm/palm_bk3710.c
drivers/ide/h8300/ide-h8300.c
drivers/ide/ide-acpi.c
drivers/ide/ide-atapi.c [new file with mode: 0644]
drivers/ide/ide-cd.c
drivers/ide/ide-cd.h
drivers/ide/ide-cd_ioctl.c
drivers/ide/ide-disk.c
drivers/ide/ide-dma.c
drivers/ide/ide-floppy.c
drivers/ide/ide-io.c
drivers/ide/ide-iops.c
drivers/ide/ide-probe.c
drivers/ide/ide-tape.c
drivers/ide/ide-taskfile.c
drivers/ide/ide-timing.h
drivers/ide/ide.c
drivers/ide/legacy/ali14xx.c
drivers/ide/legacy/gayle.c
drivers/ide/legacy/ht6560b.c
drivers/ide/legacy/qd65xx.c
drivers/ide/pci/aec62xx.c
drivers/ide/pci/alim15x3.c
drivers/ide/pci/amd74xx.c
drivers/ide/pci/cmd640.c
drivers/ide/pci/cmd64x.c
drivers/ide/pci/cy82c693.c
drivers/ide/pci/hpt366.c
drivers/ide/pci/ns87415.c
drivers/ide/pci/scc_pata.c
drivers/ide/pci/sgiioc4.c
drivers/ide/pci/siimage.c
drivers/ide/pci/via82cxxx.c
drivers/ide/ppc/pmac.c
drivers/ide/setup-pci.c
drivers/infiniband/core/addr.c
drivers/infiniband/core/agent.h
drivers/infiniband/core/cache.c
drivers/infiniband/core/cm.c
drivers/infiniband/core/cma.c
drivers/infiniband/core/core_priv.h
drivers/infiniband/core/device.c
drivers/infiniband/core/fmr_pool.c
drivers/infiniband/core/mad_priv.h
drivers/infiniband/core/mad_rmpp.c
drivers/infiniband/core/mad_rmpp.h
drivers/infiniband/core/packer.c
drivers/infiniband/core/sa_query.c
drivers/infiniband/core/sysfs.c
drivers/infiniband/core/ucm.c
drivers/infiniband/core/ud_header.c
drivers/infiniband/core/umem.c
drivers/infiniband/core/user_mad.c
drivers/infiniband/core/uverbs.h
drivers/infiniband/core/uverbs_cmd.c
drivers/infiniband/core/uverbs_main.c
drivers/infiniband/core/verbs.c
drivers/infiniband/hw/amso1100/c2_rnic.c
drivers/infiniband/hw/cxgb3/cxio_hal.c
drivers/infiniband/hw/cxgb3/cxio_hal.h
drivers/infiniband/hw/cxgb3/cxio_wr.h
drivers/infiniband/hw/cxgb3/iwch.c
drivers/infiniband/hw/cxgb3/iwch.h
drivers/infiniband/hw/cxgb3/iwch_cq.c
drivers/infiniband/hw/cxgb3/iwch_provider.c
drivers/infiniband/hw/cxgb3/iwch_provider.h
drivers/infiniband/hw/cxgb3/iwch_qp.c
drivers/infiniband/hw/ehca/ehca_irq.c
drivers/infiniband/hw/ehca/ehca_main.c
drivers/infiniband/hw/ehca/ehca_reqs.c
drivers/infiniband/hw/ehca/hcp_if.c
drivers/infiniband/hw/ehca/hcp_if.h
drivers/infiniband/hw/ipath/ipath_cq.c
drivers/infiniband/hw/ipath/ipath_iba7220.c
drivers/infiniband/hw/ipath/ipath_mad.c
drivers/infiniband/hw/ipath/ipath_rc.c
drivers/infiniband/hw/ipath/ipath_ruc.c
drivers/infiniband/hw/ipath/ipath_uc.c
drivers/infiniband/hw/ipath/ipath_ud.c
drivers/infiniband/hw/ipath/ipath_verbs.c
drivers/infiniband/hw/mlx4/cq.c
drivers/infiniband/hw/mlx4/mad.c
drivers/infiniband/hw/mlx4/main.c
drivers/infiniband/hw/mlx4/mlx4_ib.h
drivers/infiniband/hw/mlx4/qp.c
drivers/infiniband/hw/mthca/mthca_allocator.c
drivers/infiniband/hw/mthca/mthca_av.c
drivers/infiniband/hw/mthca/mthca_catas.c
drivers/infiniband/hw/mthca/mthca_cmd.c
drivers/infiniband/hw/mthca/mthca_cmd.h
drivers/infiniband/hw/mthca/mthca_config_reg.h
drivers/infiniband/hw/mthca/mthca_cq.c
drivers/infiniband/hw/mthca/mthca_dev.h
drivers/infiniband/hw/mthca/mthca_doorbell.h
drivers/infiniband/hw/mthca/mthca_eq.c
drivers/infiniband/hw/mthca/mthca_mad.c
drivers/infiniband/hw/mthca/mthca_main.c
drivers/infiniband/hw/mthca/mthca_mcg.c
drivers/infiniband/hw/mthca/mthca_memfree.c
drivers/infiniband/hw/mthca/mthca_memfree.h
drivers/infiniband/hw/mthca/mthca_mr.c
drivers/infiniband/hw/mthca/mthca_pd.c
drivers/infiniband/hw/mthca/mthca_profile.c
drivers/infiniband/hw/mthca/mthca_profile.h
drivers/infiniband/hw/mthca/mthca_provider.c
drivers/infiniband/hw/mthca/mthca_provider.h
drivers/infiniband/hw/mthca/mthca_qp.c
drivers/infiniband/hw/mthca/mthca_reset.c
drivers/infiniband/hw/mthca/mthca_srq.c
drivers/infiniband/hw/mthca/mthca_uar.c
drivers/infiniband/hw/mthca/mthca_user.h
drivers/infiniband/hw/mthca/mthca_wqe.h
drivers/infiniband/hw/nes/nes.c
drivers/infiniband/hw/nes/nes.h
drivers/infiniband/hw/nes/nes_cm.c
drivers/infiniband/hw/nes/nes_hw.c
drivers/infiniband/hw/nes/nes_hw.h
drivers/infiniband/hw/nes/nes_utils.c
drivers/infiniband/hw/nes/nes_verbs.c
drivers/infiniband/ulp/ipoib/Kconfig
drivers/infiniband/ulp/ipoib/ipoib.h
drivers/infiniband/ulp/ipoib/ipoib_cm.c
drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
drivers/infiniband/ulp/ipoib/ipoib_fs.c
drivers/infiniband/ulp/ipoib/ipoib_ib.c
drivers/infiniband/ulp/ipoib/ipoib_main.c
drivers/infiniband/ulp/ipoib/ipoib_multicast.c
drivers/infiniband/ulp/ipoib/ipoib_verbs.c
drivers/infiniband/ulp/ipoib/ipoib_vlan.c
drivers/infiniband/ulp/iser/iscsi_iser.c
drivers/infiniband/ulp/iser/iscsi_iser.h
drivers/infiniband/ulp/iser/iser_initiator.c
drivers/infiniband/ulp/iser/iser_memory.c
drivers/infiniband/ulp/iser/iser_verbs.c
drivers/infiniband/ulp/srp/ib_srp.c
drivers/infiniband/ulp/srp/ib_srp.h
drivers/net/cxgb3/cxgb3_ctl_defs.h
drivers/net/cxgb3/cxgb3_offload.c
drivers/net/cxgb3/version.h
drivers/net/mlx4/fw.c
drivers/net/mlx4/fw.h
drivers/net/mlx4/main.c
drivers/net/mlx4/mcg.c
drivers/scsi/ide-scsi.c
drivers/video/fb_ddc.c
drivers/video/intelfb/intelfb_i2c.c
drivers/video/matrox/i2c-matroxfb.c
fs/buffer.c
fs/ext4/balloc.c
fs/ext4/dir.c
fs/ext4/ext4.h
fs/ext4/ext4_extents.h
fs/ext4/ext4_i.h
fs/ext4/ext4_jbd2.h
fs/ext4/ext4_sb.h
fs/ext4/extents.c
fs/ext4/file.c
fs/ext4/fsync.c
fs/ext4/group.h
fs/ext4/ialloc.c
fs/ext4/inode.c
fs/ext4/mballoc.c
fs/ext4/namei.c
fs/ext4/resize.c
fs/ext4/super.c
fs/ext4/xattr.c
fs/ext4/xattr_trusted.c
fs/ext4/xattr_user.c
fs/gfs2/Kconfig
fs/gfs2/Makefile
fs/gfs2/gfs2.h
fs/gfs2/glock.c
fs/gfs2/glock.h
fs/gfs2/glops.c
fs/gfs2/incore.h
fs/gfs2/inode.c
fs/gfs2/inode.h
fs/gfs2/locking.c
fs/gfs2/locking/dlm/lock.c
fs/gfs2/locking/dlm/lock_dlm.h
fs/gfs2/locking/dlm/mount.c
fs/gfs2/locking/dlm/sysfs.c
fs/gfs2/locking/dlm/thread.c
fs/gfs2/locking/nolock/Makefile [deleted file]
fs/gfs2/locking/nolock/main.c [deleted file]
fs/gfs2/log.c
fs/gfs2/log.h
fs/gfs2/main.c
fs/gfs2/meta_io.c
fs/gfs2/meta_io.h
fs/gfs2/ops_address.c
fs/gfs2/ops_file.c
fs/gfs2/ops_fstype.c
fs/gfs2/ops_inode.c
fs/gfs2/ops_super.c
fs/gfs2/quota.c
fs/gfs2/recovery.c
fs/gfs2/rgrp.c
fs/gfs2/super.c
fs/gfs2/sys.c
fs/jbd2/checkpoint.c
fs/jbd2/commit.c
fs/jbd2/journal.c
fs/jbd2/transaction.c
fs/jfs/jfs_debug.c
fs/jfs/jfs_debug.h
fs/jfs/jfs_dtree.h
fs/jfs/jfs_imap.c
fs/jfs/jfs_logmgr.c
fs/jfs/jfs_metapage.c
fs/jfs/jfs_txnmgr.c
fs/jfs/jfs_xtree.c
fs/jfs/namei.c
fs/jfs/super.c
fs/mpage.c
include/asm-x86/dwarf2.h
include/drm/drmP.h
include/linux/blkdev.h
include/linux/fs.h
include/linux/i2c-algo-pcf.h
include/linux/i2c-id.h
include/linux/i2c.h
include/linux/i2c/at24.h [new file with mode: 0644]
include/linux/ide.h
include/linux/interrupt.h
include/linux/irq.h
include/linux/jbd2.h
include/linux/libata.h
include/linux/lm_interface.h
include/linux/mlx4/device.h
include/linux/mpage.h
include/linux/percpu_counter.h
include/linux/topology.h
include/linux/writeback.h
include/rdma/ib_addr.h
include/rdma/ib_cache.h
include/rdma/ib_cm.h
include/rdma/ib_fmr_pool.h
include/rdma/ib_mad.h
include/rdma/ib_pack.h
include/rdma/ib_sa.h
include/rdma/ib_smi.h
include/rdma/ib_user_cm.h
include/rdma/ib_user_mad.h
include/rdma/ib_user_verbs.h
include/rdma/ib_verbs.h
include/rdma/iw_cm.h
include/rdma/rdma_cm.h
include/rdma/rdma_cm_ib.h
kernel/backtracetest.c
kernel/hrtimer.c
kernel/irq/manage.c
kernel/irq/proc.c
kernel/posix-cpu-timers.c
kernel/stacktrace.c
kernel/time/tick-sched.c
lib/Kconfig.debug
lib/percpu_counter.c
mm/filemap.c
mm/page-writeback.c
security/selinux/hooks.c
security/selinux/include/security.h
security/selinux/ss/services.c

index 938d7dd054905edfae6d9bc76ca27ffdfbd42629..b4a615b78403a48f02252b90a41fabb1392fefe4 100644 (file)
@@ -1,17 +1,26 @@
+ChangeLog:
+       Started by Ingo Molnar <mingo@redhat.com>
+       Update by Max Krasnyansky <maxk@qualcomm.com>
 
-SMP IRQ affinity, started by Ingo Molnar <mingo@redhat.com>
-
+SMP IRQ affinity
 
 /proc/irq/IRQ#/smp_affinity specifies which target CPUs are permitted
 for a given IRQ source. It's a bitmask of allowed CPUs. It's not allowed
 to turn off all CPUs, and if an IRQ controller does not support IRQ
 affinity then the value will not change from the default 0xffffffff.
 
+/proc/irq/default_smp_affinity specifies default affinity mask that applies
+to all non-active IRQs. Once IRQ is allocated/activated its affinity bitmask
+will be set to the default mask. It can then be changed as described above.
+Default mask is 0xffffffff.
+
 Here is an example of restricting IRQ44 (eth1) to CPU0-3 then restricting
-the IRQ to CPU4-7 (this is an 8-CPU SMP box):
+it to CPU4-7 (this is an 8-CPU SMP box):
 
+[root@moon 44]# cd /proc/irq/44
 [root@moon 44]# cat smp_affinity
 ffffffff
+
 [root@moon 44]# echo 0f > smp_affinity
 [root@moon 44]# cat smp_affinity
 0000000f
@@ -21,17 +30,27 @@ PING hell (195.4.7.3): 56 data bytes
 --- hell ping statistics ---
 6029 packets transmitted, 6027 packets received, 0% packet loss
 round-trip min/avg/max = 0.1/0.1/0.4 ms
-[root@moon 44]# cat /proc/interrupts | grep 44:
- 44:          0       1785       1785       1783       1783          1
-1          0   IO-APIC-level  eth1
+[root@moon 44]# cat /proc/interrupts | grep 'CPU\|44:'
+           CPU0       CPU1       CPU2       CPU3      CPU4       CPU5        CPU6       CPU7
+ 44:       1068       1785       1785       1783         0          0           0         0    IO-APIC-level  eth1
+
+As can be seen from the line above IRQ44 was delivered only to the first four
+processors (0-3).
+Now let's restrict that IRQ to CPU(4-7).
+
 [root@moon 44]# echo f0 > smp_affinity
+[root@moon 44]# cat smp_affinity
+000000f0
 [root@moon 44]# ping -f h
 PING hell (195.4.7.3): 56 data bytes
 ..
 --- hell ping statistics ---
 2779 packets transmitted, 2777 packets received, 0% packet loss
 round-trip min/avg/max = 0.1/0.5/585.4 ms
-[root@moon 44]# cat /proc/interrupts | grep 44:
- 44:       1068       1785       1785       1784       1784       1069       1070       1069   IO-APIC-level  eth1
-[root@moon 44]#
+[root@moon 44]# cat /proc/interrupts | grep 'CPU\|44:'
+           CPU0       CPU1       CPU2       CPU3      CPU4       CPU5        CPU6       CPU7
+ 44:       1068       1785       1785       1783      1784       1069        1070       1069   IO-APIC-level  eth1
+
+This time around IRQ44 was delivered only to the last four processors.
+i.e. counters for CPU0-3 did not change.
 
index b61cb9564023b6dc878246fac760280c670d5ccf..bd699da246666a503195ed0d8240fb531b90f746 100644 (file)
@@ -14,9 +14,8 @@ represent the thread siblings to cpu X in the same physical package;
 To implement it in an architecture-neutral way, a new source file,
 drivers/base/topology.c, is to export the 4 attributes.
 
-If one architecture wants to support this feature, it just needs to
-implement 4 defines, typically in file include/asm-XXX/topology.h.
-The 4 defines are:
+For an architecture to support this feature, it must define some of
+these macros in include/asm-XXX/topology.h:
 #define topology_physical_package_id(cpu)
 #define topology_core_id(cpu)
 #define topology_thread_siblings(cpu)
@@ -25,17 +24,10 @@ The 4 defines are:
 The type of **_id is int.
 The type of siblings is cpumask_t.
 
-To be consistent on all architectures, the 4 attributes should have
-default values if their values are unavailable. Below is the rule.
-1) physical_package_id: If cpu has no physical package id, -1 is the
-default value.
-2) core_id: If cpu doesn't support multi-core, its core id is 0.
-3) thread_siblings: Just include itself, if the cpu doesn't support
-HT/multi-thread.
-4) core_siblings: Just include itself, if the cpu doesn't support
-multi-core and HT/Multi-thread.
-
-So be careful when declaring the 4 defines in include/asm-XXX/topology.h.
-
-If an attribute isn't defined on an architecture, it won't be exported.
-
+To be consistent on all architectures, include/linux/topology.h
+provides default definitions for any of the above macros that are
+not defined by include/asm-XXX/topology.h:
+1) physical_package_id: -1
+2) core_id: 0
+3) thread_siblings: just the given CPU
+4) core_siblings: just the given CPU
index 46ece3fba6f94c2924c94d6f96b4402984705102..65a1482457a89ec9d6c5beec89464bc049f3f5e2 100644 (file)
@@ -222,13 +222,6 @@ Who:       Thomas Gleixner <tglx@linutronix.de>
 
 ---------------------------
 
-What:  i2c-i810, i2c-prosavage and i2c-savage4
-When:  May 2008
-Why:   These drivers are superseded by i810fb, intelfb and savagefb.
-Who:   Jean Delvare <khali@linux-fr.org>
-
----------------------------
-
 What (Why):
        - include/linux/netfilter_ipv4/ipt_TOS.h ipt_tos.h header files
          (superseded by xt_TOS/xt_tos target & match)
index 0c5086db835262828edd6b377bee4affb4b6da6f..80e193d82e2e10415cc828124cec296855398aeb 100644 (file)
@@ -13,72 +13,93 @@ Mailing list: linux-ext4@vger.kernel.org
 1. Quick usage instructions:
 ===========================
 
-  - Grab updated e2fsprogs from
-    ftp://ftp.kernel.org/pub/linux/kernel/people/tytso/e2fsprogs-interim/
-    This is a patchset on top of e2fsprogs-1.39, which can be found at
+  - Compile and install the latest version of e2fsprogs (as of this
+    writing version 1.41) from:
+
+    http://sourceforge.net/project/showfiles.php?group_id=2406
+       
+       or
+
     ftp://ftp.kernel.org/pub/linux/kernel/people/tytso/e2fsprogs/
 
-  - It's still mke2fs -j /dev/hda1
+       or grab the latest git repository from:
+
+    git://git.kernel.org/pub/scm/fs/ext2/e2fsprogs.git
+
+  - Create a new filesystem using the ext4dev filesystem type:
+
+       # mke2fs -t ext4dev /dev/hda1
+
+    Or configure an existing ext3 filesystem to support extents and set
+    the test_fs flag to indicate that it's ok for an in-development
+    filesystem to touch this filesystem:
 
-  - mount /dev/hda1 /wherever -t ext4dev
+       # tune2fs -O extents -E test_fs /dev/hda1
 
-  - To enable extents,
+    If the filesystem was created with 128 byte inodes, it can be
+    converted to use 256 byte for greater efficiency via:
 
-       mount /dev/hda1 /wherever -t ext4dev -o extents
+        # tune2fs -I 256 /dev/hda1
 
-  - The filesystem is compatible with the ext3 driver until you add a file
-    which has extents (ie: `mount -o extents', then create a file).
+    (Note: we currently do not have tools to convert an ext4dev
+    filesystem back to ext3; so please do not try this on production
+    filesystems.)
 
-    NOTE: The "extents" mount flag is temporary.  It will soon go away and
-    extents will be enabled by the "-o extents" flag to mke2fs or tune2fs
+  - Mounting:
+
+       # mount -t ext4dev /dev/hda1 /wherever
 
   - When comparing performance with other filesystems, remember that
-    ext3/4 by default offers higher data integrity guarantees than most.  So
-    when comparing with a metadata-only journalling filesystem, use `mount -o
-    data=writeback'.  And you might as well use `mount -o nobh' too along
-    with it.  Making the journal larger than the mke2fs default often helps
-    performance with metadata-intensive workloads.
+    ext3/4 by default offers higher data integrity guarantees than most.
+    So when comparing with a metadata-only journalling filesystem, such
+    as ext3, use `mount -o data=writeback'.  And you might as well use
+    `mount -o nobh' too along with it.  Making the journal larger than
+    the mke2fs default often helps performance with metadata-intensive
+    workloads.
 
 2. Features
 ===========
 
 2.1 Currently available
 
-* ability to use filesystems > 16TB
+* ability to use filesystems > 16TB (e2fsprogs support not available yet)
 * extent format reduces metadata overhead (RAM, IO for access, transactions)
 * extent format more robust in face of on-disk corruption due to magics,
 * internal redundancy in tree
-
-2.1 Previously available, soon to be enabled by default by "mkefs.ext4":
-
-* dir_index and resize inode will be on by default
-* large inodes will be used by default for fast EAs, nsec timestamps, etc
+* improved file allocation (multi-block alloc)
+* fix 32000 subdirectory limit
+* nsec timestamps for mtime, atime, ctime, create time
+* inode version field on disk (NFSv4, Lustre)
+* reduced e2fsck time via uninit_bg feature
+* journal checksumming for robustness, performance
+* persistent file preallocation (e.g for streaming media, databases)
+* ability to pack bitmaps and inode tables into larger virtual groups via the
+  flex_bg feature
+* large file support
+* Inode allocation using large virtual block groups via flex_bg
+* delayed allocation
+* large block (up to pagesize) support
+* efficient new ordered mode in JBD2 and ext4 (avoid using buffer head to force
+  the ordering)
 
 2.2 Candidate features for future inclusion
 
-There are several under discussion, whether they all make it in is
-partly a function of how much time everyone has to work on them:
+* Online defrag (patches available but not well tested)
+* reduced mke2fs time via lazy itable initialization in conjunction with
+  the uninit_bg feature (capability to do this is available in e2fsprogs
+  but a kernel thread to do lazy zeroing of unused inode table blocks
+  after filesystem is first mounted is required for safety)
 
-* improved file allocation (multi-block alloc, delayed alloc; basically done)
-* fix 32000 subdirectory limit (patch exists, needs some e2fsck work)
-* nsec timestamps for mtime, atime, ctime, create time (patch exists,
-  needs some e2fsck work)
-* inode version field on disk (NFSv4, Lustre; prototype exists)
-* reduced mke2fs/e2fsck time via uninitialized groups (prototype exists)
-* journal checksumming for robustness, performance (prototype exists)
-* persistent file preallocation (e.g for streaming media, databases)
+There are several others under discussion, whether they all make it in is
+partly a function of how much time everyone has to work on them. Features like
+metadata checksumming have been discussed and planned for a bit but no patches
+exist yet so I'm not sure they're in the near-term roadmap.
 
-Features like metadata checksumming have been discussed and planned for
-a bit but no patches exist yet so I'm not sure they're in the near-term
-roadmap.
+The big performance win will come with mballoc, delalloc and flex_bg
+grouping of bitmaps and inode tables.  Some test results available here:
 
-The big performance win will come with mballoc and delalloc.  CFS has
-been using mballoc for a few years already with Lustre, and IBM + Bull
-did a lot of benchmarking on it.  The reason it isn't in the first set of
-patches is partly a manageability issue, and partly because it doesn't
-directly affect the on-disk format (outside of much better allocation)
-so it isn't critical to get into the first round of changes.  I believe
-Alex is working on a new set of patches right now.
+ - http://www.bullopensource.org/ext4/20080530/ffsb-write-2.6.26-rc2.html
+ - http://www.bullopensource.org/ext4/20080530/ffsb-readwrite-2.6.26-rc2.html
 
 3. Options
 ==========
@@ -222,9 +243,11 @@ stripe=n           Number of filesystem blocks that mballoc will try
                        to use for allocation size and alignment. For RAID5/6
                        systems this should be the number of data
                        disks *  RAID chunk size in file system blocks.
-
+delalloc       (*)     Deferring block allocation until write-out time.
+nodelalloc             Disable delayed allocation. Blocks are allocated
+                       when data is copied from user to page cache.
 Data Mode
----------
+=========
 There are 3 different data modes:
 
 * writeback mode
@@ -236,10 +259,10 @@ typically provide the best ext4 performance.
 
 * ordered mode
 In data=ordered mode, ext4 only officially journals metadata, but it logically
-groups metadata and data blocks into a single unit called a transaction.  When
-it's time to write the new metadata out to disk, the associated data blocks
-are written first.  In general, this mode performs slightly slower than
-writeback but significantly faster than journal mode.
+groups metadata information related to data changes with the data blocks into a
+single unit called a transaction.  When it's time to write the new metadata
+out to disk, the associated data blocks are written first.  In general,
+this mode performs slightly slower than writeback but significantly faster than journal mode.
 
 * journal mode
 data=journal mode provides full data and metadata journaling.  All new data is
@@ -247,7 +270,8 @@ written to the journal first, and then to its final location.
 In the event of a crash, the journal can be replayed, bringing both data and
 metadata into a consistent state.  This mode is the slowest except when data
 needs to be read from and written to disk at the same time where it
-outperforms all others modes.
+outperforms all other modes.  Currently ext4 does not have delayed
+allocation support if this data journalling mode is selected.
 
 References
 ==========
@@ -256,7 +280,8 @@ kernel source:      <file:fs/ext4/>
                <file:fs/jbd2/>
 
 programs:      http://e2fsprogs.sourceforge.net/
-               http://ext2resize.sourceforge.net
 
 useful links:  http://fedoraproject.org/wiki/ext3-devel
                http://www.bullopensource.org/ext4/
+               http://ext4.wiki.kernel.org/index.php/Main_Page
+               http://fedoraproject.org/wiki/Features/Ext4
diff --git a/Documentation/filesystems/gfs2-glocks.txt b/Documentation/filesystems/gfs2-glocks.txt
new file mode 100644 (file)
index 0000000..4dae9a3
--- /dev/null
@@ -0,0 +1,114 @@
+                   Glock internal locking rules
+                  ------------------------------
+
+This documents the basic principles of the glock state machine
+internals. Each glock (struct gfs2_glock in fs/gfs2/incore.h)
+has two main (internal) locks:
+
+ 1. A spinlock (gl_spin) which protects the internal state such
+    as gl_state, gl_target and the list of holders (gl_holders)
+ 2. A non-blocking bit lock, GLF_LOCK, which is used to prevent other
+    threads from making calls to the DLM, etc. at the same time. If a
+    thread takes this lock, it must then call run_queue (usually via the
+    workqueue) when it releases it in order to ensure any pending tasks
+    are completed.
+
+The gl_holders list contains all the queued lock requests (not
+just the holders) associated with the glock. If there are any
+held locks, then they will be contiguous entries at the head
+of the list. Locks are granted in strictly the order that they
+are queued, except for those marked LM_FLAG_PRIORITY which are
+used only during recovery, and even then only for journal locks.
+
+There are three lock states that users of the glock layer can request,
+namely shared (SH), deferred (DF) and exclusive (EX). Those translate
+to the following DLM lock modes:
+
+Glock mode    | DLM lock mode
+------------------------------
+    UN        |    IV/NL  Unlocked (no DLM lock associated with glock) or NL
+    SH        |    PR     (Protected read)
+    DF        |    CW     (Concurrent write)
+    EX        |    EX     (Exclusive)
+
+Thus DF is basically a shared mode which is incompatible with the "normal"
+shared lock mode, SH. In GFS2 the DF mode is used exclusively for direct I/O
+operations. The glocks are basically a lock plus some routines which deal
+with cache management. The following rules apply for the cache:
+
+Glock mode   |  Cache data | Cache Metadata | Dirty Data | Dirty Metadata
+--------------------------------------------------------------------------
+    UN       |     No      |       No       |     No     |      No
+    SH       |     Yes     |       Yes      |     No     |      No
+    DF       |     No      |       Yes      |     No     |      No
+    EX       |     Yes     |       Yes      |     Yes    |      Yes
+
+These rules are implemented using the various glock operations which
+are defined for each type of glock. Not all types of glocks use
+all the modes. Only inode glocks use the DF mode for example.
+
+Table of glock operations and per type constants:
+
+Field            | Purpose
+----------------------------------------------------------------------------
+go_xmote_th      | Called before remote state change (e.g. to sync dirty data)
+go_xmote_bh      | Called after remote state change (e.g. to refill cache)
+go_inval         | Called if remote state change requires invalidating the cache
+go_demote_ok     | Returns boolean value of whether it's ok to demote a glock
+                 | (e.g. checks timeout, and that there is no cached data)
+go_lock          | Called for the first local holder of a lock
+go_unlock        | Called on the final local unlock of a lock
+go_dump          | Called to print content of object for debugfs file, or on
+                 | error to dump glock to the log.
+go_type          | The type of the glock, LM_TYPE_.....
+go_min_hold_time | The minimum hold time
+
+The minimum hold time for each lock is the time after a remote lock
+grant for which we ignore remote demote requests. This is in order to
+prevent a situation where locks are being bounced around the cluster
+from node to node with none of the nodes making any progress. This
+tends to show up most with shared mmaped files which are being written
+to by multiple nodes. By delaying the demotion in response to a
+remote callback, that gives the userspace program time to make
+some progress before the pages are unmapped.
+
+There is a plan to try and remove the go_lock and go_unlock callbacks
+if possible, in order to try and speed up the fast path through the locking.
+Also, eventually we hope to make the glock "EX" mode locally shared
+such that any local locking will be done with the i_mutex as required
+rather than via the glock.
+
+Locking rules for glock operations:
+
+Operation     |  GLF_LOCK bit lock held |  gl_spin spinlock held
+-----------------------------------------------------------------
+go_xmote_th   |       Yes               |       No
+go_xmote_bh   |       Yes               |       No
+go_inval      |       Yes               |       No
+go_demote_ok  |       Sometimes         |       Yes
+go_lock       |       Yes               |       No
+go_unlock     |       Yes               |       No
+go_dump       |       Sometimes         |       Yes
+
+N.B. Operations must not drop either the bit lock or the spinlock
+if it's held on entry. go_dump and go_demote_ok must never block.
+Note that go_dump will only be called if the glock's state
+indicates that it is caching uptodate data.
+
+Glock locking order within GFS2:
+
+ 1. i_mutex (if required)
+ 2. Rename glock (for rename only)
+ 3. Inode glock(s)
+    (Parents before children, inodes at "same level" with same parent in
+     lock number order)
+ 4. Rgrp glock(s) (for (de)allocation operations)
+ 5. Transaction glock (via gfs2_trans_begin) for non-read operations
+ 6. Page lock  (always last, very important!)
+
+There are two glocks per inode. One deals with access to the inode
+itself (locking order as above), and the other, known as the iopen
+glock is used in conjunction with the i_nlink field in the inode to
+determine the lifetime of the inode in question. Locking of inodes
+is on a per-inode basis. Locking of rgrps is on a per rgrp basis.
+
index dbc3c6a3650f5c29b24000d69e1c9cb7973b4988..7f268f327d750e725f1e5ca5ddfe99dcb8014cd5 100644 (file)
@@ -380,28 +380,35 @@ i386 and x86_64 platforms support the new IRQ vector displays.
 Of some interest is the introduction of the /proc/irq directory to 2.4.
 It could be used to set IRQ to CPU affinity, this means that you can "hook" an
 IRQ to only one CPU, or to exclude a CPU of handling IRQs. The contents of the
-irq subdir is one subdir for each IRQ, and one file; prof_cpu_mask
+irq subdir is one subdir for each IRQ, and two files; default_smp_affinity and
+prof_cpu_mask.
 
 For example 
   > ls /proc/irq/
   0  10  12  14  16  18  2  4  6  8  prof_cpu_mask
-  1  11  13  15  17  19  3  5  7  9
+  1  11  13  15  17  19  3  5  7  9  default_smp_affinity
   > ls /proc/irq/0/
   smp_affinity
 
-The contents of the prof_cpu_mask file and each smp_affinity file for each IRQ
-is the same by default:
+smp_affinity is a bitmask, in which you can specify which CPUs can handle the
+IRQ, you can set it by doing:
 
-  > cat /proc/irq/0/smp_affinity 
-  ffffffff
+  > echo 1 > /proc/irq/10/smp_affinity
+
+This means that only the first CPU will handle the IRQ, but you can also echo
+5 which means that only the first and third CPU can handle the IRQ.
 
-It's a bitmask, in which you can specify which CPUs can handle the IRQ, you can
-set it by doing:
+The contents of each smp_affinity file is the same by default:
+
+  > cat /proc/irq/0/smp_affinity
+  ffffffff
 
-  > echo 1 > /proc/irq/prof_cpu_mask
+The default_smp_affinity mask applies to all non-active IRQs, which are the
+IRQs which have not yet been allocated/activated, and hence which lack a
+/proc/irq/[0-9]* directory.
 
-This means that only the first CPU will handle the IRQ, but you can also echo 5
-which means that only the first and fourth CPU can handle the IRQ.
+prof_cpu_mask specifies which CPUs are to be profiled by the system wide
+profiler. Default value is ffffffff (all cpus).
 
 The way IRQs are routed is handled by the IO-APIC, and it's Round Robin
 between all the CPUs which are allowed to handle it. As usual the kernel has
diff --git a/Documentation/i2c/busses/i2c-i810 b/Documentation/i2c/busses/i2c-i810
deleted file mode 100644 (file)
index 778210e..0000000
+++ /dev/null
@@ -1,47 +0,0 @@
-Kernel driver i2c-i810
-
-Supported adapters:
-  * Intel 82810, 82810-DC100, 82810E, and 82815 (GMCH)
-  * Intel 82845G (GMCH)
-
-Authors: 
-       Frodo Looijaard <frodol@dds.nl>, 
-       Philip Edelbrock <phil@netroedge.com>,
-        Kyösti Mälkki <kmalkki@cc.hut.fi>,
-       Ralph Metzler <rjkm@thp.uni-koeln.de>,
-       Mark D. Studebaker <mdsxyz123@yahoo.com>
-
-Main contact: Mark Studebaker <mdsxyz123@yahoo.com>
-
-Description 
------------ 
-
-WARNING: If you have an '810' or '815' motherboard, your standard I2C
-temperature sensors are most likely on the 801's I2C bus. You want the
-i2c-i801 driver for those, not this driver.
-
-Now for the i2c-i810...
-
-The GMCH chip contains two I2C interfaces.
-
-The first interface is used for DDC (Data Display Channel) which is a
-serial channel through the VGA monitor connector to a DDC-compliant
-monitor. This interface is defined by the Video Electronics Standards
-Association (VESA). The standards are available for purchase at
-http://www.vesa.org .
-
-The second interface is a general-purpose I2C bus. It may be connected to a
-TV-out chip such as the BT869 or possibly to a digital flat-panel display.
-
-Features
--------- 
-
-Both busses use the i2c-algo-bit driver for 'bit banging'
-and support for specific transactions is provided by i2c-algo-bit.
-
-Issues
-------
-
-If you enable bus testing in i2c-algo-bit (insmod i2c-algo-bit bit_test=1),
-the test may fail; if so, the i2c-i810 driver won't be inserted. However,
-we think this has been fixed.
diff --git a/Documentation/i2c/busses/i2c-prosavage b/Documentation/i2c/busses/i2c-prosavage
deleted file mode 100644 (file)
index 7036879..0000000
+++ /dev/null
@@ -1,23 +0,0 @@
-Kernel driver i2c-prosavage
-
-Supported adapters:
-       
-       S3/VIA KM266/VT8375 aka ProSavage8 
-       S3/VIA KM133/VT8365 aka Savage4 
-
-Author: Henk Vergonet <henk@god.dyndns.org>
-
-Description
------------
-
-The Savage4 chips contain two I2C interfaces (aka a I2C 'master' or
-'host'). 
-
-The first interface is used for DDC (Data Display Channel) which is a
-serial channel through the VGA monitor connector to a DDC-compliant
-monitor. This interface is defined by the Video Electronics Standards
-Association (VESA). The standards are available for purchase at
-http://www.vesa.org . The second interface is a general-purpose I2C bus.
-
-Usefull for gaining access to the TV Encoder chips.
-
diff --git a/Documentation/i2c/busses/i2c-savage4 b/Documentation/i2c/busses/i2c-savage4
deleted file mode 100644 (file)
index 6ecceab..0000000
+++ /dev/null
@@ -1,26 +0,0 @@
-Kernel driver i2c-savage4
-
-Supported adapters:
-  * Savage4
-  * Savage2000
-
-Authors: 
-       Alexander Wold <awold@bigfoot.com>,
-       Mark D. Studebaker <mdsxyz123@yahoo.com> 
-
-Description
------------
-
-The Savage4 chips contain two I2C interfaces (aka a I2C 'master'
-or 'host'). 
-
-The first interface is used for DDC (Data Display Channel) which is a
-serial channel through the VGA monitor connector to a DDC-compliant
-monitor. This interface is defined by the Video Electronics Standards
-Association (VESA). The standards are available for purchase at
-http://www.vesa.org . The DDC bus is not yet supported because its register
-is not directly memory-mapped.
-
-The second interface is a general-purpose I2C bus. This is the only
-interface supported by the driver at the moment.
-
diff --git a/Documentation/i2c/fault-codes b/Documentation/i2c/fault-codes
new file mode 100644 (file)
index 0000000..045765c
--- /dev/null
@@ -0,0 +1,127 @@
+This is a summary of the most important conventions for use of fault
+codes in the I2C/SMBus stack.
+
+
+A "Fault" is not always an "Error"
+----------------------------------
+Not all fault reports imply errors; "page faults" should be a familiar
+example.  Software often retries idempotent operations after transient
+faults.  There may be fancier recovery schemes that are appropriate in
+some cases, such as re-initializing (and maybe resetting).  After such
+recovery, triggered by a fault report, there is no error.
+
+In a similar way, sometimes a "fault" code just reports one defined
+result for an operation ... it doesn't indicate that anything is wrong
+at all, just that the outcome wasn't on the "golden path".
+
+In short, your I2C driver code may need to know these codes in order
+to respond correctly.  Other code may need to rely on YOUR code reporting
+the right fault code, so that it can (in turn) behave correctly.
+
+
+I2C and SMBus fault codes
+-------------------------
+These are returned as negative numbers from most calls, with zero or
+some positive number indicating a non-fault return.  The specific
+numbers associated with these symbols differ between architectures,
+though most Linux systems use <asm-generic/errno*.h> numbering.
+
+Note that the descriptions here are not exhaustive.  There are other
+codes that may be returned, and other cases where these codes should
+be returned.  However, drivers should not return other codes for these
+cases (unless the hardware doesn't provide unique fault reports).
+
+Also, codes returned by adapter probe methods follow rules which are
+specific to their host bus (such as PCI, or the platform bus).
+
+
+EAGAIN
+       Returned by I2C adapters when they lose arbitration in master
+       transmit mode:  some other master was transmitting different
+       data at the same time.
+
+       Also returned when trying to invoke an I2C operation in an
+       atomic context, when some task is already using that I2C bus
+       to execute some other operation.
+
+EBADMSG
+       Returned by SMBus logic when an invalid Packet Error Code byte
+       is received.  This code is a CRC covering all bytes in the
+       transaction, and is sent before the terminating STOP.  This
+       fault is only reported on read transactions; the SMBus slave
+       may have a way to report PEC mismatches on writes from the
+       host.  Note that even if PECs are in use, you should not rely
+       on these as the only way to detect incorrect data transfers.
+
+EBUSY
+       Returned by SMBus adapters when the bus was busy for longer
+       than allowed.  This usually indicates some device (maybe the
+       SMBus adapter) needs some fault recovery (such as resetting),
+       or that the reset was attempted but failed.
+
+EINVAL
+       This rather vague error means an invalid parameter has been
+       detected before any I/O operation was started.  Use a more
+       specific fault code when you can.
+
+       One example would be a driver trying an SMBus Block Write
+       with block size outside the range of 1-32 bytes.
+
+EIO
+       This rather vague error means something went wrong when
+       performing an I/O operation.  Use a more specific fault
+       code when you can.
+
+ENODEV
+       Returned by driver probe() methods.  This is a bit more
+       specific than ENXIO, implying the problem isn't with the
+       address, but with the device found there.  Driver probes
+       may verify the device returns *correct* responses, and
+       return this as appropriate.  (The driver core will warn
+       about probe faults other than ENXIO and ENODEV.)
+
+ENOMEM
+       Returned by any component that can't allocate memory when
+       it needs to do so.
+
+ENXIO
+       Returned by I2C adapters to indicate that the address phase
+       of a transfer didn't get an ACK.  While it might just mean
+       an I2C device was temporarily not responding, usually it
+       means there's nothing listening at that address.
+
+       Returned by driver probe() methods to indicate that they
+       found no device to bind to.  (ENODEV may also be used.)
+
+EOPNOTSUPP
+       Returned by an adapter when asked to perform an operation
+       that it doesn't, or can't, support.
+
+       For example, this would be returned when an adapter that
+       doesn't support SMBus block transfers is asked to execute
+       one.  In that case, the driver making that request should
+       have verified that functionality was supported before it
+       made that block transfer request.
+
+       Similarly, if an I2C adapter can't execute all legal I2C
+       messages, it should return this when asked to perform a
+       transaction it can't.  (These limitations can't be seen in
+       the adapter's functionality mask, since the assumption is
+       that if an adapter supports I2C it supports all of I2C.)
+
+EPROTO
+       Returned when slave does not conform to the relevant I2C
+       or SMBus (or chip-specific) protocol specifications.  One
+       case is when the length of an SMBus block data response
+       (from the SMBus slave) is outside the range 1-32 bytes.
+
+ETIMEDOUT
+       This is returned by drivers when an operation took too much
+       time, and was aborted before it completed.
+
+       SMBus adapters may return it when an operation took more
+       time than allowed by the SMBus specification; for example,
+       when a slave stretches clocks too far.  I2C has no such
+       timeouts, but it's normal for I2C adapters to impose some
+       arbitrary limits (much longer than SMBus!) too.
+
index 03f08fb491ccca6edcbcc6ce6fd1e0dda6eae2ad..24bfb65da17dbe53db1d6f8c98562ff9c85ce5ea 100644 (file)
@@ -42,8 +42,8 @@ Count (8 bits): A data byte containing the length of a block operation.
 [..]: Data sent by I2C device, as opposed to data sent by the host adapter.
 
 
-SMBus Quick Command:  i2c_smbus_write_quick()
-=============================================
+SMBus Quick Command
+===================
 
 This sends a single bit to the device, at the place of the Rd/Wr bit.
 
index d4cd4126d1adeca025e76a2511b0881cd6d0c00c..6b61b3a2e90bee2b7cbed065160bd029db89ba08 100644 (file)
@@ -44,6 +44,10 @@ static struct i2c_driver foo_driver = {
        .id_table       = foo_ids,
        .probe          = foo_probe,
        .remove         = foo_remove,
+       /* if device autodetection is needed: */
+       .class          = I2C_CLASS_SOMETHING,
+       .detect         = foo_detect,
+       .address_data   = &addr_data,
 
        /* else, driver uses "legacy" binding model: */
        .attach_adapter = foo_attach_adapter,
@@ -217,6 +221,31 @@ in the I2C bus driver. You may want to save the returned i2c_client
 reference for later use.
 
 
+Device Detection (Standard driver model)
+----------------------------------------
+
+Sometimes you do not know in advance which I2C devices are connected to
+a given I2C bus.  This is for example the case of hardware monitoring
+devices on a PC's SMBus.  In that case, you may want to let your driver
+detect supported devices automatically.  This is how the legacy model
+worked, and it is now available as an extension to the standard
+driver model (so that we can finally get rid of the legacy model.)
+
+You simply have to define a detect callback which will attempt to
+identify supported devices (returning 0 for supported ones and -ENODEV
+for unsupported ones), a list of addresses to probe, and a device type
+(or class) so that only I2C buses which may have that type of device
+connected (and not otherwise enumerated) will be probed.  The i2c
+core will then call you back as needed and will instantiate a device
+for you for every successful detection.
+
+Note that this mechanism is purely optional and not suitable for all
+devices.  You need some reliable way to identify the supported devices
+(typically using device-specific, dedicated identification registers),
+otherwise misdetections are likely to occur and things can go wrong
+quickly.
+
+
 Device Deletion (Standard driver model)
 ---------------------------------------
 
@@ -569,7 +598,6 @@ SMBus communication
   in terms of it. Never use this function directly!
 
 
-  extern s32 i2c_smbus_write_quick(struct i2c_client * client, u8 value);
   extern s32 i2c_smbus_read_byte(struct i2c_client * client);
   extern s32 i2c_smbus_write_byte(struct i2c_client * client, u8 value);
   extern s32 i2c_smbus_read_byte_data(struct i2c_client * client, u8 command);
@@ -578,30 +606,31 @@ SMBus communication
   extern s32 i2c_smbus_read_word_data(struct i2c_client * client, u8 command);
   extern s32 i2c_smbus_write_word_data(struct i2c_client * client,
                                        u8 command, u16 value);
+  extern s32 i2c_smbus_read_block_data(struct i2c_client * client,
+                                       u8 command, u8 *values);
   extern s32 i2c_smbus_write_block_data(struct i2c_client * client,
                                         u8 command, u8 length,
                                         u8 *values);
   extern s32 i2c_smbus_read_i2c_block_data(struct i2c_client * client,
                                            u8 command, u8 length, u8 *values);
-
-These ones were removed in Linux 2.6.10 because they had no users, but could
-be added back later if needed:
-
-  extern s32 i2c_smbus_read_block_data(struct i2c_client * client,
-                                       u8 command, u8 *values);
   extern s32 i2c_smbus_write_i2c_block_data(struct i2c_client * client,
                                             u8 command, u8 length,
                                             u8 *values);
+
+These ones were removed from i2c-core because they had no users, but could
+be added back later if needed:
+
+  extern s32 i2c_smbus_write_quick(struct i2c_client * client, u8 value);
   extern s32 i2c_smbus_process_call(struct i2c_client * client,
                                     u8 command, u16 value);
   extern s32 i2c_smbus_block_process_call(struct i2c_client *client,
                                           u8 command, u8 length,
                                           u8 *values)
 
-All these transactions return -1 on failure. The 'write' transactions 
-return 0 on success; the 'read' transactions return the read value, except 
-for read_block, which returns the number of values read. The block buffers 
-need not be longer than 32 bytes.
+All these transactions return a negative errno value on failure. The 'write'
+transactions return 0 on success; the 'read' transactions return the read
+value, except for block transactions, which return the number of values
+read. The block buffers need not be longer than 32 bytes.
 
 You can read the file `smbus-protocol' for more information about the
 actual SMBus protocol.
index b3a5aad7e6291e0607d061e3563e2528b2bc79c1..312fe77764a48cba9fb04e000fc2dffeba7fa978 100644 (file)
@@ -571,6 +571,8 @@ and is between 256 and 4096 characters. It is defined in the file
 
        debug_objects   [KNL] Enable object debugging
 
+       debugpat        [X86] Enable PAT debugging
+
        decnet.addr=    [HW,NET]
                        Format: <area>[,<node>]
                        See also Documentation/networking/decnet.txt.
@@ -756,9 +758,6 @@ and is between 256 and 4096 characters. It is defined in the file
        hd=             [EIDE] (E)IDE hard drive subsystem geometry
                        Format: <cyl>,<head>,<sect>
 
-       hd?=            [HW] (E)IDE subsystem
-       hd?lun=         See Documentation/ide/ide.txt.
-
        highmem=nn[KMG] [KNL,BOOT] forces the highmem zone to have an exact
                        size of <nn>. This works even on boxes that have no
                        highmem otherwise. This also works to reduce highmem
@@ -1610,6 +1609,10 @@ and is between 256 and 4096 characters. It is defined in the file
                        Format: { parport<nr> | timid | 0 }
                        See also Documentation/parport.txt.
 
+       pmtmr=          [X86] Manual setup of pmtmr I/O Port. 
+                       Override pmtimer IOPort with a hex value.
+                       e.g. pmtmr=0x508
+
        pnpacpi=        [ACPI]
                        { off }
 
index 1528e58b540829a110eba85ad8bd0e5bea4a4c41..6198fa3deb9979b9c154677ce47a10ea72d91164 100644 (file)
@@ -1686,6 +1686,13 @@ L:       linuxppc-embedded@ozlabs.org
 L:     linux-kernel@vger.kernel.org
 S:     Maintained
 
+FREESCALE I2C CPM DRIVER
+P:     Jochen Friedrich
+M:     jochen@scram.de
+L:     linuxppc-dev@ozlabs.org
+L:     i2c@lm-sensors.org
+S:     Maintained
+
 FREESCALE SOC FS_ENET DRIVER
 P:     Pantelis Antoniou
 M:     pantelis.antoniou@gmail.com
index facf82a5499a84ce83e72e4d4a70324d0ea224d5..c626a821cdcb9eacc089121b52d7afa6f3f5fe4f 100644 (file)
@@ -42,8 +42,7 @@ void ack_bad_irq(unsigned int irq)
 #ifdef CONFIG_SMP 
 static char irq_user_affinity[NR_IRQS];
 
-int
-select_smp_affinity(unsigned int irq)
+int irq_select_affinity(unsigned int irq)
 {
        static int last_cpu;
        int cpu = last_cpu + 1;
@@ -51,7 +50,7 @@ select_smp_affinity(unsigned int irq)
        if (!irq_desc[irq].chip->set_affinity || irq_user_affinity[irq])
                return 1;
 
-       while (!cpu_possible(cpu))
+       while (!cpu_possible(cpu) || !cpu_isset(cpu, irq_default_affinity))
                cpu = (cpu < (NR_CPUS-1) ? cpu + 1 : 0);
        last_cpu = cpu;
 
index 90e0c35ae60d24934b15fbd377c95ef3314c2c88..fc650f64df43ce00a83ad9f22e33e26b266d76ce 100644 (file)
@@ -92,4 +92,5 @@ void save_stack_trace(struct stack_trace *trace)
 {
        save_stack_trace_tsk(current, trace);
 }
+EXPORT_SYMBOL_GPL(save_stack_trace);
 #endif
index 9a68190bbffd78dd41703259c3012c3d638b6994..f4bdb448049c309b7363d7889d272d5c15ab33ed 100644 (file)
@@ -51,3 +51,4 @@ void save_stack_trace(struct stack_trace *trace)
                fp = frame->fp;
        }
 }
+EXPORT_SYMBOL_GPL(save_stack_trace);
index ebd9db8d1ecebfe7a065145df4d2c204a24ed313..5eb4681a73d2fe7c29ee02010abe5287a5411ad0 100644 (file)
@@ -73,3 +73,4 @@ void save_stack_trace(struct stack_trace *trace)
        prepare_frametrace(regs);
        save_context_stack(trace, regs);
 }
+EXPORT_SYMBOL_GPL(save_stack_trace);
index 1775755a2619e8359c2402db68c8c7bc5bdaeb92..255d692bfa188c6df2d59d9a167644396d4d1b15 100644 (file)
@@ -1,3 +1,4 @@
 obj-y                          := setup.o rtc_xicor1241.o rtc_m41t81.o
 
+obj-$(CONFIG_I2C_BOARDINFO)    += swarm-i2c.o
 obj-$(CONFIG_KGDB)             += dbg_io.o
diff --git a/arch/mips/sibyte/swarm/swarm-i2c.c b/arch/mips/sibyte/swarm/swarm-i2c.c
new file mode 100644 (file)
index 0000000..4282ac9
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ *     arch/mips/sibyte/swarm/swarm-i2c.c
+ *
+ *     Broadcom BCM91250A (SWARM), etc. I2C platform setup.
+ *
+ *     Copyright (c) 2008  Maciej W. Rozycki
+ *
+ *     This program is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License
+ *     as published by the Free Software Foundation; either version
+ *     2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/i2c.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+
+
+static struct i2c_board_info swarm_i2c_info1[] __initdata = {
+       {
+               I2C_BOARD_INFO("m41t81", 0x68),
+       },
+};
+
+static int __init swarm_i2c_init(void)
+{
+       int err;
+
+       err = i2c_register_board_info(1, swarm_i2c_info1,
+                                     ARRAY_SIZE(swarm_i2c_info1));
+       if (err < 0)
+               printk(KERN_ERR
+                      "swarm-i2c: cannot register board I2C devices\n");
+       return err;
+}
+
+arch_initcall(swarm_i2c_init);
index 962944038430f69861181623763abb5f16f98049..3cf0d94ba3408c8a93dbd16ddfb4789579cde119 100644 (file)
@@ -12,6 +12,7 @@
 
 #include <linux/sched.h>
 #include <linux/stacktrace.h>
+#include <linux/module.h>
 #include <asm/ptrace.h>
 
 /*
@@ -44,3 +45,4 @@ void save_stack_trace(struct stack_trace *trace)
                sp = newsp;
        }
 }
+EXPORT_SYMBOL_GPL(save_stack_trace);
index 85e46a5d0e0801d9880987cd2595a06391231401..57571f10270cb3f8f6187a1a5a132db23181057d 100644 (file)
@@ -81,6 +81,7 @@ void save_stack_trace(struct stack_trace *trace)
                           S390_lowcore.thread_info,
                           S390_lowcore.thread_info + THREAD_SIZE, 1);
 }
+EXPORT_SYMBOL_GPL(save_stack_trace);
 
 void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
 {
@@ -93,3 +94,4 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
        if (trace->nr_entries < trace->max_entries)
                trace->entries[trace->nr_entries++] = ULONG_MAX;
 }
+EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
index d41e561be20eafacbacf9c99fd08a84d1f1e2321..1b2ae35c4a76504f5d08ba43ff5e0a68a36aa04f 100644 (file)
@@ -34,3 +34,4 @@ void save_stack_trace(struct stack_trace *trace)
                }
        }
 }
+EXPORT_SYMBOL_GPL(save_stack_trace);
index c73ce3f4197e67097da288e381dc134203036ec0..b3e3737750d8bfb49701a862a6c57a821303fc18 100644 (file)
@@ -1,6 +1,7 @@
 #include <linux/sched.h>
 #include <linux/stacktrace.h>
 #include <linux/thread_info.h>
+#include <linux/module.h>
 #include <asm/ptrace.h>
 #include <asm/stacktrace.h>
 
@@ -47,3 +48,4 @@ void save_stack_trace(struct stack_trace *trace)
                        trace->entries[trace->nr_entries++] = pc;
        } while (trace->nr_entries < trace->max_entries);
 }
+EXPORT_SYMBOL_GPL(save_stack_trace);
index 6958d6bcaf704c8cc0c9d5af066e4787353cbb5b..2642b4bf41b9798ecc8663e2e8770c35ab55f8bc 100644 (file)
@@ -447,7 +447,6 @@ config PARAVIRT_DEBUG
 config MEMTEST
        bool "Memtest"
        depends on X86_64
-       default y
        help
          This option adds a kernel parameter 'memtest', which allows memtest
          to be set.
@@ -455,7 +454,7 @@ config MEMTEST
                memtest=1, mean do 1 test pattern;
                ...
                memtest=4, mean do 4 test patterns.
-         If you are unsure how to answer this question, answer Y.
+         If you are unsure how to answer this question, answer N.
 
 config X86_SUMMIT_NUMA
        def_bool y
@@ -1135,21 +1134,18 @@ config MTRR
          See <file:Documentation/mtrr.txt> for more information.
 
 config MTRR_SANITIZER
-       def_bool y
+       bool
        prompt "MTRR cleanup support"
        depends on MTRR
        help
-         Convert MTRR layout from continuous to discrete, so some X driver
-         could add WB entries.
+         Convert MTRR layout from continuous to discrete, so X drivers can
+         add writeback entries.
 
-         Say N here if you see bootup problems (boot crash, boot hang,
-         spontaneous reboots).
+         Can be disabled with disable_mtrr_cleanup on the kernel command line.
+         The largest mtrr entry size for a continuous block can be set with
+         mtrr_chunk_size.
 
-         Could be disabled with disable_mtrr_cleanup. Also mtrr_chunk_size
-         could be used to send largest mtrr entry size for continuous block
-         to hold holes (aka. UC entries)
-
-         If unsure, say Y.
+         If unsure, say N.
 
 config MTRR_SANITIZER_ENABLE_DEFAULT
        int "MTRR cleanup enable value (0-1)"
@@ -1166,7 +1162,7 @@ config MTRR_SANITIZER_SPARE_REG_NR_DEFAULT
        depends on MTRR_SANITIZER
        help
          mtrr cleanup spare entries default, it can be changed via
-         mtrr_spare_reg_nr=
+         mtrr_spare_reg_nr=N on the kernel command line.
 
 config X86_PAT
        bool
index e6a4b564ccaa0e1ecb2669c77978ff7d85d79829..793ad2045f585acceb8501d5c08c86f20803f871 100644 (file)
@@ -23,6 +23,15 @@ static unsigned long acpi_realmode;
 static char temp_stack[10240];
 #endif
 
+/* XXX: this macro should move to asm-x86/segment.h and be shared with the
+   boot code... */
+#define GDT_ENTRY(flags, base, limit)          \
+       (((u64)(base & 0xff000000) << 32) |     \
+        ((u64)flags << 40) |                   \
+        ((u64)(limit & 0x00ff0000) << 32) |    \
+        ((u64)(base & 0x00ffffff) << 16) |     \
+        ((u64)(limit & 0x0000ffff)))
+
 /**
  * acpi_save_state_mem - save kernel state
  *
@@ -51,18 +60,27 @@ int acpi_save_state_mem(void)
        header->video_mode = saved_video_mode;
 
        header->wakeup_jmp_seg = acpi_wakeup_address >> 4;
+
+       /*
+        * Set up the wakeup GDT.  We set these up as Big Real Mode,
+        * that is, with limits set to 4 GB.  At least the Lenovo
+        * Thinkpad X61 is known to need this for the video BIOS
+        * initialization quirk to work; this is likely to also
+        * be the case for other laptops or integrated video devices.
+        */
+
        /* GDT[0]: GDT self-pointer */
        header->wakeup_gdt[0] =
                (u64)(sizeof(header->wakeup_gdt) - 1) +
                ((u64)(acpi_wakeup_address +
                        ((char *)&header->wakeup_gdt - (char *)acpi_realmode))
                                << 16);
-       /* GDT[1]: real-mode-like code segment */
-       header->wakeup_gdt[1] = (0x009bULL << 40) +
-               ((u64)acpi_wakeup_address << 16) + 0xffff;
-       /* GDT[2]: real-mode-like data segment */
-       header->wakeup_gdt[2] = (0x0093ULL << 40) +
-               ((u64)acpi_wakeup_address << 16) + 0xffff;
+       /* GDT[1]: big real mode-like code segment */
+       header->wakeup_gdt[1] =
+               GDT_ENTRY(0x809b, acpi_wakeup_address, 0xfffff);
+       /* GDT[2]: big real mode-like data segment */
+       header->wakeup_gdt[2] =
+               GDT_ENTRY(0x8093, acpi_wakeup_address, 0xfffff);
 
 #ifndef CONFIG_64BIT
        store_gdt((struct desc_ptr *)&header->pmode_gdt);
index 79bdcd11c66e1b7c72220af7ee9ae388d1bebd20..d13858818100e6f35cb328bab4394dc00885d712 100644 (file)
@@ -266,6 +266,8 @@ static void old_ich_force_enable_hpet_user(struct pci_dev *dev)
                hpet_print_force_info();
 }
 
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB_1,
+                        old_ich_force_enable_hpet_user);
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_0,
                         old_ich_force_enable_hpet_user);
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_12,
index c28c342c162f0a90d3ad6e50ba0fb1d5ecd132a7..a03e7f6d90c35af5f4638f6394e4a39f6e2e4020 100644 (file)
@@ -74,6 +74,7 @@ void save_stack_trace(struct stack_trace *trace)
        if (trace->nr_entries < trace->max_entries)
                trace->entries[trace->nr_entries++] = ULONG_MAX;
 }
+EXPORT_SYMBOL_GPL(save_stack_trace);
 
 void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
 {
@@ -81,3 +82,4 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
        if (trace->nr_entries < trace->max_entries)
                trace->entries[trace->nr_entries++] = ULONG_MAX;
 }
+EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
index c6e536597c8a824bbe29c6a8ac6296d6d87d1a6b..fef79ccb2a118da2f6f3144c993c8338c7205351 100644 (file)
@@ -1042,15 +1042,9 @@ void blk_put_request(struct request *req)
        unsigned long flags;
        struct request_queue *q = req->q;
 
-       /*
-        * Gee, IDE calls in w/ NULL q.  Fix IDE and remove the
-        * following if (q) test.
-        */
-       if (q) {
-               spin_lock_irqsave(q->queue_lock, flags);
-               __blk_put_request(q, req);
-               spin_unlock_irqrestore(q->queue_lock, flags);
-       }
+       spin_lock_irqsave(q->queue_lock, flags);
+       __blk_put_request(q, req);
+       spin_unlock_irqrestore(q->queue_lock, flags);
 }
 EXPORT_SYMBOL(blk_put_request);
 
index 391dd6224890a660f688df22b1985810930b88f3..9bceff7674f220acaa9b9580181a1466a8024388 100644 (file)
@@ -18,7 +18,7 @@
  * @rq: request to complete
  * @error: end io status of the request
  */
-void blk_end_sync_rq(struct request *rq, int error)
+static void blk_end_sync_rq(struct request *rq, int error)
 {
        struct completion *waiting = rq->end_io_data;
 
@@ -31,7 +31,6 @@ void blk_end_sync_rq(struct request *rq, int error)
         */
        complete(waiting);
 }
-EXPORT_SYMBOL(blk_end_sync_rq);
 
 /**
  * blk_execute_rq_nowait - insert a request into queue for execution
@@ -58,6 +57,9 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
        spin_lock_irq(q->queue_lock);
        __elv_add_request(q, rq, where, 1);
        __generic_unplug_device(q);
+       /* the queue is stopped so it won't be plugged+unplugged */
+       if (blk_pm_resume_request(rq))
+               q->request_fn(q);
        spin_unlock_irq(q->queue_lock);
 }
 EXPORT_SYMBOL_GPL(blk_execute_rq_nowait);
index 5e6468a7ca4bd2a051b6ee9244297933a3ee7a60..dc7596f028b69437d8e833a688e15142c97675c8 100644 (file)
@@ -56,6 +56,12 @@ MODULE_PARM_DESC(skip_host_reset, "skip global host reset (0=don't skip, 1=skip)
 static int ahci_enable_alpm(struct ata_port *ap,
                enum link_pm policy);
 static void ahci_disable_alpm(struct ata_port *ap);
+static ssize_t ahci_led_show(struct ata_port *ap, char *buf);
+static ssize_t ahci_led_store(struct ata_port *ap, const char *buf,
+                             size_t size);
+static ssize_t ahci_transmit_led_message(struct ata_port *ap, u32 state,
+                                       ssize_t size);
+#define MAX_SLOTS 8
 
 enum {
        AHCI_PCI_BAR            = 5,
@@ -98,6 +104,8 @@ enum {
        HOST_IRQ_STAT           = 0x08, /* interrupt status */
        HOST_PORTS_IMPL         = 0x0c, /* bitmap of implemented ports */
        HOST_VERSION            = 0x10, /* AHCI spec. version compliancy */
+       HOST_EM_LOC             = 0x1c, /* Enclosure Management location */
+       HOST_EM_CTL             = 0x20, /* Enclosure Management Control */
 
        /* HOST_CTL bits */
        HOST_RESET              = (1 << 0),  /* reset controller; self-clear */
@@ -105,6 +113,7 @@ enum {
        HOST_AHCI_EN            = (1 << 31), /* AHCI enabled */
 
        /* HOST_CAP bits */
+       HOST_CAP_EMS            = (1 << 6),  /* Enclosure Management support */
        HOST_CAP_SSC            = (1 << 14), /* Slumber capable */
        HOST_CAP_PMP            = (1 << 17), /* Port Multiplier support */
        HOST_CAP_CLO            = (1 << 24), /* Command List Override support */
@@ -202,6 +211,11 @@ enum {
                                          ATA_FLAG_IPM,
 
        ICH_MAP                         = 0x90, /* ICH MAP register */
+
+       /* em_ctl bits */
+       EM_CTL_RST                      = (1 << 9), /* Reset */
+       EM_CTL_TM                       = (1 << 8), /* Transmit Message */
+       EM_CTL_ALHD                     = (1 << 26), /* Activity LED */
 };
 
 struct ahci_cmd_hdr {
@@ -219,12 +233,21 @@ struct ahci_sg {
        __le32                  flags_size;
 };
 
+struct ahci_em_priv {
+       enum sw_activity blink_policy;  /* set by ahci_activity_store() */
+       struct timer_list timer;        /* runs ahci_sw_activity_blink() */
+       unsigned long saved_activity;   /* activity count at the last blink */
+       unsigned long activity;         /* bumped by ahci_sw_activity() */
+       unsigned long led_state;        /* last LED message transmitted */
+};
+
 struct ahci_host_priv {
        unsigned int            flags;          /* AHCI_HFLAG_* */
        u32                     cap;            /* cap to use */
        u32                     port_map;       /* port map to use */
        u32                     saved_cap;      /* saved initial cap */
        u32                     saved_port_map; /* saved initial port_map */
+       u32                     em_loc; /* enclosure management location */
 };
 
 struct ahci_port_priv {
@@ -240,6 +263,8 @@ struct ahci_port_priv {
        unsigned int            ncq_saw_dmas:1;
        unsigned int            ncq_saw_sdb:1;
        u32                     intr_mask;      /* interrupts to enable */
+       struct ahci_em_priv     em_priv[MAX_SLOTS];/* enclosure management info
+                                                * per PM slot */
 };
 
 static int ahci_scr_read(struct ata_port *ap, unsigned int sc_reg, u32 *val);
@@ -277,9 +302,20 @@ static int ahci_port_suspend(struct ata_port *ap, pm_message_t mesg);
 static int ahci_pci_device_suspend(struct pci_dev *pdev, pm_message_t mesg);
 static int ahci_pci_device_resume(struct pci_dev *pdev);
 #endif
+static ssize_t ahci_activity_show(struct ata_device *dev, char *buf);
+static ssize_t ahci_activity_store(struct ata_device *dev,
+                                  enum sw_activity val);
+static void ahci_init_sw_activity(struct ata_link *link);
 
 static struct device_attribute *ahci_shost_attrs[] = {
        &dev_attr_link_power_management_policy,
+       &dev_attr_em_message_type,
+       &dev_attr_em_message,
+       NULL
+};
+
+static struct device_attribute *ahci_sdev_attrs[] = {
+       &dev_attr_sw_activity,
        NULL
 };
 
@@ -289,6 +325,7 @@ static struct scsi_host_template ahci_sht = {
        .sg_tablesize           = AHCI_MAX_SG,
        .dma_boundary           = AHCI_DMA_BOUNDARY,
        .shost_attrs            = ahci_shost_attrs,
+       .sdev_attrs             = ahci_sdev_attrs,
 };
 
 static struct ata_port_operations ahci_ops = {
@@ -316,6 +353,10 @@ static struct ata_port_operations ahci_ops = {
 
        .enable_pm              = ahci_enable_alpm,
        .disable_pm             = ahci_disable_alpm,
+       .em_show                = ahci_led_show,
+       .em_store               = ahci_led_store,
+       .sw_activity_show       = ahci_activity_show,
+       .sw_activity_store      = ahci_activity_store,
 #ifdef CONFIG_PM
        .port_suspend           = ahci_port_suspend,
        .port_resume            = ahci_port_resume,
@@ -561,6 +602,11 @@ static struct pci_driver ahci_pci_driver = {
 #endif
 };
 
+static int ahci_em_messages = 1;
+module_param(ahci_em_messages, int, 0444);
+/* add other LED protocol types when they become supported */
+MODULE_PARM_DESC(ahci_em_messages,
+       "Set AHCI Enclosure Management Message type (0 = disabled, 1 = LED)");
 
 static inline int ahci_nr_ports(u32 cap)
 {
@@ -1031,11 +1077,28 @@ static void ahci_power_down(struct ata_port *ap)
 
 static void ahci_start_port(struct ata_port *ap)
 {
+       struct ahci_port_priv *pp = ap->private_data;
+       struct ata_link *link;
+       struct ahci_em_priv *emp;
+
        /* enable FIS reception */
        ahci_start_fis_rx(ap);
 
        /* enable DMA */
        ahci_start_engine(ap);
+
+       /* turn on LEDs */
+       if (ap->flags & ATA_FLAG_EM) {
+               ata_port_for_each_link(link, ap) {
+                       emp = &pp->em_priv[link->pmp];
+                       ahci_transmit_led_message(ap, emp->led_state, 4);
+               }
+       }
+
+       if (ap->flags & ATA_FLAG_SW_ACTIVITY)
+               ata_port_for_each_link(link, ap)
+                       ahci_init_sw_activity(link);
+
 }
 
 static int ahci_deinit_port(struct ata_port *ap, const char **emsg)
@@ -1079,12 +1142,15 @@ static int ahci_reset_controller(struct ata_host *host)
                        readl(mmio + HOST_CTL); /* flush */
                }
 
-               /* reset must complete within 1 second, or
+               /*
+                * to perform host reset, OS should set HOST_RESET
+                * and poll until this bit is read to be "0".
+                * reset must complete within 1 second, or
                 * the hardware should be considered fried.
                 */
-               ssleep(1);
+               tmp = ata_wait_register(mmio + HOST_CTL, HOST_RESET,
+                                       HOST_RESET, 10, 1000);
 
-               tmp = readl(mmio + HOST_CTL);
                if (tmp & HOST_RESET) {
                        dev_printk(KERN_ERR, host->dev,
                                   "controller reset failed (0x%x)\n", tmp);
@@ -1116,6 +1182,230 @@ static int ahci_reset_controller(struct ata_host *host)
        return 0;
 }
 
+static void ahci_sw_activity(struct ata_link *link)
+{
+       struct ata_port *ap = link->ap;
+       struct ahci_port_priv *pp = ap->private_data;
+       struct ahci_em_priv *emp = &pp->em_priv[link->pmp];
+
+       if (!(link->flags & ATA_LFLAG_SW_ACTIVITY))
+               return;         /* software blinking is off for this link */
+       /* count the activity; the blink timer compares it to saved_activity */
+       emp->activity++;
+       if (!timer_pending(&emp->timer))        /* leave a pending run alone */
+               mod_timer(&emp->timer, jiffies + msecs_to_jiffies(10));
+}
+
+static void ahci_sw_activity_blink(unsigned long arg)
+{
+       struct ata_link *link = (struct ata_link *)arg;
+       struct ata_port *ap = link->ap;
+       struct ahci_port_priv *pp = ap->private_data;
+       struct ahci_em_priv *emp = &pp->em_priv[link->pmp];
+       unsigned long led_message = emp->led_state;
+       u32 activity_led_state;
+
+       led_message &= 0xffff0000;      /* keep LED bits, rebuild the address */
+       led_message |= ap->port_no | (link->pmp << 8);
+
+       /* Timer callback (armed via emp->timer): check to see if we've
+        * had activity.  If so, toggle state of LED and reset timer.
+        * If not, turn LED to desired idle state.
+        */
+       if (emp->saved_activity != emp->activity) {
+               emp->saved_activity = emp->activity;
+               /* get the current LED state (bit 16 of the message) */
+               activity_led_state = led_message & 0x00010000;
+
+               if (activity_led_state)
+                       activity_led_state = 0;
+               else
+                       activity_led_state = 1;
+
+               /* clear old state (bits 18:16) */
+               led_message &= 0xfff8ffff;
+
+               /* toggle state */
+               led_message |= (activity_led_state << 16);
+               mod_timer(&emp->timer, jiffies + msecs_to_jiffies(100));
+       } else {
+               /* switch to idle: bit 16 set only for BLINK_OFF policy */
+               led_message &= 0xfff8ffff;
+               if (emp->blink_policy == BLINK_OFF)
+                       led_message |= (1 << 16);
+       }
+       ahci_transmit_led_message(ap, led_message, 4);
+}
+
+static void ahci_init_sw_activity(struct ata_link *link)
+{
+       struct ata_port *ap = link->ap;
+       struct ahci_port_priv *pp = ap->private_data;
+       struct ahci_em_priv *emp = &pp->em_priv[link->pmp];
+
+       /* init activity stats, setup timer with @link as callback argument */
+       emp->saved_activity = emp->activity = 0;
+       setup_timer(&emp->timer, ahci_sw_activity_blink, (unsigned long)link);
+
+       /* check our blink policy and set flag for link if it's enabled */
+       if (emp->blink_policy)
+               link->flags |= ATA_LFLAG_SW_ACTIVITY;
+}
+
+static int ahci_reset_em(struct ata_host *host)
+{
+       void __iomem *mmio = host->iomap[AHCI_PCI_BAR];
+       u32 em_ctl;
+
+       em_ctl = readl(mmio + HOST_EM_CTL);
+       if ((em_ctl & EM_CTL_TM) || (em_ctl & EM_CTL_RST))
+               return -EINVAL; /* TM or RST already set: refuse */
+       /* request the reset by setting RST in the EM control register */
+       writel(em_ctl | EM_CTL_RST, mmio + HOST_EM_CTL);
+       return 0;
+}
+
+static ssize_t ahci_transmit_led_message(struct ata_port *ap, u32 state,
+                                       ssize_t size)
+{
+       struct ahci_host_priv *hpriv = ap->host->private_data;
+       struct ahci_port_priv *pp = ap->private_data;
+       void __iomem *mmio = ap->host->iomap[AHCI_PCI_BAR];
+       u32 em_ctl;
+       u32 message[] = {0, 0};
+       unsigned long flags;    /* irqsave needs unsigned long */
+       int pmp;
+       struct ahci_em_priv *emp;
+
+       /* Transmit one LED message; returns @size, or -EINVAL if refused. */
+       pmp = (state & 0x0000ff00) >> 8;        /* slot number of the message */
+       if (pmp < MAX_SLOTS)
+               emp = &pp->em_priv[pmp];
+       else
+               return -EINVAL;
+
+       spin_lock_irqsave(ap->lock, flags);
+
+       /*
+        * if we are still busy transmitting a previous message,
+        * do not allow a new one: the caller gets -EINVAL
+        */
+       em_ctl = readl(mmio + HOST_EM_CTL);
+       if (em_ctl & EM_CTL_TM) {
+               spin_unlock_irqrestore(ap->lock, flags);
+               return -EINVAL;
+       }
+
+       /*
+        * create message header - this is all zero except for
+        * the message size, which is 4 bytes.
+        */
+       message[0] |= (4 << 8);
+
+       /* ignore bits 3:0 of byte zero, fill in port info yourself */
+       message[1] = ((state & 0xfffffff0) | ap->port_no);
+
+       /* write message to EM_LOC */
+       writel(message[0], mmio + hpriv->em_loc);
+       writel(message[1], mmio + hpriv->em_loc+4);
+
+       /* save off new led state for port/slot */
+       emp->led_state = message[1];
+
+       /*
+        * tell hardware to transmit the message
+        */
+       writel(em_ctl | EM_CTL_TM, mmio + HOST_EM_CTL);
+
+       spin_unlock_irqrestore(ap->lock, flags);
+       return size;
+}
+
+static ssize_t ahci_led_show(struct ata_port *ap, char *buf)
+{
+       struct ahci_port_priv *pp = ap->private_data;
+       struct ata_link *link;
+       struct ahci_em_priv *emp;
+       int rc = 0;     /* bytes written to @buf so far; also the return value */
+
+       ata_port_for_each_link(link, ap) {      /* one "%lx\n" line per link */
+               emp = &pp->em_priv[link->pmp];
+               rc += sprintf(buf + rc, "%lx\n", emp->led_state);
+       }
+       return rc;
+}
+
+static ssize_t ahci_led_store(struct ata_port *ap, const char *buf,
+                               size_t size)
+{
+       int state;
+       int pmp;
+       struct ahci_port_priv *pp = ap->private_data;
+       struct ahci_em_priv *emp;
+
+       /* .em_store hook: parse a raw LED message from @buf and transmit it */
+       state = simple_strtoul(buf, NULL, 0);
+       /* get the slot number from the message (bits 15:8) */
+       pmp = (state & 0x0000ff00) >> 8;
+       if (pmp < MAX_SLOTS)
+               emp = &pp->em_priv[pmp];
+       else
+               return -EINVAL;
+
+       /* mask off the activity bits if we are in sw_activity
+        * mode, user should turn off sw_activity before setting
+        * activity led through em_message
+        */
+       if (emp->blink_policy)
+               state &= 0xfff8ffff;
+
+       return ahci_transmit_led_message(ap, state, size);
+}
+
+static ssize_t ahci_activity_store(struct ata_device *dev, enum sw_activity val)
+{
+       struct ata_link *link = dev->link;
+       struct ata_port *ap = link->ap;
+       struct ahci_port_priv *pp = ap->private_data;
+       struct ahci_em_priv *emp = &pp->em_priv[link->pmp];
+       u32 port_led_state = emp->led_state;
+
+       /* .sw_activity_store hook: save the desired Activity LED behavior */
+       if (val == OFF) {
+               /* clear LFLAG */
+               link->flags &= ~(ATA_LFLAG_SW_ACTIVITY);
+
+               /* set the LED to OFF (bits 18:0 cleared, address rebuilt) */
+               port_led_state &= 0xfff80000;
+               port_led_state |= (ap->port_no | (link->pmp << 8));
+               ahci_transmit_led_message(ap, port_led_state, 4);
+       } else {
+               link->flags |= ATA_LFLAG_SW_ACTIVITY;
+               if (val == BLINK_OFF) {
+                       /* set LED to ON for idle */
+                       port_led_state &= 0xfff80000;
+                       port_led_state |= (ap->port_no | (link->pmp << 8));
+                       port_led_state |= 0x00010000; /* check this */
+                       ahci_transmit_led_message(ap, port_led_state, 4);
+               }
+       }
+       emp->blink_policy = val;
+       return 0;
+}
+
+static ssize_t ahci_activity_show(struct ata_device *dev, char *buf)
+{
+       struct ata_link *link = dev->link;
+       struct ata_port *ap = link->ap;
+       struct ahci_port_priv *pp = ap->private_data;
+       struct ahci_em_priv *emp = &pp->em_priv[link->pmp];
+
+       /* display the saved value of activity behavior for this
+        * disk, printed as the decimal value of enum sw_activity.
+        */
+       return sprintf(buf, "%d\n", emp->blink_policy);
+}
+
 static void ahci_port_init(struct pci_dev *pdev, struct ata_port *ap,
                           int port_no, void __iomem *mmio,
                           void __iomem *port_mmio)
@@ -1846,7 +2136,8 @@ static unsigned int ahci_qc_issue(struct ata_queued_cmd *qc)
        if (qc->tf.protocol == ATA_PROT_NCQ)
                writel(1 << qc->tag, port_mmio + PORT_SCR_ACT);
        writel(1 << qc->tag, port_mmio + PORT_CMD_ISSUE);
-       readl(port_mmio + PORT_CMD_ISSUE);      /* flush */
+
+       ahci_sw_activity(qc->dev->link);
 
        return 0;
 }
@@ -2154,7 +2445,8 @@ static void ahci_print_info(struct ata_host *host)
        dev_printk(KERN_INFO, &pdev->dev,
                "flags: "
                "%s%s%s%s%s%s%s"
-               "%s%s%s%s%s%s%s\n"
+               "%s%s%s%s%s%s%s"
+               "%s\n"
                ,
 
                cap & (1 << 31) ? "64bit " : "",
@@ -2171,7 +2463,8 @@ static void ahci_print_info(struct ata_host *host)
                cap & (1 << 17) ? "pmp " : "",
                cap & (1 << 15) ? "pio " : "",
                cap & (1 << 14) ? "slum " : "",
-               cap & (1 << 13) ? "part " : ""
+               cap & (1 << 13) ? "part " : "",
+               cap & (1 << 6) ? "ems ": ""
                );
 }
 
@@ -2291,6 +2584,24 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
        if (hpriv->cap & HOST_CAP_PMP)
                pi.flags |= ATA_FLAG_PMP;
 
+       if (ahci_em_messages && (hpriv->cap & HOST_CAP_EMS)) {
+               u8 messages;
+               void __iomem *mmio = pcim_iomap_table(pdev)[AHCI_PCI_BAR];
+               u32 em_loc = readl(mmio + HOST_EM_LOC);
+               u32 em_ctl = readl(mmio + HOST_EM_CTL);
+
+               messages = (em_ctl & 0x000f0000) >> 16;
+
+               /* we only support LED message type right now */
+               if ((messages & 0x01) && (ahci_em_messages == 1)) {
+                       /* store em_loc */
+                       hpriv->em_loc = ((em_loc >> 16) * 4);
+                       pi.flags |= ATA_FLAG_EM;
+                       if (!(em_ctl & EM_CTL_ALHD))
+                               pi.flags |= ATA_FLAG_SW_ACTIVITY;
+               }
+       }
+
        /* CAP.NP sometimes indicate the index of the last enabled
         * port, at other times, that of the last possible port, so
         * determining the maximum port number requires looking at
@@ -2304,6 +2615,9 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
        host->iomap = pcim_iomap_table(pdev);
        host->private_data = hpriv;
 
+       if (pi.flags & ATA_FLAG_EM)
+               ahci_reset_em(host);
+
        for (i = 0; i < host->n_ports; i++) {
                struct ata_port *ap = host->ports[i];
 
@@ -2314,6 +2628,11 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
                /* set initial link pm policy */
                ap->pm_policy = NOT_AVAILABLE;
 
+               /* set enclosure management message type */
+               if (ap->flags & ATA_FLAG_EM)
+                       ap->em_message_type = ahci_em_messages;
+
+
                /* disabled/not-implemented port */
                if (!(hpriv->port_map & (1 << i)))
                        ap->ops = &ata_dummy_port_ops;
index 303fc0d2b978464dbf6d927a370f4cf743862b6b..9bef1a84fe3f926832b21986d46d93f3bcef72ed 100644 (file)
@@ -54,7 +54,6 @@
 #include <linux/completion.h>
 #include <linux/suspend.h>
 #include <linux/workqueue.h>
-#include <linux/jiffies.h>
 #include <linux/scatterlist.h>
 #include <linux/io.h>
 #include <scsi/scsi.h>
@@ -145,7 +144,7 @@ static int libata_dma_mask = ATA_DMA_MASK_ATA|ATA_DMA_MASK_ATAPI|ATA_DMA_MASK_CF
 module_param_named(dma, libata_dma_mask, int, 0444);
 MODULE_PARM_DESC(dma, "DMA enable/disable (0x1==ATA, 0x2==ATAPI, 0x4==CF)");
 
-static int ata_probe_timeout = ATA_TMOUT_INTERNAL / HZ;
+static int ata_probe_timeout;
 module_param(ata_probe_timeout, int, 0444);
 MODULE_PARM_DESC(ata_probe_timeout, "Set ATA probing timeout (seconds)");
 
@@ -1533,7 +1532,7 @@ unsigned long ata_id_xfermask(const u16 *id)
  *     @ap: The ata_port to queue port_task for
  *     @fn: workqueue function to be scheduled
  *     @data: data for @fn to use
- *     @delay: delay time for workqueue function
+ *     @delay: delay time in msecs for workqueue function
  *
  *     Schedule @fn(@data) for execution after @delay jiffies using
  *     port_task.  There is one port_task per port and it's the
@@ -1552,7 +1551,7 @@ void ata_pio_queue_task(struct ata_port *ap, void *data, unsigned long delay)
        ap->port_task_data = data;
 
        /* may fail if ata_port_flush_task() in progress */
-       queue_delayed_work(ata_wq, &ap->port_task, delay);
+       queue_delayed_work(ata_wq, &ap->port_task, msecs_to_jiffies(delay));
 }
 
 /**
@@ -1612,6 +1611,7 @@ unsigned ata_exec_internal_sg(struct ata_device *dev,
        struct ata_link *link = dev->link;
        struct ata_port *ap = link->ap;
        u8 command = tf->command;
+       int auto_timeout = 0;
        struct ata_queued_cmd *qc;
        unsigned int tag, preempted_tag;
        u32 preempted_sactive, preempted_qc_active;
@@ -1684,8 +1684,14 @@ unsigned ata_exec_internal_sg(struct ata_device *dev,
 
        spin_unlock_irqrestore(ap->lock, flags);
 
-       if (!timeout)
-               timeout = ata_probe_timeout * 1000 / HZ;
+       if (!timeout) {
+               if (ata_probe_timeout)
+                       timeout = ata_probe_timeout * 1000;
+               else {
+                       timeout = ata_internal_cmd_timeout(dev, command);
+                       auto_timeout = 1;
+               }
+       }
 
        rc = wait_for_completion_timeout(&wait, msecs_to_jiffies(timeout));
 
@@ -1761,6 +1767,9 @@ unsigned ata_exec_internal_sg(struct ata_device *dev,
 
        spin_unlock_irqrestore(ap->lock, flags);
 
+       if ((err_mask & AC_ERR_TIMEOUT) && auto_timeout)
+               ata_internal_cmd_timed_out(dev, command);
+
        return err_mask;
 }
 
@@ -3319,7 +3328,7 @@ int ata_wait_ready(struct ata_link *link, unsigned long deadline,
                   int (*check_ready)(struct ata_link *link))
 {
        unsigned long start = jiffies;
-       unsigned long nodev_deadline = start + ATA_TMOUT_FF_WAIT;
+       unsigned long nodev_deadline = ata_deadline(start, ATA_TMOUT_FF_WAIT);
        int warned = 0;
 
        if (time_after(nodev_deadline, deadline))
@@ -3387,7 +3396,7 @@ int ata_wait_ready(struct ata_link *link, unsigned long deadline,
 int ata_wait_after_reset(struct ata_link *link, unsigned long deadline,
                                int (*check_ready)(struct ata_link *link))
 {
-       msleep(ATA_WAIT_AFTER_RESET_MSECS);
+       msleep(ATA_WAIT_AFTER_RESET);
 
        return ata_wait_ready(link, deadline, check_ready);
 }
@@ -3417,13 +3426,13 @@ int ata_wait_after_reset(struct ata_link *link, unsigned long deadline,
 int sata_link_debounce(struct ata_link *link, const unsigned long *params,
                       unsigned long deadline)
 {
-       unsigned long interval_msec = params[0];
-       unsigned long duration = msecs_to_jiffies(params[1]);
+       unsigned long interval = params[0];
+       unsigned long duration = params[1];
        unsigned long last_jiffies, t;
        u32 last, cur;
        int rc;
 
-       t = jiffies + msecs_to_jiffies(params[2]);
+       t = ata_deadline(jiffies, params[2]);
        if (time_before(t, deadline))
                deadline = t;
 
@@ -3435,7 +3444,7 @@ int sata_link_debounce(struct ata_link *link, const unsigned long *params,
        last_jiffies = jiffies;
 
        while (1) {
-               msleep(interval_msec);
+               msleep(interval);
                if ((rc = sata_scr_read(link, SCR_STATUS, &cur)))
                        return rc;
                cur &= 0xf;
@@ -3444,7 +3453,8 @@ int sata_link_debounce(struct ata_link *link, const unsigned long *params,
                if (cur == last) {
                        if (cur == 1 && time_before(jiffies, deadline))
                                continue;
-                       if (time_after(jiffies, last_jiffies + duration))
+                       if (time_after(jiffies,
+                                      ata_deadline(last_jiffies, duration)))
                                return 0;
                        continue;
                }
@@ -3636,7 +3646,8 @@ int sata_link_hardreset(struct ata_link *link, const unsigned long *timing,
                if (check_ready) {
                        unsigned long pmp_deadline;
 
-                       pmp_deadline = jiffies + ATA_TMOUT_PMP_SRST_WAIT;
+                       pmp_deadline = ata_deadline(jiffies,
+                                                   ATA_TMOUT_PMP_SRST_WAIT);
                        if (time_after(pmp_deadline, deadline))
                                pmp_deadline = deadline;
                        ata_wait_ready(link, pmp_deadline, check_ready);
@@ -6073,8 +6084,6 @@ static void __init ata_parse_force_param(void)
 
 static int __init ata_init(void)
 {
-       ata_probe_timeout *= HZ;
-
        ata_parse_force_param();
 
        ata_wq = create_workqueue("ata");
@@ -6127,8 +6136,8 @@ int ata_ratelimit(void)
  *     @reg: IO-mapped register
  *     @mask: Mask to apply to read register value
  *     @val: Wait condition
- *     @interval_msec: polling interval in milliseconds
- *     @timeout_msec: timeout in milliseconds
+ *     @interval: polling interval in milliseconds
+ *     @timeout: timeout in milliseconds
  *
  *     Waiting for some bits of register to change is a common
  *     operation for ATA controllers.  This function reads 32bit LE
@@ -6146,10 +6155,9 @@ int ata_ratelimit(void)
  *     The final register value.
  */
 u32 ata_wait_register(void __iomem *reg, u32 mask, u32 val,
-                     unsigned long interval_msec,
-                     unsigned long timeout_msec)
+                     unsigned long interval, unsigned long timeout)
 {
-       unsigned long timeout;
+       unsigned long deadline;
        u32 tmp;
 
        tmp = ioread32(reg);
@@ -6158,10 +6166,10 @@ u32 ata_wait_register(void __iomem *reg, u32 mask, u32 val,
         * preceding writes reach the controller before starting to
         * eat away the timeout.
         */
-       timeout = jiffies + (timeout_msec * HZ) / 1000;
+       deadline = ata_deadline(jiffies, timeout);
 
-       while ((tmp & mask) == val && time_before(jiffies, timeout)) {
-               msleep(interval_msec);
+       while ((tmp & mask) == val && time_before(jiffies, deadline)) {
+               msleep(interval);
                tmp = ioread32(reg);
        }
 
index 7894d83ea1eb8d10f57db21f4c71013cd22209f8..58bdc538d229d400b78fd2c0a46317c1a84d943c 100644 (file)
@@ -66,15 +66,19 @@ enum {
        ATA_ECAT_DUBIOUS_TOUT_HSM       = 6,
        ATA_ECAT_DUBIOUS_UNK_DEV        = 7,
        ATA_ECAT_NR                     = 8,
-};
 
-/* Waiting in ->prereset can never be reliable.  It's sometimes nice
- * to wait there but it can't be depended upon; otherwise, we wouldn't
- * be resetting.  Just give it enough time for most drives to spin up.
- */
-enum {
-       ATA_EH_PRERESET_TIMEOUT         = 10 * HZ,
-       ATA_EH_FASTDRAIN_INTERVAL       = 3 * HZ,
+       ATA_EH_CMD_DFL_TIMEOUT          =  5000,
+
+       /* always put at least this amount of time between resets */
+       ATA_EH_RESET_COOL_DOWN          =  5000,
+
+       /* Waiting in ->prereset can never be reliable.  It's
+        * sometimes nice to wait there but it can't be depended upon;
+        * otherwise, we wouldn't be resetting.  Just give it enough
+        * time for most drives to spin up.
+        */
+       ATA_EH_PRERESET_TIMEOUT         = 10000,
+       ATA_EH_FASTDRAIN_INTERVAL       =  3000,
 };
 
 /* The following table determines how we sequence resets.  Each entry
@@ -84,12 +88,59 @@ enum {
  * are mostly for error handling, hotplug and retarded devices.
  */
 static const unsigned long ata_eh_reset_timeouts[] = {
-       10 * HZ,        /* most drives spin up by 10sec */
-       10 * HZ,        /* > 99% working drives spin up before 20sec */
-       35 * HZ,        /* give > 30 secs of idleness for retarded devices */
-       5 * HZ,         /* and sweet one last chance */
-       /* > 1 min has elapsed, give up */
+       10000,  /* most drives spin up by 10sec */
+       10000,  /* > 99% working drives spin up before 20sec */
+       35000,  /* give > 30 secs of idleness for retarded devices */
+        5000,  /* and sweet one last chance */
+       ULONG_MAX, /* > 1 min has elapsed, give up */
+};
+
+static const unsigned long ata_eh_identify_timeouts[] = {
+        5000,  /* covers > 99% of successes and not too boring on failures */
+       10000,  /* combined time till here is enough even for media access */
+       30000,  /* for true idiots */
+       ULONG_MAX,      /* end marker, never used as a timeout */
+};
+
+static const unsigned long ata_eh_other_timeouts[] = {
+        5000,  /* same rationale as identify timeout */
+       10000,  /* ditto */
+       /* but no merciful 30sec for other commands, it just isn't worth it */
+       ULONG_MAX,      /* end marker, never used as a timeout */
+};
+
+struct ata_eh_cmd_timeout_ent {
+       const u8                *commands;      /* zero-terminated list */
+       const unsigned long     *timeouts;      /* ULONG_MAX-terminated list */
+};
+
+/* The following table determines timeouts to use for EH internal
+ * commands.  Each table entry is a command class and matches the
+ * commands the entry applies to and the timeout table to use.
+ *
+ * On the retry after a command timed out, the next timeout value from
+ * the table is used.  If the table doesn't contain further entries,
+ * the last value is used.
+ *
+ * ehc->cmd_timeout_idx keeps track of which timeout to use per
+ * command class, so if SET_FEATURES times out on the first try, the
+ * next try will use the second timeout value only for that class.
+ */
+#define CMDS(cmds...)  (const u8 []){ cmds, 0 }        /* appends the 0 terminator */
+static const struct ata_eh_cmd_timeout_ent
+ata_eh_cmd_timeout_table[ATA_EH_CMD_TIMEOUT_TABLE_SIZE] = {
+       { .commands = CMDS(ATA_CMD_ID_ATA, ATA_CMD_ID_ATAPI),
+         .timeouts = ata_eh_identify_timeouts, },
+       { .commands = CMDS(ATA_CMD_READ_NATIVE_MAX, ATA_CMD_READ_NATIVE_MAX_EXT),
+         .timeouts = ata_eh_other_timeouts, },
+       { .commands = CMDS(ATA_CMD_SET_MAX, ATA_CMD_SET_MAX_EXT),
+         .timeouts = ata_eh_other_timeouts, },
+       { .commands = CMDS(ATA_CMD_SET_FEATURES),
+         .timeouts = ata_eh_other_timeouts, },
+       { .commands = CMDS(ATA_CMD_INIT_DEV_PARAMS),
+         .timeouts = ata_eh_other_timeouts, },
 };
+#undef CMDS
 
 static void __ata_port_freeze(struct ata_port *ap);
 #ifdef CONFIG_PM
@@ -236,6 +287,73 @@ void ata_port_pbar_desc(struct ata_port *ap, int bar, ssize_t offset,
 
 #endif /* CONFIG_PCI */
 
+static int ata_lookup_timeout_table(u8 cmd)
+{
+       int i;
+
+       for (i = 0; i < ATA_EH_CMD_TIMEOUT_TABLE_SIZE; i++) {
+               const u8 *cur;  /* walks a zero-terminated command list */
+
+               for (cur = ata_eh_cmd_timeout_table[i].commands; *cur; cur++)
+                       if (*cur == cmd)
+                               return i;       /* index of the matching entry */
+       }
+       /* @cmd is not listed in any entry of ata_eh_cmd_timeout_table */
+       return -1;
+}
+
+/**
+ *     ata_internal_cmd_timeout - determine timeout for an internal command
+ *     @dev: target device
+ *     @cmd: internal command to be issued
+ *
+ *     Determine timeout for internal command @cmd for @dev.
+ *
+ *     LOCKING:
+ *     EH context.
+ *
+ *     RETURNS:
+ *     Determined timeout in milliseconds.
+ */
+unsigned long ata_internal_cmd_timeout(struct ata_device *dev, u8 cmd)
+{
+       struct ata_eh_context *ehc = &dev->link->eh_context;
+       int ent = ata_lookup_timeout_table(cmd);
+       int idx;
+
+       if (ent < 0)
+               return ATA_EH_CMD_DFL_TIMEOUT;  /* @cmd has no table entry */
+
+       idx = ehc->cmd_timeout_idx[dev->devno][ent];
+       return ata_eh_cmd_timeout_table[ent].timeouts[idx];
+}
+
+/**
+ *     ata_internal_cmd_timed_out - notification for internal command timeout
+ *     @dev: target device
+ *     @cmd: internal command which timed out
+ *
+ *     Notify EH that internal command @cmd for @dev timed out.  This
+ *     function should be called only for commands whose timeouts are
+ *     determined using ata_internal_cmd_timeout().
+ *
+ *     LOCKING:
+ *     EH context.
+ */
+void ata_internal_cmd_timed_out(struct ata_device *dev, u8 cmd)
+{
+       struct ata_eh_context *ehc = &dev->link->eh_context;
+       int ent = ata_lookup_timeout_table(cmd);
+       int idx;
+
+       if (ent < 0)
+               return;         /* @cmd has no table entry, nothing to track */
+       /* step to the next timeout unless the ULONG_MAX end marker follows */
+       idx = ehc->cmd_timeout_idx[dev->devno][ent];
+       if (ata_eh_cmd_timeout_table[ent].timeouts[idx + 1] != ULONG_MAX)
+               ehc->cmd_timeout_idx[dev->devno][ent]++;
+}
+
 static void ata_ering_record(struct ata_ering *ering, unsigned int eflags,
                             unsigned int err_mask)
 {
@@ -486,6 +604,9 @@ void ata_scsi_error(struct Scsi_Host *host)
                                if (ata_ncq_enabled(dev))
                                        ehc->saved_ncq_enabled |= 1 << devno;
                        }
+
+                       /* set last reset timestamp to some time in the past */
+                       ehc->last_reset = jiffies - 60 * HZ;
                }
 
                ap->pflags |= ATA_PFLAG_EH_IN_PROGRESS;
@@ -641,7 +762,7 @@ void ata_eh_fastdrain_timerfn(unsigned long arg)
                /* some qcs have finished, give it another chance */
                ap->fastdrain_cnt = cnt;
                ap->fastdrain_timer.expires =
-                       jiffies + ATA_EH_FASTDRAIN_INTERVAL;
+                       ata_deadline(jiffies, ATA_EH_FASTDRAIN_INTERVAL);
                add_timer(&ap->fastdrain_timer);
        }
 
@@ -681,7 +802,8 @@ static void ata_eh_set_pending(struct ata_port *ap, int fastdrain)
 
        /* activate fast drain */
        ap->fastdrain_cnt = cnt;
-       ap->fastdrain_timer.expires = jiffies + ATA_EH_FASTDRAIN_INTERVAL;
+       ap->fastdrain_timer.expires =
+               ata_deadline(jiffies, ATA_EH_FASTDRAIN_INTERVAL);
        add_timer(&ap->fastdrain_timer);
 }
 
@@ -1238,6 +1360,7 @@ static int ata_eh_read_log_10h(struct ata_device *dev,
  *     atapi_eh_request_sense - perform ATAPI REQUEST_SENSE
  *     @dev: device to perform REQUEST_SENSE to
  *     @sense_buf: result sense data buffer (SCSI_SENSE_BUFFERSIZE bytes long)
+ *     @dfl_sense_key: default sense key to use
  *
  *     Perform ATAPI REQUEST_SENSE after the device reported CHECK
  *     SENSE.  This function is EH helper.
@@ -1248,13 +1371,13 @@ static int ata_eh_read_log_10h(struct ata_device *dev,
  *     RETURNS:
  *     0 on success, AC_ERR_* mask on failure
  */
-static unsigned int atapi_eh_request_sense(struct ata_queued_cmd *qc)
+static unsigned int atapi_eh_request_sense(struct ata_device *dev,
+                                          u8 *sense_buf, u8 dfl_sense_key)
 {
-       struct ata_device *dev = qc->dev;
-       unsigned char *sense_buf = qc->scsicmd->sense_buffer;
+       u8 cdb[ATAPI_CDB_LEN] =
+               { REQUEST_SENSE, 0, 0, 0, SCSI_SENSE_BUFFERSIZE, 0 };
        struct ata_port *ap = dev->link->ap;
        struct ata_taskfile tf;
-       u8 cdb[ATAPI_CDB_LEN];
 
        DPRINTK("ATAPI request sense\n");
 
@@ -1265,15 +1388,11 @@ static unsigned int atapi_eh_request_sense(struct ata_queued_cmd *qc)
         * for the case where they are -not- overwritten
         */
        sense_buf[0] = 0x70;
-       sense_buf[2] = qc->result_tf.feature >> 4;
+       sense_buf[2] = dfl_sense_key;
 
        /* some devices time out if garbage left in tf */
        ata_tf_init(dev, &tf);
 
-       memset(cdb, 0, ATAPI_CDB_LEN);
-       cdb[0] = REQUEST_SENSE;
-       cdb[4] = SCSI_SENSE_BUFFERSIZE;
-
        tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
        tf.command = ATA_CMD_PACKET;
 
@@ -1445,7 +1564,9 @@ static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc,
 
        case ATA_DEV_ATAPI:
                if (!(qc->ap->pflags & ATA_PFLAG_FROZEN)) {
-                       tmp = atapi_eh_request_sense(qc);
+                       tmp = atapi_eh_request_sense(qc->dev,
+                                               qc->scsicmd->sense_buffer,
+                                               qc->result_tf.feature >> 4);
                        if (!tmp) {
                                /* ATA_QCFLAG_SENSE_VALID is used to
                                 * tell atapi_qc_complete() that sense
@@ -2071,13 +2192,12 @@ int ata_eh_reset(struct ata_link *link, int classify,
                 ata_prereset_fn_t prereset, ata_reset_fn_t softreset,
                 ata_reset_fn_t hardreset, ata_postreset_fn_t postreset)
 {
-       const int max_tries = ARRAY_SIZE(ata_eh_reset_timeouts);
        struct ata_port *ap = link->ap;
        struct ata_eh_context *ehc = &link->eh_context;
        unsigned int *classes = ehc->classes;
        unsigned int lflags = link->flags;
        int verbose = !(ehc->i.flags & ATA_EHI_QUIET);
-       int try = 0;
+       int max_tries = 0, try = 0;
        struct ata_device *dev;
        unsigned long deadline, now;
        ata_reset_fn_t reset;
@@ -2088,11 +2208,20 @@ int ata_eh_reset(struct ata_link *link, int classify,
        /*
         * Prepare to reset
         */
+       while (ata_eh_reset_timeouts[max_tries] != ULONG_MAX)
+               max_tries++;
+
+       now = jiffies;
+       deadline = ata_deadline(ehc->last_reset, ATA_EH_RESET_COOL_DOWN);
+       if (time_before(now, deadline))
+               schedule_timeout_uninterruptible(deadline - now);
+
        spin_lock_irqsave(ap->lock, flags);
        ap->pflags |= ATA_PFLAG_RESETTING;
        spin_unlock_irqrestore(ap->lock, flags);
 
        ata_eh_about_to_do(link, NULL, ATA_EH_RESET);
+       ehc->last_reset = jiffies;
 
        ata_link_for_each_dev(dev, link) {
                /* If we issue an SRST then an ATA drive (not ATAPI)
@@ -2125,7 +2254,8 @@ int ata_eh_reset(struct ata_link *link, int classify,
        }
 
        if (prereset) {
-               rc = prereset(link, jiffies + ATA_EH_PRERESET_TIMEOUT);
+               rc = prereset(link,
+                             ata_deadline(jiffies, ATA_EH_PRERESET_TIMEOUT));
                if (rc) {
                        if (rc == -ENOENT) {
                                ata_link_printk(link, KERN_DEBUG,
@@ -2157,10 +2287,11 @@ int ata_eh_reset(struct ata_link *link, int classify,
        /*
         * Perform reset
         */
+       ehc->last_reset = jiffies;
        if (ata_is_host_link(link))
                ata_eh_freeze_port(ap);
 
-       deadline = jiffies + ata_eh_reset_timeouts[try++];
+       deadline = ata_deadline(jiffies, ata_eh_reset_timeouts[try++]);
 
        if (reset) {
                if (verbose)
@@ -2277,6 +2408,7 @@ int ata_eh_reset(struct ata_link *link, int classify,
 
        /* reset successful, schedule revalidation */
        ata_eh_done(link, NULL, ATA_EH_RESET);
+       ehc->last_reset = jiffies;
        ehc->i.action |= ATA_EH_REVALIDATE;
 
        rc = 0;
@@ -2303,9 +2435,9 @@ int ata_eh_reset(struct ata_link *link, int classify,
        if (time_before(now, deadline)) {
                unsigned long delta = deadline - now;
 
-               ata_link_printk(link, KERN_WARNING, "reset failed "
-                               "(errno=%d), retrying in %u secs\n",
-                               rc, (jiffies_to_msecs(delta) + 999) / 1000);
+               ata_link_printk(link, KERN_WARNING,
+                       "reset failed (errno=%d), retrying in %u secs\n",
+                       rc, DIV_ROUND_UP(jiffies_to_msecs(delta), 1000));
 
                while (delta)
                        delta = schedule_timeout_uninterruptible(delta);
@@ -2583,8 +2715,11 @@ static int ata_eh_handle_dev_fail(struct ata_device *dev, int err)
                        ata_eh_detach_dev(dev);
 
                /* schedule probe if necessary */
-               if (ata_eh_schedule_probe(dev))
+               if (ata_eh_schedule_probe(dev)) {
                        ehc->tries[dev->devno] = ATA_EH_DEV_TRIES;
+                       memset(ehc->cmd_timeout_idx[dev->devno], 0,
+                              sizeof(ehc->cmd_timeout_idx[dev->devno]));
+               }
 
                return 1;
        } else {
@@ -2622,7 +2757,7 @@ int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
 {
        struct ata_link *link;
        struct ata_device *dev;
-       int nr_failed_devs, nr_disabled_devs;
+       int nr_failed_devs;
        int rc;
        unsigned long flags;
 
@@ -2665,7 +2800,6 @@ int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
  retry:
        rc = 0;
        nr_failed_devs = 0;
-       nr_disabled_devs = 0;
 
        /* if UNLOADING, finish immediately */
        if (ap->pflags & ATA_PFLAG_UNLOADING)
@@ -2732,8 +2866,7 @@ int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
 
 dev_fail:
                nr_failed_devs++;
-               if (ata_eh_handle_dev_fail(dev, rc))
-                       nr_disabled_devs++;
+               ata_eh_handle_dev_fail(dev, rc);
 
                if (ap->pflags & ATA_PFLAG_FROZEN) {
                        /* PMP reset requires working host port.
@@ -2745,18 +2878,8 @@ dev_fail:
                }
        }
 
-       if (nr_failed_devs) {
-               if (nr_failed_devs != nr_disabled_devs) {
-                       ata_port_printk(ap, KERN_WARNING, "failed to recover "
-                                       "some devices, retrying in 5 secs\n");
-                       ssleep(5);
-               } else {
-                       /* no device left to recover, repeat fast */
-                       msleep(500);
-               }
-
+       if (nr_failed_devs)
                goto retry;
-       }
 
  out:
        if (rc && r_failed_link)
index 7daf4c0f62167af1735c7f5885903774312cb8ba..b65db309c181f02f76431a4d7b3e24d5e63b3388 100644 (file)
@@ -727,19 +727,12 @@ static int sata_pmp_eh_recover_pmp(struct ata_port *ap,
                }
 
                if (tries) {
-                       int sleep = ehc->i.flags & ATA_EHI_DID_RESET;
-
                        /* consecutive revalidation failures? speed down */
                        if (reval_failed)
                                sata_down_spd_limit(link);
                        else
                                reval_failed = 1;
 
-                       ata_dev_printk(dev, KERN_WARNING,
-                                      "retrying reset%s\n",
-                                      sleep ? " in 5 secs" : "");
-                       if (sleep)
-                               ssleep(5);
                        ehc->i.action |= ATA_EH_RESET;
                        goto retry;
                } else {
@@ -785,7 +778,8 @@ static int sata_pmp_eh_handle_disabled_links(struct ata_port *ap)
                 * SError.N working.
                 */
                sata_link_hardreset(link, sata_deb_timing_normal,
-                               jiffies + ATA_TMOUT_INTERNAL_QUICK, NULL, NULL);
+                               ata_deadline(jiffies, ATA_TMOUT_INTERNAL_QUICK),
+                               NULL, NULL);
 
                /* unconditionally clear SError.N */
                rc = sata_scr_write(link, SCR_ERROR, SERR_PHYRDY_CHG);
@@ -990,10 +984,7 @@ static int sata_pmp_eh_recover(struct ata_port *ap)
                goto retry;
 
        if (--pmp_tries) {
-               ata_port_printk(ap, KERN_WARNING,
-                               "failed to recover PMP, retrying in 5 secs\n");
                pmp_ehc->i.action |= ATA_EH_RESET;
-               ssleep(5);
                goto retry;
        }
 
index 499ccc628d81f3f80326ecd1340cd1fea4463e79..f3b4b15a8dc49509802a7cf06bae5fba2d8f6dfb 100644 (file)
@@ -190,6 +190,85 @@ static void ata_scsi_set_sense(struct scsi_cmnd *cmd, u8 sk, u8 asc, u8 ascq)
        scsi_build_sense_buffer(0, cmd->sense_buffer, sk, asc, ascq);
 }
 
+static ssize_t
+ata_scsi_em_message_store(struct device *dev, struct device_attribute *attr,
+                         const char *buf, size_t count)
+{
+       struct Scsi_Host *shost = class_to_shost(dev);
+       struct ata_port *ap = ata_shost_to_port(shost);
+       if (ap->ops->em_store && (ap->flags & ATA_FLAG_EM))
+               return ap->ops->em_store(ap, buf, count);
+       return -EINVAL;
+}
+
+static ssize_t
+ata_scsi_em_message_show(struct device *dev, struct device_attribute *attr,
+                        char *buf)
+{
+       struct Scsi_Host *shost = class_to_shost(dev);
+       struct ata_port *ap = ata_shost_to_port(shost);
+
+       if (ap->ops->em_show && (ap->flags & ATA_FLAG_EM))
+               return ap->ops->em_show(ap, buf);
+       return -EINVAL;
+}
+DEVICE_ATTR(em_message, S_IRUGO | S_IWUGO,
+               ata_scsi_em_message_show, ata_scsi_em_message_store);
+EXPORT_SYMBOL_GPL(dev_attr_em_message);
+
+static ssize_t
+ata_scsi_em_message_type_show(struct device *dev, struct device_attribute *attr,
+                             char *buf)
+{
+       struct Scsi_Host *shost = class_to_shost(dev);
+       struct ata_port *ap = ata_shost_to_port(shost);
+
+       return snprintf(buf, 23, "%d\n", ap->em_message_type);
+}
+DEVICE_ATTR(em_message_type, S_IRUGO,
+                 ata_scsi_em_message_type_show, NULL);
+EXPORT_SYMBOL_GPL(dev_attr_em_message_type);
+
+static ssize_t
+ata_scsi_activity_show(struct device *dev, struct device_attribute *attr,
+               char *buf)
+{
+       struct scsi_device *sdev = to_scsi_device(dev);
+       struct ata_port *ap = ata_shost_to_port(sdev->host);
+       struct ata_device *atadev = ata_scsi_find_dev(ap, sdev);
+
+       if (ap->ops->sw_activity_show && (ap->flags & ATA_FLAG_SW_ACTIVITY))
+               return ap->ops->sw_activity_show(atadev, buf);
+       return -EINVAL;
+}
+
+static ssize_t
+ata_scsi_activity_store(struct device *dev, struct device_attribute *attr,
+       const char *buf, size_t count)
+{
+       struct scsi_device *sdev = to_scsi_device(dev);
+       struct ata_port *ap = ata_shost_to_port(sdev->host);
+       struct ata_device *atadev = ata_scsi_find_dev(ap, sdev);
+       enum sw_activity val;
+       int rc;
+
+       if (ap->ops->sw_activity_store && (ap->flags & ATA_FLAG_SW_ACTIVITY)) {
+               val = simple_strtoul(buf, NULL, 0);
+               switch (val) {
+               case OFF: case BLINK_ON: case BLINK_OFF:
+                       rc = ap->ops->sw_activity_store(atadev, val);
+                       if (!rc)
+                               return count;
+                       else
+                               return rc;
+               }
+       }
+       return -EINVAL;
+}
+DEVICE_ATTR(sw_activity, S_IWUGO | S_IRUGO, ata_scsi_activity_show,
+                       ata_scsi_activity_store);
+EXPORT_SYMBOL_GPL(dev_attr_sw_activity);
+
 static void ata_scsi_invalid_field(struct scsi_cmnd *cmd,
                                   void (*done)(struct scsi_cmnd *))
 {
@@ -1779,7 +1858,9 @@ static unsigned int ata_scsiop_inq_00(struct ata_scsi_args *args, u8 *rbuf)
        const u8 pages[] = {
                0x00,   /* page 0x00, this page */
                0x80,   /* page 0x80, unit serial no page */
-               0x83    /* page 0x83, device ident page */
+               0x83,   /* page 0x83, device ident page */
+               0x89,   /* page 0x89, ata info page */
+               0xb1,   /* page 0xb1, block device characteristics page */
        };
 
        rbuf[3] = sizeof(pages);        /* number of supported VPD pages */
@@ -1900,6 +1981,19 @@ static unsigned int ata_scsiop_inq_89(struct ata_scsi_args *args, u8 *rbuf)
        return 0;
 }
 
+static unsigned int ata_scsiop_inq_b1(struct ata_scsi_args *args, u8 *rbuf)
+{
+       rbuf[1] = 0xb1;
+       rbuf[3] = 0x3c;
+       if (ata_id_major_version(args->id) > 7) {
+               rbuf[4] = args->id[217] >> 8;
+               rbuf[5] = args->id[217];
+               rbuf[7] = args->id[168] & 0xf;
+       }
+
+       return 0;
+}
+
 /**
  *     ata_scsiop_noop - Command handler that simply returns success.
  *     @args: device IDENTIFY data / SCSI command of interest.
@@ -2921,6 +3015,9 @@ void ata_scsi_simulate(struct ata_device *dev, struct scsi_cmnd *cmd,
                case 0x89:
                        ata_scsi_rbuf_fill(&args, ata_scsiop_inq_89);
                        break;
+               case 0xb1:
+                       ata_scsi_rbuf_fill(&args, ata_scsiop_inq_b1);
+                       break;
                default:
                        ata_scsi_invalid_field(cmd, done);
                        break;
index c0908c225483bcd352508a1b9fc239a48b50f36c..304fdc6f1dc2c8bce53e7e4e1fca18d982762f31 100644 (file)
@@ -345,8 +345,8 @@ void ata_sff_dma_pause(struct ata_port *ap)
 /**
  *     ata_sff_busy_sleep - sleep until BSY clears, or timeout
  *     @ap: port containing status register to be polled
- *     @tmout_pat: impatience timeout
- *     @tmout: overall timeout
+ *     @tmout_pat: impatience timeout in msecs
+ *     @tmout: overall timeout in msecs
  *
  *     Sleep until ATA Status register bit BSY clears,
  *     or a timeout occurs.
@@ -365,7 +365,7 @@ int ata_sff_busy_sleep(struct ata_port *ap,
 
        status = ata_sff_busy_wait(ap, ATA_BUSY, 300);
        timer_start = jiffies;
-       timeout = timer_start + tmout_pat;
+       timeout = ata_deadline(timer_start, tmout_pat);
        while (status != 0xff && (status & ATA_BUSY) &&
               time_before(jiffies, timeout)) {
                msleep(50);
@@ -377,7 +377,7 @@ int ata_sff_busy_sleep(struct ata_port *ap,
                                "port is slow to respond, please be patient "
                                "(Status 0x%x)\n", status);
 
-       timeout = timer_start + tmout;
+       timeout = ata_deadline(timer_start, tmout);
        while (status != 0xff && (status & ATA_BUSY) &&
               time_before(jiffies, timeout)) {
                msleep(50);
@@ -390,7 +390,7 @@ int ata_sff_busy_sleep(struct ata_port *ap,
        if (status & ATA_BUSY) {
                ata_port_printk(ap, KERN_ERR, "port failed to respond "
                                "(%lu secs, Status 0x%x)\n",
-                               tmout / HZ, status);
+                               DIV_ROUND_UP(tmout, 1000), status);
                return -EBUSY;
        }
 
@@ -1888,7 +1888,7 @@ int ata_sff_wait_after_reset(struct ata_link *link, unsigned int devmask,
        unsigned int dev1 = devmask & (1 << 1);
        int rc, ret = 0;
 
-       msleep(ATA_WAIT_AFTER_RESET_MSECS);
+       msleep(ATA_WAIT_AFTER_RESET);
 
        /* always check readiness of the master device */
        rc = ata_sff_wait_ready(link, deadline);
@@ -2371,7 +2371,8 @@ void ata_bus_reset(struct ata_port *ap)
 
        /* issue bus reset */
        if (ap->flags & ATA_FLAG_SRST) {
-               rc = ata_bus_softreset(ap, devmask, jiffies + 40 * HZ);
+               rc = ata_bus_softreset(ap, devmask,
+                                      ata_deadline(jiffies, 40000));
                if (rc && rc != -ENODEV)
                        goto err_out;
        }
index 1cf803adbc958dbcc5f44ae1bc6efb980007ca0c..f6f9c28ec7f834932d16a541004919212bf3a626 100644 (file)
@@ -151,6 +151,8 @@ extern void ata_scsi_dev_rescan(struct work_struct *work);
 extern int ata_bus_probe(struct ata_port *ap);
 
 /* libata-eh.c */
+extern unsigned long ata_internal_cmd_timeout(struct ata_device *dev, u8 cmd);
+extern void ata_internal_cmd_timed_out(struct ata_device *dev, u8 cmd);
 extern enum scsi_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd);
 extern void ata_scsi_error(struct Scsi_Host *host);
 extern void ata_port_wait_eh(struct ata_port *ap);
index 55516103626a7e7616c545ed64b3a0b54a750b1c..d3932901a3b3511a45e13cc489ce350392b4a0dd 100644 (file)
@@ -1011,7 +1011,7 @@ static void bfin_bus_post_reset(struct ata_port *ap, unsigned int devmask)
        void __iomem *base = (void __iomem *)ap->ioaddr.ctl_addr;
        unsigned int dev0 = devmask & (1 << 0);
        unsigned int dev1 = devmask & (1 << 1);
-       unsigned long timeout;
+       unsigned long deadline;
 
        /* if device 0 was found in ata_devchk, wait for its
         * BSY bit to clear
@@ -1022,7 +1022,7 @@ static void bfin_bus_post_reset(struct ata_port *ap, unsigned int devmask)
        /* if device 1 was found in ata_devchk, wait for
         * register access, then wait for BSY to clear
         */
-       timeout = jiffies + ATA_TMOUT_BOOT;
+       deadline = ata_deadline(jiffies, ATA_TMOUT_BOOT);
        while (dev1) {
                u8 nsect, lbal;
 
@@ -1031,7 +1031,7 @@ static void bfin_bus_post_reset(struct ata_port *ap, unsigned int devmask)
                lbal = read_atapi_register(base, ATA_REG_LBAL);
                if ((nsect == 1) && (lbal == 1))
                        break;
-               if (time_after(jiffies, timeout)) {
+               if (time_after(jiffies, deadline)) {
                        dev1 = 0;
                        break;
                }
index fe7cc8ed4ea4261c2c08ebd6b2454e983e4f5b0c..bc037ffce2006e52a3d122f89ff424f56f40d3b5 100644 (file)
@@ -305,7 +305,7 @@ static unsigned int pdc_data_xfer_vlb(struct ata_device *dev,
                        iowrite32_rep(ap->ioaddr.data_addr, buf, buflen >> 2);
 
                if (unlikely(slop)) {
-                       u32 pad;
+                       __le32 pad;
                        if (rw == READ) {
                                pad = cpu_to_le32(ioread32(ap->ioaddr.data_addr));
                                memcpy(buf + buflen - slop, &pad, slop);
@@ -746,14 +746,12 @@ static unsigned int vlb32_data_xfer(struct ata_device *adev, unsigned char *buf,
                        ioread32_rep(ap->ioaddr.data_addr, buf, buflen >> 2);
 
                if (unlikely(slop)) {
-                       u32 pad;
+                       __le32 pad;
                        if (rw == WRITE) {
                                memcpy(&pad, buf + buflen - slop, slop);
-                               pad = le32_to_cpu(pad);
-                               iowrite32(pad, ap->ioaddr.data_addr);
+                               iowrite32(le32_to_cpu(pad), ap->ioaddr.data_addr);
                        } else {
-                               pad = ioread32(ap->ioaddr.data_addr);
-                               pad = cpu_to_le32(pad);
+                               pad = cpu_to_le32(ioread32(ap->ioaddr.data_addr));
                                memcpy(buf + buflen - slop, &pad, slop);
                        }
                }
index 97e5b090d7c2cdc5233791d3ea5eb07280bef8fc..63b7a1c165a5d8ddbb41c9b66210ebadcba34cc1 100644 (file)
@@ -137,7 +137,7 @@ static unsigned int qdi_data_xfer(struct ata_device *dev, unsigned char *buf,
                        iowrite32_rep(ap->ioaddr.data_addr, buf, buflen >> 2);
 
                if (unlikely(slop)) {
-                       u32 pad;
+                       __le32 pad;
                        if (rw == READ) {
                                pad = cpu_to_le32(ioread32(ap->ioaddr.data_addr));
                                memcpy(buf + buflen - slop, &pad, slop);
index bbf5aa345e68ea2164c7f3544340b255e790078b..16673d1685735a316d23a25a853b98ff31b0f9e6 100644 (file)
@@ -696,7 +696,7 @@ static void scc_bmdma_stop (struct ata_queued_cmd *qc)
 
                if (reg & INTSTS_BMSINT) {
                        unsigned int classes;
-                       unsigned long deadline = jiffies + ATA_TMOUT_BOOT;
+                       unsigned long deadline = ata_deadline(jiffies, ATA_TMOUT_BOOT);
                        printk(KERN_WARNING "%s: Internal Bus Error\n", DRV_NAME);
                        out_be32(bmid_base + SCC_DMA_INTST, INTSTS_BMSINT);
                        /* TBD: SW reset */
index 474528f8fe3de769dacca40a5dcbaf9ec8a3ccc1..a7606b044a61961e8a7da92d5a808b990ed6850d 100644 (file)
@@ -105,7 +105,7 @@ static unsigned int winbond_data_xfer(struct ata_device *dev,
                        iowrite32_rep(ap->ioaddr.data_addr, buf, buflen >> 2);
 
                if (unlikely(slop)) {
-                       u32 pad;
+                       __le32 pad;
                        if (rw == READ) {
                                pad = cpu_to_le32(ioread32(ap->ioaddr.data_addr));
                                memcpy(buf + buflen - slop, &pad, slop);
index 16aa6839aa5a68e435060d7035fbb129c18dc951..fb13b82aacba7ee255adbf2fef9ca7e5083654ce 100644 (file)
@@ -253,21 +253,29 @@ static void k2_bmdma_start_mmio(struct ata_queued_cmd *qc)
        /* start host DMA transaction */
        dmactl = readb(mmio + ATA_DMA_CMD);
        writeb(dmactl | ATA_DMA_START, mmio + ATA_DMA_CMD);
-       /* There is a race condition in certain SATA controllers that can
-          be seen when the r/w command is given to the controller before the
-          host DMA is started. On a Read command, the controller would initiate
-          the command to the drive even before it sees the DMA start. When there
-          are very fast drives connected to the controller, or when the data request
-          hits in the drive cache, there is the possibility that the drive returns a part
-          or all of the requested data to the controller before the DMA start is issued.
-          In this case, the controller would become confused as to what to do with the data.
-          In the worst case when all the data is returned back to the controller, the
-          controller could hang. In other cases it could return partial data returning
-          in data corruption. This problem has been seen in PPC systems and can also appear
-          on an system with very fast disks, where the SATA controller is sitting behind a
-          number of bridges, and hence there is significant latency between the r/w command
-          and the start command. */
-       /* issue r/w command if the access is to ATA*/
+       /* This works around possible data corruption.
+
+          On certain SATA controllers that can be seen when the r/w
+          command is given to the controller before the host DMA is
+          started.
+
+          On a Read command, the controller would initiate the
+          command to the drive even before it sees the DMA
+          start. When there are very fast drives connected to the
+          controller, or when the data request hits in the drive
+          cache, there is the possibility that the drive returns a
+          part or all of the requested data to the controller before
+          the DMA start is issued.  In this case, the controller
+          would become confused as to what to do with the data.  In
+          the worst case when all the data is returned back to the
+          controller, the controller could hang. In other cases it
+          could return partial data resulting in data
+          corruption. This problem has been seen in PPC systems and
+          can also appear on a system with very fast disks, where
+          the SATA controller is sitting behind a number of bridges,
+          and hence there is significant latency between the r/w
+          command and the start command. */
+       /* issue r/w command if the access is to ATA */
        if (qc->tf.protocol == ATA_PROT_DMA)
                ap->ops->sff_exec_command(ap, &qc->tf);
 }
index 1efe162e16d75a8a684557412d71ff32fbcb1595..3f6d9b0a6abed354ca07527e0294c8042dcc8ae0 100644 (file)
@@ -93,47 +93,27 @@ static ssize_t show_##name##_list(struct sys_device *dev, char *buf) \
 #define define_siblings_show_func(name)                \
        define_siblings_show_map(name); define_siblings_show_list(name)
 
-#ifdef topology_physical_package_id
 define_id_show_func(physical_package_id);
 define_one_ro(physical_package_id);
-#define ref_physical_package_id_attr   &attr_physical_package_id.attr,
-#else
-#define ref_physical_package_id_attr
-#endif
 
-#ifdef topology_core_id
 define_id_show_func(core_id);
 define_one_ro(core_id);
-#define ref_core_id_attr               &attr_core_id.attr,
-#else
-#define ref_core_id_attr
-#endif
 
-#ifdef topology_thread_siblings
 define_siblings_show_func(thread_siblings);
 define_one_ro(thread_siblings);
 define_one_ro(thread_siblings_list);
-#define ref_thread_siblings_attr       \
-               &attr_thread_siblings.attr, &attr_thread_siblings_list.attr,
-#else
-#define ref_thread_siblings_attr
-#endif
 
-#ifdef topology_core_siblings
 define_siblings_show_func(core_siblings);
 define_one_ro(core_siblings);
 define_one_ro(core_siblings_list);
-#define ref_core_siblings_attr         \
-               &attr_core_siblings.attr, &attr_core_siblings_list.attr,
-#else
-#define ref_core_siblings_attr
-#endif
 
 static struct attribute *default_attrs[] = {
-       ref_physical_package_id_attr
-       ref_core_id_attr
-       ref_thread_siblings_attr
-       ref_core_siblings_attr
+       &attr_physical_package_id.attr,
+       &attr_core_id.attr,
+       &attr_thread_siblings.attr,
+       &attr_thread_siblings_list.attr,
+       &attr_core_siblings.attr,
+       &attr_core_siblings_list.attr,
        NULL
 };
 
index 570f3b70dce795afc8e1beabd104de3dc31e26ad..5fdfa7c888cebc98aacc9873a9f7216f79c3a1f5 100644 (file)
@@ -712,19 +712,17 @@ static void do_pd_request(struct request_queue * q)
 static int pd_special_command(struct pd_unit *disk,
                      enum action (*func)(struct pd_unit *disk))
 {
-       DECLARE_COMPLETION_ONSTACK(wait);
-       struct request rq;
+       struct request *rq;
        int err = 0;
 
-       blk_rq_init(NULL, &rq);
-       rq.rq_disk = disk->gd;
-       rq.end_io_data = &wait;
-       rq.end_io = blk_end_sync_rq;
-       blk_insert_request(disk->gd->queue, &rq, 0, func);
-       wait_for_completion(&wait);
-       if (rq.errors)
-               err = -EIO;
-       blk_put_request(&rq);
+       rq = blk_get_request(disk->gd->queue, READ, __GFP_WAIT);
+
+       rq->cmd_type = REQ_TYPE_SPECIAL;
+       rq->special = func;
+
+       err = blk_execute_rq(disk->gd->queue, disk->gd, rq, 0);
+
+       blk_put_request(rq);
        return err;
 }
 
index 7b46faf22318c31f57072e73ae711d86afc7ba45..5ca1d80de182bfa3bb333f0aeca907f7929ed26c 100644 (file)
@@ -215,3 +215,22 @@ pm_good:
  * but we still need to load before device_initcall
  */
 fs_initcall(init_acpi_pm_clocksource);
+
+/*
+ * Allow an override of the IOPort. Stupid BIOSes do not tell us about
+ * the PMTimer, but we might know where it is.
+ */
+static int __init parse_pmtmr(char *arg)
+{
+       unsigned long base;
+
+       if (strict_strtoul(arg, 16, &base))
+               return -EINVAL;
+
+       printk(KERN_INFO "PMTMR IOPort override: 0x%04x -> 0x%04lx\n",
+              (unsigned int)pmtmr_ioport, base);
+       pmtmr_ioport = base;
+
+       return 1;
+}
+__setup("pmtmr=", parse_pmtmr);
index 845081b44f637fbd2ba538a83155fa58b68423d8..0177012845c60cc951380d4419f3dbb9a736b8be 100644 (file)
@@ -167,6 +167,11 @@ void drm_core_ioremap(struct drm_map *map, struct drm_device *dev)
 }
 EXPORT_SYMBOL(drm_core_ioremap);
 
+void drm_core_ioremap_wc(struct drm_map *map, struct drm_device *dev)
+{
+       map->handle = ioremap_wc(map->offset, map->size);
+}
+EXPORT_SYMBOL(drm_core_ioremap_wc);
 void drm_core_ioremapfree(struct drm_map *map, struct drm_device *dev)
 {
        if (!map->handle || !map->size)
index e53158f0ecb5c9ebf074bbe241df3f60986e0987..f0de81a5689d3c88f386b7e378afda0e59f1001e 100644 (file)
@@ -1154,7 +1154,7 @@ static int radeon_do_init_cp(struct drm_device * dev, drm_radeon_init_t * init)
                        dev_priv->gart_info.mapping.size =
                            dev_priv->gart_info.table_size;
 
-                       drm_core_ioremap(&dev_priv->gart_info.mapping, dev);
+                       drm_core_ioremap_wc(&dev_priv->gart_info.mapping, dev);
                        dev_priv->gart_info.addr =
                            dev_priv->gart_info.mapping.handle;
 
index 35812823787bd7030f1cdc2452f923a9e06641a8..eb8f72ca02f45cfe954da3a1240e5a18fdd5d2bd 100644 (file)
@@ -320,7 +320,7 @@ static int try_address(struct i2c_adapter *i2c_adap,
                       unsigned char addr, int retries)
 {
        struct i2c_algo_bit_data *adap = i2c_adap->algo_data;
-       int i, ret = -1;
+       int i, ret = 0;
 
        for (i = 0; i <= retries; i++) {
                ret = i2c_outb(i2c_adap, addr);
@@ -508,7 +508,7 @@ static int bit_doAddress(struct i2c_adapter *i2c_adap, struct i2c_msg *msg)
                        addr ^= 1;
                ret = try_address(i2c_adap, addr, retries);
                if ((ret != 1) && !nak_ok)
-                       return -EREMOTEIO;
+                       return -ENXIO;
        }
 
        return 0;
index e954a20b97a647e1b5766d345117cab121a42811..d50b329a3c94f079a78c6f2c85bff7b2271d2ae0 100644 (file)
@@ -182,7 +182,7 @@ static int pca_xfer(struct i2c_adapter *i2c_adap,
        }
        if (state != 0xf8) {
                dev_dbg(&i2c_adap->dev, "bus is not idle. status is %#04x\n", state);
-               return -EIO;
+               return -EAGAIN;
        }
 
        DEB1("{{{ XFER %d messages\n", num);
index 8907b0191677e73b7ef6358fe9d8c937e8e49326..1e328d19cd6dd9aaf79573a7521f96579ff7cf0b 100644 (file)
@@ -78,6 +78,36 @@ static void i2c_stop(struct i2c_algo_pcf_data *adap)
        set_pcf(adap, 1, I2C_PCF_STOP);
 }
 
+static void handle_lab(struct i2c_algo_pcf_data *adap, const int *status)
+{
+       DEB2(printk(KERN_INFO
+               "i2c-algo-pcf.o: lost arbitration (CSR 0x%02x)\n",
+                *status));
+
+       /* Cleanup from LAB -- reset and enable ESO.
+        * This resets the PCF8584; since we've lost the bus, no
+        * further attempts should be made by callers to clean up
+        * (no i2c_stop() etc.)
+        */
+       set_pcf(adap, 1, I2C_PCF_PIN);
+       set_pcf(adap, 1, I2C_PCF_ESO);
+
+       /* We pause for a time period sufficient for any running
+        * I2C transaction to complete -- the arbitration logic won't
+        * work properly until the next START is seen.
+        * It is assumed the bus driver or client has set a proper value.
+        *
+        * REVISIT: should probably use msleep instead of mdelay if we
+        * know we can sleep.
+        */
+       if (adap->lab_mdelay)
+               mdelay(adap->lab_mdelay);
+
+       DEB2(printk(KERN_INFO
+               "i2c-algo-pcf.o: reset LAB condition (CSR 0x%02x)\n",
+               get_pcf(adap, 1)));
+}
+
 static int wait_for_bb(struct i2c_algo_pcf_data *adap) {
 
        int timeout = DEF_TIMEOUT;
@@ -109,23 +139,7 @@ static int wait_for_pin(struct i2c_algo_pcf_data *adap, int *status) {
                *status = get_pcf(adap, 1);
        }
        if (*status & I2C_PCF_LAB) {
-               DEB2(printk(KERN_INFO 
-                       "i2c-algo-pcf.o: lost arbitration (CSR 0x%02x)\n",
-                        *status));
-               /* Cleanup from LAB-- reset and enable ESO.
-                * This resets the PCF8584; since we've lost the bus, no
-                * further attempts should be made by callers to clean up 
-                * (no i2c_stop() etc.)
-                */
-               set_pcf(adap, 1, I2C_PCF_PIN);
-               set_pcf(adap, 1, I2C_PCF_ESO);
-               /* TODO: we should pause for a time period sufficient for any
-                * running I2C transaction to complete-- the arbitration
-                * logic won't work properly until the next START is seen.
-                */
-               DEB2(printk(KERN_INFO 
-                       "i2c-algo-pcf.o: reset LAB condition (CSR 0x%02x)\n", 
-                       get_pcf(adap,1)));
+               handle_lab(adap, status);
                return(-EINTR);
        }
 #endif
index 48438cc5d0caa282bcf8ae7050bdbebccea3e879..6ee997b2817c450b3ac2fae2bf32e5ad035ff5d3 100644 (file)
@@ -4,6 +4,9 @@
 
 menu "I2C Hardware Bus support"
 
+comment "PC SMBus host controller drivers"
+       depends on PCI
+
 config I2C_ALI1535
        tristate "ALI 1535"
        depends on PCI
@@ -73,94 +76,6 @@ config I2C_AMD8111
          This driver can also be built as a module.  If so, the module
          will be called i2c-amd8111.
 
-config I2C_AT91
-       tristate "Atmel AT91 I2C Two-Wire interface (TWI)"
-       depends on ARCH_AT91 && EXPERIMENTAL && BROKEN
-       help
-         This supports the use of the I2C interface on Atmel AT91
-         processors.
-
-         This driver is BROKEN because the controller which it uses
-         will easily trigger RX overrun and TX underrun errors.  Using
-         low I2C clock rates may partially work around those issues
-         on some systems.  Another serious problem is that there is no
-         documented way to issue repeated START conditions, as needed
-         to support combined I2C messages.  Use the i2c-gpio driver
-         unless your system can cope with those limitations.
-
-config I2C_AU1550
-       tristate "Au1550/Au1200 SMBus interface"
-       depends on SOC_AU1550 || SOC_AU1200
-       help
-         If you say yes to this option, support will be included for the
-         Au1550 and Au1200 SMBus interface.
-
-         This driver can also be built as a module.  If so, the module
-         will be called i2c-au1550.
-
-config I2C_BLACKFIN_TWI
-       tristate "Blackfin TWI I2C support"
-       depends on BLACKFIN
-       help
-         This is the TWI I2C device driver for Blackfin BF522, BF525,
-         BF527, BF534, BF536, BF537 and BF54x. For other Blackfin processors,
-         please don't use this driver.
-
-         This driver can also be built as a module.  If so, the module
-         will be called i2c-bfin-twi.
-
-config I2C_BLACKFIN_TWI_CLK_KHZ
-       int "Blackfin TWI I2C clock (kHz)"
-       depends on I2C_BLACKFIN_TWI
-       range 10 400
-       default 50
-       help
-         The unit of the TWI clock is kHz.
-
-config I2C_DAVINCI
-       tristate "DaVinci I2C driver"
-       depends on ARCH_DAVINCI
-       help
-         Support for TI DaVinci I2C controller driver.
-
-         This driver can also be built as a module.  If so, the module
-         will be called i2c-davinci.
-
-         Please note that this driver might be needed to bring up other
-         devices such as DaVinci NIC.
-         For details please see http://www.ti.com/davinci
-
-config I2C_ELEKTOR
-       tristate "Elektor ISA card"
-       depends on ISA && BROKEN_ON_SMP
-       select I2C_ALGOPCF
-       help
-         This supports the PCF8584 ISA bus I2C adapter.  Say Y if you own
-         such an adapter.
-
-         This support is also available as a module.  If so, the module
-         will be called i2c-elektor.
-
-config I2C_GPIO
-       tristate "GPIO-based bitbanging I2C"
-       depends on GENERIC_GPIO
-       select I2C_ALGOBIT
-       help
-         This is a very simple bitbanging I2C driver utilizing the
-         arch-neutral GPIO API to control the SCL and SDA lines.
-
-config I2C_HYDRA
-       tristate "CHRP Apple Hydra Mac I/O I2C interface"
-       depends on PCI && PPC_CHRP && EXPERIMENTAL
-       select I2C_ALGOBIT
-       help
-         This supports the use of the I2C interface in the Apple Hydra Mac
-         I/O chip on some CHRP machines (e.g. the LongTrail).  Say Y if you
-         have such a machine.
-
-         This support is also available as a module.  If so, the module
-         will be called i2c-hydra.
-
 config I2C_I801
        tristate "Intel 82801 (ICH)"
        depends on PCI
@@ -186,41 +101,15 @@ config I2C_I801
          This driver can also be built as a module.  If so, the module
          will be called i2c-i801.
 
-config I2C_I810
-       tristate "Intel 810/815 (DEPRECATED)"
-       default n
+config I2C_ISCH
+       tristate "Intel SCH SMBus 1.0"
        depends on PCI
-       select I2C_ALGOBIT
-       help
-         If you say yes to this option, support will be included for the Intel
-         810/815 family of mainboard I2C interfaces.  Specifically, the
-         following versions of the chipset are supported:
-           i810AA
-           i810AB
-           i810E
-           i815
-           i845G
-
-         This driver is deprecated in favor of the i810fb and intelfb drivers.
-
-         This driver can also be built as a module.  If so, the module
-         will be called i2c-i810.
-
-config I2C_PXA
-       tristate "Intel PXA2XX I2C adapter (EXPERIMENTAL)"
-       depends on EXPERIMENTAL && ARCH_PXA
        help
-         If you have devices in the PXA I2C bus, say yes to this option.
-         This driver can also be built as a module.  If so, the module
-         will be called i2c-pxa.
+         Say Y here if you want to use the SMBus controller on Intel SCH
+         based systems.
 
-config I2C_PXA_SLAVE
-       bool "Intel PXA2XX I2C Slave comms support"
-       depends on I2C_PXA
-       help
-         Support I2C slave mode communications on the PXA I2C bus.  This
-         is necessary for systems where the PXA may be a target on the
-         I2C bus.
+         This driver can also be built as a module. If so, the module
+         will be called i2c-isch.
 
 config I2C_PIIX4
        tristate "Intel PIIX4 and compatible (ATI/Serverworks/Broadcom/SMSC)"
@@ -247,39 +136,111 @@ config I2C_PIIX4
          This driver can also be built as a module.  If so, the module
          will be called i2c-piix4.
 
-config I2C_IBM_IIC
-       tristate "IBM PPC 4xx on-chip I2C interface"
-       depends on 4xx
+config I2C_NFORCE2
+       tristate "Nvidia nForce2, nForce3 and nForce4"
+       depends on PCI
        help
-         Say Y here if you want to use IIC peripheral found on
-         embedded IBM PPC 4xx based systems.
+         If you say yes to this option, support will be included for the Nvidia
+         nForce2, nForce3 and nForce4 families of mainboard I2C interfaces.
 
          This driver can also be built as a module.  If so, the module
-         will be called i2c-ibm_iic.
+         will be called i2c-nforce2.
 
-config I2C_IOP3XX
-       tristate "Intel IOPx3xx and IXP4xx on-chip I2C interface"
-       depends on ARCH_IOP32X || ARCH_IOP33X || ARCH_IXP4XX || ARCH_IOP13XX
+config I2C_NFORCE2_S4985
+       tristate "SMBus multiplexing on the Tyan S4985"
+       depends on I2C_NFORCE2 && EXPERIMENTAL
        help
-         Say Y here if you want to use the IIC bus controller on
-         the Intel IOPx3xx I/O Processors or IXP4xx Network Processors.
+         Enabling this option will add specific SMBus support for the Tyan
+         S4985 motherboard.  On this 4-CPU board, the SMBus is multiplexed
+         over 4 different channels, where the various memory module EEPROMs
+         live.  Saying yes here will give you access to these in addition
+         to the trunk.
 
          This driver can also be built as a module.  If so, the module
-         will be called i2c-iop3xx.
+         will be called i2c-nforce2-s4985.
 
-config I2C_IXP2000
-       tristate "IXP2000 GPIO-Based I2C Interface (DEPRECATED)"
-       depends on ARCH_IXP2000
+config I2C_SIS5595
+       tristate "SiS 5595"
+       depends on PCI
+       help
+         If you say yes to this option, support will be included for the
+         SiS5595 SMBus (a subset of I2C) interface.
+
+         This driver can also be built as a module.  If so, the module
+         will be called i2c-sis5595.
+
+config I2C_SIS630
+       tristate "SiS 630/730"
+       depends on PCI
+       help
+         If you say yes to this option, support will be included for the
+         SiS630 and SiS730 SMBus (a subset of I2C) interface.
+
+         This driver can also be built as a module.  If so, the module
+         will be called i2c-sis630.
+
+config I2C_SIS96X
+       tristate "SiS 96x"
+       depends on PCI
+       help
+         If you say yes to this option, support will be included for the SiS
+         96x SMBus (a subset of I2C) interfaces.  Specifically, the following
+         chipsets are supported:
+           645/961
+           645DX/961
+           645DX/962
+           648/961
+           650/961
+           735
+           745
+
+         This driver can also be built as a module.  If so, the module
+         will be called i2c-sis96x.
+
+config I2C_VIA
+       tristate "VIA VT82C586B"
+       depends on PCI && EXPERIMENTAL
        select I2C_ALGOBIT
        help
-         Say Y here if you have an Intel IXP2000 (2400, 2800, 2850) based
-         system and are using GPIO lines for an I2C bus.
+         If you say yes to this option, support will be included for the VIA
+         82C586B I2C interface.
 
-         This support is also available as a module. If so, the module
-         will be called i2c-ixp2000.
+         This driver can also be built as a module.  If so, the module
+         will be called i2c-via.
 
-         This driver is deprecated and will be dropped soon. Use i2c-gpio
-         instead.
+config I2C_VIAPRO
+       tristate "VIA VT82C596/82C686/82xx and CX700"
+       depends on PCI
+       help
+         If you say yes to this option, support will be included for the VIA
+         VT82C596 and later SMBus interface.  Specifically, the following
+         chipsets are supported:
+           VT82C596A/B
+           VT82C686A/B
+           VT8231
+           VT8233/A
+           VT8235
+           VT8237R/A/S
+           VT8251
+           CX700
+
+         This driver can also be built as a module.  If so, the module
+         will be called i2c-viapro.
+
+comment "Mac SMBus host controller drivers"
+       depends on PPC_CHRP || PPC_PMAC
+
+config I2C_HYDRA
+       tristate "CHRP Apple Hydra Mac I/O I2C interface"
+       depends on PCI && PPC_CHRP && EXPERIMENTAL
+       select I2C_ALGOBIT
+       help
+         This supports the use of the I2C interface in the Apple Hydra Mac
+         I/O chip on some CHRP machines (e.g. the LongTrail).  Say Y if you
+         have such a machine.
+
+         This support is also available as a module.  If so, the module
+         will be called i2c-hydra.
 
 config I2C_POWERMAC
        tristate "Powermac I2C interface"
@@ -293,95 +254,158 @@ config I2C_POWERMAC
          This support is also available as a module.  If so, the module
          will be called i2c-powermac.
 
-config I2C_MPC
-       tristate "MPC107/824x/85xx/52xx/86xx"
-       depends on PPC32
+comment "I2C system bus drivers (mostly embedded / system-on-chip)"
+
+config I2C_AT91
+       tristate "Atmel AT91 I2C Two-Wire interface (TWI)"
+       depends on ARCH_AT91 && EXPERIMENTAL && BROKEN
        help
-         If you say yes to this option, support will be included for the
-         built-in I2C interface on the MPC107/Tsi107/MPC8240/MPC8245 and
-         MPC85xx/MPC8641 family processors. The driver may also work on 52xx
-         family processors, though interrupts are known not to work.
+         This supports the use of the I2C interface on Atmel AT91
+         processors.
 
-         This driver can also be built as a module.  If so, the module
-         will be called i2c-mpc.
+         This driver is BROKEN because the controller which it uses
+         will easily trigger RX overrun and TX underrun errors.  Using
+         low I2C clock rates may partially work around those issues
+         on some systems.  Another serious problem is that there is no
+         documented way to issue repeated START conditions, as needed
+         to support combined I2C messages.  Use the i2c-gpio driver
+         unless your system can cope with those limitations.
 
-config I2C_NFORCE2
-       tristate "Nvidia nForce2, nForce3 and nForce4"
-       depends on PCI
+config I2C_AU1550
+       tristate "Au1550/Au1200 SMBus interface"
+       depends on SOC_AU1550 || SOC_AU1200
        help
-         If you say yes to this option, support will be included for the Nvidia
-         nForce2, nForce3 and nForce4 families of mainboard I2C interfaces.
+         If you say yes to this option, support will be included for the
+         Au1550 and Au1200 SMBus interface.
 
          This driver can also be built as a module.  If so, the module
-         will be called i2c-nforce2.
+         will be called i2c-au1550.
 
-config I2C_OCORES
-       tristate "OpenCores I2C Controller"
-       depends on EXPERIMENTAL
+config I2C_BLACKFIN_TWI
+       tristate "Blackfin TWI I2C support"
+       depends on BLACKFIN
+       depends on !BF561 && !BF531 && !BF532 && !BF533
        help
-         If you say yes to this option, support will be included for the
-         OpenCores I2C controller. For details see
-         http://www.opencores.org/projects.cgi/web/i2c/overview
+         This is the I2C bus driver for Blackfin on-chip TWI interface.
 
          This driver can also be built as a module.  If so, the module
-         will be called i2c-ocores.
+         will be called i2c-bfin-twi.
 
-config I2C_OMAP
-       tristate "OMAP I2C adapter"
-       depends on ARCH_OMAP
-       default y if MACH_OMAP_H3 || MACH_OMAP_OSK
+config I2C_BLACKFIN_TWI_CLK_KHZ
+       int "Blackfin TWI I2C clock (kHz)"
+       depends on I2C_BLACKFIN_TWI
+       range 10 400
+       default 50
        help
-         If you say yes to this option, support will be included for the
-         I2C interface on the Texas Instruments OMAP1/2 family of processors.
-         Like OMAP1510/1610/1710/5912 and OMAP242x.
-         For details see http://www.ti.com/omap.
+         The unit of the TWI clock is kHz.
 
-config I2C_PARPORT
-       tristate "Parallel port adapter"
-       depends on PARPORT
+config I2C_CPM
+       tristate "Freescale CPM1 or CPM2 (MPC8xx/826x)"
+       depends on (CPM1 || CPM2) && OF_I2C
+       help
+         This supports the use of the I2C interface on Freescale
+         processors with CPM1 or CPM2.
+
+         This driver can also be built as a module.  If so, the module
+         will be called i2c-cpm.
+
+config I2C_DAVINCI
+       tristate "DaVinci I2C driver"
+       depends on ARCH_DAVINCI
+       help
+         Support for TI DaVinci I2C controller driver.
+
+         This driver can also be built as a module.  If so, the module
+         will be called i2c-davinci.
+
+         Please note that this driver might be needed to bring up other
+         devices such as DaVinci NIC.
+         For details please see http://www.ti.com/davinci
+
+config I2C_GPIO
+       tristate "GPIO-based bitbanging I2C"
+       depends on GENERIC_GPIO
        select I2C_ALGOBIT
        help
-         This supports parallel port I2C adapters such as the ones made by
-         Philips or Velleman, Analog Devices evaluation boards, and more.
-         Basically any adapter using the parallel port as an I2C bus with
-         no extra chipset is supported by this driver, or could be.
+         This is a very simple bitbanging I2C driver utilizing the
+         arch-neutral GPIO API to control the SCL and SDA lines.
 
-         This driver is a replacement for (and was inspired by) an older
-         driver named i2c-philips-par.  The new driver supports more devices,
-         and makes it easier to add support for new devices.
+config I2C_IBM_IIC
+       tristate "IBM PPC 4xx on-chip I2C interface"
+       depends on 4xx
+       help
+         Say Y here if you want to use IIC peripheral found on
+         embedded IBM PPC 4xx based systems.
 
-         An adapter type parameter is now mandatory.  Please read the file
-         Documentation/i2c/busses/i2c-parport for details.
+         This driver can also be built as a module.  If so, the module
+         will be called i2c-ibm_iic.
 
-         Another driver exists, named i2c-parport-light, which doesn't depend
-         on the parport driver.  This is meant for embedded systems. Don't say
-         Y here if you intend to say Y or M there.
+config I2C_IOP3XX
+       tristate "Intel IOPx3xx and IXP4xx on-chip I2C interface"
+       depends on ARCH_IOP32X || ARCH_IOP33X || ARCH_IXP4XX || ARCH_IOP13XX
+       help
+         Say Y here if you want to use the IIC bus controller on
+         the Intel IOPx3xx I/O Processors or IXP4xx Network Processors.
 
-         This support is also available as a module.  If so, the module
-         will be called i2c-parport.
+         This driver can also be built as a module.  If so, the module
+         will be called i2c-iop3xx.
 
-config I2C_PARPORT_LIGHT
-       tristate "Parallel port adapter (light)"
+config I2C_IXP2000
+       tristate "IXP2000 GPIO-Based I2C Interface (DEPRECATED)"
+       depends on ARCH_IXP2000
        select I2C_ALGOBIT
        help
-         This supports parallel port I2C adapters such as the ones made by
-         Philips or Velleman, Analog Devices evaluation boards, and more.
-         Basically any adapter using the parallel port as an I2C bus with
-         no extra chipset is supported by this driver, or could be.
+         Say Y here if you have an Intel IXP2000 (2400, 2800, 2850) based
+         system and are using GPIO lines for an I2C bus.
 
-         This driver is a light version of i2c-parport.  It doesn't depend
-         on the parport driver, and uses direct I/O access instead.  This
-         might be preferred on embedded systems where wasting memory for
-         the clean but heavy parport handling is not an option.  The
-         drawback is a reduced portability and the impossibility to
-         daisy-chain other parallel port devices.
+         This support is also available as a module. If so, the module
+         will be called i2c-ixp2000.
 
-         Don't say Y here if you said Y or M to i2c-parport.  Saying M to
-         both is possible but both modules should not be loaded at the same
-         time.
+         This driver is deprecated and will be dropped soon. Use i2c-gpio
+         instead.
 
-         This support is also available as a module.  If so, the module
-         will be called i2c-parport-light.
+config I2C_MPC
+       tristate "MPC107/824x/85xx/52xx/86xx"
+       depends on PPC32
+       help
+         If you say yes to this option, support will be included for the
+         built-in I2C interface on the MPC107/Tsi107/MPC8240/MPC8245 and
+         MPC85xx/MPC8641 family processors. The driver may also work on 52xx
+         family processors, though interrupts are known not to work.
+
+         This driver can also be built as a module.  If so, the module
+         will be called i2c-mpc.
+
+config I2C_MV64XXX
+       tristate "Marvell mv64xxx I2C Controller"
+       depends on (MV64X60 || PLAT_ORION) && EXPERIMENTAL
+       help
+         If you say yes to this option, support will be included for the
+         built-in I2C interface on the Marvell 64xxx line of host bridges.
+
+         This driver can also be built as a module.  If so, the module
+         will be called i2c-mv64xxx.
+
+config I2C_OCORES
+       tristate "OpenCores I2C Controller"
+       depends on EXPERIMENTAL
+       help
+         If you say yes to this option, support will be included for the
+         OpenCores I2C controller. For details see
+         http://www.opencores.org/projects.cgi/web/i2c/overview
+
+         This driver can also be built as a module.  If so, the module
+         will be called i2c-ocores.
+
+config I2C_OMAP
+       tristate "OMAP I2C adapter"
+       depends on ARCH_OMAP
+       default y if MACH_OMAP_H3 || MACH_OMAP_OSK
+       help
+         If you say yes to this option, support will be included for the
+         I2C interface on the Texas Instruments OMAP1/2 family of processors,
+         such as OMAP1510/1610/1710/5912 and OMAP242x.
+         For details see http://www.ti.com/omap.
 
 config I2C_PASEMI
        tristate "PA Semi SMBus interface"
@@ -389,23 +413,31 @@ config I2C_PASEMI
        help
          Supports the PA Semi PWRficient on-chip SMBus interfaces.
 
-config I2C_PROSAVAGE
-       tristate "S3/VIA (Pro)Savage (DEPRECATED)"
-       default n
-       depends on PCI
-       select I2C_ALGOBIT
+config I2C_PNX
+       tristate "I2C bus support for Philips PNX targets"
+       depends on ARCH_PNX4008
        help
-         If you say yes to this option, support will be included for the
-         I2C bus and DDC bus of the S3VIA embedded Savage4 and ProSavage8
-         graphics processors.
-         chipsets supported:
-           S3/VIA KM266/VT8375 aka ProSavage8
-           S3/VIA KM133/VT8365 aka Savage4
+         This driver supports the Philips IP3204 I2C IP block master and/or
+         slave controller.
 
-         This driver is deprecated in favor of the savagefb driver.
+         This driver can also be built as a module.  If so, the module
+         will be called i2c-pnx.
 
-         This support is also available as a module.  If so, the module
-         will be called i2c-prosavage.
+config I2C_PXA
+       tristate "Intel PXA2XX I2C adapter (EXPERIMENTAL)"
+       depends on EXPERIMENTAL && ARCH_PXA
+       help
+         If you have devices in the PXA I2C bus, say yes to this option.
+         This driver can also be built as a module.  If so, the module
+         will be called i2c-pxa.
+
+config I2C_PXA_SLAVE
+       bool "Intel PXA2XX I2C Slave comms support"
+       depends on I2C_PXA
+       help
+         Support I2C slave mode communications on the PXA I2C bus.  This
+         is necessary for systems where the PXA may be a target on the
+         I2C bus.
 
 config I2C_S3C2410
        tristate "S3C2410 I2C Driver"
@@ -414,25 +446,24 @@ config I2C_S3C2410
          Say Y here to include support for I2C controller in the
          Samsung S3C2410 based System-on-Chip devices.
 
-config I2C_SAVAGE4
-       tristate "S3 Savage 4 (DEPRECATED)"
-       default n
-       depends on PCI
-       select I2C_ALGOBIT
+config I2C_SH7760
+       tristate "Renesas SH7760 I2C Controller"
+       depends on CPU_SUBTYPE_SH7760
        help
-         If you say yes to this option, support will be included for the
-         S3 Savage 4 I2C interface.
-
-         This driver is deprecated in favor of the savagefb driver.
+         This driver supports the 2 I2C interfaces on the Renesas SH7760.
 
          This driver can also be built as a module.  If so, the module
-         will be called i2c-savage4.
+         will be called i2c-sh7760.
 
-config I2C_SIBYTE
-       tristate "SiByte SMBus interface"
-       depends on SIBYTE_SB1xxx_SOC
+config I2C_SH_MOBILE
+       tristate "SuperH Mobile I2C Controller"
+       depends on SUPERH
        help
-         Supports the SiByte SOC on-chip I2C interfaces (2 channels).
+         If you say yes to this option, support will be included for the
+         built-in I2C interface on the Renesas SH-Mobile processor.
+
+         This driver can also be built as a module.  If so, the module
+         will be called i2c-sh_mobile.
 
 config I2C_SIMTEC
        tristate "Simtec Generic I2C interface"
@@ -446,86 +477,65 @@ config I2C_SIMTEC
          This driver can also be built as a module. If so, the module
          will be called i2c-simtec.
 
-config SCx200_I2C
-       tristate "NatSemi SCx200 I2C using GPIO pins (DEPRECATED)"
-       depends on SCx200_GPIO
+config I2C_VERSATILE
+       tristate "ARM Versatile/Realview I2C bus support"
+       depends on ARCH_VERSATILE || ARCH_REALVIEW
        select I2C_ALGOBIT
        help
-         Enable the use of two GPIO pins of a SCx200 processor as an I2C bus.
-
-         If you don't know what to do here, say N.
+         Say yes if you want to support the I2C serial bus on ARM's Versatile
+         range of platforms.
 
-         This support is also available as a module.  If so, the module
-         will be called scx200_i2c.
+         This driver can also be built as a module.  If so, the module
+         will be called i2c-versatile.
 
-         This driver is deprecated and will be dropped soon. Use i2c-gpio
-         (or scx200_acb) instead.
+comment "External I2C/SMBus adapter drivers"
 
-config SCx200_I2C_SCL
-       int "GPIO pin used for SCL"
-       depends on SCx200_I2C
-       default "12"
+config I2C_PARPORT
+       tristate "Parallel port adapter"
+       depends on PARPORT
+       select I2C_ALGOBIT
        help
-         Enter the GPIO pin number used for the SCL signal.  This value can
-         also be specified with a module parameter.
+         This supports parallel port I2C adapters such as the ones made by
+         Philips or Velleman, Analog Devices evaluation boards, and more.
+         Basically any adapter using the parallel port as an I2C bus with
+         no extra chipset is supported by this driver, or could be.
 
-config SCx200_I2C_SDA
-       int "GPIO pin used for SDA"
-       depends on SCx200_I2C
-       default "13"
-       help
-         Enter the GPIO pin number used for the SSA signal.  This value can
-         also be specified with a module parameter.
+         This driver is a replacement for (and was inspired by) an older
+         driver named i2c-philips-par.  The new driver supports more devices,
+         and makes it easier to add support for new devices.
 
-config SCx200_ACB
-       tristate "Geode ACCESS.bus support"
-       depends on X86_32 && PCI
-       help
-         Enable the use of the ACCESS.bus controllers on the Geode SCx200 and
-         SC1100 processors and the CS5535 and CS5536 Geode companion devices.
+         An adapter type parameter is now mandatory.  Please read the file
+         Documentation/i2c/busses/i2c-parport for details.
 
-         If you don't know what to do here, say N.
+         Another driver exists, named i2c-parport-light, which doesn't depend
+         on the parport driver.  This is meant for embedded systems. Don't say
+         Y here if you intend to say Y or M there.
 
          This support is also available as a module.  If so, the module
-         will be called scx200_acb.
-
-config I2C_SIS5595
-       tristate "SiS 5595"
-       depends on PCI
-       help
-         If you say yes to this option, support will be included for the
-         SiS5595 SMBus (a subset of I2C) interface.
-
-         This driver can also be built as a module.  If so, the module
-         will be called i2c-sis5595.
+         will be called i2c-parport.
 
-config I2C_SIS630
-       tristate "SiS 630/730"
-       depends on PCI
+config I2C_PARPORT_LIGHT
+       tristate "Parallel port adapter (light)"
+       select I2C_ALGOBIT
        help
-         If you say yes to this option, support will be included for the
-         SiS630 and SiS730 SMBus (a subset of I2C) interface.
+         This supports parallel port I2C adapters such as the ones made by
+         Philips or Velleman, Analog Devices evaluation boards, and more.
+         Basically any adapter using the parallel port as an I2C bus with
+         no extra chipset is supported by this driver, or could be.
 
-         This driver can also be built as a module.  If so, the module
-         will be called i2c-sis630.
+         This driver is a light version of i2c-parport.  It doesn't depend
+         on the parport driver, and uses direct I/O access instead.  This
+         might be preferred on embedded systems where wasting memory for
+         the clean but heavy parport handling is not an option.  The
+         drawback is a reduced portability and the impossibility to
+         daisy-chain other parallel port devices.
 
-config I2C_SIS96X
-       tristate "SiS 96x"
-       depends on PCI
-       help
-         If you say yes to this option, support will be included for the SiS
-         96x SMBus (a subset of I2C) interfaces.  Specifically, the following
-         chipsets are supported:
-           645/961
-           645DX/961
-           645DX/962
-           648/961
-           650/961
-           735
-           745
+         Don't say Y here if you said Y or M to i2c-parport.  Saying M to
+         both is possible but both modules should not be loaded at the same
+         time.
 
-         This driver can also be built as a module.  If so, the module
-         will be called i2c-sis96x.
+         This support is also available as a module.  If so, the module
+         will be called i2c-parport-light.
 
 config I2C_TAOS_EVM
        tristate "TAOS evaluation module"
@@ -543,21 +553,8 @@ config I2C_TAOS_EVM
          This support is also available as a module.  If so, the module
          will be called i2c-taos-evm.
 
-config I2C_STUB
-       tristate "I2C/SMBus Test Stub"
-       depends on EXPERIMENTAL && m
-       default 'n'
-       help
-         This module may be useful to developers of SMBus client drivers,
-         especially for certain kinds of sensor chips.
-
-         If you do build this module, be sure to read the notes and warnings
-         in <file:Documentation/i2c/i2c-stub>.
-
-         If you don't know what to do here, definitely say N.
-
 config I2C_TINY_USB
-       tristate "I2C-Tiny-USB"
+       tristate "Tiny-USB adapter"
        depends on USB
        help
          If you say yes to this option, support will be included for the
@@ -567,16 +564,21 @@ config I2C_TINY_USB
          This driver can also be built as a module.  If so, the module
          will be called i2c-tiny-usb.
 
-config I2C_VERSATILE
-       tristate "ARM Versatile/Realview I2C bus support"
-       depends on ARCH_VERSATILE || ARCH_REALVIEW
+comment "Graphics adapter I2C/DDC channel drivers"
+       depends on PCI
+
+config I2C_VOODOO3
+       tristate "Voodoo 3"
+       depends on PCI
        select I2C_ALGOBIT
        help
-         Say yes if you want to support the I2C serial bus on ARMs Versatile
-         range of platforms.
+         If you say yes to this option, support will be included for the
+         Voodoo 3 I2C interface.
 
          This driver can also be built as a module.  If so, the module
-         will be called i2c-versatile.
+         will be called i2c-voodoo3.
+
+comment "Other I2C/SMBus bus drivers"
 
 config I2C_ACORN
        tristate "Acorn IOC/IOMD I2C bus support"
@@ -588,46 +590,16 @@ config I2C_ACORN
 
          If you don't know, say Y.
 
-config I2C_VIA
-       tristate "VIA 82C586B"
-       depends on PCI && EXPERIMENTAL
-       select I2C_ALGOBIT
-       help
-         If you say yes to this option, support will be included for the VIA
-          82C586B I2C interface
-
-         This driver can also be built as a module.  If so, the module
-         will be called i2c-via.
-
-config I2C_VIAPRO
-       tristate "VIA VT82C596/82C686/82xx and CX700"
-       depends on PCI
-       help
-         If you say yes to this option, support will be included for the VIA
-         VT82C596 and later SMBus interface.  Specifically, the following
-         chipsets are supported:
-           VT82C596A/B
-           VT82C686A/B
-           VT8231
-           VT8233/A
-           VT8235
-           VT8237R/A/S
-           VT8251
-           CX700
-
-         This driver can also be built as a module.  If so, the module
-         will be called i2c-viapro.
-
-config I2C_VOODOO3
-       tristate "Voodoo 3"
-       depends on PCI
-       select I2C_ALGOBIT
+config I2C_ELEKTOR
+       tristate "Elektor ISA card"
+       depends on ISA && BROKEN_ON_SMP
+       select I2C_ALGOPCF
        help
-         If you say yes to this option, support will be included for the
-         Voodoo 3 I2C interface.
+         This supports the PCF8584 ISA bus I2C adapter.  Say Y if you own
+         such an adapter.
 
-         This driver can also be built as a module.  If so, the module
-         will be called i2c-voodoo3.
+         This support is also available as a module.  If so, the module
+         will be called i2c-elektor.
 
 config I2C_PCA_ISA
        tristate "PCA9564 on an ISA bus"
@@ -657,26 +629,6 @@ config I2C_PCA_PLATFORM
          This driver can also be built as a module.  If so, the module
          will be called i2c-pca-platform.
 
-config I2C_MV64XXX
-       tristate "Marvell mv64xxx I2C Controller"
-       depends on (MV64X60 || PLAT_ORION) && EXPERIMENTAL
-       help
-         If you say yes to this option, support will be included for the
-         built-in I2C interface on the Marvell 64xxx line of host bridges.
-
-         This driver can also be built as a module.  If so, the module
-         will be called i2c-mv64xxx.
-
-config I2C_PNX
-       tristate "I2C bus support for Philips PNX targets"
-       depends on ARCH_PNX4008
-       help
-         This driver supports the Philips IP3204 I2C IP block master and/or
-         slave controller
-
-         This driver can also be built as a module.  If so, the module
-         will be called i2c-pnx.
-
 config I2C_PMCMSP
        tristate "PMC MSP I2C TWI Controller"
        depends on PMC_MSP
@@ -686,23 +638,66 @@ config I2C_PMCMSP
          This driver can also be built as module. If so, the module
          will be called i2c-pmcmsp.
 
-config I2C_SH7760
-       tristate "Renesas SH7760 I2C Controller"
-       depends on CPU_SUBTYPE_SH7760
+config I2C_SIBYTE
+       tristate "SiByte SMBus interface"
+       depends on SIBYTE_SB1xxx_SOC
        help
-         This driver supports the 2 I2C interfaces on the Renesas SH7760.
+         Supports the SiByte SOC on-chip I2C interfaces (2 channels).
 
-         This driver can also be built as a module.  If so, the module
-         will be called i2c-sh7760.
+config I2C_STUB
+       tristate "I2C/SMBus Test Stub"
+       depends on EXPERIMENTAL && m
+       default 'n'
+       help
+         This module may be useful to developers of SMBus client drivers,
+         especially for certain kinds of sensor chips.
 
-config I2C_SH_MOBILE
-       tristate "SuperH Mobile I2C Controller"
-       depends on SUPERH
+         If you do build this module, be sure to read the notes and warnings
+         in <file:Documentation/i2c/i2c-stub>.
+
+         If you don't know what to do here, definitely say N.
+
+config SCx200_I2C
+       tristate "NatSemi SCx200 I2C using GPIO pins (DEPRECATED)"
+       depends on SCx200_GPIO
+       select I2C_ALGOBIT
        help
-         If you say yes to this option, support will be included for the
-         built-in I2C interface on the Renesas SH-Mobile processor.
+         Enable the use of two GPIO pins of a SCx200 processor as an I2C bus.
 
-         This driver can also be built as a module.  If so, the module
-         will be called i2c-sh_mobile.
+         If you don't know what to do here, say N.
+
+         This support is also available as a module.  If so, the module
+         will be called scx200_i2c.
+
+         This driver is deprecated and will be dropped soon. Use i2c-gpio
+         (or scx200_acb) instead.
+
+config SCx200_I2C_SCL
+       int "GPIO pin used for SCL"
+       depends on SCx200_I2C
+       default "12"
+       help
+         Enter the GPIO pin number used for the SCL signal.  This value can
+         also be specified with a module parameter.
+
+config SCx200_I2C_SDA
+       int "GPIO pin used for SDA"
+       depends on SCx200_I2C
+       default "13"
+       help
+         Enter the GPIO pin number used for the SDA signal.  This value can
+         also be specified with a module parameter.
+
+config SCx200_ACB
+       tristate "Geode ACCESS.bus support"
+       depends on X86_32 && PCI
+       help
+         Enable the use of the ACCESS.bus controllers on the Geode SCx200 and
+         SC1100 processors and the CS5535 and CS5536 Geode companion devices.
+
+         If you don't know what to do here, say N.
+
+         This support is also available as a module.  If so, the module
+         will be called scx200_acb.
 
 endmenu
index e8c882a5ea666e5319b0725f3740b400e7223d4f..97dbfa2107fee2fe7b7170219c9aebf552b8381b 100644 (file)
@@ -2,57 +2,68 @@
 # Makefile for the i2c bus drivers.
 #
 
+# PC SMBus host controller drivers
 obj-$(CONFIG_I2C_ALI1535)      += i2c-ali1535.o
 obj-$(CONFIG_I2C_ALI1563)      += i2c-ali1563.o
 obj-$(CONFIG_I2C_ALI15X3)      += i2c-ali15x3.o
 obj-$(CONFIG_I2C_AMD756)       += i2c-amd756.o
 obj-$(CONFIG_I2C_AMD756_S4882) += i2c-amd756-s4882.o
 obj-$(CONFIG_I2C_AMD8111)      += i2c-amd8111.o
+obj-$(CONFIG_I2C_I801)         += i2c-i801.o
+obj-$(CONFIG_I2C_ISCH)         += i2c-isch.o
+obj-$(CONFIG_I2C_NFORCE2)      += i2c-nforce2.o
+obj-$(CONFIG_I2C_NFORCE2_S4985)        += i2c-nforce2-s4985.o
+obj-$(CONFIG_I2C_PIIX4)                += i2c-piix4.o
+obj-$(CONFIG_I2C_SIS5595)      += i2c-sis5595.o
+obj-$(CONFIG_I2C_SIS630)       += i2c-sis630.o
+obj-$(CONFIG_I2C_SIS96X)       += i2c-sis96x.o
+obj-$(CONFIG_I2C_VIA)          += i2c-via.o
+obj-$(CONFIG_I2C_VIAPRO)       += i2c-viapro.o
+
+# Mac SMBus host controller drivers
+obj-$(CONFIG_I2C_HYDRA)                += i2c-hydra.o
+obj-$(CONFIG_I2C_POWERMAC)     += i2c-powermac.o
+
+# Embedded system I2C/SMBus host controller drivers
 obj-$(CONFIG_I2C_AT91)         += i2c-at91.o
 obj-$(CONFIG_I2C_AU1550)       += i2c-au1550.o
 obj-$(CONFIG_I2C_BLACKFIN_TWI) += i2c-bfin-twi.o
+obj-$(CONFIG_I2C_CPM)          += i2c-cpm.o
 obj-$(CONFIG_I2C_DAVINCI)      += i2c-davinci.o
-obj-$(CONFIG_I2C_ELEKTOR)      += i2c-elektor.o
 obj-$(CONFIG_I2C_GPIO)         += i2c-gpio.o
-obj-$(CONFIG_I2C_HYDRA)                += i2c-hydra.o
-obj-$(CONFIG_I2C_I801)         += i2c-i801.o
-obj-$(CONFIG_I2C_I810)         += i2c-i810.o
 obj-$(CONFIG_I2C_IBM_IIC)      += i2c-ibm_iic.o
 obj-$(CONFIG_I2C_IOP3XX)       += i2c-iop3xx.o
 obj-$(CONFIG_I2C_IXP2000)      += i2c-ixp2000.o
-obj-$(CONFIG_I2C_POWERMAC)     += i2c-powermac.o
 obj-$(CONFIG_I2C_MPC)          += i2c-mpc.o
 obj-$(CONFIG_I2C_MV64XXX)      += i2c-mv64xxx.o
-obj-$(CONFIG_I2C_NFORCE2)      += i2c-nforce2.o
 obj-$(CONFIG_I2C_OCORES)       += i2c-ocores.o
 obj-$(CONFIG_I2C_OMAP)         += i2c-omap.o
-obj-$(CONFIG_I2C_PARPORT)      += i2c-parport.o
-obj-$(CONFIG_I2C_PARPORT_LIGHT)        += i2c-parport-light.o
 obj-$(CONFIG_I2C_PASEMI)       += i2c-pasemi.o
-obj-$(CONFIG_I2C_PCA_ISA)      += i2c-pca-isa.o
-obj-$(CONFIG_I2C_PCA_PLATFORM) += i2c-pca-platform.o
-obj-$(CONFIG_I2C_PIIX4)                += i2c-piix4.o
-obj-$(CONFIG_I2C_PMCMSP)       += i2c-pmcmsp.o
 obj-$(CONFIG_I2C_PNX)          += i2c-pnx.o
-obj-$(CONFIG_I2C_PROSAVAGE)    += i2c-prosavage.o
 obj-$(CONFIG_I2C_PXA)          += i2c-pxa.o
 obj-$(CONFIG_I2C_S3C2410)      += i2c-s3c2410.o
-obj-$(CONFIG_I2C_SAVAGE4)      += i2c-savage4.o
 obj-$(CONFIG_I2C_SH7760)       += i2c-sh7760.o
 obj-$(CONFIG_I2C_SH_MOBILE)    += i2c-sh_mobile.o
-obj-$(CONFIG_I2C_SIBYTE)       += i2c-sibyte.o
 obj-$(CONFIG_I2C_SIMTEC)       += i2c-simtec.o
-obj-$(CONFIG_I2C_SIS5595)      += i2c-sis5595.o
-obj-$(CONFIG_I2C_SIS630)       += i2c-sis630.o
-obj-$(CONFIG_I2C_SIS96X)       += i2c-sis96x.o
-obj-$(CONFIG_I2C_STUB)         += i2c-stub.o
+obj-$(CONFIG_I2C_VERSATILE)    += i2c-versatile.o
+
+# External I2C/SMBus adapter drivers
+obj-$(CONFIG_I2C_PARPORT)      += i2c-parport.o
+obj-$(CONFIG_I2C_PARPORT_LIGHT)        += i2c-parport-light.o
 obj-$(CONFIG_I2C_TAOS_EVM)     += i2c-taos-evm.o
 obj-$(CONFIG_I2C_TINY_USB)     += i2c-tiny-usb.o
-obj-$(CONFIG_I2C_VERSATILE)    += i2c-versatile.o
-obj-$(CONFIG_I2C_ACORN)                += i2c-acorn.o
-obj-$(CONFIG_I2C_VIA)          += i2c-via.o
-obj-$(CONFIG_I2C_VIAPRO)       += i2c-viapro.o
+
+# Graphics adapter I2C/DDC channel drivers
 obj-$(CONFIG_I2C_VOODOO3)      += i2c-voodoo3.o
+
+# Other I2C/SMBus bus drivers
+obj-$(CONFIG_I2C_ACORN)                += i2c-acorn.o
+obj-$(CONFIG_I2C_ELEKTOR)      += i2c-elektor.o
+obj-$(CONFIG_I2C_PCA_ISA)      += i2c-pca-isa.o
+obj-$(CONFIG_I2C_PCA_PLATFORM) += i2c-pca-platform.o
+obj-$(CONFIG_I2C_PMCMSP)       += i2c-pmcmsp.o
+obj-$(CONFIG_I2C_SIBYTE)       += i2c-sibyte.o
+obj-$(CONFIG_I2C_STUB)         += i2c-stub.o
 obj-$(CONFIG_SCx200_ACB)       += scx200_acb.o
 obj-$(CONFIG_SCx200_I2C)       += scx200_i2c.o
 
index f14372ac2fc5d08ef0b8ced5b994d86af7a5dee5..9cead9b9458e4770843ef25cc5fbf97ab158ab27 100644 (file)
@@ -1,6 +1,4 @@
 /*
-    i2c-ali1535.c - Part of lm_sensors, Linux kernel modules for hardware
-                    monitoring
     Copyright (c) 2000  Frodo Looijaard <frodol@dds.nl>, 
                         Philip Edelbrock <phil@netroedge.com>, 
                         Mark D. Studebaker <mdsxyz123@yahoo.com>,
@@ -61,6 +59,7 @@
 #include <linux/ioport.h>
 #include <linux/i2c.h>
 #include <linux/init.h>
+#include <linux/acpi.h>
 #include <asm/io.h>
 
 
@@ -159,6 +158,11 @@ static int ali1535_setup(struct pci_dev *dev)
                goto exit;
        }
 
+       retval = acpi_check_region(ali1535_smba, ALI1535_SMB_IOSIZE,
+                                  ali1535_driver.name);
+       if (retval)
+               goto exit;
+
        if (!request_region(ali1535_smba, ALI1535_SMB_IOSIZE,
                            ali1535_driver.name)) {
                dev_err(&dev->dev, "ALI1535_smb region 0x%x already in use!\n",
@@ -259,7 +263,7 @@ static int ali1535_transaction(struct i2c_adapter *adap)
                        dev_err(&adap->dev,
                                "SMBus reset failed! (0x%02x) - controller or "
                                "device on bus is probably hung\n", temp);
-                       return -1;
+                       return -EBUSY;
                }
        } else {
                /* check and clear done bit */
@@ -281,12 +285,12 @@ static int ali1535_transaction(struct i2c_adapter *adap)
 
        /* If the SMBus is still busy, we give up */
        if (timeout >= MAX_TIMEOUT) {
-               result = -1;
+               result = -ETIMEDOUT;
                dev_err(&adap->dev, "SMBus Timeout!\n");
        }
 
        if (temp & ALI1535_STS_FAIL) {
-               result = -1;
+               result = -EIO;
                dev_dbg(&adap->dev, "Error: Failed bus transaction\n");
        }
 
@@ -295,7 +299,7 @@ static int ali1535_transaction(struct i2c_adapter *adap)
         * do a printk.  This means that bus collisions go unreported.
         */
        if (temp & ALI1535_STS_BUSERR) {
-               result = -1;
+               result = -ENXIO;
                dev_dbg(&adap->dev,
                        "Error: no response or bus collision ADD=%02x\n",
                        inb_p(SMBHSTADD));
@@ -303,13 +307,13 @@ static int ali1535_transaction(struct i2c_adapter *adap)
 
        /* haven't ever seen this */
        if (temp & ALI1535_STS_DEV) {
-               result = -1;
+               result = -EIO;
                dev_err(&adap->dev, "Error: device error\n");
        }
 
        /* check to see if the "command complete" indication is set */
        if (!(temp & ALI1535_STS_DONE)) {
-               result = -1;
+               result = -ETIMEDOUT;
                dev_err(&adap->dev, "Error: command never completed\n");
        }
 
@@ -332,7 +336,7 @@ static int ali1535_transaction(struct i2c_adapter *adap)
        return result;
 }
 
-/* Return -1 on error. */
+/* Return negative errno on error. */
 static s32 ali1535_access(struct i2c_adapter *adap, u16 addr,
                          unsigned short flags, char read_write, u8 command,
                          int size, union i2c_smbus_data *data)
@@ -357,10 +361,6 @@ static s32 ali1535_access(struct i2c_adapter *adap, u16 addr,
        outb_p(0xFF, SMBHSTSTS);
 
        switch (size) {
-       case I2C_SMBUS_PROC_CALL:
-               dev_err(&adap->dev, "I2C_SMBUS_PROC_CALL not supported!\n");
-               result = -1;
-               goto EXIT;
        case I2C_SMBUS_QUICK:
                outb_p(((addr & 0x7f) << 1) | (read_write & 0x01),
                       SMBHSTADD);
@@ -418,13 +418,15 @@ static s32 ali1535_access(struct i2c_adapter *adap, u16 addr,
                                outb_p(data->block[i], SMBBLKDAT);
                }
                break;
+       default:
+               dev_warn(&adap->dev, "Unsupported transaction %d\n", size);
+               result = -EOPNOTSUPP;
+               goto EXIT;
        }
 
-       if (ali1535_transaction(adap)) {
-               /* Error in transaction */
-               result = -1;
+       result = ali1535_transaction(adap);
+       if (result)
                goto EXIT;
-       }
 
        if ((read_write == I2C_SMBUS_WRITE) || (size == ALI1535_QUICK)) {
                result = 0;
@@ -475,7 +477,7 @@ static const struct i2c_algorithm smbus_algorithm = {
 static struct i2c_adapter ali1535_adapter = {
        .owner          = THIS_MODULE,
        .id             = I2C_HW_SMBUS_ALI1535,
-       .class          = I2C_CLASS_HWMON,
+       .class          = I2C_CLASS_HWMON | I2C_CLASS_SPD,
        .algo           = &smbus_algorithm,
 };
 
index 6b68074e518a072d1239c1e1f8ba61287cc4b456..fc3e5b026423d8e7ab752c73a93a2e18d1e975b5 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/i2c.h>
 #include <linux/pci.h>
 #include <linux/init.h>
+#include <linux/acpi.h>
 
 #define ALI1563_MAX_TIMEOUT    500
 #define        ALI1563_SMBBA           0x80
@@ -67,6 +68,7 @@ static int ali1563_transaction(struct i2c_adapter * a, int size)
 {
        u32 data;
        int timeout;
+       int status = -EIO;
 
        dev_dbg(&a->dev, "Transaction (pre): STS=%02x, CNTL1=%02x, "
                "CNTL2=%02x, CMD=%02x, ADD=%02x, DAT0=%02x, DAT1=%02x\n",
@@ -103,13 +105,15 @@ static int ali1563_transaction(struct i2c_adapter * a, int size)
                /* Issue 'kill' to host controller */
                outb_p(HST_CNTL2_KILL,SMB_HST_CNTL2);
                data = inb_p(SMB_HST_STS);
+               status = -ETIMEDOUT;
        }
 
        /* device error - no response, ignore the autodetection case */
-       if ((data & HST_STS_DEVERR) && (size != HST_CNTL2_QUICK)) {
-               dev_err(&a->dev, "Device error!\n");
+       if (data & HST_STS_DEVERR) {
+               if (size != HST_CNTL2_QUICK)
+                       dev_err(&a->dev, "Device error!\n");
+               status = -ENXIO;
        }
-
        /* bus collision */
        if (data & HST_STS_BUSERR) {
                dev_err(&a->dev, "Bus collision!\n");
@@ -122,13 +126,14 @@ static int ali1563_transaction(struct i2c_adapter * a, int size)
                outb_p(0x0,SMB_HST_CNTL2);
        }
 
-       return -1;
+       return status;
 }
 
 static int ali1563_block_start(struct i2c_adapter * a)
 {
        u32 data;
        int timeout;
+       int status = -EIO;
 
        dev_dbg(&a->dev, "Block (pre): STS=%02x, CNTL1=%02x, "
                "CNTL2=%02x, CMD=%02x, ADD=%02x, DAT0=%02x, DAT1=%02x\n",
@@ -164,13 +169,20 @@ static int ali1563_block_start(struct i2c_adapter * a)
 
        if (timeout && !(data & HST_STS_BAD))
                return 0;
+
+       if (timeout == 0)
+               status = -ETIMEDOUT;
+
+       if (data & HST_STS_DEVERR)
+               status = -ENXIO;
+
        dev_err(&a->dev, "SMBus Error: %s%s%s%s%s\n",
-               timeout ? "Timeout " : "",
+               timeout ? "" : "Timeout ",
                data & HST_STS_FAIL ? "Transaction Failed " : "",
                data & HST_STS_BUSERR ? "No response or Bus Collision " : "",
                data & HST_STS_DEVERR ? "Device Error " : "",
                !(data & HST_STS_DONE) ? "Transaction Never Finished " : "");
-       return -1;
+       return status;
 }
 
 static int ali1563_block(struct i2c_adapter * a, union i2c_smbus_data * data, u8 rw)
@@ -235,10 +247,6 @@ static s32 ali1563_access(struct i2c_adapter * a, u16 addr,
 
        /* Map the size to what the chip understands */
        switch (size) {
-       case I2C_SMBUS_PROC_CALL:
-               dev_err(&a->dev, "I2C_SMBUS_PROC_CALL not supported!\n");
-               error = -EINVAL;
-               break;
        case I2C_SMBUS_QUICK:
                size = HST_CNTL2_QUICK;
                break;
@@ -254,6 +262,10 @@ static s32 ali1563_access(struct i2c_adapter * a, u16 addr,
        case I2C_SMBUS_BLOCK_DATA:
                size = HST_CNTL2_BLOCK;
                break;
+       default:
+               dev_warn(&a->dev, "Unsupported transaction %d\n", size);
+               error = -EOPNOTSUPP;
+               goto Done;
        }
 
        outb_p(((addr & 0x7f) << 1) | (rw & 0x01), SMB_HST_ADD);
@@ -345,6 +357,10 @@ static int __devinit ali1563_setup(struct pci_dev * dev)
                }
        }
 
+       if (acpi_check_region(ali1563_smba, ALI1563_SMB_IOSIZE,
+                             ali1563_pci_driver.name))
+               goto Err;
+
        if (!request_region(ali1563_smba, ALI1563_SMB_IOSIZE,
                            ali1563_pci_driver.name)) {
                dev_err(&dev->dev, "Could not allocate I/O space at 0x%04x\n",
@@ -371,7 +387,7 @@ static const struct i2c_algorithm ali1563_algorithm = {
 static struct i2c_adapter ali1563_adapter = {
        .owner  = THIS_MODULE,
        .id     = I2C_HW_SMBUS_ALI1563,
-       .class  = I2C_CLASS_HWMON,
+       .class  = I2C_CLASS_HWMON | I2C_CLASS_SPD,
        .algo   = &ali1563_algorithm,
 };
 
index 93bf87d709618fff0ba489aa17028b2d84a39f23..234fdde7d40e8712f7a0ce1d21d0cda09679737c 100644 (file)
@@ -1,6 +1,4 @@
 /*
-    ali15x3.c - Part of lm_sensors, Linux kernel modules for hardware
-              monitoring
     Copyright (c) 1999  Frodo Looijaard <frodol@dds.nl> and
     Philip Edelbrock <phil@netroedge.com> and
     Mark D. Studebaker <mdsxyz123@yahoo.com>
@@ -68,6 +66,7 @@
 #include <linux/delay.h>
 #include <linux/i2c.h>
 #include <linux/init.h>
+#include <linux/acpi.h>
 #include <asm/io.h>
 
 /* ALI15X3 SMBus address offsets */
@@ -166,6 +165,10 @@ static int ali15x3_setup(struct pci_dev *ALI15X3_dev)
        if(force_addr)
                ali15x3_smba = force_addr & ~(ALI15X3_SMB_IOSIZE - 1);
 
+       if (acpi_check_region(ali15x3_smba, ALI15X3_SMB_IOSIZE,
+                             ali15x3_driver.name))
+               return -EBUSY;
+
        if (!request_region(ali15x3_smba, ALI15X3_SMB_IOSIZE,
                            ali15x3_driver.name)) {
                dev_err(&ALI15X3_dev->dev,
@@ -282,7 +285,7 @@ static int ali15x3_transaction(struct i2c_adapter *adap)
                        dev_err(&adap->dev, "SMBus reset failed! (0x%02x) - "
                                "controller or device on bus is probably hung\n",
                                temp);
-                       return -1;
+                       return -EBUSY;
                }
        } else {
                /* check and clear done bit */
@@ -304,12 +307,12 @@ static int ali15x3_transaction(struct i2c_adapter *adap)
 
        /* If the SMBus is still busy, we give up */
        if (timeout >= MAX_TIMEOUT) {
-               result = -1;
+               result = -ETIMEDOUT;
                dev_err(&adap->dev, "SMBus Timeout!\n");
        }
 
        if (temp & ALI15X3_STS_TERM) {
-               result = -1;
+               result = -EIO;
                dev_dbg(&adap->dev, "Error: Failed bus transaction\n");
        }
 
@@ -320,7 +323,7 @@ static int ali15x3_transaction(struct i2c_adapter *adap)
          This means that bus collisions go unreported.
        */
        if (temp & ALI15X3_STS_COLL) {
-               result = -1;
+               result = -ENXIO;
                dev_dbg(&adap->dev,
                        "Error: no response or bus collision ADD=%02x\n",
                        inb_p(SMBHSTADD));
@@ -328,7 +331,7 @@ static int ali15x3_transaction(struct i2c_adapter *adap)
 
        /* haven't ever seen this */
        if (temp & ALI15X3_STS_DEV) {
-               result = -1;
+               result = -EIO;
                dev_err(&adap->dev, "Error: device error\n");
        }
        dev_dbg(&adap->dev, "Transaction (post): STS=%02x, CNT=%02x, CMD=%02x, "
@@ -338,7 +341,7 @@ static int ali15x3_transaction(struct i2c_adapter *adap)
        return result;
 }
 
-/* Return -1 on error. */
+/* Return negative errno on error. */
 static s32 ali15x3_access(struct i2c_adapter * adap, u16 addr,
                   unsigned short flags, char read_write, u8 command,
                   int size, union i2c_smbus_data * data)
@@ -362,9 +365,6 @@ static s32 ali15x3_access(struct i2c_adapter * adap, u16 addr,
        }
 
        switch (size) {
-       case I2C_SMBUS_PROC_CALL:
-               dev_err(&adap->dev, "I2C_SMBUS_PROC_CALL not supported!\n");
-               return -1;
        case I2C_SMBUS_QUICK:
                outb_p(((addr & 0x7f) << 1) | (read_write & 0x01),
                       SMBHSTADD);
@@ -417,12 +417,16 @@ static s32 ali15x3_access(struct i2c_adapter * adap, u16 addr,
                }
                size = ALI15X3_BLOCK_DATA;
                break;
+       default:
+               dev_warn(&adap->dev, "Unsupported transaction %d\n", size);
+               return -EOPNOTSUPP;
        }
 
        outb_p(size, SMBHSTCNT);        /* output command */
 
-       if (ali15x3_transaction(adap))  /* Error in transaction */
-               return -1;
+       temp = ali15x3_transaction(adap);
+       if (temp)
+               return temp;
 
        if ((read_write == I2C_SMBUS_WRITE) || (size == ALI15X3_QUICK))
                return 0;
@@ -470,7 +474,7 @@ static const struct i2c_algorithm smbus_algorithm = {
 static struct i2c_adapter ali15x3_adapter = {
        .owner          = THIS_MODULE,
        .id             = I2C_HW_SMBUS_ALI15X3,
-       .class          = I2C_CLASS_HWMON,
+       .class          = I2C_CLASS_HWMON | I2C_CLASS_SPD,
        .algo           = &smbus_algorithm,
 };
 
index c38a0a112208286787d94966f57048980ad459b8..2f150e33c74c9b19ae6f93dc6e1828ebd619d933 100644 (file)
@@ -58,7 +58,7 @@ static s32 amd756_access_virt0(struct i2c_adapter * adap, u16 addr,
        /* We exclude the multiplexed addresses */
        if (addr == 0x4c || (addr & 0xfc) == 0x50 || (addr & 0xfc) == 0x30
         || addr == 0x18)
-               return -1;
+               return -ENXIO;
 
        mutex_lock(&amd756_lock);
 
@@ -86,7 +86,7 @@ static inline s32 amd756_access_channel(struct i2c_adapter * adap, u16 addr,
 
        /* We exclude the non-multiplexed addresses */
        if (addr != 0x4c && (addr & 0xfc) != 0x50 && (addr & 0xfc) != 0x30)
-               return -1;
+               return -ENXIO;
 
        mutex_lock(&amd756_lock);
 
index 43508d61eb7cf455c03fee1c82d4dfe501fbd260..1ea39254dac6e41dbd66fb0252d9016f1b95195d 100644 (file)
@@ -1,7 +1,4 @@
 /*
-    amd756.c - Part of lm_sensors, Linux kernel modules for hardware
-              monitoring
-
     Copyright (c) 1999-2002 Merlin Hughes <merlin@merlin.org>
 
     Shamelessly ripped from i2c-piix4.c:
@@ -45,6 +42,7 @@
 #include <linux/ioport.h>
 #include <linux/i2c.h>
 #include <linux/init.h>
+#include <linux/acpi.h>
 #include <asm/io.h>
 
 /* AMD756 SMBus address offsets */
@@ -151,17 +149,17 @@ static int amd756_transaction(struct i2c_adapter *adap)
        }
 
        if (temp & GS_PRERR_STS) {
-               result = -1;
+               result = -ENXIO;
                dev_dbg(&adap->dev, "SMBus Protocol error (no response)!\n");
        }
 
        if (temp & GS_COL_STS) {
-               result = -1;
+               result = -EIO;
                dev_warn(&adap->dev, "SMBus collision!\n");
        }
 
        if (temp & GS_TO_STS) {
-               result = -1;
+               result = -ETIMEDOUT;
                dev_dbg(&adap->dev, "SMBus protocol timeout!\n");
        }
 
@@ -189,22 +187,18 @@ static int amd756_transaction(struct i2c_adapter *adap)
        outw_p(inw(SMB_GLOBAL_ENABLE) | GE_ABORT, SMB_GLOBAL_ENABLE);
        msleep(100);
        outw_p(GS_CLEAR_STS, SMB_GLOBAL_STATUS);
-       return -1;
+       return -EIO;
 }
 
-/* Return -1 on error. */
+/* Return negative errno on error. */
 static s32 amd756_access(struct i2c_adapter * adap, u16 addr,
                  unsigned short flags, char read_write,
                  u8 command, int size, union i2c_smbus_data * data)
 {
        int i, len;
+       int status;
 
-       /** TODO: Should I supporte the 10-bit transfers? */
        switch (size) {
-       case I2C_SMBUS_PROC_CALL:
-               dev_dbg(&adap->dev, "I2C_SMBUS_PROC_CALL not supported!\n");
-               /* TODO: Well... It is supported, I'm just not sure what to do here... */
-               return -1;
        case I2C_SMBUS_QUICK:
                outw_p(((addr & 0x7f) << 1) | (read_write & 0x01),
                       SMB_HOST_ADDRESS);
@@ -251,13 +245,17 @@ static s32 amd756_access(struct i2c_adapter * adap, u16 addr,
                }
                size = AMD756_BLOCK_DATA;
                break;
+       default:
+               dev_warn(&adap->dev, "Unsupported transaction %d\n", size);
+               return -EOPNOTSUPP;
        }
 
        /* How about enabling interrupts... */
        outw_p(size & GE_CYC_TYPE_MASK, SMB_GLOBAL_ENABLE);
 
-       if (amd756_transaction(adap))   /* Error in transaction */
-               return -1;
+       status = amd756_transaction(adap);
+       if (status)
+               return status;
 
        if ((read_write == I2C_SMBUS_WRITE) || (size == AMD756_QUICK))
                return 0;
@@ -301,7 +299,7 @@ static const struct i2c_algorithm smbus_algorithm = {
 struct i2c_adapter amd756_smbus = {
        .owner          = THIS_MODULE,
        .id             = I2C_HW_SMBUS_AMD756,
-       .class          = I2C_CLASS_HWMON,
+       .class          = I2C_CLASS_HWMON | I2C_CLASS_SPD,
        .algo           = &smbus_algorithm,
 };
 
@@ -368,6 +366,11 @@ static int __devinit amd756_probe(struct pci_dev *pdev,
                amd756_ioport += SMB_ADDR_OFFSET;
        }
 
+       error = acpi_check_region(amd756_ioport, SMB_IOSIZE,
+                                 amd756_driver.name);
+       if (error)
+               return error;
+
        if (!request_region(amd756_ioport, SMB_IOSIZE, amd756_driver.name)) {
                dev_err(&pdev->dev, "SMB region 0x%x already in use!\n",
                        amd756_ioport);
index 5d1a27ef250450bc76457c8f34b48484beba5e54..3972208876b3d6f9671567fc3496eab12254df72 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/init.h>
 #include <linux/i2c.h>
 #include <linux/delay.h>
+#include <linux/acpi.h>
 #include <asm/io.h>
 
 MODULE_LICENSE("GPL");
@@ -77,7 +78,7 @@ static unsigned int amd_ec_wait_write(struct amd_smbus *smbus)
        if (!timeout) {
                dev_warn(&smbus->dev->dev,
                         "Timeout while waiting for IBF to clear\n");
-               return -1;
+               return -ETIMEDOUT;
        }
 
        return 0;
@@ -93,7 +94,7 @@ static unsigned int amd_ec_wait_read(struct amd_smbus *smbus)
        if (!timeout) {
                dev_warn(&smbus->dev->dev,
                         "Timeout while waiting for OBF to set\n");
-               return -1;
+               return -ETIMEDOUT;
        }
 
        return 0;
@@ -102,16 +103,21 @@ static unsigned int amd_ec_wait_read(struct amd_smbus *smbus)
 static unsigned int amd_ec_read(struct amd_smbus *smbus, unsigned char address,
                unsigned char *data)
 {
-       if (amd_ec_wait_write(smbus))
-               return -1;
+       int status;
+
+       status = amd_ec_wait_write(smbus);
+       if (status)
+               return status;
        outb(AMD_EC_CMD_RD, smbus->base + AMD_EC_CMD);
 
-       if (amd_ec_wait_write(smbus))
-               return -1;
+       status = amd_ec_wait_write(smbus);
+       if (status)
+               return status;
        outb(address, smbus->base + AMD_EC_DATA);
 
-       if (amd_ec_wait_read(smbus))
-               return -1;
+       status = amd_ec_wait_read(smbus);
+       if (status)
+               return status;
        *data = inb(smbus->base + AMD_EC_DATA);
 
        return 0;
@@ -120,16 +126,21 @@ static unsigned int amd_ec_read(struct amd_smbus *smbus, unsigned char address,
 static unsigned int amd_ec_write(struct amd_smbus *smbus, unsigned char address,
                unsigned char data)
 {
-       if (amd_ec_wait_write(smbus))
-               return -1;
+       int status;
+
+       status = amd_ec_wait_write(smbus);
+       if (status)
+               return status;
        outb(AMD_EC_CMD_WR, smbus->base + AMD_EC_CMD);
 
-       if (amd_ec_wait_write(smbus))
-               return -1;
+       status = amd_ec_wait_write(smbus);
+       if (status)
+               return status;
        outb(address, smbus->base + AMD_EC_DATA);
 
-       if (amd_ec_wait_write(smbus))
-               return -1;
+       status = amd_ec_wait_write(smbus);
+       if (status)
+               return status;
        outb(data, smbus->base + AMD_EC_DATA);
 
        return 0;
@@ -267,12 +278,17 @@ static s32 amd8111_access(struct i2c_adapter * adap, u16 addr,
 
                default:
                        dev_warn(&adap->dev, "Unsupported transaction %d\n", size);
-                       return -1;
+                       return -EOPNOTSUPP;
        }
 
        amd_ec_write(smbus, AMD_SMB_ADDR, addr << 1);
        amd_ec_write(smbus, AMD_SMB_PRTCL, protocol);
 
+       /* FIXME this discards status from ec_read(); so temp[0] will
+        * hold stack garbage ... the rest of this routine will act
+        * nonsensically.  Ignored ec_write() status might explain
+        * some such failures...
+        */
        amd_ec_read(smbus, AMD_SMB_STS, temp + 0);
 
        if (~temp[0] & AMD_SMB_STS_DONE) {
@@ -286,7 +302,7 @@ static s32 amd8111_access(struct i2c_adapter * adap, u16 addr,
        }
 
        if ((~temp[0] & AMD_SMB_STS_DONE) || (temp[0] & AMD_SMB_STS_STATUS))
-               return -1;
+               return -EIO;
 
        if (read_write == I2C_SMBUS_WRITE)
                return 0;
@@ -359,6 +375,10 @@ static int __devinit amd8111_probe(struct pci_dev *dev,
        smbus->base = pci_resource_start(dev, 0);
        smbus->size = pci_resource_len(dev, 0);
 
+       error = acpi_check_resource_conflict(&dev->resource[0]);
+       if (error)
+               goto out_kfree;
+
        if (!request_region(smbus->base, smbus->size, amd8111_driver.name)) {
                error = -EBUSY;
                goto out_kfree;
@@ -368,7 +388,7 @@ static int __devinit amd8111_probe(struct pci_dev *dev,
        snprintf(smbus->adapter.name, sizeof(smbus->adapter.name),
                "SMBus2 AMD8111 adapter at %04x", smbus->base);
        smbus->adapter.id = I2C_HW_SMBUS_AMD8111;
-       smbus->adapter.class = I2C_CLASS_HWMON;
+       smbus->adapter.class = I2C_CLASS_HWMON | I2C_CLASS_SPD;
        smbus->adapter.algo = &smbus_algorithm;
        smbus->adapter.algo_data = smbus;
 
index cae9dc89d88cc0737379cf38f1ce55116f1abbac..66a04c2c660fdfde8eec02110a47162d231d2021 100644 (file)
@@ -269,9 +269,13 @@ static int
 au1550_xfer(struct i2c_adapter *i2c_adap, struct i2c_msg *msgs, int num)
 {
        struct i2c_au1550_data *adap = i2c_adap->algo_data;
+       volatile psc_smb_t *sp = (volatile psc_smb_t *)adap->psc_base;
        struct i2c_msg *p;
        int i, err = 0;
 
+       sp->psc_ctrl = PSC_CTRL_ENABLE;
+       au_sync();
+
        for (i = 0; !err && i < num; i++) {
                p = &msgs[i];
                err = do_address(adap, p->addr, p->flags & I2C_M_RD,
@@ -288,6 +292,10 @@ au1550_xfer(struct i2c_adapter *i2c_adap, struct i2c_msg *msgs, int num)
        */
        if (err == 0)
                err = num;
+
+       sp->psc_ctrl = PSC_CTRL_SUSPEND;
+       au_sync();
+
        return err;
 }
 
@@ -302,6 +310,61 @@ static const struct i2c_algorithm au1550_algo = {
        .functionality  = au1550_func,
 };
 
+/*
+ * Switch the PSC to SMBus mode and program psc_smbcfg (including the
+ * divide-by-8 clock), psc_smbmsk and the psc_smbtmr protocol timers.
+ * The PSC is left in PSC_CTRL_SUSPEND when this returns.
+ */
+static void i2c_au1550_setup(struct i2c_au1550_data *priv)
+{
+       volatile psc_smb_t *sp = (volatile psc_smb_t *)priv->psc_base;
+       u32 stat;
+
+       sp->psc_ctrl = PSC_CTRL_DISABLE;
+       au_sync();
+       sp->psc_sel = PSC_SEL_PS_SMBUSMODE;
+       sp->psc_smbcfg = 0;
+       au_sync();
+       sp->psc_ctrl = PSC_CTRL_ENABLE;
+       au_sync();
+       /* Busy-wait until PSC_SMBSTAT_SR is set; there is no timeout. */
+       do {
+               stat = sp->psc_smbstat;
+               au_sync();
+       } while ((stat & PSC_SMBSTAT_SR) == 0);
+
+       sp->psc_smbcfg = (PSC_SMBCFG_RT_FIFO8 | PSC_SMBCFG_TT_FIFO8 |
+                               PSC_SMBCFG_DD_DISABLE);
+
+       /* Divide by 8 to get a 6.25 MHz clock.  The later protocol
+        * timings are based on this clock.
+        */
+       sp->psc_smbcfg |= PSC_SMBCFG_SET_DIV(PSC_SMBCFG_DIV8);
+       sp->psc_smbmsk = PSC_SMBMSK_ALLMASK;
+       au_sync();
+
+       /* Set the protocol timer values.  See Table 71 in the
+        * Au1550 Data Book for standard timing values.
+        */
+       sp->psc_smbtmr = PSC_SMBTMR_SET_TH(0) | PSC_SMBTMR_SET_PS(15) | \
+               PSC_SMBTMR_SET_PU(15) | PSC_SMBTMR_SET_SH(15) | \
+               PSC_SMBTMR_SET_SU(15) | PSC_SMBTMR_SET_CL(15) | \
+               PSC_SMBTMR_SET_CH(15);
+       au_sync();
+
+       sp->psc_smbcfg |= PSC_SMBCFG_DE_ENABLE;
+       /* NOTE(review): the probe-time code this helper replaces polled
+        * PSC_SMBSTAT_DR after setting DE_ENABLE; this loop polls
+        * PSC_SMBSTAT_SR instead.  Confirm which bit is intended.
+        */
+       do {
+               stat = sp->psc_smbstat;
+               au_sync();
+       } while ((stat & PSC_SMBSTAT_SR) == 0);
+
+       sp->psc_ctrl = PSC_CTRL_SUSPEND;
+       au_sync();
+}
+
+/*
+ * Clear psc_smbcfg and write PSC_CTRL_DISABLE to psc_ctrl.  Shared by
+ * the probe error path, remove and suspend.
+ */
+static void i2c_au1550_disable(struct i2c_au1550_data *priv)
+{
+       volatile psc_smb_t *sp = (volatile psc_smb_t *)priv->psc_base;
+
+       sp->psc_smbcfg = 0;
+       sp->psc_ctrl = PSC_CTRL_DISABLE;
+       au_sync();
+}
+
 /*
  * registering functions to load algorithms at runtime
  * Prior to calling us, the 50MHz clock frequency and routing
@@ -311,9 +374,7 @@ static int __devinit
 i2c_au1550_probe(struct platform_device *pdev)
 {
        struct i2c_au1550_data *priv;
-       volatile psc_smb_t *sp;
        struct resource *r;
-       u32 stat;
        int ret;
 
        r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -348,43 +409,7 @@ i2c_au1550_probe(struct platform_device *pdev)
 
        /* Now, set up the PSC for SMBus PIO mode.
        */
-       sp = (volatile psc_smb_t *)priv->psc_base;
-       sp->psc_ctrl = PSC_CTRL_DISABLE;
-       au_sync();
-       sp->psc_sel = PSC_SEL_PS_SMBUSMODE;
-       sp->psc_smbcfg = 0;
-       au_sync();
-       sp->psc_ctrl = PSC_CTRL_ENABLE;
-       au_sync();
-       do {
-               stat = sp->psc_smbstat;
-               au_sync();
-       } while ((stat & PSC_SMBSTAT_SR) == 0);
-
-       sp->psc_smbcfg = (PSC_SMBCFG_RT_FIFO8 | PSC_SMBCFG_TT_FIFO8 |
-                               PSC_SMBCFG_DD_DISABLE);
-
-       /* Divide by 8 to get a 6.25 MHz clock.  The later protocol
-        * timings are based on this clock.
-        */
-       sp->psc_smbcfg |= PSC_SMBCFG_SET_DIV(PSC_SMBCFG_DIV8);
-       sp->psc_smbmsk = PSC_SMBMSK_ALLMASK;
-       au_sync();
-
-       /* Set the protocol timer values.  See Table 71 in the
-        * Au1550 Data Book for standard timing values.
-        */
-       sp->psc_smbtmr = PSC_SMBTMR_SET_TH(0) | PSC_SMBTMR_SET_PS(15) | \
-               PSC_SMBTMR_SET_PU(15) | PSC_SMBTMR_SET_SH(15) | \
-               PSC_SMBTMR_SET_SU(15) | PSC_SMBTMR_SET_CL(15) | \
-               PSC_SMBTMR_SET_CH(15);
-       au_sync();
-
-       sp->psc_smbcfg |= PSC_SMBCFG_DE_ENABLE;
-       do {
-               stat = sp->psc_smbstat;
-               au_sync();
-       } while ((stat & PSC_SMBSTAT_DR) == 0);
+       i2c_au1550_setup(priv);
 
        ret = i2c_add_numbered_adapter(&priv->adap);
        if (ret == 0) {
@@ -392,10 +417,7 @@ i2c_au1550_probe(struct platform_device *pdev)
                return 0;
        }
 
-       /* disable the PSC */
-       sp->psc_smbcfg = 0;
-       sp->psc_ctrl = PSC_CTRL_DISABLE;
-       au_sync();
+       i2c_au1550_disable(priv);
 
        release_resource(priv->ioarea);
        kfree(priv->ioarea);
@@ -409,27 +431,24 @@ static int __devexit
 i2c_au1550_remove(struct platform_device *pdev)
 {
        struct i2c_au1550_data *priv = platform_get_drvdata(pdev);
-       volatile psc_smb_t *sp = (volatile psc_smb_t *)priv->psc_base;
 
        platform_set_drvdata(pdev, NULL);
        i2c_del_adapter(&priv->adap);
-       sp->psc_smbcfg = 0;
-       sp->psc_ctrl = PSC_CTRL_DISABLE;
-       au_sync();
+       i2c_au1550_disable(priv);
        release_resource(priv->ioarea);
        kfree(priv->ioarea);
        kfree(priv);
        return 0;
 }
 
+#ifdef CONFIG_PM
 static int
 i2c_au1550_suspend(struct platform_device *pdev, pm_message_t state)
 {
        struct i2c_au1550_data *priv = platform_get_drvdata(pdev);
-       volatile psc_smb_t *sp = (volatile psc_smb_t *)priv->psc_base;
 
-       sp->psc_ctrl = PSC_CTRL_SUSPEND;
-       au_sync();
+       i2c_au1550_disable(priv);
+
        return 0;
 }
 
@@ -437,14 +456,15 @@ static int
 i2c_au1550_resume(struct platform_device *pdev)
 {
        struct i2c_au1550_data *priv = platform_get_drvdata(pdev);
-       volatile psc_smb_t *sp = (volatile psc_smb_t *)priv->psc_base;
 
-       sp->psc_ctrl = PSC_CTRL_ENABLE;
-       au_sync();
-       while (!(sp->psc_smbstat & PSC_SMBSTAT_SR))
-               au_sync();
+       i2c_au1550_setup(priv);
+
        return 0;
 }
+#else
+#define i2c_au1550_suspend     NULL
+#define i2c_au1550_resume      NULL
+#endif
 
 static struct platform_driver au1xpsc_smbus_driver = {
        .driver = {
diff --git a/drivers/i2c/busses/i2c-cpm.c b/drivers/i2c/busses/i2c-cpm.c
new file mode 100644 (file)
index 0000000..8164de1
--- /dev/null
@@ -0,0 +1,745 @@
+/*
+ * Freescale CPM1/CPM2 I2C interface.
+ * Copyright (c) 1999 Dan Malek (dmalek@jlc.net).
+ *
+ * moved into proper i2c interface;
+ * Brad Parker (brad@heeltoe.com)
+ *
+ * Parts from dbox2_i2c.c (cvs.tuxbox.org)
+ * (C) 2000-2001 Felix Domke (tmbinc@gmx.net), Gillem (htoa@gmx.net)
+ *
+ * (C) 2007 Montavista Software, Inc.
+ * Vitaly Bordug <vitb@kernel.crashing.org>
+ *
+ * Converted to of_platform_device. Renamed to i2c-cpm.c.
+ * (C) 2007,2008 Jochen Friedrich <jochen@scram.de>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/errno.h>
+#include <linux/stddef.h>
+#include <linux/i2c.h>
+#include <linux/io.h>
+#include <linux/dma-mapping.h>
+#include <linux/of_device.h>
+#include <linux/of_platform.h>
+#include <linux/of_i2c.h>
+#include <sysdev/fsl_soc.h>
+#include <asm/cpm.h>
+
+/* Try to define this if you have an older CPU (earlier than rev D4) */
+/* However, better use a GPIO based bitbang driver in this case :/   */
+#undef I2C_CHIP_ERRATA
+
+#define CPM_MAX_READ    513
+#define CPM_MAXBD       4
+
+#define I2C_EB                 (0x10) /* Big endian mode */
+#define I2C_EB_CPM2            (0x30) /* Big endian mode, memory snoop */
+
+#define DPRAM_BASE             ((u8 __iomem __force *)cpm_muram_addr(0))
+
+/*
+ * I2C parameter RAM.  cpm_reset_i2c_params() programs these fields with
+ * out_8()/out_be16()/out_be32(); the ones marked "Internal" are only
+ * ever written as zero by this driver.
+ */
+struct i2c_ram {
+       ushort  rbase;          /* Rx Buffer descriptor base address */
+       ushort  tbase;          /* Tx Buffer descriptor base address */
+       u_char  rfcr;           /* Rx function code */
+       u_char  tfcr;           /* Tx function code */
+       ushort  mrblr;          /* Max receive buffer length */
+       uint    rstate;         /* Internal */
+       uint    rdp;            /* Internal */
+       ushort  rbptr;          /* Rx Buffer descriptor pointer */
+       ushort  rbc;            /* Internal */
+       uint    rxtmp;          /* Internal */
+       uint    tstate;         /* Internal */
+       uint    tdp;            /* Internal */
+       ushort  tbptr;          /* Tx Buffer descriptor pointer */
+       ushort  tbc;            /* Internal */
+       uint    txtmp;          /* Internal */
+       char    res1[4];        /* Reserved */
+       ushort  rpbase;         /* Relocation pointer */
+       char    res2[2];        /* Reserved */
+};
+
+#define I2COM_START    0x80
+#define I2COM_MASTER   0x01
+#define I2CER_TXE      0x10
+#define I2CER_BUSY     0x04
+#define I2CER_TXB      0x02
+#define I2CER_RXB      0x01
+#define I2MOD_EN       0x01
+
+/*
+ * I2C Registers.  Each 8-bit register is followed by three reserved
+ * bytes.
+ */
+struct i2c_reg {
+       u8      i2mod;          /* Mode; I2MOD_EN enables the controller */
+       u8      res1[3];
+       u8      i2add;          /* Address; cpm_i2c_setup() writes 0x7f << 1 */
+       u8      res2[3];
+       u8      i2brg;          /* Baud rate generator divider */
+       u8      res3[3];
+       u8      i2com;          /* Command; I2COM_START, I2COM_MASTER */
+       u8      res4[3];
+       u8      i2cer;          /* Events (I2CER_*); written back to clear */
+       u8      res5[3];
+       u8      i2cmr;          /* Interrupt mask; 0 disables all interrupts */
+};
+
+/* Per-controller driver state, allocated in cpm_i2c_probe(). */
+struct cpm_i2c {
+       char *base;             /* NOTE(review): not referenced in this file */
+       struct of_device *ofdev;
+       struct i2c_adapter adap;
+       uint dp_addr;           /* muram offset of the Tx+Rx BD block */
+       int version; /* CPM1=1, CPM2=2 */
+       int irq;
+       int cp_command;         /* from the "fsl,cpm-command" property */
+       int freq;               /* "clock-frequency" property, else 60000 */
+       struct i2c_reg __iomem *i2c_reg;
+       struct i2c_ram __iomem *i2c_ram;
+       u16 i2c_addr;           /* muram offset of the parameter RAM */
+       wait_queue_head_t i2c_wait;     /* woken by cpm_i2c_interrupt() */
+       cbd_t __iomem *tbase;   /* first of CPM_MAXBD Tx descriptors */
+       cbd_t __iomem *rbase;   /* first of CPM_MAXBD Rx descriptors */
+       u_char *txbuf[CPM_MAXBD];       /* CPU addresses of the DMA buffers */
+       u_char *rxbuf[CPM_MAXBD];
+       u32 txdma[CPM_MAXBD];           /* DMA addresses of the same buffers */
+       u32 rxdma[CPM_MAXBD];
+};
+
+/*
+ * Interrupt handler; dev_id is the i2c_adapter passed to request_irq().
+ * Clears the pending events by writing the value read from i2cer back
+ * to it, then wakes any sleeper on i2c_wait.  Returns IRQ_NONE when no
+ * event bit was set.
+ */
+static irqreturn_t cpm_i2c_interrupt(int irq, void *dev_id)
+{
+       struct i2c_adapter *adap = dev_id;
+       struct cpm_i2c *cpm = i2c_get_adapdata(adap);
+       struct i2c_reg __iomem *regs = cpm->i2c_reg;
+       int events;
+
+       /* Clear interrupt. */
+       events = in_8(&regs->i2cer);
+       out_8(&regs->i2cer, events);
+
+       dev_dbg(&adap->dev, "Interrupt: %x\n", events);
+
+       wake_up_interruptible(&cpm->i2c_wait);
+
+       if (!events)
+               return IRQ_NONE;
+       return IRQ_HANDLED;
+}
+
+/*
+ * (Re)initialise the parameter RAM: BD base offsets relative to
+ * DPRAM_BASE, the function codes for this CPM version, the maximum
+ * receive length, and zero for all remaining state fields.
+ */
+static void cpm_reset_i2c_params(struct cpm_i2c *cpm)
+{
+       struct i2c_ram __iomem *ram = cpm->i2c_ram;
+       /* CPM1 uses I2C_EB; anything else gets I2C_EB_CPM2 */
+       u8 fcr = (cpm->version == 1) ? I2C_EB : I2C_EB_CPM2;
+
+       /* Set up the I2C parameters in the parameter ram. */
+       out_be16(&ram->tbase, (u8 __iomem *)cpm->tbase - DPRAM_BASE);
+       out_be16(&ram->rbase, (u8 __iomem *)cpm->rbase - DPRAM_BASE);
+
+       out_8(&ram->tfcr, fcr);
+       out_8(&ram->rfcr, fcr);
+
+       out_be16(&ram->mrblr, CPM_MAX_READ);
+
+       /* Receive side state */
+       out_be32(&ram->rstate, 0);
+       out_be32(&ram->rdp, 0);
+       out_be16(&ram->rbptr, 0);
+       out_be16(&ram->rbc, 0);
+       out_be32(&ram->rxtmp, 0);
+
+       /* Transmit side state */
+       out_be32(&ram->tstate, 0);
+       out_be32(&ram->tdp, 0);
+       out_be16(&ram->tbptr, 0);
+       out_be16(&ram->tbc, 0);
+       out_be32(&ram->txtmp, 0);
+}
+
+/*
+ * Abort path used by cpm_i2c_xfer(): issue CPM_CR_CLOSE_RX_BD for this
+ * controller, mask all interrupts and write 0xff to the event register.
+ */
+static void cpm_i2c_force_close(struct i2c_adapter *adap)
+{
+       struct cpm_i2c *cpm = i2c_get_adapdata(adap);
+       struct i2c_reg __iomem *i2c_reg = cpm->i2c_reg;
+
+       dev_dbg(&adap->dev, "cpm_i2c_force_close()\n");
+
+       cpm_command(cpm->cp_command, CPM_CR_CLOSE_RX_BD);
+
+       out_8(&i2c_reg->i2cmr, 0x00);   /* Disable all interrupts */
+       out_8(&i2c_reg->i2cer, 0xff);
+}
+
+/*
+ * Fill in the buffer descriptors for one message of a transfer.
+ *
+ * adap: adapter whose adapdata is the struct cpm_i2c
+ * pmsg: message to queue
+ * num:  total number of messages in the transfer
+ * tx:   index of the Tx descriptor/buffer used for this message
+ * rx:   index of the Rx descriptor armed when pmsg is a read
+ *
+ * The first Tx byte is always the address byte; for writes the payload
+ * is copied in behind it.  The Rx buffer itself is not touched here, it
+ * is only read back in cpm_i2c_check_message().
+ */
+static void cpm_i2c_parse_message(struct i2c_adapter *adap,
+       struct i2c_msg *pmsg, int num, int tx, int rx)
+{
+       cbd_t __iomem *tbdf;
+       cbd_t __iomem *rbdf;
+       u_char addr;
+       u_char *tb;
+       struct cpm_i2c *cpm = i2c_get_adapdata(adap);
+
+       tbdf = cpm->tbase + tx;
+       rbdf = cpm->rbase + rx;
+
+       addr = pmsg->addr << 1;
+       if (pmsg->flags & I2C_M_RD)
+               addr |= 1;
+
+       tb = cpm->txbuf[tx];
+
+       tb[0] = addr;           /* Device address byte w/rw flag */
+
+       /* +1 accounts for the address byte in front of the payload */
+       out_be16(&tbdf->cbd_datlen, pmsg->len + 1);
+       out_be16(&tbdf->cbd_sc, 0);
+
+       if (!(pmsg->flags & I2C_M_NOSTART))
+               setbits16(&tbdf->cbd_sc, BD_I2C_START);
+
+       /* Last message of the transfer closes the Tx ring */
+       if (tx + 1 == num)
+               setbits16(&tbdf->cbd_sc, BD_SC_LAST | BD_SC_WRAP);
+
+       if (pmsg->flags & I2C_M_RD) {
+               /*
+                * To read, we need an empty buffer of the proper length.
+                * All that is used is the first byte for address, the remainder
+                * is just used for timing (and doesn't really have to exist).
+                */
+
+               dev_dbg(&adap->dev, "cpm_i2c_read(abyte=0x%x)\n", addr);
+
+               out_be16(&rbdf->cbd_datlen, 0);
+               out_be16(&rbdf->cbd_sc, BD_SC_EMPTY | BD_SC_INTRPT);
+
+               if (rx + 1 == CPM_MAXBD)
+                       setbits16(&rbdf->cbd_sc, BD_SC_WRAP);
+
+               /* Order the descriptor setup before marking it ready */
+               eieio();
+               setbits16(&tbdf->cbd_sc, BD_SC_READY);
+       } else {
+               dev_dbg(&adap->dev, "cpm_i2c_write(abyte=0x%x)\n", addr);
+
+               memcpy(tb+1, pmsg->buf, pmsg->len);
+
+               /* Order the payload copy before marking the BD ready */
+               eieio();
+               setbits16(&tbdf->cbd_sc, BD_SC_READY | BD_SC_INTRPT);
+       }
+}
+
+/*
+ * Inspect the descriptors of a completed message and, for reads, copy
+ * the received bytes into pmsg->buf.
+ *
+ * tx and rx are the descriptor indices that were handed to
+ * cpm_i2c_parse_message() for this message.
+ *
+ * Returns 0 on success, -ENXIO when the Tx descriptor reports a NAK,
+ * -EREMOTEIO when a read left the Rx buffer empty or overran, and -EIO
+ * on a write underrun or collision.
+ */
+static int cpm_i2c_check_message(struct i2c_adapter *adap,
+       struct i2c_msg *pmsg, int tx, int rx)
+{
+       cbd_t __iomem *tbdf;
+       cbd_t __iomem *rbdf;
+       u_char *rb;
+       struct cpm_i2c *cpm = i2c_get_adapdata(adap);
+
+       tbdf = cpm->tbase + tx;
+       rbdf = cpm->rbase + rx;
+
+       rb = cpm->rxbuf[rx];
+
+       /*
+        * Align read buffer the same way cpm_i2c_setup() aligned the DMA
+        * address.  Cast through ulong, not uint, so the pointer is not
+        * truncated where pointers are wider than int.
+        */
+       rb = (u_char *) (((ulong) rb + 1) & ~1);
+
+       eieio();
+       if (pmsg->flags & I2C_M_RD) {
+               dev_dbg(&adap->dev, "tx sc 0x%04x, rx sc 0x%04x\n",
+                       in_be16(&tbdf->cbd_sc), in_be16(&rbdf->cbd_sc));
+
+               if (in_be16(&tbdf->cbd_sc) & BD_SC_NAK) {
+                       dev_dbg(&adap->dev, "I2C read; No ack\n");
+                       return -ENXIO;
+               }
+               if (in_be16(&rbdf->cbd_sc) & BD_SC_EMPTY) {
+                       dev_err(&adap->dev,
+                               "I2C read; complete but rbuf empty\n");
+                       return -EREMOTEIO;
+               }
+               if (in_be16(&rbdf->cbd_sc) & BD_SC_OV) {
+                       dev_err(&adap->dev, "I2C read; Overrun\n");
+                       return -EREMOTEIO;
+               }
+               memcpy(pmsg->buf, rb, pmsg->len);
+       } else {
+               dev_dbg(&adap->dev, "tx sc %d 0x%04x\n", tx,
+                       in_be16(&tbdf->cbd_sc));
+
+               if (in_be16(&tbdf->cbd_sc) & BD_SC_NAK) {
+                       dev_dbg(&adap->dev, "I2C write; No ack\n");
+                       return -ENXIO;
+               }
+               if (in_be16(&tbdf->cbd_sc) & BD_SC_UN) {
+                       dev_err(&adap->dev, "I2C write; Underrun\n");
+                       return -EIO;
+               }
+               if (in_be16(&tbdf->cbd_sc) & BD_SC_CL) {
+                       dev_err(&adap->dev, "I2C write; Collision\n");
+                       return -EIO;
+               }
+       }
+       return 0;
+}
+
+/*
+ * master_xfer callback: queue all messages on the buffer descriptors,
+ * start the controller, then wait for and check each message in order.
+ *
+ * Returns num on success.  Returns -EINVAL when there are more than
+ * CPM_MAXBD messages or any message is CPM_MAX_READ bytes or longer,
+ * -EREMOTEIO when a message does not complete within one second, the
+ * negative value from the wait when it is interrupted by a signal, or
+ * the error reported by cpm_i2c_check_message().
+ */
+static int cpm_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num)
+{
+       struct cpm_i2c *cpm = i2c_get_adapdata(adap);
+       struct i2c_reg __iomem *i2c_reg = cpm->i2c_reg;
+       struct i2c_ram __iomem *i2c_ram = cpm->i2c_ram;
+       struct i2c_msg *pmsg;
+       int ret, i;
+       int tptr;
+       int rptr;
+       cbd_t __iomem *tbdf;
+       cbd_t __iomem *rbdf;
+
+       if (num > CPM_MAXBD)
+               return -EINVAL;
+
+       /* Reject any message (read or write) that does not fit a buffer */
+       for (i = 0; i < num; i++) {
+               pmsg = &msgs[i];
+               if (pmsg->len >= CPM_MAX_READ)
+                       return -EINVAL;
+       }
+
+       /* Reset to use first buffer */
+       out_be16(&i2c_ram->rbptr, in_be16(&i2c_ram->rbase));
+       out_be16(&i2c_ram->tbptr, in_be16(&i2c_ram->tbase));
+
+       tbdf = cpm->tbase;
+       rbdf = cpm->rbase;
+
+       tptr = 0;
+       rptr = 0;
+
+       /* Every message takes a Tx BD; only reads also take an Rx BD */
+       while (tptr < num) {
+               pmsg = &msgs[tptr];
+               dev_dbg(&adap->dev, "R: %d T: %d\n", rptr, tptr);
+
+               cpm_i2c_parse_message(adap, pmsg, num, tptr, rptr);
+               if (pmsg->flags & I2C_M_RD)
+                       rptr++;
+               tptr++;
+       }
+       /* Start transfer now */
+       /* Enable RX/TX/Error interrupts */
+       out_8(&i2c_reg->i2cmr, I2CER_TXE | I2CER_TXB | I2CER_RXB);
+       out_8(&i2c_reg->i2cer, 0xff);   /* Clear interrupt status */
+       /* Chip bug, set enable here */
+       setbits8(&i2c_reg->i2mod, I2MOD_EN);    /* Enable */
+       /* Begin transmission */
+       setbits8(&i2c_reg->i2com, I2COM_START);
+
+       tptr = 0;
+       rptr = 0;
+
+       while (tptr < num) {
+               /* Check for outstanding messages */
+               dev_dbg(&adap->dev, "test ready.\n");
+               pmsg = &msgs[tptr];
+               if (pmsg->flags & I2C_M_RD)
+                       ret = wait_event_interruptible_timeout(cpm->i2c_wait,
+                               !(in_be16(&rbdf[rptr].cbd_sc) & BD_SC_EMPTY),
+                               1 * HZ);
+               else
+                       ret = wait_event_interruptible_timeout(cpm->i2c_wait,
+                               !(in_be16(&tbdf[tptr].cbd_sc) & BD_SC_READY),
+                               1 * HZ);
+               if (ret == 0) {
+                       ret = -EREMOTEIO;
+                       dev_err(&adap->dev, "I2C transfer: timeout\n");
+                       goto out_err;
+               }
+               /*
+                * A negative value means the wait was interrupted by a
+                * signal.  Abort instead of looping: with the signal still
+                * pending the wait would return immediately every time and
+                * tptr would never advance.
+                */
+               if (ret < 0)
+                       goto out_err;
+
+               dev_dbg(&adap->dev, "ready.\n");
+               ret = cpm_i2c_check_message(adap, pmsg, tptr, rptr);
+               tptr++;
+               if (pmsg->flags & I2C_M_RD)
+                       rptr++;
+               if (ret)
+                       goto out_err;
+       }
+#ifdef I2C_CHIP_ERRATA
+       /*
+        * Chip errata, clear enable. This is not needed on rev D4 CPUs.
+        * Disabling I2C too early may cause too short stop condition
+        */
+       udelay(4);
+       clrbits8(&i2c_reg->i2mod, I2MOD_EN);
+#endif
+       return num;
+
+out_err:
+       cpm_i2c_force_close(adap);
+#ifdef I2C_CHIP_ERRATA
+       /*
+        * Chip errata, clear enable. This is not needed on rev D4 CPUs.
+        */
+       clrbits8(&i2c_reg->i2mod, I2MOD_EN);
+#endif
+       return ret;
+}
+
+/*
+ * functionality callback: plain I2C plus the emulated SMBus set with
+ * I2C_FUNC_SMBUS_QUICK masked out.
+ */
+static u32 cpm_i2c_func(struct i2c_adapter *adap)
+{
+       return I2C_FUNC_I2C | (I2C_FUNC_SMBUS_EMUL & ~I2C_FUNC_SMBUS_QUICK);
+}
+
+/* -----exported algorithm data: ------------------------------------- */
+
+/* Algorithm callbacks referenced from the cpm_ops adapter template. */
+static const struct i2c_algorithm cpm_i2c_algo = {
+       .master_xfer = cpm_i2c_xfer,
+       .functionality = cpm_i2c_func,
+};
+
+/*
+ * Adapter template; cpm_i2c_probe() copies it into each cpm_i2c.adap.
+ * The class may be overridden from the "linux,i2c-class" property in
+ * cpm_i2c_setup().
+ */
+static const struct i2c_adapter cpm_ops = {
+       .owner          = THIS_MODULE,
+       .name           = "i2c-cpm",
+       .algo           = &cpm_i2c_algo,
+       .class          = I2C_CLASS_HWMON | I2C_CLASS_SPD,
+};
+
+/*
+ * Acquire and initialise everything the controller needs: the IRQ, the
+ * parameter RAM and register mappings, the device tree properties, the
+ * buffer descriptors in DP RAM and the DMA buffers.  Finally program
+ * the controller for master mode with interrupts masked.
+ *
+ * Returns 0 on success or a negative errno; failures after the IRQ has
+ * been requested unwind through the out_* labels at the bottom.
+ */
+static int __devinit cpm_i2c_setup(struct cpm_i2c *cpm)
+{
+       struct of_device *ofdev = cpm->ofdev;
+       const u32 *data;
+       int len, ret, i;
+       void __iomem *i2c_base;
+       cbd_t __iomem *tbdf;
+       cbd_t __iomem *rbdf;
+       unsigned char brg;
+
+       dev_dbg(&cpm->ofdev->dev, "cpm_i2c_setup()\n");
+
+       init_waitqueue_head(&cpm->i2c_wait);
+
+       cpm->irq = of_irq_to_resource(ofdev->node, 0, NULL);
+       if (cpm->irq == NO_IRQ)
+               return -EINVAL;
+
+       /*
+        * Install interrupt handler.
+        * NOTE(review): the handler dereferences cpm->i2c_reg, which is
+        * only assigned further down; confirm the interrupt cannot fire
+        * before then.
+        */
+       ret = request_irq(cpm->irq, cpm_i2c_interrupt, 0, "cpm_i2c",
+                         &cpm->adap);
+       if (ret)
+               return ret;
+
+       /* I2C parameter RAM */
+       i2c_base = of_iomap(ofdev->node, 1);
+       if (i2c_base == NULL) {
+               ret = -EINVAL;
+               goto out_irq;
+       }
+
+       if (of_device_is_compatible(ofdev->node, "fsl,cpm1-i2c")) {
+
+               /* Check for and use a microcode relocation patch. */
+               cpm->i2c_ram = i2c_base;
+               cpm->i2c_addr = in_be16(&cpm->i2c_ram->rpbase);
+
+               /*
+                * Maybe should use cpm_muram_alloc instead of hardcoding
+                * this in micropatch.c
+                */
+               if (cpm->i2c_addr) {
+                       cpm->i2c_ram = cpm_muram_addr(cpm->i2c_addr);
+                       iounmap(i2c_base);
+               }
+
+               cpm->version = 1;
+
+       } else if (of_device_is_compatible(ofdev->node, "fsl,cpm2-i2c")) {
+               /*
+                * NOTE(review): the result of cpm_muram_alloc() is used
+                * without being checked for failure here.
+                */
+               cpm->i2c_addr = cpm_muram_alloc(sizeof(struct i2c_ram), 64);
+               cpm->i2c_ram = cpm_muram_addr(cpm->i2c_addr);
+               out_be16(i2c_base, cpm->i2c_addr);
+               iounmap(i2c_base);
+
+               cpm->version = 2;
+
+       } else {
+               iounmap(i2c_base);
+               ret = -EINVAL;
+               goto out_irq;
+       }
+
+       /* I2C control/status registers */
+       cpm->i2c_reg = of_iomap(ofdev->node, 0);
+       if (cpm->i2c_reg == NULL) {
+               ret = -EINVAL;
+               goto out_ram;
+       }
+
+       /* Mandatory property: must be exactly one 32-bit cell */
+       data = of_get_property(ofdev->node, "fsl,cpm-command", &len);
+       if (!data || len != 4) {
+               ret = -EINVAL;
+               goto out_reg;
+       }
+       cpm->cp_command = *data;
+
+       /* Optional properties: keep the defaults when absent/malformed */
+       data = of_get_property(ofdev->node, "linux,i2c-class", &len);
+       if (data && len == 4)
+               cpm->adap.class = *data;
+
+       data = of_get_property(ofdev->node, "clock-frequency", &len);
+       if (data && len == 4)
+               cpm->freq = *data;
+       else
+               cpm->freq = 60000; /* use 60kHz i2c clock by default */
+
+       /*
+        * Allocate space for CPM_MAXBD transmit and receive buffer
+        * descriptors in the DP ram.
+        */
+       cpm->dp_addr = cpm_muram_alloc(sizeof(cbd_t) * 2 * CPM_MAXBD, 8);
+       if (!cpm->dp_addr) {
+               ret = -ENOMEM;
+               goto out_reg;
+       }
+
+       /* Tx descriptors first, Rx descriptors directly behind them */
+       cpm->tbase = cpm_muram_addr(cpm->dp_addr);
+       cpm->rbase = cpm_muram_addr(cpm->dp_addr + sizeof(cbd_t) * CPM_MAXBD);
+
+       /* Allocate TX and RX buffers */
+
+       tbdf = cpm->tbase;
+       rbdf = cpm->rbase;
+
+       for (i = 0; i < CPM_MAXBD; i++) {
+               cpm->rxbuf[i] = dma_alloc_coherent(
+                       NULL, CPM_MAX_READ + 1, &cpm->rxdma[i], GFP_KERNEL);
+               if (!cpm->rxbuf[i]) {
+                       ret = -ENOMEM;
+                       goto out_muram;
+               }
+               /* Rx DMA address is rounded up to an even address */
+               out_be32(&rbdf[i].cbd_bufaddr, ((cpm->rxdma[i] + 1) & ~1));
+
+               cpm->txbuf[i] = (unsigned char *)dma_alloc_coherent(
+                       NULL, CPM_MAX_READ + 1, &cpm->txdma[i], GFP_KERNEL);
+               if (!cpm->txbuf[i]) {
+                       ret = -ENOMEM;
+                       goto out_muram;
+               }
+               out_be32(&tbdf[i].cbd_bufaddr, cpm->txdma[i]);
+       }
+
+       /* Initialize Tx/Rx parameters. */
+
+       cpm_reset_i2c_params(cpm);
+
+       dev_dbg(&cpm->ofdev->dev, "i2c_ram 0x%p, i2c_addr 0x%04x, freq %d\n",
+               cpm->i2c_ram, cpm->i2c_addr, cpm->freq);
+       dev_dbg(&cpm->ofdev->dev, "tbase 0x%04x, rbase 0x%04x\n",
+               (u8 __iomem *)cpm->tbase - DPRAM_BASE,
+               (u8 __iomem *)cpm->rbase - DPRAM_BASE);
+
+       cpm_command(cpm->cp_command, CPM_CR_INIT_TRX);
+
+       /*
+        * Select an invalid address. Just make sure we don't use loopback mode
+        */
+       out_8(&cpm->i2c_reg->i2add, 0x7f << 1);
+
+       /*
+        * PDIV is set to 00 in i2mod, so brgclk/32 is used as input to the
+        * i2c baud rate generator. This is divided by 2 x (DIV + 3) to get
+        * the actual i2c bus frequency.
+        *
+        * NOTE(review): cpm->freq comes straight from the device tree; a
+        * zero value divides by zero and a large one makes the result
+        * wrap when stored in the unsigned char brg.
+        */
+       brg = get_brgfreq() / (32 * 2 * cpm->freq) - 3;
+       out_8(&cpm->i2c_reg->i2brg, brg);
+
+       out_8(&cpm->i2c_reg->i2mod, 0x00);
+       out_8(&cpm->i2c_reg->i2com, I2COM_MASTER);      /* Master mode */
+
+       /* Disable interrupts. */
+       out_8(&cpm->i2c_reg->i2cmr, 0);
+       out_8(&cpm->i2c_reg->i2cer, 0xff);
+
+       return 0;
+
+out_muram:
+       /* Buffers not yet allocated are still NULL and are skipped */
+       for (i = 0; i < CPM_MAXBD; i++) {
+               if (cpm->rxbuf[i])
+                       dma_free_coherent(NULL, CPM_MAX_READ + 1,
+                               cpm->rxbuf[i], cpm->rxdma[i]);
+               if (cpm->txbuf[i])
+                       dma_free_coherent(NULL, CPM_MAX_READ + 1,
+                               cpm->txbuf[i], cpm->txdma[i]);
+       }
+       cpm_muram_free(cpm->dp_addr);
+out_reg:
+       iounmap(cpm->i2c_reg);
+out_ram:
+       /* CPM1 without relocation still holds the of_iomap() mapping */
+       if ((cpm->version == 1) && (!cpm->i2c_addr))
+               iounmap(cpm->i2c_ram);
+       if (cpm->version == 2)
+               cpm_muram_free(cpm->i2c_addr);
+out_irq:
+       free_irq(cpm->irq, &cpm->adap);
+       return ret;
+}
+
+/*
+ * Undo cpm_i2c_setup(): disable the controller, mask its interrupts,
+ * release the IRQ, then free the DMA buffers, the buffer descriptors
+ * and the register / parameter RAM resources.
+ */
+static void cpm_i2c_shutdown(struct cpm_i2c *cpm)
+{
+       int i;
+
+       /* Shut down I2C. */
+       clrbits8(&cpm->i2c_reg->i2mod, I2MOD_EN);
+
+       /* Disable interrupts */
+       out_8(&cpm->i2c_reg->i2cmr, 0);
+       out_8(&cpm->i2c_reg->i2cer, 0xff);
+
+       free_irq(cpm->irq, &cpm->adap);
+
+       /* Free all memory */
+       for (i = 0; i < CPM_MAXBD; i++) {
+               dma_free_coherent(NULL, CPM_MAX_READ + 1,
+                       cpm->rxbuf[i], cpm->rxdma[i]);
+               dma_free_coherent(NULL, CPM_MAX_READ + 1,
+                       cpm->txbuf[i], cpm->txdma[i]);
+       }
+
+       cpm_muram_free(cpm->dp_addr);
+       iounmap(cpm->i2c_reg);
+
+       /* Same parameter RAM release rules as cpm_i2c_setup()'s out_ram */
+       if ((cpm->version == 1) && (!cpm->i2c_addr))
+               iounmap(cpm->i2c_ram);
+       if (cpm->version == 2)
+               cpm_muram_free(cpm->i2c_addr);
+}
+
+/*
+ * Probe callback: allocate the driver state, initialise the hardware
+ * through cpm_i2c_setup() and register the adapter.  The adapter gets a
+ * fixed bus number when the node carries a 4-byte "linux,i2c-index"
+ * property and a dynamically assigned one otherwise.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int __devinit cpm_i2c_probe(struct of_device *ofdev,
+                        const struct of_device_id *match)
+{
+       struct cpm_i2c *cpm;
+       const u32 *nr_prop;
+       int prop_len;
+       int err;
+
+       cpm = kzalloc(sizeof(*cpm), GFP_KERNEL);
+       if (cpm == NULL)
+               return -ENOMEM;
+
+       cpm->ofdev = ofdev;
+
+       dev_set_drvdata(&ofdev->dev, cpm);
+
+       /* Start from the adapter template, then tie it to this device */
+       cpm->adap = cpm_ops;
+       i2c_set_adapdata(&cpm->adap, cpm);
+       cpm->adap.dev.parent = &ofdev->dev;
+
+       err = cpm_i2c_setup(cpm);
+       if (err) {
+               dev_err(&ofdev->dev, "Unable to init hardware\n");
+               goto free_cpm;
+       }
+
+       /* register new adapter to i2c module... */
+
+       nr_prop = of_get_property(ofdev->node, "linux,i2c-index", &prop_len);
+       if (!nr_prop || prop_len != 4) {
+               err = i2c_add_adapter(&cpm->adap);
+       } else {
+               cpm->adap.nr = *nr_prop;
+               err = i2c_add_numbered_adapter(&cpm->adap);
+       }
+
+       if (err < 0) {
+               dev_err(&ofdev->dev, "Unable to register with I2C\n");
+               goto shutdown_hw;
+       }
+
+       dev_dbg(&ofdev->dev, "hw routines for %s registered.\n",
+               cpm->adap.name);
+
+       /*
+        * register OF I2C devices
+        */
+       of_register_i2c_devices(&cpm->adap, ofdev->node);
+
+       return 0;
+
+shutdown_hw:
+       cpm_i2c_shutdown(cpm);
+free_cpm:
+       dev_set_drvdata(&ofdev->dev, NULL);
+       kfree(cpm);
+
+       return err;
+}
+
+/*
+ * Remove callback: unregister the adapter first, then shut the hardware
+ * down and free the driver state.  Always returns 0.
+ */
+static int __devexit cpm_i2c_remove(struct of_device *ofdev)
+{
+       struct cpm_i2c *cpm = dev_get_drvdata(&ofdev->dev);
+
+       i2c_del_adapter(&cpm->adap);
+
+       cpm_i2c_shutdown(cpm);
+
+       dev_set_drvdata(&ofdev->dev, NULL);
+       kfree(cpm);
+
+       return 0;
+}
+
+/*
+ * Device tree compatibles handled by this driver; cpm_i2c_setup() tells
+ * CPM1 and CPM2 apart using the same strings.
+ */
+static const struct of_device_id cpm_i2c_match[] = {
+       {
+               .compatible = "fsl,cpm1-i2c",
+       },
+       {
+               .compatible = "fsl,cpm2-i2c",
+       },
+       {},
+};
+
+MODULE_DEVICE_TABLE(of, cpm_i2c_match);
+
+/* Driver descriptor registered by cpm_i2c_init(). */
+static struct of_platform_driver cpm_i2c_driver = {
+       .match_table    = cpm_i2c_match,
+       .probe          = cpm_i2c_probe,
+       .remove         = __devexit_p(cpm_i2c_remove),
+       .driver         = {
+               .name   = "fsl-i2c-cpm",
+               .owner  = THIS_MODULE,
+       }
+};
+
+/* Module entry point: register cpm_i2c_driver and return its result. */
+static int __init cpm_i2c_init(void)
+{
+       return of_register_platform_driver(&cpm_i2c_driver);
+}
+
+/* Module exit point: unregister cpm_i2c_driver. */
+static void __exit cpm_i2c_exit(void)
+{
+       of_unregister_platform_driver(&cpm_i2c_driver);
+}
+
+module_init(cpm_i2c_init);
+module_exit(cpm_i2c_exit);
+
+MODULE_AUTHOR("Jochen Friedrich <jochen@scram.de>");
+MODULE_DESCRIPTION("I2C-Bus adapter routines for CPM boards");
+MODULE_LICENSE("GPL");
index 7ecbfc429b19722e4c681958f1fee5eab5b73c24..af3846eda9858e15bd233aa6fe3172333fac5971 100644 (file)
@@ -85,6 +85,7 @@
 #define DAVINCI_I2C_MDR_MST    (1 << 10)
 #define DAVINCI_I2C_MDR_TRX    (1 << 9)
 #define DAVINCI_I2C_MDR_XA     (1 << 8)
+#define DAVINCI_I2C_MDR_RM     (1 << 7)
 #define DAVINCI_I2C_MDR_IRS    (1 << 5)
 
 #define DAVINCI_I2C_IMR_AAS    (1 << 6)
@@ -112,6 +113,7 @@ struct davinci_i2c_dev {
        u8                      *buf;
        size_t                  buf_len;
        int                     irq;
+       u8                      terminate;
        struct i2c_adapter      adapter;
 };
 
@@ -142,6 +144,7 @@ static int i2c_davinci_init(struct davinci_i2c_dev *dev)
        struct davinci_i2c_platform_data *pdata = dev->dev->platform_data;
        u16 psc;
        u32 clk;
+       u32 d;
        u32 clkh;
        u32 clkl;
        u32 input_clock = clk_get_rate(dev->clk);
@@ -171,23 +174,29 @@ static int i2c_davinci_init(struct davinci_i2c_dev *dev)
         *       if PSC > 1 , d = 5
         */
 
-       psc = 26; /* To get 1MHz clock */
+       /* get minimum of 7 MHz clock, but max of 12 MHz */
+       psc = (input_clock / 7000000) - 1;
+       if ((input_clock / (psc + 1)) > 12000000)
+               psc++;  /* better to run under spec than over */
+       d = (psc >= 2) ? 5 : 7 - psc;
 
-       clk = ((input_clock / (psc + 1)) / (pdata->bus_freq * 1000)) - 10;
-       clkh = (50 * clk) / 100;
+       clk = ((input_clock / (psc + 1)) / (pdata->bus_freq * 1000)) - (d << 1);
+       clkh = clk >> 1;
        clkl = clk - clkh;
 
        davinci_i2c_write_reg(dev, DAVINCI_I2C_PSC_REG, psc);
        davinci_i2c_write_reg(dev, DAVINCI_I2C_CLKH_REG, clkh);
        davinci_i2c_write_reg(dev, DAVINCI_I2C_CLKL_REG, clkl);
 
-       dev_dbg(dev->dev, "CLK  = %d\n", clk);
+       dev_dbg(dev->dev, "input_clock = %d, CLK = %d\n", input_clock, clk);
        dev_dbg(dev->dev, "PSC  = %d\n",
                davinci_i2c_read_reg(dev, DAVINCI_I2C_PSC_REG));
        dev_dbg(dev->dev, "CLKL = %d\n",
                davinci_i2c_read_reg(dev, DAVINCI_I2C_CLKL_REG));
        dev_dbg(dev->dev, "CLKH = %d\n",
                davinci_i2c_read_reg(dev, DAVINCI_I2C_CLKH_REG));
+       dev_dbg(dev->dev, "bus_freq = %dkHz, bus_delay = %d\n",
+               pdata->bus_freq, pdata->bus_delay);
 
        /* Take the I2C module out of reset: */
        w = davinci_i2c_read_reg(dev, DAVINCI_I2C_MDR_REG);
@@ -233,7 +242,6 @@ i2c_davinci_xfer_msg(struct i2c_adapter *adap, struct i2c_msg *msg, int stop)
        struct davinci_i2c_dev *dev = i2c_get_adapdata(adap);
        struct davinci_i2c_platform_data *pdata = dev->dev->platform_data;
        u32 flag;
-       u32 stat;
        u16 w;
        int r;
 
@@ -254,12 +262,9 @@ i2c_davinci_xfer_msg(struct i2c_adapter *adap, struct i2c_msg *msg, int stop)
 
        davinci_i2c_write_reg(dev, DAVINCI_I2C_CNT_REG, dev->buf_len);
 
-       init_completion(&dev->cmd_complete);
+       INIT_COMPLETION(dev->cmd_complete);
        dev->cmd_err = 0;
 
-       /* Clear any pending interrupts by reading the IVR */
-       stat = davinci_i2c_read_reg(dev, DAVINCI_I2C_IVR_REG);
-
        /* Take I2C out of reset, configure it as master and set the
         * start bit */
        flag = DAVINCI_I2C_MDR_IRS | DAVINCI_I2C_MDR_MST | DAVINCI_I2C_MDR_STT;
@@ -280,20 +285,34 @@ i2c_davinci_xfer_msg(struct i2c_adapter *adap, struct i2c_msg *msg, int stop)
                MOD_REG_BIT(w, DAVINCI_I2C_IMR_XRDY, 1);
        davinci_i2c_write_reg(dev, DAVINCI_I2C_IMR_REG, w);
 
+       dev->terminate = 0;
        /* write the data into mode register */
        davinci_i2c_write_reg(dev, DAVINCI_I2C_MDR_REG, flag);
 
        r = wait_for_completion_interruptible_timeout(&dev->cmd_complete,
                                                      DAVINCI_I2C_TIMEOUT);
-       dev->buf_len = 0;
-       if (r < 0)
-               return r;
-
        if (r == 0) {
                dev_err(dev->dev, "controller timed out\n");
                i2c_davinci_init(dev);
+               dev->buf_len = 0;
                return -ETIMEDOUT;
        }
+       if (dev->buf_len) {
+               /* This should be 0 if all bytes were transferred
+                * or dev->cmd_err denotes an error.
+                * A signal may have aborted the transfer.
+                */
+               if (r >= 0) {
+                       dev_err(dev->dev, "abnormal termination buf_len=%i\n",
+                               dev->buf_len);
+                       r = -EREMOTEIO;
+               }
+               dev->terminate = 1;
+               wmb();
+               dev->buf_len = 0;
+       }
+       if (r < 0)
+               return r;
 
        /* no error */
        if (likely(!dev->cmd_err))
@@ -338,12 +357,11 @@ i2c_davinci_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num)
 
        for (i = 0; i < num; i++) {
                ret = i2c_davinci_xfer_msg(adap, &msgs[i], (i == (num - 1)));
+               dev_dbg(dev->dev, "%s [%d/%d] ret: %d\n", __func__, i + 1, num,
+                       ret);
                if (ret < 0)
                        return ret;
        }
-
-       dev_dbg(dev->dev, "%s:%d ret: %d\n", __func__, __LINE__, ret);
-
        return num;
 }
 
@@ -352,6 +370,27 @@ static u32 i2c_davinci_func(struct i2c_adapter *adap)
        return I2C_FUNC_I2C | (I2C_FUNC_SMBUS_EMUL & ~I2C_FUNC_SMBUS_QUICK);
 }
 
+/*
+ * Called from the interrupt handler when a receive-ready interrupt arrives
+ * but the driver has no buffer space left for the byte.  Sets the NACK bit
+ * in the mode register and discards the received data.
+ */
+static void terminate_read(struct davinci_i2c_dev *dev)
+{
+       u16 mdr;
+
+       mdr = davinci_i2c_read_reg(dev, DAVINCI_I2C_MDR_REG);
+       mdr |= DAVINCI_I2C_MDR_NACK;
+       davinci_i2c_write_reg(dev, DAVINCI_I2C_MDR_REG, mdr);
+
+       /* Read the data register only to drop its contents */
+       davinci_i2c_read_reg(dev, DAVINCI_I2C_DRR_REG);
+
+       /* Expected when the transfer was flagged as terminated */
+       if (dev->terminate)
+               return;
+
+       dev_err(dev->dev, "RDR IRQ while no data requested\n");
+}
+
+/*
+ * Called from the interrupt handler when a transmit-ready interrupt arrives
+ * but the driver has nothing left to send.  Sets the RM and STP bits in the
+ * mode register.
+ */
+static void terminate_write(struct davinci_i2c_dev *dev)
+{
+       u16 mdr;
+
+       mdr = davinci_i2c_read_reg(dev, DAVINCI_I2C_MDR_REG);
+       mdr |= DAVINCI_I2C_MDR_RM | DAVINCI_I2C_MDR_STP;
+       davinci_i2c_write_reg(dev, DAVINCI_I2C_MDR_REG, mdr);
+
+       /* Expected when the transfer was flagged as terminated */
+       if (dev->terminate)
+               return;
+
+       dev_err(dev->dev, "TDR IRQ while no data to send\n");
+}
+
 /*
  * Interrupt service routine. This gets called whenever an I2C interrupt
  * occurs.
@@ -372,12 +411,15 @@ static irqreturn_t i2c_davinci_isr(int this_irq, void *dev_id)
 
                switch (stat) {
                case DAVINCI_I2C_IVR_AL:
+                       /* Arbitration lost, must retry */
                        dev->cmd_err |= DAVINCI_I2C_STR_AL;
+                       dev->buf_len = 0;
                        complete(&dev->cmd_complete);
                        break;
 
                case DAVINCI_I2C_IVR_NACK:
                        dev->cmd_err |= DAVINCI_I2C_STR_NACK;
+                       dev->buf_len = 0;
                        complete(&dev->cmd_complete);
                        break;
 
@@ -399,9 +441,10 @@ static irqreturn_t i2c_davinci_isr(int this_irq, void *dev_id)
                                davinci_i2c_write_reg(dev,
                                        DAVINCI_I2C_STR_REG,
                                        DAVINCI_I2C_IMR_RRDY);
-                       } else
-                               dev_err(dev->dev, "RDR IRQ while no "
-                                       "data requested\n");
+                       } else {
+                               /* signal can terminate transfer */
+                               terminate_read(dev);
+                       }
                        break;
 
                case DAVINCI_I2C_IVR_XRDY:
@@ -418,9 +461,10 @@ static irqreturn_t i2c_davinci_isr(int this_irq, void *dev_id)
                                davinci_i2c_write_reg(dev,
                                                      DAVINCI_I2C_IMR_REG,
                                                      w);
-                       } else
-                               dev_err(dev->dev, "TDR IRQ while no data to "
-                                       "send\n");
+                       } else {
+                               /* signal can terminate transfer */
+                               terminate_write(dev);
+                       }
                        break;
 
                case DAVINCI_I2C_IVR_SCD:
@@ -475,6 +519,7 @@ static int davinci_i2c_probe(struct platform_device *pdev)
                goto err_release_region;
        }
 
+       init_completion(&dev->cmd_complete);
        dev->dev = get_device(&pdev->dev);
        dev->irq = irq->start;
        platform_set_drvdata(pdev, dev);
index b7a9977b025f31a492d632f8b57cb49930f05466..7f38c01fb3a06ce83753f4d411cde7f907f91086 100644 (file)
@@ -196,13 +196,11 @@ static struct i2c_algo_pcf_data pcf_isa_data = {
        .getown     = pcf_isa_getown,
        .getclock   = pcf_isa_getclock,
        .waitforpin = pcf_isa_waitforpin,
-       .udelay     = 10,
-       .timeout    = 100,
 };
 
 static struct i2c_adapter pcf_isa_ops = {
        .owner          = THIS_MODULE,
-       .class          = I2C_CLASS_HWMON,
+       .class          = I2C_CLASS_HWMON | I2C_CLASS_SPD,
        .id             = I2C_HW_P_ELEK,
        .algo_data      = &pcf_isa_data,
        .name           = "i2c-elektor",
index 7c1b762aa681fc727312c0e0c1db08dbff0c2c04..79b455a1f090587c4c63d5b29b76d8474c81b122 100644 (file)
@@ -140,7 +140,7 @@ static int __init i2c_gpio_probe(struct platform_device *pdev)
        adap->owner = THIS_MODULE;
        snprintf(adap->name, sizeof(adap->name), "i2c-gpio%d", pdev->id);
        adap->algo_data = bit_data;
-       adap->class = I2C_CLASS_HWMON;
+       adap->class = I2C_CLASS_HWMON | I2C_CLASS_SPD;
        adap->dev.parent = &pdev->dev;
 
        /*
index f9972f9651e45f3497386cae1d16a55686066eab..1098f21ace133225901e65d13f9cb63a99c6359d 100644 (file)
@@ -1,7 +1,4 @@
 /*
-    i2c-hydra.c - Part of lm_sensors,  Linux kernel modules
-                  for hardware monitoring
-
     i2c Support for the Apple `Hydra' Mac I/O
 
     Copyright (c) 1999-2004 Geert Uytterhoeven <geert@linux-m68k.org>
index b0f771fe43265df51f425593fc1f42ee47ba058e..dc7ea32b69a8f64c3a09725cd54748fb8c95b9c8 100644 (file)
@@ -1,10 +1,8 @@
 /*
-    i2c-i801.c - Part of lm_sensors, Linux kernel modules for hardware
-              monitoring
     Copyright (c) 1998 - 2002  Frodo Looijaard <frodol@dds.nl>,
     Philip Edelbrock <phil@netroedge.com>, and Mark D. Studebaker
     <mdsxyz123@yahoo.com>
-    Copyright (C) 2007         Jean Delvare <khali@linux-fr.org>
+    Copyright (C) 2007, 2008   Jean Delvare <khali@linux-fr.org>
 
     This program is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -64,6 +62,7 @@
 #include <linux/ioport.h>
 #include <linux/init.h>
 #include <linux/i2c.h>
+#include <linux/acpi.h>
 #include <asm/io.h>
 
 /* I801 SMBus address offsets */
 #define SMBHSTSTS_INTR         0x02
 #define SMBHSTSTS_HOST_BUSY    0x01
 
+#define STATUS_FLAGS           (SMBHSTSTS_BYTE_DONE | SMBHSTSTS_FAILED | \
+                                SMBHSTSTS_BUS_ERR | SMBHSTSTS_DEV_ERR | \
+                                SMBHSTSTS_INTR)
+
 static unsigned long i801_smba;
 static unsigned char i801_original_hstcfg;
 static struct pci_driver i801_driver;
@@ -132,105 +135,137 @@ static struct pci_dev *I801_dev;
 #define FEATURE_I2C_BLOCK_READ (1 << 3)
 static unsigned int i801_features;
 
-static int i801_transaction(int xact)
+/* Make sure the SMBus host is ready to start transmitting.
+   Return 0 if it is, -EBUSY if it is not. */
+static int i801_check_pre(void)
 {
-       int temp;
-       int result = 0;
-       int timeout = 0;
+       int status;
 
-       dev_dbg(&I801_dev->dev, "Transaction (pre): CNT=%02x, CMD=%02x, "
-               "ADD=%02x, DAT0=%02x, DAT1=%02x\n", inb_p(SMBHSTCNT),
-               inb_p(SMBHSTCMD), inb_p(SMBHSTADD), inb_p(SMBHSTDAT0),
-               inb_p(SMBHSTDAT1));
-
-       /* Make sure the SMBus host is ready to start transmitting */
-       /* 0x1f = Failed, Bus_Err, Dev_Err, Intr, Host_Busy */
-       if ((temp = (0x1f & inb_p(SMBHSTSTS))) != 0x00) {
-               dev_dbg(&I801_dev->dev, "SMBus busy (%02x). Resetting...\n",
-                       temp);
-               outb_p(temp, SMBHSTSTS);
-               if ((temp = (0x1f & inb_p(SMBHSTSTS))) != 0x00) {
-                       dev_dbg(&I801_dev->dev, "Failed! (%02x)\n", temp);
-                       return -1;
-               } else {
-                       dev_dbg(&I801_dev->dev, "Successful!\n");
+       status = inb_p(SMBHSTSTS);
+       if (status & SMBHSTSTS_HOST_BUSY) {
+               dev_err(&I801_dev->dev, "SMBus is busy, can't use it!\n");
+               return -EBUSY;
+       }
+
+       status &= STATUS_FLAGS;
+       if (status) {
+               dev_dbg(&I801_dev->dev, "Clearing status flags (%02x)\n",
+                       status);
+               outb_p(status, SMBHSTSTS);
+               status = inb_p(SMBHSTSTS) & STATUS_FLAGS;
+               if (status) {
+                       dev_err(&I801_dev->dev,
+                               "Failed clearing status flags (%02x)\n",
+                               status);
+                       return -EBUSY;
                }
        }
 
-       /* the current contents of SMBHSTCNT can be overwritten, since PEC,
-        * INTREN, SMBSCMD are passed in xact */
-       outb_p(xact | I801_START, SMBHSTCNT);
+       return 0;
+}
 
-       /* We will always wait for a fraction of a second! */
-       do {
-               msleep(1);
-               temp = inb_p(SMBHSTSTS);
-       } while ((temp & SMBHSTSTS_HOST_BUSY) && (timeout++ < MAX_TIMEOUT));
+/* Convert the status register to an error code, and clear it. */
+static int i801_check_post(int status, int timeout)
+{
+       int result = 0;
 
        /* If the SMBus is still busy, we give up */
-       if (timeout >= MAX_TIMEOUT) {
-               dev_dbg(&I801_dev->dev, "SMBus Timeout!\n");
-               result = -1;
+       if (timeout) {
+               dev_err(&I801_dev->dev, "Transaction timeout\n");
                /* try to stop the current command */
                dev_dbg(&I801_dev->dev, "Terminating the current operation\n");
                outb_p(inb_p(SMBHSTCNT) | SMBHSTCNT_KILL, SMBHSTCNT);
                msleep(1);
                outb_p(inb_p(SMBHSTCNT) & (~SMBHSTCNT_KILL), SMBHSTCNT);
-       }
 
-       if (temp & SMBHSTSTS_FAILED) {
-               result = -1;
-               dev_dbg(&I801_dev->dev, "Error: Failed bus transaction\n");
+               /* Check if it worked */
+               status = inb_p(SMBHSTSTS);
+               if ((status & SMBHSTSTS_HOST_BUSY) ||
+                   !(status & SMBHSTSTS_FAILED))
+                       dev_err(&I801_dev->dev,
+                               "Failed terminating the transaction\n");
+               outb_p(STATUS_FLAGS, SMBHSTSTS);
+               return -ETIMEDOUT;
        }
 
-       if (temp & SMBHSTSTS_BUS_ERR) {
-               result = -1;
-               dev_err(&I801_dev->dev, "Bus collision! SMBus may be locked "
-                       "until next hard reset. (sorry!)\n");
-               /* Clock stops and slave is stuck in mid-transmission */
+       if (status & SMBHSTSTS_FAILED) {
+               result = -EIO;
+               dev_err(&I801_dev->dev, "Transaction failed\n");
        }
-
-       if (temp & SMBHSTSTS_DEV_ERR) {
-               result = -1;
-               dev_dbg(&I801_dev->dev, "Error: no response!\n");
+       if (status & SMBHSTSTS_DEV_ERR) {
+               result = -ENXIO;
+               dev_dbg(&I801_dev->dev, "No response\n");
+       }
+       if (status & SMBHSTSTS_BUS_ERR) {
+               result = -EAGAIN;
+               dev_dbg(&I801_dev->dev, "Lost arbitration\n");
        }
 
-       if ((inb_p(SMBHSTSTS) & 0x1f) != 0x00)
-               outb_p(inb(SMBHSTSTS), SMBHSTSTS);
-
-       if ((temp = (0x1f & inb_p(SMBHSTSTS))) != 0x00) {
-               dev_dbg(&I801_dev->dev, "Failed reset at end of transaction "
-                       "(%02x)\n", temp);
+       if (result) {
+               /* Clear error flags */
+               outb_p(status & STATUS_FLAGS, SMBHSTSTS);
+               status = inb_p(SMBHSTSTS) & STATUS_FLAGS;
+               if (status) {
+                       dev_warn(&I801_dev->dev, "Failed clearing status "
+                                "flags at end of transaction (%02x)\n",
+                                status);
+               }
        }
-       dev_dbg(&I801_dev->dev, "Transaction (post): CNT=%02x, CMD=%02x, "
-               "ADD=%02x, DAT0=%02x, DAT1=%02x\n", inb_p(SMBHSTCNT),
-               inb_p(SMBHSTCMD), inb_p(SMBHSTADD), inb_p(SMBHSTDAT0),
-               inb_p(SMBHSTDAT1));
+
        return result;
 }
 
+/*
+ * Start the transaction described by @xact and poll until the host is no
+ * longer busy or the poll budget (MAX_TIMEOUT iterations of 1 ms) runs out.
+ *
+ * @xact: value for SMBHSTCNT; I801_START is OR-ed in here.
+ *
+ * Returns 0 on success, or the negative errno reported by
+ * i801_check_pre() or i801_check_post().
+ */
+static int i801_transaction(int xact)
+{
+       int status;
+       int result;
+       int timeout = 0;
+
+       result = i801_check_pre();
+       if (result < 0)
+               return result;
+
+       /* the current contents of SMBHSTCNT can be overwritten, since PEC,
+        * INTREN, SMBSCMD are passed in xact */
+       outb_p(xact | I801_START, SMBHSTCNT);
+
+       /* We will always wait for a fraction of a second! */
+       do {
+               msleep(1);
+               status = inb_p(SMBHSTSTS);
+       } while ((status & SMBHSTSTS_HOST_BUSY) && (timeout++ < MAX_TIMEOUT));
+
+       /*
+        * timeout only exceeds MAX_TIMEOUT when the loop gave up while the
+        * host was still busy.  If HOST_BUSY clears on the very last poll,
+        * the loop exits with timeout == MAX_TIMEOUT; that is a completed
+        * transaction and must not be reported (and killed) as a timeout.
+        */
+       result = i801_check_post(status, timeout > MAX_TIMEOUT);
+       if (result < 0)
+               return result;
+
+       /* Success: clear the INTR status flag */
+       outb_p(SMBHSTSTS_INTR, SMBHSTSTS);
+       return 0;
+}
+
 /* wait for INTR bit as advised by Intel */
 static void i801_wait_hwpec(void)
 {
        int timeout = 0;
-       int temp;
+       int status;
 
        do {
                msleep(1);
-               temp = inb_p(SMBHSTSTS);
-       } while ((!(temp & SMBHSTSTS_INTR))
+               status = inb_p(SMBHSTSTS);
+       } while ((!(status & SMBHSTSTS_INTR))
                 && (timeout++ < MAX_TIMEOUT));
 
        if (timeout >= MAX_TIMEOUT) {
                dev_dbg(&I801_dev->dev, "PEC Timeout!\n");
        }
-       outb_p(temp, SMBHSTSTS);
+       outb_p(status, SMBHSTSTS);
 }
 
 static int i801_block_transaction_by_block(union i2c_smbus_data *data,
                                           char read_write, int hwpec)
 {
        int i, len;
+       int status;
 
        inb_p(SMBHSTCNT); /* reset the data buffer index */
 
@@ -242,14 +277,15 @@ static int i801_block_transaction_by_block(union i2c_smbus_data *data,
                        outb_p(data->block[i+1], SMBBLKDAT);
        }
 
-       if (i801_transaction(I801_BLOCK_DATA | ENABLE_INT9 |
-                            I801_PEC_EN * hwpec))
-               return -1;
+       status = i801_transaction(I801_BLOCK_DATA | ENABLE_INT9 |
+                                 I801_PEC_EN * hwpec);
+       if (status)
+               return status;
 
        if (read_write == I2C_SMBUS_READ) {
                len = inb_p(SMBHSTDAT0);
                if (len < 1 || len > I2C_SMBUS_BLOCK_MAX)
-                       return -1;
+                       return -EPROTO;
 
                data->block[0] = len;
                for (i = 0; i < len; i++)
@@ -264,10 +300,13 @@ static int i801_block_transaction_byte_by_byte(union i2c_smbus_data *data,
 {
        int i, len;
        int smbcmd;
-       int temp;
-       int result = 0;
+       int status;
+       int result;
        int timeout;
-       unsigned char errmask;
+
+       result = i801_check_pre();
+       if (result < 0)
+               return result;
 
        len = data->block[0];
 
@@ -291,36 +330,6 @@ static int i801_block_transaction_byte_by_byte(union i2c_smbus_data *data,
                }
                outb_p(smbcmd | ENABLE_INT9, SMBHSTCNT);
 
-               dev_dbg(&I801_dev->dev, "Block (pre %d): CNT=%02x, CMD=%02x, "
-                       "ADD=%02x, DAT0=%02x, DAT1=%02x, BLKDAT=%02x\n", i,
-                       inb_p(SMBHSTCNT), inb_p(SMBHSTCMD), inb_p(SMBHSTADD),
-                       inb_p(SMBHSTDAT0), inb_p(SMBHSTDAT1), inb_p(SMBBLKDAT));
-
-               /* Make sure the SMBus host is ready to start transmitting */
-               temp = inb_p(SMBHSTSTS);
-               if (i == 1) {
-                       /* Erroneous conditions before transaction:
-                        * Byte_Done, Failed, Bus_Err, Dev_Err, Intr, Host_Busy */
-                       errmask = 0x9f;
-               } else {
-                       /* Erroneous conditions during transaction:
-                        * Failed, Bus_Err, Dev_Err, Intr */
-                       errmask = 0x1e;
-               }
-               if (temp & errmask) {
-                       dev_dbg(&I801_dev->dev, "SMBus busy (%02x). "
-                               "Resetting...\n", temp);
-                       outb_p(temp, SMBHSTSTS);
-                       if (((temp = inb_p(SMBHSTSTS)) & errmask) != 0x00) {
-                               dev_err(&I801_dev->dev,
-                                       "Reset failed! (%02x)\n", temp);
-                               return -1;
-                       }
-                       if (i != 1)
-                               /* if die in middle of block transaction, fail */
-                               return -1;
-               }
-
                if (i == 1)
                        outb_p(inb(SMBHSTCNT) | I801_START, SMBHSTCNT);
 
@@ -328,41 +337,28 @@ static int i801_block_transaction_byte_by_byte(union i2c_smbus_data *data,
                timeout = 0;
                do {
                        msleep(1);
-                       temp = inb_p(SMBHSTSTS);
+                       status = inb_p(SMBHSTSTS);
                }
-               while ((!(temp & SMBHSTSTS_BYTE_DONE))
+               while ((!(status & SMBHSTSTS_BYTE_DONE))
                       && (timeout++ < MAX_TIMEOUT));
 
-               /* If the SMBus is still busy, we give up */
-               if (timeout >= MAX_TIMEOUT) {
-                       /* try to stop the current command */
-                       dev_dbg(&I801_dev->dev, "Terminating the current "
-                                               "operation\n");
-                       outb_p(inb_p(SMBHSTCNT) | SMBHSTCNT_KILL, SMBHSTCNT);
-                       msleep(1);
-                       outb_p(inb_p(SMBHSTCNT) & (~SMBHSTCNT_KILL),
-                               SMBHSTCNT);
-                       result = -1;
-                       dev_dbg(&I801_dev->dev, "SMBus Timeout!\n");
-               }
-
-               if (temp & SMBHSTSTS_FAILED) {
-                       result = -1;
-                       dev_dbg(&I801_dev->dev,
-                               "Error: Failed bus transaction\n");
-               } else if (temp & SMBHSTSTS_BUS_ERR) {
-                       result = -1;
-                       dev_err(&I801_dev->dev, "Bus collision!\n");
-               } else if (temp & SMBHSTSTS_DEV_ERR) {
-                       result = -1;
-                       dev_dbg(&I801_dev->dev, "Error: no response!\n");
-               }
+               result = i801_check_post(status, timeout >= MAX_TIMEOUT);
+               if (result < 0)
+                       return result;
 
                if (i == 1 && read_write == I2C_SMBUS_READ
                 && command != I2C_SMBUS_I2C_BLOCK_DATA) {
                        len = inb_p(SMBHSTDAT0);
-                       if (len < 1 || len > I2C_SMBUS_BLOCK_MAX)
-                               return -1;
+                       if (len < 1 || len > I2C_SMBUS_BLOCK_MAX) {
+                               dev_err(&I801_dev->dev,
+                                       "Illegal SMBus block read size %d\n",
+                                       len);
+                               /* Recover */
+                               while (inb_p(SMBHSTSTS) & SMBHSTSTS_HOST_BUSY)
+                                       outb_p(SMBHSTSTS_BYTE_DONE, SMBHSTSTS);
+                               outb_p(SMBHSTSTS_INTR, SMBHSTSTS);
+                               return -EPROTO;
+                       }
                        data->block[0] = len;
                }
 
@@ -371,30 +367,19 @@ static int i801_block_transaction_byte_by_byte(union i2c_smbus_data *data,
                        data->block[i] = inb_p(SMBBLKDAT);
                if (read_write == I2C_SMBUS_WRITE && i+1 <= len)
                        outb_p(data->block[i+1], SMBBLKDAT);
-               if ((temp & 0x9e) != 0x00)
-                       outb_p(temp, SMBHSTSTS);  /* signals SMBBLKDAT ready */
-
-               if ((temp = (0x1e & inb_p(SMBHSTSTS))) != 0x00) {
-                       dev_dbg(&I801_dev->dev,
-                               "Bad status (%02x) at end of transaction\n",
-                               temp);
-               }
-               dev_dbg(&I801_dev->dev, "Block (post %d): CNT=%02x, CMD=%02x, "
-                       "ADD=%02x, DAT0=%02x, DAT1=%02x, BLKDAT=%02x\n", i,
-                       inb_p(SMBHSTCNT), inb_p(SMBHSTCMD), inb_p(SMBHSTADD),
-                       inb_p(SMBHSTDAT0), inb_p(SMBHSTDAT1), inb_p(SMBBLKDAT));
 
-               if (result < 0)
-                       return result;
+               /* signals SMBBLKDAT ready */
+               outb_p(SMBHSTSTS_BYTE_DONE | SMBHSTSTS_INTR, SMBHSTSTS);
        }
-       return result;
+
+       return 0;
 }
 
 static int i801_set_block_buffer_mode(void)
 {
        outb_p(inb_p(SMBAUXCTL) | SMBAUXCTL_E32B, SMBAUXCTL);
        if ((inb_p(SMBAUXCTL) & SMBAUXCTL_E32B) == 0)
-               return -1;
+               return -EIO;
        return 0;
 }
 
@@ -414,7 +399,7 @@ static int i801_block_transaction(union i2c_smbus_data *data, char read_write,
                } else if (!(i801_features & FEATURE_I2C_BLOCK_READ)) {
                        dev_err(&I801_dev->dev,
                                "I2C block read is unsupported!\n");
-                       return -1;
+                       return -EOPNOTSUPP;
                }
        }
 
@@ -449,7 +434,7 @@ static int i801_block_transaction(union i2c_smbus_data *data, char read_write,
        return result;
 }
 
-/* Return -1 on error. */
+/* Return negative errno on error. */
 static s32 i801_access(struct i2c_adapter * adap, u16 addr,
                       unsigned short flags, char read_write, u8 command,
                       int size, union i2c_smbus_data * data)
@@ -511,10 +496,9 @@ static s32 i801_access(struct i2c_adapter * adap, u16 addr,
                        outb_p(command, SMBHSTCMD);
                block = 1;
                break;
-       case I2C_SMBUS_PROC_CALL:
        default:
                dev_err(&I801_dev->dev, "Unsupported transaction %d\n", size);
-               return -1;
+               return -EOPNOTSUPP;
        }
 
        if (hwpec)      /* enable/disable hardware PEC */
@@ -537,7 +521,7 @@ static s32 i801_access(struct i2c_adapter * adap, u16 addr,
        if(block)
                return ret;
        if(ret)
-               return -1;
+               return ret;
        if ((read_write == I2C_SMBUS_WRITE) || (xact == I801_QUICK))
                return 0;
 
@@ -572,7 +556,7 @@ static const struct i2c_algorithm smbus_algorithm = {
 static struct i2c_adapter i801_adapter = {
        .owner          = THIS_MODULE,
        .id             = I2C_HW_SMBUS_I801,
-       .class          = I2C_CLASS_HWMON,
+       .class          = I2C_CLASS_HWMON | I2C_CLASS_SPD,
        .algo           = &smbus_algorithm,
 };
 
@@ -639,6 +623,10 @@ static int __devinit i801_probe(struct pci_dev *dev, const struct pci_device_id
                goto exit;
        }
 
+       err = acpi_check_resource_conflict(&dev->resource[SMBBAR]);
+       if (err)
+               goto exit;
+
        err = pci_request_region(dev, SMBBAR, i801_driver.name);
        if (err) {
                dev_err(&dev->dev, "Failed to request SMBus region "
diff --git a/drivers/i2c/busses/i2c-i810.c b/drivers/i2c/busses/i2c-i810.c
deleted file mode 100644 (file)
index 42e8d94..0000000
+++ /dev/null
@@ -1,260 +0,0 @@
-/*
-    i2c-i810.c - Part of lm_sensors, Linux kernel modules for hardware
-              monitoring
-    Copyright (c) 1998, 1999, 2000  Frodo Looijaard <frodol@dds.nl>,
-    Philip Edelbrock <phil@netroedge.com>,
-    Ralph Metzler <rjkm@thp.uni-koeln.de>, and
-    Mark D. Studebaker <mdsxyz123@yahoo.com>
-    
-    Based on code written by Ralph Metzler <rjkm@thp.uni-koeln.de> and
-    Simon Vogl
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program; if not, write to the Free Software
-    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-*/
-/*
-   This interfaces to the I810/I815 to provide access to
-   the DDC Bus and the I2C Bus.
-
-   SUPPORTED DEVICES   PCI ID
-   i810AA              7121           
-   i810AB              7123           
-   i810E               7125           
-   i815                        1132           
-   i845G               2562
-*/
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/pci.h>
-#include <linux/i2c.h>
-#include <linux/i2c-algo-bit.h>
-#include <asm/io.h>
-
-/* GPIO register locations */
-#define I810_IOCONTROL_OFFSET  0x5000
-#define I810_HVSYNC            0x00    /* not used */
-#define I810_GPIOA             0x10
-#define I810_GPIOB             0x14
-
-/* bit locations in the registers */
-#define SCL_DIR_MASK           0x0001
-#define SCL_DIR                        0x0002
-#define SCL_VAL_MASK           0x0004
-#define SCL_VAL_OUT            0x0008
-#define SCL_VAL_IN             0x0010
-#define SDA_DIR_MASK           0x0100
-#define SDA_DIR                        0x0200
-#define SDA_VAL_MASK           0x0400
-#define SDA_VAL_OUT            0x0800
-#define SDA_VAL_IN             0x1000
-
-/* initialization states */
-#define INIT1                  0x1
-#define INIT2                  0x2
-#define INIT3                  0x4
-
-/* delays */
-#define CYCLE_DELAY            10
-#define TIMEOUT                        (HZ / 2)
-
-static void __iomem *ioaddr;
-
-/* The i810 GPIO registers have individual masks for each bit
-   so we never have to read before writing. Nice. */
-
-static void bit_i810i2c_setscl(void *data, int val)
-{
-       writel((val ? SCL_VAL_OUT : 0) | SCL_DIR | SCL_DIR_MASK | SCL_VAL_MASK,
-            ioaddr + I810_GPIOB);
-       readl(ioaddr + I810_GPIOB);     /* flush posted write */
-}
-
-static void bit_i810i2c_setsda(void *data, int val)
-{
-       writel((val ? SDA_VAL_OUT : 0) | SDA_DIR | SDA_DIR_MASK | SDA_VAL_MASK,
-            ioaddr + I810_GPIOB);
-       readl(ioaddr + I810_GPIOB);     /* flush posted write */
-}
-
-/* The GPIO pins are open drain, so the pins could always remain outputs.
-   However, some chip versions don't latch the inputs unless they
-   are set as inputs.
-   We rely on the i2c-algo-bit routines to set the pins high before
-   reading the input from other chips. Following guidance in the 815
-   prog. ref. guide, we do a "dummy write" of 0 to the register before
-   reading which forces the input value to be latched. We presume this
-   applies to the 810 as well; shouldn't hurt anyway. This is necessary to get
-   i2c_algo_bit bit_test=1 to pass. */
-
-static int bit_i810i2c_getscl(void *data)
-{
-       writel(SCL_DIR_MASK, ioaddr + I810_GPIOB);
-       writel(0, ioaddr + I810_GPIOB);
-       return (0 != (readl(ioaddr + I810_GPIOB) & SCL_VAL_IN));
-}
-
-static int bit_i810i2c_getsda(void *data)
-{
-       writel(SDA_DIR_MASK, ioaddr + I810_GPIOB);
-       writel(0, ioaddr + I810_GPIOB);
-       return (0 != (readl(ioaddr + I810_GPIOB) & SDA_VAL_IN));
-}
-
-static void bit_i810ddc_setscl(void *data, int val)
-{
-       writel((val ? SCL_VAL_OUT : 0) | SCL_DIR | SCL_DIR_MASK | SCL_VAL_MASK,
-            ioaddr + I810_GPIOA);
-       readl(ioaddr + I810_GPIOA);     /* flush posted write */
-}
-
-static void bit_i810ddc_setsda(void *data, int val)
-{
-       writel((val ? SDA_VAL_OUT : 0) | SDA_DIR | SDA_DIR_MASK | SDA_VAL_MASK,
-            ioaddr + I810_GPIOA);
-       readl(ioaddr + I810_GPIOA);     /* flush posted write */
-}
-
-static int bit_i810ddc_getscl(void *data)
-{
-       writel(SCL_DIR_MASK, ioaddr + I810_GPIOA);
-       writel(0, ioaddr + I810_GPIOA);
-       return (0 != (readl(ioaddr + I810_GPIOA) & SCL_VAL_IN));
-}
-
-static int bit_i810ddc_getsda(void *data)
-{
-       writel(SDA_DIR_MASK, ioaddr + I810_GPIOA);
-       writel(0, ioaddr + I810_GPIOA);
-       return (0 != (readl(ioaddr + I810_GPIOA) & SDA_VAL_IN));
-}
-
-static int config_i810(struct pci_dev *dev)
-{
-       unsigned long cadr;
-
-       /* map I810 memory */
-       cadr = dev->resource[1].start;
-       cadr += I810_IOCONTROL_OFFSET;
-       cadr &= PCI_BASE_ADDRESS_MEM_MASK;
-       ioaddr = ioremap_nocache(cadr, 0x1000);
-       if (ioaddr) {
-               bit_i810i2c_setscl(NULL, 1);
-               bit_i810i2c_setsda(NULL, 1);
-               bit_i810ddc_setscl(NULL, 1);
-               bit_i810ddc_setsda(NULL, 1);
-               return 0;
-       }
-       return -ENODEV;
-}
-
-static struct i2c_algo_bit_data i810_i2c_bit_data = {
-       .setsda         = bit_i810i2c_setsda,
-       .setscl         = bit_i810i2c_setscl,
-       .getsda         = bit_i810i2c_getsda,
-       .getscl         = bit_i810i2c_getscl,
-       .udelay         = CYCLE_DELAY,
-       .timeout        = TIMEOUT,
-};
-
-static struct i2c_adapter i810_i2c_adapter = {
-       .owner          = THIS_MODULE,
-       .id             = I2C_HW_B_I810,
-       .name           = "I810/I815 I2C Adapter",
-       .algo_data      = &i810_i2c_bit_data,
-};
-
-static struct i2c_algo_bit_data i810_ddc_bit_data = {
-       .setsda         = bit_i810ddc_setsda,
-       .setscl         = bit_i810ddc_setscl,
-       .getsda         = bit_i810ddc_getsda,
-       .getscl         = bit_i810ddc_getscl,
-       .udelay         = CYCLE_DELAY,
-       .timeout        = TIMEOUT,
-};
-
-static struct i2c_adapter i810_ddc_adapter = {
-       .owner          = THIS_MODULE,
-       .id             = I2C_HW_B_I810,
-       .name           = "I810/I815 DDC Adapter",
-       .algo_data      = &i810_ddc_bit_data,
-};
-
-static struct pci_device_id i810_ids[] __devinitdata = {
-       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82810_IG1) },
-       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82810_IG3) },
-       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82810E_IG) },
-       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82815_CGC) },
-       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82845G_IG) },
-       { 0, },
-};
-
-MODULE_DEVICE_TABLE (pci, i810_ids);
-
-static int __devinit i810_probe(struct pci_dev *dev, const struct pci_device_id *id)
-{
-       int retval;
-
-       retval = config_i810(dev);
-       if (retval)
-               return retval;
-       dev_info(&dev->dev, "i810/i815 i2c device found.\n");
-
-       /* set up the sysfs linkage to our parent device */
-       i810_i2c_adapter.dev.parent = &dev->dev;
-       i810_ddc_adapter.dev.parent = &dev->dev;
-
-       retval = i2c_bit_add_bus(&i810_i2c_adapter);
-       if (retval)
-               return retval;
-       retval = i2c_bit_add_bus(&i810_ddc_adapter);
-       if (retval)
-               i2c_del_adapter(&i810_i2c_adapter);
-       return retval;
-}
-
-static void __devexit i810_remove(struct pci_dev *dev)
-{
-       i2c_del_adapter(&i810_ddc_adapter);
-       i2c_del_adapter(&i810_i2c_adapter);
-       iounmap(ioaddr);
-}
-
-static struct pci_driver i810_driver = {
-       .name           = "i810_smbus",
-       .id_table       = i810_ids,
-       .probe          = i810_probe,
-       .remove         = __devexit_p(i810_remove),
-};
-
-static int __init i2c_i810_init(void)
-{
-       return pci_register_driver(&i810_driver);
-}
-
-static void __exit i2c_i810_exit(void)
-{
-       pci_unregister_driver(&i810_driver);
-}
-
-MODULE_AUTHOR("Frodo Looijaard <frodol@dds.nl>, "
-               "Philip Edelbrock <phil@netroedge.com>, "
-               "Ralph Metzler <rjkm@thp.uni-koeln.de>, "
-               "and Mark D. Studebaker <mdsxyz123@yahoo.com>");
-MODULE_DESCRIPTION("I810/I815 I2C/DDC driver");
-MODULE_LICENSE("GPL");
-
-module_init(i2c_i810_init);
-module_exit(i2c_i810_exit);
index 85dbf34382e1f54a3d964a2f71abf4f214457656..651f2f1ae5b7e96080e2664088695f608e059b4b 100644 (file)
 #include <asm/io.h>
 #include <linux/i2c.h>
 #include <linux/i2c-id.h>
-
-#ifdef CONFIG_IBM_OCP
-#include <asm/ocp.h>
-#include <asm/ibm4xx.h>
-#else
 #include <linux/of_platform.h>
-#endif
+#include <linux/of_i2c.h>
 
 #include "i2c-ibm_iic.h"
 
@@ -665,180 +660,6 @@ static inline u8 iic_clckdiv(unsigned int opb)
        return (u8)((opb + 9) / 10 - 1);
 }
 
-#ifdef CONFIG_IBM_OCP
-/*
- * Register single IIC interface
- */
-static int __devinit iic_probe(struct ocp_device *ocp){
-
-       struct ibm_iic_private* dev;
-       struct i2c_adapter* adap;
-       struct ocp_func_iic_data* iic_data = ocp->def->additions;
-       int ret;
-
-       if (!iic_data)
-               printk(KERN_WARNING"ibm-iic%d: missing additional data!\n",
-                       ocp->def->index);
-
-       if (!(dev = kzalloc(sizeof(*dev), GFP_KERNEL))) {
-               printk(KERN_ERR "ibm-iic%d: failed to allocate device data\n",
-                       ocp->def->index);
-               return -ENOMEM;
-       }
-
-       dev->idx = ocp->def->index;
-       ocp_set_drvdata(ocp, dev);
-
-       if (!request_mem_region(ocp->def->paddr, sizeof(struct iic_regs),
-                               "ibm_iic")) {
-               ret = -EBUSY;
-               goto fail1;
-       }
-
-       if (!(dev->vaddr = ioremap(ocp->def->paddr, sizeof(struct iic_regs)))){
-               printk(KERN_ERR "ibm-iic%d: failed to ioremap device registers\n",
-                       dev->idx);
-               ret = -ENXIO;
-               goto fail2;
-       }
-
-       init_waitqueue_head(&dev->wq);
-
-       dev->irq = iic_force_poll ? -1 : ocp->def->irq;
-       if (dev->irq >= 0){
-               /* Disable interrupts until we finish initialization,
-                  assumes level-sensitive IRQ setup...
-                */
-               iic_interrupt_mode(dev, 0);
-               if (request_irq(dev->irq, iic_handler, 0, "IBM IIC", dev)){
-                       printk(KERN_ERR "ibm-iic%d: request_irq %d failed\n",
-                               dev->idx, dev->irq);
-                       /* Fallback to the polling mode */
-                       dev->irq = -1;
-               }
-       }
-
-       if (dev->irq < 0)
-               printk(KERN_WARNING "ibm-iic%d: using polling mode\n",
-                       dev->idx);
-
-       /* Board specific settings */
-       dev->fast_mode = iic_force_fast ? 1 : (iic_data ? iic_data->fast_mode : 0);
-
-       /* clckdiv is the same for *all* IIC interfaces,
-        * but I'd rather make a copy than introduce another global. --ebs
-        */
-       dev->clckdiv = iic_clckdiv(ocp_sys_info.opb_bus_freq);
-       DBG("%d: clckdiv = %d\n", dev->idx, dev->clckdiv);
-
-       /* Initialize IIC interface */
-       iic_dev_init(dev);
-
-       /* Register it with i2c layer */
-       adap = &dev->adap;
-       adap->dev.parent = &ocp->dev;
-       strcpy(adap->name, "IBM IIC");
-       i2c_set_adapdata(adap, dev);
-       adap->id = I2C_HW_OCP;
-       adap->class = I2C_CLASS_HWMON;
-       adap->algo = &iic_algo;
-       adap->client_register = NULL;
-       adap->client_unregister = NULL;
-       adap->timeout = 1;
-
-       /*
-        * If "dev->idx" is negative we consider it as zero.
-        * The reason to do so is to avoid sysfs names that only make
-        * sense when there are multiple adapters.
-        */
-       adap->nr = dev->idx >= 0 ? dev->idx : 0;
-
-       if ((ret = i2c_add_numbered_adapter(adap)) < 0) {
-               printk(KERN_ERR "ibm-iic%d: failed to register i2c adapter\n",
-                       dev->idx);
-               goto fail;
-       }
-
-       printk(KERN_INFO "ibm-iic%d: using %s mode\n", dev->idx,
-               dev->fast_mode ? "fast (400 kHz)" : "standard (100 kHz)");
-
-       return 0;
-
-fail:
-       if (dev->irq >= 0){
-               iic_interrupt_mode(dev, 0);
-               free_irq(dev->irq, dev);
-       }
-
-       iounmap(dev->vaddr);
-fail2:
-       release_mem_region(ocp->def->paddr, sizeof(struct iic_regs));
-fail1:
-       ocp_set_drvdata(ocp, NULL);
-       kfree(dev);
-       return ret;
-}
-
-/*
- * Cleanup initialized IIC interface
- */
-static void __devexit iic_remove(struct ocp_device *ocp)
-{
-       struct ibm_iic_private* dev = (struct ibm_iic_private*)ocp_get_drvdata(ocp);
-       BUG_ON(dev == NULL);
-       if (i2c_del_adapter(&dev->adap)){
-               printk(KERN_ERR "ibm-iic%d: failed to delete i2c adapter :(\n",
-                       dev->idx);
-               /* That's *very* bad, just shutdown IRQ ... */
-               if (dev->irq >= 0){
-                   iic_interrupt_mode(dev, 0);
-                   free_irq(dev->irq, dev);
-                   dev->irq = -1;
-               }
-       } else {
-               if (dev->irq >= 0){
-                   iic_interrupt_mode(dev, 0);
-                   free_irq(dev->irq, dev);
-               }
-               iounmap(dev->vaddr);
-               release_mem_region(ocp->def->paddr, sizeof(struct iic_regs));
-               kfree(dev);
-       }
-}
-
-static struct ocp_device_id ibm_iic_ids[] __devinitdata =
-{
-       { .vendor = OCP_VENDOR_IBM, .function = OCP_FUNC_IIC },
-       { .vendor = OCP_VENDOR_INVALID }
-};
-
-MODULE_DEVICE_TABLE(ocp, ibm_iic_ids);
-
-static struct ocp_driver ibm_iic_driver =
-{
-       .name           = "iic",
-       .id_table       = ibm_iic_ids,
-       .probe          = iic_probe,
-       .remove         = __devexit_p(iic_remove),
-#if defined(CONFIG_PM)
-       .suspend        = NULL,
-       .resume         = NULL,
-#endif
-};
-
-static int __init iic_init(void)
-{
-       printk(KERN_INFO "IBM IIC driver v" DRIVER_VERSION "\n");
-       return ocp_register_driver(&ibm_iic_driver);
-}
-
-static void __exit iic_exit(void)
-{
-       ocp_unregister_driver(&ibm_iic_driver);
-}
-
-#else  /* !CONFIG_IBM_OCP */
-
 static int __devinit iic_request_irq(struct of_device *ofdev,
                                     struct ibm_iic_private *dev)
 {
@@ -876,7 +697,7 @@ static int __devinit iic_probe(struct of_device *ofdev,
        struct device_node *np = ofdev->node;
        struct ibm_iic_private *dev;
        struct i2c_adapter *adap;
-       const u32 *indexp, *freq;
+       const u32 *freq;
        int ret;
 
        dev = kzalloc(sizeof(*dev), GFP_KERNEL);
@@ -887,14 +708,6 @@ static int __devinit iic_probe(struct of_device *ofdev,
 
        dev_set_drvdata(&ofdev->dev, dev);
 
-       indexp = of_get_property(np, "index", NULL);
-       if (!indexp) {
-               dev_err(&ofdev->dev, "no index specified\n");
-               ret = -EINVAL;
-               goto error_cleanup;
-       }
-       dev->idx = *indexp;
-
        dev->vaddr = of_iomap(np, 0);
        if (dev->vaddr == NULL) {
                dev_err(&ofdev->dev, "failed to iomap device\n");
@@ -934,17 +747,19 @@ static int __devinit iic_probe(struct of_device *ofdev,
        strlcpy(adap->name, "IBM IIC", sizeof(adap->name));
        i2c_set_adapdata(adap, dev);
        adap->id = I2C_HW_OCP;
-       adap->class = I2C_CLASS_HWMON;
+       adap->class = I2C_CLASS_HWMON | I2C_CLASS_SPD;
        adap->algo = &iic_algo;
        adap->timeout = 1;
-       adap->nr = dev->idx;
 
-       ret = i2c_add_numbered_adapter(adap);
+       ret = i2c_add_adapter(adap);
        if (ret  < 0) {
                dev_err(&ofdev->dev, "failed to register i2c adapter\n");
                goto error_cleanup;
        }
 
+       /* Now register all the child nodes */
+       of_register_i2c_devices(adap, np);
+
        dev_info(&ofdev->dev, "using %s mode\n",
                 dev->fast_mode ? "fast (400 kHz)" : "standard (100 kHz)");
 
@@ -987,11 +802,7 @@ static int __devexit iic_remove(struct of_device *ofdev)
 }
 
 static const struct of_device_id ibm_iic_match[] = {
-       { .compatible = "ibm,iic-405ex", },
-       { .compatible = "ibm,iic-405gp", },
-       { .compatible = "ibm,iic-440gp", },
-       { .compatible = "ibm,iic-440gpx", },
-       { .compatible = "ibm,iic-440grx", },
+       { .compatible = "ibm,iic", },
        {}
 };
 
@@ -1011,7 +822,6 @@ static void __exit iic_exit(void)
 {
        of_unregister_platform_driver(&ibm_iic_driver);
 }
-#endif /* CONFIG_IBM_OCP */
 
 module_init(iic_init);
 module_exit(iic_exit);
index 39884e7975945e8954ed4bb8f1e8194fab5fd895..fc2714ac0c0f4ba865584f5012fce1dc563f3d06 100644 (file)
@@ -482,7 +482,7 @@ iop3xx_i2c_probe(struct platform_device *pdev)
        memcpy(new_adapter->name, pdev->name, strlen(pdev->name));
        new_adapter->id = I2C_HW_IOP3XX;
        new_adapter->owner = THIS_MODULE;
-       new_adapter->class = I2C_CLASS_HWMON;
+       new_adapter->class = I2C_CLASS_HWMON | I2C_CLASS_SPD;
        new_adapter->dev.parent = &pdev->dev;
        new_adapter->nr = pdev->id;
 
diff --git a/drivers/i2c/busses/i2c-isch.c b/drivers/i2c/busses/i2c-isch.c
new file mode 100644 (file)
index 0000000..b9c01aa
--- /dev/null
@@ -0,0 +1,339 @@
+/*
+    i2c-isch.c - Linux kernel driver for Intel SCH chipset SMBus
+    - Based on i2c-piix4.c
+    Copyright (c) 1998 - 2002 Frodo Looijaard <frodol@dds.nl> and
+    Philip Edelbrock <phil@netroedge.com>
+    - Intel SCH support
+    Copyright (c) 2007 - 2008 Jacob Jun Pan <jacob.jun.pan@intel.com>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License version 2 as
+    published by the Free Software Foundation.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+/*
+   Supports:
+       Intel SCH chipsets (AF82US15W, AF82US15L, AF82UL11L)
+   Note: we assume there can only be one device, with one SMBus interface.
+*/
+
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/stddef.h>
+#include <linux/ioport.h>
+#include <linux/i2c.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/acpi.h>
+
+/* SCH SMBus address offsets */
+#define SMBHSTCNT      (0 + sch_smba)
+#define SMBHSTSTS      (1 + sch_smba)
+#define SMBHSTADD      (4 + sch_smba) /* TSA */
+#define SMBHSTCMD      (5 + sch_smba)
+#define SMBHSTDAT0     (6 + sch_smba)
+#define SMBHSTDAT1     (7 + sch_smba)
+#define SMBBLKDAT      (0x20 + sch_smba)
+
+/* count for request_region */
+#define SMBIOSIZE      64
+
+/* PCI Address Constants */
+#define SMBBA_SCH      0x40
+
+/* Other settings */
+#define MAX_TIMEOUT    500
+
+/* I2C constants */
+#define SCH_QUICK              0x00
+#define SCH_BYTE               0x01
+#define SCH_BYTE_DATA          0x02
+#define SCH_WORD_DATA          0x03
+#define SCH_BLOCK_DATA         0x05
+
+static unsigned short sch_smba;
+static struct pci_driver sch_driver;
+static struct i2c_adapter sch_adapter;
+
+/*
+ * Start the i2c transaction -- sch_access prepares the transaction and this
+ * function executes it, polling the host status until the busy bit clears.
+ * Returns 0 on success or -EAGAIN, -ETIMEDOUT, -EIO or -ENXIO on failure.
+ */
+static int sch_transaction(void)
+{
+       int temp;
+       int result = 0;
+       int timeout = 0;
+
+       dev_dbg(&sch_adapter.dev, "Transaction (pre): CNT=%02x, CMD=%02x, "
+               "ADD=%02x, DAT0=%02x, DAT1=%02x\n", inb(SMBHSTCNT),
+               inb(SMBHSTCMD), inb(SMBHSTADD), inb(SMBHSTDAT0),
+               inb(SMBHSTDAT1));
+
+       /* Make sure the SMBus host is ready to start transmitting */
+       temp = inb(SMBHSTSTS) & 0x0f;
+       if (temp) {
+               /* Can not be busy since we checked it in sch_access */
+               if (temp & 0x01) {
+                       dev_dbg(&sch_adapter.dev, "Completion (%02x). "
+                               "Clear...\n", temp);
+               }
+               if (temp & 0x06) {
+                       dev_dbg(&sch_adapter.dev, "SMBus error (%02x). "
+                               "Resetting...\n", temp);
+               }
+               outb(temp, SMBHSTSTS);
+               temp = inb(SMBHSTSTS) & 0x0f;
+               if (temp) {
+                       dev_err(&sch_adapter.dev,
+                               "SMBus is not ready: (%02x)\n", temp);
+                       return -EAGAIN;
+               }
+       }
+
+       /* start the transaction by setting bit 4 */
+       outb(inb(SMBHSTCNT) | 0x10, SMBHSTCNT);
+
+       do {
+               msleep(1);
+               temp = inb(SMBHSTSTS) & 0x0f;
+       } while ((temp & 0x08) && (timeout++ < MAX_TIMEOUT));
+
+       /* If the SMBus is still busy, we give up. timeout only exceeds
+        * MAX_TIMEOUT when the retries ran out with the busy bit still set. */
+       if (timeout > MAX_TIMEOUT) {
+               dev_err(&sch_adapter.dev, "SMBus Timeout!\n");
+               result = -ETIMEDOUT;
+       } else if (temp & 0x04) {
+               result = -EIO;
+               dev_dbg(&sch_adapter.dev, "Bus collision! SMBus may be "
+                       "locked until next hard reset. (sorry!)\n");
+               /* Clock stops and slave is stuck in mid-transmission */
+       } else if (temp & 0x02) {
+               result = -EIO;
+               dev_err(&sch_adapter.dev, "Error: no response!\n");
+       } else if (temp & 0x01) {
+               dev_dbg(&sch_adapter.dev, "Post complete!\n");
+               outb(temp, SMBHSTSTS);
+               temp = inb(SMBHSTSTS) & 0x07;
+               if (temp & 0x06) {
+                       /* Completion clear failed */
+                       dev_dbg(&sch_adapter.dev, "Failed reset at end of "
+                               "transaction (%02x), Bus error!\n", temp);
+               }
+       } else {
+               result = -ENXIO;
+               dev_dbg(&sch_adapter.dev, "No such address.\n");
+       }
+       dev_dbg(&sch_adapter.dev, "Transaction (post): CNT=%02x, CMD=%02x, "
+               "ADD=%02x, DAT0=%02x, DAT1=%02x\n", inb(SMBHSTCNT),
+               inb(SMBHSTCMD), inb(SMBHSTADD), inb(SMBHSTDAT0),
+               inb(SMBHSTDAT1));
+       return result;
+}
+
+/*
+ * This is the main access entry for i2c-sch access
+ * adap is i2c_adapter pointer, addr is the i2c device bus address, read_write
+ * (1 for read and 0 for write), size is i2c transaction type and data is the
+ * union of transaction for data to be transferred or data read from bus.
+ * return 0 for success and others for failure.
+ */
+static s32 sch_access(struct i2c_adapter *adap, u16 addr,
+                unsigned short flags, char read_write,
+                u8 command, int size, union i2c_smbus_data *data)
+{
+       int i, len, temp, rc;
+
+       /* Make sure the SMBus host is not busy */
+       temp = inb(SMBHSTSTS) & 0x0f;
+       if (temp & 0x08) {
+               dev_dbg(&sch_adapter.dev, "SMBus busy (%02x)\n", temp);
+               return -EAGAIN;
+       }
+       dev_dbg(&sch_adapter.dev, "access size: %d %s\n", size,
+               (read_write)?"READ":"WRITE");
+       switch (size) {
+       case I2C_SMBUS_QUICK:
+               outb((addr << 1) | read_write, SMBHSTADD);
+               size = SCH_QUICK;
+               break;
+       case I2C_SMBUS_BYTE:
+               outb((addr << 1) | read_write, SMBHSTADD);
+               if (read_write == I2C_SMBUS_WRITE)
+                       outb(command, SMBHSTCMD);
+               size = SCH_BYTE;
+               break;
+       case I2C_SMBUS_BYTE_DATA:
+               outb((addr << 1) | read_write, SMBHSTADD);
+               outb(command, SMBHSTCMD);
+               if (read_write == I2C_SMBUS_WRITE)
+                       outb(data->byte, SMBHSTDAT0);
+               size = SCH_BYTE_DATA;
+               break;
+       case I2C_SMBUS_WORD_DATA:
+               outb((addr << 1) | read_write, SMBHSTADD);
+               outb(command, SMBHSTCMD);
+               if (read_write == I2C_SMBUS_WRITE) {
+                       outb(data->word & 0xff, SMBHSTDAT0);
+                       outb((data->word & 0xff00) >> 8, SMBHSTDAT1);
+               }
+               size = SCH_WORD_DATA;
+               break;
+       case I2C_SMBUS_BLOCK_DATA:
+               outb((addr << 1) | read_write, SMBHSTADD);
+               outb(command, SMBHSTCMD);
+               if (read_write == I2C_SMBUS_WRITE) {
+                       len = data->block[0];
+                       if (len == 0 || len > I2C_SMBUS_BLOCK_MAX)
+                               return -EINVAL;
+                       outb(len, SMBHSTDAT0);
+                       for (i = 1; i <= len; i++)
+                               outb(data->block[i], SMBBLKDAT+i-1);
+               }
+               size = SCH_BLOCK_DATA;
+               break;
+       default:
+               dev_warn(&adap->dev, "Unsupported transaction %d\n", size);
+               return -EOPNOTSUPP;
+       }
+       dev_dbg(&sch_adapter.dev, "write size %d to 0x%04x\n", size, SMBHSTCNT);
+       outb((inb(SMBHSTCNT) & 0xb0) | (size & 0x7), SMBHSTCNT);
+
+       rc = sch_transaction();
+       if (rc) /* Error in transaction */
+               return rc;
+
+       if ((read_write == I2C_SMBUS_WRITE) || (size == SCH_QUICK))
+               return 0;
+
+       switch (size) {
+       case SCH_BYTE:
+       case SCH_BYTE_DATA:
+               data->byte = inb(SMBHSTDAT0);
+               break;
+       case SCH_WORD_DATA:
+               data->word = inb(SMBHSTDAT0) + (inb(SMBHSTDAT1) << 8);
+               break;
+       case SCH_BLOCK_DATA:
+               data->block[0] = inb(SMBHSTDAT0);
+               if (data->block[0] == 0 || data->block[0] > I2C_SMBUS_BLOCK_MAX)
+                       return -EPROTO;
+               for (i = 1; i <= data->block[0]; i++)
+                       data->block[i] = inb(SMBBLKDAT+i-1);
+               break;
+       }
+       return 0;
+}
+
+static u32 sch_func(struct i2c_adapter *adapter)
+{
+       return I2C_FUNC_SMBUS_QUICK | I2C_FUNC_SMBUS_BYTE |
+           I2C_FUNC_SMBUS_BYTE_DATA | I2C_FUNC_SMBUS_WORD_DATA |
+           I2C_FUNC_SMBUS_BLOCK_DATA;
+}
+
+static const struct i2c_algorithm smbus_algorithm = {
+       .smbus_xfer     = sch_access,
+       .functionality  = sch_func,
+};
+
+static struct i2c_adapter sch_adapter = {
+       .owner          = THIS_MODULE,
+       .class          = I2C_CLASS_HWMON | I2C_CLASS_SPD,
+       .algo           = &smbus_algorithm,
+};
+
+static struct pci_device_id sch_ids[] = {
+       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SCH_LPC) },
+       { 0, }
+};
+
+MODULE_DEVICE_TABLE(pci, sch_ids);
+
+static int __devinit sch_probe(struct pci_dev *dev,
+                               const struct pci_device_id *id)
+{
+       int retval;
+       unsigned int smba;
+
+       pci_read_config_dword(dev, SMBBA_SCH, &smba);
+       if (!(smba & (1 << 31))) {
+               dev_err(&dev->dev, "SMBus I/O space disabled!\n");
+               return -ENODEV;
+       }
+
+       sch_smba = (unsigned short)smba;
+       if (sch_smba == 0) {
+               dev_err(&dev->dev, "SMBus base address uninitialized!\n");
+               return -ENODEV;
+       }
+       if (acpi_check_region(sch_smba, SMBIOSIZE, sch_driver.name))
+               return -EBUSY;
+       if (!request_region(sch_smba, SMBIOSIZE, sch_driver.name)) {
+               dev_err(&dev->dev, "SMBus region 0x%x already in use!\n",
+                       sch_smba);
+               return -EBUSY;
+       }
+       dev_dbg(&dev->dev, "SMBA = 0x%X\n", sch_smba);
+
+       /* set up the sysfs linkage to our parent device */
+       sch_adapter.dev.parent = &dev->dev;
+
+       snprintf(sch_adapter.name, sizeof(sch_adapter.name),
+               "SMBus SCH adapter at %04x", sch_smba);
+
+       retval = i2c_add_adapter(&sch_adapter);
+       if (retval) {
+               dev_err(&dev->dev, "Couldn't register adapter!\n");
+               release_region(sch_smba, SMBIOSIZE);
+               sch_smba = 0;
+       }
+
+       return retval;
+}
+
+static void __devexit sch_remove(struct pci_dev *dev)
+{
+       if (sch_smba) {
+               i2c_del_adapter(&sch_adapter);
+               release_region(sch_smba, SMBIOSIZE);
+               sch_smba = 0;
+       }
+}
+
+static struct pci_driver sch_driver = {
+       .name           = "isch_smbus",
+       .id_table       = sch_ids,
+       .probe          = sch_probe,
+       .remove         = __devexit_p(sch_remove),
+};
+
+static int __init i2c_sch_init(void)
+{
+       return pci_register_driver(&sch_driver);
+}
+
+static void __exit i2c_sch_exit(void)
+{
+       pci_unregister_driver(&sch_driver);
+}
+
+MODULE_AUTHOR("Jacob Pan <jacob.jun.pan@intel.com>");
+MODULE_DESCRIPTION("Intel SCH SMBus driver");
+MODULE_LICENSE("GPL");
+
+module_init(i2c_sch_init);
+module_exit(i2c_sch_exit);
index a076129de7e811acb2434864ef2d48fccbeafa3b..10b9342a36c21241bb3c0ca36960801148053239 100644 (file)
@@ -311,7 +311,7 @@ static struct i2c_adapter mpc_ops = {
        .name = "MPC adapter",
        .id = I2C_HW_MPC107,
        .algo = &mpc_algo,
-       .class = I2C_CLASS_HWMON,
+       .class = I2C_CLASS_HWMON | I2C_CLASS_SPD,
        .timeout = 1,
 };
 
index 036e6a883e6788e1564a56ae04afb227971681d6..9e8118d2fe64cccfa54e745337982569e0710064 100644 (file)
@@ -530,7 +530,7 @@ mv64xxx_i2c_probe(struct platform_device *pd)
        drv_data->adapter.id = I2C_HW_MV64XXX;
        drv_data->adapter.algo = &mv64xxx_i2c_algo;
        drv_data->adapter.owner = THIS_MODULE;
-       drv_data->adapter.class = I2C_CLASS_HWMON;
+       drv_data->adapter.class = I2C_CLASS_HWMON | I2C_CLASS_SPD;
        drv_data->adapter.timeout = pdata->timeout;
        drv_data->adapter.nr = pd->id;
        platform_set_drvdata(pd, drv_data);
diff --git a/drivers/i2c/busses/i2c-nforce2-s4985.c b/drivers/i2c/busses/i2c-nforce2-s4985.c
new file mode 100644 (file)
index 0000000..6a8995d
--- /dev/null
@@ -0,0 +1,257 @@
+/*
+ * i2c-nforce2-s4985.c - i2c-nforce2 extras for the Tyan S4985 motherboard
+ *
+ * Copyright (C) 2008 Jean Delvare <khali@linux-fr.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/*
+ * We select the channels by sending commands to the Philips
+ * PCA9556 chip at I2C address 0x18. The main adapter is used for
+ * the non-multiplexed part of the bus, and 4 virtual adapters
+ * are defined for the multiplexed addresses: 0x50-0x53 (memory
+ * module EEPROM) and 0x30-0x33, located on channels 1-4. We define
+ * one virtual adapter per CPU, which corresponds to one multiplexed
+ * channel:
+ *   CPU0: virtual adapter 1, channel 1
+ *   CPU1: virtual adapter 2, channel 2
+ *   CPU2: virtual adapter 3, channel 3
+ *   CPU3: virtual adapter 4, channel 4
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/i2c.h>
+#include <linux/mutex.h>
+
+extern struct i2c_adapter *nforce2_smbus;
+
+static struct i2c_adapter *s4985_adapter;
+static struct i2c_algorithm *s4985_algo;
+
+/* Wrapper access functions for multiplexed SMBus */
+static DEFINE_MUTEX(nforce2_lock);
+
+static s32 nforce2_access_virt0(struct i2c_adapter *adap, u16 addr,
+                               unsigned short flags, char read_write,
+                               u8 command, int size,
+                               union i2c_smbus_data *data)
+{
+       /* Addresses are matched in groups of four (low two bits ignored) */
+       const u16 group = addr & 0xfc;
+       s32 ret;
+
+       /*
+        * Adapter 0 serves the non-multiplexed part of the bus only:
+        * refuse the multiplexed groups 0x30-0x33 and 0x50-0x53, and
+        * the multiplexer chip itself at 0x18.
+        */
+       if (addr == 0x18 || group == 0x30 || group == 0x50)
+               return -ENXIO;
+
+       /* Same lock as the channel-switching path, so transfers never mix */
+       mutex_lock(&nforce2_lock);
+       ret = nforce2_smbus->algo->smbus_xfer(adap, addr, flags, read_write,
+                                             command, size, data);
+       mutex_unlock(&nforce2_lock);
+
+       return ret;
+}
+
+/* We remember the last used channels combination so as to only switch
+   channels when it is really needed. This greatly reduces the SMBus
+   overhead, but also assumes that nobody will be writing to the PCA9556
+   behind our back. */
+static u8 last_channels;
+
+static inline s32 nforce2_access_channel(struct i2c_adapter *adap, u16 addr,
+                                        unsigned short flags, char read_write,
+                                        u8 command, int size,
+                                        union i2c_smbus_data *data,
+                                        u8 channels)
+{
+       const u16 group = addr & 0xfc;
+       int ret = 0;
+
+       /* Only the multiplexed groups 0x30-0x33 and 0x50-0x53 live here */
+       if (group != 0x50 && group != 0x30)
+               return -ENXIO;
+
+       mutex_lock(&nforce2_lock);
+
+       /* Reprogram the multiplexer only when the selection changes */
+       if (channels != last_channels) {
+               union i2c_smbus_data mplxdata;
+
+               mplxdata.byte = channels;
+               ret = nforce2_smbus->algo->smbus_xfer(adap, 0x18, 0,
+                                                     I2C_SMBUS_WRITE, 0x01,
+                                                     I2C_SMBUS_BYTE_DATA,
+                                                     &mplxdata);
+               /* Cache the selection only once the write went through */
+               if (!ret)
+                       last_channels = channels;
+       }
+
+       /* The client transfer is skipped if the channel switch failed */
+       if (!ret)
+               ret = nforce2_smbus->algo->smbus_xfer(adap, addr, flags,
+                                                     read_write, command,
+                                                     size, data);
+
+       mutex_unlock(&nforce2_lock);
+       return ret;
+}
+
+static s32 nforce2_access_virt1(struct i2c_adapter *adap, u16 addr,
+                               unsigned short flags, char read_write,
+                               u8 command, int size,
+                               union i2c_smbus_data *data)
+{
+       /* CPU0: channel 1 enabled (bit 1 of the multiplexer byte) */
+       const u8 channels = 1 << 1;
+
+       return nforce2_access_channel(adap, addr, flags, read_write, command,
+                                     size, data, channels);
+}
+
+static s32 nforce2_access_virt2(struct i2c_adapter *adap, u16 addr,
+                               unsigned short flags, char read_write,
+                               u8 command, int size,
+                               union i2c_smbus_data *data)
+{
+       /* CPU1: channel 2 enabled (bit 2 of the multiplexer byte) */
+       const u8 channels = 1 << 2;
+
+       return nforce2_access_channel(adap, addr, flags, read_write, command,
+                                     size, data, channels);
+}
+
+static s32 nforce2_access_virt3(struct i2c_adapter *adap, u16 addr,
+                               unsigned short flags, char read_write,
+                               u8 command, int size,
+                               union i2c_smbus_data *data)
+{
+       /* CPU2: channel 3 enabled (bit 3 of the multiplexer byte) */
+       const u8 channels = 1 << 3;
+
+       return nforce2_access_channel(adap, addr, flags, read_write, command,
+                                     size, data, channels);
+}
+
+static s32 nforce2_access_virt4(struct i2c_adapter *adap, u16 addr,
+                               unsigned short flags, char read_write,
+                               u8 command, int size,
+                               union i2c_smbus_data *data)
+{
+       /* CPU3: channel 4 enabled (bit 4 of the multiplexer byte) */
+       const u8 channels = 1 << 4;
+
+       return nforce2_access_channel(adap, addr, flags, read_write, command,
+                                     size, data, channels);
+}
+
+/*
+ * Module init: take the physical nForce2 SMBus adapter off the I2C core
+ * and register five virtual adapters in its place (index 0 for the
+ * non-multiplexed part of the bus, indexes 1-4 for the multiplexed
+ * channels).
+ *
+ * Returns 0 on success, -ENODEV if no physical adapter reference is
+ * available, or another negative errno. If a failure happens after the
+ * physical adapter was removed, an attempt is made to register it again.
+ */
+static int __init nforce2_s4985_init(void)
+{
+       int i, error;
+       union i2c_smbus_data ioconfig;
+
+       /* Unregister physical bus */
+       if (!nforce2_smbus)
+               return -ENODEV;
+       error = i2c_del_adapter(nforce2_smbus);
+       if (error) {
+               dev_err(&nforce2_smbus->dev, "Physical bus removal failed\n");
+               goto ERROR0;
+       }
+
+       printk(KERN_INFO "Enabling SMBus multiplexing for Tyan S4985\n");
+       /* Define the 5 virtual adapters and algorithms structures */
+       s4985_adapter = kcalloc(5, sizeof(*s4985_adapter), GFP_KERNEL);
+       if (!s4985_adapter) {
+               error = -ENOMEM;
+               goto ERROR1;
+       }
+       s4985_algo = kcalloc(5, sizeof(*s4985_algo), GFP_KERNEL);
+       if (!s4985_algo) {
+               error = -ENOMEM;
+               goto ERROR2;
+       }
+
+       /* Fill in the new structures: every virtual adapter starts out as
+          a copy of the physical one, with its own algorithm structure so
+          that smbus_xfer can be overridden per adapter */
+       s4985_algo[0] = *(nforce2_smbus->algo);
+       s4985_algo[0].smbus_xfer = nforce2_access_virt0;
+       s4985_adapter[0] = *nforce2_smbus;
+       s4985_adapter[0].algo = s4985_algo;
+       s4985_adapter[0].dev.parent = nforce2_smbus->dev.parent;
+       for (i = 1; i < 5; i++) {
+               s4985_algo[i] = *(nforce2_smbus->algo);
+               s4985_adapter[i] = *nforce2_smbus;
+               snprintf(s4985_adapter[i].name, sizeof(s4985_adapter[i].name),
+                        "SMBus nForce2 adapter (CPU%d)", i - 1);
+               s4985_adapter[i].algo = s4985_algo + i;
+               s4985_adapter[i].dev.parent = nforce2_smbus->dev.parent;
+       }
+       s4985_algo[1].smbus_xfer = nforce2_access_virt1;
+       s4985_algo[2].smbus_xfer = nforce2_access_virt2;
+       s4985_algo[3].smbus_xfer = nforce2_access_virt3;
+       s4985_algo[4].smbus_xfer = nforce2_access_virt4;
+
+       /* Configure the PCA9556 multiplexer */
+       ioconfig.byte = 0x00; /* All I/O to output mode */
+       error = nforce2_smbus->algo->smbus_xfer(nforce2_smbus, 0x18, 0,
+                                               I2C_SMBUS_WRITE, 0x03,
+                                               I2C_SMBUS_BYTE_DATA, &ioconfig);
+       if (error) {
+               dev_err(&nforce2_smbus->dev, "PCA9556 configuration failed\n");
+               error = -EIO;
+               goto ERROR3;
+       }
+
+       /* Register virtual adapters */
+       for (i = 0; i < 5; i++) {
+               error = i2c_add_adapter(s4985_adapter + i);
+               if (error) {
+                       dev_err(&nforce2_smbus->dev,
+                               "Virtual adapter %d registration "
+                               "failed, module not inserted\n", i);
+                       /* Roll back the adapters registered so far */
+                       for (i--; i >= 0; i--)
+                               i2c_del_adapter(s4985_adapter + i);
+                       goto ERROR3;
+               }
+       }
+
+       return 0;
+
+ERROR3:
+       kfree(s4985_algo);
+       s4985_algo = NULL;
+ERROR2:
+       kfree(s4985_adapter);
+       s4985_adapter = NULL;
+ERROR1:
+       /* Restore physical bus; report a failure like the exit path does,
+          but keep returning the error that brought us here */
+       if (i2c_add_adapter(nforce2_smbus))
+               dev_err(&nforce2_smbus->dev, "Physical bus restoration "
+                       "failed\n");
+ERROR0:
+       return error;
+}
+
+static void __exit nforce2_s4985_exit(void)
+{
+       int idx;
+
+       /* Unregister and free the virtual adapters, if any were allocated */
+       if (s4985_adapter != NULL) {
+               for (idx = 0; idx < 5; idx++)
+                       i2c_del_adapter(&s4985_adapter[idx]);
+               kfree(s4985_adapter);
+               s4985_adapter = NULL;
+       }
+
+       /* kfree() accepts NULL, so the algorithm array needs no guard */
+       kfree(s4985_algo);
+       s4985_algo = NULL;
+
+       /* Hand the bus back to the physical adapter */
+       if (i2c_add_adapter(nforce2_smbus) != 0)
+               dev_err(&nforce2_smbus->dev, "Physical bus restoration "
+                       "failed\n");
+}
+
+MODULE_AUTHOR("Jean Delvare <khali@linux-fr.org>");
+MODULE_DESCRIPTION("S4985 SMBus multiplexing");
+MODULE_LICENSE("GPL");
+
+module_init(nforce2_s4985_init);
+module_exit(nforce2_s4985_exit);
index 43c9f8df950913a85137e13b124a90df30ae2041..3b19bc41a60b4d66f5995ff8082939160341a412 100644 (file)
@@ -51,6 +51,7 @@
 #include <linux/i2c.h>
 #include <linux/delay.h>
 #include <linux/dmi.h>
+#include <linux/acpi.h>
 #include <asm/io.h>
 
 MODULE_LICENSE("GPL");
@@ -124,6 +125,20 @@ static struct dmi_system_id __devinitdata nforce2_dmi_blacklist2[] = {
 
 static struct pci_driver nforce2_driver;
 
+/* For multiplexing support, we need a global reference to the 1st
+   SMBus channel. The pointer is only defined and exported when the
+   S4985 multiplexing code is enabled (built-in or as a module);
+   otherwise nforce2_set_reference() compiles to an empty stub. */
+#if defined CONFIG_I2C_NFORCE2_S4985 || defined CONFIG_I2C_NFORCE2_S4985_MODULE
+struct i2c_adapter *nforce2_smbus;
+EXPORT_SYMBOL_GPL(nforce2_smbus);
+
+/* Record (or, with NULL, clear) the adapter published as nforce2_smbus */
+static void nforce2_set_reference(struct i2c_adapter *adap)
+{
+       nforce2_smbus = adap;
+}
+#else
+static inline void nforce2_set_reference(struct i2c_adapter *adap) { }
+#endif
+
 static void nforce2_abort(struct i2c_adapter *adap)
 {
        struct nforce2_smbus *smbus = adap->algo_data;
@@ -158,16 +173,16 @@ static int nforce2_check_status(struct i2c_adapter *adap)
                dev_dbg(&adap->dev, "SMBus Timeout!\n");
                if (smbus->can_abort)
                        nforce2_abort(adap);
-               return -1;
+               return -ETIMEDOUT;
        }
        if (!(temp & NVIDIA_SMB_STS_DONE) || (temp & NVIDIA_SMB_STS_STATUS)) {
                dev_dbg(&adap->dev, "Transaction failed (0x%02x)!\n", temp);
-               return -1;
+               return -EIO;
        }
        return 0;
 }
 
-/* Return -1 on error */
+/* Return negative errno on error */
 static s32 nforce2_access(struct i2c_adapter * adap, u16 addr,
                unsigned short flags, char read_write,
                u8 command, int size, union i2c_smbus_data * data)
@@ -175,7 +190,7 @@ static s32 nforce2_access(struct i2c_adapter * adap, u16 addr,
        struct nforce2_smbus *smbus = adap->algo_data;
        unsigned char protocol, pec;
        u8 len;
-       int i;
+       int i, status;
 
        protocol = (read_write == I2C_SMBUS_READ) ? NVIDIA_SMB_PRTCL_READ :
                NVIDIA_SMB_PRTCL_WRITE;
@@ -219,7 +234,7 @@ static s32 nforce2_access(struct i2c_adapter * adap, u16 addr,
                                                "Transaction failed "
                                                "(requested block size: %d)\n",
                                                len);
-                                       return -1;
+                                       return -EINVAL;
                                }
                                outb_p(len, NVIDIA_SMB_BCNT);
                                for (i = 0; i < I2C_SMBUS_BLOCK_MAX; i++)
@@ -231,14 +246,15 @@ static s32 nforce2_access(struct i2c_adapter * adap, u16 addr,
 
                default:
                        dev_err(&adap->dev, "Unsupported transaction %d\n", size);
-                       return -1;
+                       return -EOPNOTSUPP;
        }
 
        outb_p((addr & 0x7f) << 1, NVIDIA_SMB_ADDR);
        outb_p(protocol, NVIDIA_SMB_PRTCL);
 
-       if (nforce2_check_status(adap))
-               return -1;
+       status = nforce2_check_status(adap);
+       if (status)
+               return status;
 
        if (read_write == I2C_SMBUS_WRITE)
                return 0;
@@ -260,7 +276,7 @@ static s32 nforce2_access(struct i2c_adapter * adap, u16 addr,
                                dev_err(&adap->dev, "Transaction failed "
                                        "(received block size: 0x%02x)\n",
                                        len);
-                               return -1;
+                               return -EPROTO;
                        }
                        for (i = 0; i < len; i++)
                                data->block[i+1] = inb_p(NVIDIA_SMB_DATA + i);
@@ -321,21 +337,26 @@ static int __devinit nforce2_probe_smb (struct pci_dev *dev, int bar,
                    != PCIBIOS_SUCCESSFUL) {
                        dev_err(&dev->dev, "Error reading PCI config for %s\n",
                                name);
-                       return -1;
+                       return -EIO;
                }
 
                smbus->base = iobase & PCI_BASE_ADDRESS_IO_MASK;
                smbus->size = 64;
        }
 
+       error = acpi_check_region(smbus->base, smbus->size,
+                                 nforce2_driver.name);
+       if (error)
+               return -1;
+
        if (!request_region(smbus->base, smbus->size, nforce2_driver.name)) {
                dev_err(&smbus->adapter.dev, "Error requesting region %02x .. %02X for %s\n",
                        smbus->base, smbus->base+smbus->size-1, name);
-               return -1;
+               return -EBUSY;
        }
        smbus->adapter.owner = THIS_MODULE;
        smbus->adapter.id = I2C_HW_SMBUS_NFORCE2;
-       smbus->adapter.class = I2C_CLASS_HWMON;
+       smbus->adapter.class = I2C_CLASS_HWMON | I2C_CLASS_SPD;
        smbus->adapter.algo = &smbus_algorithm;
        smbus->adapter.algo_data = smbus;
        smbus->adapter.dev.parent = &dev->dev;
@@ -346,7 +367,7 @@ static int __devinit nforce2_probe_smb (struct pci_dev *dev, int bar,
        if (error) {
                dev_err(&smbus->adapter.dev, "Failed to register adapter.\n");
                release_region(smbus->base, smbus->size);
-               return -1;
+               return error;
        }
        dev_info(&smbus->adapter.dev, "nForce2 SMBus adapter at %#x\n", smbus->base);
        return 0;
@@ -398,6 +419,7 @@ static int __devinit nforce2_probe(struct pci_dev *dev, const struct pci_device_
                return -ENODEV;
        }
 
+       nforce2_set_reference(&smbuses[0].adapter);
        return 0;
 }
 
@@ -406,6 +428,7 @@ static void __devexit nforce2_remove(struct pci_dev *dev)
 {
        struct nforce2_smbus *smbuses = (void*) pci_get_drvdata(dev);
 
+       nforce2_set_reference(NULL);
        if (smbuses[0].base) {
                i2c_del_adapter(&smbuses[0].adapter);
                release_region(smbuses[0].base, smbuses[0].size);
index f145692cbb76ad649901ccc3553443804bde3941..e5193bf754837bda5108651679c477ecd4248456 100644 (file)
@@ -29,6 +29,7 @@ struct ocores_i2c {
        int pos;
        int nmsgs;
        int state; /* see STATE_ */
+       int clock_khz;
 };
 
 /* registers */
@@ -173,8 +174,7 @@ static int ocores_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num)
                return -ETIMEDOUT;
 }
 
-static void ocores_init(struct ocores_i2c *i2c,
-                       struct ocores_i2c_platform_data *pdata)
+static void ocores_init(struct ocores_i2c *i2c)
 {
        int prescale;
        u8 ctrl = oc_getreg(i2c, OCI2C_CONTROL);
@@ -182,7 +182,7 @@ static void ocores_init(struct ocores_i2c *i2c,
        /* make sure the device is disabled */
        oc_setreg(i2c, OCI2C_CONTROL, ctrl & ~(OCI2C_CTRL_EN|OCI2C_CTRL_IEN));
 
-       prescale = (pdata->clock_khz / (5*100)) - 1;
+       prescale = (i2c->clock_khz / (5*100)) - 1;
        oc_setreg(i2c, OCI2C_PRELOW, prescale & 0xff);
        oc_setreg(i2c, OCI2C_PREHIGH, prescale >> 8);
 
@@ -205,7 +205,7 @@ static const struct i2c_algorithm ocores_algorithm = {
 static struct i2c_adapter ocores_adapter = {
        .owner          = THIS_MODULE,
        .name           = "i2c-ocores",
-       .class          = I2C_CLASS_HWMON,
+       .class          = I2C_CLASS_HWMON | I2C_CLASS_SPD,
        .algo           = &ocores_algorithm,
 };
 
@@ -248,7 +248,8 @@ static int __devinit ocores_i2c_probe(struct platform_device *pdev)
        }
 
        i2c->regstep = pdata->regstep;
-       ocores_init(i2c, pdata);
+       i2c->clock_khz = pdata->clock_khz;
+       ocores_init(i2c);
 
        init_waitqueue_head(&i2c->wait);
        ret = request_irq(res2->start, ocores_isr, 0, pdev->name, i2c);
@@ -312,13 +313,40 @@ static int __devexit ocores_i2c_remove(struct platform_device* pdev)
        return 0;
 }
 
+#ifdef CONFIG_PM
+static int ocores_i2c_suspend(struct platform_device *pdev, pm_message_t state)
+{
+       struct ocores_i2c *i2c;
+       u8 ctrl;
+
+       i2c = platform_get_drvdata(pdev);
+
+       /* make sure the device is disabled: clear both the EN and IEN
+          bits in the control register, leaving the other bits alone */
+       ctrl = oc_getreg(i2c, OCI2C_CONTROL);
+       ctrl &= ~(OCI2C_CTRL_EN | OCI2C_CTRL_IEN);
+       oc_setreg(i2c, OCI2C_CONTROL, ctrl);
+
+       return 0;
+}
+
+static int ocores_i2c_resume(struct platform_device *pdev)
+{
+       /* Run the regular init sequence again on the saved driver state */
+       ocores_init(platform_get_drvdata(pdev));
+
+       return 0;
+}
+#else
+#define ocores_i2c_suspend     NULL
+#define ocores_i2c_resume      NULL
+#endif
+
 /* work with hotplug and coldplug */
 MODULE_ALIAS("platform:ocores-i2c");
 
 static struct platform_driver ocores_i2c_driver = {
-       .probe  = ocores_i2c_probe,
-       .remove = __devexit_p(ocores_i2c_remove),
-       .driver = {
+       .probe   = ocores_i2c_probe,
+       .remove  = __devexit_p(ocores_i2c_remove),
+       .suspend = ocores_i2c_suspend,
+       .resume  = ocores_i2c_resume,
+       .driver  = {
                .owner = THIS_MODULE,
                .name = "ocores-i2c",
        },
index 1603c81e39d472e0b45644477db450aa3e26382e..adf0fbb902f0850fb77a77b2bdee30a6bff7e206 100644 (file)
@@ -365,7 +365,7 @@ static int __devinit pasemi_smb_probe(struct pci_dev *dev,
        smbus->adapter.owner = THIS_MODULE;
        snprintf(smbus->adapter.name, sizeof(smbus->adapter.name),
                 "PA Semi SMBus adapter at 0x%lx", smbus->base);
-       smbus->adapter.class = I2C_CLASS_HWMON;
+       smbus->adapter.class = I2C_CLASS_HWMON | I2C_CLASS_SPD;
        smbus->adapter.algo = &smbus_algorithm;
        smbus->adapter.algo_data = smbus;
        smbus->adapter.nr = PCI_FUNC(dev->devfn);
index 9d75f51e8f0efe648a22ff3ec33d231b42888167..6bb15ad0a6b6bc4c183eac4029220bf7355e2b53 100644 (file)
@@ -163,7 +163,7 @@ static int __devinit i2c_pca_pf_probe(struct platform_device *pdev)
 
        i2c->reg_base = ioremap(res->start, res_len(res));
        if (!i2c->reg_base) {
-               ret = -EIO;
+               ret = -ENOMEM;
                goto e_remap;
        }
        i2c->io_base = res->start;
index ac916596858764481f08afdde261b770be1a1b54..eaa9b387543e8730fec9c1e3d247ae52f6d4640a 100644 (file)
@@ -1,6 +1,4 @@
 /*
-    piix4.c - Part of lm_sensors, Linux kernel modules for hardware
-              monitoring
     Copyright (c) 1998 - 2002 Frodo Looijaard <frodol@dds.nl> and
     Philip Edelbrock <phil@netroedge.com>
 
 #include <linux/i2c.h>
 #include <linux/init.h>
 #include <linux/dmi.h>
+#include <linux/acpi.h>
 #include <asm/io.h>
 
 
-struct sd {
-       const unsigned short mfr;
-       const unsigned short dev;
-       const unsigned char fn;
-       const char *name;
-};
-
 /* PIIX4 SMBus address offsets */
 #define SMBHSTSTS      (0 + piix4_smba)
 #define SMBHSLVSTS     (1 + piix4_smba)
@@ -101,8 +93,6 @@ MODULE_PARM_DESC(force_addr,
                 "Forcibly enable the PIIX4 at the given address. "
                 "EXTREMELY DANGEROUS!");
 
-static int piix4_transaction(void);
-
 static unsigned short piix4_smba;
 static int srvrworks_csb5_delay;
 static struct pci_driver piix4_driver;
@@ -141,8 +131,6 @@ static int __devinit piix4_setup(struct pci_dev *PIIX4_dev,
 {
        unsigned char temp;
 
-       dev_info(&PIIX4_dev->dev, "Found %s device\n", pci_name(PIIX4_dev));
-
        if ((PIIX4_dev->vendor == PCI_VENDOR_ID_SERVERWORKS) &&
            (PIIX4_dev->device == PCI_DEVICE_ID_SERVERWORKS_CSB5))
                srvrworks_csb5_delay = 1;
@@ -172,17 +160,20 @@ static int __devinit piix4_setup(struct pci_dev *PIIX4_dev,
                pci_read_config_word(PIIX4_dev, SMBBA, &piix4_smba);
                piix4_smba &= 0xfff0;
                if(piix4_smba == 0) {
-                       dev_err(&PIIX4_dev->dev, "SMB base address "
+                       dev_err(&PIIX4_dev->dev, "SMBus base address "
                                "uninitialized - upgrade BIOS or use "
                                "force_addr=0xaddr\n");
                        return -ENODEV;
                }
        }
 
+       if (acpi_check_region(piix4_smba, SMBIOSIZE, piix4_driver.name))
+               return -EBUSY;
+
        if (!request_region(piix4_smba, SMBIOSIZE, piix4_driver.name)) {
-               dev_err(&PIIX4_dev->dev, "SMB region 0x%x already in use!\n",
+               dev_err(&PIIX4_dev->dev, "SMBus region 0x%x already in use!\n",
                        piix4_smba);
-               return -ENODEV;
+               return -EBUSY;
        }
 
        pci_read_config_byte(PIIX4_dev, SMBHSTCFG, &temp);
@@ -228,13 +219,13 @@ static int __devinit piix4_setup(struct pci_dev *PIIX4_dev,
                        "(or code out of date)!\n");
 
        pci_read_config_byte(PIIX4_dev, SMBREV, &temp);
-       dev_dbg(&PIIX4_dev->dev, "SMBREV = 0x%X\n", temp);
-       dev_dbg(&PIIX4_dev->dev, "SMBA = 0x%X\n", piix4_smba);
+       dev_info(&PIIX4_dev->dev,
+                "SMBus Host Controller at 0x%x, revision %d\n",
+                piix4_smba, temp);
 
        return 0;
 }
 
-/* Another internally used function */
 static int piix4_transaction(void)
 {
        int temp;
@@ -253,7 +244,7 @@ static int piix4_transaction(void)
                outb_p(temp, SMBHSTSTS);
                if ((temp = inb_p(SMBHSTSTS)) != 0x00) {
                        dev_err(&piix4_adapter.dev, "Failed! (%02x)\n", temp);
-                       return -1;
+                       return -EBUSY;
                } else {
                        dev_dbg(&piix4_adapter.dev, "Successful!\n");
                }
@@ -275,23 +266,23 @@ static int piix4_transaction(void)
        /* If the SMBus is still busy, we give up */
        if (timeout >= MAX_TIMEOUT) {
                dev_err(&piix4_adapter.dev, "SMBus Timeout!\n");
-               result = -1;
+               result = -ETIMEDOUT;
        }
 
        if (temp & 0x10) {
-               result = -1;
+               result = -EIO;
                dev_err(&piix4_adapter.dev, "Error: Failed bus transaction\n");
        }
 
        if (temp & 0x08) {
-               result = -1;
+               result = -EIO;
                dev_dbg(&piix4_adapter.dev, "Bus collision! SMBus may be "
                        "locked until next hard reset. (sorry!)\n");
                /* Clock stops and slave is stuck in mid-transmission */
        }
 
        if (temp & 0x04) {
-               result = -1;
+               result = -ENXIO;
                dev_dbg(&piix4_adapter.dev, "Error: no response!\n");
        }
 
@@ -309,31 +300,29 @@ static int piix4_transaction(void)
        return result;
 }
 
-/* Return -1 on error. */
+/* Return negative errno on error. */
 static s32 piix4_access(struct i2c_adapter * adap, u16 addr,
                 unsigned short flags, char read_write,
                 u8 command, int size, union i2c_smbus_data * data)
 {
        int i, len;
+       int status;
 
        switch (size) {
-       case I2C_SMBUS_PROC_CALL:
-               dev_err(&adap->dev, "I2C_SMBUS_PROC_CALL not supported!\n");
-               return -1;
        case I2C_SMBUS_QUICK:
-               outb_p(((addr & 0x7f) << 1) | (read_write & 0x01),
+               outb_p((addr << 1) | read_write,
                       SMBHSTADD);
                size = PIIX4_QUICK;
                break;
        case I2C_SMBUS_BYTE:
-               outb_p(((addr & 0x7f) << 1) | (read_write & 0x01),
+               outb_p((addr << 1) | read_write,
                       SMBHSTADD);
                if (read_write == I2C_SMBUS_WRITE)
                        outb_p(command, SMBHSTCMD);
                size = PIIX4_BYTE;
                break;
        case I2C_SMBUS_BYTE_DATA:
-               outb_p(((addr & 0x7f) << 1) | (read_write & 0x01),
+               outb_p((addr << 1) | read_write,
                       SMBHSTADD);
                outb_p(command, SMBHSTCMD);
                if (read_write == I2C_SMBUS_WRITE)
@@ -341,7 +330,7 @@ static s32 piix4_access(struct i2c_adapter * adap, u16 addr,
                size = PIIX4_BYTE_DATA;
                break;
        case I2C_SMBUS_WORD_DATA:
-               outb_p(((addr & 0x7f) << 1) | (read_write & 0x01),
+               outb_p((addr << 1) | read_write,
                       SMBHSTADD);
                outb_p(command, SMBHSTCMD);
                if (read_write == I2C_SMBUS_WRITE) {
@@ -351,15 +340,13 @@ static s32 piix4_access(struct i2c_adapter * adap, u16 addr,
                size = PIIX4_WORD_DATA;
                break;
        case I2C_SMBUS_BLOCK_DATA:
-               outb_p(((addr & 0x7f) << 1) | (read_write & 0x01),
+               outb_p((addr << 1) | read_write,
                       SMBHSTADD);
                outb_p(command, SMBHSTCMD);
                if (read_write == I2C_SMBUS_WRITE) {
                        len = data->block[0];
-                       if (len < 0)
-                               len = 0;
-                       if (len > 32)
-                               len = 32;
+                       if (len == 0 || len > I2C_SMBUS_BLOCK_MAX)
+                               return -EINVAL;
                        outb_p(len, SMBHSTDAT0);
                        i = inb_p(SMBHSTCNT);   /* Reset SMBBLKDAT */
                        for (i = 1; i <= len; i++)
@@ -367,12 +354,16 @@ static s32 piix4_access(struct i2c_adapter * adap, u16 addr,
                }
                size = PIIX4_BLOCK_DATA;
                break;
+       default:
+               dev_warn(&adap->dev, "Unsupported transaction %d\n", size);
+               return -EOPNOTSUPP;
        }
 
        outb_p((size & 0x1C) + (ENABLE_INT9 & 1), SMBHSTCNT);
 
-       if (piix4_transaction())        /* Error in transaction */
-               return -1;
+       status = piix4_transaction();
+       if (status)
+               return status;
 
        if ((read_write == I2C_SMBUS_WRITE) || (size == PIIX4_QUICK))
                return 0;
@@ -388,6 +379,8 @@ static s32 piix4_access(struct i2c_adapter * adap, u16 addr,
                break;
        case PIIX4_BLOCK_DATA:
                data->block[0] = inb_p(SMBHSTDAT0);
+               if (data->block[0] == 0 || data->block[0] > I2C_SMBUS_BLOCK_MAX)
+                       return -EPROTO;
                i = inb_p(SMBHSTCNT);   /* Reset SMBBLKDAT */
                for (i = 1; i <= data->block[0]; i++)
                        data->block[i] = inb_p(SMBBLKDAT);
@@ -411,7 +404,7 @@ static const struct i2c_algorithm smbus_algorithm = {
 static struct i2c_adapter piix4_adapter = {
        .owner          = THIS_MODULE,
        .id             = I2C_HW_SMBUS_PIIX4,
-       .class          = I2C_CLASS_HWMON,
+       .class          = I2C_CLASS_HWMON | I2C_CLASS_SPD,
        .algo           = &smbus_algorithm,
 };
 
index 63b3e2c11cff1a17ac67b2b21b39d49851c5143a..dcf2045b5222b44aac99b568f7170701ecf0ae78 100644 (file)
@@ -622,7 +622,7 @@ static struct i2c_algorithm pmcmsptwi_algo = {
 
 static struct i2c_adapter pmcmsptwi_adapter = {
        .owner          = THIS_MODULE,
-       .class          = I2C_CLASS_HWMON,
+       .class          = I2C_CLASS_HWMON | I2C_CLASS_SPD,
        .algo           = &pmcmsptwi_algo,
        .name           = DRV_NAME,
 };
diff --git a/drivers/i2c/busses/i2c-prosavage.c b/drivers/i2c/busses/i2c-prosavage.c
deleted file mode 100644 (file)
index 07c1f1e..0000000
+++ /dev/null
@@ -1,325 +0,0 @@
-/*
- *    kernel/busses/i2c-prosavage.c
- *
- *    i2c bus driver for S3/VIA 8365/8375 graphics processor.
- *    Copyright (c) 2003 Henk Vergonet <henk@god.dyndns.org>
- *    Based on code written by:
- *     Frodo Looijaard <frodol@dds.nl>,
- *     Philip Edelbrock <phil@netroedge.com>,
- *     Ralph Metzler <rjkm@thp.uni-koeln.de>, and
- *     Mark D. Studebaker <mdsxyz123@yahoo.com>
- *     Simon Vogl
- *     and others
- *
- *    Please read the lm_sensors documentation for details on use.
- *
- *    This program is free software; you can redistribute it and/or modify
- *    it under the terms of the GNU General Public License as published by
- *    the Free Software Foundation; either version 2 of the License, or
- *    (at your option) any later version.
- *
- *    This program is distributed in the hope that it will be useful,
- *    but WITHOUT ANY WARRANTY; without even the implied warranty of
- *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *    GNU General Public License for more details.
- *
- *    You should have received a copy of the GNU General Public License
- *    along with this program; if not, write to the Free Software
- *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-/*  18-05-2003 HVE - created
- *  14-06-2003 HVE - adapted for lm_sensors2
- *  17-06-2003 HVE - linux 2.5.xx compatible
- *  18-06-2003 HVE - codingstyle
- *  21-06-2003 HVE - compatibility lm_sensors2 and linux 2.5.xx
- *                  codingstyle, mmio enabled
- *
- *  This driver interfaces to the I2C bus of the VIA north bridge embedded
- *  ProSavage4/8 devices. Usefull for gaining access to the TV Encoder chips.
- *
- *  Graphics cores:
- *   S3/VIA KM266/VT8375 aka ProSavage8
- *   S3/VIA KM133/VT8365 aka Savage4
- *
- *  Two serial busses are implemented:
- *   SERIAL1 - I2C serial communications interface
- *   SERIAL2 - DDC2 monitor communications interface
- *
- *  Tested on a FX41 mainboard, see http://www.shuttle.com
- * 
- *
- *  TODO:
- *  - integration with prosavage framebuffer device
- *    (Additional documentation needed :(
- */
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/pci.h>
-#include <linux/i2c.h>
-#include <linux/i2c-algo-bit.h>
-#include <asm/io.h>
-
-/*
- * driver configuration
- */
-#define MAX_BUSSES     2
-
-struct s_i2c_bus {
-       void __iomem *mmvga;
-       int     i2c_reg;
-       int     adap_ok;
-       struct i2c_adapter              adap;
-       struct i2c_algo_bit_data        algo;
-};
-
-struct s_i2c_chip {
-       void __iomem *mmio;
-       struct s_i2c_bus        i2c_bus[MAX_BUSSES];
-};
-
-
-/*
- * i2c configuration
- */
-#define CYCLE_DELAY    10
-#define TIMEOUT                (HZ / 2)
-
-
-/* 
- * S3/VIA 8365/8375 registers
- */
-#define VGA_CR_IX      0x3d4
-#define VGA_CR_DATA    0x3d5
-
-#define CR_SERIAL1     0xa0    /* I2C serial communications interface */
-#define MM_SERIAL1     0xff20
-#define CR_SERIAL2     0xb1    /* DDC2 monitor communications interface */
-
-/* based on vt8365 documentation */
-#define I2C_ENAB       0x10
-#define I2C_SCL_OUT    0x01
-#define I2C_SDA_OUT    0x02
-#define I2C_SCL_IN     0x04
-#define I2C_SDA_IN     0x08
-
-#define SET_CR_IX(p, val)      writeb((val), (p)->mmvga + VGA_CR_IX)
-#define SET_CR_DATA(p, val)    writeb((val), (p)->mmvga + VGA_CR_DATA)
-#define GET_CR_DATA(p)         readb((p)->mmvga + VGA_CR_DATA)
-
-
-/*
- * Serial bus line handling
- *
- * serial communications register as parameter in private data
- *
- * TODO: locks with other code sections accessing video registers?
- */
-static void bit_s3via_setscl(void *bus, int val)
-{
-       struct s_i2c_bus *p = (struct s_i2c_bus *)bus;
-       unsigned int r;
-
-       SET_CR_IX(p, p->i2c_reg);
-       r = GET_CR_DATA(p);
-       r |= I2C_ENAB;
-       if (val) {
-               r |= I2C_SCL_OUT;
-       } else {
-               r &= ~I2C_SCL_OUT;
-       }
-       SET_CR_DATA(p, r);
-}
-
-static void bit_s3via_setsda(void *bus, int val)
-{
-       struct s_i2c_bus *p = (struct s_i2c_bus *)bus;
-       unsigned int r;
-       
-       SET_CR_IX(p, p->i2c_reg);
-       r = GET_CR_DATA(p);
-       r |= I2C_ENAB;
-       if (val) {
-               r |= I2C_SDA_OUT;
-       } else {
-               r &= ~I2C_SDA_OUT;
-       }
-       SET_CR_DATA(p, r);
-}
-
-static int bit_s3via_getscl(void *bus)
-{
-       struct s_i2c_bus *p = (struct s_i2c_bus *)bus;
-
-       SET_CR_IX(p, p->i2c_reg);
-       return (0 != (GET_CR_DATA(p) & I2C_SCL_IN));
-}
-
-static int bit_s3via_getsda(void *bus)
-{
-       struct s_i2c_bus *p = (struct s_i2c_bus *)bus;
-
-       SET_CR_IX(p, p->i2c_reg);
-       return (0 != (GET_CR_DATA(p) & I2C_SDA_IN));
-}
-
-
-/*
- * adapter initialisation
- */
-static int i2c_register_bus(struct pci_dev *dev, struct s_i2c_bus *p, void __iomem *mmvga, u32 i2c_reg)
-{
-       int ret;
-       p->adap.owner     = THIS_MODULE;
-       p->adap.id        = I2C_HW_B_S3VIA;
-       p->adap.algo_data = &p->algo;
-       p->adap.dev.parent = &dev->dev;
-       p->algo.setsda    = bit_s3via_setsda;
-       p->algo.setscl    = bit_s3via_setscl;
-       p->algo.getsda    = bit_s3via_getsda;
-       p->algo.getscl    = bit_s3via_getscl;
-       p->algo.udelay    = CYCLE_DELAY;
-       p->algo.timeout   = TIMEOUT;
-       p->algo.data      = p;
-       p->mmvga          = mmvga;
-       p->i2c_reg        = i2c_reg;
-    
-       ret = i2c_bit_add_bus(&p->adap);
-       if (ret) {
-               return ret;
-       }
-
-       p->adap_ok = 1;
-       return 0;
-}
-
-
-/*
- * Cleanup stuff
- */
-static void prosavage_remove(struct pci_dev *dev)
-{
-       struct s_i2c_chip *chip;
-       int i, ret;
-
-       chip = (struct s_i2c_chip *)pci_get_drvdata(dev);
-
-       if (!chip) {
-               return;
-       }
-       for (i = MAX_BUSSES - 1; i >= 0; i--) {
-               if (chip->i2c_bus[i].adap_ok == 0)
-                       continue;
-
-               ret = i2c_del_adapter(&chip->i2c_bus[i].adap);
-               if (ret) {
-                       dev_err(&dev->dev, "%s not removed\n",
-                               chip->i2c_bus[i].adap.name);
-               }
-       }
-       if (chip->mmio) {
-               iounmap(chip->mmio);
-       }
-       kfree(chip);
-}
-
-
-/*
- * Detect chip and initialize it
- */
-static int __devinit prosavage_probe(struct pci_dev *dev, const struct pci_device_id *id)
-{
-       int ret;
-       unsigned long base, len;
-       struct s_i2c_chip *chip;
-       struct s_i2c_bus  *bus;
-
-       pci_set_drvdata(dev, kzalloc(sizeof(struct s_i2c_chip), GFP_KERNEL));
-       chip = (struct s_i2c_chip *)pci_get_drvdata(dev);
-       if (chip == NULL) {
-               return -ENOMEM;
-       }
-
-       base = dev->resource[0].start & PCI_BASE_ADDRESS_MEM_MASK;
-       len  = dev->resource[0].end - base + 1;
-       chip->mmio = ioremap_nocache(base, len);
-
-       if (chip->mmio == NULL) {
-               dev_err(&dev->dev, "ioremap failed\n");
-               prosavage_remove(dev);
-               return -ENODEV;
-       }
-
-
-       /*
-        * Chip initialisation
-        */
-       /* Unlock Extended IO Space ??? */
-
-
-       /*
-        * i2c bus registration
-        */
-       bus = &chip->i2c_bus[0];
-       snprintf(bus->adap.name, sizeof(bus->adap.name),
-               "ProSavage I2C bus at %02x:%02x.%x",
-               dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
-       ret = i2c_register_bus(dev, bus, chip->mmio + 0x8000, CR_SERIAL1);
-       if (ret) {
-               goto err_adap;
-       }
-       /*
-        * ddc bus registration
-        */
-       bus = &chip->i2c_bus[1];
-       snprintf(bus->adap.name, sizeof(bus->adap.name),
-               "ProSavage DDC bus at %02x:%02x.%x",
-               dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
-       ret = i2c_register_bus(dev, bus, chip->mmio + 0x8000, CR_SERIAL2);
-       if (ret) {
-               goto err_adap;
-       }
-       return 0;
-err_adap:
-       dev_err(&dev->dev, "%s failed\n", bus->adap.name);
-       prosavage_remove(dev);
-       return ret;
-}
-
-
-/*
- * Data for PCI driver interface
- */
-static struct pci_device_id prosavage_pci_tbl[] = {
-       { PCI_DEVICE(PCI_VENDOR_ID_S3, PCI_DEVICE_ID_S3_SAVAGE4) },
-       { PCI_DEVICE(PCI_VENDOR_ID_S3, PCI_DEVICE_ID_S3_PROSAVAGE8) },
-       { 0, },
-};
-
-MODULE_DEVICE_TABLE (pci, prosavage_pci_tbl);
-
-static struct pci_driver prosavage_driver = {
-       .name           =       "prosavage_smbus",
-       .id_table       =       prosavage_pci_tbl,
-       .probe          =       prosavage_probe,
-       .remove         =       prosavage_remove,
-};
-
-static int __init i2c_prosavage_init(void)
-{
-       return pci_register_driver(&prosavage_driver);
-}
-
-static void __exit i2c_prosavage_exit(void)
-{
-       pci_unregister_driver(&prosavage_driver);
-}
-
-MODULE_DEVICE_TABLE(pci, prosavage_pci_tbl);
-MODULE_AUTHOR("Henk Vergonet");
-MODULE_DESCRIPTION("ProSavage VIA 8365/8375 smbus driver");
-MODULE_LICENSE("GPL");
-
-module_init (i2c_prosavage_init);
-module_exit (i2c_prosavage_exit);
index dde6ce963a1963efeeeecb284ad4031a0275bc2d..af9e6034d7fbda7c51ce0925a7a0707d974d82a1 100644 (file)
@@ -1104,5 +1104,5 @@ static void __exit i2c_adap_pxa_exit(void)
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("platform:pxa2xx-i2c");
 
-module_init(i2c_adap_pxa_init);
+subsys_initcall(i2c_adap_pxa_init);
 module_exit(i2c_adap_pxa_exit);
index 9e8c875437bec533ec4e32579602f31f4336bbe9..007390ad9810555b0fe7f2ba6077445601ec919b 100644 (file)
@@ -590,7 +590,7 @@ static struct s3c24xx_i2c s3c24xx_i2c = {
                .owner                  = THIS_MODULE,
                .algo                   = &s3c24xx_i2c_algorithm,
                .retries                = 2,
-               .class                  = I2C_CLASS_HWMON,
+               .class                  = I2C_CLASS_HWMON | I2C_CLASS_SPD,
        },
 };
 
diff --git a/drivers/i2c/busses/i2c-savage4.c b/drivers/i2c/busses/i2c-savage4.c
deleted file mode 100644 (file)
index 8adf4ab..0000000
+++ /dev/null
@@ -1,185 +0,0 @@
-/*
-    i2c-savage4.c - Part of lm_sensors, Linux kernel modules for hardware
-              monitoring
-    Copyright (C) 1998-2003  The LM Sensors Team
-    Alexander Wold <awold@bigfoot.com>
-    Mark D. Studebaker <mdsxyz123@yahoo.com>
-    
-    Based on i2c-voodoo3.c.
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program; if not, write to the Free Software
-    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-*/
-
-/* This interfaces to the I2C bus of the Savage4 to gain access to
-   the BT869 and possibly other I2C devices. The DDC bus is not
-   yet supported because its register is not memory-mapped.
-*/
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/pci.h>
-#include <linux/i2c.h>
-#include <linux/i2c-algo-bit.h>
-#include <asm/io.h>
-
-/* device IDs */
-#define PCI_CHIP_SAVAGE4       0x8A22
-#define PCI_CHIP_SAVAGE2000    0x9102
-
-#define REG                    0xff20  /* Serial Port 1 Register */
-
-/* bit locations in the register */
-#define I2C_ENAB               0x00000020
-#define I2C_SCL_OUT            0x00000001
-#define I2C_SDA_OUT            0x00000002
-#define I2C_SCL_IN             0x00000008
-#define I2C_SDA_IN             0x00000010
-
-/* delays */
-#define CYCLE_DELAY            10
-#define TIMEOUT                        (HZ / 2)
-
-
-static void __iomem *ioaddr;
-
-/* The sav GPIO registers don't have individual masks for each bit
-   so we always have to read before writing. */
-
-static void bit_savi2c_setscl(void *data, int val)
-{
-       unsigned int r;
-       r = readl(ioaddr + REG);
-       if(val)
-               r |= I2C_SCL_OUT;
-       else
-               r &= ~I2C_SCL_OUT;
-       writel(r, ioaddr + REG);
-       readl(ioaddr + REG);    /* flush posted write */
-}
-
-static void bit_savi2c_setsda(void *data, int val)
-{
-       unsigned int r;
-       r = readl(ioaddr + REG);
-       if(val)
-               r |= I2C_SDA_OUT;
-       else
-               r &= ~I2C_SDA_OUT;
-       writel(r, ioaddr + REG);
-       readl(ioaddr + REG);    /* flush posted write */
-}
-
-/* The GPIO pins are open drain, so the pins always remain outputs.
-   We rely on the i2c-algo-bit routines to set the pins high before
-   reading the input from other chips. */
-
-static int bit_savi2c_getscl(void *data)
-{
-       return (0 != (readl(ioaddr + REG) & I2C_SCL_IN));
-}
-
-static int bit_savi2c_getsda(void *data)
-{
-       return (0 != (readl(ioaddr + REG) & I2C_SDA_IN));
-}
-
-/* Configures the chip */
-
-static int config_s4(struct pci_dev *dev)
-{
-       unsigned long cadr;
-
-       /* map memory */
-       cadr = dev->resource[0].start;
-       cadr &= PCI_BASE_ADDRESS_MEM_MASK;
-       ioaddr = ioremap_nocache(cadr, 0x0080000);
-       if (ioaddr) {
-               /* writel(0x8160, ioaddr + REG2); */
-               writel(0x00000020, ioaddr + REG);
-               dev_info(&dev->dev, "Using Savage4 at %p\n", ioaddr);
-               return 0;
-       }
-       return -ENODEV;
-}
-
-static struct i2c_algo_bit_data sav_i2c_bit_data = {
-       .setsda         = bit_savi2c_setsda,
-       .setscl         = bit_savi2c_setscl,
-       .getsda         = bit_savi2c_getsda,
-       .getscl         = bit_savi2c_getscl,
-       .udelay         = CYCLE_DELAY,
-       .timeout        = TIMEOUT
-};
-
-static struct i2c_adapter savage4_i2c_adapter = {
-       .owner          = THIS_MODULE,
-       .id             = I2C_HW_B_SAVAGE,
-       .name           = "I2C Savage4 adapter",
-       .algo_data      = &sav_i2c_bit_data,
-};
-
-static struct pci_device_id savage4_ids[] __devinitdata = {
-       { PCI_DEVICE(PCI_VENDOR_ID_S3, PCI_CHIP_SAVAGE4) },
-       { PCI_DEVICE(PCI_VENDOR_ID_S3, PCI_CHIP_SAVAGE2000) },
-       { 0, }
-};
-
-MODULE_DEVICE_TABLE (pci, savage4_ids);
-
-static int __devinit savage4_probe(struct pci_dev *dev, const struct pci_device_id *id)
-{
-       int retval;
-
-       retval = config_s4(dev);
-       if (retval)
-               return retval;
-
-       /* set up the sysfs linkage to our parent device */
-       savage4_i2c_adapter.dev.parent = &dev->dev;
-
-       return i2c_bit_add_bus(&savage4_i2c_adapter);
-}
-
-static void __devexit savage4_remove(struct pci_dev *dev)
-{
-       i2c_del_adapter(&savage4_i2c_adapter);
-       iounmap(ioaddr);
-}
-
-static struct pci_driver savage4_driver = {
-       .name           = "savage4_smbus",
-       .id_table       = savage4_ids,
-       .probe          = savage4_probe,
-       .remove         = __devexit_p(savage4_remove),
-};
-
-static int __init i2c_savage4_init(void)
-{
-       return pci_register_driver(&savage4_driver);
-}
-
-static void __exit i2c_savage4_exit(void)
-{
-       pci_unregister_driver(&savage4_driver);
-}
-
-MODULE_AUTHOR("Alexander Wold <awold@bigfoot.com> "
-               "and Mark D. Studebaker <mdsxyz123@yahoo.com>");
-MODULE_DESCRIPTION("Savage4 I2C/SMBus driver");
-MODULE_LICENSE("GPL");
-
-module_init(i2c_savage4_init);
-module_exit(i2c_savage4_exit);
index 114634da6c6e9f3a7bd7ca77bf04476fc1c2818c..4ddefbf238e97f797a5a4627d7887ab4357b3af2 100644 (file)
@@ -143,7 +143,7 @@ static int __init i2c_sibyte_add_bus(struct i2c_adapter *i2c_adap, int speed)
        csr_out32(speed, SMB_CSR(adap,R_SMB_FREQ));
        csr_out32(0, SMB_CSR(adap,R_SMB_CONTROL));
 
-       return i2c_add_adapter(i2c_adap);
+       return i2c_add_numbered_adapter(i2c_adap);
 }
 
 
@@ -156,17 +156,19 @@ static struct i2c_adapter sibyte_board_adapter[2] = {
        {
                .owner          = THIS_MODULE,
                .id             = I2C_HW_SIBYTE,
-               .class          = I2C_CLASS_HWMON,
+               .class          = I2C_CLASS_HWMON | I2C_CLASS_SPD,
                .algo           = NULL,
                .algo_data      = &sibyte_board_data[0],
+               .nr             = 0,
                .name           = "SiByte SMBus 0",
        },
        {
                .owner          = THIS_MODULE,
                .id             = I2C_HW_SIBYTE,
-               .class          = I2C_CLASS_HWMON,
+               .class          = I2C_CLASS_HWMON | I2C_CLASS_SPD,
                .algo           = NULL,
                .algo_data      = &sibyte_board_data[1],
+               .nr             = 1,
                .name           = "SiByte SMBus 1",
        },
 };
index 9ca8f9155f958c40378a50770eb3327a0702c2e6..dfc2d5eb6a6895f437abcbb7f19ada78f815e618 100644 (file)
@@ -1,6 +1,4 @@
 /*
-    sis5595.c - Part of lm_sensors, Linux kernel modules for hardware
-              monitoring
     Copyright (c) 1998, 1999  Frodo Looijaard <frodol@dds.nl> and
     Philip Edelbrock <phil@netroedge.com>
 
@@ -62,6 +60,7 @@
 #include <linux/ioport.h>
 #include <linux/init.h>
 #include <linux/i2c.h>
+#include <linux/acpi.h>
 #include <asm/io.h>
 
 static int blacklist[] = {
@@ -174,6 +173,11 @@ static int sis5595_setup(struct pci_dev *SIS5595_dev)
 
        /* NB: We grab just the two SMBus registers here, but this may still
         * interfere with ACPI :-(  */
+       retval = acpi_check_region(sis5595_base + SMB_INDEX, 2,
+                                  sis5595_driver.name);
+       if (retval)
+               return retval;
+
        if (!request_region(sis5595_base + SMB_INDEX, 2,
                            sis5595_driver.name)) {
                dev_err(&SIS5595_dev->dev, "SMBus registers 0x%04x-0x%04x already in use!\n",
@@ -236,7 +240,7 @@ static int sis5595_transaction(struct i2c_adapter *adap)
                sis5595_write(SMB_STS_HI, temp >> 8);
                if ((temp = sis5595_read(SMB_STS_LO) + (sis5595_read(SMB_STS_HI) << 8)) != 0x00) {
                        dev_dbg(&adap->dev, "Failed! (%02x)\n", temp);
-                       return -1;
+                       return -EBUSY;
                } else {
                        dev_dbg(&adap->dev, "Successful!\n");
                }
@@ -254,19 +258,19 @@ static int sis5595_transaction(struct i2c_adapter *adap)
        /* If the SMBus is still busy, we give up */
        if (timeout >= MAX_TIMEOUT) {
                dev_dbg(&adap->dev, "SMBus Timeout!\n");
-               result = -1;
+               result = -ETIMEDOUT;
        }
 
        if (temp & 0x10) {
                dev_dbg(&adap->dev, "Error: Failed bus transaction\n");
-               result = -1;
+               result = -ENXIO;
        }
 
        if (temp & 0x20) {
                dev_err(&adap->dev, "Bus collision! SMBus may be locked until "
                        "next hard reset (or not...)\n");
                /* Clock stops and slave is stuck in mid-transmission */
-               result = -1;
+               result = -EIO;
        }
 
        temp = sis5595_read(SMB_STS_LO) + (sis5595_read(SMB_STS_HI) << 8);
@@ -282,11 +286,13 @@ static int sis5595_transaction(struct i2c_adapter *adap)
        return result;
 }
 
-/* Return -1 on error. */
+/* Return negative errno on error. */
 static s32 sis5595_access(struct i2c_adapter *adap, u16 addr,
                          unsigned short flags, char read_write,
                          u8 command, int size, union i2c_smbus_data *data)
 {
+       int status;
+
        switch (size) {
        case I2C_SMBUS_QUICK:
                sis5595_write(SMB_ADDR, ((addr & 0x7f) << 1) | (read_write & 0x01));
@@ -318,13 +324,14 @@ static s32 sis5595_access(struct i2c_adapter *adap, u16 addr,
                break;
        default:
                dev_warn(&adap->dev, "Unsupported transaction %d\n", size);
-               return -1;
+               return -EOPNOTSUPP;
        }
 
        sis5595_write(SMB_CTL_LO, ((size & 0x0E)));
 
-       if (sis5595_transaction(adap))
-               return -1;
+       status = sis5595_transaction(adap);
+       if (status)
+               return status;
 
        if ((size != SIS5595_PROC_CALL) &&
            ((read_write == I2C_SMBUS_WRITE) || (size == SIS5595_QUICK)))
@@ -359,7 +366,7 @@ static const struct i2c_algorithm smbus_algorithm = {
 static struct i2c_adapter sis5595_adapter = {
        .owner          = THIS_MODULE,
        .id             = I2C_HW_SMBUS_SIS5595,
-       .class          = I2C_CLASS_HWMON,
+       .class          = I2C_CLASS_HWMON | I2C_CLASS_SPD,
        .algo           = &smbus_algorithm,
 };
 
index 3765dd7f450f06e7c71f6623d67795c9e4cc056d..e7c4b790da5427393783405f6da6271b120efd64 100644 (file)
@@ -1,7 +1,4 @@
 /*
-    i2c-sis630.c - Part of lm_sensors, Linux kernel modules for hardware
-              monitoring
-
     Copyright (c) 2002,2003 Alexander Malysh <amalysh@web.de>
 
     This program is free software; you can redistribute it and/or modify
@@ -55,6 +52,7 @@
 #include <linux/ioport.h>
 #include <linux/init.h>
 #include <linux/i2c.h>
+#include <linux/acpi.h>
 #include <asm/io.h>
 
 /* SIS630 SMBus registers */
@@ -134,7 +132,7 @@ static int sis630_transaction_start(struct i2c_adapter *adap, int size, u8 *oldc
 
                if ((temp = sis630_read(SMB_CNT) & 0x03) != 0x00) {
                        dev_dbg(&adap->dev, "Failed! (%02x)\n", temp);
-                       return -1;
+                       return -EBUSY;
                 } else {
                        dev_dbg(&adap->dev, "Successful!\n");
                }
@@ -177,17 +175,17 @@ static int sis630_transaction_wait(struct i2c_adapter *adap, int size)
        /* If the SMBus is still busy, we give up */
        if (timeout >= MAX_TIMEOUT) {
                dev_dbg(&adap->dev, "SMBus Timeout!\n");
-               result = -1;
+               result = -ETIMEDOUT;
        }
 
        if (temp & 0x02) {
                dev_dbg(&adap->dev, "Error: Failed bus transaction\n");
-               result = -1;
+               result = -ENXIO;
        }
 
        if (temp & 0x04) {
                dev_err(&adap->dev, "Bus collision!\n");
-               result = -1;
+               result = -EIO;
                /*
                  TBD: Datasheet say:
                  the software should clear this bit and restart SMBUS operation.
@@ -250,8 +248,10 @@ static int sis630_block_data(struct i2c_adapter *adap, union i2c_smbus_data *dat
                        if (i==8 || (len<8 && i==len)) {
                                dev_dbg(&adap->dev, "start trans len=%d i=%d\n",len ,i);
                                /* first transaction */
-                               if (sis630_transaction_start(adap, SIS630_BLOCK_DATA, &oldclock))
-                                       return -1;
+                               rc = sis630_transaction_start(adap,
+                                               SIS630_BLOCK_DATA, &oldclock);
+                               if (rc)
+                                       return rc;
                        }
                        else if ((i-1)%8 == 7 || i==len) {
                                dev_dbg(&adap->dev, "trans_wait len=%d i=%d\n",len,i);
@@ -264,9 +264,10 @@ static int sis630_block_data(struct i2c_adapter *adap, union i2c_smbus_data *dat
                                        */
                                        sis630_write(SMB_STS,0x10);
                                }
-                               if (sis630_transaction_wait(adap, SIS630_BLOCK_DATA)) {
+                               rc = sis630_transaction_wait(adap,
+                                               SIS630_BLOCK_DATA);
+                               if (rc) {
                                        dev_dbg(&adap->dev, "trans_wait failed\n");
-                                       rc = -1;
                                        break;
                                }
                        }
@@ -275,13 +276,14 @@ static int sis630_block_data(struct i2c_adapter *adap, union i2c_smbus_data *dat
        else {
                /* read request */
                data->block[0] = len = 0;
-               if (sis630_transaction_start(adap, SIS630_BLOCK_DATA, &oldclock)) {
-                       return -1;
-               }
+               rc = sis630_transaction_start(adap,
+                               SIS630_BLOCK_DATA, &oldclock);
+               if (rc)
+                       return rc;
                do {
-                       if (sis630_transaction_wait(adap, SIS630_BLOCK_DATA)) {
+                       rc = sis630_transaction_wait(adap, SIS630_BLOCK_DATA);
+                       if (rc) {
                                dev_dbg(&adap->dev, "trans_wait failed\n");
-                               rc = -1;
                                break;
                        }
                        /* if this first transaction then read byte count */
@@ -311,11 +313,13 @@ static int sis630_block_data(struct i2c_adapter *adap, union i2c_smbus_data *dat
        return rc;
 }
 
-/* Return -1 on error. */
+/* Return negative errno on error. */
 static s32 sis630_access(struct i2c_adapter *adap, u16 addr,
                         unsigned short flags, char read_write,
                         u8 command, int size, union i2c_smbus_data *data)
 {
+       int status;
+
        switch (size) {
                case I2C_SMBUS_QUICK:
                        sis630_write(SMB_ADDR, ((addr & 0x7f) << 1) | (read_write & 0x01));
@@ -350,13 +354,14 @@ static s32 sis630_access(struct i2c_adapter *adap, u16 addr,
                        size = SIS630_BLOCK_DATA;
                        return sis630_block_data(adap, data, read_write);
                default:
-                       printk("Unsupported I2C size\n");
-                       return -1;
-                       break;
+                       dev_warn(&adap->dev, "Unsupported transaction %d\n",
+                                size);
+                       return -EOPNOTSUPP;
        }
 
-       if (sis630_transaction(adap, size))
-               return -1;
+       status = sis630_transaction(adap, size);
+       if (status)
+               return status;
 
        if ((size != SIS630_PCALL) &&
                ((read_write == I2C_SMBUS_WRITE) || (size == SIS630_QUICK))) {
@@ -372,9 +377,6 @@ static s32 sis630_access(struct i2c_adapter *adap, u16 addr,
                case SIS630_WORD_DATA:
                        data->word = sis630_read(SMB_BYTE) + (sis630_read(SMB_BYTE + 1) << 8);
                        break;
-               default:
-                       return -1;
-                       break;
        }
 
        return 0;
@@ -433,6 +435,11 @@ static int sis630_setup(struct pci_dev *sis630_dev)
 
        dev_dbg(&sis630_dev->dev, "ACPI base at 0x%04x\n", acpi_base);
 
+       retval = acpi_check_region(acpi_base + SMB_STS, SIS630_SMB_IOREGION,
+                                  sis630_driver.name);
+       if (retval)
+               goto exit;
+
        /* Everything is happy, let's grab the memory and set things up. */
        if (!request_region(acpi_base + SMB_STS, SIS630_SMB_IOREGION,
                            sis630_driver.name)) {
@@ -458,7 +465,7 @@ static const struct i2c_algorithm smbus_algorithm = {
 static struct i2c_adapter sis630_adapter = {
        .owner          = THIS_MODULE,
        .id             = I2C_HW_SMBUS_SIS630,
-       .class          = I2C_CLASS_HWMON,
+       .class          = I2C_CLASS_HWMON | I2C_CLASS_SPD,
        .algo           = &smbus_algorithm,
 };
 
index dc235bb8e24d448ae5f815ff45314bdea789d740..f1bba639664171a805bedf75bac802f71cef0927 100644 (file)
@@ -1,7 +1,4 @@
 /*
-    sis96x.c - Part of lm_sensors, Linux kernel modules for hardware
-              monitoring
-
     Copyright (c) 2003 Mark M. Hoffman <mhoffman@lightlink.com>
 
     This program is free software; you can redistribute it and/or modify
@@ -40,6 +37,7 @@
 #include <linux/ioport.h>
 #include <linux/i2c.h>
 #include <linux/init.h>
+#include <linux/acpi.h>
 #include <asm/io.h>
 
 /* base address register in PCI config space */
@@ -111,7 +109,7 @@ static int sis96x_transaction(int size)
                /* check it again */
                if (((temp = sis96x_read(SMB_CNT)) & 0x03) != 0x00) {
                        dev_dbg(&sis96x_adapter.dev, "Failed (0x%02x)\n", temp);
-                       return -1;
+                       return -EBUSY;
                } else {
                        dev_dbg(&sis96x_adapter.dev, "Successful\n");
                }
@@ -136,19 +134,19 @@ static int sis96x_transaction(int size)
        /* If the SMBus is still busy, we give up */
        if (timeout >= MAX_TIMEOUT) {
                dev_dbg(&sis96x_adapter.dev, "SMBus Timeout! (0x%02x)\n", temp);
-               result = -1;
+               result = -ETIMEDOUT;
        }
 
        /* device error - probably missing ACK */
        if (temp & 0x02) {
                dev_dbg(&sis96x_adapter.dev, "Failed bus transaction!\n");
-               result = -1;
+               result = -ENXIO;
        }
 
        /* bus collision */
        if (temp & 0x04) {
                dev_dbg(&sis96x_adapter.dev, "Bus collision!\n");
-               result = -1;
+               result = -EIO;
        }
 
        /* Finish up by resetting the bus */
@@ -161,11 +159,12 @@ static int sis96x_transaction(int size)
        return result;
 }
 
-/* Return -1 on error. */
+/* Return negative errno on error. */
 static s32 sis96x_access(struct i2c_adapter * adap, u16 addr,
                         unsigned short flags, char read_write,
                         u8 command, int size, union i2c_smbus_data * data)
 {
+       int status;
 
        switch (size) {
        case I2C_SMBUS_QUICK:
@@ -200,20 +199,14 @@ static s32 sis96x_access(struct i2c_adapter * adap, u16 addr,
                        SIS96x_PROC_CALL : SIS96x_WORD_DATA);
                break;
 
-       case I2C_SMBUS_BLOCK_DATA:
-               /* TO DO: */
-               dev_info(&adap->dev, "SMBus block not implemented!\n");
-               return -1;
-               break;
-
        default:
-               dev_info(&adap->dev, "Unsupported I2C size\n");
-               return -1;
-               break;
+               dev_warn(&adap->dev, "Unsupported transaction %d\n", size);
+               return -EOPNOTSUPP;
        }
 
-       if (sis96x_transaction(size))
-               return -1;
+       status = sis96x_transaction(size);
+       if (status)
+               return status;
 
        if ((size != SIS96x_PROC_CALL) &&
                ((read_write == I2C_SMBUS_WRITE) || (size == SIS96x_QUICK)))
@@ -249,7 +242,7 @@ static const struct i2c_algorithm smbus_algorithm = {
 static struct i2c_adapter sis96x_adapter = {
        .owner          = THIS_MODULE,
        .id             = I2C_HW_SMBUS_SIS96X,
-       .class          = I2C_CLASS_HWMON,
+       .class          = I2C_CLASS_HWMON | I2C_CLASS_SPD,
        .algo           = &smbus_algorithm,
 };
 
@@ -286,6 +279,10 @@ static int __devinit sis96x_probe(struct pci_dev *dev,
        dev_info(&dev->dev, "SiS96x SMBus base address: 0x%04x\n",
                        sis96x_smbus_base);
 
+       retval = acpi_check_resource_conflict(&dev->resource[SIS96x_BAR]);
+       if (retval)
+               return retval;
+
        /* Everything is happy, let's grab the memory and set things up. */
        if (!request_region(sis96x_smbus_base, SMB_IOSIZE,
                            sis96x_driver.name)) {
index d08eeec53913ce9759a74e82b984bdc06dcf18b8..1b7b2af94036ca448ff226f49bd1e14ff45068c4 100644 (file)
@@ -43,7 +43,7 @@ struct stub_chip {
 
 static struct stub_chip *stub_chips;
 
-/* Return -1 on error. */
+/* Return negative errno on error. */
 static s32 stub_xfer(struct i2c_adapter * adap, u16 addr, unsigned short flags,
        char read_write, u8 command, int size, union i2c_smbus_data * data)
 {
@@ -120,7 +120,7 @@ static s32 stub_xfer(struct i2c_adapter * adap, u16 addr, unsigned short flags,
 
        default:
                dev_dbg(&adap->dev, "Unsupported I2C/SMBus command\n");
-               ret = -1;
+               ret = -EOPNOTSUPP;
                break;
        } /* switch (size) */
 
@@ -140,7 +140,7 @@ static const struct i2c_algorithm smbus_algorithm = {
 
 static struct i2c_adapter stub_adapter = {
        .owner          = THIS_MODULE,
-       .class          = I2C_CLASS_HWMON,
+       .class          = I2C_CLASS_HWMON | I2C_CLASS_SPD,
        .algo           = &smbus_algorithm,
        .name           = "SMBus stub driver",
 };
index de9db49e54d93d8785f0beef0df4c5df28036a12..224aa12ee7c85d880118db61cac79a2b564e6af1 100644 (file)
@@ -96,9 +96,8 @@ static int taos_smbus_xfer(struct i2c_adapter *adapter, u16 addr,
                        sprintf(p, "$%02X", command);
                break;
        default:
-               dev_dbg(&adapter->dev, "Unsupported transaction size %d\n",
-                       size);
-               return -EINVAL;
+               dev_warn(&adapter->dev, "Unsupported transaction %d\n", size);
+               return -EOPNOTSUPP;
        }
 
        /* Send the transaction to the TAOS EVM */
index 61716f6b14dcd2435c1b65f0fe27e05fc4f090cb..29cef0433f34d8a445078e695c600461bdfc0cc6 100644 (file)
@@ -1,7 +1,4 @@
 /*
-    i2c-via.c - Part of lm_sensors,  Linux kernel modules
-                for hardware monitoring
-
     i2c Support for Via Technologies 82C586B South Bridge
 
     Copyright (c) 1998, 1999 Kyösti Mälkki <kmalkki@cc.hut.fi>
@@ -87,7 +84,7 @@ static struct i2c_algo_bit_data bit_data = {
 static struct i2c_adapter vt586b_adapter = {
        .owner          = THIS_MODULE,
        .id             = I2C_HW_B_VIA,
-       .class          = I2C_CLASS_HWMON,
+       .class          = I2C_CLASS_HWMON | I2C_CLASS_SPD,
        .name           = "VIA i2c",
        .algo_data      = &bit_data,
 };
index 77b13d027f86336a08eda1d6e8110131aff1d84e..862eb352a2d92b9708cdad16adfec687fdc4dece 100644 (file)
@@ -1,6 +1,4 @@
 /*
-    i2c-viapro.c - Part of lm_sensors, Linux kernel modules for hardware
-              monitoring
     Copyright (c) 1998 - 2002  Frodo Looijaard <frodol@dds.nl>,
     Philip Edelbrock <phil@netroedge.com>, Kyösti Mälkki <kmalkki@cc.hut.fi>,
     Mark D. Studebaker <mdsxyz123@yahoo.com>
@@ -50,6 +48,7 @@
 #include <linux/ioport.h>
 #include <linux/i2c.h>
 #include <linux/init.h>
+#include <linux/acpi.h>
 #include <asm/io.h>
 
 static struct pci_dev *vt596_pdev;
@@ -152,7 +151,7 @@ static int vt596_transaction(u8 size)
                if ((temp = inb_p(SMBHSTSTS)) & 0x1F) {
                        dev_err(&vt596_adapter.dev, "SMBus reset failed! "
                                "(0x%02x)\n", temp);
-                       return -1;
+                       return -EBUSY;
                }
        }
 
@@ -167,24 +166,24 @@ static int vt596_transaction(u8 size)
 
        /* If the SMBus is still busy, we give up */
        if (timeout >= MAX_TIMEOUT) {
-               result = -1;
+               result = -ETIMEDOUT;
                dev_err(&vt596_adapter.dev, "SMBus timeout!\n");
        }
 
        if (temp & 0x10) {
-               result = -1;
+               result = -EIO;
                dev_err(&vt596_adapter.dev, "Transaction failed (0x%02x)\n",
                        size);
        }
 
        if (temp & 0x08) {
-               result = -1;
+               result = -EIO;
                dev_err(&vt596_adapter.dev, "SMBus collision!\n");
        }
 
        if (temp & 0x04) {
                int read = inb_p(SMBHSTADD) & 0x01;
-               result = -1;
+               result = -ENXIO;
                /* The quick and receive byte commands are used to probe
                   for chips, so errors are expected, and we don't want
                   to frighten the user. */
@@ -202,12 +201,13 @@ static int vt596_transaction(u8 size)
        return result;
 }
 
-/* Return -1 on error, 0 on success */
+/* Return negative errno on error, 0 on success */
 static s32 vt596_access(struct i2c_adapter *adap, u16 addr,
                unsigned short flags, char read_write, u8 command,
                int size, union i2c_smbus_data *data)
 {
        int i;
+       int status;
 
        switch (size) {
        case I2C_SMBUS_QUICK:
@@ -258,8 +258,9 @@ static s32 vt596_access(struct i2c_adapter *adap, u16 addr,
 
        outb_p(((addr & 0x7f) << 1) | read_write, SMBHSTADD);
 
-       if (vt596_transaction(size)) /* Error in transaction */
-               return -1;
+       status = vt596_transaction(size);
+       if (status)
+               return status;
 
        if ((read_write == I2C_SMBUS_WRITE) || (size == VT596_QUICK))
                return 0;
@@ -285,9 +286,9 @@ static s32 vt596_access(struct i2c_adapter *adap, u16 addr,
        return 0;
 
 exit_unsupported:
-       dev_warn(&vt596_adapter.dev, "Unsupported command invoked! (0x%02x)\n",
+       dev_warn(&vt596_adapter.dev, "Unsupported transaction %d\n",
                 size);
-       return -1;
+       return -EOPNOTSUPP;
 }
 
 static u32 vt596_func(struct i2c_adapter *adapter)
@@ -309,7 +310,7 @@ static const struct i2c_algorithm smbus_algorithm = {
 static struct i2c_adapter vt596_adapter = {
        .owner          = THIS_MODULE,
        .id             = I2C_HW_SMBUS_VIA2,
-       .class          = I2C_CLASS_HWMON,
+       .class          = I2C_CLASS_HWMON | I2C_CLASS_SPD,
        .algo           = &smbus_algorithm,
 };
 
@@ -354,6 +355,10 @@ static int __devinit vt596_probe(struct pci_dev *pdev,
        }
 
 found:
+       error = acpi_check_region(vt596_smba, 8, vt596_driver.name);
+       if (error)
+               return error;
+
        if (!request_region(vt596_smba, 8, vt596_driver.name)) {
                dev_err(&pdev->dev, "SMBus region 0x%x already in use!\n",
                        vt596_smba);
index 88a3447e11e1eaf47fb9ddc0b29fc0ab8fb0048f..1d4ae26ba73d7d2cab02e02f82d50688f5b99f2f 100644 (file)
@@ -1,6 +1,4 @@
 /*
-    voodoo3.c - Part of lm_sensors, Linux kernel modules for hardware
-              monitoring
     Copyright (c) 1998, 1999  Frodo Looijaard <frodol@dds.nl>,
     Philip Edelbrock <phil@netroedge.com>,
     Ralph Metzler <rjkm@thp.uni-koeln.de>, and
index 61abe0f33255291ffa7e274f0aed3a8f68f283d6..ed794b145a115bb4d235ded80ffd66dfc3e67668 100644 (file)
@@ -442,7 +442,7 @@ static __init struct scx200_acb_iface *scx200_create_iface(const char *text,
        adapter->owner = THIS_MODULE;
        adapter->id = I2C_HW_SMBUS_SCX200;
        adapter->algo = &scx200_acb_algorithm;
-       adapter->class = I2C_CLASS_HWMON;
+       adapter->class = I2C_CLASS_HWMON | I2C_CLASS_SPD;
        adapter->dev.parent = dev;
 
        mutex_init(&iface->mutex);
index 2da2edfa68ec1ebf0954cfc48b9ea87fd3e50d26..50e0a465374193d6f93d4ceb3e74367c0a0b2ec7 100644 (file)
@@ -14,6 +14,32 @@ config DS1682
          This driver can also be built as a module.  If so, the module
          will be called ds1682.
 
+config AT24
+       tristate "EEPROMs from most vendors"
+       depends on SYSFS && EXPERIMENTAL
+       help
+         Enable this driver to get read/write support to most I2C EEPROMs,
+         after you configure the driver to know about each EEPROM on
+         your target board.  Use these generic chip names, instead of
+         vendor-specific ones like at24c64 or 24lc02:
+
+            24c00, 24c01, 24c02, spd (readonly 24c02), 24c04, 24c08,
+            24c16, 24c32, 24c64, 24c128, 24c256, 24c512, 24c1024
+
+         Unless you like data loss puzzles, always be sure that any chip
+         you configure as a 24c32 (32 kbit) or larger is NOT really a
+         24c16 (16 kbit) or smaller, and vice versa. Marking the chip
+         as read-only won't help recover from this. Also, if your chip
+         has any software write-protect mechanism you may want to review the
+         code to make sure this driver won't turn it on by accident.
+
+         If you use this with an SMBus adapter instead of an I2C adapter,
+         full functionality is not available.  Only smaller devices are
+         supported (24c16 and below, max 2 kByte).
+
+         This driver can also be built as a module.  If so, the module
+         will be called at24.
+
 config SENSORS_EEPROM
        tristate "EEPROM reader"
        depends on EXPERIMENTAL
@@ -26,8 +52,8 @@ config SENSORS_EEPROM
          will be called eeprom.
 
 config SENSORS_PCF8574
-       tristate "Philips PCF8574 and PCF8574A"
-       depends on EXPERIMENTAL
+       tristate "Philips PCF8574 and PCF8574A (DEPRECATED)"
+       depends on EXPERIMENTAL && GPIO_PCF857X = "n"
        default n
        help
          If you say yes here you get support for Philips PCF8574 and 
@@ -36,12 +62,16 @@ config SENSORS_PCF8574
          This driver can also be built as a module.  If so, the module
          will be called pcf8574.
 
+         This driver is deprecated and will be dropped soon. Use
+         drivers/gpio/pcf857x.c instead.
+
          These devices are hard to detect and rarely found on mainstream
          hardware.  If unsure, say N.
 
 config PCF8575
-       tristate "Philips PCF8575"
+       tristate "Philips PCF8575 (DEPRECATED)"
        default n
+       depends on GPIO_PCF857X = "n"
        help
          If you say yes here you get support for Philips PCF8575 chip.
          This chip is a 16-bit I/O expander for the I2C bus.  Several other
@@ -50,12 +80,15 @@ config PCF8575
          This driver can also be built as a module.  If so, the module
          will be called pcf8575.
 
+         This driver is deprecated and will be dropped soon. Use
+         drivers/gpio/pcf857x.c instead.
+
          This device is hard to detect and is rarely found on mainstream
          hardware.  If unsure, say N.
 
 config SENSORS_PCA9539
        tristate "Philips PCA9539 16-bit I/O port (DEPRECATED)"
-       depends on EXPERIMENTAL && GPIO_PCA9539 = "n"
+       depends on EXPERIMENTAL && GPIO_PCA953X = "n"
        help
          If you say yes here you get support for the Philips PCA9539
          16-bit I/O port.
@@ -64,7 +97,7 @@ config SENSORS_PCA9539
          will be called pca9539.
 
          This driver is deprecated and will be dropped soon. Use
-         drivers/gpio/pca9539.c instead.
+         drivers/gpio/pca953x.c instead.
 
 config SENSORS_PCF8591
        tristate "Philips PCF8591"
index e47aca0ca5aebda2bb70bc44d3f625a827301ea2..39e3e69ed1256bf93de0be16b9687bd15eb8e4a9 100644 (file)
@@ -10,6 +10,7 @@
 #
 
 obj-$(CONFIG_DS1682)           += ds1682.o
+obj-$(CONFIG_AT24)             += at24.o
 obj-$(CONFIG_SENSORS_EEPROM)   += eeprom.o
 obj-$(CONFIG_SENSORS_MAX6875)  += max6875.o
 obj-$(CONFIG_SENSORS_PCA9539)  += pca9539.o
diff --git a/drivers/i2c/chips/at24.c b/drivers/i2c/chips/at24.c
new file mode 100644 (file)
index 0000000..e764c94
--- /dev/null
@@ -0,0 +1,583 @@
+/*
+ * at24.c - handle most I2C EEPROMs
+ *
+ * Copyright (C) 2005-2007 David Brownell
+ * Copyright (C) 2008 Wolfram Sang, Pengutronix
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/mutex.h>
+#include <linux/sysfs.h>
+#include <linux/mod_devicetable.h>
+#include <linux/log2.h>
+#include <linux/bitops.h>
+#include <linux/jiffies.h>
+#include <linux/i2c.h>
+#include <linux/i2c/at24.h>
+
+/*
+ * I2C EEPROMs from most vendors are inexpensive and mostly interchangeable.
+ * Differences between different vendor product lines (like Atmel AT24C or
+ * MicroChip 24LC, etc) won't much matter for typical read/write access.
+ * There are also I2C RAM chips, likewise interchangeable. One example
+ * would be the PCF8570, which acts like a 24c02 EEPROM (256 bytes).
+ *
+ * However, misconfiguration can lose data. "Set 16-bit memory address"
+ * to a part with 8-bit addressing will overwrite data. Writing with too
+ * big a page size also loses data. And it's not safe to assume that the
+ * conventional addresses 0x50..0x57 only hold eeproms; a PCF8563 RTC
+ * uses 0x51, for just one example.
+ *
+ * Accordingly, explicit board-specific configuration data should be used
+ * in almost all cases. (One partial exception is an SMBus used to access
+ * "SPD" data for DRAM sticks. Those only use 24c02 EEPROMs.)
+ *
+ * So this driver uses "new style" I2C driver binding, expecting to be
+ * told what devices exist. That may be in arch/X/mach-Y/board-Z.c or
+ * similar kernel-resident tables; or, configuration data coming from
+ * a bootloader.
+ *
+ * Other than binding model, current differences from "eeprom" driver are
+ * that this one handles write access and isn't restricted to 24c02 devices.
+ * It also handles larger devices (32 kbit and up) with two-byte addresses,
+ * which won't work on pure SMBus systems.
+ */
+
+/*
+ * Per-device driver state. at24_probe() allocates it with room for
+ * num_addresses entries in the trailing client[] array.
+ */
+struct at24_data {
+       /* private copy of the chip description (byte_len, page_size, flags) */
+       struct at24_platform_data chip;
+       /* true: I/O uses SMBus i2c-block calls instead of i2c_transfer() */
+       bool use_smbus;
+
+       /*
+        * Lock protects against activities from other Linux tasks,
+        * but not from changes by other I2C masters.
+        */
+       struct mutex lock;
+       /* the sysfs binary attribute named "eeprom" */
+       struct bin_attribute bin;
+
+       /* staging buffer (address bytes + data) for i2c_transfer() writes */
+       u8 *writebuf;
+       /* upper bound on data bytes sent by a single write */
+       unsigned write_max;
+       /* number of entries in client[] */
+       unsigned num_addresses;
+
+       /*
+        * Some chips tie up multiple I2C addresses; dummy devices reserve
+        * them for us, and we'll use them with SMBus calls.
+        */
+       struct i2c_client *client[];
+};
+
+/*
+ * This parameter is to help this driver avoid blocking other drivers out
+ * of I2C for potentially troublesome amounts of time. With a 100 kHz I2C
+ * clock, one 256 byte read takes about 1/43 second which is excessive;
+ * but the 1/170 second it takes at 400 kHz may be quite reasonable; and
+ * at 1 MHz (Fm+) a 1/430 second delay could easily be invisible.
+ *
+ * This value is forced to be a power of two so that writes align on pages.
+ */
+static unsigned io_limit = 128;
+module_param(io_limit, uint, 0);
+MODULE_PARM_DESC(io_limit, "Maximum bytes per I/O (default 128)");
+
+/*
+ * Specs often allow 5 msec for a page write, sometimes 20 msec;
+ * it's important to recover from write timeouts.
+ */
+static unsigned write_timeout = 25;
+module_param(write_timeout, uint, 0);
+MODULE_PARM_DESC(write_timeout, "Time (in ms) to try writes (default 25)");
+
+/* widths, in bits, of the two fields packed into the device magic */
+#define AT24_SIZE_BYTELEN 5
+#define AT24_SIZE_FLAGS 8
+
+/* mask selecting the low x bits */
+#define AT24_BITMASK(x) (BIT(x) - 1)
+
+/*
+ * create non-zero magic value for given eeprom parameters:
+ * ilog2(_len) occupies the low AT24_SIZE_BYTELEN bits, _flags the next
+ * AT24_SIZE_FLAGS bits, and a set bit just above the flags guarantees
+ * that the result is never zero.
+ */
+#define AT24_DEVICE_MAGIC(_len, _flags)                \
+       ((1 << AT24_SIZE_FLAGS | (_flags))              \
+           << AT24_SIZE_BYTELEN | ilog2(_len))
+
+/*
+ * Generic chip names this driver binds to. driver_data holds the
+ * AT24_DEVICE_MAGIC() encoding of size and flags. The "at24" entry has
+ * no magic, so at24_probe() refuses it unless platform_data is supplied.
+ */
+static const struct i2c_device_id at24_ids[] = {
+       /* needs 8 addresses as A0-A2 are ignored */
+       { "24c00", AT24_DEVICE_MAGIC(128 / 8, AT24_FLAG_TAKE8ADDR) },
+       /* old variants can't be handled with this generic entry! */
+       { "24c01", AT24_DEVICE_MAGIC(1024 / 8, 0) },
+       { "24c02", AT24_DEVICE_MAGIC(2048 / 8, 0) },
+       /* spd is a 24c02 in memory DIMMs */
+       { "spd", AT24_DEVICE_MAGIC(2048 / 8,
+               AT24_FLAG_READONLY | AT24_FLAG_IRUGO) },
+       { "24c04", AT24_DEVICE_MAGIC(4096 / 8, 0) },
+       /* 24rf08 quirk is handled at i2c-core */
+       { "24c08", AT24_DEVICE_MAGIC(8192 / 8, 0) },
+       { "24c16", AT24_DEVICE_MAGIC(16384 / 8, 0) },
+       { "24c32", AT24_DEVICE_MAGIC(32768 / 8, AT24_FLAG_ADDR16) },
+       { "24c64", AT24_DEVICE_MAGIC(65536 / 8, AT24_FLAG_ADDR16) },
+       { "24c128", AT24_DEVICE_MAGIC(131072 / 8, AT24_FLAG_ADDR16) },
+       { "24c256", AT24_DEVICE_MAGIC(262144 / 8, AT24_FLAG_ADDR16) },
+       { "24c512", AT24_DEVICE_MAGIC(524288 / 8, AT24_FLAG_ADDR16) },
+       { "24c1024", AT24_DEVICE_MAGIC(1048576 / 8, AT24_FLAG_ADDR16) },
+       { "at24", 0 },
+       { /* END OF LIST */ }
+};
+MODULE_DEVICE_TABLE(i2c, at24_ids);
+
+/*-------------------------------------------------------------------------*/
+
+/*
+ * Chips may occupy several consecutive I2C addresses. Split a linear
+ * EEPROM offset into the client that owns it and the offset inside that
+ * client; *offset is rewritten in place to the in-client offset.
+ * Range checking of the offset is left to the sysfs layer.
+ */
+static struct i2c_client *at24_translate_offset(struct at24_data *at24,
+               unsigned *offset)
+{
+       /* 16-bit parts address 16 offset bits per client, others 8 */
+       const unsigned shift = (at24->chip.flags & AT24_FLAG_ADDR16) ? 16 : 8;
+       const unsigned idx = *offset >> shift;
+
+       *offset &= (1U << shift) - 1;
+
+       return at24->client[idx];
+}
+
+/*
+ * Read up to count bytes, starting at the given linear EEPROM offset,
+ * into buf with a single SMBus block read or a single combined I2C
+ * transfer. The request is clamped to io_limit (and additionally to
+ * I2C_SMBUS_BLOCK_MAX on SMBus), so callers must loop for larger reads.
+ *
+ * Returns the clamped count on the I2C path, the non-negative SMBus call
+ * result on the SMBus path, or a negative errno on failure.
+ */
+static ssize_t at24_eeprom_read(struct at24_data *at24, char *buf,
+               unsigned offset, size_t count)
+{
+       struct i2c_msg msg[2];
+       u8 msgbuf[2];
+       struct i2c_client *client;
+       int status, i;
+
+       memset(msg, 0, sizeof(msg));
+
+       /*
+        * REVISIT some multi-address chips don't rollover page reads to
+        * the next slave address, so we may need to truncate the count.
+        * Those chips might need another quirk flag.
+        *
+        * If the real hardware used four adjacent 24c02 chips and that
+        * were misconfigured as one 24c08, that would be a similar effect:
+        * one "eeprom" file not four, but larger reads would fail when
+        * they crossed certain pages.
+        */
+
+       /*
+        * Slave address and byte offset derive from the offset. Always
+        * set the byte address; on a multi-master board, another master
+        * may have changed the chip's "current" address pointer.
+        */
+       client = at24_translate_offset(at24, &offset);
+
+       if (count > io_limit)
+               count = io_limit;
+
+       /* Smaller eeproms can work given some SMBus extension calls */
+       if (at24->use_smbus) {
+               if (count > I2C_SMBUS_BLOCK_MAX)
+                       count = I2C_SMBUS_BLOCK_MAX;
+               status = i2c_smbus_read_i2c_block_data(client, offset,
+                               count, buf);
+               dev_dbg(&client->dev, "smbus read %zd@%d --> %d\n",
+                               count, offset, status);
+               /* any SMBus failure is reported to the caller as -EIO */
+               return (status < 0) ? -EIO : status;
+       }
+
+       /*
+        * When we have a better choice than SMBus calls, use a combined
+        * I2C message. Write address; then read up to io_limit data bytes.
+        * Note that read page rollover helps us here (unlike writes).
+        * msgbuf is u8 and will cast to our needs.
+        */
+       i = 0;
+       if (at24->chip.flags & AT24_FLAG_ADDR16)
+               msgbuf[i++] = offset >> 8;
+       msgbuf[i++] = offset;
+
+       msg[0].addr = client->addr;
+       msg[0].buf = msgbuf;
+       msg[0].len = i;
+
+       msg[1].addr = client->addr;
+       msg[1].flags = I2C_M_RD;
+       msg[1].buf = buf;
+       msg[1].len = count;
+
+       status = i2c_transfer(client->adapter, msg, 2);
+       dev_dbg(&client->dev, "i2c read %zd@%d --> %d\n",
+                       count, offset, status);
+
+       /* only a result of 2 (both messages) counts as success */
+       if (status == 2)
+               return count;
+       else if (status >= 0)
+               return -EIO;
+       else
+               return status;
+}
+
+/*
+ * sysfs read handler for the "eeprom" attribute. Keeps calling
+ * at24_eeprom_read() until the whole request is satisfied or a chunk
+ * fails. Returns the number of bytes read so far, or the failing chunk's
+ * status when nothing at all was read.
+ */
+static ssize_t at24_bin_read(struct kobject *kobj, struct bin_attribute *attr,
+               char *buf, loff_t off, size_t count)
+{
+       struct device *dev = container_of(kobj, struct device, kobj);
+       struct at24_data *at24 = dev_get_drvdata(dev);
+       ssize_t total = 0;
+
+       /* empty request: nothing to do, no need to take the lock */
+       if (unlikely(count == 0))
+               return count;
+
+       /*
+        * Hold the lock across the whole request so other tasks on this
+        * host cannot interleave; other I2C masters are not excluded.
+        */
+       mutex_lock(&at24->lock);
+
+       do {
+               ssize_t chunk = at24_eeprom_read(at24, buf, off, count);
+
+               if (chunk <= 0) {
+                       /* keep a partial byte count if some data was read */
+                       if (!total)
+                               total = chunk;
+                       break;
+               }
+
+               total += chunk;
+               count -= chunk;
+               off += chunk;
+               buf += chunk;
+       } while (count);
+
+       mutex_unlock(&at24->lock);
+
+       return total;
+}
+
+
+/*
+ * REVISIT: export at24_bin{read,write}() to let other kernel code use
+ * eeprom data. For example, it might hold a board's Ethernet address, or
+ * board-specific calibration data generated on the manufacturing floor.
+ */
+
+
+/*
+ * Note that if the hardware write-protect pin is pulled high, the whole
+ * chip is normally write protected. But there are plenty of product
+ * variants here, including OTP fuses and partial chip protect.
+ *
+ * We only use page mode writes; the alternative is sloooow. This routine
+ * writes at most one page.
+ *
+ * The request is clamped to write_max and to the end of the page that
+ * contains offset, so callers must loop for larger writes. Returns the
+ * number of bytes written, or -ETIMEDOUT when no attempt succeeded
+ * within write_timeout milliseconds.
+ */
+static ssize_t at24_eeprom_write(struct at24_data *at24, char *buf,
+               unsigned offset, size_t count)
+{
+       struct i2c_client *client;
+       struct i2c_msg msg;
+       ssize_t status;
+       unsigned long timeout, write_time;
+       unsigned next_page;
+
+       /* Get corresponding I2C address and adjust offset */
+       client = at24_translate_offset(at24, &offset);
+
+       /* write_max is at most a page */
+       if (count > at24->write_max)
+               count = at24->write_max;
+
+       /* Never roll over backwards, to the start of this page */
+       next_page = roundup(offset + 1, at24->chip.page_size);
+       if (offset + count > next_page)
+               count = next_page - offset;
+
+       /* If we'll use I2C calls for I/O, set up the message */
+       if (!at24->use_smbus) {
+               int i = 0;
+
+               msg.addr = client->addr;
+               msg.flags = 0;
+
+               /* msg.buf is u8 and casts will mask the values */
+               msg.buf = at24->writebuf;
+               if (at24->chip.flags & AT24_FLAG_ADDR16)
+                       msg.buf[i++] = offset >> 8;
+
+               msg.buf[i++] = offset;
+               memcpy(&msg.buf[i], buf, count);
+               msg.len = i + count;
+       }
+
+       /*
+        * Writes fail if the previous one didn't complete yet. We may
+        * loop a few times until this one succeeds, waiting at least
+        * long enough for one entire page write to work.
+        */
+       timeout = jiffies + msecs_to_jiffies(write_timeout);
+       do {
+               /* sampled before the attempt; the loop condition tests this */
+               write_time = jiffies;
+               if (at24->use_smbus) {
+                       status = i2c_smbus_write_i2c_block_data(client,
+                                       offset, count, buf);
+                       if (status == 0)
+                               status = count;
+               } else {
+                       status = i2c_transfer(client->adapter, &msg, 1);
+                       if (status == 1)
+                               status = count;
+               }
+               dev_dbg(&client->dev, "write %zd@%d --> %zd (%ld)\n",
+                               count, offset, status, jiffies);
+
+               if (status == count)
+                       return count;
+
+               /* REVISIT: at HZ=100, this is sloooow */
+               msleep(1);
+       } while (time_before(write_time, timeout));
+
+       return -ETIMEDOUT;
+}
+
+/*
+ * sysfs write handler for the "eeprom" attribute. Keeps calling
+ * at24_eeprom_write() until the whole request is stored or a chunk
+ * fails. Returns the number of bytes written so far, or the failing
+ * chunk's status when nothing at all was written.
+ */
+static ssize_t at24_bin_write(struct kobject *kobj, struct bin_attribute *attr,
+               char *buf, loff_t off, size_t count)
+{
+       struct device *dev = container_of(kobj, struct device, kobj);
+       struct at24_data *at24 = dev_get_drvdata(dev);
+       ssize_t total = 0;
+
+       /* empty request: nothing to do, no need to take the lock */
+       if (unlikely(count == 0))
+               return count;
+
+       /*
+        * Hold the lock across the whole request so other tasks on this
+        * host cannot interleave; other I2C masters are not excluded.
+        */
+       mutex_lock(&at24->lock);
+
+       do {
+               ssize_t chunk = at24_eeprom_write(at24, buf, off, count);
+
+               if (chunk <= 0) {
+                       /* keep a partial byte count if some data was stored */
+                       if (!total)
+                               total = chunk;
+                       break;
+               }
+
+               total += chunk;
+               count -= chunk;
+               off += chunk;
+               buf += chunk;
+       } while (count);
+
+       mutex_unlock(&at24->lock);
+
+       return total;
+}
+
+/*-------------------------------------------------------------------------*/
+
+/*
+ * Bind to one EEPROM. The chip description comes from platform_data when
+ * present, otherwise it is decoded from the matched id's magic value.
+ * Allocates the driver state, reserves any additional slave addresses
+ * with dummy clients and publishes the "eeprom" sysfs attribute.
+ *
+ * Returns 0 on success or a negative errno; on failure everything
+ * acquired here has been released again.
+ */
+static int at24_probe(struct i2c_client *client, const struct i2c_device_id *id)
+{
+       struct at24_platform_data chip;
+       bool writable;
+       bool use_smbus = false;
+       struct at24_data *at24;
+       int err;
+       unsigned i, num_addresses;
+       kernel_ulong_t magic;
+
+       if (client->dev.platform_data) {
+               chip = *(struct at24_platform_data *)client->dev.platform_data;
+       } else {
+               if (!id->driver_data) {
+                       err = -ENODEV;
+                       goto err_out;
+               }
+               magic = id->driver_data;
+               chip.byte_len = BIT(magic & AT24_BITMASK(AT24_SIZE_BYTELEN));
+               magic >>= AT24_SIZE_BYTELEN;
+               chip.flags = magic & AT24_BITMASK(AT24_SIZE_FLAGS);
+               /*
+                * This is slow, but we can't know all eeproms, so we better
+                * play safe. Specifying custom eeprom-types via platform_data
+                * is recommended anyhow.
+                */
+               chip.page_size = 1;
+       }
+
+       /*
+        * Zero sizes are not merely suspicious: a zero byte_len can leave
+        * no client[] slot for the client[0] store below, and a zero
+        * page_size makes the write path round up to a multiple of 0.
+        */
+       if (!chip.byte_len) {
+               dev_err(&client->dev, "byte_len must not be 0!\n");
+               err = -EINVAL;
+               goto err_out;
+       }
+       if (!chip.page_size) {
+               dev_err(&client->dev, "page_size must not be 0!\n");
+               err = -EINVAL;
+               goto err_out;
+       }
+
+       if (!is_power_of_2(chip.byte_len))
+               dev_warn(&client->dev,
+                       "byte_len looks suspicious (no power of 2)!\n");
+       if (!is_power_of_2(chip.page_size))
+               dev_warn(&client->dev,
+                       "page_size looks suspicious (no power of 2)!\n");
+
+       /* Use I2C operations unless we're stuck with SMBus extensions. */
+       if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) {
+               if (chip.flags & AT24_FLAG_ADDR16) {
+                       err = -EPFNOSUPPORT;
+                       goto err_out;
+               }
+               if (!i2c_check_functionality(client->adapter,
+                               I2C_FUNC_SMBUS_READ_I2C_BLOCK)) {
+                       err = -EPFNOSUPPORT;
+                       goto err_out;
+               }
+               use_smbus = true;
+       }
+
+       if (chip.flags & AT24_FLAG_TAKE8ADDR)
+               num_addresses = 8;
+       else
+               num_addresses = DIV_ROUND_UP(chip.byte_len,
+                       (chip.flags & AT24_FLAG_ADDR16) ? 65536 : 256);
+
+       at24 = kzalloc(sizeof(struct at24_data) +
+               num_addresses * sizeof(struct i2c_client *), GFP_KERNEL);
+       if (!at24) {
+               err = -ENOMEM;
+               goto err_out;
+       }
+
+       mutex_init(&at24->lock);
+       at24->use_smbus = use_smbus;
+       at24->chip = chip;
+       at24->num_addresses = num_addresses;
+
+       /*
+        * Export the EEPROM bytes through sysfs, since that's convenient.
+        * By default, only root should see the data (maybe passwords etc)
+        */
+       at24->bin.attr.name = "eeprom";
+       at24->bin.attr.mode = chip.flags & AT24_FLAG_IRUGO ? S_IRUGO : S_IRUSR;
+       at24->bin.attr.owner = THIS_MODULE;
+       at24->bin.read = at24_bin_read;
+       at24->bin.size = chip.byte_len;
+
+       writable = !(chip.flags & AT24_FLAG_READONLY);
+       if (writable) {
+               if (!use_smbus || i2c_check_functionality(client->adapter,
+                               I2C_FUNC_SMBUS_WRITE_I2C_BLOCK)) {
+
+                       unsigned write_max = chip.page_size;
+
+                       at24->bin.write = at24_bin_write;
+                       at24->bin.attr.mode |= S_IWUSR;
+
+                       if (write_max > io_limit)
+                               write_max = io_limit;
+                       if (use_smbus && write_max > I2C_SMBUS_BLOCK_MAX)
+                               write_max = I2C_SMBUS_BLOCK_MAX;
+                       at24->write_max = write_max;
+
+                       /* buffer (data + address at the beginning) */
+                       at24->writebuf = kmalloc(write_max + 2, GFP_KERNEL);
+                       if (!at24->writebuf) {
+                               err = -ENOMEM;
+                               goto err_struct;
+                       }
+               } else {
+                       /* no write method was installed: report read-only */
+                       writable = false;
+                       dev_warn(&client->dev,
+                               "cannot write due to controller restrictions.\n");
+               }
+       }
+
+       at24->client[0] = client;
+
+       /* use dummy devices for multiple-address chips */
+       for (i = 1; i < num_addresses; i++) {
+               at24->client[i] = i2c_new_dummy(client->adapter,
+                                       client->addr + i);
+               if (!at24->client[i]) {
+                       dev_err(&client->dev, "address 0x%02x unavailable\n",
+                                       client->addr + i);
+                       err = -EADDRINUSE;
+                       goto err_clients;
+               }
+       }
+
+       /*
+        * Publish the driver data before the sysfs file appears: the
+        * read/write handlers fetch it with dev_get_drvdata() and may
+        * run as soon as the attribute exists.
+        */
+       i2c_set_clientdata(client, at24);
+
+       err = sysfs_create_bin_file(&client->dev.kobj, &at24->bin);
+       if (err) {
+               i2c_set_clientdata(client, NULL);
+               goto err_clients;
+       }
+
+       dev_info(&client->dev, "%Zd byte %s EEPROM %s\n",
+               at24->bin.size, client->name,
+               writable ? "(writable)" : "(read-only)");
+       dev_dbg(&client->dev,
+               "page_size %d, num_addresses %d, write_max %d%s\n",
+               chip.page_size, num_addresses,
+               at24->write_max,
+               use_smbus ? ", use_smbus" : "");
+
+       return 0;
+
+err_clients:
+       /* kzalloc() zeroed client[], so unset slots are simply skipped */
+       for (i = 1; i < num_addresses; i++)
+               if (at24->client[i])
+                       i2c_unregister_device(at24->client[i]);
+
+       kfree(at24->writebuf);
+err_struct:
+       kfree(at24);
+err_out:
+       dev_dbg(&client->dev, "probe error %d\n", err);
+       return err;
+}
+
+/*
+ * Undo at24_probe(): remove the sysfs attribute, unregister the dummy
+ * clients holding the extra addresses, and free the driver state.
+ * Always returns 0.
+ */
+static int __devexit at24_remove(struct i2c_client *client)
+{
+       struct at24_data *at24 = i2c_get_clientdata(client);
+       int n = 1;
+
+       sysfs_remove_bin_file(&client->dev.kobj, &at24->bin);
+
+       /* client[0] is the device being removed; drop only the dummies */
+       while (n < at24->num_addresses) {
+               i2c_unregister_device(at24->client[n]);
+               n++;
+       }
+
+       kfree(at24->writebuf);
+       kfree(at24);
+       i2c_set_clientdata(client, NULL);
+
+       return 0;
+}
+
+/*-------------------------------------------------------------------------*/
+
+/* I2C driver glue: devices are matched by name through at24_ids */
+static struct i2c_driver at24_driver = {
+       .driver = {
+               .name = "at24",
+               .owner = THIS_MODULE,
+       },
+       .probe = at24_probe,
+       .remove = __devexit_p(at24_remove),
+       .id_table = at24_ids,
+};
+
+/*
+ * Module entry point: validate the io_limit parameter, force it down to
+ * a power of two so that writes align on pages, then register the
+ * driver. Returns -EINVAL for io_limit == 0, otherwise the result of
+ * i2c_add_driver().
+ */
+static int __init at24_init(void)
+{
+       /* rounddown_pow_of_two() has no defined result for 0 */
+       if (!io_limit) {
+               printk(KERN_ERR "at24: io_limit must not be 0!\n");
+               return -EINVAL;
+       }
+
+       io_limit = rounddown_pow_of_two(io_limit);
+       return i2c_add_driver(&at24_driver);
+}
+module_init(at24_init);
+
+/* Module exit point: unregister the driver added by at24_init() */
+static void __exit at24_exit(void)
+{
+       i2c_del_driver(&at24_driver);
+}
+module_exit(at24_exit);
+
+MODULE_DESCRIPTION("Driver for most I2C EEPROMs");
+MODULE_AUTHOR("David Brownell and Wolfram Sang");
+MODULE_LICENSE("GPL");
index 7dee001e5133bc7df391bc187e28ae082b9315d2..373ea8d8fe8f0dd7af8c2cbe343c41a48741f2d3 100644 (file)
@@ -1,15 +1,9 @@
 /*
-    eeprom.c - Part of lm_sensors, Linux kernel modules for hardware
-               monitoring
     Copyright (C) 1998, 1999  Frodo Looijaard <frodol@dds.nl> and
                               Philip Edelbrock <phil@netroedge.com>
     Copyright (C) 2003 Greg Kroah-Hartman <greg@kroah.com>
     Copyright (C) 2003 IBM Corp.
-
-    2004-01-16  Jean Delvare <khali@linux-fr.org>
-    Divide the eeprom in 32-byte (arbitrary) slices. This significantly
-    speeds sensors up, as well as various scripts using the eeprom
-    module.
+    Copyright (C) 2004 Jean Delvare <khali@linux-fr.org>
 
     This program is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -78,7 +72,7 @@ static struct i2c_driver eeprom_driver = {
 static void eeprom_update_client(struct i2c_client *client, u8 slice)
 {
        struct eeprom_data *data = i2c_get_clientdata(client);
-       int i, j;
+       int i;
 
        mutex_lock(&data->update_lock);
 
@@ -93,15 +87,12 @@ static void eeprom_update_client(struct i2c_client *client, u8 slice)
                                                        != 32)
                                        goto exit;
                } else {
-                       if (i2c_smbus_write_byte(client, slice << 5)) {
-                               dev_dbg(&client->dev, "eeprom read start has failed!\n");
-                               goto exit;
-                       }
-                       for (i = slice << 5; i < (slice + 1) << 5; i++) {
-                               j = i2c_smbus_read_byte(client);
-                               if (j < 0)
+                       for (i = slice << 5; i < (slice + 1) << 5; i += 2) {
+                               int word = i2c_smbus_read_word_data(client, i);
+                               if (word < 0)
                                        goto exit;
-                               data->data[i] = (u8) j;
+                               data->data[i] = word & 0xff;
+                               data->data[i + 1] = word >> 8;
                        }
                }
                data->last_updated[slice] = jiffies;
@@ -159,24 +150,33 @@ static struct bin_attribute eeprom_attr = {
 
 static int eeprom_attach_adapter(struct i2c_adapter *adapter)
 {
+       if (!(adapter->class & (I2C_CLASS_DDC | I2C_CLASS_SPD)))
+               return 0;
        return i2c_probe(adapter, &addr_data, eeprom_detect);
 }
 
 /* This function is called by i2c_probe */
 static int eeprom_detect(struct i2c_adapter *adapter, int address, int kind)
 {
-       struct i2c_client *new_client;
+       struct i2c_client *client;
        struct eeprom_data *data;
        int err = 0;
 
-       /* There are three ways we can read the EEPROM data:
+       /* EDID EEPROMs are often 24C00 EEPROMs, which answer to all
+          addresses 0x50-0x57, but we only care about 0x50. So decline
+          attaching to addresses >= 0x51 on DDC buses */
+       if (!(adapter->class & I2C_CLASS_SPD) && address >= 0x51)
+               goto exit;
+
+       /* There are four ways we can read the EEPROM data:
           (1) I2C block reads (faster, but unsupported by most adapters)
-          (2) Consecutive byte reads (100% overhead)
-          (3) Regular byte data reads (200% overhead)
-          The third method is not implemented by this driver because all
-          known adapters support at least the second. */
-       if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_READ_BYTE_DATA
-                                           | I2C_FUNC_SMBUS_BYTE))
+          (2) Word reads (128% overhead)
+          (3) Consecutive byte reads (88% overhead, unsafe)
+          (4) Regular byte data reads (265% overhead)
+          The third and fourth methods are not implemented by this driver
+          because all known adapters support one of the first two. */
+       if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_READ_WORD_DATA)
+        && !i2c_check_functionality(adapter, I2C_FUNC_SMBUS_READ_I2C_BLOCK))
                goto exit;
 
        if (!(data = kzalloc(sizeof(struct eeprom_data), GFP_KERNEL))) {
@@ -184,50 +184,49 @@ static int eeprom_detect(struct i2c_adapter *adapter, int address, int kind)
                goto exit;
        }
 
-       new_client = &data->client;
+       client = &data->client;
        memset(data->data, 0xff, EEPROM_SIZE);
-       i2c_set_clientdata(new_client, data);
-       new_client->addr = address;
-       new_client->adapter = adapter;
-       new_client->driver = &eeprom_driver;
-       new_client->flags = 0;
+       i2c_set_clientdata(client, data);
+       client->addr = address;
+       client->adapter = adapter;
+       client->driver = &eeprom_driver;
 
        /* Fill in the remaining client fields */
-       strlcpy(new_client->name, "eeprom", I2C_NAME_SIZE);
-       data->valid = 0;
+       strlcpy(client->name, "eeprom", I2C_NAME_SIZE);
        mutex_init(&data->update_lock);
        data->nature = UNKNOWN;
 
        /* Tell the I2C layer a new client has arrived */
-       if ((err = i2c_attach_client(new_client)))
+       if ((err = i2c_attach_client(client)))
                goto exit_kfree;
 
        /* Detect the Vaio nature of EEPROMs.
           We use the "PCG-" or "VGN-" prefix as the signature. */
-       if (address == 0x57) {
+       if (address == 0x57
+        && i2c_check_functionality(adapter, I2C_FUNC_SMBUS_READ_BYTE_DATA)) {
                char name[4];
 
-               name[0] = i2c_smbus_read_byte_data(new_client, 0x80);
-               name[1] = i2c_smbus_read_byte(new_client);
-               name[2] = i2c_smbus_read_byte(new_client);
-               name[3] = i2c_smbus_read_byte(new_client);
+               name[0] = i2c_smbus_read_byte_data(client, 0x80);
+               name[1] = i2c_smbus_read_byte_data(client, 0x81);
+               name[2] = i2c_smbus_read_byte_data(client, 0x82);
+               name[3] = i2c_smbus_read_byte_data(client, 0x83);
 
                if (!memcmp(name, "PCG-", 4) || !memcmp(name, "VGN-", 4)) {
-                       dev_info(&new_client->dev, "Vaio EEPROM detected, "
+                       dev_info(&client->dev, "Vaio EEPROM detected, "
                                 "enabling privacy protection\n");
                        data->nature = VAIO;
                }
        }
 
        /* create the sysfs eeprom file */
-       err = sysfs_create_bin_file(&new_client->dev.kobj, &eeprom_attr);
+       err = sysfs_create_bin_file(&client->dev.kobj, &eeprom_attr);
        if (err)
                goto exit_detach;
 
        return 0;
 
 exit_detach:
-       i2c_detach_client(new_client);
+       i2c_detach_client(client);
 exit_kfree:
        kfree(data);
 exit:
index cf507b3f60f3567888bc4c964767744652bec99d..5a0285d8b6f9888f9477d4a1a7284f0dd5a7bee6 100644 (file)
@@ -170,7 +170,7 @@ static int max6875_detect(struct i2c_adapter *adapter, int address, int kind)
        struct i2c_client *real_client;
        struct i2c_client *fake_client;
        struct max6875_data *data;
-       int err = 0;
+       int err;
 
        if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_WRITE_BYTE_DATA
                                     | I2C_FUNC_SMBUS_READ_BYTE))
@@ -195,7 +195,6 @@ static int max6875_detect(struct i2c_adapter *adapter, int address, int kind)
        real_client->addr = address;
        real_client->adapter = adapter;
        real_client->driver = &max6875_driver;
-       real_client->flags = 0;
        strlcpy(real_client->name, "max6875", I2C_NAME_SIZE);
        mutex_init(&data->update_lock);
 
@@ -204,7 +203,6 @@ static int max6875_detect(struct i2c_adapter *adapter, int address, int kind)
        fake_client->addr = address | 1;
        fake_client->adapter = adapter;
        fake_client->driver = &max6875_driver;
-       fake_client->flags = 0;
        strlcpy(fake_client->name, "max6875 subclient", I2C_NAME_SIZE);
 
        if ((err = i2c_attach_client(real_client)) != 0)
index f43c4e79b55e9b3cf9bf0bba1317a0196aadddea..58ab7f26be26d1327f17c10a709e2c5ccd3da74f 100644 (file)
@@ -113,7 +113,7 @@ static int pca9539_attach_adapter(struct i2c_adapter *adapter)
 /* This function is called by i2c_probe */
 static int pca9539_detect(struct i2c_adapter *adapter, int address, int kind)
 {
-       struct i2c_client *new_client;
+       struct i2c_client *client;
        struct pca9539_data *data;
        int err = 0;
 
@@ -127,29 +127,28 @@ static int pca9539_detect(struct i2c_adapter *adapter, int address, int kind)
                goto exit;
        }
 
-       new_client = &data->client;
-       i2c_set_clientdata(new_client, data);
-       new_client->addr = address;
-       new_client->adapter = adapter;
-       new_client->driver = &pca9539_driver;
-       new_client->flags = 0;
+       client = &data->client;
+       i2c_set_clientdata(client, data);
+       client->addr = address;
+       client->adapter = adapter;
+       client->driver = &pca9539_driver;
 
        if (kind < 0) {
                /* Detection: the pca9539 only has 8 registers (0-7).
                   A read of 7 should succeed, but a read of 8 should fail. */
-               if ((i2c_smbus_read_byte_data(new_client, 7) < 0) ||
-                   (i2c_smbus_read_byte_data(new_client, 8) >= 0))
+               if ((i2c_smbus_read_byte_data(client, 7) < 0) ||
+                   (i2c_smbus_read_byte_data(client, 8) >= 0))
                        goto exit_kfree;
        }
 
-       strlcpy(new_client->name, "pca9539", I2C_NAME_SIZE);
+       strlcpy(client->name, "pca9539", I2C_NAME_SIZE);
 
        /* Tell the I2C layer a new client has arrived */
-       if ((err = i2c_attach_client(new_client)))
+       if ((err = i2c_attach_client(client)))
                goto exit_kfree;
 
        /* Register sysfs hooks */
-       err = sysfs_create_group(&new_client->dev.kobj,
+       err = sysfs_create_group(&client->dev.kobj,
                                 &pca9539_defattr_group);
        if (err)
                goto exit_detach;
@@ -157,7 +156,7 @@ static int pca9539_detect(struct i2c_adapter *adapter, int address, int kind)
        return 0;
 
 exit_detach:
-       i2c_detach_client(new_client);
+       i2c_detach_client(client);
 exit_kfree:
        kfree(data);
 exit:
index e5b31329b56e146e1022be115c0d7e0cbdc88ede..1b3db2b3ada9506d2a108f499753ba933d33be00 100644 (file)
@@ -1,6 +1,4 @@
 /*
-    pcf8574.c - Part of lm_sensors, Linux kernel modules for hardware
-             monitoring
     Copyright (c) 2000  Frodo Looijaard <frodol@dds.nl>, 
                         Philip Edelbrock <phil@netroedge.com>,
                         Dan Eaton <dan.eaton@rocketlogix.com>
@@ -129,7 +127,7 @@ static int pcf8574_attach_adapter(struct i2c_adapter *adapter)
 /* This function is called by i2c_probe */
 static int pcf8574_detect(struct i2c_adapter *adapter, int address, int kind)
 {
-       struct i2c_client *new_client;
+       struct i2c_client *client;
        struct pcf8574_data *data;
        int err = 0;
        const char *client_name = "";
@@ -144,12 +142,11 @@ static int pcf8574_detect(struct i2c_adapter *adapter, int address, int kind)
                goto exit;
        }
 
-       new_client = &data->client;
-       i2c_set_clientdata(new_client, data);
-       new_client->addr = address;
-       new_client->adapter = adapter;
-       new_client->driver = &pcf8574_driver;
-       new_client->flags = 0;
+       client = &data->client;
+       i2c_set_clientdata(client, data);
+       client->addr = address;
+       client->adapter = adapter;
+       client->driver = &pcf8574_driver;
 
        /* Now, we would do the remaining detection. But the PCF8574 is plainly
           impossible to detect! Stupid chip. */
@@ -168,23 +165,23 @@ static int pcf8574_detect(struct i2c_adapter *adapter, int address, int kind)
                client_name = "pcf8574";
 
        /* Fill in the remaining client fields and put it into the global list */
-       strlcpy(new_client->name, client_name, I2C_NAME_SIZE);
+       strlcpy(client->name, client_name, I2C_NAME_SIZE);
 
        /* Tell the I2C layer a new client has arrived */
-       if ((err = i2c_attach_client(new_client)))
+       if ((err = i2c_attach_client(client)))
                goto exit_free;
        
        /* Initialize the PCF8574 chip */
-       pcf8574_init_client(new_client);
+       pcf8574_init_client(client);
 
        /* Register sysfs hooks */
-       err = sysfs_create_group(&new_client->dev.kobj, &pcf8574_attr_group);
+       err = sysfs_create_group(&client->dev.kobj, &pcf8574_attr_group);
        if (err)
                goto exit_detach;
        return 0;
 
       exit_detach:
-       i2c_detach_client(new_client);
+       i2c_detach_client(client);
       exit_free:
        kfree(data);
       exit:
index 66c7c3bb9429754bbe7f6c31737feefdda750fa0..db735379f22fe512dfdc58402f8891d02bfbca69 100644 (file)
@@ -1,6 +1,4 @@
 /*
-    pcf8591.c - Part of lm_sensors, Linux kernel modules for hardware
-                monitoring
     Copyright (C) 2001-2004 Aurelien Jarno <aurelien@aurel32.net>
     Ported to Linux 2.6 by Aurelien Jarno <aurelien@aurel32.net> with 
     the help of Jean Delvare <khali@linux-fr.org>
@@ -190,7 +188,7 @@ static int pcf8591_attach_adapter(struct i2c_adapter *adapter)
 /* This function is called by i2c_probe */
 static int pcf8591_detect(struct i2c_adapter *adapter, int address, int kind)
 {
-       struct i2c_client *new_client;
+       struct i2c_client *client;
        struct pcf8591_data *data;
        int err = 0;
 
@@ -205,12 +203,11 @@ static int pcf8591_detect(struct i2c_adapter *adapter, int address, int kind)
                goto exit;
        }
        
-       new_client = &data->client;
-       i2c_set_clientdata(new_client, data);
-       new_client->addr = address;
-       new_client->adapter = adapter;
-       new_client->driver = &pcf8591_driver;
-       new_client->flags = 0;
+       client = &data->client;
+       i2c_set_clientdata(client, data);
+       client->addr = address;
+       client->adapter = adapter;
+       client->driver = &pcf8591_driver;
 
        /* Now, we would do the remaining detection. But the PCF8591 is plainly
           impossible to detect! Stupid chip. */
@@ -221,31 +218,31 @@ static int pcf8591_detect(struct i2c_adapter *adapter, int address, int kind)
 
        /* Fill in the remaining client fields and put it into the global 
           list */
-       strlcpy(new_client->name, "pcf8591", I2C_NAME_SIZE);
+       strlcpy(client->name, "pcf8591", I2C_NAME_SIZE);
        mutex_init(&data->update_lock);
 
        /* Tell the I2C layer a new client has arrived */
-       if ((err = i2c_attach_client(new_client)))
+       if ((err = i2c_attach_client(client)))
                goto exit_kfree;
 
        /* Initialize the PCF8591 chip */
-       pcf8591_init_client(new_client);
+       pcf8591_init_client(client);
 
        /* Register sysfs hooks */
-       err = sysfs_create_group(&new_client->dev.kobj, &pcf8591_attr_group);
+       err = sysfs_create_group(&client->dev.kobj, &pcf8591_attr_group);
        if (err)
                goto exit_detach;
 
        /* Register input2 if not in "two differential inputs" mode */
        if (input_mode != 3) {
-               if ((err = device_create_file(&new_client->dev,
+               if ((err = device_create_file(&client->dev,
                                              &dev_attr_in2_input)))
                        goto exit_sysfs_remove;
        }
 
        /* Register input3 only in "four single ended inputs" mode */
        if (input_mode == 0) {
-               if ((err = device_create_file(&new_client->dev,
+               if ((err = device_create_file(&client->dev,
                                              &dev_attr_in3_input)))
                        goto exit_sysfs_remove;
        }
@@ -253,10 +250,10 @@ static int pcf8591_detect(struct i2c_adapter *adapter, int address, int kind)
        return 0;
 
 exit_sysfs_remove:
-       sysfs_remove_group(&new_client->dev.kobj, &pcf8591_attr_group_opt);
-       sysfs_remove_group(&new_client->dev.kobj, &pcf8591_attr_group);
+       sysfs_remove_group(&client->dev.kobj, &pcf8591_attr_group_opt);
+       sysfs_remove_group(&client->dev.kobj, &pcf8591_attr_group);
 exit_detach:
-       i2c_detach_client(new_client);
+       i2c_detach_client(client);
 exit_kfree:
        kfree(data);
 exit:
index d0175f4f8fc683941057bf4866e97a8a9abdcfd3..0a79f7661017b0a548868db95bba17e2d73a2796 100644 (file)
 #include <linux/i2c.h>
 #include <linux/init.h>
 #include <linux/idr.h>
-#include <linux/seq_file.h>
 #include <linux/platform_device.h>
 #include <linux/mutex.h>
 #include <linux/completion.h>
 #include <linux/hardirq.h>
 #include <linux/irqflags.h>
-#include <linux/semaphore.h>
 #include <asm/uaccess.h>
 
 #include "i2c-core.h"
@@ -44,7 +42,9 @@
 static DEFINE_MUTEX(core_lock);
 static DEFINE_IDR(i2c_adapter_idr);
 
-#define is_newstyle_driver(d) ((d)->probe || (d)->remove)
+#define is_newstyle_driver(d) ((d)->probe || (d)->remove || (d)->detect)
+
+static int i2c_detect(struct i2c_adapter *adapter, struct i2c_driver *driver);
 
 /* ------------------------------------------------------------------------- */
 
@@ -103,19 +103,14 @@ static int i2c_device_probe(struct device *dev)
 {
        struct i2c_client       *client = to_i2c_client(dev);
        struct i2c_driver       *driver = to_i2c_driver(dev->driver);
-       const struct i2c_device_id *id;
        int status;
 
-       if (!driver->probe)
+       if (!driver->probe || !driver->id_table)
                return -ENODEV;
        client->driver = driver;
        dev_dbg(dev, "probe\n");
 
-       if (driver->id_table)
-               id = i2c_match_id(driver->id_table, client);
-       else
-               id = NULL;
-       status = driver->probe(client, id);
+       status = driver->probe(client, i2c_match_id(driver->id_table, client));
        if (status)
                client->driver = NULL;
        return status;
@@ -208,7 +203,7 @@ static struct device_attribute i2c_dev_attrs[] = {
        { },
 };
 
-static struct bus_type i2c_bus_type = {
+struct bus_type i2c_bus_type = {
        .name           = "i2c",
        .dev_attrs      = i2c_dev_attrs,
        .match          = i2c_device_match,
@@ -219,6 +214,7 @@ static struct bus_type i2c_bus_type = {
        .suspend        = i2c_device_suspend,
        .resume         = i2c_device_resume,
 };
+EXPORT_SYMBOL_GPL(i2c_bus_type);
 
 
 /**
@@ -306,6 +302,14 @@ void i2c_unregister_device(struct i2c_client *client)
                return;
        }
 
+       if (adapter->client_unregister) {
+               if (adapter->client_unregister(client)) {
+                       dev_warn(&client->dev,
+                                "client_unregister [%s] failed\n",
+                                client->name);
+               }
+       }
+
        mutex_lock(&adapter->clist_lock);
        list_del(&client->list);
        mutex_unlock(&adapter->clist_lock);
@@ -416,6 +420,10 @@ static int i2c_do_add_adapter(struct device_driver *d, void *data)
        struct i2c_driver *driver = to_i2c_driver(d);
        struct i2c_adapter *adap = data;
 
+       /* Detect supported devices on that bus, and instantiate them */
+       i2c_detect(adap, driver);
+
+       /* Let legacy drivers scan this bus for matching devices */
        if (driver->attach_adapter) {
                /* We ignore the return code; if it fails, too bad */
                driver->attach_adapter(adap);
@@ -455,7 +463,7 @@ static int i2c_register_adapter(struct i2c_adapter *adap)
        if (adap->nr < __i2c_first_dynamic_bus_num)
                i2c_scan_static_board_info(adap);
 
-       /* let legacy drivers scan this bus for matching devices */
+       /* Notify drivers */
        dummy = bus_for_each_drv(&i2c_bus_type, NULL, adap,
                                 i2c_do_add_adapter);
 
@@ -561,8 +569,19 @@ static int i2c_do_del_adapter(struct device_driver *d, void *data)
 {
        struct i2c_driver *driver = to_i2c_driver(d);
        struct i2c_adapter *adapter = data;
+       struct i2c_client *client, *_n;
        int res;
 
+       /* Remove the devices we created ourselves */
+       list_for_each_entry_safe(client, _n, &driver->clients, detected) {
+               if (client->adapter == adapter) {
+                       dev_dbg(&adapter->dev, "Removing %s at 0x%x\n",
+                               client->name, client->addr);
+                       list_del(&client->detected);
+                       i2c_unregister_device(client);
+               }
+       }
+
        if (!driver->detach_adapter)
                return 0;
        res = driver->detach_adapter(adapter);
@@ -582,8 +601,7 @@ static int i2c_do_del_adapter(struct device_driver *d, void *data)
  */
 int i2c_del_adapter(struct i2c_adapter *adap)
 {
-       struct list_head  *item, *_n;
-       struct i2c_client *client;
+       struct i2c_client *client, *_n;
        int res = 0;
 
        mutex_lock(&core_lock);
@@ -604,10 +622,9 @@ int i2c_del_adapter(struct i2c_adapter *adap)
 
        /* detach any active clients. This must be done first, because
         * it can fail; in which case we give up. */
-       list_for_each_safe(item, _n, &adap->clients) {
+       list_for_each_entry_safe(client, _n, &adap->clients, list) {
                struct i2c_driver       *driver;
 
-               client = list_entry(item, struct i2c_client, list);
                driver = client->driver;
 
                /* new style, follow standard driver model */
@@ -646,6 +663,20 @@ EXPORT_SYMBOL(i2c_del_adapter);
 
 /* ------------------------------------------------------------------------- */
 
+/*
+ * Per-adapter callback handed to class_for_each_device() when a driver
+ * is registered: @dev is the adapter's device, @data the i2c_driver.
+ * Runs i2c_detect() for the driver on that adapter, then gives a legacy
+ * driver its attach_adapter() call.  The results of both calls are
+ * ignored and 0 is always returned.
+ */
+static int __attach_adapter(struct device *dev, void *data)
+{
+       struct i2c_driver *drv = data;
+       struct i2c_adapter *adap = to_i2c_adapter(dev);
+
+       i2c_detect(adap, drv);
+
+       /* Legacy drivers scan i2c busses directly */
+       if (!drv->attach_adapter)
+               return 0;
+
+       drv->attach_adapter(adap);
+       return 0;
+}
+
 /*
  * An i2c_driver is used with one or more i2c_client (device) nodes to access
  * i2c slave chips, on a bus instance associated with some i2c_adapter.  There
@@ -685,72 +716,70 @@ int i2c_register_driver(struct module *owner, struct i2c_driver *driver)
 
        pr_debug("i2c-core: driver [%s] registered\n", driver->driver.name);
 
-       /* legacy drivers scan i2c busses directly */
-       if (driver->attach_adapter) {
-               struct i2c_adapter *adapter;
-
-               down(&i2c_adapter_class.sem);
-               list_for_each_entry(adapter, &i2c_adapter_class.devices,
-                                   dev.node) {
-                       driver->attach_adapter(adapter);
-               }
-               up(&i2c_adapter_class.sem);
-       }
+       INIT_LIST_HEAD(&driver->clients);
+       /* Walk the adapters that are already present */
+       class_for_each_device(&i2c_adapter_class, driver, __attach_adapter);
 
        mutex_unlock(&core_lock);
        return 0;
 }
 EXPORT_SYMBOL(i2c_register_driver);
 
-/**
- * i2c_del_driver - unregister I2C driver
- * @driver: the driver being unregistered
- * Context: can sleep
- */
-void i2c_del_driver(struct i2c_driver *driver)
+static int __detach_adapter(struct device *dev, void *data)
 {
-       struct list_head   *item2, *_n;
-       struct i2c_client  *client;
-       struct i2c_adapter *adap;
+       struct i2c_adapter *adapter = to_i2c_adapter(dev);
+       struct i2c_driver *driver = data;
+       struct i2c_client *client, *_n;
 
-       mutex_lock(&core_lock);
+       list_for_each_entry_safe(client, _n, &driver->clients, detected) {
+               dev_dbg(&adapter->dev, "Removing %s at 0x%x\n",
+                       client->name, client->addr);
+               list_del(&client->detected);
+               i2c_unregister_device(client);
+       }
 
-       /* new-style driver? */
        if (is_newstyle_driver(driver))
-               goto unregister;
+               return 0;
 
        /* Have a look at each adapter, if clients of this driver are still
         * attached. If so, detach them to be able to kill the driver
         * afterwards.
         */
-       down(&i2c_adapter_class.sem);
-       list_for_each_entry(adap, &i2c_adapter_class.devices, dev.node) {
-               if (driver->detach_adapter) {
-                       if (driver->detach_adapter(adap)) {
-                               dev_err(&adap->dev, "detach_adapter failed "
-                                       "for driver [%s]\n",
-                                       driver->driver.name);
-                       }
-               } else {
-                       list_for_each_safe(item2, _n, &adap->clients) {
-                               client = list_entry(item2, struct i2c_client, list);
-                               if (client->driver != driver)
-                                       continue;
-                               dev_dbg(&adap->dev, "detaching client [%s] "
-                                       "at 0x%02x\n", client->name,
-                                       client->addr);
-                               if (driver->detach_client(client)) {
-                                       dev_err(&adap->dev, "detach_client "
-                                               "failed for client [%s] at "
-                                               "0x%02x\n", client->name,
-                                               client->addr);
-                               }
-                       }
+       if (driver->detach_adapter) {
+               if (driver->detach_adapter(adapter))
+                       dev_err(&adapter->dev,
+                               "detach_adapter failed for driver [%s]\n",
+                               driver->driver.name);
+       } else {
+               struct i2c_client *client, *_n;
+
+               list_for_each_entry_safe(client, _n, &adapter->clients, list) {
+                       if (client->driver != driver)
+                               continue;
+                       dev_dbg(&adapter->dev,
+                               "detaching client [%s] at 0x%02x\n",
+                               client->name, client->addr);
+                       if (driver->detach_client(client))
+                               dev_err(&adapter->dev, "detach_client "
+                                       "failed for client [%s] at 0x%02x\n",
+                                       client->name, client->addr);
                }
        }
-       up(&i2c_adapter_class.sem);
 
- unregister:
+       return 0;
+}
+
+/**
+ * i2c_del_driver - unregister I2C driver
+ * @driver: the driver being unregistered
+ * Context: can sleep
+ */
+void i2c_del_driver(struct i2c_driver *driver)
+{
+       mutex_lock(&core_lock);
+
+       class_for_each_device(&i2c_adapter_class, driver, __detach_adapter);
+
        driver_unregister(&driver->driver);
        pr_debug("i2c-core: driver [%s] unregistered\n", driver->driver.name);
 
@@ -863,8 +892,9 @@ EXPORT_SYMBOL(i2c_detach_client);
  */
 struct i2c_client *i2c_use_client(struct i2c_client *client)
 {
-       get_device(&client->dev);
-       return client;
+       if (client && get_device(&client->dev))
+               return client;
+       return NULL;
 }
 EXPORT_SYMBOL(i2c_use_client);
 
@@ -876,7 +906,8 @@ EXPORT_SYMBOL(i2c_use_client);
  */
 void i2c_release_client(struct i2c_client *client)
 {
-       put_device(&client->dev);
+       if (client)
+               put_device(&client->dev);
 }
 EXPORT_SYMBOL(i2c_release_client);
 
@@ -942,10 +973,39 @@ module_exit(i2c_exit);
  * ----------------------------------------------------
  */
 
+/**
+ * i2c_transfer - execute a single or combined I2C message
+ * @adap: Handle to I2C bus
+ * @msgs: One or more messages to execute before STOP is issued to
+ *     terminate the operation; each message begins with a START.
+ * @num: Number of messages to be executed.
+ *
+ * Returns negative errno, else the number of messages executed.
+ *
+ * Note that there is no requirement that each message be sent to
+ * the same slave address, although that is the most common model.
+ */
 int i2c_transfer(struct i2c_adapter * adap, struct i2c_msg *msgs, int num)
 {
        int ret;
 
+       /* REVISIT the fault reporting model here is weak:
+        *
+        *  - When we get an error after receiving N bytes from a slave,
+        *    there is no way to report "N".
+        *
+        *  - When we get a NAK after transmitting N bytes to a slave,
+        *    there is no way to report "N" ... or to let the master
+        *    continue executing the rest of this combined message, if
+        *    that's the appropriate response.
+        *
+        *  - When for example "num" is two and we successfully complete
+        *    the first message but get an error part way through the
+        *    second, it's unclear whether that should be reported as
+        *    one (discarding status on the second message) or errno
+        *    (discarding status on the first one).
+        */
+
        if (adap->algo->master_xfer) {
 #ifdef DEBUG
                for (ret = 0; ret < num; ret++) {
@@ -971,11 +1031,19 @@ int i2c_transfer(struct i2c_adapter * adap, struct i2c_msg *msgs, int num)
                return ret;
        } else {
                dev_dbg(&adap->dev, "I2C level transfers not supported\n");
-               return -ENOSYS;
+               return -EOPNOTSUPP;
        }
 }
 EXPORT_SYMBOL(i2c_transfer);
 
+/**
+ * i2c_master_send - issue a single I2C message in master transmit mode
+ * @client: Handle to slave device
+ * @buf: Data that will be written to the slave
+ * @count: How many bytes to write
+ *
+ * Returns negative errno, or else the number of bytes written.
+ */
 int i2c_master_send(struct i2c_client *client,const char *buf ,int count)
 {
        int ret;
@@ -995,6 +1063,14 @@ int i2c_master_send(struct i2c_client *client,const char *buf ,int count)
 }
 EXPORT_SYMBOL(i2c_master_send);
 
+/**
+ * i2c_master_recv - issue a single I2C message in master receive mode
+ * @client: Handle to slave device
+ * @buf: Where to store data read from slave
+ * @count: How many bytes to read
+ *
+ * Returns negative errno, or else the number of bytes read.
+ */
 int i2c_master_recv(struct i2c_client *client, char *buf ,int count)
 {
        struct i2c_adapter *adap=client->adapter;
@@ -1103,7 +1179,7 @@ int i2c_probe(struct i2c_adapter *adapter,
 
                dev_warn(&adapter->dev, "SMBus Quick command not supported, "
                         "can't probe for chips\n");
-               return -1;
+               return -EOPNOTSUPP;
        }
 
        /* Probe entries are done second, and are not affected by ignore
@@ -1157,6 +1233,179 @@ int i2c_probe(struct i2c_adapter *adapter,
 }
 EXPORT_SYMBOL(i2c_probe);
 
+/* Separate detection function for new-style drivers; handles one address */
+static int i2c_detect_address(struct i2c_client *temp_client, int kind,
+                             struct i2c_driver *driver)
+{
+       struct i2c_board_info info;
+       struct i2c_adapter *adapter = temp_client->adapter;
+       int addr = temp_client->addr;
+       int err;
+
+       /* Make sure the address is valid (only 0x03..0x77 is accepted) */
+       if (addr < 0x03 || addr > 0x77) {
+               dev_warn(&adapter->dev, "Invalid probe address 0x%02x\n",
+                        addr);
+               return -EINVAL;
+       }
+
+       /* Skip if already in use; this is not reported as an error */
+       if (i2c_check_addr(adapter, addr))
+               return 0;
+
+       /* Unless forced (kind >= 0), make sure something is at this address */
+       if (kind < 0) {
+               if (i2c_smbus_xfer(adapter, addr, 0, 0, 0,
+                                  I2C_SMBUS_QUICK, NULL) < 0)
+                       return 0;
+
+               /* prevent 24RF08 corruption */
+               if ((addr & ~0x0f) == 0x50)
+                       i2c_smbus_xfer(adapter, addr, 0, 0, 0,
+                                      I2C_SMBUS_QUICK, NULL);
+       }
+
+       /* Finally call the custom detection function */
+       memset(&info, 0, sizeof(struct i2c_board_info));
+       info.addr = addr;
+       err = driver->detect(temp_client, kind, &info);
+       if (err) {
+               /* -ENODEV is returned if the detection fails. We catch it
+                  here as this isn't an error. */
+               return err == -ENODEV ? 0 : err;
+       }
+
+       /* Consistency check: detect() must have filled in info.type */
+       if (info.type[0] == '\0') {
+               dev_err(&adapter->dev, "%s detection function provided "
+                       "no name for 0x%x\n", driver->driver.name,
+                       addr);
+       } else {
+               struct i2c_client *client;
+
+               /* Detection succeeded, instantiate the device */
+               dev_dbg(&adapter->dev, "Creating %s at 0x%02x\n",
+                       info.type, info.addr);
+               client = i2c_new_device(adapter, &info);
+               if (client)
+                       list_add_tail(&client->detected, &driver->clients);
+               else
+                       dev_err(&adapter->dev, "Failed creating %s at 0x%02x\n",
+                               info.type, info.addr);
+       }
+       return 0;       /* naming/creation failures above are only logged */
+}
+
+static int i2c_detect(struct i2c_adapter *adapter, struct i2c_driver *driver)
+{
+       const struct i2c_client_address_data *address_data;
+       struct i2c_client *temp_client;
+       int i, err = 0;
+       int adap_id = i2c_adapter_id(adapter);
+
+       address_data = driver->address_data;
+       if (!driver->detect || !address_data)
+               return 0;
+
+       /* Set up a temporary client to pass to the detect callback */
+       temp_client = kzalloc(sizeof(struct i2c_client), GFP_KERNEL);
+       if (!temp_client)
+               return -ENOMEM;
+       temp_client->adapter = adapter;
+
+       /* Force entries are done first, and are not affected by ignore
+          entries */
+       if (address_data->forces) {
+               const unsigned short * const *forces = address_data->forces;
+               int kind;
+
+               for (kind = 0; forces[kind]; kind++) {
+                       for (i = 0; forces[kind][i] != I2C_CLIENT_END;
+                            i += 2) { /* (bus, address) pairs */
+                               if (forces[kind][i] == adap_id
+                                || forces[kind][i] == ANY_I2C_BUS) {
+                                       dev_dbg(&adapter->dev, "found force "
+                                               "parameter for adapter %d, "
+                                               "addr 0x%02x, kind %d\n",
+                                               adap_id, forces[kind][i + 1],
+                                               kind);
+                                       temp_client->addr = forces[kind][i + 1];
+                                       err = i2c_detect_address(temp_client,
+                                               kind, driver);
+                                       if (err)
+                                               goto exit_free;
+                               }
+                       }
+               }
+       }
+
+       /* Stop here if we can't use SMBUS_QUICK */
+       if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_QUICK)) {
+               if (address_data->probe[0] == I2C_CLIENT_END
+                && address_data->normal_i2c[0] == I2C_CLIENT_END)
+                       goto exit_free; /* nothing to probe, err is 0 */
+
+               dev_warn(&adapter->dev, "SMBus Quick command not supported, "
+                        "can't probe for chips\n");
+               err = -EOPNOTSUPP;
+               goto exit_free;
+       }
+
+       /* Stop here if the classes do not match */
+       if (!(adapter->class & driver->class))
+               goto exit_free; /* err is still 0 here */
+
+       /* Probe entries are done second, and are not affected by ignore
+          entries either; the list holds (bus, address) pairs */
+       for (i = 0; address_data->probe[i] != I2C_CLIENT_END; i += 2) {
+               if (address_data->probe[i] == adap_id
+                || address_data->probe[i] == ANY_I2C_BUS) {
+                       dev_dbg(&adapter->dev, "found probe parameter for "
+                               "adapter %d, addr 0x%02x\n", adap_id,
+                               address_data->probe[i + 1]);
+                       temp_client->addr = address_data->probe[i + 1];
+                       err = i2c_detect_address(temp_client, -1, driver);
+                       if (err)
+                               goto exit_free;
+               }
+       }
+
+       /* Normal entries are done last, unless shadowed by an ignore entry */
+       for (i = 0; address_data->normal_i2c[i] != I2C_CLIENT_END; i += 1) {
+               int j, ignore;
+
+               ignore = 0;
+               for (j = 0; address_data->ignore[j] != I2C_CLIENT_END;
+                    j += 2) { /* (bus, address) pairs */
+                       if ((address_data->ignore[j] == adap_id ||
+                            address_data->ignore[j] == ANY_I2C_BUS)
+                        && address_data->ignore[j + 1]
+                           == address_data->normal_i2c[i]) {
+                               dev_dbg(&adapter->dev, "found ignore "
+                                       "parameter for adapter %d, "
+                                       "addr 0x%02x\n", adap_id,
+                                       address_data->ignore[j + 1]);
+                               ignore = 1;
+                               break;
+                       }
+               }
+               if (ignore)
+                       continue;
+
+               dev_dbg(&adapter->dev, "found normal entry for adapter %d, "
+                       "addr 0x%02x\n", adap_id,
+                       address_data->normal_i2c[i]);
+               temp_client->addr = address_data->normal_i2c[i];
+               err = i2c_detect_address(temp_client, -1, driver);
+               if (err)
+                       goto exit_free;
+       }
+
+ exit_free:
+       kfree(temp_client);
+       return err;
+}
+
 struct i2c_client *
 i2c_new_probed_device(struct i2c_adapter *adap,
                      struct i2c_board_info *info,
@@ -1295,29 +1544,38 @@ static int i2c_smbus_check_pec(u8 cpec, struct i2c_msg *msg)
        if (rpec != cpec) {
                pr_debug("i2c-core: Bad PEC 0x%02x vs. 0x%02x\n",
                        rpec, cpec);
-               return -1;
+               return -EBADMSG;
        }
        return 0;
 }
 
-s32 i2c_smbus_write_quick(struct i2c_client *client, u8 value)
-{
-       return i2c_smbus_xfer(client->adapter,client->addr,client->flags,
-                             value,0,I2C_SMBUS_QUICK,NULL);
-}
-EXPORT_SYMBOL(i2c_smbus_write_quick);
-
+/**
+ * i2c_smbus_read_byte - SMBus "receive byte" protocol
+ * @client: Handle to slave device
+ *
+ * This executes the SMBus "receive byte" protocol, returning negative errno
+ * else the byte received from the device.
+ */
 s32 i2c_smbus_read_byte(struct i2c_client *client)
 {
        union i2c_smbus_data data;
-       if (i2c_smbus_xfer(client->adapter,client->addr,client->flags,
-                          I2C_SMBUS_READ,0,I2C_SMBUS_BYTE, &data))
-               return -1;
-       else
-               return data.byte;
+       int status;
+
+       status = i2c_smbus_xfer(client->adapter, client->addr, client->flags,
+                               I2C_SMBUS_READ, 0,
+                               I2C_SMBUS_BYTE, &data);
+       return (status < 0) ? status : data.byte;
 }
 EXPORT_SYMBOL(i2c_smbus_read_byte);
 
+/**
+ * i2c_smbus_write_byte - SMBus "send byte" protocol
+ * @client: Handle to slave device
+ * @value: Byte to be sent
+ *
+ * This executes the SMBus "send byte" protocol, returning negative errno
+ * else zero on success.
+ */
 s32 i2c_smbus_write_byte(struct i2c_client *client, u8 value)
 {
        return i2c_smbus_xfer(client->adapter,client->addr,client->flags,
@@ -1325,17 +1583,35 @@ s32 i2c_smbus_write_byte(struct i2c_client *client, u8 value)
 }
 EXPORT_SYMBOL(i2c_smbus_write_byte);
 
+/**
+ * i2c_smbus_read_byte_data - SMBus "read byte" protocol
+ * @client: Handle to slave device
+ * @command: Byte interpreted by slave
+ *
+ * This executes the SMBus "read byte" protocol, returning negative errno
+ * else a data byte received from the device.
+ */
 s32 i2c_smbus_read_byte_data(struct i2c_client *client, u8 command)
 {
        union i2c_smbus_data data;
-       if (i2c_smbus_xfer(client->adapter,client->addr,client->flags,
-                          I2C_SMBUS_READ,command, I2C_SMBUS_BYTE_DATA,&data))
-               return -1;
-       else
-               return data.byte;
+       int status;
+
+       status = i2c_smbus_xfer(client->adapter, client->addr, client->flags,
+                               I2C_SMBUS_READ, command,
+                               I2C_SMBUS_BYTE_DATA, &data);
+       return (status < 0) ? status : data.byte;
 }
 EXPORT_SYMBOL(i2c_smbus_read_byte_data);
 
+/**
+ * i2c_smbus_write_byte_data - SMBus "write byte" protocol
+ * @client: Handle to slave device
+ * @command: Byte interpreted by slave
+ * @value: Byte being written
+ *
+ * This executes the SMBus "write byte" protocol, returning negative errno
+ * else zero on success.
+ */
 s32 i2c_smbus_write_byte_data(struct i2c_client *client, u8 command, u8 value)
 {
        union i2c_smbus_data data;
@@ -1346,17 +1622,35 @@ s32 i2c_smbus_write_byte_data(struct i2c_client *client, u8 command, u8 value)
 }
 EXPORT_SYMBOL(i2c_smbus_write_byte_data);
 
+/**
+ * i2c_smbus_read_word_data - SMBus "read word" protocol
+ * @client: Handle to slave device
+ * @command: Byte interpreted by slave
+ *
+ * This executes the SMBus "read word" protocol, returning negative errno
+ * else a 16-bit unsigned "word" received from the device.
+ */
 s32 i2c_smbus_read_word_data(struct i2c_client *client, u8 command)
 {
        union i2c_smbus_data data;
-       if (i2c_smbus_xfer(client->adapter,client->addr,client->flags,
-                          I2C_SMBUS_READ,command, I2C_SMBUS_WORD_DATA, &data))
-               return -1;
-       else
-               return data.word;
+       int status;
+
+       status = i2c_smbus_xfer(client->adapter, client->addr, client->flags,
+                               I2C_SMBUS_READ, command,
+                               I2C_SMBUS_WORD_DATA, &data);
+       return (status < 0) ? status : data.word;
 }
 EXPORT_SYMBOL(i2c_smbus_read_word_data);
 
+/**
+ * i2c_smbus_write_word_data - SMBus "write word" protocol
+ * @client: Handle to slave device
+ * @command: Byte interpreted by slave
+ * @value: 16-bit "word" being written
+ *
+ * This executes the SMBus "write word" protocol, returning negative errno
+ * else zero on success.
+ */
 s32 i2c_smbus_write_word_data(struct i2c_client *client, u8 command, u16 value)
 {
        union i2c_smbus_data data;
@@ -1368,15 +1662,14 @@ s32 i2c_smbus_write_word_data(struct i2c_client *client, u8 command, u16 value)
 EXPORT_SYMBOL(i2c_smbus_write_word_data);
 
 /**
- * i2c_smbus_read_block_data - SMBus block read request
+ * i2c_smbus_read_block_data - SMBus "block read" protocol
  * @client: Handle to slave device
- * @command: Command byte issued to let the slave know what data should
- *     be returned
+ * @command: Byte interpreted by slave
  * @values: Byte array into which data will be read; big enough to hold
  *     the data returned by the slave.  SMBus allows at most 32 bytes.
  *
- * Returns the number of bytes read in the slave's response, else a
- * negative number to indicate some kind of error.
+ * This executes the SMBus "block read" protocol, returning negative errno
+ * else the number of data bytes in the slave's response.
  *
  * Note that using this function requires that the client's adapter support
  * the I2C_FUNC_SMBUS_READ_BLOCK_DATA functionality.  Not all adapter drivers
@@ -1387,17 +1680,29 @@ s32 i2c_smbus_read_block_data(struct i2c_client *client, u8 command,
                              u8 *values)
 {
        union i2c_smbus_data data;
+       int status;
 
-       if (i2c_smbus_xfer(client->adapter, client->addr, client->flags,
-                          I2C_SMBUS_READ, command,
-                          I2C_SMBUS_BLOCK_DATA, &data))
-               return -1;
+       status = i2c_smbus_xfer(client->adapter, client->addr, client->flags,
+                               I2C_SMBUS_READ, command,
+                               I2C_SMBUS_BLOCK_DATA, &data);
+       if (status)
+               return status;
 
        memcpy(values, &data.block[1], data.block[0]);
        return data.block[0];
 }
 EXPORT_SYMBOL(i2c_smbus_read_block_data);
 
+/**
+ * i2c_smbus_write_block_data - SMBus "block write" protocol
+ * @client: Handle to slave device
+ * @command: Byte interpreted by slave
+ * @length: Size of data block; SMBus allows at most 32 bytes
+ * @values: Byte array which will be written.
+ *
+ * This executes the SMBus "block write" protocol, returning negative errno
+ * else zero on success.
+ */
 s32 i2c_smbus_write_block_data(struct i2c_client *client, u8 command,
                               u8 length, const u8 *values)
 {
@@ -1418,14 +1723,16 @@ s32 i2c_smbus_read_i2c_block_data(struct i2c_client *client, u8 command,
                                  u8 length, u8 *values)
 {
        union i2c_smbus_data data;
+       int status;
 
        if (length > I2C_SMBUS_BLOCK_MAX)
                length = I2C_SMBUS_BLOCK_MAX;
        data.block[0] = length;
-       if (i2c_smbus_xfer(client->adapter,client->addr,client->flags,
-                             I2C_SMBUS_READ,command,
-                             I2C_SMBUS_I2C_BLOCK_DATA,&data))
-               return -1;
+       status = i2c_smbus_xfer(client->adapter, client->addr, client->flags,
+                               I2C_SMBUS_READ, command,
+                               I2C_SMBUS_I2C_BLOCK_DATA, &data);
+       if (status < 0)
+               return status;
 
        memcpy(values, &data.block[1], data.block[0]);
        return data.block[0];
@@ -1466,6 +1773,7 @@ static s32 i2c_smbus_xfer_emulated(struct i2c_adapter * adapter, u16 addr,
                                };
        int i;
        u8 partial_pec = 0;
+       int status;
 
        msgbuf0[0] = command;
        switch(size) {
@@ -1515,10 +1823,10 @@ static s32 i2c_smbus_xfer_emulated(struct i2c_adapter * adapter, u16 addr,
                } else {
                        msg[0].len = data->block[0] + 2;
                        if (msg[0].len > I2C_SMBUS_BLOCK_MAX + 2) {
-                               dev_err(&adapter->dev, "smbus_access called with "
-                                      "invalid block write size (%d)\n",
-                                      data->block[0]);
-                               return -1;
+                               dev_err(&adapter->dev,
+                                       "Invalid block write size %d\n",
+                                       data->block[0]);
+                               return -EINVAL;
                        }
                        for (i = 1; i < msg[0].len; i++)
                                msgbuf0[i] = data->block[i-1];
@@ -1528,10 +1836,10 @@ static s32 i2c_smbus_xfer_emulated(struct i2c_adapter * adapter, u16 addr,
                num = 2; /* Another special case */
                read_write = I2C_SMBUS_READ;
                if (data->block[0] > I2C_SMBUS_BLOCK_MAX) {
-                       dev_err(&adapter->dev, "%s called with invalid "
-                               "block proc call size (%d)\n", __func__,
+                       dev_err(&adapter->dev,
+                               "Invalid block write size %d\n",
                                data->block[0]);
-                       return -1;
+                       return -EINVAL;
                }
                msg[0].len = data->block[0] + 2;
                for (i = 1; i < msg[0].len; i++)
@@ -1546,19 +1854,18 @@ static s32 i2c_smbus_xfer_emulated(struct i2c_adapter * adapter, u16 addr,
                } else {
                        msg[0].len = data->block[0] + 1;
                        if (msg[0].len > I2C_SMBUS_BLOCK_MAX + 1) {
-                               dev_err(&adapter->dev, "i2c_smbus_xfer_emulated called with "
-                                      "invalid block write size (%d)\n",
-                                      data->block[0]);
-                               return -1;
+                               dev_err(&adapter->dev,
+                                       "Invalid block write size %d\n",
+                                       data->block[0]);
+                               return -EINVAL;
                        }
                        for (i = 1; i <= data->block[0]; i++)
                                msgbuf0[i] = data->block[i];
                }
                break;
        default:
-               dev_err(&adapter->dev, "smbus_access called with invalid size (%d)\n",
-                      size);
-               return -1;
+               dev_err(&adapter->dev, "Unsupported transaction %d\n", size);
+               return -EOPNOTSUPP;
        }
 
        i = ((flags & I2C_CLIENT_PEC) && size != I2C_SMBUS_QUICK
@@ -1576,13 +1883,15 @@ static s32 i2c_smbus_xfer_emulated(struct i2c_adapter * adapter, u16 addr,
                        msg[num-1].len++;
        }
 
-       if (i2c_transfer(adapter, msg, num) < 0)
-               return -1;
+       status = i2c_transfer(adapter, msg, num);
+       if (status < 0)
+               return status;
 
        /* Check PEC if last message is a read */
        if (i && (msg[num-1].flags & I2C_M_RD)) {
-               if (i2c_smbus_check_pec(partial_pec, &msg[num-1]) < 0)
-                       return -1;
+               status = i2c_smbus_check_pec(partial_pec, &msg[num-1]);
+               if (status < 0)
+                       return status;
        }
 
        if (read_write == I2C_SMBUS_READ)
@@ -1610,9 +1919,21 @@ static s32 i2c_smbus_xfer_emulated(struct i2c_adapter * adapter, u16 addr,
        return 0;
 }
 
-
+/**
+ * i2c_smbus_xfer - execute SMBus protocol operations
+ * @adapter: Handle to I2C bus
+ * @addr: Address of SMBus slave on that bus
+ * @flags: I2C_CLIENT_* flags (usually zero or I2C_CLIENT_PEC)
+ * @read_write: I2C_SMBUS_READ or I2C_SMBUS_WRITE
+ * @command: Byte interpreted by slave, for protocols which use such bytes
+ * @protocol: SMBus protocol operation to execute, such as I2C_SMBUS_PROC_CALL
+ * @data: Data to be read or written
+ *
+ * This executes an SMBus protocol operation, and returns a negative
+ * errno code else zero on success.
+ */
 s32 i2c_smbus_xfer(struct i2c_adapter * adapter, u16 addr, unsigned short flags,
-                   char read_write, u8 command, int size,
+                  char read_write, u8 command, int protocol,
                    union i2c_smbus_data * data)
 {
        s32 res;
@@ -1622,11 +1943,11 @@ s32 i2c_smbus_xfer(struct i2c_adapter * adapter, u16 addr, unsigned short flags,
        if (adapter->algo->smbus_xfer) {
                mutex_lock(&adapter->bus_lock);
                res = adapter->algo->smbus_xfer(adapter,addr,flags,read_write,
-                                               command,size,data);
+                                               command, protocol, data);
                mutex_unlock(&adapter->bus_lock);
        } else
                res = i2c_smbus_xfer_emulated(adapter,addr,flags,read_write,
-                                             command,size,data);
+                                             command, protocol, data);
 
        return res;
 }
index 006a5857256a15b9cebab931d5c9489a62ed891d..86727fa8858fcf808a1a1373efe0ecc46a2e5678 100644 (file)
@@ -367,8 +367,7 @@ static noinline int i2cdev_ioctl_smbus(struct i2c_client *client,
        return res;
 }
 
-static int i2cdev_ioctl(struct inode *inode, struct file *file,
-               unsigned int cmd, unsigned long arg)
+static long i2cdev_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
        struct i2c_client *client = (struct i2c_client *)file->private_data;
        unsigned long funcs;
@@ -497,7 +496,7 @@ static const struct file_operations i2cdev_fops = {
        .llseek         = no_llseek,
        .read           = i2cdev_read,
        .write          = i2cdev_write,
-       .ioctl          = i2cdev_ioctl,
+       .unlocked_ioctl = i2cdev_ioctl,
        .open           = i2cdev_open,
        .release        = i2cdev_release,
 };
@@ -559,19 +558,12 @@ static int i2cdev_detach_adapter(struct i2c_adapter *adap)
        return 0;
 }
 
-static int i2cdev_detach_client(struct i2c_client *client)
-{
-       return 0;
-}
-
 static struct i2c_driver i2cdev_driver = {
        .driver = {
                .name   = "dev_driver",
        },
-       .id             = I2C_DRIVERID_I2CDEV,
        .attach_adapter = i2cdev_attach_adapter,
        .detach_adapter = i2cdev_detach_adapter,
-       .detach_client  = i2cdev_detach_client,
 };
 
 /* ------------------------------------------------------------------------- */
index 1607536ff5fb3ec7da44b500157c1631703b3604..cf707c8f08d4c09028e926dd17e2d6335dec9a47 100644 (file)
@@ -98,6 +98,9 @@ if BLK_DEV_IDE
 
 comment "Please see Documentation/ide/ide.txt for help/info on IDE drives"
 
+config IDE_ATAPI
+       bool
+
 config BLK_DEV_IDE_SATA
        bool "Support for SATA (deprecated; conflicts with libata SATA driver)"
        default n
@@ -201,6 +204,7 @@ config BLK_DEV_IDECD_VERBOSE_ERRORS
 
 config BLK_DEV_IDETAPE
        tristate "Include IDE/ATAPI TAPE support"
+       select IDE_ATAPI
        help
          If you have an IDE tape drive using the ATAPI protocol, say Y.
          ATAPI is a newer protocol used by IDE tape and CD-ROM drives,
@@ -223,6 +227,7 @@ config BLK_DEV_IDETAPE
 
 config BLK_DEV_IDEFLOPPY
        tristate "Include IDE/ATAPI FLOPPY support"
+       select IDE_ATAPI
        ---help---
          If you have an IDE floppy drive which uses the ATAPI protocol,
          answer Y.  ATAPI is a newer protocol used by IDE CD-ROM/tape/floppy
@@ -246,6 +251,7 @@ config BLK_DEV_IDEFLOPPY
 config BLK_DEV_IDESCSI
        tristate "SCSI emulation support"
        depends on SCSI
+       select IDE_ATAPI
        ---help---
          WARNING: ide-scsi is no longer needed for cd writing applications!
          The 2.6 kernel supports direct writing to ide-cd, which eliminates
index f94b679b611e127a92d924d925d172e213c0c43d..a2b3f84d710d3a7c22e152b731ea4c0f3b9f71f5 100644 (file)
@@ -14,6 +14,7 @@ EXTRA_CFLAGS                          += -Idrivers/ide
 ide-core-y += ide.o ide-io.o ide-iops.o ide-lib.o ide-probe.o ide-taskfile.o
 
 # core IDE code
+ide-core-$(CONFIG_IDE_ATAPI)           += ide-atapi.o
 ide-core-$(CONFIG_BLK_DEV_IDEPCI)      += setup-pci.o
 ide-core-$(CONFIG_BLK_DEV_IDEDMA)      += ide-dma.o
 ide-core-$(CONFIG_IDE_PROC_FS)         += ide-proc.o
index 2f2b4f4cf229f9e046b3dec3b83e367fd0038efd..3839f5722985128bcce2ded6ef7ba1dd15597560 100644 (file)
@@ -83,7 +83,7 @@ static const struct palm_bk3710_udmatiming palm_bk3710_udmatimings[6] = {
        {125, 160},             /* UDMA Mode 1 */
        {100, 120},             /* UDMA Mode 2 */
        {100, 90},              /* UDMA Mode 3 */
-       {85,  60},              /* UDMA Mode 4 */
+       {100, 60},              /* UDMA Mode 4 */
 };
 
 static void palm_bk3710_setudmamode(void __iomem *base, unsigned int dev,
@@ -405,7 +405,6 @@ static int __devinit palm_bk3710_probe(struct platform_device *pdev)
        ide_init_port_data(hwif, i);
        ide_init_port_hw(hwif, &hw);
 
-       hwif->mmio = 1;
        default_hwif_mmiops(hwif);
 
        idx[0] = i;
index ecf53bb0d2aa2941da54aa772485daa0bfb3a616..ae37ee58bae27a474f80fc3e72edbafc6528cad2 100644 (file)
@@ -52,8 +52,6 @@ static void h8300_tf_load(ide_drive_t *drive, ide_task_t *task)
        if (task->tf_flags & IDE_TFLAG_FLAGGED)
                HIHI = 0xFF;
 
-       ide_set_irq(drive, 1);
-
        if (task->tf_flags & IDE_TFLAG_OUT_DATA)
                mm_outw((tf->hob_data << 8) | tf->data, io_ports->data_addr);
 
@@ -98,7 +96,7 @@ static void h8300_tf_read(ide_drive_t *drive, ide_task_t *task)
        }
 
        /* be sure we're looking at the low order bits */
-       outb(drive->ctl & ~0x80, io_ports->ctl_addr);
+       outb(ATA_DEVCTL_OBS & ~0x80, io_ports->ctl_addr);
 
        if (task->tf_flags & IDE_TFLAG_IN_NSECT)
                tf->nsect  = inb(io_ports->nsect_addr);
@@ -112,7 +110,7 @@ static void h8300_tf_read(ide_drive_t *drive, ide_task_t *task)
                tf->device = inb(io_ports->device_addr);
 
        if (task->tf_flags & IDE_TFLAG_LBA48) {
-               outb(drive->ctl | 0x80, io_ports->ctl_addr);
+               outb(ATA_DEVCTL_OBS | 0x80, io_ports->ctl_addr);
 
                if (task->tf_flags & IDE_TFLAG_IN_HOB_FEATURE)
                        tf->hob_feature = inb(io_ports->feature_addr);
index 9d3601fa56803c01c9870f5fd0ab12b196ff5637..6f704628c27d22a08ac6a7171f08e7706d4f5fcf 100644 (file)
@@ -60,15 +60,15 @@ struct ide_acpi_hwif_link {
 #define DEBPRINT(fmt, args...) do {} while (0)
 #endif /* DEBUGGING */
 
-int ide_noacpi;
+static int ide_noacpi;
 module_param_named(noacpi, ide_noacpi, bool, 0);
 MODULE_PARM_DESC(noacpi, "disable IDE ACPI support");
 
-int ide_acpigtf;
+static int ide_acpigtf;
 module_param_named(acpigtf, ide_acpigtf, bool, 0);
 MODULE_PARM_DESC(acpigtf, "enable IDE ACPI _GTF support");
 
-int ide_acpionboot;
+static int ide_acpionboot;
 module_param_named(acpionboot, ide_acpionboot, bool, 0);
 MODULE_PARM_DESC(acpionboot, "call IDE ACPI methods on boot");
 
diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
new file mode 100644 (file)
index 0000000..2802031
--- /dev/null
@@ -0,0 +1,296 @@
+/*
+ * ATAPI support.
+ */
+
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/ide.h>
+#include <scsi/scsi.h>
+
+#ifdef DEBUG
+#define debug_log(fmt, args...) \
+       printk(KERN_INFO "ide: " fmt, ## args)
+#else
+#define debug_log(fmt, args...) do {} while (0)
+#endif
+
+/*
+ * ide_pc_intr - shared interrupt handler body for an ATAPI packet command.
+ *
+ * @drive:          drive the packet command @pc was issued to
+ * @pc:             packet command being serviced
+ * @handler:        re-armed through ide_set_handler() while more data is
+ *                  still to be transferred
+ * @timeout:        passed to ide_set_handler() together with @handler
+ * @expiry:         passed to ide_set_handler() together with @handler
+ * @update_buffers: optional (checked against NULL); called after dma_end()
+ *                  reported no error
+ * @retry_pc:       called when a command other than REQUEST_SENSE finished
+ *                  with an error on a drive that is not flagged drive->scsi
+ * @dsc_handle:     called at completion when PC_FLAG_WAIT_FOR_DSC is set and
+ *                  SEEK_STAT is still clear
+ * @io_buffers:     data transfer hook; its last argument is non-zero when
+ *                  PC_FLAG_WRITING is set
+ *
+ * Flow, in order:
+ *   1. PC_FLAG_TIMEDOUT: run pc->callback() and stop.
+ *   2. Read the status register; if a DMA transfer was in progress, end it
+ *      and either flag PC_FLAG_DMA_ERROR or account the whole request as
+ *      transferred.
+ *   3. DRQ clear: the command is complete.  Errors lead to ide_do_reset()
+ *      (failed REQUEST_SENSE) or retry_pc(); otherwise pc->error is zeroed
+ *      and dsc_handle() or pc->callback() runs.
+ *   4. DRQ set while DMA was in progress: DMA is switched off and the drive
+ *      is reset.
+ *   5. Otherwise read the byte count and interrupt reason, validate them,
+ *      move the data, advance pc->xferred / pc->cur_pos and re-arm @handler.
+ *
+ * Returns ide_stopped when the command was completed or handed to
+ * retry_pc()/dsc_handle(), ide_started when @handler was re-armed, or the
+ * value of ide_do_reset() on a protocol error.
+ *
+ * TODO: unify the code thus making some arguments go away
+ */
+ide_startstop_t ide_pc_intr(ide_drive_t *drive, struct ide_atapi_pc *pc,
+       ide_handler_t *handler, unsigned int timeout, ide_expiry_t *expiry,
+       void (*update_buffers)(ide_drive_t *, struct ide_atapi_pc *),
+       void (*retry_pc)(ide_drive_t *), void (*dsc_handle)(ide_drive_t *),
+       void (*io_buffers)(ide_drive_t *, struct ide_atapi_pc *, unsigned, int))
+{
+       ide_hwif_t *hwif = drive->hwif;
+       xfer_func_t *xferfunc;
+       unsigned int temp;
+       u16 bcount;
+       u8 stat, ireason, scsi = drive->scsi;
+
+       debug_log("Enter %s - interrupt handler\n", __func__);
+
+       if (pc->flags & PC_FLAG_TIMEDOUT) {
+               pc->callback(drive);
+               return ide_stopped;
+       }
+
+       /* Clear the interrupt */
+       stat = ide_read_status(drive);
+
+       if (pc->flags & PC_FLAG_DMA_IN_PROGRESS) {
+               if (hwif->dma_ops->dma_end(drive) ||
+                   (drive->media == ide_tape && !scsi && (stat & ERR_STAT))) {
+                       if (drive->media == ide_floppy && !scsi)
+                               printk(KERN_ERR "%s: DMA %s error\n",
+                                       drive->name, rq_data_dir(pc->rq)
+                                                    ? "write" : "read");
+                       pc->flags |= PC_FLAG_DMA_ERROR;
+               } else {
+                       pc->xferred = pc->req_xfer;
+                       if (update_buffers)
+                               update_buffers(drive, pc);
+               }
+               debug_log("%s: DMA finished\n", drive->name);
+       }
+
+       /* No more interrupts: DRQ is clear, so the packet command completed */
+       if ((stat & DRQ_STAT) == 0) {
+               debug_log("Packet command completed, %d bytes transferred\n",
+                         pc->xferred);
+
+               pc->flags &= ~PC_FLAG_DMA_IN_PROGRESS;
+
+               local_irq_enable_in_hardirq();
+
+               if (drive->media == ide_tape && !scsi &&
+                   (stat & ERR_STAT) && pc->c[0] == REQUEST_SENSE)
+                       stat &= ~ERR_STAT;
+               if ((stat & ERR_STAT) || (pc->flags & PC_FLAG_DMA_ERROR)) {
+                       /* Error detected */
+                       debug_log("%s: I/O error\n", drive->name);
+
+                       if (drive->media != ide_tape || scsi) {
+                               pc->rq->errors++;
+                               if (scsi)
+                                       goto cmd_finished;
+                       }
+
+                       if (pc->c[0] == REQUEST_SENSE) {
+                               printk(KERN_ERR "%s: I/O error in request sense"
+                                               " command\n", drive->name);
+                               return ide_do_reset(drive);
+                       }
+
+                       debug_log("[cmd %x]: check condition\n", pc->c[0]);
+
+                       /* Retry operation */
+                       retry_pc(drive);
+                       /* queued, but not started */
+                       return ide_stopped;
+               }
+cmd_finished:
+               pc->error = 0;
+               if ((pc->flags & PC_FLAG_WAIT_FOR_DSC) &&
+                   (stat & SEEK_STAT) == 0) {
+                       dsc_handle(drive);
+                       return ide_stopped;
+               }
+               /* Command finished - Call the callback function */
+               pc->callback(drive);
+               return ide_stopped;
+       }
+
+       if (pc->flags & PC_FLAG_DMA_IN_PROGRESS) {
+               pc->flags &= ~PC_FLAG_DMA_IN_PROGRESS;
+               printk(KERN_ERR "%s: The device wants to issue more interrupts "
+                               "in DMA mode\n", drive->name);
+               ide_dma_off(drive);
+               return ide_do_reset(drive);
+       }
+       /* Byte count for this interrupt: lbah is the high byte, lbam the low. */
+       bcount = (hwif->INB(hwif->io_ports.lbah_addr) << 8) |
+                 hwif->INB(hwif->io_ports.lbam_addr);
+
+       ireason = hwif->INB(hwif->io_ports.nsect_addr);
+
+       if (ireason & CD) {
+               printk(KERN_ERR "%s: CoD != 0 in %s\n", drive->name, __func__);
+               return ide_do_reset(drive);
+       }
+       if (((ireason & IO) == IO) == !!(pc->flags & PC_FLAG_WRITING)) {
+               /* Hopefully, we will never get here */
+               printk(KERN_ERR "%s: We wanted to %s, but the device wants us "
+                               "to %s!\n", drive->name,
+                               (ireason & IO) ? "Write" : "Read",
+                               (ireason & IO) ? "Read" : "Write");
+               return ide_do_reset(drive);
+       }
+       if (!(pc->flags & PC_FLAG_WRITING)) {
+               /*
+                * Reading - Check that we have enough space.  Going past
+                * req_xfer is tolerated; going past buf_size is not: at most
+                * the part that still fits is stored (SCSI-flagged drives
+                * only) and the rest of bcount goes to ide_pad_transfer().
+                */
+               temp = pc->xferred + bcount;
+               if (temp > pc->req_xfer) {
+                       if (temp > pc->buf_size) {
+                               printk(KERN_ERR "%s: The device wants to send "
+                                               "us more data than expected - "
+                                               "discarding data\n",
+                                               drive->name);
+                               if (scsi)
+                                       temp = pc->buf_size - pc->xferred;
+                               else
+                                       temp = 0;
+                               if (temp) {
+                                       if (pc->sg)
+                                               io_buffers(drive, pc, temp, 0);
+                                       else
+                                               hwif->input_data(drive, NULL,
+                                                       pc->cur_pos, temp);
+                                       printk(KERN_ERR "%s: transferred %d of "
+                                                       "%d bytes\n",
+                                                       drive->name,
+                                                       temp, bcount);
+                               }
+                               pc->xferred += temp;
+                               pc->cur_pos += temp;
+                               ide_pad_transfer(drive, 0, bcount - temp);
+                               ide_set_handler(drive, handler, timeout,
+                                               expiry);
+                               return ide_started;
+                       }
+                       debug_log("The device wants to send us more data than "
+                                 "expected - allowing transfer\n");
+               }
+               xferfunc = hwif->input_data;
+       } else
+               xferfunc = hwif->output_data;
+
+       if ((drive->media == ide_floppy && !scsi && !pc->buf) ||
+           (drive->media == ide_tape && !scsi && pc->bh) ||
+           (scsi && pc->sg))
+               io_buffers(drive, pc, bcount, !!(pc->flags & PC_FLAG_WRITING));
+       else
+               xferfunc(drive, NULL, pc->cur_pos, bcount);
+
+       /* Update the current position */
+       pc->xferred += bcount;
+       pc->cur_pos += bcount;
+
+       debug_log("[cmd %x] transferred %d bytes on that intr.\n",
+                 pc->c[0], bcount);
+
+       /* And set the interrupt handler again */
+       ide_set_handler(drive, handler, timeout, expiry);
+       return ide_started;
+}
+EXPORT_SYMBOL_GPL(ide_pc_intr);
+
+/*
+ * ide_wait_ireason - poll the interrupt reason register until it reads
+ * (IO,CoD) == (0,1), i.e. CD set and IO clear.
+ *
+ * @drive:   drive whose nsect register is re-read on every retry
+ * @ireason: value already read by the caller; returned unchanged when it
+ *           already satisfies the condition
+ *
+ * Up to 100 retries are made, 100us apart.  When the last retry still does
+ * not show the wanted state, a message is logged and the returned value is
+ * forced to CD set / IO clear so that the caller carries on regardless.
+ *
+ * Returns the (possibly forced) interrupt reason value.
+ */
+static u8 ide_wait_ireason(ide_drive_t *drive, u8 ireason)
+{
+       ide_hwif_t *hwif = drive->hwif;
+       int retries = 100;
+
+       while (retries-- && ((ireason & CD) == 0 || (ireason & IO))) {
+               printk(KERN_ERR "%s: (IO,CoD) != (0,1) while issuing "
+                               "a packet command, retrying\n", drive->name);
+               udelay(100);
+               ireason = hwif->INB(hwif->io_ports.nsect_addr);
+               if (retries == 0) {
+                       /* out of retries: pretend the drive is ready */
+                       printk(KERN_ERR "%s: (IO,CoD) != (0,1) while issuing "
+                                       "a packet command, ignoring\n",
+                                       drive->name);
+                       ireason |= CD;
+                       ireason &= ~IO;
+               }
+       }
+
+       return ireason;
+}
+/*
+ * ide_transfer_pc - send the 12-byte command packet once the drive asks for it.
+ *
+ * @drive:   drive the packet command is sent to
+ * @pc:      packet command; pc->c holds the bytes that are written out
+ * @handler: interrupt handler armed through ide_set_handler()
+ * @timeout: passed to ide_set_handler() together with @handler
+ * @expiry:  passed to ide_set_handler() together with @handler
+ *
+ * Waits for DRQ with ide_wait_stat() and then checks the interrupt reason
+ * register for CD set / IO clear; for a tape drive not flagged drive->scsi
+ * the register is first polled through ide_wait_ireason().  A wrong
+ * interrupt reason resets the drive.  Otherwise the handler is armed, DMA is
+ * started when PC_FLAG_DMA_OK is set (PC_FLAG_DMA_IN_PROGRESS is raised), and
+ * the packet is written unless PC_FLAG_ZIP_DRIVE is set.
+ *
+ * Returns ide_started on success, the startstop value filled in by
+ * ide_wait_stat() when DRQ never showed up, or the value of ide_do_reset().
+ */
+ide_startstop_t ide_transfer_pc(ide_drive_t *drive, struct ide_atapi_pc *pc,
+                               ide_handler_t *handler, unsigned int timeout,
+                               ide_expiry_t *expiry)
+{
+       ide_hwif_t *hwif = drive->hwif;
+       ide_startstop_t startstop;
+       u8 ireason;
+
+       if (ide_wait_stat(&startstop, drive, DRQ_STAT, BUSY_STAT, WAIT_READY)) {
+               printk(KERN_ERR "%s: Strange, packet command initiated yet "
+                               "DRQ isn't asserted\n", drive->name);
+               return startstop;
+       }
+
+       ireason = hwif->INB(hwif->io_ports.nsect_addr);
+       if (drive->media == ide_tape && !drive->scsi)
+               ireason = ide_wait_ireason(drive, ireason);
+
+       if ((ireason & CD) == 0 || (ireason & IO)) {
+               printk(KERN_ERR "%s: (IO,CoD) != (0,1) while issuing "
+                               "a packet command\n", drive->name);
+               return ide_do_reset(drive);
+       }
+
+       /* Set the interrupt routine */
+       ide_set_handler(drive, handler, timeout, expiry);
+
+       /* Begin DMA, if necessary */
+       if (pc->flags & PC_FLAG_DMA_OK) {
+               pc->flags |= PC_FLAG_DMA_IN_PROGRESS;
+               hwif->dma_ops->dma_start(drive);
+       }
+
+       /* Send the actual packet */
+       if ((pc->flags & PC_FLAG_ZIP_DRIVE) == 0)
+               hwif->output_data(drive, NULL, pc->c, 12);
+
+       return ide_started;
+}
+EXPORT_SYMBOL_GPL(ide_transfer_pc);
+/*
+ * ide_issue_pc - set up the transfer and issue a packet command to the drive.
+ *
+ * @drive:   drive the command is issued to
+ * @pc:      packet command; pc->xferred and pc->cur_pos are reset here
+ * @handler: passed to ide_execute_command() when PC_FLAG_DRQ_INTERRUPT is
+ *           set, otherwise called directly after ide_execute_pkt_cmd()
+ * @timeout: passed to ide_execute_command() on the DRQ-interrupt path
+ * @expiry:  not referenced in this function (NULL is passed as the expiry
+ *           argument of ide_execute_command())
+ *
+ * The requested byte count is pc->req_xfer for a tape drive not flagged
+ * drive->scsi and min(pc->req_xfer, 63 * 1024) otherwise.  A pending
+ * PC_FLAG_DMA_ERROR is cleared and DMA is switched off for the drive.  DMA is
+ * only used when PC_FLAG_DMA_OK is set, drive->using_dma is set and
+ * dma_setup() returns zero; in every other case PC_FLAG_DMA_OK is cleared.
+ *
+ * Returns ide_started on the DRQ-interrupt path, otherwise whatever
+ * @handler returns.
+ */
+ide_startstop_t ide_issue_pc(ide_drive_t *drive, struct ide_atapi_pc *pc,
+                            ide_handler_t *handler, unsigned int timeout,
+                            ide_expiry_t *expiry)
+{
+       ide_hwif_t *hwif = drive->hwif;
+       u16 bcount;
+       u8 dma = 0;
+
+       /* We haven't transferred any data yet */
+       pc->xferred = 0;
+       pc->cur_pos = pc->buf;
+
+       /* Request to transfer the entire buffer at once */
+       if (drive->media == ide_tape && !drive->scsi)
+               bcount = pc->req_xfer;
+       else
+               bcount = min(pc->req_xfer, 63 * 1024);
+
+       if (pc->flags & PC_FLAG_DMA_ERROR) {
+               pc->flags &= ~PC_FLAG_DMA_ERROR;
+               ide_dma_off(drive);
+       }
+
+       if ((pc->flags & PC_FLAG_DMA_OK) && drive->using_dma) {
+               if (drive->scsi)
+                       hwif->sg_mapped = 1;
+               dma = !hwif->dma_ops->dma_setup(drive);
+               if (drive->scsi)
+                       hwif->sg_mapped = 0;
+       }
+
+       if (!dma)
+               pc->flags &= ~PC_FLAG_DMA_OK;
+
+       ide_pktcmd_tf_load(drive, drive->scsi ? 0 : IDE_TFLAG_OUT_DEVICE,
+                          bcount, dma);
+
+       /* Issue the packet command */
+       if (pc->flags & PC_FLAG_DRQ_INTERRUPT) {
+               ide_execute_command(drive, WIN_PACKETCMD, handler,
+                                   timeout, NULL);
+               return ide_started;
+       } else {
+               ide_execute_pkt_cmd(drive);
+               return (*handler)(drive);
+       }
+}
+EXPORT_SYMBOL_GPL(ide_issue_pc);
index 68e7f19dc03662b405363ca23bf0081db73baa4a..d99847157186e9669efd3f56bfdb5eac54de7042 100644 (file)
@@ -188,16 +188,6 @@ static void cdrom_analyze_sense_data(ide_drive_t *drive,
        ide_cd_log_error(drive->name, failed_command, sense);
 }
 
-/* Initialize a ide-cd packet command request */
-void ide_cd_init_rq(ide_drive_t *drive, struct request *rq)
-{
-       struct cdrom_info *cd = drive->driver_data;
-
-       ide_init_drive_cmd(rq);
-       rq->cmd_type = REQ_TYPE_ATA_PC;
-       rq->rq_disk = cd->disk;
-}
-
 static void cdrom_queue_request_sense(ide_drive_t *drive, void *sense,
                                      struct request *failed_command)
 {
@@ -208,7 +198,9 @@ static void cdrom_queue_request_sense(ide_drive_t *drive, void *sense,
                sense = &info->sense_data;
 
        /* stuff the sense request in front of our current request */
-       ide_cd_init_rq(drive, rq);
+       blk_rq_init(NULL, rq);
+       rq->cmd_type = REQ_TYPE_ATA_PC;
+       rq->rq_disk = info->disk;
 
        rq->data = sense;
        rq->cmd[0] = GPCMD_REQUEST_SENSE;
@@ -216,11 +208,12 @@ static void cdrom_queue_request_sense(ide_drive_t *drive, void *sense,
        rq->data_len = 18;
 
        rq->cmd_type = REQ_TYPE_SENSE;
+       rq->cmd_flags |= REQ_PREEMPT;
 
        /* NOTE! Save the failed command in "rq->buffer" */
        rq->buffer = (void *) failed_command;
 
-       (void) ide_do_drive_cmd(drive, rq, ide_preempt);
+       ide_do_drive_cmd(drive, rq);
 }
 
 static void cdrom_end_request(ide_drive_t *drive, int uptodate)
@@ -537,8 +530,8 @@ static ide_startstop_t cdrom_start_packet_command(ide_drive_t *drive,
                info->dma = !hwif->dma_ops->dma_setup(drive);
 
        /* set up the controller registers */
-       ide_pktcmd_tf_load(drive, IDE_TFLAG_OUT_NSECT | IDE_TFLAG_OUT_LBAL |
-                          IDE_TFLAG_NO_SELECT_MASK, xferlen, info->dma);
+       ide_pktcmd_tf_load(drive, IDE_TFLAG_OUT_NSECT | IDE_TFLAG_OUT_LBAL,
+                          xferlen, info->dma);
 
        if (info->cd_flags & IDE_CD_FLAG_DRQ_INTERRUPT) {
                /* waiting for CDB interrupt, not DMA yet. */
@@ -838,34 +831,54 @@ static void ide_cd_request_sense_fixup(struct request *rq)
                }
 }
 
-int ide_cd_queue_pc(ide_drive_t *drive, struct request *rq)
+int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd,
+                   int write, void *buffer, unsigned *bufflen,
+                   struct request_sense *sense, int timeout,
+                   unsigned int cmd_flags)
 {
-       struct request_sense sense;
+       struct cdrom_info *info = drive->driver_data;
+       struct request_sense local_sense;
        int retries = 10;
-       unsigned int flags = rq->cmd_flags;
+       unsigned int flags = 0;
 
-       if (rq->sense == NULL)
-               rq->sense = &sense;
+       if (!sense)
+               sense = &local_sense;
 
        /* start of retry loop */
        do {
+               struct request *rq;
                int error;
-               unsigned long time = jiffies;
-               rq->cmd_flags = flags;
 
-               error = ide_do_drive_cmd(drive, rq, ide_wait);
-               time = jiffies - time;
+               rq = blk_get_request(drive->queue, write, __GFP_WAIT);
+
+               memcpy(rq->cmd, cmd, BLK_MAX_CDB);
+               rq->cmd_type = REQ_TYPE_ATA_PC;
+               rq->sense = sense;
+               rq->cmd_flags |= cmd_flags;
+               rq->timeout = timeout;
+               if (buffer) {
+                       rq->data = buffer;
+                       rq->data_len = *bufflen;
+               }
+
+               error = blk_execute_rq(drive->queue, info->disk, rq, 0);
+
+               if (buffer)
+                       *bufflen = rq->data_len;
+
+               flags = rq->cmd_flags;
+               blk_put_request(rq);
 
                /*
                 * FIXME: we should probably abort/retry or something in case of
                 * failure.
                 */
-               if (rq->cmd_flags & REQ_FAILED) {
+               if (flags & REQ_FAILED) {
                        /*
                         * The request failed.  Retry if it was due to a unit
                         * attention status (usually means media was changed).
                         */
-                       struct request_sense *reqbuf = rq->sense;
+                       struct request_sense *reqbuf = sense;
 
                        if (reqbuf->sense_key == UNIT_ATTENTION)
                                cdrom_saw_media_change(drive);
@@ -885,10 +898,10 @@ int ide_cd_queue_pc(ide_drive_t *drive, struct request *rq)
                }
 
                /* end of retry loop */
-       } while ((rq->cmd_flags & REQ_FAILED) && retries >= 0);
+       } while ((flags & REQ_FAILED) && retries >= 0);
 
        /* return an error if the command failed */
-       return (rq->cmd_flags & REQ_FAILED) ? -EIO : 0;
+       return (flags & REQ_FAILED) ? -EIO : 0;
 }
 
 /*
@@ -1268,23 +1281,20 @@ static void msf_from_bcd(struct atapi_msf *msf)
 
 int cdrom_check_status(ide_drive_t *drive, struct request_sense *sense)
 {
-       struct request req;
        struct cdrom_info *info = drive->driver_data;
        struct cdrom_device_info *cdi = &info->devinfo;
+       unsigned char cmd[BLK_MAX_CDB];
 
-       ide_cd_init_rq(drive, &req);
-
-       req.sense = sense;
-       req.cmd[0] = GPCMD_TEST_UNIT_READY;
-       req.cmd_flags |= REQ_QUIET;
+       memset(cmd, 0, BLK_MAX_CDB);
+       cmd[0] = GPCMD_TEST_UNIT_READY;
 
        /*
         * Sanyo 3 CD changer uses byte 7 of TEST_UNIT_READY to switch CDs
         * instead of supporting the LOAD_UNLOAD opcode.
         */
-       req.cmd[7] = cdi->sanyo_slot % 3;
+       cmd[7] = cdi->sanyo_slot % 3;
 
-       return ide_cd_queue_pc(drive, &req);
+       return ide_cd_queue_pc(drive, cmd, 0, NULL, 0, sense, 0, REQ_QUIET);
 }
 
 static int cdrom_read_capacity(ide_drive_t *drive, unsigned long *capacity,
@@ -1297,17 +1307,14 @@ static int cdrom_read_capacity(ide_drive_t *drive, unsigned long *capacity,
        } capbuf;
 
        int stat;
-       struct request req;
-
-       ide_cd_init_rq(drive, &req);
+       unsigned char cmd[BLK_MAX_CDB];
+       unsigned len = sizeof(capbuf);
 
-       req.sense = sense;
-       req.cmd[0] = GPCMD_READ_CDVD_CAPACITY;
-       req.data = (char *)&capbuf;
-       req.data_len = sizeof(capbuf);
-       req.cmd_flags |= REQ_QUIET;
+       memset(cmd, 0, BLK_MAX_CDB);
+       cmd[0] = GPCMD_READ_CDVD_CAPACITY;
 
-       stat = ide_cd_queue_pc(drive, &req);
+       stat = ide_cd_queue_pc(drive, cmd, 0, &capbuf, &len, sense, 0,
+                              REQ_QUIET);
        if (stat == 0) {
                *capacity = 1 + be32_to_cpu(capbuf.lba);
                *sectors_per_frame =
@@ -1321,24 +1328,20 @@ static int cdrom_read_tocentry(ide_drive_t *drive, int trackno, int msf_flag,
                                int format, char *buf, int buflen,
                                struct request_sense *sense)
 {
-       struct request req;
+       unsigned char cmd[BLK_MAX_CDB];
 
-       ide_cd_init_rq(drive, &req);
+       memset(cmd, 0, BLK_MAX_CDB);
 
-       req.sense = sense;
-       req.data =  buf;
-       req.data_len = buflen;
-       req.cmd_flags |= REQ_QUIET;
-       req.cmd[0] = GPCMD_READ_TOC_PMA_ATIP;
-       req.cmd[6] = trackno;
-       req.cmd[7] = (buflen >> 8);
-       req.cmd[8] = (buflen & 0xff);
-       req.cmd[9] = (format << 6);
+       cmd[0] = GPCMD_READ_TOC_PMA_ATIP;
+       cmd[6] = trackno;
+       cmd[7] = (buflen >> 8);
+       cmd[8] = (buflen & 0xff);
+       cmd[9] = (format << 6);
 
        if (msf_flag)
-               req.cmd[1] = 2;
+               cmd[1] = 2;
 
-       return ide_cd_queue_pc(drive, &req);
+       return ide_cd_queue_pc(drive, cmd, 0, buf, &buflen, sense, 0, REQ_QUIET);
 }
 
 /* Try to read the entire TOC for the disk into our internal buffer. */
@@ -2103,11 +2106,6 @@ static int ide_cd_probe(ide_drive_t *drive)
                        goto failed;
                }
        }
-       if (drive->scsi) {
-               printk(KERN_INFO "ide-cd: passing drive %s to ide-scsi "
-                                "emulation.\n", drive->name);
-               goto failed;
-       }
        info = kzalloc(sizeof(struct cdrom_info), GFP_KERNEL);
        if (info == NULL) {
                printk(KERN_ERR "%s: Can't allocate a cdrom structure\n",
index a58801c4484d309177afe8b7d32aa77c7ac7f91f..fe0ea36e4124d1c1ba1cc438d9c8538918122664 100644 (file)
@@ -143,8 +143,8 @@ struct cdrom_info {
 void ide_cd_log_error(const char *, struct request *, struct request_sense *);
 
 /* ide-cd.c functions used by ide-cd_ioctl.c */
-void ide_cd_init_rq(ide_drive_t *, struct request *);
-int ide_cd_queue_pc(ide_drive_t *, struct request *);
+int ide_cd_queue_pc(ide_drive_t *, const unsigned char *, int, void *,
+                   unsigned *, struct request_sense *, int, unsigned int);
 int ide_cd_read_toc(ide_drive_t *, struct request_sense *);
 int ide_cdrom_get_capabilities(ide_drive_t *, u8 *);
 void ide_cdrom_update_speed(ide_drive_t *, u8 *);
index 6d147ce6782f17b8d8f1b2667891bc6dd3ff8076..24d002addf73d5c5b6b1e9af1dcad6ceed15dd7b 100644 (file)
@@ -104,8 +104,8 @@ int cdrom_eject(ide_drive_t *drive, int ejectflag,
 {
        struct cdrom_info *cd = drive->driver_data;
        struct cdrom_device_info *cdi = &cd->devinfo;
-       struct request req;
        char loej = 0x02;
+       unsigned char cmd[BLK_MAX_CDB];
 
        if ((cd->cd_flags & IDE_CD_FLAG_NO_EJECT) && !ejectflag)
                return -EDRIVE_CANT_DO_THIS;
@@ -114,17 +114,16 @@ int cdrom_eject(ide_drive_t *drive, int ejectflag,
        if ((cd->cd_flags & IDE_CD_FLAG_DOOR_LOCKED) && ejectflag)
                return 0;
 
-       ide_cd_init_rq(drive, &req);
-
        /* only tell drive to close tray if open, if it can do that */
        if (ejectflag && (cdi->mask & CDC_CLOSE_TRAY))
                loej = 0;
 
-       req.sense = sense;
-       req.cmd[0] = GPCMD_START_STOP_UNIT;
-       req.cmd[4] = loej | (ejectflag != 0);
+       memset(cmd, 0, BLK_MAX_CDB);
+
+       cmd[0] = GPCMD_START_STOP_UNIT;
+       cmd[4] = loej | (ejectflag != 0);
 
-       return ide_cd_queue_pc(drive, &req);
+       return ide_cd_queue_pc(drive, cmd, 0, NULL, 0, sense, 0, 0);
 }
 
 /* Lock the door if LOCKFLAG is nonzero; unlock it otherwise. */
@@ -134,7 +133,6 @@ int ide_cd_lockdoor(ide_drive_t *drive, int lockflag,
 {
        struct cdrom_info *cd = drive->driver_data;
        struct request_sense my_sense;
-       struct request req;
        int stat;
 
        if (sense == NULL)
@@ -144,11 +142,15 @@ int ide_cd_lockdoor(ide_drive_t *drive, int lockflag,
        if (cd->cd_flags & IDE_CD_FLAG_NO_DOORLOCK) {
                stat = 0;
        } else {
-               ide_cd_init_rq(drive, &req);
-               req.sense = sense;
-               req.cmd[0] = GPCMD_PREVENT_ALLOW_MEDIUM_REMOVAL;
-               req.cmd[4] = lockflag ? 1 : 0;
-               stat = ide_cd_queue_pc(drive, &req);
+               unsigned char cmd[BLK_MAX_CDB];
+
+               memset(cmd, 0, BLK_MAX_CDB);
+
+               cmd[0] = GPCMD_PREVENT_ALLOW_MEDIUM_REMOVAL;
+               cmd[4] = lockflag ? 1 : 0;
+
+               stat = ide_cd_queue_pc(drive, cmd, 0, NULL, 0,
+                                      sense, 0, 0);
        }
 
        /* If we got an illegal field error, the drive
@@ -206,32 +208,30 @@ int ide_cdrom_select_speed(struct cdrom_device_info *cdi, int speed)
 {
        ide_drive_t *drive = cdi->handle;
        struct cdrom_info *cd = drive->driver_data;
-       struct request rq;
        struct request_sense sense;
        u8 buf[ATAPI_CAPABILITIES_PAGE_SIZE];
        int stat;
-
-       ide_cd_init_rq(drive, &rq);
-
-       rq.sense = &sense;
+       unsigned char cmd[BLK_MAX_CDB];
 
        if (speed == 0)
                speed = 0xffff; /* set to max */
        else
                speed *= 177;   /* Nx to kbytes/s */
 
-       rq.cmd[0] = GPCMD_SET_SPEED;
+       memset(cmd, 0, BLK_MAX_CDB);
+
+       cmd[0] = GPCMD_SET_SPEED;
        /* Read Drive speed in kbytes/second MSB/LSB */
-       rq.cmd[2] = (speed >> 8) & 0xff;
-       rq.cmd[3] = speed & 0xff;
+       cmd[2] = (speed >> 8) & 0xff;
+       cmd[3] = speed & 0xff;
        if ((cdi->mask & (CDC_CD_R | CDC_CD_RW | CDC_DVD_R)) !=
            (CDC_CD_R | CDC_CD_RW | CDC_DVD_R)) {
                /* Write Drive speed in kbytes/second MSB/LSB */
-               rq.cmd[4] = (speed >> 8) & 0xff;
-               rq.cmd[5] = speed & 0xff;
+               cmd[4] = (speed >> 8) & 0xff;
+               cmd[5] = speed & 0xff;
        }
 
-       stat = ide_cd_queue_pc(drive, &rq);
+       stat = ide_cd_queue_pc(drive, cmd, 0, NULL, 0, &sense, 0, 0);
 
        if (!ide_cdrom_get_capabilities(drive, buf)) {
                ide_cdrom_update_speed(drive, buf);
@@ -268,21 +268,19 @@ int ide_cdrom_get_mcn(struct cdrom_device_info *cdi,
 {
        ide_drive_t *drive = cdi->handle;
        int stat, mcnlen;
-       struct request rq;
        char buf[24];
+       unsigned char cmd[BLK_MAX_CDB];
+       unsigned len = sizeof(buf);
 
-       ide_cd_init_rq(drive, &rq);
+       memset(cmd, 0, BLK_MAX_CDB);
 
-       rq.data = buf;
-       rq.data_len = sizeof(buf);
+       cmd[0] = GPCMD_READ_SUBCHANNEL;
+       cmd[1] = 2;             /* MSF addressing */
+       cmd[2] = 0x40;  /* request subQ data */
+       cmd[3] = 2;             /* format */
+       cmd[8] = len;
 
-       rq.cmd[0] = GPCMD_READ_SUBCHANNEL;
-       rq.cmd[1] = 2;          /* MSF addressing */
-       rq.cmd[2] = 0x40;       /* request subQ data */
-       rq.cmd[3] = 2;          /* format */
-       rq.cmd[8] = sizeof(buf);
-
-       stat = ide_cd_queue_pc(drive, &rq);
+       stat = ide_cd_queue_pc(drive, cmd, 0, buf, &len, NULL, 0, 0);
        if (stat)
                return stat;
 
@@ -298,14 +296,14 @@ int ide_cdrom_reset(struct cdrom_device_info *cdi)
        ide_drive_t *drive = cdi->handle;
        struct cdrom_info *cd = drive->driver_data;
        struct request_sense sense;
-       struct request req;
+       struct request *rq;
        int ret;
 
-       ide_cd_init_rq(drive, &req);
-       req.cmd_type = REQ_TYPE_SPECIAL;
-       req.cmd_flags = REQ_QUIET;
-       ret = ide_do_drive_cmd(drive, &req, ide_wait);
-
+       rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
+       rq->cmd_type = REQ_TYPE_SPECIAL;
+       rq->cmd_flags = REQ_QUIET;
+       ret = blk_execute_rq(drive->queue, cd->disk, rq, 0);
+       blk_put_request(rq);
        /*
         * A reset will unlock the door. If it was previously locked,
         * lock it again.
@@ -351,8 +349,8 @@ static int ide_cd_fake_play_trkind(ide_drive_t *drive, void *arg)
        struct atapi_toc_entry *first_toc, *last_toc;
        unsigned long lba_start, lba_end;
        int stat;
-       struct request rq;
        struct request_sense sense;
+       unsigned char cmd[BLK_MAX_CDB];
 
        stat = ide_cd_get_toc_entry(drive, ti->cdti_trk0, &first_toc);
        if (stat)
@@ -370,14 +368,13 @@ static int ide_cd_fake_play_trkind(ide_drive_t *drive, void *arg)
        if (lba_end <= lba_start)
                return -EINVAL;
 
-       ide_cd_init_rq(drive, &rq);
+       memset(cmd, 0, BLK_MAX_CDB);
 
-       rq.sense = &sense;
-       rq.cmd[0] = GPCMD_PLAY_AUDIO_MSF;
-       lba_to_msf(lba_start,   &rq.cmd[3], &rq.cmd[4], &rq.cmd[5]);
-       lba_to_msf(lba_end - 1, &rq.cmd[6], &rq.cmd[7], &rq.cmd[8]);
+       cmd[0] = GPCMD_PLAY_AUDIO_MSF;
+       lba_to_msf(lba_start,   &cmd[3], &cmd[4], &cmd[5]);
+       lba_to_msf(lba_end - 1, &cmd[6], &cmd[7], &cmd[8]);
 
-       return ide_cd_queue_pc(drive, &rq);
+       return ide_cd_queue_pc(drive, cmd, 0, NULL, 0, &sense, 0, 0);
 }
 
 static int ide_cd_read_tochdr(ide_drive_t *drive, void *arg)
@@ -447,8 +444,9 @@ int ide_cdrom_audio_ioctl(struct cdrom_device_info *cdi,
 int ide_cdrom_packet(struct cdrom_device_info *cdi,
                            struct packet_command *cgc)
 {
-       struct request req;
        ide_drive_t *drive = cdi->handle;
+       unsigned int flags = 0;
+       unsigned len = cgc->buflen;
 
        if (cgc->timeout <= 0)
                cgc->timeout = ATAPI_WAIT_PC;
@@ -456,24 +454,21 @@ int ide_cdrom_packet(struct cdrom_device_info *cdi,
        /* here we queue the commands from the uniform CD-ROM
           layer. the packet must be complete, as we do not
           touch it at all. */
-       ide_cd_init_rq(drive, &req);
 
        if (cgc->data_direction == CGC_DATA_WRITE)
-               req.cmd_flags |= REQ_RW;
+               flags |= REQ_RW;
 
-       memcpy(req.cmd, cgc->cmd, CDROM_PACKET_SIZE);
        if (cgc->sense)
                memset(cgc->sense, 0, sizeof(struct request_sense));
-       req.data = cgc->buffer;
-       req.data_len = cgc->buflen;
-       req.timeout = cgc->timeout;
 
        if (cgc->quiet)
-               req.cmd_flags |= REQ_QUIET;
+               flags |= REQ_QUIET;
 
-       req.sense = cgc->sense;
-       cgc->stat = ide_cd_queue_pc(drive, &req);
+       cgc->stat = ide_cd_queue_pc(drive, cgc->cmd,
+                                   cgc->data_direction == CGC_DATA_WRITE,
+                                   cgc->buffer, &len,
+                                   cgc->sense, cgc->timeout, flags);
        if (!cgc->stat)
-               cgc->buflen -= req.data_len;
+               cgc->buflen -= len;
        return cgc->stat;
 }
index 8e08d083fce95cba5f091b0a1892f938f8dfba11..5f49a4ae9dd80f773ad913eaaa2adf91d6316682 100644 (file)
@@ -198,8 +198,7 @@ static ide_startstop_t __ide_do_rw_disk(ide_drive_t *drive, struct request *rq,
        }
 
        memset(&task, 0, sizeof(task));
-       task.tf_flags = IDE_TFLAG_NO_SELECT_MASK;  /* FIXME? */
-       task.tf_flags |= (IDE_TFLAG_TF | IDE_TFLAG_DEVICE);
+       task.tf_flags = IDE_TFLAG_TF | IDE_TFLAG_DEVICE;
 
        if (drive->select.b.lba) {
                if (lba48) {
@@ -617,7 +616,8 @@ static void idedisk_prepare_flush(struct request_queue *q, struct request *rq)
  */
 static int set_multcount(ide_drive_t *drive, int arg)
 {
-       struct request rq;
+       struct request *rq;
+       int error;
 
        if (arg < 0 || arg > drive->id->max_multsect)
                return -EINVAL;
@@ -625,12 +625,13 @@ static int set_multcount(ide_drive_t *drive, int arg)
        if (drive->special.b.set_multmode)
                return -EBUSY;
 
-       ide_init_drive_cmd(&rq);
-       rq.cmd_type = REQ_TYPE_ATA_TASKFILE;
+       rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
+       rq->cmd_type = REQ_TYPE_ATA_TASKFILE;
 
        drive->mult_req = arg;
        drive->special.b.set_multmode = 1;
-       (void)ide_do_drive_cmd(drive, &rq, ide_wait);
+       error = blk_execute_rq(drive->queue, NULL, rq, 0);
+       blk_put_request(rq);
 
        return (drive->mult_count == arg) ? 0 : -EIO;
 }
index 653b1ade13d315950a5b22efc54d69244d203fe4..7ee44f86bc5475065a962f8b2380cd0b86158d1b 100644 (file)
@@ -463,7 +463,7 @@ int ide_dma_setup(ide_drive_t *drive)
        }
 
        /* PRD table */
-       if (hwif->mmio)
+       if (hwif->host_flags & IDE_HFLAG_MMIO)
                writel(hwif->dmatable_dma,
                       (void __iomem *)(hwif->dma_base + ATA_DMA_TABLE_OFS));
        else
@@ -692,7 +692,7 @@ static int ide_tune_dma(ide_drive_t *drive)
        ide_hwif_t *hwif = drive->hwif;
        u8 speed;
 
-       if (noautodma || drive->nodma || (drive->id->capability & 1) == 0)
+       if (drive->nodma || (drive->id->capability & 1) == 0)
                return 0;
 
        /* consult the list of known "bad" drives */
index f05fbc2bd7a89ac597c1261acfc143f6df21d173..b3689437269f0586ebdfdebfa3ca53c2cd83a182 100644 (file)
@@ -286,11 +286,12 @@ static void idefloppy_queue_pc_head(ide_drive_t *drive, struct ide_atapi_pc *pc,
 {
        struct ide_floppy_obj *floppy = drive->driver_data;
 
-       ide_init_drive_cmd(rq);
+       blk_rq_init(NULL, rq);
        rq->buffer = (char *) pc;
        rq->cmd_type = REQ_TYPE_SPECIAL;
+       rq->cmd_flags |= REQ_PREEMPT;
        rq->rq_disk = floppy->disk;
-       (void) ide_do_drive_cmd(drive, rq, ide_preempt);
+       ide_do_drive_cmd(drive, rq);
 }
 
 static struct ide_atapi_pc *idefloppy_next_pc_storage(ide_drive_t *drive)
@@ -311,50 +312,41 @@ static struct request *idefloppy_next_rq_storage(ide_drive_t *drive)
        return (&floppy->rq_stack[floppy->rq_stack_index++]);
 }
 
-static void idefloppy_request_sense_callback(ide_drive_t *drive)
+static void ide_floppy_callback(ide_drive_t *drive)
 {
        idefloppy_floppy_t *floppy = drive->driver_data;
-       u8 *buf = floppy->pc->buf;
+       struct ide_atapi_pc *pc = floppy->pc;
+       int uptodate = pc->error ? 0 : 1;
 
        debug_log("Reached %s\n", __func__);
 
-       if (!floppy->pc->error) {
-               floppy->sense_key = buf[2] & 0x0F;
-               floppy->asc = buf[12];
-               floppy->ascq = buf[13];
-               floppy->progress_indication = buf[15] & 0x80 ?
-                       (u16)get_unaligned((u16 *)&buf[16]) : 0x10000;
+       if (floppy->failed_pc == pc)
+               floppy->failed_pc = NULL;
 
-               if (floppy->failed_pc)
-                       debug_log("pc = %x, sense key = %x, asc = %x,"
-                                       " ascq = %x\n",
-                                       floppy->failed_pc->c[0],
-                                       floppy->sense_key,
-                                       floppy->asc,
-                                       floppy->ascq);
-               else
-                       debug_log("sense key = %x, asc = %x, ascq = %x\n",
-                                       floppy->sense_key,
-                                       floppy->asc,
-                                       floppy->ascq);
+       if (pc->c[0] == GPCMD_READ_10 || pc->c[0] == GPCMD_WRITE_10 ||
+           (pc->rq && blk_pc_request(pc->rq)))
+               uptodate = 1; /* FIXME */
+       else if (pc->c[0] == GPCMD_REQUEST_SENSE) {
+               u8 *buf = floppy->pc->buf;
 
+               if (!pc->error) {
+                       floppy->sense_key = buf[2] & 0x0F;
+                       floppy->asc = buf[12];
+                       floppy->ascq = buf[13];
+                       floppy->progress_indication = buf[15] & 0x80 ?
+                               (u16)get_unaligned((u16 *)&buf[16]) : 0x10000;
 
-               idefloppy_end_request(drive, 1, 0);
-       } else {
-               printk(KERN_ERR "Error in REQUEST SENSE itself - Aborting"
-                               " request!\n");
-               idefloppy_end_request(drive, 0, 0);
-       }
-}
+                       if (floppy->failed_pc)
+                               debug_log("pc = %x, ", floppy->failed_pc->c[0]);
 
-/* General packet command callback function. */
-static void idefloppy_pc_callback(ide_drive_t *drive)
-{
-       idefloppy_floppy_t *floppy = drive->driver_data;
-
-       debug_log("Reached %s\n", __func__);
+                       debug_log("sense key = %x, asc = %x, ascq = %x\n",
+                                 floppy->sense_key, floppy->asc, floppy->ascq);
+               } else
+                       printk(KERN_ERR "Error in REQUEST SENSE itself - "
+                                       "Aborting request!\n");
+       }
 
-       idefloppy_end_request(drive, floppy->pc->error ? 0 : 1, 0);
+       idefloppy_end_request(drive, uptodate, 0);
 }
 
 static void idefloppy_init_pc(struct ide_atapi_pc *pc)
@@ -365,7 +357,7 @@ static void idefloppy_init_pc(struct ide_atapi_pc *pc)
        pc->req_xfer = 0;
        pc->buf = pc->pc_buf;
        pc->buf_size = IDEFLOPPY_PC_BUFFER_SIZE;
-       pc->idefloppy_callback = &idefloppy_pc_callback;
+       pc->callback = ide_floppy_callback;
 }
 
 static void idefloppy_create_request_sense_cmd(struct ide_atapi_pc *pc)
@@ -374,7 +366,6 @@ static void idefloppy_create_request_sense_cmd(struct ide_atapi_pc *pc)
        pc->c[0] = GPCMD_REQUEST_SENSE;
        pc->c[4] = 255;
        pc->req_xfer = 18;
-       pc->idefloppy_callback = &idefloppy_request_sense_callback;
 }
 
 /*
@@ -397,174 +388,19 @@ static void idefloppy_retry_pc(ide_drive_t *drive)
 static ide_startstop_t idefloppy_pc_intr(ide_drive_t *drive)
 {
        idefloppy_floppy_t *floppy = drive->driver_data;
-       ide_hwif_t *hwif = drive->hwif;
-       struct ide_atapi_pc *pc = floppy->pc;
-       struct request *rq = pc->rq;
-       xfer_func_t *xferfunc;
-       unsigned int temp;
-       int dma_error = 0;
-       u16 bcount;
-       u8 stat, ireason;
-
-       debug_log("Reached %s interrupt handler\n", __func__);
-
-       if (pc->flags & PC_FLAG_DMA_IN_PROGRESS) {
-               dma_error = hwif->dma_ops->dma_end(drive);
-               if (dma_error) {
-                       printk(KERN_ERR "%s: DMA %s error\n", drive->name,
-                                       rq_data_dir(rq) ? "write" : "read");
-                       pc->flags |= PC_FLAG_DMA_ERROR;
-               } else {
-                       pc->xferred = pc->req_xfer;
-                       idefloppy_update_buffers(drive, pc);
-               }
-               debug_log("DMA finished\n");
-       }
-
-       /* Clear the interrupt */
-       stat = ide_read_status(drive);
-
-       /* No more interrupts */
-       if ((stat & DRQ_STAT) == 0) {
-               debug_log("Packet command completed, %d bytes transferred\n",
-                               pc->xferred);
-               pc->flags &= ~PC_FLAG_DMA_IN_PROGRESS;
-
-               local_irq_enable_in_hardirq();
-
-               if ((stat & ERR_STAT) || (pc->flags & PC_FLAG_DMA_ERROR)) {
-                       /* Error detected */
-                       debug_log("%s: I/O error\n", drive->name);
-                       rq->errors++;
-                       if (pc->c[0] == GPCMD_REQUEST_SENSE) {
-                               printk(KERN_ERR "ide-floppy: I/O error in "
-                                       "request sense command\n");
-                               return ide_do_reset(drive);
-                       }
-                       /* Retry operation */
-                       idefloppy_retry_pc(drive);
-                       /* queued, but not started */
-                       return ide_stopped;
-               }
-               pc->error = 0;
-               if (floppy->failed_pc == pc)
-                       floppy->failed_pc = NULL;
-               /* Command finished - Call the callback function */
-               pc->idefloppy_callback(drive);
-               return ide_stopped;
-       }
-
-       if (pc->flags & PC_FLAG_DMA_IN_PROGRESS) {
-               pc->flags &= ~PC_FLAG_DMA_IN_PROGRESS;
-               printk(KERN_ERR "ide-floppy: The floppy wants to issue "
-                       "more interrupts in DMA mode\n");
-               ide_dma_off(drive);
-               return ide_do_reset(drive);
-       }
-
-       /* Get the number of bytes to transfer */
-       bcount = (hwif->INB(hwif->io_ports.lbah_addr) << 8) |
-                 hwif->INB(hwif->io_ports.lbam_addr);
-       /* on this interrupt */
-       ireason = hwif->INB(hwif->io_ports.nsect_addr);
-
-       if (ireason & CD) {
-               printk(KERN_ERR "ide-floppy: CoD != 0 in %s\n", __func__);
-               return ide_do_reset(drive);
-       }
-       if (((ireason & IO) == IO) == !!(pc->flags & PC_FLAG_WRITING)) {
-               /* Hopefully, we will never get here */
-               printk(KERN_ERR "ide-floppy: We wanted to %s, ",
-                               (ireason & IO) ? "Write" : "Read");
-               printk(KERN_ERR "but the floppy wants us to %s !\n",
-                               (ireason & IO) ? "Read" : "Write");
-               return ide_do_reset(drive);
-       }
-       if (!(pc->flags & PC_FLAG_WRITING)) {
-               /* Reading - Check that we have enough space */
-               temp = pc->xferred + bcount;
-               if (temp > pc->req_xfer) {
-                       if (temp > pc->buf_size) {
-                               printk(KERN_ERR "ide-floppy: The floppy wants "
-                                       "to send us more data than expected "
-                                       "- discarding data\n");
-                               ide_pad_transfer(drive, 0, bcount);
-
-                               ide_set_handler(drive,
-                                               &idefloppy_pc_intr,
-                                               IDEFLOPPY_WAIT_CMD,
-                                               NULL);
-                               return ide_started;
-                       }
-                       debug_log("The floppy wants to send us more data than"
-                                       " expected - allowing transfer\n");
-               }
-       }
-       if (pc->flags & PC_FLAG_WRITING)
-               xferfunc = hwif->output_data;
-       else
-               xferfunc = hwif->input_data;
-
-       if (pc->buf)
-               xferfunc(drive, NULL, pc->cur_pos, bcount);
-       else
-               ide_floppy_io_buffers(drive, pc, bcount,
-                                     !!(pc->flags & PC_FLAG_WRITING));
-
-       /* Update the current position */
-       pc->xferred += bcount;
-       pc->cur_pos += bcount;
-
-       /* And set the interrupt handler again */
-       ide_set_handler(drive, &idefloppy_pc_intr, IDEFLOPPY_WAIT_CMD, NULL);
-       return ide_started;
-}
-
-/*
- * This is the original routine that did the packet transfer.
- * It fails at high speeds on the Iomega ZIP drive, so there's a slower version
- * for that drive below. The algorithm is chosen based on drive type
- */
-static ide_startstop_t idefloppy_transfer_pc(ide_drive_t *drive)
-{
-       ide_hwif_t *hwif = drive->hwif;
-       ide_startstop_t startstop;
-       idefloppy_floppy_t *floppy = drive->driver_data;
-       u8 ireason;
-
-       if (ide_wait_stat(&startstop, drive, DRQ_STAT, BUSY_STAT, WAIT_READY)) {
-               printk(KERN_ERR "ide-floppy: Strange, packet command "
-                               "initiated yet DRQ isn't asserted\n");
-               return startstop;
-       }
-       ireason = hwif->INB(hwif->io_ports.nsect_addr);
-       if ((ireason & CD) == 0 || (ireason & IO)) {
-               printk(KERN_ERR "ide-floppy: (IO,CoD) != (0,1) while "
-                               "issuing a packet command\n");
-               return ide_do_reset(drive);
-       }
 
-       /* Set the interrupt routine */
-       ide_set_handler(drive, &idefloppy_pc_intr, IDEFLOPPY_WAIT_CMD, NULL);
-
-       /* Send the actual packet */
-       hwif->output_data(drive, NULL, floppy->pc->c, 12);
-
-       return ide_started;
+       return ide_pc_intr(drive, floppy->pc, idefloppy_pc_intr,
+                          IDEFLOPPY_WAIT_CMD, NULL, idefloppy_update_buffers,
+                          idefloppy_retry_pc, NULL, ide_floppy_io_buffers);
 }
 
-
 /*
  * What we have here is a classic case of a top half / bottom half interrupt
  * service routine. In interrupt mode, the device sends an interrupt to signal
  * that it is ready to receive a packet. However, we need to delay about 2-3
  * ticks before issuing the packet or we gets in trouble.
- *
- * So, follow carefully. transfer_pc1 is called as an interrupt (or directly).
- * In either case, when the device says it's ready for a packet, we schedule
- * the packet transfer to occur about 2-3 ticks later in transfer_pc2.
  */
-static int idefloppy_transfer_pc2(ide_drive_t *drive)
+static int idefloppy_transfer_pc(ide_drive_t *drive)
 {
        idefloppy_floppy_t *floppy = drive->driver_data;
 
@@ -575,24 +411,19 @@ static int idefloppy_transfer_pc2(ide_drive_t *drive)
        return IDEFLOPPY_WAIT_CMD;
 }
 
-static ide_startstop_t idefloppy_transfer_pc1(ide_drive_t *drive)
+
+/*
+ * Called as an interrupt (or directly). When the device says it's ready for a
+ * packet, we schedule the packet transfer to occur about 2-3 ticks later in
+ * transfer_pc.
+ */
+static ide_startstop_t idefloppy_start_pc_transfer(ide_drive_t *drive)
 {
-       ide_hwif_t *hwif = drive->hwif;
        idefloppy_floppy_t *floppy = drive->driver_data;
-       ide_startstop_t startstop;
-       u8 ireason;
+       struct ide_atapi_pc *pc = floppy->pc;
+       ide_expiry_t *expiry;
+       unsigned int timeout;
 
-       if (ide_wait_stat(&startstop, drive, DRQ_STAT, BUSY_STAT, WAIT_READY)) {
-               printk(KERN_ERR "ide-floppy: Strange, packet command "
-                               "initiated yet DRQ isn't asserted\n");
-               return startstop;
-       }
-       ireason = hwif->INB(hwif->io_ports.nsect_addr);
-       if ((ireason & CD) == 0 || (ireason & IO)) {
-               printk(KERN_ERR "ide-floppy: (IO,CoD) != (0,1) "
-                               "while issuing a packet command\n");
-               return ide_do_reset(drive);
-       }
        /*
         * The following delay solves a problem with ATAPI Zip 100 drives
         * where the Busy flag was apparently being deasserted before the
@@ -601,10 +432,15 @@ static ide_startstop_t idefloppy_transfer_pc1(ide_drive_t *drive)
         * 40 and 50msec work well. idefloppy_pc_intr will not be actually
         * used until after the packet is moved in about 50 msec.
         */
+       if (pc->flags & PC_FLAG_ZIP_DRIVE) {
+               timeout = floppy->ticks;
+               expiry = &idefloppy_transfer_pc;
+       } else {
+               timeout = IDEFLOPPY_WAIT_CMD;
+               expiry = NULL;
+       }
 
-       ide_set_handler(drive, &idefloppy_pc_intr, floppy->ticks,
-                       &idefloppy_transfer_pc2);
-       return ide_started;
+       return ide_transfer_pc(drive, pc, idefloppy_pc_intr, timeout, expiry);
 }
 
 static void ide_floppy_report_error(idefloppy_floppy_t *floppy,
@@ -627,10 +463,6 @@ static ide_startstop_t idefloppy_issue_pc(ide_drive_t *drive,
                struct ide_atapi_pc *pc)
 {
        idefloppy_floppy_t *floppy = drive->driver_data;
-       ide_hwif_t *hwif = drive->hwif;
-       ide_handler_t *pkt_xfer_routine;
-       u16 bcount;
-       u8 dma;
 
        if (floppy->failed_pc == NULL &&
            pc->c[0] != GPCMD_REQUEST_SENSE)
@@ -645,65 +477,16 @@ static ide_startstop_t idefloppy_issue_pc(ide_drive_t *drive,
                pc->error = IDEFLOPPY_ERROR_GENERAL;
 
                floppy->failed_pc = NULL;
-               pc->idefloppy_callback(drive);
+               pc->callback(drive);
                return ide_stopped;
        }
 
        debug_log("Retry number - %d\n", pc->retries);
 
        pc->retries++;
-       /* We haven't transferred any data yet */
-       pc->xferred = 0;
-       pc->cur_pos = pc->buf;
-       bcount = min(pc->req_xfer, 63 * 1024);
-
-       if (pc->flags & PC_FLAG_DMA_ERROR) {
-               pc->flags &= ~PC_FLAG_DMA_ERROR;
-               ide_dma_off(drive);
-       }
-       dma = 0;
-
-       if ((pc->flags & PC_FLAG_DMA_RECOMMENDED) && drive->using_dma)
-               dma = !hwif->dma_ops->dma_setup(drive);
 
-       ide_pktcmd_tf_load(drive, IDE_TFLAG_NO_SELECT_MASK |
-                          IDE_TFLAG_OUT_DEVICE, bcount, dma);
-
-       if (dma) {
-               /* Begin DMA, if necessary */
-               pc->flags |= PC_FLAG_DMA_IN_PROGRESS;
-               hwif->dma_ops->dma_start(drive);
-       }
-
-       /* Can we transfer the packet when we get the interrupt or wait? */
-       if (floppy->flags & IDEFLOPPY_FLAG_ZIP_DRIVE) {
-               /* wait */
-               pkt_xfer_routine = &idefloppy_transfer_pc1;
-       } else {
-               /* immediate */
-               pkt_xfer_routine = &idefloppy_transfer_pc;
-       }
-
-       if (floppy->flags & IDEFLOPPY_FLAG_DRQ_INTERRUPT) {
-               /* Issue the packet command */
-               ide_execute_command(drive, WIN_PACKETCMD,
-                               pkt_xfer_routine,
-                               IDEFLOPPY_WAIT_CMD,
-                               NULL);
-               return ide_started;
-       } else {
-               /* Issue the packet command */
-               ide_execute_pkt_cmd(drive);
-               return (*pkt_xfer_routine) (drive);
-       }
-}
-
-static void idefloppy_rw_callback(ide_drive_t *drive)
-{
-       debug_log("Reached %s\n", __func__);
-
-       idefloppy_end_request(drive, 1, 0);
-       return;
+       return ide_issue_pc(drive, pc, idefloppy_start_pc_transfer,
+                           IDEFLOPPY_WAIT_CMD, NULL);
 }
 
 static void idefloppy_create_prevent_cmd(struct ide_atapi_pc *pc, int prevent)
@@ -800,21 +583,19 @@ static void idefloppy_create_rw_cmd(idefloppy_floppy_t *floppy,
        put_unaligned(cpu_to_be16(blocks), (unsigned short *)&pc->c[7]);
        put_unaligned(cpu_to_be32(block), (unsigned int *) &pc->c[2]);
 
-       pc->idefloppy_callback = &idefloppy_rw_callback;
        pc->rq = rq;
        pc->b_count = cmd == READ ? 0 : rq->bio->bi_size;
        if (rq->cmd_flags & REQ_RW)
                pc->flags |= PC_FLAG_WRITING;
        pc->buf = NULL;
        pc->req_xfer = pc->buf_size = blocks * floppy->block_size;
-       pc->flags |= PC_FLAG_DMA_RECOMMENDED;
+       pc->flags |= PC_FLAG_DMA_OK;
 }
 
 static void idefloppy_blockpc_cmd(idefloppy_floppy_t *floppy,
                struct ide_atapi_pc *pc, struct request *rq)
 {
        idefloppy_init_pc(pc);
-       pc->idefloppy_callback = &idefloppy_rw_callback;
        memcpy(pc->c, rq->cmd, sizeof(pc->c));
        pc->rq = rq;
        pc->b_count = rq->data_len;
@@ -822,7 +603,7 @@ static void idefloppy_blockpc_cmd(idefloppy_floppy_t *floppy,
                pc->flags |= PC_FLAG_WRITING;
        pc->buf = rq->data;
        if (rq->bio)
-               pc->flags |= PC_FLAG_DMA_RECOMMENDED;
+               pc->flags |= PC_FLAG_DMA_OK;
        /*
         * possibly problematic, doesn't look like ide-floppy correctly
         * handled scattered requests if dma fails...
@@ -875,7 +656,14 @@ static ide_startstop_t idefloppy_do_request(ide_drive_t *drive,
                return ide_stopped;
        }
 
+       if (floppy->flags & IDEFLOPPY_FLAG_DRQ_INTERRUPT)
+               pc->flags |= PC_FLAG_DRQ_INTERRUPT;
+
+       if (floppy->flags & IDEFLOPPY_FLAG_ZIP_DRIVE)
+               pc->flags |= PC_FLAG_ZIP_DRIVE;
+
        pc->rq = rq;
+
        return idefloppy_issue_pc(drive, pc);
 }
 
@@ -886,14 +674,16 @@ static ide_startstop_t idefloppy_do_request(ide_drive_t *drive,
 static int idefloppy_queue_pc_tail(ide_drive_t *drive, struct ide_atapi_pc *pc)
 {
        struct ide_floppy_obj *floppy = drive->driver_data;
-       struct request rq;
+       struct request *rq;
+       int error;
 
-       ide_init_drive_cmd(&rq);
-       rq.buffer = (char *) pc;
-       rq.cmd_type = REQ_TYPE_SPECIAL;
-       rq.rq_disk = floppy->disk;
+       rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
+       rq->buffer = (char *) pc;
+       rq->cmd_type = REQ_TYPE_SPECIAL;
+       error = blk_execute_rq(drive->queue, floppy->disk, rq, 0);
+       blk_put_request(rq);
 
-       return ide_do_drive_cmd(drive, &rq, ide_wait);
+       return error;
 }
 
 /*
@@ -1622,11 +1412,6 @@ static int ide_floppy_probe(ide_drive_t *drive)
                                " of ide-floppy\n", drive->name);
                goto failed;
        }
-       if (drive->scsi) {
-               printk(KERN_INFO "ide-floppy: passing drive %s to ide-scsi"
-                               " emulation.\n", drive->name);
-               goto failed;
-       }
        floppy = kzalloc(sizeof(idefloppy_floppy_t), GFP_KERNEL);
        if (!floppy) {
                printk(KERN_ERR "ide-floppy: %s: Can't allocate a floppy"
index 696525342e9a56977c1d2ac88d72497be5a7e866..28057747c1f8fcf91b34dfd2aac2a375c3ab09d9 100644 (file)
@@ -358,31 +358,6 @@ void ide_end_drive_cmd (ide_drive_t *drive, u8 stat, u8 err)
 
 EXPORT_SYMBOL(ide_end_drive_cmd);
 
-/**
- *     try_to_flush_leftover_data      -       flush junk
- *     @drive: drive to flush
- *
- *     try_to_flush_leftover_data() is invoked in response to a drive
- *     unexpectedly having its DRQ_STAT bit set.  As an alternative to
- *     resetting the drive, this routine tries to clear the condition
- *     by read a sector's worth of data from the drive.  Of course,
- *     this may not help if the drive is *waiting* for data from *us*.
- */
-static void try_to_flush_leftover_data (ide_drive_t *drive)
-{
-       int i = (drive->mult_count ? drive->mult_count : 1) * SECTOR_WORDS;
-
-       if (drive->media != ide_disk)
-               return;
-       while (i > 0) {
-               u32 buffer[16];
-               u32 wcount = (i > 16) ? 16 : i;
-
-               i -= wcount;
-               drive->hwif->input_data(drive, NULL, buffer, wcount * 4);
-       }
-}
-
 static void ide_kill_rq(ide_drive_t *drive, struct request *rq)
 {
        if (rq->rq_disk) {
@@ -422,8 +397,11 @@ static ide_startstop_t ide_ata_error(ide_drive_t *drive, struct request *rq, u8
        }
 
        if ((stat & DRQ_STAT) && rq_data_dir(rq) == READ &&
-           (hwif->host_flags & IDE_HFLAG_ERROR_STOPS_FIFO) == 0)
-               try_to_flush_leftover_data(drive);
+           (hwif->host_flags & IDE_HFLAG_ERROR_STOPS_FIFO) == 0) {
+               int nsect = drive->mult_count ? drive->mult_count : 1;
+
+               ide_pad_transfer(drive, READ, nsect * SECTOR_SIZE);
+       }
 
        if (rq->errors >= ERROR_MAX || blk_noretry_request(rq)) {
                ide_kill_rq(drive, rq);
@@ -459,7 +437,7 @@ static ide_startstop_t ide_atapi_error(ide_drive_t *drive, struct request *rq, u
 
        if (ide_read_status(drive) & (BUSY_STAT | DRQ_STAT))
                /* force an abort */
-               hwif->OUTBSYNC(drive, WIN_IDLEIMMEDIATE,
+               hwif->OUTBSYNC(hwif, WIN_IDLEIMMEDIATE,
                               hwif->io_ports.command_addr);
 
        if (rq->errors >= ERROR_MAX) {
@@ -1538,89 +1516,31 @@ irqreturn_t ide_intr (int irq, void *dev_id)
        return IRQ_HANDLED;
 }
 
-/**
- *     ide_init_drive_cmd      -       initialize a drive command request
- *     @rq: request object
- *
- *     Initialize a request before we fill it in and send it down to
- *     ide_do_drive_cmd. Commands must be set up by this function. Right
- *     now it doesn't do a lot, but if that changes abusers will have a
- *     nasty surprise.
- */
-
-void ide_init_drive_cmd (struct request *rq)
-{
-       blk_rq_init(NULL, rq);
-}
-
-EXPORT_SYMBOL(ide_init_drive_cmd);
-
 /**
  *     ide_do_drive_cmd        -       issue IDE special command
  *     @drive: device to issue command
  *     @rq: request to issue
- *     @action: action for processing
  *
  *     This function issues a special IDE device request
  *     onto the request queue.
  *
- *     If action is ide_wait, then the rq is queued at the end of the
- *     request queue, and the function sleeps until it has been processed.
- *     This is for use when invoked from an ioctl handler.
- *
- *     If action is ide_preempt, then the rq is queued at the head of
- *     the request queue, displacing the currently-being-processed
- *     request and this function returns immediately without waiting
- *     for the new rq to be completed.  This is VERY DANGEROUS, and is
- *     intended for careful use by the ATAPI tape/cdrom driver code.
- *
- *     If action is ide_end, then the rq is queued at the end of the
- *     request queue, and the function returns immediately without waiting
- *     for the new rq to be completed. This is again intended for careful
- *     use by the ATAPI tape/cdrom driver code.
+ *     the rq is queued at the head of the request queue, displacing
+ *     the currently-being-processed request and this function
+ *     returns immediately without waiting for the new rq to be
+ *     completed.  This is VERY DANGEROUS, and is intended for
+ *     careful use by the ATAPI tape/cdrom driver code.
  */
-int ide_do_drive_cmd (ide_drive_t *drive, struct request *rq, ide_action_t action)
+
+void ide_do_drive_cmd(ide_drive_t *drive, struct request *rq)
 {
        unsigned long flags;
        ide_hwgroup_t *hwgroup = HWGROUP(drive);
-       DECLARE_COMPLETION_ONSTACK(wait);
-       int where = ELEVATOR_INSERT_BACK, err;
-       int must_wait = (action == ide_wait || action == ide_head_wait);
-
-       rq->errors = 0;
-
-       /*
-        * we need to hold an extra reference to request for safe inspection
-        * after completion
-        */
-       if (must_wait) {
-               rq->ref_count++;
-               rq->end_io_data = &wait;
-               rq->end_io = blk_end_sync_rq;
-       }
 
        spin_lock_irqsave(&ide_lock, flags);
-       if (action == ide_preempt)
-               hwgroup->rq = NULL;
-       if (action == ide_preempt || action == ide_head_wait) {
-               where = ELEVATOR_INSERT_FRONT;
-               rq->cmd_flags |= REQ_PREEMPT;
-       }
-       __elv_add_request(drive->queue, rq, where, 0);
-       ide_do_request(hwgroup, IDE_NO_IRQ);
+       hwgroup->rq = NULL;
+       __elv_add_request(drive->queue, rq, ELEVATOR_INSERT_FRONT, 1);
+       __generic_unplug_device(drive->queue);
        spin_unlock_irqrestore(&ide_lock, flags);
-
-       err = 0;
-       if (must_wait) {
-               wait_for_completion(&wait);
-               if (rq->errors)
-                       err = -EIO;
-
-               blk_put_request(rq);
-       }
-
-       return err;
 }
 
 EXPORT_SYMBOL(ide_do_drive_cmd);
@@ -1637,6 +1557,8 @@ void ide_pktcmd_tf_load(ide_drive_t *drive, u32 tf_flags, u16 bcount, u8 dma)
        task.tf.lbah    = (bcount >> 8) & 0xff;
 
        ide_tf_dump(drive->name, &task.tf);
+       ide_set_irq(drive, 1);
+       SELECT_MASK(drive, 0);
        drive->hwif->tf_load(drive, &task);
 }
 
index 0daf923541ffbc50fe7a9e139e8f172d882b6c78..80ad4f234f3f015a18537c4f65bf891c293bd1ea 100644 (file)
@@ -42,7 +42,7 @@ static void ide_outb (u8 val, unsigned long port)
        outb(val, port);
 }
 
-static void ide_outbsync (ide_drive_t *drive, u8 addr, unsigned long port)
+static void ide_outbsync(ide_hwif_t *hwif, u8 addr, unsigned long port)
 {
        outb(addr, port);
 }
@@ -68,7 +68,7 @@ static void ide_mm_outb (u8 value, unsigned long port)
        writeb(value, (void __iomem *) port);
 }
 
-static void ide_mm_outbsync (ide_drive_t *drive, u8 value, unsigned long port)
+static void ide_mm_outbsync(ide_hwif_t *hwif, u8 value, unsigned long port)
 {
        writeb(value, (void __iomem *) port);
 }
@@ -95,7 +95,7 @@ void SELECT_DRIVE (ide_drive_t *drive)
        hwif->OUTB(drive->select.all, hwif->io_ports.device_addr);
 }
 
-static void SELECT_MASK(ide_drive_t *drive, int mask)
+void SELECT_MASK(ide_drive_t *drive, int mask)
 {
        const struct ide_port_ops *port_ops = drive->hwif->port_ops;
 
@@ -120,11 +120,6 @@ static void ide_tf_load(ide_drive_t *drive, ide_task_t *task)
        if (task->tf_flags & IDE_TFLAG_FLAGGED)
                HIHI = 0xFF;
 
-       ide_set_irq(drive, 1);
-
-       if ((task->tf_flags & IDE_TFLAG_NO_SELECT_MASK) == 0)
-               SELECT_MASK(drive, 0);
-
        if (task->tf_flags & IDE_TFLAG_OUT_DATA) {
                u16 data = (tf->hob_data << 8) | tf->data;
 
@@ -191,7 +186,7 @@ static void ide_tf_read(ide_drive_t *drive, ide_task_t *task)
        }
 
        /* be sure we're looking at the low order bits */
-       tf_outb(drive->ctl & ~0x80, io_ports->ctl_addr);
+       tf_outb(ATA_DEVCTL_OBS & ~0x80, io_ports->ctl_addr);
 
        if (task->tf_flags & IDE_TFLAG_IN_NSECT)
                tf->nsect  = tf_inb(io_ports->nsect_addr);
@@ -205,7 +200,7 @@ static void ide_tf_read(ide_drive_t *drive, ide_task_t *task)
                tf->device = tf_inb(io_ports->device_addr);
 
        if (task->tf_flags & IDE_TFLAG_LBA48) {
-               tf_outb(drive->ctl | 0x80, io_ports->ctl_addr);
+               tf_outb(ATA_DEVCTL_OBS | 0x80, io_ports->ctl_addr);
 
                if (task->tf_flags & IDE_TFLAG_IN_HOB_FEATURE)
                        tf->hob_feature = tf_inb(io_ports->feature_addr);
@@ -689,9 +684,9 @@ int ide_driveid_update(ide_drive_t *drive)
         */
 
        SELECT_MASK(drive, 1);
-       ide_set_irq(drive, 1);
+       ide_set_irq(drive, 0);
        msleep(50);
-       hwif->OUTBSYNC(drive, WIN_IDENTIFY, hwif->io_ports.command_addr);
+       hwif->OUTBSYNC(hwif, WIN_IDENTIFY, hwif->io_ports.command_addr);
        timeout = jiffies + WAIT_WORSTCASE;
        do {
                if (time_after(jiffies, timeout)) {
@@ -744,9 +739,6 @@ int ide_config_drive_speed(ide_drive_t *drive, u8 speed)
        int error = 0;
        u8 stat;
 
-//     while (HWGROUP(drive)->busy)
-//             msleep(50);
-
 #ifdef CONFIG_BLK_DEV_IDEDMA
        if (hwif->dma_ops)      /* check if host supports DMA */
                hwif->dma_ops->dma_host_set(drive, 0);
@@ -781,7 +773,7 @@ int ide_config_drive_speed(ide_drive_t *drive, u8 speed)
        ide_set_irq(drive, 0);
        hwif->OUTB(speed, io_ports->nsect_addr);
        hwif->OUTB(SETFEATURES_XFER, io_ports->feature_addr);
-       hwif->OUTBSYNC(drive, WIN_SETFEATURES, io_ports->command_addr);
+       hwif->OUTBSYNC(hwif, WIN_SETFEATURES, io_ports->command_addr);
        if (drive->quirk_list == 2)
                ide_set_irq(drive, 1);
 
@@ -889,7 +881,7 @@ void ide_execute_command(ide_drive_t *drive, u8 cmd, ide_handler_t *handler,
 
        spin_lock_irqsave(&ide_lock, flags);
        __ide_set_handler(drive, handler, timeout, expiry);
-       hwif->OUTBSYNC(drive, cmd, hwif->io_ports.command_addr);
+       hwif->OUTBSYNC(hwif, cmd, hwif->io_ports.command_addr);
        /*
         * Drive takes 400nS to respond, we must avoid the IRQ being
         * serviced before that.
@@ -907,7 +899,7 @@ void ide_execute_pkt_cmd(ide_drive_t *drive)
        unsigned long flags;
 
        spin_lock_irqsave(&ide_lock, flags);
-       hwif->OUTBSYNC(drive, WIN_PACKETCMD, hwif->io_ports.command_addr);
+       hwif->OUTBSYNC(hwif, WIN_PACKETCMD, hwif->io_ports.command_addr);
        ndelay(400);
        spin_unlock_irqrestore(&ide_lock, flags);
 }
@@ -1102,7 +1094,7 @@ static ide_startstop_t do_reset1 (ide_drive_t *drive, int do_not_try_atapi)
                pre_reset(drive);
                SELECT_DRIVE(drive);
                udelay (20);
-               hwif->OUTBSYNC(drive, WIN_SRST, io_ports->command_addr);
+               hwif->OUTBSYNC(hwif, WIN_SRST, io_ports->command_addr);
                ndelay(400);
                hwgroup->poll_timeout = jiffies + WAIT_WORSTCASE;
                hwgroup->polling = 1;
@@ -1133,14 +1125,14 @@ static ide_startstop_t do_reset1 (ide_drive_t *drive, int do_not_try_atapi)
         * recover from reset very quickly, saving us the first 50ms wait time.
         */
        /* set SRST and nIEN */
-       hwif->OUTBSYNC(drive, drive->ctl|6, io_ports->ctl_addr);
+       hwif->OUTBSYNC(hwif, ATA_DEVCTL_OBS | 6, io_ports->ctl_addr);
        /* more than enough time */
        udelay(10);
        if (drive->quirk_list == 2)
-               ctl = drive->ctl;       /* clear SRST and nIEN */
+               ctl = ATA_DEVCTL_OBS;           /* clear SRST and nIEN */
        else
-               ctl = drive->ctl | 2;   /* clear SRST, leave nIEN */
-       hwif->OUTBSYNC(drive, ctl, io_ports->ctl_addr);
+               ctl = ATA_DEVCTL_OBS | 2;       /* clear SRST, leave nIEN */
+       hwif->OUTBSYNC(hwif, ctl, io_ports->ctl_addr);
        /* more than enough time */
        udelay(10);
        hwgroup->poll_timeout = jiffies + WAIT_WORSTCASE;
index 26e68b65b7cf554c09869f65eba2e8940489238e..d21e51a02c3e9c17579e64cf7a886b66dc77207c 100644 (file)
@@ -293,7 +293,7 @@ static int actual_try_to_identify (ide_drive_t *drive, u8 cmd)
                hwif->OUTB(0, io_ports->feature_addr);
 
        /* ask drive for ID */
-       hwif->OUTBSYNC(drive, cmd, io_ports->command_addr);
+       hwif->OUTBSYNC(hwif, cmd, hwif->io_ports.command_addr);
 
        timeout = ((cmd == WIN_IDENTIFY) ? WAIT_WORSTCASE : WAIT_PIDENTIFY) / 2;
        timeout += jiffies;
@@ -478,9 +478,9 @@ static int do_probe (ide_drive_t *drive, u8 cmd)
                        printk(KERN_ERR "%s: no response (status = 0x%02x), "
                                        "resetting drive\n", drive->name, stat);
                        msleep(50);
-                       hwif->OUTB(drive->select.all, io_ports->device_addr);
+                       SELECT_DRIVE(drive);
                        msleep(50);
-                       hwif->OUTBSYNC(drive, WIN_SRST, io_ports->command_addr);
+                       hwif->OUTBSYNC(hwif, WIN_SRST, io_ports->command_addr);
                        (void)ide_busy_sleep(hwif);
                        rc = try_to_identify(drive, cmd);
                }
@@ -516,7 +516,7 @@ static void enable_nest (ide_drive_t *drive)
        printk("%s: enabling %s -- ", hwif->name, drive->id->model);
        SELECT_DRIVE(drive);
        msleep(50);
-       hwif->OUTBSYNC(drive, EXABYTE_ENABLE_NEST, hwif->io_ports.command_addr);
+       hwif->OUTBSYNC(hwif, EXABYTE_ENABLE_NEST, hwif->io_ports.command_addr);
 
        if (ide_busy_sleep(hwif)) {
                printk(KERN_CONT "failed (timeout)\n");
@@ -1065,7 +1065,7 @@ static int init_irq (ide_hwif_t *hwif)
 
                if (io_ports->ctl_addr)
                        /* clear nIEN */
-                       hwif->OUTB(0x08, io_ports->ctl_addr);
+                       hwif->OUTBSYNC(hwif, ATA_DEVCTL_OBS, io_ports->ctl_addr);
 
                if (request_irq(hwif->irq,&ide_intr,sa,hwif->name,hwgroup))
                        goto out_unlink;
index a3d228302d2019ece45d2b4253cffafab226dd90..f9cf1670e4e18d7a2b55d1c171ff3bbe859db31f 100644 (file)
@@ -144,9 +144,6 @@ enum {
 
 /*************************** End of tunable parameters ***********************/
 
-/* Read/Write error simulation */
-#define SIMULATE_ERRORS                        0
-
 /* tape directions */
 enum {
        IDETAPE_DIR_NONE  = (1 << 0),
@@ -442,7 +439,7 @@ static void idetape_output_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc,
        }
 }
 
-static void idetape_update_buffers(struct ide_atapi_pc *pc)
+static void idetape_update_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc)
 {
        struct idetape_bh *bh = pc->bh;
        int count;
@@ -506,18 +503,6 @@ static struct request *idetape_next_rq_storage(ide_drive_t *drive)
        return (&tape->rq_stack[tape->rq_stack_index++]);
 }
 
-static void idetape_init_pc(struct ide_atapi_pc *pc)
-{
-       memset(pc->c, 0, 12);
-       pc->retries = 0;
-       pc->flags = 0;
-       pc->req_xfer = 0;
-       pc->buf = pc->pc_buf;
-       pc->buf_size = IDETAPE_PC_BUFFER_SIZE;
-       pc->bh = NULL;
-       pc->b_data = NULL;
-}
-
 /*
  * called on each failed packet command retry to analyze the request sense. We
  * currently do not utilize this information.
@@ -538,8 +523,8 @@ static void idetape_analyze_error(ide_drive_t *drive, u8 *sense)
        if (pc->flags & PC_FLAG_DMA_ERROR) {
                pc->xferred = pc->req_xfer -
                        tape->blk_size *
-                       be32_to_cpu(get_unaligned((u32 *)&sense[3]));
-               idetape_update_buffers(pc);
+                       get_unaligned_be32(&sense[3]);
+               idetape_update_buffers(drive, pc);
        }
 
        /*
@@ -634,21 +619,78 @@ static int idetape_end_request(ide_drive_t *drive, int uptodate, int nr_sects)
        return 0;
 }
 
-static ide_startstop_t idetape_request_sense_callback(ide_drive_t *drive)
+static void ide_tape_callback(ide_drive_t *drive)
 {
        idetape_tape_t *tape = drive->driver_data;
+       struct ide_atapi_pc *pc = tape->pc;
+       int uptodate = pc->error ? 0 : 1;
 
        debug_log(DBG_PROCS, "Enter %s\n", __func__);
 
-       if (!tape->pc->error) {
-               idetape_analyze_error(drive, tape->pc->buf);
-               idetape_end_request(drive, 1, 0);
-       } else {
-               printk(KERN_ERR "ide-tape: Error in REQUEST SENSE itself - "
-                               "Aborting request!\n");
-               idetape_end_request(drive, 0, 0);
+       if (tape->failed_pc == pc)
+               tape->failed_pc = NULL;
+
+       if (pc->c[0] == REQUEST_SENSE) {
+               if (uptodate)
+                       idetape_analyze_error(drive, pc->buf);
+               else
+                       printk(KERN_ERR "ide-tape: Error in REQUEST SENSE "
+                                       "itself - Aborting request!\n");
+       } else if (pc->c[0] == READ_6 || pc->c[0] == WRITE_6) {
+               struct request *rq = drive->hwif->hwgroup->rq;
+               int blocks = pc->xferred / tape->blk_size;
+
+               tape->avg_size += blocks * tape->blk_size;
+
+               if (time_after_eq(jiffies, tape->avg_time + HZ)) {
+                       tape->avg_speed = tape->avg_size * HZ /
+                               (jiffies - tape->avg_time) / 1024;
+                       tape->avg_size = 0;
+                       tape->avg_time = jiffies;
+               }
+
+               tape->first_frame += blocks;
+               rq->current_nr_sectors -= blocks;
+
+               if (pc->error)
+                       uptodate = pc->error;
+       } else if (pc->c[0] == READ_POSITION && uptodate) {
+               u8 *readpos = tape->pc->buf;
+
+               debug_log(DBG_SENSE, "BOP - %s\n",
+                               (readpos[0] & 0x80) ? "Yes" : "No");
+               debug_log(DBG_SENSE, "EOP - %s\n",
+                               (readpos[0] & 0x40) ? "Yes" : "No");
+
+               if (readpos[0] & 0x4) {
+                       printk(KERN_INFO "ide-tape: Block location is unknown"
+                                        "to the tape\n");
+                       clear_bit(IDETAPE_FLAG_ADDRESS_VALID, &tape->flags);
+                       uptodate = 0;
+               } else {
+                       debug_log(DBG_SENSE, "Block Location - %u\n",
+                                       be32_to_cpu(*(u32 *)&readpos[4]));
+
+                       tape->partition = readpos[1];
+                       tape->first_frame = be32_to_cpu(*(u32 *)&readpos[4]);
+                       set_bit(IDETAPE_FLAG_ADDRESS_VALID, &tape->flags);
+               }
        }
-       return ide_stopped;
+
+       idetape_end_request(drive, uptodate, 0);
+}
+
+static void idetape_init_pc(struct ide_atapi_pc *pc)
+{
+       memset(pc->c, 0, 12);
+       pc->retries = 0;
+       pc->flags = 0;
+       pc->req_xfer = 0;
+       pc->buf = pc->pc_buf;
+       pc->buf_size = IDETAPE_PC_BUFFER_SIZE;
+       pc->bh = NULL;
+       pc->b_data = NULL;
+       pc->callback = ide_tape_callback;
 }
 
 static void idetape_create_request_sense_cmd(struct ide_atapi_pc *pc)
@@ -657,7 +699,6 @@ static void idetape_create_request_sense_cmd(struct ide_atapi_pc *pc)
        pc->c[0] = REQUEST_SENSE;
        pc->c[4] = 20;
        pc->req_xfer = 20;
-       pc->idetape_callback = &idetape_request_sense_callback;
 }
 
 static void idetape_init_rq(struct request *rq, u8 cmd)
@@ -688,9 +729,10 @@ static void idetape_queue_pc_head(ide_drive_t *drive, struct ide_atapi_pc *pc,
        struct ide_tape_obj *tape = drive->driver_data;
 
        idetape_init_rq(rq, REQ_IDETAPE_PC1);
+       rq->cmd_flags |= REQ_PREEMPT;
        rq->buffer = (char *) pc;
        rq->rq_disk = tape->disk;
-       (void) ide_do_drive_cmd(drive, rq, ide_preempt);
+       ide_do_drive_cmd(drive, rq);
 }
 
 /*
@@ -698,7 +740,7 @@ static void idetape_queue_pc_head(ide_drive_t *drive, struct ide_atapi_pc *pc,
  *     last packet command. We queue a request sense packet command in
  *     the head of the request list.
  */
-static ide_startstop_t idetape_retry_pc (ide_drive_t *drive)
+static void idetape_retry_pc(ide_drive_t *drive)
 {
        idetape_tape_t *tape = drive->driver_data;
        struct ide_atapi_pc *pc;
@@ -710,7 +752,6 @@ static ide_startstop_t idetape_retry_pc (ide_drive_t *drive)
        idetape_create_request_sense_cmd(pc);
        set_bit(IDETAPE_FLAG_IGNORE_DSC, &tape->flags);
        idetape_queue_pc_head(drive, pc, rq);
-       return ide_stopped;
 }
 
 /*
@@ -727,7 +768,26 @@ static void idetape_postpone_request(ide_drive_t *drive)
        ide_stall_queue(drive, tape->dsc_poll_freq);
 }
 
-typedef void idetape_io_buf(ide_drive_t *, struct ide_atapi_pc *, unsigned int);
+static void ide_tape_handle_dsc(ide_drive_t *drive)
+{
+       idetape_tape_t *tape = drive->driver_data;
+
+       /* Media access command */
+       tape->dsc_polling_start = jiffies;
+       tape->dsc_poll_freq = IDETAPE_DSC_MA_FAST;
+       tape->dsc_timeout = jiffies + IDETAPE_DSC_MA_TIMEOUT;
+       /* Allow ide.c to handle other requests */
+       idetape_postpone_request(drive);
+}
+
+static void ide_tape_io_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc,
+                               unsigned int bcount, int write)
+{
+       if (write)
+               idetape_output_buffers(drive, pc, bcount);
+       else
+               idetape_input_buffers(drive, pc, bcount);
+}
 
 /*
  * This is the usual interrupt handler which will be called during a packet
@@ -738,169 +798,11 @@ typedef void idetape_io_buf(ide_drive_t *, struct ide_atapi_pc *, unsigned int);
  */
 static ide_startstop_t idetape_pc_intr(ide_drive_t *drive)
 {
-       ide_hwif_t *hwif = drive->hwif;
        idetape_tape_t *tape = drive->driver_data;
-       struct ide_atapi_pc *pc = tape->pc;
-       xfer_func_t *xferfunc;
-       idetape_io_buf *iobuf;
-       unsigned int temp;
-#if SIMULATE_ERRORS
-       static int error_sim_count;
-#endif
-       u16 bcount;
-       u8 stat, ireason;
-
-       debug_log(DBG_PROCS, "Enter %s - interrupt handler\n", __func__);
-
-       /* Clear the interrupt */
-       stat = ide_read_status(drive);
-
-       if (pc->flags & PC_FLAG_DMA_IN_PROGRESS) {
-               if (hwif->dma_ops->dma_end(drive) || (stat & ERR_STAT)) {
-                       /*
-                        * A DMA error is sometimes expected. For example,
-                        * if the tape is crossing a filemark during a
-                        * READ command, it will issue an irq and position
-                        * itself before the filemark, so that only a partial
-                        * data transfer will occur (which causes the DMA
-                        * error). In that case, we will later ask the tape
-                        * how much bytes of the original request were
-                        * actually transferred (we can't receive that
-                        * information from the DMA engine on most chipsets).
-                        */
-
-                       /*
-                        * On the contrary, a DMA error is never expected;
-                        * it usually indicates a hardware error or abort.
-                        * If the tape crosses a filemark during a READ
-                        * command, it will issue an irq and position itself
-                        * after the filemark (not before). Only a partial
-                        * data transfer will occur, but no DMA error.
-                        * (AS, 19 Apr 2001)
-                        */
-                       pc->flags |= PC_FLAG_DMA_ERROR;
-               } else {
-                       pc->xferred = pc->req_xfer;
-                       idetape_update_buffers(pc);
-               }
-               debug_log(DBG_PROCS, "DMA finished\n");
-
-       }
-
-       /* No more interrupts */
-       if ((stat & DRQ_STAT) == 0) {
-               debug_log(DBG_SENSE, "Packet command completed, %d bytes"
-                               " transferred\n", pc->xferred);
-
-               pc->flags &= ~PC_FLAG_DMA_IN_PROGRESS;
-               local_irq_enable();
-
-#if SIMULATE_ERRORS
-               if ((pc->c[0] == WRITE_6 || pc->c[0] == READ_6) &&
-                   (++error_sim_count % 100) == 0) {
-                       printk(KERN_INFO "ide-tape: %s: simulating error\n",
-                               tape->name);
-                       stat |= ERR_STAT;
-               }
-#endif
-               if ((stat & ERR_STAT) && pc->c[0] == REQUEST_SENSE)
-                       stat &= ~ERR_STAT;
-               if ((stat & ERR_STAT) || (pc->flags & PC_FLAG_DMA_ERROR)) {
-                       /* Error detected */
-                       debug_log(DBG_ERR, "%s: I/O error\n", tape->name);
-
-                       if (pc->c[0] == REQUEST_SENSE) {
-                               printk(KERN_ERR "ide-tape: I/O error in request"
-                                               " sense command\n");
-                               return ide_do_reset(drive);
-                       }
-                       debug_log(DBG_ERR, "[cmd %x]: check condition\n",
-                                       pc->c[0]);
-
-                       /* Retry operation */
-                       return idetape_retry_pc(drive);
-               }
-               pc->error = 0;
-               if ((pc->flags & PC_FLAG_WAIT_FOR_DSC) &&
-                   (stat & SEEK_STAT) == 0) {
-                       /* Media access command */
-                       tape->dsc_polling_start = jiffies;
-                       tape->dsc_poll_freq = IDETAPE_DSC_MA_FAST;
-                       tape->dsc_timeout = jiffies + IDETAPE_DSC_MA_TIMEOUT;
-                       /* Allow ide.c to handle other requests */
-                       idetape_postpone_request(drive);
-                       return ide_stopped;
-               }
-               if (tape->failed_pc == pc)
-                       tape->failed_pc = NULL;
-               /* Command finished - Call the callback function */
-               return pc->idetape_callback(drive);
-       }
-
-       if (pc->flags & PC_FLAG_DMA_IN_PROGRESS) {
-               pc->flags &= ~PC_FLAG_DMA_IN_PROGRESS;
-               printk(KERN_ERR "ide-tape: The tape wants to issue more "
-                               "interrupts in DMA mode\n");
-               printk(KERN_ERR "ide-tape: DMA disabled, reverting to PIO\n");
-               ide_dma_off(drive);
-               return ide_do_reset(drive);
-       }
-       /* Get the number of bytes to transfer on this interrupt. */
-       bcount = (hwif->INB(hwif->io_ports.lbah_addr) << 8) |
-                 hwif->INB(hwif->io_ports.lbam_addr);
-
-       ireason = hwif->INB(hwif->io_ports.nsect_addr);
-
-       if (ireason & CD) {
-               printk(KERN_ERR "ide-tape: CoD != 0 in %s\n", __func__);
-               return ide_do_reset(drive);
-       }
-       if (((ireason & IO) == IO) == !!(pc->flags & PC_FLAG_WRITING)) {
-               /* Hopefully, we will never get here */
-               printk(KERN_ERR "ide-tape: We wanted to %s, ",
-                               (ireason & IO) ? "Write" : "Read");
-               printk(KERN_ERR "ide-tape: but the tape wants us to %s !\n",
-                               (ireason & IO) ? "Read" : "Write");
-               return ide_do_reset(drive);
-       }
-       if (!(pc->flags & PC_FLAG_WRITING)) {
-               /* Reading - Check that we have enough space */
-               temp = pc->xferred + bcount;
-               if (temp > pc->req_xfer) {
-                       if (temp > pc->buf_size) {
-                               printk(KERN_ERR "ide-tape: The tape wants to "
-                                       "send us more data than expected "
-                                       "- discarding data\n");
-                               ide_pad_transfer(drive, 0, bcount);
-                               ide_set_handler(drive, &idetape_pc_intr,
-                                               IDETAPE_WAIT_CMD, NULL);
-                               return ide_started;
-                       }
-                       debug_log(DBG_SENSE, "The tape wants to send us more "
-                               "data than expected - allowing transfer\n");
-               }
-               iobuf = &idetape_input_buffers;
-               xferfunc = hwif->input_data;
-       } else {
-               iobuf = &idetape_output_buffers;
-               xferfunc = hwif->output_data;
-       }
-
-       if (pc->bh)
-               iobuf(drive, pc, bcount);
-       else
-               xferfunc(drive, NULL, pc->cur_pos, bcount);
-
-       /* Update the current position */
-       pc->xferred += bcount;
-       pc->cur_pos += bcount;
-
-       debug_log(DBG_SENSE, "[cmd %x] transferred %d bytes on that intr.\n",
-                       pc->c[0], bcount);
 
-       /* And set the interrupt handler again */
-       ide_set_handler(drive, &idetape_pc_intr, IDETAPE_WAIT_CMD, NULL);
-       return ide_started;
+       return ide_pc_intr(drive, tape->pc, idetape_pc_intr, IDETAPE_WAIT_CMD,
+                          NULL, idetape_update_buffers, idetape_retry_pc,
+                          ide_tape_handle_dsc, ide_tape_io_buffers);
 }
 
 /*
@@ -941,56 +843,16 @@ static ide_startstop_t idetape_pc_intr(ide_drive_t *drive)
  */
 static ide_startstop_t idetape_transfer_pc(ide_drive_t *drive)
 {
-       ide_hwif_t *hwif = drive->hwif;
        idetape_tape_t *tape = drive->driver_data;
-       struct ide_atapi_pc *pc = tape->pc;
-       int retries = 100;
-       ide_startstop_t startstop;
-       u8 ireason;
-
-       if (ide_wait_stat(&startstop, drive, DRQ_STAT, BUSY_STAT, WAIT_READY)) {
-               printk(KERN_ERR "ide-tape: Strange, packet command initiated "
-                               "yet DRQ isn't asserted\n");
-               return startstop;
-       }
-       ireason = hwif->INB(hwif->io_ports.nsect_addr);
-       while (retries-- && ((ireason & CD) == 0 || (ireason & IO))) {
-               printk(KERN_ERR "ide-tape: (IO,CoD != (0,1) while issuing "
-                               "a packet command, retrying\n");
-               udelay(100);
-               ireason = hwif->INB(hwif->io_ports.nsect_addr);
-               if (retries == 0) {
-                       printk(KERN_ERR "ide-tape: (IO,CoD != (0,1) while "
-                                       "issuing a packet command, ignoring\n");
-                       ireason |= CD;
-                       ireason &= ~IO;
-               }
-       }
-       if ((ireason & CD) == 0 || (ireason & IO)) {
-               printk(KERN_ERR "ide-tape: (IO,CoD) != (0,1) while issuing "
-                               "a packet command\n");
-               return ide_do_reset(drive);
-       }
-       /* Set the interrupt routine */
-       ide_set_handler(drive, &idetape_pc_intr, IDETAPE_WAIT_CMD, NULL);
-#ifdef CONFIG_BLK_DEV_IDEDMA
-       /* Begin DMA, if necessary */
-       if (pc->flags & PC_FLAG_DMA_IN_PROGRESS)
-               hwif->dma_ops->dma_start(drive);
-#endif
-       /* Send the actual packet */
-       hwif->output_data(drive, NULL, pc->c, 12);
 
-       return ide_started;
+       return ide_transfer_pc(drive, tape->pc, idetape_pc_intr,
+                              IDETAPE_WAIT_CMD, NULL);
 }
 
 static ide_startstop_t idetape_issue_pc(ide_drive_t *drive,
                struct ide_atapi_pc *pc)
 {
-       ide_hwif_t *hwif = drive->hwif;
        idetape_tape_t *tape = drive->driver_data;
-       int dma_ok = 0;
-       u16 bcount;
 
        if (tape->pc->c[0] == REQUEST_SENSE &&
            pc->c[0] == REQUEST_SENSE) {
@@ -1025,50 +887,15 @@ static ide_startstop_t idetape_issue_pc(ide_drive_t *drive,
                        pc->error = IDETAPE_ERROR_GENERAL;
                }
                tape->failed_pc = NULL;
-               return pc->idetape_callback(drive);
+               pc->callback(drive);
+               return ide_stopped;
        }
        debug_log(DBG_SENSE, "Retry #%d, cmd = %02X\n", pc->retries, pc->c[0]);
 
        pc->retries++;
-       /* We haven't transferred any data yet */
-       pc->xferred = 0;
-       pc->cur_pos = pc->buf;
-       /* Request to transfer the entire buffer at once */
-       bcount = pc->req_xfer;
 
-       if (pc->flags & PC_FLAG_DMA_ERROR) {
-               pc->flags &= ~PC_FLAG_DMA_ERROR;
-               printk(KERN_WARNING "ide-tape: DMA disabled, "
-                               "reverting to PIO\n");
-               ide_dma_off(drive);
-       }
-       if ((pc->flags & PC_FLAG_DMA_RECOMMENDED) && drive->using_dma)
-               dma_ok = !hwif->dma_ops->dma_setup(drive);
-
-       ide_pktcmd_tf_load(drive, IDE_TFLAG_NO_SELECT_MASK |
-                          IDE_TFLAG_OUT_DEVICE, bcount, dma_ok);
-
-       if (dma_ok)
-               /* Will begin DMA later */
-               pc->flags |= PC_FLAG_DMA_IN_PROGRESS;
-       if (test_bit(IDETAPE_FLAG_DRQ_INTERRUPT, &tape->flags)) {
-               ide_execute_command(drive, WIN_PACKETCMD, &idetape_transfer_pc,
-                                   IDETAPE_WAIT_CMD, NULL);
-               return ide_started;
-       } else {
-               ide_execute_pkt_cmd(drive);
-               return idetape_transfer_pc(drive);
-       }
-}
-
-static ide_startstop_t idetape_pc_callback(ide_drive_t *drive)
-{
-       idetape_tape_t *tape = drive->driver_data;
-
-       debug_log(DBG_PROCS, "Enter %s\n", __func__);
-
-       idetape_end_request(drive, tape->pc->error ? 0 : 1, 0);
-       return ide_stopped;
+       return ide_issue_pc(drive, pc, idetape_transfer_pc,
+                           IDETAPE_WAIT_CMD, NULL);
 }
 
 /* A mode sense command is used to "sense" tape parameters. */
@@ -1096,7 +923,6 @@ static void idetape_create_mode_sense_cmd(struct ide_atapi_pc *pc, u8 page_code)
                pc->req_xfer = 24;
        else
                pc->req_xfer = 50;
-       pc->idetape_callback = &idetape_pc_callback;
 }
 
 static ide_startstop_t idetape_media_access_finished(ide_drive_t *drive)
@@ -1114,80 +940,41 @@ static ide_startstop_t idetape_media_access_finished(ide_drive_t *drive)
                                printk(KERN_ERR "ide-tape: %s: I/O error, ",
                                                tape->name);
                        /* Retry operation */
-                       return idetape_retry_pc(drive);
+                       idetape_retry_pc(drive);
+                       return ide_stopped;
                }
                pc->error = 0;
-               if (tape->failed_pc == pc)
-                       tape->failed_pc = NULL;
        } else {
                pc->error = IDETAPE_ERROR_GENERAL;
                tape->failed_pc = NULL;
        }
-       return pc->idetape_callback(drive);
-}
-
-static ide_startstop_t idetape_rw_callback(ide_drive_t *drive)
-{
-       idetape_tape_t *tape = drive->driver_data;
-       struct request *rq = HWGROUP(drive)->rq;
-       int blocks = tape->pc->xferred / tape->blk_size;
-
-       tape->avg_size += blocks * tape->blk_size;
-
-       if (time_after_eq(jiffies, tape->avg_time + HZ)) {
-               tape->avg_speed = tape->avg_size * HZ /
-                               (jiffies - tape->avg_time) / 1024;
-               tape->avg_size = 0;
-               tape->avg_time = jiffies;
-       }
-       debug_log(DBG_PROCS, "Enter %s\n", __func__);
-
-       tape->first_frame += blocks;
-       rq->current_nr_sectors -= blocks;
-
-       if (!tape->pc->error)
-               idetape_end_request(drive, 1, 0);
-       else
-               idetape_end_request(drive, tape->pc->error, 0);
+       pc->callback(drive);
        return ide_stopped;
 }
 
-static void idetape_create_read_cmd(idetape_tape_t *tape,
-               struct ide_atapi_pc *pc,
-               unsigned int length, struct idetape_bh *bh)
+static void ide_tape_create_rw_cmd(idetape_tape_t *tape,
+               struct ide_atapi_pc *pc, unsigned int length,
+               struct idetape_bh *bh, u8 opcode)
 {
        idetape_init_pc(pc);
-       pc->c[0] = READ_6;
        put_unaligned(cpu_to_be32(length), (unsigned int *) &pc->c[1]);
        pc->c[1] = 1;
-       pc->idetape_callback = &idetape_rw_callback;
        pc->bh = bh;
-       atomic_set(&bh->b_count, 0);
        pc->buf = NULL;
        pc->buf_size = length * tape->blk_size;
        pc->req_xfer = pc->buf_size;
        if (pc->req_xfer == tape->buffer_size)
-               pc->flags |= PC_FLAG_DMA_RECOMMENDED;
-}
+               pc->flags |= PC_FLAG_DMA_OK;
 
-static void idetape_create_write_cmd(idetape_tape_t *tape,
-               struct ide_atapi_pc *pc,
-               unsigned int length, struct idetape_bh *bh)
-{
-       idetape_init_pc(pc);
-       pc->c[0] = WRITE_6;
-       put_unaligned(cpu_to_be32(length), (unsigned int *) &pc->c[1]);
-       pc->c[1] = 1;
-       pc->idetape_callback = &idetape_rw_callback;
-       pc->flags |= PC_FLAG_WRITING;
-       pc->bh = bh;
-       pc->b_data = bh->b_data;
-       pc->b_count = atomic_read(&bh->b_count);
-       pc->buf = NULL;
-       pc->buf_size = length * tape->blk_size;
-       pc->req_xfer = pc->buf_size;
-       if (pc->req_xfer == tape->buffer_size)
-               pc->flags |= PC_FLAG_DMA_RECOMMENDED;
+       if (opcode == READ_6) {
+               pc->c[0] = READ_6;
+               atomic_set(&bh->b_count, 0);
+       } else if (opcode == WRITE_6) {
+               pc->c[0] = WRITE_6;
+               pc->flags |= PC_FLAG_WRITING;
+               pc->b_data = bh->b_data;
+               pc->b_count = atomic_read(&bh->b_count);
+       }
 }
 
 static ide_startstop_t idetape_do_request(ide_drive_t *drive,
@@ -1211,8 +998,10 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive,
        }
 
        /* Retry a failed packet command */
-       if (tape->failed_pc && tape->pc->c[0] == REQUEST_SENSE)
-               return idetape_issue_pc(drive, tape->failed_pc);
+       if (tape->failed_pc && tape->pc->c[0] == REQUEST_SENSE) {
+               pc = tape->failed_pc;
+               goto out;
+       }
 
        if (postponed_rq != NULL)
                if (rq != postponed_rq) {
@@ -1262,14 +1051,16 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive,
        }
        if (rq->cmd[0] & REQ_IDETAPE_READ) {
                pc = idetape_next_pc_storage(drive);
-               idetape_create_read_cmd(tape, pc, rq->current_nr_sectors,
-                                       (struct idetape_bh *)rq->special);
+               ide_tape_create_rw_cmd(tape, pc, rq->current_nr_sectors,
+                                       (struct idetape_bh *)rq->special,
+                                       READ_6);
                goto out;
        }
        if (rq->cmd[0] & REQ_IDETAPE_WRITE) {
                pc = idetape_next_pc_storage(drive);
-               idetape_create_write_cmd(tape, pc, rq->current_nr_sectors,
-                                        (struct idetape_bh *)rq->special);
+               ide_tape_create_rw_cmd(tape, pc, rq->current_nr_sectors,
+                                        (struct idetape_bh *)rq->special,
+                                        WRITE_6);
                goto out;
        }
        if (rq->cmd[0] & REQ_IDETAPE_PC1) {
@@ -1284,6 +1075,9 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive,
        }
        BUG();
 out:
+       if (test_bit(IDETAPE_FLAG_DRQ_INTERRUPT, &tape->flags))
+               pc->flags |= PC_FLAG_DRQ_INTERRUPT;
+
        return idetape_issue_pc(drive, pc);
 }
 
@@ -1447,40 +1241,6 @@ static void idetape_init_merge_buffer(idetape_tape_t *tape)
        }
 }
 
-static ide_startstop_t idetape_read_position_callback(ide_drive_t *drive)
-{
-       idetape_tape_t *tape = drive->driver_data;
-       u8 *readpos = tape->pc->buf;
-
-       debug_log(DBG_PROCS, "Enter %s\n", __func__);
-
-       if (!tape->pc->error) {
-               debug_log(DBG_SENSE, "BOP - %s\n",
-                               (readpos[0] & 0x80) ? "Yes" : "No");
-               debug_log(DBG_SENSE, "EOP - %s\n",
-                               (readpos[0] & 0x40) ? "Yes" : "No");
-
-               if (readpos[0] & 0x4) {
-                       printk(KERN_INFO "ide-tape: Block location is unknown"
-                                        "to the tape\n");
-                       clear_bit(IDETAPE_FLAG_ADDRESS_VALID, &tape->flags);
-                       idetape_end_request(drive, 0, 0);
-               } else {
-                       debug_log(DBG_SENSE, "Block Location - %u\n",
-                                       be32_to_cpu(*(u32 *)&readpos[4]));
-
-                       tape->partition = readpos[1];
-                       tape->first_frame =
-                               be32_to_cpu(*(u32 *)&readpos[4]);
-                       set_bit(IDETAPE_FLAG_ADDRESS_VALID, &tape->flags);
-                       idetape_end_request(drive, 1, 0);
-               }
-       } else {
-               idetape_end_request(drive, 0, 0);
-       }
-       return ide_stopped;
-}
-
 /*
  * Write a filemark if write_filemark=1. Flush the device buffers without
  * writing a filemark otherwise.
@@ -1492,14 +1252,12 @@ static void idetape_create_write_filemark_cmd(ide_drive_t *drive,
        pc->c[0] = WRITE_FILEMARKS;
        pc->c[4] = write_filemark;
        pc->flags |= PC_FLAG_WAIT_FOR_DSC;
-       pc->idetape_callback = &idetape_pc_callback;
 }
 
 static void idetape_create_test_unit_ready_cmd(struct ide_atapi_pc *pc)
 {
        idetape_init_pc(pc);
        pc->c[0] = TEST_UNIT_READY;
-       pc->idetape_callback = &idetape_pc_callback;
 }
 
 /*
@@ -1518,12 +1276,16 @@ static void idetape_create_test_unit_ready_cmd(struct ide_atapi_pc *pc)
 static int idetape_queue_pc_tail(ide_drive_t *drive, struct ide_atapi_pc *pc)
 {
        struct ide_tape_obj *tape = drive->driver_data;
-       struct request rq;
+       struct request *rq;
+       int error;
 
-       idetape_init_rq(&rq, REQ_IDETAPE_PC1);
-       rq.buffer = (char *) pc;
-       rq.rq_disk = tape->disk;
-       return ide_do_drive_cmd(drive, &rq, ide_wait);
+       rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
+       rq->cmd_type = REQ_TYPE_SPECIAL;
+       rq->cmd[0] = REQ_IDETAPE_PC1;
+       rq->buffer = (char *)pc;
+       error = blk_execute_rq(drive->queue, tape->disk, rq, 0);
+       blk_put_request(rq);
+       return error;
 }
 
 static void idetape_create_load_unload_cmd(ide_drive_t *drive,
@@ -1533,7 +1295,6 @@ static void idetape_create_load_unload_cmd(ide_drive_t *drive,
        pc->c[0] = START_STOP;
        pc->c[4] = cmd;
        pc->flags |= PC_FLAG_WAIT_FOR_DSC;
-       pc->idetape_callback = &idetape_pc_callback;
 }
 
 static int idetape_wait_ready(ide_drive_t *drive, unsigned long timeout)
@@ -1585,7 +1346,6 @@ static void idetape_create_read_position_cmd(struct ide_atapi_pc *pc)
        idetape_init_pc(pc);
        pc->c[0] = READ_POSITION;
        pc->req_xfer = 20;
-       pc->idetape_callback = &idetape_read_position_callback;
 }
 
 static int idetape_read_position(ide_drive_t *drive)
@@ -1613,7 +1373,6 @@ static void idetape_create_locate_cmd(ide_drive_t *drive,
        put_unaligned(cpu_to_be32(block), (unsigned int *) &pc->c[3]);
        pc->c[8] = partition;
        pc->flags |= PC_FLAG_WAIT_FOR_DSC;
-       pc->idetape_callback = &idetape_pc_callback;
 }
 
 static int idetape_create_prevent_cmd(ide_drive_t *drive,
@@ -1628,7 +1387,6 @@ static int idetape_create_prevent_cmd(ide_drive_t *drive,
        idetape_init_pc(pc);
        pc->c[0] = ALLOW_MEDIUM_REMOVAL;
        pc->c[4] = prevent;
-       pc->idetape_callback = &idetape_pc_callback;
        return 1;
 }
 
@@ -1700,26 +1458,33 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int blocks,
                                 struct idetape_bh *bh)
 {
        idetape_tape_t *tape = drive->driver_data;
-       struct request rq;
+       struct request *rq;
+       int ret, errors;
 
        debug_log(DBG_SENSE, "%s: cmd=%d\n", __func__, cmd);
 
-       idetape_init_rq(&rq, cmd);
-       rq.rq_disk = tape->disk;
-       rq.special = (void *)bh;
-       rq.sector = tape->first_frame;
-       rq.nr_sectors           = blocks;
-       rq.current_nr_sectors   = blocks;
-       (void) ide_do_drive_cmd(drive, &rq, ide_wait);
+       rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
+       rq->cmd_type = REQ_TYPE_SPECIAL;
+       rq->cmd[0] = cmd;
+       rq->rq_disk = tape->disk;
+       rq->special = (void *)bh;
+       rq->sector = tape->first_frame;
+       rq->nr_sectors = blocks;
+       rq->current_nr_sectors = blocks;
+       blk_execute_rq(drive->queue, tape->disk, rq, 0);
+
+       errors = rq->errors;
+       ret = tape->blk_size * (blocks - rq->current_nr_sectors);
+       blk_put_request(rq);
 
        if ((cmd & (REQ_IDETAPE_READ | REQ_IDETAPE_WRITE)) == 0)
                return 0;
 
        if (tape->merge_bh)
                idetape_init_merge_buffer(tape);
-       if (rq.errors == IDETAPE_ERROR_GENERAL)
+       if (errors == IDETAPE_ERROR_GENERAL)
                return -EIO;
-       return (tape->blk_size * (blocks-rq.current_nr_sectors));
+       return ret;
 }
 
 static void idetape_create_inquiry_cmd(struct ide_atapi_pc *pc)
@@ -1728,7 +1493,6 @@ static void idetape_create_inquiry_cmd(struct ide_atapi_pc *pc)
        pc->c[0] = INQUIRY;
        pc->c[4] = 254;
        pc->req_xfer = 254;
-       pc->idetape_callback = &idetape_pc_callback;
 }
 
 static void idetape_create_rewind_cmd(ide_drive_t *drive,
@@ -1737,7 +1501,6 @@ static void idetape_create_rewind_cmd(ide_drive_t *drive,
        idetape_init_pc(pc);
        pc->c[0] = REZERO_UNIT;
        pc->flags |= PC_FLAG_WAIT_FOR_DSC;
-       pc->idetape_callback = &idetape_pc_callback;
 }
 
 static void idetape_create_erase_cmd(struct ide_atapi_pc *pc)
@@ -1746,7 +1509,6 @@ static void idetape_create_erase_cmd(struct ide_atapi_pc *pc)
        pc->c[0] = ERASE;
        pc->c[1] = 1;
        pc->flags |= PC_FLAG_WAIT_FOR_DSC;
-       pc->idetape_callback = &idetape_pc_callback;
 }
 
 static void idetape_create_space_cmd(struct ide_atapi_pc *pc, int count, u8 cmd)
@@ -1756,7 +1518,6 @@ static void idetape_create_space_cmd(struct ide_atapi_pc *pc, int count, u8 cmd)
        put_unaligned(cpu_to_be32(count), (unsigned int *) &pc->c[1]);
        pc->c[1] = cmd;
        pc->flags |= PC_FLAG_WAIT_FOR_DSC;
-       pc->idetape_callback = &idetape_pc_callback;
 }
 
 /* Queue up a character device originated write request. */
@@ -2751,9 +2512,8 @@ static void idetape_setup(ide_drive_t *drive, idetape_tape_t *tape, int minor)
         * Ensure that the number we got makes sense; limit it within
         * IDETAPE_DSC_RW_MIN and IDETAPE_DSC_RW_MAX.
         */
-       tape->best_dsc_rw_freq = max_t(unsigned long,
-                               min_t(unsigned long, t, IDETAPE_DSC_RW_MAX),
-                               IDETAPE_DSC_RW_MIN);
+       tape->best_dsc_rw_freq = clamp_t(unsigned long, t, IDETAPE_DSC_RW_MIN,
+                                        IDETAPE_DSC_RW_MAX);
        printk(KERN_INFO "ide-tape: %s <-> %s: %dKBps, %d*%dkB buffer, "
                "%lums tDSC%s\n",
                drive->name, tape->name, *(u16 *)&tape->caps[14],
@@ -2905,11 +2665,6 @@ static int ide_tape_probe(ide_drive_t *drive)
                                " the driver\n", drive->name);
                goto failed;
        }
-       if (drive->scsi) {
-               printk(KERN_INFO "ide-tape: passing drive %s to ide-scsi"
-                                " emulation.\n", drive->name);
-               goto failed;
-       }
        tape = kzalloc(sizeof(idetape_tape_t), GFP_KERNEL);
        if (tape == NULL) {
                printk(KERN_ERR "ide-tape: %s: Can't allocate a tape struct\n",
index ab545ffa15490367ed013bd9e04a674a1f2df76f..cf55a48a7dd25e5e33f62f5d49da320174c37a30 100644 (file)
@@ -109,13 +109,15 @@ ide_startstop_t do_rw_taskfile (ide_drive_t *drive, ide_task_t *task)
 
        if ((task->tf_flags & IDE_TFLAG_DMA_PIO_FALLBACK) == 0) {
                ide_tf_dump(drive->name, tf);
+               ide_set_irq(drive, 1);
+               SELECT_MASK(drive, 0);
                hwif->tf_load(drive, task);
        }
 
        switch (task->data_phase) {
        case TASKFILE_MULTI_OUT:
        case TASKFILE_OUT:
-               hwif->OUTBSYNC(drive, tf->command, hwif->io_ports.command_addr);
+               hwif->OUTBSYNC(hwif, tf->command, hwif->io_ports.command_addr);
                ndelay(400);    /* FIXME */
                return pre_task_out_intr(drive, task->rq);
        case TASKFILE_MULTI_IN:
@@ -492,11 +494,12 @@ static ide_startstop_t pre_task_out_intr(ide_drive_t *drive, struct request *rq)
 
 int ide_raw_taskfile(ide_drive_t *drive, ide_task_t *task, u8 *buf, u16 nsect)
 {
-       struct request rq;
+       struct request *rq;
+       int error;
 
-       blk_rq_init(NULL, &rq);
-       rq.cmd_type = REQ_TYPE_ATA_TASKFILE;
-       rq.buffer = buf;
+       rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
+       rq->cmd_type = REQ_TYPE_ATA_TASKFILE;
+       rq->buffer = buf;
 
        /*
         * (ks) We transfer currently only whole sectors.
@@ -504,16 +507,19 @@ int ide_raw_taskfile(ide_drive_t *drive, ide_task_t *task, u8 *buf, u16 nsect)
         * if we would find a solution to transfer any size.
         * To support special commands like READ LONG.
         */
-       rq.hard_nr_sectors = rq.nr_sectors = nsect;
-       rq.hard_cur_sectors = rq.current_nr_sectors = nsect;
+       rq->hard_nr_sectors = rq->nr_sectors = nsect;
+       rq->hard_cur_sectors = rq->current_nr_sectors = nsect;
 
        if (task->tf_flags & IDE_TFLAG_WRITE)
-               rq.cmd_flags |= REQ_RW;
+               rq->cmd_flags |= REQ_RW;
 
-       rq.special = task;
-       task->rq = &rq;
+       rq->special = task;
+       task->rq = rq;
 
-       return ide_do_drive_cmd(drive, &rq, ide_wait);
+       error = blk_execute_rq(drive->queue, NULL, rq, 0);
+       blk_put_request(rq);
+
+       return error;
 }
 
 EXPORT_SYMBOL(ide_raw_taskfile);
@@ -739,12 +745,14 @@ int ide_cmd_ioctl (ide_drive_t *drive, unsigned int cmd, unsigned long arg)
        struct hd_driveid *id = drive->id;
 
        if (NULL == (void *) arg) {
-               struct request rq;
+               struct request *rq;
 
-               ide_init_drive_cmd(&rq);
-               rq.cmd_type = REQ_TYPE_ATA_TASKFILE;
+               rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
+               rq->cmd_type = REQ_TYPE_ATA_TASKFILE;
+               err = blk_execute_rq(drive->queue, NULL, rq, 0);
+               blk_put_request(rq);
 
-               return ide_do_drive_cmd(drive, &rq, ide_wait);
+               return err;
        }
 
        if (copy_from_user(args, (void __user *)arg, 4))
index 3b12ffe770712dd1f1a708c7852ff5745a9b6e2a..2e91c5870b4c8afe240b0a2ec826e57c554aef16 100644 (file)
@@ -95,7 +95,6 @@ static struct ide_timing ide_timing[] = {
 #define IDE_TIMING_UDMA                0x80
 #define IDE_TIMING_ALL         0xff
 
-#define FIT(v,vmin,vmax)       max_t(short,min_t(short,v,vmax),vmin)
 #define ENOUGH(v,unit)         (((v)-1)/(unit)+1)
 #define EZ(v,unit)             ((v)?ENOUGH(v,unit):0)
 
index 300431d080a97310ca829beff00f8b7dd49bb3aa..2b8453510e0900211b21ad6d742070fac1556151 100644 (file)
@@ -86,13 +86,10 @@ static const u8 ide_hwif_to_major[] = { IDE0_MAJOR, IDE1_MAJOR,
                                        IDE6_MAJOR, IDE7_MAJOR,
                                        IDE8_MAJOR, IDE9_MAJOR };
 
-static int idebus_parameter;   /* holds the "idebus=" parameter */
-static int system_bus_speed;   /* holds what we think is VESA/PCI bus speed */
-
 DEFINE_MUTEX(ide_cfg_mtx);
- __cacheline_aligned_in_smp DEFINE_SPINLOCK(ide_lock);
 
-int noautodma = 0;
+__cacheline_aligned_in_smp DEFINE_SPINLOCK(ide_lock);
+EXPORT_SYMBOL(ide_lock);
 
 ide_hwif_t ide_hwifs[MAX_HWIFS];       /* master data repository */
 
@@ -139,7 +136,6 @@ static void ide_port_init_devices_data(ide_hwif_t *hwif)
                drive->media                    = ide_disk;
                drive->select.all               = (unit<<4)|0xa0;
                drive->hwif                     = hwif;
-               drive->ctl                      = 0x08;
                drive->ready_stat               = READY_STAT;
                drive->bad_wstat                = BAD_W_STAT;
                drive->special.b.recalibrate    = 1;
@@ -154,32 +150,9 @@ static void ide_port_init_devices_data(ide_hwif_t *hwif)
        }
 }
 
-/*
- * init_ide_data() sets reasonable default values into all fields
- * of all instances of the hwifs and drives, but only on the first call.
- * Subsequent calls have no effect (they don't wipe out anything).
- *
- * This routine is normally called at driver initialization time,
- * but may also be called MUCH earlier during kernel "command-line"
- * parameter processing.  As such, we cannot depend on any other parts
- * of the kernel (such as memory allocation) to be functioning yet.
- *
- * This is too bad, as otherwise we could dynamically allocate the
- * ide_drive_t structs as needed, rather than always consuming memory
- * for the max possible number (MAX_HWIFS * MAX_DRIVES) of them.
- *
- * FIXME: We should stuff the setup data into __init and copy the
- * relevant hwifs/allocate them properly during boot.
- */
-#define MAGIC_COOKIE 0x12345678
 static void __init init_ide_data (void)
 {
        unsigned int index;
-       static unsigned long magic_cookie = MAGIC_COOKIE;
-
-       if (magic_cookie != MAGIC_COOKIE)
-               return;         /* already initialized */
-       magic_cookie = 0;
 
        /* Initialise all interface structures */
        for (index = 0; index < MAX_HWIFS; ++index) {
@@ -189,38 +162,6 @@ static void __init init_ide_data (void)
        }
 }
 
-/**
- *     ide_system_bus_speed    -       guess bus speed
- *
- *     ide_system_bus_speed() returns what we think is the system VESA/PCI
- *     bus speed (in MHz). This is used for calculating interface PIO timings.
- *     The default is 40 for known PCI systems, 50 otherwise.
- *     The "idebus=xx" parameter can be used to override this value.
- *     The actual value to be used is computed/displayed the first time
- *     through. Drivers should only use this as a last resort.
- *
- *     Returns a guessed speed in MHz.
- */
-
-static int ide_system_bus_speed(void)
-{
-#ifdef CONFIG_PCI
-       static struct pci_device_id pci_default[] = {
-               { PCI_DEVICE(PCI_ANY_ID, PCI_ANY_ID) },
-               { }
-       };
-#else
-#define pci_default 0
-#endif /* CONFIG_PCI */
-
-       /* user supplied value */
-       if (idebus_parameter)
-               return idebus_parameter;
-
-       /* safe default value for PCI or VESA and PCI*/
-       return pci_dev_present(pci_default) ? 33 : 50;
-}
-
 void ide_remove_port_from_hwgroup(ide_hwif_t *hwif)
 {
        ide_hwgroup_t *hwgroup = hwif->hwgroup;
@@ -498,7 +439,7 @@ out:
 
 int set_pio_mode(ide_drive_t *drive, int arg)
 {
-       struct request rq;
+       struct request *rq;
        ide_hwif_t *hwif = drive->hwif;
        const struct ide_port_ops *port_ops = hwif->port_ops;
 
@@ -512,12 +453,15 @@ int set_pio_mode(ide_drive_t *drive, int arg)
        if (drive->special.b.set_tune)
                return -EBUSY;
 
-       ide_init_drive_cmd(&rq);
-       rq.cmd_type = REQ_TYPE_ATA_TASKFILE;
+       rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
+       rq->cmd_type = REQ_TYPE_ATA_TASKFILE;
 
        drive->tune_req = (u8) arg;
        drive->special.b.set_tune = 1;
-       (void) ide_do_drive_cmd(drive, &rq, ide_wait);
+
+       blk_execute_rq(drive->queue, NULL, rq, 0);
+       blk_put_request(rq);
+
        return 0;
 }
 
@@ -537,25 +481,11 @@ static int set_unmaskirq(ide_drive_t *drive, int arg)
        return 0;
 }
 
-/**
- *     system_bus_clock        -       clock guess
- *
- *     External version of the bus clock guess used by very old IDE drivers
- *     for things like VLB timings. Should not be used.
- */
-
-int system_bus_clock (void)
-{
-       return system_bus_speed;
-}
-
-EXPORT_SYMBOL(system_bus_clock);
-
 static int generic_ide_suspend(struct device *dev, pm_message_t mesg)
 {
        ide_drive_t *drive = dev->driver_data;
        ide_hwif_t *hwif = HWIF(drive);
-       struct request rq;
+       struct request *rq;
        struct request_pm_state rqpm;
        ide_task_t args;
        int ret;
@@ -564,18 +494,19 @@ static int generic_ide_suspend(struct device *dev, pm_message_t mesg)
        if (!(drive->dn % 2))
                ide_acpi_get_timing(hwif);
 
-       blk_rq_init(NULL, &rq);
        memset(&rqpm, 0, sizeof(rqpm));
        memset(&args, 0, sizeof(args));
-       rq.cmd_type = REQ_TYPE_PM_SUSPEND;
-       rq.special = &args;
-       rq.data = &rqpm;
+       rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
+       rq->cmd_type = REQ_TYPE_PM_SUSPEND;
+       rq->special = &args;
+       rq->data = &rqpm;
        rqpm.pm_step = ide_pm_state_start_suspend;
        if (mesg.event == PM_EVENT_PRETHAW)
                mesg.event = PM_EVENT_FREEZE;
        rqpm.pm_state = mesg.event;
 
-       ret = ide_do_drive_cmd(drive, &rq, ide_wait);
+       ret = blk_execute_rq(drive->queue, NULL, rq, 0);
+       blk_put_request(rq);
        /* only call ACPI _PS3 after both drivers are suspended */
        if (!ret && (((drive->dn % 2) && hwif->drives[0].present
                 && hwif->drives[1].present)
@@ -589,7 +520,7 @@ static int generic_ide_resume(struct device *dev)
 {
        ide_drive_t *drive = dev->driver_data;
        ide_hwif_t *hwif = HWIF(drive);
-       struct request rq;
+       struct request *rq;
        struct request_pm_state rqpm;
        ide_task_t args;
        int err;
@@ -602,16 +533,18 @@ static int generic_ide_resume(struct device *dev)
 
        ide_acpi_exec_tfs(drive);
 
-       blk_rq_init(NULL, &rq);
        memset(&rqpm, 0, sizeof(rqpm));
        memset(&args, 0, sizeof(args));
-       rq.cmd_type = REQ_TYPE_PM_RESUME;
-       rq.special = &args;
-       rq.data = &rqpm;
+       rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
+       rq->cmd_type = REQ_TYPE_PM_RESUME;
+       rq->cmd_flags |= REQ_PREEMPT;
+       rq->special = &args;
+       rq->data = &rqpm;
        rqpm.pm_step = ide_pm_state_start_resume;
        rqpm.pm_state = PM_EVENT_ON;
 
-       err = ide_do_drive_cmd(drive, &rq, ide_head_wait);
+       err = blk_execute_rq(drive->queue, NULL, rq, 1);
+       blk_put_request(rq);
 
        if (err == 0 && dev->driver) {
                ide_driver_t *drv = to_ide_driver(dev->driver);
@@ -764,212 +697,6 @@ set_val:
 
 EXPORT_SYMBOL(generic_ide_ioctl);
 
-/*
- * stridx() returns the offset of c within s,
- * or -1 if c is '\0' or not found within s.
- */
-static int __init stridx (const char *s, char c)
-{
-       char *i = strchr(s, c);
-       return (i && c) ? i - s : -1;
-}
-
-/*
- * match_parm() does parsing for ide_setup():
- *
- * 1. the first char of s must be '='.
- * 2. if the remainder matches one of the supplied keywords,
- *     the index (1 based) of the keyword is negated and returned.
- * 3. if the remainder is a series of no more than max_vals numbers
- *     separated by commas, the numbers are saved in vals[] and a
- *     count of how many were saved is returned.  Base10 is assumed,
- *     and base16 is allowed when prefixed with "0x".
- * 4. otherwise, zero is returned.
- */
-static int __init match_parm (char *s, const char *keywords[], int vals[], int max_vals)
-{
-       static const char *decimal = "0123456789";
-       static const char *hex = "0123456789abcdef";
-       int i, n;
-
-       if (*s++ == '=') {
-               /*
-                * Try matching against the supplied keywords,
-                * and return -(index+1) if we match one
-                */
-               if (keywords != NULL) {
-                       for (i = 0; *keywords != NULL; ++i) {
-                               if (!strcmp(s, *keywords++))
-                                       return -(i+1);
-                       }
-               }
-               /*
-                * Look for a series of no more than "max_vals"
-                * numeric values separated by commas, in base10,
-                * or base16 when prefixed with "0x".
-                * Return a count of how many were found.
-                */
-               for (n = 0; (i = stridx(decimal, *s)) >= 0;) {
-                       vals[n] = i;
-                       while ((i = stridx(decimal, *++s)) >= 0)
-                               vals[n] = (vals[n] * 10) + i;
-                       if (*s == 'x' && !vals[n]) {
-                               while ((i = stridx(hex, *++s)) >= 0)
-                                       vals[n] = (vals[n] * 0x10) + i;
-                       }
-                       if (++n == max_vals)
-                               break;
-                       if (*s == ',' || *s == ';')
-                               ++s;
-               }
-               if (!*s)
-                       return n;
-       }
-       return 0;       /* zero = nothing matched */
-}
-
-/*
- * ide_setup() gets called VERY EARLY during initialization,
- * to handle kernel "command line" strings beginning with "hdx=" or "ide".
- *
- * Remember to update Documentation/ide/ide.txt if you change something here.
- */
-static int __init ide_setup(char *s)
-{
-       ide_hwif_t *hwif;
-       ide_drive_t *drive;
-       unsigned int hw, unit;
-       int vals[3];
-       const char max_drive = 'a' + ((MAX_HWIFS * MAX_DRIVES) - 1);
-
-       if (strncmp(s,"hd",2) == 0 && s[2] == '=')      /* hd= is for hd.c   */
-               return 0;                               /* driver and not us */
-
-       if (strncmp(s,"ide",3) && strncmp(s,"idebus",6) && strncmp(s,"hd",2))
-               return 0;
-
-       printk(KERN_INFO "ide_setup: %s", s);
-       init_ide_data ();
-
-#ifdef CONFIG_BLK_DEV_IDEDOUBLER
-       if (!strcmp(s, "ide=doubler")) {
-               extern int ide_doubler;
-
-               printk(" : Enabled support for IDE doublers\n");
-               ide_doubler = 1;
-               goto obsolete_option;
-       }
-#endif /* CONFIG_BLK_DEV_IDEDOUBLER */
-
-       if (!strcmp(s, "ide=nodma")) {
-               printk(" : Prevented DMA\n");
-               noautodma = 1;
-               goto obsolete_option;
-       }
-
-#ifdef CONFIG_BLK_DEV_IDEACPI
-       if (!strcmp(s, "ide=noacpi")) {
-               //printk(" : Disable IDE ACPI support.\n");
-               ide_noacpi = 1;
-               goto obsolete_option;
-       }
-       if (!strcmp(s, "ide=acpigtf")) {
-               //printk(" : Enable IDE ACPI _GTF support.\n");
-               ide_acpigtf = 1;
-               goto obsolete_option;
-       }
-       if (!strcmp(s, "ide=acpionboot")) {
-               //printk(" : Call IDE ACPI methods on boot.\n");
-               ide_acpionboot = 1;
-               goto obsolete_option;
-       }
-#endif /* CONFIG_BLK_DEV_IDEACPI */
-
-       /*
-        * Look for drive options:  "hdx="
-        */
-       if (s[0] == 'h' && s[1] == 'd' && s[2] >= 'a' && s[2] <= max_drive) {
-               const char *hd_words[] = {
-                       "none", "noprobe", "nowerr", "cdrom", "nodma",
-                       "-6", "-7", "-8", "-9", "-10",
-                       "noflush", "remap", "remap63", "scsi", NULL };
-               unit = s[2] - 'a';
-               hw   = unit / MAX_DRIVES;
-               unit = unit % MAX_DRIVES;
-               hwif = &ide_hwifs[hw];
-               drive = &hwif->drives[unit];
-               if (strncmp(s + 4, "ide-", 4) == 0) {
-                       strlcpy(drive->driver_req, s + 4, sizeof(drive->driver_req));
-                       goto obsolete_option;
-               }
-               switch (match_parm(&s[3], hd_words, vals, 3)) {
-                       case -1: /* "none" */
-                       case -2: /* "noprobe" */
-                               drive->noprobe = 1;
-                               goto obsolete_option;
-                       case -3: /* "nowerr" */
-                               drive->bad_wstat = BAD_R_STAT;
-                               goto obsolete_option;
-                       case -4: /* "cdrom" */
-                               drive->present = 1;
-                               drive->media = ide_cdrom;
-                               /* an ATAPI device ignores DRDY */
-                               drive->ready_stat = 0;
-                               goto obsolete_option;
-                       case -5: /* nodma */
-                               drive->nodma = 1;
-                               goto obsolete_option;
-                       case -11: /* noflush */
-                               drive->noflush = 1;
-                               goto obsolete_option;
-                       case -12: /* "remap" */
-                               drive->remap_0_to_1 = 1;
-                               goto obsolete_option;
-                       case -13: /* "remap63" */
-                               drive->sect0 = 63;
-                               goto obsolete_option;
-                       case -14: /* "scsi" */
-                               drive->scsi = 1;
-                               goto obsolete_option;
-                       case 3: /* cyl,head,sect */
-                               drive->media    = ide_disk;
-                               drive->ready_stat = READY_STAT;
-                               drive->cyl      = drive->bios_cyl  = vals[0];
-                               drive->head     = drive->bios_head = vals[1];
-                               drive->sect     = drive->bios_sect = vals[2];
-                               drive->present  = 1;
-                               drive->forced_geom = 1;
-                               goto obsolete_option;
-                       default:
-                               goto bad_option;
-               }
-       }
-
-       if (s[0] != 'i' || s[1] != 'd' || s[2] != 'e')
-               goto bad_option;
-       /*
-        * Look for bus speed option:  "idebus="
-        */
-       if (s[3] == 'b' && s[4] == 'u' && s[5] == 's') {
-               if (match_parm(&s[6], NULL, vals, 1) != 1)
-                       goto bad_option;
-               if (vals[0] >= 20 && vals[0] <= 66) {
-                       idebus_parameter = vals[0];
-               } else
-                       printk(" -- BAD BUS SPEED! Expected value from 20 to 66");
-               goto obsolete_option;
-       }
-
-bad_option:
-       printk(" -- BAD OPTION\n");
-       return 1;
-obsolete_option:
-       printk(" -- OBSOLETE OPTION, WILL BE REMOVED SOON!\n");
-       return 1;
-}
-
-EXPORT_SYMBOL(ide_lock);
-
 static int ide_bus_match(struct device *dev, struct device_driver *drv)
 {
        return 1;
@@ -1281,11 +1008,6 @@ static int __init ide_init(void)
        int ret;
 
        printk(KERN_INFO "Uniform Multi-Platform E-IDE driver\n");
-       system_bus_speed = ide_system_bus_speed();
-
-       printk(KERN_INFO "ide: Assuming %dMHz system bus speed "
-                        "for PIO modes%s\n", system_bus_speed,
-                       idebus_parameter ? "" : "; override with idebus=xx");
 
        ret = bus_register(&ide_bus_type);
        if (ret < 0) {
@@ -1311,32 +1033,7 @@ out_port_class:
        return ret;
 }
 
-#ifdef MODULE
-static char *options = NULL;
-module_param(options, charp, 0);
-MODULE_LICENSE("GPL");
-
-static void __init parse_options (char *line)
-{
-       char *next = line;
-
-       if (line == NULL || !*line)
-               return;
-       while ((line = next) != NULL) {
-               if ((next = strchr(line,' ')) != NULL)
-                       *next++ = 0;
-               if (!ide_setup(line))
-                       printk (KERN_INFO "Unknown option '%s'\n", line);
-       }
-}
-
-int __init init_module (void)
-{
-       parse_options(options);
-       return ide_init();
-}
-
-void __exit cleanup_module (void)
+static void __exit ide_exit(void)
 {
        proc_ide_destroy();
 
@@ -1345,10 +1042,7 @@ void __exit cleanup_module (void)
        bus_unregister(&ide_bus_type);
 }
 
-#else /* !MODULE */
-
-__setup("", ide_setup);
-
 module_init(ide_init);
+module_exit(ide_exit);
 
-#endif /* MODULE */
+MODULE_LICENSE("GPL");
index 90c65cf97448c3fc7e72313f7bb1dbfcd7e4e0fb..052125fafcfa1cb320481c600e5b6129b3e06891 100644 (file)
@@ -116,7 +116,7 @@ static void ali14xx_set_pio_mode(ide_drive_t *drive, const u8 pio)
        int time1, time2;
        u8 param1, param2, param3, param4;
        unsigned long flags;
-       int bus_speed = ide_vlb_clk ? ide_vlb_clk : system_bus_clock();
+       int bus_speed = ide_vlb_clk ? ide_vlb_clk : 50;
 
        /* calculate timing, according to PIO mode */
        time1 = ide_pio_cycle_time(drive, pio);
index fed7d812761c4e9b9ba46b1c1bfbb26648cfac20..b78941680c320b4542a75d0d894c72e9e53e9098 100644 (file)
@@ -64,9 +64,7 @@
 #define GAYLE_HAS_CONTROL_REG  (!ide_doubler)
 #define GAYLE_IDEREG_SIZE      (ide_doubler ? 0x1000 : 0x2000)
 
-int ide_doubler = 0;   /* support IDE doublers? */
-EXPORT_SYMBOL_GPL(ide_doubler);
-
+static int ide_doubler;
 module_param_named(doubler, ide_doubler, bool, 0);
 MODULE_PARM_DESC(doubler, "enable support for IDE doublers");
 #endif /* CONFIG_BLK_DEV_IDEDOUBLER */
index 4fe516df9f74f3d40d2cc9abaa44000c80b891fb..dd6dfb32e85303b4342be96afddddd4584cf33f4 100644 (file)
@@ -212,7 +212,7 @@ static u8 ht_pio2timings(ide_drive_t *drive, const u8 pio)
 {
        int active_time, recovery_time;
        int active_cycles, recovery_cycles;
-       int bus_speed = ide_vlb_clk ? ide_vlb_clk : system_bus_clock();
+       int bus_speed = ide_vlb_clk ? ide_vlb_clk : 50;
 
         if (pio) {
                unsigned int cycle_time;
index 6424af154325d9f1c0776b37f7cb7aac7d932593..51dba82f88127bba661472e69d210027fd5735ea 100644 (file)
@@ -110,7 +110,7 @@ static void qd65xx_select(ide_drive_t *drive)
 
 static u8 qd6500_compute_timing (ide_hwif_t *hwif, int active_time, int recovery_time)
 {
-       int clk = ide_vlb_clk ? ide_vlb_clk : system_bus_clock();
+       int clk = ide_vlb_clk ? ide_vlb_clk : 50;
        u8 act_cyc, rec_cyc;
 
        if (clk <= 33) {
@@ -132,7 +132,7 @@ static u8 qd6500_compute_timing (ide_hwif_t *hwif, int active_time, int recovery
 
 static u8 qd6580_compute_timing (int active_time, int recovery_time)
 {
-       int clk = ide_vlb_clk ? ide_vlb_clk : system_bus_clock();
+       int clk = ide_vlb_clk ? ide_vlb_clk : 50;
        u8 act_cyc, rec_cyc;
 
        act_cyc = 17 - IDE_IN(active_time   * clk / 1000 + 1, 2, 17);
index 7f46c224b7c40f0f2091eea99a0be2f193d44921..ae7a4329a581065dc10f1f18c83b1d5a7daedb1d 100644 (file)
@@ -140,7 +140,7 @@ static void aec_set_pio_mode(ide_drive_t *drive, const u8 pio)
 
 static unsigned int __devinit init_chipset_aec62xx(struct pci_dev *dev, const char *name)
 {
-       int bus_speed = ide_pci_clk ? ide_pci_clk : system_bus_clock();
+       int bus_speed = ide_pci_clk ? ide_pci_clk : 33;
 
        if (bus_speed <= 33)
                pci_set_drvdata(dev, (void *) aec6xxx_33_base);
index f2129d5e07f2683215eb20940529d457ae04cd3b..f2de00adf147e596448c06913e8790ced832c746 100644 (file)
@@ -72,7 +72,7 @@ static void ali_set_pio_mode(ide_drive_t *drive, const u8 pio)
        int s_time, a_time, c_time;
        u8 s_clc, a_clc, r_clc;
        unsigned long flags;
-       int bus_speed = ide_pci_clk ? ide_pci_clk : system_bus_clock();
+       int bus_speed = ide_pci_clk ? ide_pci_clk : 33;
        int port = hwif->channel ? 0x5c : 0x58;
        int portFIFO = hwif->channel ? 0x55 : 0x54;
        u8 cd_dma_fifo = 0;
index efcf54338be7b9c63543f496f3548d71d8825be2..ad222206a429ea303e4f528142ee251fdc143ab2 100644 (file)
@@ -53,20 +53,20 @@ static void amd_set_speed(struct pci_dev *dev, u8 dn, u8 udma_mask,
        u8 t = 0, offset = amd_offset(dev);
 
        pci_read_config_byte(dev, AMD_ADDRESS_SETUP + offset, &t);
-       t = (t & ~(3 << ((3 - dn) << 1))) | ((FIT(timing->setup, 1, 4) - 1) << ((3 - dn) << 1));
+       t = (t & ~(3 << ((3 - dn) << 1))) | ((clamp_val(timing->setup, 1, 4) - 1) << ((3 - dn) << 1));
        pci_write_config_byte(dev, AMD_ADDRESS_SETUP + offset, t);
 
        pci_write_config_byte(dev, AMD_8BIT_TIMING + offset + (1 - (dn >> 1)),
-               ((FIT(timing->act8b, 1, 16) - 1) << 4) | (FIT(timing->rec8b, 1, 16) - 1));
+               ((clamp_val(timing->act8b, 1, 16) - 1) << 4) | (clamp_val(timing->rec8b, 1, 16) - 1));
 
        pci_write_config_byte(dev, AMD_DRIVE_TIMING + offset + (3 - dn),
-               ((FIT(timing->active, 1, 16) - 1) << 4) | (FIT(timing->recover, 1, 16) - 1));
+               ((clamp_val(timing->active, 1, 16) - 1) << 4) | (clamp_val(timing->recover, 1, 16) - 1));
 
        switch (udma_mask) {
-       case ATA_UDMA2: t = timing->udma ? (0xc0 | (FIT(timing->udma, 2, 5) - 2)) : 0x03; break;
-       case ATA_UDMA4: t = timing->udma ? (0xc0 | amd_cyc2udma[FIT(timing->udma, 2, 10)]) : 0x03; break;
-       case ATA_UDMA5: t = timing->udma ? (0xc0 | amd_cyc2udma[FIT(timing->udma, 1, 10)]) : 0x03; break;
-       case ATA_UDMA6: t = timing->udma ? (0xc0 | amd_cyc2udma[FIT(timing->udma, 1, 15)]) : 0x03; break;
+       case ATA_UDMA2: t = timing->udma ? (0xc0 | (clamp_val(timing->udma, 2, 5) - 2)) : 0x03; break;
+       case ATA_UDMA4: t = timing->udma ? (0xc0 | amd_cyc2udma[clamp_val(timing->udma, 2, 10)]) : 0x03; break;
+       case ATA_UDMA5: t = timing->udma ? (0xc0 | amd_cyc2udma[clamp_val(timing->udma, 1, 10)]) : 0x03; break;
+       case ATA_UDMA6: t = timing->udma ? (0xc0 | amd_cyc2udma[clamp_val(timing->udma, 1, 15)]) : 0x03; break;
        default: return;
        }
 
@@ -179,7 +179,7 @@ static unsigned int __devinit init_chipset_amd74xx(struct pci_dev *dev,
  * Determine the system bus clock.
  */
 
-       amd_clock = (ide_pci_clk ? ide_pci_clk : system_bus_clock()) * 1000;
+       amd_clock = (ide_pci_clk ? ide_pci_clk : 33) * 1000;
 
        switch (amd_clock) {
                case 33000: amd_clock = 33333; break;
index b38a1980dcd5563a92bf5a14fdbd8d115b257b20..cd1ba14984ab1197ac571190f132bb6236f10786 100644 (file)
@@ -525,12 +525,10 @@ static void cmd640_set_mode(ide_drive_t *drive, unsigned int index,
        u8 setup_count, active_count, recovery_count, recovery_count2, cycle_count;
        int bus_speed;
 
-       if (cmd640_vlb && ide_vlb_clk)
-               bus_speed = ide_vlb_clk;
-       else if (!cmd640_vlb && ide_pci_clk)
-               bus_speed = ide_pci_clk;
+       if (cmd640_vlb)
+               bus_speed = ide_vlb_clk ? ide_vlb_clk : 50;
        else
-               bus_speed = system_bus_clock();
+               bus_speed = ide_pci_clk ? ide_pci_clk : 33;
 
        if (pio_mode > 5)
                pio_mode = 5;
index 08674711d0895c6fae3476cba5d02a2d1f351541..ca4774aa27eeb56f33d2708d18ac1666b20d3224 100644 (file)
@@ -69,7 +69,7 @@ static u8 quantize_timing(int timing, int quant)
 static void program_cycle_times (ide_drive_t *drive, int cycle_time, int active_time)
 {
        struct pci_dev *dev = to_pci_dev(drive->hwif->dev);
-       int clock_time = 1000 / (ide_pci_clk ? ide_pci_clk : system_bus_clock());
+       int clock_time = 1000 / (ide_pci_clk ? ide_pci_clk : 33);
        u8  cycle_count, active_count, recovery_count, drwtim;
        static const u8 recovery_values[] =
                {15, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0};
@@ -128,7 +128,7 @@ static void cmd64x_tune_pio(ide_drive_t *drive, const u8 pio)
                            ide_pio_timings[pio].active_time);
 
        setup_count = quantize_timing(ide_pio_timings[pio].setup_time,
-                       1000 / (ide_pci_clk ? ide_pci_clk : system_bus_clock()));
+                       1000 / (ide_pci_clk ? ide_pci_clk : 33));
 
        /*
         * The primary channel has individual address setup timing registers
index 77cc22c2ad457b61092e67056d31cab28d3fcbb1..8c534afcb6c89ede68e98248c784ec230191103b 100644 (file)
@@ -134,7 +134,7 @@ static int calc_clk(int time, int bus_speed)
 static void compute_clocks(u8 pio, pio_clocks_t *p_pclk)
 {
        int clk1, clk2;
-       int bus_speed = ide_pci_clk ? ide_pci_clk : system_bus_clock();
+       int bus_speed = ide_pci_clk ? ide_pci_clk : 33;
 
        /* we don't check against CY82C693's min and max speed,
         * so you can play with the idebus=xx parameter
index c929dadaaaffb02836cf6e66067f6862cd8c0dae..397c6cbe953c0e42fc8a7636b9eca36377a429b6 100644 (file)
@@ -759,8 +759,7 @@ static void hpt3xx_maskproc(ide_drive_t *drive, int mask)
                                enable_irq (hwif->irq);
                }
        } else
-               outb(mask ? (drive->ctl | 2) : (drive->ctl & ~2),
-                    hwif->io_ports.ctl_addr);
+               outb(ATA_DEVCTL_OBS | (mask ? 2 : 0), hwif->io_ports.ctl_addr);
 }
 
 /*
index a7a41bb827783b7946ada332583e042a1c23338a..45ba71a7182fdf87a4d988a0c4eee66cb224c6d4 100644 (file)
@@ -76,7 +76,7 @@ static void superio_tf_read(ide_drive_t *drive, ide_task_t *task)
        }
 
        /* be sure we're looking at the low order bits */
-       outb(drive->ctl & ~0x80, io_ports->ctl_addr);
+       outb(ATA_DEVCTL_OBS & ~0x80, io_ports->ctl_addr);
 
        if (task->tf_flags & IDE_TFLAG_IN_NSECT)
                tf->nsect  = inb(io_ports->nsect_addr);
@@ -90,7 +90,7 @@ static void superio_tf_read(ide_drive_t *drive, ide_task_t *task)
                tf->device = superio_ide_inb(io_ports->device_addr);
 
        if (task->tf_flags & IDE_TFLAG_LBA48) {
-               outb(drive->ctl | 0x80, io_ports->ctl_addr);
+               outb(ATA_DEVCTL_OBS | 0x80, io_ports->ctl_addr);
 
                if (task->tf_flags & IDE_TFLAG_IN_HOB_FEATURE)
                        tf->hob_feature = inb(io_ports->feature_addr);
index 910fb00deb71486e8d0e7da1a3b922fc5009d4b5..1584ebb6a185b6d8da78fa3b205c909c7d52464e 100644 (file)
@@ -148,11 +148,8 @@ static void scc_ide_outb(u8 addr, unsigned long port)
        out_be32((void*)port, addr);
 }
 
-static void
-scc_ide_outbsync(ide_drive_t * drive, u8 addr, unsigned long port)
+static void scc_ide_outbsync(ide_hwif_t *hwif, u8 addr, unsigned long port)
 {
-       ide_hwif_t *hwif = HWIF(drive);
-
        out_be32((void*)port, addr);
        eieio();
        in_be32((void*)(hwif->dma_base + 0x01c));
@@ -662,8 +659,6 @@ static void scc_tf_load(ide_drive_t *drive, ide_task_t *task)
        if (task->tf_flags & IDE_TFLAG_FLAGGED)
                HIHI = 0xFF;
 
-       ide_set_irq(drive, 1);
-
        if (task->tf_flags & IDE_TFLAG_OUT_DATA)
                out_be32((void *)io_ports->data_addr,
                         (tf->hob_data << 8) | tf->data);
@@ -708,7 +703,7 @@ static void scc_tf_read(ide_drive_t *drive, ide_task_t *task)
        }
 
        /* be sure we're looking at the low order bits */
-       scc_ide_outb(drive->ctl & ~0x80, io_ports->ctl_addr);
+       scc_ide_outb(ATA_DEVCTL_OBS & ~0x80, io_ports->ctl_addr);
 
        if (task->tf_flags & IDE_TFLAG_IN_NSECT)
                tf->nsect  = scc_ide_inb(io_ports->nsect_addr);
@@ -722,7 +717,7 @@ static void scc_tf_read(ide_drive_t *drive, ide_task_t *task)
                tf->device = scc_ide_inb(io_ports->device_addr);
 
        if (task->tf_flags & IDE_TFLAG_LBA48) {
-               scc_ide_outb(drive->ctl | 0x80, io_ports->ctl_addr);
+               scc_ide_outb(ATA_DEVCTL_OBS | 0x80, io_ports->ctl_addr);
 
                if (task->tf_flags & IDE_TFLAG_IN_HOB_FEATURE)
                        tf->hob_feature = scc_ide_inb(io_ports->feature_addr);
@@ -795,7 +790,6 @@ static void __devinit init_mmio_iops_scc(ide_hwif_t *hwif)
 
        hwif->dma_base = dma_base;
        hwif->config_data = ports->ctl;
-       hwif->mmio = 1;
 }
 
 /**
index 16a0bce17d6905b03959ea8817b227be2d70e94b..24513e3dcd6b81c0c1a159a5f5f37200f221268f 100644 (file)
@@ -111,7 +111,7 @@ sgiioc4_init_hwif_ports(hw_regs_t * hw, unsigned long data_port,
 static void
 sgiioc4_maskproc(ide_drive_t * drive, int mask)
 {
-       writeb(mask ? (drive->ctl | 2) : (drive->ctl & ~2),
+       writeb(ATA_DEVCTL_OBS | (mask ? 2 : 0),
               (void __iomem *)drive->hwif->io_ports.ctl_addr);
 }
 
@@ -369,8 +369,7 @@ ide_dma_sgiioc4(ide_hwif_t *hwif, const struct ide_port_info *d)
        hwif->sg_max_nents = IOC4_PRD_ENTRIES;
 
        pad = pci_alloc_consistent(dev, IOC4_IDE_CACHELINE_SIZE,
-                                  (dma_addr_t *) &(hwif->dma_status));
-
+                                  (dma_addr_t *)&hwif->extra_base);
        if (pad) {
                ide_set_hwifdata(hwif, pad);
                return 0;
@@ -439,7 +438,7 @@ sgiioc4_configure_for_dma(int dma_direction, ide_drive_t * drive)
 
        /* Address of the Ending DMA */
        memset(ide_get_hwifdata(hwif), 0, IOC4_IDE_CACHELINE_SIZE);
-       ending_dma_addr = cpu_to_le32(hwif->dma_status);
+       ending_dma_addr = cpu_to_le32(hwif->extra_base);
        writel(ending_dma_addr, (void __iomem *)(dma_base + IOC4_DMA_END_ADDR * 4));
 
        writel(dma_direction, (void __iomem *)ioc4_dma_addr);
index 0006b9e58567b7d595de4253b6d67e4a78f12d8f..b75e9bb390a73410c90e82dad9a059a673b4eee5 100644 (file)
@@ -94,7 +94,7 @@ static unsigned long siimage_selreg(ide_hwif_t *hwif, int r)
        unsigned long base = (unsigned long)hwif->hwif_data;
 
        base += 0xA0 + r;
-       if (hwif->mmio)
+       if (hwif->host_flags & IDE_HFLAG_MMIO)
                base += hwif->channel << 6;
        else
                base += hwif->channel << 4;
@@ -117,7 +117,7 @@ static inline unsigned long siimage_seldev(ide_drive_t *drive, int r)
        unsigned long base      = (unsigned long)hwif->hwif_data;
 
        base += 0xA0 + r;
-       if (hwif->mmio)
+       if (hwif->host_flags & IDE_HFLAG_MMIO)
                base += hwif->channel << 6;
        else
                base += hwif->channel << 4;
@@ -190,7 +190,9 @@ static u8 sil_pata_udma_filter(ide_drive_t *drive)
        unsigned long base      = (unsigned long)hwif->hwif_data;
        u8 scsc, mask           = 0;
 
-       scsc = sil_ioread8(dev, base + (hwif->mmio ? 0x4A : 0x8A));
+       base += (hwif->host_flags & IDE_HFLAG_MMIO) ? 0x4A : 0x8A;
+
+       scsc = sil_ioread8(dev, base);
 
        switch (scsc & 0x30) {
        case 0x10:      /* 133 */
@@ -238,8 +240,9 @@ static void sil_set_pio_mode(ide_drive_t *drive, u8 pio)
        unsigned long tfaddr    = siimage_selreg(hwif,  0x02);
        unsigned long base      = (unsigned long)hwif->hwif_data;
        u8 tf_pio               = pio;
-       u8 addr_mask            = hwif->channel ? (hwif->mmio ? 0xF4 : 0x84)
-                                               : (hwif->mmio ? 0xB4 : 0x80);
+       u8 mmio                 = (hwif->host_flags & IDE_HFLAG_MMIO) ? 1 : 0;
+       u8 addr_mask            = hwif->channel ? (mmio ? 0xF4 : 0x84)
+                                               : (mmio ? 0xB4 : 0x80);
        u8 mode                 = 0;
        u8 unit                 = drive->select.b.unit;
 
@@ -290,13 +293,13 @@ static void sil_set_dma_mode(ide_drive_t *drive, const u8 speed)
        u16 ultra = 0, multi    = 0;
        u8 mode = 0, unit       = drive->select.b.unit;
        unsigned long base      = (unsigned long)hwif->hwif_data;
-       u8 scsc = 0, addr_mask  = hwif->channel ?
-                                       (hwif->mmio ? 0xF4 : 0x84) :
-                                       (hwif->mmio ? 0xB4 : 0x80);
+       u8 mmio                 = (hwif->host_flags & IDE_HFLAG_MMIO) ? 1 : 0;
+       u8 scsc = 0, addr_mask  = hwif->channel ? (mmio ? 0xF4 : 0x84)
+                                               : (mmio ? 0xB4 : 0x80);
        unsigned long ma        = siimage_seldev(drive, 0x08);
        unsigned long ua        = siimage_seldev(drive, 0x0C);
 
-       scsc  = sil_ioread8 (dev, base + (hwif->mmio ? 0x4A : 0x8A));
+       scsc  = sil_ioread8 (dev, base + (mmio ? 0x4A : 0x8A));
        mode  = sil_ioread8 (dev, base + addr_mask);
        multi = sil_ioread16(dev, ma);
        ultra = sil_ioread16(dev, ua);
@@ -391,7 +394,7 @@ static int siimage_mmio_dma_test_irq(ide_drive_t *drive)
 
 static int siimage_dma_test_irq(ide_drive_t *drive)
 {
-       if (drive->hwif->mmio)
+       if (drive->hwif->host_flags & IDE_HFLAG_MMIO)
                return siimage_mmio_dma_test_irq(drive);
        else
                return siimage_io_dma_test_irq(drive);
@@ -640,8 +643,6 @@ static void __devinit init_mmio_iops_siimage(ide_hwif_t *hwif)
        hwif->irq = dev->irq;
 
        hwif->dma_base = (unsigned long)addr + (ch ? 0x08 : 0x00);
-
-       hwif->mmio = 1;
 }
 
 static int is_dev_seagate_sata(ide_drive_t *drive)
index 566e0ecb8db1251afe46065a7a371cdcea2f13b7..3ed9728abd2408c53edebd05a7677b4a6b026830 100644 (file)
@@ -120,21 +120,21 @@ static void via_set_speed(ide_hwif_t *hwif, u8 dn, struct ide_timing *timing)
 
        if (~vdev->via_config->flags & VIA_BAD_AST) {
                pci_read_config_byte(dev, VIA_ADDRESS_SETUP, &t);
-               t = (t & ~(3 << ((3 - dn) << 1))) | ((FIT(timing->setup, 1, 4) - 1) << ((3 - dn) << 1));
+               t = (t & ~(3 << ((3 - dn) << 1))) | ((clamp_val(timing->setup, 1, 4) - 1) << ((3 - dn) << 1));
                pci_write_config_byte(dev, VIA_ADDRESS_SETUP, t);
        }
 
        pci_write_config_byte(dev, VIA_8BIT_TIMING + (1 - (dn >> 1)),
-               ((FIT(timing->act8b, 1, 16) - 1) << 4) | (FIT(timing->rec8b, 1, 16) - 1));
+               ((clamp_val(timing->act8b, 1, 16) - 1) << 4) | (clamp_val(timing->rec8b, 1, 16) - 1));
 
        pci_write_config_byte(dev, VIA_DRIVE_TIMING + (3 - dn),
-               ((FIT(timing->active, 1, 16) - 1) << 4) | (FIT(timing->recover, 1, 16) - 1));
+               ((clamp_val(timing->active, 1, 16) - 1) << 4) | (clamp_val(timing->recover, 1, 16) - 1));
 
        switch (vdev->via_config->udma_mask) {
-       case ATA_UDMA2: t = timing->udma ? (0xe0 | (FIT(timing->udma, 2, 5) - 2)) : 0x03; break;
-       case ATA_UDMA4: t = timing->udma ? (0xe8 | (FIT(timing->udma, 2, 9) - 2)) : 0x0f; break;
-       case ATA_UDMA5: t = timing->udma ? (0xe0 | (FIT(timing->udma, 2, 9) - 2)) : 0x07; break;
-       case ATA_UDMA6: t = timing->udma ? (0xe0 | (FIT(timing->udma, 2, 9) - 2)) : 0x07; break;
+       case ATA_UDMA2: t = timing->udma ? (0xe0 | (clamp_val(timing->udma, 2, 5) - 2)) : 0x03; break;
+       case ATA_UDMA4: t = timing->udma ? (0xe8 | (clamp_val(timing->udma, 2, 9) - 2)) : 0x0f; break;
+       case ATA_UDMA5: t = timing->udma ? (0xe0 | (clamp_val(timing->udma, 2, 9) - 2)) : 0x07; break;
+       case ATA_UDMA6: t = timing->udma ? (0xe0 | (clamp_val(timing->udma, 2, 9) - 2)) : 0x07; break;
        default: return;
        }
 
@@ -340,7 +340,7 @@ static unsigned int __devinit init_chipset_via82cxxx(struct pci_dev *dev, const
         * Determine system bus clock.
         */
 
-       via_clock = (ide_pci_clk ? ide_pci_clk : system_bus_clock()) * 1000;
+       via_clock = (ide_pci_clk ? ide_pci_clk : 33) * 1000;
 
        switch (via_clock) {
                case 33000: via_clock = 33333; break;
index ba2d58727964f2bfa6a527b683535035da2da3b1..dcb2c466bb979efd7977437a2a9dfb969d7274e2 100644 (file)
@@ -480,13 +480,13 @@ pmac_ide_do_update_timings(ide_drive_t *drive)
                pmac_ide_selectproc(drive);
 }
 
-static void
-pmac_outbsync(ide_drive_t *drive, u8 value, unsigned long port)
+static void pmac_outbsync(ide_hwif_t *hwif, u8 value, unsigned long port)
 {
        u32 tmp;
        
        writeb(value, (void __iomem *) port);
-       tmp = readl(PMAC_IDE_REG(IDE_TIMING_CONFIG));
+       tmp = readl((void __iomem *)(hwif->io_ports.data_addr
+                                    + IDE_TIMING_CONFIG));
 }
 
 /*
index 5171601fb2556646544587466a14b8e973389a15..abcfb1739d4d3a2089d580a1f7ffe5583ed3ff6a 100644 (file)
@@ -87,7 +87,7 @@ unsigned long ide_pci_dma_base(ide_hwif_t *hwif, const struct ide_port_info *d)
        unsigned long dma_base = 0;
        u8 dma_stat = 0;
 
-       if (hwif->mmio)
+       if (hwif->host_flags & IDE_HFLAG_MMIO)
                return hwif->dma_base;
 
        if (hwif->mate && hwif->mate->dma_base) {
@@ -374,7 +374,7 @@ int ide_hwif_setup_dma(ide_hwif_t *hwif, const struct ide_port_info *d)
                if (base == 0 || ide_pci_set_master(dev, d->name) < 0)
                        return -1;
 
-               if (hwif->mmio)
+               if (hwif->host_flags & IDE_HFLAG_MMIO)
                        printk(KERN_INFO "    %s: MMIO-DMA\n", hwif->name);
                else
                        printk(KERN_INFO "    %s: BM-DMA at 0x%04lx-0x%04lx\n",
index 781ea59503737bdf558ca0391bfa6b75700f1702..09a2bec7fd3207b65abac3f93339b7cd46a66c8e 100644 (file)
@@ -4,28 +4,33 @@
  * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
  * Copyright (c) 2005 Intel Corporation.  All rights reserved.
  *
- * This Software is licensed under one of the following licenses:
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
  *
- * 1) under the terms of the "Common Public License 1.0" a copy of which is
- *    available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/cpl.php.
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
  *
- * 2) under the terms of the "The BSD License" a copy of which is
- *    available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/bsd-license.php.
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
  *
- * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
- *    copy of which is available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/gpl-license.php.
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
  *
- * Licensee has the right to choose one of the above licenses.
- *
- * Redistributions of source code must retain the above copyright
- * notice and one of the license notices.
- *
- * Redistributions in binary form must reproduce both the above copyright
- * notice, one of the license notices in the documentation
- * and/or other materials provided with the distribution.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
  */
 
 #include <linux/mutex.h>
@@ -100,6 +105,7 @@ int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
        memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN);
        if (dst_dev_addr)
                memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
+       dev_addr->src_dev = dev;
        return 0;
 }
 EXPORT_SYMBOL(rdma_copy_addr);
index fb9ed1489f95e6e09208ae3a777a60060fd96135..6669287009c2fc4d60514d56da5fb2461532e395 100644 (file)
@@ -32,8 +32,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: agent.h 1389 2004-12-27 22:56:47Z roland $
  */
 
 #ifndef __AGENT_H_
index e85f7013de5779920af4a3902c42c71eea4592fc..68883565b725b08a8bcafd992b205ebe93d771a8 100644 (file)
@@ -31,8 +31,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: cache.c 1349 2004-12-16 21:09:43Z roland $
  */
 
 #include <linux/module.h>
index a47fe64e5c39ded62558a98daef607ae56f8444d..55738eead3bf1ce009568b1028be83b0e7b7d283 100644 (file)
@@ -31,8 +31,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: cm.c 4311 2005-12-05 18:42:01Z sean.hefty $
  */
 
 #include <linux/completion.h>
index 671f137380549ada79c71f601022b209261d109f..ae11d5cc74d07f099cca4951b471c0850d39b126 100644 (file)
@@ -4,29 +4,33 @@
  * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
  * Copyright (c) 2005-2006 Intel Corporation.  All rights reserved.
  *
- * This Software is licensed under one of the following licenses:
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
  *
- * 1) under the terms of the "Common Public License 1.0" a copy of which is
- *    available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/cpl.php.
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
  *
- * 2) under the terms of the "The BSD License" a copy of which is
- *    available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/bsd-license.php.
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
  *
- * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
- *    copy of which is available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/gpl-license.php.
- *
- * Licensee has the right to choose one of the above licenses.
- *
- * Redistributions of source code must retain the above copyright
- * notice and one of the license notices.
- *
- * Redistributions in binary form must reproduce both the above copyright
- * notice, one of the license notices in the documentation
- * and/or other materials provided with the distribution.
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
  *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
  */
 
 #include <linux/completion.h>
@@ -126,8 +130,7 @@ struct rdma_id_private {
 
        struct completion       comp;
        atomic_t                refcount;
-       wait_queue_head_t       wait_remove;
-       atomic_t                dev_remove;
+       struct mutex            handler_mutex;
 
        int                     backlog;
        int                     timeout_ms;
@@ -351,26 +354,15 @@ static void cma_deref_id(struct rdma_id_private *id_priv)
                complete(&id_priv->comp);
 }
 
-static int cma_disable_remove(struct rdma_id_private *id_priv,
+static int cma_disable_callback(struct rdma_id_private *id_priv,
                              enum cma_state state)
 {
-       unsigned long flags;
-       int ret;
-
-       spin_lock_irqsave(&id_priv->lock, flags);
-       if (id_priv->state == state) {
-               atomic_inc(&id_priv->dev_remove);
-               ret = 0;
-       } else
-               ret = -EINVAL;
-       spin_unlock_irqrestore(&id_priv->lock, flags);
-       return ret;
-}
-
-static void cma_enable_remove(struct rdma_id_private *id_priv)
-{
-       if (atomic_dec_and_test(&id_priv->dev_remove))
-               wake_up(&id_priv->wait_remove);
+       mutex_lock(&id_priv->handler_mutex);
+       if (id_priv->state != state) {
+               mutex_unlock(&id_priv->handler_mutex);
+               return -EINVAL;
+       }
+       return 0;
 }
 
 static int cma_has_cm_dev(struct rdma_id_private *id_priv)
@@ -395,8 +387,7 @@ struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,
        mutex_init(&id_priv->qp_mutex);
        init_completion(&id_priv->comp);
        atomic_set(&id_priv->refcount, 1);
-       init_waitqueue_head(&id_priv->wait_remove);
-       atomic_set(&id_priv->dev_remove, 0);
+       mutex_init(&id_priv->handler_mutex);
        INIT_LIST_HEAD(&id_priv->listen_list);
        INIT_LIST_HEAD(&id_priv->mc_list);
        get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num);
@@ -923,7 +914,7 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
        struct rdma_cm_event event;
        int ret = 0;
 
-       if (cma_disable_remove(id_priv, CMA_CONNECT))
+       if (cma_disable_callback(id_priv, CMA_CONNECT))
                return 0;
 
        memset(&event, 0, sizeof event);
@@ -970,7 +961,7 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
                event.param.conn.private_data_len = IB_CM_REJ_PRIVATE_DATA_SIZE;
                break;
        default:
-               printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d",
+               printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d\n",
                       ib_event->event);
                goto out;
        }
@@ -980,12 +971,12 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
                /* Destroy the CM ID by returning a non-zero value. */
                id_priv->cm_id.ib = NULL;
                cma_exch(id_priv, CMA_DESTROYING);
-               cma_enable_remove(id_priv);
+               mutex_unlock(&id_priv->handler_mutex);
                rdma_destroy_id(&id_priv->id);
                return ret;
        }
 out:
-       cma_enable_remove(id_priv);
+       mutex_unlock(&id_priv->handler_mutex);
        return ret;
 }
 
@@ -998,6 +989,7 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
        union cma_ip_addr *src, *dst;
        __be16 port;
        u8 ip_ver;
+       int ret;
 
        if (cma_get_net_info(ib_event->private_data, listen_id->ps,
                             &ip_ver, &port, &src, &dst))
@@ -1022,10 +1014,11 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
        if (rt->num_paths == 2)
                rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;
 
-       ib_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid);
        ib_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);
-       ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey));
-       rt->addr.dev_addr.dev_type = RDMA_NODE_IB_CA;
+       ret = rdma_translate_ip(&id->route.addr.src_addr,
+                               &id->route.addr.dev_addr);
+       if (ret)
+               goto destroy_id;
 
        id_priv = container_of(id, struct rdma_id_private, id);
        id_priv->state = CMA_CONNECT;
@@ -1095,7 +1088,7 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
        int offset, ret;
 
        listen_id = cm_id->context;
-       if (cma_disable_remove(listen_id, CMA_LISTEN))
+       if (cma_disable_callback(listen_id, CMA_LISTEN))
                return -ECONNABORTED;
 
        memset(&event, 0, sizeof event);
@@ -1116,7 +1109,7 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
                goto out;
        }
 
-       atomic_inc(&conn_id->dev_remove);
+       mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
        mutex_lock(&lock);
        ret = cma_acquire_dev(conn_id);
        mutex_unlock(&lock);
@@ -1138,7 +1131,7 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
                    !cma_is_ud_ps(conn_id->id.ps))
                        ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
                mutex_unlock(&lock);
-               cma_enable_remove(conn_id);
+               mutex_unlock(&conn_id->handler_mutex);
                goto out;
        }
 
@@ -1147,11 +1140,11 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
 
 release_conn_id:
        cma_exch(conn_id, CMA_DESTROYING);
-       cma_enable_remove(conn_id);
+       mutex_unlock(&conn_id->handler_mutex);
        rdma_destroy_id(&conn_id->id);
 
 out:
-       cma_enable_remove(listen_id);
+       mutex_unlock(&listen_id->handler_mutex);
        return ret;
 }
 
@@ -1217,7 +1210,7 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
        struct sockaddr_in *sin;
        int ret = 0;
 
-       if (cma_disable_remove(id_priv, CMA_CONNECT))
+       if (cma_disable_callback(id_priv, CMA_CONNECT))
                return 0;
 
        memset(&event, 0, sizeof event);
@@ -1261,12 +1254,12 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
                /* Destroy the CM ID by returning a non-zero value. */
                id_priv->cm_id.iw = NULL;
                cma_exch(id_priv, CMA_DESTROYING);
-               cma_enable_remove(id_priv);
+               mutex_unlock(&id_priv->handler_mutex);
                rdma_destroy_id(&id_priv->id);
                return ret;
        }
 
-       cma_enable_remove(id_priv);
+       mutex_unlock(&id_priv->handler_mutex);
        return ret;
 }
 
@@ -1282,7 +1275,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
        struct ib_device_attr attr;
 
        listen_id = cm_id->context;
-       if (cma_disable_remove(listen_id, CMA_LISTEN))
+       if (cma_disable_callback(listen_id, CMA_LISTEN))
                return -ECONNABORTED;
 
        /* Create a new RDMA id for the new IW CM ID */
@@ -1294,19 +1287,19 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
                goto out;
        }
        conn_id = container_of(new_cm_id, struct rdma_id_private, id);
-       atomic_inc(&conn_id->dev_remove);
+       mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
        conn_id->state = CMA_CONNECT;
 
        dev = ip_dev_find(&init_net, iw_event->local_addr.sin_addr.s_addr);
        if (!dev) {
                ret = -EADDRNOTAVAIL;
-               cma_enable_remove(conn_id);
+               mutex_unlock(&conn_id->handler_mutex);
                rdma_destroy_id(new_cm_id);
                goto out;
        }
        ret = rdma_copy_addr(&conn_id->id.route.addr.dev_addr, dev, NULL);
        if (ret) {
-               cma_enable_remove(conn_id);
+               mutex_unlock(&conn_id->handler_mutex);
                rdma_destroy_id(new_cm_id);
                goto out;
        }
@@ -1315,7 +1308,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
        ret = cma_acquire_dev(conn_id);
        mutex_unlock(&lock);
        if (ret) {
-               cma_enable_remove(conn_id);
+               mutex_unlock(&conn_id->handler_mutex);
                rdma_destroy_id(new_cm_id);
                goto out;
        }
@@ -1331,7 +1324,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
 
        ret = ib_query_device(conn_id->id.device, &attr);
        if (ret) {
-               cma_enable_remove(conn_id);
+               mutex_unlock(&conn_id->handler_mutex);
                rdma_destroy_id(new_cm_id);
                goto out;
        }
@@ -1347,14 +1340,17 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
                /* User wants to destroy the CM ID */
                conn_id->cm_id.iw = NULL;
                cma_exch(conn_id, CMA_DESTROYING);
-               cma_enable_remove(conn_id);
+               mutex_unlock(&conn_id->handler_mutex);
                rdma_destroy_id(&conn_id->id);
+               goto out;
        }
 
+       mutex_unlock(&conn_id->handler_mutex);
+
 out:
        if (dev)
                dev_put(dev);
-       cma_enable_remove(listen_id);
+       mutex_unlock(&listen_id->handler_mutex);
        return ret;
 }
 
@@ -1446,7 +1442,7 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
        ret = rdma_listen(id, id_priv->backlog);
        if (ret)
                printk(KERN_WARNING "RDMA CMA: cma_listen_on_dev, error %d, "
-                      "listening on device %s", ret, cma_dev->device->name);
+                      "listening on device %s\n", ret, cma_dev->device->name);
 }
 
 static void cma_listen_on_all(struct rdma_id_private *id_priv)
@@ -1586,7 +1582,7 @@ static void cma_work_handler(struct work_struct *_work)
        struct rdma_id_private *id_priv = work->id;
        int destroy = 0;
 
-       atomic_inc(&id_priv->dev_remove);
+       mutex_lock(&id_priv->handler_mutex);
        if (!cma_comp_exch(id_priv, work->old_state, work->new_state))
                goto out;
 
@@ -1595,7 +1591,7 @@ static void cma_work_handler(struct work_struct *_work)
                destroy = 1;
        }
 out:
-       cma_enable_remove(id_priv);
+       mutex_unlock(&id_priv->handler_mutex);
        cma_deref_id(id_priv);
        if (destroy)
                rdma_destroy_id(&id_priv->id);
@@ -1758,7 +1754,7 @@ static void addr_handler(int status, struct sockaddr *src_addr,
        struct rdma_cm_event event;
 
        memset(&event, 0, sizeof event);
-       atomic_inc(&id_priv->dev_remove);
+       mutex_lock(&id_priv->handler_mutex);
 
        /*
         * Grab mutex to block rdma_destroy_id() from removing the device while
@@ -1787,13 +1783,13 @@ static void addr_handler(int status, struct sockaddr *src_addr,
 
        if (id_priv->id.event_handler(&id_priv->id, &event)) {
                cma_exch(id_priv, CMA_DESTROYING);
-               cma_enable_remove(id_priv);
+               mutex_unlock(&id_priv->handler_mutex);
                cma_deref_id(id_priv);
                rdma_destroy_id(&id_priv->id);
                return;
        }
 out:
-       cma_enable_remove(id_priv);
+       mutex_unlock(&id_priv->handler_mutex);
        cma_deref_id(id_priv);
 }
 
@@ -2120,7 +2116,7 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
        struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd;
        int ret = 0;
 
-       if (cma_disable_remove(id_priv, CMA_CONNECT))
+       if (cma_disable_callback(id_priv, CMA_CONNECT))
                return 0;
 
        memset(&event, 0, sizeof event);
@@ -2151,7 +2147,7 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
                event.status = 0;
                break;
        default:
-               printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d",
+               printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d\n",
                       ib_event->event);
                goto out;
        }
@@ -2161,12 +2157,12 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
                /* Destroy the CM ID by returning a non-zero value. */
                id_priv->cm_id.ib = NULL;
                cma_exch(id_priv, CMA_DESTROYING);
-               cma_enable_remove(id_priv);
+               mutex_unlock(&id_priv->handler_mutex);
                rdma_destroy_id(&id_priv->id);
                return ret;
        }
 out:
-       cma_enable_remove(id_priv);
+       mutex_unlock(&id_priv->handler_mutex);
        return ret;
 }
 
@@ -2564,8 +2560,8 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
        int ret;
 
        id_priv = mc->id_priv;
-       if (cma_disable_remove(id_priv, CMA_ADDR_BOUND) &&
-           cma_disable_remove(id_priv, CMA_ADDR_RESOLVED))
+       if (cma_disable_callback(id_priv, CMA_ADDR_BOUND) &&
+           cma_disable_callback(id_priv, CMA_ADDR_RESOLVED))
                return 0;
 
        mutex_lock(&id_priv->qp_mutex);
@@ -2590,12 +2586,12 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
        ret = id_priv->id.event_handler(&id_priv->id, &event);
        if (ret) {
                cma_exch(id_priv, CMA_DESTROYING);
-               cma_enable_remove(id_priv);
+               mutex_unlock(&id_priv->handler_mutex);
                rdma_destroy_id(&id_priv->id);
                return 0;
        }
 
-       cma_enable_remove(id_priv);
+       mutex_unlock(&id_priv->handler_mutex);
        return 0;
 }
 
@@ -2754,6 +2750,7 @@ static int cma_remove_id_dev(struct rdma_id_private *id_priv)
 {
        struct rdma_cm_event event;
        enum cma_state state;
+       int ret = 0;
 
        /* Record that we want to remove the device */
        state = cma_exch(id_priv, CMA_DEVICE_REMOVAL);
@@ -2761,15 +2758,18 @@ static int cma_remove_id_dev(struct rdma_id_private *id_priv)
                return 0;
 
        cma_cancel_operation(id_priv, state);
-       wait_event(id_priv->wait_remove, !atomic_read(&id_priv->dev_remove));
+       mutex_lock(&id_priv->handler_mutex);
 
        /* Check for destruction from another callback. */
        if (!cma_comp(id_priv, CMA_DEVICE_REMOVAL))
-               return 0;
+               goto out;
 
        memset(&event, 0, sizeof event);
        event.event = RDMA_CM_EVENT_DEVICE_REMOVAL;
-       return id_priv->id.event_handler(&id_priv->id, &event);
+       ret = id_priv->id.event_handler(&id_priv->id, &event);
+out:
+       mutex_unlock(&id_priv->handler_mutex);
+       return ret;
 }
 
 static void cma_process_remove(struct cma_device *cma_dev)
index 7ad47a4b166b2bfbd35e5f8359b80827637c8e80..05ac36e6acdb5a6df388446901a3e99e14b598b4 100644 (file)
@@ -28,8 +28,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: core_priv.h 1349 2004-12-16 21:09:43Z roland $
  */
 
 #ifndef _CORE_PRIV_H
index 5ac5ffee05cbbc044c34aecfd6c1f323f719ec6e..7913b804311ebc3809522f99246e01bc84b723f4 100644 (file)
@@ -29,8 +29,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: device.c 1349 2004-12-16 21:09:43Z roland $
  */
 
 #include <linux/module.h>
index 1286dc1b98b2acda72ce158ef8abd443cc3c8dc1..4507043d24c8c7df0580bba9afb7c3ed935c091c 100644 (file)
@@ -29,8 +29,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: fmr_pool.c 2730 2005-06-28 16:43:03Z sean.hefty $
  */
 
 #include <linux/errno.h>
index 8b75010016ecab62ed591d4d687bca6ed9a4ca38..05ce331733b069413c69d1ff7d9a2c00f4231cd2 100644 (file)
@@ -30,8 +30,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: mad_priv.h 5596 2006-03-03 01:00:07Z sean.hefty $
  */
 
 #ifndef __IB_MAD_PRIV_H__
index a5e2a310f312f6dfc447a83704e2e169f0348cb5..d0ef7d61c03721a6fae0d59a20d80b01d29275a7 100644 (file)
@@ -29,8 +29,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: mad_rmpp.c 1921 2005-03-02 22:58:44Z sean.hefty $
  */
 
 #include "mad_priv.h"
index f0616fd2249479d25d97cca938186a058226c452..3d336bff11486aee7b02f73449ac51b96dba717d 100644 (file)
@@ -28,8 +28,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: mad_rmpp.h 1921 2005-02-25 22:58:44Z sean.hefty $
  */
 
 #ifndef __MAD_RMPP_H__
index c972d72357647e874302a979d36bc22eee9c3f17..019bd4b0863e2ea34441f5ec10d2d9592d11722e 100644 (file)
@@ -29,8 +29,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: packer.c 1349 2004-12-16 21:09:43Z roland $
  */
 
 #include <linux/string.h>
index cf474ec270703fd8e565c19e28f8bc378f326e70..1341de793e51d0694d45501c9e54bf4128ba982e 100644 (file)
@@ -30,8 +30,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: sa_query.c 2811 2005-07-06 18:11:43Z halr $
  */
 
 #include <linux/module.h>
@@ -361,7 +359,7 @@ static void update_sm_ah(struct work_struct *work)
 {
        struct ib_sa_port *port =
                container_of(work, struct ib_sa_port, update_task);
-       struct ib_sa_sm_ah *new_ah, *old_ah;
+       struct ib_sa_sm_ah *new_ah;
        struct ib_port_attr port_attr;
        struct ib_ah_attr   ah_attr;
 
@@ -397,12 +395,9 @@ static void update_sm_ah(struct work_struct *work)
        }
 
        spin_lock_irq(&port->ah_lock);
-       old_ah = port->sm_ah;
        port->sm_ah = new_ah;
        spin_unlock_irq(&port->ah_lock);
 
-       if (old_ah)
-               kref_put(&old_ah->ref, free_sm_ah);
 }
 
 static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event)
@@ -413,8 +408,17 @@ static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event
            event->event == IB_EVENT_PKEY_CHANGE ||
            event->event == IB_EVENT_SM_CHANGE   ||
            event->event == IB_EVENT_CLIENT_REREGISTER) {
-               struct ib_sa_device *sa_dev;
-               sa_dev = container_of(handler, typeof(*sa_dev), event_handler);
+               unsigned long flags;
+               struct ib_sa_device *sa_dev =
+                       container_of(handler, typeof(*sa_dev), event_handler);
+               struct ib_sa_port *port =
+                       &sa_dev->port[event->element.port_num - sa_dev->start_port];
+
+               spin_lock_irqsave(&port->ah_lock, flags);
+               if (port->sm_ah)
+                       kref_put(&port->sm_ah->ref, free_sm_ah);
+               port->sm_ah = NULL;
+               spin_unlock_irqrestore(&port->ah_lock, flags);
 
                schedule_work(&sa_dev->port[event->element.port_num -
                                            sa_dev->start_port].update_task);
@@ -519,6 +523,10 @@ static int alloc_mad(struct ib_sa_query *query, gfp_t gfp_mask)
        unsigned long flags;
 
        spin_lock_irqsave(&query->port->ah_lock, flags);
+       if (!query->port->sm_ah) {
+               spin_unlock_irqrestore(&query->port->ah_lock, flags);
+               return -EAGAIN;
+       }
        kref_get(&query->port->sm_ah->ref);
        query->sm_ah = query->port->sm_ah;
        spin_unlock_irqrestore(&query->port->ah_lock, flags);
index 95756551cf7c94bb9c6679168c1a6a19d2381082..4d1042115598d39b95ccc76b12b80da0cbdf89c6 100644 (file)
@@ -30,8 +30,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: sysfs.c 1349 2004-12-16 21:09:43Z roland $
  */
 
 #include "core_priv.h"
@@ -665,6 +663,120 @@ static struct class ib_class = {
        .dev_uevent = ib_device_uevent,
 };
 
+/* Show a given attribute in the statistics group */
+static ssize_t show_protocol_stat(const struct device *device,
+                           struct device_attribute *attr, char *buf,
+                           unsigned offset)
+{
+       struct ib_device *dev = container_of(device, struct ib_device, dev);
+       union rdma_protocol_stats stats;
+       ssize_t ret;
+
+       ret = dev->get_protocol_stats(dev, &stats);
+       if (ret)
+               return ret;
+
+       return sprintf(buf, "%llu\n",
+                      (unsigned long long) ((u64 *) &stats)[offset]);
+}
+
+/* generate a read-only iwarp statistics attribute */
+#define IW_STATS_ENTRY(name)                                           \
+static ssize_t show_##name(struct device *device,                      \
+                          struct device_attribute *attr, char *buf)    \
+{                                                                      \
+       return show_protocol_stat(device, attr, buf,                    \
+                                 offsetof(struct iw_protocol_stats, name) / \
+                                 sizeof (u64));                        \
+}                                                                      \
+static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
+
+IW_STATS_ENTRY(ipInReceives);
+IW_STATS_ENTRY(ipInHdrErrors);
+IW_STATS_ENTRY(ipInTooBigErrors);
+IW_STATS_ENTRY(ipInNoRoutes);
+IW_STATS_ENTRY(ipInAddrErrors);
+IW_STATS_ENTRY(ipInUnknownProtos);
+IW_STATS_ENTRY(ipInTruncatedPkts);
+IW_STATS_ENTRY(ipInDiscards);
+IW_STATS_ENTRY(ipInDelivers);
+IW_STATS_ENTRY(ipOutForwDatagrams);
+IW_STATS_ENTRY(ipOutRequests);
+IW_STATS_ENTRY(ipOutDiscards);
+IW_STATS_ENTRY(ipOutNoRoutes);
+IW_STATS_ENTRY(ipReasmTimeout);
+IW_STATS_ENTRY(ipReasmReqds);
+IW_STATS_ENTRY(ipReasmOKs);
+IW_STATS_ENTRY(ipReasmFails);
+IW_STATS_ENTRY(ipFragOKs);
+IW_STATS_ENTRY(ipFragFails);
+IW_STATS_ENTRY(ipFragCreates);
+IW_STATS_ENTRY(ipInMcastPkts);
+IW_STATS_ENTRY(ipOutMcastPkts);
+IW_STATS_ENTRY(ipInBcastPkts);
+IW_STATS_ENTRY(ipOutBcastPkts);
+IW_STATS_ENTRY(tcpRtoAlgorithm);
+IW_STATS_ENTRY(tcpRtoMin);
+IW_STATS_ENTRY(tcpRtoMax);
+IW_STATS_ENTRY(tcpMaxConn);
+IW_STATS_ENTRY(tcpActiveOpens);
+IW_STATS_ENTRY(tcpPassiveOpens);
+IW_STATS_ENTRY(tcpAttemptFails);
+IW_STATS_ENTRY(tcpEstabResets);
+IW_STATS_ENTRY(tcpCurrEstab);
+IW_STATS_ENTRY(tcpInSegs);
+IW_STATS_ENTRY(tcpOutSegs);
+IW_STATS_ENTRY(tcpRetransSegs);
+IW_STATS_ENTRY(tcpInErrs);
+IW_STATS_ENTRY(tcpOutRsts);
+
+static struct attribute *iw_proto_stats_attrs[] = {
+       &dev_attr_ipInReceives.attr,
+       &dev_attr_ipInHdrErrors.attr,
+       &dev_attr_ipInTooBigErrors.attr,
+       &dev_attr_ipInNoRoutes.attr,
+       &dev_attr_ipInAddrErrors.attr,
+       &dev_attr_ipInUnknownProtos.attr,
+       &dev_attr_ipInTruncatedPkts.attr,
+       &dev_attr_ipInDiscards.attr,
+       &dev_attr_ipInDelivers.attr,
+       &dev_attr_ipOutForwDatagrams.attr,
+       &dev_attr_ipOutRequests.attr,
+       &dev_attr_ipOutDiscards.attr,
+       &dev_attr_ipOutNoRoutes.attr,
+       &dev_attr_ipReasmTimeout.attr,
+       &dev_attr_ipReasmReqds.attr,
+       &dev_attr_ipReasmOKs.attr,
+       &dev_attr_ipReasmFails.attr,
+       &dev_attr_ipFragOKs.attr,
+       &dev_attr_ipFragFails.attr,
+       &dev_attr_ipFragCreates.attr,
+       &dev_attr_ipInMcastPkts.attr,
+       &dev_attr_ipOutMcastPkts.attr,
+       &dev_attr_ipInBcastPkts.attr,
+       &dev_attr_ipOutBcastPkts.attr,
+       &dev_attr_tcpRtoAlgorithm.attr,
+       &dev_attr_tcpRtoMin.attr,
+       &dev_attr_tcpRtoMax.attr,
+       &dev_attr_tcpMaxConn.attr,
+       &dev_attr_tcpActiveOpens.attr,
+       &dev_attr_tcpPassiveOpens.attr,
+       &dev_attr_tcpAttemptFails.attr,
+       &dev_attr_tcpEstabResets.attr,
+       &dev_attr_tcpCurrEstab.attr,
+       &dev_attr_tcpInSegs.attr,
+       &dev_attr_tcpOutSegs.attr,
+       &dev_attr_tcpRetransSegs.attr,
+       &dev_attr_tcpInErrs.attr,
+       &dev_attr_tcpOutRsts.attr,
+       NULL
+};
+
+static struct attribute_group iw_stats_group = {
+       .name   = "proto_stats",
+       .attrs  = iw_proto_stats_attrs,
+};
+
 int ib_device_register_sysfs(struct ib_device *device)
 {
        struct device *class_dev = &device->dev;
@@ -707,6 +819,12 @@ int ib_device_register_sysfs(struct ib_device *device)
                }
        }
 
+       if (device->node_type == RDMA_NODE_RNIC && device->get_protocol_stats) {
+               ret = sysfs_create_group(&class_dev->kobj, &iw_stats_group);
+               if (ret)
+                       goto err_put;
+       }
+
        return 0;
 
 err_put:
index b25675faaaf5c584c5d7bc81e65d8e6f3110aa60..9494005d1c9a62fe1463c94320af061f6ee1e40e 100644 (file)
@@ -29,8 +29,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: ucm.c 4311 2005-12-05 18:42:01Z sean.hefty $
  */
 
 #include <linux/completion.h>
index 997c07db6d8fb216a132b12555d4cc27357e7531..8ec7876bedcf3a6b1c0180cf73358b27e9c306bc 100644 (file)
@@ -29,8 +29,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: ud_header.c 1349 2004-12-16 21:09:43Z roland $
  */
 
 #include <linux/errno.h>
index a1768dbb0720b2f42db9a408486ab9526f66e6c6..6f7c096abf1366007cb2352c211a0f8eba132d69 100644 (file)
@@ -30,8 +30,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: uverbs_mem.c 2743 2005-06-28 22:27:59Z roland $
  */
 
 #include <linux/mm.h>
index 208c7f34323ccc372c4618dac9808badff6151b2..268a2d23b7c9a8e9092877ca319989891e99e90d 100644 (file)
@@ -31,8 +31,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: user_mad.c 5596 2006-03-03 01:00:07Z sean.hefty $
  */
 
 #include <linux/module.h>
index 376a57ce1b4067b1fc0e12c96f5863d0d849b741..b3ea9587dc808641e79481131c7119a71cb37431 100644 (file)
@@ -32,8 +32,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: uverbs.h 2559 2005-06-06 19:43:16Z roland $
  */
 
 #ifndef UVERBS_H
index 2c3bff5fe8676f304d24b0d4490030565e9f20c6..56feab6c251edd8e688647ee0cbf6617522b6e1c 100644 (file)
@@ -31,8 +31,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: uverbs_cmd.c 2708 2005-06-24 17:27:21Z roland $
  */
 
 #include <linux/file.h>
@@ -919,7 +917,7 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
                resp->wc[i].opcode         = wc[i].opcode;
                resp->wc[i].vendor_err     = wc[i].vendor_err;
                resp->wc[i].byte_len       = wc[i].byte_len;
-               resp->wc[i].imm_data       = (__u32 __force) wc[i].imm_data;
+               resp->wc[i].ex.imm_data    = (__u32 __force) wc[i].ex.imm_data;
                resp->wc[i].qp_num         = wc[i].qp->qp_num;
                resp->wc[i].src_qp         = wc[i].src_qp;
                resp->wc[i].wc_flags       = wc[i].wc_flags;
index 0f34858e31e717ac676ce3f19f121f614a8b9e2c..aeee856c4060648f4898c4014d30ed2c73065bdd 100644 (file)
@@ -32,8 +32,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: uverbs_main.c 2733 2005-06-28 19:14:34Z roland $
  */
 
 #include <linux/module.h>
index 05042089de6ea32e3a2c6d585324f9ed9557f4b1..a7da9be43e617a56eca3efa9c33532086c3ae46c 100644 (file)
@@ -34,8 +34,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: verbs.c 1349 2004-12-16 21:09:43Z roland $
  */
 
 #include <linux/errno.h>
@@ -317,7 +315,6 @@ static const struct {
 } qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {
        [IB_QPS_RESET] = {
                [IB_QPS_RESET] = { .valid = 1 },
-               [IB_QPS_ERR]   = { .valid = 1 },
                [IB_QPS_INIT]  = {
                        .valid = 1,
                        .req_param = {
@@ -755,6 +752,52 @@ int ib_dereg_mr(struct ib_mr *mr)
 }
 EXPORT_SYMBOL(ib_dereg_mr);
 
+struct ib_mr *ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len)
+{
+       struct ib_mr *mr;
+
+       if (!pd->device->alloc_fast_reg_mr)
+               return ERR_PTR(-ENOSYS);
+
+       mr = pd->device->alloc_fast_reg_mr(pd, max_page_list_len);
+
+       if (!IS_ERR(mr)) {
+               mr->device  = pd->device;
+               mr->pd      = pd;
+               mr->uobject = NULL;
+               atomic_inc(&pd->usecnt);
+               atomic_set(&mr->usecnt, 0);
+       }
+
+       return mr;
+}
+EXPORT_SYMBOL(ib_alloc_fast_reg_mr);
+
+struct ib_fast_reg_page_list *ib_alloc_fast_reg_page_list(struct ib_device *device,
+                                                         int max_page_list_len)
+{
+       struct ib_fast_reg_page_list *page_list;
+
+       if (!device->alloc_fast_reg_page_list)
+               return ERR_PTR(-ENOSYS);
+
+       page_list = device->alloc_fast_reg_page_list(device, max_page_list_len);
+
+       if (!IS_ERR(page_list)) {
+               page_list->device = device;
+               page_list->max_page_list_len = max_page_list_len;
+       }
+
+       return page_list;
+}
+EXPORT_SYMBOL(ib_alloc_fast_reg_page_list);
+
+void ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
+{
+       page_list->device->free_fast_reg_page_list(page_list);
+}
+EXPORT_SYMBOL(ib_free_fast_reg_page_list);
+
 /* Memory windows */
 
 struct ib_mw *ib_alloc_mw(struct ib_pd *pd)
index b1441aeb60c27d017d66e667ffbcd071f56f5c36..dd05c48356425f30e2109cd4b17ba484c2919a0d 100644 (file)
@@ -454,7 +454,7 @@ int __devinit c2_rnic_init(struct c2_dev *c2dev)
            (IB_DEVICE_RESIZE_MAX_WR |
             IB_DEVICE_CURR_QP_STATE_MOD |
             IB_DEVICE_SYS_IMAGE_GUID |
-            IB_DEVICE_ZERO_STAG |
+            IB_DEVICE_LOCAL_DMA_LKEY |
             IB_DEVICE_MEM_WINDOW);
 
        /* Allocate the qptr_array */
index 3f441fc57c1719c6991d028dd5a0979760aa2d30..f6d5747153a5282f7318aea8dd1ae31a9057bee7 100644 (file)
@@ -145,7 +145,9 @@ static int cxio_hal_clear_qp_ctx(struct cxio_rdev *rdev_p, u32 qpid)
        }
        wqe = (struct t3_modify_qp_wr *) skb_put(skb, sizeof(*wqe));
        memset(wqe, 0, sizeof(*wqe));
-       build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 3, 0, qpid, 7);
+       build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD,
+                      T3_COMPLETION_FLAG | T3_NOTIFY_FLAG, 0, qpid, 7,
+                      T3_SOPEOP);
        wqe->flags = cpu_to_be32(MODQP_WRITE_EC);
        sge_cmd = qpid << 8 | 3;
        wqe->sge_cmd = cpu_to_be64(sge_cmd);
@@ -276,7 +278,7 @@ int cxio_create_qp(struct cxio_rdev *rdev_p, u32 kernel_domain,
        if (!wq->qpid)
                return -ENOMEM;
 
-       wq->rq = kzalloc(depth * sizeof(u64), GFP_KERNEL);
+       wq->rq = kzalloc(depth * sizeof(struct t3_swrq), GFP_KERNEL);
        if (!wq->rq)
                goto err1;
 
@@ -300,6 +302,7 @@ int cxio_create_qp(struct cxio_rdev *rdev_p, u32 kernel_domain,
        if (!kernel_domain)
                wq->udb = (u64)rdev_p->rnic_info.udbell_physbase +
                                        (wq->qpid << rdev_p->qpshift);
+       wq->rdev = rdev_p;
        PDBG("%s qpid 0x%x doorbell 0x%p udb 0x%llx\n", __func__,
             wq->qpid, wq->doorbell, (unsigned long long) wq->udb);
        return 0;
@@ -558,7 +561,7 @@ static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p)
        wqe = (struct t3_modify_qp_wr *) skb_put(skb, sizeof(*wqe));
        memset(wqe, 0, sizeof(*wqe));
        build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 0, 0,
-                      T3_CTL_QP_TID, 7);
+                      T3_CTL_QP_TID, 7, T3_SOPEOP);
        wqe->flags = cpu_to_be32(MODQP_WRITE_EC);
        sge_cmd = (3ULL << 56) | FW_RI_SGEEC_START << 8 | 3;
        wqe->sge_cmd = cpu_to_be64(sge_cmd);
@@ -674,7 +677,7 @@ static int cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr,
                build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_BP, flag,
                               Q_GENBIT(rdev_p->ctrl_qp.wptr,
                                        T3_CTRL_QP_SIZE_LOG2), T3_CTRL_QP_ID,
-                              wr_len);
+                              wr_len, T3_SOPEOP);
                if (flag == T3_COMPLETION_FLAG)
                        ring_doorbell(rdev_p->ctrl_qp.doorbell, T3_CTRL_QP_ID);
                len -= 96;
@@ -816,6 +819,13 @@ int cxio_deallocate_window(struct cxio_rdev *rdev_p, u32 stag)
                             0, 0);
 }
 
+int cxio_allocate_stag(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid, u32 pbl_size, u32 pbl_addr)
+{
+       *stag = T3_STAG_UNSET;
+       return __cxio_tpt_op(rdev_p, 0, stag, 0, pdid, TPT_NON_SHARED_MR,
+                            0, 0, 0ULL, 0, 0, pbl_size, pbl_addr);
+}
+
 int cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr)
 {
        struct t3_rdma_init_wr *wqe;
@@ -1257,13 +1267,16 @@ proc_cqe:
                wq->sq_rptr = CQE_WRID_SQ_WPTR(*hw_cqe);
                PDBG("%s completing sq idx %ld\n", __func__,
                     Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2));
-               *cookie = (wq->sq +
-                          Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2))->wr_id;
+               *cookie = wq->sq[Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2)].wr_id;
                wq->sq_rptr++;
        } else {
                PDBG("%s completing rq idx %ld\n", __func__,
                     Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2));
-               *cookie = *(wq->rq + Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2));
+               *cookie = wq->rq[Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)].wr_id;
+               if (wq->rq[Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)].pbl_addr)
+                       cxio_hal_pblpool_free(wq->rdev,
+                               wq->rq[Q_PTR2IDX(wq->rq_rptr,
+                               wq->rq_size_log2)].pbl_addr, T3_STAG0_PBL_SIZE);
                wq->rq_rptr++;
        }
 
index 6e128f6bab05140282e1d86930aaab98ddb4cc0c..656fe47bc84f94ea2d1af60e2d916be85a69cd47 100644 (file)
 #define T3_CTRL_QP_SIZE_LOG2  8
 #define T3_CTRL_CQ_ID    0
 
-/* TBD */
 #define T3_MAX_NUM_RI (1<<15)
 #define T3_MAX_NUM_QP (1<<15)
 #define T3_MAX_NUM_CQ (1<<15)
 #define T3_MAX_NUM_PD (1<<15)
 #define T3_MAX_PBL_SIZE 256
 #define T3_MAX_RQ_SIZE 1024
+#define T3_MAX_QP_DEPTH (T3_MAX_RQ_SIZE-1)
+#define T3_MAX_CQ_DEPTH 8192
 #define T3_MAX_NUM_STAG (1<<15)
 #define T3_MAX_MR_SIZE 0x100000000ULL
+#define T3_PAGESIZE_MASK 0xffff000  /* 4KB-128MB */
 
 #define T3_STAG_UNSET 0xffffffff
 
@@ -165,6 +167,7 @@ int cxio_reregister_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid,
 int cxio_dereg_mem(struct cxio_rdev *rdev, u32 stag, u32 pbl_size,
                   u32 pbl_addr);
 int cxio_allocate_window(struct cxio_rdev *rdev, u32 * stag, u32 pdid);
+int cxio_allocate_stag(struct cxio_rdev *rdev, u32 *stag, u32 pdid, u32 pbl_size, u32 pbl_addr);
 int cxio_deallocate_window(struct cxio_rdev *rdev, u32 stag);
 int cxio_rdma_init(struct cxio_rdev *rdev, struct t3_rdma_init_attr *attr);
 void cxio_register_ev_cb(cxio_hal_ev_callback_func_t ev_cb);
index f1a25a821a45049f6c5688b92ca4d5c14b6e9fcf..04618f7bfbb323fc9cdd3ec44046ebb952813457 100644 (file)
@@ -39,6 +39,9 @@
 
 #define T3_MAX_SGE      4
 #define T3_MAX_INLINE  64
+#define T3_STAG0_PBL_SIZE (2 * T3_MAX_SGE << 3)
+#define T3_STAG0_MAX_PBE_LEN (128 * 1024 * 1024)
+#define T3_STAG0_PAGE_SHIFT 15
 
 #define Q_EMPTY(rptr,wptr) ((rptr)==(wptr))
 #define Q_FULL(rptr,wptr,size_log2)  ( (((wptr)-(rptr))>>(size_log2)) && \
@@ -72,7 +75,8 @@ enum t3_wr_opcode {
        T3_WR_BIND = FW_WROPCODE_RI_BIND_MW,
        T3_WR_RCV = FW_WROPCODE_RI_RECEIVE,
        T3_WR_INIT = FW_WROPCODE_RI_RDMA_INIT,
-       T3_WR_QP_MOD = FW_WROPCODE_RI_MODIFY_QP
+       T3_WR_QP_MOD = FW_WROPCODE_RI_MODIFY_QP,
+       T3_WR_FASTREG = FW_WROPCODE_RI_FASTREGISTER_MR
 } __attribute__ ((packed));
 
 enum t3_rdma_opcode {
@@ -89,7 +93,8 @@ enum t3_rdma_opcode {
        T3_FAST_REGISTER,
        T3_LOCAL_INV,
        T3_QP_MOD,
-       T3_BYPASS
+       T3_BYPASS,
+       T3_RDMA_READ_REQ_WITH_INV,
 } __attribute__ ((packed));
 
 static inline enum t3_rdma_opcode wr2opcode(enum t3_wr_opcode wrop)
@@ -103,6 +108,7 @@ static inline enum t3_rdma_opcode wr2opcode(enum t3_wr_opcode wrop)
                case T3_WR_BIND: return T3_BIND_MW;
                case T3_WR_INIT: return T3_RDMA_INIT;
                case T3_WR_QP_MOD: return T3_QP_MOD;
+               case T3_WR_FASTREG: return T3_FAST_REGISTER;
                default: break;
        }
        return -1;
@@ -170,11 +176,54 @@ struct t3_send_wr {
        struct t3_sge sgl[T3_MAX_SGE];  /* 4+ */
 };
 
+#define T3_MAX_FASTREG_DEPTH 24
+#define T3_MAX_FASTREG_FRAG 10
+
+struct t3_fastreg_wr {
+       struct fw_riwrh wrh;    /* 0 */
+       union t3_wrid wrid;     /* 1 */
+       __be32 stag;            /* 2 */
+       __be32 len;
+       __be32 va_base_hi;      /* 3 */
+       __be32 va_base_lo_fbo;
+       __be32 page_type_perms; /* 4 */
+       __be32 reserved1;
+       __be64 pbl_addrs[0];    /* 5+ */
+};
+
+/*
+ * If a fastreg wr spans multiple wqes, then the 2nd fragment looks like this.
+ */
+struct t3_pbl_frag {
+       struct fw_riwrh wrh;    /* 0 */
+       __be64 pbl_addrs[14];   /* 1..14 */
+};
+
+#define S_FR_PAGE_COUNT                24
+#define M_FR_PAGE_COUNT                0xff
+#define V_FR_PAGE_COUNT(x)     ((x) << S_FR_PAGE_COUNT)
+#define G_FR_PAGE_COUNT(x)     ((((x) >> S_FR_PAGE_COUNT)) & M_FR_PAGE_COUNT)
+
+#define S_FR_PAGE_SIZE         16
+#define M_FR_PAGE_SIZE         0x1f
+#define V_FR_PAGE_SIZE(x)      ((x) << S_FR_PAGE_SIZE)
+#define G_FR_PAGE_SIZE(x)      ((((x) >> S_FR_PAGE_SIZE)) & M_FR_PAGE_SIZE)
+
+#define S_FR_TYPE              8
+#define M_FR_TYPE              0x1
+#define V_FR_TYPE(x)           ((x) << S_FR_TYPE)
+#define G_FR_TYPE(x)           ((((x) >> S_FR_TYPE)) & M_FR_TYPE)
+
+#define S_FR_PERMS             0
+#define M_FR_PERMS             0xff
+#define V_FR_PERMS(x)          ((x) << S_FR_PERMS)
+#define G_FR_PERMS(x)          ((((x) >> S_FR_PERMS)) & M_FR_PERMS)
+
 struct t3_local_inv_wr {
        struct fw_riwrh wrh;    /* 0 */
        union t3_wrid wrid;     /* 1 */
        __be32 stag;            /* 2 */
-       __be32 reserved3;
+       __be32 reserved;
 };
 
 struct t3_rdma_write_wr {
@@ -193,7 +242,8 @@ struct t3_rdma_read_wr {
        struct fw_riwrh wrh;    /* 0 */
        union t3_wrid wrid;     /* 1 */
        u8 rdmaop;              /* 2 */
-       u8 reserved[3];
+       u8 local_inv;
+       u8 reserved[2];
        __be32 rem_stag;
        __be64 rem_to;          /* 3 */
        __be32 local_stag;      /* 4 */
@@ -201,18 +251,6 @@ struct t3_rdma_read_wr {
        __be64 local_to;        /* 5 */
 };
 
-enum t3_addr_type {
-       T3_VA_BASED_TO = 0x0,
-       T3_ZERO_BASED_TO = 0x1
-} __attribute__ ((packed));
-
-enum t3_mem_perms {
-       T3_MEM_ACCESS_LOCAL_READ = 0x1,
-       T3_MEM_ACCESS_LOCAL_WRITE = 0x2,
-       T3_MEM_ACCESS_REM_READ = 0x4,
-       T3_MEM_ACCESS_REM_WRITE = 0x8
-} __attribute__ ((packed));
-
 struct t3_bind_mw_wr {
        struct fw_riwrh wrh;    /* 0 */
        union t3_wrid wrid;     /* 1 */
@@ -336,6 +374,11 @@ struct t3_genbit {
        __be64 genbit;
 };
 
+struct t3_wq_in_err {
+       u64 flit[13];
+       u64 err;
+};
+
 enum rdma_init_wr_flags {
        MPA_INITIATOR = (1<<0),
        PRIV_QP = (1<<1),
@@ -346,13 +389,16 @@ union t3_wr {
        struct t3_rdma_write_wr write;
        struct t3_rdma_read_wr read;
        struct t3_receive_wr recv;
+       struct t3_fastreg_wr fastreg;
+       struct t3_pbl_frag pbl_frag;
        struct t3_local_inv_wr local_inv;
        struct t3_bind_mw_wr bind;
        struct t3_bypass_wr bypass;
        struct t3_rdma_init_wr init;
        struct t3_modify_qp_wr qp_mod;
        struct t3_genbit genbit;
-       u64 flit[16];
+       struct t3_wq_in_err wq_in_err;
+       __be64 flit[16];
 };
 
 #define T3_SQ_CQE_FLIT   13
@@ -366,12 +412,18 @@ static inline enum t3_wr_opcode fw_riwrh_opcode(struct fw_riwrh *wqe)
        return G_FW_RIWR_OP(be32_to_cpu(wqe->op_seop_flags));
 }
 
+enum t3_wr_hdr_bits {
+       T3_EOP = 1,
+       T3_SOP = 2,
+       T3_SOPEOP = T3_EOP|T3_SOP,
+};
+
 static inline void build_fw_riwrh(struct fw_riwrh *wqe, enum t3_wr_opcode op,
                                  enum t3_wr_flags flags, u8 genbit, u32 tid,
-                                 u8 len)
+                                 u8 len, u8 sopeop)
 {
        wqe->op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(op) |
-                                        V_FW_RIWR_SOPEOP(M_FW_RIWR_SOPEOP) |
+                                        V_FW_RIWR_SOPEOP(sopeop) |
                                         V_FW_RIWR_FLAGS(flags));
        wmb();
        wqe->gen_tid_len = cpu_to_be32(V_FW_RIWR_GEN(genbit) |
@@ -404,6 +456,7 @@ enum tpt_addr_type {
 };
 
 enum tpt_mem_perm {
+       TPT_MW_BIND = 0x10,
        TPT_LOCAL_READ = 0x8,
        TPT_LOCAL_WRITE = 0x4,
        TPT_REMOTE_READ = 0x2,
@@ -615,6 +668,11 @@ struct t3_swsq {
        int                     signaled;
 };
 
+struct t3_swrq {
+       __u64                   wr_id;
+       __u32                   pbl_addr;
+};
+
 /*
  * A T3 WQ implements both the SQ and RQ.
  */
@@ -631,14 +689,15 @@ struct t3_wq {
        u32 sq_wptr;                    /* sq_wptr - sq_rptr == count of */
        u32 sq_rptr;                    /* pending wrs */
        u32 sq_size_log2;               /* sq size */
-       u64 *rq;                        /* SW RQ (holds consumer wr_ids */
+       struct t3_swrq *rq;             /* SW RQ (holds consumer wr_ids) */
        u32 rq_wptr;                    /* rq_wptr - rq_rptr == count of */
        u32 rq_rptr;                    /* pending wrs */
-       u64 *rq_oldest_wr;              /* oldest wr on the SW RQ */
+       struct t3_swrq *rq_oldest_wr;   /* oldest wr on the SW RQ */
        u32 rq_size_log2;               /* rq size */
        u32 rq_addr;                    /* rq adapter address */
        void __iomem *doorbell;         /* kernel db */
        u64 udb;                        /* user db if any */
+       struct cxio_rdev *rdev;
 };
 
 struct t3_cq {
@@ -659,7 +718,7 @@ struct t3_cq {
 
 static inline void cxio_set_wq_in_error(struct t3_wq *wq)
 {
-       wq->queue->flit[13] = 1;
+       wq->queue->wq_in_err.err = 1;
 }
 
 static inline struct t3_cqe *cxio_next_hw_cqe(struct t3_cq *cq)
index 71554eacb13ce3f87d19b8bd582f05eabde6a8f9..4489c89d6710c6b22c478f4b7742ff0ff4688576 100644 (file)
@@ -71,18 +71,16 @@ static void rnic_init(struct iwch_dev *rnicp)
        idr_init(&rnicp->mmidr);
        spin_lock_init(&rnicp->lock);
 
-       rnicp->attr.vendor_id = 0x168;
-       rnicp->attr.vendor_part_id = 7;
        rnicp->attr.max_qps = T3_MAX_NUM_QP - 32;
-       rnicp->attr.max_wrs = (1UL << 24) - 1;
+       rnicp->attr.max_wrs = T3_MAX_QP_DEPTH;
        rnicp->attr.max_sge_per_wr = T3_MAX_SGE;
        rnicp->attr.max_sge_per_rdma_write_wr = T3_MAX_SGE;
        rnicp->attr.max_cqs = T3_MAX_NUM_CQ - 1;
-       rnicp->attr.max_cqes_per_cq = (1UL << 24) - 1;
+       rnicp->attr.max_cqes_per_cq = T3_MAX_CQ_DEPTH;
        rnicp->attr.max_mem_regs = cxio_num_stags(&rnicp->rdev);
        rnicp->attr.max_phys_buf_entries = T3_MAX_PBL_SIZE;
        rnicp->attr.max_pds = T3_MAX_NUM_PD - 1;
-       rnicp->attr.mem_pgsizes_bitmask = 0x7FFF;       /* 4KB-128MB */
+       rnicp->attr.mem_pgsizes_bitmask = T3_PAGESIZE_MASK;
        rnicp->attr.max_mr_size = T3_MAX_MR_SIZE;
        rnicp->attr.can_resize_wq = 0;
        rnicp->attr.max_rdma_reads_per_qp = 8;
index d2409a505e8d5d3f6b659b4af0a07d0421cf8672..3773453b2cf06e02a2cca70fde7c0f13bce710b3 100644 (file)
@@ -48,8 +48,6 @@ struct iwch_qp;
 struct iwch_mr;
 
 struct iwch_rnic_attributes {
-       u32 vendor_id;
-       u32 vendor_part_id;
        u32 max_qps;
        u32 max_wrs;                            /* Max for any SQ/RQ */
        u32 max_sge_per_wr;
index 4ee8ccd0a9e52f65f62b20627226fccfac80e0f4..cf5474ae68ff010ae23004c45d4b421a1ff1b49c 100644 (file)
@@ -81,6 +81,7 @@ static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp,
        wc->wr_id = cookie;
        wc->qp = &qhp->ibqp;
        wc->vendor_err = CQE_STATUS(cqe);
+       wc->wc_flags = 0;
 
        PDBG("%s qpid 0x%x type %d opcode %d status 0x%x wrid hi 0x%x "
             "lo 0x%x cookie 0x%llx\n", __func__,
@@ -94,6 +95,11 @@ static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp,
                else
                        wc->byte_len = 0;
                wc->opcode = IB_WC_RECV;
+               if (CQE_OPCODE(cqe) == T3_SEND_WITH_INV ||
+                   CQE_OPCODE(cqe) == T3_SEND_WITH_SE_INV) {
+                       wc->ex.invalidate_rkey = CQE_WRID_STAG(cqe);
+                       wc->wc_flags |= IB_WC_WITH_INVALIDATE;
+               }
        } else {
                switch (CQE_OPCODE(cqe)) {
                case T3_RDMA_WRITE:
@@ -105,17 +111,20 @@ static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp,
                        break;
                case T3_SEND:
                case T3_SEND_WITH_SE:
+               case T3_SEND_WITH_INV:
+               case T3_SEND_WITH_SE_INV:
                        wc->opcode = IB_WC_SEND;
                        break;
                case T3_BIND_MW:
                        wc->opcode = IB_WC_BIND_MW;
                        break;
 
-               /* these aren't supported yet */
-               case T3_SEND_WITH_INV:
-               case T3_SEND_WITH_SE_INV:
                case T3_LOCAL_INV:
+                       wc->opcode = IB_WC_LOCAL_INV;
+                       break;
                case T3_FAST_REGISTER:
+                       wc->opcode = IB_WC_FAST_REG_MR;
+                       break;
                default:
                        printk(KERN_ERR MOD "Unexpected opcode %d "
                               "in the CQE received for QPID=0x%0x\n",
index 95f82cfb6c54de3b95a3a98a0e303e9c8f2321a3..b89640aa6e103e00edd9644ab6a806ff1c30c8c6 100644 (file)
@@ -56,6 +56,7 @@
 #include "iwch_provider.h"
 #include "iwch_cm.h"
 #include "iwch_user.h"
+#include "common.h"
 
 static int iwch_modify_port(struct ib_device *ibdev,
                            u8 port, int port_modify_mask,
@@ -747,6 +748,7 @@ static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd)
        mhp->attr.type = TPT_MW;
        mhp->attr.stag = stag;
        mmid = (stag) >> 8;
+       mhp->ibmw.rkey = stag;
        insert_handle(rhp, &rhp->mmidr, mhp, mmid);
        PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag);
        return &(mhp->ibmw);
@@ -768,6 +770,68 @@ static int iwch_dealloc_mw(struct ib_mw *mw)
        return 0;
 }
 
+/*
+ * iwch_alloc_fast_reg_mr - allocate a memory region for fast-register WRs.
+ * @pd: protection domain that will own the region
+ * @pbl_depth: number of PBL entries to reserve for the region
+ *
+ * Allocates the iwch_mr, reserves a PBL of @pbl_depth entries, allocates an
+ * STag for that PBL and publishes the MR in the device's mmidr table under
+ * (stag >> 8).  The STag is used as both lkey and rkey.
+ *
+ * Every failure, including a failed insert into mmidr, releases whatever
+ * was acquired up to that point.
+ *
+ * Returns the embedded ib_mr on success or an ERR_PTR() on failure.
+ */
+static struct ib_mr *iwch_alloc_fast_reg_mr(struct ib_pd *pd, int pbl_depth)
+{
+       struct iwch_dev *rhp;
+       struct iwch_pd *php;
+       struct iwch_mr *mhp;
+       u32 mmid;
+       u32 stag = 0;
+       int ret;
+
+       php = to_iwch_pd(pd);
+       rhp = php->rhp;
+       mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
+       if (!mhp)
+               return ERR_PTR(-ENOMEM);
+
+       mhp->rhp = rhp;
+       ret = iwch_alloc_pbl(mhp, pbl_depth);
+       if (ret)
+               goto err_free_mhp;
+       mhp->attr.pbl_size = pbl_depth;
+       ret = cxio_allocate_stag(&rhp->rdev, &stag, php->pdid,
+                                mhp->attr.pbl_size, mhp->attr.pbl_addr);
+       if (ret)
+               goto err_free_pbl;
+       mhp->attr.pdid = php->pdid;
+       mhp->attr.type = TPT_NON_SHARED_MR;
+       mhp->attr.stag = stag;
+       mhp->attr.state = 1;
+       mmid = (stag) >> 8;
+       mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
+
+       /*
+        * An MR that is not in mmidr cannot be found again by its STag,
+        * so a failed insert must fail the whole allocation.
+        */
+       ret = insert_handle(rhp, &rhp->mmidr, mhp, mmid);
+       if (ret)
+               goto err_dereg_mem;
+       PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag);
+       return &(mhp->ibmr);
+
+err_dereg_mem:
+       cxio_dereg_mem(&rhp->rdev, stag, mhp->attr.pbl_size,
+                      mhp->attr.pbl_addr);
+err_free_pbl:
+       iwch_free_pbl(mhp);
+err_free_mhp:
+       kfree(mhp);
+       return ERR_PTR(ret);
+}
+
+/*
+ * Allocate a fast-register page list with room for @page_list_len 64-bit
+ * page addresses.  The address array lives in the same allocation, directly
+ * behind the ib_fast_reg_page_list header, so a single kfree() of the
+ * returned pointer releases both.  @device is not used.
+ *
+ * Returns the page list, ERR_PTR(-EINVAL) for a negative length, or
+ * ERR_PTR(-ENOMEM) if the allocation fails.
+ */
+static struct ib_fast_reg_page_list *iwch_alloc_fastreg_pbl(
+                                       struct ib_device *device,
+                                       int page_list_len)
+{
+       struct ib_fast_reg_page_list *page_list;
+
+       /*
+        * A negative length converts to a huge unsigned value in the size
+        * computation below, which can wrap around to a small allocation.
+        */
+       if (page_list_len < 0)
+               return ERR_PTR(-EINVAL);
+
+       page_list = kmalloc(sizeof *page_list + page_list_len * sizeof(u64),
+                           GFP_KERNEL);
+       if (!page_list)
+               return ERR_PTR(-ENOMEM);
+
+       /* The address array starts right after the header. */
+       page_list->page_list = (u64 *)(page_list + 1);
+       page_list->max_page_list_len = page_list_len;
+
+       return page_list;
+}
+
+/*
+ * Release a fast-register page list.  A single kfree() suffices because
+ * iwch_alloc_fastreg_pbl() places the address array in the same allocation
+ * as the header.
+ */
+static void iwch_free_fastreg_pbl(struct ib_fast_reg_page_list *page_list)
+{
+       kfree(page_list);
+}
+
 static int iwch_destroy_qp(struct ib_qp *ib_qp)
 {
        struct iwch_dev *rhp;
@@ -843,6 +907,15 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd,
         */
        sqsize = roundup_pow_of_two(attrs->cap.max_send_wr);
        wqsize = roundup_pow_of_two(rqsize + sqsize);
+
+       /*
+        * Kernel users need more wq space for fastreg WRs which can take
+        * 2 WR fragments.
+        */
+       ucontext = pd->uobject ? to_iwch_ucontext(pd->uobject->context) : NULL;
+       if (!ucontext && wqsize < (rqsize + (2 * sqsize)))
+               wqsize = roundup_pow_of_two(rqsize +
+                               roundup_pow_of_two(attrs->cap.max_send_wr * 2));
        PDBG("%s wqsize %d sqsize %d rqsize %d\n", __func__,
             wqsize, sqsize, rqsize);
        qhp = kzalloc(sizeof(*qhp), GFP_KERNEL);
@@ -851,7 +924,6 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd,
        qhp->wq.size_log2 = ilog2(wqsize);
        qhp->wq.rq_size_log2 = ilog2(rqsize);
        qhp->wq.sq_size_log2 = ilog2(sqsize);
-       ucontext = pd->uobject ? to_iwch_ucontext(pd->uobject->context) : NULL;
        if (cxio_create_qp(&rhp->rdev, !udata, &qhp->wq,
                           ucontext ? &ucontext->uctx : &rhp->rdev.uctx)) {
                kfree(qhp);
@@ -935,10 +1007,10 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd,
        qhp->ibqp.qp_num = qhp->wq.qpid;
        init_timer(&(qhp->timer));
        PDBG("%s sq_num_entries %d, rq_num_entries %d "
-            "qpid 0x%0x qhp %p dma_addr 0x%llx size %d\n",
+            "qpid 0x%0x qhp %p dma_addr 0x%llx size %d rq_addr 0x%x\n",
             __func__, qhp->attr.sq_num_entries, qhp->attr.rq_num_entries,
             qhp->wq.qpid, qhp, (unsigned long long) qhp->wq.dma_addr,
-            1 << qhp->wq.size_log2);
+            1 << qhp->wq.size_log2, qhp->wq.rq_addr);
        return &qhp->ibqp;
 }
 
@@ -1023,6 +1095,29 @@ static int iwch_query_gid(struct ib_device *ibdev, u8 port,
        return 0;
 }
 
+/*
+ * Pack the firmware version reported by the low-level net device into a
+ * u64: major in bits 32-47, minor in bits 16-31, micro in bits 0-15.
+ *
+ * The version is taken from the ethtool drvinfo fw_version string.  Its
+ * first character is skipped and the rest is split on '.' into up to
+ * three numeric fields.  A field that is missing or does not parse is
+ * reported as 0.
+ */
+static u64 fw_vers_string_to_u64(struct iwch_dev *iwch_dev)
+{
+       struct ethtool_drvinfo info;
+       struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev;
+       char *cp, *next;
+       unsigned fw_maj = 0, fw_min = 0, fw_mic = 0;
+
+       /* Never parse stack garbage if the driver leaves fields unset. */
+       memset(&info, 0, sizeof info);
+
+       rtnl_lock();
+       lldev->ethtool_ops->get_drvinfo(lldev, &info);
+       rtnl_unlock();
+
+       /*
+        * strsep() returns NULL once the string has run out of separators;
+        * that must not be handed to sscanf().
+        */
+       next = info.fw_version + 1;
+       cp = strsep(&next, ".");
+       if (cp)
+               sscanf(cp, "%i", &fw_maj);
+       cp = strsep(&next, ".");
+       if (cp)
+               sscanf(cp, "%i", &fw_min);
+       cp = strsep(&next, ".");
+       if (cp)
+               sscanf(cp, "%i", &fw_mic);
+
+       return (((u64)fw_maj & 0xffff) << 32) | ((fw_min & 0xffff) << 16) |
+              (fw_mic & 0xffff);
+}
+
 static int iwch_query_device(struct ib_device *ibdev,
                             struct ib_device_attr *props)
 {
@@ -1033,7 +1128,10 @@ static int iwch_query_device(struct ib_device *ibdev,
        dev = to_iwch_dev(ibdev);
        memset(props, 0, sizeof *props);
        memcpy(&props->sys_image_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6);
+       props->hw_ver = dev->rdev.t3cdev_p->type;
+       props->fw_ver = fw_vers_string_to_u64(dev);
        props->device_cap_flags = dev->device_cap_flags;
+       props->page_size_cap = dev->attr.mem_pgsizes_bitmask;
        props->vendor_id = (u32)dev->rdev.rnic_info.pdev->vendor;
        props->vendor_part_id = (u32)dev->rdev.rnic_info.pdev->device;
        props->max_mr_size = dev->attr.max_mr_size;
@@ -1048,6 +1146,7 @@ static int iwch_query_device(struct ib_device *ibdev,
        props->max_mr = dev->attr.max_mem_regs;
        props->max_pd = dev->attr.max_pds;
        props->local_ca_ack_delay = 0;
+       props->max_fast_reg_page_list_len = T3_MAX_FASTREG_DEPTH;
 
        return 0;
 }
@@ -1088,6 +1187,28 @@ static ssize_t show_rev(struct device *dev, struct device_attribute *attr,
        return sprintf(buf, "%d\n", iwch_dev->rdev.t3cdev_p->type);
 }
 
+/*
+ * Report whether the adapter firmware is new enough for fast-register
+ * work requests: any version above 6.0 (major > 6, or major 6 with a
+ * non-zero minor).
+ *
+ * The version is parsed from the ethtool drvinfo fw_version string; its
+ * first character is skipped and the rest is split on '.'.  A field that
+ * is missing or does not parse counts as 0, so an unparsable version
+ * yields "not supported".
+ *
+ * Returns non-zero if fastreg is supported, 0 otherwise.
+ */
+static int fw_supports_fastreg(struct iwch_dev *iwch_dev)
+{
+       struct ethtool_drvinfo info;
+       struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev;
+       char *cp, *next;
+       unsigned fw_maj = 0, fw_min = 0;
+
+       /* Never parse stack garbage if the driver leaves fields unset. */
+       memset(&info, 0, sizeof info);
+
+       rtnl_lock();
+       lldev->ethtool_ops->get_drvinfo(lldev, &info);
+       rtnl_unlock();
+
+       /*
+        * strsep() returns NULL once the string has run out of separators;
+        * that must not be handed to sscanf().
+        */
+       next = info.fw_version+1;
+       cp = strsep(&next, ".");
+       if (cp)
+               sscanf(cp, "%i", &fw_maj);
+       cp = strsep(&next, ".");
+       if (cp)
+               sscanf(cp, "%i", &fw_min);
+
+       PDBG("%s maj %u min %u\n", __func__, fw_maj, fw_min);
+
+       return fw_maj > 6 || (fw_maj == 6 && fw_min > 0);
+}
+
 static ssize_t show_fw_ver(struct device *dev, struct device_attribute *attr, char *buf)
 {
        struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev,
@@ -1127,6 +1248,61 @@ static ssize_t show_board(struct device *dev, struct device_attribute *attr,
                       iwch_dev->rdev.rnic_info.pdev->device);
 }
 
+/* Join the high and low 32-bit halves of a split MIB counter. */
+#define IWCH_MIB64(hi, lo)     (((u64) (hi) << 32) + (lo))
+
+/*
+ * iwch_get_mib - fill in the iWARP protocol statistics for a device.
+ * @ibdev: device being queried
+ * @stats: output; zeroed, then the iw member is populated
+ *
+ * Fetches the adapter's tp_mib_stats through the t3cdev ctl() hook with
+ * RDMA_GET_MIB and copies the IP and TCP counters across.  Counters the
+ * adapter reports as _hi/_lo pairs are combined into one 64-bit value.
+ *
+ * Returns 0 on success, or -ENOSYS if the ctl() call fails for any reason.
+ */
+static int iwch_get_mib(struct ib_device *ibdev,
+                       union rdma_protocol_stats *stats)
+{
+       struct tp_mib_stats mib;
+       struct iwch_dev *rhp;
+
+       PDBG("%s ibdev %p\n", __func__, ibdev);
+       rhp = to_iwch_dev(ibdev);
+       if (rhp->rdev.t3cdev_p->ctl(rhp->rdev.t3cdev_p, RDMA_GET_MIB, &mib))
+               return -ENOSYS;
+
+       memset(stats, 0, sizeof *stats);
+
+       /* IP counters reported as hi/lo pairs */
+       stats->iw.ipInReceives =
+               IWCH_MIB64(mib.ipInReceive_hi, mib.ipInReceive_lo);
+       stats->iw.ipInHdrErrors =
+               IWCH_MIB64(mib.ipInHdrErrors_hi, mib.ipInHdrErrors_lo);
+       stats->iw.ipInAddrErrors =
+               IWCH_MIB64(mib.ipInAddrErrors_hi, mib.ipInAddrErrors_lo);
+       stats->iw.ipInUnknownProtos =
+               IWCH_MIB64(mib.ipInUnknownProtos_hi, mib.ipInUnknownProtos_lo);
+       stats->iw.ipInDiscards =
+               IWCH_MIB64(mib.ipInDiscards_hi, mib.ipInDiscards_lo);
+       stats->iw.ipInDelivers =
+               IWCH_MIB64(mib.ipInDelivers_hi, mib.ipInDelivers_lo);
+       stats->iw.ipOutRequests =
+               IWCH_MIB64(mib.ipOutRequests_hi, mib.ipOutRequests_lo);
+       stats->iw.ipOutDiscards =
+               IWCH_MIB64(mib.ipOutDiscards_hi, mib.ipOutDiscards_lo);
+       stats->iw.ipOutNoRoutes =
+               IWCH_MIB64(mib.ipOutNoRoutes_hi, mib.ipOutNoRoutes_lo);
+
+       /* IP reassembly counters reported as single words */
+       stats->iw.ipReasmTimeout = mib.ipReasmTimeout;
+       stats->iw.ipReasmReqds = mib.ipReasmReqds;
+       stats->iw.ipReasmOKs = mib.ipReasmOKs;
+       stats->iw.ipReasmFails = mib.ipReasmFails;
+
+       /* TCP counters reported as single words */
+       stats->iw.tcpActiveOpens = mib.tcpActiveOpens;
+       stats->iw.tcpPassiveOpens = mib.tcpPassiveOpens;
+       stats->iw.tcpAttemptFails = mib.tcpAttemptFails;
+       stats->iw.tcpEstabResets = mib.tcpEstabResets;
+       stats->iw.tcpOutRsts = mib.tcpOutRsts;
+       stats->iw.tcpCurrEstab = mib.tcpCurrEstab;
+
+       /* TCP counters reported as hi/lo pairs */
+       stats->iw.tcpInSegs =
+               IWCH_MIB64(mib.tcpInSegs_hi, mib.tcpInSegs_lo);
+       stats->iw.tcpOutSegs =
+               IWCH_MIB64(mib.tcpOutSegs_hi, mib.tcpOutSegs_lo);
+       stats->iw.tcpRetransSegs =
+               IWCH_MIB64(mib.tcpRetransSeg_hi, mib.tcpRetransSeg_lo);
+       stats->iw.tcpInErrs =
+               IWCH_MIB64(mib.tcpInErrs_hi, mib.tcpInErrs_lo);
+
+       stats->iw.tcpRtoMin = mib.tcpRtoMin;
+       stats->iw.tcpRtoMax = mib.tcpRtoMax;
+       return 0;
+}
+
+#undef IWCH_MIB64
+
 static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
 static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
 static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
@@ -1136,7 +1312,7 @@ static struct device_attribute *iwch_class_attributes[] = {
        &dev_attr_hw_rev,
        &dev_attr_fw_ver,
        &dev_attr_hca_type,
-       &dev_attr_board_id
+       &dev_attr_board_id,
 };
 
 int iwch_register_device(struct iwch_dev *dev)
@@ -1149,8 +1325,12 @@ int iwch_register_device(struct iwch_dev *dev)
        memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid));
        memcpy(&dev->ibdev.node_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6);
        dev->ibdev.owner = THIS_MODULE;
-       dev->device_cap_flags =
-           (IB_DEVICE_ZERO_STAG | IB_DEVICE_MEM_WINDOW);
+       dev->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_WINDOW;
+
+       /* cxgb3 supports STag 0. */
+       dev->ibdev.local_dma_lkey = 0;
+       if (fw_supports_fastreg(dev))
+               dev->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
 
        dev->ibdev.uverbs_cmd_mask =
            (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
@@ -1202,15 +1382,16 @@ int iwch_register_device(struct iwch_dev *dev)
        dev->ibdev.alloc_mw = iwch_alloc_mw;
        dev->ibdev.bind_mw = iwch_bind_mw;
        dev->ibdev.dealloc_mw = iwch_dealloc_mw;
-
+       dev->ibdev.alloc_fast_reg_mr = iwch_alloc_fast_reg_mr;
+       dev->ibdev.alloc_fast_reg_page_list = iwch_alloc_fastreg_pbl;
+       dev->ibdev.free_fast_reg_page_list = iwch_free_fastreg_pbl;
        dev->ibdev.attach_mcast = iwch_multicast_attach;
        dev->ibdev.detach_mcast = iwch_multicast_detach;
        dev->ibdev.process_mad = iwch_process_mad;
-
        dev->ibdev.req_notify_cq = iwch_arm_cq;
        dev->ibdev.post_send = iwch_post_send;
        dev->ibdev.post_recv = iwch_post_receive;
-
+       dev->ibdev.get_protocol_stats = iwch_get_mib;
 
        dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL);
        if (!dev->ibdev.iwcm)
index 836163fc54291cdd2cbaab8980cf8bba448c68a9..f5ceca05c43512303d4ab0c236f2851c4ab0fa9a 100644 (file)
@@ -296,14 +296,6 @@ static inline u32 iwch_ib_to_tpt_access(int acc)
               TPT_LOCAL_READ;
 }
 
-static inline u32 iwch_ib_to_mwbind_access(int acc)
-{
-       return (acc & IB_ACCESS_REMOTE_WRITE ? T3_MEM_ACCESS_REM_WRITE : 0) |
-              (acc & IB_ACCESS_REMOTE_READ ? T3_MEM_ACCESS_REM_READ : 0) |
-              (acc & IB_ACCESS_LOCAL_WRITE ? T3_MEM_ACCESS_LOCAL_WRITE : 0) |
-              T3_MEM_ACCESS_LOCAL_READ;
-}
-
 enum iwch_mmid_state {
        IWCH_STAG_STATE_VALID,
        IWCH_STAG_STATE_INVALID
index 992613799228e6ab4a8fcee6d781925c5bc1eb85..9a3be3a9d5dc8f931e7189b7b9f46a9f6b8125fd 100644 (file)
 #include "iwch.h"
 #include "iwch_cm.h"
 #include "cxio_hal.h"
+#include "cxio_resource.h"
 
 #define NO_SUPPORT -1
 
-static int iwch_build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr,
+static int build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr,
                                u8 * flit_cnt)
 {
        int i;
@@ -44,59 +45,44 @@ static int iwch_build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr,
 
        switch (wr->opcode) {
        case IB_WR_SEND:
-       case IB_WR_SEND_WITH_IMM:
                if (wr->send_flags & IB_SEND_SOLICITED)
                        wqe->send.rdmaop = T3_SEND_WITH_SE;
                else
                        wqe->send.rdmaop = T3_SEND;
                wqe->send.rem_stag = 0;
                break;
-#if 0                          /* Not currently supported */
-       case TYPE_SEND_INVALIDATE:
-       case TYPE_SEND_INVALIDATE_IMMEDIATE:
-               wqe->send.rdmaop = T3_SEND_WITH_INV;
-               wqe->send.rem_stag = cpu_to_be32(wr->wr.rdma.rkey);
-               break;
-       case TYPE_SEND_SE_INVALIDATE:
-               wqe->send.rdmaop = T3_SEND_WITH_SE_INV;
-               wqe->send.rem_stag = cpu_to_be32(wr->wr.rdma.rkey);
+       case IB_WR_SEND_WITH_INV:
+               if (wr->send_flags & IB_SEND_SOLICITED)
+                       wqe->send.rdmaop = T3_SEND_WITH_SE_INV;
+               else
+                       wqe->send.rdmaop = T3_SEND_WITH_INV;
+               wqe->send.rem_stag = cpu_to_be32(wr->ex.invalidate_rkey);
                break;
-#endif
        default:
-               break;
+               return -EINVAL;
        }
        if (wr->num_sge > T3_MAX_SGE)
                return -EINVAL;
        wqe->send.reserved[0] = 0;
        wqe->send.reserved[1] = 0;
        wqe->send.reserved[2] = 0;
-       if (wr->opcode == IB_WR_SEND_WITH_IMM) {
-               plen = 4;
-               wqe->send.sgl[0].stag = wr->ex.imm_data;
-               wqe->send.sgl[0].len = __constant_cpu_to_be32(0);
-               wqe->send.num_sgle = __constant_cpu_to_be32(0);
-               *flit_cnt = 5;
-       } else {
-               plen = 0;
-               for (i = 0; i < wr->num_sge; i++) {
-                       if ((plen + wr->sg_list[i].length) < plen) {
-                               return -EMSGSIZE;
-                       }
-                       plen += wr->sg_list[i].length;
-                       wqe->send.sgl[i].stag =
-                           cpu_to_be32(wr->sg_list[i].lkey);
-                       wqe->send.sgl[i].len =
-                           cpu_to_be32(wr->sg_list[i].length);
-                       wqe->send.sgl[i].to = cpu_to_be64(wr->sg_list[i].addr);
-               }
-               wqe->send.num_sgle = cpu_to_be32(wr->num_sge);
-               *flit_cnt = 4 + ((wr->num_sge) << 1);
+       plen = 0;
+       for (i = 0; i < wr->num_sge; i++) {
+               if ((plen + wr->sg_list[i].length) < plen)
+                       return -EMSGSIZE;
+
+               plen += wr->sg_list[i].length;
+               wqe->send.sgl[i].stag = cpu_to_be32(wr->sg_list[i].lkey);
+               wqe->send.sgl[i].len = cpu_to_be32(wr->sg_list[i].length);
+               wqe->send.sgl[i].to = cpu_to_be64(wr->sg_list[i].addr);
        }
+       wqe->send.num_sgle = cpu_to_be32(wr->num_sge);
+       *flit_cnt = 4 + ((wr->num_sge) << 1);
        wqe->send.plen = cpu_to_be32(plen);
        return 0;
 }
 
-static int iwch_build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr,
+static int build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr,
                                 u8 *flit_cnt)
 {
        int i;
@@ -137,15 +123,18 @@ static int iwch_build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr,
        return 0;
 }
 
-static int iwch_build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr,
+static int build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr,
                                u8 *flit_cnt)
 {
        if (wr->num_sge > 1)
                return -EINVAL;
        wqe->read.rdmaop = T3_READ_REQ;
+       if (wr->opcode == IB_WR_RDMA_READ_WITH_INV)
+               wqe->read.local_inv = 1;
+       else
+               wqe->read.local_inv = 0;
        wqe->read.reserved[0] = 0;
        wqe->read.reserved[1] = 0;
-       wqe->read.reserved[2] = 0;
        wqe->read.rem_stag = cpu_to_be32(wr->wr.rdma.rkey);
        wqe->read.rem_to = cpu_to_be64(wr->wr.rdma.remote_addr);
        wqe->read.local_stag = cpu_to_be32(wr->sg_list[0].lkey);
@@ -155,6 +144,57 @@ static int iwch_build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr,
        return 0;
 }
 
+/*
+ * build_fastreg - fill in a fast-register work request.
+ * @wqe:      work-queue entry to build the request in
+ * @wr:       send WR whose wr.fast_reg member describes the registration
+ * @flit_cnt: out: flit count for the first WR, 5 + page count, capped at 15
+ * @wr_cnt:   out: number of work-queue entries consumed (1 or 2)
+ * @wq:       work queue, used to locate the entry following @wqe
+ *
+ * The first T3_MAX_FASTREG_FRAG page addresses are stored in @wqe itself.
+ * If the page list is longer, the entry at wq->wptr + 1 is set up as a
+ * second WR and receives the remaining addresses.
+ *
+ * Returns 0, or -EINVAL if the page list exceeds T3_MAX_FASTREG_DEPTH.
+ */
+static int build_fastreg(union t3_wr *wqe, struct ib_send_wr *wr,
+                               u8 *flit_cnt, int *wr_cnt, struct t3_wq *wq)
+{
+       int i;
+       __be64 *p;
+
+       if (wr->wr.fast_reg.page_list_len > T3_MAX_FASTREG_DEPTH)
+               return -EINVAL;
+       *wr_cnt = 1;
+       wqe->fastreg.stag = cpu_to_be32(wr->wr.fast_reg.rkey);
+       wqe->fastreg.len = cpu_to_be32(wr->wr.fast_reg.length);
+       /* The 64-bit iova_start is split into two big-endian 32-bit words. */
+       wqe->fastreg.va_base_hi = cpu_to_be32(wr->wr.fast_reg.iova_start >> 32);
+       wqe->fastreg.va_base_lo_fbo =
+                               cpu_to_be32(wr->wr.fast_reg.iova_start & 0xffffffff);
+       /*
+        * The page size is passed as page_shift - 12.
+        * NOTE(review): presumably relative to a 4KB base page - confirm.
+        */
+       wqe->fastreg.page_type_perms = cpu_to_be32(
+               V_FR_PAGE_COUNT(wr->wr.fast_reg.page_list_len) |
+               V_FR_PAGE_SIZE(wr->wr.fast_reg.page_shift-12) |
+               V_FR_TYPE(TPT_VATO) |
+               V_FR_PERMS(iwch_ib_to_tpt_access(wr->wr.fast_reg.access_flags)));
+       p = &wqe->fastreg.pbl_addrs[0];
+       for (i = 0; i < wr->wr.fast_reg.page_list_len; i++, p++) {
+
+               /* If we need a 2nd WR, then set it up */
+               if (i == T3_MAX_FASTREG_FRAG) {
+                       *wr_cnt = 2;
+                       /* From here on wqe refers to the next queue entry. */
+                       wqe = (union t3_wr *)(wq->queue +
+                               Q_PTR2IDX((wq->wptr+1), wq->size_log2));
+                       build_fw_riwrh((void *)wqe, T3_WR_FASTREG, 0,
+                              Q_GENBIT(wq->wptr + 1, wq->size_log2),
+                              0, 1 + wr->wr.fast_reg.page_list_len - T3_MAX_FASTREG_FRAG,
+                              T3_EOP);
+
+                       p = &wqe->pbl_frag.pbl_addrs[0];
+               }
+               *p = cpu_to_be64((u64)wr->wr.fast_reg.page_list->page_list[i]);
+       }
+       /* Flit count for the first WR only; the second WR's header is built above. */
+       *flit_cnt = 5 + wr->wr.fast_reg.page_list_len;
+       if (*flit_cnt > 15)
+               *flit_cnt = 15;
+       return 0;
+}
+
+/*
+ * build_inv_stag - fill in a local-invalidate work request.
+ * @wqe:      work-queue entry to build the request in
+ * @wr:       send WR; ex.invalidate_rkey names the STag to invalidate
+ * @flit_cnt: out: size of struct t3_local_inv_wr in 8-byte units
+ *
+ * Always returns 0.
+ */
+static int build_inv_stag(union t3_wr *wqe, struct ib_send_wr *wr,
+                               u8 *flit_cnt)
+{
+       wqe->local_inv.stag = cpu_to_be32(wr->ex.invalidate_rkey);
+       wqe->local_inv.reserved = 0;
+       *flit_cnt = sizeof(struct t3_local_inv_wr) >> 3;
+       return 0;
+}
+
 /*
  * TBD: this is going to be moved to firmware. Missing pdid/qpid check for now.
  */
@@ -205,23 +245,106 @@ static int iwch_sgl2pbl_map(struct iwch_dev *rhp, struct ib_sge *sg_list,
        return 0;
 }
 
-static int iwch_build_rdma_recv(struct iwch_dev *rhp, union t3_wr *wqe,
+/*
+ * build_rdma_recv - fill in a receive WR whose SGEs carry non-zero STags.
+ * @qhp: queue pair the receive is posted to
+ * @wqe: work-queue entry to build the request in
+ * @wr:  receive WR supplied by the caller
+ *
+ * Maps each SGE to its PBL address and page size with iwch_sgl2pbl_map(),
+ * writes the SGEs into @wqe (unused slots are zeroed) and records the
+ * wr_id in the software RQ entry at rq_wptr.  That entry's pbl_addr is
+ * set to 0, since no PBL memory is allocated here.
+ *
+ * The wr->num_sge <= T3_MAX_SGE check no longer lives here; the local
+ * arrays are sized T3_MAX_SGE, so callers must enforce that bound.
+ *
+ * Returns 0, or the error from iwch_sgl2pbl_map().
+ */
+static int build_rdma_recv(struct iwch_qp *qhp, union t3_wr *wqe,
                                 struct ib_recv_wr *wr)
 {
-       int i;
-       if (wr->num_sge > T3_MAX_SGE)
-               return -EINVAL;
+       int i, err = 0;
+       u32 pbl_addr[T3_MAX_SGE];
+       u8 page_size[T3_MAX_SGE];
+
+       err = iwch_sgl2pbl_map(qhp->rhp, wr->sg_list, wr->num_sge, pbl_addr,
+                              page_size);
+       if (err)
+               return err;
+       /*
+        * All four page-size slots are copied regardless of num_sge.
+        * NOTE(review): slots at or beyond num_sge hold whatever
+        * iwch_sgl2pbl_map() left there - confirm it fills every entry,
+        * and that T3_MAX_SGE is at least 4.
+        */
+       wqe->recv.pagesz[0] = page_size[0];
+       wqe->recv.pagesz[1] = page_size[1];
+       wqe->recv.pagesz[2] = page_size[2];
+       wqe->recv.pagesz[3] = page_size[3];
         wqe->recv.num_sgle = cpu_to_be32(wr->num_sge);
         for (i = 0; i < wr->num_sge; i++) {
                 wqe->recv.sgl[i].stag = cpu_to_be32(wr->sg_list[i].lkey);
                 wqe->recv.sgl[i].len = cpu_to_be32(wr->sg_list[i].length);
+
+               /* to in the WQE == the offset into the page */
+               wqe->recv.sgl[i].to = cpu_to_be64(((u32) wr->sg_list[i].addr) %
+                               (1UL << (12 + page_size[i])));
+
+               /* pbl_addr is the adapters address in the PBL */
+               wqe->recv.pbl_addr[i] = cpu_to_be32(pbl_addr[i]);
+       }
+       for (; i < T3_MAX_SGE; i++) {
+               wqe->recv.sgl[i].stag = 0;
+               wqe->recv.sgl[i].len = 0;
+               wqe->recv.sgl[i].to = 0;
+               wqe->recv.pbl_addr[i] = 0;
+       }
+       qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr,
+                            qhp->wq.rq_size_log2)].wr_id = wr->wr_id;
+       qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr,
+                            qhp->wq.rq_size_log2)].pbl_addr = 0;
+       return 0;
+}
+
+/*
+ * build_zero_stag_recv - fill in a receive WR whose SGEs all use STag 0.
+ * @qhp: queue pair the receive is posted to
+ * @wqe: work-queue entry to build the request in
+ * @wr:  receive WR supplied by the caller; wr->num_sge must not exceed
+ *       T3_MAX_SGE (iwch_post_receive checks this before calling)
+ *
+ * Reserves T3_STAG0_PBL_SIZE bytes of adapter PBL memory, gives each SGE a
+ * two-entry slice of it, and records the wr_id together with the PBL
+ * address in the software RQ entry at rq_wptr.
+ *
+ * Returns 0 on success, -EINVAL if an SGE is longer than
+ * T3_STAG0_MAX_PBE_LEN or carries a non-zero lkey, or -ENOMEM if no PBL
+ * memory is available.  Nothing is reserved when an error is returned.
+ */
+static int build_zero_stag_recv(struct iwch_qp *qhp, union t3_wr *wqe,
+                               struct ib_recv_wr *wr)
+{
+       int i;
+       u32 pbl_addr;
+       u32 pbl_offset;
+
+       /*
+        * Validate every SGE before reserving PBL memory, so that a
+        * rejected WR cannot leak the reservation.
+        *
+        * We use a 128MB page size.  This and an imposed 128MB sge length
+        * limit allows us to require only a 2-entry HW PBL for each SGE.
+        * This restriction is acceptable since it is not possible to
+        * allocate 128MB of contiguous DMA coherent memory!
+        *
+        * T3 also restricts a recv to all zero-stag or all non-zero-stag.
+        */
+       for (i = 0; i < wr->num_sge; i++) {
+               if (wr->sg_list[i].length > T3_STAG0_MAX_PBE_LEN)
+                       return -EINVAL;
+               if (wr->sg_list[i].lkey != 0)
+                       return -EINVAL;
+       }
+
+       /*
+        * The T3 HW requires the PBL in the HW recv descriptor to reference
+        * a PBL entry.  So we allocate the max needed PBL memory here and pass
+        * it to the uP in the recv WR.  The uP will build the PBL and setup
+        * the HW recv descriptor.
+        */
+       pbl_addr = cxio_hal_pblpool_alloc(&qhp->rhp->rdev, T3_STAG0_PBL_SIZE);
+       if (!pbl_addr)
+               return -ENOMEM;
+
+       /*
+        * Compute the 8B aligned offset.
+        */
+       pbl_offset = (pbl_addr - qhp->rhp->rdev.rnic_info.pbl_base) >> 3;
+
+       wqe->recv.num_sgle = cpu_to_be32(wr->num_sge);
+
+       for (i = 0; i < wr->num_sge; i++) {
+               wqe->recv.pagesz[i] = T3_STAG0_PAGE_SHIFT;
+               wqe->recv.sgl[i].stag = 0;
+               wqe->recv.sgl[i].len = cpu_to_be32(wr->sg_list[i].length);
                 wqe->recv.sgl[i].to = cpu_to_be64(wr->sg_list[i].addr);
+               wqe->recv.pbl_addr[i] = cpu_to_be32(pbl_offset);
+
+               /* Each SGE owns two consecutive PBL entries. */
+               pbl_offset += 2;
         }
         for (; i < T3_MAX_SGE; i++) {
+               wqe->recv.pagesz[i] = 0;
                 wqe->recv.sgl[i].stag = 0;
                 wqe->recv.sgl[i].len = 0;
                 wqe->recv.sgl[i].to = 0;
+               wqe->recv.pbl_addr[i] = 0;
         }
+       qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr,
+                            qhp->wq.rq_size_log2)].wr_id = wr->wr_id;
+       qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr,
+                            qhp->wq.rq_size_log2)].pbl_addr = pbl_addr;
         return 0;
 }
 
@@ -238,6 +361,7 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
        u32 num_wrs;
        unsigned long flag;
        struct t3_swsq *sqp;
+       int wr_cnt = 1;
 
        qhp = to_iwch_qp(ibqp);
        spin_lock_irqsave(&qhp->lock, flag);
@@ -262,33 +386,45 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                t3_wr_flags = 0;
                if (wr->send_flags & IB_SEND_SOLICITED)
                        t3_wr_flags |= T3_SOLICITED_EVENT_FLAG;
-               if (wr->send_flags & IB_SEND_FENCE)
-                       t3_wr_flags |= T3_READ_FENCE_FLAG;
                if (wr->send_flags & IB_SEND_SIGNALED)
                        t3_wr_flags |= T3_COMPLETION_FLAG;
                sqp = qhp->wq.sq +
                      Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2);
                switch (wr->opcode) {
                case IB_WR_SEND:
-               case IB_WR_SEND_WITH_IMM:
+               case IB_WR_SEND_WITH_INV:
+                       if (wr->send_flags & IB_SEND_FENCE)
+                               t3_wr_flags |= T3_READ_FENCE_FLAG;
                        t3_wr_opcode = T3_WR_SEND;
-                       err = iwch_build_rdma_send(wqe, wr, &t3_wr_flit_cnt);
+                       err = build_rdma_send(wqe, wr, &t3_wr_flit_cnt);
                        break;
                case IB_WR_RDMA_WRITE:
                case IB_WR_RDMA_WRITE_WITH_IMM:
                        t3_wr_opcode = T3_WR_WRITE;
-                       err = iwch_build_rdma_write(wqe, wr, &t3_wr_flit_cnt);
+                       err = build_rdma_write(wqe, wr, &t3_wr_flit_cnt);
                        break;
                case IB_WR_RDMA_READ:
+               case IB_WR_RDMA_READ_WITH_INV:
                        t3_wr_opcode = T3_WR_READ;
                        t3_wr_flags = 0; /* T3 reads are always signaled */
-                       err = iwch_build_rdma_read(wqe, wr, &t3_wr_flit_cnt);
+                       err = build_rdma_read(wqe, wr, &t3_wr_flit_cnt);
                        if (err)
                                break;
                        sqp->read_len = wqe->read.local_len;
                        if (!qhp->wq.oldest_read)
                                qhp->wq.oldest_read = sqp;
                        break;
+               case IB_WR_FAST_REG_MR:
+                       t3_wr_opcode = T3_WR_FASTREG;
+                       err = build_fastreg(wqe, wr, &t3_wr_flit_cnt,
+                                                &wr_cnt, &qhp->wq);
+                       break;
+               case IB_WR_LOCAL_INV:
+                       if (wr->send_flags & IB_SEND_FENCE)
+                               t3_wr_flags |= T3_LOCAL_FENCE_FLAG;
+                       t3_wr_opcode = T3_WR_INV_STAG;
+                       err = build_inv_stag(wqe, wr, &t3_wr_flit_cnt);
+                       break;
                default:
                        PDBG("%s post of type=%d TBD!\n", __func__,
                             wr->opcode);
@@ -307,14 +443,15 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 
                build_fw_riwrh((void *) wqe, t3_wr_opcode, t3_wr_flags,
                               Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2),
-                              0, t3_wr_flit_cnt);
+                              0, t3_wr_flit_cnt,
+                              (wr_cnt == 1) ? T3_SOPEOP : T3_SOP);
                PDBG("%s cookie 0x%llx wq idx 0x%x swsq idx %ld opcode %d\n",
                     __func__, (unsigned long long) wr->wr_id, idx,
                     Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2),
                     sqp->opcode);
                wr = wr->next;
                num_wrs--;
-               ++(qhp->wq.wptr);
+               qhp->wq.wptr += wr_cnt;
                ++(qhp->wq.sq_wptr);
        }
        spin_unlock_irqrestore(&qhp->lock, flag);
@@ -345,21 +482,27 @@ int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
                return -EINVAL;
        }
        while (wr) {
+               if (wr->num_sge > T3_MAX_SGE) {
+                       err = -EINVAL;
+                       *bad_wr = wr;
+                       break;
+               }
                idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
                wqe = (union t3_wr *) (qhp->wq.queue + idx);
                if (num_wrs)
-                       err = iwch_build_rdma_recv(qhp->rhp, wqe, wr);
+                       if (wr->sg_list[0].lkey)
+                               err = build_rdma_recv(qhp, wqe, wr);
+                       else
+                               err = build_zero_stag_recv(qhp, wqe, wr);
                else
                        err = -ENOMEM;
                if (err) {
                        *bad_wr = wr;
                        break;
                }
-               qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, qhp->wq.rq_size_log2)] =
-                       wr->wr_id;
                build_fw_riwrh((void *) wqe, T3_WR_RCV, T3_COMPLETION_FLAG,
                               Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2),
-                              0, sizeof(struct t3_receive_wr) >> 3);
+                              0, sizeof(struct t3_receive_wr) >> 3, T3_SOPEOP);
                PDBG("%s cookie 0x%llx idx 0x%x rq_wptr 0x%x rw_rptr 0x%x "
                     "wqe %p \n", __func__, (unsigned long long) wr->wr_id,
                     idx, qhp->wq.rq_wptr, qhp->wq.rq_rptr, wqe);
@@ -419,10 +562,10 @@ int iwch_bind_mw(struct ib_qp *qp,
        sgl.lkey = mw_bind->mr->lkey;
        sgl.length = mw_bind->length;
        wqe->bind.reserved = 0;
-       wqe->bind.type = T3_VA_BASED_TO;
+       wqe->bind.type = TPT_VATO;
 
        /* TBD: check perms */
-       wqe->bind.perms = iwch_ib_to_mwbind_access(mw_bind->mw_access_flags);
+       wqe->bind.perms = iwch_ib_to_tpt_access(mw_bind->mw_access_flags);
        wqe->bind.mr_stag = cpu_to_be32(mw_bind->mr->lkey);
        wqe->bind.mw_stag = cpu_to_be32(mw->rkey);
        wqe->bind.mw_len = cpu_to_be32(mw_bind->length);
@@ -430,7 +573,7 @@ int iwch_bind_mw(struct ib_qp *qp,
        err = iwch_sgl2pbl_map(rhp, &sgl, 1, &pbl_addr, &page_size);
        if (err) {
                spin_unlock_irqrestore(&qhp->lock, flag);
-               return err;
+               return err;
        }
        wqe->send.wrid.id0.hi = qhp->wq.sq_wptr;
        sqp = qhp->wq.sq + Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2);
@@ -441,10 +584,9 @@ int iwch_bind_mw(struct ib_qp *qp,
        sqp->signaled = (mw_bind->send_flags & IB_SEND_SIGNALED);
        wqe->bind.mr_pbl_addr = cpu_to_be32(pbl_addr);
        wqe->bind.mr_pagesz = page_size;
-       wqe->flit[T3_SQ_COOKIE_FLIT] = mw_bind->wr_id;
        build_fw_riwrh((void *)wqe, T3_WR_BIND, t3_wr_flags,
                       Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2), 0,
-                               sizeof(struct t3_bind_mw_wr) >> 3);
+                      sizeof(struct t3_bind_mw_wr) >> 3, T3_SOPEOP);
        ++(qhp->wq.wptr);
        ++(qhp->wq.sq_wptr);
        spin_unlock_irqrestore(&qhp->lock, flag);
@@ -758,7 +900,8 @@ static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp,
        init_attr.qp_dma_size = (1UL << qhp->wq.size_log2);
        init_attr.rqe_count = iwch_rqes_posted(qhp);
        init_attr.flags = qhp->attr.mpa_attr.initiator ? MPA_INITIATOR : 0;
-       init_attr.flags |= capable(CAP_NET_BIND_SERVICE) ? PRIV_QP : 0;
+       if (!qhp->ibqp.uobject)
+               init_attr.flags |= PRIV_QP;
        if (peer2peer) {
                init_attr.rtr_type = RTR_READ;
                if (init_attr.ord == 0 && qhp->attr.mpa_attr.initiator)
index ce1ab0571be38303b8554bd69dcde739f252c826..0792d930c481d508fb4ea87e6161795bcdcb5cd4 100644 (file)
@@ -531,7 +531,7 @@ void ehca_process_eq(struct ehca_shca *shca, int is_irq)
 {
        struct ehca_eq *eq = &shca->eq;
        struct ehca_eqe_cache_entry *eqe_cache = eq->eqe_cache;
-       u64 eqe_value;
+       u64 eqe_value, ret;
        unsigned long flags;
        int eqe_cnt, i;
        int eq_empty = 0;
@@ -583,8 +583,13 @@ void ehca_process_eq(struct ehca_shca *shca, int is_irq)
                        ehca_dbg(&shca->ib_device,
                                 "No eqe found for irq event");
                goto unlock_irq_spinlock;
-       } else if (!is_irq)
+       } else if (!is_irq) {
+               ret = hipz_h_eoi(eq->ist);
+               if (ret != H_SUCCESS)
+                       ehca_err(&shca->ib_device,
+                                "bad return code EOI -rc = %ld\n", ret);
                ehca_dbg(&shca->ib_device, "deadman found %x eqe", eqe_cnt);
+       }
        if (unlikely(eqe_cnt == EHCA_EQE_CACHE_SIZE))
                ehca_dbg(&shca->ib_device, "too many eqes for one irq event");
        /* enable irq for new packets */
index 482103eb6eacdc699430c7455e9e9b723515443f..598844d2edc93131a0cd1c9264035e1651f2097a 100644 (file)
@@ -923,6 +923,7 @@ static struct of_device_id ehca_device_table[] =
        },
        {},
 };
+MODULE_DEVICE_TABLE(of, ehca_device_table);
 
 static struct of_platform_driver ehca_driver = {
        .name        = "ehca",
index f093b0033dafff4cd7e3900d565980d44c35cb30..dd9bc68f1c7bdd4a99cb41520ef685dd5f452951 100644 (file)
@@ -544,8 +544,16 @@ int ehca_post_recv(struct ib_qp *qp,
                   struct ib_recv_wr *recv_wr,
                   struct ib_recv_wr **bad_recv_wr)
 {
-       return internal_post_recv(container_of(qp, struct ehca_qp, ib_qp),
-                                 qp->device, recv_wr, bad_recv_wr);
+       struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);
+
+       /* Reject WR if QP is in RESET state */
+       if (unlikely(my_qp->state == IB_QPS_RESET)) {
+               ehca_err(qp->device, "Invalid QP state  qp_state=%d qpn=%x",
+                        my_qp->state, qp->qp_num);
+               return -EINVAL;
+       }
+
+       return internal_post_recv(my_qp, qp->device, recv_wr, bad_recv_wr);
 }
 
 int ehca_post_srq_recv(struct ib_srq *srq,
@@ -681,7 +689,7 @@ poll_cq_one_read_cqe:
        wc->dlid_path_bits = cqe->dlid;
        wc->src_qp = cqe->remote_qp_number;
        wc->wc_flags = cqe->w_completion_flags;
-       wc->imm_data = cpu_to_be32(cqe->immediate_data);
+       wc->ex.imm_data = cpu_to_be32(cqe->immediate_data);
        wc->sl = cqe->service_level;
 
 poll_cq_one_exit0:
index 5245e13c3a30aac8dc4ebfdc3728499f4c2270f6..415d3a465de6e8b982ab9b236a63035bff16aa23 100644 (file)
@@ -933,3 +933,13 @@ u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle,
                                       r_cb,
                                       0, 0, 0, 0);
 }
+
+u64 hipz_h_eoi(int irq)
+{
+       unsigned long xirr;
+
+       iosync();
+       xirr = (0xffULL << 24) | irq;
+
+       return plpar_hcall_norets(H_EOI, xirr);
+}
index 60ce02b7066389e657c1ff7a33a9b8ee87b8120d..2c3c6e0ea5c267f542968d43c6faec51e40a9f7b 100644 (file)
@@ -260,5 +260,6 @@ u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle,
                      const u64 ressource_handle,
                      void *rblock,
                      unsigned long *byte_count);
+u64 hipz_h_eoi(int irq);
 
 #endif /* __HCP_IF_H__ */
index a03bd28d9b487e1f21f81ce0ff3e826a31412bb6..d385e4168c975993e55c7667ac034b30c7ecd882 100644 (file)
@@ -82,7 +82,7 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
                wc->uqueue[head].opcode = entry->opcode;
                wc->uqueue[head].vendor_err = entry->vendor_err;
                wc->uqueue[head].byte_len = entry->byte_len;
-               wc->uqueue[head].imm_data = (__u32 __force)entry->imm_data;
+               wc->uqueue[head].ex.imm_data = (__u32 __force) entry->ex.imm_data;
                wc->uqueue[head].qp_num = entry->qp->qp_num;
                wc->uqueue[head].src_qp = entry->src_qp;
                wc->uqueue[head].wc_flags = entry->wc_flags;
index 8eee7830f042b3d05b20fa4955295905294378a3..fb70712ac85c6d261fcf88bc7af63fef8dbfcdc3 100644 (file)
@@ -2228,8 +2228,8 @@ static void ipath_autoneg_send(struct ipath_devdata *dd, int which)
                0xffffffff, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
                0x40000001, 0x1388, 0x15e, /* rest 0's */
                };
-       dcnt = sizeof(madpayload_start)/sizeof(madpayload_start[0]);
-       hcnt = sizeof(hdr)/sizeof(hdr[0]);
+       dcnt = ARRAY_SIZE(madpayload_start);
+       hcnt = ARRAY_SIZE(hdr);
        if (!swapped) {
                /* for maintainability, do it at runtime */
                for (i = 0; i < hcnt; i++) {
index 5f9315d77a437b912622110b70cfdf7beaaf1b6e..be4fc9ada8e7f1e872b740686a74404ae53525fc 100644 (file)
@@ -111,9 +111,9 @@ static int recv_subn_get_nodeinfo(struct ib_smp *smp,
        nip->revision = cpu_to_be32((majrev << 16) | minrev);
        nip->local_port_num = port;
        vendor = dd->ipath_vendorid;
-       nip->vendor_id[0] = 0;
-       nip->vendor_id[1] = vendor >> 8;
-       nip->vendor_id[2] = vendor;
+       nip->vendor_id[0] = IPATH_SRC_OUI_1;
+       nip->vendor_id[1] = IPATH_SRC_OUI_2;
+       nip->vendor_id[2] = IPATH_SRC_OUI_3;
 
        return reply(smp);
 }
index 108df667d2eeff235fb7d5a9d7939b755504d551..97710522624dfe16218e36b09197177a456dba87 100644 (file)
@@ -1703,11 +1703,11 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
        case OP(SEND_LAST_WITH_IMMEDIATE):
        send_last_imm:
                if (header_in_data) {
-                       wc.imm_data = *(__be32 *) data;
+                       wc.ex.imm_data = *(__be32 *) data;
                        data += sizeof(__be32);
                } else {
                        /* Immediate data comes after BTH */
-                       wc.imm_data = ohdr->u.imm_data;
+                       wc.ex.imm_data = ohdr->u.imm_data;
                }
                hdrsize += 4;
                wc.wc_flags = IB_WC_WITH_IMM;
index a4b5521567fe6bbb9fc62cf4340163ac14bf8a79..af051f7576638197fb9633879f08e36d78ae3620 100644 (file)
@@ -331,7 +331,7 @@ again:
        switch (wqe->wr.opcode) {
        case IB_WR_SEND_WITH_IMM:
                wc.wc_flags = IB_WC_WITH_IMM;
-               wc.imm_data = wqe->wr.ex.imm_data;
+               wc.ex.imm_data = wqe->wr.ex.imm_data;
                /* FALLTHROUGH */
        case IB_WR_SEND:
                if (!ipath_get_rwqe(qp, 0))
@@ -342,7 +342,7 @@ again:
                if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
                        goto inv_err;
                wc.wc_flags = IB_WC_WITH_IMM;
-               wc.imm_data = wqe->wr.ex.imm_data;
+               wc.ex.imm_data = wqe->wr.ex.imm_data;
                if (!ipath_get_rwqe(qp, 1))
                        goto rnr_nak;
                /* FALLTHROUGH */
index 0596ec16fcbd2bfd2b4f1b23bfa45d23b5f86156..82cc588b8bf23d3a75ca15f3171c50df6c1d1943 100644 (file)
@@ -379,11 +379,11 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
        case OP(SEND_LAST_WITH_IMMEDIATE):
        send_last_imm:
                if (header_in_data) {
-                       wc.imm_data = *(__be32 *) data;
+                       wc.ex.imm_data = *(__be32 *) data;
                        data += sizeof(__be32);
                } else {
                        /* Immediate data comes after BTH */
-                       wc.imm_data = ohdr->u.imm_data;
+                       wc.ex.imm_data = ohdr->u.imm_data;
                }
                hdrsize += 4;
                wc.wc_flags = IB_WC_WITH_IMM;
@@ -483,11 +483,11 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
        case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
        rdma_last_imm:
                if (header_in_data) {
-                       wc.imm_data = *(__be32 *) data;
+                       wc.ex.imm_data = *(__be32 *) data;
                        data += sizeof(__be32);
                } else {
                        /* Immediate data comes after BTH */
-                       wc.imm_data = ohdr->u.imm_data;
+                       wc.ex.imm_data = ohdr->u.imm_data;
                }
                hdrsize += 4;
                wc.wc_flags = IB_WC_WITH_IMM;
index 77ca8ca74e788a064ab9471d0704083bc802251a..36aa242c487c4d582d39d94b86e53377466455ef 100644 (file)
@@ -96,7 +96,7 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
 
        if (swqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
                wc.wc_flags = IB_WC_WITH_IMM;
-               wc.imm_data = swqe->wr.ex.imm_data;
+               wc.ex.imm_data = swqe->wr.ex.imm_data;
        }
 
        /*
@@ -492,14 +492,14 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
        if (qp->ibqp.qp_num > 1 &&
            opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) {
                if (header_in_data) {
-                       wc.imm_data = *(__be32 *) data;
+                       wc.ex.imm_data = *(__be32 *) data;
                        data += sizeof(__be32);
                } else
-                       wc.imm_data = ohdr->u.ud.imm_data;
+                       wc.ex.imm_data = ohdr->u.ud.imm_data;
                wc.wc_flags = IB_WC_WITH_IMM;
                hdrsize += sizeof(u32);
        } else if (opcode == IB_OPCODE_UD_SEND_ONLY) {
-               wc.imm_data = 0;
+               wc.ex.imm_data = 0;
                wc.wc_flags = 0;
        } else {
                dev->n_pkt_drops++;
index 7779165b2c2ceef4f98890bfc0bf96340ac833b6..9e23ab0b51a13718cce6b34dc7f7b55fc4dcd053 100644 (file)
@@ -1497,7 +1497,8 @@ static int ipath_query_device(struct ib_device *ibdev,
                IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
                IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE;
        props->page_size_cap = PAGE_SIZE;
-       props->vendor_id = dev->dd->ipath_vendorid;
+       props->vendor_id =
+               IPATH_SRC_OUI_1 << 16 | IPATH_SRC_OUI_2 << 8 | IPATH_SRC_OUI_3;
        props->vendor_part_id = dev->dd->ipath_deviceid;
        props->hw_ver = dev->dd->ipath_pcirev;
 
index 4521319b14067c22d576044c35bea72bcb2e3720..299f20832ab64aaed174b67b3047710e74d95354 100644 (file)
@@ -663,18 +663,18 @@ repoll:
 
                switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) {
                case MLX4_RECV_OPCODE_RDMA_WRITE_IMM:
-                       wc->opcode   = IB_WC_RECV_RDMA_WITH_IMM;
-                       wc->wc_flags = IB_WC_WITH_IMM;
-                       wc->imm_data = cqe->immed_rss_invalid;
+                       wc->opcode      = IB_WC_RECV_RDMA_WITH_IMM;
+                       wc->wc_flags    = IB_WC_WITH_IMM;
+                       wc->ex.imm_data = cqe->immed_rss_invalid;
                        break;
                case MLX4_RECV_OPCODE_SEND:
                        wc->opcode   = IB_WC_RECV;
                        wc->wc_flags = 0;
                        break;
                case MLX4_RECV_OPCODE_SEND_IMM:
-                       wc->opcode   = IB_WC_RECV;
-                       wc->wc_flags = IB_WC_WITH_IMM;
-                       wc->imm_data = cqe->immed_rss_invalid;
+                       wc->opcode      = IB_WC_RECV;
+                       wc->wc_flags    = IB_WC_WITH_IMM;
+                       wc->ex.imm_data = cqe->immed_rss_invalid;
                        break;
                }
 
index 4c1e72fc8f575dd19093058177d0db3900756cec..cdca3a511e1c793dd6056ac3956e2e6ca9dc09d1 100644 (file)
@@ -255,7 +255,8 @@ int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags,     u8 port_num,
                        return IB_MAD_RESULT_SUCCESS;
        } else if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT ||
                   in_mad->mad_hdr.mgmt_class == MLX4_IB_VENDOR_CLASS1   ||
-                  in_mad->mad_hdr.mgmt_class == MLX4_IB_VENDOR_CLASS2) {
+                  in_mad->mad_hdr.mgmt_class == MLX4_IB_VENDOR_CLASS2   ||
+                  in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_CONG_MGMT) {
                if (in_mad->mad_hdr.method  != IB_MGMT_METHOD_GET &&
                    in_mad->mad_hdr.method  != IB_MGMT_METHOD_SET)
                        return IB_MAD_RESULT_SUCCESS;
index 4d61e32866c6029c2dab38f873f6cbbc189df999..bcf50648fa18631e77ae0e80d50846dcabd31ad3 100644 (file)
@@ -90,7 +90,8 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
        props->device_cap_flags    = IB_DEVICE_CHANGE_PHY_PORT |
                IB_DEVICE_PORT_ACTIVE_EVENT             |
                IB_DEVICE_SYS_IMAGE_GUID                |
-               IB_DEVICE_RC_RNR_NAK_GEN;
+               IB_DEVICE_RC_RNR_NAK_GEN                |
+               IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
        if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR)
                props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
        if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR)
@@ -437,7 +438,9 @@ static int mlx4_ib_dealloc_pd(struct ib_pd *pd)
 static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 {
        return mlx4_multicast_attach(to_mdev(ibqp->device)->dev,
-                                    &to_mqp(ibqp)->mqp, gid->raw);
+                                    &to_mqp(ibqp)->mqp, gid->raw,
+                                    !!(to_mqp(ibqp)->flags &
+                                       MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK));
 }
 
 static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
index 5cf994794d25f2ff3f99828696a990c49d58dd31..c4cf5b69eefa3662096348f30a31f5fcbd2a87c8 100644 (file)
@@ -101,7 +101,8 @@ struct mlx4_ib_wq {
 };
 
 enum mlx4_ib_qp_flags {
-       MLX4_IB_QP_LSO          = 1 << 0
+       MLX4_IB_QP_LSO                          = 1 << 0,
+       MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK     = 1 << 1,
 };
 
 struct mlx4_ib_qp {
index a80df22deae8672a79de7e212014d765d72cc3f4..89eb6cbe592ef5487a822808a192ae516827769d 100644 (file)
@@ -129,9 +129,10 @@ static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n, int size)
        int ind;
        void *buf;
        __be32 stamp;
+       struct mlx4_wqe_ctrl_seg *ctrl;
 
-       s = roundup(size, 1U << qp->sq.wqe_shift);
        if (qp->sq_max_wqes_per_wr > 1) {
+               s = roundup(size, 1U << qp->sq.wqe_shift);
                for (i = 0; i < s; i += 64) {
                        ind = (i >> qp->sq.wqe_shift) + n;
                        stamp = ind & qp->sq.wqe_cnt ? cpu_to_be32(0x7fffffff) :
@@ -141,7 +142,8 @@ static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n, int size)
                        *wqe = stamp;
                }
        } else {
-               buf = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1));
+               ctrl = buf = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1));
+               s = (ctrl->fence_size & 0x3f) << 4;
                for (i = 64; i < s; i += 64) {
                        wqe = buf + i;
                        *wqe = cpu_to_be32(0xffffffff);
@@ -452,19 +454,8 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
        spin_lock_init(&qp->rq.lock);
 
        qp->state        = IB_QPS_RESET;
-       qp->atomic_rd_en = 0;
-       qp->resp_depth   = 0;
-
-       qp->rq.head         = 0;
-       qp->rq.tail         = 0;
-       qp->sq.head         = 0;
-       qp->sq.tail         = 0;
-       qp->sq_next_wqe     = 0;
-
        if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
                qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
-       else
-               qp->sq_signal_bits = 0;
 
        err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, !!init_attr->srq, qp);
        if (err)
@@ -509,6 +500,9 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
        } else {
                qp->sq_no_prefetch = 0;
 
+               if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)
+                       qp->flags |= MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK;
+
                if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)
                        qp->flags |= MLX4_IB_QP_LSO;
 
@@ -682,10 +676,15 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
        struct mlx4_ib_qp *qp;
        int err;
 
-       /* We only support LSO, and only for kernel UD QPs. */
-       if (init_attr->create_flags & ~IB_QP_CREATE_IPOIB_UD_LSO)
+       /*
+        * We only support LSO and multicast loopback blocking, and
+        * only for kernel UD QPs.
+        */
+       if (init_attr->create_flags & ~(IB_QP_CREATE_IPOIB_UD_LSO |
+                                       IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK))
                return ERR_PTR(-EINVAL);
-       if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO &&
+
+       if (init_attr->create_flags &&
            (pd->uobject || init_attr->qp_type != IB_QPT_UD))
                return ERR_PTR(-EINVAL);
 
@@ -694,7 +693,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
        case IB_QPT_UC:
        case IB_QPT_UD:
        {
-               qp = kmalloc(sizeof *qp, GFP_KERNEL);
+               qp = kzalloc(sizeof *qp, GFP_KERNEL);
                if (!qp)
                        return ERR_PTR(-ENOMEM);
 
@@ -715,7 +714,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
                if (pd->uobject)
                        return ERR_PTR(-EINVAL);
 
-               sqp = kmalloc(sizeof *sqp, GFP_KERNEL);
+               sqp = kzalloc(sizeof *sqp, GFP_KERNEL);
                if (!sqp)
                        return ERR_PTR(-ENOMEM);
 
@@ -906,7 +905,8 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
                               attr->path_mtu);
                        goto out;
                }
-               context->mtu_msgmax = (attr->path_mtu << 5) | 31;
+               context->mtu_msgmax = (attr->path_mtu << 5) |
+                       ilog2(dev->dev->caps.max_msg_sz);
        }
 
        if (qp->rq.wqe_cnt)
@@ -1063,6 +1063,8 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
                for (i = 0; i < qp->sq.wqe_cnt; ++i) {
                        ctrl = get_send_wqe(qp, i);
                        ctrl->owner_opcode = cpu_to_be32(1 << 31);
+                       if (qp->sq_max_wqes_per_wr == 1)
+                               ctrl->fence_size = 1 << (qp->sq.wqe_shift - 4);
 
                        stamp_send_wqe(qp, i, 1 << qp->sq.wqe_shift);
                }
@@ -1127,23 +1129,6 @@ out:
        return err;
 }
 
-static const struct ib_qp_attr mlx4_ib_qp_attr = { .port_num = 1 };
-static const int mlx4_ib_qp_attr_mask_table[IB_QPT_UD + 1] = {
-               [IB_QPT_UD]  = (IB_QP_PKEY_INDEX                |
-                               IB_QP_PORT                      |
-                               IB_QP_QKEY),
-               [IB_QPT_UC]  = (IB_QP_PKEY_INDEX                |
-                               IB_QP_PORT                      |
-                               IB_QP_ACCESS_FLAGS),
-               [IB_QPT_RC]  = (IB_QP_PKEY_INDEX                |
-                               IB_QP_PORT                      |
-                               IB_QP_ACCESS_FLAGS),
-               [IB_QPT_SMI] = (IB_QP_PKEY_INDEX                |
-                               IB_QP_QKEY),
-               [IB_QPT_GSI] = (IB_QP_PKEY_INDEX                |
-                               IB_QP_QKEY),
-};
-
 int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
                      int attr_mask, struct ib_udata *udata)
 {
@@ -1186,15 +1171,6 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
                goto out;
        }
 
-       if (cur_state == IB_QPS_RESET && new_state == IB_QPS_ERR) {
-               err = __mlx4_ib_modify_qp(ibqp, &mlx4_ib_qp_attr,
-                                         mlx4_ib_qp_attr_mask_table[ibqp->qp_type],
-                                         IB_QPS_RESET, IB_QPS_INIT);
-               if (err)
-                       goto out;
-               cur_state = IB_QPS_INIT;
-       }
-
        err = __mlx4_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state);
 
 out:
@@ -1865,6 +1841,13 @@ done:
 
        qp_init_attr->cap            = qp_attr->cap;
 
+       qp_init_attr->create_flags = 0;
+       if (qp->flags & MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK)
+               qp_init_attr->create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK;
+
+       if (qp->flags & MLX4_IB_QP_LSO)
+               qp_init_attr->create_flags |= IB_QP_CREATE_IPOIB_UD_LSO;
+
 out:
        mutex_unlock(&qp->mutex);
        return err;
index a763067096182fe14d9502a2a5fdd6135bd6e580..c5ccc2daab60e46f12a5afa2e471a2aae4ff1aa0 100644 (file)
@@ -28,8 +28,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: mthca_allocator.c 1349 2004-12-16 21:09:43Z roland $
  */
 
 #include <linux/errno.h>
index 4b111a852ff65b83b8479e6c3f6ce7fc79cb1ffe..32f6c63154541dec229ec8904d1438fddb88d14d 100644 (file)
@@ -29,8 +29,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: mthca_av.c 1349 2004-12-16 21:09:43Z roland $
  */
 
 #include <linux/string.h>
index e948158a28d918673150c60323b3952e07fb2ca6..cc440f90000becfd62928c8aee85e07d4ef30653 100644 (file)
@@ -28,8 +28,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id$
  */
 
 #include <linux/jiffies.h>
@@ -128,7 +126,6 @@ static void handle_catas(struct mthca_dev *dev)
 static void poll_catas(unsigned long dev_ptr)
 {
        struct mthca_dev *dev = (struct mthca_dev *) dev_ptr;
-       unsigned long flags;
        int i;
 
        for (i = 0; i < dev->catas_err.size; ++i)
@@ -137,13 +134,8 @@ static void poll_catas(unsigned long dev_ptr)
                        return;
                }
 
-       spin_lock_irqsave(&catas_lock, flags);
-       if (!dev->catas_err.stop)
-               mod_timer(&dev->catas_err.timer,
-                         jiffies + MTHCA_CATAS_POLL_INTERVAL);
-       spin_unlock_irqrestore(&catas_lock, flags);
-
-       return;
+       mod_timer(&dev->catas_err.timer,
+                 round_jiffies(jiffies + MTHCA_CATAS_POLL_INTERVAL));
 }
 
 void mthca_start_catas_poll(struct mthca_dev *dev)
@@ -151,7 +143,6 @@ void mthca_start_catas_poll(struct mthca_dev *dev)
        unsigned long addr;
 
        init_timer(&dev->catas_err.timer);
-       dev->catas_err.stop = 0;
        dev->catas_err.map  = NULL;
 
        addr = pci_resource_start(dev->pdev, 0) +
@@ -182,10 +173,6 @@ void mthca_start_catas_poll(struct mthca_dev *dev)
 
 void mthca_stop_catas_poll(struct mthca_dev *dev)
 {
-       spin_lock_irq(&catas_lock);
-       dev->catas_err.stop = 1;
-       spin_unlock_irq(&catas_lock);
-
        del_timer_sync(&dev->catas_err.timer);
 
        if (dev->catas_err.map) {
index 54d230ee7d63aca7ef0d023cc52789244834541b..c33e1c53c7997a0b4e446bb3d9e97c14af711d56 100644 (file)
@@ -30,8 +30,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: mthca_cmd.c 1349 2004-12-16 21:09:43Z roland $
  */
 
 #include <linux/completion.h>
index 8928ca4a932569d78ee1b75c122790f4a696408d..6efd3265f248230035b4ad96bcda7853d1b3cd0b 100644 (file)
@@ -30,8 +30,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: mthca_cmd.h 1349 2004-12-16 21:09:43Z roland $
  */
 
 #ifndef MTHCA_CMD_H
index afa56bfaab2ee11e4e0d023084291a0eabd3f8d1..75671f75cac482bf67ce0a6c3692634cce7f43b5 100644 (file)
@@ -29,8 +29,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: mthca_config_reg.h 1349 2004-12-16 21:09:43Z roland $
  */
 
 #ifndef MTHCA_CONFIG_REG_H
index 20401d2ba6b2cdd3ee961ecd4963f8043f96117e..d9f4735c2b3766dee2f3e5907463415efa12f90d 100644 (file)
@@ -32,8 +32,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: mthca_cq.c 1369 2004-12-20 16:17:07Z roland $
  */
 
 #include <linux/hardirq.h>
@@ -622,13 +620,13 @@ static inline int mthca_poll_one(struct mthca_dev *dev,
                case IB_OPCODE_SEND_LAST_WITH_IMMEDIATE:
                case IB_OPCODE_SEND_ONLY_WITH_IMMEDIATE:
                        entry->wc_flags = IB_WC_WITH_IMM;
-                       entry->imm_data = cqe->imm_etype_pkey_eec;
+                       entry->ex.imm_data = cqe->imm_etype_pkey_eec;
                        entry->opcode = IB_WC_RECV;
                        break;
                case IB_OPCODE_RDMA_WRITE_LAST_WITH_IMMEDIATE:
                case IB_OPCODE_RDMA_WRITE_ONLY_WITH_IMMEDIATE:
                        entry->wc_flags = IB_WC_WITH_IMM;
-                       entry->imm_data = cqe->imm_etype_pkey_eec;
+                       entry->ex.imm_data = cqe->imm_etype_pkey_eec;
                        entry->opcode = IB_WC_RECV_RDMA_WITH_IMM;
                        break;
                default:
index 7bc32f8e377ec4747a0c884243efcd34757bc23d..ee4d073c889fae7c96933ae3e2a7b3d0cfae9e32 100644 (file)
@@ -32,8 +32,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: mthca_dev.h 1349 2004-12-16 21:09:43Z roland $
  */
 
 #ifndef MTHCA_DEV_H
@@ -279,7 +277,6 @@ struct mthca_mcg_table {
 struct mthca_catas_err {
        u64                     addr;
        u32 __iomem            *map;
-       unsigned long           stop;
        u32                     size;
        struct timer_list       timer;
        struct list_head        list;
index b374dc395be1af1f95c668bbbea524aeeea311db..14f51ef97d7ecb3404f3fc7ad788a9746c597c84 100644 (file)
@@ -30,8 +30,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: mthca_doorbell.h 1349 2004-12-16 21:09:43Z roland $
  */
 
 #include <linux/types.h>
index 8bde7f98e58a2bfc81a178d6321ecc1b506298ca..4e36aa7cb3d2f7cfc9eba226b0315f9a30083939 100644 (file)
@@ -29,8 +29,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: mthca_eq.c 1382 2004-12-24 02:21:02Z roland $
  */
 
 #include <linux/errno.h>
index 8b7e83e6e88fecd0176725072b349f69ee2a0e9e..640449582aba07ad4774b1027b9842deb0196c8e 100644 (file)
@@ -30,8 +30,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: mthca_mad.c 1349 2004-12-16 21:09:43Z roland $
  */
 
 #include <linux/string.h>
index 200cf13fc9bb241f515f26a6443a363345268838..fb9f91b60f30e401cdb24aad39410db6aae0b0f3 100644 (file)
@@ -30,8 +30,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: mthca_main.c 1396 2004-12-28 04:10:27Z roland $
  */
 
 #include <linux/module.h>
index a8ad072be0743c44b056d8ae2fde1faeb6511323..3f5f948792089a3bf33440794f263bf35dc33d6e 100644 (file)
@@ -28,8 +28,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: mthca_mcg.c 1349 2004-12-16 21:09:43Z roland $
  */
 
 #include <linux/string.h>
index d5862e5d99a09ce89509976b9bdc5049e49e5b26..1f7d1a29d2a82250182bb5a91a5902b64eda0379 100644 (file)
@@ -30,8 +30,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id$
  */
 
 #include <linux/mm.h>
index a1ab06847b75620d06c31f2189a475c73b036d5c..da9b8f9b884f3ac768b71c42d613e6f96a06bda8 100644 (file)
@@ -30,8 +30,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id$
  */
 
 #ifndef MTHCA_MEMFREE_H
index 820205dec560954562b22c570f7eb13861ec8824..8489b1e81c0f2b2643569d08b802df43150631e9 100644 (file)
@@ -29,8 +29,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: mthca_mr.c 1349 2004-12-16 21:09:43Z roland $
  */
 
 #include <linux/slab.h>
index c1e950764bd80f494f64b07111fa8b6256f9d04e..266f14e474066344eda2175de7e0ab9bfdcb90e3 100644 (file)
@@ -30,8 +30,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: mthca_pd.c 1349 2004-12-16 21:09:43Z roland $
  */
 
 #include <linux/errno.h>
index 605a8d57fac6764c97a7029b887e7517dbdadb9f..d168c254061191657e0f05b3e99bb3f423ba7fc8 100644 (file)
@@ -29,8 +29,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: mthca_profile.c 1349 2004-12-16 21:09:43Z roland $
  */
 
 #include <linux/module.h>
index e76cb62d8e326990b6a05c529dad05d6e225d485..62b009cc87305eff134bb45ac0daca95af57e359 100644 (file)
@@ -29,8 +29,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: mthca_profile.h 1349 2004-12-16 21:09:43Z roland $
  */
 
 #ifndef MTHCA_PROFILE_H
index be34f99ca62579b916970d5387110b370b99602d..87ad889e367b2b6b39cfe010ec1ec28f1dd488ac 100644 (file)
@@ -32,8 +32,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: mthca_provider.c 4859 2006-01-09 21:55:10Z roland $
  */
 
 #include <rdma/ib_smi.h>
index 934bf9544037c582aaeb60754d78ad534aef618c..c621f8794b8820d11e6293d62c2ce35d11e8e7ee 100644 (file)
@@ -30,8 +30,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: mthca_provider.h 1349 2004-12-16 21:09:43Z roland $
  */
 
 #ifndef MTHCA_PROVIDER_H
index 09dc3614cf2c1ad774b847ff8f9959f13bf0095a..f5081bfde6db19641ce62cb3638139caebb6f880 100644 (file)
@@ -31,8 +31,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: mthca_qp.c 1355 2004-12-17 15:23:43Z roland $
  */
 
 #include <linux/string.h>
@@ -850,23 +848,6 @@ out:
        return err;
 }
 
-static const struct ib_qp_attr dummy_init_attr = { .port_num = 1 };
-static const int dummy_init_attr_mask[] = {
-       [IB_QPT_UD]  = (IB_QP_PKEY_INDEX                |
-                       IB_QP_PORT                      |
-                       IB_QP_QKEY),
-       [IB_QPT_UC]  = (IB_QP_PKEY_INDEX                |
-                       IB_QP_PORT                      |
-                       IB_QP_ACCESS_FLAGS),
-       [IB_QPT_RC]  = (IB_QP_PKEY_INDEX                |
-                       IB_QP_PORT                      |
-                       IB_QP_ACCESS_FLAGS),
-       [IB_QPT_SMI] = (IB_QP_PKEY_INDEX                |
-                       IB_QP_QKEY),
-       [IB_QPT_GSI] = (IB_QP_PKEY_INDEX                |
-                       IB_QP_QKEY),
-};
-
 int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
                    struct ib_udata *udata)
 {
@@ -928,15 +909,6 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
                goto out;
        }
 
-       if (cur_state == IB_QPS_RESET && new_state == IB_QPS_ERR) {
-               err = __mthca_modify_qp(ibqp, &dummy_init_attr,
-                                       dummy_init_attr_mask[ibqp->qp_type],
-                                       IB_QPS_RESET, IB_QPS_INIT);
-               if (err)
-                       goto out;
-               cur_state = IB_QPS_INIT;
-       }
-
        err = __mthca_modify_qp(ibqp, attr, attr_mask, cur_state, new_state);
 
 out:
@@ -1277,10 +1249,10 @@ static int mthca_set_qp_size(struct mthca_dev *dev, struct ib_qp_cap *cap,
                return -EINVAL;
 
        /*
-        * For MLX transport we need 2 extra S/G entries:
+        * For MLX transport we need 2 extra send gather entries:
         * one for the header and one for the checksum at the end
         */
-       if (qp->transport == MLX && cap->max_recv_sge + 2 > dev->limits.max_sg)
+       if (qp->transport == MLX && cap->max_send_sge + 2 > dev->limits.max_sg)
                return -EINVAL;
 
        if (mthca_is_memfree(dev)) {
index 91934f2d9dbafeedb0e8cba7a180957ec4baa885..acb6817f6060615a4db753e3b4dde6c7f3dd31bb 100644 (file)
@@ -28,8 +28,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: mthca_reset.c 1349 2004-12-16 21:09:43Z roland $
  */
 
 #include <linux/init.h>
index a5ffff6e10263c06c296c0c6d7c0dd3f35b8bd2f..4fabe62aab8a2e522ac36ec03fd729b5a146bbf7 100644 (file)
@@ -28,8 +28,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: mthca_srq.c 3047 2005-08-10 03:59:35Z roland $
  */
 
 #include <linux/slab.h>
index 8b728486410d12aef77f16563220624d766fad18..ca5900c96fcfd43cdca637910ecf8f0f9c94f9dd 100644 (file)
@@ -28,8 +28,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id$
  */
 
 #include <asm/page.h>          /* PAGE_SHIFT */
index e1262c942db8c6ae31c91ec5b006454e239b2331..5fe56e8107395f2c4c0887e52715f171ae3ae976 100644 (file)
@@ -29,7 +29,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
  */
 
 #ifndef MTHCA_USER_H
index b3551a8dea1d691388a694ffe97cff117060b31e..341a5ae881c1270abf94283dff7e3d652ef5a975 100644 (file)
@@ -28,8 +28,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: mthca_wqe.h 3047 2005-08-10 03:59:35Z roland $
  */
 
 #ifndef MTHCA_WQE_H
index a4e9269a29bd106b604d1c8012efc25b4f1ab9c9..d2884e7780987c2a9f5ee9ad409086100127ba25 100644 (file)
@@ -328,7 +328,7 @@ void nes_rem_ref(struct ib_qp *ibqp)
                set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, nesqp->hwqp.qp_id);
                u64temp = (u64)nesqp->nesqp_context_pbase;
                set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, u64temp);
-               nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL);
+               nes_post_cqp_request(nesdev, cqp_request);
        }
 }
 
index 61b46e9c7d2d5ed079acad11ec9ec3f836263280..39bd897b40c6aa1cd4ded5a0179dafc6035af792 100644 (file)
@@ -94,9 +94,6 @@
 
 #define MAX_DPC_ITERATIONS               128
 
-#define NES_CQP_REQUEST_NO_DOORBELL_RING 0
-#define NES_CQP_REQUEST_RING_DOORBELL    1
-
 #define NES_DRV_OPT_ENABLE_MPA_VER_0     0x00000001
 #define NES_DRV_OPT_DISABLE_MPA_CRC      0x00000002
 #define NES_DRV_OPT_DISABLE_FIRST_WRITE  0x00000004
@@ -538,7 +535,11 @@ void nes_read_1G_phy_reg(struct nes_device *, u8, u8, u16 *);
 void nes_write_10G_phy_reg(struct nes_device *, u16, u8, u16, u16);
 void nes_read_10G_phy_reg(struct nes_device *, u8, u8, u16);
 struct nes_cqp_request *nes_get_cqp_request(struct nes_device *);
-void nes_post_cqp_request(struct nes_device *, struct nes_cqp_request *, int);
+void nes_free_cqp_request(struct nes_device *nesdev,
+                         struct nes_cqp_request *cqp_request);
+void nes_put_cqp_request(struct nes_device *nesdev,
+                        struct nes_cqp_request *cqp_request);
+void nes_post_cqp_request(struct nes_device *, struct nes_cqp_request *);
 int nes_arp_table(struct nes_device *, u32, u8 *, u32);
 void nes_mh_fix(unsigned long);
 void nes_clc(unsigned long);
index 9a4b40fae40db3c8994daf5ea66a5cfcfd3b55b5..6aa531d5276dfd969114d07ebcc4c871e611e79d 100644 (file)
@@ -1603,7 +1603,6 @@ static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core,
                        return NULL;
                }
 
-               memset(listener, 0, sizeof(struct nes_cm_listener));
                listener->loc_addr = htonl(cm_info->loc_addr);
                listener->loc_port = htons(cm_info->loc_port);
                listener->reused_node = 0;
index d3278f111ca7dcba33bf5b12de3c01d337309122..85f26d19a32b26187a9289a90fa6c600e33fc0b3 100644 (file)
@@ -398,7 +398,7 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) {
        nesadapter->base_pd = 1;
 
        nesadapter->device_cap_flags =
-               IB_DEVICE_ZERO_STAG | IB_DEVICE_MEM_WINDOW;
+               IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_WINDOW;
 
        nesadapter->allocated_qps = (unsigned long *)&(((unsigned char *)nesadapter)
                        [(sizeof(struct nes_adapter)+(sizeof(unsigned long)-1))&(~(sizeof(unsigned long)-1))]);
@@ -2710,39 +2710,11 @@ static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq)
                                        barrier();
                                        cqp_request->request_done = 1;
                                        wake_up(&cqp_request->waitq);
-                                       if (atomic_dec_and_test(&cqp_request->refcount)) {
-                                               nes_debug(NES_DBG_CQP, "CQP request %p (opcode 0x%02X) freed.\n",
-                                                               cqp_request,
-                                                               le32_to_cpu(cqp_request->cqp_wqe.wqe_words[NES_CQP_WQE_OPCODE_IDX])&0x3f);
-                                               if (cqp_request->dynamic) {
-                                                       kfree(cqp_request);
-                                               } else {
-                                                       spin_lock_irqsave(&nesdev->cqp.lock, flags);
-                                                       list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
-                                                       spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
-                                               }
-                                       }
-                               } else if (cqp_request->callback) {
-                                       /* Envoke the callback routine */
-                                       cqp_request->cqp_callback(nesdev, cqp_request);
-                                       if (cqp_request->dynamic) {
-                                               kfree(cqp_request);
-                                       } else {
-                                               spin_lock_irqsave(&nesdev->cqp.lock, flags);
-                                               list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
-                                               spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
-                                       }
+                                       nes_put_cqp_request(nesdev, cqp_request);
                                } else {
-                                       nes_debug(NES_DBG_CQP, "CQP request %p (opcode 0x%02X) freed.\n",
-                                                       cqp_request,
-                                                       le32_to_cpu(cqp_request->cqp_wqe.wqe_words[NES_CQP_WQE_OPCODE_IDX]) & 0x3f);
-                                       if (cqp_request->dynamic) {
-                                               kfree(cqp_request);
-                                       } else {
-                                               spin_lock_irqsave(&nesdev->cqp.lock, flags);
-                                               list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
-                                               spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
-                                       }
+                                       if (cqp_request->callback)
+                                               cqp_request->cqp_callback(nesdev, cqp_request);
+                                       nes_free_cqp_request(nesdev, cqp_request);
                                }
                        } else {
                                wake_up(&nesdev->cqp.waitq);
@@ -3149,7 +3121,6 @@ int nes_manage_apbvt(struct nes_vnic *nesvnic, u32 accel_local_port,
 {
        struct nes_device *nesdev = nesvnic->nesdev;
        struct nes_hw_cqp_wqe *cqp_wqe;
-       unsigned long flags;
        struct nes_cqp_request *cqp_request;
        int ret = 0;
        u16 major_code;
@@ -3176,7 +3147,7 @@ int nes_manage_apbvt(struct nes_vnic *nesvnic, u32 accel_local_port,
        nes_debug(NES_DBG_QP, "Waiting for CQP completion for APBVT.\n");
 
        atomic_set(&cqp_request->refcount, 2);
-       nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL);
+       nes_post_cqp_request(nesdev, cqp_request);
 
        if (add_port == NES_MANAGE_APBVT_ADD)
                ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0),
@@ -3184,15 +3155,9 @@ int nes_manage_apbvt(struct nes_vnic *nesvnic, u32 accel_local_port,
        nes_debug(NES_DBG_QP, "Completed, ret=%u,  CQP Major:Minor codes = 0x%04X:0x%04X\n",
                        ret, cqp_request->major_code, cqp_request->minor_code);
        major_code = cqp_request->major_code;
-       if (atomic_dec_and_test(&cqp_request->refcount)) {
-               if (cqp_request->dynamic) {
-                       kfree(cqp_request);
-               } else {
-                       spin_lock_irqsave(&nesdev->cqp.lock, flags);
-                       list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
-                       spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
-               }
-       }
+
+       nes_put_cqp_request(nesdev, cqp_request);
+
        if (!ret)
                return -ETIME;
        else if (major_code)
@@ -3252,7 +3217,7 @@ void nes_manage_arp_cache(struct net_device *netdev, unsigned char *mac_addr,
                        nesdev->cqp.sq_head, nesdev->cqp.sq_tail);
 
        atomic_set(&cqp_request->refcount, 1);
-       nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL);
+       nes_post_cqp_request(nesdev, cqp_request);
 }
 
 
@@ -3262,7 +3227,6 @@ void nes_manage_arp_cache(struct net_device *netdev, unsigned char *mac_addr,
 void flush_wqes(struct nes_device *nesdev, struct nes_qp *nesqp,
                u32 which_wq, u32 wait_completion)
 {
-       unsigned long flags;
        struct nes_cqp_request *cqp_request;
        struct nes_hw_cqp_wqe *cqp_wqe;
        int ret;
@@ -3285,7 +3249,7 @@ void flush_wqes(struct nes_device *nesdev, struct nes_qp *nesqp,
                        cpu_to_le32(NES_CQP_FLUSH_WQES | which_wq);
        cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(nesqp->hwqp.qp_id);
 
-       nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL);
+       nes_post_cqp_request(nesdev, cqp_request);
 
        if (wait_completion) {
                /* Wait for CQP */
@@ -3294,14 +3258,6 @@ void flush_wqes(struct nes_device *nesdev, struct nes_qp *nesqp,
                nes_debug(NES_DBG_QP, "Flush SQ QP WQEs completed, ret=%u,"
                                " CQP Major:Minor codes = 0x%04X:0x%04X\n",
                                ret, cqp_request->major_code, cqp_request->minor_code);
-               if (atomic_dec_and_test(&cqp_request->refcount)) {
-                       if (cqp_request->dynamic) {
-                               kfree(cqp_request);
-                       } else {
-                               spin_lock_irqsave(&nesdev->cqp.lock, flags);
-                               list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
-                               spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
-                       }
-               }
+               nes_put_cqp_request(nesdev, cqp_request);
        }
 }
index 745bf94f3f074283f9c6f9dc36547f976d64883e..7b81e0ae00760eedf15e58ce12deffbb9daf7490 100644 (file)
@@ -1172,7 +1172,7 @@ struct nes_vnic {
        u32    mcrq_qp_id;
        struct nes_ucontext *mcrq_ucontext;
        struct nes_cqp_request* (*get_cqp_request)(struct nes_device *nesdev);
-       void (*post_cqp_request)(struct nes_device*, struct nes_cqp_request *, int);
+       void (*post_cqp_request)(struct nes_device*, struct nes_cqp_request *);
        int (*mcrq_mcast_filter)( struct nes_vnic* nesvnic, __u8* dmi_addr );
        struct net_device_stats netstats;
        /* used to put the netdev on the adapters logical port list */
index fe83d1b2b177161a395ed826a7521181834ab8c2..fb8cbd71a2ef86b17b98539a471e0c4d7bf7b409 100644 (file)
@@ -567,12 +567,36 @@ struct nes_cqp_request *nes_get_cqp_request(struct nes_device *nesdev)
        return cqp_request;
 }
 
+void nes_free_cqp_request(struct nes_device *nesdev,
+                         struct nes_cqp_request *cqp_request)
+{
+       unsigned long flags;
+
+       nes_debug(NES_DBG_CQP, "CQP request %p (opcode 0x%02X) freed.\n",
+                 cqp_request,
+                 le32_to_cpu(cqp_request->cqp_wqe.wqe_words[NES_CQP_WQE_OPCODE_IDX]) & 0x3f);
+
+       if (cqp_request->dynamic) {
+               kfree(cqp_request);
+       } else {
+               spin_lock_irqsave(&nesdev->cqp.lock, flags);
+               list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
+               spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
+       }
+}
+
+void nes_put_cqp_request(struct nes_device *nesdev,
+                        struct nes_cqp_request *cqp_request)
+{
+       if (atomic_dec_and_test(&cqp_request->refcount))
+               nes_free_cqp_request(nesdev, cqp_request);
+}
 
 /**
  * nes_post_cqp_request
  */
 void nes_post_cqp_request(struct nes_device *nesdev,
-               struct nes_cqp_request *cqp_request, int ring_doorbell)
+                         struct nes_cqp_request *cqp_request)
 {
        struct nes_hw_cqp_wqe *cqp_wqe;
        unsigned long flags;
@@ -600,10 +624,9 @@ void nes_post_cqp_request(struct nes_device *nesdev,
                                nesdev->cqp.sq_head, nesdev->cqp.sq_tail, nesdev->cqp.sq_size,
                                cqp_request->waiting, atomic_read(&cqp_request->refcount));
                barrier();
-               if (ring_doorbell) {
-                       /* Ring doorbell (1 WQEs) */
-                       nes_write32(nesdev->regs+NES_WQE_ALLOC, 0x01800000 | nesdev->cqp.qp_id);
-               }
+
+               /* Ring doorbell (1 WQEs) */
+               nes_write32(nesdev->regs+NES_WQE_ALLOC, 0x01800000 | nesdev->cqp.qp_id);
 
                barrier();
        } else {
index d617da9bd35125fe510dd4b7aa9a7332f4066bce..e3939d13484ef4358453c25127d8e26134c47ec6 100644 (file)
@@ -55,7 +55,6 @@ static void nes_unregister_ofa_device(struct nes_ib_device *nesibdev);
  * nes_alloc_mw
  */
 static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd) {
-       unsigned long flags;
        struct nes_pd *nespd = to_nespd(ibpd);
        struct nes_vnic *nesvnic = to_nesvnic(ibpd->device);
        struct nes_device *nesdev = nesvnic->nesdev;
@@ -119,7 +118,7 @@ static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd) {
        set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, stag);
 
        atomic_set(&cqp_request->refcount, 2);
-       nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL);
+       nes_post_cqp_request(nesdev, cqp_request);
 
        /* Wait for CQP */
        ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0),
@@ -128,15 +127,7 @@ static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd) {
                        " CQP Major:Minor codes = 0x%04X:0x%04X.\n",
                        stag, ret, cqp_request->major_code, cqp_request->minor_code);
        if ((!ret) || (cqp_request->major_code)) {
-               if (atomic_dec_and_test(&cqp_request->refcount)) {
-                       if (cqp_request->dynamic) {
-                               kfree(cqp_request);
-                       } else {
-                               spin_lock_irqsave(&nesdev->cqp.lock, flags);
-                               list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
-                               spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
-                       }
-               }
+               nes_put_cqp_request(nesdev, cqp_request);
                kfree(nesmr);
                nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
                if (!ret) {
@@ -144,17 +135,8 @@ static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd) {
                } else {
                        return ERR_PTR(-ENOMEM);
                }
-       } else {
-               if (atomic_dec_and_test(&cqp_request->refcount)) {
-                       if (cqp_request->dynamic) {
-                               kfree(cqp_request);
-                       } else {
-                               spin_lock_irqsave(&nesdev->cqp.lock, flags);
-                               list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
-                               spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
-                       }
-               }
        }
+       nes_put_cqp_request(nesdev, cqp_request);
 
        nesmr->ibmw.rkey = stag;
        nesmr->mode = IWNES_MEMREG_TYPE_MW;
@@ -178,7 +160,6 @@ static int nes_dealloc_mw(struct ib_mw *ibmw)
        struct nes_hw_cqp_wqe *cqp_wqe;
        struct nes_cqp_request *cqp_request;
        int err = 0;
-       unsigned long flags;
        int ret;
 
        /* Deallocate the window with the adapter */
@@ -194,7 +175,7 @@ static int nes_dealloc_mw(struct ib_mw *ibmw)
        set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, ibmw->rkey);
 
        atomic_set(&cqp_request->refcount, 2);
-       nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL);
+       nes_post_cqp_request(nesdev, cqp_request);
 
        /* Wait for CQP */
        nes_debug(NES_DBG_MR, "Waiting for deallocate STag 0x%08X to complete.\n",
@@ -204,32 +185,12 @@ static int nes_dealloc_mw(struct ib_mw *ibmw)
        nes_debug(NES_DBG_MR, "Deallocate STag completed, wait_event_timeout ret = %u,"
                        " CQP Major:Minor codes = 0x%04X:0x%04X.\n",
                        ret, cqp_request->major_code, cqp_request->minor_code);
-       if ((!ret) || (cqp_request->major_code)) {
-               if (atomic_dec_and_test(&cqp_request->refcount)) {
-                       if (cqp_request->dynamic) {
-                               kfree(cqp_request);
-                       } else {
-                               spin_lock_irqsave(&nesdev->cqp.lock, flags);
-                               list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
-                               spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
-                       }
-               }
-               if (!ret) {
-                       err = -ETIME;
-               } else {
-                       err = -EIO;
-               }
-       } else {
-               if (atomic_dec_and_test(&cqp_request->refcount)) {
-                       if (cqp_request->dynamic) {
-                               kfree(cqp_request);
-                       } else {
-                               spin_lock_irqsave(&nesdev->cqp.lock, flags);
-                               list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
-                               spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
-                       }
-               }
-       }
+       if (!ret)
+               err = -ETIME;
+       else if (cqp_request->major_code)
+               err = -EIO;
+
+       nes_put_cqp_request(nesdev, cqp_request);
 
        nes_free_resource(nesadapter, nesadapter->allocated_mrs,
                        (ibmw->rkey & 0x0fffff00) >> 8);
@@ -516,7 +477,7 @@ static struct ib_fmr *nes_alloc_fmr(struct ib_pd *ibpd,
                        (nesfmr->nesmr.pbls_used-1) : nesfmr->nesmr.pbls_used);
 
        atomic_set(&cqp_request->refcount, 2);
-       nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL);
+       nes_post_cqp_request(nesdev, cqp_request);
 
        /* Wait for CQP */
        ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0),
@@ -526,29 +487,11 @@ static struct ib_fmr *nes_alloc_fmr(struct ib_pd *ibpd,
                        stag, ret, cqp_request->major_code, cqp_request->minor_code);
 
        if ((!ret) || (cqp_request->major_code)) {
-               if (atomic_dec_and_test(&cqp_request->refcount)) {
-                       if (cqp_request->dynamic) {
-                               kfree(cqp_request);
-                       } else {
-                               spin_lock_irqsave(&nesdev->cqp.lock, flags);
-                               list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
-                               spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
-                       }
-               }
+               nes_put_cqp_request(nesdev, cqp_request);
                ret = (!ret) ? -ETIME : -EIO;
                goto failed_leaf_vpbl_pages_alloc;
-       } else {
-               if (atomic_dec_and_test(&cqp_request->refcount)) {
-                       if (cqp_request->dynamic) {
-                               kfree(cqp_request);
-                       } else {
-                               spin_lock_irqsave(&nesdev->cqp.lock, flags);
-                               list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
-                               spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
-                       }
-               }
        }
-
+       nes_put_cqp_request(nesdev, cqp_request);
        nesfmr->nesmr.ibfmr.lkey = stag;
        nesfmr->nesmr.ibfmr.rkey = stag;
        nesfmr->attr = *ibfmr_attr;
@@ -1474,7 +1417,7 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd,
                        set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, u64temp);
 
                        atomic_set(&cqp_request->refcount, 2);
-                       nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL);
+                       nes_post_cqp_request(nesdev, cqp_request);
 
                        /* Wait for CQP */
                        nes_debug(NES_DBG_QP, "Waiting for create iWARP QP%u to complete.\n",
@@ -1487,15 +1430,7 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd,
                                        nesqp->hwqp.qp_id, ret, nesdev->cqp.sq_head, nesdev->cqp.sq_tail,
                                        cqp_request->major_code, cqp_request->minor_code);
                        if ((!ret) || (cqp_request->major_code)) {
-                               if (atomic_dec_and_test(&cqp_request->refcount)) {
-                                       if (cqp_request->dynamic) {
-                                               kfree(cqp_request);
-                                       } else {
-                                               spin_lock_irqsave(&nesdev->cqp.lock, flags);
-                                               list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
-                                               spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
-                                       }
-                               }
+                               nes_put_cqp_request(nesdev, cqp_request);
                                nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num);
                                nes_free_qp_mem(nesdev, nesqp,virt_wqs);
                                kfree(nesqp->allocated_buffer);
@@ -1504,18 +1439,10 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd,
                                } else {
                                        return ERR_PTR(-EIO);
                                }
-                       } else {
-                               if (atomic_dec_and_test(&cqp_request->refcount)) {
-                                       if (cqp_request->dynamic) {
-                                               kfree(cqp_request);
-                                       } else {
-                                               spin_lock_irqsave(&nesdev->cqp.lock, flags);
-                                               list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
-                                               spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
-                                       }
-                               }
                        }
 
+                       nes_put_cqp_request(nesdev, cqp_request);
+
                        if (ibpd->uobject) {
                                uresp.mmap_sq_db_index = nesqp->mmap_sq_db_index;
                                uresp.actual_sq_size = sq_size;
@@ -1817,7 +1744,7 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries,
                        cpu_to_le32(((u32)((u64temp) >> 33)) & 0x7FFFFFFF);
 
        atomic_set(&cqp_request->refcount, 2);
-       nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL);
+       nes_post_cqp_request(nesdev, cqp_request);
 
        /* Wait for CQP */
        nes_debug(NES_DBG_CQ, "Waiting for create iWARP CQ%u to complete.\n",
@@ -1827,32 +1754,15 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries,
        nes_debug(NES_DBG_CQ, "Create iWARP CQ%u completed, wait_event_timeout ret = %d.\n",
                        nescq->hw_cq.cq_number, ret);
        if ((!ret) || (cqp_request->major_code)) {
-               if (atomic_dec_and_test(&cqp_request->refcount)) {
-                       if (cqp_request->dynamic) {
-                               kfree(cqp_request);
-                       } else {
-                               spin_lock_irqsave(&nesdev->cqp.lock, flags);
-                               list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
-                               spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
-                       }
-               }
+               nes_put_cqp_request(nesdev, cqp_request);
                if (!context)
                        pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, mem,
                                        nescq->hw_cq.cq_pbase);
                nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num);
                kfree(nescq);
                return ERR_PTR(-EIO);
-       } else {
-               if (atomic_dec_and_test(&cqp_request->refcount)) {
-                       if (cqp_request->dynamic) {
-                               kfree(cqp_request);
-                       } else {
-                               spin_lock_irqsave(&nesdev->cqp.lock, flags);
-                               list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
-                               spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
-                       }
-               }
        }
+       nes_put_cqp_request(nesdev, cqp_request);
 
        if (context) {
                /* free the nespbl */
@@ -1931,7 +1841,7 @@ static int nes_destroy_cq(struct ib_cq *ib_cq)
                (nescq->hw_cq.cq_number | ((u32)PCI_FUNC(nesdev->pcidev->devfn) << 16)));
        nes_free_resource(nesadapter, nesadapter->allocated_cqs, nescq->hw_cq.cq_number);
        atomic_set(&cqp_request->refcount, 2);
-       nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL);
+       nes_post_cqp_request(nesdev, cqp_request);
 
        /* Wait for CQP */
        nes_debug(NES_DBG_CQ, "Waiting for destroy iWARP CQ%u to complete.\n",
@@ -1942,37 +1852,18 @@ static int nes_destroy_cq(struct ib_cq *ib_cq)
                        " CQP Major:Minor codes = 0x%04X:0x%04X.\n",
                        nescq->hw_cq.cq_number, ret, cqp_request->major_code,
                        cqp_request->minor_code);
-       if ((!ret) || (cqp_request->major_code)) {
-               if (atomic_dec_and_test(&cqp_request->refcount)) {
-                       if (cqp_request->dynamic) {
-                               kfree(cqp_request);
-                       } else {
-                               spin_lock_irqsave(&nesdev->cqp.lock, flags);
-                               list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
-                               spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
-                       }
-               }
-               if (!ret) {
-                       nes_debug(NES_DBG_CQ, "iWARP CQ%u destroy timeout expired\n",
+       if (!ret) {
+               nes_debug(NES_DBG_CQ, "iWARP CQ%u destroy timeout expired\n",
                                        nescq->hw_cq.cq_number);
-                       ret = -ETIME;
-               } else {
-                       nes_debug(NES_DBG_CQ, "iWARP CQ%u destroy failed\n",
+               ret = -ETIME;
+       } else if (cqp_request->major_code) {
+               nes_debug(NES_DBG_CQ, "iWARP CQ%u destroy failed\n",
                                        nescq->hw_cq.cq_number);
-                       ret = -EIO;
-               }
+               ret = -EIO;
        } else {
                ret = 0;
-               if (atomic_dec_and_test(&cqp_request->refcount)) {
-                       if (cqp_request->dynamic) {
-                               kfree(cqp_request);
-                       } else {
-                               spin_lock_irqsave(&nesdev->cqp.lock, flags);
-                               list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
-                               spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
-                       }
-               }
        }
+       nes_put_cqp_request(nesdev, cqp_request);
 
        if (nescq->cq_mem_size)
                pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size,
@@ -2096,7 +1987,7 @@ static int nes_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd,
        barrier();
 
        atomic_set(&cqp_request->refcount, 2);
-       nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL);
+       nes_post_cqp_request(nesdev, cqp_request);
 
        /* Wait for CQP */
        ret = wait_event_timeout(cqp_request->waitq, (0 != cqp_request->request_done),
@@ -2105,15 +1996,8 @@ static int nes_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd,
                        " CQP Major:Minor codes = 0x%04X:0x%04X.\n",
                        stag, ret, cqp_request->major_code, cqp_request->minor_code);
        major_code = cqp_request->major_code;
-       if (atomic_dec_and_test(&cqp_request->refcount)) {
-               if (cqp_request->dynamic) {
-                       kfree(cqp_request);
-               } else {
-                       spin_lock_irqsave(&nesdev->cqp.lock, flags);
-                       list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
-                       spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
-               }
-       }
+       nes_put_cqp_request(nesdev, cqp_request);
+
        if (!ret)
                return -ETIME;
        else if (major_code)
@@ -2754,7 +2638,7 @@ static int nes_dereg_mr(struct ib_mr *ib_mr)
        set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, ib_mr->rkey);
 
        atomic_set(&cqp_request->refcount, 2);
-       nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL);
+       nes_post_cqp_request(nesdev, cqp_request);
 
        /* Wait for CQP */
        nes_debug(NES_DBG_MR, "Waiting for deallocate STag 0x%08X completed\n", ib_mr->rkey);
@@ -2771,15 +2655,9 @@ static int nes_dereg_mr(struct ib_mr *ib_mr)
 
        major_code = cqp_request->major_code;
        minor_code = cqp_request->minor_code;
-       if (atomic_dec_and_test(&cqp_request->refcount)) {
-               if (cqp_request->dynamic) {
-                       kfree(cqp_request);
-               } else {
-                       spin_lock_irqsave(&nesdev->cqp.lock, flags);
-                       list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
-                       spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
-               }
-       }
+
+       nes_put_cqp_request(nesdev, cqp_request);
+
        if (!ret) {
                nes_debug(NES_DBG_MR, "Timeout waiting to destroy STag,"
                                " ib_mr=%p, rkey = 0x%08X\n",
@@ -2904,7 +2782,6 @@ int nes_hw_modify_qp(struct nes_device *nesdev, struct nes_qp *nesqp,
        /* struct iw_cm_id *cm_id = nesqp->cm_id; */
        /* struct iw_cm_event cm_event; */
        struct nes_cqp_request *cqp_request;
-       unsigned long flags;
        int ret;
        u16 major_code;
 
@@ -2932,7 +2809,7 @@ int nes_hw_modify_qp(struct nes_device *nesdev, struct nes_qp *nesqp,
        set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, (u64)nesqp->nesqp_context_pbase);
 
        atomic_set(&cqp_request->refcount, 2);
-       nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL);
+       nes_post_cqp_request(nesdev, cqp_request);
 
        /* Wait for CQP */
        if (wait_completion) {
@@ -2950,15 +2827,9 @@ int nes_hw_modify_qp(struct nes_device *nesdev, struct nes_qp *nesqp,
                                        nesqp->hwqp.qp_id, cqp_request->major_code,
                                        cqp_request->minor_code, next_iwarp_state);
                }
-               if (atomic_dec_and_test(&cqp_request->refcount)) {
-                       if (cqp_request->dynamic) {
-                               kfree(cqp_request);
-                       } else {
-                               spin_lock_irqsave(&nesdev->cqp.lock, flags);
-                               list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
-                               spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
-                       }
-               }
+
+               nes_put_cqp_request(nesdev, cqp_request);
+
                if (!ret)
                        return -ETIME;
                else if (major_code)
index 1f76bad020f3f1799025bf7fbad08f1acc7d9489..691525cf394a34f4b2e4f68bf9ee5ee0f6b1cb85 100644 (file)
@@ -1,6 +1,7 @@
 config INFINIBAND_IPOIB
        tristate "IP-over-InfiniBand"
        depends on NETDEVICES && INET && (IPV6 || IPV6=n)
+       select INET_LRO
        ---help---
          Support for the IP-over-InfiniBand protocol (IPoIB). This
          transports IP packets over InfiniBand so you can use your IB
index ca126fc2b853c14a3d61f24e731f2f304ad9530d..b0ffc9abe8c082fa1c32e112e8aa50d9e06bca83 100644 (file)
@@ -30,8 +30,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: ipoib.h 1358 2004-12-17 22:00:11Z roland $
  */
 
 #ifndef _IPOIB_H
 #include <rdma/ib_verbs.h>
 #include <rdma/ib_pack.h>
 #include <rdma/ib_sa.h>
+#include <linux/inet_lro.h>
 
 /* constants */
 
+/*
+ * Amount of work done by __ipoib_ib_dev_flush().  The order matters:
+ * levels are compared with >=.
+ *   LIGHT:  ipoib_mark_paths_invalid() and ipoib_mcast_dev_flush() only
+ *   NORMAL: ipoib_ib_dev_down(), then ipoib_ib_dev_up() if admin up
+ *   HEAVY:  as NORMAL, plus a fresh ib_find_pkey() lookup and
+ *           ipoib_ib_dev_stop()/ipoib_ib_dev_open()
+ */
+enum ipoib_flush_level {
+       IPOIB_FLUSH_LIGHT,
+       IPOIB_FLUSH_NORMAL,
+       IPOIB_FLUSH_HEAVY
+};
+
 enum {
        IPOIB_ENCAP_LEN           = 4,
 
@@ -65,8 +70,8 @@ enum {
        IPOIB_CM_BUF_SIZE         = IPOIB_CM_MTU  + IPOIB_ENCAP_LEN,
        IPOIB_CM_HEAD_SIZE        = IPOIB_CM_BUF_SIZE % PAGE_SIZE,
        IPOIB_CM_RX_SG            = ALIGN(IPOIB_CM_BUF_SIZE, PAGE_SIZE) / PAGE_SIZE,
-       IPOIB_RX_RING_SIZE        = 128,
-       IPOIB_TX_RING_SIZE        = 64,
+       IPOIB_RX_RING_SIZE        = 256,
+       IPOIB_TX_RING_SIZE        = 128,
        IPOIB_MAX_QUEUE_SIZE      = 8192,
        IPOIB_MIN_QUEUE_SIZE      = 2,
        IPOIB_CM_MAX_CONN_QP      = 4096,
@@ -84,7 +89,6 @@ enum {
        IPOIB_FLAG_SUBINTERFACE   = 5,
        IPOIB_MCAST_RUN           = 6,
        IPOIB_STOP_REAPER         = 7,
-       IPOIB_MCAST_STARTED       = 8,
        IPOIB_FLAG_ADMIN_CM       = 9,
        IPOIB_FLAG_UMCAST         = 10,
        IPOIB_FLAG_CSUM           = 11,
@@ -96,7 +100,11 @@ enum {
        IPOIB_MCAST_FLAG_BUSY     = 2,  /* joining or already joined */
        IPOIB_MCAST_FLAG_ATTACHED = 3,
 
+       IPOIB_MAX_LRO_DESCRIPTORS = 8,
+       IPOIB_LRO_MAX_AGGR        = 64,
+
        MAX_SEND_CQE              = 16,
+       IPOIB_CM_COPYBREAK        = 256,
 };
 
 #define        IPOIB_OP_RECV   (1ul << 31)
@@ -149,6 +157,11 @@ struct ipoib_tx_buf {
        u64             mapping[MAX_SKB_FRAGS + 1];
 };
 
+/* Connected-mode TX ring entry: an skb plus the one DMA address it is mapped at */
+struct ipoib_cm_tx_buf {
+       struct sk_buff *skb;
+       u64             mapping;
+};
+
 struct ib_cm_id;
 
 struct ipoib_cm_data {
@@ -207,7 +220,7 @@ struct ipoib_cm_tx {
        struct net_device   *dev;
        struct ipoib_neigh  *neigh;
        struct ipoib_path   *path;
-       struct ipoib_tx_buf *tx_ring;
+       struct ipoib_cm_tx_buf *tx_ring;
        unsigned             tx_head;
        unsigned             tx_tail;
        unsigned long        flags;
@@ -249,6 +262,11 @@ struct ipoib_ethtool_st {
        u16     max_coalesced_frames;
 };
 
+/*
+ * Per-device LRO state, embedded in struct ipoib_dev_priv.
+ * NOTE(review): lro_desc is presumably the descriptor array handed to
+ * lro_mgr during setup -- that code is not in this header; confirm.
+ */
+struct ipoib_lro {
+       struct net_lro_mgr lro_mgr;
+       struct net_lro_desc lro_desc[IPOIB_MAX_LRO_DESCRIPTORS];
+};
+
 /*
  * Device private locking: tx_lock protects members used in TX fast
  * path (and we use LLTX so upper layers don't do extra locking).
@@ -264,7 +282,6 @@ struct ipoib_dev_priv {
 
        unsigned long flags;
 
-       struct mutex mcast_mutex;
        struct mutex vlan_mutex;
 
        struct rb_root  path_tree;
@@ -276,10 +293,11 @@ struct ipoib_dev_priv {
 
        struct delayed_work pkey_poll_task;
        struct delayed_work mcast_task;
-       struct work_struct flush_task;
+       struct work_struct flush_light;
+       struct work_struct flush_normal;
+       struct work_struct flush_heavy;
        struct work_struct restart_task;
        struct delayed_work ah_reap_task;
-       struct work_struct pkey_event_task;
 
        struct ib_device *ca;
        u8                port;
@@ -335,6 +353,8 @@ struct ipoib_dev_priv {
        int     hca_caps;
        struct ipoib_ethtool_st ethtool;
        struct timer_list poll_timer;
+
+       struct ipoib_lro lro;
 };
 
 struct ipoib_ah {
@@ -359,6 +379,7 @@ struct ipoib_path {
 
        struct rb_node        rb_node;
        struct list_head      list;
+       int                   valid;
 };
 
 struct ipoib_neigh {
@@ -423,11 +444,14 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
                struct ipoib_ah *address, u32 qpn);
 void ipoib_reap_ah(struct work_struct *work);
 
+void ipoib_mark_paths_invalid(struct net_device *dev);
 void ipoib_flush_paths(struct net_device *dev);
 struct ipoib_dev_priv *ipoib_intf_alloc(const char *format);
 
 int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
-void ipoib_ib_dev_flush(struct work_struct *work);
+void ipoib_ib_dev_flush_light(struct work_struct *work);
+void ipoib_ib_dev_flush_normal(struct work_struct *work);
+void ipoib_ib_dev_flush_heavy(struct work_struct *work);
 void ipoib_pkey_event(struct work_struct *work);
 void ipoib_ib_dev_cleanup(struct net_device *dev);
 
@@ -466,9 +490,7 @@ void ipoib_path_iter_read(struct ipoib_path_iter *iter,
 #endif
 
 int ipoib_mcast_attach(struct net_device *dev, u16 mlid,
-                      union ib_gid *mgid);
-int ipoib_mcast_detach(struct net_device *dev, u16 mlid,
-                      union ib_gid *mgid);
+                      union ib_gid *mgid, int set_qkey);
 
 int ipoib_init_qp(struct net_device *dev);
 int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca);
index 97e67d36378fa1532cb2224624ae20ea53acefe2..0f2d3045061a0e024e75cc14e12b1628a09779b3 100644 (file)
@@ -28,8 +28,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id$
  */
 
 #include <rdma/ib_cm.h>
@@ -113,18 +111,20 @@ static int ipoib_cm_post_receive_srq(struct net_device *dev, int id)
 }
 
 static int ipoib_cm_post_receive_nonsrq(struct net_device *dev,
-                                       struct ipoib_cm_rx *rx, int id)
+                                       struct ipoib_cm_rx *rx,
+                                       struct ib_recv_wr *wr,
+                                       struct ib_sge *sge, int id)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        struct ib_recv_wr *bad_wr;
        int i, ret;
 
-       priv->cm.rx_wr.wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV;
+       wr->wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV;
 
        for (i = 0; i < IPOIB_CM_RX_SG; ++i)
-               priv->cm.rx_sge[i].addr = rx->rx_ring[id].mapping[i];
+               sge[i].addr = rx->rx_ring[id].mapping[i];
 
-       ret = ib_post_recv(rx->qp, &priv->cm.rx_wr, &bad_wr);
+       ret = ib_post_recv(rx->qp, wr, &bad_wr);
        if (unlikely(ret)) {
                ipoib_warn(priv, "post recv failed for buf %d (%d)\n", id, ret);
                ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1,
@@ -322,10 +322,33 @@ static int ipoib_cm_modify_rx_qp(struct net_device *dev,
        return 0;
 }
 
+/*
+ * Initialize a receive work request and its scatter/gather list for
+ * connected-mode RX.  Both are supplied by the caller, so this serves
+ * the shared priv->cm.rx_wr/rx_sge pair as well as a caller-private
+ * wr/sge pair.  sge must have room for priv->cm.num_frags entries.
+ */
+static void ipoib_cm_init_rx_wr(struct net_device *dev,
+                               struct ib_recv_wr *wr,
+                               struct ib_sge *sge)
+{
+       struct ipoib_dev_priv *priv = netdev_priv(dev);
+       int i;
+
+       for (i = 0; i < priv->cm.num_frags; ++i)
+               sge[i].lkey = priv->mr->lkey;
+
+       /* First entry covers the head, every further entry a whole page */
+       sge[0].length = IPOIB_CM_HEAD_SIZE;
+       for (i = 1; i < priv->cm.num_frags; ++i)
+               sge[i].length = PAGE_SIZE;
+
+       wr->next    = NULL;
+       /*
+        * Use the caller's SG list, not the shared priv->cm.rx_sge:
+        * the buffer addresses are later written into this same array.
+        */
+       wr->sg_list = sge;
+       wr->num_sge = priv->cm.num_frags;
+}
+
 static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_id,
                                   struct ipoib_cm_rx *rx)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
+       struct {
+               struct ib_recv_wr wr;
+               struct ib_sge sge[IPOIB_CM_RX_SG];
+       } *t;
        int ret;
        int i;
 
@@ -333,6 +356,14 @@ static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_i
        if (!rx->rx_ring)
                return -ENOMEM;
 
+       t = kmalloc(sizeof *t, GFP_KERNEL);
+       if (!t) {
+               ret = -ENOMEM;
+               goto err_free;
+       }
+
+       ipoib_cm_init_rx_wr(dev, &t->wr, t->sge);
+
        spin_lock_irq(&priv->lock);
 
        if (priv->cm.nonsrq_conn_qp >= ipoib_max_conn_qp) {
@@ -351,8 +382,8 @@ static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_i
                        ipoib_warn(priv, "failed to allocate receive buffer %d\n", i);
                                ret = -ENOMEM;
                                goto err_count;
-                       }
-               ret = ipoib_cm_post_receive_nonsrq(dev, rx, i);
+               }
+               ret = ipoib_cm_post_receive_nonsrq(dev, rx, &t->wr, t->sge, i);
                if (ret) {
                        ipoib_warn(priv, "ipoib_cm_post_receive_nonsrq "
                                   "failed for buf %d\n", i);
@@ -363,6 +394,8 @@ static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_i
 
        rx->recv_count = ipoib_recvq_size;
 
+       kfree(t);
+
        return 0;
 
 err_count:
@@ -371,6 +404,7 @@ err_count:
        spin_unlock_irq(&priv->lock);
 
 err_free:
+       kfree(t);
        ipoib_cm_free_rx_ring(dev, rx->rx_ring);
 
        return ret;
@@ -525,6 +559,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
        u64 mapping[IPOIB_CM_RX_SG];
        int frags;
        int has_srq;
+       struct sk_buff *small_skb;
 
        ipoib_dbg_data(priv, "cm recv completion: id %d, status: %d\n",
                       wr_id, wc->status);
@@ -579,6 +614,23 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
                }
        }
 
+       if (wc->byte_len < IPOIB_CM_COPYBREAK) {
+               int dlen = wc->byte_len;
+
+               small_skb = dev_alloc_skb(dlen + 12);
+               if (small_skb) {
+                       skb_reserve(small_skb, 12);
+                       ib_dma_sync_single_for_cpu(priv->ca, rx_ring[wr_id].mapping[0],
+                                                  dlen, DMA_FROM_DEVICE);
+                       skb_copy_from_linear_data(skb, small_skb->data, dlen);
+                       ib_dma_sync_single_for_device(priv->ca, rx_ring[wr_id].mapping[0],
+                                                     dlen, DMA_FROM_DEVICE);
+                       skb_put(small_skb, dlen);
+                       skb = small_skb;
+                       goto copied;
+               }
+       }
+
        frags = PAGE_ALIGN(wc->byte_len - min(wc->byte_len,
                                              (unsigned)IPOIB_CM_HEAD_SIZE)) / PAGE_SIZE;
 
@@ -601,6 +653,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
 
        skb_put_frags(skb, IPOIB_CM_HEAD_SIZE, wc->byte_len, newskb);
 
+copied:
        skb->protocol = ((struct ipoib_header *) skb->data)->proto;
        skb_reset_mac_header(skb);
        skb_pull(skb, IPOIB_ENCAP_LEN);
@@ -620,7 +673,10 @@ repost:
                        ipoib_warn(priv, "ipoib_cm_post_receive_srq failed "
                                   "for buf %d\n", wr_id);
        } else {
-               if (unlikely(ipoib_cm_post_receive_nonsrq(dev, p, wr_id))) {
+               if (unlikely(ipoib_cm_post_receive_nonsrq(dev, p,
+                                                         &priv->cm.rx_wr,
+                                                         priv->cm.rx_sge,
+                                                         wr_id))) {
                        --p->recv_count;
                        ipoib_warn(priv, "ipoib_cm_post_receive_nonsrq failed "
                                   "for buf %d\n", wr_id);
@@ -647,7 +703,7 @@ static inline int post_send(struct ipoib_dev_priv *priv,
 void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
-       struct ipoib_tx_buf *tx_req;
+       struct ipoib_cm_tx_buf *tx_req;
        u64 addr;
 
        if (unlikely(skb->len > tx->mtu)) {
@@ -678,7 +734,7 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_
                return;
        }
 
-       tx_req->mapping[0] = addr;
+       tx_req->mapping = addr;
 
        if (unlikely(post_send(priv, tx, tx->tx_head & (ipoib_sendq_size - 1),
                               addr, skb->len))) {
@@ -703,7 +759,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        struct ipoib_cm_tx *tx = wc->qp->qp_context;
        unsigned int wr_id = wc->wr_id & ~IPOIB_OP_CM;
-       struct ipoib_tx_buf *tx_req;
+       struct ipoib_cm_tx_buf *tx_req;
        unsigned long flags;
 
        ipoib_dbg_data(priv, "cm send completion: id %d, status: %d\n",
@@ -717,7 +773,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
 
        tx_req = &tx->tx_ring[wr_id];
 
-       ib_dma_unmap_single(priv->ca, tx_req->mapping[0], tx_req->skb->len, DMA_TO_DEVICE);
+       ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len, DMA_TO_DEVICE);
 
        /* FIXME: is this right? Shouldn't we only increment on success? */
        ++dev->stats.tx_packets;
@@ -1087,7 +1143,7 @@ err_tx:
 static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p)
 {
        struct ipoib_dev_priv *priv = netdev_priv(p->dev);
-       struct ipoib_tx_buf *tx_req;
+       struct ipoib_cm_tx_buf *tx_req;
        unsigned long flags;
        unsigned long begin;
 
@@ -1115,7 +1171,7 @@ timeout:
 
        while ((int) p->tx_tail - (int) p->tx_head < 0) {
                tx_req = &p->tx_ring[p->tx_tail & (ipoib_sendq_size - 1)];
-               ib_dma_unmap_single(priv->ca, tx_req->mapping[0], tx_req->skb->len,
+               ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len,
                                    DMA_TO_DEVICE);
                dev_kfree_skb_any(tx_req->skb);
                ++p->tx_tail;
@@ -1384,7 +1440,9 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr,
                ipoib_warn(priv, "enabling connected mode "
                           "will cause multicast packet drops\n");
 
+               rtnl_lock();
                dev->features &= ~(NETIF_F_IP_CSUM | NETIF_F_SG | NETIF_F_TSO);
+               rtnl_unlock();
                priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM;
 
                ipoib_flush_paths(dev);
@@ -1393,14 +1451,16 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr,
 
        if (!strcmp(buf, "datagram\n")) {
                clear_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
-               dev->mtu = min(priv->mcast_mtu, dev->mtu);
-               ipoib_flush_paths(dev);
 
+               rtnl_lock();
                if (test_bit(IPOIB_FLAG_CSUM, &priv->flags)) {
                        dev->features |= NETIF_F_IP_CSUM | NETIF_F_SG;
                        if (priv->hca_caps & IB_DEVICE_UD_TSO)
                                dev->features |= NETIF_F_TSO;
                }
+               dev_set_mtu(dev, min(priv->mcast_mtu, dev->mtu));
+               rtnl_unlock();
+               ipoib_flush_paths(dev);
 
                return count;
        }
@@ -1485,15 +1545,7 @@ int ipoib_cm_dev_init(struct net_device *dev)
                priv->cm.num_frags  = IPOIB_CM_RX_SG;
        }
 
-       for (i = 0; i < priv->cm.num_frags; ++i)
-               priv->cm.rx_sge[i].lkey = priv->mr->lkey;
-
-       priv->cm.rx_sge[0].length = IPOIB_CM_HEAD_SIZE;
-       for (i = 1; i < priv->cm.num_frags; ++i)
-               priv->cm.rx_sge[i].length = PAGE_SIZE;
-       priv->cm.rx_wr.next = NULL;
-       priv->cm.rx_wr.sg_list = priv->cm.rx_sge;
-       priv->cm.rx_wr.num_sge = priv->cm.num_frags;
+       ipoib_cm_init_rx_wr(dev, &priv->cm.rx_wr, priv->cm.rx_sge);
 
        if (ipoib_cm_has_srq(dev)) {
                for (i = 0; i < ipoib_recvq_size; ++i) {
index 10279b79c44d92b9f2384f653fcf9204cf77159d..66af5c1a76e525ec059fd5313bf772c2da8b0a0b 100644 (file)
@@ -86,11 +86,57 @@ static int ipoib_set_coalesce(struct net_device *dev,
        return 0;
 }
 
+/* ethtool statistic names; ipoib_get_ethtool_stats() fills data[] in this order */
+static const char ipoib_stats_keys[][ETH_GSTRING_LEN] = {
+       "LRO aggregated", "LRO flushed",
+       "LRO avg aggr", "LRO no desc"
+};
+
+/* Copy the statistic names into data; string sets other than ETH_SS_STATS are ignored */
+static void ipoib_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
+{
+       if (stringset != ETH_SS_STATS)
+               return;
+
+       memcpy(data, ipoib_stats_keys, sizeof(ipoib_stats_keys));
+}
+
+/* Return the number of strings in a set; only ETH_SS_STATS is supported */
+static int ipoib_get_sset_count(struct net_device *dev, int sset)
+{
+       if (sset == ETH_SS_STATS)
+               return ARRAY_SIZE(ipoib_stats_keys);
+
+       return -EOPNOTSUPP;
+}
+
+/*
+ * Report the LRO counters: aggregated, flushed, average aggregation
+ * (aggregated / flushed, or 0 when nothing was flushed) and no_desc.
+ * The slots line up with ipoib_stats_keys[].
+ */
+static void ipoib_get_ethtool_stats(struct net_device *dev,
+                               struct ethtool_stats *stats, uint64_t *data)
+{
+       struct ipoib_dev_priv *priv = netdev_priv(dev);
+       struct net_lro_mgr *mgr = &priv->lro.lro_mgr;
+
+       data[0] = mgr->stats.aggregated;
+       data[1] = mgr->stats.flushed;
+       /* Guard the division: flushed is 0 until the first flush */
+       data[2] = mgr->stats.flushed ?
+                 mgr->stats.aggregated / mgr->stats.flushed : 0;
+       data[3] = mgr->stats.no_desc;
+}
+
 static const struct ethtool_ops ipoib_ethtool_ops = {
        .get_drvinfo            = ipoib_get_drvinfo,
        .get_tso                = ethtool_op_get_tso,
        .get_coalesce           = ipoib_get_coalesce,
        .set_coalesce           = ipoib_set_coalesce,
+       .get_flags              = ethtool_op_get_flags,
+       .set_flags              = ethtool_op_set_flags,
+       .get_strings            = ipoib_get_strings,
+       .get_sset_count         = ipoib_get_sset_count,
+       .get_ethtool_stats      = ipoib_get_ethtool_stats,
 };
 
 void ipoib_set_ethtool_ops(struct net_device *dev)
index 8b882bbd1d051ff5fc6673abcad8f927afb24025..961c585da216a9eb3db964b08cfc72b20b981a24 100644 (file)
@@ -28,8 +28,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: ipoib_fs.c 1389 2004-12-27 22:56:47Z roland $
  */
 
 #include <linux/err.h>
index f429bce24c20b9be303e75b6dc4ad1cf112a7663..66cafa20c246cea2a024fdaf1bbdaf5b6b26279d 100644 (file)
@@ -31,8 +31,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: ipoib_ib.c 1386 2004-12-27 16:23:17Z roland $
  */
 
 #include <linux/delay.h>
@@ -290,7 +288,10 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
        if (test_bit(IPOIB_FLAG_CSUM, &priv->flags) && likely(wc->csum_ok))
                skb->ip_summed = CHECKSUM_UNNECESSARY;
 
-       netif_receive_skb(skb);
+       if (dev->features & NETIF_F_LRO)
+               lro_receive_skb(&priv->lro.lro_mgr, skb, NULL);
+       else
+               netif_receive_skb(skb);
 
 repost:
        if (unlikely(ipoib_ib_post_receive(dev, wr_id)))
@@ -442,6 +443,9 @@ poll_more:
        }
 
        if (done < budget) {
+               if (dev->features & NETIF_F_LRO)
+                       lro_flush_all(&priv->lro.lro_mgr);
+
                netif_rx_complete(dev, napi);
                if (unlikely(ib_req_notify_cq(priv->recv_cq,
                                              IB_CQ_NEXT_COMP |
@@ -898,7 +902,8 @@ int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
        return 0;
 }
 
-static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, int pkey_event)
+static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
+                               enum ipoib_flush_level level)
 {
        struct ipoib_dev_priv *cpriv;
        struct net_device *dev = priv->dev;
@@ -911,7 +916,7 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, int pkey_event)
         * the parent is down.
         */
        list_for_each_entry(cpriv, &priv->child_intfs, list)
-               __ipoib_ib_dev_flush(cpriv, pkey_event);
+               __ipoib_ib_dev_flush(cpriv, level);
 
        mutex_unlock(&priv->vlan_mutex);
 
@@ -925,7 +930,7 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, int pkey_event)
                return;
        }
 
-       if (pkey_event) {
+       if (level == IPOIB_FLUSH_HEAVY) {
                if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &new_index)) {
                        clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
                        ipoib_ib_dev_down(dev, 0);
@@ -943,11 +948,15 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, int pkey_event)
                priv->pkey_index = new_index;
        }
 
-       ipoib_dbg(priv, "flushing\n");
+       if (level == IPOIB_FLUSH_LIGHT) {
+               ipoib_mark_paths_invalid(dev);
+               ipoib_mcast_dev_flush(dev);
+       }
 
-       ipoib_ib_dev_down(dev, 0);
+       if (level >= IPOIB_FLUSH_NORMAL)
+               ipoib_ib_dev_down(dev, 0);
 
-       if (pkey_event) {
+       if (level == IPOIB_FLUSH_HEAVY) {
                ipoib_ib_dev_stop(dev, 0);
                ipoib_ib_dev_open(dev);
        }
@@ -957,27 +966,34 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, int pkey_event)
         * we get here, don't bring it back up if it's not configured up
         */
        if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) {
-               ipoib_ib_dev_up(dev);
+               if (level >= IPOIB_FLUSH_NORMAL)
+                       ipoib_ib_dev_up(dev);
                ipoib_mcast_restart_task(&priv->restart_task);
        }
 }
 
-void ipoib_ib_dev_flush(struct work_struct *work)
+/* Work handler for priv->flush_light: run a flush at IPOIB_FLUSH_LIGHT */
+void ipoib_ib_dev_flush_light(struct work_struct *work)
+{
+       struct ipoib_dev_priv *priv;
+
+       priv = container_of(work, struct ipoib_dev_priv, flush_light);
+       __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_LIGHT);
+}
+
+void ipoib_ib_dev_flush_normal(struct work_struct *work)
 {
        struct ipoib_dev_priv *priv =
-               container_of(work, struct ipoib_dev_priv, flush_task);
+               container_of(work, struct ipoib_dev_priv, flush_normal);
 
-       ipoib_dbg(priv, "Flushing %s\n", priv->dev->name);
-       __ipoib_ib_dev_flush(priv, 0);
+       __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_NORMAL);
 }
 
-void ipoib_pkey_event(struct work_struct *work)
+void ipoib_ib_dev_flush_heavy(struct work_struct *work)
 {
        struct ipoib_dev_priv *priv =
-               container_of(work, struct ipoib_dev_priv, pkey_event_task);
+               container_of(work, struct ipoib_dev_priv, flush_heavy);
 
-       ipoib_dbg(priv, "Flushing %s and restarting its QP\n", priv->dev->name);
-       __ipoib_ib_dev_flush(priv, 1);
+       __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_HEAVY);
 }
 
 void ipoib_ib_dev_cleanup(struct net_device *dev)
index 2442090ac8d1dbfe083bfca69d46adc2b484c157..8be9ea0436e6bf4751042a644bb78912a319627b 100644 (file)
@@ -30,8 +30,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: ipoib_main.c 1377 2004-12-23 19:57:12Z roland $
  */
 
 #include "ipoib.h"
@@ -62,6 +60,15 @@ MODULE_PARM_DESC(send_queue_size, "Number of descriptors in send queue");
 module_param_named(recv_queue_size, ipoib_recvq_size, int, 0444);
 MODULE_PARM_DESC(recv_queue_size, "Number of descriptors in receive queue");
 
+static int lro;
+module_param(lro, bool, 0444);
+MODULE_PARM_DESC(lro,  "Enable LRO (Large Receive Offload)");
+
+static int lro_max_aggr = IPOIB_LRO_MAX_AGGR;
+module_param(lro_max_aggr, int, 0644);
+MODULE_PARM_DESC(lro_max_aggr, "LRO: Max packets to be aggregated "
+               "(default = 64)");
+
 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
 int ipoib_debug_level;
 
@@ -350,6 +357,23 @@ void ipoib_path_iter_read(struct ipoib_path_iter *iter,
 
 #endif /* CONFIG_INFINIBAND_IPOIB_DEBUG */
 
+/*
+ * Clear the valid flag of every path on priv->path_list while holding
+ * priv->lock.  No entry is unlinked here, so the plain list iterator
+ * is sufficient.
+ */
+void ipoib_mark_paths_invalid(struct net_device *dev)
+{
+       struct ipoib_dev_priv *priv = netdev_priv(dev);
+       struct ipoib_path *path;
+
+       spin_lock_irq(&priv->lock);
+
+       list_for_each_entry(path, &priv->path_list, list) {
+               ipoib_dbg(priv, "mark path LID 0x%04x GID " IPOIB_GID_FMT " invalid\n",
+                       be16_to_cpu(path->pathrec.dlid),
+                       IPOIB_GID_ARG(path->pathrec.dgid));
+               path->valid = 0;
+       }
+
+       spin_unlock_irq(&priv->lock);
+}
+
 void ipoib_flush_paths(struct net_device *dev)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -386,6 +410,7 @@ static void path_rec_completion(int status,
        struct net_device *dev = path->dev;
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        struct ipoib_ah *ah = NULL;
+       struct ipoib_ah *old_ah;
        struct ipoib_neigh *neigh, *tn;
        struct sk_buff_head skqueue;
        struct sk_buff *skb;
@@ -409,6 +434,7 @@ static void path_rec_completion(int status,
 
        spin_lock_irqsave(&priv->lock, flags);
 
+       old_ah   = path->ah;
        path->ah = ah;
 
        if (ah) {
@@ -421,6 +447,17 @@ static void path_rec_completion(int status,
                        __skb_queue_tail(&skqueue, skb);
 
                list_for_each_entry_safe(neigh, tn, &path->neigh_list, list) {
+                       if (neigh->ah) {
+                               WARN_ON(neigh->ah != old_ah);
+                               /*
+                                * Dropping the ah reference inside
+                                * priv->lock is safe here, because we
+                                * will hold one more reference from
+                                * the original value of path->ah (ie
+                                * old_ah).
+                                */
+                               ipoib_put_ah(neigh->ah);
+                       }
                        kref_get(&path->ah->ref);
                        neigh->ah = path->ah;
                        memcpy(&neigh->dgid.raw, &path->pathrec.dgid.raw,
@@ -443,6 +480,7 @@ static void path_rec_completion(int status,
                        while ((skb = __skb_dequeue(&neigh->queue)))
                                __skb_queue_tail(&skqueue, skb);
                }
+               path->valid = 1;
        }
 
        path->query = NULL;
@@ -450,6 +488,9 @@ static void path_rec_completion(int status,
 
        spin_unlock_irqrestore(&priv->lock, flags);
 
+       if (old_ah)
+               ipoib_put_ah(old_ah);
+
        while ((skb = __skb_dequeue(&skqueue))) {
                skb->dev = dev;
                if (dev_queue_xmit(skb))
@@ -623,8 +664,9 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
        spin_lock(&priv->lock);
 
        path = __path_find(dev, phdr->hwaddr + 4);
-       if (!path) {
-               path = path_rec_create(dev, phdr->hwaddr + 4);
+       if (!path || !path->valid) {
+               if (!path)
+                       path = path_rec_create(dev, phdr->hwaddr + 4);
                if (path) {
                        /* put pseudoheader back on for next time */
                        skb_push(skb, sizeof *phdr);
@@ -938,6 +980,54 @@ static const struct header_ops ipoib_header_ops = {
        .create = ipoib_hard_header,
 };
 
+static int get_skb_hdr(struct sk_buff *skb, void **iphdr,
+                      void **tcph, u64 *hdr_flags, void *priv)
+{
+       unsigned int ip_len;
+       struct iphdr *iph;
+
+       if (unlikely(skb->protocol != htons(ETH_P_IP)))
+               return -1;
+
+       /*
+        * In the future we may add an else clause that verifies the
+        * checksum and allows devices which do not calculate checksum
+        * to use LRO.
+        */
+       if (unlikely(skb->ip_summed != CHECKSUM_UNNECESSARY))
+               return -1;
+
+       /* Check for non-TCP packet */
+       skb_reset_network_header(skb);
+       iph = ip_hdr(skb);
+       if (iph->protocol != IPPROTO_TCP)
+               return -1;
+
+       ip_len = ip_hdrlen(skb);
+       skb_set_transport_header(skb, ip_len);
+       *tcph = tcp_hdr(skb);
+
+       /* check if IP header and TCP header are complete */
+       if (ntohs(iph->tot_len) < ip_len + tcp_hdrlen(skb))
+               return -1;
+
+       *hdr_flags = LRO_IPV4 | LRO_TCP;
+       *iphdr = iph;
+
+       return 0;
+}
+
+static void ipoib_lro_setup(struct ipoib_dev_priv *priv)
+{
+       priv->lro.lro_mgr.max_aggr       = lro_max_aggr;
+       priv->lro.lro_mgr.max_desc       = IPOIB_MAX_LRO_DESCRIPTORS;
+       priv->lro.lro_mgr.lro_arr        = priv->lro.lro_desc;
+       priv->lro.lro_mgr.get_skb_header = get_skb_hdr;
+       priv->lro.lro_mgr.features       = LRO_F_NAPI;
+       priv->lro.lro_mgr.dev            = priv->dev;
+       priv->lro.lro_mgr.ip_summed_aggr = CHECKSUM_UNNECESSARY;
+}
+
 static void ipoib_setup(struct net_device *dev)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -977,10 +1067,11 @@ static void ipoib_setup(struct net_device *dev)
 
        priv->dev = dev;
 
+       ipoib_lro_setup(priv);
+
        spin_lock_init(&priv->lock);
        spin_lock_init(&priv->tx_lock);
 
-       mutex_init(&priv->mcast_mutex);
        mutex_init(&priv->vlan_mutex);
 
        INIT_LIST_HEAD(&priv->path_list);
@@ -989,9 +1080,10 @@ static void ipoib_setup(struct net_device *dev)
        INIT_LIST_HEAD(&priv->multicast_list);
 
        INIT_DELAYED_WORK(&priv->pkey_poll_task, ipoib_pkey_poll);
-       INIT_WORK(&priv->pkey_event_task, ipoib_pkey_event);
        INIT_DELAYED_WORK(&priv->mcast_task,   ipoib_mcast_join_task);
-       INIT_WORK(&priv->flush_task,   ipoib_ib_dev_flush);
+       INIT_WORK(&priv->flush_light,   ipoib_ib_dev_flush_light);
+       INIT_WORK(&priv->flush_normal,   ipoib_ib_dev_flush_normal);
+       INIT_WORK(&priv->flush_heavy,   ipoib_ib_dev_flush_heavy);
        INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task);
        INIT_DELAYED_WORK(&priv->ah_reap_task, ipoib_reap_ah);
 }
@@ -1154,6 +1246,9 @@ static struct net_device *ipoib_add_port(const char *format,
                priv->dev->features |= NETIF_F_SG | NETIF_F_IP_CSUM;
        }
 
+       if (lro)
+               priv->dev->features |= NETIF_F_LRO;
+
        /*
         * Set the full membership bit, so that we join the right
         * broadcast group, etc.
@@ -1304,6 +1399,12 @@ static int __init ipoib_init_module(void)
        ipoib_max_conn_qp = min(ipoib_max_conn_qp, IPOIB_CM_MAX_CONN_QP);
 #endif
 
+       /*
+        * When copying small received packets, we only copy from the
+        * linear data part of the SKB, so we rely on this condition.
+        */
+       BUILD_BUG_ON(IPOIB_CM_COPYBREAK > IPOIB_CM_HEAD_SIZE);
+
        ret = ipoib_register_debugfs();
        if (ret)
                return ret;
index 3f663fb852c1c627462807fd4348649eeda451d1..1fcc9a898d81c8fefd4ceb852926221fb3261da8 100644 (file)
@@ -30,8 +30,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: ipoib_multicast.c 1362 2004-12-18 15:56:29Z roland $
  */
 
 #include <linux/skbuff.h>
@@ -188,6 +186,7 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        struct ipoib_ah *ah;
        int ret;
+       int set_qkey = 0;
 
        mcast->mcmember = *mcmember;
 
@@ -202,6 +201,7 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
                priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey);
                spin_unlock_irq(&priv->lock);
                priv->tx_wr.wr.ud.remote_qkey = priv->qkey;
+               set_qkey = 1;
        }
 
        if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
@@ -214,7 +214,7 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
                }
 
                ret = ipoib_mcast_attach(dev, be16_to_cpu(mcast->mcmember.mlid),
-                                        &mcast->mcmember.mgid);
+                                        &mcast->mcmember.mgid, set_qkey);
                if (ret < 0) {
                        ipoib_warn(priv, "couldn't attach QP to multicast group "
                                   IPOIB_GID_FMT "\n",
@@ -575,8 +575,11 @@ void ipoib_mcast_join_task(struct work_struct *work)
 
        priv->mcast_mtu = IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu));
 
-       if (!ipoib_cm_admin_enabled(dev))
-               dev->mtu = min(priv->mcast_mtu, priv->admin_mtu);
+       if (!ipoib_cm_admin_enabled(dev)) {
+               rtnl_lock();
+               dev_set_mtu(dev, min(priv->mcast_mtu, priv->admin_mtu));
+               rtnl_unlock();
+       }
 
        ipoib_dbg_mcast(priv, "successfully joined all multicast groups\n");
 
@@ -594,10 +597,6 @@ int ipoib_mcast_start_thread(struct net_device *dev)
                queue_delayed_work(ipoib_workqueue, &priv->mcast_task, 0);
        mutex_unlock(&mcast_mutex);
 
-       spin_lock_irq(&priv->lock);
-       set_bit(IPOIB_MCAST_STARTED, &priv->flags);
-       spin_unlock_irq(&priv->lock);
-
        return 0;
 }
 
@@ -607,10 +606,6 @@ int ipoib_mcast_stop_thread(struct net_device *dev, int flush)
 
        ipoib_dbg_mcast(priv, "stopping multicast thread\n");
 
-       spin_lock_irq(&priv->lock);
-       clear_bit(IPOIB_MCAST_STARTED, &priv->flags);
-       spin_unlock_irq(&priv->lock);
-
        mutex_lock(&mcast_mutex);
        clear_bit(IPOIB_MCAST_RUN, &priv->flags);
        cancel_delayed_work(&priv->mcast_task);
@@ -635,10 +630,10 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
                                IPOIB_GID_ARG(mcast->mcmember.mgid));
 
                /* Remove ourselves from the multicast group */
-               ret = ipoib_mcast_detach(dev, be16_to_cpu(mcast->mcmember.mlid),
-                                        &mcast->mcmember.mgid);
+               ret = ib_detach_mcast(priv->qp, &mcast->mcmember.mgid,
+                                     be16_to_cpu(mcast->mcmember.mlid));
                if (ret)
-                       ipoib_warn(priv, "ipoib_mcast_detach failed (result = %d)\n", ret);
+                       ipoib_warn(priv, "ib_detach_mcast failed (result = %d)\n", ret);
        }
 
        return 0;
index 8766d29ce3b783275d6b6753620cdc0291855186..68325119f740da879a3d9deaa6095907ee3554bf 100644 (file)
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: ipoib_verbs.c 1349 2004-12-16 21:09:43Z roland $
  */
 
 #include "ipoib.h"
 
-int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid)
+int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid, int set_qkey)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
-       struct ib_qp_attr *qp_attr;
+       struct ib_qp_attr *qp_attr = NULL;
        int ret;
        u16 pkey_index;
 
-       ret = -ENOMEM;
-       qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL);
-       if (!qp_attr)
-               goto out;
-
        if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &pkey_index)) {
                clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
                ret = -ENXIO;
@@ -54,18 +47,23 @@ int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid)
        }
        set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
 
-       /* set correct QKey for QP */
-       qp_attr->qkey = priv->qkey;
-       ret = ib_modify_qp(priv->qp, qp_attr, IB_QP_QKEY);
-       if (ret) {
-               ipoib_warn(priv, "failed to modify QP, ret = %d\n", ret);
-               goto out;
+       if (set_qkey) {
+               ret = -ENOMEM;
+               qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL);
+               if (!qp_attr)
+                       goto out;
+
+               /* set correct QKey for QP */
+               qp_attr->qkey = priv->qkey;
+               ret = ib_modify_qp(priv->qp, qp_attr, IB_QP_QKEY);
+               if (ret) {
+                       ipoib_warn(priv, "failed to modify QP, ret = %d\n", ret);
+                       goto out;
+               }
        }
 
        /* attach QP to multicast group */
-       mutex_lock(&priv->mcast_mutex);
        ret = ib_attach_mcast(priv->qp, mgid, mlid);
-       mutex_unlock(&priv->mcast_mutex);
        if (ret)
                ipoib_warn(priv, "failed to attach to multicast group, ret = %d\n", ret);
 
@@ -74,20 +72,6 @@ out:
        return ret;
 }
 
-int ipoib_mcast_detach(struct net_device *dev, u16 mlid, union ib_gid *mgid)
-{
-       struct ipoib_dev_priv *priv = netdev_priv(dev);
-       int ret;
-
-       mutex_lock(&priv->mcast_mutex);
-       ret = ib_detach_mcast(priv->qp, mgid, mlid);
-       mutex_unlock(&priv->mcast_mutex);
-       if (ret)
-               ipoib_warn(priv, "ib_detach_mcast failed (result = %d)\n", ret);
-
-       return ret;
-}
-
 int ipoib_init_qp(struct net_device *dev)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -201,7 +185,10 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
        init_attr.recv_cq = priv->recv_cq;
 
        if (priv->hca_caps & IB_DEVICE_UD_TSO)
-               init_attr.create_flags = IB_QP_CREATE_IPOIB_UD_LSO;
+               init_attr.create_flags |= IB_QP_CREATE_IPOIB_UD_LSO;
+
+       if (priv->hca_caps & IB_DEVICE_BLOCK_MULTICAST_LOOPBACK)
+               init_attr.create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK;
 
        if (dev->features & NETIF_F_SG)
                init_attr.cap.max_send_sge = MAX_SKB_FRAGS + 1;
@@ -289,15 +276,17 @@ void ipoib_event(struct ib_event_handler *handler,
        if (record->element.port_num != priv->port)
                return;
 
-       if (record->event == IB_EVENT_PORT_ERR    ||
-           record->event == IB_EVENT_PORT_ACTIVE ||
-           record->event == IB_EVENT_LID_CHANGE  ||
-           record->event == IB_EVENT_SM_CHANGE   ||
+       ipoib_dbg(priv, "Event %d on device %s port %d\n", record->event,
+                 record->device->name, record->element.port_num);
+
+       if (record->event == IB_EVENT_SM_CHANGE ||
            record->event == IB_EVENT_CLIENT_REREGISTER) {
-               ipoib_dbg(priv, "Port state change event\n");
-               queue_work(ipoib_workqueue, &priv->flush_task);
+               queue_work(ipoib_workqueue, &priv->flush_light);
+       } else if (record->event == IB_EVENT_PORT_ERR ||
+                  record->event == IB_EVENT_PORT_ACTIVE ||
+                  record->event == IB_EVENT_LID_CHANGE) {
+               queue_work(ipoib_workqueue, &priv->flush_normal);
        } else if (record->event == IB_EVENT_PKEY_CHANGE) {
-               ipoib_dbg(priv, "P_Key change event on port:%d\n", priv->port);
-               queue_work(ipoib_workqueue, &priv->pkey_event_task);
+               queue_work(ipoib_workqueue, &priv->flush_heavy);
        }
 }
index 1cdb5cfb0ff12f3cce46c149650cdd3e53f4bb87..b08eb56196d301eb9ecd34797286339669f47890 100644 (file)
@@ -28,8 +28,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: ipoib_vlan.c 1349 2004-12-16 21:09:43Z roland $
  */
 
 #include <linux/module.h>
index aeb58cae9a3fa4ca8aaec0a949f2cb72f4815aa9..356fac6d105abbf7c842ad56f99f2e6d716d4a48 100644 (file)
@@ -42,9 +42,6 @@
  *     Zhenyu Wang
  * Modified by:
  *      Erez Zilber
- *
- *
- * $Id: iscsi_iser.c 6965 2006-05-07 11:36:20Z ogerlitz $
  */
 
 #include <linux/types.h>
index a8c1b300e34d54d76eb18eeb646db24b2c34b598..0e10703cf59e5942dbe003bd9d894e7c3af4f048 100644 (file)
@@ -36,8 +36,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: iscsi_iser.h 7051 2006-05-10 12:29:11Z ogerlitz $
  */
 #ifndef __ISCSI_ISER_H__
 #define __ISCSI_ISER_H__
index 08dc81c46f41eac81f223177ca9f1e50ed64d285..31ad498bdc51118f0418349c974e3018ccc6f1fb 100644 (file)
@@ -28,8 +28,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: iser_initiator.c 6964 2006-05-07 11:11:43Z ogerlitz $
  */
 #include <linux/kernel.h>
 #include <linux/slab.h>
index cac50c4dc159df8c1202640d6bc0ab13e6b8cc22..81e49cb10ed36b517f1636c408bc86f5ec211261 100644 (file)
@@ -28,8 +28,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: iser_memory.c 6964 2006-05-07 11:11:43Z ogerlitz $
  */
 #include <linux/module.h>
 #include <linux/kernel.h>
index d19cfe605ebb082786e842515bcad80dfab41bde..77cabee7cc088002a8d3b3f02d6f2b3ed441bb1d 100644 (file)
@@ -29,8 +29,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: iser_verbs.c 7051 2006-05-10 12:29:11Z ogerlitz $
  */
 #include <linux/kernel.h>
 #include <linux/module.h>
index 435145709dd624395cf92cd2414364e58983bd89..ed7c5f72cb8bc24cc02100a1e7847f88f9b9e5b7 100644 (file)
@@ -28,8 +28,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: ib_srp.c 3932 2005-11-01 17:19:29Z roland $
  */
 
 #include <linux/module.h>
@@ -49,8 +47,6 @@
 #include <scsi/srp.h>
 #include <scsi/scsi_transport_srp.h>
 
-#include <rdma/ib_cache.h>
-
 #include "ib_srp.h"
 
 #define DRV_NAME       "ib_srp"
@@ -183,10 +179,10 @@ static int srp_init_qp(struct srp_target_port *target,
        if (!attr)
                return -ENOMEM;
 
-       ret = ib_find_cached_pkey(target->srp_host->srp_dev->dev,
-                                 target->srp_host->port,
-                                 be16_to_cpu(target->path.pkey),
-                                 &attr->pkey_index);
+       ret = ib_find_pkey(target->srp_host->srp_dev->dev,
+                          target->srp_host->port,
+                          be16_to_cpu(target->path.pkey),
+                          &attr->pkey_index);
        if (ret)
                goto out;
 
@@ -1883,8 +1879,7 @@ static ssize_t srp_create_target(struct device *dev,
        if (ret)
                goto err;
 
-       ib_get_cached_gid(host->srp_dev->dev, host->port, 0,
-                         &target->path.sgid);
+       ib_query_gid(host->srp_dev->dev, host->port, 0, &target->path.sgid);
 
        shost_printk(KERN_DEBUG, target->scsi_host, PFX
                     "new target: id_ext %016llx ioc_guid %016llx pkey %04x "
index 63d2ae72406181338dc248c6554bf341205bee1e..e185b907fc12f7eb1ff1e2f1ac2791e501844b69 100644 (file)
@@ -28,8 +28,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: ib_srp.h 3932 2005-11-01 17:19:29Z roland $
  */
 
 #ifndef IB_SRP_H
index 6c4f32066919b89271678151d763d9278eed7368..ed0ecd9679cbff52c6875f443b9a5c4b1be92455 100644 (file)
@@ -54,6 +54,7 @@ enum {
        RDMA_CQ_DISABLE         = 16,
        RDMA_CTRL_QP_SETUP      = 17,
        RDMA_GET_MEM            = 18,
+       RDMA_GET_MIB            = 19,
 
        GET_RX_PAGE_INFO        = 50,
 };
index ff9c013ce5357114f6aff22456cb45e3e020659b..cf26968737966ddff1b52d0052644954f234f42f 100644 (file)
@@ -303,6 +303,12 @@ static int cxgb_rdma_ctl(struct adapter *adapter, unsigned int req, void *data)
                spin_unlock_irq(&adapter->sge.reg_lock);
                break;
        }
+       case RDMA_GET_MIB: {
+               spin_lock(&adapter->stats_lock);
+               t3_tp_get_mib_stats(adapter, (struct tp_mib_stats *)data);
+               spin_unlock(&adapter->stats_lock);
+               break;
+       }
        default:
                ret = -EOPNOTSUPP;
        }
@@ -381,6 +387,7 @@ static int cxgb_offload_ctl(struct t3cdev *tdev, unsigned int req, void *data)
        case RDMA_CQ_DISABLE:
        case RDMA_CTRL_QP_SETUP:
        case RDMA_GET_MEM:
+       case RDMA_GET_MIB:
                if (!offload_running(adapter))
                        return -EAGAIN;
                return cxgb_rdma_ctl(adapter, req, data);
index a0177fc55e28a9b241dbfdbd39fa5df1f24c268e..29db711303b9040070aa6c2bc54f7295ed520a24 100644 (file)
@@ -38,7 +38,7 @@
 #define DRV_VERSION "1.0-ko"
 
 /* Firmware version */
-#define FW_VERSION_MAJOR 6
+#define FW_VERSION_MAJOR 7
 #define FW_VERSION_MINOR 0
 #define FW_VERSION_MICRO 0
 #endif                         /* __CHELSIO_VERSION_H */
index d82f2751d2c713e4a7f97579a9881b0d27a16129..2b5006b9be67964239fa0e2b0ebc87df7050ffdf 100644 (file)
@@ -101,6 +101,34 @@ static void dump_dev_cap_flags(struct mlx4_dev *dev, u32 flags)
                        mlx4_dbg(dev, "    %s\n", fname[i]);
 }
 
+int mlx4_MOD_STAT_CFG(struct mlx4_dev *dev, struct mlx4_mod_stat_cfg *cfg)
+{
+       struct mlx4_cmd_mailbox *mailbox;
+       u32 *inbox;
+       int err = 0;
+
+#define MOD_STAT_CFG_IN_SIZE           0x100
+
+#define MOD_STAT_CFG_PG_SZ_M_OFFSET    0x002
+#define MOD_STAT_CFG_PG_SZ_OFFSET      0x003
+
+       mailbox = mlx4_alloc_cmd_mailbox(dev);
+       if (IS_ERR(mailbox))
+               return PTR_ERR(mailbox);
+       inbox = mailbox->buf;
+
+       memset(inbox, 0, MOD_STAT_CFG_IN_SIZE);
+
+       MLX4_PUT(inbox, cfg->log_pg_sz, MOD_STAT_CFG_PG_SZ_OFFSET);
+       MLX4_PUT(inbox, cfg->log_pg_sz_m, MOD_STAT_CFG_PG_SZ_M_OFFSET);
+
+       err = mlx4_cmd(dev, mailbox->dma, 0, 0, MLX4_CMD_MOD_STAT_CFG,
+                       MLX4_CMD_TIME_CLASS_A);
+
+       mlx4_free_cmd_mailbox(dev, mailbox);
+       return err;
+}
+
 int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 {
        struct mlx4_cmd_mailbox *mailbox;
index 306cb9b0242d654bda6ba16ba8cc0c728e589af9..a0e046c149b78c18c359826389145ab93e63f556 100644 (file)
 #include "mlx4.h"
 #include "icm.h"
 
+struct mlx4_mod_stat_cfg {
+       u8 log_pg_sz;
+       u8 log_pg_sz_m;
+};
+
 struct mlx4_dev_cap {
        int max_srq_sz;
        int max_qp_sz;
@@ -162,5 +167,6 @@ int mlx4_SET_ICM_SIZE(struct mlx4_dev *dev, u64 icm_size, u64 *aux_pages);
 int mlx4_MAP_ICM_AUX(struct mlx4_dev *dev, struct mlx4_icm *icm);
 int mlx4_UNMAP_ICM_AUX(struct mlx4_dev *dev);
 int mlx4_NOP(struct mlx4_dev *dev);
+int mlx4_MOD_STAT_CFG(struct mlx4_dev *dev, struct mlx4_mod_stat_cfg *cfg);
 
 #endif /* MLX4_FW_H */
index a6aa49fc1d68a6654d449bcc6fa90a7ad8f40f23..d3736013fe9b7ca9b46847ebd2d5cae462c9524d 100644 (file)
@@ -485,6 +485,7 @@ static int mlx4_init_hca(struct mlx4_dev *dev)
        struct mlx4_priv          *priv = mlx4_priv(dev);
        struct mlx4_adapter        adapter;
        struct mlx4_dev_cap        dev_cap;
+       struct mlx4_mod_stat_cfg   mlx4_cfg;
        struct mlx4_profile        profile;
        struct mlx4_init_hca_param init_hca;
        u64 icm_size;
@@ -502,6 +503,12 @@ static int mlx4_init_hca(struct mlx4_dev *dev)
                return err;
        }
 
+       mlx4_cfg.log_pg_sz_m = 1;
+       mlx4_cfg.log_pg_sz = 0;
+       err = mlx4_MOD_STAT_CFG(dev, &mlx4_cfg);
+       if (err)
+               mlx4_warn(dev, "Failed to override log_pg_sz parameter\n");
+
        err = mlx4_dev_cap(dev, &dev_cap);
        if (err) {
                mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
index 57f7f1f0d4ecbee919b3cf90b21e04038b8fe3a6..b4b57870ddfdbefc7ecb26ff0167eed4d398e3c5 100644 (file)
@@ -38,6 +38,9 @@
 
 #include "mlx4.h"
 
+#define MGM_QPN_MASK       0x00FFFFFF
+#define MGM_BLCK_LB_BIT    30
+
 struct mlx4_mgm {
        __be32                  next_gid_index;
        __be32                  members_count;
@@ -153,7 +156,8 @@ static int find_mgm(struct mlx4_dev *dev,
        return err;
 }
 
-int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16])
+int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
+                         int block_mcast_loopback)
 {
        struct mlx4_priv *priv = mlx4_priv(dev);
        struct mlx4_cmd_mailbox *mailbox;
@@ -202,13 +206,18 @@ int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16])
        }
 
        for (i = 0; i < members_count; ++i)
-               if (mgm->qp[i] == cpu_to_be32(qp->qpn)) {
+               if ((be32_to_cpu(mgm->qp[i]) & MGM_QPN_MASK) == qp->qpn) {
                        mlx4_dbg(dev, "QP %06x already a member of MGM\n", qp->qpn);
                        err = 0;
                        goto out;
                }
 
-       mgm->qp[members_count++] = cpu_to_be32(qp->qpn);
+       if (block_mcast_loopback)
+               mgm->qp[members_count++] = cpu_to_be32((qp->qpn & MGM_QPN_MASK) |
+                                                      (1 << MGM_BLCK_LB_BIT));
+       else
+               mgm->qp[members_count++] = cpu_to_be32(qp->qpn & MGM_QPN_MASK);
+
        mgm->members_count       = cpu_to_be32(members_count);
 
        err = mlx4_WRITE_MCG(dev, index, mailbox);
@@ -283,7 +292,7 @@ int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16])
 
        members_count = be32_to_cpu(mgm->members_count);
        for (loc = -1, i = 0; i < members_count; ++i)
-               if (mgm->qp[i] == cpu_to_be32(qp->qpn))
+               if ((be32_to_cpu(mgm->qp[i]) & MGM_QPN_MASK) == qp->qpn)
                        loc = i;
 
        if (loc == -1) {
index 44d8d5163a1a357e2e4a41b22544f28df25823ce..683bce375c7476fde30b15311adb66972d19d6fe 100644 (file)
 
 #define IDESCSI_DEBUG_LOG              0
 
+#if IDESCSI_DEBUG_LOG
+#define debug_log(fmt, args...) \
+       printk(KERN_INFO "ide-scsi: " fmt, ## args)
+#else
+#define debug_log(fmt, args...) do {} while (0)
+#endif
+
 /*
  *     SCSI command transformation layer
  */
@@ -129,14 +136,15 @@ static inline idescsi_scsi_t *drive_to_idescsi(ide_drive_t *ide_drive)
 #define IDESCSI_PC_RQ                  90
 
 /*
- *     PIO data transfer routines using the scatter gather table.
+ *     PIO data transfer routine using the scatter gather table.
  */
-static void idescsi_input_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc,
-               unsigned int bcount)
+static void ide_scsi_io_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc,
+                               unsigned int bcount, int write)
 {
        ide_hwif_t *hwif = drive->hwif;
-       int count;
+       xfer_func_t *xf = write ? hwif->output_data : hwif->input_data;
        char *buf;
+       int count;
 
        while (bcount) {
                count = min(pc->sg->length - pc->b_count, bcount);
@@ -145,13 +153,13 @@ static void idescsi_input_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc,
 
                        local_irq_save(flags);
                        buf = kmap_atomic(sg_page(pc->sg), KM_IRQ0) +
-                                       pc->sg->offset;
-                       hwif->input_data(drive, NULL, buf + pc->b_count, count);
+                                         pc->sg->offset;
+                       xf(drive, NULL, buf + pc->b_count, count);
                        kunmap_atomic(buf - pc->sg->offset, KM_IRQ0);
                        local_irq_restore(flags);
                } else {
                        buf = sg_virt(pc->sg);
-                       hwif->input_data(drive, NULL, buf + pc->b_count, count);
+                       xf(drive, NULL, buf + pc->b_count, count);
                }
                bcount -= count; pc->b_count += count;
                if (pc->b_count == pc->sg->length) {
@@ -163,51 +171,34 @@ static void idescsi_input_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc,
        }
 
        if (bcount) {
-               printk (KERN_ERR "ide-scsi: scatter gather table too small, discarding data\n");
-               ide_pad_transfer(drive, 0, bcount);
+               printk(KERN_ERR "%s: scatter gather table too small, %s\n",
+                               drive->name, write ? "padding with zeros"
+                                                  : "discarding data");
+               ide_pad_transfer(drive, write, bcount);
        }
 }
 
-static void idescsi_output_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc,
-               unsigned int bcount)
+static void ide_scsi_hex_dump(u8 *data, int len)
 {
-       ide_hwif_t *hwif = drive->hwif;
-       int count;
-       char *buf;
+       print_hex_dump(KERN_CONT, "", DUMP_PREFIX_NONE, 16, 1, data, len, 0);
+}
 
-       while (bcount) {
-               count = min(pc->sg->length - pc->b_count, bcount);
-               if (PageHighMem(sg_page(pc->sg))) {
-                       unsigned long flags;
+static int idescsi_end_request(ide_drive_t *, int, int);
 
-                       local_irq_save(flags);
-                       buf = kmap_atomic(sg_page(pc->sg), KM_IRQ0) +
-                                               pc->sg->offset;
-                       hwif->output_data(drive, NULL, buf + pc->b_count, count);
-                       kunmap_atomic(buf - pc->sg->offset, KM_IRQ0);
-                       local_irq_restore(flags);
-               } else {
-                       buf = sg_virt(pc->sg);
-                       hwif->output_data(drive, NULL, buf + pc->b_count, count);
-               }
-               bcount -= count; pc->b_count += count;
-               if (pc->b_count == pc->sg->length) {
-                       if (!--pc->sg_cnt)
-                               break;
-                       pc->sg = sg_next(pc->sg);
-                       pc->b_count = 0;
-               }
-       }
+static void ide_scsi_callback(ide_drive_t *drive)
+{
+       idescsi_scsi_t *scsi = drive_to_idescsi(drive);
+       struct ide_atapi_pc *pc = scsi->pc;
 
-       if (bcount) {
-               printk (KERN_ERR "ide-scsi: scatter gather table too small, padding with zeros\n");
-               ide_pad_transfer(drive, 1, bcount);
-       }
-}
+       if (pc->flags & PC_FLAG_TIMEDOUT)
+               debug_log("%s: got timed out packet %lu at %lu\n", __func__,
+                         pc->scsi_cmd->serial_number, jiffies);
+               /* end this request now - scsi should retry it*/
+       else if (test_bit(IDESCSI_LOG_CMD, &scsi->log))
+               printk(KERN_INFO "Packet command completed, %d bytes"
+                                " transferred\n", pc->xferred);
 
-static void ide_scsi_hex_dump(u8 *data, int len)
-{
-       print_hex_dump(KERN_CONT, "", DUMP_PREFIX_NONE, 16, 1, data, len, 0);
+       idescsi_end_request(drive, 1, 0);
 }
 
 static int idescsi_check_condition(ide_drive_t *drive,
@@ -228,14 +219,16 @@ static int idescsi_check_condition(ide_drive_t *drive,
                kfree(pc);
                return -ENOMEM;
        }
-       ide_init_drive_cmd(rq);
+       blk_rq_init(NULL, rq);
        rq->special = (char *) pc;
        pc->rq = rq;
        pc->buf = buf;
        pc->c[0] = REQUEST_SENSE;
        pc->c[4] = pc->req_xfer = pc->buf_size = SCSI_SENSE_BUFFERSIZE;
        rq->cmd_type = REQ_TYPE_SENSE;
+       rq->cmd_flags |= REQ_PREEMPT;
        pc->timeout = jiffies + WAIT_READY;
+       pc->callback = ide_scsi_callback;
        /* NOTE! Save the failed packet command in "rq->buffer" */
        rq->buffer = (void *) failed_cmd->special;
        pc->scsi_cmd = ((struct ide_atapi_pc *) failed_cmd->special)->scsi_cmd;
@@ -244,11 +237,10 @@ static int idescsi_check_condition(ide_drive_t *drive,
                ide_scsi_hex_dump(pc->c, 6);
        }
        rq->rq_disk = scsi->disk;
-       return ide_do_drive_cmd(drive, rq, ide_preempt);
+       ide_do_drive_cmd(drive, rq);
+       return 0;
 }
 
-static int idescsi_end_request(ide_drive_t *, int, int);
-
 static ide_startstop_t
 idescsi_atapi_error(ide_drive_t *drive, struct request *rq, u8 stat, u8 err)
 {
@@ -256,7 +248,7 @@ idescsi_atapi_error(ide_drive_t *drive, struct request *rq, u8 stat, u8 err)
 
        if (ide_read_status(drive) & (BUSY_STAT | DRQ_STAT))
                /* force an abort */
-               hwif->OUTBSYNC(drive, WIN_IDLEIMMEDIATE,
+               hwif->OUTBSYNC(hwif, WIN_IDLEIMMEDIATE,
                               hwif->io_ports.command_addr);
 
        rq->errors++;
@@ -269,10 +261,9 @@ idescsi_atapi_error(ide_drive_t *drive, struct request *rq, u8 stat, u8 err)
 static ide_startstop_t
 idescsi_atapi_abort(ide_drive_t *drive, struct request *rq)
 {
-#if IDESCSI_DEBUG_LOG
-       printk(KERN_WARNING "idescsi_atapi_abort called for %lu\n",
+       debug_log("%s called for %lu\n", __func__,
                ((struct ide_atapi_pc *) rq->special)->scsi_cmd->serial_number);
-#endif
+
        rq->errors |= ERROR_MAX;
 
        idescsi_end_request(drive, 0, 0);
@@ -351,9 +342,9 @@ static int idescsi_expiry(ide_drive_t *drive)
        idescsi_scsi_t *scsi = drive_to_idescsi(drive);
        struct ide_atapi_pc   *pc   = scsi->pc;
 
-#if IDESCSI_DEBUG_LOG
-       printk(KERN_WARNING "idescsi_expiry called for %lu at %lu\n", pc->scsi_cmd->serial_number, jiffies);
-#endif
+       debug_log("%s called for %lu at %lu\n", __func__,
+                 pc->scsi_cmd->serial_number, jiffies);
+
        pc->flags |= PC_FLAG_TIMEDOUT;
 
        return 0;                                       /* we do not want the ide subsystem to retry */
@@ -365,141 +356,19 @@ static int idescsi_expiry(ide_drive_t *drive)
 static ide_startstop_t idescsi_pc_intr (ide_drive_t *drive)
 {
        idescsi_scsi_t *scsi = drive_to_idescsi(drive);
-       ide_hwif_t *hwif = drive->hwif;
        struct ide_atapi_pc *pc = scsi->pc;
-       struct request *rq = pc->rq;
-       unsigned int temp;
-       u16 bcount;
-       u8 stat, ireason;
-
-#if IDESCSI_DEBUG_LOG
-       printk (KERN_INFO "ide-scsi: Reached idescsi_pc_intr interrupt handler\n");
-#endif /* IDESCSI_DEBUG_LOG */
-
-       if (pc->flags & PC_FLAG_TIMEDOUT) {
-#if IDESCSI_DEBUG_LOG
-               printk(KERN_WARNING "idescsi_pc_intr: got timed out packet  %lu at %lu\n",
-                               pc->scsi_cmd->serial_number, jiffies);
-#endif
-               /* end this request now - scsi should retry it*/
-               idescsi_end_request (drive, 1, 0);
-               return ide_stopped;
-       }
-       if (pc->flags & PC_FLAG_DMA_IN_PROGRESS) {
-               pc->flags &= ~PC_FLAG_DMA_IN_PROGRESS;
-#if IDESCSI_DEBUG_LOG
-               printk ("ide-scsi: %s: DMA complete\n", drive->name);
-#endif /* IDESCSI_DEBUG_LOG */
-               pc->xferred = pc->req_xfer;
-               (void)hwif->dma_ops->dma_end(drive);
-       }
-
-       /* Clear the interrupt */
-       stat = ide_read_status(drive);
-
-       if ((stat & DRQ_STAT) == 0) {
-               /* No more interrupts */
-               if (test_bit(IDESCSI_LOG_CMD, &scsi->log))
-                       printk(KERN_INFO "Packet command completed, %d bytes"
-                                       " transferred\n", pc->xferred);
-               local_irq_enable_in_hardirq();
-               if (stat & ERR_STAT)
-                       rq->errors++;
-               idescsi_end_request (drive, 1, 0);
-               return ide_stopped;
-       }
-       bcount = (hwif->INB(hwif->io_ports.lbah_addr) << 8) |
-                 hwif->INB(hwif->io_ports.lbam_addr);
-       ireason = hwif->INB(hwif->io_ports.nsect_addr);
-
-       if (ireason & CD) {
-               printk(KERN_ERR "ide-scsi: CoD != 0 in idescsi_pc_intr\n");
-               return ide_do_reset (drive);
-       }
-       if (ireason & IO) {
-               temp = pc->xferred + bcount;
-               if (temp > pc->req_xfer) {
-                       if (temp > pc->buf_size) {
-                               printk(KERN_ERR "ide-scsi: The scsi wants to "
-                                       "send us more data than expected "
-                                       "- discarding data\n");
-                               temp = pc->buf_size - pc->xferred;
-                               if (temp) {
-                                       pc->flags &= ~PC_FLAG_WRITING;
-                                       if (pc->sg)
-                                               idescsi_input_buffers(drive, pc,
-                                                                       temp);
-                                       else
-                                               hwif->input_data(drive, NULL,
-                                                       pc->cur_pos, temp);
-                                       printk(KERN_ERR "ide-scsi: transferred"
-                                                       " %d of %d bytes\n",
-                                                       temp, bcount);
-                               }
-                               pc->xferred += temp;
-                               pc->cur_pos += temp;
-                               ide_pad_transfer(drive, 0, bcount - temp);
-                               ide_set_handler(drive, &idescsi_pc_intr, get_timeout(pc), idescsi_expiry);
-                               return ide_started;
-                       }
-#if IDESCSI_DEBUG_LOG
-                       printk (KERN_NOTICE "ide-scsi: The scsi wants to send us more data than expected - allowing transfer\n");
-#endif /* IDESCSI_DEBUG_LOG */
-               }
-       }
-       if (ireason & IO) {
-               pc->flags &= ~PC_FLAG_WRITING;
-               if (pc->sg)
-                       idescsi_input_buffers(drive, pc, bcount);
-               else
-                       hwif->input_data(drive, NULL, pc->cur_pos, bcount);
-       } else {
-               pc->flags |= PC_FLAG_WRITING;
-               if (pc->sg)
-                       idescsi_output_buffers(drive, pc, bcount);
-               else
-                       hwif->output_data(drive, NULL, pc->cur_pos, bcount);
-       }
-       /* Update the current position */
-       pc->xferred += bcount;
-       pc->cur_pos += bcount;
 
-       /* And set the interrupt handler again */
-       ide_set_handler(drive, &idescsi_pc_intr, get_timeout(pc), idescsi_expiry);
-       return ide_started;
+       return ide_pc_intr(drive, pc, idescsi_pc_intr, get_timeout(pc),
+                          idescsi_expiry, NULL, NULL, NULL,
+                          ide_scsi_io_buffers);
 }
 
 static ide_startstop_t idescsi_transfer_pc(ide_drive_t *drive)
 {
-       ide_hwif_t *hwif = drive->hwif;
        idescsi_scsi_t *scsi = drive_to_idescsi(drive);
-       struct ide_atapi_pc *pc = scsi->pc;
-       ide_startstop_t startstop;
-       u8 ireason;
 
-       if (ide_wait_stat(&startstop,drive,DRQ_STAT,BUSY_STAT,WAIT_READY)) {
-               printk(KERN_ERR "ide-scsi: Strange, packet command "
-                       "initiated yet DRQ isn't asserted\n");
-               return startstop;
-       }
-       ireason = hwif->INB(hwif->io_ports.nsect_addr);
-       if ((ireason & CD) == 0 || (ireason & IO)) {
-               printk(KERN_ERR "ide-scsi: (IO,CoD) != (0,1) while "
-                               "issuing a packet command\n");
-               return ide_do_reset (drive);
-       }
-       BUG_ON(HWGROUP(drive)->handler != NULL);
-       /* Set the interrupt routine */
-       ide_set_handler(drive, &idescsi_pc_intr, get_timeout(pc), idescsi_expiry);
-
-       /* Send the actual packet */
-       hwif->output_data(drive, NULL, scsi->pc->c, 12);
-
-       if (pc->flags & PC_FLAG_DMA_OK) {
-               pc->flags |= PC_FLAG_DMA_IN_PROGRESS;
-               hwif->dma_ops->dma_start(drive);
-       }
-       return ide_started;
+       return ide_transfer_pc(drive, scsi->pc, idescsi_pc_intr,
+                              get_timeout(scsi->pc), idescsi_expiry);
 }
 
 static inline int idescsi_set_direction(struct ide_atapi_pc *pc)
@@ -545,38 +414,12 @@ static ide_startstop_t idescsi_issue_pc(ide_drive_t *drive,
                struct ide_atapi_pc *pc)
 {
        idescsi_scsi_t *scsi = drive_to_idescsi(drive);
-       ide_hwif_t *hwif = drive->hwif;
-       u16 bcount;
-       u8 dma = 0;
 
        /* Set the current packet command */
        scsi->pc = pc;
-       /* We haven't transferred any data yet */
-       pc->xferred = 0;
-       pc->cur_pos = pc->buf;
-       /* Request to transfer the entire buffer at once */
-       bcount = min(pc->req_xfer, 63 * 1024);
-
-       if (drive->using_dma && !idescsi_map_sg(drive, pc)) {
-               hwif->sg_mapped = 1;
-               dma = !hwif->dma_ops->dma_setup(drive);
-               hwif->sg_mapped = 0;
-       }
-
-       ide_pktcmd_tf_load(drive, IDE_TFLAG_NO_SELECT_MASK, bcount, dma);
 
-       if (dma)
-               pc->flags |= PC_FLAG_DMA_OK;
-
-       if (test_bit(IDESCSI_DRQ_INTERRUPT, &scsi->flags)) {
-               ide_execute_command(drive, WIN_PACKETCMD, &idescsi_transfer_pc,
-                                   get_timeout(pc), idescsi_expiry);
-               return ide_started;
-       } else {
-               /* Issue the packet command */
-               ide_execute_pkt_cmd(drive);
-               return idescsi_transfer_pc(drive);
-       }
+       return ide_issue_pc(drive, pc, idescsi_transfer_pc,
+                           get_timeout(pc), idescsi_expiry);
 }
 
 /*
@@ -584,14 +427,22 @@ static ide_startstop_t idescsi_issue_pc(ide_drive_t *drive,
  */
 static ide_startstop_t idescsi_do_request (ide_drive_t *drive, struct request *rq, sector_t block)
 {
-#if IDESCSI_DEBUG_LOG
-       printk (KERN_INFO "dev: %s, cmd: %x, errors: %d\n", rq->rq_disk->disk_name,rq->cmd[0],rq->errors);
-       printk (KERN_INFO "sector: %ld, nr_sectors: %ld, current_nr_sectors: %d\n",rq->sector,rq->nr_sectors,rq->current_nr_sectors);
-#endif /* IDESCSI_DEBUG_LOG */
+       debug_log("dev: %s, cmd: %x, errors: %d\n", rq->rq_disk->disk_name,
+                 rq->cmd[0], rq->errors);
+       debug_log("sector: %ld, nr_sectors: %ld, current_nr_sectors: %d\n",
+                 rq->sector, rq->nr_sectors, rq->current_nr_sectors);
 
        if (blk_sense_request(rq) || blk_special_request(rq)) {
-               return idescsi_issue_pc(drive,
-                               (struct ide_atapi_pc *) rq->special);
+               struct ide_atapi_pc *pc = (struct ide_atapi_pc *)rq->special;
+               idescsi_scsi_t *scsi = drive_to_idescsi(drive);
+
+               if (test_bit(IDESCSI_DRQ_INTERRUPT, &scsi->flags))
+                       pc->flags |= PC_FLAG_DRQ_INTERRUPT;
+
+               if (drive->using_dma && !idescsi_map_sg(drive, pc))
+                       pc->flags |= PC_FLAG_DMA_OK;
+
+               return idescsi_issue_pc(drive, pc);
        }
        blk_dump_rq_flags(rq, "ide-scsi: unsup command");
        idescsi_end_request (drive, 0, 0);
@@ -646,6 +497,8 @@ static void ide_scsi_remove(ide_drive_t *drive)
        put_disk(g);
 
        ide_scsi_put(scsi);
+
+       drive->scsi = 0;
 }
 
 static int ide_scsi_probe(ide_drive_t *);
@@ -765,6 +618,8 @@ static int idescsi_queue (struct scsi_cmnd *cmd,
 
        memset (pc->c, 0, 12);
        pc->flags = 0;
+       if (cmd->sc_data_direction == DMA_TO_DEVICE)
+               pc->flags |= PC_FLAG_WRITING;
        pc->rq = rq;
        memcpy (pc->c, cmd->cmnd, cmd->cmd_len);
        pc->buf = NULL;
@@ -775,6 +630,7 @@ static int idescsi_queue (struct scsi_cmnd *cmd,
        pc->scsi_cmd = cmd;
        pc->done = done;
        pc->timeout = jiffies + cmd->timeout_per_command;
+       pc->callback = ide_scsi_callback;
 
        if (test_bit(IDESCSI_LOG_CMD, &scsi->log)) {
                printk ("ide-scsi: %s: que %lu, cmd = ", drive->name, cmd->serial_number);
@@ -785,12 +641,11 @@ static int idescsi_queue (struct scsi_cmnd *cmd,
                }
        }
 
-       ide_init_drive_cmd (rq);
+       blk_rq_init(NULL, rq);
        rq->special = (char *) pc;
        rq->cmd_type = REQ_TYPE_SPECIAL;
        spin_unlock_irq(host->host_lock);
-       rq->rq_disk = scsi->disk;
-       (void) ide_do_drive_cmd (drive, rq, ide_end);
+       blk_execute_rq_nowait(drive->queue, scsi->disk, rq, 0, NULL);
        spin_lock_irq(host->host_lock);
        return 0;
 abort:
@@ -985,6 +840,8 @@ static int ide_scsi_probe(ide_drive_t *drive)
            !(host = scsi_host_alloc(&idescsi_template,sizeof(idescsi_scsi_t))))
                return -ENODEV;
 
+       drive->scsi = 1;
+
        g = alloc_disk(1 << PARTN_BITS);
        if (!g)
                goto out_host_put;
@@ -993,10 +850,10 @@ static int ide_scsi_probe(ide_drive_t *drive)
 
        host->max_id = 1;
 
-#if IDESCSI_DEBUG_LOG
        if (drive->id->last_lun)
-               printk(KERN_NOTICE "%s: id->last_lun=%u\n", drive->name, drive->id->last_lun);
-#endif
+               debug_log("%s: id->last_lun=%u\n", drive->name,
+                         drive->id->last_lun);
+
        if ((drive->id->last_lun & 0x7) != 7)
                host->max_lun = (drive->id->last_lun & 0x7) + 1;
        else
@@ -1025,6 +882,7 @@ static int ide_scsi_probe(ide_drive_t *drive)
 
        put_disk(g);
 out_host_put:
+       drive->scsi = 0;
        scsi_host_put(host);
        return err;
 }
index a0df63289b5f83ed9007be92df25c9c5f7065b4c..0cf96eb8a60f610b3fc26f0bc827453eb1c201fc 100644 (file)
@@ -106,6 +106,7 @@ unsigned char *fb_ddc_read(struct i2c_adapter *adapter)
        algo_data->setsda(algo_data->data, 1);
        algo_data->setscl(algo_data->data, 1);
 
+       adapter->class |= I2C_CLASS_DDC;
        return edid;
 }
 
index ca95f09d8b43f7e9f88ddf7681e361f386493c0c..fcf9fadbf5722d328f521af00aa8fe5675e16da0 100644 (file)
@@ -100,7 +100,8 @@ static int intelfb_gpio_getsda(void *data)
 
 static int intelfb_setup_i2c_bus(struct intelfb_info *dinfo,
                                 struct intelfb_i2c_chan *chan,
-                                const u32 reg, const char *name)
+                                const u32 reg, const char *name,
+                                int class)
 {
        int rc;
 
@@ -108,6 +109,7 @@ static int intelfb_setup_i2c_bus(struct intelfb_info *dinfo,
        chan->reg                       = reg;
        snprintf(chan->adapter.name, sizeof(chan->adapter.name),
                 "intelfb %s", name);
+       chan->adapter.class             = class;
        chan->adapter.owner             = THIS_MODULE;
        chan->adapter.id                = I2C_HW_B_INTELFB;
        chan->adapter.algo_data         = &chan->algo;
@@ -145,7 +147,7 @@ void intelfb_create_i2c_busses(struct intelfb_info *dinfo)
 
        /* setup the DDC bus for analog output */
        intelfb_setup_i2c_bus(dinfo, &dinfo->output[i].ddc_bus, GPIOA,
-                             "CRTDDC_A");
+                             "CRTDDC_A", I2C_CLASS_DDC);
        i++;
 
        /* need to add the output busses for each device
@@ -159,9 +161,9 @@ void intelfb_create_i2c_busses(struct intelfb_info *dinfo)
        case INTEL_865G:
                dinfo->output[i].type = INTELFB_OUTPUT_DVO;
                intelfb_setup_i2c_bus(dinfo, &dinfo->output[i].ddc_bus,
-                                     GPIOD, "DVODDC_D");
+                                     GPIOD, "DVODDC_D", I2C_CLASS_DDC);
                intelfb_setup_i2c_bus(dinfo, &dinfo->output[i].i2c_bus,
-                                     GPIOE, "DVOI2C_E");
+                                     GPIOE, "DVOI2C_E", 0);
                i++;
                break;
        case INTEL_915G:
@@ -174,7 +176,7 @@ void intelfb_create_i2c_busses(struct intelfb_info *dinfo)
                /* SDVO ports have a single control bus - 2 devices */
                dinfo->output[i].type = INTELFB_OUTPUT_SDVO;
                intelfb_setup_i2c_bus(dinfo, &dinfo->output[i].i2c_bus,
-                                     GPIOE, "SDVOCTRL_E");
+                                     GPIOE, "SDVOCTRL_E", 0);
                /* TODO: initialize the SDVO */
                /* I830SDVOInit(pScrn, i, DVOB); */
                i++;
index 4baab7be58de4f075de0f197ac5b019da3d35eb4..75ee5a12e549ec703a0974c984975c1cdf2ac29a 100644 (file)
@@ -104,7 +104,9 @@ static struct i2c_algo_bit_data matrox_i2c_algo_template =
 };
 
 static int i2c_bus_reg(struct i2c_bit_adapter* b, struct matrox_fb_info* minfo, 
-               unsigned int data, unsigned int clock, const char* name) {
+               unsigned int data, unsigned int clock, const char *name,
+               int class)
+{
        int err;
 
        b->minfo = minfo;
@@ -114,6 +116,7 @@ static int i2c_bus_reg(struct i2c_bit_adapter* b, struct matrox_fb_info* minfo,
        snprintf(b->adapter.name, sizeof(b->adapter.name), name,
                minfo->fbcon.node);
        i2c_set_adapdata(&b->adapter, b);
+       b->adapter.class = class;
        b->adapter.algo_data = &b->bac;
        b->adapter.dev.parent = &ACCESS_FBINFO(pcidev)->dev;
        b->bac = matrox_i2c_algo_template;
@@ -159,22 +162,29 @@ static void* i2c_matroxfb_probe(struct matrox_fb_info* minfo) {
        switch (ACCESS_FBINFO(chip)) {
                case MGA_2064:
                case MGA_2164:
-                       err = i2c_bus_reg(&m2info->ddc1, minfo, DDC1B_DATA, DDC1B_CLK, "DDC:fb%u #0");
+                       err = i2c_bus_reg(&m2info->ddc1, minfo,
+                                         DDC1B_DATA, DDC1B_CLK,
+                                         "DDC:fb%u #0", I2C_CLASS_DDC);
                        break;
                default:
-                       err = i2c_bus_reg(&m2info->ddc1, minfo, DDC1_DATA, DDC1_CLK, "DDC:fb%u #0");
+                       err = i2c_bus_reg(&m2info->ddc1, minfo,
+                                         DDC1_DATA, DDC1_CLK,
+                                         "DDC:fb%u #0", I2C_CLASS_DDC);
                        break;
        }
        if (err)
                goto fail_ddc1;
        if (ACCESS_FBINFO(devflags.dualhead)) {
-               err = i2c_bus_reg(&m2info->ddc2, minfo, DDC2_DATA, DDC2_CLK, "DDC:fb%u #1");
+               err = i2c_bus_reg(&m2info->ddc2, minfo,
+                                 DDC2_DATA, DDC2_CLK,
+                                 "DDC:fb%u #1", I2C_CLASS_DDC);
                if (err == -ENODEV) {
                        printk(KERN_INFO "i2c-matroxfb: VGA->TV plug detected, DDC unavailable.\n");
                } else if (err)
                        printk(KERN_INFO "i2c-matroxfb: Could not register secondary output i2c bus. Continuing anyway.\n");
                /* Register maven bus even on G450/G550 */
-               err = i2c_bus_reg(&m2info->maven, minfo, MAT_DATA, MAT_CLK, "MAVEN:fb%u");
+               err = i2c_bus_reg(&m2info->maven, minfo,
+                                 MAT_DATA, MAT_CLK, "MAVEN:fb%u", 0);
                if (err)
                        printk(KERN_INFO "i2c-matroxfb: Could not register Maven i2c bus. Continuing anyway.\n");
        }
index 0f51c0f7c266ee7ebf940d94f869081a4d80b084..5fa1512cd9a210932426e109b5095bbd9529e704 100644 (file)
@@ -1691,11 +1691,13 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
                         */
                        clear_buffer_dirty(bh);
                        set_buffer_uptodate(bh);
-               } else if (!buffer_mapped(bh) && buffer_dirty(bh)) {
+               } else if ((!buffer_mapped(bh) || buffer_delay(bh)) &&
+                          buffer_dirty(bh)) {
                        WARN_ON(bh->b_size != blocksize);
                        err = get_block(inode, block, bh, 1);
                        if (err)
                                goto recover;
+                       clear_buffer_delay(bh);
                        if (buffer_new(bh)) {
                                /* blockdev mappings never come here */
                                clear_buffer_new(bh);
@@ -1774,7 +1776,8 @@ recover:
        bh = head;
        /* Recovery: lock and submit the mapped buffers */
        do {
-               if (buffer_mapped(bh) && buffer_dirty(bh)) {
+               if (buffer_mapped(bh) && buffer_dirty(bh) &&
+                   !buffer_delay(bh)) {
                        lock_buffer(bh);
                        mark_buffer_async_write(bh);
                } else {
@@ -2061,6 +2064,7 @@ int generic_write_end(struct file *file, struct address_space *mapping,
                        struct page *page, void *fsdata)
 {
        struct inode *inode = mapping->host;
+       int i_size_changed = 0;
 
        copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
 
@@ -2073,12 +2077,21 @@ int generic_write_end(struct file *file, struct address_space *mapping,
         */
        if (pos+copied > inode->i_size) {
                i_size_write(inode, pos+copied);
-               mark_inode_dirty(inode);
+               i_size_changed = 1;
        }
 
        unlock_page(page);
        page_cache_release(page);
 
+       /*
+        * Don't mark the inode dirty under page lock. First, it unnecessarily
+        * makes the holding time of page lock longer. Second, it forces lock
+        * ordering of page lock and transaction start for journaling
+        * filesystems.
+        */
+       if (i_size_changed)
+               mark_inode_dirty(inode);
+
        return copied;
 }
 EXPORT_SYMBOL(generic_write_end);
index 9cc80b9cc8d8fa874659f7d4280d0f0e6fba5eca..495ab21b9832a7c5e48116b1dbe2f123af27ec25 100644 (file)
@@ -47,7 +47,7 @@ static int ext4_block_in_group(struct super_block *sb, ext4_fsblk_t block,
                        ext4_group_t block_group)
 {
        ext4_group_t actual_group;
-       ext4_get_group_no_and_offset(sb, block, &actual_group, 0);
+       ext4_get_group_no_and_offset(sb, block, &actual_group, NULL);
        if (actual_group == block_group)
                return 1;
        return 0;
@@ -121,12 +121,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
                                le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks);
                }
        } else { /* For META_BG_BLOCK_GROUPS */
-               int group_rel = (block_group -
-                                le32_to_cpu(sbi->s_es->s_first_meta_bg)) %
-                               EXT4_DESC_PER_BLOCK(sb);
-               if (group_rel == 0 || group_rel == 1 ||
-                   (group_rel == EXT4_DESC_PER_BLOCK(sb) - 1))
-                       bit_max += 1;
+               bit_max += ext4_bg_num_gdb(sb, block_group);
        }
 
        if (block_group == sbi->s_groups_count - 1) {
@@ -295,7 +290,7 @@ err_out:
        return 0;
 }
 /**
- * read_block_bitmap()
+ * ext4_read_block_bitmap()
  * @sb:                        super block
  * @block_group:       given block group
  *
@@ -305,7 +300,7 @@ err_out:
  * Return buffer_head on success or NULL in case of failure.
  */
 struct buffer_head *
-read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
+ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
 {
        struct ext4_group_desc * desc;
        struct buffer_head * bh = NULL;
@@ -409,8 +404,7 @@ restart:
                prev = rsv;
        }
        printk("Window map complete.\n");
-       if (bad)
-               BUG();
+       BUG_ON(bad);
 }
 #define rsv_window_dump(root, verbose) \
        __rsv_window_dump((root), (verbose), __func__)
@@ -694,7 +688,7 @@ do_more:
                count -= overflow;
        }
        brelse(bitmap_bh);
-       bitmap_bh = read_block_bitmap(sb, block_group);
+       bitmap_bh = ext4_read_block_bitmap(sb, block_group);
        if (!bitmap_bh)
                goto error_return;
        desc = ext4_get_group_desc (sb, block_group, &gd_bh);
@@ -810,6 +804,13 @@ do_more:
        spin_unlock(sb_bgl_lock(sbi, block_group));
        percpu_counter_add(&sbi->s_freeblocks_counter, count);
 
+       if (sbi->s_log_groups_per_flex) {
+               ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
+               spin_lock(sb_bgl_lock(sbi, flex_group));
+               sbi->s_flex_groups[flex_group].free_blocks += count;
+               spin_unlock(sb_bgl_lock(sbi, flex_group));
+       }
+
        /* We dirtied the bitmap block */
        BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
        err = ext4_journal_dirty_metadata(handle, bitmap_bh);
@@ -1598,23 +1599,35 @@ out:
 
 /**
  * ext4_has_free_blocks()
- * @sbi:               in-core super block structure.
+ * @sbi:       in-core super block structure.
+ * @nblocks:   number of needed blocks
  *
- * Check if filesystem has at least 1 free block available for allocation.
+ * Check if filesystem has free blocks available for allocation.
+ * Return the number of blocks available for allocation for this request
+ * On success, return nblocks
  */
-static int ext4_has_free_blocks(struct ext4_sb_info *sbi)
+ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi,
+                                               ext4_fsblk_t nblocks)
 {
-       ext4_fsblk_t free_blocks, root_blocks;
+       ext4_fsblk_t free_blocks;
+       ext4_fsblk_t root_blocks = 0;
 
        free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
-       root_blocks = ext4_r_blocks_count(sbi->s_es);
-       if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) &&
+
+       if (!capable(CAP_SYS_RESOURCE) &&
                sbi->s_resuid != current->fsuid &&
-               (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) {
-               return 0;
-       }
-       return 1;
-}
+               (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid)))
+               root_blocks = ext4_r_blocks_count(sbi->s_es);
+#ifdef CONFIG_SMP
+       if (free_blocks - root_blocks < FBC_BATCH)
+               free_blocks =
+                       percpu_counter_sum_and_set(&sbi->s_freeblocks_counter);
+#endif
+       if (free_blocks - root_blocks < nblocks)
+               return free_blocks - root_blocks;
+       return nblocks;
+ }
+
 
 /**
  * ext4_should_retry_alloc()
@@ -1630,7 +1643,7 @@ static int ext4_has_free_blocks(struct ext4_sb_info *sbi)
  */
 int ext4_should_retry_alloc(struct super_block *sb, int *retries)
 {
-       if (!ext4_has_free_blocks(EXT4_SB(sb)) || (*retries)++ > 3)
+       if (!ext4_has_free_blocks(EXT4_SB(sb), 1) || (*retries)++ > 3)
                return 0;
 
        jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id);
@@ -1639,20 +1652,24 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries)
 }
 
 /**
- * ext4_new_blocks_old() -- core block(s) allocation function
+ * ext4_old_new_blocks() -- core block bitmap based block allocation function
+ *
  * @handle:            handle to this transaction
  * @inode:             file inode
  * @goal:              given target block(filesystem wide)
  * @count:             target number of blocks to allocate
  * @errp:              error code
  *
- * ext4_new_blocks uses a goal block to assist allocation.  It tries to
- * allocate block(s) from the block group contains the goal block first. If that
- * fails, it will try to allocate block(s) from other block groups without
- * any specific goal block.
+ * ext4_old_new_blocks uses a goal block to assist allocation and look up
+ * the block bitmap directly to do block allocation.  It tries to
+ * allocate block(s) from the block group contains the goal block first. If
+ * that fails, it will try to allocate block(s) from other block groups
+ * without any specific goal block.
+ *
+ * This function is called when -o nomballoc mount option is enabled
  *
  */
-ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode,
+ext4_fsblk_t ext4_old_new_blocks(handle_t *handle, struct inode *inode,
                        ext4_fsblk_t goal, unsigned long *count, int *errp)
 {
        struct buffer_head *bitmap_bh = NULL;
@@ -1676,13 +1693,26 @@ ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode,
        ext4_group_t ngroups;
        unsigned long num = *count;
 
-       *errp = -ENOSPC;
        sb = inode->i_sb;
        if (!sb) {
+               *errp = -ENODEV;
                printk("ext4_new_block: nonexistent device");
                return 0;
        }
 
+       sbi = EXT4_SB(sb);
+       if (!EXT4_I(inode)->i_delalloc_reserved_flag) {
+               /*
+                * With delalloc we already reserved the blocks
+                */
+               *count = ext4_has_free_blocks(sbi, *count);
+       }
+       if (*count == 0) {
+               *errp = -ENOSPC;
+               return 0;       /*return with ENOSPC error */
+       }
+       num = *count;
+
        /*
         * Check quota for allocation of this block.
         */
@@ -1706,11 +1736,6 @@ ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode,
        if (block_i && ((windowsz = block_i->rsv_window_node.rsv_goal_size) > 0))
                my_rsv = &block_i->rsv_window_node;
 
-       if (!ext4_has_free_blocks(sbi)) {
-               *errp = -ENOSPC;
-               goto out;
-       }
-
        /*
         * First, test whether the goal block is free.
         */
@@ -1734,7 +1759,7 @@ retry_alloc:
                my_rsv = NULL;
 
        if (free_blocks > 0) {
-               bitmap_bh = read_block_bitmap(sb, group_no);
+               bitmap_bh = ext4_read_block_bitmap(sb, group_no);
                if (!bitmap_bh)
                        goto io_error;
                grp_alloc_blk = ext4_try_to_allocate_with_rsv(sb, handle,
@@ -1770,7 +1795,7 @@ retry_alloc:
                        continue;
 
                brelse(bitmap_bh);
-               bitmap_bh = read_block_bitmap(sb, group_no);
+               bitmap_bh = ext4_read_block_bitmap(sb, group_no);
                if (!bitmap_bh)
                        goto io_error;
                /*
@@ -1882,7 +1907,15 @@ allocated:
        le16_add_cpu(&gdp->bg_free_blocks_count, -num);
        gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp);
        spin_unlock(sb_bgl_lock(sbi, group_no));
-       percpu_counter_sub(&sbi->s_freeblocks_counter, num);
+       if (!EXT4_I(inode)->i_delalloc_reserved_flag)
+               percpu_counter_sub(&sbi->s_freeblocks_counter, num);
+
+       if (sbi->s_log_groups_per_flex) {
+               ext4_group_t flex_group = ext4_flex_group(sbi, group_no);
+               spin_lock(sb_bgl_lock(sbi, flex_group));
+               sbi->s_flex_groups[flex_group].free_blocks -= num;
+               spin_unlock(sb_bgl_lock(sbi, flex_group));
+       }
 
        BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor");
        err = ext4_journal_dirty_metadata(handle, gdp_bh);
@@ -1915,46 +1948,104 @@ out:
        return 0;
 }
 
-ext4_fsblk_t ext4_new_block(handle_t *handle, struct inode *inode,
-               ext4_fsblk_t goal, int *errp)
+#define EXT4_META_BLOCK 0x1
+
+static ext4_fsblk_t do_blk_alloc(handle_t *handle, struct inode *inode,
+                               ext4_lblk_t iblock, ext4_fsblk_t goal,
+                               unsigned long *count, int *errp, int flags)
 {
        struct ext4_allocation_request ar;
        ext4_fsblk_t ret;
 
        if (!test_opt(inode->i_sb, MBALLOC)) {
-               unsigned long count = 1;
-               ret = ext4_new_blocks_old(handle, inode, goal, &count, errp);
-               return ret;
+               return ext4_old_new_blocks(handle, inode, goal, count, errp);
        }
 
        memset(&ar, 0, sizeof(ar));
+       /* Fill with neighbour allocated blocks */
+
        ar.inode = inode;
        ar.goal = goal;
-       ar.len = 1;
+       ar.len = *count;
+       ar.logical = iblock;
+
+       if (S_ISREG(inode->i_mode) && !(flags & EXT4_META_BLOCK))
+               /* enable in-core preallocation for data block allocation */
+               ar.flags = EXT4_MB_HINT_DATA;
+       else
+               /* disable in-core preallocation for non-regular files */
+               ar.flags = 0;
+
        ret = ext4_mb_new_blocks(handle, &ar, errp);
+       *count = ar.len;
        return ret;
 }
 
-ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
+/*
+ * ext4_new_meta_blocks() -- allocate block for meta data (indexing) blocks
+ *
+ * @handle:             handle to this transaction
+ * @inode:              file inode
+ * @goal:               given target block(filesystem wide)
+ * @count:             total number of blocks needed
+ * @errp:               error code
+ *
+ * Return 1st allocated block number on success, *count stores total account
+ * error stores in errp pointer
+ */
+ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
                ext4_fsblk_t goal, unsigned long *count, int *errp)
 {
-       struct ext4_allocation_request ar;
        ext4_fsblk_t ret;
-
-       if (!test_opt(inode->i_sb, MBALLOC)) {
-               ret = ext4_new_blocks_old(handle, inode, goal, count, errp);
-               return ret;
+       ret = do_blk_alloc(handle, inode, 0, goal,
+                               count, errp, EXT4_META_BLOCK);
+       /*
+        * Account for the allocated meta blocks
+        */
+       if (!(*errp)) {
+               spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
+               EXT4_I(inode)->i_allocated_meta_blocks += *count;
+               spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
        }
-
-       memset(&ar, 0, sizeof(ar));
-       ar.inode = inode;
-       ar.goal = goal;
-       ar.len = *count;
-       ret = ext4_mb_new_blocks(handle, &ar, errp);
-       *count = ar.len;
        return ret;
 }
 
+/*
+ * ext4_new_meta_block() -- allocate block for meta data (indexing) blocks
+ *
+ * @handle:             handle to this transaction
+ * @inode:              file inode
+ * @goal:               given target block(filesystem wide)
+ * @errp:               error code
+ *
+ * Return allocated block number on success
+ */
+ext4_fsblk_t ext4_new_meta_block(handle_t *handle, struct inode *inode,
+               ext4_fsblk_t goal, int *errp)
+{
+       unsigned long count = 1;
+       return ext4_new_meta_blocks(handle, inode, goal, &count, errp);
+}
+
+/*
+ * ext4_new_blocks() -- allocate data blocks
+ *
+ * @handle:             handle to this transaction
+ * @inode:              file inode
+ * @goal:               given target block(filesystem wide)
+ * @count:             total number of blocks needed
+ * @errp:               error code
+ *
+ * Return 1st allocated block number on success, *count stores total account
+ * error stores in errp pointer
+ */
+
+ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
+                               ext4_lblk_t iblock, ext4_fsblk_t goal,
+                               unsigned long *count, int *errp)
+{
+       return do_blk_alloc(handle, inode, iblock, goal, count, errp, 0);
+}
 
 /**
  * ext4_count_free_blocks() -- count filesystem free blocks
@@ -1986,7 +2077,7 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
                        continue;
                desc_count += le16_to_cpu(gdp->bg_free_blocks_count);
                brelse(bitmap_bh);
-               bitmap_bh = read_block_bitmap(sb, i);
+               bitmap_bh = ext4_read_block_bitmap(sb, i);
                if (bitmap_bh == NULL)
                        continue;
 
index 2bf0331ea1946303f0caedf360dce0cb9dc38445..d3d23d73c08b5e5e1584f16f88be892e4903ecfa 100644 (file)
@@ -129,7 +129,8 @@ static int ext4_readdir(struct file * filp,
                struct buffer_head *bh = NULL;
 
                map_bh.b_state = 0;
-               err = ext4_get_blocks_wrap(NULL, inode, blk, 1, &map_bh, 0, 0);
+               err = ext4_get_blocks_wrap(NULL, inode, blk, 1, &map_bh,
+                                               0, 0, 0);
                if (err > 0) {
                        pgoff_t index = map_bh.b_blocknr >>
                                        (PAGE_CACHE_SHIFT - inode->i_blkbits);
@@ -272,7 +273,7 @@ static void free_rb_tree_fname(struct rb_root *root)
 
        while (n) {
                /* Do the node's children first */
-               if ((n)->rb_left) {
+               if (n->rb_left) {
                        n = n->rb_left;
                        continue;
                }
@@ -301,24 +302,18 @@ static void free_rb_tree_fname(struct rb_root *root)
                        parent->rb_right = NULL;
                n = parent;
        }
-       root->rb_node = NULL;
 }
 
 
-static struct dir_private_info *create_dir_info(loff_t pos)
+static struct dir_private_info *ext4_htree_create_dir_info(loff_t pos)
 {
        struct dir_private_info *p;
 
-       p = kmalloc(sizeof(struct dir_private_info), GFP_KERNEL);
+       p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL);
        if (!p)
                return NULL;
-       p->root.rb_node = NULL;
-       p->curr_node = NULL;
-       p->extra_fname = NULL;
-       p->last_pos = 0;
        p->curr_hash = pos2maj_hash(pos);
        p->curr_minor_hash = pos2min_hash(pos);
-       p->next_hash = 0;
        return p;
 }
 
@@ -433,7 +428,7 @@ static int ext4_dx_readdir(struct file * filp,
        int     ret;
 
        if (!info) {
-               info = create_dir_info(filp->f_pos);
+               info = ext4_htree_create_dir_info(filp->f_pos);
                if (!info)
                        return -ENOMEM;
                filp->private_data = info;
index 8158083f7ac0a04673529125fe2aa58babd4cb8d..303e41cf7b142344d7ee202293e15a6647d6b1ce 100644 (file)
@@ -22,7 +22,7 @@
 #include "ext4_i.h"
 
 /*
- * The second extended filesystem constants/structures
+ * The fourth extended filesystem constants/structures
  */
 
 /*
@@ -45,7 +45,7 @@
 #define ext4_debug(f, a...)                                            \
        do {                                                            \
                printk (KERN_DEBUG "EXT4-fs DEBUG (%s, %d): %s:",       \
-                       __FILE__, __LINE__, __FUNCTION__);              \
+                       __FILE__, __LINE__, __func__);                  \
                printk (KERN_DEBUG f, ## a);                            \
        } while (0)
 #else
@@ -74,6 +74,9 @@
 #define EXT4_MB_HINT_GOAL_ONLY         256
 /* goal is meaningful */
 #define EXT4_MB_HINT_TRY_GOAL          512
+/* blocks already pre-reserved by delayed allocation */
+#define EXT4_MB_DELALLOC_RESERVED      1024
+
 
 struct ext4_allocation_request {
        /* target inode for block we're allocating */
@@ -170,6 +173,15 @@ struct ext4_group_desc
        __u32   bg_reserved2[3];
 };
 
+/*
+ * Structure of a flex block group info
+ */
+
+struct flex_groups {
+       __u32 free_inodes;
+       __u32 free_blocks;
+};
+
 #define EXT4_BG_INODE_UNINIT   0x0001 /* Inode table/bitmap not in use */
 #define EXT4_BG_BLOCK_UNINIT   0x0002 /* Block bitmap not in use */
 #define EXT4_BG_INODE_ZEROED   0x0004 /* On-disk itable initialized to zero */
@@ -527,6 +539,7 @@ do {                                                                               \
 #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT        0x1000000 /* Journal Async Commit */
 #define EXT4_MOUNT_I_VERSION            0x2000000 /* i_version support */
 #define EXT4_MOUNT_MBALLOC             0x4000000 /* Buddy allocation support */
+#define EXT4_MOUNT_DELALLOC            0x8000000 /* Delalloc support */
 /* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */
 #ifndef _LINUX_EXT2_FS_H
 #define clear_opt(o, opt)              o &= ~EXT4_MOUNT_##opt
@@ -647,7 +660,10 @@ struct ext4_super_block {
        __le16  s_mmp_interval;         /* # seconds to wait in MMP checking */
        __le64  s_mmp_block;            /* Block for multi-mount protection */
        __le32  s_raid_stripe_width;    /* blocks on all data disks (N*stride)*/
-       __u32   s_reserved[163];        /* Padding to the end of the block */
+       __u8    s_log_groups_per_flex;  /* FLEX_BG group size */
+       __u8    s_reserved_char_pad2;
+       __le16  s_reserved_pad;
+       __u32   s_reserved[162];        /* Padding to the end of the block */
 };
 
 #ifdef __KERNEL__
@@ -958,12 +974,17 @@ extern ext4_grpblk_t ext4_block_group_offset(struct super_block *sb,
 extern int ext4_bg_has_super(struct super_block *sb, ext4_group_t group);
 extern unsigned long ext4_bg_num_gdb(struct super_block *sb,
                        ext4_group_t group);
-extern ext4_fsblk_t ext4_new_block (handle_t *handle, struct inode *inode,
+extern ext4_fsblk_t ext4_new_meta_block(handle_t *handle, struct inode *inode,
                        ext4_fsblk_t goal, int *errp);
-extern ext4_fsblk_t ext4_new_blocks (handle_t *handle, struct inode *inode,
+extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
                        ext4_fsblk_t goal, unsigned long *count, int *errp);
-extern ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode,
+extern ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
+                                       ext4_lblk_t iblock, ext4_fsblk_t goal,
+                                       unsigned long *count, int *errp);
+extern ext4_fsblk_t ext4_old_new_blocks(handle_t *handle, struct inode *inode,
                        ext4_fsblk_t goal, unsigned long *count, int *errp);
+extern ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi,
+                                               ext4_fsblk_t nblocks);
 extern void ext4_free_blocks (handle_t *handle, struct inode *inode,
                        ext4_fsblk_t block, unsigned long count, int metadata);
 extern void ext4_free_blocks_sb (handle_t *handle, struct super_block *sb,
@@ -1016,9 +1037,14 @@ extern int __init init_ext4_mballoc(void);
 extern void exit_ext4_mballoc(void);
 extern void ext4_mb_free_blocks(handle_t *, struct inode *,
                unsigned long, unsigned long, int, unsigned long *);
+extern int ext4_mb_add_more_groupinfo(struct super_block *sb,
+               ext4_group_t i, struct ext4_group_desc *desc);
+extern void ext4_mb_update_group_info(struct ext4_group_info *grp,
+               ext4_grpblk_t add);
 
 
 /* inode.c */
+void ext4_da_release_space(struct inode *inode, int used, int to_free);
 int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
                struct buffer_head *bh, ext4_fsblk_t blocknr);
 struct buffer_head *ext4_getblk(handle_t *, struct inode *,
@@ -1033,19 +1059,23 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
 extern struct inode *ext4_iget(struct super_block *, unsigned long);
 extern int  ext4_write_inode (struct inode *, int);
 extern int  ext4_setattr (struct dentry *, struct iattr *);
+extern int  ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
+                               struct kstat *stat);
 extern void ext4_delete_inode (struct inode *);
 extern int  ext4_sync_inode (handle_t *, struct inode *);
 extern void ext4_discard_reservation (struct inode *);
 extern void ext4_dirty_inode(struct inode *);
 extern int ext4_change_inode_journal_flag(struct inode *, int);
 extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *);
+extern int ext4_can_truncate(struct inode *inode);
 extern void ext4_truncate (struct inode *);
 extern void ext4_set_inode_flags(struct inode *);
 extern void ext4_get_inode_flags(struct ext4_inode_info *);
 extern void ext4_set_aops(struct inode *inode);
 extern int ext4_writepage_trans_blocks(struct inode *);
-extern int ext4_block_truncate_page(handle_t *handle, struct page *page,
+extern int ext4_block_truncate_page(handle_t *handle,
                struct address_space *mapping, loff_t from);
+extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page);
 
 /* ioctl.c */
 extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
@@ -1159,10 +1189,21 @@ struct ext4_group_info *ext4_get_group_info(struct super_block *sb,
 }
 
 
+static inline ext4_group_t ext4_flex_group(struct ext4_sb_info *sbi,
+                                            ext4_group_t block_group)
+{
+       return block_group >> sbi->s_log_groups_per_flex;
+}
+
+static inline unsigned int ext4_flex_bg_size(struct ext4_sb_info *sbi)
+{
+       return 1 << sbi->s_log_groups_per_flex;
+}
+
 #define ext4_std_error(sb, errno)                              \
 do {                                                           \
        if ((errno))                                            \
-               __ext4_std_error((sb), __FUNCTION__, (errno));  \
+               __ext4_std_error((sb), __func__, (errno));      \
 } while (0)
 
 /*
@@ -1191,7 +1232,7 @@ extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
                        ext4_lblk_t iblock,
                        unsigned long max_blocks, struct buffer_head *bh_result,
                        int create, int extend_disksize);
-extern void ext4_ext_truncate(struct inode *, struct page *);
+extern void ext4_ext_truncate(struct inode *);
 extern void ext4_ext_init(struct super_block *);
 extern void ext4_ext_release(struct super_block *);
 extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset,
@@ -1199,7 +1240,7 @@ extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset,
 extern int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode,
                        sector_t block, unsigned long max_blocks,
                        struct buffer_head *bh, int create,
-                       int extend_disksize);
+                       int extend_disksize, int flag);
 #endif /* __KERNEL__ */
 
 #endif /* _EXT4_H */
index 75333b595fab794bda6023a3978b0c61df5e91de..6c166c0a54b7ea24a49a90681e0284dee3d364d6 100644 (file)
@@ -212,6 +212,7 @@ static inline int ext4_ext_get_actual_len(struct ext4_extent *ext)
                (le16_to_cpu(ext->ee_len) - EXT_INIT_MAX_LEN));
 }
 
+extern int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks);
 extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *);
 extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t);
 extern int ext4_extent_tree_init(handle_t *, struct inode *);
index 26a4ae255d79da02ad615de4bf6217f1dc5287b0..ef7409f0e7e475fde5afb303b68cdcd7c05e8b1c 100644 (file)
@@ -79,7 +79,7 @@ struct ext4_ext_cache {
 };
 
 /*
- * third extended file system inode data in memory
+ * fourth extended file system inode data in memory
  */
 struct ext4_inode_info {
        __le32  i_data[15];     /* unconverted */
@@ -150,6 +150,7 @@ struct ext4_inode_info {
         */
        struct rw_semaphore i_data_sem;
        struct inode vfs_inode;
+       struct jbd2_inode jinode;
 
        unsigned long i_ext_generation;
        struct ext4_ext_cache i_cached_extent;
@@ -162,6 +163,13 @@ struct ext4_inode_info {
        /* mballoc */
        struct list_head i_prealloc_list;
        spinlock_t i_prealloc_lock;
+
+       /* allocation reservation info for delalloc */
+       unsigned long i_reserved_data_blocks;
+       unsigned long i_reserved_meta_blocks;
+       unsigned long i_allocated_meta_blocks;
+       unsigned short i_delalloc_reserved_flag;
+       spinlock_t i_block_reservation_lock;
 };
 
 #endif /* _EXT4_I */
index 9255a7d28b245546d16001359129aaad43883113..eb8bc3afe6e9f590f491bf9098980f39f27bf245 100644 (file)
@@ -142,19 +142,17 @@ int __ext4_journal_dirty_metadata(const char *where,
                                handle_t *handle, struct buffer_head *bh);
 
 #define ext4_journal_get_undo_access(handle, bh) \
-       __ext4_journal_get_undo_access(__FUNCTION__, (handle), (bh))
+       __ext4_journal_get_undo_access(__func__, (handle), (bh))
 #define ext4_journal_get_write_access(handle, bh) \
-       __ext4_journal_get_write_access(__FUNCTION__, (handle), (bh))
+       __ext4_journal_get_write_access(__func__, (handle), (bh))
 #define ext4_journal_revoke(handle, blocknr, bh) \
-       __ext4_journal_revoke(__FUNCTION__, (handle), (blocknr), (bh))
+       __ext4_journal_revoke(__func__, (handle), (blocknr), (bh))
 #define ext4_journal_get_create_access(handle, bh) \
-       __ext4_journal_get_create_access(__FUNCTION__, (handle), (bh))
+       __ext4_journal_get_create_access(__func__, (handle), (bh))
 #define ext4_journal_dirty_metadata(handle, bh) \
-       __ext4_journal_dirty_metadata(__FUNCTION__, (handle), (bh))
+       __ext4_journal_dirty_metadata(__func__, (handle), (bh))
 #define ext4_journal_forget(handle, bh) \
-       __ext4_journal_forget(__FUNCTION__, (handle), (bh))
-
-int ext4_journal_dirty_data(handle_t *handle, struct buffer_head *bh);
+       __ext4_journal_forget(__func__, (handle), (bh))
 
 handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks);
 int __ext4_journal_stop(const char *where, handle_t *handle);
@@ -165,7 +163,7 @@ static inline handle_t *ext4_journal_start(struct inode *inode, int nblocks)
 }
 
 #define ext4_journal_stop(handle) \
-       __ext4_journal_stop(__FUNCTION__, (handle))
+       __ext4_journal_stop(__func__, (handle))
 
 static inline handle_t *ext4_journal_current_handle(void)
 {
@@ -192,6 +190,11 @@ static inline int ext4_journal_force_commit(journal_t *journal)
        return jbd2_journal_force_commit(journal);
 }
 
+static inline int ext4_jbd2_file_inode(handle_t *handle, struct inode *inode)
+{
+       return jbd2_journal_file_inode(handle, &EXT4_I(inode)->jinode);
+}
+
 /* super.c */
 int ext4_force_commit(struct super_block *sb);
 
index 5802e69f2191d015286924887538264a0b3b2325..6300226d55313d168fed238287744142204e2456 100644 (file)
@@ -25,7 +25,7 @@
 #include <linux/rbtree.h>
 
 /*
- * third extended-fs super-block data in memory
+ * fourth extended-fs super-block data in memory
  */
 struct ext4_sb_info {
        unsigned long s_desc_size;      /* Size of a group descriptor in bytes */
@@ -143,6 +143,9 @@ struct ext4_sb_info {
 
        /* locality groups */
        struct ext4_locality_group *s_locality_groups;
+
+       unsigned int s_log_groups_per_flex;
+       struct flex_groups *s_flex_groups;
 };
 
 #endif /* _EXT4_SB */
index 47929c4e3dae66d104f5bdd6ac39f5436183dff5..42c4c0c892ed9b442617207b00acbdc7be99249f 100644 (file)
@@ -92,17 +92,16 @@ static void ext4_idx_store_pblock(struct ext4_extent_idx *ix, ext4_fsblk_t pb)
        ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
 }
 
-static handle_t *ext4_ext_journal_restart(handle_t *handle, int needed)
+static int ext4_ext_journal_restart(handle_t *handle, int needed)
 {
        int err;
 
        if (handle->h_buffer_credits > needed)
-               return handle;
-       if (!ext4_journal_extend(handle, needed))
-               return handle;
-       err = ext4_journal_restart(handle, needed);
-
-       return handle;
+               return 0;
+       err = ext4_journal_extend(handle, needed);
+       if (err)
+               return err;
+       return ext4_journal_restart(handle, needed);
 }
 
 /*
@@ -180,15 +179,18 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
        return bg_start + colour + block;
 }
 
+/*
+ * Allocation for a meta data block
+ */
 static ext4_fsblk_t
-ext4_ext_new_block(handle_t *handle, struct inode *inode,
+ext4_ext_new_meta_block(handle_t *handle, struct inode *inode,
                        struct ext4_ext_path *path,
                        struct ext4_extent *ex, int *err)
 {
        ext4_fsblk_t goal, newblock;
 
        goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block));
-       newblock = ext4_new_block(handle, inode, goal, err);
+       newblock = ext4_new_meta_block(handle, inode, goal, err);
        return newblock;
 }
 
@@ -246,6 +248,36 @@ static int ext4_ext_space_root_idx(struct inode *inode)
        return size;
 }
 
+/*
+ * Calculate the number of metadata blocks needed
+ * to allocate @blocks
+ * Worst case is one block per extent
+ */
+int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks)
+{
+       int lcap, icap, rcap, leafs, idxs, num;
+       int newextents = blocks;
+
+       rcap = ext4_ext_space_root_idx(inode);
+       lcap = ext4_ext_space_block(inode);
+       icap = ext4_ext_space_block_idx(inode);
+
+       /* number of new leaf blocks needed */
+       num = leafs = (newextents + lcap - 1) / lcap;
+
+       /*
+        * Worst case, we need separate index block(s)
+        * to link all new leaf blocks
+        */
+       idxs = (leafs + icap - 1) / icap;
+       do {
+               num += idxs;
+               idxs = (idxs + icap - 1) / icap;
+       } while (idxs > rcap);
+
+       return num;
+}
+
 static int
 ext4_ext_max_entries(struct inode *inode, int depth)
 {
@@ -524,6 +556,7 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
                alloc = 1;
        }
        path[0].p_hdr = eh;
+       path[0].p_bh = NULL;
 
        i = depth;
        /* walk through the tree */
@@ -552,12 +585,14 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
        }
 
        path[ppos].p_depth = i;
-       path[ppos].p_hdr = eh;
        path[ppos].p_ext = NULL;
        path[ppos].p_idx = NULL;
 
        /* find extent */
        ext4_ext_binsearch(inode, path + ppos, block);
+       /* if not an empty leaf */
+       if (path[ppos].p_ext)
+               path[ppos].p_block = ext_pblock(path[ppos].p_ext);
 
        ext4_ext_show_path(inode, path);
 
@@ -688,7 +723,8 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
        /* allocate all needed blocks */
        ext_debug("allocate %d blocks for indexes/leaf\n", depth - at);
        for (a = 0; a < depth - at; a++) {
-               newblock = ext4_ext_new_block(handle, inode, path, newext, &err);
+               newblock = ext4_ext_new_meta_block(handle, inode, path,
+                                                  newext, &err);
                if (newblock == 0)
                        goto cleanup;
                ablocks[a] = newblock;
@@ -884,7 +920,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
        ext4_fsblk_t newblock;
        int err = 0;
 
-       newblock = ext4_ext_new_block(handle, inode, path, newext, &err);
+       newblock = ext4_ext_new_meta_block(handle, inode, path, newext, &err);
        if (newblock == 0)
                return err;
 
@@ -981,6 +1017,8 @@ repeat:
                /* if we found index with free entry, then use that
                 * entry: create all needed subtree and add new leaf */
                err = ext4_ext_split(handle, inode, path, newext, i);
+               if (err)
+                       goto out;
 
                /* refill path */
                ext4_ext_drop_refs(path);
@@ -1883,11 +1921,9 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
                credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb);
 #endif
 
-               handle = ext4_ext_journal_restart(handle, credits);
-               if (IS_ERR(handle)) {
-                       err = PTR_ERR(handle);
+               err = ext4_ext_journal_restart(handle, credits);
+               if (err)
                        goto out;
-               }
 
                err = ext4_ext_get_access(handle, inode, path + depth);
                if (err)
@@ -2529,6 +2565,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
        int err = 0, depth, ret;
        unsigned long allocated = 0;
        struct ext4_allocation_request ar;
+       loff_t disksize;
 
        __clear_bit(BH_New, &bh_result->b_state);
        ext_debug("blocks %u/%lu requested for inode %u\n",
@@ -2616,8 +2653,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
                                 */
                                if (allocated > max_blocks)
                                        allocated = max_blocks;
-                               /* mark the buffer unwritten */
-                               __set_bit(BH_Unwritten, &bh_result->b_state);
+                               set_buffer_unwritten(bh_result);
                                goto out2;
                        }
 
@@ -2716,14 +2752,19 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
                goto out2;
        }
 
-       if (extend_disksize && inode->i_size > EXT4_I(inode)->i_disksize)
-               EXT4_I(inode)->i_disksize = inode->i_size;
-
        /* previous routine could use block we allocated */
        newblock = ext_pblock(&newex);
        allocated = ext4_ext_get_actual_len(&newex);
 outnew:
-       __set_bit(BH_New, &bh_result->b_state);
+       if (extend_disksize) {
+               disksize = ((loff_t) iblock + ar.len) << inode->i_blkbits;
+               if (disksize > i_size_read(inode))
+                       disksize = i_size_read(inode);
+               if (disksize > EXT4_I(inode)->i_disksize)
+                       EXT4_I(inode)->i_disksize = disksize;
+       }
+
+       set_buffer_new(bh_result);
 
        /* Cache only when it is _not_ an uninitialized extent */
        if (create != EXT4_CREATE_UNINITIALIZED_EXT)
@@ -2733,7 +2774,7 @@ out:
        if (allocated > max_blocks)
                allocated = max_blocks;
        ext4_ext_show_leaf(inode, path);
-       __set_bit(BH_Mapped, &bh_result->b_state);
+       set_buffer_mapped(bh_result);
        bh_result->b_bdev = inode->i_sb->s_bdev;
        bh_result->b_blocknr = newblock;
 out2:
@@ -2744,7 +2785,7 @@ out2:
        return err ? err : allocated;
 }
 
-void ext4_ext_truncate(struct inode * inode, struct page *page)
+void ext4_ext_truncate(struct inode *inode)
 {
        struct address_space *mapping = inode->i_mapping;
        struct super_block *sb = inode->i_sb;
@@ -2757,18 +2798,14 @@ void ext4_ext_truncate(struct inode * inode, struct page *page)
         */
        err = ext4_writepage_trans_blocks(inode) + 3;
        handle = ext4_journal_start(inode, err);
-       if (IS_ERR(handle)) {
-               if (page) {
-                       clear_highpage(page);
-                       flush_dcache_page(page);
-                       unlock_page(page);
-                       page_cache_release(page);
-               }
+       if (IS_ERR(handle))
                return;
-       }
 
-       if (page)
-               ext4_block_truncate_page(handle, page, mapping, inode->i_size);
+       if (inode->i_size & (sb->s_blocksize - 1))
+               ext4_block_truncate_page(handle, mapping, inode->i_size);
+
+       if (ext4_orphan_add(handle, inode))
+               goto out_stop;
 
        down_write(&EXT4_I(inode)->i_data_sem);
        ext4_ext_invalidate_cache(inode);
@@ -2780,8 +2817,6 @@ void ext4_ext_truncate(struct inode * inode, struct page *page)
         * Probably we need not scan at all,
         * because page truncation is enough.
         */
-       if (ext4_orphan_add(handle, inode))
-               goto out_stop;
 
        /* we have to know where to truncate from in crash case */
        EXT4_I(inode)->i_disksize = inode->i_size;
@@ -2798,6 +2833,7 @@ void ext4_ext_truncate(struct inode * inode, struct page *page)
                handle->h_sync = 1;
 
 out_stop:
+       up_write(&EXT4_I(inode)->i_data_sem);
        /*
         * If this was a simple ftruncate() and the file will remain alive,
         * then we need to clear up the orphan record which we created above.
@@ -2808,7 +2844,6 @@ out_stop:
        if (inode->i_nlink)
                ext4_orphan_del(handle, inode);
 
-       up_write(&EXT4_I(inode)->i_data_sem);
        inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
        ext4_mark_inode_dirty(handle, inode);
        ext4_journal_stop(handle);
@@ -2911,7 +2946,7 @@ retry:
                }
                ret = ext4_get_blocks_wrap(handle, inode, block,
                                          max_blocks, &map_bh,
-                                         EXT4_CREATE_UNINITIALIZED_EXT, 0);
+                                         EXT4_CREATE_UNINITIALIZED_EXT, 0, 0);
                if (ret <= 0) {
 #ifdef EXT4FS_DEBUG
                        WARN_ON(ret <= 0);
index 4159be6366ab8c7fb39493827765544d818b0ecc..430eb7978db4c92f0503b92895a59000d8f97b3e 100644 (file)
@@ -123,6 +123,23 @@ force_commit:
        return ret;
 }
 
+static struct vm_operations_struct ext4_file_vm_ops = {
+       .fault          = filemap_fault,
+       .page_mkwrite   = ext4_page_mkwrite,
+};
+
+static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
+{
+       struct address_space *mapping = file->f_mapping;
+
+       if (!mapping->a_ops->readpage)
+               return -ENOEXEC;
+       file_accessed(file);
+       vma->vm_ops = &ext4_file_vm_ops;
+       vma->vm_flags |= VM_CAN_NONLINEAR;
+       return 0;
+}
+
 const struct file_operations ext4_file_operations = {
        .llseek         = generic_file_llseek,
        .read           = do_sync_read,
@@ -133,7 +150,7 @@ const struct file_operations ext4_file_operations = {
 #ifdef CONFIG_COMPAT
        .compat_ioctl   = ext4_compat_ioctl,
 #endif
-       .mmap           = generic_file_mmap,
+       .mmap           = ext4_file_mmap,
        .open           = generic_file_open,
        .release        = ext4_release_file,
        .fsync          = ext4_sync_file,
@@ -144,6 +161,7 @@ const struct file_operations ext4_file_operations = {
 const struct inode_operations ext4_file_inode_operations = {
        .truncate       = ext4_truncate,
        .setattr        = ext4_setattr,
+       .getattr        = ext4_getattr,
 #ifdef CONFIG_EXT4DEV_FS_XATTR
        .setxattr       = generic_setxattr,
        .getxattr       = generic_getxattr,
index 1c8ba48d4f8d6014e5708b44706640e3e29a11e9..a45c3737ad31e69e9de6c98075ff98fabe3b0770 100644 (file)
@@ -27,6 +27,7 @@
 #include <linux/sched.h>
 #include <linux/writeback.h>
 #include <linux/jbd2.h>
+#include <linux/blkdev.h>
 #include "ext4.h"
 #include "ext4_jbd2.h"
 
@@ -45,6 +46,7 @@
 int ext4_sync_file(struct file * file, struct dentry *dentry, int datasync)
 {
        struct inode *inode = dentry->d_inode;
+       journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
        int ret = 0;
 
        J_ASSERT(ext4_journal_current_handle() == NULL);
@@ -85,6 +87,8 @@ int ext4_sync_file(struct file * file, struct dentry *dentry, int datasync)
                        .nr_to_write = 0, /* sys_fsync did this */
                };
                ret = sync_inode(inode, &wbc);
+               if (journal && (journal->j_flags & JBD2_BARRIER))
+                       blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
        }
 out:
        return ret;
index 7eb0604e7eea8f76973175fc3ee6b247a79171cb..c2c0a8d06d0e059ba949bcadd9081555ab7f298c 100644 (file)
@@ -13,7 +13,7 @@ extern __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 group,
                                   struct ext4_group_desc *gdp);
 extern int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 group,
                                       struct ext4_group_desc *gdp);
-struct buffer_head *read_block_bitmap(struct super_block *sb,
+struct buffer_head *ext4_read_block_bitmap(struct super_block *sb,
                                      ext4_group_t block_group);
 extern unsigned ext4_init_block_bitmap(struct super_block *sb,
                                       struct buffer_head *bh,
index c6efbab0c80187942dfcedc867d514c76badcefc..a92eb305344fe2fd299cd3c15928a033a1374d10 100644 (file)
@@ -157,6 +157,7 @@ void ext4_free_inode (handle_t *handle, struct inode * inode)
        struct ext4_super_block * es;
        struct ext4_sb_info *sbi;
        int fatal = 0, err;
+       ext4_group_t flex_group;
 
        if (atomic_read(&inode->i_count) > 1) {
                printk ("ext4_free_inode: inode has count=%d\n",
@@ -232,6 +233,12 @@ void ext4_free_inode (handle_t *handle, struct inode * inode)
                        if (is_directory)
                                percpu_counter_dec(&sbi->s_dirs_counter);
 
+                       if (sbi->s_log_groups_per_flex) {
+                               flex_group = ext4_flex_group(sbi, block_group);
+                               spin_lock(sb_bgl_lock(sbi, flex_group));
+                               sbi->s_flex_groups[flex_group].free_inodes++;
+                               spin_unlock(sb_bgl_lock(sbi, flex_group));
+                       }
                }
                BUFFER_TRACE(bh2, "call ext4_journal_dirty_metadata");
                err = ext4_journal_dirty_metadata(handle, bh2);
@@ -286,6 +293,80 @@ static int find_group_dir(struct super_block *sb, struct inode *parent,
        return ret;
 }
 
+#define free_block_ratio 10
+
+static int find_group_flex(struct super_block *sb, struct inode *parent,
+                          ext4_group_t *best_group)
+{
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
+       struct ext4_group_desc *desc;
+       struct buffer_head *bh;
+       struct flex_groups *flex_group = sbi->s_flex_groups;
+       ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
+       ext4_group_t parent_fbg_group = ext4_flex_group(sbi, parent_group);
+       ext4_group_t ngroups = sbi->s_groups_count;
+       int flex_size = ext4_flex_bg_size(sbi);
+       ext4_group_t best_flex = parent_fbg_group;
+       int blocks_per_flex = sbi->s_blocks_per_group * flex_size;
+       int flexbg_free_blocks;
+       int flex_freeb_ratio;
+       ext4_group_t n_fbg_groups;
+       ext4_group_t i;
+
+       n_fbg_groups = (sbi->s_groups_count + flex_size - 1) >>
+               sbi->s_log_groups_per_flex;
+
+find_close_to_parent:
+       flexbg_free_blocks = flex_group[best_flex].free_blocks;
+       flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex;
+       if (flex_group[best_flex].free_inodes &&
+           flex_freeb_ratio > free_block_ratio)
+               goto found_flexbg;
+
+       if (best_flex && best_flex == parent_fbg_group) {
+               best_flex--;
+               goto find_close_to_parent;
+       }
+
+       for (i = 0; i < n_fbg_groups; i++) {
+               if (i == parent_fbg_group || i == parent_fbg_group - 1)
+                       continue;
+
+               flexbg_free_blocks = flex_group[i].free_blocks;
+               flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex;
+
+               if (flex_freeb_ratio > free_block_ratio &&
+                   flex_group[i].free_inodes) {
+                       best_flex = i;
+                       goto found_flexbg;
+               }
+
+               if (best_flex < 0 ||
+                   (flex_group[i].free_blocks >
+                    flex_group[best_flex].free_blocks &&
+                    flex_group[i].free_inodes))
+                       best_flex = i;
+       }
+
+       if (!flex_group[best_flex].free_inodes ||
+           !flex_group[best_flex].free_blocks)
+               return -1;
+
+found_flexbg:
+       for (i = best_flex * flex_size; i < ngroups &&
+                    i < (best_flex + 1) * flex_size; i++) {
+               desc = ext4_get_group_desc(sb, i, &bh);
+               if (le16_to_cpu(desc->bg_free_inodes_count)) {
+                       *best_group = i;
+                       goto out;
+               }
+       }
+
+       return -1;
+out:
+       return 0;
+}
+
 /*
  * Orlov's allocator for directories.
  *
@@ -501,6 +582,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode)
        struct inode *ret;
        ext4_group_t i;
        int free = 0;
+       ext4_group_t flex_group;
 
        /* Cannot create files in a deleted directory */
        if (!dir || !dir->i_nlink)
@@ -514,6 +596,12 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode)
 
        sbi = EXT4_SB(sb);
        es = sbi->s_es;
+
+       if (sbi->s_log_groups_per_flex) {
+               ret2 = find_group_flex(sb, dir, &group);
+               goto got_group;
+       }
+
        if (S_ISDIR(mode)) {
                if (test_opt (sb, OLDALLOC))
                        ret2 = find_group_dir(sb, dir, &group);
@@ -522,6 +610,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode)
        } else
                ret2 = find_group_other(sb, dir, &group);
 
+got_group:
        err = -ENOSPC;
        if (ret2 == -1)
                goto out;
@@ -600,7 +689,7 @@ got:
        /* We may have to initialize the block bitmap if it isn't already */
        if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM) &&
            gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
-               struct buffer_head *block_bh = read_block_bitmap(sb, group);
+               struct buffer_head *block_bh = ext4_read_block_bitmap(sb, group);
 
                BUFFER_TRACE(block_bh, "get block bitmap access");
                err = ext4_journal_get_write_access(handle, block_bh);
@@ -676,6 +765,13 @@ got:
                percpu_counter_inc(&sbi->s_dirs_counter);
        sb->s_dirt = 1;
 
+       if (sbi->s_log_groups_per_flex) {
+               flex_group = ext4_flex_group(sbi, group);
+               spin_lock(sb_bgl_lock(sbi, flex_group));
+               sbi->s_flex_groups[flex_group].free_inodes--;
+               spin_unlock(sb_bgl_lock(sbi, flex_group));
+       }
+
        inode->i_uid = current->fsuid;
        if (test_opt (sb, GRPID))
                inode->i_gid = dir->i_gid;
@@ -740,14 +836,10 @@ got:
                goto fail_free_drop;
 
        if (test_opt(sb, EXTENTS)) {
-               /* set extent flag only for diretory, file and normal symlink*/
+               /* set extent flag only for directory, file and normal symlink*/
                if (S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode)) {
                        EXT4_I(inode)->i_flags |= EXT4_EXTENTS_FL;
                        ext4_ext_tree_init(handle, inode);
-                       err = ext4_update_incompat_feature(handle, sb,
-                                       EXT4_FEATURE_INCOMPAT_EXTENTS);
-                       if (err)
-                               goto fail_free_drop;
                }
        }
 
@@ -817,6 +909,14 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
        if (IS_ERR(inode))
                goto iget_failed;
 
+       /*
+        * If the orphans has i_nlinks > 0 then it should be able to be
+        * truncated, otherwise it won't be removed from the orphan list
+        * during processing and an infinite loop will result.
+        */
+       if (inode->i_nlink && !ext4_can_truncate(inode))
+               goto bad_orphan;
+
        if (NEXT_ORPHAN(inode) > max_ino)
                goto bad_orphan;
        brelse(bitmap_bh);
@@ -838,6 +938,7 @@ bad_orphan:
                printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n",
                       NEXT_ORPHAN(inode));
                printk(KERN_NOTICE "max_ino=%lu\n", max_ino);
+               printk(KERN_NOTICE "i_nlink=%u\n", inode->i_nlink);
                /* Avoid freeing blocks if we got a bad deleted inode */
                if (inode->i_nlink == 0)
                        inode->i_blocks = 0;
index 8d9707746413d90bc5bf019ea6e2dacad91842c2..8ca2763df091051fea3e02ae7bba35a1e82e21d9 100644 (file)
 #include <linux/string.h>
 #include <linux/buffer_head.h>
 #include <linux/writeback.h>
+#include <linux/pagevec.h>
 #include <linux/mpage.h>
 #include <linux/uio.h>
 #include <linux/bio.h>
 #include "ext4_jbd2.h"
 #include "xattr.h"
 #include "acl.h"
+#include "ext4_extents.h"
+
+/*
+ * Thin wrapper: forwards @new_size together with the jbd2 inode embedded
+ * in the ext4 in-core inode (EXT4_I(inode)->jinode) to
+ * jbd2_journal_begin_ordered_truncate() and returns its result unchanged.
+ */
+static inline int ext4_begin_ordered_truncate(struct inode *inode,
+                                             loff_t new_size)
+{
+       return jbd2_journal_begin_ordered_truncate(&EXT4_I(inode)->jinode,
+                                                  new_size);
+}
+
+static void ext4_invalidatepage(struct page *page, unsigned long offset);
 
 /*
  * Test whether an inode is a fast symlink.
@@ -181,6 +192,8 @@ void ext4_delete_inode (struct inode * inode)
 {
        handle_t *handle;
 
+       if (ext4_should_order_data(inode))
+               ext4_begin_ordered_truncate(inode, 0);
        truncate_inode_pages(&inode->i_data, 0);
 
        if (is_bad_inode(inode))
@@ -508,11 +521,12 @@ static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned long blks,
  *             direct blocks
  */
 static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
-                       ext4_fsblk_t goal, int indirect_blks, int blks,
-                       ext4_fsblk_t new_blocks[4], int *err)
+                               ext4_lblk_t iblock, ext4_fsblk_t goal,
+                               int indirect_blks, int blks,
+                               ext4_fsblk_t new_blocks[4], int *err)
 {
        int target, i;
-       unsigned long count = 0;
+       unsigned long count = 0, blk_allocated = 0;
        int index = 0;
        ext4_fsblk_t current_block = 0;
        int ret = 0;
@@ -525,12 +539,13 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
         * the first direct block of this branch.  That's the
         * minimum number of blocks need to allocate(required)
         */
-       target = blks + indirect_blks;
-
-       while (1) {
+       /* first we try to allocate the indirect blocks */
+       target = indirect_blks;
+       while (target > 0) {
                count = target;
                /* allocating blocks for indirect blocks and direct blocks */
-               current_block = ext4_new_blocks(handle,inode,goal,&count,err);
+               current_block = ext4_new_meta_blocks(handle, inode,
+                                                       goal, &count, err);
                if (*err)
                        goto failed_out;
 
@@ -540,16 +555,48 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
                        new_blocks[index++] = current_block++;
                        count--;
                }
-
-               if (count > 0)
+               if (count > 0) {
+                       /*
+                        * save the new block number
+                        * for the first direct block
+                        */
+                       new_blocks[index] = current_block;
+                       printk(KERN_INFO "%s returned more blocks than "
+                                               "requested\n", __func__);
+                       WARN_ON(1);
                        break;
+               }
        }
 
-       /* save the new block number for the first direct block */
-       new_blocks[index] = current_block;
-
+       target = blks - count ;
+       blk_allocated = count;
+       if (!target)
+               goto allocated;
+       /* Now allocate data blocks */
+       count = target;
+       /* allocating blocks for data blocks */
+       current_block = ext4_new_blocks(handle, inode, iblock,
+                                               goal, &count, err);
+       if (*err && (target == blks)) {
+               /*
+                * if the allocation failed and we didn't allocate
+                * any blocks before
+                */
+               goto failed_out;
+       }
+       if (!*err) {
+               if (target == blks) {
+               /*
+                * save the new block number
+                * for the first direct block
+                */
+                       new_blocks[index] = current_block;
+               }
+               blk_allocated += count;
+       }
+allocated:
        /* total number of blocks allocated for direct blocks */
-       ret = count;
+       ret = blk_allocated;
        *err = 0;
        return ret;
 failed_out:
@@ -584,8 +631,9 @@ failed_out:
  *     as described above and return 0.
  */
 static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
-                       int indirect_blks, int *blks, ext4_fsblk_t goal,
-                       ext4_lblk_t *offsets, Indirect *branch)
+                               ext4_lblk_t iblock, int indirect_blks,
+                               int *blks, ext4_fsblk_t goal,
+                               ext4_lblk_t *offsets, Indirect *branch)
 {
        int blocksize = inode->i_sb->s_blocksize;
        int i, n = 0;
@@ -595,7 +643,7 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
        ext4_fsblk_t new_blocks[4];
        ext4_fsblk_t current_block;
 
-       num = ext4_alloc_blocks(handle, inode, goal, indirect_blks,
+       num = ext4_alloc_blocks(handle, inode, iblock, goal, indirect_blks,
                                *blks, new_blocks, &err);
        if (err)
                return err;
@@ -799,6 +847,7 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
        struct ext4_inode_info *ei = EXT4_I(inode);
        int count = 0;
        ext4_fsblk_t first_block = 0;
+       loff_t disksize;
 
 
        J_ASSERT(!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL));
@@ -855,8 +904,9 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
        /*
         * Block out ext4_truncate while we alter the tree
         */
-       err = ext4_alloc_branch(handle, inode, indirect_blks, &count, goal,
-                               offsets + (partial - chain), partial);
+       err = ext4_alloc_branch(handle, inode, iblock, indirect_blks,
+                                       &count, goal,
+                                       offsets + (partial - chain), partial);
 
        /*
         * The ext4_splice_branch call will free and forget any buffers
@@ -873,8 +923,13 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
         * protect it if you're about to implement concurrent
         * ext4_get_block() -bzzz
        */
-       if (!err && extend_disksize && inode->i_size > ei->i_disksize)
-               ei->i_disksize = inode->i_size;
+       if (!err && extend_disksize) {
+               disksize = ((loff_t) iblock + count) << inode->i_blkbits;
+               if (disksize > i_size_read(inode))
+                       disksize = i_size_read(inode);
+               if (disksize > ei->i_disksize)
+                       ei->i_disksize = disksize;
+       }
        if (err)
                goto cleanup;
 
@@ -934,7 +989,7 @@ out:
  */
 int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
                        unsigned long max_blocks, struct buffer_head *bh,
-                       int create, int extend_disksize)
+                       int create, int extend_disksize, int flag)
 {
        int retval;
 
@@ -975,6 +1030,15 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
         * with create == 1 flag.
         */
        down_write((&EXT4_I(inode)->i_data_sem));
+
+       /*
+        * if the caller is from delayed allocation writeout path
+        * we have already reserved fs blocks for allocation
+        * let the underlying get_block() function know to
+        * avoid double accounting
+        */
+       if (flag)
+               EXT4_I(inode)->i_delalloc_reserved_flag = 1;
        /*
         * We need to check for EXT4 here because migrate
         * could have changed the inode type in between
@@ -996,6 +1060,18 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
                                                        ~EXT4_EXT_MIGRATE;
                }
        }
+
+       if (flag) {
+               EXT4_I(inode)->i_delalloc_reserved_flag = 0;
+               /*
+                * Update reserved blocks/metadata blocks
+                * after successful block allocation
+                * which were deferred till now
+                */
+               if ((retval > 0) && buffer_delay(bh))
+                       ext4_da_release_space(inode, retval, 0);
+       }
+
        up_write((&EXT4_I(inode)->i_data_sem));
        return retval;
 }
@@ -1021,7 +1097,7 @@ static int ext4_get_block(struct inode *inode, sector_t iblock,
        }
 
        ret = ext4_get_blocks_wrap(handle, inode, iblock,
-                                       max_blocks, bh_result, create, 0);
+                                       max_blocks, bh_result, create, 0, 0);
        if (ret > 0) {
                bh_result->b_size = (ret << inode->i_blkbits);
                ret = 0;
@@ -1047,7 +1123,7 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
        dummy.b_blocknr = -1000;
        buffer_trace_init(&dummy.b_history);
        err = ext4_get_blocks_wrap(handle, inode, block, 1,
-                                       &dummy, create, 1);
+                                       &dummy, create, 1, 0);
        /*
         * ext4_get_blocks_handle() returns number of blocks
         * mapped. 0 in case of a HOLE.
@@ -1203,19 +1279,20 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
        to = from + len;
 
 retry:
-       page = __grab_cache_page(mapping, index);
-       if (!page)
-               return -ENOMEM;
-       *pagep = page;
-
        handle = ext4_journal_start(inode, needed_blocks);
        if (IS_ERR(handle)) {
-               unlock_page(page);
-               page_cache_release(page);
                ret = PTR_ERR(handle);
                goto out;
        }
 
+       page = __grab_cache_page(mapping, index);
+       if (!page) {
+               ext4_journal_stop(handle);
+               ret = -ENOMEM;
+               goto out;
+       }
+       *pagep = page;
+
        ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
                                                        ext4_get_block);
 
@@ -1225,8 +1302,8 @@ retry:
        }
 
        if (ret) {
-               ext4_journal_stop(handle);
                unlock_page(page);
+               ext4_journal_stop(handle);
                page_cache_release(page);
        }
 
@@ -1236,15 +1313,6 @@ out:
        return ret;
 }
 
-int ext4_journal_dirty_data(handle_t *handle, struct buffer_head *bh)
-{
-       int err = jbd2_journal_dirty_data(handle, bh);
-       if (err)
-               ext4_journal_abort_handle(__func__, __func__,
-                                               bh, handle, err);
-       return err;
-}
-
 /* For write_end() in data=journal mode */
 static int write_end_fn(handle_t *handle, struct buffer_head *bh)
 {
@@ -1254,29 +1322,6 @@ static int write_end_fn(handle_t *handle, struct buffer_head *bh)
        return ext4_journal_dirty_metadata(handle, bh);
 }
 
-/*
- * Generic write_end handler for ordered and writeback ext4 journal modes.
- * We can't use generic_write_end, because that unlocks the page and we need to
- * unlock the page after ext4_journal_stop, but ext4_journal_stop must run
- * after block_write_end.
- */
-static int ext4_generic_write_end(struct file *file,
-                               struct address_space *mapping,
-                               loff_t pos, unsigned len, unsigned copied,
-                               struct page *page, void *fsdata)
-{
-       struct inode *inode = file->f_mapping->host;
-
-       copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
-
-       if (pos+copied > inode->i_size) {
-               i_size_write(inode, pos+copied);
-               mark_inode_dirty(inode);
-       }
-
-       return copied;
-}
-
 /*
  * We need to pick up the new inode size which generic_commit_write gave us
  * `file' can be NULL - eg, when called from page_symlink().
@@ -1290,15 +1335,14 @@ static int ext4_ordered_write_end(struct file *file,
                                struct page *page, void *fsdata)
 {
        handle_t *handle = ext4_journal_current_handle();
-       struct inode *inode = file->f_mapping->host;
+       struct inode *inode = mapping->host;
        unsigned from, to;
        int ret = 0, ret2;
 
        from = pos & (PAGE_CACHE_SIZE - 1);
        to = from + len;
 
-       ret = walk_page_buffers(handle, page_buffers(page),
-               from, to, NULL, ext4_journal_dirty_data);
+       ret = ext4_jbd2_file_inode(handle, inode);
 
        if (ret == 0) {
                /*
@@ -1311,7 +1355,7 @@ static int ext4_ordered_write_end(struct file *file,
                new_i_size = pos + copied;
                if (new_i_size > EXT4_I(inode)->i_disksize)
                        EXT4_I(inode)->i_disksize = new_i_size;
-               ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
+               ret2 = generic_write_end(file, mapping, pos, len, copied,
                                                        page, fsdata);
                copied = ret2;
                if (ret2 < 0)
@@ -1320,84 +1364,1031 @@ static int ext4_ordered_write_end(struct file *file,
        ret2 = ext4_journal_stop(handle);
        if (!ret)
                ret = ret2;
+
+       return ret ? ret : copied;
+}
+
+/*
+ * ext4_writeback_write_end - finish a buffered write and stop the handle
+ *
+ * Raises EXT4_I(inode)->i_disksize to pos + copied when the write reaches
+ * past it, passes the page on to generic_write_end(), and then stops the
+ * journal handle obtained from ext4_journal_current_handle().
+ *
+ * Returns the first error seen (from generic_write_end(), else from
+ * ext4_journal_stop()); when there is none, the byte count reported by
+ * generic_write_end().
+ */
+static int ext4_writeback_write_end(struct file *file,
+                               struct address_space *mapping,
+                               loff_t pos, unsigned len, unsigned copied,
+                               struct page *page, void *fsdata)
+{
+       handle_t *handle = ext4_journal_current_handle();
+       struct inode *inode = mapping->host;
+       int ret = 0, ret2;
+       loff_t new_i_size;
+
+       /* i_disksize is advanced from the caller-supplied @copied ... */
+       new_i_size = pos + copied;
+       if (new_i_size > EXT4_I(inode)->i_disksize)
+               EXT4_I(inode)->i_disksize = new_i_size;
+
+       /* ... and only afterwards is @copied replaced by the helper's result */
+       ret2 = generic_write_end(file, mapping, pos, len, copied,
+                                                       page, fsdata);
+       copied = ret2;
+       if (ret2 < 0)
+               ret = ret2;
+
+       /* the handle is always stopped; its error is kept only if none yet */
+       ret2 = ext4_journal_stop(handle);
+       if (!ret)
+               ret = ret2;
+
+       return ret ? ret : copied;
+}
+
+/*
+ * ext4_journalled_write_end - write_end variant that journals the data
+ *
+ * Walks the buffers covering [from, to) of the page with write_end_fn(),
+ * marks the page uptodate when walk_page_buffers() left @partial clear,
+ * extends i_size and i_disksize as needed, sets EXT4_STATE_JDATA on the
+ * inode, and stops the current journal handle.  On a short copy
+ * (copied < len) new buffers past the copied bytes are zeroed first, and
+ * @copied is treated as 0 if the page is not uptodate.
+ *
+ * The page is unlocked before ext4_journal_stop() and released after it.
+ *
+ * Returns the first non-zero error from walk_page_buffers(),
+ * ext4_mark_inode_dirty() or ext4_journal_stop(); otherwise @copied.
+ */
+static int ext4_journalled_write_end(struct file *file,
+                               struct address_space *mapping,
+                               loff_t pos, unsigned len, unsigned copied,
+                               struct page *page, void *fsdata)
+{
+       handle_t *handle = ext4_journal_current_handle();
+       struct inode *inode = mapping->host;
+       int ret = 0, ret2;
+       int partial = 0;
+       unsigned from, to;
+
+       from = pos & (PAGE_CACHE_SIZE - 1);
+       to = from + len;
+
+       if (copied < len) {
+               if (!PageUptodate(page))
+                       copied = 0;
+               page_zero_new_buffers(page, from+copied, to);
+       }
+
+       ret = walk_page_buffers(handle, page_buffers(page), from,
+                               to, &partial, write_end_fn);
+       if (!partial)
+               SetPageUptodate(page);
+       if (pos+copied > inode->i_size)
+               i_size_write(inode, pos+copied);
+       EXT4_I(inode)->i_state |= EXT4_STATE_JDATA;
+       if (inode->i_size > EXT4_I(inode)->i_disksize) {
+               EXT4_I(inode)->i_disksize = inode->i_size;
+               ret2 = ext4_mark_inode_dirty(handle, inode);
+               if (!ret)
+                       ret = ret2;
+       }
+
        unlock_page(page);
+       ret2 = ext4_journal_stop(handle);
+       if (!ret)
+               ret = ret2;
        page_cache_release(page);
 
-       return ret ? ret : copied;
+       return ret ? ret : copied;
+}
+/*
+ * Calculate the number of metadata blocks that need to be reserved
+ * in order to allocate @blocks data blocks for a non-extent
+ * (indirect-block mapped) file.
+ *
+ * With icap = EXT4_ADDR_PER_BLOCK() block addresses per block, the result
+ * is ceil(blocks / icap) indirect blocks, plus ceil(of that / icap)
+ * double-indirect blocks, plus a fixed 1 for the triple-indirect block.
+ * Because of that fixed 1, the result is never 0, even for @blocks == 0.
+ */
+static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks)
+{
+       int icap = EXT4_ADDR_PER_BLOCK(inode->i_sb);
+       int ind_blks, dind_blks, tind_blks;
+
+       /* number of new indirect blocks needed */
+       ind_blks = (blocks + icap - 1) / icap;
+
+       /* double-indirect blocks needed to address those indirect blocks */
+       dind_blks = (ind_blks + icap - 1) / icap;
+
+       /* always budget one triple-indirect block */
+       tind_blks = 1;
+
+       return ind_blks + dind_blks + tind_blks;
+}
+
+/*
+ * Calculate the number of metadata blocks that need to be reserved
+ * to allocate the given number of data blocks.
+ *
+ * Dispatches on the inode's mapping format: inodes carrying
+ * EXT4_EXTENTS_FL use ext4_ext_calc_metadata_amount(), all others use
+ * ext4_indirect_calc_metadata_amount().
+ */
+static int ext4_calc_metadata_amount(struct inode *inode, int blocks)
+{
+       if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
+               return ext4_ext_calc_metadata_amount(inode, blocks);
+
+       return ext4_indirect_calc_metadata_amount(inode, blocks);
+}
+
+/*
+ * ext4_da_reserve_space - reserve blocks for a delayed allocation
+ * @inode:    inode the reservation is charged to
+ * @nrblocks: number of data blocks to reserve
+ *
+ * Under i_block_reservation_lock, recomputes the metadata estimate for
+ * the inode's total reserved data blocks (existing + @nrblocks), then
+ * subtracts the new data blocks plus the increase in metadata blocks
+ * from sbi->s_freeblocks_counter and records both per-inode totals.
+ *
+ * Returns 0 on success, or -ENOSPC (with no counters changed) when
+ * ext4_has_free_blocks() reports fewer blocks than requested.
+ */
+static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
+{
+       struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+       unsigned long md_needed, mdblocks, total = 0;
+
+       /*
+        * recalculate the amount of metadata blocks to reserve
+        * in order to allocate nrblocks
+        * worst case is one extent per block
+        */
+       spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
+       total = EXT4_I(inode)->i_reserved_data_blocks + nrblocks;
+       mdblocks = ext4_calc_metadata_amount(inode, total);
+       /* the estimate must not shrink when more data blocks are reserved */
+       BUG_ON(mdblocks < EXT4_I(inode)->i_reserved_meta_blocks);
+
+       /* 'total' is reused: from here on it is the number of blocks to claim */
+       md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks;
+       total = md_needed + nrblocks;
+
+       if (ext4_has_free_blocks(sbi, total) < total) {
+               spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+               return -ENOSPC;
+       }
+
+       /* reduce fs free blocks counter */
+       percpu_counter_sub(&sbi->s_freeblocks_counter, total);
+
+       EXT4_I(inode)->i_reserved_data_blocks += nrblocks;
+       EXT4_I(inode)->i_reserved_meta_blocks = mdblocks;
+
+       spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+       return 0;       /* success */
+}
+
+/*
+ * ext4_da_release_space - drop part of an inode's delalloc reservation
+ * @inode:   inode whose reservation counters are adjusted
+ * @used:    reserved data blocks removed from the reservation without
+ *           being returned to the free counter (ext4_get_blocks_wrap()
+ *           passes the number of blocks it just allocated)
+ * @to_free: reserved data blocks handed back to s_freeblocks_counter
+ *
+ * Under i_block_reservation_lock, recomputes the metadata estimate for
+ * the data blocks that remain reserved, returns @to_free plus the surplus
+ * metadata reservation (less i_allocated_meta_blocks) to the filesystem
+ * free-blocks counter, and resets i_allocated_meta_blocks to 0.
+ */
+void ext4_da_release_space(struct inode *inode, int used, int to_free)
+{
+       struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+       int total, mdb, mdb_free, release;
+
+       spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
+       /* recalculate the number of metablocks that still need to be reserved */
+       total = EXT4_I(inode)->i_reserved_data_blocks - used - to_free;
+       mdb = ext4_calc_metadata_amount(inode, total);
+
+       /* figure out how many metablocks to release */
+       BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
+       mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb;
+
+       /* Account for allocated meta_blocks */
+       mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks;
+
+       release = to_free + mdb_free;
+
+       /* update fs free blocks counter for truncate case */
+       percpu_counter_add(&sbi->s_freeblocks_counter, release);
+
+       /* update per-inode reservations */
+       BUG_ON(used + to_free > EXT4_I(inode)->i_reserved_data_blocks);
+       EXT4_I(inode)->i_reserved_data_blocks -= (used + to_free);
+
+       /* same check as above, repeated before the store */
+       BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
+       EXT4_I(inode)->i_reserved_meta_blocks = mdb;
+       EXT4_I(inode)->i_allocated_meta_blocks = 0;
+       spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+}
+
+/*
+ * Release the delalloc reservation held by a page's buffers.
+ *
+ * Every buffer of @page that starts at or after byte @offset and has
+ * BH_Delay set is counted and has its delay bit cleared; the count is
+ * then given back via ext4_da_release_space(inode, 0, count), which is
+ * called even when the count is 0.
+ */
+static void ext4_da_page_release_reservation(struct page *page,
+                                               unsigned long offset)
+{
+       int to_release = 0;
+       struct buffer_head *head, *bh;
+       unsigned int curr_off = 0;
+
+       head = page_buffers(page);
+       bh = head;
+       do {
+               /* curr_off is the byte offset of this buffer within the page */
+               unsigned int next_off = curr_off + bh->b_size;
+
+               if ((offset <= curr_off) && (buffer_delay(bh))) {
+                       to_release++;
+                       clear_buffer_delay(bh);
+               }
+               curr_off = next_off;
+       } while ((bh = bh->b_this_page) != head);
+       ext4_da_release_space(page->mapping->host, 0, to_release);
+}
+
+/*
+ * Delayed allocation stuff
+ */
+
+/*
+ * Context threaded through the delayed-allocation writeout helpers.
+ *
+ * inode:      file being written; supplies i_mapping and i_blkbits
+ * lbh:        pseudo buffer_head accumulating the current block extent:
+ *             b_blocknr = first logical block, b_size = length in bytes,
+ *             b_state = the BH_FLAGS bits common to every block in it
+ * first_page, next_page: page-index extent; next_page is the page after
+ *             the last page of the extent
+ * get_block:  the filesystem's block mapper; mpage_da_map_blocks() calls
+ *             it with create == 1
+ * wbc:        writeback control forwarded to __mpage_writepage()
+ */
+struct mpage_da_data {
+       struct inode *inode;
+       struct buffer_head lbh;                 /* extent of blocks */
+       unsigned long first_page, next_page;    /* extent of pages */
+       get_block_t *get_block;
+       struct writeback_control *wbc;
+};
+
+/*
+ * mpage_da_submit_io - walks through extent of pages and try to write
+ * them with __mpage_writepage()
+ *
+ * @mpd->inode: inode
+ * @mpd->first_page: first page of the extent
+ * @mpd->next_page: page after the last page of the extent
+ * @mpd->get_block: the filesystem's block mapper function
+ *
+ * By the time mpage_da_submit_io() is called we expect all blocks
+ * to be allocated. this may be wrong if allocation failed.
+ *
+ * As pages are already locked by write_cache_pages(), we can't use it
+ *
+ * Returns 0, or the first non-zero value returned by __mpage_writepage();
+ * later pages are still attempted after an error.  Any bio left in the
+ * local mpage_data is submitted before returning.
+ */
+static int mpage_da_submit_io(struct mpage_da_data *mpd)
+{
+       struct address_space *mapping = mpd->inode->i_mapping;
+       struct mpage_data mpd_pp = {
+               .bio = NULL,
+               .last_block_in_bio = 0,
+               .get_block = mpd->get_block,
+               .use_writepage = 1,
+       };
+       int ret = 0, err, nr_pages, i;
+       unsigned long index, end;
+       struct pagevec pvec;
+
+       BUG_ON(mpd->next_page <= mpd->first_page);
+
+       pagevec_init(&pvec, 0);
+       index = mpd->first_page;
+       end = mpd->next_page - 1;       /* inclusive last page index */
+
+       while (index <= end) {
+               /* XXX: optimize tail */
+               nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
+               if (nr_pages == 0)
+                       break;
+               for (i = 0; i < nr_pages; i++) {
+                       struct page *page = pvec.pages[i];
+
+                       /*
+                        * Track the page actually found; a page beyond
+                        * 'end' also terminates the outer while loop,
+                        * since index is then > end.
+                        */
+                       index = page->index;
+                       if (index > end)
+                               break;
+                       index++;
+
+                       err = __mpage_writepage(page, mpd->wbc, &mpd_pp);
+
+                       /*
+                        * In error case, we have to continue because
+                        * remaining pages are still locked
+                        * XXX: unlock and re-dirty them?
+                        */
+                       if (ret == 0)
+                               ret = err;
+               }
+               pagevec_release(&pvec);
+       }
+       if (mpd_pp.bio)
+               mpage_bio_submit(WRITE, mpd_pp.bio);
+
+       return ret;
+}
+
+/*
+ * mpage_put_bnr_to_bhs - walk blocks and assign them actual numbers
+ *
+ * @mpd->inode - inode to walk through
+ * @exbh->b_blocknr - first block on a disk
+ * @exbh->b_size - amount of space in bytes
+ * @logical - first logical block to start assignment with
+ *
+ * the function goes through all passed space and put actual disk
+ * block numbers into buffer heads, dropping BH_Delay
+ *
+ * Buffers that are already mapped and not delayed are only checked:
+ * their b_blocknr must equal the expected physical block (BUG otherwise).
+ * Each visited page must be locked, not under writeback, and must have
+ * buffers attached.
+ */
+static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical,
+                                struct buffer_head *exbh)
+{
+       struct inode *inode = mpd->inode;
+       struct address_space *mapping = inode->i_mapping;
+       int blocks = exbh->b_size >> inode->i_blkbits;
+       sector_t pblock = exbh->b_blocknr, cur_logical;
+       struct buffer_head *head, *bh;
+       unsigned long index, end;
+       struct pagevec pvec;
+       int nr_pages, i;
+
+       /*
+        * Page indices holding the first and last block of the range;
+        * cur_logical starts at the first block of the first page.
+        */
+       index = logical >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
+       end = (logical + blocks - 1) >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
+       cur_logical = index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+
+       pagevec_init(&pvec, 0);
+
+       while (index <= end) {
+               /* XXX: optimize tail */
+               nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
+               if (nr_pages == 0)
+                       break;
+               for (i = 0; i < nr_pages; i++) {
+                       struct page *page = pvec.pages[i];
+
+                       index = page->index;
+                       if (index > end)
+                               break;
+                       index++;
+
+                       BUG_ON(!PageLocked(page));
+                       BUG_ON(PageWriteback(page));
+                       BUG_ON(!page_has_buffers(page));
+
+                       bh = page_buffers(page);
+                       head = bh;
+
+                       /* skip blocks out of the range */
+                       do {
+                               if (cur_logical >= logical)
+                                       break;
+                               cur_logical++;
+                       } while ((bh = bh->b_this_page) != head);
+
+                       /* assign consecutive physical blocks until the range ends */
+                       do {
+                               if (cur_logical >= logical + blocks)
+                                       break;
+                               if (buffer_delay(bh)) {
+                                       bh->b_blocknr = pblock;
+                                       clear_buffer_delay(bh);
+                               } else if (buffer_mapped(bh))
+                                       BUG_ON(bh->b_blocknr != pblock);
+
+                               cur_logical++;
+                               pblock++;
+                       } while ((bh = bh->b_this_page) != head);
+               }
+               pagevec_release(&pvec);
+       }
+}
+
+
+/*
+ * __unmap_underlying_blocks - just a helper function to unmap
+ * set of blocks described by @bh
+ *
+ * Calls unmap_underlying_metadata() on the inode's block device for each
+ * of the (bh->b_size >> i_blkbits) blocks starting at bh->b_blocknr.
+ */
+static inline void __unmap_underlying_blocks(struct inode *inode,
+                                            struct buffer_head *bh)
+{
+       struct block_device *bdev = inode->i_sb->s_bdev;
+       int blocks, i;
+
+       blocks = bh->b_size >> inode->i_blkbits;
+       for (i = 0; i < blocks; i++)
+               unmap_underlying_metadata(bdev, bh->b_blocknr + i);
+}
+
+/*
+ * mpage_da_map_blocks - go through given space
+ *
+ * @mpd->lbh - bh describing space
+ * @mpd->get_block - the filesystem's block mapper function
+ *
+ * The function skips space we know is already mapped to disk blocks.
+ *
+ * The function ignores errors ->get_block() returns, thus real
+ * error handling is postponed to __mpage_writepage()
+ *
+ * The extent is mapped piecewise: ->get_block() is called with
+ * create == 1 for the remaining bytes, and the loop advances by however
+ * many bytes the mapper reports back in b_size.
+ */
+static void mpage_da_map_blocks(struct mpage_da_data *mpd)
+{
+       struct buffer_head *lbh = &mpd->lbh;
+       int err = 0, remain = lbh->b_size;
+       sector_t next = lbh->b_blocknr;
+       struct buffer_head new;
+
+       /*
+        * We consider only non-mapped and non-allocated blocks
+        * (i.e. return early for an extent that is mapped and not delayed)
+        */
+       if (buffer_mapped(lbh) && !buffer_delay(lbh))
+               return;
+
+       while (remain) {
+               /* only these three fields of the on-stack bh are set up */
+               new.b_state = lbh->b_state;
+               new.b_blocknr = 0;
+               new.b_size = remain;
+               err = mpd->get_block(mpd->inode, next, &new, 1);
+               if (err) {
+                       /*
+                        * Rather than implement own error handling
+                        * here, we just leave remaining blocks
+                        * unallocated and try again with ->writepage()
+                        */
+                       break;
+               }
+               /* a zero-length mapping would never terminate this loop */
+               BUG_ON(new.b_size == 0);
+
+               if (buffer_new(&new))
+                       __unmap_underlying_blocks(mpd->inode, &new);
+
+               /*
+                * If blocks are delayed marked, we need to
+                * put actual blocknr and drop delayed bit
+                */
+               if (buffer_delay(lbh))
+                       mpage_put_bnr_to_bhs(mpd, next, &new);
+
+               /* go for the remaining blocks */
+               next += new.b_size >> mpd->inode->i_blkbits;
+               remain -= new.b_size;
+       }
+}
+
+/*
+ * Buffer state bits that are copied into, and compared against, the
+ * b_state of the extent being accumulated in mpage_da_data.lbh.
+ */
+#define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | (1 << BH_Delay))
+
+/*
+ * mpage_add_bh_to_extent - try to add one more block to extent of blocks
+ *
+ * @mpd->lbh - extent of blocks
+ * @logical - logical number of the block in the file
+ * @bh - bh of the block (used to access block's state)
+ *
+ * the function is used to collect contig. blocks in same state
+ *
+ * "Same state" means equal BH_FLAGS bits.  A block that is not logically
+ * contiguous with the extent, or whose masked state differs, causes the
+ * current extent to be mapped via mpage_da_map_blocks() and a new extent
+ * to be started from this block.
+ */
+static void mpage_add_bh_to_extent(struct mpage_da_data *mpd,
+                                  sector_t logical, struct buffer_head *bh)
+{
+       struct buffer_head *lbh = &mpd->lbh;
+       sector_t next;
+
+       /* logical block number immediately following the current extent */
+       next = lbh->b_blocknr + (lbh->b_size >> mpd->inode->i_blkbits);
+
+       /*
+        * First block in the extent
+        */
+       if (lbh->b_size == 0) {
+               lbh->b_blocknr = logical;
+               lbh->b_size = bh->b_size;
+               lbh->b_state = bh->b_state & BH_FLAGS;
+               return;
+       }
+
+       /*
+        * Can we merge the block to our big extent?
+        */
+       if (logical == next && (bh->b_state & BH_FLAGS) == lbh->b_state) {
+               lbh->b_size += bh->b_size;
+               return;
+       }
+
+       /*
+        * We couldn't merge the block to our extent, so we
+        * need to flush current  extent and start new one
+        */
+       mpage_da_map_blocks(mpd);
+
+       /*
+        * Now start a new extent
+        */
+       lbh->b_size = bh->b_size;
+       lbh->b_state = bh->b_state & BH_FLAGS;
+       lbh->b_blocknr = logical;
+}
+
+/*
+ * __mpage_da_writepage - finds extent of pages and blocks
+ *
+ * @page: page to consider
+ * @wbc: not used, we just follow rules
+ * @data: context (a struct mpage_da_data)
+ *
+ * The function finds extents of pages and scans them for all blocks.
+ * It is passed to write_cache_pages() as the per-page callback and
+ * always returns 0.
+ */
+static int __mpage_da_writepage(struct page *page,
+                               struct writeback_control *wbc, void *data)
+{
+       struct mpage_da_data *mpd = data;
+       struct inode *inode = mpd->inode;
+       struct buffer_head *bh, *head, fake;
+       sector_t logical;
+
+       /*
+        * Can we merge this page to current extent?
+        */
+       if (mpd->next_page != page->index) {
+               /*
+                * Nope, we can't. So, we map non-allocated blocks
+                * and start IO on them using __mpage_writepage().
+                * first_page == next_page means no pages were collected yet.
+                */
+               if (mpd->next_page != mpd->first_page) {
+                       mpage_da_map_blocks(mpd);
+                       mpage_da_submit_io(mpd);
+               }
+
+               /*
+                * Start next extent of pages ...
+                */
+               mpd->first_page = page->index;
+
+               /*
+                * ... and blocks
+                */
+               mpd->lbh.b_size = 0;
+               mpd->lbh.b_state = 0;
+               mpd->lbh.b_blocknr = 0;
+       }
+
+       mpd->next_page = page->index + 1;
+       /* logical block number of the first block of this page */
+       logical = (sector_t) page->index <<
+                 (PAGE_CACHE_SHIFT - inode->i_blkbits);
+
+       if (!page_has_buffers(page)) {
+               /*
+                * There are no attached buffer heads yet (mmap?);
+                * we treat the page as full of dirty blocks by
+                * describing it with one on-stack, page-sized bh
+                */
+               bh = &fake;
+               bh->b_size = PAGE_CACHE_SIZE;
+               bh->b_state = 0;
+               set_buffer_dirty(bh);
+               set_buffer_uptodate(bh);
+               mpage_add_bh_to_extent(mpd, logical, bh);
+       } else {
+               /*
+                * Page with regular buffer heads, just add all dirty ones
+                */
+               head = page_buffers(page);
+               bh = head;
+               do {
+                       BUG_ON(buffer_locked(bh));
+                       if (buffer_dirty(bh))
+                               mpage_add_bh_to_extent(mpd, logical, bh);
+                       /* advance even over clean buffers so 'logical'
+                        * keeps tracking the bh's position in the page */
+                       logical++;
+               } while ((bh = bh->b_this_page) != head);
+       }
+
+       return 0;
+}
+
+/*
+ * mpage_da_writepages - walk the list of dirty pages of the given
+ * address space, allocate non-allocated blocks, map newly-allocated
+ * blocks to existing bhs and issue IO on them
+ *
+ * @mapping: address space structure to write
+ * @wbc: subtract the number of written pages from *@wbc->nr_to_write
+ * @get_block: the filesystem's block mapper function.
+ *
+ * This is a library function, which implements the writepages()
+ * address_space_operation.
+ *
+ * In order to avoid duplication of logic that deals with partial pages,
+ * multiple bio per page, etc, we find non-allocated blocks, allocate
+ * them with minimal calls to ->get_block() and re-use __mpage_writepage()
+ *
+ * It's important that we call __mpage_writepage() only once for each
+ * involved page, otherwise we'd have to implement more complicated logic
+ * to deal with pages w/o PG_lock or w/ PG_writeback and so on.
+ *
+ * Returns whatever write_cache_pages() (or generic_writepages(), when
+ * no @get_block is supplied) returns.
+ *
+ * See comments to mpage_writepages()
+ */
+static int mpage_da_writepages(struct address_space *mapping,
+                              struct writeback_control *wbc,
+                              get_block_t get_block)
+{
+       struct mpage_da_data mpd;
+       int ret;
+
+       /* without a block mapper there is nothing to allocate */
+       if (!get_block)
+               return generic_writepages(mapping, wbc);
+
+       /* start with an empty extent of pages and of blocks */
+       mpd.wbc = wbc;
+       mpd.inode = mapping->host;
+       mpd.lbh.b_size = 0;
+       mpd.lbh.b_state = 0;
+       mpd.lbh.b_blocknr = 0;
+       mpd.first_page = 0;
+       mpd.next_page = 0;
+       mpd.get_block = get_block;
+
+       ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, &mpd);
+
+       /*
+        * Handle last extent of pages: the callback only flushes an
+        * extent when it sees a non-contiguous page, so the final one
+        * is still pending here
+        */
+       if (mpd.next_page != mpd.first_page) {
+               mpage_da_map_blocks(&mpd);
+               mpage_da_submit_io(&mpd);
+       }
+
+       return ret;
+}
+
+/*
+ * Block mapper used only from ->write_begin().
+ *
+ * Looks up a single block without allocating.  If it is already mapped,
+ * bh_result->b_size is set from the lookup result.  If it is neither
+ * mapped nor already marked delayed, space for one block is reserved
+ * and bh_result is marked mapped (to block 0), new and delayed.
+ *
+ * Returns 0 on success or the error from the lookup / reservation.
+ */
+static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
+                                 struct buffer_head *bh_result, int create)
+{
+       int nr;
+       int err;
+
+       BUG_ON(create == 0);
+       BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize);
+
+       /*
+        * Find out whether the block is allocated already; preallocated
+        * blocks are unmapped but should be treated the same as
+        * allocated blocks.
+        */
+       nr = ext4_get_blocks_wrap(NULL, inode, iblock, 1,  bh_result, 0, 0, 0);
+       if (nr > 0) {
+               /* already mapped: report how much was found */
+               bh_result->b_size = (nr << inode->i_blkbits);
+               return 0;
+       }
+
+       /* lookup error, or the block already carries a reservation */
+       if (nr < 0 || buffer_delay(bh_result))
+               return nr;
+
+       /*
+        * The block isn't (pre)allocated yet, so reserve space for it.
+        * XXX: __block_prepare_write() unmaps passed block, is it OK?
+        */
+       err = ext4_da_reserve_space(inode, 1);
+       if (err)
+               /* not enough space to reserve */
+               return err;
+
+       map_bh(bh_result, inode->i_sb, 0);
+       set_buffer_new(bh_result);
+       set_buffer_delay(bh_result);
+
+       return 0;
+}
+/* passed as the last argument of ext4_get_blocks_wrap() below */
+#define                EXT4_DELALLOC_RSVED     1
+/*
+ * Block mapper used at writeback time for delayed-allocation inodes.
+ *
+ * Maps up to bh_result->b_size bytes starting at @iblock.  On success
+ * bh_result->b_size is set to the size actually mapped and i_disksize
+ * is pushed forward (capped at i_size) if the mapping extends it.
+ *
+ * Returns 0 on success, a negative value from ext4_get_blocks_wrap(),
+ * or the result of ext4_mark_inode_dirty() when i_disksize was updated.
+ */
+static int ext4_da_get_block_write(struct inode *inode, sector_t iblock,
+                                  struct buffer_head *bh_result, int create)
+{
+       int ret;
+       unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
+       loff_t disksize = EXT4_I(inode)->i_disksize;
+       handle_t *handle = NULL;
+
+       handle = ext4_journal_current_handle();
+       if (!handle) {
+               /*
+                * No running transaction: only look the blocks up
+                * (create == 0); finding nothing mapped is a bug
+                */
+               ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
+                                  bh_result, 0, 0, 0);
+               BUG_ON(!ret);
+       } else {
+               ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
+                                  bh_result, create, 0, EXT4_DELALLOC_RSVED);
+       }
+
+       if (ret > 0) {
+               /* ret is a block count; report it back in bytes */
+               bh_result->b_size = (ret << inode->i_blkbits);
+
+               /*
+                * Update on-disk size along with block allocation
+                * we don't use 'extend_disksize' as size may change
+                * within already allocated block -bzzz
+                */
+               disksize = ((loff_t) iblock + ret) << inode->i_blkbits;
+               if (disksize > i_size_read(inode))
+                       disksize = i_size_read(inode);
+               if (disksize > EXT4_I(inode)->i_disksize) {
+                       /*
+                        * XXX: replace with spinlock if seen contended -bzzz
+                        */
+                       down_write(&EXT4_I(inode)->i_data_sem);
+                       /* re-check under the semaphore before storing */
+                       if (disksize > EXT4_I(inode)->i_disksize)
+                               EXT4_I(inode)->i_disksize = disksize;
+                       up_write(&EXT4_I(inode)->i_data_sem);
+
+                       /* dirty the inode only if i_disksize now holds
+                        * the value computed here */
+                       if (EXT4_I(inode)->i_disksize == disksize) {
+                               ret = ext4_mark_inode_dirty(handle, inode);
+                               return ret;
+                       }
+               }
+               ret = 0;
+       }
+       return ret;
+}
+
+/*
+ * walk_page_buffers() callback: returns non-zero for a dirty buffer that
+ * still has no on-disk block, i.e. it is unmapped (possible for holes)
+ * or marked delayed (possible with delayed allocation).
+ */
+static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh)
+{
+       int needs_block = !buffer_mapped(bh) || buffer_delay(bh);
+
+       if (!needs_block)
+               return 0;
+
+       return buffer_dirty(bh) ? 1 : 0;
+}
+
+/*
+ * Lookup-only block mapper for the writepage paths: block allocation is
+ * not wanted there, so ext4_get_blocks_wrap() is always called with
+ * create = 0 regardless of @create.
+ *
+ * Returns 0 (with bh_result->b_size set to the mapped size) when blocks
+ * were found, otherwise the zero or negative lookup result unchanged.
+ */
+static int ext4_normal_get_block_write(struct inode *inode, sector_t iblock,
+                                  struct buffer_head *bh_result, int create)
+{
+       unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
+       int nr;
+
+       nr = ext4_get_blocks_wrap(NULL, inode, iblock, max_blocks,
+                                 bh_result, 0, 0, 0);
+       if (nr <= 0)
+               return nr;
+
+       bh_result->b_size = (nr << inode->i_blkbits);
+       return 0;
+}
+
+/*
+ * ->writepage() for delayed allocation.  Never allocates blocks: a page
+ * that still has a dirty unmapped or delayed buffer is redirtied and
+ * unlocked instead of being written.
+ *
+ * gets called via ext4_da_writepages after taking page lock (have journal handle)
+ * gets called via journal_submit_inode_data_buffers (no journal handle)
+ * gets called via shrink_page_list via pdflush (no journal handle)
+ * or grab_page_cache when doing write_begin (have journal handle)
+ */
+static int ext4_da_writepage(struct page *page,
+                               struct writeback_control *wbc)
+{
+       int ret = 0;
+       loff_t size;
+       unsigned long len;
+       struct buffer_head *page_bufs;
+       struct inode *inode = page->mapping->host;
+
+       /* len: bytes of this page to check - the part below i_size if
+        * this is the page containing EOF, the whole page otherwise */
+       size = i_size_read(inode);
+       if (page->index == size >> PAGE_CACHE_SHIFT)
+               len = size & ~PAGE_CACHE_MASK;
+       else
+               len = PAGE_CACHE_SIZE;
+
+       if (page_has_buffers(page)) {
+               page_bufs = page_buffers(page);
+               if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
+                                       ext4_bh_unmapped_or_delay)) {
+                       /*
+                        * We don't want to do  block allocation
+                        * So redirty the page and return
+                        * We may reach here when we do a journal commit
+                        * via journal_submit_inode_data_buffers.
+                        * If we don't have mapping block we just ignore
+                        * them. We can also reach here via shrink_page_list
+                        */
+                       redirty_page_for_writepage(wbc, page);
+                       unlock_page(page);
+                       return 0;
+               }
+       } else {
+               /*
+                * The test for page_has_buffers() is subtle:
+                * We know the page is dirty but it lost buffers. That means
+                * that at some moment in time after write_begin()/write_end()
+                * has been called all buffers have been clean and thus they
+                * must have been written at least once. So they are all
+                * mapped and we can happily proceed with mapping them
+                * and writing the page.
+                *
+                * Try to initialize the buffer_heads and check whether
+                * all are mapped and non delay. We don't want to
+                * do block allocation here.
+                */
+               ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE,
+                                               ext4_normal_get_block_write);
+               if (!ret) {
+                       page_bufs = page_buffers(page);
+                       /* check whether all are mapped and non delay */
+                       if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
+                                               ext4_bh_unmapped_or_delay)) {
+                               redirty_page_for_writepage(wbc, page);
+                               unlock_page(page);
+                               return 0;
+                       }
+               } else {
+                       /*
+                        * We can't do block allocation here
+                        * so just redirty the page and unlock
+                        * and return
+                        */
+                       redirty_page_for_writepage(wbc, page);
+                       unlock_page(page);
+                       return 0;
+               }
+       }
+
+       /* every buffer is mapped: write the page with the lookup-only
+        * block mapper */
+       if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
+               ret = nobh_writepage(page, ext4_normal_get_block_write, wbc);
+       else
+               ret = block_write_full_page(page,
+                                               ext4_normal_get_block_write,
+                                               wbc);
+
+       return ret;
+}
+
+/*
+ * For now just follow the DIO way to estimate the max credits
+ * needed to write out EXT4_MAX_WRITEBACK_PAGES.
+ * todo: need to calculate the max credits need for
+ * extent based files, currently the DIO credits is based on
+ * indirect-blocks mapping way.
+ *
+ * Probably should have a generic way to calculate credits
+ * for DIO, writepages, and truncate
+ */
+#define EXT4_MAX_WRITEBACK_PAGES      DIO_MAX_BLOCKS
+#define EXT4_MAX_WRITEBACK_CREDITS    DIO_CREDITS
+
+/*
+ * ->writepages() for delayed allocation.
+ *
+ * Writes back in chunks of at most EXT4_MAX_WRITEBACK_PAGES pages, each
+ * chunk under its own journal handle, until the caller's nr_to_write
+ * budget is used up, an error occurs, or a chunk leaves part of its
+ * budget unused.  On return wbc->nr_to_write holds the unused budget.
+ */
+static int ext4_da_writepages(struct address_space *mapping,
+                               struct writeback_control *wbc)
+{
+       struct inode *inode = mapping->host;
+       handle_t *handle = NULL;
+       int needed_blocks;
+       int ret = 0;
+       long to_write;
+       loff_t range_start = 0;
+
+       /*
+        * No pages to write? This is mainly a kludge to avoid starting
+        * a transaction for special inodes like journal inode on last iput()
+        * because that could violate lock ordering on umount
+        */
+       if (!mapping->nrpages)
+               return 0;
+
+       /*
+        * Estimate the worst case needed credits to write out
+        * EXT4_MAX_WRITEBACK_PAGES pages
+        */
+       needed_blocks = EXT4_MAX_WRITEBACK_CREDITS;
+
+       /* to_write tracks the part of the caller's budget not yet
+        * handed to mpage_da_writepages() */
+       to_write = wbc->nr_to_write;
+       if (!wbc->range_cyclic) {
+               /*
+                * If range_cyclic is not set force range_cont
+                * and save the old range_start
+                */
+               wbc->range_cont = 1;
+               range_start =  wbc->range_start;
+       }
+
+       while (!ret && to_write) {
+               /* start a new transaction*/
+               handle = ext4_journal_start(inode, needed_blocks);
+               if (IS_ERR(handle)) {
+                       ret = PTR_ERR(handle);
+                       goto out_writepages;
+               }
+               if (ext4_should_order_data(inode)) {
+                       /*
+                        * With ordered mode we need to add
+                        * the inode to the journal handle
+                        * when we do block allocation.
+                        */
+                       ret = ext4_jbd2_file_inode(handle, inode);
+                       if (ret) {
+                               ext4_journal_stop(handle);
+                               goto out_writepages;
+                       }
+
+               }
+               /*
+                * set the max dirty pages could be write at a time
+                * to fit into the reserved transaction credits
+                */
+               if (wbc->nr_to_write > EXT4_MAX_WRITEBACK_PAGES)
+                       wbc->nr_to_write = EXT4_MAX_WRITEBACK_PAGES;
+
+               /* charge this chunk against the budget up front */
+               to_write -= wbc->nr_to_write;
+               ret = mpage_da_writepages(mapping, wbc,
+                                               ext4_da_get_block_write);
+               ext4_journal_stop(handle);
+               if (wbc->nr_to_write) {
+                       /*
+                        * There is no more writeout needed
+                        * or we requested for a noblocking writeout
+                        * and we found the device congested.
+                        * Give the unused part of the chunk back.
+                        */
+                       to_write += wbc->nr_to_write;
+                       break;
+               }
+               wbc->nr_to_write = to_write;
+       }
+
+out_writepages:
+       wbc->nr_to_write = to_write;
+       /* NOTE(review): a saved range_start of 0 is not restored here */
+       if (range_start)
+               wbc->range_start = range_start;
+       return ret;
+}
+
+/*
+ * ext4_da_write_begin - ->write_begin() for delayed allocation
+ *
+ * Starts a one-credit journal handle, grabs the page cache page covering
+ * @pos and prepares it with block_write_begin(), using
+ * ext4_da_get_block_prep() so that missing blocks are only reserved,
+ * not allocated.
+ *
+ * On success the page is returned through @pagep and the handle is left
+ * running; ext4_da_write_end() stops it.  On every failure path the
+ * handle is stopped here.  -ENOSPC is retried for as long as
+ * ext4_should_retry_alloc() allows.
+ *
+ * Returns the result of block_write_begin(), or a negative errno if the
+ * handle could not be started or the page could not be obtained.
+ */
+static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
+                               loff_t pos, unsigned len, unsigned flags,
+                               struct page **pagep, void **fsdata)
+{
+       int ret, retries = 0;
+       struct page *page;
+       pgoff_t index;
+       struct inode *inode = mapping->host;
+       handle_t *handle;
+
+       index = pos >> PAGE_CACHE_SHIFT;
+
+retry:
+       /*
+        * With delayed allocation, we don't log the i_disksize update
+        * if there is delayed block allocation. But we still need
+        * to journal the i_disksize update if a write to the end
+        * of file hits an already mapped buffer.
+        */
+       handle = ext4_journal_start(inode, 1);
+       if (IS_ERR(handle)) {
+               ret = PTR_ERR(handle);
+               goto out;
+       }
+
+       page = __grab_cache_page(mapping, index);
+       if (!page) {
+               /* don't leak the handle started above */
+               ext4_journal_stop(handle);
+               ret = -ENOMEM;
+               goto out;
+       }
+       *pagep = page;
+
+       ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
+                                                       ext4_da_get_block_prep);
+       if (ret < 0) {
+               unlock_page(page);
+               ext4_journal_stop(handle);
+               page_cache_release(page);
+       }
+
+       /* handle and page were dropped above, so a retry starts clean */
+       if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
+               goto retry;
+out:
+       return ret;
+}
+
+/*
+ * Decide whether i_disksize should be updated for a write that ends at
+ * byte @offset within @page: returns 1 when the buffer covering that
+ * offset is mapped and not delayed (no block allocation is pending for
+ * it), 0 otherwise.
+ */
+static int ext4_da_should_update_i_disksize(struct page *page,
+                                        unsigned long offset)
+{
+       struct inode *inode = page->mapping->host;
+       struct buffer_head *bh = page_buffers(page);
+       unsigned int skip = offset >> inode->i_blkbits;
+
+       /* step along the page's buffer ring to the one holding @offset */
+       while (skip--)
+               bh = bh->b_this_page;
+
+       return buffer_mapped(bh) && !buffer_delay(bh);
 }
 
-static int ext4_writeback_write_end(struct file *file,
+static int ext4_da_write_end(struct file *file,
                                struct address_space *mapping,
                                loff_t pos, unsigned len, unsigned copied,
                                struct page *page, void *fsdata)
 {
-       handle_t *handle = ext4_journal_current_handle();
-       struct inode *inode = file->f_mapping->host;
+       struct inode *inode = mapping->host;
        int ret = 0, ret2;
+       handle_t *handle = ext4_journal_current_handle();
        loff_t new_i_size;
+       unsigned long start, end;
+
+       start = pos & (PAGE_CACHE_SIZE - 1);
+       end = start + copied -1;
+
+       /*
+        * generic_write_end() will run mark_inode_dirty() if i_size
+        * changes.  So let's piggyback the i_disksize mark_inode_dirty
+        * into that.
+        */
 
        new_i_size = pos + copied;
-       if (new_i_size > EXT4_I(inode)->i_disksize)
-               EXT4_I(inode)->i_disksize = new_i_size;
+       if (new_i_size > EXT4_I(inode)->i_disksize) {
+               if (ext4_da_should_update_i_disksize(page, end)) {
+                       down_write(&EXT4_I(inode)->i_data_sem);
+                       if (new_i_size > EXT4_I(inode)->i_disksize) {
+                               /*
+                                * Updating i_disksize when extending file
+                                * without needing block allocation
+                                */
+                               if (ext4_should_order_data(inode))
+                                       ret = ext4_jbd2_file_inode(handle,
+                                                                  inode);
 
-       ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
+                               EXT4_I(inode)->i_disksize = new_i_size;
+                       }
+                       up_write(&EXT4_I(inode)->i_data_sem);
+               }
+       }
+       ret2 = generic_write_end(file, mapping, pos, len, copied,
                                                        page, fsdata);
        copied = ret2;
        if (ret2 < 0)
                ret = ret2;
-
        ret2 = ext4_journal_stop(handle);
        if (!ret)
                ret = ret2;
-       unlock_page(page);
-       page_cache_release(page);
 
        return ret ? ret : copied;
 }
 
-static int ext4_journalled_write_end(struct file *file,
-                               struct address_space *mapping,
-                               loff_t pos, unsigned len, unsigned copied,
-                               struct page *page, void *fsdata)
+static void ext4_da_invalidatepage(struct page *page, unsigned long offset)
 {
-       handle_t *handle = ext4_journal_current_handle();
-       struct inode *inode = mapping->host;
-       int ret = 0, ret2;
-       int partial = 0;
-       unsigned from, to;
-
-       from = pos & (PAGE_CACHE_SIZE - 1);
-       to = from + len;
-
-       if (copied < len) {
-               if (!PageUptodate(page))
-                       copied = 0;
-               page_zero_new_buffers(page, from+copied, to);
-       }
+       /*
+        * Drop reserved blocks
+        */
+       BUG_ON(!PageLocked(page));
+       if (!page_has_buffers(page))
+               goto out;
 
-       ret = walk_page_buffers(handle, page_buffers(page), from,
-                               to, &partial, write_end_fn);
-       if (!partial)
-               SetPageUptodate(page);
-       if (pos+copied > inode->i_size)
-               i_size_write(inode, pos+copied);
-       EXT4_I(inode)->i_state |= EXT4_STATE_JDATA;
-       if (inode->i_size > EXT4_I(inode)->i_disksize) {
-               EXT4_I(inode)->i_disksize = inode->i_size;
-               ret2 = ext4_mark_inode_dirty(handle, inode);
-               if (!ret)
-                       ret = ret2;
-       }
+       ext4_da_page_release_reservation(page, offset);
 
-       ret2 = ext4_journal_stop(handle);
-       if (!ret)
-               ret = ret2;
-       unlock_page(page);
-       page_cache_release(page);
+out:
+       ext4_invalidatepage(page, offset);
 
-       return ret ? ret : copied;
+       return;
 }
 
+
 /*
  * bmap() is special.  It gets used by applications such as lilo and by
  * the swapper to find the on-disk block of a specific piece of data.
@@ -1418,6 +2409,16 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block)
        journal_t *journal;
        int err;
 
+       if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) &&
+                       test_opt(inode->i_sb, DELALLOC)) {
+               /*
+                * With delalloc we want to sync the file
+                * so that we can make sure we allocate
+                * blocks for file
+                */
+               filemap_write_and_wait(mapping);
+       }
+
        if (EXT4_I(inode)->i_state & EXT4_STATE_JDATA) {
                /*
                 * This is a REALLY heavyweight approach, but the use of
@@ -1462,21 +2463,17 @@ static int bput_one(handle_t *handle, struct buffer_head *bh)
        return 0;
 }
 
-static int jbd2_journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh)
-{
-       if (buffer_mapped(bh))
-               return ext4_journal_dirty_data(handle, bh);
-       return 0;
-}
-
 /*
- * Note that we always start a transaction even if we're not journalling
- * data.  This is to preserve ordering: any hole instantiation within
- * __block_write_full_page -> ext4_get_block() should be journalled
- * along with the data so we don't crash and then get metadata which
- * refers to old data.
+ * Note that we don't need to start a transaction unless we're journaling data
+ * because we should have holes filled from ext4_page_mkwrite(). We even don't
+ * need to file the inode to the transaction's list in ordered mode because if
+ * we are writing back data added by write(), the inode is already there and if
+ * we are writing back data modified via mmap(), no one guarantees in which
+ * transaction the data will hit the disk. In case we are journaling data, we
+ * cannot start transaction directly because transaction start ranks above page
+ * lock so we have to do some magic.
  *
- * In all journalling modes block_write_full_page() will start the I/O.
+ * In all journaling modes block_write_full_page() will start the I/O.
  *
  * Problem:
  *
@@ -1518,105 +2515,103 @@ static int jbd2_journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh)
  * disastrous.  Any write() or metadata operation will sync the fs for
  * us.
  *
- * AKPM2: if all the page's buffers are mapped to disk and !data=journal,
- * we don't need to open a transaction here.
  */
-static int ext4_ordered_writepage(struct page *page,
+static int __ext4_normal_writepage(struct page *page,
                                struct writeback_control *wbc)
 {
        struct inode *inode = page->mapping->host;
-       struct buffer_head *page_bufs;
-       handle_t *handle = NULL;
-       int ret = 0;
-       int err;
-
-       J_ASSERT(PageLocked(page));
 
-       /*
-        * We give up here if we're reentered, because it might be for a
-        * different filesystem.
-        */
-       if (ext4_journal_current_handle())
-               goto out_fail;
-
-       handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode));
+       if (test_opt(inode->i_sb, NOBH))
+               return nobh_writepage(page,
+                                       ext4_normal_get_block_write, wbc);
+       else
+               return block_write_full_page(page,
+                                               ext4_normal_get_block_write,
+                                               wbc);
+}
 
-       if (IS_ERR(handle)) {
-               ret = PTR_ERR(handle);
-               goto out_fail;
-       }
+static int ext4_normal_writepage(struct page *page,
+                               struct writeback_control *wbc)
+{
+       struct inode *inode = page->mapping->host;
+       loff_t size = i_size_read(inode);
+       loff_t len;
 
-       if (!page_has_buffers(page)) {
-               create_empty_buffers(page, inode->i_sb->s_blocksize,
-                               (1 << BH_Dirty)|(1 << BH_Uptodate));
+       J_ASSERT(PageLocked(page));
+       if (page->index == size >> PAGE_CACHE_SHIFT)
+               len = size & ~PAGE_CACHE_MASK;
+       else
+               len = PAGE_CACHE_SIZE;
+
+       if (page_has_buffers(page)) {
+               /* if page has buffers it should all be mapped
+                * and allocated. If there are not buffers attached
+                * to the page we know the page is dirty but it lost
+                * buffers. That means that at some moment in time
+                * after write_begin() / write_end() has been called
+                * all buffers have been clean and thus they must have been
+                * written at least once. So they are all mapped and we can
+                * happily proceed with mapping them and writing the page.
+                */
+               BUG_ON(walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
+                                       ext4_bh_unmapped_or_delay));
        }
-       page_bufs = page_buffers(page);
-       walk_page_buffers(handle, page_bufs, 0,
-                       PAGE_CACHE_SIZE, NULL, bget_one);
-
-       ret = block_write_full_page(page, ext4_get_block, wbc);
 
-       /*
-        * The page can become unlocked at any point now, and
-        * truncate can then come in and change things.  So we
-        * can't touch *page from now on.  But *page_bufs is
-        * safe due to elevated refcount.
-        */
-
-       /*
-        * And attach them to the current transaction.  But only if
-        * block_write_full_page() succeeded.  Otherwise they are unmapped,
-        * and generally junk.
-        */
-       if (ret == 0) {
-               err = walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE,
-                                       NULL, jbd2_journal_dirty_data_fn);
-               if (!ret)
-                       ret = err;
-       }
-       walk_page_buffers(handle, page_bufs, 0,
-                       PAGE_CACHE_SIZE, NULL, bput_one);
-       err = ext4_journal_stop(handle);
-       if (!ret)
-               ret = err;
-       return ret;
+       if (!ext4_journal_current_handle())
+               return __ext4_normal_writepage(page, wbc);
 
-out_fail:
        redirty_page_for_writepage(wbc, page);
        unlock_page(page);
-       return ret;
+       return 0;
 }
 
-static int ext4_writeback_writepage(struct page *page,
+static int __ext4_journalled_writepage(struct page *page,
                                struct writeback_control *wbc)
 {
-       struct inode *inode = page->mapping->host;
+       struct address_space *mapping = page->mapping;
+       struct inode *inode = mapping->host;
+       struct buffer_head *page_bufs;
        handle_t *handle = NULL;
        int ret = 0;
        int err;
 
-       if (ext4_journal_current_handle())
-               goto out_fail;
+       ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE,
+                                       ext4_normal_get_block_write);
+       if (ret != 0)
+               goto out_unlock;
+
+       page_bufs = page_buffers(page);
+       walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE, NULL,
+                                                               bget_one);
+       /* As soon as we unlock the page, it can go away, but we have
+        * references to buffers so we are safe */
+       unlock_page(page);
 
        handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode));
        if (IS_ERR(handle)) {
                ret = PTR_ERR(handle);
-               goto out_fail;
+               goto out;
        }
 
-       if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
-               ret = nobh_writepage(page, ext4_get_block, wbc);
-       else
-               ret = block_write_full_page(page, ext4_get_block, wbc);
+       ret = walk_page_buffers(handle, page_bufs, 0,
+                       PAGE_CACHE_SIZE, NULL, do_journal_get_write_access);
 
+       err = walk_page_buffers(handle, page_bufs, 0,
+                               PAGE_CACHE_SIZE, NULL, write_end_fn);
+       if (ret == 0)
+               ret = err;
        err = ext4_journal_stop(handle);
        if (!ret)
                ret = err;
-       return ret;
 
-out_fail:
-       redirty_page_for_writepage(wbc, page);
+       walk_page_buffers(handle, page_bufs, 0,
+                               PAGE_CACHE_SIZE, NULL, bput_one);
+       EXT4_I(inode)->i_state |= EXT4_STATE_JDATA;
+       goto out;
+
+out_unlock:
        unlock_page(page);
+out:
        return ret;
 }
 
@@ -1624,59 +2619,53 @@ static int ext4_journalled_writepage(struct page *page,
                                struct writeback_control *wbc)
 {
        struct inode *inode = page->mapping->host;
-       handle_t *handle = NULL;
-       int ret = 0;
-       int err;
+       loff_t size = i_size_read(inode);
+       loff_t len;
 
-       if (ext4_journal_current_handle())
-               goto no_write;
+       J_ASSERT(PageLocked(page));
+       if (page->index == size >> PAGE_CACHE_SHIFT)
+               len = size & ~PAGE_CACHE_MASK;
+       else
+               len = PAGE_CACHE_SIZE;
+
+       if (page_has_buffers(page)) {
+               /* if page has buffers they should all be mapped
+                * and allocated. If there are no buffers attached
+                * to the page we know the page is dirty but it lost
+                * buffers. That means that at some moment in time
+                * after write_begin() / write_end() has been called
+                * all buffers have been clean and thus they must have been
+                * written at least once. So they are all mapped and we can
+                * happily proceed with mapping them and writing the page.
+                */
+               BUG_ON(walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
+                                       ext4_bh_unmapped_or_delay));
+       }
 
-       handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode));
-       if (IS_ERR(handle)) {
-               ret = PTR_ERR(handle);
+       if (ext4_journal_current_handle())
                goto no_write;
-       }
 
-       if (!page_has_buffers(page) || PageChecked(page)) {
+       if (PageChecked(page)) {
                /*
                 * It's mmapped pagecache.  Add buffers and journal it.  There
                 * doesn't seem much point in redirtying the page here.
                 */
                ClearPageChecked(page);
-               ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE,
-                                       ext4_get_block);
-               if (ret != 0) {
-                       ext4_journal_stop(handle);
-                       goto out_unlock;
-               }
-               ret = walk_page_buffers(handle, page_buffers(page), 0,
-                       PAGE_CACHE_SIZE, NULL, do_journal_get_write_access);
-
-               err = walk_page_buffers(handle, page_buffers(page), 0,
-                               PAGE_CACHE_SIZE, NULL, write_end_fn);
-               if (ret == 0)
-                       ret = err;
-               EXT4_I(inode)->i_state |= EXT4_STATE_JDATA;
-               unlock_page(page);
+               return __ext4_journalled_writepage(page, wbc);
        } else {
                /*
                 * It may be a page full of checkpoint-mode buffers.  We don't
                 * really know unless we go poke around in the buffer_heads.
                 * But block_write_full_page will do the right thing.
                 */
-               ret = block_write_full_page(page, ext4_get_block, wbc);
+               return block_write_full_page(page,
+                                               ext4_normal_get_block_write,
+                                               wbc);
        }
-       err = ext4_journal_stop(handle);
-       if (!ret)
-               ret = err;
-out:
-       return ret;
-
 no_write:
        redirty_page_for_writepage(wbc, page);
-out_unlock:
        unlock_page(page);
-       goto out;
+       return 0;
 }
 
 static int ext4_readpage(struct file *file, struct page *page)
@@ -1819,7 +2808,7 @@ static int ext4_journalled_set_page_dirty(struct page *page)
 static const struct address_space_operations ext4_ordered_aops = {
        .readpage       = ext4_readpage,
        .readpages      = ext4_readpages,
-       .writepage      = ext4_ordered_writepage,
+       .writepage      = ext4_normal_writepage,
        .sync_page      = block_sync_page,
        .write_begin    = ext4_write_begin,
        .write_end      = ext4_ordered_write_end,
@@ -1833,7 +2822,7 @@ static const struct address_space_operations ext4_ordered_aops = {
 static const struct address_space_operations ext4_writeback_aops = {
        .readpage       = ext4_readpage,
        .readpages      = ext4_readpages,
-       .writepage      = ext4_writeback_writepage,
+       .writepage      = ext4_normal_writepage,
        .sync_page      = block_sync_page,
        .write_begin    = ext4_write_begin,
        .write_end      = ext4_writeback_write_end,
@@ -1857,10 +2846,31 @@ static const struct address_space_operations ext4_journalled_aops = {
        .releasepage    = ext4_releasepage,
 };
 
+static const struct address_space_operations ext4_da_aops = {
+       .readpage       = ext4_readpage,
+       .readpages      = ext4_readpages,
+       .writepage      = ext4_da_writepage,
+       .writepages     = ext4_da_writepages,
+       .sync_page      = block_sync_page,
+       .write_begin    = ext4_da_write_begin,
+       .write_end      = ext4_da_write_end,
+       .bmap           = ext4_bmap,
+       .invalidatepage = ext4_da_invalidatepage,
+       .releasepage    = ext4_releasepage,
+       .direct_IO      = ext4_direct_IO,
+       .migratepage    = buffer_migrate_page,
+};
+
 void ext4_set_aops(struct inode *inode)
 {
-       if (ext4_should_order_data(inode))
+       if (ext4_should_order_data(inode) &&
+               test_opt(inode->i_sb, DELALLOC))
+               inode->i_mapping->a_ops = &ext4_da_aops;
+       else if (ext4_should_order_data(inode))
                inode->i_mapping->a_ops = &ext4_ordered_aops;
+       else if (ext4_should_writeback_data(inode) &&
+                test_opt(inode->i_sb, DELALLOC))
+               inode->i_mapping->a_ops = &ext4_da_aops;
        else if (ext4_should_writeback_data(inode))
                inode->i_mapping->a_ops = &ext4_writeback_aops;
        else
@@ -1873,7 +2883,7 @@ void ext4_set_aops(struct inode *inode)
  * This required during truncate. We need to physically zero the tail end
  * of that block so it doesn't yield old data if the file is later grown.
  */
-int ext4_block_truncate_page(handle_t *handle, struct page *page,
+int ext4_block_truncate_page(handle_t *handle,
                struct address_space *mapping, loff_t from)
 {
        ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT;
@@ -1882,8 +2892,13 @@ int ext4_block_truncate_page(handle_t *handle, struct page *page,
        ext4_lblk_t iblock;
        struct inode *inode = mapping->host;
        struct buffer_head *bh;
+       struct page *page;
        int err = 0;
 
+       page = grab_cache_page(mapping, from >> PAGE_CACHE_SHIFT);
+       if (!page)
+               return -EINVAL;
+
        blocksize = inode->i_sb->s_blocksize;
        length = blocksize - (offset & (blocksize - 1));
        iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
@@ -1956,7 +2971,7 @@ int ext4_block_truncate_page(handle_t *handle, struct page *page,
                err = ext4_journal_dirty_metadata(handle, bh);
        } else {
                if (ext4_should_order_data(inode))
-                       err = ext4_journal_dirty_data(handle, bh);
+                       err = ext4_jbd2_file_inode(handle, inode);
                mark_buffer_dirty(bh);
        }
 
@@ -2179,7 +3194,21 @@ static void ext4_free_data(handle_t *handle, struct inode *inode,
 
        if (this_bh) {
                BUFFER_TRACE(this_bh, "call ext4_journal_dirty_metadata");
-               ext4_journal_dirty_metadata(handle, this_bh);
+
+               /*
+                * The buffer head should have an attached journal head at this
+                * point. However, if the data is corrupted and an indirect
+                * block pointed to itself, it would have been detached when
+                * the block was cleared. Check for this instead of OOPSing.
+                */
+               if (bh2jh(this_bh))
+                       ext4_journal_dirty_metadata(handle, this_bh);
+               else
+                       ext4_error(inode->i_sb, __func__,
+                                  "circular indirect block detected, "
+                                  "inode=%lu, block=%llu",
+                                  inode->i_ino,
+                                  (unsigned long long) this_bh->b_blocknr);
        }
 }
 
@@ -2305,6 +3334,19 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
        }
 }
 
+int ext4_can_truncate(struct inode *inode)
+{
+       if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
+               return 0;
+       if (S_ISREG(inode->i_mode))
+               return 1;
+       if (S_ISDIR(inode->i_mode))
+               return 1;
+       if (S_ISLNK(inode->i_mode))
+               return !ext4_inode_is_fast_symlink(inode);
+       return 0;
+}
+
 /*
  * ext4_truncate()
  *
@@ -2347,51 +3389,25 @@ void ext4_truncate(struct inode *inode)
        int n;
        ext4_lblk_t last_block;
        unsigned blocksize = inode->i_sb->s_blocksize;
-       struct page *page;
 
-       if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
-           S_ISLNK(inode->i_mode)))
-               return;
-       if (ext4_inode_is_fast_symlink(inode))
-               return;
-       if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
+       if (!ext4_can_truncate(inode))
                return;
 
-       /*
-        * We have to lock the EOF page here, because lock_page() nests
-        * outside jbd2_journal_start().
-        */
-       if ((inode->i_size & (blocksize - 1)) == 0) {
-               /* Block boundary? Nothing to do */
-               page = NULL;
-       } else {
-               page = grab_cache_page(mapping,
-                               inode->i_size >> PAGE_CACHE_SHIFT);
-               if (!page)
-                       return;
-       }
-
        if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
-               ext4_ext_truncate(inode, page);
+               ext4_ext_truncate(inode);
                return;
        }
 
        handle = start_transaction(inode);
-       if (IS_ERR(handle)) {
-               if (page) {
-                       clear_highpage(page);
-                       flush_dcache_page(page);
-                       unlock_page(page);
-                       page_cache_release(page);
-               }
+       if (IS_ERR(handle))
                return;         /* AKPM: return what? */
-       }
 
        last_block = (inode->i_size + blocksize-1)
                                        >> EXT4_BLOCK_SIZE_BITS(inode->i_sb);
 
-       if (page)
-               ext4_block_truncate_page(handle, page, mapping, inode->i_size);
+       if (inode->i_size & (blocksize - 1))
+               if (ext4_block_truncate_page(handle, mapping, inode->i_size))
+                       goto out_stop;
 
        n = ext4_block_to_path(inode, last_block, offsets, NULL);
        if (n == 0)
@@ -2409,6 +3425,11 @@ void ext4_truncate(struct inode *inode)
        if (ext4_orphan_add(handle, inode))
                goto out_stop;
 
+       /*
+        * From here we block out all ext4_get_block() callers who want to
+        * modify the block allocation tree.
+        */
+       down_write(&ei->i_data_sem);
        /*
         * The orphan list entry will now protect us from any crash which
         * occurs before the truncate completes, so it is now safe to propagate
@@ -2418,12 +3439,6 @@ void ext4_truncate(struct inode *inode)
         */
        ei->i_disksize = inode->i_size;
 
-       /*
-        * From here we block out all ext4_get_block() callers who want to
-        * modify the block allocation tree.
-        */
-       down_write(&ei->i_data_sem);
-
        if (n == 1) {           /* direct blocks */
                ext4_free_data(handle, inode, NULL, i_data+offsets[0],
                               i_data + EXT4_NDIR_BLOCKS);
@@ -3107,7 +4122,14 @@ int ext4_write_inode(struct inode *inode, int wait)
  * be freed, so we have a strong guarantee that no future commit will
  * leave these blocks visible to the user.)
  *
- * Called with inode->sem down.
+ * Another thing we have to assure is that if we are in ordered mode
+ * and inode is still attached to the committing transaction, we must
+ * start writeout of all the dirty pages which are being truncated.
+ * This way we are sure that all the data written in the previous
+ * transaction are already on disk (truncate waits for pages under
+ * writeback).
+ *
+ * Called with inode->i_mutex down.
  */
 int ext4_setattr(struct dentry *dentry, struct iattr *attr)
 {
@@ -3173,6 +4195,22 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
                if (!error)
                        error = rc;
                ext4_journal_stop(handle);
+
+               if (ext4_should_order_data(inode)) {
+                       error = ext4_begin_ordered_truncate(inode,
+                                                           attr->ia_size);
+                       if (error) {
+                               /* Do as much error cleanup as possible */
+                               handle = ext4_journal_start(inode, 3);
+                               if (IS_ERR(handle)) {
+                                       ext4_orphan_del(NULL, inode);
+                                       goto err_out;
+                               }
+                               ext4_orphan_del(handle, inode);
+                               ext4_journal_stop(handle);
+                               goto err_out;
+                       }
+               }
        }
 
        rc = inode_setattr(inode, attr);
@@ -3193,6 +4231,32 @@ err_out:
        return error;
 }
 
+int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
+                struct kstat *stat)
+{
+       struct inode *inode;
+       unsigned long delalloc_blocks;
+
+       inode = dentry->d_inode;
+       generic_fillattr(inode, stat);
+
+       /*
+        * We can't update i_blocks if the block allocation is delayed
+        * otherwise in the case of system crash before the real block
+        * allocation is done, we will have i_blocks inconsistent with
+        * on-disk file blocks.
+        * We always keep i_blocks updated together with real
+        * allocation. But so as not to confuse the user, stat
+        * will return the blocks that include the delayed allocation
+        * blocks for this file.
+        */
+       spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
+       delalloc_blocks = EXT4_I(inode)->i_reserved_data_blocks;
+       spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+
+       stat->blocks += (delalloc_blocks << inode->i_sb->s_blocksize_bits)>>9;
+       return 0;
+}
 
 /*
  * How many blocks doth make a writepage()?
@@ -3506,3 +4570,64 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
 
        return err;
 }
+
+static int ext4_bh_unmapped(handle_t *handle, struct buffer_head *bh)
+{
+       return !buffer_mapped(bh);
+}
+
+int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page)
+{
+       loff_t size;
+       unsigned long len;
+       int ret = -EINVAL;
+       struct file *file = vma->vm_file;
+       struct inode *inode = file->f_path.dentry->d_inode;
+       struct address_space *mapping = inode->i_mapping;
+
+       /*
+        * Get i_alloc_sem to stop truncates messing with the inode. We cannot
+        * get i_mutex because we are already holding mmap_sem.
+        */
+       down_read(&inode->i_alloc_sem);
+       size = i_size_read(inode);
+       if (page->mapping != mapping || size <= page_offset(page)
+           || !PageUptodate(page)) {
+               /* page got truncated from under us? */
+               goto out_unlock;
+       }
+       ret = 0;
+       if (PageMappedToDisk(page))
+               goto out_unlock;
+
+       if (page->index == size >> PAGE_CACHE_SHIFT)
+               len = size & ~PAGE_CACHE_MASK;
+       else
+               len = PAGE_CACHE_SIZE;
+
+       if (page_has_buffers(page)) {
+               /* return if we have all the buffers mapped */
+               if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
+                                      ext4_bh_unmapped))
+                       goto out_unlock;
+       }
+       /*
+        * OK, we need to fill the hole... Do write_begin and write_end
+        * to do block allocation/reservation. We are not holding
+        * inode->i_mutex here. That allows parallel write_begin and
+        * write_end calls. lock_page prevents this from happening
+        * on the same page though.
+        */
+       ret = mapping->a_ops->write_begin(file, mapping, page_offset(page),
+                       len, AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
+       if (ret < 0)
+               goto out_unlock;
+       ret = mapping->a_ops->write_end(file, mapping, page_offset(page),
+                       len, len, page, NULL);
+       if (ret < 0)
+               goto out_unlock;
+       ret = 0;
+out_unlock:
+       up_read(&inode->i_alloc_sem);
+       return ret;
+}
index c9900aade15072821bb74e87e34d6050f2d84704..8d141a25bbeece7ce4a3894374810daaa52cd4aa 100644 (file)
@@ -381,22 +381,28 @@ static inline void mb_clear_bit_atomic(spinlock_t *lock, int bit, void *addr)
 
 static inline int mb_find_next_zero_bit(void *addr, int max, int start)
 {
-       int fix = 0;
+       int fix = 0, ret, tmpmax;
        addr = mb_correct_addr_and_bit(&fix, addr);
-       max += fix;
+       tmpmax = max + fix;
        start += fix;
 
-       return ext4_find_next_zero_bit(addr, max, start) - fix;
+       ret = ext4_find_next_zero_bit(addr, tmpmax, start) - fix;
+       if (ret > max)
+               return max;
+       return ret;
 }
 
 static inline int mb_find_next_bit(void *addr, int max, int start)
 {
-       int fix = 0;
+       int fix = 0, ret, tmpmax;
        addr = mb_correct_addr_and_bit(&fix, addr);
-       max += fix;
+       tmpmax = max + fix;
        start += fix;
 
-       return ext4_find_next_bit(addr, max, start) - fix;
+       ret = ext4_find_next_bit(addr, tmpmax, start) - fix;
+       if (ret > max)
+               return max;
+       return ret;
 }
 
 static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max)
@@ -803,6 +809,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
                if (!buffer_uptodate(bh[i]))
                        goto out;
 
+       err = 0;
        first_block = page->index * blocks_per_page;
        for (i = 0; i < blocks_per_page; i++) {
                int group;
@@ -883,6 +890,7 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
        int pnum;
        int poff;
        struct page *page;
+       int ret;
 
        mb_debug("load group %lu\n", group);
 
@@ -914,15 +922,21 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
                if (page) {
                        BUG_ON(page->mapping != inode->i_mapping);
                        if (!PageUptodate(page)) {
-                               ext4_mb_init_cache(page, NULL);
+                               ret = ext4_mb_init_cache(page, NULL);
+                               if (ret) {
+                                       unlock_page(page);
+                                       goto err;
+                               }
                                mb_cmp_bitmaps(e4b, page_address(page) +
                                               (poff * sb->s_blocksize));
                        }
                        unlock_page(page);
                }
        }
-       if (page == NULL || !PageUptodate(page))
+       if (page == NULL || !PageUptodate(page)) {
+               ret = -EIO;
                goto err;
+       }
        e4b->bd_bitmap_page = page;
        e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);
        mark_page_accessed(page);
@@ -938,14 +952,20 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
                page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
                if (page) {
                        BUG_ON(page->mapping != inode->i_mapping);
-                       if (!PageUptodate(page))
-                               ext4_mb_init_cache(page, e4b->bd_bitmap);
-
+                       if (!PageUptodate(page)) {
+                               ret = ext4_mb_init_cache(page, e4b->bd_bitmap);
+                               if (ret) {
+                                       unlock_page(page);
+                                       goto err;
+                               }
+                       }
                        unlock_page(page);
                }
        }
-       if (page == NULL || !PageUptodate(page))
+       if (page == NULL || !PageUptodate(page)) {
+               ret = -EIO;
                goto err;
+       }
        e4b->bd_buddy_page = page;
        e4b->bd_buddy = page_address(page) + (poff * sb->s_blocksize);
        mark_page_accessed(page);
@@ -962,7 +982,7 @@ err:
                page_cache_release(e4b->bd_buddy_page);
        e4b->bd_buddy = NULL;
        e4b->bd_bitmap = NULL;
-       return -EIO;
+       return ret;
 }
 
 static void ext4_mb_release_desc(struct ext4_buddy *e4b)
@@ -1031,7 +1051,7 @@ static void mb_set_bits(spinlock_t *lock, void *bm, int cur, int len)
        }
 }
 
-static int mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
+static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
                          int first, int count)
 {
        int block = 0;
@@ -1071,11 +1091,12 @@ static int mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
                        blocknr += block;
                        blocknr +=
                            le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
-
+                       ext4_unlock_group(sb, e4b->bd_group);
                        ext4_error(sb, __func__, "double-free of inode"
                                   " %lu's block %llu(bit %u in group %lu)\n",
                                   inode ? inode->i_ino : 0, blocknr, block,
                                   e4b->bd_group);
+                       ext4_lock_group(sb, e4b->bd_group);
                }
                mb_clear_bit(block, EXT4_MB_BITMAP(e4b));
                e4b->bd_info->bb_counters[order]++;
@@ -1113,8 +1134,6 @@ static int mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
                } while (1);
        }
        mb_check_buddy(e4b);
-
-       return 0;
 }
 
 static int mb_find_extent(struct ext4_buddy *e4b, int order, int block,
@@ -1730,10 +1749,6 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
                ac->ac_g_ex.fe_start = sbi->s_mb_last_start;
                spin_unlock(&sbi->s_md_lock);
        }
-
-       /* searching for the right group start from the goal value specified */
-       group = ac->ac_g_ex.fe_group;
-
        /* Let's just scan groups to find more-less suitable blocks */
        cr = ac->ac_2order ? 0 : 1;
        /*
@@ -1743,6 +1758,12 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
 repeat:
        for (; cr < 4 && ac->ac_status == AC_STATUS_CONTINUE; cr++) {
                ac->ac_criteria = cr;
+               /*
+                * searching for the right group start
+                * from the goal value specified
+                */
+               group = ac->ac_g_ex.fe_group;
+
                for (i = 0; i < EXT4_SB(sb)->s_groups_count; group++, i++) {
                        struct ext4_group_info *grp;
                        struct ext4_group_desc *desc;
@@ -1963,6 +1984,8 @@ static int ext4_mb_seq_history_open(struct inode *inode, struct file *file)
        int rc;
        int size;
 
+       if (unlikely(sbi->s_mb_history == NULL))
+               return -ENOMEM;
        s = kmalloc(sizeof(*s), GFP_KERNEL);
        if (s == NULL)
                return -ENOMEM;
@@ -2165,9 +2188,7 @@ static void ext4_mb_history_init(struct super_block *sb)
        sbi->s_mb_history_cur = 0;
        spin_lock_init(&sbi->s_mb_history_lock);
        i = sbi->s_mb_history_max * sizeof(struct ext4_mb_history);
-       sbi->s_mb_history = kmalloc(i, GFP_KERNEL);
-       if (likely(sbi->s_mb_history != NULL))
-               memset(sbi->s_mb_history, 0, i);
+       sbi->s_mb_history = kzalloc(i, GFP_KERNEL);
        /* if we can't allocate history, then we simple won't use it */
 }
 
@@ -2215,21 +2236,192 @@ ext4_mb_store_history(struct ext4_allocation_context *ac)
 #define ext4_mb_history_init(sb)
 #endif
 
+
+/* Create and initialize ext4_group_info data for the given group. */
+int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
+                         struct ext4_group_desc *desc)
+{
+       int i, len;
+       int metalen = 0;
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
+       struct ext4_group_info **meta_group_info;
+
+       /*
+        * First check if this group is the first of a reserved block.
+        * If it's true, we have to allocate a new table of pointers
+        * to ext4_group_info structures
+        */
+       if (group % EXT4_DESC_PER_BLOCK(sb) == 0) {
+               metalen = sizeof(*meta_group_info) <<
+                       EXT4_DESC_PER_BLOCK_BITS(sb);
+               meta_group_info = kmalloc(metalen, GFP_KERNEL);
+               if (meta_group_info == NULL) {
+                       printk(KERN_ERR "EXT4-fs: can't allocate mem for a "
+                              "buddy group\n");
+                       goto exit_meta_group_info;
+               }
+               sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] =
+                       meta_group_info;
+       }
+
+       /*
+        * calculate needed size. if change bb_counters size,
+        * don't forget about ext4_mb_generate_buddy()
+        */
+       len = offsetof(typeof(**meta_group_info),
+                      bb_counters[sb->s_blocksize_bits + 2]);
+
+       meta_group_info =
+               sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)];
+       i = group & (EXT4_DESC_PER_BLOCK(sb) - 1);
+
+       meta_group_info[i] = kzalloc(len, GFP_KERNEL);
+       if (meta_group_info[i] == NULL) {
+               printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n");
+               goto exit_group_info;
+       }
+       set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT,
+               &(meta_group_info[i]->bb_state));
+
+       /*
+        * initialize bb_free to be able to skip
+        * empty groups without initialization
+        */
+       if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
+               meta_group_info[i]->bb_free =
+                       ext4_free_blocks_after_init(sb, group, desc);
+       } else {
+               meta_group_info[i]->bb_free =
+                       le16_to_cpu(desc->bg_free_blocks_count);
+       }
+
+       INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
+
+#ifdef DOUBLE_CHECK
+       {
+               struct buffer_head *bh;
+               meta_group_info[i]->bb_bitmap =
+                       kmalloc(sb->s_blocksize, GFP_KERNEL);
+               BUG_ON(meta_group_info[i]->bb_bitmap == NULL);
+               bh = ext4_read_block_bitmap(sb, group);
+               BUG_ON(bh == NULL);
+               memcpy(meta_group_info[i]->bb_bitmap, bh->b_data,
+                       sb->s_blocksize);
+               put_bh(bh);
+       }
+#endif
+
+       return 0;
+
+exit_group_info:
+       /* If a meta_group_info table has been allocated, release it now */
+       if (group % EXT4_DESC_PER_BLOCK(sb) == 0)
+               kfree(sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]);
+exit_meta_group_info:
+       return -ENOMEM;
+} /* ext4_mb_add_groupinfo */
+
+/*
+ * Add a group to the existing groups.
+ * This function is used for online resize
+ */
+int ext4_mb_add_more_groupinfo(struct super_block *sb, ext4_group_t group,
+                              struct ext4_group_desc *desc)
+{
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
+       struct inode *inode = sbi->s_buddy_cache;
+       int blocks_per_page;
+       int block;
+       int pnum;
+       struct page *page;
+       int err;
+
+       /* Add group based on group descriptor*/
+       err = ext4_mb_add_groupinfo(sb, group, desc);
+       if (err)
+               return err;
+
+       /*
+        * Cache pages containing dynamic mb_alloc data (buddy and bitmap
+        * data) are set not up to date so that they will be re-initialized
+        * during the next call to ext4_mb_load_buddy.
+        */
+
+       /* Set buddy page as not up to date */
+       blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
+       block = group * 2;
+       pnum = block / blocks_per_page;
+       page = find_get_page(inode->i_mapping, pnum);
+       if (page != NULL) {
+               ClearPageUptodate(page);
+               page_cache_release(page);
+       }
+
+       /* Set bitmap page as not up to date */
+       block++;
+       pnum = block / blocks_per_page;
+       page = find_get_page(inode->i_mapping, pnum);
+       if (page != NULL) {
+               ClearPageUptodate(page);
+               page_cache_release(page);
+       }
+
+       return 0;
+}
+
+/*
+ * Update an existing group.
+ * This function is used for online resize
+ */
+void ext4_mb_update_group_info(struct ext4_group_info *grp, ext4_grpblk_t add)
+{
+       grp->bb_free += add;
+}
+
 static int ext4_mb_init_backend(struct super_block *sb)
 {
        ext4_group_t i;
-       int j, len, metalen;
+       int metalen;
        struct ext4_sb_info *sbi = EXT4_SB(sb);
-       int num_meta_group_infos =
-               (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) >>
-                       EXT4_DESC_PER_BLOCK_BITS(sb);
+       struct ext4_super_block *es = sbi->s_es;
+       int num_meta_group_infos;
+       int num_meta_group_infos_max;
+       int array_size;
        struct ext4_group_info **meta_group_info;
+       struct ext4_group_desc *desc;
+
+       /* This is the number of blocks used by GDT */
+       num_meta_group_infos = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) -
+                               1) >> EXT4_DESC_PER_BLOCK_BITS(sb);
+
+       /*
+        * This is the total number of blocks used by GDT including
+        * the number of reserved blocks for GDT.
+        * The s_group_info array is allocated with this value
+        * to allow a clean online resize without a complex
+        * manipulation of pointer.
+        * The drawback is the unused memory when no resize
+        * occurs but it's very low in terms of pages
+        * (see comments below)
+        * Need to handle this properly when META_BG resizing is allowed
+        */
+       num_meta_group_infos_max = num_meta_group_infos +
+                               le16_to_cpu(es->s_reserved_gdt_blocks);
 
+       /*
+        * array_size is the size of s_group_info array. We round it
+        * to the next power of two because this approximation is done
+        * internally by kmalloc so we can have some more memory
+        * for free here (e.g. may be used for META_BG resize).
+        */
+       array_size = 1;
+       while (array_size < sizeof(*sbi->s_group_info) *
+              num_meta_group_infos_max)
+               array_size = array_size << 1;
        /* An 8TB filesystem with 64-bit pointers requires a 4096 byte
         * kmalloc. A 128kb malloc should suffice for a 256TB filesystem.
         * So a two level scheme suffices for now. */
-       sbi->s_group_info = kmalloc(sizeof(*sbi->s_group_info) *
-                                   num_meta_group_infos, GFP_KERNEL);
+       sbi->s_group_info = kmalloc(array_size, GFP_KERNEL);
        if (sbi->s_group_info == NULL) {
                printk(KERN_ERR "EXT4-fs: can't allocate buddy meta group\n");
                return -ENOMEM;
@@ -2256,63 +2448,15 @@ static int ext4_mb_init_backend(struct super_block *sb)
                sbi->s_group_info[i] = meta_group_info;
        }
 
-       /*
-        * calculate needed size. if change bb_counters size,
-        * don't forget about ext4_mb_generate_buddy()
-        */
-       len = sizeof(struct ext4_group_info);
-       len += sizeof(unsigned short) * (sb->s_blocksize_bits + 2);
        for (i = 0; i < sbi->s_groups_count; i++) {
-               struct ext4_group_desc *desc;
-
-               meta_group_info =
-                       sbi->s_group_info[i >> EXT4_DESC_PER_BLOCK_BITS(sb)];
-               j = i & (EXT4_DESC_PER_BLOCK(sb) - 1);
-
-               meta_group_info[j] = kzalloc(len, GFP_KERNEL);
-               if (meta_group_info[j] == NULL) {
-                       printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n");
-                       goto err_freebuddy;
-               }
                desc = ext4_get_group_desc(sb, i, NULL);
                if (desc == NULL) {
                        printk(KERN_ERR
                                "EXT4-fs: can't read descriptor %lu\n", i);
-                       i++;
                        goto err_freebuddy;
                }
-               memset(meta_group_info[j], 0, len);
-               set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT,
-                       &(meta_group_info[j]->bb_state));
-
-               /*
-                * initialize bb_free to be able to skip
-                * empty groups without initialization
-                */
-               if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
-                       meta_group_info[j]->bb_free =
-                               ext4_free_blocks_after_init(sb, i, desc);
-               } else {
-                       meta_group_info[j]->bb_free =
-                               le16_to_cpu(desc->bg_free_blocks_count);
-               }
-
-               INIT_LIST_HEAD(&meta_group_info[j]->bb_prealloc_list);
-
-#ifdef DOUBLE_CHECK
-               {
-                       struct buffer_head *bh;
-                       meta_group_info[j]->bb_bitmap =
-                               kmalloc(sb->s_blocksize, GFP_KERNEL);
-                       BUG_ON(meta_group_info[j]->bb_bitmap == NULL);
-                       bh = read_block_bitmap(sb, i);
-                       BUG_ON(bh == NULL);
-                       memcpy(meta_group_info[j]->bb_bitmap, bh->b_data,
-                                       sb->s_blocksize);
-                       put_bh(bh);
-               }
-#endif
-
+               if (ext4_mb_add_groupinfo(sb, i, desc) != 0)
+                       goto err_freebuddy;
        }
 
        return 0;
@@ -2336,6 +2480,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
        unsigned i;
        unsigned offset;
        unsigned max;
+       int ret;
 
        if (!test_opt(sb, MBALLOC))
                return 0;
@@ -2370,12 +2515,12 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
        } while (i <= sb->s_blocksize_bits + 1);
 
        /* init file for buddy data */
-       i = ext4_mb_init_backend(sb);
-       if (i) {
+       ret = ext4_mb_init_backend(sb);
+       if (ret != 0) {
                clear_opt(sbi->s_mount_opt, MBALLOC);
                kfree(sbi->s_mb_offsets);
                kfree(sbi->s_mb_maxs);
-               return i;
+               return ret;
        }
 
        spin_lock_init(&sbi->s_md_lock);
@@ -2548,8 +2693,7 @@ ext4_mb_free_committed_blocks(struct super_block *sb)
                ext4_lock_group(sb, md->group);
                for (i = 0; i < md->num; i++) {
                        mb_debug(" %u", md->blocks[i]);
-                       err = mb_free_blocks(NULL, &e4b, md->blocks[i], 1);
-                       BUG_ON(err != 0);
+                       mb_free_blocks(NULL, &e4b, md->blocks[i], 1);
                }
                mb_debug("\n");
                ext4_unlock_group(sb, md->group);
@@ -2575,25 +2719,24 @@ ext4_mb_free_committed_blocks(struct super_block *sb)
 
 
 
-#define MB_PROC_VALUE_READ(name)                               \
-static int ext4_mb_read_##name(char *page, char **start,       \
-               off_t off, int count, int *eof, void *data)     \
+#define MB_PROC_FOPS(name)                                     \
+static int ext4_mb_##name##_proc_show(struct seq_file *m, void *v)     \
 {                                                              \
-       struct ext4_sb_info *sbi = data;                        \
-       int len;                                                \
-       *eof = 1;                                               \
-       if (off != 0)                                           \
-               return 0;                                       \
-       len = sprintf(page, "%ld\n", sbi->s_mb_##name);         \
-       *start = page;                                          \
-       return len;                                             \
-}
-
-#define MB_PROC_VALUE_WRITE(name)                              \
-static int ext4_mb_write_##name(struct file *file,             \
-               const char __user *buf, unsigned long cnt, void *data)  \
+       struct ext4_sb_info *sbi = m->private;                  \
+                                                               \
+       seq_printf(m, "%ld\n", sbi->s_mb_##name);               \
+       return 0;                                               \
+}                                                              \
+                                                               \
+static int ext4_mb_##name##_proc_open(struct inode *inode, struct file *file)\
+{                                                              \
+       return single_open(file, ext4_mb_##name##_proc_show, PDE(inode)->data);\
+}                                                              \
+                                                               \
+static ssize_t ext4_mb_##name##_proc_write(struct file *file,  \
+               const char __user *buf, size_t cnt, loff_t *ppos)       \
 {                                                              \
-       struct ext4_sb_info *sbi = data;                        \
+       struct ext4_sb_info *sbi = PDE(file->f_path.dentry->d_inode)->data;\
        char str[32];                                           \
        long value;                                             \
        if (cnt >= sizeof(str))                                 \
@@ -2605,31 +2748,32 @@ static int ext4_mb_write_##name(struct file *file,              \
                return -ERANGE;                                 \
        sbi->s_mb_##name = value;                               \
        return cnt;                                             \
-}
+}                                                              \
+                                                               \
+static const struct file_operations ext4_mb_##name##_proc_fops = {     \
+       .owner          = THIS_MODULE,                          \
+       .open           = ext4_mb_##name##_proc_open,           \
+       .read           = seq_read,                             \
+       .llseek         = seq_lseek,                            \
+       .release        = single_release,                       \
+       .write          = ext4_mb_##name##_proc_write,          \
+};
 
-MB_PROC_VALUE_READ(stats);
-MB_PROC_VALUE_WRITE(stats);
-MB_PROC_VALUE_READ(max_to_scan);
-MB_PROC_VALUE_WRITE(max_to_scan);
-MB_PROC_VALUE_READ(min_to_scan);
-MB_PROC_VALUE_WRITE(min_to_scan);
-MB_PROC_VALUE_READ(order2_reqs);
-MB_PROC_VALUE_WRITE(order2_reqs);
-MB_PROC_VALUE_READ(stream_request);
-MB_PROC_VALUE_WRITE(stream_request);
-MB_PROC_VALUE_READ(group_prealloc);
-MB_PROC_VALUE_WRITE(group_prealloc);
+MB_PROC_FOPS(stats);
+MB_PROC_FOPS(max_to_scan);
+MB_PROC_FOPS(min_to_scan);
+MB_PROC_FOPS(order2_reqs);
+MB_PROC_FOPS(stream_request);
+MB_PROC_FOPS(group_prealloc);
 
 #define        MB_PROC_HANDLER(name, var)                                      \
 do {                                                                   \
-       proc = create_proc_entry(name, mode, sbi->s_mb_proc);           \
+       proc = proc_create_data(name, mode, sbi->s_mb_proc,             \
+                               &ext4_mb_##var##_proc_fops, sbi);       \
        if (proc == NULL) {                                             \
                printk(KERN_ERR "EXT4-fs: can't to create %s\n", name); \
                goto err_out;                                           \
        }                                                               \
-       proc->data = sbi;                                               \
-       proc->read_proc  = ext4_mb_read_##var ;                         \
-       proc->write_proc = ext4_mb_write_##var;                         \
 } while (0)
 
 static int ext4_mb_init_per_dev_proc(struct super_block *sb)
@@ -2639,6 +2783,10 @@ static int ext4_mb_init_per_dev_proc(struct super_block *sb)
        struct proc_dir_entry *proc;
        char devname[64];
 
+       if (proc_root_ext4 == NULL) {
+               sbi->s_mb_proc = NULL;
+               return -EINVAL;
+       }
        bdevname(sb->s_bdev, devname);
        sbi->s_mb_proc = proc_mkdir(devname, proc_root_ext4);
 
@@ -2747,7 +2895,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
 
 
        err = -EIO;
-       bitmap_bh = read_block_bitmap(sb, ac->ac_b_ex.fe_group);
+       bitmap_bh = ext4_read_block_bitmap(sb, ac->ac_b_ex.fe_group);
        if (!bitmap_bh)
                goto out_err;
 
@@ -2816,7 +2964,23 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
        le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len);
        gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
        spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
-       percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len);
+
+       /*
+        * The free blocks count has already been reduced/reserved
+        * at write_begin() time for delayed allocation, so
+        * do not account for these blocks a second time.
+        */
+       if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
+               percpu_counter_sub(&sbi->s_freeblocks_counter,
+                                       ac->ac_b_ex.fe_len);
+
+       if (sbi->s_log_groups_per_flex) {
+               ext4_group_t flex_group = ext4_flex_group(sbi,
+                                                         ac->ac_b_ex.fe_group);
+               spin_lock(sb_bgl_lock(sbi, flex_group));
+               sbi->s_flex_groups[flex_group].free_blocks -= ac->ac_b_ex.fe_len;
+               spin_unlock(sb_bgl_lock(sbi, flex_group));
+       }
 
        err = ext4_journal_dirty_metadata(handle, bitmap_bh);
        if (err)
@@ -3473,8 +3637,6 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
                if (bit >= end)
                        break;
                next = mb_find_next_bit(bitmap_bh->b_data, end, bit);
-               if (next > end)
-                       next = end;
                start = group * EXT4_BLOCKS_PER_GROUP(sb) + bit +
                                le32_to_cpu(sbi->s_es->s_first_data_block);
                mb_debug("    free preallocated %u/%u in group %u\n",
@@ -3569,7 +3731,7 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
        if (list_empty(&grp->bb_prealloc_list))
                return 0;
 
-       bitmap_bh = read_block_bitmap(sb, group);
+       bitmap_bh = ext4_read_block_bitmap(sb, group);
        if (bitmap_bh == NULL) {
                /* error handling here */
                ext4_mb_release_desc(&e4b);
@@ -3743,7 +3905,7 @@ repeat:
                err = ext4_mb_load_buddy(sb, group, &e4b);
                BUG_ON(err != 0); /* error handling here */
 
-               bitmap_bh = read_block_bitmap(sb, group);
+               bitmap_bh = ext4_read_block_bitmap(sb, group);
                if (bitmap_bh == NULL) {
                        /* error handling here */
                        ext4_mb_release_desc(&e4b);
@@ -4011,10 +4173,21 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
        sbi = EXT4_SB(sb);
 
        if (!test_opt(sb, MBALLOC)) {
-               block = ext4_new_blocks_old(handle, ar->inode, ar->goal,
+               block = ext4_old_new_blocks(handle, ar->inode, ar->goal,
                                            &(ar->len), errp);
                return block;
        }
+       if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag) {
+               /*
+                * With delalloc we already reserved the blocks
+                */
+               ar->len = ext4_has_free_blocks(sbi, ar->len);
+       }
+
+       if (ar->len == 0) {
+               *errp = -ENOSPC;
+               return 0;
+       }
 
        while (ar->len && DQUOT_ALLOC_BLOCK(ar->inode, ar->len)) {
                ar->flags |= EXT4_MB_HINT_NOPREALLOC;
@@ -4026,10 +4199,14 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
        }
        inquota = ar->len;
 
+       if (EXT4_I(ar->inode)->i_delalloc_reserved_flag)
+               ar->flags |= EXT4_MB_DELALLOC_RESERVED;
+
        ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
        if (!ac) {
+               ar->len = 0;
                *errp = -ENOMEM;
-               return 0;
+               goto out1;
        }
 
        ext4_mb_poll_new_transaction(sb, handle);
@@ -4037,12 +4214,11 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
        *errp = ext4_mb_initialize_context(ac, ar);
        if (*errp) {
                ar->len = 0;
-               goto out;
+               goto out2;
        }
 
        ac->ac_op = EXT4_MB_HISTORY_PREALLOC;
        if (!ext4_mb_use_preallocated(ac)) {
-
                ac->ac_op = EXT4_MB_HISTORY_ALLOC;
                ext4_mb_normalize_request(ac, ar);
 repeat:
@@ -4085,11 +4261,12 @@ repeat:
 
        ext4_mb_release_context(ac);
 
-out:
+out2:
+       kmem_cache_free(ext4_ac_cachep, ac);
+out1:
        if (ar->len < inquota)
                DQUOT_FREE_BLOCK(ar->inode, inquota - ar->len);
 
-       kmem_cache_free(ext4_ac_cachep, ac);
        return block;
 }
 static void ext4_mb_poll_new_transaction(struct super_block *sb,
@@ -4242,7 +4419,7 @@ do_more:
                overflow = bit + count - EXT4_BLOCKS_PER_GROUP(sb);
                count -= overflow;
        }
-       bitmap_bh = read_block_bitmap(sb, block_group);
+       bitmap_bh = ext4_read_block_bitmap(sb, block_group);
        if (!bitmap_bh)
                goto error_return;
        gdp = ext4_get_group_desc(sb, block_group, &gd_bh);
@@ -4309,10 +4486,9 @@ do_more:
                ext4_mb_free_metadata(handle, &e4b, block_group, bit, count);
        } else {
                ext4_lock_group(sb, block_group);
-               err = mb_free_blocks(inode, &e4b, bit, count);
+               mb_free_blocks(inode, &e4b, bit, count);
                ext4_mb_return_to_preallocation(inode, &e4b, block, count);
                ext4_unlock_group(sb, block_group);
-               BUG_ON(err != 0);
        }
 
        spin_lock(sb_bgl_lock(sbi, block_group));
@@ -4321,6 +4497,13 @@ do_more:
        spin_unlock(sb_bgl_lock(sbi, block_group));
        percpu_counter_add(&sbi->s_freeblocks_counter, count);
 
+       if (sbi->s_log_groups_per_flex) {
+               ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
+               spin_lock(sb_bgl_lock(sbi, flex_group));
+               sbi->s_flex_groups[flex_group].free_blocks += count;
+               spin_unlock(sb_bgl_lock(sbi, flex_group));
+       }
+
        ext4_mb_release_desc(&e4b);
 
        *freed += count;
index ab16beaa830d3814250146d044f8d2c4a59c7dd4..387ad98350c378425beee97065cc51389903564d 100644 (file)
@@ -182,6 +182,16 @@ static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry,
 static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
                             struct inode *inode);
 
+/*
+ * p is at least 6 bytes before the end of page
+ */
+static inline struct ext4_dir_entry_2 *
+ext4_next_entry(struct ext4_dir_entry_2 *p)
+{
+       return (struct ext4_dir_entry_2 *)((char *)p +
+               ext4_rec_len_from_disk(p->rec_len));
+}
+
 /*
  * Future: use high four bits of block for coalesce-on-delete flags
  * Mask them off for now.
@@ -231,13 +241,13 @@ static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize)
 {
        unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) -
                EXT4_DIR_REC_LEN(2) - infosize;
-       return 0? 20: entry_space / sizeof(struct dx_entry);
+       return entry_space / sizeof(struct dx_entry);
 }
 
 static inline unsigned dx_node_limit (struct inode *dir)
 {
        unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0);
-       return 0? 22: entry_space / sizeof(struct dx_entry);
+       return entry_space / sizeof(struct dx_entry);
 }
 
 /*
@@ -553,15 +563,6 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash,
 }
 
 
-/*
- * p is at least 6 bytes before the end of page
- */
-static inline struct ext4_dir_entry_2 *ext4_next_entry(struct ext4_dir_entry_2 *p)
-{
-       return (struct ext4_dir_entry_2 *)((char *)p +
-               ext4_rec_len_from_disk(p->rec_len));
-}
-
 /*
  * This function fills a red-black tree with information from a
  * directory block.  It returns the number directory entries loaded
@@ -993,19 +994,21 @@ static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry,
                de = (struct ext4_dir_entry_2 *) bh->b_data;
                top = (struct ext4_dir_entry_2 *) ((char *) de + sb->s_blocksize -
                                       EXT4_DIR_REC_LEN(0));
-               for (; de < top; de = ext4_next_entry(de))
-               if (ext4_match (namelen, name, de)) {
-                       if (!ext4_check_dir_entry("ext4_find_entry",
-                                                 dir, de, bh,
-                                 (block<<EXT4_BLOCK_SIZE_BITS(sb))
-                                         +((char *)de - bh->b_data))) {
-                               brelse (bh);
+               for (; de < top; de = ext4_next_entry(de)) {
+                       int off = (block << EXT4_BLOCK_SIZE_BITS(sb))
+                                 + ((char *) de - bh->b_data);
+
+                       if (!ext4_check_dir_entry(__func__, dir, de, bh, off)) {
+                               brelse(bh);
                                *err = ERR_BAD_DX_DIR;
                                goto errout;
                        }
-                       *res_dir = de;
-                       dx_release (frames);
-                       return bh;
+
+                       if (ext4_match(namelen, name, de)) {
+                               *res_dir = de;
+                               dx_release(frames);
+                               return bh;
+                       }
                }
                brelse (bh);
                /* Check to see if we should continue to search */
index 9ff7b1c04239d6581a12f0dd18a73538a0695abc..f000fbe2cd93c0fb10938e1eba8c89e63dfc434a 100644 (file)
@@ -865,6 +865,15 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
        gdp->bg_free_inodes_count = cpu_to_le16(EXT4_INODES_PER_GROUP(sb));
        gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp);
 
+       /*
+        * We can allocate memory for mb_alloc based on the new group
+        * descriptor
+        */
+       if (test_opt(sb, MBALLOC)) {
+               err = ext4_mb_add_more_groupinfo(sb, input->group, gdp);
+               if (err)
+                       goto exit_journal;
+       }
        /*
         * Make the new blocks and inodes valid next.  We do this before
         * increasing the group count so that once the group is enabled,
@@ -957,6 +966,8 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
        handle_t *handle;
        int err;
        unsigned long freed_blocks;
+       ext4_group_t group;
+       struct ext4_group_info *grp;
 
        /* We don't need to worry about locking wrt other resizers just
         * yet: we're going to revalidate es->s_blocks_count after
@@ -988,7 +999,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
        }
 
        /* Handle the remaining blocks in the last group only. */
-       ext4_get_group_no_and_offset(sb, o_blocks_count, NULL, &last);
+       ext4_get_group_no_and_offset(sb, o_blocks_count, &group, &last);
 
        if (last == 0) {
                ext4_warning(sb, __func__,
@@ -1060,6 +1071,45 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
                   o_blocks_count + add);
        if ((err = ext4_journal_stop(handle)))
                goto exit_put;
+
+       /*
+        * Mark mballoc pages as not up to date so that they will be updated
+        * next time they are loaded by ext4_mb_load_buddy.
+        */
+       if (test_opt(sb, MBALLOC)) {
+               struct ext4_sb_info *sbi = EXT4_SB(sb);
+               struct inode *inode = sbi->s_buddy_cache;
+               int blocks_per_page;
+               int block;
+               int pnum;
+               struct page *page;
+
+               /* Set buddy page as not up to date */
+               blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
+               block = group * 2;
+               pnum = block / blocks_per_page;
+               page = find_get_page(inode->i_mapping, pnum);
+               if (page != NULL) {
+                       ClearPageUptodate(page);
+                       page_cache_release(page);
+               }
+
+               /* Set bitmap page as not up to date */
+               block++;
+               pnum = block / blocks_per_page;
+               page = find_get_page(inode->i_mapping, pnum);
+               if (page != NULL) {
+                       ClearPageUptodate(page);
+                       page_cache_release(page);
+               }
+
+               /* Get the info on the last group */
+               grp = ext4_get_group_info(sb, group);
+
+               /* Update free blocks in group info */
+               ext4_mb_update_group_info(grp, add);
+       }
+
        if (test_opt(sb, DEBUG))
                printk(KERN_DEBUG "EXT4-fs: extended group to %llu blocks\n",
                       ext4_blocks_count(es));
index 02bf2434397981e223d62fbcb4cd73a03667049e..1cb371dcd609aae5f115cf236acd4d0c66956efb 100644 (file)
@@ -506,6 +506,7 @@ static void ext4_put_super (struct super_block * sb)
        ext4_ext_release(sb);
        ext4_xattr_put_super(sb);
        jbd2_journal_destroy(sbi->s_journal);
+       sbi->s_journal = NULL;
        if (!(sb->s_flags & MS_RDONLY)) {
                EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
                es->s_state = cpu_to_le16(sbi->s_mount_state);
@@ -517,6 +518,7 @@ static void ext4_put_super (struct super_block * sb)
        for (i = 0; i < sbi->s_gdb_count; i++)
                brelse(sbi->s_group_desc[i]);
        kfree(sbi->s_group_desc);
+       kfree(sbi->s_flex_groups);
        percpu_counter_destroy(&sbi->s_freeblocks_counter);
        percpu_counter_destroy(&sbi->s_freeinodes_counter);
        percpu_counter_destroy(&sbi->s_dirs_counter);
@@ -571,6 +573,12 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
        memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache));
        INIT_LIST_HEAD(&ei->i_prealloc_list);
        spin_lock_init(&ei->i_prealloc_lock);
+       jbd2_journal_init_jbd_inode(&ei->jinode, &ei->vfs_inode);
+       ei->i_reserved_data_blocks = 0;
+       ei->i_reserved_meta_blocks = 0;
+       ei->i_allocated_meta_blocks = 0;
+       ei->i_delalloc_reserved_flag = 0;
+       spin_lock_init(&(ei->i_block_reservation_lock));
        return &ei->vfs_inode;
 }
 
@@ -635,6 +643,8 @@ static void ext4_clear_inode(struct inode *inode)
        EXT4_I(inode)->i_block_alloc_info = NULL;
        if (unlikely(rsv))
                kfree(rsv);
+       jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal,
+                                      &EXT4_I(inode)->jinode);
 }
 
 static inline void ext4_show_quota_options(struct seq_file *seq, struct super_block *sb)
@@ -671,7 +681,6 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
        unsigned long def_mount_opts;
        struct super_block *sb = vfs->mnt_sb;
        struct ext4_sb_info *sbi = EXT4_SB(sb);
-       journal_t *journal = sbi->s_journal;
        struct ext4_super_block *es = sbi->s_es;
 
        def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
@@ -747,6 +756,9 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
                seq_puts(seq, ",nomballoc");
        if (test_opt(sb, I_VERSION))
                seq_puts(seq, ",i_version");
+       if (!test_opt(sb, DELALLOC))
+               seq_puts(seq, ",nodelalloc");
+
 
        if (sbi->s_stripe)
                seq_printf(seq, ",stripe=%lu", sbi->s_stripe);
@@ -894,7 +906,7 @@ enum {
        Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
        Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
        Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version,
-       Opt_mballoc, Opt_nomballoc, Opt_stripe,
+       Opt_mballoc, Opt_nomballoc, Opt_stripe, Opt_delalloc, Opt_nodelalloc,
 };
 
 static match_table_t tokens = {
@@ -953,6 +965,8 @@ static match_table_t tokens = {
        {Opt_nomballoc, "nomballoc"},
        {Opt_stripe, "stripe=%u"},
        {Opt_resize, "resize"},
+       {Opt_delalloc, "delalloc"},
+       {Opt_nodelalloc, "nodelalloc"},
        {Opt_err, NULL},
 };
 
@@ -990,6 +1004,7 @@ static int parse_options (char *options, struct super_block *sb,
        int qtype, qfmt;
        char *qname;
 #endif
+       ext4_fsblk_t last_block;
 
        if (!options)
                return 1;
@@ -1309,15 +1324,39 @@ set_qf_format:
                        clear_opt(sbi->s_mount_opt, NOBH);
                        break;
                case Opt_extents:
+                       if (!EXT4_HAS_INCOMPAT_FEATURE(sb,
+                                       EXT4_FEATURE_INCOMPAT_EXTENTS)) {
+                               ext4_warning(sb, __func__,
+                                       "extents feature not enabled "
+                                       "on this filesystem, use tune2fs\n");
+                               return 0;
+                       }
                        set_opt (sbi->s_mount_opt, EXTENTS);
                        break;
                case Opt_noextents:
+                       /*
+                        * When e2fsprogs supports resizing an already existing
+                        * ext3 file system beyond 2**32 blocks, we need to
+                        * add support to the block allocator for growing
+                        * already existing block-mapped inodes so that blocks
+                        * allocated for them fall within 2**32.
+                        */
+                       last_block = ext4_blocks_count(sbi->s_es) - 1;
+                       if (last_block  > 0xffffffffULL) {
+                               printk(KERN_ERR "EXT4-fs: Filesystem too "
+                                               "large to mount with "
+                                               "-o noextents options\n");
+                               return 0;
+                       }
                        clear_opt (sbi->s_mount_opt, EXTENTS);
                        break;
                case Opt_i_version:
                        set_opt(sbi->s_mount_opt, I_VERSION);
                        sb->s_flags |= MS_I_VERSION;
                        break;
+               case Opt_nodelalloc:
+                       clear_opt(sbi->s_mount_opt, DELALLOC);
+                       break;
                case Opt_mballoc:
                        set_opt(sbi->s_mount_opt, MBALLOC);
                        break;
@@ -1331,6 +1370,9 @@ set_qf_format:
                                return 0;
                        sbi->s_stripe = option;
                        break;
+               case Opt_delalloc:
+                       set_opt(sbi->s_mount_opt, DELALLOC);
+                       break;
                default:
                        printk (KERN_ERR
                                "EXT4-fs: Unrecognized mount option \"%s\" "
@@ -1443,6 +1485,54 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
        return res;
 }
 
+static int ext4_fill_flex_info(struct super_block *sb)
+{
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
+       struct ext4_group_desc *gdp = NULL;
+       struct buffer_head *bh;
+       ext4_group_t flex_group_count;
+       ext4_group_t flex_group;
+       int groups_per_flex = 0;
+       __u64 block_bitmap = 0;
+       int i;
+
+       if (!sbi->s_es->s_log_groups_per_flex) {
+               sbi->s_log_groups_per_flex = 0;
+               return 1;
+       }
+
+       sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
+       groups_per_flex = 1 << sbi->s_log_groups_per_flex;
+
+       flex_group_count = (sbi->s_groups_count + groups_per_flex - 1) /
+               groups_per_flex;
+       sbi->s_flex_groups = kmalloc(flex_group_count *
+                                    sizeof(struct flex_groups), GFP_KERNEL);
+       if (sbi->s_flex_groups == NULL) {
+               printk(KERN_ERR "EXT4-fs: not enough memory\n");
+               goto failed;
+       }
+       memset(sbi->s_flex_groups, 0, flex_group_count *
+              sizeof(struct flex_groups));
+
+       gdp = ext4_get_group_desc(sb, 1, &bh);
+       block_bitmap = ext4_block_bitmap(sb, gdp) - 1;
+
+       for (i = 0; i < sbi->s_groups_count; i++) {
+               gdp = ext4_get_group_desc(sb, i, &bh);
+
+               flex_group = ext4_flex_group(sbi, i);
+               sbi->s_flex_groups[flex_group].free_inodes +=
+                       le16_to_cpu(gdp->bg_free_inodes_count);
+               sbi->s_flex_groups[flex_group].free_blocks +=
+                       le16_to_cpu(gdp->bg_free_blocks_count);
+       }
+
+       return 1;
+failed:
+       return 0;
+}
+
 __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group,
                            struct ext4_group_desc *gdp)
 {
@@ -1810,8 +1900,8 @@ static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
 }
 
 static int ext4_fill_super (struct super_block *sb, void *data, int silent)
-                               __releases(kernel_sem)
-                               __acquires(kernel_sem)
+                               __releases(kernel_lock)
+                               __acquires(kernel_lock)
 
 {
        struct buffer_head * bh;
@@ -1851,11 +1941,6 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
                goto out_fail;
        }
 
-       if (!sb_set_blocksize(sb, blocksize)) {
-               printk(KERN_ERR "EXT4-fs: bad blocksize %d.\n", blocksize);
-               goto out_fail;
-       }
-
        /*
         * The ext4 superblock will not be buffer aligned for other than 1kB
         * block sizes.  We need to calculate the offset from buffer start.
@@ -1919,15 +2004,28 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
 
        /*
         * turn on extents feature by default in ext4 filesystem
-        * User -o noextents to turn it off
+        * only if feature flag already set by mkfs or tune2fs.
+        * Use -o noextents to turn it off
         */
-       set_opt(sbi->s_mount_opt, EXTENTS);
+       if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS))
+               set_opt(sbi->s_mount_opt, EXTENTS);
+       else
+               ext4_warning(sb, __func__,
+                       "extents feature not enabled on this filesystem, "
+                       "use tune2fs.\n");
        /*
-        * turn on mballoc feature by default in ext4 filesystem
-        * User -o nomballoc to turn it off
+        * turn on mballoc code by default in ext4 filesystem
+        * Use -o nomballoc to turn it off
         */
        set_opt(sbi->s_mount_opt, MBALLOC);
 
+       /*
+        * enable delayed allocation by default
+        * Use -o nodelalloc to turn it off
+        */
+       set_opt(sbi->s_mount_opt, DELALLOC);
+
+
        if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum,
                            NULL, 0))
                goto failed_mount;
@@ -2138,6 +2236,14 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
                printk(KERN_ERR "EXT4-fs: group descriptors corrupted!\n");
                goto failed_mount2;
        }
+       if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
+               if (!ext4_fill_flex_info(sb)) {
+                       printk(KERN_ERR
+                              "EXT4-fs: unable to initialize "
+                              "flex_bg meta info!\n");
+                       goto failed_mount2;
+               }
+
        sbi->s_gdb_count = db_count;
        get_random_bytes(&sbi->s_next_generation, sizeof(u32));
        spin_lock_init(&sbi->s_next_gen_lock);
@@ -2358,6 +2464,13 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
                test_opt(sb,DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered":
                "writeback");
 
+       if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
+               printk(KERN_WARNING "EXT4-fs: Ignoring delalloc option - "
+                               "requested data journaling mode\n");
+               clear_opt(sbi->s_mount_opt, DELALLOC);
+       } else if (test_opt(sb, DELALLOC))
+               printk(KERN_INFO "EXT4-fs: delayed allocation enabled\n");
+
        ext4_ext_init(sb);
        ext4_mb_init(sb, needs_recovery);
 
@@ -2372,6 +2485,7 @@ cantfind_ext4:
 
 failed_mount4:
        jbd2_journal_destroy(sbi->s_journal);
+       sbi->s_journal = NULL;
 failed_mount3:
        percpu_counter_destroy(&sbi->s_freeblocks_counter);
        percpu_counter_destroy(&sbi->s_freeinodes_counter);
@@ -3325,7 +3439,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
                        err = ext4_journal_dirty_metadata(handle, bh);
                else {
                        /* Always do at least ordered writes for quotas */
-                       err = ext4_journal_dirty_data(handle, bh);
+                       err = ext4_jbd2_file_inode(handle, inode);
                        mark_buffer_dirty(bh);
                }
                brelse(bh);
index ff08633f398edb410cba33f9912f743ebfa29ada..93c5fdcdad2e3beae27e16f0c35830ce11d20db3 100644 (file)
@@ -810,7 +810,7 @@ inserted:
                        /* We need to allocate a new block */
                        ext4_fsblk_t goal = ext4_group_first_block_no(sb,
                                                EXT4_I(inode)->i_block_group);
-                       ext4_fsblk_t block = ext4_new_block(handle, inode,
+                       ext4_fsblk_t block = ext4_new_meta_block(handle, inode,
                                                        goal, &error);
                        if (error)
                                goto cleanup;
index fff33382cadcfc2b93d3e81dab1ad84e57364d6a..ac1a52cf2a37dedde562cf4d35984ea49746683e 100644 (file)
 #include "ext4.h"
 #include "xattr.h"
 
-#define XATTR_TRUSTED_PREFIX "trusted."
-
 static size_t
 ext4_xattr_trusted_list(struct inode *inode, char *list, size_t list_size,
                        const char *name, size_t name_len)
 {
-       const size_t prefix_len = sizeof(XATTR_TRUSTED_PREFIX)-1;
+       const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
        const size_t total_len = prefix_len + name_len + 1;
 
        if (!capable(CAP_SYS_ADMIN))
index 67be723fcc4eeb6cfa3cb996d5e029176b900cc7..d91aa61b42aae34545a1fab88b9f54e5f405a0a6 100644 (file)
 #include "ext4.h"
 #include "xattr.h"
 
-#define XATTR_USER_PREFIX "user."
-
 static size_t
 ext4_xattr_user_list(struct inode *inode, char *list, size_t list_size,
                     const char *name, size_t name_len)
 {
-       const size_t prefix_len = sizeof(XATTR_USER_PREFIX)-1;
+       const size_t prefix_len = XATTR_USER_PREFIX_LEN;
        const size_t total_len = prefix_len + name_len + 1;
 
        if (!test_opt(inode->i_sb, XATTR_USER))
index 7f7947e3dfbb4594f56581711c053772cc6cb18b..ab2f57e3fb87b025668c20a5cfe66b378d7fdf5b 100644 (file)
@@ -14,23 +14,11 @@ config GFS2_FS
          GFS is perfect consistency -- changes made to the filesystem on one
          machine show up immediately on all other machines in the cluster.
 
-         To use the GFS2 filesystem, you will need to enable one or more of
-         the below locking modules. Documentation and utilities for GFS2 can
+         To use the GFS2 filesystem in a cluster, you will need to enable
+         the locking module below. Documentation and utilities for GFS2 can
          be found here: http://sources.redhat.com/cluster
 
-config GFS2_FS_LOCKING_NOLOCK
-       tristate "GFS2 \"nolock\" locking module"
-       depends on GFS2_FS
-       help
-         Single node locking module for GFS2.
-
-         Use this module if you want to use GFS2 on a single node without
-         its clustering features. You can still take advantage of the
-         large file support, and upgrade to running a full cluster later on
-         if required.
-
-         If you will only be using GFS2 in cluster mode, you do not need this
-         module.
+         The "nolock" lock module is now built in to GFS2 by default.
 
 config GFS2_FS_LOCKING_DLM
        tristate "GFS2 DLM locking module"
index e2350df02a0746ea8eb9ef784737678a4bf29b65..ec65851ec80a21056d8c1f8980ab51e559b558f6 100644 (file)
@@ -5,6 +5,5 @@ gfs2-y := acl.o bmap.o daemon.o dir.o eaops.o eattr.o glock.o \
        ops_fstype.o ops_inode.o ops_super.o quota.o \
        recovery.o rgrp.o super.o sys.o trans.o util.o
 
-obj-$(CONFIG_GFS2_FS_LOCKING_NOLOCK) += locking/nolock/
 obj-$(CONFIG_GFS2_FS_LOCKING_DLM) += locking/dlm/
 
index 3bb11c0f8b56a15408103944f830bce648628414..ef606e3a5cf484ce6722e1855e7b7e5c3762b956 100644 (file)
@@ -15,11 +15,6 @@ enum {
        CREATE = 1,
 };
 
-enum {
-       NO_WAIT = 0,
-       WAIT = 1,
-};
-
 enum {
        NO_FORCE = 0,
        FORCE = 1,
index d636b3e80f5d26dda0a0f799011da225c5ef24ba..13391e546616ce47a9b413dde1ac73ad396d808d 100644 (file)
@@ -45,21 +45,19 @@ struct gfs2_gl_hash_bucket {
         struct hlist_head hb_list;
 };
 
-struct glock_iter {
-       int hash;                     /* hash bucket index         */
-       struct gfs2_sbd *sdp;         /* incore superblock         */
-       struct gfs2_glock *gl;        /* current glock struct      */
-       struct seq_file *seq;         /* sequence file for debugfs */
-       char string[512];             /* scratch space             */
+struct gfs2_glock_iter {
+       int hash;                       /* hash bucket index         */
+       struct gfs2_sbd *sdp;           /* incore superblock         */
+       struct gfs2_glock *gl;          /* current glock struct      */
+       char string[512];               /* scratch space             */
 };
 
 typedef void (*glock_examiner) (struct gfs2_glock * gl);
 
 static int gfs2_dump_lockstate(struct gfs2_sbd *sdp);
-static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl);
-static void gfs2_glock_xmote_th(struct gfs2_glock *gl, struct gfs2_holder *gh);
-static void gfs2_glock_drop_th(struct gfs2_glock *gl);
-static void run_queue(struct gfs2_glock *gl);
+static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl);
+#define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) { __dump_glock(NULL, gl); BUG(); } } while(0)
+static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target);
 
 static DECLARE_RWSEM(gfs2_umount_flush_sem);
 static struct dentry *gfs2_root;
@@ -122,33 +120,6 @@ static inline rwlock_t *gl_lock_addr(unsigned int x)
 }
 #endif
 
-/**
- * relaxed_state_ok - is a requested lock compatible with the current lock mode?
- * @actual: the current state of the lock
- * @requested: the lock state that was requested by the caller
- * @flags: the modifier flags passed in by the caller
- *
- * Returns: 1 if the locks are compatible, 0 otherwise
- */
-
-static inline int relaxed_state_ok(unsigned int actual, unsigned requested,
-                                  int flags)
-{
-       if (actual == requested)
-               return 1;
-
-       if (flags & GL_EXACT)
-               return 0;
-
-       if (actual == LM_ST_EXCLUSIVE && requested == LM_ST_SHARED)
-               return 1;
-
-       if (actual != LM_ST_UNLOCKED && (flags & LM_FLAG_ANY))
-               return 1;
-
-       return 0;
-}
-
 /**
  * gl_hash() - Turn glock number into hash bucket number
  * @lock: The glock number
@@ -182,7 +153,7 @@ static void glock_free(struct gfs2_glock *gl)
        struct gfs2_sbd *sdp = gl->gl_sbd;
        struct inode *aspace = gl->gl_aspace;
 
-       if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
+       if (sdp->sd_lockstruct.ls_ops->lm_put_lock)
                sdp->sd_lockstruct.ls_ops->lm_put_lock(gl->gl_lock);
 
        if (aspace)
@@ -211,17 +182,14 @@ static void gfs2_glock_hold(struct gfs2_glock *gl)
 int gfs2_glock_put(struct gfs2_glock *gl)
 {
        int rv = 0;
-       struct gfs2_sbd *sdp = gl->gl_sbd;
 
        write_lock(gl_lock_addr(gl->gl_hash));
        if (atomic_dec_and_test(&gl->gl_ref)) {
                hlist_del(&gl->gl_list);
                write_unlock(gl_lock_addr(gl->gl_hash));
-               gfs2_assert(sdp, gl->gl_state == LM_ST_UNLOCKED);
-               gfs2_assert(sdp, list_empty(&gl->gl_reclaim));
-               gfs2_assert(sdp, list_empty(&gl->gl_holders));
-               gfs2_assert(sdp, list_empty(&gl->gl_waiters1));
-               gfs2_assert(sdp, list_empty(&gl->gl_waiters3));
+               GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_UNLOCKED);
+               GLOCK_BUG_ON(gl, !list_empty(&gl->gl_reclaim));
+               GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
                glock_free(gl);
                rv = 1;
                goto out;
@@ -281,22 +249,401 @@ static struct gfs2_glock *gfs2_glock_find(const struct gfs2_sbd *sdp,
        return gl;
 }
 
+/**
+ * may_grant - check if it's ok to grant a new lock
+ * @gl: The glock
+ * @gh: The lock request which we wish to grant
+ *
+ * Returns: true if it's ok to grant the lock
+ */
+
+static inline int may_grant(const struct gfs2_glock *gl, const struct gfs2_holder *gh)
+{
+       const struct gfs2_holder *gh_head = list_entry(gl->gl_holders.next, const struct gfs2_holder, gh_list);
+       if ((gh->gh_state == LM_ST_EXCLUSIVE ||
+            gh_head->gh_state == LM_ST_EXCLUSIVE) && gh != gh_head)
+               return 0;
+       if (gl->gl_state == gh->gh_state)
+               return 1;
+       if (gh->gh_flags & GL_EXACT)
+               return 0;
+       if (gl->gl_state == LM_ST_EXCLUSIVE) {
+               if (gh->gh_state == LM_ST_SHARED && gh_head->gh_state == LM_ST_SHARED)
+                       return 1;
+               if (gh->gh_state == LM_ST_DEFERRED && gh_head->gh_state == LM_ST_DEFERRED)
+                       return 1;
+       }
+       if (gl->gl_state != LM_ST_UNLOCKED && (gh->gh_flags & LM_FLAG_ANY))
+               return 1;
+       return 0;
+}
+
+static void gfs2_holder_wake(struct gfs2_holder *gh)
+{
+       clear_bit(HIF_WAIT, &gh->gh_iflags);
+       smp_mb__after_clear_bit();
+       wake_up_bit(&gh->gh_iflags, HIF_WAIT);
+}
+
+/**
+ * do_promote - promote as many requests as possible on the current queue
+ * @gl: The glock
+ * 
+ * Returns: true if there is a blocked holder at the head of the list
+ */
+
+static int do_promote(struct gfs2_glock *gl)
+{
+       const struct gfs2_glock_operations *glops = gl->gl_ops;
+       struct gfs2_holder *gh, *tmp;
+       int ret;
+
+restart:
+       list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) {
+               if (test_bit(HIF_HOLDER, &gh->gh_iflags))
+                       continue;
+               if (may_grant(gl, gh)) {
+                       if (gh->gh_list.prev == &gl->gl_holders &&
+                           glops->go_lock) {
+                               spin_unlock(&gl->gl_spin);
+                               /* FIXME: eliminate this eventually */
+                               ret = glops->go_lock(gh);
+                               spin_lock(&gl->gl_spin);
+                               if (ret) {
+                                       gh->gh_error = ret;
+                                       list_del_init(&gh->gh_list);
+                                       gfs2_holder_wake(gh);
+                                       goto restart;
+                               }
+                               set_bit(HIF_HOLDER, &gh->gh_iflags);
+                               gfs2_holder_wake(gh);
+                               goto restart;
+                       }
+                       set_bit(HIF_HOLDER, &gh->gh_iflags);
+                       gfs2_holder_wake(gh);
+                       continue;
+               }
+               if (gh->gh_list.prev == &gl->gl_holders)
+                       return 1;
+               break;
+       }
+       return 0;
+}
+
+/**
+ * do_error - Something unexpected has happened during a lock request
+ *
+ */
+
+static inline void do_error(struct gfs2_glock *gl, const int ret)
+{
+       struct gfs2_holder *gh, *tmp;
+
+       list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) {
+               if (test_bit(HIF_HOLDER, &gh->gh_iflags))
+                       continue;
+               if (ret & LM_OUT_ERROR)
+                       gh->gh_error = -EIO;
+               else if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))
+                       gh->gh_error = GLR_TRYFAILED;
+               else
+                       continue;
+               list_del_init(&gh->gh_list);
+               gfs2_holder_wake(gh);
+       }
+}
+
+/**
+ * find_first_waiter - find the first gh that's waiting for the glock
+ * @gl: the glock
+ */
+
+static inline struct gfs2_holder *find_first_waiter(const struct gfs2_glock *gl)
+{
+       struct gfs2_holder *gh;
+
+       list_for_each_entry(gh, &gl->gl_holders, gh_list) {
+               if (!test_bit(HIF_HOLDER, &gh->gh_iflags))
+                       return gh;
+       }
+       return NULL;
+}
+
+/**
+ * state_change - record that the glock is now in a different state
+ * @gl: the glock
+ * @new_state: the new state
+ *
+ */
+
+static void state_change(struct gfs2_glock *gl, unsigned int new_state)
+{
+       int held1, held2;
+
+       held1 = (gl->gl_state != LM_ST_UNLOCKED);
+       held2 = (new_state != LM_ST_UNLOCKED);
+
+       if (held1 != held2) {
+               if (held2)
+                       gfs2_glock_hold(gl);
+               else
+                       gfs2_glock_put(gl);
+       }
+
+       gl->gl_state = new_state;
+       gl->gl_tchange = jiffies;
+}
+
+static void gfs2_demote_wake(struct gfs2_glock *gl)
+{
+       gl->gl_demote_state = LM_ST_EXCLUSIVE;
+       clear_bit(GLF_DEMOTE, &gl->gl_flags);
+       smp_mb__after_clear_bit();
+       wake_up_bit(&gl->gl_flags, GLF_DEMOTE);
+}
+
+/**
+ * finish_xmote - The DLM has replied to one of our lock requests
+ * @gl: The glock
+ * @ret: The status from the DLM
+ *
+ */
+
+static void finish_xmote(struct gfs2_glock *gl, unsigned int ret)
+{
+       const struct gfs2_glock_operations *glops = gl->gl_ops;
+       struct gfs2_holder *gh;
+       unsigned state = ret & LM_OUT_ST_MASK;
+
+       spin_lock(&gl->gl_spin);
+       state_change(gl, state);
+       gh = find_first_waiter(gl);
+
+       /* Demote to UN request arrived during demote to SH or DF */
+       if (test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags) &&
+           state != LM_ST_UNLOCKED && gl->gl_demote_state == LM_ST_UNLOCKED)
+               gl->gl_target = LM_ST_UNLOCKED;
+
+       /* Check for state != intended state */
+       if (unlikely(state != gl->gl_target)) {
+               if (gh && !test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)) {
+                       /* move to back of queue and try next entry */
+                       if (ret & LM_OUT_CANCELED) {
+                               if ((gh->gh_flags & LM_FLAG_PRIORITY) == 0)
+                                       list_move_tail(&gh->gh_list, &gl->gl_holders);
+                               gh = find_first_waiter(gl);
+                               gl->gl_target = gh->gh_state;
+                               goto retry;
+                       }
+                       /* Some error or failed "try lock" - report it */
+                       if ((ret & LM_OUT_ERROR) ||
+                           (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) {
+                               gl->gl_target = gl->gl_state;
+                               do_error(gl, ret);
+                               goto out;
+                       }
+               }
+               switch(state) {
+               /* Unlocked due to conversion deadlock, try again */
+               case LM_ST_UNLOCKED:
+retry:
+                       do_xmote(gl, gh, gl->gl_target);
+                       break;
+               /* Conversion fails, unlock and try again */
+               case LM_ST_SHARED:
+               case LM_ST_DEFERRED:
+                       do_xmote(gl, gh, LM_ST_UNLOCKED);
+                       break;
+               default: /* Everything else */
+                       printk(KERN_ERR "GFS2: wanted %u got %u\n", gl->gl_target, state);
+                       GLOCK_BUG_ON(gl, 1);
+               }
+               spin_unlock(&gl->gl_spin);
+               gfs2_glock_put(gl);
+               return;
+       }
+
+       /* Fast path - we got what we asked for */
+       if (test_and_clear_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags))
+               gfs2_demote_wake(gl);
+       if (state != LM_ST_UNLOCKED) {
+               if (glops->go_xmote_bh) {
+                       int rv;
+                       spin_unlock(&gl->gl_spin);
+                       rv = glops->go_xmote_bh(gl, gh);
+                       if (rv == -EAGAIN)
+                               return;
+                       spin_lock(&gl->gl_spin);
+                       if (rv) {
+                               do_error(gl, rv);
+                               goto out;
+                       }
+               }
+               do_promote(gl);
+       }
+out:
+       clear_bit(GLF_LOCK, &gl->gl_flags);
+       spin_unlock(&gl->gl_spin);
+       gfs2_glock_put(gl);
+}
+
+static unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, void *lock,
+                                unsigned int cur_state, unsigned int req_state,
+                                unsigned int flags)
+{
+       int ret = LM_OUT_ERROR;
+
+       if (!sdp->sd_lockstruct.ls_ops->lm_lock)
+               return req_state == LM_ST_UNLOCKED ? 0 : req_state;
+
+       if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
+               ret = sdp->sd_lockstruct.ls_ops->lm_lock(lock, cur_state,
+                                                        req_state, flags);
+       return ret;
+}
+
+/**
+ * do_xmote - Calls the DLM to change the state of a lock
+ * @gl: The glock
+ * @gh: The holder (only for promotes)
+ * @target: The target lock state
+ *
+ */
+
+static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target)
+{
+       const struct gfs2_glock_operations *glops = gl->gl_ops;
+       struct gfs2_sbd *sdp = gl->gl_sbd;
+       unsigned int lck_flags = gh ? gh->gh_flags : 0;
+       int ret;
+
+       lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP |
+                     LM_FLAG_PRIORITY);
+       BUG_ON(gl->gl_state == target);
+       BUG_ON(gl->gl_state == gl->gl_target);
+       if ((target == LM_ST_UNLOCKED || target == LM_ST_DEFERRED) &&
+           glops->go_inval) {
+               set_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags);
+               do_error(gl, 0); /* Fail queued try locks */
+       }
+       spin_unlock(&gl->gl_spin);
+       if (glops->go_xmote_th)
+               glops->go_xmote_th(gl);
+       if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags))
+               glops->go_inval(gl, target == LM_ST_DEFERRED ? 0 : DIO_METADATA);
+       clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags);
+
+       gfs2_glock_hold(gl);
+       if (target != LM_ST_UNLOCKED && (gl->gl_state == LM_ST_SHARED ||
+           gl->gl_state == LM_ST_DEFERRED) &&
+           !(lck_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)))
+               lck_flags |= LM_FLAG_TRY_1CB;
+       ret = gfs2_lm_lock(sdp, gl->gl_lock, gl->gl_state, target, lck_flags);
+
+       if (!(ret & LM_OUT_ASYNC)) {
+               finish_xmote(gl, ret);
+               gfs2_glock_hold(gl);
+               if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
+                       gfs2_glock_put(gl);
+       } else {
+               GLOCK_BUG_ON(gl, ret != LM_OUT_ASYNC);
+       }
+       spin_lock(&gl->gl_spin);
+}
+
+/**
+ * find_first_holder - find the first "holder" gh
+ * @gl: the glock
+ */
+
+static inline struct gfs2_holder *find_first_holder(const struct gfs2_glock *gl)
+{
+       struct gfs2_holder *gh;
+
+       if (!list_empty(&gl->gl_holders)) {
+               gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list);
+               if (test_bit(HIF_HOLDER, &gh->gh_iflags))
+                       return gh;
+       }
+       return NULL;
+}
+
+/**
+ * run_queue - do all outstanding tasks related to a glock
+ * @gl: The glock in question
+ * @nonblock: True if we must not block in run_queue
+ *
+ */
+
+static void run_queue(struct gfs2_glock *gl, const int nonblock)
+{
+       struct gfs2_holder *gh = NULL;
+
+       if (test_and_set_bit(GLF_LOCK, &gl->gl_flags))
+               return;
+
+       GLOCK_BUG_ON(gl, test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags));
+
+       if (test_bit(GLF_DEMOTE, &gl->gl_flags) &&
+           gl->gl_demote_state != gl->gl_state) {
+               if (find_first_holder(gl))
+                       goto out;
+               if (nonblock)
+                       goto out_sched;
+               set_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags);
+               GLOCK_BUG_ON(gl, gl->gl_demote_state == LM_ST_EXCLUSIVE);
+               gl->gl_target = gl->gl_demote_state;
+       } else {
+               if (test_bit(GLF_DEMOTE, &gl->gl_flags))
+                       gfs2_demote_wake(gl);
+               if (do_promote(gl) == 0)
+                       goto out;
+               gh = find_first_waiter(gl);
+               gl->gl_target = gh->gh_state;
+               if (!(gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)))
+                       do_error(gl, 0); /* Fail queued try locks */
+       }
+       do_xmote(gl, gh, gl->gl_target);
+       return;
+
+out_sched:
+       gfs2_glock_hold(gl);
+       if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
+               gfs2_glock_put(gl);
+out:
+       clear_bit(GLF_LOCK, &gl->gl_flags);
+}
+
 static void glock_work_func(struct work_struct *work)
 {
+       unsigned long delay = 0;
        struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_work.work);
 
+       if (test_and_clear_bit(GLF_REPLY_PENDING, &gl->gl_flags))
+               finish_xmote(gl, gl->gl_reply);
        spin_lock(&gl->gl_spin);
-       if (test_and_clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags))
-               set_bit(GLF_DEMOTE, &gl->gl_flags);
-       run_queue(gl);
+       if (test_and_clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
+           gl->gl_state != LM_ST_UNLOCKED &&
+           gl->gl_demote_state != LM_ST_EXCLUSIVE) {
+               unsigned long holdtime, now = jiffies;
+               holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time;
+               if (time_before(now, holdtime))
+                       delay = holdtime - now;
+               set_bit(delay ? GLF_PENDING_DEMOTE : GLF_DEMOTE, &gl->gl_flags);
+       }
+       run_queue(gl, 0);
        spin_unlock(&gl->gl_spin);
-       gfs2_glock_put(gl);
+       if (!delay ||
+           queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0)
+               gfs2_glock_put(gl);
 }
 
 static int gfs2_lm_get_lock(struct gfs2_sbd *sdp, struct lm_lockname *name,
                     void **lockp)
 {
        int error = -EIO;
+       if (!sdp->sd_lockstruct.ls_ops->lm_get_lock)
+               return 0;
        if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
                error = sdp->sd_lockstruct.ls_ops->lm_get_lock(
                                sdp->sd_lockstruct.ls_lockspace, name, lockp);
@@ -342,12 +689,10 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
        gl->gl_name = name;
        atomic_set(&gl->gl_ref, 1);
        gl->gl_state = LM_ST_UNLOCKED;
+       gl->gl_target = LM_ST_UNLOCKED;
        gl->gl_demote_state = LM_ST_EXCLUSIVE;
        gl->gl_hash = hash;
-       gl->gl_owner_pid = NULL;
-       gl->gl_ip = 0;
        gl->gl_ops = glops;
-       gl->gl_req_gh = NULL;
        gl->gl_stamp = jiffies;
        gl->gl_tchange = jiffies;
        gl->gl_object = NULL;
@@ -439,664 +784,85 @@ void gfs2_holder_reinit(unsigned int state, unsigned flags, struct gfs2_holder *
  *
  */
 
-void gfs2_holder_uninit(struct gfs2_holder *gh)
-{
-       put_pid(gh->gh_owner_pid);
-       gfs2_glock_put(gh->gh_gl);
-       gh->gh_gl = NULL;
-       gh->gh_ip = 0;
-}
-
-static void gfs2_holder_wake(struct gfs2_holder *gh)
-{
-       clear_bit(HIF_WAIT, &gh->gh_iflags);
-       smp_mb__after_clear_bit();
-       wake_up_bit(&gh->gh_iflags, HIF_WAIT);
-}
-
-static int just_schedule(void *word)
-{
-        schedule();
-        return 0;
-}
-
-static void wait_on_holder(struct gfs2_holder *gh)
-{
-       might_sleep();
-       wait_on_bit(&gh->gh_iflags, HIF_WAIT, just_schedule, TASK_UNINTERRUPTIBLE);
-}
-
-static void gfs2_demote_wake(struct gfs2_glock *gl)
-{
-       gl->gl_demote_state = LM_ST_EXCLUSIVE;
-        clear_bit(GLF_DEMOTE, &gl->gl_flags);
-        smp_mb__after_clear_bit();
-        wake_up_bit(&gl->gl_flags, GLF_DEMOTE);
-}
-
-static void wait_on_demote(struct gfs2_glock *gl)
-{
-       might_sleep();
-       wait_on_bit(&gl->gl_flags, GLF_DEMOTE, just_schedule, TASK_UNINTERRUPTIBLE);
-}
-
-/**
- * rq_mutex - process a mutex request in the queue
- * @gh: the glock holder
- *
- * Returns: 1 if the queue is blocked
- */
-
-static int rq_mutex(struct gfs2_holder *gh)
-{
-       struct gfs2_glock *gl = gh->gh_gl;
-
-       list_del_init(&gh->gh_list);
-       /*  gh->gh_error never examined.  */
-       set_bit(GLF_LOCK, &gl->gl_flags);
-       clear_bit(HIF_WAIT, &gh->gh_iflags);
-       smp_mb();
-       wake_up_bit(&gh->gh_iflags, HIF_WAIT);
-
-       return 1;
-}
-
-/**
- * rq_promote - process a promote request in the queue
- * @gh: the glock holder
- *
- * Acquire a new inter-node lock, or change a lock state to more restrictive.
- *
- * Returns: 1 if the queue is blocked
- */
-
-static int rq_promote(struct gfs2_holder *gh)
-{
-       struct gfs2_glock *gl = gh->gh_gl;
-
-       if (!relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) {
-               if (list_empty(&gl->gl_holders)) {
-                       gl->gl_req_gh = gh;
-                       set_bit(GLF_LOCK, &gl->gl_flags);
-                       spin_unlock(&gl->gl_spin);
-                       gfs2_glock_xmote_th(gh->gh_gl, gh);
-                       spin_lock(&gl->gl_spin);
-               }
-               return 1;
-       }
-
-       if (list_empty(&gl->gl_holders)) {
-               set_bit(HIF_FIRST, &gh->gh_iflags);
-               set_bit(GLF_LOCK, &gl->gl_flags);
-       } else {
-               struct gfs2_holder *next_gh;
-               if (gh->gh_state == LM_ST_EXCLUSIVE)
-                       return 1;
-               next_gh = list_entry(gl->gl_holders.next, struct gfs2_holder,
-                                    gh_list);
-               if (next_gh->gh_state == LM_ST_EXCLUSIVE)
-                        return 1;
-       }
-
-       list_move_tail(&gh->gh_list, &gl->gl_holders);
-       gh->gh_error = 0;
-       set_bit(HIF_HOLDER, &gh->gh_iflags);
-
-       gfs2_holder_wake(gh);
-
-       return 0;
-}
-
-/**
- * rq_demote - process a demote request in the queue
- * @gh: the glock holder
- *
- * Returns: 1 if the queue is blocked
- */
-
-static int rq_demote(struct gfs2_glock *gl)
-{
-       if (!list_empty(&gl->gl_holders))
-               return 1;
-
-       if (gl->gl_state == gl->gl_demote_state ||
-           gl->gl_state == LM_ST_UNLOCKED) {
-               gfs2_demote_wake(gl);
-               return 0;
-       }
-
-       set_bit(GLF_LOCK, &gl->gl_flags);
-       set_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags);
-
-       if (gl->gl_demote_state == LM_ST_UNLOCKED ||
-           gl->gl_state != LM_ST_EXCLUSIVE) {
-               spin_unlock(&gl->gl_spin);
-               gfs2_glock_drop_th(gl);
-       } else {
-               spin_unlock(&gl->gl_spin);
-               gfs2_glock_xmote_th(gl, NULL);
-       }
-
-       spin_lock(&gl->gl_spin);
-       clear_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags);
-
-       return 0;
-}
-
-/**
- * run_queue - process holder structures on a glock
- * @gl: the glock
- *
- */
-static void run_queue(struct gfs2_glock *gl)
-{
-       struct gfs2_holder *gh;
-       int blocked = 1;
-
-       for (;;) {
-               if (test_bit(GLF_LOCK, &gl->gl_flags))
-                       break;
-
-               if (!list_empty(&gl->gl_waiters1)) {
-                       gh = list_entry(gl->gl_waiters1.next,
-                                       struct gfs2_holder, gh_list);
-                       blocked = rq_mutex(gh);
-               } else if (test_bit(GLF_DEMOTE, &gl->gl_flags)) {
-                       blocked = rq_demote(gl);
-                       if (test_bit(GLF_WAITERS2, &gl->gl_flags) &&
-                                    !blocked) {
-                               set_bit(GLF_DEMOTE, &gl->gl_flags);
-                               gl->gl_demote_state = LM_ST_UNLOCKED;
-                       }
-                       clear_bit(GLF_WAITERS2, &gl->gl_flags);
-               } else if (!list_empty(&gl->gl_waiters3)) {
-                       gh = list_entry(gl->gl_waiters3.next,
-                                       struct gfs2_holder, gh_list);
-                       blocked = rq_promote(gh);
-               } else
-                       break;
-
-               if (blocked)
-                       break;
-       }
-}
-
-/**
- * gfs2_glmutex_lock - acquire a local lock on a glock
- * @gl: the glock
- *
- * Gives caller exclusive access to manipulate a glock structure.
- */
-
-static void gfs2_glmutex_lock(struct gfs2_glock *gl)
-{
-       spin_lock(&gl->gl_spin);
-       if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) {
-               struct gfs2_holder gh;
-
-               gfs2_holder_init(gl, 0, 0, &gh);
-               set_bit(HIF_WAIT, &gh.gh_iflags);
-               list_add_tail(&gh.gh_list, &gl->gl_waiters1);
-               spin_unlock(&gl->gl_spin);
-               wait_on_holder(&gh);
-               gfs2_holder_uninit(&gh);
-       } else {
-               gl->gl_owner_pid = get_pid(task_pid(current));
-               gl->gl_ip = (unsigned long)__builtin_return_address(0);
-               spin_unlock(&gl->gl_spin);
-       }
-}
-
-/**
- * gfs2_glmutex_trylock - try to acquire a local lock on a glock
- * @gl: the glock
- *
- * Returns: 1 if the glock is acquired
- */
-
-static int gfs2_glmutex_trylock(struct gfs2_glock *gl)
-{
-       int acquired = 1;
-
-       spin_lock(&gl->gl_spin);
-       if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) {
-               acquired = 0;
-       } else {
-               gl->gl_owner_pid = get_pid(task_pid(current));
-               gl->gl_ip = (unsigned long)__builtin_return_address(0);
-       }
-       spin_unlock(&gl->gl_spin);
-
-       return acquired;
-}
-
-/**
- * gfs2_glmutex_unlock - release a local lock on a glock
- * @gl: the glock
- *
- */
-
-static void gfs2_glmutex_unlock(struct gfs2_glock *gl)
-{
-       struct pid *pid;
-
-       spin_lock(&gl->gl_spin);
-       clear_bit(GLF_LOCK, &gl->gl_flags);
-       pid = gl->gl_owner_pid;
-       gl->gl_owner_pid = NULL;
-       gl->gl_ip = 0;
-       run_queue(gl);
-       spin_unlock(&gl->gl_spin);
-
-       put_pid(pid);
-}
-
-/**
- * handle_callback - process a demote request
- * @gl: the glock
- * @state: the state the caller wants us to change to
- *
- * There are only two requests that we are going to see in actual
- * practise: LM_ST_SHARED and LM_ST_UNLOCKED
- */
-
-static void handle_callback(struct gfs2_glock *gl, unsigned int state,
-                           int remote, unsigned long delay)
-{
-       int bit = delay ? GLF_PENDING_DEMOTE : GLF_DEMOTE;
-
-       spin_lock(&gl->gl_spin);
-       set_bit(bit, &gl->gl_flags);
-       if (gl->gl_demote_state == LM_ST_EXCLUSIVE) {
-               gl->gl_demote_state = state;
-               gl->gl_demote_time = jiffies;
-               if (remote && gl->gl_ops->go_type == LM_TYPE_IOPEN &&
-                   gl->gl_object) {
-                       gfs2_glock_schedule_for_reclaim(gl);
-                       spin_unlock(&gl->gl_spin);
-                       return;
-               }
-       } else if (gl->gl_demote_state != LM_ST_UNLOCKED &&
-                       gl->gl_demote_state != state) {
-               if (test_bit(GLF_DEMOTE_IN_PROGRESS,  &gl->gl_flags)) 
-                       set_bit(GLF_WAITERS2, &gl->gl_flags);
-               else 
-                       gl->gl_demote_state = LM_ST_UNLOCKED;
-       }
-       spin_unlock(&gl->gl_spin);
-}
-
-/**
- * state_change - record that the glock is now in a different state
- * @gl: the glock
- * @new_state the new state
- *
- */
-
-static void state_change(struct gfs2_glock *gl, unsigned int new_state)
-{
-       int held1, held2;
-
-       held1 = (gl->gl_state != LM_ST_UNLOCKED);
-       held2 = (new_state != LM_ST_UNLOCKED);
-
-       if (held1 != held2) {
-               if (held2)
-                       gfs2_glock_hold(gl);
-               else
-                       gfs2_glock_put(gl);
-       }
-
-       gl->gl_state = new_state;
-       gl->gl_tchange = jiffies;
-}
-
-/**
- * drop_bh - Called after a lock module unlock completes
- * @gl: the glock
- * @ret: the return status
- *
- * Doesn't wake up the process waiting on the struct gfs2_holder (if any)
- * Doesn't drop the reference on the glock the top half took out
- *
- */
-
-static void drop_bh(struct gfs2_glock *gl, unsigned int ret)
-{
-       struct gfs2_sbd *sdp = gl->gl_sbd;
-       struct gfs2_holder *gh = gl->gl_req_gh;
-
-       gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
-       gfs2_assert_warn(sdp, list_empty(&gl->gl_holders));
-       gfs2_assert_warn(sdp, !ret);
-
-       state_change(gl, LM_ST_UNLOCKED);
-
-       if (test_and_clear_bit(GLF_CONV_DEADLK, &gl->gl_flags)) {
-               spin_lock(&gl->gl_spin);
-               gh->gh_error = 0;
-               spin_unlock(&gl->gl_spin);
-               gfs2_glock_xmote_th(gl, gl->gl_req_gh);
-               gfs2_glock_put(gl);
-               return;
-       }
-
-       spin_lock(&gl->gl_spin);
-       gfs2_demote_wake(gl);
-       clear_bit(GLF_LOCK, &gl->gl_flags);
-       spin_unlock(&gl->gl_spin);
-       gfs2_glock_put(gl);
-}
-
-/**
- * xmote_bh - Called after the lock module is done acquiring a lock
- * @gl: The glock in question
- * @ret: the int returned from the lock module
- *
- */
-
-static void xmote_bh(struct gfs2_glock *gl, unsigned int ret)
-{
-       struct gfs2_sbd *sdp = gl->gl_sbd;
-       const struct gfs2_glock_operations *glops = gl->gl_ops;
-       struct gfs2_holder *gh = gl->gl_req_gh;
-       int op_done = 1;
-
-       if (!gh && (ret & LM_OUT_ST_MASK) == LM_ST_UNLOCKED) {
-               drop_bh(gl, ret);
-               return;
-       }
-
-       gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
-       gfs2_assert_warn(sdp, list_empty(&gl->gl_holders));
-       gfs2_assert_warn(sdp, !(ret & LM_OUT_ASYNC));
-
-       state_change(gl, ret & LM_OUT_ST_MASK);
-
-       /*  Deal with each possible exit condition  */
-
-       if (!gh) {
-               gl->gl_stamp = jiffies;
-               if (ret & LM_OUT_CANCELED) {
-                       op_done = 0;
-               } else {
-                       spin_lock(&gl->gl_spin);
-                       if (gl->gl_state != gl->gl_demote_state) {
-                               spin_unlock(&gl->gl_spin);
-                               gfs2_glock_drop_th(gl);
-                               gfs2_glock_put(gl);
-                               return;
-                       }
-                       gfs2_demote_wake(gl);
-                       spin_unlock(&gl->gl_spin);
-               }
-       } else {
-               spin_lock(&gl->gl_spin);
-               if (ret & LM_OUT_CONV_DEADLK) {
-                       gh->gh_error = 0;
-                       set_bit(GLF_CONV_DEADLK, &gl->gl_flags);
-                       spin_unlock(&gl->gl_spin);
-                       gfs2_glock_drop_th(gl);
-                       gfs2_glock_put(gl);
-                       return;
-               }
-               list_del_init(&gh->gh_list);
-               gh->gh_error = -EIO;
-               if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) 
-                       goto out;
-               gh->gh_error = GLR_CANCELED;
-               if (ret & LM_OUT_CANCELED) 
-                       goto out;
-               if (relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) {
-                       list_add_tail(&gh->gh_list, &gl->gl_holders);
-                       gh->gh_error = 0;
-                       set_bit(HIF_HOLDER, &gh->gh_iflags);
-                       set_bit(HIF_FIRST, &gh->gh_iflags);
-                       op_done = 0;
-                       goto out;
-               }
-               gh->gh_error = GLR_TRYFAILED;
-               if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))
-                       goto out;
-               gh->gh_error = -EINVAL;
-               if (gfs2_assert_withdraw(sdp, 0) == -1)
-                       fs_err(sdp, "ret = 0x%.8X\n", ret);
-out:
-               spin_unlock(&gl->gl_spin);
-       }
-
-       if (glops->go_xmote_bh)
-               glops->go_xmote_bh(gl);
-
-       if (op_done) {
-               spin_lock(&gl->gl_spin);
-               gl->gl_req_gh = NULL;
-               clear_bit(GLF_LOCK, &gl->gl_flags);
-               spin_unlock(&gl->gl_spin);
-       }
-
-       gfs2_glock_put(gl);
-
-       if (gh)
-               gfs2_holder_wake(gh);
-}
-
-static unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, void *lock,
-                                unsigned int cur_state, unsigned int req_state,
-                                unsigned int flags)
-{
-       int ret = 0;
-       if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
-               ret = sdp->sd_lockstruct.ls_ops->lm_lock(lock, cur_state,
-                                                        req_state, flags);
-       return ret;
-}
-
-/**
- * gfs2_glock_xmote_th - Call into the lock module to acquire or change a glock
- * @gl: The glock in question
- * @state: the requested state
- * @flags: modifier flags to the lock call
- *
- */
-
-static void gfs2_glock_xmote_th(struct gfs2_glock *gl, struct gfs2_holder *gh)
-{
-       struct gfs2_sbd *sdp = gl->gl_sbd;
-       int flags = gh ? gh->gh_flags : 0;
-       unsigned state = gh ? gh->gh_state : gl->gl_demote_state;
-       const struct gfs2_glock_operations *glops = gl->gl_ops;
-       int lck_flags = flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB |
-                                LM_FLAG_NOEXP | LM_FLAG_ANY |
-                                LM_FLAG_PRIORITY);
-       unsigned int lck_ret;
-
-       if (glops->go_xmote_th)
-               glops->go_xmote_th(gl);
-       if (state == LM_ST_DEFERRED && glops->go_inval)
-               glops->go_inval(gl, DIO_METADATA);
-
-       gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
-       gfs2_assert_warn(sdp, list_empty(&gl->gl_holders));
-       gfs2_assert_warn(sdp, state != LM_ST_UNLOCKED);
-       gfs2_assert_warn(sdp, state != gl->gl_state);
-
-       gfs2_glock_hold(gl);
-
-       lck_ret = gfs2_lm_lock(sdp, gl->gl_lock, gl->gl_state, state, lck_flags);
-
-       if (gfs2_assert_withdraw(sdp, !(lck_ret & LM_OUT_ERROR)))
-               return;
-
-       if (lck_ret & LM_OUT_ASYNC)
-               gfs2_assert_warn(sdp, lck_ret == LM_OUT_ASYNC);
-       else
-               xmote_bh(gl, lck_ret);
-}
-
-static unsigned int gfs2_lm_unlock(struct gfs2_sbd *sdp, void *lock,
-                                  unsigned int cur_state)
-{
-       int ret = 0;
-       if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
-               ret =  sdp->sd_lockstruct.ls_ops->lm_unlock(lock, cur_state);
-       return ret;
-}
-
-/**
- * gfs2_glock_drop_th - call into the lock module to unlock a lock
- * @gl: the glock
- *
- */
-
-static void gfs2_glock_drop_th(struct gfs2_glock *gl)
-{
-       struct gfs2_sbd *sdp = gl->gl_sbd;
-       const struct gfs2_glock_operations *glops = gl->gl_ops;
-       unsigned int ret;
-
-       if (glops->go_xmote_th)
-               glops->go_xmote_th(gl);
-       if (glops->go_inval)
-               glops->go_inval(gl, DIO_METADATA);
-
-       gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
-       gfs2_assert_warn(sdp, list_empty(&gl->gl_holders));
-       gfs2_assert_warn(sdp, gl->gl_state != LM_ST_UNLOCKED);
-
-       gfs2_glock_hold(gl);
+void gfs2_holder_uninit(struct gfs2_holder *gh)
+{
+       put_pid(gh->gh_owner_pid);
+       gfs2_glock_put(gh->gh_gl);
+       gh->gh_gl = NULL;
+       gh->gh_ip = 0;
+}
 
-       ret = gfs2_lm_unlock(sdp, gl->gl_lock, gl->gl_state);
+static int just_schedule(void *word)
+{
+        schedule();
+        return 0;
+}
 
-       if (gfs2_assert_withdraw(sdp, !(ret & LM_OUT_ERROR)))
-               return;
+static void wait_on_holder(struct gfs2_holder *gh)
+{
+       might_sleep();
+       wait_on_bit(&gh->gh_iflags, HIF_WAIT, just_schedule, TASK_UNINTERRUPTIBLE);
+}
 
-       if (!ret)
-               drop_bh(gl, ret);
-       else
-               gfs2_assert_warn(sdp, ret == LM_OUT_ASYNC);
+static void wait_on_demote(struct gfs2_glock *gl)
+{
+       might_sleep();
+       wait_on_bit(&gl->gl_flags, GLF_DEMOTE, just_schedule, TASK_UNINTERRUPTIBLE);
 }
 
 /**
- * do_cancels - cancel requests for locks stuck waiting on an expire flag
- * @gh: the LM_FLAG_PRIORITY holder waiting to acquire the lock
+ * handle_callback - process a demote request
+ * @gl: the glock
+ * @state: the state the caller wants us to change to
  *
- * Don't cancel GL_NOCANCEL requests.
+ * There are only two requests that we are going to see in actual
+ * practice: LM_ST_SHARED and LM_ST_UNLOCKED
  */
 
-static void do_cancels(struct gfs2_holder *gh)
+static void handle_callback(struct gfs2_glock *gl, unsigned int state,
+                           int remote, unsigned long delay)
 {
-       struct gfs2_glock *gl = gh->gh_gl;
-       struct gfs2_sbd *sdp = gl->gl_sbd;
-
-       spin_lock(&gl->gl_spin);
+       int bit = delay ? GLF_PENDING_DEMOTE : GLF_DEMOTE;
 
-       while (gl->gl_req_gh != gh &&
-              !test_bit(HIF_HOLDER, &gh->gh_iflags) &&
-              !list_empty(&gh->gh_list)) {
-               if (!(gl->gl_req_gh && (gl->gl_req_gh->gh_flags & GL_NOCANCEL))) {
-                       spin_unlock(&gl->gl_spin);
-                       if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
-                               sdp->sd_lockstruct.ls_ops->lm_cancel(gl->gl_lock);
-                       msleep(100);
-                       spin_lock(&gl->gl_spin);
-               } else {
-                       spin_unlock(&gl->gl_spin);
-                       msleep(100);
-                       spin_lock(&gl->gl_spin);
-               }
+       set_bit(bit, &gl->gl_flags);
+       if (gl->gl_demote_state == LM_ST_EXCLUSIVE) {
+               gl->gl_demote_state = state;
+               gl->gl_demote_time = jiffies;
+               if (remote && gl->gl_ops->go_type == LM_TYPE_IOPEN &&
+                   gl->gl_object)
+                       gfs2_glock_schedule_for_reclaim(gl);
+       } else if (gl->gl_demote_state != LM_ST_UNLOCKED &&
+                       gl->gl_demote_state != state) {
+               gl->gl_demote_state = LM_ST_UNLOCKED;
        }
-
-       spin_unlock(&gl->gl_spin);
 }
 
 /**
- * glock_wait_internal - wait on a glock acquisition
+ * gfs2_glock_wait - wait on a glock acquisition
  * @gh: the glock holder
  *
  * Returns: 0 on success
  */
 
-static int glock_wait_internal(struct gfs2_holder *gh)
+int gfs2_glock_wait(struct gfs2_holder *gh)
 {
-       struct gfs2_glock *gl = gh->gh_gl;
-       struct gfs2_sbd *sdp = gl->gl_sbd;
-       const struct gfs2_glock_operations *glops = gl->gl_ops;
-
-       if (test_bit(HIF_ABORTED, &gh->gh_iflags))
-               return -EIO;
-
-       if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) {
-               spin_lock(&gl->gl_spin);
-               if (gl->gl_req_gh != gh &&
-                   !test_bit(HIF_HOLDER, &gh->gh_iflags) &&
-                   !list_empty(&gh->gh_list)) {
-                       list_del_init(&gh->gh_list);
-                       gh->gh_error = GLR_TRYFAILED;
-                       run_queue(gl);
-                       spin_unlock(&gl->gl_spin);
-                       return gh->gh_error;
-               }
-               spin_unlock(&gl->gl_spin);
-       }
-
-       if (gh->gh_flags & LM_FLAG_PRIORITY)
-               do_cancels(gh);
-
        wait_on_holder(gh);
-       if (gh->gh_error)
-               return gh->gh_error;
-
-       gfs2_assert_withdraw(sdp, test_bit(HIF_HOLDER, &gh->gh_iflags));
-       gfs2_assert_withdraw(sdp, relaxed_state_ok(gl->gl_state, gh->gh_state,
-                                                  gh->gh_flags));
-
-       if (test_bit(HIF_FIRST, &gh->gh_iflags)) {
-               gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
-
-               if (glops->go_lock) {
-                       gh->gh_error = glops->go_lock(gh);
-                       if (gh->gh_error) {
-                               spin_lock(&gl->gl_spin);
-                               list_del_init(&gh->gh_list);
-                               spin_unlock(&gl->gl_spin);
-                       }
-               }
-
-               spin_lock(&gl->gl_spin);
-               gl->gl_req_gh = NULL;
-               clear_bit(GLF_LOCK, &gl->gl_flags);
-               run_queue(gl);
-               spin_unlock(&gl->gl_spin);
-       }
-
        return gh->gh_error;
 }
 
-static inline struct gfs2_holder *
-find_holder_by_owner(struct list_head *head, struct pid *pid)
-{
-       struct gfs2_holder *gh;
-
-       list_for_each_entry(gh, head, gh_list) {
-               if (gh->gh_owner_pid == pid)
-                       return gh;
-       }
-
-       return NULL;
-}
-
-static void print_dbg(struct glock_iter *gi, const char *fmt, ...)
+void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...)
 {
        va_list args;
 
        va_start(args, fmt);
-       if (gi) {
+       if (seq) {
+               struct gfs2_glock_iter *gi = seq->private;
                vsprintf(gi->string, fmt, args);
-               seq_printf(gi->seq, gi->string);
-       }
-       else
+               seq_printf(seq, "%s", gi->string); /* data, not a format */
+       } else { /* no seq_file given: write to the kernel log instead */
+               printk(KERN_ERR " ");
                vprintk(fmt, args);
+       }
        va_end(args);
 }
 
@@ -1104,50 +870,76 @@ static void print_dbg(struct glock_iter *gi, const char *fmt, ...)
  * add_to_queue - Add a holder to the wait queue (but look for recursion)
  * @gh: the holder structure to add
  *
+ * Eventually we should move the recursive locking trap to a
+ * debugging option or something like that. This is the fast
+ * path and needs to have the minimum number of distractions.
+ * 
  */
 
-static void add_to_queue(struct gfs2_holder *gh)
+static inline void add_to_queue(struct gfs2_holder *gh)
 {
        struct gfs2_glock *gl = gh->gh_gl;
-       struct gfs2_holder *existing;
+       struct gfs2_sbd *sdp = gl->gl_sbd;
+       struct list_head *insert_pt = NULL;
+       struct gfs2_holder *gh2;
+       int try_lock = 0;
 
        BUG_ON(gh->gh_owner_pid == NULL);
        if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags))
                BUG();
 
-       if (!(gh->gh_flags & GL_FLOCK)) {
-               existing = find_holder_by_owner(&gl->gl_holders, 
-                                               gh->gh_owner_pid);
-               if (existing) {
-                       print_symbol(KERN_WARNING "original: %s\n", 
-                                    existing->gh_ip);
-                       printk(KERN_INFO "pid : %d\n",
-                                       pid_nr(existing->gh_owner_pid));
-                       printk(KERN_INFO "lock type : %d lock state : %d\n",
-                              existing->gh_gl->gl_name.ln_type, 
-                              existing->gh_gl->gl_state);
-                       print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip);
-                       printk(KERN_INFO "pid : %d\n",
-                                       pid_nr(gh->gh_owner_pid));
-                       printk(KERN_INFO "lock type : %d lock state : %d\n",
-                              gl->gl_name.ln_type, gl->gl_state);
-                       BUG();
-               }
-               
-               existing = find_holder_by_owner(&gl->gl_waiters3, 
-                                               gh->gh_owner_pid);
-               if (existing) {
-                       print_symbol(KERN_WARNING "original: %s\n", 
-                                    existing->gh_ip);
-                       print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip);
-                       BUG();
+       if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) {
+               if (test_bit(GLF_LOCK, &gl->gl_flags))
+                       try_lock = 1;
+               if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags))
+                       goto fail;
+       }
+
+       list_for_each_entry(gh2, &gl->gl_holders, gh_list) {
+               if (unlikely(gh2->gh_owner_pid == gh->gh_owner_pid &&
+                   (gh->gh_gl->gl_ops->go_type != LM_TYPE_FLOCK)))
+                       goto trap_recursive;
+               if (try_lock &&
+                   !(gh2->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) &&
+                   !may_grant(gl, gh)) {
+fail:
+                       gh->gh_error = GLR_TRYFAILED;
+                       gfs2_holder_wake(gh);
+                       return;
                }
+               if (test_bit(HIF_HOLDER, &gh2->gh_iflags))
+                       continue;
+               if (unlikely((gh->gh_flags & LM_FLAG_PRIORITY) && !insert_pt))
+                       insert_pt = &gh2->gh_list;
+       }
+       if (likely(insert_pt == NULL)) {
+               list_add_tail(&gh->gh_list, &gl->gl_holders);
+               if (unlikely(gh->gh_flags & LM_FLAG_PRIORITY))
+                       goto do_cancel;
+               return;
+       }
+       list_add_tail(&gh->gh_list, insert_pt);
+do_cancel:
+       gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list);
+       if (!(gh->gh_flags & LM_FLAG_PRIORITY)) {
+               spin_unlock(&gl->gl_spin);
+               if (sdp->sd_lockstruct.ls_ops->lm_cancel)
+                       sdp->sd_lockstruct.ls_ops->lm_cancel(gl->gl_lock);
+               spin_lock(&gl->gl_spin);
        }
+       return;
 
-       if (gh->gh_flags & LM_FLAG_PRIORITY)
-               list_add(&gh->gh_list, &gl->gl_waiters3);
-       else
-               list_add_tail(&gh->gh_list, &gl->gl_waiters3);
+trap_recursive:
+       print_symbol(KERN_ERR "original: %s\n", gh2->gh_ip);
+       printk(KERN_ERR "pid: %d\n", pid_nr(gh2->gh_owner_pid));
+       printk(KERN_ERR "lock type: %d req lock state : %d\n",
+              gh2->gh_gl->gl_name.ln_type, gh2->gh_state);
+       print_symbol(KERN_ERR "new: %s\n", gh->gh_ip);
+       printk(KERN_ERR "pid: %d\n", pid_nr(gh->gh_owner_pid));
+       printk(KERN_ERR "lock type: %d req lock state : %d\n",
+              gh->gh_gl->gl_name.ln_type, gh->gh_state);
+       __dump_glock(NULL, gl);
+       BUG();
 }
 
 /**
@@ -1165,24 +957,16 @@ int gfs2_glock_nq(struct gfs2_holder *gh)
        struct gfs2_sbd *sdp = gl->gl_sbd;
        int error = 0;
 
-restart:
-       if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) {
-               set_bit(HIF_ABORTED, &gh->gh_iflags);
+       if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
                return -EIO;
-       }
 
        spin_lock(&gl->gl_spin);
        add_to_queue(gh);
-       run_queue(gl);
+       run_queue(gl, 1);
        spin_unlock(&gl->gl_spin);
 
-       if (!(gh->gh_flags & GL_ASYNC)) {
-               error = glock_wait_internal(gh);
-               if (error == GLR_CANCELED) {
-                       msleep(100);
-                       goto restart;
-               }
-       }
+       if (!(gh->gh_flags & GL_ASYNC))
+               error = gfs2_glock_wait(gh);
 
        return error;
 }
@@ -1196,48 +980,7 @@ restart:
 
 int gfs2_glock_poll(struct gfs2_holder *gh)
 {
-       struct gfs2_glock *gl = gh->gh_gl;
-       int ready = 0;
-
-       spin_lock(&gl->gl_spin);
-
-       if (test_bit(HIF_HOLDER, &gh->gh_iflags))
-               ready = 1;
-       else if (list_empty(&gh->gh_list)) {
-               if (gh->gh_error == GLR_CANCELED) {
-                       spin_unlock(&gl->gl_spin);
-                       msleep(100);
-                       if (gfs2_glock_nq(gh))
-                               return 1;
-                       return 0;
-               } else
-                       ready = 1;
-       }
-
-       spin_unlock(&gl->gl_spin);
-
-       return ready;
-}
-
-/**
- * gfs2_glock_wait - wait for a lock acquisition that ended in a GLR_ASYNC
- * @gh: the holder structure
- *
- * Returns: 0, GLR_TRYFAILED, or errno on failure
- */
-
-int gfs2_glock_wait(struct gfs2_holder *gh)
-{
-       int error;
-
-       error = glock_wait_internal(gh);
-       if (error == GLR_CANCELED) {
-               msleep(100);
-               gh->gh_flags &= ~GL_ASYNC;
-               error = gfs2_glock_nq(gh);
-       }
-
-       return error;
+       return test_bit(HIF_WAIT, &gh->gh_iflags) ? 0 : 1;
 }
 
 /**
@@ -1251,26 +994,30 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
        struct gfs2_glock *gl = gh->gh_gl;
        const struct gfs2_glock_operations *glops = gl->gl_ops;
        unsigned delay = 0;
+       int fast_path = 0;
 
+       spin_lock(&gl->gl_spin);
        if (gh->gh_flags & GL_NOCACHE)
                handle_callback(gl, LM_ST_UNLOCKED, 0, 0);
 
-       gfs2_glmutex_lock(gl);
-
-       spin_lock(&gl->gl_spin);
        list_del_init(&gh->gh_list);
-
-       if (list_empty(&gl->gl_holders)) {
+       if (find_first_holder(gl) == NULL) {
                if (glops->go_unlock) {
+                       GLOCK_BUG_ON(gl, test_and_set_bit(GLF_LOCK, &gl->gl_flags));
                        spin_unlock(&gl->gl_spin);
                        glops->go_unlock(gh);
                        spin_lock(&gl->gl_spin);
+                       clear_bit(GLF_LOCK, &gl->gl_flags);
                }
                gl->gl_stamp = jiffies;
+               if (list_empty(&gl->gl_holders) &&
+                   !test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
+                   !test_bit(GLF_DEMOTE, &gl->gl_flags))
+                       fast_path = 1;
        }
-
-       clear_bit(GLF_LOCK, &gl->gl_flags);
        spin_unlock(&gl->gl_spin);
+       if (likely(fast_path))
+               return;
 
        gfs2_glock_hold(gl);
        if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
@@ -1454,6 +1201,8 @@ void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs)
 static int gfs2_lm_hold_lvb(struct gfs2_sbd *sdp, void *lock, char **lvbp)
 {
        int error = -EIO;
+       if (!sdp->sd_lockstruct.ls_ops->lm_hold_lvb)
+               return 0;
        if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
                error = sdp->sd_lockstruct.ls_ops->lm_hold_lvb(lock, lvbp);
        return error;
@@ -1469,20 +1218,14 @@ int gfs2_lvb_hold(struct gfs2_glock *gl)
 {
        int error;
 
-       gfs2_glmutex_lock(gl);
-
        if (!atomic_read(&gl->gl_lvb_count)) {
                error = gfs2_lm_hold_lvb(gl->gl_sbd, gl->gl_lock, &gl->gl_lvb);
-               if (error) {
-                       gfs2_glmutex_unlock(gl);
+               if (error) 
                        return error;
-               }
                gfs2_glock_hold(gl);
        }
        atomic_inc(&gl->gl_lvb_count);
 
-       gfs2_glmutex_unlock(gl);
-
        return 0;
 }
 
@@ -1497,17 +1240,13 @@ void gfs2_lvb_unhold(struct gfs2_glock *gl)
        struct gfs2_sbd *sdp = gl->gl_sbd;
 
        gfs2_glock_hold(gl);
-       gfs2_glmutex_lock(gl);
-
        gfs2_assert(gl->gl_sbd, atomic_read(&gl->gl_lvb_count) > 0);
        if (atomic_dec_and_test(&gl->gl_lvb_count)) {
-               if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
+               if (sdp->sd_lockstruct.ls_ops->lm_unhold_lvb)
                        sdp->sd_lockstruct.ls_ops->lm_unhold_lvb(gl->gl_lock, gl->gl_lvb);
                gl->gl_lvb = NULL;
                gfs2_glock_put(gl);
        }
-
-       gfs2_glmutex_unlock(gl);
        gfs2_glock_put(gl);
 }
 
@@ -1527,7 +1266,9 @@ static void blocking_cb(struct gfs2_sbd *sdp, struct lm_lockname *name,
        if (time_before(now, holdtime))
                delay = holdtime - now;
 
+       spin_lock(&gl->gl_spin);
        handle_callback(gl, state, 1, delay);
+       spin_unlock(&gl->gl_spin);
        if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0)
                gfs2_glock_put(gl);
 }
@@ -1568,7 +1309,8 @@ void gfs2_glock_cb(void *cb_data, unsigned int type, void *data)
                gl = gfs2_glock_find(sdp, &async->lc_name);
                if (gfs2_assert_warn(sdp, gl))
                        return;
-               xmote_bh(gl, async->lc_ret);
+               gl->gl_reply = async->lc_ret;
+               set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
                if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
                        gfs2_glock_put(gl);
                up_read(&gfs2_umount_flush_sem);
@@ -1581,11 +1323,6 @@ void gfs2_glock_cb(void *cb_data, unsigned int type, void *data)
                        wake_up_process(sdp->sd_recoverd_process);
                return;
 
-       case LM_CB_DROPLOCKS:
-               gfs2_gl_hash_clear(sdp, NO_WAIT);
-               gfs2_quota_scan(sdp);
-               return;
-
        default:
                gfs2_assert_warn(sdp, 0);
                return;
@@ -1646,6 +1383,7 @@ void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl)
 void gfs2_reclaim_glock(struct gfs2_sbd *sdp)
 {
        struct gfs2_glock *gl;
+       int done_callback = 0;
 
        spin_lock(&sdp->sd_reclaim_lock);
        if (list_empty(&sdp->sd_reclaim_list)) {
@@ -1660,14 +1398,16 @@ void gfs2_reclaim_glock(struct gfs2_sbd *sdp)
        atomic_dec(&sdp->sd_reclaim_count);
        atomic_inc(&sdp->sd_reclaimed);
 
-       if (gfs2_glmutex_trylock(gl)) {
-               if (list_empty(&gl->gl_holders) &&
-                   gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl))
-                       handle_callback(gl, LM_ST_UNLOCKED, 0, 0);
-               gfs2_glmutex_unlock(gl);
+       spin_lock(&gl->gl_spin);
+       if (find_first_holder(gl) == NULL &&
+           gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl)) {
+               handle_callback(gl, LM_ST_UNLOCKED, 0, 0);
+               done_callback = 1;
        }
-
-       gfs2_glock_put(gl);
+       spin_unlock(&gl->gl_spin);
+       if (!done_callback ||
+           queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
+               gfs2_glock_put(gl);
 }
 
 /**
@@ -1724,18 +1464,14 @@ static void scan_glock(struct gfs2_glock *gl)
 {
        if (gl->gl_ops == &gfs2_inode_glops && gl->gl_object)
                return;
+       if (test_bit(GLF_LOCK, &gl->gl_flags))
+               return;
 
-       if (gfs2_glmutex_trylock(gl)) {
-               if (list_empty(&gl->gl_holders) &&
-                   gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl))
-                       goto out_schedule;
-               gfs2_glmutex_unlock(gl);
-       }
-       return;
-
-out_schedule:
-       gfs2_glmutex_unlock(gl);
-       gfs2_glock_schedule_for_reclaim(gl);
+       spin_lock(&gl->gl_spin);
+       if (find_first_holder(gl) == NULL &&
+           gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl))
+               gfs2_glock_schedule_for_reclaim(gl);
+       spin_unlock(&gl->gl_spin);
 }
 
 /**
@@ -1760,12 +1496,13 @@ static void clear_glock(struct gfs2_glock *gl)
                spin_unlock(&sdp->sd_reclaim_lock);
        }
 
-       if (gfs2_glmutex_trylock(gl)) {
-               if (list_empty(&gl->gl_holders) &&
-                   gl->gl_state != LM_ST_UNLOCKED)
-                       handle_callback(gl, LM_ST_UNLOCKED, 0, 0);
-               gfs2_glmutex_unlock(gl);
-       }
+       spin_lock(&gl->gl_spin);
+       if (find_first_holder(gl) == NULL && gl->gl_state != LM_ST_UNLOCKED)
+               handle_callback(gl, LM_ST_UNLOCKED, 0, 0);
+       spin_unlock(&gl->gl_spin);
+       gfs2_glock_hold(gl);
+       if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
+               gfs2_glock_put(gl);
 }
 
 /**
@@ -1773,11 +1510,10 @@ static void clear_glock(struct gfs2_glock *gl)
  * @sdp: the filesystem
  * @wait: wait until it's all gone
  *
- * Called when unmounting the filesystem, or when inter-node lock manager
- * requests DROPLOCKS because it is running out of capacity.
+ * Called when unmounting the filesystem.
  */
 
-void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait)
+void gfs2_gl_hash_clear(struct gfs2_sbd *sdp)
 {
        unsigned long t;
        unsigned int x;
@@ -1792,7 +1528,7 @@ void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait)
                                cont = 1;
                }
 
-               if (!wait || !cont)
+               if (!cont)
                        break;
 
                if (time_after_eq(jiffies,
@@ -1810,180 +1546,164 @@ void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait)
        }
 }
 
-/*
- *  Diagnostic routines to help debug distributed deadlock
- */
-
-static void gfs2_print_symbol(struct glock_iter *gi, const char *fmt,
-                              unsigned long address)
+static const char *state2str(unsigned state)
 {
-       char buffer[KSYM_SYMBOL_LEN];
-
-       sprint_symbol(buffer, address);
-       print_dbg(gi, fmt, buffer);
+       switch(state) {
+       case LM_ST_UNLOCKED:
+               return "UN";
+       case LM_ST_SHARED:
+               return "SH";
+       case LM_ST_DEFERRED:
+               return "DF";
+       case LM_ST_EXCLUSIVE:
+               return "EX";
+       }
+       return "??";
+}
+
+static const char *hflags2str(char *buf, unsigned flags, unsigned long iflags)
+{
+       char *p = buf;
+       if (flags & LM_FLAG_TRY)
+               *p++ = 't';
+       if (flags & LM_FLAG_TRY_1CB)
+               *p++ = 'T';
+       if (flags & LM_FLAG_NOEXP)
+               *p++ = 'e';
+       if (flags & LM_FLAG_ANY)
+               *p++ = 'a';
+       if (flags & LM_FLAG_PRIORITY)
+               *p++ = 'p';
+       if (flags & GL_ASYNC)
+               *p++ = 'a';
+       if (flags & GL_EXACT)
+               *p++ = 'E';
+       if (flags & GL_ATIME)
+               *p++ = 'a';
+       if (flags & GL_NOCACHE)
+               *p++ = 'c';
+       if (test_bit(HIF_HOLDER, &iflags))
+               *p++ = 'H';
+       if (test_bit(HIF_WAIT, &iflags))
+               *p++ = 'W';
+       if (test_bit(HIF_FIRST, &iflags))
+               *p++ = 'F';
+       *p = 0;
+       return buf;
 }
 
 /**
  * dump_holder - print information about a glock holder
- * @str: a string naming the type of holder
+ * @seq: the seq_file struct
  * @gh: the glock holder
  *
  * Returns: 0 on success, -ENOBUFS when we run out of space
  */
 
-static int dump_holder(struct glock_iter *gi, char *str,
-                      struct gfs2_holder *gh)
+static int dump_holder(struct seq_file *seq, const struct gfs2_holder *gh)
 {
-       unsigned int x;
-       struct task_struct *gh_owner;
+       struct task_struct *gh_owner = NULL;
+       char buffer[KSYM_SYMBOL_LEN];
+       char flags_buf[32];
 
-       print_dbg(gi, "  %s\n", str);
-       if (gh->gh_owner_pid) {
-               print_dbg(gi, "    owner = %ld ",
-                               (long)pid_nr(gh->gh_owner_pid));
+       sprint_symbol(buffer, gh->gh_ip);
+       if (gh->gh_owner_pid)
                gh_owner = pid_task(gh->gh_owner_pid, PIDTYPE_PID);
-               if (gh_owner)
-                       print_dbg(gi, "(%s)\n", gh_owner->comm);
-               else
-                       print_dbg(gi, "(ended)\n");
-       } else
-               print_dbg(gi, "    owner = -1\n");
-       print_dbg(gi, "    gh_state = %u\n", gh->gh_state);
-       print_dbg(gi, "    gh_flags =");
-       for (x = 0; x < 32; x++)
-               if (gh->gh_flags & (1 << x))
-                       print_dbg(gi, " %u", x);
-       print_dbg(gi, " \n");
-       print_dbg(gi, "    error = %d\n", gh->gh_error);
-       print_dbg(gi, "    gh_iflags =");
-       for (x = 0; x < 32; x++)
-               if (test_bit(x, &gh->gh_iflags))
-                       print_dbg(gi, " %u", x);
-       print_dbg(gi, " \n");
-        gfs2_print_symbol(gi, "    initialized at: %s\n", gh->gh_ip);
-
+       gfs2_print_dbg(seq, " H: s:%s f:%s e:%d p:%ld [%s] %s\n",
+                 state2str(gh->gh_state),
+                 hflags2str(flags_buf, gh->gh_flags, gh->gh_iflags),
+                 gh->gh_error, 
+                 gh->gh_owner_pid ? (long)pid_nr(gh->gh_owner_pid) : -1,
+                 gh_owner ? gh_owner->comm : "(ended)", buffer);
        return 0;
 }
 
-/**
- * dump_inode - print information about an inode
- * @ip: the inode
- *
- * Returns: 0 on success, -ENOBUFS when we run out of space
- */
-
-static int dump_inode(struct glock_iter *gi, struct gfs2_inode *ip)
-{
-       unsigned int x;
-
-       print_dbg(gi, "  Inode:\n");
-       print_dbg(gi, "    num = %llu/%llu\n",
-                 (unsigned long long)ip->i_no_formal_ino,
-                 (unsigned long long)ip->i_no_addr);
-       print_dbg(gi, "    type = %u\n", IF2DT(ip->i_inode.i_mode));
-       print_dbg(gi, "    i_flags =");
-       for (x = 0; x < 32; x++)
-               if (test_bit(x, &ip->i_flags))
-                       print_dbg(gi, " %u", x);
-       print_dbg(gi, " \n");
-       return 0;
+static const char *gflags2str(char *buf, const unsigned long *gflags)
+{
+       char *p = buf;
+       if (test_bit(GLF_LOCK, gflags))
+               *p++ = 'l';
+       if (test_bit(GLF_STICKY, gflags))
+               *p++ = 's';
+       if (test_bit(GLF_DEMOTE, gflags))
+               *p++ = 'D';
+       if (test_bit(GLF_PENDING_DEMOTE, gflags))
+               *p++ = 'd';
+       if (test_bit(GLF_DEMOTE_IN_PROGRESS, gflags))
+               *p++ = 'p';
+       if (test_bit(GLF_DIRTY, gflags))
+               *p++ = 'y';
+       if (test_bit(GLF_LFLUSH, gflags))
+               *p++ = 'f';
+       if (test_bit(GLF_INVALIDATE_IN_PROGRESS, gflags))
+               *p++ = 'i';
+       if (test_bit(GLF_REPLY_PENDING, gflags))
+               *p++ = 'r';
+       *p = 0;
+       return buf;
 }
 
 /**
- * dump_glock - print information about a glock
+ * __dump_glock - print information about a glock
+ * @seq: The seq_file struct
  * @gl: the glock
- * @count: where we are in the buffer
+ *
+ * The file format is as follows:
+ * One line per object, capital letters are used to indicate objects
+ * G = glock, I = Inode, R = rgrp, H = holder. Glocks are not indented,
+ * other objects are indented by a single space and follow the glock to
+ * which they are related. Fields are indicated by lower case letters
+ * followed by a colon and the field value, except for strings which are in
+ * [] so that it's possible to see if they are composed of spaces for
+ * example. The fields are n = number (id of the object), f = flags,
+ * t = type, s = state, r = refcount, e = error, p = pid.
  *
  * Returns: 0 on success, -ENOBUFS when we run out of space
  */
 
-static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl)
+static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl)
 {
-       struct gfs2_holder *gh;
-       unsigned int x;
-       int error = -ENOBUFS;
-       struct task_struct *gl_owner;
+       const struct gfs2_glock_operations *glops = gl->gl_ops;
+       unsigned long long dtime;
+       const struct gfs2_holder *gh;
+       char gflags_buf[32];
+       int error = 0;
 
-       spin_lock(&gl->gl_spin);
+       dtime = jiffies - gl->gl_demote_time;
+       dtime *= 1000000/HZ; /* demote time in uSec */
+       if (!test_bit(GLF_DEMOTE, &gl->gl_flags))
+               dtime = 0;
+       gfs2_print_dbg(seq, "G:  s:%s n:%u/%llu f:%s t:%s d:%s/%llu l:%d a:%d r:%d\n",
+                 state2str(gl->gl_state),
+                 gl->gl_name.ln_type,
+                 (unsigned long long)gl->gl_name.ln_number,
+                 gflags2str(gflags_buf, &gl->gl_flags),
+                 state2str(gl->gl_target),
+                 state2str(gl->gl_demote_state), dtime,
+                 atomic_read(&gl->gl_lvb_count),
+                 atomic_read(&gl->gl_ail_count),
+                 atomic_read(&gl->gl_ref));
 
-       print_dbg(gi, "Glock 0x%p (%u, 0x%llx)\n", gl, gl->gl_name.ln_type,
-                  (unsigned long long)gl->gl_name.ln_number);
-       print_dbg(gi, "  gl_flags =");
-       for (x = 0; x < 32; x++) {
-               if (test_bit(x, &gl->gl_flags))
-                       print_dbg(gi, " %u", x);
-       }
-       if (!test_bit(GLF_LOCK, &gl->gl_flags))
-               print_dbg(gi, " (unlocked)");
-       print_dbg(gi, " \n");
-       print_dbg(gi, "  gl_ref = %d\n", atomic_read(&gl->gl_ref));
-       print_dbg(gi, "  gl_state = %u\n", gl->gl_state);
-       if (gl->gl_owner_pid) {
-               gl_owner = pid_task(gl->gl_owner_pid, PIDTYPE_PID);
-               if (gl_owner)
-                       print_dbg(gi, "  gl_owner = pid %d (%s)\n",
-                                 pid_nr(gl->gl_owner_pid), gl_owner->comm);
-               else
-                       print_dbg(gi, "  gl_owner = %d (ended)\n",
-                                 pid_nr(gl->gl_owner_pid));
-       } else
-               print_dbg(gi, "  gl_owner = -1\n");
-       print_dbg(gi, "  gl_ip = %lu\n", gl->gl_ip);
-       print_dbg(gi, "  req_gh = %s\n", (gl->gl_req_gh) ? "yes" : "no");
-       print_dbg(gi, "  lvb_count = %d\n", atomic_read(&gl->gl_lvb_count));
-       print_dbg(gi, "  object = %s\n", (gl->gl_object) ? "yes" : "no");
-       print_dbg(gi, "  reclaim = %s\n",
-                  (list_empty(&gl->gl_reclaim)) ? "no" : "yes");
-       if (gl->gl_aspace)
-               print_dbg(gi, "  aspace = 0x%p nrpages = %lu\n", gl->gl_aspace,
-                          gl->gl_aspace->i_mapping->nrpages);
-       else
-               print_dbg(gi, "  aspace = no\n");
-       print_dbg(gi, "  ail = %d\n", atomic_read(&gl->gl_ail_count));
-       if (gl->gl_req_gh) {
-               error = dump_holder(gi, "Request", gl->gl_req_gh);
-               if (error)
-                       goto out;
-       }
        list_for_each_entry(gh, &gl->gl_holders, gh_list) {
-               error = dump_holder(gi, "Holder", gh);
+               error = dump_holder(seq, gh);
                if (error)
                        goto out;
        }
-       list_for_each_entry(gh, &gl->gl_waiters1, gh_list) {
-               error = dump_holder(gi, "Waiter1", gh);
-               if (error)
-                       goto out;
-       }
-       list_for_each_entry(gh, &gl->gl_waiters3, gh_list) {
-               error = dump_holder(gi, "Waiter3", gh);
-               if (error)
-                       goto out;
-       }
-       if (test_bit(GLF_DEMOTE, &gl->gl_flags)) {
-               print_dbg(gi, "  Demotion req to state %u (%llu uS ago)\n",
-                         gl->gl_demote_state, (unsigned long long)
-                         (jiffies - gl->gl_demote_time)*(1000000/HZ));
-       }
-       if (gl->gl_ops == &gfs2_inode_glops && gl->gl_object) {
-               if (!test_bit(GLF_LOCK, &gl->gl_flags) &&
-                       list_empty(&gl->gl_holders)) {
-                       error = dump_inode(gi, gl->gl_object);
-                       if (error)
-                               goto out;
-               } else {
-                       error = -ENOBUFS;
-                       print_dbg(gi, "  Inode: busy\n");
-               }
-       }
-
-       error = 0;
-
+       if (gl->gl_state != LM_ST_UNLOCKED && glops->go_dump)
+               error = glops->go_dump(seq, gl);
 out:
-       spin_unlock(&gl->gl_spin);
        return error;
 }
 
+static int dump_glock(struct seq_file *seq, struct gfs2_glock *gl)
+{
+       int ret;
+       spin_lock(&gl->gl_spin);
+       ret = __dump_glock(seq, gl);
+       spin_unlock(&gl->gl_spin);
+       return ret;
+}
+
 /**
  * gfs2_dump_lockstate - print out the current lockstate
  * @sdp: the filesystem
@@ -2086,7 +1806,7 @@ void gfs2_glock_exit(void)
 module_param(scand_secs, uint, S_IRUGO|S_IWUSR);
 MODULE_PARM_DESC(scand_secs, "The number of seconds between scand runs");
 
-static int gfs2_glock_iter_next(struct glock_iter *gi)
+static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi)
 {
        struct gfs2_glock *gl;
 
@@ -2104,7 +1824,7 @@ restart:
                gfs2_glock_put(gl);
        if (gl && gi->gl == NULL)
                gi->hash++;
-       while(gi->gl == NULL) {
+       while (gi->gl == NULL) {
                if (gi->hash >= GFS2_GL_HASH_SIZE)
                        return 1;
                read_lock(gl_lock_addr(gi->hash));
@@ -2122,58 +1842,34 @@ restart:
        return 0;
 }
 
-static void gfs2_glock_iter_free(struct glock_iter *gi)
+static void gfs2_glock_iter_free(struct gfs2_glock_iter *gi)
 {
        if (gi->gl)
                gfs2_glock_put(gi->gl);
-       kfree(gi);
-}
-
-static struct glock_iter *gfs2_glock_iter_init(struct gfs2_sbd *sdp)
-{
-       struct glock_iter *gi;
-
-       gi = kmalloc(sizeof (*gi), GFP_KERNEL);
-       if (!gi)
-               return NULL;
-
-       gi->sdp = sdp;
-       gi->hash = 0;
-       gi->seq = NULL;
        gi->gl = NULL;
-       memset(gi->string, 0, sizeof(gi->string));
-
-       if (gfs2_glock_iter_next(gi)) {
-               gfs2_glock_iter_free(gi);
-               return NULL;
-       }
-
-       return gi;
 }
 
-static void *gfs2_glock_seq_start(struct seq_file *file, loff_t *pos)
+static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos)
 {
-       struct glock_iter *gi;
+       struct gfs2_glock_iter *gi = seq->private;
        loff_t n = *pos;
 
-       gi = gfs2_glock_iter_init(file->private);
-       if (!gi)
-               return NULL;
+       gi->hash = 0;
 
-       while(n--) {
+       do {
                if (gfs2_glock_iter_next(gi)) {
                        gfs2_glock_iter_free(gi);
                        return NULL;
                }
-       }
+       } while (n--);
 
-       return gi;
+       return gi->gl;
 }
 
-static void *gfs2_glock_seq_next(struct seq_file *file, void *iter_ptr,
+static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr,
                                 loff_t *pos)
 {
-       struct glock_iter *gi = iter_ptr;
+       struct gfs2_glock_iter *gi = seq->private;
 
        (*pos)++;
 
@@ -2182,24 +1878,18 @@ static void *gfs2_glock_seq_next(struct seq_file *file, void *iter_ptr,
                return NULL;
        }
 
-       return gi;
+       return gi->gl;
 }
 
-static void gfs2_glock_seq_stop(struct seq_file *file, void *iter_ptr)
+static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr)
 {
-       struct glock_iter *gi = iter_ptr;
-       if (gi)
-               gfs2_glock_iter_free(gi);
+       struct gfs2_glock_iter *gi = seq->private;
+       gfs2_glock_iter_free(gi);
 }
 
-static int gfs2_glock_seq_show(struct seq_file *file, void *iter_ptr)
+static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr)
 {
-       struct glock_iter *gi = iter_ptr;
-
-       gi->seq = file;
-       dump_glock(gi, gi->gl);
-
-       return 0;
+       return dump_glock(seq, iter_ptr);
 }
 
 static const struct seq_operations gfs2_glock_seq_ops = {
@@ -2211,17 +1901,14 @@ static const struct seq_operations gfs2_glock_seq_ops = {
 
 static int gfs2_debugfs_open(struct inode *inode, struct file *file)
 {
-       struct seq_file *seq;
-       int ret;
-
-       ret = seq_open(file, &gfs2_glock_seq_ops);
-       if (ret)
-               return ret;
-
-       seq = file->private_data;
-       seq->private = inode->i_private;
-
-       return 0;
+       int ret = seq_open_private(file, &gfs2_glock_seq_ops,
+                                  sizeof(struct gfs2_glock_iter));
+       if (ret == 0) {
+               struct seq_file *seq = file->private_data;
+               struct gfs2_glock_iter *gi = seq->private;
+               gi->sdp = inode->i_private;
+       }
+       return ret;
 }
 
 static const struct file_operations gfs2_debug_fops = {
@@ -2229,7 +1916,7 @@ static const struct file_operations gfs2_debug_fops = {
        .open    = gfs2_debugfs_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
-       .release = seq_release
+       .release = seq_release_private,
 };
 
 int gfs2_create_debugfs_file(struct gfs2_sbd *sdp)
index cdad3e6f815026dfc55d542d1fc84c4b8f778984..971d92af70fce8ff1e857e73242c322352fcf50c 100644 (file)
 #define GL_SKIP                        0x00000100
 #define GL_ATIME               0x00000200
 #define GL_NOCACHE             0x00000400
-#define GL_FLOCK               0x00000800
-#define GL_NOCANCEL            0x00001000
 
 #define GLR_TRYFAILED          13
-#define GLR_CANCELED           14
 
 static inline struct gfs2_holder *gfs2_glock_is_locked_by_me(struct gfs2_glock *gl)
 {
@@ -41,6 +38,8 @@ static inline struct gfs2_holder *gfs2_glock_is_locked_by_me(struct gfs2_glock *
        spin_lock(&gl->gl_spin);
        pid = task_pid(current);
        list_for_each_entry(gh, &gl->gl_holders, gh_list) {
+               if (!test_bit(HIF_HOLDER, &gh->gh_iflags))
+                       break;
                if (gh->gh_owner_pid == pid)
                        goto out;
        }
@@ -70,7 +69,7 @@ static inline int gfs2_glock_is_blocking(struct gfs2_glock *gl)
 {
        int ret;
        spin_lock(&gl->gl_spin);
-       ret = test_bit(GLF_DEMOTE, &gl->gl_flags) || !list_empty(&gl->gl_waiters3);
+       ret = test_bit(GLF_DEMOTE, &gl->gl_flags);
        spin_unlock(&gl->gl_spin);
        return ret;
 }
@@ -98,6 +97,7 @@ int gfs2_glock_nq_num(struct gfs2_sbd *sdp,
 int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs);
 void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs);
 void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs);
+void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...);
 
 /**
  * gfs2_glock_nq_init - intialize a holder and enqueue it on a glock
@@ -130,10 +130,9 @@ int gfs2_lvb_hold(struct gfs2_glock *gl);
 void gfs2_lvb_unhold(struct gfs2_glock *gl);
 
 void gfs2_glock_cb(void *cb_data, unsigned int type, void *data);
-
 void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl);
 void gfs2_reclaim_glock(struct gfs2_sbd *sdp);
-void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait);
+void gfs2_gl_hash_clear(struct gfs2_sbd *sdp);
 
 int __init gfs2_glock_init(void);
 void gfs2_glock_exit(void);
index 07d84d16cda4365eac465f375d22497414d0c988..c6c318c2a0f6d11cc23781d88f3f9c3666fa0ec2 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/buffer_head.h>
 #include <linux/gfs2_ondisk.h>
 #include <linux/lm_interface.h>
+#include <linux/bio.h>
 
 #include "gfs2.h"
 #include "incore.h"
@@ -171,26 +172,6 @@ static void inode_go_sync(struct gfs2_glock *gl)
        }
 }
 
-/**
- * inode_go_xmote_bh - After promoting/demoting a glock
- * @gl: the glock
- *
- */
-
-static void inode_go_xmote_bh(struct gfs2_glock *gl)
-{
-       struct gfs2_holder *gh = gl->gl_req_gh;
-       struct buffer_head *bh;
-       int error;
-
-       if (gl->gl_state != LM_ST_UNLOCKED &&
-           (!gh || !(gh->gh_flags & GL_SKIP))) {
-               error = gfs2_meta_read(gl, gl->gl_name.ln_number, 0, &bh);
-               if (!error)
-                       brelse(bh);
-       }
-}
-
 /**
  * inode_go_inval - prepare a inode glock to be released
  * @gl: the glock
@@ -266,6 +247,26 @@ static int inode_go_lock(struct gfs2_holder *gh)
        return error;
 }
 
+/**
+ * inode_go_dump - print information about an inode
+ * @seq: The iterator
+ * @gl: the glock whose attached inode (gl->gl_object) is printed
+ *
+ * Returns: 0 on success, -ENOBUFS when we run out of space
+ */
+
+static int inode_go_dump(struct seq_file *seq, const struct gfs2_glock *gl)
+{
+       const struct gfs2_inode *ip = gl->gl_object;
+       if (ip == NULL)
+               return 0;
+       gfs2_print_dbg(seq, " I: n:%llu/%llu t:%u f:0x%08lx\n",
+                 (unsigned long long)ip->i_no_formal_ino,
+                 (unsigned long long)ip->i_no_addr,
+                 IF2DT(ip->i_inode.i_mode), ip->i_flags);
+       return 0;
+}
+
 /**
  * rgrp_go_demote_ok - Check to see if it's ok to unlock a RG's glock
  * @gl: the glock
@@ -305,6 +306,22 @@ static void rgrp_go_unlock(struct gfs2_holder *gh)
        gfs2_rgrp_bh_put(gh->gh_gl->gl_object);
 }
 
+/**
+ * rgrp_go_dump - print out an rgrp
+ * @seq: The iterator
+ * @gl: The glock in question
+ *
+ */
+
+static int rgrp_go_dump(struct seq_file *seq, const struct gfs2_glock *gl)
+{
+       const struct gfs2_rgrpd *rgd = gl->gl_object;
+       if (rgd == NULL)
+               return 0;
+       gfs2_print_dbg(seq, " R: n:%llu\n", (unsigned long long)rgd->rd_addr);
+       return 0;
+}
+
 /**
  * trans_go_sync - promote/demote the transaction glock
  * @gl: the glock
@@ -330,7 +347,7 @@ static void trans_go_sync(struct gfs2_glock *gl)
  *
  */
 
-static void trans_go_xmote_bh(struct gfs2_glock *gl)
+static int trans_go_xmote_bh(struct gfs2_glock *gl, struct gfs2_holder *gh)
 {
        struct gfs2_sbd *sdp = gl->gl_sbd;
        struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode);
@@ -338,8 +355,7 @@ static void trans_go_xmote_bh(struct gfs2_glock *gl)
        struct gfs2_log_header_host head;
        int error;
 
-       if (gl->gl_state != LM_ST_UNLOCKED &&
-           test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
+       if (test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
                j_gl->gl_ops->go_inval(j_gl, DIO_METADATA);
 
                error = gfs2_find_jhead(sdp->sd_jdesc, &head);
@@ -354,6 +370,7 @@ static void trans_go_xmote_bh(struct gfs2_glock *gl)
                        gfs2_log_pointers_init(sdp, head.lh_blkno);
                }
        }
+       return 0;
 }
 
 /**
@@ -375,12 +392,12 @@ const struct gfs2_glock_operations gfs2_meta_glops = {
 
 const struct gfs2_glock_operations gfs2_inode_glops = {
        .go_xmote_th = inode_go_sync,
-       .go_xmote_bh = inode_go_xmote_bh,
        .go_inval = inode_go_inval,
        .go_demote_ok = inode_go_demote_ok,
        .go_lock = inode_go_lock,
+       .go_dump = inode_go_dump,
        .go_type = LM_TYPE_INODE,
-       .go_min_hold_time = HZ / 10,
+       .go_min_hold_time = HZ / 5,
 };
 
 const struct gfs2_glock_operations gfs2_rgrp_glops = {
@@ -389,8 +406,9 @@ const struct gfs2_glock_operations gfs2_rgrp_glops = {
        .go_demote_ok = rgrp_go_demote_ok,
        .go_lock = rgrp_go_lock,
        .go_unlock = rgrp_go_unlock,
+       .go_dump = rgrp_go_dump,
        .go_type = LM_TYPE_RGRP,
-       .go_min_hold_time = HZ / 10,
+       .go_min_hold_time = HZ / 5,
 };
 
 const struct gfs2_glock_operations gfs2_trans_glops = {
index eabe5eac41dade68b5cccdffc499eec3eedd7728..448697a5c462764c18ef977a1432291fc21cc049 100644 (file)
@@ -77,7 +77,6 @@ struct gfs2_rgrp_host {
 struct gfs2_rgrpd {
        struct list_head rd_list;       /* Link with superblock */
        struct list_head rd_list_mru;
-       struct list_head rd_recent;     /* Recently used rgrps */
        struct gfs2_glock *rd_gl;       /* Glock for this rgrp */
        u64 rd_addr;                    /* grp block disk address */
        u64 rd_data0;                   /* first data location */
@@ -128,20 +127,20 @@ struct gfs2_bufdata {
 
 struct gfs2_glock_operations {
        void (*go_xmote_th) (struct gfs2_glock *gl);
-       void (*go_xmote_bh) (struct gfs2_glock *gl);
+       int (*go_xmote_bh) (struct gfs2_glock *gl, struct gfs2_holder *gh);
        void (*go_inval) (struct gfs2_glock *gl, int flags);
        int (*go_demote_ok) (struct gfs2_glock *gl);
        int (*go_lock) (struct gfs2_holder *gh);
        void (*go_unlock) (struct gfs2_holder *gh);
+       int (*go_dump)(struct seq_file *seq, const struct gfs2_glock *gl);
        const int go_type;
        const unsigned long go_min_hold_time;
 };
 
 enum {
        /* States */
-       HIF_HOLDER              = 6,
+       HIF_HOLDER              = 6,  /* Set for gh that "holds" the glock */
        HIF_FIRST               = 7,
-       HIF_ABORTED             = 9,
        HIF_WAIT                = 10,
 };
 
@@ -154,20 +153,20 @@ struct gfs2_holder {
        unsigned gh_flags;
 
        int gh_error;
-       unsigned long gh_iflags;
+       unsigned long gh_iflags; /* HIF_... */
        unsigned long gh_ip;
 };
 
 enum {
-       GLF_LOCK                = 1,
-       GLF_STICKY              = 2,
-       GLF_DEMOTE              = 3,
-       GLF_PENDING_DEMOTE      = 4,
-       GLF_DIRTY               = 5,
-       GLF_DEMOTE_IN_PROGRESS  = 6,
-       GLF_LFLUSH              = 7,
-       GLF_WAITERS2            = 8,
-       GLF_CONV_DEADLK         = 9,
+       GLF_LOCK                        = 1,
+       GLF_STICKY                      = 2,
+       GLF_DEMOTE                      = 3,
+       GLF_PENDING_DEMOTE              = 4,
+       GLF_DEMOTE_IN_PROGRESS          = 5,
+       GLF_DIRTY                       = 6,
+       GLF_LFLUSH                      = 7,
+       GLF_INVALIDATE_IN_PROGRESS      = 8,
+       GLF_REPLY_PENDING               = 9,
 };
 
 struct gfs2_glock {
@@ -179,19 +178,14 @@ struct gfs2_glock {
        spinlock_t gl_spin;
 
        unsigned int gl_state;
+       unsigned int gl_target;
+       unsigned int gl_reply;
        unsigned int gl_hash;
        unsigned int gl_demote_state; /* state requested by remote node */
        unsigned long gl_demote_time; /* time of first demote request */
-       struct pid *gl_owner_pid;
-       unsigned long gl_ip;
        struct list_head gl_holders;
-       struct list_head gl_waiters1;   /* HIF_MUTEX */
-       struct list_head gl_waiters3;   /* HIF_PROMOTE */
 
        const struct gfs2_glock_operations *gl_ops;
-
-       struct gfs2_holder *gl_req_gh;
-
        void *gl_lock;
        char *gl_lvb;
        atomic_t gl_lvb_count;
@@ -427,7 +421,6 @@ struct gfs2_tune {
        unsigned int gt_quota_quantum; /* Secs between syncs to quota file */
        unsigned int gt_atime_quantum; /* Min secs between atime updates */
        unsigned int gt_new_files_jdata;
-       unsigned int gt_new_files_directio;
        unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */
        unsigned int gt_stall_secs; /* Detects trouble! */
        unsigned int gt_complain_secs;
@@ -534,7 +527,6 @@ struct gfs2_sbd {
        struct mutex sd_rindex_mutex;
        struct list_head sd_rindex_list;
        struct list_head sd_rindex_mru_list;
-       struct list_head sd_rindex_recent_list;
        struct gfs2_rgrpd *sd_rindex_forward;
        unsigned int sd_rgrps;
 
index 09453d057e4126535ab0c1641c75b4d6fa7b3808..6da0ab355b8a6e6672ae256ae881289b72c9edb0 100644 (file)
@@ -504,7 +504,7 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
        }
 
        if (!is_root) {
-               error = permission(dir, MAY_EXEC, NULL);
+               error = gfs2_permission(dir, MAY_EXEC);
                if (error)
                        goto out;
        }
@@ -667,7 +667,7 @@ static int create_ok(struct gfs2_inode *dip, const struct qstr *name,
 {
        int error;
 
-       error = permission(&dip->i_inode, MAY_WRITE | MAY_EXEC, NULL);
+       error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC);
        if (error)
                return error;
 
@@ -789,12 +789,7 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
                if ((dip->i_di.di_flags & GFS2_DIF_INHERIT_JDATA) ||
                    gfs2_tune_get(sdp, gt_new_files_jdata))
                        di->di_flags |= cpu_to_be32(GFS2_DIF_JDATA);
-               if ((dip->i_di.di_flags & GFS2_DIF_INHERIT_DIRECTIO) ||
-                   gfs2_tune_get(sdp, gt_new_files_directio))
-                       di->di_flags |= cpu_to_be32(GFS2_DIF_DIRECTIO);
        } else if (S_ISDIR(mode)) {
-               di->di_flags |= cpu_to_be32(dip->i_di.di_flags &
-                                           GFS2_DIF_INHERIT_DIRECTIO);
                di->di_flags |= cpu_to_be32(dip->i_di.di_flags &
                                            GFS2_DIF_INHERIT_JDATA);
        }
@@ -1134,7 +1129,7 @@ int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
        if (IS_APPEND(&dip->i_inode))
                return -EPERM;
 
-       error = permission(&dip->i_inode, MAY_WRITE | MAY_EXEC, NULL);
+       error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC);
        if (error)
                return error;
 
index 580da454b38fa97b649f7ccdac2fb61080337558..6074c2506f75b3fced5934ff64e159b06b45b409 100644 (file)
@@ -72,7 +72,6 @@ static inline void gfs2_inum_out(const struct gfs2_inode *ip,
 }
 
 
-void gfs2_inode_attr_in(struct gfs2_inode *ip);
 void gfs2_set_iop(struct inode *inode);
 struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, 
                                u64 no_addr, u64 no_formal_ino,
@@ -91,6 +90,7 @@ int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name,
                struct gfs2_inode *ip);
 int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
                   const struct gfs2_inode *ip);
+int gfs2_permission(struct inode *inode, int mask);
 int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to);
 int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len);
 int gfs2_glock_nq_atime(struct gfs2_holder *gh);
index 663fee7287832baf70a2d644239d97ecf4db1523..523243a13a2184a0fcbe02ecc10df012af3c77f8 100644 (file)
@@ -23,12 +23,54 @@ struct lmh_wrapper {
        const struct lm_lockops *lw_ops;
 };
 
+static int nolock_mount(char *table_name, char *host_data,
+                       lm_callback_t cb, void *cb_data,
+                       unsigned int min_lvb_size, int flags,
+                       struct lm_lockstruct *lockstruct,
+                       struct kobject *fskobj);
+
 /* List of registered low-level locking protocols.  A file system selects one
    of them by name at mount time, e.g. lock_nolock, lock_dlm. */
 
+static const struct lm_lockops nolock_ops = {
+       .lm_proto_name = "lock_nolock",
+       .lm_mount = nolock_mount,
+};
+
+static struct lmh_wrapper nolock_proto  = {
+       .lw_list = LIST_HEAD_INIT(nolock_proto.lw_list),
+       .lw_ops = &nolock_ops,
+};
+
 static LIST_HEAD(lmh_list);
 static DEFINE_MUTEX(lmh_lock);
 
+static int nolock_mount(char *table_name, char *host_data,
+                       lm_callback_t cb, void *cb_data,
+                       unsigned int min_lvb_size, int flags,
+                       struct lm_lockstruct *lockstruct,
+                       struct kobject *fskobj)
+{
+       char *c;
+       unsigned int jid;
+
+       c = strstr(host_data, "jid=");
+       if (!c)
+               jid = 0;
+       else {
+               c += 4;
+               sscanf(c, "%u", &jid);
+       }
+
+       lockstruct->ls_jid = jid;
+       lockstruct->ls_first = 1;
+       lockstruct->ls_lvb_size = min_lvb_size;
+       lockstruct->ls_ops = &nolock_ops;
+       lockstruct->ls_flags = LM_LSFLAG_LOCAL;
+
+       return 0;
+}
+
 /**
  * gfs2_register_lockproto - Register a low-level locking protocol
  * @proto: the protocol definition
@@ -116,9 +158,13 @@ int gfs2_mount_lockproto(char *proto_name, char *table_name, char *host_data,
        int try = 0;
        int error, found;
 
+
 retry:
        mutex_lock(&lmh_lock);
 
+       if (list_empty(&nolock_proto.lw_list))
+               list_add(&nolock_proto.lw_list, &lmh_list);
+
        found = 0;
        list_for_each_entry(lw, &lmh_list, lw_list) {
                if (!strcmp(lw->lw_ops->lm_proto_name, proto_name)) {
@@ -139,7 +185,8 @@ retry:
                goto out;
        }
 
-       if (!try_module_get(lw->lw_ops->lm_owner)) {
+       if (lw->lw_ops->lm_owner &&
+           !try_module_get(lw->lw_ops->lm_owner)) {
                try = 0;
                mutex_unlock(&lmh_lock);
                msleep(1000);
@@ -158,7 +205,8 @@ out:
 void gfs2_unmount_lockproto(struct lm_lockstruct *lockstruct)
 {
        mutex_lock(&lmh_lock);
-       lockstruct->ls_ops->lm_unmount(lockstruct->ls_lockspace);
+       if (lockstruct->ls_ops->lm_unmount)
+               lockstruct->ls_ops->lm_unmount(lockstruct->ls_lockspace);
        if (lockstruct->ls_ops->lm_owner)
                module_put(lockstruct->ls_ops->lm_owner);
        mutex_unlock(&lmh_lock);
index cf7ea8abec876c8a8cf02f8324160b4fd8ca3b6d..2482c9047505fcc0b2439b0693142f3646793f1a 100644 (file)
 
 static char junk_lvb[GDLM_LVB_SIZE];
 
-static void queue_complete(struct gdlm_lock *lp)
+
+/* convert dlm lock-mode to gfs lock-state */
+
+static s16 gdlm_make_lmstate(s16 dlmmode)
 {
-       struct gdlm_ls *ls = lp->ls;
+       switch (dlmmode) {
+       case DLM_LOCK_IV:
+       case DLM_LOCK_NL:
+               return LM_ST_UNLOCKED;
+       case DLM_LOCK_EX:
+               return LM_ST_EXCLUSIVE;
+       case DLM_LOCK_CW:
+               return LM_ST_DEFERRED;
+       case DLM_LOCK_PR:
+               return LM_ST_SHARED;
+       }
+       gdlm_assert(0, "unknown DLM mode %d", dlmmode);
+       return -1;
+}
 
-       clear_bit(LFL_ACTIVE, &lp->flags);
+/* A lock placed on this queue is re-submitted to DLM as soon as the lock_dlm
+   thread gets to it. */
+
+static void queue_submit(struct gdlm_lock *lp)
+{
+       struct gdlm_ls *ls = lp->ls;
 
        spin_lock(&ls->async_lock);
-       list_add_tail(&lp->clist, &ls->complete);
+       list_add_tail(&lp->delay_list, &ls->submit);
        spin_unlock(&ls->async_lock);
        wake_up(&ls->thread_wait);
 }
 
-static inline void gdlm_ast(void *astarg)
+static void wake_up_ast(struct gdlm_lock *lp)
 {
-       queue_complete(astarg);
+       clear_bit(LFL_AST_WAIT, &lp->flags);
+       smp_mb__after_clear_bit();
+       wake_up_bit(&lp->flags, LFL_AST_WAIT);
 }
 
-static inline void gdlm_bast(void *astarg, int mode)
+static void gdlm_delete_lp(struct gdlm_lock *lp)
 {
-       struct gdlm_lock *lp = astarg;
        struct gdlm_ls *ls = lp->ls;
 
-       if (!mode) {
-               printk(KERN_INFO "lock_dlm: bast mode zero %x,%llx\n",
-                       lp->lockname.ln_type,
-                       (unsigned long long)lp->lockname.ln_number);
-               return;
-       }
-
        spin_lock(&ls->async_lock);
-       if (!lp->bast_mode) {
-               list_add_tail(&lp->blist, &ls->blocking);
-               lp->bast_mode = mode;
-       } else if (lp->bast_mode < mode)
-               lp->bast_mode = mode;
+       if (!list_empty(&lp->delay_list))
+               list_del_init(&lp->delay_list);
+       ls->all_locks_count--;
        spin_unlock(&ls->async_lock);
-       wake_up(&ls->thread_wait);
+
+       kfree(lp);
 }
 
-void gdlm_queue_delayed(struct gdlm_lock *lp)
+static void gdlm_queue_delayed(struct gdlm_lock *lp)
 {
        struct gdlm_ls *ls = lp->ls;
 
@@ -59,6 +73,236 @@ void gdlm_queue_delayed(struct gdlm_lock *lp)
        spin_unlock(&ls->async_lock);
 }
 
+/* Handle the completion of a DLM request for lp.
+
+   Depending on lp->lksb.sb_status and the LFL_ flags this either
+     - logs the status and returns without further action,
+     - frees the lock (unlock completed with LFL_UNLOCK_DELETE set),
+     - wakes a waiter through wake_up_ast() (LFL_SYNC_LVB, LFL_INLOCK),
+     - queues the lock again (LFL_REREQUEST / DFL_BLOCK_LOCKS handling), or
+     - reports the resulting state to GFS through the LM_CB_ASYNC callback.
+
+   lp may already have been freed when this function returns. */
+
+static void process_complete(struct gdlm_lock *lp)
+{
+       struct gdlm_ls *ls = lp->ls;
+       struct lm_async_cb acb;
+
+       memset(&acb, 0, sizeof(acb));
+
+       if (lp->lksb.sb_status == -DLM_ECANCEL) {
+               log_info("complete dlm cancel %x,%llx flags %lx",
+                        lp->lockname.ln_type,
+                        (unsigned long long)lp->lockname.ln_number,
+                        lp->flags);
+
+               lp->req = lp->cur;
+               acb.lc_ret |= LM_OUT_CANCELED;
+               if (lp->cur == DLM_LOCK_IV)
+                       lp->lksb.sb_lkid = 0;
+               goto out;
+       }
+
+       if (test_and_clear_bit(LFL_DLM_UNLOCK, &lp->flags)) {
+               if (lp->lksb.sb_status != -DLM_EUNLOCK) {
+                       log_info("unlock sb_status %d %x,%llx flags %lx",
+                                lp->lksb.sb_status, lp->lockname.ln_type,
+                                (unsigned long long)lp->lockname.ln_number,
+                                lp->flags);
+                       return;
+               }
+
+               lp->cur = DLM_LOCK_IV;
+               lp->req = DLM_LOCK_IV;
+               lp->lksb.sb_lkid = 0;
+
+               if (test_and_clear_bit(LFL_UNLOCK_DELETE, &lp->flags)) {
+                       gdlm_delete_lp(lp);
+                       return;
+               }
+               goto out;
+       }
+
+       if (lp->lksb.sb_flags & DLM_SBF_VALNOTVALID)
+               memset(lp->lksb.sb_lvbptr, 0, GDLM_LVB_SIZE);
+
+       if (lp->lksb.sb_flags & DLM_SBF_ALTMODE) {
+               if (lp->req == DLM_LOCK_PR)
+                       lp->req = DLM_LOCK_CW;
+               else if (lp->req == DLM_LOCK_CW)
+                       lp->req = DLM_LOCK_PR;
+       }
+
+       /*
+        * A canceled lock request.  The lock was just taken off the delayed
+        * list and was never even submitted to dlm.
+        */
+
+       if (test_and_clear_bit(LFL_CANCEL, &lp->flags)) {
+               log_info("complete internal cancel %x,%llx",
+                        lp->lockname.ln_type,
+                        (unsigned long long)lp->lockname.ln_number);
+               lp->req = lp->cur;
+               acb.lc_ret |= LM_OUT_CANCELED;
+               goto out;
+       }
+
+       /*
+        * An error occurred.
+        */
+
+       if (lp->lksb.sb_status) {
+               /* a "normal" error */
+               if ((lp->lksb.sb_status == -EAGAIN) &&
+                   (lp->lkf & DLM_LKF_NOQUEUE)) {
+                       lp->req = lp->cur;
+                       if (lp->cur == DLM_LOCK_IV)
+                               lp->lksb.sb_lkid = 0;
+                       goto out;
+               }
+
+               /* this could only happen with cancels I think */
+               log_info("ast sb_status %d %x,%llx flags %lx",
+                        lp->lksb.sb_status, lp->lockname.ln_type,
+                        (unsigned long long)lp->lockname.ln_number,
+                        lp->flags);
+               return;
+       }
+
+       /*
+        * This is an AST for an EX->EX conversion for sync_lvb from GFS.
+        */
+
+       if (test_and_clear_bit(LFL_SYNC_LVB, &lp->flags)) {
+               wake_up_ast(lp);
+               return;
+       }
+
+       /*
+        * A lock has been demoted to NL because it initially completed during
+        * BLOCK_LOCKS.  Now it must be requested in the originally requested
+        * mode.
+        */
+
+       if (test_and_clear_bit(LFL_REREQUEST, &lp->flags)) {
+               gdlm_assert(lp->req == DLM_LOCK_NL, "%x,%llx",
+                           lp->lockname.ln_type,
+                           (unsigned long long)lp->lockname.ln_number);
+               gdlm_assert(lp->prev_req > DLM_LOCK_NL, "%x,%llx",
+                           lp->lockname.ln_type,
+                           (unsigned long long)lp->lockname.ln_number);
+
+               lp->cur = DLM_LOCK_NL;
+               lp->req = lp->prev_req;
+               lp->prev_req = DLM_LOCK_IV;
+               lp->lkf &= ~DLM_LKF_CONVDEADLK;
+
+               set_bit(LFL_NOCACHE, &lp->flags);
+
+               if (test_bit(DFL_BLOCK_LOCKS, &ls->flags) &&
+                   !test_bit(LFL_NOBLOCK, &lp->flags))
+                       gdlm_queue_delayed(lp);
+               else
+                       queue_submit(lp);
+               return;
+       }
+
+       /*
+        * A request is granted during dlm recovery.  It may be granted
+        * because the locks of a failed node were cleared.  In that case,
+        * there may be inconsistent data beneath this lock and we must wait
+        * for recovery to complete to use it.  When gfs recovery is done this
+        * granted lock will be converted to NL and then reacquired in this
+        * granted state.
+        */
+
+       if (test_bit(DFL_BLOCK_LOCKS, &ls->flags) &&
+           !test_bit(LFL_NOBLOCK, &lp->flags) &&
+           lp->req != DLM_LOCK_NL) {
+
+               lp->cur = lp->req;
+               lp->prev_req = lp->req;
+               lp->req = DLM_LOCK_NL;
+               lp->lkf |= DLM_LKF_CONVERT;
+               lp->lkf &= ~DLM_LKF_CONVDEADLK;
+
+               log_debug("rereq %x,%llx id %x %d,%d",
+                         lp->lockname.ln_type,
+                         (unsigned long long)lp->lockname.ln_number,
+                         lp->lksb.sb_lkid, lp->cur, lp->req);
+
+               set_bit(LFL_REREQUEST, &lp->flags);
+               queue_submit(lp);
+               return;
+       }
+
+       /*
+        * DLM demoted the lock to NL before it was granted so GFS must be
+        * told it cannot cache data for this lock.
+        */
+
+       if (lp->lksb.sb_flags & DLM_SBF_DEMOTED)
+               set_bit(LFL_NOCACHE, &lp->flags);
+
+out:
+       /*
+        * This is an internal lock_dlm lock
+        */
+
+       if (test_bit(LFL_INLOCK, &lp->flags)) {
+               clear_bit(LFL_NOBLOCK, &lp->flags);
+               lp->cur = lp->req;
+               wake_up_ast(lp);
+               return;
+       }
+
+       /*
+        * Normal completion of a lock request.  Tell GFS it now has the lock.
+        */
+
+       clear_bit(LFL_NOBLOCK, &lp->flags);
+       lp->cur = lp->req;
+
+       acb.lc_name = lp->lockname;
+       acb.lc_ret |= gdlm_make_lmstate(lp->cur);
+
+       ls->fscb(ls->sdp, LM_CB_ASYNC, &acb);
+}
+
+/* Completion callback: clear LFL_ACTIVE on the lock passed in astarg and
+   process the completion immediately.  The lock must not be used after
+   process_complete() returns, since that function may free it. */
+
+static void gdlm_ast(void *astarg)
+{
+       struct gdlm_lock *lock = astarg;
+
+       clear_bit(LFL_ACTIVE, &lock->flags);
+       process_complete(lock);
+}
+
+/* Map the mode of a blocking request onto the matching LM_CB_NEED_* value and
+   hand it to the GFS callback together with the name of the lock. */
+
+static void process_blocking(struct gdlm_lock *lp, int bast_mode)
+{
+       struct gdlm_ls *ls = lp->ls;
+       s16 wanted = gdlm_make_lmstate(bast_mode);
+       unsigned int cb;
+
+       if (wanted == LM_ST_EXCLUSIVE) {
+               cb = LM_CB_NEED_E;
+       } else if (wanted == LM_ST_DEFERRED) {
+               cb = LM_CB_NEED_D;
+       } else if (wanted == LM_ST_SHARED) {
+               cb = LM_CB_NEED_S;
+       } else {
+               /* any other state is unexpected here */
+               cb = 0;
+               gdlm_assert(0, "unknown bast mode %u", bast_mode);
+       }
+
+       ls->fscb(ls->sdp, cb, &lp->lockname);
+}
+
+
+/* bast callback (presumably DLM's blocking AST -- confirm against the dlm
+   API).  A zero mode is only logged; any other mode goes straight to
+   process_blocking(). */
+
+static void gdlm_bast(void *astarg, int mode)
+{
+       struct gdlm_lock *lock = astarg;
+
+       if (mode) {
+               process_blocking(lock, mode);
+               return;
+       }
+
+       printk(KERN_INFO "lock_dlm: bast mode zero %x,%llx\n",
+              lock->lockname.ln_type,
+              (unsigned long long)lock->lockname.ln_number);
+}
+
 /* convert gfs lock-state to dlm lock-mode */
 
 static s16 make_mode(s16 lmstate)
@@ -77,24 +321,6 @@ static s16 make_mode(s16 lmstate)
        return -1;
 }
 
-/* convert dlm lock-mode to gfs lock-state */
-
-s16 gdlm_make_lmstate(s16 dlmmode)
-{
-       switch (dlmmode) {
-       case DLM_LOCK_IV:
-       case DLM_LOCK_NL:
-               return LM_ST_UNLOCKED;
-       case DLM_LOCK_EX:
-               return LM_ST_EXCLUSIVE;
-       case DLM_LOCK_CW:
-               return LM_ST_DEFERRED;
-       case DLM_LOCK_PR:
-               return LM_ST_SHARED;
-       }
-       gdlm_assert(0, "unknown DLM mode %d", dlmmode);
-       return -1;
-}
 
 /* verify agreement with GFS on the current lock state, NB: DLM_LOCK_NL and
    DLM_LOCK_IV are both considered LM_ST_UNLOCKED by GFS. */
@@ -134,14 +360,6 @@ static inline unsigned int make_flags(struct gdlm_lock *lp,
 
        if (lp->lksb.sb_lkid != 0) {
                lkf |= DLM_LKF_CONVERT;
-
-               /* Conversion deadlock avoidance by DLM */
-
-               if (!(lp->ls->fsflags & LM_MFLAG_CONV_NODROP) &&
-                   !test_bit(LFL_FORCE_PROMOTE, &lp->flags) &&
-                   !(lkf & DLM_LKF_NOQUEUE) &&
-                   cur > DLM_LOCK_NL && req > DLM_LOCK_NL && cur != req)
-                       lkf |= DLM_LKF_CONVDEADLK;
        }
 
        if (lp->lvb)
@@ -173,14 +391,9 @@ static int gdlm_create_lp(struct gdlm_ls *ls, struct lm_lockname *name,
        make_strname(name, &lp->strname);
        lp->ls = ls;
        lp->cur = DLM_LOCK_IV;
-       lp->lvb = NULL;
-       lp->hold_null = NULL;
-       INIT_LIST_HEAD(&lp->clist);
-       INIT_LIST_HEAD(&lp->blist);
        INIT_LIST_HEAD(&lp->delay_list);
 
        spin_lock(&ls->async_lock);
-       list_add(&lp->all_list, &ls->all_locks);
        ls->all_locks_count++;
        spin_unlock(&ls->async_lock);
 
@@ -188,26 +401,6 @@ static int gdlm_create_lp(struct gdlm_ls *ls, struct lm_lockname *name,
        return 0;
 }
 
-void gdlm_delete_lp(struct gdlm_lock *lp)
-{
-       struct gdlm_ls *ls = lp->ls;
-
-       spin_lock(&ls->async_lock);
-       if (!list_empty(&lp->clist))
-               list_del_init(&lp->clist);
-       if (!list_empty(&lp->blist))
-               list_del_init(&lp->blist);
-       if (!list_empty(&lp->delay_list))
-               list_del_init(&lp->delay_list);
-       gdlm_assert(!list_empty(&lp->all_list), "%x,%llx", lp->lockname.ln_type,
-                   (unsigned long long)lp->lockname.ln_number);
-       list_del_init(&lp->all_list);
-       ls->all_locks_count--;
-       spin_unlock(&ls->async_lock);
-
-       kfree(lp);
-}
-
 int gdlm_get_lock(void *lockspace, struct lm_lockname *name,
                  void **lockp)
 {
@@ -261,7 +454,7 @@ unsigned int gdlm_do_lock(struct gdlm_lock *lp)
 
        if ((error == -EAGAIN) && (lp->lkf & DLM_LKF_NOQUEUE)) {
                lp->lksb.sb_status = -EAGAIN;
-               queue_complete(lp);
+               gdlm_ast(lp);
                error = 0;
        }
 
@@ -308,6 +501,12 @@ unsigned int gdlm_lock(void *lock, unsigned int cur_state,
 {
        struct gdlm_lock *lp = lock;
 
+       if (req_state == LM_ST_UNLOCKED)
+               return gdlm_unlock(lock, cur_state);
+
+       if (req_state == LM_ST_UNLOCKED)
+               return gdlm_unlock(lock, cur_state);
+
        clear_bit(LFL_DLM_CANCEL, &lp->flags);
        if (flags & LM_FLAG_NOEXP)
                set_bit(LFL_NOBLOCK, &lp->flags);
@@ -351,7 +550,7 @@ void gdlm_cancel(void *lock)
        if (delay_list) {
                set_bit(LFL_CANCEL, &lp->flags);
                set_bit(LFL_ACTIVE, &lp->flags);
-               queue_complete(lp);
+               gdlm_ast(lp);
                return;
        }
 
@@ -507,22 +706,3 @@ void gdlm_submit_delayed(struct gdlm_ls *ls)
        wake_up(&ls->thread_wait);
 }
 
-int gdlm_release_all_locks(struct gdlm_ls *ls)
-{
-       struct gdlm_lock *lp, *safe;
-       int count = 0;
-
-       spin_lock(&ls->async_lock);
-       list_for_each_entry_safe(lp, safe, &ls->all_locks, all_list) {
-               list_del_init(&lp->all_list);
-
-               if (lp->lvb && lp->lvb != junk_lvb)
-                       kfree(lp->lvb);
-               kfree(lp);
-               count++;
-       }
-       spin_unlock(&ls->async_lock);
-
-       return count;
-}
-
index a243cf69c54ed84dbfafe37418f1c0fbf1c784e7..3c98e7c6f93bb50b3a71b3cb6d11dd91aa9d1d56 100644 (file)
@@ -72,19 +72,12 @@ struct gdlm_ls {
        int                     recover_jid_done;
        int                     recover_jid_status;
        spinlock_t              async_lock;
-       struct list_head        complete;
-       struct list_head        blocking;
        struct list_head        delayed;
        struct list_head        submit;
-       struct list_head        all_locks;
        u32             all_locks_count;
        wait_queue_head_t       wait_control;
-       struct task_struct      *thread1;
-       struct task_struct      *thread2;
+       struct task_struct      *thread;
        wait_queue_head_t       thread_wait;
-       unsigned long           drop_time;
-       int                     drop_locks_count;
-       int                     drop_locks_period;
 };
 
 enum {
@@ -117,12 +110,7 @@ struct gdlm_lock {
        u32                     lkf;            /* dlm flags DLM_LKF_ */
        unsigned long           flags;          /* lock_dlm flags LFL_ */
 
-       int                     bast_mode;      /* protected by async_lock */
-
-       struct list_head        clist;          /* complete */
-       struct list_head        blist;          /* blocking */
        struct list_head        delay_list;     /* delayed */
-       struct list_head        all_list;       /* all locks for the fs */
        struct gdlm_lock        *hold_null;     /* NL lock for hold_lvb */
 };
 
@@ -159,11 +147,7 @@ void gdlm_release_threads(struct gdlm_ls *);
 
 /* lock.c */
 
-s16 gdlm_make_lmstate(s16);
-void gdlm_queue_delayed(struct gdlm_lock *);
 void gdlm_submit_delayed(struct gdlm_ls *);
-int gdlm_release_all_locks(struct gdlm_ls *);
-void gdlm_delete_lp(struct gdlm_lock *);
 unsigned int gdlm_do_lock(struct gdlm_lock *);
 
 int gdlm_get_lock(void *, struct lm_lockname *, void **);
index 470bdf650b500b6dc957c0717fb00a503dec734f..09d78c216f4828792a27a0651a1cd98d6abf5d74 100644 (file)
@@ -22,22 +22,14 @@ static struct gdlm_ls *init_gdlm(lm_callback_t cb, struct gfs2_sbd *sdp,
        if (!ls)
                return NULL;
 
-       ls->drop_locks_count = GDLM_DROP_COUNT;
-       ls->drop_locks_period = GDLM_DROP_PERIOD;
        ls->fscb = cb;
        ls->sdp = sdp;
        ls->fsflags = flags;
        spin_lock_init(&ls->async_lock);
-       INIT_LIST_HEAD(&ls->complete);
-       INIT_LIST_HEAD(&ls->blocking);
        INIT_LIST_HEAD(&ls->delayed);
        INIT_LIST_HEAD(&ls->submit);
-       INIT_LIST_HEAD(&ls->all_locks);
        init_waitqueue_head(&ls->thread_wait);
        init_waitqueue_head(&ls->wait_control);
-       ls->thread1 = NULL;
-       ls->thread2 = NULL;
-       ls->drop_time = jiffies;
        ls->jid = -1;
 
        strncpy(buf, table_name, 256);
@@ -180,7 +172,6 @@ out:
 static void gdlm_unmount(void *lockspace)
 {
        struct gdlm_ls *ls = lockspace;
-       int rv;
 
        log_debug("unmount flags %lx", ls->flags);
 
@@ -194,9 +185,7 @@ static void gdlm_unmount(void *lockspace)
        gdlm_kobject_release(ls);
        dlm_release_lockspace(ls->dlm_lockspace, 2);
        gdlm_release_threads(ls);
-       rv = gdlm_release_all_locks(ls);
-       if (rv)
-               log_info("gdlm_unmount: %d stray locks freed", rv);
+       BUG_ON(ls->all_locks_count);
 out:
        kfree(ls);
 }
@@ -232,7 +221,6 @@ static void gdlm_withdraw(void *lockspace)
 
        dlm_release_lockspace(ls->dlm_lockspace, 2);
        gdlm_release_threads(ls);
-       gdlm_release_all_locks(ls);
        gdlm_kobject_release(ls);
 }
 
index a4ff271df9ee4432eb52ceb16f59cb65a0af1f87..4ec571c3d8a9cb00758a2e3b7dc40180b678f4fe 100644 (file)
@@ -114,17 +114,6 @@ static ssize_t recover_status_show(struct gdlm_ls *ls, char *buf)
        return sprintf(buf, "%d\n", ls->recover_jid_status);
 }
 
-static ssize_t drop_count_show(struct gdlm_ls *ls, char *buf)
-{
-       return sprintf(buf, "%d\n", ls->drop_locks_count);
-}
-
-static ssize_t drop_count_store(struct gdlm_ls *ls, const char *buf, size_t len)
-{
-       ls->drop_locks_count = simple_strtol(buf, NULL, 0);
-       return len;
-}
-
 struct gdlm_attr {
        struct attribute attr;
        ssize_t (*show)(struct gdlm_ls *, char *);
@@ -144,7 +133,6 @@ GDLM_ATTR(first_done,     0444, first_done_show,     NULL);
 GDLM_ATTR(recover,        0644, recover_show,        recover_store);
 GDLM_ATTR(recover_done,   0444, recover_done_show,   NULL);
 GDLM_ATTR(recover_status, 0444, recover_status_show, NULL);
-GDLM_ATTR(drop_count,     0644, drop_count_show,     drop_count_store);
 
 static struct attribute *gdlm_attrs[] = {
        &gdlm_attr_proto_name.attr,
@@ -157,7 +145,6 @@ static struct attribute *gdlm_attrs[] = {
        &gdlm_attr_recover.attr,
        &gdlm_attr_recover_done.attr,
        &gdlm_attr_recover_status.attr,
-       &gdlm_attr_drop_count.attr,
        NULL,
 };
 
index e53db6fd28ab62f140e44557c993d24f1036d1c1..38823efd698c809513f87c6585c91084a9517a43 100644 (file)
 
 #include "lock_dlm.h"
 
-/* A lock placed on this queue is re-submitted to DLM as soon as the lock_dlm
-   thread gets to it. */
-
-static void queue_submit(struct gdlm_lock *lp)
-{
-       struct gdlm_ls *ls = lp->ls;
-
-       spin_lock(&ls->async_lock);
-       list_add_tail(&lp->delay_list, &ls->submit);
-       spin_unlock(&ls->async_lock);
-       wake_up(&ls->thread_wait);
-}
-
-static void process_blocking(struct gdlm_lock *lp, int bast_mode)
-{
-       struct gdlm_ls *ls = lp->ls;
-       unsigned int cb = 0;
-
-       switch (gdlm_make_lmstate(bast_mode)) {
-       case LM_ST_EXCLUSIVE:
-               cb = LM_CB_NEED_E;
-               break;
-       case LM_ST_DEFERRED:
-               cb = LM_CB_NEED_D;
-               break;
-       case LM_ST_SHARED:
-               cb = LM_CB_NEED_S;
-               break;
-       default:
-               gdlm_assert(0, "unknown bast mode %u", lp->bast_mode);
-       }
-
-       ls->fscb(ls->sdp, cb, &lp->lockname);
-}
-
-static void wake_up_ast(struct gdlm_lock *lp)
-{
-       clear_bit(LFL_AST_WAIT, &lp->flags);
-       smp_mb__after_clear_bit();
-       wake_up_bit(&lp->flags, LFL_AST_WAIT);
-}
-
-static void process_complete(struct gdlm_lock *lp)
-{
-       struct gdlm_ls *ls = lp->ls;
-       struct lm_async_cb acb;
-       s16 prev_mode = lp->cur;
-
-       memset(&acb, 0, sizeof(acb));
-
-       if (lp->lksb.sb_status == -DLM_ECANCEL) {
-               log_info("complete dlm cancel %x,%llx flags %lx",
-                        lp->lockname.ln_type,
-                        (unsigned long long)lp->lockname.ln_number,
-                        lp->flags);
-
-               lp->req = lp->cur;
-               acb.lc_ret |= LM_OUT_CANCELED;
-               if (lp->cur == DLM_LOCK_IV)
-                       lp->lksb.sb_lkid = 0;
-               goto out;
-       }
-
-       if (test_and_clear_bit(LFL_DLM_UNLOCK, &lp->flags)) {
-               if (lp->lksb.sb_status != -DLM_EUNLOCK) {
-                       log_info("unlock sb_status %d %x,%llx flags %lx",
-                                lp->lksb.sb_status, lp->lockname.ln_type,
-                                (unsigned long long)lp->lockname.ln_number,
-                                lp->flags);
-                       return;
-               }
-
-               lp->cur = DLM_LOCK_IV;
-               lp->req = DLM_LOCK_IV;
-               lp->lksb.sb_lkid = 0;
-
-               if (test_and_clear_bit(LFL_UNLOCK_DELETE, &lp->flags)) {
-                       gdlm_delete_lp(lp);
-                       return;
-               }
-               goto out;
-       }
-
-       if (lp->lksb.sb_flags & DLM_SBF_VALNOTVALID)
-               memset(lp->lksb.sb_lvbptr, 0, GDLM_LVB_SIZE);
-
-       if (lp->lksb.sb_flags & DLM_SBF_ALTMODE) {
-               if (lp->req == DLM_LOCK_PR)
-                       lp->req = DLM_LOCK_CW;
-               else if (lp->req == DLM_LOCK_CW)
-                       lp->req = DLM_LOCK_PR;
-       }
-
-       /*
-        * A canceled lock request.  The lock was just taken off the delayed
-        * list and was never even submitted to dlm.
-        */
-
-       if (test_and_clear_bit(LFL_CANCEL, &lp->flags)) {
-               log_info("complete internal cancel %x,%llx",
-                        lp->lockname.ln_type,
-                        (unsigned long long)lp->lockname.ln_number);
-               lp->req = lp->cur;
-               acb.lc_ret |= LM_OUT_CANCELED;
-               goto out;
-       }
-
-       /*
-        * An error occured.
-        */
-
-       if (lp->lksb.sb_status) {
-               /* a "normal" error */
-               if ((lp->lksb.sb_status == -EAGAIN) &&
-                   (lp->lkf & DLM_LKF_NOQUEUE)) {
-                       lp->req = lp->cur;
-                       if (lp->cur == DLM_LOCK_IV)
-                               lp->lksb.sb_lkid = 0;
-                       goto out;
-               }
-
-               /* this could only happen with cancels I think */
-               log_info("ast sb_status %d %x,%llx flags %lx",
-                        lp->lksb.sb_status, lp->lockname.ln_type,
-                        (unsigned long long)lp->lockname.ln_number,
-                        lp->flags);
-               if (lp->lksb.sb_status == -EDEADLOCK &&
-                   lp->ls->fsflags & LM_MFLAG_CONV_NODROP) {
-                       lp->req = lp->cur;
-                       acb.lc_ret |= LM_OUT_CONV_DEADLK;
-                       if (lp->cur == DLM_LOCK_IV)
-                               lp->lksb.sb_lkid = 0;
-                       goto out;
-               } else
-                       return;
-       }
-
-       /*
-        * This is an AST for an EX->EX conversion for sync_lvb from GFS.
-        */
-
-       if (test_and_clear_bit(LFL_SYNC_LVB, &lp->flags)) {
-               wake_up_ast(lp);
-               return;
-       }
-
-       /*
-        * A lock has been demoted to NL because it initially completed during
-        * BLOCK_LOCKS.  Now it must be requested in the originally requested
-        * mode.
-        */
-
-       if (test_and_clear_bit(LFL_REREQUEST, &lp->flags)) {
-               gdlm_assert(lp->req == DLM_LOCK_NL, "%x,%llx",
-                           lp->lockname.ln_type,
-                           (unsigned long long)lp->lockname.ln_number);
-               gdlm_assert(lp->prev_req > DLM_LOCK_NL, "%x,%llx",
-                           lp->lockname.ln_type,
-                           (unsigned long long)lp->lockname.ln_number);
-
-               lp->cur = DLM_LOCK_NL;
-               lp->req = lp->prev_req;
-               lp->prev_req = DLM_LOCK_IV;
-               lp->lkf &= ~DLM_LKF_CONVDEADLK;
-
-               set_bit(LFL_NOCACHE, &lp->flags);
-
-               if (test_bit(DFL_BLOCK_LOCKS, &ls->flags) &&
-                   !test_bit(LFL_NOBLOCK, &lp->flags))
-                       gdlm_queue_delayed(lp);
-               else
-                       queue_submit(lp);
-               return;
-       }
-
-       /*
-        * A request is granted during dlm recovery.  It may be granted
-        * because the locks of a failed node were cleared.  In that case,
-        * there may be inconsistent data beneath this lock and we must wait
-        * for recovery to complete to use it.  When gfs recovery is done this
-        * granted lock will be converted to NL and then reacquired in this
-        * granted state.
-        */
-
-       if (test_bit(DFL_BLOCK_LOCKS, &ls->flags) &&
-           !test_bit(LFL_NOBLOCK, &lp->flags) &&
-           lp->req != DLM_LOCK_NL) {
-
-               lp->cur = lp->req;
-               lp->prev_req = lp->req;
-               lp->req = DLM_LOCK_NL;
-               lp->lkf |= DLM_LKF_CONVERT;
-               lp->lkf &= ~DLM_LKF_CONVDEADLK;
-
-               log_debug("rereq %x,%llx id %x %d,%d",
-                         lp->lockname.ln_type,
-                         (unsigned long long)lp->lockname.ln_number,
-                         lp->lksb.sb_lkid, lp->cur, lp->req);
-
-               set_bit(LFL_REREQUEST, &lp->flags);
-               queue_submit(lp);
-               return;
-       }
-
-       /*
-        * DLM demoted the lock to NL before it was granted so GFS must be
-        * told it cannot cache data for this lock.
-        */
-
-       if (lp->lksb.sb_flags & DLM_SBF_DEMOTED)
-               set_bit(LFL_NOCACHE, &lp->flags);
-
-out:
-       /*
-        * This is an internal lock_dlm lock
-        */
-
-       if (test_bit(LFL_INLOCK, &lp->flags)) {
-               clear_bit(LFL_NOBLOCK, &lp->flags);
-               lp->cur = lp->req;
-               wake_up_ast(lp);
-               return;
-       }
-
-       /*
-        * Normal completion of a lock request.  Tell GFS it now has the lock.
-        */
-
-       clear_bit(LFL_NOBLOCK, &lp->flags);
-       lp->cur = lp->req;
-
-       acb.lc_name = lp->lockname;
-       acb.lc_ret |= gdlm_make_lmstate(lp->cur);
-
-       if (!test_and_clear_bit(LFL_NOCACHE, &lp->flags) &&
-           (lp->cur > DLM_LOCK_NL) && (prev_mode > DLM_LOCK_NL))
-               acb.lc_ret |= LM_OUT_CACHEABLE;
-
-       ls->fscb(ls->sdp, LM_CB_ASYNC, &acb);
-}
-
-static inline int no_work(struct gdlm_ls *ls, int blocking)
+static inline int no_work(struct gdlm_ls *ls)
 {
        int ret;
 
        spin_lock(&ls->async_lock);
-       ret = list_empty(&ls->complete) && list_empty(&ls->submit);
-       if (ret && blocking)
-               ret = list_empty(&ls->blocking);
+       ret = list_empty(&ls->submit);
        spin_unlock(&ls->async_lock);
 
        return ret;
 }
 
-static inline int check_drop(struct gdlm_ls *ls)
-{
-       if (!ls->drop_locks_count)
-               return 0;
-
-       if (time_after(jiffies, ls->drop_time + ls->drop_locks_period * HZ)) {
-               ls->drop_time = jiffies;
-               if (ls->all_locks_count >= ls->drop_locks_count)
-                       return 1;
-       }
-       return 0;
-}
-
-static int gdlm_thread(void *data, int blist)
+static int gdlm_thread(void *data)
 {
        struct gdlm_ls *ls = (struct gdlm_ls *) data;
        struct gdlm_lock *lp = NULL;
-       uint8_t complete, blocking, submit, drop;
-
-       /* Only thread1 is allowed to do blocking callbacks since gfs
-          may wait for a completion callback within a blocking cb. */
 
        while (!kthread_should_stop()) {
                wait_event_interruptible(ls->thread_wait,
-                               !no_work(ls, blist) || kthread_should_stop());
-
-               complete = blocking = submit = drop = 0;
+                               !no_work(ls) || kthread_should_stop());
 
                spin_lock(&ls->async_lock);
 
-               if (blist && !list_empty(&ls->blocking)) {
-                       lp = list_entry(ls->blocking.next, struct gdlm_lock,
-                                       blist);
-                       list_del_init(&lp->blist);
-                       blocking = lp->bast_mode;
-                       lp->bast_mode = 0;
-               } else if (!list_empty(&ls->complete)) {
-                       lp = list_entry(ls->complete.next, struct gdlm_lock,
-                                       clist);
-                       list_del_init(&lp->clist);
-                       complete = 1;
-               } else if (!list_empty(&ls->submit)) {
+               if (!list_empty(&ls->submit)) {
                        lp = list_entry(ls->submit.next, struct gdlm_lock,
                                        delay_list);
                        list_del_init(&lp->delay_list);
-                       submit = 1;
+                       spin_unlock(&ls->async_lock);
+                       gdlm_do_lock(lp);
+                       spin_lock(&ls->async_lock);
                }
-
-               drop = check_drop(ls);
                spin_unlock(&ls->async_lock);
-
-               if (complete)
-                       process_complete(lp);
-
-               else if (blocking)
-                       process_blocking(lp, blocking);
-
-               else if (submit)
-                       gdlm_do_lock(lp);
-
-               if (drop)
-                       ls->fscb(ls->sdp, LM_CB_DROPLOCKS, NULL);
-
-               schedule();
        }
 
        return 0;
 }
 
-static int gdlm_thread1(void *data)
-{
-       return gdlm_thread(data, 1);
-}
-
-static int gdlm_thread2(void *data)
-{
-       return gdlm_thread(data, 0);
-}
-
 int gdlm_init_threads(struct gdlm_ls *ls)
 {
        struct task_struct *p;
        int error;
 
-       p = kthread_run(gdlm_thread1, ls, "lock_dlm1");
-       error = IS_ERR(p);
-       if (error) {
-               log_error("can't start lock_dlm1 thread %d", error);
-               return error;
-       }
-       ls->thread1 = p;
-
-       p = kthread_run(gdlm_thread2, ls, "lock_dlm2");
+       p = kthread_run(gdlm_thread, ls, "lock_dlm");
        error = IS_ERR(p);
        if (error) {
-               log_error("can't start lock_dlm2 thread %d", error);
-               kthread_stop(ls->thread1);
+               log_error("can't start lock_dlm thread %d", error);
                return error;
        }
-       ls->thread2 = p;
+       ls->thread = p;
 
        return 0;
 }
 
 void gdlm_release_threads(struct gdlm_ls *ls)
 {
-       kthread_stop(ls->thread1);
-       kthread_stop(ls->thread2);
+       kthread_stop(ls->thread);
 }
 
diff --git a/fs/gfs2/locking/nolock/Makefile b/fs/gfs2/locking/nolock/Makefile
deleted file mode 100644 (file)
index 35e9730..0000000
+++ /dev/null
@@ -1,3 +0,0 @@
-obj-$(CONFIG_GFS2_FS_LOCKING_NOLOCK) += lock_nolock.o
-lock_nolock-y := main.o
-
diff --git a/fs/gfs2/locking/nolock/main.c b/fs/gfs2/locking/nolock/main.c
deleted file mode 100644 (file)
index 284a5ec..0000000
+++ /dev/null
@@ -1,238 +0,0 @@
-/*
- * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
- * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License version 2.
- */
-
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/fs.h>
-#include <linux/lm_interface.h>
-
-struct nolock_lockspace {
-       unsigned int nl_lvb_size;
-};
-
-static const struct lm_lockops nolock_ops;
-
-static int nolock_mount(char *table_name, char *host_data,
-                       lm_callback_t cb, void *cb_data,
-                       unsigned int min_lvb_size, int flags,
-                       struct lm_lockstruct *lockstruct,
-                       struct kobject *fskobj)
-{
-       char *c;
-       unsigned int jid;
-       struct nolock_lockspace *nl;
-
-       c = strstr(host_data, "jid=");
-       if (!c)
-               jid = 0;
-       else {
-               c += 4;
-               sscanf(c, "%u", &jid);
-       }
-
-       nl = kzalloc(sizeof(struct nolock_lockspace), GFP_KERNEL);
-       if (!nl)
-               return -ENOMEM;
-
-       nl->nl_lvb_size = min_lvb_size;
-
-       lockstruct->ls_jid = jid;
-       lockstruct->ls_first = 1;
-       lockstruct->ls_lvb_size = min_lvb_size;
-       lockstruct->ls_lockspace = nl;
-       lockstruct->ls_ops = &nolock_ops;
-       lockstruct->ls_flags = LM_LSFLAG_LOCAL;
-
-       return 0;
-}
-
-static void nolock_others_may_mount(void *lockspace)
-{
-}
-
-static void nolock_unmount(void *lockspace)
-{
-       struct nolock_lockspace *nl = lockspace;
-       kfree(nl);
-}
-
-static void nolock_withdraw(void *lockspace)
-{
-}
-
-/**
- * nolock_get_lock - get a lm_lock_t given a descripton of the lock
- * @lockspace: the lockspace the lock lives in
- * @name: the name of the lock
- * @lockp: return the lm_lock_t here
- *
- * Returns: 0 on success, -EXXX on failure
- */
-
-static int nolock_get_lock(void *lockspace, struct lm_lockname *name,
-                          void **lockp)
-{
-       *lockp = lockspace;
-       return 0;
-}
-
-/**
- * nolock_put_lock - get rid of a lock structure
- * @lock: the lock to throw away
- *
- */
-
-static void nolock_put_lock(void *lock)
-{
-}
-
-/**
- * nolock_lock - acquire a lock
- * @lock: the lock to manipulate
- * @cur_state: the current state
- * @req_state: the requested state
- * @flags: modifier flags
- *
- * Returns: A bitmap of LM_OUT_*
- */
-
-static unsigned int nolock_lock(void *lock, unsigned int cur_state,
-                               unsigned int req_state, unsigned int flags)
-{
-       return req_state | LM_OUT_CACHEABLE;
-}
-
-/**
- * nolock_unlock - unlock a lock
- * @lock: the lock to manipulate
- * @cur_state: the current state
- *
- * Returns: 0
- */
-
-static unsigned int nolock_unlock(void *lock, unsigned int cur_state)
-{
-       return 0;
-}
-
-static void nolock_cancel(void *lock)
-{
-}
-
-/**
- * nolock_hold_lvb - hold on to a lock value block
- * @lock: the lock the LVB is associated with
- * @lvbp: return the lm_lvb_t here
- *
- * Returns: 0 on success, -EXXX on failure
- */
-
-static int nolock_hold_lvb(void *lock, char **lvbp)
-{
-       struct nolock_lockspace *nl = lock;
-       int error = 0;
-
-       *lvbp = kzalloc(nl->nl_lvb_size, GFP_NOFS);
-       if (!*lvbp)
-               error = -ENOMEM;
-
-       return error;
-}
-
-/**
- * nolock_unhold_lvb - release a LVB
- * @lock: the lock the LVB is associated with
- * @lvb: the lock value block
- *
- */
-
-static void nolock_unhold_lvb(void *lock, char *lvb)
-{
-       kfree(lvb);
-}
-
-static int nolock_plock_get(void *lockspace, struct lm_lockname *name,
-                           struct file *file, struct file_lock *fl)
-{
-       posix_test_lock(file, fl);
-
-       return 0;
-}
-
-static int nolock_plock(void *lockspace, struct lm_lockname *name,
-                       struct file *file, int cmd, struct file_lock *fl)
-{
-       int error;
-       error = posix_lock_file_wait(file, fl);
-       return error;
-}
-
-static int nolock_punlock(void *lockspace, struct lm_lockname *name,
-                         struct file *file, struct file_lock *fl)
-{
-       int error;
-       error = posix_lock_file_wait(file, fl);
-       return error;
-}
-
-static void nolock_recovery_done(void *lockspace, unsigned int jid,
-                                unsigned int message)
-{
-}
-
-static const struct lm_lockops nolock_ops = {
-       .lm_proto_name = "lock_nolock",
-       .lm_mount = nolock_mount,
-       .lm_others_may_mount = nolock_others_may_mount,
-       .lm_unmount = nolock_unmount,
-       .lm_withdraw = nolock_withdraw,
-       .lm_get_lock = nolock_get_lock,
-       .lm_put_lock = nolock_put_lock,
-       .lm_lock = nolock_lock,
-       .lm_unlock = nolock_unlock,
-       .lm_cancel = nolock_cancel,
-       .lm_hold_lvb = nolock_hold_lvb,
-       .lm_unhold_lvb = nolock_unhold_lvb,
-       .lm_plock_get = nolock_plock_get,
-       .lm_plock = nolock_plock,
-       .lm_punlock = nolock_punlock,
-       .lm_recovery_done = nolock_recovery_done,
-       .lm_owner = THIS_MODULE,
-};
-
-static int __init init_nolock(void)
-{
-       int error;
-
-       error = gfs2_register_lockproto(&nolock_ops);
-       if (error) {
-               printk(KERN_WARNING
-                      "lock_nolock: can't register protocol: %d\n", error);
-               return error;
-       }
-
-       printk(KERN_INFO
-              "Lock_Nolock (built %s %s) installed\n", __DATE__, __TIME__);
-       return 0;
-}
-
-static void __exit exit_nolock(void)
-{
-       gfs2_unregister_lockproto(&nolock_ops);
-}
-
-module_init(init_nolock);
-module_exit(exit_nolock);
-
-MODULE_DESCRIPTION("GFS Nolock Locking Module");
-MODULE_AUTHOR("Red Hat, Inc.");
-MODULE_LICENSE("GPL");
-
index 548264b1836d59d39275180c4ed5a7343eacc2f3..6c6af9f5e3ab58373eb897cb1adf2093481a1295 100644 (file)
@@ -87,6 +87,8 @@ void gfs2_remove_from_ail(struct gfs2_bufdata *bd)
  */
 
 static void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
+__releases(&sdp->sd_log_lock)
+__acquires(&sdp->sd_log_lock)
 {
        struct gfs2_bufdata *bd, *s;
        struct buffer_head *bh;
index 7711528165088002a3601de4c57ef101eae755ba..7c64510ccfd250f0b8f45fa8376598cd2b626072 100644 (file)
@@ -21,6 +21,7 @@
  */
 
 static inline void gfs2_log_lock(struct gfs2_sbd *sdp)
+__acquires(&sdp->sd_log_lock)
 {
        spin_lock(&sdp->sd_log_lock);
 }
@@ -32,6 +33,7 @@ static inline void gfs2_log_lock(struct gfs2_sbd *sdp)
  */
 
 static inline void gfs2_log_unlock(struct gfs2_sbd *sdp)
+__releases(&sdp->sd_log_lock)
 {
        spin_unlock(&sdp->sd_log_lock);
 }
index 053e2ebbbd502f9a70ba00704ac96150be3e2ff1..bcc668d0fadd0a2d078efa861cbd4942d527914e 100644 (file)
@@ -40,8 +40,6 @@ static void gfs2_init_glock_once(struct kmem_cache *cachep, void *foo)
        INIT_HLIST_NODE(&gl->gl_list);
        spin_lock_init(&gl->gl_spin);
        INIT_LIST_HEAD(&gl->gl_holders);
-       INIT_LIST_HEAD(&gl->gl_waiters1);
-       INIT_LIST_HEAD(&gl->gl_waiters3);
        gl->gl_lvb = NULL;
        atomic_set(&gl->gl_lvb_count, 0);
        INIT_LIST_HEAD(&gl->gl_reclaim);
index 78d75f892f82e198db720fe53c57fe49780dade7..09853620c95121ecd0b2f8b4dd05397eef969e1d 100644 (file)
@@ -129,7 +129,7 @@ void gfs2_meta_sync(struct gfs2_glock *gl)
 }
 
 /**
- * getbuf - Get a buffer with a given address space
+ * gfs2_getbuf - Get a buffer with a given address space
  * @gl: the glock
  * @blkno: the block number (filesystem scope)
  * @create: 1 if the buffer should be created
@@ -137,7 +137,7 @@ void gfs2_meta_sync(struct gfs2_glock *gl)
  * Returns: the buffer
  */
 
-static struct buffer_head *getbuf(struct gfs2_glock *gl, u64 blkno, int create)
+struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create)
 {
        struct address_space *mapping = gl->gl_aspace->i_mapping;
        struct gfs2_sbd *sdp = gl->gl_sbd;
@@ -205,7 +205,7 @@ static void meta_prep_new(struct buffer_head *bh)
 struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno)
 {
        struct buffer_head *bh;
-       bh = getbuf(gl, blkno, CREATE);
+       bh = gfs2_getbuf(gl, blkno, CREATE);
        meta_prep_new(bh);
        return bh;
 }
@@ -223,7 +223,7 @@ struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno)
 int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
                   struct buffer_head **bhp)
 {
-       *bhp = getbuf(gl, blkno, CREATE);
+       *bhp = gfs2_getbuf(gl, blkno, CREATE);
        if (!buffer_uptodate(*bhp)) {
                ll_rw_block(READ_META, 1, bhp);
                if (flags & DIO_WAIT) {
@@ -346,7 +346,7 @@ void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen)
        struct buffer_head *bh;
 
        while (blen) {
-               bh = getbuf(ip->i_gl, bstart, NO_CREATE);
+               bh = gfs2_getbuf(ip->i_gl, bstart, NO_CREATE);
                if (bh) {
                        lock_buffer(bh);
                        gfs2_log_lock(sdp);
@@ -421,7 +421,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen)
        if (extlen > max_ra)
                extlen = max_ra;
 
-       first_bh = getbuf(gl, dblock, CREATE);
+       first_bh = gfs2_getbuf(gl, dblock, CREATE);
 
        if (buffer_uptodate(first_bh))
                goto out;
@@ -432,7 +432,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen)
        extlen--;
 
        while (extlen) {
-               bh = getbuf(gl, dblock, CREATE);
+               bh = gfs2_getbuf(gl, dblock, CREATE);
 
                if (!buffer_uptodate(bh) && !buffer_locked(bh))
                        ll_rw_block(READA, 1, &bh);
index 73e3b1c76fe145c637707bb241faab0a0c00513a..b1a5f3674d436d342a73f1142788b4f59848911c 100644 (file)
@@ -47,6 +47,7 @@ struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno);
 int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno,
                   int flags, struct buffer_head **bhp);
 int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh);
+struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create);
 
 void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh,
                         int meta);
index f55394e57cb28facf0e8928053064f8b70ad7f37..e64a1b04117ad5b27bb5fc1efd98ffe6295056a8 100644 (file)
@@ -499,34 +499,34 @@ static int __gfs2_readpage(void *file, struct page *page)
  * @file: The file to read
  * @page: The page of the file
  *
- * This deals with the locking required. We use a trylock in order to
- * avoid the page lock / glock ordering problems returning AOP_TRUNCATED_PAGE
- * in the event that we are unable to get the lock.
+ * This deals with the locking required. We have to unlock and
+ * relock the page in order to get the locking in the right
+ * order.
  */
 
 static int gfs2_readpage(struct file *file, struct page *page)
 {
-       struct gfs2_inode *ip = GFS2_I(page->mapping->host);
-       struct gfs2_holder *gh;
+       struct address_space *mapping = page->mapping;
+       struct gfs2_inode *ip = GFS2_I(mapping->host);
+       struct gfs2_holder gh;
        int error;
 
-       gh = gfs2_glock_is_locked_by_me(ip->i_gl);
-       if (!gh) {
-               gh = kmalloc(sizeof(struct gfs2_holder), GFP_NOFS);
-               if (!gh)
-                       return -ENOBUFS;
-               gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, gh);
+       unlock_page(page);
+       gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh);
+       error = gfs2_glock_nq_atime(&gh);
+       if (unlikely(error))
+               goto out;
+       error = AOP_TRUNCATED_PAGE;
+       lock_page(page);
+       if (page->mapping == mapping && !PageUptodate(page))
+               error = __gfs2_readpage(file, page);
+       else
                unlock_page(page);
-               error = gfs2_glock_nq_atime(gh);
-               if (likely(error != 0))
-                       goto out;
-               return AOP_TRUNCATED_PAGE;
-       }
-       error = __gfs2_readpage(file, page);
-       gfs2_glock_dq(gh);
+       gfs2_glock_dq(&gh);
 out:
-       gfs2_holder_uninit(gh);
-       kfree(gh);
+       gfs2_holder_uninit(&gh);
+       if (error && error != AOP_TRUNCATED_PAGE)
+               lock_page(page);
        return error;
 }
 
index 24dd59450088bd9ca949dd9c9e8c4770bb6223fc..e9a366d4411cf2412e82d9e645609c69c0dfcd7d 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/uio.h>
 #include <linux/blkdev.h>
 #include <linux/mm.h>
+#include <linux/mount.h>
 #include <linux/fs.h>
 #include <linux/gfs2_ondisk.h>
 #include <linux/ext2_fs.h>
@@ -133,7 +134,6 @@ static const u32 fsflags_to_gfs2[32] = {
        [7] = GFS2_DIF_NOATIME,
        [12] = GFS2_DIF_EXHASH,
        [14] = GFS2_DIF_INHERIT_JDATA,
-       [20] = GFS2_DIF_INHERIT_DIRECTIO,
 };
 
 static const u32 gfs2_to_fsflags[32] = {
@@ -142,7 +142,6 @@ static const u32 gfs2_to_fsflags[32] = {
        [gfs2fl_AppendOnly] = FS_APPEND_FL,
        [gfs2fl_NoAtime] = FS_NOATIME_FL,
        [gfs2fl_ExHash] = FS_INDEX_FL,
-       [gfs2fl_InheritDirectio] = FS_DIRECTIO_FL,
        [gfs2fl_InheritJdata] = FS_JOURNAL_DATA_FL,
 };
 
@@ -160,12 +159,8 @@ static int gfs2_get_flags(struct file *filp, u32 __user *ptr)
                return error;
 
        fsflags = fsflags_cvt(gfs2_to_fsflags, ip->i_di.di_flags);
-       if (!S_ISDIR(inode->i_mode)) {
-               if (ip->i_di.di_flags & GFS2_DIF_JDATA)
-                       fsflags |= FS_JOURNAL_DATA_FL;
-               if (ip->i_di.di_flags & GFS2_DIF_DIRECTIO)
-                       fsflags |= FS_DIRECTIO_FL;
-       }
+       if (!S_ISDIR(inode->i_mode) && ip->i_di.di_flags & GFS2_DIF_JDATA)
+               fsflags |= FS_JOURNAL_DATA_FL;
        if (put_user(fsflags, ptr))
                error = -EFAULT;
 
@@ -194,13 +189,11 @@ void gfs2_set_inode_flags(struct inode *inode)
 
 /* Flags that can be set by user space */
 #define GFS2_FLAGS_USER_SET (GFS2_DIF_JDATA|                   \
-                            GFS2_DIF_DIRECTIO|                 \
                             GFS2_DIF_IMMUTABLE|                \
                             GFS2_DIF_APPENDONLY|               \
                             GFS2_DIF_NOATIME|                  \
                             GFS2_DIF_SYNC|                     \
                             GFS2_DIF_SYSTEM|                   \
-                            GFS2_DIF_INHERIT_DIRECTIO|         \
                             GFS2_DIF_INHERIT_JDATA)
 
 /**
@@ -220,10 +213,14 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask)
        int error;
        u32 new_flags, flags;
 
-       error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
+       error = mnt_want_write(filp->f_path.mnt);
        if (error)
                return error;
 
+       error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
+       if (error)
+               goto out_drop_write;
+
        flags = ip->i_di.di_flags;
        new_flags = (flags & ~mask) | (reqflags & mask);
        if ((new_flags ^ flags) == 0)
@@ -242,7 +239,7 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask)
            !capable(CAP_LINUX_IMMUTABLE))
                goto out;
        if (!IS_IMMUTABLE(inode)) {
-               error = permission(inode, MAY_WRITE, NULL);
+               error = gfs2_permission(inode, MAY_WRITE);
                if (error)
                        goto out;
        }
@@ -272,6 +269,8 @@ out_trans_end:
        gfs2_trans_end(sdp);
 out:
        gfs2_glock_dq_uninit(&gh);
+out_drop_write:
+       mnt_drop_write(filp->f_path.mnt);
        return error;
 }
 
@@ -285,8 +284,6 @@ static int gfs2_set_flags(struct file *filp, u32 __user *ptr)
        if (!S_ISDIR(inode->i_mode)) {
                if (gfsflags & GFS2_DIF_INHERIT_JDATA)
                        gfsflags ^= (GFS2_DIF_JDATA | GFS2_DIF_INHERIT_JDATA);
-               if (gfsflags & GFS2_DIF_INHERIT_DIRECTIO)
-                       gfsflags ^= (GFS2_DIF_DIRECTIO | GFS2_DIF_INHERIT_DIRECTIO);
                return do_gfs2_set_flags(filp, gfsflags, ~0);
        }
        return do_gfs2_set_flags(filp, gfsflags, ~GFS2_DIF_JDATA);
@@ -487,11 +484,6 @@ static int gfs2_open(struct inode *inode, struct file *file)
                        goto fail_gunlock;
                }
 
-               /* Listen to the Direct I/O flag */
-
-               if (ip->i_di.di_flags & GFS2_DIF_DIRECTIO)
-                       file->f_flags |= O_DIRECT;
-
                gfs2_glock_dq_uninit(&i_gh);
        }
 
@@ -669,8 +661,7 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl)
        int error = 0;
 
        state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED;
-       flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY) | GL_EXACT | GL_NOCACHE 
-               | GL_FLOCK;
+       flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY) | GL_EXACT | GL_NOCACHE;
 
        mutex_lock(&fp->f_fl_mutex);
 
@@ -683,9 +674,8 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl)
                gfs2_glock_dq_wait(fl_gh);
                gfs2_holder_reinit(state, flags, fl_gh);
        } else {
-               error = gfs2_glock_get(GFS2_SB(&ip->i_inode),
-                                     ip->i_no_addr, &gfs2_flock_glops,
-                                     CREATE, &gl);
+               error = gfs2_glock_get(GFS2_SB(&ip->i_inode), ip->i_no_addr,
+                                      &gfs2_flock_glops, CREATE, &gl);
                if (error)
                        goto out;
                gfs2_holder_init(gl, state, flags, fl_gh);
index b2028c82e8d1ae53fc04672303ce4e14fa3d7276..b4d1d6490633dbbe6a0662824526c976ef091d28 100644 (file)
@@ -64,7 +64,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
        mutex_init(&sdp->sd_rindex_mutex);
        INIT_LIST_HEAD(&sdp->sd_rindex_list);
        INIT_LIST_HEAD(&sdp->sd_rindex_mru_list);
-       INIT_LIST_HEAD(&sdp->sd_rindex_recent_list);
 
        INIT_LIST_HEAD(&sdp->sd_jindex_list);
        spin_lock_init(&sdp->sd_jindex_spin);
@@ -364,6 +363,8 @@ static int map_journal_extents(struct gfs2_sbd *sdp)
 
 static void gfs2_lm_others_may_mount(struct gfs2_sbd *sdp)
 {
+       if (!sdp->sd_lockstruct.ls_ops->lm_others_may_mount)
+               return;
        if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
                sdp->sd_lockstruct.ls_ops->lm_others_may_mount(
                                        sdp->sd_lockstruct.ls_lockspace);
@@ -741,8 +742,7 @@ static int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent)
                goto out;
        }
 
-       if (gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_lockspace) ||
-           gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_ops) ||
+       if (gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_ops) ||
            gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_lvb_size >=
                                  GFS2_MIN_LVB_SIZE)) {
                gfs2_unmount_lockproto(&sdp->sd_lockstruct);
@@ -873,7 +873,7 @@ fail_sb:
 fail_locking:
        init_locking(sdp, &mount_gh, UNDO);
 fail_lm:
-       gfs2_gl_hash_clear(sdp, WAIT);
+       gfs2_gl_hash_clear(sdp);
        gfs2_lm_unmount(sdp);
        while (invalidate_inodes(sb))
                yield();
index 2686ad4c0029acd2ce1cdfdf46e3d29985a77143..1e252dfc52940c745dea61137afc386dbe183cd7 100644 (file)
@@ -163,7 +163,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
        if (error)
                goto out;
 
-       error = permission(dir, MAY_WRITE | MAY_EXEC, NULL);
+       error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC);
        if (error)
                goto out_gunlock;
 
@@ -669,7 +669,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
                        }
                }
        } else {
-               error = permission(ndir, MAY_WRITE | MAY_EXEC, NULL);
+               error = gfs2_permission(ndir, MAY_WRITE | MAY_EXEC);
                if (error)
                        goto out_gunlock;
 
@@ -704,7 +704,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
        /* Check out the dir to be renamed */
 
        if (dir_rename) {
-               error = permission(odentry->d_inode, MAY_WRITE, NULL);
+               error = gfs2_permission(odentry->d_inode, MAY_WRITE);
                if (error)
                        goto out_gunlock;
        }
@@ -891,7 +891,7 @@ static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd)
  * Returns: errno
  */
 
-static int gfs2_permission(struct inode *inode, int mask, struct nameidata *nd)
+int gfs2_permission(struct inode *inode, int mask)
 {
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_holder i_gh;
@@ -905,13 +905,22 @@ static int gfs2_permission(struct inode *inode, int mask, struct nameidata *nd)
                unlock = 1;
        }
 
-       error = generic_permission(inode, mask, gfs2_check_acl);
+       if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
+               error = -EACCES;
+       else
+               error = generic_permission(inode, mask, gfs2_check_acl);
        if (unlock)
                gfs2_glock_dq_uninit(&i_gh);
 
        return error;
 }
 
+static int gfs2_iop_permission(struct inode *inode, int mask,
+                              struct nameidata *nd)
+{
+       return gfs2_permission(inode, mask);
+}
+
 static int setattr_size(struct inode *inode, struct iattr *attr)
 {
        struct gfs2_inode *ip = GFS2_I(inode);
@@ -1141,7 +1150,7 @@ static int gfs2_removexattr(struct dentry *dentry, const char *name)
 }
 
 const struct inode_operations gfs2_file_iops = {
-       .permission = gfs2_permission,
+       .permission = gfs2_iop_permission,
        .setattr = gfs2_setattr,
        .getattr = gfs2_getattr,
        .setxattr = gfs2_setxattr,
@@ -1160,7 +1169,7 @@ const struct inode_operations gfs2_dir_iops = {
        .rmdir = gfs2_rmdir,
        .mknod = gfs2_mknod,
        .rename = gfs2_rename,
-       .permission = gfs2_permission,
+       .permission = gfs2_iop_permission,
        .setattr = gfs2_setattr,
        .getattr = gfs2_getattr,
        .setxattr = gfs2_setxattr,
@@ -1172,7 +1181,7 @@ const struct inode_operations gfs2_dir_iops = {
 const struct inode_operations gfs2_symlink_iops = {
        .readlink = gfs2_readlink,
        .follow_link = gfs2_follow_link,
-       .permission = gfs2_permission,
+       .permission = gfs2_iop_permission,
        .setattr = gfs2_setattr,
        .getattr = gfs2_getattr,
        .setxattr = gfs2_setxattr,
index 0b7cc920eb89331f651cb3909c9492270d3db242..f66ea0f7a356bc433663da8011373a25e5fb48ec 100644 (file)
@@ -126,7 +126,7 @@ static void gfs2_put_super(struct super_block *sb)
        gfs2_clear_rgrpd(sdp);
        gfs2_jindex_free(sdp);
        /*  Take apart glock structures and buffer lists  */
-       gfs2_gl_hash_clear(sdp, WAIT);
+       gfs2_gl_hash_clear(sdp);
        /*  Unmount the locking protocol  */
        gfs2_lm_unmount(sdp);
 
@@ -155,7 +155,7 @@ static void gfs2_write_super(struct super_block *sb)
 static int gfs2_sync_fs(struct super_block *sb, int wait)
 {
        sb->s_dirt = 0;
-       if (wait)
+       if (wait && sb->s_fs_info)
                gfs2_log_flush(sb->s_fs_info, NULL);
        return 0;
 }
index 56aaf915c59ab05bee1764db4a386f24c87084a0..3e073f5144fa00abf87fb18c678ae7410c3b478c 100644 (file)
@@ -904,7 +904,7 @@ static int need_sync(struct gfs2_quota_data *qd)
                do_sync = 0;
        else {
                value *= gfs2_jindex_size(sdp) * num;
-               do_div(value, den);
+               value = div_s64(value, den);
                value += (s64)be64_to_cpu(qd->qd_qb.qb_value);
                if (value < (s64)be64_to_cpu(qd->qd_qb.qb_limit))
                        do_sync = 0;
index 2888e4b4b1c5ff546ab409d5090d0d979562925d..d5e91f4f6a0b7aa88f36e5ce666c8781d31a5103 100644 (file)
@@ -428,6 +428,9 @@ static int clean_journal(struct gfs2_jdesc *jd, struct gfs2_log_header_host *hea
 static void gfs2_lm_recovery_done(struct gfs2_sbd *sdp, unsigned int jid,
                                  unsigned int message)
 {
+       if (!sdp->sd_lockstruct.ls_ops->lm_recovery_done)
+               return;
+
        if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
                sdp->sd_lockstruct.ls_ops->lm_recovery_done(
                        sdp->sd_lockstruct.ls_lockspace, jid, message);
@@ -505,7 +508,7 @@ int gfs2_recover_journal(struct gfs2_jdesc *jd)
 
                error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED,
                                           LM_FLAG_NOEXP | LM_FLAG_PRIORITY |
-                                          GL_NOCANCEL | GL_NOCACHE, &t_gh);
+                                          GL_NOCACHE, &t_gh);
                if (error)
                        goto fail_gunlock_ji;
 
index 3401628d742b6cbfd5b53c9a4ed11f42d3372413..2d90fb2535054056161eb24c0400d92e311dc7a4 100644 (file)
@@ -371,11 +371,6 @@ static void clear_rgrpdi(struct gfs2_sbd *sdp)
 
        spin_lock(&sdp->sd_rindex_spin);
        sdp->sd_rindex_forward = NULL;
-       head = &sdp->sd_rindex_recent_list;
-       while (!list_empty(head)) {
-               rgd = list_entry(head->next, struct gfs2_rgrpd, rd_recent);
-               list_del(&rgd->rd_recent);
-       }
        spin_unlock(&sdp->sd_rindex_spin);
 
        head = &sdp->sd_rindex_list;
@@ -944,107 +939,30 @@ static struct inode *try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked)
        return NULL;
 }
 
-/**
- * recent_rgrp_first - get first RG from "recent" list
- * @sdp: The GFS2 superblock
- * @rglast: address of the rgrp used last
- *
- * Returns: The first rgrp in the recent list
- */
-
-static struct gfs2_rgrpd *recent_rgrp_first(struct gfs2_sbd *sdp,
-                                           u64 rglast)
-{
-       struct gfs2_rgrpd *rgd;
-
-       spin_lock(&sdp->sd_rindex_spin);
-
-       if (rglast) {
-               list_for_each_entry(rgd, &sdp->sd_rindex_recent_list, rd_recent) {
-                       if (rgrp_contains_block(rgd, rglast))
-                               goto out;
-               }
-       }
-       rgd = NULL;
-       if (!list_empty(&sdp->sd_rindex_recent_list))
-               rgd = list_entry(sdp->sd_rindex_recent_list.next,
-                                struct gfs2_rgrpd, rd_recent);
-out:
-       spin_unlock(&sdp->sd_rindex_spin);
-       return rgd;
-}
-
 /**
  * recent_rgrp_next - get next RG from "recent" list
  * @cur_rgd: current rgrp
- * @remove:
  *
  * Returns: The next rgrp in the recent list
  */
 
-static struct gfs2_rgrpd *recent_rgrp_next(struct gfs2_rgrpd *cur_rgd,
-                                          int remove)
+static struct gfs2_rgrpd *recent_rgrp_next(struct gfs2_rgrpd *cur_rgd)
 {
        struct gfs2_sbd *sdp = cur_rgd->rd_sbd;
        struct list_head *head;
        struct gfs2_rgrpd *rgd;
 
        spin_lock(&sdp->sd_rindex_spin);
-
-       head = &sdp->sd_rindex_recent_list;
-
-       list_for_each_entry(rgd, head, rd_recent) {
-               if (rgd == cur_rgd) {
-                       if (cur_rgd->rd_recent.next != head)
-                               rgd = list_entry(cur_rgd->rd_recent.next,
-                                                struct gfs2_rgrpd, rd_recent);
-                       else
-                               rgd = NULL;
-
-                       if (remove)
-                               list_del(&cur_rgd->rd_recent);
-
-                       goto out;
-               }
+       head = &sdp->sd_rindex_mru_list;
+       if (unlikely(cur_rgd->rd_list_mru.next == head)) {
+               spin_unlock(&sdp->sd_rindex_spin);
+               return NULL;
        }
-
-       rgd = NULL;
-       if (!list_empty(head))
-               rgd = list_entry(head->next, struct gfs2_rgrpd, rd_recent);
-
-out:
+       rgd = list_entry(cur_rgd->rd_list_mru.next, struct gfs2_rgrpd, rd_list_mru);
        spin_unlock(&sdp->sd_rindex_spin);
        return rgd;
 }
 
-/**
- * recent_rgrp_add - add an RG to tail of "recent" list
- * @new_rgd: The rgrp to add
- *
- */
-
-static void recent_rgrp_add(struct gfs2_rgrpd *new_rgd)
-{
-       struct gfs2_sbd *sdp = new_rgd->rd_sbd;
-       struct gfs2_rgrpd *rgd;
-       unsigned int count = 0;
-       unsigned int max = sdp->sd_rgrps / gfs2_jindex_size(sdp);
-
-       spin_lock(&sdp->sd_rindex_spin);
-
-       list_for_each_entry(rgd, &sdp->sd_rindex_recent_list, rd_recent) {
-               if (rgd == new_rgd)
-                       goto out;
-
-               if (++count >= max)
-                       goto out;
-       }
-       list_add_tail(&new_rgd->rd_recent, &sdp->sd_rindex_recent_list);
-
-out:
-       spin_unlock(&sdp->sd_rindex_spin);
-}
-
 /**
  * forward_rgrp_get - get an rgrp to try next from full list
  * @sdp: The GFS2 superblock
@@ -1112,9 +1030,7 @@ static struct inode *get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
        int loops = 0;
        int error, rg_locked;
 
-       /* Try recently successful rgrps */
-
-       rgd = recent_rgrp_first(sdp, ip->i_goal);
+       rgd = gfs2_blk2rgrpd(sdp, ip->i_goal);
 
        while (rgd) {
                rg_locked = 0;
@@ -1136,11 +1052,9 @@ static struct inode *get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
                                gfs2_glock_dq_uninit(&al->al_rgd_gh);
                        if (inode)
                                return inode;
-                       rgd = recent_rgrp_next(rgd, 1);
-                       break;
-
+                       /* fall through */
                case GLR_TRYFAILED:
-                       rgd = recent_rgrp_next(rgd, 0);
+                       rgd = recent_rgrp_next(rgd);
                        break;
 
                default:
@@ -1199,7 +1113,9 @@ static struct inode *get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
 
 out:
        if (begin) {
-               recent_rgrp_add(rgd);
+               spin_lock(&sdp->sd_rindex_spin);
+               list_move(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list);
+               spin_unlock(&sdp->sd_rindex_spin);
                rgd = gfs2_rgrpd_get_next(rgd);
                if (!rgd)
                        rgd = gfs2_rgrpd_get_first(sdp);
index 7aeacbc65f35c9bf01e359323681f13387dfb5d3..63a8a902d9db16029898c8d1caf74e5a198aac48 100644 (file)
@@ -65,7 +65,6 @@ void gfs2_tune_init(struct gfs2_tune *gt)
        gt->gt_quota_quantum = 60;
        gt->gt_atime_quantum = 3600;
        gt->gt_new_files_jdata = 0;
-       gt->gt_new_files_directio = 0;
        gt->gt_max_readahead = 1 << 18;
        gt->gt_stall_secs = 600;
        gt->gt_complain_secs = 10;
@@ -941,8 +940,7 @@ static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp,
        }
 
        error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_DEFERRED,
-                              LM_FLAG_PRIORITY | GL_NOCACHE,
-                              t_gh);
+                                  GL_NOCACHE, t_gh);
 
        list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
                error = gfs2_jdesc_check(jd);
index 9ab9fc85ecd04e7740c81b394289a30e1da24e08..74846559fc3f4adc6a2ca913ff44a1ec7138d064 100644 (file)
@@ -110,18 +110,6 @@ static ssize_t statfs_sync_store(struct gfs2_sbd *sdp, const char *buf,
        return len;
 }
 
-static ssize_t shrink_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
-{
-       if (!capable(CAP_SYS_ADMIN))
-               return -EACCES;
-
-       if (simple_strtol(buf, NULL, 0) != 1)
-               return -EINVAL;
-
-       gfs2_gl_hash_clear(sdp, NO_WAIT);
-       return len;
-}
-
 static ssize_t quota_sync_store(struct gfs2_sbd *sdp, const char *buf,
                                size_t len)
 {
@@ -175,7 +163,6 @@ static struct gfs2_attr gfs2_attr_##name = __ATTR(name, mode, show, store)
 GFS2_ATTR(id,                  0444, id_show,       NULL);
 GFS2_ATTR(fsname,              0444, fsname_show,   NULL);
 GFS2_ATTR(freeze,              0644, freeze_show,   freeze_store);
-GFS2_ATTR(shrink,              0200, NULL,          shrink_store);
 GFS2_ATTR(withdraw,            0644, withdraw_show, withdraw_store);
 GFS2_ATTR(statfs_sync,         0200, NULL,          statfs_sync_store);
 GFS2_ATTR(quota_sync,          0200, NULL,          quota_sync_store);
@@ -186,7 +173,6 @@ static struct attribute *gfs2_attrs[] = {
        &gfs2_attr_id.attr,
        &gfs2_attr_fsname.attr,
        &gfs2_attr_freeze.attr,
-       &gfs2_attr_shrink.attr,
        &gfs2_attr_withdraw.attr,
        &gfs2_attr_statfs_sync.attr,
        &gfs2_attr_quota_sync.attr,
@@ -426,7 +412,6 @@ TUNE_ATTR(max_readahead, 0);
 TUNE_ATTR(complain_secs, 0);
 TUNE_ATTR(statfs_slow, 0);
 TUNE_ATTR(new_files_jdata, 0);
-TUNE_ATTR(new_files_directio, 0);
 TUNE_ATTR(quota_simul_sync, 1);
 TUNE_ATTR(quota_cache_secs, 1);
 TUNE_ATTR(stall_secs, 1);
@@ -455,7 +440,6 @@ static struct attribute *tune_attrs[] = {
        &tune_attr_quotad_secs.attr,
        &tune_attr_quota_scale.attr,
        &tune_attr_new_files_jdata.attr,
-       &tune_attr_new_files_directio.attr,
        NULL,
 };
 
index 6914598022ce836e10a13aa8be50aab1ec3bdc74..91389c8aee8a7c765c1ae9d7f2bbeed51f95cd0c 100644 (file)
@@ -688,7 +688,6 @@ void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transact
 
        J_ASSERT(transaction->t_state == T_FINISHED);
        J_ASSERT(transaction->t_buffers == NULL);
-       J_ASSERT(transaction->t_sync_datalist == NULL);
        J_ASSERT(transaction->t_forget == NULL);
        J_ASSERT(transaction->t_iobuf_list == NULL);
        J_ASSERT(transaction->t_shadow_list == NULL);
index a2ed72f7ceee78f529d9fbebb6a89b02cbd86f15..f8b3be8732262f1642c0a1b4e19399bd048b964f 100644 (file)
@@ -22,6 +22,8 @@
 #include <linux/pagemap.h>
 #include <linux/jiffies.h>
 #include <linux/crc32.h>
+#include <linux/writeback.h>
+#include <linux/backing-dev.h>
 
 /*
  * Default IO end handler for temporary BJ_IO buffer_heads.
@@ -37,8 +39,8 @@ static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
 }
 
 /*
- * When an ext3-ordered file is truncated, it is possible that many pages are
- * not sucessfully freed, because they are attached to a committing transaction.
+ * When an ext4 file is truncated, it is possible that some pages are not
+ * successfully freed, because they are attached to a committing transaction.
  * After the transaction commits, these pages are left on the LRU, with no
  * ->mapping, and with attached buffers.  These pages are trivially reclaimable
  * by the VM, but their apparent absence upsets the VM accounting, and it makes
@@ -79,21 +81,6 @@ nope:
        __brelse(bh);
 }
 
-/*
- * Try to acquire jbd_lock_bh_state() against the buffer, when j_list_lock is
- * held.  For ranking reasons we must trylock.  If we lose, schedule away and
- * return 0.  j_list_lock is dropped in this case.
- */
-static int inverted_lock(journal_t *journal, struct buffer_head *bh)
-{
-       if (!jbd_trylock_bh_state(bh)) {
-               spin_unlock(&journal->j_list_lock);
-               schedule();
-               return 0;
-       }
-       return 1;
-}
-
 /*
  * Done it all: now submit the commit record.  We should have
  * cleaned up our previous buffers by now, so if we are in abort
@@ -112,6 +99,7 @@ static int journal_submit_commit_record(journal_t *journal,
        struct buffer_head *bh;
        int ret;
        int barrier_done = 0;
+       struct timespec now = current_kernel_time();
 
        if (is_journal_aborted(journal))
                return 0;
@@ -126,6 +114,8 @@ static int journal_submit_commit_record(journal_t *journal,
        tmp->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER);
        tmp->h_blocktype = cpu_to_be32(JBD2_COMMIT_BLOCK);
        tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid);
+       tmp->h_commit_sec = cpu_to_be64(now.tv_sec);
+       tmp->h_commit_nsec = cpu_to_be32(now.tv_nsec);
 
        if (JBD2_HAS_COMPAT_FEATURE(journal,
                                    JBD2_FEATURE_COMPAT_CHECKSUM)) {
@@ -197,159 +187,104 @@ static int journal_wait_on_commit_record(struct buffer_head *bh)
 }
 
 /*
- * Wait for all submitted IO to complete.
+ * Write the filemap data using writepage() address_space_operations.
+ * We don't do block allocation here even for delalloc. We don't
+ * use writepages() because with delayed allocation we may be doing
+ * block allocation in writepages().
  */
-static int journal_wait_on_locked_list(journal_t *journal,
-                                      transaction_t *commit_transaction)
+static int journal_submit_inode_data_buffers(struct address_space *mapping)
 {
-       int ret = 0;
-       struct journal_head *jh;
-
-       while (commit_transaction->t_locked_list) {
-               struct buffer_head *bh;
-
-               jh = commit_transaction->t_locked_list->b_tprev;
-               bh = jh2bh(jh);
-               get_bh(bh);
-               if (buffer_locked(bh)) {
-                       spin_unlock(&journal->j_list_lock);
-                       wait_on_buffer(bh);
-                       if (unlikely(!buffer_uptodate(bh)))
-                               ret = -EIO;
-                       spin_lock(&journal->j_list_lock);
-               }
-               if (!inverted_lock(journal, bh)) {
-                       put_bh(bh);
-                       spin_lock(&journal->j_list_lock);
-                       continue;
-               }
-               if (buffer_jbd(bh) && jh->b_jlist == BJ_Locked) {
-                       __jbd2_journal_unfile_buffer(jh);
-                       jbd_unlock_bh_state(bh);
-                       jbd2_journal_remove_journal_head(bh);
-                       put_bh(bh);
-               } else {
-                       jbd_unlock_bh_state(bh);
-               }
-               put_bh(bh);
-               cond_resched_lock(&journal->j_list_lock);
-       }
+       int ret;
+       struct writeback_control wbc = {
+               .sync_mode =  WB_SYNC_ALL,
+               .nr_to_write = mapping->nrpages * 2,
+               .range_start = 0,
+               .range_end = i_size_read(mapping->host),
+               .for_writepages = 1,
+       };
+
+       ret = generic_writepages(mapping, &wbc);
        return ret;
-  }
+}
 
-static void journal_do_submit_data(struct buffer_head **wbuf, int bufs)
+/*
+ * Submit all the data buffers of inode associated with the transaction to
+ * disk.
+ *
+ * We are in a committing transaction. Therefore no new inode can be added to
+ * our inode list. We use JI_COMMIT_RUNNING flag to protect inode we currently
+ * operate on from being released while we write out pages.
+ */
+static int journal_submit_data_buffers(journal_t *journal,
+               transaction_t *commit_transaction)
 {
-       int i;
+       struct jbd2_inode *jinode;
+       int err, ret = 0;
+       struct address_space *mapping;
 
-       for (i = 0; i < bufs; i++) {
-               wbuf[i]->b_end_io = end_buffer_write_sync;
-               /* We use-up our safety reference in submit_bh() */
-               submit_bh(WRITE, wbuf[i]);
+       spin_lock(&journal->j_list_lock);
+       list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
+               mapping = jinode->i_vfs_inode->i_mapping;
+               jinode->i_flags |= JI_COMMIT_RUNNING;
+               spin_unlock(&journal->j_list_lock);
+               /*
+                * Submit the inode data buffers. We use writepage
+                * instead of writepages, because writepages can do
+                * block allocation with delalloc. We need to write
+                * only allocated blocks here.
+                */
+               err = journal_submit_inode_data_buffers(mapping);
+               if (!ret)
+                       ret = err;
+               spin_lock(&journal->j_list_lock);
+               J_ASSERT(jinode->i_transaction == commit_transaction);
+               jinode->i_flags &= ~JI_COMMIT_RUNNING;
+               wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
        }
+       spin_unlock(&journal->j_list_lock);
+       return ret;
 }
 
 /*
- *  Submit all the data buffers to disk
+ * Wait for data submitted for writeout, refile inodes to proper
+ * transaction if needed.
+ *
  */
-static void journal_submit_data_buffers(journal_t *journal,
-                               transaction_t *commit_transaction)
+static int journal_finish_inode_data_buffers(journal_t *journal,
+               transaction_t *commit_transaction)
 {
-       struct journal_head *jh;
-       struct buffer_head *bh;
-       int locked;
-       int bufs = 0;
-       struct buffer_head **wbuf = journal->j_wbuf;
+       struct jbd2_inode *jinode, *next_i;
+       int err, ret = 0;
 
-       /*
-        * Whenever we unlock the journal and sleep, things can get added
-        * onto ->t_sync_datalist, so we have to keep looping back to
-        * write_out_data until we *know* that the list is empty.
-        *
-        * Cleanup any flushed data buffers from the data list.  Even in
-        * abort mode, we want to flush this out as soon as possible.
-        */
-write_out_data:
-       cond_resched();
+       /* For locking, see the comment in journal_submit_data_buffers() */
        spin_lock(&journal->j_list_lock);
+       list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
+               jinode->i_flags |= JI_COMMIT_RUNNING;
+               spin_unlock(&journal->j_list_lock);
+               err = filemap_fdatawait(jinode->i_vfs_inode->i_mapping);
+               if (!ret)
+                       ret = err;
+               spin_lock(&journal->j_list_lock);
+               jinode->i_flags &= ~JI_COMMIT_RUNNING;
+               wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
+       }
 
-       while (commit_transaction->t_sync_datalist) {
-               jh = commit_transaction->t_sync_datalist;
-               bh = jh2bh(jh);
-               locked = 0;
-
-               /* Get reference just to make sure buffer does not disappear
-                * when we are forced to drop various locks */
-               get_bh(bh);
-               /* If the buffer is dirty, we need to submit IO and hence
-                * we need the buffer lock. We try to lock the buffer without
-                * blocking. If we fail, we need to drop j_list_lock and do
-                * blocking lock_buffer().
-                */
-               if (buffer_dirty(bh)) {
-                       if (test_set_buffer_locked(bh)) {
-                               BUFFER_TRACE(bh, "needs blocking lock");
-                               spin_unlock(&journal->j_list_lock);
-                               /* Write out all data to prevent deadlocks */
-                               journal_do_submit_data(wbuf, bufs);
-                               bufs = 0;
-                               lock_buffer(bh);
-                               spin_lock(&journal->j_list_lock);
-                       }
-                       locked = 1;
-               }
-               /* We have to get bh_state lock. Again out of order, sigh. */
-               if (!inverted_lock(journal, bh)) {
-                       jbd_lock_bh_state(bh);
-                       spin_lock(&journal->j_list_lock);
-               }
-               /* Someone already cleaned up the buffer? */
-               if (!buffer_jbd(bh)
-                       || jh->b_transaction != commit_transaction
-                       || jh->b_jlist != BJ_SyncData) {
-                       jbd_unlock_bh_state(bh);
-                       if (locked)
-                               unlock_buffer(bh);
-                       BUFFER_TRACE(bh, "already cleaned up");
-                       put_bh(bh);
-                       continue;
-               }
-               if (locked && test_clear_buffer_dirty(bh)) {
-                       BUFFER_TRACE(bh, "needs writeout, adding to array");
-                       wbuf[bufs++] = bh;
-                       __jbd2_journal_file_buffer(jh, commit_transaction,
-                                               BJ_Locked);
-                       jbd_unlock_bh_state(bh);
-                       if (bufs == journal->j_wbufsize) {
-                               spin_unlock(&journal->j_list_lock);
-                               journal_do_submit_data(wbuf, bufs);
-                               bufs = 0;
-                               goto write_out_data;
-                       }
-               } else if (!locked && buffer_locked(bh)) {
-                       __jbd2_journal_file_buffer(jh, commit_transaction,
-                                               BJ_Locked);
-                       jbd_unlock_bh_state(bh);
-                       put_bh(bh);
+       /* Now refile inode to proper lists */
+       list_for_each_entry_safe(jinode, next_i,
+                                &commit_transaction->t_inode_list, i_list) {
+               list_del(&jinode->i_list);
+               if (jinode->i_next_transaction) {
+                       jinode->i_transaction = jinode->i_next_transaction;
+                       jinode->i_next_transaction = NULL;
+                       list_add(&jinode->i_list,
+                               &jinode->i_transaction->t_inode_list);
                } else {
-                       BUFFER_TRACE(bh, "writeout complete: unfile");
-                       __jbd2_journal_unfile_buffer(jh);
-                       jbd_unlock_bh_state(bh);
-                       if (locked)
-                               unlock_buffer(bh);
-                       jbd2_journal_remove_journal_head(bh);
-                       /* Once for our safety reference, once for
-                        * jbd2_journal_remove_journal_head() */
-                       put_bh(bh);
-                       put_bh(bh);
-               }
-
-               if (need_resched() || spin_needbreak(&journal->j_list_lock)) {
-                       spin_unlock(&journal->j_list_lock);
-                       goto write_out_data;
+                       jinode->i_transaction = NULL;
                }
        }
        spin_unlock(&journal->j_list_lock);
-       journal_do_submit_data(wbuf, bufs);
+
+       return ret;
 }
 
 static __u32 jbd2_checksum_data(__u32 crc32_sum, struct buffer_head *bh)
@@ -524,21 +459,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
         * Now start flushing things to disk, in the order they appear
         * on the transaction lists.  Data blocks go first.
         */
-       err = 0;
-       journal_submit_data_buffers(journal, commit_transaction);
-
-       /*
-        * Wait for all previously submitted IO to complete if commit
-        * record is to be written synchronously.
-        */
-       spin_lock(&journal->j_list_lock);
-       if (!JBD2_HAS_INCOMPAT_FEATURE(journal,
-               JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT))
-               err = journal_wait_on_locked_list(journal,
-                                               commit_transaction);
-
-       spin_unlock(&journal->j_list_lock);
-
+       err = journal_submit_data_buffers(journal, commit_transaction);
        if (err)
                jbd2_journal_abort(journal, err);
 
@@ -546,16 +467,6 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 
        jbd_debug(3, "JBD: commit phase 2\n");
 
-       /*
-        * If we found any dirty or locked buffers, then we should have
-        * looped back up to the write_out_data label.  If there weren't
-        * any then journal_clean_data_list should have wiped the list
-        * clean by now, so check that it is in fact empty.
-        */
-       J_ASSERT (commit_transaction->t_sync_datalist == NULL);
-
-       jbd_debug (3, "JBD: commit phase 3\n");
-
        /*
         * Way to go: we have now written out all of the data for a
         * transaction!  Now comes the tricky part: we need to write out
@@ -574,6 +485,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
        J_ASSERT(commit_transaction->t_nr_buffers <=
                 commit_transaction->t_outstanding_credits);
 
+       err = 0;
        descriptor = NULL;
        bufs = 0;
        while (commit_transaction->t_buffers) {
@@ -748,15 +660,19 @@ start_journal_io:
                                                 &cbh, crc32_sum);
                if (err)
                        __jbd2_journal_abort_hard(journal);
-
-               spin_lock(&journal->j_list_lock);
-               err = journal_wait_on_locked_list(journal,
-                                               commit_transaction);
-               spin_unlock(&journal->j_list_lock);
-               if (err)
-                       __jbd2_journal_abort_hard(journal);
        }
 
+       /*
+        * This is the right place to wait for data buffers both for ASYNC
+        * and !ASYNC commit. If commit is ASYNC, we need to wait only after
+        * the commit block went to disk (which happens above). If commit is
+        * SYNC, we need to wait for data buffers before we start writing
+        * commit block, which happens below in such setting.
+        */
+       err = journal_finish_inode_data_buffers(journal, commit_transaction);
+       if (err)
+               jbd2_journal_abort(journal, err);
+
        /* Lo and behold: we have just managed to send a transaction to
            the log.  Before we can commit it, wait for the IO so far to
            complete.  Control buffers being written are on the
@@ -768,7 +684,7 @@ start_journal_io:
           so we incur less scheduling load.
        */
 
-       jbd_debug(3, "JBD: commit phase 4\n");
+       jbd_debug(3, "JBD: commit phase 3\n");
 
        /*
         * akpm: these are BJ_IO, and j_list_lock is not needed.
@@ -827,7 +743,7 @@ wait_for_iobuf:
 
        J_ASSERT (commit_transaction->t_shadow_list == NULL);
 
-       jbd_debug(3, "JBD: commit phase 5\n");
+       jbd_debug(3, "JBD: commit phase 4\n");
 
        /* Here we wait for the revoke record and descriptor record buffers */
  wait_for_ctlbuf:
@@ -854,7 +770,7 @@ wait_for_iobuf:
                /* AKPM: bforget here */
        }
 
-       jbd_debug(3, "JBD: commit phase 6\n");
+       jbd_debug(3, "JBD: commit phase 5\n");
 
        if (!JBD2_HAS_INCOMPAT_FEATURE(journal,
                JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
@@ -874,9 +790,9 @@ wait_for_iobuf:
            transaction can be removed from any checkpoint list it was on
            before. */
 
-       jbd_debug(3, "JBD: commit phase 7\n");
+       jbd_debug(3, "JBD: commit phase 6\n");
 
-       J_ASSERT(commit_transaction->t_sync_datalist == NULL);
+       J_ASSERT(list_empty(&commit_transaction->t_inode_list));
        J_ASSERT(commit_transaction->t_buffers == NULL);
        J_ASSERT(commit_transaction->t_checkpoint_list == NULL);
        J_ASSERT(commit_transaction->t_iobuf_list == NULL);
@@ -997,7 +913,7 @@ restart_loop:
 
        /* Done with this transaction! */
 
-       jbd_debug(3, "JBD: commit phase 8\n");
+       jbd_debug(3, "JBD: commit phase 7\n");
 
        J_ASSERT(commit_transaction->t_state == T_COMMIT);
 
index 2e24567c4a797ddf5c18c481bf53eef8bc01ad50..b26c6d9fe6aeaeadb241e5b187dcb1ac75bc8884 100644 (file)
@@ -50,7 +50,6 @@ EXPORT_SYMBOL(jbd2_journal_unlock_updates);
 EXPORT_SYMBOL(jbd2_journal_get_write_access);
 EXPORT_SYMBOL(jbd2_journal_get_create_access);
 EXPORT_SYMBOL(jbd2_journal_get_undo_access);
-EXPORT_SYMBOL(jbd2_journal_dirty_data);
 EXPORT_SYMBOL(jbd2_journal_dirty_metadata);
 EXPORT_SYMBOL(jbd2_journal_release_buffer);
 EXPORT_SYMBOL(jbd2_journal_forget);
@@ -82,6 +81,10 @@ EXPORT_SYMBOL(jbd2_journal_blocks_per_page);
 EXPORT_SYMBOL(jbd2_journal_invalidatepage);
 EXPORT_SYMBOL(jbd2_journal_try_to_free_buffers);
 EXPORT_SYMBOL(jbd2_journal_force_commit);
+EXPORT_SYMBOL(jbd2_journal_file_inode);
+EXPORT_SYMBOL(jbd2_journal_init_jbd_inode);
+EXPORT_SYMBOL(jbd2_journal_release_jbd_inode);
+EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate);
 
 static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *);
 static void __journal_abort_soft (journal_t *journal, int errno);
@@ -2194,6 +2197,54 @@ void jbd2_journal_put_journal_head(struct journal_head *jh)
        jbd_unlock_bh_journal_head(bh);
 }
 
+/*
+ * Initialize jbd inode head
+ */
+void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode)
+{
+       jinode->i_transaction = NULL;
+       jinode->i_next_transaction = NULL;
+       jinode->i_vfs_inode = inode;
+       jinode->i_flags = 0;
+       INIT_LIST_HEAD(&jinode->i_list);
+}
+
+/*
+ * Function to be called before we start removing an inode from memory (e.g.,
+ * clear_inode() is a fine place to call it from). It removes the inode from
+ * the transaction's lists.
+ */
+void jbd2_journal_release_jbd_inode(journal_t *journal,
+                                   struct jbd2_inode *jinode)
+{
+       int writeout = 0;
+
+       if (!journal)
+               return;
+restart:
+       spin_lock(&journal->j_list_lock);
+       /* Is commit writing out inode - we have to wait */
+       if (jinode->i_flags & JI_COMMIT_RUNNING) {
+               wait_queue_head_t *wq;
+               DEFINE_WAIT_BIT(wait, &jinode->i_flags, __JI_COMMIT_RUNNING);
+               wq = bit_waitqueue(&jinode->i_flags, __JI_COMMIT_RUNNING);
+               prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
+               spin_unlock(&journal->j_list_lock);
+               schedule();
+               finish_wait(wq, &wait.wait);
+               goto restart;
+       }
+
+       /* Do we need to wait for data writeback? */
+       if (journal->j_committing_transaction == jinode->i_transaction)
+               writeout = 1;
+       if (jinode->i_transaction) {
+               list_del(&jinode->i_list);
+               jinode->i_transaction = NULL;
+       }
+       spin_unlock(&journal->j_list_lock);
+}
+
 /*
  * debugfs tunables
  */
index d6e006e67804d5facfbad041797e87586266751b..4f7cadbb19faa4396edeb4ff0be6840fc7a313af 100644 (file)
@@ -41,7 +41,6 @@ static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh);
  *     new transaction and we can't block without protecting against other
  *     processes trying to touch the journal while it is in transition.
  *
- * Called under j_state_lock
  */
 
 static transaction_t *
@@ -52,6 +51,7 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
        transaction->t_tid = journal->j_transaction_sequence++;
        transaction->t_expires = jiffies + journal->j_commit_interval;
        spin_lock_init(&transaction->t_handle_lock);
+       INIT_LIST_HEAD(&transaction->t_inode_list);
 
        /* Set up the commit timer for the new transaction. */
        journal->j_commit_timer.expires = round_jiffies(transaction->t_expires);
@@ -942,183 +942,6 @@ out:
        return err;
 }
 
-/**
- * int jbd2_journal_dirty_data() -  mark a buffer as containing dirty data which
- *                             needs to be flushed before we can commit the
- *                             current transaction.
- * @handle: transaction
- * @bh: bufferhead to mark
- *
- * The buffer is placed on the transaction's data list and is marked as
- * belonging to the transaction.
- *
- * Returns error number or 0 on success.
- *
- * jbd2_journal_dirty_data() can be called via page_launder->ext3_writepage
- * by kswapd.
- */
-int jbd2_journal_dirty_data(handle_t *handle, struct buffer_head *bh)
-{
-       journal_t *journal = handle->h_transaction->t_journal;
-       int need_brelse = 0;
-       struct journal_head *jh;
-
-       if (is_handle_aborted(handle))
-               return 0;
-
-       jh = jbd2_journal_add_journal_head(bh);
-       JBUFFER_TRACE(jh, "entry");
-
-       /*
-        * The buffer could *already* be dirty.  Writeout can start
-        * at any time.
-        */
-       jbd_debug(4, "jh: %p, tid:%d\n", jh, handle->h_transaction->t_tid);
-
-       /*
-        * What if the buffer is already part of a running transaction?
-        *
-        * There are two cases:
-        * 1) It is part of the current running transaction.  Refile it,
-        *    just in case we have allocated it as metadata, deallocated
-        *    it, then reallocated it as data.
-        * 2) It is part of the previous, still-committing transaction.
-        *    If all we want to do is to guarantee that the buffer will be
-        *    written to disk before this new transaction commits, then
-        *    being sure that the *previous* transaction has this same
-        *    property is sufficient for us!  Just leave it on its old
-        *    transaction.
-        *
-        * In case (2), the buffer must not already exist as metadata
-        * --- that would violate write ordering (a transaction is free
-        * to write its data at any point, even before the previous
-        * committing transaction has committed).  The caller must
-        * never, ever allow this to happen: there's nothing we can do
-        * about it in this layer.
-        */
-       jbd_lock_bh_state(bh);
-       spin_lock(&journal->j_list_lock);
-
-       /* Now that we have bh_state locked, are we really still mapped? */
-       if (!buffer_mapped(bh)) {
-               JBUFFER_TRACE(jh, "unmapped buffer, bailing out");
-               goto no_journal;
-       }
-
-       if (jh->b_transaction) {
-               JBUFFER_TRACE(jh, "has transaction");
-               if (jh->b_transaction != handle->h_transaction) {
-                       JBUFFER_TRACE(jh, "belongs to older transaction");
-                       J_ASSERT_JH(jh, jh->b_transaction ==
-                                       journal->j_committing_transaction);
-
-                       /* @@@ IS THIS TRUE  ? */
-                       /*
-                        * Not any more.  Scenario: someone does a write()
-                        * in data=journal mode.  The buffer's transaction has
-                        * moved into commit.  Then someone does another
-                        * write() to the file.  We do the frozen data copyout
-                        * and set b_next_transaction to point to j_running_t.
-                        * And while we're in that state, someone does a
-                        * writepage() in an attempt to pageout the same area
-                        * of the file via a shared mapping.  At present that
-                        * calls jbd2_journal_dirty_data(), and we get right here.
-                        * It may be too late to journal the data.  Simply
-                        * falling through to the next test will suffice: the
-                        * data will be dirty and wil be checkpointed.  The
-                        * ordering comments in the next comment block still
-                        * apply.
-                        */
-                       //J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
-
-                       /*
-                        * If we're journalling data, and this buffer was
-                        * subject to a write(), it could be metadata, forget
-                        * or shadow against the committing transaction.  Now,
-                        * someone has dirtied the same darn page via a mapping
-                        * and it is being writepage()'d.
-                        * We *could* just steal the page from commit, with some
-                        * fancy locking there.  Instead, we just skip it -
-                        * don't tie the page's buffers to the new transaction
-                        * at all.
-                        * Implication: if we crash before the writepage() data
-                        * is written into the filesystem, recovery will replay
-                        * the write() data.
-                        */
-                       if (jh->b_jlist != BJ_None &&
-                                       jh->b_jlist != BJ_SyncData &&
-                                       jh->b_jlist != BJ_Locked) {
-                               JBUFFER_TRACE(jh, "Not stealing");
-                               goto no_journal;
-                       }
-
-                       /*
-                        * This buffer may be undergoing writeout in commit.  We
-                        * can't return from here and let the caller dirty it
-                        * again because that can cause the write-out loop in
-                        * commit to never terminate.
-                        */
-                       if (buffer_dirty(bh)) {
-                               get_bh(bh);
-                               spin_unlock(&journal->j_list_lock);
-                               jbd_unlock_bh_state(bh);
-                               need_brelse = 1;
-                               sync_dirty_buffer(bh);
-                               jbd_lock_bh_state(bh);
-                               spin_lock(&journal->j_list_lock);
-                               /* Since we dropped the lock... */
-                               if (!buffer_mapped(bh)) {
-                                       JBUFFER_TRACE(jh, "buffer got unmapped");
-                                       goto no_journal;
-                               }
-                               /* The buffer may become locked again at any
-                                  time if it is redirtied */
-                       }
-
-                       /* journal_clean_data_list() may have got there first */
-                       if (jh->b_transaction != NULL) {
-                               JBUFFER_TRACE(jh, "unfile from commit");
-                               __jbd2_journal_temp_unlink_buffer(jh);
-                               /* It still points to the committing
-                                * transaction; move it to this one so
-                                * that the refile assert checks are
-                                * happy. */
-                               jh->b_transaction = handle->h_transaction;
-                       }
-                       /* The buffer will be refiled below */
-
-               }
-               /*
-                * Special case --- the buffer might actually have been
-                * allocated and then immediately deallocated in the previous,
-                * committing transaction, so might still be left on that
-                * transaction's metadata lists.
-                */
-               if (jh->b_jlist != BJ_SyncData && jh->b_jlist != BJ_Locked) {
-                       JBUFFER_TRACE(jh, "not on correct data list: unfile");
-                       J_ASSERT_JH(jh, jh->b_jlist != BJ_Shadow);
-                       __jbd2_journal_temp_unlink_buffer(jh);
-                       jh->b_transaction = handle->h_transaction;
-                       JBUFFER_TRACE(jh, "file as data");
-                       __jbd2_journal_file_buffer(jh, handle->h_transaction,
-                                               BJ_SyncData);
-               }
-       } else {
-               JBUFFER_TRACE(jh, "not on a transaction");
-               __jbd2_journal_file_buffer(jh, handle->h_transaction, BJ_SyncData);
-       }
-no_journal:
-       spin_unlock(&journal->j_list_lock);
-       jbd_unlock_bh_state(bh);
-       if (need_brelse) {
-               BUFFER_TRACE(bh, "brelse");
-               __brelse(bh);
-       }
-       JBUFFER_TRACE(jh, "exit");
-       jbd2_journal_put_journal_head(jh);
-       return 0;
-}
-
 /**
  * int jbd2_journal_dirty_metadata() -  mark a buffer as containing dirty metadata
  * @handle: transaction to add buffer to.
@@ -1541,10 +1364,10 @@ __blist_del_buffer(struct journal_head **list, struct journal_head *jh)
  * Remove a buffer from the appropriate transaction list.
  *
  * Note that this function can *change* the value of
- * bh->b_transaction->t_sync_datalist, t_buffers, t_forget,
- * t_iobuf_list, t_shadow_list, t_log_list or t_reserved_list.  If the caller
- * is holding onto a copy of one of thee pointers, it could go bad.
- * Generally the caller needs to re-read the pointer from the transaction_t.
+ * bh->b_transaction->t_buffers, t_forget, t_iobuf_list, t_shadow_list,
+ * t_log_list or t_reserved_list.  If the caller is holding onto a copy of one
+ * of these pointers, it could go bad.  Generally the caller needs to re-read
+ * the pointer from the transaction_t.
  *
  * Called under j_list_lock.  The journal may not be locked.
  */
@@ -1566,9 +1389,6 @@ void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh)
        switch (jh->b_jlist) {
        case BJ_None:
                return;
-       case BJ_SyncData:
-               list = &transaction->t_sync_datalist;
-               break;
        case BJ_Metadata:
                transaction->t_nr_buffers--;
                J_ASSERT_JH(jh, transaction->t_nr_buffers >= 0);
@@ -1589,9 +1409,6 @@ void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh)
        case BJ_Reserved:
                list = &transaction->t_reserved_list;
                break;
-       case BJ_Locked:
-               list = &transaction->t_locked_list;
-               break;
        }
 
        __blist_del_buffer(list, jh);
@@ -1634,15 +1451,7 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
                goto out;
 
        spin_lock(&journal->j_list_lock);
-       if (jh->b_transaction != NULL && jh->b_cp_transaction == NULL) {
-               if (jh->b_jlist == BJ_SyncData || jh->b_jlist == BJ_Locked) {
-                       /* A written-back ordered data buffer */
-                       JBUFFER_TRACE(jh, "release data");
-                       __jbd2_journal_unfile_buffer(jh);
-                       jbd2_journal_remove_journal_head(bh);
-                       __brelse(bh);
-               }
-       } else if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) {
+       if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) {
                /* written-back checkpointed metadata buffer */
                if (jh->b_jlist == BJ_None) {
                        JBUFFER_TRACE(jh, "remove from checkpoint list");
@@ -1656,12 +1465,43 @@ out:
        return;
 }
 
+/*
+ * jbd2_journal_try_to_free_buffers() could race with
+ * jbd2_journal_commit_transaction(). The latter might still hold a
+ * reference count on the buffers while inspecting them on
+ * t_syncdata_list or t_locked_list.
+ *
+ * jbd2_journal_try_to_free_buffers() will call this function to
+ * wait for the current transaction to finish syncing data buffers, before
+ * trying to free that buffer.
+ *
+ * Takes journal->j_state_lock itself, so call it without that lock held.
+ */
+static void jbd2_journal_wait_for_transaction_sync_data(journal_t *journal)
+{
+       transaction_t *transaction;
+       tid_t tid;
+
+       spin_lock(&journal->j_state_lock);
+       transaction = journal->j_committing_transaction;
+
+       if (!transaction) {
+               spin_unlock(&journal->j_state_lock);
+               return;
+       }
+
+       tid = transaction->t_tid;
+       spin_unlock(&journal->j_state_lock);
+       jbd2_log_wait_commit(journal, tid);
+}
 
 /**
  * int jbd2_journal_try_to_free_buffers() - try to free page buffers.
  * @journal: journal for operation
  * @page: to try and free
- * @unused_gfp_mask: unused
+ * @gfp_mask: we use the mask to detect how hard we should try to release
+ * buffers. If both __GFP_WAIT and __GFP_FS are set, we wait for commit code
+ * to release the buffers.
  *
  *
  * For all the buffers on this page,
@@ -1690,9 +1530,11 @@ out:
  * journal_try_to_free_buffer() is changing its state.  But that
  * cannot happen because we never reallocate freed data as metadata
  * while the data is part of a transaction.  Yes?
+ *
+ * Return 0 on failure, 1 on success
  */
 int jbd2_journal_try_to_free_buffers(journal_t *journal,
-                               struct page *page, gfp_t unused_gfp_mask)
+                               struct page *page, gfp_t gfp_mask)
 {
        struct buffer_head *head;
        struct buffer_head *bh;
@@ -1708,7 +1550,8 @@ int jbd2_journal_try_to_free_buffers(journal_t *journal,
                /*
                 * We take our own ref against the journal_head here to avoid
                 * having to add tons of locking around each instance of
-                * jbd2_journal_remove_journal_head() and jbd2_journal_put_journal_head().
+                * jbd2_journal_remove_journal_head() and
+                * jbd2_journal_put_journal_head().
                 */
                jh = jbd2_journal_grab_journal_head(bh);
                if (!jh)
@@ -1721,7 +1564,28 @@ int jbd2_journal_try_to_free_buffers(journal_t *journal,
                if (buffer_jbd(bh))
                        goto busy;
        } while ((bh = bh->b_this_page) != head);
+
        ret = try_to_free_buffers(page);
+
+       /*
+        * There are a number of places where jbd2_journal_try_to_free_buffers()
+        * could race with jbd2_journal_commit_transaction(); the latter still
+        * holds a reference to the buffers to free while processing them, so
+        * try_to_free_buffers() fails to free those buffers. Some callers of
+        * releasepage() require the page buffers to be dropped and otherwise
+        * treat the failure as an error (e.g. generic_file_direct_IO()).
+        *
+        * So, if the caller of try_to_release_page() wants the synchronous
+        * behaviour (i.e. make sure buffers are dropped upon return),
+        * let's wait for the current transaction to finish flushing
+        * dirty data buffers, then try to free those buffers again,
+        * with the journal locked.
+        */
+       if (ret == 0 && (gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS)) {
+               jbd2_journal_wait_for_transaction_sync_data(journal);
+               ret = try_to_free_buffers(page);
+       }
+
 busy:
        return ret;
 }
@@ -1823,6 +1687,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
        if (!buffer_jbd(bh))
                goto zap_buffer_unlocked;
 
+       /* OK, we have data buffer in journaled mode */
        spin_lock(&journal->j_state_lock);
        jbd_lock_bh_state(bh);
        spin_lock(&journal->j_list_lock);
@@ -1886,15 +1751,6 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
                }
        } else if (transaction == journal->j_committing_transaction) {
                JBUFFER_TRACE(jh, "on committing transaction");
-               if (jh->b_jlist == BJ_Locked) {
-                       /*
-                        * The buffer is on the committing transaction's locked
-                        * list.  We have the buffer locked, so I/O has
-                        * completed.  So we can nail the buffer now.
-                        */
-                       may_free = __dispose_buffer(jh, transaction);
-                       goto zap_buffer;
-               }
                /*
                 * If it is committing, we simply cannot touch it.  We
                 * can remove it's next_transaction pointer from the
@@ -2027,9 +1883,6 @@ void __jbd2_journal_file_buffer(struct journal_head *jh,
                J_ASSERT_JH(jh, !jh->b_committed_data);
                J_ASSERT_JH(jh, !jh->b_frozen_data);
                return;
-       case BJ_SyncData:
-               list = &transaction->t_sync_datalist;
-               break;
        case BJ_Metadata:
                transaction->t_nr_buffers++;
                list = &transaction->t_buffers;
@@ -2049,9 +1902,6 @@ void __jbd2_journal_file_buffer(struct journal_head *jh,
        case BJ_Reserved:
                list = &transaction->t_reserved_list;
                break;
-       case BJ_Locked:
-               list =  &transaction->t_locked_list;
-               break;
        }
 
        __blist_add_buffer(list, jh);
@@ -2141,3 +1991,88 @@ void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh)
        spin_unlock(&journal->j_list_lock);
        __brelse(bh);
 }
+
+/*
+ * File inode in the inode list of the handle's transaction
+ */
+int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode)
+{
+       transaction_t *transaction = handle->h_transaction;
+       journal_t *journal = transaction->t_journal;
+
+       if (is_handle_aborted(handle))
+               return -EIO;
+
+       jbd_debug(4, "Adding inode %lu, tid:%d\n", jinode->i_vfs_inode->i_ino,
+                       transaction->t_tid);
+
+       /*
+        * First check whether inode isn't already on the transaction's
+        * lists without taking the lock. Note that this check is safe
+        * without the lock as we cannot race with somebody removing inode
+        * from the transaction. The reason is that we remove inode from the
+        * transaction only in journal_release_jbd_inode() and when we commit
+        * the transaction. We are guarded from the first case by holding
+        * a reference to the inode. We are safe against the second case
+        * because if jinode->i_transaction == transaction, commit code
+        * cannot touch the transaction because we hold reference to it,
+        * and if jinode->i_next_transaction == transaction, commit code
+        * will only file the inode where we want it.
+        */
+       if (jinode->i_transaction == transaction ||
+           jinode->i_next_transaction == transaction)
+               return 0;
+
+       spin_lock(&journal->j_list_lock);
+
+       if (jinode->i_transaction == transaction ||
+           jinode->i_next_transaction == transaction)
+               goto done;
+
+       /* On some different transaction's list - should be
+        * the committing one */
+       if (jinode->i_transaction) {
+               J_ASSERT(jinode->i_next_transaction == NULL);
+               J_ASSERT(jinode->i_transaction ==
+                                       journal->j_committing_transaction);
+               jinode->i_next_transaction = transaction;
+               goto done;
+       }
+       /* Not on any transaction list... */
+       J_ASSERT(!jinode->i_next_transaction);
+       jinode->i_transaction = transaction;
+       list_add(&jinode->i_list, &transaction->t_inode_list);
+done:
+       spin_unlock(&journal->j_list_lock);
+
+       return 0;
+}
+
+/*
+ * This function must be called when inode is journaled in ordered mode
+ * before truncation happens. It starts writeout of truncated part in
+ * case it is in the committing transaction so that we keep the ordered
+ * mode consistency guarantees.
+ */
+int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode,
+                                       loff_t new_size)
+{
+       journal_t *journal;
+       transaction_t *commit_trans;
+       int ret = 0;
+
+       if (!inode->i_transaction && !inode->i_next_transaction)
+               goto out;
+       journal = inode->i_transaction->t_journal;
+       spin_lock(&journal->j_state_lock);
+       commit_trans = journal->j_committing_transaction;
+       spin_unlock(&journal->j_state_lock);
+       if (inode->i_transaction == commit_trans) {
+               ret = filemap_fdatawrite_range(inode->i_vfs_inode->i_mapping,
+                       new_size, LLONG_MAX);
+               if (ret)
+                       jbd2_journal_abort(journal, ret);
+       }
+out:
+       return ret;
+}
index bf6ab19b86ee1e2766dad7165800c70897dde984..6a73de84bcef142350ad60f3b3fefe2b172de043 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/ctype.h>
 #include <linux/module.h>
 #include <linux/proc_fs.h>
+#include <linux/seq_file.h>
 #include <asm/uaccess.h>
 #include "jfs_incore.h"
 #include "jfs_filsys.h"
 
 static struct proc_dir_entry *base;
 #ifdef CONFIG_JFS_DEBUG
-static int loglevel_read(char *page, char **start, off_t off,
-                        int count, int *eof, void *data)
+static int jfs_loglevel_proc_show(struct seq_file *m, void *v)
 {
-       int len;
-
-       len = sprintf(page, "%d\n", jfsloglevel);
-
-       len -= off;
-       *start = page + off;
-
-       if (len > count)
-               len = count;
-       else
-               *eof = 1;
-
-       if (len < 0)
-               len = 0;
+       seq_printf(m, "%d\n", jfsloglevel);
+       return 0;
+}
 
-       return len;
+static int jfs_loglevel_proc_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, jfs_loglevel_proc_show, NULL);
 }
 
-static int loglevel_write(struct file *file, const char __user *buffer,
-                       unsigned long count, void *data)
+static ssize_t jfs_loglevel_proc_write(struct file *file,
+               const char __user *buffer, size_t count, loff_t *ppos)
 {
        char c;
 
@@ -65,22 +56,30 @@ static int loglevel_write(struct file *file, const char __user *buffer,
        jfsloglevel = c - '0';
        return count;
 }
+
+static const struct file_operations jfs_loglevel_proc_fops = {
+       .owner          = THIS_MODULE,
+       .open           = jfs_loglevel_proc_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = single_release,
+       .write          = jfs_loglevel_proc_write,
+};
 #endif
 
 static struct {
        const char      *name;
-       read_proc_t     *read_fn;
-       write_proc_t    *write_fn;
+       const struct file_operations *proc_fops;
 } Entries[] = {
 #ifdef CONFIG_JFS_STATISTICS
-       { "lmstats",    jfs_lmstats_read, },
-       { "txstats",    jfs_txstats_read, },
-       { "xtstat",     jfs_xtstat_read, },
-       { "mpstat",     jfs_mpstat_read, },
+       { "lmstats",    &jfs_lmstats_proc_fops, },
+       { "txstats",    &jfs_txstats_proc_fops, },
+       { "xtstat",     &jfs_xtstat_proc_fops, },
+       { "mpstat",     &jfs_mpstat_proc_fops, },
 #endif
 #ifdef CONFIG_JFS_DEBUG
-       { "TxAnchor",   jfs_txanchor_read, },
-       { "loglevel",   loglevel_read, loglevel_write }
+       { "TxAnchor",   &jfs_txanchor_proc_fops, },
+       { "loglevel",   &jfs_loglevel_proc_fops }
 #endif
 };
 #define NPROCENT       ARRAY_SIZE(Entries)
@@ -93,13 +92,8 @@ void jfs_proc_init(void)
                return;
        base->owner = THIS_MODULE;
 
-       for (i = 0; i < NPROCENT; i++) {
-               struct proc_dir_entry *p;
-               if ((p = create_proc_entry(Entries[i].name, 0, base))) {
-                       p->read_proc = Entries[i].read_fn;
-                       p->write_proc = Entries[i].write_fn;
-               }
-       }
+       for (i = 0; i < NPROCENT; i++)
+               proc_create(Entries[i].name, 0, base, Entries[i].proc_fops);
 }
 
 void jfs_proc_clean(void)
index 044c1e654cc00e4f1790bd16e67ecc053944a213..eafd1300a00b2a9076ca135d8b2a21783b2ba126 100644 (file)
@@ -62,7 +62,7 @@ extern void jfs_proc_clean(void);
 
 extern int jfsloglevel;
 
-extern int jfs_txanchor_read(char *, char **, off_t, int, int *, void *);
+extern const struct file_operations jfs_txanchor_proc_fops;
 
 /* information message: e.g., configuration, major event */
 #define jfs_info(fmt, arg...) do {                     \
@@ -105,10 +105,10 @@ extern int jfs_txanchor_read(char *, char **, off_t, int, int *, void *);
  *     ----------
  */
 #ifdef CONFIG_JFS_STATISTICS
-extern int jfs_lmstats_read(char *, char **, off_t, int, int *, void *);
-extern int jfs_txstats_read(char *, char **, off_t, int, int *, void *);
-extern int jfs_mpstat_read(char *, char **, off_t, int, int *, void *);
-extern int jfs_xtstat_read(char *, char **, off_t, int, int *, void *);
+extern const struct file_operations jfs_lmstats_proc_fops;
+extern const struct file_operations jfs_txstats_proc_fops;
+extern const struct file_operations jfs_mpstat_proc_fops;
+extern const struct file_operations jfs_xtstat_proc_fops;
 
 #define        INCREMENT(x)            ((x)++)
 #define        DECREMENT(x)            ((x)--)
index cdac2d5bafeb3547a4ca51831bfd134b1f03ebd1..2545bb317235d2c6423c6cefa97afd73ed763107 100644 (file)
@@ -243,9 +243,6 @@ typedef union {
 #define JFS_REMOVE 3
 #define JFS_RENAME 4
 
-#define DIRENTSIZ(namlen) \
-    ( (sizeof(struct dirent) - 2*(JFS_NAME_MAX+1) + 2*((namlen)+1) + 3) &~ 3 )
-
 /*
  * Maximum file offset for directories.
  */
index 734ec916beafb1c1306cd5994c7858f92f5b4190..d6363d8309d0c5e53370c073e5e620a250d2567a 100644 (file)
@@ -1520,7 +1520,7 @@ int diAlloc(struct inode *pip, bool dir, struct inode *ip)
                                        jfs_error(ip->i_sb,
                                                  "diAlloc: can't find free bit "
                                                  "in wmap");
-                                       return EIO;
+                                       return -EIO;
                                }
 
                                /* determine the inode number within the
index 325a9679b95a7f8e1e30cb9bbb255cbfc222be0e..cd2ec2988b59ab05304a5e2c864ee9aa2e3baa69 100644 (file)
@@ -69,6 +69,7 @@
 #include <linux/freezer.h>
 #include <linux/delay.h>
 #include <linux/mutex.h>
+#include <linux/seq_file.h>
 #include "jfs_incore.h"
 #include "jfs_filsys.h"
 #include "jfs_metapage.h"
@@ -2503,13 +2504,9 @@ exit:
 }
 
 #ifdef CONFIG_JFS_STATISTICS
-int jfs_lmstats_read(char *buffer, char **start, off_t offset, int length,
-                     int *eof, void *data)
+static int jfs_lmstats_proc_show(struct seq_file *m, void *v)
 {
-       int len = 0;
-       off_t begin;
-
-       len += sprintf(buffer,
+       seq_printf(m,
                       "JFS Logmgr stats\n"
                       "================\n"
                       "commits = %d\n"
@@ -2522,19 +2519,19 @@ int jfs_lmstats_read(char *buffer, char **start, off_t offset, int length,
                       lmStat.pagedone,
                       lmStat.full_page,
                       lmStat.partial_page);
+       return 0;
+}
 
-       begin = offset;
-       *start = buffer + begin;
-       len -= begin;
-
-       if (len > length)
-               len = length;
-       else
-               *eof = 1;
-
-       if (len < 0)
-               len = 0;
-
-       return len;
+static int jfs_lmstats_proc_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, jfs_lmstats_proc_show, NULL);
 }
+
+const struct file_operations jfs_lmstats_proc_fops = {
+       .owner          = THIS_MODULE,
+       .open           = jfs_lmstats_proc_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = single_release,
+};
 #endif /* CONFIG_JFS_STATISTICS */
index d1e64f2f2fcd1b434b4c07434bb19b82e6b189f9..854ff0ec574f8805a71c09152bf60f9e8bd66053 100644 (file)
 
 #include <linux/fs.h>
 #include <linux/mm.h>
+#include <linux/module.h>
 #include <linux/bio.h>
 #include <linux/init.h>
 #include <linux/buffer_head.h>
 #include <linux/mempool.h>
+#include <linux/seq_file.h>
 #include "jfs_incore.h"
 #include "jfs_superblock.h"
 #include "jfs_filsys.h"
@@ -804,13 +806,9 @@ void __invalidate_metapages(struct inode *ip, s64 addr, int len)
 }
 
 #ifdef CONFIG_JFS_STATISTICS
-int jfs_mpstat_read(char *buffer, char **start, off_t offset, int length,
-                   int *eof, void *data)
+static int jfs_mpstat_proc_show(struct seq_file *m, void *v)
 {
-       int len = 0;
-       off_t begin;
-
-       len += sprintf(buffer,
+       seq_printf(m,
                       "JFS Metapage statistics\n"
                       "=======================\n"
                       "page allocations = %d\n"
@@ -819,19 +817,19 @@ int jfs_mpstat_read(char *buffer, char **start, off_t offset, int length,
                       mpStat.pagealloc,
                       mpStat.pagefree,
                       mpStat.lockwait);
+       return 0;
+}
 
-       begin = offset;
-       *start = buffer + begin;
-       len -= begin;
-
-       if (len > length)
-               len = length;
-       else
-               *eof = 1;
-
-       if (len < 0)
-               len = 0;
-
-       return len;
+static int jfs_mpstat_proc_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, jfs_mpstat_proc_show, NULL);
 }
+
+const struct file_operations jfs_mpstat_proc_fops = {
+       .owner          = THIS_MODULE,
+       .open           = jfs_mpstat_proc_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = single_release,
+};
 #endif
index e7c60ae6b5b21c92f724220d6c6fdf39456425af..f26e4d03ada50aabc2e9870235cf297a838a1486 100644 (file)
@@ -49,6 +49,7 @@
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/kthread.h>
+#include <linux/seq_file.h>
 #include "jfs_incore.h"
 #include "jfs_inode.h"
 #include "jfs_filsys.h"
@@ -3009,11 +3010,8 @@ int jfs_sync(void *arg)
 }
 
 #if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_DEBUG)
-int jfs_txanchor_read(char *buffer, char **start, off_t offset, int length,
-                     int *eof, void *data)
+static int jfs_txanchor_proc_show(struct seq_file *m, void *v)
 {
-       int len = 0;
-       off_t begin;
        char *freewait;
        char *freelockwait;
        char *lowlockwait;
@@ -3025,7 +3023,7 @@ int jfs_txanchor_read(char *buffer, char **start, off_t offset, int length,
        lowlockwait =
            waitqueue_active(&TxAnchor.lowlockwait) ? "active" : "empty";
 
-       len += sprintf(buffer,
+       seq_printf(m,
                       "JFS TxAnchor\n"
                       "============\n"
                       "freetid = %d\n"
@@ -3044,31 +3042,27 @@ int jfs_txanchor_read(char *buffer, char **start, off_t offset, int length,
                       TxAnchor.tlocksInUse,
                       jfs_tlocks_low,
                       list_empty(&TxAnchor.unlock_queue) ? "" : "not ");
+       return 0;
+}
 
-       begin = offset;
-       *start = buffer + begin;
-       len -= begin;
-
-       if (len > length)
-               len = length;
-       else
-               *eof = 1;
-
-       if (len < 0)
-               len = 0;
-
-       return len;
+static int jfs_txanchor_proc_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, jfs_txanchor_proc_show, NULL);
 }
+
+const struct file_operations jfs_txanchor_proc_fops = {
+       .owner          = THIS_MODULE,
+       .open           = jfs_txanchor_proc_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = single_release,
+};
 #endif
 
 #if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_STATISTICS)
-int jfs_txstats_read(char *buffer, char **start, off_t offset, int length,
-                    int *eof, void *data)
+static int jfs_txstats_proc_show(struct seq_file *m, void *v)
 {
-       int len = 0;
-       off_t begin;
-
-       len += sprintf(buffer,
+       seq_printf(m,
                       "JFS TxStats\n"
                       "===========\n"
                       "calls to txBegin = %d\n"
@@ -3089,19 +3083,19 @@ int jfs_txstats_read(char *buffer, char **start, off_t offset, int length,
                       TxStat.txBeginAnon_lockslow,
                       TxStat.txLockAlloc,
                       TxStat.txLockAlloc_freelock);
+       return 0;
+}
 
-       begin = offset;
-       *start = buffer + begin;
-       len -= begin;
-
-       if (len > length)
-               len = length;
-       else
-               *eof = 1;
-
-       if (len < 0)
-               len = 0;
-
-       return len;
+static int jfs_txstats_proc_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, jfs_txstats_proc_show, NULL);
 }
+
+const struct file_operations jfs_txstats_proc_fops = {
+       .owner          = THIS_MODULE,
+       .open           = jfs_txstats_proc_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = single_release,
+};
 #endif
index 5a61ebf2cbccdd96035fe8efb4e653a80b931e13..ae3acafb447b2bad3c4346bb9194b6d549dcd694 100644 (file)
@@ -20,7 +20,9 @@
  */
 
 #include <linux/fs.h>
+#include <linux/module.h>
 #include <linux/quotaops.h>
+#include <linux/seq_file.h>
 #include "jfs_incore.h"
 #include "jfs_filsys.h"
 #include "jfs_metapage.h"
@@ -4134,13 +4136,9 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size)
 }
 
 #ifdef CONFIG_JFS_STATISTICS
-int jfs_xtstat_read(char *buffer, char **start, off_t offset, int length,
-                   int *eof, void *data)
+static int jfs_xtstat_proc_show(struct seq_file *m, void *v)
 {
-       int len = 0;
-       off_t begin;
-
-       len += sprintf(buffer,
+       seq_printf(m,
                       "JFS Xtree statistics\n"
                       "====================\n"
                       "searches = %d\n"
@@ -4149,19 +4147,19 @@ int jfs_xtstat_read(char *buffer, char **start, off_t offset, int length,
                       xtStat.search,
                       xtStat.fastSearch,
                       xtStat.split);
+       return 0;
+}
 
-       begin = offset;
-       *start = buffer + begin;
-       len -= begin;
-
-       if (len > length)
-               len = length;
-       else
-               *eof = 1;
-
-       if (len < 0)
-               len = 0;
-
-       return len;
+static int jfs_xtstat_proc_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, jfs_xtstat_proc_show, NULL);
 }
+
+const struct file_operations jfs_xtstat_proc_fops = {
+       .owner          = THIS_MODULE,
+       .open           = jfs_xtstat_proc_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = single_release,
+};
 #endif
index 0ba6778edaa2ae73d9595d8f425c7d3e98dce7a7..2aba82386810591560890e91b02c8f62a4959bd3 100644 (file)
@@ -1455,7 +1455,7 @@ static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry, struc
                free_UCSname(&key);
                if (rc == -ENOENT) {
                        d_add(dentry, NULL);
-                       return ERR_PTR(0);
+                       return NULL;
                } else if (rc) {
                        jfs_err("jfs_lookup: dtSearch returned %d", rc);
                        return ERR_PTR(rc);
index 50ea65451732edf196c7750c2e1f0ee60b547bba..0288e6d7936a200e51b585914cf5ae76847b91a3 100644 (file)
@@ -499,7 +499,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
        inode = jfs_iget(sb, ROOT_I);
        if (IS_ERR(inode)) {
                ret = PTR_ERR(inode);
-               goto out_no_root;
+               goto out_no_rw;
        }
        sb->s_root = d_alloc_root(inode);
        if (!sb->s_root)
@@ -521,9 +521,8 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
        return 0;
 
 out_no_root:
-       jfs_err("jfs_read_super: get root inode failed");
-       if (inode)
-               iput(inode);
+       jfs_err("jfs_read_super: get root dentry failed");
+       iput(inode);
 
 out_no_rw:
        rc = jfs_umount(sb);
index 235e4d3873a88d17837f1a9551f09664c8ffcb54..dbcc7af76a15d483a5efc5c1a68aa11566072623 100644 (file)
@@ -82,7 +82,7 @@ static void mpage_end_io_write(struct bio *bio, int err)
        bio_put(bio);
 }
 
-static struct bio *mpage_bio_submit(int rw, struct bio *bio)
+struct bio *mpage_bio_submit(int rw, struct bio *bio)
 {
        bio->bi_end_io = mpage_end_io_read;
        if (rw == WRITE)
@@ -90,6 +90,7 @@ static struct bio *mpage_bio_submit(int rw, struct bio *bio)
        submit_bio(rw, bio);
        return NULL;
 }
+EXPORT_SYMBOL(mpage_bio_submit);
 
 static struct bio *
 mpage_alloc(struct block_device *bdev,
@@ -435,15 +436,9 @@ EXPORT_SYMBOL(mpage_readpage);
  * written, so it can intelligently allocate a suitably-sized BIO.  For now,
  * just allocate full-size (16-page) BIOs.
  */
-struct mpage_data {
-       struct bio *bio;
-       sector_t last_block_in_bio;
-       get_block_t *get_block;
-       unsigned use_writepage;
-};
 
-static int __mpage_writepage(struct page *page, struct writeback_control *wbc,
-                            void *data)
+int __mpage_writepage(struct page *page, struct writeback_control *wbc,
+                     void *data)
 {
        struct mpage_data *mpd = data;
        struct bio *bio = mpd->bio;
@@ -651,6 +646,7 @@ out:
        mpd->bio = bio;
        return ret;
 }
+EXPORT_SYMBOL(__mpage_writepage);
 
 /**
  * mpage_writepages - walk the list of dirty pages of the given address space & writepage() all of them
index fd4a6a0393acb9171aea41ff18301005d00354f9..738bb9fb3e53c86ddb40119158b1b88307a5ab35 100644 (file)
 
 /* Due to the structure of pre-exisiting code, don't use assembler line
    comment character # to ignore the arguments. Instead, use a dummy macro. */
-.macro __cfi_ignore a=0, b=0, c=0, d=0
+.macro cfi_ignore a=0, b=0, c=0, d=0
 .endm
 
-#define CFI_STARTPROC  __cfi_ignore
-#define CFI_ENDPROC    __cfi_ignore
-#define CFI_DEF_CFA    __cfi_ignore
-#define CFI_DEF_CFA_REGISTER   __cfi_ignore
-#define CFI_DEF_CFA_OFFSET     __cfi_ignore
-#define CFI_ADJUST_CFA_OFFSET  __cfi_ignore
-#define CFI_OFFSET     __cfi_ignore
-#define CFI_REL_OFFSET __cfi_ignore
-#define CFI_REGISTER   __cfi_ignore
-#define CFI_RESTORE    __cfi_ignore
-#define CFI_REMEMBER_STATE __cfi_ignore
-#define CFI_RESTORE_STATE __cfi_ignore
-#define CFI_UNDEFINED __cfi_ignore
-#define CFI_SIGNAL_FRAME __cfi_ignore
+#define CFI_STARTPROC  cfi_ignore
+#define CFI_ENDPROC    cfi_ignore
+#define CFI_DEF_CFA    cfi_ignore
+#define CFI_DEF_CFA_REGISTER   cfi_ignore
+#define CFI_DEF_CFA_OFFSET     cfi_ignore
+#define CFI_ADJUST_CFA_OFFSET  cfi_ignore
+#define CFI_OFFSET     cfi_ignore
+#define CFI_REL_OFFSET cfi_ignore
+#define CFI_REGISTER   cfi_ignore
+#define CFI_RESTORE    cfi_ignore
+#define CFI_REMEMBER_STATE cfi_ignore
+#define CFI_RESTORE_STATE cfi_ignore
+#define CFI_UNDEFINED cfi_ignore
+#define CFI_SIGNAL_FRAME cfi_ignore
 
 #endif
 
index 0764b662b33936a396dea2c50f537ffea5ce31c5..1c1b13e29223a6e170372f4911e91af3a2021527 100644 (file)
@@ -1089,6 +1089,7 @@ extern int drm_mm_remove_space_from_tail(struct drm_mm *mm, unsigned long size);
 extern int drm_mm_add_space_to_tail(struct drm_mm *mm, unsigned long size);
 
 extern void drm_core_ioremap(struct drm_map *map, struct drm_device *dev);
+extern void drm_core_ioremap_wc(struct drm_map *map, struct drm_device *dev);
 extern void drm_core_ioremapfree(struct drm_map *map, struct drm_device *dev);
 
 static __inline__ struct drm_map *drm_core_findmap(struct drm_device *dev,
index 1ffd8bfdc4c94956fa14bb8e6c880d79e0953918..32a441b05fd5fbedf33748e7151cec38ea27dd90 100644 (file)
@@ -651,7 +651,6 @@ extern void generic_make_request(struct bio *bio);
 extern void blk_rq_init(struct request_queue *q, struct request *rq);
 extern void blk_put_request(struct request *);
 extern void __blk_put_request(struct request_queue *, struct request *);
-extern void blk_end_sync_rq(struct request *rq, int error);
 extern struct request *blk_get_request(struct request_queue *, int, gfp_t);
 extern void blk_insert_request(struct request_queue *, struct request *, int, void *);
 extern void blk_requeue_request(struct request_queue *, struct request *);
index faac13e2cc5c75b0afe4c940cc54151d11b614d3..52e510a0aec2715baa3bacef354cff909f4e79ae 100644 (file)
@@ -1740,6 +1740,8 @@ extern int wait_on_page_writeback_range(struct address_space *mapping,
                                pgoff_t start, pgoff_t end);
 extern int __filemap_fdatawrite_range(struct address_space *mapping,
                                loff_t start, loff_t end, int sync_mode);
+extern int filemap_fdatawrite_range(struct address_space *mapping,
+                               loff_t start, loff_t end);
 
 extern long do_fsync(struct file *file, int datasync);
 extern void sync_supers(void);
index 77afbb60fd1185fb76699eacd9a8ba6fd5f7f919..0177d280f733df0837f7635b6413b2ff0a23202a 100644 (file)
@@ -33,9 +33,11 @@ struct i2c_algo_pcf_data {
        int  (*getclock) (void *data);
        void (*waitforpin) (void);
 
-       /* local settings */
-       int udelay;
-       int timeout;
+       /* Multi-master lost arbitration back-off delay (msecs)
+        * This should be set by the bus adapter or knowledgeable client
+        * if bus is multi-mastered, else zero
+        */
+       unsigned long lab_mdelay;
 };
 
 int i2c_pcf_add_bus(struct i2c_adapter *);
index 580acc93903e63795a23c1efee2dddbc23cab7b0..ef13b7c66df32bd5fdbf2c5aa2ee1e0d8fb17a2e 100644 (file)
@@ -91,8 +91,6 @@
 #define I2C_DRIVERID_M52790    95      /* Mitsubishi M52790SP/FP AV switch */
 #define I2C_DRIVERID_CS5345    96      /* cs5345 audio processor       */
 
-#define I2C_DRIVERID_I2CDEV    900
-
 #define I2C_DRIVERID_OV7670 1048       /* Omnivision 7670 camera */
 
 /*
 #define I2C_HW_B_RIVA          0x010010 /* Riva based graphics cards */
 #define I2C_HW_B_IOC           0x010011 /* IOC bit-wiggling */
 #define I2C_HW_B_IXP2000       0x010016 /* GPIO on IXP2000 systems */
-#define I2C_HW_B_S3VIA         0x010018 /* S3Via ProSavage adapter */
 #define I2C_HW_B_ZR36067       0x010019 /* Zoran-36057/36067 based boards */
 #define I2C_HW_B_PCILYNX       0x01001a /* TI PCILynx I2C adapter */
 #define I2C_HW_B_CX2388x       0x01001b /* Conexant 2388x based tv cards */
index 8dc7301321926f6f4a6e9454dcd50d9aa9d1722f..08be0d21864c6fcd81dd667baee289b81edf20e5 100644 (file)
@@ -35,6 +35,8 @@
 #include <linux/sched.h>       /* for completion */
 #include <linux/mutex.h>
 
+extern struct bus_type i2c_bus_type;
+
 /* --- General options ------------------------------------------------        */
 
 struct i2c_msg;
@@ -43,6 +45,7 @@ struct i2c_adapter;
 struct i2c_client;
 struct i2c_driver;
 union i2c_smbus_data;
+struct i2c_board_info;
 
 /*
  * The master routines are the ones normally used to transmit data to devices
@@ -69,9 +72,8 @@ extern s32 i2c_smbus_xfer (struct i2c_adapter * adapter, u16 addr,
                            union i2c_smbus_data * data);
 
 /* Now follow the 'nice' access routines. These also document the calling
-   conventions of smbus_access. */
+   conventions of i2c_smbus_xfer. */
 
-extern s32 i2c_smbus_write_quick(struct i2c_client * client, u8 value);
 extern s32 i2c_smbus_read_byte(struct i2c_client * client);
 extern s32 i2c_smbus_write_byte(struct i2c_client * client, u8 value);
 extern s32 i2c_smbus_read_byte_data(struct i2c_client * client, u8 command);
@@ -93,15 +95,33 @@ extern s32 i2c_smbus_write_i2c_block_data(struct i2c_client * client,
                                          u8 command, u8 length,
                                          const u8 *values);
 
-/*
- * A driver is capable of handling one or more physical devices present on
- * I2C adapters. This information is used to inform the driver of adapter
- * events.
+/**
+ * struct i2c_driver - represent an I2C device driver
+ * @class: What kind of i2c device we instantiate (for detect)
+ * @detect: Callback for device detection
+ * @address_data: The I2C addresses to probe, ignore or force (for detect)
+ * @clients: List of detected clients we created (for i2c-core use only)
  *
  * The driver.owner field should be set to the module owner of this driver.
  * The driver.name field should be set to the name of this driver.
+ *
+ * For automatic device detection, both @detect and @address_data must
+ * be defined. @class should also be set, otherwise only devices forced
+ * with module parameters will be created. The detect function must
+ * fill at least the name field of the i2c_board_info structure it is
+ * handed upon successful detection, and possibly also the flags field.
+ *
+ * If @detect is missing, the driver will still work fine for enumerated
+ * devices. Detected devices simply won't be supported. This is expected
+ * for the many I2C/SMBus devices which can't be detected reliably, and
+ * the ones which can always be enumerated in practice.
+ *
+ * The i2c_client structure which is handed to the @detect callback is
+ * not a real i2c_client. It is initialized just enough so that you can
+ * call i2c_smbus_read_byte_data and friends on it. Don't do anything
+ * else with it. In particular, calling dev_dbg and friends on it is
+ * not allowed.
  */
-
 struct i2c_driver {
        int id;
        unsigned int class;
@@ -141,6 +161,11 @@ struct i2c_driver {
 
        struct device_driver driver;
        const struct i2c_device_id *id_table;
+
+       /* Device detection callback for automatic device creation */
+       int (*detect)(struct i2c_client *, int kind, struct i2c_board_info *);
+       const struct i2c_client_address_data *address_data;
+       struct list_head clients;
 };
 #define to_i2c_driver(d) container_of(d, struct i2c_driver, driver)
 
@@ -156,6 +181,7 @@ struct i2c_driver {
  * @dev: Driver model device node for the slave.
  * @irq: indicates the IRQ generated by this device (if any)
  * @list: list of active/busy clients (DEPRECATED)
+ * @detected: member of an i2c_driver.clients list
  * @released: used to synchronize client releases & detaches and references
  *
  * An i2c_client identifies a single device (i.e. chip) connected to an
@@ -173,6 +199,7 @@ struct i2c_client {
        struct device dev;              /* the device structure         */
        int irq;                        /* irq issued by device         */
        struct list_head list;          /* DEPRECATED */
+       struct list_head detected;
        struct completion released;
 };
 #define to_i2c_client(d) container_of(d, struct i2c_client, dev)
@@ -350,10 +377,11 @@ static inline void i2c_set_adapdata (struct i2c_adapter *dev, void *data)
 #define I2C_CLASS_HWMON                (1<<0)  /* lm_sensors, ... */
 #define I2C_CLASS_TV_ANALOG    (1<<1)  /* bttv + friends */
 #define I2C_CLASS_TV_DIGITAL   (1<<2)  /* dvb cards */
-#define I2C_CLASS_DDC          (1<<3)  /* i2c-matroxfb ? */
+#define I2C_CLASS_DDC          (1<<3)  /* DDC bus on graphics adapters */
 #define I2C_CLASS_CAM_ANALOG   (1<<4)  /* camera with analog CCD */
 #define I2C_CLASS_CAM_DIGITAL  (1<<5)  /* most webcams */
 #define I2C_CLASS_SOUND                (1<<6)  /* sound devices */
+#define I2C_CLASS_SPD          (1<<7)  /* SPD EEPROMs and similar */
 #define I2C_CLASS_ALL          (UINT_MAX) /* all of the above */
 
 /* i2c_client_address_data is the struct for holding default client
@@ -537,7 +565,7 @@ union i2c_smbus_data {
                               /* and one more for user-space compatibility */
 };
 
-/* smbus_access read or write markers */
+/* i2c_smbus_xfer read or write markers */
 #define I2C_SMBUS_READ 1
 #define I2C_SMBUS_WRITE        0
 
diff --git a/include/linux/i2c/at24.h b/include/linux/i2c/at24.h
new file mode 100644 (file)
index 0000000..f6edd52
--- /dev/null
@@ -0,0 +1,28 @@
+#ifndef _LINUX_AT24_H
+#define _LINUX_AT24_H
+
+#include <linux/types.h>
+
+/*
+ * As seen through Linux I2C, differences between the most common types of I2C
+ * memory include:
+ * - How much memory is available (usually specified in bits)?
+ * - What write page size does it support?
+ * - Special flags (16 bit addresses, read_only, world readable...)?
+ *
+ * If you set up a custom eeprom type, please double-check the parameters.
+ * Especially page_size needs extra care, as you risk data loss if your value
+ * is bigger than what the chip actually supports!
+ */
+
+struct at24_platform_data {
+       u32             byte_len;               /* size (sum of all addr) */
+       u16             page_size;              /* for writes */
+       u8              flags;
+#define AT24_FLAG_ADDR16       0x80    /* address pointer is 16 bit */
+#define AT24_FLAG_READONLY     0x40    /* sysfs-entry will be read-only */
+#define AT24_FLAG_IRUGO                0x20    /* sysfs-entry will be world-readable */
+#define AT24_FLAG_TAKE8ADDR    0x10    /* take always 8 addresses (24c00) */
+};
+
+#endif /* _LINUX_AT24_H */
index eddb6daadf4abceaffec3f9a0b36603e2e97f4cb..ac4eeb2932efb03a61484d1badbfd34620957a28 100644 (file)
@@ -364,7 +364,6 @@ typedef struct ide_drive_s {
         u8     wcache;         /* status of write cache */
        u8      acoustic;       /* acoustic management */
        u8      media;          /* disk, cdrom, tape, floppy, ... */
-       u8      ctl;            /* "normal" value for Control register */
        u8      ready_stat;     /* min status value for drive ready */
        u8      mult_count;     /* current multiple sector setting */
        u8      mult_req;       /* requested multiple sector setting */
@@ -493,7 +492,7 @@ typedef struct hwif_s {
        void (*ide_dma_clear_irq)(ide_drive_t *drive);
 
        void (*OUTB)(u8 addr, unsigned long port);
-       void (*OUTBSYNC)(ide_drive_t *drive, u8 addr, unsigned long port);
+       void (*OUTBSYNC)(struct hwif_s *hwif, u8 addr, unsigned long port);
 
        u8  (*INB)(unsigned long port);
 
@@ -532,7 +531,6 @@ typedef struct hwif_s {
        unsigned        serialized : 1; /* serialized all channel operation */
        unsigned        sharing_irq: 1; /* 1 = sharing irq with another hwif */
        unsigned        sg_mapped  : 1; /* sg_table and sg_nents are ready */
-       unsigned        mmio       : 1; /* host uses MMIO */
 
        struct device           gendev;
        struct device           *portdev;
@@ -604,12 +602,13 @@ enum {
        PC_FLAG_SUPPRESS_ERROR          = (1 << 1),
        PC_FLAG_WAIT_FOR_DSC            = (1 << 2),
        PC_FLAG_DMA_OK                  = (1 << 3),
-       PC_FLAG_DMA_RECOMMENDED         = (1 << 4),
-       PC_FLAG_DMA_IN_PROGRESS         = (1 << 5),
-       PC_FLAG_DMA_ERROR               = (1 << 6),
-       PC_FLAG_WRITING                 = (1 << 7),
+       PC_FLAG_DMA_IN_PROGRESS         = (1 << 4),
+       PC_FLAG_DMA_ERROR               = (1 << 5),
+       PC_FLAG_WRITING                 = (1 << 6),
        /* command timed out */
-       PC_FLAG_TIMEDOUT                = (1 << 8),
+       PC_FLAG_TIMEDOUT                = (1 << 7),
+       PC_FLAG_ZIP_DRIVE               = (1 << 8),
+       PC_FLAG_DRQ_INTERRUPT           = (1 << 9),
 };
 
 struct ide_atapi_pc {
@@ -642,8 +641,8 @@ struct ide_atapi_pc {
         * to change/removal later.
         */
        u8 pc_buf[256];
-       void (*idefloppy_callback) (ide_drive_t *);
-       ide_startstop_t (*idetape_callback) (ide_drive_t *);
+
+       void (*callback)(ide_drive_t *);
 
        /* idetape only */
        struct idetape_bh *bh;
@@ -813,10 +812,6 @@ int generic_ide_ioctl(ide_drive_t *, struct file *, struct block_device *, unsig
 #ifndef _IDE_C
 extern ide_hwif_t      ide_hwifs[];            /* master data repository */
 #endif
-extern int ide_noacpi;
-extern int ide_acpigtf;
-extern int ide_acpionboot;
-extern int noautodma;
 
 extern int ide_vlb_clk;
 extern int ide_pci_clk;
@@ -857,25 +852,12 @@ int ide_wait_stat(ide_startstop_t *, ide_drive_t *, u8, u8, unsigned long);
 
 extern ide_startstop_t ide_do_reset (ide_drive_t *);
 
-extern void ide_init_drive_cmd (struct request *rq);
-
-/*
- * "action" parameter type for ide_do_drive_cmd() below.
- */
-typedef enum {
-       ide_wait,       /* insert rq at end of list, and wait for it */
-       ide_preempt,    /* insert rq in front of current request */
-       ide_head_wait,  /* insert rq in front of current request and wait for it */
-       ide_end         /* insert rq at end of list, but don't wait for it */
-} ide_action_t;
-
-extern int ide_do_drive_cmd(ide_drive_t *, struct request *, ide_action_t);
+extern void ide_do_drive_cmd(ide_drive_t *, struct request *);
 
 extern void ide_end_drive_cmd(ide_drive_t *, u8, u8);
 
 enum {
        IDE_TFLAG_LBA48                 = (1 << 0),
-       IDE_TFLAG_NO_SELECT_MASK        = (1 << 1),
        IDE_TFLAG_FLAGGED               = (1 << 2),
        IDE_TFLAG_OUT_DATA              = (1 << 3),
        IDE_TFLAG_OUT_HOB_FEATURE       = (1 << 4),
@@ -980,11 +962,23 @@ typedef struct ide_task_s {
 void ide_tf_dump(const char *, struct ide_taskfile *);
 
 extern void SELECT_DRIVE(ide_drive_t *);
+void SELECT_MASK(ide_drive_t *, int);
 
 extern int drive_is_ready(ide_drive_t *);
 
 void ide_pktcmd_tf_load(ide_drive_t *, u32, u16, u8);
 
+ide_startstop_t ide_pc_intr(ide_drive_t *drive, struct ide_atapi_pc *pc,
+       ide_handler_t *handler, unsigned int timeout, ide_expiry_t *expiry,
+       void (*update_buffers)(ide_drive_t *, struct ide_atapi_pc *),
+       void (*retry_pc)(ide_drive_t *), void (*dsc_handle)(ide_drive_t *),
+       void (*io_buffers)(ide_drive_t *, struct ide_atapi_pc *, unsigned int,
+                          int));
+ide_startstop_t ide_transfer_pc(ide_drive_t *, struct ide_atapi_pc *,
+                               ide_handler_t *, unsigned int, ide_expiry_t *);
+ide_startstop_t ide_issue_pc(ide_drive_t *, struct ide_atapi_pc *,
+                            ide_handler_t *, unsigned int, ide_expiry_t *);
+
 ide_startstop_t do_rw_taskfile(ide_drive_t *, ide_task_t *);
 
 void task_end_request(ide_drive_t *, struct request *, u8);
@@ -996,8 +990,6 @@ int ide_taskfile_ioctl(ide_drive_t *, unsigned int, unsigned long);
 int ide_cmd_ioctl(ide_drive_t *, unsigned int, unsigned long);
 int ide_task_ioctl(ide_drive_t *, unsigned int, unsigned long);
 
-extern int system_bus_clock(void);
-
 extern int ide_driveid_update(ide_drive_t *);
 extern int ide_config_drive_speed(ide_drive_t *, u8);
 extern u8 eighty_ninty_three (ide_drive_t *);
@@ -1349,7 +1341,8 @@ static inline void ide_set_irq(ide_drive_t *drive, int on)
 {
        ide_hwif_t *hwif = drive->hwif;
 
-       hwif->OUTB(drive->ctl | (on ? 0 : 2), hwif->io_ports.ctl_addr);
+       hwif->OUTBSYNC(hwif, ATA_DEVCTL_OBS | (on ? 0 : 2),
+                      hwif->io_ports.ctl_addr);
 }
 
 static inline u8 ide_read_status(ide_drive_t *drive)
index a86186dd04743452e7a65c7886ed53637534d827..62aa4f895abe9fde4bd0fcbac99a9da71ceb0c63 100644 (file)
@@ -104,8 +104,11 @@ extern void enable_irq(unsigned int irq);
 
 #if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_HARDIRQS)
 
+extern cpumask_t irq_default_affinity;
+
 extern int irq_set_affinity(unsigned int irq, cpumask_t cpumask);
 extern int irq_can_set_affinity(unsigned int irq);
+extern int irq_select_affinity(unsigned int irq);
 
 #else /* CONFIG_SMP */
 
@@ -119,6 +122,8 @@ static inline int irq_can_set_affinity(unsigned int irq)
        return 0;
 }
 
+static inline int irq_select_affinity(unsigned int irq)  { return 0; }
+
 #endif /* CONFIG_SMP && CONFIG_GENERIC_HARDIRQS */
 
 #ifdef CONFIG_GENERIC_HARDIRQS
index 552e0ec269c9640b0536e2306a07236b9e0d43f8..8ccb462ea42c4cb3c4813c51ad285fb168efc2f9 100644 (file)
@@ -244,15 +244,6 @@ static inline void set_balance_irq_affinity(unsigned int irq, cpumask_t mask)
 }
 #endif
 
-#ifdef CONFIG_AUTO_IRQ_AFFINITY
-extern int select_smp_affinity(unsigned int irq);
-#else
-static inline int select_smp_affinity(unsigned int irq)
-{
-       return 1;
-}
-#endif
-
 extern int no_irq_affinity;
 
 static inline int irq_balancing_disabled(unsigned int irq)
index d147f0f903604e0a1d6ae08fefd22255be2fd3c3..3dd2090070989863af4e87f9cc25b21900578bd9 100644 (file)
@@ -168,6 +168,8 @@ struct commit_header {
        unsigned char   h_chksum_size;
        unsigned char   h_padding[2];
        __be32          h_chksum[JBD2_CHECKSUM_BYTES];
+       __be64          h_commit_sec;
+       __be32          h_commit_nsec;
 };
 
 /*
@@ -379,6 +381,38 @@ static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh)
        bit_spin_unlock(BH_JournalHead, &bh->b_state);
 }
 
+/* Flags in jbd2_inode->i_flags */
+#define __JI_COMMIT_RUNNING 0
+/* Commit of the inode data in progress. We use this flag to protect us from
+ * concurrent deletion of inode. We cannot use reference to inode for this
+ * since we cannot afford doing last iput() on behalf of kjournald
+ */
+#define JI_COMMIT_RUNNING (1 << __JI_COMMIT_RUNNING)
+
+/**
+ * struct jbd2_inode is the structure linking inodes in ordered mode
+ *   present in a transaction so that we can sync them during commit.
+ */
+struct jbd2_inode {
+       /* Which transaction does this inode belong to? Either the running
+        * transaction or the committing one. [j_list_lock] */
+       transaction_t *i_transaction;
+
+       /* Pointer to the running transaction modifying inode's data in case
+        * there is already a committing transaction touching it. [j_list_lock] */
+       transaction_t *i_next_transaction;
+
+       /* List of inodes in the i_transaction [j_list_lock] */
+       struct list_head i_list;
+
+       /* VFS inode this inode belongs to [constant during the lifetime
+        * of the structure] */
+       struct inode *i_vfs_inode;
+
+       /* Flags of inode [j_list_lock] */
+       unsigned int i_flags;
+};
+
 struct jbd2_revoke_table_s;
 
 /**
@@ -508,24 +542,12 @@ struct transaction_s
         */
        struct journal_head     *t_reserved_list;
 
-       /*
-        * Doubly-linked circular list of all buffers under writeout during
-        * commit [j_list_lock]
-        */
-       struct journal_head     *t_locked_list;
-
        /*
         * Doubly-linked circular list of all metadata buffers owned by this
         * transaction [j_list_lock]
         */
        struct journal_head     *t_buffers;
 
-       /*
-        * Doubly-linked circular list of all data buffers still to be
-        * flushed before this transaction can be committed [j_list_lock]
-        */
-       struct journal_head     *t_sync_datalist;
-
        /*
         * Doubly-linked circular list of all forget buffers (superseded
         * buffers which we can un-checkpoint once this transaction commits)
@@ -564,6 +586,12 @@ struct transaction_s
         */
        struct journal_head     *t_log_list;
 
+       /*
+        * List of inodes whose data we've modified in data=ordered mode.
+        * [j_list_lock]
+        */
+       struct list_head        t_inode_list;
+
        /*
         * Protects info related to handles
         */
@@ -1004,7 +1032,6 @@ extern int         jbd2_journal_extend (handle_t *, int nblocks);
 extern int      jbd2_journal_get_write_access(handle_t *, struct buffer_head *);
 extern int      jbd2_journal_get_create_access (handle_t *, struct buffer_head *);
 extern int      jbd2_journal_get_undo_access(handle_t *, struct buffer_head *);
-extern int      jbd2_journal_dirty_data (handle_t *, struct buffer_head *);
 extern int      jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *);
 extern void     jbd2_journal_release_buffer (handle_t *, struct buffer_head *);
 extern int      jbd2_journal_forget (handle_t *, struct buffer_head *);
@@ -1044,6 +1071,10 @@ extern void         jbd2_journal_ack_err    (journal_t *);
 extern int        jbd2_journal_clear_err  (journal_t *);
 extern int        jbd2_journal_bmap(journal_t *, unsigned long, unsigned long long *);
 extern int        jbd2_journal_force_commit(journal_t *);
+extern int        jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *inode);
+extern int        jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode, loff_t new_size);
+extern void       jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode);
+extern void       jbd2_journal_release_jbd_inode(journal_t *journal, struct jbd2_inode *jinode);
 
 /*
  * journal_head management
@@ -1179,15 +1210,13 @@ static inline int jbd_space_needed(journal_t *journal)
 
 /* journaling buffer types */
 #define BJ_None                0       /* Not journaled */
-#define BJ_SyncData    1       /* Normal data: flush before commit */
-#define BJ_Metadata    2       /* Normal journaled metadata */
-#define BJ_Forget      3       /* Buffer superseded by this transaction */
-#define BJ_IO          4       /* Buffer is for temporary IO use */
-#define BJ_Shadow      5       /* Buffer contents being shadowed to the log */
-#define BJ_LogCtl      6       /* Buffer contains log descriptors */
-#define BJ_Reserved    7       /* Buffer is reserved for access by journal */
-#define BJ_Locked      8       /* Locked for I/O during commit */
-#define BJ_Types       9
+#define BJ_Metadata    1       /* Normal journaled metadata */
+#define BJ_Forget      2       /* Buffer superseded by this transaction */
+#define BJ_IO          3       /* Buffer is for temporary IO use */
+#define BJ_Shadow      4       /* Buffer contents being shadowed to the log */
+#define BJ_LogCtl      5       /* Buffer contains log descriptors */
+#define BJ_Reserved    6       /* Buffer is reserved for access by journal */
+#define BJ_Types       7
 
 extern int jbd_blocks_per_page(struct inode *inode);
 
index e57e5d08312d44929321204c1c0600b1068f82eb..5b247b8a6b3bf9e77143d62c0fb8c57cedd5cee3 100644 (file)
@@ -27,6 +27,7 @@
 #define __LINUX_LIBATA_H__
 
 #include <linux/delay.h>
+#include <linux/jiffies.h>
 #include <linux/interrupt.h>
 #include <linux/dma-mapping.h>
 #include <linux/scatterlist.h>
@@ -115,7 +116,7 @@ enum {
        /* tag ATA_MAX_QUEUE - 1 is reserved for internal commands */
        ATA_MAX_QUEUE           = 32,
        ATA_TAG_INTERNAL        = ATA_MAX_QUEUE - 1,
-       ATA_SHORT_PAUSE         = (HZ >> 6) + 1,
+       ATA_SHORT_PAUSE         = 16,
 
        ATAPI_MAX_DRAIN         = 16 << 10,
 
@@ -168,6 +169,7 @@ enum {
        ATA_LFLAG_ASSUME_CLASS  = ATA_LFLAG_ASSUME_ATA | ATA_LFLAG_ASSUME_SEMB,
        ATA_LFLAG_NO_RETRY      = (1 << 5), /* don't retry this link */
        ATA_LFLAG_DISABLED      = (1 << 6), /* link is disabled */
+       ATA_LFLAG_SW_ACTIVITY   = (1 << 7), /* keep activity stats */
 
        /* struct ata_port flags */
        ATA_FLAG_SLAVE_POSS     = (1 << 0), /* host supports slave dev */
@@ -190,6 +192,10 @@ enum {
        ATA_FLAG_AN             = (1 << 18), /* controller supports AN */
        ATA_FLAG_PMP            = (1 << 19), /* controller supports PMP */
        ATA_FLAG_IPM            = (1 << 20), /* driver can handle IPM */
+       ATA_FLAG_EM             = (1 << 21), /* driver supports enclosure
+                                             * management */
+       ATA_FLAG_SW_ACTIVITY    = (1 << 22), /* driver supports sw activity
+                                             * led */
 
        /* The following flag belongs to ap->pflags but is kept in
         * ap->flags because it's referenced in many LLDs and will be
@@ -234,17 +240,16 @@ enum {
        /* bits 24:31 of host->flags are reserved for LLD specific flags */
 
        /* various lengths of time */
-       ATA_TMOUT_BOOT          = 30 * HZ,      /* heuristic */
-       ATA_TMOUT_BOOT_QUICK    = 7 * HZ,       /* heuristic */
-       ATA_TMOUT_INTERNAL      = 30 * HZ,
-       ATA_TMOUT_INTERNAL_QUICK = 5 * HZ,
+       ATA_TMOUT_BOOT          = 30000,        /* heuristic */
+       ATA_TMOUT_BOOT_QUICK    =  7000,        /* heuristic */
+       ATA_TMOUT_INTERNAL_QUICK = 5000,
 
        /* FIXME: GoVault needs 2s but we can't afford that without
         * parallel probing.  800ms is enough for iVDR disk
         * HHD424020F7SV00.  Increase to 2secs when parallel probing
         * is in place.
         */
-       ATA_TMOUT_FF_WAIT       = 4 * HZ / 5,
+       ATA_TMOUT_FF_WAIT       =  800,
 
        /* Spec mandates to wait for ">= 2ms" before checking status
         * after reset.  We wait 150ms, because that was the magic
@@ -256,14 +261,14 @@ enum {
         *
         * Old drivers/ide uses the 2mS rule and then waits for ready.
         */
-       ATA_WAIT_AFTER_RESET_MSECS = 150,
+       ATA_WAIT_AFTER_RESET    =  150,
 
        /* If PMP is supported, we have to do follow-up SRST.  As some
         * PMPs don't send D2H Reg FIS after hardreset, LLDs are
         * advised to wait only for the following duration before
         * doing SRST.
         */
-       ATA_TMOUT_PMP_SRST_WAIT = 1 * HZ,
+       ATA_TMOUT_PMP_SRST_WAIT = 1000,
 
        /* ATA bus states */
        BUS_UNKNOWN             = 0,
@@ -340,6 +345,11 @@ enum {
 
        SATA_PMP_RW_TIMEOUT     = 3000,         /* PMP read/write timeout */
 
+       /* This should match the actual table size of
+        * ata_eh_cmd_timeout_table in libata-eh.c.
+        */
+       ATA_EH_CMD_TIMEOUT_TABLE_SIZE = 5,
+
        /* Horkage types. May be set by libata or controller on drives
           (some horkage may be drive/controller pair dependent) */
 
@@ -441,6 +451,15 @@ enum link_pm {
        MEDIUM_POWER,
 };
 extern struct device_attribute dev_attr_link_power_management_policy;
+extern struct device_attribute dev_attr_em_message_type;
+extern struct device_attribute dev_attr_em_message;
+extern struct device_attribute dev_attr_sw_activity;
+
+enum sw_activity {
+       OFF,
+       BLINK_ON,
+       BLINK_OFF,
+};
 
 #ifdef CONFIG_ATA_SFF
 struct ata_ioports {
@@ -597,10 +616,14 @@ struct ata_eh_info {
 struct ata_eh_context {
        struct ata_eh_info      i;
        int                     tries[ATA_MAX_DEVICES];
+       int                     cmd_timeout_idx[ATA_MAX_DEVICES]
+                                              [ATA_EH_CMD_TIMEOUT_TABLE_SIZE];
        unsigned int            classes[ATA_MAX_DEVICES];
        unsigned int            did_probe_mask;
        unsigned int            saved_ncq_enabled;
        u8                      saved_xfer_mode[ATA_MAX_DEVICES];
+       /* timestamp for the last reset attempt or success */
+       unsigned long           last_reset;
 };
 
 struct ata_acpi_drive
@@ -692,6 +715,7 @@ struct ata_port {
        struct timer_list       fastdrain_timer;
        unsigned long           fastdrain_cnt;
 
+       int                     em_message_type;
        void                    *private_data;
 
 #ifdef CONFIG_ATA_ACPI
@@ -783,6 +807,12 @@ struct ata_port_operations {
        u8   (*bmdma_status)(struct ata_port *ap);
 #endif /* CONFIG_ATA_SFF */
 
+       ssize_t (*em_show)(struct ata_port *ap, char *buf);
+       ssize_t (*em_store)(struct ata_port *ap, const char *message,
+                           size_t size);
+       ssize_t (*sw_activity_show)(struct ata_device *dev, char *buf);
+       ssize_t (*sw_activity_store)(struct ata_device *dev,
+                                    enum sw_activity val);
        /*
         * Obsolete
         */
@@ -895,8 +925,7 @@ extern void ata_host_resume(struct ata_host *host);
 #endif
 extern int ata_ratelimit(void);
 extern u32 ata_wait_register(void __iomem *reg, u32 mask, u32 val,
-                            unsigned long interval_msec,
-                            unsigned long timeout_msec);
+                            unsigned long interval, unsigned long timeout);
 extern int atapi_cmd_type(u8 opcode);
 extern void ata_tf_to_fis(const struct ata_taskfile *tf,
                          u8 pmp, int is_cmd, u8 *fis);
@@ -1389,6 +1418,12 @@ static inline int ata_check_ready(u8 status)
        return 0;
 }
 
+static inline unsigned long ata_deadline(unsigned long from_jiffies,
+                                        unsigned long timeout_msecs)
+{
+       return from_jiffies + msecs_to_jiffies(timeout_msecs);
+}
+
 
 /**************************************************************************
  * PMP - drivers/ata/libata-pmp.c
index f274997bc2832fb9d8cf0c4e6d0ee2d5858d61e9..2ed8fa1b762ba6e5b36b4cf1118c256ac0c9d318 100644 (file)
@@ -122,11 +122,9 @@ typedef void (*lm_callback_t) (void *ptr, unsigned int type, void *data);
  */
 
 #define LM_OUT_ST_MASK         0x00000003
-#define LM_OUT_CACHEABLE       0x00000004
 #define LM_OUT_CANCELED                0x00000008
 #define LM_OUT_ASYNC           0x00000080
 #define LM_OUT_ERROR           0x00000100
-#define LM_OUT_CONV_DEADLK     0x00000200
 
 /*
  * lm_callback_t types
@@ -138,9 +136,6 @@ typedef void (*lm_callback_t) (void *ptr, unsigned int type, void *data);
  * LM_CB_NEED_RECOVERY
  * The given journal needs to be recovered.
  *
- * LM_CB_DROPLOCKS
- * Reduce the number of cached locks.
- *
  * LM_CB_ASYNC
  * The given lock has been granted.
  */
@@ -149,7 +144,6 @@ typedef void (*lm_callback_t) (void *ptr, unsigned int type, void *data);
 #define LM_CB_NEED_D           258
 #define LM_CB_NEED_S           259
 #define LM_CB_NEED_RECOVERY    260
-#define LM_CB_DROPLOCKS                261
 #define LM_CB_ASYNC            262
 
 /*
index a744383d16e956b7fbc6cf4be6f2cb179ec2c0f3..81b3dd5206e04e659c3fae8114e12a20175c5128 100644 (file)
@@ -398,7 +398,8 @@ int mlx4_srq_query(struct mlx4_dev *dev, struct mlx4_srq *srq, int *limit_waterm
 int mlx4_INIT_PORT(struct mlx4_dev *dev, int port);
 int mlx4_CLOSE_PORT(struct mlx4_dev *dev, int port);
 
-int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16]);
+int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
+                         int block_mcast_loopback);
 int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16]);
 
 int mlx4_map_phys_fmr(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u64 *page_list,
index 068a0c9946af7e1f780f8cd287a517a14852286b..5c42821da2d19d7f010071b272e76147056646ab 100644 (file)
  */
 #ifdef CONFIG_BLOCK
 
+struct mpage_data {
+       struct bio *bio;
+       sector_t last_block_in_bio;
+       get_block_t *get_block;
+       unsigned use_writepage;
+};
+
 struct writeback_control;
 
+struct bio *mpage_bio_submit(int rw, struct bio *bio);
 int mpage_readpages(struct address_space *mapping, struct list_head *pages,
                                unsigned nr_pages, get_block_t get_block);
 int mpage_readpage(struct page *page, get_block_t get_block);
+int __mpage_writepage(struct page *page, struct writeback_control *wbc,
+                     void *data);
 int mpage_writepages(struct address_space *mapping,
                struct writeback_control *wbc, get_block_t get_block);
 int mpage_writepage(struct page *page, get_block_t *get_block,
index 9007ccdfc1127cfe73db03e31dd82a843f8f4fa8..20838883535704c89cf72bafe29a28d12dea45e3 100644 (file)
@@ -35,7 +35,7 @@ int percpu_counter_init_irq(struct percpu_counter *fbc, s64 amount);
 void percpu_counter_destroy(struct percpu_counter *fbc);
 void percpu_counter_set(struct percpu_counter *fbc, s64 amount);
 void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch);
-s64 __percpu_counter_sum(struct percpu_counter *fbc);
+s64 __percpu_counter_sum(struct percpu_counter *fbc, int set);
 
 static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount)
 {
@@ -44,13 +44,19 @@ static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount)
 
 static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc)
 {
-       s64 ret = __percpu_counter_sum(fbc);
+       s64 ret = __percpu_counter_sum(fbc, 0);
        return ret < 0 ? 0 : ret;
 }
 
+static inline s64 percpu_counter_sum_and_set(struct percpu_counter *fbc)
+{
+       return __percpu_counter_sum(fbc, 1);
+}
+
+
 static inline s64 percpu_counter_sum(struct percpu_counter *fbc)
 {
-       return __percpu_counter_sum(fbc);
+       return __percpu_counter_sum(fbc, 0);
 }
 
 static inline s64 percpu_counter_read(struct percpu_counter *fbc)
index 24f3d2282e1184c3741e7d99f212ca4079f3e723..2158fc0d5a56448001fa8a0cb569b66790cdd60a 100644 (file)
@@ -179,4 +179,17 @@ void arch_update_cpu_topology(void);
 #endif
 #endif /* CONFIG_NUMA */
 
+#ifndef topology_physical_package_id
+#define topology_physical_package_id(cpu)      ((void)(cpu), -1)
+#endif
+#ifndef topology_core_id
+#define topology_core_id(cpu)                  ((void)(cpu), 0)
+#endif
+#ifndef topology_thread_siblings
+#define topology_thread_siblings(cpu)          cpumask_of_cpu(cpu)
+#endif
+#ifndef topology_core_siblings
+#define topology_core_siblings(cpu)            cpumask_of_cpu(cpu)
+#endif
+
 #endif /* _LINUX_TOPOLOGY_H */
index bd91987c065fcd1f923a5cd05d6f781c3f685aab..12b15c561a1f1ca3dcfe91c9f64e439857fc6bd6 100644 (file)
@@ -63,6 +63,7 @@ struct writeback_control {
        unsigned for_writepages:1;      /* This is a writepages() call */
        unsigned range_cyclic:1;        /* range_start is cyclic */
        unsigned more_io:1;             /* more io to be dispatched */
+       unsigned range_cont:1;
 };
 
 /*
index c36750ff6ae82caf596aaee36d25de55e9d4316a..483057b2f4b40487805d84163c5013a385dc04aa 100644 (file)
@@ -2,29 +2,33 @@
  * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
  * Copyright (c) 2005 Intel Corporation.  All rights reserved.
  *
- * This Software is licensed under one of the following licenses:
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
  *
- * 1) under the terms of the "Common Public License 1.0" a copy of which is
- *    available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/cpl.php.
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
  *
- * 2) under the terms of the "The BSD License" a copy of which is
- *    available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/bsd-license.php.
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
  *
- * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
- *    copy of which is available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/gpl-license.php.
- *
- * Licensee has the right to choose one of the above licenses.
- *
- * Redistributions of source code must retain the above copyright
- * notice and one of the license notices.
- *
- * Redistributions in binary form must reproduce both the above copyright
- * notice, one of the license notices in the documentation
- * and/or other materials provided with the distribution.
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
  *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
  */
 
 #if !defined(IB_ADDR_H)
@@ -57,6 +61,7 @@ struct rdma_dev_addr {
        unsigned char dst_dev_addr[MAX_ADDR_LEN];
        unsigned char broadcast[MAX_ADDR_LEN];
        enum rdma_node_type dev_type;
+       struct net_device *src_dev;
 };
 
 /**
index f179d233ffc34f22aa02ee2d928d74c691566938..00a2b8ec327f7c1ce089172c2a1a92474bfee39e 100644 (file)
@@ -30,8 +30,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: ib_cache.h 1349 2004-12-16 21:09:43Z roland $
  */
 
 #ifndef _IB_CACHE_H
index a627c8682d2fbd49a3847245ad29ac3398e2a9b6..ec7c6d99ed3f557208f059e1fc3d99877dd9ee12 100644 (file)
@@ -31,8 +31,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: ib_cm.h 4311 2005-12-05 18:42:01Z sean.hefty $
  */
 #if !defined(IB_CM_H)
 #define IB_CM_H
index 00dadbf94e1d75cca1d9004a9b997efe3eb103c6..f62b842e6596142750d476ef5f6ce606f253ab43 100644 (file)
@@ -29,8 +29,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: ib_fmr_pool.h 2730 2005-06-28 16:43:03Z sean.hefty $
  */
 
 #if !defined(IB_FMR_POOL_H)
@@ -61,7 +59,7 @@ struct ib_fmr_pool_param {
        int                     pool_size;
        int                     dirty_watermark;
        void                  (*flush_function)(struct ib_fmr_pool *pool,
-                                               void *              arg);
+                                               void               *arg);
        void                   *flush_arg;
        unsigned                cache:1;
 };
index 7228c056b9e9b3d9f78d05da24ed0b34d1a74e82..5f6c40fffcf4a35e617eb44bcba2049a243a036c 100644 (file)
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: ib_mad.h 5596 2006-03-03 01:00:07Z sean.hefty $
  */
 
-#if !defined( IB_MAD_H )
+#if !defined(IB_MAD_H)
 #define IB_MAD_H
 
 #include <linux/list.h>
@@ -194,8 +192,7 @@ struct ib_vendor_mad {
        u8                      data[IB_MGMT_VENDOR_DATA];
 };
 
-struct ib_class_port_info
-{
+struct ib_class_port_info {
        u8                      base_version;
        u8                      class_version;
        __be16                  capability_mask;
@@ -614,11 +611,11 @@ int ib_process_mad_wc(struct ib_mad_agent *mad_agent,
  * any class specific header, and MAD data area.
  * If @rmpp_active is set, the RMPP header will be initialized for sending.
  */
-struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
-                                           u32 remote_qpn, u16 pkey_index,
-                                           int rmpp_active,
-                                           int hdr_len, int data_len,
-                                           gfp_t gfp_mask);
+struct ib_mad_send_buf *ib_create_send_mad(struct ib_mad_agent *mad_agent,
+                                          u32 remote_qpn, u16 pkey_index,
+                                          int rmpp_active,
+                                          int hdr_len, int data_len,
+                                          gfp_t gfp_mask);
 
 /**
  * ib_is_mad_class_rmpp - returns whether given management class
index f926020d63314dc2a5a43fd047ea4ef95d43bc36..d7fc45c4eba9e95d409d0a96677ff0e6add87f96 100644 (file)
@@ -28,8 +28,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: ib_pack.h 1349 2004-12-16 21:09:43Z roland $
  */
 
 #ifndef IB_PACK_H
index 942692b0b92e2063664ce0af9e0550f16ab38507..3841c1aff692556ed4518bfd79c56cd4f6fdce59 100644 (file)
@@ -30,8 +30,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: ib_sa.h 2811 2005-07-06 18:11:43Z halr $
  */
 
 #ifndef IB_SA_H
index f29af135ba833c841926e33b6d9bac0de42693e9..aaca0878668fb388922ddf4c556bc86c8d0228ad 100644 (file)
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: ib_smi.h 1389 2004-12-27 22:56:47Z roland $
  */
 
-#if !defined( IB_SMI_H )
+#if !defined(IB_SMI_H)
 #define IB_SMI_H
 
 #include <rdma/ib_mad.h>
index 37650afb982c7ee85608157ea4df5118968fa82f..bd3d380781e0bcc61fb464d9d50d217b24251d14 100644 (file)
@@ -29,8 +29,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: ib_user_cm.h 4019 2005-11-11 00:33:09Z sean.hefty $
  */
 
 #ifndef IB_USER_CM_H
index 29d2c7205a90f17d76928dd1c01ae9940866005c..d6fce1cbdb906985ee89fe8556e6a205bc36ee6e 100644 (file)
@@ -29,8 +29,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: ib_user_mad.h 2814 2005-07-06 19:14:09Z halr $
  */
 
 #ifndef IB_USER_MAD_H
index 8d65bf0a625b0ef14a119760246e8acd323d6bbc..a17f77106149bb8ee63f02a8bdddf79f72a2c928 100644 (file)
@@ -31,8 +31,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: ib_user_verbs.h 4019 2005-11-11 00:33:09Z sean.hefty $
  */
 
 #ifndef IB_USER_VERBS_H
@@ -291,7 +289,10 @@ struct ib_uverbs_wc {
        __u32 opcode;
        __u32 vendor_err;
        __u32 byte_len;
-       __u32 imm_data;
+       union {
+               __u32 imm_data;
+               __u32 invalidate_rkey;
+       } ex;
        __u32 qp_num;
        __u32 src_qp;
        __u32 wc_flags;
index 31d30b1852e8880c0e63333085309e5263f0954b..90b529f7a154efdc26c02b43b7e591022b491987 100644 (file)
@@ -34,8 +34,6 @@
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * $Id: ib_verbs.h 1349 2004-12-16 21:09:43Z roland $
  */
 
 #if !defined(IB_VERBS_H)
@@ -93,7 +91,7 @@ enum ib_device_cap_flags {
        IB_DEVICE_RC_RNR_NAK_GEN        = (1<<12),
        IB_DEVICE_SRQ_RESIZE            = (1<<13),
        IB_DEVICE_N_NOTIFY_CQ           = (1<<14),
-       IB_DEVICE_ZERO_STAG             = (1<<15),
+       IB_DEVICE_LOCAL_DMA_LKEY        = (1<<15),
        IB_DEVICE_RESERVED              = (1<<16), /* old SEND_W_INV */
        IB_DEVICE_MEM_WINDOW            = (1<<17),
        /*
@@ -105,6 +103,8 @@ enum ib_device_cap_flags {
         */
        IB_DEVICE_UD_IP_CSUM            = (1<<18),
        IB_DEVICE_UD_TSO                = (1<<19),
+       IB_DEVICE_MEM_MGT_EXTENSIONS    = (1<<21),
+       IB_DEVICE_BLOCK_MULTICAST_LOOPBACK = (1<<22),
 };
 
 enum ib_atomic_cap {
@@ -150,6 +150,7 @@ struct ib_device_attr {
        int                     max_srq;
        int                     max_srq_wr;
        int                     max_srq_sge;
+       unsigned int            max_fast_reg_page_list_len;
        u16                     max_pkeys;
        u8                      local_ca_ack_delay;
 };
@@ -226,6 +227,57 @@ static inline int ib_width_enum_to_int(enum ib_port_width width)
        }
 }
 
+struct ib_protocol_stats {
+       /* TBD... */
+};
+
+struct iw_protocol_stats {
+       u64     ipInReceives;
+       u64     ipInHdrErrors;
+       u64     ipInTooBigErrors;
+       u64     ipInNoRoutes;
+       u64     ipInAddrErrors;
+       u64     ipInUnknownProtos;
+       u64     ipInTruncatedPkts;
+       u64     ipInDiscards;
+       u64     ipInDelivers;
+       u64     ipOutForwDatagrams;
+       u64     ipOutRequests;
+       u64     ipOutDiscards;
+       u64     ipOutNoRoutes;
+       u64     ipReasmTimeout;
+       u64     ipReasmReqds;
+       u64     ipReasmOKs;
+       u64     ipReasmFails;
+       u64     ipFragOKs;
+       u64     ipFragFails;
+       u64     ipFragCreates;
+       u64     ipInMcastPkts;
+       u64     ipOutMcastPkts;
+       u64     ipInBcastPkts;
+       u64     ipOutBcastPkts;
+
+       u64     tcpRtoAlgorithm;
+       u64     tcpRtoMin;
+       u64     tcpRtoMax;
+       u64     tcpMaxConn;
+       u64     tcpActiveOpens;
+       u64     tcpPassiveOpens;
+       u64     tcpAttemptFails;
+       u64     tcpEstabResets;
+       u64     tcpCurrEstab;
+       u64     tcpInSegs;
+       u64     tcpOutSegs;
+       u64     tcpRetransSegs;
+       u64     tcpInErrs;
+       u64     tcpOutRsts;
+};
+
+union rdma_protocol_stats {
+       struct ib_protocol_stats        ib;
+       struct iw_protocol_stats        iw;
+};
+
 struct ib_port_attr {
        enum ib_port_state      state;
        enum ib_mtu             max_mtu;
@@ -413,6 +465,8 @@ enum ib_wc_opcode {
        IB_WC_FETCH_ADD,
        IB_WC_BIND_MW,
        IB_WC_LSO,
+       IB_WC_LOCAL_INV,
+       IB_WC_FAST_REG_MR,
 /*
  * Set value of IB_WC_RECV so consumers can test if a completion is a
  * receive by testing (opcode & IB_WC_RECV).
@@ -423,7 +477,8 @@ enum ib_wc_opcode {
 
 enum ib_wc_flags {
        IB_WC_GRH               = 1,
-       IB_WC_WITH_IMM          = (1<<1)
+       IB_WC_WITH_IMM          = (1<<1),
+       IB_WC_WITH_INVALIDATE   = (1<<2),
 };
 
 struct ib_wc {
@@ -433,7 +488,10 @@ struct ib_wc {
        u32                     vendor_err;
        u32                     byte_len;
        struct ib_qp           *qp;
-       __be32                  imm_data;
+       union {
+               __be32          imm_data;
+               u32             invalidate_rkey;
+       } ex;
        u32                     src_qp;
        int                     wc_flags;
        u16                     pkey_index;
@@ -498,7 +556,8 @@ enum ib_qp_type {
 };
 
 enum ib_qp_create_flags {
-       IB_QP_CREATE_IPOIB_UD_LSO       = 1 << 0,
+       IB_QP_CREATE_IPOIB_UD_LSO               = 1 << 0,
+       IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK   = 1 << 1,
 };
 
 struct ib_qp_init_attr {
@@ -627,6 +686,9 @@ enum ib_wr_opcode {
        IB_WR_ATOMIC_FETCH_AND_ADD,
        IB_WR_LSO,
        IB_WR_SEND_WITH_INV,
+       IB_WR_RDMA_READ_WITH_INV,
+       IB_WR_LOCAL_INV,
+       IB_WR_FAST_REG_MR,
 };
 
 enum ib_send_flags {
@@ -643,6 +705,12 @@ struct ib_sge {
        u32     lkey;
 };
 
+struct ib_fast_reg_page_list {
+       struct ib_device       *device;
+       u64                    *page_list;
+       unsigned int            max_page_list_len;
+};
+
 struct ib_send_wr {
        struct ib_send_wr      *next;
        u64                     wr_id;
@@ -675,6 +743,15 @@ struct ib_send_wr {
                        u16     pkey_index; /* valid for GSI only */
                        u8      port_num;   /* valid for DR SMPs on switch only */
                } ud;
+               struct {
+                       u64                             iova_start;
+                       struct ib_fast_reg_page_list   *page_list;
+                       unsigned int                    page_shift;
+                       unsigned int                    page_list_len;
+                       u32                             length;
+                       int                             access_flags;
+                       u32                             rkey;
+               } fast_reg;
        } wr;
 };
 
@@ -777,7 +854,7 @@ struct ib_cq {
        struct ib_uobject      *uobject;
        ib_comp_handler         comp_handler;
        void                  (*event_handler)(struct ib_event *, void *);
-       void *                  cq_context;
+       void                   *cq_context;
        int                     cqe;
        atomic_t                usecnt; /* count number of work queues */
 };
@@ -883,7 +960,7 @@ struct ib_dma_mapping_ops {
        void            (*sync_single_for_cpu)(struct ib_device *dev,
                                               u64 dma_handle,
                                               size_t size,
-                                              enum dma_data_direction dir);
+                                              enum dma_data_direction dir);
        void            (*sync_single_for_device)(struct ib_device *dev,
                                                  u64 dma_handle,
                                                  size_t size,
@@ -919,6 +996,8 @@ struct ib_device {
 
        struct iw_cm_verbs           *iwcm;
 
+       int                        (*get_protocol_stats)(struct ib_device *device,
+                                                        union rdma_protocol_stats *stats);
        int                        (*query_device)(struct ib_device *device,
                                                   struct ib_device_attr *device_attr);
        int                        (*query_port)(struct ib_device *device,
@@ -1013,6 +1092,11 @@ struct ib_device {
        int                        (*query_mr)(struct ib_mr *mr,
                                               struct ib_mr_attr *mr_attr);
        int                        (*dereg_mr)(struct ib_mr *mr);
+       struct ib_mr *             (*alloc_fast_reg_mr)(struct ib_pd *pd,
+                                              int max_page_list_len);
+       struct ib_fast_reg_page_list * (*alloc_fast_reg_page_list)(struct ib_device *device,
+                                                                  int page_list_len);
+       void                       (*free_fast_reg_page_list)(struct ib_fast_reg_page_list *page_list);
        int                        (*rereg_phys_mr)(struct ib_mr *mr,
                                                    int mr_rereg_mask,
                                                    struct ib_pd *pd,
@@ -1065,6 +1149,7 @@ struct ib_device {
 
        char                         node_desc[64];
        __be64                       node_guid;
+       u32                          local_dma_lkey;
        u8                           node_type;
        u8                           phys_port_cnt;
 };
@@ -1806,6 +1891,54 @@ int ib_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr);
  */
 int ib_dereg_mr(struct ib_mr *mr);
 
+/**
+ * ib_alloc_fast_reg_mr - Allocates memory region usable with the
+ *   IB_WR_FAST_REG_MR send work request.
+ * @pd: The protection domain associated with the region.
+ * @max_page_list_len: requested max physical buffer list length to be
+ *   used with fast register work requests for this MR.
+ */
+struct ib_mr *ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len);
+
+/**
+ * ib_alloc_fast_reg_page_list - Allocates a page list array
+ * @device - ib device pointer.
+ * @page_list_len - size of the page list array to be allocated.
+ *
+ * This allocates and returns a struct ib_fast_reg_page_list * and a
+ * page_list array that is at least page_list_len in size.  The actual
+ * size is returned in max_page_list_len.  The caller is responsible
+ * for initializing the contents of the page_list array before posting
+ * a send work request with the IB_WC_FAST_REG_MR opcode.
+ *
+ * The page_list array entries must be translated using one of the
+ * ib_dma_*() functions just like the addresses passed to
+ * ib_map_phys_fmr().  Once the ib_post_send() is issued, the struct
+ * ib_fast_reg_page_list must not be modified by the caller until the
+ * IB_WC_FAST_REG_MR work request completes.
+ */
+struct ib_fast_reg_page_list *ib_alloc_fast_reg_page_list(
+                               struct ib_device *device, int page_list_len);
+
+/**
+ * ib_free_fast_reg_page_list - Deallocates a previously allocated
+ *   page list array.
+ * @page_list - struct ib_fast_reg_page_list pointer to be deallocated.
+ */
+void ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list);
+
+/**
+ * ib_update_fast_reg_key - updates the key portion of the fast_reg MR
+ *   R_Key and L_Key.
+ * @mr - struct ib_mr pointer to be updated.
+ * @newkey - new key to be used.
+ */
+static inline void ib_update_fast_reg_key(struct ib_mr *mr, u8 newkey)
+{
+       mr->lkey = (mr->lkey & 0xffffff00) | newkey;
+       mr->rkey = (mr->rkey & 0xffffff00) | newkey;
+}
+
 /**
  * ib_alloc_mw - Allocates a memory window.
  * @pd: The protection domain associated with the memory window.
index aeefa9b740dc88dda55d327d79a894bc680bcf6d..cbb822e8d7913d8cd30fbdc7217b9be373709f0c 100644 (file)
@@ -62,7 +62,7 @@ struct iw_cm_event {
        struct sockaddr_in remote_addr;
        void *private_data;
        u8 private_data_len;
-       void* provider_data;
+       void *provider_data;
 };
 
 /**
index 010f876f41d8b559804fee1d0e3e42f7dec1a1d6..22bb2e7bab1a9ea7c879d60696854232f270a363 100644 (file)
@@ -2,29 +2,33 @@
  * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
  * Copyright (c) 2005 Intel Corporation.  All rights reserved.
  *
- * This Software is licensed under one of the following licenses:
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
  *
- * 1) under the terms of the "Common Public License 1.0" a copy of which is
- *    available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/cpl.php.
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
  *
- * 2) under the terms of the "The BSD License" a copy of which is
- *    available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/bsd-license.php.
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
  *
- * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
- *    copy of which is available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/gpl-license.php.
- *
- * Licensee has the right to choose one of the above licenses.
- *
- * Redistributions of source code must retain the above copyright
- * notice and one of the license notices.
- *
- * Redistributions in binary form must reproduce both the above copyright
- * notice, one of the license notices in the documentation
- * and/or other materials provided with the distribution.
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
  *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
  */
 
 #if !defined(RDMA_CM_H)
@@ -57,11 +61,11 @@ enum rdma_cm_event_type {
 };
 
 enum rdma_port_space {
-       RDMA_PS_SDP  = 0x0001,
-       RDMA_PS_IPOIB= 0x0002,
-       RDMA_PS_TCP  = 0x0106,
-       RDMA_PS_UDP  = 0x0111,
-       RDMA_PS_SCTP = 0x0183
+       RDMA_PS_SDP   = 0x0001,
+       RDMA_PS_IPOIB = 0x0002,
+       RDMA_PS_TCP   = 0x0106,
+       RDMA_PS_UDP   = 0x0111,
+       RDMA_PS_SCTP  = 0x0183
 };
 
 struct rdma_addr {
index 950424b38f1605b17adc6d41a961e7f629cf0c01..2389c3b4540498718860ef04dd5297498257b09a 100644 (file)
@@ -1,29 +1,33 @@
 /*
  * Copyright (c) 2006 Intel Corporation.  All rights reserved.
  *
- * This Software is licensed under one of the following licenses:
- *
- * 1) under the terms of the "Common Public License 1.0" a copy of which is
- *    available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/cpl.php.
- *
- * 2) under the terms of the "The BSD License" a copy of which is
- *    available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/bsd-license.php.
- *
- * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
- *    copy of which is available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/gpl-license.php.
- *
- * Licensee has the right to choose one of the above licenses.
- *
- * Redistributions of source code must retain the above copyright
- * notice and one of the license notices.
- *
- * Redistributions in binary form must reproduce both the above copyright
- * notice, one of the license notices in the documentation
- * and/or other materials provided with the distribution.
- *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
  */
 
 #if !defined(RDMA_CM_IB_H)
index d1a7605c5b8fe01368e7290f2d215512d646cc32..a5e026bc45c4174ec72a13b719fd44211385fb24 100644 (file)
  * of the License.
  */
 
+#include <linux/completion.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
 #include <linux/module.h>
 #include <linux/sched.h>
-#include <linux/delay.h>
+#include <linux/stacktrace.h>
+
+static void backtrace_test_normal(void)
+{
+       printk("Testing a backtrace from process context.\n");
+       printk("The following trace is a kernel self test and not a bug!\n");
 
-static struct timer_list backtrace_timer;
+       dump_stack();
+}
 
-static void backtrace_test_timer(unsigned long data)
+static DECLARE_COMPLETION(backtrace_work);
+
+static void backtrace_test_irq_callback(unsigned long data)
+{
+       dump_stack();
+       complete(&backtrace_work);
+}
+
+static DECLARE_TASKLET(backtrace_tasklet, &backtrace_test_irq_callback, 0);
+
+static void backtrace_test_irq(void)
 {
        printk("Testing a backtrace from irq context.\n");
        printk("The following trace is a kernel self test and not a bug!\n");
-       dump_stack();
+
+       init_completion(&backtrace_work);
+       tasklet_schedule(&backtrace_tasklet);
+       wait_for_completion(&backtrace_work);
+}
+
+#ifdef CONFIG_STACKTRACE
+static void backtrace_test_saved(void)
+{
+       struct stack_trace trace;
+       unsigned long entries[8];
+
+       printk("Testing a saved backtrace.\n");
+       printk("The following trace is a kernel self test and not a bug!\n");
+
+       trace.nr_entries = 0;
+       trace.max_entries = ARRAY_SIZE(entries);
+       trace.entries = entries;
+       trace.skip = 0;
+
+       save_stack_trace(&trace);
+       print_stack_trace(&trace, 0);
+}
+#else
+static void backtrace_test_saved(void)
+{
+       printk("Saved backtrace test skipped.\n");
 }
+#endif
+
 static int backtrace_regression_test(void)
 {
        printk("====[ backtrace testing ]===========\n");
-       printk("Testing a backtrace from process context.\n");
-       printk("The following trace is a kernel self test and not a bug!\n");
-       dump_stack();
 
-       init_timer(&backtrace_timer);
-       backtrace_timer.function = backtrace_test_timer;
-       mod_timer(&backtrace_timer, jiffies + 10);
+       backtrace_test_normal();
+       backtrace_test_irq();
+       backtrace_test_saved();
 
-       msleep(10);
        printk("====[ end of backtrace testing ]====\n");
        return 0;
 }
index 27a83ee41443c7f192c830d857deacce7af6c508..2913a8bff612e8d571c618d88c4fd77a222006c3 100644 (file)
@@ -300,11 +300,10 @@ EXPORT_SYMBOL_GPL(ktime_sub_ns);
  */
 u64 ktime_divns(const ktime_t kt, s64 div)
 {
-       u64 dclc, inc, dns;
+       u64 dclc;
        int sft = 0;
 
-       dclc = dns = ktime_to_ns(kt);
-       inc = div;
+       dclc = ktime_to_ns(kt);
        /* Make sure the divisor is less than 2^32: */
        while (div >> 32) {
                sft++;
@@ -632,8 +631,6 @@ void clock_was_set(void)
  */
 void hres_timers_resume(void)
 {
-       WARN_ON_ONCE(num_online_cpus() > 1);
-
        /* Retrigger the CPU local events: */
        retrigger_next_event(NULL);
 }
index 46d6611a33bbe4a7997637168feb543acd0f0b8e..77a51be360103c98c8ce3a165668ea423f69d119 100644 (file)
@@ -17,6 +17,8 @@
 
 #ifdef CONFIG_SMP
 
+cpumask_t irq_default_affinity = CPU_MASK_ALL;
+
 /**
  *     synchronize_irq - wait for pending IRQ handlers (on other CPUs)
  *     @irq: interrupt number to wait for
@@ -95,6 +97,27 @@ int irq_set_affinity(unsigned int irq, cpumask_t cpumask)
        return 0;
 }
 
+#ifndef CONFIG_AUTO_IRQ_AFFINITY
+/*
+ * Generic version of the affinity autoselector.
+ */
+int irq_select_affinity(unsigned int irq)
+{
+       cpumask_t mask;
+
+       if (!irq_can_set_affinity(irq))
+               return 0;
+
+       cpus_and(mask, cpu_online_map, irq_default_affinity);
+
+       irq_desc[irq].affinity = mask;
+       irq_desc[irq].chip->set_affinity(irq, mask);
+
+       set_balance_irq_affinity(irq, mask);
+       return 0;
+}
+#endif
+
 #endif
 
 /**
@@ -354,7 +377,7 @@ int setup_irq(unsigned int irq, struct irqaction *new)
 
                /* Setup the type (level, edge polarity) if configured: */
                if (new->flags & IRQF_TRIGGER_MASK) {
-                       if (desc->chip && desc->chip->set_type)
+                       if (desc->chip->set_type)
                                desc->chip->set_type(irq,
                                                new->flags & IRQF_TRIGGER_MASK);
                        else
@@ -364,8 +387,7 @@ int setup_irq(unsigned int irq, struct irqaction *new)
                                 */
                                printk(KERN_WARNING "No IRQF_TRIGGER set_type "
                                       "function for IRQ %d (%s)\n", irq,
-                                      desc->chip ? desc->chip->name :
-                                      "unknown");
+                                      desc->chip->name);
                } else
                        compat_irq_chip_set_default_handler(desc);
 
@@ -382,6 +404,9 @@ int setup_irq(unsigned int irq, struct irqaction *new)
                } else
                        /* Undo nested disables: */
                        desc->depth = 1;
+
+               /* Set default affinity mask once everything is setup */
+               irq_select_affinity(irq);
        }
        /* Reset broken irq detection when installing new handler */
        desc->irq_count = 0;
@@ -571,8 +596,6 @@ int request_irq(unsigned int irq, irq_handler_t handler,
        action->next = NULL;
        action->dev_id = dev_id;
 
-       select_smp_affinity(irq);
-
 #ifdef CONFIG_DEBUG_SHIRQ
        if (irqflags & IRQF_SHARED) {
                /*
index c2f2ccb0549a18a8552e0c8a018a198e4c70195e..6c6d35d68ee9c9628d5e5b67f2e32fdd91553f52 100644 (file)
@@ -44,7 +44,7 @@ static int irq_affinity_write_proc(struct file *file, const char __user *buffer,
                                   unsigned long count, void *data)
 {
        unsigned int irq = (int)(long)data, full_count = count, err;
-       cpumask_t new_value, tmp;
+       cpumask_t new_value;
 
        if (!irq_desc[irq].chip->set_affinity || no_irq_affinity ||
            irq_balancing_disabled(irq))
@@ -62,17 +62,51 @@ static int irq_affinity_write_proc(struct file *file, const char __user *buffer,
         * way to make the system unusable accidentally :-) At least
         * one online CPU still has to be targeted.
         */
-       cpus_and(tmp, new_value, cpu_online_map);
-       if (cpus_empty(tmp))
+       if (!cpus_intersects(new_value, cpu_online_map))
                /* Special case for empty set - allow the architecture
                   code to set default SMP affinity. */
-               return select_smp_affinity(irq) ? -EINVAL : full_count;
+               return irq_select_affinity(irq) ? -EINVAL : full_count;
 
        irq_set_affinity(irq, new_value);
 
        return full_count;
 }
 
+static int default_affinity_read(char *page, char **start, off_t off,
+                                 int count, int *eof, void *data)
+{
+       int len = cpumask_scnprintf(page, count, irq_default_affinity);
+       if (count - len < 2)
+               return -EINVAL;
+       len += sprintf(page + len, "\n");
+       return len;
+}
+
+static int default_affinity_write(struct file *file, const char __user *buffer,
+                                  unsigned long count, void *data)
+{
+       unsigned int full_count = count, err;
+       cpumask_t new_value;
+
+       err = cpumask_parse_user(buffer, count, new_value);
+       if (err)
+               return err;
+
+       if (!is_affinity_mask_valid(new_value))
+               return -EINVAL;
+
+       /*
+        * Do not allow disabling IRQs completely - it's a too easy
+        * way to make the system unusable accidentally :-) At least
+        * one online CPU still has to be targeted.
+        */
+       if (!cpus_intersects(new_value, cpu_online_map))
+               return -EINVAL;
+
+       irq_default_affinity = new_value;
+
+       return full_count;
+}
 #endif
 
 static int irq_spurious_read(char *page, char **start, off_t off,
@@ -171,6 +205,21 @@ void unregister_handler_proc(unsigned int irq, struct irqaction *action)
                remove_proc_entry(action->dir->name, irq_desc[irq].dir);
 }
 
+void register_default_affinity_proc(void)
+{
+#ifdef CONFIG_SMP
+       struct proc_dir_entry *entry;
+
+       /* create /proc/irq/default_smp_affinity */
+       entry = create_proc_entry("default_smp_affinity", 0600, root_irq_dir);
+       if (entry) {
+               entry->data = NULL;
+               entry->read_proc  = default_affinity_read;
+               entry->write_proc = default_affinity_write;
+       }
+#endif
+}
+
 void init_irq_proc(void)
 {
        int i;
@@ -180,6 +229,8 @@ void init_irq_proc(void)
        if (!root_irq_dir)
                return;
 
+       register_default_affinity_proc();
+
        /*
         * Create entries for all existing IRQs.
         */
index f1525ad06cb3ebbb83680b2bc0176854002217ca..c42a03aef36f07fd326eba959a90aa7bbe45dbd8 100644 (file)
@@ -1037,6 +1037,9 @@ static void check_thread_timers(struct task_struct *tsk,
                                sig->rlim[RLIMIT_RTTIME].rlim_cur +=
                                                                USEC_PER_SEC;
                        }
+                       printk(KERN_INFO
+                               "RT Watchdog Timeout: %s[%d]\n",
+                               tsk->comm, task_pid_nr(tsk));
                        __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
                }
        }
index b71816e47a3019ed3d55f0a7ceb5cc179fa32370..94b527ef1d1e37fe060ab812f13ef7276910549f 100644 (file)
@@ -6,19 +6,21 @@
  *  Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
  */
 #include <linux/sched.h>
+#include <linux/module.h>
 #include <linux/kallsyms.h>
 #include <linux/stacktrace.h>
 
 void print_stack_trace(struct stack_trace *trace, int spaces)
 {
-       int i, j;
+       int i;
 
-       for (i = 0; i < trace->nr_entries; i++) {
-               unsigned long ip = trace->entries[i];
+       if (WARN_ON(!trace->entries))
+               return;
 
-               for (j = 0; j < spaces + 1; j++)
-                       printk(" ");
-               print_ip_sym(ip);
+       for (i = 0; i < trace->nr_entries; i++) {
+               printk("%*c", 1 + spaces, ' ');
+               print_ip_sym(trace->entries[i]);
        }
 }
+EXPORT_SYMBOL_GPL(print_stack_trace);
 
index d63008b09a4cee1cfc71b5e53e929b39312afff0..beef7ccdf842f3cea9994c19b879baa06f3e94f5 100644 (file)
@@ -48,6 +48,13 @@ static void tick_do_update_jiffies64(ktime_t now)
        unsigned long ticks = 0;
        ktime_t delta;
 
+       /*
+        * Do a quick check without holding xtime_lock:
+        */
+       delta = ktime_sub(now, last_jiffies_update);
+       if (delta.tv64 < tick_period.tv64)
+               return;
+
        /* Reevalute with xtime_lock held */
        write_seqlock(&xtime_lock);
 
@@ -228,6 +235,7 @@ void tick_nohz_stop_sched_tick(void)
                               local_softirq_pending());
                        ratelimit++;
                }
+               goto end;
        }
 
        ts->idle_calls++;
index d8b6279a9b4232cf7ca92305199b4a915cd0f591..c459e8547bd8cd11ede47e50f44e055fa1d06a6b 100644 (file)
@@ -419,7 +419,6 @@ config DEBUG_LOCKING_API_SELFTESTS
 
 config STACKTRACE
        bool
-       depends on DEBUG_KERNEL
        depends on STACKTRACE_SUPPORT
 
 config DEBUG_KOBJECT
@@ -563,6 +562,9 @@ config BACKTRACE_SELF_TEST
          for distributions or general kernels, but only for kernel
          developers working on architecture code.
 
+         Note that if you want to also test saved backtraces, you will
+         have to enable STACKTRACE as well.
+
          Say N if you are unsure.
 
 config LKDTM
index 119174494cb5c096eaf5b1da239dbd5a4040ebc0..4a8ba4bf5f6f2b1c0de7d16f794d6d39cbb00d31 100644 (file)
@@ -52,7 +52,7 @@ EXPORT_SYMBOL(__percpu_counter_add);
  * Add up all the per-cpu counts, return the result.  This is a more accurate
  * but much slower version of percpu_counter_read_positive()
  */
-s64 __percpu_counter_sum(struct percpu_counter *fbc)
+s64 __percpu_counter_sum(struct percpu_counter *fbc, int set)
 {
        s64 ret;
        int cpu;
@@ -62,7 +62,12 @@ s64 __percpu_counter_sum(struct percpu_counter *fbc)
        for_each_online_cpu(cpu) {
                s32 *pcount = per_cpu_ptr(fbc->counters, cpu);
                ret += *pcount;
+               if (set)
+                       *pcount = 0;
        }
+       if (set)
+               fbc->count = ret;
+
        spin_unlock(&fbc->lock);
        return ret;
 }
index 1e6a7d34874fd31f3e9549c7540a4ce28c2402c6..65d9d9e2b755e7dd7e662d4dd5cc45b19514a7ab 100644 (file)
@@ -236,11 +236,12 @@ int filemap_fdatawrite(struct address_space *mapping)
 }
 EXPORT_SYMBOL(filemap_fdatawrite);
 
-static int filemap_fdatawrite_range(struct address_space *mapping, loff_t start,
+int filemap_fdatawrite_range(struct address_space *mapping, loff_t start,
                                loff_t end)
 {
        return __filemap_fdatawrite_range(mapping, start, end, WB_SYNC_ALL);
 }
+EXPORT_SYMBOL(filemap_fdatawrite_range);
 
 /**
  * filemap_flush - mostly a non-blocking flush
index b38f700825fca31b81a48ecce6d60b7856bfcf84..94c6d8988ab3239019ecc720b7ac0624e06016df 100644 (file)
@@ -960,6 +960,9 @@ retry:
        }
        if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
                mapping->writeback_index = index;
+
+       if (wbc->range_cont)
+               wbc->range_start = index << PAGE_CACHE_SHIFT;
        return ret;
 }
 EXPORT_SYMBOL(write_cache_pages);
index 91200feb3f9c3792c2b06218a6ff44c667e168b2..63f131fc42e4e2e2eaedbe5f89d52ea3fe5fe000 100644 (file)
@@ -555,15 +555,13 @@ static int selinux_set_mnt_opts(struct super_block *sb,
        struct task_security_struct *tsec = current->security;
        struct superblock_security_struct *sbsec = sb->s_security;
        const char *name = sb->s_type->name;
-       struct dentry *root = sb->s_root;
-       struct inode *root_inode = root->d_inode;
-       struct inode_security_struct *root_isec = root_inode->i_security;
+       struct inode *inode = sbsec->sb->s_root->d_inode;
+       struct inode_security_struct *root_isec = inode->i_security;
        u32 fscontext_sid = 0, context_sid = 0, rootcontext_sid = 0;
        u32 defcontext_sid = 0;
        char **mount_options = opts->mnt_opts;
        int *flags = opts->mnt_opts_flags;
        int num_opts = opts->num_mnt_opts;
-       bool can_xattr = false;
 
        mutex_lock(&sbsec->lock);
 
@@ -667,24 +665,14 @@ static int selinux_set_mnt_opts(struct super_block *sb,
                goto out;
        }
 
-       if (strcmp(name, "proc") == 0)
+       if (strcmp(sb->s_type->name, "proc") == 0)
                sbsec->proc = 1;
 
-       /*
-        * test if the fs supports xattrs, fs_use might make use of this if the
-        * fs has no definition in policy.
-        */
-       if (root_inode->i_op->getxattr) {
-               rc = root_inode->i_op->getxattr(root, XATTR_NAME_SELINUX, NULL, 0);
-               if (rc >= 0 || rc == -ENODATA)
-                       can_xattr = true;
-       }
-
        /* Determine the labeling behavior to use for this filesystem type. */
-       rc = security_fs_use(name, &sbsec->behavior, &sbsec->sid, can_xattr);
+       rc = security_fs_use(sb->s_type->name, &sbsec->behavior, &sbsec->sid);
        if (rc) {
                printk(KERN_WARNING "%s: security_fs_use(%s) returned %d\n",
-                      __func__, name, rc);
+                      __func__, sb->s_type->name, rc);
                goto out;
        }
 
index 44cba2e21dcf2a9e65f273be89ae970d01d78f5a..7c543003d653676f5d72a34ffd60d1f301b2d11a 100644 (file)
@@ -136,7 +136,7 @@ int security_get_allow_unknown(void);
 #define SECURITY_FS_USE_MNTPOINT       6 /* use mountpoint labeling */
 
 int security_fs_use(const char *fstype, unsigned int *behavior,
-       u32 *sid, bool can_xattr);
+       u32 *sid);
 
 int security_genfs_sid(const char *fstype, char *name, u16 sclass,
        u32 *sid);
index 8e42da120101432885fa63ac1b867c6f8c227d0e..b52f923ce680f95fc5b8ffbbebc70a87697db132 100644 (file)
@@ -1934,8 +1934,7 @@ out:
 int security_fs_use(
        const char *fstype,
        unsigned int *behavior,
-       u32 *sid,
-       bool can_xattr)
+       u32 *sid)
 {
        int rc = 0;
        struct ocontext *c;
@@ -1949,7 +1948,6 @@ int security_fs_use(
                c = c->next;
        }
 
-       /* look for labeling behavior defined in policy */
        if (c) {
                *behavior = c->v.behavior;
                if (!c->sid[0]) {
@@ -1960,23 +1958,14 @@ int security_fs_use(
                                goto out;
                }
                *sid = c->sid[0];
-               goto out;
-       }
-
-       /* labeling behavior not in policy, use xattrs if possible */
-       if (can_xattr) {
-               *behavior = SECURITY_FS_USE_XATTR;
-               *sid = SECINITSID_FS;
-               goto out;
-       }
-
-       /* no behavior in policy and can't use xattrs, try GENFS */
-       rc = security_genfs_sid(fstype, "/", SECCLASS_DIR, sid);
-       if (rc) {
-               *behavior = SECURITY_FS_USE_NONE;
-               rc = 0;
        } else {
-               *behavior = SECURITY_FS_USE_GENFS;
+               rc = security_genfs_sid(fstype, "/", SECCLASS_DIR, sid);
+               if (rc) {
+                       *behavior = SECURITY_FS_USE_NONE;
+                       rc = 0;
+               } else {
+                       *behavior = SECURITY_FS_USE_GENFS;
+               }
        }
 
 out: